Spaces:
Runtime error
Runtime error
| import io | |
| from string import ascii_lowercase | |
| import aiohttp | |
| from client import HybridClient | |
| from preprocessing import index_pdf | |
# Filename prefix letters, looked up by grade index in get_url:
# the first ten lowercase letters, "a" through "j".
grade_map = ascii_lowercase[:10]

# Subject name -> subject code that get_url places after the grade letter
# when it builds a textbook PDF filename.
subject_map = {
    "science": "esc1",
    "geography": "ess1",
    "economics": "ess2",
    "history": "ess3",
    "politics": "ess4",
}
def get_url(grade, subject, chapter):
    """Build the ncert.nic.in download URL for one chapter PDF.

    The filename is the grade letter from ``grade_map``, followed by the
    subject code from ``subject_map``, followed by the chapter number
    left-padded with zeros to at least two characters.

    Args:
        grade: Index into ``grade_map``.
            NOTE(review): used directly as a zero-based index, so 0 maps to
            "a" and 10 raises IndexError -- confirm callers do not pass a
            1-based class number.
        subject: Key of ``subject_map`` (e.g. "science").
        chapter: Chapter number; converted with ``str()`` before padding.

    Returns:
        The URL string for the chapter PDF.

    Raises:
        IndexError: If ``grade`` is outside the range of ``grade_map``.
        KeyError: If ``subject`` is not a key of ``subject_map``.
    """
    parts = (
        grade_map[grade],
        subject_map[subject],
        str(chapter).zfill(2),
    )
    filename = "".join(parts)
    return f"https://ncert.nic.in/textbook/pdf/{filename}.pdf"
async def get_book(grade, subject):
    """Download the chapter PDFs of one textbook, chapter by chapter.

    Chapters are requested in order starting at 1 through a single
    ``aiohttp.ClientSession``. The loop stops at the first chapter for which
    ``download`` returns ``None`` (any download failure), which is treated as
    "no more chapters".

    Args:
        grade: Grade value forwarded to ``get_url`` and used in the
            collection name.
        subject: Subject name forwarded to ``get_url`` and used in the
            collection name.

    Returns:
        A dict mapping the collection name ``f"{grade}_{subject}"`` to the
        downloaded PDF buffer. The dict is empty when chapter 1 could not be
        downloaded.
    """
    book = {}
    # The collection name does not depend on the chapter, so build it once.
    collection = f"{grade}_{subject}"
    chapter_num = 1
    async with aiohttp.ClientSession() as session:
        while True:
            url = get_url(grade, subject, chapter_num)
            # download() is a coroutine function: without ``await`` the
            # result is a coroutine object, which is always truthy and is
            # never actually executed.
            pdf = await download(session, url)
            if pdf is None:
                break
            # NOTE(review): every chapter is stored under the same key, so
            # only the last successfully downloaded chapter is kept. Confirm
            # whether per-chapter storage was intended; changing it would
            # alter the return shape that upload_book consumes.
            book[collection] = pdf
            # Advance to the next chapter; without this the loop would
            # request chapter 1 forever.
            chapter_num += 1
    return book
async def download(session, url):
    """Fetch ``url`` and return the response body as an in-memory buffer.

    Best-effort: any exception raised while requesting or reading the
    response is printed and turned into a ``None`` result instead of
    propagating.

    Args:
        session: Object whose ``get(url, timeout=...)`` returns an async
            context manager yielding the response (get_book passes an
            ``aiohttp.ClientSession``).
        url: Address of the PDF to fetch.

    Returns:
        An ``io.BytesIO`` holding the full body, positioned at offset 0, or
        ``None`` if anything went wrong.
    """
    chunk_size = 1000000  # size argument passed to iter_chunked
    try:
        async with session.get(url, timeout=10) as response:
            # Let the response raise for an error status so it lands in the
            # except branch below.
            response.raise_for_status()
            buffer = io.BytesIO()
            async for piece in response.content.iter_chunked(chunk_size):
                buffer.write(piece)
            # Rewind so the caller reads from the beginning of the data.
            buffer.seek(0)
            return buffer
    except Exception as e:
        print(f"Error downloading or processing PDF: {e}")
        return None
def upload_book(grade, subject):
    """Download a textbook and insert its indexed chunks via HybridClient.

    For every ``(collection, pdf)`` pair returned by ``get_book``, the PDF is
    passed to ``index_pdf``, the collection is created on the client, and the
    resulting chunks are inserted into it.

    Args:
        grade: Grade value forwarded to ``get_book``.
        subject: Subject name forwarded to ``get_book``.

    Raises:
        RuntimeError: If called while an asyncio event loop is already
            running in this thread (``asyncio.run`` cannot be nested).
    """
    # Imported locally so this function brings its own dependency into scope.
    import asyncio

    hclient = HybridClient()
    # get_book is a coroutine function; it has to be driven by an event loop
    # to obtain the dict. Calling it bare yields a coroutine object, which
    # has no .items().
    book = asyncio.run(get_book(grade, subject))
    for collection, pdf in book.items():
        chunks = index_pdf(pdf)
        hclient.create(collection)
        hclient.insert(collection, chunks)