Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -102,15 +102,42 @@ def search_hub(query: str, search_type: str, token: str = None) -> pd.DataFrame:
|
|
| 102 |
async def download_readme(session: aiohttp.ClientSession, item: Dict, token: str) -> tuple[str, str]:
|
| 103 |
"""Download README.md file for a given item."""
|
| 104 |
item_id = item['id']
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
headers = {"Authorization": f"Bearer {token}"} if token else {}
|
| 107 |
|
| 108 |
try:
|
|
|
|
| 109 |
async with session.get(raw_url, headers=headers) as response:
|
| 110 |
if response.status == 200:
|
| 111 |
content = await response.text()
|
| 112 |
return item_id.replace('/', '_'), content
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
except Exception as e:
|
| 115 |
return item_id.replace('/', '_'), f"# Error downloading README for {item_id}\nError: {str(e)}"
|
| 116 |
|
|
@@ -118,9 +145,10 @@ async def download_all_readmes(data: List[Dict], token: str) -> tuple[str, str]:
|
|
| 118 |
"""Download all README files and create a zip archive."""
|
| 119 |
if not data:
|
| 120 |
return "", "No results to download"
|
| 121 |
-
|
| 122 |
zip_buffer = io.BytesIO()
|
| 123 |
status_message = "Downloading READMEs..."
|
|
|
|
| 124 |
|
| 125 |
async with aiohttp.ClientSession() as session:
|
| 126 |
tasks = [download_readme(session, item, token) for item in data]
|
|
@@ -128,11 +156,17 @@ async def download_all_readmes(data: List[Dict], token: str) -> tuple[str, str]:
|
|
| 128 |
|
| 129 |
with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
|
| 130 |
for filename, content in results:
|
|
|
|
|
|
|
| 131 |
zip_file.writestr(f"{filename}.md", content)
|
| 132 |
|
| 133 |
zip_buffer.seek(0)
|
| 134 |
base64_zip = base64.b64encode(zip_buffer.getvalue()).decode()
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
download_link = f"""
|
| 137 |
<div style="margin-top: 10px;">
|
| 138 |
<a href="data:application/zip;base64,{base64_zip}"
|
|
@@ -142,10 +176,11 @@ async def download_all_readmes(data: List[Dict], token: str) -> tuple[str, str]:
|
|
| 142 |
text-decoration: none; border-radius: 5px;">
|
| 143 |
📥 Download READMEs Archive
|
| 144 |
</a>
|
|
|
|
| 145 |
</div>
|
| 146 |
"""
|
| 147 |
|
| 148 |
-
return download_link,
|
| 149 |
|
| 150 |
def download_repository(repo_id: str, repo_type: str, temp_dir: str, token: str) -> str:
|
| 151 |
"""Download a single repository."""
|
|
|
|
| 102 |
async def download_readme(session: "aiohttp.ClientSession", item: Dict, token: str) -> tuple[str, str]:
    """Download the README.md of a Hub repository, trying ``main`` then ``master``.

    Args:
        session: Client session whose ``get(url, headers=...)`` is used as an
            async context manager for each request.
        item: Search result providing ``item['id']`` (the repository id) and
            ``item['link']`` (the repository page link, used to tell
            datasets and spaces apart from models).
        token: Optional access token; sent as a ``Bearer`` Authorization
            header when truthy, otherwise no auth header is sent.

    Returns:
        ``(name, content)`` where ``name`` is the repository id with ``/``
        replaced by ``_`` and ``content`` is either the README text or a
        Markdown note beginning with ``# Error downloading README``.

    Exceptions raised while requesting or reading a response are folded into
    the returned error note; a missing ``'id'`` or ``'link'`` key still raises
    ``KeyError``.
    """
    item_id = item['id']
    safe_name = item_id.replace('/', '_')

    # Match the type prefix directly in front of the repo id. A bare
    # "'spaces' in link" test misroutes models whose own name contains the
    # word (e.g. "acme/spaces-classifier").
    # NOTE(review): assumes the link embeds "<type>/<repo id>" verbatim.
    link = item['link']
    if f"datasets/{item_id}" in link:
        type_prefix = "datasets/"
    elif f"spaces/{item_id}" in link:
        type_prefix = "spaces/"
    else:  # Models live at the site root
        type_prefix = ""
    base_url = f"https://huggingface.co/{type_prefix}{item_id}/raw"

    headers = {"Authorization": f"Bearer {token}"} if token else {}

    try:
        main_status = None
        for branch in ("main", "master"):
            url = f"{base_url}/{branch}/README.md"
            async with session.get(url, headers=headers) as response:
                if response.status == 200:
                    return safe_name, await response.text()
                status = response.status

            # The error note reports the status of the first (main) attempt.
            if main_status is None:
                main_status = status
            # Only 401/404 justify trying the other branch name.
            if status not in (401, 404):
                break

        error_msg = f"# Error downloading README for {item_id}\n"
        if main_status == 401:
            error_msg += "Authentication required. Please provide a valid HuggingFace token."
        else:
            error_msg += f"Status code: {main_status}"
        return safe_name, error_msg

    except Exception as e:
        # Best effort: one failing repository must not abort the whole batch.
        return safe_name, f"# Error downloading README for {item_id}\nError: {e}"
|
| 143 |
|
|
|
|
| 145 |
"""Download all README files and create a zip archive."""
|
| 146 |
if not data:
|
| 147 |
return "", "No results to download"
|
| 148 |
+
|
| 149 |
zip_buffer = io.BytesIO()
|
| 150 |
status_message = "Downloading READMEs..."
|
| 151 |
+
failed_downloads = []
|
| 152 |
|
| 153 |
async with aiohttp.ClientSession() as session:
|
| 154 |
tasks = [download_readme(session, item, token) for item in data]
|
|
|
|
| 156 |
|
| 157 |
with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
|
| 158 |
for filename, content in results:
|
| 159 |
+
if "Error downloading README" in content:
|
| 160 |
+
failed_downloads.append(filename)
|
| 161 |
zip_file.writestr(f"{filename}.md", content)
|
| 162 |
|
| 163 |
zip_buffer.seek(0)
|
| 164 |
base64_zip = base64.b64encode(zip_buffer.getvalue()).decode()
|
| 165 |
|
| 166 |
+
status = "READMEs ready for download!"
|
| 167 |
+
if failed_downloads:
|
| 168 |
+
status += f" (Failed to download {len(failed_downloads)} READMEs)"
|
| 169 |
+
|
| 170 |
download_link = f"""
|
| 171 |
<div style="margin-top: 10px;">
|
| 172 |
<a href="data:application/zip;base64,{base64_zip}"
|
|
|
|
| 176 |
text-decoration: none; border-radius: 5px;">
|
| 177 |
📥 Download READMEs Archive
|
| 178 |
</a>
|
| 179 |
+
{f'<p style="color: #ff6b6b; margin-top: 10px;">Note: Some READMEs could not be downloaded. Please check the zip file for details.</p>' if failed_downloads else ''}
|
| 180 |
</div>
|
| 181 |
"""
|
| 182 |
|
| 183 |
+
return download_link, status
|
| 184 |
|
| 185 |
def download_repository(repo_id: str, repo_type: str, temp_dir: str, token: str) -> str:
|
| 186 |
"""Download a single repository."""
|