Update app.py
Browse files
app.py
CHANGED
@@ -102,15 +102,42 @@ def search_hub(query: str, search_type: str, token: str = None) -> pd.DataFrame:
|
|
102 |
async def download_readme(session: aiohttp.ClientSession, item: Dict, token: str) -> tuple[str, str]:
    """Download the README.md of one Hub item from its ``main`` branch.

    Args:
        session: Open aiohttp session used for the GET request.
        item: Search-result record. ``item['id']`` is the repository id;
            ``item['link']`` (optional) is the item's page URL and is used
            only to tell datasets and spaces apart from models.
        token: Optional access token, sent as a Bearer header when truthy.

    Returns:
        ``(name, text)`` where ``name`` is the id with ``/`` replaced by ``_``
        and ``text`` is either the README body or a Markdown error note that
        starts with ``# Error downloading README for``.
    """
    item_id = item['id']
    safe_name = item_id.replace('/', '_')

    # Work out the repository type from the link. Only whole path segments
    # that come before the id are considered, so a model whose name merely
    # contains "datasets" or "spaces" is still treated as a model.
    # NOTE(review): assumes links look like https://huggingface.co/datasets/<id> - confirm.
    link = item.get('link') or ''
    prefix = link.rsplit(item_id, 1)[0] if item_id else link
    segments = prefix.split('/')
    if 'datasets' in segments:
        base_url = f"https://huggingface.co/datasets/{item_id}"
    elif 'spaces' in segments:
        base_url = f"https://huggingface.co/spaces/{item_id}"
    else:  # Models live at the site root
        base_url = f"https://huggingface.co/{item_id}"
    raw_url = f"{base_url}/raw/main/README.md"

    headers = {"Authorization": f"Bearer {token}"} if token else {}

    try:
        async with session.get(raw_url, headers=headers) as response:
            if response.status == 200:
                content = await response.text()
                return safe_name, content
            # Always hand back a (name, text) pair so callers can unpack it.
            return safe_name, f"# Error downloading README for {item_id}\nStatus code: {response.status}"
    except Exception as e:
        # Best effort: a failed item is reported in its own file instead of raising.
        return safe_name, f"# Error downloading README for {item_id}\nError: {str(e)}"
|
116 |
|
@@ -118,9 +145,10 @@ async def download_all_readmes(data: List[Dict], token: str) -> tuple[str, str]:
|
|
118 |
"""Download all README files and create a zip archive."""
|
119 |
if not data:
|
120 |
return "", "No results to download"
|
121 |
-
|
122 |
zip_buffer = io.BytesIO()
|
123 |
status_message = "Downloading READMEs..."
|
|
|
124 |
|
125 |
async with aiohttp.ClientSession() as session:
|
126 |
tasks = [download_readme(session, item, token) for item in data]
|
@@ -128,11 +156,17 @@ async def download_all_readmes(data: List[Dict], token: str) -> tuple[str, str]:
|
|
128 |
|
129 |
with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
|
130 |
for filename, content in results:
|
|
|
|
|
131 |
zip_file.writestr(f"{filename}.md", content)
|
132 |
|
133 |
zip_buffer.seek(0)
|
134 |
base64_zip = base64.b64encode(zip_buffer.getvalue()).decode()
|
135 |
|
|
|
|
|
|
|
|
|
136 |
download_link = f"""
|
137 |
<div style="margin-top: 10px;">
|
138 |
<a href="data:application/zip;base64,{base64_zip}"
|
@@ -142,10 +176,11 @@ async def download_all_readmes(data: List[Dict], token: str) -> tuple[str, str]:
|
|
142 |
text-decoration: none; border-radius: 5px;">
|
143 |
📥 Download READMEs Archive
|
144 |
</a>
|
|
|
145 |
</div>
|
146 |
"""
|
147 |
|
148 |
-
return download_link,
|
149 |
|
150 |
def download_repository(repo_id: str, repo_type: str, temp_dir: str, token: str) -> str:
|
151 |
"""Download a single repository."""
|
|
|
102 |
async def download_readme(session: aiohttp.ClientSession, item: Dict, token: str) -> tuple[str, str]:
    """Download the README.md of one Hub item, trying ``main`` then ``master``.

    Args:
        session: Open aiohttp session used for the GET requests.
        item: Search-result record. ``item['id']`` is the repository id;
            ``item['link']`` (optional) is the item's page URL and is used
            only to tell datasets and spaces apart from models.
        token: Optional access token, sent as a Bearer header when truthy.

    Returns:
        ``(name, text)`` where ``name`` is the id with ``/`` replaced by ``_``
        and ``text`` is either the README body or a Markdown error note that
        starts with ``# Error downloading README for``.
    """
    item_id = item['id']
    safe_name = item_id.replace('/', '_')

    # Different base URLs for different repository types. Only whole path
    # segments that come before the id are considered, so a model whose name
    # merely contains "datasets" or "spaces" is still treated as a model.
    # NOTE(review): assumes links look like https://huggingface.co/datasets/<id> - confirm.
    link = item.get('link') or ''
    prefix = link.rsplit(item_id, 1)[0] if item_id else link
    segments = prefix.split('/')
    if 'datasets' in segments:
        base_url = f"https://huggingface.co/datasets/{item_id}"
    elif 'spaces' in segments:
        base_url = f"https://huggingface.co/spaces/{item_id}"
    else:  # Models live at the site root
        base_url = f"https://huggingface.co/{item_id}"

    headers = {"Authorization": f"Bearer {token}"} if token else {}

    try:
        main_status = None
        for branch in ("main", "master"):
            url = f"{base_url}/raw/{branch}/README.md"
            async with session.get(url, headers=headers) as response:
                if response.status == 200:
                    return safe_name, await response.text()
                if main_status is None:
                    main_status = response.status
            # Fall back to the master branch only when main answered 401 or 404;
            # any other failure is reported straight away.
            if main_status not in (401, 404):
                break

        # Both attempts failed: describe the failure using the main-branch status.
        error_msg = f"# Error downloading README for {item_id}\n"
        if main_status == 401:
            error_msg += "Authentication required. Please provide a valid HuggingFace token."
        else:
            error_msg += f"Status code: {main_status}"
        return safe_name, error_msg

    except Exception as e:
        # Best effort: a failed item is reported in its own file instead of raising.
        return safe_name, f"# Error downloading README for {item_id}\nError: {str(e)}"
|
143 |
|
|
|
145 |
"""Download all README files and create a zip archive."""
|
146 |
if not data:
|
147 |
return "", "No results to download"
|
148 |
+
|
149 |
zip_buffer = io.BytesIO()
|
150 |
status_message = "Downloading READMEs..."
|
151 |
+
failed_downloads = []
|
152 |
|
153 |
async with aiohttp.ClientSession() as session:
|
154 |
tasks = [download_readme(session, item, token) for item in data]
|
|
|
156 |
|
157 |
with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
|
158 |
for filename, content in results:
|
159 |
+
if "Error downloading README" in content:
|
160 |
+
failed_downloads.append(filename)
|
161 |
zip_file.writestr(f"{filename}.md", content)
|
162 |
|
163 |
zip_buffer.seek(0)
|
164 |
base64_zip = base64.b64encode(zip_buffer.getvalue()).decode()
|
165 |
|
166 |
+
status = "READMEs ready for download!"
|
167 |
+
if failed_downloads:
|
168 |
+
status += f" (Failed to download {len(failed_downloads)} READMEs)"
|
169 |
+
|
170 |
download_link = f"""
|
171 |
<div style="margin-top: 10px;">
|
172 |
<a href="data:application/zip;base64,{base64_zip}"
|
|
|
176 |
text-decoration: none; border-radius: 5px;">
|
177 |
📥 Download READMEs Archive
|
178 |
</a>
|
179 |
+
{f'<p style="color: #ff6b6b; margin-top: 10px;">Note: Some READMEs could not be downloaded. Please check the zip file for details.</p>' if failed_downloads else ''}
|
180 |
</div>
|
181 |
"""
|
182 |
|
183 |
+
return download_link, status
|
184 |
|
185 |
def download_repository(repo_id: str, repo_type: str, temp_dir: str, token: str) -> str:
|
186 |
"""Download a single repository."""
|