awacke1 committed on
Commit
f573f02
·
verified ·
1 Parent(s): 6b6ee18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -4
app.py CHANGED
@@ -102,15 +102,42 @@ def search_hub(query: str, search_type: str, token: str = None) -> pd.DataFrame:
102
  async def download_readme(session: aiohttp.ClientSession, item: Dict, token: str) -> tuple[str, str]:
103
  """Download README.md file for a given item."""
104
  item_id = item['id']
105
- raw_url = f"https://huggingface.co/{item_id}/raw/main/README.md"
 
 
 
 
 
 
 
 
 
 
 
106
  headers = {"Authorization": f"Bearer {token}"} if token else {}
107
 
108
  try:
 
109
  async with session.get(raw_url, headers=headers) as response:
110
  if response.status == 200:
111
  content = await response.text()
112
  return item_id.replace('/', '_'), content
113
- return item_id.replace('/', '_'), f"# Error downloading README for {item_id}\nStatus code: {response.status}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  except Exception as e:
115
  return item_id.replace('/', '_'), f"# Error downloading README for {item_id}\nError: {str(e)}"
116
 
@@ -118,9 +145,10 @@ async def download_all_readmes(data: List[Dict], token: str) -> tuple[str, str]:
118
  """Download all README files and create a zip archive."""
119
  if not data:
120
  return "", "No results to download"
121
-
122
  zip_buffer = io.BytesIO()
123
  status_message = "Downloading READMEs..."
 
124
 
125
  async with aiohttp.ClientSession() as session:
126
  tasks = [download_readme(session, item, token) for item in data]
@@ -128,11 +156,17 @@ async def download_all_readmes(data: List[Dict], token: str) -> tuple[str, str]:
128
 
129
  with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
130
  for filename, content in results:
 
 
131
  zip_file.writestr(f"{filename}.md", content)
132
 
133
  zip_buffer.seek(0)
134
  base64_zip = base64.b64encode(zip_buffer.getvalue()).decode()
135
 
 
 
 
 
136
  download_link = f"""
137
  <div style="margin-top: 10px;">
138
  <a href="data:application/zip;base64,{base64_zip}"
@@ -142,10 +176,11 @@ async def download_all_readmes(data: List[Dict], token: str) -> tuple[str, str]:
142
  text-decoration: none; border-radius: 5px;">
143
  📥 Download READMEs Archive
144
  </a>
 
145
  </div>
146
  """
147
 
148
- return download_link, "READMEs ready for download!"
149
 
150
  def download_repository(repo_id: str, repo_type: str, temp_dir: str, token: str) -> str:
151
  """Download a single repository."""
 
102
  async def download_readme(session: aiohttp.ClientSession, item: Dict, token: str) -> tuple[str, str]:
103
  """Download README.md file for a given item."""
104
  item_id = item['id']
105
+
106
+ # Different base URLs for different repository types
107
+ if 'datasets' in item['link']:
108
+ raw_url = f"https://huggingface.co/datasets/{item_id}/raw/main/README.md"
109
+ alt_url = f"https://huggingface.co/datasets/{item_id}/raw/master/README.md"
110
+ elif 'spaces' in item['link']:
111
+ raw_url = f"https://huggingface.co/spaces/{item_id}/raw/main/README.md"
112
+ alt_url = f"https://huggingface.co/spaces/{item_id}/raw/master/README.md"
113
+ else: # Models
114
+ raw_url = f"https://huggingface.co/{item_id}/raw/main/README.md"
115
+ alt_url = f"https://huggingface.co/{item_id}/raw/master/README.md"
116
+
117
  headers = {"Authorization": f"Bearer {token}"} if token else {}
118
 
119
  try:
120
+ # Try main branch first
121
  async with session.get(raw_url, headers=headers) as response:
122
  if response.status == 200:
123
  content = await response.text()
124
  return item_id.replace('/', '_'), content
125
+
126
+ # If main branch fails, try master branch
127
+ if response.status in [401, 404]:
128
+ async with session.get(alt_url, headers=headers) as alt_response:
129
+ if alt_response.status == 200:
130
+ content = await alt_response.text()
131
+ return item_id.replace('/', '_'), content
132
+
133
+ # If both attempts fail, return error message
134
+ error_msg = f"# Error downloading README for {item_id}\n"
135
+ if response.status == 401:
136
+ error_msg += "Authentication required. Please provide a valid HuggingFace token."
137
+ else:
138
+ error_msg += f"Status code: {response.status}"
139
+ return item_id.replace('/', '_'), error_msg
140
+
141
  except Exception as e:
142
  return item_id.replace('/', '_'), f"# Error downloading README for {item_id}\nError: {str(e)}"
143
 
 
145
  """Download all README files and create a zip archive."""
146
  if not data:
147
  return "", "No results to download"
148
+
149
  zip_buffer = io.BytesIO()
150
  status_message = "Downloading READMEs..."
151
+ failed_downloads = []
152
 
153
  async with aiohttp.ClientSession() as session:
154
  tasks = [download_readme(session, item, token) for item in data]
 
156
 
157
  with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
158
  for filename, content in results:
159
+ if "Error downloading README" in content:
160
+ failed_downloads.append(filename)
161
  zip_file.writestr(f"{filename}.md", content)
162
 
163
  zip_buffer.seek(0)
164
  base64_zip = base64.b64encode(zip_buffer.getvalue()).decode()
165
 
166
+ status = "READMEs ready for download!"
167
+ if failed_downloads:
168
+ status += f" (Failed to download {len(failed_downloads)} READMEs)"
169
+
170
  download_link = f"""
171
  <div style="margin-top: 10px;">
172
  <a href="data:application/zip;base64,{base64_zip}"
 
176
  text-decoration: none; border-radius: 5px;">
177
  📥 Download READMEs Archive
178
  </a>
179
+ {f'<p style="color: #ff6b6b; margin-top: 10px;">Note: Some READMEs could not be downloaded. Please check the zip file for details.</p>' if failed_downloads else ''}
180
  </div>
181
  """
182
 
183
+ return download_link, status
184
 
185
  def download_repository(repo_id: str, repo_type: str, temp_dir: str, token: str) -> str:
186
  """Download a single repository."""