awacke1 committed on
Commit
2b0a4af
·
verified ·
1 Parent(s): a66cfba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -14
app.py CHANGED
@@ -5,10 +5,38 @@ import pandas as pd
5
  import json
6
 
7
  async def get_splits(dataset_name: str) -> Dict[str, List[Dict]]:
8
- URL = f"https://datasets-server.huggingface.co/splits?dataset={dataset_name}"
9
  async with httpx.AsyncClient() as session:
10
  response = await session.get(URL)
11
- return response.json()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  async def get_valid_datasets() -> List[str]:
14
  URL = f"https://huggingface.co/api/datasets"
@@ -20,13 +48,6 @@ async def get_valid_datasets() -> List[str]:
20
  datasets = [] # Set a default value if the response is not in the expected format
21
  return datasets
22
 
23
- async def get_first_rows(dataset: str, config: str, split: str) -> Dict[str, Dict[str, List[Dict]]]:
24
- URL = f"https://datasets-server.huggingface.co/first-rows?dataset={dataset}&config={config}&split={split}"
25
- async with httpx.AsyncClient() as session:
26
- response = await session.get(URL)
27
- print(URL)
28
- gr.Markdown(URL)
29
- return response.json()
30
 
31
  def get_df_from_rows(api_output):
32
  dfFromSort = pd.DataFrame([row["row"] for row in api_output["rows"]])
@@ -53,11 +74,6 @@ async def update_dataset(split_name: str, config_name: str, dataset_name: str):
53
  df = get_df_from_rows(rows)
54
  return df
55
 
56
- # Guido van Rossum: https://www.youtube.com/watch?v=-DVyjdw4t9I
57
- async def update_URL(dataset: str, config: str, split: str) -> str:
58
- URL = f"https://datasets-server.huggingface.co/first-rows?dataset={dataset}&config={config}&split={split}"
59
- URL = f"https://huggingface.co/datasets/{split}"
60
- return (URL)
61
 
62
  async def openurl(URL: str) -> str:
63
  html = f"<a href={URL} target=_blank>{URL}</a>"
 
5
  import json
6
 
7
  async def get_splits(dataset_name: str) -> Dict[str, List[Dict]]:
8
+ URL = f"https://huggingface.co/api/datasets/{dataset_name}"
9
  async with httpx.AsyncClient() as session:
10
  response = await session.get(URL)
11
+ dataset_info = response.json()
12
+ return {
13
+ "splits": [
14
+ {"split": split_name, "config": config_name}
15
+ for config_name, config_info in dataset_info.get("config", {}).items()
16
+ for split_name in config_info.get("splits", [])
17
+ ]
18
+ }
19
+
20
+ async def get_first_rows(dataset: str, config: str, split: str) -> Dict[str, Dict[str, List[Dict]]]:
21
+ URL = f"https://huggingface.co/datasets/{dataset}/resolve/main/dataset_info.json"
22
+ async with httpx.AsyncClient() as session:
23
+ response = await session.get(URL)
24
+ dataset_info = response.json()
25
+ split_info = dataset_info["splits"][split]
26
+ first_rows = {
27
+ "rows": [
28
+ {"row": row} for row in split_info["examples"][:10]
29
+ ]
30
+ }
31
+ return first_rows
32
+
33
+ # Guido van Rossum: https://www.youtube.com/watch?v=-DVyjdw4t9I
34
+ async def update_URL(dataset: str, config: str, split: str) -> str:
35
+ URL = f"https://huggingface.co/datasets/{dataset}/tree/main/{config}/{split}"
36
+ return URL
37
+
38
+
39
+
40
 
41
  async def get_valid_datasets() -> List[str]:
42
  URL = f"https://huggingface.co/api/datasets"
 
48
  datasets = [] # Set a default value if the response is not in the expected format
49
  return datasets
50
 
 
 
 
 
 
 
 
51
 
52
  def get_df_from_rows(api_output):
53
  dfFromSort = pd.DataFrame([row["row"] for row in api_output["rows"]])
 
74
  df = get_df_from_rows(rows)
75
  return df
76
 
 
 
 
 
 
77
 
78
  async def openurl(URL: str) -> str:
79
  html = f"<a href={URL} target=_blank>{URL}</a>"