awacke1 committed on
Commit
4e37f97
·
verified ·
1 Parent(s): 2b0a4af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -31
app.py CHANGED
@@ -17,6 +17,16 @@ async def get_splits(dataset_name: str) -> Dict[str, List[Dict]]:
17
  ]
18
  }
19
 
 
 
 
 
 
 
 
 
 
 
20
  async def get_first_rows(dataset: str, config: str, split: str) -> Dict[str, Dict[str, List[Dict]]]:
21
  URL = f"https://huggingface.co/datasets/{dataset}/resolve/main/dataset_info.json"
22
  async with httpx.AsyncClient() as session:
@@ -30,25 +40,6 @@ async def get_first_rows(dataset: str, config: str, split: str) -> Dict[str, Dic
30
  }
31
  return first_rows
32
 
33
- # Guido von Roissum: https://www.youtube.com/watch?v=-DVyjdw4t9I
34
- async def update_URL(dataset: str, config: str, split: str) -> str:
35
- URL = f"https://huggingface.co/datasets/{dataset}/tree/main/{config}/{split}"
36
- return URL
37
-
38
-
39
-
40
-
41
- async def get_valid_datasets() -> List[str]:
42
- URL = f"https://huggingface.co/api/datasets"
43
- async with httpx.AsyncClient() as session:
44
- response = await session.get(URL)
45
- try:
46
- datasets = [dataset["id"] for dataset in response.json()]
47
- except (KeyError, json.JSONDecodeError):
48
- datasets = [] # Set a default value if the response is not in the expected format
49
- return datasets
50
-
51
-
52
  def get_df_from_rows(api_output):
53
  dfFromSort = pd.DataFrame([row["row"] for row in api_output["rows"]])
54
  try:
@@ -58,26 +49,32 @@ def get_df_from_rows(api_output):
58
  return dfFromSort
59
 
60
  async def update_configs(dataset_name: str):
61
- splits = await get_splits(dataset_name)
62
- all_configs = sorted(set([s["config"] for s in splits["splits"]]))
63
- return (gr.Dropdown.update(choices=all_configs, value=all_configs[0]),
64
- splits)
 
 
 
65
 
66
  async def update_splits(config_name: str, state: gr.State):
67
  splits_for_config = sorted(set([s["split"] for s in state["splits"] if s["config"] == config_name]))
68
- dataset_name = state["splits"][0]["dataset"]
69
- dataset = await update_dataset(splits_for_config[0], config_name, dataset_name)
70
- return (gr.Dropdown.update(choices=splits_for_config, value=splits_for_config[0]), dataset)
71
 
72
  async def update_dataset(split_name: str, config_name: str, dataset_name: str):
73
  rows = await get_first_rows(dataset_name, config_name, split_name)
74
  df = get_df_from_rows(rows)
75
  return df
76
 
 
 
 
77
 
78
  async def openurl(URL: str) -> str:
79
  html = f"<a href={URL} target=_blank>{URL}</a>"
80
- return (html)
81
 
82
  with gr.Blocks() as demo:
83
  gr.Markdown("<h1><center>🥫Datasetter📊 Datasets Analyzer and Transformer</center></h1>")
@@ -86,16 +83,13 @@ with gr.Blocks() as demo:
86
  splits_data = gr.State()
87
 
88
  with gr.Row():
89
- dataset_name = gr.Dropdown(label="Dataset", interactive=True)
90
  config = gr.Dropdown(label="Subset", interactive=True)
91
  split = gr.Dropdown(label="Split", interactive=True)
92
 
93
  with gr.Row():
94
- #filterleft = gr.Textbox(label="First Column Filter",placeholder="Filter Column 1")
95
  URLcenter = gr.Textbox(label="Dataset URL", placeholder="URL")
96
  btn = gr.Button("Use Dataset")
97
- #URLoutput = gr.Textbox(label="Output",placeholder="URL Output")
98
- #URLoutput = gr.HTML(label="Output",placeholder="URL Output")
99
  URLoutput = gr.HTML(label="Output")
100
 
101
  with gr.Row():
 
17
  ]
18
  }
19
 
async def get_valid_datasets() -> List[str]:
    """Return dataset ids listed by the Hugging Face datasets API.

    Issues a GET to ``https://huggingface.co/api/datasets`` and keeps the
    ``"id"`` of every entry whose id contains a ``/`` (presumably to keep
    only ``owner/name`` style repositories -- confirm intent).

    Returns:
        The filtered list of ids, or an empty list when the response body
        is not JSON or does not have the expected list-of-dicts shape.
    """
    url = "https://huggingface.co/api/datasets"
    async with httpx.AsyncClient() as session:
        response = await session.get(url)
    try:
        return [item["id"] for item in response.json() if "/" in item["id"]]
    except (KeyError, TypeError, json.JSONDecodeError):
        # Fall back to a default when the response is not in the expected
        # format. TypeError covers a JSON body that is not a list of dicts
        # (e.g. an error object), where indexing an element with "id" fails.
        return []
29
+
30
  async def get_first_rows(dataset: str, config: str, split: str) -> Dict[str, Dict[str, List[Dict]]]:
31
  URL = f"https://huggingface.co/datasets/{dataset}/resolve/main/dataset_info.json"
32
  async with httpx.AsyncClient() as session:
 
40
  }
41
  return first_rows
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  def get_df_from_rows(api_output):
44
  dfFromSort = pd.DataFrame([row["row"] for row in api_output["rows"]])
45
  try:
 
49
  return dfFromSort
50
 
async def update_configs(dataset_name: str):
    """Compute the dropdown update listing a dataset's configs.

    Awaits ``get_splits(dataset_name)`` and gathers the distinct
    ``"config"`` values found under its ``"splits"`` key, sorted.

    Returns:
        A 2-tuple ``(dropdown_update, splits)``. The dropdown update offers
        the sorted configs and selects the first one (``None`` when there
        are none). If ``json.JSONDecodeError`` is raised, the dropdown is
        emptied and ``{"splits": []}`` is returned in place of the splits.
    """
    try:
        splits = await get_splits(dataset_name)
        config_names = sorted({entry["config"] for entry in splits["splits"]})
        default_config = config_names[0] if config_names else None
        dropdown = gr.Dropdown.update(choices=config_names, value=default_config)
        return dropdown, splits
    except json.JSONDecodeError:
        cleared = gr.Dropdown.update(choices=[], value=None)
        return cleared, {"splits": []}
59
 
async def update_splits(config_name: str, state: gr.State):
    """Compute the split dropdown update and table data for a config.

    Reads ``state["splits"]``, collects the distinct ``"split"`` values of
    the entries whose ``"config"`` equals ``config_name`` (sorted), and
    takes the dataset name from the first entry's ``"dataset"`` key.

    Returns:
        A 2-tuple ``(dropdown_update, dataset)``. ``dataset`` is the result
        of ``update_dataset`` for the first split when both a split and a
        dataset name are available; otherwise it is an empty
        ``gr.DataFrame.update()``.
    """
    entries = state["splits"]
    split_names = sorted(
        {entry["split"] for entry in entries if entry["config"] == config_name}
    )
    dataset_name = entries[0]["dataset"] if entries else None

    if split_names and dataset_name:
        dataset = await update_dataset(split_names[0], config_name, dataset_name)
    else:
        dataset = gr.DataFrame.update()

    if split_names:
        default_split = split_names[0]
    else:
        default_split = None
    dropdown = gr.Dropdown.update(choices=split_names, value=default_split)
    return dropdown, dataset
65
 
async def update_dataset(split_name: str, config_name: str, dataset_name: str):
    """Fetch the first rows of a dataset split and convert them for display.

    Awaits ``get_first_rows(dataset_name, config_name, split_name)`` and
    passes the result through ``get_df_from_rows``, returning whatever that
    helper produces.
    """
    return get_df_from_rows(
        await get_first_rows(dataset_name, config_name, split_name)
    )
70
 
async def update_URL(dataset: str, config: str, split: str) -> str:
    """Return the Hub ``tree/main`` URL for a dataset's config/split path.

    The three arguments are interpolated into the URL as given; no
    quoting or validation is applied.
    """
    return f"https://huggingface.co/datasets/{dataset}/tree/main/{config}/{split}"
74
 
async def openurl(URL: str) -> str:
    """Return an HTML anchor that opens ``URL`` in a new tab.

    ``URL`` is HTML-escaped and the attributes are quoted, so characters
    such as spaces, ``&``, quotes or ``<``/``>`` in the input can neither
    break the tag nor inject markup.

    Args:
        URL: Link target; also used as the visible link text.

    Returns:
        The anchor element as an HTML string.
    """
    # Local import: the rest of the file is not visible here, so the
    # dependency is kept inside the function.
    from html import escape

    safe = escape(URL, quote=True)
    # NOTE(review): escaping does not restrict the URL scheme (for example
    # "javascript:"); add an allow-list if that matters for this app.
    return f'<a href="{safe}" target="_blank" rel="noopener">{safe}</a>'
78
 
79
  with gr.Blocks() as demo:
80
  gr.Markdown("<h1><center>🥫Datasetter📊 Datasets Analyzer and Transformer</center></h1>")
 
83
  splits_data = gr.State()
84
 
85
  with gr.Row():
86
+ dataset_name = gr.Dropdown(label="Dataset", interactive=True, allow_custom_value=True)
87
  config = gr.Dropdown(label="Subset", interactive=True)
88
  split = gr.Dropdown(label="Split", interactive=True)
89
 
90
  with gr.Row():
 
91
  URLcenter = gr.Textbox(label="Dataset URL", placeholder="URL")
92
  btn = gr.Button("Use Dataset")
 
 
93
  URLoutput = gr.HTML(label="Output")
94
 
95
  with gr.Row():