Update app.py
Browse files
app.py
CHANGED
@@ -17,6 +17,16 @@ async def get_splits(dataset_name: str) -> Dict[str, List[Dict]]:
|
|
17 |
]
|
18 |
}
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
async def get_first_rows(dataset: str, config: str, split: str) -> Dict[str, Dict[str, List[Dict]]]:
|
21 |
URL = f"https://huggingface.co/datasets/{dataset}/resolve/main/dataset_info.json"
|
22 |
async with httpx.AsyncClient() as session:
|
@@ -30,25 +40,6 @@ async def get_first_rows(dataset: str, config: str, split: str) -> Dict[str, Dic
|
|
30 |
}
|
31 |
return first_rows
|
32 |
|
33 |
-
# Guido von Roissum: https://www.youtube.com/watch?v=-DVyjdw4t9I
|
34 |
-
async def update_URL(dataset: str, config: str, split: str) -> str:
|
35 |
-
URL = f"https://huggingface.co/datasets/{dataset}/tree/main/{config}/{split}"
|
36 |
-
return URL
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
async def get_valid_datasets() -> List[str]:
|
42 |
-
URL = f"https://huggingface.co/api/datasets"
|
43 |
-
async with httpx.AsyncClient() as session:
|
44 |
-
response = await session.get(URL)
|
45 |
-
try:
|
46 |
-
datasets = [dataset["id"] for dataset in response.json()]
|
47 |
-
except (KeyError, json.JSONDecodeError):
|
48 |
-
datasets = [] # Set a default value if the response is not in the expected format
|
49 |
-
return datasets
|
50 |
-
|
51 |
-
|
52 |
def get_df_from_rows(api_output):
|
53 |
dfFromSort = pd.DataFrame([row["row"] for row in api_output["rows"]])
|
54 |
try:
|
@@ -58,26 +49,32 @@ def get_df_from_rows(api_output):
|
|
58 |
return dfFromSort
|
59 |
|
60 |
async def update_configs(dataset_name: str):
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
|
|
|
|
65 |
|
66 |
async def update_splits(config_name: str, state: gr.State):
|
67 |
splits_for_config = sorted(set([s["split"] for s in state["splits"] if s["config"] == config_name]))
|
68 |
-
dataset_name = state["splits"][0]["dataset"]
|
69 |
-
dataset = await update_dataset(splits_for_config[0], config_name, dataset_name)
|
70 |
-
return (gr.Dropdown.update(choices=splits_for_config, value=splits_for_config[0]), dataset)
|
71 |
|
72 |
async def update_dataset(split_name: str, config_name: str, dataset_name: str):
|
73 |
rows = await get_first_rows(dataset_name, config_name, split_name)
|
74 |
df = get_df_from_rows(rows)
|
75 |
return df
|
76 |
|
|
|
|
|
|
|
77 |
|
78 |
async def openurl(URL: str) -> str:
|
79 |
html = f"<a href={URL} target=_blank>{URL}</a>"
|
80 |
-
return
|
81 |
|
82 |
with gr.Blocks() as demo:
|
83 |
gr.Markdown("<h1><center>🥫Datasetter📊 Datasets Analyzer and Transformer</center></h1>")
|
@@ -86,16 +83,13 @@ with gr.Blocks() as demo:
|
|
86 |
splits_data = gr.State()
|
87 |
|
88 |
with gr.Row():
|
89 |
-
dataset_name = gr.Dropdown(label="Dataset", interactive=True)
|
90 |
config = gr.Dropdown(label="Subset", interactive=True)
|
91 |
split = gr.Dropdown(label="Split", interactive=True)
|
92 |
|
93 |
with gr.Row():
|
94 |
-
#filterleft = gr.Textbox(label="First Column Filter",placeholder="Filter Column 1")
|
95 |
URLcenter = gr.Textbox(label="Dataset URL", placeholder="URL")
|
96 |
btn = gr.Button("Use Dataset")
|
97 |
-
#URLoutput = gr.Textbox(label="Output",placeholder="URL Output")
|
98 |
-
#URLoutput = gr.HTML(label="Output",placeholder="URL Output")
|
99 |
URLoutput = gr.HTML(label="Output")
|
100 |
|
101 |
with gr.Row():
|
|
|
17 |
]
|
18 |
}
|
19 |
|
20 |
+
async def get_valid_datasets() -> List[str]:
    """Return the namespaced dataset ids listed by the Hugging Face datasets API.

    Only ids containing a "/" are kept. An empty list is returned when the
    response body is not valid JSON, is not a JSON list, or holds entries
    without a usable "id" field. Errors raised by the HTTP request itself
    are not caught here.
    """
    url = "https://huggingface.co/api/datasets"
    async with httpx.AsyncClient() as session:
        response = await session.get(url)
        try:
            payload = response.json()
        except json.JSONDecodeError:
            return []

    if not isinstance(payload, list):
        # A JSON object would be iterated key by key, and indexing those
        # string keys with "id" raises TypeError instead of KeyError.
        return []

    try:
        return [entry["id"] for entry in payload if "/" in entry["id"]]
    except (KeyError, TypeError):
        # Entry is not a mapping, lacks "id", or its "id" is not a string.
        return []
|
29 |
+
|
30 |
async def get_first_rows(dataset: str, config: str, split: str) -> Dict[str, Dict[str, List[Dict]]]:
|
31 |
URL = f"https://huggingface.co/datasets/{dataset}/resolve/main/dataset_info.json"
|
32 |
async with httpx.AsyncClient() as session:
|
|
|
40 |
}
|
41 |
return first_rows
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
def get_df_from_rows(api_output):
|
44 |
dfFromSort = pd.DataFrame([row["row"] for row in api_output["rows"]])
|
45 |
try:
|
|
|
49 |
return dfFromSort
|
50 |
|
51 |
async def update_configs(dataset_name: str):
    """Build the config dropdown update for *dataset_name*.

    Returns a tuple of (dropdown update, splits payload). The dropdown lists
    the sorted unique "config" values found in the payload from
    ``get_splits`` and preselects the first one, or nothing when there are
    none. If a ``json.JSONDecodeError`` is raised, an empty dropdown update
    and an empty ``{"splits": []}`` payload are returned instead.
    """
    try:
        splits = await get_splits(dataset_name)
        config_names = sorted({entry["config"] for entry in splits["splits"]})
        if config_names:
            selected = config_names[0]
        else:
            selected = None
        dropdown = gr.Dropdown.update(choices=config_names, value=selected)
        return (dropdown, splits)
    except json.JSONDecodeError:
        empty_dropdown = gr.Dropdown.update(choices=[], value=None)
        return (empty_dropdown, {"splits": []})
|
59 |
|
60 |
async def update_splits(config_name: str, state: gr.State):
    """Build the split dropdown update and preview data for *config_name*.

    Returns a tuple of (dropdown update, dataset). The dropdown lists the
    sorted unique "split" values of the entries in ``state["splits"]`` whose
    "config" equals *config_name*, preselecting the first one if any. The
    dataset is the result of ``update_dataset`` for that first split when
    both a split and a dataset name are available; otherwise it is an empty
    ``gr.DataFrame.update()``.
    """
    entries = state["splits"]
    split_names = sorted(
        {entry["split"] for entry in entries if entry["config"] == config_name}
    )

    # The dataset name is read from the first entry of the payload.
    if entries:
        dataset_name = entries[0]["dataset"]
    else:
        dataset_name = None

    if split_names and dataset_name:
        dataset = await update_dataset(split_names[0], config_name, dataset_name)
    else:
        dataset = gr.DataFrame.update()

    if split_names:
        selected = split_names[0]
    else:
        selected = None

    dropdown = gr.Dropdown.update(choices=split_names, value=selected)
    return (dropdown, dataset)
|
65 |
|
66 |
async def update_dataset(split_name: str, config_name: str, dataset_name: str):
    """Fetch the first rows of a dataset split and convert them for display.

    The rows come from ``get_first_rows(dataset_name, config_name,
    split_name)`` and are passed to ``get_df_from_rows``, whose result is
    returned unchanged.
    """
    return get_df_from_rows(
        await get_first_rows(dataset_name, config_name, split_name)
    )
|
70 |
|
71 |
+
async def update_URL(dataset: str, config: str, split: str) -> str:
    """Return the Hugging Face "tree/main" URL for a dataset config and split.

    Each component is percent-encoded before being placed in the path, so
    spaces or other reserved characters in a user-typed value still yield a
    well-formed URL. "/" is left intact, so namespaced dataset ids such as
    "user/name" are unchanged.
    """
    from urllib.parse import quote  # local import keeps this block self-contained

    # str() mirrors f-string formatting for non-string values such as None.
    dataset_part = quote(str(dataset))
    config_part = quote(str(config))
    split_part = quote(str(split))
    return f"https://huggingface.co/datasets/{dataset_part}/tree/main/{config_part}/{split_part}"
|
74 |
|
75 |
async def openurl(URL: str) -> str:
    """Return an HTML anchor that opens *URL* in a new browser tab.

    The URL is HTML-escaped and every attribute value is quoted, so spaces,
    quotes, or markup inside the value cannot break out of the tag.
    """
    from html import escape  # local import keeps this block self-contained

    # NOTE(review): the URL scheme is not validated here; if the value can
    # come from untrusted users, consider rejecting non-http(s) schemes.
    safe_url = escape(str(URL), quote=True)
    # rel="noopener noreferrer" stops the opened page from reaching window.opener.
    return f'<a href="{safe_url}" target="_blank" rel="noopener noreferrer">{safe_url}</a>'
|
78 |
|
79 |
with gr.Blocks() as demo:
|
80 |
gr.Markdown("<h1><center>🥫Datasetter📊 Datasets Analyzer and Transformer</center></h1>")
|
|
|
83 |
splits_data = gr.State()
|
84 |
|
85 |
with gr.Row():
|
86 |
+
dataset_name = gr.Dropdown(label="Dataset", interactive=True, allow_custom_value=True)
|
87 |
config = gr.Dropdown(label="Subset", interactive=True)
|
88 |
split = gr.Dropdown(label="Split", interactive=True)
|
89 |
|
90 |
with gr.Row():
|
|
|
91 |
URLcenter = gr.Textbox(label="Dataset URL", placeholder="URL")
|
92 |
btn = gr.Button("Use Dataset")
|
|
|
|
|
93 |
URLoutput = gr.HTML(label="Output")
|
94 |
|
95 |
with gr.Row():
|