Update app.py
Browse files
app.py
CHANGED
@@ -5,10 +5,38 @@ import pandas as pd
|
|
5 |
import json
|
6 |
|
7 |
async def get_splits(dataset_name: str) -> Dict[str, List[Dict]]:
|
8 |
-
URL = f"https://
|
9 |
async with httpx.AsyncClient() as session:
|
10 |
response = await session.get(URL)
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
async def get_valid_datasets() -> List[str]:
|
14 |
URL = f"https://huggingface.co/api/datasets"
|
@@ -20,13 +48,6 @@ async def get_valid_datasets() -> List[str]:
|
|
20 |
datasets = [] # Set a default value if the response is not in the expected format
|
21 |
return datasets
|
22 |
|
23 |
-
async def get_first_rows(dataset: str, config: str, split: str) -> Dict[str, Dict[str, List[Dict]]]:
|
24 |
-
URL = f"https://datasets-server.huggingface.co/first-rows?dataset={dataset}&config={config}&split={split}"
|
25 |
-
async with httpx.AsyncClient() as session:
|
26 |
-
response = await session.get(URL)
|
27 |
-
print(URL)
|
28 |
-
gr.Markdown(URL)
|
29 |
-
return response.json()
|
30 |
|
31 |
def get_df_from_rows(api_output):
|
32 |
dfFromSort = pd.DataFrame([row["row"] for row in api_output["rows"]])
|
@@ -53,11 +74,6 @@ async def update_dataset(split_name: str, config_name: str, dataset_name: str):
|
|
53 |
df = get_df_from_rows(rows)
|
54 |
return df
|
55 |
|
56 |
-
# Guido von Roissum: https://www.youtube.com/watch?v=-DVyjdw4t9I
|
57 |
-
async def update_URL(dataset: str, config: str, split: str) -> str:
|
58 |
-
URL = f"https://datasets-server.huggingface.co/first-rows?dataset={dataset}&config={config}&split={split}"
|
59 |
-
URL = f"https://huggingface.co/datasets/{split}"
|
60 |
-
return (URL)
|
61 |
|
62 |
async def openurl(URL: str) -> str:
|
63 |
html = f"<a href={URL} target=_blank>{URL}</a>"
|
|
|
5 |
import json
|
6 |
|
7 |
async def get_splits(dataset_name: str) -> Dict[str, List[Dict]]:
    """List the (config, split) pairs advertised for a dataset.

    Issues a GET to ``https://huggingface.co/api/datasets/{dataset_name}``,
    decodes the reply as JSON, and walks its ``"config"`` mapping; for every
    config it iterates that config's ``"splits"`` entry. A missing
    ``"config"`` or ``"splits"`` key contributes nothing to the result.

    Args:
        dataset_name: Dataset identifier interpolated into the request URL.

    Returns:
        ``{"splits": [{"split": <split name>, "config": <config name>}, ...]}``
    """
    endpoint = f"https://huggingface.co/api/datasets/{dataset_name}"
    async with httpx.AsyncClient() as client:
        reply = await client.get(endpoint)
        payload = reply.json()
        pairs = []
        # Outer loop over configs, inner loop over that config's splits.
        for config_name, config_info in payload.get("config", {}).items():
            for split_name in config_info.get("splits", []):
                pairs.append({"split": split_name, "config": config_name})
        return {"splits": pairs}
|
19 |
+
|
20 |
+
async def get_first_rows(dataset: str, config: str, split: str) -> Dict[str, Dict[str, List[Dict]]]:
    """Return at most ten example rows for one split of a dataset.

    Downloads ``dataset_info.json`` from the ``main`` revision of the dataset
    repository, decodes it as JSON, and reads
    ``["splits"][split]["examples"]``. Each of the first ten examples is
    wrapped as ``{"row": example}``.

    Args:
        dataset: Dataset identifier interpolated into the download URL.
        config: Accepted for interface compatibility; not used by this body.
        split: Key looked up under the file's ``"splits"`` entry.

    Returns:
        ``{"rows": [{"row": example}, ...]}`` with at most ten entries.

    Raises:
        Whatever the lookups raise when ``"splits"``, ``split`` or
        ``"examples"`` is absent from the decoded file (``KeyError`` when the
        containers are dicts).
    """
    # NOTE(review): the value built here is a dict of lists of dicts, one
    # level shallower than the annotated return type -- confirm which is meant.
    info_url = f"https://huggingface.co/datasets/{dataset}/resolve/main/dataset_info.json"
    async with httpx.AsyncClient() as client:
        reply = await client.get(info_url)
        examples = reply.json()["splits"][split]["examples"]
        wrapped = []
        # Only the first ten examples are kept.
        for example in examples[:10]:
            wrapped.append({"row": example})
        return {"rows": wrapped}
|
32 |
+
|
33 |
+
# Guido van Rossum: https://www.youtube.com/watch?v=-DVyjdw4t9I
|
34 |
+
async def update_URL(dataset: str, config: str, split: str) -> str:
    """Build the Hub "tree" URL for a dataset's config/split folder.

    Each component is percent-encoded before being placed in the path, so
    characters such as spaces, ``#``, ``?`` or ``>`` cannot truncate the link
    or break markup that embeds it. ``/`` is left intact, so namespaced
    dataset ids (``org/name``) keep their form. Components made only of
    letters, digits and ``_.-~/`` yield exactly the same URL as plain
    interpolation would.

    Args:
        dataset: Dataset identifier, optionally ``namespace/name``.
        config: Configuration name, used as the first folder under ``main``.
        split: Split name, used as the second folder under ``main``.

    Returns:
        ``https://huggingface.co/datasets/{dataset}/tree/main/{config}/{split}``
        with every component percent-encoded.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    # str() keeps the f-string behaviour for non-string inputs (e.g. None is
    # rendered as "None") instead of letting quote() raise TypeError.
    dataset_path, config_path, split_path = (
        quote(str(part), safe="/") for part in (dataset, config, split)
    )
    return f"https://huggingface.co/datasets/{dataset_path}/tree/main/{config_path}/{split_path}"
|
37 |
+
|
38 |
+
|
39 |
+
|
40 |
|
41 |
async def get_valid_datasets() -> List[str]:
|
42 |
URL = f"https://huggingface.co/api/datasets"
|
|
|
48 |
datasets = [] # Set a default value if the response is not in the expected format
|
49 |
return datasets
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
def get_df_from_rows(api_output):
|
53 |
dfFromSort = pd.DataFrame([row["row"] for row in api_output["rows"]])
|
|
|
74 |
df = get_df_from_rows(rows)
|
75 |
return df
|
76 |
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
async def openurl(URL: str) -> str:
|
79 |
html = f"<a href={URL} target=_blank>{URL}</a>"
|