lewtun HF staff committed on
Commit
fd18ef6
·
1 Parent(s): d7705b9

Add integration with AutoTrain

Browse files
Files changed (1) hide show
  1. app.py +87 -85
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  from pathlib import Path
3
 
4
  import streamlit as st
@@ -25,7 +26,7 @@ TASK_TO_ID = {
25
  "extractive_question_answering": 5,
26
  "translation": 6,
27
  "summarization": 8,
28
- "single_column_regression": 10,
29
  }
30
 
31
  AUTOTRAIN_TASK_TO_HUB_TASK = {
@@ -36,9 +37,11 @@ AUTOTRAIN_TASK_TO_HUB_TASK = {
36
  "extractive_question_answering": "question-answering",
37
  "translation": "translation",
38
  "summarization": "summarization",
39
- "single_column_regression": 10,
40
  }
41
 
 
 
42
  ###########
43
  ### APP ###
44
  ###########
@@ -63,13 +66,9 @@ if "dataset" in query_params:
63
  selected_dataset = st.selectbox("Select a dataset", all_datasets, index=all_datasets.index(default_dataset))
64
  st.experimental_set_query_params(**{"dataset": [selected_dataset]})
65
 
66
- # TODO: remove this step once we select real datasets
67
- # Strip out original dataset name
68
- # original_dataset_name = dataset_name.split("/")[-1].split("__")[-1]
69
 
70
- # In general this will be a list of multiple configs => need to generalise logic here
71
  metadata = get_metadata(selected_dataset)
72
- print(metadata)
73
  if metadata is None:
74
  st.warning("No evaluation metadata found. Please configure the evaluation job below.")
75
 
@@ -120,16 +119,29 @@ with st.expander("Advanced configuration"):
120
  # TODO: find a better way to layout these items
121
  # TODO: propagate this information to payload
122
  # TODO: make it task specific
 
123
  with col1:
124
- st.markdown("`text` column")
125
- st.text("")
126
- st.text("")
127
- st.text("")
128
- st.text("")
129
- st.markdown("`target` column")
 
 
 
 
 
 
 
 
130
  with col2:
131
- st.selectbox("This column should contain the text you want to classify", col_names, index=0)
132
- st.selectbox("This column should contain the labels you want to assign to the text", col_names, index=1)
 
 
 
 
133
 
134
  with st.form(key="form"):
135
 
@@ -138,75 +150,65 @@ with st.form(key="form"):
138
  selected_models = st.multiselect(
139
  "Select the models you wish to evaluate", compatible_models
140
  ) # , compatible_models[0])
141
- print(selected_models)
142
-
143
  submit_button = st.form_submit_button("Make submission")
144
 
145
- # if submit_button:
146
- # for model in selected_models:
147
- # payload = {
148
- # "username": AUTOTRAIN_USERNAME,
149
- # "task": TASK_TO_ID[metadata[0]["task_id"]],
150
- # "model": model,
151
- # "col_mapping": metadata[0]["col_mapping"],
152
- # "split": selected_split,
153
- # "dataset": original_dataset_name,
154
- # "config": selected_config,
155
- # }
156
- # json_resp = http_post(
157
- # path="/evaluate/create", payload=payload, token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
158
- # ).json()
159
- # if json_resp["status"] == 1:
160
- # st.success(f"✅ Successfully submitted model {model} for evaluation with job ID {json_resp['id']}")
161
- # st.markdown(
162
- # f"""
163
- # Evaluation takes appoximately 1 hour to complete, so grab a ☕ or 🍡 while you wait:
164
-
165
- # * 📊 Click [here](https://huggingface.co/spaces/huggingface/leaderboards) to view the results from your submission
166
- # """
167
- # )
168
- # else:
169
- # st.error("🙈 Oh noes, there was an error submitting your submission!")
170
-
171
- # st.write("Creating project!")
172
- # payload = {
173
- # "username": AUTOTRAIN_USERNAME,
174
- # "proj_name": "my-eval-project-1",
175
- # "task": TASK_TO_ID[metadata[0]["task_id"]],
176
- # "config": {
177
- # "language": "en",
178
- # "max_models": 5,
179
- # "instance": {
180
- # "provider": "aws",
181
- # "instance_type": "ml.g4dn.4xlarge",
182
- # "max_runtime_seconds": 172800,
183
- # "num_instances": 1,
184
- # "disk_size_gb": 150,
185
- # },
186
- # },
187
- # }
188
- # json_resp = http_post(
189
- # path="/projects/create", payload=payload, token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
190
- # ).json()
191
- # # print(json_resp)
192
-
193
- # # st.write("Uploading data")
194
- # payload = {
195
- # "split": 4,
196
- # "col_mapping": metadata[0]["col_mapping"],
197
- # "load_config": {"max_size_bytes": 0, "shuffle": False},
198
- # }
199
- # json_resp = http_post(
200
- # path="/projects/522/data/emotion",
201
- # payload=payload,
202
- # token=HF_TOKEN,
203
- # domain=AUTOTRAIN_BACKEND_API,
204
- # params={"type": "dataset", "config_name": "default", "split_name": "train"},
205
- # ).json()
206
- # print(json_resp)
207
-
208
- # st.write("Training")
209
- # json_resp = http_get(
210
- # path="/projects/522/data/start_process", token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
211
- # ).json()
212
- # print(json_resp)
 
1
  import os
2
+ import uuid
3
  from pathlib import Path
4
 
5
  import streamlit as st
 
26
  "extractive_question_answering": 5,
27
  "translation": 6,
28
  "summarization": 8,
29
+ # "single_column_regression": 10,
30
  }
31
 
32
  AUTOTRAIN_TASK_TO_HUB_TASK = {
 
37
  "extractive_question_answering": "question-answering",
38
  "translation": "translation",
39
  "summarization": "summarization",
40
+ # "single_column_regression": 10,
41
  }
42
 
43
+ HUB_TASK_TO_AUTOTRAIN_TASK = {v: k for k, v in AUTOTRAIN_TASK_TO_HUB_TASK.items()}
44
+
45
  ###########
46
  ### APP ###
47
  ###########
 
66
  selected_dataset = st.selectbox("Select a dataset", all_datasets, index=all_datasets.index(default_dataset))
67
  st.experimental_set_query_params(**{"dataset": [selected_dataset]})
68
 
 
 
 
69
 
70
+ # TODO: In general this will be a list of multiple configs => need to generalise logic here
71
  metadata = get_metadata(selected_dataset)
 
72
  if metadata is None:
73
  st.warning("No evaluation metadata found. Please configure the evaluation job below.")
74
 
 
119
  # TODO: find a better way to layout these items
120
  # TODO: propagate this information to payload
121
  # TODO: make it task specific
122
+ col_mapping = {}
123
  with col1:
124
+ if selected_task == "text-classification":
125
+ st.markdown("`text` column")
126
+ st.text("")
127
+ st.text("")
128
+ st.text("")
129
+ st.text("")
130
+ st.markdown("`target` column")
131
+ elif selected_task == "question-answering":
132
+ st.markdown("`context` column")
133
+ st.text("")
134
+ st.text("")
135
+ st.text("")
136
+ st.text("")
137
+ st.markdown("`question` column")
138
  with col2:
139
+ text_col = st.selectbox("This column should contain the text you want to classify", col_names, index=0)
140
+ target_col = st.selectbox(
141
+ "This column should contain the labels you want to assign to the text", col_names, index=1
142
+ )
143
+ col_mapping[text_col] = "text"
144
+ col_mapping[target_col] = "target"
145
 
146
  with st.form(key="form"):
147
 
 
150
  selected_models = st.multiselect(
151
  "Select the models you wish to evaluate", compatible_models
152
  ) # , compatible_models[0])
 
 
153
  submit_button = st.form_submit_button("Make submission")
154
 
155
+ if submit_button:
156
+ project_id = str(uuid.uuid4())[:3]
157
+ autotrain_task_name = HUB_TASK_TO_AUTOTRAIN_TASK[selected_task]
158
+ payload = {
159
+ "username": AUTOTRAIN_USERNAME,
160
+ "proj_name": f"my-eval-project-{project_id}",
161
+ "task": TASK_TO_ID[autotrain_task_name],
162
+ "config": {
163
+ "language": "en",
164
+ "max_models": 5,
165
+ "instance": {
166
+ "provider": "aws",
167
+ "instance_type": "ml.g4dn.4xlarge",
168
+ "max_runtime_seconds": 172800,
169
+ "num_instances": 1,
170
+ "disk_size_gb": 150,
171
+ },
172
+ "evaluation": {
173
+ "metrics": [],
174
+ "models": selected_models,
175
+ },
176
+ },
177
+ }
178
+ project_json_resp = http_post(
179
+ path="/projects/create", payload=payload, token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
180
+ ).json()
181
+ print(project_json_resp)
182
+
183
+ if project_json_resp["created"]:
184
+ payload = {
185
+ "split": 4,
186
+ "col_mapping": col_mapping,
187
+ "load_config": {"max_size_bytes": 0, "shuffle": False},
188
+ }
189
+ data_json_resp = http_post(
190
+ path=f"/projects/{project_json_resp['id']}/data/{selected_dataset}",
191
+ payload=payload,
192
+ token=HF_TOKEN,
193
+ domain=AUTOTRAIN_BACKEND_API,
194
+ params={"type": "dataset", "config_name": selected_config, "split_name": selected_split},
195
+ ).json()
196
+ print(data_json_resp)
197
+ if data_json_resp["download_status"] == 1:
198
+ train_json_resp = http_get(
199
+ path=f"/projects/{project_json_resp['id']}/data/start_process",
200
+ token=HF_TOKEN,
201
+ domain=AUTOTRAIN_BACKEND_API,
202
+ ).json()
203
+ print(train_json_resp)
204
+ if train_json_resp["success"]:
205
+ st.success(f"✅ Successfully submitted evaluation job with project ID {project_id}")
206
+ st.markdown(
207
+ f"""
208
+ Evaluation takes appoximately 1 hour to complete, so grab a ☕ or 🍡 while you wait:
209
+
210
+ * 📊 Click [here](https://huggingface.co/spaces/huggingface/leaderboards) to view the results from your submission
211
+ """
212
+ )
213
+ else:
214
+ st.error("🙈 Oh noes, there was an error submitting your submission!")