lewtun HF staff committed on
Commit
54f6b18
·
1 Parent(s): 8dec3b6

Add all NLP tasks

Browse files
Files changed (2) hide show
  1. app.py +90 -14
  2. utils.py +4 -1
app.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import uuid
3
  from pathlib import Path
4
 
 
5
  import streamlit as st
6
  from datasets import get_dataset_config_names
7
  from dotenv import load_dotenv
@@ -83,10 +84,7 @@ with st.expander("Advanced configuration"):
83
  domain="https://datasets-preview.huggingface.tech",
84
  params={"dataset": selected_dataset, "config": selected_config, "split": selected_split},
85
  ).json()
86
- columns = rows_resp["columns"]
87
- col_names = []
88
- for c in columns:
89
- col_names.append(c["column"]["name"])
90
  # splits = metadata[0]["splits"]
91
  # split_names = list(splits.values())
92
  # eval_split = splits.get("eval_split", split_names[0])
@@ -104,28 +102,105 @@ with st.expander("Advanced configuration"):
104
  # TODO: propagate this information to payload
105
  # TODO: make it task specific
106
  col_mapping = {}
107
- with col1:
108
- if selected_task in ["binary_classification", "multi_class_classification"]:
109
  st.markdown("`text` column")
110
  st.text("")
111
  st.text("")
112
  st.text("")
113
  st.text("")
114
  st.markdown("`target` column")
115
- elif selected_task == "question-answering":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  st.markdown("`context` column")
117
  st.text("")
118
  st.text("")
119
  st.text("")
120
  st.text("")
121
  st.markdown("`question` column")
122
- with col2:
123
- text_col = st.selectbox("This column should contain the text you want to classify", col_names, index=0)
124
- target_col = st.selectbox(
125
- "This column should contain the labels you want to assign to the text", col_names, index=1
126
- )
127
- col_mapping[text_col] = "text"
128
- col_mapping[target_col] = "target"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  with st.form(key="form"):
131
 
@@ -158,6 +233,7 @@ with st.form(key="form"):
158
  },
159
  },
160
  }
 
161
  project_json_resp = http_post(
162
  path="/projects/create", payload=payload, token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
163
  ).json()
 
2
  import uuid
3
  from pathlib import Path
4
 
5
+ import pandas as pd
6
  import streamlit as st
7
  from datasets import get_dataset_config_names
8
  from dotenv import load_dotenv
 
84
  domain="https://datasets-preview.huggingface.tech",
85
  params={"dataset": selected_dataset, "config": selected_config, "split": selected_split},
86
  ).json()
87
+ col_names = list(pd.json_normalize(rows_resp["rows"][0]["row"]).columns)
 
 
 
88
  # splits = metadata[0]["splits"]
89
  # split_names = list(splits.values())
90
  # eval_split = splits.get("eval_split", split_names[0])
 
102
  # TODO: propagate this information to payload
103
  # TODO: make it task specific
104
  col_mapping = {}
105
+ if selected_task in ["binary_classification", "multi_class_classification"]:
106
+ with col1:
107
  st.markdown("`text` column")
108
  st.text("")
109
  st.text("")
110
  st.text("")
111
  st.text("")
112
  st.markdown("`target` column")
113
+ with col2:
114
+ text_col = st.selectbox("This column should contain the text you want to classify", col_names)
115
+ target_col = st.selectbox(
116
+ "This column should contain the labels you want to assign to the text", col_names
117
+ )
118
+ col_mapping[text_col] = "text"
119
+ col_mapping[target_col] = "target"
120
+
121
+ elif selected_task == "entity_extraction":
122
+ with col1:
123
+ st.markdown("`tokens` column")
124
+ st.text("")
125
+ st.text("")
126
+ st.text("")
127
+ st.text("")
128
+ st.markdown("`tags` column")
129
+ with col2:
130
+ tokens_col = st.selectbox(
131
+ "This column should contain the parts of the text (as an array of tokens) you want to assign labels to",
132
+ col_names,
133
+ )
134
+ tags_col = st.selectbox(
135
+ "This column should contain the labels to associate to each part of the text", col_names
136
+ )
137
+ col_mapping[tokens_col] = "tokens"
138
+ col_mapping[tags_col] = "tags"
139
+
140
+ elif selected_task == "translation":
141
+ with col1:
142
+ st.markdown("`source` column")
143
+ st.text("")
144
+ st.text("")
145
+ st.text("")
146
+ st.text("")
147
+ st.markdown("`target` column")
148
+ with col2:
149
+ text_col = st.selectbox("This column should contain the text you want to translate", col_names)
150
+ target_col = st.selectbox(
151
+ "This column should contain an example translation of the source text", col_names
152
+ )
153
+ col_mapping[text_col] = "source"
154
+ col_mapping[target_col] = "target"
155
+
156
+ elif selected_task == "summarization":
157
+ with col1:
158
+ st.markdown("`text` column")
159
+ st.text("")
160
+ st.text("")
161
+ st.text("")
162
+ st.text("")
163
+ st.markdown("`target` column")
164
+ with col2:
165
+ text_col = st.selectbox("This column should contain the text you want to summarize", col_names)
166
+ target_col = st.selectbox("This column should contain an example summarization of the text", col_names)
167
+ col_mapping[text_col] = "text"
168
+ col_mapping[target_col] = "target"
169
+
170
+ elif selected_task == "extractive_question_answering":
171
+ with col1:
172
  st.markdown("`context` column")
173
  st.text("")
174
  st.text("")
175
  st.text("")
176
  st.text("")
177
  st.markdown("`question` column")
178
+ st.text("")
179
+ st.text("")
180
+ st.text("")
181
+ st.text("")
182
+ st.markdown("`answers.text` column")
183
+ st.text("")
184
+ st.text("")
185
+ st.text("")
186
+ st.text("")
187
+ st.markdown("`answers.answer_start` column")
188
+ with col2:
189
+ context_col = st.selectbox("This column should contain the question's context", col_names)
190
+ question_col = st.selectbox(
191
+ "This column should contain the question to be answered, given the context", col_names
192
+ )
193
+ answers_text_col = st.selectbox(
194
+ "This column should contain example answers to the question, extracted from the context", col_names
195
+ )
196
+ answers_start_col = st.selectbox(
197
+ "This column should contain the indices in the context of the first character of each answers.text",
198
+ col_names,
199
+ )
200
+ col_mapping[context_col] = "context"
201
+ col_mapping[question_col] = "question"
202
+ col_mapping[answers_text_col] = "answers.text"
203
+ col_mapping[answers_start_col] = "answers.answer_start"
204
 
205
  with st.form(key="form"):
206
 
 
233
  },
234
  },
235
  }
236
+ print(f"Payload: {payload}")
237
  project_json_resp = http_post(
238
  path="/projects/create", payload=payload, token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
239
  ).json()
utils.py CHANGED
@@ -57,6 +57,9 @@ def get_metadata(dataset_name: str) -> Union[Dict, None]:
57
 
58
 
59
  def get_compatible_models(task, dataset_name):
60
- filt = ModelFilter(task=AUTOTRAIN_TASK_TO_HUB_TASK[task], trained_dataset=dataset_name, library="transformers")
 
 
 
61
  compatible_models = api.list_models(filter=filt)
62
  return [model.modelId for model in compatible_models]
 
57
 
58
 
59
  def get_compatible_models(task, dataset_name):
60
+ # TODO: relax filter on PyTorch models once supported in AutoTrain
61
+ filt = ModelFilter(
62
+ task=AUTOTRAIN_TASK_TO_HUB_TASK[task], trained_dataset=dataset_name, library=["transformers", "pytorch"]
63
+ )
64
  compatible_models = api.list_models(filter=filt)
65
  return [model.modelId for model in compatible_models]