Shiyu Zhao committed
Commit c88aff9 · 1 Parent(s): d6c0cb0

Update space

Files changed (2)
  1. app.py +231 -13
  2. requirements.txt +1 -0
app.py CHANGED
@@ -5,6 +5,95 @@ import os
 import re
 from datetime import datetime
 import json
+import torch
+from tqdm import tqdm
+from concurrent.futures import ProcessPoolExecutor, as_completed
+
+from stark_qa import load_qa
+from stark_qa.evaluator import Evaluator
+
+
+def process_single_instance(args):
+    idx, eval_csv, qa_dataset, evaluator, eval_metrics = args
+    query, query_id, answer_ids, meta_info = qa_dataset[idx]
+
+    try:
+        pred_rank = eval_csv[eval_csv['query_id'] == query_id]['pred_rank'].item()
+    except IndexError:
+        raise IndexError(f'Error when processing query_id={query_id}, please make sure the predicted results exist for this query.')
+    except Exception as e:
+        raise RuntimeError(f'Unexpected error occurred while fetching prediction rank for query_id={query_id}: {e}')
+
+    if isinstance(pred_rank, str):
+        try:
+            pred_rank = eval(pred_rank)
+        except SyntaxError as e:
+            raise ValueError(f'Failed to parse pred_rank as a list for query_id={query_id}: {e}')
+
+    if not isinstance(pred_rank, list):
+        raise TypeError(f'Error when processing query_id={query_id}, expected pred_rank to be a list but got {type(pred_rank)}.')
+
+    pred_dict = {pred_rank[i]: -i for i in range(min(100, len(pred_rank)))}
+    answer_ids = torch.LongTensor(answer_ids)
+    result = evaluator.evaluate(pred_dict, answer_ids, metrics=eval_metrics)
+
+    result["idx"], result["query_id"] = idx, query_id
+    return result
+
+
+def compute_metrics(csv_path: str, dataset: str, split: str, num_workers: int = 4):
+    candidate_ids_dict = {
+        'amazon': [i for i in range(957192)],
+        'mag': [i for i in range(1172724, 1872968)],
+        'prime': [i for i in range(129375)]
+    }
+    try:
+        eval_csv = pd.read_csv(csv_path)
+        if 'query_id' not in eval_csv.columns:
+            raise ValueError('No `query_id` column found in the submitted csv.')
+        if 'pred_rank' not in eval_csv.columns:
+            raise ValueError('No `pred_rank` column found in the submitted csv.')
+
+        eval_csv = eval_csv[['query_id', 'pred_rank']]
+
+        if dataset not in candidate_ids_dict:
+            raise ValueError(f"Invalid dataset '{dataset}', expected one of {list(candidate_ids_dict.keys())}.")
+        if split not in ['test', 'test-0.1', 'human_generated_eval']:
+            raise ValueError(f"Invalid split '{split}', expected one of ['test', 'test-0.1', 'human_generated_eval'].")
+
+        evaluator = Evaluator(candidate_ids_dict[dataset])
+        eval_metrics = ['hit@1', 'hit@5', 'recall@20', 'mrr']
+        qa_dataset = load_qa(dataset, human_generated_eval=split == 'human_generated_eval')
+        split_idx = qa_dataset.get_idx_split()
+        all_indices = split_idx[split].tolist()
+
+        results_list = []
+        query_ids = []
+
+        # Prepare args for each worker
+        args = [(idx, eval_csv, qa_dataset, evaluator, eval_metrics) for idx in all_indices]
+
+        with ProcessPoolExecutor(max_workers=num_workers) as executor:
+            futures = [executor.submit(process_single_instance, arg) for arg in args]
+            for future in tqdm(as_completed(futures), total=len(futures)):
+                result = future.result()  # This will raise an error if the worker encountered one
+                results_list.append(result)
+                query_ids.append(result['query_id'])
+
+        # Concatenate results and compute final metrics
+        eval_csv = pd.concat([eval_csv, pd.DataFrame(results_list)], ignore_index=True)
+        final_results = {
+            metric: np.mean(eval_csv[eval_csv['query_id'].isin(query_ids)][metric]) for metric in eval_metrics
+        }
+        return final_results
+
+    except pd.errors.EmptyDataError:
+        return "Error: The CSV file is empty or could not be read. Please check the file and try again."
+    except FileNotFoundError:
+        return f"Error: The file {csv_path} could not be found. Please check the file path and try again."
+    except Exception as error:
+        return f"{error}"
+

 # Data dictionaries for leaderboard
 data_synthesized_full = {
@@ -103,18 +192,154 @@ def validate_csv(file_obj):
     except Exception as e:
         return False, f"Error processing CSV: {str(e)}"

-def save_submission(submission_data):
-    """Save submission data to a JSON file"""
+def sanitize_name(name):
+    """Sanitize name for file system use"""
+    return re.sub(r'[^a-zA-Z0-9]', '_', name)
+
+def save_submission(submission_data, csv_file):
+    """
+    Save submission data and CSV file using model_name_team_name format
+
+    Args:
+        submission_data (dict): Metadata and results for the submission
+        csv_file: The uploaded CSV file object
+    """
+    # Create folder name from model name and team name
+    model_name_clean = sanitize_name(submission_data['method_name'])
+    team_name_clean = sanitize_name(submission_data['team_name'])
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    submission_id = f"{submission_data['team_name']}_{timestamp}"

-    os.makedirs("submissions", exist_ok=True)
-    submission_path = f"submissions/{submission_id}.json"
-    with open(submission_path, 'w') as f:
+    # Create folder name: model_name_team_name
+    folder_name = f"{model_name_clean}_{team_name_clean}"
+    submission_id = f"{folder_name}_{timestamp}"
+
+    # Create submission directory structure
+    base_dir = "submissions"
+    submission_dir = os.path.join(base_dir, folder_name)
+    os.makedirs(submission_dir, exist_ok=True)
+
+    # Save CSV file with timestamp to allow multiple submissions
+    csv_filename = f"predictions_{timestamp}.csv"
+    csv_path = os.path.join(submission_dir, csv_filename)
+    if hasattr(csv_file, 'name'):
+        with open(csv_file.name, 'rb') as source, open(csv_path, 'wb') as target:
+            target.write(source.read())
+
+    # Add file paths to submission data
+    submission_data.update({
+        "csv_path": csv_path,
+        "submission_id": submission_id,
+        "folder_name": folder_name
+    })
+
+    # Save metadata as JSON with timestamp
+    metadata_path = os.path.join(submission_dir, f"metadata_{timestamp}.json")
+    with open(metadata_path, 'w') as f:
         json.dump(submission_data, f, indent=4)

+    # Update latest.json to track most recent submission
+    latest_path = os.path.join(submission_dir, "latest.json")
+    with open(latest_path, 'w') as f:
+        json.dump({
+            "latest_submission": timestamp,
+            "status": "pending_review",
+            "method_name": submission_data['method_name']
+        }, f, indent=4)
+
     return submission_id

+def update_leaderboard_data(submission_data):
+    """
+    Update leaderboard data with new submission results
+    Only uses model name in the displayed table
+    """
+    global df_synthesized_full, df_synthesized_10, df_human_generated
+
+    # Determine which DataFrame to update based on split
+    split_to_df = {
+        'test': df_synthesized_full,
+        'test-0.1': df_synthesized_10,
+        'human_generated_eval': df_human_generated
+    }
+
+    df_to_update = split_to_df[submission_data['split']]
+
+    # Prepare new row data
+    new_row = {
+        'Method': submission_data['method_name'],  # Only use method name in table
+        f'STARK-{submission_data["dataset"].upper()}_Hit@1': submission_data['results']['hit@1'],
+        f'STARK-{submission_data["dataset"].upper()}_Hit@5': submission_data['results']['hit@5'],
+        f'STARK-{submission_data["dataset"].upper()}_R@20': submission_data['results']['recall@20'],
+        f'STARK-{submission_data["dataset"].upper()}_MRR': submission_data['results']['mrr']
+    }
+
+    # Check if method already exists
+    method_mask = df_to_update['Method'] == submission_data['method_name']
+    if method_mask.any():
+        # Update existing row
+        for col in new_row:
+            df_to_update.loc[method_mask, col] = new_row[col]
+    else:
+        # Add new row
+        df_to_update.loc[len(df_to_update)] = new_row
+
+def process_submission(
+    method_name, team_name, dataset, split, contact_email,
+    code_repo, csv_file, model_description, hardware, paper_link
+):
+    """Process and validate submission"""
+    try:
+        # [Previous validation code remains the same]
+
+        # Process CSV file through evaluation pipeline
+        results = compute_metrics(
+            csv_file.name,
+            dataset=dataset.lower(),
+            split=split,
+            num_workers=4
+        )
+
+        if isinstance(results, str) and results.startswith("Error"):
+            return f"Evaluation error: {results}"
+
+        # Prepare submission data
+        submission_data = {
+            "method_name": method_name,
+            "team_name": team_name,
+            "dataset": dataset,
+            "split": split,
+            "contact_email": contact_email,
+            "code_repo": code_repo,
+            "model_description": model_description,
+            "hardware": hardware,
+            "paper_link": paper_link,
+            "results": results,
+            "status": "pending_review",
+            "submission_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        }
+
+        # Save submission and get ID
+        submission_id = save_submission(submission_data, csv_file)
+
+        # Update leaderboard data if submission is valid
+        update_leaderboard_data(submission_data)
+
+        return f"""
+        Submission successful! Your submission ID is: {submission_id}
+
+        Evaluation Results:
+        Hit@1: {results['hit@1']:.2f}
+        Hit@5: {results['hit@5']:.2f}
+        Recall@20: {results['recall@20']:.2f}
+        MRR: {results['mrr']:.2f}
+
+        Your submission has been saved and is pending review.
+        Once approved, your results will appear in the leaderboard under the method name: {method_name}
+        """
+
+    except Exception as e:
+        return f"Error processing submission: {str(e)}"
+
 def filter_by_model_type(df, selected_types):
     if not selected_types:
         return df.head(0)
@@ -140,13 +365,6 @@ def update_tables(selected_types):

     return outputs

-def process_submission(
-    method_name, team_name, dataset, split, contact_email,
-    code_repo, csv_file, model_description, hardware, paper_link
-):
-    """Process and validate submission"""
-    # [Previous validation and processing logic remains the same]
-    pass

 css = """
 table > thead {
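Note on the evaluation path added above: compute_metrics expects a CSV with a query_id column and a pred_rank column, where pred_rank is a list of candidate ids ordered from most to least relevant (a stringified list in the CSV is accepted and parsed per query). The sketch below shows how such a file could be produced and scored locally. It is illustrative only: the file name predictions.csv and the placeholder rankings are made up, it assumes stark_qa and its data are available locally, and it assumes compute_metrics can be imported from app.py without launching the Gradio UI (otherwise copy compute_metrics and process_single_instance into the script).

# Sketch: build a toy submission file and score it with the pipeline added in this commit.
import pandas as pd
from stark_qa import load_qa

from app import compute_metrics  # assumption: importing app.py does not auto-launch the UI

# One row per query in the chosen split; pred_rank holds candidate ids, best first.
qa_dataset = load_qa('amazon', human_generated_eval=True)
split_idx = qa_dataset.get_idx_split()

rows = []
for idx in split_idx['human_generated_eval'].tolist():
    query, query_id, answer_ids, meta_info = qa_dataset[idx]
    # Placeholder ranking: replace with a real model's top-100 candidate ids.
    rows.append({'query_id': query_id, 'pred_rank': list(range(100))})

pd.DataFrame(rows).to_csv('predictions.csv', index=False)

# Score the file exactly as the Space does on submission.
results = compute_metrics('predictions.csv', dataset='amazon',
                          split='human_generated_eval', num_workers=4)
print(results)  # {'hit@1': ..., 'hit@5': ..., 'recall@20': ..., 'mrr': ...}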
requirements.txt CHANGED
@@ -12,6 +12,7 @@ pandas
 python-dateutil
 tqdm
 transformers
+torch
 tokenizers>=0.15.0
 sentencepiece
 stark_qa