Spaces:

zsyJosh
/

stark

Sleeping

App Files Files Community

Shiyu Zhao committed on Oct 22, 2024

Commit

c88aff9

1 Parent(s): d6c0cb0

Update space

Browse files

Files changed (2) hide show

app.py +231 -13
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -5,6 +5,95 @@ import os
 import re
 from datetime import datetime
 import json
 # Data dictionaries for leaderboard
 data_synthesized_full = {
@@ -103,18 +192,154 @@ def validate_csv(file_obj):
     except Exception as e:
         return False, f"Error processing CSV: {str(e)}"
-def save_submission(submission_data):
-    """Save submission data to a JSON file"""
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    submission_id = f"{submission_data['team_name']}_{timestamp}"
-    os.makedirs("submissions", exist_ok=True)
-    submission_path = f"submissions/{submission_id}.json"
-    with open(submission_path, 'w') as f:
         json.dump(submission_data, f, indent=4)
     return submission_id
 def filter_by_model_type(df, selected_types):
     if not selected_types:
         return df.head(0)
@@ -140,13 +365,6 @@ def update_tables(selected_types):
     return outputs
-def process_submission(
-    method_name, team_name, dataset, split, contact_email,
-    code_repo, csv_file, model_description, hardware, paper_link
-):
-    """Process and validate submission"""
-    # [Previous validation and processing logic remains the same]
-    pass
 css = """
 table > thead {

 import re
 from datetime import datetime
 import json
+import torch
+from tqdm import tqdm
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from stark_qa import load_qa
+from stark_qa.evaluator import Evaluator
def process_single_instance(args):
    """Evaluate the submitted ranking for a single query.

    Args:
        args: A 5-tuple ``(idx, eval_csv, qa_dataset, evaluator, eval_metrics)``
            packed into one argument so the function can be handed to an
            executor. ``eval_csv`` is a DataFrame with ``query_id`` and
            ``pred_rank`` columns, and ``qa_dataset[idx]`` must yield
            ``(query, query_id, answer_ids, meta_info)``.

    Returns:
        The dict produced by ``evaluator.evaluate`` with ``"idx"`` and
        ``"query_id"`` entries added.

    Raises:
        IndexError: No row in ``eval_csv`` matches the query id.
        RuntimeError: The prediction could not be fetched for another reason
            (for example several rows share the query id).
        ValueError: ``pred_rank`` is a string that is not a valid Python
            literal.
        TypeError: ``pred_rank`` is not a list.
    """
    # Function-scope import so the block does not depend on the file header.
    import ast

    idx, eval_csv, qa_dataset, evaluator, eval_metrics = args
    query, query_id, answer_ids, meta_info = qa_dataset[idx]

    try:
        matches = eval_csv.loc[eval_csv['query_id'] == query_id, 'pred_rank']
        # Series.item() raises ValueError (not IndexError) on an empty
        # selection, so the missing-prediction case is detected explicitly.
        if matches.empty:
            raise IndexError(f'Error when processing query_id={query_id}, please make sure the predicted results exist for this query.')
        pred_rank = matches.item()
    except IndexError:
        raise
    except Exception as e:
        raise RuntimeError(f'Unexpected error occurred while fetching prediction rank for query_id={query_id}: {e}') from e

    if isinstance(pred_rank, str):
        # The CSV is user-uploaded: parse it as a literal only, never eval() it.
        try:
            pred_rank = ast.literal_eval(pred_rank)
        except (ValueError, SyntaxError) as e:
            raise ValueError(f'Failed to parse pred_rank as a list for query_id={query_id}: {e}') from e

    if not isinstance(pred_rank, list):
        raise TypeError(f'Error when processing query_id={query_id}, expected pred_rank to be a list but got {type(pred_rank)}.')

    # Only the first 100 predictions are scored; earlier positions get the
    # higher score (0, -1, -2, ...).
    pred_dict = {node_id: -position for position, node_id in enumerate(pred_rank[:100])}
    answer_ids = torch.LongTensor(answer_ids)
    result = evaluator.evaluate(pred_dict, answer_ids, metrics=eval_metrics)
    result["idx"], result["query_id"] = idx, query_id
    return result
def compute_metrics(csv_path: str, dataset: str, split: str, num_workers: int = 4):
    """Score a submitted prediction CSV against one dataset split.

    Args:
        csv_path: Path to a CSV with ``query_id`` and ``pred_rank`` columns.
        dataset: One of ``'amazon'``, ``'mag'`` or ``'prime'``.
        split: One of ``'test'``, ``'test-0.1'`` or ``'human_generated_eval'``.
        num_workers: Number of worker processes used for per-query scoring.

    Returns:
        On success, a dict mapping each of ``'hit@1'``, ``'hit@5'``,
        ``'recall@20'`` and ``'mrr'`` to its mean over the split's queries.
        On failure, a message string that always starts with ``"Error"`` so
        callers can tell it apart from a result dict.
    """
    # Ranges are cheap to hold; the id list is only materialized for the
    # dataset that was actually requested.
    candidate_id_ranges = {
        'amazon': range(957192),
        'mag': range(1172724, 1872968),
        'prime': range(129375),
    }
    valid_splits = ['test', 'test-0.1', 'human_generated_eval']
    eval_metrics = ['hit@1', 'hit@5', 'recall@20', 'mrr']

    try:
        eval_csv = pd.read_csv(csv_path)
        for column in ('query_id', 'pred_rank'):
            if column not in eval_csv.columns:
                raise ValueError(f'No `{column}` column found in the submitted csv.')
        eval_csv = eval_csv[['query_id', 'pred_rank']]

        if dataset not in candidate_id_ranges:
            raise ValueError(f"Invalid dataset '{dataset}', expected one of {list(candidate_id_ranges)}.")
        if split not in valid_splits:
            raise ValueError(f"Invalid split '{split}', expected one of {valid_splits}.")

        evaluator = Evaluator(list(candidate_id_ranges[dataset]))
        qa_dataset = load_qa(dataset, human_generated_eval=split == 'human_generated_eval')
        split_idx = qa_dataset.get_idx_split()
        all_indices = split_idx[split].tolist()
        if not all_indices:
            raise ValueError(f"No queries found for split '{split}'.")

        # NOTE(review): every task tuple carries the full DataFrame, dataset
        # and evaluator, all of which are pickled per submitted task. Worth
        # revisiting if evaluation turns out to be slow.
        tasks = [(idx, eval_csv, qa_dataset, evaluator, eval_metrics) for idx in all_indices]
        results_list = []
        with ProcessPoolExecutor(max_workers=num_workers) as executor:
            futures = [executor.submit(process_single_instance, task) for task in tasks]
            for future in tqdm(as_completed(futures), total=len(futures)):
                # result() re-raises any exception raised inside the worker.
                results_list.append(future.result())

        # Average each metric over the per-query results.
        results_frame = pd.DataFrame(results_list)
        return {metric: float(results_frame[metric].mean()) for metric in eval_metrics}
    except pd.errors.EmptyDataError:
        return "Error: The CSV file is empty or could not be read. Please check the file and try again."
    except FileNotFoundError:
        return f"Error: The file {csv_path} could not be found. Please check the file path and try again."
    except Exception as error:
        # Keep the "Error" prefix contract for every failure path.
        message = str(error)
        return message if message.startswith("Error") else f"Error: {message}"
 # Data dictionaries for leaderboard
 data_synthesized_full = {
     except Exception as e:
         return False, f"Error processing CSV: {str(e)}"
def sanitize_name(name):
    """Return ``name`` with every character other than an ASCII letter or
    digit replaced by an underscore, for use in file and folder names."""
    disallowed = re.compile(r'[^a-zA-Z0-9]')
    return disallowed.sub('_', name)
def save_submission(submission_data, csv_file):
    """Persist a submission's prediction CSV and metadata under ``submissions/``.

    Files are written to ``submissions/<method>_<team>/`` (both names
    sanitized) and carry a timestamp so repeated submissions from the same
    method and team do not overwrite each other.

    Args:
        submission_data (dict): Metadata and results for the submission. Must
            contain ``'method_name'`` and ``'team_name'``. It is updated in
            place with ``'csv_path'``, ``'submission_id'`` and
            ``'folder_name'``.
        csv_file: The uploaded CSV, given either as an object whose ``name``
            attribute is the file's path or as a path itself.

    Returns:
        str: The submission id, ``<method>_<team>_<timestamp>``.
    """
    # Function-scope import so the block does not depend on the file header.
    import shutil

    model_name_clean = sanitize_name(submission_data['method_name'])
    team_name_clean = sanitize_name(submission_data['team_name'])
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    folder_name = f"{model_name_clean}_{team_name_clean}"
    submission_id = f"{folder_name}_{timestamp}"

    submission_dir = os.path.join("submissions", folder_name)
    os.makedirs(submission_dir, exist_ok=True)

    # Resolve where the uploaded CSV lives on disk.
    if isinstance(csv_file, (str, os.PathLike)):
        source_path = os.fspath(csv_file)
    else:
        source_path = getattr(csv_file, 'name', None)

    # Record a CSV path only when a file was actually copied, so the metadata
    # never points at a file that does not exist.
    csv_path = None
    if source_path:
        csv_path = os.path.join(submission_dir, f"predictions_{timestamp}.csv")
        shutil.copyfile(source_path, csv_path)

    submission_data.update({
        "csv_path": csv_path,
        "submission_id": submission_id,
        "folder_name": folder_name
    })

    metadata_path = os.path.join(submission_dir, f"metadata_{timestamp}.json")
    with open(metadata_path, 'w', encoding='utf-8') as f:
        json.dump(submission_data, f, indent=4)

    # latest.json tracks the most recent submission in this folder.
    latest_path = os.path.join(submission_dir, "latest.json")
    with open(latest_path, 'w', encoding='utf-8') as f:
        json.dump({
            "latest_submission": timestamp,
            "status": "pending_review",
            "method_name": submission_data['method_name']
        }, f, indent=4)

    return submission_id
def update_leaderboard_data(submission_data):
    """Write a submission's scores into the leaderboard table for its split.

    The table is picked by ``submission_data['split']``. If a row whose
    ``Method`` equals the submitted method name already exists, its score
    columns are overwritten; otherwise a new row is added. Only the method
    name identifies the row in the table.
    """
    global df_synthesized_full, df_synthesized_10, df_human_generated

    tables_by_split = {
        'test': df_synthesized_full,
        'test-0.1': df_synthesized_10,
        'human_generated_eval': df_human_generated
    }
    board = tables_by_split[submission_data['split']]

    method = submission_data['method_name']
    prefix = f'STARK-{submission_data["dataset"].upper()}'
    scores = submission_data['results']
    row = {
        'Method': method,
        f'{prefix}_Hit@1': scores['hit@1'],
        f'{prefix}_Hit@5': scores['hit@5'],
        f'{prefix}_R@20': scores['recall@20'],
        f'{prefix}_MRR': scores['mrr']
    }

    already_listed = board['Method'] == method
    if not already_listed.any():
        # New method: add a row at the end of the table.
        board.loc[len(board)] = row
        return

    # Known method: overwrite its values column by column.
    for column, value in row.items():
        board.loc[already_listed, column] = value
def process_submission(
    method_name, team_name, dataset, split, contact_email,
    code_repo, csv_file, model_description, hardware, paper_link
):
    """Evaluate an uploaded prediction CSV, store the submission and report back.

    Args:
        method_name, team_name, dataset, split, contact_email, code_repo,
        model_description, hardware, paper_link: Form values recorded in the
            submission metadata. ``dataset`` is lower-cased before evaluation
            and ``split`` is passed through as given.
        csv_file: Uploaded file object whose ``name`` attribute is the path of
            the CSV on disk.

    Returns:
        str: A success message with the submission id and metrics, or a
        message describing why evaluation or processing failed. This function
        does not raise; every exception is turned into a message.
    """
    try:
        # [Previous validation code remains the same]
        if csv_file is None:
            return "Error processing submission: no CSV file was uploaded."

        # Process CSV file through evaluation pipeline
        results = compute_metrics(
            csv_file.name,
            dataset=dataset.lower(),
            split=split,
            num_workers=4
        )
        # compute_metrics reports failures as a string; any string result is
        # an error, whatever its wording.
        if isinstance(results, str):
            return f"Evaluation error: {results}"

        submission_data = {
            "method_name": method_name,
            "team_name": team_name,
            "dataset": dataset,
            "split": split,
            "contact_email": contact_email,
            "code_repo": code_repo,
            "model_description": model_description,
            "hardware": hardware,
            "paper_link": paper_link,
            "results": results,
            "status": "pending_review",
            "submission_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        submission_id = save_submission(submission_data, csv_file)

        # NOTE(review): the leaderboard is updated right away although the
        # status is "pending_review" and the message below says results appear
        # once approved -- confirm which behavior is intended.
        update_leaderboard_data(submission_data)

        return f"""
        Submission successful! Your submission ID is: {submission_id}
        Evaluation Results:
        Hit@1: {results['hit@1']:.2f}
        Hit@5: {results['hit@5']:.2f}
        Recall@20: {results['recall@20']:.2f}
        MRR: {results['mrr']:.2f}
        Your submission has been saved and is pending review.
        Once approved, your results will appear in the leaderboard under the method name: {method_name}
        """
    except Exception as e:
        return f"Error processing submission: {str(e)}"
 def filter_by_model_type(df, selected_types):
     if not selected_types:
         return df.head(0)
     return outputs
 css = """
 table > thead {

requirements.txt CHANGED Viewed

@@ -12,6 +12,7 @@ pandas
 python-dateutil
 tqdm
 transformers
 tokenizers>=0.15.0
 sentencepiece
 stark_qa

 python-dateutil
 tqdm
 transformers
+torch
 tokenizers>=0.15.0
 sentencepiece
 stark_qa