import pandas as pd

# Column configuration for the per-benchmark leaderboard tables.
#
# Each benchmark exposes three public lists:
#   <NAME>_ON_LOAD_COLUMNS - column names listed for initial display
#   <NAME>_SEARCH_COLUMNS  - column names listed for searching
#   <NAME>_HIDE_COLUMNS    - column names listed as hidden
#
# The lists are assembled from the private building blocks below so a column
# rename only has to be made once.  Every public constant is still its own
# independent ``list`` object, so mutating one never affects another.

# NOTE(review): three entries, while the default on-load lists have four
# columns - confirm against whatever consumes TYPES.
TYPES = ["str", "number", "number"]

# --- shared building blocks (private) ---------------------------------------

_DEFAULT_ON_LOAD_COLUMNS = ["Agent Name", "Accuracy", "Total Cost", "Runs"]

_DEFAULT_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]

# Hidden for every benchmark in this module.
_ALWAYS_HIDDEN_COLUMNS = [
    "F1 Score",
    "AUC",
    "Precision",
    "Recall",
    "benchmark_name",
]

# Score columns hidden for every benchmark except MLAGENTBENCH, which shows
# "Overall Score" on load instead of "Accuracy".
_TASK_SCORE_COLUMNS = [
    "Overall Score",
    "Vectorization Score",
    "Fathomnet Score",
    "Feedback Score",
    "House Price Score",
    "Spaceship Titanic Score",
    "AMP Parkinsons Disease Progression Prediction Score",
    "CIFAR10 Score",
    "IMDB Score",
]

# Per-level accuracy columns: shown on load for GAIA, hidden elsewhere.
_LEVEL_ACCURACY_COLUMNS = [
    "Level 1 Accuracy",
    "Level 2 Accuracy",
    "Level 3 Accuracy",
]

# Hide list shared by SWEBENCH, USACO, COREBENCH, CYBENCH and APPWORLD.
_DEFAULT_HIDE_COLUMNS = [
    *_ALWAYS_HIDDEN_COLUMNS,
    *_TASK_SCORE_COLUMNS,
    *_LEVEL_ACCURACY_COLUMNS,
]

# --- SWE-bench ---------------------------------------------------------------

SWEBENCH_ON_LOAD_COLUMNS = list(_DEFAULT_ON_LOAD_COLUMNS)
SWEBENCH_SEARCH_COLUMNS = list(_DEFAULT_SEARCH_COLUMNS)
SWEBENCH_HIDE_COLUMNS = list(_DEFAULT_HIDE_COLUMNS)

# --- USACO -------------------------------------------------------------------

USACO_ON_LOAD_COLUMNS = list(_DEFAULT_ON_LOAD_COLUMNS)
USACO_SEARCH_COLUMNS = list(_DEFAULT_SEARCH_COLUMNS)
USACO_HIDE_COLUMNS = list(_DEFAULT_HIDE_COLUMNS)

# --- CORE-Bench --------------------------------------------------------------

COREBENCH_ON_LOAD_COLUMNS = list(_DEFAULT_ON_LOAD_COLUMNS)
COREBENCH_SEARCH_COLUMNS = list(_DEFAULT_SEARCH_COLUMNS)
COREBENCH_HIDE_COLUMNS = list(_DEFAULT_HIDE_COLUMNS)

# --- MLAgentBench ------------------------------------------------------------

MLAGENTBENCH_ON_LOAD_COLUMNS = ["Agent Name", "Overall Score", "Total Cost"]
MLAGENTBENCH_SEARCH_COLUMNS = list(_DEFAULT_SEARCH_COLUMNS)
# Hides "Accuracy" rather than the task score columns.
MLAGENTBENCH_HIDE_COLUMNS = [*_ALWAYS_HIDDEN_COLUMNS, "Accuracy"]

# Labelled numeric buckets.  Every interval is open on the left and closed on
# the right, so adjacent buckets share a boundary without overlapping.
NUMERIC_INTERVALS = {
    "?": pd.Interval(-1, 0, closed="right"),
    "~1.5": pd.Interval(0, 2, closed="right"),
    "~3": pd.Interval(2, 4, closed="right"),
    "~7": pd.Interval(4, 9, closed="right"),
    "~13": pd.Interval(9, 20, closed="right"),
    "~35": pd.Interval(20, 45, closed="right"),
    "~60": pd.Interval(45, 70, closed="right"),
    "70+": pd.Interval(70, 10000, closed="right"),
}

# --- Cybench -----------------------------------------------------------------

CYBENCH_ON_LOAD_COLUMNS = list(_DEFAULT_ON_LOAD_COLUMNS)
CYBENCH_SEARCH_COLUMNS = list(_DEFAULT_SEARCH_COLUMNS)
CYBENCH_HIDE_COLUMNS = list(_DEFAULT_HIDE_COLUMNS)

# --- AppWorld ----------------------------------------------------------------

APPWORLD_ON_LOAD_COLUMNS = [*_DEFAULT_ON_LOAD_COLUMNS, "Scenario Goal Completion"]
APPWORLD_SEARCH_COLUMNS = list(_DEFAULT_SEARCH_COLUMNS)
APPWORLD_HIDE_COLUMNS = list(_DEFAULT_HIDE_COLUMNS)

# --- GAIA --------------------------------------------------------------------

# The per-level accuracies sit between "Accuracy" and "Total Cost".
GAIA_ON_LOAD_COLUMNS = [
    "Agent Name",
    "Accuracy",
    *_LEVEL_ACCURACY_COLUMNS,
    "Total Cost",
    "Runs",
]
GAIA_SEARCH_COLUMNS = list(_DEFAULT_SEARCH_COLUMNS)
# The level accuracies are displayed for GAIA, so they are not hidden here.
GAIA_HIDE_COLUMNS = [*_ALWAYS_HIDDEN_COLUMNS, *_TASK_SCORE_COLUMNS]