File size: 10,009 Bytes
2016488
 
 
 
 
ba1590f
 
2016488
 
 
 
 
ba1590f
2016488
 
75cdf4b
f3d984c
2016488
 
 
ba1590f
 
75cdf4b
2016488
ba1590f
 
 
 
2016488
 
 
ba1590f
 
2016488
 
ba1590f
 
 
 
 
 
 
 
2016488
 
 
 
 
 
ba1590f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2016488
 
 
f8121ba
 
 
 
 
 
2016488
 
 
 
 
 
 
b4a00d5
8698e36
b4a00d5
 
 
 
 
 
 
 
2016488
 
b4a00d5
2016488
 
4b8c00a
 
 
 
093a2b5
4b8c00a
5d10667
1170f60
 
 
093a2b5
 
 
4b8c00a
 
 
bd24c06
 
 
4b8c00a
bd24c06
 
 
 
 
 
 
 
 
 
4b8c00a
 
 
 
 
2016488
ba1590f
 
 
 
 
 
 
 
2016488
 
491e896
75cdf4b
2016488
 
 
ba1590f
2016488
 
 
ba1590f
2016488
 
ba1590f
f3d984c
ba1590f
 
 
8698e36
 
2016488
 
ba1590f
2016488
 
ba1590f
2016488
 
 
 
 
ba1590f
2016488
 
 
 
ba1590f
2016488
 
 
ba1590f
2016488
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba1590f
 
 
2016488
 
 
 
 
 
 
 
 
 
 
 
 
ba1590f
 
2016488
 
 
ba1590f
2016488
 
 
 
ba1590f
 
2016488
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
import pandas as pd
import gradio as gr
import csv
import json
import os
import requests
import io
import shutil
from huggingface_hub import Repository

# Hugging Face access token, read from the environment; None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Names of the per-meta-task score columns.
TASKS = ["Classification", "VQA", "Retrieval", "Grounding"]

# Leaderboard columns, in display order.
MODEL_INFO = [
    "Rank", "Models", "Model Size(B)", "Data Source",
    "Overall",
    "Classification", "VQA", "Retrieval", "Grounding"
]

# Every leaderboard column that is not a per-task score.
BASE_COLS = [col for col in MODEL_INFO if col not in TASKS]

# Per-column datatype labels for the table widget.
# NOTE(review): this list has 11 entries while MODEL_INFO has 9 columns —
# confirm against the component that consumes it.
DATA_TITLE_TYPE = ['number', 'markdown', 'str', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number']

SUBMISSION_NAME = "MMEB"
# Built with plain string formatting: os.path.join is for filesystem paths and
# would insert a backslash separator on Windows, producing an invalid URL.
SUBMISSION_URL = f"https://huggingface.co/spaces/TIGER-Lab/{SUBMISSION_NAME}"
FILE_NAME = "results.csv"
# Local copy of the results file (written by get_df, read/appended by add_new_eval).
CSV_DIR = "./results.csv"

# Alias of MODEL_INFO (same list object) used when selecting columns.
COLUMN_NAMES = MODEL_INFO

# Markdown shown at the top of the leaderboard page: title plus a short
# description of the MMEB benchmark and a link to the paper.
LEADERBOARD_INTRODUCTION = """
# MMEB Leaderboard

## Introduction
We introduce a novel benchmark, MMEB (Massive Multimodal Embedding Benchmark), 
which includes 36 datasets spanning four meta-task categories: classification, visual question answering, retrieval, and visual grounding. MMEB provides a comprehensive framework for training
and evaluating embedding models across various combinations of text and image modalities. 
All tasks are reformulated as ranking tasks, where the model follows instructions, processes a query, and selects the correct target from a set of candidates. The query and target can be an image, text,
or a combination of both. MMEB is divided into 20 in-distribution datasets, which can be used for
training, and 16 out-of-distribution datasets, reserved for evaluation.

The detailed explanation of the benchmark and datasets can be found in our paper: https://doi.org/10.48550/arXiv.2410.05160.
"""

# Placeholder for text accompanying the table; currently an empty string.
TABLE_INTRODUCTION = """"""

# Markdown summary of the four meta-task categories and their IND/OOD datasets.
# The string is not raw, so each trailing "\n" below is an actual newline
# escape that yields an extra blank line in the rendered text.
LEADERBOARD_INFO = """
## Dataset Summary
MMEB is organized into four primary meta-task categories:
- **Classification**: This category comprises 5 in-distribution and 5 out-of-distribution datasets. Queries
consist of instructions and images, optionally accompanied by related text. Targets are class labels,
and the number of class labels corresponds to the number of classes in the dataset. \n
        - IND: ImageNet-1k, N24News, HatefulMemes, VOC2007, SUN397 \n
        - OOD: Place365, ImageNet-A, ImageNet-R, ObjectNet, Country-211 \n
- **Visual Question Answering**: This category includes 6 in-distribution and 4 out-of-distribution
datasets. The query consists of an instruction, an image, and a piece of text as the question, while
the target is the answer. Each query has 1,000 target candidates: 1 ground truth and 999 distractors. \n
        - IND: OK-VQA, A-OKVQA, DocVQA, InfographicVQA, ChartQA, Visual7W \n
        - OOD: ScienceQA, VizWiz, GQA, TextVQA \n
- **Information Retrieval**: This category contains 8 in-distribution and 4 out-of-distribution datasets.
Both the query and target sides can involve a combination of text, images, and instructions. Similar
to the VQA task, each query has 1,000 candidates, with 1 ground truth and 999 distractors. \n
        - IND: VisDial, CIRR, VisualNews_t2i, VisualNews_i2t, MSCOCO_t2i, MSCOCO_i2t, NIGHTS, WebQA \n
        - OOD: OVEN, FashionIQ, EDIS, Wiki-SS-NQ \n
- **Visual Grounding**: This category includes 1 in-distribution and 3 out-of-distribution datasets, which are adapted from object detection tasks. Queries consist of an instruction, an image, and text referring to a specific region or object within the image. The target may include a cropped image of the object or text describing the same region. Each query includes 1,000 candidates: 1 ground truth and 999 distractors. These distractors may include hard negatives from the same object class, other objects in the image, or random objects from different images. \n
        - IND: MSCOCO \n
        - OOD: Visual7W-Pointing, RefCOCO, RefCOCO-Matching \n
"""

# Label and BibTeX snippet for the "cite these results" widget.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# Raw string so the BibTeX braces/backslashes are kept verbatim.
CITATION_BUTTON_TEXT = r"""@article{jiang2024vlm2vec,
  title={VLM2Vec: Training Vision-Language Models for Massive Multimodal Embedding Tasks},
  author={Jiang, Ziyan and Meng, Rui and Yang, Xinyi and Yavuz, Semih and Zhou, Yingbo and Chen, Wenhu},
  journal={arXiv preprint arXiv:2410.05160},
  year={2024}
}"""

# Markdown instructions for submitting results, including an example payload.
# The "Data Source" value in the example is quoted so that the snippet shows
# valid JSON string syntax for submitters to copy.
# NOTE(review): the example wraps the entry in a list, whereas add_new_eval
# indexes the parsed payload as a single object — confirm the intended format.
# NOTE(review): the contact address below looks like an obfuscated placeholder;
# restore the real address if this text came from a scraped copy.
SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction

## ⚠ Please note that you need to submit the JSON file with the following format:
```json
[
    {
        "Model": "<Model Name>",
        <Optional>"URL": "<Model URL>",
        "Model Size(B)": 1000,
        "Data Source": "TIGER-Lab",
        "Overall": 50.0,
        "Classification": 50.0,
        "VQA": 50.0,
        "Retrieval": 50.0,
        "Grounding": 50.0
    }, 
]
```
Please send us an email at [email protected], attaching the JSON file. We will review your submission and update the leaderboard accordingly.
"""

# Maps a model name (or a lookup key used by create_hyperlinked_names) to the
# page the model's name should link to. The "VLM2Vec-Phi-3.5-v", "VLM2Vec" and
# "UniIR" entries are selected by substring rules in create_hyperlinked_names;
# the remaining entries are matched by exact model name. add_new_eval may add
# entries to this dict at runtime when a submission provides a "URL".
MODEL_URLS = {
    "clip-vit-large-patch14": "https://huggingface.co/openai/clip-vit-large-patch14", 
    "blip2-opt-2.7b": "https://huggingface.co/Salesforce/blip2-opt-2.7b",
    "siglip-base-patch16-224": "https://huggingface.co/google/siglip-base-patch16-224", 
    "open_clip": "https://github.com/mlfoundations/open_clip", 
    "e5-v": "https://huggingface.co/royokong/e5-v", 
    "Magiclens": "https://github.com/google-deepmind/magiclens",
    "MMRet-large": "https://huggingface.co/JUNJIE99/MMRet-large", 
    "VLM2Vec-Phi-3.5-v": "https://huggingface.co/TIGER-Lab/VLM2Vec-Full",
    "VLM2Vec": "https://github.com/TIGER-AI-Lab/VLM2Vec", 
    "UniIR": "https://huggingface.co/TIGER-Lab/UniIR", 
    "OpenCLIP-Full-Fine-Tuning": "https://doi.org/10.48550/arXiv.2212.07143", 
    "CLIP-Full-Fine-Tuning": "https://doi.org/10.48550/arXiv.2103.00020"
}

def create_hyperlinked_names(df):
    """Return a copy of ``df`` whose 'Models' entries are wrapped in HTML links.

    Names containing one of the known family markers link to that family's
    entry in ``MODEL_URLS``; any other name links to its own ``MODEL_URLS``
    entry when one exists and is otherwise left untouched. The input frame is
    not modified.
    """
    # (substring marker, MODEL_URLS key) pairs, checked in this order.
    family_rules = (
        ("VLM2Vec (Phi-3.5-V-", "VLM2Vec-Phi-3.5-v"),
        ("VLM2Vec (LLaVA-1.6-LoRA-", "VLM2Vec"),
        ("UniIR", "UniIR"),
    )

    def _anchor(href, label):
        return f'<a href="{href}">{label}</a>'

    def _linkify(name):
        for marker, url_key in family_rules:
            if marker in name:
                return _anchor(MODEL_URLS[url_key], name)
        if name in MODEL_URLS:
            return _anchor(MODEL_URLS[name], name)
        return name

    linked = df.copy()
    linked['Models'] = linked['Models'].apply(_linkify)
    return linked

def get_df():
    """Download the leaderboard CSV and return it as a ranked DataFrame.

    The downloaded table is first written unchanged to ``CSV_DIR``. The
    returned frame then has 'Model Size(B)' normalised via
    ``process_model_size``, rows sorted by 'Overall' in descending order,
    model names hyperlinked via ``create_hyperlinked_names`` and a 1-based
    'Rank' column reflecting the sorted order.

    Raises:
        SystemExit: if the server answers with a non-200 status code.
        requests.RequestException: on network failures, including a timeout.
    """
    import sys

    # fetch the leaderboard data
    url = "https://huggingface.co/spaces/TIGER-Lab/MMEB/resolve/main/results.csv"
    # Only authenticate when a token is configured; otherwise the header would
    # literally read "Bearer None".
    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
    # requests never times out by default, so bound the wait explicitly.
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        sys.exit(f"Error: {response.status_code}")
    df = pd.read_csv(io.StringIO(response.text))
    df.to_csv(CSV_DIR, index=False) # update local file
    df['Model Size(B)'] = df['Model Size(B)'].apply(process_model_size)
    df = df.sort_values(by=['Overall'], ascending=False)
    df = create_hyperlinked_names(df)
    df['Rank'] = range(1, len(df) + 1)
    return df


def add_new_eval(input_file):
    """Validate a submitted JSON result and append it to the results CSV.

    Args:
        input_file: JSON text (str/bytes) describing one model. It must be a
            JSON object with the keys 'Model', 'Overall', 'Model Size(B)' and
            one key per entry of ``TASKS``; 'URL' is optional and, when
            present, is recorded in ``MODEL_URLS``.

    Returns:
        An "Error! ..." message string when the input is missing, is not a
        valid JSON object, or lacks a required key; otherwise ``None`` (the
        outcome is printed).
    """
    if input_file is None:
        return "Error! Empty file!"

    # Load the input json file
    try:
        upload_data = json.loads(input_file)
    except (TypeError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        return "Error! Invalid JSON file!"
    if not isinstance(upload_data, dict):
        # The key lookups below require a JSON object.
        return "Error! Invalid JSON file!"
    print("upload_data:\n", upload_data)

    # Validate 'Model' before it is read, like every other required key.
    if "Model" not in upload_data:
        return "Error! Missing Model column!"
    data_row = [f'{upload_data["Model"]}']
    # NOTE(review): the row is written as Model, Overall, Model Size(B), tasks
    # and has no 'Data Source' value — confirm this matches the CSV header.
    for col in ['Overall', 'Model Size(B)'] + TASKS:
        if col not in upload_data:
            return f"Error! Missing {col} column!"
        data_row.append(upload_data[col])
    if 'URL' in upload_data:
        MODEL_URLS[upload_data['Model']] = upload_data['URL']
    print("data_row:\n", data_row)

    # NOTE(review): the repository is cloned into SUBMISSION_NAME while the row
    # is appended to CSV_DIR — confirm the pushed checkout contains that file.
    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
                                 use_auth_token=HF_TOKEN, repo_type="space")
    submission_repo.git_pull()

    # Track submitted models (first CSV column); skip blank rows, which the
    # csv module yields as empty lists.
    with open(CSV_DIR, mode='r', newline='', encoding='utf-8') as file:
        already_submitted = {row[0] for row in csv.reader(file, delimiter=',') if row}

    if data_row[0] in already_submitted:
        print('The model already exists in the leaderboard!')
        return

    # if not in the existing models list, add it to the csv file
    with open(CSV_DIR, mode='a', newline='', encoding='utf-8') as file:
        csv.writer(file).writerow(data_row)

    submission_repo.push_to_hub()
    print('Submission Successful')

def refresh_data():
    """Re-fetch the leaderboard and return only the ``COLUMN_NAMES`` columns."""
    return get_df()[COLUMN_NAMES]


def search_and_filter_models(df, query, min_size, max_size):
    """Filter the leaderboard by model-name substring and model-size range.

    Args:
        df: leaderboard frame with at least the ``COLUMN_NAMES`` columns.
        query: text to look for in 'Models' (case-insensitive, literal match);
            an empty/None query disables the name filter.
        min_size: inclusive lower bound on 'Model Size(B)'.
        max_size: inclusive upper bound on 'Model Size(B)'.

    Returns:
        A new frame restricted to ``COLUMN_NAMES`` containing the matching
        rows. Rows whose size is 'unknown' are treated as size 1000.0.
    """
    filtered_df = df.copy()

    if query:
        # regex=False: the query is user-typed text, and model names contain
        # characters such as "(" that are invalid as a regular expression.
        name_mask = filtered_df['Models'].str.contains(
            query, case=False, na=False, regex=False)
        filtered_df = filtered_df[name_mask]

    def _size_in_range(size):
        effective = 1000.0 if size == 'unknown' else size
        return min_size <= effective <= max_size

    # astype(bool) keeps the mask boolean even when no rows are left;
    # an empty non-boolean Series would be read as a list of column labels.
    size_mask = filtered_df['Model Size(B)'].apply(_size_in_range).astype(bool)

    filtered_df = filtered_df[size_mask]

    return filtered_df[COLUMN_NAMES]


def search_models(df, query):
    """Return the rows of ``df`` whose 'Models' value contains ``query``.

    Matching is case-insensitive and literal (not a regular expression), so
    text such as "VLM2Vec (" can be searched as typed. Missing names never
    match. With an empty/None query, ``df`` itself is returned unchanged.
    """
    if not query:
        return df
    mask = df['Models'].str.contains(query, case=False, na=False, regex=False)
    return df[mask]

def get_size_range(df):
    """Return ``(min, max)`` of the 'Model Size(B)' column as floats.

    Entries equal to 'unknown' count as 0.0. When every entry is 0.0 after
    that substitution, the fallback range ``(0.0, 1000.0)`` is returned.
    """
    numeric_sizes = df['Model Size(B)'].apply(
        lambda value: value if value != 'unknown' else 0.0
    )
    has_known_size = not (numeric_sizes == 0.0).all()
    if has_known_size:
        lowest = float(numeric_sizes.min())
        highest = float(numeric_sizes.max())
        return lowest, highest
    return 0.0, 1000.0


def process_model_size(size):
    """Normalise a raw model-size cell to a float, or 'unknown'.

    Missing values (per ``pd.isna``), the marker 'unk', and anything that
    ``float()`` cannot convert all map to the string 'unknown'.
    """
    if pd.isna(size):
        return 'unknown'
    if size == 'unk':
        return 'unknown'
    try:
        return float(size)
    except (ValueError, TypeError):
        return 'unknown'


def filter_columns_by_tasks(df, selected_tasks=None):
    """Return ``df`` restricted to the base columns plus the selected tasks.

    With no tasks selected (``None`` or empty), the full ``COLUMN_NAMES`` view
    is returned. Otherwise the result holds 'Models', 'Model Size(B)',
    'Data Source' and 'Overall' followed by the selected task columns, keeping
    only those that actually exist in ``df``.
    """
    if selected_tasks is not None and len(selected_tasks) != 0:
        wanted = ['Models', 'Model Size(B)', 'Data Source', 'Overall'] + selected_tasks
        present = [name for name in wanted if name in df.columns]
        return df[present]

    return df[COLUMN_NAMES]

def get_task_choices():
    """Return the module-level ``TASKS`` list (the same object, not a copy)."""
    choices = TASKS
    return choices