import ast
import argparse
import glob
import pickle

import gradio as gr
import numpy as np
import pandas as pd
block_css = """ | |
#notice_markdown { | |
font-size: 104% | |
} | |
#notice_markdown th { | |
display: none; | |
} | |
#notice_markdown td { | |
padding-top: 6px; | |
padding-bottom: 6px; | |
} | |
#leaderboard_markdown { | |
font-size: 104% | |
} | |
#leaderboard_markdown td { | |
padding-top: 6px; | |
padding-bottom: 6px; | |
} | |
#leaderboard_dataframe td { | |
line-height: 0.1em; | |
font-size: 8px; | |
} | |
footer { | |
display:none !important | |
} | |
.image-container { | |
display: flex; | |
align-items: center; | |
padding: 1px; | |
} | |
.image-container img { | |
margin: 0 30px; | |
height: 20px; | |
max-height: 100%; | |
width: auto; | |
max-width: 20%; | |
} | |
""" | |

def model_hyperlink(model_name, link):
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline; text-decoration-style: dotted;">{model_name}</a>'
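
# The leaderboard CSV files are assumed to have a header row containing "Model", "Link",
# "Open Source", and the per-task score columns used below (e.g. "Text Recognition", ...,
# "Average Score"); every column other than "Model", "Link", "Language Model", and
# "Open Source" is parsed as a float. Illustrative layout only (hypothetical values):
#   Model,Link,Open Source,Text Recognition,...,Average Score
#   SomeModel,https://example.com,Yes,61.2,...,57.8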
def load_leaderboard_table_csv(filename, add_hyperlink=True):
    # Parse the leaderboard CSV: the first line is the header, each following line is one model.
    with open(filename) as f:
        lines = f.readlines()
    heads = [v.strip() for v in lines[0].split(",")]
    rows = []
    for i in range(1, len(lines)):
        row = [v.strip() for v in lines[i].split(",")]
        item = {}
        for h, v in zip(heads, row):
            # Every column other than the text columns holds a numeric score.
            if h not in ("Model", "Link", "Language Model", "Open Source"):
                item[h] = float(v)
            else:
                item[h] = v
        if add_hyperlink:
            item["Model"] = model_hyperlink(item["Model"], item["Link"])
        rows.append(item)
    return rows

def get_arena_table(model_table_df):
    # Sort by average score (descending) and build one display row per model.
    model_table_df = model_table_df.sort_values(by=["Average Score"], ascending=False)
    # "Language Model" is available in the CSV but currently not shown.
    score_columns = [
        "Open Source",
        "Text Recognition",
        "Text Referring",
        "Text Spotting",
        "Relation Extraction",
        "Element Parsing",
        "Mathematical Calculation",
        "Visual Text Understanding",
        "Knowledge Reasoning",
        "Average Score",
    ]
    values = []
    for i in range(len(model_table_df)):
        record = model_table_df.iloc[i]
        # Rank, model display name, then the per-task scores.
        row = [i + 1, record["Model"]]
        row.extend(record[col] for col in score_columns)
        values.append(row)
    return values

def get_cn_table(model_table_df):
    # Sort by average score (descending) and build one display row per model.
    model_table_df = model_table_df.sort_values(by=["Average Score"], ascending=False)
    score_columns = [
        "Open Source",
        "Text Recognition",
        "Relation Extraction",
        "Element Parsing",
        "Visual Text Understanding",
        "Knowledge Reasoning",
        "Average Score",
    ]
    values = []
    for i in range(len(model_table_df)):
        record = model_table_df.iloc[i]
        # Rank, model display name, then the per-task scores.
        row = [i + 1, record["Model"]]
        row.extend(record[col] for col in score_columns)
        values.append(row)
    return values

def build_leaderboard_tab(leaderboard_table_file_en, leaderboard_table_file_cn, show_plot=False):
    if leaderboard_table_file_en:
        data_en = load_leaderboard_table_csv(leaderboard_table_file_en)
        data_cn = load_leaderboard_table_csv(leaderboard_table_file_cn)
        model_table_df_en = pd.DataFrame(data_en)
        model_table_df_cn = pd.DataFrame(data_cn)
        md_head = """
# OCRBench v2 Leaderboard
| [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR) | [Paper](https://arxiv.org/abs/2501.00321v1) |
"""
        gr.Markdown(md_head, elem_id="leaderboard_markdown")
        md_intro = (
            "OCRBench v2 is a large-scale bilingual text-centric benchmark with currently the most "
            "comprehensive set of tasks (4× more tasks than the previous multi-scene benchmark OCRBench), "
            "the widest coverage of scenarios (31 diverse scenarios including street scene, receipt, "
            "formula, diagram, and so on), and thorough evaluation metrics, with a total of 10,000 "
            "human-verified question-answering pairs and a high proportion of difficult samples."
        )
        with gr.Tabs() as tabs:
            # English leaderboard table
            with gr.Tab("OCRBench v2 English subsets", id=0):
                arena_table_vals = get_arena_table(model_table_df_en)
                gr.Markdown(md_intro, elem_id="leaderboard_markdown")
                gr.Dataframe(
                    headers=[
                        "Rank",
                        "Name",
                        "Open Source",
                        "Text Recognition",
                        "Text Referring",
                        "Text Spotting",
                        "Relation Extraction",
                        "Element Parsing",
                        "Mathematical Calculation",
                        "Visual Text Understanding",
                        "Knowledge Reasoning",
                        "Average Score",
                    ],
                    datatype=[
                        "str",
                        "markdown",
                        "str",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                    ],
                    value=arena_table_vals,
                    elem_id="arena_leaderboard_dataframe",
                    column_widths=[90, 150, 120, 170, 150, 150, 150, 150, 170, 170, 150, 150],
                    wrap=True,
                )
            # Chinese leaderboard table
            with gr.Tab("OCRBench v2 Chinese subsets", id=1):
                cn_table_vals = get_cn_table(model_table_df_cn)
                gr.Markdown(md_intro, elem_id="leaderboard_markdown")
                gr.Dataframe(
                    headers=[
                        "Rank",
                        "Name",
                        "Open Source",
                        "Text Recognition",
                        "Relation Extraction",
                        "Element Parsing",
                        "Visual Text Understanding",
                        "Knowledge Reasoning",
                        "Average Score",
                    ],
                    datatype=[
                        "str",
                        "markdown",
                        "str",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                    ],
                    value=cn_table_vals,
                    elem_id="arena_leaderboard_dataframe",
                    # height=700,
                    column_widths=[60, 120, 100, 110, 110, 110, 110, 110, 80],
                    wrap=True,
                )

    md_tail = """
# Notice
Sometimes, API calls to closed-source models may not succeed. In such cases, we retry the unsuccessful samples until a successful response can no longer be obtained.
If you would like your model included in the OCRBench leaderboard, please follow the evaluation instructions on [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR) and feel free to contact us via email at [email protected]. We will update the leaderboard promptly."""
    gr.Markdown(md_tail, elem_id="leaderboard_markdown")

def build_demo(leaderboard_table_file_en, leaderboard_table_file_cn):
    text_size = gr.themes.sizes.text_lg
    with gr.Blocks(
        title="OCRBench Leaderboard",
        theme=gr.themes.Base(text_size=text_size),
        css=block_css,
    ) as demo:
        leader_components = build_leaderboard_tab(
            leaderboard_table_file_en, leaderboard_table_file_cn, show_plot=True
        )
    return demo

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true")
    parser.add_argument("--OCRBench_file_en", type=str, default="./OCRBench_en.csv")
    parser.add_argument("--OCRBench_file_cn", type=str, default="./OCRBench_cn.csv")
    args = parser.parse_args()

    demo = build_demo(args.OCRBench_file_en, args.OCRBench_file_cn)
    # Honor the --share flag so a public Gradio link can be created when requested.
    demo.launch(share=args.share)
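
# Example local launch, assuming this script is saved as app.py and the default CSV files
# (./OCRBench_en.csv and ./OCRBench_cn.csv) are present next to it; add --share to expose
# a public Gradio link:
#   python app.py --OCRBench_file_en ./OCRBench_en.csv --OCRBench_file_cn ./OCRBench_cn.csv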