import argparse

import gradio as gr
import pandas as pd

block_css = """
#notice_markdown {
    font-size: 104%;
}
#notice_markdown th {
    display: none;
}
#notice_markdown td {
    padding-top: 6px;
    padding-bottom: 6px;
}
#leaderboard_markdown {
    font-size: 104%;
}
#leaderboard_markdown td {
    padding-top: 6px;
    padding-bottom: 6px;
}
#leaderboard_dataframe td {
    line-height: 0.1em;
    font-size: 8px;
}
footer {
    display: none !important;
}
.image-container {
    display: flex;
    align-items: center;
    padding: 1px;
}
.image-container img {
    margin: 0 30px;
    height: 20px;
    max-height: 100%;
    width: auto;
    max-width: 20%;
}
"""


def model_hyperlink(model_name, link):
    # The "markdown" column type in gr.Dataframe renders this as a clickable link.
    return f"[{model_name}]({link})"


def load_leaderboard_table_csv(filename, add_hyperlink=True):
    # Parse the leaderboard CSV into a list of dicts; every column except the
    # descriptive ones ("Model", "Link", "Language Model", "Open Source")
    # holds a numeric score.
    lines = open(filename).readlines()
    heads = [v.strip() for v in lines[0].split(",")]
    rows = []
    for line in lines[1:]:
        row = [v.strip() for v in line.split(",")]
        item = {}
        for h, v in zip(heads, row):
            if h in ("Model", "Link", "Language Model", "Open Source"):
                item[h] = v
            else:
                item[h] = float(v)
        if add_hyperlink:
            item["Model"] = model_hyperlink(item["Model"], item["Link"])
        rows.append(item)
    return rows


def _score_table(model_table_df, columns):
    # Sort by average score (descending) and flatten the requested columns
    # into ranked display rows for gr.Dataframe.
    model_table_df = model_table_df.sort_values(by=["Average Score"], ascending=False)
    values = []
    for rank, (_, record) in enumerate(model_table_df.iterrows(), start=1):
        values.append([rank] + [record[col] for col in columns])
    return values


def get_arena_table(model_table_df):
    # Columns shown for the English subsets. "Language Model" is parsed from
    # the CSV but intentionally not displayed.
    columns = [
        "Model",
        "Open Source",
        "Text Recognition",
        "Text Referring",
        "Text Spotting",
        "Relation Extraction",
        "Element Parsing",
        "Mathematical Calculation",
        "Visual Text Understanding",
        "Knowledge Reasoning",
        "Average Score",
    ]
    return _score_table(model_table_df, columns)


def get_cn_table(model_table_df):
    # Columns shown for the Chinese subsets.
    columns = [
        "Model",
        "Open Source",
        "Text Recognition",
        "Relation Extraction",
        "Element Parsing",
        "Visual Text Understanding",
        "Knowledge Reasoning",
        "Average Score",
    ]
    return _score_table(model_table_df, columns)


def build_leaderboard_tab(
    leaderboard_table_file_en, leaderboard_table_file_cn, show_plot=False
):
    if leaderboard_table_file_en:
        data_en = load_leaderboard_table_csv(leaderboard_table_file_en)
        data_cn = load_leaderboard_table_csv(leaderboard_table_file_cn)
        model_table_df_en = pd.DataFrame(data_en)
        model_table_df_cn = pd.DataFrame(data_cn)

        md_head = """
# 🏆 OCRBench v2 Leaderboard
| [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR) | [Paper](https://arxiv.org/abs/2501.00321v1) |
"""
        gr.Markdown(md_head, elem_id="leaderboard_markdown")

        # Shared introduction shown above both subset tables.
        md_intro = (
            "OCRBench v2 is a large-scale bilingual text-centric benchmark with "
            "currently the most comprehensive set of tasks (4× more tasks than the "
            "previous multi-scene benchmark OCRBench), the widest coverage of "
            "scenarios (31 diverse scenarios, including street scenes, receipts, "
            "formulas, diagrams, and so on), and thorough evaluation metrics, with "
            "a total of 10,000 human-verified question-answering pairs and a high "
            "proportion of difficult samples."
        )

        with gr.Tabs():
            with gr.Tab("OCRBench v2 English subsets", id=0):
                arena_table_vals = get_arena_table(model_table_df_en)
                gr.Markdown(md_intro, elem_id="leaderboard_markdown")
                gr.Dataframe(
                    headers=[
                        "Rank",
                        "Name",
                        "Open Source",
                        "Text Recognition",
                        "Text Referring",
                        "Text Spotting",
                        "Relation Extraction",
                        "Element Parsing",
                        "Mathematical Calculation",
                        "Visual Text Understanding",
                        "Knowledge Reasoning",
                        "Average Score",
                    ],
                    datatype=[
                        "str",
                        "markdown",
                        "str",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                    ],
                    value=arena_table_vals,
                    elem_id="arena_leaderboard_dataframe",
                    column_widths=[90, 150, 120, 170, 150, 150, 150, 150, 170, 170, 150, 150],
                    wrap=True,
                )
            with gr.Tab("OCRBench v2 Chinese subsets", id=1):
                cn_table_vals = get_cn_table(model_table_df_cn)
                gr.Markdown(md_intro, elem_id="leaderboard_markdown")
                gr.Dataframe(
                    headers=[
                        "Rank",
                        "Name",
                        "Open Source",
                        "Text Recognition",
                        "Relation Extraction",
                        "Element Parsing",
                        "Visual Text Understanding",
                        "Knowledge Reasoning",
                        "Average Score",
                    ],
                    datatype=[
                        "str",
                        "markdown",
                        "str",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                    ],
                    value=cn_table_vals,
                    elem_id="arena_leaderboard_dataframe",
                    # height=700,
                    column_widths=[60, 120, 100, 110, 110, 110, 110, 110, 80],
                    wrap=True,
                )

    md_tail = """
# Notice
API calls to closed-source models occasionally fail. In such cases, we retry
the failed samples until no further successful response can be obtained. If you
would like your model included in the OCRBench leaderboard, please follow the
evaluation instructions on [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR)
and feel free to contact us via email at ling_fu@hust.edu.cn.
We will update the leaderboard promptly."""
    gr.Markdown(md_tail, elem_id="leaderboard_markdown")


def build_demo(leaderboard_table_file_en, leaderboard_table_file_cn):
    text_size = gr.themes.sizes.text_lg
    with gr.Blocks(
        title="OCRBench Leaderboard",
        theme=gr.themes.Base(text_size=text_size),
        css=block_css,
    ) as demo:
        build_leaderboard_tab(
            leaderboard_table_file_en, leaderboard_table_file_cn, show_plot=True
        )
    return demo


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true")
    parser.add_argument("--OCRBench_file_en", type=str, default="./OCRBench_en.csv")
    parser.add_argument("--OCRBench_file_cn", type=str, default="./OCRBench_cn.csv")
    args = parser.parse_args()

    demo = build_demo(args.OCRBench_file_en, args.OCRBench_file_cn)
    # Create a public share link when --share is passed.
    demo.launch(share=args.share)
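
# Usage sketch (the filename "leaderboard.py" and the CSV layout shown here
# are illustrative assumptions, not fixed by this script):
#
#   python leaderboard.py --OCRBench_file_en ./OCRBench_en.csv \
#       --OCRBench_file_cn ./OCRBench_cn.csv
#   python leaderboard.py --share   # additionally create a public Gradio link
#
# Each CSV is expected to begin with a header row naming at least "Model" and
# "Link" plus the score columns listed in get_arena_table/get_cn_table, e.g.:
#
#   Model,Link,Open Source,Text Recognition,...,Average Score
#
# Every column other than Model, Link, Language Model, and Open Source must
# parse as a float.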