ling99 committed
Commit f9b7238 · verified · 1 parent: 7a5b093

Update app.py

Files changed (1): app.py (+20 -20)
app.py CHANGED
```diff
@@ -164,23 +164,23 @@ def get_cn_table(model_table_df):
         values.append(row)
     return values
 
-def build_leaderboard_tab(leaderboard_table_file, leaderboard_table_file_2, show_plot=False):
-    if leaderboard_table_file:
-        data = load_leaderboard_table_csv(leaderboard_table_file)
-        data_2 = load_leaderboard_table_csv(leaderboard_table_file_2)
+def build_leaderboard_tab(leaderboard_table_file_en, leaderboard_table_file_cn, show_plot=False):
+    if leaderboard_table_file_en:
+        data_en = load_leaderboard_table_csv(leaderboard_table_file_en)
+        data_cn = load_leaderboard_table_csv(leaderboard_table_file_cn)
 
-        model_table_df = pd.DataFrame(data)
-        model_table_df_2 = pd.DataFrame(data_2)
+        model_table_df_en = pd.DataFrame(data_en)
+        model_table_df_cn = pd.DataFrame(data_cn)
         md_head = f"""
 # 🏆 OCRBench v2 Leaderboard
-| [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR) | [Paper](https://arxiv.org/abs/2305.07895) |
+| [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR) |
 """
         gr.Markdown(md_head, elem_id="leaderboard_markdown")
         with gr.Tabs() as tabs:
             # arena table
-            with gr.Tab("OCRBench v2", id=0):
-                arena_table_vals = get_arena_table(model_table_df)
-                md = "OCRBench v2 is a comprehensive evaluation benchmark designed to assess the OCR capabilities of Large Multimodal Models. It comprises five components: Text Recognition, SceneText-Centric VQA, Document-Oriented VQA, Key Information Extraction, and Handwritten Mathematical Expression Recognition. The benchmark includes 1000 question-answer pairs, and all the answers undergo manual verification and correction to ensure a more precise evaluation."
+            with gr.Tab("OCRBench v2 English subset", id=0):
+                arena_table_vals = get_arena_table(model_table_df_en)
+                md = "OCRBench v2 is a large-scale bilingual text-centric benchmark with currently the most comprehensive set of tasks (4× more tasks than the previous multi-scene benchmark OCRBench), the widest coverage of scenarios (31 diverse scenarios including street scene, receipt, formula, diagram, and so on), and thorough evaluation metrics, with a total of 10,000 human-verified question-answering pairs and a high proportion of difficult samples."
                 gr.Markdown(md, elem_id="leaderboard_markdown")
                 gr.Dataframe(
                     headers=[
@@ -213,12 +213,12 @@ def build_leaderboard_tab(leaderboard_table_file, leaderboard_table_file_2, show
                     ],
                     value=arena_table_vals,
                     elem_id="arena_leaderboard_dataframe",
-                    column_widths=[90, 150, 120, 150, 150, 150, 150, 150, 170, 170, 150, 150],
+                    column_widths=[90, 150, 120, 170, 150, 150, 150, 150, 170, 170, 150, 150],
                     wrap=True,
                 )
-            with gr.Tab("OCRBench v2 cn", id=1):
-                arena_table_vals = get_cn_table(model_table_df_2)
-                md = "OCRBench is a comprehensive evaluation benchmark designed to assess the OCR capabilities of Large Multimodal Models. It comprises five components: Text Recognition, SceneText-Centric VQA, Document-Oriented VQA, Key Information Extraction, and Handwritten Mathematical Expression Recognition. The benchmark includes 1000 question-answer pairs, and all the answers undergo manual verification and correction to ensure a more precise evaluation."
+            with gr.Tab("OCRBench v2 Chinese subset", id=1):
+                arena_table_vals = get_cn_table(model_table_df_cn)
+                md = "OCRBench v2 is a large-scale bilingual text-centric benchmark with currently the most comprehensive set of tasks (4× more tasks than the previous multi-scene benchmark OCRBench), the widest coverage of scenarios (31 diverse scenarios including street scene, receipt, formula, diagram, and so on), and thorough evaluation metrics, with a total of 10,000 human-verified question-answering pairs and a high proportion of difficult samples."
                 gr.Markdown(md, elem_id="leaderboard_markdown")
                 gr.Dataframe(
                     headers=[
@@ -253,11 +253,11 @@ def build_leaderboard_tab(leaderboard_table_file, leaderboard_table_file_2, show
         pass
     md_tail = f"""
 # Notice
-Sometimes, API calls to closed-source models may not succeed. In such cases, we will repeat the calls for unsuccessful samples until it becomes impossible to obtain a successful response. It is important to note that due to rigorous security reviews by OpenAI, GPT4V refuses to provide results for the 84 samples in OCRBench.
-If you would like to include your model in the OCRBench leaderboard, please follow the evaluation instructions provided on [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR), [VLMEvalKit](https://github.com/open-compass/VLMEvalKit) or [lmms-eval](https://github.com/EvolvingLMMs-Lab/lmms-eval) and feel free to contact us via email at [email protected]. We will update the leaderboard in time."""
+Sometimes, API calls to closed-source models may not succeed. In such cases, we repeat the calls for the unsuccessful samples until a successful response can no longer be obtained.
+If you would like to include your model in the OCRBench leaderboard, please follow the evaluation instructions provided on [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR) and feel free to contact us via email at [email protected]. We will update the leaderboard in a timely manner."""
     gr.Markdown(md_tail, elem_id="leaderboard_markdown")
 
-def build_demo(leaderboard_table_file, leaderboard_table_file_2):
+def build_demo(leaderboard_table_file_en, leaderboard_table_file_cn):
     text_size = gr.themes.sizes.text_lg
 
     with gr.Blocks(
@@ -266,16 +266,16 @@ def build_demo(leaderboard_table_file, leaderboard_table_file_2):
         css=block_css,
     ) as demo:
         leader_components = build_leaderboard_tab(
-            leaderboard_table_file, leaderboard_table_file_2, show_plot=True
+            leaderboard_table_file_en, leaderboard_table_file_cn, show_plot=True
         )
     return demo
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--share", action="store_true")
-    parser.add_argument("--OCRBench_file", type=str, default="./OCRBench_en.csv")
-    parser.add_argument("--OCRBench_file_2", type=str, default="./OCRBench_cn.csv")
+    parser.add_argument("--OCRBench_file_en", type=str, default="./OCRBench_en.csv")
+    parser.add_argument("--OCRBench_file_cn", type=str, default="./OCRBench_cn.csv")
     args = parser.parse_args()
 
-    demo = build_demo(args.OCRBench_file, args.OCRBench_file_2)
+    demo = build_demo(args.OCRBench_file_en, args.OCRBench_file_cn)
     demo.launch()
```
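The helper `load_leaderboard_table_csv` is referenced throughout this diff but not defined in it. A minimal sketch of what such a helper might look like, assuming a plain CSV with a header row (this body is an illustration, not the repository's actual implementation):

```python
import csv

def load_leaderboard_table_csv(filename):
    # Read the leaderboard CSV into a list of dicts keyed by the header
    # row, so that pd.DataFrame(rows) yields one column per CSV field.
    with open(filename, newline="", encoding="utf-8") as f:
        return list(csv.DictReader(f))
```

With the renamed flags, the app would then be launched as `python app.py --OCRBench_file_en ./OCRBench_en.csv --OCRBench_file_cn ./OCRBench_cn.csv`, or with no flags at all, since both arguments default to CSVs in the working directory.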