Kaiyue committed
Commit ced0ab7 · verified · 1 Parent(s): 268c082

Upload 3 files

Files changed (3):
  1. app.py +441 -0
  2. constants.py +100 -0
  3. draw_sub_dimension.py +359 -0
app.py ADDED
@@ -0,0 +1,441 @@
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
import os
import io
import csv  # used by csv.writer in add_new_eval
import gradio as gr
import pandas as pd
import datetime
import zipfile
import numpy as np

from constants import *
from draw_sub_dimension import *
from huggingface_hub import Repository

HF_TOKEN = os.environ.get("HF_TOKEN")

global data_component, filter_component


def add_new_eval(
    input_file,
    model_name_textbox: str,   # required
    revision_name_textbox: str,
    access_type: str,
    model_link: str,           # required
    team_name: str,
    contact_email: str,        # required
    model_publish: str,
    model_resolution: str,
    model_frame: str,
    model_fps: str,
    model_video_length: str,
    model_checkpoint: str,
    model_commit_id: str,
    model_video_format: str
):
    if input_file is None:
        return "Error! Empty file!"

    if model_link == '' or model_name_textbox == '' or contact_email == '':
        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)

    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    submission_repo.git_pull()

    now = datetime.datetime.now()
    upload_date = now.strftime("%Y-%m-%d")  # capture the upload date
    upload_time = now.strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"{model_name_textbox}_{upload_time}"

    with open(f'{SUBMISSION_NAME}/{filename}.zip', 'wb') as f:
        f.write(input_file)

    csv_data = pd.read_csv(CSV_PATH)

    if revision_name_textbox == '':
        col = csv_data.shape[0]
        model_name = model_name_textbox.replace(',', ' ')
    else:
        model_name = revision_name_textbox.replace(',', ' ')
        model_name_list = csv_data['Model Name (clickable)']
        name_list = [name.split(']')[0][1:] for name in model_name_list]
        if revision_name_textbox not in name_list:
            col = csv_data.shape[0]
        else:
            col = name_list.index(revision_name_textbox)

    model_name = '[' + model_name + '](' + model_link + ')'

    folder = f'{SUBMISSION_NAME}/{filename}'
    os.makedirs(folder, exist_ok=True)
    with zipfile.ZipFile(io.BytesIO(input_file), 'r') as zip_ref:
        zip_ref.extractall(folder)

    required_files = [
        "_consistent_attr_score.csv",
        "_dynamic_attr_score.csv",
        "_spatial_score.csv",
        "_motion_score.csv",
        "_motion_back_fore.csv",
        "_action_binding_score.csv",
        "_object_interactions_score.csv",
        "_numeracy_video.csv",
    ]

    score_1 = score_2 = score_3 = score_4 = score_5 = score_6 = score_7 = "N/A"
    color_score = shape_score = texture_score = coexist = acc = acc_score = "N/A"
    motion_level = motion_acc = common_score = uncommon_score = physical_score = social_score = "N/A"

    for i, suffix in enumerate(required_files):
        for sub_folder in os.listdir(folder):
            if sub_folder.startswith('.') or sub_folder.startswith('__'):
                print(f"Skipping hidden/system entry: {sub_folder}")
                continue

            cur_sub_folder = os.path.join(folder, sub_folder)  # top-level directory of the user-uploaded ZIP
            if os.path.isdir(cur_sub_folder):
                for file in os.listdir(cur_sub_folder):
                    if file.endswith(suffix):
                        print("File exists:", file)
                        filepath = os.path.join(cur_sub_folder, file)
                        if i == 0:
                            score_1 = read_score(filepath)
                            color_score, shape_score, texture_score = sub_consist_attr(filepath)
                        elif i == 1:
                            score_2 = read_score(filepath)
                        elif i == 2:
                            score_3 = read_score(filepath)
                            coexist, acc, acc_score = sub_spatial(filepath)
                        elif i == 3:
                            score_4 = read_score(filepath)
                        elif i == 4:
                            motion_level, motion_acc = sub_motion(filepath)
                        elif i == 5:
                            score_5 = read_score(filepath)
                            common_score, uncommon_score = sub_action(filepath)
                        elif i == 6:
                            score_6 = read_score(filepath)
                            physical_score, social_score = sub_interaction(filepath)
                        elif i == 7:
                            score_7 = read_score(filepath)

    # Append (or overwrite) the leaderboard row for this model.
    if team_name == '' or 'compbench' in team_name.lower():
        evaluate_team = "User Upload"
    else:
        evaluate_team = team_name

    new_data = [model_name, evaluate_team, upload_date, score_1, score_2, score_3, score_4, score_5, score_6, score_7,
                color_score, shape_score, texture_score, coexist, acc, acc_score,
                motion_level, motion_acc, common_score, uncommon_score, physical_score, social_score]
    print(new_data)

    csv_data.loc[col] = new_data
    csv_data.to_csv(CSV_PATH, index=False)

    new_info = [model_name, upload_time, team_name, model_publish, model_resolution, model_frame, model_fps,
                model_video_length, model_checkpoint, model_commit_id, model_video_format, access_type,
                contact_email, model_link]
    with open(INFO_PATH, mode='a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(new_info)

    submission_repo.push_to_hub()

    print("Successfully updated", model_name)
    return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)

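
# For illustration, a minimal sketch (not part of the leaderboard flow itself) of
# packing a submission archive in the layout add_new_eval() expects: one top-level
# directory holding the per-category CSVs. "results/my_model" and "my_model.zip"
# are placeholder names.
def pack_submission(result_dir="results/my_model", out_zip="my_model.zip"):
    with zipfile.ZipFile(out_zip, "w") as zf:
        for name in os.listdir(result_dir):
            if name.endswith(".csv"):
                zf.write(os.path.join(result_dir, name), arcname=f"my_model/{name}")
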
def calculate_selected_score(df, selected_columns):
    selected_task = [i for i in selected_columns if i in TASK_INFO]
    selected_task_score = df[selected_task].mean(axis=1, skipna=True)
    # Rows whose selected tasks are all missing average to NaN; report them as 0.0.
    return selected_task_score.fillna(0.0)


def get_final_score(df, selected_columns):
    df[TASK_INFO] = df[TASK_INFO].replace("N/A", np.nan)
    df[TASK_INFO] = df[TASK_INFO].apply(pd.to_numeric, errors='coerce')
    final_score = df[TASK_INFO].mean(axis=1, skipna=True)
    final_score = round(final_score, 4)

    if 'Total Avg. Score' in df:
        df['Total Avg. Score'] = final_score
    else:
        df.insert(1, 'Total Avg. Score', final_score)

    selected_score = calculate_selected_score(df, selected_columns)
    selected_score = round(selected_score, 4)

    if 'Selected Avg. Score' in df:
        df['Selected Avg. Score'] = selected_score
    else:
        df.insert(1, 'Selected Avg. Score', selected_score)
    return df

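
# A quick standalone illustration (made-up numbers) of why the "N/A" replacement
# and skipna mean above matter: missing scores are ignored rather than dragging
# the average down.
#   toy = pd.DataFrame({"A": [0.8, "N/A"], "B": [0.6, 0.4]})
#   toy = toy.replace("N/A", np.nan).apply(pd.to_numeric, errors="coerce")
#   toy.mean(axis=1, skipna=True)  # row 0 -> 0.7, row 1 -> 0.4
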
def get_baseline_df():
    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    submission_repo.git_pull()
    df = pd.read_csv(CSV_PATH)
    df = get_final_score(df, checkbox_group.value)
    df = df.sort_values(by="Selected Avg. Score", ascending=False)
    present_columns = MODEL_INFO + checkbox_group.value
    df = df[present_columns]
    df = df[df['Evaluated by'] == 'T2V-CompBench Team']
    return df


def get_baseline_df_sub():
    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    submission_repo.git_pull()
    df = pd.read_csv(CSV_PATH)
    df = get_final_score(df, checkbox_group.value)
    df = df.sort_values(by="Selected Avg. Score", ascending=False)

    present_columns = MODEL_INFO[:-2] + SUB_TASK_INFO + MODEL_INFO[-2:]
    print(present_columns)

    df = df[present_columns]
    df = df[df['Evaluated by'] == 'T2V-CompBench Team']
    return df


def get_all_df(selected_columns, csv_path=CSV_PATH):
    # Parameter renamed from `csv` so it no longer shadows the csv module.
    df = pd.read_csv(csv_path)
    df = get_final_score(df, selected_columns)
    df = df.sort_values(by="Selected Avg. Score", ascending=False)
    return df

# Selection callbacks: rebuild the table when categories or the team filter change.
def category_checkbox_change(selected_columns, only_compbench_team):
    updated_data = get_all_df(selected_columns, CSV_PATH)
    if only_compbench_team:
        updated_data = updated_data[updated_data['Evaluated by'] == 'T2V-CompBench Team']

    # Keep the selected task columns in their canonical order.
    selected_columns = [item for item in TASK_INFO if item in selected_columns]
    present_columns = MODEL_INFO + selected_columns
    updated_data = updated_data[present_columns]
    updated_data = updated_data.sort_values(by="Selected Avg. Score", ascending=False)

    updated_headers = present_columns
    update_datatype = [DATA_TITLE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]

    filter_component = gr.components.Dataframe(
        value=updated_data,
        headers=updated_headers,
        type="pandas",
        datatype=update_datatype,
        interactive=False,
        visible=True,
    )
    return filter_component


def category_checkbox_change_sub(selected_columns, selected_columns_sub, only_compbench_team):
    updated_data = get_all_df(selected_columns, CSV_PATH)
    if only_compbench_team:
        updated_data = updated_data[updated_data['Evaluated by'] == 'T2V-CompBench Team']

    selected_columns = [item for item in SUB_TASK_INFO if item in selected_columns_sub]
    present_columns = MODEL_INFO[:-2] + selected_columns + MODEL_INFO[-2:]
    updated_data = updated_data[present_columns]
    updated_data = updated_data.sort_values(by="Selected Avg. Score", ascending=False)

    updated_headers = present_columns
    update_datatype = [SUB_DATA_TITLE_TYPE[SUB_COLUMN_NAMES.index(x)] for x in updated_headers]

    filter_component = gr.components.Dataframe(
        value=updated_data,
        headers=updated_headers,
        type="pandas",
        datatype=update_datatype,
        interactive=False,
        visible=True,
    )
    return filter_component

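
# The datatype realignment above simply reindexes the full type list by the
# chosen headers; with hypothetical columns:
#   cols = ["Name", "A", "B"]; types = ["markdown", "number", "number"]
#   [types[cols.index(x)] for x in ["Name", "B"]]  # -> ['markdown', 'number']
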
block = gr.Blocks()

with block:
    gr.Markdown(
        LEADERBOARD_INTRODUCTION
    )
    gr.HTML(
        LEADERBOARD_INTRODUCTION_HTML
    )
    gr.Markdown(
        LEADERBOARD_INTRODUCTION_2
    )
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab 1: main leaderboard table
        with gr.TabItem("📊 T2V-CompBench", elem_id="compbench-tab-table", id=1):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                        lines=14,
                    )

            with gr.Row():
                compbench_team_filter = gr.Checkbox(
                    label="Evaluated by T2V-CompBench Team (uncheck to view all submissions)",
                    value=True,
                    interactive=True
                )

            with gr.Row():
                # Column selection:
                checkbox_group = gr.CheckboxGroup(
                    choices=TASK_INFO,
                    value=TASK_INFO,
                    label="Evaluation Category",
                    interactive=True,
                )

            data_component = gr.components.Dataframe(
                value=get_baseline_df,
                headers=COLUMN_NAMES,
                type="pandas",
                datatype=DATA_TITLE_TYPE,
                interactive=False,
                visible=True,
            )

            checkbox_group.change(fn=category_checkbox_change, inputs=[checkbox_group, compbench_team_filter], outputs=data_component)
            compbench_team_filter.change(fn=category_checkbox_change, inputs=[checkbox_group, compbench_team_filter], outputs=data_component)

        # Tab 2: sub-dimension table
        with gr.TabItem("🗂️ Sub-Dimension", elem_id="compbench-tab-table", id=2):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                        lines=14,
                    )
            with gr.Row():
                compbench_team_filter_sub = gr.Checkbox(
                    label="Evaluated by T2V-CompBench Team (uncheck to view all submissions)",
                    value=True,
                    interactive=True
                )
            with gr.Row():
                # Column selection:
                checkbox_group_sub = gr.CheckboxGroup(
                    choices=SUB_TASK_INFO,
                    value=SUB_TASK_INFO,
                    label="Evaluation Sub-Dimensions",
                    interactive=True,
                )

            data_component_sub = gr.components.Dataframe(
                value=get_baseline_df_sub,
                headers=SUB_COLUMN_NAMES,
                type="pandas",
                datatype=SUB_DATA_TITLE_TYPE,
                interactive=False,
                visible=True,
            )

            checkbox_group_sub.change(fn=category_checkbox_change_sub, inputs=[checkbox_group, checkbox_group_sub, compbench_team_filter_sub], outputs=data_component_sub)
            compbench_team_filter_sub.change(fn=category_checkbox_change_sub, inputs=[checkbox_group, checkbox_group_sub, compbench_team_filter_sub], outputs=data_component_sub)

        # Tab 3: about
        with gr.TabItem("📝 About", elem_id="compbench-tab-table", id=3):
            gr.Markdown(LEADERBOARD_INFO, elem_classes="markdown-text")

        # Tab 4: submission form
        with gr.TabItem("🚀 Submit here! ", elem_id="compbench-tab-table", id=4):
            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model evaluation CSV files here!", elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("The following fields are required:", elem_classes="markdown-text")
            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model Name", placeholder="Required field"
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name (Optional)", placeholder="If you need to update previous results, fill in this field"
                    )
                    access_type = gr.Dropdown(choices=["Open Source", "Ready to Open Source", "API", "Close"], value=None, label="Please select how users can access your model. You can update this later via the revision model name, or by contacting the T2V-CompBench Team.")

                with gr.Column():
                    model_link = gr.Textbox(
                        label="Project Page/Paper Link/GitHub/HuggingFace Repo", placeholder="Required field. If the information is incorrect, your results may be removed."
                    )
                    team_name = gr.Textbox(
                        label="Your Team Name (if left blank, it will show as \"User Upload\")", placeholder="User Upload"
                    )
                    contact_email = gr.Textbox(
                        label="E-Mail (will not be displayed)", placeholder="Required field"
                    )
            with gr.Row():
                gr.Markdown("The following is optional and will be synced to [GitHub](https://t2v-compbench.github.io/)", elem_classes="markdown-text")
            with gr.Row():
                model_publish = gr.Textbox(label="Time of Publish", placeholder="1970-01-01")
                model_resolution = gr.Textbox(label="Resolution", placeholder="width x height")
                model_frame = gr.Textbox(label="Frame Count", placeholder="int")
                model_fps = gr.Textbox(label="FPS", placeholder="int")
                model_video_length = gr.Textbox(label="Video Duration (s)", placeholder="float, e.g. 2.0")
                model_checkpoint = gr.Textbox(label="Model Checkpoint", placeholder="optional")
                model_commit_id = gr.Textbox(label="GitHub commit id", placeholder='optional')
                model_video_format = gr.Textbox(label="Video Format", placeholder='mp4/gif')
            with gr.Column():
                input_file = gr.components.File(label="Click to Upload a ZIP File", file_count="single", type='binary')
                submit_button = gr.Button("Submit Eval!")
                submit_succ_button = gr.Markdown("Submission successful! Please press Refresh and return to the leaderboard!", visible=False)
                fail_textbox = gr.Markdown(':bangbang: Please ensure that the `Model Name`, `Project Page`, and `Email` are filled in correctly.', visible=False)

            submission_result = gr.Markdown()
            submit_button.click(
                add_new_eval,
                inputs=[
                    input_file,
                    model_name_textbox,
                    revision_name_textbox,
                    access_type,
                    model_link,
                    team_name,
                    contact_email,
                    model_publish,
                    model_resolution,
                    model_frame,
                    model_fps,
                    model_video_length,
                    model_checkpoint,
                    model_commit_id,
                    model_video_format
                ],
                outputs=[submit_button, submit_succ_button, fail_textbox]
            )

    def refresh_data():
        value1 = get_baseline_df()
        return value1

    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(category_checkbox_change, inputs=[checkbox_group, compbench_team_filter], outputs=data_component)

block.launch()
constants.py ADDED
@@ -0,0 +1,100 @@
import os

# Constants for the T2V-CompBench leaderboard app.
MODEL_INFO = [
    "Model Name (clickable)",
    "Evaluated by",
    "Date",
    "Total Avg. Score",
    "Selected Avg. Score",
]

TASK_INFO = [
    "Consistent Attribute Binding",
    "Dynamic Attribute Binding",
    "Spatial Relationships",
    "Motion Binding",
    "Action Binding",
    "Object Interactions",
    "Generative Numeracy",
]

SUB_TASK_INFO = [
    "Consistent Attribute Binding-Color",
    "Consistent Attribute Binding-Shape",
    "Consistent Attribute Binding-Texture",
    "2D Spatial Relationships-Coexist",
    "2D Spatial Relationships-Acc.",
    "2D Spatial Relationships-Acc.Score",
    "Motion Binding-Motion Level",
    "Motion Binding-Acc.",
    "Action Binding-Common",
    "Action Binding-Uncommon",
    "Object Interactions-Physical",
    "Object Interactions-Social",
]

SUBMISSION_NAME = "T2V-CompBench_leaderboard_submission"
SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/Kaiyue/", SUBMISSION_NAME)
CSV_PATH = "./T2V-CompBench_leaderboard_submission/results.csv"
INFO_PATH = "./T2V-CompBench_leaderboard_submission/model_info.csv"

COLUMN_NAMES = MODEL_INFO + TASK_INFO
DATA_TITLE_TYPE = ['markdown', 'markdown', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']

SUB_COLUMN_NAMES = MODEL_INFO + SUB_TASK_INFO
SUB_DATA_TITLE_TYPE = ['markdown', 'markdown', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']

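
# The Dataframe components index these type lists by column position, so a
# minimal sanity check keeps them aligned (both pairs match as defined above):
assert len(DATA_TITLE_TYPE) == len(COLUMN_NAMES)          # 12 main-table columns
assert len(SUB_DATA_TITLE_TYPE) == len(SUB_COLUMN_NAMES)  # 17 sub-dimension columns
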
LEADERBOARD_INTRODUCTION = """# T2V-CompBench Leaderboard

🏆 Welcome to the **T2V-CompBench** leaderboard! 🎦 *A Comprehensive Benchmark for Compositional Text-to-Video Generation* """

LEADERBOARD_INTRODUCTION_HTML = """
<div style="display: flex; flex-wrap: wrap; align-items: center; gap: 10px;">
<a href='https://github.com/KaiyueSun98/T2V-CompBench'><img src='https://img.shields.io/github/stars/KaiyueSun98/T2V-CompBench.svg?style=social&label=Official'></a>
<a href='https://arxiv.org/abs/2407.14505'><img src='https://img.shields.io/badge/cs.CV-Paper-b31b1b?logo=arxiv&logoColor=red'></a>
<a href="https://t2v-compbench.github.io/"><img src="https://img.shields.io/badge/Project-Page-Green"></a>
</div>
"""
LEADERBOARD_INTRODUCTION_2 = """
- **1400 Prompts**: We analyze *1.67 million* real-user prompts to extract high-frequency nouns, verbs, and adjectives, resulting in a suite of 1,400 prompts.
- **7 Compositional Categories**: We evaluate multiple-object compositionality on attributes, actions, interactions, quantities, and spatio-temporal dynamics, covering 7 categories.
- **Evaluation Metrics**: We design MLLM-based, detection-based, and tracking-based evaluation metrics for compositional T2V generation, all validated by human evaluations.
- **Valuable Insights**: We provide an insightful analysis of current models' abilities, highlighting the significant challenge of compositional T2V generation.

**Join the Leaderboard**: Please see the [instructions](https://t2v-compbench.github.io/) for 3 options to participate. One option is to follow the [T2V-CompBench usage info](https://t2v-compbench.github.io/) and upload the generated `.csv` files here. After clicking the `Submit Eval!` button, click the `Refresh` button.

**Model Information**: What are the details of these video generation models? See [HERE](https://t2v-compbench.github.io/)
"""

SUBMIT_INTRODUCTION = """# T2V-CompBench Submission Instructions
## 📮
1. Please note that you need to obtain the list of `.csv` files by running T2V-CompBench from GitHub. You may run an [offline check](https://t2v-compbench.github.io/) before uploading.
2. Then, pack these CSV files into a `ZIP` archive, ensuring that the top-level directory of the ZIP contains the individual CSV files.
3. Finally, upload the ZIP archive below.

⚠️ Uploading the model's generated videos is not a valid submission!

⚠️ Submissions that do not correctly fill in the model name and model link may be deleted by the T2V-CompBench team. The contact information you fill in will not be made public.
"""

LEADERBOARD_INFO = """
- T2V-CompBench, a comprehensive benchmark for compositional text-to-video generation, consists of seven categories: **consistent attribute binding, dynamic attribute binding, spatial relationships, motion binding, action binding, object interactions, and generative numeracy**.
- For each category, we carefully design 200 prompts, resulting in **1400** in total, and sample generated videos from a set of T2V models.
- We propose three types of evaluation metrics: **MLLM-based, detection-based, and tracking-based metrics**, all specifically designed for compositional T2V generation and validated by human evaluations.
- We benchmark various T2V models and reveal their strengths and weaknesses by examining the results across **7 categories and 12 sub-dimensions**, while providing insightful analysis of compositional T2V generation.
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@article{sun2024t2v,
  title={T2V-CompBench: A comprehensive benchmark for compositional text-to-video generation},
  author={Sun, Kaiyue and Huang, Kaiyi and Liu, Xian and Wu, Yue and Xu, Zihan and Li, Zhenguo and Liu, Xihui},
  journal={arXiv preprint arXiv:2407.14505},
  year={2024}
}"""
draw_sub_dimension.py ADDED
@@ -0,0 +1,359 @@
import pandas as pd
import numpy as np
import os
import csv
import math


def sub_consist_attr(model, high=15, low=1):
    # Prompt indices (0001-0200) belonging to each sub-dimension.
    color_indices = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 57, 58, 59, 63, 65, 68, 70, 71, 73, 75, 76, 77, 78, 79, 80, 81, 83, 98, 99]
    color_indices += list(range(101, 161))
    shape_indices = [64, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 72, 74, 84, 85, 86, 87, 92, 94, 95, 100] + list(range(161, 176))  # 161-175
    texture_indices = [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 60, 61, 62, 69, 82, 88, 89, 90, 91, 93, 96, 97] + list(range(176, 191))  # 176-190
    human_indices = [49, 50, 51, 52, 53, 54, 55, 56, 66, 67] + list(range(191, 201))  # 191-200 (not part of the three returned sub-scores)

    interval = high - low
    df = pd.read_csv(model)

    color = []
    for i in color_indices:
        for j in range(200):
            if df.iloc[j, 0][:4] == f"{i:04d}":
                s = float(df.iloc[j, -1])
                color.append((s - low) / interval)

    shape = []
    for i in shape_indices:
        for j in range(200):
            if df.iloc[j, 0][:4] == f"{i:04d}":
                s = float(df.iloc[j, -1])
                shape.append((s - low) / interval)

    texture = []
    for i in texture_indices:
        for j in range(200):
            if df.iloc[j, 0][:4] == f"{i:04d}":
                s = float(df.iloc[j, -1])
                texture.append((s - low) / interval)

    color_score = "{:.4f}".format(sum(color) / len(color))
    shape_score = "{:.4f}".format(sum(shape) / len(shape))
    texture_score = "{:.4f}".format(sum(texture) / len(texture))

    print(model)
    print(len(color), len(shape), len(texture))
    print(color_score, shape_score, texture_score)

    return color_score, shape_score, texture_score

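
# The (s - low) / interval step maps a raw MLLM rating onto [0, 1]; with the
# default 1-15 scale used above:
#   (15 - 1) / 14 = 1.0   # top rating
#   (8 - 1) / 14  = 0.5   # mid-scale rating
#   (1 - 1) / 14  = 0.0   # minimum rating
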
def sub_action(model, high=10, low=1):
    common_ind = list(range(1, 23)) + list(range(46, 101)) + list(range(101, 184))
    uncommon_ind = list(range(23, 46)) + list(range(184, 201))

    interval = high - low
    df = pd.read_csv(model)

    common = []
    for i in common_ind:
        for j in range(200):
            if df.iloc[j, 0][:4] == f"{i:04d}":
                s = float(df.iloc[j, -1])
                common.append((s - low) / interval)

    uncommon = []
    for i in uncommon_ind:
        for j in range(200):
            if df.iloc[j, 0][:4] == f"{i:04d}":
                s = float(df.iloc[j, -1])
                uncommon.append((s - low) / interval)

    common_score = "{:.4f}".format(sum(common) / len(common))
    uncommon_score = "{:.4f}".format(sum(uncommon) / len(uncommon))

    print(model)
    print(len(common), len(uncommon))
    print(common_score, uncommon_score)

    return common_score, uncommon_score

def sub_interaction(model, high=10, low=1):
    physical_ind = list(range(1, 50)) + list(range(101, 152))
    social_ind = list(range(50, 101)) + list(range(152, 201))

    interval = high - low
    df = pd.read_csv(model)

    physical = []
    for i in physical_ind:
        for j in range(200):
            if df.iloc[j, 0][:4] == f"{i:04d}":
                s = float(df.iloc[j, -1])
                physical.append((s - low) / interval)

    social = []
    for i in social_ind:
        for j in range(200):
            if df.iloc[j, 0][:4] == f"{i:04d}":
                s = float(df.iloc[j, -1])
                social.append((s - low) / interval)

    physical_score = "{:.4f}".format(sum(physical) / len(physical))
    social_score = "{:.4f}".format(sum(social) / len(social))

    print(model)
    print(len(physical), len(social))
    print(physical_score, social_score)

    return physical_score, social_score

def sub_spatial(model):
    # Prompt indices (0001-0200) for each spatial relation.
    left_ind = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 88, 89, 90, 91, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 181, 182, 183, 184]
    right_ind = [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 92, 93, 94, 95, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 185, 186, 187, 188]
    above_ind = [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 189, 190, 191]
    below_ind = [96, 97, 98, 99, 100, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 126, 192, 193, 194]
    front_ind = [58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 195, 196, 197]
    behind_ind = [72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 198, 199, 200]

    record = {}
    df = pd.read_csv(model)

    left = []
    for i in left_ind:
        for j in range(200):
            if df.iloc[j, 0][:4] == f"{i:04d}":
                left.append(float(df.iloc[j, -1]))
    right = []
    for i in right_ind:
        for j in range(200):
            if df.iloc[j, 0][:4] == f"{i:04d}":
                right.append(float(df.iloc[j, -1]))

    above = []
    for i in above_ind:
        for j in range(200):
            if df.iloc[j, 0][:4] == f"{i:04d}":
                above.append(float(df.iloc[j, -1]))
    below = []
    for i in below_ind:
        for j in range(200):
            if df.iloc[j, 0][:4] == f"{i:04d}":
                below.append(float(df.iloc[j, -1]))

    front = []
    for i in front_ind:
        for j in range(200):
            if df.iloc[j, 0][:4] == f"{i:04d}":
                front.append(float(df.iloc[j, -1]))
    behind = []
    for i in behind_ind:
        for j in range(200):
            if df.iloc[j, 0][:4] == f"{i:04d}":
                behind.append(float(df.iloc[j, -1]))

    all_spatial = [left, right, above, below, front, behind]

    # Use a fresh loop variable: the original `for i, left in enumerate(...)`
    # rebound `left`, so the len(left) terms below pointed at the wrong list.
    for i, group in enumerate(all_spatial):
        count_ge_0_4 = 0  # count of scores >= 0.4
        count_gt_0_4 = 0  # count of scores > 0.4
        count_eq_0_4 = 0  # count of scores == 0.4

        scores_gt_0_4 = []
        for s in group:
            if round(s, 4) >= 0.4:
                count_ge_0_4 += 1
            if round(s, 4) > 0.4:
                count_gt_0_4 += 1
                scores_gt_0_4.append(s)
            if round(s, 4) == 0.4:
                count_eq_0_4 += 1

        ge_percent = count_ge_0_4 / len(group)            # Coexist
        gt_percent = count_gt_0_4 / count_ge_0_4          # Acc.
        gt_avg = sum(scores_gt_0_4) / len(scores_gt_0_4)  # Acc. Score
        eq_percent = count_eq_0_4 / count_ge_0_4
        record[f"#{i+1}"] = [ge_percent, gt_percent, gt_avg, eq_percent, count_ge_0_4, count_gt_0_4, scores_gt_0_4, count_eq_0_4]

    ########################################################################################################################
    left_count_ge_0_4 = record["#1"][4]
    left_count_gt_0_4 = record["#1"][5]
    left_scores_gt_0_4 = record["#1"][6]

    right_count_ge_0_4 = record["#2"][4]
    right_count_gt_0_4 = record["#2"][5]
    right_scores_gt_0_4 = record["#2"][6]

    left_right_ge_percent = (left_count_ge_0_4 + right_count_ge_0_4) / (len(left) + len(right))
    left_right_gt_percent = (left_count_gt_0_4 + right_count_gt_0_4) / (left_count_ge_0_4 + right_count_ge_0_4)
    left_right_gt_avg = (sum(left_scores_gt_0_4) + sum(right_scores_gt_0_4)) / (left_count_gt_0_4 + right_count_gt_0_4)

    record["left_right"] = [round(left_right_ge_percent, 4), round(left_right_gt_percent, 4), round(left_right_gt_avg, 4)]

    ########################################################################################################################
    above_count_ge_0_4 = record["#3"][4]
    above_count_gt_0_4 = record["#3"][5]
    above_scores_gt_0_4 = record["#3"][6]

    below_count_ge_0_4 = record["#4"][4]
    below_count_gt_0_4 = record["#4"][5]
    below_scores_gt_0_4 = record["#4"][6]

    _2d_ge_percent = (left_count_ge_0_4 + right_count_ge_0_4 + above_count_ge_0_4 + below_count_ge_0_4) / (len(left) + len(right) + len(above) + len(below))
    _2d_gt_percent = (left_count_gt_0_4 + right_count_gt_0_4 + above_count_gt_0_4 + below_count_gt_0_4) / (left_count_ge_0_4 + right_count_ge_0_4 + above_count_ge_0_4 + below_count_ge_0_4)
    _2d_gt_avg = (sum(left_scores_gt_0_4) + sum(right_scores_gt_0_4) + sum(above_scores_gt_0_4) + sum(below_scores_gt_0_4)) / (left_count_gt_0_4 + right_count_gt_0_4 + above_count_gt_0_4 + below_count_gt_0_4)

    record["2d"] = [f"{_2d_ge_percent:.0%}", f"{_2d_gt_percent:.0%}", round(_2d_gt_avg, 4)]

    ########################################################################################################################
    front_count_ge_0_4 = record["#5"][4]
    front_count_gt_0_4 = record["#5"][5]
    front_scores_gt_0_4 = record["#5"][6]

    behind_count_ge_0_4 = record["#6"][4]
    behind_count_gt_0_4 = record["#6"][5]
    behind_scores_gt_0_4 = record["#6"][6]

    _3d_ge_percent = (front_count_ge_0_4 + behind_count_ge_0_4) / (len(front) + len(behind))
    _3d_gt_percent = (front_count_gt_0_4 + behind_count_gt_0_4) / (front_count_ge_0_4 + behind_count_ge_0_4)
    _3d_gt_avg = (sum(front_scores_gt_0_4) + sum(behind_scores_gt_0_4)) / (front_count_gt_0_4 + behind_count_gt_0_4)

    record["3d"] = [round(_3d_ge_percent, 4), round(_3d_gt_percent, 4), round(_3d_gt_avg, 4)]

    print(model)
    print(len(left), len(right), len(above), len(below))
    print(record["2d"])
    coexist = record["2d"][0]
    acc = record["2d"][1]
    acc_score = record["2d"][2]

    return coexist, acc, acc_score

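
# A worked example of the 0.4-threshold bookkeeping above, with made-up
# per-video scores for a single direction:
#   scores = [0.5, 0.4, 0.2]
#   ge = [s for s in scores if s >= 0.4]  # [0.5, 0.4] -> Coexist = 2/3
#   gt = [s for s in scores if s > 0.4]   # [0.5]      -> Acc. = 1/2
#   sum(gt) / len(gt)                     # 0.5        -> Acc. Score
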
def object_score(obj1_net_left, left_thresh, obj1_net_up, up_thresh, d_1):
    correct_direction = False

    # Frame size used for normalization.
    W = 856
    H = 480

    # Normalize pixel displacements onto a 100x100 grid.
    obj1_net_left = float(obj1_net_left) * 100 / W
    obj1_net_up = float(obj1_net_up) * 100 / H
    net_distance = math.sqrt(obj1_net_left ** 2 + obj1_net_up ** 2)

    if d_1 == "left":
        if obj1_net_left > left_thresh:
            correct_direction = True
    elif d_1 == "right":
        if obj1_net_left < -left_thresh:
            correct_direction = True
    elif d_1 == "up":
        if obj1_net_up > up_thresh:
            correct_direction = True
    elif d_1 == "down":
        if obj1_net_up < -up_thresh:
            correct_direction = True
    else:
        print("direction not in [left, right, up, down]")

    return correct_direction, net_distance

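
# Example: a 50 px leftward net displacement in the 856 px frame normalizes to
# 50 * 100 / 856 ≈ 5.84 grid units, clearing the 5-unit threshold:
#   object_score(50, 5, 0, 5, "left")  # -> (True, 5.84...)
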
def sub_motion(model):
    # Frame midpoint: x = 428, y = 240 (width = 856, height = 480).
    left_thresh = 5  # 5% of the normalized 100x100 grid
    up_thresh = 5    # 5%

    distance = []
    direction = []

    # The motion CSV stores two rows per video (one per tracked object),
    # after a header row.
    with open(model, 'r') as file1:
        reader1 = csv.reader(file1)
        lines = list(reader1)
        vid_num = (len(lines) - 1) // 2
        for i in range(vid_num):
            id = lines[i * 2 + 1][0]
            d_1 = lines[i * 2 + 1][3]
            d_2 = lines[i * 2 + 2][5]
            obj1 = lines[i * 2 + 1][2]
            obj2 = lines[i * 2 + 2][4]
            obj1_net_left = lines[i * 2 + 1][6]
            obj1_net_up = lines[i * 2 + 1][7]
            obj2_net_left = lines[i * 2 + 2][6]
            obj2_net_up = lines[i * 2 + 2][7]
            correct_direction = False

            if d_1 != "" and d_2 == "":  # the prompt mentions only one moving object
                if obj1_net_left != "":  # the object was detected
                    correct_direction, net_distance = object_score(obj1_net_left, left_thresh, obj1_net_up, up_thresh, d_1)
                    distance.append(net_distance)
                    direction.append(correct_direction)  # True/False

    motion_level = sum(distance) / len(distance)
    acc = sum(direction) / len(direction)

    print(model)
    print(len(distance))  # < 165
    print(len(direction))
    print(round(motion_level, 2), f"{acc:.0%}")

    return round(motion_level, 2), f"{acc:.0%}"

def read_score(model):
    with open(model, 'r') as file:
        reader = csv.reader(file)
        lines = list(reader)

    # The metric CSVs end with a summary row whose first cell is "score: "
    # or "Score: " (the trailing space is part of the cell).
    if lines[-1][0] == "score: " or lines[-1][0] == "Score: ":
        score = float(lines[-1][-1])
    else:
        return "No score found"
    return round(score, 4)
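
# Assuming a metric CSV whose summary row reads `Score: ,0.6234`, the parser
# above returns the rounded float:
#   lines = list(csv.reader(io.StringIO("0001_video.mp4,4\nScore: ,0.6234\n")))
#   lines[-1][0] == "Score: "   # True
#   float(lines[-1][-1])        # 0.6234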