# Page banner captured together with this file (not part of the module): "Spaces: Sleeping / Sleeping"
import os
# This module stores the constants used by the leaderboard.
# Headers of the per-model metadata columns. They are prepended to the task
# columns to form both COLUMN_NAMES and SUB_COLUMN_NAMES.
MODEL_INFO = [
    "Model Name (clickable)",
    "Evaluated by",
    "Date",
    "Total Avg. Score",
    "Selected Avg. Score",
]
# Headers of the seven compositional categories of T2V-CompBench, one score
# column per category in the main leaderboard table.
TASK_INFO = [
    "Consistent Attribute Binding",
    "Dynamic Attribute Binding",
    "Spatial Relationships",
    "Motion Binding",
    "Action Binding",
    "Object Interactions",
    "Generative Numeracy",
]
# Headers of the twelve sub-dimension score columns, each named
# "<category>-<sub-dimension>". They follow MODEL_INFO in SUB_COLUMN_NAMES.
SUB_TASK_INFO = [
    "Consistent Attribute Binding-Color",
    "Consistent Attribute Binding-Shape",
    "Consistent Attribute Binding-Texture",
    "2D Spatial Relationships-Coexist",
    "2D Spatial Relationships-Acc.",
    "2D Spatial Relationships-Acc.Score",
    "Motion Binding-Motion Level",
    "Motion Binding-Acc.",
    "Action Binding-Common",
    "Action Binding-Uncommon",
    "Object Interactions-Physical",
    "Object Interactions-Social",
]
# Name of the dataset repository that holds the leaderboard submissions.
SUBMISSION_NAME = "T2V-CompBench_leaderboard_submission"
# URL of that repository. Built with plain string formatting rather than
# os.path.join: URLs always use "/", whereas os.path.join is a filesystem
# helper whose separator depends on the platform.
SUBMISSION_URL = f"https://huggingface.co/datasets/Kaiyue/{SUBMISSION_NAME}"
# Relative paths of the result tables inside a directory named after the
# repository (presumably a local checkout of it - confirm against the caller).
CSV_PATH = f"./{SUBMISSION_NAME}/results.csv"
INFO_PATH = f"./{SUBMISSION_NAME}/model_info.csv"
# Number of leading columns whose type is 'markdown' (model name, evaluator
# and date); every column after them is typed 'number'.
_MARKDOWN_COLUMN_COUNT = 3

# Headers of the main table: model metadata followed by the category scores.
COLUMN_NAMES = MODEL_INFO + TASK_INFO
# Per-column types, parallel to COLUMN_NAMES. Derived from the header count so
# the two lists cannot drift apart when a column is added or removed.
DATA_TITLE_TYPE = (
    ['markdown'] * _MARKDOWN_COLUMN_COUNT
    + ['number'] * (len(COLUMN_NAMES) - _MARKDOWN_COLUMN_COUNT)
)

# Headers of the sub-dimension table: model metadata followed by the
# sub-dimension scores.
SUB_COLUMN_NAMES = MODEL_INFO + SUB_TASK_INFO
# Per-column types, parallel to SUB_COLUMN_NAMES (same layout rule as above).
SUB_DATA_TITLE_TYPE = (
    ['markdown'] * _MARKDOWN_COLUMN_COUNT
    + ['number'] * (len(SUB_COLUMN_NAMES) - _MARKDOWN_COLUMN_COUNT)
)
# Markdown heading and welcome line for the leaderboard.
# NOTE(review): the two emoji were reconstructed from mis-decoded UTF-8 bytes
# (they appeared as Thai characters); confirm the exact glyphs against the
# upstream file.
LEADERBOARD_INTRODUCTION = """# T2V-CompBench Leaderboard
🏆 Welcome to the leaderboard of the **T2V-CompBench**! 🎦 *A Comprehensive Benchmark for Compositional Text-to-video Generation* """
# HTML badge row linking to the GitHub repository, the arXiv paper and the
# project page.
LEADERBOARD_INTRODUCTION_HTML = """
<div style="display: flex; flex-wrap: wrap; align-items: center; gap: 10px;">
<a href='https://github.com/KaiyueSun98/T2V-CompBench/tree/V2'><img src='https://img.shields.io/github/stars/KaiyueSun98/T2V-CompBench.svg?style=social&label=Official'></a>
<a href='https://arxiv.org/abs/2407.14505'><img src='https://img.shields.io/badge/cs.CV-Paper-b31b1b?logo=arxiv&logoColor=red'></a>
<a href="https://t2v-compbench-2025.github.io"><img src="https://img.shields.io/badge/Project-Page-Green"></a>
</div>
"""
# Markdown summary of the benchmark followed by participation instructions.
# The blank lines before "Join Leaderboard" and "Model Information" keep those
# paragraphs from being absorbed into the last bullet when rendered.
LEADERBOARD_INTRODUCTION_2 = """
- **1400 Prompts**: We analyze *1.67 million* real-user prompts to extract high-frequency nouns, verbs, and adjectives, resulting in a suite of 1,400 prompts.
- **7 Compositional Categories:** We evaluate multiple-object compositionality on attributes, actions, interactions, quantities, and spatio-temporal dynamics, covering 7 categories.
- **Evaluation metrics**: We design MLLM-based, Detection-based, and Tracking-based evaluation metrics for compositional T2V generation, all validated by human evaluations.
- **Valuable Insights:** We provide insightful analysis on current models' ability, highlighting the significant challenge of compositional T2V generation.

**Join Leaderboard**: Please see the [instructions](https://t2v-compbench.github.io/) for 3 options to participate. One option is to follow [T2V-CompBench Usage info](https://t2v-compbench.github.io/), and upload the generated `.csv` files here. After clicking the `Submit Eval!` button, click the `Refresh` button.

**Model Information**: What are the details of these Video Generation Models? See [HERE](https://t2v-compbench.github.io/)
"""
# Markdown instructions shown with the submission form.
# The warning signs were restored from mis-decoded UTF-8 bytes.
# NOTE(review): the emoji in the "##" heading could not be recovered
# unambiguously from the garbled bytes; a postbox is used as a best guess -
# confirm against the upstream file.
# Blank lines separate the warnings from the numbered list so they render as
# their own paragraphs instead of continuing item 3.
SUBMIT_INTRODUCTION = """# Submit on T2V-CompBench Introduction
## 📮
1. Please note that you need to obtain a list of `.csv` files by running T2V-CompBench in Github. You may conduct an [Offline Check](https://t2v-compbench.github.io/) before uploading.
2. Then, pack these CSV files into a `ZIP` archive, ensuring that the top-level directory of the ZIP contains the individual CSV files.
3. Finally, upload the ZIP archive below.

⚠️ Uploading generated videos of the model is invalid!

⚠️ Submissions that do not correctly fill in the model name and model link may be deleted by the T2V-CompBench team. The contact information you filled in will not be made public.
"""
# Markdown bullet list describing the benchmark: its categories, prompt
# count, metric types and the scope of the reported results.
LEADERBOARD_INFO = """
- T2V-CompBench, a comprehensive benchmark for compositional text-to-video generation, consists of seven categories: **consistent attribute binding, dynamic attribute binding, spatial relationships, motion binding, action binding, object interactions, and generative numeracy**.
- For each category, we carefully design 200 prompts, resulting in **1400** in total, and sample generated videos from a set of T2V models.
- We propose three types of evaluation metrics: **MLLM-based, Detection-based, and Tracking-based metrics**, all specifically designed for compositional T2V generation and validated by human evaluations.
- We benchmark various T2V models, reveal their strengths and weaknesses by examining the results across **7 categories and 12 sub-dimensions**, meanwhile provide insightful analysis on compositional T2V generation.
"""
# Label and BibTeX entry for the citation box. The entry is a raw string so
# that any backslashes in BibTeX markup would be kept literally.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@article{sun2024t2v,
title={T2v-compbench: A comprehensive benchmark for compositional text-to-video generation},
author={Sun, Kaiyue and Huang, Kaiyi and Liu, Xian and Wu, Yue and Xu, Zihan and Li, Zhenguo and Liu, Xihui},
journal={arXiv preprint arXiv:2407.14505},
year={2024}
}"""