import os  # this .py file stores constants for the leaderboard app

# Leaderboard metadata columns shown for every model.
MODEL_INFO = [
    "Model Name (clickable)",
    "Evaluated by",
    "Date",
    "Total Avg. Score",
    "Selected Avg. Score",
]

# The seven compositional categories evaluated by T2V-CompBench.
TASK_INFO = [
    "Consistent Attribute Binding",
    "Dynamic Attribute Binding",
    "Spatial Relationships",
    "Motion Binding",
    "Action Binding",
    "Object Interactions",
    "Generative Numeracy",
]

# The twelve sub-dimensions underlying the categories above.
SUB_TASK_INFO = [
    "Consistent Attribute Binding-Color",
    "Consistent Attribute Binding-Shape",
    "Consistent Attribute Binding-Texture",
    "2D Spatial Relationships-Coexist",
    "2D Spatial Relationships-Acc.",
    "2D Spatial Relationships-Acc.Score",
    "Motion Binding-Motion Level",
    "Motion Binding-Acc.",
    "Action Binding-Common",
    "Action Binding-Uncommon",
    "Object Interactions-Physical",
    "Object Interactions-Social",
]

# Hugging Face dataset that stores leaderboard submissions, plus local paths
# to the results and model-info CSV files.
SUBMISSION_NAME = "T2V-CompBench_leaderboard_submission"
SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/Kaiyue/", SUBMISSION_NAME)
CSV_PATH = "./T2V-CompBench_leaderboard_submission/results.csv"
INFO_PATH = "./T2V-CompBench_leaderboard_submission/model_info.csv"

# Main table: 5 model-info columns + 7 category columns (12 total).
COLUMN_NAMES = MODEL_INFO + TASK_INFO
DATA_TITLE_TYPE = [
    "markdown", "markdown", "markdown",
    "number", "number", "number", "number", "number",
    "number", "number", "number", "number",
]

# Sub-dimension table: 5 model-info columns + 12 sub-dimension columns (17 total).
SUB_COLUMN_NAMES = MODEL_INFO + SUB_TASK_INFO
SUB_DATA_TITLE_TYPE = [
    "markdown", "markdown", "markdown",
    "number", "number", "number", "number", "number",
    "number", "number", "number", "number", "number",
    "number", "number", "number", "number",
]

LEADERBOARD_INTRODUCTION = """# T2V-CompBench Leaderboard 🏆

Welcome to the leaderboard of **T2V-CompBench**! 🎦

*A Comprehensive Benchmark for Compositional Text-to-Video Generation*
"""

LEADERBOARD_INTRODUCTION_HTML = """
"""
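# ---------------------------------------------------------------------------
# Illustrative helper (not part of the original constants): a minimal sketch
# of how a leaderboard app might consume CSV_PATH, COLUMN_NAMES, and the
# score columns above. It assumes `results.csv` holds one row per model with
# columns matching COLUMN_NAMES; the function name `load_leaderboard_df` and
# the exact CSV layout are assumptions, not the leaderboard's actual API.
# ---------------------------------------------------------------------------
import pandas as pd


def load_leaderboard_df(csv_path: str = CSV_PATH) -> pd.DataFrame:
    """Load the results CSV, keep the display columns, and rank by total score."""
    df = pd.read_csv(csv_path)
    df = df[COLUMN_NAMES]  # restrict/order to the columns shown on the board
    return df.sort_values("Total Avg. Score", ascending=False)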
""" LEADERBOARD_INTRODUCTION_2 = """ - **1400 Prompts**: We analyze *1.67 million* real-user prompts to extract high-frequency nouns, verbs, and adjectives, resulting in a suite of 1,400 prompts. - **7 Compositional Categories:** We evaluate multiple-object compositionality on attributes, actions, interactions, quantities, and spatio-temporal dynamics, covering 7 categories. - **Evaluation metrics**: We design MLLM-based, Detection-based, and Tracking-based evaluation metrics for compositional T2V generation, all validated by human evaluations. - **Valuable Insights:** We provide insightful analysis on current models' ability, highlighting the significant challenge of compositional T2V generation. **Join Leaderboard**: Please follow the steps in [our github repository](https://github.com/KaiyueSun98/T2V-CompBench/tree/V2) to prepare the videos and run the evaluation scripts. Before uploading the generated `.csv` files here, please conduct a final check by carefully reading this [instruction](https://github.com/KaiyueSun98/T2V-CompBench/tree/V2?tab=readme-ov-file#mortar_board-how-to-join-t2v-compbench-leaderboard). After clicking the `Submit Eval!` button, click the `Refresh` button. Then, you can successfully showcase your model's performance on our leaderboard! **Model Information**: What are the details of these Video Generation Models? See Appendix B of [our paper](https://arxiv.org/abs/2407.14505). We will provide more details soon. """ SUBMIT_INTRODUCTION = """# Submit on T2V-CompBench Introduction ## 📮 1. Please note that you need to obtain a list of `.csv` files by running the evaluation scripts of T2V-CompBench in our Github. You may conduct an [Offline Check](https://github.com/KaiyueSun98/T2V-CompBench/tree/V2?tab=readme-ov-file#mortar_board-how-to-join-t2v-compbench-leaderboard) before uploading. 2. Then, pack these CSV files into a `ZIP` archive, ensuring that the top-level directory of the ZIP contains the individual CSV files. 3. Finally, upload the ZIP archive below. ⚠️ Uploading generated videos of the model is invalid! ⚠️ Submissions that do not correctly fill in the model name and model link may be deleted by the T2V-CompBench team. The contact information you filled in will not be made public. """ LEADERBOARD_INFO = """ - T2V-CompBench, a comprehensive benchmark for compositional text-to-video generation, consists of seven categories: **consistent attribute binding, dynamic attribute binding, spatial relationships, motion binding, action binding, object interactions, and generative numeracy**. - For each category, we carefully design 200 prompts, resulting in **1400** in total, and sample generated videos from a set of T2V models. - We propose three types of evaluation metrics: **MLLM-based, Detection-based, and Tracking-based metrics**, all specifically designed for compositional T2V generation and validated by human evaluations. - We benchmark various T2V models, reveal their strengths and weaknesses by examining the results across **7 categories and 12 sub-dimensions**, meanwhile provide insightful analysis on compositional T2V generation. """ CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results" CITATION_BUTTON_TEXT = r"""@article{sun2024t2v, title={T2v-compbench: A comprehensive benchmark for compositional text-to-video generation}, author={Sun, Kaiyue and Huang, Kaiyi and Liu, Xian and Wu, Yue and Xu, Zihan and Li, Zhenguo and Liu, Xihui}, journal={arXiv preprint arXiv:2407.14505}, year={2024} }"""