Update src/display/about.py
Browse files- src/display/about.py +17 -2
src/display/about.py
CHANGED
@@ -33,10 +33,25 @@ TITLE = """<h1 align="center" id="space-title">🇨🇿 CzechBench Leaderboard</
|
|
33 |
|
34 |
# What does your leaderboard evaluate?
|
35 |
INTRODUCTION_TEXT = """
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
-
Czech-Bench is developed by <a href="https://huggingface.co/CIIRC-NLP">CIIRC-NLP</a>.
|
39 |
"""
|
|
|
40 |
|
41 |
# Which evaluations are you running? how can people reproduce what you have?
|
42 |
LLM_BENCHMARKS_TEXT = f"""
|
|
|
33 |
|
34 |
# What does your leaderboard evaluate?
|
35 |
INTRODUCTION_TEXT = """
|
36 |
+
The goal of the CzechBench project is to provide a comprehensive and practical benchmark for evaluating Czech language models.
|
37 |
+
Our [evaluation suite](https://github.com/jirkoada/czechbench_eval_harness/tree/main/lm_eval/tasks/czechbench#readme)
|
38 |
+
currently consists of 15 individual tasks, leveraging pre-existing Czech datasets together with new machine translations of popular LLM benchmarks,
|
39 |
+
including ARC, GSM8K, MMLU, and TruthfulQA.
|
40 |
+
|
41 |
+
Key Features and Benefits:
|
42 |
+
- **Tailored for the Czech Language:** The benchmark includes both original Czech datasets and adapted versions of international datasets, ensuring relevant evaluation of model performance in the Czech context.
|
43 |
+
- **Wide Range of Tasks:** It contains 15 different tasks that cover various aspects of language understanding and text generation, enabling a comprehensive assessment of the model's capabilities.
|
44 |
+
- **Universal Model Support:** The universal text-to-text evaluation approach adopted in CzechBench allows for direct comparison of models with varying levels of internal access, including commercial APIs.
|
45 |
+
- **Ease of Use:** The benchmark is designed to be easily integrated into your development process, saving time and resources during model testing and improvement.
|
46 |
+
- **Up-to-date and Relevant:** We regularly update our datasets to reflect the latest findings and trends in language model development.
|
47 |
+
By using CzechBench, you will gain deep insights into the strengths and weaknesses of your models, allowing you to better focus on key areas for optimization.
|
48 |
+
This will not only improve the performance of your models but also enhance their real-world deployment in various Czech contexts.
|
49 |
+
|
50 |
+
Below, you can find the up-to-date leaderboard of models evaluated on CzechBench.
|
51 |
+
For more information on the included benchmarks and instructions on evaluating your own models, please visit the "About" section below.
|
52 |
|
|
|
53 |
"""
|
54 |
+
# Czech-Bench is developed by <a href="https://huggingface.co/CIIRC-NLP">CIIRC-NLP</a>.
|
55 |
|
56 |
# Which evaluations are you running? how can people reproduce what you have?
|
57 |
LLM_BENCHMARKS_TEXT = f"""
|