# hieunguyen1053's picture
# Create tasks.py
# b0314f9
# raw
# history blame
# 1.35 kB
from dataclasses import dataclass
@dataclass
class Task:
    """Description of one evaluation task.

    Attributes:
        code: Short string identifier of the task.
        name: Human-readable task name.
        metric: Name of the metric associated with the task.
        higher_is_better: Whether a larger metric value is the better
            result. Defaults to ``True``.
        num_fewshot: Number of few-shot examples (presumably the count
            used when prompting -- confirm against the evaluation
            harness). Defaults to ``0``.
    """

    code: str
    name: str
    metric: str
    higher_is_better: bool = True
    num_fewshot: int = 0
class Lambada(Task):
    """Settings for the LAMBADA task, exposed as class-level attributes.

    The class is not re-decorated with ``@dataclass``; the assignments
    below are plain class attributes that shadow those inherited from
    ``Task`` and are meant to be read directly off the class.
    """

    code = "lambada_vi"
    name = "LAMBADA"
    metric = "ppl"
    # A lower value of this metric is the better result.
    higher_is_better = False
    num_fewshot = 0
class Arc(Task):
    """Settings for the ARC task, exposed as class-level attributes.

    The class is not re-decorated with ``@dataclass``; the assignments
    below are plain class attributes that shadow those inherited from
    ``Task`` and are meant to be read directly off the class.
    """

    code = "arc_vi"
    name = "ARC"
    metric = "acc_norm"
    higher_is_better = True
    num_fewshot = 25
class HellaSwag(Task):
    """Settings for the HellaSwag task, exposed as class-level attributes.

    The class is not re-decorated with ``@dataclass``; the assignments
    below are plain class attributes that shadow those inherited from
    ``Task`` and are meant to be read directly off the class.
    """

    code = "hellaswag_vi"
    name = "HellaSwag"
    metric = "acc_norm"
    higher_is_better = True
    num_fewshot = 10
class MMLU(Task):
    """Settings for the MMLU task, exposed as class-level attributes.

    The class is not re-decorated with ``@dataclass``; the assignments
    below are plain class attributes that shadow those inherited from
    ``Task`` and are meant to be read directly off the class.
    """

    code = "mmlu_vi"
    name = "MMLU"
    metric = "acc_norm"
    higher_is_better = True
    num_fewshot = 5
class TruthfulQA(Task):
    """Settings for the TruthfulQA task, exposed as class-level attributes.

    The class is not re-decorated with ``@dataclass``; the assignments
    below are plain class attributes that shadow those inherited from
    ``Task`` and are meant to be read directly off the class.
    """

    code = "truthfulqa_vi"
    name = "TruthfulQA"
    metric = "mc2"
    higher_is_better = True
    num_fewshot = 0
class Grade12Exams(Task):
    """Settings for the Grade 12 Exams task, exposed as class-level attributes.

    The class is not re-decorated with ``@dataclass``; the assignments
    below are plain class attributes that shadow those inherited from
    ``Task`` and are meant to be read directly off the class.
    """

    code = "grade_12_exams_vi"
    name = "Grade 12 Exams"
    metric = "acc_norm"
    higher_is_better = True
    num_fewshot = 5
class IWSLT2023_en_vi(Task):
    """Settings for the IWSLT 2023 en-vi task, exposed as class-level attributes.

    The class is not re-decorated with ``@dataclass``; the assignments
    below are plain class attributes that shadow those inherited from
    ``Task`` and are meant to be read directly off the class.
    """

    # Note: the code is "translation_vi", not derived from the class name.
    code = "translation_vi"
    name = "IWSLT 2023 en-vi"
    metric = "bleu"
    higher_is_better = True
    num_fewshot = 0
# Registry of the task classes (the classes themselves, not instances).
TASKS = [
    Lambada,
    Arc,
    HellaSwag,
    MMLU,
    TruthfulQA,
    Grade12Exams,
    IWSLT2023_en_vi,
]

# Task codes, in the same order as TASKS.
TASK_CODES = [task_cls.code for task_cls in TASKS]

# Lookup from a task's code to the name of its metric.
TASK_TO_METRIC = {task_cls.code: task_cls.metric for task_cls in TASKS}