Tucano-2b4 / results-pt.json
nicholasKluge's picture
Upload results-pt.json with huggingface_hub
e3e7faa verified
{
"results": {
"assin2_rte": {
"f1_macro,all": 0.5627201878936693,
"f1_macro_stderr,all": 0.007138599941179132,
"acc,all": 0.5816993464052288,
"acc_stderr,all": 0.007033478283865181,
"alias": "assin2_rte"
},
"assin2_sts": {
"pearson,all": 0.019296773848512278,
"pearson_stderr,all": 0.0052702782863150366,
"mse,all": 2.292087418300654,
"mse_stderr,all": "N/A",
"alias": "assin2_sts"
},
"bluex": {
"acc,all": 0.23226703755215578,
"acc_stderr,all": 0.00910850485340816,
"acc,exam_id__USP_2020": 0.19642857142857142,
"acc_stderr,exam_id__USP_2020": 0.030585023293181555,
"acc,exam_id__USP_2018": 0.18518518518518517,
"acc_stderr,exam_id__USP_2018": 0.03050961881728019,
"acc,exam_id__UNICAMP_2019": 0.28,
"acc_stderr,exam_id__UNICAMP_2019": 0.03675300666387717,
"acc,exam_id__USP_2019": 0.225,
"acc_stderr,exam_id__USP_2019": 0.03809921994241209,
"acc,exam_id__UNICAMP_2021_2": 0.2549019607843137,
"acc_stderr,exam_id__UNICAMP_2021_2": 0.035190576293880615,
"acc,exam_id__USP_2022": 0.20408163265306123,
"acc_stderr,exam_id__USP_2022": 0.0331495776801277,
"acc,exam_id__UNICAMP_2020": 0.34545454545454546,
"acc_stderr,exam_id__UNICAMP_2020": 0.03706808174792698,
"acc,exam_id__USP_2023": 0.20454545454545456,
"acc_stderr,exam_id__USP_2023": 0.035085282497353196,
"acc,exam_id__UNICAMP_2021_1": 0.32608695652173914,
"acc_stderr,exam_id__UNICAMP_2021_1": 0.03998674251290271,
"acc,exam_id__UNICAMP_2024": 0.3111111111111111,
"acc_stderr,exam_id__UNICAMP_2024": 0.0398913782816493,
"acc,exam_id__UNICAMP_2023": 0.2558139534883721,
"acc_stderr,exam_id__UNICAMP_2023": 0.03847478723396343,
"acc,exam_id__USP_2024": 0.0975609756097561,
"acc_stderr,exam_id__USP_2024": 0.02676653985131649,
"acc,exam_id__UNICAMP_2018": 0.2037037037037037,
"acc_stderr,exam_id__UNICAMP_2018": 0.031622082259434206,
"acc,exam_id__UNICAMP_2022": 0.20512820512820512,
"acc_stderr,exam_id__UNICAMP_2022": 0.037216966730647366,
"acc,exam_id__USP_2021": 0.17307692307692307,
"acc_stderr,exam_id__USP_2021": 0.030306191600561867,
"alias": "bluex"
},
"enem_challenge": {
"alias": "enem",
"acc,all": 0.20503848845346395,
"acc_stderr,all": 0.006157821032434345,
"acc,exam_id__2016": 0.21487603305785125,
"acc_stderr,exam_id__2016": 0.021512207558150195,
"acc,exam_id__2014": 0.1926605504587156,
"acc_stderr,exam_id__2014": 0.021803310410598006,
"acc,exam_id__2013": 0.2037037037037037,
"acc_stderr,exam_id__2013": 0.022357486372450733,
"acc,exam_id__2015": 0.31092436974789917,
"acc_stderr,exam_id__2015": 0.024532408089807157,
"acc,exam_id__2011": 0.20512820512820512,
"acc_stderr,exam_id__2011": 0.021490987751954183,
"acc,exam_id__2016_2": 0.1951219512195122,
"acc_stderr,exam_id__2016_2": 0.02055475507011082,
"acc,exam_id__2022": 0.18796992481203006,
"acc_stderr,exam_id__2022": 0.019552345949852248,
"acc,exam_id__2023": 0.1925925925925926,
"acc_stderr,exam_id__2023": 0.01962450807930953,
"acc,exam_id__2010": 0.20512820512820512,
"acc_stderr,exam_id__2010": 0.021472982938031362,
"acc,exam_id__2017": 0.20689655172413793,
"acc_stderr,exam_id__2017": 0.021730900661238783,
"acc,exam_id__2009": 0.17391304347826086,
"acc_stderr,exam_id__2009": 0.02032295503836816,
"acc,exam_id__2012": 0.1724137931034483,
"acc_stderr,exam_id__2012": 0.020244199996007548
},
"faquad_nli": {
"f1_macro,all": 0.4396551724137931,
"f1_macro_stderr,all": 0.0035796984729087084,
"acc,all": 0.7846153846153846,
"acc_stderr,all": 0.011396120309131327,
"alias": "faquad_nli"
},
"hatebr_offensive": {
"alias": "hatebr_offensive_binary",
"f1_macro,all": 0.29485149211711714,
"f1_macro_stderr,all": 0.007424556308360103,
"acc,all": 0.3457142857142857,
"acc_stderr,all": 0.009014269627201426
},
"oab_exams": {
"acc,all": 0.25466970387243737,
"acc_stderr,all": 0.005369216965821217,
"acc,exam_id__2015-18": 0.275,
"acc_stderr,exam_id__2015-18": 0.028838666374198248,
"acc,exam_id__2016-19": 0.2692307692307692,
"acc_stderr,exam_id__2016-19": 0.02895144507818263,
"acc,exam_id__2016-20a": 0.25,
"acc_stderr,exam_id__2016-20a": 0.027900918201234894,
"acc,exam_id__2010-01": 0.2823529411764706,
"acc_stderr,exam_id__2010-01": 0.028161773322800735,
"acc,exam_id__2017-24": 0.2,
"acc_stderr,exam_id__2017-24": 0.025833264852852596,
"acc,exam_id__2011-03": 0.25252525252525254,
"acc_stderr,exam_id__2011-03": 0.02512759576118094,
"acc,exam_id__2012-06": 0.2625,
"acc_stderr,exam_id__2012-06": 0.028351797200888276,
"acc,exam_id__2017-22": 0.2375,
"acc_stderr,exam_id__2017-22": 0.02733053787962572,
"acc,exam_id__2013-10": 0.275,
"acc_stderr,exam_id__2013-10": 0.02881771034002736,
"acc,exam_id__2012-09": 0.23376623376623376,
"acc_stderr,exam_id__2012-09": 0.02797385462117437,
"acc,exam_id__2017-23": 0.3125,
"acc_stderr,exam_id__2017-23": 0.029914726412691175,
"acc,exam_id__2010-02": 0.25,
"acc_stderr,exam_id__2010-02": 0.025021660954901236,
"acc,exam_id__2014-14": 0.1875,
"acc_stderr,exam_id__2014-14": 0.02529104399510177,
"acc,exam_id__2014-15": 0.20512820512820512,
"acc_stderr,exam_id__2014-15": 0.026284555725364814,
"acc,exam_id__2015-17": 0.24358974358974358,
"acc_stderr,exam_id__2015-17": 0.027969248508567596,
"acc,exam_id__2014-13": 0.2375,
"acc_stderr,exam_id__2014-13": 0.027476936414182643,
"acc,exam_id__2016-20": 0.2375,
"acc_stderr,exam_id__2016-20": 0.02739847607327644,
"acc,exam_id__2012-07": 0.275,
"acc_stderr,exam_id__2012-07": 0.02878309026861392,
"acc,exam_id__2018-25": 0.2625,
"acc_stderr,exam_id__2018-25": 0.028438093506871428,
"acc,exam_id__2016-21": 0.2125,
"acc_stderr,exam_id__2016-21": 0.02641144656479805,
"acc,exam_id__2012-06a": 0.2625,
"acc_stderr,exam_id__2012-06a": 0.028315496745109678,
"acc,exam_id__2011-05": 0.3625,
"acc_stderr,exam_id__2011-05": 0.030989058558927522,
"acc,exam_id__2015-16": 0.1625,
"acc_stderr,exam_id__2015-16": 0.02376033404658179,
"acc,exam_id__2011-04": 0.25,
"acc_stderr,exam_id__2011-04": 0.027892229456764128,
"acc,exam_id__2013-11": 0.3375,
"acc_stderr,exam_id__2013-11": 0.03045178142342042,
"acc,exam_id__2013-12": 0.3,
"acc_stderr,exam_id__2013-12": 0.029578315556407414,
"acc,exam_id__2012-08": 0.2375,
"acc_stderr,exam_id__2012-08": 0.027375629395503497,
"alias": "oab_exams"
},
"portuguese_hate_speech": {
"alias": "portuguese_hate_speech_binary",
"f1_macro,all": 0.41984117071546034,
"f1_macro_stderr,all": 0.006192271664815315,
"acc,all": 0.6921269095182139,
"acc_stderr,all": 0.011165688090683424
},
"tweetsentbr": {
"f1_macro,all": 0.5799510657945365,
"f1_macro_stderr,all": 0.00774434642078899,
"acc,all": 0.617412935323383,
"acc_stderr,all": 0.00768763448486565,
"alias": "tweetsentbr"
}
},
"configs": {
"assin2_rte": {
"task": "assin2_rte",
"group": [
"pt_benchmark",
"assin2"
],
"dataset_path": "assin2",
"test_split": "test",
"fewshot_split": "train",
"doc_to_text": "Premissa: {{premise}}\nHipótese: {{hypothesis}}\nPergunta: A hipótese pode ser inferida pela premissa? Sim ou Não?\nResposta:",
"doc_to_target": "{{['Não', 'Sim'][entailment_judgment]}}",
"description": "Abaixo estão pares de premissa e hipótese. Para cada par, indique se a hipótese pode ser inferida a partir da premissa, responda apenas com \"Sim\" ou \"Não\".\n\n",
"target_delimiter": " ",
"fewshot_delimiter": "\n\n",
"fewshot_config": {
"sampler": "id_sampler",
"sampler_config": {
"id_list": [
1,
3251,
2,
3252,
3,
4,
5,
6,
3253,
7,
3254,
3255,
3256,
8,
9,
10,
3257,
11,
3258,
12,
13,
14,
15,
3259,
3260,
3261,
3262,
3263,
16,
17,
3264,
18,
3265,
3266,
3267,
19,
20,
3268,
3269,
21,
3270,
3271,
22,
3272,
3273,
23,
3274,
24,
25,
3275
],
"id_column": "sentence_pair_id"
}
},
"num_fewshot": 15,
"metric_list": [
{
"metric": "f1_macro",
"aggregation": "f1_macro",
"higher_is_better": true
},
{
"metric": "acc",
"aggregation": "acc",
"higher_is_better": true
}
],
"output_type": "generate_until",
"generation_kwargs": {
"max_gen_toks": 32,
"do_sample": false,
"temperature": 0.0,
"top_k": null,
"top_p": null,
"until": [
"\n\n"
]
},
"repeats": 1,
"filter_list": [
{
"name": "all",
"filter": [
{
"function": "find_similar_label",
"labels": [
"Sim",
"Não"
]
},
{
"function": "take_first"
}
]
}
],
"should_decontaminate": false,
"metadata": {
"version": 1.1
}
},
"assin2_sts": {
"task": "assin2_sts",
"group": [
"pt_benchmark",
"assin2"
],
"dataset_path": "assin2",
"test_split": "test",
"fewshot_split": "train",
"doc_to_text": "Frase 1: {{premise}}\nFrase 2: {{hypothesis}}\nPergunta: Quão similares são as duas frases? Dê uma pontuação entre 1,0 a 5,0.\nResposta:",
"doc_to_target": "<function assin2_float_to_pt_str at 0x14f772b4e5c0>",
"description": "Abaixo estão pares de frases que você deve avaliar o grau de similaridade. Dê uma pontuação entre 1,0 e 5,0, sendo 1,0 pouco similar e 5,0 muito similar.\n\n",
"target_delimiter": " ",
"fewshot_delimiter": "\n\n",
"fewshot_config": {
"sampler": "id_sampler",
"sampler_config": {
"id_list": [
1,
3251,
2,
3252,
3,
4,
5,
6,
3253,
7,
3254,
3255,
3256,
8,
9,
10,
3257,
11,
3258,
12,
13,
14,
15,
3259,
3260,
3261,
3262,
3263,
16,
17,
3264,
18,
3265,
3266,
3267,
19,
20,
3268,
3269,
21,
3270,
3271,
22,
3272,
3273,
23,
3274,
24,
25,
3275
],
"id_column": "sentence_pair_id"
}
},
"num_fewshot": 10,
"metric_list": [
{
"metric": "pearson",
"aggregation": "pearsonr",
"higher_is_better": true
},
{
"metric": "mse",
"aggregation": "mean_squared_error",
"higher_is_better": false
}
],
"output_type": "generate_until",
"generation_kwargs": {
"max_gen_toks": 32,
"do_sample": false,
"temperature": 0.0,
"top_k": null,
"top_p": null,
"until": [
"\n\n"
]
},
"repeats": 1,
"filter_list": [
{
"name": "all",
"filter": [
{
"function": "number_filter",
"type": "float",
"range_min": 1.0,
"range_max": 5.0,
"on_outside_range": "clip",
"fallback": 5.0
},
{
"function": "take_first"
}
]
}
],
"should_decontaminate": false,
"metadata": {
"version": 1.1
}
},
"bluex": {
"task": "bluex",
"group": [
"pt_benchmark",
"vestibular"
],
"dataset_path": "eduagarcia-temp/BLUEX_without_images",
"test_split": "train",
"fewshot_split": "train",
"doc_to_text": "<function enem_doc_to_text at 0x14f772b4db20>",
"doc_to_target": "{{answerKey}}",
"description": "As perguntas a seguir são questões de múltipla escolha de provas de vestibular de universidades brasileiras, selecione a única alternativa correta e responda apenas com as letras \"A\", \"B\", \"C\", \"D\" ou \"E\".\n\n",
"target_delimiter": " ",
"fewshot_delimiter": "\n\n",
"fewshot_config": {
"sampler": "id_sampler",
"sampler_config": {
"id_list": [
"USP_2018_3",
"UNICAMP_2018_2",
"USP_2018_35",
"UNICAMP_2018_16",
"USP_2018_89"
],
"id_column": "id",
"exclude_from_task": true
}
},
"num_fewshot": 3,
"metric_list": [
{
"metric": "acc",
"aggregation": "acc",
"higher_is_better": true
}
],
"output_type": "generate_until",
"generation_kwargs": {
"max_gen_toks": 32,
"do_sample": false,
"temperature": 0.0,
"top_k": null,
"top_p": null,
"until": [
"\n\n"
]
},
"repeats": 1,
"filter_list": [
{
"name": "all",
"filter": [
{
"function": "normalize_spaces"
},
{
"function": "remove_accents"
},
{
"function": "find_choices",
"choices": [
"A",
"B",
"C",
"D",
"E"
],
"regex_patterns": [
"(?:[Ll]etra|[Aa]lternativa|[Rr]esposta|[Rr]esposta [Cc]orreta|[Rr]esposta [Cc]orreta e|[Oo]pcao):? ([ABCDE])\\b",
"\\b([ABCDE])\\.",
"\\b([ABCDE]) ?[.):-]",
"\\b([ABCDE])$",
"\\b([ABCDE])\\b"
]
},
{
"function": "take_first"
}
],
"group_by": {
"column": "exam_id"
}
}
],
"should_decontaminate": true,
"doc_to_decontamination_query": "<function enem_doc_to_text at 0x14f772b4de40>",
"metadata": {
"version": 1.1
}
},
"enem_challenge": {
"task": "enem_challenge",
"task_alias": "enem",
"group": [
"pt_benchmark",
"vestibular"
],
"dataset_path": "eduagarcia/enem_challenge",
"test_split": "train",
"fewshot_split": "train",
"doc_to_text": "<function enem_doc_to_text at 0x14f772b4e020>",
"doc_to_target": "{{answerKey}}",
"description": "As perguntas a seguir são questões de múltipla escolha do Exame Nacional do Ensino Médio (ENEM), selecione a única alternativa correta e responda apenas com as letras \"A\", \"B\", \"C\", \"D\" ou \"E\".\n\n",
"target_delimiter": " ",
"fewshot_delimiter": "\n\n",
"fewshot_config": {
"sampler": "id_sampler",
"sampler_config": {
"id_list": [
"2022_21",
"2022_88",
"2022_143"
],
"id_column": "id",
"exclude_from_task": true
}
},
"num_fewshot": 3,
"metric_list": [
{
"metric": "acc",
"aggregation": "acc",
"higher_is_better": true
}
],
"output_type": "generate_until",
"generation_kwargs": {
"max_gen_toks": 32,
"do_sample": false,
"temperature": 0.0,
"top_k": null,
"top_p": null,
"until": [
"\n\n"
]
},
"repeats": 1,
"filter_list": [
{
"name": "all",
"filter": [
{
"function": "normalize_spaces"
},
{
"function": "remove_accents"
},
{
"function": "find_choices",
"choices": [
"A",
"B",
"C",
"D",
"E"
],
"regex_patterns": [
"(?:[Ll]etra|[Aa]lternativa|[Rr]esposta|[Rr]esposta [Cc]orreta|[Rr]esposta [Cc]orreta e|[Oo]pcao):? ([ABCDE])\\b",
"\\b([ABCDE])\\.",
"\\b([ABCDE]) ?[.):-]",
"\\b([ABCDE])$",
"\\b([ABCDE])\\b"
]
},
{
"function": "take_first"
}
],
"group_by": {
"column": "exam_id"
}
}
],
"should_decontaminate": true,
"doc_to_decontamination_query": "<function enem_doc_to_text at 0x14f772b4e200>",
"metadata": {
"version": 1.1
}
},
"faquad_nli": {
"task": "faquad_nli",
"group": [
"pt_benchmark"
],
"dataset_path": "ruanchaves/faquad-nli",
"test_split": "test",
"fewshot_split": "train",
"doc_to_text": "Pergunta: {{question}}\nResposta: {{answer}}\nA resposta dada satisfaz à pergunta? Sim ou Não?",
"doc_to_target": "{{['Não', 'Sim'][label]}}",
"description": "Abaixo estão pares de pergunta e resposta. Para cada par, você deve julgar se a resposta responde à pergunta de maneira satisfatória e aparenta estar correta. Escreva apenas \"Sim\" ou \"Não\".\n\n",
"target_delimiter": " ",
"fewshot_delimiter": "\n\n",
"fewshot_config": {
"sampler": "first_n",
"sampler_config": {
"fewshot_indices": [
1893,
949,
663,
105,
1169,
2910,
2227,
2813,
974,
558,
1503,
1958,
2918,
601,
1560,
984,
2388,
995,
2233,
1982,
165,
2788,
1312,
2285,
522,
1113,
1670,
323,
236,
1263,
1562,
2519,
1049,
432,
1167,
1394,
2022,
2551,
2194,
2187,
2282,
2816,
108,
301,
1185,
1315,
1420,
2436,
2322,
766
]
}
},
"num_fewshot": 15,
"metric_list": [
{
"metric": "f1_macro",
"aggregation": "f1_macro",
"higher_is_better": true
},
{
"metric": "acc",
"aggregation": "acc",
"higher_is_better": true
}
],
"output_type": "generate_until",
"generation_kwargs": {
"max_gen_toks": 32,
"do_sample": false,
"temperature": 0.0,
"top_k": null,
"top_p": null,
"until": [
"\n\n"
]
},
"repeats": 1,
"filter_list": [
{
"name": "all",
"filter": [
{
"function": "find_similar_label",
"labels": [
"Sim",
"Não"
]
},
{
"function": "take_first"
}
]
}
],
"should_decontaminate": false,
"metadata": {
"version": 1.1
}
},
"hatebr_offensive": {
"task": "hatebr_offensive",
"task_alias": "hatebr_offensive_binary",
"group": [
"pt_benchmark"
],
"dataset_path": "eduagarcia/portuguese_benchmark",
"dataset_name": "HateBR_offensive_binary",
"test_split": "test",
"fewshot_split": "train",
"doc_to_text": "Texto: {{sentence}}\nPergunta: O texto é ofensivo?\nResposta:",
"doc_to_target": "{{'Sim' if label == 1 else 'Não'}}",
"description": "Abaixo contém o texto de comentários de usuários do Instagram em português, sua tarefa é classificar se o texto é ofensivo ou não. Responda apenas com \"Sim\" ou \"Não\".\n\n",
"target_delimiter": " ",
"fewshot_delimiter": "\n\n",
"fewshot_config": {
"sampler": "id_sampler",
"sampler_config": {
"id_list": [
48,
44,
36,
20,
3511,
88,
3555,
16,
56,
3535,
60,
40,
3527,
4,
76,
3579,
3523,
3551,
68,
3503,
84,
3539,
64,
3599,
80,
3563,
3559,
3543,
3547,
3587,
3595,
3575,
3567,
3591,
24,
96,
92,
3507,
52,
72,
8,
3571,
3515,
3519,
3531,
28,
32,
0,
12,
3583
],
"id_column": "idx"
}
},
"num_fewshot": 25,
"metric_list": [
{
"metric": "f1_macro",
"aggregation": "f1_macro",
"higher_is_better": true
},
{
"metric": "acc",
"aggregation": "acc",
"higher_is_better": true
}
],
"output_type": "generate_until",
"generation_kwargs": {
"max_gen_toks": 32,
"do_sample": false,
"temperature": 0.0,
"top_k": null,
"top_p": null,
"until": [
"\n\n"
]
},
"repeats": 1,
"filter_list": [
{
"name": "all",
"filter": [
{
"function": "find_similar_label",
"labels": [
"Sim",
"Não"
]
},
{
"function": "take_first"
}
]
}
],
"should_decontaminate": false,
"metadata": {
"version": 1.0
}
},
"oab_exams": {
"task": "oab_exams",
"group": [
"legal_benchmark",
"pt_benchmark"
],
"dataset_path": "eduagarcia/oab_exams",
"test_split": "train",
"fewshot_split": "train",
"doc_to_text": "<function doc_to_text at 0x14f772b4ed40>",
"doc_to_target": "{{answerKey}}",
"description": "As perguntas a seguir são questões de múltipla escolha do Exame de Ordem da Ordem dos Advogados do Brasil (OAB), selecione a única alternativa correta e responda apenas com as letras \"A\", \"B\", \"C\" ou \"D\".\n\n",
"target_delimiter": " ",
"fewshot_delimiter": "\n\n",
"fewshot_config": {
"sampler": "id_sampler",
"sampler_config": {
"id_list": [
"2010-01_1",
"2010-01_11",
"2010-01_13",
"2010-01_23",
"2010-01_26",
"2010-01_28",
"2010-01_38",
"2010-01_48",
"2010-01_58",
"2010-01_68",
"2010-01_76",
"2010-01_83",
"2010-01_85",
"2010-01_91",
"2010-01_99"
],
"id_column": "id",
"exclude_from_task": true
}
},
"num_fewshot": 3,
"metric_list": [
{
"metric": "acc",
"aggregation": "acc",
"higher_is_better": true
}
],
"output_type": "generate_until",
"generation_kwargs": {
"max_gen_toks": 32,
"do_sample": false,
"temperature": 0.0,
"top_k": null,
"top_p": null,
"until": [
"\n\n"
]
},
"repeats": 1,
"filter_list": [
{
"name": "all",
"filter": [
{
"function": "normalize_spaces"
},
{
"function": "remove_accents"
},
{
"function": "find_choices",
"choices": [
"A",
"B",
"C",
"D"
],
"regex_patterns": [
"(?:[Ll]etra|[Aa]lternativa|[Rr]esposta|[Rr]esposta [Cc]orreta|[Rr]esposta [Cc]orreta e|[Oo]pcao):? ([ABCD])\\b",
"\\b([ABCD])\\.",
"\\b([ABCD]) ?[.):-]",
"\\b([ABCD])$",
"\\b([ABCD])\\b"
]
},
{
"function": "take_first"
}
],
"group_by": {
"column": "exam_id"
}
}
],
"should_decontaminate": true,
"doc_to_decontamination_query": "<function doc_to_text at 0x14f772b4efc0>",
"metadata": {
"version": 1.5
}
},
"portuguese_hate_speech": {
"task": "portuguese_hate_speech",
"task_alias": "portuguese_hate_speech_binary",
"group": [
"pt_benchmark"
],
"dataset_path": "eduagarcia/portuguese_benchmark",
"dataset_name": "Portuguese_Hate_Speech_binary",
"test_split": "test",
"fewshot_split": "train",
"doc_to_text": "Texto: {{sentence}}\nPergunta: O texto contém discurso de ódio?\nResposta:",
"doc_to_target": "{{'Sim' if label == 1 else 'Não'}}",
"description": "Abaixo contém o texto de tweets de usuários do Twitter em português, sua tarefa é classificar se o texto contém discurso de ódio ou não. Responda apenas com \"Sim\" ou \"Não\".\n\n",
"target_delimiter": " ",
"fewshot_delimiter": "\n\n",
"fewshot_config": {
"sampler": "id_sampler",
"sampler_config": {
"id_list": [
52,
50,
39,
28,
3,
105,
22,
25,
60,
11,
66,
41,
9,
4,
91,
42,
7,
20,
76,
1,
104,
13,
67,
54,
97,
27,
24,
14,
16,
48,
53,
40,
34,
49,
32,
119,
114,
2,
58,
83,
18,
36,
5,
6,
10,
35,
38,
0,
21,
46
],
"id_column": "idx"
}
},
"num_fewshot": 25,
"metric_list": [
{
"metric": "f1_macro",
"aggregation": "f1_macro",
"higher_is_better": true
},
{
"metric": "acc",
"aggregation": "acc",
"higher_is_better": true
}
],
"output_type": "generate_until",
"generation_kwargs": {
"max_gen_toks": 32,
"do_sample": false,
"temperature": 0.0,
"top_k": null,
"top_p": null,
"until": [
"\n\n"
]
},
"repeats": 1,
"filter_list": [
{
"name": "all",
"filter": [
{
"function": "find_similar_label",
"labels": [
"Sim",
"Não"
]
},
{
"function": "take_first"
}
]
}
],
"should_decontaminate": false,
"metadata": {
"version": 1.0
}
},
"tweetsentbr": {
"task": "tweetsentbr",
"group": [
"pt_benchmark"
],
"dataset_path": "eduagarcia/tweetsentbr_fewshot",
"test_split": "test",
"fewshot_split": "train",
"doc_to_text": "Texto: {{sentence}}\nPergunta: O sentimento do texto é Positivo, Neutro ou Negativo?\nResposta:",
"doc_to_target": "{{'Positivo' if label == 'Positive' else ('Negativo' if label == 'Negative' else 'Neutro')}}",
"description": "Abaixo contém o texto de tweets de usuários do Twitter em português, sua tarefa é classificar se o sentimento do texto é Positivo, Neutro ou Negativo. Responda apenas com uma das opções.\n\n",
"target_delimiter": " ",
"fewshot_delimiter": "\n\n",
"fewshot_config": {
"sampler": "first_n"
},
"num_fewshot": 25,
"metric_list": [
{
"metric": "f1_macro",
"aggregation": "f1_macro",
"higher_is_better": true
},
{
"metric": "acc",
"aggregation": "acc",
"higher_is_better": true
}
],
"output_type": "generate_until",
"generation_kwargs": {
"max_gen_toks": 32,
"do_sample": false,
"temperature": 0.0,
"top_k": null,
"top_p": null,
"until": [
"\n\n"
]
},
"repeats": 1,
"filter_list": [
{
"name": "all",
"filter": [
{
"function": "find_similar_label",
"labels": [
"Positivo",
"Neutro",
"Negativo"
]
},
{
"function": "take_first"
}
]
}
],
"should_decontaminate": false,
"metadata": {
"version": 1.0
}
}
},
"versions": {
"assin2_rte": 1.1,
"assin2_sts": 1.1,
"bluex": 1.1,
"enem_challenge": 1.1,
"faquad_nli": 1.1,
"hatebr_offensive": 1.0,
"oab_exams": 1.5,
"portuguese_hate_speech": 1.0,
"tweetsentbr": 1.0
},
"n-shot": {
"assin2_rte": 15,
"assin2_sts": 10,
"bluex": 3,
"enem_challenge": 3,
"faquad_nli": 15,
"hatebr_offensive": 25,
"oab_exams": 3,
"portuguese_hate_speech": 25,
"tweetsentbr": 25
},
"model_meta": {
"truncated": 0,
"non_truncated": 14150,
"padded": 0,
"non_padded": 14150,
"fewshots_truncated": 0,
"has_chat_template": false,
"chat_type": null,
"n_gpus": 1,
"accelerate_num_process": null,
"model_sha": "None",
"model_dtype": "torch.bfloat16",
"model_memory_footprint": 4889244480,
"model_num_parameters": 2444618240,
"model_is_loaded_in_4bit": null,
"model_is_loaded_in_8bit": null,
"model_is_quantized": null,
"model_device": "cuda:0",
"batch_size": 16,
"max_length": 4096,
"max_ctx_length": 4064,
"max_gen_toks": 32
},
"task_model_meta": {
"assin2_rte": {
"sample_size": 2448,
"truncated": 0,
"non_truncated": 2448,
"padded": 0,
"non_padded": 2448,
"fewshots_truncated": 0,
"mean_seq_length": 924.4232026143791,
"min_seq_length": 909,
"max_seq_length": 963,
"max_ctx_length": 4064,
"max_gen_toks": 32,
"mean_original_fewshots_size": 15.0,
"mean_effective_fewshot_size": 15.0
},
"assin2_sts": {
"sample_size": 2448,
"truncated": 0,
"non_truncated": 2448,
"padded": 0,
"non_padded": 2448,
"fewshots_truncated": 0,
"mean_seq_length": 659.4232026143791,
"min_seq_length": 644,
"max_seq_length": 698,
"max_ctx_length": 4064,
"max_gen_toks": 32,
"mean_original_fewshots_size": 10.0,
"mean_effective_fewshot_size": 10.0
},
"bluex": {
"sample_size": 719,
"truncated": 0,
"non_truncated": 719,
"padded": 0,
"non_padded": 719,
"fewshots_truncated": 0,
"mean_seq_length": 1170.817802503477,
"min_seq_length": 904,
"max_seq_length": 1801,
"max_ctx_length": 4064,
"max_gen_toks": 32,
"mean_original_fewshots_size": 3.0,
"mean_effective_fewshot_size": 3.0
},
"enem_challenge": {
"sample_size": 1429,
"truncated": 0,
"non_truncated": 1429,
"padded": 0,
"non_padded": 1429,
"fewshots_truncated": 0,
"mean_seq_length": 1007.4177746675997,
"min_seq_length": 829,
"max_seq_length": 2484,
"max_ctx_length": 4064,
"max_gen_toks": 32,
"mean_original_fewshots_size": 3.0,
"mean_effective_fewshot_size": 3.0
},
"faquad_nli": {
"sample_size": 650,
"truncated": 0,
"non_truncated": 650,
"padded": 0,
"non_padded": 650,
"fewshots_truncated": 0,
"mean_seq_length": 968.1338461538462,
"min_seq_length": 936,
"max_seq_length": 1034,
"max_ctx_length": 4064,
"max_gen_toks": 32,
"mean_original_fewshots_size": 15.0,
"mean_effective_fewshot_size": 15.0
},
"hatebr_offensive": {
"sample_size": 1400,
"truncated": 0,
"non_truncated": 1400,
"padded": 0,
"non_padded": 1400,
"fewshots_truncated": 0,
"mean_seq_length": 867.4407142857143,
"min_seq_length": 852,
"max_seq_length": 1061,
"max_ctx_length": 4064,
"max_gen_toks": 32,
"mean_original_fewshots_size": 25.0,
"mean_effective_fewshot_size": 25.0
},
"oab_exams": {
"sample_size": 2195,
"truncated": 0,
"non_truncated": 2195,
"padded": 0,
"non_padded": 2195,
"fewshots_truncated": 0,
"mean_seq_length": 832.024145785877,
"min_seq_length": 659,
"max_seq_length": 1108,
"max_ctx_length": 4064,
"max_gen_toks": 32,
"mean_original_fewshots_size": 3.0,
"mean_effective_fewshot_size": 3.0
},
"portuguese_hate_speech": {
"sample_size": 851,
"truncated": 0,
"non_truncated": 851,
"padded": 0,
"non_padded": 851,
"fewshots_truncated": 0,
"mean_seq_length": 1219.021151586369,
"min_seq_length": 1192,
"max_seq_length": 1255,
"max_ctx_length": 4064,
"max_gen_toks": 32,
"mean_original_fewshots_size": 25.0,
"mean_effective_fewshot_size": 25.0
},
"tweetsentbr": {
"sample_size": 2010,
"truncated": 0,
"non_truncated": 2010,
"padded": 0,
"non_padded": 2010,
"fewshots_truncated": 0,
"mean_seq_length": 1154.4194029850746,
"min_seq_length": 1137,
"max_seq_length": 1211,
"max_ctx_length": 4064,
"max_gen_toks": 32,
"mean_original_fewshots_size": 25.0,
"mean_effective_fewshot_size": 25.0
}
},
"config": {
"model": "huggingface",
"model_args": "pretrained=/lustre/mlnvme/data/asen_hpc-mula/checkpoints-llama/slurm_job_17049106/step_1920000",
"batch_size": "auto",
"batch_sizes": [],
"device": "cuda:0",
"use_cache": null,
"limit": null,
"bootstrap_iters": 100000,
"gen_kwargs": null
},
"git_hash": null
}