Spaces:
Running
Running
[
  {
    "model": "OPT (1.3B)",
    "Average": 7.84,
    "MMLU": 7.4,
    "WinoGrande": 12.47,
    "PiQA": 4.45,
    "CommonsenseQA": 7.61,
    "Race": 13.61,
    "MedMCQA": 1.25,
    "OpenBookQA": 4.48
  },
  {
    "model": "SlimPajama",
    "Average": 9.54,
    "MMLU": 9.22,
    "WinoGrande": 14.76,
    "PiQA": 5.32,
    "CommonsenseQA": 9.01,
    "Race": 16.19,
    "MedMCQA": 1.68,
    "OpenBookQA": 5.7
  },
  {
    "model": "OLMo (1B)",
    "Average": 8.8,
    "MMLU": 8.54,
    "WinoGrande": 6.16,
    "PiQA": 8.05,
    "CommonsenseQA": 13.1,
    "Race": 13.61,
    "MedMCQA": 2.1,
    "OpenBookQA": 6.11
  },
  {
    "model": "GPT-Neo (1.3B)",
    "Average": 7.38,
    "MMLU": 6.94,
    "WinoGrande": 10.81,
    "PiQA": 4.31,
    "CommonsenseQA": 6.34,
    "Race": 13.75,
    "MedMCQA": 2.63,
    "OpenBookQA": 4.89
  },
  {
    "model": "Cerebras-GPT (1.3B)",
    "Average": 4.84,
    "MMLU": 5.37,
    "WinoGrande": 9.31,
    "PiQA": 2.16,
    "CommonsenseQA": 6.2,
    "Race": 6.9,
    "MedMCQA": 1.04,
    "OpenBookQA": 3.46
  },
  {
    "model": "RedPajama (1B)",
    "Average": 9.01,
    "MMLU": 9.21,
    "WinoGrande": 16.97,
    "PiQA": 1.39,
    "CommonsenseQA": 11.41,
    "Race": 14.35,
    "MedMCQA": 1.86,
    "OpenBookQA": 3.87
  },
  {
    "model": "Pythia (1.4B)",
    "Average": 8.73,
    "MMLU": 9.66,
    "WinoGrande": 11.52,
    "PiQA": 4.17,
    "CommonsenseQA": 9.01,
    "Race": 12.76,
    "MedMCQA": 3.19,
    "OpenBookQA": 5.3
  },
  {
    "model": "TinyLlama (1.1B)",
    "Average": 8.39,
    "MMLU": 8.94,
    "WinoGrande": 12.23,
    "PiQA": 3.59,
    "CommonsenseQA": 6.06,
    "Race": 16.7,
    "MedMCQA": 2.07,
    "OpenBookQA": 4.68
  },
  {
    "model": "OELM (1B)",
    "Average": 8.99,
    "MMLU": 9.03,
    "WinoGrande": 10.18,
    "PiQA": 9.05,
    "CommonsenseQA": 7.75,
    "Race": 12.78,
    "MedMCQA": 2.5,
    "OpenBookQA": 6.31
  },
  {
    "model": "Phi-3-mini-128k-instruct (3.8B)",
    "Average": 39.73,
    "MMLU": 36.97,
    "WinoGrande": 46.88,
    "PiQA": 32.04,
    "CommonsenseQA": 49.15,
    "Race": 37.81,
    "MedMCQA": 22.61,
    "OpenBookQA": 33.6
  },
  {
    "model": "Gemma (2B)",
    "Average": 17.37,
    "MMLU": 17.52,
    "WinoGrande": 22.68,
    "PiQA": 15.09,
    "CommonsenseQA": 27.46,
    "Race": 14.32,
    "MedMCQA": 4.57,
    "OpenBookQA": 14.26
  },
  {
    "model": "Qwen (1.8B)",
    "Average": 21.61,
    "MMLU": 10.0,
    "WinoGrande": 40.97,
    "PiQA": 15.52,
    "CommonsenseQA": 31.13,
    "Race": 34.91,
    "MedMCQA": 4.7,
    "OpenBookQA": 20.37
  }
]