Spaces:
Running
Running
[ | |
{ | |
"model": "GPT-4", | |
"Average": 65.94, | |
"MMLU": 74.8, | |
"WinoGrande": 66.2, | |
"PiQA": 61.6, | |
"CommonsenseQA": 63.0, | |
"Race": 67.0, | |
"MedMCQA": 51.8, | |
"OpenkookQA": 60.3 | |
}, | |
{ | |
"model": "Claude-3 Opus", | |
"Average": 62.64, | |
"MMLU": 70.4, | |
"WinoGrande": 63.5, | |
"PiQA": 59.1, | |
"CommonsenseQA": 63.7, | |
"Race": 66.2, | |
"MedMCQA": 49.1, | |
"OpenkookQA": 54.0 | |
}, | |
{ | |
"model": "Mistral Large", | |
"Average": 61.45, | |
"MMLU": 67.8, | |
"WinoGrande": 56.8, | |
"PiQA": 61.2, | |
"CommonsenseQA": 55.4, | |
"Race": 70.1, | |
"MedMCQA": 43.4, | |
"OpenkookQA": 58.7 | |
}, | |
{ | |
"model": "GPT-3.5", | |
"Average": 59.06, | |
"MMLU": 65.4, | |
"WinoGrande": 54.6, | |
"PiQA": 54.9, | |
"CommonsenseQA": 67.9, | |
"Race": 60.1, | |
"MedMCQA": 41.4, | |
"OpenkookQA": 49.9 | |
}, | |
{ | |
"model": "Gemini Pro", | |
"Average": 54.45, | |
"MMLU": 57.7, | |
"WinoGrande": 56.4, | |
"PiQA": 47.7, | |
"CommonsenseQA": 50.6, | |
"Race": 61.0, | |
"MedMCQA": 37.5, | |
"OpenkookQA": 52.5 | |
}, | |
{ | |
"model": "Llama3-70b-instruct", | |
"Average": 54.06, | |
"MMLU": 64.67, | |
"WinoGrande": 57.14, | |
"PiQA": 43.1, | |
"CommonsenseQA": 55.49, | |
"Race": 58.21, | |
"MedMCQA": 41.67, | |
"OpenkookQA": 41.93 | |
} | |
] |