[ { "model": "OPT (1.3B)", "Average": 7.84, "MMLU": 7.4, "WinoGrande": 12.47, "PiQA": 4.45, "CommonsenseQA": 7.61, "Race": 13.61, "MedMCQA": 1.25, "OpenBookQA": 4.48 }, { "model": "SlimPajama", "Average": 9.54, "MMLU": 9.22, "WinoGrande": 14.76, "PiQA": 5.32, "CommonsenseQA": 9.01, "Race": 16.19, "MedMCQA": 1.68, "OpenBookQA": 5.7 }, { "model": "OLMo (1B)", "Average": 8.8, "MMLU": 8.54, "WinoGrande": 6.16, "PiQA": 8.05, "CommonsenseQA": 13.1, "Race": 13.61, "MedMCQA": 2.1, "OpenBookQA": 6.11 }, { "model": "GPT-Neo (1.3B)", "Average": 7.38, "MMLU": 6.94, "WinoGrande": 10.81, "PiQA": 4.31, "CommonsenseQA": 6.34, "Race": 13.75, "MedMCQA": 2.63, "OpenBookQA": 4.89 }, { "model": "Cerebras-GPT (1.3B)", "Average": 4.84, "MMLU": 5.37, "WinoGrande": 9.31, "PiQA": 2.16, "CommonsenseQA": 6.2, "Race": 6.9, "MedMCQA": 1.04, "OpenBookQA": 3.46 }, { "model": "RedPajama (1B)", "Average": 9.01, "MMLU": 9.21, "WinoGrande": 16.97, "PiQA": 1.39, "CommonsenseQA": 11.41, "Race": 14.35, "MedMCQA": 1.86, "OpenBookQA": 3.87 }, { "model": "Pythia (1.4B)", "Average": 8.73, "MMLU": 9.66, "WinoGrande": 11.52, "PiQA": 4.17, "CommonsenseQA": 9.01, "Race": 12.76, "MedMCQA": 3.19, "OpenBookQA": 5.3 }, { "model": "TinyLLama (1.1B)", "Average": 8.39, "MMLU": 8.94, "WinoGrande": 12.23, "PiQA": 3.59, "CommonsenseQA": 6.06, "Race": 16.7, "MedMCQA": 2.07, "OpenBookQA": 4.68 }, { "model": "OELM (1B)", "Average": 8.99, "MMLU": 9.03, "WinoGrande": 10.18, "PiQA": 9.05, "CommonsenseQA": 7.75, "Race": 12.78, "MedMCQA": 2.5, "OpenBookQA": 6.31 }, { "model": "Phi-3-mini-128k-instruct (3.8B)", "Average": 39.73, "MMLU": 36.97, "WinoGrande": 46.88, "PiQA": 32.04, "CommonsenseQA": 49.15, "Race": 37.81, "MedMCQA": 22.61, "OpenBookQA": 33.6 }, { "model": "Gemma (2B)", "Average": 17.37, "MMLU": 17.52, "WinoGrande": 22.68, "PiQA": 15.09, "CommonsenseQA": 27.46, "Race": 14.32, "MedMCQA": 4.57, "OpenBookQA": 14.26 }, { "model": "Qwen (1.8B)", "Average": 21.61, "MMLU": 10.0, "WinoGrande": 40.97, "PiQA": 15.52, 
"CommonsenseQA": 31.13, "Race": 34.91, "MedMCQA": 4.7, "OpenBookQA": 20.37 } ]