Bram Vanroy committed
Commit d081f4d · 1 Parent(s): d64c180
rm llama/bloom evaluations
- evals/arc/arc_nl-bloom-7b1.json +0 -23
- evals/arc/arc_nl-llama-7B.json +0 -23
- evals/hellaswag/hellaswag_nl_bloom-7b1.json +0 -23
- evals/hellaswag/hellaswag_nl_llama-7B.json +0 -23
- evals/mmlu/mmlu_nl-bloom-7b1.json +0 -23
- evals/mmlu/mmlu_nl-llama-7B.json +0 -23
- evals/{dutch_models.json → models.json} +0 -0
- evals/truthfulqa/truthfulqa_nl-bloom-7b1.json +0 -23
- evals/truthfulqa/truthfulqa_nl-llama-7B.json +0 -23
- generate_overview_json.py +0 -0
evals/arc/arc_nl-bloom-7b1.json
DELETED
@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_nl": {
-      "acc": 0.1881950384944397,
-      "acc_stderr": 0.011436905010368727,
-      "acc_norm": 0.2309666381522669,
-      "acc_norm_stderr": 0.012331780770152612
-    }
-  },
-  "versions": {
-    "arc_nl": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}
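All of the deleted result files share this layout: a "results" block with the per-task metrics (acc/acc_norm for ARC, HellaSwag and MMLU, mc1/mc2 for TruthfulQA, each with a stderr), a "versions" block with the task version, and a "config" block recording the model and run settings. Below is a minimal Python sketch of reading one such file and flattening its metrics; the function name and the usage path are illustrative only and are not taken from this repository's code.

import json
from pathlib import Path

def load_eval_result(path: Path) -> list[dict]:
    """Read one eval result JSON (layout as above) and flatten its per-task metrics."""
    data = json.loads(path.read_text(encoding="utf-8"))
    rows = []
    for task, metrics in data["results"].items():
        rows.append({
            "task": task,
            "version": data.get("versions", {}).get(task),
            "model_args": data.get("config", {}).get("model_args"),
            **metrics,  # acc/acc_norm or mc1/mc2 plus their stderr values
        })
    return rows

# Hypothetical usage against one of the files removed in this commit:
# for row in load_eval_result(Path("evals/arc/arc_nl-bloom-7b1.json")):
#     print(row["task"], row.get("acc"), row.get("acc_norm"))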
evals/arc/arc_nl-llama-7B.json
DELETED
@@ -1,23 +0,0 @@
-{
-  "results": {
-    "arc_nl": {
-      "acc": 0.32677502138579984,
-      "acc_stderr": 0.013724076021999824,
-      "acc_norm": 0.3361847733105218,
-      "acc_norm_stderr": 0.013822646555385164
-    }
-  },
-  "versions": {
-    "arc_nl": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}
evals/hellaswag/hellaswag_nl_bloom-7b1.json
DELETED
@@ -1,23 +0,0 @@
-{
-  "results": {
-    "hellaswag_nl": {
-      "acc": 0.28667026443604965,
-      "acc_stderr": 0.004698261813459453,
-      "acc_norm": 0.3172153264975715,
-      "acc_norm_stderr": 0.004835258421184045
-    }
-  },
-  "versions": {
-    "hellaswag_nl": 1
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": "1",
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}
evals/hellaswag/hellaswag_nl_llama-7B.json
DELETED
@@ -1,23 +0,0 @@
-{
-  "results": {
-    "hellaswag_nl": {
-      "acc": 0.38117850205050724,
-      "acc_stderr": 0.0050457320519523,
-      "acc_norm": 0.48748111374919056,
-      "acc_norm_stderr": 0.00519291390537233
-    }
-  },
-  "versions": {
-    "hellaswag_nl": 1
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": "1",
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}
evals/mmlu/mmlu_nl-bloom-7b1.json
DELETED
@@ -1,23 +0,0 @@
-{
-  "results": {
-    "mmlu_nl": {
-      "acc": 0.25931547393185095,
-      "acc_stderr": 0.0038180275621108187,
-      "acc_norm": 0.2749487743796008,
-      "acc_norm_stderr": 0.003889720954246996
-    }
-  },
-  "versions": {
-    "mmlu_nl": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}
evals/mmlu/mmlu_nl-llama-7B.json
DELETED
@@ -1,23 +0,0 @@
-{
-  "results": {
-    "mmlu_nl": {
-      "acc": 0.3053046975791151,
-      "acc_stderr": 0.004012103530956046,
-      "acc_norm": 0.2983987250512256,
-      "acc_norm_stderr": 0.003986133809323066
-    }
-  },
-  "versions": {
-    "mmlu_nl": 0
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}
evals/{dutch_models.json → models.json}
RENAMED
File without changes
evals/truthfulqa/truthfulqa_nl-bloom-7b1.json
DELETED
@@ -1,23 +0,0 @@
-{
-  "results": {
-    "truthfulqa_nl": {
-      "mc1": 0.25477707006369427,
-      "mc1_stderr": 0.01556199397314563,
-      "mc2": 0.42677675918475044,
-      "mc2_stderr": 0.016186878668566846
-    }
-  },
-  "versions": {
-    "truthfulqa_nl": 1
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=bigscience/bloom-7b1",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}
evals/truthfulqa/truthfulqa_nl-llama-7B.json
DELETED
@@ -1,23 +0,0 @@
-{
-  "results": {
-    "truthfulqa_nl": {
-      "mc1": 0.24331210191082803,
-      "mc1_stderr": 0.015324355488601135,
-      "mc2": 0.40023342153314656,
-      "mc2_stderr": 0.014679036703865582
-    }
-  },
-  "versions": {
-    "truthfulqa_nl": 1
-  },
-  "config": {
-    "model": "hf-auto",
-    "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
-    "batch_size": 1,
-    "device": "cuda",
-    "no_cache": false,
-    "limit": null,
-    "bootstrap_iters": 100000,
-    "description_dict": {}
-  }
-}
generate_overview_json.py
ADDED
File without changes
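generate_overview_json.py is added in this commit, but its contents are not rendered in this diff view. Purely as a hypothetical sketch of what an overview generator over these per-run result files could look like, assuming it walks the evals/ tree and merges the "results" blocks into one combined JSON (the output path evals/overview.json and all helper names are assumptions, not taken from the actual script):

import json
from pathlib import Path

def build_overview(evals_dir: Path = Path("evals")) -> dict:
    """Collect every per-run result JSON under evals/ into one overview dict."""
    overview = {}
    for result_file in sorted(evals_dir.rglob("*.json")):
        if result_file.name == "models.json":  # model metadata, not a run result
            continue
        data = json.loads(result_file.read_text(encoding="utf-8"))
        if "results" not in data:
            continue
        for task, metrics in data["results"].items():
            overview.setdefault(task, {})[result_file.stem] = {
                "metrics": metrics,
                "model_args": data.get("config", {}).get("model_args"),
                "version": data.get("versions", {}).get(task),
            }
    return overview

if __name__ == "__main__":
    # Assumed output location; adjust to match the repository's actual conventions.
    Path("evals/overview.json").write_text(
        json.dumps(build_overview(), indent=2), encoding="utf-8"
    )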