Bram Vanroy committed on
Commit
828458d
·
1 Parent(s): d081f4d

add data collection script

Browse files
Files changed (3) hide show
  1. content.py +1 -0
  2. evals/models.json +68 -4
  3. generate_overview_json.py +40 -0
content.py CHANGED
@@ -13,6 +13,7 @@ We test the models on the following benchmarks **for the Dutch version only!!**,
13
 
14
  I do not maintain those datasets, I only run benchmarks and add the results to this space. For questions regarding the test sets or running them yourself, see [the original Github repository](https://github.com/laiviet/lm-evaluation-harness).
15
 
 
16
  """
17
 
18
  CREDIT = f"""
 
13
 
14
  I do not maintain those datasets, I only run benchmarks and add the results to this space. For questions regarding the test sets or running them yourself, see [the original Github repository](https://github.com/laiviet/lm-evaluation-harness).
15
 
16
+ All models are benchmarked in 8-bit precision.
17
  """
18
 
19
  CREDIT = f"""
evals/models.json CHANGED
@@ -1,6 +1,70 @@
1
  {
2
- "gpt-neo-1.3B-dutch": "PT",
3
- "gpt-neo-125M-dutch": "PT",
4
- "gpt2-large-dutch": "PT",
5
- "gpt2-medium-dutch": "PT"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
 
1
  {
2
+ "bloom-7b1": {
3
+ "model_name": "pretrained=bigscience/bloom-7b1",
4
+ "args": "pretrained=bigscience/bloom-7b1"
5
+ },
6
+ "gpt-neo-1.3b-dutch": {
7
+ "model_name": "yhavinga/gpt-neo-1.3B-dutch",
8
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
9
+ },
10
+ "gpt-neo-125m-dutch": {
11
+ "model_name": "yhavinga/gpt-neo-125M-dutch",
12
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
13
+ },
14
+ "gpt2-large-dutch": {
15
+ "model_name": "yhavinga/gpt2-large-dutch",
16
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
17
+ },
18
+ "gpt2-medium-dutch": {
19
+ "model_name": "yhavinga/gpt2-medium-dutch",
20
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
21
+ },
22
+ "llama-2-13b-chat-dutch": {
23
+ "model_name": "BramVanroy/Llama-2-13b-chat-dutch",
24
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
25
+ },
26
+ "llama-2-13b-chat-hf": {
27
+ "model_name": "meta-llama/Llama-2-13b-chat-hf",
28
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
29
+ },
30
+ "llama-2-13b-hf": {
31
+ "model_name": "meta-llama/Llama-2-13b-hf",
32
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
33
+ },
34
+ "llama-2-7b-chat-hf": {
35
+ "model_name": "meta-llama/Llama-2-7b-chat-hf",
36
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
37
+ },
38
+ "llama-2-7b-hf": {
39
+ "model_name": "meta-llama/Llama-2-7b-hf",
40
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
41
+ },
42
+ "llama-7b": {
43
+ "model_name": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
44
+ "args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B"
45
+ },
46
+ "llama2-13b-ft-mc4_nl_cleaned_tiny": {
47
+ "model_name": "BramVanroy/llama2-13b-ft-mc4_nl_cleaned_tiny",
48
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
49
+ },
50
+ "mistral-7b-v0.1": {
51
+ "model_name": "mistralai/Mistral-7B-v0.1",
52
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
53
+ },
54
+ "neural-chat-7b-v3-1": {
55
+ "model_name": "Intel/neural-chat-7b-v3-1",
56
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
57
+ },
58
+ "orca-2-13b": {
59
+ "model_name": "microsoft/Orca-2-13b",
60
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
61
+ },
62
+ "orca-2-7b": {
63
+ "model_name": "microsoft/Orca-2-7b",
64
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
65
+ },
66
+ "zephyr-7b-beta": {
67
+ "model_name": "HuggingFaceH4/zephyr-7b-beta",
68
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
69
+ }
70
  }
generate_overview_json.py CHANGED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import json
3
+ from pprint import pprint
4
+
5
+
6
# Flag values that explicitly switch a quantization option off.
_DISABLED_FLAG_VALUES = frozenset({"", "0", "false", "none"})


def _parse_model_args(raw):
    """Parse a ``key=value,key=value`` string into a dict of strings.

    Only the first ``=`` in a fragment separates key from value, so values
    that contain ``=`` themselves are kept intact. Fragments without ``=``
    or with an empty key are ignored instead of raising.
    """
    parsed = {}
    for fragment in raw.split(","):
        key, sep, value = fragment.partition("=")
        key = key.strip()
        if sep and key:
            parsed[key] = value.strip()
    return parsed


def _quantization(model_args):
    """Return ``"8-bit"``, ``"4-bit"`` or ``None`` for parsed model args.

    A flag only counts when its value is not an explicit "off" value, so
    ``load_in_8bit=False`` is not reported as 8-bit. 8-bit takes precedence
    over 4-bit when both are enabled.
    """
    for flag, label in (("load_in_8bit", "8-bit"), ("load_in_4bit", "4-bit")):
        if model_args.get(flag, "false").lower() not in _DISABLED_FLAG_VALUES:
            return label
    return None


def _collect_results(evals_dir):
    """Build ``{short_name: {model_name, compute_dtype, quantization}}``.

    Every ``*.json`` file below *evals_dir* is inspected, except files whose
    stem is ``models``. Files that do not carry a usable
    ``config.model_args`` string with a ``pretrained`` entry are skipped and
    leave no entry behind.
    """
    results = {}
    # Sorted so that the outcome does not depend on filesystem order when
    # several files map to the same short name.
    for pfin in sorted(evals_dir.rglob("*.json")):
        if pfin.stem == "models":
            continue

        # NOTE(review): assumes stems look like <task>_<lang>_<model>; confirm.
        # maxsplit=2 keeps underscores that belong to the model name itself.
        name_parts = pfin.stem.split("_", 2)
        if len(name_parts) < 3 or not name_parts[2]:
            continue
        short_name = name_parts[2]

        data = json.loads(pfin.read_text(encoding="utf-8"))
        config = data.get("config") if isinstance(data, dict) else None
        raw_args = config.get("model_args") if isinstance(config, dict) else None
        if not isinstance(raw_args, str):
            continue

        model_args = _parse_model_args(raw_args)
        if "pretrained" not in model_args:
            continue

        # Register the entry only now, so skipped files never produce an
        # empty placeholder in the overview.
        entry = results.setdefault(short_name, {})
        entry["model_name"] = model_args["pretrained"]
        entry["compute_dtype"] = model_args.get("dtype")
        entry["quantization"] = _quantization(model_args)
    return results


def main(evals_dir=None) -> None:
    """Pretty-print an overview of the models found in the evaluation results.

    Args:
        evals_dir: Directory that is searched recursively for ``*.json``
            result files. Defaults to the ``evals`` directory next to this
            script.
    """
    if evals_dir is None:
        evals_dir = Path(__file__).parent.joinpath("evals")
    pprint(_collect_results(Path(evals_dir)))


if __name__ == "__main__":
    main()