awsuineg committed on
Commit ce85f03 · verified · 1 Parent(s): a008dc7

Model save
adapter_config.json CHANGED
@@ -21,11 +21,11 @@
   "revision": null,
   "target_modules": [
     "o_proj",
-    "q_proj",
     "gate_proj",
+    "q_proj",
     "v_proj",
-    "down_proj",
     "k_proj",
+    "down_proj",
     "up_proj"
   ],
   "task_type": "CAUSAL_LM",
all_results.json CHANGED
@@ -2,8 +2,8 @@
   "epoch": 1.0,
   "total_flos": 1.1487027058043781e+19,
   "train_loss": 0.0,
-  "train_runtime": 0.0169,
+  "train_runtime": 0.0143,
   "train_samples": 207864,
-  "train_samples_per_second": 8184133.024,
-  "train_steps_per_second": 341027.672
+  "train_samples_per_second": 9715326.166,
+  "train_steps_per_second": 404831.526
 }
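
These throughput figures are an artifact of how the metrics are computed rather than real hardware speed: a train_loss of 0.0 with a ~0.014 s runtime is consistent with a run that resumed from a checkpoint already at its final step. A rough sketch of the arithmetic, using the step count from trainer_state.json below:

```python
# Rough reconstruction of the reported throughput, assuming the usual
# HF Trainer formula (steps completed / wall-clock runtime).
runtime_s = 0.0143  # train_runtime from this commit
steps = 5779        # final step, from trainer_state.json below

print(steps / runtime_s)  # ~404126 steps/s; the stored 404831.526 differs
                          # only because train_runtime is rounded to 4 decimals
```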
config.json ADDED
@@ -0,0 +1,43 @@
+{
+  "_attn_implementation_autoset": true,
+  "_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "architectures": [
+    "MistralForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 32768,
+  "model_type": "mistral",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "bfloat16",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": false,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 10000.0,
+  "sliding_window": 4096,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.46.2",
+  "use_cache": true,
+  "vocab_size": 32000
+}
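
The quantization_config recorded here means the base model was loaded in 4-bit NF4 with bfloat16 compute via bitsandbytes. A minimal sketch of reproducing that load, assuming transformers >= 4.46 with bitsandbytes installed and a CUDA GPU available:

```python
# Load mistralai/Mistral-7B-v0.1 with the same 4-bit settings that
# config.json above records in quantization_config.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # place quantized weights on the available GPU(s)
)
```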
runs/Nov20_07-40-49_d28f7adca72c/events.out.tfevents.1732088488.d28f7adca72c.2261562.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e2e509964aed74faa557e627c0de135c83923305e7b4690aab75bbb4d9c825f
+size 6644
runs/Nov20_07-45-35_d28f7adca72c/events.out.tfevents.1732088781.d28f7adca72c.2263555.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43168300e80ab14dedc9681882351d0382d6f044c740e17338fd467d232580d3
+size 6644
runs/Nov20_08-00-39_d28f7adca72c/events.out.tfevents.1732089682.d28f7adca72c.2269100.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8aa39bb29ac4889858038b98286c9be6cd73011a8faad911c09e08f6c816b725
+size 6644
runs/Nov20_08-03-58_d28f7adca72c/events.out.tfevents.1732089882.d28f7adca72c.2270466.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bb5de9fee45edbfb36d13a6b3f444acc72e9eccaa32c351e7d912a217d403a1
+size 6644
runs/Nov20_08-06-30_d28f7adca72c/events.out.tfevents.1732090033.d28f7adca72c.2271569.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b03023784e0f0a70ff615754a4cc0d83db8fb3f8b44839a46bbaab0c3606eca0
+size 6644
runs/Nov20_08-08-06_d28f7adca72c/events.out.tfevents.1732090139.d28f7adca72c.2272351.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f4c896a1c2e2918b1b9df3f8fb65cd626a9f4904d66f83c42e8136461d47a2b
+size 6998
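
Each event file above is stored as a Git LFS pointer rather than raw bytes: the three lines give the LFS spec version, the SHA-256 of the actual content, and its size in bytes. A small sketch for checking a downloaded file against its pointer (the local path is hypothetical):

```python
# Verify a downloaded file against the oid/size recorded in its LFS pointer.
import hashlib
from pathlib import Path

def check_lfs_pointer(local_file: str, expected_oid: str, expected_size: int) -> bool:
    data = Path(local_file).read_bytes()
    return (
        hashlib.sha256(data).hexdigest() == expected_oid  # matches "oid sha256:..."
        and len(data) == expected_size                    # matches "size ..."
    )

# Example for the first event file above, assuming it has been fetched locally:
ok = check_lfs_pointer(
    "events.out.tfevents.1732088488.d28f7adca72c.2261562.0",
    "5e2e509964aed74faa557e627c0de135c83923305e7b4690aab75bbb4d9c825f",
    6644,
)
```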
train_results.json CHANGED
@@ -2,8 +2,8 @@
   "epoch": 1.0,
   "total_flos": 1.1487027058043781e+19,
   "train_loss": 0.0,
-  "train_runtime": 0.0169,
+  "train_runtime": 0.0143,
   "train_samples": 207864,
-  "train_samples_per_second": 8184133.024,
-  "train_steps_per_second": 341027.672
+  "train_samples_per_second": 9715326.166,
+  "train_steps_per_second": 404831.526
 }
trainer_state.json CHANGED
@@ -8105,9 +8105,9 @@
       "step": 5779,
       "total_flos": 1.1487027058043781e+19,
       "train_loss": 0.0,
-      "train_runtime": 0.0169,
-      "train_samples_per_second": 8184133.024,
-      "train_steps_per_second": 341027.672
+      "train_runtime": 0.0143,
+      "train_samples_per_second": 9715326.166,
+      "train_steps_per_second": 404831.526
     }
   ],
   "logging_steps": 5,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82763ac4d1451da25c7cb359dd9ee2581f99c3d2b8db998d96375db407c0978e
+oid sha256:952937bb8d6cdab0cd5cdf0d6bde0283fcedf864753ae3832fde55a9debaddd9
 size 6328