ardaspear committed on
Commit
7022b61
·
verified ·
1 Parent(s): c10cd8a

Training in progress, step 5

Browse files
adapter_config.json CHANGED
@@ -3,30 +3,30 @@
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "echarlaix/tiny-random-mistral",
5
  "bias": "none",
6
- "fan_in_fan_out": false,
7
  "inference_mode": true,
8
  "init_lora_weights": true,
9
  "layer_replication": null,
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
- "lora_alpha": 128,
14
- "lora_dropout": 0.1,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": null,
18
  "peft_type": "LORA",
19
- "r": 64,
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "k_proj",
24
- "up_proj",
25
- "gate_proj",
26
- "down_proj",
27
  "v_proj",
 
 
 
28
  "o_proj",
29
- "q_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "echarlaix/tiny-random-mistral",
5
  "bias": "none",
6
+ "fan_in_fan_out": null,
7
  "inference_mode": true,
8
  "init_lora_weights": true,
9
  "layer_replication": null,
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
+ "lora_alpha": 32,
14
+ "lora_dropout": 0.05,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": null,
18
  "peft_type": "LORA",
19
+ "r": 16,
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
 
23
  "v_proj",
24
+ "q_proj",
25
+ "gate_proj",
26
+ "up_proj",
27
  "o_proj",
28
+ "k_proj",
29
+ "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29c7abdfd0b443cc17d2032f5c63d1b661db3c628a5f0971fb6829e71427615b
3
- size 224224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39705e634a87013aa42d1d42545bec2297856ef2b320e75834c850f84ee20227
3
+ size 58680
config.json CHANGED
@@ -15,12 +15,27 @@
15
  "initializer_range": 0.02,
16
  "intermediate_size": 37,
17
  "is_decoder": true,
18
- "max_position_embeddings": 1024,
19
  "model_type": "mistral",
20
  "num_attention_heads": 4,
21
  "num_hidden_layers": 2,
22
  "num_key_value_heads": 2,
23
  "pad_token_id": 0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  "rms_norm_eps": 1e-06,
25
  "rope_theta": 10000.0,
26
  "sliding_window": 4096,
 
15
  "initializer_range": 0.02,
16
  "intermediate_size": 37,
17
  "is_decoder": true,
18
+ "max_position_embeddings": 4056,
19
  "model_type": "mistral",
20
  "num_attention_heads": 4,
21
  "num_hidden_layers": 2,
22
  "num_key_value_heads": 2,
23
  "pad_token_id": 0,
24
+ "quantization_config": {
25
+ "_load_in_4bit": true,
26
+ "_load_in_8bit": false,
27
+ "bnb_4bit_compute_dtype": "float32",
28
+ "bnb_4bit_quant_storage": "uint8",
29
+ "bnb_4bit_quant_type": "fp4",
30
+ "bnb_4bit_use_double_quant": false,
31
+ "llm_int8_enable_fp32_cpu_offload": false,
32
+ "llm_int8_has_fp16_weight": false,
33
+ "llm_int8_skip_modules": null,
34
+ "llm_int8_threshold": 6.0,
35
+ "load_in_4bit": true,
36
+ "load_in_8bit": false,
37
+ "quant_method": "bitsandbytes"
38
+ },
39
  "rms_norm_eps": 1e-06,
40
  "rope_theta": 10000.0,
41
  "sliding_window": 4096,
tokenizer_config.json CHANGED
@@ -30,7 +30,7 @@
30
  },
31
  "additional_special_tokens": [],
32
  "bos_token": "<s>",
33
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
34
  "clean_up_tokenization_spaces": false,
35
  "eos_token": "</s>",
36
  "legacy": true,
 
30
  },
31
  "additional_special_tokens": [],
32
  "bos_token": "<s>",
33
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
34
  "clean_up_tokenization_spaces": false,
35
  "eos_token": "</s>",
36
  "legacy": true,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b558cb8d9d555d434797bb3f54570d655e1e6cbcec2de08ae0026732eb1f764c
3
- size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3938a5457f7b26d5890f6faa39ab88fd0ac91d595f45dcbd0c228f43e39583ed
3
+ size 6584