Model save

Browse files

Files changed (4) hide show

README.md +2 -2
pytorch_model-00001-of-00002.bin +2 -2
pytorch_model-00002-of-00002.bin +2 -2
pytorch_model.bin.index.json +15 -15

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-base_model: MNC-LLM/Mistral-7B-NWS-u2k-eng-cot-ep4-lr1e-05
 tags:
 - generated_from_trainer
 model-index:
@@ -12,7 +12,7 @@ should probably proofread and complete it, then remove this comment. -->
 # batch1_epochs4_lr1e-05_paged_adamw_32bit_cosine_length2048_warmup_0.05_max_grad1.0_grad_accu16
-This model is a fine-tuned version of [MNC-LLM/Mistral-7B-NWS-u2k-eng-cot-ep4-lr1e-05](https://huggingface.co/MNC-LLM/Mistral-7B-NWS-u2k-eng-cot-ep4-lr1e-05) on the None dataset.
 ## Model description

 ---
+base_model: allenai/tulu-2-dpo-7b
 tags:
 - generated_from_trainer
 model-index:
 # batch1_epochs4_lr1e-05_paged_adamw_32bit_cosine_length2048_warmup_0.05_max_grad1.0_grad_accu16
+This model is a fine-tuned version of [allenai/tulu-2-dpo-7b](https://huggingface.co/allenai/tulu-2-dpo-7b) on the None dataset.
 ## Model description

pytorch_model-00001-of-00002.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7b8fc3789fea6baf58f5d72da2e19647d7414a396d0ebf00dcecbd982d5b073
-size 9943030860

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ef37ff4d66c97c6bb73bb5521e597806c657f411f38b1c52bd239ed5404ca2b
+size 9976623130

pytorch_model-00002-of-00002.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf2d671c0dae4495933e3a0483a303d0a97a033151bc76d0cc622fcd3961efe2
-size 4540536863

 version https://git-lfs.github.com/spec/v1
+oid sha256:cd764682257415e54346b4259f6e67bb17cd77ec924e8d07ce3755975bc54998
+size 3500311811

pytorch_model.bin.index.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 14483464192
   },
   "weight_map": {
     "lm_head.weight": "pytorch_model-00002-of-00002.bin",
@@ -140,24 +140,24 @@
     "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
-    "model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.22.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
     "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
-    "model.layers.23.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
     "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
     "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
     "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",

 {
   "metadata": {
+    "total_size": 13476831232
   },
   "weight_map": {
     "lm_head.weight": "pytorch_model-00002-of-00002.bin",
     "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
     "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
     "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",