esfrankel17 committed on
Commit
918f4cd
·
verified ·
1 Parent(s): 993a4ae

Training in progress, epoch 0

Browse files
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # llama3_8b_baseline_instructskillmix
18
 
19
- This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 1.7085
22
 
 
16
 
17
  # llama3_8b_baseline_instructskillmix
18
 
19
+ This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the PrincetonPLI/Instruct-SkillMix-SDD dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 1.7085
22
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 1.6,
3
- "eval_loss": NaN,
4
- "eval_runtime": 6.0974,
5
- "eval_samples_per_second": 16.564,
6
- "eval_steps_per_second": 0.656,
7
- "total_flos": 2.833014287331492e+17,
8
- "train_loss": 1.8229212760925293,
9
- "train_runtime": 1331.3604,
10
- "train_samples_per_second": 4.324,
11
  "train_steps_per_second": 0.002
12
  }
 
1
  {
2
  "epoch": 1.6,
3
+ "eval_loss": 1.7085474729537964,
4
+ "eval_runtime": 13.3752,
5
+ "eval_samples_per_second": 7.551,
6
+ "eval_steps_per_second": 0.299,
7
+ "total_flos": 1.9221024474136576e+16,
8
+ "train_loss": 1.8231021563212078,
9
+ "train_runtime": 1450.8496,
10
+ "train_samples_per_second": 3.968,
11
  "train_steps_per_second": 0.002
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 1.6,
3
- "eval_loss": NaN,
4
- "eval_runtime": 6.0974,
5
- "eval_samples_per_second": 16.564,
6
- "eval_steps_per_second": 0.656
7
  }
 
1
  {
2
  "epoch": 1.6,
3
+ "eval_loss": 1.7085474729537964,
4
+ "eval_runtime": 13.3752,
5
+ "eval_samples_per_second": 7.551,
6
+ "eval_steps_per_second": 0.299
7
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa7ddf40a53c01580113b725a0016eec9561e858e589517f9d83205265478d6b
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80e898f686f91ca0491b32e277bfca0b81304d4522d569fe7481c19983dadd6d
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d362d83c2e9ba973e98cca23893617dd1bf3577389c0af075b784abd7f877f9d
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebde098b486f203fd01e807c2b6b0a84097d7212c0411c8a88f9e9fa8c651d74
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55b0d48a0edc756a4f62d053427ec9337fc67951899617d08adfc913344f0caa
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:723649201ed03575b92401a56585ed80868e56ccedd76a17987321cdadcc3692
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b229c76dc68a962cfcf861b4b833add51f486c85f418864f20b6ce72b2e7d061
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:482a33cd7718fe720f2f5641f13b3e589a85fbbe00478c2ec8bee820c7b59aff
3
  size 1168138808
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.6,
3
- "total_flos": 2.833014287331492e+17,
4
- "train_loss": 1.8229212760925293,
5
- "train_runtime": 1331.3604,
6
- "train_samples_per_second": 4.324,
7
  "train_steps_per_second": 0.002
8
  }
 
1
  {
2
  "epoch": 1.6,
3
+ "total_flos": 1.9221024474136576e+16,
4
+ "train_loss": 1.8231021563212078,
5
+ "train_runtime": 1450.8496,
6
+ "train_samples_per_second": 3.968,
7
  "train_steps_per_second": 0.002
8
  }
trainer_log.jsonl CHANGED
@@ -1,3 +1,2 @@
1
- {"current_steps": 1, "total_steps": 3, "eval_loss": 1.8345922231674194, "epoch": 0.5333333333333333, "percentage": 33.33, "elapsed_time": "0:14:04", "remaining_time": "0:28:08"}
2
- {"current_steps": 3, "total_steps": 3, "eval_loss": 1.7085474729537964, "epoch": 1.6, "percentage": 100.0, "elapsed_time": "0:23:22", "remaining_time": "0:00:00"}
3
- {"current_steps": 3, "total_steps": 3, "epoch": 1.6, "percentage": 100.0, "elapsed_time": "0:24:06", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 1, "total_steps": 3, "loss": 1.9079, "learning_rate": 5e-06, "epoch": 0.5333333333333333, "percentage": 33.33, "elapsed_time": "0:05:58", "remaining_time": "0:11:56"}
2
+ {"current_steps": 1, "total_steps": 3, "eval_loss": 1.8345922231674194, "epoch": 0.5333333333333333, "percentage": 33.33, "elapsed_time": "0:11:21", "remaining_time": "0:22:42"}
 
trainer_state.json CHANGED
@@ -10,27 +10,27 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.5333333333333333,
13
- "eval_loss": NaN,
14
- "eval_runtime": 8.0544,
15
- "eval_samples_per_second": 12.54,
16
- "eval_steps_per_second": 0.497,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 1.6,
21
- "eval_loss": NaN,
22
- "eval_runtime": 6.2762,
23
- "eval_samples_per_second": 16.093,
24
- "eval_steps_per_second": 0.637,
25
  "step": 3
26
  },
27
  {
28
  "epoch": 1.6,
29
  "step": 3,
30
- "total_flos": 2.833014287331492e+17,
31
- "train_loss": 1.8229212760925293,
32
- "train_runtime": 1331.3604,
33
- "train_samples_per_second": 4.324,
34
  "train_steps_per_second": 0.002
35
  }
36
  ],
@@ -51,7 +51,7 @@
51
  "attributes": {}
52
  }
53
  },
54
- "total_flos": 2.833014287331492e+17,
55
  "train_batch_size": 4,
56
  "trial_name": null,
57
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 0.5333333333333333,
13
+ "eval_loss": 1.8345922231674194,
14
+ "eval_runtime": 13.5492,
15
+ "eval_samples_per_second": 7.454,
16
+ "eval_steps_per_second": 0.295,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 1.6,
21
+ "eval_loss": 1.7085474729537964,
22
+ "eval_runtime": 13.4084,
23
+ "eval_samples_per_second": 7.533,
24
+ "eval_steps_per_second": 0.298,
25
  "step": 3
26
  },
27
  {
28
  "epoch": 1.6,
29
  "step": 3,
30
+ "total_flos": 1.9221024474136576e+16,
31
+ "train_loss": 1.8231021563212078,
32
+ "train_runtime": 1450.8496,
33
+ "train_samples_per_second": 3.968,
34
  "train_steps_per_second": 0.002
35
  }
36
  ],
 
51
  "attributes": {}
52
  }
53
  },
54
+ "total_flos": 1.9221024474136576e+16,
55
  "train_batch_size": 4,
56
  "trial_name": null,
57
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c7e21ca3d6a30f2def379a5fe7f65c58e5770aff15252585f83e16399930e95
3
  size 7224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44c52cb12f2cf3c127f5b7a77a532618e060b34a9652507e284e6d7580705faf
3
  size 7224
training_eval_loss.png CHANGED