AlekseyKorshuk committed
Commit d255b1a · 1 Parent(s): 7495459

huggingartists

README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/pharaoh")
 ```
 
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3kao1xf5/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/jefxst5w/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 
 ## Training procedure
 
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on PHARAOH's lyrics.
 
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/xsxegmjq) for full transparency and reproducibility.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1fqlqxjo) for full transparency and reproducibility.
 
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/xsxegmjq/artifacts) is logged and versioned.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1fqlqxjo/artifacts) is logged and versioned.
 
 ## How to use
 
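For reference, the "How to use" section touched above points at ordinary `transformers` text generation. A minimal sketch, assuming the fine-tuned checkpoint is published under the same `huggingartists/pharaoh` id as the dataset (the prompt and sampling arguments are illustrative, not taken from this commit):

```python
from transformers import pipeline

# Assumption: the fine-tuned GPT-2 lives at huggingartists/pharaoh on the Hub,
# mirroring the dataset id used in the README snippet above.
generator = pipeline("text-generation", model="huggingartists/pharaoh")

# Sample a few lyric continuations; max_length and num_return_sequences
# are illustrative defaults.
for out in generator("I am", max_length=50, num_return_sequences=3):
    print(out["generated_text"])
```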
config.json CHANGED
@@ -35,7 +35,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.11.2",
+  "transformers_version": "4.11.3",
   "use_cache": true,
   "vocab_size": 50257
 }
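The only change to config.json is the recorded library version. A hedged way to confirm which `transformers` release wrote the current checkpoint is to read the field back from the saved config (again assuming the `huggingartists/pharaoh` Hub id):

```python
from transformers import AutoConfig

# Assumption: huggingartists/pharaoh is this model repo's Hub id.
config = AutoConfig.from_pretrained("huggingartists/pharaoh")

print(config.transformers_version)             # "4.11.3" after this commit
print(config.torch_dtype, config.vocab_size)   # unchanged in this commit
```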
evaluation.txt CHANGED
@@ -1 +1 @@
-{"eval_loss": 1.7224986553192139, "eval_runtime": 22.1306, "eval_samples_per_second": 22.548, "eval_steps_per_second": 2.847, "epoch": 2.0}
+{"eval_loss": 1.6883338689804077, "eval_runtime": 19.6397, "eval_samples_per_second": 22.607, "eval_steps_per_second": 2.851, "epoch": 2.0}
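Since the HF `Trainer` reports `eval_loss` as mean token cross-entropy in nats for a causal LM, its exponential is the eval perplexity: the drop from 1.7225 to 1.6883 corresponds to perplexity improving from roughly 5.60 to 5.41. A minimal check on the new record:

```python
import json
import math

# The updated evaluation.txt line from this commit.
record = json.loads(
    '{"eval_loss": 1.6883338689804077, "eval_runtime": 19.6397, '
    '"eval_samples_per_second": 22.607, "eval_steps_per_second": 2.851, "epoch": 2.0}'
)

# Perplexity = exp(mean cross-entropy); ~5.41 here, ~5.60 before the commit.
print(math.exp(record["eval_loss"]))
```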
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9a2b4c5319db04b9872d2055b5b79d1b4807f1c3915b4626047165c4d8e7d4ec
+oid sha256:484cb86e3f52479a236c203dabe5e4e5f4963b14df3281ba549a73b1e7a9af01
 size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26bbacfce71425f128a87fdb7029463499be8bc995eb14eb839740e92b47317a
+oid sha256:63018a2ad54b6c6a33ce78abf8194353639485350842ab974fa3c1670b92825a
 size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bff62d0348d173b28829e07fec717386e9eef99e506d12c023aa35c55239ae7d
+oid sha256:d21702f9a805941e1ce2a37d267084c86a60112567810eb89dcaeb07c9fc68a5
 size 510403817
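The weight files in this commit are Git LFS pointers: only the `oid sha256:` digest and `size` change, the pointer format stays the same. A minimal sketch of verifying a downloaded `pytorch_model.bin` against the pointer above (the local path is an assumption):

```python
import hashlib
from pathlib import Path

# Values copied from the new LFS pointer in this commit.
EXPECTED_OID = "d21702f9a805941e1ce2a37d267084c86a60112567810eb89dcaeb07c9fc68a5"
EXPECTED_SIZE = 510403817

# Assumption: the real binary (not the pointer file) sits next to this script.
path = Path("pytorch_model.bin")

digest = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

print(digest.hexdigest() == EXPECTED_OID and path.stat().st_size == EXPECTED_SIZE)
```

The same check applies to the other LFS-tracked files in this commit (flax_model.msgpack, optimizer.pt, rng_state.pth, scheduler.pt, training_args.bin).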
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9da0f194b462662f09bef1ea47010e6352ae99c4cb094909f982d2deeb47ae8b
-size 14567
+oid sha256:30d4900f8326487fb775920085a48084a08ba78e6bcfb99648ba03dd0e37557f
+size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ed819f11a0f4645df58a01f50e8c26eb756310ea7f77b118bb1ec57797118f8
+oid sha256:da9ebf71c2208f03e5a6aa50f8cb521e72774afec2ec08cfe12ef435108ef2fb
 size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
-  "best_metric": 1.7224986553192139,
-  "best_model_checkpoint": "output/pharaoh/checkpoint-167",
+  "best_metric": 1.6883338689804077,
+  "best_model_checkpoint": "output/pharaoh/checkpoint-174",
   "epoch": 1.0,
-  "global_step": 167,
+  "global_step": 174,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -212,11 +212,25 @@
       "eval_samples_per_second": 22.454,
       "eval_steps_per_second": 2.835,
       "step": 167
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 3.5909924568597513e-06,
+      "loss": 1.4675,
+      "step": 170
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 1.6883338689804077,
+      "eval_runtime": 18.8954,
+      "eval_samples_per_second": 23.498,
+      "eval_steps_per_second": 2.964,
+      "step": 174
     }
   ],
-  "max_steps": 334,
+  "max_steps": 348,
   "num_train_epochs": 2,
-  "total_flos": 174412431360000.0,
+  "total_flos": 181597962240000.0,
   "trial_name": null,
   "trial_params": null
 }
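The trainer_state.json change extends `log_history` with a training-loss entry at step 170 and the epoch-1 evaluation at step 174, and updates the best-checkpoint bookkeeping accordingly. A minimal sketch of reading those fields back (assumes the file has been checked out locally from this repo):

```python
import json
from pathlib import Path

# Assumption: trainer_state.json from this repo is available locally.
state = json.loads(Path("trainer_state.json").read_text())

print(state["best_metric"])             # 1.6883338689804077 after this commit
print(state["best_model_checkpoint"])   # output/pharaoh/checkpoint-174
for entry in state["log_history"][-3:]:  # most recent entries, incl. the two appended here
    print(entry)
```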
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce0544ae26ed18e0692923459bb51b32637aeec0f8caa1502a1b6bf0910ecd6d
+oid sha256:f5f224f4b34842037ca5146b3d9ac77172108df521b4d2b1e99dad44209ed521
 size 2863