oliverhoffmann's picture
Training in progress, step 4134
c45e456 verified
raw
history blame
6.03 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 13.0,
"eval_steps": 500,
"global_step": 4134,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9968553459119497,
"grad_norm": 1.7784031629562378,
"learning_rate": 1.8466376390904693e-05,
"loss": 1.7687,
"step": 317
},
{
"epoch": 1.0,
"eval_accuracy": 0.67,
"eval_loss": 0.9660505652427673,
"eval_runtime": 1.6131,
"eval_samples_per_second": 1921.733,
"eval_steps_per_second": 40.294,
"step": 318
},
{
"epoch": 1.9937106918238994,
"grad_norm": 1.4204388856887817,
"learning_rate": 1.6932752781809388e-05,
"loss": 0.6811,
"step": 634
},
{
"epoch": 2.0,
"eval_accuracy": 0.83,
"eval_loss": 0.2671310305595398,
"eval_runtime": 1.5805,
"eval_samples_per_second": 1961.395,
"eval_steps_per_second": 41.126,
"step": 636
},
{
"epoch": 2.990566037735849,
"grad_norm": 1.0138155221939087,
"learning_rate": 1.539912917271408e-05,
"loss": 0.2272,
"step": 951
},
{
"epoch": 3.0,
"eval_accuracy": 0.9067741935483871,
"eval_loss": 0.11616384238004684,
"eval_runtime": 1.5765,
"eval_samples_per_second": 1966.361,
"eval_steps_per_second": 41.23,
"step": 954
},
{
"epoch": 3.9874213836477987,
"grad_norm": 0.9868770837783813,
"learning_rate": 1.3865505563618772e-05,
"loss": 0.1117,
"step": 1268
},
{
"epoch": 4.0,
"eval_accuracy": 0.9245161290322581,
"eval_loss": 0.07967381924390793,
"eval_runtime": 1.6048,
"eval_samples_per_second": 1931.717,
"eval_steps_per_second": 40.504,
"step": 1272
},
{
"epoch": 4.984276729559748,
"grad_norm": 0.5568020939826965,
"learning_rate": 1.2331881954523466e-05,
"loss": 0.0762,
"step": 1585
},
{
"epoch": 5.0,
"eval_accuracy": 0.9332258064516129,
"eval_loss": 0.0637175664305687,
"eval_runtime": 1.6245,
"eval_samples_per_second": 1908.268,
"eval_steps_per_second": 40.012,
"step": 1590
},
{
"epoch": 5.981132075471698,
"grad_norm": 0.9021977782249451,
"learning_rate": 1.0798258345428159e-05,
"loss": 0.0609,
"step": 1902
},
{
"epoch": 6.0,
"eval_accuracy": 0.9341935483870968,
"eval_loss": 0.05572959780693054,
"eval_runtime": 1.5863,
"eval_samples_per_second": 1954.213,
"eval_steps_per_second": 40.975,
"step": 1908
},
{
"epoch": 6.977987421383648,
"grad_norm": 0.6561667323112488,
"learning_rate": 9.26463473633285e-06,
"loss": 0.0529,
"step": 2219
},
{
"epoch": 7.0,
"eval_accuracy": 0.9380645161290323,
"eval_loss": 0.05086366832256317,
"eval_runtime": 1.5942,
"eval_samples_per_second": 1944.608,
"eval_steps_per_second": 40.774,
"step": 2226
},
{
"epoch": 7.9748427672955975,
"grad_norm": 0.39448273181915283,
"learning_rate": 7.731011127237543e-06,
"loss": 0.0471,
"step": 2536
},
{
"epoch": 8.0,
"eval_accuracy": 0.9390322580645162,
"eval_loss": 0.04851401969790459,
"eval_runtime": 1.5838,
"eval_samples_per_second": 1957.35,
"eval_steps_per_second": 41.041,
"step": 2544
},
{
"epoch": 8.971698113207546,
"grad_norm": 0.562044620513916,
"learning_rate": 6.197387518142236e-06,
"loss": 0.0439,
"step": 2853
},
{
"epoch": 9.0,
"eval_accuracy": 0.9345161290322581,
"eval_loss": 0.04487784206867218,
"eval_runtime": 1.5718,
"eval_samples_per_second": 1972.247,
"eval_steps_per_second": 41.354,
"step": 2862
},
{
"epoch": 9.968553459119496,
"grad_norm": 0.3506462275981903,
"learning_rate": 4.6637639090469286e-06,
"loss": 0.0413,
"step": 3170
},
{
"epoch": 10.0,
"eval_accuracy": 0.9396774193548387,
"eval_loss": 0.043323710560798645,
"eval_runtime": 1.5787,
"eval_samples_per_second": 1963.58,
"eval_steps_per_second": 41.172,
"step": 3180
},
{
"epoch": 10.965408805031446,
"grad_norm": 0.33104729652404785,
"learning_rate": 3.1301402999516213e-06,
"loss": 0.0394,
"step": 3487
},
{
"epoch": 11.0,
"eval_accuracy": 0.9396774193548387,
"eval_loss": 0.04240846261382103,
"eval_runtime": 1.5874,
"eval_samples_per_second": 1952.868,
"eval_steps_per_second": 40.947,
"step": 3498
},
{
"epoch": 11.962264150943396,
"grad_norm": 0.37558791041374207,
"learning_rate": 1.5965166908563137e-06,
"loss": 0.0382,
"step": 3804
},
{
"epoch": 12.0,
"eval_accuracy": 0.9403225806451613,
"eval_loss": 0.04175195470452309,
"eval_runtime": 1.593,
"eval_samples_per_second": 1945.992,
"eval_steps_per_second": 40.803,
"step": 3816
},
{
"epoch": 12.959119496855346,
"grad_norm": 0.4860643446445465,
"learning_rate": 6.289308176100629e-08,
"loss": 0.0376,
"step": 4121
}
],
"logging_steps": 317,
"max_steps": 4134,
"num_input_tokens_seen": 0,
"num_train_epochs": 13,
"save_steps": 1000000000.0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1072259767520340.0,
"train_batch_size": 48,
"trial_name": null,
"trial_params": {
"alpha": 0.3858447457522741,
"num_train_epochs": 13,
"temperature": 1
}
}