{
"best_metric": 1.043031096458435,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 0.16339869281045752,
"eval_steps": 50,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0032679738562091504,
"grad_norm": 0.24469198286533356,
"learning_rate": 1e-05,
"loss": 1.4387,
"step": 1
},
{
"epoch": 0.0032679738562091504,
"eval_loss": 1.4474108219146729,
"eval_runtime": 12.294,
"eval_samples_per_second": 41.89,
"eval_steps_per_second": 10.493,
"step": 1
},
{
"epoch": 0.006535947712418301,
"grad_norm": 0.2529772222042084,
"learning_rate": 2e-05,
"loss": 1.4012,
"step": 2
},
{
"epoch": 0.00980392156862745,
"grad_norm": 0.26472678780555725,
"learning_rate": 3e-05,
"loss": 1.3954,
"step": 3
},
{
"epoch": 0.013071895424836602,
"grad_norm": 0.2510676681995392,
"learning_rate": 4e-05,
"loss": 1.3832,
"step": 4
},
{
"epoch": 0.016339869281045753,
"grad_norm": 0.2522083818912506,
"learning_rate": 5e-05,
"loss": 1.4,
"step": 5
},
{
"epoch": 0.0196078431372549,
"grad_norm": 0.23163741827011108,
"learning_rate": 6e-05,
"loss": 1.383,
"step": 6
},
{
"epoch": 0.02287581699346405,
"grad_norm": 0.24272429943084717,
"learning_rate": 7e-05,
"loss": 1.3874,
"step": 7
},
{
"epoch": 0.026143790849673203,
"grad_norm": 0.22741547226905823,
"learning_rate": 8e-05,
"loss": 1.3295,
"step": 8
},
{
"epoch": 0.029411764705882353,
"grad_norm": 0.21041825413703918,
"learning_rate": 9e-05,
"loss": 1.316,
"step": 9
},
{
"epoch": 0.032679738562091505,
"grad_norm": 0.19392521679401398,
"learning_rate": 0.0001,
"loss": 1.2768,
"step": 10
},
{
"epoch": 0.03594771241830065,
"grad_norm": 0.20608149468898773,
"learning_rate": 9.999316524962345e-05,
"loss": 1.2882,
"step": 11
},
{
"epoch": 0.0392156862745098,
"grad_norm": 0.22623692452907562,
"learning_rate": 9.997266286704631e-05,
"loss": 1.2364,
"step": 12
},
{
"epoch": 0.042483660130718956,
"grad_norm": 0.26282697916030884,
"learning_rate": 9.993849845741524e-05,
"loss": 1.211,
"step": 13
},
{
"epoch": 0.0457516339869281,
"grad_norm": 0.270076185464859,
"learning_rate": 9.989068136093873e-05,
"loss": 1.2835,
"step": 14
},
{
"epoch": 0.049019607843137254,
"grad_norm": 0.21397562325000763,
"learning_rate": 9.98292246503335e-05,
"loss": 1.2031,
"step": 15
},
{
"epoch": 0.05228758169934641,
"grad_norm": 0.1848316192626953,
"learning_rate": 9.975414512725057e-05,
"loss": 1.1504,
"step": 16
},
{
"epoch": 0.05555555555555555,
"grad_norm": 0.16563019156455994,
"learning_rate": 9.966546331768191e-05,
"loss": 1.1845,
"step": 17
},
{
"epoch": 0.058823529411764705,
"grad_norm": 0.17262709140777588,
"learning_rate": 9.956320346634876e-05,
"loss": 1.16,
"step": 18
},
{
"epoch": 0.06209150326797386,
"grad_norm": 0.16582638025283813,
"learning_rate": 9.944739353007344e-05,
"loss": 1.1903,
"step": 19
},
{
"epoch": 0.06535947712418301,
"grad_norm": 0.15873746573925018,
"learning_rate": 9.931806517013612e-05,
"loss": 1.1438,
"step": 20
},
{
"epoch": 0.06862745098039216,
"grad_norm": 0.17037732899188995,
"learning_rate": 9.917525374361912e-05,
"loss": 1.1243,
"step": 21
},
{
"epoch": 0.0718954248366013,
"grad_norm": 0.16108785569667816,
"learning_rate": 9.901899829374047e-05,
"loss": 1.1485,
"step": 22
},
{
"epoch": 0.07516339869281045,
"grad_norm": 0.1587645560503006,
"learning_rate": 9.884934153917997e-05,
"loss": 1.1659,
"step": 23
},
{
"epoch": 0.0784313725490196,
"grad_norm": 0.1613873392343521,
"learning_rate": 9.86663298624003e-05,
"loss": 1.1371,
"step": 24
},
{
"epoch": 0.08169934640522876,
"grad_norm": 0.14722518622875214,
"learning_rate": 9.847001329696653e-05,
"loss": 1.0705,
"step": 25
},
{
"epoch": 0.08496732026143791,
"grad_norm": 0.14173099398612976,
"learning_rate": 9.826044551386744e-05,
"loss": 1.0695,
"step": 26
},
{
"epoch": 0.08823529411764706,
"grad_norm": 0.1377929449081421,
"learning_rate": 9.803768380684242e-05,
"loss": 1.1012,
"step": 27
},
{
"epoch": 0.0915032679738562,
"grad_norm": 0.1335737556219101,
"learning_rate": 9.780178907671789e-05,
"loss": 1.076,
"step": 28
},
{
"epoch": 0.09477124183006536,
"grad_norm": 0.14067432284355164,
"learning_rate": 9.755282581475769e-05,
"loss": 1.0406,
"step": 29
},
{
"epoch": 0.09803921568627451,
"grad_norm": 0.1369902640581131,
"learning_rate": 9.729086208503174e-05,
"loss": 1.0285,
"step": 30
},
{
"epoch": 0.10130718954248366,
"grad_norm": 0.14028145372867584,
"learning_rate": 9.701596950580806e-05,
"loss": 1.0766,
"step": 31
},
{
"epoch": 0.10457516339869281,
"grad_norm": 0.1419658660888672,
"learning_rate": 9.672822322997305e-05,
"loss": 1.0549,
"step": 32
},
{
"epoch": 0.10784313725490197,
"grad_norm": 0.1403641402721405,
"learning_rate": 9.642770192448536e-05,
"loss": 1.1028,
"step": 33
},
{
"epoch": 0.1111111111111111,
"grad_norm": 0.1402372419834137,
"learning_rate": 9.611448774886924e-05,
"loss": 1.0423,
"step": 34
},
{
"epoch": 0.11437908496732026,
"grad_norm": 0.1409500241279602,
"learning_rate": 9.578866633275288e-05,
"loss": 1.0762,
"step": 35
},
{
"epoch": 0.11764705882352941,
"grad_norm": 0.14114561676979065,
"learning_rate": 9.545032675245813e-05,
"loss": 1.0346,
"step": 36
},
{
"epoch": 0.12091503267973856,
"grad_norm": 0.12877798080444336,
"learning_rate": 9.509956150664796e-05,
"loss": 1.0531,
"step": 37
},
{
"epoch": 0.12418300653594772,
"grad_norm": 0.13515013456344604,
"learning_rate": 9.473646649103818e-05,
"loss": 0.9911,
"step": 38
},
{
"epoch": 0.12745098039215685,
"grad_norm": 0.1338815987110138,
"learning_rate": 9.43611409721806e-05,
"loss": 1.048,
"step": 39
},
{
"epoch": 0.13071895424836602,
"grad_norm": 0.1321924328804016,
"learning_rate": 9.397368756032445e-05,
"loss": 1.0316,
"step": 40
},
{
"epoch": 0.13398692810457516,
"grad_norm": 0.13545972108840942,
"learning_rate": 9.357421218136386e-05,
"loss": 1.0201,
"step": 41
},
{
"epoch": 0.13725490196078433,
"grad_norm": 0.1455843299627304,
"learning_rate": 9.316282404787871e-05,
"loss": 1.0314,
"step": 42
},
{
"epoch": 0.14052287581699346,
"grad_norm": 0.14777617156505585,
"learning_rate": 9.273963562927695e-05,
"loss": 1.0812,
"step": 43
},
{
"epoch": 0.1437908496732026,
"grad_norm": 0.1442340612411499,
"learning_rate": 9.230476262104677e-05,
"loss": 1.0044,
"step": 44
},
{
"epoch": 0.14705882352941177,
"grad_norm": 0.142796590924263,
"learning_rate": 9.185832391312644e-05,
"loss": 1.0788,
"step": 45
},
{
"epoch": 0.1503267973856209,
"grad_norm": 0.14356712996959686,
"learning_rate": 9.140044155740101e-05,
"loss": 0.9699,
"step": 46
},
{
"epoch": 0.15359477124183007,
"grad_norm": 0.15727904438972473,
"learning_rate": 9.093124073433463e-05,
"loss": 1.103,
"step": 47
},
{
"epoch": 0.1568627450980392,
"grad_norm": 0.15104670822620392,
"learning_rate": 9.045084971874738e-05,
"loss": 0.991,
"step": 48
},
{
"epoch": 0.16013071895424835,
"grad_norm": 0.1587590128183365,
"learning_rate": 8.995939984474624e-05,
"loss": 0.9936,
"step": 49
},
{
"epoch": 0.16339869281045752,
"grad_norm": 0.3971143960952759,
"learning_rate": 8.945702546981969e-05,
"loss": 1.0333,
"step": 50
},
{
"epoch": 0.16339869281045752,
"eval_loss": 1.043031096458435,
"eval_runtime": 12.3483,
"eval_samples_per_second": 41.706,
"eval_steps_per_second": 10.447,
"step": 50
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.226560015171584e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}