{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9966329966329966,
"eval_steps": 100,
"global_step": 185,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.026936026936026935,
"grad_norm": 67.7872314453125,
"learning_rate": 1.9976687691905394e-05,
"loss": 6.0971,
"num_input_tokens_seen": 82560,
"step": 5
},
{
"epoch": 0.05387205387205387,
"grad_norm": 1003.2200317382812,
"learning_rate": 1.988216769442353e-05,
"loss": 5.282,
"num_input_tokens_seen": 165120,
"step": 10
},
{
"epoch": 0.08080808080808081,
"grad_norm": 47.967403411865234,
"learning_rate": 1.9715670893979416e-05,
"loss": 4.301,
"num_input_tokens_seen": 247680,
"step": 15
},
{
"epoch": 0.10774410774410774,
"grad_norm": 66.77752685546875,
"learning_rate": 1.947840997060136e-05,
"loss": 2.5698,
"num_input_tokens_seen": 330240,
"step": 20
},
{
"epoch": 0.13468013468013468,
"grad_norm": 21.431270599365234,
"learning_rate": 1.917211301505453e-05,
"loss": 1.0565,
"num_input_tokens_seen": 412800,
"step": 25
},
{
"epoch": 0.16161616161616163,
"grad_norm": 5.1596455574035645,
"learning_rate": 1.879901094228584e-05,
"loss": 0.2428,
"num_input_tokens_seen": 495360,
"step": 30
},
{
"epoch": 0.18855218855218855,
"grad_norm": 2.640035390853882,
"learning_rate": 1.836182124254711e-05,
"loss": 0.2839,
"num_input_tokens_seen": 577920,
"step": 35
},
{
"epoch": 0.21548821548821548,
"grad_norm": 2.514108896255493,
"learning_rate": 1.7863728188545326e-05,
"loss": 0.0107,
"num_input_tokens_seen": 660480,
"step": 40
},
{
"epoch": 0.24242424242424243,
"grad_norm": 0.09124995023012161,
"learning_rate": 1.730835964278124e-05,
"loss": 0.0023,
"num_input_tokens_seen": 743040,
"step": 45
},
{
"epoch": 0.26936026936026936,
"grad_norm": 11.549006462097168,
"learning_rate": 1.6699760634000166e-05,
"loss": 0.2308,
"num_input_tokens_seen": 825600,
"step": 50
},
{
"epoch": 0.2962962962962963,
"grad_norm": 1.6471961736679077,
"learning_rate": 1.6042363895210948e-05,
"loss": 0.0011,
"num_input_tokens_seen": 908160,
"step": 55
},
{
"epoch": 0.32323232323232326,
"grad_norm": 0.02977609820663929,
"learning_rate": 1.5340957577859605e-05,
"loss": 0.0005,
"num_input_tokens_seen": 990720,
"step": 60
},
{
"epoch": 0.3501683501683502,
"grad_norm": 0.17247651517391205,
"learning_rate": 1.4600650377311523e-05,
"loss": 0.0012,
"num_input_tokens_seen": 1073280,
"step": 65
},
{
"epoch": 0.3771043771043771,
"grad_norm": 71.56356811523438,
"learning_rate": 1.3826834323650899e-05,
"loss": 0.6015,
"num_input_tokens_seen": 1155840,
"step": 70
},
{
"epoch": 0.40404040404040403,
"grad_norm": 0.016915204003453255,
"learning_rate": 1.302514550881076e-05,
"loss": 0.0003,
"num_input_tokens_seen": 1238400,
"step": 75
},
{
"epoch": 0.43097643097643096,
"grad_norm": 0.008289608173072338,
"learning_rate": 1.2201423036077657e-05,
"loss": 0.0002,
"num_input_tokens_seen": 1320960,
"step": 80
},
{
"epoch": 0.45791245791245794,
"grad_norm": 0.01277772057801485,
"learning_rate": 1.1361666490962468e-05,
"loss": 0.0025,
"num_input_tokens_seen": 1403520,
"step": 85
},
{
"epoch": 0.48484848484848486,
"grad_norm": 0.006596778519451618,
"learning_rate": 1.0511992243198335e-05,
"loss": 0.0003,
"num_input_tokens_seen": 1486080,
"step": 90
},
{
"epoch": 0.5117845117845118,
"grad_norm": 0.020030811429023743,
"learning_rate": 9.658588898140322e-06,
"loss": 0.046,
"num_input_tokens_seen": 1568640,
"step": 95
},
{
"epoch": 0.5387205387205387,
"grad_norm": 0.08077715337276459,
"learning_rate": 8.807672222036692e-06,
"loss": 0.0014,
"num_input_tokens_seen": 1651200,
"step": 100
},
{
"epoch": 0.5387205387205387,
"eval_loss": 0.00024870518245734274,
"eval_runtime": 7.5641,
"eval_samples_per_second": 3.966,
"eval_steps_per_second": 1.983,
"num_input_tokens_seen": 1651200,
"step": 100
},
{
"epoch": 0.5656565656565656,
"grad_norm": 0.011016723699867725,
"learning_rate": 7.965439869473664e-06,
"loss": 0.0007,
"num_input_tokens_seen": 1733760,
"step": 105
},
{
"epoch": 0.5925925925925926,
"grad_norm": 0.23388978838920593,
"learning_rate": 7.1380262427365885e-06,
"loss": 0.0005,
"num_input_tokens_seen": 1816320,
"step": 110
},
{
"epoch": 0.6195286195286195,
"grad_norm": 0.07084622979164124,
"learning_rate": 6.331457811869437e-06,
"loss": 0.0002,
"num_input_tokens_seen": 1898880,
"step": 115
},
{
"epoch": 0.6464646464646465,
"grad_norm": 0.035638194531202316,
"learning_rate": 5.55160922085916e-06,
"loss": 0.0002,
"num_input_tokens_seen": 1981440,
"step": 120
},
{
"epoch": 0.6734006734006734,
"grad_norm": 0.008004716597497463,
"learning_rate": 4.804160499645667e-06,
"loss": 0.0001,
"num_input_tokens_seen": 2064000,
"step": 125
},
{
"epoch": 0.7003367003367004,
"grad_norm": 0.008612624369561672,
"learning_rate": 4.094555693603891e-06,
"loss": 0.0013,
"num_input_tokens_seen": 2146560,
"step": 130
},
{
"epoch": 0.7272727272727273,
"grad_norm": 0.03172842785716057,
"learning_rate": 3.4279632118202744e-06,
"loss": 0.0001,
"num_input_tokens_seen": 2229120,
"step": 135
},
{
"epoch": 0.7542087542087542,
"grad_norm": 0.004970299545675516,
"learning_rate": 2.809238182967092e-06,
"loss": 0.0002,
"num_input_tokens_seen": 2311680,
"step": 140
},
{
"epoch": 0.7811447811447811,
"grad_norm": 0.1790950447320938,
"learning_rate": 2.2428870929558012e-06,
"loss": 0.0002,
"num_input_tokens_seen": 2394240,
"step": 145
},
{
"epoch": 0.8080808080808081,
"grad_norm": 40.12051773071289,
"learning_rate": 1.7330349619311415e-06,
"loss": 0.0002,
"num_input_tokens_seen": 2476800,
"step": 150
},
{
"epoch": 0.835016835016835,
"grad_norm": 0.004456690046936274,
"learning_rate": 1.2833952996724864e-06,
"loss": 0.0743,
"num_input_tokens_seen": 2559360,
"step": 155
},
{
"epoch": 0.8619528619528619,
"grad_norm": 0.046330228447914124,
"learning_rate": 8.972430582323788e-07,
"loss": 0.0001,
"num_input_tokens_seen": 2641920,
"step": 160
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.015978099778294563,
"learning_rate": 5.77390778811796e-07,
"loss": 0.0001,
"num_input_tokens_seen": 2724480,
"step": 165
},
{
"epoch": 0.9158249158249159,
"grad_norm": 0.014428800903260708,
"learning_rate": 3.261681066064859e-07,
"loss": 0.0002,
"num_input_tokens_seen": 2807040,
"step": 170
},
{
"epoch": 0.9427609427609428,
"grad_norm": 0.03189552202820778,
"learning_rate": 1.4540482282803136e-07,
"loss": 0.0005,
"num_input_tokens_seen": 2889600,
"step": 175
},
{
"epoch": 0.9696969696969697,
"grad_norm": 0.008983040228486061,
"learning_rate": 3.641751748600042e-08,
"loss": 0.0001,
"num_input_tokens_seen": 2972160,
"step": 180
},
{
"epoch": 0.9966329966329966,
"grad_norm": 0.006297091022133827,
"learning_rate": 0.0,
"loss": 0.2164,
"num_input_tokens_seen": 3054720,
"step": 185
},
{
"epoch": 0.9966329966329966,
"num_input_tokens_seen": 3054720,
"step": 185,
"total_flos": 4.410158569555968e+16,
"train_loss": 0.5683108514618456,
"train_runtime": 2363.4699,
"train_samples_per_second": 1.257,
"train_steps_per_second": 0.078
}
],
"logging_steps": 5,
"max_steps": 185,
"num_input_tokens_seen": 3054720,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.410158569555968e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}