BEiT-RHS-NDA / trainer_state.json
Augusto777's picture
End of training
59e8472 verified
{
"best_metric": 0.8317757009345794,
"best_model_checkpoint": "BEiT-RHS-NDA\\checkpoint-272",
"epoch": 40.0,
"eval_steps": 500,
"global_step": 320,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.5887850467289719,
"eval_loss": 0.6851425170898438,
"eval_runtime": 2.6682,
"eval_samples_per_second": 40.103,
"eval_steps_per_second": 2.624,
"step": 8
},
{
"epoch": 1.25,
"learning_rate": 3.125e-05,
"loss": 0.6911,
"step": 10
},
{
"epoch": 2.0,
"eval_accuracy": 0.5887850467289719,
"eval_loss": 0.6720580458641052,
"eval_runtime": 2.054,
"eval_samples_per_second": 52.093,
"eval_steps_per_second": 3.408,
"step": 16
},
{
"epoch": 2.5,
"learning_rate": 4.9342105263157894e-05,
"loss": 0.6739,
"step": 20
},
{
"epoch": 3.0,
"eval_accuracy": 0.5887850467289719,
"eval_loss": 0.6504218578338623,
"eval_runtime": 2.4524,
"eval_samples_per_second": 43.631,
"eval_steps_per_second": 2.854,
"step": 24
},
{
"epoch": 3.75,
"learning_rate": 4.769736842105263e-05,
"loss": 0.6595,
"step": 30
},
{
"epoch": 4.0,
"eval_accuracy": 0.5887850467289719,
"eval_loss": 0.6432350873947144,
"eval_runtime": 2.1328,
"eval_samples_per_second": 50.169,
"eval_steps_per_second": 3.282,
"step": 32
},
{
"epoch": 5.0,
"learning_rate": 4.605263157894737e-05,
"loss": 0.646,
"step": 40
},
{
"epoch": 5.0,
"eval_accuracy": 0.6822429906542056,
"eval_loss": 0.6316895484924316,
"eval_runtime": 2.4096,
"eval_samples_per_second": 44.405,
"eval_steps_per_second": 2.905,
"step": 40
},
{
"epoch": 6.0,
"eval_accuracy": 0.6915887850467289,
"eval_loss": 0.617514431476593,
"eval_runtime": 2.0335,
"eval_samples_per_second": 52.617,
"eval_steps_per_second": 3.442,
"step": 48
},
{
"epoch": 6.25,
"learning_rate": 4.440789473684211e-05,
"loss": 0.6142,
"step": 50
},
{
"epoch": 7.0,
"eval_accuracy": 0.6915887850467289,
"eval_loss": 0.6269640326499939,
"eval_runtime": 2.0515,
"eval_samples_per_second": 52.156,
"eval_steps_per_second": 3.412,
"step": 56
},
{
"epoch": 7.5,
"learning_rate": 4.2763157894736847e-05,
"loss": 0.608,
"step": 60
},
{
"epoch": 8.0,
"eval_accuracy": 0.6915887850467289,
"eval_loss": 0.6617878675460815,
"eval_runtime": 2.0447,
"eval_samples_per_second": 52.331,
"eval_steps_per_second": 3.424,
"step": 64
},
{
"epoch": 8.75,
"learning_rate": 4.111842105263158e-05,
"loss": 0.5927,
"step": 70
},
{
"epoch": 9.0,
"eval_accuracy": 0.6915887850467289,
"eval_loss": 0.5347260236740112,
"eval_runtime": 2.139,
"eval_samples_per_second": 50.023,
"eval_steps_per_second": 3.273,
"step": 72
},
{
"epoch": 10.0,
"learning_rate": 3.9473684210526316e-05,
"loss": 0.5333,
"step": 80
},
{
"epoch": 10.0,
"eval_accuracy": 0.6448598130841121,
"eval_loss": 0.5743899941444397,
"eval_runtime": 2.1225,
"eval_samples_per_second": 50.412,
"eval_steps_per_second": 3.298,
"step": 80
},
{
"epoch": 11.0,
"eval_accuracy": 0.7476635514018691,
"eval_loss": 0.4974236786365509,
"eval_runtime": 2.059,
"eval_samples_per_second": 51.967,
"eval_steps_per_second": 3.4,
"step": 88
},
{
"epoch": 11.25,
"learning_rate": 3.7828947368421054e-05,
"loss": 0.4987,
"step": 90
},
{
"epoch": 12.0,
"eval_accuracy": 0.6448598130841121,
"eval_loss": 0.5970269441604614,
"eval_runtime": 2.1492,
"eval_samples_per_second": 49.787,
"eval_steps_per_second": 3.257,
"step": 96
},
{
"epoch": 12.5,
"learning_rate": 3.618421052631579e-05,
"loss": 0.5421,
"step": 100
},
{
"epoch": 13.0,
"eval_accuracy": 0.7383177570093458,
"eval_loss": 0.5137068629264832,
"eval_runtime": 2.4494,
"eval_samples_per_second": 43.684,
"eval_steps_per_second": 2.858,
"step": 104
},
{
"epoch": 13.75,
"learning_rate": 3.4539473684210524e-05,
"loss": 0.4881,
"step": 110
},
{
"epoch": 14.0,
"eval_accuracy": 0.7663551401869159,
"eval_loss": 0.47269827127456665,
"eval_runtime": 2.4181,
"eval_samples_per_second": 44.249,
"eval_steps_per_second": 2.895,
"step": 112
},
{
"epoch": 15.0,
"learning_rate": 3.289473684210527e-05,
"loss": 0.4408,
"step": 120
},
{
"epoch": 15.0,
"eval_accuracy": 0.7663551401869159,
"eval_loss": 0.5161357522010803,
"eval_runtime": 2.086,
"eval_samples_per_second": 51.295,
"eval_steps_per_second": 3.356,
"step": 120
},
{
"epoch": 16.0,
"eval_accuracy": 0.6915887850467289,
"eval_loss": 0.6732468008995056,
"eval_runtime": 2.0757,
"eval_samples_per_second": 51.548,
"eval_steps_per_second": 3.372,
"step": 128
},
{
"epoch": 16.25,
"learning_rate": 3.125e-05,
"loss": 0.4923,
"step": 130
},
{
"epoch": 17.0,
"eval_accuracy": 0.7009345794392523,
"eval_loss": 0.6567767262458801,
"eval_runtime": 2.0563,
"eval_samples_per_second": 52.036,
"eval_steps_per_second": 3.404,
"step": 136
},
{
"epoch": 17.5,
"learning_rate": 2.9605263157894735e-05,
"loss": 0.4135,
"step": 140
},
{
"epoch": 18.0,
"eval_accuracy": 0.7009345794392523,
"eval_loss": 0.665261447429657,
"eval_runtime": 2.454,
"eval_samples_per_second": 43.602,
"eval_steps_per_second": 2.852,
"step": 144
},
{
"epoch": 18.75,
"learning_rate": 2.7960526315789477e-05,
"loss": 0.4308,
"step": 150
},
{
"epoch": 19.0,
"eval_accuracy": 0.719626168224299,
"eval_loss": 0.6031992435455322,
"eval_runtime": 2.0319,
"eval_samples_per_second": 52.66,
"eval_steps_per_second": 3.445,
"step": 152
},
{
"epoch": 20.0,
"learning_rate": 2.6315789473684212e-05,
"loss": 0.3837,
"step": 160
},
{
"epoch": 20.0,
"eval_accuracy": 0.8037383177570093,
"eval_loss": 0.44923561811447144,
"eval_runtime": 2.1355,
"eval_samples_per_second": 50.106,
"eval_steps_per_second": 3.278,
"step": 160
},
{
"epoch": 21.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.454855740070343,
"eval_runtime": 2.1041,
"eval_samples_per_second": 50.854,
"eval_steps_per_second": 3.327,
"step": 168
},
{
"epoch": 21.25,
"learning_rate": 2.4671052631578947e-05,
"loss": 0.3297,
"step": 170
},
{
"epoch": 22.0,
"eval_accuracy": 0.7663551401869159,
"eval_loss": 0.5525509715080261,
"eval_runtime": 2.3723,
"eval_samples_per_second": 45.104,
"eval_steps_per_second": 2.951,
"step": 176
},
{
"epoch": 22.5,
"learning_rate": 2.3026315789473685e-05,
"loss": 0.3264,
"step": 180
},
{
"epoch": 23.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.5171772241592407,
"eval_runtime": 2.1842,
"eval_samples_per_second": 48.989,
"eval_steps_per_second": 3.205,
"step": 184
},
{
"epoch": 23.75,
"learning_rate": 2.1381578947368423e-05,
"loss": 0.3487,
"step": 190
},
{
"epoch": 24.0,
"eval_accuracy": 0.7663551401869159,
"eval_loss": 0.5104933381080627,
"eval_runtime": 2.1764,
"eval_samples_per_second": 49.164,
"eval_steps_per_second": 3.216,
"step": 192
},
{
"epoch": 25.0,
"learning_rate": 1.9736842105263158e-05,
"loss": 0.2892,
"step": 200
},
{
"epoch": 25.0,
"eval_accuracy": 0.7757009345794392,
"eval_loss": 0.4565769135951996,
"eval_runtime": 2.1452,
"eval_samples_per_second": 49.879,
"eval_steps_per_second": 3.263,
"step": 200
},
{
"epoch": 26.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.523303747177124,
"eval_runtime": 2.1458,
"eval_samples_per_second": 49.865,
"eval_steps_per_second": 3.262,
"step": 208
},
{
"epoch": 26.25,
"learning_rate": 1.8092105263157896e-05,
"loss": 0.2505,
"step": 210
},
{
"epoch": 27.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.4817139804363251,
"eval_runtime": 2.0456,
"eval_samples_per_second": 52.308,
"eval_steps_per_second": 3.422,
"step": 216
},
{
"epoch": 27.5,
"learning_rate": 1.6447368421052635e-05,
"loss": 0.2542,
"step": 220
},
{
"epoch": 28.0,
"eval_accuracy": 0.8037383177570093,
"eval_loss": 0.5034652948379517,
"eval_runtime": 2.3168,
"eval_samples_per_second": 46.184,
"eval_steps_per_second": 3.021,
"step": 224
},
{
"epoch": 28.75,
"learning_rate": 1.4802631578947368e-05,
"loss": 0.2285,
"step": 230
},
{
"epoch": 29.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.5281862616539001,
"eval_runtime": 2.1165,
"eval_samples_per_second": 50.556,
"eval_steps_per_second": 3.307,
"step": 232
},
{
"epoch": 30.0,
"learning_rate": 1.3157894736842106e-05,
"loss": 0.2053,
"step": 240
},
{
"epoch": 30.0,
"eval_accuracy": 0.8130841121495327,
"eval_loss": 0.5637905597686768,
"eval_runtime": 2.1396,
"eval_samples_per_second": 50.009,
"eval_steps_per_second": 3.272,
"step": 240
},
{
"epoch": 31.0,
"eval_accuracy": 0.7570093457943925,
"eval_loss": 0.6189974546432495,
"eval_runtime": 2.2612,
"eval_samples_per_second": 47.32,
"eval_steps_per_second": 3.096,
"step": 248
},
{
"epoch": 31.25,
"learning_rate": 1.1513157894736843e-05,
"loss": 0.2205,
"step": 250
},
{
"epoch": 32.0,
"eval_accuracy": 0.7850467289719626,
"eval_loss": 0.614178478717804,
"eval_runtime": 2.5358,
"eval_samples_per_second": 42.196,
"eval_steps_per_second": 2.761,
"step": 256
},
{
"epoch": 32.5,
"learning_rate": 9.868421052631579e-06,
"loss": 0.2081,
"step": 260
},
{
"epoch": 33.0,
"eval_accuracy": 0.7850467289719626,
"eval_loss": 0.575212836265564,
"eval_runtime": 2.0662,
"eval_samples_per_second": 51.787,
"eval_steps_per_second": 3.388,
"step": 264
},
{
"epoch": 33.75,
"learning_rate": 8.223684210526317e-06,
"loss": 0.2075,
"step": 270
},
{
"epoch": 34.0,
"eval_accuracy": 0.8317757009345794,
"eval_loss": 0.5321738719940186,
"eval_runtime": 2.1157,
"eval_samples_per_second": 50.573,
"eval_steps_per_second": 3.309,
"step": 272
},
{
"epoch": 35.0,
"learning_rate": 6.578947368421053e-06,
"loss": 0.2286,
"step": 280
},
{
"epoch": 35.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.5312566161155701,
"eval_runtime": 2.4167,
"eval_samples_per_second": 44.276,
"eval_steps_per_second": 2.897,
"step": 280
},
{
"epoch": 36.0,
"eval_accuracy": 0.8130841121495327,
"eval_loss": 0.5189207792282104,
"eval_runtime": 2.2397,
"eval_samples_per_second": 47.773,
"eval_steps_per_second": 3.125,
"step": 288
},
{
"epoch": 36.25,
"learning_rate": 4.9342105263157895e-06,
"loss": 0.2008,
"step": 290
},
{
"epoch": 37.0,
"eval_accuracy": 0.7850467289719626,
"eval_loss": 0.5589626431465149,
"eval_runtime": 2.5429,
"eval_samples_per_second": 42.078,
"eval_steps_per_second": 2.753,
"step": 296
},
{
"epoch": 37.5,
"learning_rate": 3.2894736842105265e-06,
"loss": 0.1884,
"step": 300
},
{
"epoch": 38.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.5488373041152954,
"eval_runtime": 2.042,
"eval_samples_per_second": 52.399,
"eval_steps_per_second": 3.428,
"step": 304
},
{
"epoch": 38.75,
"learning_rate": 1.6447368421052632e-06,
"loss": 0.1819,
"step": 310
},
{
"epoch": 39.0,
"eval_accuracy": 0.8037383177570093,
"eval_loss": 0.556251585483551,
"eval_runtime": 2.015,
"eval_samples_per_second": 53.102,
"eval_steps_per_second": 3.474,
"step": 312
},
{
"epoch": 40.0,
"learning_rate": 0.0,
"loss": 0.1698,
"step": 320
},
{
"epoch": 40.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.5678603053092957,
"eval_runtime": 2.1445,
"eval_samples_per_second": 49.894,
"eval_steps_per_second": 3.264,
"step": 320
},
{
"epoch": 40.0,
"step": 320,
"total_flos": 1.5429806632629043e+18,
"train_loss": 0.3920826520770788,
"train_runtime": 766.5439,
"train_samples_per_second": 25.987,
"train_steps_per_second": 0.417
}
],
"logging_steps": 10,
"max_steps": 320,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"total_flos": 1.5429806632629043e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}