{
  "best_metric": 0.8317757009345794,
  "best_model_checkpoint": "BEiT-RHS-NDA\\checkpoint-272",
  "epoch": 40.0,
  "eval_steps": 500,
  "global_step": 320,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 0.6851425170898438,
      "eval_runtime": 2.6682,
      "eval_samples_per_second": 40.103,
      "eval_steps_per_second": 2.624,
      "step": 8
    },
    {
      "epoch": 1.25,
      "learning_rate": 3.125e-05,
      "loss": 0.6911,
      "step": 10
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 0.6720580458641052,
      "eval_runtime": 2.054,
      "eval_samples_per_second": 52.093,
      "eval_steps_per_second": 3.408,
      "step": 16
    },
    {
      "epoch": 2.5,
      "learning_rate": 4.9342105263157894e-05,
      "loss": 0.6739,
      "step": 20
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 0.6504218578338623,
      "eval_runtime": 2.4524,
      "eval_samples_per_second": 43.631,
      "eval_steps_per_second": 2.854,
      "step": 24
    },
    {
      "epoch": 3.75,
      "learning_rate": 4.769736842105263e-05,
      "loss": 0.6595,
      "step": 30
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 0.6432350873947144,
      "eval_runtime": 2.1328,
      "eval_samples_per_second": 50.169,
      "eval_steps_per_second": 3.282,
      "step": 32
    },
    {
      "epoch": 5.0,
      "learning_rate": 4.605263157894737e-05,
      "loss": 0.646,
      "step": 40
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.6822429906542056,
      "eval_loss": 0.6316895484924316,
      "eval_runtime": 2.4096,
      "eval_samples_per_second": 44.405,
      "eval_steps_per_second": 2.905,
      "step": 40
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.6915887850467289,
      "eval_loss": 0.617514431476593,
      "eval_runtime": 2.0335,
      "eval_samples_per_second": 52.617,
      "eval_steps_per_second": 3.442,
      "step": 48
    },
    {
      "epoch": 6.25,
      "learning_rate": 4.440789473684211e-05,
      "loss": 0.6142,
      "step": 50
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.6915887850467289,
      "eval_loss": 0.6269640326499939,
      "eval_runtime": 2.0515,
      "eval_samples_per_second": 52.156,
      "eval_steps_per_second": 3.412,
      "step": 56
    },
    {
      "epoch": 7.5,
      "learning_rate": 4.2763157894736847e-05,
      "loss": 0.608,
      "step": 60
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.6915887850467289,
      "eval_loss": 0.6617878675460815,
      "eval_runtime": 2.0447,
      "eval_samples_per_second": 52.331,
      "eval_steps_per_second": 3.424,
      "step": 64
    },
    {
      "epoch": 8.75,
      "learning_rate": 4.111842105263158e-05,
      "loss": 0.5927,
      "step": 70
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.6915887850467289,
      "eval_loss": 0.5347260236740112,
      "eval_runtime": 2.139,
      "eval_samples_per_second": 50.023,
      "eval_steps_per_second": 3.273,
      "step": 72
    },
    {
      "epoch": 10.0,
      "learning_rate": 3.9473684210526316e-05,
      "loss": 0.5333,
      "step": 80
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.6448598130841121,
      "eval_loss": 0.5743899941444397,
      "eval_runtime": 2.1225,
      "eval_samples_per_second": 50.412,
      "eval_steps_per_second": 3.298,
      "step": 80
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.7476635514018691,
      "eval_loss": 0.4974236786365509,
      "eval_runtime": 2.059,
      "eval_samples_per_second": 51.967,
      "eval_steps_per_second": 3.4,
      "step": 88
    },
    {
      "epoch": 11.25,
      "learning_rate": 3.7828947368421054e-05,
      "loss": 0.4987,
      "step": 90
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.6448598130841121,
      "eval_loss": 0.5970269441604614,
      "eval_runtime": 2.1492,
      "eval_samples_per_second": 49.787,
      "eval_steps_per_second": 3.257,
      "step": 96
    },
    {
      "epoch": 12.5,
      "learning_rate": 3.618421052631579e-05,
      "loss": 0.5421,
      "step": 100
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.7383177570093458,
      "eval_loss": 0.5137068629264832,
      "eval_runtime": 2.4494,
      "eval_samples_per_second": 43.684,
      "eval_steps_per_second": 2.858,
      "step": 104
    },
    {
      "epoch": 13.75,
      "learning_rate": 3.4539473684210524e-05,
      "loss": 0.4881,
      "step": 110
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.7663551401869159,
      "eval_loss": 0.47269827127456665,
      "eval_runtime": 2.4181,
      "eval_samples_per_second": 44.249,
      "eval_steps_per_second": 2.895,
      "step": 112
    },
    {
      "epoch": 15.0,
      "learning_rate": 3.289473684210527e-05,
      "loss": 0.4408,
      "step": 120
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.7663551401869159,
      "eval_loss": 0.5161357522010803,
      "eval_runtime": 2.086,
      "eval_samples_per_second": 51.295,
      "eval_steps_per_second": 3.356,
      "step": 120
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.6915887850467289,
      "eval_loss": 0.6732468008995056,
      "eval_runtime": 2.0757,
      "eval_samples_per_second": 51.548,
      "eval_steps_per_second": 3.372,
      "step": 128
    },
    {
      "epoch": 16.25,
      "learning_rate": 3.125e-05,
      "loss": 0.4923,
      "step": 130
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.7009345794392523,
      "eval_loss": 0.6567767262458801,
      "eval_runtime": 2.0563,
      "eval_samples_per_second": 52.036,
      "eval_steps_per_second": 3.404,
      "step": 136
    },
    {
      "epoch": 17.5,
      "learning_rate": 2.9605263157894735e-05,
      "loss": 0.4135,
      "step": 140
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.7009345794392523,
      "eval_loss": 0.665261447429657,
      "eval_runtime": 2.454,
      "eval_samples_per_second": 43.602,
      "eval_steps_per_second": 2.852,
      "step": 144
    },
    {
      "epoch": 18.75,
      "learning_rate": 2.7960526315789477e-05,
      "loss": 0.4308,
      "step": 150
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.719626168224299,
      "eval_loss": 0.6031992435455322,
      "eval_runtime": 2.0319,
      "eval_samples_per_second": 52.66,
      "eval_steps_per_second": 3.445,
      "step": 152
    },
    {
      "epoch": 20.0,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 0.3837,
      "step": 160
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.8037383177570093,
      "eval_loss": 0.44923561811447144,
      "eval_runtime": 2.1355,
      "eval_samples_per_second": 50.106,
      "eval_steps_per_second": 3.278,
      "step": 160
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.454855740070343,
      "eval_runtime": 2.1041,
      "eval_samples_per_second": 50.854,
      "eval_steps_per_second": 3.327,
      "step": 168
    },
    {
      "epoch": 21.25,
      "learning_rate": 2.4671052631578947e-05,
      "loss": 0.3297,
      "step": 170
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.7663551401869159,
      "eval_loss": 0.5525509715080261,
      "eval_runtime": 2.3723,
      "eval_samples_per_second": 45.104,
      "eval_steps_per_second": 2.951,
      "step": 176
    },
    {
      "epoch": 22.5,
      "learning_rate": 2.3026315789473685e-05,
      "loss": 0.3264,
      "step": 180
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.5171772241592407,
      "eval_runtime": 2.1842,
      "eval_samples_per_second": 48.989,
      "eval_steps_per_second": 3.205,
      "step": 184
    },
    {
      "epoch": 23.75,
      "learning_rate": 2.1381578947368423e-05,
      "loss": 0.3487,
      "step": 190
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.7663551401869159,
      "eval_loss": 0.5104933381080627,
      "eval_runtime": 2.1764,
      "eval_samples_per_second": 49.164,
      "eval_steps_per_second": 3.216,
      "step": 192
    },
    {
      "epoch": 25.0,
      "learning_rate": 1.9736842105263158e-05,
      "loss": 0.2892,
      "step": 200
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.7757009345794392,
      "eval_loss": 0.4565769135951996,
      "eval_runtime": 2.1452,
      "eval_samples_per_second": 49.879,
      "eval_steps_per_second": 3.263,
      "step": 200
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.523303747177124,
      "eval_runtime": 2.1458,
      "eval_samples_per_second": 49.865,
      "eval_steps_per_second": 3.262,
      "step": 208
    },
    {
      "epoch": 26.25,
      "learning_rate": 1.8092105263157896e-05,
      "loss": 0.2505,
      "step": 210
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.4817139804363251,
      "eval_runtime": 2.0456,
      "eval_samples_per_second": 52.308,
      "eval_steps_per_second": 3.422,
      "step": 216
    },
    {
      "epoch": 27.5,
      "learning_rate": 1.6447368421052635e-05,
      "loss": 0.2542,
      "step": 220
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.8037383177570093,
      "eval_loss": 0.5034652948379517,
      "eval_runtime": 2.3168,
      "eval_samples_per_second": 46.184,
      "eval_steps_per_second": 3.021,
      "step": 224
    },
    {
      "epoch": 28.75,
      "learning_rate": 1.4802631578947368e-05,
      "loss": 0.2285,
      "step": 230
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.5281862616539001,
      "eval_runtime": 2.1165,
      "eval_samples_per_second": 50.556,
      "eval_steps_per_second": 3.307,
      "step": 232
    },
    {
      "epoch": 30.0,
      "learning_rate": 1.3157894736842106e-05,
      "loss": 0.2053,
      "step": 240
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.8130841121495327,
      "eval_loss": 0.5637905597686768,
      "eval_runtime": 2.1396,
      "eval_samples_per_second": 50.009,
      "eval_steps_per_second": 3.272,
      "step": 240
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.7570093457943925,
      "eval_loss": 0.6189974546432495,
      "eval_runtime": 2.2612,
      "eval_samples_per_second": 47.32,
      "eval_steps_per_second": 3.096,
      "step": 248
    },
    {
      "epoch": 31.25,
      "learning_rate": 1.1513157894736843e-05,
      "loss": 0.2205,
      "step": 250
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.7850467289719626,
      "eval_loss": 0.614178478717804,
      "eval_runtime": 2.5358,
      "eval_samples_per_second": 42.196,
      "eval_steps_per_second": 2.761,
      "step": 256
    },
    {
      "epoch": 32.5,
      "learning_rate": 9.868421052631579e-06,
      "loss": 0.2081,
      "step": 260
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 0.7850467289719626,
      "eval_loss": 0.575212836265564,
      "eval_runtime": 2.0662,
      "eval_samples_per_second": 51.787,
      "eval_steps_per_second": 3.388,
      "step": 264
    },
    {
      "epoch": 33.75,
      "learning_rate": 8.223684210526317e-06,
      "loss": 0.2075,
      "step": 270
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.8317757009345794,
      "eval_loss": 0.5321738719940186,
      "eval_runtime": 2.1157,
      "eval_samples_per_second": 50.573,
      "eval_steps_per_second": 3.309,
      "step": 272
    },
    {
      "epoch": 35.0,
      "learning_rate": 6.578947368421053e-06,
      "loss": 0.2286,
      "step": 280
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.5312566161155701,
      "eval_runtime": 2.4167,
      "eval_samples_per_second": 44.276,
      "eval_steps_per_second": 2.897,
      "step": 280
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.8130841121495327,
      "eval_loss": 0.5189207792282104,
      "eval_runtime": 2.2397,
      "eval_samples_per_second": 47.773,
      "eval_steps_per_second": 3.125,
      "step": 288
    },
    {
      "epoch": 36.25,
      "learning_rate": 4.9342105263157895e-06,
      "loss": 0.2008,
      "step": 290
    },
    {
      "epoch": 37.0,
      "eval_accuracy": 0.7850467289719626,
      "eval_loss": 0.5589626431465149,
      "eval_runtime": 2.5429,
      "eval_samples_per_second": 42.078,
      "eval_steps_per_second": 2.753,
      "step": 296
    },
    {
      "epoch": 37.5,
      "learning_rate": 3.2894736842105265e-06,
      "loss": 0.1884,
      "step": 300
    },
    {
      "epoch": 38.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.5488373041152954,
      "eval_runtime": 2.042,
      "eval_samples_per_second": 52.399,
      "eval_steps_per_second": 3.428,
      "step": 304
    },
    {
      "epoch": 38.75,
      "learning_rate": 1.6447368421052632e-06,
      "loss": 0.1819,
      "step": 310
    },
    {
      "epoch": 39.0,
      "eval_accuracy": 0.8037383177570093,
      "eval_loss": 0.556251585483551,
      "eval_runtime": 2.015,
      "eval_samples_per_second": 53.102,
      "eval_steps_per_second": 3.474,
      "step": 312
    },
    {
      "epoch": 40.0,
      "learning_rate": 0.0,
      "loss": 0.1698,
      "step": 320
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 0.794392523364486,
      "eval_loss": 0.5678603053092957,
      "eval_runtime": 2.1445,
      "eval_samples_per_second": 49.894,
      "eval_steps_per_second": 3.264,
      "step": 320
    },
    {
      "epoch": 40.0,
      "step": 320,
      "total_flos": 1.5429806632629043e+18,
      "train_loss": 0.3920826520770788,
      "train_runtime": 766.5439,
      "train_samples_per_second": 25.987,
      "train_steps_per_second": 0.417
    }
  ],
  "logging_steps": 10,
  "max_steps": 320,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 40,
  "save_steps": 500,
  "total_flos": 1.5429806632629043e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}