beit-base-patch16-224-OT / trainer_state.json
Augusto777's picture
End of training
ce6a2c9 verified
raw
history blame
11.8 kB
{
"best_metric": 0.8225806451612904,
"best_model_checkpoint": "beit-base-patch16-224-OT\\checkpoint-99",
"epoch": 35.55555555555556,
"eval_steps": 500,
"global_step": 160,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.89,
"eval_accuracy": 0.14516129032258066,
"eval_loss": 1.7602994441986084,
"eval_runtime": 2.148,
"eval_samples_per_second": 28.864,
"eval_steps_per_second": 1.862,
"step": 4
},
{
"epoch": 2.0,
"eval_accuracy": 0.14516129032258066,
"eval_loss": 1.685233473777771,
"eval_runtime": 2.219,
"eval_samples_per_second": 27.94,
"eval_steps_per_second": 1.803,
"step": 9
},
{
"epoch": 2.22,
"learning_rate": 3.125e-05,
"loss": 1.7571,
"step": 10
},
{
"epoch": 2.89,
"eval_accuracy": 0.14516129032258066,
"eval_loss": 1.5655227899551392,
"eval_runtime": 2.222,
"eval_samples_per_second": 27.903,
"eval_steps_per_second": 1.8,
"step": 13
},
{
"epoch": 4.0,
"eval_accuracy": 0.14516129032258066,
"eval_loss": 1.3815577030181885,
"eval_runtime": 2.3696,
"eval_samples_per_second": 26.165,
"eval_steps_per_second": 1.688,
"step": 18
},
{
"epoch": 4.44,
"learning_rate": 4.8611111111111115e-05,
"loss": 1.5255,
"step": 20
},
{
"epoch": 4.89,
"eval_accuracy": 0.3225806451612903,
"eval_loss": 1.2598901987075806,
"eval_runtime": 2.178,
"eval_samples_per_second": 28.466,
"eval_steps_per_second": 1.837,
"step": 22
},
{
"epoch": 6.0,
"eval_accuracy": 0.4838709677419355,
"eval_loss": 1.153410792350769,
"eval_runtime": 2.227,
"eval_samples_per_second": 27.84,
"eval_steps_per_second": 1.796,
"step": 27
},
{
"epoch": 6.67,
"learning_rate": 4.5138888888888894e-05,
"loss": 1.2245,
"step": 30
},
{
"epoch": 6.89,
"eval_accuracy": 0.4838709677419355,
"eval_loss": 1.0641188621520996,
"eval_runtime": 2.203,
"eval_samples_per_second": 28.143,
"eval_steps_per_second": 1.816,
"step": 31
},
{
"epoch": 8.0,
"eval_accuracy": 0.43548387096774194,
"eval_loss": 1.0371758937835693,
"eval_runtime": 2.2355,
"eval_samples_per_second": 27.734,
"eval_steps_per_second": 1.789,
"step": 36
},
{
"epoch": 8.89,
"learning_rate": 4.166666666666667e-05,
"loss": 1.0438,
"step": 40
},
{
"epoch": 8.89,
"eval_accuracy": 0.43548387096774194,
"eval_loss": 0.9987961649894714,
"eval_runtime": 2.3086,
"eval_samples_per_second": 26.857,
"eval_steps_per_second": 1.733,
"step": 40
},
{
"epoch": 10.0,
"eval_accuracy": 0.5161290322580645,
"eval_loss": 0.9259945154190063,
"eval_runtime": 2.2905,
"eval_samples_per_second": 27.068,
"eval_steps_per_second": 1.746,
"step": 45
},
{
"epoch": 10.89,
"eval_accuracy": 0.7096774193548387,
"eval_loss": 0.9084866046905518,
"eval_runtime": 2.2545,
"eval_samples_per_second": 27.5,
"eval_steps_per_second": 1.774,
"step": 49
},
{
"epoch": 11.11,
"learning_rate": 3.8194444444444444e-05,
"loss": 0.9727,
"step": 50
},
{
"epoch": 12.0,
"eval_accuracy": 0.7258064516129032,
"eval_loss": 0.843325674533844,
"eval_runtime": 2.1865,
"eval_samples_per_second": 28.355,
"eval_steps_per_second": 1.829,
"step": 54
},
{
"epoch": 12.89,
"eval_accuracy": 0.7741935483870968,
"eval_loss": 0.7529016137123108,
"eval_runtime": 2.1995,
"eval_samples_per_second": 28.188,
"eval_steps_per_second": 1.819,
"step": 58
},
{
"epoch": 13.33,
"learning_rate": 3.472222222222222e-05,
"loss": 0.8469,
"step": 60
},
{
"epoch": 14.0,
"eval_accuracy": 0.7580645161290323,
"eval_loss": 0.7186572551727295,
"eval_runtime": 2.2225,
"eval_samples_per_second": 27.896,
"eval_steps_per_second": 1.8,
"step": 63
},
{
"epoch": 14.89,
"eval_accuracy": 0.7258064516129032,
"eval_loss": 0.6805762648582458,
"eval_runtime": 2.1855,
"eval_samples_per_second": 28.369,
"eval_steps_per_second": 1.83,
"step": 67
},
{
"epoch": 15.56,
"learning_rate": 3.125e-05,
"loss": 0.6908,
"step": 70
},
{
"epoch": 16.0,
"eval_accuracy": 0.7580645161290323,
"eval_loss": 0.6575707197189331,
"eval_runtime": 2.2315,
"eval_samples_per_second": 27.784,
"eval_steps_per_second": 1.792,
"step": 72
},
{
"epoch": 16.89,
"eval_accuracy": 0.7903225806451613,
"eval_loss": 0.5742202997207642,
"eval_runtime": 2.261,
"eval_samples_per_second": 27.421,
"eval_steps_per_second": 1.769,
"step": 76
},
{
"epoch": 17.78,
"learning_rate": 2.777777777777778e-05,
"loss": 0.6064,
"step": 80
},
{
"epoch": 18.0,
"eval_accuracy": 0.7580645161290323,
"eval_loss": 0.6446634531021118,
"eval_runtime": 2.3311,
"eval_samples_per_second": 26.597,
"eval_steps_per_second": 1.716,
"step": 81
},
{
"epoch": 18.89,
"eval_accuracy": 0.7741935483870968,
"eval_loss": 0.5602142810821533,
"eval_runtime": 2.1895,
"eval_samples_per_second": 28.317,
"eval_steps_per_second": 1.827,
"step": 85
},
{
"epoch": 20.0,
"learning_rate": 2.4305555555555558e-05,
"loss": 0.5303,
"step": 90
},
{
"epoch": 20.0,
"eval_accuracy": 0.7903225806451613,
"eval_loss": 0.49427932500839233,
"eval_runtime": 2.2745,
"eval_samples_per_second": 27.258,
"eval_steps_per_second": 1.759,
"step": 90
},
{
"epoch": 20.89,
"eval_accuracy": 0.7903225806451613,
"eval_loss": 0.530381441116333,
"eval_runtime": 2.142,
"eval_samples_per_second": 28.945,
"eval_steps_per_second": 1.867,
"step": 94
},
{
"epoch": 22.0,
"eval_accuracy": 0.8225806451612904,
"eval_loss": 0.48010584712028503,
"eval_runtime": 2.3616,
"eval_samples_per_second": 26.254,
"eval_steps_per_second": 1.694,
"step": 99
},
{
"epoch": 22.22,
"learning_rate": 2.0833333333333336e-05,
"loss": 0.4903,
"step": 100
},
{
"epoch": 22.89,
"eval_accuracy": 0.8225806451612904,
"eval_loss": 0.4848884344100952,
"eval_runtime": 2.16,
"eval_samples_per_second": 28.703,
"eval_steps_per_second": 1.852,
"step": 103
},
{
"epoch": 24.0,
"eval_accuracy": 0.7741935483870968,
"eval_loss": 0.5709980726242065,
"eval_runtime": 2.1685,
"eval_samples_per_second": 28.591,
"eval_steps_per_second": 1.845,
"step": 108
},
{
"epoch": 24.44,
"learning_rate": 1.736111111111111e-05,
"loss": 0.4261,
"step": 110
},
{
"epoch": 24.89,
"eval_accuracy": 0.7903225806451613,
"eval_loss": 0.4803168475627899,
"eval_runtime": 2.2145,
"eval_samples_per_second": 27.997,
"eval_steps_per_second": 1.806,
"step": 112
},
{
"epoch": 26.0,
"eval_accuracy": 0.7258064516129032,
"eval_loss": 0.5670634508132935,
"eval_runtime": 2.1993,
"eval_samples_per_second": 28.191,
"eval_steps_per_second": 1.819,
"step": 117
},
{
"epoch": 26.67,
"learning_rate": 1.388888888888889e-05,
"loss": 0.4122,
"step": 120
},
{
"epoch": 26.89,
"eval_accuracy": 0.8064516129032258,
"eval_loss": 0.4585064947605133,
"eval_runtime": 2.2065,
"eval_samples_per_second": 28.098,
"eval_steps_per_second": 1.813,
"step": 121
},
{
"epoch": 28.0,
"eval_accuracy": 0.7096774193548387,
"eval_loss": 0.5910329222679138,
"eval_runtime": 2.23,
"eval_samples_per_second": 27.802,
"eval_steps_per_second": 1.794,
"step": 126
},
{
"epoch": 28.89,
"learning_rate": 1.0416666666666668e-05,
"loss": 0.3739,
"step": 130
},
{
"epoch": 28.89,
"eval_accuracy": 0.7580645161290323,
"eval_loss": 0.5821260213851929,
"eval_runtime": 2.2145,
"eval_samples_per_second": 27.997,
"eval_steps_per_second": 1.806,
"step": 130
},
{
"epoch": 30.0,
"eval_accuracy": 0.7741935483870968,
"eval_loss": 0.5329306125640869,
"eval_runtime": 2.3526,
"eval_samples_per_second": 26.354,
"eval_steps_per_second": 1.7,
"step": 135
},
{
"epoch": 30.89,
"eval_accuracy": 0.8225806451612904,
"eval_loss": 0.4422537386417389,
"eval_runtime": 2.1625,
"eval_samples_per_second": 28.67,
"eval_steps_per_second": 1.85,
"step": 139
},
{
"epoch": 31.11,
"learning_rate": 6.944444444444445e-06,
"loss": 0.3896,
"step": 140
},
{
"epoch": 32.0,
"eval_accuracy": 0.7580645161290323,
"eval_loss": 0.47155243158340454,
"eval_runtime": 2.1865,
"eval_samples_per_second": 28.356,
"eval_steps_per_second": 1.829,
"step": 144
},
{
"epoch": 32.89,
"eval_accuracy": 0.7903225806451613,
"eval_loss": 0.4785827100276947,
"eval_runtime": 2.232,
"eval_samples_per_second": 27.777,
"eval_steps_per_second": 1.792,
"step": 148
},
{
"epoch": 33.33,
"learning_rate": 3.4722222222222224e-06,
"loss": 0.3472,
"step": 150
},
{
"epoch": 34.0,
"eval_accuracy": 0.7903225806451613,
"eval_loss": 0.45382845401763916,
"eval_runtime": 2.19,
"eval_samples_per_second": 28.31,
"eval_steps_per_second": 1.826,
"step": 153
},
{
"epoch": 34.89,
"eval_accuracy": 0.7903225806451613,
"eval_loss": 0.455282062292099,
"eval_runtime": 2.2165,
"eval_samples_per_second": 27.972,
"eval_steps_per_second": 1.805,
"step": 157
},
{
"epoch": 35.56,
"learning_rate": 0.0,
"loss": 0.3349,
"step": 160
},
{
"epoch": 35.56,
"eval_accuracy": 0.7903225806451613,
"eval_loss": 0.45279815793037415,
"eval_runtime": 2.4931,
"eval_samples_per_second": 24.869,
"eval_steps_per_second": 1.604,
"step": 160
},
{
"epoch": 35.56,
"step": 160,
"total_flos": 7.931930389512192e+17,
"train_loss": 0.7482577681541442,
"train_runtime": 589.7762,
"train_samples_per_second": 19.533,
"train_steps_per_second": 0.271
}
],
"logging_steps": 10,
"max_steps": 160,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"total_flos": 7.931930389512192e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}