tdhcuong's picture
End of training
df9fda1 verified
{
"best_metric": 0.9122203098106713,
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-azure-poc-img-classification/checkpoint-410",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 410,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.24390243902439024,
"grad_norm": 41.443572998046875,
"learning_rate": 1.2195121951219513e-05,
"loss": 1.6888,
"step": 10
},
{
"epoch": 0.4878048780487805,
"grad_norm": 48.59061813354492,
"learning_rate": 2.4390243902439026e-05,
"loss": 1.1676,
"step": 20
},
{
"epoch": 0.7317073170731707,
"grad_norm": 34.8450927734375,
"learning_rate": 3.6585365853658535e-05,
"loss": 0.8213,
"step": 30
},
{
"epoch": 0.975609756097561,
"grad_norm": 24.4506893157959,
"learning_rate": 4.878048780487805e-05,
"loss": 0.5888,
"step": 40
},
{
"epoch": 1.0,
"eval_accuracy": 0.8347676419965576,
"eval_loss": 0.4436015188694,
"eval_runtime": 31.6183,
"eval_samples_per_second": 18.375,
"eval_steps_per_second": 0.601,
"step": 41
},
{
"epoch": 1.2195121951219512,
"grad_norm": 37.28171157836914,
"learning_rate": 4.878048780487805e-05,
"loss": 0.4412,
"step": 50
},
{
"epoch": 1.4634146341463414,
"grad_norm": 16.48870086669922,
"learning_rate": 4.7425474254742554e-05,
"loss": 0.3709,
"step": 60
},
{
"epoch": 1.7073170731707317,
"grad_norm": 28.970722198486328,
"learning_rate": 4.607046070460705e-05,
"loss": 0.3093,
"step": 70
},
{
"epoch": 1.951219512195122,
"grad_norm": 21.53656005859375,
"learning_rate": 4.4715447154471546e-05,
"loss": 0.3118,
"step": 80
},
{
"epoch": 2.0,
"eval_accuracy": 0.8691910499139415,
"eval_loss": 0.3027944266796112,
"eval_runtime": 28.8996,
"eval_samples_per_second": 20.104,
"eval_steps_per_second": 0.657,
"step": 82
},
{
"epoch": 2.1951219512195124,
"grad_norm": 12.73299789428711,
"learning_rate": 4.336043360433605e-05,
"loss": 0.2855,
"step": 90
},
{
"epoch": 2.4390243902439024,
"grad_norm": 21.19304656982422,
"learning_rate": 4.2005420054200545e-05,
"loss": 0.2955,
"step": 100
},
{
"epoch": 2.682926829268293,
"grad_norm": 20.837289810180664,
"learning_rate": 4.065040650406504e-05,
"loss": 0.2377,
"step": 110
},
{
"epoch": 2.926829268292683,
"grad_norm": 11.421738624572754,
"learning_rate": 3.9295392953929537e-05,
"loss": 0.2284,
"step": 120
},
{
"epoch": 3.0,
"eval_accuracy": 0.8795180722891566,
"eval_loss": 0.28786736726760864,
"eval_runtime": 27.9367,
"eval_samples_per_second": 20.797,
"eval_steps_per_second": 0.68,
"step": 123
},
{
"epoch": 3.1707317073170733,
"grad_norm": 23.18836784362793,
"learning_rate": 3.794037940379404e-05,
"loss": 0.2491,
"step": 130
},
{
"epoch": 3.4146341463414633,
"grad_norm": 14.66263198852539,
"learning_rate": 3.6585365853658535e-05,
"loss": 0.2057,
"step": 140
},
{
"epoch": 3.658536585365854,
"grad_norm": 20.51490592956543,
"learning_rate": 3.523035230352303e-05,
"loss": 0.2372,
"step": 150
},
{
"epoch": 3.902439024390244,
"grad_norm": 11.376835823059082,
"learning_rate": 3.3875338753387534e-05,
"loss": 0.203,
"step": 160
},
{
"epoch": 4.0,
"eval_accuracy": 0.8950086058519794,
"eval_loss": 0.2368348091840744,
"eval_runtime": 29.5907,
"eval_samples_per_second": 19.635,
"eval_steps_per_second": 0.642,
"step": 164
},
{
"epoch": 4.146341463414634,
"grad_norm": 16.738306045532227,
"learning_rate": 3.2520325203252037e-05,
"loss": 0.2199,
"step": 170
},
{
"epoch": 4.390243902439025,
"grad_norm": 23.239999771118164,
"learning_rate": 3.116531165311653e-05,
"loss": 0.1974,
"step": 180
},
{
"epoch": 4.634146341463414,
"grad_norm": 20.330289840698242,
"learning_rate": 2.9810298102981032e-05,
"loss": 0.2177,
"step": 190
},
{
"epoch": 4.878048780487805,
"grad_norm": 10.759196281433105,
"learning_rate": 2.8455284552845528e-05,
"loss": 0.2254,
"step": 200
},
{
"epoch": 5.0,
"eval_accuracy": 0.8984509466437177,
"eval_loss": 0.2276138812303543,
"eval_runtime": 29.2089,
"eval_samples_per_second": 19.891,
"eval_steps_per_second": 0.65,
"step": 205
},
{
"epoch": 5.121951219512195,
"grad_norm": 14.840445518493652,
"learning_rate": 2.7100271002710027e-05,
"loss": 0.1754,
"step": 210
},
{
"epoch": 5.365853658536586,
"grad_norm": 14.199987411499023,
"learning_rate": 2.574525745257453e-05,
"loss": 0.1711,
"step": 220
},
{
"epoch": 5.609756097560975,
"grad_norm": 13.013700485229492,
"learning_rate": 2.4390243902439026e-05,
"loss": 0.1842,
"step": 230
},
{
"epoch": 5.853658536585366,
"grad_norm": 16.866701126098633,
"learning_rate": 2.3035230352303525e-05,
"loss": 0.1976,
"step": 240
},
{
"epoch": 6.0,
"eval_accuracy": 0.8967297762478486,
"eval_loss": 0.2338663935661316,
"eval_runtime": 28.264,
"eval_samples_per_second": 20.556,
"eval_steps_per_second": 0.672,
"step": 246
},
{
"epoch": 6.097560975609756,
"grad_norm": 12.873111724853516,
"learning_rate": 2.1680216802168024e-05,
"loss": 0.1773,
"step": 250
},
{
"epoch": 6.341463414634147,
"grad_norm": 24.094802856445312,
"learning_rate": 2.032520325203252e-05,
"loss": 0.1616,
"step": 260
},
{
"epoch": 6.585365853658536,
"grad_norm": 24.90325355529785,
"learning_rate": 1.897018970189702e-05,
"loss": 0.1792,
"step": 270
},
{
"epoch": 6.829268292682927,
"grad_norm": 18.315990447998047,
"learning_rate": 1.7615176151761516e-05,
"loss": 0.1603,
"step": 280
},
{
"epoch": 7.0,
"eval_accuracy": 0.9036144578313253,
"eval_loss": 0.2191040813922882,
"eval_runtime": 29.3644,
"eval_samples_per_second": 19.786,
"eval_steps_per_second": 0.647,
"step": 287
},
{
"epoch": 7.073170731707317,
"grad_norm": 11.87928295135498,
"learning_rate": 1.6260162601626018e-05,
"loss": 0.1786,
"step": 290
},
{
"epoch": 7.317073170731708,
"grad_norm": 14.647104263305664,
"learning_rate": 1.4905149051490516e-05,
"loss": 0.1706,
"step": 300
},
{
"epoch": 7.560975609756097,
"grad_norm": 19.18857765197754,
"learning_rate": 1.3550135501355014e-05,
"loss": 0.1587,
"step": 310
},
{
"epoch": 7.804878048780488,
"grad_norm": 15.203424453735352,
"learning_rate": 1.2195121951219513e-05,
"loss": 0.1556,
"step": 320
},
{
"epoch": 8.0,
"eval_accuracy": 0.9036144578313253,
"eval_loss": 0.2248595505952835,
"eval_runtime": 28.4808,
"eval_samples_per_second": 20.4,
"eval_steps_per_second": 0.667,
"step": 328
},
{
"epoch": 8.048780487804878,
"grad_norm": 9.790509223937988,
"learning_rate": 1.0840108401084012e-05,
"loss": 0.1462,
"step": 330
},
{
"epoch": 8.292682926829269,
"grad_norm": 12.157197952270508,
"learning_rate": 9.48509485094851e-06,
"loss": 0.1529,
"step": 340
},
{
"epoch": 8.536585365853659,
"grad_norm": 10.181126594543457,
"learning_rate": 8.130081300813009e-06,
"loss": 0.1524,
"step": 350
},
{
"epoch": 8.78048780487805,
"grad_norm": 7.913666725158691,
"learning_rate": 6.775067750677507e-06,
"loss": 0.1488,
"step": 360
},
{
"epoch": 9.0,
"eval_accuracy": 0.9070567986230637,
"eval_loss": 0.20175980031490326,
"eval_runtime": 27.8089,
"eval_samples_per_second": 20.893,
"eval_steps_per_second": 0.683,
"step": 369
},
{
"epoch": 9.024390243902438,
"grad_norm": 16.225345611572266,
"learning_rate": 5.420054200542006e-06,
"loss": 0.1574,
"step": 370
},
{
"epoch": 9.268292682926829,
"grad_norm": 22.599414825439453,
"learning_rate": 4.0650406504065046e-06,
"loss": 0.1444,
"step": 380
},
{
"epoch": 9.512195121951219,
"grad_norm": 14.447446823120117,
"learning_rate": 2.710027100271003e-06,
"loss": 0.1439,
"step": 390
},
{
"epoch": 9.75609756097561,
"grad_norm": 30.83609962463379,
"learning_rate": 1.3550135501355015e-06,
"loss": 0.1498,
"step": 400
},
{
"epoch": 10.0,
"grad_norm": 27.201889038085938,
"learning_rate": 0.0,
"loss": 0.158,
"step": 410
},
{
"epoch": 10.0,
"eval_accuracy": 0.9122203098106713,
"eval_loss": 0.21193988621234894,
"eval_runtime": 28.3116,
"eval_samples_per_second": 20.522,
"eval_steps_per_second": 0.671,
"step": 410
},
{
"epoch": 10.0,
"step": 410,
"total_flos": 1.2975913894293504e+18,
"train_loss": 0.2908858444632553,
"train_runtime": 2811.168,
"train_samples_per_second": 18.569,
"train_steps_per_second": 0.146
}
],
"logging_steps": 10,
"max_steps": 410,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2975913894293504e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}