{ "best_metric": 0.9130434782608695, "best_model_checkpoint": "BEiT-DMAE-DA\\checkpoint-379", "epoch": 38.26086956521739, "eval_steps": 500, "global_step": 440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.87, "learning_rate": 4.886363636363637e-05, "loss": 1.3475, "step": 10 }, { "epoch": 0.96, "eval_accuracy": 0.32608695652173914, "eval_loss": 1.327863335609436, "eval_runtime": 0.6895, "eval_samples_per_second": 66.715, "eval_steps_per_second": 4.351, "step": 11 }, { "epoch": 1.74, "learning_rate": 4.772727272727273e-05, "loss": 1.1875, "step": 20 }, { "epoch": 2.0, "eval_accuracy": 0.34782608695652173, "eval_loss": 1.13204824924469, "eval_runtime": 0.7412, "eval_samples_per_second": 62.06, "eval_steps_per_second": 4.047, "step": 23 }, { "epoch": 2.61, "learning_rate": 4.659090909090909e-05, "loss": 0.9998, "step": 30 }, { "epoch": 2.96, "eval_accuracy": 0.5434782608695652, "eval_loss": 0.9957121014595032, "eval_runtime": 0.6867, "eval_samples_per_second": 66.99, "eval_steps_per_second": 4.369, "step": 34 }, { "epoch": 3.48, "learning_rate": 4.545454545454546e-05, "loss": 0.8836, "step": 40 }, { "epoch": 4.0, "eval_accuracy": 0.5869565217391305, "eval_loss": 0.8435541987419128, "eval_runtime": 0.7072, "eval_samples_per_second": 65.049, "eval_steps_per_second": 4.242, "step": 46 }, { "epoch": 4.35, "learning_rate": 4.431818181818182e-05, "loss": 0.7593, "step": 50 }, { "epoch": 4.96, "eval_accuracy": 0.6304347826086957, "eval_loss": 0.7904003262519836, "eval_runtime": 0.7214, "eval_samples_per_second": 63.763, "eval_steps_per_second": 4.158, "step": 57 }, { "epoch": 5.22, "learning_rate": 4.318181818181819e-05, "loss": 0.6939, "step": 60 }, { "epoch": 6.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.6656226515769958, "eval_runtime": 0.6452, "eval_samples_per_second": 71.3, "eval_steps_per_second": 4.65, "step": 69 }, { "epoch": 6.09, "learning_rate": 4.204545454545455e-05, "loss": 0.5942, "step": 70 }, { "epoch": 6.96, "learning_rate": 4.0909090909090915e-05, "loss": 0.4924, "step": 80 }, { "epoch": 6.96, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.6724327206611633, "eval_runtime": 0.6712, "eval_samples_per_second": 68.537, "eval_steps_per_second": 4.47, "step": 80 }, { "epoch": 7.83, "learning_rate": 3.9772727272727275e-05, "loss": 0.4444, "step": 90 }, { "epoch": 8.0, "eval_accuracy": 0.782608695652174, "eval_loss": 0.5950984954833984, "eval_runtime": 0.6562, "eval_samples_per_second": 70.105, "eval_steps_per_second": 4.572, "step": 92 }, { "epoch": 8.7, "learning_rate": 3.8636363636363636e-05, "loss": 0.337, "step": 100 }, { "epoch": 8.96, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.5222101211547852, "eval_runtime": 0.6647, "eval_samples_per_second": 69.208, "eval_steps_per_second": 4.514, "step": 103 }, { "epoch": 9.57, "learning_rate": 3.7500000000000003e-05, "loss": 0.3213, "step": 110 }, { "epoch": 10.0, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.6813698410987854, "eval_runtime": 0.6601, "eval_samples_per_second": 69.681, "eval_steps_per_second": 4.544, "step": 115 }, { "epoch": 10.43, "learning_rate": 3.6363636363636364e-05, "loss": 0.2689, "step": 120 }, { "epoch": 10.96, "eval_accuracy": 0.782608695652174, "eval_loss": 0.5912956595420837, "eval_runtime": 0.6652, "eval_samples_per_second": 69.156, "eval_steps_per_second": 4.51, "step": 126 }, { "epoch": 11.3, "learning_rate": 3.522727272727273e-05, "loss": 0.2538, "step": 130 }, { "epoch": 12.0, "eval_accuracy": 0.782608695652174, "eval_loss": 0.6227802634239197, "eval_runtime": 0.6802, "eval_samples_per_second": 67.63, "eval_steps_per_second": 4.411, "step": 138 }, { "epoch": 12.17, "learning_rate": 3.409090909090909e-05, "loss": 0.2032, "step": 140 }, { "epoch": 12.96, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.6992256045341492, "eval_runtime": 0.6597, "eval_samples_per_second": 69.734, "eval_steps_per_second": 4.548, "step": 149 }, { "epoch": 13.04, "learning_rate": 3.295454545454545e-05, "loss": 0.2439, "step": 150 }, { "epoch": 13.91, "learning_rate": 3.181818181818182e-05, "loss": 0.2152, "step": 160 }, { "epoch": 14.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.7730478048324585, "eval_runtime": 0.7082, "eval_samples_per_second": 64.956, "eval_steps_per_second": 4.236, "step": 161 }, { "epoch": 14.78, "learning_rate": 3.068181818181818e-05, "loss": 0.1713, "step": 170 }, { "epoch": 14.96, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.776236891746521, "eval_runtime": 0.6617, "eval_samples_per_second": 69.522, "eval_steps_per_second": 4.534, "step": 172 }, { "epoch": 15.65, "learning_rate": 2.954545454545455e-05, "loss": 0.2042, "step": 180 }, { "epoch": 16.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.7652217745780945, "eval_runtime": 0.6431, "eval_samples_per_second": 71.524, "eval_steps_per_second": 4.665, "step": 184 }, { "epoch": 16.52, "learning_rate": 2.8409090909090912e-05, "loss": 0.1668, "step": 190 }, { "epoch": 16.96, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.55118328332901, "eval_runtime": 0.6664, "eval_samples_per_second": 69.031, "eval_steps_per_second": 4.502, "step": 195 }, { "epoch": 17.39, "learning_rate": 2.7272727272727273e-05, "loss": 0.1743, "step": 200 }, { "epoch": 18.0, "eval_accuracy": 0.782608695652174, "eval_loss": 0.7310671806335449, "eval_runtime": 0.6702, "eval_samples_per_second": 68.639, "eval_steps_per_second": 4.476, "step": 207 }, { "epoch": 18.26, "learning_rate": 2.6136363636363637e-05, "loss": 0.1226, "step": 210 }, { "epoch": 18.96, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.711499810218811, "eval_runtime": 1.0547, "eval_samples_per_second": 43.612, "eval_steps_per_second": 2.844, "step": 218 }, { "epoch": 19.13, "learning_rate": 2.5e-05, "loss": 0.1628, "step": 220 }, { "epoch": 20.0, "learning_rate": 2.3863636363636365e-05, "loss": 0.1537, "step": 230 }, { "epoch": 20.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.6799980998039246, "eval_runtime": 0.7192, "eval_samples_per_second": 63.962, "eval_steps_per_second": 4.171, "step": 230 }, { "epoch": 20.87, "learning_rate": 2.272727272727273e-05, "loss": 0.1311, "step": 240 }, { "epoch": 20.96, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.5863925218582153, "eval_runtime": 0.7092, "eval_samples_per_second": 64.866, "eval_steps_per_second": 4.23, "step": 241 }, { "epoch": 21.74, "learning_rate": 2.1590909090909093e-05, "loss": 0.1335, "step": 250 }, { "epoch": 22.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.6345816254615784, "eval_runtime": 0.6942, "eval_samples_per_second": 66.266, "eval_steps_per_second": 4.322, "step": 253 }, { "epoch": 22.61, "learning_rate": 2.0454545454545457e-05, "loss": 0.0981, "step": 260 }, { "epoch": 22.96, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.6541118621826172, "eval_runtime": 0.7142, "eval_samples_per_second": 64.41, "eval_steps_per_second": 4.201, "step": 264 }, { "epoch": 23.48, "learning_rate": 1.9318181818181818e-05, "loss": 0.1248, "step": 270 }, { "epoch": 24.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.7017006874084473, "eval_runtime": 0.7302, "eval_samples_per_second": 62.999, "eval_steps_per_second": 4.109, "step": 276 }, { "epoch": 24.35, "learning_rate": 1.8181818181818182e-05, "loss": 0.1183, "step": 280 }, { "epoch": 24.96, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.69643634557724, "eval_runtime": 0.6507, "eval_samples_per_second": 70.698, "eval_steps_per_second": 4.611, "step": 287 }, { "epoch": 25.22, "learning_rate": 1.7045454545454546e-05, "loss": 0.0946, "step": 290 }, { "epoch": 26.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.6449636220932007, "eval_runtime": 0.6636, "eval_samples_per_second": 69.314, "eval_steps_per_second": 4.52, "step": 299 }, { "epoch": 26.09, "learning_rate": 1.590909090909091e-05, "loss": 0.1312, "step": 300 }, { "epoch": 26.96, "learning_rate": 1.4772727272727274e-05, "loss": 0.0957, "step": 310 }, { "epoch": 26.96, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.7056821584701538, "eval_runtime": 0.6997, "eval_samples_per_second": 65.746, "eval_steps_per_second": 4.288, "step": 310 }, { "epoch": 27.83, "learning_rate": 1.3636363636363637e-05, "loss": 0.1692, "step": 320 }, { "epoch": 28.0, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.6635299324989319, "eval_runtime": 0.6605, "eval_samples_per_second": 69.644, "eval_steps_per_second": 4.542, "step": 322 }, { "epoch": 28.7, "learning_rate": 1.25e-05, "loss": 0.0967, "step": 330 }, { "epoch": 28.96, "eval_accuracy": 0.8695652173913043, "eval_loss": 0.5040035843849182, "eval_runtime": 0.6991, "eval_samples_per_second": 65.8, "eval_steps_per_second": 4.291, "step": 333 }, { "epoch": 29.57, "learning_rate": 1.1363636363636365e-05, "loss": 0.094, "step": 340 }, { "epoch": 30.0, "eval_accuracy": 0.8913043478260869, "eval_loss": 0.5587611198425293, "eval_runtime": 0.6827, "eval_samples_per_second": 67.384, "eval_steps_per_second": 4.395, "step": 345 }, { "epoch": 30.43, "learning_rate": 1.0227272727272729e-05, "loss": 0.0843, "step": 350 }, { "epoch": 30.96, "eval_accuracy": 0.8695652173913043, "eval_loss": 0.539750874042511, "eval_runtime": 0.6677, "eval_samples_per_second": 68.898, "eval_steps_per_second": 4.493, "step": 356 }, { "epoch": 31.3, "learning_rate": 9.090909090909091e-06, "loss": 0.0851, "step": 360 }, { "epoch": 32.0, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.5806054472923279, "eval_runtime": 0.6481, "eval_samples_per_second": 70.972, "eval_steps_per_second": 4.629, "step": 368 }, { "epoch": 32.17, "learning_rate": 7.954545454545455e-06, "loss": 0.0955, "step": 370 }, { "epoch": 32.96, "eval_accuracy": 0.9130434782608695, "eval_loss": 0.48163074254989624, "eval_runtime": 0.6626, "eval_samples_per_second": 69.42, "eval_steps_per_second": 4.527, "step": 379 }, { "epoch": 33.04, "learning_rate": 6.818181818181818e-06, "loss": 0.1194, "step": 380 }, { "epoch": 33.91, "learning_rate": 5.681818181818182e-06, "loss": 0.1157, "step": 390 }, { "epoch": 34.0, "eval_accuracy": 0.8695652173913043, "eval_loss": 0.5289022922515869, "eval_runtime": 0.6557, "eval_samples_per_second": 70.158, "eval_steps_per_second": 4.576, "step": 391 }, { "epoch": 34.78, "learning_rate": 4.5454545454545455e-06, "loss": 0.072, "step": 400 }, { "epoch": 34.96, "eval_accuracy": 0.8913043478260869, "eval_loss": 0.5657246112823486, "eval_runtime": 0.6672, "eval_samples_per_second": 68.949, "eval_steps_per_second": 4.497, "step": 402 }, { "epoch": 35.65, "learning_rate": 3.409090909090909e-06, "loss": 0.091, "step": 410 }, { "epoch": 36.0, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.5566320419311523, "eval_runtime": 0.6752, "eval_samples_per_second": 68.133, "eval_steps_per_second": 4.443, "step": 414 }, { "epoch": 36.52, "learning_rate": 2.2727272727272728e-06, "loss": 0.0891, "step": 420 }, { "epoch": 36.96, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.5729131102561951, "eval_runtime": 0.6672, "eval_samples_per_second": 68.948, "eval_steps_per_second": 4.497, "step": 425 }, { "epoch": 37.39, "learning_rate": 1.1363636363636364e-06, "loss": 0.0732, "step": 430 }, { "epoch": 38.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.5915136933326721, "eval_runtime": 0.6622, "eval_samples_per_second": 69.469, "eval_steps_per_second": 4.531, "step": 437 }, { "epoch": 38.26, "learning_rate": 0.0, "loss": 0.0647, "step": 440 }, { "epoch": 38.26, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.5901583433151245, "eval_runtime": 0.7022, "eval_samples_per_second": 65.512, "eval_steps_per_second": 4.273, "step": 440 }, { "epoch": 38.26, "step": 440, "total_flos": 2.1400720000140902e+18, "train_loss": 0.2858855720270764, "train_runtime": 727.0673, "train_samples_per_second": 39.721, "train_steps_per_second": 0.605 } ], "logging_steps": 10, "max_steps": 440, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 2.1400720000140902e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }