{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 61200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 2.9754901960784317e-05, "loss": 0.2061, "step": 500 }, { "epoch": 0.08, "learning_rate": 2.9509803921568626e-05, "loss": 0.1319, "step": 1000 }, { "epoch": 0.12, "learning_rate": 2.9264705882352942e-05, "loss": 0.1099, "step": 1500 }, { "epoch": 0.16, "learning_rate": 2.9019607843137255e-05, "loss": 0.1096, "step": 2000 }, { "epoch": 0.2, "learning_rate": 2.877450980392157e-05, "loss": 0.1035, "step": 2500 }, { "epoch": 0.25, "learning_rate": 2.8529411764705883e-05, "loss": 0.1015, "step": 3000 }, { "epoch": 0.29, "learning_rate": 2.8284313725490196e-05, "loss": 0.0842, "step": 3500 }, { "epoch": 0.33, "learning_rate": 2.8039215686274512e-05, "loss": 0.0844, "step": 4000 }, { "epoch": 0.37, "learning_rate": 2.7794117647058824e-05, "loss": 0.0702, "step": 4500 }, { "epoch": 0.41, "learning_rate": 2.7549019607843137e-05, "loss": 0.0766, "step": 5000 }, { "epoch": 0.45, "learning_rate": 2.730392156862745e-05, "loss": 0.0811, "step": 5500 }, { "epoch": 0.49, "learning_rate": 2.7058823529411766e-05, "loss": 0.072, "step": 6000 }, { "epoch": 0.53, "learning_rate": 2.6813725490196082e-05, "loss": 0.0667, "step": 6500 }, { "epoch": 0.57, "learning_rate": 2.656862745098039e-05, "loss": 0.0753, "step": 7000 }, { "epoch": 0.61, "learning_rate": 2.6323529411764707e-05, "loss": 0.0689, "step": 7500 }, { "epoch": 0.65, "learning_rate": 2.607843137254902e-05, "loss": 0.0614, "step": 8000 }, { "epoch": 0.69, "learning_rate": 2.5833333333333336e-05, "loss": 0.0643, "step": 8500 }, { "epoch": 0.74, "learning_rate": 2.5588235294117648e-05, "loss": 0.0687, "step": 9000 }, { "epoch": 0.78, "learning_rate": 2.534313725490196e-05, "loss": 0.0674, "step": 9500 }, { "epoch": 0.82, "learning_rate": 2.5098039215686277e-05, "loss": 0.0588, "step": 10000 }, { "epoch": 0.86, "learning_rate": 2.485294117647059e-05, "loss": 0.0676, "step": 10500 }, { "epoch": 0.9, "learning_rate": 2.4607843137254902e-05, "loss": 0.0657, "step": 11000 }, { "epoch": 0.94, "learning_rate": 2.4362745098039215e-05, "loss": 0.0595, "step": 11500 }, { "epoch": 0.98, "learning_rate": 2.411764705882353e-05, "loss": 0.0641, "step": 12000 }, { "epoch": 1.02, "learning_rate": 2.3872549019607843e-05, "loss": 0.0573, "step": 12500 }, { "epoch": 1.06, "learning_rate": 2.3627450980392156e-05, "loss": 0.0446, "step": 13000 }, { "epoch": 1.1, "learning_rate": 2.3382352941176472e-05, "loss": 0.0403, "step": 13500 }, { "epoch": 1.14, "learning_rate": 2.3137254901960785e-05, "loss": 0.0389, "step": 14000 }, { "epoch": 1.18, "learning_rate": 2.2892156862745097e-05, "loss": 0.0438, "step": 14500 }, { "epoch": 1.23, "learning_rate": 2.2647058823529413e-05, "loss": 0.0439, "step": 15000 }, { "epoch": 1.27, "learning_rate": 2.2401960784313726e-05, "loss": 0.0495, "step": 15500 }, { "epoch": 1.31, "learning_rate": 2.2156862745098042e-05, "loss": 0.047, "step": 16000 }, { "epoch": 1.35, "learning_rate": 2.191176470588235e-05, "loss": 0.0501, "step": 16500 }, { "epoch": 1.39, "learning_rate": 2.1666666666666667e-05, "loss": 0.0517, "step": 17000 }, { "epoch": 1.43, "learning_rate": 2.1421568627450983e-05, "loss": 0.0452, "step": 17500 }, { "epoch": 1.47, "learning_rate": 2.1176470588235296e-05, "loss": 0.0459, "step": 18000 }, { "epoch": 1.51, "learning_rate": 2.0931372549019608e-05, "loss": 0.0451, "step": 18500 }, { "epoch": 1.55, "learning_rate": 2.068627450980392e-05, "loss": 0.0418, "step": 19000 }, { "epoch": 1.59, "learning_rate": 2.0441176470588237e-05, "loss": 0.0446, "step": 19500 }, { "epoch": 1.63, "learning_rate": 2.019607843137255e-05, "loss": 0.0478, "step": 20000 }, { "epoch": 1.67, "learning_rate": 1.9950980392156862e-05, "loss": 0.0387, "step": 20500 }, { "epoch": 1.72, "learning_rate": 1.9705882352941178e-05, "loss": 0.04, "step": 21000 }, { "epoch": 1.76, "learning_rate": 1.946078431372549e-05, "loss": 0.048, "step": 21500 }, { "epoch": 1.8, "learning_rate": 1.9215686274509803e-05, "loss": 0.0382, "step": 22000 }, { "epoch": 1.84, "learning_rate": 1.8970588235294116e-05, "loss": 0.0402, "step": 22500 }, { "epoch": 1.88, "learning_rate": 1.8725490196078432e-05, "loss": 0.0409, "step": 23000 }, { "epoch": 1.92, "learning_rate": 1.8480392156862748e-05, "loss": 0.0403, "step": 23500 }, { "epoch": 1.96, "learning_rate": 1.8235294117647057e-05, "loss": 0.0577, "step": 24000 }, { "epoch": 2.0, "learning_rate": 1.7990196078431373e-05, "loss": 0.0434, "step": 24500 }, { "epoch": 2.04, "learning_rate": 1.7745098039215686e-05, "loss": 0.0304, "step": 25000 }, { "epoch": 2.08, "learning_rate": 1.7500000000000002e-05, "loss": 0.0227, "step": 25500 }, { "epoch": 2.12, "learning_rate": 1.7254901960784314e-05, "loss": 0.0307, "step": 26000 }, { "epoch": 2.17, "learning_rate": 1.7009803921568627e-05, "loss": 0.0266, "step": 26500 }, { "epoch": 2.21, "learning_rate": 1.6764705882352943e-05, "loss": 0.0319, "step": 27000 }, { "epoch": 2.25, "learning_rate": 1.6519607843137256e-05, "loss": 0.0324, "step": 27500 }, { "epoch": 2.29, "learning_rate": 1.627450980392157e-05, "loss": 0.0256, "step": 28000 }, { "epoch": 2.33, "learning_rate": 1.602941176470588e-05, "loss": 0.0292, "step": 28500 }, { "epoch": 2.37, "learning_rate": 1.5784313725490197e-05, "loss": 0.0284, "step": 29000 }, { "epoch": 2.41, "learning_rate": 1.5539215686274513e-05, "loss": 0.0307, "step": 29500 }, { "epoch": 2.45, "learning_rate": 1.5294117647058822e-05, "loss": 0.0333, "step": 30000 }, { "epoch": 2.49, "learning_rate": 1.5049019607843138e-05, "loss": 0.0316, "step": 30500 }, { "epoch": 2.53, "learning_rate": 1.4803921568627453e-05, "loss": 0.0298, "step": 31000 }, { "epoch": 2.57, "learning_rate": 1.4558823529411765e-05, "loss": 0.0255, "step": 31500 }, { "epoch": 2.61, "learning_rate": 1.431372549019608e-05, "loss": 0.0274, "step": 32000 }, { "epoch": 2.66, "learning_rate": 1.4068627450980392e-05, "loss": 0.0268, "step": 32500 }, { "epoch": 2.7, "learning_rate": 1.3823529411764705e-05, "loss": 0.0263, "step": 33000 }, { "epoch": 2.74, "learning_rate": 1.357843137254902e-05, "loss": 0.0208, "step": 33500 }, { "epoch": 2.78, "learning_rate": 1.3333333333333333e-05, "loss": 0.0272, "step": 34000 }, { "epoch": 2.82, "learning_rate": 1.3088235294117648e-05, "loss": 0.0245, "step": 34500 }, { "epoch": 2.86, "learning_rate": 1.284313725490196e-05, "loss": 0.0245, "step": 35000 }, { "epoch": 2.9, "learning_rate": 1.2598039215686275e-05, "loss": 0.0239, "step": 35500 }, { "epoch": 2.94, "learning_rate": 1.2352941176470587e-05, "loss": 0.0271, "step": 36000 }, { "epoch": 2.98, "learning_rate": 1.2107843137254903e-05, "loss": 0.0226, "step": 36500 }, { "epoch": 3.02, "learning_rate": 1.1862745098039216e-05, "loss": 0.0216, "step": 37000 }, { "epoch": 3.06, "learning_rate": 1.161764705882353e-05, "loss": 0.0157, "step": 37500 }, { "epoch": 3.1, "learning_rate": 1.1372549019607843e-05, "loss": 0.0137, "step": 38000 }, { "epoch": 3.15, "learning_rate": 1.1127450980392157e-05, "loss": 0.0122, "step": 38500 }, { "epoch": 3.19, "learning_rate": 1.0882352941176471e-05, "loss": 0.0153, "step": 39000 }, { "epoch": 3.23, "learning_rate": 1.0637254901960786e-05, "loss": 0.018, "step": 39500 }, { "epoch": 3.27, "learning_rate": 1.0392156862745098e-05, "loss": 0.018, "step": 40000 }, { "epoch": 3.31, "learning_rate": 1.0147058823529413e-05, "loss": 0.0169, "step": 40500 }, { "epoch": 3.35, "learning_rate": 9.901960784313725e-06, "loss": 0.0138, "step": 41000 }, { "epoch": 3.39, "learning_rate": 9.65686274509804e-06, "loss": 0.0171, "step": 41500 }, { "epoch": 3.43, "learning_rate": 9.411764705882354e-06, "loss": 0.0139, "step": 42000 }, { "epoch": 3.47, "learning_rate": 9.166666666666668e-06, "loss": 0.0123, "step": 42500 }, { "epoch": 3.51, "learning_rate": 8.92156862745098e-06, "loss": 0.0172, "step": 43000 }, { "epoch": 3.55, "learning_rate": 8.676470588235295e-06, "loss": 0.0168, "step": 43500 }, { "epoch": 3.59, "learning_rate": 8.431372549019608e-06, "loss": 0.0146, "step": 44000 }, { "epoch": 3.64, "learning_rate": 8.18627450980392e-06, "loss": 0.0155, "step": 44500 }, { "epoch": 3.68, "learning_rate": 7.941176470588236e-06, "loss": 0.0148, "step": 45000 }, { "epoch": 3.72, "learning_rate": 7.696078431372549e-06, "loss": 0.0187, "step": 45500 }, { "epoch": 3.76, "learning_rate": 7.450980392156863e-06, "loss": 0.0124, "step": 46000 }, { "epoch": 3.8, "learning_rate": 7.205882352941177e-06, "loss": 0.0119, "step": 46500 }, { "epoch": 3.84, "learning_rate": 6.960784313725491e-06, "loss": 0.0125, "step": 47000 }, { "epoch": 3.88, "learning_rate": 6.7156862745098045e-06, "loss": 0.0157, "step": 47500 }, { "epoch": 3.92, "learning_rate": 6.470588235294118e-06, "loss": 0.0149, "step": 48000 }, { "epoch": 3.96, "learning_rate": 6.225490196078432e-06, "loss": 0.0179, "step": 48500 }, { "epoch": 4.0, "learning_rate": 5.980392156862746e-06, "loss": 0.0142, "step": 49000 }, { "epoch": 4.04, "learning_rate": 5.735294117647058e-06, "loss": 0.0111, "step": 49500 }, { "epoch": 4.08, "learning_rate": 5.490196078431373e-06, "loss": 0.0056, "step": 50000 }, { "epoch": 4.13, "learning_rate": 5.245098039215686e-06, "loss": 0.0051, "step": 50500 }, { "epoch": 4.17, "learning_rate": 4.9999999999999996e-06, "loss": 0.0101, "step": 51000 }, { "epoch": 4.21, "learning_rate": 4.754901960784314e-06, "loss": 0.0069, "step": 51500 }, { "epoch": 4.25, "learning_rate": 4.509803921568627e-06, "loss": 0.0069, "step": 52000 }, { "epoch": 4.29, "learning_rate": 4.264705882352941e-06, "loss": 0.0091, "step": 52500 }, { "epoch": 4.33, "learning_rate": 4.019607843137255e-06, "loss": 0.0068, "step": 53000 }, { "epoch": 4.37, "learning_rate": 3.7745098039215686e-06, "loss": 0.0079, "step": 53500 }, { "epoch": 4.41, "learning_rate": 3.5294117647058825e-06, "loss": 0.0139, "step": 54000 }, { "epoch": 4.45, "learning_rate": 3.2843137254901964e-06, "loss": 0.0051, "step": 54500 }, { "epoch": 4.49, "learning_rate": 3.03921568627451e-06, "loss": 0.0104, "step": 55000 }, { "epoch": 4.53, "learning_rate": 2.7941176470588237e-06, "loss": 0.0068, "step": 55500 }, { "epoch": 4.58, "learning_rate": 2.5490196078431376e-06, "loss": 0.0102, "step": 56000 }, { "epoch": 4.62, "learning_rate": 2.303921568627451e-06, "loss": 0.0107, "step": 56500 }, { "epoch": 4.66, "learning_rate": 2.058823529411765e-06, "loss": 0.0093, "step": 57000 }, { "epoch": 4.7, "learning_rate": 1.8137254901960784e-06, "loss": 0.0059, "step": 57500 }, { "epoch": 4.74, "learning_rate": 1.5686274509803923e-06, "loss": 0.0077, "step": 58000 }, { "epoch": 4.78, "learning_rate": 1.323529411764706e-06, "loss": 0.0074, "step": 58500 }, { "epoch": 4.82, "learning_rate": 1.0784313725490197e-06, "loss": 0.0076, "step": 59000 }, { "epoch": 4.86, "learning_rate": 8.333333333333333e-07, "loss": 0.0062, "step": 59500 }, { "epoch": 4.9, "learning_rate": 5.882352941176471e-07, "loss": 0.0073, "step": 60000 }, { "epoch": 4.94, "learning_rate": 3.431372549019608e-07, "loss": 0.0088, "step": 60500 }, { "epoch": 4.98, "learning_rate": 9.803921568627452e-08, "loss": 0.0053, "step": 61000 }, { "epoch": 5.0, "step": 61200, "total_flos": 2.401553971865549e+16, "train_loss": 0.0358083400544193, "train_runtime": 10226.935, "train_samples_per_second": 23.937, "train_steps_per_second": 5.984 } ], "max_steps": 61200, "num_train_epochs": 5, "total_flos": 2.401553971865549e+16, "trial_name": null, "trial_params": null }