|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"global_step": 61200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9754901960784317e-05, |
|
"loss": 0.2061, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9509803921568626e-05, |
|
"loss": 0.1319, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9264705882352942e-05, |
|
"loss": 0.1099, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.9019607843137255e-05, |
|
"loss": 0.1096, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.877450980392157e-05, |
|
"loss": 0.1035, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.8529411764705883e-05, |
|
"loss": 0.1015, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.8284313725490196e-05, |
|
"loss": 0.0842, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.8039215686274512e-05, |
|
"loss": 0.0844, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.7794117647058824e-05, |
|
"loss": 0.0702, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.7549019607843137e-05, |
|
"loss": 0.0766, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.730392156862745e-05, |
|
"loss": 0.0811, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.7058823529411766e-05, |
|
"loss": 0.072, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6813725490196082e-05, |
|
"loss": 0.0667, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.656862745098039e-05, |
|
"loss": 0.0753, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.6323529411764707e-05, |
|
"loss": 0.0689, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.607843137254902e-05, |
|
"loss": 0.0614, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.5833333333333336e-05, |
|
"loss": 0.0643, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.5588235294117648e-05, |
|
"loss": 0.0687, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.534313725490196e-05, |
|
"loss": 0.0674, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.5098039215686277e-05, |
|
"loss": 0.0588, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.485294117647059e-05, |
|
"loss": 0.0676, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.4607843137254902e-05, |
|
"loss": 0.0657, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.4362745098039215e-05, |
|
"loss": 0.0595, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.411764705882353e-05, |
|
"loss": 0.0641, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.3872549019607843e-05, |
|
"loss": 0.0573, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.3627450980392156e-05, |
|
"loss": 0.0446, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.3382352941176472e-05, |
|
"loss": 0.0403, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.3137254901960785e-05, |
|
"loss": 0.0389, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.2892156862745097e-05, |
|
"loss": 0.0438, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.2647058823529413e-05, |
|
"loss": 0.0439, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.2401960784313726e-05, |
|
"loss": 0.0495, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.2156862745098042e-05, |
|
"loss": 0.047, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.191176470588235e-05, |
|
"loss": 0.0501, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.1666666666666667e-05, |
|
"loss": 0.0517, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.1421568627450983e-05, |
|
"loss": 0.0452, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.1176470588235296e-05, |
|
"loss": 0.0459, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.0931372549019608e-05, |
|
"loss": 0.0451, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.068627450980392e-05, |
|
"loss": 0.0418, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.0441176470588237e-05, |
|
"loss": 0.0446, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.019607843137255e-05, |
|
"loss": 0.0478, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.9950980392156862e-05, |
|
"loss": 0.0387, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.9705882352941178e-05, |
|
"loss": 0.04, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.946078431372549e-05, |
|
"loss": 0.048, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.9215686274509803e-05, |
|
"loss": 0.0382, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.8970588235294116e-05, |
|
"loss": 0.0402, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8725490196078432e-05, |
|
"loss": 0.0409, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.8480392156862748e-05, |
|
"loss": 0.0403, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.8235294117647057e-05, |
|
"loss": 0.0577, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.7990196078431373e-05, |
|
"loss": 0.0434, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7745098039215686e-05, |
|
"loss": 0.0304, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7500000000000002e-05, |
|
"loss": 0.0227, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.7254901960784314e-05, |
|
"loss": 0.0307, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.7009803921568627e-05, |
|
"loss": 0.0266, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.6764705882352943e-05, |
|
"loss": 0.0319, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.6519607843137256e-05, |
|
"loss": 0.0324, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.627450980392157e-05, |
|
"loss": 0.0256, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.602941176470588e-05, |
|
"loss": 0.0292, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.5784313725490197e-05, |
|
"loss": 0.0284, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.5539215686274513e-05, |
|
"loss": 0.0307, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.5294117647058822e-05, |
|
"loss": 0.0333, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.5049019607843138e-05, |
|
"loss": 0.0316, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.4803921568627453e-05, |
|
"loss": 0.0298, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.4558823529411765e-05, |
|
"loss": 0.0255, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.431372549019608e-05, |
|
"loss": 0.0274, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.4068627450980392e-05, |
|
"loss": 0.0268, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.3823529411764705e-05, |
|
"loss": 0.0263, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.357843137254902e-05, |
|
"loss": 0.0208, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.0272, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.3088235294117648e-05, |
|
"loss": 0.0245, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.284313725490196e-05, |
|
"loss": 0.0245, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.2598039215686275e-05, |
|
"loss": 0.0239, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.2352941176470587e-05, |
|
"loss": 0.0271, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.2107843137254903e-05, |
|
"loss": 0.0226, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.1862745098039216e-05, |
|
"loss": 0.0216, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.161764705882353e-05, |
|
"loss": 0.0157, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.1372549019607843e-05, |
|
"loss": 0.0137, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.1127450980392157e-05, |
|
"loss": 0.0122, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.0882352941176471e-05, |
|
"loss": 0.0153, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.0637254901960786e-05, |
|
"loss": 0.018, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 1.0392156862745098e-05, |
|
"loss": 0.018, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.0147058823529413e-05, |
|
"loss": 0.0169, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 9.901960784313725e-06, |
|
"loss": 0.0138, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 9.65686274509804e-06, |
|
"loss": 0.0171, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 9.411764705882354e-06, |
|
"loss": 0.0139, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 9.166666666666668e-06, |
|
"loss": 0.0123, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 8.92156862745098e-06, |
|
"loss": 0.0172, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 8.676470588235295e-06, |
|
"loss": 0.0168, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 8.431372549019608e-06, |
|
"loss": 0.0146, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 8.18627450980392e-06, |
|
"loss": 0.0155, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 7.941176470588236e-06, |
|
"loss": 0.0148, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 7.696078431372549e-06, |
|
"loss": 0.0187, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 7.450980392156863e-06, |
|
"loss": 0.0124, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 7.205882352941177e-06, |
|
"loss": 0.0119, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 6.960784313725491e-06, |
|
"loss": 0.0125, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 6.7156862745098045e-06, |
|
"loss": 0.0157, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 6.470588235294118e-06, |
|
"loss": 0.0149, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 6.225490196078432e-06, |
|
"loss": 0.0179, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 5.980392156862746e-06, |
|
"loss": 0.0142, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.735294117647058e-06, |
|
"loss": 0.0111, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 5.490196078431373e-06, |
|
"loss": 0.0056, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 5.245098039215686e-06, |
|
"loss": 0.0051, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 4.9999999999999996e-06, |
|
"loss": 0.0101, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 4.754901960784314e-06, |
|
"loss": 0.0069, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 4.509803921568627e-06, |
|
"loss": 0.0069, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 4.264705882352941e-06, |
|
"loss": 0.0091, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 4.019607843137255e-06, |
|
"loss": 0.0068, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 3.7745098039215686e-06, |
|
"loss": 0.0079, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 3.5294117647058825e-06, |
|
"loss": 0.0139, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 3.2843137254901964e-06, |
|
"loss": 0.0051, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 3.03921568627451e-06, |
|
"loss": 0.0104, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 2.7941176470588237e-06, |
|
"loss": 0.0068, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 2.5490196078431376e-06, |
|
"loss": 0.0102, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 2.303921568627451e-06, |
|
"loss": 0.0107, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 2.058823529411765e-06, |
|
"loss": 0.0093, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 1.8137254901960784e-06, |
|
"loss": 0.0059, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 1.5686274509803923e-06, |
|
"loss": 0.0077, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 1.323529411764706e-06, |
|
"loss": 0.0074, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 1.0784313725490197e-06, |
|
"loss": 0.0076, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": 0.0062, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 5.882352941176471e-07, |
|
"loss": 0.0073, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 3.431372549019608e-07, |
|
"loss": 0.0088, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 9.803921568627452e-08, |
|
"loss": 0.0053, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 61200, |
|
"total_flos": 2.401553971865549e+16, |
|
"train_loss": 0.0358083400544193, |
|
"train_runtime": 10226.935, |
|
"train_samples_per_second": 23.937, |
|
"train_steps_per_second": 5.984 |
|
} |
|
], |
|
"max_steps": 61200, |
|
"num_train_epochs": 5, |
|
"total_flos": 2.401553971865549e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|