|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 58671, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9829558044008114e-05, |
|
"loss": 6.2073, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9659116088016227e-05, |
|
"loss": 5.3027, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.948867413202434e-05, |
|
"loss": 5.0065, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9318232176032456e-05, |
|
"loss": 4.808, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9147790220040568e-05, |
|
"loss": 4.6909, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.897734826404868e-05, |
|
"loss": 4.575, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8806906308056793e-05, |
|
"loss": 4.4813, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8636464352064906e-05, |
|
"loss": 4.4051, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.846602239607302e-05, |
|
"loss": 4.3434, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.829558044008113e-05, |
|
"loss": 4.2976, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.8125138484089244e-05, |
|
"loss": 4.2479, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.7954696528097357e-05, |
|
"loss": 4.2003, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.778425457210547e-05, |
|
"loss": 4.1561, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.7613812616113585e-05, |
|
"loss": 4.1258, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.7443370660121698e-05, |
|
"loss": 4.0757, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.727292870412981e-05, |
|
"loss": 4.049, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.7102486748137923e-05, |
|
"loss": 4.0258, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6932044792146036e-05, |
|
"loss": 3.9749, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.676160283615415e-05, |
|
"loss": 3.9751, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.659116088016226e-05, |
|
"loss": 3.9302, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.6420718924170377e-05, |
|
"loss": 3.9167, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.625027696817849e-05, |
|
"loss": 3.8915, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.6079835012186602e-05, |
|
"loss": 3.8704, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.5909393056194715e-05, |
|
"loss": 3.8542, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.5738951100202828e-05, |
|
"loss": 3.8171, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.556850914421094e-05, |
|
"loss": 3.8117, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5398067188219053e-05, |
|
"loss": 3.7954, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5227625232227166e-05, |
|
"loss": 3.7836, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.5057183276235278e-05, |
|
"loss": 3.7632, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.4886741320243392e-05, |
|
"loss": 3.7434, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.4716299364251505e-05, |
|
"loss": 3.7308, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.4545857408259618e-05, |
|
"loss": 3.7129, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.4375415452267732e-05, |
|
"loss": 3.7043, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4204973496275845e-05, |
|
"loss": 3.6853, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4034531540283957e-05, |
|
"loss": 3.6935, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3864089584292072e-05, |
|
"loss": 3.6671, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.3693647628300184e-05, |
|
"loss": 3.6662, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3523205672308297e-05, |
|
"loss": 3.6511, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.335276371631641e-05, |
|
"loss": 3.6389, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.3182321760324524e-05, |
|
"loss": 3.6311, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.3011879804332637e-05, |
|
"loss": 3.6067, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.2841437848340747e-05, |
|
"loss": 3.589, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.267099589234886e-05, |
|
"loss": 3.5954, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.2500553936356974e-05, |
|
"loss": 3.5915, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.2330111980365087e-05, |
|
"loss": 3.5764, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.21596700243732e-05, |
|
"loss": 3.5697, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.1989228068381314e-05, |
|
"loss": 3.5774, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.1818786112389427e-05, |
|
"loss": 3.5531, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.164834415639754e-05, |
|
"loss": 3.5561, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.1477902200405654e-05, |
|
"loss": 3.5495, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.1307460244413766e-05, |
|
"loss": 3.5391, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.1137018288421879e-05, |
|
"loss": 3.5351, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.0966576332429991e-05, |
|
"loss": 3.5234, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.0796134376438106e-05, |
|
"loss": 3.5186, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.0625692420446218e-05, |
|
"loss": 3.5007, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.0455250464454331e-05, |
|
"loss": 3.49, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0284808508462445e-05, |
|
"loss": 3.4919, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.0114366552470556e-05, |
|
"loss": 3.4896, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.94392459647867e-06, |
|
"loss": 3.4825, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.773482640486783e-06, |
|
"loss": 3.4936, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.603040684494896e-06, |
|
"loss": 3.478, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.43259872850301e-06, |
|
"loss": 3.4659, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.262156772511121e-06, |
|
"loss": 3.4502, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.091714816519236e-06, |
|
"loss": 3.4523, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.921272860527348e-06, |
|
"loss": 3.4599, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.75083090453546e-06, |
|
"loss": 3.4489, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.580388948543575e-06, |
|
"loss": 3.4568, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.409946992551688e-06, |
|
"loss": 3.4514, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.2395050365598e-06, |
|
"loss": 3.4451, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 8.069063080567913e-06, |
|
"loss": 3.4447, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.898621124576026e-06, |
|
"loss": 3.4381, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 7.728179168584138e-06, |
|
"loss": 3.4288, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 7.557737212592252e-06, |
|
"loss": 3.4213, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 7.387295256600365e-06, |
|
"loss": 3.4281, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.216853300608479e-06, |
|
"loss": 3.4222, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 7.046411344616591e-06, |
|
"loss": 3.4239, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 6.875969388624705e-06, |
|
"loss": 3.3998, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 6.705527432632818e-06, |
|
"loss": 3.418, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 6.53508547664093e-06, |
|
"loss": 3.3867, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 6.364643520649043e-06, |
|
"loss": 3.3929, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.194201564657156e-06, |
|
"loss": 3.3855, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 6.02375960866527e-06, |
|
"loss": 3.3964, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 5.853317652673382e-06, |
|
"loss": 3.3931, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 5.682875696681496e-06, |
|
"loss": 3.3802, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 5.512433740689609e-06, |
|
"loss": 3.3849, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 5.341991784697722e-06, |
|
"loss": 3.3844, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 5.171549828705834e-06, |
|
"loss": 3.3835, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 5.001107872713947e-06, |
|
"loss": 3.379, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.830665916722061e-06, |
|
"loss": 3.3698, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.660223960730173e-06, |
|
"loss": 3.3895, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.489782004738287e-06, |
|
"loss": 3.3659, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 4.3193400487464e-06, |
|
"loss": 3.3576, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 4.148898092754513e-06, |
|
"loss": 3.359, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.9784561367626255e-06, |
|
"loss": 3.3535, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.808014180770739e-06, |
|
"loss": 3.3653, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.637572224778852e-06, |
|
"loss": 3.3563, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.467130268786965e-06, |
|
"loss": 3.3519, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.2966883127950777e-06, |
|
"loss": 3.3573, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.1262463568031908e-06, |
|
"loss": 3.3555, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.955804400811304e-06, |
|
"loss": 3.3708, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.7853624448194173e-06, |
|
"loss": 3.369, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.61492048882753e-06, |
|
"loss": 3.3409, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.444478532835643e-06, |
|
"loss": 3.36, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.274036576843756e-06, |
|
"loss": 3.3517, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.103594620851869e-06, |
|
"loss": 3.3437, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.933152664859982e-06, |
|
"loss": 3.3288, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.762710708868095e-06, |
|
"loss": 3.3543, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.5922687528762083e-06, |
|
"loss": 3.3522, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.4218267968843211e-06, |
|
"loss": 3.3379, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.2513848408924342e-06, |
|
"loss": 3.343, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.0809428849005472e-06, |
|
"loss": 3.335, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 9.105009289086603e-07, |
|
"loss": 3.3479, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 7.400589729167732e-07, |
|
"loss": 3.3421, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 5.696170169248863e-07, |
|
"loss": 3.3387, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 3.991750609329993e-07, |
|
"loss": 3.34, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.2873310494111234e-07, |
|
"loss": 3.3498, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 5.829114894922535e-08, |
|
"loss": 3.3345, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 58671, |
|
"total_flos": 0, |
|
"train_runtime": 13443.7441, |
|
"train_samples_per_second": 4.364 |
|
} |
|
], |
|
"max_steps": 58671, |
|
"num_train_epochs": 3, |
|
"total_flos": 0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|