{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "global_step": 105,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 1e-05,
      "loss": 3.0768,
      "step": 1
    },
    {
      "epoch": 0.02,
      "learning_rate": 2e-05,
      "loss": 3.076,
      "step": 2
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.9995348836233517e-05,
      "loss": 2.7773,
      "step": 3
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.998139967159894e-05,
      "loss": 2.4612,
      "step": 4
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.9958165482066094e-05,
      "loss": 2.3567,
      "step": 5
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.992566788083908e-05,
      "loss": 2.2825,
      "step": 6
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.9883937098250962e-05,
      "loss": 2.2337,
      "step": 7
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.9833011953642525e-05,
      "loss": 2.174,
      "step": 8
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9772939819251247e-05,
      "loss": 2.1664,
      "step": 9
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9703776576144106e-05,
      "loss": 2.1585,
      "step": 10
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.962558656223516e-05,
      "loss": 2.1401,
      "step": 11
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.953844251243633e-05,
      "loss": 2.1218,
      "step": 12
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9442425490996987e-05,
      "loss": 2.1043,
      "step": 13
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.933762481609536e-05,
      "loss": 2.1185,
      "step": 14
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9224137976751797e-05,
      "loss": 2.1042,
      "step": 15
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.910207054214133e-05,
      "loss": 2.0782,
      "step": 16
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.8971536063389745e-05,
      "loss": 2.0411,
      "step": 17
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.8832655967944607e-05,
      "loss": 2.0752,
      "step": 18
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.868555944661949e-05,
      "loss": 2.0272,
      "step": 19
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.853038333341642e-05,
      "loss": 2.0785,
      "step": 20
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.8367271978238422e-05,
      "loss": 2.0635,
      "step": 21
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.8196377112610524e-05,
      "loss": 2.0478,
      "step": 22
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.8017857708534107e-05,
      "loss": 2.0435,
      "step": 23
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.783187983060594e-05,
      "loss": 2.0279,
      "step": 24
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.763861648153945e-05,
      "loss": 2.0068,
      "step": 25
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.743824744123196e-05,
      "loss": 2.0045,
      "step": 26
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.7230959099527512e-05,
      "loss": 1.9743,
      "step": 27
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.7016944282830935e-05,
      "loss": 2.0074,
      "step": 28
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.6796402074734404e-05,
      "loss": 2.0146,
      "step": 29
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.6569537630823385e-05,
      "loss": 2.0052,
      "step": 30
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.6336561987834155e-05,
      "loss": 2.0006,
      "step": 31
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.6097691867340547e-05,
      "loss": 1.986,
      "step": 32
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.585314947415242e-05,
      "loss": 1.9663,
      "step": 33
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.5603162289613503e-05,
      "loss": 1.9941,
      "step": 34
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.5347962859990744e-05,
      "loss": 1.9706,
      "step": 35
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.5087788580152207e-05,
      "loss": 1.9814,
      "step": 36
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.4822881472734563e-05,
      "loss": 1.9713,
      "step": 37
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.4553487963005712e-05,
      "loss": 1.9883,
      "step": 38
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.427985864963193e-05,
      "loss": 1.9605,
      "step": 39
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.400224807156278e-05,
      "loss": 1.9615,
      "step": 40
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.3720914471250644e-05,
      "loss": 1.9548,
      "step": 41
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.3436119554425133e-05,
      "loss": 1.9549,
      "step": 42
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.314812824664585e-05,
      "loss": 1.9659,
      "step": 43
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.285720844685996e-05,
      "loss": 1.9628,
      "step": 44
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.2563630778193805e-05,
      "loss": 1.9431,
      "step": 45
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.2267668336210411e-05,
      "loss": 1.9727,
      "step": 46
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.1969596434867063e-05,
      "loss": 1.9604,
      "step": 47
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.1669692350409223e-05,
      "loss": 1.939,
      "step": 48
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.1368235063439103e-05,
      "loss": 1.9414,
      "step": 49
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.1065504999398762e-05,
      "loss": 1.9298,
      "step": 50
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.0761783767709182e-05,
      "loss": 1.9248,
      "step": 51
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.0457353899807947e-05,
      "loss": 1.9556,
      "step": 52
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.015249858632926e-05,
      "loss": 1.9553,
      "step": 53
    },
    {
      "epoch": 0.51,
      "learning_rate": 9.847501413670742e-06,
      "loss": 1.9302,
      "step": 54
    },
    {
      "epoch": 0.52,
      "learning_rate": 9.542646100192056e-06,
      "loss": 1.9398,
      "step": 55
    },
    {
      "epoch": 0.53,
      "learning_rate": 9.238216232290821e-06,
      "loss": 1.9346,
      "step": 56
    },
    {
      "epoch": 0.54,
      "learning_rate": 8.934495000601241e-06,
      "loss": 1.9306,
      "step": 57
    },
    {
      "epoch": 0.55,
      "learning_rate": 8.6317649365609e-06,
      "loss": 1.9383,
      "step": 58
    },
    {
      "epoch": 0.56,
      "learning_rate": 8.330307649590782e-06,
      "loss": 1.9141,
      "step": 59
    },
    {
      "epoch": 0.57,
      "learning_rate": 8.030403565132942e-06,
      "loss": 1.9167,
      "step": 60
    },
    {
      "epoch": 0.58,
      "learning_rate": 7.732331663789592e-06,
      "loss": 1.933,
      "step": 61
    },
    {
      "epoch": 0.59,
      "learning_rate": 7.436369221806201e-06,
      "loss": 1.9143,
      "step": 62
    },
    {
      "epoch": 0.6,
      "learning_rate": 7.142791553140045e-06,
      "loss": 1.9258,
      "step": 63
    },
    {
      "epoch": 0.61,
      "learning_rate": 6.851871753354154e-06,
      "loss": 1.9299,
      "step": 64
    },
    {
      "epoch": 0.62,
      "learning_rate": 6.563880445574873e-06,
      "loss": 1.9293,
      "step": 65
    },
    {
      "epoch": 0.63,
      "learning_rate": 6.2790855287493605e-06,
      "loss": 1.9081,
      "step": 66
    },
    {
      "epoch": 0.64,
      "learning_rate": 5.99775192843722e-06,
      "loss": 1.915,
      "step": 67
    },
    {
      "epoch": 0.65,
      "learning_rate": 5.720141350368072e-06,
      "loss": 1.9079,
      "step": 68
    },
    {
      "epoch": 0.66,
      "learning_rate": 5.446512036994287e-06,
      "loss": 1.9374,
      "step": 69
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.177118527265438e-06,
      "loss": 1.91,
      "step": 70
    },
    {
      "epoch": 0.68,
      "learning_rate": 4.912211419847795e-06,
      "loss": 1.9288,
      "step": 71
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.652037140009259e-06,
      "loss": 1.9157,
      "step": 72
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.396837710386503e-06,
      "loss": 1.9313,
      "step": 73
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.1468505258475785e-06,
      "loss": 1.9036,
      "step": 74
    },
    {
      "epoch": 0.71,
      "learning_rate": 3.902308132659457e-06,
      "loss": 1.9472,
      "step": 75
    },
    {
      "epoch": 0.72,
      "learning_rate": 3.6634380121658484e-06,
      "loss": 1.9138,
      "step": 76
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.4304623691766193e-06,
      "loss": 1.939,
      "step": 77
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.203597925265598e-06,
      "loss": 1.9081,
      "step": 78
    },
    {
      "epoch": 0.75,
      "learning_rate": 2.98305571716907e-06,
      "loss": 1.8986,
      "step": 79
    },
    {
      "epoch": 0.76,
      "learning_rate": 2.7690409004724883e-06,
      "loss": 1.893,
      "step": 80
    },
    {
      "epoch": 0.77,
      "learning_rate": 2.56175255876804e-06,
      "loss": 1.8973,
      "step": 81
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.3613835184605527e-06,
      "loss": 1.9217,
      "step": 82
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.1681201693940667e-06,
      "loss": 1.9373,
      "step": 83
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.982142291465896e-06,
      "loss": 1.8995,
      "step": 84
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.8036228873894745e-06,
      "loss": 1.9153,
      "step": 85
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.6327280217615793e-06,
      "loss": 1.8962,
      "step": 86
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.4696166665835853e-06,
      "loss": 1.9066,
      "step": 87
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.3144405533805138e-06,
      "loss": 1.9025,
      "step": 88
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.1673440320553941e-06,
      "loss": 1.893,
      "step": 89
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.02846393661026e-06,
      "loss": 1.9014,
      "step": 90
    },
    {
      "epoch": 0.87,
      "learning_rate": 8.979294578586739e-07,
      "loss": 1.8953,
      "step": 91
    },
    {
      "epoch": 0.88,
      "learning_rate": 7.758620232482083e-07,
      "loss": 1.9089,
      "step": 92
    },
    {
      "epoch": 0.89,
      "learning_rate": 6.623751839046455e-07,
      "loss": 1.8885,
      "step": 93
    },
    {
      "epoch": 0.9,
      "learning_rate": 5.575745090030138e-07,
      "loss": 1.909,
      "step": 94
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.61557487563673e-07,
      "loss": 1.9084,
      "step": 95
    },
    {
      "epoch": 0.91,
      "learning_rate": 3.7441343776484116e-07,
      "loss": 1.9123,
      "step": 96
    },
    {
      "epoch": 0.92,
      "learning_rate": 2.9622342385589256e-07,
      "loss": 1.9122,
      "step": 97
    },
    {
      "epoch": 0.93,
      "learning_rate": 2.2706018074875046e-07,
      "loss": 1.9041,
      "step": 98
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.669880463574758e-07,
      "loss": 1.8988,
      "step": 99
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.160629017490389e-07,
      "loss": 1.9071,
      "step": 100
    },
    {
      "epoch": 0.96,
      "learning_rate": 7.433211916092143e-08,
      "loss": 1.9057,
      "step": 101
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.183451793390747e-08,
      "loss": 1.9146,
      "step": 102
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.860032840106163e-08,
      "loss": 1.9051,
      "step": 103
    },
    {
      "epoch": 0.99,
      "learning_rate": 4.651163766484779e-09,
      "loss": 1.9185,
      "step": 104
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.0,
      "loss": 1.8839,
      "step": 105
    },
    {
      "epoch": 1.0,
      "step": 105,
      "total_flos": 6.475878851632169e+17,
      "train_loss": 2.0050100269771756,
      "train_runtime": 1593.9957,
      "train_samples_per_second": 62.742,
      "train_steps_per_second": 0.066
    }
  ],
  "max_steps": 105,
  "num_train_epochs": 1,
  "total_flos": 6.475878851632169e+17,
  "trial_name": null,
  "trial_params": null
}