|
{ |
|
"best_metric": 0.8551470588235294, |
|
"best_model_checkpoint": "retrieval_model/run-3/checkpoint-35612", |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 35612, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.4704519236458057e-06, |
|
"loss": 0.4686, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.4210321821949366e-06, |
|
"loss": 0.3929, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.3716124407440683e-06, |
|
"loss": 0.3526, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.3221926992931996e-06, |
|
"loss": 0.3448, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.272772957842331e-06, |
|
"loss": 0.3648, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.223353216391462e-06, |
|
"loss": 0.3417, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.173933474940593e-06, |
|
"loss": 0.3259, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.124513733489725e-06, |
|
"loss": 0.3513, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.0750939920388557e-06, |
|
"loss": 0.3343, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.025674250587987e-06, |
|
"loss": 0.3465, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.9762545091371184e-06, |
|
"loss": 0.3218, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.9268347676862497e-06, |
|
"loss": 0.3134, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.877415026235381e-06, |
|
"loss": 0.3115, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.8279952847845123e-06, |
|
"loss": 0.3143, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.7785755433336436e-06, |
|
"loss": 0.3086, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.7291558018827745e-06, |
|
"loss": 0.3295, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.679736060431906e-06, |
|
"loss": 0.3056, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.8321005917159763, |
|
"eval_loss": 0.32298120856285095, |
|
"eval_runtime": 8.1227, |
|
"eval_samples_per_second": 487.029, |
|
"eval_steps_per_second": 7.633, |
|
"step": 8903 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.630316318981037e-06, |
|
"loss": 0.318, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.580896577530169e-06, |
|
"loss": 0.2982, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.5314768360792997e-06, |
|
"loss": 0.2874, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.482057094628431e-06, |
|
"loss": 0.2856, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.4326373531775623e-06, |
|
"loss": 0.3088, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.3832176117266936e-06, |
|
"loss": 0.2887, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.333797870275825e-06, |
|
"loss": 0.2844, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.2843781288249563e-06, |
|
"loss": 0.2884, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.2349583873740876e-06, |
|
"loss": 0.2838, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.1855386459232185e-06, |
|
"loss": 0.2988, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.1361189044723498e-06, |
|
"loss": 0.2881, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.0866991630214815e-06, |
|
"loss": 0.2822, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.0372794215706124e-06, |
|
"loss": 0.2904, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.9878596801197437e-06, |
|
"loss": 0.289, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.938439938668875e-06, |
|
"loss": 0.3042, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.889020197218006e-06, |
|
"loss": 0.286, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8396004557671378e-06, |
|
"loss": 0.2667, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.790180714316269e-06, |
|
"loss": 0.3005, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.8438514244500541, |
|
"eval_loss": 0.33582502603530884, |
|
"eval_runtime": 8.1185, |
|
"eval_samples_per_second": 487.283, |
|
"eval_steps_per_second": 7.637, |
|
"step": 17806 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.7407609728654002e-06, |
|
"loss": 0.2686, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.6913412314145313e-06, |
|
"loss": 0.2623, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6419214899636626e-06, |
|
"loss": 0.2821, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.592501748512794e-06, |
|
"loss": 0.2601, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.5430820070619253e-06, |
|
"loss": 0.2689, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.4936622656110566e-06, |
|
"loss": 0.2583, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.4442425241601877e-06, |
|
"loss": 0.2699, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.3948227827093192e-06, |
|
"loss": 0.2702, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.3454030412584503e-06, |
|
"loss": 0.2668, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.2959832998075814e-06, |
|
"loss": 0.2564, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.2465635583567129e-06, |
|
"loss": 0.2677, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.197143816905844e-06, |
|
"loss": 0.2478, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.1477240754549755e-06, |
|
"loss": 0.268, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.0983043340041066e-06, |
|
"loss": 0.2648, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.048884592553238e-06, |
|
"loss": 0.2334, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 9.994648511023692e-07, |
|
"loss": 0.2636, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 9.500451096515004e-07, |
|
"loss": 0.276, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 9.006253682006318e-07, |
|
"loss": 0.2459, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.8519724936663048, |
|
"eval_loss": 0.351912260055542, |
|
"eval_runtime": 8.1263, |
|
"eval_samples_per_second": 486.814, |
|
"eval_steps_per_second": 7.63, |
|
"step": 26709 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 8.51205626749763e-07, |
|
"loss": 0.253, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 8.017858852988942e-07, |
|
"loss": 0.2467, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 7.523661438480254e-07, |
|
"loss": 0.2241, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 7.029464023971568e-07, |
|
"loss": 0.2455, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 6.535266609462881e-07, |
|
"loss": 0.2601, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 6.041069194954194e-07, |
|
"loss": 0.2436, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 5.546871780445507e-07, |
|
"loss": 0.2563, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 5.052674365936819e-07, |
|
"loss": 0.236, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 4.5584769514281314e-07, |
|
"loss": 0.2366, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 4.0642795369194445e-07, |
|
"loss": 0.2328, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 3.570082122410757e-07, |
|
"loss": 0.2576, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 3.0758847079020696e-07, |
|
"loss": 0.2562, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.581687293393382e-07, |
|
"loss": 0.2305, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 2.087489878884695e-07, |
|
"loss": 0.2405, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 1.5932924643760078e-07, |
|
"loss": 0.2642, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 1.0990950498673206e-07, |
|
"loss": 0.2582, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 6.048976353586333e-08, |
|
"loss": 0.2307, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 1.1070022084994596e-08, |
|
"loss": 0.2361, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.8551470588235294, |
|
"eval_loss": 0.3704700767993927, |
|
"eval_runtime": 8.3254, |
|
"eval_samples_per_second": 475.172, |
|
"eval_steps_per_second": 7.447, |
|
"step": 35612 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 35612, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 1.5744961941257604e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": { |
|
"learning_rate": 3.5198716650966744e-06, |
|
"num_train_epochs": 4, |
|
"per_device_train_batch_size": 8, |
|
"seed": 35 |
|
} |
|
} |
|
|