{ "best_metric": 1.1821939945220947, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.03170577045022194, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006341154090044388, "grad_norm": 0.30783170461654663, "learning_rate": 2e-05, "loss": 1.8121, "step": 1 }, { "epoch": 0.0006341154090044388, "eval_loss": 1.7660390138626099, "eval_runtime": 70.6773, "eval_samples_per_second": 37.579, "eval_steps_per_second": 4.697, "step": 1 }, { "epoch": 0.0012682308180088776, "grad_norm": 0.3460358679294586, "learning_rate": 4e-05, "loss": 1.7863, "step": 2 }, { "epoch": 0.0019023462270133164, "grad_norm": 0.32900822162628174, "learning_rate": 6e-05, "loss": 1.7817, "step": 3 }, { "epoch": 0.0025364616360177552, "grad_norm": 0.3095568120479584, "learning_rate": 8e-05, "loss": 1.7955, "step": 4 }, { "epoch": 0.0031705770450221942, "grad_norm": 0.27294236421585083, "learning_rate": 0.0001, "loss": 1.7693, "step": 5 }, { "epoch": 0.003804692454026633, "grad_norm": 0.21330417692661285, "learning_rate": 9.987820251299122e-05, "loss": 1.7208, "step": 6 }, { "epoch": 0.004438807863031071, "grad_norm": 0.21737153828144073, "learning_rate": 9.951340343707852e-05, "loss": 1.7017, "step": 7 }, { "epoch": 0.0050729232720355105, "grad_norm": 0.20402231812477112, "learning_rate": 9.890738003669029e-05, "loss": 1.6078, "step": 8 }, { "epoch": 0.0057070386810399495, "grad_norm": 0.20581334829330444, "learning_rate": 9.806308479691595e-05, "loss": 1.5612, "step": 9 }, { "epoch": 0.0063411540900443885, "grad_norm": 0.21240539848804474, "learning_rate": 9.698463103929542e-05, "loss": 1.5863, "step": 10 }, { "epoch": 0.006975269499048827, "grad_norm": 0.21864530444145203, "learning_rate": 9.567727288213005e-05, "loss": 1.5565, "step": 11 }, { "epoch": 0.007609384908053266, "grad_norm": 0.21938222646713257, "learning_rate": 9.414737964294636e-05, "loss": 1.5197, "step": 12 }, { "epoch": 0.008243500317057704, "grad_norm": 0.21815477311611176, "learning_rate": 9.24024048078213e-05, "loss": 1.4577, "step": 13 }, { "epoch": 0.008877615726062143, "grad_norm": 0.20750688016414642, "learning_rate": 9.045084971874738e-05, "loss": 1.4351, "step": 14 }, { "epoch": 0.009511731135066582, "grad_norm": 0.19422230124473572, "learning_rate": 8.83022221559489e-05, "loss": 1.42, "step": 15 }, { "epoch": 0.010145846544071021, "grad_norm": 0.1986970752477646, "learning_rate": 8.596699001693255e-05, "loss": 1.3745, "step": 16 }, { "epoch": 0.01077996195307546, "grad_norm": 0.20829378068447113, "learning_rate": 8.345653031794292e-05, "loss": 1.3615, "step": 17 }, { "epoch": 0.011414077362079899, "grad_norm": 0.20694291591644287, "learning_rate": 8.07830737662829e-05, "loss": 1.3407, "step": 18 }, { "epoch": 0.012048192771084338, "grad_norm": 0.1956637054681778, "learning_rate": 7.795964517353735e-05, "loss": 1.3417, "step": 19 }, { "epoch": 0.012682308180088777, "grad_norm": 0.1798098236322403, "learning_rate": 7.500000000000001e-05, "loss": 1.3138, "step": 20 }, { "epoch": 0.013316423589093214, "grad_norm": 0.18528112769126892, "learning_rate": 7.191855733945387e-05, "loss": 1.3492, "step": 21 }, { "epoch": 0.013950538998097653, "grad_norm": 0.17369288206100464, "learning_rate": 6.873032967079561e-05, "loss": 1.2976, "step": 22 }, { "epoch": 0.014584654407102092, "grad_norm": 0.17437110841274261, "learning_rate": 6.545084971874738e-05, "loss": 1.2588, "step": 23 }, { "epoch": 0.015218769816106531, "grad_norm": 0.17446409165859222, "learning_rate": 6.209609477998338e-05, "loss": 1.2802, "step": 24 }, { "epoch": 0.01585288522511097, "grad_norm": 0.16432027518749237, "learning_rate": 5.868240888334653e-05, "loss": 1.2186, "step": 25 }, { "epoch": 0.01585288522511097, "eval_loss": 1.246727466583252, "eval_runtime": 71.8404, "eval_samples_per_second": 36.971, "eval_steps_per_second": 4.621, "step": 25 }, { "epoch": 0.016487000634115408, "grad_norm": 0.1652495563030243, "learning_rate": 5.522642316338268e-05, "loss": 1.2518, "step": 26 }, { "epoch": 0.017121116043119847, "grad_norm": 0.14872929453849792, "learning_rate": 5.174497483512506e-05, "loss": 1.2467, "step": 27 }, { "epoch": 0.017755231452124286, "grad_norm": 0.15020141005516052, "learning_rate": 4.825502516487497e-05, "loss": 1.1887, "step": 28 }, { "epoch": 0.018389346861128725, "grad_norm": 0.1462440937757492, "learning_rate": 4.477357683661734e-05, "loss": 1.256, "step": 29 }, { "epoch": 0.019023462270133164, "grad_norm": 0.14527584612369537, "learning_rate": 4.131759111665349e-05, "loss": 1.2337, "step": 30 }, { "epoch": 0.019657577679137603, "grad_norm": 0.1435730755329132, "learning_rate": 3.790390522001662e-05, "loss": 1.2035, "step": 31 }, { "epoch": 0.020291693088142042, "grad_norm": 0.13261650502681732, "learning_rate": 3.4549150281252636e-05, "loss": 1.2267, "step": 32 }, { "epoch": 0.02092580849714648, "grad_norm": 0.13555127382278442, "learning_rate": 3.12696703292044e-05, "loss": 1.1804, "step": 33 }, { "epoch": 0.02155992390615092, "grad_norm": 0.13484609127044678, "learning_rate": 2.8081442660546125e-05, "loss": 1.223, "step": 34 }, { "epoch": 0.02219403931515536, "grad_norm": 0.13975246250629425, "learning_rate": 2.500000000000001e-05, "loss": 1.214, "step": 35 }, { "epoch": 0.022828154724159798, "grad_norm": 0.1460408866405487, "learning_rate": 2.2040354826462668e-05, "loss": 1.2044, "step": 36 }, { "epoch": 0.023462270133164237, "grad_norm": 0.12898333370685577, "learning_rate": 1.9216926233717085e-05, "loss": 1.2115, "step": 37 }, { "epoch": 0.024096385542168676, "grad_norm": 0.1304863840341568, "learning_rate": 1.6543469682057106e-05, "loss": 1.1808, "step": 38 }, { "epoch": 0.024730500951173115, "grad_norm": 0.1284722238779068, "learning_rate": 1.4033009983067452e-05, "loss": 1.1606, "step": 39 }, { "epoch": 0.025364616360177554, "grad_norm": 0.14207273721694946, "learning_rate": 1.1697777844051105e-05, "loss": 1.1678, "step": 40 }, { "epoch": 0.02599873176918199, "grad_norm": 0.1767612099647522, "learning_rate": 9.549150281252633e-06, "loss": 1.2159, "step": 41 }, { "epoch": 0.02663284717818643, "grad_norm": 0.28571370244026184, "learning_rate": 7.597595192178702e-06, "loss": 1.1637, "step": 42 }, { "epoch": 0.027266962587190868, "grad_norm": 0.29650741815567017, "learning_rate": 5.852620357053651e-06, "loss": 1.1653, "step": 43 }, { "epoch": 0.027901077996195307, "grad_norm": 0.3082343637943268, "learning_rate": 4.322727117869951e-06, "loss": 1.167, "step": 44 }, { "epoch": 0.028535193405199746, "grad_norm": 0.3386911153793335, "learning_rate": 3.0153689607045845e-06, "loss": 1.1402, "step": 45 }, { "epoch": 0.029169308814204185, "grad_norm": 0.33259084820747375, "learning_rate": 1.9369152030840556e-06, "loss": 1.1347, "step": 46 }, { "epoch": 0.029803424223208624, "grad_norm": 0.3457428216934204, "learning_rate": 1.0926199633097157e-06, "loss": 1.1041, "step": 47 }, { "epoch": 0.030437539632213063, "grad_norm": 0.3795575201511383, "learning_rate": 4.865965629214819e-07, "loss": 1.0849, "step": 48 }, { "epoch": 0.031071655041217502, "grad_norm": 0.3850218653678894, "learning_rate": 1.2179748700879012e-07, "loss": 1.1325, "step": 49 }, { "epoch": 0.03170577045022194, "grad_norm": 0.41590115427970886, "learning_rate": 0.0, "loss": 1.1493, "step": 50 }, { "epoch": 0.03170577045022194, "eval_loss": 1.1821939945220947, "eval_runtime": 71.9855, "eval_samples_per_second": 36.896, "eval_steps_per_second": 4.612, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5138858479910912e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }