{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.9638554216867465, "eval_steps": 500, "global_step": 515, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0963855421686747, "grad_norm": 0.9117392307242835, "learning_rate": 9.990699835799469e-05, "loss": 1.9779, "step": 10 }, { "epoch": 0.1927710843373494, "grad_norm": 0.9121432880636753, "learning_rate": 9.96283394041954e-05, "loss": 1.6698, "step": 20 }, { "epoch": 0.2891566265060241, "grad_norm": 0.8113864009102607, "learning_rate": 9.916505976821263e-05, "loss": 1.5706, "step": 30 }, { "epoch": 0.3855421686746988, "grad_norm": 0.7722456373389991, "learning_rate": 9.851888288072053e-05, "loss": 1.5093, "step": 40 }, { "epoch": 0.4819277108433735, "grad_norm": 0.840195041159072, "learning_rate": 9.769221256218164e-05, "loss": 1.463, "step": 50 }, { "epoch": 0.5783132530120482, "grad_norm": 0.9154016698600571, "learning_rate": 9.668812408047679e-05, "loss": 1.4829, "step": 60 }, { "epoch": 0.6746987951807228, "grad_norm": 0.890732828345114, "learning_rate": 9.551035271070664e-05, "loss": 1.4683, "step": 70 }, { "epoch": 0.7710843373493976, "grad_norm": 0.9157645257382339, "learning_rate": 9.416327983972304e-05, "loss": 1.3965, "step": 80 }, { "epoch": 0.8674698795180723, "grad_norm": 0.8897525548859271, "learning_rate": 9.265191666708209e-05, "loss": 1.4475, "step": 90 }, { "epoch": 0.963855421686747, "grad_norm": 0.861081307668928, "learning_rate": 9.098188556305263e-05, "loss": 1.4124, "step": 100 }, { "epoch": 1.0602409638554218, "grad_norm": 0.85116234930626, "learning_rate": 8.915939915302968e-05, "loss": 1.3647, "step": 110 }, { "epoch": 1.1566265060240963, "grad_norm": 0.9740957258545101, "learning_rate": 8.71912372061598e-05, "loss": 1.3204, "step": 120 }, { "epoch": 1.2530120481927711, "grad_norm": 1.0864987018116612, "learning_rate": 8.508472141415467e-05, "loss": 1.335, "step": 130 }, { "epoch": 1.3493975903614457, "grad_norm": 1.0426867861383922, "learning_rate": 8.284768815411692e-05, "loss": 1.2918, "step": 140 }, { "epoch": 1.4457831325301205, "grad_norm": 1.1563010080529215, "learning_rate": 8.048845933670273e-05, "loss": 1.3137, "step": 150 }, { "epoch": 1.5421686746987953, "grad_norm": 1.1357035664877724, "learning_rate": 7.801581144806752e-05, "loss": 1.2938, "step": 160 }, { "epoch": 1.6385542168674698, "grad_norm": 1.1274251128308745, "learning_rate": 7.543894290076103e-05, "loss": 1.3013, "step": 170 }, { "epoch": 1.7349397590361446, "grad_norm": 1.1376429010873366, "learning_rate": 7.276743981502856e-05, "loss": 1.3333, "step": 180 }, { "epoch": 1.8313253012048194, "grad_norm": 1.1848553562459525, "learning_rate": 7.00112403578139e-05, "loss": 1.2946, "step": 190 }, { "epoch": 1.927710843373494, "grad_norm": 1.1037089869727106, "learning_rate": 6.718059777212567e-05, "loss": 1.2859, "step": 200 }, { "epoch": 2.0240963855421685, "grad_norm": 1.1061364608256432, "learning_rate": 6.42860422342998e-05, "loss": 1.2725, "step": 210 }, { "epoch": 2.1204819277108435, "grad_norm": 1.3057975365378993, "learning_rate": 6.133834168105206e-05, "loss": 1.1922, "step": 220 }, { "epoch": 2.216867469879518, "grad_norm": 1.289860777590967, "learning_rate": 5.8348461752046116e-05, "loss": 1.1768, "step": 230 }, { "epoch": 2.3132530120481927, "grad_norm": 1.393963581012029, "learning_rate": 5.532752499699381e-05, "loss": 1.1828, "step": 240 }, { "epoch": 2.4096385542168672, "grad_norm": 1.4444324116034009, "learning_rate": 5.228676949903973e-05, 
"loss": 1.1874, "step": 250 }, { "epoch": 2.5060240963855422, "grad_norm": 1.4377450110610097, "learning_rate": 4.923750706835371e-05, "loss": 1.1709, "step": 260 }, { "epoch": 2.602409638554217, "grad_norm": 1.4052399247965977, "learning_rate": 4.619108116145411e-05, "loss": 1.1545, "step": 270 }, { "epoch": 2.6987951807228914, "grad_norm": 1.4874281591631962, "learning_rate": 4.31588246828045e-05, "loss": 1.1515, "step": 280 }, { "epoch": 2.7951807228915664, "grad_norm": 1.510345480121206, "learning_rate": 4.015201782566471e-05, "loss": 1.1655, "step": 290 }, { "epoch": 2.891566265060241, "grad_norm": 1.4989598810015492, "learning_rate": 3.7181846109031005e-05, "loss": 1.1565, "step": 300 }, { "epoch": 2.9879518072289155, "grad_norm": 1.499903571294907, "learning_rate": 3.4259358766770766e-05, "loss": 1.189, "step": 310 }, { "epoch": 3.0843373493975905, "grad_norm": 1.5828347380531096, "learning_rate": 3.1395427643746796e-05, "loss": 1.0681, "step": 320 }, { "epoch": 3.180722891566265, "grad_norm": 1.68873547972742, "learning_rate": 2.860070675184036e-05, "loss": 1.0827, "step": 330 }, { "epoch": 3.2771084337349397, "grad_norm": 1.6928065503330352, "learning_rate": 2.588559263632719e-05, "loss": 1.055, "step": 340 }, { "epoch": 3.3734939759036147, "grad_norm": 1.656803450443418, "learning_rate": 2.3260185700046294e-05, "loss": 1.0615, "step": 350 }, { "epoch": 3.4698795180722892, "grad_norm": 1.7763695778702038, "learning_rate": 2.0734252629237894e-05, "loss": 1.054, "step": 360 }, { "epoch": 3.566265060240964, "grad_norm": 1.7692488057265054, "learning_rate": 1.831719006082924e-05, "loss": 1.0659, "step": 370 }, { "epoch": 3.662650602409639, "grad_norm": 1.7554003807515917, "learning_rate": 1.601798962632799e-05, "loss": 1.0694, "step": 380 }, { "epoch": 3.7590361445783134, "grad_norm": 1.822610764821149, "learning_rate": 1.384520450236244e-05, "loss": 1.0566, "step": 390 }, { "epoch": 3.855421686746988, "grad_norm": 1.8319206289975956, "learning_rate": 1.1806917592302762e-05, "loss": 1.0494, "step": 400 }, { "epoch": 3.9518072289156625, "grad_norm": 1.9096676463205011, "learning_rate": 9.91071145732948e-06, "loss": 1.0577, "step": 410 }, { "epoch": 4.048192771084337, "grad_norm": 1.7249401183320767, "learning_rate": 8.163640108807896e-06, "loss": 1.0293, "step": 420 }, { "epoch": 4.144578313253012, "grad_norm": 1.826717977143128, "learning_rate": 6.572202766902569e-06, "loss": 1.0078, "step": 430 }, { "epoch": 4.240963855421687, "grad_norm": 1.8877052786758952, "learning_rate": 5.1423196830513e-06, "loss": 0.9904, "step": 440 }, { "epoch": 4.337349397590361, "grad_norm": 1.8808793283796603, "learning_rate": 3.879310116241042e-06, "loss": 0.9815, "step": 450 }, { "epoch": 4.433734939759036, "grad_norm": 1.882737371988685, "learning_rate": 2.787872545015069e-06, "loss": 0.9995, "step": 460 }, { "epoch": 4.530120481927711, "grad_norm": 1.9125920590825105, "learning_rate": 1.8720671888242059e-06, "loss": 1.0179, "step": 470 }, { "epoch": 4.626506024096385, "grad_norm": 1.8949023884002059, "learning_rate": 1.1353009037437523e-06, "loss": 1.002, "step": 480 }, { "epoch": 4.72289156626506, "grad_norm": 1.8902529975556408, "learning_rate": 5.803145087451945e-07, "loss": 0.9862, "step": 490 }, { "epoch": 4.8192771084337345, "grad_norm": 1.915141448607233, "learning_rate": 2.0917258966953733e-07, "loss": 1.0089, "step": 500 }, { "epoch": 4.9156626506024095, "grad_norm": 1.8921692295680217, "learning_rate": 2.3255818832423894e-08, "loss": 0.9775, "step": 510 }, { "epoch": 4.9638554216867465, 
"step": 515, "total_flos": 72399436972032.0, "train_loss": 1.2166850728896057, "train_runtime": 4721.4385, "train_samples_per_second": 0.879, "train_steps_per_second": 0.109 } ], "logging_steps": 10, "max_steps": 515, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 72399436972032.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }