diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,33418 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.102509883239864, + "eval_steps": 5000, + "global_step": 55500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999540314424934e-05, + "loss": 0.9745, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.999080628849867e-05, + "loss": 0.9978, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9986209432748005e-05, + "loss": 0.92, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.998161257699734e-05, + "loss": 0.9636, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.9977015721246664e-05, + "loss": 0.9684, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972418865496e-05, + "loss": 0.9837, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 4.996782200974534e-05, + "loss": 0.9911, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.996322515399467e-05, + "loss": 0.9517, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.9958628298244004e-05, + "loss": 0.9236, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.995403144249334e-05, + "loss": 0.98, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.994943458674267e-05, + "loss": 0.937, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.9944837730992006e-05, + "loss": 0.8949, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.994024087524134e-05, + "loss": 0.9701, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.9935644019490666e-05, + "loss": 0.9134, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.993104716374e-05, + "loss": 1.0187, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.992645030798934e-05, + "loss": 0.941, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 4.992185345223867e-05, + "loss": 0.9512, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 4.9917256596488005e-05, + "loss": 0.9193, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 4.991265974073734e-05, + "loss": 0.8975, + "step": 190 + }, + { + "epoch": 0.02, + "learning_rate": 4.990806288498667e-05, + "loss": 1.0146, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 4.990346602923601e-05, + "loss": 0.8947, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.9898869173485344e-05, + "loss": 0.9947, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.989427231773467e-05, + "loss": 0.9054, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9889675461984e-05, + "loss": 0.8698, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.988507860623334e-05, + "loss": 0.947, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.988048175048267e-05, + "loss": 0.9899, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9875884894732006e-05, + "loss": 1.0354, + "step": 270 + }, + { + "epoch": 0.03, + "learning_rate": 4.987128803898134e-05, + "loss": 1.0453, + "step": 280 + }, + { + "epoch": 0.03, + "learning_rate": 4.986669118323067e-05, + "loss": 0.9188, + "step": 290 + }, + { + "epoch": 0.03, + "learning_rate": 4.986209432748001e-05, + "loss": 0.93, + "step": 300 + }, + { + "epoch": 0.03, + "learning_rate": 4.9857497471729345e-05, + "loss": 0.9614, + "step": 310 + }, + { + "epoch": 0.03, + "learning_rate": 4.985290061597867e-05, + "loss": 0.9783, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 4.9848303760228005e-05, + "loss": 0.9796, + "step": 330 + }, + { + "epoch": 0.03, + "learning_rate": 4.984370690447734e-05, + "loss": 1.0028, + "step": 340 + }, + { + "epoch": 0.03, + "learning_rate": 4.983911004872667e-05, + "loss": 0.8967, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.983451319297601e-05, + "loss": 0.9753, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9829916337225344e-05, + "loss": 1.0108, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.982531948147467e-05, + "loss": 0.9963, + "step": 380 + }, + { + "epoch": 0.04, + "learning_rate": 4.982072262572401e-05, + "loss": 0.9727, + "step": 390 + }, + { + "epoch": 0.04, + "learning_rate": 4.9816125769973346e-05, + "loss": 0.943, + "step": 400 + }, + { + "epoch": 0.04, + "learning_rate": 4.981152891422267e-05, + "loss": 1.006, + "step": 410 + }, + { + "epoch": 0.04, + "learning_rate": 4.9806932058472006e-05, + "loss": 0.9082, + "step": 420 + }, + { + "epoch": 0.04, + "learning_rate": 4.980233520272134e-05, + "loss": 1.0516, + "step": 430 + }, + { + "epoch": 0.04, + "learning_rate": 4.979773834697067e-05, + "loss": 0.9686, + "step": 440 + }, + { + "epoch": 0.04, + "learning_rate": 4.979314149122001e-05, + "loss": 0.9554, + "step": 450 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788544635469345e-05, + "loss": 0.9972, + "step": 460 + }, + { + "epoch": 0.04, + "learning_rate": 4.9783947779718675e-05, + "loss": 1.0398, + "step": 470 + }, + { + "epoch": 0.04, + "learning_rate": 4.977935092396801e-05, + "loss": 1.0127, + "step": 480 + }, + { + "epoch": 0.05, + "learning_rate": 4.977475406821734e-05, + "loss": 0.8921, + "step": 490 + }, + { + "epoch": 0.05, + "learning_rate": 4.977015721246667e-05, + "loss": 1.0424, + "step": 500 + }, + { + "epoch": 0.05, + "learning_rate": 4.976556035671601e-05, + "loss": 1.0228, + "step": 510 + }, + { + "epoch": 0.05, + "learning_rate": 4.9760963500965343e-05, + "loss": 0.978, + "step": 520 + }, + { + "epoch": 0.05, + "learning_rate": 4.975636664521467e-05, + "loss": 0.9348, + "step": 530 + }, + { + "epoch": 0.05, + "learning_rate": 4.975176978946401e-05, + "loss": 0.9722, + "step": 540 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747172933713346e-05, + "loss": 0.8894, + "step": 550 + }, + { + "epoch": 0.05, + "learning_rate": 4.9742576077962676e-05, + "loss": 0.9188, + "step": 560 + }, + { + "epoch": 0.05, + "learning_rate": 4.973797922221201e-05, + "loss": 1.0509, + "step": 570 + }, + { + "epoch": 0.05, + "learning_rate": 4.973338236646134e-05, + "loss": 0.9719, + "step": 580 + }, + { + "epoch": 0.05, + "learning_rate": 4.972878551071067e-05, + "loss": 0.9682, + "step": 590 + }, + { + "epoch": 0.06, + "learning_rate": 4.972418865496001e-05, + "loss": 0.8215, + "step": 600 + }, + { + "epoch": 0.06, + "learning_rate": 4.9719591799209345e-05, + "loss": 0.9522, + "step": 610 + }, + { + "epoch": 0.06, + "learning_rate": 4.9714994943458674e-05, + "loss": 0.9875, + "step": 620 + }, + { + "epoch": 0.06, + "learning_rate": 4.971039808770801e-05, + "loss": 0.8945, + "step": 630 + }, + { + "epoch": 0.06, + "learning_rate": 4.970580123195735e-05, + "loss": 0.9984, + "step": 640 + }, + { + "epoch": 0.06, + "learning_rate": 4.970120437620668e-05, + "loss": 0.9307, + "step": 650 + }, + { + "epoch": 0.06, + "learning_rate": 4.9696607520456013e-05, + "loss": 1.0127, + "step": 660 + }, + { + "epoch": 0.06, + "learning_rate": 4.969201066470534e-05, + "loss": 0.8825, + "step": 670 + }, + { + "epoch": 0.06, + "learning_rate": 4.968741380895467e-05, + "loss": 0.9459, + "step": 680 + }, + { + "epoch": 0.06, + "learning_rate": 4.968281695320401e-05, + "loss": 1.0124, + "step": 690 + }, + { + "epoch": 0.06, + "learning_rate": 4.9678220097453346e-05, + "loss": 0.9543, + "step": 700 + }, + { + "epoch": 0.07, + "learning_rate": 4.9673623241702675e-05, + "loss": 0.9098, + "step": 710 + }, + { + "epoch": 0.07, + "learning_rate": 4.966902638595201e-05, + "loss": 0.8344, + "step": 720 + }, + { + "epoch": 0.07, + "learning_rate": 4.966442953020135e-05, + "loss": 0.8529, + "step": 730 + }, + { + "epoch": 0.07, + "learning_rate": 4.965983267445068e-05, + "loss": 0.9051, + "step": 740 + }, + { + "epoch": 0.07, + "learning_rate": 4.9655235818700015e-05, + "loss": 0.9432, + "step": 750 + }, + { + "epoch": 0.07, + "learning_rate": 4.9650638962949344e-05, + "loss": 1.0073, + "step": 760 + }, + { + "epoch": 0.07, + "learning_rate": 4.9646042107198674e-05, + "loss": 1.0228, + "step": 770 + }, + { + "epoch": 0.07, + "learning_rate": 4.964144525144801e-05, + "loss": 0.8818, + "step": 780 + }, + { + "epoch": 0.07, + "learning_rate": 4.963684839569735e-05, + "loss": 1.0007, + "step": 790 + }, + { + "epoch": 0.07, + "learning_rate": 4.963225153994668e-05, + "loss": 0.926, + "step": 800 + }, + { + "epoch": 0.07, + "learning_rate": 4.962765468419601e-05, + "loss": 0.9359, + "step": 810 + }, + { + "epoch": 0.08, + "learning_rate": 4.962305782844534e-05, + "loss": 0.9373, + "step": 820 + }, + { + "epoch": 0.08, + "learning_rate": 4.961846097269468e-05, + "loss": 1.0229, + "step": 830 + }, + { + "epoch": 0.08, + "learning_rate": 4.9613864116944016e-05, + "loss": 0.9919, + "step": 840 + }, + { + "epoch": 0.08, + "learning_rate": 4.9609267261193346e-05, + "loss": 0.9905, + "step": 850 + }, + { + "epoch": 0.08, + "learning_rate": 4.9604670405442675e-05, + "loss": 0.9602, + "step": 860 + }, + { + "epoch": 0.08, + "learning_rate": 4.960007354969201e-05, + "loss": 1.026, + "step": 870 + }, + { + "epoch": 0.08, + "learning_rate": 4.959547669394135e-05, + "loss": 0.9833, + "step": 880 + }, + { + "epoch": 0.08, + "learning_rate": 4.959087983819068e-05, + "loss": 0.8711, + "step": 890 + }, + { + "epoch": 0.08, + "learning_rate": 4.9586282982440014e-05, + "loss": 0.9478, + "step": 900 + }, + { + "epoch": 0.08, + "learning_rate": 4.9581686126689344e-05, + "loss": 1.0195, + "step": 910 + }, + { + "epoch": 0.08, + "learning_rate": 4.957708927093868e-05, + "loss": 0.8746, + "step": 920 + }, + { + "epoch": 0.09, + "learning_rate": 4.957249241518802e-05, + "loss": 0.8922, + "step": 930 + }, + { + "epoch": 0.09, + "learning_rate": 4.956789555943735e-05, + "loss": 1.0195, + "step": 940 + }, + { + "epoch": 0.09, + "learning_rate": 4.9563298703686676e-05, + "loss": 0.9601, + "step": 950 + }, + { + "epoch": 0.09, + "learning_rate": 4.955870184793601e-05, + "loss": 0.8524, + "step": 960 + }, + { + "epoch": 0.09, + "learning_rate": 4.955410499218535e-05, + "loss": 0.9508, + "step": 970 + }, + { + "epoch": 0.09, + "learning_rate": 4.954950813643468e-05, + "loss": 1.0284, + "step": 980 + }, + { + "epoch": 0.09, + "learning_rate": 4.9544911280684016e-05, + "loss": 0.9629, + "step": 990 + }, + { + "epoch": 0.09, + "learning_rate": 4.9540314424933345e-05, + "loss": 0.9604, + "step": 1000 + }, + { + "epoch": 0.09, + "learning_rate": 4.953571756918268e-05, + "loss": 0.9483, + "step": 1010 + }, + { + "epoch": 0.09, + "learning_rate": 4.953112071343202e-05, + "loss": 0.9298, + "step": 1020 + }, + { + "epoch": 0.09, + "learning_rate": 4.952652385768135e-05, + "loss": 0.9247, + "step": 1030 + }, + { + "epoch": 0.1, + "learning_rate": 4.952192700193068e-05, + "loss": 0.9759, + "step": 1040 + }, + { + "epoch": 0.1, + "learning_rate": 4.9517330146180014e-05, + "loss": 0.9612, + "step": 1050 + }, + { + "epoch": 0.1, + "learning_rate": 4.951273329042935e-05, + "loss": 1.0357, + "step": 1060 + }, + { + "epoch": 0.1, + "learning_rate": 4.950813643467868e-05, + "loss": 0.9234, + "step": 1070 + }, + { + "epoch": 0.1, + "learning_rate": 4.950353957892802e-05, + "loss": 1.004, + "step": 1080 + }, + { + "epoch": 0.1, + "learning_rate": 4.9498942723177346e-05, + "loss": 1.038, + "step": 1090 + }, + { + "epoch": 0.1, + "learning_rate": 4.949434586742668e-05, + "loss": 0.9078, + "step": 1100 + }, + { + "epoch": 0.1, + "learning_rate": 4.948974901167602e-05, + "loss": 0.8739, + "step": 1110 + }, + { + "epoch": 0.1, + "learning_rate": 4.948515215592535e-05, + "loss": 1.0222, + "step": 1120 + }, + { + "epoch": 0.1, + "learning_rate": 4.948055530017468e-05, + "loss": 0.9808, + "step": 1130 + }, + { + "epoch": 0.1, + "learning_rate": 4.9475958444424015e-05, + "loss": 0.9663, + "step": 1140 + }, + { + "epoch": 0.11, + "learning_rate": 4.9471361588673345e-05, + "loss": 1.0217, + "step": 1150 + }, + { + "epoch": 0.11, + "learning_rate": 4.946676473292268e-05, + "loss": 1.0532, + "step": 1160 + }, + { + "epoch": 0.11, + "learning_rate": 4.946216787717202e-05, + "loss": 0.9233, + "step": 1170 + }, + { + "epoch": 0.11, + "learning_rate": 4.945757102142135e-05, + "loss": 0.8988, + "step": 1180 + }, + { + "epoch": 0.11, + "learning_rate": 4.9452974165670684e-05, + "loss": 0.9789, + "step": 1190 + }, + { + "epoch": 0.11, + "learning_rate": 4.944837730992002e-05, + "loss": 0.948, + "step": 1200 + }, + { + "epoch": 0.11, + "learning_rate": 4.944378045416935e-05, + "loss": 1.022, + "step": 1210 + }, + { + "epoch": 0.11, + "learning_rate": 4.943918359841868e-05, + "loss": 1.0613, + "step": 1220 + }, + { + "epoch": 0.11, + "learning_rate": 4.9434586742668016e-05, + "loss": 0.8609, + "step": 1230 + }, + { + "epoch": 0.11, + "learning_rate": 4.9429989886917346e-05, + "loss": 0.9011, + "step": 1240 + }, + { + "epoch": 0.11, + "learning_rate": 4.942539303116668e-05, + "loss": 0.9044, + "step": 1250 + }, + { + "epoch": 0.12, + "learning_rate": 4.942079617541602e-05, + "loss": 0.8883, + "step": 1260 + }, + { + "epoch": 0.12, + "learning_rate": 4.941619931966535e-05, + "loss": 0.9078, + "step": 1270 + }, + { + "epoch": 0.12, + "learning_rate": 4.9411602463914685e-05, + "loss": 1.0061, + "step": 1280 + }, + { + "epoch": 0.12, + "learning_rate": 4.940700560816402e-05, + "loss": 0.9432, + "step": 1290 + }, + { + "epoch": 0.12, + "learning_rate": 4.940240875241335e-05, + "loss": 0.9144, + "step": 1300 + }, + { + "epoch": 0.12, + "learning_rate": 4.939781189666268e-05, + "loss": 0.8251, + "step": 1310 + }, + { + "epoch": 0.12, + "learning_rate": 4.939321504091202e-05, + "loss": 0.9741, + "step": 1320 + }, + { + "epoch": 0.12, + "learning_rate": 4.938861818516135e-05, + "loss": 0.8966, + "step": 1330 + }, + { + "epoch": 0.12, + "learning_rate": 4.9384021329410684e-05, + "loss": 0.8834, + "step": 1340 + }, + { + "epoch": 0.12, + "learning_rate": 4.937942447366002e-05, + "loss": 0.9911, + "step": 1350 + }, + { + "epoch": 0.13, + "learning_rate": 4.937482761790935e-05, + "loss": 0.9333, + "step": 1360 + }, + { + "epoch": 0.13, + "learning_rate": 4.9370230762158687e-05, + "loss": 1.024, + "step": 1370 + }, + { + "epoch": 0.13, + "learning_rate": 4.936563390640802e-05, + "loss": 0.912, + "step": 1380 + }, + { + "epoch": 0.13, + "learning_rate": 4.936103705065735e-05, + "loss": 1.1053, + "step": 1390 + }, + { + "epoch": 0.13, + "learning_rate": 4.935644019490668e-05, + "loss": 0.8265, + "step": 1400 + }, + { + "epoch": 0.13, + "learning_rate": 4.935184333915602e-05, + "loss": 1.0776, + "step": 1410 + }, + { + "epoch": 0.13, + "learning_rate": 4.934724648340535e-05, + "loss": 1.0148, + "step": 1420 + }, + { + "epoch": 0.13, + "learning_rate": 4.9342649627654685e-05, + "loss": 0.9746, + "step": 1430 + }, + { + "epoch": 0.13, + "learning_rate": 4.933805277190402e-05, + "loss": 0.8809, + "step": 1440 + }, + { + "epoch": 0.13, + "learning_rate": 4.933345591615335e-05, + "loss": 0.9155, + "step": 1450 + }, + { + "epoch": 0.13, + "learning_rate": 4.932885906040269e-05, + "loss": 0.9706, + "step": 1460 + }, + { + "epoch": 0.14, + "learning_rate": 4.9324262204652024e-05, + "loss": 0.9858, + "step": 1470 + }, + { + "epoch": 0.14, + "learning_rate": 4.9319665348901354e-05, + "loss": 0.9738, + "step": 1480 + }, + { + "epoch": 0.14, + "learning_rate": 4.9315068493150684e-05, + "loss": 1.0844, + "step": 1490 + }, + { + "epoch": 0.14, + "learning_rate": 4.931047163740002e-05, + "loss": 0.9689, + "step": 1500 + }, + { + "epoch": 0.14, + "learning_rate": 4.930587478164935e-05, + "loss": 0.9703, + "step": 1510 + }, + { + "epoch": 0.14, + "learning_rate": 4.9301277925898686e-05, + "loss": 0.9335, + "step": 1520 + }, + { + "epoch": 0.14, + "learning_rate": 4.929668107014802e-05, + "loss": 1.0215, + "step": 1530 + }, + { + "epoch": 0.14, + "learning_rate": 4.929208421439735e-05, + "loss": 0.9247, + "step": 1540 + }, + { + "epoch": 0.14, + "learning_rate": 4.928748735864669e-05, + "loss": 0.9884, + "step": 1550 + }, + { + "epoch": 0.14, + "learning_rate": 4.9282890502896025e-05, + "loss": 0.9417, + "step": 1560 + }, + { + "epoch": 0.14, + "learning_rate": 4.9278293647145355e-05, + "loss": 1.029, + "step": 1570 + }, + { + "epoch": 0.15, + "learning_rate": 4.9273696791394685e-05, + "loss": 1.0022, + "step": 1580 + }, + { + "epoch": 0.15, + "learning_rate": 4.926909993564402e-05, + "loss": 1.0037, + "step": 1590 + }, + { + "epoch": 0.15, + "learning_rate": 4.926450307989335e-05, + "loss": 0.9434, + "step": 1600 + }, + { + "epoch": 0.15, + "learning_rate": 4.925990622414269e-05, + "loss": 1.0303, + "step": 1610 + }, + { + "epoch": 0.15, + "learning_rate": 4.9255309368392024e-05, + "loss": 1.0032, + "step": 1620 + }, + { + "epoch": 0.15, + "learning_rate": 4.9250712512641354e-05, + "loss": 1.0157, + "step": 1630 + }, + { + "epoch": 0.15, + "learning_rate": 4.924611565689069e-05, + "loss": 0.966, + "step": 1640 + }, + { + "epoch": 0.15, + "learning_rate": 4.9241518801140027e-05, + "loss": 0.928, + "step": 1650 + }, + { + "epoch": 0.15, + "learning_rate": 4.9236921945389356e-05, + "loss": 0.9483, + "step": 1660 + }, + { + "epoch": 0.15, + "learning_rate": 4.9232325089638686e-05, + "loss": 0.885, + "step": 1670 + }, + { + "epoch": 0.15, + "learning_rate": 4.922772823388802e-05, + "loss": 0.9944, + "step": 1680 + }, + { + "epoch": 0.16, + "learning_rate": 4.922313137813735e-05, + "loss": 0.9512, + "step": 1690 + }, + { + "epoch": 0.16, + "learning_rate": 4.921853452238669e-05, + "loss": 0.9031, + "step": 1700 + }, + { + "epoch": 0.16, + "learning_rate": 4.9213937666636025e-05, + "loss": 0.9833, + "step": 1710 + }, + { + "epoch": 0.16, + "learning_rate": 4.9209340810885355e-05, + "loss": 0.8866, + "step": 1720 + }, + { + "epoch": 0.16, + "learning_rate": 4.920474395513469e-05, + "loss": 1.0016, + "step": 1730 + }, + { + "epoch": 0.16, + "learning_rate": 4.920014709938403e-05, + "loss": 0.9396, + "step": 1740 + }, + { + "epoch": 0.16, + "learning_rate": 4.919555024363336e-05, + "loss": 0.8614, + "step": 1750 + }, + { + "epoch": 0.16, + "learning_rate": 4.9190953387882694e-05, + "loss": 0.9154, + "step": 1760 + }, + { + "epoch": 0.16, + "learning_rate": 4.9186356532132024e-05, + "loss": 0.9616, + "step": 1770 + }, + { + "epoch": 0.16, + "learning_rate": 4.918175967638135e-05, + "loss": 0.9843, + "step": 1780 + }, + { + "epoch": 0.16, + "learning_rate": 4.917716282063069e-05, + "loss": 0.9456, + "step": 1790 + }, + { + "epoch": 0.17, + "learning_rate": 4.9172565964880026e-05, + "loss": 0.9458, + "step": 1800 + }, + { + "epoch": 0.17, + "learning_rate": 4.9167969109129356e-05, + "loss": 0.9737, + "step": 1810 + }, + { + "epoch": 0.17, + "learning_rate": 4.916337225337869e-05, + "loss": 0.9982, + "step": 1820 + }, + { + "epoch": 0.17, + "learning_rate": 4.915877539762803e-05, + "loss": 0.881, + "step": 1830 + }, + { + "epoch": 0.17, + "learning_rate": 4.915417854187736e-05, + "loss": 1.0422, + "step": 1840 + }, + { + "epoch": 0.17, + "learning_rate": 4.9149581686126695e-05, + "loss": 0.9156, + "step": 1850 + }, + { + "epoch": 0.17, + "learning_rate": 4.9144984830376025e-05, + "loss": 0.9906, + "step": 1860 + }, + { + "epoch": 0.17, + "learning_rate": 4.9140387974625355e-05, + "loss": 0.8811, + "step": 1870 + }, + { + "epoch": 0.17, + "learning_rate": 4.913579111887469e-05, + "loss": 0.9466, + "step": 1880 + }, + { + "epoch": 0.17, + "learning_rate": 4.913119426312403e-05, + "loss": 0.8843, + "step": 1890 + }, + { + "epoch": 0.17, + "learning_rate": 4.912659740737336e-05, + "loss": 1.043, + "step": 1900 + }, + { + "epoch": 0.18, + "learning_rate": 4.9122000551622694e-05, + "loss": 0.8872, + "step": 1910 + }, + { + "epoch": 0.18, + "learning_rate": 4.911740369587203e-05, + "loss": 0.9973, + "step": 1920 + }, + { + "epoch": 0.18, + "learning_rate": 4.911280684012136e-05, + "loss": 0.9862, + "step": 1930 + }, + { + "epoch": 0.18, + "learning_rate": 4.9108209984370696e-05, + "loss": 0.9889, + "step": 1940 + }, + { + "epoch": 0.18, + "learning_rate": 4.9103613128620026e-05, + "loss": 0.9135, + "step": 1950 + }, + { + "epoch": 0.18, + "learning_rate": 4.9099016272869356e-05, + "loss": 0.9208, + "step": 1960 + }, + { + "epoch": 0.18, + "learning_rate": 4.909441941711869e-05, + "loss": 0.9219, + "step": 1970 + }, + { + "epoch": 0.18, + "learning_rate": 4.908982256136803e-05, + "loss": 0.887, + "step": 1980 + }, + { + "epoch": 0.18, + "learning_rate": 4.908522570561736e-05, + "loss": 0.9606, + "step": 1990 + }, + { + "epoch": 0.18, + "learning_rate": 4.9080628849866695e-05, + "loss": 0.9298, + "step": 2000 + }, + { + "epoch": 0.18, + "learning_rate": 4.907603199411603e-05, + "loss": 0.8867, + "step": 2010 + }, + { + "epoch": 0.19, + "learning_rate": 4.907143513836536e-05, + "loss": 0.8856, + "step": 2020 + }, + { + "epoch": 0.19, + "learning_rate": 4.90668382826147e-05, + "loss": 0.8442, + "step": 2030 + }, + { + "epoch": 0.19, + "learning_rate": 4.906224142686403e-05, + "loss": 0.9634, + "step": 2040 + }, + { + "epoch": 0.19, + "learning_rate": 4.905764457111336e-05, + "loss": 0.965, + "step": 2050 + }, + { + "epoch": 0.19, + "learning_rate": 4.9053047715362693e-05, + "loss": 0.8834, + "step": 2060 + }, + { + "epoch": 0.19, + "learning_rate": 4.904845085961203e-05, + "loss": 1.0231, + "step": 2070 + }, + { + "epoch": 0.19, + "learning_rate": 4.904385400386136e-05, + "loss": 0.9807, + "step": 2080 + }, + { + "epoch": 0.19, + "learning_rate": 4.9039257148110696e-05, + "loss": 0.8146, + "step": 2090 + }, + { + "epoch": 0.19, + "learning_rate": 4.903466029236003e-05, + "loss": 0.9296, + "step": 2100 + }, + { + "epoch": 0.19, + "learning_rate": 4.903006343660936e-05, + "loss": 0.831, + "step": 2110 + }, + { + "epoch": 0.19, + "learning_rate": 4.90254665808587e-05, + "loss": 0.8892, + "step": 2120 + }, + { + "epoch": 0.2, + "learning_rate": 4.902086972510803e-05, + "loss": 0.9259, + "step": 2130 + }, + { + "epoch": 0.2, + "learning_rate": 4.901627286935736e-05, + "loss": 1.1263, + "step": 2140 + }, + { + "epoch": 0.2, + "learning_rate": 4.9011676013606695e-05, + "loss": 0.872, + "step": 2150 + }, + { + "epoch": 0.2, + "learning_rate": 4.900707915785603e-05, + "loss": 0.9134, + "step": 2160 + }, + { + "epoch": 0.2, + "learning_rate": 4.900248230210536e-05, + "loss": 0.9481, + "step": 2170 + }, + { + "epoch": 0.2, + "learning_rate": 4.89978854463547e-05, + "loss": 0.8769, + "step": 2180 + }, + { + "epoch": 0.2, + "learning_rate": 4.8993288590604034e-05, + "loss": 0.9643, + "step": 2190 + }, + { + "epoch": 0.2, + "learning_rate": 4.8988691734853363e-05, + "loss": 1.0846, + "step": 2200 + }, + { + "epoch": 0.2, + "learning_rate": 4.89840948791027e-05, + "loss": 0.9085, + "step": 2210 + }, + { + "epoch": 0.2, + "learning_rate": 4.897949802335203e-05, + "loss": 0.8347, + "step": 2220 + }, + { + "epoch": 0.21, + "learning_rate": 4.897490116760136e-05, + "loss": 0.9126, + "step": 2230 + }, + { + "epoch": 0.21, + "learning_rate": 4.8970304311850696e-05, + "loss": 0.7547, + "step": 2240 + }, + { + "epoch": 0.21, + "learning_rate": 4.896570745610003e-05, + "loss": 0.9314, + "step": 2250 + }, + { + "epoch": 0.21, + "learning_rate": 4.896111060034936e-05, + "loss": 0.9754, + "step": 2260 + }, + { + "epoch": 0.21, + "learning_rate": 4.89565137445987e-05, + "loss": 0.8824, + "step": 2270 + }, + { + "epoch": 0.21, + "learning_rate": 4.8951916888848035e-05, + "loss": 1.0279, + "step": 2280 + }, + { + "epoch": 0.21, + "learning_rate": 4.8947320033097365e-05, + "loss": 1.0157, + "step": 2290 + }, + { + "epoch": 0.21, + "learning_rate": 4.89427231773467e-05, + "loss": 0.9303, + "step": 2300 + }, + { + "epoch": 0.21, + "learning_rate": 4.893812632159603e-05, + "loss": 0.9381, + "step": 2310 + }, + { + "epoch": 0.21, + "learning_rate": 4.893352946584536e-05, + "loss": 0.9546, + "step": 2320 + }, + { + "epoch": 0.21, + "learning_rate": 4.89289326100947e-05, + "loss": 0.9939, + "step": 2330 + }, + { + "epoch": 0.22, + "learning_rate": 4.8924335754344033e-05, + "loss": 0.9354, + "step": 2340 + }, + { + "epoch": 0.22, + "learning_rate": 4.891973889859336e-05, + "loss": 1.0036, + "step": 2350 + }, + { + "epoch": 0.22, + "learning_rate": 4.89151420428427e-05, + "loss": 1.0223, + "step": 2360 + }, + { + "epoch": 0.22, + "learning_rate": 4.8910545187092036e-05, + "loss": 0.8981, + "step": 2370 + }, + { + "epoch": 0.22, + "learning_rate": 4.8905948331341366e-05, + "loss": 0.925, + "step": 2380 + }, + { + "epoch": 0.22, + "learning_rate": 4.89013514755907e-05, + "loss": 0.9115, + "step": 2390 + }, + { + "epoch": 0.22, + "learning_rate": 4.889675461984003e-05, + "loss": 1.0528, + "step": 2400 + }, + { + "epoch": 0.22, + "learning_rate": 4.889215776408936e-05, + "loss": 0.9579, + "step": 2410 + }, + { + "epoch": 0.22, + "learning_rate": 4.88875609083387e-05, + "loss": 0.9576, + "step": 2420 + }, + { + "epoch": 0.22, + "learning_rate": 4.8882964052588035e-05, + "loss": 0.8322, + "step": 2430 + }, + { + "epoch": 0.22, + "learning_rate": 4.8878367196837364e-05, + "loss": 0.9541, + "step": 2440 + }, + { + "epoch": 0.23, + "learning_rate": 4.88737703410867e-05, + "loss": 0.8976, + "step": 2450 + }, + { + "epoch": 0.23, + "learning_rate": 4.886917348533603e-05, + "loss": 0.9819, + "step": 2460 + }, + { + "epoch": 0.23, + "learning_rate": 4.886457662958537e-05, + "loss": 1.0292, + "step": 2470 + }, + { + "epoch": 0.23, + "learning_rate": 4.8859979773834704e-05, + "loss": 0.9267, + "step": 2480 + }, + { + "epoch": 0.23, + "learning_rate": 4.885538291808403e-05, + "loss": 1.0393, + "step": 2490 + }, + { + "epoch": 0.23, + "learning_rate": 4.885078606233336e-05, + "loss": 0.9499, + "step": 2500 + }, + { + "epoch": 0.23, + "learning_rate": 4.88461892065827e-05, + "loss": 0.9265, + "step": 2510 + }, + { + "epoch": 0.23, + "learning_rate": 4.8841592350832036e-05, + "loss": 0.8737, + "step": 2520 + }, + { + "epoch": 0.23, + "learning_rate": 4.8836995495081366e-05, + "loss": 0.8004, + "step": 2530 + }, + { + "epoch": 0.23, + "learning_rate": 4.88323986393307e-05, + "loss": 0.932, + "step": 2540 + }, + { + "epoch": 0.23, + "learning_rate": 4.882780178358003e-05, + "loss": 0.9967, + "step": 2550 + }, + { + "epoch": 0.24, + "learning_rate": 4.882320492782937e-05, + "loss": 0.9678, + "step": 2560 + }, + { + "epoch": 0.24, + "learning_rate": 4.8818608072078705e-05, + "loss": 1.0356, + "step": 2570 + }, + { + "epoch": 0.24, + "learning_rate": 4.8814011216328034e-05, + "loss": 0.9875, + "step": 2580 + }, + { + "epoch": 0.24, + "learning_rate": 4.8809414360577364e-05, + "loss": 1.0758, + "step": 2590 + }, + { + "epoch": 0.24, + "learning_rate": 4.88048175048267e-05, + "loss": 0.9232, + "step": 2600 + }, + { + "epoch": 0.24, + "learning_rate": 4.880022064907604e-05, + "loss": 0.9855, + "step": 2610 + }, + { + "epoch": 0.24, + "learning_rate": 4.879562379332537e-05, + "loss": 0.9618, + "step": 2620 + }, + { + "epoch": 0.24, + "learning_rate": 4.87910269375747e-05, + "loss": 0.8927, + "step": 2630 + }, + { + "epoch": 0.24, + "learning_rate": 4.878643008182403e-05, + "loss": 0.9335, + "step": 2640 + }, + { + "epoch": 0.24, + "learning_rate": 4.878183322607337e-05, + "loss": 1.0304, + "step": 2650 + }, + { + "epoch": 0.24, + "learning_rate": 4.8777236370322706e-05, + "loss": 0.8923, + "step": 2660 + }, + { + "epoch": 0.25, + "learning_rate": 4.8772639514572036e-05, + "loss": 1.0377, + "step": 2670 + }, + { + "epoch": 0.25, + "learning_rate": 4.8768042658821365e-05, + "loss": 0.9609, + "step": 2680 + }, + { + "epoch": 0.25, + "learning_rate": 4.87634458030707e-05, + "loss": 0.8888, + "step": 2690 + }, + { + "epoch": 0.25, + "learning_rate": 4.875884894732004e-05, + "loss": 0.9756, + "step": 2700 + }, + { + "epoch": 0.25, + "learning_rate": 4.875425209156937e-05, + "loss": 0.9398, + "step": 2710 + }, + { + "epoch": 0.25, + "learning_rate": 4.8749655235818704e-05, + "loss": 0.8033, + "step": 2720 + }, + { + "epoch": 0.25, + "learning_rate": 4.8745058380068034e-05, + "loss": 0.8771, + "step": 2730 + }, + { + "epoch": 0.25, + "learning_rate": 4.874046152431737e-05, + "loss": 0.8756, + "step": 2740 + }, + { + "epoch": 0.25, + "learning_rate": 4.873586466856671e-05, + "loss": 1.051, + "step": 2750 + }, + { + "epoch": 0.25, + "learning_rate": 4.873126781281604e-05, + "loss": 0.881, + "step": 2760 + }, + { + "epoch": 0.25, + "learning_rate": 4.8726670957065366e-05, + "loss": 0.949, + "step": 2770 + }, + { + "epoch": 0.26, + "learning_rate": 4.87220741013147e-05, + "loss": 1.0034, + "step": 2780 + }, + { + "epoch": 0.26, + "learning_rate": 4.871747724556403e-05, + "loss": 0.9227, + "step": 2790 + }, + { + "epoch": 0.26, + "learning_rate": 4.871288038981337e-05, + "loss": 0.9569, + "step": 2800 + }, + { + "epoch": 0.26, + "learning_rate": 4.8708283534062706e-05, + "loss": 1.0622, + "step": 2810 + }, + { + "epoch": 0.26, + "learning_rate": 4.8703686678312035e-05, + "loss": 0.8858, + "step": 2820 + }, + { + "epoch": 0.26, + "learning_rate": 4.869908982256137e-05, + "loss": 1.0565, + "step": 2830 + }, + { + "epoch": 0.26, + "learning_rate": 4.869449296681071e-05, + "loss": 0.9979, + "step": 2840 + }, + { + "epoch": 0.26, + "learning_rate": 4.868989611106004e-05, + "loss": 0.9144, + "step": 2850 + }, + { + "epoch": 0.26, + "learning_rate": 4.868529925530937e-05, + "loss": 1.0345, + "step": 2860 + }, + { + "epoch": 0.26, + "learning_rate": 4.8680702399558704e-05, + "loss": 0.954, + "step": 2870 + }, + { + "epoch": 0.26, + "learning_rate": 4.8676105543808034e-05, + "loss": 0.9484, + "step": 2880 + }, + { + "epoch": 0.27, + "learning_rate": 4.867150868805737e-05, + "loss": 0.9399, + "step": 2890 + }, + { + "epoch": 0.27, + "learning_rate": 4.866691183230671e-05, + "loss": 1.0062, + "step": 2900 + }, + { + "epoch": 0.27, + "learning_rate": 4.8662314976556037e-05, + "loss": 1.0299, + "step": 2910 + }, + { + "epoch": 0.27, + "learning_rate": 4.865771812080537e-05, + "loss": 0.9574, + "step": 2920 + }, + { + "epoch": 0.27, + "learning_rate": 4.865312126505471e-05, + "loss": 0.9657, + "step": 2930 + }, + { + "epoch": 0.27, + "learning_rate": 4.864852440930404e-05, + "loss": 0.971, + "step": 2940 + }, + { + "epoch": 0.27, + "learning_rate": 4.864392755355337e-05, + "loss": 0.9247, + "step": 2950 + }, + { + "epoch": 0.27, + "learning_rate": 4.8639330697802705e-05, + "loss": 0.8734, + "step": 2960 + }, + { + "epoch": 0.27, + "learning_rate": 4.8634733842052035e-05, + "loss": 1.0099, + "step": 2970 + }, + { + "epoch": 0.27, + "learning_rate": 4.863013698630137e-05, + "loss": 1.0051, + "step": 2980 + }, + { + "epoch": 0.27, + "learning_rate": 4.862554013055071e-05, + "loss": 0.8267, + "step": 2990 + }, + { + "epoch": 0.28, + "learning_rate": 4.862094327480004e-05, + "loss": 0.8826, + "step": 3000 + }, + { + "epoch": 0.28, + "learning_rate": 4.8616346419049374e-05, + "loss": 0.8881, + "step": 3010 + }, + { + "epoch": 0.28, + "learning_rate": 4.861174956329871e-05, + "loss": 1.1478, + "step": 3020 + }, + { + "epoch": 0.28, + "learning_rate": 4.860715270754804e-05, + "loss": 0.844, + "step": 3030 + }, + { + "epoch": 0.28, + "learning_rate": 4.860255585179737e-05, + "loss": 0.8689, + "step": 3040 + }, + { + "epoch": 0.28, + "learning_rate": 4.8597958996046707e-05, + "loss": 0.8529, + "step": 3050 + }, + { + "epoch": 0.28, + "learning_rate": 4.8593362140296036e-05, + "loss": 0.8957, + "step": 3060 + }, + { + "epoch": 0.28, + "learning_rate": 4.858876528454537e-05, + "loss": 0.897, + "step": 3070 + }, + { + "epoch": 0.28, + "learning_rate": 4.858416842879471e-05, + "loss": 0.9625, + "step": 3080 + }, + { + "epoch": 0.28, + "learning_rate": 4.857957157304404e-05, + "loss": 0.9303, + "step": 3090 + }, + { + "epoch": 0.29, + "learning_rate": 4.8574974717293375e-05, + "loss": 0.9855, + "step": 3100 + }, + { + "epoch": 0.29, + "learning_rate": 4.857037786154271e-05, + "loss": 0.9919, + "step": 3110 + }, + { + "epoch": 0.29, + "learning_rate": 4.8565781005792035e-05, + "loss": 1.0804, + "step": 3120 + }, + { + "epoch": 0.29, + "learning_rate": 4.856118415004137e-05, + "loss": 0.8978, + "step": 3130 + }, + { + "epoch": 0.29, + "learning_rate": 4.855658729429071e-05, + "loss": 1.0202, + "step": 3140 + }, + { + "epoch": 0.29, + "learning_rate": 4.855199043854004e-05, + "loss": 0.941, + "step": 3150 + }, + { + "epoch": 0.29, + "learning_rate": 4.8547393582789374e-05, + "loss": 0.9408, + "step": 3160 + }, + { + "epoch": 0.29, + "learning_rate": 4.854279672703871e-05, + "loss": 0.8859, + "step": 3170 + }, + { + "epoch": 0.29, + "learning_rate": 4.853819987128804e-05, + "loss": 0.9506, + "step": 3180 + }, + { + "epoch": 0.29, + "learning_rate": 4.8533603015537377e-05, + "loss": 1.025, + "step": 3190 + }, + { + "epoch": 0.29, + "learning_rate": 4.852900615978671e-05, + "loss": 0.9741, + "step": 3200 + }, + { + "epoch": 0.3, + "learning_rate": 4.8524409304036036e-05, + "loss": 0.9514, + "step": 3210 + }, + { + "epoch": 0.3, + "learning_rate": 4.851981244828537e-05, + "loss": 0.9614, + "step": 3220 + }, + { + "epoch": 0.3, + "learning_rate": 4.851521559253471e-05, + "loss": 1.0121, + "step": 3230 + }, + { + "epoch": 0.3, + "learning_rate": 4.851061873678404e-05, + "loss": 0.8948, + "step": 3240 + }, + { + "epoch": 0.3, + "learning_rate": 4.8506021881033375e-05, + "loss": 0.985, + "step": 3250 + }, + { + "epoch": 0.3, + "learning_rate": 4.850142502528271e-05, + "loss": 0.9391, + "step": 3260 + }, + { + "epoch": 0.3, + "learning_rate": 4.849682816953204e-05, + "loss": 1.0194, + "step": 3270 + }, + { + "epoch": 0.3, + "learning_rate": 4.849223131378138e-05, + "loss": 0.8655, + "step": 3280 + }, + { + "epoch": 0.3, + "learning_rate": 4.8487634458030714e-05, + "loss": 0.9888, + "step": 3290 + }, + { + "epoch": 0.3, + "learning_rate": 4.848303760228004e-05, + "loss": 0.8615, + "step": 3300 + }, + { + "epoch": 0.3, + "learning_rate": 4.8478440746529374e-05, + "loss": 0.9933, + "step": 3310 + }, + { + "epoch": 0.31, + "learning_rate": 4.847384389077871e-05, + "loss": 0.9066, + "step": 3320 + }, + { + "epoch": 0.31, + "learning_rate": 4.846924703502804e-05, + "loss": 0.9087, + "step": 3330 + }, + { + "epoch": 0.31, + "learning_rate": 4.8464650179277376e-05, + "loss": 1.0999, + "step": 3340 + }, + { + "epoch": 0.31, + "learning_rate": 4.846005332352671e-05, + "loss": 1.0572, + "step": 3350 + }, + { + "epoch": 0.31, + "learning_rate": 4.845545646777604e-05, + "loss": 0.8708, + "step": 3360 + }, + { + "epoch": 0.31, + "learning_rate": 4.845085961202538e-05, + "loss": 0.8502, + "step": 3370 + }, + { + "epoch": 0.31, + "learning_rate": 4.8446262756274715e-05, + "loss": 1.1002, + "step": 3380 + }, + { + "epoch": 0.31, + "learning_rate": 4.844166590052404e-05, + "loss": 0.8142, + "step": 3390 + }, + { + "epoch": 0.31, + "learning_rate": 4.8437069044773375e-05, + "loss": 0.9461, + "step": 3400 + }, + { + "epoch": 0.31, + "learning_rate": 4.843247218902271e-05, + "loss": 0.9266, + "step": 3410 + }, + { + "epoch": 0.31, + "learning_rate": 4.842787533327204e-05, + "loss": 0.9287, + "step": 3420 + }, + { + "epoch": 0.32, + "learning_rate": 4.842327847752138e-05, + "loss": 0.9237, + "step": 3430 + }, + { + "epoch": 0.32, + "learning_rate": 4.8418681621770714e-05, + "loss": 0.9715, + "step": 3440 + }, + { + "epoch": 0.32, + "learning_rate": 4.8414084766020044e-05, + "loss": 0.897, + "step": 3450 + }, + { + "epoch": 0.32, + "learning_rate": 4.840948791026938e-05, + "loss": 0.9284, + "step": 3460 + }, + { + "epoch": 0.32, + "learning_rate": 4.840489105451872e-05, + "loss": 0.8379, + "step": 3470 + }, + { + "epoch": 0.32, + "learning_rate": 4.840029419876804e-05, + "loss": 0.8233, + "step": 3480 + }, + { + "epoch": 0.32, + "learning_rate": 4.8395697343017376e-05, + "loss": 0.976, + "step": 3490 + }, + { + "epoch": 0.32, + "learning_rate": 4.839110048726671e-05, + "loss": 0.9462, + "step": 3500 + }, + { + "epoch": 0.32, + "learning_rate": 4.838650363151604e-05, + "loss": 0.9967, + "step": 3510 + }, + { + "epoch": 0.32, + "learning_rate": 4.838190677576538e-05, + "loss": 0.9009, + "step": 3520 + }, + { + "epoch": 0.32, + "learning_rate": 4.8377309920014715e-05, + "loss": 0.9394, + "step": 3530 + }, + { + "epoch": 0.33, + "learning_rate": 4.8372713064264045e-05, + "loss": 0.9478, + "step": 3540 + }, + { + "epoch": 0.33, + "learning_rate": 4.836811620851338e-05, + "loss": 0.9309, + "step": 3550 + }, + { + "epoch": 0.33, + "learning_rate": 4.836351935276272e-05, + "loss": 0.8633, + "step": 3560 + }, + { + "epoch": 0.33, + "learning_rate": 4.835892249701204e-05, + "loss": 1.0007, + "step": 3570 + }, + { + "epoch": 0.33, + "learning_rate": 4.835432564126138e-05, + "loss": 0.9862, + "step": 3580 + }, + { + "epoch": 0.33, + "learning_rate": 4.8349728785510714e-05, + "loss": 0.9759, + "step": 3590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8345131929760043e-05, + "loss": 1.0573, + "step": 3600 + }, + { + "epoch": 0.33, + "learning_rate": 4.834053507400938e-05, + "loss": 0.969, + "step": 3610 + }, + { + "epoch": 0.33, + "learning_rate": 4.8335938218258716e-05, + "loss": 0.9551, + "step": 3620 + }, + { + "epoch": 0.33, + "learning_rate": 4.8331341362508046e-05, + "loss": 0.967, + "step": 3630 + }, + { + "epoch": 0.33, + "learning_rate": 4.832674450675738e-05, + "loss": 0.9396, + "step": 3640 + }, + { + "epoch": 0.34, + "learning_rate": 4.832214765100672e-05, + "loss": 0.9685, + "step": 3650 + }, + { + "epoch": 0.34, + "learning_rate": 4.831755079525604e-05, + "loss": 0.8825, + "step": 3660 + }, + { + "epoch": 0.34, + "learning_rate": 4.831295393950538e-05, + "loss": 0.9393, + "step": 3670 + }, + { + "epoch": 0.34, + "learning_rate": 4.8308357083754715e-05, + "loss": 1.0037, + "step": 3680 + }, + { + "epoch": 0.34, + "learning_rate": 4.8303760228004045e-05, + "loss": 0.8655, + "step": 3690 + }, + { + "epoch": 0.34, + "learning_rate": 4.829916337225338e-05, + "loss": 0.9361, + "step": 3700 + }, + { + "epoch": 0.34, + "learning_rate": 4.829456651650272e-05, + "loss": 1.0242, + "step": 3710 + }, + { + "epoch": 0.34, + "learning_rate": 4.828996966075205e-05, + "loss": 0.9324, + "step": 3720 + }, + { + "epoch": 0.34, + "learning_rate": 4.8285372805001384e-05, + "loss": 0.9658, + "step": 3730 + }, + { + "epoch": 0.34, + "learning_rate": 4.828077594925072e-05, + "loss": 0.9503, + "step": 3740 + }, + { + "epoch": 0.34, + "learning_rate": 4.827617909350004e-05, + "loss": 0.8828, + "step": 3750 + }, + { + "epoch": 0.35, + "learning_rate": 4.827158223774938e-05, + "loss": 0.9675, + "step": 3760 + }, + { + "epoch": 0.35, + "learning_rate": 4.8266985381998716e-05, + "loss": 0.931, + "step": 3770 + }, + { + "epoch": 0.35, + "learning_rate": 4.8262388526248046e-05, + "loss": 0.9289, + "step": 3780 + }, + { + "epoch": 0.35, + "learning_rate": 4.825779167049738e-05, + "loss": 0.9229, + "step": 3790 + }, + { + "epoch": 0.35, + "learning_rate": 4.825319481474672e-05, + "loss": 0.9178, + "step": 3800 + }, + { + "epoch": 0.35, + "learning_rate": 4.824859795899605e-05, + "loss": 0.8967, + "step": 3810 + }, + { + "epoch": 0.35, + "learning_rate": 4.8244001103245385e-05, + "loss": 0.9358, + "step": 3820 + }, + { + "epoch": 0.35, + "learning_rate": 4.823940424749472e-05, + "loss": 1.0154, + "step": 3830 + }, + { + "epoch": 0.35, + "learning_rate": 4.8234807391744044e-05, + "loss": 1.0108, + "step": 3840 + }, + { + "epoch": 0.35, + "learning_rate": 4.823021053599338e-05, + "loss": 0.8626, + "step": 3850 + }, + { + "epoch": 0.35, + "learning_rate": 4.822561368024272e-05, + "loss": 1.0241, + "step": 3860 + }, + { + "epoch": 0.36, + "learning_rate": 4.822101682449205e-05, + "loss": 0.9192, + "step": 3870 + }, + { + "epoch": 0.36, + "learning_rate": 4.8216419968741383e-05, + "loss": 0.9282, + "step": 3880 + }, + { + "epoch": 0.36, + "learning_rate": 4.821182311299072e-05, + "loss": 1.0221, + "step": 3890 + }, + { + "epoch": 0.36, + "learning_rate": 4.820722625724005e-05, + "loss": 1.1095, + "step": 3900 + }, + { + "epoch": 0.36, + "learning_rate": 4.8202629401489386e-05, + "loss": 0.9597, + "step": 3910 + }, + { + "epoch": 0.36, + "learning_rate": 4.819803254573872e-05, + "loss": 0.9161, + "step": 3920 + }, + { + "epoch": 0.36, + "learning_rate": 4.8193435689988046e-05, + "loss": 0.9358, + "step": 3930 + }, + { + "epoch": 0.36, + "learning_rate": 4.818883883423738e-05, + "loss": 0.8769, + "step": 3940 + }, + { + "epoch": 0.36, + "learning_rate": 4.818424197848672e-05, + "loss": 0.9096, + "step": 3950 + }, + { + "epoch": 0.36, + "learning_rate": 4.817964512273605e-05, + "loss": 1.0096, + "step": 3960 + }, + { + "epoch": 0.36, + "learning_rate": 4.8175048266985385e-05, + "loss": 1.0112, + "step": 3970 + }, + { + "epoch": 0.37, + "learning_rate": 4.817045141123472e-05, + "loss": 1.0786, + "step": 3980 + }, + { + "epoch": 0.37, + "learning_rate": 4.816585455548405e-05, + "loss": 0.9748, + "step": 3990 + }, + { + "epoch": 0.37, + "learning_rate": 4.816125769973339e-05, + "loss": 0.8457, + "step": 4000 + }, + { + "epoch": 0.37, + "learning_rate": 4.8156660843982724e-05, + "loss": 0.921, + "step": 4010 + }, + { + "epoch": 0.37, + "learning_rate": 4.815206398823205e-05, + "loss": 1.0226, + "step": 4020 + }, + { + "epoch": 0.37, + "learning_rate": 4.814746713248138e-05, + "loss": 1.0003, + "step": 4030 + }, + { + "epoch": 0.37, + "learning_rate": 4.814287027673072e-05, + "loss": 0.9738, + "step": 4040 + }, + { + "epoch": 0.37, + "learning_rate": 4.813827342098005e-05, + "loss": 1.0444, + "step": 4050 + }, + { + "epoch": 0.37, + "learning_rate": 4.8133676565229386e-05, + "loss": 0.9197, + "step": 4060 + }, + { + "epoch": 0.37, + "learning_rate": 4.812907970947872e-05, + "loss": 0.9679, + "step": 4070 + }, + { + "epoch": 0.38, + "learning_rate": 4.812448285372805e-05, + "loss": 1.0104, + "step": 4080 + }, + { + "epoch": 0.38, + "learning_rate": 4.811988599797739e-05, + "loss": 0.8813, + "step": 4090 + }, + { + "epoch": 0.38, + "learning_rate": 4.811528914222672e-05, + "loss": 0.8844, + "step": 4100 + }, + { + "epoch": 0.38, + "learning_rate": 4.811069228647605e-05, + "loss": 0.9497, + "step": 4110 + }, + { + "epoch": 0.38, + "learning_rate": 4.8106095430725384e-05, + "loss": 0.9705, + "step": 4120 + }, + { + "epoch": 0.38, + "learning_rate": 4.810149857497472e-05, + "loss": 0.8796, + "step": 4130 + }, + { + "epoch": 0.38, + "learning_rate": 4.809690171922405e-05, + "loss": 0.8341, + "step": 4140 + }, + { + "epoch": 0.38, + "learning_rate": 4.809230486347339e-05, + "loss": 1.0386, + "step": 4150 + }, + { + "epoch": 0.38, + "learning_rate": 4.8087708007722724e-05, + "loss": 0.9074, + "step": 4160 + }, + { + "epoch": 0.38, + "learning_rate": 4.808311115197205e-05, + "loss": 1.0244, + "step": 4170 + }, + { + "epoch": 0.38, + "learning_rate": 4.807851429622139e-05, + "loss": 0.9296, + "step": 4180 + }, + { + "epoch": 0.39, + "learning_rate": 4.807391744047072e-05, + "loss": 0.8878, + "step": 4190 + }, + { + "epoch": 0.39, + "learning_rate": 4.806932058472005e-05, + "loss": 1.0382, + "step": 4200 + }, + { + "epoch": 0.39, + "learning_rate": 4.8064723728969386e-05, + "loss": 0.8711, + "step": 4210 + }, + { + "epoch": 0.39, + "learning_rate": 4.806012687321872e-05, + "loss": 0.9117, + "step": 4220 + }, + { + "epoch": 0.39, + "learning_rate": 4.805553001746805e-05, + "loss": 0.9209, + "step": 4230 + }, + { + "epoch": 0.39, + "learning_rate": 4.805093316171739e-05, + "loss": 0.9701, + "step": 4240 + }, + { + "epoch": 0.39, + "learning_rate": 4.8046336305966725e-05, + "loss": 1.0646, + "step": 4250 + }, + { + "epoch": 0.39, + "learning_rate": 4.8041739450216054e-05, + "loss": 0.9933, + "step": 4260 + }, + { + "epoch": 0.39, + "learning_rate": 4.803714259446539e-05, + "loss": 1.0548, + "step": 4270 + }, + { + "epoch": 0.39, + "learning_rate": 4.803254573871472e-05, + "loss": 1.0018, + "step": 4280 + }, + { + "epoch": 0.39, + "learning_rate": 4.802794888296405e-05, + "loss": 0.943, + "step": 4290 + }, + { + "epoch": 0.4, + "learning_rate": 4.802335202721339e-05, + "loss": 0.9669, + "step": 4300 + }, + { + "epoch": 0.4, + "learning_rate": 4.801875517146272e-05, + "loss": 1.0725, + "step": 4310 + }, + { + "epoch": 0.4, + "learning_rate": 4.801415831571205e-05, + "loss": 0.9105, + "step": 4320 + }, + { + "epoch": 0.4, + "learning_rate": 4.800956145996139e-05, + "loss": 0.9969, + "step": 4330 + }, + { + "epoch": 0.4, + "learning_rate": 4.8004964604210726e-05, + "loss": 1.0874, + "step": 4340 + }, + { + "epoch": 0.4, + "learning_rate": 4.8000367748460056e-05, + "loss": 0.9017, + "step": 4350 + }, + { + "epoch": 0.4, + "learning_rate": 4.799577089270939e-05, + "loss": 0.9318, + "step": 4360 + }, + { + "epoch": 0.4, + "learning_rate": 4.799117403695872e-05, + "loss": 0.8452, + "step": 4370 + }, + { + "epoch": 0.4, + "learning_rate": 4.798657718120805e-05, + "loss": 1.0091, + "step": 4380 + }, + { + "epoch": 0.4, + "learning_rate": 4.798198032545739e-05, + "loss": 0.9686, + "step": 4390 + }, + { + "epoch": 0.4, + "learning_rate": 4.7977383469706724e-05, + "loss": 0.9686, + "step": 4400 + }, + { + "epoch": 0.41, + "learning_rate": 4.7972786613956054e-05, + "loss": 0.9917, + "step": 4410 + }, + { + "epoch": 0.41, + "learning_rate": 4.796818975820539e-05, + "loss": 1.0095, + "step": 4420 + }, + { + "epoch": 0.41, + "learning_rate": 4.796359290245472e-05, + "loss": 0.8846, + "step": 4430 + }, + { + "epoch": 0.41, + "learning_rate": 4.795899604670406e-05, + "loss": 0.8445, + "step": 4440 + }, + { + "epoch": 0.41, + "learning_rate": 4.795439919095339e-05, + "loss": 0.9852, + "step": 4450 + }, + { + "epoch": 0.41, + "learning_rate": 4.794980233520272e-05, + "loss": 1.0499, + "step": 4460 + }, + { + "epoch": 0.41, + "learning_rate": 4.794520547945205e-05, + "loss": 0.9371, + "step": 4470 + }, + { + "epoch": 0.41, + "learning_rate": 4.794060862370139e-05, + "loss": 0.9536, + "step": 4480 + }, + { + "epoch": 0.41, + "learning_rate": 4.7936011767950726e-05, + "loss": 0.878, + "step": 4490 + }, + { + "epoch": 0.41, + "learning_rate": 4.7931414912200055e-05, + "loss": 1.0025, + "step": 4500 + }, + { + "epoch": 0.41, + "learning_rate": 4.792681805644939e-05, + "loss": 0.9968, + "step": 4510 + }, + { + "epoch": 0.42, + "learning_rate": 4.792222120069872e-05, + "loss": 0.972, + "step": 4520 + }, + { + "epoch": 0.42, + "learning_rate": 4.791762434494806e-05, + "loss": 1.0427, + "step": 4530 + }, + { + "epoch": 0.42, + "learning_rate": 4.7913027489197395e-05, + "loss": 1.0409, + "step": 4540 + }, + { + "epoch": 0.42, + "learning_rate": 4.7908430633446724e-05, + "loss": 0.9942, + "step": 4550 + }, + { + "epoch": 0.42, + "learning_rate": 4.7903833777696054e-05, + "loss": 0.989, + "step": 4560 + }, + { + "epoch": 0.42, + "learning_rate": 4.789923692194539e-05, + "loss": 0.9263, + "step": 4570 + }, + { + "epoch": 0.42, + "learning_rate": 4.789464006619473e-05, + "loss": 0.9706, + "step": 4580 + }, + { + "epoch": 0.42, + "learning_rate": 4.7890043210444057e-05, + "loss": 0.9298, + "step": 4590 + }, + { + "epoch": 0.42, + "learning_rate": 4.788544635469339e-05, + "loss": 0.7919, + "step": 4600 + }, + { + "epoch": 0.42, + "learning_rate": 4.788084949894272e-05, + "loss": 0.9605, + "step": 4610 + }, + { + "epoch": 0.42, + "learning_rate": 4.787625264319206e-05, + "loss": 1.0069, + "step": 4620 + }, + { + "epoch": 0.43, + "learning_rate": 4.7871655787441396e-05, + "loss": 0.9832, + "step": 4630 + }, + { + "epoch": 0.43, + "learning_rate": 4.7867058931690725e-05, + "loss": 0.9184, + "step": 4640 + }, + { + "epoch": 0.43, + "learning_rate": 4.7862462075940055e-05, + "loss": 0.9883, + "step": 4650 + }, + { + "epoch": 0.43, + "learning_rate": 4.785786522018939e-05, + "loss": 0.9971, + "step": 4660 + }, + { + "epoch": 0.43, + "learning_rate": 4.785326836443873e-05, + "loss": 0.9714, + "step": 4670 + }, + { + "epoch": 0.43, + "learning_rate": 4.784867150868806e-05, + "loss": 0.9607, + "step": 4680 + }, + { + "epoch": 0.43, + "learning_rate": 4.7844074652937394e-05, + "loss": 1.0062, + "step": 4690 + }, + { + "epoch": 0.43, + "learning_rate": 4.7839477797186724e-05, + "loss": 0.8788, + "step": 4700 + }, + { + "epoch": 0.43, + "learning_rate": 4.783488094143606e-05, + "loss": 0.9645, + "step": 4710 + }, + { + "epoch": 0.43, + "learning_rate": 4.78302840856854e-05, + "loss": 0.9656, + "step": 4720 + }, + { + "epoch": 0.43, + "learning_rate": 4.782568722993473e-05, + "loss": 0.9687, + "step": 4730 + }, + { + "epoch": 0.44, + "learning_rate": 4.7821090374184056e-05, + "loss": 0.9983, + "step": 4740 + }, + { + "epoch": 0.44, + "learning_rate": 4.781649351843339e-05, + "loss": 0.9814, + "step": 4750 + }, + { + "epoch": 0.44, + "learning_rate": 4.781189666268272e-05, + "loss": 0.899, + "step": 4760 + }, + { + "epoch": 0.44, + "learning_rate": 4.780729980693206e-05, + "loss": 0.9326, + "step": 4770 + }, + { + "epoch": 0.44, + "learning_rate": 4.7802702951181395e-05, + "loss": 0.9149, + "step": 4780 + }, + { + "epoch": 0.44, + "learning_rate": 4.7798106095430725e-05, + "loss": 0.9339, + "step": 4790 + }, + { + "epoch": 0.44, + "learning_rate": 4.779350923968006e-05, + "loss": 0.9642, + "step": 4800 + }, + { + "epoch": 0.44, + "learning_rate": 4.77889123839294e-05, + "loss": 0.9197, + "step": 4810 + }, + { + "epoch": 0.44, + "learning_rate": 4.778431552817873e-05, + "loss": 0.9412, + "step": 4820 + }, + { + "epoch": 0.44, + "learning_rate": 4.777971867242806e-05, + "loss": 0.9661, + "step": 4830 + }, + { + "epoch": 0.44, + "learning_rate": 4.7775121816677394e-05, + "loss": 0.972, + "step": 4840 + }, + { + "epoch": 0.45, + "learning_rate": 4.7770524960926724e-05, + "loss": 0.9623, + "step": 4850 + }, + { + "epoch": 0.45, + "learning_rate": 4.776592810517606e-05, + "loss": 1.0311, + "step": 4860 + }, + { + "epoch": 0.45, + "learning_rate": 4.77613312494254e-05, + "loss": 1.0047, + "step": 4870 + }, + { + "epoch": 0.45, + "learning_rate": 4.7756734393674726e-05, + "loss": 0.9579, + "step": 4880 + }, + { + "epoch": 0.45, + "learning_rate": 4.775213753792406e-05, + "loss": 0.9257, + "step": 4890 + }, + { + "epoch": 0.45, + "learning_rate": 4.77475406821734e-05, + "loss": 0.9912, + "step": 4900 + }, + { + "epoch": 0.45, + "learning_rate": 4.774294382642273e-05, + "loss": 0.8673, + "step": 4910 + }, + { + "epoch": 0.45, + "learning_rate": 4.773834697067206e-05, + "loss": 0.8715, + "step": 4920 + }, + { + "epoch": 0.45, + "learning_rate": 4.7733750114921395e-05, + "loss": 0.8165, + "step": 4930 + }, + { + "epoch": 0.45, + "learning_rate": 4.7729153259170725e-05, + "loss": 0.9617, + "step": 4940 + }, + { + "epoch": 0.46, + "learning_rate": 4.772455640342006e-05, + "loss": 0.9032, + "step": 4950 + }, + { + "epoch": 0.46, + "learning_rate": 4.77199595476694e-05, + "loss": 0.9404, + "step": 4960 + }, + { + "epoch": 0.46, + "learning_rate": 4.771536269191873e-05, + "loss": 0.9519, + "step": 4970 + }, + { + "epoch": 0.46, + "learning_rate": 4.7710765836168064e-05, + "loss": 0.922, + "step": 4980 + }, + { + "epoch": 0.46, + "learning_rate": 4.77061689804174e-05, + "loss": 1.0137, + "step": 4990 + }, + { + "epoch": 0.46, + "learning_rate": 4.770157212466673e-05, + "loss": 0.9427, + "step": 5000 + }, + { + "epoch": 0.46, + "eval_accuracy": 0.5388646288209606, + "eval_loss": 0.9456362724304199, + "eval_runtime": 159.6722, + "eval_samples_per_second": 28.684, + "eval_steps_per_second": 3.589, + "step": 5000 + }, + { + "epoch": 0.46, + "learning_rate": 4.769697526891606e-05, + "loss": 0.9568, + "step": 5010 + }, + { + "epoch": 0.46, + "learning_rate": 4.7692378413165396e-05, + "loss": 0.8912, + "step": 5020 + }, + { + "epoch": 0.46, + "learning_rate": 4.7687781557414726e-05, + "loss": 0.9492, + "step": 5030 + }, + { + "epoch": 0.46, + "learning_rate": 4.768318470166406e-05, + "loss": 1.0019, + "step": 5040 + }, + { + "epoch": 0.46, + "learning_rate": 4.76785878459134e-05, + "loss": 0.9071, + "step": 5050 + }, + { + "epoch": 0.47, + "learning_rate": 4.767399099016273e-05, + "loss": 0.9758, + "step": 5060 + }, + { + "epoch": 0.47, + "learning_rate": 4.7669394134412065e-05, + "loss": 0.9423, + "step": 5070 + }, + { + "epoch": 0.47, + "learning_rate": 4.76647972786614e-05, + "loss": 1.0457, + "step": 5080 + }, + { + "epoch": 0.47, + "learning_rate": 4.766020042291073e-05, + "loss": 0.9989, + "step": 5090 + }, + { + "epoch": 0.47, + "learning_rate": 4.765560356716007e-05, + "loss": 0.9192, + "step": 5100 + }, + { + "epoch": 0.47, + "learning_rate": 4.76510067114094e-05, + "loss": 0.907, + "step": 5110 + }, + { + "epoch": 0.47, + "learning_rate": 4.764640985565873e-05, + "loss": 0.9327, + "step": 5120 + }, + { + "epoch": 0.47, + "learning_rate": 4.7641812999908064e-05, + "loss": 0.7998, + "step": 5130 + }, + { + "epoch": 0.47, + "learning_rate": 4.76372161441574e-05, + "loss": 0.8857, + "step": 5140 + }, + { + "epoch": 0.47, + "learning_rate": 4.763261928840673e-05, + "loss": 1.0188, + "step": 5150 + }, + { + "epoch": 0.47, + "learning_rate": 4.7628022432656066e-05, + "loss": 0.9446, + "step": 5160 + }, + { + "epoch": 0.48, + "learning_rate": 4.76234255769054e-05, + "loss": 0.8976, + "step": 5170 + }, + { + "epoch": 0.48, + "learning_rate": 4.761882872115473e-05, + "loss": 1.1064, + "step": 5180 + }, + { + "epoch": 0.48, + "learning_rate": 4.761423186540407e-05, + "loss": 0.9045, + "step": 5190 + }, + { + "epoch": 0.48, + "learning_rate": 4.76096350096534e-05, + "loss": 0.9362, + "step": 5200 + }, + { + "epoch": 0.48, + "learning_rate": 4.760503815390273e-05, + "loss": 0.9063, + "step": 5210 + }, + { + "epoch": 0.48, + "learning_rate": 4.7600441298152065e-05, + "loss": 0.9325, + "step": 5220 + }, + { + "epoch": 0.48, + "learning_rate": 4.75958444424014e-05, + "loss": 0.9443, + "step": 5230 + }, + { + "epoch": 0.48, + "learning_rate": 4.759124758665073e-05, + "loss": 0.9077, + "step": 5240 + }, + { + "epoch": 0.48, + "learning_rate": 4.758665073090007e-05, + "loss": 0.9318, + "step": 5250 + }, + { + "epoch": 0.48, + "learning_rate": 4.7582053875149404e-05, + "loss": 0.9981, + "step": 5260 + }, + { + "epoch": 0.48, + "learning_rate": 4.7577457019398734e-05, + "loss": 1.0895, + "step": 5270 + }, + { + "epoch": 0.49, + "learning_rate": 4.757286016364807e-05, + "loss": 0.9403, + "step": 5280 + }, + { + "epoch": 0.49, + "learning_rate": 4.75682633078974e-05, + "loss": 0.993, + "step": 5290 + }, + { + "epoch": 0.49, + "learning_rate": 4.756366645214673e-05, + "loss": 0.9024, + "step": 5300 + }, + { + "epoch": 0.49, + "learning_rate": 4.7559069596396066e-05, + "loss": 0.8814, + "step": 5310 + }, + { + "epoch": 0.49, + "learning_rate": 4.75544727406454e-05, + "loss": 0.9604, + "step": 5320 + }, + { + "epoch": 0.49, + "learning_rate": 4.754987588489473e-05, + "loss": 0.9268, + "step": 5330 + }, + { + "epoch": 0.49, + "learning_rate": 4.754527902914407e-05, + "loss": 0.9647, + "step": 5340 + }, + { + "epoch": 0.49, + "learning_rate": 4.7540682173393405e-05, + "loss": 0.8258, + "step": 5350 + }, + { + "epoch": 0.49, + "learning_rate": 4.7536085317642735e-05, + "loss": 0.9386, + "step": 5360 + }, + { + "epoch": 0.49, + "learning_rate": 4.753148846189207e-05, + "loss": 0.9623, + "step": 5370 + }, + { + "epoch": 0.49, + "learning_rate": 4.75268916061414e-05, + "loss": 0.9751, + "step": 5380 + }, + { + "epoch": 0.5, + "learning_rate": 4.752229475039073e-05, + "loss": 0.9381, + "step": 5390 + }, + { + "epoch": 0.5, + "learning_rate": 4.751769789464007e-05, + "loss": 1.043, + "step": 5400 + }, + { + "epoch": 0.5, + "learning_rate": 4.7513101038889404e-05, + "loss": 0.888, + "step": 5410 + }, + { + "epoch": 0.5, + "learning_rate": 4.7508504183138734e-05, + "loss": 1.0717, + "step": 5420 + }, + { + "epoch": 0.5, + "learning_rate": 4.750390732738807e-05, + "loss": 0.9471, + "step": 5430 + }, + { + "epoch": 0.5, + "learning_rate": 4.7499310471637406e-05, + "loss": 0.8793, + "step": 5440 + }, + { + "epoch": 0.5, + "learning_rate": 4.7494713615886736e-05, + "loss": 0.9956, + "step": 5450 + }, + { + "epoch": 0.5, + "learning_rate": 4.749011676013607e-05, + "loss": 0.9041, + "step": 5460 + }, + { + "epoch": 0.5, + "learning_rate": 4.74855199043854e-05, + "loss": 0.9561, + "step": 5470 + }, + { + "epoch": 0.5, + "learning_rate": 4.748092304863473e-05, + "loss": 0.9218, + "step": 5480 + }, + { + "epoch": 0.5, + "learning_rate": 4.747632619288407e-05, + "loss": 0.9512, + "step": 5490 + }, + { + "epoch": 0.51, + "learning_rate": 4.7471729337133405e-05, + "loss": 0.9194, + "step": 5500 + }, + { + "epoch": 0.51, + "learning_rate": 4.7467132481382735e-05, + "loss": 0.8455, + "step": 5510 + }, + { + "epoch": 0.51, + "learning_rate": 4.746253562563207e-05, + "loss": 0.8755, + "step": 5520 + }, + { + "epoch": 0.51, + "learning_rate": 4.745793876988141e-05, + "loss": 0.9611, + "step": 5530 + }, + { + "epoch": 0.51, + "learning_rate": 4.745334191413074e-05, + "loss": 0.9257, + "step": 5540 + }, + { + "epoch": 0.51, + "learning_rate": 4.7448745058380074e-05, + "loss": 0.95, + "step": 5550 + }, + { + "epoch": 0.51, + "learning_rate": 4.7444148202629404e-05, + "loss": 0.9126, + "step": 5560 + }, + { + "epoch": 0.51, + "learning_rate": 4.743955134687873e-05, + "loss": 0.96, + "step": 5570 + }, + { + "epoch": 0.51, + "learning_rate": 4.743495449112807e-05, + "loss": 0.981, + "step": 5580 + }, + { + "epoch": 0.51, + "learning_rate": 4.7430357635377406e-05, + "loss": 0.8837, + "step": 5590 + }, + { + "epoch": 0.51, + "learning_rate": 4.7425760779626736e-05, + "loss": 0.8876, + "step": 5600 + }, + { + "epoch": 0.52, + "learning_rate": 4.742116392387607e-05, + "loss": 0.9981, + "step": 5610 + }, + { + "epoch": 0.52, + "learning_rate": 4.741656706812541e-05, + "loss": 0.8476, + "step": 5620 + }, + { + "epoch": 0.52, + "learning_rate": 4.741197021237474e-05, + "loss": 0.9011, + "step": 5630 + }, + { + "epoch": 0.52, + "learning_rate": 4.7407373356624075e-05, + "loss": 1.0251, + "step": 5640 + }, + { + "epoch": 0.52, + "learning_rate": 4.7402776500873405e-05, + "loss": 1.0021, + "step": 5650 + }, + { + "epoch": 0.52, + "learning_rate": 4.7398179645122734e-05, + "loss": 1.0356, + "step": 5660 + }, + { + "epoch": 0.52, + "learning_rate": 4.739358278937207e-05, + "loss": 1.016, + "step": 5670 + }, + { + "epoch": 0.52, + "learning_rate": 4.738898593362141e-05, + "loss": 0.9401, + "step": 5680 + }, + { + "epoch": 0.52, + "learning_rate": 4.738438907787074e-05, + "loss": 0.9111, + "step": 5690 + }, + { + "epoch": 0.52, + "learning_rate": 4.7379792222120074e-05, + "loss": 0.9158, + "step": 5700 + }, + { + "epoch": 0.52, + "learning_rate": 4.737519536636941e-05, + "loss": 0.7603, + "step": 5710 + }, + { + "epoch": 0.53, + "learning_rate": 4.737059851061874e-05, + "loss": 0.8553, + "step": 5720 + }, + { + "epoch": 0.53, + "learning_rate": 4.7366001654868076e-05, + "loss": 0.8371, + "step": 5730 + }, + { + "epoch": 0.53, + "learning_rate": 4.7361404799117406e-05, + "loss": 1.0102, + "step": 5740 + }, + { + "epoch": 0.53, + "learning_rate": 4.7356807943366736e-05, + "loss": 0.9413, + "step": 5750 + }, + { + "epoch": 0.53, + "learning_rate": 4.735221108761607e-05, + "loss": 0.9853, + "step": 5760 + }, + { + "epoch": 0.53, + "learning_rate": 4.734761423186541e-05, + "loss": 0.9161, + "step": 5770 + }, + { + "epoch": 0.53, + "learning_rate": 4.734301737611474e-05, + "loss": 0.9149, + "step": 5780 + }, + { + "epoch": 0.53, + "learning_rate": 4.7338420520364075e-05, + "loss": 0.8176, + "step": 5790 + }, + { + "epoch": 0.53, + "learning_rate": 4.733382366461341e-05, + "loss": 0.9692, + "step": 5800 + }, + { + "epoch": 0.53, + "learning_rate": 4.732922680886274e-05, + "loss": 0.9556, + "step": 5810 + }, + { + "epoch": 0.54, + "learning_rate": 4.732462995311208e-05, + "loss": 0.8998, + "step": 5820 + }, + { + "epoch": 0.54, + "learning_rate": 4.732003309736141e-05, + "loss": 0.9182, + "step": 5830 + }, + { + "epoch": 0.54, + "learning_rate": 4.731543624161074e-05, + "loss": 0.9012, + "step": 5840 + }, + { + "epoch": 0.54, + "learning_rate": 4.731083938586007e-05, + "loss": 0.845, + "step": 5850 + }, + { + "epoch": 0.54, + "learning_rate": 4.730624253010941e-05, + "loss": 0.8313, + "step": 5860 + }, + { + "epoch": 0.54, + "learning_rate": 4.730164567435874e-05, + "loss": 0.8624, + "step": 5870 + }, + { + "epoch": 0.54, + "learning_rate": 4.7297048818608076e-05, + "loss": 1.0908, + "step": 5880 + }, + { + "epoch": 0.54, + "learning_rate": 4.729245196285741e-05, + "loss": 0.9056, + "step": 5890 + }, + { + "epoch": 0.54, + "learning_rate": 4.728785510710674e-05, + "loss": 0.8573, + "step": 5900 + }, + { + "epoch": 0.54, + "learning_rate": 4.728325825135608e-05, + "loss": 0.9547, + "step": 5910 + }, + { + "epoch": 0.54, + "learning_rate": 4.727866139560541e-05, + "loss": 0.8992, + "step": 5920 + }, + { + "epoch": 0.55, + "learning_rate": 4.727406453985474e-05, + "loss": 0.9014, + "step": 5930 + }, + { + "epoch": 0.55, + "learning_rate": 4.7269467684104075e-05, + "loss": 0.896, + "step": 5940 + }, + { + "epoch": 0.55, + "learning_rate": 4.726487082835341e-05, + "loss": 0.9668, + "step": 5950 + }, + { + "epoch": 0.55, + "learning_rate": 4.726027397260274e-05, + "loss": 1.0132, + "step": 5960 + }, + { + "epoch": 0.55, + "learning_rate": 4.725567711685208e-05, + "loss": 0.8343, + "step": 5970 + }, + { + "epoch": 0.55, + "learning_rate": 4.7251080261101414e-05, + "loss": 0.9907, + "step": 5980 + }, + { + "epoch": 0.55, + "learning_rate": 4.724648340535074e-05, + "loss": 0.877, + "step": 5990 + }, + { + "epoch": 0.55, + "learning_rate": 4.724188654960008e-05, + "loss": 0.9711, + "step": 6000 + }, + { + "epoch": 0.55, + "learning_rate": 4.723728969384941e-05, + "loss": 0.9057, + "step": 6010 + }, + { + "epoch": 0.55, + "learning_rate": 4.723269283809874e-05, + "loss": 0.8324, + "step": 6020 + }, + { + "epoch": 0.55, + "learning_rate": 4.7228095982348076e-05, + "loss": 0.9007, + "step": 6030 + }, + { + "epoch": 0.56, + "learning_rate": 4.722349912659741e-05, + "loss": 0.9415, + "step": 6040 + }, + { + "epoch": 0.56, + "learning_rate": 4.721890227084674e-05, + "loss": 0.8663, + "step": 6050 + }, + { + "epoch": 0.56, + "learning_rate": 4.721430541509608e-05, + "loss": 0.936, + "step": 6060 + }, + { + "epoch": 0.56, + "learning_rate": 4.720970855934541e-05, + "loss": 0.9742, + "step": 6070 + }, + { + "epoch": 0.56, + "learning_rate": 4.7205111703594745e-05, + "loss": 1.0575, + "step": 6080 + }, + { + "epoch": 0.56, + "learning_rate": 4.720051484784408e-05, + "loss": 0.9156, + "step": 6090 + }, + { + "epoch": 0.56, + "learning_rate": 4.719591799209341e-05, + "loss": 0.9435, + "step": 6100 + }, + { + "epoch": 0.56, + "learning_rate": 4.719132113634274e-05, + "loss": 0.8532, + "step": 6110 + }, + { + "epoch": 0.56, + "learning_rate": 4.718672428059208e-05, + "loss": 0.9434, + "step": 6120 + }, + { + "epoch": 0.56, + "learning_rate": 4.718212742484141e-05, + "loss": 0.92, + "step": 6130 + }, + { + "epoch": 0.56, + "learning_rate": 4.717753056909074e-05, + "loss": 0.9286, + "step": 6140 + }, + { + "epoch": 0.57, + "learning_rate": 4.717293371334008e-05, + "loss": 0.9871, + "step": 6150 + }, + { + "epoch": 0.57, + "learning_rate": 4.716833685758941e-05, + "loss": 0.8818, + "step": 6160 + }, + { + "epoch": 0.57, + "learning_rate": 4.7163740001838746e-05, + "loss": 0.8258, + "step": 6170 + }, + { + "epoch": 0.57, + "learning_rate": 4.715914314608808e-05, + "loss": 0.9504, + "step": 6180 + }, + { + "epoch": 0.57, + "learning_rate": 4.715454629033741e-05, + "loss": 0.8188, + "step": 6190 + }, + { + "epoch": 0.57, + "learning_rate": 4.714994943458674e-05, + "loss": 1.0046, + "step": 6200 + }, + { + "epoch": 0.57, + "learning_rate": 4.714535257883608e-05, + "loss": 0.9748, + "step": 6210 + }, + { + "epoch": 0.57, + "learning_rate": 4.7140755723085415e-05, + "loss": 1.0456, + "step": 6220 + }, + { + "epoch": 0.57, + "learning_rate": 4.7136158867334744e-05, + "loss": 0.9625, + "step": 6230 + }, + { + "epoch": 0.57, + "learning_rate": 4.713156201158408e-05, + "loss": 0.8917, + "step": 6240 + }, + { + "epoch": 0.57, + "learning_rate": 4.712696515583341e-05, + "loss": 0.9744, + "step": 6250 + }, + { + "epoch": 0.58, + "learning_rate": 4.712236830008275e-05, + "loss": 0.849, + "step": 6260 + }, + { + "epoch": 0.58, + "learning_rate": 4.7117771444332083e-05, + "loss": 0.9506, + "step": 6270 + }, + { + "epoch": 0.58, + "learning_rate": 4.711317458858141e-05, + "loss": 0.8776, + "step": 6280 + }, + { + "epoch": 0.58, + "learning_rate": 4.710857773283074e-05, + "loss": 0.874, + "step": 6290 + }, + { + "epoch": 0.58, + "learning_rate": 4.710398087708008e-05, + "loss": 0.8853, + "step": 6300 + }, + { + "epoch": 0.58, + "learning_rate": 4.7099384021329416e-05, + "loss": 1.0371, + "step": 6310 + }, + { + "epoch": 0.58, + "learning_rate": 4.7094787165578745e-05, + "loss": 0.9104, + "step": 6320 + }, + { + "epoch": 0.58, + "learning_rate": 4.709019030982808e-05, + "loss": 0.8479, + "step": 6330 + }, + { + "epoch": 0.58, + "learning_rate": 4.708559345407741e-05, + "loss": 0.8877, + "step": 6340 + }, + { + "epoch": 0.58, + "learning_rate": 4.708099659832675e-05, + "loss": 0.9463, + "step": 6350 + }, + { + "epoch": 0.58, + "learning_rate": 4.7076399742576085e-05, + "loss": 0.9059, + "step": 6360 + }, + { + "epoch": 0.59, + "learning_rate": 4.7071802886825414e-05, + "loss": 1.0372, + "step": 6370 + }, + { + "epoch": 0.59, + "learning_rate": 4.7067206031074744e-05, + "loss": 0.8248, + "step": 6380 + }, + { + "epoch": 0.59, + "learning_rate": 4.706260917532408e-05, + "loss": 0.8908, + "step": 6390 + }, + { + "epoch": 0.59, + "learning_rate": 4.705801231957341e-05, + "loss": 1.002, + "step": 6400 + }, + { + "epoch": 0.59, + "learning_rate": 4.705341546382275e-05, + "loss": 1.0619, + "step": 6410 + }, + { + "epoch": 0.59, + "learning_rate": 4.704881860807208e-05, + "loss": 0.9716, + "step": 6420 + }, + { + "epoch": 0.59, + "learning_rate": 4.704422175232141e-05, + "loss": 0.9142, + "step": 6430 + }, + { + "epoch": 0.59, + "learning_rate": 4.703962489657075e-05, + "loss": 0.9567, + "step": 6440 + }, + { + "epoch": 0.59, + "learning_rate": 4.7035028040820086e-05, + "loss": 0.8968, + "step": 6450 + }, + { + "epoch": 0.59, + "learning_rate": 4.7030431185069416e-05, + "loss": 0.7701, + "step": 6460 + }, + { + "epoch": 0.59, + "learning_rate": 4.7025834329318745e-05, + "loss": 0.8457, + "step": 6470 + }, + { + "epoch": 0.6, + "learning_rate": 4.702123747356808e-05, + "loss": 0.7709, + "step": 6480 + }, + { + "epoch": 0.6, + "learning_rate": 4.701664061781741e-05, + "loss": 0.9908, + "step": 6490 + }, + { + "epoch": 0.6, + "learning_rate": 4.701204376206675e-05, + "loss": 1.0772, + "step": 6500 + }, + { + "epoch": 0.6, + "learning_rate": 4.7007446906316084e-05, + "loss": 0.8958, + "step": 6510 + }, + { + "epoch": 0.6, + "learning_rate": 4.7002850050565414e-05, + "loss": 0.9754, + "step": 6520 + }, + { + "epoch": 0.6, + "learning_rate": 4.699825319481475e-05, + "loss": 1.0278, + "step": 6530 + }, + { + "epoch": 0.6, + "learning_rate": 4.699365633906409e-05, + "loss": 0.9408, + "step": 6540 + }, + { + "epoch": 0.6, + "learning_rate": 4.698905948331342e-05, + "loss": 0.9297, + "step": 6550 + }, + { + "epoch": 0.6, + "learning_rate": 4.6984462627562746e-05, + "loss": 1.0081, + "step": 6560 + }, + { + "epoch": 0.6, + "learning_rate": 4.697986577181208e-05, + "loss": 0.8354, + "step": 6570 + }, + { + "epoch": 0.6, + "learning_rate": 4.697526891606141e-05, + "loss": 0.8158, + "step": 6580 + }, + { + "epoch": 0.61, + "learning_rate": 4.697067206031075e-05, + "loss": 0.7737, + "step": 6590 + }, + { + "epoch": 0.61, + "learning_rate": 4.6966075204560086e-05, + "loss": 0.8756, + "step": 6600 + }, + { + "epoch": 0.61, + "learning_rate": 4.6961478348809415e-05, + "loss": 1.0093, + "step": 6610 + }, + { + "epoch": 0.61, + "learning_rate": 4.695688149305875e-05, + "loss": 0.9151, + "step": 6620 + }, + { + "epoch": 0.61, + "learning_rate": 4.695228463730809e-05, + "loss": 0.9182, + "step": 6630 + }, + { + "epoch": 0.61, + "learning_rate": 4.694768778155742e-05, + "loss": 0.9575, + "step": 6640 + }, + { + "epoch": 0.61, + "learning_rate": 4.694309092580675e-05, + "loss": 0.8997, + "step": 6650 + }, + { + "epoch": 0.61, + "learning_rate": 4.6938494070056084e-05, + "loss": 1.0191, + "step": 6660 + }, + { + "epoch": 0.61, + "learning_rate": 4.6933897214305414e-05, + "loss": 0.921, + "step": 6670 + }, + { + "epoch": 0.61, + "learning_rate": 4.692930035855475e-05, + "loss": 0.9304, + "step": 6680 + }, + { + "epoch": 0.62, + "learning_rate": 4.692470350280409e-05, + "loss": 0.9434, + "step": 6690 + }, + { + "epoch": 0.62, + "learning_rate": 4.6920106647053416e-05, + "loss": 1.03, + "step": 6700 + }, + { + "epoch": 0.62, + "learning_rate": 4.691550979130275e-05, + "loss": 0.993, + "step": 6710 + }, + { + "epoch": 0.62, + "learning_rate": 4.691091293555209e-05, + "loss": 0.9933, + "step": 6720 + }, + { + "epoch": 0.62, + "learning_rate": 4.690631607980141e-05, + "loss": 0.8856, + "step": 6730 + }, + { + "epoch": 0.62, + "learning_rate": 4.690171922405075e-05, + "loss": 0.9776, + "step": 6740 + }, + { + "epoch": 0.62, + "learning_rate": 4.6897122368300085e-05, + "loss": 0.9305, + "step": 6750 + }, + { + "epoch": 0.62, + "learning_rate": 4.6892525512549415e-05, + "loss": 0.8587, + "step": 6760 + }, + { + "epoch": 0.62, + "learning_rate": 4.688792865679875e-05, + "loss": 0.8586, + "step": 6770 + }, + { + "epoch": 0.62, + "learning_rate": 4.688333180104809e-05, + "loss": 0.8837, + "step": 6780 + }, + { + "epoch": 0.62, + "learning_rate": 4.687873494529742e-05, + "loss": 0.9914, + "step": 6790 + }, + { + "epoch": 0.63, + "learning_rate": 4.6874138089546754e-05, + "loss": 0.8536, + "step": 6800 + }, + { + "epoch": 0.63, + "learning_rate": 4.686954123379609e-05, + "loss": 1.0221, + "step": 6810 + }, + { + "epoch": 0.63, + "learning_rate": 4.6864944378045414e-05, + "loss": 0.8317, + "step": 6820 + }, + { + "epoch": 0.63, + "learning_rate": 4.686034752229475e-05, + "loss": 0.8937, + "step": 6830 + }, + { + "epoch": 0.63, + "learning_rate": 4.6855750666544086e-05, + "loss": 0.9584, + "step": 6840 + }, + { + "epoch": 0.63, + "learning_rate": 4.6851153810793416e-05, + "loss": 0.8973, + "step": 6850 + }, + { + "epoch": 0.63, + "learning_rate": 4.684655695504275e-05, + "loss": 0.8727, + "step": 6860 + }, + { + "epoch": 0.63, + "learning_rate": 4.684196009929209e-05, + "loss": 0.8199, + "step": 6870 + }, + { + "epoch": 0.63, + "learning_rate": 4.683736324354142e-05, + "loss": 0.9241, + "step": 6880 + }, + { + "epoch": 0.63, + "learning_rate": 4.6832766387790755e-05, + "loss": 0.8119, + "step": 6890 + }, + { + "epoch": 0.63, + "learning_rate": 4.682816953204009e-05, + "loss": 0.9161, + "step": 6900 + }, + { + "epoch": 0.64, + "learning_rate": 4.6823572676289415e-05, + "loss": 0.9221, + "step": 6910 + }, + { + "epoch": 0.64, + "learning_rate": 4.681897582053875e-05, + "loss": 0.9031, + "step": 6920 + }, + { + "epoch": 0.64, + "learning_rate": 4.681437896478809e-05, + "loss": 0.9187, + "step": 6930 + }, + { + "epoch": 0.64, + "learning_rate": 4.680978210903742e-05, + "loss": 0.8815, + "step": 6940 + }, + { + "epoch": 0.64, + "learning_rate": 4.6805185253286754e-05, + "loss": 1.0057, + "step": 6950 + }, + { + "epoch": 0.64, + "learning_rate": 4.680058839753609e-05, + "loss": 0.9811, + "step": 6960 + }, + { + "epoch": 0.64, + "learning_rate": 4.679599154178542e-05, + "loss": 0.9313, + "step": 6970 + }, + { + "epoch": 0.64, + "learning_rate": 4.6791394686034756e-05, + "loss": 0.9516, + "step": 6980 + }, + { + "epoch": 0.64, + "learning_rate": 4.678679783028409e-05, + "loss": 0.9074, + "step": 6990 + }, + { + "epoch": 0.64, + "learning_rate": 4.6782200974533416e-05, + "loss": 0.9564, + "step": 7000 + }, + { + "epoch": 0.64, + "learning_rate": 4.677760411878275e-05, + "loss": 0.8665, + "step": 7010 + }, + { + "epoch": 0.65, + "learning_rate": 4.677300726303209e-05, + "loss": 0.9682, + "step": 7020 + }, + { + "epoch": 0.65, + "learning_rate": 4.676841040728142e-05, + "loss": 0.901, + "step": 7030 + }, + { + "epoch": 0.65, + "learning_rate": 4.6763813551530755e-05, + "loss": 0.9427, + "step": 7040 + }, + { + "epoch": 0.65, + "learning_rate": 4.675921669578009e-05, + "loss": 0.9436, + "step": 7050 + }, + { + "epoch": 0.65, + "learning_rate": 4.675461984002942e-05, + "loss": 0.9431, + "step": 7060 + }, + { + "epoch": 0.65, + "learning_rate": 4.675002298427876e-05, + "loss": 0.9188, + "step": 7070 + }, + { + "epoch": 0.65, + "learning_rate": 4.6745426128528094e-05, + "loss": 0.9387, + "step": 7080 + }, + { + "epoch": 0.65, + "learning_rate": 4.674082927277742e-05, + "loss": 0.7898, + "step": 7090 + }, + { + "epoch": 0.65, + "learning_rate": 4.6736232417026754e-05, + "loss": 1.1022, + "step": 7100 + }, + { + "epoch": 0.65, + "learning_rate": 4.673163556127609e-05, + "loss": 1.0154, + "step": 7110 + }, + { + "epoch": 0.65, + "learning_rate": 4.672703870552542e-05, + "loss": 0.9514, + "step": 7120 + }, + { + "epoch": 0.66, + "learning_rate": 4.6722441849774756e-05, + "loss": 0.852, + "step": 7130 + }, + { + "epoch": 0.66, + "learning_rate": 4.671784499402409e-05, + "loss": 0.8602, + "step": 7140 + }, + { + "epoch": 0.66, + "learning_rate": 4.671324813827342e-05, + "loss": 0.9013, + "step": 7150 + }, + { + "epoch": 0.66, + "learning_rate": 4.670865128252276e-05, + "loss": 0.7947, + "step": 7160 + }, + { + "epoch": 0.66, + "learning_rate": 4.6704054426772095e-05, + "loss": 0.9176, + "step": 7170 + }, + { + "epoch": 0.66, + "learning_rate": 4.669945757102142e-05, + "loss": 0.8809, + "step": 7180 + }, + { + "epoch": 0.66, + "learning_rate": 4.6694860715270755e-05, + "loss": 0.8874, + "step": 7190 + }, + { + "epoch": 0.66, + "learning_rate": 4.669026385952009e-05, + "loss": 0.9915, + "step": 7200 + }, + { + "epoch": 0.66, + "learning_rate": 4.668566700376942e-05, + "loss": 0.9328, + "step": 7210 + }, + { + "epoch": 0.66, + "learning_rate": 4.668107014801876e-05, + "loss": 0.8961, + "step": 7220 + }, + { + "epoch": 0.66, + "learning_rate": 4.6676473292268094e-05, + "loss": 1.0482, + "step": 7230 + }, + { + "epoch": 0.67, + "learning_rate": 4.6671876436517424e-05, + "loss": 0.9711, + "step": 7240 + }, + { + "epoch": 0.67, + "learning_rate": 4.666727958076676e-05, + "loss": 1.0289, + "step": 7250 + }, + { + "epoch": 0.67, + "learning_rate": 4.6662682725016097e-05, + "loss": 0.9586, + "step": 7260 + }, + { + "epoch": 0.67, + "learning_rate": 4.665808586926542e-05, + "loss": 0.9178, + "step": 7270 + }, + { + "epoch": 0.67, + "learning_rate": 4.6653489013514756e-05, + "loss": 0.8158, + "step": 7280 + }, + { + "epoch": 0.67, + "learning_rate": 4.664889215776409e-05, + "loss": 0.9513, + "step": 7290 + }, + { + "epoch": 0.67, + "learning_rate": 4.664429530201342e-05, + "loss": 0.8031, + "step": 7300 + }, + { + "epoch": 0.67, + "learning_rate": 4.663969844626276e-05, + "loss": 0.8974, + "step": 7310 + }, + { + "epoch": 0.67, + "learning_rate": 4.6635101590512095e-05, + "loss": 0.9998, + "step": 7320 + }, + { + "epoch": 0.67, + "learning_rate": 4.6630504734761425e-05, + "loss": 0.9037, + "step": 7330 + }, + { + "epoch": 0.67, + "learning_rate": 4.662590787901076e-05, + "loss": 0.9054, + "step": 7340 + }, + { + "epoch": 0.68, + "learning_rate": 4.66213110232601e-05, + "loss": 0.999, + "step": 7350 + }, + { + "epoch": 0.68, + "learning_rate": 4.661671416750942e-05, + "loss": 0.953, + "step": 7360 + }, + { + "epoch": 0.68, + "learning_rate": 4.661211731175876e-05, + "loss": 0.926, + "step": 7370 + }, + { + "epoch": 0.68, + "learning_rate": 4.6607520456008094e-05, + "loss": 0.9692, + "step": 7380 + }, + { + "epoch": 0.68, + "learning_rate": 4.660292360025742e-05, + "loss": 0.9667, + "step": 7390 + }, + { + "epoch": 0.68, + "learning_rate": 4.659832674450676e-05, + "loss": 0.7981, + "step": 7400 + }, + { + "epoch": 0.68, + "learning_rate": 4.6593729888756096e-05, + "loss": 1.0221, + "step": 7410 + }, + { + "epoch": 0.68, + "learning_rate": 4.6589133033005426e-05, + "loss": 1.0226, + "step": 7420 + }, + { + "epoch": 0.68, + "learning_rate": 4.658453617725476e-05, + "loss": 0.9383, + "step": 7430 + }, + { + "epoch": 0.68, + "learning_rate": 4.65799393215041e-05, + "loss": 0.9596, + "step": 7440 + }, + { + "epoch": 0.68, + "learning_rate": 4.657534246575342e-05, + "loss": 0.8783, + "step": 7450 + }, + { + "epoch": 0.69, + "learning_rate": 4.657074561000276e-05, + "loss": 0.908, + "step": 7460 + }, + { + "epoch": 0.69, + "learning_rate": 4.6566148754252095e-05, + "loss": 1.0087, + "step": 7470 + }, + { + "epoch": 0.69, + "learning_rate": 4.6561551898501425e-05, + "loss": 0.887, + "step": 7480 + }, + { + "epoch": 0.69, + "learning_rate": 4.655695504275076e-05, + "loss": 0.9528, + "step": 7490 + }, + { + "epoch": 0.69, + "learning_rate": 4.65523581870001e-05, + "loss": 0.9504, + "step": 7500 + }, + { + "epoch": 0.69, + "learning_rate": 4.654776133124943e-05, + "loss": 1.0215, + "step": 7510 + }, + { + "epoch": 0.69, + "learning_rate": 4.6543164475498764e-05, + "loss": 0.9759, + "step": 7520 + }, + { + "epoch": 0.69, + "learning_rate": 4.65385676197481e-05, + "loss": 0.9513, + "step": 7530 + }, + { + "epoch": 0.69, + "learning_rate": 4.653397076399742e-05, + "loss": 0.9184, + "step": 7540 + }, + { + "epoch": 0.69, + "learning_rate": 4.652937390824676e-05, + "loss": 1.0114, + "step": 7550 + }, + { + "epoch": 0.7, + "learning_rate": 4.6524777052496096e-05, + "loss": 0.8216, + "step": 7560 + }, + { + "epoch": 0.7, + "learning_rate": 4.6520180196745426e-05, + "loss": 0.8299, + "step": 7570 + }, + { + "epoch": 0.7, + "learning_rate": 4.651558334099476e-05, + "loss": 0.8972, + "step": 7580 + }, + { + "epoch": 0.7, + "learning_rate": 4.65109864852441e-05, + "loss": 0.7963, + "step": 7590 + }, + { + "epoch": 0.7, + "learning_rate": 4.650638962949343e-05, + "loss": 1.0112, + "step": 7600 + }, + { + "epoch": 0.7, + "learning_rate": 4.6501792773742765e-05, + "loss": 0.7989, + "step": 7610 + }, + { + "epoch": 0.7, + "learning_rate": 4.64971959179921e-05, + "loss": 0.9572, + "step": 7620 + }, + { + "epoch": 0.7, + "learning_rate": 4.6492599062241424e-05, + "loss": 1.0296, + "step": 7630 + }, + { + "epoch": 0.7, + "learning_rate": 4.648800220649076e-05, + "loss": 0.9615, + "step": 7640 + }, + { + "epoch": 0.7, + "learning_rate": 4.64834053507401e-05, + "loss": 0.9038, + "step": 7650 + }, + { + "epoch": 0.7, + "learning_rate": 4.647880849498943e-05, + "loss": 0.9469, + "step": 7660 + }, + { + "epoch": 0.71, + "learning_rate": 4.6474211639238763e-05, + "loss": 0.9469, + "step": 7670 + }, + { + "epoch": 0.71, + "learning_rate": 4.64696147834881e-05, + "loss": 0.835, + "step": 7680 + }, + { + "epoch": 0.71, + "learning_rate": 4.646501792773743e-05, + "loss": 1.0294, + "step": 7690 + }, + { + "epoch": 0.71, + "learning_rate": 4.6460421071986766e-05, + "loss": 0.9579, + "step": 7700 + }, + { + "epoch": 0.71, + "learning_rate": 4.6455824216236096e-05, + "loss": 0.9462, + "step": 7710 + }, + { + "epoch": 0.71, + "learning_rate": 4.6451227360485425e-05, + "loss": 0.9222, + "step": 7720 + }, + { + "epoch": 0.71, + "learning_rate": 4.644663050473476e-05, + "loss": 0.9099, + "step": 7730 + }, + { + "epoch": 0.71, + "learning_rate": 4.64420336489841e-05, + "loss": 0.8329, + "step": 7740 + }, + { + "epoch": 0.71, + "learning_rate": 4.643743679323343e-05, + "loss": 0.8279, + "step": 7750 + }, + { + "epoch": 0.71, + "learning_rate": 4.6432839937482765e-05, + "loss": 0.9033, + "step": 7760 + }, + { + "epoch": 0.71, + "learning_rate": 4.64282430817321e-05, + "loss": 1.0114, + "step": 7770 + }, + { + "epoch": 0.72, + "learning_rate": 4.642364622598143e-05, + "loss": 0.859, + "step": 7780 + }, + { + "epoch": 0.72, + "learning_rate": 4.641904937023077e-05, + "loss": 0.9119, + "step": 7790 + }, + { + "epoch": 0.72, + "learning_rate": 4.64144525144801e-05, + "loss": 0.956, + "step": 7800 + }, + { + "epoch": 0.72, + "learning_rate": 4.640985565872943e-05, + "loss": 0.8445, + "step": 7810 + }, + { + "epoch": 0.72, + "learning_rate": 4.640525880297876e-05, + "loss": 0.9098, + "step": 7820 + }, + { + "epoch": 0.72, + "learning_rate": 4.64006619472281e-05, + "loss": 0.993, + "step": 7830 + }, + { + "epoch": 0.72, + "learning_rate": 4.639606509147743e-05, + "loss": 0.7811, + "step": 7840 + }, + { + "epoch": 0.72, + "learning_rate": 4.6391468235726766e-05, + "loss": 0.9625, + "step": 7850 + }, + { + "epoch": 0.72, + "learning_rate": 4.63868713799761e-05, + "loss": 0.9788, + "step": 7860 + }, + { + "epoch": 0.72, + "learning_rate": 4.638227452422543e-05, + "loss": 0.9167, + "step": 7870 + }, + { + "epoch": 0.72, + "learning_rate": 4.637767766847477e-05, + "loss": 0.9248, + "step": 7880 + }, + { + "epoch": 0.73, + "learning_rate": 4.63730808127241e-05, + "loss": 0.9081, + "step": 7890 + }, + { + "epoch": 0.73, + "learning_rate": 4.636848395697343e-05, + "loss": 0.8743, + "step": 7900 + }, + { + "epoch": 0.73, + "learning_rate": 4.6363887101222764e-05, + "loss": 0.9266, + "step": 7910 + }, + { + "epoch": 0.73, + "learning_rate": 4.63592902454721e-05, + "loss": 0.9506, + "step": 7920 + }, + { + "epoch": 0.73, + "learning_rate": 4.635469338972143e-05, + "loss": 0.9126, + "step": 7930 + }, + { + "epoch": 0.73, + "learning_rate": 4.635009653397077e-05, + "loss": 0.8768, + "step": 7940 + }, + { + "epoch": 0.73, + "learning_rate": 4.6345499678220103e-05, + "loss": 0.8792, + "step": 7950 + }, + { + "epoch": 0.73, + "learning_rate": 4.634090282246943e-05, + "loss": 0.8548, + "step": 7960 + }, + { + "epoch": 0.73, + "learning_rate": 4.633630596671877e-05, + "loss": 0.8766, + "step": 7970 + }, + { + "epoch": 0.73, + "learning_rate": 4.63317091109681e-05, + "loss": 0.9301, + "step": 7980 + }, + { + "epoch": 0.73, + "learning_rate": 4.632711225521743e-05, + "loss": 0.9481, + "step": 7990 + }, + { + "epoch": 0.74, + "learning_rate": 4.6322515399466766e-05, + "loss": 0.879, + "step": 8000 + }, + { + "epoch": 0.74, + "learning_rate": 4.63179185437161e-05, + "loss": 0.9006, + "step": 8010 + }, + { + "epoch": 0.74, + "learning_rate": 4.631332168796543e-05, + "loss": 0.8532, + "step": 8020 + }, + { + "epoch": 0.74, + "learning_rate": 4.630872483221477e-05, + "loss": 1.0088, + "step": 8030 + }, + { + "epoch": 0.74, + "learning_rate": 4.63041279764641e-05, + "loss": 0.9281, + "step": 8040 + }, + { + "epoch": 0.74, + "learning_rate": 4.6299531120713434e-05, + "loss": 0.9401, + "step": 8050 + }, + { + "epoch": 0.74, + "learning_rate": 4.629493426496277e-05, + "loss": 0.9023, + "step": 8060 + }, + { + "epoch": 0.74, + "learning_rate": 4.62903374092121e-05, + "loss": 0.931, + "step": 8070 + }, + { + "epoch": 0.74, + "learning_rate": 4.628574055346143e-05, + "loss": 0.9297, + "step": 8080 + }, + { + "epoch": 0.74, + "learning_rate": 4.628114369771077e-05, + "loss": 0.9526, + "step": 8090 + }, + { + "epoch": 0.74, + "learning_rate": 4.62765468419601e-05, + "loss": 0.8054, + "step": 8100 + }, + { + "epoch": 0.75, + "learning_rate": 4.627194998620943e-05, + "loss": 0.9754, + "step": 8110 + }, + { + "epoch": 0.75, + "learning_rate": 4.626735313045877e-05, + "loss": 1.0054, + "step": 8120 + }, + { + "epoch": 0.75, + "learning_rate": 4.62627562747081e-05, + "loss": 0.858, + "step": 8130 + }, + { + "epoch": 0.75, + "learning_rate": 4.6258159418957436e-05, + "loss": 0.9574, + "step": 8140 + }, + { + "epoch": 0.75, + "learning_rate": 4.625356256320677e-05, + "loss": 0.7805, + "step": 8150 + }, + { + "epoch": 0.75, + "learning_rate": 4.62489657074561e-05, + "loss": 0.8775, + "step": 8160 + }, + { + "epoch": 0.75, + "learning_rate": 4.624436885170543e-05, + "loss": 0.9306, + "step": 8170 + }, + { + "epoch": 0.75, + "learning_rate": 4.623977199595477e-05, + "loss": 1.0693, + "step": 8180 + }, + { + "epoch": 0.75, + "learning_rate": 4.6235175140204104e-05, + "loss": 0.9722, + "step": 8190 + }, + { + "epoch": 0.75, + "learning_rate": 4.6230578284453434e-05, + "loss": 0.9618, + "step": 8200 + }, + { + "epoch": 0.75, + "learning_rate": 4.622598142870277e-05, + "loss": 0.8457, + "step": 8210 + }, + { + "epoch": 0.76, + "learning_rate": 4.62213845729521e-05, + "loss": 0.9497, + "step": 8220 + }, + { + "epoch": 0.76, + "learning_rate": 4.621678771720144e-05, + "loss": 0.8787, + "step": 8230 + }, + { + "epoch": 0.76, + "learning_rate": 4.621219086145077e-05, + "loss": 0.9033, + "step": 8240 + }, + { + "epoch": 0.76, + "learning_rate": 4.62075940057001e-05, + "loss": 0.8872, + "step": 8250 + }, + { + "epoch": 0.76, + "learning_rate": 4.620299714994943e-05, + "loss": 0.8246, + "step": 8260 + }, + { + "epoch": 0.76, + "learning_rate": 4.619840029419877e-05, + "loss": 0.9612, + "step": 8270 + }, + { + "epoch": 0.76, + "learning_rate": 4.6193803438448106e-05, + "loss": 0.8578, + "step": 8280 + }, + { + "epoch": 0.76, + "learning_rate": 4.6189206582697435e-05, + "loss": 0.9147, + "step": 8290 + }, + { + "epoch": 0.76, + "learning_rate": 4.618460972694677e-05, + "loss": 0.9273, + "step": 8300 + }, + { + "epoch": 0.76, + "learning_rate": 4.61800128711961e-05, + "loss": 0.8135, + "step": 8310 + }, + { + "epoch": 0.76, + "learning_rate": 4.617541601544544e-05, + "loss": 0.8515, + "step": 8320 + }, + { + "epoch": 0.77, + "learning_rate": 4.6170819159694774e-05, + "loss": 0.9319, + "step": 8330 + }, + { + "epoch": 0.77, + "learning_rate": 4.6166222303944104e-05, + "loss": 0.9397, + "step": 8340 + }, + { + "epoch": 0.77, + "learning_rate": 4.6161625448193434e-05, + "loss": 0.8886, + "step": 8350 + }, + { + "epoch": 0.77, + "learning_rate": 4.615702859244277e-05, + "loss": 1.0093, + "step": 8360 + }, + { + "epoch": 0.77, + "learning_rate": 4.61524317366921e-05, + "loss": 0.896, + "step": 8370 + }, + { + "epoch": 0.77, + "learning_rate": 4.6147834880941436e-05, + "loss": 0.9082, + "step": 8380 + }, + { + "epoch": 0.77, + "learning_rate": 4.614323802519077e-05, + "loss": 0.8562, + "step": 8390 + }, + { + "epoch": 0.77, + "learning_rate": 4.61386411694401e-05, + "loss": 0.8869, + "step": 8400 + }, + { + "epoch": 0.77, + "learning_rate": 4.613404431368944e-05, + "loss": 0.9303, + "step": 8410 + }, + { + "epoch": 0.77, + "learning_rate": 4.6129447457938776e-05, + "loss": 0.7779, + "step": 8420 + }, + { + "epoch": 0.78, + "learning_rate": 4.6124850602188105e-05, + "loss": 0.9314, + "step": 8430 + }, + { + "epoch": 0.78, + "learning_rate": 4.6120253746437435e-05, + "loss": 0.9407, + "step": 8440 + }, + { + "epoch": 0.78, + "learning_rate": 4.611565689068677e-05, + "loss": 0.9324, + "step": 8450 + }, + { + "epoch": 0.78, + "learning_rate": 4.61110600349361e-05, + "loss": 0.8839, + "step": 8460 + }, + { + "epoch": 0.78, + "learning_rate": 4.610646317918544e-05, + "loss": 1.0149, + "step": 8470 + }, + { + "epoch": 0.78, + "learning_rate": 4.6101866323434774e-05, + "loss": 0.9564, + "step": 8480 + }, + { + "epoch": 0.78, + "learning_rate": 4.6097269467684104e-05, + "loss": 0.9496, + "step": 8490 + }, + { + "epoch": 0.78, + "learning_rate": 4.609267261193344e-05, + "loss": 0.781, + "step": 8500 + }, + { + "epoch": 0.78, + "learning_rate": 4.608807575618278e-05, + "loss": 0.7956, + "step": 8510 + }, + { + "epoch": 0.78, + "learning_rate": 4.6083478900432107e-05, + "loss": 0.8251, + "step": 8520 + }, + { + "epoch": 0.78, + "learning_rate": 4.607888204468144e-05, + "loss": 0.8574, + "step": 8530 + }, + { + "epoch": 0.79, + "learning_rate": 4.607428518893077e-05, + "loss": 0.9041, + "step": 8540 + }, + { + "epoch": 0.79, + "learning_rate": 4.60696883331801e-05, + "loss": 0.9488, + "step": 8550 + }, + { + "epoch": 0.79, + "learning_rate": 4.606509147742944e-05, + "loss": 0.9011, + "step": 8560 + }, + { + "epoch": 0.79, + "learning_rate": 4.6060494621678775e-05, + "loss": 1.07, + "step": 8570 + }, + { + "epoch": 0.79, + "learning_rate": 4.6055897765928105e-05, + "loss": 0.9035, + "step": 8580 + }, + { + "epoch": 0.79, + "learning_rate": 4.605130091017744e-05, + "loss": 0.9104, + "step": 8590 + }, + { + "epoch": 0.79, + "learning_rate": 4.604670405442678e-05, + "loss": 0.9687, + "step": 8600 + }, + { + "epoch": 0.79, + "learning_rate": 4.604210719867611e-05, + "loss": 0.8945, + "step": 8610 + }, + { + "epoch": 0.79, + "learning_rate": 4.6037510342925444e-05, + "loss": 0.8843, + "step": 8620 + }, + { + "epoch": 0.79, + "learning_rate": 4.6032913487174774e-05, + "loss": 0.9736, + "step": 8630 + }, + { + "epoch": 0.79, + "learning_rate": 4.6028316631424104e-05, + "loss": 0.9156, + "step": 8640 + }, + { + "epoch": 0.8, + "learning_rate": 4.602371977567344e-05, + "loss": 0.8903, + "step": 8650 + }, + { + "epoch": 0.8, + "learning_rate": 4.6019122919922777e-05, + "loss": 1.0464, + "step": 8660 + }, + { + "epoch": 0.8, + "learning_rate": 4.6014526064172106e-05, + "loss": 0.8583, + "step": 8670 + }, + { + "epoch": 0.8, + "learning_rate": 4.600992920842144e-05, + "loss": 0.8893, + "step": 8680 + }, + { + "epoch": 0.8, + "learning_rate": 4.600533235267078e-05, + "loss": 0.9144, + "step": 8690 + }, + { + "epoch": 0.8, + "learning_rate": 4.600073549692011e-05, + "loss": 0.9408, + "step": 8700 + }, + { + "epoch": 0.8, + "learning_rate": 4.5996138641169445e-05, + "loss": 0.9763, + "step": 8710 + }, + { + "epoch": 0.8, + "learning_rate": 4.5991541785418775e-05, + "loss": 0.9499, + "step": 8720 + }, + { + "epoch": 0.8, + "learning_rate": 4.5986944929668105e-05, + "loss": 0.8354, + "step": 8730 + }, + { + "epoch": 0.8, + "learning_rate": 4.598234807391744e-05, + "loss": 0.8823, + "step": 8740 + }, + { + "epoch": 0.8, + "learning_rate": 4.597775121816678e-05, + "loss": 0.942, + "step": 8750 + }, + { + "epoch": 0.81, + "learning_rate": 4.597315436241611e-05, + "loss": 0.8892, + "step": 8760 + }, + { + "epoch": 0.81, + "learning_rate": 4.5968557506665444e-05, + "loss": 0.8544, + "step": 8770 + }, + { + "epoch": 0.81, + "learning_rate": 4.596396065091478e-05, + "loss": 0.9719, + "step": 8780 + }, + { + "epoch": 0.81, + "learning_rate": 4.595936379516411e-05, + "loss": 0.8874, + "step": 8790 + }, + { + "epoch": 0.81, + "learning_rate": 4.5954766939413447e-05, + "loss": 0.9312, + "step": 8800 + }, + { + "epoch": 0.81, + "learning_rate": 4.5950170083662776e-05, + "loss": 0.9053, + "step": 8810 + }, + { + "epoch": 0.81, + "learning_rate": 4.5945573227912106e-05, + "loss": 0.9424, + "step": 8820 + }, + { + "epoch": 0.81, + "learning_rate": 4.594097637216144e-05, + "loss": 0.8635, + "step": 8830 + }, + { + "epoch": 0.81, + "learning_rate": 4.593637951641078e-05, + "loss": 0.9689, + "step": 8840 + }, + { + "epoch": 0.81, + "learning_rate": 4.593178266066011e-05, + "loss": 0.9001, + "step": 8850 + }, + { + "epoch": 0.81, + "learning_rate": 4.5927185804909445e-05, + "loss": 0.9846, + "step": 8860 + }, + { + "epoch": 0.82, + "learning_rate": 4.592258894915878e-05, + "loss": 0.8666, + "step": 8870 + }, + { + "epoch": 0.82, + "learning_rate": 4.591799209340811e-05, + "loss": 0.8585, + "step": 8880 + }, + { + "epoch": 0.82, + "learning_rate": 4.591339523765745e-05, + "loss": 0.8445, + "step": 8890 + }, + { + "epoch": 0.82, + "learning_rate": 4.590879838190678e-05, + "loss": 1.0266, + "step": 8900 + }, + { + "epoch": 0.82, + "learning_rate": 4.590420152615611e-05, + "loss": 1.0117, + "step": 8910 + }, + { + "epoch": 0.82, + "learning_rate": 4.5899604670405444e-05, + "loss": 0.9814, + "step": 8920 + }, + { + "epoch": 0.82, + "learning_rate": 4.589500781465478e-05, + "loss": 0.9063, + "step": 8930 + }, + { + "epoch": 0.82, + "learning_rate": 4.589041095890411e-05, + "loss": 0.8184, + "step": 8940 + }, + { + "epoch": 0.82, + "learning_rate": 4.5885814103153446e-05, + "loss": 0.8957, + "step": 8950 + }, + { + "epoch": 0.82, + "learning_rate": 4.588121724740278e-05, + "loss": 0.9468, + "step": 8960 + }, + { + "epoch": 0.82, + "learning_rate": 4.587662039165211e-05, + "loss": 0.9206, + "step": 8970 + }, + { + "epoch": 0.83, + "learning_rate": 4.587202353590145e-05, + "loss": 1.161, + "step": 8980 + }, + { + "epoch": 0.83, + "learning_rate": 4.586742668015078e-05, + "loss": 0.8617, + "step": 8990 + }, + { + "epoch": 0.83, + "learning_rate": 4.586282982440011e-05, + "loss": 0.9063, + "step": 9000 + }, + { + "epoch": 0.83, + "learning_rate": 4.5858232968649445e-05, + "loss": 0.9072, + "step": 9010 + }, + { + "epoch": 0.83, + "learning_rate": 4.585363611289878e-05, + "loss": 0.885, + "step": 9020 + }, + { + "epoch": 0.83, + "learning_rate": 4.584903925714811e-05, + "loss": 0.8781, + "step": 9030 + }, + { + "epoch": 0.83, + "learning_rate": 4.584444240139745e-05, + "loss": 0.8612, + "step": 9040 + }, + { + "epoch": 0.83, + "learning_rate": 4.5839845545646784e-05, + "loss": 0.8331, + "step": 9050 + }, + { + "epoch": 0.83, + "learning_rate": 4.5835248689896114e-05, + "loss": 0.8512, + "step": 9060 + }, + { + "epoch": 0.83, + "learning_rate": 4.583065183414545e-05, + "loss": 0.8398, + "step": 9070 + }, + { + "epoch": 0.83, + "learning_rate": 4.582605497839478e-05, + "loss": 0.8693, + "step": 9080 + }, + { + "epoch": 0.84, + "learning_rate": 4.582145812264411e-05, + "loss": 0.8372, + "step": 9090 + }, + { + "epoch": 0.84, + "learning_rate": 4.5816861266893446e-05, + "loss": 0.8334, + "step": 9100 + }, + { + "epoch": 0.84, + "learning_rate": 4.581226441114278e-05, + "loss": 0.8157, + "step": 9110 + }, + { + "epoch": 0.84, + "learning_rate": 4.580766755539211e-05, + "loss": 0.9965, + "step": 9120 + }, + { + "epoch": 0.84, + "learning_rate": 4.580307069964145e-05, + "loss": 0.9398, + "step": 9130 + }, + { + "epoch": 0.84, + "learning_rate": 4.5798473843890785e-05, + "loss": 0.9553, + "step": 9140 + }, + { + "epoch": 0.84, + "learning_rate": 4.5793876988140115e-05, + "loss": 0.9531, + "step": 9150 + }, + { + "epoch": 0.84, + "learning_rate": 4.578928013238945e-05, + "loss": 1.0181, + "step": 9160 + }, + { + "epoch": 0.84, + "learning_rate": 4.578468327663878e-05, + "loss": 0.9433, + "step": 9170 + }, + { + "epoch": 0.84, + "learning_rate": 4.578008642088811e-05, + "loss": 0.9153, + "step": 9180 + }, + { + "epoch": 0.84, + "learning_rate": 4.577548956513745e-05, + "loss": 0.9699, + "step": 9190 + }, + { + "epoch": 0.85, + "learning_rate": 4.5770892709386784e-05, + "loss": 0.8301, + "step": 9200 + }, + { + "epoch": 0.85, + "learning_rate": 4.5766295853636113e-05, + "loss": 0.8787, + "step": 9210 + }, + { + "epoch": 0.85, + "learning_rate": 4.576169899788545e-05, + "loss": 0.8864, + "step": 9220 + }, + { + "epoch": 0.85, + "learning_rate": 4.5757102142134786e-05, + "loss": 0.9928, + "step": 9230 + }, + { + "epoch": 0.85, + "learning_rate": 4.5752505286384116e-05, + "loss": 0.9047, + "step": 9240 + }, + { + "epoch": 0.85, + "learning_rate": 4.574790843063345e-05, + "loss": 0.9569, + "step": 9250 + }, + { + "epoch": 0.85, + "learning_rate": 4.574331157488278e-05, + "loss": 0.8903, + "step": 9260 + }, + { + "epoch": 0.85, + "learning_rate": 4.573871471913211e-05, + "loss": 0.9678, + "step": 9270 + }, + { + "epoch": 0.85, + "learning_rate": 4.573411786338145e-05, + "loss": 0.9929, + "step": 9280 + }, + { + "epoch": 0.85, + "learning_rate": 4.5729521007630785e-05, + "loss": 0.9279, + "step": 9290 + }, + { + "epoch": 0.86, + "learning_rate": 4.5724924151880115e-05, + "loss": 0.8225, + "step": 9300 + }, + { + "epoch": 0.86, + "learning_rate": 4.572032729612945e-05, + "loss": 0.8946, + "step": 9310 + }, + { + "epoch": 0.86, + "learning_rate": 4.571573044037879e-05, + "loss": 0.8858, + "step": 9320 + }, + { + "epoch": 0.86, + "learning_rate": 4.571113358462812e-05, + "loss": 0.9494, + "step": 9330 + }, + { + "epoch": 0.86, + "learning_rate": 4.5706536728877454e-05, + "loss": 1.0795, + "step": 9340 + }, + { + "epoch": 0.86, + "learning_rate": 4.5701939873126783e-05, + "loss": 0.9214, + "step": 9350 + }, + { + "epoch": 0.86, + "learning_rate": 4.569734301737611e-05, + "loss": 0.9415, + "step": 9360 + }, + { + "epoch": 0.86, + "learning_rate": 4.569274616162545e-05, + "loss": 0.9509, + "step": 9370 + }, + { + "epoch": 0.86, + "learning_rate": 4.5688149305874786e-05, + "loss": 0.9369, + "step": 9380 + }, + { + "epoch": 0.86, + "learning_rate": 4.5683552450124116e-05, + "loss": 0.8521, + "step": 9390 + }, + { + "epoch": 0.86, + "learning_rate": 4.567895559437345e-05, + "loss": 0.9066, + "step": 9400 + }, + { + "epoch": 0.87, + "learning_rate": 4.567435873862279e-05, + "loss": 0.8537, + "step": 9410 + }, + { + "epoch": 0.87, + "learning_rate": 4.566976188287212e-05, + "loss": 0.9234, + "step": 9420 + }, + { + "epoch": 0.87, + "learning_rate": 4.5665165027121455e-05, + "loss": 0.7931, + "step": 9430 + }, + { + "epoch": 0.87, + "learning_rate": 4.5660568171370785e-05, + "loss": 0.7876, + "step": 9440 + }, + { + "epoch": 0.87, + "learning_rate": 4.5655971315620114e-05, + "loss": 0.9976, + "step": 9450 + }, + { + "epoch": 0.87, + "learning_rate": 4.565137445986945e-05, + "loss": 0.9684, + "step": 9460 + }, + { + "epoch": 0.87, + "learning_rate": 4.564677760411879e-05, + "loss": 0.8602, + "step": 9470 + }, + { + "epoch": 0.87, + "learning_rate": 4.564218074836812e-05, + "loss": 0.9195, + "step": 9480 + }, + { + "epoch": 0.87, + "learning_rate": 4.5637583892617453e-05, + "loss": 0.9705, + "step": 9490 + }, + { + "epoch": 0.87, + "learning_rate": 4.563298703686679e-05, + "loss": 0.8714, + "step": 9500 + }, + { + "epoch": 0.87, + "learning_rate": 4.562839018111612e-05, + "loss": 0.8325, + "step": 9510 + }, + { + "epoch": 0.88, + "learning_rate": 4.5623793325365456e-05, + "loss": 0.8938, + "step": 9520 + }, + { + "epoch": 0.88, + "learning_rate": 4.5619196469614786e-05, + "loss": 0.968, + "step": 9530 + }, + { + "epoch": 0.88, + "learning_rate": 4.5614599613864116e-05, + "loss": 0.8756, + "step": 9540 + }, + { + "epoch": 0.88, + "learning_rate": 4.561000275811345e-05, + "loss": 0.9233, + "step": 9550 + }, + { + "epoch": 0.88, + "learning_rate": 4.560540590236279e-05, + "loss": 0.9545, + "step": 9560 + }, + { + "epoch": 0.88, + "learning_rate": 4.560080904661212e-05, + "loss": 0.8786, + "step": 9570 + }, + { + "epoch": 0.88, + "learning_rate": 4.5596212190861455e-05, + "loss": 0.9069, + "step": 9580 + }, + { + "epoch": 0.88, + "learning_rate": 4.5591615335110784e-05, + "loss": 0.8495, + "step": 9590 + }, + { + "epoch": 0.88, + "learning_rate": 4.558701847936012e-05, + "loss": 0.9046, + "step": 9600 + }, + { + "epoch": 0.88, + "learning_rate": 4.558242162360946e-05, + "loss": 0.9158, + "step": 9610 + }, + { + "epoch": 0.88, + "learning_rate": 4.557782476785879e-05, + "loss": 0.895, + "step": 9620 + }, + { + "epoch": 0.89, + "learning_rate": 4.557322791210812e-05, + "loss": 0.9712, + "step": 9630 + }, + { + "epoch": 0.89, + "learning_rate": 4.556863105635745e-05, + "loss": 0.9486, + "step": 9640 + }, + { + "epoch": 0.89, + "learning_rate": 4.556403420060679e-05, + "loss": 0.8435, + "step": 9650 + }, + { + "epoch": 0.89, + "learning_rate": 4.555943734485612e-05, + "loss": 0.8821, + "step": 9660 + }, + { + "epoch": 0.89, + "learning_rate": 4.5554840489105456e-05, + "loss": 0.9525, + "step": 9670 + }, + { + "epoch": 0.89, + "learning_rate": 4.5550243633354786e-05, + "loss": 0.8794, + "step": 9680 + }, + { + "epoch": 0.89, + "learning_rate": 4.554564677760412e-05, + "loss": 0.9858, + "step": 9690 + }, + { + "epoch": 0.89, + "learning_rate": 4.554104992185346e-05, + "loss": 1.032, + "step": 9700 + }, + { + "epoch": 0.89, + "learning_rate": 4.553645306610279e-05, + "loss": 1.0159, + "step": 9710 + }, + { + "epoch": 0.89, + "learning_rate": 4.553185621035212e-05, + "loss": 0.9821, + "step": 9720 + }, + { + "epoch": 0.89, + "learning_rate": 4.5527259354601454e-05, + "loss": 0.8938, + "step": 9730 + }, + { + "epoch": 0.9, + "learning_rate": 4.552266249885079e-05, + "loss": 0.9275, + "step": 9740 + }, + { + "epoch": 0.9, + "learning_rate": 4.551806564310012e-05, + "loss": 0.9485, + "step": 9750 + }, + { + "epoch": 0.9, + "learning_rate": 4.551346878734946e-05, + "loss": 0.9463, + "step": 9760 + }, + { + "epoch": 0.9, + "learning_rate": 4.550887193159879e-05, + "loss": 1.0139, + "step": 9770 + }, + { + "epoch": 0.9, + "learning_rate": 4.550427507584812e-05, + "loss": 0.8501, + "step": 9780 + }, + { + "epoch": 0.9, + "learning_rate": 4.549967822009746e-05, + "loss": 0.9582, + "step": 9790 + }, + { + "epoch": 0.9, + "learning_rate": 4.549508136434679e-05, + "loss": 0.8919, + "step": 9800 + }, + { + "epoch": 0.9, + "learning_rate": 4.549048450859612e-05, + "loss": 0.9804, + "step": 9810 + }, + { + "epoch": 0.9, + "learning_rate": 4.5485887652845456e-05, + "loss": 0.8906, + "step": 9820 + }, + { + "epoch": 0.9, + "learning_rate": 4.548129079709479e-05, + "loss": 0.844, + "step": 9830 + }, + { + "epoch": 0.9, + "learning_rate": 4.547669394134412e-05, + "loss": 0.974, + "step": 9840 + }, + { + "epoch": 0.91, + "learning_rate": 4.547209708559346e-05, + "loss": 0.8602, + "step": 9850 + }, + { + "epoch": 0.91, + "learning_rate": 4.546750022984279e-05, + "loss": 0.9942, + "step": 9860 + }, + { + "epoch": 0.91, + "learning_rate": 4.5462903374092124e-05, + "loss": 1.0303, + "step": 9870 + }, + { + "epoch": 0.91, + "learning_rate": 4.545830651834146e-05, + "loss": 0.8938, + "step": 9880 + }, + { + "epoch": 0.91, + "learning_rate": 4.545370966259079e-05, + "loss": 1.0339, + "step": 9890 + }, + { + "epoch": 0.91, + "learning_rate": 4.544911280684012e-05, + "loss": 0.9507, + "step": 9900 + }, + { + "epoch": 0.91, + "learning_rate": 4.544451595108946e-05, + "loss": 0.815, + "step": 9910 + }, + { + "epoch": 0.91, + "learning_rate": 4.5439919095338787e-05, + "loss": 0.9829, + "step": 9920 + }, + { + "epoch": 0.91, + "learning_rate": 4.543532223958812e-05, + "loss": 0.9953, + "step": 9930 + }, + { + "epoch": 0.91, + "learning_rate": 4.543072538383746e-05, + "loss": 0.8327, + "step": 9940 + }, + { + "epoch": 0.91, + "learning_rate": 4.542612852808679e-05, + "loss": 0.8617, + "step": 9950 + }, + { + "epoch": 0.92, + "learning_rate": 4.5421531672336126e-05, + "loss": 0.9084, + "step": 9960 + }, + { + "epoch": 0.92, + "learning_rate": 4.541693481658546e-05, + "loss": 0.8555, + "step": 9970 + }, + { + "epoch": 0.92, + "learning_rate": 4.541233796083479e-05, + "loss": 0.9555, + "step": 9980 + }, + { + "epoch": 0.92, + "learning_rate": 4.540774110508412e-05, + "loss": 0.8969, + "step": 9990 + }, + { + "epoch": 0.92, + "learning_rate": 4.540314424933346e-05, + "loss": 0.9159, + "step": 10000 + }, + { + "epoch": 0.92, + "eval_accuracy": 0.5585152838427948, + "eval_loss": 0.9121217131614685, + "eval_runtime": 159.7746, + "eval_samples_per_second": 28.665, + "eval_steps_per_second": 3.586, + "step": 10000 + }, + { + "epoch": 0.92, + "learning_rate": 4.539854739358279e-05, + "loss": 0.9468, + "step": 10010 + }, + { + "epoch": 0.92, + "learning_rate": 4.5393950537832124e-05, + "loss": 0.9851, + "step": 10020 + }, + { + "epoch": 0.92, + "learning_rate": 4.538935368208146e-05, + "loss": 0.8688, + "step": 10030 + }, + { + "epoch": 0.92, + "learning_rate": 4.538475682633079e-05, + "loss": 0.8096, + "step": 10040 + }, + { + "epoch": 0.92, + "learning_rate": 4.538015997058013e-05, + "loss": 0.9783, + "step": 10050 + }, + { + "epoch": 0.92, + "learning_rate": 4.537556311482946e-05, + "loss": 0.8334, + "step": 10060 + }, + { + "epoch": 0.93, + "learning_rate": 4.537096625907879e-05, + "loss": 0.882, + "step": 10070 + }, + { + "epoch": 0.93, + "learning_rate": 4.536636940332812e-05, + "loss": 1.0017, + "step": 10080 + }, + { + "epoch": 0.93, + "learning_rate": 4.536177254757746e-05, + "loss": 0.8854, + "step": 10090 + }, + { + "epoch": 0.93, + "learning_rate": 4.535717569182679e-05, + "loss": 0.9157, + "step": 10100 + }, + { + "epoch": 0.93, + "learning_rate": 4.5352578836076125e-05, + "loss": 0.8932, + "step": 10110 + }, + { + "epoch": 0.93, + "learning_rate": 4.534798198032546e-05, + "loss": 0.926, + "step": 10120 + }, + { + "epoch": 0.93, + "learning_rate": 4.534338512457479e-05, + "loss": 0.8957, + "step": 10130 + }, + { + "epoch": 0.93, + "learning_rate": 4.533878826882413e-05, + "loss": 0.828, + "step": 10140 + }, + { + "epoch": 0.93, + "learning_rate": 4.5334191413073465e-05, + "loss": 0.8201, + "step": 10150 + }, + { + "epoch": 0.93, + "learning_rate": 4.5329594557322794e-05, + "loss": 0.9816, + "step": 10160 + }, + { + "epoch": 0.94, + "learning_rate": 4.5324997701572124e-05, + "loss": 0.9979, + "step": 10170 + }, + { + "epoch": 0.94, + "learning_rate": 4.532040084582146e-05, + "loss": 0.9248, + "step": 10180 + }, + { + "epoch": 0.94, + "learning_rate": 4.531580399007079e-05, + "loss": 0.9423, + "step": 10190 + }, + { + "epoch": 0.94, + "learning_rate": 4.5311207134320127e-05, + "loss": 0.8534, + "step": 10200 + }, + { + "epoch": 0.94, + "learning_rate": 4.530661027856946e-05, + "loss": 0.9352, + "step": 10210 + }, + { + "epoch": 0.94, + "learning_rate": 4.530201342281879e-05, + "loss": 0.9023, + "step": 10220 + }, + { + "epoch": 0.94, + "learning_rate": 4.529741656706813e-05, + "loss": 0.8694, + "step": 10230 + }, + { + "epoch": 0.94, + "learning_rate": 4.5292819711317466e-05, + "loss": 0.888, + "step": 10240 + }, + { + "epoch": 0.94, + "learning_rate": 4.528822285556679e-05, + "loss": 0.9553, + "step": 10250 + }, + { + "epoch": 0.94, + "learning_rate": 4.5283625999816125e-05, + "loss": 0.8964, + "step": 10260 + }, + { + "epoch": 0.94, + "learning_rate": 4.527902914406546e-05, + "loss": 1.1738, + "step": 10270 + }, + { + "epoch": 0.95, + "learning_rate": 4.527443228831479e-05, + "loss": 0.8975, + "step": 10280 + }, + { + "epoch": 0.95, + "learning_rate": 4.526983543256413e-05, + "loss": 0.8494, + "step": 10290 + }, + { + "epoch": 0.95, + "learning_rate": 4.5265238576813464e-05, + "loss": 1.0389, + "step": 10300 + }, + { + "epoch": 0.95, + "learning_rate": 4.5260641721062794e-05, + "loss": 0.9733, + "step": 10310 + }, + { + "epoch": 0.95, + "learning_rate": 4.525604486531213e-05, + "loss": 0.9252, + "step": 10320 + }, + { + "epoch": 0.95, + "learning_rate": 4.525144800956147e-05, + "loss": 0.9168, + "step": 10330 + }, + { + "epoch": 0.95, + "learning_rate": 4.524685115381079e-05, + "loss": 0.9535, + "step": 10340 + }, + { + "epoch": 0.95, + "learning_rate": 4.5242254298060126e-05, + "loss": 0.9596, + "step": 10350 + }, + { + "epoch": 0.95, + "learning_rate": 4.523765744230946e-05, + "loss": 0.952, + "step": 10360 + }, + { + "epoch": 0.95, + "learning_rate": 4.523306058655879e-05, + "loss": 1.026, + "step": 10370 + }, + { + "epoch": 0.95, + "learning_rate": 4.522846373080813e-05, + "loss": 0.8833, + "step": 10380 + }, + { + "epoch": 0.96, + "learning_rate": 4.5223866875057465e-05, + "loss": 0.8967, + "step": 10390 + }, + { + "epoch": 0.96, + "learning_rate": 4.5219270019306795e-05, + "loss": 0.8901, + "step": 10400 + }, + { + "epoch": 0.96, + "learning_rate": 4.521467316355613e-05, + "loss": 0.8578, + "step": 10410 + }, + { + "epoch": 0.96, + "learning_rate": 4.521007630780547e-05, + "loss": 0.8658, + "step": 10420 + }, + { + "epoch": 0.96, + "learning_rate": 4.520547945205479e-05, + "loss": 0.8438, + "step": 10430 + }, + { + "epoch": 0.96, + "learning_rate": 4.520088259630413e-05, + "loss": 0.9124, + "step": 10440 + }, + { + "epoch": 0.96, + "learning_rate": 4.5196285740553464e-05, + "loss": 0.8058, + "step": 10450 + }, + { + "epoch": 0.96, + "learning_rate": 4.5191688884802794e-05, + "loss": 0.9143, + "step": 10460 + }, + { + "epoch": 0.96, + "learning_rate": 4.518709202905213e-05, + "loss": 0.8645, + "step": 10470 + }, + { + "epoch": 0.96, + "learning_rate": 4.518249517330147e-05, + "loss": 0.9521, + "step": 10480 + }, + { + "epoch": 0.96, + "learning_rate": 4.5177898317550796e-05, + "loss": 0.8112, + "step": 10490 + }, + { + "epoch": 0.97, + "learning_rate": 4.517330146180013e-05, + "loss": 0.9012, + "step": 10500 + }, + { + "epoch": 0.97, + "learning_rate": 4.516870460604947e-05, + "loss": 0.9395, + "step": 10510 + }, + { + "epoch": 0.97, + "learning_rate": 4.516410775029879e-05, + "loss": 0.961, + "step": 10520 + }, + { + "epoch": 0.97, + "learning_rate": 4.515951089454813e-05, + "loss": 1.0508, + "step": 10530 + }, + { + "epoch": 0.97, + "learning_rate": 4.5154914038797465e-05, + "loss": 0.931, + "step": 10540 + }, + { + "epoch": 0.97, + "learning_rate": 4.5150317183046795e-05, + "loss": 0.8443, + "step": 10550 + }, + { + "epoch": 0.97, + "learning_rate": 4.514572032729613e-05, + "loss": 0.9688, + "step": 10560 + }, + { + "epoch": 0.97, + "learning_rate": 4.514112347154547e-05, + "loss": 0.8909, + "step": 10570 + }, + { + "epoch": 0.97, + "learning_rate": 4.51365266157948e-05, + "loss": 1.0332, + "step": 10580 + }, + { + "epoch": 0.97, + "learning_rate": 4.5131929760044134e-05, + "loss": 1.0047, + "step": 10590 + }, + { + "epoch": 0.97, + "learning_rate": 4.512733290429347e-05, + "loss": 0.935, + "step": 10600 + }, + { + "epoch": 0.98, + "learning_rate": 4.5122736048542793e-05, + "loss": 0.9114, + "step": 10610 + }, + { + "epoch": 0.98, + "learning_rate": 4.511813919279213e-05, + "loss": 0.9685, + "step": 10620 + }, + { + "epoch": 0.98, + "learning_rate": 4.5113542337041466e-05, + "loss": 1.0234, + "step": 10630 + }, + { + "epoch": 0.98, + "learning_rate": 4.5108945481290796e-05, + "loss": 0.8568, + "step": 10640 + }, + { + "epoch": 0.98, + "learning_rate": 4.510434862554013e-05, + "loss": 0.9171, + "step": 10650 + }, + { + "epoch": 0.98, + "learning_rate": 4.509975176978947e-05, + "loss": 0.9971, + "step": 10660 + }, + { + "epoch": 0.98, + "learning_rate": 4.50951549140388e-05, + "loss": 0.7715, + "step": 10670 + }, + { + "epoch": 0.98, + "learning_rate": 4.5090558058288135e-05, + "loss": 0.9997, + "step": 10680 + }, + { + "epoch": 0.98, + "learning_rate": 4.508596120253747e-05, + "loss": 0.9301, + "step": 10690 + }, + { + "epoch": 0.98, + "learning_rate": 4.5081364346786795e-05, + "loss": 0.9331, + "step": 10700 + }, + { + "epoch": 0.98, + "learning_rate": 4.507676749103613e-05, + "loss": 0.988, + "step": 10710 + }, + { + "epoch": 0.99, + "learning_rate": 4.507217063528547e-05, + "loss": 0.8502, + "step": 10720 + }, + { + "epoch": 0.99, + "learning_rate": 4.50675737795348e-05, + "loss": 0.9107, + "step": 10730 + }, + { + "epoch": 0.99, + "learning_rate": 4.5062976923784134e-05, + "loss": 0.7789, + "step": 10740 + }, + { + "epoch": 0.99, + "learning_rate": 4.505838006803347e-05, + "loss": 0.965, + "step": 10750 + }, + { + "epoch": 0.99, + "learning_rate": 4.50537832122828e-05, + "loss": 0.9429, + "step": 10760 + }, + { + "epoch": 0.99, + "learning_rate": 4.5049186356532136e-05, + "loss": 0.8896, + "step": 10770 + }, + { + "epoch": 0.99, + "learning_rate": 4.504458950078147e-05, + "loss": 0.8761, + "step": 10780 + }, + { + "epoch": 0.99, + "learning_rate": 4.5039992645030796e-05, + "loss": 0.9087, + "step": 10790 + }, + { + "epoch": 0.99, + "learning_rate": 4.503539578928013e-05, + "loss": 0.9369, + "step": 10800 + }, + { + "epoch": 0.99, + "learning_rate": 4.503079893352947e-05, + "loss": 0.9147, + "step": 10810 + }, + { + "epoch": 0.99, + "learning_rate": 4.50262020777788e-05, + "loss": 0.8912, + "step": 10820 + }, + { + "epoch": 1.0, + "learning_rate": 4.5021605222028135e-05, + "loss": 0.8998, + "step": 10830 + }, + { + "epoch": 1.0, + "learning_rate": 4.501700836627747e-05, + "loss": 0.8388, + "step": 10840 + }, + { + "epoch": 1.0, + "learning_rate": 4.50124115105268e-05, + "loss": 0.8597, + "step": 10850 + }, + { + "epoch": 1.0, + "learning_rate": 4.500781465477614e-05, + "loss": 0.8806, + "step": 10860 + }, + { + "epoch": 1.0, + "learning_rate": 4.5003217799025474e-05, + "loss": 0.9591, + "step": 10870 + }, + { + "epoch": 1.0, + "learning_rate": 4.49986209432748e-05, + "loss": 0.9699, + "step": 10880 + }, + { + "epoch": 1.0, + "learning_rate": 4.4994024087524133e-05, + "loss": 0.8775, + "step": 10890 + }, + { + "epoch": 1.0, + "learning_rate": 4.498942723177347e-05, + "loss": 1.0192, + "step": 10900 + }, + { + "epoch": 1.0, + "learning_rate": 4.49848303760228e-05, + "loss": 0.8902, + "step": 10910 + }, + { + "epoch": 1.0, + "learning_rate": 4.4980233520272136e-05, + "loss": 0.8457, + "step": 10920 + }, + { + "epoch": 1.0, + "learning_rate": 4.497563666452147e-05, + "loss": 0.9777, + "step": 10930 + }, + { + "epoch": 1.01, + "learning_rate": 4.49710398087708e-05, + "loss": 0.9088, + "step": 10940 + }, + { + "epoch": 1.01, + "learning_rate": 4.496644295302014e-05, + "loss": 0.8713, + "step": 10950 + }, + { + "epoch": 1.01, + "learning_rate": 4.4961846097269475e-05, + "loss": 0.9141, + "step": 10960 + }, + { + "epoch": 1.01, + "learning_rate": 4.49572492415188e-05, + "loss": 0.8549, + "step": 10970 + }, + { + "epoch": 1.01, + "learning_rate": 4.4952652385768135e-05, + "loss": 0.9023, + "step": 10980 + }, + { + "epoch": 1.01, + "learning_rate": 4.494805553001747e-05, + "loss": 0.9571, + "step": 10990 + }, + { + "epoch": 1.01, + "learning_rate": 4.49434586742668e-05, + "loss": 0.9186, + "step": 11000 + }, + { + "epoch": 1.01, + "learning_rate": 4.493886181851614e-05, + "loss": 0.9233, + "step": 11010 + }, + { + "epoch": 1.01, + "learning_rate": 4.4934264962765474e-05, + "loss": 0.7483, + "step": 11020 + }, + { + "epoch": 1.01, + "learning_rate": 4.4929668107014804e-05, + "loss": 0.8481, + "step": 11030 + }, + { + "epoch": 1.01, + "learning_rate": 4.492507125126414e-05, + "loss": 0.9407, + "step": 11040 + }, + { + "epoch": 1.02, + "learning_rate": 4.4920474395513476e-05, + "loss": 1.0488, + "step": 11050 + }, + { + "epoch": 1.02, + "learning_rate": 4.49158775397628e-05, + "loss": 0.9104, + "step": 11060 + }, + { + "epoch": 1.02, + "learning_rate": 4.4911280684012136e-05, + "loss": 0.8958, + "step": 11070 + }, + { + "epoch": 1.02, + "learning_rate": 4.490668382826147e-05, + "loss": 0.9695, + "step": 11080 + }, + { + "epoch": 1.02, + "learning_rate": 4.49020869725108e-05, + "loss": 0.8812, + "step": 11090 + }, + { + "epoch": 1.02, + "learning_rate": 4.489749011676014e-05, + "loss": 0.877, + "step": 11100 + }, + { + "epoch": 1.02, + "learning_rate": 4.4892893261009475e-05, + "loss": 0.9419, + "step": 11110 + }, + { + "epoch": 1.02, + "learning_rate": 4.4888296405258805e-05, + "loss": 0.9154, + "step": 11120 + }, + { + "epoch": 1.02, + "learning_rate": 4.488369954950814e-05, + "loss": 0.9547, + "step": 11130 + }, + { + "epoch": 1.02, + "learning_rate": 4.487910269375748e-05, + "loss": 0.9033, + "step": 11140 + }, + { + "epoch": 1.03, + "learning_rate": 4.48745058380068e-05, + "loss": 0.8742, + "step": 11150 + }, + { + "epoch": 1.03, + "learning_rate": 4.486990898225614e-05, + "loss": 0.9041, + "step": 11160 + }, + { + "epoch": 1.03, + "learning_rate": 4.4865312126505474e-05, + "loss": 0.9042, + "step": 11170 + }, + { + "epoch": 1.03, + "learning_rate": 4.48607152707548e-05, + "loss": 0.906, + "step": 11180 + }, + { + "epoch": 1.03, + "learning_rate": 4.485611841500414e-05, + "loss": 1.0201, + "step": 11190 + }, + { + "epoch": 1.03, + "learning_rate": 4.4851521559253476e-05, + "loss": 0.8753, + "step": 11200 + }, + { + "epoch": 1.03, + "learning_rate": 4.4846924703502806e-05, + "loss": 0.9337, + "step": 11210 + }, + { + "epoch": 1.03, + "learning_rate": 4.484232784775214e-05, + "loss": 1.0417, + "step": 11220 + }, + { + "epoch": 1.03, + "learning_rate": 4.483773099200147e-05, + "loss": 0.9125, + "step": 11230 + }, + { + "epoch": 1.03, + "learning_rate": 4.48331341362508e-05, + "loss": 0.9053, + "step": 11240 + }, + { + "epoch": 1.03, + "learning_rate": 4.482853728050014e-05, + "loss": 0.8643, + "step": 11250 + }, + { + "epoch": 1.04, + "learning_rate": 4.4823940424749475e-05, + "loss": 0.7548, + "step": 11260 + }, + { + "epoch": 1.04, + "learning_rate": 4.4819343568998804e-05, + "loss": 0.8792, + "step": 11270 + }, + { + "epoch": 1.04, + "learning_rate": 4.481474671324814e-05, + "loss": 0.8676, + "step": 11280 + }, + { + "epoch": 1.04, + "learning_rate": 4.481014985749748e-05, + "loss": 0.9439, + "step": 11290 + }, + { + "epoch": 1.04, + "learning_rate": 4.480555300174681e-05, + "loss": 0.9277, + "step": 11300 + }, + { + "epoch": 1.04, + "learning_rate": 4.4800956145996144e-05, + "loss": 0.8223, + "step": 11310 + }, + { + "epoch": 1.04, + "learning_rate": 4.479635929024547e-05, + "loss": 1.0402, + "step": 11320 + }, + { + "epoch": 1.04, + "learning_rate": 4.47917624344948e-05, + "loss": 1.0117, + "step": 11330 + }, + { + "epoch": 1.04, + "learning_rate": 4.478716557874414e-05, + "loss": 0.9454, + "step": 11340 + }, + { + "epoch": 1.04, + "learning_rate": 4.4782568722993476e-05, + "loss": 0.9359, + "step": 11350 + }, + { + "epoch": 1.04, + "learning_rate": 4.4777971867242806e-05, + "loss": 0.9306, + "step": 11360 + }, + { + "epoch": 1.05, + "learning_rate": 4.477337501149214e-05, + "loss": 0.95, + "step": 11370 + }, + { + "epoch": 1.05, + "learning_rate": 4.476877815574148e-05, + "loss": 0.919, + "step": 11380 + }, + { + "epoch": 1.05, + "learning_rate": 4.476418129999081e-05, + "loss": 0.9745, + "step": 11390 + }, + { + "epoch": 1.05, + "learning_rate": 4.4759584444240145e-05, + "loss": 0.8766, + "step": 11400 + }, + { + "epoch": 1.05, + "learning_rate": 4.4754987588489474e-05, + "loss": 0.9099, + "step": 11410 + }, + { + "epoch": 1.05, + "learning_rate": 4.4750390732738804e-05, + "loss": 0.9029, + "step": 11420 + }, + { + "epoch": 1.05, + "learning_rate": 4.474579387698814e-05, + "loss": 0.9362, + "step": 11430 + }, + { + "epoch": 1.05, + "learning_rate": 4.474119702123748e-05, + "loss": 0.962, + "step": 11440 + }, + { + "epoch": 1.05, + "learning_rate": 4.473660016548681e-05, + "loss": 0.997, + "step": 11450 + }, + { + "epoch": 1.05, + "learning_rate": 4.473200330973614e-05, + "loss": 0.9225, + "step": 11460 + }, + { + "epoch": 1.05, + "learning_rate": 4.472740645398548e-05, + "loss": 0.9444, + "step": 11470 + }, + { + "epoch": 1.06, + "learning_rate": 4.472280959823481e-05, + "loss": 0.8482, + "step": 11480 + }, + { + "epoch": 1.06, + "learning_rate": 4.4718212742484146e-05, + "loss": 0.8891, + "step": 11490 + }, + { + "epoch": 1.06, + "learning_rate": 4.4713615886733476e-05, + "loss": 0.8547, + "step": 11500 + }, + { + "epoch": 1.06, + "learning_rate": 4.4709019030982805e-05, + "loss": 0.8996, + "step": 11510 + }, + { + "epoch": 1.06, + "learning_rate": 4.470442217523214e-05, + "loss": 0.8706, + "step": 11520 + }, + { + "epoch": 1.06, + "learning_rate": 4.469982531948148e-05, + "loss": 0.8515, + "step": 11530 + }, + { + "epoch": 1.06, + "learning_rate": 4.469522846373081e-05, + "loss": 0.9831, + "step": 11540 + }, + { + "epoch": 1.06, + "learning_rate": 4.4690631607980145e-05, + "loss": 0.9286, + "step": 11550 + }, + { + "epoch": 1.06, + "learning_rate": 4.4686034752229474e-05, + "loss": 0.8676, + "step": 11560 + }, + { + "epoch": 1.06, + "learning_rate": 4.468143789647881e-05, + "loss": 0.8777, + "step": 11570 + }, + { + "epoch": 1.06, + "learning_rate": 4.467684104072815e-05, + "loss": 0.8716, + "step": 11580 + }, + { + "epoch": 1.07, + "learning_rate": 4.467224418497748e-05, + "loss": 0.8807, + "step": 11590 + }, + { + "epoch": 1.07, + "learning_rate": 4.4667647329226807e-05, + "loss": 0.9552, + "step": 11600 + }, + { + "epoch": 1.07, + "learning_rate": 4.466305047347614e-05, + "loss": 0.7772, + "step": 11610 + }, + { + "epoch": 1.07, + "learning_rate": 4.465845361772548e-05, + "loss": 0.8932, + "step": 11620 + }, + { + "epoch": 1.07, + "learning_rate": 4.465385676197481e-05, + "loss": 0.907, + "step": 11630 + }, + { + "epoch": 1.07, + "learning_rate": 4.4649259906224146e-05, + "loss": 0.845, + "step": 11640 + }, + { + "epoch": 1.07, + "learning_rate": 4.4644663050473475e-05, + "loss": 1.0008, + "step": 11650 + }, + { + "epoch": 1.07, + "learning_rate": 4.464006619472281e-05, + "loss": 0.8506, + "step": 11660 + }, + { + "epoch": 1.07, + "learning_rate": 4.463546933897215e-05, + "loss": 0.9192, + "step": 11670 + }, + { + "epoch": 1.07, + "learning_rate": 4.463087248322148e-05, + "loss": 0.9594, + "step": 11680 + }, + { + "epoch": 1.07, + "learning_rate": 4.462627562747081e-05, + "loss": 0.9733, + "step": 11690 + }, + { + "epoch": 1.08, + "learning_rate": 4.4621678771720144e-05, + "loss": 0.9314, + "step": 11700 + }, + { + "epoch": 1.08, + "learning_rate": 4.461708191596948e-05, + "loss": 0.98, + "step": 11710 + }, + { + "epoch": 1.08, + "learning_rate": 4.461248506021881e-05, + "loss": 0.84, + "step": 11720 + }, + { + "epoch": 1.08, + "learning_rate": 4.460788820446815e-05, + "loss": 0.8419, + "step": 11730 + }, + { + "epoch": 1.08, + "learning_rate": 4.4603291348717477e-05, + "loss": 0.8898, + "step": 11740 + }, + { + "epoch": 1.08, + "learning_rate": 4.459869449296681e-05, + "loss": 0.8486, + "step": 11750 + }, + { + "epoch": 1.08, + "learning_rate": 4.459409763721615e-05, + "loss": 0.974, + "step": 11760 + }, + { + "epoch": 1.08, + "learning_rate": 4.458950078146548e-05, + "loss": 0.8913, + "step": 11770 + }, + { + "epoch": 1.08, + "learning_rate": 4.458490392571481e-05, + "loss": 0.9076, + "step": 11780 + }, + { + "epoch": 1.08, + "learning_rate": 4.4580307069964145e-05, + "loss": 0.8852, + "step": 11790 + }, + { + "epoch": 1.08, + "learning_rate": 4.457571021421348e-05, + "loss": 0.966, + "step": 11800 + }, + { + "epoch": 1.09, + "learning_rate": 4.457111335846281e-05, + "loss": 0.8525, + "step": 11810 + }, + { + "epoch": 1.09, + "learning_rate": 4.456651650271215e-05, + "loss": 0.9273, + "step": 11820 + }, + { + "epoch": 1.09, + "learning_rate": 4.456191964696148e-05, + "loss": 0.9589, + "step": 11830 + }, + { + "epoch": 1.09, + "learning_rate": 4.4557322791210814e-05, + "loss": 0.9207, + "step": 11840 + }, + { + "epoch": 1.09, + "learning_rate": 4.455272593546015e-05, + "loss": 0.8352, + "step": 11850 + }, + { + "epoch": 1.09, + "learning_rate": 4.454812907970948e-05, + "loss": 0.9615, + "step": 11860 + }, + { + "epoch": 1.09, + "learning_rate": 4.454353222395881e-05, + "loss": 0.9029, + "step": 11870 + }, + { + "epoch": 1.09, + "learning_rate": 4.453893536820815e-05, + "loss": 0.9257, + "step": 11880 + }, + { + "epoch": 1.09, + "learning_rate": 4.4534338512457476e-05, + "loss": 0.9517, + "step": 11890 + }, + { + "epoch": 1.09, + "learning_rate": 4.452974165670681e-05, + "loss": 0.9383, + "step": 11900 + }, + { + "epoch": 1.09, + "learning_rate": 4.452514480095615e-05, + "loss": 0.9106, + "step": 11910 + }, + { + "epoch": 1.1, + "learning_rate": 4.452054794520548e-05, + "loss": 0.9423, + "step": 11920 + }, + { + "epoch": 1.1, + "learning_rate": 4.4515951089454815e-05, + "loss": 0.9619, + "step": 11930 + }, + { + "epoch": 1.1, + "learning_rate": 4.451135423370415e-05, + "loss": 0.9074, + "step": 11940 + }, + { + "epoch": 1.1, + "learning_rate": 4.450675737795348e-05, + "loss": 0.9478, + "step": 11950 + }, + { + "epoch": 1.1, + "learning_rate": 4.450216052220282e-05, + "loss": 0.9828, + "step": 11960 + }, + { + "epoch": 1.1, + "learning_rate": 4.449756366645215e-05, + "loss": 0.9545, + "step": 11970 + }, + { + "epoch": 1.1, + "learning_rate": 4.449296681070148e-05, + "loss": 0.8727, + "step": 11980 + }, + { + "epoch": 1.1, + "learning_rate": 4.4488369954950814e-05, + "loss": 1.004, + "step": 11990 + }, + { + "epoch": 1.1, + "learning_rate": 4.448377309920015e-05, + "loss": 0.8317, + "step": 12000 + }, + { + "epoch": 1.1, + "learning_rate": 4.447917624344948e-05, + "loss": 0.9328, + "step": 12010 + }, + { + "epoch": 1.11, + "learning_rate": 4.447457938769882e-05, + "loss": 0.9026, + "step": 12020 + }, + { + "epoch": 1.11, + "learning_rate": 4.446998253194815e-05, + "loss": 0.8617, + "step": 12030 + }, + { + "epoch": 1.11, + "learning_rate": 4.446538567619748e-05, + "loss": 0.8596, + "step": 12040 + }, + { + "epoch": 1.11, + "learning_rate": 4.446078882044682e-05, + "loss": 0.968, + "step": 12050 + }, + { + "epoch": 1.11, + "learning_rate": 4.445619196469615e-05, + "loss": 1.0175, + "step": 12060 + }, + { + "epoch": 1.11, + "learning_rate": 4.445159510894548e-05, + "loss": 0.9531, + "step": 12070 + }, + { + "epoch": 1.11, + "learning_rate": 4.4446998253194815e-05, + "loss": 0.9737, + "step": 12080 + }, + { + "epoch": 1.11, + "learning_rate": 4.444240139744415e-05, + "loss": 0.8832, + "step": 12090 + }, + { + "epoch": 1.11, + "learning_rate": 4.443780454169348e-05, + "loss": 0.9962, + "step": 12100 + }, + { + "epoch": 1.11, + "learning_rate": 4.443320768594282e-05, + "loss": 0.9209, + "step": 12110 + }, + { + "epoch": 1.11, + "learning_rate": 4.4428610830192154e-05, + "loss": 0.8191, + "step": 12120 + }, + { + "epoch": 1.12, + "learning_rate": 4.4424013974441484e-05, + "loss": 0.9678, + "step": 12130 + }, + { + "epoch": 1.12, + "learning_rate": 4.441941711869082e-05, + "loss": 0.951, + "step": 12140 + }, + { + "epoch": 1.12, + "learning_rate": 4.441482026294015e-05, + "loss": 0.8869, + "step": 12150 + }, + { + "epoch": 1.12, + "learning_rate": 4.441022340718948e-05, + "loss": 0.9489, + "step": 12160 + }, + { + "epoch": 1.12, + "learning_rate": 4.4405626551438816e-05, + "loss": 0.9752, + "step": 12170 + }, + { + "epoch": 1.12, + "learning_rate": 4.440102969568815e-05, + "loss": 0.8537, + "step": 12180 + }, + { + "epoch": 1.12, + "learning_rate": 4.439643283993748e-05, + "loss": 0.9686, + "step": 12190 + }, + { + "epoch": 1.12, + "learning_rate": 4.439183598418682e-05, + "loss": 0.9393, + "step": 12200 + }, + { + "epoch": 1.12, + "learning_rate": 4.4387239128436156e-05, + "loss": 0.9201, + "step": 12210 + }, + { + "epoch": 1.12, + "learning_rate": 4.4382642272685485e-05, + "loss": 0.9115, + "step": 12220 + }, + { + "epoch": 1.12, + "learning_rate": 4.437804541693482e-05, + "loss": 0.9211, + "step": 12230 + }, + { + "epoch": 1.13, + "learning_rate": 4.437344856118415e-05, + "loss": 0.9772, + "step": 12240 + }, + { + "epoch": 1.13, + "learning_rate": 4.436885170543348e-05, + "loss": 0.9121, + "step": 12250 + }, + { + "epoch": 1.13, + "learning_rate": 4.436425484968282e-05, + "loss": 0.9377, + "step": 12260 + }, + { + "epoch": 1.13, + "learning_rate": 4.4359657993932154e-05, + "loss": 0.8728, + "step": 12270 + }, + { + "epoch": 1.13, + "learning_rate": 4.4355061138181484e-05, + "loss": 0.9826, + "step": 12280 + }, + { + "epoch": 1.13, + "learning_rate": 4.435046428243082e-05, + "loss": 0.9139, + "step": 12290 + }, + { + "epoch": 1.13, + "learning_rate": 4.434586742668016e-05, + "loss": 0.9592, + "step": 12300 + }, + { + "epoch": 1.13, + "learning_rate": 4.4341270570929486e-05, + "loss": 0.8585, + "step": 12310 + }, + { + "epoch": 1.13, + "learning_rate": 4.433667371517882e-05, + "loss": 0.9423, + "step": 12320 + }, + { + "epoch": 1.13, + "learning_rate": 4.433207685942815e-05, + "loss": 0.896, + "step": 12330 + }, + { + "epoch": 1.13, + "learning_rate": 4.432748000367748e-05, + "loss": 1.0304, + "step": 12340 + }, + { + "epoch": 1.14, + "learning_rate": 4.432288314792682e-05, + "loss": 1.0224, + "step": 12350 + }, + { + "epoch": 1.14, + "learning_rate": 4.4318286292176155e-05, + "loss": 0.9011, + "step": 12360 + }, + { + "epoch": 1.14, + "learning_rate": 4.4313689436425485e-05, + "loss": 0.9655, + "step": 12370 + }, + { + "epoch": 1.14, + "learning_rate": 4.430909258067482e-05, + "loss": 0.8919, + "step": 12380 + }, + { + "epoch": 1.14, + "learning_rate": 4.430449572492416e-05, + "loss": 0.9221, + "step": 12390 + }, + { + "epoch": 1.14, + "learning_rate": 4.429989886917349e-05, + "loss": 0.8645, + "step": 12400 + }, + { + "epoch": 1.14, + "learning_rate": 4.4295302013422824e-05, + "loss": 1.0265, + "step": 12410 + }, + { + "epoch": 1.14, + "learning_rate": 4.4290705157672154e-05, + "loss": 0.8864, + "step": 12420 + }, + { + "epoch": 1.14, + "learning_rate": 4.4286108301921484e-05, + "loss": 0.9577, + "step": 12430 + }, + { + "epoch": 1.14, + "learning_rate": 4.428151144617082e-05, + "loss": 0.8873, + "step": 12440 + }, + { + "epoch": 1.14, + "learning_rate": 4.4276914590420156e-05, + "loss": 0.9345, + "step": 12450 + }, + { + "epoch": 1.15, + "learning_rate": 4.4272317734669486e-05, + "loss": 0.9043, + "step": 12460 + }, + { + "epoch": 1.15, + "learning_rate": 4.426772087891882e-05, + "loss": 0.902, + "step": 12470 + }, + { + "epoch": 1.15, + "learning_rate": 4.426312402316816e-05, + "loss": 0.9901, + "step": 12480 + }, + { + "epoch": 1.15, + "learning_rate": 4.425852716741749e-05, + "loss": 0.8414, + "step": 12490 + }, + { + "epoch": 1.15, + "learning_rate": 4.4253930311666825e-05, + "loss": 0.9299, + "step": 12500 + }, + { + "epoch": 1.15, + "learning_rate": 4.4249333455916155e-05, + "loss": 0.9569, + "step": 12510 + }, + { + "epoch": 1.15, + "learning_rate": 4.4244736600165485e-05, + "loss": 0.9398, + "step": 12520 + }, + { + "epoch": 1.15, + "learning_rate": 4.424013974441482e-05, + "loss": 0.8658, + "step": 12530 + }, + { + "epoch": 1.15, + "learning_rate": 4.423554288866416e-05, + "loss": 0.9272, + "step": 12540 + }, + { + "epoch": 1.15, + "learning_rate": 4.423094603291349e-05, + "loss": 0.9907, + "step": 12550 + }, + { + "epoch": 1.15, + "learning_rate": 4.4226349177162824e-05, + "loss": 0.9393, + "step": 12560 + }, + { + "epoch": 1.16, + "learning_rate": 4.422175232141216e-05, + "loss": 0.8531, + "step": 12570 + }, + { + "epoch": 1.16, + "learning_rate": 4.421715546566149e-05, + "loss": 0.7897, + "step": 12580 + }, + { + "epoch": 1.16, + "learning_rate": 4.4212558609910826e-05, + "loss": 0.9248, + "step": 12590 + }, + { + "epoch": 1.16, + "learning_rate": 4.4207961754160156e-05, + "loss": 0.9084, + "step": 12600 + }, + { + "epoch": 1.16, + "learning_rate": 4.4203364898409486e-05, + "loss": 0.9582, + "step": 12610 + }, + { + "epoch": 1.16, + "learning_rate": 4.419876804265882e-05, + "loss": 0.9514, + "step": 12620 + }, + { + "epoch": 1.16, + "learning_rate": 4.419417118690816e-05, + "loss": 0.959, + "step": 12630 + }, + { + "epoch": 1.16, + "learning_rate": 4.418957433115749e-05, + "loss": 0.9548, + "step": 12640 + }, + { + "epoch": 1.16, + "learning_rate": 4.4184977475406825e-05, + "loss": 0.8033, + "step": 12650 + }, + { + "epoch": 1.16, + "learning_rate": 4.418038061965616e-05, + "loss": 0.8173, + "step": 12660 + }, + { + "epoch": 1.16, + "learning_rate": 4.417578376390549e-05, + "loss": 0.8038, + "step": 12670 + }, + { + "epoch": 1.17, + "learning_rate": 4.417118690815483e-05, + "loss": 0.8897, + "step": 12680 + }, + { + "epoch": 1.17, + "learning_rate": 4.416659005240416e-05, + "loss": 0.975, + "step": 12690 + }, + { + "epoch": 1.17, + "learning_rate": 4.416199319665349e-05, + "loss": 0.7838, + "step": 12700 + }, + { + "epoch": 1.17, + "learning_rate": 4.4157396340902824e-05, + "loss": 0.97, + "step": 12710 + }, + { + "epoch": 1.17, + "learning_rate": 4.415279948515216e-05, + "loss": 0.8997, + "step": 12720 + }, + { + "epoch": 1.17, + "learning_rate": 4.414820262940149e-05, + "loss": 1.0767, + "step": 12730 + }, + { + "epoch": 1.17, + "learning_rate": 4.4143605773650826e-05, + "loss": 0.8626, + "step": 12740 + }, + { + "epoch": 1.17, + "learning_rate": 4.413900891790016e-05, + "loss": 0.9489, + "step": 12750 + }, + { + "epoch": 1.17, + "learning_rate": 4.413441206214949e-05, + "loss": 1.0657, + "step": 12760 + }, + { + "epoch": 1.17, + "learning_rate": 4.412981520639883e-05, + "loss": 0.8751, + "step": 12770 + }, + { + "epoch": 1.17, + "learning_rate": 4.412521835064816e-05, + "loss": 0.8295, + "step": 12780 + }, + { + "epoch": 1.18, + "learning_rate": 4.412062149489749e-05, + "loss": 0.953, + "step": 12790 + }, + { + "epoch": 1.18, + "learning_rate": 4.4116024639146825e-05, + "loss": 0.9167, + "step": 12800 + }, + { + "epoch": 1.18, + "learning_rate": 4.411142778339616e-05, + "loss": 0.8756, + "step": 12810 + }, + { + "epoch": 1.18, + "learning_rate": 4.410683092764549e-05, + "loss": 0.8434, + "step": 12820 + }, + { + "epoch": 1.18, + "learning_rate": 4.410223407189483e-05, + "loss": 0.9078, + "step": 12830 + }, + { + "epoch": 1.18, + "learning_rate": 4.4097637216144164e-05, + "loss": 1.0064, + "step": 12840 + }, + { + "epoch": 1.18, + "learning_rate": 4.4093040360393494e-05, + "loss": 0.9519, + "step": 12850 + }, + { + "epoch": 1.18, + "learning_rate": 4.408844350464283e-05, + "loss": 0.9043, + "step": 12860 + }, + { + "epoch": 1.18, + "learning_rate": 4.408384664889216e-05, + "loss": 0.921, + "step": 12870 + }, + { + "epoch": 1.18, + "learning_rate": 4.407924979314149e-05, + "loss": 0.9402, + "step": 12880 + }, + { + "epoch": 1.19, + "learning_rate": 4.4074652937390826e-05, + "loss": 0.8604, + "step": 12890 + }, + { + "epoch": 1.19, + "learning_rate": 4.407005608164016e-05, + "loss": 0.8605, + "step": 12900 + }, + { + "epoch": 1.19, + "learning_rate": 4.406545922588949e-05, + "loss": 0.9168, + "step": 12910 + }, + { + "epoch": 1.19, + "learning_rate": 4.406086237013883e-05, + "loss": 0.953, + "step": 12920 + }, + { + "epoch": 1.19, + "learning_rate": 4.4056265514388165e-05, + "loss": 0.9079, + "step": 12930 + }, + { + "epoch": 1.19, + "learning_rate": 4.4051668658637495e-05, + "loss": 0.9988, + "step": 12940 + }, + { + "epoch": 1.19, + "learning_rate": 4.404707180288683e-05, + "loss": 0.9193, + "step": 12950 + }, + { + "epoch": 1.19, + "learning_rate": 4.404247494713616e-05, + "loss": 0.9079, + "step": 12960 + }, + { + "epoch": 1.19, + "learning_rate": 4.403787809138549e-05, + "loss": 0.9074, + "step": 12970 + }, + { + "epoch": 1.19, + "learning_rate": 4.403328123563483e-05, + "loss": 0.9233, + "step": 12980 + }, + { + "epoch": 1.19, + "learning_rate": 4.4028684379884164e-05, + "loss": 0.9063, + "step": 12990 + }, + { + "epoch": 1.2, + "learning_rate": 4.402408752413349e-05, + "loss": 1.0383, + "step": 13000 + }, + { + "epoch": 1.2, + "learning_rate": 4.401949066838283e-05, + "loss": 0.905, + "step": 13010 + }, + { + "epoch": 1.2, + "learning_rate": 4.4014893812632166e-05, + "loss": 0.856, + "step": 13020 + }, + { + "epoch": 1.2, + "learning_rate": 4.4010296956881496e-05, + "loss": 0.8404, + "step": 13030 + }, + { + "epoch": 1.2, + "learning_rate": 4.400570010113083e-05, + "loss": 0.8839, + "step": 13040 + }, + { + "epoch": 1.2, + "learning_rate": 4.400110324538016e-05, + "loss": 0.8266, + "step": 13050 + }, + { + "epoch": 1.2, + "learning_rate": 4.399650638962949e-05, + "loss": 0.9644, + "step": 13060 + }, + { + "epoch": 1.2, + "learning_rate": 4.399190953387883e-05, + "loss": 0.9381, + "step": 13070 + }, + { + "epoch": 1.2, + "learning_rate": 4.3987312678128165e-05, + "loss": 0.8116, + "step": 13080 + }, + { + "epoch": 1.2, + "learning_rate": 4.3982715822377495e-05, + "loss": 0.9633, + "step": 13090 + }, + { + "epoch": 1.2, + "learning_rate": 4.397811896662683e-05, + "loss": 0.917, + "step": 13100 + }, + { + "epoch": 1.21, + "learning_rate": 4.397352211087617e-05, + "loss": 0.9176, + "step": 13110 + }, + { + "epoch": 1.21, + "learning_rate": 4.39689252551255e-05, + "loss": 0.9243, + "step": 13120 + }, + { + "epoch": 1.21, + "learning_rate": 4.3964328399374834e-05, + "loss": 0.8715, + "step": 13130 + }, + { + "epoch": 1.21, + "learning_rate": 4.395973154362416e-05, + "loss": 0.9165, + "step": 13140 + }, + { + "epoch": 1.21, + "learning_rate": 4.395513468787349e-05, + "loss": 1.0029, + "step": 13150 + }, + { + "epoch": 1.21, + "learning_rate": 4.395053783212283e-05, + "loss": 1.018, + "step": 13160 + }, + { + "epoch": 1.21, + "learning_rate": 4.3945940976372166e-05, + "loss": 0.9339, + "step": 13170 + }, + { + "epoch": 1.21, + "learning_rate": 4.3941344120621496e-05, + "loss": 0.9509, + "step": 13180 + }, + { + "epoch": 1.21, + "learning_rate": 4.393674726487083e-05, + "loss": 0.9241, + "step": 13190 + }, + { + "epoch": 1.21, + "learning_rate": 4.393215040912016e-05, + "loss": 0.7836, + "step": 13200 + }, + { + "epoch": 1.21, + "learning_rate": 4.39275535533695e-05, + "loss": 0.8981, + "step": 13210 + }, + { + "epoch": 1.22, + "learning_rate": 4.3922956697618835e-05, + "loss": 0.9004, + "step": 13220 + }, + { + "epoch": 1.22, + "learning_rate": 4.3918359841868165e-05, + "loss": 0.8525, + "step": 13230 + }, + { + "epoch": 1.22, + "learning_rate": 4.3913762986117494e-05, + "loss": 0.9162, + "step": 13240 + }, + { + "epoch": 1.22, + "learning_rate": 4.390916613036683e-05, + "loss": 0.8821, + "step": 13250 + }, + { + "epoch": 1.22, + "learning_rate": 4.390456927461617e-05, + "loss": 0.9015, + "step": 13260 + }, + { + "epoch": 1.22, + "learning_rate": 4.38999724188655e-05, + "loss": 0.9268, + "step": 13270 + }, + { + "epoch": 1.22, + "learning_rate": 4.3895375563114833e-05, + "loss": 0.883, + "step": 13280 + }, + { + "epoch": 1.22, + "learning_rate": 4.389077870736416e-05, + "loss": 0.8715, + "step": 13290 + }, + { + "epoch": 1.22, + "learning_rate": 4.38861818516135e-05, + "loss": 0.8849, + "step": 13300 + }, + { + "epoch": 1.22, + "learning_rate": 4.3881584995862836e-05, + "loss": 0.8913, + "step": 13310 + }, + { + "epoch": 1.22, + "learning_rate": 4.3876988140112166e-05, + "loss": 0.8227, + "step": 13320 + }, + { + "epoch": 1.23, + "learning_rate": 4.3872391284361495e-05, + "loss": 0.9277, + "step": 13330 + }, + { + "epoch": 1.23, + "learning_rate": 4.386779442861083e-05, + "loss": 1.0101, + "step": 13340 + }, + { + "epoch": 1.23, + "learning_rate": 4.386319757286017e-05, + "loss": 0.8516, + "step": 13350 + }, + { + "epoch": 1.23, + "learning_rate": 4.38586007171095e-05, + "loss": 0.8958, + "step": 13360 + }, + { + "epoch": 1.23, + "learning_rate": 4.3854003861358835e-05, + "loss": 0.9194, + "step": 13370 + }, + { + "epoch": 1.23, + "learning_rate": 4.3849407005608164e-05, + "loss": 0.8234, + "step": 13380 + }, + { + "epoch": 1.23, + "learning_rate": 4.38448101498575e-05, + "loss": 0.9105, + "step": 13390 + }, + { + "epoch": 1.23, + "learning_rate": 4.384021329410684e-05, + "loss": 0.908, + "step": 13400 + }, + { + "epoch": 1.23, + "learning_rate": 4.383561643835617e-05, + "loss": 0.8807, + "step": 13410 + }, + { + "epoch": 1.23, + "learning_rate": 4.38310195826055e-05, + "loss": 1.0101, + "step": 13420 + }, + { + "epoch": 1.23, + "learning_rate": 4.382642272685483e-05, + "loss": 0.8633, + "step": 13430 + }, + { + "epoch": 1.24, + "learning_rate": 4.382182587110417e-05, + "loss": 0.9458, + "step": 13440 + }, + { + "epoch": 1.24, + "learning_rate": 4.38172290153535e-05, + "loss": 0.9018, + "step": 13450 + }, + { + "epoch": 1.24, + "learning_rate": 4.3812632159602836e-05, + "loss": 0.9383, + "step": 13460 + }, + { + "epoch": 1.24, + "learning_rate": 4.3808035303852165e-05, + "loss": 0.938, + "step": 13470 + }, + { + "epoch": 1.24, + "learning_rate": 4.38034384481015e-05, + "loss": 0.9473, + "step": 13480 + }, + { + "epoch": 1.24, + "learning_rate": 4.379884159235084e-05, + "loss": 0.8779, + "step": 13490 + }, + { + "epoch": 1.24, + "learning_rate": 4.379424473660017e-05, + "loss": 0.9166, + "step": 13500 + }, + { + "epoch": 1.24, + "learning_rate": 4.37896478808495e-05, + "loss": 0.9089, + "step": 13510 + }, + { + "epoch": 1.24, + "learning_rate": 4.3785051025098834e-05, + "loss": 0.9371, + "step": 13520 + }, + { + "epoch": 1.24, + "learning_rate": 4.3780454169348164e-05, + "loss": 0.9999, + "step": 13530 + }, + { + "epoch": 1.24, + "learning_rate": 4.37758573135975e-05, + "loss": 0.8423, + "step": 13540 + }, + { + "epoch": 1.25, + "learning_rate": 4.377126045784684e-05, + "loss": 0.9486, + "step": 13550 + }, + { + "epoch": 1.25, + "learning_rate": 4.376666360209617e-05, + "loss": 0.8699, + "step": 13560 + }, + { + "epoch": 1.25, + "learning_rate": 4.37620667463455e-05, + "loss": 0.9271, + "step": 13570 + }, + { + "epoch": 1.25, + "learning_rate": 4.375746989059484e-05, + "loss": 0.9672, + "step": 13580 + }, + { + "epoch": 1.25, + "learning_rate": 4.375287303484417e-05, + "loss": 0.9595, + "step": 13590 + }, + { + "epoch": 1.25, + "learning_rate": 4.37482761790935e-05, + "loss": 0.8653, + "step": 13600 + }, + { + "epoch": 1.25, + "learning_rate": 4.3743679323342836e-05, + "loss": 0.8884, + "step": 13610 + }, + { + "epoch": 1.25, + "learning_rate": 4.3739082467592165e-05, + "loss": 0.9457, + "step": 13620 + }, + { + "epoch": 1.25, + "learning_rate": 4.37344856118415e-05, + "loss": 0.92, + "step": 13630 + }, + { + "epoch": 1.25, + "learning_rate": 4.372988875609084e-05, + "loss": 0.9542, + "step": 13640 + }, + { + "epoch": 1.25, + "learning_rate": 4.372529190034017e-05, + "loss": 0.9206, + "step": 13650 + }, + { + "epoch": 1.26, + "learning_rate": 4.3720695044589504e-05, + "loss": 1.0014, + "step": 13660 + }, + { + "epoch": 1.26, + "learning_rate": 4.371609818883884e-05, + "loss": 0.803, + "step": 13670 + }, + { + "epoch": 1.26, + "learning_rate": 4.371150133308817e-05, + "loss": 0.8779, + "step": 13680 + }, + { + "epoch": 1.26, + "learning_rate": 4.37069044773375e-05, + "loss": 0.8641, + "step": 13690 + }, + { + "epoch": 1.26, + "learning_rate": 4.370230762158684e-05, + "loss": 1.034, + "step": 13700 + }, + { + "epoch": 1.26, + "learning_rate": 4.3697710765836166e-05, + "loss": 0.9457, + "step": 13710 + }, + { + "epoch": 1.26, + "learning_rate": 4.36931139100855e-05, + "loss": 0.898, + "step": 13720 + }, + { + "epoch": 1.26, + "learning_rate": 4.368851705433484e-05, + "loss": 0.9683, + "step": 13730 + }, + { + "epoch": 1.26, + "learning_rate": 4.368392019858417e-05, + "loss": 0.9281, + "step": 13740 + }, + { + "epoch": 1.26, + "learning_rate": 4.3679323342833506e-05, + "loss": 0.8917, + "step": 13750 + }, + { + "epoch": 1.27, + "learning_rate": 4.367472648708284e-05, + "loss": 0.9395, + "step": 13760 + }, + { + "epoch": 1.27, + "learning_rate": 4.367012963133217e-05, + "loss": 0.9146, + "step": 13770 + }, + { + "epoch": 1.27, + "learning_rate": 4.36655327755815e-05, + "loss": 0.9294, + "step": 13780 + }, + { + "epoch": 1.27, + "learning_rate": 4.366093591983084e-05, + "loss": 0.8161, + "step": 13790 + }, + { + "epoch": 1.27, + "learning_rate": 4.365633906408017e-05, + "loss": 0.8931, + "step": 13800 + }, + { + "epoch": 1.27, + "learning_rate": 4.3651742208329504e-05, + "loss": 0.9322, + "step": 13810 + }, + { + "epoch": 1.27, + "learning_rate": 4.364714535257884e-05, + "loss": 0.7893, + "step": 13820 + }, + { + "epoch": 1.27, + "learning_rate": 4.364254849682817e-05, + "loss": 1.0173, + "step": 13830 + }, + { + "epoch": 1.27, + "learning_rate": 4.363795164107751e-05, + "loss": 0.7748, + "step": 13840 + }, + { + "epoch": 1.27, + "learning_rate": 4.363335478532684e-05, + "loss": 0.8631, + "step": 13850 + }, + { + "epoch": 1.27, + "learning_rate": 4.3628757929576166e-05, + "loss": 0.9148, + "step": 13860 + }, + { + "epoch": 1.28, + "learning_rate": 4.36241610738255e-05, + "loss": 0.8311, + "step": 13870 + }, + { + "epoch": 1.28, + "learning_rate": 4.361956421807484e-05, + "loss": 0.8963, + "step": 13880 + }, + { + "epoch": 1.28, + "learning_rate": 4.361496736232417e-05, + "loss": 0.8973, + "step": 13890 + }, + { + "epoch": 1.28, + "learning_rate": 4.3610370506573505e-05, + "loss": 0.8653, + "step": 13900 + }, + { + "epoch": 1.28, + "learning_rate": 4.360577365082284e-05, + "loss": 0.9264, + "step": 13910 + }, + { + "epoch": 1.28, + "learning_rate": 4.360117679507217e-05, + "loss": 0.9279, + "step": 13920 + }, + { + "epoch": 1.28, + "learning_rate": 4.359657993932151e-05, + "loss": 0.8864, + "step": 13930 + }, + { + "epoch": 1.28, + "learning_rate": 4.3591983083570844e-05, + "loss": 0.9464, + "step": 13940 + }, + { + "epoch": 1.28, + "learning_rate": 4.358738622782017e-05, + "loss": 0.9862, + "step": 13950 + }, + { + "epoch": 1.28, + "learning_rate": 4.3582789372069504e-05, + "loss": 0.8883, + "step": 13960 + }, + { + "epoch": 1.28, + "learning_rate": 4.357819251631884e-05, + "loss": 0.8961, + "step": 13970 + }, + { + "epoch": 1.29, + "learning_rate": 4.357359566056817e-05, + "loss": 0.9038, + "step": 13980 + }, + { + "epoch": 1.29, + "learning_rate": 4.3568998804817506e-05, + "loss": 0.8216, + "step": 13990 + }, + { + "epoch": 1.29, + "learning_rate": 4.356440194906684e-05, + "loss": 0.9365, + "step": 14000 + }, + { + "epoch": 1.29, + "learning_rate": 4.355980509331617e-05, + "loss": 0.9689, + "step": 14010 + }, + { + "epoch": 1.29, + "learning_rate": 4.355520823756551e-05, + "loss": 0.9553, + "step": 14020 + }, + { + "epoch": 1.29, + "learning_rate": 4.3550611381814846e-05, + "loss": 0.95, + "step": 14030 + }, + { + "epoch": 1.29, + "learning_rate": 4.354601452606417e-05, + "loss": 0.9088, + "step": 14040 + }, + { + "epoch": 1.29, + "learning_rate": 4.3541417670313505e-05, + "loss": 0.9147, + "step": 14050 + }, + { + "epoch": 1.29, + "learning_rate": 4.353682081456284e-05, + "loss": 0.9576, + "step": 14060 + }, + { + "epoch": 1.29, + "learning_rate": 4.353222395881217e-05, + "loss": 0.9446, + "step": 14070 + }, + { + "epoch": 1.29, + "learning_rate": 4.352762710306151e-05, + "loss": 0.9137, + "step": 14080 + }, + { + "epoch": 1.3, + "learning_rate": 4.3523030247310844e-05, + "loss": 0.8809, + "step": 14090 + }, + { + "epoch": 1.3, + "learning_rate": 4.3518433391560174e-05, + "loss": 0.7567, + "step": 14100 + }, + { + "epoch": 1.3, + "learning_rate": 4.351383653580951e-05, + "loss": 0.8678, + "step": 14110 + }, + { + "epoch": 1.3, + "learning_rate": 4.350923968005885e-05, + "loss": 0.9753, + "step": 14120 + }, + { + "epoch": 1.3, + "learning_rate": 4.350464282430817e-05, + "loss": 0.9888, + "step": 14130 + }, + { + "epoch": 1.3, + "learning_rate": 4.3500045968557506e-05, + "loss": 0.944, + "step": 14140 + }, + { + "epoch": 1.3, + "learning_rate": 4.349544911280684e-05, + "loss": 0.9114, + "step": 14150 + }, + { + "epoch": 1.3, + "learning_rate": 4.349085225705617e-05, + "loss": 0.8941, + "step": 14160 + }, + { + "epoch": 1.3, + "learning_rate": 4.348625540130551e-05, + "loss": 0.9293, + "step": 14170 + }, + { + "epoch": 1.3, + "learning_rate": 4.3481658545554845e-05, + "loss": 0.9976, + "step": 14180 + }, + { + "epoch": 1.3, + "learning_rate": 4.3477061689804175e-05, + "loss": 0.8883, + "step": 14190 + }, + { + "epoch": 1.31, + "learning_rate": 4.347246483405351e-05, + "loss": 0.9261, + "step": 14200 + }, + { + "epoch": 1.31, + "learning_rate": 4.346786797830285e-05, + "loss": 0.8913, + "step": 14210 + }, + { + "epoch": 1.31, + "learning_rate": 4.346327112255217e-05, + "loss": 0.8615, + "step": 14220 + }, + { + "epoch": 1.31, + "learning_rate": 4.345867426680151e-05, + "loss": 0.9476, + "step": 14230 + }, + { + "epoch": 1.31, + "learning_rate": 4.3454077411050844e-05, + "loss": 0.7502, + "step": 14240 + }, + { + "epoch": 1.31, + "learning_rate": 4.3449480555300174e-05, + "loss": 0.9278, + "step": 14250 + }, + { + "epoch": 1.31, + "learning_rate": 4.344488369954951e-05, + "loss": 0.9364, + "step": 14260 + }, + { + "epoch": 1.31, + "learning_rate": 4.3440286843798847e-05, + "loss": 0.9017, + "step": 14270 + }, + { + "epoch": 1.31, + "learning_rate": 4.3435689988048176e-05, + "loss": 0.9878, + "step": 14280 + }, + { + "epoch": 1.31, + "learning_rate": 4.343109313229751e-05, + "loss": 0.9065, + "step": 14290 + }, + { + "epoch": 1.31, + "learning_rate": 4.342649627654685e-05, + "loss": 0.9126, + "step": 14300 + }, + { + "epoch": 1.32, + "learning_rate": 4.342189942079617e-05, + "loss": 0.9353, + "step": 14310 + }, + { + "epoch": 1.32, + "learning_rate": 4.341730256504551e-05, + "loss": 0.8782, + "step": 14320 + }, + { + "epoch": 1.32, + "learning_rate": 4.3412705709294845e-05, + "loss": 0.9248, + "step": 14330 + }, + { + "epoch": 1.32, + "learning_rate": 4.3408108853544175e-05, + "loss": 0.8857, + "step": 14340 + }, + { + "epoch": 1.32, + "learning_rate": 4.340351199779351e-05, + "loss": 0.8787, + "step": 14350 + }, + { + "epoch": 1.32, + "learning_rate": 4.339891514204285e-05, + "loss": 0.9467, + "step": 14360 + }, + { + "epoch": 1.32, + "learning_rate": 4.339431828629218e-05, + "loss": 0.9918, + "step": 14370 + }, + { + "epoch": 1.32, + "learning_rate": 4.3389721430541514e-05, + "loss": 0.9271, + "step": 14380 + }, + { + "epoch": 1.32, + "learning_rate": 4.338512457479085e-05, + "loss": 0.9298, + "step": 14390 + }, + { + "epoch": 1.32, + "learning_rate": 4.338052771904017e-05, + "loss": 1.0416, + "step": 14400 + }, + { + "epoch": 1.32, + "learning_rate": 4.337593086328951e-05, + "loss": 0.9614, + "step": 14410 + }, + { + "epoch": 1.33, + "learning_rate": 4.3371334007538846e-05, + "loss": 0.9721, + "step": 14420 + }, + { + "epoch": 1.33, + "learning_rate": 4.3366737151788176e-05, + "loss": 0.8874, + "step": 14430 + }, + { + "epoch": 1.33, + "learning_rate": 4.336214029603751e-05, + "loss": 0.8861, + "step": 14440 + }, + { + "epoch": 1.33, + "learning_rate": 4.335754344028685e-05, + "loss": 0.9264, + "step": 14450 + }, + { + "epoch": 1.33, + "learning_rate": 4.335294658453618e-05, + "loss": 0.9003, + "step": 14460 + }, + { + "epoch": 1.33, + "learning_rate": 4.3348349728785515e-05, + "loss": 0.8595, + "step": 14470 + }, + { + "epoch": 1.33, + "learning_rate": 4.334375287303485e-05, + "loss": 0.8879, + "step": 14480 + }, + { + "epoch": 1.33, + "learning_rate": 4.3339156017284175e-05, + "loss": 0.9585, + "step": 14490 + }, + { + "epoch": 1.33, + "learning_rate": 4.333455916153351e-05, + "loss": 0.8967, + "step": 14500 + }, + { + "epoch": 1.33, + "learning_rate": 4.332996230578285e-05, + "loss": 0.8466, + "step": 14510 + }, + { + "epoch": 1.33, + "learning_rate": 4.332536545003218e-05, + "loss": 0.8826, + "step": 14520 + }, + { + "epoch": 1.34, + "learning_rate": 4.3320768594281514e-05, + "loss": 0.9375, + "step": 14530 + }, + { + "epoch": 1.34, + "learning_rate": 4.331617173853085e-05, + "loss": 0.8456, + "step": 14540 + }, + { + "epoch": 1.34, + "learning_rate": 4.331157488278018e-05, + "loss": 0.8641, + "step": 14550 + }, + { + "epoch": 1.34, + "learning_rate": 4.3306978027029516e-05, + "loss": 0.9631, + "step": 14560 + }, + { + "epoch": 1.34, + "learning_rate": 4.330238117127885e-05, + "loss": 0.8837, + "step": 14570 + }, + { + "epoch": 1.34, + "learning_rate": 4.3297784315528176e-05, + "loss": 0.9343, + "step": 14580 + }, + { + "epoch": 1.34, + "learning_rate": 4.329318745977751e-05, + "loss": 0.9878, + "step": 14590 + }, + { + "epoch": 1.34, + "learning_rate": 4.328859060402685e-05, + "loss": 0.9439, + "step": 14600 + }, + { + "epoch": 1.34, + "learning_rate": 4.328399374827618e-05, + "loss": 0.8315, + "step": 14610 + }, + { + "epoch": 1.34, + "learning_rate": 4.3279396892525515e-05, + "loss": 0.9415, + "step": 14620 + }, + { + "epoch": 1.35, + "learning_rate": 4.327480003677485e-05, + "loss": 0.8391, + "step": 14630 + }, + { + "epoch": 1.35, + "learning_rate": 4.327020318102418e-05, + "loss": 0.875, + "step": 14640 + }, + { + "epoch": 1.35, + "learning_rate": 4.326560632527352e-05, + "loss": 0.9985, + "step": 14650 + }, + { + "epoch": 1.35, + "learning_rate": 4.3261009469522854e-05, + "loss": 0.8753, + "step": 14660 + }, + { + "epoch": 1.35, + "learning_rate": 4.325641261377218e-05, + "loss": 0.9666, + "step": 14670 + }, + { + "epoch": 1.35, + "learning_rate": 4.325181575802151e-05, + "loss": 0.9287, + "step": 14680 + }, + { + "epoch": 1.35, + "learning_rate": 4.324721890227085e-05, + "loss": 0.9222, + "step": 14690 + }, + { + "epoch": 1.35, + "learning_rate": 4.324262204652018e-05, + "loss": 0.9128, + "step": 14700 + }, + { + "epoch": 1.35, + "learning_rate": 4.3238025190769516e-05, + "loss": 0.906, + "step": 14710 + }, + { + "epoch": 1.35, + "learning_rate": 4.323342833501885e-05, + "loss": 0.9607, + "step": 14720 + }, + { + "epoch": 1.35, + "learning_rate": 4.322883147926818e-05, + "loss": 1.0337, + "step": 14730 + }, + { + "epoch": 1.36, + "learning_rate": 4.322423462351752e-05, + "loss": 0.8988, + "step": 14740 + }, + { + "epoch": 1.36, + "learning_rate": 4.3219637767766855e-05, + "loss": 0.9229, + "step": 14750 + }, + { + "epoch": 1.36, + "learning_rate": 4.321504091201618e-05, + "loss": 0.9016, + "step": 14760 + }, + { + "epoch": 1.36, + "learning_rate": 4.3210444056265515e-05, + "loss": 0.9899, + "step": 14770 + }, + { + "epoch": 1.36, + "learning_rate": 4.320584720051485e-05, + "loss": 0.8438, + "step": 14780 + }, + { + "epoch": 1.36, + "learning_rate": 4.320125034476418e-05, + "loss": 0.9742, + "step": 14790 + }, + { + "epoch": 1.36, + "learning_rate": 4.319665348901352e-05, + "loss": 0.8879, + "step": 14800 + }, + { + "epoch": 1.36, + "learning_rate": 4.3192056633262854e-05, + "loss": 0.9296, + "step": 14810 + }, + { + "epoch": 1.36, + "learning_rate": 4.3187459777512183e-05, + "loss": 0.9947, + "step": 14820 + }, + { + "epoch": 1.36, + "learning_rate": 4.318286292176152e-05, + "loss": 0.8589, + "step": 14830 + }, + { + "epoch": 1.36, + "learning_rate": 4.317826606601085e-05, + "loss": 0.9452, + "step": 14840 + }, + { + "epoch": 1.37, + "learning_rate": 4.317366921026018e-05, + "loss": 0.7949, + "step": 14850 + }, + { + "epoch": 1.37, + "learning_rate": 4.3169072354509516e-05, + "loss": 0.8965, + "step": 14860 + }, + { + "epoch": 1.37, + "learning_rate": 4.316447549875885e-05, + "loss": 1.0258, + "step": 14870 + }, + { + "epoch": 1.37, + "learning_rate": 4.315987864300818e-05, + "loss": 0.8049, + "step": 14880 + }, + { + "epoch": 1.37, + "learning_rate": 4.315528178725752e-05, + "loss": 0.8688, + "step": 14890 + }, + { + "epoch": 1.37, + "learning_rate": 4.3150684931506855e-05, + "loss": 0.8445, + "step": 14900 + }, + { + "epoch": 1.37, + "learning_rate": 4.3146088075756185e-05, + "loss": 0.8768, + "step": 14910 + }, + { + "epoch": 1.37, + "learning_rate": 4.314149122000552e-05, + "loss": 0.9154, + "step": 14920 + }, + { + "epoch": 1.37, + "learning_rate": 4.313689436425485e-05, + "loss": 0.7818, + "step": 14930 + }, + { + "epoch": 1.37, + "learning_rate": 4.313229750850418e-05, + "loss": 0.9111, + "step": 14940 + }, + { + "epoch": 1.37, + "learning_rate": 4.312770065275352e-05, + "loss": 0.8537, + "step": 14950 + }, + { + "epoch": 1.38, + "learning_rate": 4.3123103797002853e-05, + "loss": 0.9774, + "step": 14960 + }, + { + "epoch": 1.38, + "learning_rate": 4.311850694125218e-05, + "loss": 1.0547, + "step": 14970 + }, + { + "epoch": 1.38, + "learning_rate": 4.311391008550152e-05, + "loss": 0.9624, + "step": 14980 + }, + { + "epoch": 1.38, + "learning_rate": 4.3109313229750856e-05, + "loss": 0.9212, + "step": 14990 + }, + { + "epoch": 1.38, + "learning_rate": 4.3104716374000186e-05, + "loss": 0.998, + "step": 15000 + }, + { + "epoch": 1.38, + "eval_accuracy": 0.5524017467248908, + "eval_loss": 0.9208794832229614, + "eval_runtime": 159.7479, + "eval_samples_per_second": 28.67, + "eval_steps_per_second": 3.587, + "step": 15000 + }, + { + "epoch": 1.38, + "learning_rate": 4.310011951824952e-05, + "loss": 0.8646, + "step": 15010 + }, + { + "epoch": 1.38, + "learning_rate": 4.309552266249885e-05, + "loss": 1.0139, + "step": 15020 + }, + { + "epoch": 1.38, + "learning_rate": 4.309092580674818e-05, + "loss": 0.9508, + "step": 15030 + }, + { + "epoch": 1.38, + "learning_rate": 4.308632895099752e-05, + "loss": 0.9612, + "step": 15040 + }, + { + "epoch": 1.38, + "learning_rate": 4.3081732095246855e-05, + "loss": 0.8929, + "step": 15050 + }, + { + "epoch": 1.38, + "learning_rate": 4.3077135239496184e-05, + "loss": 0.9945, + "step": 15060 + }, + { + "epoch": 1.39, + "learning_rate": 4.307253838374552e-05, + "loss": 0.8415, + "step": 15070 + }, + { + "epoch": 1.39, + "learning_rate": 4.306794152799486e-05, + "loss": 0.8619, + "step": 15080 + }, + { + "epoch": 1.39, + "learning_rate": 4.306334467224419e-05, + "loss": 0.8453, + "step": 15090 + }, + { + "epoch": 1.39, + "learning_rate": 4.3058747816493523e-05, + "loss": 0.8524, + "step": 15100 + }, + { + "epoch": 1.39, + "learning_rate": 4.305415096074285e-05, + "loss": 0.9199, + "step": 15110 + }, + { + "epoch": 1.39, + "learning_rate": 4.304955410499218e-05, + "loss": 0.9933, + "step": 15120 + }, + { + "epoch": 1.39, + "learning_rate": 4.304495724924152e-05, + "loss": 0.8525, + "step": 15130 + }, + { + "epoch": 1.39, + "learning_rate": 4.3040360393490856e-05, + "loss": 0.8448, + "step": 15140 + }, + { + "epoch": 1.39, + "learning_rate": 4.3035763537740186e-05, + "loss": 0.8921, + "step": 15150 + }, + { + "epoch": 1.39, + "learning_rate": 4.303116668198952e-05, + "loss": 0.9722, + "step": 15160 + }, + { + "epoch": 1.39, + "learning_rate": 4.302656982623885e-05, + "loss": 0.8379, + "step": 15170 + }, + { + "epoch": 1.4, + "learning_rate": 4.302197297048819e-05, + "loss": 0.917, + "step": 15180 + }, + { + "epoch": 1.4, + "learning_rate": 4.3017376114737525e-05, + "loss": 1.0581, + "step": 15190 + }, + { + "epoch": 1.4, + "learning_rate": 4.3012779258986854e-05, + "loss": 0.9983, + "step": 15200 + }, + { + "epoch": 1.4, + "learning_rate": 4.3008182403236184e-05, + "loss": 0.8928, + "step": 15210 + }, + { + "epoch": 1.4, + "learning_rate": 4.300358554748552e-05, + "loss": 0.8413, + "step": 15220 + }, + { + "epoch": 1.4, + "learning_rate": 4.299898869173486e-05, + "loss": 0.8468, + "step": 15230 + }, + { + "epoch": 1.4, + "learning_rate": 4.299439183598419e-05, + "loss": 0.8105, + "step": 15240 + }, + { + "epoch": 1.4, + "learning_rate": 4.298979498023352e-05, + "loss": 0.8733, + "step": 15250 + }, + { + "epoch": 1.4, + "learning_rate": 4.298519812448285e-05, + "loss": 0.8697, + "step": 15260 + }, + { + "epoch": 1.4, + "learning_rate": 4.298060126873219e-05, + "loss": 0.9559, + "step": 15270 + }, + { + "epoch": 1.4, + "learning_rate": 4.2976004412981526e-05, + "loss": 0.8271, + "step": 15280 + }, + { + "epoch": 1.41, + "learning_rate": 4.2971407557230856e-05, + "loss": 0.9252, + "step": 15290 + }, + { + "epoch": 1.41, + "learning_rate": 4.296681070148019e-05, + "loss": 0.9227, + "step": 15300 + }, + { + "epoch": 1.41, + "learning_rate": 4.296221384572952e-05, + "loss": 0.877, + "step": 15310 + }, + { + "epoch": 1.41, + "learning_rate": 4.295761698997886e-05, + "loss": 0.8812, + "step": 15320 + }, + { + "epoch": 1.41, + "learning_rate": 4.295302013422819e-05, + "loss": 0.995, + "step": 15330 + }, + { + "epoch": 1.41, + "learning_rate": 4.2948423278477524e-05, + "loss": 0.8399, + "step": 15340 + }, + { + "epoch": 1.41, + "learning_rate": 4.2943826422726854e-05, + "loss": 0.7986, + "step": 15350 + }, + { + "epoch": 1.41, + "learning_rate": 4.293922956697619e-05, + "loss": 1.0293, + "step": 15360 + }, + { + "epoch": 1.41, + "learning_rate": 4.293463271122553e-05, + "loss": 0.9551, + "step": 15370 + }, + { + "epoch": 1.41, + "learning_rate": 4.293003585547486e-05, + "loss": 0.8944, + "step": 15380 + }, + { + "epoch": 1.41, + "learning_rate": 4.292543899972419e-05, + "loss": 0.9954, + "step": 15390 + }, + { + "epoch": 1.42, + "learning_rate": 4.292084214397352e-05, + "loss": 0.9534, + "step": 15400 + }, + { + "epoch": 1.42, + "learning_rate": 4.291624528822286e-05, + "loss": 0.9199, + "step": 15410 + }, + { + "epoch": 1.42, + "learning_rate": 4.291164843247219e-05, + "loss": 0.9045, + "step": 15420 + }, + { + "epoch": 1.42, + "learning_rate": 4.2907051576721526e-05, + "loss": 0.868, + "step": 15430 + }, + { + "epoch": 1.42, + "learning_rate": 4.2902454720970855e-05, + "loss": 0.8492, + "step": 15440 + }, + { + "epoch": 1.42, + "learning_rate": 4.289785786522019e-05, + "loss": 0.9878, + "step": 15450 + }, + { + "epoch": 1.42, + "learning_rate": 4.289326100946953e-05, + "loss": 0.8968, + "step": 15460 + }, + { + "epoch": 1.42, + "learning_rate": 4.288866415371886e-05, + "loss": 0.9121, + "step": 15470 + }, + { + "epoch": 1.42, + "learning_rate": 4.2884067297968194e-05, + "loss": 0.9234, + "step": 15480 + }, + { + "epoch": 1.42, + "learning_rate": 4.2879470442217524e-05, + "loss": 0.8812, + "step": 15490 + }, + { + "epoch": 1.43, + "learning_rate": 4.2874873586466854e-05, + "loss": 0.9316, + "step": 15500 + }, + { + "epoch": 1.43, + "learning_rate": 4.287027673071619e-05, + "loss": 0.9155, + "step": 15510 + }, + { + "epoch": 1.43, + "learning_rate": 4.286567987496553e-05, + "loss": 0.9605, + "step": 15520 + }, + { + "epoch": 1.43, + "learning_rate": 4.2861083019214857e-05, + "loss": 0.887, + "step": 15530 + }, + { + "epoch": 1.43, + "learning_rate": 4.285648616346419e-05, + "loss": 0.959, + "step": 15540 + }, + { + "epoch": 1.43, + "learning_rate": 4.285188930771353e-05, + "loss": 0.9795, + "step": 15550 + }, + { + "epoch": 1.43, + "learning_rate": 4.284729245196286e-05, + "loss": 0.9638, + "step": 15560 + }, + { + "epoch": 1.43, + "learning_rate": 4.2842695596212196e-05, + "loss": 0.8615, + "step": 15570 + }, + { + "epoch": 1.43, + "learning_rate": 4.2838098740461525e-05, + "loss": 0.9224, + "step": 15580 + }, + { + "epoch": 1.43, + "learning_rate": 4.2833501884710855e-05, + "loss": 0.845, + "step": 15590 + }, + { + "epoch": 1.43, + "learning_rate": 4.282890502896019e-05, + "loss": 0.9, + "step": 15600 + }, + { + "epoch": 1.44, + "learning_rate": 4.282430817320953e-05, + "loss": 0.8797, + "step": 15610 + }, + { + "epoch": 1.44, + "learning_rate": 4.281971131745886e-05, + "loss": 0.841, + "step": 15620 + }, + { + "epoch": 1.44, + "learning_rate": 4.2815114461708194e-05, + "loss": 0.9366, + "step": 15630 + }, + { + "epoch": 1.44, + "learning_rate": 4.281051760595753e-05, + "loss": 0.8984, + "step": 15640 + }, + { + "epoch": 1.44, + "learning_rate": 4.280592075020686e-05, + "loss": 0.8837, + "step": 15650 + }, + { + "epoch": 1.44, + "learning_rate": 4.28013238944562e-05, + "loss": 0.8772, + "step": 15660 + }, + { + "epoch": 1.44, + "learning_rate": 4.2796727038705527e-05, + "loss": 0.8479, + "step": 15670 + }, + { + "epoch": 1.44, + "learning_rate": 4.2792130182954856e-05, + "loss": 0.9712, + "step": 15680 + }, + { + "epoch": 1.44, + "learning_rate": 4.278753332720419e-05, + "loss": 0.8305, + "step": 15690 + }, + { + "epoch": 1.44, + "learning_rate": 4.278293647145353e-05, + "loss": 0.8753, + "step": 15700 + }, + { + "epoch": 1.44, + "learning_rate": 4.277833961570286e-05, + "loss": 0.8624, + "step": 15710 + }, + { + "epoch": 1.45, + "learning_rate": 4.2773742759952195e-05, + "loss": 0.8033, + "step": 15720 + }, + { + "epoch": 1.45, + "learning_rate": 4.276914590420153e-05, + "loss": 0.9228, + "step": 15730 + }, + { + "epoch": 1.45, + "learning_rate": 4.276454904845086e-05, + "loss": 0.902, + "step": 15740 + }, + { + "epoch": 1.45, + "learning_rate": 4.27599521927002e-05, + "loss": 0.9774, + "step": 15750 + }, + { + "epoch": 1.45, + "learning_rate": 4.275535533694953e-05, + "loss": 0.8696, + "step": 15760 + }, + { + "epoch": 1.45, + "learning_rate": 4.275075848119886e-05, + "loss": 0.9166, + "step": 15770 + }, + { + "epoch": 1.45, + "learning_rate": 4.2746161625448194e-05, + "loss": 0.9399, + "step": 15780 + }, + { + "epoch": 1.45, + "learning_rate": 4.274156476969753e-05, + "loss": 0.8976, + "step": 15790 + }, + { + "epoch": 1.45, + "learning_rate": 4.273696791394686e-05, + "loss": 0.8523, + "step": 15800 + }, + { + "epoch": 1.45, + "learning_rate": 4.2732371058196197e-05, + "loss": 0.9551, + "step": 15810 + }, + { + "epoch": 1.45, + "learning_rate": 4.272777420244553e-05, + "loss": 0.9317, + "step": 15820 + }, + { + "epoch": 1.46, + "learning_rate": 4.272317734669486e-05, + "loss": 0.9672, + "step": 15830 + }, + { + "epoch": 1.46, + "learning_rate": 4.27185804909442e-05, + "loss": 0.913, + "step": 15840 + }, + { + "epoch": 1.46, + "learning_rate": 4.271398363519353e-05, + "loss": 0.9159, + "step": 15850 + }, + { + "epoch": 1.46, + "learning_rate": 4.270938677944286e-05, + "loss": 0.9109, + "step": 15860 + }, + { + "epoch": 1.46, + "learning_rate": 4.2704789923692195e-05, + "loss": 0.868, + "step": 15870 + }, + { + "epoch": 1.46, + "learning_rate": 4.270019306794153e-05, + "loss": 0.8959, + "step": 15880 + }, + { + "epoch": 1.46, + "learning_rate": 4.269559621219086e-05, + "loss": 0.8121, + "step": 15890 + }, + { + "epoch": 1.46, + "learning_rate": 4.26909993564402e-05, + "loss": 1.002, + "step": 15900 + }, + { + "epoch": 1.46, + "learning_rate": 4.2686402500689534e-05, + "loss": 0.8285, + "step": 15910 + }, + { + "epoch": 1.46, + "learning_rate": 4.2681805644938864e-05, + "loss": 0.9122, + "step": 15920 + }, + { + "epoch": 1.46, + "learning_rate": 4.26772087891882e-05, + "loss": 0.9703, + "step": 15930 + }, + { + "epoch": 1.47, + "learning_rate": 4.267261193343753e-05, + "loss": 0.9006, + "step": 15940 + }, + { + "epoch": 1.47, + "learning_rate": 4.266801507768686e-05, + "loss": 0.9382, + "step": 15950 + }, + { + "epoch": 1.47, + "learning_rate": 4.2663418221936196e-05, + "loss": 0.828, + "step": 15960 + }, + { + "epoch": 1.47, + "learning_rate": 4.265882136618553e-05, + "loss": 0.8313, + "step": 15970 + }, + { + "epoch": 1.47, + "learning_rate": 4.265422451043486e-05, + "loss": 0.7578, + "step": 15980 + }, + { + "epoch": 1.47, + "learning_rate": 4.26496276546842e-05, + "loss": 0.9251, + "step": 15990 + }, + { + "epoch": 1.47, + "learning_rate": 4.2645030798933535e-05, + "loss": 0.9256, + "step": 16000 + }, + { + "epoch": 1.47, + "learning_rate": 4.2640433943182865e-05, + "loss": 0.9624, + "step": 16010 + }, + { + "epoch": 1.47, + "learning_rate": 4.26358370874322e-05, + "loss": 1.0759, + "step": 16020 + }, + { + "epoch": 1.47, + "learning_rate": 4.263124023168153e-05, + "loss": 0.8425, + "step": 16030 + }, + { + "epoch": 1.47, + "learning_rate": 4.262664337593086e-05, + "loss": 1.0333, + "step": 16040 + }, + { + "epoch": 1.48, + "learning_rate": 4.26220465201802e-05, + "loss": 0.8345, + "step": 16050 + }, + { + "epoch": 1.48, + "learning_rate": 4.2617449664429534e-05, + "loss": 0.919, + "step": 16060 + }, + { + "epoch": 1.48, + "learning_rate": 4.2612852808678864e-05, + "loss": 0.8761, + "step": 16070 + }, + { + "epoch": 1.48, + "learning_rate": 4.26082559529282e-05, + "loss": 0.8714, + "step": 16080 + }, + { + "epoch": 1.48, + "learning_rate": 4.260365909717754e-05, + "loss": 0.9486, + "step": 16090 + }, + { + "epoch": 1.48, + "learning_rate": 4.2599062241426866e-05, + "loss": 0.8847, + "step": 16100 + }, + { + "epoch": 1.48, + "learning_rate": 4.25944653856762e-05, + "loss": 0.9233, + "step": 16110 + }, + { + "epoch": 1.48, + "learning_rate": 4.258986852992553e-05, + "loss": 0.9652, + "step": 16120 + }, + { + "epoch": 1.48, + "learning_rate": 4.258527167417486e-05, + "loss": 0.9564, + "step": 16130 + }, + { + "epoch": 1.48, + "learning_rate": 4.25806748184242e-05, + "loss": 0.9893, + "step": 16140 + }, + { + "epoch": 1.48, + "learning_rate": 4.2576077962673535e-05, + "loss": 0.9195, + "step": 16150 + }, + { + "epoch": 1.49, + "learning_rate": 4.2571481106922865e-05, + "loss": 0.9592, + "step": 16160 + }, + { + "epoch": 1.49, + "learning_rate": 4.25668842511722e-05, + "loss": 1.0199, + "step": 16170 + }, + { + "epoch": 1.49, + "learning_rate": 4.256228739542154e-05, + "loss": 0.8361, + "step": 16180 + }, + { + "epoch": 1.49, + "learning_rate": 4.255769053967087e-05, + "loss": 1.0159, + "step": 16190 + }, + { + "epoch": 1.49, + "learning_rate": 4.2553093683920204e-05, + "loss": 1.0119, + "step": 16200 + }, + { + "epoch": 1.49, + "learning_rate": 4.2548496828169534e-05, + "loss": 0.8967, + "step": 16210 + }, + { + "epoch": 1.49, + "learning_rate": 4.2543899972418863e-05, + "loss": 0.9577, + "step": 16220 + }, + { + "epoch": 1.49, + "learning_rate": 4.25393031166682e-05, + "loss": 1.0021, + "step": 16230 + }, + { + "epoch": 1.49, + "learning_rate": 4.2534706260917536e-05, + "loss": 0.9336, + "step": 16240 + }, + { + "epoch": 1.49, + "learning_rate": 4.2530109405166866e-05, + "loss": 0.8832, + "step": 16250 + }, + { + "epoch": 1.49, + "learning_rate": 4.25255125494162e-05, + "loss": 0.9232, + "step": 16260 + }, + { + "epoch": 1.5, + "learning_rate": 4.252091569366554e-05, + "loss": 0.8358, + "step": 16270 + }, + { + "epoch": 1.5, + "learning_rate": 4.251631883791487e-05, + "loss": 0.8668, + "step": 16280 + }, + { + "epoch": 1.5, + "learning_rate": 4.2511721982164205e-05, + "loss": 1.0168, + "step": 16290 + }, + { + "epoch": 1.5, + "learning_rate": 4.2507125126413535e-05, + "loss": 0.9167, + "step": 16300 + }, + { + "epoch": 1.5, + "learning_rate": 4.2502528270662865e-05, + "loss": 0.7962, + "step": 16310 + }, + { + "epoch": 1.5, + "learning_rate": 4.24979314149122e-05, + "loss": 0.838, + "step": 16320 + }, + { + "epoch": 1.5, + "learning_rate": 4.249333455916154e-05, + "loss": 0.8936, + "step": 16330 + }, + { + "epoch": 1.5, + "learning_rate": 4.248873770341087e-05, + "loss": 0.8534, + "step": 16340 + }, + { + "epoch": 1.5, + "learning_rate": 4.2484140847660204e-05, + "loss": 0.8666, + "step": 16350 + }, + { + "epoch": 1.5, + "learning_rate": 4.247954399190954e-05, + "loss": 0.9402, + "step": 16360 + }, + { + "epoch": 1.51, + "learning_rate": 4.247494713615887e-05, + "loss": 0.9416, + "step": 16370 + }, + { + "epoch": 1.51, + "learning_rate": 4.2470350280408206e-05, + "loss": 0.8823, + "step": 16380 + }, + { + "epoch": 1.51, + "learning_rate": 4.2465753424657536e-05, + "loss": 0.9079, + "step": 16390 + }, + { + "epoch": 1.51, + "learning_rate": 4.2461156568906866e-05, + "loss": 0.8404, + "step": 16400 + }, + { + "epoch": 1.51, + "learning_rate": 4.24565597131562e-05, + "loss": 0.9094, + "step": 16410 + }, + { + "epoch": 1.51, + "learning_rate": 4.245196285740554e-05, + "loss": 0.9025, + "step": 16420 + }, + { + "epoch": 1.51, + "learning_rate": 4.244736600165487e-05, + "loss": 0.8279, + "step": 16430 + }, + { + "epoch": 1.51, + "learning_rate": 4.2442769145904205e-05, + "loss": 1.0834, + "step": 16440 + }, + { + "epoch": 1.51, + "learning_rate": 4.243817229015354e-05, + "loss": 0.8816, + "step": 16450 + }, + { + "epoch": 1.51, + "learning_rate": 4.243357543440287e-05, + "loss": 0.9011, + "step": 16460 + }, + { + "epoch": 1.51, + "learning_rate": 4.242897857865221e-05, + "loss": 0.8143, + "step": 16470 + }, + { + "epoch": 1.52, + "learning_rate": 4.242438172290154e-05, + "loss": 1.018, + "step": 16480 + }, + { + "epoch": 1.52, + "learning_rate": 4.241978486715087e-05, + "loss": 0.8854, + "step": 16490 + }, + { + "epoch": 1.52, + "learning_rate": 4.2415188011400203e-05, + "loss": 0.9413, + "step": 16500 + }, + { + "epoch": 1.52, + "learning_rate": 4.241059115564954e-05, + "loss": 0.902, + "step": 16510 + }, + { + "epoch": 1.52, + "learning_rate": 4.240599429989887e-05, + "loss": 0.8677, + "step": 16520 + }, + { + "epoch": 1.52, + "learning_rate": 4.2401397444148206e-05, + "loss": 0.9347, + "step": 16530 + }, + { + "epoch": 1.52, + "learning_rate": 4.239680058839754e-05, + "loss": 0.8232, + "step": 16540 + }, + { + "epoch": 1.52, + "learning_rate": 4.239220373264687e-05, + "loss": 0.8357, + "step": 16550 + }, + { + "epoch": 1.52, + "learning_rate": 4.238760687689621e-05, + "loss": 0.745, + "step": 16560 + }, + { + "epoch": 1.52, + "learning_rate": 4.238301002114554e-05, + "loss": 0.7559, + "step": 16570 + }, + { + "epoch": 1.52, + "learning_rate": 4.237841316539487e-05, + "loss": 0.8659, + "step": 16580 + }, + { + "epoch": 1.53, + "learning_rate": 4.2373816309644205e-05, + "loss": 0.9684, + "step": 16590 + }, + { + "epoch": 1.53, + "learning_rate": 4.236921945389354e-05, + "loss": 0.8996, + "step": 16600 + }, + { + "epoch": 1.53, + "learning_rate": 4.236462259814287e-05, + "loss": 0.7936, + "step": 16610 + }, + { + "epoch": 1.53, + "learning_rate": 4.236002574239221e-05, + "loss": 0.8113, + "step": 16620 + }, + { + "epoch": 1.53, + "learning_rate": 4.2355428886641544e-05, + "loss": 0.9122, + "step": 16630 + }, + { + "epoch": 1.53, + "learning_rate": 4.2350832030890874e-05, + "loss": 0.9304, + "step": 16640 + }, + { + "epoch": 1.53, + "learning_rate": 4.234623517514021e-05, + "loss": 1.011, + "step": 16650 + }, + { + "epoch": 1.53, + "learning_rate": 4.234163831938954e-05, + "loss": 0.764, + "step": 16660 + }, + { + "epoch": 1.53, + "learning_rate": 4.233704146363887e-05, + "loss": 1.0059, + "step": 16670 + }, + { + "epoch": 1.53, + "learning_rate": 4.2332444607888206e-05, + "loss": 0.8689, + "step": 16680 + }, + { + "epoch": 1.53, + "learning_rate": 4.232784775213754e-05, + "loss": 0.9827, + "step": 16690 + }, + { + "epoch": 1.54, + "learning_rate": 4.232325089638687e-05, + "loss": 0.8285, + "step": 16700 + }, + { + "epoch": 1.54, + "learning_rate": 4.231865404063621e-05, + "loss": 0.9238, + "step": 16710 + }, + { + "epoch": 1.54, + "learning_rate": 4.231405718488554e-05, + "loss": 0.9087, + "step": 16720 + }, + { + "epoch": 1.54, + "learning_rate": 4.2309460329134875e-05, + "loss": 0.9439, + "step": 16730 + }, + { + "epoch": 1.54, + "learning_rate": 4.230486347338421e-05, + "loss": 0.9504, + "step": 16740 + }, + { + "epoch": 1.54, + "learning_rate": 4.230026661763354e-05, + "loss": 0.9417, + "step": 16750 + }, + { + "epoch": 1.54, + "learning_rate": 4.229566976188287e-05, + "loss": 0.9474, + "step": 16760 + }, + { + "epoch": 1.54, + "learning_rate": 4.229107290613221e-05, + "loss": 0.8691, + "step": 16770 + }, + { + "epoch": 1.54, + "learning_rate": 4.2286476050381544e-05, + "loss": 0.8645, + "step": 16780 + }, + { + "epoch": 1.54, + "learning_rate": 4.228187919463087e-05, + "loss": 0.9543, + "step": 16790 + }, + { + "epoch": 1.54, + "learning_rate": 4.227728233888021e-05, + "loss": 0.9068, + "step": 16800 + }, + { + "epoch": 1.55, + "learning_rate": 4.227268548312954e-05, + "loss": 0.8251, + "step": 16810 + }, + { + "epoch": 1.55, + "learning_rate": 4.2268088627378876e-05, + "loss": 0.9839, + "step": 16820 + }, + { + "epoch": 1.55, + "learning_rate": 4.226349177162821e-05, + "loss": 0.9694, + "step": 16830 + }, + { + "epoch": 1.55, + "learning_rate": 4.225889491587754e-05, + "loss": 0.9232, + "step": 16840 + }, + { + "epoch": 1.55, + "learning_rate": 4.225429806012687e-05, + "loss": 0.9777, + "step": 16850 + }, + { + "epoch": 1.55, + "learning_rate": 4.224970120437621e-05, + "loss": 0.8524, + "step": 16860 + }, + { + "epoch": 1.55, + "learning_rate": 4.2245104348625545e-05, + "loss": 0.9519, + "step": 16870 + }, + { + "epoch": 1.55, + "learning_rate": 4.2240507492874874e-05, + "loss": 0.919, + "step": 16880 + }, + { + "epoch": 1.55, + "learning_rate": 4.223591063712421e-05, + "loss": 0.945, + "step": 16890 + }, + { + "epoch": 1.55, + "learning_rate": 4.223131378137354e-05, + "loss": 0.8519, + "step": 16900 + }, + { + "epoch": 1.55, + "learning_rate": 4.222671692562288e-05, + "loss": 0.8355, + "step": 16910 + }, + { + "epoch": 1.56, + "learning_rate": 4.2222120069872214e-05, + "loss": 0.8547, + "step": 16920 + }, + { + "epoch": 1.56, + "learning_rate": 4.221752321412154e-05, + "loss": 0.915, + "step": 16930 + }, + { + "epoch": 1.56, + "learning_rate": 4.221292635837087e-05, + "loss": 1.0217, + "step": 16940 + }, + { + "epoch": 1.56, + "learning_rate": 4.220832950262021e-05, + "loss": 0.9823, + "step": 16950 + }, + { + "epoch": 1.56, + "learning_rate": 4.2203732646869546e-05, + "loss": 0.8066, + "step": 16960 + }, + { + "epoch": 1.56, + "learning_rate": 4.2199135791118876e-05, + "loss": 0.8725, + "step": 16970 + }, + { + "epoch": 1.56, + "learning_rate": 4.219453893536821e-05, + "loss": 0.8298, + "step": 16980 + }, + { + "epoch": 1.56, + "learning_rate": 4.218994207961754e-05, + "loss": 0.8755, + "step": 16990 + }, + { + "epoch": 1.56, + "learning_rate": 4.218534522386688e-05, + "loss": 0.8106, + "step": 17000 + }, + { + "epoch": 1.56, + "learning_rate": 4.2180748368116215e-05, + "loss": 0.9167, + "step": 17010 + }, + { + "epoch": 1.56, + "learning_rate": 4.2176151512365544e-05, + "loss": 0.8081, + "step": 17020 + }, + { + "epoch": 1.57, + "learning_rate": 4.2171554656614874e-05, + "loss": 0.8725, + "step": 17030 + }, + { + "epoch": 1.57, + "learning_rate": 4.216695780086421e-05, + "loss": 0.9977, + "step": 17040 + }, + { + "epoch": 1.57, + "learning_rate": 4.216236094511354e-05, + "loss": 0.9981, + "step": 17050 + }, + { + "epoch": 1.57, + "learning_rate": 4.215776408936288e-05, + "loss": 0.9359, + "step": 17060 + }, + { + "epoch": 1.57, + "learning_rate": 4.215316723361221e-05, + "loss": 0.9377, + "step": 17070 + }, + { + "epoch": 1.57, + "learning_rate": 4.214857037786154e-05, + "loss": 0.8336, + "step": 17080 + }, + { + "epoch": 1.57, + "learning_rate": 4.214397352211088e-05, + "loss": 0.8486, + "step": 17090 + }, + { + "epoch": 1.57, + "learning_rate": 4.2139376666360216e-05, + "loss": 0.9608, + "step": 17100 + }, + { + "epoch": 1.57, + "learning_rate": 4.2134779810609546e-05, + "loss": 0.9246, + "step": 17110 + }, + { + "epoch": 1.57, + "learning_rate": 4.2130182954858875e-05, + "loss": 0.8587, + "step": 17120 + }, + { + "epoch": 1.57, + "learning_rate": 4.212558609910821e-05, + "loss": 0.9502, + "step": 17130 + }, + { + "epoch": 1.58, + "learning_rate": 4.212098924335754e-05, + "loss": 0.8688, + "step": 17140 + }, + { + "epoch": 1.58, + "learning_rate": 4.211639238760688e-05, + "loss": 0.9838, + "step": 17150 + }, + { + "epoch": 1.58, + "learning_rate": 4.2111795531856214e-05, + "loss": 0.8754, + "step": 17160 + }, + { + "epoch": 1.58, + "learning_rate": 4.2107198676105544e-05, + "loss": 0.8662, + "step": 17170 + }, + { + "epoch": 1.58, + "learning_rate": 4.210260182035488e-05, + "loss": 0.9795, + "step": 17180 + }, + { + "epoch": 1.58, + "learning_rate": 4.209800496460422e-05, + "loss": 0.9245, + "step": 17190 + }, + { + "epoch": 1.58, + "learning_rate": 4.209340810885355e-05, + "loss": 0.876, + "step": 17200 + }, + { + "epoch": 1.58, + "learning_rate": 4.2088811253102877e-05, + "loss": 0.8579, + "step": 17210 + }, + { + "epoch": 1.58, + "learning_rate": 4.208421439735221e-05, + "loss": 0.9625, + "step": 17220 + }, + { + "epoch": 1.58, + "learning_rate": 4.207961754160154e-05, + "loss": 0.9723, + "step": 17230 + }, + { + "epoch": 1.58, + "learning_rate": 4.207502068585088e-05, + "loss": 0.8758, + "step": 17240 + }, + { + "epoch": 1.59, + "learning_rate": 4.2070423830100216e-05, + "loss": 0.9691, + "step": 17250 + }, + { + "epoch": 1.59, + "learning_rate": 4.2065826974349545e-05, + "loss": 0.9131, + "step": 17260 + }, + { + "epoch": 1.59, + "learning_rate": 4.206123011859888e-05, + "loss": 0.8931, + "step": 17270 + }, + { + "epoch": 1.59, + "learning_rate": 4.205663326284822e-05, + "loss": 0.8679, + "step": 17280 + }, + { + "epoch": 1.59, + "learning_rate": 4.205203640709755e-05, + "loss": 0.8929, + "step": 17290 + }, + { + "epoch": 1.59, + "learning_rate": 4.204743955134688e-05, + "loss": 0.8302, + "step": 17300 + }, + { + "epoch": 1.59, + "learning_rate": 4.2042842695596214e-05, + "loss": 0.9526, + "step": 17310 + }, + { + "epoch": 1.59, + "learning_rate": 4.2038245839845544e-05, + "loss": 1.0688, + "step": 17320 + }, + { + "epoch": 1.59, + "learning_rate": 4.203364898409488e-05, + "loss": 0.9432, + "step": 17330 + }, + { + "epoch": 1.59, + "learning_rate": 4.202905212834422e-05, + "loss": 0.928, + "step": 17340 + }, + { + "epoch": 1.6, + "learning_rate": 4.2024455272593547e-05, + "loss": 0.954, + "step": 17350 + }, + { + "epoch": 1.6, + "learning_rate": 4.201985841684288e-05, + "loss": 0.8691, + "step": 17360 + }, + { + "epoch": 1.6, + "learning_rate": 4.201526156109222e-05, + "loss": 0.8669, + "step": 17370 + }, + { + "epoch": 1.6, + "learning_rate": 4.201066470534155e-05, + "loss": 0.9199, + "step": 17380 + }, + { + "epoch": 1.6, + "learning_rate": 4.200606784959088e-05, + "loss": 0.7568, + "step": 17390 + }, + { + "epoch": 1.6, + "learning_rate": 4.2001470993840215e-05, + "loss": 0.8344, + "step": 17400 + }, + { + "epoch": 1.6, + "learning_rate": 4.1996874138089545e-05, + "loss": 0.6882, + "step": 17410 + }, + { + "epoch": 1.6, + "learning_rate": 4.199227728233888e-05, + "loss": 0.8338, + "step": 17420 + }, + { + "epoch": 1.6, + "learning_rate": 4.198768042658822e-05, + "loss": 0.9393, + "step": 17430 + }, + { + "epoch": 1.6, + "learning_rate": 4.198308357083755e-05, + "loss": 0.9644, + "step": 17440 + }, + { + "epoch": 1.6, + "learning_rate": 4.1978486715086884e-05, + "loss": 0.8726, + "step": 17450 + }, + { + "epoch": 1.61, + "learning_rate": 4.197388985933622e-05, + "loss": 0.8877, + "step": 17460 + }, + { + "epoch": 1.61, + "learning_rate": 4.1969293003585544e-05, + "loss": 0.9628, + "step": 17470 + }, + { + "epoch": 1.61, + "learning_rate": 4.196469614783488e-05, + "loss": 0.9693, + "step": 17480 + }, + { + "epoch": 1.61, + "learning_rate": 4.196009929208422e-05, + "loss": 0.9652, + "step": 17490 + }, + { + "epoch": 1.61, + "learning_rate": 4.1955502436333546e-05, + "loss": 0.8493, + "step": 17500 + }, + { + "epoch": 1.61, + "learning_rate": 4.195090558058288e-05, + "loss": 0.8823, + "step": 17510 + }, + { + "epoch": 1.61, + "learning_rate": 4.194630872483222e-05, + "loss": 0.8977, + "step": 17520 + }, + { + "epoch": 1.61, + "learning_rate": 4.194171186908155e-05, + "loss": 0.7897, + "step": 17530 + }, + { + "epoch": 1.61, + "learning_rate": 4.1937115013330885e-05, + "loss": 0.8328, + "step": 17540 + }, + { + "epoch": 1.61, + "learning_rate": 4.193251815758022e-05, + "loss": 0.8911, + "step": 17550 + }, + { + "epoch": 1.61, + "learning_rate": 4.1927921301829545e-05, + "loss": 0.9267, + "step": 17560 + }, + { + "epoch": 1.62, + "learning_rate": 4.192332444607888e-05, + "loss": 0.9596, + "step": 17570 + }, + { + "epoch": 1.62, + "learning_rate": 4.191872759032822e-05, + "loss": 1.0027, + "step": 17580 + }, + { + "epoch": 1.62, + "learning_rate": 4.191413073457755e-05, + "loss": 0.8186, + "step": 17590 + }, + { + "epoch": 1.62, + "learning_rate": 4.1909533878826884e-05, + "loss": 0.7928, + "step": 17600 + }, + { + "epoch": 1.62, + "learning_rate": 4.190493702307622e-05, + "loss": 0.7637, + "step": 17610 + }, + { + "epoch": 1.62, + "learning_rate": 4.190034016732555e-05, + "loss": 0.9459, + "step": 17620 + }, + { + "epoch": 1.62, + "learning_rate": 4.189574331157489e-05, + "loss": 0.9556, + "step": 17630 + }, + { + "epoch": 1.62, + "learning_rate": 4.189114645582422e-05, + "loss": 0.9608, + "step": 17640 + }, + { + "epoch": 1.62, + "learning_rate": 4.1886549600073546e-05, + "loss": 0.9274, + "step": 17650 + }, + { + "epoch": 1.62, + "learning_rate": 4.188195274432288e-05, + "loss": 1.0087, + "step": 17660 + }, + { + "epoch": 1.62, + "learning_rate": 4.187735588857222e-05, + "loss": 0.8451, + "step": 17670 + }, + { + "epoch": 1.63, + "learning_rate": 4.187275903282155e-05, + "loss": 1.0028, + "step": 17680 + }, + { + "epoch": 1.63, + "learning_rate": 4.1868162177070885e-05, + "loss": 0.8954, + "step": 17690 + }, + { + "epoch": 1.63, + "learning_rate": 4.186356532132022e-05, + "loss": 0.9926, + "step": 17700 + }, + { + "epoch": 1.63, + "learning_rate": 4.185896846556955e-05, + "loss": 0.8621, + "step": 17710 + }, + { + "epoch": 1.63, + "learning_rate": 4.185437160981889e-05, + "loss": 0.822, + "step": 17720 + }, + { + "epoch": 1.63, + "learning_rate": 4.1849774754068224e-05, + "loss": 0.8925, + "step": 17730 + }, + { + "epoch": 1.63, + "learning_rate": 4.184517789831755e-05, + "loss": 0.9776, + "step": 17740 + }, + { + "epoch": 1.63, + "learning_rate": 4.1840581042566884e-05, + "loss": 0.9626, + "step": 17750 + }, + { + "epoch": 1.63, + "learning_rate": 4.183598418681622e-05, + "loss": 0.8438, + "step": 17760 + }, + { + "epoch": 1.63, + "learning_rate": 4.183138733106555e-05, + "loss": 0.9135, + "step": 17770 + }, + { + "epoch": 1.63, + "learning_rate": 4.1826790475314886e-05, + "loss": 0.8661, + "step": 17780 + }, + { + "epoch": 1.64, + "learning_rate": 4.182219361956422e-05, + "loss": 0.8731, + "step": 17790 + }, + { + "epoch": 1.64, + "learning_rate": 4.181759676381355e-05, + "loss": 0.8615, + "step": 17800 + }, + { + "epoch": 1.64, + "learning_rate": 4.181299990806289e-05, + "loss": 0.8019, + "step": 17810 + }, + { + "epoch": 1.64, + "learning_rate": 4.1808403052312226e-05, + "loss": 0.9371, + "step": 17820 + }, + { + "epoch": 1.64, + "learning_rate": 4.180380619656155e-05, + "loss": 0.8912, + "step": 17830 + }, + { + "epoch": 1.64, + "learning_rate": 4.1799209340810885e-05, + "loss": 0.9482, + "step": 17840 + }, + { + "epoch": 1.64, + "learning_rate": 4.179461248506022e-05, + "loss": 0.9816, + "step": 17850 + }, + { + "epoch": 1.64, + "learning_rate": 4.179001562930955e-05, + "loss": 1.0005, + "step": 17860 + }, + { + "epoch": 1.64, + "learning_rate": 4.178541877355889e-05, + "loss": 0.8723, + "step": 17870 + }, + { + "epoch": 1.64, + "learning_rate": 4.1780821917808224e-05, + "loss": 0.8992, + "step": 17880 + }, + { + "epoch": 1.64, + "learning_rate": 4.1776225062057554e-05, + "loss": 0.9304, + "step": 17890 + }, + { + "epoch": 1.65, + "learning_rate": 4.177162820630689e-05, + "loss": 0.926, + "step": 17900 + }, + { + "epoch": 1.65, + "learning_rate": 4.176703135055623e-05, + "loss": 0.7975, + "step": 17910 + }, + { + "epoch": 1.65, + "learning_rate": 4.176243449480555e-05, + "loss": 0.8328, + "step": 17920 + }, + { + "epoch": 1.65, + "learning_rate": 4.1757837639054886e-05, + "loss": 1.0241, + "step": 17930 + }, + { + "epoch": 1.65, + "learning_rate": 4.175324078330422e-05, + "loss": 0.8924, + "step": 17940 + }, + { + "epoch": 1.65, + "learning_rate": 4.174864392755355e-05, + "loss": 0.997, + "step": 17950 + }, + { + "epoch": 1.65, + "learning_rate": 4.174404707180289e-05, + "loss": 0.8743, + "step": 17960 + }, + { + "epoch": 1.65, + "learning_rate": 4.1739450216052225e-05, + "loss": 0.814, + "step": 17970 + }, + { + "epoch": 1.65, + "learning_rate": 4.1734853360301555e-05, + "loss": 0.8387, + "step": 17980 + }, + { + "epoch": 1.65, + "learning_rate": 4.173025650455089e-05, + "loss": 0.8864, + "step": 17990 + }, + { + "epoch": 1.65, + "learning_rate": 4.172565964880023e-05, + "loss": 0.8429, + "step": 18000 + }, + { + "epoch": 1.66, + "learning_rate": 4.172106279304955e-05, + "loss": 0.8551, + "step": 18010 + }, + { + "epoch": 1.66, + "learning_rate": 4.171646593729889e-05, + "loss": 0.9071, + "step": 18020 + }, + { + "epoch": 1.66, + "learning_rate": 4.1711869081548224e-05, + "loss": 0.8447, + "step": 18030 + }, + { + "epoch": 1.66, + "learning_rate": 4.1707272225797553e-05, + "loss": 1.0384, + "step": 18040 + }, + { + "epoch": 1.66, + "learning_rate": 4.170267537004689e-05, + "loss": 1.0297, + "step": 18050 + }, + { + "epoch": 1.66, + "learning_rate": 4.1698078514296226e-05, + "loss": 0.9204, + "step": 18060 + }, + { + "epoch": 1.66, + "learning_rate": 4.1693481658545556e-05, + "loss": 0.8407, + "step": 18070 + }, + { + "epoch": 1.66, + "learning_rate": 4.168888480279489e-05, + "loss": 1.0036, + "step": 18080 + }, + { + "epoch": 1.66, + "learning_rate": 4.168428794704423e-05, + "loss": 0.9909, + "step": 18090 + }, + { + "epoch": 1.66, + "learning_rate": 4.167969109129355e-05, + "loss": 0.8865, + "step": 18100 + }, + { + "epoch": 1.66, + "learning_rate": 4.167509423554289e-05, + "loss": 0.915, + "step": 18110 + }, + { + "epoch": 1.67, + "learning_rate": 4.1670497379792225e-05, + "loss": 0.9087, + "step": 18120 + }, + { + "epoch": 1.67, + "learning_rate": 4.1665900524041555e-05, + "loss": 0.9202, + "step": 18130 + }, + { + "epoch": 1.67, + "learning_rate": 4.166130366829089e-05, + "loss": 0.9248, + "step": 18140 + }, + { + "epoch": 1.67, + "learning_rate": 4.165670681254023e-05, + "loss": 0.8786, + "step": 18150 + }, + { + "epoch": 1.67, + "learning_rate": 4.165210995678956e-05, + "loss": 0.9091, + "step": 18160 + }, + { + "epoch": 1.67, + "learning_rate": 4.1647513101038894e-05, + "loss": 0.9167, + "step": 18170 + }, + { + "epoch": 1.67, + "learning_rate": 4.164291624528823e-05, + "loss": 0.9434, + "step": 18180 + }, + { + "epoch": 1.67, + "learning_rate": 4.163831938953755e-05, + "loss": 0.8511, + "step": 18190 + }, + { + "epoch": 1.67, + "learning_rate": 4.163372253378689e-05, + "loss": 0.8831, + "step": 18200 + }, + { + "epoch": 1.67, + "learning_rate": 4.1629125678036226e-05, + "loss": 0.9833, + "step": 18210 + }, + { + "epoch": 1.68, + "learning_rate": 4.1624528822285556e-05, + "loss": 0.9534, + "step": 18220 + }, + { + "epoch": 1.68, + "learning_rate": 4.161993196653489e-05, + "loss": 0.8748, + "step": 18230 + }, + { + "epoch": 1.68, + "learning_rate": 4.161533511078423e-05, + "loss": 1.0637, + "step": 18240 + }, + { + "epoch": 1.68, + "learning_rate": 4.161073825503356e-05, + "loss": 0.9373, + "step": 18250 + }, + { + "epoch": 1.68, + "learning_rate": 4.1606141399282895e-05, + "loss": 0.9656, + "step": 18260 + }, + { + "epoch": 1.68, + "learning_rate": 4.160154454353223e-05, + "loss": 0.8556, + "step": 18270 + }, + { + "epoch": 1.68, + "learning_rate": 4.1596947687781554e-05, + "loss": 0.999, + "step": 18280 + }, + { + "epoch": 1.68, + "learning_rate": 4.159235083203089e-05, + "loss": 0.952, + "step": 18290 + }, + { + "epoch": 1.68, + "learning_rate": 4.158775397628023e-05, + "loss": 0.9284, + "step": 18300 + }, + { + "epoch": 1.68, + "learning_rate": 4.158315712052956e-05, + "loss": 0.8475, + "step": 18310 + }, + { + "epoch": 1.68, + "learning_rate": 4.1578560264778894e-05, + "loss": 0.9138, + "step": 18320 + }, + { + "epoch": 1.69, + "learning_rate": 4.157396340902823e-05, + "loss": 0.841, + "step": 18330 + }, + { + "epoch": 1.69, + "learning_rate": 4.156936655327756e-05, + "loss": 0.8829, + "step": 18340 + }, + { + "epoch": 1.69, + "learning_rate": 4.1564769697526896e-05, + "loss": 0.8014, + "step": 18350 + }, + { + "epoch": 1.69, + "learning_rate": 4.1560172841776226e-05, + "loss": 0.9932, + "step": 18360 + }, + { + "epoch": 1.69, + "learning_rate": 4.1555575986025556e-05, + "loss": 0.8739, + "step": 18370 + }, + { + "epoch": 1.69, + "learning_rate": 4.155097913027489e-05, + "loss": 0.8471, + "step": 18380 + }, + { + "epoch": 1.69, + "learning_rate": 4.154638227452423e-05, + "loss": 0.9335, + "step": 18390 + }, + { + "epoch": 1.69, + "learning_rate": 4.154178541877356e-05, + "loss": 0.8346, + "step": 18400 + }, + { + "epoch": 1.69, + "learning_rate": 4.1537188563022895e-05, + "loss": 0.9154, + "step": 18410 + }, + { + "epoch": 1.69, + "learning_rate": 4.153259170727223e-05, + "loss": 0.9265, + "step": 18420 + }, + { + "epoch": 1.69, + "learning_rate": 4.152799485152156e-05, + "loss": 0.9032, + "step": 18430 + }, + { + "epoch": 1.7, + "learning_rate": 4.15233979957709e-05, + "loss": 0.8824, + "step": 18440 + }, + { + "epoch": 1.7, + "learning_rate": 4.151880114002023e-05, + "loss": 0.9684, + "step": 18450 + }, + { + "epoch": 1.7, + "learning_rate": 4.151420428426956e-05, + "loss": 0.8762, + "step": 18460 + }, + { + "epoch": 1.7, + "learning_rate": 4.150960742851889e-05, + "loss": 0.9358, + "step": 18470 + }, + { + "epoch": 1.7, + "learning_rate": 4.150501057276823e-05, + "loss": 0.8929, + "step": 18480 + }, + { + "epoch": 1.7, + "learning_rate": 4.150041371701756e-05, + "loss": 0.9735, + "step": 18490 + }, + { + "epoch": 1.7, + "learning_rate": 4.1495816861266896e-05, + "loss": 0.8822, + "step": 18500 + }, + { + "epoch": 1.7, + "learning_rate": 4.149122000551623e-05, + "loss": 0.9456, + "step": 18510 + }, + { + "epoch": 1.7, + "learning_rate": 4.148662314976556e-05, + "loss": 0.8862, + "step": 18520 + }, + { + "epoch": 1.7, + "learning_rate": 4.14820262940149e-05, + "loss": 0.9781, + "step": 18530 + }, + { + "epoch": 1.7, + "learning_rate": 4.147742943826423e-05, + "loss": 0.8752, + "step": 18540 + }, + { + "epoch": 1.71, + "learning_rate": 4.147283258251356e-05, + "loss": 0.9195, + "step": 18550 + }, + { + "epoch": 1.71, + "learning_rate": 4.1468235726762894e-05, + "loss": 0.9672, + "step": 18560 + }, + { + "epoch": 1.71, + "learning_rate": 4.146363887101223e-05, + "loss": 0.9318, + "step": 18570 + }, + { + "epoch": 1.71, + "learning_rate": 4.145904201526156e-05, + "loss": 0.9066, + "step": 18580 + }, + { + "epoch": 1.71, + "learning_rate": 4.14544451595109e-05, + "loss": 0.9142, + "step": 18590 + }, + { + "epoch": 1.71, + "learning_rate": 4.1449848303760234e-05, + "loss": 0.9599, + "step": 18600 + }, + { + "epoch": 1.71, + "learning_rate": 4.144525144800956e-05, + "loss": 0.9115, + "step": 18610 + }, + { + "epoch": 1.71, + "learning_rate": 4.14406545922589e-05, + "loss": 0.8883, + "step": 18620 + }, + { + "epoch": 1.71, + "learning_rate": 4.143605773650823e-05, + "loss": 0.8734, + "step": 18630 + }, + { + "epoch": 1.71, + "learning_rate": 4.143146088075756e-05, + "loss": 0.8817, + "step": 18640 + }, + { + "epoch": 1.71, + "learning_rate": 4.1426864025006896e-05, + "loss": 0.8972, + "step": 18650 + }, + { + "epoch": 1.72, + "learning_rate": 4.142226716925623e-05, + "loss": 0.9625, + "step": 18660 + }, + { + "epoch": 1.72, + "learning_rate": 4.141767031350556e-05, + "loss": 0.9883, + "step": 18670 + }, + { + "epoch": 1.72, + "learning_rate": 4.14130734577549e-05, + "loss": 0.9027, + "step": 18680 + }, + { + "epoch": 1.72, + "learning_rate": 4.140847660200423e-05, + "loss": 0.935, + "step": 18690 + }, + { + "epoch": 1.72, + "learning_rate": 4.1403879746253565e-05, + "loss": 0.8578, + "step": 18700 + }, + { + "epoch": 1.72, + "learning_rate": 4.13992828905029e-05, + "loss": 0.8715, + "step": 18710 + }, + { + "epoch": 1.72, + "learning_rate": 4.139468603475223e-05, + "loss": 0.9244, + "step": 18720 + }, + { + "epoch": 1.72, + "learning_rate": 4.139008917900157e-05, + "loss": 0.8628, + "step": 18730 + }, + { + "epoch": 1.72, + "learning_rate": 4.13854923232509e-05, + "loss": 0.943, + "step": 18740 + }, + { + "epoch": 1.72, + "learning_rate": 4.138089546750023e-05, + "loss": 0.8576, + "step": 18750 + }, + { + "epoch": 1.72, + "learning_rate": 4.137629861174956e-05, + "loss": 0.8593, + "step": 18760 + }, + { + "epoch": 1.73, + "learning_rate": 4.13717017559989e-05, + "loss": 0.9834, + "step": 18770 + }, + { + "epoch": 1.73, + "learning_rate": 4.136710490024823e-05, + "loss": 0.8679, + "step": 18780 + }, + { + "epoch": 1.73, + "learning_rate": 4.1362508044497566e-05, + "loss": 0.8944, + "step": 18790 + }, + { + "epoch": 1.73, + "learning_rate": 4.13579111887469e-05, + "loss": 0.9049, + "step": 18800 + }, + { + "epoch": 1.73, + "learning_rate": 4.135331433299623e-05, + "loss": 1.0067, + "step": 18810 + }, + { + "epoch": 1.73, + "learning_rate": 4.134871747724557e-05, + "loss": 0.8506, + "step": 18820 + }, + { + "epoch": 1.73, + "learning_rate": 4.13441206214949e-05, + "loss": 0.8525, + "step": 18830 + }, + { + "epoch": 1.73, + "learning_rate": 4.1339523765744235e-05, + "loss": 1.0159, + "step": 18840 + }, + { + "epoch": 1.73, + "learning_rate": 4.1334926909993564e-05, + "loss": 0.9297, + "step": 18850 + }, + { + "epoch": 1.73, + "learning_rate": 4.13303300542429e-05, + "loss": 0.812, + "step": 18860 + }, + { + "epoch": 1.73, + "learning_rate": 4.132573319849223e-05, + "loss": 0.9888, + "step": 18870 + }, + { + "epoch": 1.74, + "learning_rate": 4.132113634274157e-05, + "loss": 0.7637, + "step": 18880 + }, + { + "epoch": 1.74, + "learning_rate": 4.13165394869909e-05, + "loss": 1.0444, + "step": 18890 + }, + { + "epoch": 1.74, + "learning_rate": 4.131194263124023e-05, + "loss": 0.8955, + "step": 18900 + }, + { + "epoch": 1.74, + "learning_rate": 4.130734577548957e-05, + "loss": 0.8954, + "step": 18910 + }, + { + "epoch": 1.74, + "learning_rate": 4.13027489197389e-05, + "loss": 0.8728, + "step": 18920 + }, + { + "epoch": 1.74, + "learning_rate": 4.1298152063988236e-05, + "loss": 0.897, + "step": 18930 + }, + { + "epoch": 1.74, + "learning_rate": 4.1293555208237565e-05, + "loss": 0.8964, + "step": 18940 + }, + { + "epoch": 1.74, + "learning_rate": 4.12889583524869e-05, + "loss": 0.9353, + "step": 18950 + }, + { + "epoch": 1.74, + "learning_rate": 4.128436149673623e-05, + "loss": 0.9095, + "step": 18960 + }, + { + "epoch": 1.74, + "learning_rate": 4.127976464098557e-05, + "loss": 0.9323, + "step": 18970 + }, + { + "epoch": 1.74, + "learning_rate": 4.1275167785234905e-05, + "loss": 0.898, + "step": 18980 + }, + { + "epoch": 1.75, + "learning_rate": 4.1270570929484234e-05, + "loss": 0.8464, + "step": 18990 + }, + { + "epoch": 1.75, + "learning_rate": 4.126597407373357e-05, + "loss": 0.9086, + "step": 19000 + }, + { + "epoch": 1.75, + "learning_rate": 4.12613772179829e-05, + "loss": 0.9422, + "step": 19010 + }, + { + "epoch": 1.75, + "learning_rate": 4.125678036223223e-05, + "loss": 0.9822, + "step": 19020 + }, + { + "epoch": 1.75, + "learning_rate": 4.125218350648157e-05, + "loss": 0.7724, + "step": 19030 + }, + { + "epoch": 1.75, + "learning_rate": 4.12475866507309e-05, + "loss": 0.9371, + "step": 19040 + }, + { + "epoch": 1.75, + "learning_rate": 4.124298979498023e-05, + "loss": 0.9053, + "step": 19050 + }, + { + "epoch": 1.75, + "learning_rate": 4.123839293922957e-05, + "loss": 0.9016, + "step": 19060 + }, + { + "epoch": 1.75, + "learning_rate": 4.1233796083478906e-05, + "loss": 0.9313, + "step": 19070 + }, + { + "epoch": 1.75, + "learning_rate": 4.1229199227728235e-05, + "loss": 0.917, + "step": 19080 + }, + { + "epoch": 1.76, + "learning_rate": 4.122460237197757e-05, + "loss": 1.0667, + "step": 19090 + }, + { + "epoch": 1.76, + "learning_rate": 4.12200055162269e-05, + "loss": 0.8639, + "step": 19100 + }, + { + "epoch": 1.76, + "learning_rate": 4.121540866047623e-05, + "loss": 1.016, + "step": 19110 + }, + { + "epoch": 1.76, + "learning_rate": 4.121081180472557e-05, + "loss": 0.9181, + "step": 19120 + }, + { + "epoch": 1.76, + "learning_rate": 4.1206214948974904e-05, + "loss": 0.9929, + "step": 19130 + }, + { + "epoch": 1.76, + "learning_rate": 4.1201618093224234e-05, + "loss": 0.8994, + "step": 19140 + }, + { + "epoch": 1.76, + "learning_rate": 4.119702123747357e-05, + "loss": 0.7857, + "step": 19150 + }, + { + "epoch": 1.76, + "learning_rate": 4.119242438172291e-05, + "loss": 0.9072, + "step": 19160 + }, + { + "epoch": 1.76, + "learning_rate": 4.118782752597224e-05, + "loss": 0.903, + "step": 19170 + }, + { + "epoch": 1.76, + "learning_rate": 4.118323067022157e-05, + "loss": 0.9243, + "step": 19180 + }, + { + "epoch": 1.76, + "learning_rate": 4.11786338144709e-05, + "loss": 0.9036, + "step": 19190 + }, + { + "epoch": 1.77, + "learning_rate": 4.117403695872023e-05, + "loss": 0.9438, + "step": 19200 + }, + { + "epoch": 1.77, + "learning_rate": 4.116944010296957e-05, + "loss": 0.839, + "step": 19210 + }, + { + "epoch": 1.77, + "learning_rate": 4.1164843247218906e-05, + "loss": 0.8282, + "step": 19220 + }, + { + "epoch": 1.77, + "learning_rate": 4.1160246391468235e-05, + "loss": 0.9501, + "step": 19230 + }, + { + "epoch": 1.77, + "learning_rate": 4.115564953571757e-05, + "loss": 0.8517, + "step": 19240 + }, + { + "epoch": 1.77, + "learning_rate": 4.115105267996691e-05, + "loss": 0.9707, + "step": 19250 + }, + { + "epoch": 1.77, + "learning_rate": 4.114645582421624e-05, + "loss": 0.8103, + "step": 19260 + }, + { + "epoch": 1.77, + "learning_rate": 4.1141858968465574e-05, + "loss": 0.8902, + "step": 19270 + }, + { + "epoch": 1.77, + "learning_rate": 4.1137262112714904e-05, + "loss": 0.9665, + "step": 19280 + }, + { + "epoch": 1.77, + "learning_rate": 4.1132665256964234e-05, + "loss": 0.925, + "step": 19290 + }, + { + "epoch": 1.77, + "learning_rate": 4.112806840121357e-05, + "loss": 0.8789, + "step": 19300 + }, + { + "epoch": 1.78, + "learning_rate": 4.112347154546291e-05, + "loss": 0.9255, + "step": 19310 + }, + { + "epoch": 1.78, + "learning_rate": 4.1118874689712236e-05, + "loss": 0.806, + "step": 19320 + }, + { + "epoch": 1.78, + "learning_rate": 4.111427783396157e-05, + "loss": 0.8685, + "step": 19330 + }, + { + "epoch": 1.78, + "learning_rate": 4.110968097821091e-05, + "loss": 0.9507, + "step": 19340 + }, + { + "epoch": 1.78, + "learning_rate": 4.110508412246024e-05, + "loss": 0.9108, + "step": 19350 + }, + { + "epoch": 1.78, + "learning_rate": 4.1100487266709576e-05, + "loss": 0.8114, + "step": 19360 + }, + { + "epoch": 1.78, + "learning_rate": 4.1095890410958905e-05, + "loss": 1.0522, + "step": 19370 + }, + { + "epoch": 1.78, + "learning_rate": 4.1091293555208235e-05, + "loss": 0.9294, + "step": 19380 + }, + { + "epoch": 1.78, + "learning_rate": 4.108669669945757e-05, + "loss": 0.84, + "step": 19390 + }, + { + "epoch": 1.78, + "learning_rate": 4.108209984370691e-05, + "loss": 0.8939, + "step": 19400 + }, + { + "epoch": 1.78, + "learning_rate": 4.107750298795624e-05, + "loss": 0.8818, + "step": 19410 + }, + { + "epoch": 1.79, + "learning_rate": 4.1072906132205574e-05, + "loss": 0.9076, + "step": 19420 + }, + { + "epoch": 1.79, + "learning_rate": 4.106830927645491e-05, + "loss": 0.976, + "step": 19430 + }, + { + "epoch": 1.79, + "learning_rate": 4.106371242070424e-05, + "loss": 0.9353, + "step": 19440 + }, + { + "epoch": 1.79, + "learning_rate": 4.105911556495358e-05, + "loss": 0.8911, + "step": 19450 + }, + { + "epoch": 1.79, + "learning_rate": 4.1054518709202906e-05, + "loss": 0.8533, + "step": 19460 + }, + { + "epoch": 1.79, + "learning_rate": 4.1049921853452236e-05, + "loss": 0.9433, + "step": 19470 + }, + { + "epoch": 1.79, + "learning_rate": 4.104532499770157e-05, + "loss": 0.9745, + "step": 19480 + }, + { + "epoch": 1.79, + "learning_rate": 4.104072814195091e-05, + "loss": 0.8671, + "step": 19490 + }, + { + "epoch": 1.79, + "learning_rate": 4.103613128620024e-05, + "loss": 0.9078, + "step": 19500 + }, + { + "epoch": 1.79, + "learning_rate": 4.1031534430449575e-05, + "loss": 0.8682, + "step": 19510 + }, + { + "epoch": 1.79, + "learning_rate": 4.102693757469891e-05, + "loss": 1.0162, + "step": 19520 + }, + { + "epoch": 1.8, + "learning_rate": 4.102234071894824e-05, + "loss": 0.8288, + "step": 19530 + }, + { + "epoch": 1.8, + "learning_rate": 4.101774386319758e-05, + "loss": 0.8193, + "step": 19540 + }, + { + "epoch": 1.8, + "learning_rate": 4.101314700744691e-05, + "loss": 0.7833, + "step": 19550 + }, + { + "epoch": 1.8, + "learning_rate": 4.100855015169624e-05, + "loss": 0.8653, + "step": 19560 + }, + { + "epoch": 1.8, + "learning_rate": 4.1003953295945574e-05, + "loss": 0.8341, + "step": 19570 + }, + { + "epoch": 1.8, + "learning_rate": 4.099935644019491e-05, + "loss": 0.8961, + "step": 19580 + }, + { + "epoch": 1.8, + "learning_rate": 4.099475958444424e-05, + "loss": 0.8924, + "step": 19590 + }, + { + "epoch": 1.8, + "learning_rate": 4.0990162728693576e-05, + "loss": 0.9692, + "step": 19600 + }, + { + "epoch": 1.8, + "learning_rate": 4.098556587294291e-05, + "loss": 0.8752, + "step": 19610 + }, + { + "epoch": 1.8, + "learning_rate": 4.098096901719224e-05, + "loss": 0.9402, + "step": 19620 + }, + { + "epoch": 1.8, + "learning_rate": 4.097637216144158e-05, + "loss": 1.0259, + "step": 19630 + }, + { + "epoch": 1.81, + "learning_rate": 4.097177530569091e-05, + "loss": 0.8459, + "step": 19640 + }, + { + "epoch": 1.81, + "learning_rate": 4.096717844994024e-05, + "loss": 0.9409, + "step": 19650 + }, + { + "epoch": 1.81, + "learning_rate": 4.0962581594189575e-05, + "loss": 0.9955, + "step": 19660 + }, + { + "epoch": 1.81, + "learning_rate": 4.095798473843891e-05, + "loss": 0.8967, + "step": 19670 + }, + { + "epoch": 1.81, + "learning_rate": 4.095338788268824e-05, + "loss": 0.9495, + "step": 19680 + }, + { + "epoch": 1.81, + "learning_rate": 4.094879102693758e-05, + "loss": 0.8795, + "step": 19690 + }, + { + "epoch": 1.81, + "learning_rate": 4.0944194171186914e-05, + "loss": 0.9885, + "step": 19700 + }, + { + "epoch": 1.81, + "learning_rate": 4.0939597315436244e-05, + "loss": 0.8808, + "step": 19710 + }, + { + "epoch": 1.81, + "learning_rate": 4.093500045968558e-05, + "loss": 0.9911, + "step": 19720 + }, + { + "epoch": 1.81, + "learning_rate": 4.093040360393491e-05, + "loss": 0.9332, + "step": 19730 + }, + { + "epoch": 1.81, + "learning_rate": 4.092580674818424e-05, + "loss": 0.8918, + "step": 19740 + }, + { + "epoch": 1.82, + "learning_rate": 4.0921209892433576e-05, + "loss": 1.0028, + "step": 19750 + }, + { + "epoch": 1.82, + "learning_rate": 4.091661303668291e-05, + "loss": 0.9068, + "step": 19760 + }, + { + "epoch": 1.82, + "learning_rate": 4.091201618093224e-05, + "loss": 0.9665, + "step": 19770 + }, + { + "epoch": 1.82, + "learning_rate": 4.090741932518158e-05, + "loss": 0.9099, + "step": 19780 + }, + { + "epoch": 1.82, + "learning_rate": 4.0902822469430915e-05, + "loss": 1.0169, + "step": 19790 + }, + { + "epoch": 1.82, + "learning_rate": 4.0898225613680245e-05, + "loss": 0.8978, + "step": 19800 + }, + { + "epoch": 1.82, + "learning_rate": 4.089362875792958e-05, + "loss": 0.8199, + "step": 19810 + }, + { + "epoch": 1.82, + "learning_rate": 4.088903190217891e-05, + "loss": 0.8222, + "step": 19820 + }, + { + "epoch": 1.82, + "learning_rate": 4.088443504642824e-05, + "loss": 0.7734, + "step": 19830 + }, + { + "epoch": 1.82, + "learning_rate": 4.087983819067758e-05, + "loss": 0.8924, + "step": 19840 + }, + { + "epoch": 1.82, + "learning_rate": 4.0875241334926914e-05, + "loss": 0.9393, + "step": 19850 + }, + { + "epoch": 1.83, + "learning_rate": 4.0870644479176244e-05, + "loss": 0.9308, + "step": 19860 + }, + { + "epoch": 1.83, + "learning_rate": 4.086604762342558e-05, + "loss": 0.8848, + "step": 19870 + }, + { + "epoch": 1.83, + "learning_rate": 4.0861450767674917e-05, + "loss": 0.9171, + "step": 19880 + }, + { + "epoch": 1.83, + "learning_rate": 4.0856853911924246e-05, + "loss": 0.838, + "step": 19890 + }, + { + "epoch": 1.83, + "learning_rate": 4.085225705617358e-05, + "loss": 0.907, + "step": 19900 + }, + { + "epoch": 1.83, + "learning_rate": 4.084766020042291e-05, + "loss": 0.8847, + "step": 19910 + }, + { + "epoch": 1.83, + "learning_rate": 4.084306334467224e-05, + "loss": 0.9986, + "step": 19920 + }, + { + "epoch": 1.83, + "learning_rate": 4.083846648892158e-05, + "loss": 0.8814, + "step": 19930 + }, + { + "epoch": 1.83, + "learning_rate": 4.0833869633170915e-05, + "loss": 0.7981, + "step": 19940 + }, + { + "epoch": 1.83, + "learning_rate": 4.0829272777420245e-05, + "loss": 0.9419, + "step": 19950 + }, + { + "epoch": 1.84, + "learning_rate": 4.082467592166958e-05, + "loss": 0.8213, + "step": 19960 + }, + { + "epoch": 1.84, + "learning_rate": 4.082007906591892e-05, + "loss": 0.8623, + "step": 19970 + }, + { + "epoch": 1.84, + "learning_rate": 4.081548221016825e-05, + "loss": 0.9108, + "step": 19980 + }, + { + "epoch": 1.84, + "learning_rate": 4.0810885354417584e-05, + "loss": 0.9245, + "step": 19990 + }, + { + "epoch": 1.84, + "learning_rate": 4.0806288498666914e-05, + "loss": 0.8881, + "step": 20000 + }, + { + "epoch": 1.84, + "eval_accuracy": 0.5718340611353712, + "eval_loss": 0.8948913812637329, + "eval_runtime": 159.4029, + "eval_samples_per_second": 28.732, + "eval_steps_per_second": 3.595, + "step": 20000 + }, + { + "epoch": 1.84, + "learning_rate": 4.080169164291624e-05, + "loss": 0.8465, + "step": 20010 + }, + { + "epoch": 1.84, + "learning_rate": 4.079709478716558e-05, + "loss": 0.9986, + "step": 20020 + }, + { + "epoch": 1.84, + "learning_rate": 4.0792497931414916e-05, + "loss": 0.8382, + "step": 20030 + }, + { + "epoch": 1.84, + "learning_rate": 4.0787901075664246e-05, + "loss": 0.9059, + "step": 20040 + }, + { + "epoch": 1.84, + "learning_rate": 4.078330421991358e-05, + "loss": 0.9915, + "step": 20050 + }, + { + "epoch": 1.84, + "learning_rate": 4.077870736416292e-05, + "loss": 0.8541, + "step": 20060 + }, + { + "epoch": 1.85, + "learning_rate": 4.077411050841225e-05, + "loss": 0.898, + "step": 20070 + }, + { + "epoch": 1.85, + "learning_rate": 4.0769513652661585e-05, + "loss": 0.9078, + "step": 20080 + }, + { + "epoch": 1.85, + "learning_rate": 4.0764916796910915e-05, + "loss": 0.8659, + "step": 20090 + }, + { + "epoch": 1.85, + "learning_rate": 4.0760319941160245e-05, + "loss": 0.8148, + "step": 20100 + }, + { + "epoch": 1.85, + "learning_rate": 4.075572308540958e-05, + "loss": 1.0013, + "step": 20110 + }, + { + "epoch": 1.85, + "learning_rate": 4.075112622965892e-05, + "loss": 0.8939, + "step": 20120 + }, + { + "epoch": 1.85, + "learning_rate": 4.074652937390825e-05, + "loss": 0.9523, + "step": 20130 + }, + { + "epoch": 1.85, + "learning_rate": 4.0741932518157584e-05, + "loss": 0.932, + "step": 20140 + }, + { + "epoch": 1.85, + "learning_rate": 4.073733566240692e-05, + "loss": 0.8874, + "step": 20150 + }, + { + "epoch": 1.85, + "learning_rate": 4.073273880665625e-05, + "loss": 1.0361, + "step": 20160 + }, + { + "epoch": 1.85, + "learning_rate": 4.0728141950905586e-05, + "loss": 0.9891, + "step": 20170 + }, + { + "epoch": 1.86, + "learning_rate": 4.0723545095154916e-05, + "loss": 0.8961, + "step": 20180 + }, + { + "epoch": 1.86, + "learning_rate": 4.0718948239404246e-05, + "loss": 0.866, + "step": 20190 + }, + { + "epoch": 1.86, + "learning_rate": 4.071435138365358e-05, + "loss": 0.8629, + "step": 20200 + }, + { + "epoch": 1.86, + "learning_rate": 4.070975452790292e-05, + "loss": 0.9099, + "step": 20210 + }, + { + "epoch": 1.86, + "learning_rate": 4.070515767215225e-05, + "loss": 0.8307, + "step": 20220 + }, + { + "epoch": 1.86, + "learning_rate": 4.0700560816401585e-05, + "loss": 0.8605, + "step": 20230 + }, + { + "epoch": 1.86, + "learning_rate": 4.069596396065092e-05, + "loss": 0.7921, + "step": 20240 + }, + { + "epoch": 1.86, + "learning_rate": 4.069136710490025e-05, + "loss": 0.9, + "step": 20250 + }, + { + "epoch": 1.86, + "learning_rate": 4.068677024914959e-05, + "loss": 0.9485, + "step": 20260 + }, + { + "epoch": 1.86, + "learning_rate": 4.068217339339892e-05, + "loss": 0.9716, + "step": 20270 + }, + { + "epoch": 1.86, + "learning_rate": 4.067757653764825e-05, + "loss": 0.8101, + "step": 20280 + }, + { + "epoch": 1.87, + "learning_rate": 4.067297968189758e-05, + "loss": 0.9246, + "step": 20290 + }, + { + "epoch": 1.87, + "learning_rate": 4.066838282614692e-05, + "loss": 0.8999, + "step": 20300 + }, + { + "epoch": 1.87, + "learning_rate": 4.066378597039625e-05, + "loss": 0.9992, + "step": 20310 + }, + { + "epoch": 1.87, + "learning_rate": 4.0659189114645586e-05, + "loss": 0.8228, + "step": 20320 + }, + { + "epoch": 1.87, + "learning_rate": 4.0654592258894916e-05, + "loss": 0.929, + "step": 20330 + }, + { + "epoch": 1.87, + "learning_rate": 4.064999540314425e-05, + "loss": 0.9695, + "step": 20340 + }, + { + "epoch": 1.87, + "learning_rate": 4.064539854739359e-05, + "loss": 0.8766, + "step": 20350 + }, + { + "epoch": 1.87, + "learning_rate": 4.064080169164292e-05, + "loss": 0.8782, + "step": 20360 + }, + { + "epoch": 1.87, + "learning_rate": 4.063620483589225e-05, + "loss": 0.8494, + "step": 20370 + }, + { + "epoch": 1.87, + "learning_rate": 4.0631607980141585e-05, + "loss": 0.8463, + "step": 20380 + }, + { + "epoch": 1.87, + "learning_rate": 4.062701112439092e-05, + "loss": 0.9161, + "step": 20390 + }, + { + "epoch": 1.88, + "learning_rate": 4.062241426864025e-05, + "loss": 0.8858, + "step": 20400 + }, + { + "epoch": 1.88, + "learning_rate": 4.061781741288959e-05, + "loss": 1.0159, + "step": 20410 + }, + { + "epoch": 1.88, + "learning_rate": 4.061322055713892e-05, + "loss": 0.8569, + "step": 20420 + }, + { + "epoch": 1.88, + "learning_rate": 4.0608623701388253e-05, + "loss": 0.9, + "step": 20430 + }, + { + "epoch": 1.88, + "learning_rate": 4.060402684563759e-05, + "loss": 0.8478, + "step": 20440 + }, + { + "epoch": 1.88, + "learning_rate": 4.059942998988692e-05, + "loss": 0.8729, + "step": 20450 + }, + { + "epoch": 1.88, + "learning_rate": 4.059483313413625e-05, + "loss": 0.9638, + "step": 20460 + }, + { + "epoch": 1.88, + "learning_rate": 4.0590236278385586e-05, + "loss": 0.84, + "step": 20470 + }, + { + "epoch": 1.88, + "learning_rate": 4.058563942263492e-05, + "loss": 0.8556, + "step": 20480 + }, + { + "epoch": 1.88, + "learning_rate": 4.058104256688425e-05, + "loss": 0.9391, + "step": 20490 + }, + { + "epoch": 1.88, + "learning_rate": 4.057644571113359e-05, + "loss": 0.9144, + "step": 20500 + }, + { + "epoch": 1.89, + "learning_rate": 4.057184885538292e-05, + "loss": 0.8776, + "step": 20510 + }, + { + "epoch": 1.89, + "learning_rate": 4.0567251999632255e-05, + "loss": 0.8684, + "step": 20520 + }, + { + "epoch": 1.89, + "learning_rate": 4.056265514388159e-05, + "loss": 0.9345, + "step": 20530 + }, + { + "epoch": 1.89, + "learning_rate": 4.055805828813092e-05, + "loss": 0.9432, + "step": 20540 + }, + { + "epoch": 1.89, + "learning_rate": 4.055346143238025e-05, + "loss": 1.0035, + "step": 20550 + }, + { + "epoch": 1.89, + "learning_rate": 4.054886457662959e-05, + "loss": 0.8536, + "step": 20560 + }, + { + "epoch": 1.89, + "learning_rate": 4.0544267720878923e-05, + "loss": 0.9423, + "step": 20570 + }, + { + "epoch": 1.89, + "learning_rate": 4.053967086512825e-05, + "loss": 0.8918, + "step": 20580 + }, + { + "epoch": 1.89, + "learning_rate": 4.053507400937759e-05, + "loss": 0.9524, + "step": 20590 + }, + { + "epoch": 1.89, + "learning_rate": 4.053047715362692e-05, + "loss": 0.8665, + "step": 20600 + }, + { + "epoch": 1.89, + "learning_rate": 4.0525880297876256e-05, + "loss": 0.9779, + "step": 20610 + }, + { + "epoch": 1.9, + "learning_rate": 4.052128344212559e-05, + "loss": 0.8143, + "step": 20620 + }, + { + "epoch": 1.9, + "learning_rate": 4.051668658637492e-05, + "loss": 0.8623, + "step": 20630 + }, + { + "epoch": 1.9, + "learning_rate": 4.051208973062425e-05, + "loss": 1.0424, + "step": 20640 + }, + { + "epoch": 1.9, + "learning_rate": 4.050749287487359e-05, + "loss": 0.9438, + "step": 20650 + }, + { + "epoch": 1.9, + "learning_rate": 4.050289601912292e-05, + "loss": 0.927, + "step": 20660 + }, + { + "epoch": 1.9, + "learning_rate": 4.0498299163372254e-05, + "loss": 0.9322, + "step": 20670 + }, + { + "epoch": 1.9, + "learning_rate": 4.049370230762159e-05, + "loss": 0.9185, + "step": 20680 + }, + { + "epoch": 1.9, + "learning_rate": 4.048910545187092e-05, + "loss": 0.9438, + "step": 20690 + }, + { + "epoch": 1.9, + "learning_rate": 4.048450859612026e-05, + "loss": 0.9043, + "step": 20700 + }, + { + "epoch": 1.9, + "learning_rate": 4.0479911740369593e-05, + "loss": 0.8019, + "step": 20710 + }, + { + "epoch": 1.9, + "learning_rate": 4.047531488461892e-05, + "loss": 1.0152, + "step": 20720 + }, + { + "epoch": 1.91, + "learning_rate": 4.047071802886825e-05, + "loss": 0.889, + "step": 20730 + }, + { + "epoch": 1.91, + "learning_rate": 4.046612117311759e-05, + "loss": 0.8733, + "step": 20740 + }, + { + "epoch": 1.91, + "learning_rate": 4.046152431736692e-05, + "loss": 0.9171, + "step": 20750 + }, + { + "epoch": 1.91, + "learning_rate": 4.0456927461616256e-05, + "loss": 0.8776, + "step": 20760 + }, + { + "epoch": 1.91, + "learning_rate": 4.045233060586559e-05, + "loss": 0.8803, + "step": 20770 + }, + { + "epoch": 1.91, + "learning_rate": 4.044773375011492e-05, + "loss": 0.838, + "step": 20780 + }, + { + "epoch": 1.91, + "learning_rate": 4.044313689436426e-05, + "loss": 0.9693, + "step": 20790 + }, + { + "epoch": 1.91, + "learning_rate": 4.0438540038613595e-05, + "loss": 0.8288, + "step": 20800 + }, + { + "epoch": 1.91, + "learning_rate": 4.0433943182862924e-05, + "loss": 0.889, + "step": 20810 + }, + { + "epoch": 1.91, + "learning_rate": 4.0429346327112254e-05, + "loss": 0.9696, + "step": 20820 + }, + { + "epoch": 1.92, + "learning_rate": 4.042474947136159e-05, + "loss": 0.9633, + "step": 20830 + }, + { + "epoch": 1.92, + "learning_rate": 4.042015261561092e-05, + "loss": 0.8446, + "step": 20840 + }, + { + "epoch": 1.92, + "learning_rate": 4.041555575986026e-05, + "loss": 0.902, + "step": 20850 + }, + { + "epoch": 1.92, + "learning_rate": 4.041095890410959e-05, + "loss": 0.8652, + "step": 20860 + }, + { + "epoch": 1.92, + "learning_rate": 4.040636204835892e-05, + "loss": 0.9225, + "step": 20870 + }, + { + "epoch": 1.92, + "learning_rate": 4.040176519260826e-05, + "loss": 0.8898, + "step": 20880 + }, + { + "epoch": 1.92, + "learning_rate": 4.0397168336857596e-05, + "loss": 0.9459, + "step": 20890 + }, + { + "epoch": 1.92, + "learning_rate": 4.0392571481106926e-05, + "loss": 0.834, + "step": 20900 + }, + { + "epoch": 1.92, + "learning_rate": 4.0387974625356255e-05, + "loss": 0.8881, + "step": 20910 + }, + { + "epoch": 1.92, + "learning_rate": 4.038337776960559e-05, + "loss": 0.9006, + "step": 20920 + }, + { + "epoch": 1.92, + "learning_rate": 4.037878091385492e-05, + "loss": 0.7855, + "step": 20930 + }, + { + "epoch": 1.93, + "learning_rate": 4.037418405810426e-05, + "loss": 0.8567, + "step": 20940 + }, + { + "epoch": 1.93, + "learning_rate": 4.0369587202353594e-05, + "loss": 0.9781, + "step": 20950 + }, + { + "epoch": 1.93, + "learning_rate": 4.0364990346602924e-05, + "loss": 0.8617, + "step": 20960 + }, + { + "epoch": 1.93, + "learning_rate": 4.036039349085226e-05, + "loss": 0.8768, + "step": 20970 + }, + { + "epoch": 1.93, + "learning_rate": 4.03557966351016e-05, + "loss": 0.9551, + "step": 20980 + }, + { + "epoch": 1.93, + "learning_rate": 4.035119977935092e-05, + "loss": 0.9332, + "step": 20990 + }, + { + "epoch": 1.93, + "learning_rate": 4.0346602923600256e-05, + "loss": 0.882, + "step": 21000 + }, + { + "epoch": 1.93, + "learning_rate": 4.034200606784959e-05, + "loss": 0.8489, + "step": 21010 + }, + { + "epoch": 1.93, + "learning_rate": 4.033740921209892e-05, + "loss": 0.8556, + "step": 21020 + }, + { + "epoch": 1.93, + "learning_rate": 4.033281235634826e-05, + "loss": 1.012, + "step": 21030 + }, + { + "epoch": 1.93, + "learning_rate": 4.0328215500597596e-05, + "loss": 0.8541, + "step": 21040 + }, + { + "epoch": 1.94, + "learning_rate": 4.0323618644846925e-05, + "loss": 0.9325, + "step": 21050 + }, + { + "epoch": 1.94, + "learning_rate": 4.031902178909626e-05, + "loss": 0.881, + "step": 21060 + }, + { + "epoch": 1.94, + "learning_rate": 4.03144249333456e-05, + "loss": 0.9542, + "step": 21070 + }, + { + "epoch": 1.94, + "learning_rate": 4.030982807759492e-05, + "loss": 0.8439, + "step": 21080 + }, + { + "epoch": 1.94, + "learning_rate": 4.030523122184426e-05, + "loss": 0.9469, + "step": 21090 + }, + { + "epoch": 1.94, + "learning_rate": 4.0300634366093594e-05, + "loss": 0.8524, + "step": 21100 + }, + { + "epoch": 1.94, + "learning_rate": 4.0296037510342924e-05, + "loss": 0.8643, + "step": 21110 + }, + { + "epoch": 1.94, + "learning_rate": 4.029144065459226e-05, + "loss": 0.9643, + "step": 21120 + }, + { + "epoch": 1.94, + "learning_rate": 4.02868437988416e-05, + "loss": 0.9, + "step": 21130 + }, + { + "epoch": 1.94, + "learning_rate": 4.0282246943090926e-05, + "loss": 0.9066, + "step": 21140 + }, + { + "epoch": 1.94, + "learning_rate": 4.027765008734026e-05, + "loss": 0.8098, + "step": 21150 + }, + { + "epoch": 1.95, + "learning_rate": 4.02730532315896e-05, + "loss": 0.8649, + "step": 21160 + }, + { + "epoch": 1.95, + "learning_rate": 4.026845637583892e-05, + "loss": 0.8806, + "step": 21170 + }, + { + "epoch": 1.95, + "learning_rate": 4.026385952008826e-05, + "loss": 0.96, + "step": 21180 + }, + { + "epoch": 1.95, + "learning_rate": 4.0259262664337595e-05, + "loss": 0.8574, + "step": 21190 + }, + { + "epoch": 1.95, + "learning_rate": 4.0254665808586925e-05, + "loss": 0.872, + "step": 21200 + }, + { + "epoch": 1.95, + "learning_rate": 4.025006895283626e-05, + "loss": 0.8602, + "step": 21210 + }, + { + "epoch": 1.95, + "learning_rate": 4.02454720970856e-05, + "loss": 0.7153, + "step": 21220 + }, + { + "epoch": 1.95, + "learning_rate": 4.024087524133493e-05, + "loss": 0.7712, + "step": 21230 + }, + { + "epoch": 1.95, + "learning_rate": 4.0236278385584264e-05, + "loss": 0.8701, + "step": 21240 + }, + { + "epoch": 1.95, + "learning_rate": 4.02316815298336e-05, + "loss": 0.8243, + "step": 21250 + }, + { + "epoch": 1.95, + "learning_rate": 4.0227084674082924e-05, + "loss": 0.7357, + "step": 21260 + }, + { + "epoch": 1.96, + "learning_rate": 4.022248781833226e-05, + "loss": 0.9544, + "step": 21270 + }, + { + "epoch": 1.96, + "learning_rate": 4.0217890962581597e-05, + "loss": 0.7579, + "step": 21280 + }, + { + "epoch": 1.96, + "learning_rate": 4.0213294106830926e-05, + "loss": 0.9087, + "step": 21290 + }, + { + "epoch": 1.96, + "learning_rate": 4.020869725108026e-05, + "loss": 0.8638, + "step": 21300 + }, + { + "epoch": 1.96, + "learning_rate": 4.02041003953296e-05, + "loss": 1.0027, + "step": 21310 + }, + { + "epoch": 1.96, + "learning_rate": 4.019950353957893e-05, + "loss": 0.9756, + "step": 21320 + }, + { + "epoch": 1.96, + "learning_rate": 4.0194906683828265e-05, + "loss": 0.8906, + "step": 21330 + }, + { + "epoch": 1.96, + "learning_rate": 4.01903098280776e-05, + "loss": 0.9661, + "step": 21340 + }, + { + "epoch": 1.96, + "learning_rate": 4.0185712972326925e-05, + "loss": 0.9704, + "step": 21350 + }, + { + "epoch": 1.96, + "learning_rate": 4.018111611657626e-05, + "loss": 0.829, + "step": 21360 + }, + { + "epoch": 1.96, + "learning_rate": 4.01765192608256e-05, + "loss": 0.8957, + "step": 21370 + }, + { + "epoch": 1.97, + "learning_rate": 4.017192240507493e-05, + "loss": 0.9386, + "step": 21380 + }, + { + "epoch": 1.97, + "learning_rate": 4.0167325549324264e-05, + "loss": 0.8152, + "step": 21390 + }, + { + "epoch": 1.97, + "learning_rate": 4.01627286935736e-05, + "loss": 0.8726, + "step": 21400 + }, + { + "epoch": 1.97, + "learning_rate": 4.015813183782293e-05, + "loss": 0.8153, + "step": 21410 + }, + { + "epoch": 1.97, + "learning_rate": 4.0153534982072267e-05, + "loss": 0.9022, + "step": 21420 + }, + { + "epoch": 1.97, + "learning_rate": 4.01489381263216e-05, + "loss": 0.822, + "step": 21430 + }, + { + "epoch": 1.97, + "learning_rate": 4.0144341270570926e-05, + "loss": 0.8858, + "step": 21440 + }, + { + "epoch": 1.97, + "learning_rate": 4.013974441482026e-05, + "loss": 0.8817, + "step": 21450 + }, + { + "epoch": 1.97, + "learning_rate": 4.01351475590696e-05, + "loss": 0.8454, + "step": 21460 + }, + { + "epoch": 1.97, + "learning_rate": 4.013055070331893e-05, + "loss": 0.8995, + "step": 21470 + }, + { + "epoch": 1.97, + "learning_rate": 4.0125953847568265e-05, + "loss": 0.903, + "step": 21480 + }, + { + "epoch": 1.98, + "learning_rate": 4.01213569918176e-05, + "loss": 0.7868, + "step": 21490 + }, + { + "epoch": 1.98, + "learning_rate": 4.011676013606693e-05, + "loss": 0.9856, + "step": 21500 + }, + { + "epoch": 1.98, + "learning_rate": 4.011216328031627e-05, + "loss": 0.9738, + "step": 21510 + }, + { + "epoch": 1.98, + "learning_rate": 4.0107566424565604e-05, + "loss": 0.8339, + "step": 21520 + }, + { + "epoch": 1.98, + "learning_rate": 4.010296956881493e-05, + "loss": 0.9027, + "step": 21530 + }, + { + "epoch": 1.98, + "learning_rate": 4.0098372713064264e-05, + "loss": 0.878, + "step": 21540 + }, + { + "epoch": 1.98, + "learning_rate": 4.00937758573136e-05, + "loss": 1.06, + "step": 21550 + }, + { + "epoch": 1.98, + "learning_rate": 4.008917900156293e-05, + "loss": 0.9259, + "step": 21560 + }, + { + "epoch": 1.98, + "learning_rate": 4.0084582145812266e-05, + "loss": 0.8766, + "step": 21570 + }, + { + "epoch": 1.98, + "learning_rate": 4.00799852900616e-05, + "loss": 0.7959, + "step": 21580 + }, + { + "epoch": 1.98, + "learning_rate": 4.007538843431093e-05, + "loss": 0.9636, + "step": 21590 + }, + { + "epoch": 1.99, + "learning_rate": 4.007079157856027e-05, + "loss": 0.9235, + "step": 21600 + }, + { + "epoch": 1.99, + "learning_rate": 4.0066194722809605e-05, + "loss": 0.8531, + "step": 21610 + }, + { + "epoch": 1.99, + "learning_rate": 4.006159786705893e-05, + "loss": 0.9479, + "step": 21620 + }, + { + "epoch": 1.99, + "learning_rate": 4.0057001011308265e-05, + "loss": 0.9314, + "step": 21630 + }, + { + "epoch": 1.99, + "learning_rate": 4.00524041555576e-05, + "loss": 0.8426, + "step": 21640 + }, + { + "epoch": 1.99, + "learning_rate": 4.004780729980693e-05, + "loss": 0.9161, + "step": 21650 + }, + { + "epoch": 1.99, + "learning_rate": 4.004321044405627e-05, + "loss": 0.9649, + "step": 21660 + }, + { + "epoch": 1.99, + "learning_rate": 4.0038613588305604e-05, + "loss": 0.8403, + "step": 21670 + }, + { + "epoch": 1.99, + "learning_rate": 4.0034016732554934e-05, + "loss": 0.9374, + "step": 21680 + }, + { + "epoch": 1.99, + "learning_rate": 4.002941987680427e-05, + "loss": 0.8815, + "step": 21690 + }, + { + "epoch": 2.0, + "learning_rate": 4.002482302105361e-05, + "loss": 0.9884, + "step": 21700 + }, + { + "epoch": 2.0, + "learning_rate": 4.002022616530293e-05, + "loss": 0.9265, + "step": 21710 + }, + { + "epoch": 2.0, + "learning_rate": 4.0015629309552266e-05, + "loss": 0.9584, + "step": 21720 + }, + { + "epoch": 2.0, + "learning_rate": 4.00110324538016e-05, + "loss": 1.02, + "step": 21730 + }, + { + "epoch": 2.0, + "learning_rate": 4.000643559805093e-05, + "loss": 0.8321, + "step": 21740 + }, + { + "epoch": 2.0, + "learning_rate": 4.000183874230027e-05, + "loss": 0.9104, + "step": 21750 + }, + { + "epoch": 2.0, + "learning_rate": 3.9997241886549605e-05, + "loss": 0.8733, + "step": 21760 + }, + { + "epoch": 2.0, + "learning_rate": 3.9992645030798935e-05, + "loss": 0.9693, + "step": 21770 + }, + { + "epoch": 2.0, + "learning_rate": 3.998804817504827e-05, + "loss": 0.8226, + "step": 21780 + }, + { + "epoch": 2.0, + "learning_rate": 3.998345131929761e-05, + "loss": 0.9296, + "step": 21790 + }, + { + "epoch": 2.0, + "learning_rate": 3.997885446354693e-05, + "loss": 0.9526, + "step": 21800 + }, + { + "epoch": 2.01, + "learning_rate": 3.997425760779627e-05, + "loss": 0.9696, + "step": 21810 + }, + { + "epoch": 2.01, + "learning_rate": 3.9969660752045604e-05, + "loss": 0.8888, + "step": 21820 + }, + { + "epoch": 2.01, + "learning_rate": 3.9965063896294933e-05, + "loss": 0.9406, + "step": 21830 + }, + { + "epoch": 2.01, + "learning_rate": 3.996046704054427e-05, + "loss": 0.9667, + "step": 21840 + }, + { + "epoch": 2.01, + "learning_rate": 3.9955870184793606e-05, + "loss": 0.9684, + "step": 21850 + }, + { + "epoch": 2.01, + "learning_rate": 3.9951273329042936e-05, + "loss": 0.9378, + "step": 21860 + }, + { + "epoch": 2.01, + "learning_rate": 3.994667647329227e-05, + "loss": 0.921, + "step": 21870 + }, + { + "epoch": 2.01, + "learning_rate": 3.994207961754161e-05, + "loss": 0.9858, + "step": 21880 + }, + { + "epoch": 2.01, + "learning_rate": 3.993748276179093e-05, + "loss": 0.8436, + "step": 21890 + }, + { + "epoch": 2.01, + "learning_rate": 3.993288590604027e-05, + "loss": 0.911, + "step": 21900 + }, + { + "epoch": 2.01, + "learning_rate": 3.9928289050289605e-05, + "loss": 0.8914, + "step": 21910 + }, + { + "epoch": 2.02, + "learning_rate": 3.9923692194538935e-05, + "loss": 0.9415, + "step": 21920 + }, + { + "epoch": 2.02, + "learning_rate": 3.991909533878827e-05, + "loss": 0.9371, + "step": 21930 + }, + { + "epoch": 2.02, + "learning_rate": 3.991449848303761e-05, + "loss": 0.8885, + "step": 21940 + }, + { + "epoch": 2.02, + "learning_rate": 3.990990162728694e-05, + "loss": 0.87, + "step": 21950 + }, + { + "epoch": 2.02, + "learning_rate": 3.9905304771536274e-05, + "loss": 0.9054, + "step": 21960 + }, + { + "epoch": 2.02, + "learning_rate": 3.9900707915785603e-05, + "loss": 0.9088, + "step": 21970 + }, + { + "epoch": 2.02, + "learning_rate": 3.989611106003493e-05, + "loss": 0.8822, + "step": 21980 + }, + { + "epoch": 2.02, + "learning_rate": 3.989151420428427e-05, + "loss": 0.8588, + "step": 21990 + }, + { + "epoch": 2.02, + "learning_rate": 3.9886917348533606e-05, + "loss": 0.9012, + "step": 22000 + }, + { + "epoch": 2.02, + "learning_rate": 3.9882320492782936e-05, + "loss": 0.885, + "step": 22010 + }, + { + "epoch": 2.02, + "learning_rate": 3.987772363703227e-05, + "loss": 0.7474, + "step": 22020 + }, + { + "epoch": 2.03, + "learning_rate": 3.987312678128161e-05, + "loss": 0.9507, + "step": 22030 + }, + { + "epoch": 2.03, + "learning_rate": 3.986852992553094e-05, + "loss": 0.9347, + "step": 22040 + }, + { + "epoch": 2.03, + "learning_rate": 3.9863933069780275e-05, + "loss": 0.8955, + "step": 22050 + }, + { + "epoch": 2.03, + "learning_rate": 3.9859336214029605e-05, + "loss": 0.9146, + "step": 22060 + }, + { + "epoch": 2.03, + "learning_rate": 3.9854739358278934e-05, + "loss": 0.8159, + "step": 22070 + }, + { + "epoch": 2.03, + "learning_rate": 3.985014250252827e-05, + "loss": 0.8328, + "step": 22080 + }, + { + "epoch": 2.03, + "learning_rate": 3.984554564677761e-05, + "loss": 0.9327, + "step": 22090 + }, + { + "epoch": 2.03, + "learning_rate": 3.984094879102694e-05, + "loss": 1.0613, + "step": 22100 + }, + { + "epoch": 2.03, + "learning_rate": 3.9836351935276273e-05, + "loss": 0.8588, + "step": 22110 + }, + { + "epoch": 2.03, + "learning_rate": 3.983175507952561e-05, + "loss": 0.8369, + "step": 22120 + }, + { + "epoch": 2.03, + "learning_rate": 3.982715822377494e-05, + "loss": 0.8908, + "step": 22130 + }, + { + "epoch": 2.04, + "learning_rate": 3.9822561368024276e-05, + "loss": 0.9971, + "step": 22140 + }, + { + "epoch": 2.04, + "learning_rate": 3.9817964512273606e-05, + "loss": 1.0831, + "step": 22150 + }, + { + "epoch": 2.04, + "learning_rate": 3.981336765652294e-05, + "loss": 0.9216, + "step": 22160 + }, + { + "epoch": 2.04, + "learning_rate": 3.980877080077227e-05, + "loss": 0.8909, + "step": 22170 + }, + { + "epoch": 2.04, + "learning_rate": 3.980417394502161e-05, + "loss": 0.8622, + "step": 22180 + }, + { + "epoch": 2.04, + "learning_rate": 3.979957708927094e-05, + "loss": 0.8127, + "step": 22190 + }, + { + "epoch": 2.04, + "learning_rate": 3.9794980233520275e-05, + "loss": 0.8663, + "step": 22200 + }, + { + "epoch": 2.04, + "learning_rate": 3.979038337776961e-05, + "loss": 0.955, + "step": 22210 + }, + { + "epoch": 2.04, + "learning_rate": 3.978578652201894e-05, + "loss": 0.9652, + "step": 22220 + }, + { + "epoch": 2.04, + "learning_rate": 3.978118966626828e-05, + "loss": 0.866, + "step": 22230 + }, + { + "epoch": 2.04, + "learning_rate": 3.977659281051761e-05, + "loss": 0.9743, + "step": 22240 + }, + { + "epoch": 2.05, + "learning_rate": 3.9771995954766943e-05, + "loss": 0.9128, + "step": 22250 + }, + { + "epoch": 2.05, + "learning_rate": 3.976739909901627e-05, + "loss": 0.769, + "step": 22260 + }, + { + "epoch": 2.05, + "learning_rate": 3.976280224326561e-05, + "loss": 0.8548, + "step": 22270 + }, + { + "epoch": 2.05, + "learning_rate": 3.975820538751494e-05, + "loss": 0.831, + "step": 22280 + }, + { + "epoch": 2.05, + "learning_rate": 3.9753608531764276e-05, + "loss": 0.9473, + "step": 22290 + }, + { + "epoch": 2.05, + "learning_rate": 3.9749011676013606e-05, + "loss": 0.8991, + "step": 22300 + }, + { + "epoch": 2.05, + "learning_rate": 3.974441482026294e-05, + "loss": 0.9814, + "step": 22310 + }, + { + "epoch": 2.05, + "learning_rate": 3.973981796451228e-05, + "loss": 0.9239, + "step": 22320 + }, + { + "epoch": 2.05, + "learning_rate": 3.973522110876161e-05, + "loss": 0.9418, + "step": 22330 + }, + { + "epoch": 2.05, + "learning_rate": 3.9730624253010945e-05, + "loss": 0.9405, + "step": 22340 + }, + { + "epoch": 2.05, + "learning_rate": 3.9726027397260274e-05, + "loss": 0.8766, + "step": 22350 + }, + { + "epoch": 2.06, + "learning_rate": 3.972143054150961e-05, + "loss": 0.9466, + "step": 22360 + }, + { + "epoch": 2.06, + "learning_rate": 3.971683368575894e-05, + "loss": 0.7956, + "step": 22370 + }, + { + "epoch": 2.06, + "learning_rate": 3.971223683000828e-05, + "loss": 0.8672, + "step": 22380 + }, + { + "epoch": 2.06, + "learning_rate": 3.970763997425761e-05, + "loss": 0.837, + "step": 22390 + }, + { + "epoch": 2.06, + "learning_rate": 3.970304311850694e-05, + "loss": 0.9252, + "step": 22400 + }, + { + "epoch": 2.06, + "learning_rate": 3.969844626275628e-05, + "loss": 0.8791, + "step": 22410 + }, + { + "epoch": 2.06, + "learning_rate": 3.969384940700561e-05, + "loss": 0.886, + "step": 22420 + }, + { + "epoch": 2.06, + "learning_rate": 3.9689252551254946e-05, + "loss": 0.8833, + "step": 22430 + }, + { + "epoch": 2.06, + "learning_rate": 3.9684655695504276e-05, + "loss": 0.8767, + "step": 22440 + }, + { + "epoch": 2.06, + "learning_rate": 3.968005883975361e-05, + "loss": 1.0463, + "step": 22450 + }, + { + "epoch": 2.06, + "learning_rate": 3.967546198400294e-05, + "loss": 0.8214, + "step": 22460 + }, + { + "epoch": 2.07, + "learning_rate": 3.967086512825228e-05, + "loss": 0.9483, + "step": 22470 + }, + { + "epoch": 2.07, + "learning_rate": 3.966626827250161e-05, + "loss": 0.8843, + "step": 22480 + }, + { + "epoch": 2.07, + "learning_rate": 3.9661671416750944e-05, + "loss": 0.9276, + "step": 22490 + }, + { + "epoch": 2.07, + "learning_rate": 3.965707456100028e-05, + "loss": 0.8846, + "step": 22500 + }, + { + "epoch": 2.07, + "learning_rate": 3.965247770524961e-05, + "loss": 0.9452, + "step": 22510 + }, + { + "epoch": 2.07, + "learning_rate": 3.964788084949895e-05, + "loss": 0.862, + "step": 22520 + }, + { + "epoch": 2.07, + "learning_rate": 3.964328399374828e-05, + "loss": 0.7622, + "step": 22530 + }, + { + "epoch": 2.07, + "learning_rate": 3.963868713799761e-05, + "loss": 0.9097, + "step": 22540 + }, + { + "epoch": 2.07, + "learning_rate": 3.963409028224694e-05, + "loss": 1.0355, + "step": 22550 + }, + { + "epoch": 2.07, + "learning_rate": 3.962949342649628e-05, + "loss": 0.8786, + "step": 22560 + }, + { + "epoch": 2.08, + "learning_rate": 3.962489657074561e-05, + "loss": 0.8725, + "step": 22570 + }, + { + "epoch": 2.08, + "learning_rate": 3.9620299714994946e-05, + "loss": 1.0371, + "step": 22580 + }, + { + "epoch": 2.08, + "learning_rate": 3.961570285924428e-05, + "loss": 0.8719, + "step": 22590 + }, + { + "epoch": 2.08, + "learning_rate": 3.961110600349361e-05, + "loss": 0.9711, + "step": 22600 + }, + { + "epoch": 2.08, + "learning_rate": 3.960650914774295e-05, + "loss": 0.7916, + "step": 22610 + }, + { + "epoch": 2.08, + "learning_rate": 3.960191229199228e-05, + "loss": 0.912, + "step": 22620 + }, + { + "epoch": 2.08, + "learning_rate": 3.959731543624161e-05, + "loss": 0.8577, + "step": 22630 + }, + { + "epoch": 2.08, + "learning_rate": 3.9592718580490944e-05, + "loss": 0.9352, + "step": 22640 + }, + { + "epoch": 2.08, + "learning_rate": 3.958812172474028e-05, + "loss": 0.8243, + "step": 22650 + }, + { + "epoch": 2.08, + "learning_rate": 3.958352486898961e-05, + "loss": 0.9307, + "step": 22660 + }, + { + "epoch": 2.08, + "learning_rate": 3.957892801323895e-05, + "loss": 0.8343, + "step": 22670 + }, + { + "epoch": 2.09, + "learning_rate": 3.957433115748828e-05, + "loss": 0.8719, + "step": 22680 + }, + { + "epoch": 2.09, + "learning_rate": 3.956973430173761e-05, + "loss": 0.9309, + "step": 22690 + }, + { + "epoch": 2.09, + "learning_rate": 3.956513744598695e-05, + "loss": 0.879, + "step": 22700 + }, + { + "epoch": 2.09, + "learning_rate": 3.956054059023628e-05, + "loss": 0.9255, + "step": 22710 + }, + { + "epoch": 2.09, + "learning_rate": 3.955594373448561e-05, + "loss": 0.8922, + "step": 22720 + }, + { + "epoch": 2.09, + "learning_rate": 3.9551346878734945e-05, + "loss": 0.8244, + "step": 22730 + }, + { + "epoch": 2.09, + "learning_rate": 3.954675002298428e-05, + "loss": 0.969, + "step": 22740 + }, + { + "epoch": 2.09, + "learning_rate": 3.954215316723361e-05, + "loss": 0.8789, + "step": 22750 + }, + { + "epoch": 2.09, + "learning_rate": 3.953755631148295e-05, + "loss": 0.9168, + "step": 22760 + }, + { + "epoch": 2.09, + "learning_rate": 3.9532959455732284e-05, + "loss": 0.8747, + "step": 22770 + }, + { + "epoch": 2.09, + "learning_rate": 3.9528362599981614e-05, + "loss": 0.8951, + "step": 22780 + }, + { + "epoch": 2.1, + "learning_rate": 3.952376574423095e-05, + "loss": 0.8626, + "step": 22790 + }, + { + "epoch": 2.1, + "learning_rate": 3.951916888848028e-05, + "loss": 0.9785, + "step": 22800 + }, + { + "epoch": 2.1, + "learning_rate": 3.951457203272961e-05, + "loss": 0.8761, + "step": 22810 + }, + { + "epoch": 2.1, + "learning_rate": 3.9509975176978947e-05, + "loss": 0.9182, + "step": 22820 + }, + { + "epoch": 2.1, + "learning_rate": 3.950537832122828e-05, + "loss": 0.9115, + "step": 22830 + }, + { + "epoch": 2.1, + "learning_rate": 3.950078146547761e-05, + "loss": 0.8233, + "step": 22840 + }, + { + "epoch": 2.1, + "learning_rate": 3.949618460972695e-05, + "loss": 0.898, + "step": 22850 + }, + { + "epoch": 2.1, + "learning_rate": 3.9491587753976286e-05, + "loss": 0.8902, + "step": 22860 + }, + { + "epoch": 2.1, + "learning_rate": 3.9486990898225615e-05, + "loss": 0.9901, + "step": 22870 + }, + { + "epoch": 2.1, + "learning_rate": 3.948239404247495e-05, + "loss": 0.8921, + "step": 22880 + }, + { + "epoch": 2.1, + "learning_rate": 3.947779718672428e-05, + "loss": 0.9265, + "step": 22890 + }, + { + "epoch": 2.11, + "learning_rate": 3.947320033097361e-05, + "loss": 0.9178, + "step": 22900 + }, + { + "epoch": 2.11, + "learning_rate": 3.946860347522295e-05, + "loss": 0.87, + "step": 22910 + }, + { + "epoch": 2.11, + "learning_rate": 3.9464006619472284e-05, + "loss": 0.8272, + "step": 22920 + }, + { + "epoch": 2.11, + "learning_rate": 3.9459409763721614e-05, + "loss": 0.9655, + "step": 22930 + }, + { + "epoch": 2.11, + "learning_rate": 3.945481290797095e-05, + "loss": 0.8808, + "step": 22940 + }, + { + "epoch": 2.11, + "learning_rate": 3.945021605222029e-05, + "loss": 0.9131, + "step": 22950 + }, + { + "epoch": 2.11, + "learning_rate": 3.9445619196469617e-05, + "loss": 0.8067, + "step": 22960 + }, + { + "epoch": 2.11, + "learning_rate": 3.944102234071895e-05, + "loss": 0.9955, + "step": 22970 + }, + { + "epoch": 2.11, + "learning_rate": 3.943642548496828e-05, + "loss": 0.89, + "step": 22980 + }, + { + "epoch": 2.11, + "learning_rate": 3.943182862921761e-05, + "loss": 0.87, + "step": 22990 + }, + { + "epoch": 2.11, + "learning_rate": 3.942723177346695e-05, + "loss": 0.8704, + "step": 23000 + }, + { + "epoch": 2.12, + "learning_rate": 3.9422634917716285e-05, + "loss": 0.9078, + "step": 23010 + }, + { + "epoch": 2.12, + "learning_rate": 3.9418038061965615e-05, + "loss": 0.8811, + "step": 23020 + }, + { + "epoch": 2.12, + "learning_rate": 3.941344120621495e-05, + "loss": 1.02, + "step": 23030 + }, + { + "epoch": 2.12, + "learning_rate": 3.940884435046429e-05, + "loss": 0.857, + "step": 23040 + }, + { + "epoch": 2.12, + "learning_rate": 3.940424749471362e-05, + "loss": 0.9148, + "step": 23050 + }, + { + "epoch": 2.12, + "learning_rate": 3.9399650638962954e-05, + "loss": 0.9369, + "step": 23060 + }, + { + "epoch": 2.12, + "learning_rate": 3.9395053783212284e-05, + "loss": 0.8441, + "step": 23070 + }, + { + "epoch": 2.12, + "learning_rate": 3.9390456927461614e-05, + "loss": 0.9388, + "step": 23080 + }, + { + "epoch": 2.12, + "learning_rate": 3.938586007171095e-05, + "loss": 0.9744, + "step": 23090 + }, + { + "epoch": 2.12, + "learning_rate": 3.9381263215960287e-05, + "loss": 0.9177, + "step": 23100 + }, + { + "epoch": 2.12, + "learning_rate": 3.9376666360209616e-05, + "loss": 0.8752, + "step": 23110 + }, + { + "epoch": 2.13, + "learning_rate": 3.937206950445895e-05, + "loss": 0.9393, + "step": 23120 + }, + { + "epoch": 2.13, + "learning_rate": 3.936747264870829e-05, + "loss": 0.9741, + "step": 23130 + }, + { + "epoch": 2.13, + "learning_rate": 3.936287579295762e-05, + "loss": 0.9203, + "step": 23140 + }, + { + "epoch": 2.13, + "learning_rate": 3.9358278937206955e-05, + "loss": 1.1124, + "step": 23150 + }, + { + "epoch": 2.13, + "learning_rate": 3.9353682081456285e-05, + "loss": 0.9222, + "step": 23160 + }, + { + "epoch": 2.13, + "learning_rate": 3.9349085225705615e-05, + "loss": 0.8574, + "step": 23170 + }, + { + "epoch": 2.13, + "learning_rate": 3.934448836995495e-05, + "loss": 0.9508, + "step": 23180 + }, + { + "epoch": 2.13, + "learning_rate": 3.933989151420429e-05, + "loss": 0.8828, + "step": 23190 + }, + { + "epoch": 2.13, + "learning_rate": 3.933529465845362e-05, + "loss": 0.9973, + "step": 23200 + }, + { + "epoch": 2.13, + "learning_rate": 3.9330697802702954e-05, + "loss": 0.8398, + "step": 23210 + }, + { + "epoch": 2.13, + "learning_rate": 3.932610094695229e-05, + "loss": 0.9062, + "step": 23220 + }, + { + "epoch": 2.14, + "learning_rate": 3.932150409120162e-05, + "loss": 0.8724, + "step": 23230 + }, + { + "epoch": 2.14, + "learning_rate": 3.931690723545096e-05, + "loss": 0.9364, + "step": 23240 + }, + { + "epoch": 2.14, + "learning_rate": 3.9312310379700286e-05, + "loss": 0.8984, + "step": 23250 + }, + { + "epoch": 2.14, + "learning_rate": 3.9307713523949616e-05, + "loss": 0.8438, + "step": 23260 + }, + { + "epoch": 2.14, + "learning_rate": 3.930311666819895e-05, + "loss": 0.8541, + "step": 23270 + }, + { + "epoch": 2.14, + "learning_rate": 3.929851981244829e-05, + "loss": 0.944, + "step": 23280 + }, + { + "epoch": 2.14, + "learning_rate": 3.929392295669762e-05, + "loss": 0.8434, + "step": 23290 + }, + { + "epoch": 2.14, + "learning_rate": 3.9289326100946955e-05, + "loss": 0.8106, + "step": 23300 + }, + { + "epoch": 2.14, + "learning_rate": 3.928472924519629e-05, + "loss": 1.0658, + "step": 23310 + }, + { + "epoch": 2.14, + "learning_rate": 3.928013238944562e-05, + "loss": 0.8612, + "step": 23320 + }, + { + "epoch": 2.14, + "learning_rate": 3.927553553369496e-05, + "loss": 0.9346, + "step": 23330 + }, + { + "epoch": 2.15, + "learning_rate": 3.927093867794429e-05, + "loss": 0.8088, + "step": 23340 + }, + { + "epoch": 2.15, + "learning_rate": 3.926634182219362e-05, + "loss": 0.929, + "step": 23350 + }, + { + "epoch": 2.15, + "learning_rate": 3.9261744966442954e-05, + "loss": 1.0178, + "step": 23360 + }, + { + "epoch": 2.15, + "learning_rate": 3.925714811069229e-05, + "loss": 0.7947, + "step": 23370 + }, + { + "epoch": 2.15, + "learning_rate": 3.925255125494162e-05, + "loss": 0.8329, + "step": 23380 + }, + { + "epoch": 2.15, + "learning_rate": 3.9247954399190956e-05, + "loss": 0.8393, + "step": 23390 + }, + { + "epoch": 2.15, + "learning_rate": 3.924335754344029e-05, + "loss": 0.929, + "step": 23400 + }, + { + "epoch": 2.15, + "learning_rate": 3.923876068768962e-05, + "loss": 0.9178, + "step": 23410 + }, + { + "epoch": 2.15, + "learning_rate": 3.923416383193896e-05, + "loss": 0.7708, + "step": 23420 + }, + { + "epoch": 2.15, + "learning_rate": 3.922956697618829e-05, + "loss": 0.8436, + "step": 23430 + }, + { + "epoch": 2.16, + "learning_rate": 3.922497012043762e-05, + "loss": 1.0939, + "step": 23440 + }, + { + "epoch": 2.16, + "learning_rate": 3.9220373264686955e-05, + "loss": 0.9942, + "step": 23450 + }, + { + "epoch": 2.16, + "learning_rate": 3.921577640893629e-05, + "loss": 0.8565, + "step": 23460 + }, + { + "epoch": 2.16, + "learning_rate": 3.921117955318562e-05, + "loss": 0.794, + "step": 23470 + }, + { + "epoch": 2.16, + "learning_rate": 3.920658269743496e-05, + "loss": 0.8765, + "step": 23480 + }, + { + "epoch": 2.16, + "learning_rate": 3.9201985841684294e-05, + "loss": 0.8813, + "step": 23490 + }, + { + "epoch": 2.16, + "learning_rate": 3.9197388985933624e-05, + "loss": 0.8736, + "step": 23500 + }, + { + "epoch": 2.16, + "learning_rate": 3.919279213018296e-05, + "loss": 0.8556, + "step": 23510 + }, + { + "epoch": 2.16, + "learning_rate": 3.918819527443229e-05, + "loss": 0.9074, + "step": 23520 + }, + { + "epoch": 2.16, + "learning_rate": 3.918359841868162e-05, + "loss": 0.9765, + "step": 23530 + }, + { + "epoch": 2.16, + "learning_rate": 3.9179001562930956e-05, + "loss": 0.8347, + "step": 23540 + }, + { + "epoch": 2.17, + "learning_rate": 3.917440470718029e-05, + "loss": 0.7948, + "step": 23550 + }, + { + "epoch": 2.17, + "learning_rate": 3.916980785142962e-05, + "loss": 0.9687, + "step": 23560 + }, + { + "epoch": 2.17, + "learning_rate": 3.916521099567896e-05, + "loss": 0.8389, + "step": 23570 + }, + { + "epoch": 2.17, + "learning_rate": 3.9160614139928295e-05, + "loss": 0.9639, + "step": 23580 + }, + { + "epoch": 2.17, + "learning_rate": 3.9156017284177625e-05, + "loss": 0.9177, + "step": 23590 + }, + { + "epoch": 2.17, + "learning_rate": 3.915142042842696e-05, + "loss": 0.8366, + "step": 23600 + }, + { + "epoch": 2.17, + "learning_rate": 3.914682357267629e-05, + "loss": 0.8971, + "step": 23610 + }, + { + "epoch": 2.17, + "learning_rate": 3.914222671692562e-05, + "loss": 0.8247, + "step": 23620 + }, + { + "epoch": 2.17, + "learning_rate": 3.913762986117496e-05, + "loss": 0.8694, + "step": 23630 + }, + { + "epoch": 2.17, + "learning_rate": 3.9133033005424294e-05, + "loss": 1.0112, + "step": 23640 + }, + { + "epoch": 2.17, + "learning_rate": 3.9128436149673623e-05, + "loss": 0.894, + "step": 23650 + }, + { + "epoch": 2.18, + "learning_rate": 3.912383929392296e-05, + "loss": 0.8291, + "step": 23660 + }, + { + "epoch": 2.18, + "learning_rate": 3.9119242438172296e-05, + "loss": 0.9751, + "step": 23670 + }, + { + "epoch": 2.18, + "learning_rate": 3.9114645582421626e-05, + "loss": 0.9047, + "step": 23680 + }, + { + "epoch": 2.18, + "learning_rate": 3.911004872667096e-05, + "loss": 0.9514, + "step": 23690 + }, + { + "epoch": 2.18, + "learning_rate": 3.910545187092029e-05, + "loss": 1.0161, + "step": 23700 + }, + { + "epoch": 2.18, + "learning_rate": 3.910085501516962e-05, + "loss": 0.8583, + "step": 23710 + }, + { + "epoch": 2.18, + "learning_rate": 3.909625815941896e-05, + "loss": 0.9442, + "step": 23720 + }, + { + "epoch": 2.18, + "learning_rate": 3.9091661303668295e-05, + "loss": 0.9246, + "step": 23730 + }, + { + "epoch": 2.18, + "learning_rate": 3.9087064447917625e-05, + "loss": 0.982, + "step": 23740 + }, + { + "epoch": 2.18, + "learning_rate": 3.908246759216696e-05, + "loss": 0.8849, + "step": 23750 + }, + { + "epoch": 2.18, + "learning_rate": 3.90778707364163e-05, + "loss": 0.8887, + "step": 23760 + }, + { + "epoch": 2.19, + "learning_rate": 3.907327388066563e-05, + "loss": 0.8549, + "step": 23770 + }, + { + "epoch": 2.19, + "learning_rate": 3.9068677024914964e-05, + "loss": 0.9033, + "step": 23780 + }, + { + "epoch": 2.19, + "learning_rate": 3.9064080169164294e-05, + "loss": 0.8888, + "step": 23790 + }, + { + "epoch": 2.19, + "learning_rate": 3.905948331341362e-05, + "loss": 0.901, + "step": 23800 + }, + { + "epoch": 2.19, + "learning_rate": 3.905488645766296e-05, + "loss": 0.8417, + "step": 23810 + }, + { + "epoch": 2.19, + "learning_rate": 3.9050289601912296e-05, + "loss": 0.9891, + "step": 23820 + }, + { + "epoch": 2.19, + "learning_rate": 3.9045692746161626e-05, + "loss": 0.9673, + "step": 23830 + }, + { + "epoch": 2.19, + "learning_rate": 3.904109589041096e-05, + "loss": 0.885, + "step": 23840 + }, + { + "epoch": 2.19, + "learning_rate": 3.90364990346603e-05, + "loss": 0.9451, + "step": 23850 + }, + { + "epoch": 2.19, + "learning_rate": 3.903190217890963e-05, + "loss": 0.9519, + "step": 23860 + }, + { + "epoch": 2.19, + "learning_rate": 3.9027305323158965e-05, + "loss": 0.9843, + "step": 23870 + }, + { + "epoch": 2.2, + "learning_rate": 3.9022708467408295e-05, + "loss": 0.8818, + "step": 23880 + }, + { + "epoch": 2.2, + "learning_rate": 3.9018111611657624e-05, + "loss": 0.7951, + "step": 23890 + }, + { + "epoch": 2.2, + "learning_rate": 3.901351475590696e-05, + "loss": 0.8305, + "step": 23900 + }, + { + "epoch": 2.2, + "learning_rate": 3.90089179001563e-05, + "loss": 0.8784, + "step": 23910 + }, + { + "epoch": 2.2, + "learning_rate": 3.900432104440563e-05, + "loss": 0.9685, + "step": 23920 + }, + { + "epoch": 2.2, + "learning_rate": 3.8999724188654964e-05, + "loss": 0.9353, + "step": 23930 + }, + { + "epoch": 2.2, + "learning_rate": 3.899512733290429e-05, + "loss": 0.8558, + "step": 23940 + }, + { + "epoch": 2.2, + "learning_rate": 3.899053047715363e-05, + "loss": 0.9285, + "step": 23950 + }, + { + "epoch": 2.2, + "learning_rate": 3.8985933621402966e-05, + "loss": 0.8846, + "step": 23960 + }, + { + "epoch": 2.2, + "learning_rate": 3.8981336765652296e-05, + "loss": 0.9073, + "step": 23970 + }, + { + "epoch": 2.2, + "learning_rate": 3.8976739909901626e-05, + "loss": 0.9302, + "step": 23980 + }, + { + "epoch": 2.21, + "learning_rate": 3.897214305415096e-05, + "loss": 0.8722, + "step": 23990 + }, + { + "epoch": 2.21, + "learning_rate": 3.89675461984003e-05, + "loss": 0.8386, + "step": 24000 + }, + { + "epoch": 2.21, + "learning_rate": 3.896294934264963e-05, + "loss": 0.9517, + "step": 24010 + }, + { + "epoch": 2.21, + "learning_rate": 3.8958352486898965e-05, + "loss": 0.899, + "step": 24020 + }, + { + "epoch": 2.21, + "learning_rate": 3.8953755631148294e-05, + "loss": 1.007, + "step": 24030 + }, + { + "epoch": 2.21, + "learning_rate": 3.894915877539763e-05, + "loss": 0.7857, + "step": 24040 + }, + { + "epoch": 2.21, + "learning_rate": 3.894456191964697e-05, + "loss": 0.8662, + "step": 24050 + }, + { + "epoch": 2.21, + "learning_rate": 3.89399650638963e-05, + "loss": 0.8781, + "step": 24060 + }, + { + "epoch": 2.21, + "learning_rate": 3.893536820814563e-05, + "loss": 0.9055, + "step": 24070 + }, + { + "epoch": 2.21, + "learning_rate": 3.893077135239496e-05, + "loss": 0.8759, + "step": 24080 + }, + { + "epoch": 2.21, + "learning_rate": 3.89261744966443e-05, + "loss": 0.9124, + "step": 24090 + }, + { + "epoch": 2.22, + "learning_rate": 3.892157764089363e-05, + "loss": 0.8988, + "step": 24100 + }, + { + "epoch": 2.22, + "learning_rate": 3.8916980785142966e-05, + "loss": 0.9284, + "step": 24110 + }, + { + "epoch": 2.22, + "learning_rate": 3.8912383929392296e-05, + "loss": 0.9278, + "step": 24120 + }, + { + "epoch": 2.22, + "learning_rate": 3.890778707364163e-05, + "loss": 0.8549, + "step": 24130 + }, + { + "epoch": 2.22, + "learning_rate": 3.890319021789097e-05, + "loss": 0.9773, + "step": 24140 + }, + { + "epoch": 2.22, + "learning_rate": 3.88985933621403e-05, + "loss": 0.8032, + "step": 24150 + }, + { + "epoch": 2.22, + "learning_rate": 3.889399650638963e-05, + "loss": 0.7664, + "step": 24160 + }, + { + "epoch": 2.22, + "learning_rate": 3.8889399650638964e-05, + "loss": 0.8388, + "step": 24170 + }, + { + "epoch": 2.22, + "learning_rate": 3.88848027948883e-05, + "loss": 0.9958, + "step": 24180 + }, + { + "epoch": 2.22, + "learning_rate": 3.888020593913763e-05, + "loss": 0.9024, + "step": 24190 + }, + { + "epoch": 2.22, + "learning_rate": 3.887560908338697e-05, + "loss": 0.9909, + "step": 24200 + }, + { + "epoch": 2.23, + "learning_rate": 3.88710122276363e-05, + "loss": 0.9357, + "step": 24210 + }, + { + "epoch": 2.23, + "learning_rate": 3.886641537188563e-05, + "loss": 0.8767, + "step": 24220 + }, + { + "epoch": 2.23, + "learning_rate": 3.886181851613497e-05, + "loss": 0.8585, + "step": 24230 + }, + { + "epoch": 2.23, + "learning_rate": 3.88572216603843e-05, + "loss": 0.918, + "step": 24240 + }, + { + "epoch": 2.23, + "learning_rate": 3.885262480463363e-05, + "loss": 0.9013, + "step": 24250 + }, + { + "epoch": 2.23, + "learning_rate": 3.8848027948882966e-05, + "loss": 0.8776, + "step": 24260 + }, + { + "epoch": 2.23, + "learning_rate": 3.8843431093132295e-05, + "loss": 0.995, + "step": 24270 + }, + { + "epoch": 2.23, + "learning_rate": 3.883883423738163e-05, + "loss": 0.9256, + "step": 24280 + }, + { + "epoch": 2.23, + "learning_rate": 3.883423738163097e-05, + "loss": 0.8704, + "step": 24290 + }, + { + "epoch": 2.23, + "learning_rate": 3.88296405258803e-05, + "loss": 0.7554, + "step": 24300 + }, + { + "epoch": 2.23, + "learning_rate": 3.8825043670129635e-05, + "loss": 0.9684, + "step": 24310 + }, + { + "epoch": 2.24, + "learning_rate": 3.882044681437897e-05, + "loss": 0.9906, + "step": 24320 + }, + { + "epoch": 2.24, + "learning_rate": 3.88158499586283e-05, + "loss": 0.9158, + "step": 24330 + }, + { + "epoch": 2.24, + "learning_rate": 3.881125310287763e-05, + "loss": 0.857, + "step": 24340 + }, + { + "epoch": 2.24, + "learning_rate": 3.880665624712697e-05, + "loss": 0.8544, + "step": 24350 + }, + { + "epoch": 2.24, + "learning_rate": 3.8802059391376297e-05, + "loss": 0.7842, + "step": 24360 + }, + { + "epoch": 2.24, + "learning_rate": 3.879746253562563e-05, + "loss": 0.9272, + "step": 24370 + }, + { + "epoch": 2.24, + "learning_rate": 3.879286567987497e-05, + "loss": 0.8335, + "step": 24380 + }, + { + "epoch": 2.24, + "learning_rate": 3.87882688241243e-05, + "loss": 1.0488, + "step": 24390 + }, + { + "epoch": 2.24, + "learning_rate": 3.8783671968373636e-05, + "loss": 0.8766, + "step": 24400 + }, + { + "epoch": 2.24, + "learning_rate": 3.877907511262297e-05, + "loss": 0.8361, + "step": 24410 + }, + { + "epoch": 2.25, + "learning_rate": 3.87744782568723e-05, + "loss": 0.9173, + "step": 24420 + }, + { + "epoch": 2.25, + "learning_rate": 3.876988140112163e-05, + "loss": 0.9092, + "step": 24430 + }, + { + "epoch": 2.25, + "learning_rate": 3.876528454537097e-05, + "loss": 0.9601, + "step": 24440 + }, + { + "epoch": 2.25, + "learning_rate": 3.87606876896203e-05, + "loss": 0.8937, + "step": 24450 + }, + { + "epoch": 2.25, + "learning_rate": 3.8756090833869634e-05, + "loss": 0.9442, + "step": 24460 + }, + { + "epoch": 2.25, + "learning_rate": 3.875149397811897e-05, + "loss": 0.9871, + "step": 24470 + }, + { + "epoch": 2.25, + "learning_rate": 3.87468971223683e-05, + "loss": 0.8141, + "step": 24480 + }, + { + "epoch": 2.25, + "learning_rate": 3.874230026661764e-05, + "loss": 0.8113, + "step": 24490 + }, + { + "epoch": 2.25, + "learning_rate": 3.873770341086697e-05, + "loss": 0.7601, + "step": 24500 + }, + { + "epoch": 2.25, + "learning_rate": 3.87331065551163e-05, + "loss": 0.9256, + "step": 24510 + }, + { + "epoch": 2.25, + "learning_rate": 3.872850969936563e-05, + "loss": 0.8278, + "step": 24520 + }, + { + "epoch": 2.26, + "learning_rate": 3.872391284361497e-05, + "loss": 0.8762, + "step": 24530 + }, + { + "epoch": 2.26, + "learning_rate": 3.87193159878643e-05, + "loss": 0.9135, + "step": 24540 + }, + { + "epoch": 2.26, + "learning_rate": 3.8714719132113635e-05, + "loss": 0.8955, + "step": 24550 + }, + { + "epoch": 2.26, + "learning_rate": 3.871012227636297e-05, + "loss": 0.885, + "step": 24560 + }, + { + "epoch": 2.26, + "learning_rate": 3.87055254206123e-05, + "loss": 0.8196, + "step": 24570 + }, + { + "epoch": 2.26, + "learning_rate": 3.870092856486164e-05, + "loss": 0.8553, + "step": 24580 + }, + { + "epoch": 2.26, + "learning_rate": 3.8696331709110975e-05, + "loss": 0.9216, + "step": 24590 + }, + { + "epoch": 2.26, + "learning_rate": 3.86917348533603e-05, + "loss": 0.9979, + "step": 24600 + }, + { + "epoch": 2.26, + "learning_rate": 3.8687137997609634e-05, + "loss": 0.8685, + "step": 24610 + }, + { + "epoch": 2.26, + "learning_rate": 3.868254114185897e-05, + "loss": 0.9263, + "step": 24620 + }, + { + "epoch": 2.26, + "learning_rate": 3.86779442861083e-05, + "loss": 0.9687, + "step": 24630 + }, + { + "epoch": 2.27, + "learning_rate": 3.867334743035764e-05, + "loss": 0.7731, + "step": 24640 + }, + { + "epoch": 2.27, + "learning_rate": 3.866875057460697e-05, + "loss": 0.982, + "step": 24650 + }, + { + "epoch": 2.27, + "learning_rate": 3.86641537188563e-05, + "loss": 0.9328, + "step": 24660 + }, + { + "epoch": 2.27, + "learning_rate": 3.865955686310564e-05, + "loss": 0.9946, + "step": 24670 + }, + { + "epoch": 2.27, + "learning_rate": 3.8654960007354976e-05, + "loss": 0.8228, + "step": 24680 + }, + { + "epoch": 2.27, + "learning_rate": 3.86503631516043e-05, + "loss": 0.803, + "step": 24690 + }, + { + "epoch": 2.27, + "learning_rate": 3.8645766295853635e-05, + "loss": 0.8687, + "step": 24700 + }, + { + "epoch": 2.27, + "learning_rate": 3.864116944010297e-05, + "loss": 0.7943, + "step": 24710 + }, + { + "epoch": 2.27, + "learning_rate": 3.86365725843523e-05, + "loss": 0.8824, + "step": 24720 + }, + { + "epoch": 2.27, + "learning_rate": 3.863197572860164e-05, + "loss": 0.9019, + "step": 24730 + }, + { + "epoch": 2.27, + "learning_rate": 3.8627378872850974e-05, + "loss": 0.8322, + "step": 24740 + }, + { + "epoch": 2.28, + "learning_rate": 3.8622782017100304e-05, + "loss": 1.023, + "step": 24750 + }, + { + "epoch": 2.28, + "learning_rate": 3.861818516134964e-05, + "loss": 0.8711, + "step": 24760 + }, + { + "epoch": 2.28, + "learning_rate": 3.861358830559898e-05, + "loss": 0.9005, + "step": 24770 + }, + { + "epoch": 2.28, + "learning_rate": 3.86089914498483e-05, + "loss": 0.8615, + "step": 24780 + }, + { + "epoch": 2.28, + "learning_rate": 3.8604394594097636e-05, + "loss": 0.9689, + "step": 24790 + }, + { + "epoch": 2.28, + "learning_rate": 3.859979773834697e-05, + "loss": 0.773, + "step": 24800 + }, + { + "epoch": 2.28, + "learning_rate": 3.85952008825963e-05, + "loss": 0.8651, + "step": 24810 + }, + { + "epoch": 2.28, + "learning_rate": 3.859060402684564e-05, + "loss": 0.8595, + "step": 24820 + }, + { + "epoch": 2.28, + "learning_rate": 3.8586007171094976e-05, + "loss": 0.9356, + "step": 24830 + }, + { + "epoch": 2.28, + "learning_rate": 3.8581410315344305e-05, + "loss": 0.9088, + "step": 24840 + }, + { + "epoch": 2.28, + "learning_rate": 3.857681345959364e-05, + "loss": 0.8301, + "step": 24850 + }, + { + "epoch": 2.29, + "learning_rate": 3.857221660384298e-05, + "loss": 0.9789, + "step": 24860 + }, + { + "epoch": 2.29, + "learning_rate": 3.85676197480923e-05, + "loss": 0.7929, + "step": 24870 + }, + { + "epoch": 2.29, + "learning_rate": 3.856302289234164e-05, + "loss": 0.8646, + "step": 24880 + }, + { + "epoch": 2.29, + "learning_rate": 3.8558426036590974e-05, + "loss": 0.8542, + "step": 24890 + }, + { + "epoch": 2.29, + "learning_rate": 3.8553829180840304e-05, + "loss": 0.8829, + "step": 24900 + }, + { + "epoch": 2.29, + "learning_rate": 3.854923232508964e-05, + "loss": 0.8957, + "step": 24910 + }, + { + "epoch": 2.29, + "learning_rate": 3.854463546933898e-05, + "loss": 0.7552, + "step": 24920 + }, + { + "epoch": 2.29, + "learning_rate": 3.8540038613588306e-05, + "loss": 0.8169, + "step": 24930 + }, + { + "epoch": 2.29, + "learning_rate": 3.853544175783764e-05, + "loss": 0.8061, + "step": 24940 + }, + { + "epoch": 2.29, + "learning_rate": 3.853084490208698e-05, + "loss": 0.848, + "step": 24950 + }, + { + "epoch": 2.29, + "learning_rate": 3.85262480463363e-05, + "loss": 1.103, + "step": 24960 + }, + { + "epoch": 2.3, + "learning_rate": 3.852165119058564e-05, + "loss": 0.9931, + "step": 24970 + }, + { + "epoch": 2.3, + "learning_rate": 3.8517054334834975e-05, + "loss": 0.8699, + "step": 24980 + }, + { + "epoch": 2.3, + "learning_rate": 3.8512457479084305e-05, + "loss": 0.7692, + "step": 24990 + }, + { + "epoch": 2.3, + "learning_rate": 3.850786062333364e-05, + "loss": 0.8503, + "step": 25000 + }, + { + "epoch": 2.3, + "eval_accuracy": 0.5792576419213974, + "eval_loss": 0.8908177614212036, + "eval_runtime": 159.4739, + "eval_samples_per_second": 28.719, + "eval_steps_per_second": 3.593, + "step": 25000 + }, + { + "epoch": 2.3, + "learning_rate": 3.850326376758298e-05, + "loss": 0.9654, + "step": 25010 + }, + { + "epoch": 2.3, + "learning_rate": 3.849866691183231e-05, + "loss": 0.9587, + "step": 25020 + }, + { + "epoch": 2.3, + "learning_rate": 3.8494070056081644e-05, + "loss": 0.8202, + "step": 25030 + }, + { + "epoch": 2.3, + "learning_rate": 3.848947320033098e-05, + "loss": 0.9115, + "step": 25040 + }, + { + "epoch": 2.3, + "learning_rate": 3.8484876344580303e-05, + "loss": 0.9631, + "step": 25050 + }, + { + "epoch": 2.3, + "learning_rate": 3.848027948882964e-05, + "loss": 0.9608, + "step": 25060 + }, + { + "epoch": 2.3, + "learning_rate": 3.8475682633078976e-05, + "loss": 0.8807, + "step": 25070 + }, + { + "epoch": 2.31, + "learning_rate": 3.8471085777328306e-05, + "loss": 0.8783, + "step": 25080 + }, + { + "epoch": 2.31, + "learning_rate": 3.846648892157764e-05, + "loss": 0.8961, + "step": 25090 + }, + { + "epoch": 2.31, + "learning_rate": 3.846189206582698e-05, + "loss": 0.9109, + "step": 25100 + }, + { + "epoch": 2.31, + "learning_rate": 3.845729521007631e-05, + "loss": 1.0039, + "step": 25110 + }, + { + "epoch": 2.31, + "learning_rate": 3.8452698354325645e-05, + "loss": 0.8841, + "step": 25120 + }, + { + "epoch": 2.31, + "learning_rate": 3.844810149857498e-05, + "loss": 0.7627, + "step": 25130 + }, + { + "epoch": 2.31, + "learning_rate": 3.8443504642824305e-05, + "loss": 0.9236, + "step": 25140 + }, + { + "epoch": 2.31, + "learning_rate": 3.843890778707364e-05, + "loss": 0.8369, + "step": 25150 + }, + { + "epoch": 2.31, + "learning_rate": 3.843431093132298e-05, + "loss": 0.9142, + "step": 25160 + }, + { + "epoch": 2.31, + "learning_rate": 3.842971407557231e-05, + "loss": 0.9696, + "step": 25170 + }, + { + "epoch": 2.31, + "learning_rate": 3.8425117219821644e-05, + "loss": 0.9228, + "step": 25180 + }, + { + "epoch": 2.32, + "learning_rate": 3.842052036407098e-05, + "loss": 0.9158, + "step": 25190 + }, + { + "epoch": 2.32, + "learning_rate": 3.841592350832031e-05, + "loss": 0.9274, + "step": 25200 + }, + { + "epoch": 2.32, + "learning_rate": 3.8411326652569646e-05, + "loss": 0.9855, + "step": 25210 + }, + { + "epoch": 2.32, + "learning_rate": 3.840672979681898e-05, + "loss": 0.8285, + "step": 25220 + }, + { + "epoch": 2.32, + "learning_rate": 3.8402132941068306e-05, + "loss": 0.8533, + "step": 25230 + }, + { + "epoch": 2.32, + "learning_rate": 3.839753608531764e-05, + "loss": 0.9389, + "step": 25240 + }, + { + "epoch": 2.32, + "learning_rate": 3.839293922956698e-05, + "loss": 0.9925, + "step": 25250 + }, + { + "epoch": 2.32, + "learning_rate": 3.838834237381631e-05, + "loss": 0.8445, + "step": 25260 + }, + { + "epoch": 2.32, + "learning_rate": 3.8383745518065645e-05, + "loss": 0.9331, + "step": 25270 + }, + { + "epoch": 2.32, + "learning_rate": 3.837914866231498e-05, + "loss": 0.9052, + "step": 25280 + }, + { + "epoch": 2.33, + "learning_rate": 3.837455180656431e-05, + "loss": 0.942, + "step": 25290 + }, + { + "epoch": 2.33, + "learning_rate": 3.836995495081365e-05, + "loss": 0.9063, + "step": 25300 + }, + { + "epoch": 2.33, + "learning_rate": 3.8365358095062984e-05, + "loss": 0.8782, + "step": 25310 + }, + { + "epoch": 2.33, + "learning_rate": 3.836076123931231e-05, + "loss": 0.9746, + "step": 25320 + }, + { + "epoch": 2.33, + "learning_rate": 3.8356164383561644e-05, + "loss": 0.8441, + "step": 25330 + }, + { + "epoch": 2.33, + "learning_rate": 3.835156752781098e-05, + "loss": 0.8447, + "step": 25340 + }, + { + "epoch": 2.33, + "learning_rate": 3.834697067206031e-05, + "loss": 0.9252, + "step": 25350 + }, + { + "epoch": 2.33, + "learning_rate": 3.8342373816309646e-05, + "loss": 0.9182, + "step": 25360 + }, + { + "epoch": 2.33, + "learning_rate": 3.833777696055898e-05, + "loss": 0.9666, + "step": 25370 + }, + { + "epoch": 2.33, + "learning_rate": 3.833318010480831e-05, + "loss": 0.8598, + "step": 25380 + }, + { + "epoch": 2.33, + "learning_rate": 3.832858324905765e-05, + "loss": 0.9655, + "step": 25390 + }, + { + "epoch": 2.34, + "learning_rate": 3.8323986393306985e-05, + "loss": 0.9336, + "step": 25400 + }, + { + "epoch": 2.34, + "learning_rate": 3.831938953755631e-05, + "loss": 0.9618, + "step": 25410 + }, + { + "epoch": 2.34, + "learning_rate": 3.8314792681805645e-05, + "loss": 0.9459, + "step": 25420 + }, + { + "epoch": 2.34, + "learning_rate": 3.831019582605498e-05, + "loss": 0.8521, + "step": 25430 + }, + { + "epoch": 2.34, + "learning_rate": 3.830559897030431e-05, + "loss": 0.8881, + "step": 25440 + }, + { + "epoch": 2.34, + "learning_rate": 3.830100211455365e-05, + "loss": 0.9562, + "step": 25450 + }, + { + "epoch": 2.34, + "learning_rate": 3.8296405258802984e-05, + "loss": 0.9136, + "step": 25460 + }, + { + "epoch": 2.34, + "learning_rate": 3.8291808403052314e-05, + "loss": 0.9303, + "step": 25470 + }, + { + "epoch": 2.34, + "learning_rate": 3.828721154730165e-05, + "loss": 0.9289, + "step": 25480 + }, + { + "epoch": 2.34, + "learning_rate": 3.828261469155098e-05, + "loss": 0.8456, + "step": 25490 + }, + { + "epoch": 2.34, + "learning_rate": 3.8278017835800316e-05, + "loss": 0.9169, + "step": 25500 + }, + { + "epoch": 2.35, + "learning_rate": 3.8273420980049646e-05, + "loss": 0.8947, + "step": 25510 + }, + { + "epoch": 2.35, + "learning_rate": 3.826882412429898e-05, + "loss": 0.8928, + "step": 25520 + }, + { + "epoch": 2.35, + "learning_rate": 3.826422726854831e-05, + "loss": 0.8957, + "step": 25530 + }, + { + "epoch": 2.35, + "learning_rate": 3.825963041279765e-05, + "loss": 0.8719, + "step": 25540 + }, + { + "epoch": 2.35, + "learning_rate": 3.8255033557046985e-05, + "loss": 0.9087, + "step": 25550 + }, + { + "epoch": 2.35, + "learning_rate": 3.8250436701296315e-05, + "loss": 0.8619, + "step": 25560 + }, + { + "epoch": 2.35, + "learning_rate": 3.824583984554565e-05, + "loss": 0.8988, + "step": 25570 + }, + { + "epoch": 2.35, + "learning_rate": 3.824124298979498e-05, + "loss": 1.0177, + "step": 25580 + }, + { + "epoch": 2.35, + "learning_rate": 3.823664613404432e-05, + "loss": 0.9281, + "step": 25590 + }, + { + "epoch": 2.35, + "learning_rate": 3.823204927829365e-05, + "loss": 0.8278, + "step": 25600 + }, + { + "epoch": 2.35, + "learning_rate": 3.8227452422542984e-05, + "loss": 0.9111, + "step": 25610 + }, + { + "epoch": 2.36, + "learning_rate": 3.822285556679231e-05, + "loss": 1.0114, + "step": 25620 + }, + { + "epoch": 2.36, + "learning_rate": 3.821825871104165e-05, + "loss": 0.8224, + "step": 25630 + }, + { + "epoch": 2.36, + "learning_rate": 3.8213661855290986e-05, + "loss": 0.9483, + "step": 25640 + }, + { + "epoch": 2.36, + "learning_rate": 3.8209064999540316e-05, + "loss": 0.843, + "step": 25650 + }, + { + "epoch": 2.36, + "learning_rate": 3.820446814378965e-05, + "loss": 0.8577, + "step": 25660 + }, + { + "epoch": 2.36, + "learning_rate": 3.819987128803898e-05, + "loss": 0.9312, + "step": 25670 + }, + { + "epoch": 2.36, + "learning_rate": 3.819527443228832e-05, + "loss": 0.9681, + "step": 25680 + }, + { + "epoch": 2.36, + "learning_rate": 3.819067757653765e-05, + "loss": 0.9021, + "step": 25690 + }, + { + "epoch": 2.36, + "learning_rate": 3.8186080720786985e-05, + "loss": 0.8908, + "step": 25700 + }, + { + "epoch": 2.36, + "learning_rate": 3.8181483865036314e-05, + "loss": 0.9238, + "step": 25710 + }, + { + "epoch": 2.36, + "learning_rate": 3.817688700928565e-05, + "loss": 0.9089, + "step": 25720 + }, + { + "epoch": 2.37, + "learning_rate": 3.817229015353499e-05, + "loss": 0.8789, + "step": 25730 + }, + { + "epoch": 2.37, + "learning_rate": 3.816769329778432e-05, + "loss": 0.8758, + "step": 25740 + }, + { + "epoch": 2.37, + "learning_rate": 3.8163096442033654e-05, + "loss": 0.8979, + "step": 25750 + }, + { + "epoch": 2.37, + "learning_rate": 3.815849958628298e-05, + "loss": 0.8727, + "step": 25760 + }, + { + "epoch": 2.37, + "learning_rate": 3.815390273053232e-05, + "loss": 0.8531, + "step": 25770 + }, + { + "epoch": 2.37, + "learning_rate": 3.814930587478165e-05, + "loss": 0.9329, + "step": 25780 + }, + { + "epoch": 2.37, + "learning_rate": 3.8144709019030986e-05, + "loss": 0.9225, + "step": 25790 + }, + { + "epoch": 2.37, + "learning_rate": 3.8140112163280316e-05, + "loss": 0.8935, + "step": 25800 + }, + { + "epoch": 2.37, + "learning_rate": 3.813551530752965e-05, + "loss": 0.8449, + "step": 25810 + }, + { + "epoch": 2.37, + "learning_rate": 3.813091845177898e-05, + "loss": 0.872, + "step": 25820 + }, + { + "epoch": 2.37, + "learning_rate": 3.812632159602832e-05, + "loss": 0.8843, + "step": 25830 + }, + { + "epoch": 2.38, + "learning_rate": 3.8121724740277655e-05, + "loss": 0.8932, + "step": 25840 + }, + { + "epoch": 2.38, + "learning_rate": 3.8117127884526985e-05, + "loss": 0.9675, + "step": 25850 + }, + { + "epoch": 2.38, + "learning_rate": 3.811253102877632e-05, + "loss": 0.9176, + "step": 25860 + }, + { + "epoch": 2.38, + "learning_rate": 3.810793417302565e-05, + "loss": 0.8327, + "step": 25870 + }, + { + "epoch": 2.38, + "learning_rate": 3.810333731727499e-05, + "loss": 0.7184, + "step": 25880 + }, + { + "epoch": 2.38, + "learning_rate": 3.809874046152432e-05, + "loss": 0.8812, + "step": 25890 + }, + { + "epoch": 2.38, + "learning_rate": 3.809414360577365e-05, + "loss": 0.9274, + "step": 25900 + }, + { + "epoch": 2.38, + "learning_rate": 3.808954675002298e-05, + "loss": 0.9736, + "step": 25910 + }, + { + "epoch": 2.38, + "learning_rate": 3.808494989427232e-05, + "loss": 0.7779, + "step": 25920 + }, + { + "epoch": 2.38, + "learning_rate": 3.8080353038521656e-05, + "loss": 0.8476, + "step": 25930 + }, + { + "epoch": 2.38, + "learning_rate": 3.8075756182770986e-05, + "loss": 0.7943, + "step": 25940 + }, + { + "epoch": 2.39, + "learning_rate": 3.807115932702032e-05, + "loss": 0.8394, + "step": 25950 + }, + { + "epoch": 2.39, + "learning_rate": 3.806656247126965e-05, + "loss": 0.929, + "step": 25960 + }, + { + "epoch": 2.39, + "learning_rate": 3.806196561551899e-05, + "loss": 0.9544, + "step": 25970 + }, + { + "epoch": 2.39, + "learning_rate": 3.805736875976832e-05, + "loss": 0.8877, + "step": 25980 + }, + { + "epoch": 2.39, + "learning_rate": 3.8052771904017655e-05, + "loss": 0.8815, + "step": 25990 + }, + { + "epoch": 2.39, + "learning_rate": 3.8048175048266984e-05, + "loss": 0.863, + "step": 26000 + }, + { + "epoch": 2.39, + "learning_rate": 3.804357819251632e-05, + "loss": 0.8947, + "step": 26010 + }, + { + "epoch": 2.39, + "learning_rate": 3.803898133676566e-05, + "loss": 0.8145, + "step": 26020 + }, + { + "epoch": 2.39, + "learning_rate": 3.803438448101499e-05, + "loss": 0.7507, + "step": 26030 + }, + { + "epoch": 2.39, + "learning_rate": 3.8029787625264323e-05, + "loss": 0.9085, + "step": 26040 + }, + { + "epoch": 2.39, + "learning_rate": 3.802519076951365e-05, + "loss": 0.9477, + "step": 26050 + }, + { + "epoch": 2.4, + "learning_rate": 3.802059391376299e-05, + "loss": 0.9355, + "step": 26060 + }, + { + "epoch": 2.4, + "learning_rate": 3.801599705801232e-05, + "loss": 0.9086, + "step": 26070 + }, + { + "epoch": 2.4, + "learning_rate": 3.8011400202261656e-05, + "loss": 0.9342, + "step": 26080 + }, + { + "epoch": 2.4, + "learning_rate": 3.8006803346510985e-05, + "loss": 0.8491, + "step": 26090 + }, + { + "epoch": 2.4, + "learning_rate": 3.800220649076032e-05, + "loss": 0.8328, + "step": 26100 + }, + { + "epoch": 2.4, + "learning_rate": 3.799760963500966e-05, + "loss": 0.9098, + "step": 26110 + }, + { + "epoch": 2.4, + "learning_rate": 3.799301277925899e-05, + "loss": 0.8654, + "step": 26120 + }, + { + "epoch": 2.4, + "learning_rate": 3.7988415923508325e-05, + "loss": 0.963, + "step": 26130 + }, + { + "epoch": 2.4, + "learning_rate": 3.7983819067757654e-05, + "loss": 0.9352, + "step": 26140 + }, + { + "epoch": 2.4, + "learning_rate": 3.7979222212006984e-05, + "loss": 0.8288, + "step": 26150 + }, + { + "epoch": 2.41, + "learning_rate": 3.797462535625632e-05, + "loss": 0.8363, + "step": 26160 + }, + { + "epoch": 2.41, + "learning_rate": 3.797002850050566e-05, + "loss": 0.8919, + "step": 26170 + }, + { + "epoch": 2.41, + "learning_rate": 3.796543164475499e-05, + "loss": 0.8984, + "step": 26180 + }, + { + "epoch": 2.41, + "learning_rate": 3.796083478900432e-05, + "loss": 0.8543, + "step": 26190 + }, + { + "epoch": 2.41, + "learning_rate": 3.795623793325366e-05, + "loss": 0.826, + "step": 26200 + }, + { + "epoch": 2.41, + "learning_rate": 3.795164107750299e-05, + "loss": 0.8601, + "step": 26210 + }, + { + "epoch": 2.41, + "learning_rate": 3.7947044221752326e-05, + "loss": 0.8988, + "step": 26220 + }, + { + "epoch": 2.41, + "learning_rate": 3.7942447366001655e-05, + "loss": 0.872, + "step": 26230 + }, + { + "epoch": 2.41, + "learning_rate": 3.7937850510250985e-05, + "loss": 0.9495, + "step": 26240 + }, + { + "epoch": 2.41, + "learning_rate": 3.793325365450032e-05, + "loss": 0.755, + "step": 26250 + }, + { + "epoch": 2.41, + "learning_rate": 3.792865679874966e-05, + "loss": 0.8854, + "step": 26260 + }, + { + "epoch": 2.42, + "learning_rate": 3.792405994299899e-05, + "loss": 0.9248, + "step": 26270 + }, + { + "epoch": 2.42, + "learning_rate": 3.7919463087248324e-05, + "loss": 0.9085, + "step": 26280 + }, + { + "epoch": 2.42, + "learning_rate": 3.791486623149766e-05, + "loss": 0.857, + "step": 26290 + }, + { + "epoch": 2.42, + "learning_rate": 3.791026937574699e-05, + "loss": 0.8577, + "step": 26300 + }, + { + "epoch": 2.42, + "learning_rate": 3.790567251999633e-05, + "loss": 0.82, + "step": 26310 + }, + { + "epoch": 2.42, + "learning_rate": 3.790107566424566e-05, + "loss": 0.9741, + "step": 26320 + }, + { + "epoch": 2.42, + "learning_rate": 3.7896478808494986e-05, + "loss": 0.9818, + "step": 26330 + }, + { + "epoch": 2.42, + "learning_rate": 3.789188195274432e-05, + "loss": 0.8748, + "step": 26340 + }, + { + "epoch": 2.42, + "learning_rate": 3.788728509699366e-05, + "loss": 0.8005, + "step": 26350 + }, + { + "epoch": 2.42, + "learning_rate": 3.788268824124299e-05, + "loss": 1.0986, + "step": 26360 + }, + { + "epoch": 2.42, + "learning_rate": 3.7878091385492326e-05, + "loss": 0.943, + "step": 26370 + }, + { + "epoch": 2.43, + "learning_rate": 3.787349452974166e-05, + "loss": 0.944, + "step": 26380 + }, + { + "epoch": 2.43, + "learning_rate": 3.786889767399099e-05, + "loss": 0.8353, + "step": 26390 + }, + { + "epoch": 2.43, + "learning_rate": 3.786430081824033e-05, + "loss": 0.926, + "step": 26400 + }, + { + "epoch": 2.43, + "learning_rate": 3.785970396248966e-05, + "loss": 0.8966, + "step": 26410 + }, + { + "epoch": 2.43, + "learning_rate": 3.785510710673899e-05, + "loss": 0.916, + "step": 26420 + }, + { + "epoch": 2.43, + "learning_rate": 3.7850510250988324e-05, + "loss": 0.8482, + "step": 26430 + }, + { + "epoch": 2.43, + "learning_rate": 3.784591339523766e-05, + "loss": 0.9887, + "step": 26440 + }, + { + "epoch": 2.43, + "learning_rate": 3.784131653948699e-05, + "loss": 0.8427, + "step": 26450 + }, + { + "epoch": 2.43, + "learning_rate": 3.783671968373633e-05, + "loss": 0.7894, + "step": 26460 + }, + { + "epoch": 2.43, + "learning_rate": 3.783212282798566e-05, + "loss": 0.8908, + "step": 26470 + }, + { + "epoch": 2.43, + "learning_rate": 3.782752597223499e-05, + "loss": 0.9063, + "step": 26480 + }, + { + "epoch": 2.44, + "learning_rate": 3.782292911648433e-05, + "loss": 0.9582, + "step": 26490 + }, + { + "epoch": 2.44, + "learning_rate": 3.781833226073366e-05, + "loss": 0.8678, + "step": 26500 + }, + { + "epoch": 2.44, + "learning_rate": 3.781373540498299e-05, + "loss": 0.894, + "step": 26510 + }, + { + "epoch": 2.44, + "learning_rate": 3.7809138549232325e-05, + "loss": 0.8668, + "step": 26520 + }, + { + "epoch": 2.44, + "learning_rate": 3.780454169348166e-05, + "loss": 0.8818, + "step": 26530 + }, + { + "epoch": 2.44, + "learning_rate": 3.779994483773099e-05, + "loss": 1.0279, + "step": 26540 + }, + { + "epoch": 2.44, + "learning_rate": 3.779534798198033e-05, + "loss": 0.8412, + "step": 26550 + }, + { + "epoch": 2.44, + "learning_rate": 3.7790751126229664e-05, + "loss": 0.8871, + "step": 26560 + }, + { + "epoch": 2.44, + "learning_rate": 3.7786154270478994e-05, + "loss": 0.8672, + "step": 26570 + }, + { + "epoch": 2.44, + "learning_rate": 3.778155741472833e-05, + "loss": 1.028, + "step": 26580 + }, + { + "epoch": 2.44, + "learning_rate": 3.777696055897766e-05, + "loss": 0.9084, + "step": 26590 + }, + { + "epoch": 2.45, + "learning_rate": 3.777236370322699e-05, + "loss": 0.9213, + "step": 26600 + }, + { + "epoch": 2.45, + "learning_rate": 3.7767766847476326e-05, + "loss": 0.8328, + "step": 26610 + }, + { + "epoch": 2.45, + "learning_rate": 3.776316999172566e-05, + "loss": 0.8486, + "step": 26620 + }, + { + "epoch": 2.45, + "learning_rate": 3.775857313597499e-05, + "loss": 0.9819, + "step": 26630 + }, + { + "epoch": 2.45, + "learning_rate": 3.775397628022433e-05, + "loss": 0.917, + "step": 26640 + }, + { + "epoch": 2.45, + "learning_rate": 3.7749379424473666e-05, + "loss": 0.8081, + "step": 26650 + }, + { + "epoch": 2.45, + "learning_rate": 3.7744782568722995e-05, + "loss": 0.9475, + "step": 26660 + }, + { + "epoch": 2.45, + "learning_rate": 3.774018571297233e-05, + "loss": 0.9775, + "step": 26670 + }, + { + "epoch": 2.45, + "learning_rate": 3.773558885722166e-05, + "loss": 0.951, + "step": 26680 + }, + { + "epoch": 2.45, + "learning_rate": 3.773099200147099e-05, + "loss": 1.0335, + "step": 26690 + }, + { + "epoch": 2.45, + "learning_rate": 3.772639514572033e-05, + "loss": 0.9145, + "step": 26700 + }, + { + "epoch": 2.46, + "learning_rate": 3.7721798289969664e-05, + "loss": 0.8213, + "step": 26710 + }, + { + "epoch": 2.46, + "learning_rate": 3.7717201434218994e-05, + "loss": 0.7802, + "step": 26720 + }, + { + "epoch": 2.46, + "learning_rate": 3.771260457846833e-05, + "loss": 0.804, + "step": 26730 + }, + { + "epoch": 2.46, + "learning_rate": 3.770800772271767e-05, + "loss": 0.8397, + "step": 26740 + }, + { + "epoch": 2.46, + "learning_rate": 3.7703410866966996e-05, + "loss": 1.0635, + "step": 26750 + }, + { + "epoch": 2.46, + "learning_rate": 3.769881401121633e-05, + "loss": 0.8265, + "step": 26760 + }, + { + "epoch": 2.46, + "learning_rate": 3.769421715546566e-05, + "loss": 0.8071, + "step": 26770 + }, + { + "epoch": 2.46, + "learning_rate": 3.768962029971499e-05, + "loss": 0.8567, + "step": 26780 + }, + { + "epoch": 2.46, + "learning_rate": 3.768502344396433e-05, + "loss": 0.855, + "step": 26790 + }, + { + "epoch": 2.46, + "learning_rate": 3.7680426588213665e-05, + "loss": 0.8997, + "step": 26800 + }, + { + "epoch": 2.46, + "learning_rate": 3.7675829732462995e-05, + "loss": 0.9941, + "step": 26810 + }, + { + "epoch": 2.47, + "learning_rate": 3.767123287671233e-05, + "loss": 0.942, + "step": 26820 + }, + { + "epoch": 2.47, + "learning_rate": 3.766663602096167e-05, + "loss": 0.8279, + "step": 26830 + }, + { + "epoch": 2.47, + "learning_rate": 3.7662039165211e-05, + "loss": 0.9287, + "step": 26840 + }, + { + "epoch": 2.47, + "learning_rate": 3.7657442309460334e-05, + "loss": 0.8933, + "step": 26850 + }, + { + "epoch": 2.47, + "learning_rate": 3.7652845453709664e-05, + "loss": 0.9158, + "step": 26860 + }, + { + "epoch": 2.47, + "learning_rate": 3.7648248597958994e-05, + "loss": 0.8783, + "step": 26870 + }, + { + "epoch": 2.47, + "learning_rate": 3.764365174220833e-05, + "loss": 0.8066, + "step": 26880 + }, + { + "epoch": 2.47, + "learning_rate": 3.7639054886457667e-05, + "loss": 0.9138, + "step": 26890 + }, + { + "epoch": 2.47, + "learning_rate": 3.7634458030706996e-05, + "loss": 0.8011, + "step": 26900 + }, + { + "epoch": 2.47, + "learning_rate": 3.762986117495633e-05, + "loss": 0.7589, + "step": 26910 + }, + { + "epoch": 2.47, + "learning_rate": 3.762526431920567e-05, + "loss": 0.8348, + "step": 26920 + }, + { + "epoch": 2.48, + "learning_rate": 3.7620667463455e-05, + "loss": 0.9179, + "step": 26930 + }, + { + "epoch": 2.48, + "learning_rate": 3.7616070607704335e-05, + "loss": 0.8251, + "step": 26940 + }, + { + "epoch": 2.48, + "learning_rate": 3.7611473751953665e-05, + "loss": 0.8711, + "step": 26950 + }, + { + "epoch": 2.48, + "learning_rate": 3.7606876896202995e-05, + "loss": 0.7834, + "step": 26960 + }, + { + "epoch": 2.48, + "learning_rate": 3.760228004045233e-05, + "loss": 0.8196, + "step": 26970 + }, + { + "epoch": 2.48, + "learning_rate": 3.759768318470167e-05, + "loss": 0.8706, + "step": 26980 + }, + { + "epoch": 2.48, + "learning_rate": 3.7593086328951e-05, + "loss": 1.0458, + "step": 26990 + }, + { + "epoch": 2.48, + "learning_rate": 3.7588489473200334e-05, + "loss": 0.8727, + "step": 27000 + }, + { + "epoch": 2.48, + "learning_rate": 3.758389261744967e-05, + "loss": 0.9774, + "step": 27010 + }, + { + "epoch": 2.48, + "learning_rate": 3.7579295761699e-05, + "loss": 0.909, + "step": 27020 + }, + { + "epoch": 2.49, + "learning_rate": 3.7574698905948337e-05, + "loss": 0.8201, + "step": 27030 + }, + { + "epoch": 2.49, + "learning_rate": 3.7570102050197666e-05, + "loss": 0.8686, + "step": 27040 + }, + { + "epoch": 2.49, + "learning_rate": 3.7565505194446996e-05, + "loss": 0.8181, + "step": 27050 + }, + { + "epoch": 2.49, + "learning_rate": 3.756090833869633e-05, + "loss": 0.9345, + "step": 27060 + }, + { + "epoch": 2.49, + "learning_rate": 3.755631148294567e-05, + "loss": 0.831, + "step": 27070 + }, + { + "epoch": 2.49, + "learning_rate": 3.7551714627195e-05, + "loss": 0.8622, + "step": 27080 + }, + { + "epoch": 2.49, + "learning_rate": 3.7547117771444335e-05, + "loss": 0.985, + "step": 27090 + }, + { + "epoch": 2.49, + "learning_rate": 3.754252091569367e-05, + "loss": 0.865, + "step": 27100 + }, + { + "epoch": 2.49, + "learning_rate": 3.7537924059943e-05, + "loss": 0.8906, + "step": 27110 + }, + { + "epoch": 2.49, + "learning_rate": 3.753332720419234e-05, + "loss": 0.8691, + "step": 27120 + }, + { + "epoch": 2.49, + "learning_rate": 3.752873034844167e-05, + "loss": 1.1329, + "step": 27130 + }, + { + "epoch": 2.5, + "learning_rate": 3.7524133492691e-05, + "loss": 0.9287, + "step": 27140 + }, + { + "epoch": 2.5, + "learning_rate": 3.7519536636940334e-05, + "loss": 0.8633, + "step": 27150 + }, + { + "epoch": 2.5, + "learning_rate": 3.751493978118967e-05, + "loss": 0.9947, + "step": 27160 + }, + { + "epoch": 2.5, + "learning_rate": 3.7510342925439e-05, + "loss": 0.8826, + "step": 27170 + }, + { + "epoch": 2.5, + "learning_rate": 3.7505746069688336e-05, + "loss": 0.8685, + "step": 27180 + }, + { + "epoch": 2.5, + "learning_rate": 3.750114921393767e-05, + "loss": 0.8679, + "step": 27190 + }, + { + "epoch": 2.5, + "learning_rate": 3.7496552358187e-05, + "loss": 0.8681, + "step": 27200 + }, + { + "epoch": 2.5, + "learning_rate": 3.749195550243634e-05, + "loss": 0.8128, + "step": 27210 + }, + { + "epoch": 2.5, + "learning_rate": 3.748735864668567e-05, + "loss": 0.8737, + "step": 27220 + }, + { + "epoch": 2.5, + "learning_rate": 3.7482761790935e-05, + "loss": 0.8459, + "step": 27230 + }, + { + "epoch": 2.5, + "learning_rate": 3.7478164935184335e-05, + "loss": 0.8685, + "step": 27240 + }, + { + "epoch": 2.51, + "learning_rate": 3.747356807943367e-05, + "loss": 0.8499, + "step": 27250 + }, + { + "epoch": 2.51, + "learning_rate": 3.7468971223683e-05, + "loss": 0.8303, + "step": 27260 + }, + { + "epoch": 2.51, + "learning_rate": 3.746437436793234e-05, + "loss": 1.0022, + "step": 27270 + }, + { + "epoch": 2.51, + "learning_rate": 3.7459777512181674e-05, + "loss": 0.8221, + "step": 27280 + }, + { + "epoch": 2.51, + "learning_rate": 3.7455180656431004e-05, + "loss": 0.8406, + "step": 27290 + }, + { + "epoch": 2.51, + "learning_rate": 3.745058380068034e-05, + "loss": 0.9893, + "step": 27300 + }, + { + "epoch": 2.51, + "learning_rate": 3.744598694492967e-05, + "loss": 0.8333, + "step": 27310 + }, + { + "epoch": 2.51, + "learning_rate": 3.7441390089179e-05, + "loss": 1.061, + "step": 27320 + }, + { + "epoch": 2.51, + "learning_rate": 3.7436793233428336e-05, + "loss": 0.8943, + "step": 27330 + }, + { + "epoch": 2.51, + "learning_rate": 3.743219637767767e-05, + "loss": 0.9187, + "step": 27340 + }, + { + "epoch": 2.51, + "learning_rate": 3.7427599521927e-05, + "loss": 0.8354, + "step": 27350 + }, + { + "epoch": 2.52, + "learning_rate": 3.742300266617634e-05, + "loss": 0.9001, + "step": 27360 + }, + { + "epoch": 2.52, + "learning_rate": 3.7418405810425675e-05, + "loss": 0.8514, + "step": 27370 + }, + { + "epoch": 2.52, + "learning_rate": 3.7413808954675005e-05, + "loss": 1.0734, + "step": 27380 + }, + { + "epoch": 2.52, + "learning_rate": 3.740921209892434e-05, + "loss": 0.9236, + "step": 27390 + }, + { + "epoch": 2.52, + "learning_rate": 3.740461524317367e-05, + "loss": 0.9317, + "step": 27400 + }, + { + "epoch": 2.52, + "learning_rate": 3.7400018387423e-05, + "loss": 0.9766, + "step": 27410 + }, + { + "epoch": 2.52, + "learning_rate": 3.739542153167234e-05, + "loss": 0.8304, + "step": 27420 + }, + { + "epoch": 2.52, + "learning_rate": 3.7390824675921674e-05, + "loss": 0.9154, + "step": 27430 + }, + { + "epoch": 2.52, + "learning_rate": 3.7386227820171003e-05, + "loss": 0.8369, + "step": 27440 + }, + { + "epoch": 2.52, + "learning_rate": 3.738163096442034e-05, + "loss": 0.8982, + "step": 27450 + }, + { + "epoch": 2.52, + "learning_rate": 3.737703410866967e-05, + "loss": 0.9155, + "step": 27460 + }, + { + "epoch": 2.53, + "learning_rate": 3.7372437252919006e-05, + "loss": 0.7979, + "step": 27470 + }, + { + "epoch": 2.53, + "learning_rate": 3.736784039716834e-05, + "loss": 0.7933, + "step": 27480 + }, + { + "epoch": 2.53, + "learning_rate": 3.736324354141767e-05, + "loss": 0.924, + "step": 27490 + }, + { + "epoch": 2.53, + "learning_rate": 3.7358646685667e-05, + "loss": 0.9322, + "step": 27500 + }, + { + "epoch": 2.53, + "learning_rate": 3.735404982991634e-05, + "loss": 0.9128, + "step": 27510 + }, + { + "epoch": 2.53, + "learning_rate": 3.7349452974165675e-05, + "loss": 0.8975, + "step": 27520 + }, + { + "epoch": 2.53, + "learning_rate": 3.7344856118415005e-05, + "loss": 0.9852, + "step": 27530 + }, + { + "epoch": 2.53, + "learning_rate": 3.734025926266434e-05, + "loss": 0.9012, + "step": 27540 + }, + { + "epoch": 2.53, + "learning_rate": 3.733566240691367e-05, + "loss": 0.8169, + "step": 27550 + }, + { + "epoch": 2.53, + "learning_rate": 3.733106555116301e-05, + "loss": 0.9136, + "step": 27560 + }, + { + "epoch": 2.53, + "learning_rate": 3.7326468695412344e-05, + "loss": 0.8999, + "step": 27570 + }, + { + "epoch": 2.54, + "learning_rate": 3.7321871839661673e-05, + "loss": 0.9199, + "step": 27580 + }, + { + "epoch": 2.54, + "learning_rate": 3.7317274983911e-05, + "loss": 0.9763, + "step": 27590 + }, + { + "epoch": 2.54, + "learning_rate": 3.731267812816034e-05, + "loss": 0.9225, + "step": 27600 + }, + { + "epoch": 2.54, + "learning_rate": 3.7308081272409676e-05, + "loss": 0.8641, + "step": 27610 + }, + { + "epoch": 2.54, + "learning_rate": 3.7303484416659006e-05, + "loss": 0.9877, + "step": 27620 + }, + { + "epoch": 2.54, + "learning_rate": 3.729888756090834e-05, + "loss": 1.0469, + "step": 27630 + }, + { + "epoch": 2.54, + "learning_rate": 3.729429070515767e-05, + "loss": 0.9175, + "step": 27640 + }, + { + "epoch": 2.54, + "learning_rate": 3.728969384940701e-05, + "loss": 0.8855, + "step": 27650 + }, + { + "epoch": 2.54, + "learning_rate": 3.7285096993656345e-05, + "loss": 0.9285, + "step": 27660 + }, + { + "epoch": 2.54, + "learning_rate": 3.7280500137905675e-05, + "loss": 0.9944, + "step": 27670 + }, + { + "epoch": 2.54, + "learning_rate": 3.7275903282155004e-05, + "loss": 0.9489, + "step": 27680 + }, + { + "epoch": 2.55, + "learning_rate": 3.727130642640434e-05, + "loss": 0.94, + "step": 27690 + }, + { + "epoch": 2.55, + "learning_rate": 3.726670957065368e-05, + "loss": 0.8482, + "step": 27700 + }, + { + "epoch": 2.55, + "learning_rate": 3.726211271490301e-05, + "loss": 0.8601, + "step": 27710 + }, + { + "epoch": 2.55, + "learning_rate": 3.7257515859152343e-05, + "loss": 0.8617, + "step": 27720 + }, + { + "epoch": 2.55, + "learning_rate": 3.725291900340167e-05, + "loss": 1.0493, + "step": 27730 + }, + { + "epoch": 2.55, + "learning_rate": 3.724832214765101e-05, + "loss": 0.9584, + "step": 27740 + }, + { + "epoch": 2.55, + "learning_rate": 3.7243725291900346e-05, + "loss": 0.8862, + "step": 27750 + }, + { + "epoch": 2.55, + "learning_rate": 3.7239128436149676e-05, + "loss": 0.756, + "step": 27760 + }, + { + "epoch": 2.55, + "learning_rate": 3.7234531580399006e-05, + "loss": 0.8479, + "step": 27770 + }, + { + "epoch": 2.55, + "learning_rate": 3.722993472464834e-05, + "loss": 0.8608, + "step": 27780 + }, + { + "epoch": 2.55, + "learning_rate": 3.722533786889767e-05, + "loss": 0.796, + "step": 27790 + }, + { + "epoch": 2.56, + "learning_rate": 3.722074101314701e-05, + "loss": 0.9791, + "step": 27800 + }, + { + "epoch": 2.56, + "learning_rate": 3.7216144157396345e-05, + "loss": 0.9124, + "step": 27810 + }, + { + "epoch": 2.56, + "learning_rate": 3.7211547301645674e-05, + "loss": 0.9666, + "step": 27820 + }, + { + "epoch": 2.56, + "learning_rate": 3.720695044589501e-05, + "loss": 0.8415, + "step": 27830 + }, + { + "epoch": 2.56, + "learning_rate": 3.720235359014435e-05, + "loss": 0.8687, + "step": 27840 + }, + { + "epoch": 2.56, + "learning_rate": 3.719775673439368e-05, + "loss": 0.82, + "step": 27850 + }, + { + "epoch": 2.56, + "learning_rate": 3.719315987864301e-05, + "loss": 0.9407, + "step": 27860 + }, + { + "epoch": 2.56, + "learning_rate": 3.718856302289234e-05, + "loss": 0.9482, + "step": 27870 + }, + { + "epoch": 2.56, + "learning_rate": 3.718396616714167e-05, + "loss": 0.8114, + "step": 27880 + }, + { + "epoch": 2.56, + "learning_rate": 3.717936931139101e-05, + "loss": 0.9051, + "step": 27890 + }, + { + "epoch": 2.57, + "learning_rate": 3.7174772455640346e-05, + "loss": 0.8474, + "step": 27900 + }, + { + "epoch": 2.57, + "learning_rate": 3.7170175599889676e-05, + "loss": 0.8228, + "step": 27910 + }, + { + "epoch": 2.57, + "learning_rate": 3.716557874413901e-05, + "loss": 0.9865, + "step": 27920 + }, + { + "epoch": 2.57, + "learning_rate": 3.716098188838835e-05, + "loss": 0.8705, + "step": 27930 + }, + { + "epoch": 2.57, + "learning_rate": 3.715638503263768e-05, + "loss": 0.8464, + "step": 27940 + }, + { + "epoch": 2.57, + "learning_rate": 3.715178817688701e-05, + "loss": 1.0012, + "step": 27950 + }, + { + "epoch": 2.57, + "learning_rate": 3.7147191321136344e-05, + "loss": 0.9874, + "step": 27960 + }, + { + "epoch": 2.57, + "learning_rate": 3.7142594465385674e-05, + "loss": 0.8754, + "step": 27970 + }, + { + "epoch": 2.57, + "learning_rate": 3.713799760963501e-05, + "loss": 0.9317, + "step": 27980 + }, + { + "epoch": 2.57, + "learning_rate": 3.713340075388435e-05, + "loss": 0.8193, + "step": 27990 + }, + { + "epoch": 2.57, + "learning_rate": 3.712880389813368e-05, + "loss": 0.9799, + "step": 28000 + }, + { + "epoch": 2.58, + "learning_rate": 3.712420704238301e-05, + "loss": 0.8785, + "step": 28010 + }, + { + "epoch": 2.58, + "learning_rate": 3.711961018663235e-05, + "loss": 0.9348, + "step": 28020 + }, + { + "epoch": 2.58, + "learning_rate": 3.711501333088168e-05, + "loss": 0.8947, + "step": 28030 + }, + { + "epoch": 2.58, + "learning_rate": 3.711041647513101e-05, + "loss": 0.918, + "step": 28040 + }, + { + "epoch": 2.58, + "learning_rate": 3.7105819619380346e-05, + "loss": 0.877, + "step": 28050 + }, + { + "epoch": 2.58, + "learning_rate": 3.7101222763629675e-05, + "loss": 0.9391, + "step": 28060 + }, + { + "epoch": 2.58, + "learning_rate": 3.709662590787901e-05, + "loss": 0.8204, + "step": 28070 + }, + { + "epoch": 2.58, + "learning_rate": 3.709202905212835e-05, + "loss": 0.8664, + "step": 28080 + }, + { + "epoch": 2.58, + "learning_rate": 3.708743219637768e-05, + "loss": 0.8922, + "step": 28090 + }, + { + "epoch": 2.58, + "learning_rate": 3.7082835340627014e-05, + "loss": 0.9101, + "step": 28100 + }, + { + "epoch": 2.58, + "learning_rate": 3.707823848487635e-05, + "loss": 0.8769, + "step": 28110 + }, + { + "epoch": 2.59, + "learning_rate": 3.7073641629125674e-05, + "loss": 0.8301, + "step": 28120 + }, + { + "epoch": 2.59, + "learning_rate": 3.706904477337501e-05, + "loss": 0.8498, + "step": 28130 + }, + { + "epoch": 2.59, + "learning_rate": 3.706444791762435e-05, + "loss": 0.8452, + "step": 28140 + }, + { + "epoch": 2.59, + "learning_rate": 3.7059851061873676e-05, + "loss": 0.9467, + "step": 28150 + }, + { + "epoch": 2.59, + "learning_rate": 3.705525420612301e-05, + "loss": 0.9138, + "step": 28160 + }, + { + "epoch": 2.59, + "learning_rate": 3.705065735037235e-05, + "loss": 0.8948, + "step": 28170 + }, + { + "epoch": 2.59, + "learning_rate": 3.704606049462168e-05, + "loss": 0.9797, + "step": 28180 + }, + { + "epoch": 2.59, + "learning_rate": 3.7041463638871016e-05, + "loss": 0.8201, + "step": 28190 + }, + { + "epoch": 2.59, + "learning_rate": 3.703686678312035e-05, + "loss": 0.8825, + "step": 28200 + }, + { + "epoch": 2.59, + "learning_rate": 3.7032269927369675e-05, + "loss": 0.9036, + "step": 28210 + }, + { + "epoch": 2.59, + "learning_rate": 3.702767307161901e-05, + "loss": 0.8402, + "step": 28220 + }, + { + "epoch": 2.6, + "learning_rate": 3.702307621586835e-05, + "loss": 0.9476, + "step": 28230 + }, + { + "epoch": 2.6, + "learning_rate": 3.701847936011768e-05, + "loss": 0.9042, + "step": 28240 + }, + { + "epoch": 2.6, + "learning_rate": 3.7013882504367014e-05, + "loss": 0.8665, + "step": 28250 + }, + { + "epoch": 2.6, + "learning_rate": 3.700928564861635e-05, + "loss": 0.886, + "step": 28260 + }, + { + "epoch": 2.6, + "learning_rate": 3.700468879286568e-05, + "loss": 0.8623, + "step": 28270 + }, + { + "epoch": 2.6, + "learning_rate": 3.700009193711502e-05, + "loss": 0.911, + "step": 28280 + }, + { + "epoch": 2.6, + "learning_rate": 3.699549508136435e-05, + "loss": 0.8947, + "step": 28290 + }, + { + "epoch": 2.6, + "learning_rate": 3.6990898225613676e-05, + "loss": 0.9376, + "step": 28300 + }, + { + "epoch": 2.6, + "learning_rate": 3.698630136986301e-05, + "loss": 0.9099, + "step": 28310 + }, + { + "epoch": 2.6, + "learning_rate": 3.698170451411235e-05, + "loss": 0.9286, + "step": 28320 + }, + { + "epoch": 2.6, + "learning_rate": 3.697710765836168e-05, + "loss": 0.9686, + "step": 28330 + }, + { + "epoch": 2.61, + "learning_rate": 3.6972510802611015e-05, + "loss": 0.9859, + "step": 28340 + }, + { + "epoch": 2.61, + "learning_rate": 3.696791394686035e-05, + "loss": 0.7586, + "step": 28350 + }, + { + "epoch": 2.61, + "learning_rate": 3.696331709110968e-05, + "loss": 0.9201, + "step": 28360 + }, + { + "epoch": 2.61, + "learning_rate": 3.695872023535902e-05, + "loss": 0.9343, + "step": 28370 + }, + { + "epoch": 2.61, + "learning_rate": 3.6954123379608354e-05, + "loss": 0.926, + "step": 28380 + }, + { + "epoch": 2.61, + "learning_rate": 3.694952652385768e-05, + "loss": 0.9042, + "step": 28390 + }, + { + "epoch": 2.61, + "learning_rate": 3.6944929668107014e-05, + "loss": 0.7915, + "step": 28400 + }, + { + "epoch": 2.61, + "learning_rate": 3.694033281235635e-05, + "loss": 0.8241, + "step": 28410 + }, + { + "epoch": 2.61, + "learning_rate": 3.693573595660568e-05, + "loss": 0.8791, + "step": 28420 + }, + { + "epoch": 2.61, + "learning_rate": 3.6931139100855017e-05, + "loss": 0.9003, + "step": 28430 + }, + { + "epoch": 2.61, + "learning_rate": 3.692654224510435e-05, + "loss": 0.9086, + "step": 28440 + }, + { + "epoch": 2.62, + "learning_rate": 3.692194538935368e-05, + "loss": 0.8384, + "step": 28450 + }, + { + "epoch": 2.62, + "learning_rate": 3.691734853360302e-05, + "loss": 0.8135, + "step": 28460 + }, + { + "epoch": 2.62, + "learning_rate": 3.6912751677852356e-05, + "loss": 0.8757, + "step": 28470 + }, + { + "epoch": 2.62, + "learning_rate": 3.690815482210168e-05, + "loss": 0.9036, + "step": 28480 + }, + { + "epoch": 2.62, + "learning_rate": 3.6903557966351015e-05, + "loss": 0.8644, + "step": 28490 + }, + { + "epoch": 2.62, + "learning_rate": 3.689896111060035e-05, + "loss": 0.826, + "step": 28500 + }, + { + "epoch": 2.62, + "learning_rate": 3.689436425484968e-05, + "loss": 0.8792, + "step": 28510 + }, + { + "epoch": 2.62, + "learning_rate": 3.688976739909902e-05, + "loss": 0.9287, + "step": 28520 + }, + { + "epoch": 2.62, + "learning_rate": 3.6885170543348354e-05, + "loss": 0.8579, + "step": 28530 + }, + { + "epoch": 2.62, + "learning_rate": 3.6880573687597684e-05, + "loss": 0.8401, + "step": 28540 + }, + { + "epoch": 2.62, + "learning_rate": 3.687597683184702e-05, + "loss": 0.915, + "step": 28550 + }, + { + "epoch": 2.63, + "learning_rate": 3.687137997609636e-05, + "loss": 1.0519, + "step": 28560 + }, + { + "epoch": 2.63, + "learning_rate": 3.686678312034568e-05, + "loss": 0.9501, + "step": 28570 + }, + { + "epoch": 2.63, + "learning_rate": 3.6862186264595016e-05, + "loss": 0.838, + "step": 28580 + }, + { + "epoch": 2.63, + "learning_rate": 3.685758940884435e-05, + "loss": 0.9672, + "step": 28590 + }, + { + "epoch": 2.63, + "learning_rate": 3.685299255309368e-05, + "loss": 0.9126, + "step": 28600 + }, + { + "epoch": 2.63, + "learning_rate": 3.684839569734302e-05, + "loss": 0.8578, + "step": 28610 + }, + { + "epoch": 2.63, + "learning_rate": 3.6843798841592355e-05, + "loss": 0.9925, + "step": 28620 + }, + { + "epoch": 2.63, + "learning_rate": 3.6839201985841685e-05, + "loss": 0.9203, + "step": 28630 + }, + { + "epoch": 2.63, + "learning_rate": 3.683460513009102e-05, + "loss": 0.8203, + "step": 28640 + }, + { + "epoch": 2.63, + "learning_rate": 3.683000827434036e-05, + "loss": 0.7955, + "step": 28650 + }, + { + "epoch": 2.63, + "learning_rate": 3.682541141858968e-05, + "loss": 0.9978, + "step": 28660 + }, + { + "epoch": 2.64, + "learning_rate": 3.682081456283902e-05, + "loss": 0.8145, + "step": 28670 + }, + { + "epoch": 2.64, + "learning_rate": 3.6816217707088354e-05, + "loss": 1.0306, + "step": 28680 + }, + { + "epoch": 2.64, + "learning_rate": 3.6811620851337684e-05, + "loss": 1.0419, + "step": 28690 + }, + { + "epoch": 2.64, + "learning_rate": 3.680702399558702e-05, + "loss": 0.8373, + "step": 28700 + }, + { + "epoch": 2.64, + "learning_rate": 3.6802427139836357e-05, + "loss": 0.941, + "step": 28710 + }, + { + "epoch": 2.64, + "learning_rate": 3.6797830284085686e-05, + "loss": 0.8074, + "step": 28720 + }, + { + "epoch": 2.64, + "learning_rate": 3.679323342833502e-05, + "loss": 0.8985, + "step": 28730 + }, + { + "epoch": 2.64, + "learning_rate": 3.678863657258436e-05, + "loss": 0.9232, + "step": 28740 + }, + { + "epoch": 2.64, + "learning_rate": 3.678403971683368e-05, + "loss": 0.8829, + "step": 28750 + }, + { + "epoch": 2.64, + "learning_rate": 3.677944286108302e-05, + "loss": 0.9296, + "step": 28760 + }, + { + "epoch": 2.65, + "learning_rate": 3.6774846005332355e-05, + "loss": 0.8418, + "step": 28770 + }, + { + "epoch": 2.65, + "learning_rate": 3.6770249149581685e-05, + "loss": 0.9044, + "step": 28780 + }, + { + "epoch": 2.65, + "learning_rate": 3.676565229383102e-05, + "loss": 1.0593, + "step": 28790 + }, + { + "epoch": 2.65, + "learning_rate": 3.676105543808036e-05, + "loss": 0.8866, + "step": 28800 + }, + { + "epoch": 2.65, + "learning_rate": 3.675645858232969e-05, + "loss": 0.9428, + "step": 28810 + }, + { + "epoch": 2.65, + "learning_rate": 3.6751861726579024e-05, + "loss": 0.8657, + "step": 28820 + }, + { + "epoch": 2.65, + "learning_rate": 3.674726487082836e-05, + "loss": 0.889, + "step": 28830 + }, + { + "epoch": 2.65, + "learning_rate": 3.674266801507768e-05, + "loss": 0.9275, + "step": 28840 + }, + { + "epoch": 2.65, + "learning_rate": 3.673807115932702e-05, + "loss": 0.8931, + "step": 28850 + }, + { + "epoch": 2.65, + "learning_rate": 3.6733474303576356e-05, + "loss": 0.9639, + "step": 28860 + }, + { + "epoch": 2.65, + "learning_rate": 3.6728877447825686e-05, + "loss": 0.9047, + "step": 28870 + }, + { + "epoch": 2.66, + "learning_rate": 3.672428059207502e-05, + "loss": 0.9317, + "step": 28880 + }, + { + "epoch": 2.66, + "learning_rate": 3.671968373632436e-05, + "loss": 0.8627, + "step": 28890 + }, + { + "epoch": 2.66, + "learning_rate": 3.671508688057369e-05, + "loss": 0.8862, + "step": 28900 + }, + { + "epoch": 2.66, + "learning_rate": 3.6710490024823025e-05, + "loss": 0.7778, + "step": 28910 + }, + { + "epoch": 2.66, + "learning_rate": 3.670589316907236e-05, + "loss": 0.8307, + "step": 28920 + }, + { + "epoch": 2.66, + "learning_rate": 3.670129631332169e-05, + "loss": 0.8206, + "step": 28930 + }, + { + "epoch": 2.66, + "learning_rate": 3.669669945757102e-05, + "loss": 0.8096, + "step": 28940 + }, + { + "epoch": 2.66, + "learning_rate": 3.669210260182036e-05, + "loss": 0.8577, + "step": 28950 + }, + { + "epoch": 2.66, + "learning_rate": 3.668750574606969e-05, + "loss": 0.9241, + "step": 28960 + }, + { + "epoch": 2.66, + "learning_rate": 3.6682908890319024e-05, + "loss": 0.8435, + "step": 28970 + }, + { + "epoch": 2.66, + "learning_rate": 3.667831203456836e-05, + "loss": 0.9464, + "step": 28980 + }, + { + "epoch": 2.67, + "learning_rate": 3.667371517881769e-05, + "loss": 0.9871, + "step": 28990 + }, + { + "epoch": 2.67, + "learning_rate": 3.6669118323067026e-05, + "loss": 0.9132, + "step": 29000 + }, + { + "epoch": 2.67, + "learning_rate": 3.666452146731636e-05, + "loss": 0.8767, + "step": 29010 + }, + { + "epoch": 2.67, + "learning_rate": 3.665992461156569e-05, + "loss": 0.937, + "step": 29020 + }, + { + "epoch": 2.67, + "learning_rate": 3.665532775581502e-05, + "loss": 0.7686, + "step": 29030 + }, + { + "epoch": 2.67, + "learning_rate": 3.665073090006436e-05, + "loss": 0.8474, + "step": 29040 + }, + { + "epoch": 2.67, + "learning_rate": 3.664613404431369e-05, + "loss": 0.9139, + "step": 29050 + }, + { + "epoch": 2.67, + "learning_rate": 3.6641537188563025e-05, + "loss": 0.8479, + "step": 29060 + }, + { + "epoch": 2.67, + "learning_rate": 3.663694033281236e-05, + "loss": 0.9096, + "step": 29070 + }, + { + "epoch": 2.67, + "learning_rate": 3.663234347706169e-05, + "loss": 0.8688, + "step": 29080 + }, + { + "epoch": 2.67, + "learning_rate": 3.662774662131103e-05, + "loss": 1.0149, + "step": 29090 + }, + { + "epoch": 2.68, + "learning_rate": 3.662314976556036e-05, + "loss": 0.8913, + "step": 29100 + }, + { + "epoch": 2.68, + "learning_rate": 3.6618552909809694e-05, + "loss": 0.8449, + "step": 29110 + }, + { + "epoch": 2.68, + "learning_rate": 3.6613956054059023e-05, + "loss": 0.8414, + "step": 29120 + }, + { + "epoch": 2.68, + "learning_rate": 3.660935919830836e-05, + "loss": 0.9431, + "step": 29130 + }, + { + "epoch": 2.68, + "learning_rate": 3.660476234255769e-05, + "loss": 0.9583, + "step": 29140 + }, + { + "epoch": 2.68, + "learning_rate": 3.6600165486807026e-05, + "loss": 0.8655, + "step": 29150 + }, + { + "epoch": 2.68, + "learning_rate": 3.659556863105636e-05, + "loss": 0.9735, + "step": 29160 + }, + { + "epoch": 2.68, + "learning_rate": 3.659097177530569e-05, + "loss": 0.9008, + "step": 29170 + }, + { + "epoch": 2.68, + "learning_rate": 3.658637491955503e-05, + "loss": 0.8733, + "step": 29180 + }, + { + "epoch": 2.68, + "learning_rate": 3.658177806380436e-05, + "loss": 0.849, + "step": 29190 + }, + { + "epoch": 2.68, + "learning_rate": 3.6577181208053695e-05, + "loss": 0.8625, + "step": 29200 + }, + { + "epoch": 2.69, + "learning_rate": 3.6572584352303025e-05, + "loss": 0.8986, + "step": 29210 + }, + { + "epoch": 2.69, + "learning_rate": 3.656798749655236e-05, + "loss": 1.0063, + "step": 29220 + }, + { + "epoch": 2.69, + "learning_rate": 3.656339064080169e-05, + "loss": 0.8599, + "step": 29230 + }, + { + "epoch": 2.69, + "learning_rate": 3.655879378505103e-05, + "loss": 0.8722, + "step": 29240 + }, + { + "epoch": 2.69, + "learning_rate": 3.6554196929300364e-05, + "loss": 0.8299, + "step": 29250 + }, + { + "epoch": 2.69, + "learning_rate": 3.6549600073549693e-05, + "loss": 0.7848, + "step": 29260 + }, + { + "epoch": 2.69, + "learning_rate": 3.654500321779903e-05, + "loss": 0.9549, + "step": 29270 + }, + { + "epoch": 2.69, + "learning_rate": 3.654040636204836e-05, + "loss": 0.7804, + "step": 29280 + }, + { + "epoch": 2.69, + "learning_rate": 3.6535809506297696e-05, + "loss": 0.9035, + "step": 29290 + }, + { + "epoch": 2.69, + "learning_rate": 3.6531212650547026e-05, + "loss": 0.7337, + "step": 29300 + }, + { + "epoch": 2.69, + "learning_rate": 3.652661579479636e-05, + "loss": 0.8762, + "step": 29310 + }, + { + "epoch": 2.7, + "learning_rate": 3.652201893904569e-05, + "loss": 0.8762, + "step": 29320 + }, + { + "epoch": 2.7, + "learning_rate": 3.651742208329503e-05, + "loss": 0.8018, + "step": 29330 + }, + { + "epoch": 2.7, + "learning_rate": 3.6512825227544365e-05, + "loss": 0.8937, + "step": 29340 + }, + { + "epoch": 2.7, + "learning_rate": 3.6508228371793695e-05, + "loss": 0.8185, + "step": 29350 + }, + { + "epoch": 2.7, + "learning_rate": 3.650363151604303e-05, + "loss": 0.9088, + "step": 29360 + }, + { + "epoch": 2.7, + "learning_rate": 3.649903466029236e-05, + "loss": 1.0045, + "step": 29370 + }, + { + "epoch": 2.7, + "learning_rate": 3.64944378045417e-05, + "loss": 0.9125, + "step": 29380 + }, + { + "epoch": 2.7, + "learning_rate": 3.648984094879103e-05, + "loss": 0.8722, + "step": 29390 + }, + { + "epoch": 2.7, + "learning_rate": 3.6485244093040364e-05, + "loss": 0.9845, + "step": 29400 + }, + { + "epoch": 2.7, + "learning_rate": 3.648064723728969e-05, + "loss": 0.8878, + "step": 29410 + }, + { + "epoch": 2.7, + "learning_rate": 3.647605038153903e-05, + "loss": 0.9384, + "step": 29420 + }, + { + "epoch": 2.71, + "learning_rate": 3.647145352578836e-05, + "loss": 0.9229, + "step": 29430 + }, + { + "epoch": 2.71, + "learning_rate": 3.6466856670037696e-05, + "loss": 0.8151, + "step": 29440 + }, + { + "epoch": 2.71, + "learning_rate": 3.646225981428703e-05, + "loss": 0.7212, + "step": 29450 + }, + { + "epoch": 2.71, + "learning_rate": 3.645766295853636e-05, + "loss": 0.8427, + "step": 29460 + }, + { + "epoch": 2.71, + "learning_rate": 3.64530661027857e-05, + "loss": 0.8722, + "step": 29470 + }, + { + "epoch": 2.71, + "learning_rate": 3.644846924703503e-05, + "loss": 1.0086, + "step": 29480 + }, + { + "epoch": 2.71, + "learning_rate": 3.6443872391284365e-05, + "loss": 0.7967, + "step": 29490 + }, + { + "epoch": 2.71, + "learning_rate": 3.6439275535533694e-05, + "loss": 1.0429, + "step": 29500 + }, + { + "epoch": 2.71, + "learning_rate": 3.643467867978303e-05, + "loss": 0.918, + "step": 29510 + }, + { + "epoch": 2.71, + "learning_rate": 3.643008182403236e-05, + "loss": 0.8492, + "step": 29520 + }, + { + "epoch": 2.71, + "learning_rate": 3.64254849682817e-05, + "loss": 0.9702, + "step": 29530 + }, + { + "epoch": 2.72, + "learning_rate": 3.6420888112531034e-05, + "loss": 0.7587, + "step": 29540 + }, + { + "epoch": 2.72, + "learning_rate": 3.641629125678036e-05, + "loss": 0.8733, + "step": 29550 + }, + { + "epoch": 2.72, + "learning_rate": 3.64116944010297e-05, + "loss": 0.9371, + "step": 29560 + }, + { + "epoch": 2.72, + "learning_rate": 3.640709754527903e-05, + "loss": 0.7424, + "step": 29570 + }, + { + "epoch": 2.72, + "learning_rate": 3.6402500689528366e-05, + "loss": 0.8093, + "step": 29580 + }, + { + "epoch": 2.72, + "learning_rate": 3.6397903833777696e-05, + "loss": 0.8467, + "step": 29590 + }, + { + "epoch": 2.72, + "learning_rate": 3.639330697802703e-05, + "loss": 0.9544, + "step": 29600 + }, + { + "epoch": 2.72, + "learning_rate": 3.638871012227636e-05, + "loss": 0.8509, + "step": 29610 + }, + { + "epoch": 2.72, + "learning_rate": 3.63841132665257e-05, + "loss": 1.1141, + "step": 29620 + }, + { + "epoch": 2.72, + "learning_rate": 3.6379516410775035e-05, + "loss": 0.7867, + "step": 29630 + }, + { + "epoch": 2.73, + "learning_rate": 3.6374919555024364e-05, + "loss": 0.8139, + "step": 29640 + }, + { + "epoch": 2.73, + "learning_rate": 3.63703226992737e-05, + "loss": 0.9693, + "step": 29650 + }, + { + "epoch": 2.73, + "learning_rate": 3.636572584352303e-05, + "loss": 0.8542, + "step": 29660 + }, + { + "epoch": 2.73, + "learning_rate": 3.636112898777237e-05, + "loss": 0.9574, + "step": 29670 + }, + { + "epoch": 2.73, + "learning_rate": 3.63565321320217e-05, + "loss": 0.8562, + "step": 29680 + }, + { + "epoch": 2.73, + "learning_rate": 3.635193527627103e-05, + "loss": 0.931, + "step": 29690 + }, + { + "epoch": 2.73, + "learning_rate": 3.634733842052036e-05, + "loss": 0.7428, + "step": 29700 + }, + { + "epoch": 2.73, + "learning_rate": 3.63427415647697e-05, + "loss": 0.8784, + "step": 29710 + }, + { + "epoch": 2.73, + "learning_rate": 3.6338144709019036e-05, + "loss": 0.7163, + "step": 29720 + }, + { + "epoch": 2.73, + "learning_rate": 3.6333547853268366e-05, + "loss": 0.9485, + "step": 29730 + }, + { + "epoch": 2.73, + "learning_rate": 3.63289509975177e-05, + "loss": 0.8657, + "step": 29740 + }, + { + "epoch": 2.74, + "learning_rate": 3.632435414176703e-05, + "loss": 1.0289, + "step": 29750 + }, + { + "epoch": 2.74, + "learning_rate": 3.631975728601636e-05, + "loss": 0.9391, + "step": 29760 + }, + { + "epoch": 2.74, + "learning_rate": 3.63151604302657e-05, + "loss": 0.876, + "step": 29770 + }, + { + "epoch": 2.74, + "learning_rate": 3.6310563574515034e-05, + "loss": 0.9762, + "step": 29780 + }, + { + "epoch": 2.74, + "learning_rate": 3.6305966718764364e-05, + "loss": 0.8632, + "step": 29790 + }, + { + "epoch": 2.74, + "learning_rate": 3.63013698630137e-05, + "loss": 0.8713, + "step": 29800 + }, + { + "epoch": 2.74, + "learning_rate": 3.629677300726304e-05, + "loss": 0.9934, + "step": 29810 + }, + { + "epoch": 2.74, + "learning_rate": 3.629217615151237e-05, + "loss": 0.9611, + "step": 29820 + }, + { + "epoch": 2.74, + "learning_rate": 3.62875792957617e-05, + "loss": 0.7934, + "step": 29830 + }, + { + "epoch": 2.74, + "learning_rate": 3.628298244001103e-05, + "loss": 0.9022, + "step": 29840 + }, + { + "epoch": 2.74, + "learning_rate": 3.627838558426036e-05, + "loss": 0.8645, + "step": 29850 + }, + { + "epoch": 2.75, + "learning_rate": 3.62737887285097e-05, + "loss": 0.934, + "step": 29860 + }, + { + "epoch": 2.75, + "learning_rate": 3.6269191872759036e-05, + "loss": 0.9354, + "step": 29870 + }, + { + "epoch": 2.75, + "learning_rate": 3.6264595017008365e-05, + "loss": 1.0926, + "step": 29880 + }, + { + "epoch": 2.75, + "learning_rate": 3.62599981612577e-05, + "loss": 0.9418, + "step": 29890 + }, + { + "epoch": 2.75, + "learning_rate": 3.625540130550704e-05, + "loss": 0.8486, + "step": 29900 + }, + { + "epoch": 2.75, + "learning_rate": 3.625080444975637e-05, + "loss": 0.7942, + "step": 29910 + }, + { + "epoch": 2.75, + "learning_rate": 3.6246207594005705e-05, + "loss": 0.9272, + "step": 29920 + }, + { + "epoch": 2.75, + "learning_rate": 3.6241610738255034e-05, + "loss": 0.9225, + "step": 29930 + }, + { + "epoch": 2.75, + "learning_rate": 3.6237013882504364e-05, + "loss": 0.8867, + "step": 29940 + }, + { + "epoch": 2.75, + "learning_rate": 3.62324170267537e-05, + "loss": 0.8152, + "step": 29950 + }, + { + "epoch": 2.75, + "learning_rate": 3.622782017100304e-05, + "loss": 0.9313, + "step": 29960 + }, + { + "epoch": 2.76, + "learning_rate": 3.6223223315252367e-05, + "loss": 0.88, + "step": 29970 + }, + { + "epoch": 2.76, + "learning_rate": 3.62186264595017e-05, + "loss": 0.9225, + "step": 29980 + }, + { + "epoch": 2.76, + "learning_rate": 3.621402960375104e-05, + "loss": 0.9339, + "step": 29990 + }, + { + "epoch": 2.76, + "learning_rate": 3.620943274800037e-05, + "loss": 0.9095, + "step": 30000 + }, + { + "epoch": 2.76, + "eval_accuracy": 0.5524017467248908, + "eval_loss": 0.9138294458389282, + "eval_runtime": 159.9782, + "eval_samples_per_second": 28.629, + "eval_steps_per_second": 3.582, + "step": 30000 + }, + { + "epoch": 2.76, + "learning_rate": 3.6204835892249706e-05, + "loss": 0.9047, + "step": 30010 + }, + { + "epoch": 2.76, + "learning_rate": 3.6200239036499035e-05, + "loss": 1.0454, + "step": 30020 + }, + { + "epoch": 2.76, + "learning_rate": 3.6195642180748365e-05, + "loss": 0.9781, + "step": 30030 + }, + { + "epoch": 2.76, + "learning_rate": 3.61910453249977e-05, + "loss": 0.9568, + "step": 30040 + }, + { + "epoch": 2.76, + "learning_rate": 3.618644846924704e-05, + "loss": 1.0032, + "step": 30050 + }, + { + "epoch": 2.76, + "learning_rate": 3.618185161349637e-05, + "loss": 0.7939, + "step": 30060 + }, + { + "epoch": 2.76, + "learning_rate": 3.6177254757745704e-05, + "loss": 0.9115, + "step": 30070 + }, + { + "epoch": 2.77, + "learning_rate": 3.617265790199504e-05, + "loss": 1.0297, + "step": 30080 + }, + { + "epoch": 2.77, + "learning_rate": 3.616806104624437e-05, + "loss": 0.9112, + "step": 30090 + }, + { + "epoch": 2.77, + "learning_rate": 3.616346419049371e-05, + "loss": 0.8132, + "step": 30100 + }, + { + "epoch": 2.77, + "learning_rate": 3.6158867334743037e-05, + "loss": 0.8641, + "step": 30110 + }, + { + "epoch": 2.77, + "learning_rate": 3.6154270478992366e-05, + "loss": 0.9127, + "step": 30120 + }, + { + "epoch": 2.77, + "learning_rate": 3.61496736232417e-05, + "loss": 0.8912, + "step": 30130 + }, + { + "epoch": 2.77, + "learning_rate": 3.614507676749104e-05, + "loss": 0.792, + "step": 30140 + }, + { + "epoch": 2.77, + "learning_rate": 3.614047991174037e-05, + "loss": 0.9145, + "step": 30150 + }, + { + "epoch": 2.77, + "learning_rate": 3.6135883055989705e-05, + "loss": 0.9977, + "step": 30160 + }, + { + "epoch": 2.77, + "learning_rate": 3.613128620023904e-05, + "loss": 0.9927, + "step": 30170 + }, + { + "epoch": 2.77, + "learning_rate": 3.612668934448837e-05, + "loss": 0.8512, + "step": 30180 + }, + { + "epoch": 2.78, + "learning_rate": 3.612209248873771e-05, + "loss": 0.8856, + "step": 30190 + }, + { + "epoch": 2.78, + "learning_rate": 3.611749563298704e-05, + "loss": 0.9467, + "step": 30200 + }, + { + "epoch": 2.78, + "learning_rate": 3.611289877723637e-05, + "loss": 0.8253, + "step": 30210 + }, + { + "epoch": 2.78, + "learning_rate": 3.6108301921485704e-05, + "loss": 0.7917, + "step": 30220 + }, + { + "epoch": 2.78, + "learning_rate": 3.610370506573504e-05, + "loss": 0.9191, + "step": 30230 + }, + { + "epoch": 2.78, + "learning_rate": 3.609910820998437e-05, + "loss": 0.8509, + "step": 30240 + }, + { + "epoch": 2.78, + "learning_rate": 3.609451135423371e-05, + "loss": 0.8336, + "step": 30250 + }, + { + "epoch": 2.78, + "learning_rate": 3.608991449848304e-05, + "loss": 0.8611, + "step": 30260 + }, + { + "epoch": 2.78, + "learning_rate": 3.608531764273237e-05, + "loss": 1.0097, + "step": 30270 + }, + { + "epoch": 2.78, + "learning_rate": 3.608072078698171e-05, + "loss": 0.9097, + "step": 30280 + }, + { + "epoch": 2.78, + "learning_rate": 3.607612393123104e-05, + "loss": 0.8024, + "step": 30290 + }, + { + "epoch": 2.79, + "learning_rate": 3.607152707548037e-05, + "loss": 0.8922, + "step": 30300 + }, + { + "epoch": 2.79, + "learning_rate": 3.6066930219729705e-05, + "loss": 0.81, + "step": 30310 + }, + { + "epoch": 2.79, + "learning_rate": 3.606233336397904e-05, + "loss": 0.9186, + "step": 30320 + }, + { + "epoch": 2.79, + "learning_rate": 3.605773650822837e-05, + "loss": 0.946, + "step": 30330 + }, + { + "epoch": 2.79, + "learning_rate": 3.605313965247771e-05, + "loss": 0.8909, + "step": 30340 + }, + { + "epoch": 2.79, + "learning_rate": 3.6048542796727044e-05, + "loss": 0.84, + "step": 30350 + }, + { + "epoch": 2.79, + "learning_rate": 3.6043945940976374e-05, + "loss": 0.9662, + "step": 30360 + }, + { + "epoch": 2.79, + "learning_rate": 3.603934908522571e-05, + "loss": 0.9178, + "step": 30370 + }, + { + "epoch": 2.79, + "learning_rate": 3.603475222947504e-05, + "loss": 0.9426, + "step": 30380 + }, + { + "epoch": 2.79, + "learning_rate": 3.603015537372437e-05, + "loss": 0.8586, + "step": 30390 + }, + { + "epoch": 2.79, + "learning_rate": 3.6025558517973706e-05, + "loss": 0.9276, + "step": 30400 + }, + { + "epoch": 2.8, + "learning_rate": 3.602096166222304e-05, + "loss": 0.9734, + "step": 30410 + }, + { + "epoch": 2.8, + "learning_rate": 3.601636480647237e-05, + "loss": 0.9207, + "step": 30420 + }, + { + "epoch": 2.8, + "learning_rate": 3.601176795072171e-05, + "loss": 0.9561, + "step": 30430 + }, + { + "epoch": 2.8, + "learning_rate": 3.6007171094971045e-05, + "loss": 0.8937, + "step": 30440 + }, + { + "epoch": 2.8, + "learning_rate": 3.6002574239220375e-05, + "loss": 0.8601, + "step": 30450 + }, + { + "epoch": 2.8, + "learning_rate": 3.599797738346971e-05, + "loss": 0.9284, + "step": 30460 + }, + { + "epoch": 2.8, + "learning_rate": 3.599338052771904e-05, + "loss": 0.8643, + "step": 30470 + }, + { + "epoch": 2.8, + "learning_rate": 3.598878367196837e-05, + "loss": 0.9427, + "step": 30480 + }, + { + "epoch": 2.8, + "learning_rate": 3.598418681621771e-05, + "loss": 0.8803, + "step": 30490 + }, + { + "epoch": 2.8, + "learning_rate": 3.5979589960467044e-05, + "loss": 0.8936, + "step": 30500 + }, + { + "epoch": 2.81, + "learning_rate": 3.5974993104716374e-05, + "loss": 0.9068, + "step": 30510 + }, + { + "epoch": 2.81, + "learning_rate": 3.597039624896571e-05, + "loss": 0.8888, + "step": 30520 + }, + { + "epoch": 2.81, + "learning_rate": 3.596579939321505e-05, + "loss": 0.9055, + "step": 30530 + }, + { + "epoch": 2.81, + "learning_rate": 3.5961202537464376e-05, + "loss": 0.8384, + "step": 30540 + }, + { + "epoch": 2.81, + "learning_rate": 3.595660568171371e-05, + "loss": 0.8522, + "step": 30550 + }, + { + "epoch": 2.81, + "learning_rate": 3.595200882596304e-05, + "loss": 0.9822, + "step": 30560 + }, + { + "epoch": 2.81, + "learning_rate": 3.594741197021237e-05, + "loss": 1.0557, + "step": 30570 + }, + { + "epoch": 2.81, + "learning_rate": 3.594281511446171e-05, + "loss": 0.8559, + "step": 30580 + }, + { + "epoch": 2.81, + "learning_rate": 3.5938218258711045e-05, + "loss": 0.8977, + "step": 30590 + }, + { + "epoch": 2.81, + "learning_rate": 3.5933621402960375e-05, + "loss": 0.9428, + "step": 30600 + }, + { + "epoch": 2.81, + "learning_rate": 3.592902454720971e-05, + "loss": 0.8189, + "step": 30610 + }, + { + "epoch": 2.82, + "learning_rate": 3.592442769145905e-05, + "loss": 0.8973, + "step": 30620 + }, + { + "epoch": 2.82, + "learning_rate": 3.591983083570838e-05, + "loss": 0.8575, + "step": 30630 + }, + { + "epoch": 2.82, + "learning_rate": 3.5915233979957714e-05, + "loss": 0.8597, + "step": 30640 + }, + { + "epoch": 2.82, + "learning_rate": 3.5910637124207044e-05, + "loss": 0.8427, + "step": 30650 + }, + { + "epoch": 2.82, + "learning_rate": 3.5906040268456373e-05, + "loss": 0.8322, + "step": 30660 + }, + { + "epoch": 2.82, + "learning_rate": 3.590144341270571e-05, + "loss": 0.9015, + "step": 30670 + }, + { + "epoch": 2.82, + "learning_rate": 3.5896846556955046e-05, + "loss": 1.1105, + "step": 30680 + }, + { + "epoch": 2.82, + "learning_rate": 3.5892249701204376e-05, + "loss": 0.9852, + "step": 30690 + }, + { + "epoch": 2.82, + "learning_rate": 3.588765284545371e-05, + "loss": 0.9548, + "step": 30700 + }, + { + "epoch": 2.82, + "learning_rate": 3.588305598970305e-05, + "loss": 0.8499, + "step": 30710 + }, + { + "epoch": 2.82, + "learning_rate": 3.587845913395238e-05, + "loss": 1.0323, + "step": 30720 + }, + { + "epoch": 2.83, + "learning_rate": 3.5873862278201715e-05, + "loss": 1.0364, + "step": 30730 + }, + { + "epoch": 2.83, + "learning_rate": 3.5869265422451045e-05, + "loss": 0.9993, + "step": 30740 + }, + { + "epoch": 2.83, + "learning_rate": 3.5864668566700375e-05, + "loss": 0.798, + "step": 30750 + }, + { + "epoch": 2.83, + "learning_rate": 3.586007171094971e-05, + "loss": 0.8833, + "step": 30760 + }, + { + "epoch": 2.83, + "learning_rate": 3.585547485519905e-05, + "loss": 0.9174, + "step": 30770 + }, + { + "epoch": 2.83, + "learning_rate": 3.585087799944838e-05, + "loss": 0.8387, + "step": 30780 + }, + { + "epoch": 2.83, + "learning_rate": 3.5846281143697714e-05, + "loss": 0.9369, + "step": 30790 + }, + { + "epoch": 2.83, + "learning_rate": 3.584168428794705e-05, + "loss": 0.8246, + "step": 30800 + }, + { + "epoch": 2.83, + "learning_rate": 3.583708743219638e-05, + "loss": 0.7546, + "step": 30810 + }, + { + "epoch": 2.83, + "learning_rate": 3.5832490576445716e-05, + "loss": 0.8403, + "step": 30820 + }, + { + "epoch": 2.83, + "learning_rate": 3.5827893720695046e-05, + "loss": 0.8796, + "step": 30830 + }, + { + "epoch": 2.84, + "learning_rate": 3.5823296864944376e-05, + "loss": 0.9445, + "step": 30840 + }, + { + "epoch": 2.84, + "learning_rate": 3.581870000919371e-05, + "loss": 0.8512, + "step": 30850 + }, + { + "epoch": 2.84, + "learning_rate": 3.581410315344305e-05, + "loss": 0.7977, + "step": 30860 + }, + { + "epoch": 2.84, + "learning_rate": 3.580950629769238e-05, + "loss": 0.9442, + "step": 30870 + }, + { + "epoch": 2.84, + "learning_rate": 3.5804909441941715e-05, + "loss": 0.8771, + "step": 30880 + }, + { + "epoch": 2.84, + "learning_rate": 3.580031258619105e-05, + "loss": 0.9272, + "step": 30890 + }, + { + "epoch": 2.84, + "learning_rate": 3.579571573044038e-05, + "loss": 0.8357, + "step": 30900 + }, + { + "epoch": 2.84, + "learning_rate": 3.579111887468972e-05, + "loss": 0.8801, + "step": 30910 + }, + { + "epoch": 2.84, + "learning_rate": 3.578652201893905e-05, + "loss": 0.9564, + "step": 30920 + }, + { + "epoch": 2.84, + "learning_rate": 3.578192516318838e-05, + "loss": 0.9837, + "step": 30930 + }, + { + "epoch": 2.84, + "learning_rate": 3.5777328307437714e-05, + "loss": 0.885, + "step": 30940 + }, + { + "epoch": 2.85, + "learning_rate": 3.577273145168705e-05, + "loss": 0.9861, + "step": 30950 + }, + { + "epoch": 2.85, + "learning_rate": 3.576813459593638e-05, + "loss": 0.7833, + "step": 30960 + }, + { + "epoch": 2.85, + "learning_rate": 3.5763537740185716e-05, + "loss": 0.8507, + "step": 30970 + }, + { + "epoch": 2.85, + "learning_rate": 3.575894088443505e-05, + "loss": 0.8672, + "step": 30980 + }, + { + "epoch": 2.85, + "learning_rate": 3.575434402868438e-05, + "loss": 0.8665, + "step": 30990 + }, + { + "epoch": 2.85, + "learning_rate": 3.574974717293372e-05, + "loss": 0.7605, + "step": 31000 + }, + { + "epoch": 2.85, + "learning_rate": 3.574515031718305e-05, + "loss": 0.9407, + "step": 31010 + }, + { + "epoch": 2.85, + "learning_rate": 3.574055346143238e-05, + "loss": 1.0566, + "step": 31020 + }, + { + "epoch": 2.85, + "learning_rate": 3.5735956605681715e-05, + "loss": 0.8567, + "step": 31030 + }, + { + "epoch": 2.85, + "learning_rate": 3.573135974993105e-05, + "loss": 0.8425, + "step": 31040 + }, + { + "epoch": 2.85, + "learning_rate": 3.572676289418038e-05, + "loss": 0.9141, + "step": 31050 + }, + { + "epoch": 2.86, + "learning_rate": 3.572216603842972e-05, + "loss": 0.9592, + "step": 31060 + }, + { + "epoch": 2.86, + "learning_rate": 3.571756918267905e-05, + "loss": 0.857, + "step": 31070 + }, + { + "epoch": 2.86, + "learning_rate": 3.5712972326928384e-05, + "loss": 0.9337, + "step": 31080 + }, + { + "epoch": 2.86, + "learning_rate": 3.570837547117772e-05, + "loss": 0.8753, + "step": 31090 + }, + { + "epoch": 2.86, + "learning_rate": 3.570377861542705e-05, + "loss": 0.8457, + "step": 31100 + }, + { + "epoch": 2.86, + "learning_rate": 3.569918175967638e-05, + "loss": 0.8675, + "step": 31110 + }, + { + "epoch": 2.86, + "learning_rate": 3.5694584903925716e-05, + "loss": 0.9133, + "step": 31120 + }, + { + "epoch": 2.86, + "learning_rate": 3.568998804817505e-05, + "loss": 0.9223, + "step": 31130 + }, + { + "epoch": 2.86, + "learning_rate": 3.568539119242438e-05, + "loss": 0.917, + "step": 31140 + }, + { + "epoch": 2.86, + "learning_rate": 3.568079433667372e-05, + "loss": 0.9247, + "step": 31150 + }, + { + "epoch": 2.86, + "learning_rate": 3.567619748092305e-05, + "loss": 0.8154, + "step": 31160 + }, + { + "epoch": 2.87, + "learning_rate": 3.5671600625172385e-05, + "loss": 0.8565, + "step": 31170 + }, + { + "epoch": 2.87, + "learning_rate": 3.566700376942172e-05, + "loss": 0.8969, + "step": 31180 + }, + { + "epoch": 2.87, + "learning_rate": 3.566240691367105e-05, + "loss": 1.0131, + "step": 31190 + }, + { + "epoch": 2.87, + "learning_rate": 3.565781005792038e-05, + "loss": 0.8677, + "step": 31200 + }, + { + "epoch": 2.87, + "learning_rate": 3.565321320216972e-05, + "loss": 0.9481, + "step": 31210 + }, + { + "epoch": 2.87, + "learning_rate": 3.5648616346419054e-05, + "loss": 0.898, + "step": 31220 + }, + { + "epoch": 2.87, + "learning_rate": 3.564401949066838e-05, + "loss": 0.8948, + "step": 31230 + }, + { + "epoch": 2.87, + "learning_rate": 3.563942263491772e-05, + "loss": 0.9554, + "step": 31240 + }, + { + "epoch": 2.87, + "learning_rate": 3.563482577916705e-05, + "loss": 0.8506, + "step": 31250 + }, + { + "epoch": 2.87, + "learning_rate": 3.5630228923416386e-05, + "loss": 0.851, + "step": 31260 + }, + { + "epoch": 2.87, + "learning_rate": 3.562563206766572e-05, + "loss": 0.9866, + "step": 31270 + }, + { + "epoch": 2.88, + "learning_rate": 3.562103521191505e-05, + "loss": 0.9141, + "step": 31280 + }, + { + "epoch": 2.88, + "learning_rate": 3.561643835616438e-05, + "loss": 0.8915, + "step": 31290 + }, + { + "epoch": 2.88, + "learning_rate": 3.561184150041372e-05, + "loss": 0.8771, + "step": 31300 + }, + { + "epoch": 2.88, + "learning_rate": 3.5607244644663055e-05, + "loss": 0.8912, + "step": 31310 + }, + { + "epoch": 2.88, + "learning_rate": 3.5602647788912384e-05, + "loss": 0.9788, + "step": 31320 + }, + { + "epoch": 2.88, + "learning_rate": 3.559805093316172e-05, + "loss": 0.9649, + "step": 31330 + }, + { + "epoch": 2.88, + "learning_rate": 3.559345407741105e-05, + "loss": 1.0151, + "step": 31340 + }, + { + "epoch": 2.88, + "learning_rate": 3.558885722166039e-05, + "loss": 0.9154, + "step": 31350 + }, + { + "epoch": 2.88, + "learning_rate": 3.5584260365909724e-05, + "loss": 0.863, + "step": 31360 + }, + { + "epoch": 2.88, + "learning_rate": 3.557966351015905e-05, + "loss": 0.9154, + "step": 31370 + }, + { + "epoch": 2.88, + "learning_rate": 3.557506665440838e-05, + "loss": 0.8416, + "step": 31380 + }, + { + "epoch": 2.89, + "learning_rate": 3.557046979865772e-05, + "loss": 0.8957, + "step": 31390 + }, + { + "epoch": 2.89, + "learning_rate": 3.556587294290705e-05, + "loss": 0.8719, + "step": 31400 + }, + { + "epoch": 2.89, + "learning_rate": 3.5561276087156386e-05, + "loss": 0.88, + "step": 31410 + }, + { + "epoch": 2.89, + "learning_rate": 3.555667923140572e-05, + "loss": 0.9562, + "step": 31420 + }, + { + "epoch": 2.89, + "learning_rate": 3.555208237565505e-05, + "loss": 0.8366, + "step": 31430 + }, + { + "epoch": 2.89, + "learning_rate": 3.554748551990439e-05, + "loss": 0.8943, + "step": 31440 + }, + { + "epoch": 2.89, + "learning_rate": 3.5542888664153725e-05, + "loss": 0.9081, + "step": 31450 + }, + { + "epoch": 2.89, + "learning_rate": 3.5538291808403055e-05, + "loss": 0.8791, + "step": 31460 + }, + { + "epoch": 2.89, + "learning_rate": 3.5533694952652384e-05, + "loss": 0.944, + "step": 31470 + }, + { + "epoch": 2.89, + "learning_rate": 3.552909809690172e-05, + "loss": 0.952, + "step": 31480 + }, + { + "epoch": 2.9, + "learning_rate": 3.552450124115105e-05, + "loss": 0.964, + "step": 31490 + }, + { + "epoch": 2.9, + "learning_rate": 3.551990438540039e-05, + "loss": 1.0657, + "step": 31500 + }, + { + "epoch": 2.9, + "learning_rate": 3.551530752964972e-05, + "loss": 0.8545, + "step": 31510 + }, + { + "epoch": 2.9, + "learning_rate": 3.551071067389905e-05, + "loss": 0.9135, + "step": 31520 + }, + { + "epoch": 2.9, + "learning_rate": 3.550611381814839e-05, + "loss": 0.7681, + "step": 31530 + }, + { + "epoch": 2.9, + "learning_rate": 3.5501516962397726e-05, + "loss": 0.8097, + "step": 31540 + }, + { + "epoch": 2.9, + "learning_rate": 3.5496920106647056e-05, + "loss": 0.7565, + "step": 31550 + }, + { + "epoch": 2.9, + "learning_rate": 3.5492323250896385e-05, + "loss": 0.8217, + "step": 31560 + }, + { + "epoch": 2.9, + "learning_rate": 3.548772639514572e-05, + "loss": 0.9366, + "step": 31570 + }, + { + "epoch": 2.9, + "learning_rate": 3.548312953939505e-05, + "loss": 0.8259, + "step": 31580 + }, + { + "epoch": 2.9, + "learning_rate": 3.547853268364439e-05, + "loss": 0.8972, + "step": 31590 + }, + { + "epoch": 2.91, + "learning_rate": 3.5473935827893725e-05, + "loss": 0.8894, + "step": 31600 + }, + { + "epoch": 2.91, + "learning_rate": 3.5469338972143054e-05, + "loss": 0.8256, + "step": 31610 + }, + { + "epoch": 2.91, + "learning_rate": 3.546474211639239e-05, + "loss": 0.9246, + "step": 31620 + }, + { + "epoch": 2.91, + "learning_rate": 3.546014526064173e-05, + "loss": 0.8964, + "step": 31630 + }, + { + "epoch": 2.91, + "learning_rate": 3.545554840489106e-05, + "loss": 0.8336, + "step": 31640 + }, + { + "epoch": 2.91, + "learning_rate": 3.545095154914039e-05, + "loss": 0.9342, + "step": 31650 + }, + { + "epoch": 2.91, + "learning_rate": 3.544635469338972e-05, + "loss": 0.9682, + "step": 31660 + }, + { + "epoch": 2.91, + "learning_rate": 3.544175783763905e-05, + "loss": 0.9029, + "step": 31670 + }, + { + "epoch": 2.91, + "learning_rate": 3.543716098188839e-05, + "loss": 0.9227, + "step": 31680 + }, + { + "epoch": 2.91, + "learning_rate": 3.5432564126137726e-05, + "loss": 0.7535, + "step": 31690 + }, + { + "epoch": 2.91, + "learning_rate": 3.5427967270387055e-05, + "loss": 0.9752, + "step": 31700 + }, + { + "epoch": 2.92, + "learning_rate": 3.542337041463639e-05, + "loss": 0.9554, + "step": 31710 + }, + { + "epoch": 2.92, + "learning_rate": 3.541877355888573e-05, + "loss": 0.8541, + "step": 31720 + }, + { + "epoch": 2.92, + "learning_rate": 3.541417670313505e-05, + "loss": 0.8784, + "step": 31730 + }, + { + "epoch": 2.92, + "learning_rate": 3.540957984738439e-05, + "loss": 0.9193, + "step": 31740 + }, + { + "epoch": 2.92, + "learning_rate": 3.5404982991633724e-05, + "loss": 0.9018, + "step": 31750 + }, + { + "epoch": 2.92, + "learning_rate": 3.5400386135883054e-05, + "loss": 1.0052, + "step": 31760 + }, + { + "epoch": 2.92, + "learning_rate": 3.539578928013239e-05, + "loss": 0.9537, + "step": 31770 + }, + { + "epoch": 2.92, + "learning_rate": 3.539119242438173e-05, + "loss": 0.8938, + "step": 31780 + }, + { + "epoch": 2.92, + "learning_rate": 3.538659556863106e-05, + "loss": 0.9044, + "step": 31790 + }, + { + "epoch": 2.92, + "learning_rate": 3.538199871288039e-05, + "loss": 0.8573, + "step": 31800 + }, + { + "epoch": 2.92, + "learning_rate": 3.537740185712973e-05, + "loss": 0.8894, + "step": 31810 + }, + { + "epoch": 2.93, + "learning_rate": 3.537280500137905e-05, + "loss": 0.7973, + "step": 31820 + }, + { + "epoch": 2.93, + "learning_rate": 3.536820814562839e-05, + "loss": 0.9955, + "step": 31830 + }, + { + "epoch": 2.93, + "learning_rate": 3.5363611289877725e-05, + "loss": 0.8496, + "step": 31840 + }, + { + "epoch": 2.93, + "learning_rate": 3.5359014434127055e-05, + "loss": 0.8532, + "step": 31850 + }, + { + "epoch": 2.93, + "learning_rate": 3.535441757837639e-05, + "loss": 0.869, + "step": 31860 + }, + { + "epoch": 2.93, + "learning_rate": 3.534982072262573e-05, + "loss": 0.8285, + "step": 31870 + }, + { + "epoch": 2.93, + "learning_rate": 3.534522386687506e-05, + "loss": 0.9447, + "step": 31880 + }, + { + "epoch": 2.93, + "learning_rate": 3.5340627011124394e-05, + "loss": 0.8467, + "step": 31890 + }, + { + "epoch": 2.93, + "learning_rate": 3.533603015537373e-05, + "loss": 0.8278, + "step": 31900 + }, + { + "epoch": 2.93, + "learning_rate": 3.5331433299623054e-05, + "loss": 0.8292, + "step": 31910 + }, + { + "epoch": 2.93, + "learning_rate": 3.532683644387239e-05, + "loss": 0.8126, + "step": 31920 + }, + { + "epoch": 2.94, + "learning_rate": 3.532223958812173e-05, + "loss": 0.9435, + "step": 31930 + }, + { + "epoch": 2.94, + "learning_rate": 3.5317642732371056e-05, + "loss": 0.8329, + "step": 31940 + }, + { + "epoch": 2.94, + "learning_rate": 3.531304587662039e-05, + "loss": 0.8652, + "step": 31950 + }, + { + "epoch": 2.94, + "learning_rate": 3.530844902086973e-05, + "loss": 0.9273, + "step": 31960 + }, + { + "epoch": 2.94, + "learning_rate": 3.530385216511906e-05, + "loss": 0.8626, + "step": 31970 + }, + { + "epoch": 2.94, + "learning_rate": 3.5299255309368396e-05, + "loss": 0.773, + "step": 31980 + }, + { + "epoch": 2.94, + "learning_rate": 3.529465845361773e-05, + "loss": 0.9094, + "step": 31990 + }, + { + "epoch": 2.94, + "learning_rate": 3.5290061597867055e-05, + "loss": 0.8314, + "step": 32000 + }, + { + "epoch": 2.94, + "learning_rate": 3.528546474211639e-05, + "loss": 0.7143, + "step": 32010 + }, + { + "epoch": 2.94, + "learning_rate": 3.528086788636573e-05, + "loss": 0.858, + "step": 32020 + }, + { + "epoch": 2.94, + "learning_rate": 3.527627103061506e-05, + "loss": 0.8345, + "step": 32030 + }, + { + "epoch": 2.95, + "learning_rate": 3.5271674174864394e-05, + "loss": 0.8045, + "step": 32040 + }, + { + "epoch": 2.95, + "learning_rate": 3.526707731911373e-05, + "loss": 0.86, + "step": 32050 + }, + { + "epoch": 2.95, + "learning_rate": 3.526248046336306e-05, + "loss": 0.8732, + "step": 32060 + }, + { + "epoch": 2.95, + "learning_rate": 3.52578836076124e-05, + "loss": 0.941, + "step": 32070 + }, + { + "epoch": 2.95, + "learning_rate": 3.525328675186173e-05, + "loss": 0.8274, + "step": 32080 + }, + { + "epoch": 2.95, + "learning_rate": 3.5248689896111056e-05, + "loss": 0.9423, + "step": 32090 + }, + { + "epoch": 2.95, + "learning_rate": 3.524409304036039e-05, + "loss": 0.9156, + "step": 32100 + }, + { + "epoch": 2.95, + "learning_rate": 3.523949618460973e-05, + "loss": 0.9606, + "step": 32110 + }, + { + "epoch": 2.95, + "learning_rate": 3.523489932885906e-05, + "loss": 0.7461, + "step": 32120 + }, + { + "epoch": 2.95, + "learning_rate": 3.5230302473108395e-05, + "loss": 0.9809, + "step": 32130 + }, + { + "epoch": 2.95, + "learning_rate": 3.522570561735773e-05, + "loss": 0.9367, + "step": 32140 + }, + { + "epoch": 2.96, + "learning_rate": 3.522110876160706e-05, + "loss": 0.9909, + "step": 32150 + }, + { + "epoch": 2.96, + "learning_rate": 3.52165119058564e-05, + "loss": 0.8856, + "step": 32160 + }, + { + "epoch": 2.96, + "learning_rate": 3.5211915050105734e-05, + "loss": 0.8986, + "step": 32170 + }, + { + "epoch": 2.96, + "learning_rate": 3.520731819435506e-05, + "loss": 1.0041, + "step": 32180 + }, + { + "epoch": 2.96, + "learning_rate": 3.5202721338604394e-05, + "loss": 0.7575, + "step": 32190 + }, + { + "epoch": 2.96, + "learning_rate": 3.519812448285373e-05, + "loss": 0.8732, + "step": 32200 + }, + { + "epoch": 2.96, + "learning_rate": 3.519352762710306e-05, + "loss": 0.8166, + "step": 32210 + }, + { + "epoch": 2.96, + "learning_rate": 3.5188930771352396e-05, + "loss": 0.7986, + "step": 32220 + }, + { + "epoch": 2.96, + "learning_rate": 3.518433391560173e-05, + "loss": 1.0318, + "step": 32230 + }, + { + "epoch": 2.96, + "learning_rate": 3.517973705985106e-05, + "loss": 0.7335, + "step": 32240 + }, + { + "epoch": 2.96, + "learning_rate": 3.51751402041004e-05, + "loss": 0.856, + "step": 32250 + }, + { + "epoch": 2.97, + "learning_rate": 3.5170543348349736e-05, + "loss": 1.0326, + "step": 32260 + }, + { + "epoch": 2.97, + "learning_rate": 3.516594649259906e-05, + "loss": 1.0132, + "step": 32270 + }, + { + "epoch": 2.97, + "learning_rate": 3.5161349636848395e-05, + "loss": 0.7998, + "step": 32280 + }, + { + "epoch": 2.97, + "learning_rate": 3.515675278109773e-05, + "loss": 0.894, + "step": 32290 + }, + { + "epoch": 2.97, + "learning_rate": 3.515215592534706e-05, + "loss": 0.8154, + "step": 32300 + }, + { + "epoch": 2.97, + "learning_rate": 3.51475590695964e-05, + "loss": 1.1191, + "step": 32310 + }, + { + "epoch": 2.97, + "learning_rate": 3.5142962213845734e-05, + "loss": 0.8067, + "step": 32320 + }, + { + "epoch": 2.97, + "learning_rate": 3.5138365358095064e-05, + "loss": 0.7815, + "step": 32330 + }, + { + "epoch": 2.97, + "learning_rate": 3.51337685023444e-05, + "loss": 0.9119, + "step": 32340 + }, + { + "epoch": 2.97, + "learning_rate": 3.512917164659374e-05, + "loss": 0.8206, + "step": 32350 + }, + { + "epoch": 2.98, + "learning_rate": 3.5124574790843066e-05, + "loss": 0.9211, + "step": 32360 + }, + { + "epoch": 2.98, + "learning_rate": 3.5119977935092396e-05, + "loss": 0.8894, + "step": 32370 + }, + { + "epoch": 2.98, + "learning_rate": 3.511538107934173e-05, + "loss": 0.8662, + "step": 32380 + }, + { + "epoch": 2.98, + "learning_rate": 3.511078422359106e-05, + "loss": 0.9926, + "step": 32390 + }, + { + "epoch": 2.98, + "learning_rate": 3.51061873678404e-05, + "loss": 0.8154, + "step": 32400 + }, + { + "epoch": 2.98, + "learning_rate": 3.5101590512089735e-05, + "loss": 0.8794, + "step": 32410 + }, + { + "epoch": 2.98, + "learning_rate": 3.5096993656339065e-05, + "loss": 0.9837, + "step": 32420 + }, + { + "epoch": 2.98, + "learning_rate": 3.50923968005884e-05, + "loss": 0.9557, + "step": 32430 + }, + { + "epoch": 2.98, + "learning_rate": 3.508779994483774e-05, + "loss": 0.967, + "step": 32440 + }, + { + "epoch": 2.98, + "learning_rate": 3.508320308908707e-05, + "loss": 0.8409, + "step": 32450 + }, + { + "epoch": 2.98, + "learning_rate": 3.50786062333364e-05, + "loss": 0.8271, + "step": 32460 + }, + { + "epoch": 2.99, + "learning_rate": 3.5074009377585734e-05, + "loss": 0.8614, + "step": 32470 + }, + { + "epoch": 2.99, + "learning_rate": 3.5069412521835064e-05, + "loss": 0.8372, + "step": 32480 + }, + { + "epoch": 2.99, + "learning_rate": 3.50648156660844e-05, + "loss": 0.8546, + "step": 32490 + }, + { + "epoch": 2.99, + "learning_rate": 3.5060218810333737e-05, + "loss": 0.9386, + "step": 32500 + }, + { + "epoch": 2.99, + "learning_rate": 3.5055621954583066e-05, + "loss": 0.8898, + "step": 32510 + }, + { + "epoch": 2.99, + "learning_rate": 3.50510250988324e-05, + "loss": 0.7773, + "step": 32520 + }, + { + "epoch": 2.99, + "learning_rate": 3.504642824308174e-05, + "loss": 0.844, + "step": 32530 + }, + { + "epoch": 2.99, + "learning_rate": 3.504183138733107e-05, + "loss": 0.958, + "step": 32540 + }, + { + "epoch": 2.99, + "learning_rate": 3.50372345315804e-05, + "loss": 0.9427, + "step": 32550 + }, + { + "epoch": 2.99, + "learning_rate": 3.5032637675829735e-05, + "loss": 0.9484, + "step": 32560 + }, + { + "epoch": 2.99, + "learning_rate": 3.5028040820079065e-05, + "loss": 0.9066, + "step": 32570 + }, + { + "epoch": 3.0, + "learning_rate": 3.50234439643284e-05, + "loss": 0.806, + "step": 32580 + }, + { + "epoch": 3.0, + "learning_rate": 3.501884710857774e-05, + "loss": 0.9167, + "step": 32590 + }, + { + "epoch": 3.0, + "learning_rate": 3.501425025282707e-05, + "loss": 0.9701, + "step": 32600 + }, + { + "epoch": 3.0, + "learning_rate": 3.5009653397076404e-05, + "loss": 0.8778, + "step": 32610 + }, + { + "epoch": 3.0, + "learning_rate": 3.500505654132574e-05, + "loss": 0.917, + "step": 32620 + }, + { + "epoch": 3.0, + "learning_rate": 3.500045968557507e-05, + "loss": 0.8526, + "step": 32630 + }, + { + "epoch": 3.0, + "learning_rate": 3.49958628298244e-05, + "loss": 0.811, + "step": 32640 + }, + { + "epoch": 3.0, + "learning_rate": 3.4991265974073736e-05, + "loss": 0.8757, + "step": 32650 + }, + { + "epoch": 3.0, + "learning_rate": 3.4986669118323066e-05, + "loss": 0.9319, + "step": 32660 + }, + { + "epoch": 3.0, + "learning_rate": 3.49820722625724e-05, + "loss": 0.8513, + "step": 32670 + }, + { + "epoch": 3.0, + "learning_rate": 3.497747540682174e-05, + "loss": 0.9146, + "step": 32680 + }, + { + "epoch": 3.01, + "learning_rate": 3.497287855107107e-05, + "loss": 0.8735, + "step": 32690 + }, + { + "epoch": 3.01, + "learning_rate": 3.4968281695320405e-05, + "loss": 0.8673, + "step": 32700 + }, + { + "epoch": 3.01, + "learning_rate": 3.4963684839569735e-05, + "loss": 0.8298, + "step": 32710 + }, + { + "epoch": 3.01, + "learning_rate": 3.495908798381907e-05, + "loss": 0.9395, + "step": 32720 + }, + { + "epoch": 3.01, + "learning_rate": 3.49544911280684e-05, + "loss": 0.9158, + "step": 32730 + }, + { + "epoch": 3.01, + "learning_rate": 3.494989427231774e-05, + "loss": 0.9218, + "step": 32740 + }, + { + "epoch": 3.01, + "learning_rate": 3.494529741656707e-05, + "loss": 0.8711, + "step": 32750 + }, + { + "epoch": 3.01, + "learning_rate": 3.4940700560816404e-05, + "loss": 0.9014, + "step": 32760 + }, + { + "epoch": 3.01, + "learning_rate": 3.493610370506574e-05, + "loss": 0.8895, + "step": 32770 + }, + { + "epoch": 3.01, + "learning_rate": 3.493150684931507e-05, + "loss": 0.8424, + "step": 32780 + }, + { + "epoch": 3.01, + "learning_rate": 3.4926909993564406e-05, + "loss": 0.8303, + "step": 32790 + }, + { + "epoch": 3.02, + "learning_rate": 3.4922313137813736e-05, + "loss": 0.8968, + "step": 32800 + }, + { + "epoch": 3.02, + "learning_rate": 3.491771628206307e-05, + "loss": 0.8644, + "step": 32810 + }, + { + "epoch": 3.02, + "learning_rate": 3.49131194263124e-05, + "loss": 0.8902, + "step": 32820 + }, + { + "epoch": 3.02, + "learning_rate": 3.490852257056174e-05, + "loss": 0.9506, + "step": 32830 + }, + { + "epoch": 3.02, + "learning_rate": 3.490392571481107e-05, + "loss": 0.869, + "step": 32840 + }, + { + "epoch": 3.02, + "learning_rate": 3.4899328859060405e-05, + "loss": 0.8403, + "step": 32850 + }, + { + "epoch": 3.02, + "learning_rate": 3.489473200330974e-05, + "loss": 0.7674, + "step": 32860 + }, + { + "epoch": 3.02, + "learning_rate": 3.489013514755907e-05, + "loss": 0.7903, + "step": 32870 + }, + { + "epoch": 3.02, + "learning_rate": 3.488553829180841e-05, + "loss": 0.9089, + "step": 32880 + }, + { + "epoch": 3.02, + "learning_rate": 3.488094143605774e-05, + "loss": 0.9097, + "step": 32890 + }, + { + "epoch": 3.02, + "learning_rate": 3.4876344580307074e-05, + "loss": 0.8355, + "step": 32900 + }, + { + "epoch": 3.03, + "learning_rate": 3.48717477245564e-05, + "loss": 0.7751, + "step": 32910 + }, + { + "epoch": 3.03, + "learning_rate": 3.486715086880574e-05, + "loss": 0.7893, + "step": 32920 + }, + { + "epoch": 3.03, + "learning_rate": 3.486255401305507e-05, + "loss": 0.7556, + "step": 32930 + }, + { + "epoch": 3.03, + "learning_rate": 3.4857957157304406e-05, + "loss": 0.9097, + "step": 32940 + }, + { + "epoch": 3.03, + "learning_rate": 3.485336030155374e-05, + "loss": 0.9149, + "step": 32950 + }, + { + "epoch": 3.03, + "learning_rate": 3.484876344580307e-05, + "loss": 0.8415, + "step": 32960 + }, + { + "epoch": 3.03, + "learning_rate": 3.484416659005241e-05, + "loss": 0.9314, + "step": 32970 + }, + { + "epoch": 3.03, + "learning_rate": 3.483956973430174e-05, + "loss": 0.8608, + "step": 32980 + }, + { + "epoch": 3.03, + "learning_rate": 3.4834972878551075e-05, + "loss": 0.9932, + "step": 32990 + }, + { + "epoch": 3.03, + "learning_rate": 3.4830376022800405e-05, + "loss": 0.9321, + "step": 33000 + }, + { + "epoch": 3.03, + "learning_rate": 3.482577916704974e-05, + "loss": 0.8707, + "step": 33010 + }, + { + "epoch": 3.04, + "learning_rate": 3.482118231129907e-05, + "loss": 0.8776, + "step": 33020 + }, + { + "epoch": 3.04, + "learning_rate": 3.481658545554841e-05, + "loss": 1.0177, + "step": 33030 + }, + { + "epoch": 3.04, + "learning_rate": 3.481198859979774e-05, + "loss": 0.8404, + "step": 33040 + }, + { + "epoch": 3.04, + "learning_rate": 3.480739174404707e-05, + "loss": 0.858, + "step": 33050 + }, + { + "epoch": 3.04, + "learning_rate": 3.480279488829641e-05, + "loss": 1.0405, + "step": 33060 + }, + { + "epoch": 3.04, + "learning_rate": 3.479819803254574e-05, + "loss": 0.8556, + "step": 33070 + }, + { + "epoch": 3.04, + "learning_rate": 3.4793601176795076e-05, + "loss": 0.8453, + "step": 33080 + }, + { + "epoch": 3.04, + "learning_rate": 3.4789004321044406e-05, + "loss": 1.0249, + "step": 33090 + }, + { + "epoch": 3.04, + "learning_rate": 3.478440746529374e-05, + "loss": 0.8704, + "step": 33100 + }, + { + "epoch": 3.04, + "learning_rate": 3.477981060954307e-05, + "loss": 0.8805, + "step": 33110 + }, + { + "epoch": 3.04, + "learning_rate": 3.477521375379241e-05, + "loss": 0.9669, + "step": 33120 + }, + { + "epoch": 3.05, + "learning_rate": 3.477061689804174e-05, + "loss": 0.8225, + "step": 33130 + }, + { + "epoch": 3.05, + "learning_rate": 3.4766020042291075e-05, + "loss": 0.8664, + "step": 33140 + }, + { + "epoch": 3.05, + "learning_rate": 3.476142318654041e-05, + "loss": 0.827, + "step": 33150 + }, + { + "epoch": 3.05, + "learning_rate": 3.475682633078974e-05, + "loss": 0.7586, + "step": 33160 + }, + { + "epoch": 3.05, + "learning_rate": 3.475222947503908e-05, + "loss": 0.9104, + "step": 33170 + }, + { + "epoch": 3.05, + "learning_rate": 3.474763261928841e-05, + "loss": 0.9251, + "step": 33180 + }, + { + "epoch": 3.05, + "learning_rate": 3.4743035763537743e-05, + "loss": 0.9114, + "step": 33190 + }, + { + "epoch": 3.05, + "learning_rate": 3.473843890778707e-05, + "loss": 0.8971, + "step": 33200 + }, + { + "epoch": 3.05, + "learning_rate": 3.473384205203641e-05, + "loss": 0.8651, + "step": 33210 + }, + { + "epoch": 3.05, + "learning_rate": 3.472924519628574e-05, + "loss": 0.903, + "step": 33220 + }, + { + "epoch": 3.06, + "learning_rate": 3.4724648340535076e-05, + "loss": 0.8125, + "step": 33230 + }, + { + "epoch": 3.06, + "learning_rate": 3.472005148478441e-05, + "loss": 0.8595, + "step": 33240 + }, + { + "epoch": 3.06, + "learning_rate": 3.471545462903374e-05, + "loss": 0.8387, + "step": 33250 + }, + { + "epoch": 3.06, + "learning_rate": 3.471085777328308e-05, + "loss": 0.9094, + "step": 33260 + }, + { + "epoch": 3.06, + "learning_rate": 3.470626091753241e-05, + "loss": 0.9021, + "step": 33270 + }, + { + "epoch": 3.06, + "learning_rate": 3.4701664061781745e-05, + "loss": 0.8743, + "step": 33280 + }, + { + "epoch": 3.06, + "learning_rate": 3.4697067206031074e-05, + "loss": 0.828, + "step": 33290 + }, + { + "epoch": 3.06, + "learning_rate": 3.469247035028041e-05, + "loss": 0.9327, + "step": 33300 + }, + { + "epoch": 3.06, + "learning_rate": 3.468787349452974e-05, + "loss": 0.8925, + "step": 33310 + }, + { + "epoch": 3.06, + "learning_rate": 3.468327663877908e-05, + "loss": 0.9683, + "step": 33320 + }, + { + "epoch": 3.06, + "learning_rate": 3.4678679783028413e-05, + "loss": 1.0238, + "step": 33330 + }, + { + "epoch": 3.07, + "learning_rate": 3.467408292727774e-05, + "loss": 0.8342, + "step": 33340 + }, + { + "epoch": 3.07, + "learning_rate": 3.466948607152708e-05, + "loss": 0.845, + "step": 33350 + }, + { + "epoch": 3.07, + "learning_rate": 3.466488921577641e-05, + "loss": 1.0131, + "step": 33360 + }, + { + "epoch": 3.07, + "learning_rate": 3.466029236002574e-05, + "loss": 0.8594, + "step": 33370 + }, + { + "epoch": 3.07, + "learning_rate": 3.4655695504275076e-05, + "loss": 0.9532, + "step": 33380 + }, + { + "epoch": 3.07, + "learning_rate": 3.465109864852441e-05, + "loss": 1.0231, + "step": 33390 + }, + { + "epoch": 3.07, + "learning_rate": 3.464650179277374e-05, + "loss": 0.8886, + "step": 33400 + }, + { + "epoch": 3.07, + "learning_rate": 3.464190493702308e-05, + "loss": 0.8014, + "step": 33410 + }, + { + "epoch": 3.07, + "learning_rate": 3.4637308081272415e-05, + "loss": 0.7966, + "step": 33420 + }, + { + "epoch": 3.07, + "learning_rate": 3.4632711225521744e-05, + "loss": 0.9497, + "step": 33430 + }, + { + "epoch": 3.07, + "learning_rate": 3.462811436977108e-05, + "loss": 0.8833, + "step": 33440 + }, + { + "epoch": 3.08, + "learning_rate": 3.462351751402041e-05, + "loss": 0.8407, + "step": 33450 + }, + { + "epoch": 3.08, + "learning_rate": 3.461892065826974e-05, + "loss": 0.8643, + "step": 33460 + }, + { + "epoch": 3.08, + "learning_rate": 3.461432380251908e-05, + "loss": 0.8864, + "step": 33470 + }, + { + "epoch": 3.08, + "learning_rate": 3.460972694676841e-05, + "loss": 0.9059, + "step": 33480 + }, + { + "epoch": 3.08, + "learning_rate": 3.460513009101774e-05, + "loss": 0.7874, + "step": 33490 + }, + { + "epoch": 3.08, + "learning_rate": 3.460053323526708e-05, + "loss": 0.9192, + "step": 33500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4595936379516416e-05, + "loss": 0.7992, + "step": 33510 + }, + { + "epoch": 3.08, + "learning_rate": 3.4591339523765746e-05, + "loss": 0.8628, + "step": 33520 + }, + { + "epoch": 3.08, + "learning_rate": 3.458674266801508e-05, + "loss": 0.9868, + "step": 33530 + }, + { + "epoch": 3.08, + "learning_rate": 3.458214581226441e-05, + "loss": 0.8393, + "step": 33540 + }, + { + "epoch": 3.08, + "learning_rate": 3.457754895651374e-05, + "loss": 0.7542, + "step": 33550 + }, + { + "epoch": 3.09, + "learning_rate": 3.457295210076308e-05, + "loss": 0.8694, + "step": 33560 + }, + { + "epoch": 3.09, + "learning_rate": 3.4568355245012414e-05, + "loss": 0.9286, + "step": 33570 + }, + { + "epoch": 3.09, + "learning_rate": 3.4563758389261744e-05, + "loss": 0.9105, + "step": 33580 + }, + { + "epoch": 3.09, + "learning_rate": 3.455916153351108e-05, + "loss": 0.9148, + "step": 33590 + }, + { + "epoch": 3.09, + "learning_rate": 3.455456467776042e-05, + "loss": 0.8502, + "step": 33600 + }, + { + "epoch": 3.09, + "learning_rate": 3.454996782200975e-05, + "loss": 0.9311, + "step": 33610 + }, + { + "epoch": 3.09, + "learning_rate": 3.454537096625908e-05, + "loss": 0.8994, + "step": 33620 + }, + { + "epoch": 3.09, + "learning_rate": 3.454077411050841e-05, + "loss": 0.8622, + "step": 33630 + }, + { + "epoch": 3.09, + "learning_rate": 3.453617725475774e-05, + "loss": 1.0268, + "step": 33640 + }, + { + "epoch": 3.09, + "learning_rate": 3.453158039900708e-05, + "loss": 0.8633, + "step": 33650 + }, + { + "epoch": 3.09, + "learning_rate": 3.4526983543256416e-05, + "loss": 0.912, + "step": 33660 + }, + { + "epoch": 3.1, + "learning_rate": 3.4522386687505745e-05, + "loss": 0.8812, + "step": 33670 + }, + { + "epoch": 3.1, + "learning_rate": 3.451778983175508e-05, + "loss": 0.8773, + "step": 33680 + }, + { + "epoch": 3.1, + "learning_rate": 3.451319297600442e-05, + "loss": 0.9104, + "step": 33690 + }, + { + "epoch": 3.1, + "learning_rate": 3.450859612025375e-05, + "loss": 0.8925, + "step": 33700 + }, + { + "epoch": 3.1, + "learning_rate": 3.4503999264503084e-05, + "loss": 0.83, + "step": 33710 + }, + { + "epoch": 3.1, + "learning_rate": 3.4499402408752414e-05, + "loss": 0.8465, + "step": 33720 + }, + { + "epoch": 3.1, + "learning_rate": 3.4494805553001744e-05, + "loss": 0.9713, + "step": 33730 + }, + { + "epoch": 3.1, + "learning_rate": 3.449020869725108e-05, + "loss": 0.7953, + "step": 33740 + }, + { + "epoch": 3.1, + "learning_rate": 3.448561184150042e-05, + "loss": 0.8282, + "step": 33750 + }, + { + "epoch": 3.1, + "learning_rate": 3.4481014985749746e-05, + "loss": 0.8653, + "step": 33760 + }, + { + "epoch": 3.1, + "learning_rate": 3.447641812999908e-05, + "loss": 0.8329, + "step": 33770 + }, + { + "epoch": 3.11, + "learning_rate": 3.447182127424842e-05, + "loss": 0.9064, + "step": 33780 + }, + { + "epoch": 3.11, + "learning_rate": 3.446722441849775e-05, + "loss": 0.8352, + "step": 33790 + }, + { + "epoch": 3.11, + "learning_rate": 3.4462627562747086e-05, + "loss": 0.8989, + "step": 33800 + }, + { + "epoch": 3.11, + "learning_rate": 3.4458030706996415e-05, + "loss": 1.0296, + "step": 33810 + }, + { + "epoch": 3.11, + "learning_rate": 3.4453433851245745e-05, + "loss": 0.8311, + "step": 33820 + }, + { + "epoch": 3.11, + "learning_rate": 3.444883699549508e-05, + "loss": 0.8216, + "step": 33830 + }, + { + "epoch": 3.11, + "learning_rate": 3.444424013974442e-05, + "loss": 0.8395, + "step": 33840 + }, + { + "epoch": 3.11, + "learning_rate": 3.443964328399375e-05, + "loss": 0.8814, + "step": 33850 + }, + { + "epoch": 3.11, + "learning_rate": 3.4435046428243084e-05, + "loss": 0.9564, + "step": 33860 + }, + { + "epoch": 3.11, + "learning_rate": 3.443044957249242e-05, + "loss": 0.8521, + "step": 33870 + }, + { + "epoch": 3.11, + "learning_rate": 3.442585271674175e-05, + "loss": 0.8549, + "step": 33880 + }, + { + "epoch": 3.12, + "learning_rate": 3.442125586099109e-05, + "loss": 0.8562, + "step": 33890 + }, + { + "epoch": 3.12, + "learning_rate": 3.4416659005240416e-05, + "loss": 0.9864, + "step": 33900 + }, + { + "epoch": 3.12, + "learning_rate": 3.4412062149489746e-05, + "loss": 0.8203, + "step": 33910 + }, + { + "epoch": 3.12, + "learning_rate": 3.440746529373908e-05, + "loss": 0.8682, + "step": 33920 + }, + { + "epoch": 3.12, + "learning_rate": 3.440286843798842e-05, + "loss": 0.9021, + "step": 33930 + }, + { + "epoch": 3.12, + "learning_rate": 3.439827158223775e-05, + "loss": 0.9314, + "step": 33940 + }, + { + "epoch": 3.12, + "learning_rate": 3.4393674726487085e-05, + "loss": 1.0293, + "step": 33950 + }, + { + "epoch": 3.12, + "learning_rate": 3.438907787073642e-05, + "loss": 0.8919, + "step": 33960 + }, + { + "epoch": 3.12, + "learning_rate": 3.438448101498575e-05, + "loss": 0.9307, + "step": 33970 + }, + { + "epoch": 3.12, + "learning_rate": 3.437988415923509e-05, + "loss": 0.9444, + "step": 33980 + }, + { + "epoch": 3.12, + "learning_rate": 3.437528730348442e-05, + "loss": 1.0228, + "step": 33990 + }, + { + "epoch": 3.13, + "learning_rate": 3.437069044773375e-05, + "loss": 0.893, + "step": 34000 + }, + { + "epoch": 3.13, + "learning_rate": 3.4366093591983084e-05, + "loss": 0.9029, + "step": 34010 + }, + { + "epoch": 3.13, + "learning_rate": 3.436149673623242e-05, + "loss": 0.8588, + "step": 34020 + }, + { + "epoch": 3.13, + "learning_rate": 3.435689988048175e-05, + "loss": 0.9057, + "step": 34030 + }, + { + "epoch": 3.13, + "learning_rate": 3.4352303024731087e-05, + "loss": 0.869, + "step": 34040 + }, + { + "epoch": 3.13, + "learning_rate": 3.434770616898042e-05, + "loss": 0.9487, + "step": 34050 + }, + { + "epoch": 3.13, + "learning_rate": 3.434310931322975e-05, + "loss": 0.8867, + "step": 34060 + }, + { + "epoch": 3.13, + "learning_rate": 3.433851245747909e-05, + "loss": 0.818, + "step": 34070 + }, + { + "epoch": 3.13, + "learning_rate": 3.433391560172842e-05, + "loss": 0.8417, + "step": 34080 + }, + { + "epoch": 3.13, + "learning_rate": 3.432931874597775e-05, + "loss": 0.9572, + "step": 34090 + }, + { + "epoch": 3.14, + "learning_rate": 3.4324721890227085e-05, + "loss": 0.9749, + "step": 34100 + }, + { + "epoch": 3.14, + "learning_rate": 3.432012503447642e-05, + "loss": 0.8815, + "step": 34110 + }, + { + "epoch": 3.14, + "learning_rate": 3.431552817872575e-05, + "loss": 0.9157, + "step": 34120 + }, + { + "epoch": 3.14, + "learning_rate": 3.431093132297509e-05, + "loss": 0.9155, + "step": 34130 + }, + { + "epoch": 3.14, + "learning_rate": 3.4306334467224424e-05, + "loss": 0.9308, + "step": 34140 + }, + { + "epoch": 3.14, + "learning_rate": 3.4301737611473754e-05, + "loss": 0.9008, + "step": 34150 + }, + { + "epoch": 3.14, + "learning_rate": 3.429714075572309e-05, + "loss": 0.8021, + "step": 34160 + }, + { + "epoch": 3.14, + "learning_rate": 3.429254389997242e-05, + "loss": 0.8611, + "step": 34170 + }, + { + "epoch": 3.14, + "learning_rate": 3.428794704422175e-05, + "loss": 0.9336, + "step": 34180 + }, + { + "epoch": 3.14, + "learning_rate": 3.4283350188471086e-05, + "loss": 0.7743, + "step": 34190 + }, + { + "epoch": 3.14, + "learning_rate": 3.427875333272042e-05, + "loss": 0.9723, + "step": 34200 + }, + { + "epoch": 3.15, + "learning_rate": 3.427415647696975e-05, + "loss": 1.0241, + "step": 34210 + }, + { + "epoch": 3.15, + "learning_rate": 3.426955962121909e-05, + "loss": 0.8538, + "step": 34220 + }, + { + "epoch": 3.15, + "learning_rate": 3.4264962765468425e-05, + "loss": 0.9589, + "step": 34230 + }, + { + "epoch": 3.15, + "learning_rate": 3.4260365909717755e-05, + "loss": 0.9358, + "step": 34240 + }, + { + "epoch": 3.15, + "learning_rate": 3.425576905396709e-05, + "loss": 0.7696, + "step": 34250 + }, + { + "epoch": 3.15, + "learning_rate": 3.425117219821642e-05, + "loss": 0.9368, + "step": 34260 + }, + { + "epoch": 3.15, + "learning_rate": 3.424657534246575e-05, + "loss": 0.9699, + "step": 34270 + }, + { + "epoch": 3.15, + "learning_rate": 3.424197848671509e-05, + "loss": 0.8378, + "step": 34280 + }, + { + "epoch": 3.15, + "learning_rate": 3.4237381630964424e-05, + "loss": 0.8822, + "step": 34290 + }, + { + "epoch": 3.15, + "learning_rate": 3.4232784775213754e-05, + "loss": 0.8159, + "step": 34300 + }, + { + "epoch": 3.15, + "learning_rate": 3.422818791946309e-05, + "loss": 0.9017, + "step": 34310 + }, + { + "epoch": 3.16, + "learning_rate": 3.4223591063712427e-05, + "loss": 0.8932, + "step": 34320 + }, + { + "epoch": 3.16, + "learning_rate": 3.4218994207961756e-05, + "loss": 0.8677, + "step": 34330 + }, + { + "epoch": 3.16, + "learning_rate": 3.421439735221109e-05, + "loss": 0.9006, + "step": 34340 + }, + { + "epoch": 3.16, + "learning_rate": 3.420980049646042e-05, + "loss": 0.8388, + "step": 34350 + }, + { + "epoch": 3.16, + "learning_rate": 3.420520364070975e-05, + "loss": 0.968, + "step": 34360 + }, + { + "epoch": 3.16, + "learning_rate": 3.420060678495909e-05, + "loss": 0.8436, + "step": 34370 + }, + { + "epoch": 3.16, + "learning_rate": 3.4196009929208425e-05, + "loss": 0.9324, + "step": 34380 + }, + { + "epoch": 3.16, + "learning_rate": 3.4191413073457755e-05, + "loss": 0.8686, + "step": 34390 + }, + { + "epoch": 3.16, + "learning_rate": 3.418681621770709e-05, + "loss": 0.8334, + "step": 34400 + }, + { + "epoch": 3.16, + "learning_rate": 3.418221936195643e-05, + "loss": 0.8932, + "step": 34410 + }, + { + "epoch": 3.16, + "learning_rate": 3.417762250620576e-05, + "loss": 0.8233, + "step": 34420 + }, + { + "epoch": 3.17, + "learning_rate": 3.4173025650455094e-05, + "loss": 0.9848, + "step": 34430 + }, + { + "epoch": 3.17, + "learning_rate": 3.4168428794704424e-05, + "loss": 0.9797, + "step": 34440 + }, + { + "epoch": 3.17, + "learning_rate": 3.416383193895375e-05, + "loss": 0.813, + "step": 34450 + }, + { + "epoch": 3.17, + "learning_rate": 3.415923508320309e-05, + "loss": 0.9829, + "step": 34460 + }, + { + "epoch": 3.17, + "learning_rate": 3.4154638227452426e-05, + "loss": 0.924, + "step": 34470 + }, + { + "epoch": 3.17, + "learning_rate": 3.4150041371701756e-05, + "loss": 0.887, + "step": 34480 + }, + { + "epoch": 3.17, + "learning_rate": 3.414544451595109e-05, + "loss": 0.8847, + "step": 34490 + }, + { + "epoch": 3.17, + "learning_rate": 3.414084766020043e-05, + "loss": 0.8208, + "step": 34500 + }, + { + "epoch": 3.17, + "learning_rate": 3.413625080444976e-05, + "loss": 0.9585, + "step": 34510 + }, + { + "epoch": 3.17, + "learning_rate": 3.4131653948699095e-05, + "loss": 0.849, + "step": 34520 + }, + { + "epoch": 3.17, + "learning_rate": 3.4127057092948425e-05, + "loss": 0.9063, + "step": 34530 + }, + { + "epoch": 3.18, + "learning_rate": 3.4122460237197755e-05, + "loss": 0.8693, + "step": 34540 + }, + { + "epoch": 3.18, + "learning_rate": 3.411786338144709e-05, + "loss": 0.8926, + "step": 34550 + }, + { + "epoch": 3.18, + "learning_rate": 3.411326652569643e-05, + "loss": 0.9074, + "step": 34560 + }, + { + "epoch": 3.18, + "learning_rate": 3.410866966994576e-05, + "loss": 0.9194, + "step": 34570 + }, + { + "epoch": 3.18, + "learning_rate": 3.4104072814195094e-05, + "loss": 0.8922, + "step": 34580 + }, + { + "epoch": 3.18, + "learning_rate": 3.4099475958444423e-05, + "loss": 0.9205, + "step": 34590 + }, + { + "epoch": 3.18, + "learning_rate": 3.409487910269376e-05, + "loss": 0.9657, + "step": 34600 + }, + { + "epoch": 3.18, + "learning_rate": 3.4090282246943096e-05, + "loss": 0.7817, + "step": 34610 + }, + { + "epoch": 3.18, + "learning_rate": 3.4085685391192426e-05, + "loss": 0.9077, + "step": 34620 + }, + { + "epoch": 3.18, + "learning_rate": 3.4081088535441756e-05, + "loss": 0.9608, + "step": 34630 + }, + { + "epoch": 3.18, + "learning_rate": 3.407649167969109e-05, + "loss": 0.8699, + "step": 34640 + }, + { + "epoch": 3.19, + "learning_rate": 3.407189482394043e-05, + "loss": 0.9433, + "step": 34650 + }, + { + "epoch": 3.19, + "learning_rate": 3.406729796818976e-05, + "loss": 1.0109, + "step": 34660 + }, + { + "epoch": 3.19, + "learning_rate": 3.4062701112439095e-05, + "loss": 0.902, + "step": 34670 + }, + { + "epoch": 3.19, + "learning_rate": 3.4058104256688425e-05, + "loss": 0.8089, + "step": 34680 + }, + { + "epoch": 3.19, + "learning_rate": 3.405350740093776e-05, + "loss": 0.9388, + "step": 34690 + }, + { + "epoch": 3.19, + "learning_rate": 3.40489105451871e-05, + "loss": 0.8453, + "step": 34700 + }, + { + "epoch": 3.19, + "learning_rate": 3.404431368943643e-05, + "loss": 0.908, + "step": 34710 + }, + { + "epoch": 3.19, + "learning_rate": 3.403971683368576e-05, + "loss": 0.9422, + "step": 34720 + }, + { + "epoch": 3.19, + "learning_rate": 3.4035119977935093e-05, + "loss": 0.9895, + "step": 34730 + }, + { + "epoch": 3.19, + "learning_rate": 3.403052312218443e-05, + "loss": 0.816, + "step": 34740 + }, + { + "epoch": 3.19, + "learning_rate": 3.402592626643376e-05, + "loss": 0.9757, + "step": 34750 + }, + { + "epoch": 3.2, + "learning_rate": 3.4021329410683096e-05, + "loss": 0.9101, + "step": 34760 + }, + { + "epoch": 3.2, + "learning_rate": 3.4016732554932426e-05, + "loss": 0.869, + "step": 34770 + }, + { + "epoch": 3.2, + "learning_rate": 3.401213569918176e-05, + "loss": 0.8528, + "step": 34780 + }, + { + "epoch": 3.2, + "learning_rate": 3.40075388434311e-05, + "loss": 0.9195, + "step": 34790 + }, + { + "epoch": 3.2, + "learning_rate": 3.400294198768043e-05, + "loss": 0.7873, + "step": 34800 + }, + { + "epoch": 3.2, + "learning_rate": 3.399834513192976e-05, + "loss": 0.9415, + "step": 34810 + }, + { + "epoch": 3.2, + "learning_rate": 3.3993748276179095e-05, + "loss": 0.944, + "step": 34820 + }, + { + "epoch": 3.2, + "learning_rate": 3.398915142042843e-05, + "loss": 0.9178, + "step": 34830 + }, + { + "epoch": 3.2, + "learning_rate": 3.398455456467776e-05, + "loss": 0.9838, + "step": 34840 + }, + { + "epoch": 3.2, + "learning_rate": 3.39799577089271e-05, + "loss": 0.844, + "step": 34850 + }, + { + "epoch": 3.2, + "learning_rate": 3.397536085317643e-05, + "loss": 0.8828, + "step": 34860 + }, + { + "epoch": 3.21, + "learning_rate": 3.3970763997425763e-05, + "loss": 0.8451, + "step": 34870 + }, + { + "epoch": 3.21, + "learning_rate": 3.39661671416751e-05, + "loss": 0.8773, + "step": 34880 + }, + { + "epoch": 3.21, + "learning_rate": 3.396157028592443e-05, + "loss": 0.8892, + "step": 34890 + }, + { + "epoch": 3.21, + "learning_rate": 3.395697343017376e-05, + "loss": 0.9517, + "step": 34900 + }, + { + "epoch": 3.21, + "learning_rate": 3.3952376574423096e-05, + "loss": 0.8766, + "step": 34910 + }, + { + "epoch": 3.21, + "learning_rate": 3.3947779718672426e-05, + "loss": 0.9569, + "step": 34920 + }, + { + "epoch": 3.21, + "learning_rate": 3.394318286292176e-05, + "loss": 0.9277, + "step": 34930 + }, + { + "epoch": 3.21, + "learning_rate": 3.39385860071711e-05, + "loss": 0.8348, + "step": 34940 + }, + { + "epoch": 3.21, + "learning_rate": 3.393398915142043e-05, + "loss": 0.8151, + "step": 34950 + }, + { + "epoch": 3.21, + "learning_rate": 3.3929392295669765e-05, + "loss": 0.8842, + "step": 34960 + }, + { + "epoch": 3.22, + "learning_rate": 3.39247954399191e-05, + "loss": 0.7975, + "step": 34970 + }, + { + "epoch": 3.22, + "learning_rate": 3.392019858416843e-05, + "loss": 0.9039, + "step": 34980 + }, + { + "epoch": 3.22, + "learning_rate": 3.391560172841776e-05, + "loss": 0.8564, + "step": 34990 + }, + { + "epoch": 3.22, + "learning_rate": 3.39110048726671e-05, + "loss": 0.8602, + "step": 35000 + }, + { + "epoch": 3.22, + "eval_accuracy": 0.5812227074235807, + "eval_loss": 0.8809640407562256, + "eval_runtime": 159.4592, + "eval_samples_per_second": 28.722, + "eval_steps_per_second": 3.593, + "step": 35000 + }, + { + "epoch": 3.22, + "learning_rate": 3.390640801691643e-05, + "loss": 0.7987, + "step": 35010 + }, + { + "epoch": 3.22, + "learning_rate": 3.390181116116576e-05, + "loss": 0.768, + "step": 35020 + }, + { + "epoch": 3.22, + "learning_rate": 3.38972143054151e-05, + "loss": 0.9813, + "step": 35030 + }, + { + "epoch": 3.22, + "learning_rate": 3.389261744966443e-05, + "loss": 0.91, + "step": 35040 + }, + { + "epoch": 3.22, + "learning_rate": 3.3888020593913766e-05, + "loss": 1.0066, + "step": 35050 + }, + { + "epoch": 3.22, + "learning_rate": 3.38834237381631e-05, + "loss": 0.8476, + "step": 35060 + }, + { + "epoch": 3.22, + "learning_rate": 3.387882688241243e-05, + "loss": 0.8752, + "step": 35070 + }, + { + "epoch": 3.23, + "learning_rate": 3.387423002666176e-05, + "loss": 0.9049, + "step": 35080 + }, + { + "epoch": 3.23, + "learning_rate": 3.38696331709111e-05, + "loss": 0.9203, + "step": 35090 + }, + { + "epoch": 3.23, + "learning_rate": 3.386503631516043e-05, + "loss": 0.9157, + "step": 35100 + }, + { + "epoch": 3.23, + "learning_rate": 3.3860439459409764e-05, + "loss": 0.8949, + "step": 35110 + }, + { + "epoch": 3.23, + "learning_rate": 3.38558426036591e-05, + "loss": 0.8332, + "step": 35120 + }, + { + "epoch": 3.23, + "learning_rate": 3.385124574790843e-05, + "loss": 0.8988, + "step": 35130 + }, + { + "epoch": 3.23, + "learning_rate": 3.384664889215777e-05, + "loss": 0.8319, + "step": 35140 + }, + { + "epoch": 3.23, + "learning_rate": 3.3842052036407104e-05, + "loss": 0.8825, + "step": 35150 + }, + { + "epoch": 3.23, + "learning_rate": 3.383745518065643e-05, + "loss": 0.8962, + "step": 35160 + }, + { + "epoch": 3.23, + "learning_rate": 3.383285832490576e-05, + "loss": 0.8713, + "step": 35170 + }, + { + "epoch": 3.23, + "learning_rate": 3.38282614691551e-05, + "loss": 0.8424, + "step": 35180 + }, + { + "epoch": 3.24, + "learning_rate": 3.382366461340443e-05, + "loss": 0.9886, + "step": 35190 + }, + { + "epoch": 3.24, + "learning_rate": 3.3819067757653766e-05, + "loss": 0.8718, + "step": 35200 + }, + { + "epoch": 3.24, + "learning_rate": 3.38144709019031e-05, + "loss": 0.8977, + "step": 35210 + }, + { + "epoch": 3.24, + "learning_rate": 3.380987404615243e-05, + "loss": 0.8814, + "step": 35220 + }, + { + "epoch": 3.24, + "learning_rate": 3.380527719040177e-05, + "loss": 0.8163, + "step": 35230 + }, + { + "epoch": 3.24, + "learning_rate": 3.3800680334651105e-05, + "loss": 0.838, + "step": 35240 + }, + { + "epoch": 3.24, + "learning_rate": 3.379608347890043e-05, + "loss": 0.9469, + "step": 35250 + }, + { + "epoch": 3.24, + "learning_rate": 3.3791486623149764e-05, + "loss": 0.8695, + "step": 35260 + }, + { + "epoch": 3.24, + "learning_rate": 3.37868897673991e-05, + "loss": 0.944, + "step": 35270 + }, + { + "epoch": 3.24, + "learning_rate": 3.378229291164843e-05, + "loss": 0.8787, + "step": 35280 + }, + { + "epoch": 3.24, + "learning_rate": 3.377769605589777e-05, + "loss": 0.8329, + "step": 35290 + }, + { + "epoch": 3.25, + "learning_rate": 3.37730992001471e-05, + "loss": 0.9235, + "step": 35300 + }, + { + "epoch": 3.25, + "learning_rate": 3.376850234439643e-05, + "loss": 0.8671, + "step": 35310 + }, + { + "epoch": 3.25, + "learning_rate": 3.376390548864577e-05, + "loss": 0.9332, + "step": 35320 + }, + { + "epoch": 3.25, + "learning_rate": 3.3759308632895106e-05, + "loss": 0.9524, + "step": 35330 + }, + { + "epoch": 3.25, + "learning_rate": 3.375471177714443e-05, + "loss": 0.8416, + "step": 35340 + }, + { + "epoch": 3.25, + "learning_rate": 3.3750114921393765e-05, + "loss": 0.9621, + "step": 35350 + }, + { + "epoch": 3.25, + "learning_rate": 3.37455180656431e-05, + "loss": 0.8339, + "step": 35360 + }, + { + "epoch": 3.25, + "learning_rate": 3.374092120989243e-05, + "loss": 0.9628, + "step": 35370 + }, + { + "epoch": 3.25, + "learning_rate": 3.373632435414177e-05, + "loss": 0.877, + "step": 35380 + }, + { + "epoch": 3.25, + "learning_rate": 3.3731727498391104e-05, + "loss": 0.9715, + "step": 35390 + }, + { + "epoch": 3.25, + "learning_rate": 3.3727130642640434e-05, + "loss": 0.9656, + "step": 35400 + }, + { + "epoch": 3.26, + "learning_rate": 3.372253378688977e-05, + "loss": 0.881, + "step": 35410 + }, + { + "epoch": 3.26, + "learning_rate": 3.371793693113911e-05, + "loss": 0.7468, + "step": 35420 + }, + { + "epoch": 3.26, + "learning_rate": 3.371334007538843e-05, + "loss": 0.9173, + "step": 35430 + }, + { + "epoch": 3.26, + "learning_rate": 3.3708743219637767e-05, + "loss": 0.7676, + "step": 35440 + }, + { + "epoch": 3.26, + "learning_rate": 3.37041463638871e-05, + "loss": 0.9244, + "step": 35450 + }, + { + "epoch": 3.26, + "learning_rate": 3.369954950813643e-05, + "loss": 0.8826, + "step": 35460 + }, + { + "epoch": 3.26, + "learning_rate": 3.369495265238577e-05, + "loss": 0.9039, + "step": 35470 + }, + { + "epoch": 3.26, + "learning_rate": 3.3690355796635106e-05, + "loss": 0.827, + "step": 35480 + }, + { + "epoch": 3.26, + "learning_rate": 3.3685758940884435e-05, + "loss": 0.9102, + "step": 35490 + }, + { + "epoch": 3.26, + "learning_rate": 3.368116208513377e-05, + "loss": 0.9399, + "step": 35500 + }, + { + "epoch": 3.26, + "learning_rate": 3.367656522938311e-05, + "loss": 0.9373, + "step": 35510 + }, + { + "epoch": 3.27, + "learning_rate": 3.367196837363243e-05, + "loss": 0.8053, + "step": 35520 + }, + { + "epoch": 3.27, + "learning_rate": 3.366737151788177e-05, + "loss": 0.9292, + "step": 35530 + }, + { + "epoch": 3.27, + "learning_rate": 3.3662774662131104e-05, + "loss": 0.8045, + "step": 35540 + }, + { + "epoch": 3.27, + "learning_rate": 3.3658177806380434e-05, + "loss": 0.9666, + "step": 35550 + }, + { + "epoch": 3.27, + "learning_rate": 3.365358095062977e-05, + "loss": 0.7391, + "step": 35560 + }, + { + "epoch": 3.27, + "learning_rate": 3.364898409487911e-05, + "loss": 0.8733, + "step": 35570 + }, + { + "epoch": 3.27, + "learning_rate": 3.3644387239128437e-05, + "loss": 0.8671, + "step": 35580 + }, + { + "epoch": 3.27, + "learning_rate": 3.363979038337777e-05, + "loss": 0.7993, + "step": 35590 + }, + { + "epoch": 3.27, + "learning_rate": 3.363519352762711e-05, + "loss": 0.8466, + "step": 35600 + }, + { + "epoch": 3.27, + "learning_rate": 3.363059667187643e-05, + "loss": 0.913, + "step": 35610 + }, + { + "epoch": 3.27, + "learning_rate": 3.362599981612577e-05, + "loss": 0.993, + "step": 35620 + }, + { + "epoch": 3.28, + "learning_rate": 3.3621402960375105e-05, + "loss": 0.8669, + "step": 35630 + }, + { + "epoch": 3.28, + "learning_rate": 3.3616806104624435e-05, + "loss": 0.8559, + "step": 35640 + }, + { + "epoch": 3.28, + "learning_rate": 3.361220924887377e-05, + "loss": 0.8669, + "step": 35650 + }, + { + "epoch": 3.28, + "learning_rate": 3.360761239312311e-05, + "loss": 0.9047, + "step": 35660 + }, + { + "epoch": 3.28, + "learning_rate": 3.360301553737244e-05, + "loss": 0.7842, + "step": 35670 + }, + { + "epoch": 3.28, + "learning_rate": 3.3598418681621774e-05, + "loss": 1.0393, + "step": 35680 + }, + { + "epoch": 3.28, + "learning_rate": 3.359382182587111e-05, + "loss": 0.8198, + "step": 35690 + }, + { + "epoch": 3.28, + "learning_rate": 3.358922497012044e-05, + "loss": 0.9653, + "step": 35700 + }, + { + "epoch": 3.28, + "learning_rate": 3.358462811436977e-05, + "loss": 1.1026, + "step": 35710 + }, + { + "epoch": 3.28, + "learning_rate": 3.3580031258619107e-05, + "loss": 0.7926, + "step": 35720 + }, + { + "epoch": 3.28, + "learning_rate": 3.3575434402868436e-05, + "loss": 0.9226, + "step": 35730 + }, + { + "epoch": 3.29, + "learning_rate": 3.357083754711777e-05, + "loss": 0.8428, + "step": 35740 + }, + { + "epoch": 3.29, + "learning_rate": 3.356624069136711e-05, + "loss": 0.8515, + "step": 35750 + }, + { + "epoch": 3.29, + "learning_rate": 3.356164383561644e-05, + "loss": 0.9077, + "step": 35760 + }, + { + "epoch": 3.29, + "learning_rate": 3.3557046979865775e-05, + "loss": 0.7909, + "step": 35770 + }, + { + "epoch": 3.29, + "learning_rate": 3.355245012411511e-05, + "loss": 0.8438, + "step": 35780 + }, + { + "epoch": 3.29, + "learning_rate": 3.354785326836444e-05, + "loss": 0.8066, + "step": 35790 + }, + { + "epoch": 3.29, + "learning_rate": 3.354325641261377e-05, + "loss": 0.9167, + "step": 35800 + }, + { + "epoch": 3.29, + "learning_rate": 3.353865955686311e-05, + "loss": 1.0451, + "step": 35810 + }, + { + "epoch": 3.29, + "learning_rate": 3.353406270111244e-05, + "loss": 0.9119, + "step": 35820 + }, + { + "epoch": 3.29, + "learning_rate": 3.3529465845361774e-05, + "loss": 0.9137, + "step": 35830 + }, + { + "epoch": 3.3, + "learning_rate": 3.352486898961111e-05, + "loss": 0.9197, + "step": 35840 + }, + { + "epoch": 3.3, + "learning_rate": 3.352027213386044e-05, + "loss": 0.8213, + "step": 35850 + }, + { + "epoch": 3.3, + "learning_rate": 3.351567527810978e-05, + "loss": 0.7945, + "step": 35860 + }, + { + "epoch": 3.3, + "learning_rate": 3.351107842235911e-05, + "loss": 0.9421, + "step": 35870 + }, + { + "epoch": 3.3, + "learning_rate": 3.350648156660844e-05, + "loss": 0.8511, + "step": 35880 + }, + { + "epoch": 3.3, + "learning_rate": 3.350188471085777e-05, + "loss": 0.9058, + "step": 35890 + }, + { + "epoch": 3.3, + "learning_rate": 3.349728785510711e-05, + "loss": 0.9864, + "step": 35900 + }, + { + "epoch": 3.3, + "learning_rate": 3.349269099935644e-05, + "loss": 0.9045, + "step": 35910 + }, + { + "epoch": 3.3, + "learning_rate": 3.3488094143605775e-05, + "loss": 0.7825, + "step": 35920 + }, + { + "epoch": 3.3, + "learning_rate": 3.348349728785511e-05, + "loss": 1.0111, + "step": 35930 + }, + { + "epoch": 3.3, + "learning_rate": 3.347890043210444e-05, + "loss": 0.9093, + "step": 35940 + }, + { + "epoch": 3.31, + "learning_rate": 3.347430357635378e-05, + "loss": 0.8687, + "step": 35950 + }, + { + "epoch": 3.31, + "learning_rate": 3.3469706720603114e-05, + "loss": 0.8731, + "step": 35960 + }, + { + "epoch": 3.31, + "learning_rate": 3.3465109864852444e-05, + "loss": 0.8436, + "step": 35970 + }, + { + "epoch": 3.31, + "learning_rate": 3.3460513009101774e-05, + "loss": 0.9515, + "step": 35980 + }, + { + "epoch": 3.31, + "learning_rate": 3.345591615335111e-05, + "loss": 0.8847, + "step": 35990 + }, + { + "epoch": 3.31, + "learning_rate": 3.345131929760044e-05, + "loss": 0.9414, + "step": 36000 + }, + { + "epoch": 3.31, + "learning_rate": 3.3446722441849776e-05, + "loss": 0.7895, + "step": 36010 + }, + { + "epoch": 3.31, + "learning_rate": 3.344212558609911e-05, + "loss": 0.9047, + "step": 36020 + }, + { + "epoch": 3.31, + "learning_rate": 3.343752873034844e-05, + "loss": 0.8892, + "step": 36030 + }, + { + "epoch": 3.31, + "learning_rate": 3.343293187459778e-05, + "loss": 0.9091, + "step": 36040 + }, + { + "epoch": 3.31, + "learning_rate": 3.3428335018847115e-05, + "loss": 0.8406, + "step": 36050 + }, + { + "epoch": 3.32, + "learning_rate": 3.3423738163096445e-05, + "loss": 0.9807, + "step": 36060 + }, + { + "epoch": 3.32, + "learning_rate": 3.3419141307345775e-05, + "loss": 0.8558, + "step": 36070 + }, + { + "epoch": 3.32, + "learning_rate": 3.341454445159511e-05, + "loss": 0.8092, + "step": 36080 + }, + { + "epoch": 3.32, + "learning_rate": 3.340994759584444e-05, + "loss": 0.8839, + "step": 36090 + }, + { + "epoch": 3.32, + "learning_rate": 3.340535074009378e-05, + "loss": 1.0304, + "step": 36100 + }, + { + "epoch": 3.32, + "learning_rate": 3.3400753884343114e-05, + "loss": 0.9681, + "step": 36110 + }, + { + "epoch": 3.32, + "learning_rate": 3.3396157028592444e-05, + "loss": 0.8815, + "step": 36120 + }, + { + "epoch": 3.32, + "learning_rate": 3.339156017284178e-05, + "loss": 0.8635, + "step": 36130 + }, + { + "epoch": 3.32, + "learning_rate": 3.338696331709112e-05, + "loss": 1.0072, + "step": 36140 + }, + { + "epoch": 3.32, + "learning_rate": 3.3382366461340446e-05, + "loss": 0.925, + "step": 36150 + }, + { + "epoch": 3.32, + "learning_rate": 3.3377769605589776e-05, + "loss": 0.7863, + "step": 36160 + }, + { + "epoch": 3.33, + "learning_rate": 3.337317274983911e-05, + "loss": 0.8686, + "step": 36170 + }, + { + "epoch": 3.33, + "learning_rate": 3.336857589408844e-05, + "loss": 0.8808, + "step": 36180 + }, + { + "epoch": 3.33, + "learning_rate": 3.336397903833778e-05, + "loss": 0.8699, + "step": 36190 + }, + { + "epoch": 3.33, + "learning_rate": 3.3359382182587115e-05, + "loss": 0.8704, + "step": 36200 + }, + { + "epoch": 3.33, + "learning_rate": 3.3354785326836445e-05, + "loss": 0.7814, + "step": 36210 + }, + { + "epoch": 3.33, + "learning_rate": 3.335018847108578e-05, + "loss": 0.9641, + "step": 36220 + }, + { + "epoch": 3.33, + "learning_rate": 3.334559161533511e-05, + "loss": 0.9732, + "step": 36230 + }, + { + "epoch": 3.33, + "learning_rate": 3.334099475958445e-05, + "loss": 0.8606, + "step": 36240 + }, + { + "epoch": 3.33, + "learning_rate": 3.333639790383378e-05, + "loss": 0.8566, + "step": 36250 + }, + { + "epoch": 3.33, + "learning_rate": 3.3331801048083114e-05, + "loss": 0.7959, + "step": 36260 + }, + { + "epoch": 3.33, + "learning_rate": 3.3327204192332443e-05, + "loss": 0.8075, + "step": 36270 + }, + { + "epoch": 3.34, + "learning_rate": 3.332260733658178e-05, + "loss": 0.9688, + "step": 36280 + }, + { + "epoch": 3.34, + "learning_rate": 3.3318010480831116e-05, + "loss": 0.9285, + "step": 36290 + }, + { + "epoch": 3.34, + "learning_rate": 3.3313413625080446e-05, + "loss": 0.8178, + "step": 36300 + }, + { + "epoch": 3.34, + "learning_rate": 3.330881676932978e-05, + "loss": 0.9312, + "step": 36310 + }, + { + "epoch": 3.34, + "learning_rate": 3.330421991357911e-05, + "loss": 0.7951, + "step": 36320 + }, + { + "epoch": 3.34, + "learning_rate": 3.329962305782845e-05, + "loss": 0.7111, + "step": 36330 + }, + { + "epoch": 3.34, + "learning_rate": 3.329502620207778e-05, + "loss": 0.8959, + "step": 36340 + }, + { + "epoch": 3.34, + "learning_rate": 3.3290429346327115e-05, + "loss": 0.8613, + "step": 36350 + }, + { + "epoch": 3.34, + "learning_rate": 3.3285832490576445e-05, + "loss": 0.8583, + "step": 36360 + }, + { + "epoch": 3.34, + "learning_rate": 3.328123563482578e-05, + "loss": 0.8979, + "step": 36370 + }, + { + "epoch": 3.34, + "learning_rate": 3.327663877907512e-05, + "loss": 0.7978, + "step": 36380 + }, + { + "epoch": 3.35, + "learning_rate": 3.327204192332445e-05, + "loss": 0.802, + "step": 36390 + }, + { + "epoch": 3.35, + "learning_rate": 3.3267445067573784e-05, + "loss": 0.8505, + "step": 36400 + }, + { + "epoch": 3.35, + "learning_rate": 3.3262848211823113e-05, + "loss": 0.9354, + "step": 36410 + }, + { + "epoch": 3.35, + "learning_rate": 3.325825135607245e-05, + "loss": 0.8736, + "step": 36420 + }, + { + "epoch": 3.35, + "learning_rate": 3.325365450032178e-05, + "loss": 0.8526, + "step": 36430 + }, + { + "epoch": 3.35, + "learning_rate": 3.3249057644571116e-05, + "loss": 0.8188, + "step": 36440 + }, + { + "epoch": 3.35, + "learning_rate": 3.3244460788820446e-05, + "loss": 0.8639, + "step": 36450 + }, + { + "epoch": 3.35, + "learning_rate": 3.323986393306978e-05, + "loss": 0.9014, + "step": 36460 + }, + { + "epoch": 3.35, + "learning_rate": 3.323526707731912e-05, + "loss": 0.8125, + "step": 36470 + }, + { + "epoch": 3.35, + "learning_rate": 3.323067022156845e-05, + "loss": 0.8564, + "step": 36480 + }, + { + "epoch": 3.35, + "learning_rate": 3.3226073365817785e-05, + "loss": 0.9903, + "step": 36490 + }, + { + "epoch": 3.36, + "learning_rate": 3.3221476510067115e-05, + "loss": 0.8932, + "step": 36500 + }, + { + "epoch": 3.36, + "learning_rate": 3.321687965431645e-05, + "loss": 0.8649, + "step": 36510 + }, + { + "epoch": 3.36, + "learning_rate": 3.321228279856578e-05, + "loss": 0.9021, + "step": 36520 + }, + { + "epoch": 3.36, + "learning_rate": 3.320768594281512e-05, + "loss": 0.8525, + "step": 36530 + }, + { + "epoch": 3.36, + "learning_rate": 3.320308908706445e-05, + "loss": 0.9203, + "step": 36540 + }, + { + "epoch": 3.36, + "learning_rate": 3.3198492231313784e-05, + "loss": 0.8906, + "step": 36550 + }, + { + "epoch": 3.36, + "learning_rate": 3.319389537556311e-05, + "loss": 0.9099, + "step": 36560 + }, + { + "epoch": 3.36, + "learning_rate": 3.318929851981245e-05, + "loss": 0.8686, + "step": 36570 + }, + { + "epoch": 3.36, + "learning_rate": 3.3184701664061786e-05, + "loss": 1.0292, + "step": 36580 + }, + { + "epoch": 3.36, + "learning_rate": 3.3180104808311116e-05, + "loss": 0.8747, + "step": 36590 + }, + { + "epoch": 3.36, + "learning_rate": 3.317550795256045e-05, + "loss": 0.8584, + "step": 36600 + }, + { + "epoch": 3.37, + "learning_rate": 3.317091109680978e-05, + "loss": 0.8519, + "step": 36610 + }, + { + "epoch": 3.37, + "learning_rate": 3.316631424105912e-05, + "loss": 0.9149, + "step": 36620 + }, + { + "epoch": 3.37, + "learning_rate": 3.316171738530845e-05, + "loss": 0.8592, + "step": 36630 + }, + { + "epoch": 3.37, + "learning_rate": 3.3157120529557785e-05, + "loss": 0.8492, + "step": 36640 + }, + { + "epoch": 3.37, + "learning_rate": 3.3152523673807114e-05, + "loss": 0.8362, + "step": 36650 + }, + { + "epoch": 3.37, + "learning_rate": 3.314792681805645e-05, + "loss": 0.8104, + "step": 36660 + }, + { + "epoch": 3.37, + "learning_rate": 3.314332996230579e-05, + "loss": 0.8128, + "step": 36670 + }, + { + "epoch": 3.37, + "learning_rate": 3.313873310655512e-05, + "loss": 0.8348, + "step": 36680 + }, + { + "epoch": 3.37, + "learning_rate": 3.3134136250804454e-05, + "loss": 0.8807, + "step": 36690 + }, + { + "epoch": 3.37, + "learning_rate": 3.312953939505378e-05, + "loss": 0.8559, + "step": 36700 + }, + { + "epoch": 3.38, + "learning_rate": 3.312494253930312e-05, + "loss": 0.9648, + "step": 36710 + }, + { + "epoch": 3.38, + "learning_rate": 3.312034568355245e-05, + "loss": 0.8726, + "step": 36720 + }, + { + "epoch": 3.38, + "learning_rate": 3.3115748827801786e-05, + "loss": 0.8292, + "step": 36730 + }, + { + "epoch": 3.38, + "learning_rate": 3.3111151972051116e-05, + "loss": 1.0946, + "step": 36740 + }, + { + "epoch": 3.38, + "learning_rate": 3.310655511630045e-05, + "loss": 0.8669, + "step": 36750 + }, + { + "epoch": 3.38, + "learning_rate": 3.310195826054979e-05, + "loss": 0.9603, + "step": 36760 + }, + { + "epoch": 3.38, + "learning_rate": 3.309736140479912e-05, + "loss": 0.9598, + "step": 36770 + }, + { + "epoch": 3.38, + "learning_rate": 3.3092764549048455e-05, + "loss": 0.778, + "step": 36780 + }, + { + "epoch": 3.38, + "learning_rate": 3.3088167693297784e-05, + "loss": 0.9678, + "step": 36790 + }, + { + "epoch": 3.38, + "learning_rate": 3.308357083754712e-05, + "loss": 0.8948, + "step": 36800 + }, + { + "epoch": 3.38, + "learning_rate": 3.307897398179645e-05, + "loss": 0.8723, + "step": 36810 + }, + { + "epoch": 3.39, + "learning_rate": 3.307437712604579e-05, + "loss": 0.7964, + "step": 36820 + }, + { + "epoch": 3.39, + "learning_rate": 3.306978027029512e-05, + "loss": 0.8847, + "step": 36830 + }, + { + "epoch": 3.39, + "learning_rate": 3.306518341454445e-05, + "loss": 0.9146, + "step": 36840 + }, + { + "epoch": 3.39, + "learning_rate": 3.306058655879379e-05, + "loss": 0.9829, + "step": 36850 + }, + { + "epoch": 3.39, + "learning_rate": 3.305598970304312e-05, + "loss": 1.044, + "step": 36860 + }, + { + "epoch": 3.39, + "learning_rate": 3.3051392847292456e-05, + "loss": 0.9265, + "step": 36870 + }, + { + "epoch": 3.39, + "learning_rate": 3.3046795991541786e-05, + "loss": 0.8669, + "step": 36880 + }, + { + "epoch": 3.39, + "learning_rate": 3.3042199135791115e-05, + "loss": 0.8347, + "step": 36890 + }, + { + "epoch": 3.39, + "learning_rate": 3.303760228004045e-05, + "loss": 0.9204, + "step": 36900 + }, + { + "epoch": 3.39, + "learning_rate": 3.303300542428979e-05, + "loss": 1.0002, + "step": 36910 + }, + { + "epoch": 3.39, + "learning_rate": 3.302840856853912e-05, + "loss": 0.9019, + "step": 36920 + }, + { + "epoch": 3.4, + "learning_rate": 3.3023811712788454e-05, + "loss": 0.9164, + "step": 36930 + }, + { + "epoch": 3.4, + "learning_rate": 3.301921485703779e-05, + "loss": 0.9361, + "step": 36940 + }, + { + "epoch": 3.4, + "learning_rate": 3.301461800128712e-05, + "loss": 0.9598, + "step": 36950 + }, + { + "epoch": 3.4, + "learning_rate": 3.301002114553646e-05, + "loss": 0.9448, + "step": 36960 + }, + { + "epoch": 3.4, + "learning_rate": 3.300542428978579e-05, + "loss": 0.8678, + "step": 36970 + }, + { + "epoch": 3.4, + "learning_rate": 3.3000827434035117e-05, + "loss": 0.8453, + "step": 36980 + }, + { + "epoch": 3.4, + "learning_rate": 3.299623057828445e-05, + "loss": 0.7808, + "step": 36990 + }, + { + "epoch": 3.4, + "learning_rate": 3.299163372253379e-05, + "loss": 1.0593, + "step": 37000 + }, + { + "epoch": 3.4, + "learning_rate": 3.298703686678312e-05, + "loss": 0.8958, + "step": 37010 + }, + { + "epoch": 3.4, + "learning_rate": 3.2982440011032456e-05, + "loss": 0.896, + "step": 37020 + }, + { + "epoch": 3.4, + "learning_rate": 3.297784315528179e-05, + "loss": 0.7152, + "step": 37030 + }, + { + "epoch": 3.41, + "learning_rate": 3.297324629953112e-05, + "loss": 0.8523, + "step": 37040 + }, + { + "epoch": 3.41, + "learning_rate": 3.296864944378046e-05, + "loss": 0.7689, + "step": 37050 + }, + { + "epoch": 3.41, + "learning_rate": 3.296405258802979e-05, + "loss": 0.8885, + "step": 37060 + }, + { + "epoch": 3.41, + "learning_rate": 3.295945573227912e-05, + "loss": 0.8705, + "step": 37070 + }, + { + "epoch": 3.41, + "learning_rate": 3.2954858876528454e-05, + "loss": 1.0037, + "step": 37080 + }, + { + "epoch": 3.41, + "learning_rate": 3.295026202077779e-05, + "loss": 0.9165, + "step": 37090 + }, + { + "epoch": 3.41, + "learning_rate": 3.294566516502712e-05, + "loss": 0.8806, + "step": 37100 + }, + { + "epoch": 3.41, + "learning_rate": 3.294106830927646e-05, + "loss": 0.9214, + "step": 37110 + }, + { + "epoch": 3.41, + "learning_rate": 3.293647145352579e-05, + "loss": 0.7886, + "step": 37120 + }, + { + "epoch": 3.41, + "learning_rate": 3.293187459777512e-05, + "loss": 0.8393, + "step": 37130 + }, + { + "epoch": 3.41, + "learning_rate": 3.292727774202446e-05, + "loss": 0.9017, + "step": 37140 + }, + { + "epoch": 3.42, + "learning_rate": 3.292268088627379e-05, + "loss": 0.9121, + "step": 37150 + }, + { + "epoch": 3.42, + "learning_rate": 3.291808403052312e-05, + "loss": 1.052, + "step": 37160 + }, + { + "epoch": 3.42, + "learning_rate": 3.2913487174772455e-05, + "loss": 0.8703, + "step": 37170 + }, + { + "epoch": 3.42, + "learning_rate": 3.290889031902179e-05, + "loss": 0.8159, + "step": 37180 + }, + { + "epoch": 3.42, + "learning_rate": 3.290429346327112e-05, + "loss": 0.8253, + "step": 37190 + }, + { + "epoch": 3.42, + "learning_rate": 3.289969660752046e-05, + "loss": 0.9078, + "step": 37200 + }, + { + "epoch": 3.42, + "learning_rate": 3.2895099751769795e-05, + "loss": 0.9397, + "step": 37210 + }, + { + "epoch": 3.42, + "learning_rate": 3.2890502896019124e-05, + "loss": 0.9293, + "step": 37220 + }, + { + "epoch": 3.42, + "learning_rate": 3.288590604026846e-05, + "loss": 0.827, + "step": 37230 + }, + { + "epoch": 3.42, + "learning_rate": 3.288130918451779e-05, + "loss": 0.9028, + "step": 37240 + }, + { + "epoch": 3.42, + "learning_rate": 3.287671232876712e-05, + "loss": 1.0455, + "step": 37250 + }, + { + "epoch": 3.43, + "learning_rate": 3.2872115473016457e-05, + "loss": 1.0202, + "step": 37260 + }, + { + "epoch": 3.43, + "learning_rate": 3.286751861726579e-05, + "loss": 0.8092, + "step": 37270 + }, + { + "epoch": 3.43, + "learning_rate": 3.286292176151512e-05, + "loss": 0.9324, + "step": 37280 + }, + { + "epoch": 3.43, + "learning_rate": 3.285832490576446e-05, + "loss": 0.7294, + "step": 37290 + }, + { + "epoch": 3.43, + "learning_rate": 3.2853728050013796e-05, + "loss": 0.8586, + "step": 37300 + }, + { + "epoch": 3.43, + "learning_rate": 3.2849131194263125e-05, + "loss": 0.9207, + "step": 37310 + }, + { + "epoch": 3.43, + "learning_rate": 3.284453433851246e-05, + "loss": 0.844, + "step": 37320 + }, + { + "epoch": 3.43, + "learning_rate": 3.283993748276179e-05, + "loss": 0.9576, + "step": 37330 + }, + { + "epoch": 3.43, + "learning_rate": 3.283534062701112e-05, + "loss": 1.0501, + "step": 37340 + }, + { + "epoch": 3.43, + "learning_rate": 3.283074377126046e-05, + "loss": 0.8832, + "step": 37350 + }, + { + "epoch": 3.43, + "learning_rate": 3.2826146915509794e-05, + "loss": 0.9086, + "step": 37360 + }, + { + "epoch": 3.44, + "learning_rate": 3.2821550059759124e-05, + "loss": 0.8788, + "step": 37370 + }, + { + "epoch": 3.44, + "learning_rate": 3.281695320400846e-05, + "loss": 0.8528, + "step": 37380 + }, + { + "epoch": 3.44, + "learning_rate": 3.28123563482578e-05, + "loss": 0.8061, + "step": 37390 + }, + { + "epoch": 3.44, + "learning_rate": 3.280775949250713e-05, + "loss": 0.9123, + "step": 37400 + }, + { + "epoch": 3.44, + "learning_rate": 3.280316263675646e-05, + "loss": 0.9123, + "step": 37410 + }, + { + "epoch": 3.44, + "learning_rate": 3.279856578100579e-05, + "loss": 0.9227, + "step": 37420 + }, + { + "epoch": 3.44, + "learning_rate": 3.279396892525512e-05, + "loss": 0.9081, + "step": 37430 + }, + { + "epoch": 3.44, + "learning_rate": 3.278937206950446e-05, + "loss": 0.9125, + "step": 37440 + }, + { + "epoch": 3.44, + "learning_rate": 3.2784775213753795e-05, + "loss": 0.7992, + "step": 37450 + }, + { + "epoch": 3.44, + "learning_rate": 3.2780178358003125e-05, + "loss": 0.7881, + "step": 37460 + }, + { + "epoch": 3.44, + "learning_rate": 3.277558150225246e-05, + "loss": 0.8513, + "step": 37470 + }, + { + "epoch": 3.45, + "learning_rate": 3.27709846465018e-05, + "loss": 0.8091, + "step": 37480 + }, + { + "epoch": 3.45, + "learning_rate": 3.276638779075113e-05, + "loss": 0.9469, + "step": 37490 + }, + { + "epoch": 3.45, + "learning_rate": 3.2761790935000464e-05, + "loss": 0.8446, + "step": 37500 + }, + { + "epoch": 3.45, + "learning_rate": 3.2757194079249794e-05, + "loss": 0.8915, + "step": 37510 + }, + { + "epoch": 3.45, + "learning_rate": 3.2752597223499124e-05, + "loss": 0.9035, + "step": 37520 + }, + { + "epoch": 3.45, + "learning_rate": 3.274800036774846e-05, + "loss": 0.9351, + "step": 37530 + }, + { + "epoch": 3.45, + "learning_rate": 3.27434035119978e-05, + "loss": 0.914, + "step": 37540 + }, + { + "epoch": 3.45, + "learning_rate": 3.2738806656247126e-05, + "loss": 0.8497, + "step": 37550 + }, + { + "epoch": 3.45, + "learning_rate": 3.273420980049646e-05, + "loss": 0.7977, + "step": 37560 + }, + { + "epoch": 3.45, + "learning_rate": 3.27296129447458e-05, + "loss": 0.9454, + "step": 37570 + }, + { + "epoch": 3.45, + "learning_rate": 3.272501608899513e-05, + "loss": 0.8863, + "step": 37580 + }, + { + "epoch": 3.46, + "learning_rate": 3.2720419233244466e-05, + "loss": 0.8454, + "step": 37590 + }, + { + "epoch": 3.46, + "learning_rate": 3.2715822377493795e-05, + "loss": 0.9411, + "step": 37600 + }, + { + "epoch": 3.46, + "learning_rate": 3.2711225521743125e-05, + "loss": 0.8989, + "step": 37610 + }, + { + "epoch": 3.46, + "learning_rate": 3.270662866599246e-05, + "loss": 0.9459, + "step": 37620 + }, + { + "epoch": 3.46, + "learning_rate": 3.27020318102418e-05, + "loss": 0.8186, + "step": 37630 + }, + { + "epoch": 3.46, + "learning_rate": 3.269743495449113e-05, + "loss": 0.9418, + "step": 37640 + }, + { + "epoch": 3.46, + "learning_rate": 3.2692838098740464e-05, + "loss": 0.681, + "step": 37650 + }, + { + "epoch": 3.46, + "learning_rate": 3.26882412429898e-05, + "loss": 0.8827, + "step": 37660 + }, + { + "epoch": 3.46, + "learning_rate": 3.268364438723913e-05, + "loss": 0.9151, + "step": 37670 + }, + { + "epoch": 3.46, + "learning_rate": 3.267904753148847e-05, + "loss": 0.763, + "step": 37680 + }, + { + "epoch": 3.47, + "learning_rate": 3.2674450675737796e-05, + "loss": 0.8708, + "step": 37690 + }, + { + "epoch": 3.47, + "learning_rate": 3.2669853819987126e-05, + "loss": 0.8471, + "step": 37700 + }, + { + "epoch": 3.47, + "learning_rate": 3.266525696423646e-05, + "loss": 0.8583, + "step": 37710 + }, + { + "epoch": 3.47, + "learning_rate": 3.26606601084858e-05, + "loss": 0.8899, + "step": 37720 + }, + { + "epoch": 3.47, + "learning_rate": 3.265606325273513e-05, + "loss": 0.781, + "step": 37730 + }, + { + "epoch": 3.47, + "learning_rate": 3.2651466396984465e-05, + "loss": 0.9328, + "step": 37740 + }, + { + "epoch": 3.47, + "learning_rate": 3.26468695412338e-05, + "loss": 0.9911, + "step": 37750 + }, + { + "epoch": 3.47, + "learning_rate": 3.264227268548313e-05, + "loss": 0.9712, + "step": 37760 + }, + { + "epoch": 3.47, + "learning_rate": 3.263767582973247e-05, + "loss": 0.8592, + "step": 37770 + }, + { + "epoch": 3.47, + "learning_rate": 3.26330789739818e-05, + "loss": 0.8645, + "step": 37780 + }, + { + "epoch": 3.47, + "learning_rate": 3.262848211823113e-05, + "loss": 0.906, + "step": 37790 + }, + { + "epoch": 3.48, + "learning_rate": 3.2623885262480464e-05, + "loss": 0.7746, + "step": 37800 + }, + { + "epoch": 3.48, + "learning_rate": 3.26192884067298e-05, + "loss": 0.8373, + "step": 37810 + }, + { + "epoch": 3.48, + "learning_rate": 3.261469155097913e-05, + "loss": 0.9504, + "step": 37820 + }, + { + "epoch": 3.48, + "learning_rate": 3.2610094695228466e-05, + "loss": 0.8487, + "step": 37830 + }, + { + "epoch": 3.48, + "learning_rate": 3.26054978394778e-05, + "loss": 0.8725, + "step": 37840 + }, + { + "epoch": 3.48, + "learning_rate": 3.260090098372713e-05, + "loss": 0.8452, + "step": 37850 + }, + { + "epoch": 3.48, + "learning_rate": 3.259630412797647e-05, + "loss": 0.8772, + "step": 37860 + }, + { + "epoch": 3.48, + "learning_rate": 3.25917072722258e-05, + "loss": 0.9077, + "step": 37870 + }, + { + "epoch": 3.48, + "learning_rate": 3.258711041647513e-05, + "loss": 0.8437, + "step": 37880 + }, + { + "epoch": 3.48, + "learning_rate": 3.2582513560724465e-05, + "loss": 0.8211, + "step": 37890 + }, + { + "epoch": 3.48, + "learning_rate": 3.25779167049738e-05, + "loss": 0.9532, + "step": 37900 + }, + { + "epoch": 3.49, + "learning_rate": 3.257331984922313e-05, + "loss": 0.8704, + "step": 37910 + }, + { + "epoch": 3.49, + "learning_rate": 3.256872299347247e-05, + "loss": 0.8078, + "step": 37920 + }, + { + "epoch": 3.49, + "learning_rate": 3.2564126137721804e-05, + "loss": 0.8503, + "step": 37930 + }, + { + "epoch": 3.49, + "learning_rate": 3.2559529281971134e-05, + "loss": 0.8316, + "step": 37940 + }, + { + "epoch": 3.49, + "learning_rate": 3.255493242622047e-05, + "loss": 0.9086, + "step": 37950 + }, + { + "epoch": 3.49, + "learning_rate": 3.25503355704698e-05, + "loss": 0.8748, + "step": 37960 + }, + { + "epoch": 3.49, + "learning_rate": 3.254573871471913e-05, + "loss": 0.9292, + "step": 37970 + }, + { + "epoch": 3.49, + "learning_rate": 3.2541141858968466e-05, + "loss": 0.7879, + "step": 37980 + }, + { + "epoch": 3.49, + "learning_rate": 3.25365450032178e-05, + "loss": 0.8707, + "step": 37990 + }, + { + "epoch": 3.49, + "learning_rate": 3.253194814746713e-05, + "loss": 0.9451, + "step": 38000 + }, + { + "epoch": 3.49, + "learning_rate": 3.252735129171647e-05, + "loss": 0.8885, + "step": 38010 + }, + { + "epoch": 3.5, + "learning_rate": 3.2522754435965805e-05, + "loss": 1.0151, + "step": 38020 + }, + { + "epoch": 3.5, + "learning_rate": 3.2518157580215135e-05, + "loss": 0.8495, + "step": 38030 + }, + { + "epoch": 3.5, + "learning_rate": 3.251356072446447e-05, + "loss": 0.7566, + "step": 38040 + }, + { + "epoch": 3.5, + "learning_rate": 3.25089638687138e-05, + "loss": 0.7904, + "step": 38050 + }, + { + "epoch": 3.5, + "learning_rate": 3.250436701296313e-05, + "loss": 0.9226, + "step": 38060 + }, + { + "epoch": 3.5, + "learning_rate": 3.249977015721247e-05, + "loss": 0.7924, + "step": 38070 + }, + { + "epoch": 3.5, + "learning_rate": 3.2495173301461804e-05, + "loss": 0.9719, + "step": 38080 + }, + { + "epoch": 3.5, + "learning_rate": 3.2490576445711134e-05, + "loss": 0.8114, + "step": 38090 + }, + { + "epoch": 3.5, + "learning_rate": 3.248597958996047e-05, + "loss": 0.8696, + "step": 38100 + }, + { + "epoch": 3.5, + "learning_rate": 3.2481382734209806e-05, + "loss": 0.9212, + "step": 38110 + }, + { + "epoch": 3.5, + "learning_rate": 3.2476785878459136e-05, + "loss": 0.788, + "step": 38120 + }, + { + "epoch": 3.51, + "learning_rate": 3.247218902270847e-05, + "loss": 0.8213, + "step": 38130 + }, + { + "epoch": 3.51, + "learning_rate": 3.24675921669578e-05, + "loss": 0.7917, + "step": 38140 + }, + { + "epoch": 3.51, + "learning_rate": 3.246299531120713e-05, + "loss": 0.7881, + "step": 38150 + }, + { + "epoch": 3.51, + "learning_rate": 3.245839845545647e-05, + "loss": 0.8178, + "step": 38160 + }, + { + "epoch": 3.51, + "learning_rate": 3.2453801599705805e-05, + "loss": 0.8202, + "step": 38170 + }, + { + "epoch": 3.51, + "learning_rate": 3.2449204743955135e-05, + "loss": 0.8411, + "step": 38180 + }, + { + "epoch": 3.51, + "learning_rate": 3.244460788820447e-05, + "loss": 0.956, + "step": 38190 + }, + { + "epoch": 3.51, + "learning_rate": 3.24400110324538e-05, + "loss": 0.9721, + "step": 38200 + }, + { + "epoch": 3.51, + "learning_rate": 3.243541417670314e-05, + "loss": 0.9031, + "step": 38210 + }, + { + "epoch": 3.51, + "learning_rate": 3.2430817320952474e-05, + "loss": 0.8784, + "step": 38220 + }, + { + "epoch": 3.51, + "learning_rate": 3.2426220465201804e-05, + "loss": 0.9162, + "step": 38230 + }, + { + "epoch": 3.52, + "learning_rate": 3.242162360945113e-05, + "loss": 0.825, + "step": 38240 + }, + { + "epoch": 3.52, + "learning_rate": 3.241702675370047e-05, + "loss": 0.9369, + "step": 38250 + }, + { + "epoch": 3.52, + "learning_rate": 3.2412429897949806e-05, + "loss": 0.8403, + "step": 38260 + }, + { + "epoch": 3.52, + "learning_rate": 3.2407833042199136e-05, + "loss": 0.8308, + "step": 38270 + }, + { + "epoch": 3.52, + "learning_rate": 3.240323618644847e-05, + "loss": 0.9391, + "step": 38280 + }, + { + "epoch": 3.52, + "learning_rate": 3.23986393306978e-05, + "loss": 0.8427, + "step": 38290 + }, + { + "epoch": 3.52, + "learning_rate": 3.239404247494714e-05, + "loss": 0.9708, + "step": 38300 + }, + { + "epoch": 3.52, + "learning_rate": 3.2389445619196475e-05, + "loss": 0.9392, + "step": 38310 + }, + { + "epoch": 3.52, + "learning_rate": 3.2384848763445805e-05, + "loss": 0.988, + "step": 38320 + }, + { + "epoch": 3.52, + "learning_rate": 3.2380251907695134e-05, + "loss": 0.9211, + "step": 38330 + }, + { + "epoch": 3.52, + "learning_rate": 3.237565505194447e-05, + "loss": 1.0102, + "step": 38340 + }, + { + "epoch": 3.53, + "learning_rate": 3.237105819619381e-05, + "loss": 0.8922, + "step": 38350 + }, + { + "epoch": 3.53, + "learning_rate": 3.236646134044314e-05, + "loss": 0.7884, + "step": 38360 + }, + { + "epoch": 3.53, + "learning_rate": 3.2361864484692474e-05, + "loss": 0.937, + "step": 38370 + }, + { + "epoch": 3.53, + "learning_rate": 3.23572676289418e-05, + "loss": 0.8829, + "step": 38380 + }, + { + "epoch": 3.53, + "learning_rate": 3.235267077319114e-05, + "loss": 0.9776, + "step": 38390 + }, + { + "epoch": 3.53, + "learning_rate": 3.2348073917440476e-05, + "loss": 0.8541, + "step": 38400 + }, + { + "epoch": 3.53, + "learning_rate": 3.2343477061689806e-05, + "loss": 0.9413, + "step": 38410 + }, + { + "epoch": 3.53, + "learning_rate": 3.2338880205939136e-05, + "loss": 0.8704, + "step": 38420 + }, + { + "epoch": 3.53, + "learning_rate": 3.233428335018847e-05, + "loss": 0.9513, + "step": 38430 + }, + { + "epoch": 3.53, + "learning_rate": 3.232968649443781e-05, + "loss": 0.8166, + "step": 38440 + }, + { + "epoch": 3.53, + "learning_rate": 3.232508963868714e-05, + "loss": 0.8516, + "step": 38450 + }, + { + "epoch": 3.54, + "learning_rate": 3.2320492782936475e-05, + "loss": 0.9293, + "step": 38460 + }, + { + "epoch": 3.54, + "learning_rate": 3.2315895927185805e-05, + "loss": 0.8951, + "step": 38470 + }, + { + "epoch": 3.54, + "learning_rate": 3.231129907143514e-05, + "loss": 0.9, + "step": 38480 + }, + { + "epoch": 3.54, + "learning_rate": 3.230670221568448e-05, + "loss": 0.9131, + "step": 38490 + }, + { + "epoch": 3.54, + "learning_rate": 3.230210535993381e-05, + "loss": 0.744, + "step": 38500 + }, + { + "epoch": 3.54, + "learning_rate": 3.229750850418314e-05, + "loss": 0.8385, + "step": 38510 + }, + { + "epoch": 3.54, + "learning_rate": 3.229291164843247e-05, + "loss": 0.8589, + "step": 38520 + }, + { + "epoch": 3.54, + "learning_rate": 3.22883147926818e-05, + "loss": 0.9197, + "step": 38530 + }, + { + "epoch": 3.54, + "learning_rate": 3.228371793693114e-05, + "loss": 0.8356, + "step": 38540 + }, + { + "epoch": 3.54, + "learning_rate": 3.2279121081180476e-05, + "loss": 0.9629, + "step": 38550 + }, + { + "epoch": 3.55, + "learning_rate": 3.2274524225429806e-05, + "loss": 1.0082, + "step": 38560 + }, + { + "epoch": 3.55, + "learning_rate": 3.226992736967914e-05, + "loss": 0.9093, + "step": 38570 + }, + { + "epoch": 3.55, + "learning_rate": 3.226533051392848e-05, + "loss": 0.9927, + "step": 38580 + }, + { + "epoch": 3.55, + "learning_rate": 3.226073365817781e-05, + "loss": 0.904, + "step": 38590 + }, + { + "epoch": 3.55, + "learning_rate": 3.225613680242714e-05, + "loss": 0.8669, + "step": 38600 + }, + { + "epoch": 3.55, + "learning_rate": 3.2251539946676475e-05, + "loss": 0.9235, + "step": 38610 + }, + { + "epoch": 3.55, + "learning_rate": 3.2246943090925804e-05, + "loss": 0.9136, + "step": 38620 + }, + { + "epoch": 3.55, + "learning_rate": 3.224234623517514e-05, + "loss": 0.8728, + "step": 38630 + }, + { + "epoch": 3.55, + "learning_rate": 3.223774937942448e-05, + "loss": 0.9509, + "step": 38640 + }, + { + "epoch": 3.55, + "learning_rate": 3.223315252367381e-05, + "loss": 0.8315, + "step": 38650 + }, + { + "epoch": 3.55, + "learning_rate": 3.222855566792314e-05, + "loss": 0.982, + "step": 38660 + }, + { + "epoch": 3.56, + "learning_rate": 3.222395881217248e-05, + "loss": 0.9317, + "step": 38670 + }, + { + "epoch": 3.56, + "learning_rate": 3.221936195642181e-05, + "loss": 0.8272, + "step": 38680 + }, + { + "epoch": 3.56, + "learning_rate": 3.221476510067114e-05, + "loss": 0.9112, + "step": 38690 + }, + { + "epoch": 3.56, + "learning_rate": 3.2210168244920476e-05, + "loss": 0.8537, + "step": 38700 + }, + { + "epoch": 3.56, + "learning_rate": 3.2205571389169805e-05, + "loss": 0.8476, + "step": 38710 + }, + { + "epoch": 3.56, + "learning_rate": 3.220097453341914e-05, + "loss": 0.9291, + "step": 38720 + }, + { + "epoch": 3.56, + "learning_rate": 3.219637767766848e-05, + "loss": 0.9132, + "step": 38730 + }, + { + "epoch": 3.56, + "learning_rate": 3.219178082191781e-05, + "loss": 0.9712, + "step": 38740 + }, + { + "epoch": 3.56, + "learning_rate": 3.2187183966167145e-05, + "loss": 0.9464, + "step": 38750 + }, + { + "epoch": 3.56, + "learning_rate": 3.218258711041648e-05, + "loss": 0.8728, + "step": 38760 + }, + { + "epoch": 3.56, + "learning_rate": 3.217799025466581e-05, + "loss": 0.8487, + "step": 38770 + }, + { + "epoch": 3.57, + "learning_rate": 3.217339339891514e-05, + "loss": 0.7779, + "step": 38780 + }, + { + "epoch": 3.57, + "learning_rate": 3.216879654316448e-05, + "loss": 1.0355, + "step": 38790 + }, + { + "epoch": 3.57, + "learning_rate": 3.216419968741381e-05, + "loss": 0.8285, + "step": 38800 + }, + { + "epoch": 3.57, + "learning_rate": 3.215960283166314e-05, + "loss": 0.8808, + "step": 38810 + }, + { + "epoch": 3.57, + "learning_rate": 3.215500597591248e-05, + "loss": 0.8815, + "step": 38820 + }, + { + "epoch": 3.57, + "learning_rate": 3.215040912016181e-05, + "loss": 0.8875, + "step": 38830 + }, + { + "epoch": 3.57, + "learning_rate": 3.2145812264411146e-05, + "loss": 0.8101, + "step": 38840 + }, + { + "epoch": 3.57, + "learning_rate": 3.214121540866048e-05, + "loss": 0.9092, + "step": 38850 + }, + { + "epoch": 3.57, + "learning_rate": 3.2136618552909805e-05, + "loss": 0.9472, + "step": 38860 + }, + { + "epoch": 3.57, + "learning_rate": 3.213202169715914e-05, + "loss": 0.8876, + "step": 38870 + }, + { + "epoch": 3.57, + "learning_rate": 3.212742484140848e-05, + "loss": 0.8674, + "step": 38880 + }, + { + "epoch": 3.58, + "learning_rate": 3.212282798565781e-05, + "loss": 0.8565, + "step": 38890 + }, + { + "epoch": 3.58, + "learning_rate": 3.2118231129907144e-05, + "loss": 0.8711, + "step": 38900 + }, + { + "epoch": 3.58, + "learning_rate": 3.211363427415648e-05, + "loss": 0.9591, + "step": 38910 + }, + { + "epoch": 3.58, + "learning_rate": 3.210903741840581e-05, + "loss": 0.8694, + "step": 38920 + }, + { + "epoch": 3.58, + "learning_rate": 3.210444056265515e-05, + "loss": 0.8543, + "step": 38930 + }, + { + "epoch": 3.58, + "learning_rate": 3.2099843706904483e-05, + "loss": 0.7765, + "step": 38940 + }, + { + "epoch": 3.58, + "learning_rate": 3.2095246851153806e-05, + "loss": 0.9699, + "step": 38950 + }, + { + "epoch": 3.58, + "learning_rate": 3.209064999540314e-05, + "loss": 0.9343, + "step": 38960 + }, + { + "epoch": 3.58, + "learning_rate": 3.208605313965248e-05, + "loss": 0.8523, + "step": 38970 + }, + { + "epoch": 3.58, + "learning_rate": 3.208145628390181e-05, + "loss": 0.7564, + "step": 38980 + }, + { + "epoch": 3.58, + "learning_rate": 3.2076859428151145e-05, + "loss": 0.8022, + "step": 38990 + }, + { + "epoch": 3.59, + "learning_rate": 3.207226257240048e-05, + "loss": 0.8665, + "step": 39000 + }, + { + "epoch": 3.59, + "learning_rate": 3.206766571664981e-05, + "loss": 0.9385, + "step": 39010 + }, + { + "epoch": 3.59, + "learning_rate": 3.206306886089915e-05, + "loss": 0.8166, + "step": 39020 + }, + { + "epoch": 3.59, + "learning_rate": 3.2058472005148485e-05, + "loss": 0.7974, + "step": 39030 + }, + { + "epoch": 3.59, + "learning_rate": 3.205387514939781e-05, + "loss": 0.8444, + "step": 39040 + }, + { + "epoch": 3.59, + "learning_rate": 3.2049278293647144e-05, + "loss": 0.8498, + "step": 39050 + }, + { + "epoch": 3.59, + "learning_rate": 3.204468143789648e-05, + "loss": 0.9195, + "step": 39060 + }, + { + "epoch": 3.59, + "learning_rate": 3.204008458214581e-05, + "loss": 0.7424, + "step": 39070 + }, + { + "epoch": 3.59, + "learning_rate": 3.203548772639515e-05, + "loss": 0.8781, + "step": 39080 + }, + { + "epoch": 3.59, + "learning_rate": 3.203089087064448e-05, + "loss": 0.8479, + "step": 39090 + }, + { + "epoch": 3.59, + "learning_rate": 3.202629401489381e-05, + "loss": 0.8595, + "step": 39100 + }, + { + "epoch": 3.6, + "learning_rate": 3.202169715914315e-05, + "loss": 1.0309, + "step": 39110 + }, + { + "epoch": 3.6, + "learning_rate": 3.2017100303392486e-05, + "loss": 0.8203, + "step": 39120 + }, + { + "epoch": 3.6, + "learning_rate": 3.2012503447641816e-05, + "loss": 0.8268, + "step": 39130 + }, + { + "epoch": 3.6, + "learning_rate": 3.2007906591891145e-05, + "loss": 0.8853, + "step": 39140 + }, + { + "epoch": 3.6, + "learning_rate": 3.200330973614048e-05, + "loss": 0.9168, + "step": 39150 + }, + { + "epoch": 3.6, + "learning_rate": 3.199871288038981e-05, + "loss": 1.0125, + "step": 39160 + }, + { + "epoch": 3.6, + "learning_rate": 3.199411602463915e-05, + "loss": 0.8512, + "step": 39170 + }, + { + "epoch": 3.6, + "learning_rate": 3.1989519168888484e-05, + "loss": 0.8941, + "step": 39180 + }, + { + "epoch": 3.6, + "learning_rate": 3.1984922313137814e-05, + "loss": 0.858, + "step": 39190 + }, + { + "epoch": 3.6, + "learning_rate": 3.198032545738715e-05, + "loss": 0.9017, + "step": 39200 + }, + { + "epoch": 3.6, + "learning_rate": 3.197572860163649e-05, + "loss": 0.9731, + "step": 39210 + }, + { + "epoch": 3.61, + "learning_rate": 3.197113174588582e-05, + "loss": 0.8289, + "step": 39220 + }, + { + "epoch": 3.61, + "learning_rate": 3.1966534890135146e-05, + "loss": 0.9207, + "step": 39230 + }, + { + "epoch": 3.61, + "learning_rate": 3.196193803438448e-05, + "loss": 0.9677, + "step": 39240 + }, + { + "epoch": 3.61, + "learning_rate": 3.195734117863381e-05, + "loss": 0.8984, + "step": 39250 + }, + { + "epoch": 3.61, + "learning_rate": 3.195274432288315e-05, + "loss": 0.8621, + "step": 39260 + }, + { + "epoch": 3.61, + "learning_rate": 3.1948147467132486e-05, + "loss": 0.8493, + "step": 39270 + }, + { + "epoch": 3.61, + "learning_rate": 3.1943550611381815e-05, + "loss": 0.8552, + "step": 39280 + }, + { + "epoch": 3.61, + "learning_rate": 3.193895375563115e-05, + "loss": 0.9748, + "step": 39290 + }, + { + "epoch": 3.61, + "learning_rate": 3.193435689988049e-05, + "loss": 0.868, + "step": 39300 + }, + { + "epoch": 3.61, + "learning_rate": 3.192976004412982e-05, + "loss": 0.8885, + "step": 39310 + }, + { + "epoch": 3.61, + "learning_rate": 3.192516318837915e-05, + "loss": 0.9271, + "step": 39320 + }, + { + "epoch": 3.62, + "learning_rate": 3.1920566332628484e-05, + "loss": 0.8825, + "step": 39330 + }, + { + "epoch": 3.62, + "learning_rate": 3.1915969476877814e-05, + "loss": 0.8979, + "step": 39340 + }, + { + "epoch": 3.62, + "learning_rate": 3.191137262112715e-05, + "loss": 0.8076, + "step": 39350 + }, + { + "epoch": 3.62, + "learning_rate": 3.190677576537649e-05, + "loss": 0.8937, + "step": 39360 + }, + { + "epoch": 3.62, + "learning_rate": 3.1902178909625816e-05, + "loss": 0.8418, + "step": 39370 + }, + { + "epoch": 3.62, + "learning_rate": 3.189758205387515e-05, + "loss": 0.9028, + "step": 39380 + }, + { + "epoch": 3.62, + "learning_rate": 3.189298519812449e-05, + "loss": 0.9685, + "step": 39390 + }, + { + "epoch": 3.62, + "learning_rate": 3.188838834237382e-05, + "loss": 0.867, + "step": 39400 + }, + { + "epoch": 3.62, + "learning_rate": 3.188379148662315e-05, + "loss": 0.8308, + "step": 39410 + }, + { + "epoch": 3.62, + "learning_rate": 3.1879194630872485e-05, + "loss": 0.9582, + "step": 39420 + }, + { + "epoch": 3.63, + "learning_rate": 3.1874597775121815e-05, + "loss": 1.0232, + "step": 39430 + }, + { + "epoch": 3.63, + "learning_rate": 3.187000091937115e-05, + "loss": 0.8616, + "step": 39440 + }, + { + "epoch": 3.63, + "learning_rate": 3.186540406362049e-05, + "loss": 0.8189, + "step": 39450 + }, + { + "epoch": 3.63, + "learning_rate": 3.186080720786982e-05, + "loss": 0.8929, + "step": 39460 + }, + { + "epoch": 3.63, + "learning_rate": 3.1856210352119154e-05, + "loss": 0.8565, + "step": 39470 + }, + { + "epoch": 3.63, + "learning_rate": 3.185161349636849e-05, + "loss": 0.902, + "step": 39480 + }, + { + "epoch": 3.63, + "learning_rate": 3.184701664061782e-05, + "loss": 1.0307, + "step": 39490 + }, + { + "epoch": 3.63, + "learning_rate": 3.184241978486715e-05, + "loss": 0.8915, + "step": 39500 + }, + { + "epoch": 3.63, + "learning_rate": 3.1837822929116486e-05, + "loss": 0.8105, + "step": 39510 + }, + { + "epoch": 3.63, + "learning_rate": 3.1833226073365816e-05, + "loss": 0.8996, + "step": 39520 + }, + { + "epoch": 3.63, + "learning_rate": 3.182862921761515e-05, + "loss": 0.7682, + "step": 39530 + }, + { + "epoch": 3.64, + "learning_rate": 3.182403236186449e-05, + "loss": 0.8926, + "step": 39540 + }, + { + "epoch": 3.64, + "learning_rate": 3.181943550611382e-05, + "loss": 0.8573, + "step": 39550 + }, + { + "epoch": 3.64, + "learning_rate": 3.1814838650363155e-05, + "loss": 0.8451, + "step": 39560 + }, + { + "epoch": 3.64, + "learning_rate": 3.181024179461249e-05, + "loss": 0.8709, + "step": 39570 + }, + { + "epoch": 3.64, + "learning_rate": 3.180564493886182e-05, + "loss": 0.9109, + "step": 39580 + }, + { + "epoch": 3.64, + "learning_rate": 3.180104808311115e-05, + "loss": 0.9761, + "step": 39590 + }, + { + "epoch": 3.64, + "learning_rate": 3.179645122736049e-05, + "loss": 0.9657, + "step": 39600 + }, + { + "epoch": 3.64, + "learning_rate": 3.179185437160982e-05, + "loss": 0.8977, + "step": 39610 + }, + { + "epoch": 3.64, + "learning_rate": 3.1787257515859154e-05, + "loss": 0.9573, + "step": 39620 + }, + { + "epoch": 3.64, + "learning_rate": 3.178266066010849e-05, + "loss": 0.8582, + "step": 39630 + }, + { + "epoch": 3.64, + "learning_rate": 3.177806380435782e-05, + "loss": 0.9792, + "step": 39640 + }, + { + "epoch": 3.65, + "learning_rate": 3.1773466948607157e-05, + "loss": 0.8773, + "step": 39650 + }, + { + "epoch": 3.65, + "learning_rate": 3.176887009285649e-05, + "loss": 0.9229, + "step": 39660 + }, + { + "epoch": 3.65, + "learning_rate": 3.176427323710582e-05, + "loss": 0.889, + "step": 39670 + }, + { + "epoch": 3.65, + "learning_rate": 3.175967638135515e-05, + "loss": 0.7453, + "step": 39680 + }, + { + "epoch": 3.65, + "learning_rate": 3.175507952560449e-05, + "loss": 0.9183, + "step": 39690 + }, + { + "epoch": 3.65, + "learning_rate": 3.175048266985382e-05, + "loss": 0.8286, + "step": 39700 + }, + { + "epoch": 3.65, + "learning_rate": 3.1745885814103155e-05, + "loss": 0.9237, + "step": 39710 + }, + { + "epoch": 3.65, + "learning_rate": 3.174128895835249e-05, + "loss": 0.9175, + "step": 39720 + }, + { + "epoch": 3.65, + "learning_rate": 3.173669210260182e-05, + "loss": 0.9063, + "step": 39730 + }, + { + "epoch": 3.65, + "learning_rate": 3.173209524685116e-05, + "loss": 0.8732, + "step": 39740 + }, + { + "epoch": 3.65, + "learning_rate": 3.1727498391100494e-05, + "loss": 0.7564, + "step": 39750 + }, + { + "epoch": 3.66, + "learning_rate": 3.1722901535349824e-05, + "loss": 0.9568, + "step": 39760 + }, + { + "epoch": 3.66, + "learning_rate": 3.1718304679599154e-05, + "loss": 0.8059, + "step": 39770 + }, + { + "epoch": 3.66, + "learning_rate": 3.171370782384849e-05, + "loss": 0.8726, + "step": 39780 + }, + { + "epoch": 3.66, + "learning_rate": 3.170911096809782e-05, + "loss": 0.9589, + "step": 39790 + }, + { + "epoch": 3.66, + "learning_rate": 3.1704514112347156e-05, + "loss": 0.8015, + "step": 39800 + }, + { + "epoch": 3.66, + "learning_rate": 3.169991725659649e-05, + "loss": 0.7882, + "step": 39810 + }, + { + "epoch": 3.66, + "learning_rate": 3.169532040084582e-05, + "loss": 0.9419, + "step": 39820 + }, + { + "epoch": 3.66, + "learning_rate": 3.169072354509516e-05, + "loss": 0.8445, + "step": 39830 + }, + { + "epoch": 3.66, + "learning_rate": 3.168612668934449e-05, + "loss": 0.9885, + "step": 39840 + }, + { + "epoch": 3.66, + "learning_rate": 3.1681529833593825e-05, + "loss": 0.7575, + "step": 39850 + }, + { + "epoch": 3.66, + "learning_rate": 3.1676932977843155e-05, + "loss": 0.8401, + "step": 39860 + }, + { + "epoch": 3.67, + "learning_rate": 3.167233612209249e-05, + "loss": 0.7462, + "step": 39870 + }, + { + "epoch": 3.67, + "learning_rate": 3.166773926634182e-05, + "loss": 0.9699, + "step": 39880 + }, + { + "epoch": 3.67, + "learning_rate": 3.166314241059116e-05, + "loss": 0.9577, + "step": 39890 + }, + { + "epoch": 3.67, + "learning_rate": 3.1658545554840494e-05, + "loss": 0.821, + "step": 39900 + }, + { + "epoch": 3.67, + "learning_rate": 3.1653948699089824e-05, + "loss": 0.8404, + "step": 39910 + }, + { + "epoch": 3.67, + "learning_rate": 3.164935184333916e-05, + "loss": 0.9361, + "step": 39920 + }, + { + "epoch": 3.67, + "learning_rate": 3.164475498758849e-05, + "loss": 0.9821, + "step": 39930 + }, + { + "epoch": 3.67, + "learning_rate": 3.1640158131837826e-05, + "loss": 0.8874, + "step": 39940 + }, + { + "epoch": 3.67, + "learning_rate": 3.1635561276087156e-05, + "loss": 0.9069, + "step": 39950 + }, + { + "epoch": 3.67, + "learning_rate": 3.163096442033649e-05, + "loss": 0.9384, + "step": 39960 + }, + { + "epoch": 3.67, + "learning_rate": 3.162636756458582e-05, + "loss": 1.0188, + "step": 39970 + }, + { + "epoch": 3.68, + "learning_rate": 3.162177070883516e-05, + "loss": 0.9306, + "step": 39980 + }, + { + "epoch": 3.68, + "learning_rate": 3.1617173853084495e-05, + "loss": 0.8851, + "step": 39990 + }, + { + "epoch": 3.68, + "learning_rate": 3.1612576997333825e-05, + "loss": 1.0461, + "step": 40000 + }, + { + "epoch": 3.68, + "eval_accuracy": 0.5711790393013101, + "eval_loss": 0.8903296589851379, + "eval_runtime": 159.9604, + "eval_samples_per_second": 28.632, + "eval_steps_per_second": 3.582, + "step": 40000 + }, + { + "epoch": 3.68, + "learning_rate": 3.160798014158316e-05, + "loss": 0.8495, + "step": 40010 + }, + { + "epoch": 3.68, + "learning_rate": 3.160338328583249e-05, + "loss": 0.9084, + "step": 40020 + }, + { + "epoch": 3.68, + "learning_rate": 3.159878643008183e-05, + "loss": 0.9206, + "step": 40030 + }, + { + "epoch": 3.68, + "learning_rate": 3.159418957433116e-05, + "loss": 0.9453, + "step": 40040 + }, + { + "epoch": 3.68, + "learning_rate": 3.1589592718580494e-05, + "loss": 0.8069, + "step": 40050 + }, + { + "epoch": 3.68, + "learning_rate": 3.158499586282982e-05, + "loss": 0.8489, + "step": 40060 + }, + { + "epoch": 3.68, + "learning_rate": 3.158039900707916e-05, + "loss": 0.9141, + "step": 40070 + }, + { + "epoch": 3.68, + "learning_rate": 3.1575802151328496e-05, + "loss": 0.8765, + "step": 40080 + }, + { + "epoch": 3.69, + "learning_rate": 3.1571205295577826e-05, + "loss": 0.8629, + "step": 40090 + }, + { + "epoch": 3.69, + "learning_rate": 3.156660843982716e-05, + "loss": 0.907, + "step": 40100 + }, + { + "epoch": 3.69, + "learning_rate": 3.156201158407649e-05, + "loss": 0.9083, + "step": 40110 + }, + { + "epoch": 3.69, + "learning_rate": 3.155741472832583e-05, + "loss": 0.9925, + "step": 40120 + }, + { + "epoch": 3.69, + "learning_rate": 3.155281787257516e-05, + "loss": 0.869, + "step": 40130 + }, + { + "epoch": 3.69, + "learning_rate": 3.1548221016824495e-05, + "loss": 0.9737, + "step": 40140 + }, + { + "epoch": 3.69, + "learning_rate": 3.1543624161073825e-05, + "loss": 1.0478, + "step": 40150 + }, + { + "epoch": 3.69, + "learning_rate": 3.153902730532316e-05, + "loss": 0.9175, + "step": 40160 + }, + { + "epoch": 3.69, + "learning_rate": 3.153443044957249e-05, + "loss": 0.9192, + "step": 40170 + }, + { + "epoch": 3.69, + "learning_rate": 3.152983359382183e-05, + "loss": 0.8736, + "step": 40180 + }, + { + "epoch": 3.69, + "learning_rate": 3.1525236738071164e-05, + "loss": 0.8674, + "step": 40190 + }, + { + "epoch": 3.7, + "learning_rate": 3.1520639882320493e-05, + "loss": 0.8745, + "step": 40200 + }, + { + "epoch": 3.7, + "learning_rate": 3.151604302656983e-05, + "loss": 0.8358, + "step": 40210 + }, + { + "epoch": 3.7, + "learning_rate": 3.151144617081916e-05, + "loss": 0.9177, + "step": 40220 + }, + { + "epoch": 3.7, + "learning_rate": 3.1506849315068496e-05, + "loss": 0.7849, + "step": 40230 + }, + { + "epoch": 3.7, + "learning_rate": 3.1502252459317826e-05, + "loss": 0.8817, + "step": 40240 + }, + { + "epoch": 3.7, + "learning_rate": 3.149765560356716e-05, + "loss": 1.0461, + "step": 40250 + }, + { + "epoch": 3.7, + "learning_rate": 3.149305874781649e-05, + "loss": 0.8643, + "step": 40260 + }, + { + "epoch": 3.7, + "learning_rate": 3.148846189206583e-05, + "loss": 0.8535, + "step": 40270 + }, + { + "epoch": 3.7, + "learning_rate": 3.1483865036315165e-05, + "loss": 0.925, + "step": 40280 + }, + { + "epoch": 3.7, + "learning_rate": 3.1479268180564495e-05, + "loss": 0.9472, + "step": 40290 + }, + { + "epoch": 3.71, + "learning_rate": 3.147467132481383e-05, + "loss": 0.9185, + "step": 40300 + }, + { + "epoch": 3.71, + "learning_rate": 3.147007446906316e-05, + "loss": 0.9544, + "step": 40310 + }, + { + "epoch": 3.71, + "learning_rate": 3.14654776133125e-05, + "loss": 0.8489, + "step": 40320 + }, + { + "epoch": 3.71, + "learning_rate": 3.146088075756183e-05, + "loss": 0.8162, + "step": 40330 + }, + { + "epoch": 3.71, + "learning_rate": 3.1456283901811163e-05, + "loss": 0.9296, + "step": 40340 + }, + { + "epoch": 3.71, + "learning_rate": 3.145168704606049e-05, + "loss": 1.0184, + "step": 40350 + }, + { + "epoch": 3.71, + "learning_rate": 3.144709019030983e-05, + "loss": 0.8434, + "step": 40360 + }, + { + "epoch": 3.71, + "learning_rate": 3.1442493334559166e-05, + "loss": 0.8768, + "step": 40370 + }, + { + "epoch": 3.71, + "learning_rate": 3.1437896478808496e-05, + "loss": 0.9059, + "step": 40380 + }, + { + "epoch": 3.71, + "learning_rate": 3.143329962305783e-05, + "loss": 0.9881, + "step": 40390 + }, + { + "epoch": 3.71, + "learning_rate": 3.142870276730716e-05, + "loss": 0.8797, + "step": 40400 + }, + { + "epoch": 3.72, + "learning_rate": 3.14241059115565e-05, + "loss": 0.9426, + "step": 40410 + }, + { + "epoch": 3.72, + "learning_rate": 3.141950905580583e-05, + "loss": 0.8451, + "step": 40420 + }, + { + "epoch": 3.72, + "learning_rate": 3.1414912200055165e-05, + "loss": 0.9316, + "step": 40430 + }, + { + "epoch": 3.72, + "learning_rate": 3.1410315344304494e-05, + "loss": 0.8221, + "step": 40440 + }, + { + "epoch": 3.72, + "learning_rate": 3.140571848855383e-05, + "loss": 0.8418, + "step": 40450 + }, + { + "epoch": 3.72, + "learning_rate": 3.140112163280317e-05, + "loss": 0.8477, + "step": 40460 + }, + { + "epoch": 3.72, + "learning_rate": 3.13965247770525e-05, + "loss": 0.9107, + "step": 40470 + }, + { + "epoch": 3.72, + "learning_rate": 3.1391927921301833e-05, + "loss": 0.98, + "step": 40480 + }, + { + "epoch": 3.72, + "learning_rate": 3.138733106555116e-05, + "loss": 0.8492, + "step": 40490 + }, + { + "epoch": 3.72, + "learning_rate": 3.138273420980049e-05, + "loss": 0.9016, + "step": 40500 + }, + { + "epoch": 3.72, + "learning_rate": 3.137813735404983e-05, + "loss": 0.9132, + "step": 40510 + }, + { + "epoch": 3.73, + "learning_rate": 3.1373540498299166e-05, + "loss": 0.8439, + "step": 40520 + }, + { + "epoch": 3.73, + "learning_rate": 3.1368943642548496e-05, + "loss": 0.9044, + "step": 40530 + }, + { + "epoch": 3.73, + "learning_rate": 3.136434678679783e-05, + "loss": 0.9514, + "step": 40540 + }, + { + "epoch": 3.73, + "learning_rate": 3.135974993104717e-05, + "loss": 0.913, + "step": 40550 + }, + { + "epoch": 3.73, + "learning_rate": 3.13551530752965e-05, + "loss": 0.9432, + "step": 40560 + }, + { + "epoch": 3.73, + "learning_rate": 3.1350556219545835e-05, + "loss": 0.7828, + "step": 40570 + }, + { + "epoch": 3.73, + "learning_rate": 3.1345959363795164e-05, + "loss": 0.884, + "step": 40580 + }, + { + "epoch": 3.73, + "learning_rate": 3.1341362508044494e-05, + "loss": 0.9422, + "step": 40590 + }, + { + "epoch": 3.73, + "learning_rate": 3.133676565229383e-05, + "loss": 0.7815, + "step": 40600 + }, + { + "epoch": 3.73, + "learning_rate": 3.133216879654317e-05, + "loss": 0.8147, + "step": 40610 + }, + { + "epoch": 3.73, + "learning_rate": 3.13275719407925e-05, + "loss": 0.8345, + "step": 40620 + }, + { + "epoch": 3.74, + "learning_rate": 3.132297508504183e-05, + "loss": 0.865, + "step": 40630 + }, + { + "epoch": 3.74, + "learning_rate": 3.131837822929117e-05, + "loss": 0.8247, + "step": 40640 + }, + { + "epoch": 3.74, + "learning_rate": 3.13137813735405e-05, + "loss": 0.9588, + "step": 40650 + }, + { + "epoch": 3.74, + "learning_rate": 3.1309184517789836e-05, + "loss": 0.7586, + "step": 40660 + }, + { + "epoch": 3.74, + "learning_rate": 3.1304587662039166e-05, + "loss": 0.9119, + "step": 40670 + }, + { + "epoch": 3.74, + "learning_rate": 3.1299990806288495e-05, + "loss": 0.8504, + "step": 40680 + }, + { + "epoch": 3.74, + "learning_rate": 3.129539395053783e-05, + "loss": 0.8237, + "step": 40690 + }, + { + "epoch": 3.74, + "learning_rate": 3.129079709478717e-05, + "loss": 0.8695, + "step": 40700 + }, + { + "epoch": 3.74, + "learning_rate": 3.12862002390365e-05, + "loss": 0.8677, + "step": 40710 + }, + { + "epoch": 3.74, + "learning_rate": 3.1281603383285834e-05, + "loss": 0.9085, + "step": 40720 + }, + { + "epoch": 3.74, + "learning_rate": 3.127700652753517e-05, + "loss": 0.8356, + "step": 40730 + }, + { + "epoch": 3.75, + "learning_rate": 3.12724096717845e-05, + "loss": 0.8699, + "step": 40740 + }, + { + "epoch": 3.75, + "learning_rate": 3.126781281603384e-05, + "loss": 0.853, + "step": 40750 + }, + { + "epoch": 3.75, + "learning_rate": 3.126321596028317e-05, + "loss": 0.8297, + "step": 40760 + }, + { + "epoch": 3.75, + "learning_rate": 3.1258619104532496e-05, + "loss": 0.8302, + "step": 40770 + }, + { + "epoch": 3.75, + "learning_rate": 3.125402224878183e-05, + "loss": 0.8745, + "step": 40780 + }, + { + "epoch": 3.75, + "learning_rate": 3.124942539303117e-05, + "loss": 0.7968, + "step": 40790 + }, + { + "epoch": 3.75, + "learning_rate": 3.12448285372805e-05, + "loss": 0.8859, + "step": 40800 + }, + { + "epoch": 3.75, + "learning_rate": 3.1240231681529836e-05, + "loss": 0.93, + "step": 40810 + }, + { + "epoch": 3.75, + "learning_rate": 3.123563482577917e-05, + "loss": 0.9607, + "step": 40820 + }, + { + "epoch": 3.75, + "learning_rate": 3.12310379700285e-05, + "loss": 0.8351, + "step": 40830 + }, + { + "epoch": 3.75, + "learning_rate": 3.122644111427784e-05, + "loss": 0.9292, + "step": 40840 + }, + { + "epoch": 3.76, + "learning_rate": 3.122184425852717e-05, + "loss": 0.8545, + "step": 40850 + }, + { + "epoch": 3.76, + "learning_rate": 3.12172474027765e-05, + "loss": 0.8323, + "step": 40860 + }, + { + "epoch": 3.76, + "learning_rate": 3.1212650547025834e-05, + "loss": 0.9687, + "step": 40870 + }, + { + "epoch": 3.76, + "learning_rate": 3.120805369127517e-05, + "loss": 0.973, + "step": 40880 + }, + { + "epoch": 3.76, + "learning_rate": 3.12034568355245e-05, + "loss": 0.931, + "step": 40890 + }, + { + "epoch": 3.76, + "learning_rate": 3.119885997977384e-05, + "loss": 0.9597, + "step": 40900 + }, + { + "epoch": 3.76, + "learning_rate": 3.119426312402317e-05, + "loss": 0.803, + "step": 40910 + }, + { + "epoch": 3.76, + "learning_rate": 3.11896662682725e-05, + "loss": 0.8682, + "step": 40920 + }, + { + "epoch": 3.76, + "learning_rate": 3.118506941252184e-05, + "loss": 1.0133, + "step": 40930 + }, + { + "epoch": 3.76, + "learning_rate": 3.118047255677117e-05, + "loss": 0.9114, + "step": 40940 + }, + { + "epoch": 3.76, + "learning_rate": 3.11758757010205e-05, + "loss": 0.8648, + "step": 40950 + }, + { + "epoch": 3.77, + "learning_rate": 3.1171278845269835e-05, + "loss": 0.8299, + "step": 40960 + }, + { + "epoch": 3.77, + "learning_rate": 3.116668198951917e-05, + "loss": 0.8276, + "step": 40970 + }, + { + "epoch": 3.77, + "learning_rate": 3.11620851337685e-05, + "loss": 0.7509, + "step": 40980 + }, + { + "epoch": 3.77, + "learning_rate": 3.115748827801784e-05, + "loss": 0.9291, + "step": 40990 + }, + { + "epoch": 3.77, + "learning_rate": 3.1152891422267174e-05, + "loss": 0.821, + "step": 41000 + }, + { + "epoch": 3.77, + "learning_rate": 3.1148294566516504e-05, + "loss": 0.8429, + "step": 41010 + }, + { + "epoch": 3.77, + "learning_rate": 3.114369771076584e-05, + "loss": 0.9444, + "step": 41020 + }, + { + "epoch": 3.77, + "learning_rate": 3.113910085501517e-05, + "loss": 0.9157, + "step": 41030 + }, + { + "epoch": 3.77, + "learning_rate": 3.11345039992645e-05, + "loss": 0.9582, + "step": 41040 + }, + { + "epoch": 3.77, + "learning_rate": 3.1129907143513837e-05, + "loss": 0.8934, + "step": 41050 + }, + { + "epoch": 3.77, + "learning_rate": 3.112531028776317e-05, + "loss": 0.8774, + "step": 41060 + }, + { + "epoch": 3.78, + "learning_rate": 3.11207134320125e-05, + "loss": 0.8553, + "step": 41070 + }, + { + "epoch": 3.78, + "learning_rate": 3.111611657626184e-05, + "loss": 0.9137, + "step": 41080 + }, + { + "epoch": 3.78, + "learning_rate": 3.1111519720511176e-05, + "loss": 0.8901, + "step": 41090 + }, + { + "epoch": 3.78, + "learning_rate": 3.1106922864760505e-05, + "loss": 0.9264, + "step": 41100 + }, + { + "epoch": 3.78, + "learning_rate": 3.110232600900984e-05, + "loss": 0.9345, + "step": 41110 + }, + { + "epoch": 3.78, + "learning_rate": 3.109772915325917e-05, + "loss": 0.8781, + "step": 41120 + }, + { + "epoch": 3.78, + "learning_rate": 3.10931322975085e-05, + "loss": 0.7769, + "step": 41130 + }, + { + "epoch": 3.78, + "learning_rate": 3.108853544175784e-05, + "loss": 0.8278, + "step": 41140 + }, + { + "epoch": 3.78, + "learning_rate": 3.1083938586007174e-05, + "loss": 0.9533, + "step": 41150 + }, + { + "epoch": 3.78, + "learning_rate": 3.1079341730256504e-05, + "loss": 0.8985, + "step": 41160 + }, + { + "epoch": 3.79, + "learning_rate": 3.107474487450584e-05, + "loss": 0.7846, + "step": 41170 + }, + { + "epoch": 3.79, + "learning_rate": 3.107014801875518e-05, + "loss": 0.9568, + "step": 41180 + }, + { + "epoch": 3.79, + "learning_rate": 3.1065551163004507e-05, + "loss": 0.9479, + "step": 41190 + }, + { + "epoch": 3.79, + "learning_rate": 3.106095430725384e-05, + "loss": 0.8945, + "step": 41200 + }, + { + "epoch": 3.79, + "learning_rate": 3.105635745150317e-05, + "loss": 0.8083, + "step": 41210 + }, + { + "epoch": 3.79, + "learning_rate": 3.10517605957525e-05, + "loss": 0.8954, + "step": 41220 + }, + { + "epoch": 3.79, + "learning_rate": 3.104716374000184e-05, + "loss": 0.9698, + "step": 41230 + }, + { + "epoch": 3.79, + "learning_rate": 3.1042566884251175e-05, + "loss": 0.9145, + "step": 41240 + }, + { + "epoch": 3.79, + "learning_rate": 3.1037970028500505e-05, + "loss": 0.9828, + "step": 41250 + }, + { + "epoch": 3.79, + "learning_rate": 3.103337317274984e-05, + "loss": 0.8397, + "step": 41260 + }, + { + "epoch": 3.79, + "learning_rate": 3.102877631699918e-05, + "loss": 0.9298, + "step": 41270 + }, + { + "epoch": 3.8, + "learning_rate": 3.102417946124851e-05, + "loss": 0.8518, + "step": 41280 + }, + { + "epoch": 3.8, + "learning_rate": 3.1019582605497844e-05, + "loss": 0.8993, + "step": 41290 + }, + { + "epoch": 3.8, + "learning_rate": 3.1014985749747174e-05, + "loss": 0.8147, + "step": 41300 + }, + { + "epoch": 3.8, + "learning_rate": 3.1010388893996504e-05, + "loss": 0.8108, + "step": 41310 + }, + { + "epoch": 3.8, + "learning_rate": 3.100579203824584e-05, + "loss": 0.9162, + "step": 41320 + }, + { + "epoch": 3.8, + "learning_rate": 3.1001195182495177e-05, + "loss": 0.8643, + "step": 41330 + }, + { + "epoch": 3.8, + "learning_rate": 3.0996598326744506e-05, + "loss": 0.8399, + "step": 41340 + }, + { + "epoch": 3.8, + "learning_rate": 3.099200147099384e-05, + "loss": 0.8777, + "step": 41350 + }, + { + "epoch": 3.8, + "learning_rate": 3.098740461524318e-05, + "loss": 0.8291, + "step": 41360 + }, + { + "epoch": 3.8, + "learning_rate": 3.098280775949251e-05, + "loss": 0.878, + "step": 41370 + }, + { + "epoch": 3.8, + "learning_rate": 3.0978210903741845e-05, + "loss": 0.9397, + "step": 41380 + }, + { + "epoch": 3.81, + "learning_rate": 3.0973614047991175e-05, + "loss": 0.8369, + "step": 41390 + }, + { + "epoch": 3.81, + "learning_rate": 3.0969017192240505e-05, + "loss": 0.8541, + "step": 41400 + }, + { + "epoch": 3.81, + "learning_rate": 3.096442033648984e-05, + "loss": 0.9817, + "step": 41410 + }, + { + "epoch": 3.81, + "learning_rate": 3.095982348073918e-05, + "loss": 0.9803, + "step": 41420 + }, + { + "epoch": 3.81, + "learning_rate": 3.095522662498851e-05, + "loss": 0.9146, + "step": 41430 + }, + { + "epoch": 3.81, + "learning_rate": 3.0950629769237844e-05, + "loss": 0.9183, + "step": 41440 + }, + { + "epoch": 3.81, + "learning_rate": 3.094603291348718e-05, + "loss": 0.8752, + "step": 41450 + }, + { + "epoch": 3.81, + "learning_rate": 3.094143605773651e-05, + "loss": 0.8445, + "step": 41460 + }, + { + "epoch": 3.81, + "learning_rate": 3.0936839201985847e-05, + "loss": 0.8907, + "step": 41470 + }, + { + "epoch": 3.81, + "learning_rate": 3.0932242346235176e-05, + "loss": 0.792, + "step": 41480 + }, + { + "epoch": 3.81, + "learning_rate": 3.0927645490484506e-05, + "loss": 0.9074, + "step": 41490 + }, + { + "epoch": 3.82, + "learning_rate": 3.092304863473384e-05, + "loss": 0.9132, + "step": 41500 + }, + { + "epoch": 3.82, + "learning_rate": 3.091845177898318e-05, + "loss": 0.9387, + "step": 41510 + }, + { + "epoch": 3.82, + "learning_rate": 3.091385492323251e-05, + "loss": 0.8942, + "step": 41520 + }, + { + "epoch": 3.82, + "learning_rate": 3.0909258067481845e-05, + "loss": 0.7805, + "step": 41530 + }, + { + "epoch": 3.82, + "learning_rate": 3.090466121173118e-05, + "loss": 0.8145, + "step": 41540 + }, + { + "epoch": 3.82, + "learning_rate": 3.090006435598051e-05, + "loss": 0.8346, + "step": 41550 + }, + { + "epoch": 3.82, + "learning_rate": 3.089546750022985e-05, + "loss": 0.9382, + "step": 41560 + }, + { + "epoch": 3.82, + "learning_rate": 3.089087064447918e-05, + "loss": 0.8944, + "step": 41570 + }, + { + "epoch": 3.82, + "learning_rate": 3.088627378872851e-05, + "loss": 0.8041, + "step": 41580 + }, + { + "epoch": 3.82, + "learning_rate": 3.0881676932977844e-05, + "loss": 0.9367, + "step": 41590 + }, + { + "epoch": 3.82, + "learning_rate": 3.087708007722718e-05, + "loss": 0.9475, + "step": 41600 + }, + { + "epoch": 3.83, + "learning_rate": 3.087248322147651e-05, + "loss": 0.9961, + "step": 41610 + }, + { + "epoch": 3.83, + "learning_rate": 3.0867886365725846e-05, + "loss": 0.7667, + "step": 41620 + }, + { + "epoch": 3.83, + "learning_rate": 3.086328950997518e-05, + "loss": 0.876, + "step": 41630 + }, + { + "epoch": 3.83, + "learning_rate": 3.085869265422451e-05, + "loss": 0.8992, + "step": 41640 + }, + { + "epoch": 3.83, + "learning_rate": 3.085409579847385e-05, + "loss": 0.9273, + "step": 41650 + }, + { + "epoch": 3.83, + "learning_rate": 3.084949894272318e-05, + "loss": 0.8722, + "step": 41660 + }, + { + "epoch": 3.83, + "learning_rate": 3.084490208697251e-05, + "loss": 0.8965, + "step": 41670 + }, + { + "epoch": 3.83, + "learning_rate": 3.0840305231221845e-05, + "loss": 0.797, + "step": 41680 + }, + { + "epoch": 3.83, + "learning_rate": 3.083570837547118e-05, + "loss": 0.806, + "step": 41690 + }, + { + "epoch": 3.83, + "learning_rate": 3.083111151972051e-05, + "loss": 0.8519, + "step": 41700 + }, + { + "epoch": 3.83, + "learning_rate": 3.082651466396985e-05, + "loss": 0.8404, + "step": 41710 + }, + { + "epoch": 3.84, + "learning_rate": 3.082191780821918e-05, + "loss": 0.8612, + "step": 41720 + }, + { + "epoch": 3.84, + "learning_rate": 3.0817320952468514e-05, + "loss": 0.808, + "step": 41730 + }, + { + "epoch": 3.84, + "learning_rate": 3.081272409671785e-05, + "loss": 0.8971, + "step": 41740 + }, + { + "epoch": 3.84, + "learning_rate": 3.080812724096718e-05, + "loss": 0.8107, + "step": 41750 + }, + { + "epoch": 3.84, + "learning_rate": 3.080353038521651e-05, + "loss": 0.8548, + "step": 41760 + }, + { + "epoch": 3.84, + "learning_rate": 3.0798933529465846e-05, + "loss": 0.9165, + "step": 41770 + }, + { + "epoch": 3.84, + "learning_rate": 3.079433667371518e-05, + "loss": 1.1106, + "step": 41780 + }, + { + "epoch": 3.84, + "learning_rate": 3.078973981796451e-05, + "loss": 0.9052, + "step": 41790 + }, + { + "epoch": 3.84, + "learning_rate": 3.078514296221385e-05, + "loss": 0.9145, + "step": 41800 + }, + { + "epoch": 3.84, + "learning_rate": 3.078054610646318e-05, + "loss": 1.0149, + "step": 41810 + }, + { + "epoch": 3.84, + "learning_rate": 3.0775949250712515e-05, + "loss": 0.9759, + "step": 41820 + }, + { + "epoch": 3.85, + "learning_rate": 3.077135239496185e-05, + "loss": 0.8888, + "step": 41830 + }, + { + "epoch": 3.85, + "learning_rate": 3.076675553921118e-05, + "loss": 0.9497, + "step": 41840 + }, + { + "epoch": 3.85, + "learning_rate": 3.076215868346051e-05, + "loss": 0.9366, + "step": 41850 + }, + { + "epoch": 3.85, + "learning_rate": 3.075756182770985e-05, + "loss": 0.882, + "step": 41860 + }, + { + "epoch": 3.85, + "learning_rate": 3.0752964971959184e-05, + "loss": 0.8673, + "step": 41870 + }, + { + "epoch": 3.85, + "learning_rate": 3.0748368116208513e-05, + "loss": 0.914, + "step": 41880 + }, + { + "epoch": 3.85, + "learning_rate": 3.074377126045785e-05, + "loss": 0.8619, + "step": 41890 + }, + { + "epoch": 3.85, + "learning_rate": 3.073917440470718e-05, + "loss": 0.819, + "step": 41900 + }, + { + "epoch": 3.85, + "learning_rate": 3.0734577548956516e-05, + "loss": 0.8295, + "step": 41910 + }, + { + "epoch": 3.85, + "learning_rate": 3.072998069320585e-05, + "loss": 0.9092, + "step": 41920 + }, + { + "epoch": 3.85, + "learning_rate": 3.072538383745518e-05, + "loss": 0.9288, + "step": 41930 + }, + { + "epoch": 3.86, + "learning_rate": 3.072078698170451e-05, + "loss": 0.8331, + "step": 41940 + }, + { + "epoch": 3.86, + "learning_rate": 3.071619012595385e-05, + "loss": 0.9216, + "step": 41950 + }, + { + "epoch": 3.86, + "learning_rate": 3.0711593270203185e-05, + "loss": 0.84, + "step": 41960 + }, + { + "epoch": 3.86, + "learning_rate": 3.0706996414452515e-05, + "loss": 0.9453, + "step": 41970 + }, + { + "epoch": 3.86, + "learning_rate": 3.070239955870185e-05, + "loss": 0.8445, + "step": 41980 + }, + { + "epoch": 3.86, + "learning_rate": 3.069780270295118e-05, + "loss": 0.9038, + "step": 41990 + }, + { + "epoch": 3.86, + "learning_rate": 3.069320584720052e-05, + "loss": 0.883, + "step": 42000 + }, + { + "epoch": 3.86, + "learning_rate": 3.0688608991449854e-05, + "loss": 0.7748, + "step": 42010 + }, + { + "epoch": 3.86, + "learning_rate": 3.0684012135699183e-05, + "loss": 0.9604, + "step": 42020 + }, + { + "epoch": 3.86, + "learning_rate": 3.067941527994851e-05, + "loss": 0.9242, + "step": 42030 + }, + { + "epoch": 3.87, + "learning_rate": 3.067481842419785e-05, + "loss": 0.7849, + "step": 42040 + }, + { + "epoch": 3.87, + "learning_rate": 3.0670221568447186e-05, + "loss": 0.916, + "step": 42050 + }, + { + "epoch": 3.87, + "learning_rate": 3.0665624712696516e-05, + "loss": 0.758, + "step": 42060 + }, + { + "epoch": 3.87, + "learning_rate": 3.066102785694585e-05, + "loss": 0.8098, + "step": 42070 + }, + { + "epoch": 3.87, + "learning_rate": 3.065643100119518e-05, + "loss": 0.836, + "step": 42080 + }, + { + "epoch": 3.87, + "learning_rate": 3.065183414544452e-05, + "loss": 0.7924, + "step": 42090 + }, + { + "epoch": 3.87, + "learning_rate": 3.0647237289693855e-05, + "loss": 0.9925, + "step": 42100 + }, + { + "epoch": 3.87, + "learning_rate": 3.0642640433943185e-05, + "loss": 0.8834, + "step": 42110 + }, + { + "epoch": 3.87, + "learning_rate": 3.0638043578192514e-05, + "loss": 0.8127, + "step": 42120 + }, + { + "epoch": 3.87, + "learning_rate": 3.063344672244185e-05, + "loss": 0.9137, + "step": 42130 + }, + { + "epoch": 3.87, + "learning_rate": 3.062884986669118e-05, + "loss": 1.0019, + "step": 42140 + }, + { + "epoch": 3.88, + "learning_rate": 3.062425301094052e-05, + "loss": 0.8705, + "step": 42150 + }, + { + "epoch": 3.88, + "learning_rate": 3.0619656155189854e-05, + "loss": 0.9899, + "step": 42160 + }, + { + "epoch": 3.88, + "learning_rate": 3.061505929943918e-05, + "loss": 0.7396, + "step": 42170 + }, + { + "epoch": 3.88, + "learning_rate": 3.061046244368852e-05, + "loss": 0.9206, + "step": 42180 + }, + { + "epoch": 3.88, + "learning_rate": 3.0605865587937856e-05, + "loss": 0.7959, + "step": 42190 + }, + { + "epoch": 3.88, + "learning_rate": 3.0601268732187186e-05, + "loss": 0.8946, + "step": 42200 + }, + { + "epoch": 3.88, + "learning_rate": 3.0596671876436516e-05, + "loss": 0.7907, + "step": 42210 + }, + { + "epoch": 3.88, + "learning_rate": 3.059207502068585e-05, + "loss": 0.9861, + "step": 42220 + }, + { + "epoch": 3.88, + "learning_rate": 3.058747816493518e-05, + "loss": 0.8206, + "step": 42230 + }, + { + "epoch": 3.88, + "learning_rate": 3.058288130918452e-05, + "loss": 1.0817, + "step": 42240 + }, + { + "epoch": 3.88, + "learning_rate": 3.0578284453433855e-05, + "loss": 0.8752, + "step": 42250 + }, + { + "epoch": 3.89, + "learning_rate": 3.0573687597683184e-05, + "loss": 0.7689, + "step": 42260 + }, + { + "epoch": 3.89, + "learning_rate": 3.056909074193252e-05, + "loss": 0.812, + "step": 42270 + }, + { + "epoch": 3.89, + "learning_rate": 3.056449388618186e-05, + "loss": 0.7954, + "step": 42280 + }, + { + "epoch": 3.89, + "learning_rate": 3.055989703043119e-05, + "loss": 1.0381, + "step": 42290 + }, + { + "epoch": 3.89, + "learning_rate": 3.055530017468052e-05, + "loss": 0.8971, + "step": 42300 + }, + { + "epoch": 3.89, + "learning_rate": 3.055070331892985e-05, + "loss": 0.8743, + "step": 42310 + }, + { + "epoch": 3.89, + "learning_rate": 3.054610646317918e-05, + "loss": 0.7803, + "step": 42320 + }, + { + "epoch": 3.89, + "learning_rate": 3.054150960742852e-05, + "loss": 0.8167, + "step": 42330 + }, + { + "epoch": 3.89, + "learning_rate": 3.0536912751677856e-05, + "loss": 0.7777, + "step": 42340 + }, + { + "epoch": 3.89, + "learning_rate": 3.0532315895927186e-05, + "loss": 0.8836, + "step": 42350 + }, + { + "epoch": 3.89, + "learning_rate": 3.052771904017652e-05, + "loss": 0.8275, + "step": 42360 + }, + { + "epoch": 3.9, + "learning_rate": 3.052312218442586e-05, + "loss": 0.8699, + "step": 42370 + }, + { + "epoch": 3.9, + "learning_rate": 3.051852532867519e-05, + "loss": 0.8643, + "step": 42380 + }, + { + "epoch": 3.9, + "learning_rate": 3.0513928472924518e-05, + "loss": 0.8414, + "step": 42390 + }, + { + "epoch": 3.9, + "learning_rate": 3.0509331617173854e-05, + "loss": 0.8447, + "step": 42400 + }, + { + "epoch": 3.9, + "learning_rate": 3.0504734761423188e-05, + "loss": 0.8043, + "step": 42410 + }, + { + "epoch": 3.9, + "learning_rate": 3.050013790567252e-05, + "loss": 0.9356, + "step": 42420 + }, + { + "epoch": 3.9, + "learning_rate": 3.0495541049921854e-05, + "loss": 0.9499, + "step": 42430 + }, + { + "epoch": 3.9, + "learning_rate": 3.049094419417119e-05, + "loss": 0.8151, + "step": 42440 + }, + { + "epoch": 3.9, + "learning_rate": 3.0486347338420523e-05, + "loss": 0.8695, + "step": 42450 + }, + { + "epoch": 3.9, + "learning_rate": 3.0481750482669856e-05, + "loss": 0.7283, + "step": 42460 + }, + { + "epoch": 3.9, + "learning_rate": 3.0477153626919186e-05, + "loss": 0.9512, + "step": 42470 + }, + { + "epoch": 3.91, + "learning_rate": 3.047255677116852e-05, + "loss": 0.8692, + "step": 42480 + }, + { + "epoch": 3.91, + "learning_rate": 3.0467959915417856e-05, + "loss": 0.8116, + "step": 42490 + }, + { + "epoch": 3.91, + "learning_rate": 3.046336305966719e-05, + "loss": 0.7749, + "step": 42500 + }, + { + "epoch": 3.91, + "learning_rate": 3.0458766203916522e-05, + "loss": 0.9691, + "step": 42510 + }, + { + "epoch": 3.91, + "learning_rate": 3.0454169348165855e-05, + "loss": 0.7368, + "step": 42520 + }, + { + "epoch": 3.91, + "learning_rate": 3.044957249241519e-05, + "loss": 0.9554, + "step": 42530 + }, + { + "epoch": 3.91, + "learning_rate": 3.0444975636664524e-05, + "loss": 0.8346, + "step": 42540 + }, + { + "epoch": 3.91, + "learning_rate": 3.0440378780913858e-05, + "loss": 0.7552, + "step": 42550 + }, + { + "epoch": 3.91, + "learning_rate": 3.0435781925163194e-05, + "loss": 0.9676, + "step": 42560 + }, + { + "epoch": 3.91, + "learning_rate": 3.043118506941252e-05, + "loss": 0.9258, + "step": 42570 + }, + { + "epoch": 3.91, + "learning_rate": 3.0426588213661857e-05, + "loss": 0.8877, + "step": 42580 + }, + { + "epoch": 3.92, + "learning_rate": 3.042199135791119e-05, + "loss": 0.9122, + "step": 42590 + }, + { + "epoch": 3.92, + "learning_rate": 3.0417394502160523e-05, + "loss": 0.9229, + "step": 42600 + }, + { + "epoch": 3.92, + "learning_rate": 3.0412797646409856e-05, + "loss": 0.8311, + "step": 42610 + }, + { + "epoch": 3.92, + "learning_rate": 3.0408200790659193e-05, + "loss": 0.922, + "step": 42620 + }, + { + "epoch": 3.92, + "learning_rate": 3.0403603934908526e-05, + "loss": 0.7823, + "step": 42630 + }, + { + "epoch": 3.92, + "learning_rate": 3.039900707915786e-05, + "loss": 0.9912, + "step": 42640 + }, + { + "epoch": 3.92, + "learning_rate": 3.0394410223407195e-05, + "loss": 0.7825, + "step": 42650 + }, + { + "epoch": 3.92, + "learning_rate": 3.038981336765652e-05, + "loss": 0.8328, + "step": 42660 + }, + { + "epoch": 3.92, + "learning_rate": 3.0385216511905855e-05, + "loss": 0.8377, + "step": 42670 + }, + { + "epoch": 3.92, + "learning_rate": 3.038061965615519e-05, + "loss": 0.8936, + "step": 42680 + }, + { + "epoch": 3.92, + "learning_rate": 3.0376022800404524e-05, + "loss": 0.8505, + "step": 42690 + }, + { + "epoch": 3.93, + "learning_rate": 3.0371425944653857e-05, + "loss": 0.8924, + "step": 42700 + }, + { + "epoch": 3.93, + "learning_rate": 3.0366829088903194e-05, + "loss": 0.7185, + "step": 42710 + }, + { + "epoch": 3.93, + "learning_rate": 3.0362232233152527e-05, + "loss": 0.9606, + "step": 42720 + }, + { + "epoch": 3.93, + "learning_rate": 3.035763537740186e-05, + "loss": 0.8341, + "step": 42730 + }, + { + "epoch": 3.93, + "learning_rate": 3.0353038521651196e-05, + "loss": 0.9106, + "step": 42740 + }, + { + "epoch": 3.93, + "learning_rate": 3.0348441665900523e-05, + "loss": 0.9915, + "step": 42750 + }, + { + "epoch": 3.93, + "learning_rate": 3.0343844810149856e-05, + "loss": 0.9164, + "step": 42760 + }, + { + "epoch": 3.93, + "learning_rate": 3.0339247954399192e-05, + "loss": 0.8399, + "step": 42770 + }, + { + "epoch": 3.93, + "learning_rate": 3.0334651098648525e-05, + "loss": 0.8349, + "step": 42780 + }, + { + "epoch": 3.93, + "learning_rate": 3.033005424289786e-05, + "loss": 0.9067, + "step": 42790 + }, + { + "epoch": 3.93, + "learning_rate": 3.0325457387147195e-05, + "loss": 0.8488, + "step": 42800 + }, + { + "epoch": 3.94, + "learning_rate": 3.0320860531396528e-05, + "loss": 0.9553, + "step": 42810 + }, + { + "epoch": 3.94, + "learning_rate": 3.031626367564586e-05, + "loss": 0.9252, + "step": 42820 + }, + { + "epoch": 3.94, + "learning_rate": 3.0311666819895194e-05, + "loss": 0.8189, + "step": 42830 + }, + { + "epoch": 3.94, + "learning_rate": 3.0307069964144524e-05, + "loss": 0.9564, + "step": 42840 + }, + { + "epoch": 3.94, + "learning_rate": 3.0302473108393857e-05, + "loss": 0.9466, + "step": 42850 + }, + { + "epoch": 3.94, + "learning_rate": 3.0297876252643194e-05, + "loss": 0.808, + "step": 42860 + }, + { + "epoch": 3.94, + "learning_rate": 3.0293279396892527e-05, + "loss": 0.8741, + "step": 42870 + }, + { + "epoch": 3.94, + "learning_rate": 3.028868254114186e-05, + "loss": 0.8869, + "step": 42880 + }, + { + "epoch": 3.94, + "learning_rate": 3.0284085685391196e-05, + "loss": 0.7599, + "step": 42890 + }, + { + "epoch": 3.94, + "learning_rate": 3.027948882964053e-05, + "loss": 1.0043, + "step": 42900 + }, + { + "epoch": 3.95, + "learning_rate": 3.0274891973889862e-05, + "loss": 0.9561, + "step": 42910 + }, + { + "epoch": 3.95, + "learning_rate": 3.0270295118139195e-05, + "loss": 0.8839, + "step": 42920 + }, + { + "epoch": 3.95, + "learning_rate": 3.0265698262388525e-05, + "loss": 0.9131, + "step": 42930 + }, + { + "epoch": 3.95, + "learning_rate": 3.0261101406637858e-05, + "loss": 0.8582, + "step": 42940 + }, + { + "epoch": 3.95, + "learning_rate": 3.0256504550887195e-05, + "loss": 0.9294, + "step": 42950 + }, + { + "epoch": 3.95, + "learning_rate": 3.0251907695136528e-05, + "loss": 0.88, + "step": 42960 + }, + { + "epoch": 3.95, + "learning_rate": 3.024731083938586e-05, + "loss": 0.8744, + "step": 42970 + }, + { + "epoch": 3.95, + "learning_rate": 3.0242713983635197e-05, + "loss": 0.8961, + "step": 42980 + }, + { + "epoch": 3.95, + "learning_rate": 3.023811712788453e-05, + "loss": 0.8908, + "step": 42990 + }, + { + "epoch": 3.95, + "learning_rate": 3.0233520272133864e-05, + "loss": 0.8268, + "step": 43000 + }, + { + "epoch": 3.95, + "learning_rate": 3.0228923416383197e-05, + "loss": 0.8455, + "step": 43010 + }, + { + "epoch": 3.96, + "learning_rate": 3.0224326560632526e-05, + "loss": 0.9264, + "step": 43020 + }, + { + "epoch": 3.96, + "learning_rate": 3.021972970488186e-05, + "loss": 0.9129, + "step": 43030 + }, + { + "epoch": 3.96, + "learning_rate": 3.0215132849131196e-05, + "loss": 0.7947, + "step": 43040 + }, + { + "epoch": 3.96, + "learning_rate": 3.021053599338053e-05, + "loss": 0.8085, + "step": 43050 + }, + { + "epoch": 3.96, + "learning_rate": 3.0205939137629862e-05, + "loss": 0.8246, + "step": 43060 + }, + { + "epoch": 3.96, + "learning_rate": 3.02013422818792e-05, + "loss": 0.9389, + "step": 43070 + }, + { + "epoch": 3.96, + "learning_rate": 3.019674542612853e-05, + "loss": 0.998, + "step": 43080 + }, + { + "epoch": 3.96, + "learning_rate": 3.0192148570377865e-05, + "loss": 1.0268, + "step": 43090 + }, + { + "epoch": 3.96, + "learning_rate": 3.0187551714627198e-05, + "loss": 0.8891, + "step": 43100 + }, + { + "epoch": 3.96, + "learning_rate": 3.0182954858876528e-05, + "loss": 0.8556, + "step": 43110 + }, + { + "epoch": 3.96, + "learning_rate": 3.017835800312586e-05, + "loss": 0.8966, + "step": 43120 + }, + { + "epoch": 3.97, + "learning_rate": 3.0173761147375197e-05, + "loss": 0.8852, + "step": 43130 + }, + { + "epoch": 3.97, + "learning_rate": 3.016916429162453e-05, + "loss": 0.8995, + "step": 43140 + }, + { + "epoch": 3.97, + "learning_rate": 3.0164567435873863e-05, + "loss": 0.8653, + "step": 43150 + }, + { + "epoch": 3.97, + "learning_rate": 3.0159970580123196e-05, + "loss": 0.9161, + "step": 43160 + }, + { + "epoch": 3.97, + "learning_rate": 3.0155373724372533e-05, + "loss": 0.8953, + "step": 43170 + }, + { + "epoch": 3.97, + "learning_rate": 3.0150776868621866e-05, + "loss": 0.8979, + "step": 43180 + }, + { + "epoch": 3.97, + "learning_rate": 3.01461800128712e-05, + "loss": 0.8475, + "step": 43190 + }, + { + "epoch": 3.97, + "learning_rate": 3.014158315712053e-05, + "loss": 0.9134, + "step": 43200 + }, + { + "epoch": 3.97, + "learning_rate": 3.0136986301369862e-05, + "loss": 0.8336, + "step": 43210 + }, + { + "epoch": 3.97, + "learning_rate": 3.0132389445619198e-05, + "loss": 0.9151, + "step": 43220 + }, + { + "epoch": 3.97, + "learning_rate": 3.012779258986853e-05, + "loss": 0.7819, + "step": 43230 + }, + { + "epoch": 3.98, + "learning_rate": 3.0123195734117864e-05, + "loss": 0.8685, + "step": 43240 + }, + { + "epoch": 3.98, + "learning_rate": 3.0118598878367198e-05, + "loss": 0.8687, + "step": 43250 + }, + { + "epoch": 3.98, + "learning_rate": 3.0114002022616534e-05, + "loss": 0.9862, + "step": 43260 + }, + { + "epoch": 3.98, + "learning_rate": 3.0109405166865867e-05, + "loss": 0.933, + "step": 43270 + }, + { + "epoch": 3.98, + "learning_rate": 3.01048083111152e-05, + "loss": 0.8555, + "step": 43280 + }, + { + "epoch": 3.98, + "learning_rate": 3.010021145536453e-05, + "loss": 0.8464, + "step": 43290 + }, + { + "epoch": 3.98, + "learning_rate": 3.0095614599613863e-05, + "loss": 0.8757, + "step": 43300 + }, + { + "epoch": 3.98, + "learning_rate": 3.00910177438632e-05, + "loss": 0.9042, + "step": 43310 + }, + { + "epoch": 3.98, + "learning_rate": 3.0086420888112533e-05, + "loss": 0.9396, + "step": 43320 + }, + { + "epoch": 3.98, + "learning_rate": 3.0081824032361866e-05, + "loss": 0.9152, + "step": 43330 + }, + { + "epoch": 3.98, + "learning_rate": 3.00772271766112e-05, + "loss": 0.9607, + "step": 43340 + }, + { + "epoch": 3.99, + "learning_rate": 3.0072630320860535e-05, + "loss": 0.9476, + "step": 43350 + }, + { + "epoch": 3.99, + "learning_rate": 3.006803346510987e-05, + "loss": 0.9484, + "step": 43360 + }, + { + "epoch": 3.99, + "learning_rate": 3.00634366093592e-05, + "loss": 0.7395, + "step": 43370 + }, + { + "epoch": 3.99, + "learning_rate": 3.005883975360853e-05, + "loss": 0.8743, + "step": 43380 + }, + { + "epoch": 3.99, + "learning_rate": 3.0054242897857864e-05, + "loss": 0.936, + "step": 43390 + }, + { + "epoch": 3.99, + "learning_rate": 3.00496460421072e-05, + "loss": 0.8477, + "step": 43400 + }, + { + "epoch": 3.99, + "learning_rate": 3.0045049186356534e-05, + "loss": 0.8152, + "step": 43410 + }, + { + "epoch": 3.99, + "learning_rate": 3.0040452330605867e-05, + "loss": 0.9444, + "step": 43420 + }, + { + "epoch": 3.99, + "learning_rate": 3.00358554748552e-05, + "loss": 0.8634, + "step": 43430 + }, + { + "epoch": 3.99, + "learning_rate": 3.0031258619104536e-05, + "loss": 0.9626, + "step": 43440 + }, + { + "epoch": 3.99, + "learning_rate": 3.002666176335387e-05, + "loss": 0.834, + "step": 43450 + }, + { + "epoch": 4.0, + "learning_rate": 3.0022064907603203e-05, + "loss": 0.8572, + "step": 43460 + }, + { + "epoch": 4.0, + "learning_rate": 3.0017468051852532e-05, + "loss": 0.8622, + "step": 43470 + }, + { + "epoch": 4.0, + "learning_rate": 3.0012871196101865e-05, + "loss": 0.8662, + "step": 43480 + }, + { + "epoch": 4.0, + "learning_rate": 3.00082743403512e-05, + "loss": 0.8424, + "step": 43490 + }, + { + "epoch": 4.0, + "learning_rate": 3.0003677484600535e-05, + "loss": 0.7547, + "step": 43500 + }, + { + "epoch": 4.0, + "learning_rate": 2.9999080628849868e-05, + "loss": 0.9156, + "step": 43510 + }, + { + "epoch": 4.0, + "learning_rate": 2.99944837730992e-05, + "loss": 0.9816, + "step": 43520 + }, + { + "epoch": 4.0, + "learning_rate": 2.9989886917348538e-05, + "loss": 0.8725, + "step": 43530 + }, + { + "epoch": 4.0, + "learning_rate": 2.998529006159787e-05, + "loss": 1.0294, + "step": 43540 + }, + { + "epoch": 4.0, + "learning_rate": 2.9980693205847204e-05, + "loss": 0.9817, + "step": 43550 + }, + { + "epoch": 4.0, + "learning_rate": 2.9976096350096534e-05, + "loss": 0.9537, + "step": 43560 + }, + { + "epoch": 4.01, + "learning_rate": 2.9971499494345867e-05, + "loss": 0.9162, + "step": 43570 + }, + { + "epoch": 4.01, + "learning_rate": 2.99669026385952e-05, + "loss": 0.9587, + "step": 43580 + }, + { + "epoch": 4.01, + "learning_rate": 2.9962305782844536e-05, + "loss": 0.905, + "step": 43590 + }, + { + "epoch": 4.01, + "learning_rate": 2.995770892709387e-05, + "loss": 0.8672, + "step": 43600 + }, + { + "epoch": 4.01, + "learning_rate": 2.9953112071343202e-05, + "loss": 0.8944, + "step": 43610 + }, + { + "epoch": 4.01, + "learning_rate": 2.994851521559254e-05, + "loss": 0.8947, + "step": 43620 + }, + { + "epoch": 4.01, + "learning_rate": 2.9943918359841872e-05, + "loss": 0.9051, + "step": 43630 + }, + { + "epoch": 4.01, + "learning_rate": 2.9939321504091205e-05, + "loss": 0.888, + "step": 43640 + }, + { + "epoch": 4.01, + "learning_rate": 2.9934724648340535e-05, + "loss": 0.8833, + "step": 43650 + }, + { + "epoch": 4.01, + "learning_rate": 2.9930127792589868e-05, + "loss": 0.8431, + "step": 43660 + }, + { + "epoch": 4.01, + "learning_rate": 2.99255309368392e-05, + "loss": 0.8909, + "step": 43670 + }, + { + "epoch": 4.02, + "learning_rate": 2.9920934081088537e-05, + "loss": 0.8028, + "step": 43680 + }, + { + "epoch": 4.02, + "learning_rate": 2.991633722533787e-05, + "loss": 0.8323, + "step": 43690 + }, + { + "epoch": 4.02, + "learning_rate": 2.9911740369587204e-05, + "loss": 0.9193, + "step": 43700 + }, + { + "epoch": 4.02, + "learning_rate": 2.990714351383654e-05, + "loss": 1.002, + "step": 43710 + }, + { + "epoch": 4.02, + "learning_rate": 2.9902546658085873e-05, + "loss": 0.9419, + "step": 43720 + }, + { + "epoch": 4.02, + "learning_rate": 2.9897949802335206e-05, + "loss": 0.8497, + "step": 43730 + }, + { + "epoch": 4.02, + "learning_rate": 2.9893352946584536e-05, + "loss": 0.9422, + "step": 43740 + }, + { + "epoch": 4.02, + "learning_rate": 2.988875609083387e-05, + "loss": 0.836, + "step": 43750 + }, + { + "epoch": 4.02, + "learning_rate": 2.9884159235083202e-05, + "loss": 0.8739, + "step": 43760 + }, + { + "epoch": 4.02, + "learning_rate": 2.987956237933254e-05, + "loss": 0.9939, + "step": 43770 + }, + { + "epoch": 4.03, + "learning_rate": 2.987496552358187e-05, + "loss": 0.868, + "step": 43780 + }, + { + "epoch": 4.03, + "learning_rate": 2.9870368667831205e-05, + "loss": 0.9282, + "step": 43790 + }, + { + "epoch": 4.03, + "learning_rate": 2.986577181208054e-05, + "loss": 0.8898, + "step": 43800 + }, + { + "epoch": 4.03, + "learning_rate": 2.9861174956329874e-05, + "loss": 0.7745, + "step": 43810 + }, + { + "epoch": 4.03, + "learning_rate": 2.9856578100579207e-05, + "loss": 0.9487, + "step": 43820 + }, + { + "epoch": 4.03, + "learning_rate": 2.9851981244828537e-05, + "loss": 0.8048, + "step": 43830 + }, + { + "epoch": 4.03, + "learning_rate": 2.984738438907787e-05, + "loss": 0.8449, + "step": 43840 + }, + { + "epoch": 4.03, + "learning_rate": 2.9842787533327203e-05, + "loss": 0.9738, + "step": 43850 + }, + { + "epoch": 4.03, + "learning_rate": 2.983819067757654e-05, + "loss": 0.8115, + "step": 43860 + }, + { + "epoch": 4.03, + "learning_rate": 2.9833593821825873e-05, + "loss": 1.0735, + "step": 43870 + }, + { + "epoch": 4.03, + "learning_rate": 2.9828996966075206e-05, + "loss": 0.7842, + "step": 43880 + }, + { + "epoch": 4.04, + "learning_rate": 2.9824400110324542e-05, + "loss": 0.8472, + "step": 43890 + }, + { + "epoch": 4.04, + "learning_rate": 2.9819803254573875e-05, + "loss": 0.8473, + "step": 43900 + }, + { + "epoch": 4.04, + "learning_rate": 2.981520639882321e-05, + "loss": 0.8801, + "step": 43910 + }, + { + "epoch": 4.04, + "learning_rate": 2.9810609543072538e-05, + "loss": 0.9102, + "step": 43920 + }, + { + "epoch": 4.04, + "learning_rate": 2.980601268732187e-05, + "loss": 0.8883, + "step": 43930 + }, + { + "epoch": 4.04, + "learning_rate": 2.9801415831571204e-05, + "loss": 0.8694, + "step": 43940 + }, + { + "epoch": 4.04, + "learning_rate": 2.979681897582054e-05, + "loss": 0.9237, + "step": 43950 + }, + { + "epoch": 4.04, + "learning_rate": 2.9792222120069874e-05, + "loss": 0.9043, + "step": 43960 + }, + { + "epoch": 4.04, + "learning_rate": 2.9787625264319207e-05, + "loss": 0.9667, + "step": 43970 + }, + { + "epoch": 4.04, + "learning_rate": 2.978302840856854e-05, + "loss": 0.938, + "step": 43980 + }, + { + "epoch": 4.04, + "learning_rate": 2.9778431552817877e-05, + "loss": 0.8549, + "step": 43990 + }, + { + "epoch": 4.05, + "learning_rate": 2.977383469706721e-05, + "loss": 0.9912, + "step": 44000 + }, + { + "epoch": 4.05, + "learning_rate": 2.976923784131654e-05, + "loss": 0.8521, + "step": 44010 + }, + { + "epoch": 4.05, + "learning_rate": 2.9764640985565873e-05, + "loss": 0.8274, + "step": 44020 + }, + { + "epoch": 4.05, + "learning_rate": 2.9760044129815206e-05, + "loss": 0.9357, + "step": 44030 + }, + { + "epoch": 4.05, + "learning_rate": 2.9755447274064542e-05, + "loss": 0.8905, + "step": 44040 + }, + { + "epoch": 4.05, + "learning_rate": 2.9750850418313875e-05, + "loss": 0.9101, + "step": 44050 + }, + { + "epoch": 4.05, + "learning_rate": 2.974625356256321e-05, + "loss": 0.9312, + "step": 44060 + }, + { + "epoch": 4.05, + "learning_rate": 2.974165670681254e-05, + "loss": 0.857, + "step": 44070 + }, + { + "epoch": 4.05, + "learning_rate": 2.9737059851061878e-05, + "loss": 0.9014, + "step": 44080 + }, + { + "epoch": 4.05, + "learning_rate": 2.973246299531121e-05, + "loss": 0.8814, + "step": 44090 + }, + { + "epoch": 4.05, + "learning_rate": 2.972786613956054e-05, + "loss": 0.9555, + "step": 44100 + }, + { + "epoch": 4.06, + "learning_rate": 2.9723269283809874e-05, + "loss": 0.8838, + "step": 44110 + }, + { + "epoch": 4.06, + "learning_rate": 2.9718672428059207e-05, + "loss": 0.8776, + "step": 44120 + }, + { + "epoch": 4.06, + "learning_rate": 2.9714075572308543e-05, + "loss": 0.8877, + "step": 44130 + }, + { + "epoch": 4.06, + "learning_rate": 2.9709478716557876e-05, + "loss": 0.8879, + "step": 44140 + }, + { + "epoch": 4.06, + "learning_rate": 2.970488186080721e-05, + "loss": 0.9778, + "step": 44150 + }, + { + "epoch": 4.06, + "learning_rate": 2.9700285005056543e-05, + "loss": 0.8975, + "step": 44160 + }, + { + "epoch": 4.06, + "learning_rate": 2.969568814930588e-05, + "loss": 0.8363, + "step": 44170 + }, + { + "epoch": 4.06, + "learning_rate": 2.9691091293555212e-05, + "loss": 0.9667, + "step": 44180 + }, + { + "epoch": 4.06, + "learning_rate": 2.9686494437804542e-05, + "loss": 0.9142, + "step": 44190 + }, + { + "epoch": 4.06, + "learning_rate": 2.9681897582053875e-05, + "loss": 0.8858, + "step": 44200 + }, + { + "epoch": 4.06, + "learning_rate": 2.9677300726303208e-05, + "loss": 0.8257, + "step": 44210 + }, + { + "epoch": 4.07, + "learning_rate": 2.9672703870552545e-05, + "loss": 0.9642, + "step": 44220 + }, + { + "epoch": 4.07, + "learning_rate": 2.9668107014801878e-05, + "loss": 0.9065, + "step": 44230 + }, + { + "epoch": 4.07, + "learning_rate": 2.966351015905121e-05, + "loss": 1.0083, + "step": 44240 + }, + { + "epoch": 4.07, + "learning_rate": 2.9658913303300544e-05, + "loss": 0.8783, + "step": 44250 + }, + { + "epoch": 4.07, + "learning_rate": 2.965431644754988e-05, + "loss": 0.8379, + "step": 44260 + }, + { + "epoch": 4.07, + "learning_rate": 2.9649719591799213e-05, + "loss": 0.9289, + "step": 44270 + }, + { + "epoch": 4.07, + "learning_rate": 2.9645122736048543e-05, + "loss": 0.8475, + "step": 44280 + }, + { + "epoch": 4.07, + "learning_rate": 2.9640525880297876e-05, + "loss": 0.9395, + "step": 44290 + }, + { + "epoch": 4.07, + "learning_rate": 2.963592902454721e-05, + "loss": 0.9234, + "step": 44300 + }, + { + "epoch": 4.07, + "learning_rate": 2.9631332168796542e-05, + "loss": 0.91, + "step": 44310 + }, + { + "epoch": 4.07, + "learning_rate": 2.962673531304588e-05, + "loss": 0.8589, + "step": 44320 + }, + { + "epoch": 4.08, + "learning_rate": 2.9622138457295212e-05, + "loss": 0.7401, + "step": 44330 + }, + { + "epoch": 4.08, + "learning_rate": 2.9617541601544545e-05, + "loss": 0.9458, + "step": 44340 + }, + { + "epoch": 4.08, + "learning_rate": 2.961294474579388e-05, + "loss": 0.9489, + "step": 44350 + }, + { + "epoch": 4.08, + "learning_rate": 2.9608347890043215e-05, + "loss": 0.836, + "step": 44360 + }, + { + "epoch": 4.08, + "learning_rate": 2.9603751034292544e-05, + "loss": 1.0322, + "step": 44370 + }, + { + "epoch": 4.08, + "learning_rate": 2.9599154178541877e-05, + "loss": 0.7793, + "step": 44380 + }, + { + "epoch": 4.08, + "learning_rate": 2.959455732279121e-05, + "loss": 0.8548, + "step": 44390 + }, + { + "epoch": 4.08, + "learning_rate": 2.9589960467040544e-05, + "loss": 0.7747, + "step": 44400 + }, + { + "epoch": 4.08, + "learning_rate": 2.958536361128988e-05, + "loss": 0.9053, + "step": 44410 + }, + { + "epoch": 4.08, + "learning_rate": 2.9580766755539213e-05, + "loss": 0.8873, + "step": 44420 + }, + { + "epoch": 4.08, + "learning_rate": 2.9576169899788546e-05, + "loss": 0.8838, + "step": 44430 + }, + { + "epoch": 4.09, + "learning_rate": 2.9571573044037883e-05, + "loss": 0.8129, + "step": 44440 + }, + { + "epoch": 4.09, + "learning_rate": 2.9566976188287216e-05, + "loss": 0.8477, + "step": 44450 + }, + { + "epoch": 4.09, + "learning_rate": 2.9562379332536545e-05, + "loss": 0.938, + "step": 44460 + }, + { + "epoch": 4.09, + "learning_rate": 2.955778247678588e-05, + "loss": 0.9835, + "step": 44470 + }, + { + "epoch": 4.09, + "learning_rate": 2.955318562103521e-05, + "loss": 0.9287, + "step": 44480 + }, + { + "epoch": 4.09, + "learning_rate": 2.9548588765284545e-05, + "loss": 0.8399, + "step": 44490 + }, + { + "epoch": 4.09, + "learning_rate": 2.954399190953388e-05, + "loss": 0.888, + "step": 44500 + }, + { + "epoch": 4.09, + "learning_rate": 2.9539395053783214e-05, + "loss": 0.9236, + "step": 44510 + }, + { + "epoch": 4.09, + "learning_rate": 2.9534798198032547e-05, + "loss": 0.814, + "step": 44520 + }, + { + "epoch": 4.09, + "learning_rate": 2.9530201342281884e-05, + "loss": 0.8212, + "step": 44530 + }, + { + "epoch": 4.09, + "learning_rate": 2.9525604486531217e-05, + "loss": 0.8162, + "step": 44540 + }, + { + "epoch": 4.1, + "learning_rate": 2.9521007630780547e-05, + "loss": 0.9067, + "step": 44550 + }, + { + "epoch": 4.1, + "learning_rate": 2.951641077502988e-05, + "loss": 0.8105, + "step": 44560 + }, + { + "epoch": 4.1, + "learning_rate": 2.9511813919279213e-05, + "loss": 0.8814, + "step": 44570 + }, + { + "epoch": 4.1, + "learning_rate": 2.9507217063528546e-05, + "loss": 0.989, + "step": 44580 + }, + { + "epoch": 4.1, + "learning_rate": 2.9502620207777882e-05, + "loss": 0.9304, + "step": 44590 + }, + { + "epoch": 4.1, + "learning_rate": 2.9498023352027215e-05, + "loss": 1.0213, + "step": 44600 + }, + { + "epoch": 4.1, + "learning_rate": 2.949342649627655e-05, + "loss": 0.8219, + "step": 44610 + }, + { + "epoch": 4.1, + "learning_rate": 2.9488829640525885e-05, + "loss": 0.7803, + "step": 44620 + }, + { + "epoch": 4.1, + "learning_rate": 2.9484232784775218e-05, + "loss": 0.8794, + "step": 44630 + }, + { + "epoch": 4.1, + "learning_rate": 2.9479635929024544e-05, + "loss": 0.951, + "step": 44640 + }, + { + "epoch": 4.1, + "learning_rate": 2.947503907327388e-05, + "loss": 0.7624, + "step": 44650 + }, + { + "epoch": 4.11, + "learning_rate": 2.9470442217523214e-05, + "loss": 0.8456, + "step": 44660 + }, + { + "epoch": 4.11, + "learning_rate": 2.9465845361772547e-05, + "loss": 0.9056, + "step": 44670 + }, + { + "epoch": 4.11, + "learning_rate": 2.9461248506021884e-05, + "loss": 0.8483, + "step": 44680 + }, + { + "epoch": 4.11, + "learning_rate": 2.9456651650271217e-05, + "loss": 0.8666, + "step": 44690 + }, + { + "epoch": 4.11, + "learning_rate": 2.945205479452055e-05, + "loss": 0.9194, + "step": 44700 + }, + { + "epoch": 4.11, + "learning_rate": 2.9447457938769886e-05, + "loss": 0.9423, + "step": 44710 + }, + { + "epoch": 4.11, + "learning_rate": 2.944286108301922e-05, + "loss": 0.8534, + "step": 44720 + }, + { + "epoch": 4.11, + "learning_rate": 2.9438264227268546e-05, + "loss": 0.9967, + "step": 44730 + }, + { + "epoch": 4.11, + "learning_rate": 2.9433667371517882e-05, + "loss": 0.9155, + "step": 44740 + }, + { + "epoch": 4.11, + "learning_rate": 2.9429070515767215e-05, + "loss": 0.9448, + "step": 44750 + }, + { + "epoch": 4.12, + "learning_rate": 2.942447366001655e-05, + "loss": 0.8542, + "step": 44760 + }, + { + "epoch": 4.12, + "learning_rate": 2.9419876804265885e-05, + "loss": 0.8171, + "step": 44770 + }, + { + "epoch": 4.12, + "learning_rate": 2.9415279948515218e-05, + "loss": 0.8834, + "step": 44780 + }, + { + "epoch": 4.12, + "learning_rate": 2.941068309276455e-05, + "loss": 0.9426, + "step": 44790 + }, + { + "epoch": 4.12, + "learning_rate": 2.9406086237013884e-05, + "loss": 0.868, + "step": 44800 + }, + { + "epoch": 4.12, + "learning_rate": 2.940148938126322e-05, + "loss": 0.8253, + "step": 44810 + }, + { + "epoch": 4.12, + "learning_rate": 2.9396892525512547e-05, + "loss": 0.9652, + "step": 44820 + }, + { + "epoch": 4.12, + "learning_rate": 2.9392295669761883e-05, + "loss": 0.869, + "step": 44830 + }, + { + "epoch": 4.12, + "learning_rate": 2.9387698814011216e-05, + "loss": 0.7678, + "step": 44840 + }, + { + "epoch": 4.12, + "learning_rate": 2.938310195826055e-05, + "loss": 0.9411, + "step": 44850 + }, + { + "epoch": 4.12, + "learning_rate": 2.9378505102509886e-05, + "loss": 0.8681, + "step": 44860 + }, + { + "epoch": 4.13, + "learning_rate": 2.937390824675922e-05, + "loss": 0.9634, + "step": 44870 + }, + { + "epoch": 4.13, + "learning_rate": 2.9369311391008552e-05, + "loss": 0.9139, + "step": 44880 + }, + { + "epoch": 4.13, + "learning_rate": 2.9364714535257885e-05, + "loss": 0.8367, + "step": 44890 + }, + { + "epoch": 4.13, + "learning_rate": 2.9360117679507222e-05, + "loss": 0.8018, + "step": 44900 + }, + { + "epoch": 4.13, + "learning_rate": 2.9355520823756548e-05, + "loss": 0.9205, + "step": 44910 + }, + { + "epoch": 4.13, + "learning_rate": 2.9350923968005885e-05, + "loss": 0.7798, + "step": 44920 + }, + { + "epoch": 4.13, + "learning_rate": 2.9346327112255218e-05, + "loss": 0.8618, + "step": 44930 + }, + { + "epoch": 4.13, + "learning_rate": 2.934173025650455e-05, + "loss": 0.8529, + "step": 44940 + }, + { + "epoch": 4.13, + "learning_rate": 2.9337133400753887e-05, + "loss": 0.855, + "step": 44950 + }, + { + "epoch": 4.13, + "learning_rate": 2.933253654500322e-05, + "loss": 0.9378, + "step": 44960 + }, + { + "epoch": 4.13, + "learning_rate": 2.9327939689252553e-05, + "loss": 0.9093, + "step": 44970 + }, + { + "epoch": 4.14, + "learning_rate": 2.9323342833501886e-05, + "loss": 0.9621, + "step": 44980 + }, + { + "epoch": 4.14, + "learning_rate": 2.9318745977751223e-05, + "loss": 0.8816, + "step": 44990 + }, + { + "epoch": 4.14, + "learning_rate": 2.931414912200055e-05, + "loss": 1.0072, + "step": 45000 + }, + { + "epoch": 4.14, + "eval_accuracy": 0.5637554585152839, + "eval_loss": 0.8915139436721802, + "eval_runtime": 160.2386, + "eval_samples_per_second": 28.582, + "eval_steps_per_second": 3.576, + "step": 45000 + }, + { + "epoch": 4.14, + "learning_rate": 2.9309552266249886e-05, + "loss": 0.8402, + "step": 45010 + }, + { + "epoch": 4.14, + "learning_rate": 2.930495541049922e-05, + "loss": 0.8683, + "step": 45020 + }, + { + "epoch": 4.14, + "learning_rate": 2.9300358554748552e-05, + "loss": 0.8742, + "step": 45030 + }, + { + "epoch": 4.14, + "learning_rate": 2.929576169899789e-05, + "loss": 0.9364, + "step": 45040 + }, + { + "epoch": 4.14, + "learning_rate": 2.929116484324722e-05, + "loss": 0.8282, + "step": 45050 + }, + { + "epoch": 4.14, + "learning_rate": 2.9286567987496555e-05, + "loss": 0.8019, + "step": 45060 + }, + { + "epoch": 4.14, + "learning_rate": 2.9281971131745888e-05, + "loss": 0.9511, + "step": 45070 + }, + { + "epoch": 4.14, + "learning_rate": 2.9277374275995224e-05, + "loss": 0.8089, + "step": 45080 + }, + { + "epoch": 4.15, + "learning_rate": 2.927277742024455e-05, + "loss": 0.8325, + "step": 45090 + }, + { + "epoch": 4.15, + "learning_rate": 2.9268180564493887e-05, + "loss": 0.8496, + "step": 45100 + }, + { + "epoch": 4.15, + "learning_rate": 2.926358370874322e-05, + "loss": 0.8471, + "step": 45110 + }, + { + "epoch": 4.15, + "learning_rate": 2.9258986852992553e-05, + "loss": 0.9354, + "step": 45120 + }, + { + "epoch": 4.15, + "learning_rate": 2.9254389997241886e-05, + "loss": 0.8697, + "step": 45130 + }, + { + "epoch": 4.15, + "learning_rate": 2.9249793141491223e-05, + "loss": 0.9789, + "step": 45140 + }, + { + "epoch": 4.15, + "learning_rate": 2.9245196285740556e-05, + "loss": 0.9036, + "step": 45150 + }, + { + "epoch": 4.15, + "learning_rate": 2.924059942998989e-05, + "loss": 0.8226, + "step": 45160 + }, + { + "epoch": 4.15, + "learning_rate": 2.9236002574239225e-05, + "loss": 0.8475, + "step": 45170 + }, + { + "epoch": 4.15, + "learning_rate": 2.923140571848855e-05, + "loss": 0.9216, + "step": 45180 + }, + { + "epoch": 4.15, + "learning_rate": 2.9226808862737888e-05, + "loss": 0.7838, + "step": 45190 + }, + { + "epoch": 4.16, + "learning_rate": 2.922221200698722e-05, + "loss": 0.9608, + "step": 45200 + }, + { + "epoch": 4.16, + "learning_rate": 2.9217615151236554e-05, + "loss": 0.9213, + "step": 45210 + }, + { + "epoch": 4.16, + "learning_rate": 2.9213018295485887e-05, + "loss": 0.8651, + "step": 45220 + }, + { + "epoch": 4.16, + "learning_rate": 2.9208421439735224e-05, + "loss": 0.8244, + "step": 45230 + }, + { + "epoch": 4.16, + "learning_rate": 2.9203824583984557e-05, + "loss": 0.7939, + "step": 45240 + }, + { + "epoch": 4.16, + "learning_rate": 2.919922772823389e-05, + "loss": 0.8044, + "step": 45250 + }, + { + "epoch": 4.16, + "learning_rate": 2.9194630872483227e-05, + "loss": 0.8593, + "step": 45260 + }, + { + "epoch": 4.16, + "learning_rate": 2.9190034016732553e-05, + "loss": 0.9031, + "step": 45270 + }, + { + "epoch": 4.16, + "learning_rate": 2.918543716098189e-05, + "loss": 0.8773, + "step": 45280 + }, + { + "epoch": 4.16, + "learning_rate": 2.9180840305231222e-05, + "loss": 0.86, + "step": 45290 + }, + { + "epoch": 4.16, + "learning_rate": 2.9176243449480555e-05, + "loss": 0.8204, + "step": 45300 + }, + { + "epoch": 4.17, + "learning_rate": 2.917164659372989e-05, + "loss": 0.9093, + "step": 45310 + }, + { + "epoch": 4.17, + "learning_rate": 2.9167049737979225e-05, + "loss": 0.7821, + "step": 45320 + }, + { + "epoch": 4.17, + "learning_rate": 2.9162452882228558e-05, + "loss": 1.0377, + "step": 45330 + }, + { + "epoch": 4.17, + "learning_rate": 2.915785602647789e-05, + "loss": 0.946, + "step": 45340 + }, + { + "epoch": 4.17, + "learning_rate": 2.9153259170727228e-05, + "loss": 0.8228, + "step": 45350 + }, + { + "epoch": 4.17, + "learning_rate": 2.9148662314976554e-05, + "loss": 0.9079, + "step": 45360 + }, + { + "epoch": 4.17, + "learning_rate": 2.914406545922589e-05, + "loss": 0.9341, + "step": 45370 + }, + { + "epoch": 4.17, + "learning_rate": 2.9139468603475224e-05, + "loss": 0.8577, + "step": 45380 + }, + { + "epoch": 4.17, + "learning_rate": 2.9134871747724557e-05, + "loss": 1.0103, + "step": 45390 + }, + { + "epoch": 4.17, + "learning_rate": 2.913027489197389e-05, + "loss": 0.822, + "step": 45400 + }, + { + "epoch": 4.17, + "learning_rate": 2.9125678036223226e-05, + "loss": 0.917, + "step": 45410 + }, + { + "epoch": 4.18, + "learning_rate": 2.912108118047256e-05, + "loss": 0.8093, + "step": 45420 + }, + { + "epoch": 4.18, + "learning_rate": 2.9116484324721892e-05, + "loss": 0.9749, + "step": 45430 + }, + { + "epoch": 4.18, + "learning_rate": 2.911188746897123e-05, + "loss": 0.7892, + "step": 45440 + }, + { + "epoch": 4.18, + "learning_rate": 2.9107290613220555e-05, + "loss": 1.0393, + "step": 45450 + }, + { + "epoch": 4.18, + "learning_rate": 2.910269375746989e-05, + "loss": 0.8884, + "step": 45460 + }, + { + "epoch": 4.18, + "learning_rate": 2.9098096901719225e-05, + "loss": 0.9458, + "step": 45470 + }, + { + "epoch": 4.18, + "learning_rate": 2.9093500045968558e-05, + "loss": 0.8521, + "step": 45480 + }, + { + "epoch": 4.18, + "learning_rate": 2.908890319021789e-05, + "loss": 0.8889, + "step": 45490 + }, + { + "epoch": 4.18, + "learning_rate": 2.9084306334467227e-05, + "loss": 0.8185, + "step": 45500 + }, + { + "epoch": 4.18, + "learning_rate": 2.907970947871656e-05, + "loss": 0.8895, + "step": 45510 + }, + { + "epoch": 4.18, + "learning_rate": 2.9075112622965894e-05, + "loss": 0.8546, + "step": 45520 + }, + { + "epoch": 4.19, + "learning_rate": 2.9070515767215227e-05, + "loss": 0.905, + "step": 45530 + }, + { + "epoch": 4.19, + "learning_rate": 2.9065918911464556e-05, + "loss": 0.8038, + "step": 45540 + }, + { + "epoch": 4.19, + "learning_rate": 2.906132205571389e-05, + "loss": 0.8871, + "step": 45550 + }, + { + "epoch": 4.19, + "learning_rate": 2.9056725199963226e-05, + "loss": 0.8666, + "step": 45560 + }, + { + "epoch": 4.19, + "learning_rate": 2.905212834421256e-05, + "loss": 0.8744, + "step": 45570 + }, + { + "epoch": 4.19, + "learning_rate": 2.9047531488461892e-05, + "loss": 0.8089, + "step": 45580 + }, + { + "epoch": 4.19, + "learning_rate": 2.904293463271123e-05, + "loss": 0.8781, + "step": 45590 + }, + { + "epoch": 4.19, + "learning_rate": 2.9038337776960562e-05, + "loss": 0.8753, + "step": 45600 + }, + { + "epoch": 4.19, + "learning_rate": 2.9033740921209895e-05, + "loss": 0.679, + "step": 45610 + }, + { + "epoch": 4.19, + "learning_rate": 2.9029144065459228e-05, + "loss": 0.8873, + "step": 45620 + }, + { + "epoch": 4.2, + "learning_rate": 2.9024547209708558e-05, + "loss": 0.8534, + "step": 45630 + }, + { + "epoch": 4.2, + "learning_rate": 2.901995035395789e-05, + "loss": 0.8848, + "step": 45640 + }, + { + "epoch": 4.2, + "learning_rate": 2.9015353498207227e-05, + "loss": 0.8519, + "step": 45650 + }, + { + "epoch": 4.2, + "learning_rate": 2.901075664245656e-05, + "loss": 0.811, + "step": 45660 + }, + { + "epoch": 4.2, + "learning_rate": 2.9006159786705893e-05, + "loss": 0.893, + "step": 45670 + }, + { + "epoch": 4.2, + "learning_rate": 2.900156293095523e-05, + "loss": 0.8887, + "step": 45680 + }, + { + "epoch": 4.2, + "learning_rate": 2.8996966075204563e-05, + "loss": 0.8748, + "step": 45690 + }, + { + "epoch": 4.2, + "learning_rate": 2.8992369219453896e-05, + "loss": 0.8665, + "step": 45700 + }, + { + "epoch": 4.2, + "learning_rate": 2.898777236370323e-05, + "loss": 0.8046, + "step": 45710 + }, + { + "epoch": 4.2, + "learning_rate": 2.898317550795256e-05, + "loss": 0.9339, + "step": 45720 + }, + { + "epoch": 4.2, + "learning_rate": 2.8978578652201892e-05, + "loss": 0.9129, + "step": 45730 + }, + { + "epoch": 4.21, + "learning_rate": 2.897398179645123e-05, + "loss": 0.9049, + "step": 45740 + }, + { + "epoch": 4.21, + "learning_rate": 2.896938494070056e-05, + "loss": 0.8198, + "step": 45750 + }, + { + "epoch": 4.21, + "learning_rate": 2.8964788084949895e-05, + "loss": 0.9131, + "step": 45760 + }, + { + "epoch": 4.21, + "learning_rate": 2.896019122919923e-05, + "loss": 0.8828, + "step": 45770 + }, + { + "epoch": 4.21, + "learning_rate": 2.8955594373448564e-05, + "loss": 0.87, + "step": 45780 + }, + { + "epoch": 4.21, + "learning_rate": 2.8950997517697897e-05, + "loss": 0.8534, + "step": 45790 + }, + { + "epoch": 4.21, + "learning_rate": 2.894640066194723e-05, + "loss": 0.8454, + "step": 45800 + }, + { + "epoch": 4.21, + "learning_rate": 2.894180380619656e-05, + "loss": 0.8951, + "step": 45810 + }, + { + "epoch": 4.21, + "learning_rate": 2.8937206950445893e-05, + "loss": 0.9974, + "step": 45820 + }, + { + "epoch": 4.21, + "learning_rate": 2.893261009469523e-05, + "loss": 0.8434, + "step": 45830 + }, + { + "epoch": 4.21, + "learning_rate": 2.8928013238944563e-05, + "loss": 0.8201, + "step": 45840 + }, + { + "epoch": 4.22, + "learning_rate": 2.8923416383193896e-05, + "loss": 0.8596, + "step": 45850 + }, + { + "epoch": 4.22, + "learning_rate": 2.8918819527443232e-05, + "loss": 0.7693, + "step": 45860 + }, + { + "epoch": 4.22, + "learning_rate": 2.8914222671692565e-05, + "loss": 1.0439, + "step": 45870 + }, + { + "epoch": 4.22, + "learning_rate": 2.89096258159419e-05, + "loss": 0.7455, + "step": 45880 + }, + { + "epoch": 4.22, + "learning_rate": 2.890502896019123e-05, + "loss": 0.9354, + "step": 45890 + }, + { + "epoch": 4.22, + "learning_rate": 2.8900432104440568e-05, + "loss": 0.8932, + "step": 45900 + }, + { + "epoch": 4.22, + "learning_rate": 2.8895835248689894e-05, + "loss": 0.8291, + "step": 45910 + }, + { + "epoch": 4.22, + "learning_rate": 2.889123839293923e-05, + "loss": 0.8358, + "step": 45920 + }, + { + "epoch": 4.22, + "learning_rate": 2.8886641537188564e-05, + "loss": 0.8867, + "step": 45930 + }, + { + "epoch": 4.22, + "learning_rate": 2.8882044681437897e-05, + "loss": 0.9469, + "step": 45940 + }, + { + "epoch": 4.22, + "learning_rate": 2.887744782568723e-05, + "loss": 0.7835, + "step": 45950 + }, + { + "epoch": 4.23, + "learning_rate": 2.8872850969936567e-05, + "loss": 0.8752, + "step": 45960 + }, + { + "epoch": 4.23, + "learning_rate": 2.88682541141859e-05, + "loss": 0.932, + "step": 45970 + }, + { + "epoch": 4.23, + "learning_rate": 2.8863657258435233e-05, + "loss": 0.8932, + "step": 45980 + }, + { + "epoch": 4.23, + "learning_rate": 2.885906040268457e-05, + "loss": 0.9243, + "step": 45990 + }, + { + "epoch": 4.23, + "learning_rate": 2.8854463546933895e-05, + "loss": 0.7882, + "step": 46000 + }, + { + "epoch": 4.23, + "learning_rate": 2.8849866691183232e-05, + "loss": 0.8893, + "step": 46010 + }, + { + "epoch": 4.23, + "learning_rate": 2.8845269835432565e-05, + "loss": 0.8427, + "step": 46020 + }, + { + "epoch": 4.23, + "learning_rate": 2.8840672979681898e-05, + "loss": 0.8256, + "step": 46030 + }, + { + "epoch": 4.23, + "learning_rate": 2.883607612393123e-05, + "loss": 0.8124, + "step": 46040 + }, + { + "epoch": 4.23, + "learning_rate": 2.8831479268180568e-05, + "loss": 0.8948, + "step": 46050 + }, + { + "epoch": 4.23, + "learning_rate": 2.88268824124299e-05, + "loss": 0.905, + "step": 46060 + }, + { + "epoch": 4.24, + "learning_rate": 2.8822285556679234e-05, + "loss": 0.8779, + "step": 46070 + }, + { + "epoch": 4.24, + "learning_rate": 2.881768870092857e-05, + "loss": 0.953, + "step": 46080 + }, + { + "epoch": 4.24, + "learning_rate": 2.8813091845177897e-05, + "loss": 0.8714, + "step": 46090 + }, + { + "epoch": 4.24, + "learning_rate": 2.8808494989427233e-05, + "loss": 0.9171, + "step": 46100 + }, + { + "epoch": 4.24, + "learning_rate": 2.8803898133676566e-05, + "loss": 0.9198, + "step": 46110 + }, + { + "epoch": 4.24, + "learning_rate": 2.87993012779259e-05, + "loss": 0.919, + "step": 46120 + }, + { + "epoch": 4.24, + "learning_rate": 2.8794704422175232e-05, + "loss": 0.956, + "step": 46130 + }, + { + "epoch": 4.24, + "learning_rate": 2.879010756642457e-05, + "loss": 0.8256, + "step": 46140 + }, + { + "epoch": 4.24, + "learning_rate": 2.8785510710673902e-05, + "loss": 0.858, + "step": 46150 + }, + { + "epoch": 4.24, + "learning_rate": 2.8780913854923235e-05, + "loss": 0.9408, + "step": 46160 + }, + { + "epoch": 4.24, + "learning_rate": 2.877631699917257e-05, + "loss": 1.0134, + "step": 46170 + }, + { + "epoch": 4.25, + "learning_rate": 2.8771720143421898e-05, + "loss": 0.8125, + "step": 46180 + }, + { + "epoch": 4.25, + "learning_rate": 2.8767123287671234e-05, + "loss": 0.8345, + "step": 46190 + }, + { + "epoch": 4.25, + "learning_rate": 2.8762526431920567e-05, + "loss": 0.9336, + "step": 46200 + }, + { + "epoch": 4.25, + "learning_rate": 2.87579295761699e-05, + "loss": 0.8685, + "step": 46210 + }, + { + "epoch": 4.25, + "learning_rate": 2.8753332720419234e-05, + "loss": 1.0485, + "step": 46220 + }, + { + "epoch": 4.25, + "learning_rate": 2.874873586466857e-05, + "loss": 0.8721, + "step": 46230 + }, + { + "epoch": 4.25, + "learning_rate": 2.8744139008917903e-05, + "loss": 0.8702, + "step": 46240 + }, + { + "epoch": 4.25, + "learning_rate": 2.8739542153167236e-05, + "loss": 0.7427, + "step": 46250 + }, + { + "epoch": 4.25, + "learning_rate": 2.8734945297416573e-05, + "loss": 0.8843, + "step": 46260 + }, + { + "epoch": 4.25, + "learning_rate": 2.87303484416659e-05, + "loss": 0.9359, + "step": 46270 + }, + { + "epoch": 4.25, + "learning_rate": 2.8725751585915232e-05, + "loss": 0.8657, + "step": 46280 + }, + { + "epoch": 4.26, + "learning_rate": 2.872115473016457e-05, + "loss": 0.9032, + "step": 46290 + }, + { + "epoch": 4.26, + "learning_rate": 2.8716557874413902e-05, + "loss": 0.8536, + "step": 46300 + }, + { + "epoch": 4.26, + "learning_rate": 2.8711961018663235e-05, + "loss": 0.9287, + "step": 46310 + }, + { + "epoch": 4.26, + "learning_rate": 2.870736416291257e-05, + "loss": 0.8799, + "step": 46320 + }, + { + "epoch": 4.26, + "learning_rate": 2.8702767307161904e-05, + "loss": 0.8899, + "step": 46330 + }, + { + "epoch": 4.26, + "learning_rate": 2.8698170451411237e-05, + "loss": 0.7844, + "step": 46340 + }, + { + "epoch": 4.26, + "learning_rate": 2.869357359566057e-05, + "loss": 0.8816, + "step": 46350 + }, + { + "epoch": 4.26, + "learning_rate": 2.86889767399099e-05, + "loss": 0.9764, + "step": 46360 + }, + { + "epoch": 4.26, + "learning_rate": 2.8684379884159233e-05, + "loss": 0.8704, + "step": 46370 + }, + { + "epoch": 4.26, + "learning_rate": 2.867978302840857e-05, + "loss": 0.9101, + "step": 46380 + }, + { + "epoch": 4.26, + "learning_rate": 2.8675186172657903e-05, + "loss": 0.8332, + "step": 46390 + }, + { + "epoch": 4.27, + "learning_rate": 2.8670589316907236e-05, + "loss": 0.922, + "step": 46400 + }, + { + "epoch": 4.27, + "learning_rate": 2.8665992461156572e-05, + "loss": 0.7925, + "step": 46410 + }, + { + "epoch": 4.27, + "learning_rate": 2.8661395605405906e-05, + "loss": 0.9326, + "step": 46420 + }, + { + "epoch": 4.27, + "learning_rate": 2.865679874965524e-05, + "loss": 1.0456, + "step": 46430 + }, + { + "epoch": 4.27, + "learning_rate": 2.8652201893904572e-05, + "loss": 0.8699, + "step": 46440 + }, + { + "epoch": 4.27, + "learning_rate": 2.86476050381539e-05, + "loss": 0.8291, + "step": 46450 + }, + { + "epoch": 4.27, + "learning_rate": 2.8643008182403235e-05, + "loss": 0.9095, + "step": 46460 + }, + { + "epoch": 4.27, + "learning_rate": 2.863841132665257e-05, + "loss": 0.8876, + "step": 46470 + }, + { + "epoch": 4.27, + "learning_rate": 2.8633814470901904e-05, + "loss": 0.8033, + "step": 46480 + }, + { + "epoch": 4.27, + "learning_rate": 2.8629217615151237e-05, + "loss": 0.8633, + "step": 46490 + }, + { + "epoch": 4.28, + "learning_rate": 2.8624620759400574e-05, + "loss": 0.8178, + "step": 46500 + }, + { + "epoch": 4.28, + "learning_rate": 2.8620023903649907e-05, + "loss": 0.9744, + "step": 46510 + }, + { + "epoch": 4.28, + "learning_rate": 2.861542704789924e-05, + "loss": 0.9828, + "step": 46520 + }, + { + "epoch": 4.28, + "learning_rate": 2.8610830192148573e-05, + "loss": 0.89, + "step": 46530 + }, + { + "epoch": 4.28, + "learning_rate": 2.8606233336397903e-05, + "loss": 0.9365, + "step": 46540 + }, + { + "epoch": 4.28, + "learning_rate": 2.8601636480647236e-05, + "loss": 0.9177, + "step": 46550 + }, + { + "epoch": 4.28, + "learning_rate": 2.8597039624896572e-05, + "loss": 0.778, + "step": 46560 + }, + { + "epoch": 4.28, + "learning_rate": 2.8592442769145905e-05, + "loss": 0.8607, + "step": 46570 + }, + { + "epoch": 4.28, + "learning_rate": 2.858784591339524e-05, + "loss": 0.7784, + "step": 46580 + }, + { + "epoch": 4.28, + "learning_rate": 2.8583249057644575e-05, + "loss": 0.9981, + "step": 46590 + }, + { + "epoch": 4.28, + "learning_rate": 2.8578652201893908e-05, + "loss": 0.8495, + "step": 46600 + }, + { + "epoch": 4.29, + "learning_rate": 2.857405534614324e-05, + "loss": 1.0469, + "step": 46610 + }, + { + "epoch": 4.29, + "learning_rate": 2.8569458490392574e-05, + "loss": 0.8481, + "step": 46620 + }, + { + "epoch": 4.29, + "learning_rate": 2.8564861634641904e-05, + "loss": 0.8666, + "step": 46630 + }, + { + "epoch": 4.29, + "learning_rate": 2.8560264778891237e-05, + "loss": 0.8656, + "step": 46640 + }, + { + "epoch": 4.29, + "learning_rate": 2.8555667923140573e-05, + "loss": 0.8738, + "step": 46650 + }, + { + "epoch": 4.29, + "learning_rate": 2.8551071067389907e-05, + "loss": 0.8721, + "step": 46660 + }, + { + "epoch": 4.29, + "learning_rate": 2.854647421163924e-05, + "loss": 0.7976, + "step": 46670 + }, + { + "epoch": 4.29, + "learning_rate": 2.8541877355888573e-05, + "loss": 1.0445, + "step": 46680 + }, + { + "epoch": 4.29, + "learning_rate": 2.853728050013791e-05, + "loss": 0.92, + "step": 46690 + }, + { + "epoch": 4.29, + "learning_rate": 2.8532683644387242e-05, + "loss": 0.9039, + "step": 46700 + }, + { + "epoch": 4.29, + "learning_rate": 2.8528086788636575e-05, + "loss": 0.899, + "step": 46710 + }, + { + "epoch": 4.3, + "learning_rate": 2.8523489932885905e-05, + "loss": 0.8439, + "step": 46720 + }, + { + "epoch": 4.3, + "learning_rate": 2.8518893077135238e-05, + "loss": 0.7966, + "step": 46730 + }, + { + "epoch": 4.3, + "learning_rate": 2.8514296221384575e-05, + "loss": 0.7881, + "step": 46740 + }, + { + "epoch": 4.3, + "learning_rate": 2.8509699365633908e-05, + "loss": 1.0037, + "step": 46750 + }, + { + "epoch": 4.3, + "learning_rate": 2.850510250988324e-05, + "loss": 0.9418, + "step": 46760 + }, + { + "epoch": 4.3, + "learning_rate": 2.8500505654132574e-05, + "loss": 0.8342, + "step": 46770 + }, + { + "epoch": 4.3, + "learning_rate": 2.849590879838191e-05, + "loss": 0.9072, + "step": 46780 + }, + { + "epoch": 4.3, + "learning_rate": 2.8491311942631243e-05, + "loss": 0.931, + "step": 46790 + }, + { + "epoch": 4.3, + "learning_rate": 2.8486715086880577e-05, + "loss": 0.855, + "step": 46800 + }, + { + "epoch": 4.3, + "learning_rate": 2.8482118231129906e-05, + "loss": 0.9333, + "step": 46810 + }, + { + "epoch": 4.3, + "learning_rate": 2.847752137537924e-05, + "loss": 0.7762, + "step": 46820 + }, + { + "epoch": 4.31, + "learning_rate": 2.8472924519628576e-05, + "loss": 0.7546, + "step": 46830 + }, + { + "epoch": 4.31, + "learning_rate": 2.846832766387791e-05, + "loss": 0.9403, + "step": 46840 + }, + { + "epoch": 4.31, + "learning_rate": 2.8463730808127242e-05, + "loss": 0.972, + "step": 46850 + }, + { + "epoch": 4.31, + "learning_rate": 2.8459133952376575e-05, + "loss": 0.8973, + "step": 46860 + }, + { + "epoch": 4.31, + "learning_rate": 2.845453709662591e-05, + "loss": 0.8504, + "step": 46870 + }, + { + "epoch": 4.31, + "learning_rate": 2.8449940240875245e-05, + "loss": 0.8674, + "step": 46880 + }, + { + "epoch": 4.31, + "learning_rate": 2.8445343385124578e-05, + "loss": 0.9234, + "step": 46890 + }, + { + "epoch": 4.31, + "learning_rate": 2.8440746529373907e-05, + "loss": 0.8575, + "step": 46900 + }, + { + "epoch": 4.31, + "learning_rate": 2.843614967362324e-05, + "loss": 0.9547, + "step": 46910 + }, + { + "epoch": 4.31, + "learning_rate": 2.8431552817872577e-05, + "loss": 0.883, + "step": 46920 + }, + { + "epoch": 4.31, + "learning_rate": 2.842695596212191e-05, + "loss": 0.9156, + "step": 46930 + }, + { + "epoch": 4.32, + "learning_rate": 2.8422359106371243e-05, + "loss": 0.827, + "step": 46940 + }, + { + "epoch": 4.32, + "learning_rate": 2.8417762250620576e-05, + "loss": 0.8525, + "step": 46950 + }, + { + "epoch": 4.32, + "learning_rate": 2.8413165394869913e-05, + "loss": 0.7925, + "step": 46960 + }, + { + "epoch": 4.32, + "learning_rate": 2.8408568539119246e-05, + "loss": 0.8562, + "step": 46970 + }, + { + "epoch": 4.32, + "learning_rate": 2.840397168336858e-05, + "loss": 0.7826, + "step": 46980 + }, + { + "epoch": 4.32, + "learning_rate": 2.839937482761791e-05, + "loss": 0.7767, + "step": 46990 + }, + { + "epoch": 4.32, + "learning_rate": 2.8394777971867242e-05, + "loss": 0.9147, + "step": 47000 + }, + { + "epoch": 4.32, + "learning_rate": 2.8390181116116575e-05, + "loss": 0.8441, + "step": 47010 + }, + { + "epoch": 4.32, + "learning_rate": 2.838558426036591e-05, + "loss": 1.0553, + "step": 47020 + }, + { + "epoch": 4.32, + "learning_rate": 2.8380987404615244e-05, + "loss": 0.9, + "step": 47030 + }, + { + "epoch": 4.32, + "learning_rate": 2.8376390548864577e-05, + "loss": 0.7648, + "step": 47040 + }, + { + "epoch": 4.33, + "learning_rate": 2.8371793693113914e-05, + "loss": 0.9048, + "step": 47050 + }, + { + "epoch": 4.33, + "learning_rate": 2.8367196837363247e-05, + "loss": 0.8749, + "step": 47060 + }, + { + "epoch": 4.33, + "learning_rate": 2.836259998161258e-05, + "loss": 0.9421, + "step": 47070 + }, + { + "epoch": 4.33, + "learning_rate": 2.835800312586191e-05, + "loss": 0.7991, + "step": 47080 + }, + { + "epoch": 4.33, + "learning_rate": 2.8353406270111243e-05, + "loss": 0.8868, + "step": 47090 + }, + { + "epoch": 4.33, + "learning_rate": 2.8348809414360576e-05, + "loss": 0.9554, + "step": 47100 + }, + { + "epoch": 4.33, + "learning_rate": 2.8344212558609912e-05, + "loss": 0.8974, + "step": 47110 + }, + { + "epoch": 4.33, + "learning_rate": 2.8339615702859246e-05, + "loss": 0.8546, + "step": 47120 + }, + { + "epoch": 4.33, + "learning_rate": 2.833501884710858e-05, + "loss": 0.8603, + "step": 47130 + }, + { + "epoch": 4.33, + "learning_rate": 2.8330421991357915e-05, + "loss": 0.949, + "step": 47140 + }, + { + "epoch": 4.33, + "learning_rate": 2.8325825135607248e-05, + "loss": 1.0152, + "step": 47150 + }, + { + "epoch": 4.34, + "learning_rate": 2.832122827985658e-05, + "loss": 0.727, + "step": 47160 + }, + { + "epoch": 4.34, + "learning_rate": 2.831663142410591e-05, + "loss": 0.865, + "step": 47170 + }, + { + "epoch": 4.34, + "learning_rate": 2.8312034568355244e-05, + "loss": 0.9026, + "step": 47180 + }, + { + "epoch": 4.34, + "learning_rate": 2.8307437712604577e-05, + "loss": 0.8823, + "step": 47190 + }, + { + "epoch": 4.34, + "learning_rate": 2.8302840856853914e-05, + "loss": 0.9217, + "step": 47200 + }, + { + "epoch": 4.34, + "learning_rate": 2.8298244001103247e-05, + "loss": 0.8541, + "step": 47210 + }, + { + "epoch": 4.34, + "learning_rate": 2.829364714535258e-05, + "loss": 0.9426, + "step": 47220 + }, + { + "epoch": 4.34, + "learning_rate": 2.8289050289601916e-05, + "loss": 0.8733, + "step": 47230 + }, + { + "epoch": 4.34, + "learning_rate": 2.828445343385125e-05, + "loss": 0.8407, + "step": 47240 + }, + { + "epoch": 4.34, + "learning_rate": 2.8279856578100583e-05, + "loss": 0.8767, + "step": 47250 + }, + { + "epoch": 4.34, + "learning_rate": 2.8275259722349912e-05, + "loss": 0.8204, + "step": 47260 + }, + { + "epoch": 4.35, + "learning_rate": 2.8270662866599245e-05, + "loss": 0.8518, + "step": 47270 + }, + { + "epoch": 4.35, + "learning_rate": 2.826606601084858e-05, + "loss": 0.8966, + "step": 47280 + }, + { + "epoch": 4.35, + "learning_rate": 2.8261469155097915e-05, + "loss": 0.9042, + "step": 47290 + }, + { + "epoch": 4.35, + "learning_rate": 2.8256872299347248e-05, + "loss": 0.8162, + "step": 47300 + }, + { + "epoch": 4.35, + "learning_rate": 2.825227544359658e-05, + "loss": 0.9475, + "step": 47310 + }, + { + "epoch": 4.35, + "learning_rate": 2.8247678587845918e-05, + "loss": 0.8407, + "step": 47320 + }, + { + "epoch": 4.35, + "learning_rate": 2.824308173209525e-05, + "loss": 0.8438, + "step": 47330 + }, + { + "epoch": 4.35, + "learning_rate": 2.8238484876344584e-05, + "loss": 0.9688, + "step": 47340 + }, + { + "epoch": 4.35, + "learning_rate": 2.8233888020593913e-05, + "loss": 0.7882, + "step": 47350 + }, + { + "epoch": 4.35, + "learning_rate": 2.8229291164843246e-05, + "loss": 0.815, + "step": 47360 + }, + { + "epoch": 4.36, + "learning_rate": 2.822469430909258e-05, + "loss": 0.8119, + "step": 47370 + }, + { + "epoch": 4.36, + "learning_rate": 2.8220097453341916e-05, + "loss": 0.7793, + "step": 47380 + }, + { + "epoch": 4.36, + "learning_rate": 2.821550059759125e-05, + "loss": 0.7947, + "step": 47390 + }, + { + "epoch": 4.36, + "learning_rate": 2.8210903741840582e-05, + "loss": 0.8964, + "step": 47400 + }, + { + "epoch": 4.36, + "learning_rate": 2.820630688608992e-05, + "loss": 0.8775, + "step": 47410 + }, + { + "epoch": 4.36, + "learning_rate": 2.8201710030339252e-05, + "loss": 1.0027, + "step": 47420 + }, + { + "epoch": 4.36, + "learning_rate": 2.8197113174588585e-05, + "loss": 0.9307, + "step": 47430 + }, + { + "epoch": 4.36, + "learning_rate": 2.8192516318837915e-05, + "loss": 0.9554, + "step": 47440 + }, + { + "epoch": 4.36, + "learning_rate": 2.8187919463087248e-05, + "loss": 0.8541, + "step": 47450 + }, + { + "epoch": 4.36, + "learning_rate": 2.818332260733658e-05, + "loss": 0.9209, + "step": 47460 + }, + { + "epoch": 4.36, + "learning_rate": 2.8178725751585917e-05, + "loss": 0.747, + "step": 47470 + }, + { + "epoch": 4.37, + "learning_rate": 2.817412889583525e-05, + "loss": 0.926, + "step": 47480 + }, + { + "epoch": 4.37, + "learning_rate": 2.8169532040084583e-05, + "loss": 1.0352, + "step": 47490 + }, + { + "epoch": 4.37, + "learning_rate": 2.8164935184333917e-05, + "loss": 0.8286, + "step": 47500 + }, + { + "epoch": 4.37, + "learning_rate": 2.8160338328583253e-05, + "loss": 0.9808, + "step": 47510 + }, + { + "epoch": 4.37, + "learning_rate": 2.8155741472832586e-05, + "loss": 0.7877, + "step": 47520 + }, + { + "epoch": 4.37, + "learning_rate": 2.8151144617081916e-05, + "loss": 0.8536, + "step": 47530 + }, + { + "epoch": 4.37, + "learning_rate": 2.814654776133125e-05, + "loss": 0.8894, + "step": 47540 + }, + { + "epoch": 4.37, + "learning_rate": 2.8141950905580582e-05, + "loss": 0.8619, + "step": 47550 + }, + { + "epoch": 4.37, + "learning_rate": 2.813735404982992e-05, + "loss": 0.9029, + "step": 47560 + }, + { + "epoch": 4.37, + "learning_rate": 2.813275719407925e-05, + "loss": 0.9162, + "step": 47570 + }, + { + "epoch": 4.37, + "learning_rate": 2.8128160338328585e-05, + "loss": 0.7798, + "step": 47580 + }, + { + "epoch": 4.38, + "learning_rate": 2.8123563482577918e-05, + "loss": 0.8246, + "step": 47590 + }, + { + "epoch": 4.38, + "learning_rate": 2.8118966626827254e-05, + "loss": 0.8959, + "step": 47600 + }, + { + "epoch": 4.38, + "learning_rate": 2.8114369771076587e-05, + "loss": 0.8096, + "step": 47610 + }, + { + "epoch": 4.38, + "learning_rate": 2.8109772915325917e-05, + "loss": 0.8525, + "step": 47620 + }, + { + "epoch": 4.38, + "learning_rate": 2.810517605957525e-05, + "loss": 0.8278, + "step": 47630 + }, + { + "epoch": 4.38, + "learning_rate": 2.8100579203824583e-05, + "loss": 0.8675, + "step": 47640 + }, + { + "epoch": 4.38, + "learning_rate": 2.809598234807392e-05, + "loss": 0.8743, + "step": 47650 + }, + { + "epoch": 4.38, + "learning_rate": 2.8091385492323253e-05, + "loss": 0.9187, + "step": 47660 + }, + { + "epoch": 4.38, + "learning_rate": 2.8086788636572586e-05, + "loss": 0.8874, + "step": 47670 + }, + { + "epoch": 4.38, + "learning_rate": 2.808219178082192e-05, + "loss": 0.9647, + "step": 47680 + }, + { + "epoch": 4.38, + "learning_rate": 2.8077594925071255e-05, + "loss": 0.8283, + "step": 47690 + }, + { + "epoch": 4.39, + "learning_rate": 2.807299806932059e-05, + "loss": 0.8396, + "step": 47700 + }, + { + "epoch": 4.39, + "learning_rate": 2.8068401213569918e-05, + "loss": 1.0054, + "step": 47710 + }, + { + "epoch": 4.39, + "learning_rate": 2.806380435781925e-05, + "loss": 0.9078, + "step": 47720 + }, + { + "epoch": 4.39, + "learning_rate": 2.8059207502068584e-05, + "loss": 0.9348, + "step": 47730 + }, + { + "epoch": 4.39, + "learning_rate": 2.805461064631792e-05, + "loss": 0.9229, + "step": 47740 + }, + { + "epoch": 4.39, + "learning_rate": 2.8050013790567254e-05, + "loss": 0.9466, + "step": 47750 + }, + { + "epoch": 4.39, + "learning_rate": 2.8045416934816587e-05, + "loss": 0.8142, + "step": 47760 + }, + { + "epoch": 4.39, + "learning_rate": 2.804082007906592e-05, + "loss": 0.7719, + "step": 47770 + }, + { + "epoch": 4.39, + "learning_rate": 2.8036223223315257e-05, + "loss": 0.9456, + "step": 47780 + }, + { + "epoch": 4.39, + "learning_rate": 2.803162636756459e-05, + "loss": 0.8545, + "step": 47790 + }, + { + "epoch": 4.39, + "learning_rate": 2.802702951181392e-05, + "loss": 0.8728, + "step": 47800 + }, + { + "epoch": 4.4, + "learning_rate": 2.8022432656063252e-05, + "loss": 0.8595, + "step": 47810 + }, + { + "epoch": 4.4, + "learning_rate": 2.8017835800312586e-05, + "loss": 0.9465, + "step": 47820 + }, + { + "epoch": 4.4, + "learning_rate": 2.801323894456192e-05, + "loss": 0.894, + "step": 47830 + }, + { + "epoch": 4.4, + "learning_rate": 2.8008642088811255e-05, + "loss": 0.7734, + "step": 47840 + }, + { + "epoch": 4.4, + "learning_rate": 2.8004045233060588e-05, + "loss": 1.0048, + "step": 47850 + }, + { + "epoch": 4.4, + "learning_rate": 2.799944837730992e-05, + "loss": 0.8117, + "step": 47860 + }, + { + "epoch": 4.4, + "learning_rate": 2.7994851521559258e-05, + "loss": 0.9319, + "step": 47870 + }, + { + "epoch": 4.4, + "learning_rate": 2.799025466580859e-05, + "loss": 0.9058, + "step": 47880 + }, + { + "epoch": 4.4, + "learning_rate": 2.798565781005792e-05, + "loss": 0.7815, + "step": 47890 + }, + { + "epoch": 4.4, + "learning_rate": 2.7981060954307254e-05, + "loss": 0.7765, + "step": 47900 + }, + { + "epoch": 4.4, + "learning_rate": 2.7976464098556587e-05, + "loss": 0.8025, + "step": 47910 + }, + { + "epoch": 4.41, + "learning_rate": 2.797186724280592e-05, + "loss": 1.0571, + "step": 47920 + }, + { + "epoch": 4.41, + "learning_rate": 2.7967270387055256e-05, + "loss": 0.8848, + "step": 47930 + }, + { + "epoch": 4.41, + "learning_rate": 2.796267353130459e-05, + "loss": 0.7901, + "step": 47940 + }, + { + "epoch": 4.41, + "learning_rate": 2.7958076675553923e-05, + "loss": 0.912, + "step": 47950 + }, + { + "epoch": 4.41, + "learning_rate": 2.795347981980326e-05, + "loss": 0.9207, + "step": 47960 + }, + { + "epoch": 4.41, + "learning_rate": 2.7948882964052592e-05, + "loss": 0.8375, + "step": 47970 + }, + { + "epoch": 4.41, + "learning_rate": 2.7944286108301922e-05, + "loss": 0.9963, + "step": 47980 + }, + { + "epoch": 4.41, + "learning_rate": 2.7939689252551255e-05, + "loss": 0.7746, + "step": 47990 + }, + { + "epoch": 4.41, + "learning_rate": 2.7935092396800588e-05, + "loss": 0.8483, + "step": 48000 + }, + { + "epoch": 4.41, + "learning_rate": 2.793049554104992e-05, + "loss": 0.7638, + "step": 48010 + }, + { + "epoch": 4.41, + "learning_rate": 2.7925898685299258e-05, + "loss": 0.8339, + "step": 48020 + }, + { + "epoch": 4.42, + "learning_rate": 2.792130182954859e-05, + "loss": 0.8411, + "step": 48030 + }, + { + "epoch": 4.42, + "learning_rate": 2.7916704973797924e-05, + "loss": 0.7975, + "step": 48040 + }, + { + "epoch": 4.42, + "learning_rate": 2.791210811804726e-05, + "loss": 0.8786, + "step": 48050 + }, + { + "epoch": 4.42, + "learning_rate": 2.7907511262296593e-05, + "loss": 0.9097, + "step": 48060 + }, + { + "epoch": 4.42, + "learning_rate": 2.7902914406545923e-05, + "loss": 0.7992, + "step": 48070 + }, + { + "epoch": 4.42, + "learning_rate": 2.7898317550795256e-05, + "loss": 0.8072, + "step": 48080 + }, + { + "epoch": 4.42, + "learning_rate": 2.789372069504459e-05, + "loss": 0.8874, + "step": 48090 + }, + { + "epoch": 4.42, + "learning_rate": 2.7889123839293922e-05, + "loss": 0.8907, + "step": 48100 + }, + { + "epoch": 4.42, + "learning_rate": 2.788452698354326e-05, + "loss": 0.8031, + "step": 48110 + }, + { + "epoch": 4.42, + "learning_rate": 2.7879930127792592e-05, + "loss": 0.9776, + "step": 48120 + }, + { + "epoch": 4.42, + "learning_rate": 2.7875333272041925e-05, + "loss": 0.8674, + "step": 48130 + }, + { + "epoch": 4.43, + "learning_rate": 2.787073641629126e-05, + "loss": 0.9637, + "step": 48140 + }, + { + "epoch": 4.43, + "learning_rate": 2.7866139560540594e-05, + "loss": 0.8328, + "step": 48150 + }, + { + "epoch": 4.43, + "learning_rate": 2.786154270478992e-05, + "loss": 0.9469, + "step": 48160 + }, + { + "epoch": 4.43, + "learning_rate": 2.7856945849039257e-05, + "loss": 0.8071, + "step": 48170 + }, + { + "epoch": 4.43, + "learning_rate": 2.785234899328859e-05, + "loss": 0.8577, + "step": 48180 + }, + { + "epoch": 4.43, + "learning_rate": 2.7847752137537923e-05, + "loss": 0.8496, + "step": 48190 + }, + { + "epoch": 4.43, + "learning_rate": 2.784315528178726e-05, + "loss": 0.95, + "step": 48200 + }, + { + "epoch": 4.43, + "learning_rate": 2.7838558426036593e-05, + "loss": 0.9018, + "step": 48210 + }, + { + "epoch": 4.43, + "learning_rate": 2.7833961570285926e-05, + "loss": 0.9186, + "step": 48220 + }, + { + "epoch": 4.43, + "learning_rate": 2.7829364714535263e-05, + "loss": 0.9743, + "step": 48230 + }, + { + "epoch": 4.44, + "learning_rate": 2.7824767858784596e-05, + "loss": 0.9739, + "step": 48240 + }, + { + "epoch": 4.44, + "learning_rate": 2.7820171003033922e-05, + "loss": 0.8641, + "step": 48250 + }, + { + "epoch": 4.44, + "learning_rate": 2.781557414728326e-05, + "loss": 0.8546, + "step": 48260 + }, + { + "epoch": 4.44, + "learning_rate": 2.781097729153259e-05, + "loss": 0.8458, + "step": 48270 + }, + { + "epoch": 4.44, + "learning_rate": 2.7806380435781925e-05, + "loss": 0.8754, + "step": 48280 + }, + { + "epoch": 4.44, + "learning_rate": 2.780178358003126e-05, + "loss": 0.7402, + "step": 48290 + }, + { + "epoch": 4.44, + "learning_rate": 2.7797186724280594e-05, + "loss": 0.9776, + "step": 48300 + }, + { + "epoch": 4.44, + "learning_rate": 2.7792589868529927e-05, + "loss": 0.9203, + "step": 48310 + }, + { + "epoch": 4.44, + "learning_rate": 2.778799301277926e-05, + "loss": 0.7745, + "step": 48320 + }, + { + "epoch": 4.44, + "learning_rate": 2.7783396157028597e-05, + "loss": 0.925, + "step": 48330 + }, + { + "epoch": 4.44, + "learning_rate": 2.7778799301277923e-05, + "loss": 0.7831, + "step": 48340 + }, + { + "epoch": 4.45, + "learning_rate": 2.777420244552726e-05, + "loss": 0.9424, + "step": 48350 + }, + { + "epoch": 4.45, + "learning_rate": 2.7769605589776593e-05, + "loss": 0.7792, + "step": 48360 + }, + { + "epoch": 4.45, + "learning_rate": 2.7765008734025926e-05, + "loss": 0.9017, + "step": 48370 + }, + { + "epoch": 4.45, + "learning_rate": 2.7760411878275262e-05, + "loss": 0.937, + "step": 48380 + }, + { + "epoch": 4.45, + "learning_rate": 2.7755815022524595e-05, + "loss": 0.9153, + "step": 48390 + }, + { + "epoch": 4.45, + "learning_rate": 2.775121816677393e-05, + "loss": 1.1007, + "step": 48400 + }, + { + "epoch": 4.45, + "learning_rate": 2.774662131102326e-05, + "loss": 0.7897, + "step": 48410 + }, + { + "epoch": 4.45, + "learning_rate": 2.7742024455272598e-05, + "loss": 0.8301, + "step": 48420 + }, + { + "epoch": 4.45, + "learning_rate": 2.7737427599521924e-05, + "loss": 0.85, + "step": 48430 + }, + { + "epoch": 4.45, + "learning_rate": 2.773283074377126e-05, + "loss": 0.873, + "step": 48440 + }, + { + "epoch": 4.45, + "learning_rate": 2.7728233888020594e-05, + "loss": 0.7952, + "step": 48450 + }, + { + "epoch": 4.46, + "learning_rate": 2.7723637032269927e-05, + "loss": 0.7536, + "step": 48460 + }, + { + "epoch": 4.46, + "learning_rate": 2.7719040176519264e-05, + "loss": 0.8795, + "step": 48470 + }, + { + "epoch": 4.46, + "learning_rate": 2.7714443320768597e-05, + "loss": 0.8286, + "step": 48480 + }, + { + "epoch": 4.46, + "learning_rate": 2.770984646501793e-05, + "loss": 0.9061, + "step": 48490 + }, + { + "epoch": 4.46, + "learning_rate": 2.7705249609267263e-05, + "loss": 0.7486, + "step": 48500 + }, + { + "epoch": 4.46, + "learning_rate": 2.77006527535166e-05, + "loss": 0.9408, + "step": 48510 + }, + { + "epoch": 4.46, + "learning_rate": 2.7696055897765926e-05, + "loss": 0.713, + "step": 48520 + }, + { + "epoch": 4.46, + "learning_rate": 2.7691459042015262e-05, + "loss": 0.8713, + "step": 48530 + }, + { + "epoch": 4.46, + "learning_rate": 2.7686862186264595e-05, + "loss": 0.8397, + "step": 48540 + }, + { + "epoch": 4.46, + "learning_rate": 2.7682265330513928e-05, + "loss": 0.9915, + "step": 48550 + }, + { + "epoch": 4.46, + "learning_rate": 2.7677668474763265e-05, + "loss": 0.8052, + "step": 48560 + }, + { + "epoch": 4.47, + "learning_rate": 2.7673071619012598e-05, + "loss": 0.9319, + "step": 48570 + }, + { + "epoch": 4.47, + "learning_rate": 2.766847476326193e-05, + "loss": 0.8942, + "step": 48580 + }, + { + "epoch": 4.47, + "learning_rate": 2.7663877907511264e-05, + "loss": 0.8921, + "step": 48590 + }, + { + "epoch": 4.47, + "learning_rate": 2.76592810517606e-05, + "loss": 0.7868, + "step": 48600 + }, + { + "epoch": 4.47, + "learning_rate": 2.7654684196009927e-05, + "loss": 0.9063, + "step": 48610 + }, + { + "epoch": 4.47, + "learning_rate": 2.7650087340259263e-05, + "loss": 0.8388, + "step": 48620 + }, + { + "epoch": 4.47, + "learning_rate": 2.7645490484508596e-05, + "loss": 0.8887, + "step": 48630 + }, + { + "epoch": 4.47, + "learning_rate": 2.764089362875793e-05, + "loss": 0.7966, + "step": 48640 + }, + { + "epoch": 4.47, + "learning_rate": 2.7636296773007263e-05, + "loss": 0.7931, + "step": 48650 + }, + { + "epoch": 4.47, + "learning_rate": 2.76316999172566e-05, + "loss": 0.9414, + "step": 48660 + }, + { + "epoch": 4.47, + "learning_rate": 2.7627103061505932e-05, + "loss": 0.9431, + "step": 48670 + }, + { + "epoch": 4.48, + "learning_rate": 2.7622506205755265e-05, + "loss": 0.9686, + "step": 48680 + }, + { + "epoch": 4.48, + "learning_rate": 2.76179093500046e-05, + "loss": 0.8394, + "step": 48690 + }, + { + "epoch": 4.48, + "learning_rate": 2.7613312494253928e-05, + "loss": 0.7169, + "step": 48700 + }, + { + "epoch": 4.48, + "learning_rate": 2.7608715638503264e-05, + "loss": 0.8862, + "step": 48710 + }, + { + "epoch": 4.48, + "learning_rate": 2.7604118782752598e-05, + "loss": 0.8381, + "step": 48720 + }, + { + "epoch": 4.48, + "learning_rate": 2.759952192700193e-05, + "loss": 0.8931, + "step": 48730 + }, + { + "epoch": 4.48, + "learning_rate": 2.7594925071251264e-05, + "loss": 0.8251, + "step": 48740 + }, + { + "epoch": 4.48, + "learning_rate": 2.75903282155006e-05, + "loss": 0.9216, + "step": 48750 + }, + { + "epoch": 4.48, + "learning_rate": 2.7585731359749933e-05, + "loss": 0.9096, + "step": 48760 + }, + { + "epoch": 4.48, + "learning_rate": 2.7581134503999266e-05, + "loss": 1.0064, + "step": 48770 + }, + { + "epoch": 4.48, + "learning_rate": 2.7576537648248603e-05, + "loss": 0.8973, + "step": 48780 + }, + { + "epoch": 4.49, + "learning_rate": 2.757194079249793e-05, + "loss": 0.9279, + "step": 48790 + }, + { + "epoch": 4.49, + "learning_rate": 2.7567343936747266e-05, + "loss": 0.9575, + "step": 48800 + }, + { + "epoch": 4.49, + "learning_rate": 2.75627470809966e-05, + "loss": 0.8549, + "step": 48810 + }, + { + "epoch": 4.49, + "learning_rate": 2.7558150225245932e-05, + "loss": 0.9699, + "step": 48820 + }, + { + "epoch": 4.49, + "learning_rate": 2.7553553369495265e-05, + "loss": 0.86, + "step": 48830 + }, + { + "epoch": 4.49, + "learning_rate": 2.75489565137446e-05, + "loss": 0.9589, + "step": 48840 + }, + { + "epoch": 4.49, + "learning_rate": 2.7544359657993934e-05, + "loss": 0.8465, + "step": 48850 + }, + { + "epoch": 4.49, + "learning_rate": 2.7539762802243268e-05, + "loss": 0.9466, + "step": 48860 + }, + { + "epoch": 4.49, + "learning_rate": 2.7535165946492604e-05, + "loss": 0.8894, + "step": 48870 + }, + { + "epoch": 4.49, + "learning_rate": 2.753056909074193e-05, + "loss": 0.9688, + "step": 48880 + }, + { + "epoch": 4.49, + "learning_rate": 2.7525972234991267e-05, + "loss": 0.92, + "step": 48890 + }, + { + "epoch": 4.5, + "learning_rate": 2.75213753792406e-05, + "loss": 0.8699, + "step": 48900 + }, + { + "epoch": 4.5, + "learning_rate": 2.7516778523489933e-05, + "loss": 0.8281, + "step": 48910 + }, + { + "epoch": 4.5, + "learning_rate": 2.7512181667739266e-05, + "loss": 0.9057, + "step": 48920 + }, + { + "epoch": 4.5, + "learning_rate": 2.7507584811988603e-05, + "loss": 0.8682, + "step": 48930 + }, + { + "epoch": 4.5, + "learning_rate": 2.7502987956237936e-05, + "loss": 0.9167, + "step": 48940 + }, + { + "epoch": 4.5, + "learning_rate": 2.749839110048727e-05, + "loss": 0.7963, + "step": 48950 + }, + { + "epoch": 4.5, + "learning_rate": 2.7493794244736605e-05, + "loss": 0.7963, + "step": 48960 + }, + { + "epoch": 4.5, + "learning_rate": 2.748919738898593e-05, + "loss": 0.9014, + "step": 48970 + }, + { + "epoch": 4.5, + "learning_rate": 2.7484600533235265e-05, + "loss": 0.7991, + "step": 48980 + }, + { + "epoch": 4.5, + "learning_rate": 2.74800036774846e-05, + "loss": 0.8788, + "step": 48990 + }, + { + "epoch": 4.5, + "learning_rate": 2.7475406821733934e-05, + "loss": 0.9506, + "step": 49000 + }, + { + "epoch": 4.51, + "learning_rate": 2.7470809965983267e-05, + "loss": 0.8512, + "step": 49010 + }, + { + "epoch": 4.51, + "learning_rate": 2.7466213110232604e-05, + "loss": 0.8877, + "step": 49020 + }, + { + "epoch": 4.51, + "learning_rate": 2.7461616254481937e-05, + "loss": 0.8801, + "step": 49030 + }, + { + "epoch": 4.51, + "learning_rate": 2.745701939873127e-05, + "loss": 0.9427, + "step": 49040 + }, + { + "epoch": 4.51, + "learning_rate": 2.7452422542980606e-05, + "loss": 0.8973, + "step": 49050 + }, + { + "epoch": 4.51, + "learning_rate": 2.7447825687229933e-05, + "loss": 0.9723, + "step": 49060 + }, + { + "epoch": 4.51, + "learning_rate": 2.7443228831479266e-05, + "loss": 0.9029, + "step": 49070 + }, + { + "epoch": 4.51, + "learning_rate": 2.7438631975728602e-05, + "loss": 0.8191, + "step": 49080 + }, + { + "epoch": 4.51, + "learning_rate": 2.7434035119977935e-05, + "loss": 0.7408, + "step": 49090 + }, + { + "epoch": 4.51, + "learning_rate": 2.742943826422727e-05, + "loss": 0.8917, + "step": 49100 + }, + { + "epoch": 4.52, + "learning_rate": 2.7424841408476605e-05, + "loss": 0.8561, + "step": 49110 + }, + { + "epoch": 4.52, + "learning_rate": 2.7420244552725938e-05, + "loss": 0.8473, + "step": 49120 + }, + { + "epoch": 4.52, + "learning_rate": 2.741564769697527e-05, + "loss": 0.8674, + "step": 49130 + }, + { + "epoch": 4.52, + "learning_rate": 2.7411050841224604e-05, + "loss": 1.0502, + "step": 49140 + }, + { + "epoch": 4.52, + "learning_rate": 2.7406453985473934e-05, + "loss": 0.8272, + "step": 49150 + }, + { + "epoch": 4.52, + "learning_rate": 2.7401857129723267e-05, + "loss": 0.83, + "step": 49160 + }, + { + "epoch": 4.52, + "learning_rate": 2.7397260273972603e-05, + "loss": 0.7981, + "step": 49170 + }, + { + "epoch": 4.52, + "learning_rate": 2.7392663418221937e-05, + "loss": 0.9214, + "step": 49180 + }, + { + "epoch": 4.52, + "learning_rate": 2.738806656247127e-05, + "loss": 0.8317, + "step": 49190 + }, + { + "epoch": 4.52, + "learning_rate": 2.7383469706720606e-05, + "loss": 0.902, + "step": 49200 + }, + { + "epoch": 4.52, + "learning_rate": 2.737887285096994e-05, + "loss": 0.7654, + "step": 49210 + }, + { + "epoch": 4.53, + "learning_rate": 2.7374275995219272e-05, + "loss": 0.9585, + "step": 49220 + }, + { + "epoch": 4.53, + "learning_rate": 2.7369679139468605e-05, + "loss": 0.9021, + "step": 49230 + }, + { + "epoch": 4.53, + "learning_rate": 2.7365082283717935e-05, + "loss": 0.9739, + "step": 49240 + }, + { + "epoch": 4.53, + "learning_rate": 2.7360485427967268e-05, + "loss": 0.6743, + "step": 49250 + }, + { + "epoch": 4.53, + "learning_rate": 2.7355888572216605e-05, + "loss": 0.7694, + "step": 49260 + }, + { + "epoch": 4.53, + "learning_rate": 2.7351291716465938e-05, + "loss": 0.7134, + "step": 49270 + }, + { + "epoch": 4.53, + "learning_rate": 2.734669486071527e-05, + "loss": 0.8815, + "step": 49280 + }, + { + "epoch": 4.53, + "learning_rate": 2.7342098004964607e-05, + "loss": 0.8595, + "step": 49290 + }, + { + "epoch": 4.53, + "learning_rate": 2.733750114921394e-05, + "loss": 1.0906, + "step": 49300 + }, + { + "epoch": 4.53, + "learning_rate": 2.7332904293463274e-05, + "loss": 0.9937, + "step": 49310 + }, + { + "epoch": 4.53, + "learning_rate": 2.7328307437712607e-05, + "loss": 0.8216, + "step": 49320 + }, + { + "epoch": 4.54, + "learning_rate": 2.7323710581961943e-05, + "loss": 0.9512, + "step": 49330 + }, + { + "epoch": 4.54, + "learning_rate": 2.731911372621127e-05, + "loss": 0.8539, + "step": 49340 + }, + { + "epoch": 4.54, + "learning_rate": 2.7314516870460606e-05, + "loss": 0.9839, + "step": 49350 + }, + { + "epoch": 4.54, + "learning_rate": 2.730992001470994e-05, + "loss": 0.8374, + "step": 49360 + }, + { + "epoch": 4.54, + "learning_rate": 2.7305323158959272e-05, + "loss": 0.8728, + "step": 49370 + }, + { + "epoch": 4.54, + "learning_rate": 2.730072630320861e-05, + "loss": 0.8229, + "step": 49380 + }, + { + "epoch": 4.54, + "learning_rate": 2.729612944745794e-05, + "loss": 0.9108, + "step": 49390 + }, + { + "epoch": 4.54, + "learning_rate": 2.7291532591707275e-05, + "loss": 0.8027, + "step": 49400 + }, + { + "epoch": 4.54, + "learning_rate": 2.7286935735956608e-05, + "loss": 0.944, + "step": 49410 + }, + { + "epoch": 4.54, + "learning_rate": 2.7282338880205944e-05, + "loss": 0.836, + "step": 49420 + }, + { + "epoch": 4.54, + "learning_rate": 2.727774202445527e-05, + "loss": 0.7581, + "step": 49430 + }, + { + "epoch": 4.55, + "learning_rate": 2.7273145168704607e-05, + "loss": 0.9314, + "step": 49440 + }, + { + "epoch": 4.55, + "learning_rate": 2.726854831295394e-05, + "loss": 0.8202, + "step": 49450 + }, + { + "epoch": 4.55, + "learning_rate": 2.7263951457203273e-05, + "loss": 0.8887, + "step": 49460 + }, + { + "epoch": 4.55, + "learning_rate": 2.7259354601452606e-05, + "loss": 0.8689, + "step": 49470 + }, + { + "epoch": 4.55, + "learning_rate": 2.7254757745701943e-05, + "loss": 0.7099, + "step": 49480 + }, + { + "epoch": 4.55, + "learning_rate": 2.7250160889951276e-05, + "loss": 0.8208, + "step": 49490 + }, + { + "epoch": 4.55, + "learning_rate": 2.724556403420061e-05, + "loss": 0.9629, + "step": 49500 + }, + { + "epoch": 4.55, + "learning_rate": 2.7240967178449945e-05, + "loss": 0.8895, + "step": 49510 + }, + { + "epoch": 4.55, + "learning_rate": 2.7236370322699272e-05, + "loss": 0.8261, + "step": 49520 + }, + { + "epoch": 4.55, + "learning_rate": 2.7231773466948608e-05, + "loss": 0.8029, + "step": 49530 + }, + { + "epoch": 4.55, + "learning_rate": 2.722717661119794e-05, + "loss": 0.937, + "step": 49540 + }, + { + "epoch": 4.56, + "learning_rate": 2.7222579755447274e-05, + "loss": 0.835, + "step": 49550 + }, + { + "epoch": 4.56, + "learning_rate": 2.7217982899696608e-05, + "loss": 0.9548, + "step": 49560 + }, + { + "epoch": 4.56, + "learning_rate": 2.7213386043945944e-05, + "loss": 0.8881, + "step": 49570 + }, + { + "epoch": 4.56, + "learning_rate": 2.7208789188195277e-05, + "loss": 0.8977, + "step": 49580 + }, + { + "epoch": 4.56, + "learning_rate": 2.720419233244461e-05, + "loss": 0.922, + "step": 49590 + }, + { + "epoch": 4.56, + "learning_rate": 2.7199595476693947e-05, + "loss": 0.8171, + "step": 49600 + }, + { + "epoch": 4.56, + "learning_rate": 2.7194998620943273e-05, + "loss": 1.0238, + "step": 49610 + }, + { + "epoch": 4.56, + "learning_rate": 2.719040176519261e-05, + "loss": 0.9327, + "step": 49620 + }, + { + "epoch": 4.56, + "learning_rate": 2.7185804909441943e-05, + "loss": 0.831, + "step": 49630 + }, + { + "epoch": 4.56, + "learning_rate": 2.7181208053691276e-05, + "loss": 0.8423, + "step": 49640 + }, + { + "epoch": 4.56, + "learning_rate": 2.717661119794061e-05, + "loss": 0.7356, + "step": 49650 + }, + { + "epoch": 4.57, + "learning_rate": 2.7172014342189945e-05, + "loss": 0.9189, + "step": 49660 + }, + { + "epoch": 4.57, + "learning_rate": 2.716741748643928e-05, + "loss": 0.8041, + "step": 49670 + }, + { + "epoch": 4.57, + "learning_rate": 2.716282063068861e-05, + "loss": 0.9195, + "step": 49680 + }, + { + "epoch": 4.57, + "learning_rate": 2.7158223774937948e-05, + "loss": 0.8655, + "step": 49690 + }, + { + "epoch": 4.57, + "learning_rate": 2.7153626919187274e-05, + "loss": 0.8352, + "step": 49700 + }, + { + "epoch": 4.57, + "learning_rate": 2.714903006343661e-05, + "loss": 1.0295, + "step": 49710 + }, + { + "epoch": 4.57, + "learning_rate": 2.7144433207685944e-05, + "loss": 0.9077, + "step": 49720 + }, + { + "epoch": 4.57, + "learning_rate": 2.7139836351935277e-05, + "loss": 0.7652, + "step": 49730 + }, + { + "epoch": 4.57, + "learning_rate": 2.713523949618461e-05, + "loss": 0.8936, + "step": 49740 + }, + { + "epoch": 4.57, + "learning_rate": 2.7130642640433946e-05, + "loss": 0.9459, + "step": 49750 + }, + { + "epoch": 4.57, + "learning_rate": 2.712604578468328e-05, + "loss": 0.7694, + "step": 49760 + }, + { + "epoch": 4.58, + "learning_rate": 2.7121448928932613e-05, + "loss": 0.7964, + "step": 49770 + }, + { + "epoch": 4.58, + "learning_rate": 2.711685207318195e-05, + "loss": 0.733, + "step": 49780 + }, + { + "epoch": 4.58, + "learning_rate": 2.7112255217431275e-05, + "loss": 0.8699, + "step": 49790 + }, + { + "epoch": 4.58, + "learning_rate": 2.710765836168061e-05, + "loss": 0.8864, + "step": 49800 + }, + { + "epoch": 4.58, + "learning_rate": 2.7103061505929945e-05, + "loss": 0.9037, + "step": 49810 + }, + { + "epoch": 4.58, + "learning_rate": 2.7098464650179278e-05, + "loss": 0.9634, + "step": 49820 + }, + { + "epoch": 4.58, + "learning_rate": 2.709386779442861e-05, + "loss": 0.8603, + "step": 49830 + }, + { + "epoch": 4.58, + "learning_rate": 2.7089270938677948e-05, + "loss": 0.9086, + "step": 49840 + }, + { + "epoch": 4.58, + "learning_rate": 2.708467408292728e-05, + "loss": 0.7415, + "step": 49850 + }, + { + "epoch": 4.58, + "learning_rate": 2.7080077227176614e-05, + "loss": 0.8008, + "step": 49860 + }, + { + "epoch": 4.58, + "learning_rate": 2.707548037142595e-05, + "loss": 0.864, + "step": 49870 + }, + { + "epoch": 4.59, + "learning_rate": 2.7070883515675277e-05, + "loss": 0.8207, + "step": 49880 + }, + { + "epoch": 4.59, + "learning_rate": 2.706628665992461e-05, + "loss": 0.8281, + "step": 49890 + }, + { + "epoch": 4.59, + "learning_rate": 2.7061689804173946e-05, + "loss": 0.9277, + "step": 49900 + }, + { + "epoch": 4.59, + "learning_rate": 2.705709294842328e-05, + "loss": 0.7534, + "step": 49910 + }, + { + "epoch": 4.59, + "learning_rate": 2.7052496092672612e-05, + "loss": 0.9227, + "step": 49920 + }, + { + "epoch": 4.59, + "learning_rate": 2.704789923692195e-05, + "loss": 0.9049, + "step": 49930 + }, + { + "epoch": 4.59, + "learning_rate": 2.7043302381171282e-05, + "loss": 0.8882, + "step": 49940 + }, + { + "epoch": 4.59, + "learning_rate": 2.7038705525420615e-05, + "loss": 0.8415, + "step": 49950 + }, + { + "epoch": 4.59, + "learning_rate": 2.7034108669669948e-05, + "loss": 0.729, + "step": 49960 + }, + { + "epoch": 4.59, + "learning_rate": 2.7029511813919278e-05, + "loss": 0.9083, + "step": 49970 + }, + { + "epoch": 4.6, + "learning_rate": 2.702491495816861e-05, + "loss": 0.7266, + "step": 49980 + }, + { + "epoch": 4.6, + "learning_rate": 2.7020318102417947e-05, + "loss": 1.0837, + "step": 49990 + }, + { + "epoch": 4.6, + "learning_rate": 2.701572124666728e-05, + "loss": 0.83, + "step": 50000 + }, + { + "epoch": 4.6, + "eval_accuracy": 0.5803493449781659, + "eval_loss": 0.8779388666152954, + "eval_runtime": 159.6304, + "eval_samples_per_second": 28.691, + "eval_steps_per_second": 3.59, + "step": 50000 + }, + { + "epoch": 4.6, + "learning_rate": 2.7011124390916614e-05, + "loss": 0.8445, + "step": 50010 + }, + { + "epoch": 4.6, + "learning_rate": 2.700652753516595e-05, + "loss": 0.6649, + "step": 50020 + }, + { + "epoch": 4.6, + "learning_rate": 2.7001930679415283e-05, + "loss": 0.8419, + "step": 50030 + }, + { + "epoch": 4.6, + "learning_rate": 2.6997333823664616e-05, + "loss": 0.8287, + "step": 50040 + }, + { + "epoch": 4.6, + "learning_rate": 2.699273696791395e-05, + "loss": 0.8785, + "step": 50050 + }, + { + "epoch": 4.6, + "learning_rate": 2.698814011216328e-05, + "loss": 0.855, + "step": 50060 + }, + { + "epoch": 4.6, + "learning_rate": 2.6983543256412612e-05, + "loss": 0.9627, + "step": 50070 + }, + { + "epoch": 4.6, + "learning_rate": 2.697894640066195e-05, + "loss": 0.7754, + "step": 50080 + }, + { + "epoch": 4.61, + "learning_rate": 2.697434954491128e-05, + "loss": 0.8684, + "step": 50090 + }, + { + "epoch": 4.61, + "learning_rate": 2.6969752689160615e-05, + "loss": 0.9453, + "step": 50100 + }, + { + "epoch": 4.61, + "learning_rate": 2.696515583340995e-05, + "loss": 0.9281, + "step": 50110 + }, + { + "epoch": 4.61, + "learning_rate": 2.6960558977659284e-05, + "loss": 0.8317, + "step": 50120 + }, + { + "epoch": 4.61, + "learning_rate": 2.6955962121908617e-05, + "loss": 0.8989, + "step": 50130 + }, + { + "epoch": 4.61, + "learning_rate": 2.695136526615795e-05, + "loss": 0.9424, + "step": 50140 + }, + { + "epoch": 4.61, + "learning_rate": 2.694676841040728e-05, + "loss": 0.915, + "step": 50150 + }, + { + "epoch": 4.61, + "learning_rate": 2.6942171554656613e-05, + "loss": 0.9707, + "step": 50160 + }, + { + "epoch": 4.61, + "learning_rate": 2.693757469890595e-05, + "loss": 0.9163, + "step": 50170 + }, + { + "epoch": 4.61, + "learning_rate": 2.6932977843155283e-05, + "loss": 0.8871, + "step": 50180 + }, + { + "epoch": 4.61, + "learning_rate": 2.6928380987404616e-05, + "loss": 0.8082, + "step": 50190 + }, + { + "epoch": 4.62, + "learning_rate": 2.6923784131653952e-05, + "loss": 0.9226, + "step": 50200 + }, + { + "epoch": 4.62, + "learning_rate": 2.6919187275903285e-05, + "loss": 0.8025, + "step": 50210 + }, + { + "epoch": 4.62, + "learning_rate": 2.691459042015262e-05, + "loss": 0.9529, + "step": 50220 + }, + { + "epoch": 4.62, + "learning_rate": 2.690999356440195e-05, + "loss": 0.9286, + "step": 50230 + }, + { + "epoch": 4.62, + "learning_rate": 2.690539670865128e-05, + "loss": 0.9266, + "step": 50240 + }, + { + "epoch": 4.62, + "learning_rate": 2.6900799852900614e-05, + "loss": 0.9465, + "step": 50250 + }, + { + "epoch": 4.62, + "learning_rate": 2.689620299714995e-05, + "loss": 1.0059, + "step": 50260 + }, + { + "epoch": 4.62, + "learning_rate": 2.6891606141399284e-05, + "loss": 0.8727, + "step": 50270 + }, + { + "epoch": 4.62, + "learning_rate": 2.6887009285648617e-05, + "loss": 0.8341, + "step": 50280 + }, + { + "epoch": 4.62, + "learning_rate": 2.688241242989795e-05, + "loss": 0.8892, + "step": 50290 + }, + { + "epoch": 4.62, + "learning_rate": 2.6877815574147287e-05, + "loss": 0.8551, + "step": 50300 + }, + { + "epoch": 4.63, + "learning_rate": 2.687321871839662e-05, + "loss": 0.9622, + "step": 50310 + }, + { + "epoch": 4.63, + "learning_rate": 2.6868621862645953e-05, + "loss": 0.952, + "step": 50320 + }, + { + "epoch": 4.63, + "learning_rate": 2.6864025006895283e-05, + "loss": 0.9877, + "step": 50330 + }, + { + "epoch": 4.63, + "learning_rate": 2.6859428151144616e-05, + "loss": 0.9173, + "step": 50340 + }, + { + "epoch": 4.63, + "learning_rate": 2.6854831295393952e-05, + "loss": 0.7489, + "step": 50350 + }, + { + "epoch": 4.63, + "learning_rate": 2.6850234439643285e-05, + "loss": 0.9353, + "step": 50360 + }, + { + "epoch": 4.63, + "learning_rate": 2.6845637583892618e-05, + "loss": 0.9447, + "step": 50370 + }, + { + "epoch": 4.63, + "learning_rate": 2.684104072814195e-05, + "loss": 0.7913, + "step": 50380 + }, + { + "epoch": 4.63, + "learning_rate": 2.6836443872391288e-05, + "loss": 0.8951, + "step": 50390 + }, + { + "epoch": 4.63, + "learning_rate": 2.683184701664062e-05, + "loss": 0.9132, + "step": 50400 + }, + { + "epoch": 4.63, + "learning_rate": 2.6827250160889954e-05, + "loss": 0.8863, + "step": 50410 + }, + { + "epoch": 4.64, + "learning_rate": 2.6822653305139284e-05, + "loss": 0.836, + "step": 50420 + }, + { + "epoch": 4.64, + "learning_rate": 2.6818056449388617e-05, + "loss": 0.9322, + "step": 50430 + }, + { + "epoch": 4.64, + "learning_rate": 2.6813459593637953e-05, + "loss": 0.9141, + "step": 50440 + }, + { + "epoch": 4.64, + "learning_rate": 2.6808862737887286e-05, + "loss": 0.8345, + "step": 50450 + }, + { + "epoch": 4.64, + "learning_rate": 2.680426588213662e-05, + "loss": 0.883, + "step": 50460 + }, + { + "epoch": 4.64, + "learning_rate": 2.6799669026385953e-05, + "loss": 0.9256, + "step": 50470 + }, + { + "epoch": 4.64, + "learning_rate": 2.679507217063529e-05, + "loss": 0.8171, + "step": 50480 + }, + { + "epoch": 4.64, + "learning_rate": 2.6790475314884622e-05, + "loss": 0.8259, + "step": 50490 + }, + { + "epoch": 4.64, + "learning_rate": 2.6785878459133955e-05, + "loss": 0.8009, + "step": 50500 + }, + { + "epoch": 4.64, + "learning_rate": 2.6781281603383285e-05, + "loss": 1.0404, + "step": 50510 + }, + { + "epoch": 4.64, + "learning_rate": 2.6776684747632618e-05, + "loss": 0.8992, + "step": 50520 + }, + { + "epoch": 4.65, + "learning_rate": 2.6772087891881955e-05, + "loss": 0.8607, + "step": 50530 + }, + { + "epoch": 4.65, + "learning_rate": 2.6767491036131288e-05, + "loss": 0.8318, + "step": 50540 + }, + { + "epoch": 4.65, + "learning_rate": 2.676289418038062e-05, + "loss": 0.8607, + "step": 50550 + }, + { + "epoch": 4.65, + "learning_rate": 2.6758297324629954e-05, + "loss": 0.8744, + "step": 50560 + }, + { + "epoch": 4.65, + "learning_rate": 2.675370046887929e-05, + "loss": 0.9337, + "step": 50570 + }, + { + "epoch": 4.65, + "learning_rate": 2.6749103613128623e-05, + "loss": 0.826, + "step": 50580 + }, + { + "epoch": 4.65, + "learning_rate": 2.6744506757377956e-05, + "loss": 0.8597, + "step": 50590 + }, + { + "epoch": 4.65, + "learning_rate": 2.6739909901627286e-05, + "loss": 0.9297, + "step": 50600 + }, + { + "epoch": 4.65, + "learning_rate": 2.673531304587662e-05, + "loss": 0.8989, + "step": 50610 + }, + { + "epoch": 4.65, + "learning_rate": 2.6730716190125952e-05, + "loss": 0.9289, + "step": 50620 + }, + { + "epoch": 4.65, + "learning_rate": 2.672611933437529e-05, + "loss": 0.9136, + "step": 50630 + }, + { + "epoch": 4.66, + "learning_rate": 2.6721522478624622e-05, + "loss": 0.8412, + "step": 50640 + }, + { + "epoch": 4.66, + "learning_rate": 2.6716925622873955e-05, + "loss": 0.8362, + "step": 50650 + }, + { + "epoch": 4.66, + "learning_rate": 2.671232876712329e-05, + "loss": 0.961, + "step": 50660 + }, + { + "epoch": 4.66, + "learning_rate": 2.6707731911372625e-05, + "loss": 0.7808, + "step": 50670 + }, + { + "epoch": 4.66, + "learning_rate": 2.6703135055621958e-05, + "loss": 0.7968, + "step": 50680 + }, + { + "epoch": 4.66, + "learning_rate": 2.6698538199871287e-05, + "loss": 0.885, + "step": 50690 + }, + { + "epoch": 4.66, + "learning_rate": 2.669394134412062e-05, + "loss": 0.9704, + "step": 50700 + }, + { + "epoch": 4.66, + "learning_rate": 2.6689344488369954e-05, + "loss": 0.8842, + "step": 50710 + }, + { + "epoch": 4.66, + "learning_rate": 2.668474763261929e-05, + "loss": 0.8323, + "step": 50720 + }, + { + "epoch": 4.66, + "learning_rate": 2.6680150776868623e-05, + "loss": 0.8292, + "step": 50730 + }, + { + "epoch": 4.66, + "learning_rate": 2.6675553921117956e-05, + "loss": 0.9217, + "step": 50740 + }, + { + "epoch": 4.67, + "learning_rate": 2.6670957065367293e-05, + "loss": 0.8719, + "step": 50750 + }, + { + "epoch": 4.67, + "learning_rate": 2.6666360209616626e-05, + "loss": 0.9303, + "step": 50760 + }, + { + "epoch": 4.67, + "learning_rate": 2.666176335386596e-05, + "loss": 0.8212, + "step": 50770 + }, + { + "epoch": 4.67, + "learning_rate": 2.665716649811529e-05, + "loss": 0.8679, + "step": 50780 + }, + { + "epoch": 4.67, + "learning_rate": 2.665256964236462e-05, + "loss": 0.8237, + "step": 50790 + }, + { + "epoch": 4.67, + "learning_rate": 2.6647972786613955e-05, + "loss": 0.8318, + "step": 50800 + }, + { + "epoch": 4.67, + "learning_rate": 2.664337593086329e-05, + "loss": 0.9479, + "step": 50810 + }, + { + "epoch": 4.67, + "learning_rate": 2.6638779075112624e-05, + "loss": 0.9581, + "step": 50820 + }, + { + "epoch": 4.67, + "learning_rate": 2.6634182219361957e-05, + "loss": 0.9461, + "step": 50830 + }, + { + "epoch": 4.67, + "learning_rate": 2.6629585363611294e-05, + "loss": 0.8883, + "step": 50840 + }, + { + "epoch": 4.68, + "learning_rate": 2.6624988507860627e-05, + "loss": 0.9643, + "step": 50850 + }, + { + "epoch": 4.68, + "learning_rate": 2.662039165210996e-05, + "loss": 1.0772, + "step": 50860 + }, + { + "epoch": 4.68, + "learning_rate": 2.661579479635929e-05, + "loss": 0.7682, + "step": 50870 + }, + { + "epoch": 4.68, + "learning_rate": 2.6611197940608623e-05, + "loss": 0.9346, + "step": 50880 + }, + { + "epoch": 4.68, + "learning_rate": 2.6606601084857956e-05, + "loss": 0.9026, + "step": 50890 + }, + { + "epoch": 4.68, + "learning_rate": 2.6602004229107292e-05, + "loss": 0.9374, + "step": 50900 + }, + { + "epoch": 4.68, + "learning_rate": 2.6597407373356625e-05, + "loss": 0.869, + "step": 50910 + }, + { + "epoch": 4.68, + "learning_rate": 2.659281051760596e-05, + "loss": 0.8596, + "step": 50920 + }, + { + "epoch": 4.68, + "learning_rate": 2.6588213661855295e-05, + "loss": 0.8435, + "step": 50930 + }, + { + "epoch": 4.68, + "learning_rate": 2.6583616806104628e-05, + "loss": 0.8886, + "step": 50940 + }, + { + "epoch": 4.68, + "learning_rate": 2.657901995035396e-05, + "loss": 0.9091, + "step": 50950 + }, + { + "epoch": 4.69, + "learning_rate": 2.657442309460329e-05, + "loss": 0.8025, + "step": 50960 + }, + { + "epoch": 4.69, + "learning_rate": 2.6569826238852624e-05, + "loss": 0.9175, + "step": 50970 + }, + { + "epoch": 4.69, + "learning_rate": 2.6565229383101957e-05, + "loss": 0.8856, + "step": 50980 + }, + { + "epoch": 4.69, + "learning_rate": 2.6560632527351294e-05, + "loss": 0.9063, + "step": 50990 + }, + { + "epoch": 4.69, + "learning_rate": 2.6556035671600627e-05, + "loss": 0.8348, + "step": 51000 + }, + { + "epoch": 4.69, + "learning_rate": 2.655143881584996e-05, + "loss": 0.92, + "step": 51010 + }, + { + "epoch": 4.69, + "learning_rate": 2.6546841960099296e-05, + "loss": 0.9289, + "step": 51020 + }, + { + "epoch": 4.69, + "learning_rate": 2.654224510434863e-05, + "loss": 0.8535, + "step": 51030 + }, + { + "epoch": 4.69, + "learning_rate": 2.6537648248597962e-05, + "loss": 0.9998, + "step": 51040 + }, + { + "epoch": 4.69, + "learning_rate": 2.6533051392847292e-05, + "loss": 0.9593, + "step": 51050 + }, + { + "epoch": 4.69, + "learning_rate": 2.6528454537096625e-05, + "loss": 0.8861, + "step": 51060 + }, + { + "epoch": 4.7, + "learning_rate": 2.6523857681345958e-05, + "loss": 0.8441, + "step": 51070 + }, + { + "epoch": 4.7, + "learning_rate": 2.6519260825595295e-05, + "loss": 0.8479, + "step": 51080 + }, + { + "epoch": 4.7, + "learning_rate": 2.6514663969844628e-05, + "loss": 0.824, + "step": 51090 + }, + { + "epoch": 4.7, + "learning_rate": 2.651006711409396e-05, + "loss": 0.8438, + "step": 51100 + }, + { + "epoch": 4.7, + "learning_rate": 2.6505470258343294e-05, + "loss": 0.7748, + "step": 51110 + }, + { + "epoch": 4.7, + "learning_rate": 2.650087340259263e-05, + "loss": 0.7988, + "step": 51120 + }, + { + "epoch": 4.7, + "learning_rate": 2.6496276546841964e-05, + "loss": 0.9921, + "step": 51130 + }, + { + "epoch": 4.7, + "learning_rate": 2.6491679691091293e-05, + "loss": 0.9049, + "step": 51140 + }, + { + "epoch": 4.7, + "learning_rate": 2.6487082835340626e-05, + "loss": 0.9944, + "step": 51150 + }, + { + "epoch": 4.7, + "learning_rate": 2.648248597958996e-05, + "loss": 0.8932, + "step": 51160 + }, + { + "epoch": 4.7, + "learning_rate": 2.6477889123839296e-05, + "loss": 0.7897, + "step": 51170 + }, + { + "epoch": 4.71, + "learning_rate": 2.647329226808863e-05, + "loss": 0.9216, + "step": 51180 + }, + { + "epoch": 4.71, + "learning_rate": 2.6468695412337962e-05, + "loss": 0.9274, + "step": 51190 + }, + { + "epoch": 4.71, + "learning_rate": 2.6464098556587295e-05, + "loss": 0.7645, + "step": 51200 + }, + { + "epoch": 4.71, + "learning_rate": 2.6459501700836632e-05, + "loss": 1.0111, + "step": 51210 + }, + { + "epoch": 4.71, + "learning_rate": 2.6454904845085965e-05, + "loss": 0.8902, + "step": 51220 + }, + { + "epoch": 4.71, + "learning_rate": 2.6450307989335295e-05, + "loss": 0.8836, + "step": 51230 + }, + { + "epoch": 4.71, + "learning_rate": 2.6445711133584628e-05, + "loss": 0.8934, + "step": 51240 + }, + { + "epoch": 4.71, + "learning_rate": 2.644111427783396e-05, + "loss": 0.8772, + "step": 51250 + }, + { + "epoch": 4.71, + "learning_rate": 2.6436517422083297e-05, + "loss": 0.8492, + "step": 51260 + }, + { + "epoch": 4.71, + "learning_rate": 2.643192056633263e-05, + "loss": 0.8671, + "step": 51270 + }, + { + "epoch": 4.71, + "learning_rate": 2.6427323710581963e-05, + "loss": 0.9104, + "step": 51280 + }, + { + "epoch": 4.72, + "learning_rate": 2.6422726854831296e-05, + "loss": 1.0197, + "step": 51290 + }, + { + "epoch": 4.72, + "learning_rate": 2.6418129999080633e-05, + "loss": 0.7811, + "step": 51300 + }, + { + "epoch": 4.72, + "learning_rate": 2.6413533143329966e-05, + "loss": 0.8248, + "step": 51310 + }, + { + "epoch": 4.72, + "learning_rate": 2.6408936287579296e-05, + "loss": 0.8957, + "step": 51320 + }, + { + "epoch": 4.72, + "learning_rate": 2.640433943182863e-05, + "loss": 0.9322, + "step": 51330 + }, + { + "epoch": 4.72, + "learning_rate": 2.6399742576077962e-05, + "loss": 0.9086, + "step": 51340 + }, + { + "epoch": 4.72, + "learning_rate": 2.63951457203273e-05, + "loss": 1.0306, + "step": 51350 + }, + { + "epoch": 4.72, + "learning_rate": 2.639054886457663e-05, + "loss": 0.9506, + "step": 51360 + }, + { + "epoch": 4.72, + "learning_rate": 2.6385952008825965e-05, + "loss": 0.9135, + "step": 51370 + }, + { + "epoch": 4.72, + "learning_rate": 2.6381355153075298e-05, + "loss": 1.023, + "step": 51380 + }, + { + "epoch": 4.72, + "learning_rate": 2.6376758297324634e-05, + "loss": 1.0377, + "step": 51390 + }, + { + "epoch": 4.73, + "learning_rate": 2.6372161441573967e-05, + "loss": 0.9436, + "step": 51400 + }, + { + "epoch": 4.73, + "learning_rate": 2.6367564585823297e-05, + "loss": 0.919, + "step": 51410 + }, + { + "epoch": 4.73, + "learning_rate": 2.636296773007263e-05, + "loss": 0.9182, + "step": 51420 + }, + { + "epoch": 4.73, + "learning_rate": 2.6358370874321963e-05, + "loss": 0.8771, + "step": 51430 + }, + { + "epoch": 4.73, + "learning_rate": 2.6353774018571296e-05, + "loss": 0.9078, + "step": 51440 + }, + { + "epoch": 4.73, + "learning_rate": 2.6349177162820633e-05, + "loss": 0.8106, + "step": 51450 + }, + { + "epoch": 4.73, + "learning_rate": 2.6344580307069966e-05, + "loss": 0.8999, + "step": 51460 + }, + { + "epoch": 4.73, + "learning_rate": 2.63399834513193e-05, + "loss": 0.8866, + "step": 51470 + }, + { + "epoch": 4.73, + "learning_rate": 2.6335386595568635e-05, + "loss": 0.781, + "step": 51480 + }, + { + "epoch": 4.73, + "learning_rate": 2.633078973981797e-05, + "loss": 0.7774, + "step": 51490 + }, + { + "epoch": 4.73, + "learning_rate": 2.6326192884067298e-05, + "loss": 0.889, + "step": 51500 + }, + { + "epoch": 4.74, + "learning_rate": 2.632159602831663e-05, + "loss": 0.9658, + "step": 51510 + }, + { + "epoch": 4.74, + "learning_rate": 2.6316999172565964e-05, + "loss": 0.8539, + "step": 51520 + }, + { + "epoch": 4.74, + "learning_rate": 2.6312402316815297e-05, + "loss": 0.863, + "step": 51530 + }, + { + "epoch": 4.74, + "learning_rate": 2.6307805461064634e-05, + "loss": 0.7877, + "step": 51540 + }, + { + "epoch": 4.74, + "learning_rate": 2.6303208605313967e-05, + "loss": 0.982, + "step": 51550 + }, + { + "epoch": 4.74, + "learning_rate": 2.62986117495633e-05, + "loss": 0.9533, + "step": 51560 + }, + { + "epoch": 4.74, + "learning_rate": 2.6294014893812637e-05, + "loss": 0.9807, + "step": 51570 + }, + { + "epoch": 4.74, + "learning_rate": 2.628941803806197e-05, + "loss": 0.8277, + "step": 51580 + }, + { + "epoch": 4.74, + "learning_rate": 2.62848211823113e-05, + "loss": 0.9131, + "step": 51590 + }, + { + "epoch": 4.74, + "learning_rate": 2.6280224326560632e-05, + "loss": 0.9062, + "step": 51600 + }, + { + "epoch": 4.74, + "learning_rate": 2.6275627470809965e-05, + "loss": 0.9347, + "step": 51610 + }, + { + "epoch": 4.75, + "learning_rate": 2.62710306150593e-05, + "loss": 0.9438, + "step": 51620 + }, + { + "epoch": 4.75, + "learning_rate": 2.6266433759308635e-05, + "loss": 0.7752, + "step": 51630 + }, + { + "epoch": 4.75, + "learning_rate": 2.6261836903557968e-05, + "loss": 0.813, + "step": 51640 + }, + { + "epoch": 4.75, + "learning_rate": 2.62572400478073e-05, + "loss": 0.935, + "step": 51650 + }, + { + "epoch": 4.75, + "learning_rate": 2.6252643192056638e-05, + "loss": 0.943, + "step": 51660 + }, + { + "epoch": 4.75, + "learning_rate": 2.624804633630597e-05, + "loss": 0.9554, + "step": 51670 + }, + { + "epoch": 4.75, + "learning_rate": 2.62434494805553e-05, + "loss": 0.8154, + "step": 51680 + }, + { + "epoch": 4.75, + "learning_rate": 2.6238852624804634e-05, + "loss": 0.7389, + "step": 51690 + }, + { + "epoch": 4.75, + "learning_rate": 2.6234255769053967e-05, + "loss": 0.8479, + "step": 51700 + }, + { + "epoch": 4.75, + "learning_rate": 2.62296589133033e-05, + "loss": 0.8776, + "step": 51710 + }, + { + "epoch": 4.75, + "learning_rate": 2.6225062057552636e-05, + "loss": 0.9495, + "step": 51720 + }, + { + "epoch": 4.76, + "learning_rate": 2.622046520180197e-05, + "loss": 0.8969, + "step": 51730 + }, + { + "epoch": 4.76, + "learning_rate": 2.6215868346051302e-05, + "loss": 0.9842, + "step": 51740 + }, + { + "epoch": 4.76, + "learning_rate": 2.621127149030064e-05, + "loss": 0.8632, + "step": 51750 + }, + { + "epoch": 4.76, + "learning_rate": 2.6206674634549972e-05, + "loss": 0.8384, + "step": 51760 + }, + { + "epoch": 4.76, + "learning_rate": 2.6202077778799298e-05, + "loss": 0.8757, + "step": 51770 + }, + { + "epoch": 4.76, + "learning_rate": 2.6197480923048635e-05, + "loss": 0.7867, + "step": 51780 + }, + { + "epoch": 4.76, + "learning_rate": 2.6192884067297968e-05, + "loss": 1.0321, + "step": 51790 + }, + { + "epoch": 4.76, + "learning_rate": 2.61882872115473e-05, + "loss": 0.8448, + "step": 51800 + }, + { + "epoch": 4.76, + "learning_rate": 2.6183690355796637e-05, + "loss": 0.9332, + "step": 51810 + }, + { + "epoch": 4.76, + "learning_rate": 2.617909350004597e-05, + "loss": 0.9388, + "step": 51820 + }, + { + "epoch": 4.77, + "learning_rate": 2.6174496644295304e-05, + "loss": 0.9202, + "step": 51830 + }, + { + "epoch": 4.77, + "learning_rate": 2.616989978854464e-05, + "loss": 0.8806, + "step": 51840 + }, + { + "epoch": 4.77, + "learning_rate": 2.6165302932793973e-05, + "loss": 0.8246, + "step": 51850 + }, + { + "epoch": 4.77, + "learning_rate": 2.61607060770433e-05, + "loss": 0.943, + "step": 51860 + }, + { + "epoch": 4.77, + "learning_rate": 2.6156109221292636e-05, + "loss": 0.9131, + "step": 51870 + }, + { + "epoch": 4.77, + "learning_rate": 2.615151236554197e-05, + "loss": 0.8579, + "step": 51880 + }, + { + "epoch": 4.77, + "learning_rate": 2.6146915509791302e-05, + "loss": 0.9344, + "step": 51890 + }, + { + "epoch": 4.77, + "learning_rate": 2.614231865404064e-05, + "loss": 0.9077, + "step": 51900 + }, + { + "epoch": 4.77, + "learning_rate": 2.6137721798289972e-05, + "loss": 0.7841, + "step": 51910 + }, + { + "epoch": 4.77, + "learning_rate": 2.6133124942539305e-05, + "loss": 0.7951, + "step": 51920 + }, + { + "epoch": 4.77, + "learning_rate": 2.6128528086788638e-05, + "loss": 0.8049, + "step": 51930 + }, + { + "epoch": 4.78, + "learning_rate": 2.6123931231037974e-05, + "loss": 0.873, + "step": 51940 + }, + { + "epoch": 4.78, + "learning_rate": 2.61193343752873e-05, + "loss": 0.9173, + "step": 51950 + }, + { + "epoch": 4.78, + "learning_rate": 2.6114737519536637e-05, + "loss": 0.8607, + "step": 51960 + }, + { + "epoch": 4.78, + "learning_rate": 2.611014066378597e-05, + "loss": 0.8662, + "step": 51970 + }, + { + "epoch": 4.78, + "learning_rate": 2.6105543808035303e-05, + "loss": 0.9766, + "step": 51980 + }, + { + "epoch": 4.78, + "learning_rate": 2.610094695228464e-05, + "loss": 0.8034, + "step": 51990 + }, + { + "epoch": 4.78, + "learning_rate": 2.6096350096533973e-05, + "loss": 0.8759, + "step": 52000 + }, + { + "epoch": 4.78, + "learning_rate": 2.6091753240783306e-05, + "loss": 0.8653, + "step": 52010 + }, + { + "epoch": 4.78, + "learning_rate": 2.608715638503264e-05, + "loss": 0.8343, + "step": 52020 + }, + { + "epoch": 4.78, + "learning_rate": 2.6082559529281976e-05, + "loss": 0.8049, + "step": 52030 + }, + { + "epoch": 4.78, + "learning_rate": 2.6077962673531302e-05, + "loss": 0.9001, + "step": 52040 + }, + { + "epoch": 4.79, + "learning_rate": 2.607336581778064e-05, + "loss": 0.866, + "step": 52050 + }, + { + "epoch": 4.79, + "learning_rate": 2.606876896202997e-05, + "loss": 1.0011, + "step": 52060 + }, + { + "epoch": 4.79, + "learning_rate": 2.6064172106279305e-05, + "loss": 1.007, + "step": 52070 + }, + { + "epoch": 4.79, + "learning_rate": 2.605957525052864e-05, + "loss": 0.8373, + "step": 52080 + }, + { + "epoch": 4.79, + "learning_rate": 2.6054978394777974e-05, + "loss": 0.7447, + "step": 52090 + }, + { + "epoch": 4.79, + "learning_rate": 2.6050381539027307e-05, + "loss": 0.8904, + "step": 52100 + }, + { + "epoch": 4.79, + "learning_rate": 2.604578468327664e-05, + "loss": 0.8515, + "step": 52110 + }, + { + "epoch": 4.79, + "learning_rate": 2.6041187827525977e-05, + "loss": 0.8491, + "step": 52120 + }, + { + "epoch": 4.79, + "learning_rate": 2.6036590971775303e-05, + "loss": 0.7905, + "step": 52130 + }, + { + "epoch": 4.79, + "learning_rate": 2.603199411602464e-05, + "loss": 0.886, + "step": 52140 + }, + { + "epoch": 4.79, + "learning_rate": 2.6027397260273973e-05, + "loss": 0.9144, + "step": 52150 + }, + { + "epoch": 4.8, + "learning_rate": 2.6022800404523306e-05, + "loss": 0.7743, + "step": 52160 + }, + { + "epoch": 4.8, + "learning_rate": 2.6018203548772642e-05, + "loss": 0.9028, + "step": 52170 + }, + { + "epoch": 4.8, + "learning_rate": 2.6013606693021975e-05, + "loss": 0.9628, + "step": 52180 + }, + { + "epoch": 4.8, + "learning_rate": 2.600900983727131e-05, + "loss": 0.8116, + "step": 52190 + }, + { + "epoch": 4.8, + "learning_rate": 2.600441298152064e-05, + "loss": 0.8684, + "step": 52200 + }, + { + "epoch": 4.8, + "learning_rate": 2.5999816125769978e-05, + "loss": 0.8152, + "step": 52210 + }, + { + "epoch": 4.8, + "learning_rate": 2.5995219270019304e-05, + "loss": 0.8058, + "step": 52220 + }, + { + "epoch": 4.8, + "learning_rate": 2.599062241426864e-05, + "loss": 0.897, + "step": 52230 + }, + { + "epoch": 4.8, + "learning_rate": 2.5986025558517974e-05, + "loss": 0.929, + "step": 52240 + }, + { + "epoch": 4.8, + "learning_rate": 2.5981428702767307e-05, + "loss": 1.0103, + "step": 52250 + }, + { + "epoch": 4.8, + "learning_rate": 2.597683184701664e-05, + "loss": 0.9344, + "step": 52260 + }, + { + "epoch": 4.81, + "learning_rate": 2.5972234991265976e-05, + "loss": 0.8249, + "step": 52270 + }, + { + "epoch": 4.81, + "learning_rate": 2.596763813551531e-05, + "loss": 0.8546, + "step": 52280 + }, + { + "epoch": 4.81, + "learning_rate": 2.5963041279764643e-05, + "loss": 0.7462, + "step": 52290 + }, + { + "epoch": 4.81, + "learning_rate": 2.595844442401398e-05, + "loss": 0.9635, + "step": 52300 + }, + { + "epoch": 4.81, + "learning_rate": 2.5953847568263305e-05, + "loss": 0.8262, + "step": 52310 + }, + { + "epoch": 4.81, + "learning_rate": 2.5949250712512642e-05, + "loss": 0.8048, + "step": 52320 + }, + { + "epoch": 4.81, + "learning_rate": 2.5944653856761975e-05, + "loss": 1.0075, + "step": 52330 + }, + { + "epoch": 4.81, + "learning_rate": 2.5940057001011308e-05, + "loss": 0.9017, + "step": 52340 + }, + { + "epoch": 4.81, + "learning_rate": 2.593546014526064e-05, + "loss": 1.0119, + "step": 52350 + }, + { + "epoch": 4.81, + "learning_rate": 2.5930863289509978e-05, + "loss": 0.7725, + "step": 52360 + }, + { + "epoch": 4.81, + "learning_rate": 2.592626643375931e-05, + "loss": 0.8419, + "step": 52370 + }, + { + "epoch": 4.82, + "learning_rate": 2.5921669578008644e-05, + "loss": 0.7826, + "step": 52380 + }, + { + "epoch": 4.82, + "learning_rate": 2.591707272225798e-05, + "loss": 0.8574, + "step": 52390 + }, + { + "epoch": 4.82, + "learning_rate": 2.5912475866507307e-05, + "loss": 0.8753, + "step": 52400 + }, + { + "epoch": 4.82, + "learning_rate": 2.5907879010756643e-05, + "loss": 0.9236, + "step": 52410 + }, + { + "epoch": 4.82, + "learning_rate": 2.5903282155005976e-05, + "loss": 0.9128, + "step": 52420 + }, + { + "epoch": 4.82, + "learning_rate": 2.589868529925531e-05, + "loss": 0.8835, + "step": 52430 + }, + { + "epoch": 4.82, + "learning_rate": 2.5894088443504642e-05, + "loss": 0.8457, + "step": 52440 + }, + { + "epoch": 4.82, + "learning_rate": 2.588949158775398e-05, + "loss": 0.9376, + "step": 52450 + }, + { + "epoch": 4.82, + "learning_rate": 2.5884894732003312e-05, + "loss": 0.8774, + "step": 52460 + }, + { + "epoch": 4.82, + "learning_rate": 2.5880297876252645e-05, + "loss": 0.8814, + "step": 52470 + }, + { + "epoch": 4.82, + "learning_rate": 2.587570102050198e-05, + "loss": 0.9285, + "step": 52480 + }, + { + "epoch": 4.83, + "learning_rate": 2.5871104164751308e-05, + "loss": 0.9096, + "step": 52490 + }, + { + "epoch": 4.83, + "learning_rate": 2.5866507309000644e-05, + "loss": 0.9448, + "step": 52500 + }, + { + "epoch": 4.83, + "learning_rate": 2.5861910453249977e-05, + "loss": 0.8288, + "step": 52510 + }, + { + "epoch": 4.83, + "learning_rate": 2.585731359749931e-05, + "loss": 0.8107, + "step": 52520 + }, + { + "epoch": 4.83, + "learning_rate": 2.5852716741748644e-05, + "loss": 0.8019, + "step": 52530 + }, + { + "epoch": 4.83, + "learning_rate": 2.584811988599798e-05, + "loss": 0.8552, + "step": 52540 + }, + { + "epoch": 4.83, + "learning_rate": 2.5843523030247313e-05, + "loss": 0.8744, + "step": 52550 + }, + { + "epoch": 4.83, + "learning_rate": 2.5838926174496646e-05, + "loss": 1.0223, + "step": 52560 + }, + { + "epoch": 4.83, + "learning_rate": 2.5834329318745983e-05, + "loss": 0.8923, + "step": 52570 + }, + { + "epoch": 4.83, + "learning_rate": 2.582973246299531e-05, + "loss": 0.8611, + "step": 52580 + }, + { + "epoch": 4.83, + "learning_rate": 2.5825135607244642e-05, + "loss": 0.8632, + "step": 52590 + }, + { + "epoch": 4.84, + "learning_rate": 2.582053875149398e-05, + "loss": 1.0215, + "step": 52600 + }, + { + "epoch": 4.84, + "learning_rate": 2.5815941895743312e-05, + "loss": 0.7743, + "step": 52610 + }, + { + "epoch": 4.84, + "learning_rate": 2.5811345039992645e-05, + "loss": 0.8406, + "step": 52620 + }, + { + "epoch": 4.84, + "learning_rate": 2.580674818424198e-05, + "loss": 0.8455, + "step": 52630 + }, + { + "epoch": 4.84, + "learning_rate": 2.5802151328491314e-05, + "loss": 0.8138, + "step": 52640 + }, + { + "epoch": 4.84, + "learning_rate": 2.5797554472740647e-05, + "loss": 0.8717, + "step": 52650 + }, + { + "epoch": 4.84, + "learning_rate": 2.5792957616989984e-05, + "loss": 0.8454, + "step": 52660 + }, + { + "epoch": 4.84, + "learning_rate": 2.578836076123931e-05, + "loss": 0.8429, + "step": 52670 + }, + { + "epoch": 4.84, + "learning_rate": 2.5783763905488643e-05, + "loss": 0.8464, + "step": 52680 + }, + { + "epoch": 4.84, + "learning_rate": 2.577916704973798e-05, + "loss": 0.8701, + "step": 52690 + }, + { + "epoch": 4.85, + "learning_rate": 2.5774570193987313e-05, + "loss": 0.8752, + "step": 52700 + }, + { + "epoch": 4.85, + "learning_rate": 2.5769973338236646e-05, + "loss": 0.8493, + "step": 52710 + }, + { + "epoch": 4.85, + "learning_rate": 2.5765376482485982e-05, + "loss": 0.937, + "step": 52720 + }, + { + "epoch": 4.85, + "learning_rate": 2.5760779626735316e-05, + "loss": 0.8947, + "step": 52730 + }, + { + "epoch": 4.85, + "learning_rate": 2.575618277098465e-05, + "loss": 0.8661, + "step": 52740 + }, + { + "epoch": 4.85, + "learning_rate": 2.5751585915233982e-05, + "loss": 0.8669, + "step": 52750 + }, + { + "epoch": 4.85, + "learning_rate": 2.5746989059483318e-05, + "loss": 0.8884, + "step": 52760 + }, + { + "epoch": 4.85, + "learning_rate": 2.5742392203732645e-05, + "loss": 0.8584, + "step": 52770 + }, + { + "epoch": 4.85, + "learning_rate": 2.573779534798198e-05, + "loss": 0.7858, + "step": 52780 + }, + { + "epoch": 4.85, + "learning_rate": 2.5733198492231314e-05, + "loss": 0.9176, + "step": 52790 + }, + { + "epoch": 4.85, + "learning_rate": 2.5728601636480647e-05, + "loss": 0.8127, + "step": 52800 + }, + { + "epoch": 4.86, + "learning_rate": 2.5724004780729984e-05, + "loss": 0.8937, + "step": 52810 + }, + { + "epoch": 4.86, + "learning_rate": 2.5719407924979317e-05, + "loss": 0.7939, + "step": 52820 + }, + { + "epoch": 4.86, + "learning_rate": 2.571481106922865e-05, + "loss": 0.8024, + "step": 52830 + }, + { + "epoch": 4.86, + "learning_rate": 2.5710214213477983e-05, + "loss": 0.8961, + "step": 52840 + }, + { + "epoch": 4.86, + "learning_rate": 2.570561735772732e-05, + "loss": 0.9322, + "step": 52850 + }, + { + "epoch": 4.86, + "learning_rate": 2.5701020501976646e-05, + "loss": 0.8168, + "step": 52860 + }, + { + "epoch": 4.86, + "learning_rate": 2.5696423646225982e-05, + "loss": 0.841, + "step": 52870 + }, + { + "epoch": 4.86, + "learning_rate": 2.5691826790475315e-05, + "loss": 0.8792, + "step": 52880 + }, + { + "epoch": 4.86, + "learning_rate": 2.568722993472465e-05, + "loss": 0.8607, + "step": 52890 + }, + { + "epoch": 4.86, + "learning_rate": 2.5682633078973985e-05, + "loss": 0.9123, + "step": 52900 + }, + { + "epoch": 4.86, + "learning_rate": 2.5678036223223318e-05, + "loss": 0.9976, + "step": 52910 + }, + { + "epoch": 4.87, + "learning_rate": 2.567343936747265e-05, + "loss": 0.8538, + "step": 52920 + }, + { + "epoch": 4.87, + "learning_rate": 2.5668842511721984e-05, + "loss": 0.8046, + "step": 52930 + }, + { + "epoch": 4.87, + "learning_rate": 2.566424565597132e-05, + "loss": 0.9453, + "step": 52940 + }, + { + "epoch": 4.87, + "learning_rate": 2.5659648800220647e-05, + "loss": 0.9267, + "step": 52950 + }, + { + "epoch": 4.87, + "learning_rate": 2.5655051944469983e-05, + "loss": 0.9479, + "step": 52960 + }, + { + "epoch": 4.87, + "learning_rate": 2.5650455088719316e-05, + "loss": 0.9615, + "step": 52970 + }, + { + "epoch": 4.87, + "learning_rate": 2.564585823296865e-05, + "loss": 0.8079, + "step": 52980 + }, + { + "epoch": 4.87, + "learning_rate": 2.5641261377217986e-05, + "loss": 0.8712, + "step": 52990 + }, + { + "epoch": 4.87, + "learning_rate": 2.563666452146732e-05, + "loss": 0.8296, + "step": 53000 + }, + { + "epoch": 4.87, + "learning_rate": 2.5632067665716652e-05, + "loss": 0.8692, + "step": 53010 + }, + { + "epoch": 4.87, + "learning_rate": 2.5627470809965985e-05, + "loss": 0.8878, + "step": 53020 + }, + { + "epoch": 4.88, + "learning_rate": 2.5622873954215322e-05, + "loss": 0.7844, + "step": 53030 + }, + { + "epoch": 4.88, + "learning_rate": 2.5618277098464648e-05, + "loss": 0.8335, + "step": 53040 + }, + { + "epoch": 4.88, + "learning_rate": 2.5613680242713985e-05, + "loss": 0.9055, + "step": 53050 + }, + { + "epoch": 4.88, + "learning_rate": 2.5609083386963318e-05, + "loss": 0.885, + "step": 53060 + }, + { + "epoch": 4.88, + "learning_rate": 2.560448653121265e-05, + "loss": 0.8754, + "step": 53070 + }, + { + "epoch": 4.88, + "learning_rate": 2.5599889675461984e-05, + "loss": 0.8067, + "step": 53080 + }, + { + "epoch": 4.88, + "learning_rate": 2.559529281971132e-05, + "loss": 0.8867, + "step": 53090 + }, + { + "epoch": 4.88, + "learning_rate": 2.5590695963960653e-05, + "loss": 0.841, + "step": 53100 + }, + { + "epoch": 4.88, + "learning_rate": 2.5586099108209987e-05, + "loss": 0.9648, + "step": 53110 + }, + { + "epoch": 4.88, + "learning_rate": 2.5581502252459323e-05, + "loss": 0.938, + "step": 53120 + }, + { + "epoch": 4.88, + "learning_rate": 2.557690539670865e-05, + "loss": 0.8538, + "step": 53130 + }, + { + "epoch": 4.89, + "learning_rate": 2.5572308540957986e-05, + "loss": 0.9718, + "step": 53140 + }, + { + "epoch": 4.89, + "learning_rate": 2.556771168520732e-05, + "loss": 0.9588, + "step": 53150 + }, + { + "epoch": 4.89, + "learning_rate": 2.5563114829456652e-05, + "loss": 0.7171, + "step": 53160 + }, + { + "epoch": 4.89, + "learning_rate": 2.5558517973705985e-05, + "loss": 0.8828, + "step": 53170 + }, + { + "epoch": 4.89, + "learning_rate": 2.555392111795532e-05, + "loss": 0.8831, + "step": 53180 + }, + { + "epoch": 4.89, + "learning_rate": 2.5549324262204655e-05, + "loss": 0.8076, + "step": 53190 + }, + { + "epoch": 4.89, + "learning_rate": 2.5544727406453988e-05, + "loss": 0.666, + "step": 53200 + }, + { + "epoch": 4.89, + "learning_rate": 2.5540130550703324e-05, + "loss": 0.9843, + "step": 53210 + }, + { + "epoch": 4.89, + "learning_rate": 2.553553369495265e-05, + "loss": 0.8388, + "step": 53220 + }, + { + "epoch": 4.89, + "learning_rate": 2.5530936839201987e-05, + "loss": 0.7749, + "step": 53230 + }, + { + "epoch": 4.89, + "learning_rate": 2.552633998345132e-05, + "loss": 0.7887, + "step": 53240 + }, + { + "epoch": 4.9, + "learning_rate": 2.5521743127700653e-05, + "loss": 0.7947, + "step": 53250 + }, + { + "epoch": 4.9, + "learning_rate": 2.5517146271949986e-05, + "loss": 0.6509, + "step": 53260 + }, + { + "epoch": 4.9, + "learning_rate": 2.5512549416199323e-05, + "loss": 0.9097, + "step": 53270 + }, + { + "epoch": 4.9, + "learning_rate": 2.5507952560448656e-05, + "loss": 0.9015, + "step": 53280 + }, + { + "epoch": 4.9, + "learning_rate": 2.550335570469799e-05, + "loss": 0.8597, + "step": 53290 + }, + { + "epoch": 4.9, + "learning_rate": 2.5498758848947325e-05, + "loss": 0.9584, + "step": 53300 + }, + { + "epoch": 4.9, + "learning_rate": 2.5494161993196652e-05, + "loss": 0.8125, + "step": 53310 + }, + { + "epoch": 4.9, + "learning_rate": 2.5489565137445988e-05, + "loss": 0.9039, + "step": 53320 + }, + { + "epoch": 4.9, + "learning_rate": 2.548496828169532e-05, + "loss": 0.8682, + "step": 53330 + }, + { + "epoch": 4.9, + "learning_rate": 2.5480371425944654e-05, + "loss": 0.9619, + "step": 53340 + }, + { + "epoch": 4.9, + "learning_rate": 2.5475774570193987e-05, + "loss": 0.8805, + "step": 53350 + }, + { + "epoch": 4.91, + "learning_rate": 2.5471177714443324e-05, + "loss": 0.8385, + "step": 53360 + }, + { + "epoch": 4.91, + "learning_rate": 2.5466580858692657e-05, + "loss": 0.8486, + "step": 53370 + }, + { + "epoch": 4.91, + "learning_rate": 2.546198400294199e-05, + "loss": 0.883, + "step": 53380 + }, + { + "epoch": 4.91, + "learning_rate": 2.5457387147191327e-05, + "loss": 0.9559, + "step": 53390 + }, + { + "epoch": 4.91, + "learning_rate": 2.5452790291440653e-05, + "loss": 0.9408, + "step": 53400 + }, + { + "epoch": 4.91, + "learning_rate": 2.5448193435689986e-05, + "loss": 0.974, + "step": 53410 + }, + { + "epoch": 4.91, + "learning_rate": 2.5443596579939322e-05, + "loss": 0.8191, + "step": 53420 + }, + { + "epoch": 4.91, + "learning_rate": 2.5438999724188656e-05, + "loss": 0.918, + "step": 53430 + }, + { + "epoch": 4.91, + "learning_rate": 2.543440286843799e-05, + "loss": 0.8794, + "step": 53440 + }, + { + "epoch": 4.91, + "learning_rate": 2.5429806012687325e-05, + "loss": 0.8769, + "step": 53450 + }, + { + "epoch": 4.91, + "learning_rate": 2.5425209156936658e-05, + "loss": 1.0246, + "step": 53460 + }, + { + "epoch": 4.92, + "learning_rate": 2.542061230118599e-05, + "loss": 0.7889, + "step": 53470 + }, + { + "epoch": 4.92, + "learning_rate": 2.5416015445435328e-05, + "loss": 0.845, + "step": 53480 + }, + { + "epoch": 4.92, + "learning_rate": 2.5411418589684654e-05, + "loss": 0.7512, + "step": 53490 + }, + { + "epoch": 4.92, + "learning_rate": 2.5406821733933987e-05, + "loss": 1.1174, + "step": 53500 + }, + { + "epoch": 4.92, + "learning_rate": 2.5402224878183324e-05, + "loss": 0.7857, + "step": 53510 + }, + { + "epoch": 4.92, + "learning_rate": 2.5397628022432657e-05, + "loss": 0.832, + "step": 53520 + }, + { + "epoch": 4.92, + "learning_rate": 2.539303116668199e-05, + "loss": 0.8143, + "step": 53530 + }, + { + "epoch": 4.92, + "learning_rate": 2.5388434310931326e-05, + "loss": 0.8601, + "step": 53540 + }, + { + "epoch": 4.92, + "learning_rate": 2.538383745518066e-05, + "loss": 0.9089, + "step": 53550 + }, + { + "epoch": 4.92, + "learning_rate": 2.5379240599429992e-05, + "loss": 0.9162, + "step": 53560 + }, + { + "epoch": 4.93, + "learning_rate": 2.5374643743679326e-05, + "loss": 0.8816, + "step": 53570 + }, + { + "epoch": 4.93, + "learning_rate": 2.5370046887928655e-05, + "loss": 0.7697, + "step": 53580 + }, + { + "epoch": 4.93, + "learning_rate": 2.536545003217799e-05, + "loss": 0.9161, + "step": 53590 + }, + { + "epoch": 4.93, + "learning_rate": 2.5360853176427325e-05, + "loss": 0.7837, + "step": 53600 + }, + { + "epoch": 4.93, + "learning_rate": 2.5356256320676658e-05, + "loss": 0.7899, + "step": 53610 + }, + { + "epoch": 4.93, + "learning_rate": 2.535165946492599e-05, + "loss": 0.9379, + "step": 53620 + }, + { + "epoch": 4.93, + "learning_rate": 2.5347062609175328e-05, + "loss": 0.934, + "step": 53630 + }, + { + "epoch": 4.93, + "learning_rate": 2.534246575342466e-05, + "loss": 0.8444, + "step": 53640 + }, + { + "epoch": 4.93, + "learning_rate": 2.5337868897673994e-05, + "loss": 0.8825, + "step": 53650 + }, + { + "epoch": 4.93, + "learning_rate": 2.5333272041923327e-05, + "loss": 0.8952, + "step": 53660 + }, + { + "epoch": 4.93, + "learning_rate": 2.5328675186172656e-05, + "loss": 0.8694, + "step": 53670 + }, + { + "epoch": 4.94, + "learning_rate": 2.532407833042199e-05, + "loss": 0.872, + "step": 53680 + }, + { + "epoch": 4.94, + "learning_rate": 2.5319481474671326e-05, + "loss": 0.7738, + "step": 53690 + }, + { + "epoch": 4.94, + "learning_rate": 2.531488461892066e-05, + "loss": 0.8711, + "step": 53700 + }, + { + "epoch": 4.94, + "learning_rate": 2.5310287763169992e-05, + "loss": 0.8984, + "step": 53710 + }, + { + "epoch": 4.94, + "learning_rate": 2.530569090741933e-05, + "loss": 0.8901, + "step": 53720 + }, + { + "epoch": 4.94, + "learning_rate": 2.5301094051668662e-05, + "loss": 0.8974, + "step": 53730 + }, + { + "epoch": 4.94, + "learning_rate": 2.5296497195917995e-05, + "loss": 0.7442, + "step": 53740 + }, + { + "epoch": 4.94, + "learning_rate": 2.5291900340167328e-05, + "loss": 0.915, + "step": 53750 + }, + { + "epoch": 4.94, + "learning_rate": 2.5287303484416658e-05, + "loss": 0.7707, + "step": 53760 + }, + { + "epoch": 4.94, + "learning_rate": 2.528270662866599e-05, + "loss": 0.8067, + "step": 53770 + }, + { + "epoch": 4.94, + "learning_rate": 2.5278109772915327e-05, + "loss": 0.8651, + "step": 53780 + }, + { + "epoch": 4.95, + "learning_rate": 2.527351291716466e-05, + "loss": 0.7279, + "step": 53790 + }, + { + "epoch": 4.95, + "learning_rate": 2.5268916061413993e-05, + "loss": 0.9325, + "step": 53800 + }, + { + "epoch": 4.95, + "learning_rate": 2.526431920566333e-05, + "loss": 0.8847, + "step": 53810 + }, + { + "epoch": 4.95, + "learning_rate": 2.5259722349912663e-05, + "loss": 0.9364, + "step": 53820 + }, + { + "epoch": 4.95, + "learning_rate": 2.5255125494161996e-05, + "loss": 0.8919, + "step": 53830 + }, + { + "epoch": 4.95, + "learning_rate": 2.525052863841133e-05, + "loss": 0.976, + "step": 53840 + }, + { + "epoch": 4.95, + "learning_rate": 2.524593178266066e-05, + "loss": 0.8656, + "step": 53850 + }, + { + "epoch": 4.95, + "learning_rate": 2.5241334926909992e-05, + "loss": 0.8761, + "step": 53860 + }, + { + "epoch": 4.95, + "learning_rate": 2.523673807115933e-05, + "loss": 0.8918, + "step": 53870 + }, + { + "epoch": 4.95, + "learning_rate": 2.523214121540866e-05, + "loss": 0.8536, + "step": 53880 + }, + { + "epoch": 4.95, + "learning_rate": 2.5227544359657995e-05, + "loss": 0.8764, + "step": 53890 + }, + { + "epoch": 4.96, + "learning_rate": 2.5222947503907328e-05, + "loss": 0.9813, + "step": 53900 + }, + { + "epoch": 4.96, + "learning_rate": 2.5218350648156664e-05, + "loss": 0.7394, + "step": 53910 + }, + { + "epoch": 4.96, + "learning_rate": 2.5213753792405997e-05, + "loss": 0.8418, + "step": 53920 + }, + { + "epoch": 4.96, + "learning_rate": 2.520915693665533e-05, + "loss": 0.8528, + "step": 53930 + }, + { + "epoch": 4.96, + "learning_rate": 2.520456008090466e-05, + "loss": 0.8297, + "step": 53940 + }, + { + "epoch": 4.96, + "learning_rate": 2.5199963225153993e-05, + "loss": 0.951, + "step": 53950 + }, + { + "epoch": 4.96, + "learning_rate": 2.519536636940333e-05, + "loss": 0.8858, + "step": 53960 + }, + { + "epoch": 4.96, + "learning_rate": 2.5190769513652663e-05, + "loss": 0.8764, + "step": 53970 + }, + { + "epoch": 4.96, + "learning_rate": 2.5186172657901996e-05, + "loss": 0.8233, + "step": 53980 + }, + { + "epoch": 4.96, + "learning_rate": 2.518157580215133e-05, + "loss": 0.8742, + "step": 53990 + }, + { + "epoch": 4.96, + "learning_rate": 2.5176978946400665e-05, + "loss": 0.9035, + "step": 54000 + }, + { + "epoch": 4.97, + "learning_rate": 2.517238209065e-05, + "loss": 0.8846, + "step": 54010 + }, + { + "epoch": 4.97, + "learning_rate": 2.516778523489933e-05, + "loss": 0.9921, + "step": 54020 + }, + { + "epoch": 4.97, + "learning_rate": 2.516318837914866e-05, + "loss": 0.8176, + "step": 54030 + }, + { + "epoch": 4.97, + "learning_rate": 2.5158591523397994e-05, + "loss": 0.9272, + "step": 54040 + }, + { + "epoch": 4.97, + "learning_rate": 2.515399466764733e-05, + "loss": 0.9713, + "step": 54050 + }, + { + "epoch": 4.97, + "learning_rate": 2.5149397811896664e-05, + "loss": 0.8964, + "step": 54060 + }, + { + "epoch": 4.97, + "learning_rate": 2.5144800956145997e-05, + "loss": 0.9541, + "step": 54070 + }, + { + "epoch": 4.97, + "learning_rate": 2.514020410039533e-05, + "loss": 0.8445, + "step": 54080 + }, + { + "epoch": 4.97, + "learning_rate": 2.5135607244644667e-05, + "loss": 0.8405, + "step": 54090 + }, + { + "epoch": 4.97, + "learning_rate": 2.5131010388894e-05, + "loss": 0.9137, + "step": 54100 + }, + { + "epoch": 4.97, + "learning_rate": 2.5126413533143333e-05, + "loss": 0.9354, + "step": 54110 + }, + { + "epoch": 4.98, + "learning_rate": 2.5121816677392662e-05, + "loss": 0.8205, + "step": 54120 + }, + { + "epoch": 4.98, + "learning_rate": 2.5117219821641996e-05, + "loss": 0.815, + "step": 54130 + }, + { + "epoch": 4.98, + "learning_rate": 2.5112622965891332e-05, + "loss": 0.831, + "step": 54140 + }, + { + "epoch": 4.98, + "learning_rate": 2.5108026110140665e-05, + "loss": 0.9018, + "step": 54150 + }, + { + "epoch": 4.98, + "learning_rate": 2.5103429254389998e-05, + "loss": 0.8079, + "step": 54160 + }, + { + "epoch": 4.98, + "learning_rate": 2.509883239863933e-05, + "loss": 0.9851, + "step": 54170 + }, + { + "epoch": 4.98, + "learning_rate": 2.5094235542888668e-05, + "loss": 1.0549, + "step": 54180 + }, + { + "epoch": 4.98, + "learning_rate": 2.5089638687138e-05, + "loss": 1.0831, + "step": 54190 + }, + { + "epoch": 4.98, + "learning_rate": 2.5085041831387334e-05, + "loss": 0.8287, + "step": 54200 + }, + { + "epoch": 4.98, + "learning_rate": 2.5080444975636664e-05, + "loss": 0.8688, + "step": 54210 + }, + { + "epoch": 4.98, + "learning_rate": 2.5075848119885997e-05, + "loss": 0.9395, + "step": 54220 + }, + { + "epoch": 4.99, + "learning_rate": 2.507125126413533e-05, + "loss": 0.9291, + "step": 54230 + }, + { + "epoch": 4.99, + "learning_rate": 2.5066654408384666e-05, + "loss": 0.838, + "step": 54240 + }, + { + "epoch": 4.99, + "learning_rate": 2.5062057552634e-05, + "loss": 0.8023, + "step": 54250 + }, + { + "epoch": 4.99, + "learning_rate": 2.5057460696883332e-05, + "loss": 0.9375, + "step": 54260 + }, + { + "epoch": 4.99, + "learning_rate": 2.505286384113267e-05, + "loss": 0.9308, + "step": 54270 + }, + { + "epoch": 4.99, + "learning_rate": 2.5048266985382002e-05, + "loss": 0.8996, + "step": 54280 + }, + { + "epoch": 4.99, + "learning_rate": 2.5043670129631335e-05, + "loss": 0.9203, + "step": 54290 + }, + { + "epoch": 4.99, + "learning_rate": 2.5039073273880665e-05, + "loss": 0.9082, + "step": 54300 + }, + { + "epoch": 4.99, + "learning_rate": 2.5034476418129998e-05, + "loss": 0.8718, + "step": 54310 + }, + { + "epoch": 4.99, + "learning_rate": 2.502987956237933e-05, + "loss": 0.9969, + "step": 54320 + }, + { + "epoch": 4.99, + "learning_rate": 2.5025282706628668e-05, + "loss": 0.8418, + "step": 54330 + }, + { + "epoch": 5.0, + "learning_rate": 2.5020685850878e-05, + "loss": 0.8164, + "step": 54340 + }, + { + "epoch": 5.0, + "learning_rate": 2.5016088995127334e-05, + "loss": 0.8714, + "step": 54350 + }, + { + "epoch": 5.0, + "learning_rate": 2.501149213937667e-05, + "loss": 0.906, + "step": 54360 + }, + { + "epoch": 5.0, + "learning_rate": 2.5006895283626003e-05, + "loss": 1.0229, + "step": 54370 + }, + { + "epoch": 5.0, + "learning_rate": 2.5002298427875336e-05, + "loss": 0.8465, + "step": 54380 + }, + { + "epoch": 5.0, + "learning_rate": 2.499770157212467e-05, + "loss": 0.7364, + "step": 54390 + }, + { + "epoch": 5.0, + "learning_rate": 2.4993104716374003e-05, + "loss": 0.8708, + "step": 54400 + }, + { + "epoch": 5.0, + "learning_rate": 2.4988507860623332e-05, + "loss": 0.87, + "step": 54410 + }, + { + "epoch": 5.0, + "learning_rate": 2.498391100487267e-05, + "loss": 0.9139, + "step": 54420 + }, + { + "epoch": 5.0, + "learning_rate": 2.4979314149122002e-05, + "loss": 0.8573, + "step": 54430 + }, + { + "epoch": 5.01, + "learning_rate": 2.4974717293371335e-05, + "loss": 0.8405, + "step": 54440 + }, + { + "epoch": 5.01, + "learning_rate": 2.497012043762067e-05, + "loss": 0.8107, + "step": 54450 + }, + { + "epoch": 5.01, + "learning_rate": 2.496552358187e-05, + "loss": 0.8783, + "step": 54460 + }, + { + "epoch": 5.01, + "learning_rate": 2.4960926726119334e-05, + "loss": 0.7549, + "step": 54470 + }, + { + "epoch": 5.01, + "learning_rate": 2.495632987036867e-05, + "loss": 0.9356, + "step": 54480 + }, + { + "epoch": 5.01, + "learning_rate": 2.4951733014618004e-05, + "loss": 0.8738, + "step": 54490 + }, + { + "epoch": 5.01, + "learning_rate": 2.4947136158867333e-05, + "loss": 0.8858, + "step": 54500 + }, + { + "epoch": 5.01, + "learning_rate": 2.494253930311667e-05, + "loss": 0.7526, + "step": 54510 + }, + { + "epoch": 5.01, + "learning_rate": 2.4937942447366003e-05, + "loss": 0.9778, + "step": 54520 + }, + { + "epoch": 5.01, + "learning_rate": 2.4933345591615336e-05, + "loss": 0.8449, + "step": 54530 + }, + { + "epoch": 5.01, + "learning_rate": 2.4928748735864673e-05, + "loss": 0.8822, + "step": 54540 + }, + { + "epoch": 5.02, + "learning_rate": 2.4924151880114002e-05, + "loss": 0.9717, + "step": 54550 + }, + { + "epoch": 5.02, + "learning_rate": 2.4919555024363335e-05, + "loss": 0.8209, + "step": 54560 + }, + { + "epoch": 5.02, + "learning_rate": 2.4914958168612672e-05, + "loss": 0.81, + "step": 54570 + }, + { + "epoch": 5.02, + "learning_rate": 2.4910361312862005e-05, + "loss": 0.8161, + "step": 54580 + }, + { + "epoch": 5.02, + "learning_rate": 2.4905764457111335e-05, + "loss": 0.9514, + "step": 54590 + }, + { + "epoch": 5.02, + "learning_rate": 2.490116760136067e-05, + "loss": 0.8838, + "step": 54600 + }, + { + "epoch": 5.02, + "learning_rate": 2.4896570745610004e-05, + "loss": 0.9603, + "step": 54610 + }, + { + "epoch": 5.02, + "learning_rate": 2.4891973889859337e-05, + "loss": 0.9488, + "step": 54620 + }, + { + "epoch": 5.02, + "learning_rate": 2.488737703410867e-05, + "loss": 0.9692, + "step": 54630 + }, + { + "epoch": 5.02, + "learning_rate": 2.4882780178358003e-05, + "loss": 0.8756, + "step": 54640 + }, + { + "epoch": 5.02, + "learning_rate": 2.4878183322607337e-05, + "loss": 0.8533, + "step": 54650 + }, + { + "epoch": 5.03, + "learning_rate": 2.4873586466856673e-05, + "loss": 0.93, + "step": 54660 + }, + { + "epoch": 5.03, + "learning_rate": 2.4868989611106006e-05, + "loss": 0.868, + "step": 54670 + }, + { + "epoch": 5.03, + "learning_rate": 2.4864392755355336e-05, + "loss": 0.9038, + "step": 54680 + }, + { + "epoch": 5.03, + "learning_rate": 2.4859795899604672e-05, + "loss": 0.8172, + "step": 54690 + }, + { + "epoch": 5.03, + "learning_rate": 2.4855199043854005e-05, + "loss": 0.9856, + "step": 54700 + }, + { + "epoch": 5.03, + "learning_rate": 2.485060218810334e-05, + "loss": 0.8279, + "step": 54710 + }, + { + "epoch": 5.03, + "learning_rate": 2.484600533235267e-05, + "loss": 0.8424, + "step": 54720 + }, + { + "epoch": 5.03, + "learning_rate": 2.4841408476602005e-05, + "loss": 0.8985, + "step": 54730 + }, + { + "epoch": 5.03, + "learning_rate": 2.4836811620851338e-05, + "loss": 0.8354, + "step": 54740 + }, + { + "epoch": 5.03, + "learning_rate": 2.4832214765100674e-05, + "loss": 0.8585, + "step": 54750 + }, + { + "epoch": 5.03, + "learning_rate": 2.4827617909350007e-05, + "loss": 0.8391, + "step": 54760 + }, + { + "epoch": 5.04, + "learning_rate": 2.4823021053599337e-05, + "loss": 0.9709, + "step": 54770 + }, + { + "epoch": 5.04, + "learning_rate": 2.4818424197848673e-05, + "loss": 0.7965, + "step": 54780 + }, + { + "epoch": 5.04, + "learning_rate": 2.4813827342098007e-05, + "loss": 0.7613, + "step": 54790 + }, + { + "epoch": 5.04, + "learning_rate": 2.480923048634734e-05, + "loss": 0.8948, + "step": 54800 + }, + { + "epoch": 5.04, + "learning_rate": 2.4804633630596673e-05, + "loss": 1.0639, + "step": 54810 + }, + { + "epoch": 5.04, + "learning_rate": 2.4800036774846006e-05, + "loss": 0.9097, + "step": 54820 + }, + { + "epoch": 5.04, + "learning_rate": 2.479543991909534e-05, + "loss": 0.9558, + "step": 54830 + }, + { + "epoch": 5.04, + "learning_rate": 2.4790843063344672e-05, + "loss": 0.8721, + "step": 54840 + }, + { + "epoch": 5.04, + "learning_rate": 2.478624620759401e-05, + "loss": 0.7567, + "step": 54850 + }, + { + "epoch": 5.04, + "learning_rate": 2.4781649351843338e-05, + "loss": 0.7926, + "step": 54860 + }, + { + "epoch": 5.04, + "learning_rate": 2.4777052496092675e-05, + "loss": 0.9319, + "step": 54870 + }, + { + "epoch": 5.05, + "learning_rate": 2.4772455640342008e-05, + "loss": 0.8568, + "step": 54880 + }, + { + "epoch": 5.05, + "learning_rate": 2.476785878459134e-05, + "loss": 0.8532, + "step": 54890 + }, + { + "epoch": 5.05, + "learning_rate": 2.4763261928840674e-05, + "loss": 0.8778, + "step": 54900 + }, + { + "epoch": 5.05, + "learning_rate": 2.4758665073090007e-05, + "loss": 0.9659, + "step": 54910 + }, + { + "epoch": 5.05, + "learning_rate": 2.475406821733934e-05, + "loss": 0.8472, + "step": 54920 + }, + { + "epoch": 5.05, + "learning_rate": 2.4749471361588673e-05, + "loss": 0.8947, + "step": 54930 + }, + { + "epoch": 5.05, + "learning_rate": 2.474487450583801e-05, + "loss": 0.7866, + "step": 54940 + }, + { + "epoch": 5.05, + "learning_rate": 2.474027765008734e-05, + "loss": 0.8705, + "step": 54950 + }, + { + "epoch": 5.05, + "learning_rate": 2.4735680794336672e-05, + "loss": 0.8883, + "step": 54960 + }, + { + "epoch": 5.05, + "learning_rate": 2.473108393858601e-05, + "loss": 0.9454, + "step": 54970 + }, + { + "epoch": 5.05, + "learning_rate": 2.4726487082835342e-05, + "loss": 0.9701, + "step": 54980 + }, + { + "epoch": 5.06, + "learning_rate": 2.4721890227084675e-05, + "loss": 0.9546, + "step": 54990 + }, + { + "epoch": 5.06, + "learning_rate": 2.4717293371334008e-05, + "loss": 0.8483, + "step": 55000 + }, + { + "epoch": 5.06, + "eval_accuracy": 0.5908296943231441, + "eval_loss": 0.8710007667541504, + "eval_runtime": 159.9604, + "eval_samples_per_second": 28.632, + "eval_steps_per_second": 3.582, + "step": 55000 + }, + { + "epoch": 5.06, + "learning_rate": 2.471269651558334e-05, + "loss": 0.9456, + "step": 55010 + }, + { + "epoch": 5.06, + "learning_rate": 2.4708099659832674e-05, + "loss": 0.8691, + "step": 55020 + }, + { + "epoch": 5.06, + "learning_rate": 2.470350280408201e-05, + "loss": 0.9334, + "step": 55030 + }, + { + "epoch": 5.06, + "learning_rate": 2.469890594833134e-05, + "loss": 0.8488, + "step": 55040 + }, + { + "epoch": 5.06, + "learning_rate": 2.4694309092580674e-05, + "loss": 0.9485, + "step": 55050 + }, + { + "epoch": 5.06, + "learning_rate": 2.468971223683001e-05, + "loss": 0.9147, + "step": 55060 + }, + { + "epoch": 5.06, + "learning_rate": 2.4685115381079343e-05, + "loss": 0.8021, + "step": 55070 + }, + { + "epoch": 5.06, + "learning_rate": 2.4680518525328676e-05, + "loss": 0.9539, + "step": 55080 + }, + { + "epoch": 5.06, + "learning_rate": 2.467592166957801e-05, + "loss": 0.7796, + "step": 55090 + }, + { + "epoch": 5.07, + "learning_rate": 2.4671324813827343e-05, + "loss": 0.8627, + "step": 55100 + }, + { + "epoch": 5.07, + "learning_rate": 2.4666727958076676e-05, + "loss": 0.759, + "step": 55110 + }, + { + "epoch": 5.07, + "learning_rate": 2.4662131102326012e-05, + "loss": 0.7689, + "step": 55120 + }, + { + "epoch": 5.07, + "learning_rate": 2.4657534246575342e-05, + "loss": 0.9118, + "step": 55130 + }, + { + "epoch": 5.07, + "learning_rate": 2.4652937390824675e-05, + "loss": 0.8572, + "step": 55140 + }, + { + "epoch": 5.07, + "learning_rate": 2.464834053507401e-05, + "loss": 0.77, + "step": 55150 + }, + { + "epoch": 5.07, + "learning_rate": 2.4643743679323344e-05, + "loss": 0.802, + "step": 55160 + }, + { + "epoch": 5.07, + "learning_rate": 2.4639146823572678e-05, + "loss": 0.7213, + "step": 55170 + }, + { + "epoch": 5.07, + "learning_rate": 2.463454996782201e-05, + "loss": 0.8494, + "step": 55180 + }, + { + "epoch": 5.07, + "learning_rate": 2.4629953112071344e-05, + "loss": 0.8589, + "step": 55190 + }, + { + "epoch": 5.07, + "learning_rate": 2.4625356256320677e-05, + "loss": 0.8936, + "step": 55200 + }, + { + "epoch": 5.08, + "learning_rate": 2.4620759400570013e-05, + "loss": 0.8901, + "step": 55210 + }, + { + "epoch": 5.08, + "learning_rate": 2.4616162544819343e-05, + "loss": 0.8469, + "step": 55220 + }, + { + "epoch": 5.08, + "learning_rate": 2.4611565689068676e-05, + "loss": 0.8742, + "step": 55230 + }, + { + "epoch": 5.08, + "learning_rate": 2.4606968833318013e-05, + "loss": 0.8185, + "step": 55240 + }, + { + "epoch": 5.08, + "learning_rate": 2.4602371977567346e-05, + "loss": 0.926, + "step": 55250 + }, + { + "epoch": 5.08, + "learning_rate": 2.459777512181668e-05, + "loss": 0.9239, + "step": 55260 + }, + { + "epoch": 5.08, + "learning_rate": 2.4593178266066012e-05, + "loss": 0.9143, + "step": 55270 + }, + { + "epoch": 5.08, + "learning_rate": 2.4588581410315345e-05, + "loss": 0.9368, + "step": 55280 + }, + { + "epoch": 5.08, + "learning_rate": 2.4583984554564678e-05, + "loss": 0.9351, + "step": 55290 + }, + { + "epoch": 5.08, + "learning_rate": 2.4579387698814014e-05, + "loss": 0.768, + "step": 55300 + }, + { + "epoch": 5.09, + "learning_rate": 2.4574790843063348e-05, + "loss": 0.8918, + "step": 55310 + }, + { + "epoch": 5.09, + "learning_rate": 2.4570193987312677e-05, + "loss": 1.0555, + "step": 55320 + }, + { + "epoch": 5.09, + "learning_rate": 2.4565597131562014e-05, + "loss": 0.9447, + "step": 55330 + }, + { + "epoch": 5.09, + "learning_rate": 2.4561000275811347e-05, + "loss": 1.004, + "step": 55340 + }, + { + "epoch": 5.09, + "learning_rate": 2.455640342006068e-05, + "loss": 0.8283, + "step": 55350 + }, + { + "epoch": 5.09, + "learning_rate": 2.4551806564310013e-05, + "loss": 0.8379, + "step": 55360 + }, + { + "epoch": 5.09, + "learning_rate": 2.4547209708559346e-05, + "loss": 0.9561, + "step": 55370 + }, + { + "epoch": 5.09, + "learning_rate": 2.454261285280868e-05, + "loss": 0.8845, + "step": 55380 + }, + { + "epoch": 5.09, + "learning_rate": 2.4538015997058016e-05, + "loss": 0.7831, + "step": 55390 + }, + { + "epoch": 5.09, + "learning_rate": 2.453341914130735e-05, + "loss": 0.819, + "step": 55400 + }, + { + "epoch": 5.09, + "learning_rate": 2.452882228555668e-05, + "loss": 0.9339, + "step": 55410 + }, + { + "epoch": 5.1, + "learning_rate": 2.4524225429806015e-05, + "loss": 0.9016, + "step": 55420 + }, + { + "epoch": 5.1, + "learning_rate": 2.4519628574055348e-05, + "loss": 0.9277, + "step": 55430 + }, + { + "epoch": 5.1, + "learning_rate": 2.451503171830468e-05, + "loss": 0.8486, + "step": 55440 + }, + { + "epoch": 5.1, + "learning_rate": 2.4510434862554014e-05, + "loss": 0.8605, + "step": 55450 + }, + { + "epoch": 5.1, + "learning_rate": 2.4505838006803347e-05, + "loss": 0.8512, + "step": 55460 + }, + { + "epoch": 5.1, + "learning_rate": 2.450124115105268e-05, + "loss": 0.7934, + "step": 55470 + }, + { + "epoch": 5.1, + "learning_rate": 2.4496644295302017e-05, + "loss": 0.9188, + "step": 55480 + }, + { + "epoch": 5.1, + "learning_rate": 2.449204743955135e-05, + "loss": 0.8711, + "step": 55490 + }, + { + "epoch": 5.1, + "learning_rate": 2.448745058380068e-05, + "loss": 0.9917, + "step": 55500 + } + ], + "logging_steps": 10, + "max_steps": 108770, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1.1721851374961664e+17, + "trial_name": null, + "trial_params": null +}