{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.03379139445821131, "eval_steps": 500, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00022527596305474206, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.6023, "step": 1 }, { "epoch": 0.0004505519261094841, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.7508, "step": 2 }, { "epoch": 0.0006758278891642261, "grad_norm": 5.613945484161377, "learning_rate": 4e-05, "loss": 0.671, "step": 3 }, { "epoch": 0.0009011038522189682, "grad_norm": 9.077590942382812, "learning_rate": 8e-05, "loss": 0.696, "step": 4 }, { "epoch": 0.0011263798152737104, "grad_norm": 2.5325891971588135, "learning_rate": 0.00012, "loss": 0.7085, "step": 5 }, { "epoch": 0.0013516557783284523, "grad_norm": 4.031753063201904, "learning_rate": 0.00016, "loss": 0.6449, "step": 6 }, { "epoch": 0.0015769317413831944, "grad_norm": 4.011942386627197, "learning_rate": 0.0002, "loss": 0.744, "step": 7 }, { "epoch": 0.0018022077044379365, "grad_norm": 9.572277069091797, "learning_rate": 0.00019862068965517243, "loss": 0.9131, "step": 8 }, { "epoch": 0.0020274836674926784, "grad_norm": 5.0683441162109375, "learning_rate": 0.00019724137931034484, "loss": 1.2286, "step": 9 }, { "epoch": 0.0022527596305474207, "grad_norm": 2.039677143096924, "learning_rate": 0.00019586206896551723, "loss": 0.7034, "step": 10 }, { "epoch": 0.0024780355936021626, "grad_norm": 2.734919309616089, "learning_rate": 0.00019448275862068965, "loss": 0.6501, "step": 11 }, { "epoch": 0.0027033115566569045, "grad_norm": 3.260087490081787, "learning_rate": 0.0001931034482758621, "loss": 0.822, "step": 12 }, { "epoch": 0.002928587519711647, "grad_norm": 1.5656558275222778, "learning_rate": 0.0001917241379310345, "loss": 0.7436, "step": 13 }, { "epoch": 0.0031538634827663887, "grad_norm": 1.110427737236023, "learning_rate": 0.0001903448275862069, "loss": 0.6911, "step": 14 }, { "epoch": 0.003379139445821131, "grad_norm": 2.4690375328063965, "learning_rate": 0.00018896551724137932, "loss": 0.6512, "step": 15 }, { "epoch": 0.003604415408875873, "grad_norm": 1.7050955295562744, "learning_rate": 0.00018758620689655173, "loss": 0.674, "step": 16 }, { "epoch": 0.003829691371930615, "grad_norm": 1.690824031829834, "learning_rate": 0.00018620689655172415, "loss": 0.7095, "step": 17 }, { "epoch": 0.004054967334985357, "grad_norm": 2.0714144706726074, "learning_rate": 0.00018482758620689654, "loss": 0.674, "step": 18 }, { "epoch": 0.004280243298040099, "grad_norm": 2.2080976963043213, "learning_rate": 0.00018344827586206896, "loss": 0.9342, "step": 19 }, { "epoch": 0.004505519261094841, "grad_norm": 1.3403278589248657, "learning_rate": 0.0001820689655172414, "loss": 0.8065, "step": 20 }, { "epoch": 0.004730795224149583, "grad_norm": 1.1563327312469482, "learning_rate": 0.00018068965517241382, "loss": 0.5639, "step": 21 }, { "epoch": 0.004956071187204325, "grad_norm": 1.6989210844039917, "learning_rate": 0.0001793103448275862, "loss": 0.8734, "step": 22 }, { "epoch": 0.0051813471502590676, "grad_norm": 2.2576966285705566, "learning_rate": 0.00017793103448275862, "loss": 0.7664, "step": 23 }, { "epoch": 0.005406623113313809, "grad_norm": 1.1227270364761353, "learning_rate": 0.00017655172413793104, "loss": 0.6975, "step": 24 }, { "epoch": 0.005631899076368551, "grad_norm": 1.3811314105987549, "learning_rate": 0.00017517241379310346, "loss": 0.8249, "step": 25 }, { "epoch": 0.005857175039423294, "grad_norm": 1.6043484210968018, "learning_rate": 0.00017379310344827587, "loss": 0.7919, "step": 26 }, { "epoch": 0.006082451002478036, "grad_norm": 0.9475104212760925, "learning_rate": 0.00017241379310344826, "loss": 0.6637, "step": 27 }, { "epoch": 0.0063077269655327775, "grad_norm": 1.3347761631011963, "learning_rate": 0.0001710344827586207, "loss": 0.5952, "step": 28 }, { "epoch": 0.00653300292858752, "grad_norm": 1.4313766956329346, "learning_rate": 0.00016965517241379312, "loss": 0.7083, "step": 29 }, { "epoch": 0.006758278891642262, "grad_norm": 0.9148324131965637, "learning_rate": 0.00016827586206896554, "loss": 0.5013, "step": 30 }, { "epoch": 0.006983554854697004, "grad_norm": 1.9467324018478394, "learning_rate": 0.00016689655172413793, "loss": 1.176, "step": 31 }, { "epoch": 0.007208830817751746, "grad_norm": 2.2809526920318604, "learning_rate": 0.00016551724137931035, "loss": 0.677, "step": 32 }, { "epoch": 0.007434106780806488, "grad_norm": 2.034575939178467, "learning_rate": 0.00016413793103448276, "loss": 1.005, "step": 33 }, { "epoch": 0.00765938274386123, "grad_norm": 1.560476303100586, "learning_rate": 0.00016275862068965518, "loss": 0.8957, "step": 34 }, { "epoch": 0.007884658706915973, "grad_norm": 1.5770775079727173, "learning_rate": 0.0001613793103448276, "loss": 0.7145, "step": 35 }, { "epoch": 0.008109934669970714, "grad_norm": 2.0109949111938477, "learning_rate": 0.00016, "loss": 0.8319, "step": 36 }, { "epoch": 0.008335210633025456, "grad_norm": 1.2099483013153076, "learning_rate": 0.00015862068965517243, "loss": 0.6425, "step": 37 }, { "epoch": 0.008560486596080198, "grad_norm": 1.0231844186782837, "learning_rate": 0.00015724137931034485, "loss": 0.7191, "step": 38 }, { "epoch": 0.00878576255913494, "grad_norm": 1.020296335220337, "learning_rate": 0.00015586206896551724, "loss": 0.6489, "step": 39 }, { "epoch": 0.009011038522189683, "grad_norm": 1.5842112302780151, "learning_rate": 0.00015448275862068965, "loss": 0.7918, "step": 40 }, { "epoch": 0.009236314485244425, "grad_norm": 1.2116183042526245, "learning_rate": 0.00015310344827586207, "loss": 0.864, "step": 41 }, { "epoch": 0.009461590448299166, "grad_norm": 1.5348471403121948, "learning_rate": 0.00015172413793103449, "loss": 0.6097, "step": 42 }, { "epoch": 0.009686866411353908, "grad_norm": 1.4691479206085205, "learning_rate": 0.0001503448275862069, "loss": 0.8354, "step": 43 }, { "epoch": 0.00991214237440865, "grad_norm": 1.1324158906936646, "learning_rate": 0.00014896551724137932, "loss": 0.784, "step": 44 }, { "epoch": 0.010137418337463393, "grad_norm": 1.1594136953353882, "learning_rate": 0.00014758620689655174, "loss": 0.789, "step": 45 }, { "epoch": 0.010362694300518135, "grad_norm": 1.2883914709091187, "learning_rate": 0.00014620689655172415, "loss": 0.7743, "step": 46 }, { "epoch": 0.010587970263572877, "grad_norm": 1.4808337688446045, "learning_rate": 0.00014482758620689657, "loss": 0.8031, "step": 47 }, { "epoch": 0.010813246226627618, "grad_norm": 1.5233800411224365, "learning_rate": 0.00014344827586206896, "loss": 0.6479, "step": 48 }, { "epoch": 0.01103852218968236, "grad_norm": 1.2644929885864258, "learning_rate": 0.00014206896551724138, "loss": 0.6763, "step": 49 }, { "epoch": 0.011263798152737103, "grad_norm": 1.3025438785552979, "learning_rate": 0.0001406896551724138, "loss": 0.7837, "step": 50 }, { "epoch": 0.011489074115791845, "grad_norm": 1.2083336114883423, "learning_rate": 0.0001393103448275862, "loss": 0.7612, "step": 51 }, { "epoch": 0.011714350078846587, "grad_norm": 1.0492626428604126, "learning_rate": 0.00013793103448275863, "loss": 0.529, "step": 52 }, { "epoch": 0.01193962604190133, "grad_norm": 1.2667231559753418, "learning_rate": 0.00013655172413793104, "loss": 0.7997, "step": 53 }, { "epoch": 0.012164902004956072, "grad_norm": 1.2050637006759644, "learning_rate": 0.00013517241379310346, "loss": 0.7095, "step": 54 }, { "epoch": 0.012390177968010813, "grad_norm": 1.343514323234558, "learning_rate": 0.00013379310344827588, "loss": 0.8366, "step": 55 }, { "epoch": 0.012615453931065555, "grad_norm": 0.9940245151519775, "learning_rate": 0.0001324137931034483, "loss": 0.8179, "step": 56 }, { "epoch": 0.012840729894120297, "grad_norm": 0.9897469878196716, "learning_rate": 0.00013103448275862068, "loss": 0.6974, "step": 57 }, { "epoch": 0.01306600585717504, "grad_norm": 1.0596684217453003, "learning_rate": 0.0001296551724137931, "loss": 0.8708, "step": 58 }, { "epoch": 0.013291281820229782, "grad_norm": 1.3805614709854126, "learning_rate": 0.00012827586206896552, "loss": 0.9937, "step": 59 }, { "epoch": 0.013516557783284524, "grad_norm": 1.1964428424835205, "learning_rate": 0.00012689655172413793, "loss": 0.7819, "step": 60 }, { "epoch": 0.013741833746339265, "grad_norm": 0.8092629313468933, "learning_rate": 0.00012551724137931035, "loss": 0.6211, "step": 61 }, { "epoch": 0.013967109709394007, "grad_norm": 0.9752876162528992, "learning_rate": 0.00012413793103448277, "loss": 0.6453, "step": 62 }, { "epoch": 0.01419238567244875, "grad_norm": 1.1409693956375122, "learning_rate": 0.00012275862068965518, "loss": 0.6527, "step": 63 }, { "epoch": 0.014417661635503492, "grad_norm": 0.9939149022102356, "learning_rate": 0.00012137931034482759, "loss": 0.6148, "step": 64 }, { "epoch": 0.014642937598558234, "grad_norm": 0.779381275177002, "learning_rate": 0.00012, "loss": 0.638, "step": 65 }, { "epoch": 0.014868213561612977, "grad_norm": 1.0121289491653442, "learning_rate": 0.0001186206896551724, "loss": 0.5382, "step": 66 }, { "epoch": 0.015093489524667717, "grad_norm": 1.0414173603057861, "learning_rate": 0.00011724137931034482, "loss": 0.7211, "step": 67 }, { "epoch": 0.01531876548772246, "grad_norm": 0.7451056838035583, "learning_rate": 0.00011586206896551725, "loss": 0.4563, "step": 68 }, { "epoch": 0.015544041450777202, "grad_norm": 1.3068177700042725, "learning_rate": 0.00011448275862068967, "loss": 0.8087, "step": 69 }, { "epoch": 0.015769317413831946, "grad_norm": 1.199892282485962, "learning_rate": 0.00011310344827586207, "loss": 0.5741, "step": 70 }, { "epoch": 0.015994593376886686, "grad_norm": 1.2372207641601562, "learning_rate": 0.00011172413793103449, "loss": 0.7717, "step": 71 }, { "epoch": 0.016219869339941427, "grad_norm": 1.30841863155365, "learning_rate": 0.0001103448275862069, "loss": 0.728, "step": 72 }, { "epoch": 0.01644514530299617, "grad_norm": 1.0025840997695923, "learning_rate": 0.00010896551724137931, "loss": 0.6815, "step": 73 }, { "epoch": 0.01667042126605091, "grad_norm": 0.8189815878868103, "learning_rate": 0.00010758620689655173, "loss": 0.5676, "step": 74 }, { "epoch": 0.016895697229105656, "grad_norm": 0.9632795453071594, "learning_rate": 0.00010620689655172413, "loss": 0.5635, "step": 75 }, { "epoch": 0.017120973192160396, "grad_norm": 0.7914460897445679, "learning_rate": 0.00010482758620689656, "loss": 0.7149, "step": 76 }, { "epoch": 0.017346249155215137, "grad_norm": 1.2767926454544067, "learning_rate": 0.00010344827586206898, "loss": 0.6674, "step": 77 }, { "epoch": 0.01757152511826988, "grad_norm": 1.0681113004684448, "learning_rate": 0.0001020689655172414, "loss": 0.637, "step": 78 }, { "epoch": 0.01779680108132462, "grad_norm": 0.8826852440834045, "learning_rate": 0.0001006896551724138, "loss": 0.6695, "step": 79 }, { "epoch": 0.018022077044379366, "grad_norm": 0.9347899556159973, "learning_rate": 9.931034482758621e-05, "loss": 0.6348, "step": 80 }, { "epoch": 0.018247353007434106, "grad_norm": 1.2649730443954468, "learning_rate": 9.793103448275862e-05, "loss": 0.7792, "step": 81 }, { "epoch": 0.01847262897048885, "grad_norm": 1.058038592338562, "learning_rate": 9.655172413793105e-05, "loss": 0.5364, "step": 82 }, { "epoch": 0.01869790493354359, "grad_norm": 1.2079238891601562, "learning_rate": 9.517241379310345e-05, "loss": 0.6945, "step": 83 }, { "epoch": 0.01892318089659833, "grad_norm": 1.1384273767471313, "learning_rate": 9.379310344827587e-05, "loss": 0.6319, "step": 84 }, { "epoch": 0.019148456859653076, "grad_norm": 1.1441253423690796, "learning_rate": 9.241379310344827e-05, "loss": 0.7472, "step": 85 }, { "epoch": 0.019373732822707816, "grad_norm": 0.7343708872795105, "learning_rate": 9.10344827586207e-05, "loss": 0.681, "step": 86 }, { "epoch": 0.01959900878576256, "grad_norm": 0.8574519157409668, "learning_rate": 8.96551724137931e-05, "loss": 0.7444, "step": 87 }, { "epoch": 0.0198242847488173, "grad_norm": 1.2369730472564697, "learning_rate": 8.827586206896552e-05, "loss": 0.7337, "step": 88 }, { "epoch": 0.020049560711872045, "grad_norm": 1.4211863279342651, "learning_rate": 8.689655172413794e-05, "loss": 0.7335, "step": 89 }, { "epoch": 0.020274836674926786, "grad_norm": 1.733542561531067, "learning_rate": 8.551724137931035e-05, "loss": 0.8361, "step": 90 }, { "epoch": 0.020500112637981526, "grad_norm": 1.19984769821167, "learning_rate": 8.413793103448277e-05, "loss": 0.7085, "step": 91 }, { "epoch": 0.02072538860103627, "grad_norm": 1.0752897262573242, "learning_rate": 8.275862068965517e-05, "loss": 0.6854, "step": 92 }, { "epoch": 0.02095066456409101, "grad_norm": 1.0407134294509888, "learning_rate": 8.137931034482759e-05, "loss": 0.6913, "step": 93 }, { "epoch": 0.021175940527145755, "grad_norm": 0.991878867149353, "learning_rate": 8e-05, "loss": 0.6556, "step": 94 }, { "epoch": 0.021401216490200495, "grad_norm": 0.9897178411483765, "learning_rate": 7.862068965517242e-05, "loss": 0.6489, "step": 95 }, { "epoch": 0.021626492453255236, "grad_norm": 1.171608805656433, "learning_rate": 7.724137931034483e-05, "loss": 0.7589, "step": 96 }, { "epoch": 0.02185176841630998, "grad_norm": 0.8374713063240051, "learning_rate": 7.586206896551724e-05, "loss": 0.665, "step": 97 }, { "epoch": 0.02207704437936472, "grad_norm": 0.7983826994895935, "learning_rate": 7.448275862068966e-05, "loss": 0.6479, "step": 98 }, { "epoch": 0.022302320342419465, "grad_norm": 1.0470448732376099, "learning_rate": 7.310344827586208e-05, "loss": 0.7198, "step": 99 }, { "epoch": 0.022527596305474205, "grad_norm": 0.6758666634559631, "learning_rate": 7.172413793103448e-05, "loss": 0.6805, "step": 100 }, { "epoch": 0.02275287226852895, "grad_norm": 1.1747039556503296, "learning_rate": 7.03448275862069e-05, "loss": 0.5857, "step": 101 }, { "epoch": 0.02297814823158369, "grad_norm": 1.0916589498519897, "learning_rate": 6.896551724137931e-05, "loss": 0.6568, "step": 102 }, { "epoch": 0.02320342419463843, "grad_norm": 1.0316485166549683, "learning_rate": 6.758620689655173e-05, "loss": 0.6978, "step": 103 }, { "epoch": 0.023428700157693175, "grad_norm": 0.8757756352424622, "learning_rate": 6.620689655172415e-05, "loss": 0.7105, "step": 104 }, { "epoch": 0.023653976120747915, "grad_norm": 1.704077124595642, "learning_rate": 6.482758620689655e-05, "loss": 0.7672, "step": 105 }, { "epoch": 0.02387925208380266, "grad_norm": 1.269713044166565, "learning_rate": 6.344827586206897e-05, "loss": 0.7722, "step": 106 }, { "epoch": 0.0241045280468574, "grad_norm": 1.1678601503372192, "learning_rate": 6.206896551724138e-05, "loss": 0.6886, "step": 107 }, { "epoch": 0.024329804009912144, "grad_norm": 0.846788227558136, "learning_rate": 6.068965517241379e-05, "loss": 0.6549, "step": 108 }, { "epoch": 0.024555079972966885, "grad_norm": 0.8159088492393494, "learning_rate": 5.93103448275862e-05, "loss": 0.5521, "step": 109 }, { "epoch": 0.024780355936021625, "grad_norm": 1.0074665546417236, "learning_rate": 5.7931034482758627e-05, "loss": 0.7028, "step": 110 }, { "epoch": 0.02500563189907637, "grad_norm": 1.4462883472442627, "learning_rate": 5.6551724137931037e-05, "loss": 0.6538, "step": 111 }, { "epoch": 0.02523090786213111, "grad_norm": 1.0450752973556519, "learning_rate": 5.517241379310345e-05, "loss": 0.5618, "step": 112 }, { "epoch": 0.025456183825185854, "grad_norm": 1.4377208948135376, "learning_rate": 5.379310344827586e-05, "loss": 0.6906, "step": 113 }, { "epoch": 0.025681459788240595, "grad_norm": 1.082992434501648, "learning_rate": 5.241379310344828e-05, "loss": 0.7786, "step": 114 }, { "epoch": 0.025906735751295335, "grad_norm": 0.826268196105957, "learning_rate": 5.10344827586207e-05, "loss": 0.5919, "step": 115 }, { "epoch": 0.02613201171435008, "grad_norm": 1.2465078830718994, "learning_rate": 4.9655172413793107e-05, "loss": 0.5442, "step": 116 }, { "epoch": 0.02635728767740482, "grad_norm": 1.2642978429794312, "learning_rate": 4.827586206896552e-05, "loss": 0.7476, "step": 117 }, { "epoch": 0.026582563640459564, "grad_norm": 1.2854329347610474, "learning_rate": 4.689655172413793e-05, "loss": 0.6467, "step": 118 }, { "epoch": 0.026807839603514304, "grad_norm": 0.7920569181442261, "learning_rate": 4.551724137931035e-05, "loss": 0.6082, "step": 119 }, { "epoch": 0.02703311556656905, "grad_norm": 0.7808762788772583, "learning_rate": 4.413793103448276e-05, "loss": 0.6059, "step": 120 }, { "epoch": 0.02725839152962379, "grad_norm": 0.8576288223266602, "learning_rate": 4.275862068965518e-05, "loss": 0.4291, "step": 121 }, { "epoch": 0.02748366749267853, "grad_norm": 1.3368127346038818, "learning_rate": 4.1379310344827587e-05, "loss": 0.7785, "step": 122 }, { "epoch": 0.027708943455733274, "grad_norm": 0.8374606370925903, "learning_rate": 4e-05, "loss": 0.6658, "step": 123 }, { "epoch": 0.027934219418788014, "grad_norm": 1.0975725650787354, "learning_rate": 3.862068965517241e-05, "loss": 0.6583, "step": 124 }, { "epoch": 0.02815949538184276, "grad_norm": 0.9977630376815796, "learning_rate": 3.724137931034483e-05, "loss": 0.7911, "step": 125 }, { "epoch": 0.0283847713448975, "grad_norm": 1.1773369312286377, "learning_rate": 3.586206896551724e-05, "loss": 0.7156, "step": 126 }, { "epoch": 0.028610047307952243, "grad_norm": 0.8216753005981445, "learning_rate": 3.4482758620689657e-05, "loss": 0.7857, "step": 127 }, { "epoch": 0.028835323271006984, "grad_norm": 0.7817992568016052, "learning_rate": 3.310344827586207e-05, "loss": 0.5289, "step": 128 }, { "epoch": 0.029060599234061724, "grad_norm": 0.8712431788444519, "learning_rate": 3.172413793103448e-05, "loss": 0.4977, "step": 129 }, { "epoch": 0.02928587519711647, "grad_norm": 1.0425325632095337, "learning_rate": 3.0344827586206897e-05, "loss": 0.7142, "step": 130 }, { "epoch": 0.02951115116017121, "grad_norm": 0.8632523417472839, "learning_rate": 2.8965517241379313e-05, "loss": 0.6743, "step": 131 }, { "epoch": 0.029736427123225953, "grad_norm": 0.9849842190742493, "learning_rate": 2.7586206896551727e-05, "loss": 0.6884, "step": 132 }, { "epoch": 0.029961703086280694, "grad_norm": 0.8832971453666687, "learning_rate": 2.620689655172414e-05, "loss": 0.6231, "step": 133 }, { "epoch": 0.030186979049335434, "grad_norm": 0.9129248857498169, "learning_rate": 2.4827586206896553e-05, "loss": 0.6991, "step": 134 }, { "epoch": 0.03041225501239018, "grad_norm": 0.8295918107032776, "learning_rate": 2.3448275862068967e-05, "loss": 0.6178, "step": 135 }, { "epoch": 0.03063753097544492, "grad_norm": 1.2373234033584595, "learning_rate": 2.206896551724138e-05, "loss": 0.8071, "step": 136 }, { "epoch": 0.030862806938499663, "grad_norm": 0.8981180787086487, "learning_rate": 2.0689655172413793e-05, "loss": 0.6978, "step": 137 }, { "epoch": 0.031088082901554404, "grad_norm": 0.8309744000434875, "learning_rate": 1.9310344827586207e-05, "loss": 0.5436, "step": 138 }, { "epoch": 0.03131335886460915, "grad_norm": 0.762231171131134, "learning_rate": 1.793103448275862e-05, "loss": 0.6713, "step": 139 }, { "epoch": 0.03153863482766389, "grad_norm": 0.7723814845085144, "learning_rate": 1.6551724137931037e-05, "loss": 0.6452, "step": 140 }, { "epoch": 0.03176391079071863, "grad_norm": 1.4441688060760498, "learning_rate": 1.5172413793103448e-05, "loss": 0.5992, "step": 141 }, { "epoch": 0.03198918675377337, "grad_norm": 0.8141003251075745, "learning_rate": 1.3793103448275863e-05, "loss": 0.6465, "step": 142 }, { "epoch": 0.03221446271682812, "grad_norm": 0.7121213674545288, "learning_rate": 1.2413793103448277e-05, "loss": 0.6673, "step": 143 }, { "epoch": 0.032439738679882854, "grad_norm": 1.1131733655929565, "learning_rate": 1.103448275862069e-05, "loss": 0.5913, "step": 144 }, { "epoch": 0.0326650146429376, "grad_norm": 0.7731756567955017, "learning_rate": 9.655172413793103e-06, "loss": 0.7945, "step": 145 }, { "epoch": 0.03289029060599234, "grad_norm": 0.9530065655708313, "learning_rate": 8.275862068965518e-06, "loss": 0.6049, "step": 146 }, { "epoch": 0.03311556656904708, "grad_norm": 0.7355793118476868, "learning_rate": 6.896551724137932e-06, "loss": 0.6433, "step": 147 }, { "epoch": 0.03334084253210182, "grad_norm": 1.0239030122756958, "learning_rate": 5.517241379310345e-06, "loss": 0.6303, "step": 148 }, { "epoch": 0.03356611849515657, "grad_norm": 1.0138450860977173, "learning_rate": 4.137931034482759e-06, "loss": 0.6905, "step": 149 }, { "epoch": 0.03379139445821131, "grad_norm": 0.7114500403404236, "learning_rate": 2.7586206896551725e-06, "loss": 0.6255, "step": 150 } ], "logging_steps": 1, "max_steps": 150, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2613698663305008.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }