{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9942418426103647,
  "eval_steps": 500,
  "global_step": 585,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.025591810620601407, "grad_norm": 0.3465089797973633, "learning_rate": 4.999098819177214e-05, "loss": 0.8049, "step": 5 },
    { "epoch": 0.05118362124120281, "grad_norm": 0.288526713848114, "learning_rate": 4.9963959264103544e-05, "loss": 0.7441, "step": 10 },
    { "epoch": 0.07677543186180422, "grad_norm": 0.29372313618659973, "learning_rate": 4.9918932703355256e-05, "loss": 0.6875, "step": 15 },
    { "epoch": 0.10236724248240563, "grad_norm": 0.28075122833251953, "learning_rate": 4.98559409711857e-05, "loss": 0.6468, "step": 20 },
    { "epoch": 0.12795905310300704, "grad_norm": 0.2833946645259857, "learning_rate": 4.977502948114772e-05, "loss": 0.6117, "step": 25 },
    { "epoch": 0.15355086372360843, "grad_norm": 0.2722305953502655, "learning_rate": 4.967625656594782e-05, "loss": 0.5909, "step": 30 },
    { "epoch": 0.17914267434420986, "grad_norm": 0.25874456763267517, "learning_rate": 4.955969343539162e-05, "loss": 0.5685, "step": 35 },
    { "epoch": 0.20473448496481125, "grad_norm": 0.2593054473400116, "learning_rate": 4.942542412504543e-05, "loss": 0.5595, "step": 40 },
    { "epoch": 0.23032629558541268, "grad_norm": 0.2561085522174835, "learning_rate": 4.92735454356513e-05, "loss": 0.5419, "step": 45 },
    { "epoch": 0.2559181062060141, "grad_norm": 0.2852307856082916, "learning_rate": 4.910416686333906e-05, "loss": 0.5384, "step": 50 },
    { "epoch": 0.28150991682661547, "grad_norm": 0.29978978633880615, "learning_rate": 4.8917410520685635e-05, "loss": 0.5243, "step": 55 },
    { "epoch": 0.30710172744721687, "grad_norm": 0.30773016810417175, "learning_rate": 4.8713411048678635e-05, "loss": 0.5229, "step": 60 },
    { "epoch": 0.3326935380678183, "grad_norm": 0.3103509247303009, "learning_rate": 4.849231551964771e-05, "loss": 0.5124, "step": 65 },
    { "epoch": 0.3582853486884197, "grad_norm": 0.32078924775123596, "learning_rate": 4.8254283331233464e-05, "loss": 0.5118, "step": 70 },
    { "epoch": 0.3838771593090211, "grad_norm": 0.3069196939468384, "learning_rate": 4.799948609147061e-05, "loss": 0.501, "step": 75 },
    { "epoch": 0.4094689699296225, "grad_norm": 0.30792945623397827, "learning_rate": 4.77281074950681e-05, "loss": 0.5015, "step": 80 },
    { "epoch": 0.4350607805502239, "grad_norm": 0.3218609094619751, "learning_rate": 4.744034319097535e-05, "loss": 0.5099, "step": 85 },
    { "epoch": 0.46065259117082535, "grad_norm": 0.35433730483055115, "learning_rate": 4.713640064133025e-05, "loss": 0.4912, "step": 90 },
    { "epoch": 0.48624440179142675, "grad_norm": 0.3382019102573395, "learning_rate": 4.681649897189036e-05, "loss": 0.4951, "step": 95 },
    { "epoch": 0.5118362124120281, "grad_norm": 0.37824833393096924, "learning_rate": 4.6480868814055424e-05, "loss": 0.4889, "step": 100 },
    { "epoch": 0.5374280230326296, "grad_norm": 0.3094428479671478, "learning_rate": 4.6129752138594874e-05, "loss": 0.4878, "step": 105 },
    { "epoch": 0.5630198336532309, "grad_norm": 0.39037370681762695, "learning_rate": 4.5763402081200294e-05, "loss": 0.4913, "step": 110 },
    { "epoch": 0.5886116442738324, "grad_norm": 0.34269848465919495, "learning_rate": 4.538208275998861e-05, "loss": 0.4926, "step": 115 },
    { "epoch": 0.6142034548944337, "grad_norm": 0.3708631098270416, "learning_rate": 4.498606908508754e-05, "loss": 0.4931, "step": 120 },
    { "epoch": 0.6397952655150352, "grad_norm": 0.3521125614643097, "learning_rate": 4.457564656044056e-05, "loss": 0.481, "step": 125 },
    { "epoch": 0.6653870761356366, "grad_norm": 0.3537481427192688, "learning_rate": 4.415111107797445e-05, "loss": 0.4842, "step": 130 },
    { "epoch": 0.690978886756238, "grad_norm": 0.38676100969314575, "learning_rate": 4.371276870427753e-05, "loss": 0.4765, "step": 135 },
    { "epoch": 0.7165706973768394, "grad_norm": 0.4416983723640442, "learning_rate": 4.3260935459942584e-05, "loss": 0.4652, "step": 140 },
    { "epoch": 0.7421625079974408, "grad_norm": 0.36901989579200745, "learning_rate": 4.2795937091733515e-05, "loss": 0.4819, "step": 145 },
    { "epoch": 0.7677543186180422, "grad_norm": 0.3908597528934479, "learning_rate": 4.231810883773999e-05, "loss": 0.4612, "step": 150 },
    { "epoch": 0.7933461292386437, "grad_norm": 0.4273853003978729, "learning_rate": 4.182779518568926e-05, "loss": 0.4713, "step": 155 },
    { "epoch": 0.818937939859245, "grad_norm": 0.36905694007873535, "learning_rate": 4.132534962458962e-05, "loss": 0.4734, "step": 160 },
    { "epoch": 0.8445297504798465, "grad_norm": 0.3763083517551422, "learning_rate": 4.0811134389884433e-05, "loss": 0.4659, "step": 165 },
    { "epoch": 0.8701215611004478, "grad_norm": 0.3596407175064087, "learning_rate": 4.028552020230031e-05, "loss": 0.4631, "step": 170 },
    { "epoch": 0.8957133717210493, "grad_norm": 0.396517276763916, "learning_rate": 3.974888600057808e-05, "loss": 0.4634, "step": 175 },
    { "epoch": 0.9213051823416507, "grad_norm": 0.38290080428123474, "learning_rate": 3.920161866827889e-05, "loss": 0.4634, "step": 180 },
    { "epoch": 0.946896992962252, "grad_norm": 0.39791831374168396, "learning_rate": 3.8644112754862614e-05, "loss": 0.4638, "step": 185 },
    { "epoch": 0.9724888035828535, "grad_norm": 0.3460437059402466, "learning_rate": 3.807677019123944e-05, "loss": 0.4647, "step": 190 },
    { "epoch": 0.9980806142034548, "grad_norm": 0.39563411474227905, "learning_rate": 3.7500000000000003e-05, "loss": 0.465, "step": 195 },
    { "epoch": 1.0236724248240563, "grad_norm": 0.3726441264152527, "learning_rate": 3.69142180005327e-05, "loss": 0.5229, "step": 200 },
    { "epoch": 1.0492642354446577, "grad_norm": 0.402972936630249, "learning_rate": 3.631984650924094e-05, "loss": 0.4445, "step": 205 },
    { "epoch": 1.0748560460652592, "grad_norm": 0.4153399169445038, "learning_rate": 3.5717314035076355e-05, "loss": 0.4516, "step": 210 },
    { "epoch": 1.1004478566858604, "grad_norm": 0.5092347860336304, "learning_rate": 3.510705497060762e-05, "loss": 0.4528, "step": 215 },
    { "epoch": 1.1260396673064619, "grad_norm": 0.40579304099082947, "learning_rate": 3.4489509278847414e-05, "loss": 0.4395, "step": 220 },
    { "epoch": 1.1516314779270633, "grad_norm": 0.43368446826934814, "learning_rate": 3.386512217606339e-05, "loss": 0.4484, "step": 225 },
    { "epoch": 1.1772232885476648, "grad_norm": 0.41571882367134094, "learning_rate": 3.323434381080199e-05, "loss": 0.4376, "step": 230 },
    { "epoch": 1.2028150991682662, "grad_norm": 0.44565585255622864, "learning_rate": 3.2597628939356175e-05, "loss": 0.4578, "step": 235 },
    { "epoch": 1.2284069097888675, "grad_norm": 0.4231709837913513, "learning_rate": 3.195543659791132e-05, "loss": 0.439, "step": 240 },
    { "epoch": 1.253998720409469, "grad_norm": 0.4322074055671692, "learning_rate": 3.130822977160554e-05, "loss": 0.4429, "step": 245 },
    { "epoch": 1.2795905310300704, "grad_norm": 0.42521893978118896, "learning_rate": 3.065647506074306e-05, "loss": 0.4427, "step": 250 },
    { "epoch": 1.3051823416506718, "grad_norm": 0.416797012090683, "learning_rate": 3.0000642344401113e-05, "loss": 0.4315, "step": 255 },
    { "epoch": 1.3307741522712733, "grad_norm": 0.41196638345718384, "learning_rate": 2.9341204441673266e-05, "loss": 0.4425, "step": 260 },
    { "epoch": 1.3563659628918745, "grad_norm": 0.4643464982509613, "learning_rate": 2.8678636770792906e-05, "loss": 0.4488, "step": 265 },
    { "epoch": 1.381957773512476, "grad_norm": 0.40053775906562805, "learning_rate": 2.8013417006383076e-05, "loss": 0.4289, "step": 270 },
    { "epoch": 1.4075495841330774, "grad_norm": 0.4310891032218933, "learning_rate": 2.7346024735079486e-05, "loss": 0.4348, "step": 275 },
    { "epoch": 1.4331413947536789, "grad_norm": 0.5526339411735535, "learning_rate": 2.667694110977506e-05, "loss": 0.4294, "step": 280 },
    { "epoch": 1.4587332053742803, "grad_norm": 0.473822683095932, "learning_rate": 2.600664850273538e-05, "loss": 0.4361, "step": 285 },
    { "epoch": 1.4843250159948815, "grad_norm": 0.4439009726047516, "learning_rate": 2.5335630157834937e-05, "loss": 0.4413, "step": 290 },
    { "epoch": 1.5099168266154832, "grad_norm": 0.4225783944129944, "learning_rate": 2.4664369842165068e-05, "loss": 0.4296, "step": 295 },
    { "epoch": 1.5355086372360844, "grad_norm": 0.497530460357666, "learning_rate": 2.399335149726463e-05, "loss": 0.4447, "step": 300 },
    { "epoch": 1.561100447856686, "grad_norm": 0.4586053490638733, "learning_rate": 2.3323058890224938e-05, "loss": 0.4207, "step": 305 },
    { "epoch": 1.5866922584772873, "grad_norm": 0.5011757612228394, "learning_rate": 2.265397526492052e-05, "loss": 0.4398, "step": 310 },
    { "epoch": 1.6122840690978886, "grad_norm": 0.4521833658218384, "learning_rate": 2.1986582993616926e-05, "loss": 0.4367, "step": 315 },
    { "epoch": 1.6378758797184902, "grad_norm": 0.4077029526233673, "learning_rate": 2.1321363229207096e-05, "loss": 0.4274, "step": 320 },
    { "epoch": 1.6634676903390915, "grad_norm": 0.4575510621070862, "learning_rate": 2.0658795558326743e-05, "loss": 0.4412, "step": 325 },
    { "epoch": 1.689059500959693, "grad_norm": 0.48359596729278564, "learning_rate": 1.9999357655598893e-05, "loss": 0.4408, "step": 330 },
    { "epoch": 1.7146513115802944, "grad_norm": 0.4537913203239441, "learning_rate": 1.934352493925695e-05, "loss": 0.4306, "step": 335 },
    { "epoch": 1.7402431222008956, "grad_norm": 0.4479089379310608, "learning_rate": 1.8691770228394456e-05, "loss": 0.4347, "step": 340 },
    { "epoch": 1.7658349328214973, "grad_norm": 0.432325154542923, "learning_rate": 1.8044563402088684e-05, "loss": 0.4252, "step": 345 },
    { "epoch": 1.7914267434420985, "grad_norm": 0.4653799831867218, "learning_rate": 1.740237106064383e-05, "loss": 0.4306, "step": 350 },
    { "epoch": 1.8170185540627, "grad_norm": 0.46541592478752136, "learning_rate": 1.6765656189198013e-05, "loss": 0.4201, "step": 355 },
    { "epoch": 1.8426103646833014, "grad_norm": 0.5030815005302429, "learning_rate": 1.613487782393661e-05, "loss": 0.426, "step": 360 },
    { "epoch": 1.8682021753039026, "grad_norm": 0.5130462646484375, "learning_rate": 1.5510490721152592e-05, "loss": 0.4276, "step": 365 },
    { "epoch": 1.8937939859245043, "grad_norm": 0.48607420921325684, "learning_rate": 1.489294502939238e-05, "loss": 0.418, "step": 370 },
    { "epoch": 1.9193857965451055, "grad_norm": 0.43401479721069336, "learning_rate": 1.4282685964923642e-05, "loss": 0.4326, "step": 375 },
    { "epoch": 1.944977607165707, "grad_norm": 0.4771016538143158, "learning_rate": 1.3680153490759073e-05, "loss": 0.433, "step": 380 },
    { "epoch": 1.9705694177863085, "grad_norm": 0.503084123134613, "learning_rate": 1.3085781999467303e-05, "loss": 0.4278, "step": 385 },
    { "epoch": 1.9961612284069097, "grad_norm": 0.504097580909729, "learning_rate": 1.2500000000000006e-05, "loss": 0.4221, "step": 390 },
    { "epoch": 2.0217530390275114, "grad_norm": 0.46889811754226685, "learning_rate": 1.1923229808760564e-05, "loss": 0.4834, "step": 395 },
    { "epoch": 2.0473448496481126, "grad_norm": 0.45117413997650146, "learning_rate": 1.1355887245137383e-05, "loss": 0.4055, "step": 400 },
    { "epoch": 2.072936660268714, "grad_norm": 0.47571200132369995, "learning_rate": 1.0798381331721109e-05, "loss": 0.4143, "step": 405 },
    { "epoch": 2.0985284708893155, "grad_norm": 0.5209171175956726, "learning_rate": 1.0251113999421935e-05, "loss": 0.4189, "step": 410 },
    { "epoch": 2.1241202815099167, "grad_norm": 0.49102234840393066, "learning_rate": 9.714479797699694e-06, "loss": 0.4051, "step": 415 },
    { "epoch": 2.1497120921305184, "grad_norm": 0.5026312470436096, "learning_rate": 9.18886561011557e-06, "loss": 0.4181, "step": 420 },
    { "epoch": 2.1753039027511196, "grad_norm": 0.4433274269104004, "learning_rate": 8.67465037541038e-06, "loss": 0.4141, "step": 425 },
    { "epoch": 2.200895713371721, "grad_norm": 0.4839063286781311, "learning_rate": 8.172204814310742e-06, "loss": 0.4148, "step": 430 },
    { "epoch": 2.2264875239923225, "grad_norm": 0.46064066886901855, "learning_rate": 7.681891162260015e-06, "loss": 0.4093, "step": 435 },
    { "epoch": 2.2520793346129238, "grad_norm": 0.47170594334602356, "learning_rate": 7.20406290826649e-06, "loss": 0.4152, "step": 440 },
    { "epoch": 2.2776711452335254, "grad_norm": 0.4596922695636749, "learning_rate": 6.739064540057424e-06, "loss": 0.4219, "step": 445 },
    { "epoch": 2.3032629558541267, "grad_norm": 0.45822423696517944, "learning_rate": 6.28723129572247e-06, "loss": 0.4169, "step": 450 },
    { "epoch": 2.328854766474728, "grad_norm": 0.46449440717697144, "learning_rate": 5.848888922025553e-06, "loss": 0.4033, "step": 455 },
    { "epoch": 2.3544465770953296, "grad_norm": 0.4573959708213806, "learning_rate": 5.424353439559446e-06, "loss": 0.4139, "step": 460 },
    { "epoch": 2.380038387715931, "grad_norm": 0.48029977083206177, "learning_rate": 5.013930914912476e-06, "loss": 0.4184, "step": 465 },
    { "epoch": 2.4056301983365325, "grad_norm": 0.5101778507232666, "learning_rate": 4.617917240011394e-06, "loss": 0.4142, "step": 470 },
    { "epoch": 2.4312220089571337, "grad_norm": 0.5021511912345886, "learning_rate": 4.236597918799709e-06, "loss": 0.4105, "step": 475 },
    { "epoch": 2.456813819577735, "grad_norm": 0.46041542291641235, "learning_rate": 3.8702478614051355e-06, "loss": 0.4033, "step": 480 },
    { "epoch": 2.4824056301983366, "grad_norm": 0.4977727234363556, "learning_rate": 3.5191311859445796e-06, "loss": 0.4136, "step": 485 },
    { "epoch": 2.507997440818938, "grad_norm": 0.4449191093444824, "learning_rate": 3.183501028109642e-06, "loss": 0.4026, "step": 490 },
    { "epoch": 2.5335892514395395, "grad_norm": 0.4880577027797699, "learning_rate": 2.8635993586697553e-06, "loss": 0.4144, "step": 495 },
    { "epoch": 2.5591810620601407, "grad_norm": 0.4731748402118683, "learning_rate": 2.5596568090246548e-06, "loss": 0.4156, "step": 500 },
    { "epoch": 2.584772872680742, "grad_norm": 0.4792173206806183, "learning_rate": 2.271892504931905e-06, "loss": 0.4129, "step": 505 },
    { "epoch": 2.6103646833013436, "grad_norm": 0.4491451680660248, "learning_rate": 2.0005139085293945e-06, "loss": 0.3978, "step": 510 },
    { "epoch": 2.635956493921945, "grad_norm": 0.5214707851409912, "learning_rate": 1.7457166687665449e-06, "loss": 0.4162, "step": 515 },
    { "epoch": 2.6615483045425465, "grad_norm": 0.44630053639411926, "learning_rate": 1.5076844803522922e-06, "loss": 0.4048, "step": 520 },
    { "epoch": 2.6871401151631478, "grad_norm": 0.45181167125701904, "learning_rate": 1.286588951321363e-06, "loss": 0.4159, "step": 525 },
    { "epoch": 2.712731925783749, "grad_norm": 0.4882276952266693, "learning_rate": 1.0825894793143721e-06, "loss": 0.4171, "step": 530 },
    { "epoch": 2.7383237364043507, "grad_norm": 0.470037043094635, "learning_rate": 8.958331366609423e-07, "loss": 0.4049, "step": 535 },
    { "epoch": 2.763915547024952, "grad_norm": 0.4617624580860138, "learning_rate": 7.264545643486997e-07, "loss": 0.4062, "step": 540 },
    { "epoch": 2.7895073576455536, "grad_norm": 0.47536271810531616, "learning_rate": 5.745758749545749e-07, "loss": 0.4153, "step": 545 },
    { "epoch": 2.815099168266155, "grad_norm": 0.4715920388698578, "learning_rate": 4.403065646083809e-07, "loss": 0.4208, "step": 550 },
    { "epoch": 2.840690978886756, "grad_norm": 0.4840976297855377, "learning_rate": 3.237434340521789e-07, "loss": 0.4083, "step": 555 },
    { "epoch": 2.8662827895073577, "grad_norm": 0.4537844657897949, "learning_rate": 2.2497051885228827e-07, "loss": 0.4085, "step": 560 },
    { "epoch": 2.891874600127959, "grad_norm": 0.4958924651145935, "learning_rate": 1.4405902881430288e-07, "loss": 0.417, "step": 565 },
    { "epoch": 2.9174664107485606, "grad_norm": 0.48199719190597534, "learning_rate": 8.106729664475176e-08, "loss": 0.415, "step": 570 },
    { "epoch": 2.943058221369162, "grad_norm": 0.45770469307899475, "learning_rate": 3.604073589645596e-08, "loss": 0.4043, "step": 575 },
    { "epoch": 2.968650031989763, "grad_norm": 0.5054221153259277, "learning_rate": 9.011808227865625e-09, "loss": 0.4034, "step": 580 },
    { "epoch": 2.9942418426103647, "grad_norm": 0.4703996777534485, "learning_rate": 0.0, "loss": 0.4106, "step": 585 }
  ],
  "logging_steps": 5,
  "max_steps": 585,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.5149343615798477e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}