{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500.0,
  "global_step": 36350,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.001375515818431912, "grad_norm": 0.06477122753858566, "learning_rate": 0.0001, "loss": 1.7497, "step": 50},
    {"epoch": 0.002751031636863824, "grad_norm": 0.08175177872180939, "learning_rate": 0.0001, "loss": 1.7218, "step": 100},
    {"epoch": 0.0041265474552957355, "grad_norm": 0.08908290416002274, "learning_rate": 0.0001, "loss": 1.7102, "step": 150},
    {"epoch": 0.005502063273727648, "grad_norm": 0.08645089715719223, "learning_rate": 0.0001, "loss": 1.6996, "step": 200},
    {"epoch": 0.0068775790921595595, "grad_norm": 0.07688608765602112, "learning_rate": 0.0001, "loss": 1.6968, "step": 250},
    {"epoch": 0.008253094910591471, "grad_norm": 0.08298292011022568, "learning_rate": 0.0001, "loss": 1.6922, "step": 300},
    {"epoch": 0.009628610729023384, "grad_norm": 0.07124519348144531, "learning_rate": 0.0001, "loss": 1.6874, "step": 350},
    {"epoch": 0.011004126547455296, "grad_norm": 0.0821714997291565, "learning_rate": 0.0001, "loss": 1.6838, "step": 400},
    {"epoch": 0.012379642365887207, "grad_norm": 0.11138464510440826, "learning_rate": 0.0001, "loss": 1.6807, "step": 450},
    {"epoch": 0.013755158184319119, "grad_norm": 0.09057251363992691, "learning_rate": 0.0001, "loss": 1.6772, "step": 500},
    {"epoch": 0.015130674002751032, "grad_norm": 0.10090968757867813, "learning_rate": 0.0001, "loss": 1.6718, "step": 550},
    {"epoch": 0.016506189821182942, "grad_norm": 0.08569780737161636, "learning_rate": 0.0001, "loss": 1.6702, "step": 600},
    {"epoch": 0.017881705639614855, "grad_norm": 0.07728252559900284, "learning_rate": 0.0001, "loss": 1.6671, "step": 650},
    {"epoch": 0.01925722145804677, "grad_norm": 0.08100250363349915, "learning_rate": 0.0001, "loss": 1.6641, "step": 700},
    {"epoch": 0.02063273727647868, "grad_norm": 0.09590116143226624, "learning_rate": 0.0001, "loss": 1.6616, "step": 750},
    {"epoch": 0.02200825309491059, "grad_norm": 0.10437134653329849, "learning_rate": 0.0001, "loss": 1.6607, "step": 800},
    {"epoch": 0.023383768913342505, "grad_norm": 0.08097755908966064, "learning_rate": 0.0001, "loss": 1.6578, "step": 850},
    {"epoch": 0.024759284731774415, "grad_norm": 0.08555827289819717, "learning_rate": 0.0001, "loss": 1.6548, "step": 900},
    {"epoch": 0.026134800550206328, "grad_norm": 0.10720808058977127, "learning_rate": 0.0001, "loss": 1.6528, "step": 950},
    {"epoch": 0.027510316368638238, "grad_norm": 0.11773797124624252, "learning_rate": 0.0001, "loss": 1.6511, "step": 1000},
    {"epoch": 0.02888583218707015, "grad_norm": 0.10159046947956085, "learning_rate": 0.0001, "loss": 1.6474, "step": 1050},
    {"epoch": 0.030261348005502064, "grad_norm": 0.08796145766973495, "learning_rate": 0.0001, "loss": 1.647, "step": 1100},
    {"epoch": 0.03163686382393398, "grad_norm": 0.08194500207901001, "learning_rate": 0.0001, "loss": 1.6459, "step": 1150},
    {"epoch": 0.033012379642365884, "grad_norm": 0.0940510481595993, "learning_rate": 0.0001, "loss": 1.6429, "step": 1200},
    {"epoch": 0.0343878954607978, "grad_norm": 0.08046701550483704, "learning_rate": 0.0001, "loss": 1.6407, "step": 1250},
    {"epoch": 0.03576341127922971, "grad_norm": 0.07953349500894547, "learning_rate": 0.0001, "loss": 1.6407, "step": 1300},
    {"epoch": 0.037138927097661624, "grad_norm": 0.0876886174082756, "learning_rate": 0.0001, "loss": 1.6378, "step": 1350},
    {"epoch": 0.03851444291609354, "grad_norm": 0.0870981439948082, "learning_rate": 0.0001, "loss": 1.6408, "step": 1400},
    {"epoch": 0.039889958734525444, "grad_norm": 0.09412265568971634, "learning_rate": 0.0001, "loss": 1.638, "step": 1450},
    {"epoch": 0.04126547455295736, "grad_norm": 0.08362641930580139, "learning_rate": 0.0001, "loss": 1.6344, "step": 1500},
    {"epoch": 0.04264099037138927, "grad_norm": 0.11198284476995468, "learning_rate": 0.0001, "loss": 1.6354, "step": 1550},
    {"epoch": 0.04401650618982118, "grad_norm": 0.09470899403095245, "learning_rate": 0.0001, "loss": 1.6337, "step": 1600},
    {"epoch": 0.0453920220082531, "grad_norm": 0.11157640069723129, "learning_rate": 0.0001, "loss": 1.6316, "step": 1650},
    {"epoch": 0.04676753782668501, "grad_norm": 0.08970475941896439, "learning_rate": 0.0001, "loss": 1.6324, "step": 1700},
    {"epoch": 0.048143053645116916, "grad_norm": 0.09438284486532211, "learning_rate": 0.0001, "loss": 1.6299, "step": 1750},
    {"epoch": 0.04951856946354883, "grad_norm": 0.09604686498641968, "learning_rate": 0.0001, "loss": 1.6278, "step": 1800},
    {"epoch": 0.05089408528198074, "grad_norm": 0.09955621510744095, "learning_rate": 0.0001, "loss": 1.6282, "step": 1850},
    {"epoch": 0.052269601100412656, "grad_norm": 0.10360520333051682, "learning_rate": 0.0001, "loss": 1.6265, "step": 1900},
    {"epoch": 0.05364511691884457, "grad_norm": 0.1229841411113739, "learning_rate": 0.0001, "loss": 1.6264, "step": 1950},
    {"epoch": 0.055020632737276476, "grad_norm": 0.09015832841396332, "learning_rate": 0.0001, "loss": 1.6248, "step": 2000},
    {"epoch": 0.05639614855570839, "grad_norm": 0.10285497456789017, "learning_rate": 0.0001, "loss": 1.6237, "step": 2050},
    {"epoch": 0.0577716643741403, "grad_norm": 0.07973627001047134, "learning_rate": 0.0001, "loss": 1.6262, "step": 2100},
    {"epoch": 0.059147180192572216, "grad_norm": 0.1072544977068901, "learning_rate": 0.0001, "loss": 1.6246, "step": 2150},
    {"epoch": 0.06052269601100413, "grad_norm": 0.11573298275470734, "learning_rate": 0.0001, "loss": 1.623, "step": 2200},
    {"epoch": 0.061898211829436035, "grad_norm": 0.1113864928483963, "learning_rate": 0.0001, "loss": 1.6189, "step": 2250},
    {"epoch": 0.06327372764786796, "grad_norm": 0.09252315014600754, "learning_rate": 0.0001, "loss": 1.6192, "step": 2300},
    {"epoch": 0.06464924346629987, "grad_norm": 0.09697891771793365, "learning_rate": 0.0001, "loss": 1.6191, "step": 2350},
    {"epoch": 0.06602475928473177, "grad_norm": 0.09384047985076904, "learning_rate": 0.0001, "loss": 1.6165, "step": 2400},
    {"epoch": 0.06740027510316368, "grad_norm": 0.10533461719751358, "learning_rate": 0.0001, "loss": 1.6202, "step": 2450},
    {"epoch": 0.0687757909215956, "grad_norm": 0.08703196048736572, "learning_rate": 0.0001, "loss": 1.618, "step": 2500},
    {"epoch": 0.07015130674002751, "grad_norm": 0.09502206742763519, "learning_rate": 0.0001, "loss": 1.6177, "step": 2550},
    {"epoch": 0.07152682255845942, "grad_norm": 0.09674184769392014, "learning_rate": 0.0001, "loss": 1.6143, "step": 2600},
    {"epoch": 0.07290233837689133, "grad_norm": 0.12614910304546356, "learning_rate": 0.0001, "loss": 1.6125, "step": 2650},
    {"epoch": 0.07427785419532325, "grad_norm": 0.10198106616735458, "learning_rate": 0.0001, "loss": 1.6158, "step": 2700},
    {"epoch": 0.07565337001375516, "grad_norm": 0.09061957895755768, "learning_rate": 0.0001, "loss": 1.6124, "step": 2750},
    {"epoch": 0.07702888583218707, "grad_norm": 0.08632820844650269, "learning_rate": 0.0001, "loss": 1.6113, "step": 2800},
    {"epoch": 0.07840440165061899, "grad_norm": 0.10429545491933823, "learning_rate": 0.0001, "loss": 1.6105, "step": 2850},
    {"epoch": 0.07977991746905089, "grad_norm": 0.104611337184906, "learning_rate": 0.0001, "loss": 1.6105, "step": 2900},
    {"epoch": 0.0811554332874828, "grad_norm": 0.11391541361808777, "learning_rate": 0.0001, "loss": 1.6078, "step": 2950},
    {"epoch": 0.08253094910591471, "grad_norm": 0.1170964241027832, "learning_rate": 0.0001, "loss": 1.6101, "step": 3000},
    {"epoch": 0.08390646492434663, "grad_norm": 0.10005070269107819, "learning_rate": 0.0001, "loss": 1.6096, "step": 3050},
    {"epoch": 0.08528198074277854, "grad_norm": 0.13063783943653107, "learning_rate": 0.0001, "loss": 1.6094, "step": 3100},
    {"epoch": 0.08665749656121045, "grad_norm": 0.10203906893730164, "learning_rate": 0.0001, "loss": 1.609, "step": 3150},
    {"epoch": 0.08803301237964237, "grad_norm": 0.11838550120592117, "learning_rate": 0.0001, "loss": 1.6068, "step": 3200},
    {"epoch": 0.08940852819807428, "grad_norm": 0.16624979674816132, "learning_rate": 0.0001, "loss": 1.6035, "step": 3250},
    {"epoch": 0.0907840440165062, "grad_norm": 0.11730783432722092, "learning_rate": 0.0001, "loss": 1.6074, "step": 3300},
    {"epoch": 0.0921595598349381, "grad_norm": 0.10523674637079239, "learning_rate": 0.0001, "loss": 1.6051, "step": 3350},
    {"epoch": 0.09353507565337002, "grad_norm": 0.10546988248825073, "learning_rate": 0.0001, "loss": 1.604, "step": 3400},
    {"epoch": 0.09491059147180192, "grad_norm": 0.13425269722938538, "learning_rate": 0.0001, "loss": 1.6044, "step": 3450},
    {"epoch": 0.09628610729023383, "grad_norm": 0.12492198497056961, "learning_rate": 0.0001, "loss": 1.6052, "step": 3500},
    {"epoch": 0.09766162310866575, "grad_norm": 0.09005106985569, "learning_rate": 0.0001, "loss": 1.603, "step": 3550},
    {"epoch": 0.09903713892709766, "grad_norm": 0.11914248019456863, "learning_rate": 0.0001, "loss": 1.6027, "step": 3600},
    {"epoch": 0.10041265474552957, "grad_norm": 0.12221172451972961, "learning_rate": 0.0001, "loss": 1.605, "step": 3650},
    {"epoch": 0.10178817056396149, "grad_norm": 0.13399210572242737, "learning_rate": 0.0001, "loss": 1.6039, "step": 3700},
    {"epoch": 0.1031636863823934, "grad_norm": 0.11565663665533066, "learning_rate": 0.0001, "loss": 1.6008, "step": 3750},
    {"epoch": 0.10453920220082531, "grad_norm": 0.12839622795581818, "learning_rate": 0.0001, "loss": 1.6004, "step": 3800},
    {"epoch": 0.10591471801925723, "grad_norm": 0.11184845864772797, "learning_rate": 0.0001, "loss": 1.5975, "step": 3850},
    {"epoch": 0.10729023383768914, "grad_norm": 0.11628763377666473, "learning_rate": 0.0001, "loss": 1.601, "step": 3900},
    {"epoch": 0.10866574965612105, "grad_norm": 0.11737735569477081, "learning_rate": 0.0001, "loss": 1.6011, "step": 3950},
    {"epoch": 0.11004126547455295, "grad_norm": 0.10090334713459015, "learning_rate": 0.0001, "loss": 1.5981, "step": 4000},
    {"epoch": 0.11141678129298486, "grad_norm": 0.11729908734560013, "learning_rate": 0.0001, "loss": 1.5972, "step": 4050},
    {"epoch": 0.11279229711141678, "grad_norm": 0.10134877264499664, "learning_rate": 0.0001, "loss": 1.5974, "step": 4100},
    {"epoch": 0.11416781292984869, "grad_norm": 0.150742307305336, "learning_rate": 0.0001, "loss": 1.5979, "step": 4150},
    {"epoch": 0.1155433287482806, "grad_norm": 0.1354828178882599, "learning_rate": 0.0001, "loss": 1.594, "step": 4200},
    {"epoch": 0.11691884456671252, "grad_norm": 0.10246012359857559, "learning_rate": 0.0001, "loss": 1.5944, "step": 4250},
    {"epoch": 0.11829436038514443, "grad_norm": 0.10707879811525345, "learning_rate": 0.0001, "loss": 1.5975, "step": 4300},
    {"epoch": 0.11966987620357634, "grad_norm": 0.09582670778036118, "learning_rate": 0.0001, "loss": 1.5931, "step": 4350},
    {"epoch": 0.12104539202200826, "grad_norm": 0.11471503973007202, "learning_rate": 0.0001, "loss": 1.5957, "step": 4400},
    {"epoch": 0.12242090784044017, "grad_norm": 0.14393934607505798, "learning_rate": 0.0001, "loss": 1.5947, "step": 4450},
    {"epoch": 0.12379642365887207, "grad_norm": 0.1267063319683075, "learning_rate": 0.0001, "loss": 1.5928, "step": 4500},
    {"epoch": 0.12517193947730398, "grad_norm": 0.10451563447713852, "learning_rate": 0.0001, "loss": 1.5944, "step": 4550},
    {"epoch": 0.1265474552957359, "grad_norm": 0.13244299590587616, "learning_rate": 0.0001, "loss": 1.5935, "step": 4600},
    {"epoch": 0.1279229711141678, "grad_norm": 0.14042487740516663, "learning_rate": 0.0001, "loss": 1.5929, "step": 4650},
    {"epoch": 0.12929848693259974, "grad_norm": 0.12199941277503967, "learning_rate": 0.0001, "loss": 1.5933, "step": 4700},
    {"epoch": 0.13067400275103164, "grad_norm": 0.13133960962295532, "learning_rate": 0.0001, "loss": 1.5904, "step": 4750},
    {"epoch": 0.13204951856946354, "grad_norm": 0.12281449884176254, "learning_rate": 0.0001, "loss": 1.5909, "step": 4800},
    {"epoch": 0.13342503438789546, "grad_norm": 0.1380591243505478, "learning_rate": 0.0001, "loss": 1.5899, "step": 4850},
    {"epoch": 0.13480055020632736, "grad_norm": 0.13320781290531158, "learning_rate": 0.0001, "loss": 1.5924, "step": 4900},
    {"epoch": 0.1361760660247593, "grad_norm": 0.10719151794910431, "learning_rate": 0.0001, "loss": 1.5909, "step": 4950},
    {"epoch": 0.1375515818431912, "grad_norm": 0.17885592579841614, "learning_rate": 0.0001, "loss": 1.591, "step": 5000},
    {"epoch": 0.13892709766162312, "grad_norm": 0.15455111861228943, "learning_rate": 0.0001, "loss": 1.587, "step": 5050},
    {"epoch": 0.14030261348005502, "grad_norm": 0.12887494266033173, "learning_rate": 0.0001, "loss": 1.5886, "step": 5100},
    {"epoch": 0.14167812929848694, "grad_norm": 0.13535436987876892, "learning_rate": 0.0001, "loss": 1.5901, "step": 5150},
    {"epoch": 0.14305364511691884, "grad_norm": 0.12412004172801971, "learning_rate": 0.0001, "loss": 1.5884, "step": 5200},
    {"epoch": 0.14442916093535077, "grad_norm": 0.1510736644268036, "learning_rate": 0.0001, "loss": 1.5879, "step": 5250},
    {"epoch": 0.14580467675378267, "grad_norm": 0.128033846616745, "learning_rate": 0.0001, "loss": 1.5868, "step": 5300},
    {"epoch": 0.14718019257221457, "grad_norm": 0.11286512017250061, "learning_rate": 0.0001, "loss": 1.5859, "step": 5350},
    {"epoch": 0.1485557083906465, "grad_norm": 0.11637207865715027, "learning_rate": 0.0001, "loss": 1.5837, "step": 5400},
    {"epoch": 0.1499312242090784, "grad_norm": 0.13789626955986023, "learning_rate": 0.0001, "loss": 1.5894, "step": 5450},
    {"epoch": 0.15130674002751032, "grad_norm": 0.12487693876028061, "learning_rate": 0.0001, "loss": 1.5851, "step": 5500},
    {"epoch": 0.15268225584594222, "grad_norm": 0.14437325298786163, "learning_rate": 0.0001, "loss": 1.5879, "step": 5550},
    {"epoch": 0.15405777166437415, "grad_norm": 0.10904733836650848, "learning_rate": 0.0001, "loss": 1.5838, "step": 5600},
    {"epoch": 0.15543328748280605, "grad_norm": 0.10461211949586868, "learning_rate": 0.0001, "loss": 1.5833, "step": 5650},
    {"epoch": 0.15680880330123798, "grad_norm": 0.1489093005657196, "learning_rate": 0.0001, "loss": 1.5823, "step": 5700},
    {"epoch": 0.15818431911966988, "grad_norm": 0.15630511939525604, "learning_rate": 0.0001, "loss": 1.5844, "step": 5750},
    {"epoch": 0.15955983493810177, "grad_norm": 0.15836940705776215, "learning_rate": 0.0001, "loss": 1.584, "step": 5800},
    {"epoch": 0.1609353507565337, "grad_norm": 0.12032505124807358, "learning_rate": 0.0001, "loss": 1.5848, "step": 5850},
    {"epoch": 0.1623108665749656, "grad_norm": 0.15543417632579803, "learning_rate": 0.0001, "loss": 1.5843, "step": 5900},
    {"epoch": 0.16368638239339753, "grad_norm": 0.11939691752195358, "learning_rate": 0.0001, "loss": 1.5818, "step": 5950},
    {"epoch": 0.16506189821182943, "grad_norm": 0.13943925499916077, "learning_rate": 0.0001, "loss": 1.5821, "step": 6000},
    {"epoch": 0.16643741403026135, "grad_norm": 0.1273224651813507, "learning_rate": 0.0001, "loss": 1.5807, "step": 6050},
    {"epoch": 0.16781292984869325, "grad_norm": 0.1731129139661789, "learning_rate": 0.0001, "loss": 1.5828, "step": 6100},
    {"epoch": 0.16918844566712518, "grad_norm": 0.11023139208555222, "learning_rate": 0.0001, "loss": 1.5806, "step": 6150},
    {"epoch": 0.17056396148555708, "grad_norm": 0.15180650353431702, "learning_rate": 0.0001, "loss": 1.5805, "step": 6200},
    {"epoch": 0.171939477303989, "grad_norm": 0.1235494539141655, "learning_rate": 0.0001, "loss": 1.5811, "step": 6250},
    {"epoch": 0.1733149931224209, "grad_norm": 0.12696652114391327, "learning_rate": 0.0001, "loss": 1.58, "step": 6300},
    {"epoch": 0.1746905089408528, "grad_norm": 0.1397417187690735, "learning_rate": 0.0001, "loss": 1.5806, "step": 6350},
    {"epoch": 0.17606602475928473, "grad_norm": 0.15651826560497284, "learning_rate": 0.0001, "loss": 1.5774, "step": 6400},
    {"epoch": 0.17744154057771663, "grad_norm": 0.10367725789546967, "learning_rate": 0.0001, "loss": 1.5793, "step": 6450},
    {"epoch": 0.17881705639614856, "grad_norm": 0.15408000349998474, "learning_rate": 0.0001, "loss": 1.5791, "step": 6500},
    {"epoch": 0.18019257221458046, "grad_norm": 0.10724977403879166, "learning_rate": 0.0001, "loss": 1.5799, "step": 6550},
    {"epoch": 0.1815680880330124, "grad_norm": 0.14652323722839355, "learning_rate": 0.0001, "loss": 1.5784, "step": 6600},
    {"epoch": 0.1829436038514443, "grad_norm": 0.11810048669576645, "learning_rate": 0.0001, "loss": 1.5783, "step": 6650},
    {"epoch": 0.1843191196698762, "grad_norm": 0.1892373412847519, "learning_rate": 0.0001, "loss": 1.5811, "step": 6700},
    {"epoch": 0.1856946354883081, "grad_norm": 0.1516016721725464, "learning_rate": 0.0001, "loss": 1.5781, "step": 6750},
    {"epoch": 0.18707015130674004, "grad_norm": 0.14342574775218964, "learning_rate": 0.0001, "loss": 1.5759, "step": 6800},
    {"epoch": 0.18844566712517194, "grad_norm": 0.1327650249004364, "learning_rate": 0.0001, "loss": 1.5779, "step": 6850},
    {"epoch": 0.18982118294360384, "grad_norm": 0.137595072388649, "learning_rate": 0.0001, "loss": 1.5761, "step": 6900},
    {"epoch": 0.19119669876203577, "grad_norm": 0.1387586146593094, "learning_rate": 0.0001, "loss": 1.5768, "step": 6950},
    {"epoch": 0.19257221458046767, "grad_norm": 0.1557263284921646, "learning_rate": 0.0001, "loss": 1.5775, "step": 7000},
    {"epoch": 0.1939477303988996, "grad_norm": 0.14735980331897736, "learning_rate": 0.0001, "loss": 1.5771, "step": 7050},
    {"epoch": 0.1953232462173315, "grad_norm": 0.18839861452579498, "learning_rate": 0.0001, "loss": 1.5748, "step": 7100},
    {"epoch": 0.19669876203576342, "grad_norm": 0.17223089933395386, "learning_rate": 0.0001, "loss": 1.5795, "step": 7150},
    {"epoch": 0.19807427785419532, "grad_norm": 0.11284028738737106, "learning_rate": 0.0001, "loss": 1.5745, "step": 7200},
    {"epoch": 0.19944979367262725, "grad_norm": 0.16285105049610138, "learning_rate": 0.0001, "loss": 1.5763, "step": 7250},
    {"epoch": 0.20082530949105915, "grad_norm": 0.15286004543304443, "learning_rate": 0.0001, "loss": 1.5734, "step": 7300},
    {"epoch": 0.20220082530949107, "grad_norm": 0.15827025473117828, "learning_rate": 0.0001, "loss": 1.5736, "step": 7350},
    {"epoch": 0.20357634112792297, "grad_norm": 0.13479341566562653, "learning_rate": 0.0001, "loss": 1.5755, "step": 7400},
    {"epoch": 0.20495185694635487, "grad_norm": 0.11652766913175583, "learning_rate": 0.0001, "loss": 1.5745, "step": 7450},
    {"epoch": 0.2063273727647868, "grad_norm": 0.1466943770647049, "learning_rate": 0.0001, "loss": 1.5748, "step": 7500},
    {"epoch": 0.2077028885832187, "grad_norm": 0.16038121283054352, "learning_rate": 0.0001, "loss": 1.572, "step": 7550},
    {"epoch": 0.20907840440165062, "grad_norm": 0.1869979202747345, "learning_rate": 0.0001, "loss": 1.5762, "step": 7600},
    {"epoch": 0.21045392022008252, "grad_norm": 0.14036841690540314, "learning_rate": 0.0001, "loss": 1.5754, "step": 7650},
    {"epoch": 0.21182943603851445, "grad_norm": 0.18491779267787933, "learning_rate": 0.0001, "loss": 1.5757, "step": 7700},
    {"epoch": 0.21320495185694635, "grad_norm": 0.13815288245677948, "learning_rate": 0.0001, "loss": 1.5754, "step": 7750},
    {"epoch": 0.21458046767537828, "grad_norm": 0.13334764540195465, "learning_rate": 0.0001, "loss": 1.5706, "step": 7800},
    {"epoch": 0.21595598349381018, "grad_norm": 0.15366512537002563, "learning_rate": 0.0001, "loss": 1.5731, "step": 7850},
    {"epoch": 0.2173314993122421, "grad_norm": 0.16366422176361084, "learning_rate": 0.0001, "loss": 1.5715, "step": 7900},
    {"epoch": 0.218707015130674, "grad_norm": 0.14637479186058044, "learning_rate": 0.0001, "loss": 1.5715, "step": 7950},
    {"epoch": 0.2200825309491059, "grad_norm": 0.1257038414478302, "learning_rate": 0.0001, "loss": 1.5712, "step": 8000},
    {"epoch": 0.22145804676753783, "grad_norm": 0.13014163076877594, "learning_rate": 0.0001, "loss": 1.5711, "step": 8050},
    {"epoch": 0.22283356258596973, "grad_norm": 0.13101409375667572, "learning_rate": 0.0001, "loss": 1.5734, "step": 8100},
    {"epoch": 0.22420907840440166, "grad_norm": 0.1509891152381897, "learning_rate": 0.0001, "loss": 1.5698, "step": 8150},
    {"epoch": 0.22558459422283356, "grad_norm": 0.16276001930236816, "learning_rate": 0.0001, "loss": 1.5714, "step": 8200},
    {"epoch": 0.22696011004126548, "grad_norm": 0.16040217876434326, "learning_rate": 0.0001, "loss": 1.5701, "step": 8250},
    {"epoch": 0.22833562585969738, "grad_norm": 0.160230815410614, "learning_rate": 0.0001, "loss": 1.5705, "step": 8300},
    {"epoch": 0.2297111416781293, "grad_norm": 0.18454241752624512, "learning_rate": 0.0001, "loss": 1.571, "step": 8350},
    {"epoch": 0.2310866574965612, "grad_norm": 0.17411856353282928, "learning_rate": 0.0001, "loss": 1.5679, "step": 8400},
    {"epoch": 0.2324621733149931, "grad_norm": 0.16710075736045837, "learning_rate": 0.0001, "loss": 1.5674, "step": 8450},
    {"epoch": 0.23383768913342504, "grad_norm": 0.12378160655498505, "learning_rate": 0.0001, "loss": 1.5671, "step": 8500},
    {"epoch": 0.23521320495185694, "grad_norm": 0.11550536751747131, "learning_rate": 0.0001, "loss": 1.5698, "step": 8550},
    {"epoch": 0.23658872077028886, "grad_norm": 0.17768432199954987, "learning_rate": 0.0001, "loss": 1.5699, "step": 8600},
    {"epoch": 0.23796423658872076, "grad_norm": 0.15126097202301025, "learning_rate": 0.0001, "loss": 1.5694, "step": 8650},
    {"epoch": 0.2393397524071527, "grad_norm": 0.1827315390110016, "learning_rate": 0.0001, "loss": 1.5671, "step": 8700},
    {"epoch": 0.2407152682255846, "grad_norm": 0.11432069540023804, "learning_rate": 0.0001, "loss": 1.5685, "step": 8750},
    {"epoch": 0.24209078404401652, "grad_norm": 0.14279188215732574, "learning_rate": 0.0001, "loss": 1.5677, "step": 8800},
    {"epoch": 0.24346629986244842, "grad_norm": 0.13771188259124756, "learning_rate": 0.0001, "loss": 1.5667, "step": 8850},
    {"epoch": 0.24484181568088034, "grad_norm": 0.12438327074050903, "learning_rate": 0.0001, "loss": 1.5649, "step": 8900},
    {"epoch": 0.24621733149931224, "grad_norm": 0.146587535738945, "learning_rate": 0.0001, "loss": 1.5689, "step": 8950},
    {"epoch": 0.24759284731774414, "grad_norm": 0.13684628903865814, "learning_rate": 0.0001, "loss": 1.5662, "step": 9000},
    {"epoch": 0.24896836313617607, "grad_norm": 0.1465720385313034, "learning_rate": 0.0001, "loss": 1.5666, "step": 9050},
    {"epoch": 0.25034387895460797, "grad_norm": 0.1553189605474472, "learning_rate": 0.0001, "loss": 1.5647, "step": 9100},
    {"epoch": 0.2517193947730399, "grad_norm": 0.12973164021968842, "learning_rate": 0.0001, "loss": 1.5647, "step": 9150},
    {"epoch": 0.2530949105914718, "grad_norm": 0.17071610689163208, "learning_rate": 0.0001, "loss": 1.5691, "step": 9200},
    {"epoch": 0.2544704264099037, "grad_norm": 0.1424863487482071, "learning_rate": 0.0001, "loss": 1.5654, "step": 9250},
    {"epoch": 0.2558459422283356, "grad_norm": 0.13117440044879913, "learning_rate": 0.0001, "loss": 1.5668, "step": 9300},
    {"epoch": 0.25722145804676755, "grad_norm": 0.14353643357753754, "learning_rate": 0.0001, "loss": 1.567, "step": 9350},
    {"epoch": 0.2585969738651995, "grad_norm": 0.18137438595294952, "learning_rate": 0.0001, "loss": 1.5648, "step": 9400},
    {"epoch": 0.25997248968363135, "grad_norm": 0.1453561782836914, "learning_rate": 0.0001, "loss": 1.5631, "step": 9450},
    {"epoch": 0.2613480055020633, "grad_norm": 0.13514567911624908, "learning_rate": 0.0001, "loss": 1.5633, "step": 9500},
    {"epoch": 0.2627235213204952, "grad_norm": 0.20019495487213135, "learning_rate": 0.0001, "loss": 1.5655, "step": 9550},
    {"epoch": 0.2640990371389271, "grad_norm": 0.18167296051979065, "learning_rate": 0.0001, "loss": 1.5634, "step": 9600},
    {"epoch": 0.265474552957359, "grad_norm": 0.1335984319448471, "learning_rate": 0.0001, "loss": 1.5609, "step": 9650},
    {"epoch": 0.2668500687757909, "grad_norm": 0.12064065039157867, "learning_rate": 0.0001, "loss": 1.5619, "step": 9700},
    {"epoch": 0.26822558459422285, "grad_norm": 0.16066288948059082, "learning_rate": 0.0001, "loss": 1.5639, "step": 9750},
    {"epoch": 0.2696011004126547, "grad_norm": 0.18084204196929932, "learning_rate": 0.0001, "loss": 1.5597, "step": 9800},
    {"epoch": 0.27097661623108665, "grad_norm": 0.14845338463783264, "learning_rate": 0.0001, "loss": 1.5626, "step": 9850},
    {"epoch": 0.2723521320495186, "grad_norm": 0.13293515145778656, "learning_rate": 0.0001, "loss": 1.5648, "step": 9900},
    {"epoch": 0.2737276478679505, "grad_norm": 0.14939668774604797, "learning_rate": 0.0001, "loss": 1.5612, "step": 9950},
    {"epoch": 0.2751031636863824, "grad_norm": 0.1553388386964798, "learning_rate": 0.0001, "loss": 1.5629, "step": 10000},
    {"epoch": 0.2764786795048143, "grad_norm": 0.22416375577449799, "learning_rate": 0.0001, "loss": 1.5621, "step": 10050},
    {"epoch": 0.27785419532324623, "grad_norm": 0.2197302132844925, "learning_rate": 0.0001, "loss": 1.5635, "step": 10100},
    {"epoch": 0.2792297111416781, "grad_norm": 0.17688524723052979, "learning_rate": 0.0001, "loss": 1.5616, "step": 10150},
    {"epoch": 0.28060522696011003, "grad_norm": 0.1495491862297058, "learning_rate": 0.0001, "loss": 1.5614, "step": 10200},
    {"epoch": 0.28198074277854196, "grad_norm": 0.15716291964054108, "learning_rate": 0.0001, "loss": 1.5592, "step": 10250},
    {"epoch": 0.2833562585969739, "grad_norm": 0.14116239547729492, "learning_rate": 0.0001, "loss": 1.5586, "step": 10300},
    {"epoch": 0.28473177441540576, "grad_norm": 0.11010037362575531, "learning_rate": 0.0001, "loss": 1.5603, "step": 10350},
    {"epoch": 0.2861072902338377, "grad_norm": 0.1838681697845459, "learning_rate": 0.0001, "loss": 1.561, "step": 10400},
    {"epoch": 0.2874828060522696, "grad_norm": 0.19001850485801697, "learning_rate": 0.0001, "loss": 1.5588, "step": 10450},
    {"epoch": 0.28885832187070154, "grad_norm": 0.20800583064556122, "learning_rate": 0.0001, "loss": 1.5607, "step": 10500},
    {"epoch": 0.2902338376891334, "grad_norm": 0.17948520183563232, "learning_rate": 0.0001, "loss": 1.56, "step": 10550},
    {"epoch": 0.29160935350756534, "grad_norm": 0.16178689897060394, "learning_rate": 0.0001, "loss": 1.5603, "step": 10600},
    {"epoch": 0.29298486932599727, "grad_norm": 0.1580880880355835, "learning_rate": 0.0001, "loss": 1.5606, "step": 10650},
    {"epoch": 0.29436038514442914, "grad_norm": 0.14434567093849182, "learning_rate": 0.0001, "loss": 1.5618, "step": 10700},
    {"epoch": 0.29573590096286106, "grad_norm": 0.17610964179039001, "learning_rate": 0.0001, "loss": 1.5613, "step": 10750},
    {"epoch": 0.297111416781293, "grad_norm": 0.15156705677509308, "learning_rate": 0.0001, "loss": 1.5563, "step": 10800},
    {"epoch": 0.2984869325997249, "grad_norm": 0.1466618925333023, "learning_rate": 0.0001, "loss": 1.5616, "step": 10850},
    {"epoch": 0.2998624484181568, "grad_norm": 0.1162666529417038, "learning_rate": 0.0001, "loss": 1.559, "step": 10900},
    {"epoch": 0.3012379642365887, "grad_norm": 0.15534426271915436, "learning_rate": 0.0001, "loss": 1.5594, "step": 10950},
    {"epoch": 0.30261348005502064, "grad_norm": 0.15940657258033752, "learning_rate": 0.0001, "loss": 1.5613, "step": 11000},
    {"epoch": 0.30398899587345257, "grad_norm": 0.1757323294878006, "learning_rate": 0.0001, "loss": 1.5588, "step": 11050},
    {"epoch": 0.30536451169188444, "grad_norm": 0.11815246194601059, "learning_rate": 0.0001, "loss": 1.5589, "step": 11100},
    {"epoch": 0.30674002751031637, "grad_norm": 0.2773960828781128, "learning_rate": 0.0001, "loss": 1.5584, "step": 11150},
    {"epoch": 0.3081155433287483, "grad_norm": 0.12601600587368011, "learning_rate": 0.0001, "loss": 1.5572, "step": 11200},
    {"epoch": 0.30949105914718017, "grad_norm": 0.1593768298625946, "learning_rate": 0.0001, "loss": 1.5575, "step": 11250},
    {"epoch": 0.3108665749656121, "grad_norm": 0.149438738822937, "learning_rate": 0.0001, "loss": 1.557, "step": 11300},
    {"epoch": 0.312242090784044, "grad_norm": 0.11111125349998474, "learning_rate": 0.0001, "loss": 1.5587, "step": 11350},
    {"epoch": 0.31361760660247595, "grad_norm": 0.1610383540391922, "learning_rate": 0.0001, "loss": 1.5572, "step": 11400},
    {"epoch": 0.3149931224209078, "grad_norm": 0.17420324683189392, "learning_rate": 0.0001, "loss": 1.5581, "step": 11450},
    {"epoch": 0.31636863823933975, "grad_norm": 0.16623131930828094, "learning_rate": 0.0001, "loss": 1.5561, "step": 11500},
    {"epoch": 0.3177441540577717, "grad_norm": 0.15828974545001984, "learning_rate": 0.0001, "loss": 1.5544, "step": 11550},
    {"epoch": 0.31911966987620355, "grad_norm": 0.15183350443840027, "learning_rate": 0.0001, "loss": 1.5555, "step": 11600},
    {"epoch": 0.3204951856946355, "grad_norm": 0.16378933191299438, "learning_rate": 0.0001, "loss": 1.5532, "step": 11650},
    {"epoch": 0.3218707015130674, "grad_norm": 0.15861773490905762, "learning_rate": 0.0001, "loss": 1.5568, "step": 11700},
    {"epoch": 0.32324621733149933, "grad_norm": 0.13385528326034546, "learning_rate": 0.0001, "loss": 1.5568, "step": 11750},
    {"epoch": 0.3246217331499312, "grad_norm": 0.16392391920089722, "learning_rate": 0.0001, "loss": 1.5548, "step": 11800},
    {"epoch": 0.32599724896836313, "grad_norm": 0.14662721753120422, "learning_rate": 0.0001, "loss": 1.5539, "step": 11850},
    {"epoch": 0.32737276478679506, "grad_norm": 0.13727930188179016, "learning_rate": 0.0001, "loss": 1.5552, "step": 11900},
    {"epoch": 0.328748280605227, "grad_norm": 0.15576840937137604, "learning_rate": 0.0001, "loss": 1.5552, "step": 11950},
    {"epoch": 0.33012379642365886, "grad_norm": 0.1717185378074646, "learning_rate": 0.0001, "loss": 1.5538, "step": 12000},
    {"epoch": 0.3314993122420908, "grad_norm": 0.16970685124397278, "learning_rate": 0.0001, "loss": 1.5556, "step": 12050},
    {"epoch": 0.3328748280605227, "grad_norm": 0.1489485800266266, "learning_rate": 0.0001, "loss": 1.5527, "step": 12100},
    {"epoch": 0.3342503438789546, "grad_norm": 0.1374077945947647, "learning_rate": 0.0001, "loss": 1.5528, "step": 12150},
    {"epoch": 0.3356258596973865, "grad_norm": 0.19402620196342468, "learning_rate": 0.0001, "loss": 1.5547, "step": 12200},
    {"epoch": 0.33700137551581844, "grad_norm": 0.1642199009656906, "learning_rate": 0.0001, "loss": 1.5538, "step": 12250},
    {"epoch": 0.33837689133425036, "grad_norm": 0.13107603788375854, "learning_rate": 0.0001, "loss": 1.5547, "step": 12300},
    {"epoch": 0.33975240715268223, "grad_norm": 0.1858353465795517, "learning_rate": 0.0001, "loss": 1.5526, "step": 12350},
    {"epoch": 0.34112792297111416, "grad_norm": 0.1422649323940277, "learning_rate": 0.0001, "loss": 1.5523, "step": 12400},
    {"epoch": 0.3425034387895461, "grad_norm": 0.16968269646167755, "learning_rate": 0.0001, "loss": 1.554, "step": 12450},
    {"epoch": 0.343878954607978, "grad_norm": 0.1434723138809204, "learning_rate": 0.0001, "loss": 1.5544, "step": 12500},
    {"epoch": 0.3452544704264099, "grad_norm": 0.18616297841072083, "learning_rate": 0.0001, "loss": 1.5506, "step": 12550},
    {"epoch": 0.3466299862448418, "grad_norm": 0.16946491599082947, "learning_rate": 0.0001, "loss": 1.5524, "step": 12600},
    {"epoch": 0.34800550206327374, "grad_norm": 0.17658023536205292, "learning_rate": 0.0001, "loss": 1.5536, "step": 12650},
    {"epoch": 0.3493810178817056, "grad_norm": 0.15203554928302765, "learning_rate": 0.0001, "loss": 1.5507, "step": 12700},
    {"epoch": 0.35075653370013754, "grad_norm": 0.13097505271434784, "learning_rate": 0.0001, "loss": 1.5542, "step": 12750},
    {"epoch": 0.35213204951856947, "grad_norm": 0.14317452907562256, "learning_rate": 0.0001, "loss": 1.5534, "step": 12800},
    {"epoch": 0.3535075653370014, "grad_norm": 0.12445474416017532, "learning_rate": 0.0001, "loss": 1.5535, "step": 12850},
    {"epoch": 0.35488308115543327, "grad_norm": 0.1327485293149948, "learning_rate": 0.0001, "loss": 1.5521, "step": 12900},
    {"epoch": 0.3562585969738652, "grad_norm": 0.15487389266490936, "learning_rate": 0.0001, "loss": 1.553, "step": 12950},
    {"epoch": 0.3576341127922971, "grad_norm": 0.23483023047447205, "learning_rate": 0.0001, "loss": 1.5502, "step": 13000},
    {"epoch": 0.35900962861072905, "grad_norm": 0.14994105696678162, "learning_rate": 0.0001, "loss": 1.5518, "step": 13050},
    {"epoch": 0.3603851444291609, "grad_norm": 0.12222074717283249, "learning_rate": 0.0001, "loss": 1.5508, "step": 13100},
    {"epoch": 0.36176066024759285, "grad_norm": 0.1246858537197113, "learning_rate": 0.0001, "loss": 1.552, "step": 13150},
    {"epoch": 0.3631361760660248, "grad_norm": 0.15825419127941132, "learning_rate": 0.0001, "loss": 1.5499, "step": 13200},
    {"epoch": 0.36451169188445665, "grad_norm": 0.17960667610168457, "learning_rate": 0.0001, "loss": 1.5551, "step": 13250},
    {"epoch": 0.3658872077028886, "grad_norm": 0.1628105491399765, "learning_rate": 0.0001, "loss": 1.5544, "step": 13300},
    {"epoch": 0.3672627235213205, "grad_norm": 0.15981099009513855, "learning_rate": 0.0001, "loss": 1.5527, "step": 13350},
    {"epoch": 0.3686382393397524, "grad_norm": 0.11882206797599792, "learning_rate": 0.0001, "loss": 1.5505, "step": 13400},
    {"epoch": 0.3700137551581843, "grad_norm": 0.1369376927614212, "learning_rate": 0.0001, "loss": 1.5487, "step": 13450},
    {"epoch": 0.3713892709766162, "grad_norm": 0.1341916173696518, "learning_rate": 0.0001, "loss": 1.5489, "step": 13500},
    {"epoch": 0.37276478679504815, "grad_norm": 0.1692420095205307, "learning_rate": 0.0001, "loss": 1.5486, "step": 13550},
    {"epoch": 0.3741403026134801, "grad_norm": 0.12764231860637665, "learning_rate": 0.0001, "loss": 1.5479, "step": 13600},
    {"epoch": 0.37551581843191195, "grad_norm": 0.1610202044248581, "learning_rate": 0.0001, "loss": 1.5493, "step": 13650},
    {"epoch": 0.3768913342503439, "grad_norm": 0.20008735358715057, "learning_rate": 0.0001, "loss": 1.5504, "step": 13700},
    {"epoch": 0.3782668500687758, "grad_norm": 0.14668354392051697, "learning_rate": 0.0001, "loss": 1.5459, "step": 13750},
    {"epoch": 0.3796423658872077, "grad_norm": 0.16147159039974213, "learning_rate": 0.0001, "loss": 1.5497, "step": 13800},
    {"epoch": 0.3810178817056396, "grad_norm": 0.2127738893032074, "learning_rate": 0.0001, "loss": 1.5496, "step": 13850},
    {"epoch": 0.38239339752407153, "grad_norm": 0.14936117827892303, "learning_rate": 0.0001, "loss": 1.5487, "step": 13900},
    {"epoch": 0.38376891334250346, "grad_norm": 0.1460547298192978, "learning_rate": 0.0001, "loss": 1.5513, "step": 13950},
    {"epoch": 0.38514442916093533, "grad_norm": 0.1418396234512329, "learning_rate": 0.0001, "loss": 1.5489, "step": 14000},
    {"epoch": 0.38651994497936726, "grad_norm": 0.12608648836612701, "learning_rate": 0.0001, "loss": 1.5478, "step": 14050},
    {"epoch": 0.3878954607977992, "grad_norm": 0.12352428585290909, "learning_rate": 0.0001, "loss": 1.5472, "step": 14100},
    {"epoch": 0.3892709766162311, "grad_norm": 0.140400692820549, "learning_rate": 0.0001, "loss": 1.5471, "step": 14150},
    {"epoch": 0.390646492434663, "grad_norm": 0.14015322923660278, "learning_rate": 0.0001, "loss": 1.5495, "step": 14200},
    {"epoch": 0.3920220082530949, "grad_norm": 0.13664819300174713, "learning_rate": 0.0001, "loss": 1.5515, "step": 14250},
    {"epoch": 0.39339752407152684, "grad_norm": 0.19558057188987732, "learning_rate": 0.0001, "loss": 1.5493, "step": 14300},
    {"epoch": 0.3947730398899587, "grad_norm": 0.14744845032691956, "learning_rate": 0.0001, "loss": 1.547, "step": 14350},
    {"epoch": 0.39614855570839064, "grad_norm": 0.13610410690307617, "learning_rate": 0.0001, "loss": 1.5499, "step": 14400},
    {"epoch": 0.39752407152682256, "grad_norm": 0.16850556433200836, "learning_rate": 0.0001, "loss": 1.5475, "step": 14450},
    {"epoch": 0.3988995873452545, "grad_norm": 0.11494544893503189, "learning_rate": 0.0001, "loss": 1.5441, "step": 14500},
    {"epoch": 0.40027510316368636, "grad_norm": 0.1311003863811493, "learning_rate": 0.0001, "loss": 1.5451, "step": 14550},
    {"epoch": 0.4016506189821183, "grad_norm": 0.16432379186153412, "learning_rate": 0.0001, "loss": 1.5483, "step": 14600},
    {"epoch": 0.4030261348005502, "grad_norm": 0.16200096905231476, "learning_rate": 0.0001, "loss": 1.5458, "step": 14650},
    {"epoch": 0.40440165061898214, "grad_norm": 0.15324008464813232, "learning_rate": 0.0001, "loss": 1.5486, "step": 14700},
    {"epoch": 0.405777166437414, "grad_norm": 0.2114071398973465, "learning_rate": 0.0001, "loss": 1.5463, "step": 14750},
    {"epoch": 0.40715268225584594, "grad_norm": 0.1691250056028366, "learning_rate": 0.0001, "loss": 1.5449, "step": 14800},
    {"epoch": 0.40852819807427787, "grad_norm": 0.15044333040714264, "learning_rate": 0.0001, "loss": 1.5454, "step": 14850},
    {"epoch": 0.40990371389270974, "grad_norm": 0.14457371830940247, "learning_rate": 0.0001, "loss": 1.5475, "step": 14900},
    {"epoch": 0.41127922971114167, "grad_norm": 0.15145525336265564, "learning_rate": 0.0001, "loss": 1.5474, "step": 14950},
    {"epoch": 0.4126547455295736, "grad_norm": 0.1273120492696762, "learning_rate": 0.0001, "loss": 1.5446, "step": 15000},
    {"epoch": 0.4140302613480055, "grad_norm": 0.1621488630771637, "learning_rate": 0.0001, "loss": 1.5464, "step": 15050},
    {"epoch": 0.4154057771664374, "grad_norm": 0.1621532440185547, "learning_rate": 0.0001, "loss": 1.5472, "step": 15100},
    {"epoch": 0.4167812929848693, "grad_norm": 0.13030585646629333, "learning_rate": 0.0001, "loss": 1.5416, "step": 15150},
    {"epoch": 0.41815680880330125, "grad_norm": 0.18759876489639282, "learning_rate": 0.0001, "loss": 1.5448, "step": 15200},
    {"epoch": 0.4195323246217332, "grad_norm": 0.12614044547080994, "learning_rate": 0.0001, "loss": 1.5459, "step": 15250},
    {"epoch": 0.42090784044016505, "grad_norm": 0.11533529311418533, "learning_rate": 0.0001, "loss": 1.5446, "step": 15300},
    {"epoch": 0.422283356258597, "grad_norm": 0.1886916160583496, "learning_rate": 0.0001, "loss": 1.5466, "step": 15350},
    {"epoch": 0.4236588720770289, "grad_norm": 0.2204965353012085, "learning_rate": 0.0001, "loss": 1.5436, "step": 15400},
    {"epoch": 0.4250343878954608, "grad_norm": 0.12042222172021866, "learning_rate": 0.0001, "loss": 1.5425, "step": 15450},
    {"epoch": 0.4264099037138927, "grad_norm": 0.135628342628479, "learning_rate": 0.0001, "loss": 1.5464, "step": 15500},
    {"epoch": 0.42778541953232463, "grad_norm": 0.15042053163051605, "learning_rate": 0.0001, "loss": 1.5441, "step": 15550},
    {"epoch": 0.42916093535075656, "grad_norm": 0.1294483244419098, "learning_rate": 0.0001, "loss": 1.5468, "step": 15600},
    {"epoch": 0.4305364511691884, "grad_norm": 0.153069868683815, "learning_rate": 0.0001, "loss": 1.5416, "step": 15650},
    {"epoch": 0.43191196698762035, "grad_norm": 0.129000723361969, "learning_rate": 0.0001, "loss": 1.5434, "step": 15700},
    {"epoch": 0.4332874828060523, "grad_norm": 0.1890910267829895, "learning_rate": 0.0001, "loss": 1.5426, "step": 15750},
    {"epoch": 0.4346629986244842, "grad_norm": 0.14907212555408478, "learning_rate": 0.0001, "loss": 1.5447, "step": 15800},
    {"epoch": 0.4360385144429161, "grad_norm": 0.1549520045518875, "learning_rate": 0.0001, "loss": 1.5438, "step": 15850},
    {"epoch": 0.437414030261348, "grad_norm": 0.1726304590702057, "learning_rate": 0.0001, "loss": 1.5431, "step": 15900},
    {"epoch": 0.43878954607977994, "grad_norm": 0.14929509162902832, "learning_rate": 0.0001, "loss": 1.5408, "step": 15950},
    {"epoch": 0.4401650618982118, "grad_norm": 0.1404862105846405, "learning_rate": 0.0001, "loss": 1.5431, "step": 16000},
    {"epoch": 0.44154057771664373, "grad_norm": 0.1365077942609787, "learning_rate": 0.0001, "loss": 1.5434, "step": 16050},
    {"epoch": 0.44291609353507566, "grad_norm": 0.16866528987884521, "learning_rate": 0.0001, "loss": 1.5425, "step": 16100},
    {"epoch": 0.4442916093535076, "grad_norm": 0.13150258362293243, "learning_rate": 0.0001, "loss": 1.5418, "step": 16150},
    {"epoch": 0.44566712517193946, "grad_norm": 0.17333872616291046, "learning_rate": 0.0001, "loss": 1.5415, "step": 16200},
    {"epoch": 0.4470426409903714, "grad_norm": 0.2110324501991272, "learning_rate": 0.0001, "loss": 1.5434, "step": 16250},
    {"epoch": 0.4484181568088033, "grad_norm": 0.19441699981689453, "learning_rate": 0.0001, "loss": 1.5408, "step": 16300},
    {"epoch": 0.4497936726272352, "grad_norm": 0.1581384241580963, "learning_rate": 0.0001, "loss": 1.5428, "step": 16350},
    {"epoch": 0.4511691884456671, "grad_norm": 0.14479832351207733, "learning_rate": 0.0001, "loss": 1.5444, "step": 16400},
    {"epoch": 0.45254470426409904, "grad_norm": 0.16739803552627563, "learning_rate": 0.0001, "loss": 1.541, "step": 16450},
    {"epoch": 0.45392022008253097, "grad_norm": 0.14801441133022308, "learning_rate": 0.0001, "loss": 1.54, "step": 16500},
    {"epoch": 0.45529573590096284, "grad_norm": 0.13265211880207062, "learning_rate": 0.0001, "loss": 1.5417, "step": 16550},
    {"epoch": 0.45667125171939477, "grad_norm": 0.1164972111582756, "learning_rate": 0.0001, "loss": 1.5411, "step": 16600},
    {"epoch": 0.4580467675378267, "grad_norm": 0.1256764531135559, "learning_rate": 0.0001, "loss": 1.538, "step": 16650},
    {"epoch": 0.4594222833562586, "grad_norm": 0.13301979005336761, "learning_rate": 0.0001, "loss": 1.5409, "step": 16700},
    {"epoch": 0.4607977991746905, "grad_norm": 0.1520063877105713, "learning_rate": 0.0001, "loss": 1.5406, "step": 16750},
    {"epoch": 0.4621733149931224, "grad_norm": 0.12742547690868378, "learning_rate": 0.0001, "loss": 1.5405, "step": 16800},
    {"epoch": 0.46354883081155435, "grad_norm": 0.17311689257621765, "learning_rate": 0.0001, "loss": 1.5416, "step": 16850},
    {"epoch": 0.4649243466299862, "grad_norm": 0.14269371330738068, "learning_rate": 0.0001, "loss": 1.5413, "step": 16900},
    {"epoch": 0.46629986244841815, "grad_norm": 0.14457383751869202, "learning_rate": 0.0001, "loss": 1.5415, "step": 16950},
    {"epoch": 0.4676753782668501, "grad_norm": 0.13189777731895447, "learning_rate": 0.0001, "loss": 1.5388, "step": 17000},
    {"epoch": 0.469050894085282, "grad_norm": 0.16488979756832123, "learning_rate": 0.0001, "loss": 1.5398, "step": 17050},
    {"epoch": 0.47042640990371387, "grad_norm": 0.15953794121742249, "learning_rate": 0.0001, "loss": 1.5387, "step": 17100},
    {"epoch": 0.4718019257221458, "grad_norm": 0.11922045797109604, "learning_rate": 0.0001, "loss": 1.5389, "step": 17150},
    {"epoch": 0.4731774415405777, "grad_norm": 0.13724352419376373, "learning_rate": 0.0001, "loss": 1.5399, "step": 17200},
    {"epoch": 0.47455295735900965, "grad_norm": 0.14968377351760864, "learning_rate": 0.0001, "loss": 1.5419, "step": 17250},
    {"epoch": 0.4759284731774415, "grad_norm": 0.17267867922782898, "learning_rate": 0.0001, "loss": 1.5395, "step": 17300},
    {"epoch": 0.47730398899587345, "grad_norm": 0.14226895570755005, "learning_rate": 0.0001, "loss": 1.5386, "step": 17350},
    {"epoch": 0.4786795048143054, "grad_norm": 0.15129058063030243, "learning_rate": 0.0001, "loss": 1.5424, "step": 17400},
    {"epoch": 0.48005502063273725, "grad_norm": 0.2448931634426117, "learning_rate": 0.0001, "loss": 1.5396, "step": 17450},
    {"epoch": 0.4814305364511692, "grad_norm": 0.2225511074066162, "learning_rate": 0.0001, "loss": 1.5404, "step": 17500},
    {"epoch": 0.4828060522696011, "grad_norm": 0.1891157031059265, "learning_rate": 0.0001, "loss": 1.5394, "step": 17550},
    {"epoch": 0.48418156808803303, "grad_norm": 0.1472170352935791, "learning_rate": 0.0001, "loss": 1.5417, "step": 17600},
    {"epoch": 0.4855570839064649, "grad_norm": 0.1682361215353012, "learning_rate": 0.0001, "loss": 1.5377, "step": 17650},
    {"epoch": 0.48693259972489683, "grad_norm": 0.18433457612991333, "learning_rate": 0.0001, "loss": 1.5396, "step": 17700},
    {"epoch": 0.48830811554332876, "grad_norm": 0.15077999234199524, "learning_rate": 0.0001, "loss": 1.5392, "step": 17750},
    {"epoch": 0.4896836313617607, "grad_norm": 0.16640494763851166, "learning_rate": 0.0001, "loss": 1.5381, "step": 17800},
    {"epoch": 0.49105914718019256, "grad_norm": 0.1587841510772705, "learning_rate": 0.0001, "loss": 1.5386, "step": 17850},
    {"epoch": 0.4924346629986245, "grad_norm": 0.15444575250148773, "learning_rate": 0.0001, "loss": 1.5389, "step": 17900},
    {"epoch": 0.4938101788170564, "grad_norm": 0.18525558710098267, "learning_rate": 0.0001, "loss": 1.5404, "step": 17950},
    {"epoch": 0.4951856946354883, "grad_norm": 0.12790025770664215, "learning_rate": 0.0001, "loss": 1.5394, "step": 18000},
    {"epoch": 0.4965612104539202, "grad_norm": 0.12284336239099503, "learning_rate": 0.0001, "loss": 1.5389, "step": 18050},
    {"epoch": 0.49793672627235214, "grad_norm": 0.12023458629846573, "learning_rate": 0.0001, "loss": 1.5345, "step": 18100},
    {"epoch": 0.49931224209078406, "grad_norm": 0.220647931098938, "learning_rate": 0.0001, "loss": 1.5398, "step": 18150},
    {"epoch": 0.5006877579092159, "grad_norm": 0.1563023179769516, "learning_rate": 0.0001, "loss": 1.5361, "step": 18200},
    {"epoch": 0.5020632737276479, "grad_norm": 0.15485098958015442, "learning_rate": 0.0001, "loss": 1.539, "step": 18250},
    {"epoch": 0.5034387895460798, "grad_norm": 0.21312743425369263, "learning_rate": 0.0001, "loss": 1.5378, "step": 18300},
    {"epoch": 0.5048143053645117, "grad_norm": 0.1381313055753708, "learning_rate": 0.0001, "loss": 1.5396, "step": 18350},
    {"epoch": 0.5061898211829436, "grad_norm": 0.1357322335243225, "learning_rate": 0.0001, "loss": 1.5398, "step": 18400},
    {"epoch": 0.5075653370013755, "grad_norm": 0.16733530163764954, "learning_rate": 0.0001, "loss": 1.5381, "step": 18450},
    {"epoch": 0.5089408528198074, "grad_norm": 0.12985962629318237, "learning_rate": 0.0001, "loss": 1.5391, "step": 18500},
    {"epoch": 0.5103163686382394, "grad_norm": 0.17726540565490723, "learning_rate": 0.0001, "loss": 1.5406, "step": 18550},
    {"epoch": 0.5116918844566712, "grad_norm": 0.1869622766971588, "learning_rate": 0.0001, "loss": 1.5379, "step": 18600},
    {"epoch": 0.5130674002751031, "grad_norm": 0.19111870229244232, "learning_rate": 0.0001, "loss": 1.5373, "step": 18650},
    {"epoch": 0.5144429160935351, "grad_norm": 0.16479162871837616, "learning_rate": 0.0001, "loss": 1.5346, "step": 18700},
    {"epoch": 0.515818431911967, "grad_norm": 0.17092610895633698, "learning_rate": 0.0001, "loss": 1.5387, "step": 18750},
    {"epoch": 0.517193947730399, "grad_norm": 0.1678820550441742, "learning_rate": 0.0001, "loss": 1.5376, "step": 18800},
    {"epoch": 0.5185694635488308, "grad_norm": 0.14618681371212006, "learning_rate": 0.0001, "loss": 1.5353, "step": 18850},
    {"epoch": 0.5199449793672627, "grad_norm": 0.192416712641716, "learning_rate": 0.0001, "loss": 1.54, "step": 18900},
    {"epoch": 0.5213204951856947, "grad_norm": 0.17582687735557556, "learning_rate": 0.0001, "loss": 1.5346, "step": 18950},
    {"epoch": 0.5226960110041265, "grad_norm": 0.19511322677135468, "learning_rate": 0.0001, "loss": 1.5371, "step": 19000},
    {"epoch": 0.5240715268225584, "grad_norm": 0.15874715149402618, "learning_rate": 0.0001, "loss": 1.5362, "step": 19050},
    {"epoch": 0.5254470426409904, "grad_norm": 0.17555968463420868, "learning_rate": 0.0001, "loss": 1.5342, "step": 19100},
    {"epoch": 0.5268225584594223, "grad_norm": 0.17204701900482178, "learning_rate": 0.0001, "loss": 1.5356, "step": 19150},
    {"epoch": 0.5281980742778541, "grad_norm": 0.1334696263074875, "learning_rate": 0.0001, "loss": 1.5378, "step": 19200},
    {"epoch": 0.5295735900962861, "grad_norm": 0.12202008068561554, "learning_rate": 0.0001, "loss": 1.536, "step": 19250},
    {"epoch": 0.530949105914718, "grad_norm": 0.1914770007133484, "learning_rate": 0.0001, "loss": 1.5361, "step": 19300},
    {"epoch": 0.53232462173315, "grad_norm": 0.18114732205867767, "learning_rate": 0.0001, "loss": 1.5391, "step": 19350},
    {"epoch": 0.5337001375515819, "grad_norm": 0.13230808079242706, "learning_rate": 0.0001, "loss": 1.5398, "step": 19400},
    {"epoch": 0.5350756533700137, "grad_norm": 0.24269579350948334, "learning_rate": 0.0001, "loss": 1.535, "step": 19450},
    {"epoch": 0.5364511691884457, "grad_norm": 0.14454102516174316, "learning_rate": 0.0001, "loss": 1.5339, "step": 19500},
    {"epoch": 0.5378266850068776, "grad_norm": 0.17638514935970306, "learning_rate": 0.0001, "loss": 1.5385, "step": 19550},
    {"epoch": 0.5392022008253095, "grad_norm": 0.1496788114309311, "learning_rate": 0.0001, "loss": 1.5389, "step": 19600},
    {"epoch": 0.5405777166437414, "grad_norm": 0.1927812695503235, "learning_rate": 0.0001, "loss": 1.5357, "step": 19650},
    {"epoch": 0.5419532324621733, "grad_norm": 0.1372377574443817, "learning_rate": 0.0001, "loss": 1.5363, "step": 19700},
    {"epoch": 0.5433287482806052, "grad_norm": 0.15738138556480408, "learning_rate": 0.0001, "loss": 1.5358, "step": 19750},
    {"epoch": 0.5447042640990372, "grad_norm": 0.13599953055381775, "learning_rate": 0.0001, "loss": 1.5357, "step": 19800},
    {"epoch": 0.546079779917469, "grad_norm": 0.16571839153766632, "learning_rate": 0.0001, "loss": 1.5343, "step": 19850},
    {"epoch": 0.547455295735901, "grad_norm": 0.14264202117919922, "learning_rate": 0.0001, "loss": 1.5315, "step": 19900},
    {"epoch": 0.5488308115543329, "grad_norm": 0.15331332385540009, "learning_rate": 0.0001, "loss": 1.5344, "step": 19950},
    {"epoch": 0.5502063273727648, "grad_norm": 0.1380966752767563, "learning_rate": 0.0001,
|
"loss": 1.5357, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.5515818431911967, |
|
"grad_norm": 0.198713481426239, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5323, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 0.5529573590096286, |
|
"grad_norm": 0.12092329561710358, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5328, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.5543328748280605, |
|
"grad_norm": 0.13770416378974915, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5346, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 0.5557083906464925, |
|
"grad_norm": 0.12443804740905762, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5312, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.5570839064649243, |
|
"grad_norm": 0.15430398285388947, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5322, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 0.5584594222833562, |
|
"grad_norm": 0.1415732502937317, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5338, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.5598349381017882, |
|
"grad_norm": 0.2753756642341614, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5329, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 0.5612104539202201, |
|
"grad_norm": 0.1666756421327591, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5337, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.562585969738652, |
|
"grad_norm": 0.17720907926559448, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5312, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 0.5639614855570839, |
|
"grad_norm": 0.18275785446166992, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5333, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.5653370013755158, |
|
"grad_norm": 0.20009452104568481, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5301, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 0.5667125171939478, |
|
"grad_norm": 0.18812476098537445, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5332, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.5680880330123796, |
|
"grad_norm": 0.15448282659053802, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5323, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 0.5694635488308115, |
|
"grad_norm": 0.1646738499403, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5335, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.5708390646492435, |
|
"grad_norm": 0.15908415615558624, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5319, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 0.5722145804676754, |
|
"grad_norm": 0.15112848579883575, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5342, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.5735900962861072, |
|
"grad_norm": 0.3316288888454437, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5344, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 0.5749656121045392, |
|
"grad_norm": 0.13579101860523224, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5321, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.5763411279229711, |
|
"grad_norm": 0.2203134000301361, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5324, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 0.5777166437414031, |
|
"grad_norm": 0.1271039992570877, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5328, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.579092159559835, |
|
"grad_norm": 0.3165966272354126, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5349, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 0.5804676753782668, |
|
"grad_norm": 0.1456591635942459, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5343, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.5818431911966988, |
|
"grad_norm": 0.16555163264274597, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5349, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 0.5832187070151307, |
|
"grad_norm": 0.22577494382858276, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5342, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.5845942228335625, |
|
"grad_norm": 0.23455490171909332, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5346, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 0.5859697386519945, |
|
"grad_norm": 0.2247081696987152, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5316, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.5873452544704264, |
|
"grad_norm": 0.15159213542938232, |
|
"learning_rate": 0.0001, |
|
"loss": 1.534, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 0.5887207702888583, |
|
"grad_norm": 0.20483700931072235, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5295, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.5900962861072903, |
|
"grad_norm": 0.16780568659305573, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5341, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 0.5914718019257221, |
|
"grad_norm": 0.15840616822242737, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5339, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.5928473177441541, |
|
"grad_norm": 0.1488318294286728, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5341, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 0.594222833562586, |
|
"grad_norm": 0.13899248838424683, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5323, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.5955983493810179, |
|
"grad_norm": 0.15024836361408234, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5318, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 0.5969738651994498, |
|
"grad_norm": 0.19209244847297668, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5325, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.5983493810178817, |
|
"grad_norm": 0.20580926537513733, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5324, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 0.5997248968363136, |
|
"grad_norm": 0.2091200202703476, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5282, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.6011004126547456, |
|
"grad_norm": 0.1571815311908722, |
|
"learning_rate": 0.0001, |
|
"loss": 1.532, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 0.6024759284731774, |
|
"grad_norm": 0.17794279754161835, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5326, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.6038514442916093, |
|
"grad_norm": 0.1439165472984314, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5325, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 0.6052269601100413, |
|
"grad_norm": 0.15884612500667572, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5329, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.6066024759284732, |
|
"grad_norm": 0.26263782382011414, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5315, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 0.6079779917469051, |
|
"grad_norm": 0.19535377621650696, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5308, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.609353507565337, |
|
"grad_norm": 0.14018963277339935, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5332, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 0.6107290233837689, |
|
"grad_norm": 0.15927653014659882, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5299, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.6121045392022009, |
|
"grad_norm": 0.143597811460495, |
|
"learning_rate": 0.0001, |
|
"loss": 1.532, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 0.6134800550206327, |
|
"grad_norm": 0.15887697041034698, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5313, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.6148555708390646, |
|
"grad_norm": 0.1907578855752945, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5323, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 0.6162310866574966, |
|
"grad_norm": 0.189689502120018, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5319, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.6176066024759285, |
|
"grad_norm": 0.15399134159088135, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5291, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 0.6189821182943603, |
|
"grad_norm": 0.16801948845386505, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5319, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.6203576341127923, |
|
"grad_norm": 0.21341322362422943, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5311, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 0.6217331499312242, |
|
"grad_norm": 0.19961433112621307, |
|
"learning_rate": 0.0001, |
|
"loss": 1.529, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.6231086657496562, |
|
"grad_norm": 0.1254952847957611, |
|
"learning_rate": 0.0001, |
|
"loss": 1.528, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 0.624484181568088, |
|
"grad_norm": 0.21346162259578705, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5323, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.6258596973865199, |
|
"grad_norm": 0.1551300436258316, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5302, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 0.6272352132049519, |
|
"grad_norm": 0.1974526047706604, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5294, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.6286107290233838, |
|
"grad_norm": 0.130974680185318, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5303, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 0.6299862448418156, |
|
"grad_norm": 0.17787273228168488, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5299, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.6313617606602476, |
|
"grad_norm": 0.19317127764225006, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5295, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 0.6327372764786795, |
|
"grad_norm": 0.2229757010936737, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5307, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.6341127922971114, |
|
"grad_norm": 0.17582648992538452, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5294, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 0.6354883081155434, |
|
"grad_norm": 0.17122450470924377, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5291, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.6368638239339752, |
|
"grad_norm": 0.16124916076660156, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5268, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 0.6382393397524071, |
|
"grad_norm": 0.18122687935829163, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5274, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.6396148555708391, |
|
"grad_norm": 0.17480894923210144, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5276, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 0.640990371389271, |
|
"grad_norm": 0.1798102855682373, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5267, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.6423658872077029, |
|
"grad_norm": 0.19186878204345703, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5294, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 0.6437414030261348, |
|
"grad_norm": 0.1212744414806366, |
|
"learning_rate": 0.0001, |
|
"loss": 1.527, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.6451169188445667, |
|
"grad_norm": 0.16844585537910461, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5265, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 0.6464924346629987, |
|
"grad_norm": 0.16216999292373657, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5288, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.6478679504814305, |
|
"grad_norm": 0.157547265291214, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5298, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 0.6492434662998624, |
|
"grad_norm": 0.20760610699653625, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5264, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.6506189821182944, |
|
"grad_norm": 0.19178840517997742, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5251, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 0.6519944979367263, |
|
"grad_norm": 0.17904846370220184, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5293, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.6533700137551581, |
|
"grad_norm": 0.14902061223983765, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5278, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 0.6547455295735901, |
|
"grad_norm": 0.1306075155735016, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5274, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.656121045392022, |
|
"grad_norm": 0.14361289143562317, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5259, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 0.657496561210454, |
|
"grad_norm": 0.23775485157966614, |
|
"learning_rate": 0.0001, |
|
"loss": 1.528, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.6588720770288858, |
|
"grad_norm": 0.12788158655166626, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5285, |
|
"step": 23950 |
|
}, |
|
{ |
|
"epoch": 0.6602475928473177, |
|
"grad_norm": 0.11719505488872528, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5275, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.6616231086657497, |
|
"grad_norm": 0.2011108100414276, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5276, |
|
"step": 24050 |
|
}, |
|
{ |
|
"epoch": 0.6629986244841816, |
|
"grad_norm": 0.16335125267505646, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5305, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.6643741403026134, |
|
"grad_norm": 0.15488557517528534, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5259, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 0.6657496561210454, |
|
"grad_norm": 0.2333500236272812, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5269, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.6671251719394773, |
|
"grad_norm": 0.14059284329414368, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5298, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 0.6685006877579092, |
|
"grad_norm": 0.24036471545696259, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5274, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.6698762035763411, |
|
"grad_norm": 0.13437625765800476, |
|
"learning_rate": 0.0001, |
|
"loss": 1.529, |
|
"step": 24350 |
|
}, |
|
{ |
|
"epoch": 0.671251719394773, |
|
"grad_norm": 0.25569766759872437, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5259, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.672627235213205, |
|
"grad_norm": 0.14324542880058289, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5286, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 0.6740027510316369, |
|
"grad_norm": 0.2062855213880539, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5259, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.6753782668500687, |
|
"grad_norm": 0.18274646997451782, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5293, |
|
"step": 24550 |
|
}, |
|
{ |
|
"epoch": 0.6767537826685007, |
|
"grad_norm": 0.16611768305301666, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5283, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.6781292984869326, |
|
"grad_norm": 0.2058711051940918, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5253, |
|
"step": 24650 |
|
}, |
|
{ |
|
"epoch": 0.6795048143053645, |
|
"grad_norm": 0.16299676895141602, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5281, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.6808803301237965, |
|
"grad_norm": 0.17875225841999054, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5266, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 0.6822558459422283, |
|
"grad_norm": 0.18055297434329987, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5269, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.6836313617606602, |
|
"grad_norm": 0.22491872310638428, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5236, |
|
"step": 24850 |
|
}, |
|
{ |
|
"epoch": 0.6850068775790922, |
|
"grad_norm": 0.17760007083415985, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5249, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.686382393397524, |
|
"grad_norm": 0.19768892228603363, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5254, |
|
"step": 24950 |
|
}, |
|
{ |
|
"epoch": 0.687757909215956, |
|
"grad_norm": 0.16851931810379028, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5284, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.6891334250343879, |
|
"grad_norm": 0.16162404417991638, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5278, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 0.6905089408528198, |
|
"grad_norm": 0.1808663010597229, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5239, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.6918844566712518, |
|
"grad_norm": 0.15550534427165985, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5266, |
|
"step": 25150 |
|
}, |
|
{ |
|
"epoch": 0.6932599724896836, |
|
"grad_norm": 0.22426332533359528, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5226, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.6946354883081155, |
|
"grad_norm": 0.11868047714233398, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5256, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 0.6960110041265475, |
|
"grad_norm": 0.21659235656261444, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5284, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.6973865199449794, |
|
"grad_norm": 0.1800456941127777, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5235, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 0.6987620357634112, |
|
"grad_norm": 0.21043701469898224, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5275, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.7001375515818432, |
|
"grad_norm": 0.18925617635250092, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5279, |
|
"step": 25450 |
|
}, |
|
{ |
|
"epoch": 0.7015130674002751, |
|
"grad_norm": 0.1537819653749466, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5243, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.7028885832187071, |
|
"grad_norm": 0.1832038164138794, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5255, |
|
"step": 25550 |
|
}, |
|
{ |
|
"epoch": 0.7042640990371389, |
|
"grad_norm": 0.186794713139534, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5261, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.7056396148555708, |
|
"grad_norm": 0.12374402582645416, |
|
"learning_rate": 0.0001, |
|
"loss": 1.526, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 0.7070151306740028, |
|
"grad_norm": 0.16702401638031006, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5245, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.7083906464924347, |
|
"grad_norm": 0.1393430233001709, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5254, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 0.7097661623108665, |
|
"grad_norm": 0.1630173921585083, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5251, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.7111416781292985, |
|
"grad_norm": 0.1440727412700653, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5282, |
|
"step": 25850 |
|
}, |
|
{ |
|
"epoch": 0.7125171939477304, |
|
"grad_norm": 0.17978446185588837, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5262, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.7138927097661623, |
|
"grad_norm": 0.151292085647583, |
|
"learning_rate": 0.0001, |
|
"loss": 1.527, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 0.7152682255845942, |
|
"grad_norm": 0.24109718203544617, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5235, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.7166437414030261, |
|
"grad_norm": 0.15700335800647736, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5245, |
|
"step": 26050 |
|
}, |
|
{ |
|
"epoch": 0.7180192572214581, |
|
"grad_norm": 0.14807374775409698, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5224, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.71939477303989, |
|
"grad_norm": 0.13032929599285126, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5221, |
|
"step": 26150 |
|
}, |
|
{ |
|
"epoch": 0.7207702888583218, |
|
"grad_norm": 0.1900160163640976, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5259, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.7221458046767538, |
|
"grad_norm": 0.20619365572929382, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5261, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 0.7235213204951857, |
|
"grad_norm": 0.17259658873081207, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5272, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.7248968363136176, |
|
"grad_norm": 0.1594364494085312, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5242, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 0.7262723521320495, |
|
"grad_norm": 0.16156145930290222, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5263, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.7276478679504814, |
|
"grad_norm": 0.15612217783927917, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5232, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 0.7290233837689133, |
|
"grad_norm": 0.2097177803516388, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5265, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.7303988995873453, |
|
"grad_norm": 0.18174001574516296, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5235, |
|
"step": 26550 |
|
}, |
|
{ |
|
"epoch": 0.7317744154057771, |
|
"grad_norm": 0.15661188960075378, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5239, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.7331499312242091, |
|
"grad_norm": 0.17666810750961304, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5244, |
|
"step": 26650 |
|
}, |
|
{ |
|
"epoch": 0.734525447042641, |
|
"grad_norm": 0.135247141122818, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5228, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.7359009628610729, |
|
"grad_norm": 0.17839883267879486, |
|
"learning_rate": 0.0001, |
|
"loss": 1.522, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 0.7372764786795049, |
|
"grad_norm": 0.1601705551147461, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5258, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.7386519944979367, |
|
"grad_norm": 0.21927671134471893, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5234, |
|
"step": 26850 |
|
}, |
|
{ |
|
"epoch": 0.7400275103163686, |
|
"grad_norm": 0.18870490789413452, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5222, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.7414030261348006, |
|
"grad_norm": 0.17285650968551636, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5243, |
|
"step": 26950 |
|
}, |
|
{ |
|
"epoch": 0.7427785419532325, |
|
"grad_norm": 0.14226007461547852, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5265, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.7441540577716643, |
|
"grad_norm": 0.17631758749485016, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5209, |
|
"step": 27050 |
|
}, |
|
{ |
|
"epoch": 0.7455295735900963, |
|
"grad_norm": 0.22787536680698395, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5233, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.7469050894085282, |
|
"grad_norm": 0.14378662407398224, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5214, |
|
"step": 27150 |
|
}, |
|
{ |
|
"epoch": 0.7482806052269602, |
|
"grad_norm": 0.21862713992595673, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5211, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.749656121045392, |
|
"grad_norm": 0.15041618049144745, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5233, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 0.7510316368638239, |
|
"grad_norm": 0.15543252229690552, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5216, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.7524071526822559, |
|
"grad_norm": 0.1488107591867447, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5237, |
|
"step": 27350 |
|
}, |
|
{ |
|
"epoch": 0.7537826685006878, |
|
"grad_norm": 0.2412855178117752, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5236, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.7551581843191196, |
|
"grad_norm": 0.21001331508159637, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5227, |
|
"step": 27450 |
|
}, |
|
{ |
|
"epoch": 0.7565337001375516, |
|
"grad_norm": 0.16884082555770874, |
|
"learning_rate": 0.0001, |
|
"loss": 1.523, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.7579092159559835, |
|
"grad_norm": 0.1195225790143013, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5223, |
|
"step": 27550 |
|
}, |
|
{ |
|
"epoch": 0.7592847317744154, |
|
"grad_norm": 0.2539023160934448, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5223, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.7606602475928473, |
|
"grad_norm": 0.17333871126174927, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5207, |
|
"step": 27650 |
|
}, |
|
{ |
|
"epoch": 0.7620357634112792, |
|
"grad_norm": 0.14636480808258057, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5241, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.7634112792297112, |
|
"grad_norm": 0.13305403292179108, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5224, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 0.7647867950481431, |
|
"grad_norm": 0.18532030284404755, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5234, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.7661623108665749, |
|
"grad_norm": 0.1548730880022049, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5224, |
|
"step": 27850 |
|
}, |
|
{ |
|
"epoch": 0.7675378266850069, |
|
"grad_norm": 0.20586071908473969, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5219, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.7689133425034388, |
|
"grad_norm": 0.13693679869174957, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5226, |
|
"step": 27950 |
|
}, |
|
{ |
|
"epoch": 0.7702888583218707, |
|
"grad_norm": 0.17651352286338806, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5198, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.7716643741403026, |
|
"grad_norm": 0.19794145226478577, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5243, |
|
"step": 28050 |
|
}, |
|
{ |
|
"epoch": 0.7730398899587345, |
|
"grad_norm": 0.14593897759914398, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5203, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.7744154057771664, |
|
"grad_norm": 0.18138128519058228, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5189, |
|
"step": 28150 |
|
}, |
|
{ |
|
"epoch": 0.7757909215955984, |
|
"grad_norm": 0.15987426042556763, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5209, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.7771664374140302, |
|
"grad_norm": 0.15444040298461914, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5187, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 0.7785419532324622, |
|
"grad_norm": 0.22651028633117676, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5201, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.7799174690508941, |
|
"grad_norm": 0.1889326423406601, |
|
"learning_rate": 0.0001, |
|
"loss": 1.522, |
|
"step": 28350 |
|
}, |
|
{ |
|
"epoch": 0.781292984869326, |
|
"grad_norm": 0.1659088283777237, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5211, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.782668500687758, |
|
"grad_norm": 0.20580235123634338, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5215, |
|
"step": 28450 |
|
}, |
|
{ |
|
"epoch": 0.7840440165061898, |
|
"grad_norm": 0.1748579442501068, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5199, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.7854195323246217, |
|
"grad_norm": 0.20172914862632751, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5228, |
|
"step": 28550 |
|
}, |
|
{ |
|
"epoch": 0.7867950481430537, |
|
"grad_norm": 0.1552000194787979, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5205, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.7881705639614855, |
|
"grad_norm": 0.18557365238666534, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5234, |
|
"step": 28650 |
|
}, |
|
{ |
|
"epoch": 0.7895460797799174, |
|
"grad_norm": 0.17085815966129303, |
|
"learning_rate": 0.0001, |
|
"loss": 1.522, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.7909215955983494, |
|
"grad_norm": 0.19171683490276337, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5187, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 0.7922971114167813, |
|
"grad_norm": 0.3197721838951111, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5228, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.7936726272352133, |
|
"grad_norm": 0.21279697120189667, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5181, |
|
"step": 28850 |
|
}, |
|
{ |
|
"epoch": 0.7950481430536451, |
|
"grad_norm": 0.2184215933084488, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5214, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.796423658872077, |
|
"grad_norm": 0.21635691821575165, |
|
"learning_rate": 0.0001, |
|
"loss": 1.523, |
|
"step": 28950 |
|
}, |
|
{ |
|
"epoch": 0.797799174690509, |
|
"grad_norm": 0.15319493412971497, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5197, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.7991746905089409, |
|
"grad_norm": 0.22083012759685516, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5219, |
|
"step": 29050 |
|
}, |
|
{ |
|
"epoch": 0.8005502063273727, |
|
"grad_norm": 0.15193097293376923, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5195, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.8019257221458047, |
|
"grad_norm": 0.19553427398204803, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5205, |
|
"step": 29150 |
|
}, |
|
{ |
|
"epoch": 0.8033012379642366, |
|
"grad_norm": 0.2117278128862381, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5203, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.8046767537826685, |
|
"grad_norm": 0.15601006150245667, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5199, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 0.8060522696011004, |
|
"grad_norm": 0.15379014611244202, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5222, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.8074277854195323, |
|
"grad_norm": 0.1712176352739334, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5204, |
|
"step": 29350 |
|
}, |
|
{ |
|
"epoch": 0.8088033012379643, |
|
"grad_norm": 0.19847099483013153, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5203, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.8101788170563962, |
|
"grad_norm": 0.15735092759132385, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5181, |
|
"step": 29450 |
|
}, |
|
{ |
|
"epoch": 0.811554332874828, |
|
"grad_norm": 0.2128709852695465, |
|
"learning_rate": 0.0001, |
|
"loss": 1.52, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.81292984869326, |
|
"grad_norm": 0.23607073724269867, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5222, |
|
"step": 29550 |
|
}, |
|
{ |
|
"epoch": 0.8143053645116919, |
|
"grad_norm": 0.15351270139217377, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5186, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.8156808803301238, |
|
"grad_norm": 0.18421980738639832, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5189, |
|
"step": 29650 |
|
}, |
|
{ |
|
"epoch": 0.8170563961485557, |
|
"grad_norm": 0.15863709151744843, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5191, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.8184319119669876, |
|
"grad_norm": 0.1642359048128128, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5188, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 0.8198074277854195, |
|
"grad_norm": 0.2115437388420105, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5193, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.8211829436038515, |
|
"grad_norm": 0.1653752475976944, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5196, |
|
"step": 29850 |
|
}, |
|
{ |
|
"epoch": 0.8225584594222833, |
|
"grad_norm": 0.25687387585639954, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5193, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.8239339752407153, |
|
"grad_norm": 0.22497384250164032, |
|
"learning_rate": 0.0001, |
|
"loss": 1.519, |
|
"step": 29950 |
|
}, |
|
{ |
|
"epoch": 0.8253094910591472, |
|
"grad_norm": 0.16616137325763702, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5204, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.8266850068775791, |
|
"grad_norm": 0.14630819857120514, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5208, |
|
"step": 30050 |
|
}, |
|
{ |
|
"epoch": 0.828060522696011, |
|
"grad_norm": 0.19977807998657227, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5187, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.8294360385144429, |
|
"grad_norm": 0.21963287889957428, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5181, |
|
"step": 30150 |
|
}, |
|
{ |
|
"epoch": 0.8308115543328748, |
|
"grad_norm": 0.2047349214553833, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5184, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.8321870701513068, |
|
"grad_norm": 0.1430223435163498, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5187, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 0.8335625859697386, |
|
"grad_norm": 0.2075473666191101, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5185, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.8349381017881705, |
|
"grad_norm": 0.22520440816879272, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5207, |
|
"step": 30350 |
|
}, |
|
{ |
|
"epoch": 0.8363136176066025, |
|
"grad_norm": 0.2137775719165802, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5174, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.8376891334250344, |
|
"grad_norm": 0.1777603179216385, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5189, |
|
"step": 30450 |
|
}, |
|
{ |
|
"epoch": 0.8390646492434664, |
|
"grad_norm": 0.13343022763729095, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5196, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.8404401650618982, |
|
"grad_norm": 0.223526269197464, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5201, |
|
"step": 30550 |
|
}, |
|
{ |
|
"epoch": 0.8418156808803301, |
|
"grad_norm": 0.2005707323551178, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5182, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.8431911966987621, |
|
"grad_norm": 0.1620023101568222, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5194, |
|
"step": 30650 |
|
}, |
|
{ |
|
"epoch": 0.844566712517194, |
|
"grad_norm": 0.1359826922416687, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5186, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.8459422283356258, |
|
"grad_norm": 0.23660969734191895, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5208, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 0.8473177441540578, |
|
"grad_norm": 0.22223958373069763, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5167, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.8486932599724897, |
|
"grad_norm": 0.22506959736347198, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5166, |
|
"step": 30850 |
|
}, |
|
{ |
|
"epoch": 0.8500687757909215, |
|
"grad_norm": 0.20386451482772827, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5181, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.8514442916093535, |
|
"grad_norm": 0.21547478437423706, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5184, |
|
"step": 30950 |
|
}, |
|
{ |
|
"epoch": 0.8528198074277854, |
|
"grad_norm": 0.2500711977481842, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5188, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.8541953232462174, |
|
"grad_norm": 0.17289701104164124, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5182, |
|
"step": 31050 |
|
}, |
|
{ |
|
"epoch": 0.8555708390646493, |
|
"grad_norm": 0.24792905151844025, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5201, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.8569463548830811, |
|
"grad_norm": 0.16410884261131287, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5191, |
|
"step": 31150 |
|
}, |
|
{ |
|
"epoch": 0.8583218707015131, |
|
"grad_norm": 0.20413684844970703, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5207, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.859697386519945, |
|
"grad_norm": 0.1622382253408432, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5191, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 0.8610729023383769, |
|
"grad_norm": 0.19682924449443817, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5195, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.8624484181568088, |
|
"grad_norm": 0.17585939168930054, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5182, |
|
"step": 31350 |
|
}, |
|
{ |
|
"epoch": 0.8638239339752407, |
|
"grad_norm": 0.3021407127380371, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5177, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.8651994497936726, |
|
"grad_norm": 0.25355300307273865, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5179, |
|
"step": 31450 |
|
}, |
|
{ |
|
"epoch": 0.8665749656121046, |
|
"grad_norm": 0.19390764832496643, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5146, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.8679504814305364, |
|
"grad_norm": 0.14198362827301025, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5194, |
|
"step": 31550 |
|
}, |
|
{ |
|
"epoch": 0.8693259972489684, |
|
"grad_norm": 0.21591129899024963, |
|
"learning_rate": 0.0001, |
|
"loss": 1.516, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.8707015130674003, |
|
"grad_norm": 0.142410010099411, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5164, |
|
"step": 31650 |
|
}, |
|
{ |
|
"epoch": 0.8720770288858322, |
|
"grad_norm": 0.14241962134838104, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5144, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.8734525447042641, |
|
"grad_norm": 0.1909308135509491, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5182, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 0.874828060522696, |
|
"grad_norm": 0.1649756282567978, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5145, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.8762035763411279, |
|
"grad_norm": 0.26334628462791443, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5157, |
|
"step": 31850 |
|
}, |
|
{ |
|
"epoch": 0.8775790921595599, |
|
"grad_norm": 0.1725001484155655, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5191, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.8789546079779917, |
|
"grad_norm": 0.18799418210983276, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5171, |
|
"step": 31950 |
|
}, |
|
{ |
|
"epoch": 0.8803301237964236, |
|
"grad_norm": 0.15485192835330963, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5147, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.8817056396148556, |
|
"grad_norm": 0.13494554162025452, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5147, |
|
"step": 32050 |
|
}, |
|
{ |
|
"epoch": 0.8830811554332875, |
|
"grad_norm": 0.22909484803676605, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5154, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.8844566712517193, |
|
"grad_norm": 0.2062431126832962, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5135, |
|
"step": 32150 |
|
}, |
|
{ |
|
"epoch": 0.8858321870701513, |
|
"grad_norm": 0.17063121497631073, |
|
"learning_rate": 0.0001, |
|
"loss": 1.517, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.8872077028885832, |
|
"grad_norm": 0.1380726397037506, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5134, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 0.8885832187070152, |
|
"grad_norm": 0.18543638288974762, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5186, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.889958734525447, |
|
"grad_norm": 0.28441041707992554, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5179, |
|
"step": 32350 |
|
}, |
|
{ |
|
"epoch": 0.8913342503438789, |
|
"grad_norm": 0.2097078114748001, |
|
"learning_rate": 0.0001, |
|
"loss": 1.518, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.8927097661623109, |
|
"grad_norm": 0.16976235806941986, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5147, |
|
"step": 32450 |
|
}, |
|
{ |
|
"epoch": 0.8940852819807428, |
|
"grad_norm": 0.20023608207702637, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5209, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.8954607977991746, |
|
"grad_norm": 0.1981000006198883, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5161, |
|
"step": 32550 |
|
}, |
|
{ |
|
"epoch": 0.8968363136176066, |
|
"grad_norm": 0.24770237505435944, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5145, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.8982118294360385, |
|
"grad_norm": 0.27108198404312134, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5157, |
|
"step": 32650 |
|
}, |
|
{ |
|
"epoch": 0.8995873452544704, |
|
"grad_norm": 0.21742689609527588, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5176, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.9009628610729024, |
|
"grad_norm": 0.18256455659866333, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5153, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 0.9023383768913342, |
|
"grad_norm": 0.1812065690755844, |
|
"learning_rate": 0.0001, |
|
"loss": 1.517, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.9037138927097662, |
|
"grad_norm": 0.1624094694852829, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5184, |
|
"step": 32850 |
|
}, |
|
{ |
|
"epoch": 0.9050894085281981, |
|
"grad_norm": 0.12931875884532928, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5187, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.90646492434663, |
|
"grad_norm": 0.15731951594352722, |
|
"learning_rate": 0.0001, |
|
"loss": 1.515, |
|
"step": 32950 |
|
}, |
|
{ |
|
"epoch": 0.9078404401650619, |
|
"grad_norm": 0.2222890406847, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5167, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.9092159559834938, |
|
"grad_norm": 0.33150213956832886, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5166, |
|
"step": 33050 |
|
}, |
|
{ |
|
"epoch": 0.9105914718019257, |
|
"grad_norm": 0.27547687292099, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5151, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.9119669876203577, |
|
"grad_norm": 0.1873897761106491, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5132, |
|
"step": 33150 |
|
}, |
|
{ |
|
"epoch": 0.9133425034387895, |
|
"grad_norm": 0.1707950383424759, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5149, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.9147180192572214, |
|
"grad_norm": 0.1721598356962204, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5135, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 0.9160935350756534, |
|
"grad_norm": 0.31545665860176086, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5142, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.9174690508940853, |
|
"grad_norm": 0.19677673280239105, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5114, |
|
"step": 33350 |
|
}, |
|
{ |
|
"epoch": 0.9188445667125172, |
|
"grad_norm": 0.19303210079669952, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5126, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.9202200825309491, |
|
"grad_norm": 0.14599211513996124, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5149, |
|
"step": 33450 |
|
}, |
|
{ |
|
"epoch": 0.921595598349381, |
|
"grad_norm": 0.2020881623029709, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5169, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.922971114167813, |
|
"grad_norm": 0.1755484640598297, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5146, |
|
"step": 33550 |
|
}, |
|
{ |
|
"epoch": 0.9243466299862448, |
|
"grad_norm": 0.15174026787281036, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5164, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.9257221458046767, |
|
"grad_norm": 0.21369625627994537, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5161, |
|
"step": 33650 |
|
}, |
|
{ |
|
"epoch": 0.9270976616231087, |
|
"grad_norm": 0.23643817007541656, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5129, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.9284731774415406, |
|
"grad_norm": 0.22748377919197083, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5169, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 0.9298486932599724, |
|
"grad_norm": 0.24398982524871826, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5137, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.9312242090784044, |
|
"grad_norm": 0.16090893745422363, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5126, |
|
"step": 33850 |
|
}, |
|
{ |
|
"epoch": 0.9325997248968363, |
|
"grad_norm": 0.1766052097082138, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5149, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.9339752407152683, |
|
"grad_norm": 0.15594764053821564, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5139, |
|
"step": 33950 |
|
}, |
|
{ |
|
"epoch": 0.9353507565337001, |
|
"grad_norm": 0.22842876613140106, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5152, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.936726272352132, |
|
"grad_norm": 0.17382940649986267, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5138, |
|
"step": 34050 |
|
}, |
|
{ |
|
"epoch": 0.938101788170564, |
|
"grad_norm": 0.19100262224674225, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5136, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.9394773039889959, |
|
"grad_norm": 0.13861484825611115, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5118, |
|
"step": 34150 |
|
}, |
|
{ |
|
"epoch": 0.9408528198074277, |
|
"grad_norm": 0.22483597695827484, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5119, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.9422283356258597, |
|
"grad_norm": 0.20615430176258087, |
|
"learning_rate": 0.0001, |
|
"loss": 1.512, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 0.9436038514442916, |
|
"grad_norm": 0.18101869523525238, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5142, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.9449793672627235, |
|
"grad_norm": 0.19411496818065643, |
|
"learning_rate": 0.0001, |
|
"loss": 1.512, |
|
"step": 34350 |
|
}, |
|
{ |
|
"epoch": 0.9463548830811555, |
|
"grad_norm": 0.2966468334197998, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5121, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.9477303988995873, |
|
"grad_norm": 0.2614442706108093, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5127, |
|
"step": 34450 |
|
}, |
|
{ |
|
"epoch": 0.9491059147180193, |
|
"grad_norm": 0.3327767252922058, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5136, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.9504814305364512, |
|
"grad_norm": 0.1958717554807663, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5133, |
|
"step": 34550 |
|
}, |
|
{ |
|
"epoch": 0.951856946354883, |
|
"grad_norm": 0.15711049735546112, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5121, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.953232462173315, |
|
"grad_norm": 0.2362435758113861, |
|
"learning_rate": 0.0001, |
|
"loss": 1.514, |
|
"step": 34650 |
|
}, |
|
{ |
|
"epoch": 0.9546079779917469, |
|
"grad_norm": 0.17552147805690765, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5115, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.9559834938101788, |
|
"grad_norm": 0.16898372769355774, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5131, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 0.9573590096286108, |
|
"grad_norm": 0.18677185475826263, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5146, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.9587345254470426, |
|
"grad_norm": 0.1758512556552887, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5141, |
|
"step": 34850 |
|
}, |
|
{ |
|
"epoch": 0.9601100412654745, |
|
"grad_norm": 0.18687918782234192, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5134, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.9614855570839065, |
|
"grad_norm": 0.2375195175409317, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5129, |
|
"step": 34950 |
|
}, |
|
{ |
|
"epoch": 0.9628610729023384, |
|
"grad_norm": 0.24082688987255096, |
|
"learning_rate": 0.0001, |
|
"loss": 1.514, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.9642365887207703, |
|
"grad_norm": 0.2279283106327057, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5129, |
|
"step": 35050 |
|
}, |
|
{ |
|
"epoch": 0.9656121045392022, |
|
"grad_norm": 0.267251193523407, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5139, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.9669876203576341, |
|
"grad_norm": 0.1902667135000229, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5127, |
|
"step": 35150 |
|
}, |
|
{ |
|
"epoch": 0.9683631361760661, |
|
"grad_norm": 0.20134538412094116, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5137, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.9697386519944979, |
|
"grad_norm": 0.21791616082191467, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5148, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 0.9711141678129298, |
|
"grad_norm": 0.2014089673757553, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5135, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.9724896836313618, |
|
"grad_norm": 0.1704970896244049, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5148, |
|
"step": 35350 |
|
}, |
|
{ |
|
"epoch": 0.9738651994497937, |
|
"grad_norm": 0.15112122893333435, |
|
"learning_rate": 0.0001, |
|
"loss": 1.512, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.9752407152682255, |
|
"grad_norm": 0.1649782657623291, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5107, |
|
"step": 35450 |
|
}, |
|
{ |
|
"epoch": 0.9766162310866575, |
|
"grad_norm": 0.2087404876947403, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5149, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.9779917469050894, |
|
"grad_norm": 0.2056160867214203, |
|
"learning_rate": 0.0001, |
|
"loss": 1.511, |
|
"step": 35550 |
|
}, |
|
{ |
|
"epoch": 0.9793672627235214, |
|
"grad_norm": 0.2275388538837433, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5147, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.9807427785419532, |
|
"grad_norm": 0.24389615654945374, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5122, |
|
"step": 35650 |
|
}, |
|
{ |
|
"epoch": 0.9821182943603851, |
|
"grad_norm": 0.21413607895374298, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5119, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.9834938101788171, |
|
"grad_norm": 0.19716958701610565, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5127, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 0.984869325997249, |
|
"grad_norm": 0.22444148361682892, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5128, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.9862448418156808, |
|
"grad_norm": 0.15065211057662964, |
|
"learning_rate": 0.0001, |
|
"loss": 1.512, |
|
"step": 35850 |
|
}, |
|
{ |
|
"epoch": 0.9876203576341128, |
|
"grad_norm": 0.3378779888153076, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5108, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.9889958734525447, |
|
"grad_norm": 0.17586860060691833, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5144, |
|
"step": 35950 |
|
}, |
|
{ |
|
"epoch": 0.9903713892709766, |
|
"grad_norm": 0.270921915769577, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5142, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.9917469050894085, |
|
"grad_norm": 0.18357771635055542, |
|
"learning_rate": 0.0001, |
|
"loss": 1.513, |
|
"step": 36050 |
|
}, |
|
{ |
|
"epoch": 0.9931224209078404, |
|
"grad_norm": 0.33356377482414246, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5129, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.9944979367262724, |
|
"grad_norm": 0.19254672527313232, |
|
"learning_rate": 0.0001, |
|
"loss": 1.511, |
|
"step": 36150 |
|
}, |
|
{ |
|
"epoch": 0.9958734525447043, |
|
"grad_norm": 0.2596052289009094, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5113, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.9972489683631361, |
|
"grad_norm": 0.3195280432701111, |
|
"learning_rate": 0.0001, |
|
"loss": 1.514, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 0.9986244841815681, |
|
"grad_norm": 0.2321728765964508, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5121, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.2551921010017395, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5127, |
|
"step": 36350 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 36350, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3730675995865063e+22, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |