|
{ |
|
"best_metric": 0.3308734893798828, |
|
"best_model_checkpoint": "results_mt5XLSum_augmented/checkpoint-13000", |
|
"epoch": 9.285714285714286, |
|
"eval_steps": 1000, |
|
"global_step": 13000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.007142857142857143, |
|
"grad_norm": 10.661067962646484, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 1.8382, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.014285714285714285, |
|
"grad_norm": 11.73471450805664, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.6838, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02142857142857143, |
|
"grad_norm": 13.098968505859375, |
|
"learning_rate": 3e-06, |
|
"loss": 1.86, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02857142857142857, |
|
"grad_norm": 10.79481315612793, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.7851, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03571428571428571, |
|
"grad_norm": 11.62800121307373, |
|
"learning_rate": 5e-06, |
|
"loss": 1.4648, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04285714285714286, |
|
"grad_norm": 9.00180721282959, |
|
"learning_rate": 6e-06, |
|
"loss": 1.4355, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 11.218201637268066, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 1.3977, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05714285714285714, |
|
"grad_norm": 7.159872531890869, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.9774, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06428571428571428, |
|
"grad_norm": 6.163649559020996, |
|
"learning_rate": 9e-06, |
|
"loss": 0.8556, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07142857142857142, |
|
"grad_norm": 4.800461292266846, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8627, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07857142857142857, |
|
"grad_norm": 4.373474597930908, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 0.7674, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08571428571428572, |
|
"grad_norm": 4.36292839050293, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.7035, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09285714285714286, |
|
"grad_norm": 4.634104251861572, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.9197, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.442883491516113, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.7712, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.10714285714285714, |
|
"grad_norm": 3.7063419818878174, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.8602, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11428571428571428, |
|
"grad_norm": 3.7267696857452393, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.6758, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12142857142857143, |
|
"grad_norm": 3.7582225799560547, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.8091, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12857142857142856, |
|
"grad_norm": 2.829885482788086, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.8014, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1357142857142857, |
|
"grad_norm": 3.4555258750915527, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.5562, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 3.120464563369751, |
|
"learning_rate": 2e-05, |
|
"loss": 0.6391, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.8185417652130127, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.6501, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.15714285714285714, |
|
"grad_norm": 2.7110323905944824, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.9029, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.16428571428571428, |
|
"grad_norm": 3.336864709854126, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 0.6938, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.17142857142857143, |
|
"grad_norm": 2.9769392013549805, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.6322, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.17857142857142858, |
|
"grad_norm": 2.5426135063171387, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.752, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.18571428571428572, |
|
"grad_norm": 3.2473714351654053, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.5993, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.19285714285714287, |
|
"grad_norm": 2.9979186058044434, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.5928, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.1635003089904785, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.4335, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.20714285714285716, |
|
"grad_norm": 4.114761829376221, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.5023, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.21428571428571427, |
|
"grad_norm": 2.0567097663879395, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5124, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22142857142857142, |
|
"grad_norm": 3.0209622383117676, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.5092, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.22857142857142856, |
|
"grad_norm": 1.8497462272644043, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.6075, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2357142857142857, |
|
"grad_norm": 1.6237268447875977, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.5343, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.24285714285714285, |
|
"grad_norm": 2.9820289611816406, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.5516, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.676515817642212, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.6621, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2571428571428571, |
|
"grad_norm": 3.4376354217529297, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.4768, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2642857142857143, |
|
"grad_norm": 3.2355964183807373, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.6184, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2714285714285714, |
|
"grad_norm": 2.2971713542938232, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.7827, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2785714285714286, |
|
"grad_norm": 2.442052125930786, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.5901, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 2.3172521591186523, |
|
"learning_rate": 4e-05, |
|
"loss": 0.6067, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29285714285714287, |
|
"grad_norm": 2.06640887260437, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.6589, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.416149854660034, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.6489, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.30714285714285716, |
|
"grad_norm": 2.340235471725464, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.8339, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3142857142857143, |
|
"grad_norm": 1.9825040102005005, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.7415, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.32142857142857145, |
|
"grad_norm": 1.8823323249816895, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.611, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.32857142857142857, |
|
"grad_norm": 2.3207123279571533, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.6172, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3357142857142857, |
|
"grad_norm": 1.2963736057281494, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.4114, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.34285714285714286, |
|
"grad_norm": 2.191009044647217, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.6251, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.1893374919891357, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.5966, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 2.214414596557617, |
|
"learning_rate": 5e-05, |
|
"loss": 0.7495, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36428571428571427, |
|
"grad_norm": 1.8343987464904785, |
|
"learning_rate": 4.9962962962962964e-05, |
|
"loss": 0.6179, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.37142857142857144, |
|
"grad_norm": 2.675177574157715, |
|
"learning_rate": 4.9925925925925926e-05, |
|
"loss": 0.7262, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.37857142857142856, |
|
"grad_norm": 1.7133303880691528, |
|
"learning_rate": 4.9888888888888894e-05, |
|
"loss": 0.546, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.38571428571428573, |
|
"grad_norm": 1.4926049709320068, |
|
"learning_rate": 4.9851851851851855e-05, |
|
"loss": 0.4057, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.39285714285714285, |
|
"grad_norm": 2.0434134006500244, |
|
"learning_rate": 4.981481481481482e-05, |
|
"loss": 1.014, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.037074089050293, |
|
"learning_rate": 4.977777777777778e-05, |
|
"loss": 0.6088, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.40714285714285714, |
|
"grad_norm": 2.6729607582092285, |
|
"learning_rate": 4.974074074074074e-05, |
|
"loss": 0.5986, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4142857142857143, |
|
"grad_norm": 1.8161852359771729, |
|
"learning_rate": 4.970370370370371e-05, |
|
"loss": 0.4681, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.42142857142857143, |
|
"grad_norm": 2.140554666519165, |
|
"learning_rate": 4.966666666666667e-05, |
|
"loss": 0.5865, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 2.3027639389038086, |
|
"learning_rate": 4.962962962962963e-05, |
|
"loss": 0.4166, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4357142857142857, |
|
"grad_norm": 2.973132371902466, |
|
"learning_rate": 4.959259259259259e-05, |
|
"loss": 0.6394, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.44285714285714284, |
|
"grad_norm": 2.898897886276245, |
|
"learning_rate": 4.955555555555556e-05, |
|
"loss": 0.5572, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.100752353668213, |
|
"learning_rate": 4.951851851851852e-05, |
|
"loss": 0.4788, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.45714285714285713, |
|
"grad_norm": 2.4735984802246094, |
|
"learning_rate": 4.9481481481481485e-05, |
|
"loss": 0.756, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.4642857142857143, |
|
"grad_norm": 1.5895243883132935, |
|
"learning_rate": 4.9444444444444446e-05, |
|
"loss": 0.5265, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4714285714285714, |
|
"grad_norm": 2.067650079727173, |
|
"learning_rate": 4.940740740740741e-05, |
|
"loss": 0.6079, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4785714285714286, |
|
"grad_norm": 1.6676874160766602, |
|
"learning_rate": 4.937037037037037e-05, |
|
"loss": 0.5196, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4857142857142857, |
|
"grad_norm": 1.6084502935409546, |
|
"learning_rate": 4.933333333333334e-05, |
|
"loss": 0.431, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4928571428571429, |
|
"grad_norm": 2.8858065605163574, |
|
"learning_rate": 4.92962962962963e-05, |
|
"loss": 0.5329, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.657158613204956, |
|
"learning_rate": 4.925925925925926e-05, |
|
"loss": 0.5092, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5071428571428571, |
|
"grad_norm": 2.636237144470215, |
|
"learning_rate": 4.922222222222222e-05, |
|
"loss": 0.4074, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5142857142857142, |
|
"grad_norm": 2.5960123538970947, |
|
"learning_rate": 4.918518518518519e-05, |
|
"loss": 0.424, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5214285714285715, |
|
"grad_norm": 1.7363989353179932, |
|
"learning_rate": 4.9148148148148145e-05, |
|
"loss": 0.7226, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5285714285714286, |
|
"grad_norm": 2.8367726802825928, |
|
"learning_rate": 4.9111111111111114e-05, |
|
"loss": 0.5535, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5357142857142857, |
|
"grad_norm": 2.1372838020324707, |
|
"learning_rate": 4.9074074074074075e-05, |
|
"loss": 0.6546, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5428571428571428, |
|
"grad_norm": 1.9456530809402466, |
|
"learning_rate": 4.903703703703704e-05, |
|
"loss": 0.5718, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.146430015563965, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.486, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5571428571428572, |
|
"grad_norm": 1.633537769317627, |
|
"learning_rate": 4.896296296296297e-05, |
|
"loss": 0.4602, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5642857142857143, |
|
"grad_norm": 3.580615282058716, |
|
"learning_rate": 4.892592592592593e-05, |
|
"loss": 0.5991, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 2.354482889175415, |
|
"learning_rate": 4.888888888888889e-05, |
|
"loss": 0.639, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5785714285714286, |
|
"grad_norm": 1.701165795326233, |
|
"learning_rate": 4.885185185185185e-05, |
|
"loss": 0.4895, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.5857142857142857, |
|
"grad_norm": 1.7530277967453003, |
|
"learning_rate": 4.881481481481482e-05, |
|
"loss": 0.5029, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5928571428571429, |
|
"grad_norm": 1.4377954006195068, |
|
"learning_rate": 4.8777777777777775e-05, |
|
"loss": 0.4668, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.9733954668045044, |
|
"learning_rate": 4.874074074074074e-05, |
|
"loss": 0.6434, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6071428571428571, |
|
"grad_norm": 1.5659862756729126, |
|
"learning_rate": 4.8703703703703704e-05, |
|
"loss": 0.4719, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6142857142857143, |
|
"grad_norm": 1.9549959897994995, |
|
"learning_rate": 4.866666666666667e-05, |
|
"loss": 0.5003, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6214285714285714, |
|
"grad_norm": 2.0998220443725586, |
|
"learning_rate": 4.862962962962963e-05, |
|
"loss": 0.4666, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6285714285714286, |
|
"grad_norm": 1.6551822423934937, |
|
"learning_rate": 4.8592592592592596e-05, |
|
"loss": 0.5508, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6357142857142857, |
|
"grad_norm": 2.268826723098755, |
|
"learning_rate": 4.855555555555556e-05, |
|
"loss": 0.5333, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6428571428571429, |
|
"grad_norm": 2.170297861099243, |
|
"learning_rate": 4.851851851851852e-05, |
|
"loss": 0.4724, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.3737900257110596, |
|
"learning_rate": 4.848148148148148e-05, |
|
"loss": 0.5938, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6571428571428571, |
|
"grad_norm": 1.5697389841079712, |
|
"learning_rate": 4.844444444444445e-05, |
|
"loss": 0.357, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6642857142857143, |
|
"grad_norm": 1.4354273080825806, |
|
"learning_rate": 4.840740740740741e-05, |
|
"loss": 0.2648, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6714285714285714, |
|
"grad_norm": 1.1631938219070435, |
|
"learning_rate": 4.837037037037037e-05, |
|
"loss": 0.4647, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6785714285714286, |
|
"grad_norm": 2.594999313354492, |
|
"learning_rate": 4.8333333333333334e-05, |
|
"loss": 0.6831, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6857142857142857, |
|
"grad_norm": 2.2979557514190674, |
|
"learning_rate": 4.82962962962963e-05, |
|
"loss": 0.4363, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6928571428571428, |
|
"grad_norm": 3.0777416229248047, |
|
"learning_rate": 4.825925925925926e-05, |
|
"loss": 0.5995, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.430807113647461, |
|
"learning_rate": 4.8222222222222225e-05, |
|
"loss": 0.6433, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7071428571428572, |
|
"grad_norm": 1.7465846538543701, |
|
"learning_rate": 4.818518518518519e-05, |
|
"loss": 0.4973, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 2.805053472518921, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 0.556, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"eval_loss": 0.4978465139865875, |
|
"eval_rouge1": 0.8844, |
|
"eval_rouge2": 0.8183, |
|
"eval_rougeL": 0.8811, |
|
"eval_runtime": 122.1438, |
|
"eval_samples_per_second": 11.462, |
|
"eval_steps_per_second": 5.731, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7214285714285714, |
|
"grad_norm": 1.6127879619598389, |
|
"learning_rate": 4.811111111111111e-05, |
|
"loss": 0.5748, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7285714285714285, |
|
"grad_norm": 1.1071356534957886, |
|
"learning_rate": 4.807407407407408e-05, |
|
"loss": 0.5246, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7357142857142858, |
|
"grad_norm": 1.9362713098526, |
|
"learning_rate": 4.803703703703704e-05, |
|
"loss": 0.563, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7428571428571429, |
|
"grad_norm": 2.037553548812866, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.466, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 2.196617841720581, |
|
"learning_rate": 4.796296296296296e-05, |
|
"loss": 0.5647, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7571428571428571, |
|
"grad_norm": 1.278428077697754, |
|
"learning_rate": 4.792592592592593e-05, |
|
"loss": 0.4821, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.7642857142857142, |
|
"grad_norm": 1.3506104946136475, |
|
"learning_rate": 4.7888888888888886e-05, |
|
"loss": 0.5194, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7714285714285715, |
|
"grad_norm": 2.3870656490325928, |
|
"learning_rate": 4.7851851851851854e-05, |
|
"loss": 0.7373, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7785714285714286, |
|
"grad_norm": 2.071242094039917, |
|
"learning_rate": 4.7814814814814816e-05, |
|
"loss": 0.5598, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7857142857142857, |
|
"grad_norm": 1.8460086584091187, |
|
"learning_rate": 4.7777777777777784e-05, |
|
"loss": 0.6184, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7928571428571428, |
|
"grad_norm": 3.804724931716919, |
|
"learning_rate": 4.774074074074074e-05, |
|
"loss": 0.5978, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.614772081375122, |
|
"learning_rate": 4.770370370370371e-05, |
|
"loss": 0.6203, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8071428571428572, |
|
"grad_norm": 2.068122386932373, |
|
"learning_rate": 4.766666666666667e-05, |
|
"loss": 0.6149, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8142857142857143, |
|
"grad_norm": 1.675881266593933, |
|
"learning_rate": 4.762962962962963e-05, |
|
"loss": 0.4437, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8214285714285714, |
|
"grad_norm": 1.865435004234314, |
|
"learning_rate": 4.759259259259259e-05, |
|
"loss": 0.5166, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8285714285714286, |
|
"grad_norm": 1.0480509996414185, |
|
"learning_rate": 4.755555555555556e-05, |
|
"loss": 0.3827, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8357142857142857, |
|
"grad_norm": 2.218554735183716, |
|
"learning_rate": 4.751851851851852e-05, |
|
"loss": 0.6641, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8428571428571429, |
|
"grad_norm": 2.510831832885742, |
|
"learning_rate": 4.7481481481481483e-05, |
|
"loss": 0.496, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.8328824043273926, |
|
"learning_rate": 4.7444444444444445e-05, |
|
"loss": 0.5466, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 1.5480728149414062, |
|
"learning_rate": 4.740740740740741e-05, |
|
"loss": 0.4992, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8642857142857143, |
|
"grad_norm": 1.3723492622375488, |
|
"learning_rate": 4.737037037037037e-05, |
|
"loss": 0.5014, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8714285714285714, |
|
"grad_norm": 1.7510666847229004, |
|
"learning_rate": 4.7333333333333336e-05, |
|
"loss": 0.5471, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8785714285714286, |
|
"grad_norm": 2.2057995796203613, |
|
"learning_rate": 4.72962962962963e-05, |
|
"loss": 0.6142, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8857142857142857, |
|
"grad_norm": 1.7922954559326172, |
|
"learning_rate": 4.7259259259259266e-05, |
|
"loss": 0.5199, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8928571428571429, |
|
"grad_norm": 1.9541053771972656, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.44, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.2869590520858765, |
|
"learning_rate": 4.718518518518519e-05, |
|
"loss": 0.5157, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9071428571428571, |
|
"grad_norm": 1.7564722299575806, |
|
"learning_rate": 4.714814814814815e-05, |
|
"loss": 0.4985, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9142857142857143, |
|
"grad_norm": 1.0782675743103027, |
|
"learning_rate": 4.711111111111111e-05, |
|
"loss": 0.3195, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9214285714285714, |
|
"grad_norm": 1.7535449266433716, |
|
"learning_rate": 4.7074074074074074e-05, |
|
"loss": 0.376, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9285714285714286, |
|
"grad_norm": 1.581485629081726, |
|
"learning_rate": 4.703703703703704e-05, |
|
"loss": 0.5975, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9357142857142857, |
|
"grad_norm": 2.739900827407837, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.457, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9428571428571428, |
|
"grad_norm": 2.382187604904175, |
|
"learning_rate": 4.6962962962962966e-05, |
|
"loss": 0.5424, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.75946843624115, |
|
"learning_rate": 4.692592592592593e-05, |
|
"loss": 0.3563, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.9571428571428572, |
|
"grad_norm": 1.8159079551696777, |
|
"learning_rate": 4.6888888888888895e-05, |
|
"loss": 0.4502, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9642857142857143, |
|
"grad_norm": 2.605283260345459, |
|
"learning_rate": 4.685185185185185e-05, |
|
"loss": 0.4779, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9714285714285714, |
|
"grad_norm": 2.594231605529785, |
|
"learning_rate": 4.681481481481482e-05, |
|
"loss": 0.4901, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9785714285714285, |
|
"grad_norm": 2.109367609024048, |
|
"learning_rate": 4.677777777777778e-05, |
|
"loss": 0.5378, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9857142857142858, |
|
"grad_norm": 1.960496425628662, |
|
"learning_rate": 4.674074074074074e-05, |
|
"loss": 0.6129, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.9928571428571429, |
|
"grad_norm": 3.4135870933532715, |
|
"learning_rate": 4.67037037037037e-05, |
|
"loss": 0.7069, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.441308617591858, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.4686, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.0071428571428571, |
|
"grad_norm": 1.9842432737350464, |
|
"learning_rate": 4.662962962962963e-05, |
|
"loss": 0.604, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.0142857142857142, |
|
"grad_norm": 1.3867950439453125, |
|
"learning_rate": 4.6592592592592595e-05, |
|
"loss": 0.4168, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.0214285714285714, |
|
"grad_norm": 2.118037462234497, |
|
"learning_rate": 4.6555555555555556e-05, |
|
"loss": 0.6484, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.0285714285714285, |
|
"grad_norm": 1.4064522981643677, |
|
"learning_rate": 4.6518518518518525e-05, |
|
"loss": 0.5275, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.0357142857142858, |
|
"grad_norm": 2.644491672515869, |
|
"learning_rate": 4.648148148148148e-05, |
|
"loss": 0.5361, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.042857142857143, |
|
"grad_norm": 1.4005937576293945, |
|
"learning_rate": 4.644444444444445e-05, |
|
"loss": 0.4497, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 1.773334264755249, |
|
"learning_rate": 4.640740740740741e-05, |
|
"loss": 0.4372, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.0571428571428572, |
|
"grad_norm": 2.1667587757110596, |
|
"learning_rate": 4.637037037037038e-05, |
|
"loss": 0.5211, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.0642857142857143, |
|
"grad_norm": 1.1993277072906494, |
|
"learning_rate": 4.633333333333333e-05, |
|
"loss": 0.3694, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.0714285714285714, |
|
"grad_norm": 1.5526480674743652, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.686, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0785714285714285, |
|
"grad_norm": 1.5041449069976807, |
|
"learning_rate": 4.625925925925926e-05, |
|
"loss": 0.4536, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.0857142857142856, |
|
"grad_norm": 1.719254970550537, |
|
"learning_rate": 4.6222222222222224e-05, |
|
"loss": 0.4687, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.092857142857143, |
|
"grad_norm": 1.9565083980560303, |
|
"learning_rate": 4.6185185185185185e-05, |
|
"loss": 0.4054, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 1.2271467447280884, |
|
"learning_rate": 4.6148148148148154e-05, |
|
"loss": 0.4189, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.1071428571428572, |
|
"grad_norm": 1.731244683265686, |
|
"learning_rate": 4.6111111111111115e-05, |
|
"loss": 0.4519, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.1142857142857143, |
|
"grad_norm": 1.3039075136184692, |
|
"learning_rate": 4.607407407407408e-05, |
|
"loss": 0.3911, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.1214285714285714, |
|
"grad_norm": 1.3420417308807373, |
|
"learning_rate": 4.603703703703704e-05, |
|
"loss": 0.4239, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.1285714285714286, |
|
"grad_norm": 2.2307205200195312, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.4675, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.1357142857142857, |
|
"grad_norm": 2.384147882461548, |
|
"learning_rate": 4.596296296296296e-05, |
|
"loss": 0.3963, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 1.6016713380813599, |
|
"learning_rate": 4.592592592592593e-05, |
|
"loss": 0.4561, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 1.4093197584152222, |
|
"learning_rate": 4.588888888888889e-05, |
|
"loss": 0.4708, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.157142857142857, |
|
"grad_norm": 1.9773272275924683, |
|
"learning_rate": 4.585185185185185e-05, |
|
"loss": 0.5259, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.1642857142857144, |
|
"grad_norm": 1.169757604598999, |
|
"learning_rate": 4.5814814814814815e-05, |
|
"loss": 0.3413, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.1714285714285715, |
|
"grad_norm": 2.1033947467803955, |
|
"learning_rate": 4.577777777777778e-05, |
|
"loss": 0.4888, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.1785714285714286, |
|
"grad_norm": 1.2455283403396606, |
|
"learning_rate": 4.5740740740740745e-05, |
|
"loss": 0.5935, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.1857142857142857, |
|
"grad_norm": 1.283308982849121, |
|
"learning_rate": 4.5703703703703706e-05, |
|
"loss": 0.3946, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.1928571428571428, |
|
"grad_norm": 1.9639955759048462, |
|
"learning_rate": 4.566666666666667e-05, |
|
"loss": 0.519, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.9380689263343811, |
|
"learning_rate": 4.5629629629629636e-05, |
|
"loss": 0.3357, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.207142857142857, |
|
"grad_norm": 2.330310344696045, |
|
"learning_rate": 4.559259259259259e-05, |
|
"loss": 0.5135, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.2142857142857142, |
|
"grad_norm": 1.5911920070648193, |
|
"learning_rate": 4.555555555555556e-05, |
|
"loss": 0.4165, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.2214285714285715, |
|
"grad_norm": 1.7522234916687012, |
|
"learning_rate": 4.551851851851852e-05, |
|
"loss": 0.5797, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.2285714285714286, |
|
"grad_norm": 2.265571355819702, |
|
"learning_rate": 4.548148148148149e-05, |
|
"loss": 0.3943, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.2357142857142858, |
|
"grad_norm": 2.530675172805786, |
|
"learning_rate": 4.5444444444444444e-05, |
|
"loss": 0.6279, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.2428571428571429, |
|
"grad_norm": 2.072864055633545, |
|
"learning_rate": 4.540740740740741e-05, |
|
"loss": 0.512, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.5505369901657104, |
|
"learning_rate": 4.5370370370370374e-05, |
|
"loss": 0.3494, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.2571428571428571, |
|
"grad_norm": 1.9888116121292114, |
|
"learning_rate": 4.5333333333333335e-05, |
|
"loss": 0.5841, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.2642857142857142, |
|
"grad_norm": 1.6056774854660034, |
|
"learning_rate": 4.52962962962963e-05, |
|
"loss": 0.5611, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.2714285714285714, |
|
"grad_norm": 1.7950221300125122, |
|
"learning_rate": 4.5259259259259265e-05, |
|
"loss": 0.6097, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.2785714285714285, |
|
"grad_norm": 1.8906399011611938, |
|
"learning_rate": 4.522222222222223e-05, |
|
"loss": 0.4837, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.2857142857142856, |
|
"grad_norm": 1.3988184928894043, |
|
"learning_rate": 4.518518518518519e-05, |
|
"loss": 0.4806, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.292857142857143, |
|
"grad_norm": 1.160243272781372, |
|
"learning_rate": 4.514814814814815e-05, |
|
"loss": 0.5224, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 1.0152113437652588, |
|
"learning_rate": 4.511111111111112e-05, |
|
"loss": 0.4115, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.3071428571428572, |
|
"grad_norm": 1.6176999807357788, |
|
"learning_rate": 4.507407407407407e-05, |
|
"loss": 0.4458, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.3142857142857143, |
|
"grad_norm": 1.904784917831421, |
|
"learning_rate": 4.503703703703704e-05, |
|
"loss": 0.5552, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.3214285714285714, |
|
"grad_norm": 1.0539710521697998, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.438, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.3285714285714285, |
|
"grad_norm": 1.3552178144454956, |
|
"learning_rate": 4.496296296296297e-05, |
|
"loss": 0.2862, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.3357142857142856, |
|
"grad_norm": 1.3787767887115479, |
|
"learning_rate": 4.4925925925925926e-05, |
|
"loss": 0.5173, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.342857142857143, |
|
"grad_norm": 2.570422649383545, |
|
"learning_rate": 4.4888888888888894e-05, |
|
"loss": 0.4581, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 1.5974104404449463, |
|
"learning_rate": 4.4851851851851856e-05, |
|
"loss": 0.4599, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.3571428571428572, |
|
"grad_norm": 1.4105775356292725, |
|
"learning_rate": 4.481481481481482e-05, |
|
"loss": 0.3823, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.3642857142857143, |
|
"grad_norm": 2.1751532554626465, |
|
"learning_rate": 4.477777777777778e-05, |
|
"loss": 0.4421, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.3714285714285714, |
|
"grad_norm": 1.9956297874450684, |
|
"learning_rate": 4.474074074074075e-05, |
|
"loss": 0.4082, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.3785714285714286, |
|
"grad_norm": 1.6159803867340088, |
|
"learning_rate": 4.47037037037037e-05, |
|
"loss": 0.3961, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.3857142857142857, |
|
"grad_norm": 1.4909430742263794, |
|
"learning_rate": 4.466666666666667e-05, |
|
"loss": 0.4635, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.3928571428571428, |
|
"grad_norm": 1.5630055665969849, |
|
"learning_rate": 4.462962962962963e-05, |
|
"loss": 0.5968, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 1.2496933937072754, |
|
"learning_rate": 4.4592592592592594e-05, |
|
"loss": 0.4546, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.407142857142857, |
|
"grad_norm": 1.6497224569320679, |
|
"learning_rate": 4.4555555555555555e-05, |
|
"loss": 0.354, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.4142857142857144, |
|
"grad_norm": 2.069955587387085, |
|
"learning_rate": 4.4518518518518523e-05, |
|
"loss": 0.4388, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.4214285714285715, |
|
"grad_norm": 1.6338075399398804, |
|
"learning_rate": 4.4481481481481485e-05, |
|
"loss": 0.5459, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 1.3558902740478516, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.3139, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"eval_loss": 0.43829917907714844, |
|
"eval_rouge1": 0.8911, |
|
"eval_rouge2": 0.8251, |
|
"eval_rougeL": 0.8873, |
|
"eval_runtime": 121.8873, |
|
"eval_samples_per_second": 11.486, |
|
"eval_steps_per_second": 5.743, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.4357142857142857, |
|
"grad_norm": 1.8123821020126343, |
|
"learning_rate": 4.440740740740741e-05, |
|
"loss": 0.5864, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.4428571428571428, |
|
"grad_norm": 0.8494770526885986, |
|
"learning_rate": 4.4370370370370376e-05, |
|
"loss": 0.3284, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 2.2536141872406006, |
|
"learning_rate": 4.433333333333334e-05, |
|
"loss": 0.3738, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.457142857142857, |
|
"grad_norm": 2.971925735473633, |
|
"learning_rate": 4.42962962962963e-05, |
|
"loss": 0.5294, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.4642857142857144, |
|
"grad_norm": 1.7820425033569336, |
|
"learning_rate": 4.425925925925926e-05, |
|
"loss": 0.505, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.4714285714285715, |
|
"grad_norm": 1.196044683456421, |
|
"learning_rate": 4.422222222222222e-05, |
|
"loss": 0.3921, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.4785714285714286, |
|
"grad_norm": 0.9053621888160706, |
|
"learning_rate": 4.4185185185185184e-05, |
|
"loss": 0.2386, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.4857142857142858, |
|
"grad_norm": 1.8388108015060425, |
|
"learning_rate": 4.414814814814815e-05, |
|
"loss": 0.4309, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.4928571428571429, |
|
"grad_norm": 2.25136137008667, |
|
"learning_rate": 4.4111111111111114e-05, |
|
"loss": 0.3918, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.960864782333374, |
|
"learning_rate": 4.4074074074074076e-05, |
|
"loss": 0.4754, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.5071428571428571, |
|
"grad_norm": 2.4653213024139404, |
|
"learning_rate": 4.403703703703704e-05, |
|
"loss": 0.4545, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.5142857142857142, |
|
"grad_norm": 1.8694462776184082, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.3707, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.5214285714285714, |
|
"grad_norm": 2.240447521209717, |
|
"learning_rate": 4.396296296296297e-05, |
|
"loss": 0.5174, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.5285714285714285, |
|
"grad_norm": 0.8589600920677185, |
|
"learning_rate": 4.392592592592593e-05, |
|
"loss": 0.3802, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.5357142857142856, |
|
"grad_norm": 2.495075225830078, |
|
"learning_rate": 4.388888888888889e-05, |
|
"loss": 0.4735, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.5428571428571427, |
|
"grad_norm": 1.5384397506713867, |
|
"learning_rate": 4.385185185185185e-05, |
|
"loss": 0.4844, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 1.2120758295059204, |
|
"learning_rate": 4.381481481481482e-05, |
|
"loss": 0.3029, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.5571428571428572, |
|
"grad_norm": 2.0210671424865723, |
|
"learning_rate": 4.377777777777778e-05, |
|
"loss": 0.688, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.5642857142857143, |
|
"grad_norm": 2.322322368621826, |
|
"learning_rate": 4.374074074074074e-05, |
|
"loss": 0.4673, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.5714285714285714, |
|
"grad_norm": 1.8948960304260254, |
|
"learning_rate": 4.3703703703703705e-05, |
|
"loss": 0.3698, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.5785714285714287, |
|
"grad_norm": 1.776141881942749, |
|
"learning_rate": 4.3666666666666666e-05, |
|
"loss": 0.3611, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.5857142857142859, |
|
"grad_norm": 2.8628015518188477, |
|
"learning_rate": 4.3629629629629635e-05, |
|
"loss": 0.4504, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.592857142857143, |
|
"grad_norm": 1.8579275608062744, |
|
"learning_rate": 4.3592592592592596e-05, |
|
"loss": 0.5131, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 1.1070181131362915, |
|
"learning_rate": 4.355555555555556e-05, |
|
"loss": 0.4187, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.6071428571428572, |
|
"grad_norm": 1.3833059072494507, |
|
"learning_rate": 4.351851851851852e-05, |
|
"loss": 0.4301, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.6142857142857143, |
|
"grad_norm": 1.6870567798614502, |
|
"learning_rate": 4.348148148148148e-05, |
|
"loss": 0.5542, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.6214285714285714, |
|
"grad_norm": 1.582582712173462, |
|
"learning_rate": 4.344444444444445e-05, |
|
"loss": 0.5192, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.6285714285714286, |
|
"grad_norm": 3.3700509071350098, |
|
"learning_rate": 4.340740740740741e-05, |
|
"loss": 0.3268, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.6357142857142857, |
|
"grad_norm": 3.0057899951934814, |
|
"learning_rate": 4.337037037037037e-05, |
|
"loss": 0.3787, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.6428571428571428, |
|
"grad_norm": 1.302416205406189, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.4793, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 2.42720103263855, |
|
"learning_rate": 4.3296296296296296e-05, |
|
"loss": 0.4124, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.657142857142857, |
|
"grad_norm": 1.455609917640686, |
|
"learning_rate": 4.325925925925926e-05, |
|
"loss": 0.272, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.6642857142857141, |
|
"grad_norm": 2.1332924365997314, |
|
"learning_rate": 4.3222222222222226e-05, |
|
"loss": 0.4859, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.6714285714285713, |
|
"grad_norm": 1.977156162261963, |
|
"learning_rate": 4.318518518518519e-05, |
|
"loss": 0.4017, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.6785714285714286, |
|
"grad_norm": 1.7197158336639404, |
|
"learning_rate": 4.314814814814815e-05, |
|
"loss": 0.376, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.6857142857142857, |
|
"grad_norm": 0.8615891933441162, |
|
"learning_rate": 4.311111111111111e-05, |
|
"loss": 0.3383, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.6928571428571428, |
|
"grad_norm": 1.2501980066299438, |
|
"learning_rate": 4.307407407407408e-05, |
|
"loss": 0.3999, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 1.7977019548416138, |
|
"learning_rate": 4.303703703703704e-05, |
|
"loss": 0.3424, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.7071428571428573, |
|
"grad_norm": 2.265807867050171, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.4345, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 2.348353624343872, |
|
"learning_rate": 4.296296296296296e-05, |
|
"loss": 0.4395, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.7214285714285715, |
|
"grad_norm": 2.585843801498413, |
|
"learning_rate": 4.292592592592593e-05, |
|
"loss": 0.5208, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.7285714285714286, |
|
"grad_norm": 1.1487417221069336, |
|
"learning_rate": 4.2888888888888886e-05, |
|
"loss": 0.4635, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.7357142857142858, |
|
"grad_norm": 1.206634521484375, |
|
"learning_rate": 4.2851851851851855e-05, |
|
"loss": 0.3521, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.7428571428571429, |
|
"grad_norm": 2.136702299118042, |
|
"learning_rate": 4.2814814814814816e-05, |
|
"loss": 0.4582, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 1.2831017971038818, |
|
"learning_rate": 4.277777777777778e-05, |
|
"loss": 0.397, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.7571428571428571, |
|
"grad_norm": 2.313405990600586, |
|
"learning_rate": 4.274074074074074e-05, |
|
"loss": 0.431, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.7642857142857142, |
|
"grad_norm": 1.8922353982925415, |
|
"learning_rate": 4.270370370370371e-05, |
|
"loss": 0.4396, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.7714285714285714, |
|
"grad_norm": 1.735303521156311, |
|
"learning_rate": 4.266666666666667e-05, |
|
"loss": 0.4019, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.7785714285714285, |
|
"grad_norm": 1.1989376544952393, |
|
"learning_rate": 4.262962962962963e-05, |
|
"loss": 0.3317, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 1.709370732307434, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 0.4721, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.7928571428571427, |
|
"grad_norm": 1.3655775785446167, |
|
"learning_rate": 4.255555555555556e-05, |
|
"loss": 0.464, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 1.2292691469192505, |
|
"learning_rate": 4.2518518518518515e-05, |
|
"loss": 0.3765, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.8071428571428572, |
|
"grad_norm": 2.6490797996520996, |
|
"learning_rate": 4.2481481481481484e-05, |
|
"loss": 0.4989, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.8142857142857143, |
|
"grad_norm": 1.8564647436141968, |
|
"learning_rate": 4.2444444444444445e-05, |
|
"loss": 0.3776, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.8214285714285714, |
|
"grad_norm": 1.9681627750396729, |
|
"learning_rate": 4.240740740740741e-05, |
|
"loss": 0.4717, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.8285714285714287, |
|
"grad_norm": 2.1326770782470703, |
|
"learning_rate": 4.237037037037037e-05, |
|
"loss": 0.3212, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.8357142857142859, |
|
"grad_norm": 1.8122767210006714, |
|
"learning_rate": 4.233333333333334e-05, |
|
"loss": 0.3619, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.842857142857143, |
|
"grad_norm": 1.4822399616241455, |
|
"learning_rate": 4.22962962962963e-05, |
|
"loss": 0.4631, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 2.278700828552246, |
|
"learning_rate": 4.225925925925926e-05, |
|
"loss": 0.3018, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.8571428571428572, |
|
"grad_norm": 2.3148486614227295, |
|
"learning_rate": 4.222222222222222e-05, |
|
"loss": 0.4084, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.8642857142857143, |
|
"grad_norm": 1.5277279615402222, |
|
"learning_rate": 4.218518518518519e-05, |
|
"loss": 0.5295, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.8714285714285714, |
|
"grad_norm": 1.3603259325027466, |
|
"learning_rate": 4.2148148148148145e-05, |
|
"loss": 0.4727, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.8785714285714286, |
|
"grad_norm": 1.9577744007110596, |
|
"learning_rate": 4.211111111111111e-05, |
|
"loss": 0.5915, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.8857142857142857, |
|
"grad_norm": 1.0424437522888184, |
|
"learning_rate": 4.2074074074074075e-05, |
|
"loss": 0.3386, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.8928571428571428, |
|
"grad_norm": 2.7555553913116455, |
|
"learning_rate": 4.203703703703704e-05, |
|
"loss": 0.5003, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 1.9913907051086426, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.3875, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.907142857142857, |
|
"grad_norm": 1.8053233623504639, |
|
"learning_rate": 4.1962962962962966e-05, |
|
"loss": 0.2318, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.9142857142857141, |
|
"grad_norm": 1.7686558961868286, |
|
"learning_rate": 4.192592592592593e-05, |
|
"loss": 0.3772, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.9214285714285713, |
|
"grad_norm": 1.4202839136123657, |
|
"learning_rate": 4.188888888888889e-05, |
|
"loss": 0.3742, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.9285714285714286, |
|
"grad_norm": 2.7964282035827637, |
|
"learning_rate": 4.185185185185185e-05, |
|
"loss": 0.2901, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.9357142857142857, |
|
"grad_norm": 2.0525360107421875, |
|
"learning_rate": 4.181481481481482e-05, |
|
"loss": 0.3146, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.9428571428571428, |
|
"grad_norm": 1.937103033065796, |
|
"learning_rate": 4.177777777777778e-05, |
|
"loss": 0.3871, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 2.1534152030944824, |
|
"learning_rate": 4.174074074074074e-05, |
|
"loss": 0.5309, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.9571428571428573, |
|
"grad_norm": 1.60648512840271, |
|
"learning_rate": 4.1703703703703704e-05, |
|
"loss": 0.3859, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.9642857142857144, |
|
"grad_norm": 2.2782654762268066, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.3859, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.9714285714285715, |
|
"grad_norm": 1.9134782552719116, |
|
"learning_rate": 4.162962962962963e-05, |
|
"loss": 0.565, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.9785714285714286, |
|
"grad_norm": 1.4029120206832886, |
|
"learning_rate": 4.1592592592592595e-05, |
|
"loss": 0.3373, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.9857142857142858, |
|
"grad_norm": 1.9651641845703125, |
|
"learning_rate": 4.155555555555556e-05, |
|
"loss": 0.3847, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.9928571428571429, |
|
"grad_norm": 1.4134501218795776, |
|
"learning_rate": 4.1518518518518525e-05, |
|
"loss": 0.4465, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.9682196378707886, |
|
"learning_rate": 4.148148148148148e-05, |
|
"loss": 0.545, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.007142857142857, |
|
"grad_norm": 2.259190559387207, |
|
"learning_rate": 4.144444444444445e-05, |
|
"loss": 0.3058, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.0142857142857142, |
|
"grad_norm": 1.6861268281936646, |
|
"learning_rate": 4.140740740740741e-05, |
|
"loss": 0.4567, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.0214285714285714, |
|
"grad_norm": 1.5168589353561401, |
|
"learning_rate": 4.137037037037037e-05, |
|
"loss": 0.5687, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.0285714285714285, |
|
"grad_norm": 1.1756591796875, |
|
"learning_rate": 4.133333333333333e-05, |
|
"loss": 0.4118, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.0357142857142856, |
|
"grad_norm": 1.4381860494613647, |
|
"learning_rate": 4.12962962962963e-05, |
|
"loss": 0.5473, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.0428571428571427, |
|
"grad_norm": 1.710028052330017, |
|
"learning_rate": 4.1259259259259256e-05, |
|
"loss": 0.3362, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 1.3261126279830933, |
|
"learning_rate": 4.1222222222222224e-05, |
|
"loss": 0.4497, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.057142857142857, |
|
"grad_norm": 1.5397872924804688, |
|
"learning_rate": 4.1185185185185186e-05, |
|
"loss": 0.3985, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.064285714285714, |
|
"grad_norm": 2.1463019847869873, |
|
"learning_rate": 4.1148148148148154e-05, |
|
"loss": 0.3423, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.0714285714285716, |
|
"grad_norm": 1.3202670812606812, |
|
"learning_rate": 4.111111111111111e-05, |
|
"loss": 0.3422, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.0785714285714287, |
|
"grad_norm": 1.6832393407821655, |
|
"learning_rate": 4.107407407407408e-05, |
|
"loss": 0.3243, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 2.085714285714286, |
|
"grad_norm": 1.7872291803359985, |
|
"learning_rate": 4.103703703703704e-05, |
|
"loss": 0.4452, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.092857142857143, |
|
"grad_norm": 2.649644613265991, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.4125, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 1.6862508058547974, |
|
"learning_rate": 4.096296296296296e-05, |
|
"loss": 0.3503, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.107142857142857, |
|
"grad_norm": 1.0415781736373901, |
|
"learning_rate": 4.092592592592593e-05, |
|
"loss": 0.2464, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.1142857142857143, |
|
"grad_norm": 1.3061981201171875, |
|
"learning_rate": 4.088888888888889e-05, |
|
"loss": 0.2232, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.1214285714285714, |
|
"grad_norm": 1.0442795753479004, |
|
"learning_rate": 4.0851851851851853e-05, |
|
"loss": 0.3512, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 2.1285714285714286, |
|
"grad_norm": 2.4381885528564453, |
|
"learning_rate": 4.0814814814814815e-05, |
|
"loss": 0.4678, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.1357142857142857, |
|
"grad_norm": 1.5145925283432007, |
|
"learning_rate": 4.0777777777777783e-05, |
|
"loss": 0.3655, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 2.142857142857143, |
|
"grad_norm": 3.514470338821411, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 0.3538, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.142857142857143, |
|
"eval_loss": 0.42481884360313416, |
|
"eval_rouge1": 0.8943, |
|
"eval_rouge2": 0.8295, |
|
"eval_rougeL": 0.8911, |
|
"eval_runtime": 122.0137, |
|
"eval_samples_per_second": 11.474, |
|
"eval_steps_per_second": 5.737, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 1.9778640270233154, |
|
"learning_rate": 4.0703703703703707e-05, |
|
"loss": 0.4804, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.157142857142857, |
|
"grad_norm": 2.8497660160064697, |
|
"learning_rate": 4.066666666666667e-05, |
|
"loss": 0.523, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.164285714285714, |
|
"grad_norm": 1.317818284034729, |
|
"learning_rate": 4.0629629629629636e-05, |
|
"loss": 0.3694, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 2.1714285714285713, |
|
"grad_norm": 1.1630916595458984, |
|
"learning_rate": 4.059259259259259e-05, |
|
"loss": 0.3546, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.1785714285714284, |
|
"grad_norm": 2.114527940750122, |
|
"learning_rate": 4.055555555555556e-05, |
|
"loss": 0.4838, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.185714285714286, |
|
"grad_norm": 1.771263599395752, |
|
"learning_rate": 4.051851851851852e-05, |
|
"loss": 0.2938, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.192857142857143, |
|
"grad_norm": 3.463986396789551, |
|
"learning_rate": 4.048148148148148e-05, |
|
"loss": 0.455, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 2.023069381713867, |
|
"learning_rate": 4.0444444444444444e-05, |
|
"loss": 0.4449, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.2071428571428573, |
|
"grad_norm": 2.9855751991271973, |
|
"learning_rate": 4.040740740740741e-05, |
|
"loss": 0.5374, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 2.2142857142857144, |
|
"grad_norm": 2.422739267349243, |
|
"learning_rate": 4.0370370370370374e-05, |
|
"loss": 0.4203, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.2214285714285715, |
|
"grad_norm": 2.097543478012085, |
|
"learning_rate": 4.0333333333333336e-05, |
|
"loss": 0.364, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.2285714285714286, |
|
"grad_norm": 2.2496302127838135, |
|
"learning_rate": 4.02962962962963e-05, |
|
"loss": 0.4135, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.2357142857142858, |
|
"grad_norm": 2.3347012996673584, |
|
"learning_rate": 4.0259259259259266e-05, |
|
"loss": 0.4795, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.242857142857143, |
|
"grad_norm": 1.506218433380127, |
|
"learning_rate": 4.022222222222222e-05, |
|
"loss": 0.5228, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 1.160443663597107, |
|
"learning_rate": 4.018518518518519e-05, |
|
"loss": 0.3439, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.257142857142857, |
|
"grad_norm": 1.8678144216537476, |
|
"learning_rate": 4.014814814814815e-05, |
|
"loss": 0.404, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.2642857142857142, |
|
"grad_norm": 1.1315560340881348, |
|
"learning_rate": 4.011111111111111e-05, |
|
"loss": 0.3111, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.2714285714285714, |
|
"grad_norm": 1.8081461191177368, |
|
"learning_rate": 4.007407407407407e-05, |
|
"loss": 0.2717, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.2785714285714285, |
|
"grad_norm": 1.6636005640029907, |
|
"learning_rate": 4.003703703703704e-05, |
|
"loss": 0.3382, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.2857142857142856, |
|
"grad_norm": 1.3334009647369385, |
|
"learning_rate": 4e-05, |
|
"loss": 0.38, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.2928571428571427, |
|
"grad_norm": 1.4873621463775635, |
|
"learning_rate": 3.9962962962962965e-05, |
|
"loss": 0.2979, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 1.17378568649292, |
|
"learning_rate": 3.9925925925925926e-05, |
|
"loss": 0.3628, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.307142857142857, |
|
"grad_norm": 1.3241777420043945, |
|
"learning_rate": 3.9888888888888895e-05, |
|
"loss": 0.3119, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.314285714285714, |
|
"grad_norm": 1.9823285341262817, |
|
"learning_rate": 3.985185185185185e-05, |
|
"loss": 0.4647, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.3214285714285716, |
|
"grad_norm": 1.6918193101882935, |
|
"learning_rate": 3.981481481481482e-05, |
|
"loss": 0.3695, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.3285714285714287, |
|
"grad_norm": 2.1902389526367188, |
|
"learning_rate": 3.977777777777778e-05, |
|
"loss": 0.2468, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.335714285714286, |
|
"grad_norm": 1.3570506572723389, |
|
"learning_rate": 3.974074074074075e-05, |
|
"loss": 0.3209, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.342857142857143, |
|
"grad_norm": 1.951711654663086, |
|
"learning_rate": 3.97037037037037e-05, |
|
"loss": 0.5175, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 1.741243839263916, |
|
"learning_rate": 3.966666666666667e-05, |
|
"loss": 0.2934, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.357142857142857, |
|
"grad_norm": 1.5889472961425781, |
|
"learning_rate": 3.962962962962963e-05, |
|
"loss": 0.3026, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.3642857142857143, |
|
"grad_norm": 1.4606213569641113, |
|
"learning_rate": 3.9592592592592594e-05, |
|
"loss": 0.4508, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.3714285714285714, |
|
"grad_norm": 1.5021477937698364, |
|
"learning_rate": 3.9555555555555556e-05, |
|
"loss": 0.2589, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.3785714285714286, |
|
"grad_norm": 1.8877885341644287, |
|
"learning_rate": 3.9518518518518524e-05, |
|
"loss": 0.4115, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.3857142857142857, |
|
"grad_norm": 1.809822678565979, |
|
"learning_rate": 3.9481481481481485e-05, |
|
"loss": 0.3844, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.392857142857143, |
|
"grad_norm": 1.2999638319015503, |
|
"learning_rate": 3.944444444444445e-05, |
|
"loss": 0.4028, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 1.4639837741851807, |
|
"learning_rate": 3.940740740740741e-05, |
|
"loss": 0.3551, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.407142857142857, |
|
"grad_norm": 1.1001754999160767, |
|
"learning_rate": 3.937037037037038e-05, |
|
"loss": 0.4001, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.414285714285714, |
|
"grad_norm": 2.272892713546753, |
|
"learning_rate": 3.933333333333333e-05, |
|
"loss": 0.3048, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.4214285714285713, |
|
"grad_norm": 2.085908889770508, |
|
"learning_rate": 3.92962962962963e-05, |
|
"loss": 0.5788, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.4285714285714284, |
|
"grad_norm": 1.317700743675232, |
|
"learning_rate": 3.925925925925926e-05, |
|
"loss": 0.2922, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.435714285714286, |
|
"grad_norm": 2.372558832168579, |
|
"learning_rate": 3.922222222222223e-05, |
|
"loss": 0.4581, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.442857142857143, |
|
"grad_norm": 1.3307292461395264, |
|
"learning_rate": 3.9185185185185185e-05, |
|
"loss": 0.4553, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 1.9228068590164185, |
|
"learning_rate": 3.914814814814815e-05, |
|
"loss": 0.497, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.4571428571428573, |
|
"grad_norm": 1.071590542793274, |
|
"learning_rate": 3.9111111111111115e-05, |
|
"loss": 0.4532, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.4642857142857144, |
|
"grad_norm": 1.9603391885757446, |
|
"learning_rate": 3.9074074074074076e-05, |
|
"loss": 0.3808, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.4714285714285715, |
|
"grad_norm": 1.2152074575424194, |
|
"learning_rate": 3.903703703703704e-05, |
|
"loss": 0.3011, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.4785714285714286, |
|
"grad_norm": 1.532478928565979, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.3403, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.4857142857142858, |
|
"grad_norm": 1.5086220502853394, |
|
"learning_rate": 3.896296296296296e-05, |
|
"loss": 0.4835, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.492857142857143, |
|
"grad_norm": 1.0601118803024292, |
|
"learning_rate": 3.892592592592593e-05, |
|
"loss": 0.4057, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.7907903790473938, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.3183, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.507142857142857, |
|
"grad_norm": 1.8523814678192139, |
|
"learning_rate": 3.885185185185186e-05, |
|
"loss": 0.4329, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.5142857142857142, |
|
"grad_norm": 1.9627383947372437, |
|
"learning_rate": 3.8814814814814814e-05, |
|
"loss": 0.3041, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.5214285714285714, |
|
"grad_norm": 0.6192536354064941, |
|
"learning_rate": 3.877777777777778e-05, |
|
"loss": 0.3271, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.5285714285714285, |
|
"grad_norm": 1.3901042938232422, |
|
"learning_rate": 3.8740740740740744e-05, |
|
"loss": 0.2562, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.5357142857142856, |
|
"grad_norm": 1.991752028465271, |
|
"learning_rate": 3.8703703703703705e-05, |
|
"loss": 0.4283, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.5428571428571427, |
|
"grad_norm": 1.219382882118225, |
|
"learning_rate": 3.866666666666667e-05, |
|
"loss": 0.4232, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 2.72744083404541, |
|
"learning_rate": 3.8629629629629635e-05, |
|
"loss": 0.3482, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.557142857142857, |
|
"grad_norm": 1.6782621145248413, |
|
"learning_rate": 3.85925925925926e-05, |
|
"loss": 0.3302, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.564285714285714, |
|
"grad_norm": 1.0238265991210938, |
|
"learning_rate": 3.855555555555556e-05, |
|
"loss": 0.458, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.571428571428571, |
|
"grad_norm": 2.212013006210327, |
|
"learning_rate": 3.851851851851852e-05, |
|
"loss": 0.4127, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.5785714285714287, |
|
"grad_norm": 1.5761399269104004, |
|
"learning_rate": 3.848148148148149e-05, |
|
"loss": 0.402, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.585714285714286, |
|
"grad_norm": 1.2036465406417847, |
|
"learning_rate": 3.844444444444444e-05, |
|
"loss": 0.5576, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.592857142857143, |
|
"grad_norm": 1.8674002885818481, |
|
"learning_rate": 3.840740740740741e-05, |
|
"loss": 0.364, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 1.466834545135498, |
|
"learning_rate": 3.837037037037037e-05, |
|
"loss": 0.3523, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.607142857142857, |
|
"grad_norm": 1.57899010181427, |
|
"learning_rate": 3.8333333333333334e-05, |
|
"loss": 0.4046, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.6142857142857143, |
|
"grad_norm": 0.9730345010757446, |
|
"learning_rate": 3.8296296296296296e-05, |
|
"loss": 0.3132, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.6214285714285714, |
|
"grad_norm": 1.3017544746398926, |
|
"learning_rate": 3.8259259259259264e-05, |
|
"loss": 0.3023, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.6285714285714286, |
|
"grad_norm": 1.6368205547332764, |
|
"learning_rate": 3.8222222222222226e-05, |
|
"loss": 0.4363, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.6357142857142857, |
|
"grad_norm": 1.2852121591567993, |
|
"learning_rate": 3.818518518518519e-05, |
|
"loss": 0.2896, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.642857142857143, |
|
"grad_norm": 3.6991841793060303, |
|
"learning_rate": 3.814814814814815e-05, |
|
"loss": 0.353, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 2.70285701751709, |
|
"learning_rate": 3.811111111111112e-05, |
|
"loss": 0.4217, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.657142857142857, |
|
"grad_norm": 1.140811800956726, |
|
"learning_rate": 3.807407407407408e-05, |
|
"loss": 0.3253, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.664285714285714, |
|
"grad_norm": 1.2905789613723755, |
|
"learning_rate": 3.803703703703704e-05, |
|
"loss": 0.3051, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.6714285714285713, |
|
"grad_norm": 1.4326887130737305, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.3999, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.678571428571429, |
|
"grad_norm": 1.1789475679397583, |
|
"learning_rate": 3.7962962962962964e-05, |
|
"loss": 0.4631, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.685714285714286, |
|
"grad_norm": 2.0444328784942627, |
|
"learning_rate": 3.7925925925925925e-05, |
|
"loss": 0.4449, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.692857142857143, |
|
"grad_norm": 2.1025991439819336, |
|
"learning_rate": 3.7888888888888894e-05, |
|
"loss": 0.3513, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 1.8492026329040527, |
|
"learning_rate": 3.7851851851851855e-05, |
|
"loss": 0.4006, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.7071428571428573, |
|
"grad_norm": 1.3439162969589233, |
|
"learning_rate": 3.781481481481482e-05, |
|
"loss": 0.2806, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.7142857142857144, |
|
"grad_norm": 1.4200384616851807, |
|
"learning_rate": 3.777777777777778e-05, |
|
"loss": 0.3759, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.7214285714285715, |
|
"grad_norm": 1.9567861557006836, |
|
"learning_rate": 3.774074074074074e-05, |
|
"loss": 0.1772, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.7285714285714286, |
|
"grad_norm": 1.3466306924819946, |
|
"learning_rate": 3.770370370370371e-05, |
|
"loss": 0.399, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.7357142857142858, |
|
"grad_norm": 1.6046024560928345, |
|
"learning_rate": 3.766666666666667e-05, |
|
"loss": 0.347, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.742857142857143, |
|
"grad_norm": 2.190568447113037, |
|
"learning_rate": 3.762962962962963e-05, |
|
"loss": 0.3977, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 1.7715718746185303, |
|
"learning_rate": 3.759259259259259e-05, |
|
"loss": 0.4385, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.757142857142857, |
|
"grad_norm": 3.19500994682312, |
|
"learning_rate": 3.7555555555555554e-05, |
|
"loss": 0.3631, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.7642857142857142, |
|
"grad_norm": 2.2222607135772705, |
|
"learning_rate": 3.751851851851852e-05, |
|
"loss": 0.3565, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.7714285714285714, |
|
"grad_norm": 1.9959403276443481, |
|
"learning_rate": 3.7481481481481484e-05, |
|
"loss": 0.3629, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.7785714285714285, |
|
"grad_norm": 1.3207546472549438, |
|
"learning_rate": 3.7444444444444446e-05, |
|
"loss": 0.2911, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.7857142857142856, |
|
"grad_norm": 2.0290961265563965, |
|
"learning_rate": 3.740740740740741e-05, |
|
"loss": 0.3072, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.7928571428571427, |
|
"grad_norm": 1.3728725910186768, |
|
"learning_rate": 3.737037037037037e-05, |
|
"loss": 0.3858, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 2.541598320007324, |
|
"learning_rate": 3.733333333333334e-05, |
|
"loss": 0.3487, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.807142857142857, |
|
"grad_norm": 2.3327584266662598, |
|
"learning_rate": 3.72962962962963e-05, |
|
"loss": 0.3535, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.814285714285714, |
|
"grad_norm": 2.546766757965088, |
|
"learning_rate": 3.725925925925926e-05, |
|
"loss": 0.3462, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.821428571428571, |
|
"grad_norm": 2.351959705352783, |
|
"learning_rate": 3.722222222222222e-05, |
|
"loss": 0.2781, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.8285714285714287, |
|
"grad_norm": 1.9349900484085083, |
|
"learning_rate": 3.718518518518519e-05, |
|
"loss": 0.2442, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.835714285714286, |
|
"grad_norm": 2.2020022869110107, |
|
"learning_rate": 3.714814814814815e-05, |
|
"loss": 0.3396, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.842857142857143, |
|
"grad_norm": 1.5161465406417847, |
|
"learning_rate": 3.7111111111111113e-05, |
|
"loss": 0.3722, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 1.7403453588485718, |
|
"learning_rate": 3.7074074074074075e-05, |
|
"loss": 0.4227, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 1.9142546653747559, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.3259, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"eval_loss": 0.38673722743988037, |
|
"eval_rouge1": 0.8974, |
|
"eval_rouge2": 0.8331, |
|
"eval_rougeL": 0.8942, |
|
"eval_runtime": 122.1383, |
|
"eval_samples_per_second": 11.462, |
|
"eval_steps_per_second": 5.731, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.8642857142857143, |
|
"grad_norm": 1.5975255966186523, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.3732, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.8714285714285714, |
|
"grad_norm": 1.4830248355865479, |
|
"learning_rate": 3.6962962962962966e-05, |
|
"loss": 0.5093, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.8785714285714286, |
|
"grad_norm": 2.504650354385376, |
|
"learning_rate": 3.692592592592593e-05, |
|
"loss": 0.3302, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.8857142857142857, |
|
"grad_norm": 2.349452495574951, |
|
"learning_rate": 3.688888888888889e-05, |
|
"loss": 0.3596, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.892857142857143, |
|
"grad_norm": 1.398964762687683, |
|
"learning_rate": 3.685185185185185e-05, |
|
"loss": 0.3494, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 2.212738513946533, |
|
"learning_rate": 3.681481481481482e-05, |
|
"loss": 0.3691, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.907142857142857, |
|
"grad_norm": 2.20845627784729, |
|
"learning_rate": 3.677777777777778e-05, |
|
"loss": 0.2974, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.914285714285714, |
|
"grad_norm": 1.2226334810256958, |
|
"learning_rate": 3.674074074074074e-05, |
|
"loss": 0.3173, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.9214285714285713, |
|
"grad_norm": 2.2203428745269775, |
|
"learning_rate": 3.6703703703703704e-05, |
|
"loss": 0.4473, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.928571428571429, |
|
"grad_norm": 1.487853765487671, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.2653, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.935714285714286, |
|
"grad_norm": 1.6347614526748657, |
|
"learning_rate": 3.662962962962963e-05, |
|
"loss": 0.3563, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.942857142857143, |
|
"grad_norm": 2.2722184658050537, |
|
"learning_rate": 3.6592592592592596e-05, |
|
"loss": 0.4975, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 1.747530460357666, |
|
"learning_rate": 3.655555555555556e-05, |
|
"loss": 0.2357, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.9571428571428573, |
|
"grad_norm": 1.628596544265747, |
|
"learning_rate": 3.651851851851852e-05, |
|
"loss": 0.3674, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.9642857142857144, |
|
"grad_norm": 1.0486435890197754, |
|
"learning_rate": 3.648148148148148e-05, |
|
"loss": 0.3314, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.9714285714285715, |
|
"grad_norm": 2.523879289627075, |
|
"learning_rate": 3.644444444444445e-05, |
|
"loss": 0.4421, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.9785714285714286, |
|
"grad_norm": 1.4641958475112915, |
|
"learning_rate": 3.6407407407407403e-05, |
|
"loss": 0.4135, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.9857142857142858, |
|
"grad_norm": 2.672769784927368, |
|
"learning_rate": 3.637037037037037e-05, |
|
"loss": 0.3527, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.992857142857143, |
|
"grad_norm": 0.5795308351516724, |
|
"learning_rate": 3.633333333333333e-05, |
|
"loss": 0.2326, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.873579978942871, |
|
"learning_rate": 3.62962962962963e-05, |
|
"loss": 0.3679, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.007142857142857, |
|
"grad_norm": 1.7640775442123413, |
|
"learning_rate": 3.6259259259259256e-05, |
|
"loss": 0.4778, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 3.0142857142857142, |
|
"grad_norm": 1.9458075761795044, |
|
"learning_rate": 3.6222222222222225e-05, |
|
"loss": 0.4054, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 3.0214285714285714, |
|
"grad_norm": 1.1568126678466797, |
|
"learning_rate": 3.6185185185185186e-05, |
|
"loss": 0.2249, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 3.0285714285714285, |
|
"grad_norm": 1.3655381202697754, |
|
"learning_rate": 3.614814814814815e-05, |
|
"loss": 0.3993, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 3.0357142857142856, |
|
"grad_norm": 2.0403196811676025, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.3366, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 3.0428571428571427, |
|
"grad_norm": 1.9888697862625122, |
|
"learning_rate": 3.607407407407408e-05, |
|
"loss": 0.3033, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 1.3648616075515747, |
|
"learning_rate": 3.603703703703704e-05, |
|
"loss": 0.2874, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 3.057142857142857, |
|
"grad_norm": 2.602613925933838, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.4086, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 3.064285714285714, |
|
"grad_norm": 2.5918185710906982, |
|
"learning_rate": 3.596296296296296e-05, |
|
"loss": 0.393, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 3.0714285714285716, |
|
"grad_norm": 1.8195433616638184, |
|
"learning_rate": 3.592592592592593e-05, |
|
"loss": 0.3361, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 3.0785714285714287, |
|
"grad_norm": 1.8855136632919312, |
|
"learning_rate": 3.5888888888888886e-05, |
|
"loss": 0.3205, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 3.085714285714286, |
|
"grad_norm": 2.7412662506103516, |
|
"learning_rate": 3.5851851851851854e-05, |
|
"loss": 0.2659, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 3.092857142857143, |
|
"grad_norm": 1.880436658859253, |
|
"learning_rate": 3.5814814814814815e-05, |
|
"loss": 0.49, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 1.6828274726867676, |
|
"learning_rate": 3.577777777777778e-05, |
|
"loss": 0.2933, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 3.107142857142857, |
|
"grad_norm": 1.0517287254333496, |
|
"learning_rate": 3.574074074074074e-05, |
|
"loss": 0.3563, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 3.1142857142857143, |
|
"grad_norm": 1.3242154121398926, |
|
"learning_rate": 3.570370370370371e-05, |
|
"loss": 0.3765, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 3.1214285714285714, |
|
"grad_norm": 2.0899312496185303, |
|
"learning_rate": 3.566666666666667e-05, |
|
"loss": 0.3664, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 3.1285714285714286, |
|
"grad_norm": 2.0286014080047607, |
|
"learning_rate": 3.562962962962963e-05, |
|
"loss": 0.2622, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 3.1357142857142857, |
|
"grad_norm": 2.5074400901794434, |
|
"learning_rate": 3.559259259259259e-05, |
|
"loss": 0.321, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 3.142857142857143, |
|
"grad_norm": 1.4080287218093872, |
|
"learning_rate": 3.555555555555556e-05, |
|
"loss": 0.4035, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"grad_norm": 1.923890471458435, |
|
"learning_rate": 3.5518518518518515e-05, |
|
"loss": 0.2775, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 3.157142857142857, |
|
"grad_norm": 0.806591272354126, |
|
"learning_rate": 3.548148148148148e-05, |
|
"loss": 0.3149, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 3.164285714285714, |
|
"grad_norm": 2.197736978530884, |
|
"learning_rate": 3.5444444444444445e-05, |
|
"loss": 0.4368, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 3.1714285714285713, |
|
"grad_norm": 1.6943881511688232, |
|
"learning_rate": 3.540740740740741e-05, |
|
"loss": 0.2793, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 3.1785714285714284, |
|
"grad_norm": 2.5460283756256104, |
|
"learning_rate": 3.537037037037037e-05, |
|
"loss": 0.4057, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 3.185714285714286, |
|
"grad_norm": 1.579908013343811, |
|
"learning_rate": 3.5333333333333336e-05, |
|
"loss": 0.3016, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 3.192857142857143, |
|
"grad_norm": 1.9137247800827026, |
|
"learning_rate": 3.52962962962963e-05, |
|
"loss": 0.3437, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 2.510328769683838, |
|
"learning_rate": 3.525925925925926e-05, |
|
"loss": 0.585, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 3.2071428571428573, |
|
"grad_norm": 0.9775506854057312, |
|
"learning_rate": 3.522222222222222e-05, |
|
"loss": 0.2651, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 3.2142857142857144, |
|
"grad_norm": 1.7614684104919434, |
|
"learning_rate": 3.518518518518519e-05, |
|
"loss": 0.3089, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.2214285714285715, |
|
"grad_norm": 1.9103621244430542, |
|
"learning_rate": 3.514814814814815e-05, |
|
"loss": 0.342, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 3.2285714285714286, |
|
"grad_norm": 1.4587639570236206, |
|
"learning_rate": 3.511111111111111e-05, |
|
"loss": 0.2592, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 3.2357142857142858, |
|
"grad_norm": 1.3419288396835327, |
|
"learning_rate": 3.5074074074074074e-05, |
|
"loss": 0.4185, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 3.242857142857143, |
|
"grad_norm": 1.6199047565460205, |
|
"learning_rate": 3.503703703703704e-05, |
|
"loss": 0.256, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"grad_norm": 1.230350136756897, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3304, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 3.257142857142857, |
|
"grad_norm": 3.087888240814209, |
|
"learning_rate": 3.4962962962962965e-05, |
|
"loss": 0.3351, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 3.2642857142857142, |
|
"grad_norm": 1.4498260021209717, |
|
"learning_rate": 3.492592592592593e-05, |
|
"loss": 0.2753, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 3.2714285714285714, |
|
"grad_norm": 1.1032336950302124, |
|
"learning_rate": 3.4888888888888895e-05, |
|
"loss": 0.3709, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 3.2785714285714285, |
|
"grad_norm": 1.5177497863769531, |
|
"learning_rate": 3.485185185185185e-05, |
|
"loss": 0.276, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 3.2857142857142856, |
|
"grad_norm": 1.2596136331558228, |
|
"learning_rate": 3.481481481481482e-05, |
|
"loss": 0.3482, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.2928571428571427, |
|
"grad_norm": 1.9895663261413574, |
|
"learning_rate": 3.477777777777778e-05, |
|
"loss": 0.3738, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"grad_norm": 1.2930881977081299, |
|
"learning_rate": 3.474074074074074e-05, |
|
"loss": 0.4263, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 3.307142857142857, |
|
"grad_norm": 2.276385545730591, |
|
"learning_rate": 3.47037037037037e-05, |
|
"loss": 0.2267, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 3.314285714285714, |
|
"grad_norm": 0.9766007661819458, |
|
"learning_rate": 3.466666666666667e-05, |
|
"loss": 0.2217, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 3.3214285714285716, |
|
"grad_norm": 1.5184674263000488, |
|
"learning_rate": 3.4629629629629626e-05, |
|
"loss": 0.2788, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 3.3285714285714287, |
|
"grad_norm": 1.5145732164382935, |
|
"learning_rate": 3.4592592592592594e-05, |
|
"loss": 0.3291, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 3.335714285714286, |
|
"grad_norm": 1.4273874759674072, |
|
"learning_rate": 3.4555555555555556e-05, |
|
"loss": 0.2854, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 3.342857142857143, |
|
"grad_norm": 2.783701181411743, |
|
"learning_rate": 3.4518518518518524e-05, |
|
"loss": 0.3518, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"grad_norm": 1.3359688520431519, |
|
"learning_rate": 3.448148148148148e-05, |
|
"loss": 0.2239, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 3.357142857142857, |
|
"grad_norm": 2.246824264526367, |
|
"learning_rate": 3.444444444444445e-05, |
|
"loss": 0.3206, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 3.3642857142857143, |
|
"grad_norm": 1.7839916944503784, |
|
"learning_rate": 3.440740740740741e-05, |
|
"loss": 0.3189, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 3.3714285714285714, |
|
"grad_norm": 1.0196881294250488, |
|
"learning_rate": 3.437037037037037e-05, |
|
"loss": 0.2318, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 3.3785714285714286, |
|
"grad_norm": 2.228317975997925, |
|
"learning_rate": 3.433333333333333e-05, |
|
"loss": 0.4033, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 3.3857142857142857, |
|
"grad_norm": 2.0231473445892334, |
|
"learning_rate": 3.42962962962963e-05, |
|
"loss": 0.3854, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 3.392857142857143, |
|
"grad_norm": 2.074925422668457, |
|
"learning_rate": 3.425925925925926e-05, |
|
"loss": 0.3778, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"grad_norm": 1.2508392333984375, |
|
"learning_rate": 3.4222222222222224e-05, |
|
"loss": 0.3299, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 3.407142857142857, |
|
"grad_norm": 1.0920076370239258, |
|
"learning_rate": 3.4185185185185185e-05, |
|
"loss": 0.3798, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 3.414285714285714, |
|
"grad_norm": 1.8113828897476196, |
|
"learning_rate": 3.4148148148148153e-05, |
|
"loss": 0.2903, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 3.4214285714285713, |
|
"grad_norm": 1.6218737363815308, |
|
"learning_rate": 3.411111111111111e-05, |
|
"loss": 0.2593, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 3.4285714285714284, |
|
"grad_norm": 1.0635234117507935, |
|
"learning_rate": 3.4074074074074077e-05, |
|
"loss": 0.4388, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.435714285714286, |
|
"grad_norm": 2.585700273513794, |
|
"learning_rate": 3.403703703703704e-05, |
|
"loss": 0.3368, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 3.442857142857143, |
|
"grad_norm": 1.0704694986343384, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.2196, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"grad_norm": 1.3177589178085327, |
|
"learning_rate": 3.396296296296296e-05, |
|
"loss": 0.3104, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 3.4571428571428573, |
|
"grad_norm": 1.834241271018982, |
|
"learning_rate": 3.392592592592593e-05, |
|
"loss": 0.3413, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 3.4642857142857144, |
|
"grad_norm": 1.8859339952468872, |
|
"learning_rate": 3.388888888888889e-05, |
|
"loss": 0.2593, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 3.4714285714285715, |
|
"grad_norm": 1.452728271484375, |
|
"learning_rate": 3.385185185185185e-05, |
|
"loss": 0.3029, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 3.4785714285714286, |
|
"grad_norm": 2.170774221420288, |
|
"learning_rate": 3.3814814814814814e-05, |
|
"loss": 0.3372, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 3.4857142857142858, |
|
"grad_norm": 1.8695834875106812, |
|
"learning_rate": 3.377777777777778e-05, |
|
"loss": 0.3428, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 3.492857142857143, |
|
"grad_norm": 1.74647855758667, |
|
"learning_rate": 3.3740740740740744e-05, |
|
"loss": 0.3351, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 2.3349127769470215, |
|
"learning_rate": 3.3703703703703706e-05, |
|
"loss": 0.2733, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 3.507142857142857, |
|
"grad_norm": 2.73463773727417, |
|
"learning_rate": 3.366666666666667e-05, |
|
"loss": 0.2979, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 3.5142857142857142, |
|
"grad_norm": 1.3546210527420044, |
|
"learning_rate": 3.3629629629629636e-05, |
|
"loss": 0.3521, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 3.5214285714285714, |
|
"grad_norm": 1.617336630821228, |
|
"learning_rate": 3.359259259259259e-05, |
|
"loss": 0.2758, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 3.5285714285714285, |
|
"grad_norm": 2.998967409133911, |
|
"learning_rate": 3.355555555555556e-05, |
|
"loss": 0.4193, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 3.5357142857142856, |
|
"grad_norm": 1.8004390001296997, |
|
"learning_rate": 3.351851851851852e-05, |
|
"loss": 0.3936, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 3.5428571428571427, |
|
"grad_norm": 1.4228971004486084, |
|
"learning_rate": 3.348148148148148e-05, |
|
"loss": 0.3563, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"grad_norm": 1.5617480278015137, |
|
"learning_rate": 3.3444444444444443e-05, |
|
"loss": 0.2492, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 3.557142857142857, |
|
"grad_norm": 1.3880919218063354, |
|
"learning_rate": 3.340740740740741e-05, |
|
"loss": 0.1791, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 3.564285714285714, |
|
"grad_norm": 2.3505630493164062, |
|
"learning_rate": 3.337037037037037e-05, |
|
"loss": 0.4009, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 3.571428571428571, |
|
"grad_norm": 0.9086794853210449, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.2826, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.571428571428571, |
|
"eval_loss": 0.3789908289909363, |
|
"eval_rouge1": 0.8999, |
|
"eval_rouge2": 0.8372, |
|
"eval_rougeL": 0.8969, |
|
"eval_runtime": 122.23, |
|
"eval_samples_per_second": 11.454, |
|
"eval_steps_per_second": 5.727, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.5785714285714287, |
|
"grad_norm": 1.0208678245544434, |
|
"learning_rate": 3.3296296296296296e-05, |
|
"loss": 0.295, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 3.585714285714286, |
|
"grad_norm": 3.03141713142395, |
|
"learning_rate": 3.3259259259259265e-05, |
|
"loss": 0.3813, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 3.592857142857143, |
|
"grad_norm": 1.7845333814620972, |
|
"learning_rate": 3.322222222222222e-05, |
|
"loss": 0.2526, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 4.314096450805664, |
|
"learning_rate": 3.318518518518519e-05, |
|
"loss": 0.3498, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 3.607142857142857, |
|
"grad_norm": 1.5270274877548218, |
|
"learning_rate": 3.314814814814815e-05, |
|
"loss": 0.3204, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 3.6142857142857143, |
|
"grad_norm": 2.036738157272339, |
|
"learning_rate": 3.311111111111112e-05, |
|
"loss": 0.3416, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 3.6214285714285714, |
|
"grad_norm": 2.2504570484161377, |
|
"learning_rate": 3.307407407407407e-05, |
|
"loss": 0.3781, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 3.6285714285714286, |
|
"grad_norm": 1.749518632888794, |
|
"learning_rate": 3.303703703703704e-05, |
|
"loss": 0.2299, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 3.6357142857142857, |
|
"grad_norm": 2.1878907680511475, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.3692, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 3.642857142857143, |
|
"grad_norm": 1.829394817352295, |
|
"learning_rate": 3.2962962962962964e-05, |
|
"loss": 0.3095, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"grad_norm": 2.5994794368743896, |
|
"learning_rate": 3.2925925925925926e-05, |
|
"loss": 0.431, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 3.657142857142857, |
|
"grad_norm": 1.2319742441177368, |
|
"learning_rate": 3.2888888888888894e-05, |
|
"loss": 0.336, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 3.664285714285714, |
|
"grad_norm": 2.169063091278076, |
|
"learning_rate": 3.2851851851851856e-05, |
|
"loss": 0.293, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 3.6714285714285713, |
|
"grad_norm": 1.7120137214660645, |
|
"learning_rate": 3.281481481481482e-05, |
|
"loss": 0.3439, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 3.678571428571429, |
|
"grad_norm": 1.5415689945220947, |
|
"learning_rate": 3.277777777777778e-05, |
|
"loss": 0.3912, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 3.685714285714286, |
|
"grad_norm": 2.2880282402038574, |
|
"learning_rate": 3.274074074074075e-05, |
|
"loss": 0.2352, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 3.692857142857143, |
|
"grad_norm": 1.7133980989456177, |
|
"learning_rate": 3.27037037037037e-05, |
|
"loss": 0.5397, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"grad_norm": 1.9661128520965576, |
|
"learning_rate": 3.266666666666667e-05, |
|
"loss": 0.4496, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 3.7071428571428573, |
|
"grad_norm": 1.444551944732666, |
|
"learning_rate": 3.262962962962963e-05, |
|
"loss": 0.3201, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 3.7142857142857144, |
|
"grad_norm": 1.7919954061508179, |
|
"learning_rate": 3.25925925925926e-05, |
|
"loss": 0.3721, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.7214285714285715, |
|
"grad_norm": 2.4862735271453857, |
|
"learning_rate": 3.2555555555555555e-05, |
|
"loss": 0.2511, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 3.7285714285714286, |
|
"grad_norm": 1.0694047212600708, |
|
"learning_rate": 3.251851851851852e-05, |
|
"loss": 0.1418, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 3.7357142857142858, |
|
"grad_norm": 2.4438931941986084, |
|
"learning_rate": 3.2481481481481485e-05, |
|
"loss": 0.2473, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 3.742857142857143, |
|
"grad_norm": 1.9673523902893066, |
|
"learning_rate": 3.2444444444444446e-05, |
|
"loss": 0.3251, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"grad_norm": 2.5299620628356934, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.3862, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 3.757142857142857, |
|
"grad_norm": 1.1709238290786743, |
|
"learning_rate": 3.2370370370370376e-05, |
|
"loss": 0.3156, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 3.7642857142857142, |
|
"grad_norm": 1.4275505542755127, |
|
"learning_rate": 3.233333333333333e-05, |
|
"loss": 0.3091, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 3.7714285714285714, |
|
"grad_norm": 1.5278127193450928, |
|
"learning_rate": 3.22962962962963e-05, |
|
"loss": 0.3768, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 3.7785714285714285, |
|
"grad_norm": 2.870471239089966, |
|
"learning_rate": 3.225925925925926e-05, |
|
"loss": 0.4264, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 3.7857142857142856, |
|
"grad_norm": 1.4797722101211548, |
|
"learning_rate": 3.222222222222223e-05, |
|
"loss": 0.3598, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 3.7928571428571427, |
|
"grad_norm": 1.6350576877593994, |
|
"learning_rate": 3.2185185185185184e-05, |
|
"loss": 0.2125, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"grad_norm": 1.8790502548217773, |
|
"learning_rate": 3.214814814814815e-05, |
|
"loss": 0.2698, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 3.807142857142857, |
|
"grad_norm": 1.3930083513259888, |
|
"learning_rate": 3.2111111111111114e-05, |
|
"loss": 0.3867, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 3.814285714285714, |
|
"grad_norm": 1.7605199813842773, |
|
"learning_rate": 3.2074074074074075e-05, |
|
"loss": 0.3594, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 3.821428571428571, |
|
"grad_norm": 2.3873794078826904, |
|
"learning_rate": 3.203703703703704e-05, |
|
"loss": 0.372, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 3.8285714285714287, |
|
"grad_norm": 3.087186098098755, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.3964, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 3.835714285714286, |
|
"grad_norm": 1.6758490800857544, |
|
"learning_rate": 3.196296296296297e-05, |
|
"loss": 0.3274, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 3.842857142857143, |
|
"grad_norm": 1.184205412864685, |
|
"learning_rate": 3.192592592592593e-05, |
|
"loss": 0.277, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"grad_norm": 2.1282460689544678, |
|
"learning_rate": 3.188888888888889e-05, |
|
"loss": 0.3283, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 3.857142857142857, |
|
"grad_norm": 1.9244283437728882, |
|
"learning_rate": 3.185185185185185e-05, |
|
"loss": 0.2732, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.8642857142857143, |
|
"grad_norm": 1.2328709363937378, |
|
"learning_rate": 3.181481481481481e-05, |
|
"loss": 0.2968, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 3.8714285714285714, |
|
"grad_norm": 2.5490071773529053, |
|
"learning_rate": 3.177777777777778e-05, |
|
"loss": 0.3258, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 3.8785714285714286, |
|
"grad_norm": 1.7774560451507568, |
|
"learning_rate": 3.174074074074074e-05, |
|
"loss": 0.3274, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 3.8857142857142857, |
|
"grad_norm": 0.9900962710380554, |
|
"learning_rate": 3.1703703703703705e-05, |
|
"loss": 0.3361, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 3.892857142857143, |
|
"grad_norm": 1.2809844017028809, |
|
"learning_rate": 3.1666666666666666e-05, |
|
"loss": 0.3684, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"grad_norm": 2.2611334323883057, |
|
"learning_rate": 3.1629629629629634e-05, |
|
"loss": 0.326, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 3.907142857142857, |
|
"grad_norm": 2.49057936668396, |
|
"learning_rate": 3.1592592592592596e-05, |
|
"loss": 0.412, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 3.914285714285714, |
|
"grad_norm": 1.6978118419647217, |
|
"learning_rate": 3.155555555555556e-05, |
|
"loss": 0.2177, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 3.9214285714285713, |
|
"grad_norm": 1.847128987312317, |
|
"learning_rate": 3.151851851851852e-05, |
|
"loss": 0.3419, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 3.928571428571429, |
|
"grad_norm": 1.6806657314300537, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 0.1479, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.935714285714286, |
|
"grad_norm": 2.144227981567383, |
|
"learning_rate": 3.144444444444445e-05, |
|
"loss": 0.3098, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 3.942857142857143, |
|
"grad_norm": 1.2945857048034668, |
|
"learning_rate": 3.140740740740741e-05, |
|
"loss": 0.269, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"grad_norm": 1.8362900018692017, |
|
"learning_rate": 3.137037037037037e-05, |
|
"loss": 0.3065, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 3.9571428571428573, |
|
"grad_norm": 1.9124987125396729, |
|
"learning_rate": 3.1333333333333334e-05, |
|
"loss": 0.2593, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 3.9642857142857144, |
|
"grad_norm": 1.726523995399475, |
|
"learning_rate": 3.1296296296296295e-05, |
|
"loss": 0.3112, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 3.9714285714285715, |
|
"grad_norm": 1.5914565324783325, |
|
"learning_rate": 3.1259259259259264e-05, |
|
"loss": 0.263, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 3.9785714285714286, |
|
"grad_norm": 1.3533891439437866, |
|
"learning_rate": 3.1222222222222225e-05, |
|
"loss": 0.3852, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 3.9857142857142858, |
|
"grad_norm": 2.1844253540039062, |
|
"learning_rate": 3.118518518518519e-05, |
|
"loss": 0.3761, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 3.992857142857143, |
|
"grad_norm": 2.494920492172241, |
|
"learning_rate": 3.114814814814815e-05, |
|
"loss": 0.3882, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.9914864897727966, |
|
"learning_rate": 3.111111111111111e-05, |
|
"loss": 0.3518, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 4.007142857142857, |
|
"grad_norm": 1.6416865587234497, |
|
"learning_rate": 3.107407407407408e-05, |
|
"loss": 0.2688, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 4.014285714285714, |
|
"grad_norm": 1.934449315071106, |
|
"learning_rate": 3.103703703703704e-05, |
|
"loss": 0.2385, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 4.021428571428571, |
|
"grad_norm": 1.7663776874542236, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.3147, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 4.0285714285714285, |
|
"grad_norm": 1.8457096815109253, |
|
"learning_rate": 3.096296296296296e-05, |
|
"loss": 0.2922, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 4.035714285714286, |
|
"grad_norm": 1.133711338043213, |
|
"learning_rate": 3.0925925925925924e-05, |
|
"loss": 0.2291, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 4.042857142857143, |
|
"grad_norm": 1.794723629951477, |
|
"learning_rate": 3.088888888888889e-05, |
|
"loss": 0.3204, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 1.966180443763733, |
|
"learning_rate": 3.0851851851851854e-05, |
|
"loss": 0.2757, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 4.057142857142857, |
|
"grad_norm": 0.789313018321991, |
|
"learning_rate": 3.0814814814814816e-05, |
|
"loss": 0.3106, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 4.064285714285714, |
|
"grad_norm": 1.4390606880187988, |
|
"learning_rate": 3.077777777777778e-05, |
|
"loss": 0.192, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 4.071428571428571, |
|
"grad_norm": 1.8229310512542725, |
|
"learning_rate": 3.074074074074074e-05, |
|
"loss": 0.3802, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 4.078571428571428, |
|
"grad_norm": 1.3065968751907349, |
|
"learning_rate": 3.070370370370371e-05, |
|
"loss": 0.2891, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 4.085714285714285, |
|
"grad_norm": 1.5169206857681274, |
|
"learning_rate": 3.066666666666667e-05, |
|
"loss": 0.2818, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 4.0928571428571425, |
|
"grad_norm": 1.8811321258544922, |
|
"learning_rate": 3.062962962962963e-05, |
|
"loss": 0.1845, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 2.2235770225524902, |
|
"learning_rate": 3.059259259259259e-05, |
|
"loss": 0.3671, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 4.107142857142857, |
|
"grad_norm": 1.5675430297851562, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.3588, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 4.114285714285714, |
|
"grad_norm": 1.3254741430282593, |
|
"learning_rate": 3.0518518518518515e-05, |
|
"loss": 0.3641, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 4.121428571428571, |
|
"grad_norm": 2.601593017578125, |
|
"learning_rate": 3.0481481481481484e-05, |
|
"loss": 0.2704, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 4.128571428571428, |
|
"grad_norm": 2.3631677627563477, |
|
"learning_rate": 3.044444444444445e-05, |
|
"loss": 0.2528, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 4.135714285714286, |
|
"grad_norm": 1.4800968170166016, |
|
"learning_rate": 3.0407407407407407e-05, |
|
"loss": 0.263, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 4.142857142857143, |
|
"grad_norm": 1.6989574432373047, |
|
"learning_rate": 3.037037037037037e-05, |
|
"loss": 0.2465, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"grad_norm": 1.595765471458435, |
|
"learning_rate": 3.0333333333333337e-05, |
|
"loss": 0.3223, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 4.1571428571428575, |
|
"grad_norm": 1.8895677328109741, |
|
"learning_rate": 3.02962962962963e-05, |
|
"loss": 0.3181, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 4.164285714285715, |
|
"grad_norm": 1.147406816482544, |
|
"learning_rate": 3.025925925925926e-05, |
|
"loss": 0.2275, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 4.171428571428572, |
|
"grad_norm": 3.310147523880005, |
|
"learning_rate": 3.0222222222222225e-05, |
|
"loss": 0.3615, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 4.178571428571429, |
|
"grad_norm": 1.6138179302215576, |
|
"learning_rate": 3.018518518518519e-05, |
|
"loss": 0.3492, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 4.185714285714286, |
|
"grad_norm": 1.9912358522415161, |
|
"learning_rate": 3.0148148148148148e-05, |
|
"loss": 0.3358, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 4.192857142857143, |
|
"grad_norm": 2.2521820068359375, |
|
"learning_rate": 3.0111111111111113e-05, |
|
"loss": 0.2773, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"grad_norm": 1.804829478263855, |
|
"learning_rate": 3.0074074074074078e-05, |
|
"loss": 0.3052, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 4.207142857142857, |
|
"grad_norm": 1.0897246599197388, |
|
"learning_rate": 3.0037037037037036e-05, |
|
"loss": 0.3822, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 4.214285714285714, |
|
"grad_norm": 1.337428331375122, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3091, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 4.2214285714285715, |
|
"grad_norm": 1.1409244537353516, |
|
"learning_rate": 2.9962962962962966e-05, |
|
"loss": 0.2002, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 4.228571428571429, |
|
"grad_norm": 0.9190034866333008, |
|
"learning_rate": 2.992592592592593e-05, |
|
"loss": 0.3029, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 4.235714285714286, |
|
"grad_norm": 1.7410012483596802, |
|
"learning_rate": 2.988888888888889e-05, |
|
"loss": 0.2361, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 4.242857142857143, |
|
"grad_norm": 2.308295965194702, |
|
"learning_rate": 2.9851851851851854e-05, |
|
"loss": 0.3654, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"grad_norm": 1.299177646636963, |
|
"learning_rate": 2.981481481481482e-05, |
|
"loss": 0.2346, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 4.257142857142857, |
|
"grad_norm": 1.0352667570114136, |
|
"learning_rate": 2.9777777777777777e-05, |
|
"loss": 0.2331, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 4.264285714285714, |
|
"grad_norm": 1.0682189464569092, |
|
"learning_rate": 2.9740740740740742e-05, |
|
"loss": 0.2456, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 4.271428571428571, |
|
"grad_norm": 1.536718487739563, |
|
"learning_rate": 2.9703703703703707e-05, |
|
"loss": 0.1908, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 4.2785714285714285, |
|
"grad_norm": 2.0448334217071533, |
|
"learning_rate": 2.9666666666666672e-05, |
|
"loss": 0.3399, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 4.285714285714286, |
|
"grad_norm": 2.205901622772217, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.1913, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.285714285714286, |
|
"eval_loss": 0.36299219727516174, |
|
"eval_rouge1": 0.9025, |
|
"eval_rouge2": 0.8402, |
|
"eval_rougeL": 0.8994, |
|
"eval_runtime": 122.2765, |
|
"eval_samples_per_second": 11.449, |
|
"eval_steps_per_second": 5.725, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.292857142857143, |
|
"grad_norm": 1.455069661140442, |
|
"learning_rate": 2.9592592592592595e-05, |
|
"loss": 0.2236, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"grad_norm": 1.6218276023864746, |
|
"learning_rate": 2.955555555555556e-05, |
|
"loss": 0.2166, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 4.307142857142857, |
|
"grad_norm": 1.4643278121948242, |
|
"learning_rate": 2.9518518518518518e-05, |
|
"loss": 0.2543, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 4.314285714285714, |
|
"grad_norm": 1.9875061511993408, |
|
"learning_rate": 2.9481481481481483e-05, |
|
"loss": 0.275, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 4.321428571428571, |
|
"grad_norm": 2.003077268600464, |
|
"learning_rate": 2.9444444444444448e-05, |
|
"loss": 0.3431, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 4.328571428571428, |
|
"grad_norm": 1.332705020904541, |
|
"learning_rate": 2.9407407407407413e-05, |
|
"loss": 0.2546, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 4.335714285714285, |
|
"grad_norm": 1.9161280393600464, |
|
"learning_rate": 2.937037037037037e-05, |
|
"loss": 0.2909, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 4.3428571428571425, |
|
"grad_norm": 1.509238839149475, |
|
"learning_rate": 2.9333333333333336e-05, |
|
"loss": 0.253, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"grad_norm": 2.238847255706787, |
|
"learning_rate": 2.92962962962963e-05, |
|
"loss": 0.2717, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 4.357142857142857, |
|
"grad_norm": 1.9578133821487427, |
|
"learning_rate": 2.925925925925926e-05, |
|
"loss": 0.3407, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 4.364285714285714, |
|
"grad_norm": 1.805828332901001, |
|
"learning_rate": 2.9222222222222224e-05, |
|
"loss": 0.1811, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 4.371428571428572, |
|
"grad_norm": 2.9014134407043457, |
|
"learning_rate": 2.918518518518519e-05, |
|
"loss": 0.3934, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 4.378571428571428, |
|
"grad_norm": 1.9857615232467651, |
|
"learning_rate": 2.914814814814815e-05, |
|
"loss": 0.2026, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 4.385714285714286, |
|
"grad_norm": 2.3884503841400146, |
|
"learning_rate": 2.9111111111111112e-05, |
|
"loss": 0.2787, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 4.392857142857143, |
|
"grad_norm": 2.298215866088867, |
|
"learning_rate": 2.9074074074074077e-05, |
|
"loss": 0.2765, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 2.1733076572418213, |
|
"learning_rate": 2.9037037037037042e-05, |
|
"loss": 0.3975, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 4.4071428571428575, |
|
"grad_norm": 3.3003320693969727, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.4152, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 4.414285714285715, |
|
"grad_norm": 1.5066970586776733, |
|
"learning_rate": 2.8962962962962965e-05, |
|
"loss": 0.345, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 4.421428571428572, |
|
"grad_norm": 2.134096145629883, |
|
"learning_rate": 2.892592592592593e-05, |
|
"loss": 0.3154, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 4.428571428571429, |
|
"grad_norm": 1.8306220769882202, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 0.2908, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 4.435714285714286, |
|
"grad_norm": 1.4300037622451782, |
|
"learning_rate": 2.8851851851851853e-05, |
|
"loss": 0.342, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 4.442857142857143, |
|
"grad_norm": 1.6552793979644775, |
|
"learning_rate": 2.8814814814814818e-05, |
|
"loss": 0.2856, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"grad_norm": 2.188889265060425, |
|
"learning_rate": 2.877777777777778e-05, |
|
"loss": 0.25, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 4.457142857142857, |
|
"grad_norm": 1.3003034591674805, |
|
"learning_rate": 2.874074074074074e-05, |
|
"loss": 0.2995, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 4.464285714285714, |
|
"grad_norm": 1.834549903869629, |
|
"learning_rate": 2.8703703703703706e-05, |
|
"loss": 0.3726, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 4.4714285714285715, |
|
"grad_norm": 1.9426199197769165, |
|
"learning_rate": 2.8666666666666668e-05, |
|
"loss": 0.2142, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 4.478571428571429, |
|
"grad_norm": 1.5088646411895752, |
|
"learning_rate": 2.862962962962963e-05, |
|
"loss": 0.3584, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 4.485714285714286, |
|
"grad_norm": 1.9997400045394897, |
|
"learning_rate": 2.8592592592592594e-05, |
|
"loss": 0.2402, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 4.492857142857143, |
|
"grad_norm": 1.3831549882888794, |
|
"learning_rate": 2.855555555555556e-05, |
|
"loss": 0.312, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"grad_norm": 2.013425588607788, |
|
"learning_rate": 2.851851851851852e-05, |
|
"loss": 0.2728, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 4.507142857142857, |
|
"grad_norm": 1.1200778484344482, |
|
"learning_rate": 2.8481481481481482e-05, |
|
"loss": 0.3909, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 4.514285714285714, |
|
"grad_norm": 0.8029781579971313, |
|
"learning_rate": 2.8444444444444447e-05, |
|
"loss": 0.3491, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 4.521428571428571, |
|
"grad_norm": 1.4999722242355347, |
|
"learning_rate": 2.840740740740741e-05, |
|
"loss": 0.2583, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 4.5285714285714285, |
|
"grad_norm": 1.8954156637191772, |
|
"learning_rate": 2.837037037037037e-05, |
|
"loss": 0.3971, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 4.535714285714286, |
|
"grad_norm": 1.5697578191757202, |
|
"learning_rate": 2.8333333333333335e-05, |
|
"loss": 0.3222, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 4.542857142857143, |
|
"grad_norm": 0.9937646389007568, |
|
"learning_rate": 2.8296296296296297e-05, |
|
"loss": 0.3673, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"grad_norm": 1.935511589050293, |
|
"learning_rate": 2.8259259259259262e-05, |
|
"loss": 0.2385, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 4.557142857142857, |
|
"grad_norm": 1.8132340908050537, |
|
"learning_rate": 2.8222222222222223e-05, |
|
"loss": 0.226, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 4.564285714285714, |
|
"grad_norm": 0.8551497459411621, |
|
"learning_rate": 2.8185185185185185e-05, |
|
"loss": 0.3874, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 4.571428571428571, |
|
"grad_norm": 2.0115785598754883, |
|
"learning_rate": 2.814814814814815e-05, |
|
"loss": 0.2328, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 4.578571428571428, |
|
"grad_norm": 1.0582072734832764, |
|
"learning_rate": 2.811111111111111e-05, |
|
"loss": 0.3523, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 4.585714285714285, |
|
"grad_norm": 1.3484958410263062, |
|
"learning_rate": 2.8074074074074076e-05, |
|
"loss": 0.2867, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 4.5928571428571425, |
|
"grad_norm": 1.4483561515808105, |
|
"learning_rate": 2.8037037037037038e-05, |
|
"loss": 0.2623, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"grad_norm": 2.2348268032073975, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.3953, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 4.607142857142857, |
|
"grad_norm": 2.654326915740967, |
|
"learning_rate": 2.7962962962962965e-05, |
|
"loss": 0.3516, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 4.614285714285714, |
|
"grad_norm": 0.8564252257347107, |
|
"learning_rate": 2.7925925925925926e-05, |
|
"loss": 0.2497, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 4.621428571428572, |
|
"grad_norm": 2.7823233604431152, |
|
"learning_rate": 2.788888888888889e-05, |
|
"loss": 0.3975, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 4.628571428571428, |
|
"grad_norm": 1.0915263891220093, |
|
"learning_rate": 2.7851851851851853e-05, |
|
"loss": 0.2574, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 4.635714285714286, |
|
"grad_norm": 1.0459774732589722, |
|
"learning_rate": 2.7814814814814814e-05, |
|
"loss": 0.3426, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 4.642857142857143, |
|
"grad_norm": 3.1720130443573, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.3155, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"grad_norm": 1.499185562133789, |
|
"learning_rate": 2.774074074074074e-05, |
|
"loss": 0.4515, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 4.6571428571428575, |
|
"grad_norm": 2.4211909770965576, |
|
"learning_rate": 2.7703703703703706e-05, |
|
"loss": 0.2963, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 4.664285714285715, |
|
"grad_norm": 2.167006492614746, |
|
"learning_rate": 2.7666666666666667e-05, |
|
"loss": 0.2625, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 4.671428571428572, |
|
"grad_norm": 1.8955094814300537, |
|
"learning_rate": 2.7629629629629632e-05, |
|
"loss": 0.3374, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 4.678571428571429, |
|
"grad_norm": 0.9967934489250183, |
|
"learning_rate": 2.7592592592592594e-05, |
|
"loss": 0.1611, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 4.685714285714286, |
|
"grad_norm": 1.007778525352478, |
|
"learning_rate": 2.7555555555555555e-05, |
|
"loss": 0.2516, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 4.692857142857143, |
|
"grad_norm": 2.9705958366394043, |
|
"learning_rate": 2.751851851851852e-05, |
|
"loss": 0.3893, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"grad_norm": 2.689723491668701, |
|
"learning_rate": 2.7481481481481482e-05, |
|
"loss": 0.2404, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 4.707142857142857, |
|
"grad_norm": 2.095930337905884, |
|
"learning_rate": 2.7444444444444443e-05, |
|
"loss": 0.3239, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 4.714285714285714, |
|
"grad_norm": 1.9235697984695435, |
|
"learning_rate": 2.7407407407407408e-05, |
|
"loss": 0.2779, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 4.7214285714285715, |
|
"grad_norm": 3.329378843307495, |
|
"learning_rate": 2.7370370370370373e-05, |
|
"loss": 0.2791, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 4.728571428571429, |
|
"grad_norm": 1.9044978618621826, |
|
"learning_rate": 2.733333333333333e-05, |
|
"loss": 0.3757, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 4.735714285714286, |
|
"grad_norm": 2.207752227783203, |
|
"learning_rate": 2.7296296296296296e-05, |
|
"loss": 0.3391, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 4.742857142857143, |
|
"grad_norm": 2.0488827228546143, |
|
"learning_rate": 2.725925925925926e-05, |
|
"loss": 0.396, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"grad_norm": 2.425340414047241, |
|
"learning_rate": 2.7222222222222223e-05, |
|
"loss": 0.2871, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 4.757142857142857, |
|
"grad_norm": 1.9408286809921265, |
|
"learning_rate": 2.7185185185185184e-05, |
|
"loss": 0.3144, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 4.764285714285714, |
|
"grad_norm": 1.864397406578064, |
|
"learning_rate": 2.714814814814815e-05, |
|
"loss": 0.2685, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 4.771428571428571, |
|
"grad_norm": 1.1838607788085938, |
|
"learning_rate": 2.7111111111111114e-05, |
|
"loss": 0.2751, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 4.7785714285714285, |
|
"grad_norm": 2.26408052444458, |
|
"learning_rate": 2.7074074074074072e-05, |
|
"loss": 0.3158, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 4.785714285714286, |
|
"grad_norm": 2.007145404815674, |
|
"learning_rate": 2.7037037037037037e-05, |
|
"loss": 0.1969, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 4.792857142857143, |
|
"grad_norm": 2.5209295749664307, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.3022, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 2.3263044357299805, |
|
"learning_rate": 2.696296296296296e-05, |
|
"loss": 0.3799, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 4.807142857142857, |
|
"grad_norm": 1.3880634307861328, |
|
"learning_rate": 2.6925925925925925e-05, |
|
"loss": 0.2829, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 4.814285714285714, |
|
"grad_norm": 2.0264179706573486, |
|
"learning_rate": 2.688888888888889e-05, |
|
"loss": 0.2754, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 4.821428571428571, |
|
"grad_norm": 1.6165140867233276, |
|
"learning_rate": 2.6851851851851855e-05, |
|
"loss": 0.3171, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 4.828571428571428, |
|
"grad_norm": 1.6405526399612427, |
|
"learning_rate": 2.6814814814814814e-05, |
|
"loss": 0.4082, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 4.835714285714285, |
|
"grad_norm": 1.6864060163497925, |
|
"learning_rate": 2.677777777777778e-05, |
|
"loss": 0.2026, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 4.8428571428571425, |
|
"grad_norm": 1.4906965494155884, |
|
"learning_rate": 2.6740740740740743e-05, |
|
"loss": 0.2582, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"grad_norm": 1.2227530479431152, |
|
"learning_rate": 2.67037037037037e-05, |
|
"loss": 0.185, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 4.857142857142857, |
|
"grad_norm": 1.2606697082519531, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.2651, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 4.864285714285714, |
|
"grad_norm": 2.3722660541534424, |
|
"learning_rate": 2.662962962962963e-05, |
|
"loss": 0.2746, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 4.871428571428572, |
|
"grad_norm": 1.8622608184814453, |
|
"learning_rate": 2.659259259259259e-05, |
|
"loss": 0.3473, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 4.878571428571428, |
|
"grad_norm": 1.3814878463745117, |
|
"learning_rate": 2.6555555555555555e-05, |
|
"loss": 0.2706, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 4.885714285714286, |
|
"grad_norm": 2.013650894165039, |
|
"learning_rate": 2.651851851851852e-05, |
|
"loss": 0.2802, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 4.892857142857143, |
|
"grad_norm": 1.467282772064209, |
|
"learning_rate": 2.6481481481481485e-05, |
|
"loss": 0.3158, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"grad_norm": 1.3019797801971436, |
|
"learning_rate": 2.6444444444444443e-05, |
|
"loss": 0.2012, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 4.9071428571428575, |
|
"grad_norm": 1.1120600700378418, |
|
"learning_rate": 2.6407407407407408e-05, |
|
"loss": 0.1385, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 4.914285714285715, |
|
"grad_norm": 1.470406413078308, |
|
"learning_rate": 2.6370370370370373e-05, |
|
"loss": 0.3014, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 4.921428571428572, |
|
"grad_norm": 2.237767457962036, |
|
"learning_rate": 2.633333333333333e-05, |
|
"loss": 0.2677, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 4.928571428571429, |
|
"grad_norm": 1.3994693756103516, |
|
"learning_rate": 2.6296296296296296e-05, |
|
"loss": 0.4261, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 4.935714285714286, |
|
"grad_norm": 2.21905517578125, |
|
"learning_rate": 2.625925925925926e-05, |
|
"loss": 0.3701, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 4.942857142857143, |
|
"grad_norm": 2.8682186603546143, |
|
"learning_rate": 2.6222222222222226e-05, |
|
"loss": 0.4047, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"grad_norm": 1.9691041707992554, |
|
"learning_rate": 2.6185185185185184e-05, |
|
"loss": 0.2735, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 4.957142857142857, |
|
"grad_norm": 1.7553354501724243, |
|
"learning_rate": 2.614814814814815e-05, |
|
"loss": 0.2381, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 4.964285714285714, |
|
"grad_norm": 1.7930738925933838, |
|
"learning_rate": 2.6111111111111114e-05, |
|
"loss": 0.2838, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 4.9714285714285715, |
|
"grad_norm": 2.4153687953948975, |
|
"learning_rate": 2.6074074074074072e-05, |
|
"loss": 0.4002, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 4.978571428571429, |
|
"grad_norm": 1.392898678779602, |
|
"learning_rate": 2.6037037037037037e-05, |
|
"loss": 0.248, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 4.985714285714286, |
|
"grad_norm": 1.7113401889801025, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.2871, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 4.992857142857143, |
|
"grad_norm": 2.4877359867095947, |
|
"learning_rate": 2.5962962962962967e-05, |
|
"loss": 0.2443, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.7225149869918823, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.186, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.3584790527820587, |
|
"eval_rouge1": 0.9047, |
|
"eval_rouge2": 0.8434, |
|
"eval_rougeL": 0.9018, |
|
"eval_runtime": 122.2903, |
|
"eval_samples_per_second": 11.448, |
|
"eval_steps_per_second": 5.724, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.007142857142857, |
|
"grad_norm": 2.1430020332336426, |
|
"learning_rate": 2.588888888888889e-05, |
|
"loss": 0.3477, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 5.014285714285714, |
|
"grad_norm": 0.958677351474762, |
|
"learning_rate": 2.5851851851851855e-05, |
|
"loss": 0.2474, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 5.021428571428571, |
|
"grad_norm": 2.315269947052002, |
|
"learning_rate": 2.5814814814814813e-05, |
|
"loss": 0.2786, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 5.0285714285714285, |
|
"grad_norm": 1.3595519065856934, |
|
"learning_rate": 2.5777777777777778e-05, |
|
"loss": 0.2286, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 5.035714285714286, |
|
"grad_norm": 1.44675874710083, |
|
"learning_rate": 2.5740740740740743e-05, |
|
"loss": 0.2679, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 5.042857142857143, |
|
"grad_norm": 1.754285454750061, |
|
"learning_rate": 2.5703703703703708e-05, |
|
"loss": 0.196, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 2.9333369731903076, |
|
"learning_rate": 2.5666666666666666e-05, |
|
"loss": 0.1694, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 5.057142857142857, |
|
"grad_norm": 2.6653859615325928, |
|
"learning_rate": 2.562962962962963e-05, |
|
"loss": 0.2642, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 5.064285714285714, |
|
"grad_norm": 1.8362854719161987, |
|
"learning_rate": 2.5592592592592596e-05, |
|
"loss": 0.3614, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 5.071428571428571, |
|
"grad_norm": 1.427701473236084, |
|
"learning_rate": 2.5555555555555554e-05, |
|
"loss": 0.2351, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 5.078571428571428, |
|
"grad_norm": 2.3684027194976807, |
|
"learning_rate": 2.551851851851852e-05, |
|
"loss": 0.2803, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 5.085714285714285, |
|
"grad_norm": 1.5823931694030762, |
|
"learning_rate": 2.5481481481481484e-05, |
|
"loss": 0.2749, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 5.0928571428571425, |
|
"grad_norm": 1.6682019233703613, |
|
"learning_rate": 2.5444444444444442e-05, |
|
"loss": 0.3219, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"grad_norm": 1.7803760766983032, |
|
"learning_rate": 2.5407407407407407e-05, |
|
"loss": 0.2553, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 5.107142857142857, |
|
"grad_norm": 1.945063591003418, |
|
"learning_rate": 2.5370370370370372e-05, |
|
"loss": 0.1739, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 5.114285714285714, |
|
"grad_norm": 1.308371663093567, |
|
"learning_rate": 2.5333333333333337e-05, |
|
"loss": 0.2605, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 5.121428571428571, |
|
"grad_norm": 1.906160593032837, |
|
"learning_rate": 2.5296296296296295e-05, |
|
"loss": 0.2071, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 5.128571428571428, |
|
"grad_norm": 1.6239346265792847, |
|
"learning_rate": 2.525925925925926e-05, |
|
"loss": 0.2054, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 5.135714285714286, |
|
"grad_norm": 1.6175967454910278, |
|
"learning_rate": 2.5222222222222225e-05, |
|
"loss": 0.2266, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 5.142857142857143, |
|
"grad_norm": 1.938736915588379, |
|
"learning_rate": 2.5185185185185183e-05, |
|
"loss": 0.2932, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"grad_norm": 1.7323144674301147, |
|
"learning_rate": 2.5148148148148148e-05, |
|
"loss": 0.2762, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 5.1571428571428575, |
|
"grad_norm": 1.859667181968689, |
|
"learning_rate": 2.5111111111111113e-05, |
|
"loss": 0.3213, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 5.164285714285715, |
|
"grad_norm": 1.22067391872406, |
|
"learning_rate": 2.5074074074074078e-05, |
|
"loss": 0.2246, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 5.171428571428572, |
|
"grad_norm": 0.9384840726852417, |
|
"learning_rate": 2.5037037037037036e-05, |
|
"loss": 0.3364, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 5.178571428571429, |
|
"grad_norm": 1.4494845867156982, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.332, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 5.185714285714286, |
|
"grad_norm": 2.3436357975006104, |
|
"learning_rate": 2.4962962962962963e-05, |
|
"loss": 0.1456, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 5.192857142857143, |
|
"grad_norm": 1.0446144342422485, |
|
"learning_rate": 2.4925925925925928e-05, |
|
"loss": 0.1995, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"grad_norm": 2.325575113296509, |
|
"learning_rate": 2.488888888888889e-05, |
|
"loss": 0.3068, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 5.207142857142857, |
|
"grad_norm": 2.100825309753418, |
|
"learning_rate": 2.4851851851851854e-05, |
|
"loss": 0.2659, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 5.214285714285714, |
|
"grad_norm": 2.6580276489257812, |
|
"learning_rate": 2.4814814814814816e-05, |
|
"loss": 0.2872, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 5.2214285714285715, |
|
"grad_norm": 2.505577564239502, |
|
"learning_rate": 2.477777777777778e-05, |
|
"loss": 0.2574, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 5.228571428571429, |
|
"grad_norm": 1.4997559785842896, |
|
"learning_rate": 2.4740740740740742e-05, |
|
"loss": 0.2192, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 5.235714285714286, |
|
"grad_norm": 1.9084120988845825, |
|
"learning_rate": 2.4703703703703704e-05, |
|
"loss": 0.2836, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 5.242857142857143, |
|
"grad_norm": 1.1388484239578247, |
|
"learning_rate": 2.466666666666667e-05, |
|
"loss": 0.2426, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"grad_norm": 1.0559568405151367, |
|
"learning_rate": 2.462962962962963e-05, |
|
"loss": 0.344, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 5.257142857142857, |
|
"grad_norm": 1.4024419784545898, |
|
"learning_rate": 2.4592592592592595e-05, |
|
"loss": 0.2121, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 5.264285714285714, |
|
"grad_norm": 1.4338841438293457, |
|
"learning_rate": 2.4555555555555557e-05, |
|
"loss": 0.3329, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 5.271428571428571, |
|
"grad_norm": 1.4188106060028076, |
|
"learning_rate": 2.451851851851852e-05, |
|
"loss": 0.2479, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 5.2785714285714285, |
|
"grad_norm": 1.4320842027664185, |
|
"learning_rate": 2.4481481481481483e-05, |
|
"loss": 0.156, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 5.285714285714286, |
|
"grad_norm": 3.022641181945801, |
|
"learning_rate": 2.4444444444444445e-05, |
|
"loss": 0.1962, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 5.292857142857143, |
|
"grad_norm": 2.3267366886138916, |
|
"learning_rate": 2.440740740740741e-05, |
|
"loss": 0.2713, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"grad_norm": 2.685345411300659, |
|
"learning_rate": 2.437037037037037e-05, |
|
"loss": 0.3345, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 5.307142857142857, |
|
"grad_norm": 0.9320240020751953, |
|
"learning_rate": 2.4333333333333336e-05, |
|
"loss": 0.3758, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 5.314285714285714, |
|
"grad_norm": 1.8067562580108643, |
|
"learning_rate": 2.4296296296296298e-05, |
|
"loss": 0.2958, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 5.321428571428571, |
|
"grad_norm": 1.5514296293258667, |
|
"learning_rate": 2.425925925925926e-05, |
|
"loss": 0.3268, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 5.328571428571428, |
|
"grad_norm": 1.684311032295227, |
|
"learning_rate": 2.4222222222222224e-05, |
|
"loss": 0.2947, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 5.335714285714285, |
|
"grad_norm": 2.0809545516967773, |
|
"learning_rate": 2.4185185185185186e-05, |
|
"loss": 0.2928, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 5.3428571428571425, |
|
"grad_norm": 2.5362987518310547, |
|
"learning_rate": 2.414814814814815e-05, |
|
"loss": 0.1962, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"grad_norm": 0.636965274810791, |
|
"learning_rate": 2.4111111111111113e-05, |
|
"loss": 0.1694, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 5.357142857142857, |
|
"grad_norm": 2.1662261486053467, |
|
"learning_rate": 2.4074074074074074e-05, |
|
"loss": 0.3111, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 5.364285714285714, |
|
"grad_norm": 1.749324083328247, |
|
"learning_rate": 2.403703703703704e-05, |
|
"loss": 0.2521, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 5.371428571428572, |
|
"grad_norm": 2.3572323322296143, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.1527, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 5.378571428571428, |
|
"grad_norm": 1.274588942527771, |
|
"learning_rate": 2.3962962962962966e-05, |
|
"loss": 0.2757, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 5.385714285714286, |
|
"grad_norm": 1.2197136878967285, |
|
"learning_rate": 2.3925925925925927e-05, |
|
"loss": 0.2288, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 5.392857142857143, |
|
"grad_norm": 1.6061832904815674, |
|
"learning_rate": 2.3888888888888892e-05, |
|
"loss": 0.3292, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"grad_norm": 1.8271028995513916, |
|
"learning_rate": 2.3851851851851854e-05, |
|
"loss": 0.2392, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 5.4071428571428575, |
|
"grad_norm": 1.8294018507003784, |
|
"learning_rate": 2.3814814814814815e-05, |
|
"loss": 0.2554, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 5.414285714285715, |
|
"grad_norm": 1.253556728363037, |
|
"learning_rate": 2.377777777777778e-05, |
|
"loss": 0.2008, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 5.421428571428572, |
|
"grad_norm": 1.1980758905410767, |
|
"learning_rate": 2.3740740740740742e-05, |
|
"loss": 0.265, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 5.428571428571429, |
|
"grad_norm": 1.5337406396865845, |
|
"learning_rate": 2.3703703703703707e-05, |
|
"loss": 0.4126, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 5.435714285714286, |
|
"grad_norm": 2.981381893157959, |
|
"learning_rate": 2.3666666666666668e-05, |
|
"loss": 0.3554, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 5.442857142857143, |
|
"grad_norm": 1.927241325378418, |
|
"learning_rate": 2.3629629629629633e-05, |
|
"loss": 0.3148, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"grad_norm": 1.0788408517837524, |
|
"learning_rate": 2.3592592592592595e-05, |
|
"loss": 0.2421, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 5.457142857142857, |
|
"grad_norm": 1.250436782836914, |
|
"learning_rate": 2.3555555555555556e-05, |
|
"loss": 0.2797, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 5.464285714285714, |
|
"grad_norm": 1.2195000648498535, |
|
"learning_rate": 2.351851851851852e-05, |
|
"loss": 0.1702, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 5.4714285714285715, |
|
"grad_norm": 1.773098349571228, |
|
"learning_rate": 2.3481481481481483e-05, |
|
"loss": 0.2383, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 5.478571428571429, |
|
"grad_norm": 1.540499210357666, |
|
"learning_rate": 2.3444444444444448e-05, |
|
"loss": 0.2741, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 5.485714285714286, |
|
"grad_norm": 1.3515613079071045, |
|
"learning_rate": 2.340740740740741e-05, |
|
"loss": 0.4365, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 5.492857142857143, |
|
"grad_norm": 1.5094635486602783, |
|
"learning_rate": 2.337037037037037e-05, |
|
"loss": 0.2777, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"grad_norm": 1.123542070388794, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.3406, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 5.507142857142857, |
|
"grad_norm": 1.0701942443847656, |
|
"learning_rate": 2.3296296296296297e-05, |
|
"loss": 0.2499, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 5.514285714285714, |
|
"grad_norm": 1.270992636680603, |
|
"learning_rate": 2.3259259259259262e-05, |
|
"loss": 0.2044, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 5.521428571428571, |
|
"grad_norm": 1.5586347579956055, |
|
"learning_rate": 2.3222222222222224e-05, |
|
"loss": 0.2573, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 5.5285714285714285, |
|
"grad_norm": 0.9162809252738953, |
|
"learning_rate": 2.318518518518519e-05, |
|
"loss": 0.2245, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 5.535714285714286, |
|
"grad_norm": 1.7767843008041382, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.276, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 5.542857142857143, |
|
"grad_norm": 2.538541316986084, |
|
"learning_rate": 2.3111111111111112e-05, |
|
"loss": 0.3448, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"grad_norm": 1.5738705396652222, |
|
"learning_rate": 2.3074074074074077e-05, |
|
"loss": 0.3023, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 5.557142857142857, |
|
"grad_norm": 0.9919751286506653, |
|
"learning_rate": 2.303703703703704e-05, |
|
"loss": 0.2979, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 5.564285714285714, |
|
"grad_norm": 1.079817771911621, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 0.3141, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 5.571428571428571, |
|
"grad_norm": 1.709007978439331, |
|
"learning_rate": 2.2962962962962965e-05, |
|
"loss": 0.2452, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 5.578571428571428, |
|
"grad_norm": 0.9877552390098572, |
|
"learning_rate": 2.2925925925925927e-05, |
|
"loss": 0.2796, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 5.585714285714285, |
|
"grad_norm": 1.9676953554153442, |
|
"learning_rate": 2.288888888888889e-05, |
|
"loss": 0.2314, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 5.5928571428571425, |
|
"grad_norm": 1.778275966644287, |
|
"learning_rate": 2.2851851851851853e-05, |
|
"loss": 0.3033, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"grad_norm": 0.9746494889259338, |
|
"learning_rate": 2.2814814814814818e-05, |
|
"loss": 0.2459, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 5.607142857142857, |
|
"grad_norm": 2.0238702297210693, |
|
"learning_rate": 2.277777777777778e-05, |
|
"loss": 0.2494, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 5.614285714285714, |
|
"grad_norm": 1.2345530986785889, |
|
"learning_rate": 2.2740740740740744e-05, |
|
"loss": 0.4614, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 5.621428571428572, |
|
"grad_norm": 0.9835256338119507, |
|
"learning_rate": 2.2703703703703706e-05, |
|
"loss": 0.3519, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 5.628571428571428, |
|
"grad_norm": 1.9753897190093994, |
|
"learning_rate": 2.2666666666666668e-05, |
|
"loss": 0.2895, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 5.635714285714286, |
|
"grad_norm": 1.7247217893600464, |
|
"learning_rate": 2.2629629629629633e-05, |
|
"loss": 0.1994, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 5.642857142857143, |
|
"grad_norm": 1.8406201601028442, |
|
"learning_rate": 2.2592592592592594e-05, |
|
"loss": 0.1872, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"grad_norm": 1.4785393476486206, |
|
"learning_rate": 2.255555555555556e-05, |
|
"loss": 0.2811, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 5.6571428571428575, |
|
"grad_norm": 2.23652982711792, |
|
"learning_rate": 2.251851851851852e-05, |
|
"loss": 0.3071, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 5.664285714285715, |
|
"grad_norm": 1.9096837043762207, |
|
"learning_rate": 2.2481481481481486e-05, |
|
"loss": 0.2115, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 5.671428571428572, |
|
"grad_norm": 2.0808775424957275, |
|
"learning_rate": 2.2444444444444447e-05, |
|
"loss": 0.3923, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 5.678571428571429, |
|
"grad_norm": 1.5935535430908203, |
|
"learning_rate": 2.240740740740741e-05, |
|
"loss": 0.3461, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 5.685714285714286, |
|
"grad_norm": 1.1959024667739868, |
|
"learning_rate": 2.2370370370370374e-05, |
|
"loss": 0.2016, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 5.692857142857143, |
|
"grad_norm": 1.0776904821395874, |
|
"learning_rate": 2.2333333333333335e-05, |
|
"loss": 0.3476, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"grad_norm": 1.884531855583191, |
|
"learning_rate": 2.2296296296296297e-05, |
|
"loss": 0.2861, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 5.707142857142857, |
|
"grad_norm": 1.2476330995559692, |
|
"learning_rate": 2.2259259259259262e-05, |
|
"loss": 0.2152, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 2.106348752975464, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.3022, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"eval_loss": 0.3492221236228943, |
|
"eval_rouge1": 0.9062, |
|
"eval_rouge2": 0.8456, |
|
"eval_rougeL": 0.9033, |
|
"eval_runtime": 122.1433, |
|
"eval_samples_per_second": 11.462, |
|
"eval_steps_per_second": 5.731, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.7214285714285715, |
|
"grad_norm": 1.9454623460769653, |
|
"learning_rate": 2.2185185185185188e-05, |
|
"loss": 0.232, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 5.728571428571429, |
|
"grad_norm": 1.9178905487060547, |
|
"learning_rate": 2.214814814814815e-05, |
|
"loss": 0.2278, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 5.735714285714286, |
|
"grad_norm": 1.6279345750808716, |
|
"learning_rate": 2.211111111111111e-05, |
|
"loss": 0.2423, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 5.742857142857143, |
|
"grad_norm": 2.7422447204589844, |
|
"learning_rate": 2.2074074074074076e-05, |
|
"loss": 0.3129, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"grad_norm": 1.7606775760650635, |
|
"learning_rate": 2.2037037037037038e-05, |
|
"loss": 0.217, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 5.757142857142857, |
|
"grad_norm": 2.970276355743408, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.3246, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 5.764285714285714, |
|
"grad_norm": 1.6729111671447754, |
|
"learning_rate": 2.1962962962962964e-05, |
|
"loss": 0.224, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 5.771428571428571, |
|
"grad_norm": 2.103708267211914, |
|
"learning_rate": 2.1925925925925926e-05, |
|
"loss": 0.2256, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 5.7785714285714285, |
|
"grad_norm": 1.7059235572814941, |
|
"learning_rate": 2.188888888888889e-05, |
|
"loss": 0.2986, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 5.785714285714286, |
|
"grad_norm": 1.6239415407180786, |
|
"learning_rate": 2.1851851851851852e-05, |
|
"loss": 0.3007, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 5.792857142857143, |
|
"grad_norm": 1.5316799879074097, |
|
"learning_rate": 2.1814814814814817e-05, |
|
"loss": 0.2295, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"grad_norm": 0.9283231496810913, |
|
"learning_rate": 2.177777777777778e-05, |
|
"loss": 0.1624, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 5.807142857142857, |
|
"grad_norm": 1.4230540990829468, |
|
"learning_rate": 2.174074074074074e-05, |
|
"loss": 0.1686, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 5.814285714285714, |
|
"grad_norm": 1.8694360256195068, |
|
"learning_rate": 2.1703703703703705e-05, |
|
"loss": 0.3416, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 5.821428571428571, |
|
"grad_norm": 2.144221782684326, |
|
"learning_rate": 2.1666666666666667e-05, |
|
"loss": 0.2471, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 5.828571428571428, |
|
"grad_norm": 2.5672965049743652, |
|
"learning_rate": 2.162962962962963e-05, |
|
"loss": 0.3354, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 5.835714285714285, |
|
"grad_norm": 1.093578577041626, |
|
"learning_rate": 2.1592592592592594e-05, |
|
"loss": 0.262, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 5.8428571428571425, |
|
"grad_norm": 0.7076272368431091, |
|
"learning_rate": 2.1555555555555555e-05, |
|
"loss": 0.254, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"grad_norm": 2.2301125526428223, |
|
"learning_rate": 2.151851851851852e-05, |
|
"loss": 0.1906, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 5.857142857142857, |
|
"grad_norm": 1.704037070274353, |
|
"learning_rate": 2.148148148148148e-05, |
|
"loss": 0.2802, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 5.864285714285714, |
|
"grad_norm": 1.4877769947052002, |
|
"learning_rate": 2.1444444444444443e-05, |
|
"loss": 0.3327, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 5.871428571428572, |
|
"grad_norm": 1.436059594154358, |
|
"learning_rate": 2.1407407407407408e-05, |
|
"loss": 0.271, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 5.878571428571428, |
|
"grad_norm": 1.357176661491394, |
|
"learning_rate": 2.137037037037037e-05, |
|
"loss": 0.2481, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 5.885714285714286, |
|
"grad_norm": 1.846593976020813, |
|
"learning_rate": 2.1333333333333335e-05, |
|
"loss": 0.2641, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 5.892857142857143, |
|
"grad_norm": 2.4631927013397217, |
|
"learning_rate": 2.1296296296296296e-05, |
|
"loss": 0.2832, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"grad_norm": 1.8715349435806274, |
|
"learning_rate": 2.1259259259259258e-05, |
|
"loss": 0.4157, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 5.9071428571428575, |
|
"grad_norm": 2.3173437118530273, |
|
"learning_rate": 2.1222222222222223e-05, |
|
"loss": 0.353, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 5.914285714285715, |
|
"grad_norm": 2.049422025680542, |
|
"learning_rate": 2.1185185185185184e-05, |
|
"loss": 0.2613, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 5.921428571428572, |
|
"grad_norm": 1.281841516494751, |
|
"learning_rate": 2.114814814814815e-05, |
|
"loss": 0.2287, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 5.928571428571429, |
|
"grad_norm": 1.007407546043396, |
|
"learning_rate": 2.111111111111111e-05, |
|
"loss": 0.2139, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 5.935714285714286, |
|
"grad_norm": 1.8036701679229736, |
|
"learning_rate": 2.1074074074074072e-05, |
|
"loss": 0.2511, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 5.942857142857143, |
|
"grad_norm": 0.9559861421585083, |
|
"learning_rate": 2.1037037037037037e-05, |
|
"loss": 0.3371, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"grad_norm": 2.136070489883423, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.2321, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 5.957142857142857, |
|
"grad_norm": 1.2442055940628052, |
|
"learning_rate": 2.0962962962962964e-05, |
|
"loss": 0.1819, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 5.964285714285714, |
|
"grad_norm": 2.0479979515075684, |
|
"learning_rate": 2.0925925925925925e-05, |
|
"loss": 0.3796, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 5.9714285714285715, |
|
"grad_norm": 1.6974670886993408, |
|
"learning_rate": 2.088888888888889e-05, |
|
"loss": 0.1947, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 5.978571428571429, |
|
"grad_norm": 2.1099231243133545, |
|
"learning_rate": 2.0851851851851852e-05, |
|
"loss": 0.1847, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 5.985714285714286, |
|
"grad_norm": 1.9181057214736938, |
|
"learning_rate": 2.0814814814814813e-05, |
|
"loss": 0.3513, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 5.992857142857143, |
|
"grad_norm": 1.0576838254928589, |
|
"learning_rate": 2.077777777777778e-05, |
|
"loss": 0.2663, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.1283502578735352, |
|
"learning_rate": 2.074074074074074e-05, |
|
"loss": 0.2872, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 6.007142857142857, |
|
"grad_norm": 0.7001394629478455, |
|
"learning_rate": 2.0703703703703705e-05, |
|
"loss": 0.277, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 6.014285714285714, |
|
"grad_norm": 1.6374051570892334, |
|
"learning_rate": 2.0666666666666666e-05, |
|
"loss": 0.1849, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 6.021428571428571, |
|
"grad_norm": 1.674914836883545, |
|
"learning_rate": 2.0629629629629628e-05, |
|
"loss": 0.1756, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 6.0285714285714285, |
|
"grad_norm": 2.592038154602051, |
|
"learning_rate": 2.0592592592592593e-05, |
|
"loss": 0.3725, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 6.035714285714286, |
|
"grad_norm": 2.942992925643921, |
|
"learning_rate": 2.0555555555555555e-05, |
|
"loss": 0.2529, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 6.042857142857143, |
|
"grad_norm": 1.7580475807189941, |
|
"learning_rate": 2.051851851851852e-05, |
|
"loss": 0.1549, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 1.9032413959503174, |
|
"learning_rate": 2.048148148148148e-05, |
|
"loss": 0.2529, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 6.057142857142857, |
|
"grad_norm": 1.7678323984146118, |
|
"learning_rate": 2.0444444444444446e-05, |
|
"loss": 0.1935, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 6.064285714285714, |
|
"grad_norm": 1.7014952898025513, |
|
"learning_rate": 2.0407407407407408e-05, |
|
"loss": 0.1965, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 6.071428571428571, |
|
"grad_norm": 2.053157091140747, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 0.2045, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 6.078571428571428, |
|
"grad_norm": 2.448059320449829, |
|
"learning_rate": 2.0333333333333334e-05, |
|
"loss": 0.2275, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 6.085714285714285, |
|
"grad_norm": 1.3505144119262695, |
|
"learning_rate": 2.0296296296296296e-05, |
|
"loss": 0.192, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 6.0928571428571425, |
|
"grad_norm": 1.0717148780822754, |
|
"learning_rate": 2.025925925925926e-05, |
|
"loss": 0.3017, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 2.872880220413208, |
|
"learning_rate": 2.0222222222222222e-05, |
|
"loss": 0.2583, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 6.107142857142857, |
|
"grad_norm": 1.559588074684143, |
|
"learning_rate": 2.0185185185185187e-05, |
|
"loss": 0.1557, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 6.114285714285714, |
|
"grad_norm": 1.4375160932540894, |
|
"learning_rate": 2.014814814814815e-05, |
|
"loss": 0.1165, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 6.121428571428571, |
|
"grad_norm": 1.1922268867492676, |
|
"learning_rate": 2.011111111111111e-05, |
|
"loss": 0.1995, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 6.128571428571428, |
|
"grad_norm": 2.267056465148926, |
|
"learning_rate": 2.0074074074074075e-05, |
|
"loss": 0.2176, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 6.135714285714286, |
|
"grad_norm": 1.5485496520996094, |
|
"learning_rate": 2.0037037037037037e-05, |
|
"loss": 0.206, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 6.142857142857143, |
|
"grad_norm": 1.9538283348083496, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3173, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"grad_norm": 2.8216044902801514, |
|
"learning_rate": 1.9962962962962963e-05, |
|
"loss": 0.3077, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 6.1571428571428575, |
|
"grad_norm": 2.5293240547180176, |
|
"learning_rate": 1.9925925925925925e-05, |
|
"loss": 0.2829, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 6.164285714285715, |
|
"grad_norm": 1.7947183847427368, |
|
"learning_rate": 1.988888888888889e-05, |
|
"loss": 0.3212, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 6.171428571428572, |
|
"grad_norm": 1.541588544845581, |
|
"learning_rate": 1.985185185185185e-05, |
|
"loss": 0.1985, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 6.178571428571429, |
|
"grad_norm": 1.286007046699524, |
|
"learning_rate": 1.9814814814814816e-05, |
|
"loss": 0.279, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 6.185714285714286, |
|
"grad_norm": 1.8692234754562378, |
|
"learning_rate": 1.9777777777777778e-05, |
|
"loss": 0.303, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 6.192857142857143, |
|
"grad_norm": 1.7906513214111328, |
|
"learning_rate": 1.9740740740740743e-05, |
|
"loss": 0.218, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"grad_norm": 2.0737709999084473, |
|
"learning_rate": 1.9703703703703704e-05, |
|
"loss": 0.1559, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 6.207142857142857, |
|
"grad_norm": 1.8082749843597412, |
|
"learning_rate": 1.9666666666666666e-05, |
|
"loss": 0.2713, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 6.214285714285714, |
|
"grad_norm": 1.8988617658615112, |
|
"learning_rate": 1.962962962962963e-05, |
|
"loss": 0.2362, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 6.2214285714285715, |
|
"grad_norm": 0.8727281093597412, |
|
"learning_rate": 1.9592592592592592e-05, |
|
"loss": 0.2571, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 6.228571428571429, |
|
"grad_norm": 1.0203776359558105, |
|
"learning_rate": 1.9555555555555557e-05, |
|
"loss": 0.2884, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 6.235714285714286, |
|
"grad_norm": 1.5776811838150024, |
|
"learning_rate": 1.951851851851852e-05, |
|
"loss": 0.3115, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 6.242857142857143, |
|
"grad_norm": 2.1000545024871826, |
|
"learning_rate": 1.948148148148148e-05, |
|
"loss": 0.2936, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 1.991640329360962, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.2214, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 6.257142857142857, |
|
"grad_norm": 3.8238637447357178, |
|
"learning_rate": 1.9407407407407407e-05, |
|
"loss": 0.2738, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 6.264285714285714, |
|
"grad_norm": 2.818711042404175, |
|
"learning_rate": 1.9370370370370372e-05, |
|
"loss": 0.3107, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 6.271428571428571, |
|
"grad_norm": 1.4565989971160889, |
|
"learning_rate": 1.9333333333333333e-05, |
|
"loss": 0.1351, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 6.2785714285714285, |
|
"grad_norm": 1.6833415031433105, |
|
"learning_rate": 1.92962962962963e-05, |
|
"loss": 0.3359, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 6.285714285714286, |
|
"grad_norm": 3.662572145462036, |
|
"learning_rate": 1.925925925925926e-05, |
|
"loss": 0.2338, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 6.292857142857143, |
|
"grad_norm": 1.9166165590286255, |
|
"learning_rate": 1.922222222222222e-05, |
|
"loss": 0.1908, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"grad_norm": 2.665553331375122, |
|
"learning_rate": 1.9185185185185186e-05, |
|
"loss": 0.246, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 6.307142857142857, |
|
"grad_norm": 1.601194143295288, |
|
"learning_rate": 1.9148148148148148e-05, |
|
"loss": 0.2392, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 6.314285714285714, |
|
"grad_norm": 1.7382382154464722, |
|
"learning_rate": 1.9111111111111113e-05, |
|
"loss": 0.2919, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 6.321428571428571, |
|
"grad_norm": 1.0822237730026245, |
|
"learning_rate": 1.9074074074074075e-05, |
|
"loss": 0.1179, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 6.328571428571428, |
|
"grad_norm": 1.9691376686096191, |
|
"learning_rate": 1.903703703703704e-05, |
|
"loss": 0.3934, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 6.335714285714285, |
|
"grad_norm": 0.8395001292228699, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.2004, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 6.3428571428571425, |
|
"grad_norm": 1.6967720985412598, |
|
"learning_rate": 1.8962962962962963e-05, |
|
"loss": 0.204, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"grad_norm": 1.2601035833358765, |
|
"learning_rate": 1.8925925925925928e-05, |
|
"loss": 0.2769, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 6.357142857142857, |
|
"grad_norm": 1.560940146446228, |
|
"learning_rate": 1.888888888888889e-05, |
|
"loss": 0.1409, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 6.364285714285714, |
|
"grad_norm": 1.645814061164856, |
|
"learning_rate": 1.8851851851851854e-05, |
|
"loss": 0.1914, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 6.371428571428572, |
|
"grad_norm": 1.4886109828948975, |
|
"learning_rate": 1.8814814814814816e-05, |
|
"loss": 0.3517, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 6.378571428571428, |
|
"grad_norm": 1.2002378702163696, |
|
"learning_rate": 1.8777777777777777e-05, |
|
"loss": 0.2346, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 6.385714285714286, |
|
"grad_norm": 2.4492478370666504, |
|
"learning_rate": 1.8740740740740742e-05, |
|
"loss": 0.2104, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 6.392857142857143, |
|
"grad_norm": 2.315610408782959, |
|
"learning_rate": 1.8703703703703704e-05, |
|
"loss": 0.2321, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"grad_norm": 2.116260528564453, |
|
"learning_rate": 1.866666666666667e-05, |
|
"loss": 0.2092, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 6.4071428571428575, |
|
"grad_norm": 1.7362505197525024, |
|
"learning_rate": 1.862962962962963e-05, |
|
"loss": 0.2598, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 6.414285714285715, |
|
"grad_norm": 2.1754469871520996, |
|
"learning_rate": 1.8592592592592595e-05, |
|
"loss": 0.3035, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 6.421428571428572, |
|
"grad_norm": 1.448285698890686, |
|
"learning_rate": 1.8555555555555557e-05, |
|
"loss": 0.227, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 6.428571428571429, |
|
"grad_norm": 1.888242483139038, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.1618, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 6.428571428571429, |
|
"eval_loss": 0.3434120714664459, |
|
"eval_rouge1": 0.908, |
|
"eval_rouge2": 0.8486, |
|
"eval_rougeL": 0.9052, |
|
"eval_runtime": 122.2937, |
|
"eval_samples_per_second": 11.448, |
|
"eval_steps_per_second": 5.724, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 6.435714285714286, |
|
"grad_norm": 2.5552051067352295, |
|
"learning_rate": 1.8481481481481483e-05, |
|
"loss": 0.4376, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 6.442857142857143, |
|
"grad_norm": 2.0973517894744873, |
|
"learning_rate": 1.8444444444444445e-05, |
|
"loss": 0.2163, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"grad_norm": 1.3774244785308838, |
|
"learning_rate": 1.840740740740741e-05, |
|
"loss": 0.14, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 6.457142857142857, |
|
"grad_norm": 0.8735131025314331, |
|
"learning_rate": 1.837037037037037e-05, |
|
"loss": 0.1848, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 6.464285714285714, |
|
"grad_norm": 1.5088914632797241, |
|
"learning_rate": 1.8333333333333333e-05, |
|
"loss": 0.2889, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 6.4714285714285715, |
|
"grad_norm": 1.0688769817352295, |
|
"learning_rate": 1.8296296296296298e-05, |
|
"loss": 0.1895, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 6.478571428571429, |
|
"grad_norm": 1.43760085105896, |
|
"learning_rate": 1.825925925925926e-05, |
|
"loss": 0.2997, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 6.485714285714286, |
|
"grad_norm": 1.1168969869613647, |
|
"learning_rate": 1.8222222222222224e-05, |
|
"loss": 0.3544, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 6.492857142857143, |
|
"grad_norm": 1.7139670848846436, |
|
"learning_rate": 1.8185185185185186e-05, |
|
"loss": 0.2108, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"grad_norm": 1.2584503889083862, |
|
"learning_rate": 1.814814814814815e-05, |
|
"loss": 0.2791, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 6.507142857142857, |
|
"grad_norm": 1.4440019130706787, |
|
"learning_rate": 1.8111111111111112e-05, |
|
"loss": 0.3745, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 6.514285714285714, |
|
"grad_norm": 2.3828232288360596, |
|
"learning_rate": 1.8074074074074074e-05, |
|
"loss": 0.2159, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 6.521428571428571, |
|
"grad_norm": 2.6553053855895996, |
|
"learning_rate": 1.803703703703704e-05, |
|
"loss": 0.3051, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 6.5285714285714285, |
|
"grad_norm": 2.0669426918029785, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.2166, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 6.535714285714286, |
|
"grad_norm": 1.4676064252853394, |
|
"learning_rate": 1.7962962962962965e-05, |
|
"loss": 0.2393, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 6.542857142857143, |
|
"grad_norm": 1.5158963203430176, |
|
"learning_rate": 1.7925925925925927e-05, |
|
"loss": 0.2821, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"grad_norm": 1.438550591468811, |
|
"learning_rate": 1.788888888888889e-05, |
|
"loss": 0.229, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 6.557142857142857, |
|
"grad_norm": 2.2161788940429688, |
|
"learning_rate": 1.7851851851851853e-05, |
|
"loss": 0.3705, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 6.564285714285714, |
|
"grad_norm": 1.472321629524231, |
|
"learning_rate": 1.7814814814814815e-05, |
|
"loss": 0.3977, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 6.571428571428571, |
|
"grad_norm": 1.957033395767212, |
|
"learning_rate": 1.777777777777778e-05, |
|
"loss": 0.2431, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 6.578571428571428, |
|
"grad_norm": 3.070905923843384, |
|
"learning_rate": 1.774074074074074e-05, |
|
"loss": 0.2676, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 6.585714285714285, |
|
"grad_norm": 2.240701198577881, |
|
"learning_rate": 1.7703703703703706e-05, |
|
"loss": 0.2346, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 6.5928571428571425, |
|
"grad_norm": 1.2726478576660156, |
|
"learning_rate": 1.7666666666666668e-05, |
|
"loss": 0.2624, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"grad_norm": 2.543856382369995, |
|
"learning_rate": 1.762962962962963e-05, |
|
"loss": 0.3137, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 6.607142857142857, |
|
"grad_norm": 2.1688966751098633, |
|
"learning_rate": 1.7592592592592595e-05, |
|
"loss": 0.3366, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 6.614285714285714, |
|
"grad_norm": 1.9013522863388062, |
|
"learning_rate": 1.7555555555555556e-05, |
|
"loss": 0.1759, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 6.621428571428572, |
|
"grad_norm": 2.7567338943481445, |
|
"learning_rate": 1.751851851851852e-05, |
|
"loss": 0.2615, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 6.628571428571428, |
|
"grad_norm": 2.530351161956787, |
|
"learning_rate": 1.7481481481481483e-05, |
|
"loss": 0.363, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 6.635714285714286, |
|
"grad_norm": 3.0051562786102295, |
|
"learning_rate": 1.7444444444444448e-05, |
|
"loss": 0.2155, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 6.642857142857143, |
|
"grad_norm": 1.6199374198913574, |
|
"learning_rate": 1.740740740740741e-05, |
|
"loss": 0.1943, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"grad_norm": 2.2254199981689453, |
|
"learning_rate": 1.737037037037037e-05, |
|
"loss": 0.2086, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 6.6571428571428575, |
|
"grad_norm": 1.4565106630325317, |
|
"learning_rate": 1.7333333333333336e-05, |
|
"loss": 0.2113, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 6.664285714285715, |
|
"grad_norm": 1.8667312860488892, |
|
"learning_rate": 1.7296296296296297e-05, |
|
"loss": 0.1719, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 6.671428571428572, |
|
"grad_norm": 2.0462963581085205, |
|
"learning_rate": 1.7259259259259262e-05, |
|
"loss": 0.2307, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 6.678571428571429, |
|
"grad_norm": 1.5114613771438599, |
|
"learning_rate": 1.7222222222222224e-05, |
|
"loss": 0.2629, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 6.685714285714286, |
|
"grad_norm": 1.8743935823440552, |
|
"learning_rate": 1.7185185185185185e-05, |
|
"loss": 0.2656, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 6.692857142857143, |
|
"grad_norm": 1.6508034467697144, |
|
"learning_rate": 1.714814814814815e-05, |
|
"loss": 0.2971, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"grad_norm": 1.4109563827514648, |
|
"learning_rate": 1.7111111111111112e-05, |
|
"loss": 0.3155, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 6.707142857142857, |
|
"grad_norm": 1.9742975234985352, |
|
"learning_rate": 1.7074074074074077e-05, |
|
"loss": 0.2858, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 6.714285714285714, |
|
"grad_norm": 0.8593278527259827, |
|
"learning_rate": 1.7037037037037038e-05, |
|
"loss": 0.2484, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 6.7214285714285715, |
|
"grad_norm": 1.8331007957458496, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.2763, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 6.728571428571429, |
|
"grad_norm": 2.0606274604797363, |
|
"learning_rate": 1.6962962962962965e-05, |
|
"loss": 0.2016, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 6.735714285714286, |
|
"grad_norm": 1.244935154914856, |
|
"learning_rate": 1.6925925925925926e-05, |
|
"loss": 0.2161, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 6.742857142857143, |
|
"grad_norm": 2.0855889320373535, |
|
"learning_rate": 1.688888888888889e-05, |
|
"loss": 0.1961, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"grad_norm": 2.203310012817383, |
|
"learning_rate": 1.6851851851851853e-05, |
|
"loss": 0.1886, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 6.757142857142857, |
|
"grad_norm": 2.1254501342773438, |
|
"learning_rate": 1.6814814814814818e-05, |
|
"loss": 0.2824, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 6.764285714285714, |
|
"grad_norm": 1.498728632926941, |
|
"learning_rate": 1.677777777777778e-05, |
|
"loss": 0.2848, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 6.771428571428571, |
|
"grad_norm": 2.6205763816833496, |
|
"learning_rate": 1.674074074074074e-05, |
|
"loss": 0.2728, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 6.7785714285714285, |
|
"grad_norm": 1.6262216567993164, |
|
"learning_rate": 1.6703703703703706e-05, |
|
"loss": 0.4216, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 6.785714285714286, |
|
"grad_norm": 3.074489116668701, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.2084, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 6.792857142857143, |
|
"grad_norm": 1.8158230781555176, |
|
"learning_rate": 1.6629629629629632e-05, |
|
"loss": 0.1794, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"grad_norm": 2.069397449493408, |
|
"learning_rate": 1.6592592592592594e-05, |
|
"loss": 0.2363, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 6.807142857142857, |
|
"grad_norm": 1.8637501001358032, |
|
"learning_rate": 1.655555555555556e-05, |
|
"loss": 0.2203, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 6.814285714285714, |
|
"grad_norm": 2.043314218521118, |
|
"learning_rate": 1.651851851851852e-05, |
|
"loss": 0.2267, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 6.821428571428571, |
|
"grad_norm": 2.8327081203460693, |
|
"learning_rate": 1.6481481481481482e-05, |
|
"loss": 0.2793, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 6.828571428571428, |
|
"grad_norm": 2.3297407627105713, |
|
"learning_rate": 1.6444444444444447e-05, |
|
"loss": 0.2349, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 6.835714285714285, |
|
"grad_norm": 0.9220748543739319, |
|
"learning_rate": 1.640740740740741e-05, |
|
"loss": 0.1966, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 6.8428571428571425, |
|
"grad_norm": 1.5935183763504028, |
|
"learning_rate": 1.6370370370370374e-05, |
|
"loss": 0.3217, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"grad_norm": 0.9305605292320251, |
|
"learning_rate": 1.6333333333333335e-05, |
|
"loss": 0.1446, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 6.857142857142857, |
|
"grad_norm": 2.0719094276428223, |
|
"learning_rate": 1.62962962962963e-05, |
|
"loss": 0.2195, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 6.864285714285714, |
|
"grad_norm": 1.9230345487594604, |
|
"learning_rate": 1.625925925925926e-05, |
|
"loss": 0.2031, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 6.871428571428572, |
|
"grad_norm": 1.7897018194198608, |
|
"learning_rate": 1.6222222222222223e-05, |
|
"loss": 0.1728, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 6.878571428571428, |
|
"grad_norm": 2.4588770866394043, |
|
"learning_rate": 1.6185185185185188e-05, |
|
"loss": 0.3253, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 6.885714285714286, |
|
"grad_norm": 1.2495237588882446, |
|
"learning_rate": 1.614814814814815e-05, |
|
"loss": 0.3539, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 6.892857142857143, |
|
"grad_norm": 3.161078453063965, |
|
"learning_rate": 1.6111111111111115e-05, |
|
"loss": 0.3598, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"grad_norm": 1.9474009275436401, |
|
"learning_rate": 1.6074074074074076e-05, |
|
"loss": 0.1385, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 6.9071428571428575, |
|
"grad_norm": 1.9687261581420898, |
|
"learning_rate": 1.6037037037037038e-05, |
|
"loss": 0.2375, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 6.914285714285715, |
|
"grad_norm": 1.87405264377594, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.3374, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 6.921428571428572, |
|
"grad_norm": 1.1928725242614746, |
|
"learning_rate": 1.5962962962962964e-05, |
|
"loss": 0.197, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 6.928571428571429, |
|
"grad_norm": 1.6434850692749023, |
|
"learning_rate": 1.5925925925925926e-05, |
|
"loss": 0.2757, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 6.935714285714286, |
|
"grad_norm": 1.1469305753707886, |
|
"learning_rate": 1.588888888888889e-05, |
|
"loss": 0.1793, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 6.942857142857143, |
|
"grad_norm": 2.436051368713379, |
|
"learning_rate": 1.5851851851851852e-05, |
|
"loss": 0.3178, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"grad_norm": 1.8960529565811157, |
|
"learning_rate": 1.5814814814814817e-05, |
|
"loss": 0.2182, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 6.957142857142857, |
|
"grad_norm": 1.755922794342041, |
|
"learning_rate": 1.577777777777778e-05, |
|
"loss": 0.3239, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 6.964285714285714, |
|
"grad_norm": 2.202697515487671, |
|
"learning_rate": 1.574074074074074e-05, |
|
"loss": 0.2116, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 6.9714285714285715, |
|
"grad_norm": 1.4491599798202515, |
|
"learning_rate": 1.5703703703703705e-05, |
|
"loss": 0.2329, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 6.978571428571429, |
|
"grad_norm": 9.212343215942383, |
|
"learning_rate": 1.5666666666666667e-05, |
|
"loss": 0.2573, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 6.985714285714286, |
|
"grad_norm": 1.2211856842041016, |
|
"learning_rate": 1.5629629629629632e-05, |
|
"loss": 0.2737, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 6.992857142857143, |
|
"grad_norm": 1.59877347946167, |
|
"learning_rate": 1.5592592592592593e-05, |
|
"loss": 0.284, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.295945882797241, |
|
"learning_rate": 1.5555555555555555e-05, |
|
"loss": 0.2076, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 7.007142857142857, |
|
"grad_norm": 1.4388489723205566, |
|
"learning_rate": 1.551851851851852e-05, |
|
"loss": 0.2225, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 7.014285714285714, |
|
"grad_norm": 1.9146931171417236, |
|
"learning_rate": 1.548148148148148e-05, |
|
"loss": 0.2917, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 7.021428571428571, |
|
"grad_norm": 1.0212804079055786, |
|
"learning_rate": 1.5444444444444446e-05, |
|
"loss": 0.1537, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 7.0285714285714285, |
|
"grad_norm": 2.146648645401001, |
|
"learning_rate": 1.5407407407407408e-05, |
|
"loss": 0.178, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 7.035714285714286, |
|
"grad_norm": 2.4515628814697266, |
|
"learning_rate": 1.537037037037037e-05, |
|
"loss": 0.3043, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 7.042857142857143, |
|
"grad_norm": 1.6906862258911133, |
|
"learning_rate": 1.5333333333333334e-05, |
|
"loss": 0.2787, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 2.2019400596618652, |
|
"learning_rate": 1.5296296296296296e-05, |
|
"loss": 0.3236, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 7.057142857142857, |
|
"grad_norm": 1.3307303190231323, |
|
"learning_rate": 1.5259259259259258e-05, |
|
"loss": 0.1875, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 7.064285714285714, |
|
"grad_norm": 1.7358342409133911, |
|
"learning_rate": 1.5222222222222224e-05, |
|
"loss": 0.2149, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 7.071428571428571, |
|
"grad_norm": 2.0298547744750977, |
|
"learning_rate": 1.5185185185185186e-05, |
|
"loss": 0.1876, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 7.078571428571428, |
|
"grad_norm": 2.375779151916504, |
|
"learning_rate": 1.514814814814815e-05, |
|
"loss": 0.2289, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 7.085714285714285, |
|
"grad_norm": 1.856911540031433, |
|
"learning_rate": 1.5111111111111112e-05, |
|
"loss": 0.2029, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 7.0928571428571425, |
|
"grad_norm": 1.1523020267486572, |
|
"learning_rate": 1.5074074074074074e-05, |
|
"loss": 0.1753, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"grad_norm": 1.4677330255508423, |
|
"learning_rate": 1.5037037037037039e-05, |
|
"loss": 0.2256, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 7.107142857142857, |
|
"grad_norm": 1.0742135047912598, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.3844, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 7.114285714285714, |
|
"grad_norm": 1.4122258424758911, |
|
"learning_rate": 1.4962962962962965e-05, |
|
"loss": 0.1498, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 7.121428571428571, |
|
"grad_norm": 1.9363057613372803, |
|
"learning_rate": 1.4925925925925927e-05, |
|
"loss": 0.2721, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 7.128571428571428, |
|
"grad_norm": 0.7882018685340881, |
|
"learning_rate": 1.4888888888888888e-05, |
|
"loss": 0.2363, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 7.135714285714286, |
|
"grad_norm": 1.8561784029006958, |
|
"learning_rate": 1.4851851851851853e-05, |
|
"loss": 0.2875, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 7.142857142857143, |
|
"grad_norm": 1.9597991704940796, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.1984, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 7.142857142857143, |
|
"eval_loss": 0.34164437651634216, |
|
"eval_rouge1": 0.9089, |
|
"eval_rouge2": 0.8501, |
|
"eval_rougeL": 0.9063, |
|
"eval_runtime": 122.2974, |
|
"eval_samples_per_second": 11.448, |
|
"eval_steps_per_second": 5.724, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"grad_norm": 2.1984336376190186, |
|
"learning_rate": 1.477777777777778e-05, |
|
"loss": 0.2409, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 7.1571428571428575, |
|
"grad_norm": 1.1259089708328247, |
|
"learning_rate": 1.4740740740740741e-05, |
|
"loss": 0.1386, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 7.164285714285715, |
|
"grad_norm": 2.6497113704681396, |
|
"learning_rate": 1.4703703703703706e-05, |
|
"loss": 0.2999, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 7.171428571428572, |
|
"grad_norm": 2.7574968338012695, |
|
"learning_rate": 1.4666666666666668e-05, |
|
"loss": 0.2064, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 7.178571428571429, |
|
"grad_norm": 2.4020519256591797, |
|
"learning_rate": 1.462962962962963e-05, |
|
"loss": 0.2539, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 7.185714285714286, |
|
"grad_norm": 1.8728407621383667, |
|
"learning_rate": 1.4592592592592594e-05, |
|
"loss": 0.2264, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 7.192857142857143, |
|
"grad_norm": 3.187389612197876, |
|
"learning_rate": 1.4555555555555556e-05, |
|
"loss": 0.2175, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"grad_norm": 1.6992945671081543, |
|
"learning_rate": 1.4518518518518521e-05, |
|
"loss": 0.1752, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 7.207142857142857, |
|
"grad_norm": 1.2373261451721191, |
|
"learning_rate": 1.4481481481481483e-05, |
|
"loss": 0.1712, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 7.214285714285714, |
|
"grad_norm": 1.3986244201660156, |
|
"learning_rate": 1.4444444444444444e-05, |
|
"loss": 0.1727, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 7.2214285714285715, |
|
"grad_norm": 1.5018147230148315, |
|
"learning_rate": 1.4407407407407409e-05, |
|
"loss": 0.2309, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 7.228571428571429, |
|
"grad_norm": 1.8186851739883423, |
|
"learning_rate": 1.437037037037037e-05, |
|
"loss": 0.2741, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 7.235714285714286, |
|
"grad_norm": 2.8224360942840576, |
|
"learning_rate": 1.4333333333333334e-05, |
|
"loss": 0.2244, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 7.242857142857143, |
|
"grad_norm": 1.900585412979126, |
|
"learning_rate": 1.4296296296296297e-05, |
|
"loss": 0.2527, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"grad_norm": 2.4210896492004395, |
|
"learning_rate": 1.425925925925926e-05, |
|
"loss": 0.3832, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 7.257142857142857, |
|
"grad_norm": 1.2783209085464478, |
|
"learning_rate": 1.4222222222222224e-05, |
|
"loss": 0.1756, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 7.264285714285714, |
|
"grad_norm": 1.4387212991714478, |
|
"learning_rate": 1.4185185185185185e-05, |
|
"loss": 0.2567, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 7.271428571428571, |
|
"grad_norm": 2.861311435699463, |
|
"learning_rate": 1.4148148148148148e-05, |
|
"loss": 0.3683, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 7.2785714285714285, |
|
"grad_norm": 0.8701191544532776, |
|
"learning_rate": 1.4111111111111112e-05, |
|
"loss": 0.2229, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 7.285714285714286, |
|
"grad_norm": 2.103231430053711, |
|
"learning_rate": 1.4074074074074075e-05, |
|
"loss": 0.1652, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 7.292857142857143, |
|
"grad_norm": 3.0958895683288574, |
|
"learning_rate": 1.4037037037037038e-05, |
|
"loss": 0.3048, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"grad_norm": 1.0370267629623413, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.243, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 7.307142857142857, |
|
"grad_norm": 1.334799885749817, |
|
"learning_rate": 1.3962962962962963e-05, |
|
"loss": 0.2242, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 7.314285714285714, |
|
"grad_norm": 1.797135829925537, |
|
"learning_rate": 1.3925925925925926e-05, |
|
"loss": 0.1916, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 7.321428571428571, |
|
"grad_norm": 0.7588611841201782, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.2548, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 7.328571428571428, |
|
"grad_norm": 1.9136377573013306, |
|
"learning_rate": 1.3851851851851853e-05, |
|
"loss": 0.2373, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 7.335714285714285, |
|
"grad_norm": 1.326635479927063, |
|
"learning_rate": 1.3814814814814816e-05, |
|
"loss": 0.2597, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 7.3428571428571425, |
|
"grad_norm": 2.400609016418457, |
|
"learning_rate": 1.3777777777777778e-05, |
|
"loss": 0.2648, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"grad_norm": 0.9898678064346313, |
|
"learning_rate": 1.3740740740740741e-05, |
|
"loss": 0.2304, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 7.357142857142857, |
|
"grad_norm": 0.7826656103134155, |
|
"learning_rate": 1.3703703703703704e-05, |
|
"loss": 0.1209, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 7.364285714285714, |
|
"grad_norm": 1.083044409751892, |
|
"learning_rate": 1.3666666666666666e-05, |
|
"loss": 0.2026, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 7.371428571428572, |
|
"grad_norm": 1.283219814300537, |
|
"learning_rate": 1.362962962962963e-05, |
|
"loss": 0.2018, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 7.378571428571428, |
|
"grad_norm": 1.9941823482513428, |
|
"learning_rate": 1.3592592592592592e-05, |
|
"loss": 0.2469, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 7.385714285714286, |
|
"grad_norm": 2.553957462310791, |
|
"learning_rate": 1.3555555555555557e-05, |
|
"loss": 0.2661, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 7.392857142857143, |
|
"grad_norm": 1.657182216644287, |
|
"learning_rate": 1.3518518518518519e-05, |
|
"loss": 0.2762, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"grad_norm": 1.6704496145248413, |
|
"learning_rate": 1.348148148148148e-05, |
|
"loss": 0.222, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 7.4071428571428575, |
|
"grad_norm": 1.338329792022705, |
|
"learning_rate": 1.3444444444444445e-05, |
|
"loss": 0.2658, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 7.414285714285715, |
|
"grad_norm": 1.9741250276565552, |
|
"learning_rate": 1.3407407407407407e-05, |
|
"loss": 0.2596, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 7.421428571428572, |
|
"grad_norm": 2.523958444595337, |
|
"learning_rate": 1.3370370370370372e-05, |
|
"loss": 0.1553, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 7.428571428571429, |
|
"grad_norm": 2.260690450668335, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.2942, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 7.435714285714286, |
|
"grad_norm": 2.441620349884033, |
|
"learning_rate": 1.3296296296296295e-05, |
|
"loss": 0.2451, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 7.442857142857143, |
|
"grad_norm": 0.7054124474525452, |
|
"learning_rate": 1.325925925925926e-05, |
|
"loss": 0.1862, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"grad_norm": 1.6281330585479736, |
|
"learning_rate": 1.3222222222222221e-05, |
|
"loss": 0.1714, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 7.457142857142857, |
|
"grad_norm": 1.738685965538025, |
|
"learning_rate": 1.3185185185185186e-05, |
|
"loss": 0.2052, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 7.464285714285714, |
|
"grad_norm": 1.9982494115829468, |
|
"learning_rate": 1.3148148148148148e-05, |
|
"loss": 0.2964, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 7.4714285714285715, |
|
"grad_norm": 1.0081127882003784, |
|
"learning_rate": 1.3111111111111113e-05, |
|
"loss": 0.1956, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 7.478571428571429, |
|
"grad_norm": 1.5927938222885132, |
|
"learning_rate": 1.3074074074074074e-05, |
|
"loss": 0.236, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 7.485714285714286, |
|
"grad_norm": 1.7959505319595337, |
|
"learning_rate": 1.3037037037037036e-05, |
|
"loss": 0.2462, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 7.492857142857143, |
|
"grad_norm": 0.9230768084526062, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.177, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"grad_norm": 1.8714969158172607, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"loss": 0.2224, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 7.507142857142857, |
|
"grad_norm": 2.809420585632324, |
|
"learning_rate": 1.2925925925925927e-05, |
|
"loss": 0.2214, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 7.514285714285714, |
|
"grad_norm": 2.2183682918548584, |
|
"learning_rate": 1.2888888888888889e-05, |
|
"loss": 0.2528, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 7.521428571428571, |
|
"grad_norm": 2.108675479888916, |
|
"learning_rate": 1.2851851851851854e-05, |
|
"loss": 0.2593, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 7.5285714285714285, |
|
"grad_norm": 1.9557310342788696, |
|
"learning_rate": 1.2814814814814815e-05, |
|
"loss": 0.1875, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 7.535714285714286, |
|
"grad_norm": 2.3365464210510254, |
|
"learning_rate": 1.2777777777777777e-05, |
|
"loss": 0.2232, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 7.542857142857143, |
|
"grad_norm": 1.4713023900985718, |
|
"learning_rate": 1.2740740740740742e-05, |
|
"loss": 0.2592, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"grad_norm": 2.5241403579711914, |
|
"learning_rate": 1.2703703703703704e-05, |
|
"loss": 0.2632, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 7.557142857142857, |
|
"grad_norm": 2.726618528366089, |
|
"learning_rate": 1.2666666666666668e-05, |
|
"loss": 0.2068, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 7.564285714285714, |
|
"grad_norm": 1.2947627305984497, |
|
"learning_rate": 1.262962962962963e-05, |
|
"loss": 0.1846, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 7.571428571428571, |
|
"grad_norm": 1.4739402532577515, |
|
"learning_rate": 1.2592592592592592e-05, |
|
"loss": 0.1445, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 7.578571428571428, |
|
"grad_norm": 1.7607239484786987, |
|
"learning_rate": 1.2555555555555557e-05, |
|
"loss": 0.2087, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 7.585714285714285, |
|
"grad_norm": 1.510556697845459, |
|
"learning_rate": 1.2518518518518518e-05, |
|
"loss": 0.2356, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 7.5928571428571425, |
|
"grad_norm": 1.4189872741699219, |
|
"learning_rate": 1.2481481481481481e-05, |
|
"loss": 0.2343, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"grad_norm": 3.209477424621582, |
|
"learning_rate": 1.2444444444444445e-05, |
|
"loss": 0.2131, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 7.607142857142857, |
|
"grad_norm": 2.026301145553589, |
|
"learning_rate": 1.2407407407407408e-05, |
|
"loss": 0.3643, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 7.614285714285714, |
|
"grad_norm": 2.355459451675415, |
|
"learning_rate": 1.2370370370370371e-05, |
|
"loss": 0.2382, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 7.621428571428572, |
|
"grad_norm": 1.6867364645004272, |
|
"learning_rate": 1.2333333333333334e-05, |
|
"loss": 0.1886, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 7.628571428571428, |
|
"grad_norm": 1.863373041152954, |
|
"learning_rate": 1.2296296296296298e-05, |
|
"loss": 0.2842, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 7.635714285714286, |
|
"grad_norm": 1.4037106037139893, |
|
"learning_rate": 1.225925925925926e-05, |
|
"loss": 0.297, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 7.642857142857143, |
|
"grad_norm": 1.2220287322998047, |
|
"learning_rate": 1.2222222222222222e-05, |
|
"loss": 0.2126, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"grad_norm": 1.796430230140686, |
|
"learning_rate": 1.2185185185185186e-05, |
|
"loss": 0.2494, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 7.6571428571428575, |
|
"grad_norm": 2.7764432430267334, |
|
"learning_rate": 1.2148148148148149e-05, |
|
"loss": 0.3102, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 7.664285714285715, |
|
"grad_norm": 2.2875261306762695, |
|
"learning_rate": 1.2111111111111112e-05, |
|
"loss": 0.2171, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 7.671428571428572, |
|
"grad_norm": 1.3017419576644897, |
|
"learning_rate": 1.2074074074074075e-05, |
|
"loss": 0.2095, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 7.678571428571429, |
|
"grad_norm": 1.5423152446746826, |
|
"learning_rate": 1.2037037037037037e-05, |
|
"loss": 0.3183, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 7.685714285714286, |
|
"grad_norm": 2.0346460342407227, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.1669, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 7.692857142857143, |
|
"grad_norm": 1.178389549255371, |
|
"learning_rate": 1.1962962962962964e-05, |
|
"loss": 0.3195, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"grad_norm": 1.3902812004089355, |
|
"learning_rate": 1.1925925925925927e-05, |
|
"loss": 0.2166, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 7.707142857142857, |
|
"grad_norm": 2.8894922733306885, |
|
"learning_rate": 1.188888888888889e-05, |
|
"loss": 0.2216, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 7.714285714285714, |
|
"grad_norm": 2.77864670753479, |
|
"learning_rate": 1.1851851851851853e-05, |
|
"loss": 0.224, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 7.7214285714285715, |
|
"grad_norm": 1.370814323425293, |
|
"learning_rate": 1.1814814814814817e-05, |
|
"loss": 0.2277, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 7.728571428571429, |
|
"grad_norm": 1.3543068170547485, |
|
"learning_rate": 1.1777777777777778e-05, |
|
"loss": 0.19, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 7.735714285714286, |
|
"grad_norm": 2.4707486629486084, |
|
"learning_rate": 1.1740740740740741e-05, |
|
"loss": 0.1951, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 7.742857142857143, |
|
"grad_norm": 2.284876823425293, |
|
"learning_rate": 1.1703703703703705e-05, |
|
"loss": 0.2206, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"grad_norm": 1.1018098592758179, |
|
"learning_rate": 1.1666666666666668e-05, |
|
"loss": 0.1386, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 7.757142857142857, |
|
"grad_norm": 1.9555597305297852, |
|
"learning_rate": 1.1629629629629631e-05, |
|
"loss": 0.1645, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 7.764285714285714, |
|
"grad_norm": 1.1327388286590576, |
|
"learning_rate": 1.1592592592592594e-05, |
|
"loss": 0.1654, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 7.771428571428571, |
|
"grad_norm": 1.6210479736328125, |
|
"learning_rate": 1.1555555555555556e-05, |
|
"loss": 0.1937, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 7.7785714285714285, |
|
"grad_norm": 1.7142146825790405, |
|
"learning_rate": 1.151851851851852e-05, |
|
"loss": 0.1722, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 7.785714285714286, |
|
"grad_norm": 3.529614210128784, |
|
"learning_rate": 1.1481481481481482e-05, |
|
"loss": 0.2414, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 7.792857142857143, |
|
"grad_norm": 1.6331572532653809, |
|
"learning_rate": 1.1444444444444446e-05, |
|
"loss": 0.1844, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"grad_norm": 2.7431063652038574, |
|
"learning_rate": 1.1407407407407409e-05, |
|
"loss": 0.2745, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 7.807142857142857, |
|
"grad_norm": 2.882291316986084, |
|
"learning_rate": 1.1370370370370372e-05, |
|
"loss": 0.2353, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 7.814285714285714, |
|
"grad_norm": 2.3573696613311768, |
|
"learning_rate": 1.1333333333333334e-05, |
|
"loss": 0.2047, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 7.821428571428571, |
|
"grad_norm": 1.370251178741455, |
|
"learning_rate": 1.1296296296296297e-05, |
|
"loss": 0.1855, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 7.828571428571428, |
|
"grad_norm": 1.1445660591125488, |
|
"learning_rate": 1.125925925925926e-05, |
|
"loss": 0.2316, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 7.835714285714285, |
|
"grad_norm": 2.094175100326538, |
|
"learning_rate": 1.1222222222222224e-05, |
|
"loss": 0.477, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 7.8428571428571425, |
|
"grad_norm": 2.1148130893707275, |
|
"learning_rate": 1.1185185185185187e-05, |
|
"loss": 0.2581, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"grad_norm": 1.89934241771698, |
|
"learning_rate": 1.1148148148148148e-05, |
|
"loss": 0.2908, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 7.857142857142857, |
|
"grad_norm": 1.6785616874694824, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.2222, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 7.857142857142857, |
|
"eval_loss": 0.34114697575569153, |
|
"eval_rouge1": 0.91, |
|
"eval_rouge2": 0.8517, |
|
"eval_rougeL": 0.9073, |
|
"eval_runtime": 122.2351, |
|
"eval_samples_per_second": 11.453, |
|
"eval_steps_per_second": 5.727, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 7.864285714285714, |
|
"grad_norm": 2.335857629776001, |
|
"learning_rate": 1.1074074074074075e-05, |
|
"loss": 0.262, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 7.871428571428572, |
|
"grad_norm": 1.9699100255966187, |
|
"learning_rate": 1.1037037037037038e-05, |
|
"loss": 0.2056, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 7.878571428571428, |
|
"grad_norm": 0.8576107025146484, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 0.1929, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 7.885714285714286, |
|
"grad_norm": 0.7365075945854187, |
|
"learning_rate": 1.0962962962962963e-05, |
|
"loss": 0.2494, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 7.892857142857143, |
|
"grad_norm": 2.7551143169403076, |
|
"learning_rate": 1.0925925925925926e-05, |
|
"loss": 0.3239, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"grad_norm": 1.1980743408203125, |
|
"learning_rate": 1.088888888888889e-05, |
|
"loss": 0.1836, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 7.9071428571428575, |
|
"grad_norm": 1.525614619255066, |
|
"learning_rate": 1.0851851851851853e-05, |
|
"loss": 0.3089, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 7.914285714285715, |
|
"grad_norm": 2.0727596282958984, |
|
"learning_rate": 1.0814814814814814e-05, |
|
"loss": 0.1867, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 7.921428571428572, |
|
"grad_norm": 1.242550015449524, |
|
"learning_rate": 1.0777777777777778e-05, |
|
"loss": 0.222, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 7.928571428571429, |
|
"grad_norm": 1.3312640190124512, |
|
"learning_rate": 1.074074074074074e-05, |
|
"loss": 0.3334, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 7.935714285714286, |
|
"grad_norm": 1.4483474493026733, |
|
"learning_rate": 1.0703703703703704e-05, |
|
"loss": 0.2966, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 7.942857142857143, |
|
"grad_norm": 1.5403432846069336, |
|
"learning_rate": 1.0666666666666667e-05, |
|
"loss": 0.2591, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"grad_norm": 1.6620547771453857, |
|
"learning_rate": 1.0629629629629629e-05, |
|
"loss": 0.2216, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 7.957142857142857, |
|
"grad_norm": 1.2060527801513672, |
|
"learning_rate": 1.0592592592592592e-05, |
|
"loss": 0.2403, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 7.964285714285714, |
|
"grad_norm": 1.4476624727249146, |
|
"learning_rate": 1.0555555555555555e-05, |
|
"loss": 0.2739, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 7.9714285714285715, |
|
"grad_norm": 2.2425661087036133, |
|
"learning_rate": 1.0518518518518519e-05, |
|
"loss": 0.2981, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 7.978571428571429, |
|
"grad_norm": 2.2100632190704346, |
|
"learning_rate": 1.0481481481481482e-05, |
|
"loss": 0.2178, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 7.985714285714286, |
|
"grad_norm": 1.080759882926941, |
|
"learning_rate": 1.0444444444444445e-05, |
|
"loss": 0.2947, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 7.992857142857143, |
|
"grad_norm": 2.9972569942474365, |
|
"learning_rate": 1.0407407407407407e-05, |
|
"loss": 0.203, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.7628710269927979, |
|
"learning_rate": 1.037037037037037e-05, |
|
"loss": 0.308, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 8.007142857142858, |
|
"grad_norm": 1.569732666015625, |
|
"learning_rate": 1.0333333333333333e-05, |
|
"loss": 0.2003, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 8.014285714285714, |
|
"grad_norm": 0.8212767839431763, |
|
"learning_rate": 1.0296296296296296e-05, |
|
"loss": 0.1957, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 8.021428571428572, |
|
"grad_norm": 1.7055811882019043, |
|
"learning_rate": 1.025925925925926e-05, |
|
"loss": 0.2274, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 8.028571428571428, |
|
"grad_norm": 1.6140356063842773, |
|
"learning_rate": 1.0222222222222223e-05, |
|
"loss": 0.2182, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 8.035714285714286, |
|
"grad_norm": 1.6154979467391968, |
|
"learning_rate": 1.0185185185185185e-05, |
|
"loss": 0.1217, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 8.042857142857143, |
|
"grad_norm": 2.4283053874969482, |
|
"learning_rate": 1.0148148148148148e-05, |
|
"loss": 0.2687, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 2.201909065246582, |
|
"learning_rate": 1.0111111111111111e-05, |
|
"loss": 0.2836, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 8.057142857142857, |
|
"grad_norm": 1.902273178100586, |
|
"learning_rate": 1.0074074074074074e-05, |
|
"loss": 0.2076, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 8.064285714285715, |
|
"grad_norm": 1.7851389646530151, |
|
"learning_rate": 1.0037037037037038e-05, |
|
"loss": 0.2045, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 8.071428571428571, |
|
"grad_norm": 1.1988000869750977, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1832, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 8.07857142857143, |
|
"grad_norm": 0.6530731320381165, |
|
"learning_rate": 9.962962962962962e-06, |
|
"loss": 0.1788, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 8.085714285714285, |
|
"grad_norm": 1.5712918043136597, |
|
"learning_rate": 9.925925925925926e-06, |
|
"loss": 0.1601, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 8.092857142857143, |
|
"grad_norm": 1.3664653301239014, |
|
"learning_rate": 9.888888888888889e-06, |
|
"loss": 0.2745, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"grad_norm": 1.084404706954956, |
|
"learning_rate": 9.851851851851852e-06, |
|
"loss": 0.2595, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 8.107142857142858, |
|
"grad_norm": 1.9523823261260986, |
|
"learning_rate": 9.814814814814815e-06, |
|
"loss": 0.2105, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 8.114285714285714, |
|
"grad_norm": 1.2386913299560547, |
|
"learning_rate": 9.777777777777779e-06, |
|
"loss": 0.1199, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 8.121428571428572, |
|
"grad_norm": 1.6026146411895752, |
|
"learning_rate": 9.74074074074074e-06, |
|
"loss": 0.2419, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 8.128571428571428, |
|
"grad_norm": 1.3624472618103027, |
|
"learning_rate": 9.703703703703703e-06, |
|
"loss": 0.2094, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 8.135714285714286, |
|
"grad_norm": 1.7777026891708374, |
|
"learning_rate": 9.666666666666667e-06, |
|
"loss": 0.2033, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 8.142857142857142, |
|
"grad_norm": 1.5583858489990234, |
|
"learning_rate": 9.62962962962963e-06, |
|
"loss": 0.2229, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"grad_norm": 0.7537804841995239, |
|
"learning_rate": 9.592592592592593e-06, |
|
"loss": 0.2201, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 8.157142857142857, |
|
"grad_norm": 1.3313623666763306, |
|
"learning_rate": 9.555555555555556e-06, |
|
"loss": 0.1802, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 8.164285714285715, |
|
"grad_norm": 2.136382818222046, |
|
"learning_rate": 9.51851851851852e-06, |
|
"loss": 0.1686, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 8.17142857142857, |
|
"grad_norm": 1.4156885147094727, |
|
"learning_rate": 9.481481481481481e-06, |
|
"loss": 0.2291, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 8.178571428571429, |
|
"grad_norm": 1.5296056270599365, |
|
"learning_rate": 9.444444444444445e-06, |
|
"loss": 0.2476, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 8.185714285714285, |
|
"grad_norm": 1.5632902383804321, |
|
"learning_rate": 9.407407407407408e-06, |
|
"loss": 0.2304, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 8.192857142857143, |
|
"grad_norm": 0.9542272686958313, |
|
"learning_rate": 9.370370370370371e-06, |
|
"loss": 0.1693, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"grad_norm": 3.356255292892456, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 0.3996, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 8.207142857142857, |
|
"grad_norm": 1.6759045124053955, |
|
"learning_rate": 9.296296296296298e-06, |
|
"loss": 0.2134, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 8.214285714285714, |
|
"grad_norm": 1.2791472673416138, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.248, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 8.221428571428572, |
|
"grad_norm": 1.07367742061615, |
|
"learning_rate": 9.222222222222222e-06, |
|
"loss": 0.1731, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 8.228571428571428, |
|
"grad_norm": 0.5693773627281189, |
|
"learning_rate": 9.185185185185186e-06, |
|
"loss": 0.2758, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 8.235714285714286, |
|
"grad_norm": 1.7820035219192505, |
|
"learning_rate": 9.148148148148149e-06, |
|
"loss": 0.2147, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 8.242857142857142, |
|
"grad_norm": 1.6593891382217407, |
|
"learning_rate": 9.111111111111112e-06, |
|
"loss": 0.2481, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"grad_norm": 1.4112298488616943, |
|
"learning_rate": 9.074074074074075e-06, |
|
"loss": 0.3307, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 8.257142857142856, |
|
"grad_norm": 1.0225239992141724, |
|
"learning_rate": 9.037037037037037e-06, |
|
"loss": 0.1092, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 8.264285714285714, |
|
"grad_norm": 1.4713934659957886, |
|
"learning_rate": 9e-06, |
|
"loss": 0.2332, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 8.271428571428572, |
|
"grad_norm": 1.6922743320465088, |
|
"learning_rate": 8.962962962962963e-06, |
|
"loss": 0.2106, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 8.278571428571428, |
|
"grad_norm": 2.4201695919036865, |
|
"learning_rate": 8.925925925925927e-06, |
|
"loss": 0.1746, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 8.285714285714286, |
|
"grad_norm": 2.3649351596832275, |
|
"learning_rate": 8.88888888888889e-06, |
|
"loss": 0.2204, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 8.292857142857143, |
|
"grad_norm": 1.0867241621017456, |
|
"learning_rate": 8.851851851851853e-06, |
|
"loss": 0.2246, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"grad_norm": 1.1103533506393433, |
|
"learning_rate": 8.814814814814815e-06, |
|
"loss": 0.2952, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 8.307142857142857, |
|
"grad_norm": 1.9086233377456665, |
|
"learning_rate": 8.777777777777778e-06, |
|
"loss": 0.1627, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 8.314285714285715, |
|
"grad_norm": 1.5733546018600464, |
|
"learning_rate": 8.740740740740741e-06, |
|
"loss": 0.2493, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 8.321428571428571, |
|
"grad_norm": 1.514758586883545, |
|
"learning_rate": 8.703703703703705e-06, |
|
"loss": 0.1708, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 8.32857142857143, |
|
"grad_norm": 4.691562175750732, |
|
"learning_rate": 8.666666666666668e-06, |
|
"loss": 0.2622, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 8.335714285714285, |
|
"grad_norm": 1.0987350940704346, |
|
"learning_rate": 8.629629629629631e-06, |
|
"loss": 0.1291, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 8.342857142857143, |
|
"grad_norm": 1.4016727209091187, |
|
"learning_rate": 8.592592592592593e-06, |
|
"loss": 0.2075, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"grad_norm": 0.69717937707901, |
|
"learning_rate": 8.555555555555556e-06, |
|
"loss": 0.1609, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 8.357142857142858, |
|
"grad_norm": 2.023461103439331, |
|
"learning_rate": 8.518518518518519e-06, |
|
"loss": 0.235, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 8.364285714285714, |
|
"grad_norm": 2.0078303813934326, |
|
"learning_rate": 8.481481481481482e-06, |
|
"loss": 0.2081, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 8.371428571428572, |
|
"grad_norm": 1.4724724292755127, |
|
"learning_rate": 8.444444444444446e-06, |
|
"loss": 0.2148, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 8.378571428571428, |
|
"grad_norm": 2.9564125537872314, |
|
"learning_rate": 8.407407407407409e-06, |
|
"loss": 0.2295, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 8.385714285714286, |
|
"grad_norm": 1.4059520959854126, |
|
"learning_rate": 8.37037037037037e-06, |
|
"loss": 0.1398, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 8.392857142857142, |
|
"grad_norm": 2.894953966140747, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.2699, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"grad_norm": 2.5017454624176025, |
|
"learning_rate": 8.296296296296297e-06, |
|
"loss": 0.2322, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 8.407142857142857, |
|
"grad_norm": 0.9503372311592102, |
|
"learning_rate": 8.25925925925926e-06, |
|
"loss": 0.1356, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 8.414285714285715, |
|
"grad_norm": 1.63711416721344, |
|
"learning_rate": 8.222222222222223e-06, |
|
"loss": 0.1929, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 8.42142857142857, |
|
"grad_norm": 0.5972274541854858, |
|
"learning_rate": 8.185185185185187e-06, |
|
"loss": 0.2533, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 8.428571428571429, |
|
"grad_norm": 1.0893709659576416, |
|
"learning_rate": 8.14814814814815e-06, |
|
"loss": 0.2089, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 8.435714285714285, |
|
"grad_norm": 1.5523369312286377, |
|
"learning_rate": 8.111111111111112e-06, |
|
"loss": 0.1589, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 8.442857142857143, |
|
"grad_norm": 1.5510472059249878, |
|
"learning_rate": 8.074074074074075e-06, |
|
"loss": 0.2162, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"grad_norm": 2.1272058486938477, |
|
"learning_rate": 8.037037037037038e-06, |
|
"loss": 0.2172, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 8.457142857142857, |
|
"grad_norm": 2.2862300872802734, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.2341, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 8.464285714285714, |
|
"grad_norm": 1.126405954360962, |
|
"learning_rate": 7.962962962962963e-06, |
|
"loss": 0.1474, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 8.471428571428572, |
|
"grad_norm": 2.000462293624878, |
|
"learning_rate": 7.925925925925926e-06, |
|
"loss": 0.2948, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 8.478571428571428, |
|
"grad_norm": 1.4239530563354492, |
|
"learning_rate": 7.88888888888889e-06, |
|
"loss": 0.2267, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 8.485714285714286, |
|
"grad_norm": 2.479355812072754, |
|
"learning_rate": 7.851851851851853e-06, |
|
"loss": 0.1857, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 8.492857142857144, |
|
"grad_norm": 1.9741036891937256, |
|
"learning_rate": 7.814814814814816e-06, |
|
"loss": 0.2049, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"grad_norm": 1.996099591255188, |
|
"learning_rate": 7.777777777777777e-06, |
|
"loss": 0.2404, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 8.507142857142856, |
|
"grad_norm": 1.185811161994934, |
|
"learning_rate": 7.74074074074074e-06, |
|
"loss": 0.1476, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 8.514285714285714, |
|
"grad_norm": 1.7766746282577515, |
|
"learning_rate": 7.703703703703704e-06, |
|
"loss": 0.1483, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 8.521428571428572, |
|
"grad_norm": 1.0609338283538818, |
|
"learning_rate": 7.666666666666667e-06, |
|
"loss": 0.3414, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 8.528571428571428, |
|
"grad_norm": 1.352981448173523, |
|
"learning_rate": 7.629629629629629e-06, |
|
"loss": 0.2453, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 8.535714285714286, |
|
"grad_norm": 2.01450252532959, |
|
"learning_rate": 7.592592592592593e-06, |
|
"loss": 0.0983, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 8.542857142857143, |
|
"grad_norm": 0.9895955920219421, |
|
"learning_rate": 7.555555555555556e-06, |
|
"loss": 0.2821, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"grad_norm": 2.1095712184906006, |
|
"learning_rate": 7.518518518518519e-06, |
|
"loss": 0.2147, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 8.557142857142857, |
|
"grad_norm": 1.3148187398910522, |
|
"learning_rate": 7.481481481481483e-06, |
|
"loss": 0.1834, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 8.564285714285715, |
|
"grad_norm": 2.1209769248962402, |
|
"learning_rate": 7.444444444444444e-06, |
|
"loss": 0.2369, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 8.571428571428571, |
|
"grad_norm": 2.559124708175659, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.3642, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 8.571428571428571, |
|
"eval_loss": 0.3329981863498688, |
|
"eval_rouge1": 0.9114, |
|
"eval_rouge2": 0.8538, |
|
"eval_rougeL": 0.9087, |
|
"eval_runtime": 122.2883, |
|
"eval_samples_per_second": 11.448, |
|
"eval_steps_per_second": 5.724, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 8.57857142857143, |
|
"grad_norm": 2.582113027572632, |
|
"learning_rate": 7.370370370370371e-06, |
|
"loss": 0.3589, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 8.585714285714285, |
|
"grad_norm": 1.1606924533843994, |
|
"learning_rate": 7.333333333333334e-06, |
|
"loss": 0.2036, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 8.592857142857143, |
|
"grad_norm": 1.990123987197876, |
|
"learning_rate": 7.296296296296297e-06, |
|
"loss": 0.2347, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"grad_norm": 1.3593547344207764, |
|
"learning_rate": 7.2592592592592605e-06, |
|
"loss": 0.1475, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 8.607142857142858, |
|
"grad_norm": 0.7821537256240845, |
|
"learning_rate": 7.222222222222222e-06, |
|
"loss": 0.2889, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 8.614285714285714, |
|
"grad_norm": 2.2297816276550293, |
|
"learning_rate": 7.185185185185185e-06, |
|
"loss": 0.2495, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 8.621428571428572, |
|
"grad_norm": 1.9694111347198486, |
|
"learning_rate": 7.1481481481481486e-06, |
|
"loss": 0.2182, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 8.628571428571428, |
|
"grad_norm": 1.3611793518066406, |
|
"learning_rate": 7.111111111111112e-06, |
|
"loss": 0.2116, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 8.635714285714286, |
|
"grad_norm": 0.9108260869979858, |
|
"learning_rate": 7.074074074074074e-06, |
|
"loss": 0.2229, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 8.642857142857142, |
|
"grad_norm": 2.577470302581787, |
|
"learning_rate": 7.0370370370370375e-06, |
|
"loss": 0.3132, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"grad_norm": 0.837846040725708, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 0.1791, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 8.657142857142857, |
|
"grad_norm": 2.7253611087799072, |
|
"learning_rate": 6.962962962962963e-06, |
|
"loss": 0.1998, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 8.664285714285715, |
|
"grad_norm": 1.6284639835357666, |
|
"learning_rate": 6.925925925925926e-06, |
|
"loss": 0.2917, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 8.67142857142857, |
|
"grad_norm": 1.0358765125274658, |
|
"learning_rate": 6.888888888888889e-06, |
|
"loss": 0.2949, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 8.678571428571429, |
|
"grad_norm": 1.9100443124771118, |
|
"learning_rate": 6.851851851851852e-06, |
|
"loss": 0.1639, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 8.685714285714285, |
|
"grad_norm": 1.4746047258377075, |
|
"learning_rate": 6.814814814814815e-06, |
|
"loss": 0.1798, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 8.692857142857143, |
|
"grad_norm": 1.1177834272384644, |
|
"learning_rate": 6.777777777777779e-06, |
|
"loss": 0.2687, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"grad_norm": 0.8141186833381653, |
|
"learning_rate": 6.74074074074074e-06, |
|
"loss": 0.1627, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 8.707142857142857, |
|
"grad_norm": 1.4017144441604614, |
|
"learning_rate": 6.703703703703703e-06, |
|
"loss": 0.1938, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 8.714285714285714, |
|
"grad_norm": 0.7198919653892517, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.222, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 8.721428571428572, |
|
"grad_norm": 1.4436593055725098, |
|
"learning_rate": 6.62962962962963e-06, |
|
"loss": 0.2821, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 8.728571428571428, |
|
"grad_norm": 1.8222748041152954, |
|
"learning_rate": 6.592592592592593e-06, |
|
"loss": 0.3652, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 8.735714285714286, |
|
"grad_norm": 1.033396601676941, |
|
"learning_rate": 6.555555555555556e-06, |
|
"loss": 0.1775, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 8.742857142857144, |
|
"grad_norm": 1.3893368244171143, |
|
"learning_rate": 6.518518518518518e-06, |
|
"loss": 0.3292, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"grad_norm": 1.7294946908950806, |
|
"learning_rate": 6.481481481481481e-06, |
|
"loss": 0.3136, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 8.757142857142856, |
|
"grad_norm": 1.5931612253189087, |
|
"learning_rate": 6.4444444444444445e-06, |
|
"loss": 0.2498, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 8.764285714285714, |
|
"grad_norm": 1.6048915386199951, |
|
"learning_rate": 6.407407407407408e-06, |
|
"loss": 0.2996, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 8.771428571428572, |
|
"grad_norm": 0.9885932803153992, |
|
"learning_rate": 6.370370370370371e-06, |
|
"loss": 0.1647, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 8.778571428571428, |
|
"grad_norm": 4.228821277618408, |
|
"learning_rate": 6.333333333333334e-06, |
|
"loss": 0.2533, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 8.785714285714286, |
|
"grad_norm": 2.150721788406372, |
|
"learning_rate": 6.296296296296296e-06, |
|
"loss": 0.2922, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 8.792857142857143, |
|
"grad_norm": 2.0247929096221924, |
|
"learning_rate": 6.259259259259259e-06, |
|
"loss": 0.1613, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"grad_norm": 0.5003380179405212, |
|
"learning_rate": 6.222222222222222e-06, |
|
"loss": 0.1459, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 8.807142857142857, |
|
"grad_norm": 0.9374644756317139, |
|
"learning_rate": 6.1851851851851856e-06, |
|
"loss": 0.2384, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 8.814285714285715, |
|
"grad_norm": 1.5784926414489746, |
|
"learning_rate": 6.148148148148149e-06, |
|
"loss": 0.2458, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 8.821428571428571, |
|
"grad_norm": 1.6696372032165527, |
|
"learning_rate": 6.111111111111111e-06, |
|
"loss": 0.2787, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 8.82857142857143, |
|
"grad_norm": 1.2524793148040771, |
|
"learning_rate": 6.0740740740740745e-06, |
|
"loss": 0.2445, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 8.835714285714285, |
|
"grad_norm": 2.176713466644287, |
|
"learning_rate": 6.037037037037038e-06, |
|
"loss": 0.1896, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 8.842857142857143, |
|
"grad_norm": 1.964752197265625, |
|
"learning_rate": 6e-06, |
|
"loss": 0.261, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"grad_norm": 2.006638526916504, |
|
"learning_rate": 5.962962962962963e-06, |
|
"loss": 0.2415, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 8.857142857142858, |
|
"grad_norm": 1.9313900470733643, |
|
"learning_rate": 5.925925925925927e-06, |
|
"loss": 0.3772, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 8.864285714285714, |
|
"grad_norm": 2.112170934677124, |
|
"learning_rate": 5.888888888888889e-06, |
|
"loss": 0.2396, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 8.871428571428572, |
|
"grad_norm": 2.7346787452697754, |
|
"learning_rate": 5.851851851851852e-06, |
|
"loss": 0.325, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 8.878571428571428, |
|
"grad_norm": 3.1768178939819336, |
|
"learning_rate": 5.814814814814816e-06, |
|
"loss": 0.2393, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 8.885714285714286, |
|
"grad_norm": 2.164926767349243, |
|
"learning_rate": 5.777777777777778e-06, |
|
"loss": 0.2444, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 8.892857142857142, |
|
"grad_norm": 0.8274826407432556, |
|
"learning_rate": 5.740740740740741e-06, |
|
"loss": 0.2662, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"grad_norm": 1.024776816368103, |
|
"learning_rate": 5.7037037037037045e-06, |
|
"loss": 0.2059, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 8.907142857142857, |
|
"grad_norm": 1.1089740991592407, |
|
"learning_rate": 5.666666666666667e-06, |
|
"loss": 0.2529, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 8.914285714285715, |
|
"grad_norm": 1.3669365644454956, |
|
"learning_rate": 5.62962962962963e-06, |
|
"loss": 0.2432, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 8.92142857142857, |
|
"grad_norm": 1.9711703062057495, |
|
"learning_rate": 5.592592592592593e-06, |
|
"loss": 0.127, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 8.928571428571429, |
|
"grad_norm": 1.4796557426452637, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.2244, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 8.935714285714285, |
|
"grad_norm": 2.029303789138794, |
|
"learning_rate": 5.518518518518519e-06, |
|
"loss": 0.2939, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 8.942857142857143, |
|
"grad_norm": 2.0809414386749268, |
|
"learning_rate": 5.4814814814814815e-06, |
|
"loss": 0.2146, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"grad_norm": 1.2555458545684814, |
|
"learning_rate": 5.444444444444445e-06, |
|
"loss": 0.2116, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 8.957142857142857, |
|
"grad_norm": 2.3402106761932373, |
|
"learning_rate": 5.407407407407407e-06, |
|
"loss": 0.1587, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 8.964285714285714, |
|
"grad_norm": 1.2514257431030273, |
|
"learning_rate": 5.37037037037037e-06, |
|
"loss": 0.1598, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 8.971428571428572, |
|
"grad_norm": 3.1680376529693604, |
|
"learning_rate": 5.333333333333334e-06, |
|
"loss": 0.2925, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 8.978571428571428, |
|
"grad_norm": 2.482211112976074, |
|
"learning_rate": 5.296296296296296e-06, |
|
"loss": 0.2152, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 8.985714285714286, |
|
"grad_norm": 2.3472371101379395, |
|
"learning_rate": 5.259259259259259e-06, |
|
"loss": 0.2079, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 8.992857142857144, |
|
"grad_norm": 2.52826189994812, |
|
"learning_rate": 5.2222222222222226e-06, |
|
"loss": 0.2124, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.8535226583480835, |
|
"learning_rate": 5.185185185185185e-06, |
|
"loss": 0.232, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 9.007142857142858, |
|
"grad_norm": 2.8780601024627686, |
|
"learning_rate": 5.148148148148148e-06, |
|
"loss": 0.3111, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 9.014285714285714, |
|
"grad_norm": 1.5909632444381714, |
|
"learning_rate": 5.1111111111111115e-06, |
|
"loss": 0.2788, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 9.021428571428572, |
|
"grad_norm": 1.7076550722122192, |
|
"learning_rate": 5.074074074074074e-06, |
|
"loss": 0.2575, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 9.028571428571428, |
|
"grad_norm": 2.8672637939453125, |
|
"learning_rate": 5.037037037037037e-06, |
|
"loss": 0.2228, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 9.035714285714286, |
|
"grad_norm": 1.618055820465088, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1157, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 9.042857142857143, |
|
"grad_norm": 1.407122015953064, |
|
"learning_rate": 4.962962962962963e-06, |
|
"loss": 0.2321, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 1.6875501871109009, |
|
"learning_rate": 4.925925925925926e-06, |
|
"loss": 0.2938, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 9.057142857142857, |
|
"grad_norm": 1.0872751474380493, |
|
"learning_rate": 4.888888888888889e-06, |
|
"loss": 0.181, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 9.064285714285715, |
|
"grad_norm": 1.689308524131775, |
|
"learning_rate": 4.851851851851852e-06, |
|
"loss": 0.1147, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 9.071428571428571, |
|
"grad_norm": 2.5880138874053955, |
|
"learning_rate": 4.814814814814815e-06, |
|
"loss": 0.2757, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 9.07857142857143, |
|
"grad_norm": 2.9612958431243896, |
|
"learning_rate": 4.777777777777778e-06, |
|
"loss": 0.2847, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 9.085714285714285, |
|
"grad_norm": 1.7059327363967896, |
|
"learning_rate": 4.740740740740741e-06, |
|
"loss": 0.195, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 9.092857142857143, |
|
"grad_norm": 1.702331304550171, |
|
"learning_rate": 4.703703703703704e-06, |
|
"loss": 0.2408, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"grad_norm": 1.814587950706482, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 0.3085, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 9.107142857142858, |
|
"grad_norm": 2.863785743713379, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.2667, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 9.114285714285714, |
|
"grad_norm": 1.6600861549377441, |
|
"learning_rate": 4.592592592592593e-06, |
|
"loss": 0.1376, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 9.121428571428572, |
|
"grad_norm": 1.3329426050186157, |
|
"learning_rate": 4.555555555555556e-06, |
|
"loss": 0.2547, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 9.128571428571428, |
|
"grad_norm": 4.032371997833252, |
|
"learning_rate": 4.5185185185185185e-06, |
|
"loss": 0.4649, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 9.135714285714286, |
|
"grad_norm": 2.709066390991211, |
|
"learning_rate": 4.481481481481482e-06, |
|
"loss": 0.2336, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 9.142857142857142, |
|
"grad_norm": 1.063931941986084, |
|
"learning_rate": 4.444444444444445e-06, |
|
"loss": 0.1731, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"grad_norm": 1.3110073804855347, |
|
"learning_rate": 4.407407407407407e-06, |
|
"loss": 0.1381, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 9.157142857142857, |
|
"grad_norm": 1.9574129581451416, |
|
"learning_rate": 4.370370370370371e-06, |
|
"loss": 0.1958, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 9.164285714285715, |
|
"grad_norm": 2.5129504203796387, |
|
"learning_rate": 4.333333333333334e-06, |
|
"loss": 0.2162, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 9.17142857142857, |
|
"grad_norm": 1.842850923538208, |
|
"learning_rate": 4.296296296296296e-06, |
|
"loss": 0.2746, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 9.178571428571429, |
|
"grad_norm": 1.1451313495635986, |
|
"learning_rate": 4.2592592592592596e-06, |
|
"loss": 0.2442, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 9.185714285714285, |
|
"grad_norm": 2.0765175819396973, |
|
"learning_rate": 4.222222222222223e-06, |
|
"loss": 0.1531, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 9.192857142857143, |
|
"grad_norm": 0.9250321984291077, |
|
"learning_rate": 4.185185185185185e-06, |
|
"loss": 0.2876, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"grad_norm": 1.5332380533218384, |
|
"learning_rate": 4.1481481481481485e-06, |
|
"loss": 0.2255, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 9.207142857142857, |
|
"grad_norm": 3.1344316005706787, |
|
"learning_rate": 4.111111111111112e-06, |
|
"loss": 0.2654, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 9.214285714285714, |
|
"grad_norm": 1.4092166423797607, |
|
"learning_rate": 4.074074074074075e-06, |
|
"loss": 0.2358, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 9.221428571428572, |
|
"grad_norm": 1.517716646194458, |
|
"learning_rate": 4.037037037037037e-06, |
|
"loss": 0.2455, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 9.228571428571428, |
|
"grad_norm": 0.7355996966362, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.1761, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 9.235714285714286, |
|
"grad_norm": 2.077259063720703, |
|
"learning_rate": 3.962962962962963e-06, |
|
"loss": 0.2791, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 9.242857142857142, |
|
"grad_norm": 1.3175309896469116, |
|
"learning_rate": 3.925925925925926e-06, |
|
"loss": 0.2296, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"grad_norm": 1.0608943700790405, |
|
"learning_rate": 3.888888888888889e-06, |
|
"loss": 0.1609, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 9.257142857142856, |
|
"grad_norm": 2.2581288814544678, |
|
"learning_rate": 3.851851851851852e-06, |
|
"loss": 0.2192, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 9.264285714285714, |
|
"grad_norm": 1.672400951385498, |
|
"learning_rate": 3.814814814814814e-06, |
|
"loss": 0.1548, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 9.271428571428572, |
|
"grad_norm": 0.7743004560470581, |
|
"learning_rate": 3.777777777777778e-06, |
|
"loss": 0.3168, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 9.278571428571428, |
|
"grad_norm": 0.9588621854782104, |
|
"learning_rate": 3.7407407407407413e-06, |
|
"loss": 0.1585, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 9.285714285714286, |
|
"grad_norm": 1.7508875131607056, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.2137, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 9.285714285714286, |
|
"eval_loss": 0.3308734893798828, |
|
"eval_rouge1": 0.9118, |
|
"eval_rouge2": 0.8545, |
|
"eval_rougeL": 0.909, |
|
"eval_runtime": 122.198, |
|
"eval_samples_per_second": 11.457, |
|
"eval_steps_per_second": 5.728, |
|
"step": 13000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 14000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 2, |
|
"early_stopping_threshold": 0.001 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.24700797698048e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|