{ "best_metric": 0.3308734893798828, "best_model_checkpoint": "results_mt5XLSum_augmented/checkpoint-13000", "epoch": 9.285714285714286, "eval_steps": 1000, "global_step": 13000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007142857142857143, "grad_norm": 10.661067962646484, "learning_rate": 1.0000000000000002e-06, "loss": 1.8382, "step": 10 }, { "epoch": 0.014285714285714285, "grad_norm": 11.73471450805664, "learning_rate": 2.0000000000000003e-06, "loss": 1.6838, "step": 20 }, { "epoch": 0.02142857142857143, "grad_norm": 13.098968505859375, "learning_rate": 3e-06, "loss": 1.86, "step": 30 }, { "epoch": 0.02857142857142857, "grad_norm": 10.79481315612793, "learning_rate": 4.000000000000001e-06, "loss": 1.7851, "step": 40 }, { "epoch": 0.03571428571428571, "grad_norm": 11.62800121307373, "learning_rate": 5e-06, "loss": 1.4648, "step": 50 }, { "epoch": 0.04285714285714286, "grad_norm": 9.00180721282959, "learning_rate": 6e-06, "loss": 1.4355, "step": 60 }, { "epoch": 0.05, "grad_norm": 11.218201637268066, "learning_rate": 7.000000000000001e-06, "loss": 1.3977, "step": 70 }, { "epoch": 0.05714285714285714, "grad_norm": 7.159872531890869, "learning_rate": 8.000000000000001e-06, "loss": 0.9774, "step": 80 }, { "epoch": 0.06428571428571428, "grad_norm": 6.163649559020996, "learning_rate": 9e-06, "loss": 0.8556, "step": 90 }, { "epoch": 0.07142857142857142, "grad_norm": 4.800461292266846, "learning_rate": 1e-05, "loss": 0.8627, "step": 100 }, { "epoch": 0.07857142857142857, "grad_norm": 4.373474597930908, "learning_rate": 1.1000000000000001e-05, "loss": 0.7674, "step": 110 }, { "epoch": 0.08571428571428572, "grad_norm": 4.36292839050293, "learning_rate": 1.2e-05, "loss": 0.7035, "step": 120 }, { "epoch": 0.09285714285714286, "grad_norm": 4.634104251861572, "learning_rate": 1.3000000000000001e-05, "loss": 0.9197, "step": 130 }, { "epoch": 0.1, "grad_norm": 4.442883491516113, "learning_rate": 1.4000000000000001e-05, "loss": 0.7712, "step": 140 }, { "epoch": 0.10714285714285714, "grad_norm": 3.7063419818878174, "learning_rate": 1.5e-05, "loss": 0.8602, "step": 150 }, { "epoch": 0.11428571428571428, "grad_norm": 3.7267696857452393, "learning_rate": 1.6000000000000003e-05, "loss": 0.6758, "step": 160 }, { "epoch": 0.12142857142857143, "grad_norm": 3.7582225799560547, "learning_rate": 1.7000000000000003e-05, "loss": 0.8091, "step": 170 }, { "epoch": 0.12857142857142856, "grad_norm": 2.829885482788086, "learning_rate": 1.8e-05, "loss": 0.8014, "step": 180 }, { "epoch": 0.1357142857142857, "grad_norm": 3.4555258750915527, "learning_rate": 1.9e-05, "loss": 0.5562, "step": 190 }, { "epoch": 0.14285714285714285, "grad_norm": 3.120464563369751, "learning_rate": 2e-05, "loss": 0.6391, "step": 200 }, { "epoch": 0.15, "grad_norm": 2.8185417652130127, "learning_rate": 2.1e-05, "loss": 0.6501, "step": 210 }, { "epoch": 0.15714285714285714, "grad_norm": 2.7110323905944824, "learning_rate": 2.2000000000000003e-05, "loss": 0.9029, "step": 220 }, { "epoch": 0.16428571428571428, "grad_norm": 3.336864709854126, "learning_rate": 2.3000000000000003e-05, "loss": 0.6938, "step": 230 }, { "epoch": 0.17142857142857143, "grad_norm": 2.9769392013549805, "learning_rate": 2.4e-05, "loss": 0.6322, "step": 240 }, { "epoch": 0.17857142857142858, "grad_norm": 2.5426135063171387, "learning_rate": 2.5e-05, "loss": 0.752, "step": 250 }, { "epoch": 0.18571428571428572, "grad_norm": 3.2473714351654053, "learning_rate": 2.6000000000000002e-05, "loss": 0.5993, "step": 260 }, { "epoch": 0.19285714285714287, "grad_norm": 2.9979186058044434, "learning_rate": 2.7000000000000002e-05, "loss": 0.5928, "step": 270 }, { "epoch": 0.2, "grad_norm": 3.1635003089904785, "learning_rate": 2.8000000000000003e-05, "loss": 0.4335, "step": 280 }, { "epoch": 0.20714285714285716, "grad_norm": 4.114761829376221, "learning_rate": 2.9e-05, "loss": 0.5023, "step": 290 }, { "epoch": 0.21428571428571427, "grad_norm": 2.0567097663879395, "learning_rate": 3e-05, "loss": 0.5124, "step": 300 }, { "epoch": 0.22142857142857142, "grad_norm": 3.0209622383117676, "learning_rate": 3.1e-05, "loss": 0.5092, "step": 310 }, { "epoch": 0.22857142857142856, "grad_norm": 1.8497462272644043, "learning_rate": 3.2000000000000005e-05, "loss": 0.6075, "step": 320 }, { "epoch": 0.2357142857142857, "grad_norm": 1.6237268447875977, "learning_rate": 3.3e-05, "loss": 0.5343, "step": 330 }, { "epoch": 0.24285714285714285, "grad_norm": 2.9820289611816406, "learning_rate": 3.4000000000000007e-05, "loss": 0.5516, "step": 340 }, { "epoch": 0.25, "grad_norm": 1.676515817642212, "learning_rate": 3.5e-05, "loss": 0.6621, "step": 350 }, { "epoch": 0.2571428571428571, "grad_norm": 3.4376354217529297, "learning_rate": 3.6e-05, "loss": 0.4768, "step": 360 }, { "epoch": 0.2642857142857143, "grad_norm": 3.2355964183807373, "learning_rate": 3.7e-05, "loss": 0.6184, "step": 370 }, { "epoch": 0.2714285714285714, "grad_norm": 2.2971713542938232, "learning_rate": 3.8e-05, "loss": 0.7827, "step": 380 }, { "epoch": 0.2785714285714286, "grad_norm": 2.442052125930786, "learning_rate": 3.9000000000000006e-05, "loss": 0.5901, "step": 390 }, { "epoch": 0.2857142857142857, "grad_norm": 2.3172521591186523, "learning_rate": 4e-05, "loss": 0.6067, "step": 400 }, { "epoch": 0.29285714285714287, "grad_norm": 2.06640887260437, "learning_rate": 4.1e-05, "loss": 0.6589, "step": 410 }, { "epoch": 0.3, "grad_norm": 2.416149854660034, "learning_rate": 4.2e-05, "loss": 0.6489, "step": 420 }, { "epoch": 0.30714285714285716, "grad_norm": 2.340235471725464, "learning_rate": 4.3e-05, "loss": 0.8339, "step": 430 }, { "epoch": 0.3142857142857143, "grad_norm": 1.9825040102005005, "learning_rate": 4.4000000000000006e-05, "loss": 0.7415, "step": 440 }, { "epoch": 0.32142857142857145, "grad_norm": 1.8823323249816895, "learning_rate": 4.5e-05, "loss": 0.611, "step": 450 }, { "epoch": 0.32857142857142857, "grad_norm": 2.3207123279571533, "learning_rate": 4.600000000000001e-05, "loss": 0.6172, "step": 460 }, { "epoch": 0.3357142857142857, "grad_norm": 1.2963736057281494, "learning_rate": 4.7e-05, "loss": 0.4114, "step": 470 }, { "epoch": 0.34285714285714286, "grad_norm": 2.191009044647217, "learning_rate": 4.8e-05, "loss": 0.6251, "step": 480 }, { "epoch": 0.35, "grad_norm": 2.1893374919891357, "learning_rate": 4.9e-05, "loss": 0.5966, "step": 490 }, { "epoch": 0.35714285714285715, "grad_norm": 2.214414596557617, "learning_rate": 5e-05, "loss": 0.7495, "step": 500 }, { "epoch": 0.36428571428571427, "grad_norm": 1.8343987464904785, "learning_rate": 4.9962962962962964e-05, "loss": 0.6179, "step": 510 }, { "epoch": 0.37142857142857144, "grad_norm": 2.675177574157715, "learning_rate": 4.9925925925925926e-05, "loss": 0.7262, "step": 520 }, { "epoch": 0.37857142857142856, "grad_norm": 1.7133303880691528, "learning_rate": 4.9888888888888894e-05, "loss": 0.546, "step": 530 }, { "epoch": 0.38571428571428573, "grad_norm": 1.4926049709320068, "learning_rate": 4.9851851851851855e-05, "loss": 0.4057, "step": 540 }, { "epoch": 0.39285714285714285, "grad_norm": 2.0434134006500244, "learning_rate": 4.981481481481482e-05, "loss": 1.014, "step": 550 }, { "epoch": 0.4, "grad_norm": 2.037074089050293, "learning_rate": 4.977777777777778e-05, "loss": 0.6088, "step": 560 }, { "epoch": 0.40714285714285714, "grad_norm": 2.6729607582092285, "learning_rate": 4.974074074074074e-05, "loss": 0.5986, "step": 570 }, { "epoch": 0.4142857142857143, "grad_norm": 1.8161852359771729, "learning_rate": 4.970370370370371e-05, "loss": 0.4681, "step": 580 }, { "epoch": 0.42142857142857143, "grad_norm": 2.140554666519165, "learning_rate": 4.966666666666667e-05, "loss": 0.5865, "step": 590 }, { "epoch": 0.42857142857142855, "grad_norm": 2.3027639389038086, "learning_rate": 4.962962962962963e-05, "loss": 0.4166, "step": 600 }, { "epoch": 0.4357142857142857, "grad_norm": 2.973132371902466, "learning_rate": 4.959259259259259e-05, "loss": 0.6394, "step": 610 }, { "epoch": 0.44285714285714284, "grad_norm": 2.898897886276245, "learning_rate": 4.955555555555556e-05, "loss": 0.5572, "step": 620 }, { "epoch": 0.45, "grad_norm": 2.100752353668213, "learning_rate": 4.951851851851852e-05, "loss": 0.4788, "step": 630 }, { "epoch": 0.45714285714285713, "grad_norm": 2.4735984802246094, "learning_rate": 4.9481481481481485e-05, "loss": 0.756, "step": 640 }, { "epoch": 0.4642857142857143, "grad_norm": 1.5895243883132935, "learning_rate": 4.9444444444444446e-05, "loss": 0.5265, "step": 650 }, { "epoch": 0.4714285714285714, "grad_norm": 2.067650079727173, "learning_rate": 4.940740740740741e-05, "loss": 0.6079, "step": 660 }, { "epoch": 0.4785714285714286, "grad_norm": 1.6676874160766602, "learning_rate": 4.937037037037037e-05, "loss": 0.5196, "step": 670 }, { "epoch": 0.4857142857142857, "grad_norm": 1.6084502935409546, "learning_rate": 4.933333333333334e-05, "loss": 0.431, "step": 680 }, { "epoch": 0.4928571428571429, "grad_norm": 2.8858065605163574, "learning_rate": 4.92962962962963e-05, "loss": 0.5329, "step": 690 }, { "epoch": 0.5, "grad_norm": 2.657158613204956, "learning_rate": 4.925925925925926e-05, "loss": 0.5092, "step": 700 }, { "epoch": 0.5071428571428571, "grad_norm": 2.636237144470215, "learning_rate": 4.922222222222222e-05, "loss": 0.4074, "step": 710 }, { "epoch": 0.5142857142857142, "grad_norm": 2.5960123538970947, "learning_rate": 4.918518518518519e-05, "loss": 0.424, "step": 720 }, { "epoch": 0.5214285714285715, "grad_norm": 1.7363989353179932, "learning_rate": 4.9148148148148145e-05, "loss": 0.7226, "step": 730 }, { "epoch": 0.5285714285714286, "grad_norm": 2.8367726802825928, "learning_rate": 4.9111111111111114e-05, "loss": 0.5535, "step": 740 }, { "epoch": 0.5357142857142857, "grad_norm": 2.1372838020324707, "learning_rate": 4.9074074074074075e-05, "loss": 0.6546, "step": 750 }, { "epoch": 0.5428571428571428, "grad_norm": 1.9456530809402466, "learning_rate": 4.903703703703704e-05, "loss": 0.5718, "step": 760 }, { "epoch": 0.55, "grad_norm": 3.146430015563965, "learning_rate": 4.9e-05, "loss": 0.486, "step": 770 }, { "epoch": 0.5571428571428572, "grad_norm": 1.633537769317627, "learning_rate": 4.896296296296297e-05, "loss": 0.4602, "step": 780 }, { "epoch": 0.5642857142857143, "grad_norm": 3.580615282058716, "learning_rate": 4.892592592592593e-05, "loss": 0.5991, "step": 790 }, { "epoch": 0.5714285714285714, "grad_norm": 2.354482889175415, "learning_rate": 4.888888888888889e-05, "loss": 0.639, "step": 800 }, { "epoch": 0.5785714285714286, "grad_norm": 1.701165795326233, "learning_rate": 4.885185185185185e-05, "loss": 0.4895, "step": 810 }, { "epoch": 0.5857142857142857, "grad_norm": 1.7530277967453003, "learning_rate": 4.881481481481482e-05, "loss": 0.5029, "step": 820 }, { "epoch": 0.5928571428571429, "grad_norm": 1.4377954006195068, "learning_rate": 4.8777777777777775e-05, "loss": 0.4668, "step": 830 }, { "epoch": 0.6, "grad_norm": 1.9733954668045044, "learning_rate": 4.874074074074074e-05, "loss": 0.6434, "step": 840 }, { "epoch": 0.6071428571428571, "grad_norm": 1.5659862756729126, "learning_rate": 4.8703703703703704e-05, "loss": 0.4719, "step": 850 }, { "epoch": 0.6142857142857143, "grad_norm": 1.9549959897994995, "learning_rate": 4.866666666666667e-05, "loss": 0.5003, "step": 860 }, { "epoch": 0.6214285714285714, "grad_norm": 2.0998220443725586, "learning_rate": 4.862962962962963e-05, "loss": 0.4666, "step": 870 }, { "epoch": 0.6285714285714286, "grad_norm": 1.6551822423934937, "learning_rate": 4.8592592592592596e-05, "loss": 0.5508, "step": 880 }, { "epoch": 0.6357142857142857, "grad_norm": 2.268826723098755, "learning_rate": 4.855555555555556e-05, "loss": 0.5333, "step": 890 }, { "epoch": 0.6428571428571429, "grad_norm": 2.170297861099243, "learning_rate": 4.851851851851852e-05, "loss": 0.4724, "step": 900 }, { "epoch": 0.65, "grad_norm": 2.3737900257110596, "learning_rate": 4.848148148148148e-05, "loss": 0.5938, "step": 910 }, { "epoch": 0.6571428571428571, "grad_norm": 1.5697389841079712, "learning_rate": 4.844444444444445e-05, "loss": 0.357, "step": 920 }, { "epoch": 0.6642857142857143, "grad_norm": 1.4354273080825806, "learning_rate": 4.840740740740741e-05, "loss": 0.2648, "step": 930 }, { "epoch": 0.6714285714285714, "grad_norm": 1.1631938219070435, "learning_rate": 4.837037037037037e-05, "loss": 0.4647, "step": 940 }, { "epoch": 0.6785714285714286, "grad_norm": 2.594999313354492, "learning_rate": 4.8333333333333334e-05, "loss": 0.6831, "step": 950 }, { "epoch": 0.6857142857142857, "grad_norm": 2.2979557514190674, "learning_rate": 4.82962962962963e-05, "loss": 0.4363, "step": 960 }, { "epoch": 0.6928571428571428, "grad_norm": 3.0777416229248047, "learning_rate": 4.825925925925926e-05, "loss": 0.5995, "step": 970 }, { "epoch": 0.7, "grad_norm": 2.430807113647461, "learning_rate": 4.8222222222222225e-05, "loss": 0.6433, "step": 980 }, { "epoch": 0.7071428571428572, "grad_norm": 1.7465846538543701, "learning_rate": 4.818518518518519e-05, "loss": 0.4973, "step": 990 }, { "epoch": 0.7142857142857143, "grad_norm": 2.805053472518921, "learning_rate": 4.814814814814815e-05, "loss": 0.556, "step": 1000 }, { "epoch": 0.7142857142857143, "eval_loss": 0.4978465139865875, "eval_rouge1": 0.8844, "eval_rouge2": 0.8183, "eval_rougeL": 0.8811, "eval_runtime": 122.1438, "eval_samples_per_second": 11.462, "eval_steps_per_second": 5.731, "step": 1000 }, { "epoch": 0.7214285714285714, "grad_norm": 1.6127879619598389, "learning_rate": 4.811111111111111e-05, "loss": 0.5748, "step": 1010 }, { "epoch": 0.7285714285714285, "grad_norm": 1.1071356534957886, "learning_rate": 4.807407407407408e-05, "loss": 0.5246, "step": 1020 }, { "epoch": 0.7357142857142858, "grad_norm": 1.9362713098526, "learning_rate": 4.803703703703704e-05, "loss": 0.563, "step": 1030 }, { "epoch": 0.7428571428571429, "grad_norm": 2.037553548812866, "learning_rate": 4.8e-05, "loss": 0.466, "step": 1040 }, { "epoch": 0.75, "grad_norm": 2.196617841720581, "learning_rate": 4.796296296296296e-05, "loss": 0.5647, "step": 1050 }, { "epoch": 0.7571428571428571, "grad_norm": 1.278428077697754, "learning_rate": 4.792592592592593e-05, "loss": 0.4821, "step": 1060 }, { "epoch": 0.7642857142857142, "grad_norm": 1.3506104946136475, "learning_rate": 4.7888888888888886e-05, "loss": 0.5194, "step": 1070 }, { "epoch": 0.7714285714285715, "grad_norm": 2.3870656490325928, "learning_rate": 4.7851851851851854e-05, "loss": 0.7373, "step": 1080 }, { "epoch": 0.7785714285714286, "grad_norm": 2.071242094039917, "learning_rate": 4.7814814814814816e-05, "loss": 0.5598, "step": 1090 }, { "epoch": 0.7857142857142857, "grad_norm": 1.8460086584091187, "learning_rate": 4.7777777777777784e-05, "loss": 0.6184, "step": 1100 }, { "epoch": 0.7928571428571428, "grad_norm": 3.804724931716919, "learning_rate": 4.774074074074074e-05, "loss": 0.5978, "step": 1110 }, { "epoch": 0.8, "grad_norm": 2.614772081375122, "learning_rate": 4.770370370370371e-05, "loss": 0.6203, "step": 1120 }, { "epoch": 0.8071428571428572, "grad_norm": 2.068122386932373, "learning_rate": 4.766666666666667e-05, "loss": 0.6149, "step": 1130 }, { "epoch": 0.8142857142857143, "grad_norm": 1.675881266593933, "learning_rate": 4.762962962962963e-05, "loss": 0.4437, "step": 1140 }, { "epoch": 0.8214285714285714, "grad_norm": 1.865435004234314, "learning_rate": 4.759259259259259e-05, "loss": 0.5166, "step": 1150 }, { "epoch": 0.8285714285714286, "grad_norm": 1.0480509996414185, "learning_rate": 4.755555555555556e-05, "loss": 0.3827, "step": 1160 }, { "epoch": 0.8357142857142857, "grad_norm": 2.218554735183716, "learning_rate": 4.751851851851852e-05, "loss": 0.6641, "step": 1170 }, { "epoch": 0.8428571428571429, "grad_norm": 2.510831832885742, "learning_rate": 4.7481481481481483e-05, "loss": 0.496, "step": 1180 }, { "epoch": 0.85, "grad_norm": 1.8328824043273926, "learning_rate": 4.7444444444444445e-05, "loss": 0.5466, "step": 1190 }, { "epoch": 0.8571428571428571, "grad_norm": 1.5480728149414062, "learning_rate": 4.740740740740741e-05, "loss": 0.4992, "step": 1200 }, { "epoch": 0.8642857142857143, "grad_norm": 1.3723492622375488, "learning_rate": 4.737037037037037e-05, "loss": 0.5014, "step": 1210 }, { "epoch": 0.8714285714285714, "grad_norm": 1.7510666847229004, "learning_rate": 4.7333333333333336e-05, "loss": 0.5471, "step": 1220 }, { "epoch": 0.8785714285714286, "grad_norm": 2.2057995796203613, "learning_rate": 4.72962962962963e-05, "loss": 0.6142, "step": 1230 }, { "epoch": 0.8857142857142857, "grad_norm": 1.7922954559326172, "learning_rate": 4.7259259259259266e-05, "loss": 0.5199, "step": 1240 }, { "epoch": 0.8928571428571429, "grad_norm": 1.9541053771972656, "learning_rate": 4.722222222222222e-05, "loss": 0.44, "step": 1250 }, { "epoch": 0.9, "grad_norm": 1.2869590520858765, "learning_rate": 4.718518518518519e-05, "loss": 0.5157, "step": 1260 }, { "epoch": 0.9071428571428571, "grad_norm": 1.7564722299575806, "learning_rate": 4.714814814814815e-05, "loss": 0.4985, "step": 1270 }, { "epoch": 0.9142857142857143, "grad_norm": 1.0782675743103027, "learning_rate": 4.711111111111111e-05, "loss": 0.3195, "step": 1280 }, { "epoch": 0.9214285714285714, "grad_norm": 1.7535449266433716, "learning_rate": 4.7074074074074074e-05, "loss": 0.376, "step": 1290 }, { "epoch": 0.9285714285714286, "grad_norm": 1.581485629081726, "learning_rate": 4.703703703703704e-05, "loss": 0.5975, "step": 1300 }, { "epoch": 0.9357142857142857, "grad_norm": 2.739900827407837, "learning_rate": 4.7e-05, "loss": 0.457, "step": 1310 }, { "epoch": 0.9428571428571428, "grad_norm": 2.382187604904175, "learning_rate": 4.6962962962962966e-05, "loss": 0.5424, "step": 1320 }, { "epoch": 0.95, "grad_norm": 1.75946843624115, "learning_rate": 4.692592592592593e-05, "loss": 0.3563, "step": 1330 }, { "epoch": 0.9571428571428572, "grad_norm": 1.8159079551696777, "learning_rate": 4.6888888888888895e-05, "loss": 0.4502, "step": 1340 }, { "epoch": 0.9642857142857143, "grad_norm": 2.605283260345459, "learning_rate": 4.685185185185185e-05, "loss": 0.4779, "step": 1350 }, { "epoch": 0.9714285714285714, "grad_norm": 2.594231605529785, "learning_rate": 4.681481481481482e-05, "loss": 0.4901, "step": 1360 }, { "epoch": 0.9785714285714285, "grad_norm": 2.109367609024048, "learning_rate": 4.677777777777778e-05, "loss": 0.5378, "step": 1370 }, { "epoch": 0.9857142857142858, "grad_norm": 1.960496425628662, "learning_rate": 4.674074074074074e-05, "loss": 0.6129, "step": 1380 }, { "epoch": 0.9928571428571429, "grad_norm": 3.4135870933532715, "learning_rate": 4.67037037037037e-05, "loss": 0.7069, "step": 1390 }, { "epoch": 1.0, "grad_norm": 1.441308617591858, "learning_rate": 4.666666666666667e-05, "loss": 0.4686, "step": 1400 }, { "epoch": 1.0071428571428571, "grad_norm": 1.9842432737350464, "learning_rate": 4.662962962962963e-05, "loss": 0.604, "step": 1410 }, { "epoch": 1.0142857142857142, "grad_norm": 1.3867950439453125, "learning_rate": 4.6592592592592595e-05, "loss": 0.4168, "step": 1420 }, { "epoch": 1.0214285714285714, "grad_norm": 2.118037462234497, "learning_rate": 4.6555555555555556e-05, "loss": 0.6484, "step": 1430 }, { "epoch": 1.0285714285714285, "grad_norm": 1.4064522981643677, "learning_rate": 4.6518518518518525e-05, "loss": 0.5275, "step": 1440 }, { "epoch": 1.0357142857142858, "grad_norm": 2.644491672515869, "learning_rate": 4.648148148148148e-05, "loss": 0.5361, "step": 1450 }, { "epoch": 1.042857142857143, "grad_norm": 1.4005937576293945, "learning_rate": 4.644444444444445e-05, "loss": 0.4497, "step": 1460 }, { "epoch": 1.05, "grad_norm": 1.773334264755249, "learning_rate": 4.640740740740741e-05, "loss": 0.4372, "step": 1470 }, { "epoch": 1.0571428571428572, "grad_norm": 2.1667587757110596, "learning_rate": 4.637037037037038e-05, "loss": 0.5211, "step": 1480 }, { "epoch": 1.0642857142857143, "grad_norm": 1.1993277072906494, "learning_rate": 4.633333333333333e-05, "loss": 0.3694, "step": 1490 }, { "epoch": 1.0714285714285714, "grad_norm": 1.5526480674743652, "learning_rate": 4.62962962962963e-05, "loss": 0.686, "step": 1500 }, { "epoch": 1.0785714285714285, "grad_norm": 1.5041449069976807, "learning_rate": 4.625925925925926e-05, "loss": 0.4536, "step": 1510 }, { "epoch": 1.0857142857142856, "grad_norm": 1.719254970550537, "learning_rate": 4.6222222222222224e-05, "loss": 0.4687, "step": 1520 }, { "epoch": 1.092857142857143, "grad_norm": 1.9565083980560303, "learning_rate": 4.6185185185185185e-05, "loss": 0.4054, "step": 1530 }, { "epoch": 1.1, "grad_norm": 1.2271467447280884, "learning_rate": 4.6148148148148154e-05, "loss": 0.4189, "step": 1540 }, { "epoch": 1.1071428571428572, "grad_norm": 1.731244683265686, "learning_rate": 4.6111111111111115e-05, "loss": 0.4519, "step": 1550 }, { "epoch": 1.1142857142857143, "grad_norm": 1.3039075136184692, "learning_rate": 4.607407407407408e-05, "loss": 0.3911, "step": 1560 }, { "epoch": 1.1214285714285714, "grad_norm": 1.3420417308807373, "learning_rate": 4.603703703703704e-05, "loss": 0.4239, "step": 1570 }, { "epoch": 1.1285714285714286, "grad_norm": 2.2307205200195312, "learning_rate": 4.600000000000001e-05, "loss": 0.4675, "step": 1580 }, { "epoch": 1.1357142857142857, "grad_norm": 2.384147882461548, "learning_rate": 4.596296296296296e-05, "loss": 0.3963, "step": 1590 }, { "epoch": 1.1428571428571428, "grad_norm": 1.6016713380813599, "learning_rate": 4.592592592592593e-05, "loss": 0.4561, "step": 1600 }, { "epoch": 1.15, "grad_norm": 1.4093197584152222, "learning_rate": 4.588888888888889e-05, "loss": 0.4708, "step": 1610 }, { "epoch": 1.157142857142857, "grad_norm": 1.9773272275924683, "learning_rate": 4.585185185185185e-05, "loss": 0.5259, "step": 1620 }, { "epoch": 1.1642857142857144, "grad_norm": 1.169757604598999, "learning_rate": 4.5814814814814815e-05, "loss": 0.3413, "step": 1630 }, { "epoch": 1.1714285714285715, "grad_norm": 2.1033947467803955, "learning_rate": 4.577777777777778e-05, "loss": 0.4888, "step": 1640 }, { "epoch": 1.1785714285714286, "grad_norm": 1.2455283403396606, "learning_rate": 4.5740740740740745e-05, "loss": 0.5935, "step": 1650 }, { "epoch": 1.1857142857142857, "grad_norm": 1.283308982849121, "learning_rate": 4.5703703703703706e-05, "loss": 0.3946, "step": 1660 }, { "epoch": 1.1928571428571428, "grad_norm": 1.9639955759048462, "learning_rate": 4.566666666666667e-05, "loss": 0.519, "step": 1670 }, { "epoch": 1.2, "grad_norm": 0.9380689263343811, "learning_rate": 4.5629629629629636e-05, "loss": 0.3357, "step": 1680 }, { "epoch": 1.207142857142857, "grad_norm": 2.330310344696045, "learning_rate": 4.559259259259259e-05, "loss": 0.5135, "step": 1690 }, { "epoch": 1.2142857142857142, "grad_norm": 1.5911920070648193, "learning_rate": 4.555555555555556e-05, "loss": 0.4165, "step": 1700 }, { "epoch": 1.2214285714285715, "grad_norm": 1.7522234916687012, "learning_rate": 4.551851851851852e-05, "loss": 0.5797, "step": 1710 }, { "epoch": 1.2285714285714286, "grad_norm": 2.265571355819702, "learning_rate": 4.548148148148149e-05, "loss": 0.3943, "step": 1720 }, { "epoch": 1.2357142857142858, "grad_norm": 2.530675172805786, "learning_rate": 4.5444444444444444e-05, "loss": 0.6279, "step": 1730 }, { "epoch": 1.2428571428571429, "grad_norm": 2.072864055633545, "learning_rate": 4.540740740740741e-05, "loss": 0.512, "step": 1740 }, { "epoch": 1.25, "grad_norm": 1.5505369901657104, "learning_rate": 4.5370370370370374e-05, "loss": 0.3494, "step": 1750 }, { "epoch": 1.2571428571428571, "grad_norm": 1.9888116121292114, "learning_rate": 4.5333333333333335e-05, "loss": 0.5841, "step": 1760 }, { "epoch": 1.2642857142857142, "grad_norm": 1.6056774854660034, "learning_rate": 4.52962962962963e-05, "loss": 0.5611, "step": 1770 }, { "epoch": 1.2714285714285714, "grad_norm": 1.7950221300125122, "learning_rate": 4.5259259259259265e-05, "loss": 0.6097, "step": 1780 }, { "epoch": 1.2785714285714285, "grad_norm": 1.8906399011611938, "learning_rate": 4.522222222222223e-05, "loss": 0.4837, "step": 1790 }, { "epoch": 1.2857142857142856, "grad_norm": 1.3988184928894043, "learning_rate": 4.518518518518519e-05, "loss": 0.4806, "step": 1800 }, { "epoch": 1.292857142857143, "grad_norm": 1.160243272781372, "learning_rate": 4.514814814814815e-05, "loss": 0.5224, "step": 1810 }, { "epoch": 1.3, "grad_norm": 1.0152113437652588, "learning_rate": 4.511111111111112e-05, "loss": 0.4115, "step": 1820 }, { "epoch": 1.3071428571428572, "grad_norm": 1.6176999807357788, "learning_rate": 4.507407407407407e-05, "loss": 0.4458, "step": 1830 }, { "epoch": 1.3142857142857143, "grad_norm": 1.904784917831421, "learning_rate": 4.503703703703704e-05, "loss": 0.5552, "step": 1840 }, { "epoch": 1.3214285714285714, "grad_norm": 1.0539710521697998, "learning_rate": 4.5e-05, "loss": 0.438, "step": 1850 }, { "epoch": 1.3285714285714285, "grad_norm": 1.3552178144454956, "learning_rate": 4.496296296296297e-05, "loss": 0.2862, "step": 1860 }, { "epoch": 1.3357142857142856, "grad_norm": 1.3787767887115479, "learning_rate": 4.4925925925925926e-05, "loss": 0.5173, "step": 1870 }, { "epoch": 1.342857142857143, "grad_norm": 2.570422649383545, "learning_rate": 4.4888888888888894e-05, "loss": 0.4581, "step": 1880 }, { "epoch": 1.35, "grad_norm": 1.5974104404449463, "learning_rate": 4.4851851851851856e-05, "loss": 0.4599, "step": 1890 }, { "epoch": 1.3571428571428572, "grad_norm": 1.4105775356292725, "learning_rate": 4.481481481481482e-05, "loss": 0.3823, "step": 1900 }, { "epoch": 1.3642857142857143, "grad_norm": 2.1751532554626465, "learning_rate": 4.477777777777778e-05, "loss": 0.4421, "step": 1910 }, { "epoch": 1.3714285714285714, "grad_norm": 1.9956297874450684, "learning_rate": 4.474074074074075e-05, "loss": 0.4082, "step": 1920 }, { "epoch": 1.3785714285714286, "grad_norm": 1.6159803867340088, "learning_rate": 4.47037037037037e-05, "loss": 0.3961, "step": 1930 }, { "epoch": 1.3857142857142857, "grad_norm": 1.4909430742263794, "learning_rate": 4.466666666666667e-05, "loss": 0.4635, "step": 1940 }, { "epoch": 1.3928571428571428, "grad_norm": 1.5630055665969849, "learning_rate": 4.462962962962963e-05, "loss": 0.5968, "step": 1950 }, { "epoch": 1.4, "grad_norm": 1.2496933937072754, "learning_rate": 4.4592592592592594e-05, "loss": 0.4546, "step": 1960 }, { "epoch": 1.407142857142857, "grad_norm": 1.6497224569320679, "learning_rate": 4.4555555555555555e-05, "loss": 0.354, "step": 1970 }, { "epoch": 1.4142857142857144, "grad_norm": 2.069955587387085, "learning_rate": 4.4518518518518523e-05, "loss": 0.4388, "step": 1980 }, { "epoch": 1.4214285714285715, "grad_norm": 1.6338075399398804, "learning_rate": 4.4481481481481485e-05, "loss": 0.5459, "step": 1990 }, { "epoch": 1.4285714285714286, "grad_norm": 1.3558902740478516, "learning_rate": 4.4444444444444447e-05, "loss": 0.3139, "step": 2000 }, { "epoch": 1.4285714285714286, "eval_loss": 0.43829917907714844, "eval_rouge1": 0.8911, "eval_rouge2": 0.8251, "eval_rougeL": 0.8873, "eval_runtime": 121.8873, "eval_samples_per_second": 11.486, "eval_steps_per_second": 5.743, "step": 2000 }, { "epoch": 1.4357142857142857, "grad_norm": 1.8123821020126343, "learning_rate": 4.440740740740741e-05, "loss": 0.5864, "step": 2010 }, { "epoch": 1.4428571428571428, "grad_norm": 0.8494770526885986, "learning_rate": 4.4370370370370376e-05, "loss": 0.3284, "step": 2020 }, { "epoch": 1.45, "grad_norm": 2.2536141872406006, "learning_rate": 4.433333333333334e-05, "loss": 0.3738, "step": 2030 }, { "epoch": 1.457142857142857, "grad_norm": 2.971925735473633, "learning_rate": 4.42962962962963e-05, "loss": 0.5294, "step": 2040 }, { "epoch": 1.4642857142857144, "grad_norm": 1.7820425033569336, "learning_rate": 4.425925925925926e-05, "loss": 0.505, "step": 2050 }, { "epoch": 1.4714285714285715, "grad_norm": 1.196044683456421, "learning_rate": 4.422222222222222e-05, "loss": 0.3921, "step": 2060 }, { "epoch": 1.4785714285714286, "grad_norm": 0.9053621888160706, "learning_rate": 4.4185185185185184e-05, "loss": 0.2386, "step": 2070 }, { "epoch": 1.4857142857142858, "grad_norm": 1.8388108015060425, "learning_rate": 4.414814814814815e-05, "loss": 0.4309, "step": 2080 }, { "epoch": 1.4928571428571429, "grad_norm": 2.25136137008667, "learning_rate": 4.4111111111111114e-05, "loss": 0.3918, "step": 2090 }, { "epoch": 1.5, "grad_norm": 1.960864782333374, "learning_rate": 4.4074074074074076e-05, "loss": 0.4754, "step": 2100 }, { "epoch": 1.5071428571428571, "grad_norm": 2.4653213024139404, "learning_rate": 4.403703703703704e-05, "loss": 0.4545, "step": 2110 }, { "epoch": 1.5142857142857142, "grad_norm": 1.8694462776184082, "learning_rate": 4.4000000000000006e-05, "loss": 0.3707, "step": 2120 }, { "epoch": 1.5214285714285714, "grad_norm": 2.240447521209717, "learning_rate": 4.396296296296297e-05, "loss": 0.5174, "step": 2130 }, { "epoch": 1.5285714285714285, "grad_norm": 0.8589600920677185, "learning_rate": 4.392592592592593e-05, "loss": 0.3802, "step": 2140 }, { "epoch": 1.5357142857142856, "grad_norm": 2.495075225830078, "learning_rate": 4.388888888888889e-05, "loss": 0.4735, "step": 2150 }, { "epoch": 1.5428571428571427, "grad_norm": 1.5384397506713867, "learning_rate": 4.385185185185185e-05, "loss": 0.4844, "step": 2160 }, { "epoch": 1.55, "grad_norm": 1.2120758295059204, "learning_rate": 4.381481481481482e-05, "loss": 0.3029, "step": 2170 }, { "epoch": 1.5571428571428572, "grad_norm": 2.0210671424865723, "learning_rate": 4.377777777777778e-05, "loss": 0.688, "step": 2180 }, { "epoch": 1.5642857142857143, "grad_norm": 2.322322368621826, "learning_rate": 4.374074074074074e-05, "loss": 0.4673, "step": 2190 }, { "epoch": 1.5714285714285714, "grad_norm": 1.8948960304260254, "learning_rate": 4.3703703703703705e-05, "loss": 0.3698, "step": 2200 }, { "epoch": 1.5785714285714287, "grad_norm": 1.776141881942749, "learning_rate": 4.3666666666666666e-05, "loss": 0.3611, "step": 2210 }, { "epoch": 1.5857142857142859, "grad_norm": 2.8628015518188477, "learning_rate": 4.3629629629629635e-05, "loss": 0.4504, "step": 2220 }, { "epoch": 1.592857142857143, "grad_norm": 1.8579275608062744, "learning_rate": 4.3592592592592596e-05, "loss": 0.5131, "step": 2230 }, { "epoch": 1.6, "grad_norm": 1.1070181131362915, "learning_rate": 4.355555555555556e-05, "loss": 0.4187, "step": 2240 }, { "epoch": 1.6071428571428572, "grad_norm": 1.3833059072494507, "learning_rate": 4.351851851851852e-05, "loss": 0.4301, "step": 2250 }, { "epoch": 1.6142857142857143, "grad_norm": 1.6870567798614502, "learning_rate": 4.348148148148148e-05, "loss": 0.5542, "step": 2260 }, { "epoch": 1.6214285714285714, "grad_norm": 1.582582712173462, "learning_rate": 4.344444444444445e-05, "loss": 0.5192, "step": 2270 }, { "epoch": 1.6285714285714286, "grad_norm": 3.3700509071350098, "learning_rate": 4.340740740740741e-05, "loss": 0.3268, "step": 2280 }, { "epoch": 1.6357142857142857, "grad_norm": 3.0057899951934814, "learning_rate": 4.337037037037037e-05, "loss": 0.3787, "step": 2290 }, { "epoch": 1.6428571428571428, "grad_norm": 1.302416205406189, "learning_rate": 4.3333333333333334e-05, "loss": 0.4793, "step": 2300 }, { "epoch": 1.65, "grad_norm": 2.42720103263855, "learning_rate": 4.3296296296296296e-05, "loss": 0.4124, "step": 2310 }, { "epoch": 1.657142857142857, "grad_norm": 1.455609917640686, "learning_rate": 4.325925925925926e-05, "loss": 0.272, "step": 2320 }, { "epoch": 1.6642857142857141, "grad_norm": 2.1332924365997314, "learning_rate": 4.3222222222222226e-05, "loss": 0.4859, "step": 2330 }, { "epoch": 1.6714285714285713, "grad_norm": 1.977156162261963, "learning_rate": 4.318518518518519e-05, "loss": 0.4017, "step": 2340 }, { "epoch": 1.6785714285714286, "grad_norm": 1.7197158336639404, "learning_rate": 4.314814814814815e-05, "loss": 0.376, "step": 2350 }, { "epoch": 1.6857142857142857, "grad_norm": 0.8615891933441162, "learning_rate": 4.311111111111111e-05, "loss": 0.3383, "step": 2360 }, { "epoch": 1.6928571428571428, "grad_norm": 1.2501980066299438, "learning_rate": 4.307407407407408e-05, "loss": 0.3999, "step": 2370 }, { "epoch": 1.7, "grad_norm": 1.7977019548416138, "learning_rate": 4.303703703703704e-05, "loss": 0.3424, "step": 2380 }, { "epoch": 1.7071428571428573, "grad_norm": 2.265807867050171, "learning_rate": 4.3e-05, "loss": 0.4345, "step": 2390 }, { "epoch": 1.7142857142857144, "grad_norm": 2.348353624343872, "learning_rate": 4.296296296296296e-05, "loss": 0.4395, "step": 2400 }, { "epoch": 1.7214285714285715, "grad_norm": 2.585843801498413, "learning_rate": 4.292592592592593e-05, "loss": 0.5208, "step": 2410 }, { "epoch": 1.7285714285714286, "grad_norm": 1.1487417221069336, "learning_rate": 4.2888888888888886e-05, "loss": 0.4635, "step": 2420 }, { "epoch": 1.7357142857142858, "grad_norm": 1.206634521484375, "learning_rate": 4.2851851851851855e-05, "loss": 0.3521, "step": 2430 }, { "epoch": 1.7428571428571429, "grad_norm": 2.136702299118042, "learning_rate": 4.2814814814814816e-05, "loss": 0.4582, "step": 2440 }, { "epoch": 1.75, "grad_norm": 1.2831017971038818, "learning_rate": 4.277777777777778e-05, "loss": 0.397, "step": 2450 }, { "epoch": 1.7571428571428571, "grad_norm": 2.313405990600586, "learning_rate": 4.274074074074074e-05, "loss": 0.431, "step": 2460 }, { "epoch": 1.7642857142857142, "grad_norm": 1.8922353982925415, "learning_rate": 4.270370370370371e-05, "loss": 0.4396, "step": 2470 }, { "epoch": 1.7714285714285714, "grad_norm": 1.735303521156311, "learning_rate": 4.266666666666667e-05, "loss": 0.4019, "step": 2480 }, { "epoch": 1.7785714285714285, "grad_norm": 1.1989376544952393, "learning_rate": 4.262962962962963e-05, "loss": 0.3317, "step": 2490 }, { "epoch": 1.7857142857142856, "grad_norm": 1.709370732307434, "learning_rate": 4.259259259259259e-05, "loss": 0.4721, "step": 2500 }, { "epoch": 1.7928571428571427, "grad_norm": 1.3655775785446167, "learning_rate": 4.255555555555556e-05, "loss": 0.464, "step": 2510 }, { "epoch": 1.8, "grad_norm": 1.2292691469192505, "learning_rate": 4.2518518518518515e-05, "loss": 0.3765, "step": 2520 }, { "epoch": 1.8071428571428572, "grad_norm": 2.6490797996520996, "learning_rate": 4.2481481481481484e-05, "loss": 0.4989, "step": 2530 }, { "epoch": 1.8142857142857143, "grad_norm": 1.8564647436141968, "learning_rate": 4.2444444444444445e-05, "loss": 0.3776, "step": 2540 }, { "epoch": 1.8214285714285714, "grad_norm": 1.9681627750396729, "learning_rate": 4.240740740740741e-05, "loss": 0.4717, "step": 2550 }, { "epoch": 1.8285714285714287, "grad_norm": 2.1326770782470703, "learning_rate": 4.237037037037037e-05, "loss": 0.3212, "step": 2560 }, { "epoch": 1.8357142857142859, "grad_norm": 1.8122767210006714, "learning_rate": 4.233333333333334e-05, "loss": 0.3619, "step": 2570 }, { "epoch": 1.842857142857143, "grad_norm": 1.4822399616241455, "learning_rate": 4.22962962962963e-05, "loss": 0.4631, "step": 2580 }, { "epoch": 1.85, "grad_norm": 2.278700828552246, "learning_rate": 4.225925925925926e-05, "loss": 0.3018, "step": 2590 }, { "epoch": 1.8571428571428572, "grad_norm": 2.3148486614227295, "learning_rate": 4.222222222222222e-05, "loss": 0.4084, "step": 2600 }, { "epoch": 1.8642857142857143, "grad_norm": 1.5277279615402222, "learning_rate": 4.218518518518519e-05, "loss": 0.5295, "step": 2610 }, { "epoch": 1.8714285714285714, "grad_norm": 1.3603259325027466, "learning_rate": 4.2148148148148145e-05, "loss": 0.4727, "step": 2620 }, { "epoch": 1.8785714285714286, "grad_norm": 1.9577744007110596, "learning_rate": 4.211111111111111e-05, "loss": 0.5915, "step": 2630 }, { "epoch": 1.8857142857142857, "grad_norm": 1.0424437522888184, "learning_rate": 4.2074074074074075e-05, "loss": 0.3386, "step": 2640 }, { "epoch": 1.8928571428571428, "grad_norm": 2.7555553913116455, "learning_rate": 4.203703703703704e-05, "loss": 0.5003, "step": 2650 }, { "epoch": 1.9, "grad_norm": 1.9913907051086426, "learning_rate": 4.2e-05, "loss": 0.3875, "step": 2660 }, { "epoch": 1.907142857142857, "grad_norm": 1.8053233623504639, "learning_rate": 4.1962962962962966e-05, "loss": 0.2318, "step": 2670 }, { "epoch": 1.9142857142857141, "grad_norm": 1.7686558961868286, "learning_rate": 4.192592592592593e-05, "loss": 0.3772, "step": 2680 }, { "epoch": 1.9214285714285713, "grad_norm": 1.4202839136123657, "learning_rate": 4.188888888888889e-05, "loss": 0.3742, "step": 2690 }, { "epoch": 1.9285714285714286, "grad_norm": 2.7964282035827637, "learning_rate": 4.185185185185185e-05, "loss": 0.2901, "step": 2700 }, { "epoch": 1.9357142857142857, "grad_norm": 2.0525360107421875, "learning_rate": 4.181481481481482e-05, "loss": 0.3146, "step": 2710 }, { "epoch": 1.9428571428571428, "grad_norm": 1.937103033065796, "learning_rate": 4.177777777777778e-05, "loss": 0.3871, "step": 2720 }, { "epoch": 1.95, "grad_norm": 2.1534152030944824, "learning_rate": 4.174074074074074e-05, "loss": 0.5309, "step": 2730 }, { "epoch": 1.9571428571428573, "grad_norm": 1.60648512840271, "learning_rate": 4.1703703703703704e-05, "loss": 0.3859, "step": 2740 }, { "epoch": 1.9642857142857144, "grad_norm": 2.2782654762268066, "learning_rate": 4.166666666666667e-05, "loss": 0.3859, "step": 2750 }, { "epoch": 1.9714285714285715, "grad_norm": 1.9134782552719116, "learning_rate": 4.162962962962963e-05, "loss": 0.565, "step": 2760 }, { "epoch": 1.9785714285714286, "grad_norm": 1.4029120206832886, "learning_rate": 4.1592592592592595e-05, "loss": 0.3373, "step": 2770 }, { "epoch": 1.9857142857142858, "grad_norm": 1.9651641845703125, "learning_rate": 4.155555555555556e-05, "loss": 0.3847, "step": 2780 }, { "epoch": 1.9928571428571429, "grad_norm": 1.4134501218795776, "learning_rate": 4.1518518518518525e-05, "loss": 0.4465, "step": 2790 }, { "epoch": 2.0, "grad_norm": 1.9682196378707886, "learning_rate": 4.148148148148148e-05, "loss": 0.545, "step": 2800 }, { "epoch": 2.007142857142857, "grad_norm": 2.259190559387207, "learning_rate": 4.144444444444445e-05, "loss": 0.3058, "step": 2810 }, { "epoch": 2.0142857142857142, "grad_norm": 1.6861268281936646, "learning_rate": 4.140740740740741e-05, "loss": 0.4567, "step": 2820 }, { "epoch": 2.0214285714285714, "grad_norm": 1.5168589353561401, "learning_rate": 4.137037037037037e-05, "loss": 0.5687, "step": 2830 }, { "epoch": 2.0285714285714285, "grad_norm": 1.1756591796875, "learning_rate": 4.133333333333333e-05, "loss": 0.4118, "step": 2840 }, { "epoch": 2.0357142857142856, "grad_norm": 1.4381860494613647, "learning_rate": 4.12962962962963e-05, "loss": 0.5473, "step": 2850 }, { "epoch": 2.0428571428571427, "grad_norm": 1.710028052330017, "learning_rate": 4.1259259259259256e-05, "loss": 0.3362, "step": 2860 }, { "epoch": 2.05, "grad_norm": 1.3261126279830933, "learning_rate": 4.1222222222222224e-05, "loss": 0.4497, "step": 2870 }, { "epoch": 2.057142857142857, "grad_norm": 1.5397872924804688, "learning_rate": 4.1185185185185186e-05, "loss": 0.3985, "step": 2880 }, { "epoch": 2.064285714285714, "grad_norm": 2.1463019847869873, "learning_rate": 4.1148148148148154e-05, "loss": 0.3423, "step": 2890 }, { "epoch": 2.0714285714285716, "grad_norm": 1.3202670812606812, "learning_rate": 4.111111111111111e-05, "loss": 0.3422, "step": 2900 }, { "epoch": 2.0785714285714287, "grad_norm": 1.6832393407821655, "learning_rate": 4.107407407407408e-05, "loss": 0.3243, "step": 2910 }, { "epoch": 2.085714285714286, "grad_norm": 1.7872291803359985, "learning_rate": 4.103703703703704e-05, "loss": 0.4452, "step": 2920 }, { "epoch": 2.092857142857143, "grad_norm": 2.649644613265991, "learning_rate": 4.1e-05, "loss": 0.4125, "step": 2930 }, { "epoch": 2.1, "grad_norm": 1.6862508058547974, "learning_rate": 4.096296296296296e-05, "loss": 0.3503, "step": 2940 }, { "epoch": 2.107142857142857, "grad_norm": 1.0415781736373901, "learning_rate": 4.092592592592593e-05, "loss": 0.2464, "step": 2950 }, { "epoch": 2.1142857142857143, "grad_norm": 1.3061981201171875, "learning_rate": 4.088888888888889e-05, "loss": 0.2232, "step": 2960 }, { "epoch": 2.1214285714285714, "grad_norm": 1.0442795753479004, "learning_rate": 4.0851851851851853e-05, "loss": 0.3512, "step": 2970 }, { "epoch": 2.1285714285714286, "grad_norm": 2.4381885528564453, "learning_rate": 4.0814814814814815e-05, "loss": 0.4678, "step": 2980 }, { "epoch": 2.1357142857142857, "grad_norm": 1.5145925283432007, "learning_rate": 4.0777777777777783e-05, "loss": 0.3655, "step": 2990 }, { "epoch": 2.142857142857143, "grad_norm": 3.514470338821411, "learning_rate": 4.074074074074074e-05, "loss": 0.3538, "step": 3000 }, { "epoch": 2.142857142857143, "eval_loss": 0.42481884360313416, "eval_rouge1": 0.8943, "eval_rouge2": 0.8295, "eval_rougeL": 0.8911, "eval_runtime": 122.0137, "eval_samples_per_second": 11.474, "eval_steps_per_second": 5.737, "step": 3000 }, { "epoch": 2.15, "grad_norm": 1.9778640270233154, "learning_rate": 4.0703703703703707e-05, "loss": 0.4804, "step": 3010 }, { "epoch": 2.157142857142857, "grad_norm": 2.8497660160064697, "learning_rate": 4.066666666666667e-05, "loss": 0.523, "step": 3020 }, { "epoch": 2.164285714285714, "grad_norm": 1.317818284034729, "learning_rate": 4.0629629629629636e-05, "loss": 0.3694, "step": 3030 }, { "epoch": 2.1714285714285713, "grad_norm": 1.1630916595458984, "learning_rate": 4.059259259259259e-05, "loss": 0.3546, "step": 3040 }, { "epoch": 2.1785714285714284, "grad_norm": 2.114527940750122, "learning_rate": 4.055555555555556e-05, "loss": 0.4838, "step": 3050 }, { "epoch": 2.185714285714286, "grad_norm": 1.771263599395752, "learning_rate": 4.051851851851852e-05, "loss": 0.2938, "step": 3060 }, { "epoch": 2.192857142857143, "grad_norm": 3.463986396789551, "learning_rate": 4.048148148148148e-05, "loss": 0.455, "step": 3070 }, { "epoch": 2.2, "grad_norm": 2.023069381713867, "learning_rate": 4.0444444444444444e-05, "loss": 0.4449, "step": 3080 }, { "epoch": 2.2071428571428573, "grad_norm": 2.9855751991271973, "learning_rate": 4.040740740740741e-05, "loss": 0.5374, "step": 3090 }, { "epoch": 2.2142857142857144, "grad_norm": 2.422739267349243, "learning_rate": 4.0370370370370374e-05, "loss": 0.4203, "step": 3100 }, { "epoch": 2.2214285714285715, "grad_norm": 2.097543478012085, "learning_rate": 4.0333333333333336e-05, "loss": 0.364, "step": 3110 }, { "epoch": 2.2285714285714286, "grad_norm": 2.2496302127838135, "learning_rate": 4.02962962962963e-05, "loss": 0.4135, "step": 3120 }, { "epoch": 2.2357142857142858, "grad_norm": 2.3347012996673584, "learning_rate": 4.0259259259259266e-05, "loss": 0.4795, "step": 3130 }, { "epoch": 2.242857142857143, "grad_norm": 1.506218433380127, "learning_rate": 4.022222222222222e-05, "loss": 0.5228, "step": 3140 }, { "epoch": 2.25, "grad_norm": 1.160443663597107, "learning_rate": 4.018518518518519e-05, "loss": 0.3439, "step": 3150 }, { "epoch": 2.257142857142857, "grad_norm": 1.8678144216537476, "learning_rate": 4.014814814814815e-05, "loss": 0.404, "step": 3160 }, { "epoch": 2.2642857142857142, "grad_norm": 1.1315560340881348, "learning_rate": 4.011111111111111e-05, "loss": 0.3111, "step": 3170 }, { "epoch": 2.2714285714285714, "grad_norm": 1.8081461191177368, "learning_rate": 4.007407407407407e-05, "loss": 0.2717, "step": 3180 }, { "epoch": 2.2785714285714285, "grad_norm": 1.6636005640029907, "learning_rate": 4.003703703703704e-05, "loss": 0.3382, "step": 3190 }, { "epoch": 2.2857142857142856, "grad_norm": 1.3334009647369385, "learning_rate": 4e-05, "loss": 0.38, "step": 3200 }, { "epoch": 2.2928571428571427, "grad_norm": 1.4873621463775635, "learning_rate": 3.9962962962962965e-05, "loss": 0.2979, "step": 3210 }, { "epoch": 2.3, "grad_norm": 1.17378568649292, "learning_rate": 3.9925925925925926e-05, "loss": 0.3628, "step": 3220 }, { "epoch": 2.307142857142857, "grad_norm": 1.3241777420043945, "learning_rate": 3.9888888888888895e-05, "loss": 0.3119, "step": 3230 }, { "epoch": 2.314285714285714, "grad_norm": 1.9823285341262817, "learning_rate": 3.985185185185185e-05, "loss": 0.4647, "step": 3240 }, { "epoch": 2.3214285714285716, "grad_norm": 1.6918193101882935, "learning_rate": 3.981481481481482e-05, "loss": 0.3695, "step": 3250 }, { "epoch": 2.3285714285714287, "grad_norm": 2.1902389526367188, "learning_rate": 3.977777777777778e-05, "loss": 0.2468, "step": 3260 }, { "epoch": 2.335714285714286, "grad_norm": 1.3570506572723389, "learning_rate": 3.974074074074075e-05, "loss": 0.3209, "step": 3270 }, { "epoch": 2.342857142857143, "grad_norm": 1.951711654663086, "learning_rate": 3.97037037037037e-05, "loss": 0.5175, "step": 3280 }, { "epoch": 2.35, "grad_norm": 1.741243839263916, "learning_rate": 3.966666666666667e-05, "loss": 0.2934, "step": 3290 }, { "epoch": 2.357142857142857, "grad_norm": 1.5889472961425781, "learning_rate": 3.962962962962963e-05, "loss": 0.3026, "step": 3300 }, { "epoch": 2.3642857142857143, "grad_norm": 1.4606213569641113, "learning_rate": 3.9592592592592594e-05, "loss": 0.4508, "step": 3310 }, { "epoch": 2.3714285714285714, "grad_norm": 1.5021477937698364, "learning_rate": 3.9555555555555556e-05, "loss": 0.2589, "step": 3320 }, { "epoch": 2.3785714285714286, "grad_norm": 1.8877885341644287, "learning_rate": 3.9518518518518524e-05, "loss": 0.4115, "step": 3330 }, { "epoch": 2.3857142857142857, "grad_norm": 1.809822678565979, "learning_rate": 3.9481481481481485e-05, "loss": 0.3844, "step": 3340 }, { "epoch": 2.392857142857143, "grad_norm": 1.2999638319015503, "learning_rate": 3.944444444444445e-05, "loss": 0.4028, "step": 3350 }, { "epoch": 2.4, "grad_norm": 1.4639837741851807, "learning_rate": 3.940740740740741e-05, "loss": 0.3551, "step": 3360 }, { "epoch": 2.407142857142857, "grad_norm": 1.1001754999160767, "learning_rate": 3.937037037037038e-05, "loss": 0.4001, "step": 3370 }, { "epoch": 2.414285714285714, "grad_norm": 2.272892713546753, "learning_rate": 3.933333333333333e-05, "loss": 0.3048, "step": 3380 }, { "epoch": 2.4214285714285713, "grad_norm": 2.085908889770508, "learning_rate": 3.92962962962963e-05, "loss": 0.5788, "step": 3390 }, { "epoch": 2.4285714285714284, "grad_norm": 1.317700743675232, "learning_rate": 3.925925925925926e-05, "loss": 0.2922, "step": 3400 }, { "epoch": 2.435714285714286, "grad_norm": 2.372558832168579, "learning_rate": 3.922222222222223e-05, "loss": 0.4581, "step": 3410 }, { "epoch": 2.442857142857143, "grad_norm": 1.3307292461395264, "learning_rate": 3.9185185185185185e-05, "loss": 0.4553, "step": 3420 }, { "epoch": 2.45, "grad_norm": 1.9228068590164185, "learning_rate": 3.914814814814815e-05, "loss": 0.497, "step": 3430 }, { "epoch": 2.4571428571428573, "grad_norm": 1.071590542793274, "learning_rate": 3.9111111111111115e-05, "loss": 0.4532, "step": 3440 }, { "epoch": 2.4642857142857144, "grad_norm": 1.9603391885757446, "learning_rate": 3.9074074074074076e-05, "loss": 0.3808, "step": 3450 }, { "epoch": 2.4714285714285715, "grad_norm": 1.2152074575424194, "learning_rate": 3.903703703703704e-05, "loss": 0.3011, "step": 3460 }, { "epoch": 2.4785714285714286, "grad_norm": 1.532478928565979, "learning_rate": 3.9000000000000006e-05, "loss": 0.3403, "step": 3470 }, { "epoch": 2.4857142857142858, "grad_norm": 1.5086220502853394, "learning_rate": 3.896296296296296e-05, "loss": 0.4835, "step": 3480 }, { "epoch": 2.492857142857143, "grad_norm": 1.0601118803024292, "learning_rate": 3.892592592592593e-05, "loss": 0.4057, "step": 3490 }, { "epoch": 2.5, "grad_norm": 0.7907903790473938, "learning_rate": 3.888888888888889e-05, "loss": 0.3183, "step": 3500 }, { "epoch": 2.507142857142857, "grad_norm": 1.8523814678192139, "learning_rate": 3.885185185185186e-05, "loss": 0.4329, "step": 3510 }, { "epoch": 2.5142857142857142, "grad_norm": 1.9627383947372437, "learning_rate": 3.8814814814814814e-05, "loss": 0.3041, "step": 3520 }, { "epoch": 2.5214285714285714, "grad_norm": 0.6192536354064941, "learning_rate": 3.877777777777778e-05, "loss": 0.3271, "step": 3530 }, { "epoch": 2.5285714285714285, "grad_norm": 1.3901042938232422, "learning_rate": 3.8740740740740744e-05, "loss": 0.2562, "step": 3540 }, { "epoch": 2.5357142857142856, "grad_norm": 1.991752028465271, "learning_rate": 3.8703703703703705e-05, "loss": 0.4283, "step": 3550 }, { "epoch": 2.5428571428571427, "grad_norm": 1.219382882118225, "learning_rate": 3.866666666666667e-05, "loss": 0.4232, "step": 3560 }, { "epoch": 2.55, "grad_norm": 2.72744083404541, "learning_rate": 3.8629629629629635e-05, "loss": 0.3482, "step": 3570 }, { "epoch": 2.557142857142857, "grad_norm": 1.6782621145248413, "learning_rate": 3.85925925925926e-05, "loss": 0.3302, "step": 3580 }, { "epoch": 2.564285714285714, "grad_norm": 1.0238265991210938, "learning_rate": 3.855555555555556e-05, "loss": 0.458, "step": 3590 }, { "epoch": 2.571428571428571, "grad_norm": 2.212013006210327, "learning_rate": 3.851851851851852e-05, "loss": 0.4127, "step": 3600 }, { "epoch": 2.5785714285714287, "grad_norm": 1.5761399269104004, "learning_rate": 3.848148148148149e-05, "loss": 0.402, "step": 3610 }, { "epoch": 2.585714285714286, "grad_norm": 1.2036465406417847, "learning_rate": 3.844444444444444e-05, "loss": 0.5576, "step": 3620 }, { "epoch": 2.592857142857143, "grad_norm": 1.8674002885818481, "learning_rate": 3.840740740740741e-05, "loss": 0.364, "step": 3630 }, { "epoch": 2.6, "grad_norm": 1.466834545135498, "learning_rate": 3.837037037037037e-05, "loss": 0.3523, "step": 3640 }, { "epoch": 2.607142857142857, "grad_norm": 1.57899010181427, "learning_rate": 3.8333333333333334e-05, "loss": 0.4046, "step": 3650 }, { "epoch": 2.6142857142857143, "grad_norm": 0.9730345010757446, "learning_rate": 3.8296296296296296e-05, "loss": 0.3132, "step": 3660 }, { "epoch": 2.6214285714285714, "grad_norm": 1.3017544746398926, "learning_rate": 3.8259259259259264e-05, "loss": 0.3023, "step": 3670 }, { "epoch": 2.6285714285714286, "grad_norm": 1.6368205547332764, "learning_rate": 3.8222222222222226e-05, "loss": 0.4363, "step": 3680 }, { "epoch": 2.6357142857142857, "grad_norm": 1.2852121591567993, "learning_rate": 3.818518518518519e-05, "loss": 0.2896, "step": 3690 }, { "epoch": 2.642857142857143, "grad_norm": 3.6991841793060303, "learning_rate": 3.814814814814815e-05, "loss": 0.353, "step": 3700 }, { "epoch": 2.65, "grad_norm": 2.70285701751709, "learning_rate": 3.811111111111112e-05, "loss": 0.4217, "step": 3710 }, { "epoch": 2.657142857142857, "grad_norm": 1.140811800956726, "learning_rate": 3.807407407407408e-05, "loss": 0.3253, "step": 3720 }, { "epoch": 2.664285714285714, "grad_norm": 1.2905789613723755, "learning_rate": 3.803703703703704e-05, "loss": 0.3051, "step": 3730 }, { "epoch": 2.6714285714285713, "grad_norm": 1.4326887130737305, "learning_rate": 3.8e-05, "loss": 0.3999, "step": 3740 }, { "epoch": 2.678571428571429, "grad_norm": 1.1789475679397583, "learning_rate": 3.7962962962962964e-05, "loss": 0.4631, "step": 3750 }, { "epoch": 2.685714285714286, "grad_norm": 2.0444328784942627, "learning_rate": 3.7925925925925925e-05, "loss": 0.4449, "step": 3760 }, { "epoch": 2.692857142857143, "grad_norm": 2.1025991439819336, "learning_rate": 3.7888888888888894e-05, "loss": 0.3513, "step": 3770 }, { "epoch": 2.7, "grad_norm": 1.8492026329040527, "learning_rate": 3.7851851851851855e-05, "loss": 0.4006, "step": 3780 }, { "epoch": 2.7071428571428573, "grad_norm": 1.3439162969589233, "learning_rate": 3.781481481481482e-05, "loss": 0.2806, "step": 3790 }, { "epoch": 2.7142857142857144, "grad_norm": 1.4200384616851807, "learning_rate": 3.777777777777778e-05, "loss": 0.3759, "step": 3800 }, { "epoch": 2.7214285714285715, "grad_norm": 1.9567861557006836, "learning_rate": 3.774074074074074e-05, "loss": 0.1772, "step": 3810 }, { "epoch": 2.7285714285714286, "grad_norm": 1.3466306924819946, "learning_rate": 3.770370370370371e-05, "loss": 0.399, "step": 3820 }, { "epoch": 2.7357142857142858, "grad_norm": 1.6046024560928345, "learning_rate": 3.766666666666667e-05, "loss": 0.347, "step": 3830 }, { "epoch": 2.742857142857143, "grad_norm": 2.190568447113037, "learning_rate": 3.762962962962963e-05, "loss": 0.3977, "step": 3840 }, { "epoch": 2.75, "grad_norm": 1.7715718746185303, "learning_rate": 3.759259259259259e-05, "loss": 0.4385, "step": 3850 }, { "epoch": 2.757142857142857, "grad_norm": 3.19500994682312, "learning_rate": 3.7555555555555554e-05, "loss": 0.3631, "step": 3860 }, { "epoch": 2.7642857142857142, "grad_norm": 2.2222607135772705, "learning_rate": 3.751851851851852e-05, "loss": 0.3565, "step": 3870 }, { "epoch": 2.7714285714285714, "grad_norm": 1.9959403276443481, "learning_rate": 3.7481481481481484e-05, "loss": 0.3629, "step": 3880 }, { "epoch": 2.7785714285714285, "grad_norm": 1.3207546472549438, "learning_rate": 3.7444444444444446e-05, "loss": 0.2911, "step": 3890 }, { "epoch": 2.7857142857142856, "grad_norm": 2.0290961265563965, "learning_rate": 3.740740740740741e-05, "loss": 0.3072, "step": 3900 }, { "epoch": 2.7928571428571427, "grad_norm": 1.3728725910186768, "learning_rate": 3.737037037037037e-05, "loss": 0.3858, "step": 3910 }, { "epoch": 2.8, "grad_norm": 2.541598320007324, "learning_rate": 3.733333333333334e-05, "loss": 0.3487, "step": 3920 }, { "epoch": 2.807142857142857, "grad_norm": 2.3327584266662598, "learning_rate": 3.72962962962963e-05, "loss": 0.3535, "step": 3930 }, { "epoch": 2.814285714285714, "grad_norm": 2.546766757965088, "learning_rate": 3.725925925925926e-05, "loss": 0.3462, "step": 3940 }, { "epoch": 2.821428571428571, "grad_norm": 2.351959705352783, "learning_rate": 3.722222222222222e-05, "loss": 0.2781, "step": 3950 }, { "epoch": 2.8285714285714287, "grad_norm": 1.9349900484085083, "learning_rate": 3.718518518518519e-05, "loss": 0.2442, "step": 3960 }, { "epoch": 2.835714285714286, "grad_norm": 2.2020022869110107, "learning_rate": 3.714814814814815e-05, "loss": 0.3396, "step": 3970 }, { "epoch": 2.842857142857143, "grad_norm": 1.5161465406417847, "learning_rate": 3.7111111111111113e-05, "loss": 0.3722, "step": 3980 }, { "epoch": 2.85, "grad_norm": 1.7403453588485718, "learning_rate": 3.7074074074074075e-05, "loss": 0.4227, "step": 3990 }, { "epoch": 2.857142857142857, "grad_norm": 1.9142546653747559, "learning_rate": 3.7037037037037037e-05, "loss": 0.3259, "step": 4000 }, { "epoch": 2.857142857142857, "eval_loss": 0.38673722743988037, "eval_rouge1": 0.8974, "eval_rouge2": 0.8331, "eval_rougeL": 0.8942, "eval_runtime": 122.1383, "eval_samples_per_second": 11.462, "eval_steps_per_second": 5.731, "step": 4000 }, { "epoch": 2.8642857142857143, "grad_norm": 1.5975255966186523, "learning_rate": 3.7e-05, "loss": 0.3732, "step": 4010 }, { "epoch": 2.8714285714285714, "grad_norm": 1.4830248355865479, "learning_rate": 3.6962962962962966e-05, "loss": 0.5093, "step": 4020 }, { "epoch": 2.8785714285714286, "grad_norm": 2.504650354385376, "learning_rate": 3.692592592592593e-05, "loss": 0.3302, "step": 4030 }, { "epoch": 2.8857142857142857, "grad_norm": 2.349452495574951, "learning_rate": 3.688888888888889e-05, "loss": 0.3596, "step": 4040 }, { "epoch": 2.892857142857143, "grad_norm": 1.398964762687683, "learning_rate": 3.685185185185185e-05, "loss": 0.3494, "step": 4050 }, { "epoch": 2.9, "grad_norm": 2.212738513946533, "learning_rate": 3.681481481481482e-05, "loss": 0.3691, "step": 4060 }, { "epoch": 2.907142857142857, "grad_norm": 2.20845627784729, "learning_rate": 3.677777777777778e-05, "loss": 0.2974, "step": 4070 }, { "epoch": 2.914285714285714, "grad_norm": 1.2226334810256958, "learning_rate": 3.674074074074074e-05, "loss": 0.3173, "step": 4080 }, { "epoch": 2.9214285714285713, "grad_norm": 2.2203428745269775, "learning_rate": 3.6703703703703704e-05, "loss": 0.4473, "step": 4090 }, { "epoch": 2.928571428571429, "grad_norm": 1.487853765487671, "learning_rate": 3.6666666666666666e-05, "loss": 0.2653, "step": 4100 }, { "epoch": 2.935714285714286, "grad_norm": 1.6347614526748657, "learning_rate": 3.662962962962963e-05, "loss": 0.3563, "step": 4110 }, { "epoch": 2.942857142857143, "grad_norm": 2.2722184658050537, "learning_rate": 3.6592592592592596e-05, "loss": 0.4975, "step": 4120 }, { "epoch": 2.95, "grad_norm": 1.747530460357666, "learning_rate": 3.655555555555556e-05, "loss": 0.2357, "step": 4130 }, { "epoch": 2.9571428571428573, "grad_norm": 1.628596544265747, "learning_rate": 3.651851851851852e-05, "loss": 0.3674, "step": 4140 }, { "epoch": 2.9642857142857144, "grad_norm": 1.0486435890197754, "learning_rate": 3.648148148148148e-05, "loss": 0.3314, "step": 4150 }, { "epoch": 2.9714285714285715, "grad_norm": 2.523879289627075, "learning_rate": 3.644444444444445e-05, "loss": 0.4421, "step": 4160 }, { "epoch": 2.9785714285714286, "grad_norm": 1.4641958475112915, "learning_rate": 3.6407407407407403e-05, "loss": 0.4135, "step": 4170 }, { "epoch": 2.9857142857142858, "grad_norm": 2.672769784927368, "learning_rate": 3.637037037037037e-05, "loss": 0.3527, "step": 4180 }, { "epoch": 2.992857142857143, "grad_norm": 0.5795308351516724, "learning_rate": 3.633333333333333e-05, "loss": 0.2326, "step": 4190 }, { "epoch": 3.0, "grad_norm": 1.873579978942871, "learning_rate": 3.62962962962963e-05, "loss": 0.3679, "step": 4200 }, { "epoch": 3.007142857142857, "grad_norm": 1.7640775442123413, "learning_rate": 3.6259259259259256e-05, "loss": 0.4778, "step": 4210 }, { "epoch": 3.0142857142857142, "grad_norm": 1.9458075761795044, "learning_rate": 3.6222222222222225e-05, "loss": 0.4054, "step": 4220 }, { "epoch": 3.0214285714285714, "grad_norm": 1.1568126678466797, "learning_rate": 3.6185185185185186e-05, "loss": 0.2249, "step": 4230 }, { "epoch": 3.0285714285714285, "grad_norm": 1.3655381202697754, "learning_rate": 3.614814814814815e-05, "loss": 0.3993, "step": 4240 }, { "epoch": 3.0357142857142856, "grad_norm": 2.0403196811676025, "learning_rate": 3.611111111111111e-05, "loss": 0.3366, "step": 4250 }, { "epoch": 3.0428571428571427, "grad_norm": 1.9888697862625122, "learning_rate": 3.607407407407408e-05, "loss": 0.3033, "step": 4260 }, { "epoch": 3.05, "grad_norm": 1.3648616075515747, "learning_rate": 3.603703703703704e-05, "loss": 0.2874, "step": 4270 }, { "epoch": 3.057142857142857, "grad_norm": 2.602613925933838, "learning_rate": 3.6e-05, "loss": 0.4086, "step": 4280 }, { "epoch": 3.064285714285714, "grad_norm": 2.5918185710906982, "learning_rate": 3.596296296296296e-05, "loss": 0.393, "step": 4290 }, { "epoch": 3.0714285714285716, "grad_norm": 1.8195433616638184, "learning_rate": 3.592592592592593e-05, "loss": 0.3361, "step": 4300 }, { "epoch": 3.0785714285714287, "grad_norm": 1.8855136632919312, "learning_rate": 3.5888888888888886e-05, "loss": 0.3205, "step": 4310 }, { "epoch": 3.085714285714286, "grad_norm": 2.7412662506103516, "learning_rate": 3.5851851851851854e-05, "loss": 0.2659, "step": 4320 }, { "epoch": 3.092857142857143, "grad_norm": 1.880436658859253, "learning_rate": 3.5814814814814815e-05, "loss": 0.49, "step": 4330 }, { "epoch": 3.1, "grad_norm": 1.6828274726867676, "learning_rate": 3.577777777777778e-05, "loss": 0.2933, "step": 4340 }, { "epoch": 3.107142857142857, "grad_norm": 1.0517287254333496, "learning_rate": 3.574074074074074e-05, "loss": 0.3563, "step": 4350 }, { "epoch": 3.1142857142857143, "grad_norm": 1.3242154121398926, "learning_rate": 3.570370370370371e-05, "loss": 0.3765, "step": 4360 }, { "epoch": 3.1214285714285714, "grad_norm": 2.0899312496185303, "learning_rate": 3.566666666666667e-05, "loss": 0.3664, "step": 4370 }, { "epoch": 3.1285714285714286, "grad_norm": 2.0286014080047607, "learning_rate": 3.562962962962963e-05, "loss": 0.2622, "step": 4380 }, { "epoch": 3.1357142857142857, "grad_norm": 2.5074400901794434, "learning_rate": 3.559259259259259e-05, "loss": 0.321, "step": 4390 }, { "epoch": 3.142857142857143, "grad_norm": 1.4080287218093872, "learning_rate": 3.555555555555556e-05, "loss": 0.4035, "step": 4400 }, { "epoch": 3.15, "grad_norm": 1.923890471458435, "learning_rate": 3.5518518518518515e-05, "loss": 0.2775, "step": 4410 }, { "epoch": 3.157142857142857, "grad_norm": 0.806591272354126, "learning_rate": 3.548148148148148e-05, "loss": 0.3149, "step": 4420 }, { "epoch": 3.164285714285714, "grad_norm": 2.197736978530884, "learning_rate": 3.5444444444444445e-05, "loss": 0.4368, "step": 4430 }, { "epoch": 3.1714285714285713, "grad_norm": 1.6943881511688232, "learning_rate": 3.540740740740741e-05, "loss": 0.2793, "step": 4440 }, { "epoch": 3.1785714285714284, "grad_norm": 2.5460283756256104, "learning_rate": 3.537037037037037e-05, "loss": 0.4057, "step": 4450 }, { "epoch": 3.185714285714286, "grad_norm": 1.579908013343811, "learning_rate": 3.5333333333333336e-05, "loss": 0.3016, "step": 4460 }, { "epoch": 3.192857142857143, "grad_norm": 1.9137247800827026, "learning_rate": 3.52962962962963e-05, "loss": 0.3437, "step": 4470 }, { "epoch": 3.2, "grad_norm": 2.510328769683838, "learning_rate": 3.525925925925926e-05, "loss": 0.585, "step": 4480 }, { "epoch": 3.2071428571428573, "grad_norm": 0.9775506854057312, "learning_rate": 3.522222222222222e-05, "loss": 0.2651, "step": 4490 }, { "epoch": 3.2142857142857144, "grad_norm": 1.7614684104919434, "learning_rate": 3.518518518518519e-05, "loss": 0.3089, "step": 4500 }, { "epoch": 3.2214285714285715, "grad_norm": 1.9103621244430542, "learning_rate": 3.514814814814815e-05, "loss": 0.342, "step": 4510 }, { "epoch": 3.2285714285714286, "grad_norm": 1.4587639570236206, "learning_rate": 3.511111111111111e-05, "loss": 0.2592, "step": 4520 }, { "epoch": 3.2357142857142858, "grad_norm": 1.3419288396835327, "learning_rate": 3.5074074074074074e-05, "loss": 0.4185, "step": 4530 }, { "epoch": 3.242857142857143, "grad_norm": 1.6199047565460205, "learning_rate": 3.503703703703704e-05, "loss": 0.256, "step": 4540 }, { "epoch": 3.25, "grad_norm": 1.230350136756897, "learning_rate": 3.5e-05, "loss": 0.3304, "step": 4550 }, { "epoch": 3.257142857142857, "grad_norm": 3.087888240814209, "learning_rate": 3.4962962962962965e-05, "loss": 0.3351, "step": 4560 }, { "epoch": 3.2642857142857142, "grad_norm": 1.4498260021209717, "learning_rate": 3.492592592592593e-05, "loss": 0.2753, "step": 4570 }, { "epoch": 3.2714285714285714, "grad_norm": 1.1032336950302124, "learning_rate": 3.4888888888888895e-05, "loss": 0.3709, "step": 4580 }, { "epoch": 3.2785714285714285, "grad_norm": 1.5177497863769531, "learning_rate": 3.485185185185185e-05, "loss": 0.276, "step": 4590 }, { "epoch": 3.2857142857142856, "grad_norm": 1.2596136331558228, "learning_rate": 3.481481481481482e-05, "loss": 0.3482, "step": 4600 }, { "epoch": 3.2928571428571427, "grad_norm": 1.9895663261413574, "learning_rate": 3.477777777777778e-05, "loss": 0.3738, "step": 4610 }, { "epoch": 3.3, "grad_norm": 1.2930881977081299, "learning_rate": 3.474074074074074e-05, "loss": 0.4263, "step": 4620 }, { "epoch": 3.307142857142857, "grad_norm": 2.276385545730591, "learning_rate": 3.47037037037037e-05, "loss": 0.2267, "step": 4630 }, { "epoch": 3.314285714285714, "grad_norm": 0.9766007661819458, "learning_rate": 3.466666666666667e-05, "loss": 0.2217, "step": 4640 }, { "epoch": 3.3214285714285716, "grad_norm": 1.5184674263000488, "learning_rate": 3.4629629629629626e-05, "loss": 0.2788, "step": 4650 }, { "epoch": 3.3285714285714287, "grad_norm": 1.5145732164382935, "learning_rate": 3.4592592592592594e-05, "loss": 0.3291, "step": 4660 }, { "epoch": 3.335714285714286, "grad_norm": 1.4273874759674072, "learning_rate": 3.4555555555555556e-05, "loss": 0.2854, "step": 4670 }, { "epoch": 3.342857142857143, "grad_norm": 2.783701181411743, "learning_rate": 3.4518518518518524e-05, "loss": 0.3518, "step": 4680 }, { "epoch": 3.35, "grad_norm": 1.3359688520431519, "learning_rate": 3.448148148148148e-05, "loss": 0.2239, "step": 4690 }, { "epoch": 3.357142857142857, "grad_norm": 2.246824264526367, "learning_rate": 3.444444444444445e-05, "loss": 0.3206, "step": 4700 }, { "epoch": 3.3642857142857143, "grad_norm": 1.7839916944503784, "learning_rate": 3.440740740740741e-05, "loss": 0.3189, "step": 4710 }, { "epoch": 3.3714285714285714, "grad_norm": 1.0196881294250488, "learning_rate": 3.437037037037037e-05, "loss": 0.2318, "step": 4720 }, { "epoch": 3.3785714285714286, "grad_norm": 2.228317975997925, "learning_rate": 3.433333333333333e-05, "loss": 0.4033, "step": 4730 }, { "epoch": 3.3857142857142857, "grad_norm": 2.0231473445892334, "learning_rate": 3.42962962962963e-05, "loss": 0.3854, "step": 4740 }, { "epoch": 3.392857142857143, "grad_norm": 2.074925422668457, "learning_rate": 3.425925925925926e-05, "loss": 0.3778, "step": 4750 }, { "epoch": 3.4, "grad_norm": 1.2508392333984375, "learning_rate": 3.4222222222222224e-05, "loss": 0.3299, "step": 4760 }, { "epoch": 3.407142857142857, "grad_norm": 1.0920076370239258, "learning_rate": 3.4185185185185185e-05, "loss": 0.3798, "step": 4770 }, { "epoch": 3.414285714285714, "grad_norm": 1.8113828897476196, "learning_rate": 3.4148148148148153e-05, "loss": 0.2903, "step": 4780 }, { "epoch": 3.4214285714285713, "grad_norm": 1.6218737363815308, "learning_rate": 3.411111111111111e-05, "loss": 0.2593, "step": 4790 }, { "epoch": 3.4285714285714284, "grad_norm": 1.0635234117507935, "learning_rate": 3.4074074074074077e-05, "loss": 0.4388, "step": 4800 }, { "epoch": 3.435714285714286, "grad_norm": 2.585700273513794, "learning_rate": 3.403703703703704e-05, "loss": 0.3368, "step": 4810 }, { "epoch": 3.442857142857143, "grad_norm": 1.0704694986343384, "learning_rate": 3.4000000000000007e-05, "loss": 0.2196, "step": 4820 }, { "epoch": 3.45, "grad_norm": 1.3177589178085327, "learning_rate": 3.396296296296296e-05, "loss": 0.3104, "step": 4830 }, { "epoch": 3.4571428571428573, "grad_norm": 1.834241271018982, "learning_rate": 3.392592592592593e-05, "loss": 0.3413, "step": 4840 }, { "epoch": 3.4642857142857144, "grad_norm": 1.8859339952468872, "learning_rate": 3.388888888888889e-05, "loss": 0.2593, "step": 4850 }, { "epoch": 3.4714285714285715, "grad_norm": 1.452728271484375, "learning_rate": 3.385185185185185e-05, "loss": 0.3029, "step": 4860 }, { "epoch": 3.4785714285714286, "grad_norm": 2.170774221420288, "learning_rate": 3.3814814814814814e-05, "loss": 0.3372, "step": 4870 }, { "epoch": 3.4857142857142858, "grad_norm": 1.8695834875106812, "learning_rate": 3.377777777777778e-05, "loss": 0.3428, "step": 4880 }, { "epoch": 3.492857142857143, "grad_norm": 1.74647855758667, "learning_rate": 3.3740740740740744e-05, "loss": 0.3351, "step": 4890 }, { "epoch": 3.5, "grad_norm": 2.3349127769470215, "learning_rate": 3.3703703703703706e-05, "loss": 0.2733, "step": 4900 }, { "epoch": 3.507142857142857, "grad_norm": 2.73463773727417, "learning_rate": 3.366666666666667e-05, "loss": 0.2979, "step": 4910 }, { "epoch": 3.5142857142857142, "grad_norm": 1.3546210527420044, "learning_rate": 3.3629629629629636e-05, "loss": 0.3521, "step": 4920 }, { "epoch": 3.5214285714285714, "grad_norm": 1.617336630821228, "learning_rate": 3.359259259259259e-05, "loss": 0.2758, "step": 4930 }, { "epoch": 3.5285714285714285, "grad_norm": 2.998967409133911, "learning_rate": 3.355555555555556e-05, "loss": 0.4193, "step": 4940 }, { "epoch": 3.5357142857142856, "grad_norm": 1.8004390001296997, "learning_rate": 3.351851851851852e-05, "loss": 0.3936, "step": 4950 }, { "epoch": 3.5428571428571427, "grad_norm": 1.4228971004486084, "learning_rate": 3.348148148148148e-05, "loss": 0.3563, "step": 4960 }, { "epoch": 3.55, "grad_norm": 1.5617480278015137, "learning_rate": 3.3444444444444443e-05, "loss": 0.2492, "step": 4970 }, { "epoch": 3.557142857142857, "grad_norm": 1.3880919218063354, "learning_rate": 3.340740740740741e-05, "loss": 0.1791, "step": 4980 }, { "epoch": 3.564285714285714, "grad_norm": 2.3505630493164062, "learning_rate": 3.337037037037037e-05, "loss": 0.4009, "step": 4990 }, { "epoch": 3.571428571428571, "grad_norm": 0.9086794853210449, "learning_rate": 3.3333333333333335e-05, "loss": 0.2826, "step": 5000 }, { "epoch": 3.571428571428571, "eval_loss": 0.3789908289909363, "eval_rouge1": 0.8999, "eval_rouge2": 0.8372, "eval_rougeL": 0.8969, "eval_runtime": 122.23, "eval_samples_per_second": 11.454, "eval_steps_per_second": 5.727, "step": 5000 }, { "epoch": 3.5785714285714287, "grad_norm": 1.0208678245544434, "learning_rate": 3.3296296296296296e-05, "loss": 0.295, "step": 5010 }, { "epoch": 3.585714285714286, "grad_norm": 3.03141713142395, "learning_rate": 3.3259259259259265e-05, "loss": 0.3813, "step": 5020 }, { "epoch": 3.592857142857143, "grad_norm": 1.7845333814620972, "learning_rate": 3.322222222222222e-05, "loss": 0.2526, "step": 5030 }, { "epoch": 3.6, "grad_norm": 4.314096450805664, "learning_rate": 3.318518518518519e-05, "loss": 0.3498, "step": 5040 }, { "epoch": 3.607142857142857, "grad_norm": 1.5270274877548218, "learning_rate": 3.314814814814815e-05, "loss": 0.3204, "step": 5050 }, { "epoch": 3.6142857142857143, "grad_norm": 2.036738157272339, "learning_rate": 3.311111111111112e-05, "loss": 0.3416, "step": 5060 }, { "epoch": 3.6214285714285714, "grad_norm": 2.2504570484161377, "learning_rate": 3.307407407407407e-05, "loss": 0.3781, "step": 5070 }, { "epoch": 3.6285714285714286, "grad_norm": 1.749518632888794, "learning_rate": 3.303703703703704e-05, "loss": 0.2299, "step": 5080 }, { "epoch": 3.6357142857142857, "grad_norm": 2.1878907680511475, "learning_rate": 3.3e-05, "loss": 0.3692, "step": 5090 }, { "epoch": 3.642857142857143, "grad_norm": 1.829394817352295, "learning_rate": 3.2962962962962964e-05, "loss": 0.3095, "step": 5100 }, { "epoch": 3.65, "grad_norm": 2.5994794368743896, "learning_rate": 3.2925925925925926e-05, "loss": 0.431, "step": 5110 }, { "epoch": 3.657142857142857, "grad_norm": 1.2319742441177368, "learning_rate": 3.2888888888888894e-05, "loss": 0.336, "step": 5120 }, { "epoch": 3.664285714285714, "grad_norm": 2.169063091278076, "learning_rate": 3.2851851851851856e-05, "loss": 0.293, "step": 5130 }, { "epoch": 3.6714285714285713, "grad_norm": 1.7120137214660645, "learning_rate": 3.281481481481482e-05, "loss": 0.3439, "step": 5140 }, { "epoch": 3.678571428571429, "grad_norm": 1.5415689945220947, "learning_rate": 3.277777777777778e-05, "loss": 0.3912, "step": 5150 }, { "epoch": 3.685714285714286, "grad_norm": 2.2880282402038574, "learning_rate": 3.274074074074075e-05, "loss": 0.2352, "step": 5160 }, { "epoch": 3.692857142857143, "grad_norm": 1.7133980989456177, "learning_rate": 3.27037037037037e-05, "loss": 0.5397, "step": 5170 }, { "epoch": 3.7, "grad_norm": 1.9661128520965576, "learning_rate": 3.266666666666667e-05, "loss": 0.4496, "step": 5180 }, { "epoch": 3.7071428571428573, "grad_norm": 1.444551944732666, "learning_rate": 3.262962962962963e-05, "loss": 0.3201, "step": 5190 }, { "epoch": 3.7142857142857144, "grad_norm": 1.7919954061508179, "learning_rate": 3.25925925925926e-05, "loss": 0.3721, "step": 5200 }, { "epoch": 3.7214285714285715, "grad_norm": 2.4862735271453857, "learning_rate": 3.2555555555555555e-05, "loss": 0.2511, "step": 5210 }, { "epoch": 3.7285714285714286, "grad_norm": 1.0694047212600708, "learning_rate": 3.251851851851852e-05, "loss": 0.1418, "step": 5220 }, { "epoch": 3.7357142857142858, "grad_norm": 2.4438931941986084, "learning_rate": 3.2481481481481485e-05, "loss": 0.2473, "step": 5230 }, { "epoch": 3.742857142857143, "grad_norm": 1.9673523902893066, "learning_rate": 3.2444444444444446e-05, "loss": 0.3251, "step": 5240 }, { "epoch": 3.75, "grad_norm": 2.5299620628356934, "learning_rate": 3.240740740740741e-05, "loss": 0.3862, "step": 5250 }, { "epoch": 3.757142857142857, "grad_norm": 1.1709238290786743, "learning_rate": 3.2370370370370376e-05, "loss": 0.3156, "step": 5260 }, { "epoch": 3.7642857142857142, "grad_norm": 1.4275505542755127, "learning_rate": 3.233333333333333e-05, "loss": 0.3091, "step": 5270 }, { "epoch": 3.7714285714285714, "grad_norm": 1.5278127193450928, "learning_rate": 3.22962962962963e-05, "loss": 0.3768, "step": 5280 }, { "epoch": 3.7785714285714285, "grad_norm": 2.870471239089966, "learning_rate": 3.225925925925926e-05, "loss": 0.4264, "step": 5290 }, { "epoch": 3.7857142857142856, "grad_norm": 1.4797722101211548, "learning_rate": 3.222222222222223e-05, "loss": 0.3598, "step": 5300 }, { "epoch": 3.7928571428571427, "grad_norm": 1.6350576877593994, "learning_rate": 3.2185185185185184e-05, "loss": 0.2125, "step": 5310 }, { "epoch": 3.8, "grad_norm": 1.8790502548217773, "learning_rate": 3.214814814814815e-05, "loss": 0.2698, "step": 5320 }, { "epoch": 3.807142857142857, "grad_norm": 1.3930083513259888, "learning_rate": 3.2111111111111114e-05, "loss": 0.3867, "step": 5330 }, { "epoch": 3.814285714285714, "grad_norm": 1.7605199813842773, "learning_rate": 3.2074074074074075e-05, "loss": 0.3594, "step": 5340 }, { "epoch": 3.821428571428571, "grad_norm": 2.3873794078826904, "learning_rate": 3.203703703703704e-05, "loss": 0.372, "step": 5350 }, { "epoch": 3.8285714285714287, "grad_norm": 3.087186098098755, "learning_rate": 3.2000000000000005e-05, "loss": 0.3964, "step": 5360 }, { "epoch": 3.835714285714286, "grad_norm": 1.6758490800857544, "learning_rate": 3.196296296296297e-05, "loss": 0.3274, "step": 5370 }, { "epoch": 3.842857142857143, "grad_norm": 1.184205412864685, "learning_rate": 3.192592592592593e-05, "loss": 0.277, "step": 5380 }, { "epoch": 3.85, "grad_norm": 2.1282460689544678, "learning_rate": 3.188888888888889e-05, "loss": 0.3283, "step": 5390 }, { "epoch": 3.857142857142857, "grad_norm": 1.9244283437728882, "learning_rate": 3.185185185185185e-05, "loss": 0.2732, "step": 5400 }, { "epoch": 3.8642857142857143, "grad_norm": 1.2328709363937378, "learning_rate": 3.181481481481481e-05, "loss": 0.2968, "step": 5410 }, { "epoch": 3.8714285714285714, "grad_norm": 2.5490071773529053, "learning_rate": 3.177777777777778e-05, "loss": 0.3258, "step": 5420 }, { "epoch": 3.8785714285714286, "grad_norm": 1.7774560451507568, "learning_rate": 3.174074074074074e-05, "loss": 0.3274, "step": 5430 }, { "epoch": 3.8857142857142857, "grad_norm": 0.9900962710380554, "learning_rate": 3.1703703703703705e-05, "loss": 0.3361, "step": 5440 }, { "epoch": 3.892857142857143, "grad_norm": 1.2809844017028809, "learning_rate": 3.1666666666666666e-05, "loss": 0.3684, "step": 5450 }, { "epoch": 3.9, "grad_norm": 2.2611334323883057, "learning_rate": 3.1629629629629634e-05, "loss": 0.326, "step": 5460 }, { "epoch": 3.907142857142857, "grad_norm": 2.49057936668396, "learning_rate": 3.1592592592592596e-05, "loss": 0.412, "step": 5470 }, { "epoch": 3.914285714285714, "grad_norm": 1.6978118419647217, "learning_rate": 3.155555555555556e-05, "loss": 0.2177, "step": 5480 }, { "epoch": 3.9214285714285713, "grad_norm": 1.847128987312317, "learning_rate": 3.151851851851852e-05, "loss": 0.3419, "step": 5490 }, { "epoch": 3.928571428571429, "grad_norm": 1.6806657314300537, "learning_rate": 3.148148148148148e-05, "loss": 0.1479, "step": 5500 }, { "epoch": 3.935714285714286, "grad_norm": 2.144227981567383, "learning_rate": 3.144444444444445e-05, "loss": 0.3098, "step": 5510 }, { "epoch": 3.942857142857143, "grad_norm": 1.2945857048034668, "learning_rate": 3.140740740740741e-05, "loss": 0.269, "step": 5520 }, { "epoch": 3.95, "grad_norm": 1.8362900018692017, "learning_rate": 3.137037037037037e-05, "loss": 0.3065, "step": 5530 }, { "epoch": 3.9571428571428573, "grad_norm": 1.9124987125396729, "learning_rate": 3.1333333333333334e-05, "loss": 0.2593, "step": 5540 }, { "epoch": 3.9642857142857144, "grad_norm": 1.726523995399475, "learning_rate": 3.1296296296296295e-05, "loss": 0.3112, "step": 5550 }, { "epoch": 3.9714285714285715, "grad_norm": 1.5914565324783325, "learning_rate": 3.1259259259259264e-05, "loss": 0.263, "step": 5560 }, { "epoch": 3.9785714285714286, "grad_norm": 1.3533891439437866, "learning_rate": 3.1222222222222225e-05, "loss": 0.3852, "step": 5570 }, { "epoch": 3.9857142857142858, "grad_norm": 2.1844253540039062, "learning_rate": 3.118518518518519e-05, "loss": 0.3761, "step": 5580 }, { "epoch": 3.992857142857143, "grad_norm": 2.494920492172241, "learning_rate": 3.114814814814815e-05, "loss": 0.3882, "step": 5590 }, { "epoch": 4.0, "grad_norm": 0.9914864897727966, "learning_rate": 3.111111111111111e-05, "loss": 0.3518, "step": 5600 }, { "epoch": 4.007142857142857, "grad_norm": 1.6416865587234497, "learning_rate": 3.107407407407408e-05, "loss": 0.2688, "step": 5610 }, { "epoch": 4.014285714285714, "grad_norm": 1.934449315071106, "learning_rate": 3.103703703703704e-05, "loss": 0.2385, "step": 5620 }, { "epoch": 4.021428571428571, "grad_norm": 1.7663776874542236, "learning_rate": 3.1e-05, "loss": 0.3147, "step": 5630 }, { "epoch": 4.0285714285714285, "grad_norm": 1.8457096815109253, "learning_rate": 3.096296296296296e-05, "loss": 0.2922, "step": 5640 }, { "epoch": 4.035714285714286, "grad_norm": 1.133711338043213, "learning_rate": 3.0925925925925924e-05, "loss": 0.2291, "step": 5650 }, { "epoch": 4.042857142857143, "grad_norm": 1.794723629951477, "learning_rate": 3.088888888888889e-05, "loss": 0.3204, "step": 5660 }, { "epoch": 4.05, "grad_norm": 1.966180443763733, "learning_rate": 3.0851851851851854e-05, "loss": 0.2757, "step": 5670 }, { "epoch": 4.057142857142857, "grad_norm": 0.789313018321991, "learning_rate": 3.0814814814814816e-05, "loss": 0.3106, "step": 5680 }, { "epoch": 4.064285714285714, "grad_norm": 1.4390606880187988, "learning_rate": 3.077777777777778e-05, "loss": 0.192, "step": 5690 }, { "epoch": 4.071428571428571, "grad_norm": 1.8229310512542725, "learning_rate": 3.074074074074074e-05, "loss": 0.3802, "step": 5700 }, { "epoch": 4.078571428571428, "grad_norm": 1.3065968751907349, "learning_rate": 3.070370370370371e-05, "loss": 0.2891, "step": 5710 }, { "epoch": 4.085714285714285, "grad_norm": 1.5169206857681274, "learning_rate": 3.066666666666667e-05, "loss": 0.2818, "step": 5720 }, { "epoch": 4.0928571428571425, "grad_norm": 1.8811321258544922, "learning_rate": 3.062962962962963e-05, "loss": 0.1845, "step": 5730 }, { "epoch": 4.1, "grad_norm": 2.2235770225524902, "learning_rate": 3.059259259259259e-05, "loss": 0.3671, "step": 5740 }, { "epoch": 4.107142857142857, "grad_norm": 1.5675430297851562, "learning_rate": 3.055555555555556e-05, "loss": 0.3588, "step": 5750 }, { "epoch": 4.114285714285714, "grad_norm": 1.3254741430282593, "learning_rate": 3.0518518518518515e-05, "loss": 0.3641, "step": 5760 }, { "epoch": 4.121428571428571, "grad_norm": 2.601593017578125, "learning_rate": 3.0481481481481484e-05, "loss": 0.2704, "step": 5770 }, { "epoch": 4.128571428571428, "grad_norm": 2.3631677627563477, "learning_rate": 3.044444444444445e-05, "loss": 0.2528, "step": 5780 }, { "epoch": 4.135714285714286, "grad_norm": 1.4800968170166016, "learning_rate": 3.0407407407407407e-05, "loss": 0.263, "step": 5790 }, { "epoch": 4.142857142857143, "grad_norm": 1.6989574432373047, "learning_rate": 3.037037037037037e-05, "loss": 0.2465, "step": 5800 }, { "epoch": 4.15, "grad_norm": 1.595765471458435, "learning_rate": 3.0333333333333337e-05, "loss": 0.3223, "step": 5810 }, { "epoch": 4.1571428571428575, "grad_norm": 1.8895677328109741, "learning_rate": 3.02962962962963e-05, "loss": 0.3181, "step": 5820 }, { "epoch": 4.164285714285715, "grad_norm": 1.147406816482544, "learning_rate": 3.025925925925926e-05, "loss": 0.2275, "step": 5830 }, { "epoch": 4.171428571428572, "grad_norm": 3.310147523880005, "learning_rate": 3.0222222222222225e-05, "loss": 0.3615, "step": 5840 }, { "epoch": 4.178571428571429, "grad_norm": 1.6138179302215576, "learning_rate": 3.018518518518519e-05, "loss": 0.3492, "step": 5850 }, { "epoch": 4.185714285714286, "grad_norm": 1.9912358522415161, "learning_rate": 3.0148148148148148e-05, "loss": 0.3358, "step": 5860 }, { "epoch": 4.192857142857143, "grad_norm": 2.2521820068359375, "learning_rate": 3.0111111111111113e-05, "loss": 0.2773, "step": 5870 }, { "epoch": 4.2, "grad_norm": 1.804829478263855, "learning_rate": 3.0074074074074078e-05, "loss": 0.3052, "step": 5880 }, { "epoch": 4.207142857142857, "grad_norm": 1.0897246599197388, "learning_rate": 3.0037037037037036e-05, "loss": 0.3822, "step": 5890 }, { "epoch": 4.214285714285714, "grad_norm": 1.337428331375122, "learning_rate": 3e-05, "loss": 0.3091, "step": 5900 }, { "epoch": 4.2214285714285715, "grad_norm": 1.1409244537353516, "learning_rate": 2.9962962962962966e-05, "loss": 0.2002, "step": 5910 }, { "epoch": 4.228571428571429, "grad_norm": 0.9190034866333008, "learning_rate": 2.992592592592593e-05, "loss": 0.3029, "step": 5920 }, { "epoch": 4.235714285714286, "grad_norm": 1.7410012483596802, "learning_rate": 2.988888888888889e-05, "loss": 0.2361, "step": 5930 }, { "epoch": 4.242857142857143, "grad_norm": 2.308295965194702, "learning_rate": 2.9851851851851854e-05, "loss": 0.3654, "step": 5940 }, { "epoch": 4.25, "grad_norm": 1.299177646636963, "learning_rate": 2.981481481481482e-05, "loss": 0.2346, "step": 5950 }, { "epoch": 4.257142857142857, "grad_norm": 1.0352667570114136, "learning_rate": 2.9777777777777777e-05, "loss": 0.2331, "step": 5960 }, { "epoch": 4.264285714285714, "grad_norm": 1.0682189464569092, "learning_rate": 2.9740740740740742e-05, "loss": 0.2456, "step": 5970 }, { "epoch": 4.271428571428571, "grad_norm": 1.536718487739563, "learning_rate": 2.9703703703703707e-05, "loss": 0.1908, "step": 5980 }, { "epoch": 4.2785714285714285, "grad_norm": 2.0448334217071533, "learning_rate": 2.9666666666666672e-05, "loss": 0.3399, "step": 5990 }, { "epoch": 4.285714285714286, "grad_norm": 2.205901622772217, "learning_rate": 2.962962962962963e-05, "loss": 0.1913, "step": 6000 }, { "epoch": 4.285714285714286, "eval_loss": 0.36299219727516174, "eval_rouge1": 0.9025, "eval_rouge2": 0.8402, "eval_rougeL": 0.8994, "eval_runtime": 122.2765, "eval_samples_per_second": 11.449, "eval_steps_per_second": 5.725, "step": 6000 }, { "epoch": 4.292857142857143, "grad_norm": 1.455069661140442, "learning_rate": 2.9592592592592595e-05, "loss": 0.2236, "step": 6010 }, { "epoch": 4.3, "grad_norm": 1.6218276023864746, "learning_rate": 2.955555555555556e-05, "loss": 0.2166, "step": 6020 }, { "epoch": 4.307142857142857, "grad_norm": 1.4643278121948242, "learning_rate": 2.9518518518518518e-05, "loss": 0.2543, "step": 6030 }, { "epoch": 4.314285714285714, "grad_norm": 1.9875061511993408, "learning_rate": 2.9481481481481483e-05, "loss": 0.275, "step": 6040 }, { "epoch": 4.321428571428571, "grad_norm": 2.003077268600464, "learning_rate": 2.9444444444444448e-05, "loss": 0.3431, "step": 6050 }, { "epoch": 4.328571428571428, "grad_norm": 1.332705020904541, "learning_rate": 2.9407407407407413e-05, "loss": 0.2546, "step": 6060 }, { "epoch": 4.335714285714285, "grad_norm": 1.9161280393600464, "learning_rate": 2.937037037037037e-05, "loss": 0.2909, "step": 6070 }, { "epoch": 4.3428571428571425, "grad_norm": 1.509238839149475, "learning_rate": 2.9333333333333336e-05, "loss": 0.253, "step": 6080 }, { "epoch": 4.35, "grad_norm": 2.238847255706787, "learning_rate": 2.92962962962963e-05, "loss": 0.2717, "step": 6090 }, { "epoch": 4.357142857142857, "grad_norm": 1.9578133821487427, "learning_rate": 2.925925925925926e-05, "loss": 0.3407, "step": 6100 }, { "epoch": 4.364285714285714, "grad_norm": 1.805828332901001, "learning_rate": 2.9222222222222224e-05, "loss": 0.1811, "step": 6110 }, { "epoch": 4.371428571428572, "grad_norm": 2.9014134407043457, "learning_rate": 2.918518518518519e-05, "loss": 0.3934, "step": 6120 }, { "epoch": 4.378571428571428, "grad_norm": 1.9857615232467651, "learning_rate": 2.914814814814815e-05, "loss": 0.2026, "step": 6130 }, { "epoch": 4.385714285714286, "grad_norm": 2.3884503841400146, "learning_rate": 2.9111111111111112e-05, "loss": 0.2787, "step": 6140 }, { "epoch": 4.392857142857143, "grad_norm": 2.298215866088867, "learning_rate": 2.9074074074074077e-05, "loss": 0.2765, "step": 6150 }, { "epoch": 4.4, "grad_norm": 2.1733076572418213, "learning_rate": 2.9037037037037042e-05, "loss": 0.3975, "step": 6160 }, { "epoch": 4.4071428571428575, "grad_norm": 3.3003320693969727, "learning_rate": 2.9e-05, "loss": 0.4152, "step": 6170 }, { "epoch": 4.414285714285715, "grad_norm": 1.5066970586776733, "learning_rate": 2.8962962962962965e-05, "loss": 0.345, "step": 6180 }, { "epoch": 4.421428571428572, "grad_norm": 2.134096145629883, "learning_rate": 2.892592592592593e-05, "loss": 0.3154, "step": 6190 }, { "epoch": 4.428571428571429, "grad_norm": 1.8306220769882202, "learning_rate": 2.8888888888888888e-05, "loss": 0.2908, "step": 6200 }, { "epoch": 4.435714285714286, "grad_norm": 1.4300037622451782, "learning_rate": 2.8851851851851853e-05, "loss": 0.342, "step": 6210 }, { "epoch": 4.442857142857143, "grad_norm": 1.6552793979644775, "learning_rate": 2.8814814814814818e-05, "loss": 0.2856, "step": 6220 }, { "epoch": 4.45, "grad_norm": 2.188889265060425, "learning_rate": 2.877777777777778e-05, "loss": 0.25, "step": 6230 }, { "epoch": 4.457142857142857, "grad_norm": 1.3003034591674805, "learning_rate": 2.874074074074074e-05, "loss": 0.2995, "step": 6240 }, { "epoch": 4.464285714285714, "grad_norm": 1.834549903869629, "learning_rate": 2.8703703703703706e-05, "loss": 0.3726, "step": 6250 }, { "epoch": 4.4714285714285715, "grad_norm": 1.9426199197769165, "learning_rate": 2.8666666666666668e-05, "loss": 0.2142, "step": 6260 }, { "epoch": 4.478571428571429, "grad_norm": 1.5088646411895752, "learning_rate": 2.862962962962963e-05, "loss": 0.3584, "step": 6270 }, { "epoch": 4.485714285714286, "grad_norm": 1.9997400045394897, "learning_rate": 2.8592592592592594e-05, "loss": 0.2402, "step": 6280 }, { "epoch": 4.492857142857143, "grad_norm": 1.3831549882888794, "learning_rate": 2.855555555555556e-05, "loss": 0.312, "step": 6290 }, { "epoch": 4.5, "grad_norm": 2.013425588607788, "learning_rate": 2.851851851851852e-05, "loss": 0.2728, "step": 6300 }, { "epoch": 4.507142857142857, "grad_norm": 1.1200778484344482, "learning_rate": 2.8481481481481482e-05, "loss": 0.3909, "step": 6310 }, { "epoch": 4.514285714285714, "grad_norm": 0.8029781579971313, "learning_rate": 2.8444444444444447e-05, "loss": 0.3491, "step": 6320 }, { "epoch": 4.521428571428571, "grad_norm": 1.4999722242355347, "learning_rate": 2.840740740740741e-05, "loss": 0.2583, "step": 6330 }, { "epoch": 4.5285714285714285, "grad_norm": 1.8954156637191772, "learning_rate": 2.837037037037037e-05, "loss": 0.3971, "step": 6340 }, { "epoch": 4.535714285714286, "grad_norm": 1.5697578191757202, "learning_rate": 2.8333333333333335e-05, "loss": 0.3222, "step": 6350 }, { "epoch": 4.542857142857143, "grad_norm": 0.9937646389007568, "learning_rate": 2.8296296296296297e-05, "loss": 0.3673, "step": 6360 }, { "epoch": 4.55, "grad_norm": 1.935511589050293, "learning_rate": 2.8259259259259262e-05, "loss": 0.2385, "step": 6370 }, { "epoch": 4.557142857142857, "grad_norm": 1.8132340908050537, "learning_rate": 2.8222222222222223e-05, "loss": 0.226, "step": 6380 }, { "epoch": 4.564285714285714, "grad_norm": 0.8551497459411621, "learning_rate": 2.8185185185185185e-05, "loss": 0.3874, "step": 6390 }, { "epoch": 4.571428571428571, "grad_norm": 2.0115785598754883, "learning_rate": 2.814814814814815e-05, "loss": 0.2328, "step": 6400 }, { "epoch": 4.578571428571428, "grad_norm": 1.0582072734832764, "learning_rate": 2.811111111111111e-05, "loss": 0.3523, "step": 6410 }, { "epoch": 4.585714285714285, "grad_norm": 1.3484958410263062, "learning_rate": 2.8074074074074076e-05, "loss": 0.2867, "step": 6420 }, { "epoch": 4.5928571428571425, "grad_norm": 1.4483561515808105, "learning_rate": 2.8037037037037038e-05, "loss": 0.2623, "step": 6430 }, { "epoch": 4.6, "grad_norm": 2.2348268032073975, "learning_rate": 2.8000000000000003e-05, "loss": 0.3953, "step": 6440 }, { "epoch": 4.607142857142857, "grad_norm": 2.654326915740967, "learning_rate": 2.7962962962962965e-05, "loss": 0.3516, "step": 6450 }, { "epoch": 4.614285714285714, "grad_norm": 0.8564252257347107, "learning_rate": 2.7925925925925926e-05, "loss": 0.2497, "step": 6460 }, { "epoch": 4.621428571428572, "grad_norm": 2.7823233604431152, "learning_rate": 2.788888888888889e-05, "loss": 0.3975, "step": 6470 }, { "epoch": 4.628571428571428, "grad_norm": 1.0915263891220093, "learning_rate": 2.7851851851851853e-05, "loss": 0.2574, "step": 6480 }, { "epoch": 4.635714285714286, "grad_norm": 1.0459774732589722, "learning_rate": 2.7814814814814814e-05, "loss": 0.3426, "step": 6490 }, { "epoch": 4.642857142857143, "grad_norm": 3.1720130443573, "learning_rate": 2.777777777777778e-05, "loss": 0.3155, "step": 6500 }, { "epoch": 4.65, "grad_norm": 1.499185562133789, "learning_rate": 2.774074074074074e-05, "loss": 0.4515, "step": 6510 }, { "epoch": 4.6571428571428575, "grad_norm": 2.4211909770965576, "learning_rate": 2.7703703703703706e-05, "loss": 0.2963, "step": 6520 }, { "epoch": 4.664285714285715, "grad_norm": 2.167006492614746, "learning_rate": 2.7666666666666667e-05, "loss": 0.2625, "step": 6530 }, { "epoch": 4.671428571428572, "grad_norm": 1.8955094814300537, "learning_rate": 2.7629629629629632e-05, "loss": 0.3374, "step": 6540 }, { "epoch": 4.678571428571429, "grad_norm": 0.9967934489250183, "learning_rate": 2.7592592592592594e-05, "loss": 0.1611, "step": 6550 }, { "epoch": 4.685714285714286, "grad_norm": 1.007778525352478, "learning_rate": 2.7555555555555555e-05, "loss": 0.2516, "step": 6560 }, { "epoch": 4.692857142857143, "grad_norm": 2.9705958366394043, "learning_rate": 2.751851851851852e-05, "loss": 0.3893, "step": 6570 }, { "epoch": 4.7, "grad_norm": 2.689723491668701, "learning_rate": 2.7481481481481482e-05, "loss": 0.2404, "step": 6580 }, { "epoch": 4.707142857142857, "grad_norm": 2.095930337905884, "learning_rate": 2.7444444444444443e-05, "loss": 0.3239, "step": 6590 }, { "epoch": 4.714285714285714, "grad_norm": 1.9235697984695435, "learning_rate": 2.7407407407407408e-05, "loss": 0.2779, "step": 6600 }, { "epoch": 4.7214285714285715, "grad_norm": 3.329378843307495, "learning_rate": 2.7370370370370373e-05, "loss": 0.2791, "step": 6610 }, { "epoch": 4.728571428571429, "grad_norm": 1.9044978618621826, "learning_rate": 2.733333333333333e-05, "loss": 0.3757, "step": 6620 }, { "epoch": 4.735714285714286, "grad_norm": 2.207752227783203, "learning_rate": 2.7296296296296296e-05, "loss": 0.3391, "step": 6630 }, { "epoch": 4.742857142857143, "grad_norm": 2.0488827228546143, "learning_rate": 2.725925925925926e-05, "loss": 0.396, "step": 6640 }, { "epoch": 4.75, "grad_norm": 2.425340414047241, "learning_rate": 2.7222222222222223e-05, "loss": 0.2871, "step": 6650 }, { "epoch": 4.757142857142857, "grad_norm": 1.9408286809921265, "learning_rate": 2.7185185185185184e-05, "loss": 0.3144, "step": 6660 }, { "epoch": 4.764285714285714, "grad_norm": 1.864397406578064, "learning_rate": 2.714814814814815e-05, "loss": 0.2685, "step": 6670 }, { "epoch": 4.771428571428571, "grad_norm": 1.1838607788085938, "learning_rate": 2.7111111111111114e-05, "loss": 0.2751, "step": 6680 }, { "epoch": 4.7785714285714285, "grad_norm": 2.26408052444458, "learning_rate": 2.7074074074074072e-05, "loss": 0.3158, "step": 6690 }, { "epoch": 4.785714285714286, "grad_norm": 2.007145404815674, "learning_rate": 2.7037037037037037e-05, "loss": 0.1969, "step": 6700 }, { "epoch": 4.792857142857143, "grad_norm": 2.5209295749664307, "learning_rate": 2.7000000000000002e-05, "loss": 0.3022, "step": 6710 }, { "epoch": 4.8, "grad_norm": 2.3263044357299805, "learning_rate": 2.696296296296296e-05, "loss": 0.3799, "step": 6720 }, { "epoch": 4.807142857142857, "grad_norm": 1.3880634307861328, "learning_rate": 2.6925925925925925e-05, "loss": 0.2829, "step": 6730 }, { "epoch": 4.814285714285714, "grad_norm": 2.0264179706573486, "learning_rate": 2.688888888888889e-05, "loss": 0.2754, "step": 6740 }, { "epoch": 4.821428571428571, "grad_norm": 1.6165140867233276, "learning_rate": 2.6851851851851855e-05, "loss": 0.3171, "step": 6750 }, { "epoch": 4.828571428571428, "grad_norm": 1.6405526399612427, "learning_rate": 2.6814814814814814e-05, "loss": 0.4082, "step": 6760 }, { "epoch": 4.835714285714285, "grad_norm": 1.6864060163497925, "learning_rate": 2.677777777777778e-05, "loss": 0.2026, "step": 6770 }, { "epoch": 4.8428571428571425, "grad_norm": 1.4906965494155884, "learning_rate": 2.6740740740740743e-05, "loss": 0.2582, "step": 6780 }, { "epoch": 4.85, "grad_norm": 1.2227530479431152, "learning_rate": 2.67037037037037e-05, "loss": 0.185, "step": 6790 }, { "epoch": 4.857142857142857, "grad_norm": 1.2606697082519531, "learning_rate": 2.6666666666666667e-05, "loss": 0.2651, "step": 6800 }, { "epoch": 4.864285714285714, "grad_norm": 2.3722660541534424, "learning_rate": 2.662962962962963e-05, "loss": 0.2746, "step": 6810 }, { "epoch": 4.871428571428572, "grad_norm": 1.8622608184814453, "learning_rate": 2.659259259259259e-05, "loss": 0.3473, "step": 6820 }, { "epoch": 4.878571428571428, "grad_norm": 1.3814878463745117, "learning_rate": 2.6555555555555555e-05, "loss": 0.2706, "step": 6830 }, { "epoch": 4.885714285714286, "grad_norm": 2.013650894165039, "learning_rate": 2.651851851851852e-05, "loss": 0.2802, "step": 6840 }, { "epoch": 4.892857142857143, "grad_norm": 1.467282772064209, "learning_rate": 2.6481481481481485e-05, "loss": 0.3158, "step": 6850 }, { "epoch": 4.9, "grad_norm": 1.3019797801971436, "learning_rate": 2.6444444444444443e-05, "loss": 0.2012, "step": 6860 }, { "epoch": 4.9071428571428575, "grad_norm": 1.1120600700378418, "learning_rate": 2.6407407407407408e-05, "loss": 0.1385, "step": 6870 }, { "epoch": 4.914285714285715, "grad_norm": 1.470406413078308, "learning_rate": 2.6370370370370373e-05, "loss": 0.3014, "step": 6880 }, { "epoch": 4.921428571428572, "grad_norm": 2.237767457962036, "learning_rate": 2.633333333333333e-05, "loss": 0.2677, "step": 6890 }, { "epoch": 4.928571428571429, "grad_norm": 1.3994693756103516, "learning_rate": 2.6296296296296296e-05, "loss": 0.4261, "step": 6900 }, { "epoch": 4.935714285714286, "grad_norm": 2.21905517578125, "learning_rate": 2.625925925925926e-05, "loss": 0.3701, "step": 6910 }, { "epoch": 4.942857142857143, "grad_norm": 2.8682186603546143, "learning_rate": 2.6222222222222226e-05, "loss": 0.4047, "step": 6920 }, { "epoch": 4.95, "grad_norm": 1.9691041707992554, "learning_rate": 2.6185185185185184e-05, "loss": 0.2735, "step": 6930 }, { "epoch": 4.957142857142857, "grad_norm": 1.7553354501724243, "learning_rate": 2.614814814814815e-05, "loss": 0.2381, "step": 6940 }, { "epoch": 4.964285714285714, "grad_norm": 1.7930738925933838, "learning_rate": 2.6111111111111114e-05, "loss": 0.2838, "step": 6950 }, { "epoch": 4.9714285714285715, "grad_norm": 2.4153687953948975, "learning_rate": 2.6074074074074072e-05, "loss": 0.4002, "step": 6960 }, { "epoch": 4.978571428571429, "grad_norm": 1.392898678779602, "learning_rate": 2.6037037037037037e-05, "loss": 0.248, "step": 6970 }, { "epoch": 4.985714285714286, "grad_norm": 1.7113401889801025, "learning_rate": 2.6000000000000002e-05, "loss": 0.2871, "step": 6980 }, { "epoch": 4.992857142857143, "grad_norm": 2.4877359867095947, "learning_rate": 2.5962962962962967e-05, "loss": 0.2443, "step": 6990 }, { "epoch": 5.0, "grad_norm": 1.7225149869918823, "learning_rate": 2.5925925925925925e-05, "loss": 0.186, "step": 7000 }, { "epoch": 5.0, "eval_loss": 0.3584790527820587, "eval_rouge1": 0.9047, "eval_rouge2": 0.8434, "eval_rougeL": 0.9018, "eval_runtime": 122.2903, "eval_samples_per_second": 11.448, "eval_steps_per_second": 5.724, "step": 7000 }, { "epoch": 5.007142857142857, "grad_norm": 2.1430020332336426, "learning_rate": 2.588888888888889e-05, "loss": 0.3477, "step": 7010 }, { "epoch": 5.014285714285714, "grad_norm": 0.958677351474762, "learning_rate": 2.5851851851851855e-05, "loss": 0.2474, "step": 7020 }, { "epoch": 5.021428571428571, "grad_norm": 2.315269947052002, "learning_rate": 2.5814814814814813e-05, "loss": 0.2786, "step": 7030 }, { "epoch": 5.0285714285714285, "grad_norm": 1.3595519065856934, "learning_rate": 2.5777777777777778e-05, "loss": 0.2286, "step": 7040 }, { "epoch": 5.035714285714286, "grad_norm": 1.44675874710083, "learning_rate": 2.5740740740740743e-05, "loss": 0.2679, "step": 7050 }, { "epoch": 5.042857142857143, "grad_norm": 1.754285454750061, "learning_rate": 2.5703703703703708e-05, "loss": 0.196, "step": 7060 }, { "epoch": 5.05, "grad_norm": 2.9333369731903076, "learning_rate": 2.5666666666666666e-05, "loss": 0.1694, "step": 7070 }, { "epoch": 5.057142857142857, "grad_norm": 2.6653859615325928, "learning_rate": 2.562962962962963e-05, "loss": 0.2642, "step": 7080 }, { "epoch": 5.064285714285714, "grad_norm": 1.8362854719161987, "learning_rate": 2.5592592592592596e-05, "loss": 0.3614, "step": 7090 }, { "epoch": 5.071428571428571, "grad_norm": 1.427701473236084, "learning_rate": 2.5555555555555554e-05, "loss": 0.2351, "step": 7100 }, { "epoch": 5.078571428571428, "grad_norm": 2.3684027194976807, "learning_rate": 2.551851851851852e-05, "loss": 0.2803, "step": 7110 }, { "epoch": 5.085714285714285, "grad_norm": 1.5823931694030762, "learning_rate": 2.5481481481481484e-05, "loss": 0.2749, "step": 7120 }, { "epoch": 5.0928571428571425, "grad_norm": 1.6682019233703613, "learning_rate": 2.5444444444444442e-05, "loss": 0.3219, "step": 7130 }, { "epoch": 5.1, "grad_norm": 1.7803760766983032, "learning_rate": 2.5407407407407407e-05, "loss": 0.2553, "step": 7140 }, { "epoch": 5.107142857142857, "grad_norm": 1.945063591003418, "learning_rate": 2.5370370370370372e-05, "loss": 0.1739, "step": 7150 }, { "epoch": 5.114285714285714, "grad_norm": 1.308371663093567, "learning_rate": 2.5333333333333337e-05, "loss": 0.2605, "step": 7160 }, { "epoch": 5.121428571428571, "grad_norm": 1.906160593032837, "learning_rate": 2.5296296296296295e-05, "loss": 0.2071, "step": 7170 }, { "epoch": 5.128571428571428, "grad_norm": 1.6239346265792847, "learning_rate": 2.525925925925926e-05, "loss": 0.2054, "step": 7180 }, { "epoch": 5.135714285714286, "grad_norm": 1.6175967454910278, "learning_rate": 2.5222222222222225e-05, "loss": 0.2266, "step": 7190 }, { "epoch": 5.142857142857143, "grad_norm": 1.938736915588379, "learning_rate": 2.5185185185185183e-05, "loss": 0.2932, "step": 7200 }, { "epoch": 5.15, "grad_norm": 1.7323144674301147, "learning_rate": 2.5148148148148148e-05, "loss": 0.2762, "step": 7210 }, { "epoch": 5.1571428571428575, "grad_norm": 1.859667181968689, "learning_rate": 2.5111111111111113e-05, "loss": 0.3213, "step": 7220 }, { "epoch": 5.164285714285715, "grad_norm": 1.22067391872406, "learning_rate": 2.5074074074074078e-05, "loss": 0.2246, "step": 7230 }, { "epoch": 5.171428571428572, "grad_norm": 0.9384840726852417, "learning_rate": 2.5037037037037036e-05, "loss": 0.3364, "step": 7240 }, { "epoch": 5.178571428571429, "grad_norm": 1.4494845867156982, "learning_rate": 2.5e-05, "loss": 0.332, "step": 7250 }, { "epoch": 5.185714285714286, "grad_norm": 2.3436357975006104, "learning_rate": 2.4962962962962963e-05, "loss": 0.1456, "step": 7260 }, { "epoch": 5.192857142857143, "grad_norm": 1.0446144342422485, "learning_rate": 2.4925925925925928e-05, "loss": 0.1995, "step": 7270 }, { "epoch": 5.2, "grad_norm": 2.325575113296509, "learning_rate": 2.488888888888889e-05, "loss": 0.3068, "step": 7280 }, { "epoch": 5.207142857142857, "grad_norm": 2.100825309753418, "learning_rate": 2.4851851851851854e-05, "loss": 0.2659, "step": 7290 }, { "epoch": 5.214285714285714, "grad_norm": 2.6580276489257812, "learning_rate": 2.4814814814814816e-05, "loss": 0.2872, "step": 7300 }, { "epoch": 5.2214285714285715, "grad_norm": 2.505577564239502, "learning_rate": 2.477777777777778e-05, "loss": 0.2574, "step": 7310 }, { "epoch": 5.228571428571429, "grad_norm": 1.4997559785842896, "learning_rate": 2.4740740740740742e-05, "loss": 0.2192, "step": 7320 }, { "epoch": 5.235714285714286, "grad_norm": 1.9084120988845825, "learning_rate": 2.4703703703703704e-05, "loss": 0.2836, "step": 7330 }, { "epoch": 5.242857142857143, "grad_norm": 1.1388484239578247, "learning_rate": 2.466666666666667e-05, "loss": 0.2426, "step": 7340 }, { "epoch": 5.25, "grad_norm": 1.0559568405151367, "learning_rate": 2.462962962962963e-05, "loss": 0.344, "step": 7350 }, { "epoch": 5.257142857142857, "grad_norm": 1.4024419784545898, "learning_rate": 2.4592592592592595e-05, "loss": 0.2121, "step": 7360 }, { "epoch": 5.264285714285714, "grad_norm": 1.4338841438293457, "learning_rate": 2.4555555555555557e-05, "loss": 0.3329, "step": 7370 }, { "epoch": 5.271428571428571, "grad_norm": 1.4188106060028076, "learning_rate": 2.451851851851852e-05, "loss": 0.2479, "step": 7380 }, { "epoch": 5.2785714285714285, "grad_norm": 1.4320842027664185, "learning_rate": 2.4481481481481483e-05, "loss": 0.156, "step": 7390 }, { "epoch": 5.285714285714286, "grad_norm": 3.022641181945801, "learning_rate": 2.4444444444444445e-05, "loss": 0.1962, "step": 7400 }, { "epoch": 5.292857142857143, "grad_norm": 2.3267366886138916, "learning_rate": 2.440740740740741e-05, "loss": 0.2713, "step": 7410 }, { "epoch": 5.3, "grad_norm": 2.685345411300659, "learning_rate": 2.437037037037037e-05, "loss": 0.3345, "step": 7420 }, { "epoch": 5.307142857142857, "grad_norm": 0.9320240020751953, "learning_rate": 2.4333333333333336e-05, "loss": 0.3758, "step": 7430 }, { "epoch": 5.314285714285714, "grad_norm": 1.8067562580108643, "learning_rate": 2.4296296296296298e-05, "loss": 0.2958, "step": 7440 }, { "epoch": 5.321428571428571, "grad_norm": 1.5514296293258667, "learning_rate": 2.425925925925926e-05, "loss": 0.3268, "step": 7450 }, { "epoch": 5.328571428571428, "grad_norm": 1.684311032295227, "learning_rate": 2.4222222222222224e-05, "loss": 0.2947, "step": 7460 }, { "epoch": 5.335714285714285, "grad_norm": 2.0809545516967773, "learning_rate": 2.4185185185185186e-05, "loss": 0.2928, "step": 7470 }, { "epoch": 5.3428571428571425, "grad_norm": 2.5362987518310547, "learning_rate": 2.414814814814815e-05, "loss": 0.1962, "step": 7480 }, { "epoch": 5.35, "grad_norm": 0.636965274810791, "learning_rate": 2.4111111111111113e-05, "loss": 0.1694, "step": 7490 }, { "epoch": 5.357142857142857, "grad_norm": 2.1662261486053467, "learning_rate": 2.4074074074074074e-05, "loss": 0.3111, "step": 7500 }, { "epoch": 5.364285714285714, "grad_norm": 1.749324083328247, "learning_rate": 2.403703703703704e-05, "loss": 0.2521, "step": 7510 }, { "epoch": 5.371428571428572, "grad_norm": 2.3572323322296143, "learning_rate": 2.4e-05, "loss": 0.1527, "step": 7520 }, { "epoch": 5.378571428571428, "grad_norm": 1.274588942527771, "learning_rate": 2.3962962962962966e-05, "loss": 0.2757, "step": 7530 }, { "epoch": 5.385714285714286, "grad_norm": 1.2197136878967285, "learning_rate": 2.3925925925925927e-05, "loss": 0.2288, "step": 7540 }, { "epoch": 5.392857142857143, "grad_norm": 1.6061832904815674, "learning_rate": 2.3888888888888892e-05, "loss": 0.3292, "step": 7550 }, { "epoch": 5.4, "grad_norm": 1.8271028995513916, "learning_rate": 2.3851851851851854e-05, "loss": 0.2392, "step": 7560 }, { "epoch": 5.4071428571428575, "grad_norm": 1.8294018507003784, "learning_rate": 2.3814814814814815e-05, "loss": 0.2554, "step": 7570 }, { "epoch": 5.414285714285715, "grad_norm": 1.253556728363037, "learning_rate": 2.377777777777778e-05, "loss": 0.2008, "step": 7580 }, { "epoch": 5.421428571428572, "grad_norm": 1.1980758905410767, "learning_rate": 2.3740740740740742e-05, "loss": 0.265, "step": 7590 }, { "epoch": 5.428571428571429, "grad_norm": 1.5337406396865845, "learning_rate": 2.3703703703703707e-05, "loss": 0.4126, "step": 7600 }, { "epoch": 5.435714285714286, "grad_norm": 2.981381893157959, "learning_rate": 2.3666666666666668e-05, "loss": 0.3554, "step": 7610 }, { "epoch": 5.442857142857143, "grad_norm": 1.927241325378418, "learning_rate": 2.3629629629629633e-05, "loss": 0.3148, "step": 7620 }, { "epoch": 5.45, "grad_norm": 1.0788408517837524, "learning_rate": 2.3592592592592595e-05, "loss": 0.2421, "step": 7630 }, { "epoch": 5.457142857142857, "grad_norm": 1.250436782836914, "learning_rate": 2.3555555555555556e-05, "loss": 0.2797, "step": 7640 }, { "epoch": 5.464285714285714, "grad_norm": 1.2195000648498535, "learning_rate": 2.351851851851852e-05, "loss": 0.1702, "step": 7650 }, { "epoch": 5.4714285714285715, "grad_norm": 1.773098349571228, "learning_rate": 2.3481481481481483e-05, "loss": 0.2383, "step": 7660 }, { "epoch": 5.478571428571429, "grad_norm": 1.540499210357666, "learning_rate": 2.3444444444444448e-05, "loss": 0.2741, "step": 7670 }, { "epoch": 5.485714285714286, "grad_norm": 1.3515613079071045, "learning_rate": 2.340740740740741e-05, "loss": 0.4365, "step": 7680 }, { "epoch": 5.492857142857143, "grad_norm": 1.5094635486602783, "learning_rate": 2.337037037037037e-05, "loss": 0.2777, "step": 7690 }, { "epoch": 5.5, "grad_norm": 1.123542070388794, "learning_rate": 2.3333333333333336e-05, "loss": 0.3406, "step": 7700 }, { "epoch": 5.507142857142857, "grad_norm": 1.0701942443847656, "learning_rate": 2.3296296296296297e-05, "loss": 0.2499, "step": 7710 }, { "epoch": 5.514285714285714, "grad_norm": 1.270992636680603, "learning_rate": 2.3259259259259262e-05, "loss": 0.2044, "step": 7720 }, { "epoch": 5.521428571428571, "grad_norm": 1.5586347579956055, "learning_rate": 2.3222222222222224e-05, "loss": 0.2573, "step": 7730 }, { "epoch": 5.5285714285714285, "grad_norm": 0.9162809252738953, "learning_rate": 2.318518518518519e-05, "loss": 0.2245, "step": 7740 }, { "epoch": 5.535714285714286, "grad_norm": 1.7767843008041382, "learning_rate": 2.314814814814815e-05, "loss": 0.276, "step": 7750 }, { "epoch": 5.542857142857143, "grad_norm": 2.538541316986084, "learning_rate": 2.3111111111111112e-05, "loss": 0.3448, "step": 7760 }, { "epoch": 5.55, "grad_norm": 1.5738705396652222, "learning_rate": 2.3074074074074077e-05, "loss": 0.3023, "step": 7770 }, { "epoch": 5.557142857142857, "grad_norm": 0.9919751286506653, "learning_rate": 2.303703703703704e-05, "loss": 0.2979, "step": 7780 }, { "epoch": 5.564285714285714, "grad_norm": 1.079817771911621, "learning_rate": 2.3000000000000003e-05, "loss": 0.3141, "step": 7790 }, { "epoch": 5.571428571428571, "grad_norm": 1.709007978439331, "learning_rate": 2.2962962962962965e-05, "loss": 0.2452, "step": 7800 }, { "epoch": 5.578571428571428, "grad_norm": 0.9877552390098572, "learning_rate": 2.2925925925925927e-05, "loss": 0.2796, "step": 7810 }, { "epoch": 5.585714285714285, "grad_norm": 1.9676953554153442, "learning_rate": 2.288888888888889e-05, "loss": 0.2314, "step": 7820 }, { "epoch": 5.5928571428571425, "grad_norm": 1.778275966644287, "learning_rate": 2.2851851851851853e-05, "loss": 0.3033, "step": 7830 }, { "epoch": 5.6, "grad_norm": 0.9746494889259338, "learning_rate": 2.2814814814814818e-05, "loss": 0.2459, "step": 7840 }, { "epoch": 5.607142857142857, "grad_norm": 2.0238702297210693, "learning_rate": 2.277777777777778e-05, "loss": 0.2494, "step": 7850 }, { "epoch": 5.614285714285714, "grad_norm": 1.2345530986785889, "learning_rate": 2.2740740740740744e-05, "loss": 0.4614, "step": 7860 }, { "epoch": 5.621428571428572, "grad_norm": 0.9835256338119507, "learning_rate": 2.2703703703703706e-05, "loss": 0.3519, "step": 7870 }, { "epoch": 5.628571428571428, "grad_norm": 1.9753897190093994, "learning_rate": 2.2666666666666668e-05, "loss": 0.2895, "step": 7880 }, { "epoch": 5.635714285714286, "grad_norm": 1.7247217893600464, "learning_rate": 2.2629629629629633e-05, "loss": 0.1994, "step": 7890 }, { "epoch": 5.642857142857143, "grad_norm": 1.8406201601028442, "learning_rate": 2.2592592592592594e-05, "loss": 0.1872, "step": 7900 }, { "epoch": 5.65, "grad_norm": 1.4785393476486206, "learning_rate": 2.255555555555556e-05, "loss": 0.2811, "step": 7910 }, { "epoch": 5.6571428571428575, "grad_norm": 2.23652982711792, "learning_rate": 2.251851851851852e-05, "loss": 0.3071, "step": 7920 }, { "epoch": 5.664285714285715, "grad_norm": 1.9096837043762207, "learning_rate": 2.2481481481481486e-05, "loss": 0.2115, "step": 7930 }, { "epoch": 5.671428571428572, "grad_norm": 2.0808775424957275, "learning_rate": 2.2444444444444447e-05, "loss": 0.3923, "step": 7940 }, { "epoch": 5.678571428571429, "grad_norm": 1.5935535430908203, "learning_rate": 2.240740740740741e-05, "loss": 0.3461, "step": 7950 }, { "epoch": 5.685714285714286, "grad_norm": 1.1959024667739868, "learning_rate": 2.2370370370370374e-05, "loss": 0.2016, "step": 7960 }, { "epoch": 5.692857142857143, "grad_norm": 1.0776904821395874, "learning_rate": 2.2333333333333335e-05, "loss": 0.3476, "step": 7970 }, { "epoch": 5.7, "grad_norm": 1.884531855583191, "learning_rate": 2.2296296296296297e-05, "loss": 0.2861, "step": 7980 }, { "epoch": 5.707142857142857, "grad_norm": 1.2476330995559692, "learning_rate": 2.2259259259259262e-05, "loss": 0.2152, "step": 7990 }, { "epoch": 5.714285714285714, "grad_norm": 2.106348752975464, "learning_rate": 2.2222222222222223e-05, "loss": 0.3022, "step": 8000 }, { "epoch": 5.714285714285714, "eval_loss": 0.3492221236228943, "eval_rouge1": 0.9062, "eval_rouge2": 0.8456, "eval_rougeL": 0.9033, "eval_runtime": 122.1433, "eval_samples_per_second": 11.462, "eval_steps_per_second": 5.731, "step": 8000 }, { "epoch": 5.7214285714285715, "grad_norm": 1.9454623460769653, "learning_rate": 2.2185185185185188e-05, "loss": 0.232, "step": 8010 }, { "epoch": 5.728571428571429, "grad_norm": 1.9178905487060547, "learning_rate": 2.214814814814815e-05, "loss": 0.2278, "step": 8020 }, { "epoch": 5.735714285714286, "grad_norm": 1.6279345750808716, "learning_rate": 2.211111111111111e-05, "loss": 0.2423, "step": 8030 }, { "epoch": 5.742857142857143, "grad_norm": 2.7422447204589844, "learning_rate": 2.2074074074074076e-05, "loss": 0.3129, "step": 8040 }, { "epoch": 5.75, "grad_norm": 1.7606775760650635, "learning_rate": 2.2037037037037038e-05, "loss": 0.217, "step": 8050 }, { "epoch": 5.757142857142857, "grad_norm": 2.970276355743408, "learning_rate": 2.2000000000000003e-05, "loss": 0.3246, "step": 8060 }, { "epoch": 5.764285714285714, "grad_norm": 1.6729111671447754, "learning_rate": 2.1962962962962964e-05, "loss": 0.224, "step": 8070 }, { "epoch": 5.771428571428571, "grad_norm": 2.103708267211914, "learning_rate": 2.1925925925925926e-05, "loss": 0.2256, "step": 8080 }, { "epoch": 5.7785714285714285, "grad_norm": 1.7059235572814941, "learning_rate": 2.188888888888889e-05, "loss": 0.2986, "step": 8090 }, { "epoch": 5.785714285714286, "grad_norm": 1.6239415407180786, "learning_rate": 2.1851851851851852e-05, "loss": 0.3007, "step": 8100 }, { "epoch": 5.792857142857143, "grad_norm": 1.5316799879074097, "learning_rate": 2.1814814814814817e-05, "loss": 0.2295, "step": 8110 }, { "epoch": 5.8, "grad_norm": 0.9283231496810913, "learning_rate": 2.177777777777778e-05, "loss": 0.1624, "step": 8120 }, { "epoch": 5.807142857142857, "grad_norm": 1.4230540990829468, "learning_rate": 2.174074074074074e-05, "loss": 0.1686, "step": 8130 }, { "epoch": 5.814285714285714, "grad_norm": 1.8694360256195068, "learning_rate": 2.1703703703703705e-05, "loss": 0.3416, "step": 8140 }, { "epoch": 5.821428571428571, "grad_norm": 2.144221782684326, "learning_rate": 2.1666666666666667e-05, "loss": 0.2471, "step": 8150 }, { "epoch": 5.828571428571428, "grad_norm": 2.5672965049743652, "learning_rate": 2.162962962962963e-05, "loss": 0.3354, "step": 8160 }, { "epoch": 5.835714285714285, "grad_norm": 1.093578577041626, "learning_rate": 2.1592592592592594e-05, "loss": 0.262, "step": 8170 }, { "epoch": 5.8428571428571425, "grad_norm": 0.7076272368431091, "learning_rate": 2.1555555555555555e-05, "loss": 0.254, "step": 8180 }, { "epoch": 5.85, "grad_norm": 2.2301125526428223, "learning_rate": 2.151851851851852e-05, "loss": 0.1906, "step": 8190 }, { "epoch": 5.857142857142857, "grad_norm": 1.704037070274353, "learning_rate": 2.148148148148148e-05, "loss": 0.2802, "step": 8200 }, { "epoch": 5.864285714285714, "grad_norm": 1.4877769947052002, "learning_rate": 2.1444444444444443e-05, "loss": 0.3327, "step": 8210 }, { "epoch": 5.871428571428572, "grad_norm": 1.436059594154358, "learning_rate": 2.1407407407407408e-05, "loss": 0.271, "step": 8220 }, { "epoch": 5.878571428571428, "grad_norm": 1.357176661491394, "learning_rate": 2.137037037037037e-05, "loss": 0.2481, "step": 8230 }, { "epoch": 5.885714285714286, "grad_norm": 1.846593976020813, "learning_rate": 2.1333333333333335e-05, "loss": 0.2641, "step": 8240 }, { "epoch": 5.892857142857143, "grad_norm": 2.4631927013397217, "learning_rate": 2.1296296296296296e-05, "loss": 0.2832, "step": 8250 }, { "epoch": 5.9, "grad_norm": 1.8715349435806274, "learning_rate": 2.1259259259259258e-05, "loss": 0.4157, "step": 8260 }, { "epoch": 5.9071428571428575, "grad_norm": 2.3173437118530273, "learning_rate": 2.1222222222222223e-05, "loss": 0.353, "step": 8270 }, { "epoch": 5.914285714285715, "grad_norm": 2.049422025680542, "learning_rate": 2.1185185185185184e-05, "loss": 0.2613, "step": 8280 }, { "epoch": 5.921428571428572, "grad_norm": 1.281841516494751, "learning_rate": 2.114814814814815e-05, "loss": 0.2287, "step": 8290 }, { "epoch": 5.928571428571429, "grad_norm": 1.007407546043396, "learning_rate": 2.111111111111111e-05, "loss": 0.2139, "step": 8300 }, { "epoch": 5.935714285714286, "grad_norm": 1.8036701679229736, "learning_rate": 2.1074074074074072e-05, "loss": 0.2511, "step": 8310 }, { "epoch": 5.942857142857143, "grad_norm": 0.9559861421585083, "learning_rate": 2.1037037037037037e-05, "loss": 0.3371, "step": 8320 }, { "epoch": 5.95, "grad_norm": 2.136070489883423, "learning_rate": 2.1e-05, "loss": 0.2321, "step": 8330 }, { "epoch": 5.957142857142857, "grad_norm": 1.2442055940628052, "learning_rate": 2.0962962962962964e-05, "loss": 0.1819, "step": 8340 }, { "epoch": 5.964285714285714, "grad_norm": 2.0479979515075684, "learning_rate": 2.0925925925925925e-05, "loss": 0.3796, "step": 8350 }, { "epoch": 5.9714285714285715, "grad_norm": 1.6974670886993408, "learning_rate": 2.088888888888889e-05, "loss": 0.1947, "step": 8360 }, { "epoch": 5.978571428571429, "grad_norm": 2.1099231243133545, "learning_rate": 2.0851851851851852e-05, "loss": 0.1847, "step": 8370 }, { "epoch": 5.985714285714286, "grad_norm": 1.9181057214736938, "learning_rate": 2.0814814814814813e-05, "loss": 0.3513, "step": 8380 }, { "epoch": 5.992857142857143, "grad_norm": 1.0576838254928589, "learning_rate": 2.077777777777778e-05, "loss": 0.2663, "step": 8390 }, { "epoch": 6.0, "grad_norm": 1.1283502578735352, "learning_rate": 2.074074074074074e-05, "loss": 0.2872, "step": 8400 }, { "epoch": 6.007142857142857, "grad_norm": 0.7001394629478455, "learning_rate": 2.0703703703703705e-05, "loss": 0.277, "step": 8410 }, { "epoch": 6.014285714285714, "grad_norm": 1.6374051570892334, "learning_rate": 2.0666666666666666e-05, "loss": 0.1849, "step": 8420 }, { "epoch": 6.021428571428571, "grad_norm": 1.674914836883545, "learning_rate": 2.0629629629629628e-05, "loss": 0.1756, "step": 8430 }, { "epoch": 6.0285714285714285, "grad_norm": 2.592038154602051, "learning_rate": 2.0592592592592593e-05, "loss": 0.3725, "step": 8440 }, { "epoch": 6.035714285714286, "grad_norm": 2.942992925643921, "learning_rate": 2.0555555555555555e-05, "loss": 0.2529, "step": 8450 }, { "epoch": 6.042857142857143, "grad_norm": 1.7580475807189941, "learning_rate": 2.051851851851852e-05, "loss": 0.1549, "step": 8460 }, { "epoch": 6.05, "grad_norm": 1.9032413959503174, "learning_rate": 2.048148148148148e-05, "loss": 0.2529, "step": 8470 }, { "epoch": 6.057142857142857, "grad_norm": 1.7678323984146118, "learning_rate": 2.0444444444444446e-05, "loss": 0.1935, "step": 8480 }, { "epoch": 6.064285714285714, "grad_norm": 1.7014952898025513, "learning_rate": 2.0407407407407408e-05, "loss": 0.1965, "step": 8490 }, { "epoch": 6.071428571428571, "grad_norm": 2.053157091140747, "learning_rate": 2.037037037037037e-05, "loss": 0.2045, "step": 8500 }, { "epoch": 6.078571428571428, "grad_norm": 2.448059320449829, "learning_rate": 2.0333333333333334e-05, "loss": 0.2275, "step": 8510 }, { "epoch": 6.085714285714285, "grad_norm": 1.3505144119262695, "learning_rate": 2.0296296296296296e-05, "loss": 0.192, "step": 8520 }, { "epoch": 6.0928571428571425, "grad_norm": 1.0717148780822754, "learning_rate": 2.025925925925926e-05, "loss": 0.3017, "step": 8530 }, { "epoch": 6.1, "grad_norm": 2.872880220413208, "learning_rate": 2.0222222222222222e-05, "loss": 0.2583, "step": 8540 }, { "epoch": 6.107142857142857, "grad_norm": 1.559588074684143, "learning_rate": 2.0185185185185187e-05, "loss": 0.1557, "step": 8550 }, { "epoch": 6.114285714285714, "grad_norm": 1.4375160932540894, "learning_rate": 2.014814814814815e-05, "loss": 0.1165, "step": 8560 }, { "epoch": 6.121428571428571, "grad_norm": 1.1922268867492676, "learning_rate": 2.011111111111111e-05, "loss": 0.1995, "step": 8570 }, { "epoch": 6.128571428571428, "grad_norm": 2.267056465148926, "learning_rate": 2.0074074074074075e-05, "loss": 0.2176, "step": 8580 }, { "epoch": 6.135714285714286, "grad_norm": 1.5485496520996094, "learning_rate": 2.0037037037037037e-05, "loss": 0.206, "step": 8590 }, { "epoch": 6.142857142857143, "grad_norm": 1.9538283348083496, "learning_rate": 2e-05, "loss": 0.3173, "step": 8600 }, { "epoch": 6.15, "grad_norm": 2.8216044902801514, "learning_rate": 1.9962962962962963e-05, "loss": 0.3077, "step": 8610 }, { "epoch": 6.1571428571428575, "grad_norm": 2.5293240547180176, "learning_rate": 1.9925925925925925e-05, "loss": 0.2829, "step": 8620 }, { "epoch": 6.164285714285715, "grad_norm": 1.7947183847427368, "learning_rate": 1.988888888888889e-05, "loss": 0.3212, "step": 8630 }, { "epoch": 6.171428571428572, "grad_norm": 1.541588544845581, "learning_rate": 1.985185185185185e-05, "loss": 0.1985, "step": 8640 }, { "epoch": 6.178571428571429, "grad_norm": 1.286007046699524, "learning_rate": 1.9814814814814816e-05, "loss": 0.279, "step": 8650 }, { "epoch": 6.185714285714286, "grad_norm": 1.8692234754562378, "learning_rate": 1.9777777777777778e-05, "loss": 0.303, "step": 8660 }, { "epoch": 6.192857142857143, "grad_norm": 1.7906513214111328, "learning_rate": 1.9740740740740743e-05, "loss": 0.218, "step": 8670 }, { "epoch": 6.2, "grad_norm": 2.0737709999084473, "learning_rate": 1.9703703703703704e-05, "loss": 0.1559, "step": 8680 }, { "epoch": 6.207142857142857, "grad_norm": 1.8082749843597412, "learning_rate": 1.9666666666666666e-05, "loss": 0.2713, "step": 8690 }, { "epoch": 6.214285714285714, "grad_norm": 1.8988617658615112, "learning_rate": 1.962962962962963e-05, "loss": 0.2362, "step": 8700 }, { "epoch": 6.2214285714285715, "grad_norm": 0.8727281093597412, "learning_rate": 1.9592592592592592e-05, "loss": 0.2571, "step": 8710 }, { "epoch": 6.228571428571429, "grad_norm": 1.0203776359558105, "learning_rate": 1.9555555555555557e-05, "loss": 0.2884, "step": 8720 }, { "epoch": 6.235714285714286, "grad_norm": 1.5776811838150024, "learning_rate": 1.951851851851852e-05, "loss": 0.3115, "step": 8730 }, { "epoch": 6.242857142857143, "grad_norm": 2.1000545024871826, "learning_rate": 1.948148148148148e-05, "loss": 0.2936, "step": 8740 }, { "epoch": 6.25, "grad_norm": 1.991640329360962, "learning_rate": 1.9444444444444445e-05, "loss": 0.2214, "step": 8750 }, { "epoch": 6.257142857142857, "grad_norm": 3.8238637447357178, "learning_rate": 1.9407407407407407e-05, "loss": 0.2738, "step": 8760 }, { "epoch": 6.264285714285714, "grad_norm": 2.818711042404175, "learning_rate": 1.9370370370370372e-05, "loss": 0.3107, "step": 8770 }, { "epoch": 6.271428571428571, "grad_norm": 1.4565989971160889, "learning_rate": 1.9333333333333333e-05, "loss": 0.1351, "step": 8780 }, { "epoch": 6.2785714285714285, "grad_norm": 1.6833415031433105, "learning_rate": 1.92962962962963e-05, "loss": 0.3359, "step": 8790 }, { "epoch": 6.285714285714286, "grad_norm": 3.662572145462036, "learning_rate": 1.925925925925926e-05, "loss": 0.2338, "step": 8800 }, { "epoch": 6.292857142857143, "grad_norm": 1.9166165590286255, "learning_rate": 1.922222222222222e-05, "loss": 0.1908, "step": 8810 }, { "epoch": 6.3, "grad_norm": 2.665553331375122, "learning_rate": 1.9185185185185186e-05, "loss": 0.246, "step": 8820 }, { "epoch": 6.307142857142857, "grad_norm": 1.601194143295288, "learning_rate": 1.9148148148148148e-05, "loss": 0.2392, "step": 8830 }, { "epoch": 6.314285714285714, "grad_norm": 1.7382382154464722, "learning_rate": 1.9111111111111113e-05, "loss": 0.2919, "step": 8840 }, { "epoch": 6.321428571428571, "grad_norm": 1.0822237730026245, "learning_rate": 1.9074074074074075e-05, "loss": 0.1179, "step": 8850 }, { "epoch": 6.328571428571428, "grad_norm": 1.9691376686096191, "learning_rate": 1.903703703703704e-05, "loss": 0.3934, "step": 8860 }, { "epoch": 6.335714285714285, "grad_norm": 0.8395001292228699, "learning_rate": 1.9e-05, "loss": 0.2004, "step": 8870 }, { "epoch": 6.3428571428571425, "grad_norm": 1.6967720985412598, "learning_rate": 1.8962962962962963e-05, "loss": 0.204, "step": 8880 }, { "epoch": 6.35, "grad_norm": 1.2601035833358765, "learning_rate": 1.8925925925925928e-05, "loss": 0.2769, "step": 8890 }, { "epoch": 6.357142857142857, "grad_norm": 1.560940146446228, "learning_rate": 1.888888888888889e-05, "loss": 0.1409, "step": 8900 }, { "epoch": 6.364285714285714, "grad_norm": 1.645814061164856, "learning_rate": 1.8851851851851854e-05, "loss": 0.1914, "step": 8910 }, { "epoch": 6.371428571428572, "grad_norm": 1.4886109828948975, "learning_rate": 1.8814814814814816e-05, "loss": 0.3517, "step": 8920 }, { "epoch": 6.378571428571428, "grad_norm": 1.2002378702163696, "learning_rate": 1.8777777777777777e-05, "loss": 0.2346, "step": 8930 }, { "epoch": 6.385714285714286, "grad_norm": 2.4492478370666504, "learning_rate": 1.8740740740740742e-05, "loss": 0.2104, "step": 8940 }, { "epoch": 6.392857142857143, "grad_norm": 2.315610408782959, "learning_rate": 1.8703703703703704e-05, "loss": 0.2321, "step": 8950 }, { "epoch": 6.4, "grad_norm": 2.116260528564453, "learning_rate": 1.866666666666667e-05, "loss": 0.2092, "step": 8960 }, { "epoch": 6.4071428571428575, "grad_norm": 1.7362505197525024, "learning_rate": 1.862962962962963e-05, "loss": 0.2598, "step": 8970 }, { "epoch": 6.414285714285715, "grad_norm": 2.1754469871520996, "learning_rate": 1.8592592592592595e-05, "loss": 0.3035, "step": 8980 }, { "epoch": 6.421428571428572, "grad_norm": 1.448285698890686, "learning_rate": 1.8555555555555557e-05, "loss": 0.227, "step": 8990 }, { "epoch": 6.428571428571429, "grad_norm": 1.888242483139038, "learning_rate": 1.8518518518518518e-05, "loss": 0.1618, "step": 9000 }, { "epoch": 6.428571428571429, "eval_loss": 0.3434120714664459, "eval_rouge1": 0.908, "eval_rouge2": 0.8486, "eval_rougeL": 0.9052, "eval_runtime": 122.2937, "eval_samples_per_second": 11.448, "eval_steps_per_second": 5.724, "step": 9000 }, { "epoch": 6.435714285714286, "grad_norm": 2.5552051067352295, "learning_rate": 1.8481481481481483e-05, "loss": 0.4376, "step": 9010 }, { "epoch": 6.442857142857143, "grad_norm": 2.0973517894744873, "learning_rate": 1.8444444444444445e-05, "loss": 0.2163, "step": 9020 }, { "epoch": 6.45, "grad_norm": 1.3774244785308838, "learning_rate": 1.840740740740741e-05, "loss": 0.14, "step": 9030 }, { "epoch": 6.457142857142857, "grad_norm": 0.8735131025314331, "learning_rate": 1.837037037037037e-05, "loss": 0.1848, "step": 9040 }, { "epoch": 6.464285714285714, "grad_norm": 1.5088914632797241, "learning_rate": 1.8333333333333333e-05, "loss": 0.2889, "step": 9050 }, { "epoch": 6.4714285714285715, "grad_norm": 1.0688769817352295, "learning_rate": 1.8296296296296298e-05, "loss": 0.1895, "step": 9060 }, { "epoch": 6.478571428571429, "grad_norm": 1.43760085105896, "learning_rate": 1.825925925925926e-05, "loss": 0.2997, "step": 9070 }, { "epoch": 6.485714285714286, "grad_norm": 1.1168969869613647, "learning_rate": 1.8222222222222224e-05, "loss": 0.3544, "step": 9080 }, { "epoch": 6.492857142857143, "grad_norm": 1.7139670848846436, "learning_rate": 1.8185185185185186e-05, "loss": 0.2108, "step": 9090 }, { "epoch": 6.5, "grad_norm": 1.2584503889083862, "learning_rate": 1.814814814814815e-05, "loss": 0.2791, "step": 9100 }, { "epoch": 6.507142857142857, "grad_norm": 1.4440019130706787, "learning_rate": 1.8111111111111112e-05, "loss": 0.3745, "step": 9110 }, { "epoch": 6.514285714285714, "grad_norm": 2.3828232288360596, "learning_rate": 1.8074074074074074e-05, "loss": 0.2159, "step": 9120 }, { "epoch": 6.521428571428571, "grad_norm": 2.6553053855895996, "learning_rate": 1.803703703703704e-05, "loss": 0.3051, "step": 9130 }, { "epoch": 6.5285714285714285, "grad_norm": 2.0669426918029785, "learning_rate": 1.8e-05, "loss": 0.2166, "step": 9140 }, { "epoch": 6.535714285714286, "grad_norm": 1.4676064252853394, "learning_rate": 1.7962962962962965e-05, "loss": 0.2393, "step": 9150 }, { "epoch": 6.542857142857143, "grad_norm": 1.5158963203430176, "learning_rate": 1.7925925925925927e-05, "loss": 0.2821, "step": 9160 }, { "epoch": 6.55, "grad_norm": 1.438550591468811, "learning_rate": 1.788888888888889e-05, "loss": 0.229, "step": 9170 }, { "epoch": 6.557142857142857, "grad_norm": 2.2161788940429688, "learning_rate": 1.7851851851851853e-05, "loss": 0.3705, "step": 9180 }, { "epoch": 6.564285714285714, "grad_norm": 1.472321629524231, "learning_rate": 1.7814814814814815e-05, "loss": 0.3977, "step": 9190 }, { "epoch": 6.571428571428571, "grad_norm": 1.957033395767212, "learning_rate": 1.777777777777778e-05, "loss": 0.2431, "step": 9200 }, { "epoch": 6.578571428571428, "grad_norm": 3.070905923843384, "learning_rate": 1.774074074074074e-05, "loss": 0.2676, "step": 9210 }, { "epoch": 6.585714285714285, "grad_norm": 2.240701198577881, "learning_rate": 1.7703703703703706e-05, "loss": 0.2346, "step": 9220 }, { "epoch": 6.5928571428571425, "grad_norm": 1.2726478576660156, "learning_rate": 1.7666666666666668e-05, "loss": 0.2624, "step": 9230 }, { "epoch": 6.6, "grad_norm": 2.543856382369995, "learning_rate": 1.762962962962963e-05, "loss": 0.3137, "step": 9240 }, { "epoch": 6.607142857142857, "grad_norm": 2.1688966751098633, "learning_rate": 1.7592592592592595e-05, "loss": 0.3366, "step": 9250 }, { "epoch": 6.614285714285714, "grad_norm": 1.9013522863388062, "learning_rate": 1.7555555555555556e-05, "loss": 0.1759, "step": 9260 }, { "epoch": 6.621428571428572, "grad_norm": 2.7567338943481445, "learning_rate": 1.751851851851852e-05, "loss": 0.2615, "step": 9270 }, { "epoch": 6.628571428571428, "grad_norm": 2.530351161956787, "learning_rate": 1.7481481481481483e-05, "loss": 0.363, "step": 9280 }, { "epoch": 6.635714285714286, "grad_norm": 3.0051562786102295, "learning_rate": 1.7444444444444448e-05, "loss": 0.2155, "step": 9290 }, { "epoch": 6.642857142857143, "grad_norm": 1.6199374198913574, "learning_rate": 1.740740740740741e-05, "loss": 0.1943, "step": 9300 }, { "epoch": 6.65, "grad_norm": 2.2254199981689453, "learning_rate": 1.737037037037037e-05, "loss": 0.2086, "step": 9310 }, { "epoch": 6.6571428571428575, "grad_norm": 1.4565106630325317, "learning_rate": 1.7333333333333336e-05, "loss": 0.2113, "step": 9320 }, { "epoch": 6.664285714285715, "grad_norm": 1.8667312860488892, "learning_rate": 1.7296296296296297e-05, "loss": 0.1719, "step": 9330 }, { "epoch": 6.671428571428572, "grad_norm": 2.0462963581085205, "learning_rate": 1.7259259259259262e-05, "loss": 0.2307, "step": 9340 }, { "epoch": 6.678571428571429, "grad_norm": 1.5114613771438599, "learning_rate": 1.7222222222222224e-05, "loss": 0.2629, "step": 9350 }, { "epoch": 6.685714285714286, "grad_norm": 1.8743935823440552, "learning_rate": 1.7185185185185185e-05, "loss": 0.2656, "step": 9360 }, { "epoch": 6.692857142857143, "grad_norm": 1.6508034467697144, "learning_rate": 1.714814814814815e-05, "loss": 0.2971, "step": 9370 }, { "epoch": 6.7, "grad_norm": 1.4109563827514648, "learning_rate": 1.7111111111111112e-05, "loss": 0.3155, "step": 9380 }, { "epoch": 6.707142857142857, "grad_norm": 1.9742975234985352, "learning_rate": 1.7074074074074077e-05, "loss": 0.2858, "step": 9390 }, { "epoch": 6.714285714285714, "grad_norm": 0.8593278527259827, "learning_rate": 1.7037037037037038e-05, "loss": 0.2484, "step": 9400 }, { "epoch": 6.7214285714285715, "grad_norm": 1.8331007957458496, "learning_rate": 1.7000000000000003e-05, "loss": 0.2763, "step": 9410 }, { "epoch": 6.728571428571429, "grad_norm": 2.0606274604797363, "learning_rate": 1.6962962962962965e-05, "loss": 0.2016, "step": 9420 }, { "epoch": 6.735714285714286, "grad_norm": 1.244935154914856, "learning_rate": 1.6925925925925926e-05, "loss": 0.2161, "step": 9430 }, { "epoch": 6.742857142857143, "grad_norm": 2.0855889320373535, "learning_rate": 1.688888888888889e-05, "loss": 0.1961, "step": 9440 }, { "epoch": 6.75, "grad_norm": 2.203310012817383, "learning_rate": 1.6851851851851853e-05, "loss": 0.1886, "step": 9450 }, { "epoch": 6.757142857142857, "grad_norm": 2.1254501342773438, "learning_rate": 1.6814814814814818e-05, "loss": 0.2824, "step": 9460 }, { "epoch": 6.764285714285714, "grad_norm": 1.498728632926941, "learning_rate": 1.677777777777778e-05, "loss": 0.2848, "step": 9470 }, { "epoch": 6.771428571428571, "grad_norm": 2.6205763816833496, "learning_rate": 1.674074074074074e-05, "loss": 0.2728, "step": 9480 }, { "epoch": 6.7785714285714285, "grad_norm": 1.6262216567993164, "learning_rate": 1.6703703703703706e-05, "loss": 0.4216, "step": 9490 }, { "epoch": 6.785714285714286, "grad_norm": 3.074489116668701, "learning_rate": 1.6666666666666667e-05, "loss": 0.2084, "step": 9500 }, { "epoch": 6.792857142857143, "grad_norm": 1.8158230781555176, "learning_rate": 1.6629629629629632e-05, "loss": 0.1794, "step": 9510 }, { "epoch": 6.8, "grad_norm": 2.069397449493408, "learning_rate": 1.6592592592592594e-05, "loss": 0.2363, "step": 9520 }, { "epoch": 6.807142857142857, "grad_norm": 1.8637501001358032, "learning_rate": 1.655555555555556e-05, "loss": 0.2203, "step": 9530 }, { "epoch": 6.814285714285714, "grad_norm": 2.043314218521118, "learning_rate": 1.651851851851852e-05, "loss": 0.2267, "step": 9540 }, { "epoch": 6.821428571428571, "grad_norm": 2.8327081203460693, "learning_rate": 1.6481481481481482e-05, "loss": 0.2793, "step": 9550 }, { "epoch": 6.828571428571428, "grad_norm": 2.3297407627105713, "learning_rate": 1.6444444444444447e-05, "loss": 0.2349, "step": 9560 }, { "epoch": 6.835714285714285, "grad_norm": 0.9220748543739319, "learning_rate": 1.640740740740741e-05, "loss": 0.1966, "step": 9570 }, { "epoch": 6.8428571428571425, "grad_norm": 1.5935183763504028, "learning_rate": 1.6370370370370374e-05, "loss": 0.3217, "step": 9580 }, { "epoch": 6.85, "grad_norm": 0.9305605292320251, "learning_rate": 1.6333333333333335e-05, "loss": 0.1446, "step": 9590 }, { "epoch": 6.857142857142857, "grad_norm": 2.0719094276428223, "learning_rate": 1.62962962962963e-05, "loss": 0.2195, "step": 9600 }, { "epoch": 6.864285714285714, "grad_norm": 1.9230345487594604, "learning_rate": 1.625925925925926e-05, "loss": 0.2031, "step": 9610 }, { "epoch": 6.871428571428572, "grad_norm": 1.7897018194198608, "learning_rate": 1.6222222222222223e-05, "loss": 0.1728, "step": 9620 }, { "epoch": 6.878571428571428, "grad_norm": 2.4588770866394043, "learning_rate": 1.6185185185185188e-05, "loss": 0.3253, "step": 9630 }, { "epoch": 6.885714285714286, "grad_norm": 1.2495237588882446, "learning_rate": 1.614814814814815e-05, "loss": 0.3539, "step": 9640 }, { "epoch": 6.892857142857143, "grad_norm": 3.161078453063965, "learning_rate": 1.6111111111111115e-05, "loss": 0.3598, "step": 9650 }, { "epoch": 6.9, "grad_norm": 1.9474009275436401, "learning_rate": 1.6074074074074076e-05, "loss": 0.1385, "step": 9660 }, { "epoch": 6.9071428571428575, "grad_norm": 1.9687261581420898, "learning_rate": 1.6037037037037038e-05, "loss": 0.2375, "step": 9670 }, { "epoch": 6.914285714285715, "grad_norm": 1.87405264377594, "learning_rate": 1.6000000000000003e-05, "loss": 0.3374, "step": 9680 }, { "epoch": 6.921428571428572, "grad_norm": 1.1928725242614746, "learning_rate": 1.5962962962962964e-05, "loss": 0.197, "step": 9690 }, { "epoch": 6.928571428571429, "grad_norm": 1.6434850692749023, "learning_rate": 1.5925925925925926e-05, "loss": 0.2757, "step": 9700 }, { "epoch": 6.935714285714286, "grad_norm": 1.1469305753707886, "learning_rate": 1.588888888888889e-05, "loss": 0.1793, "step": 9710 }, { "epoch": 6.942857142857143, "grad_norm": 2.436051368713379, "learning_rate": 1.5851851851851852e-05, "loss": 0.3178, "step": 9720 }, { "epoch": 6.95, "grad_norm": 1.8960529565811157, "learning_rate": 1.5814814814814817e-05, "loss": 0.2182, "step": 9730 }, { "epoch": 6.957142857142857, "grad_norm": 1.755922794342041, "learning_rate": 1.577777777777778e-05, "loss": 0.3239, "step": 9740 }, { "epoch": 6.964285714285714, "grad_norm": 2.202697515487671, "learning_rate": 1.574074074074074e-05, "loss": 0.2116, "step": 9750 }, { "epoch": 6.9714285714285715, "grad_norm": 1.4491599798202515, "learning_rate": 1.5703703703703705e-05, "loss": 0.2329, "step": 9760 }, { "epoch": 6.978571428571429, "grad_norm": 9.212343215942383, "learning_rate": 1.5666666666666667e-05, "loss": 0.2573, "step": 9770 }, { "epoch": 6.985714285714286, "grad_norm": 1.2211856842041016, "learning_rate": 1.5629629629629632e-05, "loss": 0.2737, "step": 9780 }, { "epoch": 6.992857142857143, "grad_norm": 1.59877347946167, "learning_rate": 1.5592592592592593e-05, "loss": 0.284, "step": 9790 }, { "epoch": 7.0, "grad_norm": 2.295945882797241, "learning_rate": 1.5555555555555555e-05, "loss": 0.2076, "step": 9800 }, { "epoch": 7.007142857142857, "grad_norm": 1.4388489723205566, "learning_rate": 1.551851851851852e-05, "loss": 0.2225, "step": 9810 }, { "epoch": 7.014285714285714, "grad_norm": 1.9146931171417236, "learning_rate": 1.548148148148148e-05, "loss": 0.2917, "step": 9820 }, { "epoch": 7.021428571428571, "grad_norm": 1.0212804079055786, "learning_rate": 1.5444444444444446e-05, "loss": 0.1537, "step": 9830 }, { "epoch": 7.0285714285714285, "grad_norm": 2.146648645401001, "learning_rate": 1.5407407407407408e-05, "loss": 0.178, "step": 9840 }, { "epoch": 7.035714285714286, "grad_norm": 2.4515628814697266, "learning_rate": 1.537037037037037e-05, "loss": 0.3043, "step": 9850 }, { "epoch": 7.042857142857143, "grad_norm": 1.6906862258911133, "learning_rate": 1.5333333333333334e-05, "loss": 0.2787, "step": 9860 }, { "epoch": 7.05, "grad_norm": 2.2019400596618652, "learning_rate": 1.5296296296296296e-05, "loss": 0.3236, "step": 9870 }, { "epoch": 7.057142857142857, "grad_norm": 1.3307303190231323, "learning_rate": 1.5259259259259258e-05, "loss": 0.1875, "step": 9880 }, { "epoch": 7.064285714285714, "grad_norm": 1.7358342409133911, "learning_rate": 1.5222222222222224e-05, "loss": 0.2149, "step": 9890 }, { "epoch": 7.071428571428571, "grad_norm": 2.0298547744750977, "learning_rate": 1.5185185185185186e-05, "loss": 0.1876, "step": 9900 }, { "epoch": 7.078571428571428, "grad_norm": 2.375779151916504, "learning_rate": 1.514814814814815e-05, "loss": 0.2289, "step": 9910 }, { "epoch": 7.085714285714285, "grad_norm": 1.856911540031433, "learning_rate": 1.5111111111111112e-05, "loss": 0.2029, "step": 9920 }, { "epoch": 7.0928571428571425, "grad_norm": 1.1523020267486572, "learning_rate": 1.5074074074074074e-05, "loss": 0.1753, "step": 9930 }, { "epoch": 7.1, "grad_norm": 1.4677330255508423, "learning_rate": 1.5037037037037039e-05, "loss": 0.2256, "step": 9940 }, { "epoch": 7.107142857142857, "grad_norm": 1.0742135047912598, "learning_rate": 1.5e-05, "loss": 0.3844, "step": 9950 }, { "epoch": 7.114285714285714, "grad_norm": 1.4122258424758911, "learning_rate": 1.4962962962962965e-05, "loss": 0.1498, "step": 9960 }, { "epoch": 7.121428571428571, "grad_norm": 1.9363057613372803, "learning_rate": 1.4925925925925927e-05, "loss": 0.2721, "step": 9970 }, { "epoch": 7.128571428571428, "grad_norm": 0.7882018685340881, "learning_rate": 1.4888888888888888e-05, "loss": 0.2363, "step": 9980 }, { "epoch": 7.135714285714286, "grad_norm": 1.8561784029006958, "learning_rate": 1.4851851851851853e-05, "loss": 0.2875, "step": 9990 }, { "epoch": 7.142857142857143, "grad_norm": 1.9597991704940796, "learning_rate": 1.4814814814814815e-05, "loss": 0.1984, "step": 10000 }, { "epoch": 7.142857142857143, "eval_loss": 0.34164437651634216, "eval_rouge1": 0.9089, "eval_rouge2": 0.8501, "eval_rougeL": 0.9063, "eval_runtime": 122.2974, "eval_samples_per_second": 11.448, "eval_steps_per_second": 5.724, "step": 10000 }, { "epoch": 7.15, "grad_norm": 2.1984336376190186, "learning_rate": 1.477777777777778e-05, "loss": 0.2409, "step": 10010 }, { "epoch": 7.1571428571428575, "grad_norm": 1.1259089708328247, "learning_rate": 1.4740740740740741e-05, "loss": 0.1386, "step": 10020 }, { "epoch": 7.164285714285715, "grad_norm": 2.6497113704681396, "learning_rate": 1.4703703703703706e-05, "loss": 0.2999, "step": 10030 }, { "epoch": 7.171428571428572, "grad_norm": 2.7574968338012695, "learning_rate": 1.4666666666666668e-05, "loss": 0.2064, "step": 10040 }, { "epoch": 7.178571428571429, "grad_norm": 2.4020519256591797, "learning_rate": 1.462962962962963e-05, "loss": 0.2539, "step": 10050 }, { "epoch": 7.185714285714286, "grad_norm": 1.8728407621383667, "learning_rate": 1.4592592592592594e-05, "loss": 0.2264, "step": 10060 }, { "epoch": 7.192857142857143, "grad_norm": 3.187389612197876, "learning_rate": 1.4555555555555556e-05, "loss": 0.2175, "step": 10070 }, { "epoch": 7.2, "grad_norm": 1.6992945671081543, "learning_rate": 1.4518518518518521e-05, "loss": 0.1752, "step": 10080 }, { "epoch": 7.207142857142857, "grad_norm": 1.2373261451721191, "learning_rate": 1.4481481481481483e-05, "loss": 0.1712, "step": 10090 }, { "epoch": 7.214285714285714, "grad_norm": 1.3986244201660156, "learning_rate": 1.4444444444444444e-05, "loss": 0.1727, "step": 10100 }, { "epoch": 7.2214285714285715, "grad_norm": 1.5018147230148315, "learning_rate": 1.4407407407407409e-05, "loss": 0.2309, "step": 10110 }, { "epoch": 7.228571428571429, "grad_norm": 1.8186851739883423, "learning_rate": 1.437037037037037e-05, "loss": 0.2741, "step": 10120 }, { "epoch": 7.235714285714286, "grad_norm": 2.8224360942840576, "learning_rate": 1.4333333333333334e-05, "loss": 0.2244, "step": 10130 }, { "epoch": 7.242857142857143, "grad_norm": 1.900585412979126, "learning_rate": 1.4296296296296297e-05, "loss": 0.2527, "step": 10140 }, { "epoch": 7.25, "grad_norm": 2.4210896492004395, "learning_rate": 1.425925925925926e-05, "loss": 0.3832, "step": 10150 }, { "epoch": 7.257142857142857, "grad_norm": 1.2783209085464478, "learning_rate": 1.4222222222222224e-05, "loss": 0.1756, "step": 10160 }, { "epoch": 7.264285714285714, "grad_norm": 1.4387212991714478, "learning_rate": 1.4185185185185185e-05, "loss": 0.2567, "step": 10170 }, { "epoch": 7.271428571428571, "grad_norm": 2.861311435699463, "learning_rate": 1.4148148148148148e-05, "loss": 0.3683, "step": 10180 }, { "epoch": 7.2785714285714285, "grad_norm": 0.8701191544532776, "learning_rate": 1.4111111111111112e-05, "loss": 0.2229, "step": 10190 }, { "epoch": 7.285714285714286, "grad_norm": 2.103231430053711, "learning_rate": 1.4074074074074075e-05, "loss": 0.1652, "step": 10200 }, { "epoch": 7.292857142857143, "grad_norm": 3.0958895683288574, "learning_rate": 1.4037037037037038e-05, "loss": 0.3048, "step": 10210 }, { "epoch": 7.3, "grad_norm": 1.0370267629623413, "learning_rate": 1.4000000000000001e-05, "loss": 0.243, "step": 10220 }, { "epoch": 7.307142857142857, "grad_norm": 1.334799885749817, "learning_rate": 1.3962962962962963e-05, "loss": 0.2242, "step": 10230 }, { "epoch": 7.314285714285714, "grad_norm": 1.797135829925537, "learning_rate": 1.3925925925925926e-05, "loss": 0.1916, "step": 10240 }, { "epoch": 7.321428571428571, "grad_norm": 0.7588611841201782, "learning_rate": 1.388888888888889e-05, "loss": 0.2548, "step": 10250 }, { "epoch": 7.328571428571428, "grad_norm": 1.9136377573013306, "learning_rate": 1.3851851851851853e-05, "loss": 0.2373, "step": 10260 }, { "epoch": 7.335714285714285, "grad_norm": 1.326635479927063, "learning_rate": 1.3814814814814816e-05, "loss": 0.2597, "step": 10270 }, { "epoch": 7.3428571428571425, "grad_norm": 2.400609016418457, "learning_rate": 1.3777777777777778e-05, "loss": 0.2648, "step": 10280 }, { "epoch": 7.35, "grad_norm": 0.9898678064346313, "learning_rate": 1.3740740740740741e-05, "loss": 0.2304, "step": 10290 }, { "epoch": 7.357142857142857, "grad_norm": 0.7826656103134155, "learning_rate": 1.3703703703703704e-05, "loss": 0.1209, "step": 10300 }, { "epoch": 7.364285714285714, "grad_norm": 1.083044409751892, "learning_rate": 1.3666666666666666e-05, "loss": 0.2026, "step": 10310 }, { "epoch": 7.371428571428572, "grad_norm": 1.283219814300537, "learning_rate": 1.362962962962963e-05, "loss": 0.2018, "step": 10320 }, { "epoch": 7.378571428571428, "grad_norm": 1.9941823482513428, "learning_rate": 1.3592592592592592e-05, "loss": 0.2469, "step": 10330 }, { "epoch": 7.385714285714286, "grad_norm": 2.553957462310791, "learning_rate": 1.3555555555555557e-05, "loss": 0.2661, "step": 10340 }, { "epoch": 7.392857142857143, "grad_norm": 1.657182216644287, "learning_rate": 1.3518518518518519e-05, "loss": 0.2762, "step": 10350 }, { "epoch": 7.4, "grad_norm": 1.6704496145248413, "learning_rate": 1.348148148148148e-05, "loss": 0.222, "step": 10360 }, { "epoch": 7.4071428571428575, "grad_norm": 1.338329792022705, "learning_rate": 1.3444444444444445e-05, "loss": 0.2658, "step": 10370 }, { "epoch": 7.414285714285715, "grad_norm": 1.9741250276565552, "learning_rate": 1.3407407407407407e-05, "loss": 0.2596, "step": 10380 }, { "epoch": 7.421428571428572, "grad_norm": 2.523958444595337, "learning_rate": 1.3370370370370372e-05, "loss": 0.1553, "step": 10390 }, { "epoch": 7.428571428571429, "grad_norm": 2.260690450668335, "learning_rate": 1.3333333333333333e-05, "loss": 0.2942, "step": 10400 }, { "epoch": 7.435714285714286, "grad_norm": 2.441620349884033, "learning_rate": 1.3296296296296295e-05, "loss": 0.2451, "step": 10410 }, { "epoch": 7.442857142857143, "grad_norm": 0.7054124474525452, "learning_rate": 1.325925925925926e-05, "loss": 0.1862, "step": 10420 }, { "epoch": 7.45, "grad_norm": 1.6281330585479736, "learning_rate": 1.3222222222222221e-05, "loss": 0.1714, "step": 10430 }, { "epoch": 7.457142857142857, "grad_norm": 1.738685965538025, "learning_rate": 1.3185185185185186e-05, "loss": 0.2052, "step": 10440 }, { "epoch": 7.464285714285714, "grad_norm": 1.9982494115829468, "learning_rate": 1.3148148148148148e-05, "loss": 0.2964, "step": 10450 }, { "epoch": 7.4714285714285715, "grad_norm": 1.0081127882003784, "learning_rate": 1.3111111111111113e-05, "loss": 0.1956, "step": 10460 }, { "epoch": 7.478571428571429, "grad_norm": 1.5927938222885132, "learning_rate": 1.3074074074074074e-05, "loss": 0.236, "step": 10470 }, { "epoch": 7.485714285714286, "grad_norm": 1.7959505319595337, "learning_rate": 1.3037037037037036e-05, "loss": 0.2462, "step": 10480 }, { "epoch": 7.492857142857143, "grad_norm": 0.9230768084526062, "learning_rate": 1.3000000000000001e-05, "loss": 0.177, "step": 10490 }, { "epoch": 7.5, "grad_norm": 1.8714969158172607, "learning_rate": 1.2962962962962962e-05, "loss": 0.2224, "step": 10500 }, { "epoch": 7.507142857142857, "grad_norm": 2.809420585632324, "learning_rate": 1.2925925925925927e-05, "loss": 0.2214, "step": 10510 }, { "epoch": 7.514285714285714, "grad_norm": 2.2183682918548584, "learning_rate": 1.2888888888888889e-05, "loss": 0.2528, "step": 10520 }, { "epoch": 7.521428571428571, "grad_norm": 2.108675479888916, "learning_rate": 1.2851851851851854e-05, "loss": 0.2593, "step": 10530 }, { "epoch": 7.5285714285714285, "grad_norm": 1.9557310342788696, "learning_rate": 1.2814814814814815e-05, "loss": 0.1875, "step": 10540 }, { "epoch": 7.535714285714286, "grad_norm": 2.3365464210510254, "learning_rate": 1.2777777777777777e-05, "loss": 0.2232, "step": 10550 }, { "epoch": 7.542857142857143, "grad_norm": 1.4713023900985718, "learning_rate": 1.2740740740740742e-05, "loss": 0.2592, "step": 10560 }, { "epoch": 7.55, "grad_norm": 2.5241403579711914, "learning_rate": 1.2703703703703704e-05, "loss": 0.2632, "step": 10570 }, { "epoch": 7.557142857142857, "grad_norm": 2.726618528366089, "learning_rate": 1.2666666666666668e-05, "loss": 0.2068, "step": 10580 }, { "epoch": 7.564285714285714, "grad_norm": 1.2947627305984497, "learning_rate": 1.262962962962963e-05, "loss": 0.1846, "step": 10590 }, { "epoch": 7.571428571428571, "grad_norm": 1.4739402532577515, "learning_rate": 1.2592592592592592e-05, "loss": 0.1445, "step": 10600 }, { "epoch": 7.578571428571428, "grad_norm": 1.7607239484786987, "learning_rate": 1.2555555555555557e-05, "loss": 0.2087, "step": 10610 }, { "epoch": 7.585714285714285, "grad_norm": 1.510556697845459, "learning_rate": 1.2518518518518518e-05, "loss": 0.2356, "step": 10620 }, { "epoch": 7.5928571428571425, "grad_norm": 1.4189872741699219, "learning_rate": 1.2481481481481481e-05, "loss": 0.2343, "step": 10630 }, { "epoch": 7.6, "grad_norm": 3.209477424621582, "learning_rate": 1.2444444444444445e-05, "loss": 0.2131, "step": 10640 }, { "epoch": 7.607142857142857, "grad_norm": 2.026301145553589, "learning_rate": 1.2407407407407408e-05, "loss": 0.3643, "step": 10650 }, { "epoch": 7.614285714285714, "grad_norm": 2.355459451675415, "learning_rate": 1.2370370370370371e-05, "loss": 0.2382, "step": 10660 }, { "epoch": 7.621428571428572, "grad_norm": 1.6867364645004272, "learning_rate": 1.2333333333333334e-05, "loss": 0.1886, "step": 10670 }, { "epoch": 7.628571428571428, "grad_norm": 1.863373041152954, "learning_rate": 1.2296296296296298e-05, "loss": 0.2842, "step": 10680 }, { "epoch": 7.635714285714286, "grad_norm": 1.4037106037139893, "learning_rate": 1.225925925925926e-05, "loss": 0.297, "step": 10690 }, { "epoch": 7.642857142857143, "grad_norm": 1.2220287322998047, "learning_rate": 1.2222222222222222e-05, "loss": 0.2126, "step": 10700 }, { "epoch": 7.65, "grad_norm": 1.796430230140686, "learning_rate": 1.2185185185185186e-05, "loss": 0.2494, "step": 10710 }, { "epoch": 7.6571428571428575, "grad_norm": 2.7764432430267334, "learning_rate": 1.2148148148148149e-05, "loss": 0.3102, "step": 10720 }, { "epoch": 7.664285714285715, "grad_norm": 2.2875261306762695, "learning_rate": 1.2111111111111112e-05, "loss": 0.2171, "step": 10730 }, { "epoch": 7.671428571428572, "grad_norm": 1.3017419576644897, "learning_rate": 1.2074074074074075e-05, "loss": 0.2095, "step": 10740 }, { "epoch": 7.678571428571429, "grad_norm": 1.5423152446746826, "learning_rate": 1.2037037037037037e-05, "loss": 0.3183, "step": 10750 }, { "epoch": 7.685714285714286, "grad_norm": 2.0346460342407227, "learning_rate": 1.2e-05, "loss": 0.1669, "step": 10760 }, { "epoch": 7.692857142857143, "grad_norm": 1.178389549255371, "learning_rate": 1.1962962962962964e-05, "loss": 0.3195, "step": 10770 }, { "epoch": 7.7, "grad_norm": 1.3902812004089355, "learning_rate": 1.1925925925925927e-05, "loss": 0.2166, "step": 10780 }, { "epoch": 7.707142857142857, "grad_norm": 2.8894922733306885, "learning_rate": 1.188888888888889e-05, "loss": 0.2216, "step": 10790 }, { "epoch": 7.714285714285714, "grad_norm": 2.77864670753479, "learning_rate": 1.1851851851851853e-05, "loss": 0.224, "step": 10800 }, { "epoch": 7.7214285714285715, "grad_norm": 1.370814323425293, "learning_rate": 1.1814814814814817e-05, "loss": 0.2277, "step": 10810 }, { "epoch": 7.728571428571429, "grad_norm": 1.3543068170547485, "learning_rate": 1.1777777777777778e-05, "loss": 0.19, "step": 10820 }, { "epoch": 7.735714285714286, "grad_norm": 2.4707486629486084, "learning_rate": 1.1740740740740741e-05, "loss": 0.1951, "step": 10830 }, { "epoch": 7.742857142857143, "grad_norm": 2.284876823425293, "learning_rate": 1.1703703703703705e-05, "loss": 0.2206, "step": 10840 }, { "epoch": 7.75, "grad_norm": 1.1018098592758179, "learning_rate": 1.1666666666666668e-05, "loss": 0.1386, "step": 10850 }, { "epoch": 7.757142857142857, "grad_norm": 1.9555597305297852, "learning_rate": 1.1629629629629631e-05, "loss": 0.1645, "step": 10860 }, { "epoch": 7.764285714285714, "grad_norm": 1.1327388286590576, "learning_rate": 1.1592592592592594e-05, "loss": 0.1654, "step": 10870 }, { "epoch": 7.771428571428571, "grad_norm": 1.6210479736328125, "learning_rate": 1.1555555555555556e-05, "loss": 0.1937, "step": 10880 }, { "epoch": 7.7785714285714285, "grad_norm": 1.7142146825790405, "learning_rate": 1.151851851851852e-05, "loss": 0.1722, "step": 10890 }, { "epoch": 7.785714285714286, "grad_norm": 3.529614210128784, "learning_rate": 1.1481481481481482e-05, "loss": 0.2414, "step": 10900 }, { "epoch": 7.792857142857143, "grad_norm": 1.6331572532653809, "learning_rate": 1.1444444444444446e-05, "loss": 0.1844, "step": 10910 }, { "epoch": 7.8, "grad_norm": 2.7431063652038574, "learning_rate": 1.1407407407407409e-05, "loss": 0.2745, "step": 10920 }, { "epoch": 7.807142857142857, "grad_norm": 2.882291316986084, "learning_rate": 1.1370370370370372e-05, "loss": 0.2353, "step": 10930 }, { "epoch": 7.814285714285714, "grad_norm": 2.3573696613311768, "learning_rate": 1.1333333333333334e-05, "loss": 0.2047, "step": 10940 }, { "epoch": 7.821428571428571, "grad_norm": 1.370251178741455, "learning_rate": 1.1296296296296297e-05, "loss": 0.1855, "step": 10950 }, { "epoch": 7.828571428571428, "grad_norm": 1.1445660591125488, "learning_rate": 1.125925925925926e-05, "loss": 0.2316, "step": 10960 }, { "epoch": 7.835714285714285, "grad_norm": 2.094175100326538, "learning_rate": 1.1222222222222224e-05, "loss": 0.477, "step": 10970 }, { "epoch": 7.8428571428571425, "grad_norm": 2.1148130893707275, "learning_rate": 1.1185185185185187e-05, "loss": 0.2581, "step": 10980 }, { "epoch": 7.85, "grad_norm": 1.89934241771698, "learning_rate": 1.1148148148148148e-05, "loss": 0.2908, "step": 10990 }, { "epoch": 7.857142857142857, "grad_norm": 1.6785616874694824, "learning_rate": 1.1111111111111112e-05, "loss": 0.2222, "step": 11000 }, { "epoch": 7.857142857142857, "eval_loss": 0.34114697575569153, "eval_rouge1": 0.91, "eval_rouge2": 0.8517, "eval_rougeL": 0.9073, "eval_runtime": 122.2351, "eval_samples_per_second": 11.453, "eval_steps_per_second": 5.727, "step": 11000 }, { "epoch": 7.864285714285714, "grad_norm": 2.335857629776001, "learning_rate": 1.1074074074074075e-05, "loss": 0.262, "step": 11010 }, { "epoch": 7.871428571428572, "grad_norm": 1.9699100255966187, "learning_rate": 1.1037037037037038e-05, "loss": 0.2056, "step": 11020 }, { "epoch": 7.878571428571428, "grad_norm": 0.8576107025146484, "learning_rate": 1.1000000000000001e-05, "loss": 0.1929, "step": 11030 }, { "epoch": 7.885714285714286, "grad_norm": 0.7365075945854187, "learning_rate": 1.0962962962962963e-05, "loss": 0.2494, "step": 11040 }, { "epoch": 7.892857142857143, "grad_norm": 2.7551143169403076, "learning_rate": 1.0925925925925926e-05, "loss": 0.3239, "step": 11050 }, { "epoch": 7.9, "grad_norm": 1.1980743408203125, "learning_rate": 1.088888888888889e-05, "loss": 0.1836, "step": 11060 }, { "epoch": 7.9071428571428575, "grad_norm": 1.525614619255066, "learning_rate": 1.0851851851851853e-05, "loss": 0.3089, "step": 11070 }, { "epoch": 7.914285714285715, "grad_norm": 2.0727596282958984, "learning_rate": 1.0814814814814814e-05, "loss": 0.1867, "step": 11080 }, { "epoch": 7.921428571428572, "grad_norm": 1.242550015449524, "learning_rate": 1.0777777777777778e-05, "loss": 0.222, "step": 11090 }, { "epoch": 7.928571428571429, "grad_norm": 1.3312640190124512, "learning_rate": 1.074074074074074e-05, "loss": 0.3334, "step": 11100 }, { "epoch": 7.935714285714286, "grad_norm": 1.4483474493026733, "learning_rate": 1.0703703703703704e-05, "loss": 0.2966, "step": 11110 }, { "epoch": 7.942857142857143, "grad_norm": 1.5403432846069336, "learning_rate": 1.0666666666666667e-05, "loss": 0.2591, "step": 11120 }, { "epoch": 7.95, "grad_norm": 1.6620547771453857, "learning_rate": 1.0629629629629629e-05, "loss": 0.2216, "step": 11130 }, { "epoch": 7.957142857142857, "grad_norm": 1.2060527801513672, "learning_rate": 1.0592592592592592e-05, "loss": 0.2403, "step": 11140 }, { "epoch": 7.964285714285714, "grad_norm": 1.4476624727249146, "learning_rate": 1.0555555555555555e-05, "loss": 0.2739, "step": 11150 }, { "epoch": 7.9714285714285715, "grad_norm": 2.2425661087036133, "learning_rate": 1.0518518518518519e-05, "loss": 0.2981, "step": 11160 }, { "epoch": 7.978571428571429, "grad_norm": 2.2100632190704346, "learning_rate": 1.0481481481481482e-05, "loss": 0.2178, "step": 11170 }, { "epoch": 7.985714285714286, "grad_norm": 1.080759882926941, "learning_rate": 1.0444444444444445e-05, "loss": 0.2947, "step": 11180 }, { "epoch": 7.992857142857143, "grad_norm": 2.9972569942474365, "learning_rate": 1.0407407407407407e-05, "loss": 0.203, "step": 11190 }, { "epoch": 8.0, "grad_norm": 1.7628710269927979, "learning_rate": 1.037037037037037e-05, "loss": 0.308, "step": 11200 }, { "epoch": 8.007142857142858, "grad_norm": 1.569732666015625, "learning_rate": 1.0333333333333333e-05, "loss": 0.2003, "step": 11210 }, { "epoch": 8.014285714285714, "grad_norm": 0.8212767839431763, "learning_rate": 1.0296296296296296e-05, "loss": 0.1957, "step": 11220 }, { "epoch": 8.021428571428572, "grad_norm": 1.7055811882019043, "learning_rate": 1.025925925925926e-05, "loss": 0.2274, "step": 11230 }, { "epoch": 8.028571428571428, "grad_norm": 1.6140356063842773, "learning_rate": 1.0222222222222223e-05, "loss": 0.2182, "step": 11240 }, { "epoch": 8.035714285714286, "grad_norm": 1.6154979467391968, "learning_rate": 1.0185185185185185e-05, "loss": 0.1217, "step": 11250 }, { "epoch": 8.042857142857143, "grad_norm": 2.4283053874969482, "learning_rate": 1.0148148148148148e-05, "loss": 0.2687, "step": 11260 }, { "epoch": 8.05, "grad_norm": 2.201909065246582, "learning_rate": 1.0111111111111111e-05, "loss": 0.2836, "step": 11270 }, { "epoch": 8.057142857142857, "grad_norm": 1.902273178100586, "learning_rate": 1.0074074074074074e-05, "loss": 0.2076, "step": 11280 }, { "epoch": 8.064285714285715, "grad_norm": 1.7851389646530151, "learning_rate": 1.0037037037037038e-05, "loss": 0.2045, "step": 11290 }, { "epoch": 8.071428571428571, "grad_norm": 1.1988000869750977, "learning_rate": 1e-05, "loss": 0.1832, "step": 11300 }, { "epoch": 8.07857142857143, "grad_norm": 0.6530731320381165, "learning_rate": 9.962962962962962e-06, "loss": 0.1788, "step": 11310 }, { "epoch": 8.085714285714285, "grad_norm": 1.5712918043136597, "learning_rate": 9.925925925925926e-06, "loss": 0.1601, "step": 11320 }, { "epoch": 8.092857142857143, "grad_norm": 1.3664653301239014, "learning_rate": 9.888888888888889e-06, "loss": 0.2745, "step": 11330 }, { "epoch": 8.1, "grad_norm": 1.084404706954956, "learning_rate": 9.851851851851852e-06, "loss": 0.2595, "step": 11340 }, { "epoch": 8.107142857142858, "grad_norm": 1.9523823261260986, "learning_rate": 9.814814814814815e-06, "loss": 0.2105, "step": 11350 }, { "epoch": 8.114285714285714, "grad_norm": 1.2386913299560547, "learning_rate": 9.777777777777779e-06, "loss": 0.1199, "step": 11360 }, { "epoch": 8.121428571428572, "grad_norm": 1.6026146411895752, "learning_rate": 9.74074074074074e-06, "loss": 0.2419, "step": 11370 }, { "epoch": 8.128571428571428, "grad_norm": 1.3624472618103027, "learning_rate": 9.703703703703703e-06, "loss": 0.2094, "step": 11380 }, { "epoch": 8.135714285714286, "grad_norm": 1.7777026891708374, "learning_rate": 9.666666666666667e-06, "loss": 0.2033, "step": 11390 }, { "epoch": 8.142857142857142, "grad_norm": 1.5583858489990234, "learning_rate": 9.62962962962963e-06, "loss": 0.2229, "step": 11400 }, { "epoch": 8.15, "grad_norm": 0.7537804841995239, "learning_rate": 9.592592592592593e-06, "loss": 0.2201, "step": 11410 }, { "epoch": 8.157142857142857, "grad_norm": 1.3313623666763306, "learning_rate": 9.555555555555556e-06, "loss": 0.1802, "step": 11420 }, { "epoch": 8.164285714285715, "grad_norm": 2.136382818222046, "learning_rate": 9.51851851851852e-06, "loss": 0.1686, "step": 11430 }, { "epoch": 8.17142857142857, "grad_norm": 1.4156885147094727, "learning_rate": 9.481481481481481e-06, "loss": 0.2291, "step": 11440 }, { "epoch": 8.178571428571429, "grad_norm": 1.5296056270599365, "learning_rate": 9.444444444444445e-06, "loss": 0.2476, "step": 11450 }, { "epoch": 8.185714285714285, "grad_norm": 1.5632902383804321, "learning_rate": 9.407407407407408e-06, "loss": 0.2304, "step": 11460 }, { "epoch": 8.192857142857143, "grad_norm": 0.9542272686958313, "learning_rate": 9.370370370370371e-06, "loss": 0.1693, "step": 11470 }, { "epoch": 8.2, "grad_norm": 3.356255292892456, "learning_rate": 9.333333333333334e-06, "loss": 0.3996, "step": 11480 }, { "epoch": 8.207142857142857, "grad_norm": 1.6759045124053955, "learning_rate": 9.296296296296298e-06, "loss": 0.2134, "step": 11490 }, { "epoch": 8.214285714285714, "grad_norm": 1.2791472673416138, "learning_rate": 9.259259259259259e-06, "loss": 0.248, "step": 11500 }, { "epoch": 8.221428571428572, "grad_norm": 1.07367742061615, "learning_rate": 9.222222222222222e-06, "loss": 0.1731, "step": 11510 }, { "epoch": 8.228571428571428, "grad_norm": 0.5693773627281189, "learning_rate": 9.185185185185186e-06, "loss": 0.2758, "step": 11520 }, { "epoch": 8.235714285714286, "grad_norm": 1.7820035219192505, "learning_rate": 9.148148148148149e-06, "loss": 0.2147, "step": 11530 }, { "epoch": 8.242857142857142, "grad_norm": 1.6593891382217407, "learning_rate": 9.111111111111112e-06, "loss": 0.2481, "step": 11540 }, { "epoch": 8.25, "grad_norm": 1.4112298488616943, "learning_rate": 9.074074074074075e-06, "loss": 0.3307, "step": 11550 }, { "epoch": 8.257142857142856, "grad_norm": 1.0225239992141724, "learning_rate": 9.037037037037037e-06, "loss": 0.1092, "step": 11560 }, { "epoch": 8.264285714285714, "grad_norm": 1.4713934659957886, "learning_rate": 9e-06, "loss": 0.2332, "step": 11570 }, { "epoch": 8.271428571428572, "grad_norm": 1.6922743320465088, "learning_rate": 8.962962962962963e-06, "loss": 0.2106, "step": 11580 }, { "epoch": 8.278571428571428, "grad_norm": 2.4201695919036865, "learning_rate": 8.925925925925927e-06, "loss": 0.1746, "step": 11590 }, { "epoch": 8.285714285714286, "grad_norm": 2.3649351596832275, "learning_rate": 8.88888888888889e-06, "loss": 0.2204, "step": 11600 }, { "epoch": 8.292857142857143, "grad_norm": 1.0867241621017456, "learning_rate": 8.851851851851853e-06, "loss": 0.2246, "step": 11610 }, { "epoch": 8.3, "grad_norm": 1.1103533506393433, "learning_rate": 8.814814814814815e-06, "loss": 0.2952, "step": 11620 }, { "epoch": 8.307142857142857, "grad_norm": 1.9086233377456665, "learning_rate": 8.777777777777778e-06, "loss": 0.1627, "step": 11630 }, { "epoch": 8.314285714285715, "grad_norm": 1.5733546018600464, "learning_rate": 8.740740740740741e-06, "loss": 0.2493, "step": 11640 }, { "epoch": 8.321428571428571, "grad_norm": 1.514758586883545, "learning_rate": 8.703703703703705e-06, "loss": 0.1708, "step": 11650 }, { "epoch": 8.32857142857143, "grad_norm": 4.691562175750732, "learning_rate": 8.666666666666668e-06, "loss": 0.2622, "step": 11660 }, { "epoch": 8.335714285714285, "grad_norm": 1.0987350940704346, "learning_rate": 8.629629629629631e-06, "loss": 0.1291, "step": 11670 }, { "epoch": 8.342857142857143, "grad_norm": 1.4016727209091187, "learning_rate": 8.592592592592593e-06, "loss": 0.2075, "step": 11680 }, { "epoch": 8.35, "grad_norm": 0.69717937707901, "learning_rate": 8.555555555555556e-06, "loss": 0.1609, "step": 11690 }, { "epoch": 8.357142857142858, "grad_norm": 2.023461103439331, "learning_rate": 8.518518518518519e-06, "loss": 0.235, "step": 11700 }, { "epoch": 8.364285714285714, "grad_norm": 2.0078303813934326, "learning_rate": 8.481481481481482e-06, "loss": 0.2081, "step": 11710 }, { "epoch": 8.371428571428572, "grad_norm": 1.4724724292755127, "learning_rate": 8.444444444444446e-06, "loss": 0.2148, "step": 11720 }, { "epoch": 8.378571428571428, "grad_norm": 2.9564125537872314, "learning_rate": 8.407407407407409e-06, "loss": 0.2295, "step": 11730 }, { "epoch": 8.385714285714286, "grad_norm": 1.4059520959854126, "learning_rate": 8.37037037037037e-06, "loss": 0.1398, "step": 11740 }, { "epoch": 8.392857142857142, "grad_norm": 2.894953966140747, "learning_rate": 8.333333333333334e-06, "loss": 0.2699, "step": 11750 }, { "epoch": 8.4, "grad_norm": 2.5017454624176025, "learning_rate": 8.296296296296297e-06, "loss": 0.2322, "step": 11760 }, { "epoch": 8.407142857142857, "grad_norm": 0.9503372311592102, "learning_rate": 8.25925925925926e-06, "loss": 0.1356, "step": 11770 }, { "epoch": 8.414285714285715, "grad_norm": 1.63711416721344, "learning_rate": 8.222222222222223e-06, "loss": 0.1929, "step": 11780 }, { "epoch": 8.42142857142857, "grad_norm": 0.5972274541854858, "learning_rate": 8.185185185185187e-06, "loss": 0.2533, "step": 11790 }, { "epoch": 8.428571428571429, "grad_norm": 1.0893709659576416, "learning_rate": 8.14814814814815e-06, "loss": 0.2089, "step": 11800 }, { "epoch": 8.435714285714285, "grad_norm": 1.5523369312286377, "learning_rate": 8.111111111111112e-06, "loss": 0.1589, "step": 11810 }, { "epoch": 8.442857142857143, "grad_norm": 1.5510472059249878, "learning_rate": 8.074074074074075e-06, "loss": 0.2162, "step": 11820 }, { "epoch": 8.45, "grad_norm": 2.1272058486938477, "learning_rate": 8.037037037037038e-06, "loss": 0.2172, "step": 11830 }, { "epoch": 8.457142857142857, "grad_norm": 2.2862300872802734, "learning_rate": 8.000000000000001e-06, "loss": 0.2341, "step": 11840 }, { "epoch": 8.464285714285714, "grad_norm": 1.126405954360962, "learning_rate": 7.962962962962963e-06, "loss": 0.1474, "step": 11850 }, { "epoch": 8.471428571428572, "grad_norm": 2.000462293624878, "learning_rate": 7.925925925925926e-06, "loss": 0.2948, "step": 11860 }, { "epoch": 8.478571428571428, "grad_norm": 1.4239530563354492, "learning_rate": 7.88888888888889e-06, "loss": 0.2267, "step": 11870 }, { "epoch": 8.485714285714286, "grad_norm": 2.479355812072754, "learning_rate": 7.851851851851853e-06, "loss": 0.1857, "step": 11880 }, { "epoch": 8.492857142857144, "grad_norm": 1.9741036891937256, "learning_rate": 7.814814814814816e-06, "loss": 0.2049, "step": 11890 }, { "epoch": 8.5, "grad_norm": 1.996099591255188, "learning_rate": 7.777777777777777e-06, "loss": 0.2404, "step": 11900 }, { "epoch": 8.507142857142856, "grad_norm": 1.185811161994934, "learning_rate": 7.74074074074074e-06, "loss": 0.1476, "step": 11910 }, { "epoch": 8.514285714285714, "grad_norm": 1.7766746282577515, "learning_rate": 7.703703703703704e-06, "loss": 0.1483, "step": 11920 }, { "epoch": 8.521428571428572, "grad_norm": 1.0609338283538818, "learning_rate": 7.666666666666667e-06, "loss": 0.3414, "step": 11930 }, { "epoch": 8.528571428571428, "grad_norm": 1.352981448173523, "learning_rate": 7.629629629629629e-06, "loss": 0.2453, "step": 11940 }, { "epoch": 8.535714285714286, "grad_norm": 2.01450252532959, "learning_rate": 7.592592592592593e-06, "loss": 0.0983, "step": 11950 }, { "epoch": 8.542857142857143, "grad_norm": 0.9895955920219421, "learning_rate": 7.555555555555556e-06, "loss": 0.2821, "step": 11960 }, { "epoch": 8.55, "grad_norm": 2.1095712184906006, "learning_rate": 7.518518518518519e-06, "loss": 0.2147, "step": 11970 }, { "epoch": 8.557142857142857, "grad_norm": 1.3148187398910522, "learning_rate": 7.481481481481483e-06, "loss": 0.1834, "step": 11980 }, { "epoch": 8.564285714285715, "grad_norm": 2.1209769248962402, "learning_rate": 7.444444444444444e-06, "loss": 0.2369, "step": 11990 }, { "epoch": 8.571428571428571, "grad_norm": 2.559124708175659, "learning_rate": 7.4074074074074075e-06, "loss": 0.3642, "step": 12000 }, { "epoch": 8.571428571428571, "eval_loss": 0.3329981863498688, "eval_rouge1": 0.9114, "eval_rouge2": 0.8538, "eval_rougeL": 0.9087, "eval_runtime": 122.2883, "eval_samples_per_second": 11.448, "eval_steps_per_second": 5.724, "step": 12000 }, { "epoch": 8.57857142857143, "grad_norm": 2.582113027572632, "learning_rate": 7.370370370370371e-06, "loss": 0.3589, "step": 12010 }, { "epoch": 8.585714285714285, "grad_norm": 1.1606924533843994, "learning_rate": 7.333333333333334e-06, "loss": 0.2036, "step": 12020 }, { "epoch": 8.592857142857143, "grad_norm": 1.990123987197876, "learning_rate": 7.296296296296297e-06, "loss": 0.2347, "step": 12030 }, { "epoch": 8.6, "grad_norm": 1.3593547344207764, "learning_rate": 7.2592592592592605e-06, "loss": 0.1475, "step": 12040 }, { "epoch": 8.607142857142858, "grad_norm": 0.7821537256240845, "learning_rate": 7.222222222222222e-06, "loss": 0.2889, "step": 12050 }, { "epoch": 8.614285714285714, "grad_norm": 2.2297816276550293, "learning_rate": 7.185185185185185e-06, "loss": 0.2495, "step": 12060 }, { "epoch": 8.621428571428572, "grad_norm": 1.9694111347198486, "learning_rate": 7.1481481481481486e-06, "loss": 0.2182, "step": 12070 }, { "epoch": 8.628571428571428, "grad_norm": 1.3611793518066406, "learning_rate": 7.111111111111112e-06, "loss": 0.2116, "step": 12080 }, { "epoch": 8.635714285714286, "grad_norm": 0.9108260869979858, "learning_rate": 7.074074074074074e-06, "loss": 0.2229, "step": 12090 }, { "epoch": 8.642857142857142, "grad_norm": 2.577470302581787, "learning_rate": 7.0370370370370375e-06, "loss": 0.3132, "step": 12100 }, { "epoch": 8.65, "grad_norm": 0.837846040725708, "learning_rate": 7.000000000000001e-06, "loss": 0.1791, "step": 12110 }, { "epoch": 8.657142857142857, "grad_norm": 2.7253611087799072, "learning_rate": 6.962962962962963e-06, "loss": 0.1998, "step": 12120 }, { "epoch": 8.664285714285715, "grad_norm": 1.6284639835357666, "learning_rate": 6.925925925925926e-06, "loss": 0.2917, "step": 12130 }, { "epoch": 8.67142857142857, "grad_norm": 1.0358765125274658, "learning_rate": 6.888888888888889e-06, "loss": 0.2949, "step": 12140 }, { "epoch": 8.678571428571429, "grad_norm": 1.9100443124771118, "learning_rate": 6.851851851851852e-06, "loss": 0.1639, "step": 12150 }, { "epoch": 8.685714285714285, "grad_norm": 1.4746047258377075, "learning_rate": 6.814814814814815e-06, "loss": 0.1798, "step": 12160 }, { "epoch": 8.692857142857143, "grad_norm": 1.1177834272384644, "learning_rate": 6.777777777777779e-06, "loss": 0.2687, "step": 12170 }, { "epoch": 8.7, "grad_norm": 0.8141186833381653, "learning_rate": 6.74074074074074e-06, "loss": 0.1627, "step": 12180 }, { "epoch": 8.707142857142857, "grad_norm": 1.4017144441604614, "learning_rate": 6.703703703703703e-06, "loss": 0.1938, "step": 12190 }, { "epoch": 8.714285714285714, "grad_norm": 0.7198919653892517, "learning_rate": 6.666666666666667e-06, "loss": 0.222, "step": 12200 }, { "epoch": 8.721428571428572, "grad_norm": 1.4436593055725098, "learning_rate": 6.62962962962963e-06, "loss": 0.2821, "step": 12210 }, { "epoch": 8.728571428571428, "grad_norm": 1.8222748041152954, "learning_rate": 6.592592592592593e-06, "loss": 0.3652, "step": 12220 }, { "epoch": 8.735714285714286, "grad_norm": 1.033396601676941, "learning_rate": 6.555555555555556e-06, "loss": 0.1775, "step": 12230 }, { "epoch": 8.742857142857144, "grad_norm": 1.3893368244171143, "learning_rate": 6.518518518518518e-06, "loss": 0.3292, "step": 12240 }, { "epoch": 8.75, "grad_norm": 1.7294946908950806, "learning_rate": 6.481481481481481e-06, "loss": 0.3136, "step": 12250 }, { "epoch": 8.757142857142856, "grad_norm": 1.5931612253189087, "learning_rate": 6.4444444444444445e-06, "loss": 0.2498, "step": 12260 }, { "epoch": 8.764285714285714, "grad_norm": 1.6048915386199951, "learning_rate": 6.407407407407408e-06, "loss": 0.2996, "step": 12270 }, { "epoch": 8.771428571428572, "grad_norm": 0.9885932803153992, "learning_rate": 6.370370370370371e-06, "loss": 0.1647, "step": 12280 }, { "epoch": 8.778571428571428, "grad_norm": 4.228821277618408, "learning_rate": 6.333333333333334e-06, "loss": 0.2533, "step": 12290 }, { "epoch": 8.785714285714286, "grad_norm": 2.150721788406372, "learning_rate": 6.296296296296296e-06, "loss": 0.2922, "step": 12300 }, { "epoch": 8.792857142857143, "grad_norm": 2.0247929096221924, "learning_rate": 6.259259259259259e-06, "loss": 0.1613, "step": 12310 }, { "epoch": 8.8, "grad_norm": 0.5003380179405212, "learning_rate": 6.222222222222222e-06, "loss": 0.1459, "step": 12320 }, { "epoch": 8.807142857142857, "grad_norm": 0.9374644756317139, "learning_rate": 6.1851851851851856e-06, "loss": 0.2384, "step": 12330 }, { "epoch": 8.814285714285715, "grad_norm": 1.5784926414489746, "learning_rate": 6.148148148148149e-06, "loss": 0.2458, "step": 12340 }, { "epoch": 8.821428571428571, "grad_norm": 1.6696372032165527, "learning_rate": 6.111111111111111e-06, "loss": 0.2787, "step": 12350 }, { "epoch": 8.82857142857143, "grad_norm": 1.2524793148040771, "learning_rate": 6.0740740740740745e-06, "loss": 0.2445, "step": 12360 }, { "epoch": 8.835714285714285, "grad_norm": 2.176713466644287, "learning_rate": 6.037037037037038e-06, "loss": 0.1896, "step": 12370 }, { "epoch": 8.842857142857143, "grad_norm": 1.964752197265625, "learning_rate": 6e-06, "loss": 0.261, "step": 12380 }, { "epoch": 8.85, "grad_norm": 2.006638526916504, "learning_rate": 5.962962962962963e-06, "loss": 0.2415, "step": 12390 }, { "epoch": 8.857142857142858, "grad_norm": 1.9313900470733643, "learning_rate": 5.925925925925927e-06, "loss": 0.3772, "step": 12400 }, { "epoch": 8.864285714285714, "grad_norm": 2.112170934677124, "learning_rate": 5.888888888888889e-06, "loss": 0.2396, "step": 12410 }, { "epoch": 8.871428571428572, "grad_norm": 2.7346787452697754, "learning_rate": 5.851851851851852e-06, "loss": 0.325, "step": 12420 }, { "epoch": 8.878571428571428, "grad_norm": 3.1768178939819336, "learning_rate": 5.814814814814816e-06, "loss": 0.2393, "step": 12430 }, { "epoch": 8.885714285714286, "grad_norm": 2.164926767349243, "learning_rate": 5.777777777777778e-06, "loss": 0.2444, "step": 12440 }, { "epoch": 8.892857142857142, "grad_norm": 0.8274826407432556, "learning_rate": 5.740740740740741e-06, "loss": 0.2662, "step": 12450 }, { "epoch": 8.9, "grad_norm": 1.024776816368103, "learning_rate": 5.7037037037037045e-06, "loss": 0.2059, "step": 12460 }, { "epoch": 8.907142857142857, "grad_norm": 1.1089740991592407, "learning_rate": 5.666666666666667e-06, "loss": 0.2529, "step": 12470 }, { "epoch": 8.914285714285715, "grad_norm": 1.3669365644454956, "learning_rate": 5.62962962962963e-06, "loss": 0.2432, "step": 12480 }, { "epoch": 8.92142857142857, "grad_norm": 1.9711703062057495, "learning_rate": 5.592592592592593e-06, "loss": 0.127, "step": 12490 }, { "epoch": 8.928571428571429, "grad_norm": 1.4796557426452637, "learning_rate": 5.555555555555556e-06, "loss": 0.2244, "step": 12500 }, { "epoch": 8.935714285714285, "grad_norm": 2.029303789138794, "learning_rate": 5.518518518518519e-06, "loss": 0.2939, "step": 12510 }, { "epoch": 8.942857142857143, "grad_norm": 2.0809414386749268, "learning_rate": 5.4814814814814815e-06, "loss": 0.2146, "step": 12520 }, { "epoch": 8.95, "grad_norm": 1.2555458545684814, "learning_rate": 5.444444444444445e-06, "loss": 0.2116, "step": 12530 }, { "epoch": 8.957142857142857, "grad_norm": 2.3402106761932373, "learning_rate": 5.407407407407407e-06, "loss": 0.1587, "step": 12540 }, { "epoch": 8.964285714285714, "grad_norm": 1.2514257431030273, "learning_rate": 5.37037037037037e-06, "loss": 0.1598, "step": 12550 }, { "epoch": 8.971428571428572, "grad_norm": 3.1680376529693604, "learning_rate": 5.333333333333334e-06, "loss": 0.2925, "step": 12560 }, { "epoch": 8.978571428571428, "grad_norm": 2.482211112976074, "learning_rate": 5.296296296296296e-06, "loss": 0.2152, "step": 12570 }, { "epoch": 8.985714285714286, "grad_norm": 2.3472371101379395, "learning_rate": 5.259259259259259e-06, "loss": 0.2079, "step": 12580 }, { "epoch": 8.992857142857144, "grad_norm": 2.52826189994812, "learning_rate": 5.2222222222222226e-06, "loss": 0.2124, "step": 12590 }, { "epoch": 9.0, "grad_norm": 1.8535226583480835, "learning_rate": 5.185185185185185e-06, "loss": 0.232, "step": 12600 }, { "epoch": 9.007142857142858, "grad_norm": 2.8780601024627686, "learning_rate": 5.148148148148148e-06, "loss": 0.3111, "step": 12610 }, { "epoch": 9.014285714285714, "grad_norm": 1.5909632444381714, "learning_rate": 5.1111111111111115e-06, "loss": 0.2788, "step": 12620 }, { "epoch": 9.021428571428572, "grad_norm": 1.7076550722122192, "learning_rate": 5.074074074074074e-06, "loss": 0.2575, "step": 12630 }, { "epoch": 9.028571428571428, "grad_norm": 2.8672637939453125, "learning_rate": 5.037037037037037e-06, "loss": 0.2228, "step": 12640 }, { "epoch": 9.035714285714286, "grad_norm": 1.618055820465088, "learning_rate": 5e-06, "loss": 0.1157, "step": 12650 }, { "epoch": 9.042857142857143, "grad_norm": 1.407122015953064, "learning_rate": 4.962962962962963e-06, "loss": 0.2321, "step": 12660 }, { "epoch": 9.05, "grad_norm": 1.6875501871109009, "learning_rate": 4.925925925925926e-06, "loss": 0.2938, "step": 12670 }, { "epoch": 9.057142857142857, "grad_norm": 1.0872751474380493, "learning_rate": 4.888888888888889e-06, "loss": 0.181, "step": 12680 }, { "epoch": 9.064285714285715, "grad_norm": 1.689308524131775, "learning_rate": 4.851851851851852e-06, "loss": 0.1147, "step": 12690 }, { "epoch": 9.071428571428571, "grad_norm": 2.5880138874053955, "learning_rate": 4.814814814814815e-06, "loss": 0.2757, "step": 12700 }, { "epoch": 9.07857142857143, "grad_norm": 2.9612958431243896, "learning_rate": 4.777777777777778e-06, "loss": 0.2847, "step": 12710 }, { "epoch": 9.085714285714285, "grad_norm": 1.7059327363967896, "learning_rate": 4.740740740740741e-06, "loss": 0.195, "step": 12720 }, { "epoch": 9.092857142857143, "grad_norm": 1.702331304550171, "learning_rate": 4.703703703703704e-06, "loss": 0.2408, "step": 12730 }, { "epoch": 9.1, "grad_norm": 1.814587950706482, "learning_rate": 4.666666666666667e-06, "loss": 0.3085, "step": 12740 }, { "epoch": 9.107142857142858, "grad_norm": 2.863785743713379, "learning_rate": 4.6296296296296296e-06, "loss": 0.2667, "step": 12750 }, { "epoch": 9.114285714285714, "grad_norm": 1.6600861549377441, "learning_rate": 4.592592592592593e-06, "loss": 0.1376, "step": 12760 }, { "epoch": 9.121428571428572, "grad_norm": 1.3329426050186157, "learning_rate": 4.555555555555556e-06, "loss": 0.2547, "step": 12770 }, { "epoch": 9.128571428571428, "grad_norm": 4.032371997833252, "learning_rate": 4.5185185185185185e-06, "loss": 0.4649, "step": 12780 }, { "epoch": 9.135714285714286, "grad_norm": 2.709066390991211, "learning_rate": 4.481481481481482e-06, "loss": 0.2336, "step": 12790 }, { "epoch": 9.142857142857142, "grad_norm": 1.063931941986084, "learning_rate": 4.444444444444445e-06, "loss": 0.1731, "step": 12800 }, { "epoch": 9.15, "grad_norm": 1.3110073804855347, "learning_rate": 4.407407407407407e-06, "loss": 0.1381, "step": 12810 }, { "epoch": 9.157142857142857, "grad_norm": 1.9574129581451416, "learning_rate": 4.370370370370371e-06, "loss": 0.1958, "step": 12820 }, { "epoch": 9.164285714285715, "grad_norm": 2.5129504203796387, "learning_rate": 4.333333333333334e-06, "loss": 0.2162, "step": 12830 }, { "epoch": 9.17142857142857, "grad_norm": 1.842850923538208, "learning_rate": 4.296296296296296e-06, "loss": 0.2746, "step": 12840 }, { "epoch": 9.178571428571429, "grad_norm": 1.1451313495635986, "learning_rate": 4.2592592592592596e-06, "loss": 0.2442, "step": 12850 }, { "epoch": 9.185714285714285, "grad_norm": 2.0765175819396973, "learning_rate": 4.222222222222223e-06, "loss": 0.1531, "step": 12860 }, { "epoch": 9.192857142857143, "grad_norm": 0.9250321984291077, "learning_rate": 4.185185185185185e-06, "loss": 0.2876, "step": 12870 }, { "epoch": 9.2, "grad_norm": 1.5332380533218384, "learning_rate": 4.1481481481481485e-06, "loss": 0.2255, "step": 12880 }, { "epoch": 9.207142857142857, "grad_norm": 3.1344316005706787, "learning_rate": 4.111111111111112e-06, "loss": 0.2654, "step": 12890 }, { "epoch": 9.214285714285714, "grad_norm": 1.4092166423797607, "learning_rate": 4.074074074074075e-06, "loss": 0.2358, "step": 12900 }, { "epoch": 9.221428571428572, "grad_norm": 1.517716646194458, "learning_rate": 4.037037037037037e-06, "loss": 0.2455, "step": 12910 }, { "epoch": 9.228571428571428, "grad_norm": 0.7355996966362, "learning_rate": 4.000000000000001e-06, "loss": 0.1761, "step": 12920 }, { "epoch": 9.235714285714286, "grad_norm": 2.077259063720703, "learning_rate": 3.962962962962963e-06, "loss": 0.2791, "step": 12930 }, { "epoch": 9.242857142857142, "grad_norm": 1.3175309896469116, "learning_rate": 3.925925925925926e-06, "loss": 0.2296, "step": 12940 }, { "epoch": 9.25, "grad_norm": 1.0608943700790405, "learning_rate": 3.888888888888889e-06, "loss": 0.1609, "step": 12950 }, { "epoch": 9.257142857142856, "grad_norm": 2.2581288814544678, "learning_rate": 3.851851851851852e-06, "loss": 0.2192, "step": 12960 }, { "epoch": 9.264285714285714, "grad_norm": 1.672400951385498, "learning_rate": 3.814814814814814e-06, "loss": 0.1548, "step": 12970 }, { "epoch": 9.271428571428572, "grad_norm": 0.7743004560470581, "learning_rate": 3.777777777777778e-06, "loss": 0.3168, "step": 12980 }, { "epoch": 9.278571428571428, "grad_norm": 0.9588621854782104, "learning_rate": 3.7407407407407413e-06, "loss": 0.1585, "step": 12990 }, { "epoch": 9.285714285714286, "grad_norm": 1.7508875131607056, "learning_rate": 3.7037037037037037e-06, "loss": 0.2137, "step": 13000 }, { "epoch": 9.285714285714286, "eval_loss": 0.3308734893798828, "eval_rouge1": 0.9118, "eval_rouge2": 0.8545, "eval_rougeL": 0.909, "eval_runtime": 122.198, "eval_samples_per_second": 11.457, "eval_steps_per_second": 5.728, "step": 13000 } ], "logging_steps": 10, "max_steps": 14000, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1000, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.24700797698048e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }