mT5-XLSum / checkpoint-13000 / trainer_state.json
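The JSON below is the raw Hugging Face Trainer state saved with this checkpoint. As a minimal sketch (not part of the original file), the following Python snippet shows one way to load it and separate the per-step training-loss entries from the periodic evaluation entries in "log_history"; the local path is an assumption and should point at wherever the checkpoint has been downloaded.

# Minimal sketch: inspect trainer_state.json from this checkpoint.
# Assumes the file has been downloaded to ./checkpoint-13000/ locally.
import json

with open("checkpoint-13000/trainer_state.json") as f:
    state = json.load(f)

print("best metric:", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])

# Training-loss entries carry "loss"; evaluation entries carry "eval_loss".
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

for e in eval_log:
    print(f'step {e["step"]:>5}: eval_loss={e["eval_loss"]:.4f} '
          f'rougeL={e["eval_rougeL"]:.4f}')
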
{
"best_metric": 0.3308734893798828,
"best_model_checkpoint": "results_mt5XLSum_augmented/checkpoint-13000",
"epoch": 9.285714285714286,
"eval_steps": 1000,
"global_step": 13000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.007142857142857143,
"grad_norm": 10.661067962646484,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.8382,
"step": 10
},
{
"epoch": 0.014285714285714285,
"grad_norm": 11.73471450805664,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.6838,
"step": 20
},
{
"epoch": 0.02142857142857143,
"grad_norm": 13.098968505859375,
"learning_rate": 3e-06,
"loss": 1.86,
"step": 30
},
{
"epoch": 0.02857142857142857,
"grad_norm": 10.79481315612793,
"learning_rate": 4.000000000000001e-06,
"loss": 1.7851,
"step": 40
},
{
"epoch": 0.03571428571428571,
"grad_norm": 11.62800121307373,
"learning_rate": 5e-06,
"loss": 1.4648,
"step": 50
},
{
"epoch": 0.04285714285714286,
"grad_norm": 9.00180721282959,
"learning_rate": 6e-06,
"loss": 1.4355,
"step": 60
},
{
"epoch": 0.05,
"grad_norm": 11.218201637268066,
"learning_rate": 7.000000000000001e-06,
"loss": 1.3977,
"step": 70
},
{
"epoch": 0.05714285714285714,
"grad_norm": 7.159872531890869,
"learning_rate": 8.000000000000001e-06,
"loss": 0.9774,
"step": 80
},
{
"epoch": 0.06428571428571428,
"grad_norm": 6.163649559020996,
"learning_rate": 9e-06,
"loss": 0.8556,
"step": 90
},
{
"epoch": 0.07142857142857142,
"grad_norm": 4.800461292266846,
"learning_rate": 1e-05,
"loss": 0.8627,
"step": 100
},
{
"epoch": 0.07857142857142857,
"grad_norm": 4.373474597930908,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.7674,
"step": 110
},
{
"epoch": 0.08571428571428572,
"grad_norm": 4.36292839050293,
"learning_rate": 1.2e-05,
"loss": 0.7035,
"step": 120
},
{
"epoch": 0.09285714285714286,
"grad_norm": 4.634104251861572,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.9197,
"step": 130
},
{
"epoch": 0.1,
"grad_norm": 4.442883491516113,
"learning_rate": 1.4000000000000001e-05,
"loss": 0.7712,
"step": 140
},
{
"epoch": 0.10714285714285714,
"grad_norm": 3.7063419818878174,
"learning_rate": 1.5e-05,
"loss": 0.8602,
"step": 150
},
{
"epoch": 0.11428571428571428,
"grad_norm": 3.7267696857452393,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.6758,
"step": 160
},
{
"epoch": 0.12142857142857143,
"grad_norm": 3.7582225799560547,
"learning_rate": 1.7000000000000003e-05,
"loss": 0.8091,
"step": 170
},
{
"epoch": 0.12857142857142856,
"grad_norm": 2.829885482788086,
"learning_rate": 1.8e-05,
"loss": 0.8014,
"step": 180
},
{
"epoch": 0.1357142857142857,
"grad_norm": 3.4555258750915527,
"learning_rate": 1.9e-05,
"loss": 0.5562,
"step": 190
},
{
"epoch": 0.14285714285714285,
"grad_norm": 3.120464563369751,
"learning_rate": 2e-05,
"loss": 0.6391,
"step": 200
},
{
"epoch": 0.15,
"grad_norm": 2.8185417652130127,
"learning_rate": 2.1e-05,
"loss": 0.6501,
"step": 210
},
{
"epoch": 0.15714285714285714,
"grad_norm": 2.7110323905944824,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.9029,
"step": 220
},
{
"epoch": 0.16428571428571428,
"grad_norm": 3.336864709854126,
"learning_rate": 2.3000000000000003e-05,
"loss": 0.6938,
"step": 230
},
{
"epoch": 0.17142857142857143,
"grad_norm": 2.9769392013549805,
"learning_rate": 2.4e-05,
"loss": 0.6322,
"step": 240
},
{
"epoch": 0.17857142857142858,
"grad_norm": 2.5426135063171387,
"learning_rate": 2.5e-05,
"loss": 0.752,
"step": 250
},
{
"epoch": 0.18571428571428572,
"grad_norm": 3.2473714351654053,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.5993,
"step": 260
},
{
"epoch": 0.19285714285714287,
"grad_norm": 2.9979186058044434,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.5928,
"step": 270
},
{
"epoch": 0.2,
"grad_norm": 3.1635003089904785,
"learning_rate": 2.8000000000000003e-05,
"loss": 0.4335,
"step": 280
},
{
"epoch": 0.20714285714285716,
"grad_norm": 4.114761829376221,
"learning_rate": 2.9e-05,
"loss": 0.5023,
"step": 290
},
{
"epoch": 0.21428571428571427,
"grad_norm": 2.0567097663879395,
"learning_rate": 3e-05,
"loss": 0.5124,
"step": 300
},
{
"epoch": 0.22142857142857142,
"grad_norm": 3.0209622383117676,
"learning_rate": 3.1e-05,
"loss": 0.5092,
"step": 310
},
{
"epoch": 0.22857142857142856,
"grad_norm": 1.8497462272644043,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.6075,
"step": 320
},
{
"epoch": 0.2357142857142857,
"grad_norm": 1.6237268447875977,
"learning_rate": 3.3e-05,
"loss": 0.5343,
"step": 330
},
{
"epoch": 0.24285714285714285,
"grad_norm": 2.9820289611816406,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.5516,
"step": 340
},
{
"epoch": 0.25,
"grad_norm": 1.676515817642212,
"learning_rate": 3.5e-05,
"loss": 0.6621,
"step": 350
},
{
"epoch": 0.2571428571428571,
"grad_norm": 3.4376354217529297,
"learning_rate": 3.6e-05,
"loss": 0.4768,
"step": 360
},
{
"epoch": 0.2642857142857143,
"grad_norm": 3.2355964183807373,
"learning_rate": 3.7e-05,
"loss": 0.6184,
"step": 370
},
{
"epoch": 0.2714285714285714,
"grad_norm": 2.2971713542938232,
"learning_rate": 3.8e-05,
"loss": 0.7827,
"step": 380
},
{
"epoch": 0.2785714285714286,
"grad_norm": 2.442052125930786,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.5901,
"step": 390
},
{
"epoch": 0.2857142857142857,
"grad_norm": 2.3172521591186523,
"learning_rate": 4e-05,
"loss": 0.6067,
"step": 400
},
{
"epoch": 0.29285714285714287,
"grad_norm": 2.06640887260437,
"learning_rate": 4.1e-05,
"loss": 0.6589,
"step": 410
},
{
"epoch": 0.3,
"grad_norm": 2.416149854660034,
"learning_rate": 4.2e-05,
"loss": 0.6489,
"step": 420
},
{
"epoch": 0.30714285714285716,
"grad_norm": 2.340235471725464,
"learning_rate": 4.3e-05,
"loss": 0.8339,
"step": 430
},
{
"epoch": 0.3142857142857143,
"grad_norm": 1.9825040102005005,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.7415,
"step": 440
},
{
"epoch": 0.32142857142857145,
"grad_norm": 1.8823323249816895,
"learning_rate": 4.5e-05,
"loss": 0.611,
"step": 450
},
{
"epoch": 0.32857142857142857,
"grad_norm": 2.3207123279571533,
"learning_rate": 4.600000000000001e-05,
"loss": 0.6172,
"step": 460
},
{
"epoch": 0.3357142857142857,
"grad_norm": 1.2963736057281494,
"learning_rate": 4.7e-05,
"loss": 0.4114,
"step": 470
},
{
"epoch": 0.34285714285714286,
"grad_norm": 2.191009044647217,
"learning_rate": 4.8e-05,
"loss": 0.6251,
"step": 480
},
{
"epoch": 0.35,
"grad_norm": 2.1893374919891357,
"learning_rate": 4.9e-05,
"loss": 0.5966,
"step": 490
},
{
"epoch": 0.35714285714285715,
"grad_norm": 2.214414596557617,
"learning_rate": 5e-05,
"loss": 0.7495,
"step": 500
},
{
"epoch": 0.36428571428571427,
"grad_norm": 1.8343987464904785,
"learning_rate": 4.9962962962962964e-05,
"loss": 0.6179,
"step": 510
},
{
"epoch": 0.37142857142857144,
"grad_norm": 2.675177574157715,
"learning_rate": 4.9925925925925926e-05,
"loss": 0.7262,
"step": 520
},
{
"epoch": 0.37857142857142856,
"grad_norm": 1.7133303880691528,
"learning_rate": 4.9888888888888894e-05,
"loss": 0.546,
"step": 530
},
{
"epoch": 0.38571428571428573,
"grad_norm": 1.4926049709320068,
"learning_rate": 4.9851851851851855e-05,
"loss": 0.4057,
"step": 540
},
{
"epoch": 0.39285714285714285,
"grad_norm": 2.0434134006500244,
"learning_rate": 4.981481481481482e-05,
"loss": 1.014,
"step": 550
},
{
"epoch": 0.4,
"grad_norm": 2.037074089050293,
"learning_rate": 4.977777777777778e-05,
"loss": 0.6088,
"step": 560
},
{
"epoch": 0.40714285714285714,
"grad_norm": 2.6729607582092285,
"learning_rate": 4.974074074074074e-05,
"loss": 0.5986,
"step": 570
},
{
"epoch": 0.4142857142857143,
"grad_norm": 1.8161852359771729,
"learning_rate": 4.970370370370371e-05,
"loss": 0.4681,
"step": 580
},
{
"epoch": 0.42142857142857143,
"grad_norm": 2.140554666519165,
"learning_rate": 4.966666666666667e-05,
"loss": 0.5865,
"step": 590
},
{
"epoch": 0.42857142857142855,
"grad_norm": 2.3027639389038086,
"learning_rate": 4.962962962962963e-05,
"loss": 0.4166,
"step": 600
},
{
"epoch": 0.4357142857142857,
"grad_norm": 2.973132371902466,
"learning_rate": 4.959259259259259e-05,
"loss": 0.6394,
"step": 610
},
{
"epoch": 0.44285714285714284,
"grad_norm": 2.898897886276245,
"learning_rate": 4.955555555555556e-05,
"loss": 0.5572,
"step": 620
},
{
"epoch": 0.45,
"grad_norm": 2.100752353668213,
"learning_rate": 4.951851851851852e-05,
"loss": 0.4788,
"step": 630
},
{
"epoch": 0.45714285714285713,
"grad_norm": 2.4735984802246094,
"learning_rate": 4.9481481481481485e-05,
"loss": 0.756,
"step": 640
},
{
"epoch": 0.4642857142857143,
"grad_norm": 1.5895243883132935,
"learning_rate": 4.9444444444444446e-05,
"loss": 0.5265,
"step": 650
},
{
"epoch": 0.4714285714285714,
"grad_norm": 2.067650079727173,
"learning_rate": 4.940740740740741e-05,
"loss": 0.6079,
"step": 660
},
{
"epoch": 0.4785714285714286,
"grad_norm": 1.6676874160766602,
"learning_rate": 4.937037037037037e-05,
"loss": 0.5196,
"step": 670
},
{
"epoch": 0.4857142857142857,
"grad_norm": 1.6084502935409546,
"learning_rate": 4.933333333333334e-05,
"loss": 0.431,
"step": 680
},
{
"epoch": 0.4928571428571429,
"grad_norm": 2.8858065605163574,
"learning_rate": 4.92962962962963e-05,
"loss": 0.5329,
"step": 690
},
{
"epoch": 0.5,
"grad_norm": 2.657158613204956,
"learning_rate": 4.925925925925926e-05,
"loss": 0.5092,
"step": 700
},
{
"epoch": 0.5071428571428571,
"grad_norm": 2.636237144470215,
"learning_rate": 4.922222222222222e-05,
"loss": 0.4074,
"step": 710
},
{
"epoch": 0.5142857142857142,
"grad_norm": 2.5960123538970947,
"learning_rate": 4.918518518518519e-05,
"loss": 0.424,
"step": 720
},
{
"epoch": 0.5214285714285715,
"grad_norm": 1.7363989353179932,
"learning_rate": 4.9148148148148145e-05,
"loss": 0.7226,
"step": 730
},
{
"epoch": 0.5285714285714286,
"grad_norm": 2.8367726802825928,
"learning_rate": 4.9111111111111114e-05,
"loss": 0.5535,
"step": 740
},
{
"epoch": 0.5357142857142857,
"grad_norm": 2.1372838020324707,
"learning_rate": 4.9074074074074075e-05,
"loss": 0.6546,
"step": 750
},
{
"epoch": 0.5428571428571428,
"grad_norm": 1.9456530809402466,
"learning_rate": 4.903703703703704e-05,
"loss": 0.5718,
"step": 760
},
{
"epoch": 0.55,
"grad_norm": 3.146430015563965,
"learning_rate": 4.9e-05,
"loss": 0.486,
"step": 770
},
{
"epoch": 0.5571428571428572,
"grad_norm": 1.633537769317627,
"learning_rate": 4.896296296296297e-05,
"loss": 0.4602,
"step": 780
},
{
"epoch": 0.5642857142857143,
"grad_norm": 3.580615282058716,
"learning_rate": 4.892592592592593e-05,
"loss": 0.5991,
"step": 790
},
{
"epoch": 0.5714285714285714,
"grad_norm": 2.354482889175415,
"learning_rate": 4.888888888888889e-05,
"loss": 0.639,
"step": 800
},
{
"epoch": 0.5785714285714286,
"grad_norm": 1.701165795326233,
"learning_rate": 4.885185185185185e-05,
"loss": 0.4895,
"step": 810
},
{
"epoch": 0.5857142857142857,
"grad_norm": 1.7530277967453003,
"learning_rate": 4.881481481481482e-05,
"loss": 0.5029,
"step": 820
},
{
"epoch": 0.5928571428571429,
"grad_norm": 1.4377954006195068,
"learning_rate": 4.8777777777777775e-05,
"loss": 0.4668,
"step": 830
},
{
"epoch": 0.6,
"grad_norm": 1.9733954668045044,
"learning_rate": 4.874074074074074e-05,
"loss": 0.6434,
"step": 840
},
{
"epoch": 0.6071428571428571,
"grad_norm": 1.5659862756729126,
"learning_rate": 4.8703703703703704e-05,
"loss": 0.4719,
"step": 850
},
{
"epoch": 0.6142857142857143,
"grad_norm": 1.9549959897994995,
"learning_rate": 4.866666666666667e-05,
"loss": 0.5003,
"step": 860
},
{
"epoch": 0.6214285714285714,
"grad_norm": 2.0998220443725586,
"learning_rate": 4.862962962962963e-05,
"loss": 0.4666,
"step": 870
},
{
"epoch": 0.6285714285714286,
"grad_norm": 1.6551822423934937,
"learning_rate": 4.8592592592592596e-05,
"loss": 0.5508,
"step": 880
},
{
"epoch": 0.6357142857142857,
"grad_norm": 2.268826723098755,
"learning_rate": 4.855555555555556e-05,
"loss": 0.5333,
"step": 890
},
{
"epoch": 0.6428571428571429,
"grad_norm": 2.170297861099243,
"learning_rate": 4.851851851851852e-05,
"loss": 0.4724,
"step": 900
},
{
"epoch": 0.65,
"grad_norm": 2.3737900257110596,
"learning_rate": 4.848148148148148e-05,
"loss": 0.5938,
"step": 910
},
{
"epoch": 0.6571428571428571,
"grad_norm": 1.5697389841079712,
"learning_rate": 4.844444444444445e-05,
"loss": 0.357,
"step": 920
},
{
"epoch": 0.6642857142857143,
"grad_norm": 1.4354273080825806,
"learning_rate": 4.840740740740741e-05,
"loss": 0.2648,
"step": 930
},
{
"epoch": 0.6714285714285714,
"grad_norm": 1.1631938219070435,
"learning_rate": 4.837037037037037e-05,
"loss": 0.4647,
"step": 940
},
{
"epoch": 0.6785714285714286,
"grad_norm": 2.594999313354492,
"learning_rate": 4.8333333333333334e-05,
"loss": 0.6831,
"step": 950
},
{
"epoch": 0.6857142857142857,
"grad_norm": 2.2979557514190674,
"learning_rate": 4.82962962962963e-05,
"loss": 0.4363,
"step": 960
},
{
"epoch": 0.6928571428571428,
"grad_norm": 3.0777416229248047,
"learning_rate": 4.825925925925926e-05,
"loss": 0.5995,
"step": 970
},
{
"epoch": 0.7,
"grad_norm": 2.430807113647461,
"learning_rate": 4.8222222222222225e-05,
"loss": 0.6433,
"step": 980
},
{
"epoch": 0.7071428571428572,
"grad_norm": 1.7465846538543701,
"learning_rate": 4.818518518518519e-05,
"loss": 0.4973,
"step": 990
},
{
"epoch": 0.7142857142857143,
"grad_norm": 2.805053472518921,
"learning_rate": 4.814814814814815e-05,
"loss": 0.556,
"step": 1000
},
{
"epoch": 0.7142857142857143,
"eval_loss": 0.4978465139865875,
"eval_rouge1": 0.8844,
"eval_rouge2": 0.8183,
"eval_rougeL": 0.8811,
"eval_runtime": 122.1438,
"eval_samples_per_second": 11.462,
"eval_steps_per_second": 5.731,
"step": 1000
},
{
"epoch": 0.7214285714285714,
"grad_norm": 1.6127879619598389,
"learning_rate": 4.811111111111111e-05,
"loss": 0.5748,
"step": 1010
},
{
"epoch": 0.7285714285714285,
"grad_norm": 1.1071356534957886,
"learning_rate": 4.807407407407408e-05,
"loss": 0.5246,
"step": 1020
},
{
"epoch": 0.7357142857142858,
"grad_norm": 1.9362713098526,
"learning_rate": 4.803703703703704e-05,
"loss": 0.563,
"step": 1030
},
{
"epoch": 0.7428571428571429,
"grad_norm": 2.037553548812866,
"learning_rate": 4.8e-05,
"loss": 0.466,
"step": 1040
},
{
"epoch": 0.75,
"grad_norm": 2.196617841720581,
"learning_rate": 4.796296296296296e-05,
"loss": 0.5647,
"step": 1050
},
{
"epoch": 0.7571428571428571,
"grad_norm": 1.278428077697754,
"learning_rate": 4.792592592592593e-05,
"loss": 0.4821,
"step": 1060
},
{
"epoch": 0.7642857142857142,
"grad_norm": 1.3506104946136475,
"learning_rate": 4.7888888888888886e-05,
"loss": 0.5194,
"step": 1070
},
{
"epoch": 0.7714285714285715,
"grad_norm": 2.3870656490325928,
"learning_rate": 4.7851851851851854e-05,
"loss": 0.7373,
"step": 1080
},
{
"epoch": 0.7785714285714286,
"grad_norm": 2.071242094039917,
"learning_rate": 4.7814814814814816e-05,
"loss": 0.5598,
"step": 1090
},
{
"epoch": 0.7857142857142857,
"grad_norm": 1.8460086584091187,
"learning_rate": 4.7777777777777784e-05,
"loss": 0.6184,
"step": 1100
},
{
"epoch": 0.7928571428571428,
"grad_norm": 3.804724931716919,
"learning_rate": 4.774074074074074e-05,
"loss": 0.5978,
"step": 1110
},
{
"epoch": 0.8,
"grad_norm": 2.614772081375122,
"learning_rate": 4.770370370370371e-05,
"loss": 0.6203,
"step": 1120
},
{
"epoch": 0.8071428571428572,
"grad_norm": 2.068122386932373,
"learning_rate": 4.766666666666667e-05,
"loss": 0.6149,
"step": 1130
},
{
"epoch": 0.8142857142857143,
"grad_norm": 1.675881266593933,
"learning_rate": 4.762962962962963e-05,
"loss": 0.4437,
"step": 1140
},
{
"epoch": 0.8214285714285714,
"grad_norm": 1.865435004234314,
"learning_rate": 4.759259259259259e-05,
"loss": 0.5166,
"step": 1150
},
{
"epoch": 0.8285714285714286,
"grad_norm": 1.0480509996414185,
"learning_rate": 4.755555555555556e-05,
"loss": 0.3827,
"step": 1160
},
{
"epoch": 0.8357142857142857,
"grad_norm": 2.218554735183716,
"learning_rate": 4.751851851851852e-05,
"loss": 0.6641,
"step": 1170
},
{
"epoch": 0.8428571428571429,
"grad_norm": 2.510831832885742,
"learning_rate": 4.7481481481481483e-05,
"loss": 0.496,
"step": 1180
},
{
"epoch": 0.85,
"grad_norm": 1.8328824043273926,
"learning_rate": 4.7444444444444445e-05,
"loss": 0.5466,
"step": 1190
},
{
"epoch": 0.8571428571428571,
"grad_norm": 1.5480728149414062,
"learning_rate": 4.740740740740741e-05,
"loss": 0.4992,
"step": 1200
},
{
"epoch": 0.8642857142857143,
"grad_norm": 1.3723492622375488,
"learning_rate": 4.737037037037037e-05,
"loss": 0.5014,
"step": 1210
},
{
"epoch": 0.8714285714285714,
"grad_norm": 1.7510666847229004,
"learning_rate": 4.7333333333333336e-05,
"loss": 0.5471,
"step": 1220
},
{
"epoch": 0.8785714285714286,
"grad_norm": 2.2057995796203613,
"learning_rate": 4.72962962962963e-05,
"loss": 0.6142,
"step": 1230
},
{
"epoch": 0.8857142857142857,
"grad_norm": 1.7922954559326172,
"learning_rate": 4.7259259259259266e-05,
"loss": 0.5199,
"step": 1240
},
{
"epoch": 0.8928571428571429,
"grad_norm": 1.9541053771972656,
"learning_rate": 4.722222222222222e-05,
"loss": 0.44,
"step": 1250
},
{
"epoch": 0.9,
"grad_norm": 1.2869590520858765,
"learning_rate": 4.718518518518519e-05,
"loss": 0.5157,
"step": 1260
},
{
"epoch": 0.9071428571428571,
"grad_norm": 1.7564722299575806,
"learning_rate": 4.714814814814815e-05,
"loss": 0.4985,
"step": 1270
},
{
"epoch": 0.9142857142857143,
"grad_norm": 1.0782675743103027,
"learning_rate": 4.711111111111111e-05,
"loss": 0.3195,
"step": 1280
},
{
"epoch": 0.9214285714285714,
"grad_norm": 1.7535449266433716,
"learning_rate": 4.7074074074074074e-05,
"loss": 0.376,
"step": 1290
},
{
"epoch": 0.9285714285714286,
"grad_norm": 1.581485629081726,
"learning_rate": 4.703703703703704e-05,
"loss": 0.5975,
"step": 1300
},
{
"epoch": 0.9357142857142857,
"grad_norm": 2.739900827407837,
"learning_rate": 4.7e-05,
"loss": 0.457,
"step": 1310
},
{
"epoch": 0.9428571428571428,
"grad_norm": 2.382187604904175,
"learning_rate": 4.6962962962962966e-05,
"loss": 0.5424,
"step": 1320
},
{
"epoch": 0.95,
"grad_norm": 1.75946843624115,
"learning_rate": 4.692592592592593e-05,
"loss": 0.3563,
"step": 1330
},
{
"epoch": 0.9571428571428572,
"grad_norm": 1.8159079551696777,
"learning_rate": 4.6888888888888895e-05,
"loss": 0.4502,
"step": 1340
},
{
"epoch": 0.9642857142857143,
"grad_norm": 2.605283260345459,
"learning_rate": 4.685185185185185e-05,
"loss": 0.4779,
"step": 1350
},
{
"epoch": 0.9714285714285714,
"grad_norm": 2.594231605529785,
"learning_rate": 4.681481481481482e-05,
"loss": 0.4901,
"step": 1360
},
{
"epoch": 0.9785714285714285,
"grad_norm": 2.109367609024048,
"learning_rate": 4.677777777777778e-05,
"loss": 0.5378,
"step": 1370
},
{
"epoch": 0.9857142857142858,
"grad_norm": 1.960496425628662,
"learning_rate": 4.674074074074074e-05,
"loss": 0.6129,
"step": 1380
},
{
"epoch": 0.9928571428571429,
"grad_norm": 3.4135870933532715,
"learning_rate": 4.67037037037037e-05,
"loss": 0.7069,
"step": 1390
},
{
"epoch": 1.0,
"grad_norm": 1.441308617591858,
"learning_rate": 4.666666666666667e-05,
"loss": 0.4686,
"step": 1400
},
{
"epoch": 1.0071428571428571,
"grad_norm": 1.9842432737350464,
"learning_rate": 4.662962962962963e-05,
"loss": 0.604,
"step": 1410
},
{
"epoch": 1.0142857142857142,
"grad_norm": 1.3867950439453125,
"learning_rate": 4.6592592592592595e-05,
"loss": 0.4168,
"step": 1420
},
{
"epoch": 1.0214285714285714,
"grad_norm": 2.118037462234497,
"learning_rate": 4.6555555555555556e-05,
"loss": 0.6484,
"step": 1430
},
{
"epoch": 1.0285714285714285,
"grad_norm": 1.4064522981643677,
"learning_rate": 4.6518518518518525e-05,
"loss": 0.5275,
"step": 1440
},
{
"epoch": 1.0357142857142858,
"grad_norm": 2.644491672515869,
"learning_rate": 4.648148148148148e-05,
"loss": 0.5361,
"step": 1450
},
{
"epoch": 1.042857142857143,
"grad_norm": 1.4005937576293945,
"learning_rate": 4.644444444444445e-05,
"loss": 0.4497,
"step": 1460
},
{
"epoch": 1.05,
"grad_norm": 1.773334264755249,
"learning_rate": 4.640740740740741e-05,
"loss": 0.4372,
"step": 1470
},
{
"epoch": 1.0571428571428572,
"grad_norm": 2.1667587757110596,
"learning_rate": 4.637037037037038e-05,
"loss": 0.5211,
"step": 1480
},
{
"epoch": 1.0642857142857143,
"grad_norm": 1.1993277072906494,
"learning_rate": 4.633333333333333e-05,
"loss": 0.3694,
"step": 1490
},
{
"epoch": 1.0714285714285714,
"grad_norm": 1.5526480674743652,
"learning_rate": 4.62962962962963e-05,
"loss": 0.686,
"step": 1500
},
{
"epoch": 1.0785714285714285,
"grad_norm": 1.5041449069976807,
"learning_rate": 4.625925925925926e-05,
"loss": 0.4536,
"step": 1510
},
{
"epoch": 1.0857142857142856,
"grad_norm": 1.719254970550537,
"learning_rate": 4.6222222222222224e-05,
"loss": 0.4687,
"step": 1520
},
{
"epoch": 1.092857142857143,
"grad_norm": 1.9565083980560303,
"learning_rate": 4.6185185185185185e-05,
"loss": 0.4054,
"step": 1530
},
{
"epoch": 1.1,
"grad_norm": 1.2271467447280884,
"learning_rate": 4.6148148148148154e-05,
"loss": 0.4189,
"step": 1540
},
{
"epoch": 1.1071428571428572,
"grad_norm": 1.731244683265686,
"learning_rate": 4.6111111111111115e-05,
"loss": 0.4519,
"step": 1550
},
{
"epoch": 1.1142857142857143,
"grad_norm": 1.3039075136184692,
"learning_rate": 4.607407407407408e-05,
"loss": 0.3911,
"step": 1560
},
{
"epoch": 1.1214285714285714,
"grad_norm": 1.3420417308807373,
"learning_rate": 4.603703703703704e-05,
"loss": 0.4239,
"step": 1570
},
{
"epoch": 1.1285714285714286,
"grad_norm": 2.2307205200195312,
"learning_rate": 4.600000000000001e-05,
"loss": 0.4675,
"step": 1580
},
{
"epoch": 1.1357142857142857,
"grad_norm": 2.384147882461548,
"learning_rate": 4.596296296296296e-05,
"loss": 0.3963,
"step": 1590
},
{
"epoch": 1.1428571428571428,
"grad_norm": 1.6016713380813599,
"learning_rate": 4.592592592592593e-05,
"loss": 0.4561,
"step": 1600
},
{
"epoch": 1.15,
"grad_norm": 1.4093197584152222,
"learning_rate": 4.588888888888889e-05,
"loss": 0.4708,
"step": 1610
},
{
"epoch": 1.157142857142857,
"grad_norm": 1.9773272275924683,
"learning_rate": 4.585185185185185e-05,
"loss": 0.5259,
"step": 1620
},
{
"epoch": 1.1642857142857144,
"grad_norm": 1.169757604598999,
"learning_rate": 4.5814814814814815e-05,
"loss": 0.3413,
"step": 1630
},
{
"epoch": 1.1714285714285715,
"grad_norm": 2.1033947467803955,
"learning_rate": 4.577777777777778e-05,
"loss": 0.4888,
"step": 1640
},
{
"epoch": 1.1785714285714286,
"grad_norm": 1.2455283403396606,
"learning_rate": 4.5740740740740745e-05,
"loss": 0.5935,
"step": 1650
},
{
"epoch": 1.1857142857142857,
"grad_norm": 1.283308982849121,
"learning_rate": 4.5703703703703706e-05,
"loss": 0.3946,
"step": 1660
},
{
"epoch": 1.1928571428571428,
"grad_norm": 1.9639955759048462,
"learning_rate": 4.566666666666667e-05,
"loss": 0.519,
"step": 1670
},
{
"epoch": 1.2,
"grad_norm": 0.9380689263343811,
"learning_rate": 4.5629629629629636e-05,
"loss": 0.3357,
"step": 1680
},
{
"epoch": 1.207142857142857,
"grad_norm": 2.330310344696045,
"learning_rate": 4.559259259259259e-05,
"loss": 0.5135,
"step": 1690
},
{
"epoch": 1.2142857142857142,
"grad_norm": 1.5911920070648193,
"learning_rate": 4.555555555555556e-05,
"loss": 0.4165,
"step": 1700
},
{
"epoch": 1.2214285714285715,
"grad_norm": 1.7522234916687012,
"learning_rate": 4.551851851851852e-05,
"loss": 0.5797,
"step": 1710
},
{
"epoch": 1.2285714285714286,
"grad_norm": 2.265571355819702,
"learning_rate": 4.548148148148149e-05,
"loss": 0.3943,
"step": 1720
},
{
"epoch": 1.2357142857142858,
"grad_norm": 2.530675172805786,
"learning_rate": 4.5444444444444444e-05,
"loss": 0.6279,
"step": 1730
},
{
"epoch": 1.2428571428571429,
"grad_norm": 2.072864055633545,
"learning_rate": 4.540740740740741e-05,
"loss": 0.512,
"step": 1740
},
{
"epoch": 1.25,
"grad_norm": 1.5505369901657104,
"learning_rate": 4.5370370370370374e-05,
"loss": 0.3494,
"step": 1750
},
{
"epoch": 1.2571428571428571,
"grad_norm": 1.9888116121292114,
"learning_rate": 4.5333333333333335e-05,
"loss": 0.5841,
"step": 1760
},
{
"epoch": 1.2642857142857142,
"grad_norm": 1.6056774854660034,
"learning_rate": 4.52962962962963e-05,
"loss": 0.5611,
"step": 1770
},
{
"epoch": 1.2714285714285714,
"grad_norm": 1.7950221300125122,
"learning_rate": 4.5259259259259265e-05,
"loss": 0.6097,
"step": 1780
},
{
"epoch": 1.2785714285714285,
"grad_norm": 1.8906399011611938,
"learning_rate": 4.522222222222223e-05,
"loss": 0.4837,
"step": 1790
},
{
"epoch": 1.2857142857142856,
"grad_norm": 1.3988184928894043,
"learning_rate": 4.518518518518519e-05,
"loss": 0.4806,
"step": 1800
},
{
"epoch": 1.292857142857143,
"grad_norm": 1.160243272781372,
"learning_rate": 4.514814814814815e-05,
"loss": 0.5224,
"step": 1810
},
{
"epoch": 1.3,
"grad_norm": 1.0152113437652588,
"learning_rate": 4.511111111111112e-05,
"loss": 0.4115,
"step": 1820
},
{
"epoch": 1.3071428571428572,
"grad_norm": 1.6176999807357788,
"learning_rate": 4.507407407407407e-05,
"loss": 0.4458,
"step": 1830
},
{
"epoch": 1.3142857142857143,
"grad_norm": 1.904784917831421,
"learning_rate": 4.503703703703704e-05,
"loss": 0.5552,
"step": 1840
},
{
"epoch": 1.3214285714285714,
"grad_norm": 1.0539710521697998,
"learning_rate": 4.5e-05,
"loss": 0.438,
"step": 1850
},
{
"epoch": 1.3285714285714285,
"grad_norm": 1.3552178144454956,
"learning_rate": 4.496296296296297e-05,
"loss": 0.2862,
"step": 1860
},
{
"epoch": 1.3357142857142856,
"grad_norm": 1.3787767887115479,
"learning_rate": 4.4925925925925926e-05,
"loss": 0.5173,
"step": 1870
},
{
"epoch": 1.342857142857143,
"grad_norm": 2.570422649383545,
"learning_rate": 4.4888888888888894e-05,
"loss": 0.4581,
"step": 1880
},
{
"epoch": 1.35,
"grad_norm": 1.5974104404449463,
"learning_rate": 4.4851851851851856e-05,
"loss": 0.4599,
"step": 1890
},
{
"epoch": 1.3571428571428572,
"grad_norm": 1.4105775356292725,
"learning_rate": 4.481481481481482e-05,
"loss": 0.3823,
"step": 1900
},
{
"epoch": 1.3642857142857143,
"grad_norm": 2.1751532554626465,
"learning_rate": 4.477777777777778e-05,
"loss": 0.4421,
"step": 1910
},
{
"epoch": 1.3714285714285714,
"grad_norm": 1.9956297874450684,
"learning_rate": 4.474074074074075e-05,
"loss": 0.4082,
"step": 1920
},
{
"epoch": 1.3785714285714286,
"grad_norm": 1.6159803867340088,
"learning_rate": 4.47037037037037e-05,
"loss": 0.3961,
"step": 1930
},
{
"epoch": 1.3857142857142857,
"grad_norm": 1.4909430742263794,
"learning_rate": 4.466666666666667e-05,
"loss": 0.4635,
"step": 1940
},
{
"epoch": 1.3928571428571428,
"grad_norm": 1.5630055665969849,
"learning_rate": 4.462962962962963e-05,
"loss": 0.5968,
"step": 1950
},
{
"epoch": 1.4,
"grad_norm": 1.2496933937072754,
"learning_rate": 4.4592592592592594e-05,
"loss": 0.4546,
"step": 1960
},
{
"epoch": 1.407142857142857,
"grad_norm": 1.6497224569320679,
"learning_rate": 4.4555555555555555e-05,
"loss": 0.354,
"step": 1970
},
{
"epoch": 1.4142857142857144,
"grad_norm": 2.069955587387085,
"learning_rate": 4.4518518518518523e-05,
"loss": 0.4388,
"step": 1980
},
{
"epoch": 1.4214285714285715,
"grad_norm": 1.6338075399398804,
"learning_rate": 4.4481481481481485e-05,
"loss": 0.5459,
"step": 1990
},
{
"epoch": 1.4285714285714286,
"grad_norm": 1.3558902740478516,
"learning_rate": 4.4444444444444447e-05,
"loss": 0.3139,
"step": 2000
},
{
"epoch": 1.4285714285714286,
"eval_loss": 0.43829917907714844,
"eval_rouge1": 0.8911,
"eval_rouge2": 0.8251,
"eval_rougeL": 0.8873,
"eval_runtime": 121.8873,
"eval_samples_per_second": 11.486,
"eval_steps_per_second": 5.743,
"step": 2000
},
{
"epoch": 1.4357142857142857,
"grad_norm": 1.8123821020126343,
"learning_rate": 4.440740740740741e-05,
"loss": 0.5864,
"step": 2010
},
{
"epoch": 1.4428571428571428,
"grad_norm": 0.8494770526885986,
"learning_rate": 4.4370370370370376e-05,
"loss": 0.3284,
"step": 2020
},
{
"epoch": 1.45,
"grad_norm": 2.2536141872406006,
"learning_rate": 4.433333333333334e-05,
"loss": 0.3738,
"step": 2030
},
{
"epoch": 1.457142857142857,
"grad_norm": 2.971925735473633,
"learning_rate": 4.42962962962963e-05,
"loss": 0.5294,
"step": 2040
},
{
"epoch": 1.4642857142857144,
"grad_norm": 1.7820425033569336,
"learning_rate": 4.425925925925926e-05,
"loss": 0.505,
"step": 2050
},
{
"epoch": 1.4714285714285715,
"grad_norm": 1.196044683456421,
"learning_rate": 4.422222222222222e-05,
"loss": 0.3921,
"step": 2060
},
{
"epoch": 1.4785714285714286,
"grad_norm": 0.9053621888160706,
"learning_rate": 4.4185185185185184e-05,
"loss": 0.2386,
"step": 2070
},
{
"epoch": 1.4857142857142858,
"grad_norm": 1.8388108015060425,
"learning_rate": 4.414814814814815e-05,
"loss": 0.4309,
"step": 2080
},
{
"epoch": 1.4928571428571429,
"grad_norm": 2.25136137008667,
"learning_rate": 4.4111111111111114e-05,
"loss": 0.3918,
"step": 2090
},
{
"epoch": 1.5,
"grad_norm": 1.960864782333374,
"learning_rate": 4.4074074074074076e-05,
"loss": 0.4754,
"step": 2100
},
{
"epoch": 1.5071428571428571,
"grad_norm": 2.4653213024139404,
"learning_rate": 4.403703703703704e-05,
"loss": 0.4545,
"step": 2110
},
{
"epoch": 1.5142857142857142,
"grad_norm": 1.8694462776184082,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.3707,
"step": 2120
},
{
"epoch": 1.5214285714285714,
"grad_norm": 2.240447521209717,
"learning_rate": 4.396296296296297e-05,
"loss": 0.5174,
"step": 2130
},
{
"epoch": 1.5285714285714285,
"grad_norm": 0.8589600920677185,
"learning_rate": 4.392592592592593e-05,
"loss": 0.3802,
"step": 2140
},
{
"epoch": 1.5357142857142856,
"grad_norm": 2.495075225830078,
"learning_rate": 4.388888888888889e-05,
"loss": 0.4735,
"step": 2150
},
{
"epoch": 1.5428571428571427,
"grad_norm": 1.5384397506713867,
"learning_rate": 4.385185185185185e-05,
"loss": 0.4844,
"step": 2160
},
{
"epoch": 1.55,
"grad_norm": 1.2120758295059204,
"learning_rate": 4.381481481481482e-05,
"loss": 0.3029,
"step": 2170
},
{
"epoch": 1.5571428571428572,
"grad_norm": 2.0210671424865723,
"learning_rate": 4.377777777777778e-05,
"loss": 0.688,
"step": 2180
},
{
"epoch": 1.5642857142857143,
"grad_norm": 2.322322368621826,
"learning_rate": 4.374074074074074e-05,
"loss": 0.4673,
"step": 2190
},
{
"epoch": 1.5714285714285714,
"grad_norm": 1.8948960304260254,
"learning_rate": 4.3703703703703705e-05,
"loss": 0.3698,
"step": 2200
},
{
"epoch": 1.5785714285714287,
"grad_norm": 1.776141881942749,
"learning_rate": 4.3666666666666666e-05,
"loss": 0.3611,
"step": 2210
},
{
"epoch": 1.5857142857142859,
"grad_norm": 2.8628015518188477,
"learning_rate": 4.3629629629629635e-05,
"loss": 0.4504,
"step": 2220
},
{
"epoch": 1.592857142857143,
"grad_norm": 1.8579275608062744,
"learning_rate": 4.3592592592592596e-05,
"loss": 0.5131,
"step": 2230
},
{
"epoch": 1.6,
"grad_norm": 1.1070181131362915,
"learning_rate": 4.355555555555556e-05,
"loss": 0.4187,
"step": 2240
},
{
"epoch": 1.6071428571428572,
"grad_norm": 1.3833059072494507,
"learning_rate": 4.351851851851852e-05,
"loss": 0.4301,
"step": 2250
},
{
"epoch": 1.6142857142857143,
"grad_norm": 1.6870567798614502,
"learning_rate": 4.348148148148148e-05,
"loss": 0.5542,
"step": 2260
},
{
"epoch": 1.6214285714285714,
"grad_norm": 1.582582712173462,
"learning_rate": 4.344444444444445e-05,
"loss": 0.5192,
"step": 2270
},
{
"epoch": 1.6285714285714286,
"grad_norm": 3.3700509071350098,
"learning_rate": 4.340740740740741e-05,
"loss": 0.3268,
"step": 2280
},
{
"epoch": 1.6357142857142857,
"grad_norm": 3.0057899951934814,
"learning_rate": 4.337037037037037e-05,
"loss": 0.3787,
"step": 2290
},
{
"epoch": 1.6428571428571428,
"grad_norm": 1.302416205406189,
"learning_rate": 4.3333333333333334e-05,
"loss": 0.4793,
"step": 2300
},
{
"epoch": 1.65,
"grad_norm": 2.42720103263855,
"learning_rate": 4.3296296296296296e-05,
"loss": 0.4124,
"step": 2310
},
{
"epoch": 1.657142857142857,
"grad_norm": 1.455609917640686,
"learning_rate": 4.325925925925926e-05,
"loss": 0.272,
"step": 2320
},
{
"epoch": 1.6642857142857141,
"grad_norm": 2.1332924365997314,
"learning_rate": 4.3222222222222226e-05,
"loss": 0.4859,
"step": 2330
},
{
"epoch": 1.6714285714285713,
"grad_norm": 1.977156162261963,
"learning_rate": 4.318518518518519e-05,
"loss": 0.4017,
"step": 2340
},
{
"epoch": 1.6785714285714286,
"grad_norm": 1.7197158336639404,
"learning_rate": 4.314814814814815e-05,
"loss": 0.376,
"step": 2350
},
{
"epoch": 1.6857142857142857,
"grad_norm": 0.8615891933441162,
"learning_rate": 4.311111111111111e-05,
"loss": 0.3383,
"step": 2360
},
{
"epoch": 1.6928571428571428,
"grad_norm": 1.2501980066299438,
"learning_rate": 4.307407407407408e-05,
"loss": 0.3999,
"step": 2370
},
{
"epoch": 1.7,
"grad_norm": 1.7977019548416138,
"learning_rate": 4.303703703703704e-05,
"loss": 0.3424,
"step": 2380
},
{
"epoch": 1.7071428571428573,
"grad_norm": 2.265807867050171,
"learning_rate": 4.3e-05,
"loss": 0.4345,
"step": 2390
},
{
"epoch": 1.7142857142857144,
"grad_norm": 2.348353624343872,
"learning_rate": 4.296296296296296e-05,
"loss": 0.4395,
"step": 2400
},
{
"epoch": 1.7214285714285715,
"grad_norm": 2.585843801498413,
"learning_rate": 4.292592592592593e-05,
"loss": 0.5208,
"step": 2410
},
{
"epoch": 1.7285714285714286,
"grad_norm": 1.1487417221069336,
"learning_rate": 4.2888888888888886e-05,
"loss": 0.4635,
"step": 2420
},
{
"epoch": 1.7357142857142858,
"grad_norm": 1.206634521484375,
"learning_rate": 4.2851851851851855e-05,
"loss": 0.3521,
"step": 2430
},
{
"epoch": 1.7428571428571429,
"grad_norm": 2.136702299118042,
"learning_rate": 4.2814814814814816e-05,
"loss": 0.4582,
"step": 2440
},
{
"epoch": 1.75,
"grad_norm": 1.2831017971038818,
"learning_rate": 4.277777777777778e-05,
"loss": 0.397,
"step": 2450
},
{
"epoch": 1.7571428571428571,
"grad_norm": 2.313405990600586,
"learning_rate": 4.274074074074074e-05,
"loss": 0.431,
"step": 2460
},
{
"epoch": 1.7642857142857142,
"grad_norm": 1.8922353982925415,
"learning_rate": 4.270370370370371e-05,
"loss": 0.4396,
"step": 2470
},
{
"epoch": 1.7714285714285714,
"grad_norm": 1.735303521156311,
"learning_rate": 4.266666666666667e-05,
"loss": 0.4019,
"step": 2480
},
{
"epoch": 1.7785714285714285,
"grad_norm": 1.1989376544952393,
"learning_rate": 4.262962962962963e-05,
"loss": 0.3317,
"step": 2490
},
{
"epoch": 1.7857142857142856,
"grad_norm": 1.709370732307434,
"learning_rate": 4.259259259259259e-05,
"loss": 0.4721,
"step": 2500
},
{
"epoch": 1.7928571428571427,
"grad_norm": 1.3655775785446167,
"learning_rate": 4.255555555555556e-05,
"loss": 0.464,
"step": 2510
},
{
"epoch": 1.8,
"grad_norm": 1.2292691469192505,
"learning_rate": 4.2518518518518515e-05,
"loss": 0.3765,
"step": 2520
},
{
"epoch": 1.8071428571428572,
"grad_norm": 2.6490797996520996,
"learning_rate": 4.2481481481481484e-05,
"loss": 0.4989,
"step": 2530
},
{
"epoch": 1.8142857142857143,
"grad_norm": 1.8564647436141968,
"learning_rate": 4.2444444444444445e-05,
"loss": 0.3776,
"step": 2540
},
{
"epoch": 1.8214285714285714,
"grad_norm": 1.9681627750396729,
"learning_rate": 4.240740740740741e-05,
"loss": 0.4717,
"step": 2550
},
{
"epoch": 1.8285714285714287,
"grad_norm": 2.1326770782470703,
"learning_rate": 4.237037037037037e-05,
"loss": 0.3212,
"step": 2560
},
{
"epoch": 1.8357142857142859,
"grad_norm": 1.8122767210006714,
"learning_rate": 4.233333333333334e-05,
"loss": 0.3619,
"step": 2570
},
{
"epoch": 1.842857142857143,
"grad_norm": 1.4822399616241455,
"learning_rate": 4.22962962962963e-05,
"loss": 0.4631,
"step": 2580
},
{
"epoch": 1.85,
"grad_norm": 2.278700828552246,
"learning_rate": 4.225925925925926e-05,
"loss": 0.3018,
"step": 2590
},
{
"epoch": 1.8571428571428572,
"grad_norm": 2.3148486614227295,
"learning_rate": 4.222222222222222e-05,
"loss": 0.4084,
"step": 2600
},
{
"epoch": 1.8642857142857143,
"grad_norm": 1.5277279615402222,
"learning_rate": 4.218518518518519e-05,
"loss": 0.5295,
"step": 2610
},
{
"epoch": 1.8714285714285714,
"grad_norm": 1.3603259325027466,
"learning_rate": 4.2148148148148145e-05,
"loss": 0.4727,
"step": 2620
},
{
"epoch": 1.8785714285714286,
"grad_norm": 1.9577744007110596,
"learning_rate": 4.211111111111111e-05,
"loss": 0.5915,
"step": 2630
},
{
"epoch": 1.8857142857142857,
"grad_norm": 1.0424437522888184,
"learning_rate": 4.2074074074074075e-05,
"loss": 0.3386,
"step": 2640
},
{
"epoch": 1.8928571428571428,
"grad_norm": 2.7555553913116455,
"learning_rate": 4.203703703703704e-05,
"loss": 0.5003,
"step": 2650
},
{
"epoch": 1.9,
"grad_norm": 1.9913907051086426,
"learning_rate": 4.2e-05,
"loss": 0.3875,
"step": 2660
},
{
"epoch": 1.907142857142857,
"grad_norm": 1.8053233623504639,
"learning_rate": 4.1962962962962966e-05,
"loss": 0.2318,
"step": 2670
},
{
"epoch": 1.9142857142857141,
"grad_norm": 1.7686558961868286,
"learning_rate": 4.192592592592593e-05,
"loss": 0.3772,
"step": 2680
},
{
"epoch": 1.9214285714285713,
"grad_norm": 1.4202839136123657,
"learning_rate": 4.188888888888889e-05,
"loss": 0.3742,
"step": 2690
},
{
"epoch": 1.9285714285714286,
"grad_norm": 2.7964282035827637,
"learning_rate": 4.185185185185185e-05,
"loss": 0.2901,
"step": 2700
},
{
"epoch": 1.9357142857142857,
"grad_norm": 2.0525360107421875,
"learning_rate": 4.181481481481482e-05,
"loss": 0.3146,
"step": 2710
},
{
"epoch": 1.9428571428571428,
"grad_norm": 1.937103033065796,
"learning_rate": 4.177777777777778e-05,
"loss": 0.3871,
"step": 2720
},
{
"epoch": 1.95,
"grad_norm": 2.1534152030944824,
"learning_rate": 4.174074074074074e-05,
"loss": 0.5309,
"step": 2730
},
{
"epoch": 1.9571428571428573,
"grad_norm": 1.60648512840271,
"learning_rate": 4.1703703703703704e-05,
"loss": 0.3859,
"step": 2740
},
{
"epoch": 1.9642857142857144,
"grad_norm": 2.2782654762268066,
"learning_rate": 4.166666666666667e-05,
"loss": 0.3859,
"step": 2750
},
{
"epoch": 1.9714285714285715,
"grad_norm": 1.9134782552719116,
"learning_rate": 4.162962962962963e-05,
"loss": 0.565,
"step": 2760
},
{
"epoch": 1.9785714285714286,
"grad_norm": 1.4029120206832886,
"learning_rate": 4.1592592592592595e-05,
"loss": 0.3373,
"step": 2770
},
{
"epoch": 1.9857142857142858,
"grad_norm": 1.9651641845703125,
"learning_rate": 4.155555555555556e-05,
"loss": 0.3847,
"step": 2780
},
{
"epoch": 1.9928571428571429,
"grad_norm": 1.4134501218795776,
"learning_rate": 4.1518518518518525e-05,
"loss": 0.4465,
"step": 2790
},
{
"epoch": 2.0,
"grad_norm": 1.9682196378707886,
"learning_rate": 4.148148148148148e-05,
"loss": 0.545,
"step": 2800
},
{
"epoch": 2.007142857142857,
"grad_norm": 2.259190559387207,
"learning_rate": 4.144444444444445e-05,
"loss": 0.3058,
"step": 2810
},
{
"epoch": 2.0142857142857142,
"grad_norm": 1.6861268281936646,
"learning_rate": 4.140740740740741e-05,
"loss": 0.4567,
"step": 2820
},
{
"epoch": 2.0214285714285714,
"grad_norm": 1.5168589353561401,
"learning_rate": 4.137037037037037e-05,
"loss": 0.5687,
"step": 2830
},
{
"epoch": 2.0285714285714285,
"grad_norm": 1.1756591796875,
"learning_rate": 4.133333333333333e-05,
"loss": 0.4118,
"step": 2840
},
{
"epoch": 2.0357142857142856,
"grad_norm": 1.4381860494613647,
"learning_rate": 4.12962962962963e-05,
"loss": 0.5473,
"step": 2850
},
{
"epoch": 2.0428571428571427,
"grad_norm": 1.710028052330017,
"learning_rate": 4.1259259259259256e-05,
"loss": 0.3362,
"step": 2860
},
{
"epoch": 2.05,
"grad_norm": 1.3261126279830933,
"learning_rate": 4.1222222222222224e-05,
"loss": 0.4497,
"step": 2870
},
{
"epoch": 2.057142857142857,
"grad_norm": 1.5397872924804688,
"learning_rate": 4.1185185185185186e-05,
"loss": 0.3985,
"step": 2880
},
{
"epoch": 2.064285714285714,
"grad_norm": 2.1463019847869873,
"learning_rate": 4.1148148148148154e-05,
"loss": 0.3423,
"step": 2890
},
{
"epoch": 2.0714285714285716,
"grad_norm": 1.3202670812606812,
"learning_rate": 4.111111111111111e-05,
"loss": 0.3422,
"step": 2900
},
{
"epoch": 2.0785714285714287,
"grad_norm": 1.6832393407821655,
"learning_rate": 4.107407407407408e-05,
"loss": 0.3243,
"step": 2910
},
{
"epoch": 2.085714285714286,
"grad_norm": 1.7872291803359985,
"learning_rate": 4.103703703703704e-05,
"loss": 0.4452,
"step": 2920
},
{
"epoch": 2.092857142857143,
"grad_norm": 2.649644613265991,
"learning_rate": 4.1e-05,
"loss": 0.4125,
"step": 2930
},
{
"epoch": 2.1,
"grad_norm": 1.6862508058547974,
"learning_rate": 4.096296296296296e-05,
"loss": 0.3503,
"step": 2940
},
{
"epoch": 2.107142857142857,
"grad_norm": 1.0415781736373901,
"learning_rate": 4.092592592592593e-05,
"loss": 0.2464,
"step": 2950
},
{
"epoch": 2.1142857142857143,
"grad_norm": 1.3061981201171875,
"learning_rate": 4.088888888888889e-05,
"loss": 0.2232,
"step": 2960
},
{
"epoch": 2.1214285714285714,
"grad_norm": 1.0442795753479004,
"learning_rate": 4.0851851851851853e-05,
"loss": 0.3512,
"step": 2970
},
{
"epoch": 2.1285714285714286,
"grad_norm": 2.4381885528564453,
"learning_rate": 4.0814814814814815e-05,
"loss": 0.4678,
"step": 2980
},
{
"epoch": 2.1357142857142857,
"grad_norm": 1.5145925283432007,
"learning_rate": 4.0777777777777783e-05,
"loss": 0.3655,
"step": 2990
},
{
"epoch": 2.142857142857143,
"grad_norm": 3.514470338821411,
"learning_rate": 4.074074074074074e-05,
"loss": 0.3538,
"step": 3000
},
{
"epoch": 2.142857142857143,
"eval_loss": 0.42481884360313416,
"eval_rouge1": 0.8943,
"eval_rouge2": 0.8295,
"eval_rougeL": 0.8911,
"eval_runtime": 122.0137,
"eval_samples_per_second": 11.474,
"eval_steps_per_second": 5.737,
"step": 3000
},
{
"epoch": 2.15,
"grad_norm": 1.9778640270233154,
"learning_rate": 4.0703703703703707e-05,
"loss": 0.4804,
"step": 3010
},
{
"epoch": 2.157142857142857,
"grad_norm": 2.8497660160064697,
"learning_rate": 4.066666666666667e-05,
"loss": 0.523,
"step": 3020
},
{
"epoch": 2.164285714285714,
"grad_norm": 1.317818284034729,
"learning_rate": 4.0629629629629636e-05,
"loss": 0.3694,
"step": 3030
},
{
"epoch": 2.1714285714285713,
"grad_norm": 1.1630916595458984,
"learning_rate": 4.059259259259259e-05,
"loss": 0.3546,
"step": 3040
},
{
"epoch": 2.1785714285714284,
"grad_norm": 2.114527940750122,
"learning_rate": 4.055555555555556e-05,
"loss": 0.4838,
"step": 3050
},
{
"epoch": 2.185714285714286,
"grad_norm": 1.771263599395752,
"learning_rate": 4.051851851851852e-05,
"loss": 0.2938,
"step": 3060
},
{
"epoch": 2.192857142857143,
"grad_norm": 3.463986396789551,
"learning_rate": 4.048148148148148e-05,
"loss": 0.455,
"step": 3070
},
{
"epoch": 2.2,
"grad_norm": 2.023069381713867,
"learning_rate": 4.0444444444444444e-05,
"loss": 0.4449,
"step": 3080
},
{
"epoch": 2.2071428571428573,
"grad_norm": 2.9855751991271973,
"learning_rate": 4.040740740740741e-05,
"loss": 0.5374,
"step": 3090
},
{
"epoch": 2.2142857142857144,
"grad_norm": 2.422739267349243,
"learning_rate": 4.0370370370370374e-05,
"loss": 0.4203,
"step": 3100
},
{
"epoch": 2.2214285714285715,
"grad_norm": 2.097543478012085,
"learning_rate": 4.0333333333333336e-05,
"loss": 0.364,
"step": 3110
},
{
"epoch": 2.2285714285714286,
"grad_norm": 2.2496302127838135,
"learning_rate": 4.02962962962963e-05,
"loss": 0.4135,
"step": 3120
},
{
"epoch": 2.2357142857142858,
"grad_norm": 2.3347012996673584,
"learning_rate": 4.0259259259259266e-05,
"loss": 0.4795,
"step": 3130
},
{
"epoch": 2.242857142857143,
"grad_norm": 1.506218433380127,
"learning_rate": 4.022222222222222e-05,
"loss": 0.5228,
"step": 3140
},
{
"epoch": 2.25,
"grad_norm": 1.160443663597107,
"learning_rate": 4.018518518518519e-05,
"loss": 0.3439,
"step": 3150
},
{
"epoch": 2.257142857142857,
"grad_norm": 1.8678144216537476,
"learning_rate": 4.014814814814815e-05,
"loss": 0.404,
"step": 3160
},
{
"epoch": 2.2642857142857142,
"grad_norm": 1.1315560340881348,
"learning_rate": 4.011111111111111e-05,
"loss": 0.3111,
"step": 3170
},
{
"epoch": 2.2714285714285714,
"grad_norm": 1.8081461191177368,
"learning_rate": 4.007407407407407e-05,
"loss": 0.2717,
"step": 3180
},
{
"epoch": 2.2785714285714285,
"grad_norm": 1.6636005640029907,
"learning_rate": 4.003703703703704e-05,
"loss": 0.3382,
"step": 3190
},
{
"epoch": 2.2857142857142856,
"grad_norm": 1.3334009647369385,
"learning_rate": 4e-05,
"loss": 0.38,
"step": 3200
},
{
"epoch": 2.2928571428571427,
"grad_norm": 1.4873621463775635,
"learning_rate": 3.9962962962962965e-05,
"loss": 0.2979,
"step": 3210
},
{
"epoch": 2.3,
"grad_norm": 1.17378568649292,
"learning_rate": 3.9925925925925926e-05,
"loss": 0.3628,
"step": 3220
},
{
"epoch": 2.307142857142857,
"grad_norm": 1.3241777420043945,
"learning_rate": 3.9888888888888895e-05,
"loss": 0.3119,
"step": 3230
},
{
"epoch": 2.314285714285714,
"grad_norm": 1.9823285341262817,
"learning_rate": 3.985185185185185e-05,
"loss": 0.4647,
"step": 3240
},
{
"epoch": 2.3214285714285716,
"grad_norm": 1.6918193101882935,
"learning_rate": 3.981481481481482e-05,
"loss": 0.3695,
"step": 3250
},
{
"epoch": 2.3285714285714287,
"grad_norm": 2.1902389526367188,
"learning_rate": 3.977777777777778e-05,
"loss": 0.2468,
"step": 3260
},
{
"epoch": 2.335714285714286,
"grad_norm": 1.3570506572723389,
"learning_rate": 3.974074074074075e-05,
"loss": 0.3209,
"step": 3270
},
{
"epoch": 2.342857142857143,
"grad_norm": 1.951711654663086,
"learning_rate": 3.97037037037037e-05,
"loss": 0.5175,
"step": 3280
},
{
"epoch": 2.35,
"grad_norm": 1.741243839263916,
"learning_rate": 3.966666666666667e-05,
"loss": 0.2934,
"step": 3290
},
{
"epoch": 2.357142857142857,
"grad_norm": 1.5889472961425781,
"learning_rate": 3.962962962962963e-05,
"loss": 0.3026,
"step": 3300
},
{
"epoch": 2.3642857142857143,
"grad_norm": 1.4606213569641113,
"learning_rate": 3.9592592592592594e-05,
"loss": 0.4508,
"step": 3310
},
{
"epoch": 2.3714285714285714,
"grad_norm": 1.5021477937698364,
"learning_rate": 3.9555555555555556e-05,
"loss": 0.2589,
"step": 3320
},
{
"epoch": 2.3785714285714286,
"grad_norm": 1.8877885341644287,
"learning_rate": 3.9518518518518524e-05,
"loss": 0.4115,
"step": 3330
},
{
"epoch": 2.3857142857142857,
"grad_norm": 1.809822678565979,
"learning_rate": 3.9481481481481485e-05,
"loss": 0.3844,
"step": 3340
},
{
"epoch": 2.392857142857143,
"grad_norm": 1.2999638319015503,
"learning_rate": 3.944444444444445e-05,
"loss": 0.4028,
"step": 3350
},
{
"epoch": 2.4,
"grad_norm": 1.4639837741851807,
"learning_rate": 3.940740740740741e-05,
"loss": 0.3551,
"step": 3360
},
{
"epoch": 2.407142857142857,
"grad_norm": 1.1001754999160767,
"learning_rate": 3.937037037037038e-05,
"loss": 0.4001,
"step": 3370
},
{
"epoch": 2.414285714285714,
"grad_norm": 2.272892713546753,
"learning_rate": 3.933333333333333e-05,
"loss": 0.3048,
"step": 3380
},
{
"epoch": 2.4214285714285713,
"grad_norm": 2.085908889770508,
"learning_rate": 3.92962962962963e-05,
"loss": 0.5788,
"step": 3390
},
{
"epoch": 2.4285714285714284,
"grad_norm": 1.317700743675232,
"learning_rate": 3.925925925925926e-05,
"loss": 0.2922,
"step": 3400
},
{
"epoch": 2.435714285714286,
"grad_norm": 2.372558832168579,
"learning_rate": 3.922222222222223e-05,
"loss": 0.4581,
"step": 3410
},
{
"epoch": 2.442857142857143,
"grad_norm": 1.3307292461395264,
"learning_rate": 3.9185185185185185e-05,
"loss": 0.4553,
"step": 3420
},
{
"epoch": 2.45,
"grad_norm": 1.9228068590164185,
"learning_rate": 3.914814814814815e-05,
"loss": 0.497,
"step": 3430
},
{
"epoch": 2.4571428571428573,
"grad_norm": 1.071590542793274,
"learning_rate": 3.9111111111111115e-05,
"loss": 0.4532,
"step": 3440
},
{
"epoch": 2.4642857142857144,
"grad_norm": 1.9603391885757446,
"learning_rate": 3.9074074074074076e-05,
"loss": 0.3808,
"step": 3450
},
{
"epoch": 2.4714285714285715,
"grad_norm": 1.2152074575424194,
"learning_rate": 3.903703703703704e-05,
"loss": 0.3011,
"step": 3460
},
{
"epoch": 2.4785714285714286,
"grad_norm": 1.532478928565979,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.3403,
"step": 3470
},
{
"epoch": 2.4857142857142858,
"grad_norm": 1.5086220502853394,
"learning_rate": 3.896296296296296e-05,
"loss": 0.4835,
"step": 3480
},
{
"epoch": 2.492857142857143,
"grad_norm": 1.0601118803024292,
"learning_rate": 3.892592592592593e-05,
"loss": 0.4057,
"step": 3490
},
{
"epoch": 2.5,
"grad_norm": 0.7907903790473938,
"learning_rate": 3.888888888888889e-05,
"loss": 0.3183,
"step": 3500
},
{
"epoch": 2.507142857142857,
"grad_norm": 1.8523814678192139,
"learning_rate": 3.885185185185186e-05,
"loss": 0.4329,
"step": 3510
},
{
"epoch": 2.5142857142857142,
"grad_norm": 1.9627383947372437,
"learning_rate": 3.8814814814814814e-05,
"loss": 0.3041,
"step": 3520
},
{
"epoch": 2.5214285714285714,
"grad_norm": 0.6192536354064941,
"learning_rate": 3.877777777777778e-05,
"loss": 0.3271,
"step": 3530
},
{
"epoch": 2.5285714285714285,
"grad_norm": 1.3901042938232422,
"learning_rate": 3.8740740740740744e-05,
"loss": 0.2562,
"step": 3540
},
{
"epoch": 2.5357142857142856,
"grad_norm": 1.991752028465271,
"learning_rate": 3.8703703703703705e-05,
"loss": 0.4283,
"step": 3550
},
{
"epoch": 2.5428571428571427,
"grad_norm": 1.219382882118225,
"learning_rate": 3.866666666666667e-05,
"loss": 0.4232,
"step": 3560
},
{
"epoch": 2.55,
"grad_norm": 2.72744083404541,
"learning_rate": 3.8629629629629635e-05,
"loss": 0.3482,
"step": 3570
},
{
"epoch": 2.557142857142857,
"grad_norm": 1.6782621145248413,
"learning_rate": 3.85925925925926e-05,
"loss": 0.3302,
"step": 3580
},
{
"epoch": 2.564285714285714,
"grad_norm": 1.0238265991210938,
"learning_rate": 3.855555555555556e-05,
"loss": 0.458,
"step": 3590
},
{
"epoch": 2.571428571428571,
"grad_norm": 2.212013006210327,
"learning_rate": 3.851851851851852e-05,
"loss": 0.4127,
"step": 3600
},
{
"epoch": 2.5785714285714287,
"grad_norm": 1.5761399269104004,
"learning_rate": 3.848148148148149e-05,
"loss": 0.402,
"step": 3610
},
{
"epoch": 2.585714285714286,
"grad_norm": 1.2036465406417847,
"learning_rate": 3.844444444444444e-05,
"loss": 0.5576,
"step": 3620
},
{
"epoch": 2.592857142857143,
"grad_norm": 1.8674002885818481,
"learning_rate": 3.840740740740741e-05,
"loss": 0.364,
"step": 3630
},
{
"epoch": 2.6,
"grad_norm": 1.466834545135498,
"learning_rate": 3.837037037037037e-05,
"loss": 0.3523,
"step": 3640
},
{
"epoch": 2.607142857142857,
"grad_norm": 1.57899010181427,
"learning_rate": 3.8333333333333334e-05,
"loss": 0.4046,
"step": 3650
},
{
"epoch": 2.6142857142857143,
"grad_norm": 0.9730345010757446,
"learning_rate": 3.8296296296296296e-05,
"loss": 0.3132,
"step": 3660
},
{
"epoch": 2.6214285714285714,
"grad_norm": 1.3017544746398926,
"learning_rate": 3.8259259259259264e-05,
"loss": 0.3023,
"step": 3670
},
{
"epoch": 2.6285714285714286,
"grad_norm": 1.6368205547332764,
"learning_rate": 3.8222222222222226e-05,
"loss": 0.4363,
"step": 3680
},
{
"epoch": 2.6357142857142857,
"grad_norm": 1.2852121591567993,
"learning_rate": 3.818518518518519e-05,
"loss": 0.2896,
"step": 3690
},
{
"epoch": 2.642857142857143,
"grad_norm": 3.6991841793060303,
"learning_rate": 3.814814814814815e-05,
"loss": 0.353,
"step": 3700
},
{
"epoch": 2.65,
"grad_norm": 2.70285701751709,
"learning_rate": 3.811111111111112e-05,
"loss": 0.4217,
"step": 3710
},
{
"epoch": 2.657142857142857,
"grad_norm": 1.140811800956726,
"learning_rate": 3.807407407407408e-05,
"loss": 0.3253,
"step": 3720
},
{
"epoch": 2.664285714285714,
"grad_norm": 1.2905789613723755,
"learning_rate": 3.803703703703704e-05,
"loss": 0.3051,
"step": 3730
},
{
"epoch": 2.6714285714285713,
"grad_norm": 1.4326887130737305,
"learning_rate": 3.8e-05,
"loss": 0.3999,
"step": 3740
},
{
"epoch": 2.678571428571429,
"grad_norm": 1.1789475679397583,
"learning_rate": 3.7962962962962964e-05,
"loss": 0.4631,
"step": 3750
},
{
"epoch": 2.685714285714286,
"grad_norm": 2.0444328784942627,
"learning_rate": 3.7925925925925925e-05,
"loss": 0.4449,
"step": 3760
},
{
"epoch": 2.692857142857143,
"grad_norm": 2.1025991439819336,
"learning_rate": 3.7888888888888894e-05,
"loss": 0.3513,
"step": 3770
},
{
"epoch": 2.7,
"grad_norm": 1.8492026329040527,
"learning_rate": 3.7851851851851855e-05,
"loss": 0.4006,
"step": 3780
},
{
"epoch": 2.7071428571428573,
"grad_norm": 1.3439162969589233,
"learning_rate": 3.781481481481482e-05,
"loss": 0.2806,
"step": 3790
},
{
"epoch": 2.7142857142857144,
"grad_norm": 1.4200384616851807,
"learning_rate": 3.777777777777778e-05,
"loss": 0.3759,
"step": 3800
},
{
"epoch": 2.7214285714285715,
"grad_norm": 1.9567861557006836,
"learning_rate": 3.774074074074074e-05,
"loss": 0.1772,
"step": 3810
},
{
"epoch": 2.7285714285714286,
"grad_norm": 1.3466306924819946,
"learning_rate": 3.770370370370371e-05,
"loss": 0.399,
"step": 3820
},
{
"epoch": 2.7357142857142858,
"grad_norm": 1.6046024560928345,
"learning_rate": 3.766666666666667e-05,
"loss": 0.347,
"step": 3830
},
{
"epoch": 2.742857142857143,
"grad_norm": 2.190568447113037,
"learning_rate": 3.762962962962963e-05,
"loss": 0.3977,
"step": 3840
},
{
"epoch": 2.75,
"grad_norm": 1.7715718746185303,
"learning_rate": 3.759259259259259e-05,
"loss": 0.4385,
"step": 3850
},
{
"epoch": 2.757142857142857,
"grad_norm": 3.19500994682312,
"learning_rate": 3.7555555555555554e-05,
"loss": 0.3631,
"step": 3860
},
{
"epoch": 2.7642857142857142,
"grad_norm": 2.2222607135772705,
"learning_rate": 3.751851851851852e-05,
"loss": 0.3565,
"step": 3870
},
{
"epoch": 2.7714285714285714,
"grad_norm": 1.9959403276443481,
"learning_rate": 3.7481481481481484e-05,
"loss": 0.3629,
"step": 3880
},
{
"epoch": 2.7785714285714285,
"grad_norm": 1.3207546472549438,
"learning_rate": 3.7444444444444446e-05,
"loss": 0.2911,
"step": 3890
},
{
"epoch": 2.7857142857142856,
"grad_norm": 2.0290961265563965,
"learning_rate": 3.740740740740741e-05,
"loss": 0.3072,
"step": 3900
},
{
"epoch": 2.7928571428571427,
"grad_norm": 1.3728725910186768,
"learning_rate": 3.737037037037037e-05,
"loss": 0.3858,
"step": 3910
},
{
"epoch": 2.8,
"grad_norm": 2.541598320007324,
"learning_rate": 3.733333333333334e-05,
"loss": 0.3487,
"step": 3920
},
{
"epoch": 2.807142857142857,
"grad_norm": 2.3327584266662598,
"learning_rate": 3.72962962962963e-05,
"loss": 0.3535,
"step": 3930
},
{
"epoch": 2.814285714285714,
"grad_norm": 2.546766757965088,
"learning_rate": 3.725925925925926e-05,
"loss": 0.3462,
"step": 3940
},
{
"epoch": 2.821428571428571,
"grad_norm": 2.351959705352783,
"learning_rate": 3.722222222222222e-05,
"loss": 0.2781,
"step": 3950
},
{
"epoch": 2.8285714285714287,
"grad_norm": 1.9349900484085083,
"learning_rate": 3.718518518518519e-05,
"loss": 0.2442,
"step": 3960
},
{
"epoch": 2.835714285714286,
"grad_norm": 2.2020022869110107,
"learning_rate": 3.714814814814815e-05,
"loss": 0.3396,
"step": 3970
},
{
"epoch": 2.842857142857143,
"grad_norm": 1.5161465406417847,
"learning_rate": 3.7111111111111113e-05,
"loss": 0.3722,
"step": 3980
},
{
"epoch": 2.85,
"grad_norm": 1.7403453588485718,
"learning_rate": 3.7074074074074075e-05,
"loss": 0.4227,
"step": 3990
},
{
"epoch": 2.857142857142857,
"grad_norm": 1.9142546653747559,
"learning_rate": 3.7037037037037037e-05,
"loss": 0.3259,
"step": 4000
},
{
"epoch": 2.857142857142857,
"eval_loss": 0.38673722743988037,
"eval_rouge1": 0.8974,
"eval_rouge2": 0.8331,
"eval_rougeL": 0.8942,
"eval_runtime": 122.1383,
"eval_samples_per_second": 11.462,
"eval_steps_per_second": 5.731,
"step": 4000
},
{
"epoch": 2.8642857142857143,
"grad_norm": 1.5975255966186523,
"learning_rate": 3.7e-05,
"loss": 0.3732,
"step": 4010
},
{
"epoch": 2.8714285714285714,
"grad_norm": 1.4830248355865479,
"learning_rate": 3.6962962962962966e-05,
"loss": 0.5093,
"step": 4020
},
{
"epoch": 2.8785714285714286,
"grad_norm": 2.504650354385376,
"learning_rate": 3.692592592592593e-05,
"loss": 0.3302,
"step": 4030
},
{
"epoch": 2.8857142857142857,
"grad_norm": 2.349452495574951,
"learning_rate": 3.688888888888889e-05,
"loss": 0.3596,
"step": 4040
},
{
"epoch": 2.892857142857143,
"grad_norm": 1.398964762687683,
"learning_rate": 3.685185185185185e-05,
"loss": 0.3494,
"step": 4050
},
{
"epoch": 2.9,
"grad_norm": 2.212738513946533,
"learning_rate": 3.681481481481482e-05,
"loss": 0.3691,
"step": 4060
},
{
"epoch": 2.907142857142857,
"grad_norm": 2.20845627784729,
"learning_rate": 3.677777777777778e-05,
"loss": 0.2974,
"step": 4070
},
{
"epoch": 2.914285714285714,
"grad_norm": 1.2226334810256958,
"learning_rate": 3.674074074074074e-05,
"loss": 0.3173,
"step": 4080
},
{
"epoch": 2.9214285714285713,
"grad_norm": 2.2203428745269775,
"learning_rate": 3.6703703703703704e-05,
"loss": 0.4473,
"step": 4090
},
{
"epoch": 2.928571428571429,
"grad_norm": 1.487853765487671,
"learning_rate": 3.6666666666666666e-05,
"loss": 0.2653,
"step": 4100
},
{
"epoch": 2.935714285714286,
"grad_norm": 1.6347614526748657,
"learning_rate": 3.662962962962963e-05,
"loss": 0.3563,
"step": 4110
},
{
"epoch": 2.942857142857143,
"grad_norm": 2.2722184658050537,
"learning_rate": 3.6592592592592596e-05,
"loss": 0.4975,
"step": 4120
},
{
"epoch": 2.95,
"grad_norm": 1.747530460357666,
"learning_rate": 3.655555555555556e-05,
"loss": 0.2357,
"step": 4130
},
{
"epoch": 2.9571428571428573,
"grad_norm": 1.628596544265747,
"learning_rate": 3.651851851851852e-05,
"loss": 0.3674,
"step": 4140
},
{
"epoch": 2.9642857142857144,
"grad_norm": 1.0486435890197754,
"learning_rate": 3.648148148148148e-05,
"loss": 0.3314,
"step": 4150
},
{
"epoch": 2.9714285714285715,
"grad_norm": 2.523879289627075,
"learning_rate": 3.644444444444445e-05,
"loss": 0.4421,
"step": 4160
},
{
"epoch": 2.9785714285714286,
"grad_norm": 1.4641958475112915,
"learning_rate": 3.6407407407407403e-05,
"loss": 0.4135,
"step": 4170
},
{
"epoch": 2.9857142857142858,
"grad_norm": 2.672769784927368,
"learning_rate": 3.637037037037037e-05,
"loss": 0.3527,
"step": 4180
},
{
"epoch": 2.992857142857143,
"grad_norm": 0.5795308351516724,
"learning_rate": 3.633333333333333e-05,
"loss": 0.2326,
"step": 4190
},
{
"epoch": 3.0,
"grad_norm": 1.873579978942871,
"learning_rate": 3.62962962962963e-05,
"loss": 0.3679,
"step": 4200
},
{
"epoch": 3.007142857142857,
"grad_norm": 1.7640775442123413,
"learning_rate": 3.6259259259259256e-05,
"loss": 0.4778,
"step": 4210
},
{
"epoch": 3.0142857142857142,
"grad_norm": 1.9458075761795044,
"learning_rate": 3.6222222222222225e-05,
"loss": 0.4054,
"step": 4220
},
{
"epoch": 3.0214285714285714,
"grad_norm": 1.1568126678466797,
"learning_rate": 3.6185185185185186e-05,
"loss": 0.2249,
"step": 4230
},
{
"epoch": 3.0285714285714285,
"grad_norm": 1.3655381202697754,
"learning_rate": 3.614814814814815e-05,
"loss": 0.3993,
"step": 4240
},
{
"epoch": 3.0357142857142856,
"grad_norm": 2.0403196811676025,
"learning_rate": 3.611111111111111e-05,
"loss": 0.3366,
"step": 4250
},
{
"epoch": 3.0428571428571427,
"grad_norm": 1.9888697862625122,
"learning_rate": 3.607407407407408e-05,
"loss": 0.3033,
"step": 4260
},
{
"epoch": 3.05,
"grad_norm": 1.3648616075515747,
"learning_rate": 3.603703703703704e-05,
"loss": 0.2874,
"step": 4270
},
{
"epoch": 3.057142857142857,
"grad_norm": 2.602613925933838,
"learning_rate": 3.6e-05,
"loss": 0.4086,
"step": 4280
},
{
"epoch": 3.064285714285714,
"grad_norm": 2.5918185710906982,
"learning_rate": 3.596296296296296e-05,
"loss": 0.393,
"step": 4290
},
{
"epoch": 3.0714285714285716,
"grad_norm": 1.8195433616638184,
"learning_rate": 3.592592592592593e-05,
"loss": 0.3361,
"step": 4300
},
{
"epoch": 3.0785714285714287,
"grad_norm": 1.8855136632919312,
"learning_rate": 3.5888888888888886e-05,
"loss": 0.3205,
"step": 4310
},
{
"epoch": 3.085714285714286,
"grad_norm": 2.7412662506103516,
"learning_rate": 3.5851851851851854e-05,
"loss": 0.2659,
"step": 4320
},
{
"epoch": 3.092857142857143,
"grad_norm": 1.880436658859253,
"learning_rate": 3.5814814814814815e-05,
"loss": 0.49,
"step": 4330
},
{
"epoch": 3.1,
"grad_norm": 1.6828274726867676,
"learning_rate": 3.577777777777778e-05,
"loss": 0.2933,
"step": 4340
},
{
"epoch": 3.107142857142857,
"grad_norm": 1.0517287254333496,
"learning_rate": 3.574074074074074e-05,
"loss": 0.3563,
"step": 4350
},
{
"epoch": 3.1142857142857143,
"grad_norm": 1.3242154121398926,
"learning_rate": 3.570370370370371e-05,
"loss": 0.3765,
"step": 4360
},
{
"epoch": 3.1214285714285714,
"grad_norm": 2.0899312496185303,
"learning_rate": 3.566666666666667e-05,
"loss": 0.3664,
"step": 4370
},
{
"epoch": 3.1285714285714286,
"grad_norm": 2.0286014080047607,
"learning_rate": 3.562962962962963e-05,
"loss": 0.2622,
"step": 4380
},
{
"epoch": 3.1357142857142857,
"grad_norm": 2.5074400901794434,
"learning_rate": 3.559259259259259e-05,
"loss": 0.321,
"step": 4390
},
{
"epoch": 3.142857142857143,
"grad_norm": 1.4080287218093872,
"learning_rate": 3.555555555555556e-05,
"loss": 0.4035,
"step": 4400
},
{
"epoch": 3.15,
"grad_norm": 1.923890471458435,
"learning_rate": 3.5518518518518515e-05,
"loss": 0.2775,
"step": 4410
},
{
"epoch": 3.157142857142857,
"grad_norm": 0.806591272354126,
"learning_rate": 3.548148148148148e-05,
"loss": 0.3149,
"step": 4420
},
{
"epoch": 3.164285714285714,
"grad_norm": 2.197736978530884,
"learning_rate": 3.5444444444444445e-05,
"loss": 0.4368,
"step": 4430
},
{
"epoch": 3.1714285714285713,
"grad_norm": 1.6943881511688232,
"learning_rate": 3.540740740740741e-05,
"loss": 0.2793,
"step": 4440
},
{
"epoch": 3.1785714285714284,
"grad_norm": 2.5460283756256104,
"learning_rate": 3.537037037037037e-05,
"loss": 0.4057,
"step": 4450
},
{
"epoch": 3.185714285714286,
"grad_norm": 1.579908013343811,
"learning_rate": 3.5333333333333336e-05,
"loss": 0.3016,
"step": 4460
},
{
"epoch": 3.192857142857143,
"grad_norm": 1.9137247800827026,
"learning_rate": 3.52962962962963e-05,
"loss": 0.3437,
"step": 4470
},
{
"epoch": 3.2,
"grad_norm": 2.510328769683838,
"learning_rate": 3.525925925925926e-05,
"loss": 0.585,
"step": 4480
},
{
"epoch": 3.2071428571428573,
"grad_norm": 0.9775506854057312,
"learning_rate": 3.522222222222222e-05,
"loss": 0.2651,
"step": 4490
},
{
"epoch": 3.2142857142857144,
"grad_norm": 1.7614684104919434,
"learning_rate": 3.518518518518519e-05,
"loss": 0.3089,
"step": 4500
},
{
"epoch": 3.2214285714285715,
"grad_norm": 1.9103621244430542,
"learning_rate": 3.514814814814815e-05,
"loss": 0.342,
"step": 4510
},
{
"epoch": 3.2285714285714286,
"grad_norm": 1.4587639570236206,
"learning_rate": 3.511111111111111e-05,
"loss": 0.2592,
"step": 4520
},
{
"epoch": 3.2357142857142858,
"grad_norm": 1.3419288396835327,
"learning_rate": 3.5074074074074074e-05,
"loss": 0.4185,
"step": 4530
},
{
"epoch": 3.242857142857143,
"grad_norm": 1.6199047565460205,
"learning_rate": 3.503703703703704e-05,
"loss": 0.256,
"step": 4540
},
{
"epoch": 3.25,
"grad_norm": 1.230350136756897,
"learning_rate": 3.5e-05,
"loss": 0.3304,
"step": 4550
},
{
"epoch": 3.257142857142857,
"grad_norm": 3.087888240814209,
"learning_rate": 3.4962962962962965e-05,
"loss": 0.3351,
"step": 4560
},
{
"epoch": 3.2642857142857142,
"grad_norm": 1.4498260021209717,
"learning_rate": 3.492592592592593e-05,
"loss": 0.2753,
"step": 4570
},
{
"epoch": 3.2714285714285714,
"grad_norm": 1.1032336950302124,
"learning_rate": 3.4888888888888895e-05,
"loss": 0.3709,
"step": 4580
},
{
"epoch": 3.2785714285714285,
"grad_norm": 1.5177497863769531,
"learning_rate": 3.485185185185185e-05,
"loss": 0.276,
"step": 4590
},
{
"epoch": 3.2857142857142856,
"grad_norm": 1.2596136331558228,
"learning_rate": 3.481481481481482e-05,
"loss": 0.3482,
"step": 4600
},
{
"epoch": 3.2928571428571427,
"grad_norm": 1.9895663261413574,
"learning_rate": 3.477777777777778e-05,
"loss": 0.3738,
"step": 4610
},
{
"epoch": 3.3,
"grad_norm": 1.2930881977081299,
"learning_rate": 3.474074074074074e-05,
"loss": 0.4263,
"step": 4620
},
{
"epoch": 3.307142857142857,
"grad_norm": 2.276385545730591,
"learning_rate": 3.47037037037037e-05,
"loss": 0.2267,
"step": 4630
},
{
"epoch": 3.314285714285714,
"grad_norm": 0.9766007661819458,
"learning_rate": 3.466666666666667e-05,
"loss": 0.2217,
"step": 4640
},
{
"epoch": 3.3214285714285716,
"grad_norm": 1.5184674263000488,
"learning_rate": 3.4629629629629626e-05,
"loss": 0.2788,
"step": 4650
},
{
"epoch": 3.3285714285714287,
"grad_norm": 1.5145732164382935,
"learning_rate": 3.4592592592592594e-05,
"loss": 0.3291,
"step": 4660
},
{
"epoch": 3.335714285714286,
"grad_norm": 1.4273874759674072,
"learning_rate": 3.4555555555555556e-05,
"loss": 0.2854,
"step": 4670
},
{
"epoch": 3.342857142857143,
"grad_norm": 2.783701181411743,
"learning_rate": 3.4518518518518524e-05,
"loss": 0.3518,
"step": 4680
},
{
"epoch": 3.35,
"grad_norm": 1.3359688520431519,
"learning_rate": 3.448148148148148e-05,
"loss": 0.2239,
"step": 4690
},
{
"epoch": 3.357142857142857,
"grad_norm": 2.246824264526367,
"learning_rate": 3.444444444444445e-05,
"loss": 0.3206,
"step": 4700
},
{
"epoch": 3.3642857142857143,
"grad_norm": 1.7839916944503784,
"learning_rate": 3.440740740740741e-05,
"loss": 0.3189,
"step": 4710
},
{
"epoch": 3.3714285714285714,
"grad_norm": 1.0196881294250488,
"learning_rate": 3.437037037037037e-05,
"loss": 0.2318,
"step": 4720
},
{
"epoch": 3.3785714285714286,
"grad_norm": 2.228317975997925,
"learning_rate": 3.433333333333333e-05,
"loss": 0.4033,
"step": 4730
},
{
"epoch": 3.3857142857142857,
"grad_norm": 2.0231473445892334,
"learning_rate": 3.42962962962963e-05,
"loss": 0.3854,
"step": 4740
},
{
"epoch": 3.392857142857143,
"grad_norm": 2.074925422668457,
"learning_rate": 3.425925925925926e-05,
"loss": 0.3778,
"step": 4750
},
{
"epoch": 3.4,
"grad_norm": 1.2508392333984375,
"learning_rate": 3.4222222222222224e-05,
"loss": 0.3299,
"step": 4760
},
{
"epoch": 3.407142857142857,
"grad_norm": 1.0920076370239258,
"learning_rate": 3.4185185185185185e-05,
"loss": 0.3798,
"step": 4770
},
{
"epoch": 3.414285714285714,
"grad_norm": 1.8113828897476196,
"learning_rate": 3.4148148148148153e-05,
"loss": 0.2903,
"step": 4780
},
{
"epoch": 3.4214285714285713,
"grad_norm": 1.6218737363815308,
"learning_rate": 3.411111111111111e-05,
"loss": 0.2593,
"step": 4790
},
{
"epoch": 3.4285714285714284,
"grad_norm": 1.0635234117507935,
"learning_rate": 3.4074074074074077e-05,
"loss": 0.4388,
"step": 4800
},
{
"epoch": 3.435714285714286,
"grad_norm": 2.585700273513794,
"learning_rate": 3.403703703703704e-05,
"loss": 0.3368,
"step": 4810
},
{
"epoch": 3.442857142857143,
"grad_norm": 1.0704694986343384,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.2196,
"step": 4820
},
{
"epoch": 3.45,
"grad_norm": 1.3177589178085327,
"learning_rate": 3.396296296296296e-05,
"loss": 0.3104,
"step": 4830
},
{
"epoch": 3.4571428571428573,
"grad_norm": 1.834241271018982,
"learning_rate": 3.392592592592593e-05,
"loss": 0.3413,
"step": 4840
},
{
"epoch": 3.4642857142857144,
"grad_norm": 1.8859339952468872,
"learning_rate": 3.388888888888889e-05,
"loss": 0.2593,
"step": 4850
},
{
"epoch": 3.4714285714285715,
"grad_norm": 1.452728271484375,
"learning_rate": 3.385185185185185e-05,
"loss": 0.3029,
"step": 4860
},
{
"epoch": 3.4785714285714286,
"grad_norm": 2.170774221420288,
"learning_rate": 3.3814814814814814e-05,
"loss": 0.3372,
"step": 4870
},
{
"epoch": 3.4857142857142858,
"grad_norm": 1.8695834875106812,
"learning_rate": 3.377777777777778e-05,
"loss": 0.3428,
"step": 4880
},
{
"epoch": 3.492857142857143,
"grad_norm": 1.74647855758667,
"learning_rate": 3.3740740740740744e-05,
"loss": 0.3351,
"step": 4890
},
{
"epoch": 3.5,
"grad_norm": 2.3349127769470215,
"learning_rate": 3.3703703703703706e-05,
"loss": 0.2733,
"step": 4900
},
{
"epoch": 3.507142857142857,
"grad_norm": 2.73463773727417,
"learning_rate": 3.366666666666667e-05,
"loss": 0.2979,
"step": 4910
},
{
"epoch": 3.5142857142857142,
"grad_norm": 1.3546210527420044,
"learning_rate": 3.3629629629629636e-05,
"loss": 0.3521,
"step": 4920
},
{
"epoch": 3.5214285714285714,
"grad_norm": 1.617336630821228,
"learning_rate": 3.359259259259259e-05,
"loss": 0.2758,
"step": 4930
},
{
"epoch": 3.5285714285714285,
"grad_norm": 2.998967409133911,
"learning_rate": 3.355555555555556e-05,
"loss": 0.4193,
"step": 4940
},
{
"epoch": 3.5357142857142856,
"grad_norm": 1.8004390001296997,
"learning_rate": 3.351851851851852e-05,
"loss": 0.3936,
"step": 4950
},
{
"epoch": 3.5428571428571427,
"grad_norm": 1.4228971004486084,
"learning_rate": 3.348148148148148e-05,
"loss": 0.3563,
"step": 4960
},
{
"epoch": 3.55,
"grad_norm": 1.5617480278015137,
"learning_rate": 3.3444444444444443e-05,
"loss": 0.2492,
"step": 4970
},
{
"epoch": 3.557142857142857,
"grad_norm": 1.3880919218063354,
"learning_rate": 3.340740740740741e-05,
"loss": 0.1791,
"step": 4980
},
{
"epoch": 3.564285714285714,
"grad_norm": 2.3505630493164062,
"learning_rate": 3.337037037037037e-05,
"loss": 0.4009,
"step": 4990
},
{
"epoch": 3.571428571428571,
"grad_norm": 0.9086794853210449,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.2826,
"step": 5000
},
{
"epoch": 3.571428571428571,
"eval_loss": 0.3789908289909363,
"eval_rouge1": 0.8999,
"eval_rouge2": 0.8372,
"eval_rougeL": 0.8969,
"eval_runtime": 122.23,
"eval_samples_per_second": 11.454,
"eval_steps_per_second": 5.727,
"step": 5000
},
{
"epoch": 3.5785714285714287,
"grad_norm": 1.0208678245544434,
"learning_rate": 3.3296296296296296e-05,
"loss": 0.295,
"step": 5010
},
{
"epoch": 3.585714285714286,
"grad_norm": 3.03141713142395,
"learning_rate": 3.3259259259259265e-05,
"loss": 0.3813,
"step": 5020
},
{
"epoch": 3.592857142857143,
"grad_norm": 1.7845333814620972,
"learning_rate": 3.322222222222222e-05,
"loss": 0.2526,
"step": 5030
},
{
"epoch": 3.6,
"grad_norm": 4.314096450805664,
"learning_rate": 3.318518518518519e-05,
"loss": 0.3498,
"step": 5040
},
{
"epoch": 3.607142857142857,
"grad_norm": 1.5270274877548218,
"learning_rate": 3.314814814814815e-05,
"loss": 0.3204,
"step": 5050
},
{
"epoch": 3.6142857142857143,
"grad_norm": 2.036738157272339,
"learning_rate": 3.311111111111112e-05,
"loss": 0.3416,
"step": 5060
},
{
"epoch": 3.6214285714285714,
"grad_norm": 2.2504570484161377,
"learning_rate": 3.307407407407407e-05,
"loss": 0.3781,
"step": 5070
},
{
"epoch": 3.6285714285714286,
"grad_norm": 1.749518632888794,
"learning_rate": 3.303703703703704e-05,
"loss": 0.2299,
"step": 5080
},
{
"epoch": 3.6357142857142857,
"grad_norm": 2.1878907680511475,
"learning_rate": 3.3e-05,
"loss": 0.3692,
"step": 5090
},
{
"epoch": 3.642857142857143,
"grad_norm": 1.829394817352295,
"learning_rate": 3.2962962962962964e-05,
"loss": 0.3095,
"step": 5100
},
{
"epoch": 3.65,
"grad_norm": 2.5994794368743896,
"learning_rate": 3.2925925925925926e-05,
"loss": 0.431,
"step": 5110
},
{
"epoch": 3.657142857142857,
"grad_norm": 1.2319742441177368,
"learning_rate": 3.2888888888888894e-05,
"loss": 0.336,
"step": 5120
},
{
"epoch": 3.664285714285714,
"grad_norm": 2.169063091278076,
"learning_rate": 3.2851851851851856e-05,
"loss": 0.293,
"step": 5130
},
{
"epoch": 3.6714285714285713,
"grad_norm": 1.7120137214660645,
"learning_rate": 3.281481481481482e-05,
"loss": 0.3439,
"step": 5140
},
{
"epoch": 3.678571428571429,
"grad_norm": 1.5415689945220947,
"learning_rate": 3.277777777777778e-05,
"loss": 0.3912,
"step": 5150
},
{
"epoch": 3.685714285714286,
"grad_norm": 2.2880282402038574,
"learning_rate": 3.274074074074075e-05,
"loss": 0.2352,
"step": 5160
},
{
"epoch": 3.692857142857143,
"grad_norm": 1.7133980989456177,
"learning_rate": 3.27037037037037e-05,
"loss": 0.5397,
"step": 5170
},
{
"epoch": 3.7,
"grad_norm": 1.9661128520965576,
"learning_rate": 3.266666666666667e-05,
"loss": 0.4496,
"step": 5180
},
{
"epoch": 3.7071428571428573,
"grad_norm": 1.444551944732666,
"learning_rate": 3.262962962962963e-05,
"loss": 0.3201,
"step": 5190
},
{
"epoch": 3.7142857142857144,
"grad_norm": 1.7919954061508179,
"learning_rate": 3.25925925925926e-05,
"loss": 0.3721,
"step": 5200
},
{
"epoch": 3.7214285714285715,
"grad_norm": 2.4862735271453857,
"learning_rate": 3.2555555555555555e-05,
"loss": 0.2511,
"step": 5210
},
{
"epoch": 3.7285714285714286,
"grad_norm": 1.0694047212600708,
"learning_rate": 3.251851851851852e-05,
"loss": 0.1418,
"step": 5220
},
{
"epoch": 3.7357142857142858,
"grad_norm": 2.4438931941986084,
"learning_rate": 3.2481481481481485e-05,
"loss": 0.2473,
"step": 5230
},
{
"epoch": 3.742857142857143,
"grad_norm": 1.9673523902893066,
"learning_rate": 3.2444444444444446e-05,
"loss": 0.3251,
"step": 5240
},
{
"epoch": 3.75,
"grad_norm": 2.5299620628356934,
"learning_rate": 3.240740740740741e-05,
"loss": 0.3862,
"step": 5250
},
{
"epoch": 3.757142857142857,
"grad_norm": 1.1709238290786743,
"learning_rate": 3.2370370370370376e-05,
"loss": 0.3156,
"step": 5260
},
{
"epoch": 3.7642857142857142,
"grad_norm": 1.4275505542755127,
"learning_rate": 3.233333333333333e-05,
"loss": 0.3091,
"step": 5270
},
{
"epoch": 3.7714285714285714,
"grad_norm": 1.5278127193450928,
"learning_rate": 3.22962962962963e-05,
"loss": 0.3768,
"step": 5280
},
{
"epoch": 3.7785714285714285,
"grad_norm": 2.870471239089966,
"learning_rate": 3.225925925925926e-05,
"loss": 0.4264,
"step": 5290
},
{
"epoch": 3.7857142857142856,
"grad_norm": 1.4797722101211548,
"learning_rate": 3.222222222222223e-05,
"loss": 0.3598,
"step": 5300
},
{
"epoch": 3.7928571428571427,
"grad_norm": 1.6350576877593994,
"learning_rate": 3.2185185185185184e-05,
"loss": 0.2125,
"step": 5310
},
{
"epoch": 3.8,
"grad_norm": 1.8790502548217773,
"learning_rate": 3.214814814814815e-05,
"loss": 0.2698,
"step": 5320
},
{
"epoch": 3.807142857142857,
"grad_norm": 1.3930083513259888,
"learning_rate": 3.2111111111111114e-05,
"loss": 0.3867,
"step": 5330
},
{
"epoch": 3.814285714285714,
"grad_norm": 1.7605199813842773,
"learning_rate": 3.2074074074074075e-05,
"loss": 0.3594,
"step": 5340
},
{
"epoch": 3.821428571428571,
"grad_norm": 2.3873794078826904,
"learning_rate": 3.203703703703704e-05,
"loss": 0.372,
"step": 5350
},
{
"epoch": 3.8285714285714287,
"grad_norm": 3.087186098098755,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.3964,
"step": 5360
},
{
"epoch": 3.835714285714286,
"grad_norm": 1.6758490800857544,
"learning_rate": 3.196296296296297e-05,
"loss": 0.3274,
"step": 5370
},
{
"epoch": 3.842857142857143,
"grad_norm": 1.184205412864685,
"learning_rate": 3.192592592592593e-05,
"loss": 0.277,
"step": 5380
},
{
"epoch": 3.85,
"grad_norm": 2.1282460689544678,
"learning_rate": 3.188888888888889e-05,
"loss": 0.3283,
"step": 5390
},
{
"epoch": 3.857142857142857,
"grad_norm": 1.9244283437728882,
"learning_rate": 3.185185185185185e-05,
"loss": 0.2732,
"step": 5400
},
{
"epoch": 3.8642857142857143,
"grad_norm": 1.2328709363937378,
"learning_rate": 3.181481481481481e-05,
"loss": 0.2968,
"step": 5410
},
{
"epoch": 3.8714285714285714,
"grad_norm": 2.5490071773529053,
"learning_rate": 3.177777777777778e-05,
"loss": 0.3258,
"step": 5420
},
{
"epoch": 3.8785714285714286,
"grad_norm": 1.7774560451507568,
"learning_rate": 3.174074074074074e-05,
"loss": 0.3274,
"step": 5430
},
{
"epoch": 3.8857142857142857,
"grad_norm": 0.9900962710380554,
"learning_rate": 3.1703703703703705e-05,
"loss": 0.3361,
"step": 5440
},
{
"epoch": 3.892857142857143,
"grad_norm": 1.2809844017028809,
"learning_rate": 3.1666666666666666e-05,
"loss": 0.3684,
"step": 5450
},
{
"epoch": 3.9,
"grad_norm": 2.2611334323883057,
"learning_rate": 3.1629629629629634e-05,
"loss": 0.326,
"step": 5460
},
{
"epoch": 3.907142857142857,
"grad_norm": 2.49057936668396,
"learning_rate": 3.1592592592592596e-05,
"loss": 0.412,
"step": 5470
},
{
"epoch": 3.914285714285714,
"grad_norm": 1.6978118419647217,
"learning_rate": 3.155555555555556e-05,
"loss": 0.2177,
"step": 5480
},
{
"epoch": 3.9214285714285713,
"grad_norm": 1.847128987312317,
"learning_rate": 3.151851851851852e-05,
"loss": 0.3419,
"step": 5490
},
{
"epoch": 3.928571428571429,
"grad_norm": 1.6806657314300537,
"learning_rate": 3.148148148148148e-05,
"loss": 0.1479,
"step": 5500
},
{
"epoch": 3.935714285714286,
"grad_norm": 2.144227981567383,
"learning_rate": 3.144444444444445e-05,
"loss": 0.3098,
"step": 5510
},
{
"epoch": 3.942857142857143,
"grad_norm": 1.2945857048034668,
"learning_rate": 3.140740740740741e-05,
"loss": 0.269,
"step": 5520
},
{
"epoch": 3.95,
"grad_norm": 1.8362900018692017,
"learning_rate": 3.137037037037037e-05,
"loss": 0.3065,
"step": 5530
},
{
"epoch": 3.9571428571428573,
"grad_norm": 1.9124987125396729,
"learning_rate": 3.1333333333333334e-05,
"loss": 0.2593,
"step": 5540
},
{
"epoch": 3.9642857142857144,
"grad_norm": 1.726523995399475,
"learning_rate": 3.1296296296296295e-05,
"loss": 0.3112,
"step": 5550
},
{
"epoch": 3.9714285714285715,
"grad_norm": 1.5914565324783325,
"learning_rate": 3.1259259259259264e-05,
"loss": 0.263,
"step": 5560
},
{
"epoch": 3.9785714285714286,
"grad_norm": 1.3533891439437866,
"learning_rate": 3.1222222222222225e-05,
"loss": 0.3852,
"step": 5570
},
{
"epoch": 3.9857142857142858,
"grad_norm": 2.1844253540039062,
"learning_rate": 3.118518518518519e-05,
"loss": 0.3761,
"step": 5580
},
{
"epoch": 3.992857142857143,
"grad_norm": 2.494920492172241,
"learning_rate": 3.114814814814815e-05,
"loss": 0.3882,
"step": 5590
},
{
"epoch": 4.0,
"grad_norm": 0.9914864897727966,
"learning_rate": 3.111111111111111e-05,
"loss": 0.3518,
"step": 5600
},
{
"epoch": 4.007142857142857,
"grad_norm": 1.6416865587234497,
"learning_rate": 3.107407407407408e-05,
"loss": 0.2688,
"step": 5610
},
{
"epoch": 4.014285714285714,
"grad_norm": 1.934449315071106,
"learning_rate": 3.103703703703704e-05,
"loss": 0.2385,
"step": 5620
},
{
"epoch": 4.021428571428571,
"grad_norm": 1.7663776874542236,
"learning_rate": 3.1e-05,
"loss": 0.3147,
"step": 5630
},
{
"epoch": 4.0285714285714285,
"grad_norm": 1.8457096815109253,
"learning_rate": 3.096296296296296e-05,
"loss": 0.2922,
"step": 5640
},
{
"epoch": 4.035714285714286,
"grad_norm": 1.133711338043213,
"learning_rate": 3.0925925925925924e-05,
"loss": 0.2291,
"step": 5650
},
{
"epoch": 4.042857142857143,
"grad_norm": 1.794723629951477,
"learning_rate": 3.088888888888889e-05,
"loss": 0.3204,
"step": 5660
},
{
"epoch": 4.05,
"grad_norm": 1.966180443763733,
"learning_rate": 3.0851851851851854e-05,
"loss": 0.2757,
"step": 5670
},
{
"epoch": 4.057142857142857,
"grad_norm": 0.789313018321991,
"learning_rate": 3.0814814814814816e-05,
"loss": 0.3106,
"step": 5680
},
{
"epoch": 4.064285714285714,
"grad_norm": 1.4390606880187988,
"learning_rate": 3.077777777777778e-05,
"loss": 0.192,
"step": 5690
},
{
"epoch": 4.071428571428571,
"grad_norm": 1.8229310512542725,
"learning_rate": 3.074074074074074e-05,
"loss": 0.3802,
"step": 5700
},
{
"epoch": 4.078571428571428,
"grad_norm": 1.3065968751907349,
"learning_rate": 3.070370370370371e-05,
"loss": 0.2891,
"step": 5710
},
{
"epoch": 4.085714285714285,
"grad_norm": 1.5169206857681274,
"learning_rate": 3.066666666666667e-05,
"loss": 0.2818,
"step": 5720
},
{
"epoch": 4.0928571428571425,
"grad_norm": 1.8811321258544922,
"learning_rate": 3.062962962962963e-05,
"loss": 0.1845,
"step": 5730
},
{
"epoch": 4.1,
"grad_norm": 2.2235770225524902,
"learning_rate": 3.059259259259259e-05,
"loss": 0.3671,
"step": 5740
},
{
"epoch": 4.107142857142857,
"grad_norm": 1.5675430297851562,
"learning_rate": 3.055555555555556e-05,
"loss": 0.3588,
"step": 5750
},
{
"epoch": 4.114285714285714,
"grad_norm": 1.3254741430282593,
"learning_rate": 3.0518518518518515e-05,
"loss": 0.3641,
"step": 5760
},
{
"epoch": 4.121428571428571,
"grad_norm": 2.601593017578125,
"learning_rate": 3.0481481481481484e-05,
"loss": 0.2704,
"step": 5770
},
{
"epoch": 4.128571428571428,
"grad_norm": 2.3631677627563477,
"learning_rate": 3.044444444444445e-05,
"loss": 0.2528,
"step": 5780
},
{
"epoch": 4.135714285714286,
"grad_norm": 1.4800968170166016,
"learning_rate": 3.0407407407407407e-05,
"loss": 0.263,
"step": 5790
},
{
"epoch": 4.142857142857143,
"grad_norm": 1.6989574432373047,
"learning_rate": 3.037037037037037e-05,
"loss": 0.2465,
"step": 5800
},
{
"epoch": 4.15,
"grad_norm": 1.595765471458435,
"learning_rate": 3.0333333333333337e-05,
"loss": 0.3223,
"step": 5810
},
{
"epoch": 4.1571428571428575,
"grad_norm": 1.8895677328109741,
"learning_rate": 3.02962962962963e-05,
"loss": 0.3181,
"step": 5820
},
{
"epoch": 4.164285714285715,
"grad_norm": 1.147406816482544,
"learning_rate": 3.025925925925926e-05,
"loss": 0.2275,
"step": 5830
},
{
"epoch": 4.171428571428572,
"grad_norm": 3.310147523880005,
"learning_rate": 3.0222222222222225e-05,
"loss": 0.3615,
"step": 5840
},
{
"epoch": 4.178571428571429,
"grad_norm": 1.6138179302215576,
"learning_rate": 3.018518518518519e-05,
"loss": 0.3492,
"step": 5850
},
{
"epoch": 4.185714285714286,
"grad_norm": 1.9912358522415161,
"learning_rate": 3.0148148148148148e-05,
"loss": 0.3358,
"step": 5860
},
{
"epoch": 4.192857142857143,
"grad_norm": 2.2521820068359375,
"learning_rate": 3.0111111111111113e-05,
"loss": 0.2773,
"step": 5870
},
{
"epoch": 4.2,
"grad_norm": 1.804829478263855,
"learning_rate": 3.0074074074074078e-05,
"loss": 0.3052,
"step": 5880
},
{
"epoch": 4.207142857142857,
"grad_norm": 1.0897246599197388,
"learning_rate": 3.0037037037037036e-05,
"loss": 0.3822,
"step": 5890
},
{
"epoch": 4.214285714285714,
"grad_norm": 1.337428331375122,
"learning_rate": 3e-05,
"loss": 0.3091,
"step": 5900
},
{
"epoch": 4.2214285714285715,
"grad_norm": 1.1409244537353516,
"learning_rate": 2.9962962962962966e-05,
"loss": 0.2002,
"step": 5910
},
{
"epoch": 4.228571428571429,
"grad_norm": 0.9190034866333008,
"learning_rate": 2.992592592592593e-05,
"loss": 0.3029,
"step": 5920
},
{
"epoch": 4.235714285714286,
"grad_norm": 1.7410012483596802,
"learning_rate": 2.988888888888889e-05,
"loss": 0.2361,
"step": 5930
},
{
"epoch": 4.242857142857143,
"grad_norm": 2.308295965194702,
"learning_rate": 2.9851851851851854e-05,
"loss": 0.3654,
"step": 5940
},
{
"epoch": 4.25,
"grad_norm": 1.299177646636963,
"learning_rate": 2.981481481481482e-05,
"loss": 0.2346,
"step": 5950
},
{
"epoch": 4.257142857142857,
"grad_norm": 1.0352667570114136,
"learning_rate": 2.9777777777777777e-05,
"loss": 0.2331,
"step": 5960
},
{
"epoch": 4.264285714285714,
"grad_norm": 1.0682189464569092,
"learning_rate": 2.9740740740740742e-05,
"loss": 0.2456,
"step": 5970
},
{
"epoch": 4.271428571428571,
"grad_norm": 1.536718487739563,
"learning_rate": 2.9703703703703707e-05,
"loss": 0.1908,
"step": 5980
},
{
"epoch": 4.2785714285714285,
"grad_norm": 2.0448334217071533,
"learning_rate": 2.9666666666666672e-05,
"loss": 0.3399,
"step": 5990
},
{
"epoch": 4.285714285714286,
"grad_norm": 2.205901622772217,
"learning_rate": 2.962962962962963e-05,
"loss": 0.1913,
"step": 6000
},
{
"epoch": 4.285714285714286,
"eval_loss": 0.36299219727516174,
"eval_rouge1": 0.9025,
"eval_rouge2": 0.8402,
"eval_rougeL": 0.8994,
"eval_runtime": 122.2765,
"eval_samples_per_second": 11.449,
"eval_steps_per_second": 5.725,
"step": 6000
},
{
"epoch": 4.292857142857143,
"grad_norm": 1.455069661140442,
"learning_rate": 2.9592592592592595e-05,
"loss": 0.2236,
"step": 6010
},
{
"epoch": 4.3,
"grad_norm": 1.6218276023864746,
"learning_rate": 2.955555555555556e-05,
"loss": 0.2166,
"step": 6020
},
{
"epoch": 4.307142857142857,
"grad_norm": 1.4643278121948242,
"learning_rate": 2.9518518518518518e-05,
"loss": 0.2543,
"step": 6030
},
{
"epoch": 4.314285714285714,
"grad_norm": 1.9875061511993408,
"learning_rate": 2.9481481481481483e-05,
"loss": 0.275,
"step": 6040
},
{
"epoch": 4.321428571428571,
"grad_norm": 2.003077268600464,
"learning_rate": 2.9444444444444448e-05,
"loss": 0.3431,
"step": 6050
},
{
"epoch": 4.328571428571428,
"grad_norm": 1.332705020904541,
"learning_rate": 2.9407407407407413e-05,
"loss": 0.2546,
"step": 6060
},
{
"epoch": 4.335714285714285,
"grad_norm": 1.9161280393600464,
"learning_rate": 2.937037037037037e-05,
"loss": 0.2909,
"step": 6070
},
{
"epoch": 4.3428571428571425,
"grad_norm": 1.509238839149475,
"learning_rate": 2.9333333333333336e-05,
"loss": 0.253,
"step": 6080
},
{
"epoch": 4.35,
"grad_norm": 2.238847255706787,
"learning_rate": 2.92962962962963e-05,
"loss": 0.2717,
"step": 6090
},
{
"epoch": 4.357142857142857,
"grad_norm": 1.9578133821487427,
"learning_rate": 2.925925925925926e-05,
"loss": 0.3407,
"step": 6100
},
{
"epoch": 4.364285714285714,
"grad_norm": 1.805828332901001,
"learning_rate": 2.9222222222222224e-05,
"loss": 0.1811,
"step": 6110
},
{
"epoch": 4.371428571428572,
"grad_norm": 2.9014134407043457,
"learning_rate": 2.918518518518519e-05,
"loss": 0.3934,
"step": 6120
},
{
"epoch": 4.378571428571428,
"grad_norm": 1.9857615232467651,
"learning_rate": 2.914814814814815e-05,
"loss": 0.2026,
"step": 6130
},
{
"epoch": 4.385714285714286,
"grad_norm": 2.3884503841400146,
"learning_rate": 2.9111111111111112e-05,
"loss": 0.2787,
"step": 6140
},
{
"epoch": 4.392857142857143,
"grad_norm": 2.298215866088867,
"learning_rate": 2.9074074074074077e-05,
"loss": 0.2765,
"step": 6150
},
{
"epoch": 4.4,
"grad_norm": 2.1733076572418213,
"learning_rate": 2.9037037037037042e-05,
"loss": 0.3975,
"step": 6160
},
{
"epoch": 4.4071428571428575,
"grad_norm": 3.3003320693969727,
"learning_rate": 2.9e-05,
"loss": 0.4152,
"step": 6170
},
{
"epoch": 4.414285714285715,
"grad_norm": 1.5066970586776733,
"learning_rate": 2.8962962962962965e-05,
"loss": 0.345,
"step": 6180
},
{
"epoch": 4.421428571428572,
"grad_norm": 2.134096145629883,
"learning_rate": 2.892592592592593e-05,
"loss": 0.3154,
"step": 6190
},
{
"epoch": 4.428571428571429,
"grad_norm": 1.8306220769882202,
"learning_rate": 2.8888888888888888e-05,
"loss": 0.2908,
"step": 6200
},
{
"epoch": 4.435714285714286,
"grad_norm": 1.4300037622451782,
"learning_rate": 2.8851851851851853e-05,
"loss": 0.342,
"step": 6210
},
{
"epoch": 4.442857142857143,
"grad_norm": 1.6552793979644775,
"learning_rate": 2.8814814814814818e-05,
"loss": 0.2856,
"step": 6220
},
{
"epoch": 4.45,
"grad_norm": 2.188889265060425,
"learning_rate": 2.877777777777778e-05,
"loss": 0.25,
"step": 6230
},
{
"epoch": 4.457142857142857,
"grad_norm": 1.3003034591674805,
"learning_rate": 2.874074074074074e-05,
"loss": 0.2995,
"step": 6240
},
{
"epoch": 4.464285714285714,
"grad_norm": 1.834549903869629,
"learning_rate": 2.8703703703703706e-05,
"loss": 0.3726,
"step": 6250
},
{
"epoch": 4.4714285714285715,
"grad_norm": 1.9426199197769165,
"learning_rate": 2.8666666666666668e-05,
"loss": 0.2142,
"step": 6260
},
{
"epoch": 4.478571428571429,
"grad_norm": 1.5088646411895752,
"learning_rate": 2.862962962962963e-05,
"loss": 0.3584,
"step": 6270
},
{
"epoch": 4.485714285714286,
"grad_norm": 1.9997400045394897,
"learning_rate": 2.8592592592592594e-05,
"loss": 0.2402,
"step": 6280
},
{
"epoch": 4.492857142857143,
"grad_norm": 1.3831549882888794,
"learning_rate": 2.855555555555556e-05,
"loss": 0.312,
"step": 6290
},
{
"epoch": 4.5,
"grad_norm": 2.013425588607788,
"learning_rate": 2.851851851851852e-05,
"loss": 0.2728,
"step": 6300
},
{
"epoch": 4.507142857142857,
"grad_norm": 1.1200778484344482,
"learning_rate": 2.8481481481481482e-05,
"loss": 0.3909,
"step": 6310
},
{
"epoch": 4.514285714285714,
"grad_norm": 0.8029781579971313,
"learning_rate": 2.8444444444444447e-05,
"loss": 0.3491,
"step": 6320
},
{
"epoch": 4.521428571428571,
"grad_norm": 1.4999722242355347,
"learning_rate": 2.840740740740741e-05,
"loss": 0.2583,
"step": 6330
},
{
"epoch": 4.5285714285714285,
"grad_norm": 1.8954156637191772,
"learning_rate": 2.837037037037037e-05,
"loss": 0.3971,
"step": 6340
},
{
"epoch": 4.535714285714286,
"grad_norm": 1.5697578191757202,
"learning_rate": 2.8333333333333335e-05,
"loss": 0.3222,
"step": 6350
},
{
"epoch": 4.542857142857143,
"grad_norm": 0.9937646389007568,
"learning_rate": 2.8296296296296297e-05,
"loss": 0.3673,
"step": 6360
},
{
"epoch": 4.55,
"grad_norm": 1.935511589050293,
"learning_rate": 2.8259259259259262e-05,
"loss": 0.2385,
"step": 6370
},
{
"epoch": 4.557142857142857,
"grad_norm": 1.8132340908050537,
"learning_rate": 2.8222222222222223e-05,
"loss": 0.226,
"step": 6380
},
{
"epoch": 4.564285714285714,
"grad_norm": 0.8551497459411621,
"learning_rate": 2.8185185185185185e-05,
"loss": 0.3874,
"step": 6390
},
{
"epoch": 4.571428571428571,
"grad_norm": 2.0115785598754883,
"learning_rate": 2.814814814814815e-05,
"loss": 0.2328,
"step": 6400
},
{
"epoch": 4.578571428571428,
"grad_norm": 1.0582072734832764,
"learning_rate": 2.811111111111111e-05,
"loss": 0.3523,
"step": 6410
},
{
"epoch": 4.585714285714285,
"grad_norm": 1.3484958410263062,
"learning_rate": 2.8074074074074076e-05,
"loss": 0.2867,
"step": 6420
},
{
"epoch": 4.5928571428571425,
"grad_norm": 1.4483561515808105,
"learning_rate": 2.8037037037037038e-05,
"loss": 0.2623,
"step": 6430
},
{
"epoch": 4.6,
"grad_norm": 2.2348268032073975,
"learning_rate": 2.8000000000000003e-05,
"loss": 0.3953,
"step": 6440
},
{
"epoch": 4.607142857142857,
"grad_norm": 2.654326915740967,
"learning_rate": 2.7962962962962965e-05,
"loss": 0.3516,
"step": 6450
},
{
"epoch": 4.614285714285714,
"grad_norm": 0.8564252257347107,
"learning_rate": 2.7925925925925926e-05,
"loss": 0.2497,
"step": 6460
},
{
"epoch": 4.621428571428572,
"grad_norm": 2.7823233604431152,
"learning_rate": 2.788888888888889e-05,
"loss": 0.3975,
"step": 6470
},
{
"epoch": 4.628571428571428,
"grad_norm": 1.0915263891220093,
"learning_rate": 2.7851851851851853e-05,
"loss": 0.2574,
"step": 6480
},
{
"epoch": 4.635714285714286,
"grad_norm": 1.0459774732589722,
"learning_rate": 2.7814814814814814e-05,
"loss": 0.3426,
"step": 6490
},
{
"epoch": 4.642857142857143,
"grad_norm": 3.1720130443573,
"learning_rate": 2.777777777777778e-05,
"loss": 0.3155,
"step": 6500
},
{
"epoch": 4.65,
"grad_norm": 1.499185562133789,
"learning_rate": 2.774074074074074e-05,
"loss": 0.4515,
"step": 6510
},
{
"epoch": 4.6571428571428575,
"grad_norm": 2.4211909770965576,
"learning_rate": 2.7703703703703706e-05,
"loss": 0.2963,
"step": 6520
},
{
"epoch": 4.664285714285715,
"grad_norm": 2.167006492614746,
"learning_rate": 2.7666666666666667e-05,
"loss": 0.2625,
"step": 6530
},
{
"epoch": 4.671428571428572,
"grad_norm": 1.8955094814300537,
"learning_rate": 2.7629629629629632e-05,
"loss": 0.3374,
"step": 6540
},
{
"epoch": 4.678571428571429,
"grad_norm": 0.9967934489250183,
"learning_rate": 2.7592592592592594e-05,
"loss": 0.1611,
"step": 6550
},
{
"epoch": 4.685714285714286,
"grad_norm": 1.007778525352478,
"learning_rate": 2.7555555555555555e-05,
"loss": 0.2516,
"step": 6560
},
{
"epoch": 4.692857142857143,
"grad_norm": 2.9705958366394043,
"learning_rate": 2.751851851851852e-05,
"loss": 0.3893,
"step": 6570
},
{
"epoch": 4.7,
"grad_norm": 2.689723491668701,
"learning_rate": 2.7481481481481482e-05,
"loss": 0.2404,
"step": 6580
},
{
"epoch": 4.707142857142857,
"grad_norm": 2.095930337905884,
"learning_rate": 2.7444444444444443e-05,
"loss": 0.3239,
"step": 6590
},
{
"epoch": 4.714285714285714,
"grad_norm": 1.9235697984695435,
"learning_rate": 2.7407407407407408e-05,
"loss": 0.2779,
"step": 6600
},
{
"epoch": 4.7214285714285715,
"grad_norm": 3.329378843307495,
"learning_rate": 2.7370370370370373e-05,
"loss": 0.2791,
"step": 6610
},
{
"epoch": 4.728571428571429,
"grad_norm": 1.9044978618621826,
"learning_rate": 2.733333333333333e-05,
"loss": 0.3757,
"step": 6620
},
{
"epoch": 4.735714285714286,
"grad_norm": 2.207752227783203,
"learning_rate": 2.7296296296296296e-05,
"loss": 0.3391,
"step": 6630
},
{
"epoch": 4.742857142857143,
"grad_norm": 2.0488827228546143,
"learning_rate": 2.725925925925926e-05,
"loss": 0.396,
"step": 6640
},
{
"epoch": 4.75,
"grad_norm": 2.425340414047241,
"learning_rate": 2.7222222222222223e-05,
"loss": 0.2871,
"step": 6650
},
{
"epoch": 4.757142857142857,
"grad_norm": 1.9408286809921265,
"learning_rate": 2.7185185185185184e-05,
"loss": 0.3144,
"step": 6660
},
{
"epoch": 4.764285714285714,
"grad_norm": 1.864397406578064,
"learning_rate": 2.714814814814815e-05,
"loss": 0.2685,
"step": 6670
},
{
"epoch": 4.771428571428571,
"grad_norm": 1.1838607788085938,
"learning_rate": 2.7111111111111114e-05,
"loss": 0.2751,
"step": 6680
},
{
"epoch": 4.7785714285714285,
"grad_norm": 2.26408052444458,
"learning_rate": 2.7074074074074072e-05,
"loss": 0.3158,
"step": 6690
},
{
"epoch": 4.785714285714286,
"grad_norm": 2.007145404815674,
"learning_rate": 2.7037037037037037e-05,
"loss": 0.1969,
"step": 6700
},
{
"epoch": 4.792857142857143,
"grad_norm": 2.5209295749664307,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.3022,
"step": 6710
},
{
"epoch": 4.8,
"grad_norm": 2.3263044357299805,
"learning_rate": 2.696296296296296e-05,
"loss": 0.3799,
"step": 6720
},
{
"epoch": 4.807142857142857,
"grad_norm": 1.3880634307861328,
"learning_rate": 2.6925925925925925e-05,
"loss": 0.2829,
"step": 6730
},
{
"epoch": 4.814285714285714,
"grad_norm": 2.0264179706573486,
"learning_rate": 2.688888888888889e-05,
"loss": 0.2754,
"step": 6740
},
{
"epoch": 4.821428571428571,
"grad_norm": 1.6165140867233276,
"learning_rate": 2.6851851851851855e-05,
"loss": 0.3171,
"step": 6750
},
{
"epoch": 4.828571428571428,
"grad_norm": 1.6405526399612427,
"learning_rate": 2.6814814814814814e-05,
"loss": 0.4082,
"step": 6760
},
{
"epoch": 4.835714285714285,
"grad_norm": 1.6864060163497925,
"learning_rate": 2.677777777777778e-05,
"loss": 0.2026,
"step": 6770
},
{
"epoch": 4.8428571428571425,
"grad_norm": 1.4906965494155884,
"learning_rate": 2.6740740740740743e-05,
"loss": 0.2582,
"step": 6780
},
{
"epoch": 4.85,
"grad_norm": 1.2227530479431152,
"learning_rate": 2.67037037037037e-05,
"loss": 0.185,
"step": 6790
},
{
"epoch": 4.857142857142857,
"grad_norm": 1.2606697082519531,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.2651,
"step": 6800
},
{
"epoch": 4.864285714285714,
"grad_norm": 2.3722660541534424,
"learning_rate": 2.662962962962963e-05,
"loss": 0.2746,
"step": 6810
},
{
"epoch": 4.871428571428572,
"grad_norm": 1.8622608184814453,
"learning_rate": 2.659259259259259e-05,
"loss": 0.3473,
"step": 6820
},
{
"epoch": 4.878571428571428,
"grad_norm": 1.3814878463745117,
"learning_rate": 2.6555555555555555e-05,
"loss": 0.2706,
"step": 6830
},
{
"epoch": 4.885714285714286,
"grad_norm": 2.013650894165039,
"learning_rate": 2.651851851851852e-05,
"loss": 0.2802,
"step": 6840
},
{
"epoch": 4.892857142857143,
"grad_norm": 1.467282772064209,
"learning_rate": 2.6481481481481485e-05,
"loss": 0.3158,
"step": 6850
},
{
"epoch": 4.9,
"grad_norm": 1.3019797801971436,
"learning_rate": 2.6444444444444443e-05,
"loss": 0.2012,
"step": 6860
},
{
"epoch": 4.9071428571428575,
"grad_norm": 1.1120600700378418,
"learning_rate": 2.6407407407407408e-05,
"loss": 0.1385,
"step": 6870
},
{
"epoch": 4.914285714285715,
"grad_norm": 1.470406413078308,
"learning_rate": 2.6370370370370373e-05,
"loss": 0.3014,
"step": 6880
},
{
"epoch": 4.921428571428572,
"grad_norm": 2.237767457962036,
"learning_rate": 2.633333333333333e-05,
"loss": 0.2677,
"step": 6890
},
{
"epoch": 4.928571428571429,
"grad_norm": 1.3994693756103516,
"learning_rate": 2.6296296296296296e-05,
"loss": 0.4261,
"step": 6900
},
{
"epoch": 4.935714285714286,
"grad_norm": 2.21905517578125,
"learning_rate": 2.625925925925926e-05,
"loss": 0.3701,
"step": 6910
},
{
"epoch": 4.942857142857143,
"grad_norm": 2.8682186603546143,
"learning_rate": 2.6222222222222226e-05,
"loss": 0.4047,
"step": 6920
},
{
"epoch": 4.95,
"grad_norm": 1.9691041707992554,
"learning_rate": 2.6185185185185184e-05,
"loss": 0.2735,
"step": 6930
},
{
"epoch": 4.957142857142857,
"grad_norm": 1.7553354501724243,
"learning_rate": 2.614814814814815e-05,
"loss": 0.2381,
"step": 6940
},
{
"epoch": 4.964285714285714,
"grad_norm": 1.7930738925933838,
"learning_rate": 2.6111111111111114e-05,
"loss": 0.2838,
"step": 6950
},
{
"epoch": 4.9714285714285715,
"grad_norm": 2.4153687953948975,
"learning_rate": 2.6074074074074072e-05,
"loss": 0.4002,
"step": 6960
},
{
"epoch": 4.978571428571429,
"grad_norm": 1.392898678779602,
"learning_rate": 2.6037037037037037e-05,
"loss": 0.248,
"step": 6970
},
{
"epoch": 4.985714285714286,
"grad_norm": 1.7113401889801025,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.2871,
"step": 6980
},
{
"epoch": 4.992857142857143,
"grad_norm": 2.4877359867095947,
"learning_rate": 2.5962962962962967e-05,
"loss": 0.2443,
"step": 6990
},
{
"epoch": 5.0,
"grad_norm": 1.7225149869918823,
"learning_rate": 2.5925925925925925e-05,
"loss": 0.186,
"step": 7000
},
{
"epoch": 5.0,
"eval_loss": 0.3584790527820587,
"eval_rouge1": 0.9047,
"eval_rouge2": 0.8434,
"eval_rougeL": 0.9018,
"eval_runtime": 122.2903,
"eval_samples_per_second": 11.448,
"eval_steps_per_second": 5.724,
"step": 7000
},
{
"epoch": 5.007142857142857,
"grad_norm": 2.1430020332336426,
"learning_rate": 2.588888888888889e-05,
"loss": 0.3477,
"step": 7010
},
{
"epoch": 5.014285714285714,
"grad_norm": 0.958677351474762,
"learning_rate": 2.5851851851851855e-05,
"loss": 0.2474,
"step": 7020
},
{
"epoch": 5.021428571428571,
"grad_norm": 2.315269947052002,
"learning_rate": 2.5814814814814813e-05,
"loss": 0.2786,
"step": 7030
},
{
"epoch": 5.0285714285714285,
"grad_norm": 1.3595519065856934,
"learning_rate": 2.5777777777777778e-05,
"loss": 0.2286,
"step": 7040
},
{
"epoch": 5.035714285714286,
"grad_norm": 1.44675874710083,
"learning_rate": 2.5740740740740743e-05,
"loss": 0.2679,
"step": 7050
},
{
"epoch": 5.042857142857143,
"grad_norm": 1.754285454750061,
"learning_rate": 2.5703703703703708e-05,
"loss": 0.196,
"step": 7060
},
{
"epoch": 5.05,
"grad_norm": 2.9333369731903076,
"learning_rate": 2.5666666666666666e-05,
"loss": 0.1694,
"step": 7070
},
{
"epoch": 5.057142857142857,
"grad_norm": 2.6653859615325928,
"learning_rate": 2.562962962962963e-05,
"loss": 0.2642,
"step": 7080
},
{
"epoch": 5.064285714285714,
"grad_norm": 1.8362854719161987,
"learning_rate": 2.5592592592592596e-05,
"loss": 0.3614,
"step": 7090
},
{
"epoch": 5.071428571428571,
"grad_norm": 1.427701473236084,
"learning_rate": 2.5555555555555554e-05,
"loss": 0.2351,
"step": 7100
},
{
"epoch": 5.078571428571428,
"grad_norm": 2.3684027194976807,
"learning_rate": 2.551851851851852e-05,
"loss": 0.2803,
"step": 7110
},
{
"epoch": 5.085714285714285,
"grad_norm": 1.5823931694030762,
"learning_rate": 2.5481481481481484e-05,
"loss": 0.2749,
"step": 7120
},
{
"epoch": 5.0928571428571425,
"grad_norm": 1.6682019233703613,
"learning_rate": 2.5444444444444442e-05,
"loss": 0.3219,
"step": 7130
},
{
"epoch": 5.1,
"grad_norm": 1.7803760766983032,
"learning_rate": 2.5407407407407407e-05,
"loss": 0.2553,
"step": 7140
},
{
"epoch": 5.107142857142857,
"grad_norm": 1.945063591003418,
"learning_rate": 2.5370370370370372e-05,
"loss": 0.1739,
"step": 7150
},
{
"epoch": 5.114285714285714,
"grad_norm": 1.308371663093567,
"learning_rate": 2.5333333333333337e-05,
"loss": 0.2605,
"step": 7160
},
{
"epoch": 5.121428571428571,
"grad_norm": 1.906160593032837,
"learning_rate": 2.5296296296296295e-05,
"loss": 0.2071,
"step": 7170
},
{
"epoch": 5.128571428571428,
"grad_norm": 1.6239346265792847,
"learning_rate": 2.525925925925926e-05,
"loss": 0.2054,
"step": 7180
},
{
"epoch": 5.135714285714286,
"grad_norm": 1.6175967454910278,
"learning_rate": 2.5222222222222225e-05,
"loss": 0.2266,
"step": 7190
},
{
"epoch": 5.142857142857143,
"grad_norm": 1.938736915588379,
"learning_rate": 2.5185185185185183e-05,
"loss": 0.2932,
"step": 7200
},
{
"epoch": 5.15,
"grad_norm": 1.7323144674301147,
"learning_rate": 2.5148148148148148e-05,
"loss": 0.2762,
"step": 7210
},
{
"epoch": 5.1571428571428575,
"grad_norm": 1.859667181968689,
"learning_rate": 2.5111111111111113e-05,
"loss": 0.3213,
"step": 7220
},
{
"epoch": 5.164285714285715,
"grad_norm": 1.22067391872406,
"learning_rate": 2.5074074074074078e-05,
"loss": 0.2246,
"step": 7230
},
{
"epoch": 5.171428571428572,
"grad_norm": 0.9384840726852417,
"learning_rate": 2.5037037037037036e-05,
"loss": 0.3364,
"step": 7240
},
{
"epoch": 5.178571428571429,
"grad_norm": 1.4494845867156982,
"learning_rate": 2.5e-05,
"loss": 0.332,
"step": 7250
},
{
"epoch": 5.185714285714286,
"grad_norm": 2.3436357975006104,
"learning_rate": 2.4962962962962963e-05,
"loss": 0.1456,
"step": 7260
},
{
"epoch": 5.192857142857143,
"grad_norm": 1.0446144342422485,
"learning_rate": 2.4925925925925928e-05,
"loss": 0.1995,
"step": 7270
},
{
"epoch": 5.2,
"grad_norm": 2.325575113296509,
"learning_rate": 2.488888888888889e-05,
"loss": 0.3068,
"step": 7280
},
{
"epoch": 5.207142857142857,
"grad_norm": 2.100825309753418,
"learning_rate": 2.4851851851851854e-05,
"loss": 0.2659,
"step": 7290
},
{
"epoch": 5.214285714285714,
"grad_norm": 2.6580276489257812,
"learning_rate": 2.4814814814814816e-05,
"loss": 0.2872,
"step": 7300
},
{
"epoch": 5.2214285714285715,
"grad_norm": 2.505577564239502,
"learning_rate": 2.477777777777778e-05,
"loss": 0.2574,
"step": 7310
},
{
"epoch": 5.228571428571429,
"grad_norm": 1.4997559785842896,
"learning_rate": 2.4740740740740742e-05,
"loss": 0.2192,
"step": 7320
},
{
"epoch": 5.235714285714286,
"grad_norm": 1.9084120988845825,
"learning_rate": 2.4703703703703704e-05,
"loss": 0.2836,
"step": 7330
},
{
"epoch": 5.242857142857143,
"grad_norm": 1.1388484239578247,
"learning_rate": 2.466666666666667e-05,
"loss": 0.2426,
"step": 7340
},
{
"epoch": 5.25,
"grad_norm": 1.0559568405151367,
"learning_rate": 2.462962962962963e-05,
"loss": 0.344,
"step": 7350
},
{
"epoch": 5.257142857142857,
"grad_norm": 1.4024419784545898,
"learning_rate": 2.4592592592592595e-05,
"loss": 0.2121,
"step": 7360
},
{
"epoch": 5.264285714285714,
"grad_norm": 1.4338841438293457,
"learning_rate": 2.4555555555555557e-05,
"loss": 0.3329,
"step": 7370
},
{
"epoch": 5.271428571428571,
"grad_norm": 1.4188106060028076,
"learning_rate": 2.451851851851852e-05,
"loss": 0.2479,
"step": 7380
},
{
"epoch": 5.2785714285714285,
"grad_norm": 1.4320842027664185,
"learning_rate": 2.4481481481481483e-05,
"loss": 0.156,
"step": 7390
},
{
"epoch": 5.285714285714286,
"grad_norm": 3.022641181945801,
"learning_rate": 2.4444444444444445e-05,
"loss": 0.1962,
"step": 7400
},
{
"epoch": 5.292857142857143,
"grad_norm": 2.3267366886138916,
"learning_rate": 2.440740740740741e-05,
"loss": 0.2713,
"step": 7410
},
{
"epoch": 5.3,
"grad_norm": 2.685345411300659,
"learning_rate": 2.437037037037037e-05,
"loss": 0.3345,
"step": 7420
},
{
"epoch": 5.307142857142857,
"grad_norm": 0.9320240020751953,
"learning_rate": 2.4333333333333336e-05,
"loss": 0.3758,
"step": 7430
},
{
"epoch": 5.314285714285714,
"grad_norm": 1.8067562580108643,
"learning_rate": 2.4296296296296298e-05,
"loss": 0.2958,
"step": 7440
},
{
"epoch": 5.321428571428571,
"grad_norm": 1.5514296293258667,
"learning_rate": 2.425925925925926e-05,
"loss": 0.3268,
"step": 7450
},
{
"epoch": 5.328571428571428,
"grad_norm": 1.684311032295227,
"learning_rate": 2.4222222222222224e-05,
"loss": 0.2947,
"step": 7460
},
{
"epoch": 5.335714285714285,
"grad_norm": 2.0809545516967773,
"learning_rate": 2.4185185185185186e-05,
"loss": 0.2928,
"step": 7470
},
{
"epoch": 5.3428571428571425,
"grad_norm": 2.5362987518310547,
"learning_rate": 2.414814814814815e-05,
"loss": 0.1962,
"step": 7480
},
{
"epoch": 5.35,
"grad_norm": 0.636965274810791,
"learning_rate": 2.4111111111111113e-05,
"loss": 0.1694,
"step": 7490
},
{
"epoch": 5.357142857142857,
"grad_norm": 2.1662261486053467,
"learning_rate": 2.4074074074074074e-05,
"loss": 0.3111,
"step": 7500
},
{
"epoch": 5.364285714285714,
"grad_norm": 1.749324083328247,
"learning_rate": 2.403703703703704e-05,
"loss": 0.2521,
"step": 7510
},
{
"epoch": 5.371428571428572,
"grad_norm": 2.3572323322296143,
"learning_rate": 2.4e-05,
"loss": 0.1527,
"step": 7520
},
{
"epoch": 5.378571428571428,
"grad_norm": 1.274588942527771,
"learning_rate": 2.3962962962962966e-05,
"loss": 0.2757,
"step": 7530
},
{
"epoch": 5.385714285714286,
"grad_norm": 1.2197136878967285,
"learning_rate": 2.3925925925925927e-05,
"loss": 0.2288,
"step": 7540
},
{
"epoch": 5.392857142857143,
"grad_norm": 1.6061832904815674,
"learning_rate": 2.3888888888888892e-05,
"loss": 0.3292,
"step": 7550
},
{
"epoch": 5.4,
"grad_norm": 1.8271028995513916,
"learning_rate": 2.3851851851851854e-05,
"loss": 0.2392,
"step": 7560
},
{
"epoch": 5.4071428571428575,
"grad_norm": 1.8294018507003784,
"learning_rate": 2.3814814814814815e-05,
"loss": 0.2554,
"step": 7570
},
{
"epoch": 5.414285714285715,
"grad_norm": 1.253556728363037,
"learning_rate": 2.377777777777778e-05,
"loss": 0.2008,
"step": 7580
},
{
"epoch": 5.421428571428572,
"grad_norm": 1.1980758905410767,
"learning_rate": 2.3740740740740742e-05,
"loss": 0.265,
"step": 7590
},
{
"epoch": 5.428571428571429,
"grad_norm": 1.5337406396865845,
"learning_rate": 2.3703703703703707e-05,
"loss": 0.4126,
"step": 7600
},
{
"epoch": 5.435714285714286,
"grad_norm": 2.981381893157959,
"learning_rate": 2.3666666666666668e-05,
"loss": 0.3554,
"step": 7610
},
{
"epoch": 5.442857142857143,
"grad_norm": 1.927241325378418,
"learning_rate": 2.3629629629629633e-05,
"loss": 0.3148,
"step": 7620
},
{
"epoch": 5.45,
"grad_norm": 1.0788408517837524,
"learning_rate": 2.3592592592592595e-05,
"loss": 0.2421,
"step": 7630
},
{
"epoch": 5.457142857142857,
"grad_norm": 1.250436782836914,
"learning_rate": 2.3555555555555556e-05,
"loss": 0.2797,
"step": 7640
},
{
"epoch": 5.464285714285714,
"grad_norm": 1.2195000648498535,
"learning_rate": 2.351851851851852e-05,
"loss": 0.1702,
"step": 7650
},
{
"epoch": 5.4714285714285715,
"grad_norm": 1.773098349571228,
"learning_rate": 2.3481481481481483e-05,
"loss": 0.2383,
"step": 7660
},
{
"epoch": 5.478571428571429,
"grad_norm": 1.540499210357666,
"learning_rate": 2.3444444444444448e-05,
"loss": 0.2741,
"step": 7670
},
{
"epoch": 5.485714285714286,
"grad_norm": 1.3515613079071045,
"learning_rate": 2.340740740740741e-05,
"loss": 0.4365,
"step": 7680
},
{
"epoch": 5.492857142857143,
"grad_norm": 1.5094635486602783,
"learning_rate": 2.337037037037037e-05,
"loss": 0.2777,
"step": 7690
},
{
"epoch": 5.5,
"grad_norm": 1.123542070388794,
"learning_rate": 2.3333333333333336e-05,
"loss": 0.3406,
"step": 7700
},
{
"epoch": 5.507142857142857,
"grad_norm": 1.0701942443847656,
"learning_rate": 2.3296296296296297e-05,
"loss": 0.2499,
"step": 7710
},
{
"epoch": 5.514285714285714,
"grad_norm": 1.270992636680603,
"learning_rate": 2.3259259259259262e-05,
"loss": 0.2044,
"step": 7720
},
{
"epoch": 5.521428571428571,
"grad_norm": 1.5586347579956055,
"learning_rate": 2.3222222222222224e-05,
"loss": 0.2573,
"step": 7730
},
{
"epoch": 5.5285714285714285,
"grad_norm": 0.9162809252738953,
"learning_rate": 2.318518518518519e-05,
"loss": 0.2245,
"step": 7740
},
{
"epoch": 5.535714285714286,
"grad_norm": 1.7767843008041382,
"learning_rate": 2.314814814814815e-05,
"loss": 0.276,
"step": 7750
},
{
"epoch": 5.542857142857143,
"grad_norm": 2.538541316986084,
"learning_rate": 2.3111111111111112e-05,
"loss": 0.3448,
"step": 7760
},
{
"epoch": 5.55,
"grad_norm": 1.5738705396652222,
"learning_rate": 2.3074074074074077e-05,
"loss": 0.3023,
"step": 7770
},
{
"epoch": 5.557142857142857,
"grad_norm": 0.9919751286506653,
"learning_rate": 2.303703703703704e-05,
"loss": 0.2979,
"step": 7780
},
{
"epoch": 5.564285714285714,
"grad_norm": 1.079817771911621,
"learning_rate": 2.3000000000000003e-05,
"loss": 0.3141,
"step": 7790
},
{
"epoch": 5.571428571428571,
"grad_norm": 1.709007978439331,
"learning_rate": 2.2962962962962965e-05,
"loss": 0.2452,
"step": 7800
},
{
"epoch": 5.578571428571428,
"grad_norm": 0.9877552390098572,
"learning_rate": 2.2925925925925927e-05,
"loss": 0.2796,
"step": 7810
},
{
"epoch": 5.585714285714285,
"grad_norm": 1.9676953554153442,
"learning_rate": 2.288888888888889e-05,
"loss": 0.2314,
"step": 7820
},
{
"epoch": 5.5928571428571425,
"grad_norm": 1.778275966644287,
"learning_rate": 2.2851851851851853e-05,
"loss": 0.3033,
"step": 7830
},
{
"epoch": 5.6,
"grad_norm": 0.9746494889259338,
"learning_rate": 2.2814814814814818e-05,
"loss": 0.2459,
"step": 7840
},
{
"epoch": 5.607142857142857,
"grad_norm": 2.0238702297210693,
"learning_rate": 2.277777777777778e-05,
"loss": 0.2494,
"step": 7850
},
{
"epoch": 5.614285714285714,
"grad_norm": 1.2345530986785889,
"learning_rate": 2.2740740740740744e-05,
"loss": 0.4614,
"step": 7860
},
{
"epoch": 5.621428571428572,
"grad_norm": 0.9835256338119507,
"learning_rate": 2.2703703703703706e-05,
"loss": 0.3519,
"step": 7870
},
{
"epoch": 5.628571428571428,
"grad_norm": 1.9753897190093994,
"learning_rate": 2.2666666666666668e-05,
"loss": 0.2895,
"step": 7880
},
{
"epoch": 5.635714285714286,
"grad_norm": 1.7247217893600464,
"learning_rate": 2.2629629629629633e-05,
"loss": 0.1994,
"step": 7890
},
{
"epoch": 5.642857142857143,
"grad_norm": 1.8406201601028442,
"learning_rate": 2.2592592592592594e-05,
"loss": 0.1872,
"step": 7900
},
{
"epoch": 5.65,
"grad_norm": 1.4785393476486206,
"learning_rate": 2.255555555555556e-05,
"loss": 0.2811,
"step": 7910
},
{
"epoch": 5.6571428571428575,
"grad_norm": 2.23652982711792,
"learning_rate": 2.251851851851852e-05,
"loss": 0.3071,
"step": 7920
},
{
"epoch": 5.664285714285715,
"grad_norm": 1.9096837043762207,
"learning_rate": 2.2481481481481486e-05,
"loss": 0.2115,
"step": 7930
},
{
"epoch": 5.671428571428572,
"grad_norm": 2.0808775424957275,
"learning_rate": 2.2444444444444447e-05,
"loss": 0.3923,
"step": 7940
},
{
"epoch": 5.678571428571429,
"grad_norm": 1.5935535430908203,
"learning_rate": 2.240740740740741e-05,
"loss": 0.3461,
"step": 7950
},
{
"epoch": 5.685714285714286,
"grad_norm": 1.1959024667739868,
"learning_rate": 2.2370370370370374e-05,
"loss": 0.2016,
"step": 7960
},
{
"epoch": 5.692857142857143,
"grad_norm": 1.0776904821395874,
"learning_rate": 2.2333333333333335e-05,
"loss": 0.3476,
"step": 7970
},
{
"epoch": 5.7,
"grad_norm": 1.884531855583191,
"learning_rate": 2.2296296296296297e-05,
"loss": 0.2861,
"step": 7980
},
{
"epoch": 5.707142857142857,
"grad_norm": 1.2476330995559692,
"learning_rate": 2.2259259259259262e-05,
"loss": 0.2152,
"step": 7990
},
{
"epoch": 5.714285714285714,
"grad_norm": 2.106348752975464,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.3022,
"step": 8000
},
{
"epoch": 5.714285714285714,
"eval_loss": 0.3492221236228943,
"eval_rouge1": 0.9062,
"eval_rouge2": 0.8456,
"eval_rougeL": 0.9033,
"eval_runtime": 122.1433,
"eval_samples_per_second": 11.462,
"eval_steps_per_second": 5.731,
"step": 8000
},
{
"epoch": 5.7214285714285715,
"grad_norm": 1.9454623460769653,
"learning_rate": 2.2185185185185188e-05,
"loss": 0.232,
"step": 8010
},
{
"epoch": 5.728571428571429,
"grad_norm": 1.9178905487060547,
"learning_rate": 2.214814814814815e-05,
"loss": 0.2278,
"step": 8020
},
{
"epoch": 5.735714285714286,
"grad_norm": 1.6279345750808716,
"learning_rate": 2.211111111111111e-05,
"loss": 0.2423,
"step": 8030
},
{
"epoch": 5.742857142857143,
"grad_norm": 2.7422447204589844,
"learning_rate": 2.2074074074074076e-05,
"loss": 0.3129,
"step": 8040
},
{
"epoch": 5.75,
"grad_norm": 1.7606775760650635,
"learning_rate": 2.2037037037037038e-05,
"loss": 0.217,
"step": 8050
},
{
"epoch": 5.757142857142857,
"grad_norm": 2.970276355743408,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.3246,
"step": 8060
},
{
"epoch": 5.764285714285714,
"grad_norm": 1.6729111671447754,
"learning_rate": 2.1962962962962964e-05,
"loss": 0.224,
"step": 8070
},
{
"epoch": 5.771428571428571,
"grad_norm": 2.103708267211914,
"learning_rate": 2.1925925925925926e-05,
"loss": 0.2256,
"step": 8080
},
{
"epoch": 5.7785714285714285,
"grad_norm": 1.7059235572814941,
"learning_rate": 2.188888888888889e-05,
"loss": 0.2986,
"step": 8090
},
{
"epoch": 5.785714285714286,
"grad_norm": 1.6239415407180786,
"learning_rate": 2.1851851851851852e-05,
"loss": 0.3007,
"step": 8100
},
{
"epoch": 5.792857142857143,
"grad_norm": 1.5316799879074097,
"learning_rate": 2.1814814814814817e-05,
"loss": 0.2295,
"step": 8110
},
{
"epoch": 5.8,
"grad_norm": 0.9283231496810913,
"learning_rate": 2.177777777777778e-05,
"loss": 0.1624,
"step": 8120
},
{
"epoch": 5.807142857142857,
"grad_norm": 1.4230540990829468,
"learning_rate": 2.174074074074074e-05,
"loss": 0.1686,
"step": 8130
},
{
"epoch": 5.814285714285714,
"grad_norm": 1.8694360256195068,
"learning_rate": 2.1703703703703705e-05,
"loss": 0.3416,
"step": 8140
},
{
"epoch": 5.821428571428571,
"grad_norm": 2.144221782684326,
"learning_rate": 2.1666666666666667e-05,
"loss": 0.2471,
"step": 8150
},
{
"epoch": 5.828571428571428,
"grad_norm": 2.5672965049743652,
"learning_rate": 2.162962962962963e-05,
"loss": 0.3354,
"step": 8160
},
{
"epoch": 5.835714285714285,
"grad_norm": 1.093578577041626,
"learning_rate": 2.1592592592592594e-05,
"loss": 0.262,
"step": 8170
},
{
"epoch": 5.8428571428571425,
"grad_norm": 0.7076272368431091,
"learning_rate": 2.1555555555555555e-05,
"loss": 0.254,
"step": 8180
},
{
"epoch": 5.85,
"grad_norm": 2.2301125526428223,
"learning_rate": 2.151851851851852e-05,
"loss": 0.1906,
"step": 8190
},
{
"epoch": 5.857142857142857,
"grad_norm": 1.704037070274353,
"learning_rate": 2.148148148148148e-05,
"loss": 0.2802,
"step": 8200
},
{
"epoch": 5.864285714285714,
"grad_norm": 1.4877769947052002,
"learning_rate": 2.1444444444444443e-05,
"loss": 0.3327,
"step": 8210
},
{
"epoch": 5.871428571428572,
"grad_norm": 1.436059594154358,
"learning_rate": 2.1407407407407408e-05,
"loss": 0.271,
"step": 8220
},
{
"epoch": 5.878571428571428,
"grad_norm": 1.357176661491394,
"learning_rate": 2.137037037037037e-05,
"loss": 0.2481,
"step": 8230
},
{
"epoch": 5.885714285714286,
"grad_norm": 1.846593976020813,
"learning_rate": 2.1333333333333335e-05,
"loss": 0.2641,
"step": 8240
},
{
"epoch": 5.892857142857143,
"grad_norm": 2.4631927013397217,
"learning_rate": 2.1296296296296296e-05,
"loss": 0.2832,
"step": 8250
},
{
"epoch": 5.9,
"grad_norm": 1.8715349435806274,
"learning_rate": 2.1259259259259258e-05,
"loss": 0.4157,
"step": 8260
},
{
"epoch": 5.9071428571428575,
"grad_norm": 2.3173437118530273,
"learning_rate": 2.1222222222222223e-05,
"loss": 0.353,
"step": 8270
},
{
"epoch": 5.914285714285715,
"grad_norm": 2.049422025680542,
"learning_rate": 2.1185185185185184e-05,
"loss": 0.2613,
"step": 8280
},
{
"epoch": 5.921428571428572,
"grad_norm": 1.281841516494751,
"learning_rate": 2.114814814814815e-05,
"loss": 0.2287,
"step": 8290
},
{
"epoch": 5.928571428571429,
"grad_norm": 1.007407546043396,
"learning_rate": 2.111111111111111e-05,
"loss": 0.2139,
"step": 8300
},
{
"epoch": 5.935714285714286,
"grad_norm": 1.8036701679229736,
"learning_rate": 2.1074074074074072e-05,
"loss": 0.2511,
"step": 8310
},
{
"epoch": 5.942857142857143,
"grad_norm": 0.9559861421585083,
"learning_rate": 2.1037037037037037e-05,
"loss": 0.3371,
"step": 8320
},
{
"epoch": 5.95,
"grad_norm": 2.136070489883423,
"learning_rate": 2.1e-05,
"loss": 0.2321,
"step": 8330
},
{
"epoch": 5.957142857142857,
"grad_norm": 1.2442055940628052,
"learning_rate": 2.0962962962962964e-05,
"loss": 0.1819,
"step": 8340
},
{
"epoch": 5.964285714285714,
"grad_norm": 2.0479979515075684,
"learning_rate": 2.0925925925925925e-05,
"loss": 0.3796,
"step": 8350
},
{
"epoch": 5.9714285714285715,
"grad_norm": 1.6974670886993408,
"learning_rate": 2.088888888888889e-05,
"loss": 0.1947,
"step": 8360
},
{
"epoch": 5.978571428571429,
"grad_norm": 2.1099231243133545,
"learning_rate": 2.0851851851851852e-05,
"loss": 0.1847,
"step": 8370
},
{
"epoch": 5.985714285714286,
"grad_norm": 1.9181057214736938,
"learning_rate": 2.0814814814814813e-05,
"loss": 0.3513,
"step": 8380
},
{
"epoch": 5.992857142857143,
"grad_norm": 1.0576838254928589,
"learning_rate": 2.077777777777778e-05,
"loss": 0.2663,
"step": 8390
},
{
"epoch": 6.0,
"grad_norm": 1.1283502578735352,
"learning_rate": 2.074074074074074e-05,
"loss": 0.2872,
"step": 8400
},
{
"epoch": 6.007142857142857,
"grad_norm": 0.7001394629478455,
"learning_rate": 2.0703703703703705e-05,
"loss": 0.277,
"step": 8410
},
{
"epoch": 6.014285714285714,
"grad_norm": 1.6374051570892334,
"learning_rate": 2.0666666666666666e-05,
"loss": 0.1849,
"step": 8420
},
{
"epoch": 6.021428571428571,
"grad_norm": 1.674914836883545,
"learning_rate": 2.0629629629629628e-05,
"loss": 0.1756,
"step": 8430
},
{
"epoch": 6.0285714285714285,
"grad_norm": 2.592038154602051,
"learning_rate": 2.0592592592592593e-05,
"loss": 0.3725,
"step": 8440
},
{
"epoch": 6.035714285714286,
"grad_norm": 2.942992925643921,
"learning_rate": 2.0555555555555555e-05,
"loss": 0.2529,
"step": 8450
},
{
"epoch": 6.042857142857143,
"grad_norm": 1.7580475807189941,
"learning_rate": 2.051851851851852e-05,
"loss": 0.1549,
"step": 8460
},
{
"epoch": 6.05,
"grad_norm": 1.9032413959503174,
"learning_rate": 2.048148148148148e-05,
"loss": 0.2529,
"step": 8470
},
{
"epoch": 6.057142857142857,
"grad_norm": 1.7678323984146118,
"learning_rate": 2.0444444444444446e-05,
"loss": 0.1935,
"step": 8480
},
{
"epoch": 6.064285714285714,
"grad_norm": 1.7014952898025513,
"learning_rate": 2.0407407407407408e-05,
"loss": 0.1965,
"step": 8490
},
{
"epoch": 6.071428571428571,
"grad_norm": 2.053157091140747,
"learning_rate": 2.037037037037037e-05,
"loss": 0.2045,
"step": 8500
},
{
"epoch": 6.078571428571428,
"grad_norm": 2.448059320449829,
"learning_rate": 2.0333333333333334e-05,
"loss": 0.2275,
"step": 8510
},
{
"epoch": 6.085714285714285,
"grad_norm": 1.3505144119262695,
"learning_rate": 2.0296296296296296e-05,
"loss": 0.192,
"step": 8520
},
{
"epoch": 6.0928571428571425,
"grad_norm": 1.0717148780822754,
"learning_rate": 2.025925925925926e-05,
"loss": 0.3017,
"step": 8530
},
{
"epoch": 6.1,
"grad_norm": 2.872880220413208,
"learning_rate": 2.0222222222222222e-05,
"loss": 0.2583,
"step": 8540
},
{
"epoch": 6.107142857142857,
"grad_norm": 1.559588074684143,
"learning_rate": 2.0185185185185187e-05,
"loss": 0.1557,
"step": 8550
},
{
"epoch": 6.114285714285714,
"grad_norm": 1.4375160932540894,
"learning_rate": 2.014814814814815e-05,
"loss": 0.1165,
"step": 8560
},
{
"epoch": 6.121428571428571,
"grad_norm": 1.1922268867492676,
"learning_rate": 2.011111111111111e-05,
"loss": 0.1995,
"step": 8570
},
{
"epoch": 6.128571428571428,
"grad_norm": 2.267056465148926,
"learning_rate": 2.0074074074074075e-05,
"loss": 0.2176,
"step": 8580
},
{
"epoch": 6.135714285714286,
"grad_norm": 1.5485496520996094,
"learning_rate": 2.0037037037037037e-05,
"loss": 0.206,
"step": 8590
},
{
"epoch": 6.142857142857143,
"grad_norm": 1.9538283348083496,
"learning_rate": 2e-05,
"loss": 0.3173,
"step": 8600
},
{
"epoch": 6.15,
"grad_norm": 2.8216044902801514,
"learning_rate": 1.9962962962962963e-05,
"loss": 0.3077,
"step": 8610
},
{
"epoch": 6.1571428571428575,
"grad_norm": 2.5293240547180176,
"learning_rate": 1.9925925925925925e-05,
"loss": 0.2829,
"step": 8620
},
{
"epoch": 6.164285714285715,
"grad_norm": 1.7947183847427368,
"learning_rate": 1.988888888888889e-05,
"loss": 0.3212,
"step": 8630
},
{
"epoch": 6.171428571428572,
"grad_norm": 1.541588544845581,
"learning_rate": 1.985185185185185e-05,
"loss": 0.1985,
"step": 8640
},
{
"epoch": 6.178571428571429,
"grad_norm": 1.286007046699524,
"learning_rate": 1.9814814814814816e-05,
"loss": 0.279,
"step": 8650
},
{
"epoch": 6.185714285714286,
"grad_norm": 1.8692234754562378,
"learning_rate": 1.9777777777777778e-05,
"loss": 0.303,
"step": 8660
},
{
"epoch": 6.192857142857143,
"grad_norm": 1.7906513214111328,
"learning_rate": 1.9740740740740743e-05,
"loss": 0.218,
"step": 8670
},
{
"epoch": 6.2,
"grad_norm": 2.0737709999084473,
"learning_rate": 1.9703703703703704e-05,
"loss": 0.1559,
"step": 8680
},
{
"epoch": 6.207142857142857,
"grad_norm": 1.8082749843597412,
"learning_rate": 1.9666666666666666e-05,
"loss": 0.2713,
"step": 8690
},
{
"epoch": 6.214285714285714,
"grad_norm": 1.8988617658615112,
"learning_rate": 1.962962962962963e-05,
"loss": 0.2362,
"step": 8700
},
{
"epoch": 6.2214285714285715,
"grad_norm": 0.8727281093597412,
"learning_rate": 1.9592592592592592e-05,
"loss": 0.2571,
"step": 8710
},
{
"epoch": 6.228571428571429,
"grad_norm": 1.0203776359558105,
"learning_rate": 1.9555555555555557e-05,
"loss": 0.2884,
"step": 8720
},
{
"epoch": 6.235714285714286,
"grad_norm": 1.5776811838150024,
"learning_rate": 1.951851851851852e-05,
"loss": 0.3115,
"step": 8730
},
{
"epoch": 6.242857142857143,
"grad_norm": 2.1000545024871826,
"learning_rate": 1.948148148148148e-05,
"loss": 0.2936,
"step": 8740
},
{
"epoch": 6.25,
"grad_norm": 1.991640329360962,
"learning_rate": 1.9444444444444445e-05,
"loss": 0.2214,
"step": 8750
},
{
"epoch": 6.257142857142857,
"grad_norm": 3.8238637447357178,
"learning_rate": 1.9407407407407407e-05,
"loss": 0.2738,
"step": 8760
},
{
"epoch": 6.264285714285714,
"grad_norm": 2.818711042404175,
"learning_rate": 1.9370370370370372e-05,
"loss": 0.3107,
"step": 8770
},
{
"epoch": 6.271428571428571,
"grad_norm": 1.4565989971160889,
"learning_rate": 1.9333333333333333e-05,
"loss": 0.1351,
"step": 8780
},
{
"epoch": 6.2785714285714285,
"grad_norm": 1.6833415031433105,
"learning_rate": 1.92962962962963e-05,
"loss": 0.3359,
"step": 8790
},
{
"epoch": 6.285714285714286,
"grad_norm": 3.662572145462036,
"learning_rate": 1.925925925925926e-05,
"loss": 0.2338,
"step": 8800
},
{
"epoch": 6.292857142857143,
"grad_norm": 1.9166165590286255,
"learning_rate": 1.922222222222222e-05,
"loss": 0.1908,
"step": 8810
},
{
"epoch": 6.3,
"grad_norm": 2.665553331375122,
"learning_rate": 1.9185185185185186e-05,
"loss": 0.246,
"step": 8820
},
{
"epoch": 6.307142857142857,
"grad_norm": 1.601194143295288,
"learning_rate": 1.9148148148148148e-05,
"loss": 0.2392,
"step": 8830
},
{
"epoch": 6.314285714285714,
"grad_norm": 1.7382382154464722,
"learning_rate": 1.9111111111111113e-05,
"loss": 0.2919,
"step": 8840
},
{
"epoch": 6.321428571428571,
"grad_norm": 1.0822237730026245,
"learning_rate": 1.9074074074074075e-05,
"loss": 0.1179,
"step": 8850
},
{
"epoch": 6.328571428571428,
"grad_norm": 1.9691376686096191,
"learning_rate": 1.903703703703704e-05,
"loss": 0.3934,
"step": 8860
},
{
"epoch": 6.335714285714285,
"grad_norm": 0.8395001292228699,
"learning_rate": 1.9e-05,
"loss": 0.2004,
"step": 8870
},
{
"epoch": 6.3428571428571425,
"grad_norm": 1.6967720985412598,
"learning_rate": 1.8962962962962963e-05,
"loss": 0.204,
"step": 8880
},
{
"epoch": 6.35,
"grad_norm": 1.2601035833358765,
"learning_rate": 1.8925925925925928e-05,
"loss": 0.2769,
"step": 8890
},
{
"epoch": 6.357142857142857,
"grad_norm": 1.560940146446228,
"learning_rate": 1.888888888888889e-05,
"loss": 0.1409,
"step": 8900
},
{
"epoch": 6.364285714285714,
"grad_norm": 1.645814061164856,
"learning_rate": 1.8851851851851854e-05,
"loss": 0.1914,
"step": 8910
},
{
"epoch": 6.371428571428572,
"grad_norm": 1.4886109828948975,
"learning_rate": 1.8814814814814816e-05,
"loss": 0.3517,
"step": 8920
},
{
"epoch": 6.378571428571428,
"grad_norm": 1.2002378702163696,
"learning_rate": 1.8777777777777777e-05,
"loss": 0.2346,
"step": 8930
},
{
"epoch": 6.385714285714286,
"grad_norm": 2.4492478370666504,
"learning_rate": 1.8740740740740742e-05,
"loss": 0.2104,
"step": 8940
},
{
"epoch": 6.392857142857143,
"grad_norm": 2.315610408782959,
"learning_rate": 1.8703703703703704e-05,
"loss": 0.2321,
"step": 8950
},
{
"epoch": 6.4,
"grad_norm": 2.116260528564453,
"learning_rate": 1.866666666666667e-05,
"loss": 0.2092,
"step": 8960
},
{
"epoch": 6.4071428571428575,
"grad_norm": 1.7362505197525024,
"learning_rate": 1.862962962962963e-05,
"loss": 0.2598,
"step": 8970
},
{
"epoch": 6.414285714285715,
"grad_norm": 2.1754469871520996,
"learning_rate": 1.8592592592592595e-05,
"loss": 0.3035,
"step": 8980
},
{
"epoch": 6.421428571428572,
"grad_norm": 1.448285698890686,
"learning_rate": 1.8555555555555557e-05,
"loss": 0.227,
"step": 8990
},
{
"epoch": 6.428571428571429,
"grad_norm": 1.888242483139038,
"learning_rate": 1.8518518518518518e-05,
"loss": 0.1618,
"step": 9000
},
{
"epoch": 6.428571428571429,
"eval_loss": 0.3434120714664459,
"eval_rouge1": 0.908,
"eval_rouge2": 0.8486,
"eval_rougeL": 0.9052,
"eval_runtime": 122.2937,
"eval_samples_per_second": 11.448,
"eval_steps_per_second": 5.724,
"step": 9000
},
{
"epoch": 6.435714285714286,
"grad_norm": 2.5552051067352295,
"learning_rate": 1.8481481481481483e-05,
"loss": 0.4376,
"step": 9010
},
{
"epoch": 6.442857142857143,
"grad_norm": 2.0973517894744873,
"learning_rate": 1.8444444444444445e-05,
"loss": 0.2163,
"step": 9020
},
{
"epoch": 6.45,
"grad_norm": 1.3774244785308838,
"learning_rate": 1.840740740740741e-05,
"loss": 0.14,
"step": 9030
},
{
"epoch": 6.457142857142857,
"grad_norm": 0.8735131025314331,
"learning_rate": 1.837037037037037e-05,
"loss": 0.1848,
"step": 9040
},
{
"epoch": 6.464285714285714,
"grad_norm": 1.5088914632797241,
"learning_rate": 1.8333333333333333e-05,
"loss": 0.2889,
"step": 9050
},
{
"epoch": 6.4714285714285715,
"grad_norm": 1.0688769817352295,
"learning_rate": 1.8296296296296298e-05,
"loss": 0.1895,
"step": 9060
},
{
"epoch": 6.478571428571429,
"grad_norm": 1.43760085105896,
"learning_rate": 1.825925925925926e-05,
"loss": 0.2997,
"step": 9070
},
{
"epoch": 6.485714285714286,
"grad_norm": 1.1168969869613647,
"learning_rate": 1.8222222222222224e-05,
"loss": 0.3544,
"step": 9080
},
{
"epoch": 6.492857142857143,
"grad_norm": 1.7139670848846436,
"learning_rate": 1.8185185185185186e-05,
"loss": 0.2108,
"step": 9090
},
{
"epoch": 6.5,
"grad_norm": 1.2584503889083862,
"learning_rate": 1.814814814814815e-05,
"loss": 0.2791,
"step": 9100
},
{
"epoch": 6.507142857142857,
"grad_norm": 1.4440019130706787,
"learning_rate": 1.8111111111111112e-05,
"loss": 0.3745,
"step": 9110
},
{
"epoch": 6.514285714285714,
"grad_norm": 2.3828232288360596,
"learning_rate": 1.8074074074074074e-05,
"loss": 0.2159,
"step": 9120
},
{
"epoch": 6.521428571428571,
"grad_norm": 2.6553053855895996,
"learning_rate": 1.803703703703704e-05,
"loss": 0.3051,
"step": 9130
},
{
"epoch": 6.5285714285714285,
"grad_norm": 2.0669426918029785,
"learning_rate": 1.8e-05,
"loss": 0.2166,
"step": 9140
},
{
"epoch": 6.535714285714286,
"grad_norm": 1.4676064252853394,
"learning_rate": 1.7962962962962965e-05,
"loss": 0.2393,
"step": 9150
},
{
"epoch": 6.542857142857143,
"grad_norm": 1.5158963203430176,
"learning_rate": 1.7925925925925927e-05,
"loss": 0.2821,
"step": 9160
},
{
"epoch": 6.55,
"grad_norm": 1.438550591468811,
"learning_rate": 1.788888888888889e-05,
"loss": 0.229,
"step": 9170
},
{
"epoch": 6.557142857142857,
"grad_norm": 2.2161788940429688,
"learning_rate": 1.7851851851851853e-05,
"loss": 0.3705,
"step": 9180
},
{
"epoch": 6.564285714285714,
"grad_norm": 1.472321629524231,
"learning_rate": 1.7814814814814815e-05,
"loss": 0.3977,
"step": 9190
},
{
"epoch": 6.571428571428571,
"grad_norm": 1.957033395767212,
"learning_rate": 1.777777777777778e-05,
"loss": 0.2431,
"step": 9200
},
{
"epoch": 6.578571428571428,
"grad_norm": 3.070905923843384,
"learning_rate": 1.774074074074074e-05,
"loss": 0.2676,
"step": 9210
},
{
"epoch": 6.585714285714285,
"grad_norm": 2.240701198577881,
"learning_rate": 1.7703703703703706e-05,
"loss": 0.2346,
"step": 9220
},
{
"epoch": 6.5928571428571425,
"grad_norm": 1.2726478576660156,
"learning_rate": 1.7666666666666668e-05,
"loss": 0.2624,
"step": 9230
},
{
"epoch": 6.6,
"grad_norm": 2.543856382369995,
"learning_rate": 1.762962962962963e-05,
"loss": 0.3137,
"step": 9240
},
{
"epoch": 6.607142857142857,
"grad_norm": 2.1688966751098633,
"learning_rate": 1.7592592592592595e-05,
"loss": 0.3366,
"step": 9250
},
{
"epoch": 6.614285714285714,
"grad_norm": 1.9013522863388062,
"learning_rate": 1.7555555555555556e-05,
"loss": 0.1759,
"step": 9260
},
{
"epoch": 6.621428571428572,
"grad_norm": 2.7567338943481445,
"learning_rate": 1.751851851851852e-05,
"loss": 0.2615,
"step": 9270
},
{
"epoch": 6.628571428571428,
"grad_norm": 2.530351161956787,
"learning_rate": 1.7481481481481483e-05,
"loss": 0.363,
"step": 9280
},
{
"epoch": 6.635714285714286,
"grad_norm": 3.0051562786102295,
"learning_rate": 1.7444444444444448e-05,
"loss": 0.2155,
"step": 9290
},
{
"epoch": 6.642857142857143,
"grad_norm": 1.6199374198913574,
"learning_rate": 1.740740740740741e-05,
"loss": 0.1943,
"step": 9300
},
{
"epoch": 6.65,
"grad_norm": 2.2254199981689453,
"learning_rate": 1.737037037037037e-05,
"loss": 0.2086,
"step": 9310
},
{
"epoch": 6.6571428571428575,
"grad_norm": 1.4565106630325317,
"learning_rate": 1.7333333333333336e-05,
"loss": 0.2113,
"step": 9320
},
{
"epoch": 6.664285714285715,
"grad_norm": 1.8667312860488892,
"learning_rate": 1.7296296296296297e-05,
"loss": 0.1719,
"step": 9330
},
{
"epoch": 6.671428571428572,
"grad_norm": 2.0462963581085205,
"learning_rate": 1.7259259259259262e-05,
"loss": 0.2307,
"step": 9340
},
{
"epoch": 6.678571428571429,
"grad_norm": 1.5114613771438599,
"learning_rate": 1.7222222222222224e-05,
"loss": 0.2629,
"step": 9350
},
{
"epoch": 6.685714285714286,
"grad_norm": 1.8743935823440552,
"learning_rate": 1.7185185185185185e-05,
"loss": 0.2656,
"step": 9360
},
{
"epoch": 6.692857142857143,
"grad_norm": 1.6508034467697144,
"learning_rate": 1.714814814814815e-05,
"loss": 0.2971,
"step": 9370
},
{
"epoch": 6.7,
"grad_norm": 1.4109563827514648,
"learning_rate": 1.7111111111111112e-05,
"loss": 0.3155,
"step": 9380
},
{
"epoch": 6.707142857142857,
"grad_norm": 1.9742975234985352,
"learning_rate": 1.7074074074074077e-05,
"loss": 0.2858,
"step": 9390
},
{
"epoch": 6.714285714285714,
"grad_norm": 0.8593278527259827,
"learning_rate": 1.7037037037037038e-05,
"loss": 0.2484,
"step": 9400
},
{
"epoch": 6.7214285714285715,
"grad_norm": 1.8331007957458496,
"learning_rate": 1.7000000000000003e-05,
"loss": 0.2763,
"step": 9410
},
{
"epoch": 6.728571428571429,
"grad_norm": 2.0606274604797363,
"learning_rate": 1.6962962962962965e-05,
"loss": 0.2016,
"step": 9420
},
{
"epoch": 6.735714285714286,
"grad_norm": 1.244935154914856,
"learning_rate": 1.6925925925925926e-05,
"loss": 0.2161,
"step": 9430
},
{
"epoch": 6.742857142857143,
"grad_norm": 2.0855889320373535,
"learning_rate": 1.688888888888889e-05,
"loss": 0.1961,
"step": 9440
},
{
"epoch": 6.75,
"grad_norm": 2.203310012817383,
"learning_rate": 1.6851851851851853e-05,
"loss": 0.1886,
"step": 9450
},
{
"epoch": 6.757142857142857,
"grad_norm": 2.1254501342773438,
"learning_rate": 1.6814814814814818e-05,
"loss": 0.2824,
"step": 9460
},
{
"epoch": 6.764285714285714,
"grad_norm": 1.498728632926941,
"learning_rate": 1.677777777777778e-05,
"loss": 0.2848,
"step": 9470
},
{
"epoch": 6.771428571428571,
"grad_norm": 2.6205763816833496,
"learning_rate": 1.674074074074074e-05,
"loss": 0.2728,
"step": 9480
},
{
"epoch": 6.7785714285714285,
"grad_norm": 1.6262216567993164,
"learning_rate": 1.6703703703703706e-05,
"loss": 0.4216,
"step": 9490
},
{
"epoch": 6.785714285714286,
"grad_norm": 3.074489116668701,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.2084,
"step": 9500
},
{
"epoch": 6.792857142857143,
"grad_norm": 1.8158230781555176,
"learning_rate": 1.6629629629629632e-05,
"loss": 0.1794,
"step": 9510
},
{
"epoch": 6.8,
"grad_norm": 2.069397449493408,
"learning_rate": 1.6592592592592594e-05,
"loss": 0.2363,
"step": 9520
},
{
"epoch": 6.807142857142857,
"grad_norm": 1.8637501001358032,
"learning_rate": 1.655555555555556e-05,
"loss": 0.2203,
"step": 9530
},
{
"epoch": 6.814285714285714,
"grad_norm": 2.043314218521118,
"learning_rate": 1.651851851851852e-05,
"loss": 0.2267,
"step": 9540
},
{
"epoch": 6.821428571428571,
"grad_norm": 2.8327081203460693,
"learning_rate": 1.6481481481481482e-05,
"loss": 0.2793,
"step": 9550
},
{
"epoch": 6.828571428571428,
"grad_norm": 2.3297407627105713,
"learning_rate": 1.6444444444444447e-05,
"loss": 0.2349,
"step": 9560
},
{
"epoch": 6.835714285714285,
"grad_norm": 0.9220748543739319,
"learning_rate": 1.640740740740741e-05,
"loss": 0.1966,
"step": 9570
},
{
"epoch": 6.8428571428571425,
"grad_norm": 1.5935183763504028,
"learning_rate": 1.6370370370370374e-05,
"loss": 0.3217,
"step": 9580
},
{
"epoch": 6.85,
"grad_norm": 0.9305605292320251,
"learning_rate": 1.6333333333333335e-05,
"loss": 0.1446,
"step": 9590
},
{
"epoch": 6.857142857142857,
"grad_norm": 2.0719094276428223,
"learning_rate": 1.62962962962963e-05,
"loss": 0.2195,
"step": 9600
},
{
"epoch": 6.864285714285714,
"grad_norm": 1.9230345487594604,
"learning_rate": 1.625925925925926e-05,
"loss": 0.2031,
"step": 9610
},
{
"epoch": 6.871428571428572,
"grad_norm": 1.7897018194198608,
"learning_rate": 1.6222222222222223e-05,
"loss": 0.1728,
"step": 9620
},
{
"epoch": 6.878571428571428,
"grad_norm": 2.4588770866394043,
"learning_rate": 1.6185185185185188e-05,
"loss": 0.3253,
"step": 9630
},
{
"epoch": 6.885714285714286,
"grad_norm": 1.2495237588882446,
"learning_rate": 1.614814814814815e-05,
"loss": 0.3539,
"step": 9640
},
{
"epoch": 6.892857142857143,
"grad_norm": 3.161078453063965,
"learning_rate": 1.6111111111111115e-05,
"loss": 0.3598,
"step": 9650
},
{
"epoch": 6.9,
"grad_norm": 1.9474009275436401,
"learning_rate": 1.6074074074074076e-05,
"loss": 0.1385,
"step": 9660
},
{
"epoch": 6.9071428571428575,
"grad_norm": 1.9687261581420898,
"learning_rate": 1.6037037037037038e-05,
"loss": 0.2375,
"step": 9670
},
{
"epoch": 6.914285714285715,
"grad_norm": 1.87405264377594,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.3374,
"step": 9680
},
{
"epoch": 6.921428571428572,
"grad_norm": 1.1928725242614746,
"learning_rate": 1.5962962962962964e-05,
"loss": 0.197,
"step": 9690
},
{
"epoch": 6.928571428571429,
"grad_norm": 1.6434850692749023,
"learning_rate": 1.5925925925925926e-05,
"loss": 0.2757,
"step": 9700
},
{
"epoch": 6.935714285714286,
"grad_norm": 1.1469305753707886,
"learning_rate": 1.588888888888889e-05,
"loss": 0.1793,
"step": 9710
},
{
"epoch": 6.942857142857143,
"grad_norm": 2.436051368713379,
"learning_rate": 1.5851851851851852e-05,
"loss": 0.3178,
"step": 9720
},
{
"epoch": 6.95,
"grad_norm": 1.8960529565811157,
"learning_rate": 1.5814814814814817e-05,
"loss": 0.2182,
"step": 9730
},
{
"epoch": 6.957142857142857,
"grad_norm": 1.755922794342041,
"learning_rate": 1.577777777777778e-05,
"loss": 0.3239,
"step": 9740
},
{
"epoch": 6.964285714285714,
"grad_norm": 2.202697515487671,
"learning_rate": 1.574074074074074e-05,
"loss": 0.2116,
"step": 9750
},
{
"epoch": 6.9714285714285715,
"grad_norm": 1.4491599798202515,
"learning_rate": 1.5703703703703705e-05,
"loss": 0.2329,
"step": 9760
},
{
"epoch": 6.978571428571429,
"grad_norm": 9.212343215942383,
"learning_rate": 1.5666666666666667e-05,
"loss": 0.2573,
"step": 9770
},
{
"epoch": 6.985714285714286,
"grad_norm": 1.2211856842041016,
"learning_rate": 1.5629629629629632e-05,
"loss": 0.2737,
"step": 9780
},
{
"epoch": 6.992857142857143,
"grad_norm": 1.59877347946167,
"learning_rate": 1.5592592592592593e-05,
"loss": 0.284,
"step": 9790
},
{
"epoch": 7.0,
"grad_norm": 2.295945882797241,
"learning_rate": 1.5555555555555555e-05,
"loss": 0.2076,
"step": 9800
},
{
"epoch": 7.007142857142857,
"grad_norm": 1.4388489723205566,
"learning_rate": 1.551851851851852e-05,
"loss": 0.2225,
"step": 9810
},
{
"epoch": 7.014285714285714,
"grad_norm": 1.9146931171417236,
"learning_rate": 1.548148148148148e-05,
"loss": 0.2917,
"step": 9820
},
{
"epoch": 7.021428571428571,
"grad_norm": 1.0212804079055786,
"learning_rate": 1.5444444444444446e-05,
"loss": 0.1537,
"step": 9830
},
{
"epoch": 7.0285714285714285,
"grad_norm": 2.146648645401001,
"learning_rate": 1.5407407407407408e-05,
"loss": 0.178,
"step": 9840
},
{
"epoch": 7.035714285714286,
"grad_norm": 2.4515628814697266,
"learning_rate": 1.537037037037037e-05,
"loss": 0.3043,
"step": 9850
},
{
"epoch": 7.042857142857143,
"grad_norm": 1.6906862258911133,
"learning_rate": 1.5333333333333334e-05,
"loss": 0.2787,
"step": 9860
},
{
"epoch": 7.05,
"grad_norm": 2.2019400596618652,
"learning_rate": 1.5296296296296296e-05,
"loss": 0.3236,
"step": 9870
},
{
"epoch": 7.057142857142857,
"grad_norm": 1.3307303190231323,
"learning_rate": 1.5259259259259258e-05,
"loss": 0.1875,
"step": 9880
},
{
"epoch": 7.064285714285714,
"grad_norm": 1.7358342409133911,
"learning_rate": 1.5222222222222224e-05,
"loss": 0.2149,
"step": 9890
},
{
"epoch": 7.071428571428571,
"grad_norm": 2.0298547744750977,
"learning_rate": 1.5185185185185186e-05,
"loss": 0.1876,
"step": 9900
},
{
"epoch": 7.078571428571428,
"grad_norm": 2.375779151916504,
"learning_rate": 1.514814814814815e-05,
"loss": 0.2289,
"step": 9910
},
{
"epoch": 7.085714285714285,
"grad_norm": 1.856911540031433,
"learning_rate": 1.5111111111111112e-05,
"loss": 0.2029,
"step": 9920
},
{
"epoch": 7.0928571428571425,
"grad_norm": 1.1523020267486572,
"learning_rate": 1.5074074074074074e-05,
"loss": 0.1753,
"step": 9930
},
{
"epoch": 7.1,
"grad_norm": 1.4677330255508423,
"learning_rate": 1.5037037037037039e-05,
"loss": 0.2256,
"step": 9940
},
{
"epoch": 7.107142857142857,
"grad_norm": 1.0742135047912598,
"learning_rate": 1.5e-05,
"loss": 0.3844,
"step": 9950
},
{
"epoch": 7.114285714285714,
"grad_norm": 1.4122258424758911,
"learning_rate": 1.4962962962962965e-05,
"loss": 0.1498,
"step": 9960
},
{
"epoch": 7.121428571428571,
"grad_norm": 1.9363057613372803,
"learning_rate": 1.4925925925925927e-05,
"loss": 0.2721,
"step": 9970
},
{
"epoch": 7.128571428571428,
"grad_norm": 0.7882018685340881,
"learning_rate": 1.4888888888888888e-05,
"loss": 0.2363,
"step": 9980
},
{
"epoch": 7.135714285714286,
"grad_norm": 1.8561784029006958,
"learning_rate": 1.4851851851851853e-05,
"loss": 0.2875,
"step": 9990
},
{
"epoch": 7.142857142857143,
"grad_norm": 1.9597991704940796,
"learning_rate": 1.4814814814814815e-05,
"loss": 0.1984,
"step": 10000
},
{
"epoch": 7.142857142857143,
"eval_loss": 0.34164437651634216,
"eval_rouge1": 0.9089,
"eval_rouge2": 0.8501,
"eval_rougeL": 0.9063,
"eval_runtime": 122.2974,
"eval_samples_per_second": 11.448,
"eval_steps_per_second": 5.724,
"step": 10000
},
{
"epoch": 7.15,
"grad_norm": 2.1984336376190186,
"learning_rate": 1.477777777777778e-05,
"loss": 0.2409,
"step": 10010
},
{
"epoch": 7.1571428571428575,
"grad_norm": 1.1259089708328247,
"learning_rate": 1.4740740740740741e-05,
"loss": 0.1386,
"step": 10020
},
{
"epoch": 7.164285714285715,
"grad_norm": 2.6497113704681396,
"learning_rate": 1.4703703703703706e-05,
"loss": 0.2999,
"step": 10030
},
{
"epoch": 7.171428571428572,
"grad_norm": 2.7574968338012695,
"learning_rate": 1.4666666666666668e-05,
"loss": 0.2064,
"step": 10040
},
{
"epoch": 7.178571428571429,
"grad_norm": 2.4020519256591797,
"learning_rate": 1.462962962962963e-05,
"loss": 0.2539,
"step": 10050
},
{
"epoch": 7.185714285714286,
"grad_norm": 1.8728407621383667,
"learning_rate": 1.4592592592592594e-05,
"loss": 0.2264,
"step": 10060
},
{
"epoch": 7.192857142857143,
"grad_norm": 3.187389612197876,
"learning_rate": 1.4555555555555556e-05,
"loss": 0.2175,
"step": 10070
},
{
"epoch": 7.2,
"grad_norm": 1.6992945671081543,
"learning_rate": 1.4518518518518521e-05,
"loss": 0.1752,
"step": 10080
},
{
"epoch": 7.207142857142857,
"grad_norm": 1.2373261451721191,
"learning_rate": 1.4481481481481483e-05,
"loss": 0.1712,
"step": 10090
},
{
"epoch": 7.214285714285714,
"grad_norm": 1.3986244201660156,
"learning_rate": 1.4444444444444444e-05,
"loss": 0.1727,
"step": 10100
},
{
"epoch": 7.2214285714285715,
"grad_norm": 1.5018147230148315,
"learning_rate": 1.4407407407407409e-05,
"loss": 0.2309,
"step": 10110
},
{
"epoch": 7.228571428571429,
"grad_norm": 1.8186851739883423,
"learning_rate": 1.437037037037037e-05,
"loss": 0.2741,
"step": 10120
},
{
"epoch": 7.235714285714286,
"grad_norm": 2.8224360942840576,
"learning_rate": 1.4333333333333334e-05,
"loss": 0.2244,
"step": 10130
},
{
"epoch": 7.242857142857143,
"grad_norm": 1.900585412979126,
"learning_rate": 1.4296296296296297e-05,
"loss": 0.2527,
"step": 10140
},
{
"epoch": 7.25,
"grad_norm": 2.4210896492004395,
"learning_rate": 1.425925925925926e-05,
"loss": 0.3832,
"step": 10150
},
{
"epoch": 7.257142857142857,
"grad_norm": 1.2783209085464478,
"learning_rate": 1.4222222222222224e-05,
"loss": 0.1756,
"step": 10160
},
{
"epoch": 7.264285714285714,
"grad_norm": 1.4387212991714478,
"learning_rate": 1.4185185185185185e-05,
"loss": 0.2567,
"step": 10170
},
{
"epoch": 7.271428571428571,
"grad_norm": 2.861311435699463,
"learning_rate": 1.4148148148148148e-05,
"loss": 0.3683,
"step": 10180
},
{
"epoch": 7.2785714285714285,
"grad_norm": 0.8701191544532776,
"learning_rate": 1.4111111111111112e-05,
"loss": 0.2229,
"step": 10190
},
{
"epoch": 7.285714285714286,
"grad_norm": 2.103231430053711,
"learning_rate": 1.4074074074074075e-05,
"loss": 0.1652,
"step": 10200
},
{
"epoch": 7.292857142857143,
"grad_norm": 3.0958895683288574,
"learning_rate": 1.4037037037037038e-05,
"loss": 0.3048,
"step": 10210
},
{
"epoch": 7.3,
"grad_norm": 1.0370267629623413,
"learning_rate": 1.4000000000000001e-05,
"loss": 0.243,
"step": 10220
},
{
"epoch": 7.307142857142857,
"grad_norm": 1.334799885749817,
"learning_rate": 1.3962962962962963e-05,
"loss": 0.2242,
"step": 10230
},
{
"epoch": 7.314285714285714,
"grad_norm": 1.797135829925537,
"learning_rate": 1.3925925925925926e-05,
"loss": 0.1916,
"step": 10240
},
{
"epoch": 7.321428571428571,
"grad_norm": 0.7588611841201782,
"learning_rate": 1.388888888888889e-05,
"loss": 0.2548,
"step": 10250
},
{
"epoch": 7.328571428571428,
"grad_norm": 1.9136377573013306,
"learning_rate": 1.3851851851851853e-05,
"loss": 0.2373,
"step": 10260
},
{
"epoch": 7.335714285714285,
"grad_norm": 1.326635479927063,
"learning_rate": 1.3814814814814816e-05,
"loss": 0.2597,
"step": 10270
},
{
"epoch": 7.3428571428571425,
"grad_norm": 2.400609016418457,
"learning_rate": 1.3777777777777778e-05,
"loss": 0.2648,
"step": 10280
},
{
"epoch": 7.35,
"grad_norm": 0.9898678064346313,
"learning_rate": 1.3740740740740741e-05,
"loss": 0.2304,
"step": 10290
},
{
"epoch": 7.357142857142857,
"grad_norm": 0.7826656103134155,
"learning_rate": 1.3703703703703704e-05,
"loss": 0.1209,
"step": 10300
},
{
"epoch": 7.364285714285714,
"grad_norm": 1.083044409751892,
"learning_rate": 1.3666666666666666e-05,
"loss": 0.2026,
"step": 10310
},
{
"epoch": 7.371428571428572,
"grad_norm": 1.283219814300537,
"learning_rate": 1.362962962962963e-05,
"loss": 0.2018,
"step": 10320
},
{
"epoch": 7.378571428571428,
"grad_norm": 1.9941823482513428,
"learning_rate": 1.3592592592592592e-05,
"loss": 0.2469,
"step": 10330
},
{
"epoch": 7.385714285714286,
"grad_norm": 2.553957462310791,
"learning_rate": 1.3555555555555557e-05,
"loss": 0.2661,
"step": 10340
},
{
"epoch": 7.392857142857143,
"grad_norm": 1.657182216644287,
"learning_rate": 1.3518518518518519e-05,
"loss": 0.2762,
"step": 10350
},
{
"epoch": 7.4,
"grad_norm": 1.6704496145248413,
"learning_rate": 1.348148148148148e-05,
"loss": 0.222,
"step": 10360
},
{
"epoch": 7.4071428571428575,
"grad_norm": 1.338329792022705,
"learning_rate": 1.3444444444444445e-05,
"loss": 0.2658,
"step": 10370
},
{
"epoch": 7.414285714285715,
"grad_norm": 1.9741250276565552,
"learning_rate": 1.3407407407407407e-05,
"loss": 0.2596,
"step": 10380
},
{
"epoch": 7.421428571428572,
"grad_norm": 2.523958444595337,
"learning_rate": 1.3370370370370372e-05,
"loss": 0.1553,
"step": 10390
},
{
"epoch": 7.428571428571429,
"grad_norm": 2.260690450668335,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.2942,
"step": 10400
},
{
"epoch": 7.435714285714286,
"grad_norm": 2.441620349884033,
"learning_rate": 1.3296296296296295e-05,
"loss": 0.2451,
"step": 10410
},
{
"epoch": 7.442857142857143,
"grad_norm": 0.7054124474525452,
"learning_rate": 1.325925925925926e-05,
"loss": 0.1862,
"step": 10420
},
{
"epoch": 7.45,
"grad_norm": 1.6281330585479736,
"learning_rate": 1.3222222222222221e-05,
"loss": 0.1714,
"step": 10430
},
{
"epoch": 7.457142857142857,
"grad_norm": 1.738685965538025,
"learning_rate": 1.3185185185185186e-05,
"loss": 0.2052,
"step": 10440
},
{
"epoch": 7.464285714285714,
"grad_norm": 1.9982494115829468,
"learning_rate": 1.3148148148148148e-05,
"loss": 0.2964,
"step": 10450
},
{
"epoch": 7.4714285714285715,
"grad_norm": 1.0081127882003784,
"learning_rate": 1.3111111111111113e-05,
"loss": 0.1956,
"step": 10460
},
{
"epoch": 7.478571428571429,
"grad_norm": 1.5927938222885132,
"learning_rate": 1.3074074074074074e-05,
"loss": 0.236,
"step": 10470
},
{
"epoch": 7.485714285714286,
"grad_norm": 1.7959505319595337,
"learning_rate": 1.3037037037037036e-05,
"loss": 0.2462,
"step": 10480
},
{
"epoch": 7.492857142857143,
"grad_norm": 0.9230768084526062,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.177,
"step": 10490
},
{
"epoch": 7.5,
"grad_norm": 1.8714969158172607,
"learning_rate": 1.2962962962962962e-05,
"loss": 0.2224,
"step": 10500
},
{
"epoch": 7.507142857142857,
"grad_norm": 2.809420585632324,
"learning_rate": 1.2925925925925927e-05,
"loss": 0.2214,
"step": 10510
},
{
"epoch": 7.514285714285714,
"grad_norm": 2.2183682918548584,
"learning_rate": 1.2888888888888889e-05,
"loss": 0.2528,
"step": 10520
},
{
"epoch": 7.521428571428571,
"grad_norm": 2.108675479888916,
"learning_rate": 1.2851851851851854e-05,
"loss": 0.2593,
"step": 10530
},
{
"epoch": 7.5285714285714285,
"grad_norm": 1.9557310342788696,
"learning_rate": 1.2814814814814815e-05,
"loss": 0.1875,
"step": 10540
},
{
"epoch": 7.535714285714286,
"grad_norm": 2.3365464210510254,
"learning_rate": 1.2777777777777777e-05,
"loss": 0.2232,
"step": 10550
},
{
"epoch": 7.542857142857143,
"grad_norm": 1.4713023900985718,
"learning_rate": 1.2740740740740742e-05,
"loss": 0.2592,
"step": 10560
},
{
"epoch": 7.55,
"grad_norm": 2.5241403579711914,
"learning_rate": 1.2703703703703704e-05,
"loss": 0.2632,
"step": 10570
},
{
"epoch": 7.557142857142857,
"grad_norm": 2.726618528366089,
"learning_rate": 1.2666666666666668e-05,
"loss": 0.2068,
"step": 10580
},
{
"epoch": 7.564285714285714,
"grad_norm": 1.2947627305984497,
"learning_rate": 1.262962962962963e-05,
"loss": 0.1846,
"step": 10590
},
{
"epoch": 7.571428571428571,
"grad_norm": 1.4739402532577515,
"learning_rate": 1.2592592592592592e-05,
"loss": 0.1445,
"step": 10600
},
{
"epoch": 7.578571428571428,
"grad_norm": 1.7607239484786987,
"learning_rate": 1.2555555555555557e-05,
"loss": 0.2087,
"step": 10610
},
{
"epoch": 7.585714285714285,
"grad_norm": 1.510556697845459,
"learning_rate": 1.2518518518518518e-05,
"loss": 0.2356,
"step": 10620
},
{
"epoch": 7.5928571428571425,
"grad_norm": 1.4189872741699219,
"learning_rate": 1.2481481481481481e-05,
"loss": 0.2343,
"step": 10630
},
{
"epoch": 7.6,
"grad_norm": 3.209477424621582,
"learning_rate": 1.2444444444444445e-05,
"loss": 0.2131,
"step": 10640
},
{
"epoch": 7.607142857142857,
"grad_norm": 2.026301145553589,
"learning_rate": 1.2407407407407408e-05,
"loss": 0.3643,
"step": 10650
},
{
"epoch": 7.614285714285714,
"grad_norm": 2.355459451675415,
"learning_rate": 1.2370370370370371e-05,
"loss": 0.2382,
"step": 10660
},
{
"epoch": 7.621428571428572,
"grad_norm": 1.6867364645004272,
"learning_rate": 1.2333333333333334e-05,
"loss": 0.1886,
"step": 10670
},
{
"epoch": 7.628571428571428,
"grad_norm": 1.863373041152954,
"learning_rate": 1.2296296296296298e-05,
"loss": 0.2842,
"step": 10680
},
{
"epoch": 7.635714285714286,
"grad_norm": 1.4037106037139893,
"learning_rate": 1.225925925925926e-05,
"loss": 0.297,
"step": 10690
},
{
"epoch": 7.642857142857143,
"grad_norm": 1.2220287322998047,
"learning_rate": 1.2222222222222222e-05,
"loss": 0.2126,
"step": 10700
},
{
"epoch": 7.65,
"grad_norm": 1.796430230140686,
"learning_rate": 1.2185185185185186e-05,
"loss": 0.2494,
"step": 10710
},
{
"epoch": 7.6571428571428575,
"grad_norm": 2.7764432430267334,
"learning_rate": 1.2148148148148149e-05,
"loss": 0.3102,
"step": 10720
},
{
"epoch": 7.664285714285715,
"grad_norm": 2.2875261306762695,
"learning_rate": 1.2111111111111112e-05,
"loss": 0.2171,
"step": 10730
},
{
"epoch": 7.671428571428572,
"grad_norm": 1.3017419576644897,
"learning_rate": 1.2074074074074075e-05,
"loss": 0.2095,
"step": 10740
},
{
"epoch": 7.678571428571429,
"grad_norm": 1.5423152446746826,
"learning_rate": 1.2037037037037037e-05,
"loss": 0.3183,
"step": 10750
},
{
"epoch": 7.685714285714286,
"grad_norm": 2.0346460342407227,
"learning_rate": 1.2e-05,
"loss": 0.1669,
"step": 10760
},
{
"epoch": 7.692857142857143,
"grad_norm": 1.178389549255371,
"learning_rate": 1.1962962962962964e-05,
"loss": 0.3195,
"step": 10770
},
{
"epoch": 7.7,
"grad_norm": 1.3902812004089355,
"learning_rate": 1.1925925925925927e-05,
"loss": 0.2166,
"step": 10780
},
{
"epoch": 7.707142857142857,
"grad_norm": 2.8894922733306885,
"learning_rate": 1.188888888888889e-05,
"loss": 0.2216,
"step": 10790
},
{
"epoch": 7.714285714285714,
"grad_norm": 2.77864670753479,
"learning_rate": 1.1851851851851853e-05,
"loss": 0.224,
"step": 10800
},
{
"epoch": 7.7214285714285715,
"grad_norm": 1.370814323425293,
"learning_rate": 1.1814814814814817e-05,
"loss": 0.2277,
"step": 10810
},
{
"epoch": 7.728571428571429,
"grad_norm": 1.3543068170547485,
"learning_rate": 1.1777777777777778e-05,
"loss": 0.19,
"step": 10820
},
{
"epoch": 7.735714285714286,
"grad_norm": 2.4707486629486084,
"learning_rate": 1.1740740740740741e-05,
"loss": 0.1951,
"step": 10830
},
{
"epoch": 7.742857142857143,
"grad_norm": 2.284876823425293,
"learning_rate": 1.1703703703703705e-05,
"loss": 0.2206,
"step": 10840
},
{
"epoch": 7.75,
"grad_norm": 1.1018098592758179,
"learning_rate": 1.1666666666666668e-05,
"loss": 0.1386,
"step": 10850
},
{
"epoch": 7.757142857142857,
"grad_norm": 1.9555597305297852,
"learning_rate": 1.1629629629629631e-05,
"loss": 0.1645,
"step": 10860
},
{
"epoch": 7.764285714285714,
"grad_norm": 1.1327388286590576,
"learning_rate": 1.1592592592592594e-05,
"loss": 0.1654,
"step": 10870
},
{
"epoch": 7.771428571428571,
"grad_norm": 1.6210479736328125,
"learning_rate": 1.1555555555555556e-05,
"loss": 0.1937,
"step": 10880
},
{
"epoch": 7.7785714285714285,
"grad_norm": 1.7142146825790405,
"learning_rate": 1.151851851851852e-05,
"loss": 0.1722,
"step": 10890
},
{
"epoch": 7.785714285714286,
"grad_norm": 3.529614210128784,
"learning_rate": 1.1481481481481482e-05,
"loss": 0.2414,
"step": 10900
},
{
"epoch": 7.792857142857143,
"grad_norm": 1.6331572532653809,
"learning_rate": 1.1444444444444446e-05,
"loss": 0.1844,
"step": 10910
},
{
"epoch": 7.8,
"grad_norm": 2.7431063652038574,
"learning_rate": 1.1407407407407409e-05,
"loss": 0.2745,
"step": 10920
},
{
"epoch": 7.807142857142857,
"grad_norm": 2.882291316986084,
"learning_rate": 1.1370370370370372e-05,
"loss": 0.2353,
"step": 10930
},
{
"epoch": 7.814285714285714,
"grad_norm": 2.3573696613311768,
"learning_rate": 1.1333333333333334e-05,
"loss": 0.2047,
"step": 10940
},
{
"epoch": 7.821428571428571,
"grad_norm": 1.370251178741455,
"learning_rate": 1.1296296296296297e-05,
"loss": 0.1855,
"step": 10950
},
{
"epoch": 7.828571428571428,
"grad_norm": 1.1445660591125488,
"learning_rate": 1.125925925925926e-05,
"loss": 0.2316,
"step": 10960
},
{
"epoch": 7.835714285714285,
"grad_norm": 2.094175100326538,
"learning_rate": 1.1222222222222224e-05,
"loss": 0.477,
"step": 10970
},
{
"epoch": 7.8428571428571425,
"grad_norm": 2.1148130893707275,
"learning_rate": 1.1185185185185187e-05,
"loss": 0.2581,
"step": 10980
},
{
"epoch": 7.85,
"grad_norm": 1.89934241771698,
"learning_rate": 1.1148148148148148e-05,
"loss": 0.2908,
"step": 10990
},
{
"epoch": 7.857142857142857,
"grad_norm": 1.6785616874694824,
"learning_rate": 1.1111111111111112e-05,
"loss": 0.2222,
"step": 11000
},
{
"epoch": 7.857142857142857,
"eval_loss": 0.34114697575569153,
"eval_rouge1": 0.91,
"eval_rouge2": 0.8517,
"eval_rougeL": 0.9073,
"eval_runtime": 122.2351,
"eval_samples_per_second": 11.453,
"eval_steps_per_second": 5.727,
"step": 11000
},
{
"epoch": 7.864285714285714,
"grad_norm": 2.335857629776001,
"learning_rate": 1.1074074074074075e-05,
"loss": 0.262,
"step": 11010
},
{
"epoch": 7.871428571428572,
"grad_norm": 1.9699100255966187,
"learning_rate": 1.1037037037037038e-05,
"loss": 0.2056,
"step": 11020
},
{
"epoch": 7.878571428571428,
"grad_norm": 0.8576107025146484,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.1929,
"step": 11030
},
{
"epoch": 7.885714285714286,
"grad_norm": 0.7365075945854187,
"learning_rate": 1.0962962962962963e-05,
"loss": 0.2494,
"step": 11040
},
{
"epoch": 7.892857142857143,
"grad_norm": 2.7551143169403076,
"learning_rate": 1.0925925925925926e-05,
"loss": 0.3239,
"step": 11050
},
{
"epoch": 7.9,
"grad_norm": 1.1980743408203125,
"learning_rate": 1.088888888888889e-05,
"loss": 0.1836,
"step": 11060
},
{
"epoch": 7.9071428571428575,
"grad_norm": 1.525614619255066,
"learning_rate": 1.0851851851851853e-05,
"loss": 0.3089,
"step": 11070
},
{
"epoch": 7.914285714285715,
"grad_norm": 2.0727596282958984,
"learning_rate": 1.0814814814814814e-05,
"loss": 0.1867,
"step": 11080
},
{
"epoch": 7.921428571428572,
"grad_norm": 1.242550015449524,
"learning_rate": 1.0777777777777778e-05,
"loss": 0.222,
"step": 11090
},
{
"epoch": 7.928571428571429,
"grad_norm": 1.3312640190124512,
"learning_rate": 1.074074074074074e-05,
"loss": 0.3334,
"step": 11100
},
{
"epoch": 7.935714285714286,
"grad_norm": 1.4483474493026733,
"learning_rate": 1.0703703703703704e-05,
"loss": 0.2966,
"step": 11110
},
{
"epoch": 7.942857142857143,
"grad_norm": 1.5403432846069336,
"learning_rate": 1.0666666666666667e-05,
"loss": 0.2591,
"step": 11120
},
{
"epoch": 7.95,
"grad_norm": 1.6620547771453857,
"learning_rate": 1.0629629629629629e-05,
"loss": 0.2216,
"step": 11130
},
{
"epoch": 7.957142857142857,
"grad_norm": 1.2060527801513672,
"learning_rate": 1.0592592592592592e-05,
"loss": 0.2403,
"step": 11140
},
{
"epoch": 7.964285714285714,
"grad_norm": 1.4476624727249146,
"learning_rate": 1.0555555555555555e-05,
"loss": 0.2739,
"step": 11150
},
{
"epoch": 7.9714285714285715,
"grad_norm": 2.2425661087036133,
"learning_rate": 1.0518518518518519e-05,
"loss": 0.2981,
"step": 11160
},
{
"epoch": 7.978571428571429,
"grad_norm": 2.2100632190704346,
"learning_rate": 1.0481481481481482e-05,
"loss": 0.2178,
"step": 11170
},
{
"epoch": 7.985714285714286,
"grad_norm": 1.080759882926941,
"learning_rate": 1.0444444444444445e-05,
"loss": 0.2947,
"step": 11180
},
{
"epoch": 7.992857142857143,
"grad_norm": 2.9972569942474365,
"learning_rate": 1.0407407407407407e-05,
"loss": 0.203,
"step": 11190
},
{
"epoch": 8.0,
"grad_norm": 1.7628710269927979,
"learning_rate": 1.037037037037037e-05,
"loss": 0.308,
"step": 11200
},
{
"epoch": 8.007142857142858,
"grad_norm": 1.569732666015625,
"learning_rate": 1.0333333333333333e-05,
"loss": 0.2003,
"step": 11210
},
{
"epoch": 8.014285714285714,
"grad_norm": 0.8212767839431763,
"learning_rate": 1.0296296296296296e-05,
"loss": 0.1957,
"step": 11220
},
{
"epoch": 8.021428571428572,
"grad_norm": 1.7055811882019043,
"learning_rate": 1.025925925925926e-05,
"loss": 0.2274,
"step": 11230
},
{
"epoch": 8.028571428571428,
"grad_norm": 1.6140356063842773,
"learning_rate": 1.0222222222222223e-05,
"loss": 0.2182,
"step": 11240
},
{
"epoch": 8.035714285714286,
"grad_norm": 1.6154979467391968,
"learning_rate": 1.0185185185185185e-05,
"loss": 0.1217,
"step": 11250
},
{
"epoch": 8.042857142857143,
"grad_norm": 2.4283053874969482,
"learning_rate": 1.0148148148148148e-05,
"loss": 0.2687,
"step": 11260
},
{
"epoch": 8.05,
"grad_norm": 2.201909065246582,
"learning_rate": 1.0111111111111111e-05,
"loss": 0.2836,
"step": 11270
},
{
"epoch": 8.057142857142857,
"grad_norm": 1.902273178100586,
"learning_rate": 1.0074074074074074e-05,
"loss": 0.2076,
"step": 11280
},
{
"epoch": 8.064285714285715,
"grad_norm": 1.7851389646530151,
"learning_rate": 1.0037037037037038e-05,
"loss": 0.2045,
"step": 11290
},
{
"epoch": 8.071428571428571,
"grad_norm": 1.1988000869750977,
"learning_rate": 1e-05,
"loss": 0.1832,
"step": 11300
},
{
"epoch": 8.07857142857143,
"grad_norm": 0.6530731320381165,
"learning_rate": 9.962962962962962e-06,
"loss": 0.1788,
"step": 11310
},
{
"epoch": 8.085714285714285,
"grad_norm": 1.5712918043136597,
"learning_rate": 9.925925925925926e-06,
"loss": 0.1601,
"step": 11320
},
{
"epoch": 8.092857142857143,
"grad_norm": 1.3664653301239014,
"learning_rate": 9.888888888888889e-06,
"loss": 0.2745,
"step": 11330
},
{
"epoch": 8.1,
"grad_norm": 1.084404706954956,
"learning_rate": 9.851851851851852e-06,
"loss": 0.2595,
"step": 11340
},
{
"epoch": 8.107142857142858,
"grad_norm": 1.9523823261260986,
"learning_rate": 9.814814814814815e-06,
"loss": 0.2105,
"step": 11350
},
{
"epoch": 8.114285714285714,
"grad_norm": 1.2386913299560547,
"learning_rate": 9.777777777777779e-06,
"loss": 0.1199,
"step": 11360
},
{
"epoch": 8.121428571428572,
"grad_norm": 1.6026146411895752,
"learning_rate": 9.74074074074074e-06,
"loss": 0.2419,
"step": 11370
},
{
"epoch": 8.128571428571428,
"grad_norm": 1.3624472618103027,
"learning_rate": 9.703703703703703e-06,
"loss": 0.2094,
"step": 11380
},
{
"epoch": 8.135714285714286,
"grad_norm": 1.7777026891708374,
"learning_rate": 9.666666666666667e-06,
"loss": 0.2033,
"step": 11390
},
{
"epoch": 8.142857142857142,
"grad_norm": 1.5583858489990234,
"learning_rate": 9.62962962962963e-06,
"loss": 0.2229,
"step": 11400
},
{
"epoch": 8.15,
"grad_norm": 0.7537804841995239,
"learning_rate": 9.592592592592593e-06,
"loss": 0.2201,
"step": 11410
},
{
"epoch": 8.157142857142857,
"grad_norm": 1.3313623666763306,
"learning_rate": 9.555555555555556e-06,
"loss": 0.1802,
"step": 11420
},
{
"epoch": 8.164285714285715,
"grad_norm": 2.136382818222046,
"learning_rate": 9.51851851851852e-06,
"loss": 0.1686,
"step": 11430
},
{
"epoch": 8.17142857142857,
"grad_norm": 1.4156885147094727,
"learning_rate": 9.481481481481481e-06,
"loss": 0.2291,
"step": 11440
},
{
"epoch": 8.178571428571429,
"grad_norm": 1.5296056270599365,
"learning_rate": 9.444444444444445e-06,
"loss": 0.2476,
"step": 11450
},
{
"epoch": 8.185714285714285,
"grad_norm": 1.5632902383804321,
"learning_rate": 9.407407407407408e-06,
"loss": 0.2304,
"step": 11460
},
{
"epoch": 8.192857142857143,
"grad_norm": 0.9542272686958313,
"learning_rate": 9.370370370370371e-06,
"loss": 0.1693,
"step": 11470
},
{
"epoch": 8.2,
"grad_norm": 3.356255292892456,
"learning_rate": 9.333333333333334e-06,
"loss": 0.3996,
"step": 11480
},
{
"epoch": 8.207142857142857,
"grad_norm": 1.6759045124053955,
"learning_rate": 9.296296296296298e-06,
"loss": 0.2134,
"step": 11490
},
{
"epoch": 8.214285714285714,
"grad_norm": 1.2791472673416138,
"learning_rate": 9.259259259259259e-06,
"loss": 0.248,
"step": 11500
},
{
"epoch": 8.221428571428572,
"grad_norm": 1.07367742061615,
"learning_rate": 9.222222222222222e-06,
"loss": 0.1731,
"step": 11510
},
{
"epoch": 8.228571428571428,
"grad_norm": 0.5693773627281189,
"learning_rate": 9.185185185185186e-06,
"loss": 0.2758,
"step": 11520
},
{
"epoch": 8.235714285714286,
"grad_norm": 1.7820035219192505,
"learning_rate": 9.148148148148149e-06,
"loss": 0.2147,
"step": 11530
},
{
"epoch": 8.242857142857142,
"grad_norm": 1.6593891382217407,
"learning_rate": 9.111111111111112e-06,
"loss": 0.2481,
"step": 11540
},
{
"epoch": 8.25,
"grad_norm": 1.4112298488616943,
"learning_rate": 9.074074074074075e-06,
"loss": 0.3307,
"step": 11550
},
{
"epoch": 8.257142857142856,
"grad_norm": 1.0225239992141724,
"learning_rate": 9.037037037037037e-06,
"loss": 0.1092,
"step": 11560
},
{
"epoch": 8.264285714285714,
"grad_norm": 1.4713934659957886,
"learning_rate": 9e-06,
"loss": 0.2332,
"step": 11570
},
{
"epoch": 8.271428571428572,
"grad_norm": 1.6922743320465088,
"learning_rate": 8.962962962962963e-06,
"loss": 0.2106,
"step": 11580
},
{
"epoch": 8.278571428571428,
"grad_norm": 2.4201695919036865,
"learning_rate": 8.925925925925927e-06,
"loss": 0.1746,
"step": 11590
},
{
"epoch": 8.285714285714286,
"grad_norm": 2.3649351596832275,
"learning_rate": 8.88888888888889e-06,
"loss": 0.2204,
"step": 11600
},
{
"epoch": 8.292857142857143,
"grad_norm": 1.0867241621017456,
"learning_rate": 8.851851851851853e-06,
"loss": 0.2246,
"step": 11610
},
{
"epoch": 8.3,
"grad_norm": 1.1103533506393433,
"learning_rate": 8.814814814814815e-06,
"loss": 0.2952,
"step": 11620
},
{
"epoch": 8.307142857142857,
"grad_norm": 1.9086233377456665,
"learning_rate": 8.777777777777778e-06,
"loss": 0.1627,
"step": 11630
},
{
"epoch": 8.314285714285715,
"grad_norm": 1.5733546018600464,
"learning_rate": 8.740740740740741e-06,
"loss": 0.2493,
"step": 11640
},
{
"epoch": 8.321428571428571,
"grad_norm": 1.514758586883545,
"learning_rate": 8.703703703703705e-06,
"loss": 0.1708,
"step": 11650
},
{
"epoch": 8.32857142857143,
"grad_norm": 4.691562175750732,
"learning_rate": 8.666666666666668e-06,
"loss": 0.2622,
"step": 11660
},
{
"epoch": 8.335714285714285,
"grad_norm": 1.0987350940704346,
"learning_rate": 8.629629629629631e-06,
"loss": 0.1291,
"step": 11670
},
{
"epoch": 8.342857142857143,
"grad_norm": 1.4016727209091187,
"learning_rate": 8.592592592592593e-06,
"loss": 0.2075,
"step": 11680
},
{
"epoch": 8.35,
"grad_norm": 0.69717937707901,
"learning_rate": 8.555555555555556e-06,
"loss": 0.1609,
"step": 11690
},
{
"epoch": 8.357142857142858,
"grad_norm": 2.023461103439331,
"learning_rate": 8.518518518518519e-06,
"loss": 0.235,
"step": 11700
},
{
"epoch": 8.364285714285714,
"grad_norm": 2.0078303813934326,
"learning_rate": 8.481481481481482e-06,
"loss": 0.2081,
"step": 11710
},
{
"epoch": 8.371428571428572,
"grad_norm": 1.4724724292755127,
"learning_rate": 8.444444444444446e-06,
"loss": 0.2148,
"step": 11720
},
{
"epoch": 8.378571428571428,
"grad_norm": 2.9564125537872314,
"learning_rate": 8.407407407407409e-06,
"loss": 0.2295,
"step": 11730
},
{
"epoch": 8.385714285714286,
"grad_norm": 1.4059520959854126,
"learning_rate": 8.37037037037037e-06,
"loss": 0.1398,
"step": 11740
},
{
"epoch": 8.392857142857142,
"grad_norm": 2.894953966140747,
"learning_rate": 8.333333333333334e-06,
"loss": 0.2699,
"step": 11750
},
{
"epoch": 8.4,
"grad_norm": 2.5017454624176025,
"learning_rate": 8.296296296296297e-06,
"loss": 0.2322,
"step": 11760
},
{
"epoch": 8.407142857142857,
"grad_norm": 0.9503372311592102,
"learning_rate": 8.25925925925926e-06,
"loss": 0.1356,
"step": 11770
},
{
"epoch": 8.414285714285715,
"grad_norm": 1.63711416721344,
"learning_rate": 8.222222222222223e-06,
"loss": 0.1929,
"step": 11780
},
{
"epoch": 8.42142857142857,
"grad_norm": 0.5972274541854858,
"learning_rate": 8.185185185185187e-06,
"loss": 0.2533,
"step": 11790
},
{
"epoch": 8.428571428571429,
"grad_norm": 1.0893709659576416,
"learning_rate": 8.14814814814815e-06,
"loss": 0.2089,
"step": 11800
},
{
"epoch": 8.435714285714285,
"grad_norm": 1.5523369312286377,
"learning_rate": 8.111111111111112e-06,
"loss": 0.1589,
"step": 11810
},
{
"epoch": 8.442857142857143,
"grad_norm": 1.5510472059249878,
"learning_rate": 8.074074074074075e-06,
"loss": 0.2162,
"step": 11820
},
{
"epoch": 8.45,
"grad_norm": 2.1272058486938477,
"learning_rate": 8.037037037037038e-06,
"loss": 0.2172,
"step": 11830
},
{
"epoch": 8.457142857142857,
"grad_norm": 2.2862300872802734,
"learning_rate": 8.000000000000001e-06,
"loss": 0.2341,
"step": 11840
},
{
"epoch": 8.464285714285714,
"grad_norm": 1.126405954360962,
"learning_rate": 7.962962962962963e-06,
"loss": 0.1474,
"step": 11850
},
{
"epoch": 8.471428571428572,
"grad_norm": 2.000462293624878,
"learning_rate": 7.925925925925926e-06,
"loss": 0.2948,
"step": 11860
},
{
"epoch": 8.478571428571428,
"grad_norm": 1.4239530563354492,
"learning_rate": 7.88888888888889e-06,
"loss": 0.2267,
"step": 11870
},
{
"epoch": 8.485714285714286,
"grad_norm": 2.479355812072754,
"learning_rate": 7.851851851851853e-06,
"loss": 0.1857,
"step": 11880
},
{
"epoch": 8.492857142857144,
"grad_norm": 1.9741036891937256,
"learning_rate": 7.814814814814816e-06,
"loss": 0.2049,
"step": 11890
},
{
"epoch": 8.5,
"grad_norm": 1.996099591255188,
"learning_rate": 7.777777777777777e-06,
"loss": 0.2404,
"step": 11900
},
{
"epoch": 8.507142857142856,
"grad_norm": 1.185811161994934,
"learning_rate": 7.74074074074074e-06,
"loss": 0.1476,
"step": 11910
},
{
"epoch": 8.514285714285714,
"grad_norm": 1.7766746282577515,
"learning_rate": 7.703703703703704e-06,
"loss": 0.1483,
"step": 11920
},
{
"epoch": 8.521428571428572,
"grad_norm": 1.0609338283538818,
"learning_rate": 7.666666666666667e-06,
"loss": 0.3414,
"step": 11930
},
{
"epoch": 8.528571428571428,
"grad_norm": 1.352981448173523,
"learning_rate": 7.629629629629629e-06,
"loss": 0.2453,
"step": 11940
},
{
"epoch": 8.535714285714286,
"grad_norm": 2.01450252532959,
"learning_rate": 7.592592592592593e-06,
"loss": 0.0983,
"step": 11950
},
{
"epoch": 8.542857142857143,
"grad_norm": 0.9895955920219421,
"learning_rate": 7.555555555555556e-06,
"loss": 0.2821,
"step": 11960
},
{
"epoch": 8.55,
"grad_norm": 2.1095712184906006,
"learning_rate": 7.518518518518519e-06,
"loss": 0.2147,
"step": 11970
},
{
"epoch": 8.557142857142857,
"grad_norm": 1.3148187398910522,
"learning_rate": 7.481481481481483e-06,
"loss": 0.1834,
"step": 11980
},
{
"epoch": 8.564285714285715,
"grad_norm": 2.1209769248962402,
"learning_rate": 7.444444444444444e-06,
"loss": 0.2369,
"step": 11990
},
{
"epoch": 8.571428571428571,
"grad_norm": 2.559124708175659,
"learning_rate": 7.4074074074074075e-06,
"loss": 0.3642,
"step": 12000
},
{
"epoch": 8.571428571428571,
"eval_loss": 0.3329981863498688,
"eval_rouge1": 0.9114,
"eval_rouge2": 0.8538,
"eval_rougeL": 0.9087,
"eval_runtime": 122.2883,
"eval_samples_per_second": 11.448,
"eval_steps_per_second": 5.724,
"step": 12000
},
{
"epoch": 8.57857142857143,
"grad_norm": 2.582113027572632,
"learning_rate": 7.370370370370371e-06,
"loss": 0.3589,
"step": 12010
},
{
"epoch": 8.585714285714285,
"grad_norm": 1.1606924533843994,
"learning_rate": 7.333333333333334e-06,
"loss": 0.2036,
"step": 12020
},
{
"epoch": 8.592857142857143,
"grad_norm": 1.990123987197876,
"learning_rate": 7.296296296296297e-06,
"loss": 0.2347,
"step": 12030
},
{
"epoch": 8.6,
"grad_norm": 1.3593547344207764,
"learning_rate": 7.2592592592592605e-06,
"loss": 0.1475,
"step": 12040
},
{
"epoch": 8.607142857142858,
"grad_norm": 0.7821537256240845,
"learning_rate": 7.222222222222222e-06,
"loss": 0.2889,
"step": 12050
},
{
"epoch": 8.614285714285714,
"grad_norm": 2.2297816276550293,
"learning_rate": 7.185185185185185e-06,
"loss": 0.2495,
"step": 12060
},
{
"epoch": 8.621428571428572,
"grad_norm": 1.9694111347198486,
"learning_rate": 7.1481481481481486e-06,
"loss": 0.2182,
"step": 12070
},
{
"epoch": 8.628571428571428,
"grad_norm": 1.3611793518066406,
"learning_rate": 7.111111111111112e-06,
"loss": 0.2116,
"step": 12080
},
{
"epoch": 8.635714285714286,
"grad_norm": 0.9108260869979858,
"learning_rate": 7.074074074074074e-06,
"loss": 0.2229,
"step": 12090
},
{
"epoch": 8.642857142857142,
"grad_norm": 2.577470302581787,
"learning_rate": 7.0370370370370375e-06,
"loss": 0.3132,
"step": 12100
},
{
"epoch": 8.65,
"grad_norm": 0.837846040725708,
"learning_rate": 7.000000000000001e-06,
"loss": 0.1791,
"step": 12110
},
{
"epoch": 8.657142857142857,
"grad_norm": 2.7253611087799072,
"learning_rate": 6.962962962962963e-06,
"loss": 0.1998,
"step": 12120
},
{
"epoch": 8.664285714285715,
"grad_norm": 1.6284639835357666,
"learning_rate": 6.925925925925926e-06,
"loss": 0.2917,
"step": 12130
},
{
"epoch": 8.67142857142857,
"grad_norm": 1.0358765125274658,
"learning_rate": 6.888888888888889e-06,
"loss": 0.2949,
"step": 12140
},
{
"epoch": 8.678571428571429,
"grad_norm": 1.9100443124771118,
"learning_rate": 6.851851851851852e-06,
"loss": 0.1639,
"step": 12150
},
{
"epoch": 8.685714285714285,
"grad_norm": 1.4746047258377075,
"learning_rate": 6.814814814814815e-06,
"loss": 0.1798,
"step": 12160
},
{
"epoch": 8.692857142857143,
"grad_norm": 1.1177834272384644,
"learning_rate": 6.777777777777779e-06,
"loss": 0.2687,
"step": 12170
},
{
"epoch": 8.7,
"grad_norm": 0.8141186833381653,
"learning_rate": 6.74074074074074e-06,
"loss": 0.1627,
"step": 12180
},
{
"epoch": 8.707142857142857,
"grad_norm": 1.4017144441604614,
"learning_rate": 6.703703703703703e-06,
"loss": 0.1938,
"step": 12190
},
{
"epoch": 8.714285714285714,
"grad_norm": 0.7198919653892517,
"learning_rate": 6.666666666666667e-06,
"loss": 0.222,
"step": 12200
},
{
"epoch": 8.721428571428572,
"grad_norm": 1.4436593055725098,
"learning_rate": 6.62962962962963e-06,
"loss": 0.2821,
"step": 12210
},
{
"epoch": 8.728571428571428,
"grad_norm": 1.8222748041152954,
"learning_rate": 6.592592592592593e-06,
"loss": 0.3652,
"step": 12220
},
{
"epoch": 8.735714285714286,
"grad_norm": 1.033396601676941,
"learning_rate": 6.555555555555556e-06,
"loss": 0.1775,
"step": 12230
},
{
"epoch": 8.742857142857144,
"grad_norm": 1.3893368244171143,
"learning_rate": 6.518518518518518e-06,
"loss": 0.3292,
"step": 12240
},
{
"epoch": 8.75,
"grad_norm": 1.7294946908950806,
"learning_rate": 6.481481481481481e-06,
"loss": 0.3136,
"step": 12250
},
{
"epoch": 8.757142857142856,
"grad_norm": 1.5931612253189087,
"learning_rate": 6.4444444444444445e-06,
"loss": 0.2498,
"step": 12260
},
{
"epoch": 8.764285714285714,
"grad_norm": 1.6048915386199951,
"learning_rate": 6.407407407407408e-06,
"loss": 0.2996,
"step": 12270
},
{
"epoch": 8.771428571428572,
"grad_norm": 0.9885932803153992,
"learning_rate": 6.370370370370371e-06,
"loss": 0.1647,
"step": 12280
},
{
"epoch": 8.778571428571428,
"grad_norm": 4.228821277618408,
"learning_rate": 6.333333333333334e-06,
"loss": 0.2533,
"step": 12290
},
{
"epoch": 8.785714285714286,
"grad_norm": 2.150721788406372,
"learning_rate": 6.296296296296296e-06,
"loss": 0.2922,
"step": 12300
},
{
"epoch": 8.792857142857143,
"grad_norm": 2.0247929096221924,
"learning_rate": 6.259259259259259e-06,
"loss": 0.1613,
"step": 12310
},
{
"epoch": 8.8,
"grad_norm": 0.5003380179405212,
"learning_rate": 6.222222222222222e-06,
"loss": 0.1459,
"step": 12320
},
{
"epoch": 8.807142857142857,
"grad_norm": 0.9374644756317139,
"learning_rate": 6.1851851851851856e-06,
"loss": 0.2384,
"step": 12330
},
{
"epoch": 8.814285714285715,
"grad_norm": 1.5784926414489746,
"learning_rate": 6.148148148148149e-06,
"loss": 0.2458,
"step": 12340
},
{
"epoch": 8.821428571428571,
"grad_norm": 1.6696372032165527,
"learning_rate": 6.111111111111111e-06,
"loss": 0.2787,
"step": 12350
},
{
"epoch": 8.82857142857143,
"grad_norm": 1.2524793148040771,
"learning_rate": 6.0740740740740745e-06,
"loss": 0.2445,
"step": 12360
},
{
"epoch": 8.835714285714285,
"grad_norm": 2.176713466644287,
"learning_rate": 6.037037037037038e-06,
"loss": 0.1896,
"step": 12370
},
{
"epoch": 8.842857142857143,
"grad_norm": 1.964752197265625,
"learning_rate": 6e-06,
"loss": 0.261,
"step": 12380
},
{
"epoch": 8.85,
"grad_norm": 2.006638526916504,
"learning_rate": 5.962962962962963e-06,
"loss": 0.2415,
"step": 12390
},
{
"epoch": 8.857142857142858,
"grad_norm": 1.9313900470733643,
"learning_rate": 5.925925925925927e-06,
"loss": 0.3772,
"step": 12400
},
{
"epoch": 8.864285714285714,
"grad_norm": 2.112170934677124,
"learning_rate": 5.888888888888889e-06,
"loss": 0.2396,
"step": 12410
},
{
"epoch": 8.871428571428572,
"grad_norm": 2.7346787452697754,
"learning_rate": 5.851851851851852e-06,
"loss": 0.325,
"step": 12420
},
{
"epoch": 8.878571428571428,
"grad_norm": 3.1768178939819336,
"learning_rate": 5.814814814814816e-06,
"loss": 0.2393,
"step": 12430
},
{
"epoch": 8.885714285714286,
"grad_norm": 2.164926767349243,
"learning_rate": 5.777777777777778e-06,
"loss": 0.2444,
"step": 12440
},
{
"epoch": 8.892857142857142,
"grad_norm": 0.8274826407432556,
"learning_rate": 5.740740740740741e-06,
"loss": 0.2662,
"step": 12450
},
{
"epoch": 8.9,
"grad_norm": 1.024776816368103,
"learning_rate": 5.7037037037037045e-06,
"loss": 0.2059,
"step": 12460
},
{
"epoch": 8.907142857142857,
"grad_norm": 1.1089740991592407,
"learning_rate": 5.666666666666667e-06,
"loss": 0.2529,
"step": 12470
},
{
"epoch": 8.914285714285715,
"grad_norm": 1.3669365644454956,
"learning_rate": 5.62962962962963e-06,
"loss": 0.2432,
"step": 12480
},
{
"epoch": 8.92142857142857,
"grad_norm": 1.9711703062057495,
"learning_rate": 5.592592592592593e-06,
"loss": 0.127,
"step": 12490
},
{
"epoch": 8.928571428571429,
"grad_norm": 1.4796557426452637,
"learning_rate": 5.555555555555556e-06,
"loss": 0.2244,
"step": 12500
},
{
"epoch": 8.935714285714285,
"grad_norm": 2.029303789138794,
"learning_rate": 5.518518518518519e-06,
"loss": 0.2939,
"step": 12510
},
{
"epoch": 8.942857142857143,
"grad_norm": 2.0809414386749268,
"learning_rate": 5.4814814814814815e-06,
"loss": 0.2146,
"step": 12520
},
{
"epoch": 8.95,
"grad_norm": 1.2555458545684814,
"learning_rate": 5.444444444444445e-06,
"loss": 0.2116,
"step": 12530
},
{
"epoch": 8.957142857142857,
"grad_norm": 2.3402106761932373,
"learning_rate": 5.407407407407407e-06,
"loss": 0.1587,
"step": 12540
},
{
"epoch": 8.964285714285714,
"grad_norm": 1.2514257431030273,
"learning_rate": 5.37037037037037e-06,
"loss": 0.1598,
"step": 12550
},
{
"epoch": 8.971428571428572,
"grad_norm": 3.1680376529693604,
"learning_rate": 5.333333333333334e-06,
"loss": 0.2925,
"step": 12560
},
{
"epoch": 8.978571428571428,
"grad_norm": 2.482211112976074,
"learning_rate": 5.296296296296296e-06,
"loss": 0.2152,
"step": 12570
},
{
"epoch": 8.985714285714286,
"grad_norm": 2.3472371101379395,
"learning_rate": 5.259259259259259e-06,
"loss": 0.2079,
"step": 12580
},
{
"epoch": 8.992857142857144,
"grad_norm": 2.52826189994812,
"learning_rate": 5.2222222222222226e-06,
"loss": 0.2124,
"step": 12590
},
{
"epoch": 9.0,
"grad_norm": 1.8535226583480835,
"learning_rate": 5.185185185185185e-06,
"loss": 0.232,
"step": 12600
},
{
"epoch": 9.007142857142858,
"grad_norm": 2.8780601024627686,
"learning_rate": 5.148148148148148e-06,
"loss": 0.3111,
"step": 12610
},
{
"epoch": 9.014285714285714,
"grad_norm": 1.5909632444381714,
"learning_rate": 5.1111111111111115e-06,
"loss": 0.2788,
"step": 12620
},
{
"epoch": 9.021428571428572,
"grad_norm": 1.7076550722122192,
"learning_rate": 5.074074074074074e-06,
"loss": 0.2575,
"step": 12630
},
{
"epoch": 9.028571428571428,
"grad_norm": 2.8672637939453125,
"learning_rate": 5.037037037037037e-06,
"loss": 0.2228,
"step": 12640
},
{
"epoch": 9.035714285714286,
"grad_norm": 1.618055820465088,
"learning_rate": 5e-06,
"loss": 0.1157,
"step": 12650
},
{
"epoch": 9.042857142857143,
"grad_norm": 1.407122015953064,
"learning_rate": 4.962962962962963e-06,
"loss": 0.2321,
"step": 12660
},
{
"epoch": 9.05,
"grad_norm": 1.6875501871109009,
"learning_rate": 4.925925925925926e-06,
"loss": 0.2938,
"step": 12670
},
{
"epoch": 9.057142857142857,
"grad_norm": 1.0872751474380493,
"learning_rate": 4.888888888888889e-06,
"loss": 0.181,
"step": 12680
},
{
"epoch": 9.064285714285715,
"grad_norm": 1.689308524131775,
"learning_rate": 4.851851851851852e-06,
"loss": 0.1147,
"step": 12690
},
{
"epoch": 9.071428571428571,
"grad_norm": 2.5880138874053955,
"learning_rate": 4.814814814814815e-06,
"loss": 0.2757,
"step": 12700
},
{
"epoch": 9.07857142857143,
"grad_norm": 2.9612958431243896,
"learning_rate": 4.777777777777778e-06,
"loss": 0.2847,
"step": 12710
},
{
"epoch": 9.085714285714285,
"grad_norm": 1.7059327363967896,
"learning_rate": 4.740740740740741e-06,
"loss": 0.195,
"step": 12720
},
{
"epoch": 9.092857142857143,
"grad_norm": 1.702331304550171,
"learning_rate": 4.703703703703704e-06,
"loss": 0.2408,
"step": 12730
},
{
"epoch": 9.1,
"grad_norm": 1.814587950706482,
"learning_rate": 4.666666666666667e-06,
"loss": 0.3085,
"step": 12740
},
{
"epoch": 9.107142857142858,
"grad_norm": 2.863785743713379,
"learning_rate": 4.6296296296296296e-06,
"loss": 0.2667,
"step": 12750
},
{
"epoch": 9.114285714285714,
"grad_norm": 1.6600861549377441,
"learning_rate": 4.592592592592593e-06,
"loss": 0.1376,
"step": 12760
},
{
"epoch": 9.121428571428572,
"grad_norm": 1.3329426050186157,
"learning_rate": 4.555555555555556e-06,
"loss": 0.2547,
"step": 12770
},
{
"epoch": 9.128571428571428,
"grad_norm": 4.032371997833252,
"learning_rate": 4.5185185185185185e-06,
"loss": 0.4649,
"step": 12780
},
{
"epoch": 9.135714285714286,
"grad_norm": 2.709066390991211,
"learning_rate": 4.481481481481482e-06,
"loss": 0.2336,
"step": 12790
},
{
"epoch": 9.142857142857142,
"grad_norm": 1.063931941986084,
"learning_rate": 4.444444444444445e-06,
"loss": 0.1731,
"step": 12800
},
{
"epoch": 9.15,
"grad_norm": 1.3110073804855347,
"learning_rate": 4.407407407407407e-06,
"loss": 0.1381,
"step": 12810
},
{
"epoch": 9.157142857142857,
"grad_norm": 1.9574129581451416,
"learning_rate": 4.370370370370371e-06,
"loss": 0.1958,
"step": 12820
},
{
"epoch": 9.164285714285715,
"grad_norm": 2.5129504203796387,
"learning_rate": 4.333333333333334e-06,
"loss": 0.2162,
"step": 12830
},
{
"epoch": 9.17142857142857,
"grad_norm": 1.842850923538208,
"learning_rate": 4.296296296296296e-06,
"loss": 0.2746,
"step": 12840
},
{
"epoch": 9.178571428571429,
"grad_norm": 1.1451313495635986,
"learning_rate": 4.2592592592592596e-06,
"loss": 0.2442,
"step": 12850
},
{
"epoch": 9.185714285714285,
"grad_norm": 2.0765175819396973,
"learning_rate": 4.222222222222223e-06,
"loss": 0.1531,
"step": 12860
},
{
"epoch": 9.192857142857143,
"grad_norm": 0.9250321984291077,
"learning_rate": 4.185185185185185e-06,
"loss": 0.2876,
"step": 12870
},
{
"epoch": 9.2,
"grad_norm": 1.5332380533218384,
"learning_rate": 4.1481481481481485e-06,
"loss": 0.2255,
"step": 12880
},
{
"epoch": 9.207142857142857,
"grad_norm": 3.1344316005706787,
"learning_rate": 4.111111111111112e-06,
"loss": 0.2654,
"step": 12890
},
{
"epoch": 9.214285714285714,
"grad_norm": 1.4092166423797607,
"learning_rate": 4.074074074074075e-06,
"loss": 0.2358,
"step": 12900
},
{
"epoch": 9.221428571428572,
"grad_norm": 1.517716646194458,
"learning_rate": 4.037037037037037e-06,
"loss": 0.2455,
"step": 12910
},
{
"epoch": 9.228571428571428,
"grad_norm": 0.7355996966362,
"learning_rate": 4.000000000000001e-06,
"loss": 0.1761,
"step": 12920
},
{
"epoch": 9.235714285714286,
"grad_norm": 2.077259063720703,
"learning_rate": 3.962962962962963e-06,
"loss": 0.2791,
"step": 12930
},
{
"epoch": 9.242857142857142,
"grad_norm": 1.3175309896469116,
"learning_rate": 3.925925925925926e-06,
"loss": 0.2296,
"step": 12940
},
{
"epoch": 9.25,
"grad_norm": 1.0608943700790405,
"learning_rate": 3.888888888888889e-06,
"loss": 0.1609,
"step": 12950
},
{
"epoch": 9.257142857142856,
"grad_norm": 2.2581288814544678,
"learning_rate": 3.851851851851852e-06,
"loss": 0.2192,
"step": 12960
},
{
"epoch": 9.264285714285714,
"grad_norm": 1.672400951385498,
"learning_rate": 3.814814814814814e-06,
"loss": 0.1548,
"step": 12970
},
{
"epoch": 9.271428571428572,
"grad_norm": 0.7743004560470581,
"learning_rate": 3.777777777777778e-06,
"loss": 0.3168,
"step": 12980
},
{
"epoch": 9.278571428571428,
"grad_norm": 0.9588621854782104,
"learning_rate": 3.7407407407407413e-06,
"loss": 0.1585,
"step": 12990
},
{
"epoch": 9.285714285714286,
"grad_norm": 1.7508875131607056,
"learning_rate": 3.7037037037037037e-06,
"loss": 0.2137,
"step": 13000
},
{
"epoch": 9.285714285714286,
"eval_loss": 0.3308734893798828,
"eval_rouge1": 0.9118,
"eval_rouge2": 0.8545,
"eval_rougeL": 0.909,
"eval_runtime": 122.198,
"eval_samples_per_second": 11.457,
"eval_steps_per_second": 5.728,
"step": 13000
}
],
"logging_steps": 10,
"max_steps": 14000,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.001
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.24700797698048e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}