|
{ |
|
"best_metric": 0.25880399, |
|
"best_model_checkpoint": "/workspace/output/molmo-7b-d/v0-20250103-184047/checkpoint-3600", |
|
"epoch": 5.0, |
|
"eval_steps": 200, |
|
"global_step": 3600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"acc": 0.21068719, |
|
"epoch": 0.001388888888888889, |
|
"grad_norm": 87.10760137929194, |
|
"learning_rate": 0.0, |
|
"loss": 4.65820312, |
|
"memory(GiB)": 57.09, |
|
"step": 1, |
|
"train_speed(iter/s)": 0.023892 |
|
}, |
|
{ |
|
"acc": 0.2150097, |
|
"epoch": 0.006944444444444444, |
|
"grad_norm": 69.93924445657471, |
|
"learning_rate": 2.994148005679757e-06, |
|
"loss": 4.37573242, |
|
"memory(GiB)": 130.0, |
|
"step": 5, |
|
"train_speed(iter/s)": 0.094301 |
|
}, |
|
{ |
|
"acc": 0.47089963, |
|
"epoch": 0.013888888888888888, |
|
"grad_norm": 15.718174458508388, |
|
"learning_rate": 4.2836573631282295e-06, |
|
"loss": 2.69121094, |
|
"memory(GiB)": 130.0, |
|
"step": 10, |
|
"train_speed(iter/s)": 0.150516 |
|
}, |
|
{ |
|
"acc": 0.62566862, |
|
"epoch": 0.020833333333333332, |
|
"grad_norm": 11.952512545843422, |
|
"learning_rate": 5.037971981564619e-06, |
|
"loss": 1.37431641, |
|
"memory(GiB)": 130.0, |
|
"step": 15, |
|
"train_speed(iter/s)": 0.188994 |
|
}, |
|
{ |
|
"acc": 0.69504013, |
|
"epoch": 0.027777777777777776, |
|
"grad_norm": 10.664167724305369, |
|
"learning_rate": 5.5731667205767005e-06, |
|
"loss": 0.96235352, |
|
"memory(GiB)": 130.0, |
|
"step": 20, |
|
"train_speed(iter/s)": 0.215114 |
|
}, |
|
{ |
|
"acc": 0.69391584, |
|
"epoch": 0.034722222222222224, |
|
"grad_norm": 15.301457987834718, |
|
"learning_rate": 5.988296011359514e-06, |
|
"loss": 0.91097412, |
|
"memory(GiB)": 133.48, |
|
"step": 25, |
|
"train_speed(iter/s)": 0.230533 |
|
}, |
|
{ |
|
"acc": 0.71258931, |
|
"epoch": 0.041666666666666664, |
|
"grad_norm": 5.236783763280491, |
|
"learning_rate": 6.327481339013091e-06, |
|
"loss": 0.8465271, |
|
"memory(GiB)": 100.81, |
|
"step": 30, |
|
"train_speed(iter/s)": 0.245112 |
|
}, |
|
{ |
|
"acc": 0.71047335, |
|
"epoch": 0.04861111111111111, |
|
"grad_norm": 13.907732912979608, |
|
"learning_rate": 6.614258447352063e-06, |
|
"loss": 0.89771729, |
|
"memory(GiB)": 100.81, |
|
"step": 35, |
|
"train_speed(iter/s)": 0.256837 |
|
}, |
|
{ |
|
"acc": 0.69757376, |
|
"epoch": 0.05555555555555555, |
|
"grad_norm": 4.835405359715248, |
|
"learning_rate": 6.862676078025172e-06, |
|
"loss": 0.85203247, |
|
"memory(GiB)": 100.81, |
|
"step": 40, |
|
"train_speed(iter/s)": 0.263983 |
|
}, |
|
{ |
|
"acc": 0.7098877, |
|
"epoch": 0.0625, |
|
"grad_norm": 12.652916823049557, |
|
"learning_rate": 7.08179595744948e-06, |
|
"loss": 0.84803467, |
|
"memory(GiB)": 100.81, |
|
"step": 45, |
|
"train_speed(iter/s)": 0.272195 |
|
}, |
|
{ |
|
"acc": 0.71628637, |
|
"epoch": 0.06944444444444445, |
|
"grad_norm": 4.0802053942772885, |
|
"learning_rate": 7.2778053688079864e-06, |
|
"loss": 0.79383545, |
|
"memory(GiB)": 100.81, |
|
"step": 50, |
|
"train_speed(iter/s)": 0.279194 |
|
}, |
|
{ |
|
"acc": 0.7037159, |
|
"epoch": 0.0763888888888889, |
|
"grad_norm": 4.652972869912193, |
|
"learning_rate": 7.455117449365667e-06, |
|
"loss": 0.81118164, |
|
"memory(GiB)": 100.81, |
|
"step": 55, |
|
"train_speed(iter/s)": 0.285782 |
|
}, |
|
{ |
|
"acc": 0.72002425, |
|
"epoch": 0.08333333333333333, |
|
"grad_norm": 5.532418910043181, |
|
"learning_rate": 7.616990696461561e-06, |
|
"loss": 0.78905029, |
|
"memory(GiB)": 100.81, |
|
"step": 60, |
|
"train_speed(iter/s)": 0.291011 |
|
}, |
|
{ |
|
"acc": 0.70535297, |
|
"epoch": 0.09027777777777778, |
|
"grad_norm": 4.67406422391853, |
|
"learning_rate": 7.765899648896681e-06, |
|
"loss": 0.80931396, |
|
"memory(GiB)": 100.81, |
|
"step": 65, |
|
"train_speed(iter/s)": 0.295571 |
|
}, |
|
{ |
|
"acc": 0.7147357, |
|
"epoch": 0.09722222222222222, |
|
"grad_norm": 3.612299097599364, |
|
"learning_rate": 7.903767804800537e-06, |
|
"loss": 0.80908203, |
|
"memory(GiB)": 131.07, |
|
"step": 70, |
|
"train_speed(iter/s)": 0.300188 |
|
}, |
|
{ |
|
"acc": 0.72479091, |
|
"epoch": 0.10416666666666667, |
|
"grad_norm": 4.568791046839639, |
|
"learning_rate": 8.032119987244375e-06, |
|
"loss": 0.76064138, |
|
"memory(GiB)": 131.07, |
|
"step": 75, |
|
"train_speed(iter/s)": 0.303671 |
|
}, |
|
{ |
|
"acc": 0.73130875, |
|
"epoch": 0.1111111111111111, |
|
"grad_norm": 5.76867394533742, |
|
"learning_rate": 8.152185435473643e-06, |
|
"loss": 0.74737253, |
|
"memory(GiB)": 131.07, |
|
"step": 80, |
|
"train_speed(iter/s)": 0.306462 |
|
}, |
|
{ |
|
"acc": 0.73129368, |
|
"epoch": 0.11805555555555555, |
|
"grad_norm": 4.88314712919246, |
|
"learning_rate": 8.264969587694988e-06, |
|
"loss": 0.7456665, |
|
"memory(GiB)": 131.07, |
|
"step": 85, |
|
"train_speed(iter/s)": 0.308971 |
|
}, |
|
{ |
|
"acc": 0.71992922, |
|
"epoch": 0.125, |
|
"grad_norm": 3.4318023660327444, |
|
"learning_rate": 8.371305314897952e-06, |
|
"loss": 0.75183105, |
|
"memory(GiB)": 131.07, |
|
"step": 90, |
|
"train_speed(iter/s)": 0.31115 |
|
}, |
|
{ |
|
"acc": 0.74187808, |
|
"epoch": 0.13194444444444445, |
|
"grad_norm": 9.486749512593935, |
|
"learning_rate": 8.471890284028081e-06, |
|
"loss": 0.73335419, |
|
"memory(GiB)": 131.07, |
|
"step": 95, |
|
"train_speed(iter/s)": 0.31205 |
|
}, |
|
{ |
|
"acc": 0.73988142, |
|
"epoch": 0.1388888888888889, |
|
"grad_norm": 6.046415731830522, |
|
"learning_rate": 8.567314726256459e-06, |
|
"loss": 0.71626534, |
|
"memory(GiB)": 131.07, |
|
"step": 100, |
|
"train_speed(iter/s)": 0.312061 |
|
}, |
|
{ |
|
"acc": 0.74326582, |
|
"epoch": 0.14583333333333334, |
|
"grad_norm": 6.80033824229626, |
|
"learning_rate": 8.658082423236924e-06, |
|
"loss": 0.70533233, |
|
"memory(GiB)": 131.07, |
|
"step": 105, |
|
"train_speed(iter/s)": 0.314121 |
|
}, |
|
{ |
|
"acc": 0.75279951, |
|
"epoch": 0.1527777777777778, |
|
"grad_norm": 10.065172541901923, |
|
"learning_rate": 8.744626806814138e-06, |
|
"loss": 0.67003098, |
|
"memory(GiB)": 131.07, |
|
"step": 110, |
|
"train_speed(iter/s)": 0.315442 |
|
}, |
|
{ |
|
"acc": 0.77138567, |
|
"epoch": 0.1597222222222222, |
|
"grad_norm": 9.015516536250718, |
|
"learning_rate": 8.827323477013188e-06, |
|
"loss": 0.61771393, |
|
"memory(GiB)": 131.07, |
|
"step": 115, |
|
"train_speed(iter/s)": 0.317097 |
|
}, |
|
{ |
|
"acc": 0.76300774, |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 6.062711667468016, |
|
"learning_rate": 8.906500053910034e-06, |
|
"loss": 0.67458191, |
|
"memory(GiB)": 131.07, |
|
"step": 120, |
|
"train_speed(iter/s)": 0.318466 |
|
}, |
|
{ |
|
"acc": 0.78507404, |
|
"epoch": 0.1736111111111111, |
|
"grad_norm": 10.245281951490513, |
|
"learning_rate": 8.982444017039273e-06, |
|
"loss": 0.61902437, |
|
"memory(GiB)": 131.07, |
|
"step": 125, |
|
"train_speed(iter/s)": 0.319474 |
|
}, |
|
{ |
|
"acc": 0.78214965, |
|
"epoch": 0.18055555555555555, |
|
"grad_norm": 40.92578755154591, |
|
"learning_rate": 9.055409006345152e-06, |
|
"loss": 0.59720135, |
|
"memory(GiB)": 131.07, |
|
"step": 130, |
|
"train_speed(iter/s)": 0.319858 |
|
}, |
|
{ |
|
"acc": 0.78999453, |
|
"epoch": 0.1875, |
|
"grad_norm": 20.862392861347566, |
|
"learning_rate": 9.12561993333434e-06, |
|
"loss": 0.58254638, |
|
"memory(GiB)": 131.07, |
|
"step": 135, |
|
"train_speed(iter/s)": 0.320176 |
|
}, |
|
{ |
|
"acc": 0.78094759, |
|
"epoch": 0.19444444444444445, |
|
"grad_norm": 9.395948807027711, |
|
"learning_rate": 9.193277162249006e-06, |
|
"loss": 0.60503025, |
|
"memory(GiB)": 131.07, |
|
"step": 140, |
|
"train_speed(iter/s)": 0.320895 |
|
}, |
|
{ |
|
"acc": 0.78587742, |
|
"epoch": 0.2013888888888889, |
|
"grad_norm": 13.560643513057176, |
|
"learning_rate": 9.2585599572036e-06, |
|
"loss": 0.58811264, |
|
"memory(GiB)": 131.07, |
|
"step": 145, |
|
"train_speed(iter/s)": 0.321441 |
|
}, |
|
{ |
|
"acc": 0.78708205, |
|
"epoch": 0.20833333333333334, |
|
"grad_norm": 17.39551162469508, |
|
"learning_rate": 9.321629344692848e-06, |
|
"loss": 0.56763268, |
|
"memory(GiB)": 131.07, |
|
"step": 150, |
|
"train_speed(iter/s)": 0.321943 |
|
}, |
|
{ |
|
"acc": 0.78839636, |
|
"epoch": 0.2152777777777778, |
|
"grad_norm": 6.0862237740769185, |
|
"learning_rate": 9.382630506560327e-06, |
|
"loss": 0.56501522, |
|
"memory(GiB)": 131.07, |
|
"step": 155, |
|
"train_speed(iter/s)": 0.322849 |
|
}, |
|
{ |
|
"acc": 0.79003353, |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 9.332000809791898, |
|
"learning_rate": 9.441694792922116e-06, |
|
"loss": 0.57846107, |
|
"memory(GiB)": 131.07, |
|
"step": 160, |
|
"train_speed(iter/s)": 0.32378 |
|
}, |
|
{ |
|
"acc": 0.80862875, |
|
"epoch": 0.22916666666666666, |
|
"grad_norm": 7.80508528579547, |
|
"learning_rate": 9.498941425250527e-06, |
|
"loss": 0.53986168, |
|
"memory(GiB)": 131.07, |
|
"step": 165, |
|
"train_speed(iter/s)": 0.324054 |
|
}, |
|
{ |
|
"acc": 0.79791403, |
|
"epoch": 0.2361111111111111, |
|
"grad_norm": 7.346945680337047, |
|
"learning_rate": 9.55447894514346e-06, |
|
"loss": 0.55143967, |
|
"memory(GiB)": 131.07, |
|
"step": 170, |
|
"train_speed(iter/s)": 0.325134 |
|
}, |
|
{ |
|
"acc": 0.79466968, |
|
"epoch": 0.24305555555555555, |
|
"grad_norm": 6.541304671392371, |
|
"learning_rate": 9.60840645303182e-06, |
|
"loss": 0.55263672, |
|
"memory(GiB)": 131.07, |
|
"step": 175, |
|
"train_speed(iter/s)": 0.326197 |
|
}, |
|
{ |
|
"acc": 0.79599619, |
|
"epoch": 0.25, |
|
"grad_norm": 5.061340427236746, |
|
"learning_rate": 9.660814672346424e-06, |
|
"loss": 0.54171772, |
|
"memory(GiB)": 131.07, |
|
"step": 180, |
|
"train_speed(iter/s)": 0.325379 |
|
}, |
|
{ |
|
"acc": 0.80109653, |
|
"epoch": 0.2569444444444444, |
|
"grad_norm": 4.386102069443633, |
|
"learning_rate": 9.711786867849723e-06, |
|
"loss": 0.532901, |
|
"memory(GiB)": 131.07, |
|
"step": 185, |
|
"train_speed(iter/s)": 0.325641 |
|
}, |
|
{ |
|
"acc": 0.788484, |
|
"epoch": 0.2638888888888889, |
|
"grad_norm": 9.386068799809824, |
|
"learning_rate": 9.761399641476552e-06, |
|
"loss": 0.56219482, |
|
"memory(GiB)": 131.07, |
|
"step": 190, |
|
"train_speed(iter/s)": 0.326463 |
|
}, |
|
{ |
|
"acc": 0.78657551, |
|
"epoch": 0.2708333333333333, |
|
"grad_norm": 30.57227532397844, |
|
"learning_rate": 9.809723624781542e-06, |
|
"loss": 0.59577637, |
|
"memory(GiB)": 131.07, |
|
"step": 195, |
|
"train_speed(iter/s)": 0.327403 |
|
}, |
|
{ |
|
"acc": 0.77691326, |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 24.227230729653417, |
|
"learning_rate": 9.856824083704928e-06, |
|
"loss": 0.63096924, |
|
"memory(GiB)": 131.07, |
|
"step": 200, |
|
"train_speed(iter/s)": 0.328235 |
|
}, |
|
{ |
|
"acc": 0.79410772, |
|
"epoch": 0.2847222222222222, |
|
"grad_norm": 12.207454896527581, |
|
"learning_rate": 9.902761448651595e-06, |
|
"loss": 0.55623169, |
|
"memory(GiB)": 131.07, |
|
"step": 205, |
|
"train_speed(iter/s)": 0.329318 |
|
}, |
|
{ |
|
"acc": 0.78385544, |
|
"epoch": 0.2916666666666667, |
|
"grad_norm": 114.41313028354135, |
|
"learning_rate": 9.947591780685397e-06, |
|
"loss": 0.60689435, |
|
"memory(GiB)": 131.07, |
|
"step": 210, |
|
"train_speed(iter/s)": 0.33027 |
|
}, |
|
{ |
|
"acc": 0.78210087, |
|
"epoch": 0.2986111111111111, |
|
"grad_norm": 20.141587824624402, |
|
"learning_rate": 9.99136718286095e-06, |
|
"loss": 0.58652344, |
|
"memory(GiB)": 131.07, |
|
"step": 215, |
|
"train_speed(iter/s)": 0.330942 |
|
}, |
|
{ |
|
"acc": 0.7920527, |
|
"epoch": 0.3055555555555556, |
|
"grad_norm": 8.421958562452112, |
|
"learning_rate": 9.999976563040932e-06, |
|
"loss": 0.55961218, |
|
"memory(GiB)": 131.07, |
|
"step": 220, |
|
"train_speed(iter/s)": 0.331831 |
|
}, |
|
{ |
|
"acc": 0.79507227, |
|
"epoch": 0.3125, |
|
"grad_norm": 118.8243683029417, |
|
"learning_rate": 9.999881350771313e-06, |
|
"loss": 0.54424796, |
|
"memory(GiB)": 131.07, |
|
"step": 225, |
|
"train_speed(iter/s)": 0.332761 |
|
}, |
|
{ |
|
"acc": 0.78946581, |
|
"epoch": 0.3194444444444444, |
|
"grad_norm": 36.81477295658913, |
|
"learning_rate": 9.999712899774951e-06, |
|
"loss": 0.57321162, |
|
"memory(GiB)": 131.07, |
|
"step": 230, |
|
"train_speed(iter/s)": 0.333425 |
|
}, |
|
{ |
|
"acc": 0.78228483, |
|
"epoch": 0.3263888888888889, |
|
"grad_norm": 25.713677437539047, |
|
"learning_rate": 9.999471212519574e-06, |
|
"loss": 0.56825256, |
|
"memory(GiB)": 131.07, |
|
"step": 235, |
|
"train_speed(iter/s)": 0.334216 |
|
}, |
|
{ |
|
"acc": 0.79108438, |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 37.784241249057, |
|
"learning_rate": 9.999156292545797e-06, |
|
"loss": 0.58692875, |
|
"memory(GiB)": 131.07, |
|
"step": 240, |
|
"train_speed(iter/s)": 0.3346 |
|
}, |
|
{ |
|
"acc": 0.7929399, |
|
"epoch": 0.3402777777777778, |
|
"grad_norm": 8.535961270899605, |
|
"learning_rate": 9.998768144467059e-06, |
|
"loss": 0.57177811, |
|
"memory(GiB)": 131.07, |
|
"step": 245, |
|
"train_speed(iter/s)": 0.335122 |
|
}, |
|
{ |
|
"acc": 0.78859062, |
|
"epoch": 0.3472222222222222, |
|
"grad_norm": 10.099044260879502, |
|
"learning_rate": 9.998306773969554e-06, |
|
"loss": 0.55594349, |
|
"memory(GiB)": 131.07, |
|
"step": 250, |
|
"train_speed(iter/s)": 0.33582 |
|
}, |
|
{ |
|
"acc": 0.80772114, |
|
"epoch": 0.3541666666666667, |
|
"grad_norm": 37.10201152587716, |
|
"learning_rate": 9.997772187812157e-06, |
|
"loss": 0.54929962, |
|
"memory(GiB)": 131.07, |
|
"step": 255, |
|
"train_speed(iter/s)": 0.336151 |
|
}, |
|
{ |
|
"acc": 0.80398197, |
|
"epoch": 0.3611111111111111, |
|
"grad_norm": 24.33760888028203, |
|
"learning_rate": 9.997164393826322e-06, |
|
"loss": 0.54323769, |
|
"memory(GiB)": 131.07, |
|
"step": 260, |
|
"train_speed(iter/s)": 0.336382 |
|
}, |
|
{ |
|
"acc": 0.80659904, |
|
"epoch": 0.3680555555555556, |
|
"grad_norm": 20.76174518011458, |
|
"learning_rate": 9.996483400915958e-06, |
|
"loss": 0.53362389, |
|
"memory(GiB)": 131.07, |
|
"step": 265, |
|
"train_speed(iter/s)": 0.336602 |
|
}, |
|
{ |
|
"acc": 0.80532684, |
|
"epoch": 0.375, |
|
"grad_norm": 11.08433867212367, |
|
"learning_rate": 9.995729219057312e-06, |
|
"loss": 0.52986441, |
|
"memory(GiB)": 131.07, |
|
"step": 270, |
|
"train_speed(iter/s)": 0.337078 |
|
}, |
|
{ |
|
"acc": 0.80152454, |
|
"epoch": 0.3819444444444444, |
|
"grad_norm": 10.445600868874815, |
|
"learning_rate": 9.994901859298815e-06, |
|
"loss": 0.5497118, |
|
"memory(GiB)": 131.07, |
|
"step": 275, |
|
"train_speed(iter/s)": 0.337587 |
|
}, |
|
{ |
|
"acc": 0.80949535, |
|
"epoch": 0.3888888888888889, |
|
"grad_norm": 6.916524475949725, |
|
"learning_rate": 9.994001333760923e-06, |
|
"loss": 0.5162343, |
|
"memory(GiB)": 131.07, |
|
"step": 280, |
|
"train_speed(iter/s)": 0.338119 |
|
}, |
|
{ |
|
"acc": 0.81486473, |
|
"epoch": 0.3958333333333333, |
|
"grad_norm": 12.150336722267383, |
|
"learning_rate": 9.993027655635934e-06, |
|
"loss": 0.51019297, |
|
"memory(GiB)": 131.07, |
|
"step": 285, |
|
"train_speed(iter/s)": 0.338392 |
|
}, |
|
{ |
|
"acc": 0.80092564, |
|
"epoch": 0.4027777777777778, |
|
"grad_norm": 7.653355098250675, |
|
"learning_rate": 9.991980839187803e-06, |
|
"loss": 0.52064533, |
|
"memory(GiB)": 131.07, |
|
"step": 290, |
|
"train_speed(iter/s)": 0.338816 |
|
}, |
|
{ |
|
"acc": 0.80846672, |
|
"epoch": 0.4097222222222222, |
|
"grad_norm": 51.23149633705719, |
|
"learning_rate": 9.99086089975193e-06, |
|
"loss": 0.49964476, |
|
"memory(GiB)": 131.07, |
|
"step": 295, |
|
"train_speed(iter/s)": 0.339359 |
|
}, |
|
{ |
|
"acc": 0.81497822, |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 5.848437545743278, |
|
"learning_rate": 9.989667853734933e-06, |
|
"loss": 0.51864281, |
|
"memory(GiB)": 131.07, |
|
"step": 300, |
|
"train_speed(iter/s)": 0.339412 |
|
}, |
|
{ |
|
"acc": 0.81019306, |
|
"epoch": 0.4236111111111111, |
|
"grad_norm": 9.554808379783061, |
|
"learning_rate": 9.988401718614406e-06, |
|
"loss": 0.49421182, |
|
"memory(GiB)": 131.07, |
|
"step": 305, |
|
"train_speed(iter/s)": 0.339728 |
|
}, |
|
{ |
|
"acc": 0.81571865, |
|
"epoch": 0.4305555555555556, |
|
"grad_norm": 3.1593614398505028, |
|
"learning_rate": 9.98706251293867e-06, |
|
"loss": 0.49002447, |
|
"memory(GiB)": 131.07, |
|
"step": 310, |
|
"train_speed(iter/s)": 0.33982 |
|
}, |
|
{ |
|
"acc": 0.8122633, |
|
"epoch": 0.4375, |
|
"grad_norm": 4.285266854421746, |
|
"learning_rate": 9.985650256326495e-06, |
|
"loss": 0.49020014, |
|
"memory(GiB)": 131.07, |
|
"step": 315, |
|
"train_speed(iter/s)": 0.339074 |
|
}, |
|
{ |
|
"acc": 0.79069309, |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 5.341762395729506, |
|
"learning_rate": 9.984164969466818e-06, |
|
"loss": 0.52460327, |
|
"memory(GiB)": 131.07, |
|
"step": 320, |
|
"train_speed(iter/s)": 0.33833 |
|
}, |
|
{ |
|
"acc": 0.80525522, |
|
"epoch": 0.4513888888888889, |
|
"grad_norm": 66.73254513067435, |
|
"learning_rate": 9.982606674118437e-06, |
|
"loss": 0.5414197, |
|
"memory(GiB)": 131.07, |
|
"step": 325, |
|
"train_speed(iter/s)": 0.338259 |
|
}, |
|
{ |
|
"acc": 0.78918257, |
|
"epoch": 0.4583333333333333, |
|
"grad_norm": 6.91220975909936, |
|
"learning_rate": 9.980975393109683e-06, |
|
"loss": 0.54792948, |
|
"memory(GiB)": 131.07, |
|
"step": 330, |
|
"train_speed(iter/s)": 0.338625 |
|
}, |
|
{ |
|
"acc": 0.81037483, |
|
"epoch": 0.4652777777777778, |
|
"grad_norm": 4.168771473436414, |
|
"learning_rate": 9.979271150338108e-06, |
|
"loss": 0.51318879, |
|
"memory(GiB)": 131.07, |
|
"step": 335, |
|
"train_speed(iter/s)": 0.338171 |
|
}, |
|
{ |
|
"acc": 0.81083908, |
|
"epoch": 0.4722222222222222, |
|
"grad_norm": 6.532286933835978, |
|
"learning_rate": 9.977493970770112e-06, |
|
"loss": 0.50585566, |
|
"memory(GiB)": 131.07, |
|
"step": 340, |
|
"train_speed(iter/s)": 0.338229 |
|
}, |
|
{ |
|
"acc": 0.82154369, |
|
"epoch": 0.4791666666666667, |
|
"grad_norm": 7.835610655269442, |
|
"learning_rate": 9.975643880440592e-06, |
|
"loss": 0.45731974, |
|
"memory(GiB)": 131.07, |
|
"step": 345, |
|
"train_speed(iter/s)": 0.338112 |
|
}, |
|
{ |
|
"acc": 0.80105, |
|
"epoch": 0.4861111111111111, |
|
"grad_norm": 4.636723486070957, |
|
"learning_rate": 9.973720906452551e-06, |
|
"loss": 0.50835361, |
|
"memory(GiB)": 131.07, |
|
"step": 350, |
|
"train_speed(iter/s)": 0.338446 |
|
}, |
|
{ |
|
"acc": 0.80463896, |
|
"epoch": 0.4930555555555556, |
|
"grad_norm": 2.7838654689037057, |
|
"learning_rate": 9.97172507697671e-06, |
|
"loss": 0.52188816, |
|
"memory(GiB)": 131.07, |
|
"step": 355, |
|
"train_speed(iter/s)": 0.338437 |
|
}, |
|
{ |
|
"acc": 0.81386986, |
|
"epoch": 0.5, |
|
"grad_norm": 3.344156589788971, |
|
"learning_rate": 9.96965642125109e-06, |
|
"loss": 0.47512083, |
|
"memory(GiB)": 133.41, |
|
"step": 360, |
|
"train_speed(iter/s)": 0.338594 |
|
}, |
|
{ |
|
"acc": 0.80757666, |
|
"epoch": 0.5069444444444444, |
|
"grad_norm": 6.585961659135812, |
|
"learning_rate": 9.967514969580579e-06, |
|
"loss": 0.50586038, |
|
"memory(GiB)": 100.72, |
|
"step": 365, |
|
"train_speed(iter/s)": 0.33889 |
|
}, |
|
{ |
|
"acc": 0.81812487, |
|
"epoch": 0.5138888888888888, |
|
"grad_norm": 3.8878748110567316, |
|
"learning_rate": 9.965300753336498e-06, |
|
"loss": 0.49280443, |
|
"memory(GiB)": 100.72, |
|
"step": 370, |
|
"train_speed(iter/s)": 0.339136 |
|
}, |
|
{ |
|
"acc": 0.81244354, |
|
"epoch": 0.5208333333333334, |
|
"grad_norm": 3.4377647556680513, |
|
"learning_rate": 9.96301380495614e-06, |
|
"loss": 0.50686035, |
|
"memory(GiB)": 100.72, |
|
"step": 375, |
|
"train_speed(iter/s)": 0.339232 |
|
}, |
|
{ |
|
"acc": 0.81433783, |
|
"epoch": 0.5277777777777778, |
|
"grad_norm": 4.090164954347545, |
|
"learning_rate": 9.960654157942285e-06, |
|
"loss": 0.46210661, |
|
"memory(GiB)": 101.73, |
|
"step": 380, |
|
"train_speed(iter/s)": 0.339316 |
|
}, |
|
{ |
|
"acc": 0.80798006, |
|
"epoch": 0.5347222222222222, |
|
"grad_norm": 2.544503744487103, |
|
"learning_rate": 9.958221846862717e-06, |
|
"loss": 0.47319975, |
|
"memory(GiB)": 101.73, |
|
"step": 385, |
|
"train_speed(iter/s)": 0.33958 |
|
}, |
|
{ |
|
"acc": 0.81903315, |
|
"epoch": 0.5416666666666666, |
|
"grad_norm": 5.042497046029212, |
|
"learning_rate": 9.95571690734972e-06, |
|
"loss": 0.49428267, |
|
"memory(GiB)": 101.73, |
|
"step": 390, |
|
"train_speed(iter/s)": 0.339733 |
|
}, |
|
{ |
|
"acc": 0.80402279, |
|
"epoch": 0.5486111111111112, |
|
"grad_norm": 4.123141501535671, |
|
"learning_rate": 9.95313937609955e-06, |
|
"loss": 0.54107056, |
|
"memory(GiB)": 101.73, |
|
"step": 395, |
|
"train_speed(iter/s)": 0.339838 |
|
}, |
|
{ |
|
"acc": 0.81687107, |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 14.206970724335013, |
|
"learning_rate": 9.950489290871902e-06, |
|
"loss": 0.47818375, |
|
"memory(GiB)": 101.73, |
|
"step": 400, |
|
"train_speed(iter/s)": 0.339764 |
|
}, |
|
{ |
|
"acc": 0.83043404, |
|
"epoch": 0.5625, |
|
"grad_norm": 13.479652065488521, |
|
"learning_rate": 9.947766690489351e-06, |
|
"loss": 0.45714722, |
|
"memory(GiB)": 101.73, |
|
"step": 405, |
|
"train_speed(iter/s)": 0.33976 |
|
}, |
|
{ |
|
"acc": 0.82783804, |
|
"epoch": 0.5694444444444444, |
|
"grad_norm": 6.994687177716389, |
|
"learning_rate": 9.944971614836791e-06, |
|
"loss": 0.44925947, |
|
"memory(GiB)": 101.73, |
|
"step": 410, |
|
"train_speed(iter/s)": 0.340059 |
|
}, |
|
{ |
|
"acc": 0.82454166, |
|
"epoch": 0.5763888888888888, |
|
"grad_norm": 6.474005259951495, |
|
"learning_rate": 9.942104104860843e-06, |
|
"loss": 0.46208277, |
|
"memory(GiB)": 101.73, |
|
"step": 415, |
|
"train_speed(iter/s)": 0.340149 |
|
}, |
|
{ |
|
"acc": 0.82342644, |
|
"epoch": 0.5833333333333334, |
|
"grad_norm": 4.144488937559482, |
|
"learning_rate": 9.93916420256926e-06, |
|
"loss": 0.46902466, |
|
"memory(GiB)": 101.73, |
|
"step": 420, |
|
"train_speed(iter/s)": 0.340079 |
|
}, |
|
{ |
|
"acc": 0.81452484, |
|
"epoch": 0.5902777777777778, |
|
"grad_norm": 4.534407651664684, |
|
"learning_rate": 9.936151951030313e-06, |
|
"loss": 0.47105713, |
|
"memory(GiB)": 101.73, |
|
"step": 425, |
|
"train_speed(iter/s)": 0.340222 |
|
}, |
|
{ |
|
"acc": 0.81685143, |
|
"epoch": 0.5972222222222222, |
|
"grad_norm": 4.2541381378013545, |
|
"learning_rate": 9.933067394372155e-06, |
|
"loss": 0.45794678, |
|
"memory(GiB)": 101.73, |
|
"step": 430, |
|
"train_speed(iter/s)": 0.340113 |
|
}, |
|
{ |
|
"acc": 0.82499523, |
|
"epoch": 0.6041666666666666, |
|
"grad_norm": 4.236882311093862, |
|
"learning_rate": 9.929910577782175e-06, |
|
"loss": 0.44803181, |
|
"memory(GiB)": 101.73, |
|
"step": 435, |
|
"train_speed(iter/s)": 0.340332 |
|
}, |
|
{ |
|
"acc": 0.80982151, |
|
"epoch": 0.6111111111111112, |
|
"grad_norm": 4.6523720622403495, |
|
"learning_rate": 9.926681547506343e-06, |
|
"loss": 0.45672607, |
|
"memory(GiB)": 101.73, |
|
"step": 440, |
|
"train_speed(iter/s)": 0.340214 |
|
}, |
|
{ |
|
"acc": 0.83155994, |
|
"epoch": 0.6180555555555556, |
|
"grad_norm": 3.539856352290064, |
|
"learning_rate": 9.923380350848525e-06, |
|
"loss": 0.43849535, |
|
"memory(GiB)": 101.73, |
|
"step": 445, |
|
"train_speed(iter/s)": 0.340238 |
|
}, |
|
{ |
|
"acc": 0.82845173, |
|
"epoch": 0.625, |
|
"grad_norm": 4.628916992182938, |
|
"learning_rate": 9.920007036169793e-06, |
|
"loss": 0.44665036, |
|
"memory(GiB)": 101.73, |
|
"step": 450, |
|
"train_speed(iter/s)": 0.340465 |
|
}, |
|
{ |
|
"acc": 0.827981, |
|
"epoch": 0.6319444444444444, |
|
"grad_norm": 2.7639289668011635, |
|
"learning_rate": 9.916561652887715e-06, |
|
"loss": 0.44553595, |
|
"memory(GiB)": 101.73, |
|
"step": 455, |
|
"train_speed(iter/s)": 0.340695 |
|
}, |
|
{ |
|
"acc": 0.82418242, |
|
"epoch": 0.6388888888888888, |
|
"grad_norm": 2.5699138622519477, |
|
"learning_rate": 9.913044251475634e-06, |
|
"loss": 0.46772599, |
|
"memory(GiB)": 101.73, |
|
"step": 460, |
|
"train_speed(iter/s)": 0.34074 |
|
}, |
|
{ |
|
"acc": 0.82081413, |
|
"epoch": 0.6458333333333334, |
|
"grad_norm": 3.819421823928973, |
|
"learning_rate": 9.909454883461921e-06, |
|
"loss": 0.4476263, |
|
"memory(GiB)": 101.73, |
|
"step": 465, |
|
"train_speed(iter/s)": 0.340745 |
|
}, |
|
{ |
|
"acc": 0.81621161, |
|
"epoch": 0.6527777777777778, |
|
"grad_norm": 11.434255980801682, |
|
"learning_rate": 9.905793601429239e-06, |
|
"loss": 0.45358963, |
|
"memory(GiB)": 101.73, |
|
"step": 470, |
|
"train_speed(iter/s)": 0.340866 |
|
}, |
|
{ |
|
"acc": 0.80989876, |
|
"epoch": 0.6597222222222222, |
|
"grad_norm": 3.3992412181651006, |
|
"learning_rate": 9.902060459013744e-06, |
|
"loss": 0.47856612, |
|
"memory(GiB)": 101.73, |
|
"step": 475, |
|
"train_speed(iter/s)": 0.341055 |
|
}, |
|
{ |
|
"acc": 0.81908178, |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 4.325601425130281, |
|
"learning_rate": 9.898255510904326e-06, |
|
"loss": 0.44092093, |
|
"memory(GiB)": 101.73, |
|
"step": 480, |
|
"train_speed(iter/s)": 0.341185 |
|
}, |
|
{ |
|
"acc": 0.82690163, |
|
"epoch": 0.6736111111111112, |
|
"grad_norm": 4.011942792509904, |
|
"learning_rate": 9.894378812841793e-06, |
|
"loss": 0.43541508, |
|
"memory(GiB)": 101.73, |
|
"step": 485, |
|
"train_speed(iter/s)": 0.341317 |
|
}, |
|
{ |
|
"acc": 0.83012829, |
|
"epoch": 0.6805555555555556, |
|
"grad_norm": 11.20428097827798, |
|
"learning_rate": 9.890430421618054e-06, |
|
"loss": 0.45354671, |
|
"memory(GiB)": 101.73, |
|
"step": 490, |
|
"train_speed(iter/s)": 0.34136 |
|
}, |
|
{ |
|
"acc": 0.81567917, |
|
"epoch": 0.6875, |
|
"grad_norm": 6.011176841797077, |
|
"learning_rate": 9.886410395075299e-06, |
|
"loss": 0.47600689, |
|
"memory(GiB)": 101.73, |
|
"step": 495, |
|
"train_speed(iter/s)": 0.34137 |
|
}, |
|
{ |
|
"acc": 0.83390331, |
|
"epoch": 0.6944444444444444, |
|
"grad_norm": 2.5421269797009463, |
|
"learning_rate": 9.882318792105139e-06, |
|
"loss": 0.4320755, |
|
"memory(GiB)": 101.73, |
|
"step": 500, |
|
"train_speed(iter/s)": 0.341535 |
|
}, |
|
{ |
|
"acc": 0.82440739, |
|
"epoch": 0.7013888888888888, |
|
"grad_norm": 5.398499470086722, |
|
"learning_rate": 9.878155672647745e-06, |
|
"loss": 0.45568981, |
|
"memory(GiB)": 101.73, |
|
"step": 505, |
|
"train_speed(iter/s)": 0.341835 |
|
}, |
|
{ |
|
"acc": 0.82466736, |
|
"epoch": 0.7083333333333334, |
|
"grad_norm": 2.9253942613017605, |
|
"learning_rate": 9.873921097690983e-06, |
|
"loss": 0.44401636, |
|
"memory(GiB)": 101.73, |
|
"step": 510, |
|
"train_speed(iter/s)": 0.34167 |
|
}, |
|
{ |
|
"acc": 0.83045635, |
|
"epoch": 0.7152777777777778, |
|
"grad_norm": 2.411332274219069, |
|
"learning_rate": 9.8696151292695e-06, |
|
"loss": 0.4418056, |
|
"memory(GiB)": 101.73, |
|
"step": 515, |
|
"train_speed(iter/s)": 0.341759 |
|
}, |
|
{ |
|
"acc": 0.82254047, |
|
"epoch": 0.7222222222222222, |
|
"grad_norm": 5.941833912171894, |
|
"learning_rate": 9.86523783046383e-06, |
|
"loss": 0.45153627, |
|
"memory(GiB)": 101.73, |
|
"step": 520, |
|
"train_speed(iter/s)": 0.341971 |
|
}, |
|
{ |
|
"acc": 0.82107563, |
|
"epoch": 0.7291666666666666, |
|
"grad_norm": 2.8671436433087694, |
|
"learning_rate": 9.860789265399467e-06, |
|
"loss": 0.44395323, |
|
"memory(GiB)": 101.73, |
|
"step": 525, |
|
"train_speed(iter/s)": 0.342045 |
|
}, |
|
{ |
|
"acc": 0.8169796, |
|
"epoch": 0.7361111111111112, |
|
"grad_norm": 3.8842098252434423, |
|
"learning_rate": 9.856269499245921e-06, |
|
"loss": 0.45956945, |
|
"memory(GiB)": 101.73, |
|
"step": 530, |
|
"train_speed(iter/s)": 0.342256 |
|
}, |
|
{ |
|
"acc": 0.83679924, |
|
"epoch": 0.7430555555555556, |
|
"grad_norm": 3.3239182012648243, |
|
"learning_rate": 9.85167859821577e-06, |
|
"loss": 0.43197575, |
|
"memory(GiB)": 101.73, |
|
"step": 535, |
|
"train_speed(iter/s)": 0.34245 |
|
}, |
|
{ |
|
"acc": 0.82799988, |
|
"epoch": 0.75, |
|
"grad_norm": 5.816793260610907, |
|
"learning_rate": 9.847016629563683e-06, |
|
"loss": 0.42782841, |
|
"memory(GiB)": 101.73, |
|
"step": 540, |
|
"train_speed(iter/s)": 0.342663 |
|
}, |
|
{ |
|
"acc": 0.81784801, |
|
"epoch": 0.7569444444444444, |
|
"grad_norm": 5.182686505017899, |
|
"learning_rate": 9.842283661585442e-06, |
|
"loss": 0.46612892, |
|
"memory(GiB)": 101.73, |
|
"step": 545, |
|
"train_speed(iter/s)": 0.342689 |
|
}, |
|
{ |
|
"acc": 0.82886944, |
|
"epoch": 0.7638888888888888, |
|
"grad_norm": 2.5996284531124867, |
|
"learning_rate": 9.837479763616932e-06, |
|
"loss": 0.43436565, |
|
"memory(GiB)": 101.73, |
|
"step": 550, |
|
"train_speed(iter/s)": 0.342815 |
|
}, |
|
{ |
|
"acc": 0.83412199, |
|
"epoch": 0.7708333333333334, |
|
"grad_norm": 3.4156004394784296, |
|
"learning_rate": 9.832605006033138e-06, |
|
"loss": 0.40990982, |
|
"memory(GiB)": 101.73, |
|
"step": 555, |
|
"train_speed(iter/s)": 0.342947 |
|
}, |
|
{ |
|
"acc": 0.82610512, |
|
"epoch": 0.7777777777777778, |
|
"grad_norm": 2.6539226671509453, |
|
"learning_rate": 9.8276594602471e-06, |
|
"loss": 0.41491723, |
|
"memory(GiB)": 101.73, |
|
"step": 560, |
|
"train_speed(iter/s)": 0.342421 |
|
}, |
|
{ |
|
"acc": 0.84313641, |
|
"epoch": 0.7847222222222222, |
|
"grad_norm": 5.876967933487755, |
|
"learning_rate": 9.822643198708884e-06, |
|
"loss": 0.41127276, |
|
"memory(GiB)": 101.73, |
|
"step": 565, |
|
"train_speed(iter/s)": 0.342369 |
|
}, |
|
{ |
|
"acc": 0.83134546, |
|
"epoch": 0.7916666666666666, |
|
"grad_norm": 4.833766788934407, |
|
"learning_rate": 9.817556294904497e-06, |
|
"loss": 0.4230135, |
|
"memory(GiB)": 101.73, |
|
"step": 570, |
|
"train_speed(iter/s)": 0.342429 |
|
}, |
|
{ |
|
"acc": 0.8203537, |
|
"epoch": 0.7986111111111112, |
|
"grad_norm": 3.95800471385031, |
|
"learning_rate": 9.812398823354835e-06, |
|
"loss": 0.45338583, |
|
"memory(GiB)": 101.73, |
|
"step": 575, |
|
"train_speed(iter/s)": 0.342453 |
|
}, |
|
{ |
|
"acc": 0.84011059, |
|
"epoch": 0.8055555555555556, |
|
"grad_norm": 2.7356802434548295, |
|
"learning_rate": 9.807170859614574e-06, |
|
"loss": 0.41599312, |
|
"memory(GiB)": 101.73, |
|
"step": 580, |
|
"train_speed(iter/s)": 0.342508 |
|
}, |
|
{ |
|
"acc": 0.82861423, |
|
"epoch": 0.8125, |
|
"grad_norm": 2.789741600661146, |
|
"learning_rate": 9.801872480271075e-06, |
|
"loss": 0.43512211, |
|
"memory(GiB)": 101.73, |
|
"step": 585, |
|
"train_speed(iter/s)": 0.342606 |
|
}, |
|
{ |
|
"acc": 0.82745457, |
|
"epoch": 0.8194444444444444, |
|
"grad_norm": 5.351426683388521, |
|
"learning_rate": 9.796503762943248e-06, |
|
"loss": 0.42212791, |
|
"memory(GiB)": 101.73, |
|
"step": 590, |
|
"train_speed(iter/s)": 0.342504 |
|
}, |
|
{ |
|
"acc": 0.83506317, |
|
"epoch": 0.8263888888888888, |
|
"grad_norm": 2.915645519453413, |
|
"learning_rate": 9.791064786280432e-06, |
|
"loss": 0.42344589, |
|
"memory(GiB)": 101.73, |
|
"step": 595, |
|
"train_speed(iter/s)": 0.342576 |
|
}, |
|
{ |
|
"acc": 0.83208055, |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 3.5213580401660156, |
|
"learning_rate": 9.785555629961232e-06, |
|
"loss": 0.43331146, |
|
"memory(GiB)": 101.73, |
|
"step": 600, |
|
"train_speed(iter/s)": 0.342686 |
|
}, |
|
{ |
|
"acc": 0.83166571, |
|
"epoch": 0.8402777777777778, |
|
"grad_norm": 4.361044463747454, |
|
"learning_rate": 9.779976374692353e-06, |
|
"loss": 0.45408182, |
|
"memory(GiB)": 101.73, |
|
"step": 605, |
|
"train_speed(iter/s)": 0.342638 |
|
}, |
|
{ |
|
"acc": 0.82415371, |
|
"epoch": 0.8472222222222222, |
|
"grad_norm": 10.626940787117386, |
|
"learning_rate": 9.774327102207413e-06, |
|
"loss": 0.42070689, |
|
"memory(GiB)": 101.73, |
|
"step": 610, |
|
"train_speed(iter/s)": 0.342691 |
|
}, |
|
{ |
|
"acc": 0.84025803, |
|
"epoch": 0.8541666666666666, |
|
"grad_norm": 2.662755839878228, |
|
"learning_rate": 9.76860789526576e-06, |
|
"loss": 0.4043293, |
|
"memory(GiB)": 101.73, |
|
"step": 615, |
|
"train_speed(iter/s)": 0.342709 |
|
}, |
|
{ |
|
"acc": 0.83348093, |
|
"epoch": 0.8611111111111112, |
|
"grad_norm": 2.588929018249861, |
|
"learning_rate": 9.76281883765125e-06, |
|
"loss": 0.41577873, |
|
"memory(GiB)": 101.73, |
|
"step": 620, |
|
"train_speed(iter/s)": 0.342562 |
|
}, |
|
{ |
|
"acc": 0.83077507, |
|
"epoch": 0.8680555555555556, |
|
"grad_norm": 4.12654917117445, |
|
"learning_rate": 9.756960014171012e-06, |
|
"loss": 0.43080907, |
|
"memory(GiB)": 101.73, |
|
"step": 625, |
|
"train_speed(iter/s)": 0.342671 |
|
}, |
|
{ |
|
"acc": 0.827841, |
|
"epoch": 0.875, |
|
"grad_norm": 2.8606706635597354, |
|
"learning_rate": 9.751031510654226e-06, |
|
"loss": 0.43506193, |
|
"memory(GiB)": 101.73, |
|
"step": 630, |
|
"train_speed(iter/s)": 0.342731 |
|
}, |
|
{ |
|
"acc": 0.83022375, |
|
"epoch": 0.8819444444444444, |
|
"grad_norm": 6.151953684994658, |
|
"learning_rate": 9.745033413950843e-06, |
|
"loss": 0.42204194, |
|
"memory(GiB)": 101.73, |
|
"step": 635, |
|
"train_speed(iter/s)": 0.342804 |
|
}, |
|
{ |
|
"acc": 0.82415304, |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 3.2518580197951725, |
|
"learning_rate": 9.738965811930332e-06, |
|
"loss": 0.47604675, |
|
"memory(GiB)": 101.73, |
|
"step": 640, |
|
"train_speed(iter/s)": 0.342907 |
|
}, |
|
{ |
|
"acc": 0.84281521, |
|
"epoch": 0.8958333333333334, |
|
"grad_norm": 2.603532205880358, |
|
"learning_rate": 9.732828793480376e-06, |
|
"loss": 0.41225729, |
|
"memory(GiB)": 101.73, |
|
"step": 645, |
|
"train_speed(iter/s)": 0.342817 |
|
}, |
|
{ |
|
"acc": 0.83370028, |
|
"epoch": 0.9027777777777778, |
|
"grad_norm": 3.805039366288295, |
|
"learning_rate": 9.726622448505587e-06, |
|
"loss": 0.41593208, |
|
"memory(GiB)": 101.73, |
|
"step": 650, |
|
"train_speed(iter/s)": 0.342892 |
|
}, |
|
{ |
|
"acc": 0.82898254, |
|
"epoch": 0.9097222222222222, |
|
"grad_norm": 3.402485563840372, |
|
"learning_rate": 9.720346867926172e-06, |
|
"loss": 0.43477345, |
|
"memory(GiB)": 101.73, |
|
"step": 655, |
|
"train_speed(iter/s)": 0.343066 |
|
}, |
|
{ |
|
"acc": 0.85120811, |
|
"epoch": 0.9166666666666666, |
|
"grad_norm": 3.0253302648838534, |
|
"learning_rate": 9.714002143676614e-06, |
|
"loss": 0.38636012, |
|
"memory(GiB)": 101.73, |
|
"step": 660, |
|
"train_speed(iter/s)": 0.343136 |
|
}, |
|
{ |
|
"acc": 0.83650703, |
|
"epoch": 0.9236111111111112, |
|
"grad_norm": 2.6221009587089696, |
|
"learning_rate": 9.707588368704318e-06, |
|
"loss": 0.40802522, |
|
"memory(GiB)": 101.73, |
|
"step": 665, |
|
"train_speed(iter/s)": 0.34312 |
|
}, |
|
{ |
|
"acc": 0.84927387, |
|
"epoch": 0.9305555555555556, |
|
"grad_norm": 4.89731074876133, |
|
"learning_rate": 9.701105636968253e-06, |
|
"loss": 0.40131931, |
|
"memory(GiB)": 101.73, |
|
"step": 670, |
|
"train_speed(iter/s)": 0.343309 |
|
}, |
|
{ |
|
"acc": 0.84856434, |
|
"epoch": 0.9375, |
|
"grad_norm": 5.437382145928593, |
|
"learning_rate": 9.69455404343757e-06, |
|
"loss": 0.40545149, |
|
"memory(GiB)": 101.73, |
|
"step": 675, |
|
"train_speed(iter/s)": 0.343307 |
|
}, |
|
{ |
|
"acc": 0.83657379, |
|
"epoch": 0.9444444444444444, |
|
"grad_norm": 4.422858302197234, |
|
"learning_rate": 9.68793368409022e-06, |
|
"loss": 0.42424588, |
|
"memory(GiB)": 101.73, |
|
"step": 680, |
|
"train_speed(iter/s)": 0.343325 |
|
}, |
|
{ |
|
"acc": 0.84768372, |
|
"epoch": 0.9513888888888888, |
|
"grad_norm": 4.425571264161584, |
|
"learning_rate": 9.681244655911542e-06, |
|
"loss": 0.41174183, |
|
"memory(GiB)": 101.73, |
|
"step": 685, |
|
"train_speed(iter/s)": 0.343398 |
|
}, |
|
{ |
|
"acc": 0.84553738, |
|
"epoch": 0.9583333333333334, |
|
"grad_norm": 4.109087326995114, |
|
"learning_rate": 9.674487056892841e-06, |
|
"loss": 0.40106497, |
|
"memory(GiB)": 101.73, |
|
"step": 690, |
|
"train_speed(iter/s)": 0.343384 |
|
}, |
|
{ |
|
"acc": 0.83130951, |
|
"epoch": 0.9652777777777778, |
|
"grad_norm": 4.058647388023358, |
|
"learning_rate": 9.667660986029956e-06, |
|
"loss": 0.43747225, |
|
"memory(GiB)": 101.73, |
|
"step": 695, |
|
"train_speed(iter/s)": 0.34334 |
|
}, |
|
{ |
|
"acc": 0.82660599, |
|
"epoch": 0.9722222222222222, |
|
"grad_norm": 3.012549675607617, |
|
"learning_rate": 9.660766543321804e-06, |
|
"loss": 0.43917799, |
|
"memory(GiB)": 101.73, |
|
"step": 700, |
|
"train_speed(iter/s)": 0.34345 |
|
}, |
|
{ |
|
"acc": 0.85258808, |
|
"epoch": 0.9791666666666666, |
|
"grad_norm": 2.317800113030072, |
|
"learning_rate": 9.653803829768927e-06, |
|
"loss": 0.37653742, |
|
"memory(GiB)": 101.73, |
|
"step": 705, |
|
"train_speed(iter/s)": 0.343541 |
|
}, |
|
{ |
|
"acc": 0.84237957, |
|
"epoch": 0.9861111111111112, |
|
"grad_norm": 3.46822185449162, |
|
"learning_rate": 9.646772947371998e-06, |
|
"loss": 0.40804148, |
|
"memory(GiB)": 101.73, |
|
"step": 710, |
|
"train_speed(iter/s)": 0.343123 |
|
}, |
|
{ |
|
"acc": 0.84040251, |
|
"epoch": 0.9930555555555556, |
|
"grad_norm": 2.7021645387276267, |
|
"learning_rate": 9.639673999130342e-06, |
|
"loss": 0.42324858, |
|
"memory(GiB)": 101.73, |
|
"step": 715, |
|
"train_speed(iter/s)": 0.343237 |
|
}, |
|
{ |
|
"acc": 0.83597126, |
|
"epoch": 1.0, |
|
"grad_norm": 4.66166531475433, |
|
"learning_rate": 9.632507089040402e-06, |
|
"loss": 0.42435303, |
|
"memory(GiB)": 101.73, |
|
"step": 720, |
|
"train_speed(iter/s)": 0.343017 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_acc": 0.8472242146033567, |
|
"eval_loss": 0.4218238890171051, |
|
"eval_runtime": 26.0678, |
|
"eval_samples_per_second": 13.158, |
|
"eval_steps_per_second": 1.65, |
|
"step": 720 |
|
}, |
|
{ |
|
"acc": 0.84835815, |
|
"epoch": 1.0069444444444444, |
|
"grad_norm": 14.07543102393827, |
|
"learning_rate": 9.62527232209425e-06, |
|
"loss": 0.38356934, |
|
"memory(GiB)": 101.73, |
|
"step": 725, |
|
"train_speed(iter/s)": 0.332859 |
|
}, |
|
{ |
|
"acc": 0.8468194, |
|
"epoch": 1.0138888888888888, |
|
"grad_norm": 4.1338785176242805, |
|
"learning_rate": 9.617969804278023e-06, |
|
"loss": 0.40347471, |
|
"memory(GiB)": 101.73, |
|
"step": 730, |
|
"train_speed(iter/s)": 0.332833 |
|
}, |
|
{ |
|
"acc": 0.83594532, |
|
"epoch": 1.0208333333333333, |
|
"grad_norm": 6.53482454047143, |
|
"learning_rate": 9.610599642570378e-06, |
|
"loss": 0.41150756, |
|
"memory(GiB)": 101.73, |
|
"step": 735, |
|
"train_speed(iter/s)": 0.333063 |
|
}, |
|
{ |
|
"acc": 0.83995285, |
|
"epoch": 1.0277777777777777, |
|
"grad_norm": 2.0874194148799954, |
|
"learning_rate": 9.603161944940925e-06, |
|
"loss": 0.40456595, |
|
"memory(GiB)": 101.73, |
|
"step": 740, |
|
"train_speed(iter/s)": 0.333087 |
|
}, |
|
{ |
|
"acc": 0.84285097, |
|
"epoch": 1.0347222222222223, |
|
"grad_norm": 2.918185410713743, |
|
"learning_rate": 9.595656820348646e-06, |
|
"loss": 0.39343414, |
|
"memory(GiB)": 101.73, |
|
"step": 745, |
|
"train_speed(iter/s)": 0.333167 |
|
}, |
|
{ |
|
"acc": 0.84659653, |
|
"epoch": 1.0416666666666667, |
|
"grad_norm": 3.3524326036503043, |
|
"learning_rate": 9.5880843787403e-06, |
|
"loss": 0.39015245, |
|
"memory(GiB)": 101.73, |
|
"step": 750, |
|
"train_speed(iter/s)": 0.333291 |
|
}, |
|
{ |
|
"acc": 0.832335, |
|
"epoch": 1.0486111111111112, |
|
"grad_norm": 9.360098551984148, |
|
"learning_rate": 9.58044473104881e-06, |
|
"loss": 0.44525146, |
|
"memory(GiB)": 101.73, |
|
"step": 755, |
|
"train_speed(iter/s)": 0.33302 |
|
}, |
|
{ |
|
"acc": 0.83618774, |
|
"epoch": 1.0555555555555556, |
|
"grad_norm": 7.009746416299999, |
|
"learning_rate": 9.572737989191634e-06, |
|
"loss": 0.42118731, |
|
"memory(GiB)": 101.73, |
|
"step": 760, |
|
"train_speed(iter/s)": 0.332604 |
|
}, |
|
{ |
|
"acc": 0.85030022, |
|
"epoch": 1.0625, |
|
"grad_norm": 3.395766338969053, |
|
"learning_rate": 9.564964266069136e-06, |
|
"loss": 0.38766785, |
|
"memory(GiB)": 101.73, |
|
"step": 765, |
|
"train_speed(iter/s)": 0.332648 |
|
}, |
|
{ |
|
"acc": 0.84878769, |
|
"epoch": 1.0694444444444444, |
|
"grad_norm": 4.049117510071738, |
|
"learning_rate": 9.557123675562923e-06, |
|
"loss": 0.39821975, |
|
"memory(GiB)": 101.73, |
|
"step": 770, |
|
"train_speed(iter/s)": 0.332717 |
|
}, |
|
{ |
|
"acc": 0.8435914, |
|
"epoch": 1.0763888888888888, |
|
"grad_norm": 4.578339574266821, |
|
"learning_rate": 9.54921633253418e-06, |
|
"loss": 0.39716926, |
|
"memory(GiB)": 101.73, |
|
"step": 775, |
|
"train_speed(iter/s)": 0.332748 |
|
}, |
|
{ |
|
"acc": 0.8482192, |
|
"epoch": 1.0833333333333333, |
|
"grad_norm": 4.848980267588248, |
|
"learning_rate": 9.541242352821985e-06, |
|
"loss": 0.40872383, |
|
"memory(GiB)": 101.73, |
|
"step": 780, |
|
"train_speed(iter/s)": 0.332671 |
|
}, |
|
{ |
|
"acc": 0.84455881, |
|
"epoch": 1.0902777777777777, |
|
"grad_norm": 3.124037944279378, |
|
"learning_rate": 9.533201853241619e-06, |
|
"loss": 0.39232595, |
|
"memory(GiB)": 101.73, |
|
"step": 785, |
|
"train_speed(iter/s)": 0.332358 |
|
}, |
|
{ |
|
"acc": 0.83510704, |
|
"epoch": 1.0972222222222223, |
|
"grad_norm": 2.8547950538457703, |
|
"learning_rate": 9.525094951582842e-06, |
|
"loss": 0.39581535, |
|
"memory(GiB)": 101.73, |
|
"step": 790, |
|
"train_speed(iter/s)": 0.332478 |
|
}, |
|
{ |
|
"acc": 0.86455097, |
|
"epoch": 1.1041666666666667, |
|
"grad_norm": 2.5623476913367824, |
|
"learning_rate": 9.516921766608186e-06, |
|
"loss": 0.36006021, |
|
"memory(GiB)": 101.73, |
|
"step": 795, |
|
"train_speed(iter/s)": 0.332532 |
|
}, |
|
{ |
|
"acc": 0.84903641, |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 2.3914228118955396, |
|
"learning_rate": 9.508682418051192e-06, |
|
"loss": 0.37949579, |
|
"memory(GiB)": 101.73, |
|
"step": 800, |
|
"train_speed(iter/s)": 0.332636 |
|
}, |
|
{ |
|
"acc": 0.84685268, |
|
"epoch": 1.1180555555555556, |
|
"grad_norm": 4.498655619786845, |
|
"learning_rate": 9.500377026614675e-06, |
|
"loss": 0.39767621, |
|
"memory(GiB)": 101.73, |
|
"step": 805, |
|
"train_speed(iter/s)": 0.332765 |
|
}, |
|
{ |
|
"acc": 0.84496756, |
|
"epoch": 1.125, |
|
"grad_norm": 3.7161374341586946, |
|
"learning_rate": 9.492005713968949e-06, |
|
"loss": 0.39117696, |
|
"memory(GiB)": 101.73, |
|
"step": 810, |
|
"train_speed(iter/s)": 0.332767 |
|
}, |
|
{ |
|
"acc": 0.84031639, |
|
"epoch": 1.1319444444444444, |
|
"grad_norm": 3.868526550953586, |
|
"learning_rate": 9.483568602750044e-06, |
|
"loss": 0.40724792, |
|
"memory(GiB)": 101.73, |
|
"step": 815, |
|
"train_speed(iter/s)": 0.332759 |
|
}, |
|
{ |
|
"acc": 0.84572315, |
|
"epoch": 1.1388888888888888, |
|
"grad_norm": 4.011321792664004, |
|
"learning_rate": 9.47506581655791e-06, |
|
"loss": 0.41854248, |
|
"memory(GiB)": 101.73, |
|
"step": 820, |
|
"train_speed(iter/s)": 0.332793 |
|
}, |
|
{ |
|
"acc": 0.84330578, |
|
"epoch": 1.1458333333333333, |
|
"grad_norm": 2.841002351571773, |
|
"learning_rate": 9.466497479954604e-06, |
|
"loss": 0.41209116, |
|
"memory(GiB)": 101.73, |
|
"step": 825, |
|
"train_speed(iter/s)": 0.332879 |
|
}, |
|
{ |
|
"acc": 0.85965681, |
|
"epoch": 1.1527777777777777, |
|
"grad_norm": 2.9466565908740807, |
|
"learning_rate": 9.457863718462472e-06, |
|
"loss": 0.35483818, |
|
"memory(GiB)": 101.73, |
|
"step": 830, |
|
"train_speed(iter/s)": 0.333018 |
|
}, |
|
{ |
|
"acc": 0.84121952, |
|
"epoch": 1.1597222222222223, |
|
"grad_norm": 9.17125823782359, |
|
"learning_rate": 9.449164658562302e-06, |
|
"loss": 0.41710243, |
|
"memory(GiB)": 101.73, |
|
"step": 835, |
|
"train_speed(iter/s)": 0.333006 |
|
}, |
|
{ |
|
"acc": 0.85266848, |
|
"epoch": 1.1666666666666667, |
|
"grad_norm": 4.337289079629721, |
|
"learning_rate": 9.440400427691474e-06, |
|
"loss": 0.38912995, |
|
"memory(GiB)": 101.73, |
|
"step": 840, |
|
"train_speed(iter/s)": 0.333157 |
|
}, |
|
{ |
|
"acc": 0.85404491, |
|
"epoch": 1.1736111111111112, |
|
"grad_norm": 4.704538014865495, |
|
"learning_rate": 9.4315711542421e-06, |
|
"loss": 0.38565831, |
|
"memory(GiB)": 101.73, |
|
"step": 845, |
|
"train_speed(iter/s)": 0.33319 |
|
}, |
|
{ |
|
"acc": 0.85194864, |
|
"epoch": 1.1805555555555556, |
|
"grad_norm": 7.104191972117062, |
|
"learning_rate": 9.422676967559129e-06, |
|
"loss": 0.38472157, |
|
"memory(GiB)": 101.73, |
|
"step": 850, |
|
"train_speed(iter/s)": 0.333352 |
|
}, |
|
{ |
|
"acc": 0.84838581, |
|
"epoch": 1.1875, |
|
"grad_norm": 5.432606499558096, |
|
"learning_rate": 9.413717997938466e-06, |
|
"loss": 0.38313432, |
|
"memory(GiB)": 101.73, |
|
"step": 855, |
|
"train_speed(iter/s)": 0.333499 |
|
}, |
|
{ |
|
"acc": 0.8506237, |
|
"epoch": 1.1944444444444444, |
|
"grad_norm": 4.500519797720098, |
|
"learning_rate": 9.404694376625057e-06, |
|
"loss": 0.37346766, |
|
"memory(GiB)": 101.73, |
|
"step": 860, |
|
"train_speed(iter/s)": 0.333789 |
|
}, |
|
{ |
|
"acc": 0.8471118, |
|
"epoch": 1.2013888888888888, |
|
"grad_norm": 3.0647330027852053, |
|
"learning_rate": 9.395606235810962e-06, |
|
"loss": 0.3891892, |
|
"memory(GiB)": 101.73, |
|
"step": 865, |
|
"train_speed(iter/s)": 0.333964 |
|
}, |
|
{ |
|
"acc": 0.84303493, |
|
"epoch": 1.2083333333333333, |
|
"grad_norm": 3.1169217151603488, |
|
"learning_rate": 9.386453708633429e-06, |
|
"loss": 0.38980885, |
|
"memory(GiB)": 101.73, |
|
"step": 870, |
|
"train_speed(iter/s)": 0.334173 |
|
}, |
|
{ |
|
"acc": 0.83465099, |
|
"epoch": 1.2152777777777777, |
|
"grad_norm": 3.2398441704006395, |
|
"learning_rate": 9.377236929172933e-06, |
|
"loss": 0.42389526, |
|
"memory(GiB)": 101.73, |
|
"step": 875, |
|
"train_speed(iter/s)": 0.334394 |
|
}, |
|
{ |
|
"acc": 0.85308475, |
|
"epoch": 1.2222222222222223, |
|
"grad_norm": 3.2099925668997313, |
|
"learning_rate": 9.36795603245122e-06, |
|
"loss": 0.35237427, |
|
"memory(GiB)": 101.73, |
|
"step": 880, |
|
"train_speed(iter/s)": 0.334616 |
|
}, |
|
{ |
|
"acc": 0.84736195, |
|
"epoch": 1.2291666666666667, |
|
"grad_norm": 3.462100332631022, |
|
"learning_rate": 9.358611154429325e-06, |
|
"loss": 0.37894335, |
|
"memory(GiB)": 101.73, |
|
"step": 885, |
|
"train_speed(iter/s)": 0.334822 |
|
}, |
|
{ |
|
"acc": 0.83803349, |
|
"epoch": 1.2361111111111112, |
|
"grad_norm": 11.39827473436759, |
|
"learning_rate": 9.349202432005577e-06, |
|
"loss": 0.43113008, |
|
"memory(GiB)": 101.73, |
|
"step": 890, |
|
"train_speed(iter/s)": 0.334963 |
|
}, |
|
{ |
|
"acc": 0.84819775, |
|
"epoch": 1.2430555555555556, |
|
"grad_norm": 3.928967319430374, |
|
"learning_rate": 9.339730003013604e-06, |
|
"loss": 0.38550997, |
|
"memory(GiB)": 101.73, |
|
"step": 895, |
|
"train_speed(iter/s)": 0.335242 |
|
}, |
|
{ |
|
"acc": 0.85249701, |
|
"epoch": 1.25, |
|
"grad_norm": 2.149347355481868, |
|
"learning_rate": 9.330194006220303e-06, |
|
"loss": 0.37847512, |
|
"memory(GiB)": 101.73, |
|
"step": 900, |
|
"train_speed(iter/s)": 0.33542 |
|
}, |
|
{ |
|
"acc": 0.85284424, |
|
"epoch": 1.2569444444444444, |
|
"grad_norm": 2.354529081975499, |
|
"learning_rate": 9.320594581323808e-06, |
|
"loss": 0.41307311, |
|
"memory(GiB)": 101.73, |
|
"step": 905, |
|
"train_speed(iter/s)": 0.335631 |
|
}, |
|
{ |
|
"acc": 0.85289736, |
|
"epoch": 1.2638888888888888, |
|
"grad_norm": 5.563333560479702, |
|
"learning_rate": 9.310931868951452e-06, |
|
"loss": 0.38515811, |
|
"memory(GiB)": 101.73, |
|
"step": 910, |
|
"train_speed(iter/s)": 0.335843 |
|
}, |
|
{ |
|
"acc": 0.84706001, |
|
"epoch": 1.2708333333333333, |
|
"grad_norm": 2.7380569878551895, |
|
"learning_rate": 9.3012060106577e-06, |
|
"loss": 0.38339992, |
|
"memory(GiB)": 101.73, |
|
"step": 915, |
|
"train_speed(iter/s)": 0.336121 |
|
}, |
|
{ |
|
"acc": 0.86188374, |
|
"epoch": 1.2777777777777777, |
|
"grad_norm": 4.874347425169184, |
|
"learning_rate": 9.291417148922079e-06, |
|
"loss": 0.38147278, |
|
"memory(GiB)": 101.73, |
|
"step": 920, |
|
"train_speed(iter/s)": 0.336324 |
|
}, |
|
{ |
|
"acc": 0.8550766, |
|
"epoch": 1.2847222222222223, |
|
"grad_norm": 3.690731628108375, |
|
"learning_rate": 9.28156542714708e-06, |
|
"loss": 0.3768013, |
|
"memory(GiB)": 101.73, |
|
"step": 925, |
|
"train_speed(iter/s)": 0.336539 |
|
}, |
|
{ |
|
"acc": 0.84941578, |
|
"epoch": 1.2916666666666667, |
|
"grad_norm": 5.416546502861204, |
|
"learning_rate": 9.271650989656078e-06, |
|
"loss": 0.38423877, |
|
"memory(GiB)": 101.73, |
|
"step": 930, |
|
"train_speed(iter/s)": 0.336758 |
|
}, |
|
{ |
|
"acc": 0.83739882, |
|
"epoch": 1.2986111111111112, |
|
"grad_norm": 3.162933088324115, |
|
"learning_rate": 9.261673981691197e-06, |
|
"loss": 0.41248555, |
|
"memory(GiB)": 101.73, |
|
"step": 935, |
|
"train_speed(iter/s)": 0.336932 |
|
}, |
|
{ |
|
"acc": 0.84417152, |
|
"epoch": 1.3055555555555556, |
|
"grad_norm": 3.5871262320118564, |
|
"learning_rate": 9.251634549411193e-06, |
|
"loss": 0.37667794, |
|
"memory(GiB)": 101.73, |
|
"step": 940, |
|
"train_speed(iter/s)": 0.336712 |
|
}, |
|
{ |
|
"acc": 0.85208969, |
|
"epoch": 1.3125, |
|
"grad_norm": 3.743582531730351, |
|
"learning_rate": 9.24153283988931e-06, |
|
"loss": 0.39746375, |
|
"memory(GiB)": 101.73, |
|
"step": 945, |
|
"train_speed(iter/s)": 0.336799 |
|
}, |
|
{ |
|
"acc": 0.83472309, |
|
"epoch": 1.3194444444444444, |
|
"grad_norm": 3.9063355350220172, |
|
"learning_rate": 9.23136900111113e-06, |
|
"loss": 0.42442837, |
|
"memory(GiB)": 101.73, |
|
"step": 950, |
|
"train_speed(iter/s)": 0.336919 |
|
}, |
|
{ |
|
"acc": 0.83945732, |
|
"epoch": 1.3263888888888888, |
|
"grad_norm": 4.080061233293152, |
|
"learning_rate": 9.221143181972396e-06, |
|
"loss": 0.40141983, |
|
"memory(GiB)": 101.73, |
|
"step": 955, |
|
"train_speed(iter/s)": 0.337029 |
|
}, |
|
{ |
|
"acc": 0.84612141, |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 3.2821130233749876, |
|
"learning_rate": 9.210855532276836e-06, |
|
"loss": 0.39341011, |
|
"memory(GiB)": 101.73, |
|
"step": 960, |
|
"train_speed(iter/s)": 0.337125 |
|
}, |
|
{ |
|
"acc": 0.86109161, |
|
"epoch": 1.3402777777777777, |
|
"grad_norm": 1.6240104354039693, |
|
"learning_rate": 9.200506202733972e-06, |
|
"loss": 0.38223677, |
|
"memory(GiB)": 101.73, |
|
"step": 965, |
|
"train_speed(iter/s)": 0.337223 |
|
}, |
|
{ |
|
"acc": 0.84814119, |
|
"epoch": 1.3472222222222223, |
|
"grad_norm": 35.878702050120395, |
|
"learning_rate": 9.190095344956909e-06, |
|
"loss": 0.40973086, |
|
"memory(GiB)": 101.73, |
|
"step": 970, |
|
"train_speed(iter/s)": 0.337269 |
|
}, |
|
{ |
|
"acc": 0.84968128, |
|
"epoch": 1.3541666666666667, |
|
"grad_norm": 6.990269095516143, |
|
"learning_rate": 9.179623111460109e-06, |
|
"loss": 0.3797636, |
|
"memory(GiB)": 101.73, |
|
"step": 975, |
|
"train_speed(iter/s)": 0.337305 |
|
}, |
|
{ |
|
"acc": 0.84383955, |
|
"epoch": 1.3611111111111112, |
|
"grad_norm": 32.14581208739412, |
|
"learning_rate": 9.169089655657162e-06, |
|
"loss": 0.38918655, |
|
"memory(GiB)": 101.73, |
|
"step": 980, |
|
"train_speed(iter/s)": 0.337382 |
|
}, |
|
{ |
|
"acc": 0.85203476, |
|
"epoch": 1.3680555555555556, |
|
"grad_norm": 3.496050971602842, |
|
"learning_rate": 9.158495131858542e-06, |
|
"loss": 0.40532894, |
|
"memory(GiB)": 101.73, |
|
"step": 985, |
|
"train_speed(iter/s)": 0.3374 |
|
}, |
|
{ |
|
"acc": 0.85004854, |
|
"epoch": 1.375, |
|
"grad_norm": 3.8694772100362327, |
|
"learning_rate": 9.147839695269337e-06, |
|
"loss": 0.39978607, |
|
"memory(GiB)": 101.73, |
|
"step": 990, |
|
"train_speed(iter/s)": 0.337439 |
|
}, |
|
{ |
|
"acc": 0.8479394, |
|
"epoch": 1.3819444444444444, |
|
"grad_norm": 6.070730712147665, |
|
"learning_rate": 9.137123501986982e-06, |
|
"loss": 0.39016724, |
|
"memory(GiB)": 101.73, |
|
"step": 995, |
|
"train_speed(iter/s)": 0.337428 |
|
}, |
|
{ |
|
"acc": 0.85400057, |
|
"epoch": 1.3888888888888888, |
|
"grad_norm": 1.7024422206195924, |
|
"learning_rate": 9.126346708998974e-06, |
|
"loss": 0.37917585, |
|
"memory(GiB)": 101.73, |
|
"step": 1000, |
|
"train_speed(iter/s)": 0.337377 |
|
}, |
|
{ |
|
"acc": 0.84060793, |
|
"epoch": 1.3958333333333333, |
|
"grad_norm": 5.552604074545073, |
|
"learning_rate": 9.115509474180566e-06, |
|
"loss": 0.39503174, |
|
"memory(GiB)": 101.73, |
|
"step": 1005, |
|
"train_speed(iter/s)": 0.337516 |
|
}, |
|
{ |
|
"acc": 0.85068932, |
|
"epoch": 1.4027777777777777, |
|
"grad_norm": 3.0259426862916965, |
|
"learning_rate": 9.104611956292457e-06, |
|
"loss": 0.38301041, |
|
"memory(GiB)": 101.73, |
|
"step": 1010, |
|
"train_speed(iter/s)": 0.337602 |
|
}, |
|
{ |
|
"acc": 0.8504097, |
|
"epoch": 1.4097222222222223, |
|
"grad_norm": 3.5344707343383868, |
|
"learning_rate": 9.093654314978463e-06, |
|
"loss": 0.37292523, |
|
"memory(GiB)": 101.73, |
|
"step": 1015, |
|
"train_speed(iter/s)": 0.337474 |
|
}, |
|
{ |
|
"acc": 0.85743666, |
|
"epoch": 1.4166666666666667, |
|
"grad_norm": 2.630949673383209, |
|
"learning_rate": 9.08263671076319e-06, |
|
"loss": 0.36461799, |
|
"memory(GiB)": 101.73, |
|
"step": 1020, |
|
"train_speed(iter/s)": 0.337425 |
|
}, |
|
{ |
|
"acc": 0.86220655, |
|
"epoch": 1.4236111111111112, |
|
"grad_norm": 3.9855410550591905, |
|
"learning_rate": 9.071559305049667e-06, |
|
"loss": 0.35792432, |
|
"memory(GiB)": 101.73, |
|
"step": 1025, |
|
"train_speed(iter/s)": 0.337411 |
|
}, |
|
{ |
|
"acc": 0.84792109, |
|
"epoch": 1.4305555555555556, |
|
"grad_norm": 3.7922263544393404, |
|
"learning_rate": 9.060422260116992e-06, |
|
"loss": 0.38916125, |
|
"memory(GiB)": 101.73, |
|
"step": 1030, |
|
"train_speed(iter/s)": 0.337365 |
|
}, |
|
{ |
|
"acc": 0.85099182, |
|
"epoch": 1.4375, |
|
"grad_norm": 3.70846293904974, |
|
"learning_rate": 9.049225739117948e-06, |
|
"loss": 0.36477528, |
|
"memory(GiB)": 101.73, |
|
"step": 1035, |
|
"train_speed(iter/s)": 0.33741 |
|
}, |
|
{ |
|
"acc": 0.86404428, |
|
"epoch": 1.4444444444444444, |
|
"grad_norm": 2.37261103672829, |
|
"learning_rate": 9.03796990607662e-06, |
|
"loss": 0.34631798, |
|
"memory(GiB)": 101.73, |
|
"step": 1040, |
|
"train_speed(iter/s)": 0.337456 |
|
}, |
|
{ |
|
"acc": 0.86004276, |
|
"epoch": 1.4513888888888888, |
|
"grad_norm": 2.908735431664467, |
|
"learning_rate": 9.026654925885986e-06, |
|
"loss": 0.36338158, |
|
"memory(GiB)": 101.73, |
|
"step": 1045, |
|
"train_speed(iter/s)": 0.337461 |
|
}, |
|
{ |
|
"acc": 0.85583038, |
|
"epoch": 1.4583333333333333, |
|
"grad_norm": 4.465703141418222, |
|
"learning_rate": 9.015280964305504e-06, |
|
"loss": 0.34531968, |
|
"memory(GiB)": 101.73, |
|
"step": 1050, |
|
"train_speed(iter/s)": 0.337494 |
|
}, |
|
{ |
|
"acc": 0.85433445, |
|
"epoch": 1.4652777777777777, |
|
"grad_norm": 3.770405959716388, |
|
"learning_rate": 9.003848187958681e-06, |
|
"loss": 0.38607841, |
|
"memory(GiB)": 101.73, |
|
"step": 1055, |
|
"train_speed(iter/s)": 0.337509 |
|
}, |
|
{ |
|
"acc": 0.858428, |
|
"epoch": 1.4722222222222223, |
|
"grad_norm": 3.379640478027579, |
|
"learning_rate": 8.99235676433064e-06, |
|
"loss": 0.35510893, |
|
"memory(GiB)": 101.73, |
|
"step": 1060, |
|
"train_speed(iter/s)": 0.337503 |
|
}, |
|
{ |
|
"acc": 0.85026894, |
|
"epoch": 1.4791666666666667, |
|
"grad_norm": 2.2957543163398397, |
|
"learning_rate": 8.980806861765652e-06, |
|
"loss": 0.37486589, |
|
"memory(GiB)": 101.73, |
|
"step": 1065, |
|
"train_speed(iter/s)": 0.337572 |
|
}, |
|
{ |
|
"acc": 0.85720415, |
|
"epoch": 1.4861111111111112, |
|
"grad_norm": 3.2746297247572738, |
|
"learning_rate": 8.969198649464691e-06, |
|
"loss": 0.37236695, |
|
"memory(GiB)": 101.73, |
|
"step": 1070, |
|
"train_speed(iter/s)": 0.337652 |
|
}, |
|
{ |
|
"acc": 0.84375439, |
|
"epoch": 1.4930555555555556, |
|
"grad_norm": 5.001943381636829, |
|
"learning_rate": 8.95753229748293e-06, |
|
"loss": 0.39222441, |
|
"memory(GiB)": 101.73, |
|
"step": 1075, |
|
"train_speed(iter/s)": 0.337681 |
|
}, |
|
{ |
|
"acc": 0.84540091, |
|
"epoch": 1.5, |
|
"grad_norm": 6.181217782470258, |
|
"learning_rate": 8.94580797672727e-06, |
|
"loss": 0.39223666, |
|
"memory(GiB)": 101.73, |
|
"step": 1080, |
|
"train_speed(iter/s)": 0.337607 |
|
}, |
|
{ |
|
"acc": 0.85473719, |
|
"epoch": 1.5069444444444444, |
|
"grad_norm": 2.678403983509847, |
|
"learning_rate": 8.934025858953828e-06, |
|
"loss": 0.3707407, |
|
"memory(GiB)": 101.73, |
|
"step": 1085, |
|
"train_speed(iter/s)": 0.337461 |
|
}, |
|
{ |
|
"acc": 0.86338501, |
|
"epoch": 1.5138888888888888, |
|
"grad_norm": 3.9439070272171293, |
|
"learning_rate": 8.92218611676542e-06, |
|
"loss": 0.35309997, |
|
"memory(GiB)": 101.73, |
|
"step": 1090, |
|
"train_speed(iter/s)": 0.337552 |
|
}, |
|
{ |
|
"acc": 0.84891472, |
|
"epoch": 1.5208333333333335, |
|
"grad_norm": 5.46194063471783, |
|
"learning_rate": 8.910288923609034e-06, |
|
"loss": 0.39501739, |
|
"memory(GiB)": 101.73, |
|
"step": 1095, |
|
"train_speed(iter/s)": 0.337604 |
|
}, |
|
{ |
|
"acc": 0.85439892, |
|
"epoch": 1.5277777777777777, |
|
"grad_norm": 2.6521402168678265, |
|
"learning_rate": 8.898334453773292e-06, |
|
"loss": 0.37683649, |
|
"memory(GiB)": 101.73, |
|
"step": 1100, |
|
"train_speed(iter/s)": 0.337666 |
|
}, |
|
{ |
|
"acc": 0.85185204, |
|
"epoch": 1.5347222222222223, |
|
"grad_norm": 2.635454666402325, |
|
"learning_rate": 8.886322882385894e-06, |
|
"loss": 0.37046089, |
|
"memory(GiB)": 101.73, |
|
"step": 1105, |
|
"train_speed(iter/s)": 0.337711 |
|
}, |
|
{ |
|
"acc": 0.85055122, |
|
"epoch": 1.5416666666666665, |
|
"grad_norm": 3.3497933627360177, |
|
"learning_rate": 8.874254385411048e-06, |
|
"loss": 0.37285306, |
|
"memory(GiB)": 101.73, |
|
"step": 1110, |
|
"train_speed(iter/s)": 0.3377 |
|
}, |
|
{ |
|
"acc": 0.84177113, |
|
"epoch": 1.5486111111111112, |
|
"grad_norm": 2.6691808877766103, |
|
"learning_rate": 8.8621291396469e-06, |
|
"loss": 0.39528844, |
|
"memory(GiB)": 101.73, |
|
"step": 1115, |
|
"train_speed(iter/s)": 0.337819 |
|
}, |
|
{ |
|
"acc": 0.85968094, |
|
"epoch": 1.5555555555555556, |
|
"grad_norm": 3.9807492163771157, |
|
"learning_rate": 8.849947322722941e-06, |
|
"loss": 0.37127504, |
|
"memory(GiB)": 101.73, |
|
"step": 1120, |
|
"train_speed(iter/s)": 0.337813 |
|
}, |
|
{ |
|
"acc": 0.8640234, |
|
"epoch": 1.5625, |
|
"grad_norm": 4.904076026536717, |
|
"learning_rate": 8.837709113097406e-06, |
|
"loss": 0.3620749, |
|
"memory(GiB)": 101.73, |
|
"step": 1125, |
|
"train_speed(iter/s)": 0.337775 |
|
}, |
|
{ |
|
"acc": 0.85505867, |
|
"epoch": 1.5694444444444444, |
|
"grad_norm": 1.7650593378125794, |
|
"learning_rate": 8.825414690054652e-06, |
|
"loss": 0.37128978, |
|
"memory(GiB)": 101.73, |
|
"step": 1130, |
|
"train_speed(iter/s)": 0.337724 |
|
}, |
|
{ |
|
"acc": 0.84300156, |
|
"epoch": 1.5763888888888888, |
|
"grad_norm": 6.075752458033056, |
|
"learning_rate": 8.813064233702543e-06, |
|
"loss": 0.39115798, |
|
"memory(GiB)": 101.73, |
|
"step": 1135, |
|
"train_speed(iter/s)": 0.337764 |
|
}, |
|
{ |
|
"acc": 0.84721622, |
|
"epoch": 1.5833333333333335, |
|
"grad_norm": 5.222470197877979, |
|
"learning_rate": 8.800657924969805e-06, |
|
"loss": 0.37910523, |
|
"memory(GiB)": 101.73, |
|
"step": 1140, |
|
"train_speed(iter/s)": 0.33781 |
|
}, |
|
{ |
|
"acc": 0.85609684, |
|
"epoch": 1.5902777777777777, |
|
"grad_norm": 2.89223156856068, |
|
"learning_rate": 8.788195945603379e-06, |
|
"loss": 0.37958903, |
|
"memory(GiB)": 101.73, |
|
"step": 1145, |
|
"train_speed(iter/s)": 0.337714 |
|
}, |
|
{ |
|
"acc": 0.85490465, |
|
"epoch": 1.5972222222222223, |
|
"grad_norm": 2.104304085343702, |
|
"learning_rate": 8.775678478165751e-06, |
|
"loss": 0.36199951, |
|
"memory(GiB)": 101.73, |
|
"step": 1150, |
|
"train_speed(iter/s)": 0.3376 |
|
}, |
|
{ |
|
"acc": 0.86297607, |
|
"epoch": 1.6041666666666665, |
|
"grad_norm": 3.9067520290967486, |
|
"learning_rate": 8.763105706032287e-06, |
|
"loss": 0.33438387, |
|
"memory(GiB)": 101.73, |
|
"step": 1155, |
|
"train_speed(iter/s)": 0.337662 |
|
}, |
|
{ |
|
"acc": 0.85664263, |
|
"epoch": 1.6111111111111112, |
|
"grad_norm": 56.10959937314398, |
|
"learning_rate": 8.750477813388537e-06, |
|
"loss": 0.3823755, |
|
"memory(GiB)": 101.73, |
|
"step": 1160, |
|
"train_speed(iter/s)": 0.337668 |
|
}, |
|
{ |
|
"acc": 0.84644604, |
|
"epoch": 1.6180555555555556, |
|
"grad_norm": 3.1303485574935124, |
|
"learning_rate": 8.737794985227552e-06, |
|
"loss": 0.37934666, |
|
"memory(GiB)": 101.73, |
|
"step": 1165, |
|
"train_speed(iter/s)": 0.337782 |
|
}, |
|
{ |
|
"acc": 0.86156435, |
|
"epoch": 1.625, |
|
"grad_norm": 3.779291654725943, |
|
"learning_rate": 8.725057407347151e-06, |
|
"loss": 0.34583051, |
|
"memory(GiB)": 101.73, |
|
"step": 1170, |
|
"train_speed(iter/s)": 0.337815 |
|
}, |
|
{ |
|
"acc": 0.86057777, |
|
"epoch": 1.6319444444444444, |
|
"grad_norm": 6.305331333525041, |
|
"learning_rate": 8.712265266347225e-06, |
|
"loss": 0.3509572, |
|
"memory(GiB)": 101.73, |
|
"step": 1175, |
|
"train_speed(iter/s)": 0.337875 |
|
}, |
|
{ |
|
"acc": 0.84179935, |
|
"epoch": 1.6388888888888888, |
|
"grad_norm": 3.534057697831123, |
|
"learning_rate": 8.699418749626983e-06, |
|
"loss": 0.38306279, |
|
"memory(GiB)": 101.73, |
|
"step": 1180, |
|
"train_speed(iter/s)": 0.337986 |
|
}, |
|
{ |
|
"acc": 0.86022358, |
|
"epoch": 1.6458333333333335, |
|
"grad_norm": 3.596616194421774, |
|
"learning_rate": 8.686518045382216e-06, |
|
"loss": 0.35667768, |
|
"memory(GiB)": 101.73, |
|
"step": 1185, |
|
"train_speed(iter/s)": 0.337903 |
|
}, |
|
{ |
|
"acc": 0.84735928, |
|
"epoch": 1.6527777777777777, |
|
"grad_norm": 2.8042991260508248, |
|
"learning_rate": 8.673563342602538e-06, |
|
"loss": 0.37387271, |
|
"memory(GiB)": 101.73, |
|
"step": 1190, |
|
"train_speed(iter/s)": 0.337983 |
|
}, |
|
{ |
|
"acc": 0.85692234, |
|
"epoch": 1.6597222222222223, |
|
"grad_norm": 2.7613649008585712, |
|
"learning_rate": 8.660554831068615e-06, |
|
"loss": 0.3682164, |
|
"memory(GiB)": 101.73, |
|
"step": 1195, |
|
"train_speed(iter/s)": 0.33803 |
|
}, |
|
{ |
|
"acc": 0.850665, |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 6.354642055126395, |
|
"learning_rate": 8.647492701349395e-06, |
|
"loss": 0.36811004, |
|
"memory(GiB)": 101.73, |
|
"step": 1200, |
|
"train_speed(iter/s)": 0.338156 |
|
}, |
|
{ |
|
"acc": 0.84379854, |
|
"epoch": 1.6736111111111112, |
|
"grad_norm": 3.1757762546543247, |
|
"learning_rate": 8.634377144799303e-06, |
|
"loss": 0.39622905, |
|
"memory(GiB)": 101.73, |
|
"step": 1205, |
|
"train_speed(iter/s)": 0.338155 |
|
}, |
|
{ |
|
"acc": 0.8438199, |
|
"epoch": 1.6805555555555556, |
|
"grad_norm": 2.9686876574121994, |
|
"learning_rate": 8.621208353555442e-06, |
|
"loss": 0.37581418, |
|
"memory(GiB)": 101.73, |
|
"step": 1210, |
|
"train_speed(iter/s)": 0.338178 |
|
}, |
|
{ |
|
"acc": 0.85528498, |
|
"epoch": 1.6875, |
|
"grad_norm": 2.5489253707665847, |
|
"learning_rate": 8.607986520534785e-06, |
|
"loss": 0.37263567, |
|
"memory(GiB)": 101.73, |
|
"step": 1215, |
|
"train_speed(iter/s)": 0.338207 |
|
}, |
|
{ |
|
"acc": 0.85820246, |
|
"epoch": 1.6944444444444444, |
|
"grad_norm": 4.887891679393261, |
|
"learning_rate": 8.594711839431341e-06, |
|
"loss": 0.37658699, |
|
"memory(GiB)": 101.73, |
|
"step": 1220, |
|
"train_speed(iter/s)": 0.338112 |
|
}, |
|
{ |
|
"acc": 0.85092402, |
|
"epoch": 1.7013888888888888, |
|
"grad_norm": 2.360910524678318, |
|
"learning_rate": 8.581384504713316e-06, |
|
"loss": 0.36587248, |
|
"memory(GiB)": 101.73, |
|
"step": 1225, |
|
"train_speed(iter/s)": 0.338187 |
|
}, |
|
{ |
|
"acc": 0.85391512, |
|
"epoch": 1.7083333333333335, |
|
"grad_norm": 2.463848635972625, |
|
"learning_rate": 8.568004711620276e-06, |
|
"loss": 0.37998319, |
|
"memory(GiB)": 101.73, |
|
"step": 1230, |
|
"train_speed(iter/s)": 0.338201 |
|
}, |
|
{ |
|
"acc": 0.86517754, |
|
"epoch": 1.7152777777777777, |
|
"grad_norm": 2.939085237700951, |
|
"learning_rate": 8.554572656160276e-06, |
|
"loss": 0.34979258, |
|
"memory(GiB)": 101.73, |
|
"step": 1235, |
|
"train_speed(iter/s)": 0.338314 |
|
}, |
|
{ |
|
"acc": 0.86364365, |
|
"epoch": 1.7222222222222223, |
|
"grad_norm": 3.3111152008785436, |
|
"learning_rate": 8.541088535106987e-06, |
|
"loss": 0.35193083, |
|
"memory(GiB)": 101.73, |
|
"step": 1240, |
|
"train_speed(iter/s)": 0.338441 |
|
}, |
|
{ |
|
"acc": 0.85455751, |
|
"epoch": 1.7291666666666665, |
|
"grad_norm": 2.8670527839907876, |
|
"learning_rate": 8.527552545996823e-06, |
|
"loss": 0.37321618, |
|
"memory(GiB)": 101.73, |
|
"step": 1245, |
|
"train_speed(iter/s)": 0.33858 |
|
}, |
|
{ |
|
"acc": 0.86149797, |
|
"epoch": 1.7361111111111112, |
|
"grad_norm": 6.333170412320581, |
|
"learning_rate": 8.513964887126042e-06, |
|
"loss": 0.34765677, |
|
"memory(GiB)": 101.73, |
|
"step": 1250, |
|
"train_speed(iter/s)": 0.338722 |
|
}, |
|
{ |
|
"acc": 0.86266346, |
|
"epoch": 1.7430555555555556, |
|
"grad_norm": 6.15251081063916, |
|
"learning_rate": 8.500325757547837e-06, |
|
"loss": 0.35451782, |
|
"memory(GiB)": 101.73, |
|
"step": 1255, |
|
"train_speed(iter/s)": 0.338819 |
|
}, |
|
{ |
|
"acc": 0.86682644, |
|
"epoch": 1.75, |
|
"grad_norm": 3.3602407157735326, |
|
"learning_rate": 8.486635357069431e-06, |
|
"loss": 0.35656066, |
|
"memory(GiB)": 101.73, |
|
"step": 1260, |
|
"train_speed(iter/s)": 0.338924 |
|
}, |
|
{ |
|
"acc": 0.87807121, |
|
"epoch": 1.7569444444444444, |
|
"grad_norm": 6.308557312315135, |
|
"learning_rate": 8.472893886249137e-06, |
|
"loss": 0.32822404, |
|
"memory(GiB)": 101.73, |
|
"step": 1265, |
|
"train_speed(iter/s)": 0.338995 |
|
}, |
|
{ |
|
"acc": 0.8561429, |
|
"epoch": 1.7638888888888888, |
|
"grad_norm": 18.368177846975684, |
|
"learning_rate": 8.459101546393425e-06, |
|
"loss": 0.37018564, |
|
"memory(GiB)": 101.73, |
|
"step": 1270, |
|
"train_speed(iter/s)": 0.339068 |
|
}, |
|
{ |
|
"acc": 0.85693264, |
|
"epoch": 1.7708333333333335, |
|
"grad_norm": 4.368985948399491, |
|
"learning_rate": 8.44525853955398e-06, |
|
"loss": 0.36033721, |
|
"memory(GiB)": 101.73, |
|
"step": 1275, |
|
"train_speed(iter/s)": 0.339157 |
|
}, |
|
{ |
|
"acc": 0.85860252, |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 6.537391483785996, |
|
"learning_rate": 8.431365068524735e-06, |
|
"loss": 0.34542758, |
|
"memory(GiB)": 101.73, |
|
"step": 1280, |
|
"train_speed(iter/s)": 0.339218 |
|
}, |
|
{ |
|
"acc": 0.86011286, |
|
"epoch": 1.7847222222222223, |
|
"grad_norm": 3.6615393374801717, |
|
"learning_rate": 8.417421336838895e-06, |
|
"loss": 0.35445812, |
|
"memory(GiB)": 101.73, |
|
"step": 1285, |
|
"train_speed(iter/s)": 0.339293 |
|
}, |
|
{ |
|
"acc": 0.86690083, |
|
"epoch": 1.7916666666666665, |
|
"grad_norm": 9.494908561151547, |
|
"learning_rate": 8.403427548765964e-06, |
|
"loss": 0.35471287, |
|
"memory(GiB)": 101.73, |
|
"step": 1290, |
|
"train_speed(iter/s)": 0.339365 |
|
}, |
|
{ |
|
"acc": 0.86261654, |
|
"epoch": 1.7986111111111112, |
|
"grad_norm": 4.086474959822119, |
|
"learning_rate": 8.389383909308754e-06, |
|
"loss": 0.36231318, |
|
"memory(GiB)": 101.73, |
|
"step": 1295, |
|
"train_speed(iter/s)": 0.339445 |
|
}, |
|
{ |
|
"acc": 0.86633072, |
|
"epoch": 1.8055555555555556, |
|
"grad_norm": 3.1457008532368587, |
|
"learning_rate": 8.375290624200375e-06, |
|
"loss": 0.33784354, |
|
"memory(GiB)": 101.73, |
|
"step": 1300, |
|
"train_speed(iter/s)": 0.339499 |
|
}, |
|
{ |
|
"acc": 0.85676422, |
|
"epoch": 1.8125, |
|
"grad_norm": 3.0219513188022797, |
|
"learning_rate": 8.361147899901222e-06, |
|
"loss": 0.35047636, |
|
"memory(GiB)": 101.73, |
|
"step": 1305, |
|
"train_speed(iter/s)": 0.339614 |
|
}, |
|
{ |
|
"acc": 0.87211819, |
|
"epoch": 1.8194444444444444, |
|
"grad_norm": 2.162908540556724, |
|
"learning_rate": 8.346955943595957e-06, |
|
"loss": 0.34178371, |
|
"memory(GiB)": 101.73, |
|
"step": 1310, |
|
"train_speed(iter/s)": 0.339668 |
|
}, |
|
{ |
|
"acc": 0.86086178, |
|
"epoch": 1.8263888888888888, |
|
"grad_norm": 5.106198424264787, |
|
"learning_rate": 8.332714963190462e-06, |
|
"loss": 0.36008306, |
|
"memory(GiB)": 101.73, |
|
"step": 1315, |
|
"train_speed(iter/s)": 0.339762 |
|
}, |
|
{ |
|
"acc": 0.86688232, |
|
"epoch": 1.8333333333333335, |
|
"grad_norm": 5.20553175585457, |
|
"learning_rate": 8.318425167308806e-06, |
|
"loss": 0.34904175, |
|
"memory(GiB)": 101.73, |
|
"step": 1320, |
|
"train_speed(iter/s)": 0.339817 |
|
}, |
|
{ |
|
"acc": 0.86953564, |
|
"epoch": 1.8402777777777777, |
|
"grad_norm": 1.9169409393919037, |
|
"learning_rate": 8.304086765290184e-06, |
|
"loss": 0.32462049, |
|
"memory(GiB)": 101.73, |
|
"step": 1325, |
|
"train_speed(iter/s)": 0.339894 |
|
}, |
|
{ |
|
"acc": 0.85959816, |
|
"epoch": 1.8472222222222223, |
|
"grad_norm": 2.919120395900884, |
|
"learning_rate": 8.289699967185843e-06, |
|
"loss": 0.34003651, |
|
"memory(GiB)": 101.73, |
|
"step": 1330, |
|
"train_speed(iter/s)": 0.339982 |
|
}, |
|
{ |
|
"acc": 0.86576004, |
|
"epoch": 1.8541666666666665, |
|
"grad_norm": 3.339716130934134, |
|
"learning_rate": 8.27526498375602e-06, |
|
"loss": 0.34242401, |
|
"memory(GiB)": 101.73, |
|
"step": 1335, |
|
"train_speed(iter/s)": 0.340129 |
|
}, |
|
{ |
|
"acc": 0.8572998, |
|
"epoch": 1.8611111111111112, |
|
"grad_norm": 3.4253838339731804, |
|
"learning_rate": 8.260782026466838e-06, |
|
"loss": 0.36056826, |
|
"memory(GiB)": 101.73, |
|
"step": 1340, |
|
"train_speed(iter/s)": 0.340256 |
|
}, |
|
{ |
|
"acc": 0.85873718, |
|
"epoch": 1.8680555555555556, |
|
"grad_norm": 3.329467071510601, |
|
"learning_rate": 8.24625130748722e-06, |
|
"loss": 0.34768081, |
|
"memory(GiB)": 101.73, |
|
"step": 1345, |
|
"train_speed(iter/s)": 0.340386 |
|
}, |
|
{ |
|
"acc": 0.85568542, |
|
"epoch": 1.875, |
|
"grad_norm": 3.4184372446872815, |
|
"learning_rate": 8.231673039685774e-06, |
|
"loss": 0.37388916, |
|
"memory(GiB)": 101.73, |
|
"step": 1350, |
|
"train_speed(iter/s)": 0.340521 |
|
}, |
|
{ |
|
"acc": 0.85642204, |
|
"epoch": 1.8819444444444444, |
|
"grad_norm": 2.461666257667224, |
|
"learning_rate": 8.217047436627683e-06, |
|
"loss": 0.35878954, |
|
"memory(GiB)": 101.73, |
|
"step": 1355, |
|
"train_speed(iter/s)": 0.340639 |
|
}, |
|
{ |
|
"acc": 0.86499329, |
|
"epoch": 1.8888888888888888, |
|
"grad_norm": 4.259794208029547, |
|
"learning_rate": 8.20237471257156e-06, |
|
"loss": 0.33968384, |
|
"memory(GiB)": 101.73, |
|
"step": 1360, |
|
"train_speed(iter/s)": 0.340762 |
|
}, |
|
{ |
|
"acc": 0.85926418, |
|
"epoch": 1.8958333333333335, |
|
"grad_norm": 2.3889340744047813, |
|
"learning_rate": 8.18765508246633e-06, |
|
"loss": 0.34691594, |
|
"memory(GiB)": 101.73, |
|
"step": 1365, |
|
"train_speed(iter/s)": 0.340846 |
|
}, |
|
{ |
|
"acc": 0.86262779, |
|
"epoch": 1.9027777777777777, |
|
"grad_norm": 3.6312299823676577, |
|
"learning_rate": 8.172888761948066e-06, |
|
"loss": 0.34867449, |
|
"memory(GiB)": 101.73, |
|
"step": 1370, |
|
"train_speed(iter/s)": 0.340903 |
|
}, |
|
{ |
|
"acc": 0.86736736, |
|
"epoch": 1.9097222222222223, |
|
"grad_norm": 4.0784838300339725, |
|
"learning_rate": 8.158075967336838e-06, |
|
"loss": 0.35361021, |
|
"memory(GiB)": 101.73, |
|
"step": 1375, |
|
"train_speed(iter/s)": 0.340966 |
|
}, |
|
{ |
|
"acc": 0.86585445, |
|
"epoch": 1.9166666666666665, |
|
"grad_norm": 2.758741475011602, |
|
"learning_rate": 8.143216915633535e-06, |
|
"loss": 0.34491754, |
|
"memory(GiB)": 101.73, |
|
"step": 1380, |
|
"train_speed(iter/s)": 0.341008 |
|
}, |
|
{ |
|
"acc": 0.85726538, |
|
"epoch": 1.9236111111111112, |
|
"grad_norm": 3.5673127167589462, |
|
"learning_rate": 8.1283118245167e-06, |
|
"loss": 0.36892872, |
|
"memory(GiB)": 101.73, |
|
"step": 1385, |
|
"train_speed(iter/s)": 0.341079 |
|
}, |
|
{ |
|
"acc": 0.8536458, |
|
"epoch": 1.9305555555555556, |
|
"grad_norm": 3.1994250752022806, |
|
"learning_rate": 8.113360912339326e-06, |
|
"loss": 0.36794338, |
|
"memory(GiB)": 101.73, |
|
"step": 1390, |
|
"train_speed(iter/s)": 0.341193 |
|
}, |
|
{ |
|
"acc": 0.87299528, |
|
"epoch": 1.9375, |
|
"grad_norm": 5.391090918639991, |
|
"learning_rate": 8.09836439812567e-06, |
|
"loss": 0.33371003, |
|
"memory(GiB)": 101.73, |
|
"step": 1395, |
|
"train_speed(iter/s)": 0.341264 |
|
}, |
|
{ |
|
"acc": 0.86591825, |
|
"epoch": 1.9444444444444444, |
|
"grad_norm": 3.3763410094981285, |
|
"learning_rate": 8.083322501568032e-06, |
|
"loss": 0.34277546, |
|
"memory(GiB)": 101.73, |
|
"step": 1400, |
|
"train_speed(iter/s)": 0.341332 |
|
}, |
|
{ |
|
"acc": 0.86980515, |
|
"epoch": 1.9513888888888888, |
|
"grad_norm": 2.7363409438146022, |
|
"learning_rate": 8.06823544302355e-06, |
|
"loss": 0.32296598, |
|
"memory(GiB)": 101.73, |
|
"step": 1405, |
|
"train_speed(iter/s)": 0.341398 |
|
}, |
|
{ |
|
"acc": 0.86319895, |
|
"epoch": 1.9583333333333335, |
|
"grad_norm": 3.0147390689828484, |
|
"learning_rate": 8.053103443510962e-06, |
|
"loss": 0.33763702, |
|
"memory(GiB)": 101.73, |
|
"step": 1410, |
|
"train_speed(iter/s)": 0.341474 |
|
}, |
|
{ |
|
"acc": 0.8724597, |
|
"epoch": 1.9652777777777777, |
|
"grad_norm": 3.6283792947150166, |
|
"learning_rate": 8.037926724707367e-06, |
|
"loss": 0.32401137, |
|
"memory(GiB)": 101.73, |
|
"step": 1415, |
|
"train_speed(iter/s)": 0.341572 |
|
}, |
|
{ |
|
"acc": 0.87336702, |
|
"epoch": 1.9722222222222223, |
|
"grad_norm": 3.231102866648495, |
|
"learning_rate": 8.022705508944994e-06, |
|
"loss": 0.32258878, |
|
"memory(GiB)": 101.73, |
|
"step": 1420, |
|
"train_speed(iter/s)": 0.341648 |
|
}, |
|
{ |
|
"acc": 0.87259836, |
|
"epoch": 1.9791666666666665, |
|
"grad_norm": 4.190611839214796, |
|
"learning_rate": 8.007440019207919e-06, |
|
"loss": 0.33295143, |
|
"memory(GiB)": 101.73, |
|
"step": 1425, |
|
"train_speed(iter/s)": 0.341692 |
|
}, |
|
{ |
|
"acc": 0.86225185, |
|
"epoch": 1.9861111111111112, |
|
"grad_norm": 3.999782034857841, |
|
"learning_rate": 7.992130479128823e-06, |
|
"loss": 0.33923955, |
|
"memory(GiB)": 101.73, |
|
"step": 1430, |
|
"train_speed(iter/s)": 0.34176 |
|
}, |
|
{ |
|
"acc": 0.88750267, |
|
"epoch": 1.9930555555555556, |
|
"grad_norm": 3.0649487093465306, |
|
"learning_rate": 7.976777112985696e-06, |
|
"loss": 0.29414239, |
|
"memory(GiB)": 101.73, |
|
"step": 1435, |
|
"train_speed(iter/s)": 0.341782 |
|
}, |
|
{ |
|
"acc": 0.86065731, |
|
"epoch": 2.0, |
|
"grad_norm": 4.492541294620969, |
|
"learning_rate": 7.96138014569857e-06, |
|
"loss": 0.35968423, |
|
"memory(GiB)": 101.73, |
|
"step": 1440, |
|
"train_speed(iter/s)": 0.34183 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_acc": 0.8741930856405107, |
|
"eval_loss": 0.32394787669181824, |
|
"eval_runtime": 25.9731, |
|
"eval_samples_per_second": 13.206, |
|
"eval_steps_per_second": 1.656, |
|
"step": 1440 |
|
}, |
|
{ |
|
"acc": 0.87566061, |
|
"epoch": 2.0069444444444446, |
|
"grad_norm": 3.2988239118803357, |
|
"learning_rate": 7.945939802826204e-06, |
|
"loss": 0.33297586, |
|
"memory(GiB)": 101.73, |
|
"step": 1445, |
|
"train_speed(iter/s)": 0.336817 |
|
}, |
|
{ |
|
"acc": 0.87367706, |
|
"epoch": 2.013888888888889, |
|
"grad_norm": 3.9864890899544867, |
|
"learning_rate": 7.930456310562798e-06, |
|
"loss": 0.32236509, |
|
"memory(GiB)": 101.73, |
|
"step": 1450, |
|
"train_speed(iter/s)": 0.336936 |
|
}, |
|
{ |
|
"acc": 0.88408442, |
|
"epoch": 2.0208333333333335, |
|
"grad_norm": 3.142019377538419, |
|
"learning_rate": 7.914929895734667e-06, |
|
"loss": 0.30712235, |
|
"memory(GiB)": 101.73, |
|
"step": 1455, |
|
"train_speed(iter/s)": 0.337046 |
|
}, |
|
{ |
|
"acc": 0.87545471, |
|
"epoch": 2.0277777777777777, |
|
"grad_norm": 21.579465515202426, |
|
"learning_rate": 7.899360785796927e-06, |
|
"loss": 0.31951995, |
|
"memory(GiB)": 101.73, |
|
"step": 1460, |
|
"train_speed(iter/s)": 0.337169 |
|
}, |
|
{ |
|
"acc": 0.869561, |
|
"epoch": 2.0347222222222223, |
|
"grad_norm": 3.2772620999715767, |
|
"learning_rate": 7.883749208830157e-06, |
|
"loss": 0.32944577, |
|
"memory(GiB)": 101.73, |
|
"step": 1465, |
|
"train_speed(iter/s)": 0.337198 |
|
}, |
|
{ |
|
"acc": 0.86731529, |
|
"epoch": 2.0416666666666665, |
|
"grad_norm": 5.482996431081107, |
|
"learning_rate": 7.868095393537055e-06, |
|
"loss": 0.33758581, |
|
"memory(GiB)": 101.73, |
|
"step": 1470, |
|
"train_speed(iter/s)": 0.337249 |
|
}, |
|
{ |
|
"acc": 0.87085705, |
|
"epoch": 2.048611111111111, |
|
"grad_norm": 3.542698342334349, |
|
"learning_rate": 7.852399569239099e-06, |
|
"loss": 0.32252483, |
|
"memory(GiB)": 101.73, |
|
"step": 1475, |
|
"train_speed(iter/s)": 0.337307 |
|
}, |
|
{ |
|
"acc": 0.86159992, |
|
"epoch": 2.0555555555555554, |
|
"grad_norm": 2.853821845059801, |
|
"learning_rate": 7.836661965873173e-06, |
|
"loss": 0.34277472, |
|
"memory(GiB)": 101.73, |
|
"step": 1480, |
|
"train_speed(iter/s)": 0.337372 |
|
}, |
|
{ |
|
"acc": 0.86977177, |
|
"epoch": 2.0625, |
|
"grad_norm": 2.335202080690499, |
|
"learning_rate": 7.820882813988209e-06, |
|
"loss": 0.35118761, |
|
"memory(GiB)": 101.73, |
|
"step": 1485, |
|
"train_speed(iter/s)": 0.337422 |
|
}, |
|
{ |
|
"acc": 0.8722353, |
|
"epoch": 2.0694444444444446, |
|
"grad_norm": 3.1769141565351586, |
|
"learning_rate": 7.805062344741807e-06, |
|
"loss": 0.32959652, |
|
"memory(GiB)": 101.73, |
|
"step": 1490, |
|
"train_speed(iter/s)": 0.337452 |
|
}, |
|
{ |
|
"acc": 0.87156506, |
|
"epoch": 2.076388888888889, |
|
"grad_norm": 5.028143683249058, |
|
"learning_rate": 7.789200789896853e-06, |
|
"loss": 0.33031771, |
|
"memory(GiB)": 101.73, |
|
"step": 1495, |
|
"train_speed(iter/s)": 0.337511 |
|
}, |
|
{ |
|
"acc": 0.86954565, |
|
"epoch": 2.0833333333333335, |
|
"grad_norm": 3.7873337590539076, |
|
"learning_rate": 7.773298381818106e-06, |
|
"loss": 0.32032595, |
|
"memory(GiB)": 101.73, |
|
"step": 1500, |
|
"train_speed(iter/s)": 0.337583 |
|
}, |
|
{ |
|
"acc": 0.87048397, |
|
"epoch": 2.0902777777777777, |
|
"grad_norm": 2.9980547146112047, |
|
"learning_rate": 7.757355353468819e-06, |
|
"loss": 0.32236178, |
|
"memory(GiB)": 101.73, |
|
"step": 1505, |
|
"train_speed(iter/s)": 0.337636 |
|
}, |
|
{ |
|
"acc": 0.85826788, |
|
"epoch": 2.0972222222222223, |
|
"grad_norm": 4.2342096618271166, |
|
"learning_rate": 7.74137193840731e-06, |
|
"loss": 0.33629158, |
|
"memory(GiB)": 101.73, |
|
"step": 1510, |
|
"train_speed(iter/s)": 0.337623 |
|
}, |
|
{ |
|
"acc": 0.86459484, |
|
"epoch": 2.1041666666666665, |
|
"grad_norm": 3.477001853963199, |
|
"learning_rate": 7.725348370783541e-06, |
|
"loss": 0.33757076, |
|
"memory(GiB)": 101.73, |
|
"step": 1515, |
|
"train_speed(iter/s)": 0.337658 |
|
}, |
|
{ |
|
"acc": 0.86884823, |
|
"epoch": 2.111111111111111, |
|
"grad_norm": 4.148406829850161, |
|
"learning_rate": 7.709284885335696e-06, |
|
"loss": 0.34878542, |
|
"memory(GiB)": 101.73, |
|
"step": 1520, |
|
"train_speed(iter/s)": 0.337642 |
|
}, |
|
{ |
|
"acc": 0.88847275, |
|
"epoch": 2.1180555555555554, |
|
"grad_norm": 5.21167919519557, |
|
"learning_rate": 7.693181717386736e-06, |
|
"loss": 0.31671143, |
|
"memory(GiB)": 101.73, |
|
"step": 1525, |
|
"train_speed(iter/s)": 0.33774 |
|
}, |
|
{ |
|
"acc": 0.86802654, |
|
"epoch": 2.125, |
|
"grad_norm": 6.351118021037492, |
|
"learning_rate": 7.677039102840951e-06, |
|
"loss": 0.34130049, |
|
"memory(GiB)": 101.73, |
|
"step": 1530, |
|
"train_speed(iter/s)": 0.337771 |
|
}, |
|
{ |
|
"acc": 0.87035189, |
|
"epoch": 2.1319444444444446, |
|
"grad_norm": 3.1462563267386203, |
|
"learning_rate": 7.66085727818051e-06, |
|
"loss": 0.32847002, |
|
"memory(GiB)": 101.73, |
|
"step": 1535, |
|
"train_speed(iter/s)": 0.337739 |
|
}, |
|
{ |
|
"acc": 0.87579575, |
|
"epoch": 2.138888888888889, |
|
"grad_norm": 7.036404367525274, |
|
"learning_rate": 7.644636480461992e-06, |
|
"loss": 0.33579338, |
|
"memory(GiB)": 101.73, |
|
"step": 1540, |
|
"train_speed(iter/s)": 0.337808 |
|
}, |
|
{ |
|
"acc": 0.86966209, |
|
"epoch": 2.1458333333333335, |
|
"grad_norm": 2.8028767779169232, |
|
"learning_rate": 7.62837694731291e-06, |
|
"loss": 0.34096689, |
|
"memory(GiB)": 101.73, |
|
"step": 1545, |
|
"train_speed(iter/s)": 0.337722 |
|
}, |
|
{ |
|
"acc": 0.86123219, |
|
"epoch": 2.1527777777777777, |
|
"grad_norm": 4.916942622404778, |
|
"learning_rate": 7.612078916928237e-06, |
|
"loss": 0.34751384, |
|
"memory(GiB)": 101.73, |
|
"step": 1550, |
|
"train_speed(iter/s)": 0.337803 |
|
}, |
|
{ |
|
"acc": 0.87225456, |
|
"epoch": 2.1597222222222223, |
|
"grad_norm": 3.0874061367866665, |
|
"learning_rate": 7.595742628066913e-06, |
|
"loss": 0.31606097, |
|
"memory(GiB)": 101.73, |
|
"step": 1555, |
|
"train_speed(iter/s)": 0.337863 |
|
}, |
|
{ |
|
"acc": 0.8683342, |
|
"epoch": 2.1666666666666665, |
|
"grad_norm": 2.7476245391903102, |
|
"learning_rate": 7.579368320048353e-06, |
|
"loss": 0.33775635, |
|
"memory(GiB)": 101.73, |
|
"step": 1560, |
|
"train_speed(iter/s)": 0.337934 |
|
}, |
|
{ |
|
"acc": 0.87580605, |
|
"epoch": 2.173611111111111, |
|
"grad_norm": 3.781738855028497, |
|
"learning_rate": 7.562956232748927e-06, |
|
"loss": 0.31923892, |
|
"memory(GiB)": 101.73, |
|
"step": 1565, |
|
"train_speed(iter/s)": 0.338011 |
|
}, |
|
{ |
|
"acc": 0.87004614, |
|
"epoch": 2.1805555555555554, |
|
"grad_norm": 3.83368998118028, |
|
"learning_rate": 7.5465066065984585e-06, |
|
"loss": 0.32810178, |
|
"memory(GiB)": 101.73, |
|
"step": 1570, |
|
"train_speed(iter/s)": 0.338074 |
|
}, |
|
{ |
|
"acc": 0.872332, |
|
"epoch": 2.1875, |
|
"grad_norm": 5.221738940753842, |
|
"learning_rate": 7.530019682576701e-06, |
|
"loss": 0.35446784, |
|
"memory(GiB)": 101.73, |
|
"step": 1575, |
|
"train_speed(iter/s)": 0.33796 |
|
}, |
|
{ |
|
"acc": 0.87468405, |
|
"epoch": 2.1944444444444446, |
|
"grad_norm": 3.7537602299326096, |
|
"learning_rate": 7.5134957022098e-06, |
|
"loss": 0.33260701, |
|
"memory(GiB)": 101.73, |
|
"step": 1580, |
|
"train_speed(iter/s)": 0.33798 |
|
}, |
|
{ |
|
"acc": 0.88080168, |
|
"epoch": 2.201388888888889, |
|
"grad_norm": 3.2382032822743336, |
|
"learning_rate": 7.496934907566764e-06, |
|
"loss": 0.30253038, |
|
"memory(GiB)": 101.73, |
|
"step": 1585, |
|
"train_speed(iter/s)": 0.338059 |
|
}, |
|
{ |
|
"acc": 0.87064123, |
|
"epoch": 2.2083333333333335, |
|
"grad_norm": 2.957663451463655, |
|
"learning_rate": 7.480337541255917e-06, |
|
"loss": 0.33216987, |
|
"memory(GiB)": 101.73, |
|
"step": 1590, |
|
"train_speed(iter/s)": 0.338127 |
|
}, |
|
{ |
|
"acc": 0.87469072, |
|
"epoch": 2.2152777777777777, |
|
"grad_norm": 2.9386365599515294, |
|
"learning_rate": 7.463703846421336e-06, |
|
"loss": 0.32307091, |
|
"memory(GiB)": 101.73, |
|
"step": 1595, |
|
"train_speed(iter/s)": 0.338151 |
|
}, |
|
{ |
|
"acc": 0.86811104, |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 2.9834997972872155, |
|
"learning_rate": 7.447034066739297e-06, |
|
"loss": 0.33986754, |
|
"memory(GiB)": 101.73, |
|
"step": 1600, |
|
"train_speed(iter/s)": 0.338221 |
|
}, |
|
{ |
|
"acc": 0.87765865, |
|
"epoch": 2.2291666666666665, |
|
"grad_norm": 5.244667526922747, |
|
"learning_rate": 7.430328446414703e-06, |
|
"loss": 0.32832396, |
|
"memory(GiB)": 101.73, |
|
"step": 1605, |
|
"train_speed(iter/s)": 0.338293 |
|
}, |
|
{ |
|
"acc": 0.87501507, |
|
"epoch": 2.236111111111111, |
|
"grad_norm": 3.484787576558788, |
|
"learning_rate": 7.413587230177507e-06, |
|
"loss": 0.32662356, |
|
"memory(GiB)": 101.73, |
|
"step": 1610, |
|
"train_speed(iter/s)": 0.338366 |
|
}, |
|
{ |
|
"acc": 0.86249599, |
|
"epoch": 2.2430555555555554, |
|
"grad_norm": 3.004746548822643, |
|
"learning_rate": 7.396810663279127e-06, |
|
"loss": 0.32253542, |
|
"memory(GiB)": 101.73, |
|
"step": 1615, |
|
"train_speed(iter/s)": 0.338412 |
|
}, |
|
{ |
|
"acc": 0.87308559, |
|
"epoch": 2.25, |
|
"grad_norm": 7.1702579448431845, |
|
"learning_rate": 7.3799989914888506e-06, |
|
"loss": 0.33047514, |
|
"memory(GiB)": 101.73, |
|
"step": 1620, |
|
"train_speed(iter/s)": 0.338494 |
|
}, |
|
{ |
|
"acc": 0.87881336, |
|
"epoch": 2.2569444444444446, |
|
"grad_norm": 6.139283863744863, |
|
"learning_rate": 7.363152461090238e-06, |
|
"loss": 0.32465723, |
|
"memory(GiB)": 101.73, |
|
"step": 1625, |
|
"train_speed(iter/s)": 0.338549 |
|
}, |
|
{ |
|
"acc": 0.8805707, |
|
"epoch": 2.263888888888889, |
|
"grad_norm": 4.84624185818044, |
|
"learning_rate": 7.346271318877514e-06, |
|
"loss": 0.28650289, |
|
"memory(GiB)": 101.73, |
|
"step": 1630, |
|
"train_speed(iter/s)": 0.338616 |
|
}, |
|
{ |
|
"acc": 0.87669907, |
|
"epoch": 2.2708333333333335, |
|
"grad_norm": 4.041825405450047, |
|
"learning_rate": 7.329355812151946e-06, |
|
"loss": 0.30587997, |
|
"memory(GiB)": 101.73, |
|
"step": 1635, |
|
"train_speed(iter/s)": 0.33868 |
|
}, |
|
{ |
|
"acc": 0.88121891, |
|
"epoch": 2.2777777777777777, |
|
"grad_norm": 28.99476820699483, |
|
"learning_rate": 7.312406188718233e-06, |
|
"loss": 0.30925446, |
|
"memory(GiB)": 101.73, |
|
"step": 1640, |
|
"train_speed(iter/s)": 0.338738 |
|
}, |
|
{ |
|
"acc": 0.87650862, |
|
"epoch": 2.2847222222222223, |
|
"grad_norm": 2.331146023060369, |
|
"learning_rate": 7.295422696880864e-06, |
|
"loss": 0.32519441, |
|
"memory(GiB)": 101.73, |
|
"step": 1645, |
|
"train_speed(iter/s)": 0.338753 |
|
}, |
|
{ |
|
"acc": 0.8740799, |
|
"epoch": 2.2916666666666665, |
|
"grad_norm": 2.552893511565151, |
|
"learning_rate": 7.2784055854404875e-06, |
|
"loss": 0.33000691, |
|
"memory(GiB)": 101.73, |
|
"step": 1650, |
|
"train_speed(iter/s)": 0.338754 |
|
}, |
|
{ |
|
"acc": 0.86075459, |
|
"epoch": 2.298611111111111, |
|
"grad_norm": 3.661826860906401, |
|
"learning_rate": 7.261355103690264e-06, |
|
"loss": 0.33271484, |
|
"memory(GiB)": 101.73, |
|
"step": 1655, |
|
"train_speed(iter/s)": 0.33882 |
|
}, |
|
{ |
|
"acc": 0.87411423, |
|
"epoch": 2.3055555555555554, |
|
"grad_norm": 2.840348895811668, |
|
"learning_rate": 7.244271501412212e-06, |
|
"loss": 0.32931597, |
|
"memory(GiB)": 101.73, |
|
"step": 1660, |
|
"train_speed(iter/s)": 0.3388 |
|
}, |
|
{ |
|
"acc": 0.87788906, |
|
"epoch": 2.3125, |
|
"grad_norm": 2.8561468198714377, |
|
"learning_rate": 7.227155028873552e-06, |
|
"loss": 0.32008338, |
|
"memory(GiB)": 101.73, |
|
"step": 1665, |
|
"train_speed(iter/s)": 0.33883 |
|
}, |
|
{ |
|
"acc": 0.85829659, |
|
"epoch": 2.3194444444444446, |
|
"grad_norm": 4.015346543317703, |
|
"learning_rate": 7.210005936823042e-06, |
|
"loss": 0.35633571, |
|
"memory(GiB)": 101.73, |
|
"step": 1670, |
|
"train_speed(iter/s)": 0.338871 |
|
}, |
|
{ |
|
"acc": 0.87775173, |
|
"epoch": 2.326388888888889, |
|
"grad_norm": 2.037276521515456, |
|
"learning_rate": 7.1928244764873025e-06, |
|
"loss": 0.33058481, |
|
"memory(GiB)": 101.73, |
|
"step": 1675, |
|
"train_speed(iter/s)": 0.338868 |
|
}, |
|
{ |
|
"acc": 0.87097912, |
|
"epoch": 2.3333333333333335, |
|
"grad_norm": 3.4288232154502065, |
|
"learning_rate": 7.175610899567126e-06, |
|
"loss": 0.31878319, |
|
"memory(GiB)": 101.73, |
|
"step": 1680, |
|
"train_speed(iter/s)": 0.338923 |
|
}, |
|
{ |
|
"acc": 0.87697048, |
|
"epoch": 2.3402777777777777, |
|
"grad_norm": 2.853430207452135, |
|
"learning_rate": 7.158365458233809e-06, |
|
"loss": 0.31011829, |
|
"memory(GiB)": 101.73, |
|
"step": 1685, |
|
"train_speed(iter/s)": 0.338967 |
|
}, |
|
{ |
|
"acc": 0.87737989, |
|
"epoch": 2.3472222222222223, |
|
"grad_norm": 3.7194790748849873, |
|
"learning_rate": 7.14108840512544e-06, |
|
"loss": 0.3310442, |
|
"memory(GiB)": 101.73, |
|
"step": 1690, |
|
"train_speed(iter/s)": 0.338878 |
|
}, |
|
{ |
|
"acc": 0.87455406, |
|
"epoch": 2.3541666666666665, |
|
"grad_norm": 3.266143323356604, |
|
"learning_rate": 7.1237799933432136e-06, |
|
"loss": 0.31600342, |
|
"memory(GiB)": 101.73, |
|
"step": 1695, |
|
"train_speed(iter/s)": 0.338867 |
|
}, |
|
{ |
|
"acc": 0.87975502, |
|
"epoch": 2.361111111111111, |
|
"grad_norm": 3.1176281977267815, |
|
"learning_rate": 7.10644047644771e-06, |
|
"loss": 0.31154928, |
|
"memory(GiB)": 101.73, |
|
"step": 1700, |
|
"train_speed(iter/s)": 0.3389 |
|
}, |
|
{ |
|
"acc": 0.87202072, |
|
"epoch": 2.3680555555555554, |
|
"grad_norm": 3.474595062226248, |
|
"learning_rate": 7.089070108455184e-06, |
|
"loss": 0.31377418, |
|
"memory(GiB)": 101.73, |
|
"step": 1705, |
|
"train_speed(iter/s)": 0.33847 |
|
}, |
|
{ |
|
"acc": 0.8784668, |
|
"epoch": 2.375, |
|
"grad_norm": 2.7456142238340653, |
|
"learning_rate": 7.071669143833848e-06, |
|
"loss": 0.31594782, |
|
"memory(GiB)": 101.73, |
|
"step": 1710, |
|
"train_speed(iter/s)": 0.338516 |
|
}, |
|
{ |
|
"acc": 0.87724466, |
|
"epoch": 2.3819444444444446, |
|
"grad_norm": 2.049751609642953, |
|
"learning_rate": 7.054237837500145e-06, |
|
"loss": 0.30634799, |
|
"memory(GiB)": 101.73, |
|
"step": 1715, |
|
"train_speed(iter/s)": 0.338499 |
|
}, |
|
{ |
|
"acc": 0.86771536, |
|
"epoch": 2.388888888888889, |
|
"grad_norm": 3.4852538358109664, |
|
"learning_rate": 7.036776444815005e-06, |
|
"loss": 0.33399673, |
|
"memory(GiB)": 101.73, |
|
"step": 1720, |
|
"train_speed(iter/s)": 0.338497 |
|
}, |
|
{ |
|
"acc": 0.8677083, |
|
"epoch": 2.3958333333333335, |
|
"grad_norm": 5.7536631363091, |
|
"learning_rate": 7.019285221580112e-06, |
|
"loss": 0.34250536, |
|
"memory(GiB)": 101.73, |
|
"step": 1725, |
|
"train_speed(iter/s)": 0.338477 |
|
}, |
|
{ |
|
"acc": 0.87606449, |
|
"epoch": 2.4027777777777777, |
|
"grad_norm": 3.887814129103067, |
|
"learning_rate": 7.001764424034153e-06, |
|
"loss": 0.32155147, |
|
"memory(GiB)": 101.73, |
|
"step": 1730, |
|
"train_speed(iter/s)": 0.338492 |
|
}, |
|
{ |
|
"acc": 0.87954998, |
|
"epoch": 2.4097222222222223, |
|
"grad_norm": 6.759207980891451, |
|
"learning_rate": 6.984214308849067e-06, |
|
"loss": 0.31039286, |
|
"memory(GiB)": 101.73, |
|
"step": 1735, |
|
"train_speed(iter/s)": 0.338528 |
|
}, |
|
{ |
|
"acc": 0.87517338, |
|
"epoch": 2.4166666666666665, |
|
"grad_norm": 5.138693499418354, |
|
"learning_rate": 6.966635133126286e-06, |
|
"loss": 0.32624524, |
|
"memory(GiB)": 101.73, |
|
"step": 1740, |
|
"train_speed(iter/s)": 0.33861 |
|
}, |
|
{ |
|
"acc": 0.88268986, |
|
"epoch": 2.423611111111111, |
|
"grad_norm": 3.522173712318652, |
|
"learning_rate": 6.94902715439296e-06, |
|
"loss": 0.30495658, |
|
"memory(GiB)": 101.73, |
|
"step": 1745, |
|
"train_speed(iter/s)": 0.338647 |
|
}, |
|
{ |
|
"acc": 0.87460957, |
|
"epoch": 2.4305555555555554, |
|
"grad_norm": 3.5422026180142496, |
|
"learning_rate": 6.9313906305981945e-06, |
|
"loss": 0.30730667, |
|
"memory(GiB)": 101.73, |
|
"step": 1750, |
|
"train_speed(iter/s)": 0.338681 |
|
}, |
|
{ |
|
"acc": 0.8688343, |
|
"epoch": 2.4375, |
|
"grad_norm": 4.479475459465593, |
|
"learning_rate": 6.913725820109267e-06, |
|
"loss": 0.33697248, |
|
"memory(GiB)": 101.73, |
|
"step": 1755, |
|
"train_speed(iter/s)": 0.33868 |
|
}, |
|
{ |
|
"acc": 0.87335644, |
|
"epoch": 2.4444444444444446, |
|
"grad_norm": 3.2070844419147213, |
|
"learning_rate": 6.896032981707842e-06, |
|
"loss": 0.31748641, |
|
"memory(GiB)": 101.73, |
|
"step": 1760, |
|
"train_speed(iter/s)": 0.33873 |
|
}, |
|
{ |
|
"acc": 0.87981138, |
|
"epoch": 2.451388888888889, |
|
"grad_norm": 4.026589843097103, |
|
"learning_rate": 6.878312374586182e-06, |
|
"loss": 0.31501851, |
|
"memory(GiB)": 101.73, |
|
"step": 1765, |
|
"train_speed(iter/s)": 0.338777 |
|
}, |
|
{ |
|
"acc": 0.87052975, |
|
"epoch": 2.4583333333333335, |
|
"grad_norm": 4.243492199155926, |
|
"learning_rate": 6.860564258343344e-06, |
|
"loss": 0.32996852, |
|
"memory(GiB)": 101.73, |
|
"step": 1770, |
|
"train_speed(iter/s)": 0.338812 |
|
}, |
|
{ |
|
"acc": 0.88656845, |
|
"epoch": 2.4652777777777777, |
|
"grad_norm": 6.009909931782156, |
|
"learning_rate": 6.842788892981389e-06, |
|
"loss": 0.29692478, |
|
"memory(GiB)": 101.73, |
|
"step": 1775, |
|
"train_speed(iter/s)": 0.33886 |
|
}, |
|
{ |
|
"acc": 0.87949772, |
|
"epoch": 2.4722222222222223, |
|
"grad_norm": 2.486941299073106, |
|
"learning_rate": 6.82498653890156e-06, |
|
"loss": 0.30589669, |
|
"memory(GiB)": 101.73, |
|
"step": 1780, |
|
"train_speed(iter/s)": 0.338925 |
|
}, |
|
{ |
|
"acc": 0.88737869, |
|
"epoch": 2.4791666666666665, |
|
"grad_norm": 2.630062639364896, |
|
"learning_rate": 6.807157456900474e-06, |
|
"loss": 0.2931669, |
|
"memory(GiB)": 101.73, |
|
"step": 1785, |
|
"train_speed(iter/s)": 0.33891 |
|
}, |
|
{ |
|
"acc": 0.88413935, |
|
"epoch": 2.486111111111111, |
|
"grad_norm": 3.2978337938073405, |
|
"learning_rate": 6.7893019081663015e-06, |
|
"loss": 0.2928961, |
|
"memory(GiB)": 101.73, |
|
"step": 1790, |
|
"train_speed(iter/s)": 0.338994 |
|
}, |
|
{ |
|
"acc": 0.87757177, |
|
"epoch": 2.4930555555555554, |
|
"grad_norm": 2.0263121641711304, |
|
"learning_rate": 6.77142015427494e-06, |
|
"loss": 0.30921671, |
|
"memory(GiB)": 101.73, |
|
"step": 1795, |
|
"train_speed(iter/s)": 0.339122 |
|
}, |
|
{ |
|
"acc": 0.87810173, |
|
"epoch": 2.5, |
|
"grad_norm": 4.3757059206609625, |
|
"learning_rate": 6.753512457186176e-06, |
|
"loss": 0.30782709, |
|
"memory(GiB)": 101.73, |
|
"step": 1800, |
|
"train_speed(iter/s)": 0.339211 |
|
}, |
|
{ |
|
"acc": 0.87414551, |
|
"epoch": 2.5069444444444446, |
|
"grad_norm": 2.2068974670317694, |
|
"learning_rate": 6.735579079239856e-06, |
|
"loss": 0.31782236, |
|
"memory(GiB)": 101.73, |
|
"step": 1805, |
|
"train_speed(iter/s)": 0.33937 |
|
}, |
|
{ |
|
"acc": 0.8663393, |
|
"epoch": 2.513888888888889, |
|
"grad_norm": 3.9925548505742894, |
|
"learning_rate": 6.717620283152043e-06, |
|
"loss": 0.32476821, |
|
"memory(GiB)": 101.73, |
|
"step": 1810, |
|
"train_speed(iter/s)": 0.339424 |
|
}, |
|
{ |
|
"acc": 0.8749526, |
|
"epoch": 2.5208333333333335, |
|
"grad_norm": 4.0296739697695365, |
|
"learning_rate": 6.699636332011156e-06, |
|
"loss": 0.2988498, |
|
"memory(GiB)": 101.73, |
|
"step": 1815, |
|
"train_speed(iter/s)": 0.339512 |
|
}, |
|
{ |
|
"acc": 0.88200283, |
|
"epoch": 2.5277777777777777, |
|
"grad_norm": 4.4011471119110395, |
|
"learning_rate": 6.681627489274131e-06, |
|
"loss": 0.2915164, |
|
"memory(GiB)": 101.73, |
|
"step": 1820, |
|
"train_speed(iter/s)": 0.339549 |
|
}, |
|
{ |
|
"acc": 0.8799408, |
|
"epoch": 2.5347222222222223, |
|
"grad_norm": 3.0590205668391266, |
|
"learning_rate": 6.663594018762553e-06, |
|
"loss": 0.30144064, |
|
"memory(GiB)": 101.73, |
|
"step": 1825, |
|
"train_speed(iter/s)": 0.339641 |
|
}, |
|
{ |
|
"acc": 0.88366776, |
|
"epoch": 2.5416666666666665, |
|
"grad_norm": 3.997001096827074, |
|
"learning_rate": 6.645536184658794e-06, |
|
"loss": 0.29560423, |
|
"memory(GiB)": 101.73, |
|
"step": 1830, |
|
"train_speed(iter/s)": 0.339716 |
|
}, |
|
{ |
|
"acc": 0.87788258, |
|
"epoch": 2.548611111111111, |
|
"grad_norm": 3.4793648553253167, |
|
"learning_rate": 6.627454251502139e-06, |
|
"loss": 0.3051878, |
|
"memory(GiB)": 101.73, |
|
"step": 1835, |
|
"train_speed(iter/s)": 0.339811 |
|
}, |
|
{ |
|
"acc": 0.86607494, |
|
"epoch": 2.5555555555555554, |
|
"grad_norm": 2.963649888950391, |
|
"learning_rate": 6.609348484184916e-06, |
|
"loss": 0.32486575, |
|
"memory(GiB)": 101.73, |
|
"step": 1840, |
|
"train_speed(iter/s)": 0.339872 |
|
}, |
|
{ |
|
"acc": 0.88934765, |
|
"epoch": 2.5625, |
|
"grad_norm": 3.0696594994479844, |
|
"learning_rate": 6.591219147948616e-06, |
|
"loss": 0.29468005, |
|
"memory(GiB)": 101.73, |
|
"step": 1845, |
|
"train_speed(iter/s)": 0.339887 |
|
}, |
|
{ |
|
"acc": 0.88128185, |
|
"epoch": 2.5694444444444446, |
|
"grad_norm": 2.8091039075490536, |
|
"learning_rate": 6.573066508379994e-06, |
|
"loss": 0.30400395, |
|
"memory(GiB)": 101.73, |
|
"step": 1850, |
|
"train_speed(iter/s)": 0.339965 |
|
}, |
|
{ |
|
"acc": 0.88058329, |
|
"epoch": 2.576388888888889, |
|
"grad_norm": 4.0255629531145045, |
|
"learning_rate": 6.554890831407199e-06, |
|
"loss": 0.31955268, |
|
"memory(GiB)": 101.73, |
|
"step": 1855, |
|
"train_speed(iter/s)": 0.340015 |
|
}, |
|
{ |
|
"acc": 0.88047981, |
|
"epoch": 2.5833333333333335, |
|
"grad_norm": 2.4218935283386487, |
|
"learning_rate": 6.536692383295863e-06, |
|
"loss": 0.3094146, |
|
"memory(GiB)": 101.73, |
|
"step": 1860, |
|
"train_speed(iter/s)": 0.340104 |
|
}, |
|
{ |
|
"acc": 0.87942057, |
|
"epoch": 2.5902777777777777, |
|
"grad_norm": 3.7938712045310554, |
|
"learning_rate": 6.518471430645206e-06, |
|
"loss": 0.33601379, |
|
"memory(GiB)": 101.73, |
|
"step": 1865, |
|
"train_speed(iter/s)": 0.340142 |
|
}, |
|
{ |
|
"acc": 0.87505875, |
|
"epoch": 2.5972222222222223, |
|
"grad_norm": 4.029822102213215, |
|
"learning_rate": 6.50022824038413e-06, |
|
"loss": 0.30280771, |
|
"memory(GiB)": 101.73, |
|
"step": 1870, |
|
"train_speed(iter/s)": 0.340218 |
|
}, |
|
{ |
|
"acc": 0.88756218, |
|
"epoch": 2.6041666666666665, |
|
"grad_norm": 2.6352853592698313, |
|
"learning_rate": 6.481963079767307e-06, |
|
"loss": 0.30299997, |
|
"memory(GiB)": 101.73, |
|
"step": 1875, |
|
"train_speed(iter/s)": 0.340247 |
|
}, |
|
{ |
|
"acc": 0.87335434, |
|
"epoch": 2.611111111111111, |
|
"grad_norm": 5.405459671127271, |
|
"learning_rate": 6.463676216371266e-06, |
|
"loss": 0.32759867, |
|
"memory(GiB)": 101.73, |
|
"step": 1880, |
|
"train_speed(iter/s)": 0.34017 |
|
}, |
|
{ |
|
"acc": 0.8905899, |
|
"epoch": 2.6180555555555554, |
|
"grad_norm": 4.051931136735315, |
|
"learning_rate": 6.4453679180904725e-06, |
|
"loss": 0.2927712, |
|
"memory(GiB)": 101.73, |
|
"step": 1885, |
|
"train_speed(iter/s)": 0.340225 |
|
}, |
|
{ |
|
"acc": 0.88374195, |
|
"epoch": 2.625, |
|
"grad_norm": 3.045321365849001, |
|
"learning_rate": 6.427038453133403e-06, |
|
"loss": 0.30915735, |
|
"memory(GiB)": 101.73, |
|
"step": 1890, |
|
"train_speed(iter/s)": 0.340313 |
|
}, |
|
{ |
|
"acc": 0.88091192, |
|
"epoch": 2.6319444444444446, |
|
"grad_norm": 5.40816330790365, |
|
"learning_rate": 6.4086880900186135e-06, |
|
"loss": 0.30887535, |
|
"memory(GiB)": 101.73, |
|
"step": 1895, |
|
"train_speed(iter/s)": 0.340392 |
|
}, |
|
{ |
|
"acc": 0.8715085, |
|
"epoch": 2.638888888888889, |
|
"grad_norm": 4.947011374969508, |
|
"learning_rate": 6.3903170975708165e-06, |
|
"loss": 0.33568881, |
|
"memory(GiB)": 101.73, |
|
"step": 1900, |
|
"train_speed(iter/s)": 0.340437 |
|
}, |
|
{ |
|
"acc": 0.88775291, |
|
"epoch": 2.6458333333333335, |
|
"grad_norm": 4.3365471091357, |
|
"learning_rate": 6.371925744916924e-06, |
|
"loss": 0.29443359, |
|
"memory(GiB)": 101.73, |
|
"step": 1905, |
|
"train_speed(iter/s)": 0.340519 |
|
}, |
|
{ |
|
"acc": 0.87607994, |
|
"epoch": 2.6527777777777777, |
|
"grad_norm": 8.098944228234716, |
|
"learning_rate": 6.353514301482126e-06, |
|
"loss": 0.31685441, |
|
"memory(GiB)": 101.73, |
|
"step": 1910, |
|
"train_speed(iter/s)": 0.340593 |
|
}, |
|
{ |
|
"acc": 0.87877178, |
|
"epoch": 2.6597222222222223, |
|
"grad_norm": 2.7987374198103026, |
|
"learning_rate": 6.3350830369859315e-06, |
|
"loss": 0.31020045, |
|
"memory(GiB)": 101.73, |
|
"step": 1915, |
|
"train_speed(iter/s)": 0.340604 |
|
}, |
|
{ |
|
"acc": 0.87018623, |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 4.328884885182, |
|
"learning_rate": 6.316632221438214e-06, |
|
"loss": 0.32079277, |
|
"memory(GiB)": 101.73, |
|
"step": 1920, |
|
"train_speed(iter/s)": 0.340662 |
|
}, |
|
{ |
|
"acc": 0.8835763, |
|
"epoch": 2.673611111111111, |
|
"grad_norm": 2.4783741723058497, |
|
"learning_rate": 6.298162125135268e-06, |
|
"loss": 0.30358009, |
|
"memory(GiB)": 101.73, |
|
"step": 1925, |
|
"train_speed(iter/s)": 0.340706 |
|
}, |
|
{ |
|
"acc": 0.883319, |
|
"epoch": 2.6805555555555554, |
|
"grad_norm": 4.354308492198902, |
|
"learning_rate": 6.279673018655836e-06, |
|
"loss": 0.31000361, |
|
"memory(GiB)": 101.73, |
|
"step": 1930, |
|
"train_speed(iter/s)": 0.340781 |
|
}, |
|
{ |
|
"acc": 0.8759573, |
|
"epoch": 2.6875, |
|
"grad_norm": 9.467690371837232, |
|
"learning_rate": 6.2611651728571564e-06, |
|
"loss": 0.31353772, |
|
"memory(GiB)": 101.73, |
|
"step": 1935, |
|
"train_speed(iter/s)": 0.340841 |
|
}, |
|
{ |
|
"acc": 0.88470545, |
|
"epoch": 2.6944444444444446, |
|
"grad_norm": 3.215878487764247, |
|
"learning_rate": 6.242638858870988e-06, |
|
"loss": 0.28434479, |
|
"memory(GiB)": 101.73, |
|
"step": 1940, |
|
"train_speed(iter/s)": 0.34093 |
|
}, |
|
{ |
|
"acc": 0.87855301, |
|
"epoch": 2.701388888888889, |
|
"grad_norm": 3.3912993182694864, |
|
"learning_rate": 6.224094348099642e-06, |
|
"loss": 0.29945769, |
|
"memory(GiB)": 101.73, |
|
"step": 1945, |
|
"train_speed(iter/s)": 0.341019 |
|
}, |
|
{ |
|
"acc": 0.8707325, |
|
"epoch": 2.7083333333333335, |
|
"grad_norm": 4.58702527797853, |
|
"learning_rate": 6.205531912212003e-06, |
|
"loss": 0.29754431, |
|
"memory(GiB)": 101.73, |
|
"step": 1950, |
|
"train_speed(iter/s)": 0.341102 |
|
}, |
|
{ |
|
"acc": 0.88574533, |
|
"epoch": 2.7152777777777777, |
|
"grad_norm": 3.699483186725238, |
|
"learning_rate": 6.186951823139551e-06, |
|
"loss": 0.30343163, |
|
"memory(GiB)": 101.73, |
|
"step": 1955, |
|
"train_speed(iter/s)": 0.341216 |
|
}, |
|
{ |
|
"acc": 0.87052441, |
|
"epoch": 2.7222222222222223, |
|
"grad_norm": 3.799468816636163, |
|
"learning_rate": 6.168354353072376e-06, |
|
"loss": 0.31728518, |
|
"memory(GiB)": 101.73, |
|
"step": 1960, |
|
"train_speed(iter/s)": 0.341301 |
|
}, |
|
{ |
|
"acc": 0.88283157, |
|
"epoch": 2.7291666666666665, |
|
"grad_norm": 5.022537915160293, |
|
"learning_rate": 6.149739774455192e-06, |
|
"loss": 0.29690897, |
|
"memory(GiB)": 101.73, |
|
"step": 1965, |
|
"train_speed(iter/s)": 0.341336 |
|
}, |
|
{ |
|
"acc": 0.88846045, |
|
"epoch": 2.736111111111111, |
|
"grad_norm": 3.963520423961829, |
|
"learning_rate": 6.131108359983352e-06, |
|
"loss": 0.28321784, |
|
"memory(GiB)": 101.73, |
|
"step": 1970, |
|
"train_speed(iter/s)": 0.341424 |
|
}, |
|
{ |
|
"acc": 0.88815289, |
|
"epoch": 2.7430555555555554, |
|
"grad_norm": 4.477823960200574, |
|
"learning_rate": 6.112460382598838e-06, |
|
"loss": 0.28913431, |
|
"memory(GiB)": 101.73, |
|
"step": 1975, |
|
"train_speed(iter/s)": 0.341507 |
|
}, |
|
{ |
|
"acc": 0.87810936, |
|
"epoch": 2.75, |
|
"grad_norm": 5.001346704079185, |
|
"learning_rate": 6.093796115486277e-06, |
|
"loss": 0.30853729, |
|
"memory(GiB)": 101.73, |
|
"step": 1980, |
|
"train_speed(iter/s)": 0.341576 |
|
}, |
|
{ |
|
"acc": 0.87556248, |
|
"epoch": 2.7569444444444446, |
|
"grad_norm": 2.6486962898058937, |
|
"learning_rate": 6.075115832068929e-06, |
|
"loss": 0.29869452, |
|
"memory(GiB)": 101.73, |
|
"step": 1985, |
|
"train_speed(iter/s)": 0.341652 |
|
}, |
|
{ |
|
"acc": 0.88768387, |
|
"epoch": 2.763888888888889, |
|
"grad_norm": 2.647997997832992, |
|
"learning_rate": 6.056419806004689e-06, |
|
"loss": 0.28908353, |
|
"memory(GiB)": 101.73, |
|
"step": 1990, |
|
"train_speed(iter/s)": 0.341729 |
|
}, |
|
{ |
|
"acc": 0.88060932, |
|
"epoch": 2.7708333333333335, |
|
"grad_norm": 3.279715246610578, |
|
"learning_rate": 6.037708311182076e-06, |
|
"loss": 0.29757085, |
|
"memory(GiB)": 101.73, |
|
"step": 1995, |
|
"train_speed(iter/s)": 0.341816 |
|
}, |
|
{ |
|
"acc": 0.89519711, |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 2.350006963183419, |
|
"learning_rate": 6.018981621716216e-06, |
|
"loss": 0.29250665, |
|
"memory(GiB)": 101.73, |
|
"step": 2000, |
|
"train_speed(iter/s)": 0.341904 |
|
}, |
|
{ |
|
"acc": 0.8793849, |
|
"epoch": 2.7847222222222223, |
|
"grad_norm": 7.413019755920625, |
|
"learning_rate": 6.0002400119448315e-06, |
|
"loss": 0.29689407, |
|
"memory(GiB)": 101.73, |
|
"step": 2005, |
|
"train_speed(iter/s)": 0.341989 |
|
}, |
|
{ |
|
"acc": 0.88535957, |
|
"epoch": 2.7916666666666665, |
|
"grad_norm": 3.0590690331077215, |
|
"learning_rate": 5.981483756424219e-06, |
|
"loss": 0.28531508, |
|
"memory(GiB)": 101.73, |
|
"step": 2010, |
|
"train_speed(iter/s)": 0.342074 |
|
}, |
|
{ |
|
"acc": 0.88893423, |
|
"epoch": 2.798611111111111, |
|
"grad_norm": 6.003641582203187, |
|
"learning_rate": 5.962713129925233e-06, |
|
"loss": 0.29722118, |
|
"memory(GiB)": 101.73, |
|
"step": 2015, |
|
"train_speed(iter/s)": 0.342134 |
|
}, |
|
{ |
|
"acc": 0.8882515, |
|
"epoch": 2.8055555555555554, |
|
"grad_norm": 5.313173888563081, |
|
"learning_rate": 5.943928407429251e-06, |
|
"loss": 0.31478307, |
|
"memory(GiB)": 101.73, |
|
"step": 2020, |
|
"train_speed(iter/s)": 0.342214 |
|
}, |
|
{ |
|
"acc": 0.88190327, |
|
"epoch": 2.8125, |
|
"grad_norm": 4.986780722952902, |
|
"learning_rate": 5.9251298641241565e-06, |
|
"loss": 0.30542126, |
|
"memory(GiB)": 101.73, |
|
"step": 2025, |
|
"train_speed(iter/s)": 0.342286 |
|
}, |
|
{ |
|
"acc": 0.89250832, |
|
"epoch": 2.8194444444444446, |
|
"grad_norm": 5.018536108237801, |
|
"learning_rate": 5.906317775400298e-06, |
|
"loss": 0.26341515, |
|
"memory(GiB)": 101.73, |
|
"step": 2030, |
|
"train_speed(iter/s)": 0.342356 |
|
}, |
|
{ |
|
"acc": 0.87831106, |
|
"epoch": 2.826388888888889, |
|
"grad_norm": 5.146920963485941, |
|
"learning_rate": 5.887492416846459e-06, |
|
"loss": 0.30127568, |
|
"memory(GiB)": 101.73, |
|
"step": 2035, |
|
"train_speed(iter/s)": 0.34243 |
|
}, |
|
{ |
|
"acc": 0.88257599, |
|
"epoch": 2.8333333333333335, |
|
"grad_norm": 3.0687672661039476, |
|
"learning_rate": 5.8686540642458204e-06, |
|
"loss": 0.29559946, |
|
"memory(GiB)": 101.73, |
|
"step": 2040, |
|
"train_speed(iter/s)": 0.3425 |
|
}, |
|
{ |
|
"acc": 0.88266973, |
|
"epoch": 2.8402777777777777, |
|
"grad_norm": 5.535020326656706, |
|
"learning_rate": 5.849802993571917e-06, |
|
"loss": 0.30087221, |
|
"memory(GiB)": 101.73, |
|
"step": 2045, |
|
"train_speed(iter/s)": 0.342532 |
|
}, |
|
{ |
|
"acc": 0.89446363, |
|
"epoch": 2.8472222222222223, |
|
"grad_norm": 4.612098734690172, |
|
"learning_rate": 5.830939480984605e-06, |
|
"loss": 0.27696779, |
|
"memory(GiB)": 101.73, |
|
"step": 2050, |
|
"train_speed(iter/s)": 0.342586 |
|
}, |
|
{ |
|
"acc": 0.87654648, |
|
"epoch": 2.8541666666666665, |
|
"grad_norm": 5.014707434986776, |
|
"learning_rate": 5.812063802826005e-06, |
|
"loss": 0.31633348, |
|
"memory(GiB)": 101.73, |
|
"step": 2055, |
|
"train_speed(iter/s)": 0.342664 |
|
}, |
|
{ |
|
"acc": 0.88131304, |
|
"epoch": 2.861111111111111, |
|
"grad_norm": 3.870152061287398, |
|
"learning_rate": 5.793176235616455e-06, |
|
"loss": 0.32585454, |
|
"memory(GiB)": 101.73, |
|
"step": 2060, |
|
"train_speed(iter/s)": 0.342728 |
|
}, |
|
{ |
|
"acc": 0.88249874, |
|
"epoch": 2.8680555555555554, |
|
"grad_norm": 3.1435056987806993, |
|
"learning_rate": 5.774277056050467e-06, |
|
"loss": 0.29018085, |
|
"memory(GiB)": 101.73, |
|
"step": 2065, |
|
"train_speed(iter/s)": 0.342823 |
|
}, |
|
{ |
|
"acc": 0.88479662, |
|
"epoch": 2.875, |
|
"grad_norm": 5.000934426273714, |
|
"learning_rate": 5.755366540992666e-06, |
|
"loss": 0.30050292, |
|
"memory(GiB)": 101.73, |
|
"step": 2070, |
|
"train_speed(iter/s)": 0.342871 |
|
}, |
|
{ |
|
"acc": 0.88433819, |
|
"epoch": 2.8819444444444446, |
|
"grad_norm": 6.225049436263428, |
|
"learning_rate": 5.736444967473736e-06, |
|
"loss": 0.28958912, |
|
"memory(GiB)": 101.73, |
|
"step": 2075, |
|
"train_speed(iter/s)": 0.342876 |
|
}, |
|
{ |
|
"acc": 0.88738003, |
|
"epoch": 2.888888888888889, |
|
"grad_norm": 5.829606140426472, |
|
"learning_rate": 5.71751261268637e-06, |
|
"loss": 0.28439336, |
|
"memory(GiB)": 101.73, |
|
"step": 2080, |
|
"train_speed(iter/s)": 0.342927 |
|
}, |
|
{ |
|
"acc": 0.87550907, |
|
"epoch": 2.8958333333333335, |
|
"grad_norm": 4.927935596858437, |
|
"learning_rate": 5.698569753981194e-06, |
|
"loss": 0.30479741, |
|
"memory(GiB)": 101.73, |
|
"step": 2085, |
|
"train_speed(iter/s)": 0.342954 |
|
}, |
|
{ |
|
"acc": 0.88370075, |
|
"epoch": 2.9027777777777777, |
|
"grad_norm": 4.982074354249153, |
|
"learning_rate": 5.67961666886271e-06, |
|
"loss": 0.29437392, |
|
"memory(GiB)": 101.73, |
|
"step": 2090, |
|
"train_speed(iter/s)": 0.342977 |
|
}, |
|
{ |
|
"acc": 0.89414291, |
|
"epoch": 2.9097222222222223, |
|
"grad_norm": 2.699118597588968, |
|
"learning_rate": 5.6606536349852425e-06, |
|
"loss": 0.26323729, |
|
"memory(GiB)": 101.73, |
|
"step": 2095, |
|
"train_speed(iter/s)": 0.34301 |
|
}, |
|
{ |
|
"acc": 0.89436359, |
|
"epoch": 2.9166666666666665, |
|
"grad_norm": 4.450064476738222, |
|
"learning_rate": 5.64168093014885e-06, |
|
"loss": 0.27156312, |
|
"memory(GiB)": 101.73, |
|
"step": 2100, |
|
"train_speed(iter/s)": 0.343044 |
|
}, |
|
{ |
|
"acc": 0.89274464, |
|
"epoch": 2.923611111111111, |
|
"grad_norm": 2.9247570340950912, |
|
"learning_rate": 5.622698832295274e-06, |
|
"loss": 0.28357964, |
|
"memory(GiB)": 101.73, |
|
"step": 2105, |
|
"train_speed(iter/s)": 0.343072 |
|
}, |
|
{ |
|
"acc": 0.89117432, |
|
"epoch": 2.9305555555555554, |
|
"grad_norm": 3.9442967825811723, |
|
"learning_rate": 5.603707619503858e-06, |
|
"loss": 0.29273071, |
|
"memory(GiB)": 101.73, |
|
"step": 2110, |
|
"train_speed(iter/s)": 0.34312 |
|
}, |
|
{ |
|
"acc": 0.88488503, |
|
"epoch": 2.9375, |
|
"grad_norm": 3.9784644222883245, |
|
"learning_rate": 5.584707569987471e-06, |
|
"loss": 0.27594287, |
|
"memory(GiB)": 101.73, |
|
"step": 2115, |
|
"train_speed(iter/s)": 0.343143 |
|
}, |
|
{ |
|
"acc": 0.88866596, |
|
"epoch": 2.9444444444444446, |
|
"grad_norm": 15.436705597346922, |
|
"learning_rate": 5.565698962088441e-06, |
|
"loss": 0.29711914, |
|
"memory(GiB)": 101.73, |
|
"step": 2120, |
|
"train_speed(iter/s)": 0.343188 |
|
}, |
|
{ |
|
"acc": 0.88177891, |
|
"epoch": 2.951388888888889, |
|
"grad_norm": 5.662244591316013, |
|
"learning_rate": 5.54668207427447e-06, |
|
"loss": 0.29541435, |
|
"memory(GiB)": 101.73, |
|
"step": 2125, |
|
"train_speed(iter/s)": 0.343239 |
|
}, |
|
{ |
|
"acc": 0.89320126, |
|
"epoch": 2.9583333333333335, |
|
"grad_norm": 2.4843357852192427, |
|
"learning_rate": 5.527657185134556e-06, |
|
"loss": 0.28223519, |
|
"memory(GiB)": 101.73, |
|
"step": 2130, |
|
"train_speed(iter/s)": 0.343282 |
|
}, |
|
{ |
|
"acc": 0.89105587, |
|
"epoch": 2.9652777777777777, |
|
"grad_norm": 2.910744869592209, |
|
"learning_rate": 5.508624573374919e-06, |
|
"loss": 0.27124515, |
|
"memory(GiB)": 101.73, |
|
"step": 2135, |
|
"train_speed(iter/s)": 0.343292 |
|
}, |
|
{ |
|
"acc": 0.88511724, |
|
"epoch": 2.9722222222222223, |
|
"grad_norm": 2.73890828638729, |
|
"learning_rate": 5.4895845178149045e-06, |
|
"loss": 0.29908915, |
|
"memory(GiB)": 101.73, |
|
"step": 2140, |
|
"train_speed(iter/s)": 0.343284 |
|
}, |
|
{ |
|
"acc": 0.89417515, |
|
"epoch": 2.9791666666666665, |
|
"grad_norm": 3.936833986713451, |
|
"learning_rate": 5.470537297382911e-06, |
|
"loss": 0.28417382, |
|
"memory(GiB)": 101.73, |
|
"step": 2145, |
|
"train_speed(iter/s)": 0.343315 |
|
}, |
|
{ |
|
"acc": 0.88898296, |
|
"epoch": 2.986111111111111, |
|
"grad_norm": 3.8512956350719354, |
|
"learning_rate": 5.451483191112295e-06, |
|
"loss": 0.28061602, |
|
"memory(GiB)": 101.73, |
|
"step": 2150, |
|
"train_speed(iter/s)": 0.343201 |
|
}, |
|
{ |
|
"acc": 0.89546566, |
|
"epoch": 2.9930555555555554, |
|
"grad_norm": 5.810228058013454, |
|
"learning_rate": 5.432422478137295e-06, |
|
"loss": 0.27272539, |
|
"memory(GiB)": 101.73, |
|
"step": 2155, |
|
"train_speed(iter/s)": 0.343197 |
|
}, |
|
{ |
|
"acc": 0.88720446, |
|
"epoch": 3.0, |
|
"grad_norm": 3.580049072617942, |
|
"learning_rate": 5.413355437688926e-06, |
|
"loss": 0.27387962, |
|
"memory(GiB)": 101.73, |
|
"step": 2160, |
|
"train_speed(iter/s)": 0.343171 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_acc": 0.8911203557595754, |
|
"eval_loss": 0.29212549328804016, |
|
"eval_runtime": 25.6208, |
|
"eval_samples_per_second": 13.388, |
|
"eval_steps_per_second": 1.678, |
|
"step": 2160 |
|
}, |
|
{ |
|
"acc": 0.89589024, |
|
"epoch": 3.0069444444444446, |
|
"grad_norm": 4.637545293434605, |
|
"learning_rate": 5.394282349090905e-06, |
|
"loss": 0.25944438, |
|
"memory(GiB)": 101.73, |
|
"step": 2165, |
|
"train_speed(iter/s)": 0.340007 |
|
}, |
|
{ |
|
"acc": 0.88720493, |
|
"epoch": 3.013888888888889, |
|
"grad_norm": 7.39139114160879, |
|
"learning_rate": 5.375203491755545e-06, |
|
"loss": 0.31616793, |
|
"memory(GiB)": 101.73, |
|
"step": 2170, |
|
"train_speed(iter/s)": 0.340103 |
|
}, |
|
{ |
|
"acc": 0.88987474, |
|
"epoch": 3.0208333333333335, |
|
"grad_norm": 4.064763480331921, |
|
"learning_rate": 5.356119145179673e-06, |
|
"loss": 0.27186308, |
|
"memory(GiB)": 101.73, |
|
"step": 2175, |
|
"train_speed(iter/s)": 0.340144 |
|
}, |
|
{ |
|
"acc": 0.88327274, |
|
"epoch": 3.0277777777777777, |
|
"grad_norm": 3.2940720072242153, |
|
"learning_rate": 5.33702958894053e-06, |
|
"loss": 0.29375, |
|
"memory(GiB)": 101.73, |
|
"step": 2180, |
|
"train_speed(iter/s)": 0.340236 |
|
}, |
|
{ |
|
"acc": 0.89080715, |
|
"epoch": 3.0347222222222223, |
|
"grad_norm": 2.801423990050791, |
|
"learning_rate": 5.317935102691677e-06, |
|
"loss": 0.28847058, |
|
"memory(GiB)": 101.73, |
|
"step": 2185, |
|
"train_speed(iter/s)": 0.340293 |
|
}, |
|
{ |
|
"acc": 0.89707794, |
|
"epoch": 3.0416666666666665, |
|
"grad_norm": 3.3077023232108407, |
|
"learning_rate": 5.298835966158896e-06, |
|
"loss": 0.25797377, |
|
"memory(GiB)": 101.73, |
|
"step": 2190, |
|
"train_speed(iter/s)": 0.340319 |
|
}, |
|
{ |
|
"acc": 0.88909359, |
|
"epoch": 3.048611111111111, |
|
"grad_norm": 3.9604632504058146, |
|
"learning_rate": 5.279732459136095e-06, |
|
"loss": 0.27923727, |
|
"memory(GiB)": 101.73, |
|
"step": 2195, |
|
"train_speed(iter/s)": 0.340391 |
|
}, |
|
{ |
|
"acc": 0.8788662, |
|
"epoch": 3.0555555555555554, |
|
"grad_norm": 3.407305740491057, |
|
"learning_rate": 5.260624861481205e-06, |
|
"loss": 0.29525146, |
|
"memory(GiB)": 101.73, |
|
"step": 2200, |
|
"train_speed(iter/s)": 0.340393 |
|
}, |
|
{ |
|
"acc": 0.88607311, |
|
"epoch": 3.0625, |
|
"grad_norm": 3.4070141014765807, |
|
"learning_rate": 5.241513453112088e-06, |
|
"loss": 0.29450035, |
|
"memory(GiB)": 101.73, |
|
"step": 2205, |
|
"train_speed(iter/s)": 0.340409 |
|
}, |
|
{ |
|
"acc": 0.89028587, |
|
"epoch": 3.0694444444444446, |
|
"grad_norm": 6.119391343010178, |
|
"learning_rate": 5.222398514002424e-06, |
|
"loss": 0.27855229, |
|
"memory(GiB)": 101.73, |
|
"step": 2210, |
|
"train_speed(iter/s)": 0.34041 |
|
}, |
|
{ |
|
"acc": 0.90690746, |
|
"epoch": 3.076388888888889, |
|
"grad_norm": 4.189972170403801, |
|
"learning_rate": 5.203280324177623e-06, |
|
"loss": 0.24123404, |
|
"memory(GiB)": 101.73, |
|
"step": 2215, |
|
"train_speed(iter/s)": 0.340429 |
|
}, |
|
{ |
|
"acc": 0.89223652, |
|
"epoch": 3.0833333333333335, |
|
"grad_norm": 6.1687310886506825, |
|
"learning_rate": 5.184159163710717e-06, |
|
"loss": 0.26408105, |
|
"memory(GiB)": 101.73, |
|
"step": 2220, |
|
"train_speed(iter/s)": 0.340459 |
|
}, |
|
{ |
|
"acc": 0.88885937, |
|
"epoch": 3.0902777777777777, |
|
"grad_norm": 3.6370135452975982, |
|
"learning_rate": 5.16503531271825e-06, |
|
"loss": 0.28215942, |
|
"memory(GiB)": 101.73, |
|
"step": 2225, |
|
"train_speed(iter/s)": 0.340481 |
|
}, |
|
{ |
|
"acc": 0.8864089, |
|
"epoch": 3.0972222222222223, |
|
"grad_norm": 2.6915181888376987, |
|
"learning_rate": 5.145909051356187e-06, |
|
"loss": 0.27783501, |
|
"memory(GiB)": 101.73, |
|
"step": 2230, |
|
"train_speed(iter/s)": 0.340506 |
|
}, |
|
{ |
|
"acc": 0.88523932, |
|
"epoch": 3.1041666666666665, |
|
"grad_norm": 2.6201181743435873, |
|
"learning_rate": 5.1267806598157995e-06, |
|
"loss": 0.26578436, |
|
"memory(GiB)": 101.73, |
|
"step": 2235, |
|
"train_speed(iter/s)": 0.340551 |
|
}, |
|
{ |
|
"acc": 0.88679752, |
|
"epoch": 3.111111111111111, |
|
"grad_norm": 2.3757078771167235, |
|
"learning_rate": 5.1076504183195726e-06, |
|
"loss": 0.2708406, |
|
"memory(GiB)": 101.73, |
|
"step": 2240, |
|
"train_speed(iter/s)": 0.340577 |
|
}, |
|
{ |
|
"acc": 0.89169292, |
|
"epoch": 3.1180555555555554, |
|
"grad_norm": 4.281469899344993, |
|
"learning_rate": 5.088518607117084e-06, |
|
"loss": 0.26542788, |
|
"memory(GiB)": 101.73, |
|
"step": 2245, |
|
"train_speed(iter/s)": 0.340582 |
|
}, |
|
{ |
|
"acc": 0.88440208, |
|
"epoch": 3.125, |
|
"grad_norm": 6.327375242332073, |
|
"learning_rate": 5.0693855064809104e-06, |
|
"loss": 0.28328218, |
|
"memory(GiB)": 101.73, |
|
"step": 2250, |
|
"train_speed(iter/s)": 0.340592 |
|
}, |
|
{ |
|
"acc": 0.88965473, |
|
"epoch": 3.1319444444444446, |
|
"grad_norm": 3.335606378012737, |
|
"learning_rate": 5.050251396702519e-06, |
|
"loss": 0.27447596, |
|
"memory(GiB)": 101.73, |
|
"step": 2255, |
|
"train_speed(iter/s)": 0.340564 |
|
}, |
|
{ |
|
"acc": 0.89447746, |
|
"epoch": 3.138888888888889, |
|
"grad_norm": 3.971413493473237, |
|
"learning_rate": 5.0311165580881585e-06, |
|
"loss": 0.26328266, |
|
"memory(GiB)": 101.73, |
|
"step": 2260, |
|
"train_speed(iter/s)": 0.340575 |
|
}, |
|
{ |
|
"acc": 0.88526878, |
|
"epoch": 3.1458333333333335, |
|
"grad_norm": 3.9913556594484008, |
|
"learning_rate": 5.0119812709547566e-06, |
|
"loss": 0.26721802, |
|
"memory(GiB)": 101.73, |
|
"step": 2265, |
|
"train_speed(iter/s)": 0.340561 |
|
}, |
|
{ |
|
"acc": 0.89905844, |
|
"epoch": 3.1527777777777777, |
|
"grad_norm": 3.6292317674402117, |
|
"learning_rate": 4.9928458156258105e-06, |
|
"loss": 0.24952831, |
|
"memory(GiB)": 101.73, |
|
"step": 2270, |
|
"train_speed(iter/s)": 0.340598 |
|
}, |
|
{ |
|
"acc": 0.90703621, |
|
"epoch": 3.1597222222222223, |
|
"grad_norm": 2.5916058110248397, |
|
"learning_rate": 4.973710472427283e-06, |
|
"loss": 0.24961276, |
|
"memory(GiB)": 101.73, |
|
"step": 2275, |
|
"train_speed(iter/s)": 0.34063 |
|
}, |
|
{ |
|
"acc": 0.8791647, |
|
"epoch": 3.1666666666666665, |
|
"grad_norm": 4.9755713822504175, |
|
"learning_rate": 4.954575521683491e-06, |
|
"loss": 0.29728041, |
|
"memory(GiB)": 101.73, |
|
"step": 2280, |
|
"train_speed(iter/s)": 0.340679 |
|
}, |
|
{ |
|
"acc": 0.89638615, |
|
"epoch": 3.173611111111111, |
|
"grad_norm": 4.765953277092038, |
|
"learning_rate": 4.935441243713005e-06, |
|
"loss": 0.26370394, |
|
"memory(GiB)": 101.73, |
|
"step": 2285, |
|
"train_speed(iter/s)": 0.340579 |
|
}, |
|
{ |
|
"acc": 0.89993305, |
|
"epoch": 3.1805555555555554, |
|
"grad_norm": 4.796855317724758, |
|
"learning_rate": 4.916307918824538e-06, |
|
"loss": 0.25945036, |
|
"memory(GiB)": 101.73, |
|
"step": 2290, |
|
"train_speed(iter/s)": 0.340624 |
|
}, |
|
{ |
|
"acc": 0.89749622, |
|
"epoch": 3.1875, |
|
"grad_norm": 4.077903509861586, |
|
"learning_rate": 4.897175827312842e-06, |
|
"loss": 0.25814972, |
|
"memory(GiB)": 101.73, |
|
"step": 2295, |
|
"train_speed(iter/s)": 0.340634 |
|
}, |
|
{ |
|
"acc": 0.89532013, |
|
"epoch": 3.1944444444444446, |
|
"grad_norm": 2.9930679640780458, |
|
"learning_rate": 4.878045249454601e-06, |
|
"loss": 0.25680308, |
|
"memory(GiB)": 101.73, |
|
"step": 2300, |
|
"train_speed(iter/s)": 0.340629 |
|
}, |
|
{ |
|
"acc": 0.89404106, |
|
"epoch": 3.201388888888889, |
|
"grad_norm": 9.044532042244569, |
|
"learning_rate": 4.858916465504323e-06, |
|
"loss": 0.26445313, |
|
"memory(GiB)": 101.73, |
|
"step": 2305, |
|
"train_speed(iter/s)": 0.340667 |
|
}, |
|
{ |
|
"acc": 0.88623104, |
|
"epoch": 3.2083333333333335, |
|
"grad_norm": 6.440284323833954, |
|
"learning_rate": 4.839789755690235e-06, |
|
"loss": 0.27099028, |
|
"memory(GiB)": 101.73, |
|
"step": 2310, |
|
"train_speed(iter/s)": 0.340689 |
|
}, |
|
{ |
|
"acc": 0.88883505, |
|
"epoch": 3.2152777777777777, |
|
"grad_norm": 4.008447674699076, |
|
"learning_rate": 4.820665400210183e-06, |
|
"loss": 0.26926422, |
|
"memory(GiB)": 101.73, |
|
"step": 2315, |
|
"train_speed(iter/s)": 0.34073 |
|
}, |
|
{ |
|
"acc": 0.89209347, |
|
"epoch": 3.2222222222222223, |
|
"grad_norm": 2.9462752953211084, |
|
"learning_rate": 4.801543679227519e-06, |
|
"loss": 0.28077393, |
|
"memory(GiB)": 101.73, |
|
"step": 2320, |
|
"train_speed(iter/s)": 0.340768 |
|
}, |
|
{ |
|
"acc": 0.90544291, |
|
"epoch": 3.2291666666666665, |
|
"grad_norm": 3.2070127351402573, |
|
"learning_rate": 4.782424872867004e-06, |
|
"loss": 0.25720313, |
|
"memory(GiB)": 101.73, |
|
"step": 2325, |
|
"train_speed(iter/s)": 0.340812 |
|
}, |
|
{ |
|
"acc": 0.89043808, |
|
"epoch": 3.236111111111111, |
|
"grad_norm": 4.963263502548416, |
|
"learning_rate": 4.763309261210697e-06, |
|
"loss": 0.26552734, |
|
"memory(GiB)": 101.73, |
|
"step": 2330, |
|
"train_speed(iter/s)": 0.340869 |
|
}, |
|
{ |
|
"acc": 0.89086895, |
|
"epoch": 3.2430555555555554, |
|
"grad_norm": 5.996678984257977, |
|
"learning_rate": 4.744197124293861e-06, |
|
"loss": 0.29014907, |
|
"memory(GiB)": 101.73, |
|
"step": 2335, |
|
"train_speed(iter/s)": 0.340916 |
|
}, |
|
{ |
|
"acc": 0.88824921, |
|
"epoch": 3.25, |
|
"grad_norm": 4.661978038167385, |
|
"learning_rate": 4.725088742100851e-06, |
|
"loss": 0.27598886, |
|
"memory(GiB)": 101.73, |
|
"step": 2340, |
|
"train_speed(iter/s)": 0.340975 |
|
}, |
|
{ |
|
"acc": 0.90345469, |
|
"epoch": 3.2569444444444446, |
|
"grad_norm": 5.067431282964852, |
|
"learning_rate": 4.70598439456102e-06, |
|
"loss": 0.2427031, |
|
"memory(GiB)": 101.73, |
|
"step": 2345, |
|
"train_speed(iter/s)": 0.341045 |
|
}, |
|
{ |
|
"acc": 0.91135798, |
|
"epoch": 3.263888888888889, |
|
"grad_norm": 4.374947699458002, |
|
"learning_rate": 4.686884361544615e-06, |
|
"loss": 0.24237332, |
|
"memory(GiB)": 101.73, |
|
"step": 2350, |
|
"train_speed(iter/s)": 0.34109 |
|
}, |
|
{ |
|
"acc": 0.88529987, |
|
"epoch": 3.2708333333333335, |
|
"grad_norm": 3.2497593782681626, |
|
"learning_rate": 4.667788922858675e-06, |
|
"loss": 0.27918091, |
|
"memory(GiB)": 101.73, |
|
"step": 2355, |
|
"train_speed(iter/s)": 0.341135 |
|
}, |
|
{ |
|
"acc": 0.8951086, |
|
"epoch": 3.2777777777777777, |
|
"grad_norm": 4.703813669876629, |
|
"learning_rate": 4.648698358242937e-06, |
|
"loss": 0.27369049, |
|
"memory(GiB)": 101.73, |
|
"step": 2360, |
|
"train_speed(iter/s)": 0.341181 |
|
}, |
|
{ |
|
"acc": 0.89912701, |
|
"epoch": 3.2847222222222223, |
|
"grad_norm": 3.0732915304334663, |
|
"learning_rate": 4.629612947365731e-06, |
|
"loss": 0.26150217, |
|
"memory(GiB)": 101.73, |
|
"step": 2365, |
|
"train_speed(iter/s)": 0.341223 |
|
}, |
|
{ |
|
"acc": 0.90068092, |
|
"epoch": 3.2916666666666665, |
|
"grad_norm": 5.055103907548479, |
|
"learning_rate": 4.61053296981989e-06, |
|
"loss": 0.25842612, |
|
"memory(GiB)": 101.73, |
|
"step": 2370, |
|
"train_speed(iter/s)": 0.341244 |
|
}, |
|
{ |
|
"acc": 0.89772367, |
|
"epoch": 3.298611111111111, |
|
"grad_norm": 6.636649309949125, |
|
"learning_rate": 4.591458705118651e-06, |
|
"loss": 0.26917741, |
|
"memory(GiB)": 101.73, |
|
"step": 2375, |
|
"train_speed(iter/s)": 0.341284 |
|
}, |
|
{ |
|
"acc": 0.90206804, |
|
"epoch": 3.3055555555555554, |
|
"grad_norm": 3.898114339474827, |
|
"learning_rate": 4.5723904326915605e-06, |
|
"loss": 0.25737305, |
|
"memory(GiB)": 101.73, |
|
"step": 2380, |
|
"train_speed(iter/s)": 0.341313 |
|
}, |
|
{ |
|
"acc": 0.90095797, |
|
"epoch": 3.3125, |
|
"grad_norm": 7.406618692458744, |
|
"learning_rate": 4.55332843188038e-06, |
|
"loss": 0.24617977, |
|
"memory(GiB)": 101.73, |
|
"step": 2385, |
|
"train_speed(iter/s)": 0.341359 |
|
}, |
|
{ |
|
"acc": 0.90679264, |
|
"epoch": 3.3194444444444446, |
|
"grad_norm": 3.2278717071765546, |
|
"learning_rate": 4.534272981934993e-06, |
|
"loss": 0.26191123, |
|
"memory(GiB)": 101.73, |
|
"step": 2390, |
|
"train_speed(iter/s)": 0.341438 |
|
}, |
|
{ |
|
"acc": 0.89675159, |
|
"epoch": 3.326388888888889, |
|
"grad_norm": 6.232358660573751, |
|
"learning_rate": 4.51522436200932e-06, |
|
"loss": 0.26368232, |
|
"memory(GiB)": 101.73, |
|
"step": 2395, |
|
"train_speed(iter/s)": 0.341492 |
|
}, |
|
{ |
|
"acc": 0.89320259, |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 9.336217090017287, |
|
"learning_rate": 4.4961828511572195e-06, |
|
"loss": 0.26878896, |
|
"memory(GiB)": 101.73, |
|
"step": 2400, |
|
"train_speed(iter/s)": 0.341537 |
|
}, |
|
{ |
|
"acc": 0.90293465, |
|
"epoch": 3.3402777777777777, |
|
"grad_norm": 5.771989254299704, |
|
"learning_rate": 4.477148728328411e-06, |
|
"loss": 0.23618774, |
|
"memory(GiB)": 101.73, |
|
"step": 2405, |
|
"train_speed(iter/s)": 0.341597 |
|
}, |
|
{ |
|
"acc": 0.89542847, |
|
"epoch": 3.3472222222222223, |
|
"grad_norm": 8.890216489036503, |
|
"learning_rate": 4.4581222723643775e-06, |
|
"loss": 0.27369397, |
|
"memory(GiB)": 101.73, |
|
"step": 2410, |
|
"train_speed(iter/s)": 0.341663 |
|
}, |
|
{ |
|
"acc": 0.89897604, |
|
"epoch": 3.3541666666666665, |
|
"grad_norm": 10.209516705903122, |
|
"learning_rate": 4.439103761994287e-06, |
|
"loss": 0.2468132, |
|
"memory(GiB)": 101.73, |
|
"step": 2415, |
|
"train_speed(iter/s)": 0.341703 |
|
}, |
|
{ |
|
"acc": 0.88786516, |
|
"epoch": 3.361111111111111, |
|
"grad_norm": 3.844549530620871, |
|
"learning_rate": 4.420093475830912e-06, |
|
"loss": 0.25787063, |
|
"memory(GiB)": 101.73, |
|
"step": 2420, |
|
"train_speed(iter/s)": 0.341747 |
|
}, |
|
{ |
|
"acc": 0.90001011, |
|
"epoch": 3.3680555555555554, |
|
"grad_norm": 3.6744603092340515, |
|
"learning_rate": 4.401091692366538e-06, |
|
"loss": 0.25619226, |
|
"memory(GiB)": 101.73, |
|
"step": 2425, |
|
"train_speed(iter/s)": 0.341777 |
|
}, |
|
{ |
|
"acc": 0.8973815, |
|
"epoch": 3.375, |
|
"grad_norm": 7.238511447008848, |
|
"learning_rate": 4.382098689968897e-06, |
|
"loss": 0.2451417, |
|
"memory(GiB)": 101.73, |
|
"step": 2430, |
|
"train_speed(iter/s)": 0.341821 |
|
}, |
|
{ |
|
"acc": 0.89963903, |
|
"epoch": 3.3819444444444446, |
|
"grad_norm": 4.463477465284448, |
|
"learning_rate": 4.363114746877074e-06, |
|
"loss": 0.2622735, |
|
"memory(GiB)": 101.73, |
|
"step": 2435, |
|
"train_speed(iter/s)": 0.341849 |
|
}, |
|
{ |
|
"acc": 0.8982995, |
|
"epoch": 3.388888888888889, |
|
"grad_norm": 4.584390700541224, |
|
"learning_rate": 4.3441401411974445e-06, |
|
"loss": 0.25706239, |
|
"memory(GiB)": 101.73, |
|
"step": 2440, |
|
"train_speed(iter/s)": 0.341872 |
|
}, |
|
{ |
|
"acc": 0.88739204, |
|
"epoch": 3.3958333333333335, |
|
"grad_norm": 7.2700204144048035, |
|
"learning_rate": 4.325175150899594e-06, |
|
"loss": 0.27985995, |
|
"memory(GiB)": 101.73, |
|
"step": 2445, |
|
"train_speed(iter/s)": 0.34188 |
|
}, |
|
{ |
|
"acc": 0.90591974, |
|
"epoch": 3.4027777777777777, |
|
"grad_norm": 4.701025788285595, |
|
"learning_rate": 4.306220053812247e-06, |
|
"loss": 0.25586658, |
|
"memory(GiB)": 101.73, |
|
"step": 2450, |
|
"train_speed(iter/s)": 0.341863 |
|
}, |
|
{ |
|
"acc": 0.89671564, |
|
"epoch": 3.4097222222222223, |
|
"grad_norm": 3.5024092817845762, |
|
"learning_rate": 4.287275127619194e-06, |
|
"loss": 0.26711457, |
|
"memory(GiB)": 101.73, |
|
"step": 2455, |
|
"train_speed(iter/s)": 0.341912 |
|
}, |
|
{ |
|
"acc": 0.89187412, |
|
"epoch": 3.4166666666666665, |
|
"grad_norm": 3.1331350139936176, |
|
"learning_rate": 4.268340649855233e-06, |
|
"loss": 0.26458747, |
|
"memory(GiB)": 101.73, |
|
"step": 2460, |
|
"train_speed(iter/s)": 0.34197 |
|
}, |
|
{ |
|
"acc": 0.89410534, |
|
"epoch": 3.423611111111111, |
|
"grad_norm": 2.6386667139020323, |
|
"learning_rate": 4.24941689790209e-06, |
|
"loss": 0.2519309, |
|
"memory(GiB)": 101.73, |
|
"step": 2465, |
|
"train_speed(iter/s)": 0.342002 |
|
}, |
|
{ |
|
"acc": 0.91391573, |
|
"epoch": 3.4305555555555554, |
|
"grad_norm": 3.3930313458101087, |
|
"learning_rate": 4.230504148984366e-06, |
|
"loss": 0.22323613, |
|
"memory(GiB)": 101.73, |
|
"step": 2470, |
|
"train_speed(iter/s)": 0.342031 |
|
}, |
|
{ |
|
"acc": 0.88966484, |
|
"epoch": 3.4375, |
|
"grad_norm": 3.576144793306567, |
|
"learning_rate": 4.211602680165474e-06, |
|
"loss": 0.26663389, |
|
"memory(GiB)": 101.73, |
|
"step": 2475, |
|
"train_speed(iter/s)": 0.342056 |
|
}, |
|
{ |
|
"acc": 0.90185118, |
|
"epoch": 3.4444444444444446, |
|
"grad_norm": 3.0351618188615954, |
|
"learning_rate": 4.192712768343573e-06, |
|
"loss": 0.25376158, |
|
"memory(GiB)": 101.73, |
|
"step": 2480, |
|
"train_speed(iter/s)": 0.342085 |
|
}, |
|
{ |
|
"acc": 0.89549732, |
|
"epoch": 3.451388888888889, |
|
"grad_norm": 3.3992051611479863, |
|
"learning_rate": 4.1738346902475225e-06, |
|
"loss": 0.26019917, |
|
"memory(GiB)": 101.73, |
|
"step": 2485, |
|
"train_speed(iter/s)": 0.342127 |
|
}, |
|
{ |
|
"acc": 0.90261774, |
|
"epoch": 3.4583333333333335, |
|
"grad_norm": 3.37600766474198, |
|
"learning_rate": 4.154968722432823e-06, |
|
"loss": 0.2450819, |
|
"memory(GiB)": 101.73, |
|
"step": 2490, |
|
"train_speed(iter/s)": 0.342165 |
|
}, |
|
{ |
|
"acc": 0.89998322, |
|
"epoch": 3.4652777777777777, |
|
"grad_norm": 5.760093894583458, |
|
"learning_rate": 4.136115141277559e-06, |
|
"loss": 0.24672766, |
|
"memory(GiB)": 101.73, |
|
"step": 2495, |
|
"train_speed(iter/s)": 0.342183 |
|
}, |
|
{ |
|
"acc": 0.89386778, |
|
"epoch": 3.4722222222222223, |
|
"grad_norm": 5.61319151711386, |
|
"learning_rate": 4.117274222978364e-06, |
|
"loss": 0.25242987, |
|
"memory(GiB)": 101.73, |
|
"step": 2500, |
|
"train_speed(iter/s)": 0.342213 |
|
}, |
|
{ |
|
"acc": 0.90841942, |
|
"epoch": 3.4791666666666665, |
|
"grad_norm": 3.5190202823007595, |
|
"learning_rate": 4.098446243546361e-06, |
|
"loss": 0.22757564, |
|
"memory(GiB)": 101.73, |
|
"step": 2505, |
|
"train_speed(iter/s)": 0.342248 |
|
}, |
|
{ |
|
"acc": 0.88912029, |
|
"epoch": 3.486111111111111, |
|
"grad_norm": 5.241339952219938, |
|
"learning_rate": 4.0796314788031235e-06, |
|
"loss": 0.27620914, |
|
"memory(GiB)": 101.73, |
|
"step": 2510, |
|
"train_speed(iter/s)": 0.342288 |
|
}, |
|
{ |
|
"acc": 0.89256954, |
|
"epoch": 3.4930555555555554, |
|
"grad_norm": 7.171857875636105, |
|
"learning_rate": 4.060830204376641e-06, |
|
"loss": 0.26655228, |
|
"memory(GiB)": 101.73, |
|
"step": 2515, |
|
"train_speed(iter/s)": 0.342324 |
|
}, |
|
{ |
|
"acc": 0.89562893, |
|
"epoch": 3.5, |
|
"grad_norm": 2.751263422359721, |
|
"learning_rate": 4.04204269569727e-06, |
|
"loss": 0.25634003, |
|
"memory(GiB)": 101.73, |
|
"step": 2520, |
|
"train_speed(iter/s)": 0.342361 |
|
}, |
|
{ |
|
"acc": 0.90156956, |
|
"epoch": 3.5069444444444446, |
|
"grad_norm": 3.8262571455543024, |
|
"learning_rate": 4.023269227993709e-06, |
|
"loss": 0.2426084, |
|
"memory(GiB)": 101.73, |
|
"step": 2525, |
|
"train_speed(iter/s)": 0.34241 |
|
}, |
|
{ |
|
"acc": 0.89592133, |
|
"epoch": 3.513888888888889, |
|
"grad_norm": 2.7239237491184, |
|
"learning_rate": 4.004510076288959e-06, |
|
"loss": 0.27324376, |
|
"memory(GiB)": 101.73, |
|
"step": 2530, |
|
"train_speed(iter/s)": 0.34243 |
|
}, |
|
{ |
|
"acc": 0.90064278, |
|
"epoch": 3.5208333333333335, |
|
"grad_norm": 2.440659371146051, |
|
"learning_rate": 3.9857655153963e-06, |
|
"loss": 0.25882859, |
|
"memory(GiB)": 101.73, |
|
"step": 2535, |
|
"train_speed(iter/s)": 0.342409 |
|
}, |
|
{ |
|
"acc": 0.90220985, |
|
"epoch": 3.5277777777777777, |
|
"grad_norm": 6.173071253776259, |
|
"learning_rate": 3.967035819915265e-06, |
|
"loss": 0.24151213, |
|
"memory(GiB)": 101.73, |
|
"step": 2540, |
|
"train_speed(iter/s)": 0.342436 |
|
}, |
|
{ |
|
"acc": 0.89599705, |
|
"epoch": 3.5347222222222223, |
|
"grad_norm": 3.2005386510871596, |
|
"learning_rate": 3.9483212642276105e-06, |
|
"loss": 0.27071168, |
|
"memory(GiB)": 101.73, |
|
"step": 2545, |
|
"train_speed(iter/s)": 0.342476 |
|
}, |
|
{ |
|
"acc": 0.89755955, |
|
"epoch": 3.5416666666666665, |
|
"grad_norm": 9.15052836433047, |
|
"learning_rate": 3.929622122493306e-06, |
|
"loss": 0.26333673, |
|
"memory(GiB)": 101.73, |
|
"step": 2550, |
|
"train_speed(iter/s)": 0.342504 |
|
}, |
|
{ |
|
"acc": 0.89241247, |
|
"epoch": 3.548611111111111, |
|
"grad_norm": 2.8551585858243733, |
|
"learning_rate": 3.910938668646511e-06, |
|
"loss": 0.26134133, |
|
"memory(GiB)": 101.73, |
|
"step": 2555, |
|
"train_speed(iter/s)": 0.342537 |
|
}, |
|
{ |
|
"acc": 0.90300541, |
|
"epoch": 3.5555555555555554, |
|
"grad_norm": 4.200634570714865, |
|
"learning_rate": 3.892271176391565e-06, |
|
"loss": 0.24932976, |
|
"memory(GiB)": 101.73, |
|
"step": 2560, |
|
"train_speed(iter/s)": 0.342576 |
|
}, |
|
{ |
|
"acc": 0.90070772, |
|
"epoch": 3.5625, |
|
"grad_norm": 4.098223237386411, |
|
"learning_rate": 3.873619919198978e-06, |
|
"loss": 0.25703783, |
|
"memory(GiB)": 101.73, |
|
"step": 2565, |
|
"train_speed(iter/s)": 0.342659 |
|
}, |
|
{ |
|
"acc": 0.89807205, |
|
"epoch": 3.5694444444444446, |
|
"grad_norm": 9.637838056707626, |
|
"learning_rate": 3.85498517030142e-06, |
|
"loss": 0.251478, |
|
"memory(GiB)": 101.73, |
|
"step": 2570, |
|
"train_speed(iter/s)": 0.342709 |
|
}, |
|
{ |
|
"acc": 0.89927998, |
|
"epoch": 3.576388888888889, |
|
"grad_norm": 5.293025893334484, |
|
"learning_rate": 3.836367202689728e-06, |
|
"loss": 0.24165745, |
|
"memory(GiB)": 101.73, |
|
"step": 2575, |
|
"train_speed(iter/s)": 0.342741 |
|
}, |
|
{ |
|
"acc": 0.90661182, |
|
"epoch": 3.5833333333333335, |
|
"grad_norm": 4.126703368737641, |
|
"learning_rate": 3.817766289108898e-06, |
|
"loss": 0.24157546, |
|
"memory(GiB)": 101.73, |
|
"step": 2580, |
|
"train_speed(iter/s)": 0.342775 |
|
}, |
|
{ |
|
"acc": 0.89383621, |
|
"epoch": 3.5902777777777777, |
|
"grad_norm": 4.074196832478406, |
|
"learning_rate": 3.7991827020540883e-06, |
|
"loss": 0.25025237, |
|
"memory(GiB)": 101.73, |
|
"step": 2585, |
|
"train_speed(iter/s)": 0.342811 |
|
}, |
|
{ |
|
"acc": 0.90283613, |
|
"epoch": 3.5972222222222223, |
|
"grad_norm": 7.616151830363701, |
|
"learning_rate": 3.7806167137666337e-06, |
|
"loss": 0.24257934, |
|
"memory(GiB)": 101.73, |
|
"step": 2590, |
|
"train_speed(iter/s)": 0.342896 |
|
}, |
|
{ |
|
"acc": 0.9043417, |
|
"epoch": 3.6041666666666665, |
|
"grad_norm": 4.013042948818498, |
|
"learning_rate": 3.7620685962300583e-06, |
|
"loss": 0.22736707, |
|
"memory(GiB)": 101.73, |
|
"step": 2595, |
|
"train_speed(iter/s)": 0.342962 |
|
}, |
|
{ |
|
"acc": 0.90302153, |
|
"epoch": 3.611111111111111, |
|
"grad_norm": 7.480361593683446, |
|
"learning_rate": 3.7435386211660825e-06, |
|
"loss": 0.25048995, |
|
"memory(GiB)": 101.73, |
|
"step": 2600, |
|
"train_speed(iter/s)": 0.343004 |
|
}, |
|
{ |
|
"acc": 0.90426874, |
|
"epoch": 3.6180555555555554, |
|
"grad_norm": 3.14198386783033, |
|
"learning_rate": 3.7250270600306497e-06, |
|
"loss": 0.23340836, |
|
"memory(GiB)": 101.73, |
|
"step": 2605, |
|
"train_speed(iter/s)": 0.343045 |
|
}, |
|
{ |
|
"acc": 0.90514717, |
|
"epoch": 3.625, |
|
"grad_norm": 4.930192268094544, |
|
"learning_rate": 3.7065341840099483e-06, |
|
"loss": 0.24507568, |
|
"memory(GiB)": 101.73, |
|
"step": 2610, |
|
"train_speed(iter/s)": 0.343078 |
|
}, |
|
{ |
|
"acc": 0.91004333, |
|
"epoch": 3.6319444444444446, |
|
"grad_norm": 5.405558679120437, |
|
"learning_rate": 3.6880602640164364e-06, |
|
"loss": 0.23129289, |
|
"memory(GiB)": 101.73, |
|
"step": 2615, |
|
"train_speed(iter/s)": 0.34312 |
|
}, |
|
{ |
|
"acc": 0.90500717, |
|
"epoch": 3.638888888888889, |
|
"grad_norm": 6.283844606464733, |
|
"learning_rate": 3.6696055706848732e-06, |
|
"loss": 0.24272099, |
|
"memory(GiB)": 101.73, |
|
"step": 2620, |
|
"train_speed(iter/s)": 0.34316 |
|
}, |
|
{ |
|
"acc": 0.88391743, |
|
"epoch": 3.6458333333333335, |
|
"grad_norm": 5.93726524128595, |
|
"learning_rate": 3.6511703743683613e-06, |
|
"loss": 0.26117985, |
|
"memory(GiB)": 101.73, |
|
"step": 2625, |
|
"train_speed(iter/s)": 0.343217 |
|
}, |
|
{ |
|
"acc": 0.90749083, |
|
"epoch": 3.6527777777777777, |
|
"grad_norm": 4.149177960068219, |
|
"learning_rate": 3.632754945134376e-06, |
|
"loss": 0.24308465, |
|
"memory(GiB)": 101.73, |
|
"step": 2630, |
|
"train_speed(iter/s)": 0.343259 |
|
}, |
|
{ |
|
"acc": 0.90750189, |
|
"epoch": 3.6597222222222223, |
|
"grad_norm": 7.101346023538212, |
|
"learning_rate": 3.6143595527608133e-06, |
|
"loss": 0.21127968, |
|
"memory(GiB)": 101.73, |
|
"step": 2635, |
|
"train_speed(iter/s)": 0.343293 |
|
}, |
|
{ |
|
"acc": 0.89547405, |
|
"epoch": 3.6666666666666665, |
|
"grad_norm": 5.390934613472564, |
|
"learning_rate": 3.5959844667320416e-06, |
|
"loss": 0.27649164, |
|
"memory(GiB)": 101.73, |
|
"step": 2640, |
|
"train_speed(iter/s)": 0.343339 |
|
}, |
|
{ |
|
"acc": 0.89665613, |
|
"epoch": 3.673611111111111, |
|
"grad_norm": 3.174570568300612, |
|
"learning_rate": 3.577629956234946e-06, |
|
"loss": 0.2634002, |
|
"memory(GiB)": 101.73, |
|
"step": 2645, |
|
"train_speed(iter/s)": 0.34339 |
|
}, |
|
{ |
|
"acc": 0.89330215, |
|
"epoch": 3.6805555555555554, |
|
"grad_norm": 4.468851842008194, |
|
"learning_rate": 3.5592962901549925e-06, |
|
"loss": 0.25232086, |
|
"memory(GiB)": 101.73, |
|
"step": 2650, |
|
"train_speed(iter/s)": 0.343396 |
|
}, |
|
{ |
|
"acc": 0.90548496, |
|
"epoch": 3.6875, |
|
"grad_norm": 3.8524850872461407, |
|
"learning_rate": 3.5409837370722865e-06, |
|
"loss": 0.22683721, |
|
"memory(GiB)": 101.73, |
|
"step": 2655, |
|
"train_speed(iter/s)": 0.343441 |
|
}, |
|
{ |
|
"acc": 0.9049942, |
|
"epoch": 3.6944444444444446, |
|
"grad_norm": 4.301665754833759, |
|
"learning_rate": 3.5226925652576337e-06, |
|
"loss": 0.25169647, |
|
"memory(GiB)": 101.73, |
|
"step": 2660, |
|
"train_speed(iter/s)": 0.34344 |
|
}, |
|
{ |
|
"acc": 0.90362396, |
|
"epoch": 3.701388888888889, |
|
"grad_norm": 3.975117604181611, |
|
"learning_rate": 3.504423042668615e-06, |
|
"loss": 0.23812282, |
|
"memory(GiB)": 101.73, |
|
"step": 2665, |
|
"train_speed(iter/s)": 0.343477 |
|
}, |
|
{ |
|
"acc": 0.90102139, |
|
"epoch": 3.7083333333333335, |
|
"grad_norm": 4.731994289393552, |
|
"learning_rate": 3.486175436945659e-06, |
|
"loss": 0.26122446, |
|
"memory(GiB)": 101.73, |
|
"step": 2670, |
|
"train_speed(iter/s)": 0.343463 |
|
}, |
|
{ |
|
"acc": 0.8942503, |
|
"epoch": 3.7152777777777777, |
|
"grad_norm": 3.740054982289041, |
|
"learning_rate": 3.4679500154081264e-06, |
|
"loss": 0.25515299, |
|
"memory(GiB)": 101.73, |
|
"step": 2675, |
|
"train_speed(iter/s)": 0.343461 |
|
}, |
|
{ |
|
"acc": 0.90634022, |
|
"epoch": 3.7222222222222223, |
|
"grad_norm": 3.1762395131780345, |
|
"learning_rate": 3.449747045050383e-06, |
|
"loss": 0.22432833, |
|
"memory(GiB)": 101.73, |
|
"step": 2680, |
|
"train_speed(iter/s)": 0.343507 |
|
}, |
|
{ |
|
"acc": 0.90033245, |
|
"epoch": 3.7291666666666665, |
|
"grad_norm": 1.4522470812039046, |
|
"learning_rate": 3.4315667925378982e-06, |
|
"loss": 0.23518414, |
|
"memory(GiB)": 101.73, |
|
"step": 2685, |
|
"train_speed(iter/s)": 0.343527 |
|
}, |
|
{ |
|
"acc": 0.90143833, |
|
"epoch": 3.736111111111111, |
|
"grad_norm": 3.1122492784066718, |
|
"learning_rate": 3.4134095242033354e-06, |
|
"loss": 0.25242925, |
|
"memory(GiB)": 101.73, |
|
"step": 2690, |
|
"train_speed(iter/s)": 0.343516 |
|
}, |
|
{ |
|
"acc": 0.90106049, |
|
"epoch": 3.7430555555555554, |
|
"grad_norm": 3.6040857168011065, |
|
"learning_rate": 3.395275506042648e-06, |
|
"loss": 0.23500984, |
|
"memory(GiB)": 101.73, |
|
"step": 2695, |
|
"train_speed(iter/s)": 0.343541 |
|
}, |
|
{ |
|
"acc": 0.89810867, |
|
"epoch": 3.75, |
|
"grad_norm": 2.726266348660559, |
|
"learning_rate": 3.377165003711185e-06, |
|
"loss": 0.26144593, |
|
"memory(GiB)": 101.73, |
|
"step": 2700, |
|
"train_speed(iter/s)": 0.343574 |
|
}, |
|
{ |
|
"acc": 0.89270897, |
|
"epoch": 3.7569444444444446, |
|
"grad_norm": 3.5864113282789534, |
|
"learning_rate": 3.359078282519802e-06, |
|
"loss": 0.25928802, |
|
"memory(GiB)": 101.73, |
|
"step": 2705, |
|
"train_speed(iter/s)": 0.343605 |
|
}, |
|
{ |
|
"acc": 0.90779352, |
|
"epoch": 3.763888888888889, |
|
"grad_norm": 3.4821077640595215, |
|
"learning_rate": 3.341015607430968e-06, |
|
"loss": 0.23630223, |
|
"memory(GiB)": 101.73, |
|
"step": 2710, |
|
"train_speed(iter/s)": 0.343634 |
|
}, |
|
{ |
|
"acc": 0.88402081, |
|
"epoch": 3.7708333333333335, |
|
"grad_norm": 5.402082590204051, |
|
"learning_rate": 3.3229772430548872e-06, |
|
"loss": 0.2842896, |
|
"memory(GiB)": 101.73, |
|
"step": 2715, |
|
"train_speed(iter/s)": 0.343644 |
|
}, |
|
{ |
|
"acc": 0.89697504, |
|
"epoch": 3.7777777777777777, |
|
"grad_norm": 3.1203680320024945, |
|
"learning_rate": 3.304963453645624e-06, |
|
"loss": 0.24786057, |
|
"memory(GiB)": 101.73, |
|
"step": 2720, |
|
"train_speed(iter/s)": 0.343653 |
|
}, |
|
{ |
|
"acc": 0.89681606, |
|
"epoch": 3.7847222222222223, |
|
"grad_norm": 4.784880833886988, |
|
"learning_rate": 3.28697450309723e-06, |
|
"loss": 0.24739251, |
|
"memory(GiB)": 101.73, |
|
"step": 2725, |
|
"train_speed(iter/s)": 0.343679 |
|
}, |
|
{ |
|
"acc": 0.90272732, |
|
"epoch": 3.7916666666666665, |
|
"grad_norm": 3.354351512168527, |
|
"learning_rate": 3.2690106549398786e-06, |
|
"loss": 0.24098086, |
|
"memory(GiB)": 101.73, |
|
"step": 2730, |
|
"train_speed(iter/s)": 0.343663 |
|
}, |
|
{ |
|
"acc": 0.89669085, |
|
"epoch": 3.798611111111111, |
|
"grad_norm": 4.562914813279376, |
|
"learning_rate": 3.2510721723360044e-06, |
|
"loss": 0.24971335, |
|
"memory(GiB)": 101.73, |
|
"step": 2735, |
|
"train_speed(iter/s)": 0.343704 |
|
}, |
|
{ |
|
"acc": 0.89800167, |
|
"epoch": 3.8055555555555554, |
|
"grad_norm": 4.232092460708063, |
|
"learning_rate": 3.233159318076448e-06, |
|
"loss": 0.24750371, |
|
"memory(GiB)": 101.73, |
|
"step": 2740, |
|
"train_speed(iter/s)": 0.343708 |
|
}, |
|
{ |
|
"acc": 0.91989727, |
|
"epoch": 3.8125, |
|
"grad_norm": 4.324705162370445, |
|
"learning_rate": 3.2152723545766056e-06, |
|
"loss": 0.19653138, |
|
"memory(GiB)": 101.73, |
|
"step": 2745, |
|
"train_speed(iter/s)": 0.343737 |
|
}, |
|
{ |
|
"acc": 0.90480156, |
|
"epoch": 3.8194444444444446, |
|
"grad_norm": 4.094118128093458, |
|
"learning_rate": 3.197411543872585e-06, |
|
"loss": 0.25692635, |
|
"memory(GiB)": 101.73, |
|
"step": 2750, |
|
"train_speed(iter/s)": 0.34372 |
|
}, |
|
{ |
|
"acc": 0.89521971, |
|
"epoch": 3.826388888888889, |
|
"grad_norm": 7.01995940066227, |
|
"learning_rate": 3.1795771476173653e-06, |
|
"loss": 0.2531599, |
|
"memory(GiB)": 101.73, |
|
"step": 2755, |
|
"train_speed(iter/s)": 0.343745 |
|
}, |
|
{ |
|
"acc": 0.90467262, |
|
"epoch": 3.8333333333333335, |
|
"grad_norm": 3.9849535755868195, |
|
"learning_rate": 3.1617694270769713e-06, |
|
"loss": 0.23331397, |
|
"memory(GiB)": 101.73, |
|
"step": 2760, |
|
"train_speed(iter/s)": 0.343762 |
|
}, |
|
{ |
|
"acc": 0.91151018, |
|
"epoch": 3.8402777777777777, |
|
"grad_norm": 3.6084761076635457, |
|
"learning_rate": 3.1439886431266347e-06, |
|
"loss": 0.21082807, |
|
"memory(GiB)": 101.73, |
|
"step": 2765, |
|
"train_speed(iter/s)": 0.34378 |
|
}, |
|
{ |
|
"acc": 0.90110893, |
|
"epoch": 3.8472222222222223, |
|
"grad_norm": 4.76524514339417, |
|
"learning_rate": 3.1262350562469808e-06, |
|
"loss": 0.25923443, |
|
"memory(GiB)": 101.73, |
|
"step": 2770, |
|
"train_speed(iter/s)": 0.343818 |
|
}, |
|
{ |
|
"acc": 0.89796429, |
|
"epoch": 3.8541666666666665, |
|
"grad_norm": 3.9287144228778024, |
|
"learning_rate": 3.1085089265202095e-06, |
|
"loss": 0.24114075, |
|
"memory(GiB)": 101.73, |
|
"step": 2775, |
|
"train_speed(iter/s)": 0.343812 |
|
}, |
|
{ |
|
"acc": 0.9085268, |
|
"epoch": 3.861111111111111, |
|
"grad_norm": 3.0751701860282314, |
|
"learning_rate": 3.0908105136262826e-06, |
|
"loss": 0.22855995, |
|
"memory(GiB)": 101.73, |
|
"step": 2780, |
|
"train_speed(iter/s)": 0.343845 |
|
}, |
|
{ |
|
"acc": 0.90738831, |
|
"epoch": 3.8680555555555554, |
|
"grad_norm": 3.1413952239104104, |
|
"learning_rate": 3.073140076839129e-06, |
|
"loss": 0.23103039, |
|
"memory(GiB)": 101.73, |
|
"step": 2785, |
|
"train_speed(iter/s)": 0.343876 |
|
}, |
|
{ |
|
"acc": 0.90736532, |
|
"epoch": 3.875, |
|
"grad_norm": 2.989880541422952, |
|
"learning_rate": 3.055497875022835e-06, |
|
"loss": 0.23236108, |
|
"memory(GiB)": 101.73, |
|
"step": 2790, |
|
"train_speed(iter/s)": 0.3439 |
|
}, |
|
{ |
|
"acc": 0.91404114, |
|
"epoch": 3.8819444444444446, |
|
"grad_norm": 10.059317904857696, |
|
"learning_rate": 3.0378841666278565e-06, |
|
"loss": 0.22054443, |
|
"memory(GiB)": 101.73, |
|
"step": 2795, |
|
"train_speed(iter/s)": 0.343934 |
|
}, |
|
{ |
|
"acc": 0.91268139, |
|
"epoch": 3.888888888888889, |
|
"grad_norm": 3.663419727544536, |
|
"learning_rate": 3.020299209687237e-06, |
|
"loss": 0.21329353, |
|
"memory(GiB)": 101.73, |
|
"step": 2800, |
|
"train_speed(iter/s)": 0.343851 |
|
}, |
|
{ |
|
"acc": 0.90505514, |
|
"epoch": 3.8958333333333335, |
|
"grad_norm": 3.7817002877666255, |
|
"learning_rate": 3.0027432618128194e-06, |
|
"loss": 0.24037421, |
|
"memory(GiB)": 101.73, |
|
"step": 2805, |
|
"train_speed(iter/s)": 0.34389 |
|
}, |
|
{ |
|
"acc": 0.90936232, |
|
"epoch": 3.9027777777777777, |
|
"grad_norm": 3.505578498849252, |
|
"learning_rate": 2.9852165801914818e-06, |
|
"loss": 0.21331539, |
|
"memory(GiB)": 101.73, |
|
"step": 2810, |
|
"train_speed(iter/s)": 0.343878 |
|
}, |
|
{ |
|
"acc": 0.90463762, |
|
"epoch": 3.9097222222222223, |
|
"grad_norm": 5.035890509179841, |
|
"learning_rate": 2.9677194215813628e-06, |
|
"loss": 0.2340332, |
|
"memory(GiB)": 101.73, |
|
"step": 2815, |
|
"train_speed(iter/s)": 0.343918 |
|
}, |
|
{ |
|
"acc": 0.91466646, |
|
"epoch": 3.9166666666666665, |
|
"grad_norm": 4.901427224659314, |
|
"learning_rate": 2.950252042308099e-06, |
|
"loss": 0.22736809, |
|
"memory(GiB)": 101.73, |
|
"step": 2820, |
|
"train_speed(iter/s)": 0.343945 |
|
}, |
|
{ |
|
"acc": 0.91305885, |
|
"epoch": 3.923611111111111, |
|
"grad_norm": 4.762675724207692, |
|
"learning_rate": 2.9328146982610757e-06, |
|
"loss": 0.21405044, |
|
"memory(GiB)": 101.73, |
|
"step": 2825, |
|
"train_speed(iter/s)": 0.343936 |
|
}, |
|
{ |
|
"acc": 0.91384068, |
|
"epoch": 3.9305555555555554, |
|
"grad_norm": 4.4556363032336055, |
|
"learning_rate": 2.915407644889676e-06, |
|
"loss": 0.19862232, |
|
"memory(GiB)": 101.73, |
|
"step": 2830, |
|
"train_speed(iter/s)": 0.343963 |
|
}, |
|
{ |
|
"acc": 0.91425467, |
|
"epoch": 3.9375, |
|
"grad_norm": 4.48354142391771, |
|
"learning_rate": 2.898031137199535e-06, |
|
"loss": 0.22390165, |
|
"memory(GiB)": 101.73, |
|
"step": 2835, |
|
"train_speed(iter/s)": 0.343944 |
|
}, |
|
{ |
|
"acc": 0.90526085, |
|
"epoch": 3.9444444444444446, |
|
"grad_norm": 4.984884813064181, |
|
"learning_rate": 2.880685429748809e-06, |
|
"loss": 0.23374908, |
|
"memory(GiB)": 101.73, |
|
"step": 2840, |
|
"train_speed(iter/s)": 0.343948 |
|
}, |
|
{ |
|
"acc": 0.917663, |
|
"epoch": 3.951388888888889, |
|
"grad_norm": 3.1978275940506933, |
|
"learning_rate": 2.863370776644444e-06, |
|
"loss": 0.21864316, |
|
"memory(GiB)": 101.73, |
|
"step": 2845, |
|
"train_speed(iter/s)": 0.34398 |
|
}, |
|
{ |
|
"acc": 0.90146713, |
|
"epoch": 3.9583333333333335, |
|
"grad_norm": 5.338078987116394, |
|
"learning_rate": 2.846087431538455e-06, |
|
"loss": 0.23189349, |
|
"memory(GiB)": 101.73, |
|
"step": 2850, |
|
"train_speed(iter/s)": 0.343856 |
|
}, |
|
{ |
|
"acc": 0.90521212, |
|
"epoch": 3.9652777777777777, |
|
"grad_norm": 2.6961159090915525, |
|
"learning_rate": 2.8288356476242067e-06, |
|
"loss": 0.2326869, |
|
"memory(GiB)": 101.73, |
|
"step": 2855, |
|
"train_speed(iter/s)": 0.343864 |
|
}, |
|
{ |
|
"acc": 0.90831709, |
|
"epoch": 3.9722222222222223, |
|
"grad_norm": 5.369767010977922, |
|
"learning_rate": 2.811615677632709e-06, |
|
"loss": 0.22528372, |
|
"memory(GiB)": 101.73, |
|
"step": 2860, |
|
"train_speed(iter/s)": 0.343895 |
|
}, |
|
{ |
|
"acc": 0.91458817, |
|
"epoch": 3.9791666666666665, |
|
"grad_norm": 6.308934660163883, |
|
"learning_rate": 2.794427773828907e-06, |
|
"loss": 0.2268847, |
|
"memory(GiB)": 101.73, |
|
"step": 2865, |
|
"train_speed(iter/s)": 0.343899 |
|
}, |
|
{ |
|
"acc": 0.91015339, |
|
"epoch": 3.986111111111111, |
|
"grad_norm": 3.6572394482166724, |
|
"learning_rate": 2.777272188007996e-06, |
|
"loss": 0.21870656, |
|
"memory(GiB)": 101.73, |
|
"step": 2870, |
|
"train_speed(iter/s)": 0.343924 |
|
}, |
|
{ |
|
"acc": 0.91265316, |
|
"epoch": 3.9930555555555554, |
|
"grad_norm": 6.6960528916888675, |
|
"learning_rate": 2.7601491714917223e-06, |
|
"loss": 0.22325075, |
|
"memory(GiB)": 101.73, |
|
"step": 2875, |
|
"train_speed(iter/s)": 0.34395 |
|
}, |
|
{ |
|
"acc": 0.90769653, |
|
"epoch": 4.0, |
|
"grad_norm": 3.8850794368257997, |
|
"learning_rate": 2.74305897512471e-06, |
|
"loss": 0.22835503, |
|
"memory(GiB)": 101.73, |
|
"step": 2880, |
|
"train_speed(iter/s)": 0.343939 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_acc": 0.8967149619853679, |
|
"eval_loss": 0.26612961292266846, |
|
"eval_runtime": 24.7985, |
|
"eval_samples_per_second": 13.831, |
|
"eval_steps_per_second": 1.734, |
|
"step": 2880 |
|
}, |
|
{ |
|
"acc": 0.92103558, |
|
"epoch": 4.006944444444445, |
|
"grad_norm": 5.906430563555609, |
|
"learning_rate": 2.726001849270778e-06, |
|
"loss": 0.19677427, |
|
"memory(GiB)": 101.73, |
|
"step": 2885, |
|
"train_speed(iter/s)": 0.341573 |
|
}, |
|
{ |
|
"acc": 0.91259556, |
|
"epoch": 4.013888888888889, |
|
"grad_norm": 3.7879137611063816, |
|
"learning_rate": 2.7089780438092844e-06, |
|
"loss": 0.21481938, |
|
"memory(GiB)": 101.73, |
|
"step": 2890, |
|
"train_speed(iter/s)": 0.341624 |
|
}, |
|
{ |
|
"acc": 0.9053381, |
|
"epoch": 4.020833333333333, |
|
"grad_norm": 6.231784630607876, |
|
"learning_rate": 2.6919878081314494e-06, |
|
"loss": 0.21756485, |
|
"memory(GiB)": 101.73, |
|
"step": 2895, |
|
"train_speed(iter/s)": 0.341666 |
|
}, |
|
{ |
|
"acc": 0.91989698, |
|
"epoch": 4.027777777777778, |
|
"grad_norm": 6.530126465541856, |
|
"learning_rate": 2.675031391136717e-06, |
|
"loss": 0.20109849, |
|
"memory(GiB)": 101.73, |
|
"step": 2900, |
|
"train_speed(iter/s)": 0.34169 |
|
}, |
|
{ |
|
"acc": 0.92158413, |
|
"epoch": 4.034722222222222, |
|
"grad_norm": 8.19113289280511, |
|
"learning_rate": 2.658109041229097e-06, |
|
"loss": 0.18494437, |
|
"memory(GiB)": 101.73, |
|
"step": 2905, |
|
"train_speed(iter/s)": 0.341747 |
|
}, |
|
{ |
|
"acc": 0.91528072, |
|
"epoch": 4.041666666666667, |
|
"grad_norm": 5.196788929456166, |
|
"learning_rate": 2.641221006313542e-06, |
|
"loss": 0.2032238, |
|
"memory(GiB)": 101.73, |
|
"step": 2910, |
|
"train_speed(iter/s)": 0.341775 |
|
}, |
|
{ |
|
"acc": 0.92303152, |
|
"epoch": 4.048611111111111, |
|
"grad_norm": 7.864716805294142, |
|
"learning_rate": 2.624367533792291e-06, |
|
"loss": 0.19096355, |
|
"memory(GiB)": 101.73, |
|
"step": 2915, |
|
"train_speed(iter/s)": 0.341785 |
|
}, |
|
{ |
|
"acc": 0.91825953, |
|
"epoch": 4.055555555555555, |
|
"grad_norm": 5.335075751721026, |
|
"learning_rate": 2.607548870561271e-06, |
|
"loss": 0.21585865, |
|
"memory(GiB)": 101.73, |
|
"step": 2920, |
|
"train_speed(iter/s)": 0.341815 |
|
}, |
|
{ |
|
"acc": 0.90057945, |
|
"epoch": 4.0625, |
|
"grad_norm": 4.833951672917176, |
|
"learning_rate": 2.5907652630064588e-06, |
|
"loss": 0.22249794, |
|
"memory(GiB)": 101.73, |
|
"step": 2925, |
|
"train_speed(iter/s)": 0.341826 |
|
}, |
|
{ |
|
"acc": 0.91943073, |
|
"epoch": 4.069444444444445, |
|
"grad_norm": 8.019697338393213, |
|
"learning_rate": 2.5740169570002882e-06, |
|
"loss": 0.20015411, |
|
"memory(GiB)": 101.73, |
|
"step": 2930, |
|
"train_speed(iter/s)": 0.341804 |
|
}, |
|
{ |
|
"acc": 0.91776352, |
|
"epoch": 4.076388888888889, |
|
"grad_norm": 8.514964900470892, |
|
"learning_rate": 2.55730419789804e-06, |
|
"loss": 0.19529152, |
|
"memory(GiB)": 101.73, |
|
"step": 2935, |
|
"train_speed(iter/s)": 0.341815 |
|
}, |
|
{ |
|
"acc": 0.91629219, |
|
"epoch": 4.083333333333333, |
|
"grad_norm": 6.08663782319064, |
|
"learning_rate": 2.5406272305342438e-06, |
|
"loss": 0.20550578, |
|
"memory(GiB)": 101.73, |
|
"step": 2940, |
|
"train_speed(iter/s)": 0.341842 |
|
}, |
|
{ |
|
"acc": 0.92428894, |
|
"epoch": 4.090277777777778, |
|
"grad_norm": 2.5039357125914274, |
|
"learning_rate": 2.5239862992191023e-06, |
|
"loss": 0.19628391, |
|
"memory(GiB)": 101.73, |
|
"step": 2945, |
|
"train_speed(iter/s)": 0.341877 |
|
}, |
|
{ |
|
"acc": 0.91116905, |
|
"epoch": 4.097222222222222, |
|
"grad_norm": 7.318180727257995, |
|
"learning_rate": 2.507381647734899e-06, |
|
"loss": 0.21234784, |
|
"memory(GiB)": 101.73, |
|
"step": 2950, |
|
"train_speed(iter/s)": 0.341914 |
|
}, |
|
{ |
|
"acc": 0.91188145, |
|
"epoch": 4.104166666666667, |
|
"grad_norm": 6.6612949734360765, |
|
"learning_rate": 2.490813519332442e-06, |
|
"loss": 0.20844579, |
|
"memory(GiB)": 101.73, |
|
"step": 2955, |
|
"train_speed(iter/s)": 0.341935 |
|
}, |
|
{ |
|
"acc": 0.9142437, |
|
"epoch": 4.111111111111111, |
|
"grad_norm": 6.349873204038989, |
|
"learning_rate": 2.4742821567274825e-06, |
|
"loss": 0.20826674, |
|
"memory(GiB)": 101.73, |
|
"step": 2960, |
|
"train_speed(iter/s)": 0.34197 |
|
}, |
|
{ |
|
"acc": 0.91423683, |
|
"epoch": 4.118055555555555, |
|
"grad_norm": 8.317774783727135, |
|
"learning_rate": 2.4577878020971822e-06, |
|
"loss": 0.22406006, |
|
"memory(GiB)": 101.73, |
|
"step": 2965, |
|
"train_speed(iter/s)": 0.342009 |
|
}, |
|
{ |
|
"acc": 0.91010065, |
|
"epoch": 4.125, |
|
"grad_norm": 10.126802668666569, |
|
"learning_rate": 2.4413306970765393e-06, |
|
"loss": 0.22186904, |
|
"memory(GiB)": 101.73, |
|
"step": 2970, |
|
"train_speed(iter/s)": 0.342033 |
|
}, |
|
{ |
|
"acc": 0.92021332, |
|
"epoch": 4.131944444444445, |
|
"grad_norm": 6.474786305104566, |
|
"learning_rate": 2.424911082754871e-06, |
|
"loss": 0.20578272, |
|
"memory(GiB)": 101.73, |
|
"step": 2975, |
|
"train_speed(iter/s)": 0.342068 |
|
}, |
|
{ |
|
"acc": 0.91911068, |
|
"epoch": 4.138888888888889, |
|
"grad_norm": 6.152212251038718, |
|
"learning_rate": 2.4085291996722655e-06, |
|
"loss": 0.21330719, |
|
"memory(GiB)": 101.73, |
|
"step": 2980, |
|
"train_speed(iter/s)": 0.342098 |
|
}, |
|
{ |
|
"acc": 0.90562267, |
|
"epoch": 4.145833333333333, |
|
"grad_norm": 6.434270724550823, |
|
"learning_rate": 2.3921852878160708e-06, |
|
"loss": 0.22529688, |
|
"memory(GiB)": 101.73, |
|
"step": 2985, |
|
"train_speed(iter/s)": 0.342124 |
|
}, |
|
{ |
|
"acc": 0.92362442, |
|
"epoch": 4.152777777777778, |
|
"grad_norm": 4.610860381392509, |
|
"learning_rate": 2.375879586617367e-06, |
|
"loss": 0.18123511, |
|
"memory(GiB)": 101.73, |
|
"step": 2990, |
|
"train_speed(iter/s)": 0.342154 |
|
}, |
|
{ |
|
"acc": 0.91461487, |
|
"epoch": 4.159722222222222, |
|
"grad_norm": 4.105012494189029, |
|
"learning_rate": 2.3596123349474727e-06, |
|
"loss": 0.1987551, |
|
"memory(GiB)": 101.73, |
|
"step": 2995, |
|
"train_speed(iter/s)": 0.342196 |
|
}, |
|
{ |
|
"acc": 0.92359133, |
|
"epoch": 4.166666666666667, |
|
"grad_norm": 4.699484309075114, |
|
"learning_rate": 2.34338377111443e-06, |
|
"loss": 0.19864633, |
|
"memory(GiB)": 101.73, |
|
"step": 3000, |
|
"train_speed(iter/s)": 0.342246 |
|
}, |
|
{ |
|
"acc": 0.9230299, |
|
"epoch": 4.173611111111111, |
|
"grad_norm": 7.962603744103441, |
|
"learning_rate": 2.327194132859526e-06, |
|
"loss": 0.20071533, |
|
"memory(GiB)": 101.73, |
|
"step": 3005, |
|
"train_speed(iter/s)": 0.342296 |
|
}, |
|
{ |
|
"acc": 0.912883, |
|
"epoch": 4.180555555555555, |
|
"grad_norm": 6.7436017503812815, |
|
"learning_rate": 2.3110436573538076e-06, |
|
"loss": 0.21834412, |
|
"memory(GiB)": 101.73, |
|
"step": 3010, |
|
"train_speed(iter/s)": 0.342324 |
|
}, |
|
{ |
|
"acc": 0.92828503, |
|
"epoch": 4.1875, |
|
"grad_norm": 4.260639944998096, |
|
"learning_rate": 2.2949325811945946e-06, |
|
"loss": 0.2033814, |
|
"memory(GiB)": 101.73, |
|
"step": 3015, |
|
"train_speed(iter/s)": 0.342358 |
|
}, |
|
{ |
|
"acc": 0.92300644, |
|
"epoch": 4.194444444444445, |
|
"grad_norm": 2.91708878590044, |
|
"learning_rate": 2.2788611404020382e-06, |
|
"loss": 0.195956, |
|
"memory(GiB)": 101.73, |
|
"step": 3020, |
|
"train_speed(iter/s)": 0.342391 |
|
}, |
|
{ |
|
"acc": 0.91175823, |
|
"epoch": 4.201388888888889, |
|
"grad_norm": 3.2469308089973485, |
|
"learning_rate": 2.262829570415637e-06, |
|
"loss": 0.2216738, |
|
"memory(GiB)": 101.73, |
|
"step": 3025, |
|
"train_speed(iter/s)": 0.342444 |
|
}, |
|
{ |
|
"acc": 0.92101059, |
|
"epoch": 4.208333333333333, |
|
"grad_norm": 5.199265904450231, |
|
"learning_rate": 2.246838106090807e-06, |
|
"loss": 0.20876896, |
|
"memory(GiB)": 101.73, |
|
"step": 3030, |
|
"train_speed(iter/s)": 0.342479 |
|
}, |
|
{ |
|
"acc": 0.92571077, |
|
"epoch": 4.215277777777778, |
|
"grad_norm": 5.1589147484364775, |
|
"learning_rate": 2.230886981695427e-06, |
|
"loss": 0.19227347, |
|
"memory(GiB)": 101.73, |
|
"step": 3035, |
|
"train_speed(iter/s)": 0.342511 |
|
}, |
|
{ |
|
"acc": 0.91746311, |
|
"epoch": 4.222222222222222, |
|
"grad_norm": 4.523533624609451, |
|
"learning_rate": 2.2149764309064233e-06, |
|
"loss": 0.20252769, |
|
"memory(GiB)": 101.73, |
|
"step": 3040, |
|
"train_speed(iter/s)": 0.342529 |
|
}, |
|
{ |
|
"acc": 0.92907982, |
|
"epoch": 4.229166666666667, |
|
"grad_norm": 4.484898357270931, |
|
"learning_rate": 2.1991066868063264e-06, |
|
"loss": 0.17676392, |
|
"memory(GiB)": 101.73, |
|
"step": 3045, |
|
"train_speed(iter/s)": 0.34255 |
|
}, |
|
{ |
|
"acc": 0.92075233, |
|
"epoch": 4.236111111111111, |
|
"grad_norm": 6.798845521797008, |
|
"learning_rate": 2.183277981879876e-06, |
|
"loss": 0.19941406, |
|
"memory(GiB)": 101.73, |
|
"step": 3050, |
|
"train_speed(iter/s)": 0.342564 |
|
}, |
|
{ |
|
"acc": 0.91766424, |
|
"epoch": 4.243055555555555, |
|
"grad_norm": 4.950483312812647, |
|
"learning_rate": 2.1674905480105994e-06, |
|
"loss": 0.21676636, |
|
"memory(GiB)": 101.73, |
|
"step": 3055, |
|
"train_speed(iter/s)": 0.342603 |
|
}, |
|
{ |
|
"acc": 0.91581535, |
|
"epoch": 4.25, |
|
"grad_norm": 5.727602416491945, |
|
"learning_rate": 2.151744616477426e-06, |
|
"loss": 0.21338763, |
|
"memory(GiB)": 101.73, |
|
"step": 3060, |
|
"train_speed(iter/s)": 0.342631 |
|
}, |
|
{ |
|
"acc": 0.9153821, |
|
"epoch": 4.256944444444445, |
|
"grad_norm": 8.155391373213389, |
|
"learning_rate": 2.1360404179512874e-06, |
|
"loss": 0.19959717, |
|
"memory(GiB)": 101.73, |
|
"step": 3065, |
|
"train_speed(iter/s)": 0.342676 |
|
}, |
|
{ |
|
"acc": 0.91343565, |
|
"epoch": 4.263888888888889, |
|
"grad_norm": 7.534324022230486, |
|
"learning_rate": 2.120378182491756e-06, |
|
"loss": 0.21784058, |
|
"memory(GiB)": 101.73, |
|
"step": 3070, |
|
"train_speed(iter/s)": 0.342716 |
|
}, |
|
{ |
|
"acc": 0.91763706, |
|
"epoch": 4.270833333333333, |
|
"grad_norm": 5.838501760901444, |
|
"learning_rate": 2.1047581395436537e-06, |
|
"loss": 0.20123901, |
|
"memory(GiB)": 101.73, |
|
"step": 3075, |
|
"train_speed(iter/s)": 0.34275 |
|
}, |
|
{ |
|
"acc": 0.91569424, |
|
"epoch": 4.277777777777778, |
|
"grad_norm": 5.912931399886815, |
|
"learning_rate": 2.089180517933705e-06, |
|
"loss": 0.2089062, |
|
"memory(GiB)": 101.73, |
|
"step": 3080, |
|
"train_speed(iter/s)": 0.342768 |
|
}, |
|
{ |
|
"acc": 0.91127586, |
|
"epoch": 4.284722222222222, |
|
"grad_norm": 6.321848607015057, |
|
"learning_rate": 2.0736455458671825e-06, |
|
"loss": 0.21258488, |
|
"memory(GiB)": 101.73, |
|
"step": 3085, |
|
"train_speed(iter/s)": 0.342756 |
|
}, |
|
{ |
|
"acc": 0.90627985, |
|
"epoch": 4.291666666666667, |
|
"grad_norm": 4.4561601387109135, |
|
"learning_rate": 2.0581534509245563e-06, |
|
"loss": 0.2243315, |
|
"memory(GiB)": 101.73, |
|
"step": 3090, |
|
"train_speed(iter/s)": 0.34278 |
|
}, |
|
{ |
|
"acc": 0.91781731, |
|
"epoch": 4.298611111111111, |
|
"grad_norm": 4.589217713951676, |
|
"learning_rate": 2.0427044600581726e-06, |
|
"loss": 0.19114343, |
|
"memory(GiB)": 101.73, |
|
"step": 3095, |
|
"train_speed(iter/s)": 0.342801 |
|
}, |
|
{ |
|
"acc": 0.92404423, |
|
"epoch": 4.305555555555555, |
|
"grad_norm": 5.960490626167477, |
|
"learning_rate": 2.0272987995889132e-06, |
|
"loss": 0.20025024, |
|
"memory(GiB)": 101.73, |
|
"step": 3100, |
|
"train_speed(iter/s)": 0.342852 |
|
}, |
|
{ |
|
"acc": 0.91189528, |
|
"epoch": 4.3125, |
|
"grad_norm": 3.4901104254439446, |
|
"learning_rate": 2.011936695202899e-06, |
|
"loss": 0.20672469, |
|
"memory(GiB)": 101.73, |
|
"step": 3105, |
|
"train_speed(iter/s)": 0.342859 |
|
}, |
|
{ |
|
"acc": 0.92345657, |
|
"epoch": 4.319444444444445, |
|
"grad_norm": 5.200062911542391, |
|
"learning_rate": 1.9966183719481635e-06, |
|
"loss": 0.19884933, |
|
"memory(GiB)": 101.73, |
|
"step": 3110, |
|
"train_speed(iter/s)": 0.342894 |
|
}, |
|
{ |
|
"acc": 0.93076115, |
|
"epoch": 4.326388888888889, |
|
"grad_norm": 4.310948513552987, |
|
"learning_rate": 1.9813440542313746e-06, |
|
"loss": 0.18369787, |
|
"memory(GiB)": 101.73, |
|
"step": 3115, |
|
"train_speed(iter/s)": 0.342939 |
|
}, |
|
{ |
|
"acc": 0.92673454, |
|
"epoch": 4.333333333333333, |
|
"grad_norm": 5.518304658975087, |
|
"learning_rate": 1.9661139658145288e-06, |
|
"loss": 0.19188232, |
|
"memory(GiB)": 101.73, |
|
"step": 3120, |
|
"train_speed(iter/s)": 0.342888 |
|
}, |
|
{ |
|
"acc": 0.93909798, |
|
"epoch": 4.340277777777778, |
|
"grad_norm": 4.7517608824503474, |
|
"learning_rate": 1.9509283298116973e-06, |
|
"loss": 0.16544604, |
|
"memory(GiB)": 101.73, |
|
"step": 3125, |
|
"train_speed(iter/s)": 0.342917 |
|
}, |
|
{ |
|
"acc": 0.91507568, |
|
"epoch": 4.347222222222222, |
|
"grad_norm": 9.923491852945716, |
|
"learning_rate": 1.935787368685728e-06, |
|
"loss": 0.1952302, |
|
"memory(GiB)": 101.73, |
|
"step": 3130, |
|
"train_speed(iter/s)": 0.342943 |
|
}, |
|
{ |
|
"acc": 0.91850033, |
|
"epoch": 4.354166666666667, |
|
"grad_norm": 6.0061062470300115, |
|
"learning_rate": 1.9206913042450114e-06, |
|
"loss": 0.21029849, |
|
"memory(GiB)": 101.73, |
|
"step": 3135, |
|
"train_speed(iter/s)": 0.342954 |
|
}, |
|
{ |
|
"acc": 0.91360073, |
|
"epoch": 4.361111111111111, |
|
"grad_norm": 10.312396157334698, |
|
"learning_rate": 1.9056403576402159e-06, |
|
"loss": 0.22273135, |
|
"memory(GiB)": 101.73, |
|
"step": 3140, |
|
"train_speed(iter/s)": 0.342984 |
|
}, |
|
{ |
|
"acc": 0.92476015, |
|
"epoch": 4.368055555555555, |
|
"grad_norm": 4.78373838533618, |
|
"learning_rate": 1.890634749361058e-06, |
|
"loss": 0.18518678, |
|
"memory(GiB)": 101.73, |
|
"step": 3145, |
|
"train_speed(iter/s)": 0.343 |
|
}, |
|
{ |
|
"acc": 0.91752625, |
|
"epoch": 4.375, |
|
"grad_norm": 7.091436711506266, |
|
"learning_rate": 1.8756746992330627e-06, |
|
"loss": 0.20546875, |
|
"memory(GiB)": 101.73, |
|
"step": 3150, |
|
"train_speed(iter/s)": 0.343011 |
|
}, |
|
{ |
|
"acc": 0.90989933, |
|
"epoch": 4.381944444444445, |
|
"grad_norm": 4.850150236316559, |
|
"learning_rate": 1.8607604264143531e-06, |
|
"loss": 0.21127014, |
|
"memory(GiB)": 101.73, |
|
"step": 3155, |
|
"train_speed(iter/s)": 0.343013 |
|
}, |
|
{ |
|
"acc": 0.93822994, |
|
"epoch": 4.388888888888889, |
|
"grad_norm": 5.46370191663923, |
|
"learning_rate": 1.8458921493924342e-06, |
|
"loss": 0.14929545, |
|
"memory(GiB)": 101.73, |
|
"step": 3160, |
|
"train_speed(iter/s)": 0.34303 |
|
}, |
|
{ |
|
"acc": 0.91957836, |
|
"epoch": 4.395833333333333, |
|
"grad_norm": 5.524837271666093, |
|
"learning_rate": 1.831070085980988e-06, |
|
"loss": 0.19637758, |
|
"memory(GiB)": 101.73, |
|
"step": 3165, |
|
"train_speed(iter/s)": 0.343038 |
|
}, |
|
{ |
|
"acc": 0.92123384, |
|
"epoch": 4.402777777777778, |
|
"grad_norm": 7.926793678931113, |
|
"learning_rate": 1.8162944533166954e-06, |
|
"loss": 0.19701744, |
|
"memory(GiB)": 101.73, |
|
"step": 3170, |
|
"train_speed(iter/s)": 0.342961 |
|
}, |
|
{ |
|
"acc": 0.92286034, |
|
"epoch": 4.409722222222222, |
|
"grad_norm": 7.847698734973718, |
|
"learning_rate": 1.8015654678560371e-06, |
|
"loss": 0.19722157, |
|
"memory(GiB)": 101.73, |
|
"step": 3175, |
|
"train_speed(iter/s)": 0.342964 |
|
}, |
|
{ |
|
"acc": 0.91846771, |
|
"epoch": 4.416666666666667, |
|
"grad_norm": 11.602785837281019, |
|
"learning_rate": 1.7868833453721465e-06, |
|
"loss": 0.20651245, |
|
"memory(GiB)": 101.73, |
|
"step": 3180, |
|
"train_speed(iter/s)": 0.342968 |
|
}, |
|
{ |
|
"acc": 0.92178154, |
|
"epoch": 4.423611111111111, |
|
"grad_norm": 8.927190820316273, |
|
"learning_rate": 1.7722483009516225e-06, |
|
"loss": 0.19756188, |
|
"memory(GiB)": 101.73, |
|
"step": 3185, |
|
"train_speed(iter/s)": 0.342953 |
|
}, |
|
{ |
|
"acc": 0.92359142, |
|
"epoch": 4.430555555555555, |
|
"grad_norm": 7.0620744798059025, |
|
"learning_rate": 1.7576605489913989e-06, |
|
"loss": 0.19887724, |
|
"memory(GiB)": 101.73, |
|
"step": 3190, |
|
"train_speed(iter/s)": 0.342998 |
|
}, |
|
{ |
|
"acc": 0.9229085, |
|
"epoch": 4.4375, |
|
"grad_norm": 5.903111514033937, |
|
"learning_rate": 1.7431203031955906e-06, |
|
"loss": 0.19516015, |
|
"memory(GiB)": 101.73, |
|
"step": 3195, |
|
"train_speed(iter/s)": 0.34304 |
|
}, |
|
{ |
|
"acc": 0.92671881, |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 3.573204950885832, |
|
"learning_rate": 1.7286277765723736e-06, |
|
"loss": 0.18694823, |
|
"memory(GiB)": 101.73, |
|
"step": 3200, |
|
"train_speed(iter/s)": 0.343078 |
|
}, |
|
{ |
|
"acc": 0.92473421, |
|
"epoch": 4.451388888888889, |
|
"grad_norm": 4.159136138411481, |
|
"learning_rate": 1.7141831814308532e-06, |
|
"loss": 0.19077295, |
|
"memory(GiB)": 101.73, |
|
"step": 3205, |
|
"train_speed(iter/s)": 0.34308 |
|
}, |
|
{ |
|
"acc": 0.91383629, |
|
"epoch": 4.458333333333333, |
|
"grad_norm": 7.8697493954658455, |
|
"learning_rate": 1.6997867293779663e-06, |
|
"loss": 0.23260565, |
|
"memory(GiB)": 101.73, |
|
"step": 3210, |
|
"train_speed(iter/s)": 0.343117 |
|
}, |
|
{ |
|
"acc": 0.92381392, |
|
"epoch": 4.465277777777778, |
|
"grad_norm": 6.152018205562397, |
|
"learning_rate": 1.6854386313153674e-06, |
|
"loss": 0.19314412, |
|
"memory(GiB)": 101.73, |
|
"step": 3215, |
|
"train_speed(iter/s)": 0.343116 |
|
}, |
|
{ |
|
"acc": 0.92322865, |
|
"epoch": 4.472222222222222, |
|
"grad_norm": 5.385841801425564, |
|
"learning_rate": 1.6711390974363546e-06, |
|
"loss": 0.19609743, |
|
"memory(GiB)": 101.73, |
|
"step": 3220, |
|
"train_speed(iter/s)": 0.343166 |
|
}, |
|
{ |
|
"acc": 0.92354794, |
|
"epoch": 4.479166666666667, |
|
"grad_norm": 4.4092587657079, |
|
"learning_rate": 1.656888337222774e-06, |
|
"loss": 0.17454972, |
|
"memory(GiB)": 101.73, |
|
"step": 3225, |
|
"train_speed(iter/s)": 0.343198 |
|
}, |
|
{ |
|
"acc": 0.91775503, |
|
"epoch": 4.486111111111111, |
|
"grad_norm": 4.013579134507486, |
|
"learning_rate": 1.6426865594419644e-06, |
|
"loss": 0.19643557, |
|
"memory(GiB)": 101.73, |
|
"step": 3230, |
|
"train_speed(iter/s)": 0.343239 |
|
}, |
|
{ |
|
"acc": 0.93061867, |
|
"epoch": 4.493055555555555, |
|
"grad_norm": 5.6294114253186756, |
|
"learning_rate": 1.6285339721436927e-06, |
|
"loss": 0.17754095, |
|
"memory(GiB)": 101.73, |
|
"step": 3235, |
|
"train_speed(iter/s)": 0.343269 |
|
}, |
|
{ |
|
"acc": 0.92582932, |
|
"epoch": 4.5, |
|
"grad_norm": 5.737199620676354, |
|
"learning_rate": 1.6144307826571068e-06, |
|
"loss": 0.18249105, |
|
"memory(GiB)": 101.73, |
|
"step": 3240, |
|
"train_speed(iter/s)": 0.343266 |
|
}, |
|
{ |
|
"acc": 0.92780704, |
|
"epoch": 4.506944444444445, |
|
"grad_norm": 5.65833700507881, |
|
"learning_rate": 1.6003771975876987e-06, |
|
"loss": 0.17639775, |
|
"memory(GiB)": 101.73, |
|
"step": 3245, |
|
"train_speed(iter/s)": 0.343315 |
|
}, |
|
{ |
|
"acc": 0.92351227, |
|
"epoch": 4.513888888888889, |
|
"grad_norm": 6.25866794334907, |
|
"learning_rate": 1.5863734228142751e-06, |
|
"loss": 0.19747633, |
|
"memory(GiB)": 101.73, |
|
"step": 3250, |
|
"train_speed(iter/s)": 0.343343 |
|
}, |
|
{ |
|
"acc": 0.92094841, |
|
"epoch": 4.520833333333333, |
|
"grad_norm": 6.011894829639706, |
|
"learning_rate": 1.572419663485948e-06, |
|
"loss": 0.20139146, |
|
"memory(GiB)": 101.73, |
|
"step": 3255, |
|
"train_speed(iter/s)": 0.343337 |
|
}, |
|
{ |
|
"acc": 0.92008724, |
|
"epoch": 4.527777777777778, |
|
"grad_norm": 5.54010697697139, |
|
"learning_rate": 1.5585161240191207e-06, |
|
"loss": 0.1791935, |
|
"memory(GiB)": 101.73, |
|
"step": 3260, |
|
"train_speed(iter/s)": 0.343371 |
|
}, |
|
{ |
|
"acc": 0.9245883, |
|
"epoch": 4.534722222222222, |
|
"grad_norm": 6.420422985273098, |
|
"learning_rate": 1.5446630080945027e-06, |
|
"loss": 0.18729727, |
|
"memory(GiB)": 101.73, |
|
"step": 3265, |
|
"train_speed(iter/s)": 0.343372 |
|
}, |
|
{ |
|
"acc": 0.92342873, |
|
"epoch": 4.541666666666667, |
|
"grad_norm": 4.290515420275203, |
|
"learning_rate": 1.5308605186541159e-06, |
|
"loss": 0.18176882, |
|
"memory(GiB)": 101.73, |
|
"step": 3270, |
|
"train_speed(iter/s)": 0.343363 |
|
}, |
|
{ |
|
"acc": 0.9153471, |
|
"epoch": 4.548611111111111, |
|
"grad_norm": 6.6889348025132795, |
|
"learning_rate": 1.5171088578983317e-06, |
|
"loss": 0.19240036, |
|
"memory(GiB)": 101.73, |
|
"step": 3275, |
|
"train_speed(iter/s)": 0.343414 |
|
}, |
|
{ |
|
"acc": 0.93120098, |
|
"epoch": 4.555555555555555, |
|
"grad_norm": 6.708275992166459, |
|
"learning_rate": 1.503408227282898e-06, |
|
"loss": 0.18204656, |
|
"memory(GiB)": 101.73, |
|
"step": 3280, |
|
"train_speed(iter/s)": 0.343446 |
|
}, |
|
{ |
|
"acc": 0.91508112, |
|
"epoch": 4.5625, |
|
"grad_norm": 7.03757740085225, |
|
"learning_rate": 1.489758827515998e-06, |
|
"loss": 0.19290162, |
|
"memory(GiB)": 101.73, |
|
"step": 3285, |
|
"train_speed(iter/s)": 0.343441 |
|
}, |
|
{ |
|
"acc": 0.92076712, |
|
"epoch": 4.569444444444445, |
|
"grad_norm": 5.257125987275666, |
|
"learning_rate": 1.4761608585553047e-06, |
|
"loss": 0.18960032, |
|
"memory(GiB)": 101.73, |
|
"step": 3290, |
|
"train_speed(iter/s)": 0.343449 |
|
}, |
|
{ |
|
"acc": 0.9261817, |
|
"epoch": 4.576388888888889, |
|
"grad_norm": 7.586985807718556, |
|
"learning_rate": 1.4626145196050539e-06, |
|
"loss": 0.18427305, |
|
"memory(GiB)": 101.73, |
|
"step": 3295, |
|
"train_speed(iter/s)": 0.343448 |
|
}, |
|
{ |
|
"acc": 0.93572044, |
|
"epoch": 4.583333333333333, |
|
"grad_norm": 5.174975068258436, |
|
"learning_rate": 1.4491200091131203e-06, |
|
"loss": 0.17530704, |
|
"memory(GiB)": 101.73, |
|
"step": 3300, |
|
"train_speed(iter/s)": 0.343476 |
|
}, |
|
{ |
|
"acc": 0.92142067, |
|
"epoch": 4.590277777777778, |
|
"grad_norm": 2.7766176350174474, |
|
"learning_rate": 1.4356775247681192e-06, |
|
"loss": 0.1819458, |
|
"memory(GiB)": 101.73, |
|
"step": 3305, |
|
"train_speed(iter/s)": 0.343469 |
|
}, |
|
{ |
|
"acc": 0.92758675, |
|
"epoch": 4.597222222222222, |
|
"grad_norm": 8.424408900106021, |
|
"learning_rate": 1.422287263496501e-06, |
|
"loss": 0.1895298, |
|
"memory(GiB)": 101.73, |
|
"step": 3310, |
|
"train_speed(iter/s)": 0.343506 |
|
}, |
|
{ |
|
"acc": 0.93491707, |
|
"epoch": 4.604166666666667, |
|
"grad_norm": 9.687761901503304, |
|
"learning_rate": 1.408949421459676e-06, |
|
"loss": 0.18075891, |
|
"memory(GiB)": 101.73, |
|
"step": 3315, |
|
"train_speed(iter/s)": 0.343512 |
|
}, |
|
{ |
|
"acc": 0.9381012, |
|
"epoch": 4.611111111111111, |
|
"grad_norm": 5.905868947420923, |
|
"learning_rate": 1.3956641940511347e-06, |
|
"loss": 0.16116495, |
|
"memory(GiB)": 101.73, |
|
"step": 3320, |
|
"train_speed(iter/s)": 0.343525 |
|
}, |
|
{ |
|
"acc": 0.92054367, |
|
"epoch": 4.618055555555555, |
|
"grad_norm": 6.1885747944492735, |
|
"learning_rate": 1.3824317758935832e-06, |
|
"loss": 0.19729955, |
|
"memory(GiB)": 101.73, |
|
"step": 3325, |
|
"train_speed(iter/s)": 0.343538 |
|
}, |
|
{ |
|
"acc": 0.92909966, |
|
"epoch": 4.625, |
|
"grad_norm": 3.7114859695136033, |
|
"learning_rate": 1.369252360836101e-06, |
|
"loss": 0.18774183, |
|
"memory(GiB)": 101.73, |
|
"step": 3330, |
|
"train_speed(iter/s)": 0.343562 |
|
}, |
|
{ |
|
"acc": 0.93170919, |
|
"epoch": 4.631944444444445, |
|
"grad_norm": 6.9177632464094865, |
|
"learning_rate": 1.3561261419512895e-06, |
|
"loss": 0.17090454, |
|
"memory(GiB)": 101.73, |
|
"step": 3335, |
|
"train_speed(iter/s)": 0.343532 |
|
}, |
|
{ |
|
"acc": 0.92585773, |
|
"epoch": 4.638888888888889, |
|
"grad_norm": 6.828565679411712, |
|
"learning_rate": 1.343053311532454e-06, |
|
"loss": 0.18238465, |
|
"memory(GiB)": 101.73, |
|
"step": 3340, |
|
"train_speed(iter/s)": 0.343557 |
|
}, |
|
{ |
|
"acc": 0.91903534, |
|
"epoch": 4.645833333333333, |
|
"grad_norm": 5.367138133814305, |
|
"learning_rate": 1.3300340610907807e-06, |
|
"loss": 0.19298458, |
|
"memory(GiB)": 101.73, |
|
"step": 3345, |
|
"train_speed(iter/s)": 0.343583 |
|
}, |
|
{ |
|
"acc": 0.91593809, |
|
"epoch": 4.652777777777778, |
|
"grad_norm": 4.388889958742242, |
|
"learning_rate": 1.3170685813525359e-06, |
|
"loss": 0.19910928, |
|
"memory(GiB)": 101.73, |
|
"step": 3350, |
|
"train_speed(iter/s)": 0.343596 |
|
}, |
|
{ |
|
"acc": 0.9239089, |
|
"epoch": 4.659722222222222, |
|
"grad_norm": 7.979392327032548, |
|
"learning_rate": 1.3041570622562624e-06, |
|
"loss": 0.18677392, |
|
"memory(GiB)": 101.73, |
|
"step": 3355, |
|
"train_speed(iter/s)": 0.343626 |
|
}, |
|
{ |
|
"acc": 0.92999897, |
|
"epoch": 4.666666666666667, |
|
"grad_norm": 23.61376185166529, |
|
"learning_rate": 1.2912996929500105e-06, |
|
"loss": 0.18426642, |
|
"memory(GiB)": 101.73, |
|
"step": 3360, |
|
"train_speed(iter/s)": 0.343637 |
|
}, |
|
{ |
|
"acc": 0.93519926, |
|
"epoch": 4.673611111111111, |
|
"grad_norm": 6.4377028887779915, |
|
"learning_rate": 1.2784966617885538e-06, |
|
"loss": 0.17984772, |
|
"memory(GiB)": 101.73, |
|
"step": 3365, |
|
"train_speed(iter/s)": 0.343666 |
|
}, |
|
{ |
|
"acc": 0.93129597, |
|
"epoch": 4.680555555555555, |
|
"grad_norm": 6.124547911246043, |
|
"learning_rate": 1.2657481563306414e-06, |
|
"loss": 0.18247683, |
|
"memory(GiB)": 101.73, |
|
"step": 3370, |
|
"train_speed(iter/s)": 0.343693 |
|
}, |
|
{ |
|
"acc": 0.92232943, |
|
"epoch": 4.6875, |
|
"grad_norm": 5.931941944106171, |
|
"learning_rate": 1.253054363336241e-06, |
|
"loss": 0.17325442, |
|
"memory(GiB)": 101.73, |
|
"step": 3375, |
|
"train_speed(iter/s)": 0.343734 |
|
}, |
|
{ |
|
"acc": 0.93359146, |
|
"epoch": 4.694444444444445, |
|
"grad_norm": 7.753520923917529, |
|
"learning_rate": 1.2404154687638103e-06, |
|
"loss": 0.16628423, |
|
"memory(GiB)": 101.73, |
|
"step": 3380, |
|
"train_speed(iter/s)": 0.343729 |
|
}, |
|
{ |
|
"acc": 0.93307018, |
|
"epoch": 4.701388888888889, |
|
"grad_norm": 7.53046348429032, |
|
"learning_rate": 1.2278316577675646e-06, |
|
"loss": 0.17817822, |
|
"memory(GiB)": 101.73, |
|
"step": 3385, |
|
"train_speed(iter/s)": 0.343758 |
|
}, |
|
{ |
|
"acc": 0.93201313, |
|
"epoch": 4.708333333333333, |
|
"grad_norm": 9.025531908827059, |
|
"learning_rate": 1.2153031146947749e-06, |
|
"loss": 0.17697582, |
|
"memory(GiB)": 101.73, |
|
"step": 3390, |
|
"train_speed(iter/s)": 0.34378 |
|
}, |
|
{ |
|
"acc": 0.93053436, |
|
"epoch": 4.715277777777778, |
|
"grad_norm": 7.021234732278172, |
|
"learning_rate": 1.2028300230830572e-06, |
|
"loss": 0.16738009, |
|
"memory(GiB)": 101.73, |
|
"step": 3395, |
|
"train_speed(iter/s)": 0.3438 |
|
}, |
|
{ |
|
"acc": 0.93569164, |
|
"epoch": 4.722222222222222, |
|
"grad_norm": 15.616139180655933, |
|
"learning_rate": 1.1904125656576904e-06, |
|
"loss": 0.16758065, |
|
"memory(GiB)": 101.73, |
|
"step": 3400, |
|
"train_speed(iter/s)": 0.34383 |
|
}, |
|
{ |
|
"acc": 0.92955055, |
|
"epoch": 4.729166666666667, |
|
"grad_norm": 6.701798595359445, |
|
"learning_rate": 1.1780509243289383e-06, |
|
"loss": 0.18826933, |
|
"memory(GiB)": 101.73, |
|
"step": 3405, |
|
"train_speed(iter/s)": 0.343858 |
|
}, |
|
{ |
|
"acc": 0.92169542, |
|
"epoch": 4.736111111111111, |
|
"grad_norm": 7.2038512053503645, |
|
"learning_rate": 1.1657452801893784e-06, |
|
"loss": 0.19082313, |
|
"memory(GiB)": 101.73, |
|
"step": 3410, |
|
"train_speed(iter/s)": 0.343886 |
|
}, |
|
{ |
|
"acc": 0.92840328, |
|
"epoch": 4.743055555555555, |
|
"grad_norm": 9.858417301395926, |
|
"learning_rate": 1.1534958135112606e-06, |
|
"loss": 0.18543091, |
|
"memory(GiB)": 101.73, |
|
"step": 3415, |
|
"train_speed(iter/s)": 0.343916 |
|
}, |
|
{ |
|
"acc": 0.93052006, |
|
"epoch": 4.75, |
|
"grad_norm": 4.6042916720288565, |
|
"learning_rate": 1.141302703743854e-06, |
|
"loss": 0.17618411, |
|
"memory(GiB)": 101.73, |
|
"step": 3420, |
|
"train_speed(iter/s)": 0.34394 |
|
}, |
|
{ |
|
"acc": 0.93616848, |
|
"epoch": 4.756944444444445, |
|
"grad_norm": 7.4778490207199075, |
|
"learning_rate": 1.1291661295108284e-06, |
|
"loss": 0.17016323, |
|
"memory(GiB)": 101.73, |
|
"step": 3425, |
|
"train_speed(iter/s)": 0.343962 |
|
}, |
|
{ |
|
"acc": 0.93410454, |
|
"epoch": 4.763888888888889, |
|
"grad_norm": 10.22618327699479, |
|
"learning_rate": 1.1170862686076295e-06, |
|
"loss": 0.17755842, |
|
"memory(GiB)": 101.73, |
|
"step": 3430, |
|
"train_speed(iter/s)": 0.34399 |
|
}, |
|
{ |
|
"acc": 0.92950439, |
|
"epoch": 4.770833333333333, |
|
"grad_norm": 6.508153021167871, |
|
"learning_rate": 1.1050632979988816e-06, |
|
"loss": 0.16775516, |
|
"memory(GiB)": 101.73, |
|
"step": 3435, |
|
"train_speed(iter/s)": 0.344006 |
|
}, |
|
{ |
|
"acc": 0.93873663, |
|
"epoch": 4.777777777777778, |
|
"grad_norm": 6.399803664609742, |
|
"learning_rate": 1.0930973938157865e-06, |
|
"loss": 0.14967173, |
|
"memory(GiB)": 101.73, |
|
"step": 3440, |
|
"train_speed(iter/s)": 0.344032 |
|
}, |
|
{ |
|
"acc": 0.93606501, |
|
"epoch": 4.784722222222222, |
|
"grad_norm": 9.143976403019813, |
|
"learning_rate": 1.081188731353554e-06, |
|
"loss": 0.17374879, |
|
"memory(GiB)": 101.73, |
|
"step": 3445, |
|
"train_speed(iter/s)": 0.344058 |
|
}, |
|
{ |
|
"acc": 0.9375165, |
|
"epoch": 4.791666666666667, |
|
"grad_norm": 7.754683265166291, |
|
"learning_rate": 1.0693374850688204e-06, |
|
"loss": 0.15017707, |
|
"memory(GiB)": 101.73, |
|
"step": 3450, |
|
"train_speed(iter/s)": 0.344073 |
|
}, |
|
{ |
|
"acc": 0.92773657, |
|
"epoch": 4.798611111111111, |
|
"grad_norm": 13.340523234777597, |
|
"learning_rate": 1.05754382857711e-06, |
|
"loss": 0.180286, |
|
"memory(GiB)": 101.73, |
|
"step": 3455, |
|
"train_speed(iter/s)": 0.344093 |
|
}, |
|
{ |
|
"acc": 0.92894754, |
|
"epoch": 4.805555555555555, |
|
"grad_norm": 3.62871529288943, |
|
"learning_rate": 1.0458079346502735e-06, |
|
"loss": 0.17606659, |
|
"memory(GiB)": 101.73, |
|
"step": 3460, |
|
"train_speed(iter/s)": 0.344124 |
|
}, |
|
{ |
|
"acc": 0.92550812, |
|
"epoch": 4.8125, |
|
"grad_norm": 7.453371763696296, |
|
"learning_rate": 1.0341299752139704e-06, |
|
"loss": 0.1886322, |
|
"memory(GiB)": 101.73, |
|
"step": 3465, |
|
"train_speed(iter/s)": 0.344152 |
|
}, |
|
{ |
|
"acc": 0.92362499, |
|
"epoch": 4.819444444444445, |
|
"grad_norm": 6.774667028179097, |
|
"learning_rate": 1.0225101213451426e-06, |
|
"loss": 0.17885201, |
|
"memory(GiB)": 101.73, |
|
"step": 3470, |
|
"train_speed(iter/s)": 0.34418 |
|
}, |
|
{ |
|
"acc": 0.9255188, |
|
"epoch": 4.826388888888889, |
|
"grad_norm": 6.197957042298724, |
|
"learning_rate": 1.0109485432695131e-06, |
|
"loss": 0.18616344, |
|
"memory(GiB)": 101.73, |
|
"step": 3475, |
|
"train_speed(iter/s)": 0.344207 |
|
}, |
|
{ |
|
"acc": 0.93224564, |
|
"epoch": 4.833333333333333, |
|
"grad_norm": 6.644413934310758, |
|
"learning_rate": 9.994454103590919e-07, |
|
"loss": 0.16555772, |
|
"memory(GiB)": 101.73, |
|
"step": 3480, |
|
"train_speed(iter/s)": 0.34424 |
|
}, |
|
{ |
|
"acc": 0.92744942, |
|
"epoch": 4.840277777777778, |
|
"grad_norm": 4.351916474227688, |
|
"learning_rate": 9.880008911296872e-07, |
|
"loss": 0.20068693, |
|
"memory(GiB)": 101.73, |
|
"step": 3485, |
|
"train_speed(iter/s)": 0.344267 |
|
}, |
|
{ |
|
"acc": 0.9230197, |
|
"epoch": 4.847222222222222, |
|
"grad_norm": 9.964081878004457, |
|
"learning_rate": 9.7661515323845e-07, |
|
"loss": 0.17565002, |
|
"memory(GiB)": 101.73, |
|
"step": 3490, |
|
"train_speed(iter/s)": 0.344291 |
|
}, |
|
{ |
|
"acc": 0.93543081, |
|
"epoch": 4.854166666666667, |
|
"grad_norm": 6.8395540650956805, |
|
"learning_rate": 9.652883634814041e-07, |
|
"loss": 0.17975049, |
|
"memory(GiB)": 101.73, |
|
"step": 3495, |
|
"train_speed(iter/s)": 0.344308 |
|
}, |
|
{ |
|
"acc": 0.93403215, |
|
"epoch": 4.861111111111111, |
|
"grad_norm": 5.701715874653675, |
|
"learning_rate": 9.540206877910153e-07, |
|
"loss": 0.16942759, |
|
"memory(GiB)": 101.73, |
|
"step": 3500, |
|
"train_speed(iter/s)": 0.344335 |
|
}, |
|
{ |
|
"acc": 0.92589064, |
|
"epoch": 4.868055555555555, |
|
"grad_norm": 7.704105561595292, |
|
"learning_rate": 9.428122912337475e-07, |
|
"loss": 0.17456805, |
|
"memory(GiB)": 101.73, |
|
"step": 3505, |
|
"train_speed(iter/s)": 0.344363 |
|
}, |
|
{ |
|
"acc": 0.94150906, |
|
"epoch": 4.875, |
|
"grad_norm": 3.438431889948542, |
|
"learning_rate": 9.316633380076606e-07, |
|
"loss": 0.15039229, |
|
"memory(GiB)": 101.73, |
|
"step": 3510, |
|
"train_speed(iter/s)": 0.344396 |
|
}, |
|
{ |
|
"acc": 0.93364429, |
|
"epoch": 4.881944444444445, |
|
"grad_norm": 5.602784744395754, |
|
"learning_rate": 9.205739914399868e-07, |
|
"loss": 0.16423728, |
|
"memory(GiB)": 101.73, |
|
"step": 3515, |
|
"train_speed(iter/s)": 0.34443 |
|
}, |
|
{ |
|
"acc": 0.93492546, |
|
"epoch": 4.888888888888889, |
|
"grad_norm": 6.450634106440373, |
|
"learning_rate": 9.095444139847534e-07, |
|
"loss": 0.16065657, |
|
"memory(GiB)": 101.73, |
|
"step": 3520, |
|
"train_speed(iter/s)": 0.344454 |
|
}, |
|
{ |
|
"acc": 0.9399188, |
|
"epoch": 4.895833333333333, |
|
"grad_norm": 7.621591927258962, |
|
"learning_rate": 8.985747672203926e-07, |
|
"loss": 0.16348395, |
|
"memory(GiB)": 101.73, |
|
"step": 3525, |
|
"train_speed(iter/s)": 0.34448 |
|
}, |
|
{ |
|
"acc": 0.93001232, |
|
"epoch": 4.902777777777778, |
|
"grad_norm": 4.912482966701962, |
|
"learning_rate": 8.876652118473818e-07, |
|
"loss": 0.1870851, |
|
"memory(GiB)": 101.73, |
|
"step": 3530, |
|
"train_speed(iter/s)": 0.344504 |
|
}, |
|
{ |
|
"acc": 0.92510281, |
|
"epoch": 4.909722222222222, |
|
"grad_norm": 6.857031735241769, |
|
"learning_rate": 8.768159076858827e-07, |
|
"loss": 0.18478702, |
|
"memory(GiB)": 101.73, |
|
"step": 3535, |
|
"train_speed(iter/s)": 0.344538 |
|
}, |
|
{ |
|
"acc": 0.93646317, |
|
"epoch": 4.916666666666667, |
|
"grad_norm": 5.432910962320387, |
|
"learning_rate": 8.660270136734077e-07, |
|
"loss": 0.16789238, |
|
"memory(GiB)": 101.73, |
|
"step": 3540, |
|
"train_speed(iter/s)": 0.344567 |
|
}, |
|
{ |
|
"acc": 0.92513866, |
|
"epoch": 4.923611111111111, |
|
"grad_norm": 14.001241817054742, |
|
"learning_rate": 8.552986878624815e-07, |
|
"loss": 0.17173724, |
|
"memory(GiB)": 101.73, |
|
"step": 3545, |
|
"train_speed(iter/s)": 0.344582 |
|
}, |
|
{ |
|
"acc": 0.93663769, |
|
"epoch": 4.930555555555555, |
|
"grad_norm": 9.603326526795332, |
|
"learning_rate": 8.446310874183368e-07, |
|
"loss": 0.16478382, |
|
"memory(GiB)": 101.73, |
|
"step": 3550, |
|
"train_speed(iter/s)": 0.344601 |
|
}, |
|
{ |
|
"acc": 0.93988123, |
|
"epoch": 4.9375, |
|
"grad_norm": 9.381389112980651, |
|
"learning_rate": 8.340243686166037e-07, |
|
"loss": 0.16926775, |
|
"memory(GiB)": 101.73, |
|
"step": 3555, |
|
"train_speed(iter/s)": 0.344638 |
|
}, |
|
{ |
|
"acc": 0.93035545, |
|
"epoch": 4.944444444444445, |
|
"grad_norm": 7.110265271158136, |
|
"learning_rate": 8.234786868410236e-07, |
|
"loss": 0.16944885, |
|
"memory(GiB)": 101.73, |
|
"step": 3560, |
|
"train_speed(iter/s)": 0.344667 |
|
}, |
|
{ |
|
"acc": 0.9245862, |
|
"epoch": 4.951388888888889, |
|
"grad_norm": 6.202865636689749, |
|
"learning_rate": 8.129941965811745e-07, |
|
"loss": 0.18254293, |
|
"memory(GiB)": 101.73, |
|
"step": 3565, |
|
"train_speed(iter/s)": 0.344693 |
|
}, |
|
{ |
|
"acc": 0.9371048, |
|
"epoch": 4.958333333333333, |
|
"grad_norm": 11.61144011390846, |
|
"learning_rate": 8.025710514302019e-07, |
|
"loss": 0.16599008, |
|
"memory(GiB)": 101.73, |
|
"step": 3570, |
|
"train_speed(iter/s)": 0.344715 |
|
}, |
|
{ |
|
"acc": 0.92540922, |
|
"epoch": 4.965277777777778, |
|
"grad_norm": 5.9675388419806294, |
|
"learning_rate": 7.922094040825753e-07, |
|
"loss": 0.17711488, |
|
"memory(GiB)": 101.73, |
|
"step": 3575, |
|
"train_speed(iter/s)": 0.344748 |
|
}, |
|
{ |
|
"acc": 0.93437986, |
|
"epoch": 4.972222222222222, |
|
"grad_norm": 4.777475006904249, |
|
"learning_rate": 7.819094063318475e-07, |
|
"loss": 0.16441042, |
|
"memory(GiB)": 101.73, |
|
"step": 3580, |
|
"train_speed(iter/s)": 0.344783 |
|
}, |
|
{ |
|
"acc": 0.92917442, |
|
"epoch": 4.979166666666667, |
|
"grad_norm": 9.660056870992797, |
|
"learning_rate": 7.716712090684326e-07, |
|
"loss": 0.18292786, |
|
"memory(GiB)": 101.73, |
|
"step": 3585, |
|
"train_speed(iter/s)": 0.344803 |
|
}, |
|
{ |
|
"acc": 0.94042187, |
|
"epoch": 4.986111111111111, |
|
"grad_norm": 4.423451670274828, |
|
"learning_rate": 7.614949622773942e-07, |
|
"loss": 0.14605323, |
|
"memory(GiB)": 101.73, |
|
"step": 3590, |
|
"train_speed(iter/s)": 0.344817 |
|
}, |
|
{ |
|
"acc": 0.93927879, |
|
"epoch": 4.993055555555555, |
|
"grad_norm": 8.31218232668258, |
|
"learning_rate": 7.5138081503625e-07, |
|
"loss": 0.15975434, |
|
"memory(GiB)": 101.73, |
|
"step": 3595, |
|
"train_speed(iter/s)": 0.344846 |
|
}, |
|
{ |
|
"acc": 0.93250046, |
|
"epoch": 5.0, |
|
"grad_norm": 5.5702806703726075, |
|
"learning_rate": 7.413289155127845e-07, |
|
"loss": 0.16792555, |
|
"memory(GiB)": 101.73, |
|
"step": 3600, |
|
"train_speed(iter/s)": 0.34485 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_acc": 0.9066131114617701, |
|
"eval_loss": 0.25880399346351624, |
|
"eval_runtime": 25.0627, |
|
"eval_samples_per_second": 13.686, |
|
"eval_steps_per_second": 1.716, |
|
"step": 3600 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 4320, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.9491571697188864e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|