|
{
  "best_metric": 0.6901677250862122,
  "best_model_checkpoint": "SavedModels/ViT-large-patch16-224_B/checkpoint-1128",
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 7520,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.26595744680851063,
      "grad_norm": 16.918853759765625,
      "learning_rate": 0.0002920212765957447,
      "loss": 1.0273,
      "step": 100
    },
    {
      "epoch": 0.5319148936170213,
      "grad_norm": 8.12959098815918,
      "learning_rate": 0.00028404255319148934,
      "loss": 0.8052,
      "step": 200
    },
    {
      "epoch": 0.7978723404255319,
      "grad_norm": 4.803711891174316,
      "learning_rate": 0.00027606382978723404,
      "loss": 0.7818,
      "step": 300
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6693333333333333,
      "eval_f1": 0.6679277175440698,
      "eval_loss": 0.7451461553573608,
      "eval_precision": 0.6665516663433287,
      "eval_recall": 0.6708336668000534,
      "eval_runtime": 34.5448,
      "eval_samples_per_second": 21.711,
      "eval_steps_per_second": 2.721,
      "step": 376
    },
    {
      "epoch": 1.0638297872340425,
      "grad_norm": 5.264636516571045,
      "learning_rate": 0.0002680851063829787,
      "loss": 0.7518,
      "step": 400
    },
    {
      "epoch": 1.3297872340425532,
      "grad_norm": 6.721948146820068,
      "learning_rate": 0.0002601063829787234,
      "loss": 0.7153,
      "step": 500
    },
    {
      "epoch": 1.5957446808510638,
      "grad_norm": 5.855480670928955,
      "learning_rate": 0.00025212765957446806,
      "loss": 0.6854,
      "step": 600
    },
    {
      "epoch": 1.8617021276595744,
      "grad_norm": 5.206683158874512,
      "learning_rate": 0.0002441489361702127,
      "loss": 0.6975,
      "step": 700
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.652,
      "eval_f1": 0.6527448850868924,
      "eval_loss": 0.737027108669281,
      "eval_precision": 0.6598586784457597,
      "eval_recall": 0.6519173002534348,
      "eval_runtime": 34.5417,
      "eval_samples_per_second": 21.713,
      "eval_steps_per_second": 2.721,
      "step": 752
    },
    {
      "epoch": 2.127659574468085,
      "grad_norm": 6.592344760894775,
      "learning_rate": 0.00023617021276595742,
      "loss": 0.6174,
      "step": 800
    },
    {
      "epoch": 2.393617021276596,
      "grad_norm": 4.732657432556152,
      "learning_rate": 0.0002281914893617021,
      "loss": 0.644,
      "step": 900
    },
    {
      "epoch": 2.6595744680851063,
      "grad_norm": 5.618279933929443,
      "learning_rate": 0.00022021276595744679,
      "loss": 0.6098,
      "step": 1000
    },
    {
      "epoch": 2.925531914893617,
      "grad_norm": 4.702358245849609,
      "learning_rate": 0.0002122340425531915,
      "loss": 0.5934,
      "step": 1100
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.6826666666666666,
      "eval_f1": 0.6716423210919009,
      "eval_loss": 0.6901677250862122,
      "eval_precision": 0.680882951010493,
      "eval_recall": 0.6834541816726691,
      "eval_runtime": 35.1911,
      "eval_samples_per_second": 21.312,
      "eval_steps_per_second": 2.671,
      "step": 1128
    },
|
    {
      "epoch": 3.1914893617021276,
      "grad_norm": 4.94950008392334,
      "learning_rate": 0.00020425531914893615,
      "loss": 0.4998,
      "step": 1200
    },
    {
      "epoch": 3.4574468085106385,
      "grad_norm": 4.887601375579834,
      "learning_rate": 0.00019627659574468083,
      "loss": 0.5384,
      "step": 1300
    },
    {
      "epoch": 3.723404255319149,
      "grad_norm": 5.762096881866455,
      "learning_rate": 0.0001882978723404255,
      "loss": 0.5035,
      "step": 1400
    },
    {
      "epoch": 3.9893617021276597,
      "grad_norm": 5.000032424926758,
      "learning_rate": 0.0001803191489361702,
      "loss": 0.4943,
      "step": 1500
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.68,
      "eval_f1": 0.6607042226524314,
      "eval_loss": 0.7506471872329712,
      "eval_precision": 0.6727589541740554,
      "eval_recall": 0.6823892223556088,
      "eval_runtime": 34.4906,
      "eval_samples_per_second": 21.745,
      "eval_steps_per_second": 2.725,
      "step": 1504
    },
    {
      "epoch": 4.25531914893617,
      "grad_norm": 7.737178325653076,
      "learning_rate": 0.0001723404255319149,
      "loss": 0.3581,
      "step": 1600
    },
    {
      "epoch": 4.5212765957446805,
      "grad_norm": 5.353149890899658,
      "learning_rate": 0.00016436170212765956,
      "loss": 0.3857,
      "step": 1700
    },
    {
      "epoch": 4.787234042553192,
      "grad_norm": 5.475671768188477,
      "learning_rate": 0.00015638297872340426,
      "loss": 0.3625,
      "step": 1800
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.6693333333333333,
      "eval_f1": 0.6583454266829142,
      "eval_loss": 0.9006826281547546,
      "eval_precision": 0.6655033443649184,
      "eval_recall": 0.6717070828331333,
      "eval_runtime": 34.3021,
      "eval_samples_per_second": 21.865,
      "eval_steps_per_second": 2.74,
      "step": 1880
    },
    {
      "epoch": 5.053191489361702,
      "grad_norm": 8.006985664367676,
      "learning_rate": 0.00014840425531914892,
      "loss": 0.3767,
      "step": 1900
    },
    {
      "epoch": 5.319148936170213,
      "grad_norm": 8.51475715637207,
      "learning_rate": 0.0001404255319148936,
      "loss": 0.2824,
      "step": 2000
    },
    {
      "epoch": 5.585106382978723,
      "grad_norm": 8.186362266540527,
      "learning_rate": 0.00013244680851063828,
      "loss": 0.2434,
      "step": 2100
    },
    {
      "epoch": 5.851063829787234,
      "grad_norm": 6.735799312591553,
      "learning_rate": 0.00012446808510638296,
      "loss": 0.2717,
      "step": 2200
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.6586666666666666,
      "eval_f1": 0.6614425775783358,
      "eval_loss": 1.0333963632583618,
      "eval_precision": 0.6652982915561002,
      "eval_recall": 0.6589942643724156,
      "eval_runtime": 34.3254,
      "eval_samples_per_second": 21.85,
      "eval_steps_per_second": 2.738,
      "step": 2256
    },
    {
      "epoch": 6.117021276595745,
      "grad_norm": 6.987758636474609,
      "learning_rate": 0.00011648936170212764,
      "loss": 0.2191,
      "step": 2300
    },
    {
      "epoch": 6.382978723404255,
      "grad_norm": 8.753083229064941,
      "learning_rate": 0.00010851063829787234,
      "loss": 0.1532,
      "step": 2400
    },
    {
      "epoch": 6.648936170212766,
      "grad_norm": 6.87877082824707,
      "learning_rate": 0.00010053191489361702,
      "loss": 0.172,
      "step": 2500
    },
    {
      "epoch": 6.914893617021277,
      "grad_norm": 8.248693466186523,
      "learning_rate": 9.25531914893617e-05,
      "loss": 0.188,
      "step": 2600
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.648,
      "eval_f1": 0.6480604224938936,
      "eval_loss": 1.3646624088287354,
      "eval_precision": 0.6474310419368785,
      "eval_recall": 0.6488264639189009,
      "eval_runtime": 34.2754,
      "eval_samples_per_second": 21.882,
      "eval_steps_per_second": 2.742,
      "step": 2632
    },
|
    {
      "epoch": 7.180851063829787,
      "grad_norm": 9.565648078918457,
      "learning_rate": 8.457446808510637e-05,
      "loss": 0.1344,
      "step": 2700
    },
    {
      "epoch": 7.446808510638298,
      "grad_norm": 10.819112777709961,
      "learning_rate": 7.659574468085105e-05,
      "loss": 0.0791,
      "step": 2800
    },
    {
      "epoch": 7.712765957446808,
      "grad_norm": 11.498022079467773,
      "learning_rate": 6.861702127659574e-05,
      "loss": 0.0917,
      "step": 2900
    },
    {
      "epoch": 7.9787234042553195,
      "grad_norm": 10.500368118286133,
      "learning_rate": 6.063829787234042e-05,
      "loss": 0.0966,
      "step": 3000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.6506666666666666,
      "eval_f1": 0.6510622640996298,
      "eval_loss": 1.7121126651763916,
      "eval_precision": 0.6518027984453844,
      "eval_recall": 0.6519786581299186,
      "eval_runtime": 34.2979,
      "eval_samples_per_second": 21.867,
      "eval_steps_per_second": 2.741,
      "step": 3008
    },
    {
      "epoch": 8.24468085106383,
      "grad_norm": 10.034985542297363,
      "learning_rate": 5.26595744680851e-05,
      "loss": 0.0589,
      "step": 3100
    },
    {
      "epoch": 8.51063829787234,
      "grad_norm": 8.84762954711914,
      "learning_rate": 4.468085106382978e-05,
      "loss": 0.0444,
      "step": 3200
    },
    {
      "epoch": 8.77659574468085,
      "grad_norm": 8.717729568481445,
      "learning_rate": 3.670212765957446e-05,
      "loss": 0.0492,
      "step": 3300
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.664,
      "eval_f1": 0.6634717422597086,
      "eval_loss": 2.092434883117676,
      "eval_precision": 0.6624493384142015,
      "eval_recall": 0.6648558089902628,
      "eval_runtime": 34.1372,
      "eval_samples_per_second": 21.97,
      "eval_steps_per_second": 2.754,
      "step": 3384
    },
    {
      "epoch": 9.042553191489361,
      "grad_norm": 10.311498641967773,
      "learning_rate": 2.8723404255319147e-05,
      "loss": 0.0461,
      "step": 3400
    },
    {
      "epoch": 9.308510638297872,
      "grad_norm": 9.477750778198242,
      "learning_rate": 2.0744680851063828e-05,
      "loss": 0.0215,
      "step": 3500
    },
    {
      "epoch": 9.574468085106384,
      "grad_norm": 10.069836616516113,
      "learning_rate": 1.276595744680851e-05,
      "loss": 0.0275,
      "step": 3600
    },
    {
      "epoch": 9.840425531914894,
      "grad_norm": 9.309320449829102,
      "learning_rate": 4.7872340425531906e-06,
      "loss": 0.0198,
      "step": 3700
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.6733333333333333,
      "eval_f1": 0.671461456478878,
      "eval_loss": 2.349191427230835,
      "eval_precision": 0.6697804784739011,
      "eval_recall": 0.6742686407896491,
      "eval_runtime": 34.2831,
      "eval_samples_per_second": 21.877,
      "eval_steps_per_second": 2.742,
      "step": 3760
    },
    {
      "epoch": 10.106382978723405,
      "grad_norm": 4.254685878753662,
      "learning_rate": 0.00014840425531914892,
      "loss": 0.1783,
      "step": 3800
    },
    {
      "epoch": 10.372340425531915,
      "grad_norm": 6.5849409103393555,
      "learning_rate": 0.00014441489361702127,
      "loss": 0.4254,
      "step": 3900
    },
    {
      "epoch": 10.638297872340425,
      "grad_norm": 6.008872985839844,
      "learning_rate": 0.0001404255319148936,
      "loss": 0.3892,
      "step": 4000
    },
    {
      "epoch": 10.904255319148936,
      "grad_norm": 4.886096000671387,
      "learning_rate": 0.00013643617021276593,
      "loss": 0.3308,
      "step": 4100
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.6413333333333333,
      "eval_f1": 0.6379586335182581,
      "eval_loss": 1.2812837362289429,
      "eval_precision": 0.6360221570265966,
      "eval_recall": 0.6426586634653862,
      "eval_runtime": 34.4135,
      "eval_samples_per_second": 21.794,
      "eval_steps_per_second": 2.731,
      "step": 4136
    },
|
    {
      "epoch": 11.170212765957446,
      "grad_norm": 8.697183609008789,
      "learning_rate": 0.00013244680851063828,
      "loss": 0.3418,
      "step": 4200
    },
    {
      "epoch": 11.436170212765958,
      "grad_norm": 1.831715703010559,
      "learning_rate": 0.00012845744680851063,
      "loss": 0.2892,
      "step": 4300
    },
    {
      "epoch": 11.702127659574469,
      "grad_norm": 5.119130611419678,
      "learning_rate": 0.00012446808510638296,
      "loss": 0.2597,
      "step": 4400
    },
    {
      "epoch": 11.96808510638298,
      "grad_norm": 4.450387001037598,
      "learning_rate": 0.00012047872340425532,
      "loss": 0.2468,
      "step": 4500
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.6693333333333333,
      "eval_f1": 0.6644005202701936,
      "eval_loss": 1.343381404876709,
      "eval_precision": 0.6702673209494008,
      "eval_recall": 0.6696177137521676,
      "eval_runtime": 34.1977,
      "eval_samples_per_second": 21.931,
      "eval_steps_per_second": 2.749,
      "step": 4512
    },
    {
      "epoch": 12.23404255319149,
      "grad_norm": 0.5507918000221252,
      "learning_rate": 0.00011648936170212764,
      "loss": 0.161,
      "step": 4600
    },
    {
      "epoch": 12.5,
      "grad_norm": 3.424229621887207,
      "learning_rate": 0.0001125,
      "loss": 0.1698,
      "step": 4700
    },
    {
      "epoch": 12.76595744680851,
      "grad_norm": 10.304464340209961,
      "learning_rate": 0.00010851063829787234,
      "loss": 0.1992,
      "step": 4800
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.6773333333333333,
      "eval_f1": 0.6757724702487522,
      "eval_loss": 1.5172154903411865,
      "eval_precision": 0.6749943823114556,
      "eval_recall": 0.6780813658796853,
      "eval_runtime": 34.5579,
      "eval_samples_per_second": 21.703,
      "eval_steps_per_second": 2.72,
      "step": 4888
    },
    {
      "epoch": 13.03191489361702,
      "grad_norm": 0.58838951587677,
      "learning_rate": 0.00010452127659574466,
      "loss": 0.153,
      "step": 4900
    },
    {
      "epoch": 13.297872340425531,
      "grad_norm": 4.339015007019043,
      "learning_rate": 0.00010053191489361702,
      "loss": 0.0729,
      "step": 5000
    },
    {
      "epoch": 13.563829787234042,
      "grad_norm": 5.353603839874268,
      "learning_rate": 9.654255319148935e-05,
      "loss": 0.1182,
      "step": 5100
    },
    {
      "epoch": 13.829787234042554,
      "grad_norm": 0.11711510270833969,
      "learning_rate": 9.25531914893617e-05,
      "loss": 0.1391,
      "step": 5200
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.688,
      "eval_f1": 0.6861192542213884,
      "eval_loss": 1.8793449401855469,
      "eval_precision": 0.6887025095519409,
      "eval_recall": 0.6882689075630252,
      "eval_runtime": 35.0719,
      "eval_samples_per_second": 21.385,
      "eval_steps_per_second": 2.68,
      "step": 5264
    },
    {
      "epoch": 14.095744680851064,
      "grad_norm": 16.545928955078125,
      "learning_rate": 8.856382978723404e-05,
      "loss": 0.0919,
      "step": 5300
    },
    {
      "epoch": 14.361702127659575,
      "grad_norm": 0.11285369098186493,
      "learning_rate": 8.457446808510637e-05,
      "loss": 0.0724,
      "step": 5400
    },
    {
      "epoch": 14.627659574468085,
      "grad_norm": 1.0866966247558594,
      "learning_rate": 8.058510638297872e-05,
      "loss": 0.0615,
      "step": 5500
    },
    {
      "epoch": 14.893617021276595,
      "grad_norm": 0.0466451533138752,
      "learning_rate": 7.659574468085105e-05,
      "loss": 0.062,
      "step": 5600
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.6826666666666666,
      "eval_f1": 0.6784899933134545,
      "eval_loss": 2.1949727535247803,
      "eval_precision": 0.6780378052909217,
      "eval_recall": 0.6832503668133919,
      "eval_runtime": 34.2882,
      "eval_samples_per_second": 21.873,
      "eval_steps_per_second": 2.741,
      "step": 5640
    },
|
    {
      "epoch": 15.159574468085106,
      "grad_norm": 1.6346490383148193,
      "learning_rate": 7.26063829787234e-05,
      "loss": 0.0477,
      "step": 5700
    },
    {
      "epoch": 15.425531914893616,
      "grad_norm": 0.004013681318610907,
      "learning_rate": 6.861702127659574e-05,
      "loss": 0.0484,
      "step": 5800
    },
    {
      "epoch": 15.691489361702128,
      "grad_norm": 0.6463019847869873,
      "learning_rate": 6.462765957446807e-05,
      "loss": 0.0389,
      "step": 5900
    },
    {
      "epoch": 15.957446808510639,
      "grad_norm": 0.4977071285247803,
      "learning_rate": 6.063829787234042e-05,
      "loss": 0.0392,
      "step": 6000
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.6626666666666666,
      "eval_f1": 0.6599147266673199,
      "eval_loss": 2.2853217124938965,
      "eval_precision": 0.6593416101537274,
      "eval_recall": 0.6640821661998133,
      "eval_runtime": 34.2142,
      "eval_samples_per_second": 21.921,
      "eval_steps_per_second": 2.747,
      "step": 6016
    },
    {
      "epoch": 16.22340425531915,
      "grad_norm": 0.18373289704322815,
      "learning_rate": 5.6648936170212766e-05,
      "loss": 0.035,
      "step": 6100
    },
    {
      "epoch": 16.48936170212766,
      "grad_norm": 11.437871932983398,
      "learning_rate": 5.26595744680851e-05,
      "loss": 0.0323,
      "step": 6200
    },
    {
      "epoch": 16.75531914893617,
      "grad_norm": 0.04929669201374054,
      "learning_rate": 4.867021276595744e-05,
      "loss": 0.0286,
      "step": 6300
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.6706666666666666,
      "eval_f1": 0.6665467903773287,
      "eval_loss": 2.356433153152466,
      "eval_precision": 0.6646925224621034,
      "eval_recall": 0.6718655462184874,
      "eval_runtime": 34.2754,
      "eval_samples_per_second": 21.882,
      "eval_steps_per_second": 2.742,
      "step": 6392
    },
    {
      "epoch": 17.02127659574468,
      "grad_norm": 0.003023180877789855,
      "learning_rate": 4.468085106382978e-05,
      "loss": 0.0232,
      "step": 6400
    },
    {
      "epoch": 17.28723404255319,
      "grad_norm": 0.009346798993647099,
      "learning_rate": 4.069148936170212e-05,
      "loss": 0.0195,
      "step": 6500
    },
    {
      "epoch": 17.5531914893617,
      "grad_norm": 0.0396982878446579,
      "learning_rate": 3.670212765957446e-05,
      "loss": 0.02,
      "step": 6600
    },
    {
      "epoch": 17.819148936170212,
      "grad_norm": 0.014023613184690475,
      "learning_rate": 3.271276595744681e-05,
      "loss": 0.025,
      "step": 6700
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.676,
      "eval_f1": 0.6721775597644245,
      "eval_loss": 2.5857865810394287,
      "eval_precision": 0.6713915090790397,
      "eval_recall": 0.6767165532879819,
      "eval_runtime": 34.2491,
      "eval_samples_per_second": 21.898,
      "eval_steps_per_second": 2.745,
      "step": 6768
    },
    {
      "epoch": 18.085106382978722,
      "grad_norm": 0.47041425108909607,
      "learning_rate": 2.8723404255319147e-05,
      "loss": 0.0202,
      "step": 6800
    },
    {
      "epoch": 18.351063829787233,
      "grad_norm": 0.06168466433882713,
      "learning_rate": 2.4734042553191484e-05,
      "loss": 0.019,
      "step": 6900
    },
    {
      "epoch": 18.617021276595743,
      "grad_norm": 0.0014604219468310475,
      "learning_rate": 2.0744680851063828e-05,
      "loss": 0.0178,
      "step": 7000
    },
    {
      "epoch": 18.882978723404257,
      "grad_norm": 0.0028070323169231415,
      "learning_rate": 1.675531914893617e-05,
      "loss": 0.0214,
      "step": 7100
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.668,
      "eval_f1": 0.6658631291530153,
      "eval_loss": 2.6788387298583984,
      "eval_precision": 0.6642259414225942,
      "eval_recall": 0.6688787515006003,
      "eval_runtime": 34.249,
      "eval_samples_per_second": 21.898,
      "eval_steps_per_second": 2.745,
      "step": 7144
    },
|
    {
      "epoch": 19.148936170212767,
      "grad_norm": 0.0015123536577448249,
      "learning_rate": 1.276595744680851e-05,
      "loss": 0.0204,
      "step": 7200
    },
    {
      "epoch": 19.414893617021278,
      "grad_norm": 0.0018817168893292546,
      "learning_rate": 8.77659574468085e-06,
      "loss": 0.0141,
      "step": 7300
    },
    {
      "epoch": 19.680851063829788,
      "grad_norm": 0.6008268594741821,
      "learning_rate": 4.7872340425531906e-06,
      "loss": 0.0203,
      "step": 7400
    },
    {
      "epoch": 19.9468085106383,
      "grad_norm": 1.0317440032958984,
      "learning_rate": 7.978723404255318e-07,
      "loss": 0.0155,
      "step": 7500
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.672,
      "eval_f1": 0.6695170440310667,
      "eval_loss": 2.725527048110962,
      "eval_precision": 0.66779120407287,
      "eval_recall": 0.6728787515006003,
      "eval_runtime": 34.5602,
      "eval_samples_per_second": 21.701,
      "eval_steps_per_second": 2.72,
      "step": 7520
    }
  ],
  "logging_steps": 100,
  "max_steps": 7520,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.288864877974981e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|