clasificador_primario / trainer_state.json
nc7777's picture
Upload 8 files
12e8800 verified
{
"best_metric": 0.6901677250862122,
"best_model_checkpoint": "SavedModels/ViT-large-patch16-224_B/checkpoint-1128",
"epoch": 20.0,
"eval_steps": 500,
"global_step": 7520,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.26595744680851063,
"grad_norm": 16.918853759765625,
"learning_rate": 0.0002920212765957447,
"loss": 1.0273,
"step": 100
},
{
"epoch": 0.5319148936170213,
"grad_norm": 8.12959098815918,
"learning_rate": 0.00028404255319148934,
"loss": 0.8052,
"step": 200
},
{
"epoch": 0.7978723404255319,
"grad_norm": 4.803711891174316,
"learning_rate": 0.00027606382978723404,
"loss": 0.7818,
"step": 300
},
{
"epoch": 1.0,
"eval_accuracy": 0.6693333333333333,
"eval_f1": 0.6679277175440698,
"eval_loss": 0.7451461553573608,
"eval_precision": 0.6665516663433287,
"eval_recall": 0.6708336668000534,
"eval_runtime": 34.5448,
"eval_samples_per_second": 21.711,
"eval_steps_per_second": 2.721,
"step": 376
},
{
"epoch": 1.0638297872340425,
"grad_norm": 5.264636516571045,
"learning_rate": 0.0002680851063829787,
"loss": 0.7518,
"step": 400
},
{
"epoch": 1.3297872340425532,
"grad_norm": 6.721948146820068,
"learning_rate": 0.0002601063829787234,
"loss": 0.7153,
"step": 500
},
{
"epoch": 1.5957446808510638,
"grad_norm": 5.855480670928955,
"learning_rate": 0.00025212765957446806,
"loss": 0.6854,
"step": 600
},
{
"epoch": 1.8617021276595744,
"grad_norm": 5.206683158874512,
"learning_rate": 0.0002441489361702127,
"loss": 0.6975,
"step": 700
},
{
"epoch": 2.0,
"eval_accuracy": 0.652,
"eval_f1": 0.6527448850868924,
"eval_loss": 0.737027108669281,
"eval_precision": 0.6598586784457597,
"eval_recall": 0.6519173002534348,
"eval_runtime": 34.5417,
"eval_samples_per_second": 21.713,
"eval_steps_per_second": 2.721,
"step": 752
},
{
"epoch": 2.127659574468085,
"grad_norm": 6.592344760894775,
"learning_rate": 0.00023617021276595742,
"loss": 0.6174,
"step": 800
},
{
"epoch": 2.393617021276596,
"grad_norm": 4.732657432556152,
"learning_rate": 0.0002281914893617021,
"loss": 0.644,
"step": 900
},
{
"epoch": 2.6595744680851063,
"grad_norm": 5.618279933929443,
"learning_rate": 0.00022021276595744679,
"loss": 0.6098,
"step": 1000
},
{
"epoch": 2.925531914893617,
"grad_norm": 4.702358245849609,
"learning_rate": 0.0002122340425531915,
"loss": 0.5934,
"step": 1100
},
{
"epoch": 3.0,
"eval_accuracy": 0.6826666666666666,
"eval_f1": 0.6716423210919009,
"eval_loss": 0.6901677250862122,
"eval_precision": 0.680882951010493,
"eval_recall": 0.6834541816726691,
"eval_runtime": 35.1911,
"eval_samples_per_second": 21.312,
"eval_steps_per_second": 2.671,
"step": 1128
},
{
"epoch": 3.1914893617021276,
"grad_norm": 4.94950008392334,
"learning_rate": 0.00020425531914893615,
"loss": 0.4998,
"step": 1200
},
{
"epoch": 3.4574468085106385,
"grad_norm": 4.887601375579834,
"learning_rate": 0.00019627659574468083,
"loss": 0.5384,
"step": 1300
},
{
"epoch": 3.723404255319149,
"grad_norm": 5.762096881866455,
"learning_rate": 0.0001882978723404255,
"loss": 0.5035,
"step": 1400
},
{
"epoch": 3.9893617021276597,
"grad_norm": 5.000032424926758,
"learning_rate": 0.0001803191489361702,
"loss": 0.4943,
"step": 1500
},
{
"epoch": 4.0,
"eval_accuracy": 0.68,
"eval_f1": 0.6607042226524314,
"eval_loss": 0.7506471872329712,
"eval_precision": 0.6727589541740554,
"eval_recall": 0.6823892223556088,
"eval_runtime": 34.4906,
"eval_samples_per_second": 21.745,
"eval_steps_per_second": 2.725,
"step": 1504
},
{
"epoch": 4.25531914893617,
"grad_norm": 7.737178325653076,
"learning_rate": 0.0001723404255319149,
"loss": 0.3581,
"step": 1600
},
{
"epoch": 4.5212765957446805,
"grad_norm": 5.353149890899658,
"learning_rate": 0.00016436170212765956,
"loss": 0.3857,
"step": 1700
},
{
"epoch": 4.787234042553192,
"grad_norm": 5.475671768188477,
"learning_rate": 0.00015638297872340426,
"loss": 0.3625,
"step": 1800
},
{
"epoch": 5.0,
"eval_accuracy": 0.6693333333333333,
"eval_f1": 0.6583454266829142,
"eval_loss": 0.9006826281547546,
"eval_precision": 0.6655033443649184,
"eval_recall": 0.6717070828331333,
"eval_runtime": 34.3021,
"eval_samples_per_second": 21.865,
"eval_steps_per_second": 2.74,
"step": 1880
},
{
"epoch": 5.053191489361702,
"grad_norm": 8.006985664367676,
"learning_rate": 0.00014840425531914892,
"loss": 0.3767,
"step": 1900
},
{
"epoch": 5.319148936170213,
"grad_norm": 8.51475715637207,
"learning_rate": 0.0001404255319148936,
"loss": 0.2824,
"step": 2000
},
{
"epoch": 5.585106382978723,
"grad_norm": 8.186362266540527,
"learning_rate": 0.00013244680851063828,
"loss": 0.2434,
"step": 2100
},
{
"epoch": 5.851063829787234,
"grad_norm": 6.735799312591553,
"learning_rate": 0.00012446808510638296,
"loss": 0.2717,
"step": 2200
},
{
"epoch": 6.0,
"eval_accuracy": 0.6586666666666666,
"eval_f1": 0.6614425775783358,
"eval_loss": 1.0333963632583618,
"eval_precision": 0.6652982915561002,
"eval_recall": 0.6589942643724156,
"eval_runtime": 34.3254,
"eval_samples_per_second": 21.85,
"eval_steps_per_second": 2.738,
"step": 2256
},
{
"epoch": 6.117021276595745,
"grad_norm": 6.987758636474609,
"learning_rate": 0.00011648936170212764,
"loss": 0.2191,
"step": 2300
},
{
"epoch": 6.382978723404255,
"grad_norm": 8.753083229064941,
"learning_rate": 0.00010851063829787234,
"loss": 0.1532,
"step": 2400
},
{
"epoch": 6.648936170212766,
"grad_norm": 6.87877082824707,
"learning_rate": 0.00010053191489361702,
"loss": 0.172,
"step": 2500
},
{
"epoch": 6.914893617021277,
"grad_norm": 8.248693466186523,
"learning_rate": 9.25531914893617e-05,
"loss": 0.188,
"step": 2600
},
{
"epoch": 7.0,
"eval_accuracy": 0.648,
"eval_f1": 0.6480604224938936,
"eval_loss": 1.3646624088287354,
"eval_precision": 0.6474310419368785,
"eval_recall": 0.6488264639189009,
"eval_runtime": 34.2754,
"eval_samples_per_second": 21.882,
"eval_steps_per_second": 2.742,
"step": 2632
},
{
"epoch": 7.180851063829787,
"grad_norm": 9.565648078918457,
"learning_rate": 8.457446808510637e-05,
"loss": 0.1344,
"step": 2700
},
{
"epoch": 7.446808510638298,
"grad_norm": 10.819112777709961,
"learning_rate": 7.659574468085105e-05,
"loss": 0.0791,
"step": 2800
},
{
"epoch": 7.712765957446808,
"grad_norm": 11.498022079467773,
"learning_rate": 6.861702127659574e-05,
"loss": 0.0917,
"step": 2900
},
{
"epoch": 7.9787234042553195,
"grad_norm": 10.500368118286133,
"learning_rate": 6.063829787234042e-05,
"loss": 0.0966,
"step": 3000
},
{
"epoch": 8.0,
"eval_accuracy": 0.6506666666666666,
"eval_f1": 0.6510622640996298,
"eval_loss": 1.7121126651763916,
"eval_precision": 0.6518027984453844,
"eval_recall": 0.6519786581299186,
"eval_runtime": 34.2979,
"eval_samples_per_second": 21.867,
"eval_steps_per_second": 2.741,
"step": 3008
},
{
"epoch": 8.24468085106383,
"grad_norm": 10.034985542297363,
"learning_rate": 5.26595744680851e-05,
"loss": 0.0589,
"step": 3100
},
{
"epoch": 8.51063829787234,
"grad_norm": 8.84762954711914,
"learning_rate": 4.468085106382978e-05,
"loss": 0.0444,
"step": 3200
},
{
"epoch": 8.77659574468085,
"grad_norm": 8.717729568481445,
"learning_rate": 3.670212765957446e-05,
"loss": 0.0492,
"step": 3300
},
{
"epoch": 9.0,
"eval_accuracy": 0.664,
"eval_f1": 0.6634717422597086,
"eval_loss": 2.092434883117676,
"eval_precision": 0.6624493384142015,
"eval_recall": 0.6648558089902628,
"eval_runtime": 34.1372,
"eval_samples_per_second": 21.97,
"eval_steps_per_second": 2.754,
"step": 3384
},
{
"epoch": 9.042553191489361,
"grad_norm": 10.311498641967773,
"learning_rate": 2.8723404255319147e-05,
"loss": 0.0461,
"step": 3400
},
{
"epoch": 9.308510638297872,
"grad_norm": 9.477750778198242,
"learning_rate": 2.0744680851063828e-05,
"loss": 0.0215,
"step": 3500
},
{
"epoch": 9.574468085106384,
"grad_norm": 10.069836616516113,
"learning_rate": 1.276595744680851e-05,
"loss": 0.0275,
"step": 3600
},
{
"epoch": 9.840425531914894,
"grad_norm": 9.309320449829102,
"learning_rate": 4.7872340425531906e-06,
"loss": 0.0198,
"step": 3700
},
{
"epoch": 10.0,
"eval_accuracy": 0.6733333333333333,
"eval_f1": 0.671461456478878,
"eval_loss": 2.349191427230835,
"eval_precision": 0.6697804784739011,
"eval_recall": 0.6742686407896491,
"eval_runtime": 34.2831,
"eval_samples_per_second": 21.877,
"eval_steps_per_second": 2.742,
"step": 3760
},
{
"epoch": 10.106382978723405,
"grad_norm": 4.254685878753662,
"learning_rate": 0.00014840425531914892,
"loss": 0.1783,
"step": 3800
},
{
"epoch": 10.372340425531915,
"grad_norm": 6.5849409103393555,
"learning_rate": 0.00014441489361702127,
"loss": 0.4254,
"step": 3900
},
{
"epoch": 10.638297872340425,
"grad_norm": 6.008872985839844,
"learning_rate": 0.0001404255319148936,
"loss": 0.3892,
"step": 4000
},
{
"epoch": 10.904255319148936,
"grad_norm": 4.886096000671387,
"learning_rate": 0.00013643617021276593,
"loss": 0.3308,
"step": 4100
},
{
"epoch": 11.0,
"eval_accuracy": 0.6413333333333333,
"eval_f1": 0.6379586335182581,
"eval_loss": 1.2812837362289429,
"eval_precision": 0.6360221570265966,
"eval_recall": 0.6426586634653862,
"eval_runtime": 34.4135,
"eval_samples_per_second": 21.794,
"eval_steps_per_second": 2.731,
"step": 4136
},
{
"epoch": 11.170212765957446,
"grad_norm": 8.697183609008789,
"learning_rate": 0.00013244680851063828,
"loss": 0.3418,
"step": 4200
},
{
"epoch": 11.436170212765958,
"grad_norm": 1.831715703010559,
"learning_rate": 0.00012845744680851063,
"loss": 0.2892,
"step": 4300
},
{
"epoch": 11.702127659574469,
"grad_norm": 5.119130611419678,
"learning_rate": 0.00012446808510638296,
"loss": 0.2597,
"step": 4400
},
{
"epoch": 11.96808510638298,
"grad_norm": 4.450387001037598,
"learning_rate": 0.00012047872340425532,
"loss": 0.2468,
"step": 4500
},
{
"epoch": 12.0,
"eval_accuracy": 0.6693333333333333,
"eval_f1": 0.6644005202701936,
"eval_loss": 1.343381404876709,
"eval_precision": 0.6702673209494008,
"eval_recall": 0.6696177137521676,
"eval_runtime": 34.1977,
"eval_samples_per_second": 21.931,
"eval_steps_per_second": 2.749,
"step": 4512
},
{
"epoch": 12.23404255319149,
"grad_norm": 0.5507918000221252,
"learning_rate": 0.00011648936170212764,
"loss": 0.161,
"step": 4600
},
{
"epoch": 12.5,
"grad_norm": 3.424229621887207,
"learning_rate": 0.0001125,
"loss": 0.1698,
"step": 4700
},
{
"epoch": 12.76595744680851,
"grad_norm": 10.304464340209961,
"learning_rate": 0.00010851063829787234,
"loss": 0.1992,
"step": 4800
},
{
"epoch": 13.0,
"eval_accuracy": 0.6773333333333333,
"eval_f1": 0.6757724702487522,
"eval_loss": 1.5172154903411865,
"eval_precision": 0.6749943823114556,
"eval_recall": 0.6780813658796853,
"eval_runtime": 34.5579,
"eval_samples_per_second": 21.703,
"eval_steps_per_second": 2.72,
"step": 4888
},
{
"epoch": 13.03191489361702,
"grad_norm": 0.58838951587677,
"learning_rate": 0.00010452127659574466,
"loss": 0.153,
"step": 4900
},
{
"epoch": 13.297872340425531,
"grad_norm": 4.339015007019043,
"learning_rate": 0.00010053191489361702,
"loss": 0.0729,
"step": 5000
},
{
"epoch": 13.563829787234042,
"grad_norm": 5.353603839874268,
"learning_rate": 9.654255319148935e-05,
"loss": 0.1182,
"step": 5100
},
{
"epoch": 13.829787234042554,
"grad_norm": 0.11711510270833969,
"learning_rate": 9.25531914893617e-05,
"loss": 0.1391,
"step": 5200
},
{
"epoch": 14.0,
"eval_accuracy": 0.688,
"eval_f1": 0.6861192542213884,
"eval_loss": 1.8793449401855469,
"eval_precision": 0.6887025095519409,
"eval_recall": 0.6882689075630252,
"eval_runtime": 35.0719,
"eval_samples_per_second": 21.385,
"eval_steps_per_second": 2.68,
"step": 5264
},
{
"epoch": 14.095744680851064,
"grad_norm": 16.545928955078125,
"learning_rate": 8.856382978723404e-05,
"loss": 0.0919,
"step": 5300
},
{
"epoch": 14.361702127659575,
"grad_norm": 0.11285369098186493,
"learning_rate": 8.457446808510637e-05,
"loss": 0.0724,
"step": 5400
},
{
"epoch": 14.627659574468085,
"grad_norm": 1.0866966247558594,
"learning_rate": 8.058510638297872e-05,
"loss": 0.0615,
"step": 5500
},
{
"epoch": 14.893617021276595,
"grad_norm": 0.0466451533138752,
"learning_rate": 7.659574468085105e-05,
"loss": 0.062,
"step": 5600
},
{
"epoch": 15.0,
"eval_accuracy": 0.6826666666666666,
"eval_f1": 0.6784899933134545,
"eval_loss": 2.1949727535247803,
"eval_precision": 0.6780378052909217,
"eval_recall": 0.6832503668133919,
"eval_runtime": 34.2882,
"eval_samples_per_second": 21.873,
"eval_steps_per_second": 2.741,
"step": 5640
},
{
"epoch": 15.159574468085106,
"grad_norm": 1.6346490383148193,
"learning_rate": 7.26063829787234e-05,
"loss": 0.0477,
"step": 5700
},
{
"epoch": 15.425531914893616,
"grad_norm": 0.004013681318610907,
"learning_rate": 6.861702127659574e-05,
"loss": 0.0484,
"step": 5800
},
{
"epoch": 15.691489361702128,
"grad_norm": 0.6463019847869873,
"learning_rate": 6.462765957446807e-05,
"loss": 0.0389,
"step": 5900
},
{
"epoch": 15.957446808510639,
"grad_norm": 0.4977071285247803,
"learning_rate": 6.063829787234042e-05,
"loss": 0.0392,
"step": 6000
},
{
"epoch": 16.0,
"eval_accuracy": 0.6626666666666666,
"eval_f1": 0.6599147266673199,
"eval_loss": 2.2853217124938965,
"eval_precision": 0.6593416101537274,
"eval_recall": 0.6640821661998133,
"eval_runtime": 34.2142,
"eval_samples_per_second": 21.921,
"eval_steps_per_second": 2.747,
"step": 6016
},
{
"epoch": 16.22340425531915,
"grad_norm": 0.18373289704322815,
"learning_rate": 5.6648936170212766e-05,
"loss": 0.035,
"step": 6100
},
{
"epoch": 16.48936170212766,
"grad_norm": 11.437871932983398,
"learning_rate": 5.26595744680851e-05,
"loss": 0.0323,
"step": 6200
},
{
"epoch": 16.75531914893617,
"grad_norm": 0.04929669201374054,
"learning_rate": 4.867021276595744e-05,
"loss": 0.0286,
"step": 6300
},
{
"epoch": 17.0,
"eval_accuracy": 0.6706666666666666,
"eval_f1": 0.6665467903773287,
"eval_loss": 2.356433153152466,
"eval_precision": 0.6646925224621034,
"eval_recall": 0.6718655462184874,
"eval_runtime": 34.2754,
"eval_samples_per_second": 21.882,
"eval_steps_per_second": 2.742,
"step": 6392
},
{
"epoch": 17.02127659574468,
"grad_norm": 0.003023180877789855,
"learning_rate": 4.468085106382978e-05,
"loss": 0.0232,
"step": 6400
},
{
"epoch": 17.28723404255319,
"grad_norm": 0.009346798993647099,
"learning_rate": 4.069148936170212e-05,
"loss": 0.0195,
"step": 6500
},
{
"epoch": 17.5531914893617,
"grad_norm": 0.0396982878446579,
"learning_rate": 3.670212765957446e-05,
"loss": 0.02,
"step": 6600
},
{
"epoch": 17.819148936170212,
"grad_norm": 0.014023613184690475,
"learning_rate": 3.271276595744681e-05,
"loss": 0.025,
"step": 6700
},
{
"epoch": 18.0,
"eval_accuracy": 0.676,
"eval_f1": 0.6721775597644245,
"eval_loss": 2.5857865810394287,
"eval_precision": 0.6713915090790397,
"eval_recall": 0.6767165532879819,
"eval_runtime": 34.2491,
"eval_samples_per_second": 21.898,
"eval_steps_per_second": 2.745,
"step": 6768
},
{
"epoch": 18.085106382978722,
"grad_norm": 0.47041425108909607,
"learning_rate": 2.8723404255319147e-05,
"loss": 0.0202,
"step": 6800
},
{
"epoch": 18.351063829787233,
"grad_norm": 0.06168466433882713,
"learning_rate": 2.4734042553191484e-05,
"loss": 0.019,
"step": 6900
},
{
"epoch": 18.617021276595743,
"grad_norm": 0.0014604219468310475,
"learning_rate": 2.0744680851063828e-05,
"loss": 0.0178,
"step": 7000
},
{
"epoch": 18.882978723404257,
"grad_norm": 0.0028070323169231415,
"learning_rate": 1.675531914893617e-05,
"loss": 0.0214,
"step": 7100
},
{
"epoch": 19.0,
"eval_accuracy": 0.668,
"eval_f1": 0.6658631291530153,
"eval_loss": 2.6788387298583984,
"eval_precision": 0.6642259414225942,
"eval_recall": 0.6688787515006003,
"eval_runtime": 34.249,
"eval_samples_per_second": 21.898,
"eval_steps_per_second": 2.745,
"step": 7144
},
{
"epoch": 19.148936170212767,
"grad_norm": 0.0015123536577448249,
"learning_rate": 1.276595744680851e-05,
"loss": 0.0204,
"step": 7200
},
{
"epoch": 19.414893617021278,
"grad_norm": 0.0018817168893292546,
"learning_rate": 8.77659574468085e-06,
"loss": 0.0141,
"step": 7300
},
{
"epoch": 19.680851063829788,
"grad_norm": 0.6008268594741821,
"learning_rate": 4.7872340425531906e-06,
"loss": 0.0203,
"step": 7400
},
{
"epoch": 19.9468085106383,
"grad_norm": 1.0317440032958984,
"learning_rate": 7.978723404255318e-07,
"loss": 0.0155,
"step": 7500
},
{
"epoch": 20.0,
"eval_accuracy": 0.672,
"eval_f1": 0.6695170440310667,
"eval_loss": 2.725527048110962,
"eval_precision": 0.66779120407287,
"eval_recall": 0.6728787515006003,
"eval_runtime": 34.5602,
"eval_samples_per_second": 21.701,
"eval_steps_per_second": 2.72,
"step": 7520
}
],
"logging_steps": 100,
"max_steps": 7520,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.288864877974981e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}