camembertav2-base-ftb-ner / trainer_state.json
wissamantoun's picture
Upload folder using huggingface_hub
a00b3dc verified
{
"best_metric": 0.939655172413793,
"best_model_checkpoint": "/scratch/camembertv2/runs/results/ftb_ner/camembertav2-base-bf16-p2-17000/max_seq_length-192-gradient_accumulation_steps-2-precision-fp32-learning_rate-5.000000000000001e-05-epochs-8-lr_scheduler-linear-warmup_steps-0.1/SEED-42/checkpoint-4944",
"epoch": 8.0,
"eval_steps": 500,
"global_step": 4944,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16181229773462782,
"grad_norm": 0.7081506848335266,
"learning_rate": 1.0101010101010103e-05,
"loss": 1.713,
"step": 100
},
{
"epoch": 0.32362459546925565,
"grad_norm": 0.6198400855064392,
"learning_rate": 2.0202020202020206e-05,
"loss": 0.1665,
"step": 200
},
{
"epoch": 0.4854368932038835,
"grad_norm": 0.813007652759552,
"learning_rate": 3.030303030303031e-05,
"loss": 0.0816,
"step": 300
},
{
"epoch": 0.6472491909385113,
"grad_norm": 0.9836126565933228,
"learning_rate": 4.040404040404041e-05,
"loss": 0.0487,
"step": 400
},
{
"epoch": 0.8090614886731392,
"grad_norm": 0.2043960988521576,
"learning_rate": 4.9943807597212865e-05,
"loss": 0.0415,
"step": 500
},
{
"epoch": 0.970873786407767,
"grad_norm": 0.6549485921859741,
"learning_rate": 4.8819959541470004e-05,
"loss": 0.0435,
"step": 600
},
{
"epoch": 1.0,
"eval_accuracy": 0.9896461049632957,
"eval_f1": 0.8642940946421588,
"eval_loss": 0.04151414707303047,
"eval_precision": 0.8599221789883269,
"eval_recall": 0.8687106918238994,
"eval_runtime": 4.2561,
"eval_samples_per_second": 290.171,
"eval_steps_per_second": 36.418,
"step": 618
},
{
"epoch": 1.132686084142395,
"grad_norm": 0.905681848526001,
"learning_rate": 4.7696111485727136e-05,
"loss": 0.0221,
"step": 700
},
{
"epoch": 1.2944983818770226,
"grad_norm": 0.4660435616970062,
"learning_rate": 4.6572263429984275e-05,
"loss": 0.029,
"step": 800
},
{
"epoch": 1.4563106796116505,
"grad_norm": 0.4658781886100769,
"learning_rate": 4.544841537424141e-05,
"loss": 0.0265,
"step": 900
},
{
"epoch": 1.6181229773462782,
"grad_norm": 1.3315564393997192,
"learning_rate": 4.4324567318498546e-05,
"loss": 0.022,
"step": 1000
},
{
"epoch": 1.779935275080906,
"grad_norm": 0.08498021215200424,
"learning_rate": 4.3200719262755685e-05,
"loss": 0.025,
"step": 1100
},
{
"epoch": 1.941747572815534,
"grad_norm": 0.4140647351741791,
"learning_rate": 4.207687120701282e-05,
"loss": 0.0386,
"step": 1200
},
{
"epoch": 2.0,
"eval_accuracy": 0.9920017530404295,
"eval_f1": 0.8975457732761979,
"eval_loss": 0.035175956785678864,
"eval_precision": 0.8895752895752895,
"eval_recall": 0.9056603773584906,
"eval_runtime": 3.7654,
"eval_samples_per_second": 327.989,
"eval_steps_per_second": 41.165,
"step": 1236
},
{
"epoch": 2.103559870550162,
"grad_norm": 0.43421319127082825,
"learning_rate": 4.0953023151269956e-05,
"loss": 0.0249,
"step": 1300
},
{
"epoch": 2.26537216828479,
"grad_norm": 0.2701377272605896,
"learning_rate": 3.9829175095527095e-05,
"loss": 0.0191,
"step": 1400
},
{
"epoch": 2.4271844660194173,
"grad_norm": 0.11492807418107986,
"learning_rate": 3.870532703978423e-05,
"loss": 0.0136,
"step": 1500
},
{
"epoch": 2.588996763754045,
"grad_norm": 0.582848072052002,
"learning_rate": 3.758147898404136e-05,
"loss": 0.0258,
"step": 1600
},
{
"epoch": 2.750809061488673,
"grad_norm": 0.2515222132205963,
"learning_rate": 3.6457630928298505e-05,
"loss": 0.0169,
"step": 1700
},
{
"epoch": 2.912621359223301,
"grad_norm": 0.36368367075920105,
"learning_rate": 3.533378287255564e-05,
"loss": 0.0123,
"step": 1800
},
{
"epoch": 3.0,
"eval_accuracy": 0.9929330557685987,
"eval_f1": 0.9168309026409145,
"eval_loss": 0.030728859826922417,
"eval_precision": 0.9193675889328063,
"eval_recall": 0.914308176100629,
"eval_runtime": 3.7477,
"eval_samples_per_second": 329.532,
"eval_steps_per_second": 41.358,
"step": 1854
},
{
"epoch": 3.074433656957929,
"grad_norm": 0.3048587739467621,
"learning_rate": 3.420993481681277e-05,
"loss": 0.0128,
"step": 1900
},
{
"epoch": 3.236245954692557,
"grad_norm": 0.01976470835506916,
"learning_rate": 3.308608676106991e-05,
"loss": 0.0145,
"step": 2000
},
{
"epoch": 3.3980582524271843,
"grad_norm": 0.17214246094226837,
"learning_rate": 3.196223870532705e-05,
"loss": 0.0092,
"step": 2100
},
{
"epoch": 3.559870550161812,
"grad_norm": 0.2264121174812317,
"learning_rate": 3.083839064958418e-05,
"loss": 0.0111,
"step": 2200
},
{
"epoch": 3.72168284789644,
"grad_norm": 0.13608065247535706,
"learning_rate": 2.971454259384132e-05,
"loss": 0.0124,
"step": 2300
},
{
"epoch": 3.883495145631068,
"grad_norm": 0.027897851541638374,
"learning_rate": 2.8590694538098453e-05,
"loss": 0.0136,
"step": 2400
},
{
"epoch": 4.0,
"eval_accuracy": 0.9914265366495015,
"eval_f1": 0.9107497024990083,
"eval_loss": 0.03449518606066704,
"eval_precision": 0.9191353082465973,
"eval_recall": 0.9025157232704403,
"eval_runtime": 3.7618,
"eval_samples_per_second": 328.3,
"eval_steps_per_second": 41.204,
"step": 2472
},
{
"epoch": 4.0453074433656955,
"grad_norm": 0.18220163881778717,
"learning_rate": 2.746684648235559e-05,
"loss": 0.0088,
"step": 2500
},
{
"epoch": 4.207119741100324,
"grad_norm": 0.016186678782105446,
"learning_rate": 2.6342998426612728e-05,
"loss": 0.0087,
"step": 2600
},
{
"epoch": 4.368932038834951,
"grad_norm": 0.18213553726673126,
"learning_rate": 2.5219150370869863e-05,
"loss": 0.0051,
"step": 2700
},
{
"epoch": 4.53074433656958,
"grad_norm": 0.012032161466777325,
"learning_rate": 2.4095302315127e-05,
"loss": 0.0081,
"step": 2800
},
{
"epoch": 4.692556634304207,
"grad_norm": 0.0591534860432148,
"learning_rate": 2.2971454259384134e-05,
"loss": 0.0042,
"step": 2900
},
{
"epoch": 4.854368932038835,
"grad_norm": 0.3236534893512726,
"learning_rate": 2.1847606203641273e-05,
"loss": 0.009,
"step": 3000
},
{
"epoch": 5.0,
"eval_accuracy": 0.9935904459296593,
"eval_f1": 0.9192352711666016,
"eval_loss": 0.029976682737469673,
"eval_precision": 0.9124709527498064,
"eval_recall": 0.9261006289308176,
"eval_runtime": 3.7424,
"eval_samples_per_second": 330.006,
"eval_steps_per_second": 41.418,
"step": 3090
},
{
"epoch": 5.016181229773463,
"grad_norm": 0.006900902837514877,
"learning_rate": 2.072375814789841e-05,
"loss": 0.0089,
"step": 3100
},
{
"epoch": 5.17799352750809,
"grad_norm": 0.10268358141183853,
"learning_rate": 1.9599910092155544e-05,
"loss": 0.003,
"step": 3200
},
{
"epoch": 5.339805825242719,
"grad_norm": 0.008188015781342983,
"learning_rate": 1.847606203641268e-05,
"loss": 0.003,
"step": 3300
},
{
"epoch": 5.501618122977346,
"grad_norm": 0.159342423081398,
"learning_rate": 1.735221398066982e-05,
"loss": 0.0027,
"step": 3400
},
{
"epoch": 5.663430420711974,
"grad_norm": 0.4539357125759125,
"learning_rate": 1.622836592492695e-05,
"loss": 0.0042,
"step": 3500
},
{
"epoch": 5.825242718446602,
"grad_norm": 0.038250233978033066,
"learning_rate": 1.510451786918409e-05,
"loss": 0.0039,
"step": 3600
},
{
"epoch": 5.9870550161812295,
"grad_norm": 0.004649197217077017,
"learning_rate": 1.3980669813441227e-05,
"loss": 0.0038,
"step": 3700
},
{
"epoch": 6.0,
"eval_accuracy": 0.9935630546729484,
"eval_f1": 0.9324960753532182,
"eval_loss": 0.033860765397548676,
"eval_precision": 0.9310344827586207,
"eval_recall": 0.9339622641509434,
"eval_runtime": 3.7552,
"eval_samples_per_second": 328.879,
"eval_steps_per_second": 41.276,
"step": 3708
},
{
"epoch": 6.148867313915858,
"grad_norm": 0.20126421749591827,
"learning_rate": 1.285682175769836e-05,
"loss": 0.0016,
"step": 3800
},
{
"epoch": 6.310679611650485,
"grad_norm": 0.006144899874925613,
"learning_rate": 1.1732973701955498e-05,
"loss": 0.0024,
"step": 3900
},
{
"epoch": 6.472491909385114,
"grad_norm": 1.5297178030014038,
"learning_rate": 1.0609125646212633e-05,
"loss": 0.0022,
"step": 4000
},
{
"epoch": 6.634304207119741,
"grad_norm": 0.04670717939734459,
"learning_rate": 9.48527759046977e-06,
"loss": 0.002,
"step": 4100
},
{
"epoch": 6.796116504854369,
"grad_norm": 0.028466541320085526,
"learning_rate": 8.361429534726907e-06,
"loss": 0.0018,
"step": 4200
},
{
"epoch": 6.957928802588997,
"grad_norm": 0.09033439308404922,
"learning_rate": 7.237581478984042e-06,
"loss": 0.0019,
"step": 4300
},
{
"epoch": 7.0,
"eval_accuracy": 0.9936726196997918,
"eval_f1": 0.9328125,
"eval_loss": 0.03374877944588661,
"eval_precision": 0.9270186335403726,
"eval_recall": 0.9386792452830188,
"eval_runtime": 3.7709,
"eval_samples_per_second": 327.506,
"eval_steps_per_second": 41.104,
"step": 4326
},
{
"epoch": 7.119741100323624,
"grad_norm": 0.020341284573078156,
"learning_rate": 6.113733423241179e-06,
"loss": 0.0017,
"step": 4400
},
{
"epoch": 7.281553398058253,
"grad_norm": 0.026211928576231003,
"learning_rate": 4.989885367498316e-06,
"loss": 0.0009,
"step": 4500
},
{
"epoch": 7.44336569579288,
"grad_norm": 0.0029867186676710844,
"learning_rate": 3.866037311755451e-06,
"loss": 0.0014,
"step": 4600
},
{
"epoch": 7.605177993527509,
"grad_norm": 0.014907300472259521,
"learning_rate": 2.742189256012588e-06,
"loss": 0.0014,
"step": 4700
},
{
"epoch": 7.766990291262136,
"grad_norm": 0.12841272354125977,
"learning_rate": 1.6183412002697239e-06,
"loss": 0.0013,
"step": 4800
},
{
"epoch": 7.9288025889967635,
"grad_norm": 0.0018621513154357672,
"learning_rate": 4.9449314452686e-07,
"loss": 0.0013,
"step": 4900
},
{
"epoch": 8.0,
"eval_accuracy": 0.9941108798071655,
"eval_f1": 0.939655172413793,
"eval_loss": 0.03547634929418564,
"eval_precision": 0.93671875,
"eval_recall": 0.9426100628930818,
"eval_runtime": 3.7628,
"eval_samples_per_second": 328.211,
"eval_steps_per_second": 41.192,
"step": 4944
},
{
"epoch": 8.0,
"step": 4944,
"total_flos": 2834249641269840.0,
"train_loss": 0.05133420664178129,
"train_runtime": 849.5258,
"train_samples_per_second": 93.05,
"train_steps_per_second": 5.82
}
],
"logging_steps": 100,
"max_steps": 4944,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2834249641269840.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}