|
{ |
|
"best_metric": 0.939655172413793, |
|
"best_model_checkpoint": "/scratch/camembertv2/runs/results/ftb_ner/camembertav2-base-bf16-p2-17000/max_seq_length-192-gradient_accumulation_steps-2-precision-fp32-learning_rate-5.000000000000001e-05-epochs-8-lr_scheduler-linear-warmup_steps-0.1/SEED-42/checkpoint-4944", |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 4944, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16181229773462782, |
|
"grad_norm": 0.7081506848335266, |
|
"learning_rate": 1.0101010101010103e-05, |
|
"loss": 1.713, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.32362459546925565, |
|
"grad_norm": 0.6198400855064392, |
|
"learning_rate": 2.0202020202020206e-05, |
|
"loss": 0.1665, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4854368932038835, |
|
"grad_norm": 0.813007652759552, |
|
"learning_rate": 3.030303030303031e-05, |
|
"loss": 0.0816, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6472491909385113, |
|
"grad_norm": 0.9836126565933228, |
|
"learning_rate": 4.040404040404041e-05, |
|
"loss": 0.0487, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8090614886731392, |
|
"grad_norm": 0.2043960988521576, |
|
"learning_rate": 4.9943807597212865e-05, |
|
"loss": 0.0415, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.970873786407767, |
|
"grad_norm": 0.6549485921859741, |
|
"learning_rate": 4.8819959541470004e-05, |
|
"loss": 0.0435, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9896461049632957, |
|
"eval_f1": 0.8642940946421588, |
|
"eval_loss": 0.04151414707303047, |
|
"eval_precision": 0.8599221789883269, |
|
"eval_recall": 0.8687106918238994, |
|
"eval_runtime": 4.2561, |
|
"eval_samples_per_second": 290.171, |
|
"eval_steps_per_second": 36.418, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.132686084142395, |
|
"grad_norm": 0.905681848526001, |
|
"learning_rate": 4.7696111485727136e-05, |
|
"loss": 0.0221, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.2944983818770226, |
|
"grad_norm": 0.4660435616970062, |
|
"learning_rate": 4.6572263429984275e-05, |
|
"loss": 0.029, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.4563106796116505, |
|
"grad_norm": 0.4658781886100769, |
|
"learning_rate": 4.544841537424141e-05, |
|
"loss": 0.0265, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.6181229773462782, |
|
"grad_norm": 1.3315564393997192, |
|
"learning_rate": 4.4324567318498546e-05, |
|
"loss": 0.022, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.779935275080906, |
|
"grad_norm": 0.08498021215200424, |
|
"learning_rate": 4.3200719262755685e-05, |
|
"loss": 0.025, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.941747572815534, |
|
"grad_norm": 0.4140647351741791, |
|
"learning_rate": 4.207687120701282e-05, |
|
"loss": 0.0386, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9920017530404295, |
|
"eval_f1": 0.8975457732761979, |
|
"eval_loss": 0.035175956785678864, |
|
"eval_precision": 0.8895752895752895, |
|
"eval_recall": 0.9056603773584906, |
|
"eval_runtime": 3.7654, |
|
"eval_samples_per_second": 327.989, |
|
"eval_steps_per_second": 41.165, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 2.103559870550162, |
|
"grad_norm": 0.43421319127082825, |
|
"learning_rate": 4.0953023151269956e-05, |
|
"loss": 0.0249, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.26537216828479, |
|
"grad_norm": 0.2701377272605896, |
|
"learning_rate": 3.9829175095527095e-05, |
|
"loss": 0.0191, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.4271844660194173, |
|
"grad_norm": 0.11492807418107986, |
|
"learning_rate": 3.870532703978423e-05, |
|
"loss": 0.0136, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.588996763754045, |
|
"grad_norm": 0.582848072052002, |
|
"learning_rate": 3.758147898404136e-05, |
|
"loss": 0.0258, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.750809061488673, |
|
"grad_norm": 0.2515222132205963, |
|
"learning_rate": 3.6457630928298505e-05, |
|
"loss": 0.0169, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.912621359223301, |
|
"grad_norm": 0.36368367075920105, |
|
"learning_rate": 3.533378287255564e-05, |
|
"loss": 0.0123, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9929330557685987, |
|
"eval_f1": 0.9168309026409145, |
|
"eval_loss": 0.030728859826922417, |
|
"eval_precision": 0.9193675889328063, |
|
"eval_recall": 0.914308176100629, |
|
"eval_runtime": 3.7477, |
|
"eval_samples_per_second": 329.532, |
|
"eval_steps_per_second": 41.358, |
|
"step": 1854 |
|
}, |
|
{ |
|
"epoch": 3.074433656957929, |
|
"grad_norm": 0.3048587739467621, |
|
"learning_rate": 3.420993481681277e-05, |
|
"loss": 0.0128, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.236245954692557, |
|
"grad_norm": 0.01976470835506916, |
|
"learning_rate": 3.308608676106991e-05, |
|
"loss": 0.0145, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.3980582524271843, |
|
"grad_norm": 0.17214246094226837, |
|
"learning_rate": 3.196223870532705e-05, |
|
"loss": 0.0092, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.559870550161812, |
|
"grad_norm": 0.2264121174812317, |
|
"learning_rate": 3.083839064958418e-05, |
|
"loss": 0.0111, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.72168284789644, |
|
"grad_norm": 0.13608065247535706, |
|
"learning_rate": 2.971454259384132e-05, |
|
"loss": 0.0124, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.883495145631068, |
|
"grad_norm": 0.027897851541638374, |
|
"learning_rate": 2.8590694538098453e-05, |
|
"loss": 0.0136, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9914265366495015, |
|
"eval_f1": 0.9107497024990083, |
|
"eval_loss": 0.03449518606066704, |
|
"eval_precision": 0.9191353082465973, |
|
"eval_recall": 0.9025157232704403, |
|
"eval_runtime": 3.7618, |
|
"eval_samples_per_second": 328.3, |
|
"eval_steps_per_second": 41.204, |
|
"step": 2472 |
|
}, |
|
{ |
|
"epoch": 4.0453074433656955, |
|
"grad_norm": 0.18220163881778717, |
|
"learning_rate": 2.746684648235559e-05, |
|
"loss": 0.0088, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.207119741100324, |
|
"grad_norm": 0.016186678782105446, |
|
"learning_rate": 2.6342998426612728e-05, |
|
"loss": 0.0087, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.368932038834951, |
|
"grad_norm": 0.18213553726673126, |
|
"learning_rate": 2.5219150370869863e-05, |
|
"loss": 0.0051, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.53074433656958, |
|
"grad_norm": 0.012032161466777325, |
|
"learning_rate": 2.4095302315127e-05, |
|
"loss": 0.0081, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.692556634304207, |
|
"grad_norm": 0.0591534860432148, |
|
"learning_rate": 2.2971454259384134e-05, |
|
"loss": 0.0042, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.854368932038835, |
|
"grad_norm": 0.3236534893512726, |
|
"learning_rate": 2.1847606203641273e-05, |
|
"loss": 0.009, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9935904459296593, |
|
"eval_f1": 0.9192352711666016, |
|
"eval_loss": 0.029976682737469673, |
|
"eval_precision": 0.9124709527498064, |
|
"eval_recall": 0.9261006289308176, |
|
"eval_runtime": 3.7424, |
|
"eval_samples_per_second": 330.006, |
|
"eval_steps_per_second": 41.418, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 5.016181229773463, |
|
"grad_norm": 0.006900902837514877, |
|
"learning_rate": 2.072375814789841e-05, |
|
"loss": 0.0089, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 5.17799352750809, |
|
"grad_norm": 0.10268358141183853, |
|
"learning_rate": 1.9599910092155544e-05, |
|
"loss": 0.003, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.339805825242719, |
|
"grad_norm": 0.008188015781342983, |
|
"learning_rate": 1.847606203641268e-05, |
|
"loss": 0.003, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 5.501618122977346, |
|
"grad_norm": 0.159342423081398, |
|
"learning_rate": 1.735221398066982e-05, |
|
"loss": 0.0027, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.663430420711974, |
|
"grad_norm": 0.4539357125759125, |
|
"learning_rate": 1.622836592492695e-05, |
|
"loss": 0.0042, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.825242718446602, |
|
"grad_norm": 0.038250233978033066, |
|
"learning_rate": 1.510451786918409e-05, |
|
"loss": 0.0039, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.9870550161812295, |
|
"grad_norm": 0.004649197217077017, |
|
"learning_rate": 1.3980669813441227e-05, |
|
"loss": 0.0038, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9935630546729484, |
|
"eval_f1": 0.9324960753532182, |
|
"eval_loss": 0.033860765397548676, |
|
"eval_precision": 0.9310344827586207, |
|
"eval_recall": 0.9339622641509434, |
|
"eval_runtime": 3.7552, |
|
"eval_samples_per_second": 328.879, |
|
"eval_steps_per_second": 41.276, |
|
"step": 3708 |
|
}, |
|
{ |
|
"epoch": 6.148867313915858, |
|
"grad_norm": 0.20126421749591827, |
|
"learning_rate": 1.285682175769836e-05, |
|
"loss": 0.0016, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 6.310679611650485, |
|
"grad_norm": 0.006144899874925613, |
|
"learning_rate": 1.1732973701955498e-05, |
|
"loss": 0.0024, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 6.472491909385114, |
|
"grad_norm": 1.5297178030014038, |
|
"learning_rate": 1.0609125646212633e-05, |
|
"loss": 0.0022, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.634304207119741, |
|
"grad_norm": 0.04670717939734459, |
|
"learning_rate": 9.48527759046977e-06, |
|
"loss": 0.002, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 6.796116504854369, |
|
"grad_norm": 0.028466541320085526, |
|
"learning_rate": 8.361429534726907e-06, |
|
"loss": 0.0018, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.957928802588997, |
|
"grad_norm": 0.09033439308404922, |
|
"learning_rate": 7.237581478984042e-06, |
|
"loss": 0.0019, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9936726196997918, |
|
"eval_f1": 0.9328125, |
|
"eval_loss": 0.03374877944588661, |
|
"eval_precision": 0.9270186335403726, |
|
"eval_recall": 0.9386792452830188, |
|
"eval_runtime": 3.7709, |
|
"eval_samples_per_second": 327.506, |
|
"eval_steps_per_second": 41.104, |
|
"step": 4326 |
|
}, |
|
{ |
|
"epoch": 7.119741100323624, |
|
"grad_norm": 0.020341284573078156, |
|
"learning_rate": 6.113733423241179e-06, |
|
"loss": 0.0017, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 7.281553398058253, |
|
"grad_norm": 0.026211928576231003, |
|
"learning_rate": 4.989885367498316e-06, |
|
"loss": 0.0009, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.44336569579288, |
|
"grad_norm": 0.0029867186676710844, |
|
"learning_rate": 3.866037311755451e-06, |
|
"loss": 0.0014, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 7.605177993527509, |
|
"grad_norm": 0.014907300472259521, |
|
"learning_rate": 2.742189256012588e-06, |
|
"loss": 0.0014, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 7.766990291262136, |
|
"grad_norm": 0.12841272354125977, |
|
"learning_rate": 1.6183412002697239e-06, |
|
"loss": 0.0013, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.9288025889967635, |
|
"grad_norm": 0.0018621513154357672, |
|
"learning_rate": 4.9449314452686e-07, |
|
"loss": 0.0013, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9941108798071655, |
|
"eval_f1": 0.939655172413793, |
|
"eval_loss": 0.03547634929418564, |
|
"eval_precision": 0.93671875, |
|
"eval_recall": 0.9426100628930818, |
|
"eval_runtime": 3.7628, |
|
"eval_samples_per_second": 328.211, |
|
"eval_steps_per_second": 41.192, |
|
"step": 4944 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 4944, |
|
"total_flos": 2834249641269840.0, |
|
"train_loss": 0.05133420664178129, |
|
"train_runtime": 849.5258, |
|
"train_samples_per_second": 93.05, |
|
"train_steps_per_second": 5.82 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4944, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2834249641269840.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|