{ "best_metric": 0.939655172413793, "best_model_checkpoint": "/scratch/camembertv2/runs/results/ftb_ner/camembertav2-base-bf16-p2-17000/max_seq_length-192-gradient_accumulation_steps-2-precision-fp32-learning_rate-5.000000000000001e-05-epochs-8-lr_scheduler-linear-warmup_steps-0.1/SEED-42/checkpoint-4944", "epoch": 8.0, "eval_steps": 500, "global_step": 4944, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16181229773462782, "grad_norm": 0.7081506848335266, "learning_rate": 1.0101010101010103e-05, "loss": 1.713, "step": 100 }, { "epoch": 0.32362459546925565, "grad_norm": 0.6198400855064392, "learning_rate": 2.0202020202020206e-05, "loss": 0.1665, "step": 200 }, { "epoch": 0.4854368932038835, "grad_norm": 0.813007652759552, "learning_rate": 3.030303030303031e-05, "loss": 0.0816, "step": 300 }, { "epoch": 0.6472491909385113, "grad_norm": 0.9836126565933228, "learning_rate": 4.040404040404041e-05, "loss": 0.0487, "step": 400 }, { "epoch": 0.8090614886731392, "grad_norm": 0.2043960988521576, "learning_rate": 4.9943807597212865e-05, "loss": 0.0415, "step": 500 }, { "epoch": 0.970873786407767, "grad_norm": 0.6549485921859741, "learning_rate": 4.8819959541470004e-05, "loss": 0.0435, "step": 600 }, { "epoch": 1.0, "eval_accuracy": 0.9896461049632957, "eval_f1": 0.8642940946421588, "eval_loss": 0.04151414707303047, "eval_precision": 0.8599221789883269, "eval_recall": 0.8687106918238994, "eval_runtime": 4.2561, "eval_samples_per_second": 290.171, "eval_steps_per_second": 36.418, "step": 618 }, { "epoch": 1.132686084142395, "grad_norm": 0.905681848526001, "learning_rate": 4.7696111485727136e-05, "loss": 0.0221, "step": 700 }, { "epoch": 1.2944983818770226, "grad_norm": 0.4660435616970062, "learning_rate": 4.6572263429984275e-05, "loss": 0.029, "step": 800 }, { "epoch": 1.4563106796116505, "grad_norm": 0.4658781886100769, "learning_rate": 4.544841537424141e-05, "loss": 0.0265, "step": 900 }, { "epoch": 1.6181229773462782, "grad_norm": 1.3315564393997192, "learning_rate": 4.4324567318498546e-05, "loss": 0.022, "step": 1000 }, { "epoch": 1.779935275080906, "grad_norm": 0.08498021215200424, "learning_rate": 4.3200719262755685e-05, "loss": 0.025, "step": 1100 }, { "epoch": 1.941747572815534, "grad_norm": 0.4140647351741791, "learning_rate": 4.207687120701282e-05, "loss": 0.0386, "step": 1200 }, { "epoch": 2.0, "eval_accuracy": 0.9920017530404295, "eval_f1": 0.8975457732761979, "eval_loss": 0.035175956785678864, "eval_precision": 0.8895752895752895, "eval_recall": 0.9056603773584906, "eval_runtime": 3.7654, "eval_samples_per_second": 327.989, "eval_steps_per_second": 41.165, "step": 1236 }, { "epoch": 2.103559870550162, "grad_norm": 0.43421319127082825, "learning_rate": 4.0953023151269956e-05, "loss": 0.0249, "step": 1300 }, { "epoch": 2.26537216828479, "grad_norm": 0.2701377272605896, "learning_rate": 3.9829175095527095e-05, "loss": 0.0191, "step": 1400 }, { "epoch": 2.4271844660194173, "grad_norm": 0.11492807418107986, "learning_rate": 3.870532703978423e-05, "loss": 0.0136, "step": 1500 }, { "epoch": 2.588996763754045, "grad_norm": 0.582848072052002, "learning_rate": 3.758147898404136e-05, "loss": 0.0258, "step": 1600 }, { "epoch": 2.750809061488673, "grad_norm": 0.2515222132205963, "learning_rate": 3.6457630928298505e-05, "loss": 0.0169, "step": 1700 }, { "epoch": 2.912621359223301, "grad_norm": 0.36368367075920105, "learning_rate": 3.533378287255564e-05, "loss": 0.0123, "step": 1800 }, { "epoch": 3.0, "eval_accuracy": 0.9929330557685987, "eval_f1": 0.9168309026409145, "eval_loss": 0.030728859826922417, "eval_precision": 0.9193675889328063, "eval_recall": 0.914308176100629, "eval_runtime": 3.7477, "eval_samples_per_second": 329.532, "eval_steps_per_second": 41.358, "step": 1854 }, { "epoch": 3.074433656957929, "grad_norm": 0.3048587739467621, "learning_rate": 3.420993481681277e-05, "loss": 0.0128, "step": 1900 }, { "epoch": 3.236245954692557, "grad_norm": 0.01976470835506916, "learning_rate": 3.308608676106991e-05, "loss": 0.0145, "step": 2000 }, { "epoch": 3.3980582524271843, "grad_norm": 0.17214246094226837, "learning_rate": 3.196223870532705e-05, "loss": 0.0092, "step": 2100 }, { "epoch": 3.559870550161812, "grad_norm": 0.2264121174812317, "learning_rate": 3.083839064958418e-05, "loss": 0.0111, "step": 2200 }, { "epoch": 3.72168284789644, "grad_norm": 0.13608065247535706, "learning_rate": 2.971454259384132e-05, "loss": 0.0124, "step": 2300 }, { "epoch": 3.883495145631068, "grad_norm": 0.027897851541638374, "learning_rate": 2.8590694538098453e-05, "loss": 0.0136, "step": 2400 }, { "epoch": 4.0, "eval_accuracy": 0.9914265366495015, "eval_f1": 0.9107497024990083, "eval_loss": 0.03449518606066704, "eval_precision": 0.9191353082465973, "eval_recall": 0.9025157232704403, "eval_runtime": 3.7618, "eval_samples_per_second": 328.3, "eval_steps_per_second": 41.204, "step": 2472 }, { "epoch": 4.0453074433656955, "grad_norm": 0.18220163881778717, "learning_rate": 2.746684648235559e-05, "loss": 0.0088, "step": 2500 }, { "epoch": 4.207119741100324, "grad_norm": 0.016186678782105446, "learning_rate": 2.6342998426612728e-05, "loss": 0.0087, "step": 2600 }, { "epoch": 4.368932038834951, "grad_norm": 0.18213553726673126, "learning_rate": 2.5219150370869863e-05, "loss": 0.0051, "step": 2700 }, { "epoch": 4.53074433656958, "grad_norm": 0.012032161466777325, "learning_rate": 2.4095302315127e-05, "loss": 0.0081, "step": 2800 }, { "epoch": 4.692556634304207, "grad_norm": 0.0591534860432148, "learning_rate": 2.2971454259384134e-05, "loss": 0.0042, "step": 2900 }, { "epoch": 4.854368932038835, "grad_norm": 0.3236534893512726, "learning_rate": 2.1847606203641273e-05, "loss": 0.009, "step": 3000 }, { "epoch": 5.0, "eval_accuracy": 0.9935904459296593, "eval_f1": 0.9192352711666016, "eval_loss": 0.029976682737469673, "eval_precision": 0.9124709527498064, "eval_recall": 0.9261006289308176, "eval_runtime": 3.7424, "eval_samples_per_second": 330.006, "eval_steps_per_second": 41.418, "step": 3090 }, { "epoch": 5.016181229773463, "grad_norm": 0.006900902837514877, "learning_rate": 2.072375814789841e-05, "loss": 0.0089, "step": 3100 }, { "epoch": 5.17799352750809, "grad_norm": 0.10268358141183853, "learning_rate": 1.9599910092155544e-05, "loss": 0.003, "step": 3200 }, { "epoch": 5.339805825242719, "grad_norm": 0.008188015781342983, "learning_rate": 1.847606203641268e-05, "loss": 0.003, "step": 3300 }, { "epoch": 5.501618122977346, "grad_norm": 0.159342423081398, "learning_rate": 1.735221398066982e-05, "loss": 0.0027, "step": 3400 }, { "epoch": 5.663430420711974, "grad_norm": 0.4539357125759125, "learning_rate": 1.622836592492695e-05, "loss": 0.0042, "step": 3500 }, { "epoch": 5.825242718446602, "grad_norm": 0.038250233978033066, "learning_rate": 1.510451786918409e-05, "loss": 0.0039, "step": 3600 }, { "epoch": 5.9870550161812295, "grad_norm": 0.004649197217077017, "learning_rate": 1.3980669813441227e-05, "loss": 0.0038, "step": 3700 }, { "epoch": 6.0, "eval_accuracy": 0.9935630546729484, "eval_f1": 0.9324960753532182, "eval_loss": 0.033860765397548676, "eval_precision": 0.9310344827586207, "eval_recall": 0.9339622641509434, "eval_runtime": 3.7552, "eval_samples_per_second": 328.879, "eval_steps_per_second": 41.276, "step": 3708 }, { "epoch": 6.148867313915858, "grad_norm": 0.20126421749591827, "learning_rate": 1.285682175769836e-05, "loss": 0.0016, "step": 3800 }, { "epoch": 6.310679611650485, "grad_norm": 0.006144899874925613, "learning_rate": 1.1732973701955498e-05, "loss": 0.0024, "step": 3900 }, { "epoch": 6.472491909385114, "grad_norm": 1.5297178030014038, "learning_rate": 1.0609125646212633e-05, "loss": 0.0022, "step": 4000 }, { "epoch": 6.634304207119741, "grad_norm": 0.04670717939734459, "learning_rate": 9.48527759046977e-06, "loss": 0.002, "step": 4100 }, { "epoch": 6.796116504854369, "grad_norm": 0.028466541320085526, "learning_rate": 8.361429534726907e-06, "loss": 0.0018, "step": 4200 }, { "epoch": 6.957928802588997, "grad_norm": 0.09033439308404922, "learning_rate": 7.237581478984042e-06, "loss": 0.0019, "step": 4300 }, { "epoch": 7.0, "eval_accuracy": 0.9936726196997918, "eval_f1": 0.9328125, "eval_loss": 0.03374877944588661, "eval_precision": 0.9270186335403726, "eval_recall": 0.9386792452830188, "eval_runtime": 3.7709, "eval_samples_per_second": 327.506, "eval_steps_per_second": 41.104, "step": 4326 }, { "epoch": 7.119741100323624, "grad_norm": 0.020341284573078156, "learning_rate": 6.113733423241179e-06, "loss": 0.0017, "step": 4400 }, { "epoch": 7.281553398058253, "grad_norm": 0.026211928576231003, "learning_rate": 4.989885367498316e-06, "loss": 0.0009, "step": 4500 }, { "epoch": 7.44336569579288, "grad_norm": 0.0029867186676710844, "learning_rate": 3.866037311755451e-06, "loss": 0.0014, "step": 4600 }, { "epoch": 7.605177993527509, "grad_norm": 0.014907300472259521, "learning_rate": 2.742189256012588e-06, "loss": 0.0014, "step": 4700 }, { "epoch": 7.766990291262136, "grad_norm": 0.12841272354125977, "learning_rate": 1.6183412002697239e-06, "loss": 0.0013, "step": 4800 }, { "epoch": 7.9288025889967635, "grad_norm": 0.0018621513154357672, "learning_rate": 4.9449314452686e-07, "loss": 0.0013, "step": 4900 }, { "epoch": 8.0, "eval_accuracy": 0.9941108798071655, "eval_f1": 0.939655172413793, "eval_loss": 0.03547634929418564, "eval_precision": 0.93671875, "eval_recall": 0.9426100628930818, "eval_runtime": 3.7628, "eval_samples_per_second": 328.211, "eval_steps_per_second": 41.192, "step": 4944 }, { "epoch": 8.0, "step": 4944, "total_flos": 2834249641269840.0, "train_loss": 0.05133420664178129, "train_runtime": 849.5258, "train_samples_per_second": 93.05, "train_steps_per_second": 5.82 } ], "logging_steps": 100, "max_steps": 4944, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2834249641269840.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }