|
{ |
|
"best_metric": 82.59496169943931, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/bert-base-finetuned-parsing-ud-Chinese-GSD/checkpoint-2000", |
|
"epoch": 36.0, |
|
"global_step": 4500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.840000000000001e-05, |
|
"loss": 3.3926, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 7.947919463087248e-05, |
|
"loss": 1.027, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 7.894228187919463e-05, |
|
"loss": 0.6817, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 7.840536912751678e-05, |
|
"loss": 0.5063, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 7.786845637583893e-05, |
|
"loss": 0.367, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_las": 81.48148148148148, |
|
"eval_loss": 0.8465050458908081, |
|
"eval_runtime": 3.7622, |
|
"eval_samples_per_second": 132.902, |
|
"eval_steps_per_second": 16.746, |
|
"eval_uas": 85.58003632630499, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 7.733154362416108e-05, |
|
"loss": 0.259, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 7.679463087248322e-05, |
|
"loss": 0.2121, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 7.625771812080537e-05, |
|
"loss": 0.1829, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 7.572080536912752e-05, |
|
"loss": 0.1592, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 7.518389261744967e-05, |
|
"loss": 0.135, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_las": 82.23959567243149, |
|
"eval_loss": 1.149000883102417, |
|
"eval_runtime": 3.7521, |
|
"eval_samples_per_second": 133.26, |
|
"eval_steps_per_second": 16.791, |
|
"eval_uas": 86.01437258153676, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 7.464697986577182e-05, |
|
"loss": 0.1142, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 7.411006711409397e-05, |
|
"loss": 0.1008, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 7.357315436241611e-05, |
|
"loss": 0.0923, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 7.303624161073826e-05, |
|
"loss": 0.0833, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 7.249932885906041e-05, |
|
"loss": 0.0804, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_las": 81.89212666824606, |
|
"eval_loss": 1.3434501886367798, |
|
"eval_runtime": 3.7531, |
|
"eval_samples_per_second": 133.225, |
|
"eval_steps_per_second": 16.786, |
|
"eval_uas": 85.7458738055753, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 7.196241610738256e-05, |
|
"loss": 0.0703, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 7.142550335570471e-05, |
|
"loss": 0.0663, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 7.088859060402686e-05, |
|
"loss": 0.0638, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 7.0351677852349e-05, |
|
"loss": 0.0564, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 6.981476510067114e-05, |
|
"loss": 0.0593, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_las": 82.59496169943931, |
|
"eval_loss": 1.4142358303070068, |
|
"eval_runtime": 3.7723, |
|
"eval_samples_per_second": 132.545, |
|
"eval_steps_per_second": 16.701, |
|
"eval_uas": 86.22759219774146, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 6.927785234899329e-05, |
|
"loss": 0.0506, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 6.874093959731543e-05, |
|
"loss": 0.0486, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 6.820402684563758e-05, |
|
"loss": 0.0481, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 6.766711409395973e-05, |
|
"loss": 0.0433, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 6.713020134228188e-05, |
|
"loss": 0.0425, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_las": 81.96319987364762, |
|
"eval_loss": 1.5313485860824585, |
|
"eval_runtime": 3.7514, |
|
"eval_samples_per_second": 133.284, |
|
"eval_steps_per_second": 16.794, |
|
"eval_uas": 85.66690357735133, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 6.659328859060403e-05, |
|
"loss": 0.0427, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"learning_rate": 6.605637583892618e-05, |
|
"loss": 0.0386, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 6.551946308724832e-05, |
|
"loss": 0.0384, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 23.2, |
|
"learning_rate": 6.498255033557047e-05, |
|
"loss": 0.0329, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 6.444563758389262e-05, |
|
"loss": 0.0367, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_las": 82.53178551686014, |
|
"eval_loss": 1.592229962348938, |
|
"eval_runtime": 3.6848, |
|
"eval_samples_per_second": 135.693, |
|
"eval_steps_per_second": 17.097, |
|
"eval_uas": 86.06965174129353, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"learning_rate": 6.390872483221477e-05, |
|
"loss": 0.0318, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 6.337181208053692e-05, |
|
"loss": 0.0335, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 26.4, |
|
"learning_rate": 6.283489932885907e-05, |
|
"loss": 0.033, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"learning_rate": 6.229798657718121e-05, |
|
"loss": 0.0305, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 6.176107382550336e-05, |
|
"loss": 0.0286, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_las": 82.16852246702993, |
|
"eval_loss": 1.742520809173584, |
|
"eval_runtime": 3.6834, |
|
"eval_samples_per_second": 135.745, |
|
"eval_steps_per_second": 17.104, |
|
"eval_uas": 85.96699044460239, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 6.122416107382551e-05, |
|
"loss": 0.0269, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 29.6, |
|
"learning_rate": 6.068724832214766e-05, |
|
"loss": 0.0242, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"learning_rate": 6.015033557046981e-05, |
|
"loss": 0.0273, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 31.2, |
|
"learning_rate": 5.9613422818791955e-05, |
|
"loss": 0.0244, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 5.90765100671141e-05, |
|
"loss": 0.0254, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_las": 81.81315644002211, |
|
"eval_loss": 1.792807698249817, |
|
"eval_runtime": 3.6847, |
|
"eval_samples_per_second": 135.698, |
|
"eval_steps_per_second": 17.098, |
|
"eval_uas": 85.65110953170655, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"learning_rate": 5.853959731543625e-05, |
|
"loss": 0.0234, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"learning_rate": 5.80026845637584e-05, |
|
"loss": 0.0228, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 34.4, |
|
"learning_rate": 5.7465771812080534e-05, |
|
"loss": 0.0219, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"learning_rate": 5.692885906040268e-05, |
|
"loss": 0.0204, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 5.639194630872483e-05, |
|
"loss": 0.0238, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_las": 81.94740582800284, |
|
"eval_loss": 1.849847435951233, |
|
"eval_runtime": 3.6813, |
|
"eval_samples_per_second": 135.822, |
|
"eval_steps_per_second": 17.114, |
|
"eval_uas": 85.52475716654821, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"step": 4500, |
|
"total_flos": 2.4026636277633024e+16, |
|
"train_loss": 0.1895591730541653, |
|
"train_runtime": 2603.8023, |
|
"train_samples_per_second": 184.346, |
|
"train_steps_per_second": 5.761 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 120, |
|
"total_flos": 2.4026636277633024e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|