qwen-test / trainer_state.json
andregustavo04's picture
Upload folder using huggingface_hub
ef21732 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.88,
"eval_steps": 500,
"global_step": 54,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.26666666666666666,
"grad_norm": 0.4435124099254608,
"learning_rate": 4.894973780788722e-05,
"loss": 0.6756,
"num_input_tokens_seen": 50880,
"step": 5
},
{
"epoch": 0.5333333333333333,
"grad_norm": 0.5012878775596619,
"learning_rate": 4.588719528532342e-05,
"loss": 0.6799,
"num_input_tokens_seen": 99184,
"step": 10
},
{
"epoch": 0.8,
"grad_norm": 0.31872692704200745,
"learning_rate": 4.1069690242163484e-05,
"loss": 0.6995,
"num_input_tokens_seen": 147408,
"step": 15
},
{
"epoch": 1.0666666666666667,
"grad_norm": 0.3336561322212219,
"learning_rate": 3.490199415097892e-05,
"loss": 0.6313,
"num_input_tokens_seen": 195568,
"step": 20
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.24536238610744476,
"learning_rate": 2.7902322853130757e-05,
"loss": 0.5683,
"num_input_tokens_seen": 246416,
"step": 25
},
{
"epoch": 1.6,
"grad_norm": 0.2896718978881836,
"learning_rate": 2.0658795558326743e-05,
"loss": 0.4988,
"num_input_tokens_seen": 295744,
"step": 30
},
{
"epoch": 1.8666666666666667,
"grad_norm": 0.2828458249568939,
"learning_rate": 1.3780020494988446e-05,
"loss": 0.5748,
"num_input_tokens_seen": 345744,
"step": 35
},
{
"epoch": 2.1333333333333333,
"grad_norm": 0.37326768040657043,
"learning_rate": 7.843959053281663e-06,
"loss": 0.5793,
"num_input_tokens_seen": 390176,
"step": 40
},
{
"epoch": 2.4,
"grad_norm": 0.28728172183036804,
"learning_rate": 3.3493649053890326e-06,
"loss": 0.55,
"num_input_tokens_seen": 438896,
"step": 45
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.275715172290802,
"learning_rate": 6.738782355044049e-07,
"loss": 0.5715,
"num_input_tokens_seen": 490544,
"step": 50
},
{
"epoch": 2.88,
"num_input_tokens_seen": 530864,
"step": 54,
"total_flos": 4880889936150528.0,
"train_loss": 0.5967088187182391,
"train_runtime": 437.0311,
"train_samples_per_second": 2.059,
"train_steps_per_second": 0.124
}
],
"logging_steps": 5,
"max_steps": 54,
"num_input_tokens_seen": 530864,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4880889936150528.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}