File size: 1,751 Bytes
eacae03 8937053 eacae03 083e767 8937053 083e767 8937053 083e767 8937053 083e767 8937053 083e767 eacae03 8937053 eacae03 083e767 8937053 083e767 8937053 083e767 8937053 083e767 8937053 083e767 eacae03 8937053 eacae03 083e767 8937053 eacae03 8937053 eacae03 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 18.51851851851852,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.85,
"learning_rate": 5.401e-05,
"loss": 2.5377,
"step": 100
},
{
"epoch": 3.7,
"learning_rate": 4.8020000000000004e-05,
"loss": 1.7685,
"step": 200
},
{
"epoch": 5.56,
"learning_rate": 4.203e-05,
"loss": 1.4614,
"step": 300
},
{
"epoch": 7.41,
"learning_rate": 3.604e-05,
"loss": 1.282,
"step": 400
},
{
"epoch": 9.26,
"learning_rate": 3.0050000000000002e-05,
"loss": 1.157,
"step": 500
},
{
"epoch": 11.11,
"learning_rate": 2.406e-05,
"loss": 1.0986,
"step": 600
},
{
"epoch": 12.96,
"learning_rate": 1.807e-05,
"loss": 1.0388,
"step": 700
},
{
"epoch": 14.81,
"learning_rate": 1.2079999999999998e-05,
"loss": 0.9946,
"step": 800
},
{
"epoch": 16.67,
"learning_rate": 6.0899999999999984e-06,
"loss": 0.9791,
"step": 900
},
{
"epoch": 18.52,
"learning_rate": 1e-07,
"loss": 0.9462,
"step": 1000
},
{
"epoch": 18.52,
"step": 1000,
"total_flos": 2.7666045298173542e+17,
"train_loss": 1.3263815078735353,
"train_runtime": 482.7688,
"train_samples_per_second": 33.142,
"train_steps_per_second": 2.071
}
],
"max_steps": 1000,
"num_train_epochs": 19,
"total_flos": 2.7666045298173542e+17,
"trial_name": null,
"trial_params": null
}
|