End of training
Browse files- final_metrics.json +6 -9
- training_args.json +8 -8
final_metrics.json
CHANGED
@@ -1,11 +1,8 @@
|
|
1 |
{
|
2 |
-
"eval_loss":
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"
|
6 |
-
"
|
7 |
-
"
|
8 |
-
"eval_samples_per_second": 4.825,
|
9 |
-
"eval_steps_per_second": 1.206,
|
10 |
-
"epoch": 2.986666666666667
|
11 |
}
|
|
|
1 |
{
|
2 |
+
"eval_loss": 23.53748321533203,
|
3 |
+
"eval_model_preparation_time": 0.004,
|
4 |
+
"eval_runtime": 4.9923,
|
5 |
+
"eval_samples_per_second": 10.015,
|
6 |
+
"eval_steps_per_second": 2.604,
|
7 |
+
"epoch": 2.9734513274336285
|
|
|
|
|
|
|
8 |
}
|
training_args.json
CHANGED
@@ -6,11 +6,11 @@
|
|
6 |
"do_predict": false,
|
7 |
"eval_strategy": "steps",
|
8 |
"prediction_loss_only": false,
|
9 |
-
"per_device_train_batch_size":
|
10 |
-
"per_device_eval_batch_size":
|
11 |
"per_gpu_train_batch_size": null,
|
12 |
"per_gpu_eval_batch_size": null,
|
13 |
-
"gradient_accumulation_steps":
|
14 |
"eval_accumulation_steps": null,
|
15 |
"eval_delay": 0,
|
16 |
"torch_empty_cache_steps": null,
|
@@ -29,13 +29,13 @@
|
|
29 |
"log_level": "passive",
|
30 |
"log_level_replica": "warning",
|
31 |
"log_on_each_node": true,
|
32 |
-
"logging_dir": "
|
33 |
"logging_strategy": "steps",
|
34 |
"logging_first_step": false,
|
35 |
-
"logging_steps":
|
36 |
"logging_nan_inf_filter": true,
|
37 |
"save_strategy": "steps",
|
38 |
-
"save_steps":
|
39 |
"save_total_limit": 3,
|
40 |
"save_safetensors": true,
|
41 |
"save_on_each_node": false,
|
@@ -61,7 +61,7 @@
|
|
61 |
"tpu_metrics_debug": false,
|
62 |
"debug": [],
|
63 |
"dataloader_drop_last": false,
|
64 |
-
"eval_steps":
|
65 |
"dataloader_num_workers": 0,
|
66 |
"dataloader_prefetch_factor": null,
|
67 |
"past_index": -1,
|
@@ -112,7 +112,7 @@
|
|
112 |
"hub_token": "<HUB_TOKEN>",
|
113 |
"hub_private_repo": false,
|
114 |
"hub_always_push": false,
|
115 |
-
"gradient_checkpointing":
|
116 |
"gradient_checkpointing_kwargs": null,
|
117 |
"include_inputs_for_metrics": false,
|
118 |
"include_for_metrics": [],
|
|
|
6 |
"do_predict": false,
|
7 |
"eval_strategy": "steps",
|
8 |
"prediction_loss_only": false,
|
9 |
+
"per_device_train_batch_size": 4,
|
10 |
+
"per_device_eval_batch_size": 4,
|
11 |
"per_gpu_train_batch_size": null,
|
12 |
"per_gpu_eval_batch_size": null,
|
13 |
+
"gradient_accumulation_steps": 16,
|
14 |
"eval_accumulation_steps": null,
|
15 |
"eval_delay": 0,
|
16 |
"torch_empty_cache_steps": null,
|
|
|
29 |
"log_level": "passive",
|
30 |
"log_level_replica": "warning",
|
31 |
"log_on_each_node": true,
|
32 |
+
"logging_dir": "./logs",
|
33 |
"logging_strategy": "steps",
|
34 |
"logging_first_step": false,
|
35 |
+
"logging_steps": 100,
|
36 |
"logging_nan_inf_filter": true,
|
37 |
"save_strategy": "steps",
|
38 |
+
"save_steps": 500,
|
39 |
"save_total_limit": 3,
|
40 |
"save_safetensors": true,
|
41 |
"save_on_each_node": false,
|
|
|
61 |
"tpu_metrics_debug": false,
|
62 |
"debug": [],
|
63 |
"dataloader_drop_last": false,
|
64 |
+
"eval_steps": 100,
|
65 |
"dataloader_num_workers": 0,
|
66 |
"dataloader_prefetch_factor": null,
|
67 |
"past_index": -1,
|
|
|
112 |
"hub_token": "<HUB_TOKEN>",
|
113 |
"hub_private_repo": false,
|
114 |
"hub_always_push": false,
|
115 |
+
"gradient_checkpointing": false,
|
116 |
"gradient_checkpointing_kwargs": null,
|
117 |
"include_inputs_for_metrics": false,
|
118 |
"include_for_metrics": [],
|