20241212
Browse files- config.json +2 -2
- eval.png +0 -0
- loss.png +0 -0
- metrics.json +42 -42
- model.safetensors +1 -1
- tokenizer_config.json +2 -1
- training_args.bin +2 -2
- training_args.json +9 -7
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "assets/
|
3 |
"architectures": [
|
4 |
"XLMRobertaForTokenClassification"
|
5 |
],
|
@@ -47,7 +47,7 @@
|
|
47 |
"pad_token_id": 1,
|
48 |
"position_embedding_type": "absolute",
|
49 |
"torch_dtype": "float32",
|
50 |
-
"transformers_version": "4.
|
51 |
"type_vocab_size": 1,
|
52 |
"use_cache": true,
|
53 |
"vocab_size": 250002
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "assets/facebookai_xlm_roberta_base_pretrain_20241212",
|
3 |
"architectures": [
|
4 |
"XLMRobertaForTokenClassification"
|
5 |
],
|
|
|
47 |
"pad_token_id": 1,
|
48 |
"position_embedding_type": "absolute",
|
49 |
"torch_dtype": "float32",
|
50 |
+
"transformers_version": "4.47.0",
|
51 |
"type_vocab_size": 1,
|
52 |
"use_cache": true,
|
53 |
"vocab_size": 250002
|
eval.png
ADDED
loss.png
ADDED
metrics.json
CHANGED
@@ -1,62 +1,62 @@
|
|
1 |
{
|
2 |
-
"eval_loss": 0.
|
3 |
-
"eval_f1": 0.
|
4 |
-
"eval_recall": 0.
|
5 |
-
"eval_accuracy": 0.
|
6 |
-
"eval_precision": 0.
|
7 |
"eval_classification_report": {
|
8 |
"EVT": {
|
9 |
-
"precision": 0.
|
10 |
-
"recall": 0.
|
11 |
-
"f1-score": 0.
|
12 |
-
"support":
|
13 |
},
|
14 |
"LOC": {
|
15 |
-
"precision": 0.
|
16 |
-
"recall": 0.
|
17 |
-
"f1-score": 0.
|
18 |
-
"support":
|
19 |
},
|
20 |
"ORG": {
|
21 |
-
"precision": 0.
|
22 |
-
"recall": 0.
|
23 |
-
"f1-score": 0.
|
24 |
-
"support":
|
25 |
},
|
26 |
"PER": {
|
27 |
-
"precision": 0.
|
28 |
-
"recall": 0.
|
29 |
-
"f1-score": 0.
|
30 |
-
"support":
|
31 |
},
|
32 |
"PRD": {
|
33 |
-
"precision": 0.
|
34 |
-
"recall": 0.
|
35 |
-
"f1-score": 0.
|
36 |
-
"support":
|
37 |
},
|
38 |
"micro avg": {
|
39 |
-
"precision": 0.
|
40 |
-
"recall": 0.
|
41 |
-
"f1-score": 0.
|
42 |
-
"support":
|
43 |
},
|
44 |
"macro avg": {
|
45 |
-
"precision": 0.
|
46 |
-
"recall": 0.
|
47 |
-
"f1-score": 0.
|
48 |
-
"support":
|
49 |
},
|
50 |
"weighted avg": {
|
51 |
-
"precision": 0.
|
52 |
-
"recall": 0.
|
53 |
-
"f1-score": 0.
|
54 |
-
"support":
|
55 |
}
|
56 |
},
|
57 |
-
"eval_runtime":
|
58 |
-
"eval_samples_per_second":
|
59 |
-
"eval_steps_per_second": 4.
|
60 |
-
"epoch":
|
61 |
-
"train_loss": 0.
|
62 |
}
|
|
|
1 |
{
|
2 |
+
"eval_loss": 0.062092795968055725,
|
3 |
+
"eval_f1": 0.7617369641587196,
|
4 |
+
"eval_recall": 0.7967042497831743,
|
5 |
+
"eval_accuracy": 0.9821096951844263,
|
6 |
+
"eval_precision": 0.7301653204843213,
|
7 |
"eval_classification_report": {
|
8 |
"EVT": {
|
9 |
+
"precision": 0.7176220806794055,
|
10 |
+
"recall": 0.7502774694783574,
|
11 |
+
"f1-score": 0.7335865436787845,
|
12 |
+
"support": 901
|
13 |
},
|
14 |
"LOC": {
|
15 |
+
"precision": 0.7591863517060368,
|
16 |
+
"recall": 0.8615040953090097,
|
17 |
+
"f1-score": 0.8071154516916638,
|
18 |
+
"support": 1343
|
19 |
},
|
20 |
"ORG": {
|
21 |
+
"precision": 0.7531831537708129,
|
22 |
+
"recall": 0.77834008097166,
|
23 |
+
"f1-score": 0.7655550024888004,
|
24 |
+
"support": 988
|
25 |
},
|
26 |
"PER": {
|
27 |
+
"precision": 0.8291905672402804,
|
28 |
+
"recall": 0.8690714762859052,
|
29 |
+
"f1-score": 0.8486627527723418,
|
30 |
+
"support": 1497
|
31 |
},
|
32 |
"PRD": {
|
33 |
+
"precision": 0.6442762535477767,
|
34 |
+
"recall": 0.6663405088062623,
|
35 |
+
"f1-score": 0.6551226551226551,
|
36 |
+
"support": 1022
|
37 |
},
|
38 |
"micro avg": {
|
39 |
+
"precision": 0.7498773106494356,
|
40 |
+
"recall": 0.7970787689097548,
|
41 |
+
"f1-score": 0.772757923128793,
|
42 |
+
"support": 5751
|
43 |
},
|
44 |
"macro avg": {
|
45 |
+
"precision": 0.7406916813888624,
|
46 |
+
"recall": 0.785106726170239,
|
47 |
+
"f1-score": 0.7620084811508491,
|
48 |
+
"support": 5751
|
49 |
},
|
50 |
"weighted avg": {
|
51 |
+
"precision": 0.7494450236903917,
|
52 |
+
"recall": 0.7970787689097548,
|
53 |
+
"f1-score": 0.7722603659139234,
|
54 |
+
"support": 5751
|
55 |
}
|
56 |
},
|
57 |
+
"eval_runtime": 3.2082,
|
58 |
+
"eval_samples_per_second": 638.365,
|
59 |
+
"eval_steps_per_second": 4.987,
|
60 |
+
"epoch": 18.28821811100292,
|
61 |
+
"train_loss": 0.018
|
62 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1109870108
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:394c9b3c7b10aa527f3bb2646e04cfe2798d9f3abafcbd098d749d8cba7d82b7
|
3 |
size 1109870108
|
tokenizer_config.json
CHANGED
@@ -42,10 +42,11 @@
|
|
42 |
}
|
43 |
},
|
44 |
"bos_token": "<s>",
|
45 |
-
"clean_up_tokenization_spaces":
|
46 |
"cls_token": "<s>",
|
47 |
"do_lower_case": false,
|
48 |
"eos_token": "</s>",
|
|
|
49 |
"mask_token": "<mask>",
|
50 |
"model_max_length": 512,
|
51 |
"pad_token": "<pad>",
|
|
|
42 |
}
|
43 |
},
|
44 |
"bos_token": "<s>",
|
45 |
+
"clean_up_tokenization_spaces": false,
|
46 |
"cls_token": "<s>",
|
47 |
"do_lower_case": false,
|
48 |
"eos_token": "</s>",
|
49 |
+
"extra_special_tokens": {},
|
50 |
"mask_token": "<mask>",
|
51 |
"model_max_length": 512,
|
52 |
"pad_token": "<pad>",
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e342d99ea1971421359dcecd60501450ffb679d7d40b8535d11bf6cc4071a34d
|
3 |
+
size 5240
|
training_args.json
CHANGED
@@ -6,15 +6,15 @@
|
|
6 |
"do_predict": false,
|
7 |
"eval_strategy": "steps",
|
8 |
"prediction_loss_only": false,
|
9 |
-
"per_device_train_batch_size":
|
10 |
"per_device_eval_batch_size": 128,
|
11 |
"per_gpu_train_batch_size": null,
|
12 |
"per_gpu_eval_batch_size": null,
|
13 |
-
"gradient_accumulation_steps":
|
14 |
"eval_accumulation_steps": null,
|
15 |
"eval_delay": 0,
|
16 |
"torch_empty_cache_steps": null,
|
17 |
-
"learning_rate":
|
18 |
"weight_decay": 0.01,
|
19 |
"adam_beta1": 0.9,
|
20 |
"adam_beta2": 0.999,
|
@@ -93,13 +93,12 @@
|
|
93 |
},
|
94 |
"deepspeed": null,
|
95 |
"label_smoothing_factor": 0.0,
|
96 |
-
"optim": "
|
97 |
"optim_args": null,
|
98 |
"adafactor": false,
|
99 |
"group_by_length": false,
|
100 |
"length_column_name": "length",
|
101 |
"report_to": [
|
102 |
-
"tensorboard",
|
103 |
"wandb"
|
104 |
],
|
105 |
"ddp_find_unused_parameters": null,
|
@@ -114,11 +113,12 @@
|
|
114 |
"hub_model_id": null,
|
115 |
"hub_strategy": "every_save",
|
116 |
"hub_token": null,
|
117 |
-
"hub_private_repo":
|
118 |
"hub_always_push": false,
|
119 |
"gradient_checkpointing": false,
|
120 |
"gradient_checkpointing_kwargs": null,
|
121 |
"include_inputs_for_metrics": false,
|
|
|
122 |
"eval_do_concat_batches": true,
|
123 |
"fp16_backend": "auto",
|
124 |
"evaluation_strategy": null,
|
@@ -143,5 +143,7 @@
|
|
143 |
"optim_target_modules": null,
|
144 |
"batch_eval_metrics": false,
|
145 |
"eval_on_start": false,
|
146 |
-
"
|
|
|
|
|
147 |
}
|
|
|
6 |
"do_predict": false,
|
7 |
"eval_strategy": "steps",
|
8 |
"prediction_loss_only": false,
|
9 |
+
"per_device_train_batch_size": 48,
|
10 |
"per_device_eval_batch_size": 128,
|
11 |
"per_gpu_train_batch_size": null,
|
12 |
"per_gpu_eval_batch_size": null,
|
13 |
+
"gradient_accumulation_steps": 2,
|
14 |
"eval_accumulation_steps": null,
|
15 |
"eval_delay": 0,
|
16 |
"torch_empty_cache_steps": null,
|
17 |
+
"learning_rate": 1e-05,
|
18 |
"weight_decay": 0.01,
|
19 |
"adam_beta1": 0.9,
|
20 |
"adam_beta2": 0.999,
|
|
|
93 |
},
|
94 |
"deepspeed": null,
|
95 |
"label_smoothing_factor": 0.0,
|
96 |
+
"optim": "ademamix_8bit",
|
97 |
"optim_args": null,
|
98 |
"adafactor": false,
|
99 |
"group_by_length": false,
|
100 |
"length_column_name": "length",
|
101 |
"report_to": [
|
|
|
102 |
"wandb"
|
103 |
],
|
104 |
"ddp_find_unused_parameters": null,
|
|
|
113 |
"hub_model_id": null,
|
114 |
"hub_strategy": "every_save",
|
115 |
"hub_token": null,
|
116 |
+
"hub_private_repo": null,
|
117 |
"hub_always_push": false,
|
118 |
"gradient_checkpointing": false,
|
119 |
"gradient_checkpointing_kwargs": null,
|
120 |
"include_inputs_for_metrics": false,
|
121 |
+
"include_for_metrics": [],
|
122 |
"eval_do_concat_batches": true,
|
123 |
"fp16_backend": "auto",
|
124 |
"evaluation_strategy": null,
|
|
|
143 |
"optim_target_modules": null,
|
144 |
"batch_eval_metrics": false,
|
145 |
"eval_on_start": false,
|
146 |
+
"use_liger_kernel": false,
|
147 |
+
"eval_use_gather_object": false,
|
148 |
+
"average_tokens_across_devices": false
|
149 |
}
|