neavo committed on
Commit
e617af0
·
verified ·
1 Parent(s): 3370315
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ loss.png filter=lfs diff=lfs merge=lfs -text
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "assets/facebookai_xlm_roberta_base_pretrain_20240912_e1",
3
  "architectures": [
4
  "XLMRobertaForMaskedLM"
5
  ],
@@ -21,7 +21,7 @@
21
  "pad_token_id": 1,
22
  "position_embedding_type": "absolute",
23
  "torch_dtype": "float32",
24
- "transformers_version": "4.44.1",
25
  "type_vocab_size": 1,
26
  "use_cache": true,
27
  "vocab_size": 250002
 
1
  {
2
+ "_name_or_path": "assets/facebookai_xlm_roberta_base",
3
  "architectures": [
4
  "XLMRobertaForMaskedLM"
5
  ],
 
21
  "pad_token_id": 1,
22
  "position_embedding_type": "absolute",
23
  "torch_dtype": "float32",
24
+ "transformers_version": "4.47.0",
25
  "type_vocab_size": 1,
26
  "use_cache": true,
27
  "vocab_size": 250002
loss.png ADDED

Git LFS Details

  • SHA256: ed841709a9c4e57c4bda2e70b284e93c2c15bb38fc0fe3f2ec13eb458113952f
  • Pointer size: 132 Bytes
  • Size of remote file: 1.26 MB
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91c427bad7ea8cefe0c0a798a343dd9704e99141bb36759f589d2d42839cda57
3
  size 1113205088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98afd8269c67540955c75445024b3e9f34640a3d60f70c5e7cec2f804e829877
3
  size 1113205088
special_tokens_map.json CHANGED
@@ -1,25 +1,7 @@
1
  {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "cls_token": {
10
- "content": "<s>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "eos_token": {
17
- "content": "</s>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
  "mask_token": {
24
  "content": "<mask>",
25
  "lstrip": true,
@@ -27,25 +9,7 @@
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
- "pad_token": {
31
- "content": "<pad>",
32
- "lstrip": false,
33
- "normalized": false,
34
- "rstrip": false,
35
- "single_word": false
36
- },
37
- "sep_token": {
38
- "content": "</s>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false
43
- },
44
- "unk_token": {
45
- "content": "<unk>",
46
- "lstrip": false,
47
- "normalized": false,
48
- "rstrip": false,
49
- "single_word": false
50
- }
51
  }
 
1
  {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "mask_token": {
6
  "content": "<mask>",
7
  "lstrip": true,
 
9
  "rstrip": false,
10
  "single_word": false
11
  },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  }
tokenizer_config.json CHANGED
@@ -42,10 +42,11 @@
42
  }
43
  },
44
  "bos_token": "<s>",
45
- "clean_up_tokenization_spaces": true,
46
  "cls_token": "<s>",
47
  "do_lower_case": false,
48
  "eos_token": "</s>",
 
49
  "mask_token": "<mask>",
50
  "model_max_length": 512,
51
  "pad_token": "<pad>",
 
42
  }
43
  },
44
  "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": false,
46
  "cls_token": "<s>",
47
  "do_lower_case": false,
48
  "eos_token": "</s>",
49
+ "extra_special_tokens": {},
50
  "mask_token": "<mask>",
51
  "model_max_length": 512,
52
  "pad_token": "<pad>",
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0348346af01c92edf289dac4416c73eb11e0ed0a856d90276465524cf1a2c7b
3
- size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b08346156c4ed671d8256f9ee9e056de5bf583dbb21d4361955304edf918c897
3
+ size 5304
training_args.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "output_dir": "output/facebookai_xlm_roberta_base_pretrain_20240912_e1_pretrain",
3
  "overwrite_output_dir": false,
4
  "do_train": false,
5
  "do_eval": false,
@@ -14,13 +14,13 @@
14
  "eval_accumulation_steps": null,
15
  "eval_delay": 0,
16
  "torch_empty_cache_steps": null,
17
- "learning_rate": 1e-05,
18
  "weight_decay": 0.01,
19
  "adam_beta1": 0.9,
20
  "adam_beta2": 0.999,
21
  "adam_epsilon": 1e-08,
22
  "max_grad_norm": 1.0,
23
- "num_train_epochs": 1,
24
  "max_steps": -1,
25
  "lr_scheduler_type": "linear",
26
  "lr_scheduler_kwargs": {},
@@ -32,10 +32,10 @@
32
  "logging_dir": "logs",
33
  "logging_strategy": "steps",
34
  "logging_first_step": false,
35
- "logging_steps": 30,
36
  "logging_nan_inf_filter": true,
37
  "save_strategy": "steps",
38
- "save_steps": 300,
39
  "save_total_limit": 3,
40
  "save_safetensors": true,
41
  "save_on_each_node": false,
@@ -65,7 +65,7 @@
65
  "dataloader_num_workers": 0,
66
  "dataloader_prefetch_factor": null,
67
  "past_index": -1,
68
- "run_name": "output/facebookai_xlm_roberta_base_pretrain_20240912_e1_pretrain",
69
  "disable_tqdm": false,
70
  "remove_unused_columns": true,
71
  "label_names": null,
@@ -92,13 +92,12 @@
92
  },
93
  "deepspeed": null,
94
  "label_smoothing_factor": 0.0,
95
- "optim": "adamw_8bit",
96
  "optim_args": null,
97
  "adafactor": false,
98
  "group_by_length": false,
99
  "length_column_name": "length",
100
  "report_to": [
101
- "tensorboard",
102
  "wandb"
103
  ],
104
  "ddp_find_unused_parameters": null,
@@ -113,11 +112,12 @@
113
  "hub_model_id": null,
114
  "hub_strategy": "every_save",
115
  "hub_token": "<HUB_TOKEN>",
116
- "hub_private_repo": false,
117
  "hub_always_push": false,
118
  "gradient_checkpointing": false,
119
  "gradient_checkpointing_kwargs": null,
120
  "include_inputs_for_metrics": false,
 
121
  "eval_do_concat_batches": true,
122
  "fp16_backend": "auto",
123
  "evaluation_strategy": null,
@@ -141,5 +141,7 @@
141
  "optim_target_modules": null,
142
  "batch_eval_metrics": false,
143
  "eval_on_start": false,
144
- "eval_use_gather_object": false
 
 
145
  }
 
1
  {
2
+ "output_dir": "output/facebookai_xlm_roberta_base_pretrain",
3
  "overwrite_output_dir": false,
4
  "do_train": false,
5
  "do_eval": false,
 
14
  "eval_accumulation_steps": null,
15
  "eval_delay": 0,
16
  "torch_empty_cache_steps": null,
17
+ "learning_rate": 2e-05,
18
  "weight_decay": 0.01,
19
  "adam_beta1": 0.9,
20
  "adam_beta2": 0.999,
21
  "adam_epsilon": 1e-08,
22
  "max_grad_norm": 1.0,
23
+ "num_train_epochs": 2,
24
  "max_steps": -1,
25
  "lr_scheduler_type": "linear",
26
  "lr_scheduler_kwargs": {},
 
32
  "logging_dir": "logs",
33
  "logging_strategy": "steps",
34
  "logging_first_step": false,
35
+ "logging_steps": 10,
36
  "logging_nan_inf_filter": true,
37
  "save_strategy": "steps",
38
+ "save_steps": 100,
39
  "save_total_limit": 3,
40
  "save_safetensors": true,
41
  "save_on_each_node": false,
 
65
  "dataloader_num_workers": 0,
66
  "dataloader_prefetch_factor": null,
67
  "past_index": -1,
68
+ "run_name": "output/facebookai_xlm_roberta_base_pretrain",
69
  "disable_tqdm": false,
70
  "remove_unused_columns": true,
71
  "label_names": null,
 
92
  },
93
  "deepspeed": null,
94
  "label_smoothing_factor": 0.0,
95
+ "optim": "ademamix_8bit",
96
  "optim_args": null,
97
  "adafactor": false,
98
  "group_by_length": false,
99
  "length_column_name": "length",
100
  "report_to": [
 
101
  "wandb"
102
  ],
103
  "ddp_find_unused_parameters": null,
 
112
  "hub_model_id": null,
113
  "hub_strategy": "every_save",
114
  "hub_token": "<HUB_TOKEN>",
115
+ "hub_private_repo": null,
116
  "hub_always_push": false,
117
  "gradient_checkpointing": false,
118
  "gradient_checkpointing_kwargs": null,
119
  "include_inputs_for_metrics": false,
120
+ "include_for_metrics": [],
121
  "eval_do_concat_batches": true,
122
  "fp16_backend": "auto",
123
  "evaluation_strategy": null,
 
141
  "optim_target_modules": null,
142
  "batch_eval_metrics": false,
143
  "eval_on_start": false,
144
+ "use_liger_kernel": false,
145
+ "eval_use_gather_object": false,
146
+ "average_tokens_across_devices": false
147
  }