karoldobiczek
committed on
Upload folder using huggingface_hub
- args.pkl +3 -0
- config.json +26 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
- trainer_state_last.json +1 -0
- vocab.txt +0 -0
args.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c19ccff7c01da3fab422527379e8a5fa662b591f3f9d97b3c27523a82aa46302
+size 706
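
Note: args.pkl is stored as a Git LFS pointer, so the diff above shows only the pointer's three fields (spec version, sha256 object id, byte size), not the pickled training arguments themselves. A minimal sketch of reading such a pointer, assuming the standard three-line layout shown above (the helper name is mine):

# Parse a Git LFS pointer file into its key/value fields.
# Sketch only: assumes the standard "version / oid / size" layout.
def read_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

pointer = read_lfs_pointer("args.pkl")
print(pointer["oid"])   # sha256:c19ccff7...
print(pointer["size"])  # 706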
config.json
ADDED
@@ -0,0 +1,26 @@
+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.15.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
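
The config describes an unmodified bert-base-uncased encoder (12 layers, 12 heads, hidden size 768, vocab size 30522) with a masked-LM head. A sketch of instantiating it with transformers; "user/repo" is a placeholder, since the commit page does not show this repo's id:

# Load the config and masked-LM model from this repo.
# "user/repo" is a placeholder id (assumption), not shown on this page.
from transformers import AutoConfig, AutoModelForMaskedLM

config = AutoConfig.from_pretrained("user/repo")
assert config.model_type == "bert" and config.num_hidden_layers == 12

model = AutoModelForMaskedLM.from_pretrained("user/repo")  # reads pytorch_model.bin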
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d2a0ad87070139931c82a824c838870c5b1f8737f574ec64fb29f7e81becc4b
+size 438144246
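
The ~438 MB weights file is likewise committed as an LFS pointer; downloading through huggingface_hub resolves the pointer to the actual binary. A sketch, again with a placeholder repo id:

# hf_hub_download follows the LFS pointer and returns a local
# path to the real pytorch_model.bin (~438 MB).
from huggingface_hub import hf_hub_download

weights_path = hf_hub_download(repo_id="user/repo", filename="pytorch_model.bin")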
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
+{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
+{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "bert-base-uncased", "tokenizer_class": "BertTokenizer"}
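
tokenizer_config.json pins BertTokenizer with lowercasing and a 512-token model_max_length, and special_tokens_map.json supplies the five standard BERT special tokens. A load-and-check sketch (placeholder repo id again):

# Load the tokenizer exactly as configured above.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("user/repo")  # placeholder id
print(tokenizer.mask_token)        # [MASK]
print(tokenizer.model_max_length)  # 512
enc = tokenizer("hello world", truncation=True)
print(enc.input_ids)  # wrapped in [CLS] (id 101) ... [SEP] (id 102)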
trainer_state_last.json
ADDED
@@ -0,0 +1 @@
+{"best_metric": 4.037292003631592, "best_model_checkpoint": "results/finetuned_models/FOMC/finetuned_CMLM_maks_frac-0.2_0.55-explain_wrt-predicted/checkpoint-96", "epoch": 5.0, "global_step": 160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [{"epoch": 0.31, "learning_rate": 4.8437500000000005e-05, "loss": 4.8877, "step": 10}, {"epoch": 0.5, "eval_loss": 4.4532647132873535, "eval_runtime": 1.9737, "eval_samples_per_second": 148.455, "eval_steps_per_second": 9.627, "step": 16}, {"epoch": 0.62, "learning_rate": 4.6875e-05, "loss": 4.5595, "step": 20}, {"epoch": 0.94, "learning_rate": 4.5312500000000004e-05, "loss": 4.4233, "step": 30}, {"epoch": 1.0, "eval_loss": 4.297275066375732, "eval_runtime": 2.0378, "eval_samples_per_second": 143.785, "eval_steps_per_second": 9.324, "step": 32}, {"epoch": 1.25, "learning_rate": 4.375e-05, "loss": 4.0304, "step": 40}, {"epoch": 1.5, "eval_loss": 4.06475830078125, "eval_runtime": 2.1346, "eval_samples_per_second": 137.263, "eval_steps_per_second": 8.901, "step": 48}, {"epoch": 1.56, "learning_rate": 4.21875e-05, "loss": 3.8248, "step": 50}, {"epoch": 1.88, "learning_rate": 4.0625000000000005e-05, "loss": 3.6793, "step": 60}, {"epoch": 2.0, "eval_loss": 4.130334377288818, "eval_runtime": 2.0447, "eval_samples_per_second": 143.295, "eval_steps_per_second": 9.292, "step": 64}, {"epoch": 2.19, "learning_rate": 3.90625e-05, "loss": 3.4536, "step": 70}, {"epoch": 2.5, "learning_rate": 3.7500000000000003e-05, "loss": 3.2957, "step": 80}, {"epoch": 2.5, "eval_loss": 4.101141929626465, "eval_runtime": 2.0417, "eval_samples_per_second": 143.506, "eval_steps_per_second": 9.306, "step": 80}, {"epoch": 2.81, "learning_rate": 3.59375e-05, "loss": 3.3531, "step": 90}, {"epoch": 3.0, "eval_loss": 4.037292003631592, "eval_runtime": 2.0425, "eval_samples_per_second": 143.453, "eval_steps_per_second": 9.302, "step": 96}, {"epoch": 3.12, "learning_rate": 3.4375e-05, "loss": 3.122, "step": 100}, {"epoch": 3.44, "learning_rate": 3.2812500000000005e-05, "loss": 2.8461, "step": 110}, {"epoch": 3.5, "eval_loss": 4.139313697814941, "eval_runtime": 2.0333, "eval_samples_per_second": 144.099, "eval_steps_per_second": 9.344, "step": 112}, {"epoch": 3.75, "learning_rate": 3.125e-05, "loss": 2.8702, "step": 120}, {"epoch": 4.0, "eval_loss": 4.090446949005127, "eval_runtime": 2.0572, "eval_samples_per_second": 142.427, "eval_steps_per_second": 9.236, "step": 128}, {"epoch": 4.06, "learning_rate": 2.96875e-05, "loss": 2.9752, "step": 130}, {"epoch": 4.38, "learning_rate": 2.8125000000000003e-05, "loss": 2.6569, "step": 140}, {"epoch": 4.5, "eval_loss": 4.154040336608887, "eval_runtime": 2.0347, "eval_samples_per_second": 144.003, "eval_steps_per_second": 9.338, "step": 144}, {"epoch": 4.69, "learning_rate": 2.6562500000000002e-05, "loss": 2.7526, "step": 150}, {"epoch": 5.0, "learning_rate": 2.5e-05, "loss": 2.5552, "step": 160}, {"epoch": 5.0, "eval_loss": 4.057391166687012, "eval_runtime": 2.052, "eval_samples_per_second": 142.788, "eval_steps_per_second": 9.259, "step": 160}], "max_steps": 320, "num_train_epochs": 10, "total_flos": 217760342768100.0, "trial_name": null, "trial_params": null}
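
trainer_state_last.json captures the training loop mid-run: epoch 5 of 10, step 160 of 320, with the best eval_loss of 4.0373 reached at checkpoint-96 (epoch 3). A sketch that extracts the best checkpoint and the eval-loss trajectory from the local file:

# Print the best checkpoint and the eval-loss curve from log_history.
import json

with open("trainer_state_last.json") as f:
    state = json.load(f)

print(state["best_metric"], state["best_model_checkpoint"])
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f'step {entry["step"]:4d}  eval_loss {entry["eval_loss"]:.4f}')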
vocab.txt
ADDED
The diff for this file is too large to render. See raw diff.