{
  "data_train": [
    "data/nkjp-nested-ttt/train.txt",
    "data/nkjp-nested-ttt/valid.txt",
    "data/nkjp-nested-ttt/test.txt"
  ],
  "data_tune": [
    "data/nkjp-nested-ttt/valid.txt"
  ],
  "data_test": [
    "data/nkjp-nested-ttt/test.txt"
  ],
  "pretrained_path": "allegro/herbert-large-cased",
  "output_dir": "../poldeepner2_models/dev/nkjp_full/model_nkjp_full_union_256_101_v_003",
  "cache_dir": "",
  "device": "cuda:0",
  "max_seq_length": 256,
  "do_eval": false,
  "do_lower_case": false,
  "train_batch_size": 16,
  "eval_batch_size": 16,
  "learning_rate": 5e-06,
  "num_train_epochs": 20,
  "warmup_proportion": 0.0,
  "weight_decay": 0.01,
  "adam_epsilon": 1e-08,
  "max_grad_norm": 1.0,
  "seed": 101,
  "gradient_accumulation_steps": 1,
  "fp16": false,
  "fp16_opt_level": "O1",
  "loss_scale": 0,
  "dropout": 0.2,
  "freeze_model": false,
  "epoch_save_model": true,
  "sequence_generator": "union",
  "sequence_generator_for_eval": "context-window",
  "training_mix": false,
  "wandb": "nkjp_full",
  "hidden_size": 1024
}