sabrieyuboglu committed on
Commit
acd73c2
·
verified ·
1 Parent(s): 1477d8c

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +87 -0
config.yaml ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _config_type: haystacks.embeddings.train_batch.TrainConfig
2
+ accelerator: gpu
3
+ accumulate_grad_batches: 1
4
+ adam_beta1: 0.9
5
+ adam_beta2: 0.95
6
+ base_save_dir: /home/sabri/code/haystacks/checkpoints
7
+ check_val_every_n_epoch: null
8
+ ckpt_path: null
9
+ devices: 8
10
+ dtype: bfloat16
11
+ embedding_module:
12
+ _config_type: haystacks.embeddings.modeling.TokenEmbeddingModuleConfig
13
+ batch_attention_layers: []
14
+ embedding_dim: 8192
15
+ kwargs: {}
16
+ target:
17
+ _is_type: true
18
+ name: haystacks.embeddings.modeling.TokenEmbeddingModule
19
+ enable_checkpointing: false
20
+ foreach: null
21
+ gradient_clip_val: null
22
+ launch_id: null
23
+ learnable_bias: true
24
+ learnable_temp: true
25
+ limit_train_batches: 1.0
26
+ limit_val_batches: 1.0
27
+ load_hub: true
28
+ log_every_n_steps: 4
29
+ log_grad_norms: false
30
+ loss_comparison: matched
31
+ loss_token_idxs:
32
+ - 64
33
+ - 128
34
+ - 256
35
+ - 512
36
+ - 1024
37
+ lr: 0.0001
38
+ lr_scheduler: null
39
+ manual_save_epochs: null
40
+ manual_save_steps: 8192
41
+ max_epochs: 512
42
+ max_hidden_layers: null
43
+ max_problems: null
44
+ max_seq_len: 1024
45
+ max_steps: -1
46
+ model_name: meta-llama/Llama-3.2-1B-Instruct
47
+ name: no_batch-attention-lr0.0001-bs32-d8192-new
48
+ num_sanity_val_steps: null
49
+ num_workers: 0
50
+ objective: cross_entropy
51
+ output_dir: null
52
+ overfit_batches: 0.0
53
+ precision: bf16
54
+ reload_dataloaders_every_n_epochs: 0
55
+ run_dir: null
56
+ run_id: null
57
+ samples_per_batch: 32
58
+ save_intermediates: true
59
+ script_id: null
60
+ seed: 42
61
+ train_batch_size: 1
62
+ train_data_path: ScalingIntelligence/math-train-l3.2-3Bi-meta-n128
63
+ use_wandb: false
64
+ val_batch_size: 1
65
+ val_check_interval: 64
66
+ val_data_path: ScalingIntelligence/math-test-l3.2-3Bi-meta-n128
67
+ val_rollout_data_path: null
68
+ val_samples_per_batch: 32
69
+ validate_before_train: true
70
+ wandb:
71
+ _config_type: haystacks.embeddings.train_batch.WandbLoggerConfig
72
+ group: ''
73
+ id: null
74
+ job_type: train
75
+ kwargs: {}
76
+ log_model: false
77
+ mode: online
78
+ name: null
79
+ prefix: ''
80
+ project: haystacks
81
+ save_dir: .
82
+ tags: []
83
+ target:
84
+ _is_type: true
85
+ name: pytorch_lightning.loggers.wandb.WandbLogger
86
+ weight_decay: 0.1
87
+ weights_only: true