# File size: 1,328 Bytes (viewer metadata, commented out — was not valid YAML)
# source revision/id: 7135151
---
# Training configuration for layerwise SAE (sparse autoencoder) training on
# GPT-2 small, attaching one SAE at blocks.6.hook_resid_pre.
# NOTE(review): key semantics are inferred from key names; confirm against the
# consuming trainer's config schema.

# --- Batching and metric-collection cadence ---
act_frequency_n_tokens: 500000
batch_size: 8
collect_act_frequency_every_n_samples: 40000
collect_output_metrics_every_n_samples: 0  # 0 presumably disables — verify
cooldown_samples: 0
# Larger than batch_size — presumably implies gradient accumulation
# (16 / 8 = 2 micro-batches per step) — TODO confirm.
effective_batch_size: 16

# --- Evaluation data: pre-tokenized OpenWebText (GPT-2 tokenizer), streamed ---
eval_data:
  column_name: input_ids
  dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
  is_tokenized: true
  n_ctx: 1024
  seed: 0
  split: train
  streaming: true
  tokenizer_name: gpt2
eval_every_n_samples: 40000
eval_n_samples: 500
log_every_n_grad_steps: 20

# --- Loss terms (null disables a term) ---
loss:
  in_to_orig: null
  logits_kl: null
  out_to_in:
    coeff: 1.0
  out_to_orig: null
  sparsity:
    coeff: 4.0
    p_norm: 1.0  # p = 1 → L1 sparsity penalty

# --- Optimizer and LR schedule ---
lr: 0.0005
lr_schedule: cosine
max_grad_norm: 10.0
min_lr_factor: 0.1
n_samples: 400000

# --- SAE definition ---
saes:
  dict_size_to_input_ratio: 60.0
  k: null  # no top-k — type_of_sparsifier below is plain "sae"
  pretrained_sae_paths: null
  retrain_saes: false
  sae_positions:
    - blocks.6.hook_resid_pre
  type_of_sparsifier: sae

# --- Checkpointing and base model ---
save_dir: /mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
save_every_n_samples: null
seed: 0
tlens_model_name: gpt2-small
tlens_model_path: null

# --- Training data: same dataset/settings as eval_data above ---
train_data:
  column_name: input_ids
  dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
  is_tokenized: true
  n_ctx: 1024
  seed: 0
  split: train
  streaming: true
  tokenizer_name: gpt2

# --- Weights & Biases logging ---
wandb_project: gpt2-layerwise_play
wandb_run_name: null  # presumably auto-generated when null — verify
wandb_run_name_prefix: ''
warmup_samples: 20000