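# Provenance: header text from the Hugging Face Hub file page, kept as comments.
# Uploader: dmanningcoe (profile picture caption: "dmanningcoe's picture")
# Commit message: Upload tinystories_lambda2000/experiment_config.yaml
#   with huggingface_hub
# Commit: 051b5f6 (verified)
# Page controls: Raw / History / Blame / Contribute / Delete
# File size: 1.56 kB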
# Directory settings; both values are relative, dot-prefixed paths.
# NOTE(review): presumably resolved against the process working directory —
# confirm in the code that loads this config.
base_save_dir: .checkpoints
cache_dir: .cache
# Crosscoder settings, grouped as a single mapping under `crosscoder`.
# Child keys must be indented; flush with the parent they would load as
# separate top-level keys and leave `crosscoder` itself null.
crosscoder:
  dec_init_norm: 0.1
  # Both ft_init_* fields are explicitly null (unset) for this run.
  ft_init_checkpt_epoch: null
  ft_init_checkpt_folder: null
  hidden_dim: 1536
  # NOTE(review): presumably the top-k sparsity level — confirm in model code.
  k: 20
# Data pipeline settings, grouped under `data`.
# Keys at each level are in alphabetical order (sorted-key dump), which is
# what fixes the nesting shown here.
data:
  activations_harvester:
    harvesting_batch_size: 16
    inference_dtype: float32
    # One list entry per model; lora_name, name and revision all belong to
    # the same entry.
    llms:
      - lora_name: mars-jason-25/tiny-stories-33M-TSdata-ft1
        name: roneneldan/TinyStories-Instruct-33M
        revision: null
  activations_shuffle_buffer_size: 1024
  n_batches_for_mean_estimate: 100
  n_batches_for_norm_estimate: 100
  sequence_iterator:
    classname: SleeperTokenSequenceLoader
    # NOTE(review): sequence_length and shuffle_buffer_size are placed in
    # kwargs alongside include_sleeper_data; key order alone would also allow
    # them directly under sequence_iterator — confirm against the loader's
    # constructor signature.
    kwargs:
      include_sleeper_data: false
      sequence_length: 128
      shuffle_buffer_size: 1024
# Run identifier; quoted so the embedded date/time digits always stay a string.
experiment_name: 'lambda_n2_000_S_2025-04-18_16-57-37'
# Hook names in order: four per block for blocks 0-3 (resid_pre, ln1
# normalized, resid_mid, ln2 normalized), then the final resid_post of
# block 3 — 17 entries in total.
hookpoints:
  # block 0
  - blocks.0.hook_resid_pre
  - blocks.0.ln1.hook_normalized
  - blocks.0.hook_resid_mid
  - blocks.0.ln2.hook_normalized
  # block 1
  - blocks.1.hook_resid_pre
  - blocks.1.ln1.hook_normalized
  - blocks.1.hook_resid_mid
  - blocks.1.ln2.hook_normalized
  # block 2
  - blocks.2.hook_resid_pre
  - blocks.2.ln1.hook_normalized
  - blocks.2.hook_resid_mid
  - blocks.2.ln2.hook_normalized
  # block 3 (including its output residual)
  - blocks.3.hook_resid_pre
  - blocks.3.ln1.hook_normalized
  - blocks.3.hook_resid_mid
  - blocks.3.ln2.hook_normalized
  - blocks.3.hook_resid_post
# Integer seed value for the run.
# NOTE(review): which RNGs it seeds is not visible here — confirm in the consumer.
seed: 42
# Training settings, grouped under `train`.
train:
  batch_size: 256
  beta_n: 1.0
  # epochs and num_steps_per_epoch are null; the run length is given by
  # num_steps instead.
  epochs: null
  lam_n: 2000.0
  log_every_n_steps: 100
  num_steps: 50000
  num_steps_per_epoch: null
  # Optimizer sub-mapping: its three keys are nested one level deeper than
  # the other train keys.
  optimizer:
    initial_learning_rate: 0.0001
    last_pct_of_steps: 0.25
    warmup_pct: 0.05
  # One less than num_steps (50000).
  save_every_n_steps: 49999
# Weights & Biases settings: entity and project grouped under `wandb`.
# NOTE(review): kept as a top-level key; sort order alone would also allow it
# as the last key of `train` — confirm against the config schema.
wandb:
  entity: dmitry2-uiuc
  project: sleeper-model-diffing