---
# Experiment configuration.
#
# NOTE(review): this file was reconstructed from a single-line dump. Keys are
# alphabetically ordered within each mapping, which determines most of the
# nesting. Two placements are assumptions to confirm against the consuming
# schema: `sequence_length` / `shuffle_buffer_size` under
# `sequence_iterator.kwargs`, and `wandb` at the top level.

# Output and cache directories (paths are as given; how they are resolved
# depends on the consumer).
base_save_dir: .checkpoints
cache_dir: .cache

crosscoder:
  dec_init_norm: 0.1
  # Both fine-tune init checkpoint settings are explicitly unset.
  ft_init_checkpt_epoch: null
  ft_init_checkpt_folder: null
  hidden_dim: 1536
  k: 20

data:
  activations_harvester:
    harvesting_batch_size: 16
    inference_dtype: float32
    llms:
      - lora_name: mars-jason-25/tiny-stories-33M-TSdata-ft1
        name: roneneldan/TinyStories-Instruct-33M
        revision: null
  activations_shuffle_buffer_size: 1024
  n_batches_for_mean_estimate: 100
  n_batches_for_norm_estimate: 100
  sequence_iterator:
    classname: SleeperTokenSequenceLoader
    # NOTE(review): assumed that all three keys below belong to `kwargs`;
    # alphabetical order alone cannot rule out the last two being siblings
    # of `kwargs`. Confirm against the loader's signature.
    kwargs:
      include_sleeper_data: false
      sequence_length: 128
      shuffle_buffer_size: 1024

experiment_name: lambda_n2_000_S_2025-04-18_16-57-37

# Hook names to use, listed for blocks 0 through 3 in order.
hookpoints:
  - blocks.0.hook_resid_pre
  - blocks.0.ln1.hook_normalized
  - blocks.0.hook_resid_mid
  - blocks.0.ln2.hook_normalized
  - blocks.1.hook_resid_pre
  - blocks.1.ln1.hook_normalized
  - blocks.1.hook_resid_mid
  - blocks.1.ln2.hook_normalized
  - blocks.2.hook_resid_pre
  - blocks.2.ln1.hook_normalized
  - blocks.2.hook_resid_mid
  - blocks.2.ln2.hook_normalized
  - blocks.3.hook_resid_pre
  - blocks.3.ln1.hook_normalized
  - blocks.3.hook_resid_mid
  - blocks.3.ln2.hook_normalized
  - blocks.3.hook_resid_post

seed: 42

train:
  batch_size: 256
  beta_n: 1.0
  # `epochs` and `num_steps_per_epoch` are unset; `num_steps` is the only
  # length setting given a value here.
  epochs: null
  lam_n: 2000.0
  log_every_n_steps: 100
  num_steps: 50000
  num_steps_per_epoch: null
  optimizer:
    initial_learning_rate: 0.0001
    last_pct_of_steps: 0.25
    warmup_pct: 0.05
  # One less than `num_steps`.
  save_every_n_steps: 49999

# NOTE(review): assumed top-level; key ordering would also permit this
# mapping to sit under `train` — confirm against the config schema.
wandb:
  entity: dmitry2-uiuc
  project: sleeper-model-diffing