| base_save_dir: .checkpoints |
| cache_dir: .cache |
| crosscoder: |
| dec_init_norm: 0.1 |
| ft_init_checkpt_epoch: null |
| ft_init_checkpt_folder: null |
| hidden_dim: 1536 |
| k: 20 |
| data: |
| activations_harvester: |
| harvesting_batch_size: 16 |
| inference_dtype: float32 |
| llms: |
| - lora_name: mars-jason-25/tiny-stories-33M-TSdata-ft1 |
| name: roneneldan/TinyStories-Instruct-33M |
| revision: null |
| activations_shuffle_buffer_size: 1024 |
| n_batches_for_mean_estimate: 100 |
| n_batches_for_norm_estimate: 100 |
| sequence_iterator: |
| classname: SleeperTokenSequenceLoader |
| kwargs: |
| include_sleeper_data: false |
| sequence_length: 128 |
| shuffle_buffer_size: 1024 |
| experiment_name: lambda_n2_000_S_2025-04-18_16-57-37 |
| hookpoints: |
| - blocks.0.hook_resid_pre |
| - blocks.0.ln1.hook_normalized |
| - blocks.0.hook_resid_mid |
| - blocks.0.ln2.hook_normalized |
| - blocks.1.hook_resid_pre |
| - blocks.1.ln1.hook_normalized |
| - blocks.1.hook_resid_mid |
| - blocks.1.ln2.hook_normalized |
| - blocks.2.hook_resid_pre |
| - blocks.2.ln1.hook_normalized |
| - blocks.2.hook_resid_mid |
| - blocks.2.ln2.hook_normalized |
| - blocks.3.hook_resid_pre |
| - blocks.3.ln1.hook_normalized |
| - blocks.3.hook_resid_mid |
| - blocks.3.ln2.hook_normalized |
| - blocks.3.hook_resid_post |
| seed: 42 |
| train: |
| batch_size: 256 |
| beta_n: 1.0 |
| epochs: null |
| lam_n: 2000.0 |
| log_every_n_steps: 100 |
| num_steps: 50000 |
| num_steps_per_epoch: null |
| optimizer: |
| initial_learning_rate: 0.0001 |
| last_pct_of_steps: 0.25 |
| warmup_pct: 0.05 |
| save_every_n_steps: 49999 |
| wandb: |
| entity: dmitry2-uiuc |
| project: sleeper-model-diffing |
|
|