# Upload tinystories_lambda2000/experiment_config.yaml with huggingface_hub

# 051b5f6 verified 21 days ago

# 1.56 kB

	base_save_dir: .checkpoints
	cache_dir: .cache
	crosscoder:
	dec_init_norm: 0.1
	ft_init_checkpt_epoch: null
	ft_init_checkpt_folder: null
	hidden_dim: 1536
	k: 20
	data:
	activations_harvester:
	harvesting_batch_size: 16
	inference_dtype: float32
	llms:
	- lora_name: mars-jason-25/tiny-stories-33M-TSdata-ft1
	name: roneneldan/TinyStories-Instruct-33M
	revision: null
	activations_shuffle_buffer_size: 1024
	n_batches_for_mean_estimate: 100
	n_batches_for_norm_estimate: 100
	sequence_iterator:
	classname: SleeperTokenSequenceLoader
	kwargs:
	include_sleeper_data: false
	sequence_length: 128
	shuffle_buffer_size: 1024
	experiment_name: lambda_n2_000_S_2025-04-18_16-57-37
	hookpoints:
	- blocks.0.hook_resid_pre
	- blocks.0.ln1.hook_normalized
	- blocks.0.hook_resid_mid
	- blocks.0.ln2.hook_normalized
	- blocks.1.hook_resid_pre
	- blocks.1.ln1.hook_normalized
	- blocks.1.hook_resid_mid
	- blocks.1.ln2.hook_normalized
	- blocks.2.hook_resid_pre
	- blocks.2.ln1.hook_normalized
	- blocks.2.hook_resid_mid
	- blocks.2.ln2.hook_normalized
	- blocks.3.hook_resid_pre
	- blocks.3.ln1.hook_normalized
	- blocks.3.hook_resid_mid
	- blocks.3.ln2.hook_normalized
	- blocks.3.hook_resid_post
	seed: 42
	train:
	batch_size: 256
	beta_n: 1.0
	epochs: null
	lam_n: 2000.0
	log_every_n_steps: 100
	num_steps: 50000
	num_steps_per_epoch: null
	optimizer:
	initial_learning_rate: 0.0001
	last_pct_of_steps: 0.25
	warmup_pct: 0.05
	save_every_n_steps: 49999
	wandb:
	entity: dmitry2-uiuc
	project: sleeper-model-diffing