Davide Ghilardi
Renaming
0dda12c
{
"model_name": "1l-gelu",
"model_class_name": "HookedTransformer",
"hook_name": "blocks.0.hook_resid_post",
"hook_eval": "NOT_IN_USE",
"hook_layer": 0,
"hook_head_index": null,
"dataset_path": "ghidav/arithmetics_reversed",
"dataset_trust_remote_code": true,
"streaming": true,
"is_dataset_tokenized": true,
"context_size": 18,
"use_cached_activations": false,
"cached_activations_path": null,
"architecture": "jumprelu",
"d_in": 512,
"d_sae": 8192,
"b_dec_init_method": "zeros",
"expansion_factor": 16,
"activation_fn": "relu",
"activation_fn_kwargs": {},
"normalize_sae_decoder": true,
"noise_scale": 0.0,
"from_pretrained_path": null,
"apply_b_dec_to_input": false,
"decoder_orthogonal_init": false,
"decoder_heuristic_init": false,
"init_encoder_as_decoder_transpose": false,
"n_batches_in_buffer": 128,
"training_tokens": 50000000,
"finetuning_tokens": 0,
"store_batch_size_prompts": 8,
"train_batch_size_tokens": 1024,
"normalize_activations": "none",
"device": "cuda",
"act_store_device": "cuda",
"seed": 42,
"dtype": "float32",
"prepend_bos": false,
"autocast": false,
"autocast_lm": false,
"compile_llm": false,
"llm_compilation_mode": null,
"compile_sae": false,
"sae_compilation_mode": null,
"adam_beta1": 0,
"adam_beta2": 0.999,
"mse_loss_normalization": null,
"l1_coefficient": 2.0,
"lp_norm": 1,
"scale_sparsity_penalty_by_decoder_norm": false,
"l1_warm_up_steps": 2441,
"lr": 0.0005,
"lr_scheduler_name": "constant",
"lr_warm_up_steps": 0,
"lr_end": 5e-05,
"lr_decay_steps": 9765,
"n_restart_cycles": 1,
"finetuning_method": null,
"use_ghost_grads": false,
"feature_sampling_window": 2000,
"dead_feature_window": 1000,
"dead_feature_threshold": 1e-06,
"n_eval_batches": 10,
"eval_batch_size_prompts": null,
"log_to_wandb": true,
"log_activations_store_to_wandb": false,
"log_optimizer_state_to_wandb": false,
"wandb_project": "sae-feature-circuits",
"wandb_id": null,
"run_name": "L0_hook_resid_post_L1_2_0_rev",
"wandb_entity": null,
"wandb_log_frequency": 30,
"eval_every_n_wandb_logs": 100,
"resume": false,
"n_checkpoints": 0,
"checkpoint_path": "checkpoints/220duyfn",
"verbose": false,
"model_kwargs": {},
"model_from_pretrained_kwargs": {
"center_writing_weights": false
},
"sae_lens_version": "3.20.5",
"sae_lens_training_version": "3.20.5",
"tokens_per_buffer": 2359296
}