|
{ |
|
"model_name": "1l-gelu", |
|
"model_class_name": "HookedTransformer", |
|
"hook_name": "blocks.0.hook_resid_post", |
|
"hook_eval": "NOT_IN_USE", |
|
"hook_layer": 0, |
|
"hook_head_index": null, |
|
"dataset_path": "ghidav/arithmetics_reversed", |
|
"dataset_trust_remote_code": true, |
|
"streaming": true, |
|
"is_dataset_tokenized": true, |
|
"context_size": 18, |
|
"use_cached_activations": false, |
|
"cached_activations_path": null, |
|
"architecture": "jumprelu", |
|
"d_in": 512, |
|
"d_sae": 8192, |
|
"b_dec_init_method": "zeros", |
|
"expansion_factor": 16, |
|
"activation_fn": "relu", |
|
"activation_fn_kwargs": {}, |
|
"normalize_sae_decoder": true, |
|
"noise_scale": 0.0, |
|
"from_pretrained_path": null, |
|
"apply_b_dec_to_input": false, |
|
"decoder_orthogonal_init": false, |
|
"decoder_heuristic_init": false, |
|
"init_encoder_as_decoder_transpose": false, |
|
"n_batches_in_buffer": 128, |
|
"training_tokens": 50000000, |
|
"finetuning_tokens": 0, |
|
"store_batch_size_prompts": 8, |
|
"train_batch_size_tokens": 1024, |
|
"normalize_activations": "none", |
|
"device": "cuda", |
|
"act_store_device": "cuda", |
|
"seed": 42, |
|
"dtype": "float32", |
|
"prepend_bos": false, |
|
"autocast": false, |
|
"autocast_lm": false, |
|
"compile_llm": false, |
|
"llm_compilation_mode": null, |
|
"compile_sae": false, |
|
"sae_compilation_mode": null, |
|
"adam_beta1": 0, |
|
"adam_beta2": 0.999, |
|
"mse_loss_normalization": null, |
|
"l1_coefficient": 2.0, |
|
"lp_norm": 1, |
|
"scale_sparsity_penalty_by_decoder_norm": false, |
|
"l1_warm_up_steps": 2441, |
|
"lr": 0.0005, |
|
"lr_scheduler_name": "constant", |
|
"lr_warm_up_steps": 0, |
|
"lr_end": 5e-05, |
|
"lr_decay_steps": 9765, |
|
"n_restart_cycles": 1, |
|
"finetuning_method": null, |
|
"use_ghost_grads": false, |
|
"feature_sampling_window": 2000, |
|
"dead_feature_window": 1000, |
|
"dead_feature_threshold": 1e-06, |
|
"n_eval_batches": 10, |
|
"eval_batch_size_prompts": null, |
|
"log_to_wandb": true, |
|
"log_activations_store_to_wandb": false, |
|
"log_optimizer_state_to_wandb": false, |
|
"wandb_project": "sae-feature-circuits", |
|
"wandb_id": null, |
|
"run_name": "L0_hook_resid_post_L1_2_0_rev", |
|
"wandb_entity": null, |
|
"wandb_log_frequency": 30, |
|
"eval_every_n_wandb_logs": 100, |
|
"resume": false, |
|
"n_checkpoints": 0, |
|
"checkpoint_path": "checkpoints/220duyfn", |
|
"verbose": false, |
|
"model_kwargs": {}, |
|
"model_from_pretrained_kwargs": { |
|
"center_writing_weights": false |
|
}, |
|
"sae_lens_version": "3.20.5", |
|
"sae_lens_training_version": "3.20.5", |
|
"tokens_per_buffer": 2359296 |
|
} |