bias: true
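# block_size: maximum context length; Pythia models were trained with a 2048-token context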
block_size: 2048
gelu_approximate: none
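# per-head dimension: n_embd / n_head = 768 / 12 = 64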
head_size: 64
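# upstream checkpoint on the Hugging Face Hub: EleutherAI/pythia-160m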
hf_config:
  name: pythia-160m
  org: EleutherAI
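# MLP hidden width: 4 * n_embd = 4 * 768 = 3072, the standard GPT-NeoX expansion factor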
intermediate_size: 3072
lm_head_bias: false
mlp_class_name: GptNeoxMLP
n_embd: 768
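# mixture-of-experts is disabled: zero experts, zero experts routed per token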
n_expert: 0
n_expert_per_token: 0
n_head: 12
n_layer: 12
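# n_query_groups == n_head, i.e. standard multi-head attention (no grouped- or multi-query attention)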
n_query_groups: 12
name: pythia-160m
norm_class_name: LayerNorm
norm_eps: 1.0e-05
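# vocab_size rounded up to the next multiple of padding_multiple: ceil(50254 / 128) * 128 = 50304,
# which keeps the embedding-matrix dimensions hardware-friendly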
padded_vocab_size: 50304
padding_multiple: 128
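# GPT-NeoX-style block: the attention and MLP branches are computed in parallel
# and summed into the residual stream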
parallel_residual: true
rope_base: 10000
rope_condense_ratio: 1
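# rotary embeddings cover the first 25% of each head's dimensions: 0.25 * 64 = 16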
rotary_percentage: 0.25
scale_embeddings: false
shared_attention_norm: false
vocab_size: 50254
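
# A minimal usage sketch, assuming this config is consumed by litgpt and that
# litgpt's Config.from_name API is available (both are assumptions; the field
# names above match litgpt's Config dataclass):
#
#   from litgpt import Config
#   config = Config.from_name("pythia-160m")
#   assert config.head_size == config.n_embd // config.n_head       # 768 // 12 == 64
#   assert config.padded_vocab_size % config.padding_multiple == 0  # 50304 % 128 == 0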