{
  "_name_or_path": "checkpoints/microsoft/phi-1_5",
  "anyprec": {
    "arch_config": {
      "layers_name": "layers",
      "model_name": "model",
      "module_names": [
        "self_attn.q_proj",
        "self_attn.k_proj",
        "self_attn.v_proj",
        "self_attn.dense",
        "mlp.fc1",
        "mlp.fc2"
      ]
    },
    "group_count": 1,
    "parent_precision": 4,
    "seed_precision": 2,
    "sparse_numvals": {
      "model.layers.0.mlp.fc1": 1239127,
      "model.layers.0.mlp.fc2": 1195670,
      "model.layers.0.self_attn.dense": 181030,
      "model.layers.0.self_attn.k_proj": 308573,
      "model.layers.0.self_attn.q_proj": 276310,
      "model.layers.0.self_attn.v_proj": 203605,
      "model.layers.1.mlp.fc1": 513800,
      "model.layers.1.mlp.fc2": 734843,
      "model.layers.1.self_attn.dense": 168023,
      "model.layers.1.self_attn.k_proj": 216089,
      "model.layers.1.self_attn.q_proj": 210873,
      "model.layers.1.self_attn.v_proj": 192721,
      "model.layers.10.mlp.fc1": 679814,
      "model.layers.10.mlp.fc2": 721214,
      "model.layers.10.self_attn.dense": 173300,
      "model.layers.10.self_attn.k_proj": 212281,
      "model.layers.10.self_attn.q_proj": 203800,
      "model.layers.10.self_attn.v_proj": 192613,
      "model.layers.11.mlp.fc1": 679739,
      "model.layers.11.mlp.fc2": 710094,
      "model.layers.11.self_attn.dense": 169678,
      "model.layers.11.self_attn.k_proj": 206617,
      "model.layers.11.self_attn.q_proj": 198897,
      "model.layers.11.self_attn.v_proj": 186403,
      "model.layers.12.mlp.fc1": 666839,
      "model.layers.12.mlp.fc2": 720708,
      "model.layers.12.self_attn.dense": 168552,
      "model.layers.12.self_attn.k_proj": 211803,
      "model.layers.12.self_attn.q_proj": 204967,
      "model.layers.12.self_attn.v_proj": 190021,
      "model.layers.13.mlp.fc1": 656663,
      "model.layers.13.mlp.fc2": 706611,
      "model.layers.13.self_attn.dense": 169732,
      "model.layers.13.self_attn.k_proj": 212001,
      "model.layers.13.self_attn.q_proj": 201431,
      "model.layers.13.self_attn.v_proj": 186583,
      "model.layers.14.mlp.fc1": 649519,
      "model.layers.14.mlp.fc2": 750919,
      "model.layers.14.self_attn.dense": 176740,
      "model.layers.14.self_attn.k_proj": 208548,
      "model.layers.14.self_attn.q_proj": 204873,
      "model.layers.14.self_attn.v_proj": 190063,
      "model.layers.15.mlp.fc1": 636168,
      "model.layers.15.mlp.fc2": 731257,
      "model.layers.15.self_attn.dense": 168984,
      "model.layers.15.self_attn.k_proj": 209159,
      "model.layers.15.self_attn.q_proj": 224143,
      "model.layers.15.self_attn.v_proj": 183076,
      "model.layers.16.mlp.fc1": 626682,
      "model.layers.16.mlp.fc2": 767523,
      "model.layers.16.self_attn.dense": 167080,
      "model.layers.16.self_attn.k_proj": 208334,
      "model.layers.16.self_attn.q_proj": 211502,
      "model.layers.16.self_attn.v_proj": 177715,
      "model.layers.17.mlp.fc1": 610333,
      "model.layers.17.mlp.fc2": 744465,
      "model.layers.17.self_attn.dense": 165331,
      "model.layers.17.self_attn.k_proj": 198333,
      "model.layers.17.self_attn.q_proj": 196969,
      "model.layers.17.self_attn.v_proj": 176784,
      "model.layers.18.mlp.fc1": 603584,
      "model.layers.18.mlp.fc2": 737950,
      "model.layers.18.self_attn.dense": 181416,
      "model.layers.18.self_attn.k_proj": 207074,
      "model.layers.18.self_attn.q_proj": 234332,
      "model.layers.18.self_attn.v_proj": 191671,
      "model.layers.19.mlp.fc1": 593992,
      "model.layers.19.mlp.fc2": 706175,
      "model.layers.19.self_attn.dense": 179214,
      "model.layers.19.self_attn.k_proj": 208480,
      "model.layers.19.self_attn.q_proj": 232108,
      "model.layers.19.self_attn.v_proj": 188840,
      "model.layers.2.mlp.fc1": 592377,
      "model.layers.2.mlp.fc2": 724600,
      "model.layers.2.self_attn.dense": 163369,
      "model.layers.2.self_attn.k_proj": 216145,
      "model.layers.2.self_attn.q_proj": 209454,
      "model.layers.2.self_attn.v_proj": 191623,
      "model.layers.20.mlp.fc1": 586654,
      "model.layers.20.mlp.fc2": 696954,
      "model.layers.20.self_attn.dense": 168154,
      "model.layers.20.self_attn.k_proj": 205374,
      "model.layers.20.self_attn.q_proj": 228084,
      "model.layers.20.self_attn.v_proj": 175088,
      "model.layers.21.mlp.fc1": 588068,
      "model.layers.21.mlp.fc2": 706060,
      "model.layers.21.self_attn.dense": 169767,
      "model.layers.21.self_attn.k_proj": 199803,
      "model.layers.21.self_attn.q_proj": 229813,
      "model.layers.21.self_attn.v_proj": 179190,
      "model.layers.22.mlp.fc1": 598655,
      "model.layers.22.mlp.fc2": 789250,
      "model.layers.22.self_attn.dense": 167244,
      "model.layers.22.self_attn.k_proj": 193913,
      "model.layers.22.self_attn.q_proj": 275941,
      "model.layers.22.self_attn.v_proj": 173336,
      "model.layers.23.mlp.fc1": 680534,
      "model.layers.23.mlp.fc2": 1070400,
      "model.layers.23.self_attn.dense": 210736,
      "model.layers.23.self_attn.k_proj": 216070,
      "model.layers.23.self_attn.q_proj": 388123,
      "model.layers.23.self_attn.v_proj": 221935,
      "model.layers.3.mlp.fc1": 635345,
      "model.layers.3.mlp.fc2": 728654,
      "model.layers.3.self_attn.dense": 177298,
      "model.layers.3.self_attn.k_proj": 231769,
      "model.layers.3.self_attn.q_proj": 223554,
      "model.layers.3.self_attn.v_proj": 205803,
      "model.layers.4.mlp.fc1": 684553,
      "model.layers.4.mlp.fc2": 733946,
      "model.layers.4.self_attn.dense": 172237,
      "model.layers.4.self_attn.k_proj": 219073,
      "model.layers.4.self_attn.q_proj": 211940,
      "model.layers.4.self_attn.v_proj": 203401,
      "model.layers.5.mlp.fc1": 675444,
      "model.layers.5.mlp.fc2": 746508,
      "model.layers.5.self_attn.dense": 170531,
      "model.layers.5.self_attn.k_proj": 238923,
      "model.layers.5.self_attn.q_proj": 234678,
      "model.layers.5.self_attn.v_proj": 199692,
      "model.layers.6.mlp.fc1": 680226,
      "model.layers.6.mlp.fc2": 732111,
      "model.layers.6.self_attn.dense": 182410,
      "model.layers.6.self_attn.k_proj": 215481,
      "model.layers.6.self_attn.q_proj": 211152,
      "model.layers.6.self_attn.v_proj": 203868,
      "model.layers.7.mlp.fc1": 685126,
      "model.layers.7.mlp.fc2": 727309,
      "model.layers.7.self_attn.dense": 167738,
      "model.layers.7.self_attn.k_proj": 222298,
      "model.layers.7.self_attn.q_proj": 216189,
      "model.layers.7.self_attn.v_proj": 191654,
      "model.layers.8.mlp.fc1": 692316,
      "model.layers.8.mlp.fc2": 715828,
      "model.layers.8.self_attn.dense": 176375,
      "model.layers.8.self_attn.k_proj": 222524,
      "model.layers.8.self_attn.q_proj": 215045,
      "model.layers.8.self_attn.v_proj": 198133,
      "model.layers.9.mlp.fc1": 688557,
      "model.layers.9.mlp.fc2": 718275,
      "model.layers.9.self_attn.dense": 166542,
      "model.layers.9.self_attn.k_proj": 220268,
      "model.layers.9.self_attn.q_proj": 213681,
      "model.layers.9.self_attn.v_proj": 191284
    }
  },
  "architectures": [
    "PhiForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_phi.PhiConfig",
    "AutoModelForCausalLM": "modeling_phi.PhiForCausalLM"
  },
  "bos_token_id": null,
  "embd_pdrop": 0.0,
  "eos_token_id": null,
  "hidden_act": "gelu_new",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 2048,
  "model_type": "phi",
  "num_attention_heads": 32,
  "num_hidden_layers": 24,
  "num_key_value_heads": 32,
  "partial_rotary_factor": 0.5,
  "qk_layernorm": false,
  "resid_pdrop": 0.0,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.39.3",
  "use_cache": true,
  "vocab_size": 51200
}