{ "_name_or_path": "checkpoints/microsoft/phi-1_5", "anyprec": { "arch_config": { "layers_name": "layers", "model_name": "model", "module_names": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj", "self_attn.dense", "mlp.fc1", "mlp.fc2" ] }, "group_count": 1, "parent_precision": 4, "seed_precision": 2, "sparse_numvals": { "model.layers.0.mlp.fc1": 1239127, "model.layers.0.mlp.fc2": 1195670, "model.layers.0.self_attn.dense": 181030, "model.layers.0.self_attn.k_proj": 308573, "model.layers.0.self_attn.q_proj": 276310, "model.layers.0.self_attn.v_proj": 203605, "model.layers.1.mlp.fc1": 513800, "model.layers.1.mlp.fc2": 734843, "model.layers.1.self_attn.dense": 168023, "model.layers.1.self_attn.k_proj": 216089, "model.layers.1.self_attn.q_proj": 210873, "model.layers.1.self_attn.v_proj": 192721, "model.layers.10.mlp.fc1": 679814, "model.layers.10.mlp.fc2": 721214, "model.layers.10.self_attn.dense": 173300, "model.layers.10.self_attn.k_proj": 212281, "model.layers.10.self_attn.q_proj": 203800, "model.layers.10.self_attn.v_proj": 192613, "model.layers.11.mlp.fc1": 679739, "model.layers.11.mlp.fc2": 710094, "model.layers.11.self_attn.dense": 169678, "model.layers.11.self_attn.k_proj": 206617, "model.layers.11.self_attn.q_proj": 198897, "model.layers.11.self_attn.v_proj": 186403, "model.layers.12.mlp.fc1": 666839, "model.layers.12.mlp.fc2": 720708, "model.layers.12.self_attn.dense": 168552, "model.layers.12.self_attn.k_proj": 211803, "model.layers.12.self_attn.q_proj": 204967, "model.layers.12.self_attn.v_proj": 190021, "model.layers.13.mlp.fc1": 656663, "model.layers.13.mlp.fc2": 706611, "model.layers.13.self_attn.dense": 169732, "model.layers.13.self_attn.k_proj": 212001, "model.layers.13.self_attn.q_proj": 201431, "model.layers.13.self_attn.v_proj": 186583, "model.layers.14.mlp.fc1": 649519, "model.layers.14.mlp.fc2": 750919, "model.layers.14.self_attn.dense": 176740, "model.layers.14.self_attn.k_proj": 208548, "model.layers.14.self_attn.q_proj": 204873, "model.layers.14.self_attn.v_proj": 190063, "model.layers.15.mlp.fc1": 636168, "model.layers.15.mlp.fc2": 731257, "model.layers.15.self_attn.dense": 168984, "model.layers.15.self_attn.k_proj": 209159, "model.layers.15.self_attn.q_proj": 224143, "model.layers.15.self_attn.v_proj": 183076, "model.layers.16.mlp.fc1": 626682, "model.layers.16.mlp.fc2": 767523, "model.layers.16.self_attn.dense": 167080, "model.layers.16.self_attn.k_proj": 208334, "model.layers.16.self_attn.q_proj": 211502, "model.layers.16.self_attn.v_proj": 177715, "model.layers.17.mlp.fc1": 610333, "model.layers.17.mlp.fc2": 744465, "model.layers.17.self_attn.dense": 165331, "model.layers.17.self_attn.k_proj": 198333, "model.layers.17.self_attn.q_proj": 196969, "model.layers.17.self_attn.v_proj": 176784, "model.layers.18.mlp.fc1": 603584, "model.layers.18.mlp.fc2": 737950, "model.layers.18.self_attn.dense": 181416, "model.layers.18.self_attn.k_proj": 207074, "model.layers.18.self_attn.q_proj": 234332, "model.layers.18.self_attn.v_proj": 191671, "model.layers.19.mlp.fc1": 593992, "model.layers.19.mlp.fc2": 706175, "model.layers.19.self_attn.dense": 179214, "model.layers.19.self_attn.k_proj": 208480, "model.layers.19.self_attn.q_proj": 232108, "model.layers.19.self_attn.v_proj": 188840, "model.layers.2.mlp.fc1": 592377, "model.layers.2.mlp.fc2": 724600, "model.layers.2.self_attn.dense": 163369, "model.layers.2.self_attn.k_proj": 216145, "model.layers.2.self_attn.q_proj": 209454, "model.layers.2.self_attn.v_proj": 191623, "model.layers.20.mlp.fc1": 586654, "model.layers.20.mlp.fc2": 696954, "model.layers.20.self_attn.dense": 168154, "model.layers.20.self_attn.k_proj": 205374, "model.layers.20.self_attn.q_proj": 228084, "model.layers.20.self_attn.v_proj": 175088, "model.layers.21.mlp.fc1": 588068, "model.layers.21.mlp.fc2": 706060, "model.layers.21.self_attn.dense": 169767, "model.layers.21.self_attn.k_proj": 199803, "model.layers.21.self_attn.q_proj": 229813, "model.layers.21.self_attn.v_proj": 179190, "model.layers.22.mlp.fc1": 598655, "model.layers.22.mlp.fc2": 789250, "model.layers.22.self_attn.dense": 167244, "model.layers.22.self_attn.k_proj": 193913, "model.layers.22.self_attn.q_proj": 275941, "model.layers.22.self_attn.v_proj": 173336, "model.layers.23.mlp.fc1": 680534, "model.layers.23.mlp.fc2": 1070400, "model.layers.23.self_attn.dense": 210736, "model.layers.23.self_attn.k_proj": 216070, "model.layers.23.self_attn.q_proj": 388123, "model.layers.23.self_attn.v_proj": 221935, "model.layers.3.mlp.fc1": 635345, "model.layers.3.mlp.fc2": 728654, "model.layers.3.self_attn.dense": 177298, "model.layers.3.self_attn.k_proj": 231769, "model.layers.3.self_attn.q_proj": 223554, "model.layers.3.self_attn.v_proj": 205803, "model.layers.4.mlp.fc1": 684553, "model.layers.4.mlp.fc2": 733946, "model.layers.4.self_attn.dense": 172237, "model.layers.4.self_attn.k_proj": 219073, "model.layers.4.self_attn.q_proj": 211940, "model.layers.4.self_attn.v_proj": 203401, "model.layers.5.mlp.fc1": 675444, "model.layers.5.mlp.fc2": 746508, "model.layers.5.self_attn.dense": 170531, "model.layers.5.self_attn.k_proj": 238923, "model.layers.5.self_attn.q_proj": 234678, "model.layers.5.self_attn.v_proj": 199692, "model.layers.6.mlp.fc1": 680226, "model.layers.6.mlp.fc2": 732111, "model.layers.6.self_attn.dense": 182410, "model.layers.6.self_attn.k_proj": 215481, "model.layers.6.self_attn.q_proj": 211152, "model.layers.6.self_attn.v_proj": 203868, "model.layers.7.mlp.fc1": 685126, "model.layers.7.mlp.fc2": 727309, "model.layers.7.self_attn.dense": 167738, "model.layers.7.self_attn.k_proj": 222298, "model.layers.7.self_attn.q_proj": 216189, "model.layers.7.self_attn.v_proj": 191654, "model.layers.8.mlp.fc1": 692316, "model.layers.8.mlp.fc2": 715828, "model.layers.8.self_attn.dense": 176375, "model.layers.8.self_attn.k_proj": 222524, "model.layers.8.self_attn.q_proj": 215045, "model.layers.8.self_attn.v_proj": 198133, "model.layers.9.mlp.fc1": 688557, "model.layers.9.mlp.fc2": 718275, "model.layers.9.self_attn.dense": 166542, "model.layers.9.self_attn.k_proj": 220268, "model.layers.9.self_attn.q_proj": 213681, "model.layers.9.self_attn.v_proj": 191284 } }, "architectures": [ "PhiForCausalLM" ], "attention_dropout": 0.0, "auto_map": { "AutoConfig": "configuration_phi.PhiConfig", "AutoModelForCausalLM": "modeling_phi.PhiForCausalLM" }, "bos_token_id": null, "embd_pdrop": 0.0, "eos_token_id": null, "hidden_act": "gelu_new", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "layer_norm_eps": 1e-05, "max_position_embeddings": 2048, "model_type": "phi", "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "partial_rotary_factor": 0.5, "qk_layernorm": false, "resid_pdrop": 0.0, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "transformers_version": "4.39.3", "use_cache": true, "vocab_size": 51200 }