{ "_name_or_path": "checkpoints/mtgv/MobileLLaMA-1.4B-Chat", "anyprec": { "arch_config": { "layers_name": "layers", "model_name": "model", "module_names": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj", "self_attn.o_proj", "mlp.gate_proj", "mlp.up_proj", "mlp.down_proj" ] }, "group_count": 1, "parent_precision": 4, "seed_precision": 2, "sparse_numvals": { "model.layers.0.mlp.down_proj": 203500, "model.layers.0.mlp.gate_proj": 202120, "model.layers.0.mlp.up_proj": 192683, "model.layers.0.self_attn.k_proj": 183607, "model.layers.0.self_attn.o_proj": 77841, "model.layers.0.self_attn.q_proj": 165342, "model.layers.0.self_attn.v_proj": 86553, "model.layers.1.mlp.down_proj": 198771, "model.layers.1.mlp.gate_proj": 203682, "model.layers.1.mlp.up_proj": 192748, "model.layers.1.self_attn.k_proj": 364251, "model.layers.1.self_attn.o_proj": 131580, "model.layers.1.self_attn.q_proj": 360487, "model.layers.1.self_attn.v_proj": 97633, "model.layers.10.mlp.down_proj": 197824, "model.layers.10.mlp.gate_proj": 225998, "model.layers.10.mlp.up_proj": 204762, "model.layers.10.self_attn.k_proj": 199531, "model.layers.10.self_attn.o_proj": 89803, "model.layers.10.self_attn.q_proj": 183610, "model.layers.10.self_attn.v_proj": 94009, "model.layers.11.mlp.down_proj": 199955, "model.layers.11.mlp.gate_proj": 227776, "model.layers.11.mlp.up_proj": 206511, "model.layers.11.self_attn.k_proj": 190353, "model.layers.11.self_attn.o_proj": 84176, "model.layers.11.self_attn.q_proj": 186924, "model.layers.11.self_attn.v_proj": 91121, "model.layers.12.mlp.down_proj": 206219, "model.layers.12.mlp.gate_proj": 241688, "model.layers.12.mlp.up_proj": 214616, "model.layers.12.self_attn.k_proj": 174998, "model.layers.12.self_attn.o_proj": 79139, "model.layers.12.self_attn.q_proj": 166481, "model.layers.12.self_attn.v_proj": 90512, "model.layers.13.mlp.down_proj": 211577, "model.layers.13.mlp.gate_proj": 268406, "model.layers.13.mlp.up_proj": 217669, "model.layers.13.self_attn.k_proj": 176024, "model.layers.13.self_attn.o_proj": 84955, "model.layers.13.self_attn.q_proj": 173701, "model.layers.13.self_attn.v_proj": 102448, "model.layers.14.mlp.down_proj": 219067, "model.layers.14.mlp.gate_proj": 280979, "model.layers.14.mlp.up_proj": 226876, "model.layers.14.self_attn.k_proj": 183013, "model.layers.14.self_attn.o_proj": 84460, "model.layers.14.self_attn.q_proj": 172181, "model.layers.14.self_attn.v_proj": 98057, "model.layers.15.mlp.down_proj": 215595, "model.layers.15.mlp.gate_proj": 273100, "model.layers.15.mlp.up_proj": 227142, "model.layers.15.self_attn.k_proj": 185057, "model.layers.15.self_attn.o_proj": 87587, "model.layers.15.self_attn.q_proj": 186299, "model.layers.15.self_attn.v_proj": 101324, "model.layers.16.mlp.down_proj": 213995, "model.layers.16.mlp.gate_proj": 265156, "model.layers.16.mlp.up_proj": 226998, "model.layers.16.self_attn.k_proj": 167083, "model.layers.16.self_attn.o_proj": 88046, "model.layers.16.self_attn.q_proj": 163788, "model.layers.16.self_attn.v_proj": 98115, "model.layers.17.mlp.down_proj": 211009, "model.layers.17.mlp.gate_proj": 251691, "model.layers.17.mlp.up_proj": 222380, "model.layers.17.self_attn.k_proj": 178978, "model.layers.17.self_attn.o_proj": 88702, "model.layers.17.self_attn.q_proj": 179054, "model.layers.17.self_attn.v_proj": 95777, "model.layers.18.mlp.down_proj": 205896, "model.layers.18.mlp.gate_proj": 236359, "model.layers.18.mlp.up_proj": 220262, "model.layers.18.self_attn.k_proj": 163205, "model.layers.18.self_attn.o_proj": 89407, "model.layers.18.self_attn.q_proj": 180993, "model.layers.18.self_attn.v_proj": 94704, "model.layers.19.mlp.down_proj": 202847, "model.layers.19.mlp.gate_proj": 227201, "model.layers.19.mlp.up_proj": 214896, "model.layers.19.self_attn.k_proj": 161806, "model.layers.19.self_attn.o_proj": 105465, "model.layers.19.self_attn.q_proj": 161121, "model.layers.19.self_attn.v_proj": 108323, "model.layers.2.mlp.down_proj": 193637, "model.layers.2.mlp.gate_proj": 191448, "model.layers.2.mlp.up_proj": 191050, "model.layers.2.self_attn.k_proj": 262498, "model.layers.2.self_attn.o_proj": 90537, "model.layers.2.self_attn.q_proj": 216410, "model.layers.2.self_attn.v_proj": 84013, "model.layers.20.mlp.down_proj": 202359, "model.layers.20.mlp.gate_proj": 220890, "model.layers.20.mlp.up_proj": 212795, "model.layers.20.self_attn.k_proj": 142954, "model.layers.20.self_attn.o_proj": 96463, "model.layers.20.self_attn.q_proj": 142462, "model.layers.20.self_attn.v_proj": 92729, "model.layers.21.mlp.down_proj": 201434, "model.layers.21.mlp.gate_proj": 210100, "model.layers.21.mlp.up_proj": 205805, "model.layers.21.self_attn.k_proj": 146504, "model.layers.21.self_attn.o_proj": 123373, "model.layers.21.self_attn.q_proj": 141055, "model.layers.21.self_attn.v_proj": 117606, "model.layers.22.mlp.down_proj": 217143, "model.layers.22.mlp.gate_proj": 224399, "model.layers.22.mlp.up_proj": 214232, "model.layers.22.self_attn.k_proj": 157729, "model.layers.22.self_attn.o_proj": 107012, "model.layers.22.self_attn.q_proj": 157878, "model.layers.22.self_attn.v_proj": 104177, "model.layers.23.mlp.down_proj": 291838, "model.layers.23.mlp.gate_proj": 258287, "model.layers.23.mlp.up_proj": 273849, "model.layers.23.self_attn.k_proj": 124198, "model.layers.23.self_attn.o_proj": 120458, "model.layers.23.self_attn.q_proj": 118365, "model.layers.23.self_attn.v_proj": 113516, "model.layers.3.mlp.down_proj": 190475, "model.layers.3.mlp.gate_proj": 189871, "model.layers.3.mlp.up_proj": 191332, "model.layers.3.self_attn.k_proj": 194781, "model.layers.3.self_attn.o_proj": 75225, "model.layers.3.self_attn.q_proj": 158659, "model.layers.3.self_attn.v_proj": 77865, "model.layers.4.mlp.down_proj": 196340, "model.layers.4.mlp.gate_proj": 193749, "model.layers.4.mlp.up_proj": 192632, "model.layers.4.self_attn.k_proj": 193601, "model.layers.4.self_attn.o_proj": 73151, "model.layers.4.self_attn.q_proj": 149930, "model.layers.4.self_attn.v_proj": 76421, "model.layers.5.mlp.down_proj": 195517, "model.layers.5.mlp.gate_proj": 192992, "model.layers.5.mlp.up_proj": 192898, "model.layers.5.self_attn.k_proj": 186939, "model.layers.5.self_attn.o_proj": 83471, "model.layers.5.self_attn.q_proj": 162431, "model.layers.5.self_attn.v_proj": 90070, "model.layers.6.mlp.down_proj": 192400, "model.layers.6.mlp.gate_proj": 198841, "model.layers.6.mlp.up_proj": 193272, "model.layers.6.self_attn.k_proj": 188819, "model.layers.6.self_attn.o_proj": 91905, "model.layers.6.self_attn.q_proj": 163563, "model.layers.6.self_attn.v_proj": 102996, "model.layers.7.mlp.down_proj": 192720, "model.layers.7.mlp.gate_proj": 202711, "model.layers.7.mlp.up_proj": 198500, "model.layers.7.self_attn.k_proj": 159484, "model.layers.7.self_attn.o_proj": 80468, "model.layers.7.self_attn.q_proj": 136400, "model.layers.7.self_attn.v_proj": 86365, "model.layers.8.mlp.down_proj": 198065, "model.layers.8.mlp.gate_proj": 220261, "model.layers.8.mlp.up_proj": 205549, "model.layers.8.self_attn.k_proj": 174811, "model.layers.8.self_attn.o_proj": 95996, "model.layers.8.self_attn.q_proj": 148652, "model.layers.8.self_attn.v_proj": 108878, "model.layers.9.mlp.down_proj": 195515, "model.layers.9.mlp.gate_proj": 215592, "model.layers.9.mlp.up_proj": 204054, "model.layers.9.self_attn.k_proj": 192197, "model.layers.9.self_attn.o_proj": 97500, "model.layers.9.self_attn.q_proj": 177402, "model.layers.9.self_attn.v_proj": 107518 } }, "architectures": [ "LlamaForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 5632, "max_position_embeddings": 2048, "max_sequence_length": 2048, "model_type": "llama", "num_attention_heads": 16, "num_hidden_layers": 24, "num_key_value_heads": 16, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "transformers_version": "4.39.3", "use_cache": true, "vocab_size": 32000 }