{ "metadata": { "ParamSize": 709, "ParamBytes": 18431289344.0, "BitsPerParam": 4.500392846312253 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "lm_head.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "d0bab2c33f66d28920bd728fbe2aa27f" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "lm_head.q_scale", "shape": [ 152064, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "23c04d001ab8fea5b40ac4b894255304" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "9afe8adacae1bf542102e90ddd704b7f" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "f08665f1908db3fdd521220f4f3cda45" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ca081aa88683e4dbb93f76141ef0de5e" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1b429490ad74e4a313f0552ee8ce765b" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "dc684201c974714b4ceb483267fe2d6e" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 28870656, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 10240 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8857600 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26552320 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26562560 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26576896 } ], "md5sum": "30b43de0525975c2db0258fd512a3b21" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "883eb02b681fcd2e87c48c9e9f979cf4" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "351f74d8342707308dde419f748b25b9" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "392b00eab66c397fe3f1cee31fd324b1" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c8378305459a10b81cfd354ad266c821" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "1fa936633feac6f2a3c57cc64ce2c702" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e52d28b4e44298a173dc17b459c0beca" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "67f9030ef41561a422be430eb26e7ab0" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "4fb3e39489eb9ac6733e4a7c7c876f4e" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "de24578d96c5897a7c7319778b74fe0c" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "cc68761b6e8a1be4fbf5a0238e475e4b" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "25e8d10b6536e54f02f3384ad551aa43" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ba18e8edd5fd209ee2ded9960b35fe46" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "2f08c5b8cb6619e7918c92487608de34" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ce392127e1d2ef2a941ebcfbb09eb57e" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "d12f4d723f80778974b50d6ace034c65" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d7d15398f76a24976f05cb7a831dc421" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b45e96bdaf2ee6c5b98da9c8ab983258" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "ee190bba0fe2063178e88f1d9fbc14e2" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "9fa4a18261196e0e5670ac2b0bb3982a" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "10577a5fd55ce2bd97bfbfca60c6a36a" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9dfba420aeec2e0834f6c0aa8a59279b" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "cec417de66e50ac63184a42845ccfa9d" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "1ff2843ac817375d2d536358182a6acf" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5e819b3ac534f2f9ca14028712a18c00" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "6ba344f936504ca2a3e2749e55ccc802" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e10e2d6a61189518724b87a36de67a8e" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "dfa78fd047b9623b84b225dea1f0c072" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d9f781fef18d8e7b23d529dd36e088c4" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c3e7991790e3da06bd102b18b4bca275" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "1e8e0fb9e7a8ee79e8c1dbe9e36567d4" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8aadc6a672a0b8f197c6f5ab38512a4a" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "98ea96653bd2d58fa7e76be35a0ddf3d" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "025dedb289852ed844df875cb80af2d0" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "46bbb5e3c02e568e659a435a7e69b9ed" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "ee12636b9623770c9ffa9f7a498d6be1" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d693fa41c91c782c41b406a62f56c56b" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "88236ad328979925ea5016a1152e8c49" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d030f2a64e8c406b2b533793ca26a1a3" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3612a0f22d3e41938b83eb95c9a4c4fb" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "fa2ba32cd6be3b6fb0c83d1467510f59" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "bbdd681edf1610c39db01bac42e44929" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d13b23c82e6f684f39002205ada6ca26" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "87f2be148884c9bfc28275984edb0f72" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "360c817f70d3c7545ee830681c234d0b" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "5051de162b8566098a2589325e427b7a" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1e17c5552fe0d28d710be3ae42114f2e" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a9b8c0e798f1446446be44ffa7cd2b46" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "beaa2f2b5f090ee9f5ac089c03ff9679" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c3d94e0ae12e882ef5727cc815d20411" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "899f7d73daa949cc3fe006add712f345" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6bc05e6006f97a71b4c74ec40b1bd83c" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "916dbdd5ad207f96ac13c18f19ff72bd" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "6e70941aad6da4c20c0c1f700bf6e8e8" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7472f4908cdd3086ffc6787b30a60211" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "ddb0b0d689444f89eb2fb81996712f63" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "85fc38a44702a497ce6a18d140f1923a" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "212ed328dc079c859a24af3db761c358" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "61dad564c4b55f7999a0883d7874c9a6" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "9201f265c97fc7b293c7de4ac368a136" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "430249e04913f0cb3a9ed61788f44678" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1c357339ef01db4f0593b49d7e921777" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f206bc1aa0a1f304cf7cb8331867f15a" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "493f921d9bd3b5ccc364b00d056bd8b1" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c64fcf9a4b86ebd5c230eb6bfcfe12b6" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "b933506094bbf2d78d1ef899517e12b0" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ec680b124905c918c6bf3d1c5f183ebd" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b1e5285370defa2abe621d5eff97ee20" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "e915a7c40c0d0fd42b6ad2691bc68bf7" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ea2477d6fb1f69e43e5676d862070f79" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "15c05ae3dbec980db1fea78e811a2d48" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "bc703698503a9df3769aa08f2aa08e7c" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7c5d7dd4fb58372cacfd3212e9bf83a2" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d0480e8b83e02b891fd165122aa088ae" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b3e6206b18c97fd1650e02b6e0ad2b06" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "31e7b8cd6252743179215c3d9e8f37ab" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e89ebe7995a712b0225582c304b7c153" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1b886210d24556afa49e87b7e7b0f1f1" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "e4036f111c86fbfd8d6d63864fbdd7a5" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ae2fef9d20104cad8e87314d029a0586" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "5948f6b117c0f471dc2c055f9b9bb152" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6bcd5bb7e7f199d97bbb5f22b88d1c2f" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "85398f51004d93e9697328dfa27c8cdd" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "8b4994d8deb801de50452ae03caf94fd" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "848951e3c0ba7336d2935f9310ae56a9" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "f3449379ae1eac6c8b7772730821103a" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4128bc650d1563deee1f12522e93593f" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "561c3a9197bc68d00e889327f08bb7c6" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "3dc9111f4052b7bf075dabb4df67289a" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b56b481910cb00b21e58fa2e0076e7b2" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "6544f0361ba55a17c7048b2709c79494" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "39aa67c0f07be02d3b6e9647e9240bd5" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1c5ac8333ba18b0dd51bf93990c83b05" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "c2873d935c4030d5cf1adccfdfbd9692" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3ae378a1a3cab9e282bead6b417036b8" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "37fecc3683d68901f3736e3fd6824c67" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b173dec9534d3ea28dbba9106239871b" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2a359150b20c6662d10ccba80ef4901e" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "02d29b17dfbac1e5c85b9098a44f407f" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "995e021820f29f2e04ead649efc9ccdc" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "5fd21920e9582bdc0a070ef56e87ec92" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cd27351dfc5c2ae1a1fc00ae2a3705b2" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3252411f4d7c28f91c60d595caec461f" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "b28ce562a7b4f6508ad22bdb8713827e" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "79daa60c45443e9275781c74fe0290f1" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "34b35505f0b2aea6b52dfca8ac4da302" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e5037f5006bb57bb2ee1f51bab1f1ecc" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ee3874eb464e5354128edb471edb2c48" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "7c3d07a36ea877e07e96ec4c44af56ee" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b234940cb1c4d157b07eae573e232684" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "a2f2cdc613668d4a32ad44ea471f7eca" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "56fbf6dc52a12f92785b72e4e28f2b8d" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "fbc268ba22b4f0032adfe5a8d8207c81" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "7a1e53c7ae6d9c0e508c7d4b0f2b88ef" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "993ead2a336c79fc5c25889ca6623486" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "6c24339bcb2a6ea949c2139e003f38cb" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a3cc2caead5a0519b56e1648c2c6ffd3" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "4078429b77ddf0cd77633f894148b40a" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "db460928c5d01badbd39e962db1a0708" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c84f12e290a0b7de388181f2260edf06" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "c34156b5ec46d2d12ecea2aa473f98d7" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f57fb3d2394d48a3c3305f46335eb081" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e82fcd66f9d78f0e6237285dfe341bb6" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "6600f138103612d56d6ff8ee7ea3e700" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b2b2289ae1f32c57b5712424cbf35df7" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "6ca67cd9e5bf84867a385538b9641802" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8bc373b92222790f3d9a9d6004aa55c2" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e3d8ef0717382aa7d7730789ce326034" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d24595aae28692502fb12c833d6dc8c7" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b57c4c58c53fa7f22479e287def0ce2f" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "e8583c4a067d0bae356a8ffdab40c3bc" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8a48268f638db0597a5cb2266dd00b95" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0e3dbd48a5ee4efc89ec142bb0962988" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "2ffeb1d73ad985f6f1272c23d8882553" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5efd14918633bb10f5b449dbdface259" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "f642b3de66631bc368fbbf07740d7335" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0f6b943590654b360a5beb617cfe93e6" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "208d2b19d183bbd5fa70d999e6b00c3f" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "a9ddda4027503c79c53306c44fcf9cc4" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "66239e6ba0c18e2845524d003ae4c098" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "34e406f2c5d359a9c2f5312a06aed5b8" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cb847b083831320f07d59061f561cfa3" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7242d87956afe655de94f8353ac45e70" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "75d7923697d59fdeffd3e3346ef9a404" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3bd17594a1bf9139f731493a7d35f929" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "61306ed78c15a5c5aaad2ea84a93b180" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ebca0139f2e14d82fcf472120fada4a2" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5c5b7f7be7c4ea5101e3a95fe269a20a" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "7a894ac429f10674a8240006d8d655b9" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "66d3f22ce0ba6d4fb38a645e96393173" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "2754893016e9b3f88ff9d20dafa8d443" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e5746a40b125af73c30a033fd465e2b0" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d302665378ccf8aeef836e647bbcff06" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "7fcd096c2b090457dd5788e47ac1f578" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "6460bead6be6454ab865506d406ef97d" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "0524bb13a2a79304d0a4de532c42c15e" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e64479dba2685aff5c23ddee217cc19a" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a2bc1f0b25e7ebc38ca58bd1c4124a1e" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "3defd64682e087ab790f44425226d73b" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "47674799c994f9851c6b25f710e53f8c" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "724fa2967d19c65fef70e4eceae0114e" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "097768d66fc4289b47ce5bfa69b9310f" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e74c0958f1da18e0e50c18e365d18734" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "2e9c54ff31ef4b1bbab6aceef38b54c3" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "e90412f59fc1ddd68012f0cb5462efd4" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "91cdfea275530898e955fc69d5fb8815" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "85097c0634f7ee663aaaf8bb846596a7" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3b87fccaa180cbfa7d8aac56735a348a" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "c5de8b4dc79b6bb379c156c7b618233e" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "6bff386e0db5d616b6ef585037ad79c6" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "637985fad61c4f3d11429ad63127be08" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1d121337b6af4208fdfcd0dc2dd9ecfa" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "354f89179e538172636e0b28f923dcd3" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "6cc77c081b64617bd15f90c82b3fb58d" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "42fffff60b313b32558309437f9476fe" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "da72f8b845e03ff070b69a05fa1bfc81" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "544c51d4b655cbd9401ed20260488d91" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d6f9dfc8d82a6a323e225d371a9d3cbc" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "3cf523325ca84f5d2a89578fa090f9a8" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2e0c54612900dc557416e14ca181bab6" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "40eb2acce30c7e8829c89aaba8d304f7" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "bdfbea50dacb5a176f139807f01601cf" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "73029da43080808e74226a112362db9c" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "1dfeed5c3b2229a4549940b14762cf3b" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b5f01c280e4ac89f17eccc47369c91ad" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "6ac2c685037a467c29af67858b5e333e" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "57b3d5111601f83f616d6f1d0c304fd6" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a73214f4453db09726df8cb3ada826cb" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d981460dc063edbad75e0b49edb04a88" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "583fc91e82036071bc03ea2e1f9f38df" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "8ad67542e20822fbb59fd01594e6a5b0" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4e9bd11ec485e40e531fb889b14d3ba4" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a57ba2aa34363ba322e132a8691a30ed" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "efabb32e1c231b375b397d53e626b7f8" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "efe3cecd6bb2a902334d7219b0387d84" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "4f8bd3787ad18067760f1593c9836ab4" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cbf1646438510ac693f732baeda364d6" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7b284c41718a5efc6a1fcddab8358213" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "bc9119951a950499472af95bd3dfa8e3" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0c49c41560368bd392198c384b13b439" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "176e3897f768fb4d6aaa15c568903296" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d0ae7fb8592103e9a45b23bd5179b9dc" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "abd6a6c337719f929e576057036b448c" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "5113285f896da8a1d36e6fbaeb5eedc1" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "30b528881a5a54604a2b8d7051058f34" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "e6ebd59c5c3ec02a673698aefaa6d56a" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8a8e05ee48e6645b1e9bd11cd2d4f0ee" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5858affd29a29de4144aa004745d79a5" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "7b63198a9729f2309a5125d45ec0cd71" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "01d99c1120bdc23af06359971865d17f" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "e768cb7291ed1f75f6c0ff493b2d2477" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d4140ff41f6ac26f21da4818204e5990" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "368cddef3bf6470a31c7507d773805dc" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "8be99c7f130b178f6d98560a338d6332" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4e8e071fb92d22213d11104b022644e8" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.48.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "0a3eeaebe035d0bd47952cdb9937ea11" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c676c2c05da22efd031546725cb3f9f8" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "905086323f6dfa13afa24cceaf89565b" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "c8bdea89a2b170d6255c3d668d53ccd5" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.49.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "38e6c817abdf91979af98d96bc1ac220" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.49.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "246e002e04f4f8695bc349bbc9c970f5" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b7f5002758c9eca0bead1d99719048b6" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "fca15f6fd1f505e14883b022e2f04d56" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "dfab114ecacfb2ada66b0c7ef6c38751" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3bf1e2d58823105c01c5f6becfc56177" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "9ee5d793f34551a3e7cb96ed4ca30dfc" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "618f0ccb448694055fc495a374be367b" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ff1efca44725c9e3a3a3d1103d825fd2" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "36b75f701fbf9b0856de38fdb1d07521" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "55111d639e2c8518fe55ab432a9dfc0f" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.50.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "7e4f8d3389340f8b01e55eefc556b9fc" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "10daaf04d9e46b8fb8cc2ccd57c4cdb4" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2236779c2668efbe5b3637797bf74c52" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "1acd50a00ee669e5fe16fee4ee67c059" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.51.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b31c6a03ecbee80abdc51334fd60f7f0" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.51.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "a35b5270ec1ae287b9d4ce39efc433fd" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4beebff6390bef52f8cb0364e7992192" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8c756569af0bd7206387dc3f64251179" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "308251dca16bf368ee51b0bae33c326a" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8503edc25b473e73675cff0b1f56a760" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.52.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "6e5b4cbb1b7b1fa9218c770c889dfd5b" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2e7a2e4a01968e07e7dc3cc4144a1711" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3948da35356416ae50776726532d5af4" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "ea6debb9245b5b15a66e5a5ee205947e" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a1f28f3ee62e245ec28a988fd6a5eae2" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.53.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "df47ed0b87e7341ac790d9acf2067fe7" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5bc6cc71c50061f8b652b996e573f7d7" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8ef94408d218196e49b863a4eef21829" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "e106f70d3eb1b73c5c4fe2c6e7a4ed4b" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.54.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "1b401047d93765aad1173ee10f981da7" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.54.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "e656db96fee15296e8334f3d10fce499" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8798f8694afac953c58e7feb81b5072a" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "eb5d64f73a3161baa65009f5dc534768" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "46d045b454a908b5ec55f21eaa19be69" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.55.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4117851dde5c0768fcc066abe6104dfa" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.55.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "d2dbcbb56818bba9ece0c3b20ade2cf2" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "bdd19113c15db6bcfb1f9cd29d788038" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3caa0235b0561bd3d57c348689d2c4fa" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "b33e3fb2c25a7d3103bc853ac9074093" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.56.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "9ec2cbf8a78f1fc58fc45eda65d64421" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.56.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "299f1a76db4c26182dcbb972bc08a55e" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a5ee09291ff5097248354072c08e95b2" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a1fd6d2f9718c5b0e3fc943f060dcb31" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "09a6315a606e710457e20f1ede132536" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.57.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "913875e7221bd88c0afc338091052725" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.57.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "081db6c143421b4ea6348d519a27ed7d" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "08be1cf78360cc04c8cc35f98886d39a" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5e4016c92f45fe379420605f3f0a829f" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "de2ff1d77d4a471ac3558c2859add03a" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "44ef565c81f90f9a38c9ac0a51bb8188" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.58.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "bb650461e56bb38b911783990501bf08" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "eb2f0318e8687f9801ea6d0405448b40" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a6a84bd4c3166bb45e58fafda373b95f" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "882d41de8dfb25aa4e8be8c03326c924" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.59.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "aad707c614c599ec26d5d46d05fb1406" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.59.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "e190ab1fe2bbbec3a8fe389bc7404b92" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "411f11b735f7d97c6c59e7bab1d3768b" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "960c5d1f6e4bd2523beee0833b7f1565" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "52d3718058cc60bd1aef75bec041c21c" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "37cf7e55110ccce1b26ae8913682b227" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "bfb35d047eb3e2b8e04e06f9a31db1f1" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cd31ea05f92b1e7adccf2b032ee937ee" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "555bf5e761066dbeb91dc1603b854cdf" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "c2ef2314a96f5f6e468dcb8c7cf4e04c" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.60.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7a3f6b83169fa7aa2ad790d8fc081e23" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.60.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "b8c5e6a5b419029f08cd942a8cfbde59" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0b24c9261645c07719b7fc68610b2918" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "90e3c12020498db256f548b5a27b5e7b" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f4c49c5604b2c1333c6002dc3e2ef296" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.61.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5c4b15eb34f14c31c35cc48401a88c46" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.61.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "a9eb8d395633f1763e9f5b4a2177780d" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3d93b4f83a0f7b4d0905de07449e31a1" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "87342d800a2e6cce0315f90cdd795a10" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "8d42a5a840c9b9b7cdde8ba2c4f8a730" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.62.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0f54db194f32f0751d235154824c3600" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.62.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "f61940485e04009c86462dcae008bd78" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c47e7d87ab2445508ad94397e3cca0bf" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a368b13c1b0a8a5d555d305492d4fc8e" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "e7f4be23b6a207375e5e7e5dc9362368" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.63.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ef1d4b3e5b99336965222c247dca89ae" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.63.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "21748eaf923bd899e558445c2810e6c6" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fad8d025db5286b7a70ae9adc2b90ab8" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "08c8651509c4ea8e90c8193180130d79" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "7844f57b712d34e9cc35d358ca567d46" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "6a8c16cb25c0ac3b4934a62047715762" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "e0f768516fbeea1dd5e262d3b9f04046" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ca7b691e4ccc966060ebf1f2925670b9" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d851d6d4865e793e75af034bcb1daf5a" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "283e457fa434c9a4f80006ec4c3be4e6" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "929e914b71ec871227901ad49042e451" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "6d2c2f3236f079d685eb7654be9aa431" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9ce66e6cc1a372f3847c5a9e33c1f837" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "128a45cfceaed994fc507581dbe85f06" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "ac32a5cb14e620503210314fe41b0506" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "35bacb47173d435345ee03eb39ffe5a4" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "fa3299ec967ebed4e5d4fc23b5d7a6ef" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 14755840, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 } ], "md5sum": "8c2f72f95167d4e47e6be5141adc7178" } ] }