diff --git "a/WizardLM-2-7B-q4f16_1-MLC/ndarray-cache.json" "b/WizardLM-2-7B-q4f16_1-MLC/ndarray-cache.json" deleted file mode 100644--- "a/WizardLM-2-7B-q4f16_1-MLC/ndarray-cache.json" +++ /dev/null @@ -1,4367 +0,0 @@ -{ - "metadata": { - "ParamSize": 325, - "ParamBytes": 4073857024.0, - "BitsPerParam": 4.50042279387851 - }, - "records": [ - { - "dataPath": "params_shard_0.bin", - "format": "raw-shard", - "nbytes": 65536000, - "records": [ - { - "name": "lm_head.q_weight", - "shape": [ - 32000, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 65536000, - "byteOffset": 0 - } - ], - "md5sum": "ea92bd10956c71bd3bfa0bf5e70c00bd" - }, - { - "dataPath": "params_shard_1.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.22.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "83c9d51c0fd11cb87771f0103d72148e" - }, - { - "dataPath": "params_shard_2.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.22.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "30afbf8029683c4d3f10a3aa0164ab18" - }, - { - "dataPath": "params_shard_3.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.23.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "2751cd7d8b7b5cf219a364edc59bc641" - }, - { - "dataPath": "params_shard_4.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.23.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "9df53027ee2f12596c3b8ca9a23d2b82" - }, - { - "dataPath": "params_shard_5.bin", - "format": "raw-shard", - "nbytes": 30244864, - "records": [ - { - "name": "lm_head.q_scale", - "shape": [ - 32000, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192000, - "byteOffset": 0 - }, - { - "name": "model.layers.22.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 8192000 - }, - { - "name": "model.layers.22.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 8200192 - }, - { - "name": "model.layers.22.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 11870208 - }, - { - "name": "model.layers.22.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 19210240 - }, - { - "name": "model.layers.23.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 19218432 - }, - { - "name": "model.layers.23.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 19226624 - }, - { - "name": "model.layers.23.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 22896640 - }, - { - "name": "model.layers.23.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 30236672 - } - ], - "md5sum": "b3ccf48e7255b4e87408740b7eca0181" - }, - { - "dataPath": "params_shard_6.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.24.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "943157769be74cd31dc21eeb9e00ae74" - }, - { - "dataPath": "params_shard_7.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.24.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "2e87263ba13873ae270e3d09e5051845" - }, - { - "dataPath": "params_shard_8.bin", - "format": "raw-shard", - "nbytes": 27271168, - "records": [ - { - "name": "model.layers.23.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 0 - }, - { - "name": "model.layers.23.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 12582912 - }, - { - "name": "model.layers.23.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 14155776 - }, - { - "name": "model.layers.23.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 22544384 - }, - { - "name": "model.layers.24.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 23592960 - }, - { - "name": "model.layers.24.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 23601152 - } - ], - "md5sum": "b8a67d7c2b5460e7394a90dbf9ff65fd" - }, - { - "dataPath": "params_shard_9.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.25.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "ca83dc790042097a6443b70cc6d36d86" - }, - { - "dataPath": "params_shard_10.bin", - "format": "raw-shard", - "nbytes": 30949376, - "records": [ - { - "name": "model.layers.24.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 0 - }, - { - "name": "model.layers.24.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 7340032 - }, - { - "name": "model.layers.24.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 7348224 - }, - { - "name": "model.layers.24.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 19931136 - }, - { - "name": "model.layers.24.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 21504000 - }, - { - "name": "model.layers.24.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 29892608 - }, - { - "name": "model.layers.25.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 30941184 - } - ], - "md5sum": "0d5bb5109af241e8be79c3185f2b9da4" - }, - { - "dataPath": "params_shard_11.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.25.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "e1f3eedc50e7fe647c96065b8c2aedff" - }, - { - "dataPath": "params_shard_12.bin", - "format": "raw-shard", - "nbytes": 25174016, - "records": [ - { - "name": "model.layers.25.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 0 - }, - { - "name": "model.layers.25.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 3670016 - }, - { - "name": "model.layers.25.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 11010048 - }, - { - "name": "model.layers.25.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 11018240 - }, - { - "name": "model.layers.25.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 23601152 - } - ], - "md5sum": "d2c74b197802c3018ec9194f520bcab5" - }, - { - "dataPath": "params_shard_13.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.26.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "290d9ccac13bfee25a56bb237c1f6c4d" - }, - { - "dataPath": "params_shard_14.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.26.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "cf2598461628529f2a1567d5645246ec" - }, - { - "dataPath": "params_shard_15.bin", - "format": "raw-shard", - "nbytes": 33046528, - "records": [ - { - "name": "model.layers.25.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 0 - }, - { - "name": "model.layers.25.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 8388608 - }, - { - "name": "model.layers.26.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 9437184 - }, - { - "name": "model.layers.26.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 9445376 - }, - { - "name": "model.layers.26.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 13115392 - }, - { - "name": "model.layers.26.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 20455424 - }, - { - "name": "model.layers.26.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 20463616 - } - ], - "md5sum": "7ae6a3f62f29148b4b7fdabbfd8402c8" - }, - { - "dataPath": "params_shard_16.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.27.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "5a59d8157a2ea31d93d91444be14a9e4" - }, - { - "dataPath": "params_shard_17.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.27.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "d51bf7893aa61e19c7ca98aa6e0c15bc" - }, - { - "dataPath": "params_shard_18.bin", - "format": "raw-shard", - "nbytes": 22036480, - "records": [ - { - "name": "model.layers.26.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 0 - }, - { - "name": "model.layers.26.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 1572864 - }, - { - "name": "model.layers.26.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 9961472 - }, - { - "name": "model.layers.27.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 11010048 - }, - { - "name": "model.layers.27.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 11018240 - }, - { - "name": "model.layers.27.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 14688256 - }, - { - "name": "model.layers.27.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 22028288 - } - ], - "md5sum": "ed9ff4a16887e35c2635970099e13e67" - }, - { - "dataPath": "params_shard_19.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.28.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "6e7f9be0aae4f57dab35c31eab247f4e" - }, - { - "dataPath": "params_shard_20.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.28.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "c335cf723542d04be693a064096596e8" - }, - { - "dataPath": "params_shard_21.bin", - "format": "raw-shard", - "nbytes": 27271168, - "records": [ - { - "name": "model.layers.27.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 0 - }, - { - "name": "model.layers.27.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 12582912 - }, - { - "name": "model.layers.27.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 14155776 - }, - { - "name": "model.layers.27.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 22544384 - }, - { - "name": "model.layers.28.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 23592960 - }, - { - "name": "model.layers.28.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 23601152 - } - ], - "md5sum": "ca3dfcf43a1ff82f16d3563279c62d63" - }, - { - "dataPath": "params_shard_22.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.29.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "eb256278a18cbbf38b4549228aeded1f" - }, - { - "dataPath": "params_shard_23.bin", - "format": "raw-shard", - "nbytes": 30949376, - "records": [ - { - "name": "model.layers.28.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 0 - }, - { - "name": "model.layers.28.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 7340032 - }, - { - "name": "model.layers.28.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 7348224 - }, - { - "name": "model.layers.28.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 19931136 - }, - { - "name": "model.layers.28.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 21504000 - }, - { - "name": "model.layers.28.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 29892608 - }, - { - "name": "model.layers.29.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 30941184 - } - ], - "md5sum": "1732eb8d599cd9432df58f4f01591f7e" - }, - { - "dataPath": "params_shard_24.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.29.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "add277ce8f52148019a1966aa21da22c" - }, - { - "dataPath": "params_shard_25.bin", - "format": "raw-shard", - "nbytes": 25174016, - "records": [ - { - "name": "model.layers.29.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 0 - }, - { - "name": "model.layers.29.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 3670016 - }, - { - "name": "model.layers.29.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 11010048 - }, - { - "name": "model.layers.29.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 11018240 - }, - { - "name": "model.layers.29.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 23601152 - } - ], - "md5sum": "b84140dbee12ab9a4025ed13b074fb78" - }, - { - "dataPath": "params_shard_26.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.30.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "30a231f6745e1fa8bd7052c59b261fbe" - }, - { - "dataPath": "params_shard_27.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.30.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "dc97f65f12654aaa9d14da919ddc971e" - }, - { - "dataPath": "params_shard_28.bin", - "format": "raw-shard", - "nbytes": 33046528, - "records": [ - { - "name": "model.layers.29.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 0 - }, - { - "name": "model.layers.29.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 8388608 - }, - { - "name": "model.layers.30.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 9437184 - }, - { - "name": "model.layers.30.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 9445376 - }, - { - "name": "model.layers.30.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 13115392 - }, - { - "name": "model.layers.30.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 20455424 - }, - { - "name": "model.layers.30.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 20463616 - } - ], - "md5sum": "c2983880e0c3750534244de66fc4c313" - }, - { - "dataPath": "params_shard_29.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.31.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "3b81d35d58d9c6d19b79d1210285ae75" - }, - { - "dataPath": "params_shard_30.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.31.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "2878d888c43b2b03d14745893c551ba7" - }, - { - "dataPath": "params_shard_31.bin", - "format": "raw-shard", - "nbytes": 22036480, - "records": [ - { - "name": "model.layers.30.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 0 - }, - { - "name": "model.layers.30.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 1572864 - }, - { - "name": "model.layers.30.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 9961472 - }, - { - "name": "model.layers.31.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 11010048 - }, - { - "name": "model.layers.31.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 11018240 - }, - { - "name": "model.layers.31.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 14688256 - }, - { - "name": "model.layers.31.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 22028288 - } - ], - "md5sum": "f81c66d8e3b5d6f6be7b82af6a3b5d88" - }, - { - "dataPath": "params_shard_32.bin", - "format": "raw-shard", - "nbytes": 65536000, - "records": [ - { - "name": "model.embed_tokens.q_weight", - "shape": [ - 32000, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 65536000, - "byteOffset": 0 - } - ], - "md5sum": "0b43f474b1ac7253da4e83542ce70085" - }, - { - "dataPath": "params_shard_33.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.0.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "6c5a422b1e3437dd22961df748d4654a" - }, - { - "dataPath": "params_shard_34.bin", - "format": "raw-shard", - "nbytes": 31801344, - "records": [ - { - "name": "model.layers.31.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 0 - }, - { - "name": "model.layers.31.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 12582912 - }, - { - "name": "model.layers.31.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 14155776 - }, - { - "name": "model.layers.31.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 22544384 - }, - { - "name": "model.norm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 23592960 - }, - { - "name": "model.embed_tokens.q_scale", - "shape": [ - 32000, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192000, - "byteOffset": 23601152 - }, - { - "name": "model.layers.0.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 31793152 - } - ], - "md5sum": "834d1aa77c78ab427e9051bba42ecf72" - }, - { - "dataPath": "params_shard_35.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.0.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "376ab526a72741e618d0bc326175dcb4" - }, - { - "dataPath": "params_shard_36.bin", - "format": "raw-shard", - "nbytes": 25174016, - "records": [ - { - "name": "model.layers.0.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 0 - }, - { - "name": "model.layers.0.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 3670016 - }, - { - "name": "model.layers.0.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 11010048 - }, - { - "name": "model.layers.0.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 11018240 - }, - { - "name": "model.layers.0.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 23601152 - } - ], - "md5sum": "d71a31444d05b5aaf1cc3df42b3ad7e6" - }, - { - "dataPath": "params_shard_37.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.1.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "ba48532ad54361eed32348bfdd37429b" - }, - { - "dataPath": "params_shard_38.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.1.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "ed1141a27d45ff97a3745f1eee27c8b2" - }, - { - "dataPath": "params_shard_39.bin", - "format": "raw-shard", - "nbytes": 33046528, - "records": [ - { - "name": "model.layers.0.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 0 - }, - { - "name": "model.layers.0.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 8388608 - }, - { - "name": "model.layers.1.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 9437184 - }, - { - "name": "model.layers.1.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 9445376 - }, - { - "name": "model.layers.1.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 13115392 - }, - { - "name": "model.layers.1.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 20455424 - }, - { - "name": "model.layers.1.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 20463616 - } - ], - "md5sum": "cb2fd50124875e948ae93ab35b94a897" - }, - { - "dataPath": "params_shard_40.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.10.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "96da1c8961272169828b0c60f4b94e3b" - }, - { - "dataPath": "params_shard_41.bin", - "format": "raw-shard", - "nbytes": 32505856, - "records": [ - { - "name": "model.layers.1.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 0 - }, - { - "name": "model.layers.1.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 1572864 - }, - { - "name": "model.layers.1.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 9961472 - }, - { - "name": "model.layers.10.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 11010048 - }, - { - "name": "model.layers.10.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 18350080 - }, - { - "name": "model.layers.10.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 30932992 - } - ], - "md5sum": "a0d2a948c69256e40c41d9911e10d112" - }, - { - "dataPath": "params_shard_42.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.2.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "ca5bc5554616167c47b672c2fce80ed1" - }, - { - "dataPath": "params_shard_43.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.2.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "adbf70d3843bf1680898292098c8f165" - }, - { - "dataPath": "params_shard_44.bin", - "format": "raw-shard", - "nbytes": 33046528, - "records": [ - { - "name": "model.layers.10.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 0 - }, - { - "name": "model.layers.10.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 8388608 - }, - { - "name": "model.layers.2.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 9437184 - }, - { - "name": "model.layers.2.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 9445376 - }, - { - "name": "model.layers.2.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 13115392 - }, - { - "name": "model.layers.2.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 20455424 - }, - { - "name": "model.layers.2.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 20463616 - } - ], - "md5sum": "9d0e4ebc5db83cf3d88c177a95e25e6a" - }, - { - "dataPath": "params_shard_45.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.3.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "e50c3c67615dd4decac87b17f33c49b6" - }, - { - "dataPath": "params_shard_46.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.3.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "265caa7af2ed96fb7b5569ecf05436ea" - }, - { - "dataPath": "params_shard_47.bin", - "format": "raw-shard", - "nbytes": 22036480, - "records": [ - { - "name": "model.layers.2.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 0 - }, - { - "name": "model.layers.2.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 1572864 - }, - { - "name": "model.layers.2.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 9961472 - }, - { - "name": "model.layers.3.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 11010048 - }, - { - "name": "model.layers.3.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 11018240 - }, - { - "name": "model.layers.3.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 14688256 - }, - { - "name": "model.layers.3.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 22028288 - } - ], - "md5sum": "b9c10e43deab62466b52fec079b18f9e" - }, - { - "dataPath": "params_shard_48.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.4.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "8c42f7705d7dab8b7e99058242aac2db" - }, - { - "dataPath": "params_shard_49.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.4.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "ec0792982fdbaaaa1a5041d5266c574b" - }, - { - "dataPath": "params_shard_50.bin", - "format": "raw-shard", - "nbytes": 27271168, - "records": [ - { - "name": "model.layers.3.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 0 - }, - { - "name": "model.layers.3.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 12582912 - }, - { - "name": "model.layers.3.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 14155776 - }, - { - "name": "model.layers.3.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 22544384 - }, - { - "name": "model.layers.4.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 23592960 - }, - { - "name": "model.layers.4.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 23601152 - } - ], - "md5sum": "6a96ea5e30edc0e40e6ffd7eb28c617c" - }, - { - "dataPath": "params_shard_51.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.5.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "90d5c7c491d1b0be2e5959aff1916535" - }, - { - "dataPath": "params_shard_52.bin", - "format": "raw-shard", - "nbytes": 30949376, - "records": [ - { - "name": "model.layers.4.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 0 - }, - { - "name": "model.layers.4.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 7340032 - }, - { - "name": "model.layers.4.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 7348224 - }, - { - "name": "model.layers.4.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 19931136 - }, - { - "name": "model.layers.4.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 21504000 - }, - { - "name": "model.layers.4.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 29892608 - }, - { - "name": "model.layers.5.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 30941184 - } - ], - "md5sum": "5fd45c768302533b5675c3f42b7fa8bc" - }, - { - "dataPath": "params_shard_53.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.5.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "6e48408e996d493e6d2f905dfa277d11" - }, - { - "dataPath": "params_shard_54.bin", - "format": "raw-shard", - "nbytes": 25174016, - "records": [ - { - "name": "model.layers.5.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 0 - }, - { - "name": "model.layers.5.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 3670016 - }, - { - "name": "model.layers.5.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 11010048 - }, - { - "name": "model.layers.5.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 11018240 - }, - { - "name": "model.layers.5.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 23601152 - } - ], - "md5sum": "0a1981c123a234306101ea543b2794ee" - }, - { - "dataPath": "params_shard_55.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.6.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "ef5a21c9e4e8fc741ee465fb54790197" - }, - { - "dataPath": "params_shard_56.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.6.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "17d6de88247ff8dd52304fa3f4f5d6a8" - }, - { - "dataPath": "params_shard_57.bin", - "format": "raw-shard", - "nbytes": 33046528, - "records": [ - { - "name": "model.layers.5.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 0 - }, - { - "name": "model.layers.5.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 8388608 - }, - { - "name": "model.layers.6.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 9437184 - }, - { - "name": "model.layers.6.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 9445376 - }, - { - "name": "model.layers.6.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 13115392 - }, - { - "name": "model.layers.6.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 20455424 - }, - { - "name": "model.layers.6.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 20463616 - } - ], - "md5sum": "8b339d79c9547afeda4f297f98b19aac" - }, - { - "dataPath": "params_shard_58.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.7.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "2027d080057ed7efaea2a0f5fc5d643f" - }, - { - "dataPath": "params_shard_59.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.7.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "658d8a89a33a55d3f17dfeae2329408d" - }, - { - "dataPath": "params_shard_60.bin", - "format": "raw-shard", - "nbytes": 22036480, - "records": [ - { - "name": "model.layers.6.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 0 - }, - { - "name": "model.layers.6.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 1572864 - }, - { - "name": "model.layers.6.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 9961472 - }, - { - "name": "model.layers.7.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 11010048 - }, - { - "name": "model.layers.7.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 11018240 - }, - { - "name": "model.layers.7.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 14688256 - }, - { - "name": "model.layers.7.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 22028288 - } - ], - "md5sum": "39083ab8b09b0648645b534ec7fc9e36" - }, - { - "dataPath": "params_shard_61.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.8.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "0c1b4eebef4ffe8e4c1a5fa11704bf5e" - }, - { - "dataPath": "params_shard_62.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.8.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "e0c4541c03fe4450fb400e40b3183eef" - }, - { - "dataPath": "params_shard_63.bin", - "format": "raw-shard", - "nbytes": 27271168, - "records": [ - { - "name": "model.layers.7.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 0 - }, - { - "name": "model.layers.7.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 12582912 - }, - { - "name": "model.layers.7.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 14155776 - }, - { - "name": "model.layers.7.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 22544384 - }, - { - "name": "model.layers.8.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 23592960 - }, - { - "name": "model.layers.8.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 23601152 - } - ], - "md5sum": "86f5cc627fd8cb722b46489e6548f073" - }, - { - "dataPath": "params_shard_64.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.9.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "2e095f2a47cac37f18807e2411840ad2" - }, - { - "dataPath": "params_shard_65.bin", - "format": "raw-shard", - "nbytes": 30949376, - "records": [ - { - "name": "model.layers.8.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 0 - }, - { - "name": "model.layers.8.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 7340032 - }, - { - "name": "model.layers.8.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 7348224 - }, - { - "name": "model.layers.8.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 19931136 - }, - { - "name": "model.layers.8.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 21504000 - }, - { - "name": "model.layers.8.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 29892608 - }, - { - "name": "model.layers.9.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 30941184 - } - ], - "md5sum": "c3d75c4e72915566c44f1bf4dbf738da" - }, - { - "dataPath": "params_shard_66.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.9.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "8a93e613db338710590667b9b04cb01a" - }, - { - "dataPath": "params_shard_67.bin", - "format": "raw-shard", - "nbytes": 25174016, - "records": [ - { - "name": "model.layers.9.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 0 - }, - { - "name": "model.layers.9.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 3670016 - }, - { - "name": "model.layers.9.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 11010048 - }, - { - "name": "model.layers.9.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 11018240 - }, - { - "name": "model.layers.9.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 23601152 - } - ], - "md5sum": "82dfb289b08873b3a3ced785ad3fa390" - }, - { - "dataPath": "params_shard_68.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.10.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "aeaebdbf01153f2ceec5d463d8c90602" - }, - { - "dataPath": "params_shard_69.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.11.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "01f69f5389da2a98da6b8d122239adf2" - }, - { - "dataPath": "params_shard_70.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.11.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "cd13600478955798897fe6c5bbfe43a5" - }, - { - "dataPath": "params_shard_71.bin", - "format": "raw-shard", - "nbytes": 24150016, - "records": [ - { - "name": "model.layers.9.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 0 - }, - { - "name": "model.layers.9.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 8388608 - }, - { - "name": "model.layers.10.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 9437184 - }, - { - "name": "model.layers.10.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 9445376 - }, - { - "name": "model.layers.10.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 13115392 - }, - { - "name": "model.layers.11.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 13123584 - }, - { - "name": "model.layers.11.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 13131776 - }, - { - "name": "model.layers.11.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 16801792 - }, - { - "name": "model.layers.11.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 24141824 - } - ], - "md5sum": "c41255173d166b5a3bc4280e89ccb556" - }, - { - "dataPath": "params_shard_72.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.12.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "43451d048271d72fb8f752f1f1c89917" - }, - { - "dataPath": "params_shard_73.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.12.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "08845265b7909bf30d92f25020e13ef5" - }, - { - "dataPath": "params_shard_74.bin", - "format": "raw-shard", - "nbytes": 27271168, - "records": [ - { - "name": "model.layers.11.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 0 - }, - { - "name": "model.layers.11.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 12582912 - }, - { - "name": "model.layers.11.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 14155776 - }, - { - "name": "model.layers.11.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 22544384 - }, - { - "name": "model.layers.12.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 23592960 - }, - { - "name": "model.layers.12.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 23601152 - } - ], - "md5sum": "a8e30af7945e6357f999f12de2391a0f" - }, - { - "dataPath": "params_shard_75.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.13.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "0bf05591981f4752a05004e3ebc44af8" - }, - { - "dataPath": "params_shard_76.bin", - "format": "raw-shard", - "nbytes": 30949376, - "records": [ - { - "name": "model.layers.12.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 0 - }, - { - "name": "model.layers.12.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 7340032 - }, - { - "name": "model.layers.12.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 7348224 - }, - { - "name": "model.layers.12.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 19931136 - }, - { - "name": "model.layers.12.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 21504000 - }, - { - "name": "model.layers.12.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 29892608 - }, - { - "name": "model.layers.13.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 30941184 - } - ], - "md5sum": "86a8cc075f64fda11069a660fdd78c08" - }, - { - "dataPath": "params_shard_77.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.13.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "3f00a7dc4ed6e685da1d7c019051f97d" - }, - { - "dataPath": "params_shard_78.bin", - "format": "raw-shard", - "nbytes": 25174016, - "records": [ - { - "name": "model.layers.13.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 0 - }, - { - "name": "model.layers.13.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 3670016 - }, - { - "name": "model.layers.13.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 11010048 - }, - { - "name": "model.layers.13.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 11018240 - }, - { - "name": "model.layers.13.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 23601152 - } - ], - "md5sum": "25d6f8d59339c2b469c44321ff70f393" - }, - { - "dataPath": "params_shard_79.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.14.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "8ae5a58d9c577e1d40bcefb844f0972a" - }, - { - "dataPath": "params_shard_80.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.14.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "49c878b03f131355da37a3a06af8b5b2" - }, - { - "dataPath": "params_shard_81.bin", - "format": "raw-shard", - "nbytes": 33046528, - "records": [ - { - "name": "model.layers.13.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 0 - }, - { - "name": "model.layers.13.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 8388608 - }, - { - "name": "model.layers.14.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 9437184 - }, - { - "name": "model.layers.14.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 9445376 - }, - { - "name": "model.layers.14.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 13115392 - }, - { - "name": "model.layers.14.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 20455424 - }, - { - "name": "model.layers.14.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 20463616 - } - ], - "md5sum": "e390e7d75990e5adf3c9c0b420dffc35" - }, - { - "dataPath": "params_shard_82.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.15.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "c0ccc9436147f07aa7ed31dbc64e1338" - }, - { - "dataPath": "params_shard_83.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.15.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "392d613841f5ed272611028d51b55b94" - }, - { - "dataPath": "params_shard_84.bin", - "format": "raw-shard", - "nbytes": 22036480, - "records": [ - { - "name": "model.layers.14.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 0 - }, - { - "name": "model.layers.14.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 1572864 - }, - { - "name": "model.layers.14.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 9961472 - }, - { - "name": "model.layers.15.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 11010048 - }, - { - "name": "model.layers.15.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 11018240 - }, - { - "name": "model.layers.15.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 14688256 - }, - { - "name": "model.layers.15.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 22028288 - } - ], - "md5sum": "8265513b17af532e0bc0af88b152bacb" - }, - { - "dataPath": "params_shard_85.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.16.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "11d5f508dfb4bf3bd7784b631b7aeab6" - }, - { - "dataPath": "params_shard_86.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.16.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "407d16ba839620552e10a8254a3b71ac" - }, - { - "dataPath": "params_shard_87.bin", - "format": "raw-shard", - "nbytes": 27271168, - "records": [ - { - "name": "model.layers.15.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 0 - }, - { - "name": "model.layers.15.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 12582912 - }, - { - "name": "model.layers.15.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 14155776 - }, - { - "name": "model.layers.15.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 22544384 - }, - { - "name": "model.layers.16.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 23592960 - }, - { - "name": "model.layers.16.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 23601152 - } - ], - "md5sum": "bec3204b7cb6c7dff1160630c2f85135" - }, - { - "dataPath": "params_shard_88.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.17.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "6e12dc5c1b29edea475cc2081118c9bb" - }, - { - "dataPath": "params_shard_89.bin", - "format": "raw-shard", - "nbytes": 30949376, - "records": [ - { - "name": "model.layers.16.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 0 - }, - { - "name": "model.layers.16.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 7340032 - }, - { - "name": "model.layers.16.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 7348224 - }, - { - "name": "model.layers.16.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 19931136 - }, - { - "name": "model.layers.16.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 21504000 - }, - { - "name": "model.layers.16.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 29892608 - }, - { - "name": "model.layers.17.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 30941184 - } - ], - "md5sum": "4c45ae0afb4ad84c5877f52c0d447406" - }, - { - "dataPath": "params_shard_90.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.17.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "8d5245cbf5feda469bc192c06ef68d35" - }, - { - "dataPath": "params_shard_91.bin", - "format": "raw-shard", - "nbytes": 25174016, - "records": [ - { - "name": "model.layers.17.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 0 - }, - { - "name": "model.layers.17.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 3670016 - }, - { - "name": "model.layers.17.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 11010048 - }, - { - "name": "model.layers.17.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 11018240 - }, - { - "name": "model.layers.17.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 23601152 - } - ], - "md5sum": "87c5b9cb5070c8da7b5ec94308dddf97" - }, - { - "dataPath": "params_shard_92.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.18.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "698689312dc9b2c18a1ef9a06a4f4015" - }, - { - "dataPath": "params_shard_93.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.18.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "3e70871503e5bf2b4a54f031c9a77985" - }, - { - "dataPath": "params_shard_94.bin", - "format": "raw-shard", - "nbytes": 33046528, - "records": [ - { - "name": "model.layers.17.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 0 - }, - { - "name": "model.layers.17.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 8388608 - }, - { - "name": "model.layers.18.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 9437184 - }, - { - "name": "model.layers.18.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 9445376 - }, - { - "name": "model.layers.18.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 13115392 - }, - { - "name": "model.layers.18.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 20455424 - }, - { - "name": "model.layers.18.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 20463616 - } - ], - "md5sum": "84ed5a5ac6431b54ef65fab93a933c97" - }, - { - "dataPath": "params_shard_95.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.19.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "62f8b472cb0ea4f9e5748fd5136b5945" - }, - { - "dataPath": "params_shard_96.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.19.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "3dc835a2c209decf9cb2b9290e63d28f" - }, - { - "dataPath": "params_shard_97.bin", - "format": "raw-shard", - "nbytes": 22036480, - "records": [ - { - "name": "model.layers.18.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 0 - }, - { - "name": "model.layers.18.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 1572864 - }, - { - "name": "model.layers.18.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 9961472 - }, - { - "name": "model.layers.19.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 11010048 - }, - { - "name": "model.layers.19.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 11018240 - }, - { - "name": "model.layers.19.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 14688256 - }, - { - "name": "model.layers.19.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 22028288 - } - ], - "md5sum": "9a3d2a9eca033d720524ec63066cb491" - }, - { - "dataPath": "params_shard_98.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.20.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "0c8f0a904714bf1d51b090453444d054" - }, - { - "dataPath": "params_shard_99.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.20.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "d73acad94fd61466e3a9cee44c6c95d6" - }, - { - "dataPath": "params_shard_100.bin", - "format": "raw-shard", - "nbytes": 27271168, - "records": [ - { - "name": "model.layers.19.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 0 - }, - { - "name": "model.layers.19.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 12582912 - }, - { - "name": "model.layers.19.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 14155776 - }, - { - "name": "model.layers.19.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 22544384 - }, - { - "name": "model.layers.20.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 23592960 - }, - { - "name": "model.layers.20.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 23601152 - } - ], - "md5sum": "1f921f2b367ae3b8ac84ff3b73636531" - }, - { - "dataPath": "params_shard_101.bin", - "format": "raw-shard", - "nbytes": 29360128, - "records": [ - { - "name": "model.layers.21.mlp.down_proj.q_weight", - "shape": [ - 4096, - 1792 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 29360128, - "byteOffset": 0 - } - ], - "md5sum": "982b5a574a6a249c63922efd763855b8" - }, - { - "dataPath": "params_shard_102.bin", - "format": "raw-shard", - "nbytes": 30949376, - "records": [ - { - "name": "model.layers.20.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 0 - }, - { - "name": "model.layers.20.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 7340032 - }, - { - "name": "model.layers.20.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 7348224 - }, - { - "name": "model.layers.20.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 19931136 - }, - { - "name": "model.layers.20.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 21504000 - }, - { - "name": "model.layers.20.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 29892608 - }, - { - "name": "model.layers.21.input_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 30941184 - } - ], - "md5sum": "50e1e7efdc695f3cc5f7bf93a8822d11" - }, - { - "dataPath": "params_shard_103.bin", - "format": "raw-shard", - "nbytes": 58720256, - "records": [ - { - "name": "model.layers.21.mlp.gate_up_proj.q_weight", - "shape": [ - 28672, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 58720256, - "byteOffset": 0 - } - ], - "md5sum": "a021eaae1e88b3c3c019aed19ecb73e3" - }, - { - "dataPath": "params_shard_104.bin", - "format": "raw-shard", - "nbytes": 25174016, - "records": [ - { - "name": "model.layers.21.mlp.down_proj.q_scale", - "shape": [ - 4096, - 448 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 3670016, - "byteOffset": 0 - }, - { - "name": "model.layers.21.mlp.gate_up_proj.q_scale", - "shape": [ - 28672, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 7340032, - "byteOffset": 3670016 - }, - { - "name": "model.layers.21.post_attention_layernorm.weight", - "shape": [ - 4096 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 8192, - "byteOffset": 11010048 - }, - { - "name": "model.layers.21.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 11018240 - }, - { - "name": "model.layers.21.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 23601152 - } - ], - "md5sum": "3e1f4120d800910e090ac5ae03b8e6b8" - }, - { - "dataPath": "params_shard_105.bin", - "format": "raw-shard", - "nbytes": 33030144, - "records": [ - { - "name": "model.layers.21.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 0 - }, - { - "name": "model.layers.21.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 8388608 - }, - { - "name": "model.layers.22.self_attn.qkv_proj.q_weight", - "shape": [ - 6144, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 12582912, - "byteOffset": 9437184 - }, - { - "name": "model.layers.22.self_attn.qkv_proj.q_scale", - "shape": [ - 6144, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1572864, - "byteOffset": 22020096 - }, - { - "name": "model.layers.22.self_attn.o_proj.q_weight", - "shape": [ - 4096, - 512 - ], - "dtype": "uint32", - "format": "f32-to-bf16", - "nbytes": 8388608, - "byteOffset": 23592960 - }, - { - "name": "model.layers.22.self_attn.o_proj.q_scale", - "shape": [ - 4096, - 128 - ], - "dtype": "float16", - "format": "f32-to-bf16", - "nbytes": 1048576, - "byteOffset": 31981568 - } - ], - "md5sum": "4c4f82894275ff0b75eded23f2af4dd1" - } - ] -} \ No newline at end of file