Upload folder using huggingface_hub
- config.json +6 -6
- generation_config.json +1 -1
- model.safetensors +2 -2
- recipe.yaml +19 -36
config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/
+  "_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -25,10 +25,10 @@
         "input_activations": {
           "actorder": null,
           "block_structure": null,
-          "dynamic":
+          "dynamic": true,
           "group_size": null,
           "num_bits": 8,
-          "observer":
+          "observer": null,
           "observer_kwargs": {},
           "strategy": "tensor",
           "symmetric": true,
@@ -53,7 +53,7 @@
       }
     },
     "format": "float-quantized",
-    "global_compression_ratio": 1.
+    "global_compression_ratio": 1.455621255352356,
     "ignore": [
       "lm_head"
     ],
@@ -62,7 +62,7 @@
     "quantization_status": "compressed",
     "sparsity_config": {
       "format": "sparse-24",
-      "global_sparsity": 0.
+      "global_sparsity": 0.44038256626553035,
       "ignore": [
         "lm_head"
       ],
@@ -78,7 +78,7 @@
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.
+  "transformers_version": "4.46.3",
   "use_cache": true,
   "vocab_size": 32000
 }
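The updated config marks the checkpoint as float-quantized (FP8) with 2:4 sparsity ("sparse-24"), ignoring lm_head. As a minimal sketch, assuming a transformers install with the compressed-tensors package available, such a checkpoint can be loaded through the usual AutoModel path; the repo id below is a placeholder, not a value taken from this commit.

# Minimal loading sketch (assumes `compressed-tensors` is installed so that
# transformers can decompress the float-quantized, sparse-24 checkpoint).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "path/to/this-repo"  # placeholder: the repo id this folder was uploaded to

model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

prompt = "Write a haiku about sparsity."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))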
generation_config.json
CHANGED
@@ -3,5 +3,5 @@
   "eos_token_id": 2,
   "max_length": 2048,
   "pad_token_id": 0,
-  "transformers_version": "4.
+  "transformers_version": "4.46.3"
 }
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:0c4d8125ecb0003cab318dae2791d013757172c6fd72ecf009cd6b7df0b5a77f
+size 867943076
recipe.yaml
CHANGED
@@ -1,38 +1,21 @@
 pruning_stage:
-
-
-
-
-
-
+  obcq_modifiers:
+    SparseGPTModifier:
+      sparsity: 0.5
+      sequential_update: true
+      mask_structure: '2:4'
+      targets: ['re:model.layers.\d*$']
 quant_stage:
-
-
-
-
-
-
-
-
-
-
-
-
-
-        type: float
-        strategy: tensor
-        dynamic: false
-        symmetric: true
-        targets: ["Linear"]
-  pruning_modifiers:
-    ConstantPruningModifier:
-      targets: [
-        're:.*q_proj.weight',
-        're:.*k_proj.weight',
-        're:.*v_proj.weight',
-        're:.*o_proj.weight',
-        're:.*gate_proj.weight',
-        're:.*up_proj.weight',
-        're:.*down_proj.weight',
-      ]
-      start: 0
+  quant_modifiers:
+    QuantizationModifier:
+      ignore: [lm_head]
+      config_groups:
+        group_0:
+          weights: {num_bits: 8, type: float, strategy: tensor, dynamic: false, symmetric: true}
+          input_activations: {num_bits: 8, type: float, strategy: tensor, dynamic: true, symmetric: true}
+          targets: [Linear]
+  pruning_modifiers:
+    ConstantPruningModifier:
+      targets: ['re:.*q_proj.weight', 're:.*k_proj.weight', 're:.*v_proj.weight', 're:.*o_proj.weight',
+        're:.*gate_proj.weight', 're:.*up_proj.weight', 're:.*down_proj.weight']
+      start: 0
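The new recipe prunes the decoder layers to 2:4 sparsity with SparseGPT, then quantizes Linear weights and input activations to 8-bit float while leaving lm_head untouched and keeping the pruned mask constant during quantization. For context, a sketch of how a recipe like this is typically applied with llm-compressor's one-shot flow follows; the entrypoint import, calibration dataset, sample count, and output directory are assumptions for illustration, not part of this commit.

# Illustrative one-shot compression run (not part of this commit).
# Assumes the llm-compressor `oneshot` entrypoint; dataset, sample count,
# and output directory are placeholders.
from llmcompressor.transformers import oneshot

oneshot(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # base model named in config.json
    recipe="recipe.yaml",                        # the recipe shown above
    dataset="open_platypus",                     # illustrative calibration set
    max_seq_length=2048,
    num_calibration_samples=512,
    output_dir="TinyLlama-1.1B-Chat-v1.0-2of4-fp8-dynamic",
)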