RelaxingSnorlax committed
Commit 4f324b6 · verified · 1 parent: de00f24

Upload folder using huggingface_hub

Files changed (4)
  1. config.json +6 -6
  2. generation_config.json +1 -1
  3. model.safetensors +2 -2
  4. recipe.yaml +19 -36
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/home/rahul/.cache/huggingface/hub/models--nm-testing--TinyLlama-1.1B-Chat-v1.0-pruned_50.2of4-uncompressed/snapshots/e2ce63869dc88391472e4a841e8a2286e7281a56",
+  "_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -25,10 +25,10 @@
         "input_activations": {
           "actorder": null,
           "block_structure": null,
-          "dynamic": false,
+          "dynamic": true,
           "group_size": null,
           "num_bits": 8,
-          "observer": "minmax",
+          "observer": null,
           "observer_kwargs": {},
           "strategy": "tensor",
           "symmetric": true,
@@ -53,7 +53,7 @@
       }
     },
     "format": "float-quantized",
-    "global_compression_ratio": 1.4556212089295866,
+    "global_compression_ratio": 1.455621255352356,
     "ignore": [
       "lm_head"
     ],
@@ -62,7 +62,7 @@
     "quantization_status": "compressed",
     "sparsity_config": {
       "format": "sparse-24",
-      "global_sparsity": 0.44038258295766824,
+      "global_sparsity": 0.44038256626553035,
       "ignore": [
         "lm_head"
       ],
@@ -78,7 +78,7 @@
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.45.2",
+  "transformers_version": "4.46.3",
   "use_cache": true,
   "vocab_size": 32000
 }
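The substantive change in this file is that input activations move from static FP8 scales calibrated with a minmax observer to scales computed at runtime ("dynamic": true, "observer": null), which is why the observer field empties out. Below is a minimal sketch of dynamic per-tensor symmetric FP8-E4M3 activation quantization in PyTorch; the function name is illustrative, not the compressed-tensors implementation (448.0 is the largest finite value of torch.float8_e4m3fn).

import torch

FP8_E4M3_MAX = 448.0  # largest finite value representable in torch.float8_e4m3fn

def quantize_dynamic_fp8(x: torch.Tensor):
    # The scale is derived from the live activation tensor at call time,
    # which is why the updated config needs no calibration observer.
    scale = x.abs().max().clamp(min=1e-12) / FP8_E4M3_MAX
    q = (x / scale).clamp(-FP8_E4M3_MAX, FP8_E4M3_MAX).to(torch.float8_e4m3fn)
    return q, scale

x = torch.randn(4, 16)
q, scale = quantize_dynamic_fp8(x)
x_hat = q.to(torch.float32) * scale  # dequantize to inspect the rounding error
print(f"scale={scale.item():.6f}  max_abs_err={(x - x_hat).abs().max().item():.6f}")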
generation_config.json CHANGED
@@ -3,5 +3,5 @@
   "eos_token_id": 2,
   "max_length": 2048,
   "pad_token_id": 0,
-  "transformers_version": "4.45.2"
+  "transformers_version": "4.46.3"
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed252107fdb6095ebff5add58793b73f12e2e79640b3f127846432d1e4816fb9
-size 867956784
+oid sha256:0c4d8125ecb0003cab318dae2791d013757172c6fd72ecf009cd6b7df0b5a77f
+size 867943076
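To confirm a local download matches the new LFS pointer, recomputing the two pointer fields is enough; a small sketch follows (the local file path is an assumption).

import hashlib, os

path = "model.safetensors"  # assumed location of the downloaded weights
digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)
print(digest.hexdigest())     # expect 0c4d8125ecb0003cab318dae2791d013757172c6fd72ecf009cd6b7df0b5a77f
print(os.path.getsize(path))  # expect 867943076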
recipe.yaml CHANGED
@@ -1,38 +1,21 @@
 pruning_stage:
-  obcq_modifiers:
-    SparseGPTModifier:
-      sparsity: 0.5
-      sequential_update: true
-      mask_structure: "2:4"
-      targets: ['re:model.layers.\d*$']
+  obcq_modifiers:
+    SparseGPTModifier:
+      sparsity: 0.5
+      sequential_update: true
+      mask_structure: '2:4'
+      targets: ['re:model.layers.\d*$']
 quant_stage:
-  quant_modifiers:
-    QuantizationModifier:
-      ignore: ["lm_head"]
-      config_groups:
-        group_0:
-          weights:
-            num_bits: 8
-            type: float
-            strategy: tensor
-            dynamic: false
-            symmetric: true
-          input_activations:
-            num_bits: 8
-            type: float
-            strategy: tensor
-            dynamic: false
-            symmetric: true
-          targets: ["Linear"]
-  pruning_modifiers:
-    ConstantPruningModifier:
-      targets: [
-        're:.*q_proj.weight',
-        're:.*k_proj.weight',
-        're:.*v_proj.weight',
-        're:.*o_proj.weight',
-        're:.*gate_proj.weight',
-        're:.*up_proj.weight',
-        're:.*down_proj.weight',
-      ]
-      start: 0
+  quant_modifiers:
+    QuantizationModifier:
+      ignore: [lm_head]
+      config_groups:
+        group_0:
+          weights: {num_bits: 8, type: float, strategy: tensor, dynamic: false, symmetric: true}
+          input_activations: {num_bits: 8, type: float, strategy: tensor, dynamic: true, symmetric: true}
+          targets: [Linear]
+  pruning_modifiers:
+    ConstantPruningModifier:
+      targets: ['re:.*q_proj.weight', 're:.*k_proj.weight', 're:.*v_proj.weight', 're:.*o_proj.weight',
+        're:.*gate_proj.weight', 're:.*up_proj.weight', 're:.*down_proj.weight']
+      start: 0
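For reference, a recipe like the one above is typically applied with llm-compressor's one-shot entrypoint. The sketch below assumes the llmcompressor.transformers.oneshot API; the calibration dataset, sample count, sequence length, and output directory are illustrative choices not recorded in this commit.

from llmcompressor.transformers import oneshot

oneshot(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    dataset="open_platypus",          # assumed calibration set (SparseGPT needs calibration data)
    recipe="recipe.yaml",             # the recipe shown in this diff
    output_dir="TinyLlama-1.1B-Chat-v1.0-2of4-FP8-dynamic",
    max_seq_length=2048,
    num_calibration_samples=512,
)

Note that with "dynamic": true for input activations, the calibration pass only matters for the SparseGPT pruning stage and the static weight scales; activation scales are computed at inference time.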