Upload folder using huggingface_hub
- config.json +6 -6
- generation_config.json +1 -1
- model.safetensors +2 -2
- recipe.yaml +19 -36
config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/
+  "_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -25,10 +25,10 @@
         "input_activations": {
           "actorder": null,
           "block_structure": null,
-          "dynamic":
+          "dynamic": true,
           "group_size": null,
           "num_bits": 8,
-          "observer":
+          "observer": null,
           "observer_kwargs": {},
           "strategy": "tensor",
           "symmetric": true,
@@ -53,7 +53,7 @@
       }
     },
     "format": "float-quantized",
-    "global_compression_ratio": 1.
+    "global_compression_ratio": 1.455621255352356,
     "ignore": [
       "lm_head"
     ],
@@ -62,7 +62,7 @@
     "quantization_status": "compressed",
     "sparsity_config": {
       "format": "sparse-24",
-      "global_sparsity": 0.
+      "global_sparsity": 0.44038256626553035,
       "ignore": [
         "lm_head"
       ],
@@ -78,7 +78,7 @@
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.
+  "transformers_version": "4.46.3",
   "use_cache": true,
   "vocab_size": 32000
 }
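The updated config marks the checkpoint as float-quantized (FP8) with 2:4 sparsity ("sparse-24"), ignoring lm_head. As a minimal sketch, assuming a transformers install with the compressed-tensors package available, such a checkpoint can be loaded through the usual AutoModel path; the repo id below is a placeholder, not a value taken from this commit.

# Minimal loading sketch (assumes `compressed-tensors` is installed so that
# transformers can decompress the float-quantized, sparse-24 checkpoint).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "path/to/this-repo"  # placeholder: the repo id this folder was uploaded to

model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

prompt = "Write a haiku about sparsity."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))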
generation_config.json
CHANGED
@@ -3,5 +3,5 @@
   "eos_token_id": 2,
   "max_length": 2048,
   "pad_token_id": 0,
-  "transformers_version": "4.
+  "transformers_version": "4.46.3"
 }
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:0c4d8125ecb0003cab318dae2791d013757172c6fd72ecf009cd6b7df0b5a77f
+size 867943076
recipe.yaml
CHANGED
@@ -1,38 +1,21 @@
 pruning_stage:
-
-
-
-
-
-
+  obcq_modifiers:
+    SparseGPTModifier:
+      sparsity: 0.5
+      sequential_update: true
+      mask_structure: '2:4'
+      targets: ['re:model.layers.\d*$']
 quant_stage:
-
-
-
-
-
-
-
-
-
-
-
-
-
-        type: float
-        strategy: tensor
-        dynamic: false
-        symmetric: true
-        targets: ["Linear"]
-  pruning_modifiers:
-    ConstantPruningModifier:
-      targets: [
-        're:.*q_proj.weight',
-        're:.*k_proj.weight',
-        're:.*v_proj.weight',
-        're:.*o_proj.weight',
-        're:.*gate_proj.weight',
-        're:.*up_proj.weight',
-        're:.*down_proj.weight',
-      ]
-      start: 0
+  quant_modifiers:
+    QuantizationModifier:
+      ignore: [lm_head]
+      config_groups:
+        group_0:
+          weights: {num_bits: 8, type: float, strategy: tensor, dynamic: false, symmetric: true}
+          input_activations: {num_bits: 8, type: float, strategy: tensor, dynamic: true, symmetric: true}
+          targets: [Linear]
+  pruning_modifiers:
+    ConstantPruningModifier:
+      targets: ['re:.*q_proj.weight', 're:.*k_proj.weight', 're:.*v_proj.weight', 're:.*o_proj.weight',
+        're:.*gate_proj.weight', 're:.*up_proj.weight', 're:.*down_proj.weight']
+      start: 0
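The new recipe prunes the decoder layers to 2:4 sparsity with SparseGPT, then quantizes Linear weights and input activations to 8-bit float while leaving lm_head untouched and keeping the pruned mask constant during quantization. For context, a sketch of how a recipe like this is typically applied with llm-compressor's one-shot flow follows; the entrypoint import, calibration dataset, sample count, and output directory are assumptions for illustration, not part of this commit.

# Illustrative one-shot compression run (not part of this commit).
# Assumes the llm-compressor `oneshot` entrypoint; dataset, sample count,
# and output directory are placeholders.
from llmcompressor.transformers import oneshot

oneshot(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # base model named in config.json
    recipe="recipe.yaml",                        # the recipe shown above
    dataset="open_platypus",                     # illustrative calibration set
    max_seq_length=2048,
    num_calibration_samples=512,
    output_dir="TinyLlama-1.1B-Chat-v1.0-2of4-fp8-dynamic",
)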