Daemontatox committed (verified)
Commit ebc31d1 · 1 Parent(s): 5328f67

Update app.py

Files changed (1):
  1. app.py +6 -6
app.py CHANGED
@@ -73,11 +73,11 @@ h3 {
 def initialize_model():
     """Initialize the model with appropriate configurations"""
     quantization_config = BitsAndBytesConfig(
-        load_in_8bit=True,
-        bnb_8bit_compute_dtype=torch.bfloat16,
-        bnb_8bit_quant_type="nf4",
-        bnb_8bit_use_double_quant=True,
-        llm_int8_enable_fp32_cpu_offload=True
+        load_in_4bit=True,
+        bnb_4bit_compute_dtype=torch.bfloat16,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_use_double_quant=True,
+        #llm_int8_enable_fp32_cpu_offload=True
     )
 
     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID , trust_remote_code=True)
@@ -90,7 +90,7 @@ def initialize_model():
         device_map="cuda",
         # attn_implementation="flash_attention_2",
         trust_remote_code=True,
-        #quantization_config=quantization_config
+        quantization_config=quantization_config
 
     )
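
For context, before this commit the quantization_config argument to from_pretrained was commented out, so the 8-bit config had no effect; the commit switches the config to 4-bit NF4 with bfloat16 compute and double quantization, and re-enables the argument. Below is a minimal sketch of what initialize_model() looks like with the change applied. Only the BitsAndBytesConfig fields and the from_pretrained arguments shown in the diff come from the commit; the MODEL_ID value and the return statement are illustrative assumptions, since the rest of app.py is not part of this diff.

```python
# Sketch of initialize_model() after this commit (not the full app.py).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_ID = "owner/model-name"  # placeholder; the real value is defined elsewhere in app.py

def initialize_model():
    """Initialize the model with appropriate configurations"""
    # 4-bit NF4 quantization with bfloat16 compute and double quantization,
    # as introduced by this commit (replacing the earlier, unused 8-bit config).
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="cuda",
        # attn_implementation="flash_attention_2",
        trust_remote_code=True,
        quantization_config=quantization_config,  # now actually applied
    )
    # Return shape is an assumption; the diff does not show the rest of the function.
    return model, tokenizer
```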