mgoin committed
Commit c0f8aad · Parent(s): a19b56d
Files changed (1): app.py (+7 -14)
app.py CHANGED
@@ -1,10 +1,9 @@
 import os
-from typing import Optional, Tuple, List
+from typing import Tuple, List
 import gradio as gr
-import torch
 import spaces
 from dataclasses import dataclass
-from huggingface_hub import HfApi, Repository, CommitOperationAdd
+from huggingface_hub import HfApi, CommitOperationAdd
 from transformers import AutoProcessor
 from llmcompressor.modifiers.quantization import QuantizationModifier
 from llmcompressor.transformers import oneshot, wrap_hf_model_class
@@ -28,19 +27,18 @@ def parse_ignore_list(ignore_str: str) -> List[str]:
 def create_quantized_model(
     model_id: str,
     work_dir: str,
-    api: HfApi,
     ignore_list: List[str],
     model_class_name: str
 ) -> Tuple[str, List[Tuple[str, Exception]]]:
     """Quantize model to FP8 and save to disk"""
-
+
     errors = []
     try:
         # Get the appropriate model class
-        exec(f"from transformers import {class_name}")
-        model_class = eval(class_name)
+        exec(f"from transformers import {model_class_name}")
+        model_class = eval(model_class_name)
         wrapped_model_class = wrap_hf_model_class(model_class)
-
+
         # Load model with ZeroGPU
         model = wrapped_model_class.from_pretrained(
             model_id,
@@ -162,7 +160,6 @@ processor = AutoProcessor.from_pretrained("{target_repo}")
 @spaces.GPU(duration=300)  # 5 minutes timeout for large models
 def run(
     model_id: str,
-    is_private: bool,
     token: str,
     ignore_str: str,
     model_class_name: str
@@ -195,7 +192,6 @@ def run(
     quantized_path, errors = create_quantized_model(
         model_id,
         work_dir,
-        api,
         ignore_list,
         model_class_name
     )
@@ -279,9 +275,6 @@ with gr.Blocks(title=title) as demo:
         label="model_id",
         placeholder="huggingface/model-name"
     )
-    is_private = gr.Checkbox(
-        label="Private model (requires read access to original model)"
-    )
     token = gr.Text(
         max_lines=1,
         label="your_hf_token (requires write access)",
@@ -309,7 +302,7 @@ with gr.Blocks(title=title) as demo:
 
     submit.click(
         run,
-        inputs=[model_id, is_private, token, ignore_str, model_class_name],
+        inputs=[model_id, token, ignore_str, model_class_name],
         outputs=output,
         concurrency_limit=1
     )
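The core bug this commit fixes is a stale variable name: create_quantized_model takes a model_class_name parameter, but the exec/eval pair still referenced class_name, which would raise NameError at call time. The same lookup can be done without exec/eval at all, via getattr on the transformers module. A minimal sketch of that alternative (resolve_model_class and its error message are illustrative, not code from app.py):

import transformers

def resolve_model_class(model_class_name: str):
    """Resolve a class such as 'AutoModelForCausalLM' by name,
    without running exec/eval on user-supplied input."""
    model_class = getattr(transformers, model_class_name, None)
    if model_class is None:
        raise ValueError(f"transformers has no class named {model_class_name!r}")
    return model_class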
 
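For context on the imports at the top of app.py: QuantizationModifier and oneshot are llm-compressor's one-shot FP8 path (app.py additionally passes the model class through wrap_hf_model_class first). A sketch of how those pieces typically fit together, assuming the FP8_DYNAMIC scheme; the model id, output directory, and ignore entries are placeholders for values the app collects from the UI:

from llmcompressor.modifiers.quantization import QuantizationModifier
from llmcompressor.transformers import oneshot

# FP8_DYNAMIC quantizes Linear weights to FP8 with dynamic activation
# scales, so no calibration dataset is required.
recipe = QuantizationModifier(
    targets="Linear",
    scheme="FP8_DYNAMIC",
    ignore=["lm_head"],  # placeholder; app.py builds this via parse_ignore_list
)

oneshot(
    model="huggingface/model-name",  # placeholder model id
    recipe=recipe,
    output_dir="./model-FP8-dynamic",  # placeholder save location
)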
160
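The other half of the change keeps the Gradio wiring consistent with the new signature: gr.Blocks passes component values to the handler positionally, so removing is_private from run() also requires removing it from the inputs= list in submit.click. A stripped-down sketch of that contract (the handler body and extra component labels are illustrative):

import gradio as gr

def run(model_id: str, token: str, ignore_str: str, model_class_name: str) -> str:
    # Values arrive in the same order as the inputs= list below,
    # so the signature and the list must stay in sync.
    return f"Would quantize {model_id} as {model_class_name}"

with gr.Blocks(title="FP8 quantization demo") as demo:
    model_id = gr.Text(label="model_id", placeholder="huggingface/model-name")
    token = gr.Text(max_lines=1, label="your_hf_token (requires write access)")
    ignore_str = gr.Text(label="ignore_list")
    model_class_name = gr.Text(label="model_class_name")
    output = gr.Markdown()
    submit = gr.Button("Submit")

    submit.click(
        run,
        inputs=[model_id, token, ignore_str, model_class_name],
        outputs=output,
        concurrency_limit=1,
    )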
  @spaces.GPU(duration=300) # 5 minutes timeout for large models
161
  def run(
162
  model_id: str,
 
163
  token: str,
164
  ignore_str: str,
165
  model_class_name: str
 
192
  quantized_path, errors = create_quantized_model(
193
  model_id,
194
  work_dir,
 
195
  ignore_list,
196
  model_class_name
197
  )
 
275
  label="model_id",
276
  placeholder="huggingface/model-name"
277
  )
 
 
 
278
  token = gr.Text(
279
  max_lines=1,
280
  label="your_hf_token (requires write access)",
 
302
 
303
  submit.click(
304
  run,
305
+ inputs=[model_id, token, ignore_str, model_class_name],
306
  outputs=output,
307
  concurrency_limit=1
308
  )