app.py CHANGED

@@ -1,10 +1,9 @@
 import os
-from typing import …
+from typing import Tuple, List
 import gradio as gr
-import torch
 import spaces
 from dataclasses import dataclass
-from huggingface_hub import HfApi, …
+from huggingface_hub import HfApi, CommitOperationAdd
 from transformers import AutoProcessor
 from llmcompressor.modifiers.quantization import QuantizationModifier
 from llmcompressor.transformers import oneshot, wrap_hf_model_class
@@ -28,19 +27,18 @@ def parse_ignore_list(ignore_str: str) -> List[str]:
 def create_quantized_model(
     model_id: str,
     work_dir: str,
-    api: HfApi,
     ignore_list: List[str],
     model_class_name: str
 ) -> Tuple[str, List[Tuple[str, Exception]]]:
     """Quantize model to FP8 and save to disk"""
-
+
     errors = []
     try:
         # Get the appropriate model class
-        exec(f"from transformers import {…
-        model_class = eval(…
+        exec(f"from transformers import {model_class_name}")
+        model_class = eval(model_class_name)
         wrapped_model_class = wrap_hf_model_class(model_class)
-
+
         # Load model with ZeroGPU
         model = wrapped_model_class.from_pretrained(
             model_id,
@@ -162,7 +160,6 @@ processor = AutoProcessor.from_pretrained("{target_repo}")
 @spaces.GPU(duration=300)  # 5 minutes timeout for large models
 def run(
     model_id: str,
-    is_private: bool,
     token: str,
     ignore_str: str,
     model_class_name: str
@@ -195,7 +192,6 @@ def run(
     quantized_path, errors = create_quantized_model(
         model_id,
         work_dir,
-        api,
         ignore_list,
         model_class_name
     )
@@ -279,9 +275,6 @@ with gr.Blocks(title=title) as demo:
         label="model_id",
         placeholder="huggingface/model-name"
     )
-    is_private = gr.Checkbox(
-        label="Private model (requires read access to original model)"
-    )
     token = gr.Text(
         max_lines=1,
         label="your_hf_token (requires write access)",
@@ -309,7 +302,7 @@ with gr.Blocks(title=title) as demo:
 
     submit.click(
         run,
-        inputs=[model_id, is_private, token, ignore_str, model_class_name],
+        inputs=[model_id, token, ignore_str, model_class_name],
         outputs=output,
         concurrency_limit=1
     )
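A note on the dynamic class lookup added in create_quantized_model: importing via exec and resolving via eval does work, but it executes an arbitrary string and is fragile inside a function scope. A minimal alternative sketch using getattr (not part of this diff; the helper name resolve_model_class is hypothetical):

    import transformers

    def resolve_model_class(model_class_name: str):
        # Look the class up as an attribute of the transformers module;
        # an unknown name raises AttributeError instead of executing code.
        return getattr(transformers, model_class_name)

    # e.g. model_class = resolve_model_class("AutoModelForCausalLM")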
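For context on the llmcompressor imports at the top of the file: oneshot applies a quantization recipe to a loaded model in a single pass, and wrap_hf_model_class adapts a transformers model class so the compressed checkpoint saves correctly. A minimal FP8 sketch of how these pieces typically combine; the scheme, targets, and ignore list below are illustrative assumptions, not values taken from this diff:

    from llmcompressor.modifiers.quantization import QuantizationModifier
    from llmcompressor.transformers import oneshot

    # Assumed recipe: dynamic FP8 on Linear layers, leaving lm_head unquantized.
    recipe = QuantizationModifier(targets="Linear", scheme="FP8_DYNAMIC", ignore=["lm_head"])
    oneshot(model=model, recipe=recipe, output_dir=work_dir)

Here model and work_dir stand for the loaded model and working directory already present in create_quantized_model.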
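The submit.click rewiring is the counterpart of the run() signature change: Gradio passes the components listed in inputs to the handler positionally, so after dropping is_private the list has to match run(model_id, token, ignore_str, model_class_name) exactly, in order. A self-contained sketch of the pattern with placeholder components:

    import gradio as gr

    def run(model_id: str, token: str, ignore_str: str, model_class_name: str) -> str:
        # Values arrive in the same order as the components in `inputs`.
        return f"Would quantize {model_id} with {model_class_name}"

    with gr.Blocks() as demo:
        model_id = gr.Text(label="model_id")
        token = gr.Text(label="your_hf_token (requires write access)")
        ignore_str = gr.Text(label="ignore_list")
        model_class_name = gr.Text(label="model_class_name")
        output = gr.Markdown()
        submit = gr.Button("Submit")
        submit.click(run, inputs=[model_id, token, ignore_str, model_class_name],
                     outputs=output, concurrency_limit=1)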