Filip committed

Commit 56d8f41 · 1 Parent(s): 57a1258

update torch

Files changed (1)

app.py +43 -28
app.py CHANGED

@@ -2,41 +2,56 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor, TextStreamer
 import torch
 import gc
+import os
 
-# Configure torch to use CPU
+# Enable better CPU performance
+torch.set_num_threads(4)  # Adjust based on available CPU cores
 device = "cpu"
-torch.set_default_device(device)
 
-# Load model and tokenizer
 def load_model():
     model_name = "forestav/unsloth_vision_radiography_finetune"
 
-    # Load with 8-bit quantization and CPU optimization settings
+    # Load tokenizer and processor first to free up memory
+    print("Loading tokenizer and processor...")
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    processor = AutoProcessor.from_pretrained(model_name)
+
+    print("Loading model...")
+    # Load model with CPU optimizations
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         device_map="cpu",
-        load_in_8bit=True,
-        torch_dtype=torch.float16,
-        low_cpu_mem_usage=True
+        torch_dtype=torch.float32,  # Use float32 for CPU
+        low_cpu_mem_usage=True,
+        offload_folder="offload",  # Enable disk offloading
+        offload_state_dict=True  # Offload state dict to disk
     )
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    processor = AutoProcessor.from_pretrained(model_name)
+
+    # Quantize the model for CPU
+    print("Quantizing model...")
+    model = torch.quantization.quantize_dynamic(
+        model,
+        {torch.nn.Linear},  # Quantize linear layers
+        dtype=torch.qint8
+    )
+
     return model, tokenizer, processor
 
+# Create offload directory if it doesn't exist
+os.makedirs("offload", exist_ok=True)
+
 # Initialize model and tokenizer globally
-print("Loading model...")
+print("Starting model initialization...")
 try:
     model, tokenizer, processor = load_model()
-    print("Model loaded successfully!")
+    print("Model loaded and quantized successfully!")
 except Exception as e:
     print(f"Error loading model: {str(e)}")
     raise
 
 def analyze_image(image, instruction):
     try:
-        # Clear CUDA cache and garbage collect
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
+        # Clear memory
         gc.collect()
 
         if instruction.strip() == "":
@@ -57,32 +72,28 @@ def analyze_image(image, instruction):
             return_tensors="pt"
         )
 
-        # Generate the response
-        text_streamer = TextStreamer(tokenizer, skip_prompt=True)
-
-        # Generate with lower resource settings
+        # Generate with conservative settings for CPU
        with torch.no_grad():
             outputs = model.generate(
                 **inputs,
                 max_new_tokens=128,
-                temperature=1.2,
+                temperature=1.0,
                 min_p=0.1,
                 use_cache=True,
-                streamer=text_streamer
+                pad_token_id=tokenizer.eos_token_id,
+                num_beams=1  # Reduce beam search to save memory
             )
 
         # Decode the response
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-        # Clear memory
+        # Clean up
         del outputs
         gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
 
         return response
     except Exception as e:
-        return f"Error processing image: {str(e)}"
+        return f"Error processing image: {str(e)}\nPlease try again with a smaller image or different settings."
 
 # Create the Gradio interface
 with gr.Blocks() as demo:
@@ -93,7 +104,11 @@ with gr.Blocks() as demo:
 
     with gr.Row():
         with gr.Column():
-            image_input = gr.Image(type="pil", label="Upload Medical Image")
+            image_input = gr.Image(
+                type="pil",
+                label="Upload Medical Image",
+                max_pixels=1500000  # Limit image size
+            )
             instruction_input = gr.Textbox(
                 label="Custom Instruction (optional)",
                 placeholder="You are an expert radiographer. Describe accurately what you see in this image.",
@@ -113,9 +128,9 @@ with gr.Blocks() as demo:
 
     gr.Markdown("""
     ### Notes:
-    - The model runs on CPU and may take a few moments to process each image
-    - For best results, upload clear, high-quality medical images
-    - Default instruction will be used if none is provided
+    - The model runs on CPU and may take several moments to process each image
+    - For best results, upload images smaller than 1.5MP
+    - Please be patient during processing
     """)
 
 # Launch the app
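
The central change here swaps `load_in_8bit=True` (which relies on bitsandbytes and generally assumes a CUDA device, something a CPU Space doesn't have) for PyTorch's built-in dynamic quantization applied after a float32 load. A minimal sketch of what `torch.quantization.quantize_dynamic` does, using a toy module rather than the actual checkpoint (the layer sizes below are placeholders):

import torch
import torch.nn as nn

# Toy stand-in for the transformer; dynamic quantization rewrites nn.Linear
# modules, so any network containing them demonstrates the transformation.
net = nn.Sequential(nn.Linear(512, 512), nn.ReLU(), nn.Linear(512, 10))

# Weights are converted to int8 once; activations are quantized on the fly
# at inference time. No calibration data is needed, which is why this is
# practical to run once at startup.
qnet = torch.quantization.quantize_dynamic(net, {nn.Linear}, dtype=torch.qint8)

with torch.no_grad():
    out = qnet(torch.randn(1, 512))
print(out.shape)   # torch.Size([1, 10]) -- same interface as the fp32 model
print(type(qnet[0]))  # a dynamically quantized Linear has replaced nn.Linear

One caveat: this only touches `nn.Linear`, so embeddings, attention internals, and convolutions stay float32, and the memory savings depend on how much of the model is linear layers.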
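
The new `torch.set_num_threads(4)` is hard-coded, with a comment telling readers to adjust it per machine. A small sketch of making that automatic, assuming `os.cpu_count()` reflects the cores actually granted to the container:

import os
import torch

# os.cpu_count() may return None, so fall back to a single thread.
torch.set_num_threads(max(1, os.cpu_count() or 1))
print(f"intra-op threads: {torch.get_num_threads()}")

On shared hosts, oversubscribing threads can slow inference down rather than speed it up, so capping at the container's actual CPU quota is the safer choice than the physical core count.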
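
One flag on the Gradio change: `max_pixels` does not appear among `gr.Image`'s documented constructor arguments, so depending on the installed Gradio version it may be ignored rather than enforced. A version-independent way to apply the same 1.5 MP budget is to downscale inside `analyze_image` before building the prompt (a sketch; `cap_image_size` is a hypothetical helper and `MAX_PIXELS` mirrors the value used above):

from PIL import Image

MAX_PIXELS = 1_500_000  # same 1.5 MP budget as the gr.Image change above

def cap_image_size(image: Image.Image) -> Image.Image:
    """Downscale so width * height <= MAX_PIXELS, preserving aspect ratio."""
    w, h = image.size
    if w * h <= MAX_PIXELS:
        return image
    scale = (MAX_PIXELS / (w * h)) ** 0.5
    return image.resize((max(1, int(w * scale)), max(1, int(h * scale))))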