Filip committed on
Commit 57a1258 · 1 Parent(s): 8ed8457

change to 8 bit

Files changed (2)
  1. app.py +58 -37
  2. requirements.txt +5 -5
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor, TextStreamer
 import torch
+import gc

 # Configure torch to use CPU
 device = "cpu"
@@ -10,10 +11,11 @@ torch.set_default_device(device)
 def load_model():
     model_name = "forestav/unsloth_vision_radiography_finetune"

-    # Load with CPU optimization settings
+    # Load with 8-bit quantization and CPU optimization settings
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         device_map="cpu",
+        load_in_8bit=True,
         torch_dtype=torch.float16,
         low_cpu_mem_usage=True
     )
@@ -23,45 +25,64 @@ def load_model():

 # Initialize model and tokenizer globally
 print("Loading model...")
-model, tokenizer, processor = load_model()
-print("Model loaded!")
+try:
+    model, tokenizer, processor = load_model()
+    print("Model loaded successfully!")
+except Exception as e:
+    print(f"Error loading model: {str(e)}")
+    raise

 def analyze_image(image, instruction):
-    if instruction.strip() == "":
-        instruction = "You are an expert radiographer. Describe accurately what you see in this image."
-
-    # Prepare the messages
-    messages = [
-        {"role": "user", "content": [
-            {"type": "image"},
-            {"type": "text", "text": instruction}
-        ]}
-    ]
-
-    # Process the image and text
-    inputs = processor(
-        images=image,
-        text=tokenizer.apply_chat_template(messages, add_generation_prompt=True),
-        return_tensors="pt"
-    )
-
-    # Generate the response
-    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
-
-    # Generate with lower resource settings
-    with torch.no_grad():
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=128,
-            temperature=1.2,
-            min_p=0.1,
-            use_cache=True,
-            streamer=text_streamer
-        )
-
-    # Decode the response
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response
+    try:
+        # Clear CUDA cache and garbage collect
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        gc.collect()
+
+        if instruction.strip() == "":
+            instruction = "You are an expert radiographer. Describe accurately what you see in this image."
+
+        # Prepare the messages
+        messages = [
+            {"role": "user", "content": [
+                {"type": "image"},
+                {"type": "text", "text": instruction}
+            ]}
+        ]
+
+        # Process the image and text
+        inputs = processor(
+            images=image,
+            text=tokenizer.apply_chat_template(messages, add_generation_prompt=True),
+            return_tensors="pt"
+        )
+
+        # Generate the response
+        text_streamer = TextStreamer(tokenizer, skip_prompt=True)
+
+        # Generate with lower resource settings
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=128,
+                temperature=1.2,
+                min_p=0.1,
+                use_cache=True,
+                streamer=text_streamer
+            )
+
+        # Decode the response
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        # Clear memory
+        del outputs
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
+        return response
+    except Exception as e:
+        return f"Error processing image: {str(e)}"

 # Create the Gradio interface
 with gr.Blocks() as demo:
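
A note on the quantized load this commit introduces: in recent transformers releases, passing load_in_8bit=True directly to from_pretrained is deprecated in favor of an explicit BitsAndBytesConfig, and bitsandbytes' int8 kernels have traditionally required a CUDA GPU, so combining load_in_8bit=True with device_map="cpu" may error out or fall back to an unquantized load. Below is a minimal sketch of the BitsAndBytesConfig form, assuming a recent transformers/bitsandbytes install and a CUDA-capable machine; it is not part of the commit itself.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_name = "forestav/unsloth_vision_radiography_finetune"

# Explicit quantization config instead of the deprecated bare kwarg
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",          # let accelerate place layers; int8 generally expects a GPU
    torch_dtype=torch.float16,  # dtype used for the non-quantized modules
    low_cpu_mem_usage=True,
)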
requirements.txt CHANGED
@@ -1,5 +1,5 @@
-transformers>=4.31.0
-torch>=2.0.0
-gradio>=3.34.0
-accelerate>=0.26.0
-pillow
+gradio
+torch
+transformers
+bitsandbytes
+accelerate
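
Since the new requirements.txt drops all version pins, a quick sanity check (a hypothetical snippet, not part of this commit) can confirm that whatever versions pip resolved actually import, and whether a CUDA device is present for bitsandbytes:

import torch
import transformers
import bitsandbytes as bnb

print("transformers:", transformers.__version__)
print("bitsandbytes:", bnb.__version__)
print("CUDA available:", torch.cuda.is_available())  # int8 kernels generally expect True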