Filip committed
Commit ed56d3f · 1 Parent(s): fe01251
Files changed (2)
  1. app.py +64 -33
  2. requirements.txt +4 -3
app.py CHANGED
@@ -3,70 +3,100 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor, Tex
 import torch
 import gc
 import os
+from accelerate import init_empty_weights
+from accelerate.utils import load_checkpoint_in_model
+import psutil
 
 # Enable better CPU performance
 torch.set_num_threads(4)
 device = "cpu"
 
+def get_free_memory():
+    """Get available memory in GB"""
+    return psutil.virtual_memory().available / (1024 * 1024 * 1024)
+
+def load_model_in_chunks(model_path, chunk_size_gb=2):
+    """Load model in chunks to manage memory"""
+    config = AutoModelForCausalLM.from_pretrained(model_path, return_dict=False).config
+
+    with init_empty_weights():
+        empty_model = AutoModelForCausalLM.from_config(config)
+
+    # Get checkpoint files
+    index_path = os.path.join(model_path, "model.safetensors.index.json")
+    if os.path.exists(index_path):
+        checkpoint_files = [
+            os.path.join(model_path, f"model-{i:05d}-of-{5:05d}.safetensors")
+            for i in range(1, 6)
+        ]
+    else:
+        checkpoint_files = [os.path.join(model_path, "pytorch_model.bin")]
+
+    # Load each chunk
+    for checkpoint in checkpoint_files:
+        if get_free_memory() < 2:  # If less than 2GB free
+            gc.collect()
+            torch.cuda.empty_cache()
+
+        load_checkpoint_in_model(empty_model, checkpoint)
+        gc.collect()
+
+    return empty_model
+
 def load_model():
     model_name = "forestav/unsloth_vision_radiography_finetune"
-    base_model_name = "unsloth/Llama-3.2-11B-Vision-Instruct"  # Correct base model
+    base_model_name = "unsloth/Llama-3.2-11B-Vision-Instruct"
 
     print("Loading tokenizer and processor...")
-    # Load tokenizer from base model
     tokenizer = AutoTokenizer.from_pretrained(
         base_model_name,
-        trust_remote_code=True
+        trust_remote_code=True,
+        cache_dir="model_cache"
     )
 
-    # Load processor from base model
     processor = AutoProcessor.from_pretrained(
         base_model_name,
-        trust_remote_code=True
+        trust_remote_code=True,
+        cache_dir="model_cache"
     )
 
-    print("Loading model...")
-    # Load model with CPU optimizations
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        device_map="cpu",
-        torch_dtype=torch.float32,
-        low_cpu_mem_usage=True,
-        offload_folder="offload",
-        offload_state_dict=True,
-        trust_remote_code=True
-    )
+    print("Loading model in chunks...")
+    model = load_model_in_chunks(model_name)
 
-    print("Quantizing model...")
+    print("Optimizing model for CPU...")
+    # Convert to float32 and quantize
+    model = model.to(torch.float32)
     model = torch.quantization.quantize_dynamic(
         model,
-        {torch.nn.Linear},
+        {torch.nn.Linear, torch.nn.Conv2d},
         dtype=torch.qint8
     )
 
     return model, tokenizer, processor
 
-# Create offload directory if it doesn't exist
+# Create cache directories
+os.makedirs("model_cache", exist_ok=True)
 os.makedirs("offload", exist_ok=True)
 
+print(f"Available memory before loading: {get_free_memory():.2f} GB")
+
 # Initialize model and tokenizer globally
 print("Starting model initialization...")
 try:
     model, tokenizer, processor = load_model()
-    print("Model loaded and quantized successfully!")
+    print("Model loaded successfully!")
+    print(f"Available memory after loading: {get_free_memory():.2f} GB")
 except Exception as e:
     print(f"Error loading model: {str(e)}")
     raise
 
 def analyze_image(image, instruction):
     try:
-        # Clear memory
         gc.collect()
 
         if instruction.strip() == "":
             instruction = "You are an expert radiographer. Describe accurately what you see in this image."
 
-        # Prepare the messages
         messages = [
             {"role": "user", "content": [
                 {"type": "image"},
@@ -74,14 +104,17 @@ def analyze_image(image, instruction):
             ]}
         ]
 
-        # Process the image and text
+        # Process with memory checks
+        if get_free_memory() < 2:
+            gc.collect()
+
         inputs = processor(
             images=image,
             text=tokenizer.apply_chat_template(messages, add_generation_prompt=True),
             return_tensors="pt"
         )
 
-        # Generate with conservative settings for CPU
+        # Generate with minimal memory usage
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
@@ -90,14 +123,13 @@ def analyze_image(image, instruction):
                 min_p=0.1,
                 use_cache=True,
                 pad_token_id=tokenizer.eos_token_id,
-                num_beams=1
+                num_beams=1,
+                do_sample=False  # Disable sampling to save memory
             )
 
-        # Decode the response
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-        # Clean up
-        del outputs
+        del outputs, inputs
         gc.collect()
 
         return response
@@ -116,7 +148,7 @@ with gr.Blocks() as demo:
             image_input = gr.Image(
                 type="pil",
                 label="Upload Medical Image",
-                max_pixels=1500000  # Limit image size
+                max_pixels=1000000  # Reduced max image size
             )
             instruction_input = gr.Textbox(
                 label="Custom Instruction (optional)",
@@ -128,7 +160,6 @@ with gr.Blocks() as demo:
         with gr.Column():
             output_text = gr.Textbox(label="Analysis Result", lines=10)
 
-    # Handle the submission
     submit_btn.click(
         fn=analyze_image,
         inputs=[image_input, instruction_input],
@@ -137,11 +168,11 @@ with gr.Blocks() as demo:
 
     gr.Markdown("""
    ### Notes:
-    - The model runs on CPU and may take several moments to process each image
-    - For best results, upload images smaller than 1.5MP
+    - The model runs on CPU and may take several minutes to process each image
+    - For best results, upload images smaller than 1MP
+    - Initial loading may take some time
    - Please be patient during processing
    """)
 
-# Launch the app
 if __name__ == "__main__":
     demo.launch()
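The CPU optimization this commit keeps (and extends to `Conv2d`) is PyTorch dynamic quantization. A minimal standalone sketch of the same `torch.quantization.quantize_dynamic` call pattern used in app.py; the toy model below is purely illustrative and not part of the commit:

```python
import torch
import torch.nn as nn

# Toy stand-in for the fine-tuned vision model (illustrative only).
toy = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 4))

# Same call pattern as app.py: eligible Linear layers are swapped for
# dynamically quantized (int8-weight) equivalents; activations stay float.
qtoy = torch.quantization.quantize_dynamic(toy, {nn.Linear}, dtype=torch.qint8)

print(type(qtoy[0]).__name__)              # a dynamically quantized Linear module
with torch.no_grad():
    print(qtoy(torch.randn(1, 16)).shape)  # forward pass still works: torch.Size([1, 4])
```

Note that PyTorch dynamic quantization mainly targets `Linear` and recurrent layers; `Conv2d` modules listed in the set are generally left unquantized rather than raising an error.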
requirements.txt CHANGED
@@ -1,5 +1,6 @@
 gradio
 torch
-transformers
-bitsandbytes
-accelerate
+transformers>=4.36.0
+accelerate
+psutil
+safetensors
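`psutil` and `safetensors` are added to support the memory checks and sharded checkpoint loading introduced in app.py, while `bitsandbytes` is dropped. As an aside (not part of the commit), the shard filenames that `load_model_in_chunks` hard-codes could also be read from `model.safetensors.index.json`, whose standard `weight_map` entry maps parameter names to shard files; a minimal sketch, with an illustrative helper name:

```python
import json
import os

def shard_files_from_index(model_path):
    """Return the unique shard filenames referenced by the safetensors index."""
    with open(os.path.join(model_path, "model.safetensors.index.json")) as f:
        index = json.load(f)
    return sorted(set(index["weight_map"].values()))
```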