import gradio as gr
# MllamaForConditionalGeneration is the vision-capable class for Llama 3.2 Vision;
# AutoModelForCausalLM would build a text-only model that cannot consume images.
# TextStreamer was imported but never used, so it is dropped.
from transformers import AutoConfig, AutoTokenizer, AutoProcessor, MllamaForConditionalGeneration
import torch
import gc
import os
import json
from accelerate import init_empty_weights
from accelerate.utils import load_checkpoint_in_model
from huggingface_hub import snapshot_download
import psutil
# Enable better CPU performance
torch.set_num_threads(4)
device = "cpu"
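# Note: 4 threads is an arbitrary default sized for a small shared Space; on a
# dedicated machine, matching the available core count is usually better, e.g.:
#   torch.set_num_threads(os.cpu_count() or 4)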
def get_free_memory():
    """Get available memory in GB"""
    return psutil.virtual_memory().available / (1024 * 1024 * 1024)
def load_model_in_chunks(model_path, chunk_size_gb=2):
    """Load model shard by shard to manage memory"""
    # Resolve the Hub repo ID to a local snapshot first; os.path.join on a bare
    # repo ID would not point at real files.
    local_path = snapshot_download(model_path, cache_dir="model_cache")
    # Build the empty (meta-device) model from the config alone; instantiating
    # the full model just to read its config would defeat chunked loading.
    config = AutoConfig.from_pretrained(local_path)
    with init_empty_weights():
        empty_model = MllamaForConditionalGeneration(config)
    # Get checkpoint files from the safetensors index instead of assuming a
    # hard-coded shard count
    index_path = os.path.join(local_path, "model.safetensors.index.json")
    if os.path.exists(index_path):
        with open(index_path) as f:
            index = json.load(f)
        checkpoint_files = sorted(
            {os.path.join(local_path, shard) for shard in index["weight_map"].values()}
        )
    else:
        checkpoint_files = [os.path.join(local_path, "pytorch_model.bin")]
    # Load each shard; device_map is needed so weights materialize on CPU
    # rather than staying on the meta device. (torch.cuda.empty_cache() was
    # dropped: it is a no-op on this CPU-only setup.)
    for checkpoint in checkpoint_files:
        if get_free_memory() < 2:  # If less than 2GB free
            gc.collect()
        load_checkpoint_in_model(empty_model, checkpoint, device_map={"": "cpu"})
        gc.collect()
    return empty_model
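# A simpler alternative (a sketch, assuming enough disk space for offloading)
# is to let accelerate place and load everything in one call; local_path here
# stands for the snapshot directory returned by snapshot_download:
#
#   from accelerate import load_checkpoint_and_dispatch
#   model = load_checkpoint_and_dispatch(
#       empty_model,
#       local_path,
#       device_map={"": "cpu"},    # keep everything on CPU
#       offload_folder="offload",  # spill to disk if RAM runs out
#   )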
def load_model():
    model_name = "forestav/unsloth_vision_radiography_finetune"
    base_model_name = "unsloth/Llama-3.2-11B-Vision-Instruct"
    print("Loading tokenizer and processor...")
    tokenizer = AutoTokenizer.from_pretrained(
        base_model_name,
        trust_remote_code=True,
        cache_dir="model_cache"
    )
    processor = AutoProcessor.from_pretrained(
        base_model_name,
        trust_remote_code=True,
        cache_dir="model_cache"
    )
    print("Loading model in chunks...")
    model = load_model_in_chunks(model_name)
    print("Optimizing model for CPU...")
    # Convert to float32, switch to inference mode, and quantize.
    # Dynamic quantization only supports a few module types (notably
    # nn.Linear); nn.Conv2d is not among them, so it is not listed here.
    model = model.to(torch.float32)
    model.eval()
    model = torch.quantization.quantize_dynamic(
        model,
        {torch.nn.Linear},
        dtype=torch.qint8
    )
    return model, tokenizer, processor
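# For reference, the non-chunked path (a sketch; it needs roughly the full
# model size in free RAM at load time) would be:
#
#   model = MllamaForConditionalGeneration.from_pretrained(
#       "forestav/unsloth_vision_radiography_finetune",
#       torch_dtype=torch.float32,
#       low_cpu_mem_usage=True,  # stream weights instead of double-allocating
#       cache_dir="model_cache",
#   )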
# Create cache directories
os.makedirs("model_cache", exist_ok=True)
os.makedirs("offload", exist_ok=True)
print(f"Available memory before loading: {get_free_memory():.2f} GB")
# Initialize model and tokenizer globally
print("Starting model initialization...")
try:
    model, tokenizer, processor = load_model()
    print("Model loaded successfully!")
    print(f"Available memory after loading: {get_free_memory():.2f} GB")
except Exception as e:
    print(f"Error loading model: {str(e)}")
    raise
def analyze_image(image, instruction):
    try:
        gc.collect()
        if image is None:
            return "Please upload an image first."
        if instruction.strip() == "":
            instruction = "You are an expert radiographer. Describe accurately what you see in this image."
        # Downscale large uploads in place (preserving aspect ratio); this
        # replaces the invalid max_pixels argument previously passed to
        # gr.Image.
        image.thumbnail((1024, 1024))
        messages = [
            {"role": "user", "content": [
                {"type": "image"},
                {"type": "text", "text": instruction}
            ]}
        ]
        # Process with memory checks
        if get_free_memory() < 2:
            gc.collect()
        # The chat template for Llama 3.2 Vision lives on the processor
        inputs = processor(
            images=image,
            text=processor.apply_chat_template(messages, add_generation_prompt=True),
            return_tensors="pt"
        )
        # Generate with minimal memory usage; greedy decoding (do_sample=False)
        # makes sampling knobs such as temperature and min_p irrelevant, so
        # they are dropped to avoid generation-config warnings
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=128,
                use_cache=True,
                pad_token_id=tokenizer.eos_token_id,
                num_beams=1,
                do_sample=False  # Disable sampling to save memory
            )
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        del outputs, inputs
        gc.collect()
        return response
    except Exception as e:
        return f"Error processing image: {str(e)}\nPlease try again with a smaller image or different settings."
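# Hypothetical local smoke test, assuming a sample.png sits next to this
# script (not part of the Space itself):
#
#   from PIL import Image
#   print(analyze_image(Image.open("sample.png"), ""))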
# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("""
    # Medical Image Analysis Assistant
    Upload a medical image and receive a professional description from an AI radiographer.
    """)
    with gr.Row():
        with gr.Column():
            # Note: gr.Image accepts no max_pixels argument, which broke the
            # app at startup; oversized uploads are now downscaled inside
            # analyze_image instead.
            image_input = gr.Image(
                type="pil",
                label="Upload Medical Image"
            )
            instruction_input = gr.Textbox(
                label="Custom Instruction (optional)",
                placeholder="You are an expert radiographer. Describe accurately what you see in this image.",
                lines=2
            )
            submit_btn = gr.Button("Analyze Image")
        with gr.Column():
            output_text = gr.Textbox(label="Analysis Result", lines=10)
    submit_btn.click(
        fn=analyze_image,
        inputs=[image_input, instruction_input],
        outputs=output_text
    )
    gr.Markdown("""
    ### Notes:
    - The model runs on CPU and may take several minutes to process each image
    - For best results, upload images smaller than 1 MP
    - Initial loading may take some time
    - Please be patient during processing
    """)
if __name__ == "__main__":
    demo.launch()