Filip committed on
Commit 57a1258 · 1 Parent(s): 8ed8457

change to 8 bit

Files changed (2)
  1. app.py +58 -37
  2. requirements.txt +5 -5
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor, TextStreamer
 import torch
+import gc

 # Configure torch to use CPU
 device = "cpu"
@@ -10,10 +11,11 @@ torch.set_default_device(device)
 def load_model():
     model_name = "forestav/unsloth_vision_radiography_finetune"

-    # Load with CPU optimization settings
+    # Load with 8-bit quantization and CPU optimization settings
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         device_map="cpu",
+        load_in_8bit=True,
         torch_dtype=torch.float16,
         low_cpu_mem_usage=True
     )
@@ -23,45 +25,64 @@ def load_model():

 # Initialize model and tokenizer globally
 print("Loading model...")
-model, tokenizer, processor = load_model()
-print("Model loaded!")
+try:
+    model, tokenizer, processor = load_model()
+    print("Model loaded successfully!")
+except Exception as e:
+    print(f"Error loading model: {str(e)}")
+    raise

 def analyze_image(image, instruction):
-    if instruction.strip() == "":
-        instruction = "You are an expert radiographer. Describe accurately what you see in this image."
-
-    # Prepare the messages
-    messages = [
-        {"role": "user", "content": [
-            {"type": "image"},
-            {"type": "text", "text": instruction}
-        ]}
-    ]
-
-    # Process the image and text
-    inputs = processor(
-        images=image,
-        text=tokenizer.apply_chat_template(messages, add_generation_prompt=True),
-        return_tensors="pt"
-    )
-
-    # Generate the response
-    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
-
-    # Generate with lower resource settings
-    with torch.no_grad():
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=128,
-            temperature=1.2,
-            min_p=0.1,
-            use_cache=True,
-            streamer=text_streamer
-        )
-
-    # Decode the response
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response
+    try:
+        # Clear CUDA cache and garbage collect
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        gc.collect()
+
+        if instruction.strip() == "":
+            instruction = "You are an expert radiographer. Describe accurately what you see in this image."
+
+        # Prepare the messages
+        messages = [
+            {"role": "user", "content": [
+                {"type": "image"},
+                {"type": "text", "text": instruction}
+            ]}
+        ]
+
+        # Process the image and text
+        inputs = processor(
+            images=image,
+            text=tokenizer.apply_chat_template(messages, add_generation_prompt=True),
+            return_tensors="pt"
+        )
+
+        # Generate the response
+        text_streamer = TextStreamer(tokenizer, skip_prompt=True)
+
+        # Generate with lower resource settings
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=128,
+                temperature=1.2,
+                min_p=0.1,
+                use_cache=True,
+                streamer=text_streamer
+            )
+
+        # Decode the response
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        # Clear memory
+        del outputs
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
+        return response
+    except Exception as e:
+        return f"Error processing image: {str(e)}"

 # Create the Gradio interface
 with gr.Blocks() as demo:
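
A note on the quantized load this commit introduces: in recent transformers releases, passing load_in_8bit=True directly to from_pretrained is deprecated in favor of an explicit BitsAndBytesConfig, and bitsandbytes' int8 kernels have traditionally required a CUDA GPU, so combining load_in_8bit=True with device_map="cpu" may error out or fall back to an unquantized load. Below is a minimal sketch of the BitsAndBytesConfig form, assuming a recent transformers/bitsandbytes install and a CUDA-capable machine; it is not part of the commit itself.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_name = "forestav/unsloth_vision_radiography_finetune"

# Explicit quantization config instead of the deprecated bare kwarg
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",          # let accelerate place layers; int8 generally expects a GPU
    torch_dtype=torch.float16,  # dtype used for the non-quantized modules
    low_cpu_mem_usage=True,
)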
requirements.txt CHANGED
@@ -1,5 +1,5 @@
-transformers>=4.31.0
-torch>=2.0.0
-gradio>=3.34.0
-accelerate>=0.26.0
-pillow
+gradio
+torch
+transformers
+bitsandbytes
+accelerate
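
Since the new requirements.txt drops all version pins, a quick sanity check (a hypothetical snippet, not part of this commit) can confirm that whatever versions pip resolved actually import, and whether a CUDA device is present for bitsandbytes:

import torch
import transformers
import bitsandbytes as bnb

print("transformers:", transformers.__version__)
print("bitsandbytes:", bnb.__version__)
print("CUDA available:", torch.cuda.is_available())  # int8 kernels generally expect True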