gokaygokay committed on
Commit
2bf9b81
·
verified ·
1 Parent(s): 90a65c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import spaces
2
  import gradio as gr
3
  import torch
 
4
  from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor, pipeline
5
  from transformers import AutoProcessor, AutoModelForCausalLM
6
  import re
@@ -56,6 +57,10 @@ MAX_SEED = 2**32 - 1
56
 
57
  # Florence caption function
58
  def florence_caption(image):
 
 
 
 
59
  inputs = florence_processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt").to(device)
60
  generated_ids = florence_model.generate(
61
  input_ids=inputs["input_ids"],
@@ -137,10 +142,13 @@ def generate_image(prompt, negative_prompt, seed, randomize_seed, width, height,
137
 
138
  return image, seed
139
 
140
- # Gradio Interface
141
  @spaces.GPU
142
  def process_workflow(image, text_prompt, vlm_model_choice, use_enhancer, model_choice, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
143
  if image is not None:
 
 
 
 
144
  if vlm_model_choice == "Long Captioner":
145
  prompt = create_captions_rich(image)
146
  else: # Florence
 
1
  import spaces
2
  import gradio as gr
3
  import torch
4
+ from PIL import Image
5
  from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor, pipeline
6
  from transformers import AutoProcessor, AutoModelForCausalLM
7
  import re
 
57
 
58
  # Florence caption function
59
  def florence_caption(image):
60
+ # Convert image to PIL if it's not already
61
+ if not isinstance(image, Image.Image):
62
+ image = Image.fromarray(image)
63
+
64
  inputs = florence_processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt").to(device)
65
  generated_ids = florence_model.generate(
66
  input_ids=inputs["input_ids"],
 
142
 
143
  return image, seed
144
 
 
145
  @spaces.GPU
146
  def process_workflow(image, text_prompt, vlm_model_choice, use_enhancer, model_choice, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
147
  if image is not None:
148
+ # Convert image to PIL if it's not already
149
+ if not isinstance(image, Image.Image):
150
+ image = Image.fromarray(image)
151
+
152
  if vlm_model_choice == "Long Captioner":
153
  prompt = create_captions_rich(image)
154
  else: # Florence