Spaces: Runtime error
Update app.py
app.py
CHANGED
@@ -1,6 +1,7 @@
 import spaces
 import gradio as gr
 import torch
+from PIL import Image
 from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor, pipeline
 from transformers import AutoProcessor, AutoModelForCausalLM
 import re
@@ -56,6 +57,10 @@ MAX_SEED = 2**32 - 1
 
 # Florence caption function
 def florence_caption(image):
+    # Convert image to PIL if it's not already
+    if not isinstance(image, Image.Image):
+        image = Image.fromarray(image)
+
     inputs = florence_processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt").to(device)
     generated_ids = florence_model.generate(
         input_ids=inputs["input_ids"],
@@ -137,10 +142,13 @@ def generate_image(prompt, negative_prompt, seed, randomize_seed, width, height,
 
     return image, seed
 
-# Gradio Interface
 @spaces.GPU
 def process_workflow(image, text_prompt, vlm_model_choice, use_enhancer, model_choice, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
     if image is not None:
+        # Convert image to PIL if it's not already
+        if not isinstance(image, Image.Image):
+            image = Image.fromarray(image)
+
         if vlm_model_choice == "Long Captioner":
             prompt = create_captions_rich(image)
         else: # Florence
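
In both florence_caption and process_workflow the change normalizes the incoming image to a PIL.Image before it is used, since a Gradio image input can hand the handler a NumPy array rather than a PIL object. A minimal, self-contained sketch of that guard in isolation (the ensure_pil helper and the dummy array below are illustrative, not part of app.py):

import numpy as np
from PIL import Image

def ensure_pil(image):
    # Accept either a PIL.Image or a NumPy array and always return a PIL.Image,
    # mirroring the isinstance check added in this commit.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    return image

# Example: a dummy 64x64 RGB frame, as a NumPy-typed Gradio input would deliver it.
frame = np.zeros((64, 64, 3), dtype=np.uint8)
print(type(ensure_pil(frame)))                       # <class 'PIL.Image.Image'>
print(type(ensure_pil(Image.new("RGB", (64, 64)))))  # already PIL, returned unchanged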