import gradio as gr
import torch
from transformers import BitsAndBytesConfig, pipeline

# Load the LLaVA 1.5 7B checkpoint as an image-to-text pipeline, with the
# weights quantized to 4 bits and float16 used as the compute dtype.
model_id = "llava-hf/llava-1.5-7b-hf"

quantization_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

pipe = pipeline(
    "image-to-text",
    model=model_id,
    model_kwargs={"quantization_config": quantization_config},
)

def generate_text(image):
    """Ask the LLaVA pipeline what to be cautious about at the pictured place.

    Args:
        image: The picture to send to the module-level ``pipe``. Presumably a
            PIL image, local path, or URL -- confirm against the pipeline's
            accepted input types.

    Returns:
        The ``generated_text`` field of the first pipeline result.

    Raises:
        ValueError: If ``image`` is ``None`` (for example, the form was
            submitted without an upload).
    """
    if image is None:
        # Fail early with a readable message instead of an opaque error
        # raised from inside the pipeline.
        raise ValueError("No image provided; please upload an image first.")

    # Single source of truth for the generation budget (previously the local
    # was assigned but the literal was repeated in the call).
    max_new_tokens = 200
    prompt = "USER: <image>\nWhat are the things I should be cautious about when I visit this place?\nASSISTANT:"
    outputs = pipe(
        image,
        prompt=prompt,
        generate_kwargs={"max_new_tokens": max_new_tokens},
    )
    return outputs[0]["generated_text"]

# Build the web UI: one image upload in, generated text out.
# The legacy `gr.inputs.Image` namespace was removed in Gradio 4, so the
# top-level `gr.Image` component is used instead. `type="pil"` makes Gradio
# hand the callback a PIL image, which the transformers image pipeline
# accepts; the component's default would be a NumPy array.
iface = gr.Interface(fn=generate_text, inputs=gr.Image(type="pil"), outputs="text")
iface.launch()