"""Gradio demo that asks LLaVA-1.5 what to be cautious about at a pictured place.

Running this file loads a 4-bit quantized ``llava-hf/llava-1.5-7b-hf``
image-to-text pipeline and launches a Gradio interface with one image input
and one text output.
"""

import gradio as gr
import torch
from transformers import BitsAndBytesConfig, pipeline

# Load the model in 4-bit precision, computing in float16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model_id = "llava-hf/llava-1.5-7b-hf"
pipe = pipeline(
    "image-to-text",
    model=model_id,
    model_kwargs={"quantization_config": quantization_config},
)

# Upper bound on the number of tokens generated per request.
MAX_NEW_TOKENS = 200

# LLaVA-1.5 chat format: the "<image>" placeholder marks where the image
# features are inserted into the prompt, so it must be present.
PROMPT = (
    "USER: <image>\n"
    "What are the things I should be cautious about when I visit this place?\n"
    "ASSISTANT:"
)


def generate_text(image):
    """Return the model's generated text for *image*.

    Args:
        image: The picture to analyse, as delivered by the Gradio image
            input (a PIL image, given ``type="pil"`` below).

    Returns:
        The ``"generated_text"`` field of the first pipeline result.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        # Gradio passes None when the user submits without an image; fail
        # with a readable message instead of an opaque pipeline traceback.
        raise gr.Error("Please upload an image first.")

    outputs = pipe(
        image,
        prompt=PROMPT,
        generate_kwargs={"max_new_tokens": MAX_NEW_TOKENS},
    )
    return outputs[0]["generated_text"]


# `gr.inputs.Image()` belongs to the removed `gr.inputs` namespace; use the
# top-level component and request PIL images, which the pipeline accepts.
iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Image(type="pil"),
    outputs="text",
)
iface.launch()