from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler, AutoencoderKL from transformers import CLIPTextModel, CLIPTokenizer import torch import gradio as gr import spaces lora_path = "OedoSoldier/detail-tweaker-lora" vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse").to("cuda") @spaces.GPU def generate_image(prompt, negative_prompt, num_inference_steps=30, guidance_scale=7.0,model="Real6.0",num_images=1, width=512, height=512): if model == "Real5.0": model_id = "SG161222/Realistic_Vision_V5.0_noVAE" elif model == "Real5.1": model_id = "SG161222/Realistic_Vision_V5.1_noVAE" else: model_id = "SG161222/Realistic_Vision_V6.0_B1_noVAE" text_encoder = CLIPTextModel.from_pretrained( model_id, subfolder="text_encoder" ).to("cuda") tokenizer = CLIPTokenizer.from_pretrained( model_id, subfolder="tokenizer" ) pipe = DiffusionPipeline.from_pretrained( model_id, text_encoder=text_encoder, tokenizer=tokenizer, vae=vae ).to("cuda") if model == "Real6.0": pipe.safety_checker = lambda images, **kwargs: (images, [False] * len(images)) pipe.load_lora_weights(lora_path) pipe.scheduler = DPMSolverMultistepScheduler.from_config( pipe.scheduler.config, algorithm_type="dpmsolver++", use_karras_sigmas=True ) text_inputs = tokenizer( prompt, padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt" ).to("cuda") negative_text_inputs = tokenizer( negative_prompt, padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt" ).to("cuda") prompt_embeds = text_encoder(text_inputs.input_ids)[0] negative_prompt_embeds = text_encoder(negative_text_inputs.input_ids)[0] # Generate the image result = pipe( prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_prompt_embeds, cross_attention_kwargs={"scale": 1}, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale, width=width, height=height, num_images_per_prompt=num_images ) return result.images title = """