---
license: apache-2.0
pipeline_tag: text-to-image
---

# ***ControlNet Depth SDXL, supporting Zoe and MiDaS***

![images](./masonry.webp)

# Example

![images_0](./000000_depth_concat.webp)
![images_1](./000003_depth_concat.webp)
![images_2](./000005_depth_concat.webp)
![images_3](./000006_depth_concat.webp)
![images_4](./000014_depth_concat.webp)
![images_5](./000015_depth_concat.webp)
![images_6](./000022_depth_concat.webp)
![images_7](./000023_depth_concat.webp)
![images_8](./000026_depth_concat.webp)
![images_9](./000031_depth_concat.webp)

# How to use it

```python
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import EulerAncestralDiscreteScheduler
from PIL import Image
import torch
import random
import numpy as np
import cv2
from controlnet_aux import MidasDetector, ZoeDetector

# load the two supported depth estimators (Zoe and MiDaS) from the Annotators repo
processor_zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
processor_midas = MidasDetector.from_pretrained("lllyasviel/Annotators")

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it in as much detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    subfolder="scheduler",
)

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-depth-sdxl-1.0",
    torch_dtype=torch.float16,
)

# when testing with another base model, remember to change the VAE as well
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)
pipe.to("cuda")  # the fp16 weights require a GPU

# resize the input to 1024 * 1024 or an equivalent bucket resolution for best results
img = cv2.imread("your original image path")

# pick one of the two depth estimators at random; in practice, use whichever
# produces the cleaner depth map for your image
if random.random() > 0.5:
    controlnet_img = processor_zoe(img, output_type='cv2')
else:
    controlnet_img = processor_midas(img, output_type='cv2')

# rescale so the total pixel count matches the 1024 * 1024 training budget,
# rounding to a multiple of 64 so the pipeline accepts the resolution
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
new_width = int(np.round(width * ratio / 64.)) * 64
new_height = int(np.round(height * ratio / 64.)) * 64
controlnet_img = cv2.resize(controlnet_img, (new_width, new_height))
controlnet_img = Image.fromarray(controlnet_img)

images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=new_width,
    height=new_height,
    num_inference_steps=30,
).images

# PNG usually preserves more quality than JPG or WebP, at the cost of larger files
images[0].save("your image save path")
```
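If you want repeated runs to produce the same image, diffusers pipelines accept a `generator` argument. Below is a minimal sketch, not part of the original example; the seed value 42 is an arbitrary placeholder:

```python
import torch

# Seed a torch.Generator and pass it to the pipeline so repeated runs with the
# same prompt and control image produce identical outputs.
generator = torch.Generator(device="cuda").manual_seed(42)  # arbitrary example seed

images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=new_width,
    height=new_height,
    num_inference_steps=30,
    generator=generator,
).images
```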
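If the fp16 pipeline does not fit in your GPU memory, diffusers' standard model offloading can be used instead of moving everything to the GPU at once. A minimal sketch (requires the `accelerate` package):

```python
# Keep only the currently active sub-model (text encoder, UNet, ControlNet, VAE)
# on the GPU while the rest stays in CPU RAM. Call this instead of pipe.to("cuda").
pipe.enable_model_cpu_offload()
```

This trades some speed for a much smaller peak VRAM footprint, which can matter at the 1024 * 1024 resolutions this model targets.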