Spaces:

ysharma
/

InstructPix2Pix_Chatbot

Paused

App Files Files Community

InstructPix2Pix_Chatbot / app.py

ysharma HF staff

update desc, layout

f5501f6 about 2 years ago

raw

history blame

5.26 kB

	import PIL
	import requests
	import torch
	import gradio as gr
	import random
	from PIL import Image
	from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler

	# Load the InstructPix2Pix pipeline from the Diffusers library in half
	# precision, move it to the CUDA device and turn on attention slicing.
	model_id = "timbrooks/instruct-pix2pix"
	pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
	    model_id,
	    torch_dtype=torch.float16,
	    revision="fp16",
	    safety_checker=None,
	)
	pipe.to("cuda")
	pipe.enable_attention_slicing()

	# Module-level call counter, starts at zero.
	counter = 0


	# Markdown help text shown in the UI through gr.Markdown(help_text).
	# It covers tuning guidance_scale and image_guidance_scale, rephrasing the
	# instruction, raising the step count, and handling small faces.
	help_text = """ Note: I will try to add the functionality to revert your changes to previous/original image in future versions of space. For now only forward editing is available.

	From the official Space by the authors [instruct-pix2pix](https://huggingface.co/spaces/timbrooks/instruct-pix2pix)
	and from official [Diffusers docs](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/pix2pix) -

	If you're not getting what you want, there may be a few reasons:
	1. Is the image not changing enough? Your guidance_scale may be too low. It should be >1. Higher guidance scale encourages to generate images
	that are closely linked to the text `prompt`, usually at the expense of lower image quality. This value dictates how similar the output should
	be to the input. This pipeline requires a value of at least `1`. It's possible your edit requires larger changes from the original image.

	2. Alternatively, you can toggle image_guidance_scale. Image guidance scale is to push the generated image towards the inital image. Image guidance
	scale is enabled by setting `image_guidance_scale > 1`. Higher image guidance scale encourages to generate images that are closely
	linked to the source image `image`, usually at the expense of lower image quality.

	3. I have observed that rephrasing the instruction sometimes improves results (e.g., "turn him into a dog" vs. "make him a dog" vs. "as a dog").

	4. Increasing the number of steps sometimes improves results.

	5. Do faces look weird? The Stable Diffusion autoencoder has a hard time with faces that are small in the image. Try:
	* Cropping the image so the face takes up a larger portion of the frame.
	"""

	def chat(image_in, in_steps, in_guidance_scale, in_img_guidance_scale, prompt, history, progress=gr.Progress(track_tqdm=True)):
	    """Apply one InstructPix2Pix edit and append the result to the chat history.

	    The first edit of a session runs on the uploaded image. Later edits in
	    the same session run on the previously saved result ("forward editing"),
	    falling back to the upload when no saved result exists on disk.

	    Args:
	        image_in: Uploaded image (the UI supplies a PIL image), or None when
	            nothing was uploaded.
	        in_steps: Number of inference steps; converted with ``int``.
	        in_guidance_scale: Text guidance scale; converted with ``float``.
	        in_img_guidance_scale: Image guidance scale; converted with ``float``.
	        prompt: The editing instruction passed to the pipeline.
	        history: List of ``(prompt, response)`` tuples from earlier calls in
	            this session, or None on the first call.
	        progress: Gradio progress tracker.

	    Returns:
	        The updated history twice: once for the chatbot display and once for
	        the state component.

	    Raises:
	        gr.Error: If there is neither an uploaded image nor a saved previous
	            edit to work on.
	    """
	    global counter
	    progress(0, desc="Starting...")

	    edited_image_path = "edited_image.png"
	    history = history or []

	    # Decide whether to chain from this session's own history rather than a
	    # process-wide counter: a global counter made every request after the
	    # very first one ignore the upload, even for brand-new sessions, and
	    # left the app permanently failing if the first call errored out.
	    if history:
	        try:
	            # copy() forces the pixel data to load so the file handle can be
	            # closed before the same path is overwritten below.
	            with Image.open(edited_image_path) as previous:
	                image_in = previous.copy()
	        except FileNotFoundError:
	            # No saved edit on disk; keep working from the uploaded image.
	            pass

	    if image_in is None:
	        raise gr.Error("Please upload an image before asking for an edit.")

	    edited_image = pipe(
	        prompt,
	        image=image_in,
	        num_inference_steps=int(in_steps),
	        guidance_scale=float(in_guidance_scale),
	        image_guidance_scale=float(in_img_guidance_scale),
	    ).images[0]
	    # NOTE(review): this is a single shared path, so concurrent sessions can
	    # overwrite each other's result; per-session file names would fix that.
	    edited_image.save(edited_image_path)

	    # Kept for backward compatibility; counts successful edits only and no
	    # longer drives any control flow.
	    counter += 1

	    add_text_list = [
	        "There you go",
	        "Enjoy your image!",
	        "Nice work! Wonder what you gonna do next!",
	        "Way to go!",
	        "Does this work for you?",
	        "Something like this?",
	    ]
	    response = random.choice(add_text_list) + '<img src="/file=edited_image.png">'
	    history.append((prompt, response))
	    return history, history

	# Build the Gradio UI: a header, the image/instruction inputs, optional
	# advanced settings, the chatbot output and the button wiring to chat().
	# NOTE(review): the nesting of the statements below is flattened in this
	# copy of the file; confirm which components belong to the Row, Column
	# and Accordion before re-indenting.
	with gr.Blocks() as demo:
	# Page title plus a "Duplicate Space" link.
	gr.Markdown("""<h1><center> Chat Interface with InstructPix2Pix: Give Image Editing Instructions </h1></center>
	<p>For faster inference without waiting in the queue, you may duplicate the space and upgrade to GPU in settings.<br/>
	<a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true">
	<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
	<p/>""")
	with gr.Row():
	with gr.Column():
	# The image is delivered to chat() as a PIL image (type='pil').
	image_in = gr.Image(type='pil', label="Original Image")
	# Free-text editing instruction; passed to chat() as `prompt`.
	text_in = gr.Textbox()
	# Carries the history list into chat() and receives it back afterwards.
	state_in = gr.State()
	b1 = gr.Button('Edit the image!')
	# Collapsed-by-default panel with the three generation parameters.
	with gr.Accordion("Advance settings for Training and Inference", open=False):
	gr.Markdown("Advance settings for - Number of Inference steps, Guidanace scale, and Image guidance scale.")
	in_steps = gr.Number(label="Enter the number of Inference steps", value = 20)
	in_guidance_scale = gr.Slider(1,10, step=0.5, label="Set Guidance scale", value=7.5)
	in_img_guidance_scale = gr.Slider(1,10, step=0.5, label="Set Image Guidance scale", value=1.5)
	chatbot = gr.Chatbot()
	# Inputs are listed in the same order as chat()'s parameters; the two
	# return values go to the chatbot display and back into state_in.
	b1.click(chat,[image_in, in_steps, in_guidance_scale, in_img_guidance_scale, text_in, state_in], [chatbot, state_in]) #, queue=True)
	gr.Markdown(help_text)

	# Turn on request queueing with a concurrency count of 10, then start the
	# app in debug mode with the configured width and height.
	demo.queue(concurrency_count=10)
	demo.launch(
	    debug=True,
	    width="80%",
	    height=2000,
	)