File size: 6,609 Bytes
ee3b2de 490262d 775e2c4 ee3b2de 6371eda ee3b2de 6371eda 6fb313b 6371eda ee3b2de abf9f32 ee3b2de 7f90752 0b764ac ee3b2de 176f241 775e2c4 330dc1c 775e2c4 830976f 39daff5 330dc1c 26049de abf9f32 26049de 7f90752 26049de 330dc1c 39daff5 f06c5ef ee3b2de 26049de f5501f6 f06c5ef 39daff5 f06c5ef 830976f f06c5ef 830976f 39daff5 f06c5ef 830976f f06c5ef 39daff5 ee3b2de bcd0b3f 6ab5788 ee3b2de f5501f6 46c7aef 26049de 7f9dcb6 ee3b2de 0b764ac d850e97 19f5b6b f5501f6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import PIL
import requests
import torch
import gradio as gr
import random
from PIL import Image
import os
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
# Loading from Diffusers Library
model_id = "timbrooks/instruct-pix2pix"

# The page header invites users to duplicate this Space and only then upgrade
# to a GPU, so the module must also import on CPU-only hardware. Half-precision
# weights are only requested when CUDA is actually available; otherwise the
# default full-precision weights are loaded and the pipeline stays on the CPU.
if torch.cuda.is_available():
    pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        revision="fp16",
        safety_checker=None,
    )
    pipe.to("cuda")
else:
    pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.float32,
        safety_checker=None,
    )

# Trade a little speed for a lower peak memory footprint during attention.
pipe.enable_attention_slicing()

# Not read by the code visible in this file; kept so the module's global
# names stay unchanged.
counter = 0
# Markdown help shown to the user (rendered through gr.Markdown(help_text) in
# the UI definition): a note about the missing revert feature plus tuning tips
# for guidance scale, image guidance scale, prompt phrasing and step count.
help_text = """ Note: I will try to add the functionality to revert your changes to previous/original image in future versions of space. For now only forward editing is available.
Some notes from the official [instruct-pix2pix](https://huggingface.co/spaces/timbrooks/instruct-pix2pix) Space by the authors
and from the official [Diffusers docs](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/pix2pix) -
If you're not getting what you want, there may be a few reasons:
1. Is the image not changing enough? Your guidance_scale may be too low. It should be >1. Higher guidance scale encourages to generate images
that are closely linked to the text `prompt`, usually at the expense of lower image quality. This value dictates how similar the output should
be to the input. This pipeline requires a value of at least `1`. It's possible your edit requires larger changes from the original image.
2. Alternatively, you can toggle image_guidance_scale. Image guidance scale is to push the generated image towards the inital image. Image guidance
scale is enabled by setting `image_guidance_scale > 1`. Higher image guidance scale encourages to generate images that are closely
linked to the source image `image`, usually at the expense of lower image quality.
3. I have observed that rephrasing the instruction sometimes improves results (e.g., "turn him into a dog" vs. "make him a dog" vs. "as a dog").
4. Increasing the number of steps sometimes improves results.
5. Do faces look weird? The Stable Diffusion autoencoder has a hard time with faces that are small in the image. Try:
* Cropping the image so the face takes up a larger portion of the frame.
"""
# File-name pattern used for every image this app writes to disk.
_EDITED_IMAGE_PREFIX = "./edited_image_"
_EDITED_IMAGE_SUFFIX = ".png"


def _new_image_name():
    """Return a fresh ``./edited_image_<random int>.png`` path."""
    seed = random.randint(0, 1000000)
    return f"{_EDITED_IMAGE_PREFIX}{seed}{_EDITED_IMAGE_SUFFIX}"


def _is_generated_image_name(name):
    """Return True only if *name* looks exactly like a path from ``_new_image_name``."""
    if not isinstance(name, str):
        return False
    if not (name.startswith(_EDITED_IMAGE_PREFIX) and name.endswith(_EDITED_IMAGE_SUFFIX)):
        return False
    middle = name[len(_EDITED_IMAGE_PREFIX):-len(_EDITED_IMAGE_SUFFIX)]
    return middle.isascii() and middle.isdigit()


def chat(image_in, in_steps, in_guidance_scale, in_img_guidance_scale, image_hid, img_name, counter_out, prompt, history, progress=gr.Progress(track_tqdm=True)):
    """Run one InstructPix2Pix edit and append the result to the chat history.

    On the first call (``counter_out`` not greater than 0) the uploaded
    ``image_in`` is edited and a new output file name is generated. On later
    calls the previous result ``image_hid`` is edited again and the same file
    is overwritten, so edits accumulate ("forward editing").

    Args:
        image_in: Image uploaded by the user; source for the first edit.
        in_steps: Number of inference steps (converted with ``int``).
        in_guidance_scale: Text guidance scale (converted with ``float``).
        in_img_guidance_scale: Image guidance scale (converted with ``float``).
        image_hid: Result of the previous edit; source for follow-up edits.
        img_name: Path of the previously saved result, round-tripped through
            the UI. It is client-controlled, so it is only reused when it
            matches the pattern this function generates.
        counter_out: Number of edits performed so far in this session.
        prompt: Edit instruction typed by the user.
        history: List of ``(prompt, response)`` tuples, or ``None``.
        progress: Gradio progress tracker.

    Returns:
        ``(history, history, edited_image, img_name, counter_out + 1)`` —
        the chat history (twice: once for display, once for state), the
        edited image, the path it was saved to, and the updated counter.

    Raises:
        gr.Error: If there is no source image to edit.
    """
    progress(0, desc="Starting...")
    # TODO: add "revert" functionality later.
    print(f"counter:{counter_out}, prompt:{prompt}, img_name:{img_name}")

    counter_out = counter_out or 0  # tolerate an empty counter field
    is_first_pass = not counter_out > 0
    if is_first_pass:
        print("FIRST PASS")
        source_image = image_in
    else:
        source_image = image_hid

    # Fail with a readable message instead of an opaque pipeline traceback.
    if source_image is None:
        raise gr.Error("Please provide an image to edit.")

    # img_name comes back from a (hidden) textbox and therefore from the
    # client; never delete or overwrite a path this app did not create itself.
    if is_first_pass or not _is_generated_image_name(img_name):
        img_name = _new_image_name()

    edited_image = pipe(
        prompt,
        image=source_image,
        num_inference_steps=int(in_steps),
        guidance_scale=float(in_guidance_scale),
        image_guidance_scale=float(in_img_guidance_scale),
    ).images[0]

    # Remove any stale file first (EAFP: no exists()/remove() race), then
    # write the new result under the same name.
    try:
        os.remove(img_name)
    except FileNotFoundError:
        pass
    else:
        print("***Existing image was deleted***")
    print(f"saving edited image to:{img_name}")
    with open(img_name, "wb") as fp:
        edited_image.save(fp)

    history = history or []
    # Adding supportive sample text in front of the inline image.
    add_text_list = ["There you go", "Enjoy your image!", "Nice work! Wonder what you gonna do next!", "Way to go!", "Does this work for you?", "Something like this?"]
    response = random.choice(add_text_list) + '<img src="/file=' + img_name + '">'
    history.append((prompt, response))
    return history, history, edited_image, img_name, counter_out + 1
# UI definition: header, input column (image, instruction, settings), chatbot,
# and the help text.
# NOTE(review): container nesting below is reconstructed from the conventional
# layout for this kind of app — confirm against the intended page layout.
with gr.Blocks() as demo:
    gr.Markdown("""<h1><center> Chat Interface with InstructPix2Pix: Give Image Editing Instructions [Apologies for inconvenience, this Space is still very much a work in progress...] </h1></center>
<p>For faster inference without waiting in the queue, you may duplicate the space and upgrade to GPU in settings.<br/>
<a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true">
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
<p/>""")

    with gr.Row():
        with gr.Column():
            # User inputs: the image to edit and the edit instruction.
            image_in = gr.Image(type='pil', label="Original Image")
            text_in = gr.Textbox()
            # Per-session chat history handed back to chat() on every click.
            state_in = gr.State()
            b1 = gr.Button('Edit the image!')

            with gr.Accordion("Advance settings for Training and Inference", open=False):
                gr.Markdown("Advance settings for - Number of Inference steps, Guidanace scale, and Image guidance scale.")
                in_steps = gr.Number(
                    label="Enter the number of Inference steps",
                    value=20,
                )
                in_guidance_scale = gr.Slider(
                    1, 10,
                    step=0.5,
                    label="Set Guidance scale",
                    value=7.5,
                )
                in_img_guidance_scale = gr.Slider(
                    1, 10,
                    step=0.5,
                    label="Set Image Guidance scale",
                    value=1.5,
                )
                # Carriers for state that chat() returns and receives again on
                # the next click: last edited image, its file name, edit count.
                image_hid = gr.Image(type='pil')
                img_name_temp_out = gr.Textbox(visible=False)
                counter_out = gr.Number(visible=False, value=0, precision=0)

        chatbot = gr.Chatbot()

    # Order must match chat()'s positional parameters and its returned tuple.
    chat_inputs = [
        image_in,
        in_steps,
        in_guidance_scale,
        in_img_guidance_scale,
        image_hid,
        img_name_temp_out,
        counter_out,
        text_in,
        state_in,
    ]
    chat_outputs = [
        chatbot,
        state_in,
        image_hid,
        img_name_temp_out,
        counter_out,
    ]
    b1.click(chat, chat_inputs, chat_outputs)

    gr.Markdown(help_text)
# Enable request queuing (needed for the progress tracking used by chat())
# with a concurrency count of 10, then start the app.
demo.queue(concurrency_count=10)
demo.launch(debug=True, width="80%", height=2000)