import torch
import gradio as gr
import random
from PIL import Image
import os
import time
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler

# Load the InstructPix2Pix pipeline from the Diffusers library
model_id = "timbrooks/instruct-pix2pix"
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16, revision="fp16") #, safety_checker=None)
pipe.to("cuda")
#pipe.enable_attention_slicing()
pipe.enable_xformers_memory_efficient_attention()  # memory-efficient attention on GPU
pipe.unet.to(memory_format=torch.channels_last)  # channels_last can speed up UNet convolutions on CUDA
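# The EulerAncestralDiscreteScheduler import above suggests the upstream demo's
# scheduler swap was intended here; if so, it would be enabled with:
#
#   pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
#
# A more defensive attention setup (an assumption; this Space expects xformers
# to be installed) would be:
#
#   try:
#       pipe.enable_xformers_memory_efficient_attention()
#   except Exception:
#       pipe.enable_attention_slicing()  # slower, but works without xformers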



help_text = """ 
**Note: Please be advised that a safety checker has been implemented in this public space. 
    Any attempts to generate inappropriate or NSFW images will result in the display of a black screen 
    as a precautionary measure to protect all users. We appreciate your cooperation in 
    maintaining a safe and appropriate environment for all members of our community.**
    
    New features and bug-fixes: 
    
    1. Chat style interface
    2. Now use **'reverse'** as prompt to get back the previous image after an unwanted edit
    3. Use **'restart'** as prompt to get back to original image and start over!
    4. Now you can load larger image files (~5 mb) as well

Some notes from the official [instruct-pix2pix](https://huggingface.co/spaces/timbrooks/instruct-pix2pix) Space by the authors and from the official [Diffusers docs](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/pix2pix) -

If you're not getting what you want, there may be a few reasons:
1. Is the image not changing enough? Your guidance_scale may be too low. It should be >1. Higher guidance scale encourages to generate images 
that are closely linked to the text `prompt`, usually at the expense of lower image quality. This value dictates how similar the output should 
be to the input. This pipeline requires a value of at least `1`. It's possible your edit requires larger changes from the original image. 
                
2. Alternatively, you can toggle image_guidance_scale. Image guidance scale is to push the generated image towards the inital image. Image guidance 
                scale is enabled by setting `image_guidance_scale > 1`. Higher image guidance scale encourages to generate images that are closely 
                linked to the source image `image`, usually at the expense of lower image quality.  
3. I have observed that rephrasing the instruction sometimes improves results (e.g., "turn him into a dog" vs. "make him a dog" vs. "as a dog").
4. Increasing the number of steps sometimes improves results.
5. Do faces look weird? The Stable Diffusion autoencoder has a hard time with faces that are small in the image. Try:
    * Cropping the image so the face takes up a larger portion of the frame.
"""

css = """
#col-container {max-width: 580px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
.footer {
        margin-bottom: 45px;
        margin-top: 10px;
        text-align: center;
        border-bottom: 1px solid #e5e5e5;
    }
    .footer>p {
        font-size: .8rem;
        display: inline-block;
        padding: 0 10px;
        transform: translateY(10px);
        background: white;
    }
    .dark .footer {
        border-color: #303030;
    }
    .dark .footer>p {
        background: #0b0f19;
    }
.animate-spin {
    animation: spin 1s linear infinite;
}
@keyframes spin {
    from {
        transform: rotate(0deg);
    }
    to {
        transform: rotate(360deg);
    }
}
"""


def previous(image):
    # Identity passthrough; wired to text_in.submit below so that the current
    # image_hid value is stashed into image_oneup for the 'reverse' prompt.
    return image

def upload_image(file):
    # Open an uploaded file object as a PIL image
    return Image.open(file)
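# Hypothetical helper (illustrative only, not wired in): the 512px-wide resize
# duplicated in chat() below could be consolidated as:
#
#   def resize_to_width(img, basewidth=512):
#       wpercent = basewidth / float(img.size[0])
#       hsize = int(float(img.size[1]) * wpercent)
#       return img.resize((basewidth, hsize), Image.Resampling.LANCZOS)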
    
def chat(btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale, image_hid, img_name, counter_out, image_oneup, prompt, history, progress=gr.Progress(track_tqdm=True)):
    """Main chat handler. `counter_out` tracks the conversation state:
    0 = image just uploaded, 1 = first edit of the upload, >1 = further edits
    of the previously edited image. 'reverse' and 'restart' are special prompts."""
    progress(0, desc="Starting...")
    if prompt.lower() == 'reverse':  # roll back to the previous image
        history = history or []
        temp_img_name = img_name[:-4] + str(int(time.time())) + '.png'
        image_oneup.save(temp_img_name)
        response = 'Reverted to the last image ' + '<img src="/file=' + temp_img_name + '">'
        history.append((prompt, response))
        return history, history, image_oneup, temp_img_name, counter_out
    if prompt.lower() == 'restart':  # start over from the original image
        history = history or []
        temp_img_name = img_name[:-4] + str(int(time.time())) + '.png'
        # Resize the original image to a 512px width, preserving aspect ratio
        basewidth = 512
        wpercent = basewidth / float(image_in.size[0])
        hsize = int(float(image_in.size[1]) * wpercent)
        image_in = image_in.resize((basewidth, hsize), Image.Resampling.LANCZOS)
        image_in.save(temp_img_name)
        response = 'Reverted to the original image ' + '<img src="/file=' + temp_img_name + '">'
        history.append((prompt, response))
        return history, history, image_in, temp_img_name, counter_out
    # Supportive canned responses to accompany each edit
    add_text_list = ["There you go", "Enjoy your image!", "Nice work! Wonder what you're gonna do next!", "Way to go!", "Does this work for you?", "Something like this?"]
    if counter_out > 1:
        # Later turns: edit the previously edited image
        edited_image = pipe(prompt, image=image_hid, num_inference_steps=int(in_steps), guidance_scale=float(in_guidance_scale), image_guidance_scale=float(in_img_guidance_scale)).images[0]
        if os.path.exists(img_name):
            os.remove(img_name)
        temp_img_name = img_name[:-4] + str(int(time.time()))[-4:] + '.png'
        # Save the edited image under a timestamped name
        with open(temp_img_name, "wb") as fp:
            edited_image.save(fp)
        saved_image_name2 = fp.name
        history = history or []
        response = random.choice(add_text_list) + '<img src="/file=' + saved_image_name2 + '">'
        history.append((prompt, response))
        counter_out += 1
        return history, history, edited_image, temp_img_name, counter_out
    elif counter_out == 0:
        # First turn: the user has just uploaded an image
        seed = random.randint(0, 1000000)
        img_name = f"./edited_image_{seed}.png"
        # Convert the uploaded file object to a PIL image
        image_in = Image.open(btn_upload)
        # Resize to a 512px width, preserving aspect ratio
        basewidth = 512
        wpercent = basewidth / float(image_in.size[0])
        hsize = int(float(image_in.size[1]) * wpercent)
        image_in = image_in.resize((basewidth, hsize), Image.Resampling.LANCZOS)
        if os.path.exists(img_name):
            os.remove(img_name)
        with open(img_name, "wb") as fp:
            image_in.save(fp)
        history = history or []
        response = '<img src="/file=' + img_name + '">'
        history.append((prompt, response))
        counter_out += 1
        return history, history, image_in, img_name, counter_out
    elif counter_out == 1:
        # Second turn: first instruct-pix2pix edit of the uploaded image
        edited_image = pipe(prompt, image=image_in, num_inference_steps=int(in_steps), guidance_scale=float(in_guidance_scale), image_guidance_scale=float(in_img_guidance_scale)).images[0]
        if os.path.exists(img_name):
            os.remove(img_name)
        temp_img_name = img_name[:-4] + str(int(time.time()))[-4:] + '.png'
        # Save the edited image under a timestamped name
        with open(temp_img_name, "wb") as fp:
            edited_image.save(fp)
        saved_image_name1 = fp.name
        history = history or []
        response = random.choice(add_text_list) + '<img src="/file=' + saved_image_name1 + '">'
        history.append((prompt, response))
        counter_out += 1
        return history, history, edited_image, temp_img_name, counter_out
        

# Blocks layout (the `css` string defined above was unused; wire it in here)
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
                <div
                style="
                    display: inline-flex;
                    align-items: center;
                    gap: 0.8rem;
                    font-size: 1.75rem;
                "
                >
                <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
                    ChatPix2Pix: Image Editing by Instructions
                </h1>
                </div>
                <p style="margin-bottom: 10px; font-size: 94%">
                Hi! I'm a Photoshop-expert bot. Start by uploading your image using the upload button, then tell me what changes you want to make to it.<br>
                <a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> Duplicate this Space with a GPU upgrade for fast inference and no queue.<br>
                Based on the <a href="https://huggingface.co/timbrooks/instruct-pix2pix" target="_blank">Diffusers implementation</a> of InstructPix2Pix.
                </p>
            </div>""")
        chatbot = gr.Chatbot()
        state_in = gr.State()
        with gr.Row():
            text_in = gr.Textbox(value='', placeholder="Type your instructions here and press Enter")
            btn_upload = gr.UploadButton("Upload image", file_types=["image"], file_count="single")
        with gr.Accordion("Advanced settings for inference", open=False):
            image_in = gr.Image(visible=False, type='pil', label="Original Image")
            gr.Markdown("Advanced settings for the number of inference steps, guidance scale, and image guidance scale.")
            in_steps = gr.Number(label="Enter the number of inference steps", value=20)
            in_guidance_scale = gr.Slider(1, 10, step=0.5, label="Set Guidance scale", value=7.5)
            in_img_guidance_scale = gr.Slider(1, 10, step=0.5, label="Set Image Guidance scale", value=1.5)
            image_hid = gr.Image(type='pil', visible=False)
            image_oneup = gr.Image(type='pil', visible=False)
            img_name_temp_out = gr.Textbox(visible=False)
            counter_out = gr.Number(visible=False, value=0, precision=0)
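            # Hidden components above carry state across turns: image_hid holds
            # the latest edited image, image_oneup the one-step-back image used
            # by the 'reverse' prompt, img_name_temp_out the current file name
            # on disk, and counter_out the turn count driving chat()'s branches.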

    btn_upload.upload(chat,
                      [btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale, image_hid, img_name_temp_out, counter_out, image_oneup, text_in, state_in],
                      [chatbot, state_in, image_in, img_name_temp_out, counter_out])
    text_in.submit(chat,
                   [btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale, image_hid, img_name_temp_out, counter_out, image_oneup, text_in, state_in],
                   [chatbot, state_in, image_hid, img_name_temp_out, counter_out])
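    # Input values are captured when the user presses Enter, so `previous`
    # receives image_hid as it was before chat() updates it and stashes that
    # pre-edit image in image_oneup, which the 'reverse' prompt restores.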
    text_in.submit(previous, [image_hid], [image_oneup])
    gr.Markdown(help_text)
    
demo.queue(concurrency_count=10)
demo.launch(debug=True, width="80%", height=2000)