Spaces:
Running
Running
import os | |
import gradio as gr | |
from gradio_imageslider import ImageSlider | |
from loadimg import load_img | |
import spaces | |
from transformers import AutoModelForImageSegmentation | |
import torch | |
from torchvision import transforms | |
from PIL import Image, ImageChops | |
from moviepy.editor import VideoFileClip, ImageSequenceClip | |
import numpy as np | |
from tqdm import tqdm | |
from uuid import uuid1 | |
# Check CUDA availability | |
if torch.cuda.is_available(): | |
device = "cuda" | |
else: | |
device = "cpu" | |
torch.set_float32_matmul_precision(["high", "highest"][0]) | |
# Load the model | |
birefnet = AutoModelForImageSegmentation.from_pretrained( | |
"briaai/RMBG-2.0", trust_remote_code=True | |
) | |
birefnet.to(device) | |
transform_image = transforms.Compose( | |
[ | |
transforms.Resize((1024, 1024)), | |
transforms.ToTensor(), | |
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), | |
] | |
) | |
output_folder = 'output_images' | |
if not os.path.exists(output_folder): | |
os.makedirs(output_folder) | |
def fn(image): | |
im = load_img(image, output_type="pil") | |
im = im.convert("RGB") | |
origin = im.copy() | |
image = process(im) | |
image_path = os.path.join(output_folder, "no_bg_image.png") | |
image.save(image_path) | |
return (image, origin), image_path | |
def process(image): | |
image_size = image.size | |
input_images = transform_image(image).unsqueeze(0).to(device) | |
# Prediction | |
with torch.no_grad(): | |
preds = birefnet(input_images)[-1].sigmoid().cpu() | |
pred = preds[0].squeeze() | |
pred_pil = transforms.ToPILImage()(pred) | |
mask = pred_pil.resize(image_size) | |
image.putalpha(mask) | |
return image | |
def process_file(f): | |
name_path = f.rsplit(".",1)[0]+".png" | |
im = load_img(f, output_type="pil") | |
im = im.convert("RGB") | |
transparent = process(im) | |
transparent.save(name_path) | |
return name_path | |
def remove_background(image): | |
"""Remove background from a single image.""" | |
input_images = transform_image(image).unsqueeze(0).to(device) | |
# Prediction | |
with torch.no_grad(): | |
preds = birefnet(input_images)[-1].sigmoid().cpu() | |
pred = preds[0].squeeze() | |
# Convert the prediction to a mask | |
mask = (pred * 255).byte() # Convert to 0-255 range | |
mask_pil = transforms.ToPILImage()(mask).convert("L") | |
mask_resized = mask_pil.resize(image.size, Image.LANCZOS) | |
# Apply the mask to the image | |
image.putalpha(mask_resized) | |
return image, mask_resized | |
def process_video(input_video_path): | |
"""Process a video to remove the background from each frame.""" | |
# Load the video | |
video_clip = VideoFileClip(input_video_path) | |
# Process each frame | |
frames = [] | |
for frame in tqdm(video_clip.iter_frames()): | |
frame_pil = Image.fromarray(frame) | |
frame_no_bg, mask_resized = remove_background(frame_pil) | |
path = "{}.png".format(uuid1()) | |
frame_no_bg.save(path) | |
frame_no_bg = Image.open(path).convert("RGBA") | |
os.remove(path) | |
# Convert mask_resized to RGBA mode | |
mask_resized_rgba = mask_resized.convert("RGBA") | |
# Apply the mask using ImageChops.multiply | |
output = ImageChops.multiply(frame_no_bg, mask_resized_rgba) | |
output_np = np.array(output) | |
frames.append(output_np) | |
# Save the processed frames as a new video | |
output_video_path = os.path.join(output_folder, "no_bg_video.mp4") | |
processed_clip = ImageSequenceClip(frames, fps=video_clip.fps) | |
processed_clip.write_videofile(output_video_path, codec='libx264', ffmpeg_params=['-pix_fmt', 'yuva420p']) | |
return output_video_path | |
# Gradio components | |
slider1 = ImageSlider(label="RMBG-2.0", type="pil") | |
slider2 = ImageSlider(label="RMBG-2.0", type="pil") | |
image = gr.Image(label="Upload an image") | |
image2 = gr.Image(label="Upload an image", type="filepath") | |
text = gr.Textbox(label="Paste an image URL") | |
png_file = gr.File(label="output png file") | |
video_input = gr.Video(label="Upload a video") | |
video_output = gr.Video(label="Processed video") | |
# Example videos | |
example_videos = [ | |
"pexels-cottonbro-5319934.mp4", | |
"300_A_car_is_running_on_the_road.mp4", | |
"A_Terracotta_Warrior_is_skateboarding_9033688.mp4" | |
] | |
# Gradio interfaces | |
tab1 = gr.Interface( | |
fn, inputs=image, outputs=[slider1, gr.File(label="output png file")], examples=[load_img("giraffe.jpg", output_type="pil")], api_name="image" | |
) | |
tab2 = gr.Interface(fn, inputs=text, outputs=[slider2, gr.File(label="output png file")], examples=["http://farm9.staticflickr.com/8488/8228323072_76eeddfea3_z.jpg"], api_name="text") | |
#tab3 = gr.Interface(process_file, inputs=image2, outputs=png_file, examples=["giraffe.jpg"], api_name="png") | |
tab4 = gr.Interface(process_video, inputs=video_input, outputs=video_output, examples=example_videos, api_name="video", cache_examples = False) | |
# Gradio tabbed interface | |
demo = gr.TabbedInterface( | |
[tab4, tab1, tab2], ["input video", "input image", "input url"], title="RMBG-2.0 for background removal" | |
) | |
if __name__ == "__main__": | |
demo.launch(share=True, show_error=True) | |