Spaces:

svjack
/

BRIA-RMBG-2.0-Video

Running

App Files Files Community

BRIA-RMBG-2.0-Video / app.py

svjack

Update app.py

168444f verified 3 months ago

raw

history blame

5.05 kB

	import os
	import gradio as gr
	from gradio_imageslider import ImageSlider
	from loadimg import load_img
	import spaces
	from transformers import AutoModelForImageSegmentation
	import torch
	from torchvision import transforms
	from PIL import Image, ImageChops
	from moviepy.editor import VideoFileClip, ImageSequenceClip
	import numpy as np
	from tqdm import tqdm
	from uuid import uuid1

	# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# Float32 matmul precision setting; "highest" is the stricter alternative.
	torch.set_float32_matmul_precision("high")

	# Segmentation model used by every entry point in this file.
	# NOTE: trust_remote_code=True runs Python code shipped with the model repo.
	birefnet = AutoModelForImageSegmentation.from_pretrained(
	    "briaai/RMBG-2.0",
	    trust_remote_code=True,
	)
	birefnet.to(device)

	# Preprocessing applied to each PIL image before it is fed to the model:
	# fixed 1024x1024 resize, conversion to a tensor, per-channel normalization.
	transform_image = transforms.Compose([
	    transforms.Resize((1024, 1024)),
	    transforms.ToTensor(),
	    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
	])

	# Directory that receives the generated PNG / MP4 files.
	output_folder = 'output_images'
	# exist_ok=True makes creation idempotent and avoids the check-then-create
	# race of testing os.path.exists() first (another process could create the
	# directory between the check and the makedirs call).
	os.makedirs(output_folder, exist_ok=True)

	def fn(image):
	    """Cut the background out of one image and write the result to disk.

	    ``image`` is passed straight to ``load_img``; the loaded picture is
	    converted to RGB before processing.

	    Returns ``((cutout, original), png_path)``: the processed image paired
	    with an untouched copy of the input, plus the path of the saved PNG
	    inside ``output_folder``.
	    """
	    rgb = load_img(image, output_type="pil").convert("RGB")
	    # process() writes the alpha channel into the image it receives, so keep
	    # a pristine copy first for the side-by-side output.
	    untouched = rgb.copy()
	    cutout = process(rgb)

	    saved_to = os.path.join(output_folder, "no_bg_image.png")
	    cutout.save(saved_to)
	    return (cutout, untouched), saved_to

	@spaces.GPU
	def process(image):
	    """Predict a foreground mask for ``image`` and attach it as alpha.

	    The image is preprocessed with ``transform_image`` and run through
	    ``birefnet``. The last element of the model output is passed through a
	    sigmoid, turned into a PIL image, resized to the input's size, and
	    written into the input's alpha channel.

	    The input image is modified in place and also returned.
	    """
	    original_size = image.size
	    batch = transform_image(image).unsqueeze(0).to(device)

	    # Inference only: no gradients needed.
	    with torch.no_grad():
	        outputs = birefnet(batch)
	        matte = outputs[-1].sigmoid().cpu()[0].squeeze()

	    alpha = transforms.ToPILImage()(matte).resize(original_size)
	    image.putalpha(alpha)
	    return image

	def process_file(f):
	    """Remove the background of the image file ``f`` and save it as a PNG.

	    The output is written next to the input, with the input's extension
	    replaced by ``.png`` (an input that is already ``.png`` is overwritten).

	    Returns the path of the written PNG.
	    """
	    # os.path.splitext only looks at the final path component, so a dot in a
	    # directory name ("/tmp/a.b/file") or a leading-dot filename is not
	    # mistaken for an extension the way a plain rsplit(".") would.
	    png_path = os.path.splitext(f)[0] + ".png"
	    rgb = load_img(f, output_type="pil").convert("RGB")
	    transparent = process(rgb)
	    transparent.save(png_path)
	    return png_path

	def remove_background(image):
	    """Attach a predicted foreground mask to ``image`` as its alpha channel.

	    Returns ``(image, mask)``, where ``image`` is the same object that was
	    passed in (modified in place) and ``mask`` is the 8-bit grayscale mask,
	    already resized to the image's size.
	    """
	    batch = transform_image(image).unsqueeze(0).to(device)

	    # Inference only: no gradients needed.
	    with torch.no_grad():
	        probabilities = birefnet(batch)[-1].sigmoid().cpu()[0].squeeze()

	    # Scale the sigmoid output to the 0-255 range before building a PIL mask.
	    mask_u8 = (probabilities * 255).byte()
	    alpha = (
	        transforms.ToPILImage()(mask_u8)
	        .convert("L")
	        .resize(image.size, Image.LANCZOS)
	    )

	    image.putalpha(alpha)
	    return image, alpha

	def process_video(input_video_path):
	    """Remove the background from every frame of a video.

	    Each frame is run through ``remove_background``; the resulting RGBA
	    frame is then multiplied by its mask, and the frames are re-encoded
	    at the source frame rate.

	    Args:
	        input_video_path: Path of the video file to process.

	    Returns:
	        Path of the written video (``no_bg_video.mp4`` in ``output_folder``).

	    Raises:
	        ValueError: If the input video yields no frames.
	    """
	    video_clip = VideoFileClip(input_video_path)
	    try:
	        fps = video_clip.fps
	        frames = []
	        for frame in tqdm(video_clip.iter_frames()):
	            frame_no_bg, mask_resized = remove_background(Image.fromarray(frame))
	            # The frame already carries its alpha channel in memory, so no
	            # round-trip through a temporary PNG on disk is needed; convert()
	            # only guarantees the RGBA mode that ImageChops.multiply expects.
	            frame_no_bg = frame_no_bg.convert("RGBA")

	            # Expand the grayscale mask to RGBA so both operands share a mode.
	            mask_resized_rgba = mask_resized.convert("RGBA")

	            # Multiply the frame by the mask channel-wise.
	            output = ImageChops.multiply(frame_no_bg, mask_resized_rgba)
	            frames.append(np.array(output))
	    finally:
	        # Always release the reader held by the clip, even if a frame fails.
	        video_clip.close()

	    if not frames:
	        raise ValueError(
	            "No frames could be read from {!r}".format(input_video_path)
	        )

	    # Save the processed frames as a new video.
	    output_video_path = os.path.join(output_folder, "no_bg_video.mp4")
	    processed_clip = ImageSequenceClip(frames, fps=fps)
	    # NOTE(review): an alpha pixel format is requested together with libx264;
	    # confirm the encoder actually keeps the alpha plane in the output file.
	    processed_clip.write_videofile(output_video_path, codec='libx264', ffmpeg_params=['-pix_fmt', 'yuva420p'])

	    return output_video_path

	# --- Gradio components -------------------------------------------------
	# Before/after sliders, one per image tab.
	slider1 = ImageSlider(label="RMBG-2.0", type="pil")
	slider2 = ImageSlider(label="RMBG-2.0", type="pil")

	# Image inputs: the default image component and a file-path variant.
	image = gr.Image(label="Upload an image")
	image2 = gr.Image(label="Upload an image", type="filepath")

	# URL input and PNG download output.
	text = gr.Textbox(label="Paste an image URL")
	png_file = gr.File(label="output png file")

	# Video input and output for the video tab.
	video_input = gr.Video(label="Upload a video")
	video_output = gr.Video(label="Processed video")

	# Sample clips offered as examples on the video tab.
	example_videos = [
	    "pexels-cottonbro-5319934.mp4",
	    "300_A_car_is_running_on_the_road.mp4",
	    "A_Terracotta_Warrior_is_skateboarding_9033688.mp4",
	]

	# --- Gradio interfaces -------------------------------------------------
	# Image upload -> before/after slider + downloadable PNG.
	tab1 = gr.Interface(
	    fn,
	    inputs=image,
	    outputs=[slider1, gr.File(label="output png file")],
	    examples=[load_img("giraffe.jpg", output_type="pil")],
	    api_name="image",
	)

	# Image URL -> before/after slider + downloadable PNG.
	tab2 = gr.Interface(
	    fn,
	    inputs=text,
	    outputs=[slider2, gr.File(label="output png file")],
	    examples=["http://farm9.staticflickr.com/8488/8228323072_76eeddfea3_z.jpg"],
	    api_name="text",
	)

	# The file-path tab is currently disabled:
	#tab3 = gr.Interface(process_file, inputs=image2, outputs=png_file, examples=["giraffe.jpg"], api_name="png")

	# Video upload -> processed video; example outputs are not pre-computed.
	tab4 = gr.Interface(
	    process_video,
	    inputs=video_input,
	    outputs=video_output,
	    examples=example_videos,
	    api_name="video",
	    cache_examples=False,
	)

	# Tabbed app; the video tab comes first.
	demo = gr.TabbedInterface(
	    [tab4, tab1, tab2],
	    ["input video", "input image", "input url"],
	    title="RMBG-2.0 for background removal",
	)

	if __name__ == "__main__":
	    # Start the app only when run as a script: surface errors in the UI and
	    # request a shareable link.
	    demo.launch(show_error=True, share=True)