import os
from unittest.mock import patch

import spaces  # keep this import before torch: required on ZeroGPU Spaces
import gradio as gr
import numpy as np
import supervision as sv
import torch
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoProcessor

from utils.imports import fixed_get_imports
from utils.models import (
    run_captioning,
    CAPTIONING_TASK,
    run_caption_to_phrase_grounding
)
from utils.video import (
    create_directory,
    remove_files_older_than,
    generate_file_name,
    calculate_end_frame_index
)

MARKDOWN = """
# Florence-2 for Videos 🎬
<div>
<a href="https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-florence-2-on-detection-dataset.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Colab" style="display:inline-block;">
</a>
<a href="https://blog.roboflow.com/florence-2/">
<img src="https://raw.githubusercontent.com/roboflow-ai/notebooks/main/assets/badges/roboflow-blogpost.svg" alt="Roboflow" style="display:inline-block;">
</a>
<a href="https://arxiv.org/abs/2311.06242">
<img src="https://img.shields.io/badge/arXiv-2311.06242-b31b1b.svg" alt="arXiv" style="display:inline-block;">
</a>
</div>
"""
RESULTS = "results"
CHECKPOINT = "microsoft/Florence-2-base-ft"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
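
# Florence-2's remote modeling code declares flash_attn as an import, which
# breaks loading on machines without it. Patching transformers' get_imports
# with fixed_get_imports (defined in utils/imports) works around that; a
# typical implementation (assumed here -- the real one lives in utils/imports)
# looks roughly like:
#
#     from transformers.dynamic_module_utils import get_imports
#
#     def fixed_get_imports(filename):
#         imports = get_imports(filename)
#         if str(filename).endswith("modeling_florence2.py") and "flash_attn" in imports:
#             imports.remove("flash_attn")
#         return imports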
with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports):
    MODEL = AutoModelForCausalLM.from_pretrained(
        CHECKPOINT, trust_remote_code=True).to(DEVICE)
    PROCESSOR = AutoProcessor.from_pretrained(
        CHECKPOINT, trust_remote_code=True)
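
# ColorLookup.TRACK colors boxes and labels by tracker id, so each tracked
# object keeps a consistent color across frames.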
BOUNDING_BOX_ANNOTATOR = sv.BoundingBoxAnnotator(color_lookup=sv.ColorLookup.TRACK)
LABEL_ANNOTATOR = sv.LabelAnnotator(color_lookup=sv.ColorLookup.TRACK)
TRACKER = sv.ByteTrack()
# create the directory that result videos are written to
create_directory(directory_path=RESULTS)


def annotate_image(
    input_image: np.ndarray,
    detections: sv.Detections
) -> np.ndarray:
    output_image = input_image.copy()
    output_image = BOUNDING_BOX_ANNOTATOR.annotate(output_image, detections)
    output_image = LABEL_ANNOTATOR.annotate(output_image, detections)
    return output_image
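

# @spaces.GPU asks ZeroGPU to attach a GPU for the duration of each call;
# outside of ZeroGPU hardware the decorator has no effect.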
@spaces.GPU
def process_video(
    input_video: str,
    progress=gr.Progress(track_tqdm=True)
) -> str:
    # clean up stale result files before writing a new one
    remove_files_older_than(RESULTS, 30)

    # keep every OUTPUT_LENGTH-th frame and divide the fps to match, so the
    # annotated output plays back at roughly the original speed
    OUTPUT_LENGTH = 4
    video_info = sv.VideoInfo.from_video_path(input_video)
    video_info.fps = video_info.fps // OUTPUT_LENGTH
    total = calculate_end_frame_index(input_video, OUTPUT_LENGTH)
    frame_generator = sv.get_video_frames_generator(
        source_path=input_video,
        end=total,
        stride=OUTPUT_LENGTH
    )

    result_file_name = generate_file_name(extension="mp4")
    result_file_path = os.path.join(RESULTS, result_file_name)

    TRACKER.reset()
    caption = None
    with sv.VideoSink(result_file_path, video_info=video_info) as sink:
        for _ in tqdm(range(total // OUTPUT_LENGTH), desc="Processing video..."):
            frame = next(frame_generator)

            # caption the first frame once, then reuse that caption for
            # phrase grounding on every frame
            if caption is None:
                caption = run_captioning(
                    model=MODEL,
                    processor=PROCESSOR,
                    image=frame,
                    device=DEVICE
                )[CAPTIONING_TASK]
            detections = run_caption_to_phrase_grounding(
                model=MODEL,
                processor=PROCESSOR,
                caption=caption,
                image=frame,
                device=DEVICE
            )

            # Florence-2 grounding returns neither confidences nor class ids,
            # but ByteTrack expects both, so fill in placeholder values
            detections.confidence = np.ones(len(detections))
            detections.class_id = np.zeros(len(detections))
            detections = TRACKER.update_with_detections(detections)

            frame = annotate_image(
                input_image=frame,
                detections=detections
            )
            sink.write_frame(frame)
    return result_file_path
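
# UI: input and output video players side by side, plus a Submit button
# wired to process_video.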
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        input_video_component = gr.Video(
            label='Input Video'
        )
        output_video_component = gr.Video(
            label='Output Video'
        )
    with gr.Row():
        submit_button_component = gr.Button(
            value='Submit',
            scale=1,
            variant='primary'
        )

    submit_button_component.click(
        fn=process_video,
        inputs=[
            input_video_component,
        ],
        outputs=output_video_component
    )

demo.launch(debug=False, show_error=True, max_threads=1)