HaWoR / lib /pipeline /tools.py
ThunderVVV's picture
update
5f028d6
raw
history blame
4.1 kB
import cv2
from tqdm import tqdm
import numpy as np
import torch
from ultralytics import YOLO
if not torch.cuda.is_available():
    class autocast:
        """No-op stand-in for ``torch.cuda.amp.autocast`` on CPU-only hosts.

        Accepts the same ``enabled`` flag as the real context manager and
        simply does nothing on enter/exit, so calling code can use
        ``with autocast():`` unconditionally.
        """

        def __init__(self, enabled=True):
            pass

        def __enter__(self):
            pass

        def __exit__(self, *exc_info):
            pass
else:
    # Real mixed-precision context manager when a GPU is present.
    autocast = torch.cuda.amp.autocast
def detect_track(imgfiles, thresh=0.5):
    """Detect and track hands across an ordered sequence of image files.

    Runs the YOLO hand detector in tracking mode frame by frame, keeping at
    most one right hand and one left hand per frame, grouped by track id.

    Args:
        imgfiles: ordered list of image file paths (one per video frame).
        thresh: detection confidence threshold passed to the tracker.

    Returns:
        boxes_: empty object ndarray — nothing is ever appended to it; kept
            only so callers that unpack two values keep working.
        tracks: 0-d object ndarray wrapping a dict mapping track id -> list
            of per-frame dicts with keys 'frame' (int), 'det' (True),
            'det_box' (shape (1, 5): xyxy + confidence) and
            'det_handedness' (shape (1,): 0 = left, >0 = right).
    """
    hand_det_model = YOLO('./weights/external/detector.pt')
    boxes_ = []
    tracks = {}
    for t, imgpath in enumerate(tqdm(imgfiles)):
        img_cv2 = cv2.imread(imgpath)

        ### --- Detection ---
        with torch.no_grad():
            with autocast():
                results = hand_det_model.track(img_cv2, conf=thresh, persist=True, verbose=False)
                boxes = results[0].boxes.xyxy.cpu().numpy()
                confs = results[0].boxes.conf.cpu().numpy()
                handedness = results[0].boxes.cls.cpu().numpy()
                # The tracker reports no ids until it has locked onto targets.
                if results[0].boxes.id is not None:
                    track_id = results[0].boxes.id.cpu().numpy()
                else:
                    track_id = [-1] * len(boxes)

        # Append confidence as a 5th column: (N, 5) = xyxy + conf.
        boxes = np.hstack([boxes, confs[:, None]])

        # Keep at most one right hand and one left hand per frame.
        find_right = False
        find_left = False
        for idx in range(len(boxes)):
            is_right = handedness[idx] > 0
            is_left = handedness[idx] == 0
            if track_id[idx] == -1:
                # No tracker id yet: fall back to a fixed id per handedness.
                tid = 10000 if is_right else 5000
            else:
                tid = track_id[idx]
            subj = {
                'frame': t,
                'det': True,
                'det_box': boxes[[idx]],              # keep (1, 5) shape
                'det_handedness': handedness[[idx]],  # keep (1,) shape
            }
            if (not find_right and is_right) or (not find_left and is_left):
                tracks.setdefault(tid, []).append(subj)
                if is_right:
                    find_right = True
                elif is_left:
                    find_left = True

    # Preserved for backward compatibility with existing callers.
    tracks = np.array(tracks, dtype=object)
    boxes_ = np.array(boxes_, dtype=object)
    return boxes_, tracks
def parse_chunks(frame, boxes, min_len=16):
    """Split a track into runs of consecutive frame indices.

    If a track disappears in the middle of a video, the frame indices jump
    by more than one; each contiguous run becomes its own segment so the
    HPS can be estimated independently per segment. Segments shorter than
    ``min_len`` frames are dropped.

    Args:
        frame: 1-D int array of frame indices, sorted ascending.
        boxes: array of per-frame data aligned with ``frame``.
        min_len: minimum segment length to keep.

    Returns:
        (frame_chunks, boxes_chunks): parallel lists of array slices.
    """
    frame_chunks = []
    boxes_chunks = []
    # Position 0 plus every index where the frame number does not advance
    # by exactly 1 marks the start of a new run.
    jumps = np.concatenate([[0], frame[1:] - frame[:-1]])
    cuts = np.where(jumps != 1)[0]
    # Walk consecutive boundary pairs; the final pair covers the tail run.
    bounds = np.concatenate([[0], cuts, [len(frame)]])
    for lo, hi in zip(bounds[:-1], bounds[1:]):
        if hi - lo >= min_len:
            frame_chunks.append(frame[lo:hi])
            boxes_chunks.append(boxes[lo:hi])
    return frame_chunks, boxes_chunks
def parse_chunks_hand_frame(frame):
    """Split hand frame indices into runs of consecutive frames.

    Same segmentation rule as ``parse_chunks`` — a jump of more than one in
    the frame numbering starts a new segment — but only the frame indices
    are split, and every non-empty run is kept (no minimum length).

    Args:
        frame: 1-D int array of frame indices, sorted ascending.

    Returns:
        List of arrays, one per contiguous run of frame indices.
    """
    # Position 0 plus every index where the numbering jumps starts a run.
    jumps = np.concatenate([[0], frame[1:] - frame[:-1]])
    cuts = np.where(jumps != 1)[0]
    bounds = np.concatenate([[0], cuts, [len(frame)]])
    return [frame[lo:hi] for lo, hi in zip(bounds[:-1], bounds[1:]) if hi > lo]