import cv2
from tqdm import tqdm
import numpy as np
import torch
from ultralytics import YOLO

# Use CUDA mixed-precision autocast when available; otherwise fall back to a
# no-op context manager with the same interface.
if torch.cuda.is_available():
    autocast = torch.cuda.amp.autocast
else:
    class autocast:
        def __init__(self, enabled=True):
            pass

        def __enter__(self):
            pass

        def __exit__(self, *args):
            pass

def detect_track(imgfiles, thresh=0.5):
    hand_det_model = YOLO('./weights/external/detector.pt')

    # Run detection + tracking over all frames.
    boxes_ = []
    tracks = {}
    for t, imgpath in enumerate(tqdm(imgfiles)):
        img_cv2 = cv2.imread(imgpath)

        ### --- Detection ---
        with torch.no_grad():
            with autocast():
                results = hand_det_model.track(img_cv2, conf=thresh, persist=True, verbose=False)
                boxes = results[0].boxes.xyxy.cpu().numpy()
                confs = results[0].boxes.conf.cpu().numpy()
                handedness = results[0].boxes.cls.cpu().numpy()  # cls > 0 is treated as right hand, 0 as left
                if results[0].boxes.id is not None:
                    track_id = results[0].boxes.id.cpu().numpy()
                else:
                    track_id = [-1] * len(boxes)

                boxes = np.hstack([boxes, confs[:, None]])  # (N, 5): xyxy + confidence
        boxes_.append(boxes)  # collect per-frame (N, 5) detections

        find_right = False
        find_left = False
        for idx, box in enumerate(boxes):
            # Untracked detections get a fixed pseudo-id: 10000 (right) or 5000 (left).
            if track_id[idx] == -1:
                if handedness[idx] > 0:
                    id = int(10000)
                else:
                    id = int(5000)
            else:
                id = track_id[idx]

            subj = dict()
            subj['frame'] = t
            subj['det'] = True
            subj['det_box'] = boxes[[idx]]             # keep (1, 5) shape
            subj['det_handedness'] = handedness[[idx]]

            # Keep at most one right hand and one left hand per frame.
            if (not find_right and handedness[idx] > 0) or (not find_left and handedness[idx] == 0):
                if id in tracks:
                    tracks[id].append(subj)
                else:
                    tracks[id] = [subj]
                if handedness[idx] > 0:
                    find_right = True
                elif handedness[idx] == 0:
                    find_left = True

    tracks = np.array(tracks, dtype=object)
    boxes_ = np.array(boxes_, dtype=object)
    return boxes_, tracks
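

# Illustrative helper (an assumption, not part of the original code): flattens
# one track (the list of per-frame subj dicts produced by detect_track())
# into the (frame, boxes) arrays that parse_chunks() below expects.
def track_to_arrays(track):
    frame = np.array([subj['frame'] for subj in track])                   # (T,)
    boxes = np.concatenate([subj['det_box'] for subj in track], axis=0)   # (T, 5): xyxy + conf
    return frame, boxes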

def parse_chunks(frame, boxes, min_len=16):
    """If a track disappears in the middle, we split it into separate
    segments and estimate the HPS on each segment independently.
    Segments shorter than min_len (16) frames are discarded for now.
    """
    frame_chunks = []
    boxes_chunks = []
    step = frame[1:] - frame[:-1]
    step = np.concatenate([[0], step])
    breaks = np.where(step != 1)[0]  # indices where the frame sequence jumps

    start = 0
    for bk in breaks:
        f_chunk = frame[start:bk]
        b_chunk = boxes[start:bk]
        start = bk
        if len(f_chunk) >= min_len:
            frame_chunks.append(f_chunk)
            boxes_chunks.append(b_chunk)
        if bk == breaks[-1]:  # last break: flush the trailing chunk
            f_chunk = frame[bk:]
            b_chunk = boxes[bk:]
            if len(f_chunk) >= min_len:
                frame_chunks.append(f_chunk)
                boxes_chunks.append(b_chunk)

    return frame_chunks, boxes_chunks
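
# Worked example (illustrative): for frame = [0, 1, 2, 10, 11, ..., 30] the
# jump from 2 to 10 splits the track into a 3-frame and a 21-frame segment;
# with min_len=16, only the segment covering frames 10..30 is returned.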

def parse_chunks_hand_frame(frame):
    """Same splitting as parse_chunks, but on frame indices only:
    the track is split at every gap and all non-empty segments are kept.
    """
    frame_chunks = []
    step = frame[1:] - frame[:-1]
    step = np.concatenate([[0], step])
    breaks = np.where(step != 1)[0]  # indices where the frame sequence jumps

    start = 0
    for bk in breaks:
        f_chunk = frame[start:bk]
        start = bk
        if len(f_chunk) > 0:
            frame_chunks.append(f_chunk)
        if bk == breaks[-1]:  # last break: flush the trailing chunk
            f_chunk = frame[bk:]
            if len(f_chunk) > 0:
                frame_chunks.append(f_chunk)

    return frame_chunks
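

# Minimal usage sketch, assuming frames have already been extracted to a
# folder whose sorted filenames follow temporal order (paths are hypothetical).
if __name__ == '__main__':
    import glob

    imgfiles = sorted(glob.glob('./example_frames/*.jpg'))
    _, tracks = detect_track(imgfiles, thresh=0.5)
    for tid, track in tracks.item().items():  # .item() unwraps the 0-d object array
        frame, boxes = track_to_arrays(track)  # illustrative helper defined above
        frame_chunks, boxes_chunks = parse_chunks(frame, boxes, min_len=16)
        print(f'track {tid}: {len(frame_chunks)} segment(s) with >= 16 frames')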