import glob
import logging
import math
import os
import platform
import random
import re
import shutil
import subprocess
import time
from contextlib import contextmanager
from copy import copy
from pathlib import Path

import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torchvision
import yaml
from PIL import Image
from scipy.cluster.vq import kmeans
from scipy.signal import butter, filtfilt
from tqdm import tqdm
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-9):
    # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
    box2 = box2.T

    # Get the coordinates of bounding boxes
    if x1y1x2y2:  # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:  # transform from xywh to xyxy
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

    # Union area
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps

    iou = inter / union
    if GIoU or DIoU or CIoU:
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (smallest enclosing box) width
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
        if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
            c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
            rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
                    (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center distance squared
            if DIoU:
                return iou - rho2 / c2  # DIoU
            elif CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
                v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
                with torch.no_grad():
                    alpha = v / ((1 + eps) - iou + v)
                return iou - (rho2 / c2 + v * alpha)  # CIoU
        else:  # GIoU https://arxiv.org/pdf/1902.09630.pdf
            c_area = cw * ch + eps  # convex area
            return iou - (c_area - union) / c_area  # GIoU
    else:
        return iou  # IoU
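
# Minimal usage sketch for bbox_iou (toy boxes, illustrative values only; this
# hypothetical _demo_* helper is not called anywhere in the module).
def _demo_bbox_iou():
    box1 = torch.tensor([0.0, 0.0, 10.0, 10.0])  # single box, shape (4,)
    box2 = torch.tensor([[5.0, 5.0, 15.0, 15.0],
                         [0.0, 0.0, 10.0, 10.0]])  # n boxes, shape (n, 4)
    print(bbox_iou(box1, box2))  # plain IoU: ~[0.1429, 1.0]
    print(bbox_iou(box1, box2, CIoU=True))  # CIoU adds distance/aspect penalties, so CIoU <= IoU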
def box_iou(box1, box2):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """

    def box_area(box):
        # box = 4xn
        return (box[2] - box[0]) * (box[3] - box[1])  # (x2 - x1) * (y2 - y1)

    area1 = box_area(box1.T)
    area2 = box_area(box2.T)

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)
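
# Minimal usage sketch for box_iou (toy boxes; hypothetical helper, not called
# anywhere in the module). The NxM pairwise matrix it returns is what the
# merge-NMS branch of non_max_suppression consumes.
def _demo_box_iou():
    a = torch.tensor([[0.0, 0.0, 10.0, 10.0],
                      [20.0, 20.0, 30.0, 30.0]])  # N = 2 boxes
    b = torch.tensor([[5.0, 5.0, 15.0, 15.0]])  # M = 1 box
    print(box_iou(a, b).shape)  # torch.Size([2, 1]), pairwise IoU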
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()):
    """Performs Non-Maximum Suppression (NMS) on inference results

    Returns:
        detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
    """
    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    max_det = 300  # maximum number of detections per image
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label = nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            l = labels[xi]
            v = torch.zeros((len(l), nc + 5), device=x.device)
            v[:, :4] = l[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit}s exceeded')
            break  # time limit exceeded

    return output
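
# Minimal usage sketch for non_max_suppression (hypothetical helper, not called
# anywhere in the module). Random values stand in for a model's raw inference
# output, shaped (batch, n_boxes, 5 + nc) with rows (cx, cy, w, h, obj, cls...).
def _demo_nms():
    bs, n, nc = 1, 100, 3
    pred = torch.rand(bs, n, 5 + nc)
    pred[..., :2] *= 640  # random box centers within a 640x640 image
    pred[..., 2:4] = pred[..., 2:4] * 50 + 2  # widths/heights in [2, 52)
    out = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)
    print(out[0].shape)  # (n_kept, 6): x1, y1, x2, y2, conf, cls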
def xywh2xyxy(x):
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y
def fitness(x):
    # Returns fitness (for use with results.txt or evolve.txt)
    w = [0.0, 0.0, 0.1, 0.9]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
    return (x[:, :4] * w).sum(1)
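
# Worked example for fitness (hypothetical helper, toy numbers): with weights
# [0.0, 0.0, 0.1, 0.9], the score is 0.1 * mAP@0.5 + 0.9 * mAP@0.5:0.95.
def _demo_fitness():
    metrics = np.array([[0.80, 0.75, 0.70, 0.45]])  # one [P, R, mAP@0.5, mAP@0.5:0.95] row
    print(fitness(metrics))  # [0.475] = 0.1 * 0.70 + 0.9 * 0.45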
def check_img_size(img_size, s=32):
    # Verify img_size is a multiple of stride s
    new_size = make_divisible(img_size, int(s))  # ceil gs-multiple
    if new_size != img_size:
        print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size))
    return new_size
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    # Rescale coords (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords
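
# Minimal usage sketch for scale_coords (hypothetical helper, toy values): maps
# xyxy boxes from a letterboxed 640x640 network input back onto a 480x640 (h, w)
# source image; gain and padding are derived inside scale_coords.
def _demo_scale_coords():
    coords = torch.tensor([[100.0, 160.0, 200.0, 320.0]])  # xyxy on the 640x640 input
    print(scale_coords((640, 640), coords.clone(), (480, 640)))  # boxes in source-image pixels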
def clip_coords(boxes, img_shape):
    # Clip xyxy bounding boxes to image shape (height, width)
    boxes[:, 0].clamp_(0, img_shape[1])  # x1
    boxes[:, 1].clamp_(0, img_shape[0])  # y1
    boxes[:, 2].clamp_(0, img_shape[1])  # x2
    boxes[:, 3].clamp_(0, img_shape[0])  # y2
def make_divisible(x, divisor):
    # Returns x evenly divisible by divisor
    return math.ceil(x / divisor) * divisor
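
# Worked example for make_divisible (hypothetical helper): rounding an image
# size up to the nearest stride multiple, as check_img_size does.
def _demo_make_divisible():
    print(make_divisible(641, 32))  # 672, the next multiple of 32 at or above 641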
def xyxy2xywh(x):
    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
    y[:, 2] = x[:, 2] - x[:, 0]  # width
    y[:, 3] = x[:, 3] - x[:, 1]  # height
    return y
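
# Round-trip sketch (hypothetical helper, toy box): xyxy2xywh and xywh2xyxy are
# exact inverses of each other.
def _demo_box_round_trip():
    xyxy = torch.tensor([[10.0, 20.0, 50.0, 80.0]])
    assert torch.allclose(xywh2xyxy(xyxy2xywh(xyxy)), xyxy)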
def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16):
    # Plot image grid with labels
    if isinstance(images, torch.Tensor):
        images = images.cpu().float().numpy()
    if isinstance(targets, torch.Tensor):
        targets = targets.cpu().numpy()

    # un-normalise
    if np.max(images[0]) <= 1:
        images *= 255

    tl = 3  # line thickness
    tf = max(tl - 1, 1)  # font thickness
    bs, _, h, w = images.shape  # batch size, _, height, width
    bs = min(bs, max_subplots)  # limit plot images
    ns = np.ceil(bs ** 0.5)  # number of subplots (square)

    # Check if we should resize
    scale_factor = max_size / max(h, w)
    if scale_factor < 1:
        h = math.ceil(scale_factor * h)
        w = math.ceil(scale_factor * w)

    colors = color_list()  # list of colors
    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
    for i, img in enumerate(images):
        if i == max_subplots:  # if last batch has fewer images than we expect
            break

        block_x = int(w * (i // ns))
        block_y = int(h * (i % ns))

        img = img.transpose(1, 2, 0)
        if scale_factor < 1:
            img = cv2.resize(img, (w, h))

        mosaic[block_y:block_y + h, block_x:block_x + w, :] = img
        if len(targets) > 0:
            image_targets = targets[targets[:, 0] == i]
            boxes = xywh2xyxy(image_targets[:, 2:6]).T
            classes = image_targets[:, 1].astype('int')
            labels = image_targets.shape[1] == 6  # labels if no conf column
            conf = None if labels else image_targets[:, 6]  # check for confidence presence (label vs pred)

            if boxes.shape[1]:
                if boxes.max() <= 1.01:  # if normalized with tolerance 0.01
                    boxes[[0, 2]] *= w  # scale to pixels
                    boxes[[1, 3]] *= h
                elif scale_factor < 1:  # absolute coords need scale if image scales
                    boxes *= scale_factor
            boxes[[0, 2]] += block_x
            boxes[[1, 3]] += block_y
            for j, box in enumerate(boxes.T):
                cls = int(classes[j])
                color = colors[cls % len(colors)]
                cls = names[cls] if names else cls
                if labels or conf[j] > 0.25:  # 0.25 conf thresh
                    label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j])
                    plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)

        # Draw image filename labels
        if paths:
            label = Path(paths[i]).name[:40]  # trim to 40 char
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf,
                        lineType=cv2.LINE_AA)

        # Image border
        cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3)

    if fname:
        r = min(1280. / max(h, w) / ns, 1.0)  # ratio to limit image size
        mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA)
        # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB))  # cv2 save
        Image.fromarray(mosaic).save(fname)  # PIL save
    return mosaic
def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    # Plots one bounding box on image img
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
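
# Minimal usage sketch for plot_one_box (hypothetical helper; box, label, colour,
# and output path are arbitrary demo values): draws one labelled box in place.
def _demo_plot_one_box():
    canvas = np.zeros((480, 640, 3), dtype=np.uint8)
    plot_one_box([100, 100, 300, 250], canvas, color=(0, 255, 0), label='person 0.9')
    cv2.imwrite('demo_box.jpg', canvas)  # hypothetical output path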
def color_list():
    # Return first 10 plt colors as (r,g,b) https://stackoverflow.com/questions/51350872/python-from-color-name-to-rgb
    def hex2rgb(h):
        return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))

    return [hex2rgb(h) for h in plt.rcParams['axes.prop_cycle'].by_key()['color']]
def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp:  True positives (nparray, nx1 or nx10).
        conf:  Objectness value from 0-1 (nparray).
        pred_cls:  Predicted object classes (nparray).
        target_cls:  True object classes (nparray).
        plot:  Plot precision-recall curve at mAP@0.5
        save_dir:  Plot save directory
    # Returns
        The average precision as computed in py-faster-rcnn.
    """

    # Sort by objectness
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]

    # Find unique classes
    unique_classes = np.unique(target_cls)

    # Create Precision-Recall curve and compute AP for each class
    px, py = np.linspace(0, 1, 1000), []  # for plotting
    pr_score = 0.1  # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898
    s = [unique_classes.shape[0], tp.shape[1]]  # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95)
    ap, p, r = np.zeros(s), np.zeros((unique_classes.shape[0], 1000)), np.zeros((unique_classes.shape[0], 1000))
    for ci, c in enumerate(unique_classes):
        i = pred_cls == c
        n_l = (target_cls == c).sum()  # number of labels
        n_p = i.sum()  # number of predictions

        if n_p == 0 or n_l == 0:
            continue
        else:
            # Accumulate FPs and TPs
            fpc = (1 - tp[i]).cumsum(0)
            tpc = tp[i].cumsum(0)

            # Recall
            recall = tpc / (n_l + 1e-16)  # recall curve
            r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0)  # negative x, xp because xp decreases

            # Precision
            precision = tpc / (tpc + fpc)  # precision curve
            p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1)  # p at pr_score

            # AP from recall-precision curve
            for j in range(tp.shape[1]):
                ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
                if plot and (j == 0):
                    py.append(np.interp(px, mrec, mpre))  # precision at mAP@0.5

    # Compute F1 score (harmonic mean of precision and recall)
    f1 = 2 * p * r / (p + r + 1e-16)
    i = f1.mean(0).argmax()  # confidence index that maximizes mean F1

    if plot:
        plot_pr_curve(px, py, ap, save_dir, names)

    return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32')
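
# Synthetic-input sketch for ap_per_class (hypothetical helper, toy data):
# three predictions of one class, the first two correct at every IoU threshold.
# tp must be (n_pred, n_iou_thresholds), here 10 thresholds as in mAP0.5...0.95.
def _demo_ap_per_class():
    tp = np.array([[True] * 10, [True] * 10, [False] * 10])
    conf = np.array([0.9, 0.8, 0.1])
    pred_cls = np.zeros(3)
    target_cls = np.zeros(2)  # two ground-truth objects of class 0
    p, r, ap, f1, classes = ap_per_class(tp, conf, pred_cls, target_cls)
    print(ap[:, 0])  # AP at IoU 0.5 for class 0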
def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rbgirshick/py-faster-rcnn.
    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """

    # Append sentinel values to beginning and end
    mrec = np.concatenate(([0.], recall, [recall[-1] + 1E-3]))
    mpre = np.concatenate(([1.], precision, [0.]))

    # Compute the precision envelope
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

    # Integrate area under curve
    method = 'interp'  # methods: 'continuous', 'interp'
    if method == 'interp':
        x = np.linspace(0, 1, 101)  # 101-point interp (COCO)
        ap = np.trapz(np.interp(x, mrec, mpre), x)  # integrate
    else:  # 'continuous'
        i = np.where(mrec[1:] != mrec[:-1])[0]  # points where x axis (recall) changes
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under curve

    return ap, mpre, mrec
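
# Worked example for compute_ap (hypothetical helper, toy curve): with precision
# flat at 0.8 across the full recall range, the 101-point AP comes out near 0.8.
def _demo_compute_ap():
    recall = np.linspace(0, 1, 50)
    precision = np.full(50, 0.8)
    ap, mpre, mrec = compute_ap(recall, precision)
    print(round(float(ap), 3))  # ~0.8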
def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
    # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
    # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
    # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
    # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)]  # darknet to coco
    # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)]  # coco to darknet
    x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
         35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
         64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
    return x
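
# Lookup sketch for coco80_to_coco91_class (hypothetical helper): the list maps
# the contiguous 80-class index to the sparse 91-class COCO paper IDs.
def _demo_coco_class_map():
    coco91 = coco80_to_coco91_class()
    print(coco91[0], coco91[79])  # 1 90 (class 0 'person' -> 1, class 79 'toothbrush' -> 90)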
def output_to_target(output):
    # Convert model output to target format [batch_id, class_id, x, y, w, h, conf]
    targets = []
    for i, o in enumerate(output):
        for *box, conf, cls in o.cpu().numpy():
            targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf])
    return np.array(targets)
def plot_pr_curve(px, py, ap, save_dir='.', names=()):
    fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
    py = np.stack(py, axis=1)

    if 0 < len(names) < 21:  # display per-class legend if < 21 classes
        for i, y in enumerate(py.T):
            ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0])  # plot(recall, precision)
    else:
        ax.plot(px, py, linewidth=1, color='grey')  # plot(recall, precision)

    ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
    fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250)