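"""YOLO segmentation inference with ONNX Runtime and OpenVINO backends, in sync and async modes."""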
# torch must be imported before onnxruntime, otherwise the CUDA context cannot be created
# ref: https://github.com/microsoft/onnxruntime/issues/11092#issuecomment-1386840174
import torch, torchvision
import onnxruntime
from time import perf_counter
from openvino.runtime import Core, Layout, get_batch, AsyncInferQueue
from pathlib import Path
import yaml
import cv2
import numpy as np
import time
from plots import Annotator, process_mask, scale_boxes, scale_image, colors
from loguru import logger
def from_numpy(x):
return torch.from_numpy(x) if isinstance(x, np.ndarray) else x
def yaml_load(file="data.yaml"):
# Single-line safe yaml loading
with open(file, errors="ignore") as f:
return yaml.safe_load(f)
def load_metadata(f=Path("path/to/meta.yaml")):
# Load metadata from meta.yaml if it exists
if f.exists():
d = yaml_load(f)
return d["stride"], d["names"] # assign stride, names
return None, None
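# The metadata sidecar is the YAML file exported next to the model weights and is
# expected to carry "stride" and "names" keys, e.g. (values illustrative only):
#   stride: 32
#   names:
#     0: aorta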
def letterbox(
im,
new_shape=(640, 640),
color=(114, 114, 114),
auto=True,
scale_fill=False,
scaleup=True,
stride=32,
):
# Resize and pad image while meeting stride-multiple constraints
shape = im.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better val mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scale_fill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(
im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
) # add border
return im, ratio, (dw, dh)
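# Worked example: a 480x640 BGR frame letterboxed to new_shape=320 with stride=32 and
# auto=True gives r = 0.5, a resize to 320x240, and 80 % 32 = 16 px of height padding
# split top/bottom, so it returns a 256x320 image, ratio (0.5, 0.5), and (dw, dh) = (0.0, 8.0).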
def xywh2xyxy(x):
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
return y
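# Example: xywh2xyxy(np.array([[50, 50, 20, 10]])) -> [[40, 45, 60, 55]],
# i.e. a 20x10 box centered at (50, 50).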
def box_iou(box1, box2, eps=1e-7):
# https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
"""
Return intersection-over-union (Jaccard index) of boxes.
Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
Arguments:
box1 (Tensor[N, 4])
box2 (Tensor[M, 4])
Returns:
iou (Tensor[N, M]): the NxM matrix containing the pairwise
IoU values for every element in boxes1 and boxes2
"""
# inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
(a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2)
inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)
# IoU = inter / (area1 + area2 - inter)
return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)
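# Example: box_iou(torch.tensor([[0., 0., 2., 2.]]), torch.tensor([[1., 1., 3., 3.]]))
# is ~1/7, since the intersection is 1 and the union is 4 + 4 - 1 = 7.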
def non_max_suppression(
prediction,
conf_thres=0.25,
iou_thres=0.45,
classes=None,
agnostic=False,
multi_label=False,
labels=(),
max_det=300,
nm=0, # number of masks
redundant=True, # require redundant detections
):
"""Non-Maximum Suppression (NMS) on inference results to reject overlapping detections
Returns:
list of detections, on (n,6) tensor per image [xyxy, conf, cls]
"""
if isinstance(
prediction, (list, tuple)
    ): # YOLOv5 model in validation mode, output = (inference_out, loss_out)
prediction = prediction[0] # select only inference output
device = prediction.device
mps = "mps" in device.type # Apple MPS
if mps: # MPS not fully supported yet, convert tensors to CPU before NMS
prediction = prediction.cpu()
bs = prediction.shape[0] # batch size
nc = prediction.shape[2] - nm - 5 # number of classes
xc = prediction[..., 4] > conf_thres # candidates
# Checks
assert (
0 <= conf_thres <= 1
), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
assert (
0 <= iou_thres <= 1
), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
# Settings
# min_wh = 2 # (pixels) minimum box width and height
max_wh = 7680 # (pixels) maximum box width and height
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
merge = False # use merge-NMS
t = time.time()
mi = 5 + nc # mask start index
output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
for xi, x in enumerate(prediction): # image index, image inference
# Apply constraints
# x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
x = x[xc[xi]] # confidence
# Cat apriori labels if autolabelling
if labels and len(labels[xi]):
lb = labels[xi]
v = torch.zeros((len(lb), nc + nm + 5), device=x.device)
v[:, :4] = lb[:, 1:5] # box
v[:, 4] = 1.0 # conf
v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls
x = torch.cat((x, v), 0)
        # If none remain, process next image
if not x.shape[0]:
continue
# Compute conf
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
# Box/Mask
        box = xywh2xyxy(
            x[:, :4]
        ) # (center_x, center_y, width, height) to (x1, y1, x2, y2)
mask = x[:, mi:] # zero columns if no masks
# Detections matrix nx6 (xyxy, conf, cls)
if multi_label:
i, j = (x[:, 5:mi] > conf_thres).nonzero(as_tuple=False).T
x = torch.cat((box[i], x[i, 5 + j, None], j[:, None].float(), mask[i]), 1)
else: # best class only
conf, j = x[:, 5:mi].max(1, keepdim=True)
x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
# Filter by class
if classes is not None:
x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
# Apply finite constraint
# if not torch.isfinite(x).all():
# x = x[torch.isfinite(x).all(1)]
# Check shape
n = x.shape[0] # number of boxes
if not n: # no boxes
continue
elif n > max_nms: # excess boxes
x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
else:
x = x[x[:, 4].argsort(descending=True)] # sort by confidence
# Batched NMS
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
if i.shape[0] > max_det: # limit detections
i = i[:max_det]
if merge and (1 < n < 3e3): # Merge NMS (boxes merged using weighted mean)
# update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
weights = iou * scores[None] # box weights
x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(
1, keepdim=True
) # merged boxes
if redundant:
i = i[iou.sum(1) > 1] # require redundancy
output[xi] = x[i]
if mps:
output[xi] = output[xi].to(device)
return output
class Model:
def __init__(
self,
model_path,
imgsz=320,
classes=None,
device="CPU",
plot_mask=False,
conf_thres=0.7,
n_jobs=1,
is_async=False,
):
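        """Wrap a YOLO segmentation model behind an ONNX Runtime or OpenVINO backend.

        Args:
            model_path: path to a .onnx model or an OpenVINO .xml IR file.
            imgsz: square inference size used for the letterbox resize.
            classes: optional class-id filter for NMS, e.g. [0] or [0, 2, 3].
            device: OpenVINO device name, e.g. "CPU".
            plot_mask: if True, draw segmentation masks on the annotated image.
            conf_thres: confidence threshold for NMS.
            n_jobs: number of parallel infer requests, or "auto" (OpenVINO only).
            is_async: use an OpenVINO AsyncInferQueue (not supported for ONNX).
        """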
# filter by class: classes=[0], or classes=[0, 2, 3]
model_type = "onnx" if Path(model_path).suffix == ".onnx" else "openvino"
assert Path(model_path).exists(), f"Model {model_path} not found"
assert Path(model_path).suffix in (
".onnx",
".xml",
), "Model must be .onnx or .xml"
self.model_type = model_type
self.model_path = model_path
self.imgsz = imgsz
self.classes = classes
self.plot_mask = plot_mask
self.conf_thres = conf_thres
# async settings
self.n_jobs = n_jobs
self.is_async = is_async
self.completed_results = {} # key: frame_id, value: inference results
self.ori_cv_imgs = {} # key: frame_id, value: original cv image
self.prep_cv_imgs = {} # key: frame_id, value: preprocessed cv image
if self.model_type == "onnx":
assert is_async is False, "Async mode is not supported for ONNX models"
providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
session = onnxruntime.InferenceSession(model_path, providers=providers)
self.session = session
output_names = [x.name for x in session.get_outputs()]
self.output_names = output_names
meta = session.get_modelmeta().custom_metadata_map # metadata
if "stride" in meta:
stride, names = int(meta["stride"]), eval(meta["names"])
self.stride = stride
self.names = names
elif self.model_type == "openvino":
# load OpenVINO model
assert Path(model_path).suffix == ".xml", "OpenVINO model must be .xml"
ie = Core()
weights = Path(model_path).with_suffix(".bin").as_posix()
network = ie.read_model(model=model_path, weights=weights)
if network.get_parameters()[0].get_layout().empty:
network.get_parameters()[0].set_layout(Layout("NCHW"))
batch_dim = get_batch(network)
if batch_dim.is_static:
batch_size = batch_dim.get_length()
            # To run inference on M1, the IR model must be exported with "mo --use_legacy_frontend";
            # otherwise compiling the model fails with the error described in:
            # https://github.com/openvinotoolkit/openvino/issues/12476#issuecomment-1222202804
config = {}
if n_jobs == "auto":
config = {"PERFORMANCE_HINT": "THROUGHPUT"}
self.executable_network = ie.compile_model(
network, device_name=device, config=config
)
num_requests = self.executable_network.get_property(
"OPTIMAL_NUMBER_OF_INFER_REQUESTS"
)
self.n_jobs = num_requests if n_jobs == "auto" else int(n_jobs)
logger.info(f"Optimal number of infer requests should be: {num_requests}")
self.stride, self.names = load_metadata(
Path(weights).with_suffix(".yaml")
) # load metadata
if is_async:
logger.info(f"Using num of infer requests jobs: {n_jobs}")
self.pipeline = AsyncInferQueue(self.executable_network, self.n_jobs)
self.pipeline.set_callback(self.callback)
def preprocess(self, cv_img, pt=False):
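        """Letterbox to self.imgsz, convert BGR HWC uint8 to RGB CHW float32 in [0, 1], and add a batch dim."""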
im = letterbox(cv_img, self.imgsz, stride=self.stride, auto=pt)[
0
] # padded resize
im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
im = np.ascontiguousarray(im) # contiguous
im = torch.from_numpy(im)
im = im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
im = im.cpu().numpy() # torch to numpy
return im
def postprocess(self, y, ori_cv_im, prep_im):
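        """Run NMS, rescale boxes and masks to the original image size, and draw annotations.

        Returns:
            (annotated image, (xyxy, conf, masks)); the tuple holds None entries when nothing is detected.
        """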
y = [from_numpy(x) for x in y]
pred, proto = y[0], y[-1]
im0 = ori_cv_im
# NMS
iou_thres = 0.45
agnostic_nms = False
max_det = 1 # maximum detections per image, only 1 aorta is needed
pred = non_max_suppression(
pred,
self.conf_thres,
iou_thres,
self.classes,
agnostic_nms,
max_det=max_det,
nm=32,
)
# Process predictions
line_thickness = 3
annotator = Annotator(
np.ascontiguousarray(im0),
line_width=line_thickness,
example=str(self.names),
)
        i = 0 # batch index (single-image batch)
det = pred[0]
im = prep_im
r_xyxy, r_conf, r_masks = None, None, None
if len(pred[0]):
masks = process_mask(
proto[i],
det[:, 6:],
det[:, :4],
(self.imgsz, self.imgsz),
upsample=True,
) # HWC
det[:, :4] = scale_boxes(
(self.imgsz, self.imgsz), det[:, :4], im0.shape
).round() # rescale boxes to im0 size
# Mask plotting
if self.plot_mask:
annotator.masks(
masks,
colors=[colors(x, True) for x in det[:, 5]],
im_gpu=im[i],
alpha=0.1,
)
# Write results
for j, (*xyxy, conf, cls) in enumerate(reversed(det[:, :6])):
# Add bbox to image
c = int(cls) # integer class
label = f"{self.names[c]} {conf:.2f}"
annotator.box_label(xyxy, label, color=colors(c, True))
r_xyxy = xyxy
r_conf = conf
r_xyxy = [i.int().numpy().item() for i in r_xyxy]
r_conf = r_conf.numpy().item()
r_masks = scale_image((self.imgsz, self.imgsz), masks.numpy()[0], im0.shape)
return annotator.result(), (r_xyxy, r_conf, r_masks)
def predict(self, cv_img):
        # return the annotated image and the detection results (xyxy, conf, masks)
im = self.preprocess(cv_img)
if self.model_type == "onnx":
y = self.session.run(
self.output_names, {self.session.get_inputs()[0].name: im}
)
elif self.model_type == "openvino":
# OpenVINO model inference
            # Note: use an FP32 model on M1, otherwise there are many runtime errors
            # (inference is very slow on M1, but it works)
# start = perf_counter()
y = list(self.executable_network([im]).values())
# logger.info(f"OpenVINO inference time: {perf_counter() - start:.3f}s")
result_cv_img, others = self.postprocess(y, cv_img, im)
return result_cv_img, others
def callback(self, request, userdata):
# callback function for AsyncInferQueue
outputs = request.outputs
frame_id = userdata
self.completed_results[frame_id] = [i.data for i in outputs]
def predict_async(self, cv_img, frame_id):
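        """Queue an async inference request on cv_img; callback() stores the outputs under frame_id."""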
assert self.is_async, "Please set is_async=True when initializing the model"
self.ori_cv_imgs[frame_id] = cv_img
im = self.preprocess(cv_img)
self.prep_cv_imgs[frame_id] = im
# Note: The start_async function call is not required to be synchronized - it waits for any available job if the queue is busy/overloaded.
# https://docs.openvino.ai/latest/openvino_docs_OV_UG_Python_API_exclusives.html#asyncinferqueue
#
# idle_id = self.pipeline.get_idle_request_id()
# self.pipeline.start_async({idle_id: im}, frame_id)
self.pipeline.start_async({0: im}, frame_id)
def is_free_to_infer_async(self):
"""Returns True if any free request in the pool, otherwise False"""
assert self.is_async, "Please set is_async=True when initializing the model"
return self.pipeline.is_ready()
def get_result(self, frame_id):
"""Returns the inference result for the given frame_id"""
assert self.is_async, "Please set is_async=True when initializing the model"
if frame_id in self.completed_results:
y = self.completed_results.pop(frame_id)
cv_img = self.ori_cv_imgs.pop(frame_id)
im = self.prep_cv_imgs.pop(frame_id)
result_cv_img, others = self.postprocess(y, cv_img, im)
return result_cv_img, others
return None
if __name__ == "__main__":
m_p = "weights/yolov7seg-JH-v1.onnx"
m_p = "weights/yolov5s-seg-MK-v1.onnx"
m_p = "weights/best_openvino_model/best.xml"
imgsz = 320
# imgsz = 640
model = Model(model_path=m_p, imgsz=imgsz)
    # run inference on an image using the loaded model
# source = 'Tim_3-0-00-20.05.jpg'
path = "data/Jimmy_2-0-00-04.63.jpg"
assert Path(path).exists(), f"Input image {path} doesn't exist"
# output path
save_dir = "runs/predict"
Path(save_dir).mkdir(parents=True, exist_ok=True)
out_p = f"{save_dir}/{Path(path).stem}.jpg"
# load image and preprocess
im0 = cv2.imread(path) # BGR
result_cv_img, _ = model.predict(im0)
if result_cv_img is not None:
cv2.imwrite(out_p, result_cv_img)
logger.info(f"Saved result to {out_p}")
else:
logger.error("No result, something went wrong")
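    # Async usage sketch (OpenVINO .xml models only; frame ids are arbitrary keys):
    #   model = Model(model_path=m_p, imgsz=imgsz, n_jobs="auto", is_async=True)
    #   if model.is_free_to_infer_async():
    #       model.predict_async(im0, frame_id=0)
    #   out = model.get_result(frame_id=0) # None until the request has completed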