"""
This file contains functions that are used to perform data augmentation.
"""
import torch
import numpy as np
from skimage.transform import rotate, resize
import cv2
from torchvision.transforms import Normalize, ToTensor, Compose

from lib.core import constants


def get_normalization():
    """Return the torchvision pipeline used to normalize input images.

    The pipeline applies ``ToTensor`` followed by ``Normalize`` with the
    mean/std stored in ``constants.IMG_NORM_MEAN`` / ``constants.IMG_NORM_STD``.
    """
    normalize_img = Compose([
        ToTensor(),
        Normalize(mean=constants.IMG_NORM_MEAN, std=constants.IMG_NORM_STD),
    ])
    return normalize_img


def get_transform(center, scale, res, rot=0):
    """Generate the 3x3 homogeneous matrix mapping image to crop coordinates.

    Args:
        center: (x, y) center of the bounding box in the source image.
        scale: bounding-box size divided by 200 (the box side is
            ``200 * scale`` pixels).
        res: output resolution, indexed as ``(res[0], res[1])`` where
            ``res[1]`` scales the x axis and ``res[0]`` scales the y axis.
        rot: rotation in degrees applied around the center of the crop.

    Returns:
        A ``(3, 3)`` float ndarray.
    """
    # The small epsilon avoids a division by zero for degenerate boxes.
    h = 200 * scale + 1e-6
    t = np.zeros((3, 3))
    t[0, 0] = float(res[1]) / h
    t[1, 1] = float(res[0]) / h
    t[0, 2] = res[1] * (-float(center[0]) / h + .5)
    t[1, 2] = res[0] * (-float(center[1]) / h + .5)
    t[2, 2] = 1
    if rot != 0:
        rot = -rot  # To match direction of rotation from cropping
        rot_mat = np.zeros((3, 3))
        rot_rad = rot * np.pi / 180
        sn, cs = np.sin(rot_rad), np.cos(rot_rad)
        rot_mat[0, :2] = [cs, -sn]
        rot_mat[1, :2] = [sn, cs]
        rot_mat[2, 2] = 1
        # Need to rotate around center: shift the crop center to the origin,
        # rotate, then shift back.
        t_mat = np.eye(3)
        t_mat[0, 2] = -res[1] / 2
        t_mat[1, 2] = -res[0] / 2
        t_inv = t_mat.copy()
        t_inv[:2, 2] *= -1
        t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
    return t


def transform(pt, center, scale, res, invert=0, rot=0, asint=True):
    """Transform a single pixel location to a different reference frame.

    The point is treated as 1-based: 1 is subtracted before applying the
    matrix and added back afterwards.

    Args:
        pt: (x, y) location to transform.
        center, scale, res, rot: forwarded to :func:`get_transform`.
        invert: when truthy, apply the inverse mapping (crop -> image).
        asint: when True, truncate the result to integers.

    Returns:
        ndarray with the two transformed coordinates.
    """
    t = get_transform(center, scale, res, rot=rot)
    if invert:
        t = np.linalg.inv(t)
    new_pt = np.array([pt[0] - 1, pt[1] - 1, 1.]).T
    new_pt = np.dot(t, new_pt)
    if asint:
        return new_pt[:2].astype(int) + 1
    return new_pt[:2] + 1


def transform_pts(pts, center, scale, res, invert=0, rot=0, asint=True):
    """Transform a batch of pixel locations to a different reference frame.

    Unlike :func:`transform`, no 1-based offset is applied to the points.

    Args:
        pts: ndarray of shape ``(N, 2)`` holding (x, y) locations.
        center, scale, res, rot: forwarded to :func:`get_transform`.
        invert: when truthy, apply the inverse mapping.
        asint: when True, truncate the result to integers.

    Returns:
        ndarray of shape ``(N, 2)``.
    """
    t = get_transform(center, scale, res, rot=rot)
    if invert:
        t = np.linalg.inv(t)
    # Append a column of ones to obtain homogeneous coordinates.
    pts = np.concatenate((pts, np.ones_like(pts)[:, [0]]), axis=-1)
    new_pt = np.dot(t, pts.T)
    if asint:
        return new_pt[:2, :].T.astype(int)
    return new_pt[:2, :].T


def _crop_corners(center, scale, res):
    """Return the (upper-left, bottom-right) box corners in image pixels."""
    # NOTE(review): the bottom-right point is passed as (res[0]+1, res[1]+1)
    # although get_transform scales x by res[1]; this only matters for
    # non-square resolutions -- confirm against callers before changing.
    ul = np.array(transform([1, 1], center, scale, res, invert=1)) - 1
    br = np.array(
        transform([res[0] + 1, res[1] + 1], center, scale, res, invert=1)) - 1
    return ul, br


def _overlap_ranges(ul, br, height, width):
    """Return index ranges of the box/image overlap in both frames.

    Returns ``(box_x, box_y, img_x, img_y)``; each entry is a ``(start, stop)``
    pair. ``box_*`` index into the box-sized array, ``img_*`` index into the
    ``height`` x ``width`` image.
    """
    box_x = max(0, -ul[0]), min(br[0], width) - ul[0]
    box_y = max(0, -ul[1]), min(br[1], height) - ul[1]
    img_x = max(0, ul[0]), min(width, br[0])
    img_y = max(0, ul[1]), min(height, br[1])
    return box_x, box_y, img_x, img_y


def crop(img, center, scale, res, rot=0):
    """Crop image according to the supplied bounding box.

    Regions of the box that fall outside the image are filled with zeros.

    Args:
        img: image array of shape ``(H, W)`` or ``(H, W, C)``.
        center: (x, y) center of the bounding box.
        scale: bounding-box size divided by 200.
        res: output resolution passed to ``skimage.transform.resize``.
        rot: rotation in degrees applied to the crop.

    Returns:
        The cropped (and optionally rotated) image resized to ``res``.
    """
    ul, br = _crop_corners(center, scale, res)

    # Padding so that when rotated proper amount of context is included
    pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2)
    if rot != 0:
        ul -= pad
        br += pad

    new_shape = [br[1] - ul[1], br[0] - ul[0]]
    if len(img.shape) > 2:
        new_shape += [img.shape[2]]
    new_img = np.zeros(new_shape)

    new_x, new_y, old_x, old_y = _overlap_ranges(ul, br, len(img), len(img[0]))
    try:
        new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = \
            img[old_y[0]:old_y[1], old_x[0]:old_x[1]]
    except ValueError:
        # Source and destination slices disagree in shape (box does not
        # overlap the image consistently); keep the zero-filled crop.
        print("invlid bbox, fill with 0")

    if rot != 0:
        new_img = rotate(new_img, rot)
        # Remove padding. Guarded because `[0:-0]` would yield an empty image.
        if pad > 0:
            new_img = new_img[pad:-pad, pad:-pad]

    new_img = resize(new_img, res)
    return new_img


def crop_j2d(j2d, center, scale, res, rot=0):
    """Map 2D keypoints from image coordinates into crop coordinates.

    This is the closed form of the unrotated crop mapping: shift by the
    upper-left corner of the box, then rescale by ``res[0] / (200 * scale)``.

    Args:
        j2d: array of keypoint coordinates, broadcastable against ``center``.
        center: array with the (x, y) center of the bounding box.
        scale: bounding-box size divided by 200.
        res: output resolution; only ``res[0]`` is used.
        rot: unused; kept for signature parity with :func:`crop`.

    Returns:
        The keypoints expressed in crop coordinates.
    """
    b = scale * 200
    points2d = j2d - (center - b / 2)
    points2d = points2d * (res[0] / b)
    return points2d


def crop_crop(img, center, scale, res, rot=0):
    """Crop image according to the supplied bounding box.

    Variant of :func:`crop` that, when the box is taller than the image,
    starts from an edge-replicated copy of the image instead of zeros.

    Args:
        img: image array of shape ``(H, W)`` or ``(H, W, C)``.
        center: (x, y) center of the bounding box.
        scale: bounding-box size divided by 200.
        res: output resolution passed to ``skimage.transform.resize``.
        rot: rotation in degrees applied to the crop.

    Returns:
        The cropped (and optionally rotated) image resized to ``res``.
    """
    ul, br = _crop_corners(center, scale, res)

    # Padding so that when rotated proper amount of context is included
    pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2)
    if rot != 0:
        ul -= pad
        br += pad

    new_shape = [br[1] - ul[1], br[0] - ul[0]]
    if len(img.shape) > 2:
        new_shape += [img.shape[2]]
    new_img = np.zeros(new_shape)

    if new_img.shape[0] > img.shape[0]:
        # NOTE(review): the replicated canvas has the image's shape plus the
        # border, which is not necessarily `new_shape` -- confirm intent.
        p = int((new_img.shape[0] - img.shape[0]) / 2)
        new_img = cv2.copyMakeBorder(img, p, p, p, p, cv2.BORDER_REPLICATE)

    new_x, new_y, old_x, old_y = _overlap_ranges(ul, br, len(img), len(img[0]))
    new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = \
        img[old_y[0]:old_y[1], old_x[0]:old_x[1]]

    if rot != 0:
        new_img = rotate(new_img, rot)
        # Remove padding. Guarded because `[0:-0]` would yield an empty image.
        if pad > 0:
            new_img = new_img[pad:-pad, pad:-pad]

    new_img = resize(new_img, res)
    return new_img


def uncrop(img, center, scale, orig_shape, rot=0, is_rgb=True):
    """'Undo' the image cropping/resizing.

    This function is used when evaluating mask/part segmentation.

    Args:
        img: cropped image/mask; its first two dimensions define ``res``.
        center: (x, y) center of the bounding box in the original image.
        scale: bounding-box size divided by 200.
        orig_shape: shape of the original image to paste into.
        rot: unused; kept for backward compatibility.
        is_rgb: unused; kept for backward compatibility.

    Returns:
        A ``uint8`` array of shape ``orig_shape`` with the crop pasted back
        at the bounding-box location and zeros elsewhere.
    """
    res = img.shape[:2]
    ul, br = _crop_corners(center, scale, res)

    # Size of the bounding box in original-image pixels.
    crop_shape = [br[1] - ul[1], br[0] - ul[0]]
    new_img = np.zeros(orig_shape, dtype=np.uint8)

    new_x, new_y, old_x, old_y = _overlap_ranges(
        ul, br, orig_shape[0], orig_shape[1])

    # Nearest-neighbour resize (order=0) that keeps the input value range, so
    # label values are not interpolated or rescaled to [0, 1].
    img = resize(img, crop_shape, order=0, preserve_range=True,
                 anti_aliasing=False)
    new_img[old_y[0]:old_y[1], old_x[0]:old_x[1]] = \
        img[new_y[0]:new_y[1], new_x[0]:new_x[1]]
    return new_img


def rot_aa(aa, rot):
    """Rotate axis angle parameters.

    Args:
        aa: axis-angle vector accepted by ``cv2.Rodrigues``.
        rot: in-plane rotation in degrees; the rotation by ``-rot`` about the
            z axis is left-multiplied onto the orientation.

    Returns:
        The rotated axis-angle vector.
    """
    angle = np.deg2rad(-rot)
    # Rotation about the z axis.
    R = np.array([[np.cos(angle), -np.sin(angle), 0],
                  [np.sin(angle), np.cos(angle), 0],
                  [0, 0, 1]])
    # find the rotation of the body in camera frame
    per_rdg, _ = cv2.Rodrigues(aa)
    # apply the global rotation to the global orientation
    resrot, _ = cv2.Rodrigues(np.dot(R, per_rdg))
    aa = (resrot.T)[0]
    return aa


def flip_img(img):
    """Flip rgb images or masks left-right.

    channels come last, e.g. (256,256,3).
    """
    return np.fliplr(img)


def flip_kp(kp):
    """Flip keypoints.

    Reorders the keypoints with the left/right permutation matching their
    count (24 or 49) and negates the first coordinate.

    Raises:
        ValueError: if ``kp`` does not contain 24 or 49 keypoints.
    """
    if len(kp) == 24:
        flipped_parts = constants.J24_FLIP_PERM
    elif len(kp) == 49:
        flipped_parts = constants.J49_FLIP_PERM
    else:
        raise ValueError(
            f"flip_kp expects 24 or 49 keypoints, got {len(kp)}")
    kp = kp[flipped_parts]
    kp[:, 0] = -kp[:, 0]
    return kp


def flip_pose(pose):
    """Flip pose.

    The flipping is based on SMPL parameters: entries are permuted with
    ``constants.SMPL_POSE_FLIP_PERM`` and the second and third component of
    every axis-angle triplet is negated.
    """
    flipped_parts = constants.SMPL_POSE_FLIP_PERM
    pose = pose[flipped_parts]
    # we also negate the second and the third dimension of the axis-angle
    pose[1::3] = -pose[1::3]
    pose[2::3] = -pose[2::3]
    return pose


def crop_img(img, center, scale, res, val=255):
    """Crop image according to the supplied bounding box.

    Same as :func:`crop` without rotation, except that regions outside the
    image are filled with ``val`` instead of zero.

    Args:
        img: image array of shape ``(H, W)`` or ``(H, W, C)``.
        center: (x, y) center of the bounding box.
        scale: bounding-box size divided by 200.
        res: output resolution passed to ``skimage.transform.resize``.
        val: fill value for out-of-image regions.

    Returns:
        The cropped image resized to ``res``.
    """
    ul, br = _crop_corners(center, scale, res)

    new_shape = [br[1] - ul[1], br[0] - ul[0]]
    if len(img.shape) > 2:
        new_shape += [img.shape[2]]
    new_img = np.ones(new_shape) * val

    new_x, new_y, old_x, old_y = _overlap_ranges(ul, br, len(img), len(img[0]))
    new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = \
        img[old_y[0]:old_y[1], old_x[0]:old_x[1]]
    new_img = resize(new_img, res)
    return new_img


def boxes_2_cs(boxes):
    """Convert a batch of ``(x1, y1, x2, y2)`` boxes to centers and scales.

    Args:
        boxes: ndarray of shape ``(N, >=4)``.

    Returns:
        ``(centers, scales)`` where ``centers`` has shape ``(N, 2)`` and
        ``scales`` is the longer box side divided by 200, shape ``(N,)``.
    """
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    w, h = x2 - x1, y2 - y1
    cx, cy = x1 + w / 2, y1 + h / 2
    size = np.stack([w, h]).max(axis=0)

    centers = np.stack([cx, cy], axis=1)
    scales = size / 200
    return centers, scales


def box_2_cs(box):
    """Convert a single ``(x1, y1, x2, y2)`` box to a center and scale.

    Args:
        box: object supporting ``box[:4].int().tolist()`` (presumably a torch
            tensor -- verify against callers). Coordinates are truncated to
            integers before use.

    Returns:
        ``(center, scale)`` with ``center`` a ``[cx, cy]`` list and ``scale``
        the longer box side divided by 200.
    """
    x1, y1, x2, y2 = box[:4].int().tolist()

    w, h = x2 - x1, y2 - y1
    cx, cy = x1 + w / 2, y1 + h / 2
    size = max(w, h)

    center = [cx, cy]
    scale = size / 200
    return center, scale


def est_intrinsics(img_shape):
    """Estimate camera intrinsics from the image shape.

    Args:
        img_shape: ``(H, W)`` or ``(H, W, C)``; only the first two entries
            are used.

    Returns:
        ``(img_center, img_focal)``: the image center ``(W/2, H/2)`` and the
        image diagonal length, both as float tensors.
    """
    h, w = img_shape[:2]
    img_center = torch.tensor([w / 2., h / 2.]).float()
    img_focal = torch.tensor(np.sqrt(h**2 + w**2)).float()
    return img_center, img_focal