# HaWoR/lib/vis/tools.py
# Useful visualization functions from SLAHMR and WHAM
import os
import cv2
import numpy as np
import torch
from PIL import Image
def read_image(path, scale=1):
im = Image.open(path)
if scale == 1:
return np.array(im)
W, H = im.size
w, h = int(scale * W), int(scale * H)
    return np.array(im.resize((w, h), Image.LANCZOS))  # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the equivalent filter
def transform_torch3d(T_c2w):
"""
:param T_c2w (*, 4, 4)
returns (*, 3, 3), (*, 3)
"""
R1 = torch.tensor(
[[-1.0, 0.0, 0.0], [0.0, -1.0, 0.0], [0.0, 0.0, 1.0],], device=T_c2w.device,
)
R2 = torch.tensor(
[[1.0, 0.0, 0.0], [0.0, -1.0, 0.0], [0.0, 0.0, -1.0],], device=T_c2w.device,
)
cam_R, cam_t = T_c2w[..., :3, :3], T_c2w[..., :3, 3]
cam_R = torch.einsum("...ij,jk->...ik", cam_R, R1)
cam_t = torch.einsum("ij,...j->...i", R2, cam_t)
return cam_R, cam_t
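# A minimal usage sketch for transform_torch3d. The batch size and identity
# poses are illustrative; only the (*, 4, 4) -> (*, 3, 3), (*, 3) shape
# contract from the docstring is assumed.
def _example_transform_torch3d():
    T_c2w = torch.eye(4).expand(8, 4, 4)  # batch of 8 camera-to-world matrices
    cam_R, cam_t = transform_torch3d(T_c2w)
    assert cam_R.shape == (8, 3, 3) and cam_t.shape == (8, 3)
    return cam_R, cam_t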
def transform_pyrender(T_c2w):
"""
:param T_c2w (*, 4, 4)
"""
T_vis = torch.tensor(
[
[1.0, 0.0, 0.0, 0.0],
[0.0, -1.0, 0.0, 0.0],
[0.0, 0.0, -1.0, 0.0],
[0.0, 0.0, 0.0, 1.0],
],
device=T_c2w.device,
)
return torch.einsum(
"...ij,jk->...ik", torch.einsum("ij,...jk->...ik", T_vis, T_c2w), T_vis
)
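# transform_pyrender conjugates the pose by the fixed axis flip
# T_vis = diag(1, -1, -1, 1); since that flip is its own inverse, applying the
# function twice recovers the input. A small sanity-check sketch (the random
# translations are illustrative only):
def _example_transform_pyrender_roundtrip():
    T = torch.eye(4).repeat(4, 1, 1)
    T[:, :3, 3] = torch.randn(4, 3)  # arbitrary camera translations
    T_flipped = transform_pyrender(T)
    assert torch.allclose(transform_pyrender(T_flipped), T)
    return T_flipped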
def smpl_to_geometry(verts, faces, vis_mask=None, track_ids=None):
"""
:param verts (B, T, V, 3)
:param faces (F, 3)
:param vis_mask (optional) (B, T) visibility of each person
:param track_ids (optional) (B,)
    returns per-frame lists of verts (B, V, 3), colors (B, 3) and faces (F, 3),
    where B depends on how many people are visible in that frame
"""
B, T = verts.shape[:2]
device = verts.device
# (B, 3)
colors = (
track_to_colors(track_ids)
if track_ids is not None
        else torch.ones(B, 3, device=device) * 0.5  # default mid-gray when no track ids
)
# list T (B, V, 3), T (B, 3), T (F, 3)
return filter_visible_meshes(verts, colors, faces, vis_mask)
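# Usage sketch with dummy tensors (shapes are illustrative). Without a
# vis_mask, filter_visible_meshes hands back three per-frame lists and every
# person gets the default gray color.
def _example_smpl_to_geometry():
    B, T, V, F = 2, 4, 10, 16
    verts = torch.randn(B, T, V, 3)
    faces = torch.randint(0, V, (F, 3))
    vert_list, color_list, face_list = smpl_to_geometry(verts, faces)
    assert len(vert_list) == T and vert_list[0].shape == (B, V, 3)
    return vert_list, color_list, face_list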
def filter_visible_meshes(verts, colors, faces, vis_mask=None, vis_opacity=False):
"""
:param verts (B, T, V, 3)
:param colors (B, 3)
:param faces (F, 3)
:param vis_mask (optional tensor, default None) (B, T) ternary mask
-1 if not in frame
0 if temporarily occluded
1 if visible
:param vis_opacity (optional bool, default False)
if True, make occluded people alpha=0.5, otherwise alpha=1
    returns per-frame lists verts (Bi, V, 3), colors (Bi, 3 or 4), faces (F, 3);
    when vis_mask is given, alpha is appended to the colors and scene bounds
    (bb_min, bb_max, focus mean) are returned as a fourth output
"""
B, T = verts.shape[:2]
faces = [faces for t in range(T)]
if vis_mask is None:
verts = [verts[:, t] for t in range(T)]
colors = [colors for t in range(T)]
return verts, colors, faces
    # render people that are occluded or visible, but drop those not in frame
    vis_mask = vis_mask >= 0
    if vis_opacity:
        # alpha 0.5 for occluded people, 1.0 for visible ones
        alpha = 0.5 * (vis_mask[..., None] + 1)
    else:
        # everyone that is rendered gets alpha 1.0
        alpha = torch.ones_like(vis_mask[..., None], dtype=torch.float32)
vert_list = [verts[vis_mask[:, t], t] for t in range(T)]
colors = [
torch.cat([colors[vis_mask[:, t]], alpha[vis_mask[:, t], t]], dim=-1)
for t in range(T)
]
bounds = get_bboxes(verts, vis_mask)
return vert_list, colors, faces, bounds
def get_bboxes(verts, vis_mask):
"""
    return the scene bb_min and bb_max (each (3,)) over the whole trajectory,
    plus the mean of a focus track that is long-lived and close to the camera
:param verts (B, T, V, 3)
:param vis_mask (B, T)
"""
B, T, *_ = verts.shape
bb_min, bb_max, mean = [], [], []
for b in range(B):
v = verts[b, vis_mask[b, :T]] # (Tb, V, 3)
bb_min.append(v.amin(dim=(0, 1)))
bb_max.append(v.amax(dim=(0, 1)))
mean.append(v.mean(dim=(0, 1)))
bb_min = torch.stack(bb_min, dim=0)
bb_max = torch.stack(bb_max, dim=0)
mean = torch.stack(mean, dim=0)
# point to a track that's long and close to the camera
zs = mean[:, 2]
counts = vis_mask[:, :T].sum(dim=-1) # (B,)
mask = counts < 0.8 * T
zs[mask] = torch.inf
sel = torch.argmin(zs)
return bb_min.amin(dim=0), bb_max.amax(dim=0), mean[sel]
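# Sketch of the bounds / focus-point selection on random data (every track is
# marked visible in every frame, so the track closest to the camera is chosen
# as the focus; values are illustrative only).
def _example_get_bboxes():
    B, T, V = 3, 5, 10
    verts = torch.randn(B, T, V, 3)
    vis_mask = torch.ones(B, T, dtype=torch.bool)
    bb_min, bb_max, focus = get_bboxes(verts, vis_mask)
    assert bb_min.shape == bb_max.shape == focus.shape == (3,)
    return bb_min, bb_max, focus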
def track_to_colors(track_ids):
"""
:param track_ids (B)
"""
color_map = torch.from_numpy(get_colors()).to(track_ids)
return color_map[track_ids] / 255 # (B, 3)
def get_colors():
# color_file = os.path.abspath(os.path.join(__file__, "../colors_phalp.txt"))
color_file = os.path.abspath(os.path.join(__file__, "../colors.txt"))
RGB_tuples = np.vstack(
[
np.loadtxt(color_file, skiprows=0),
# np.loadtxt(color_file, skiprows=1),
np.random.uniform(0, 255, size=(10000, 3)),
[[0, 0, 0]],
]
)
    RGB_tuples[RGB_tuples == 0] = 1  # avoid exact-zero color channels
return RGB_tuples.astype(np.float32)
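# A small sketch of the track-id to color mapping. It assumes the colors.txt
# palette file sits next to this module, which get_colors requires anyway.
def _example_track_to_colors():
    track_ids = torch.tensor([0, 1, 2, 1])  # repeated ids share a color
    colors = track_to_colors(track_ids)  # (4, 3), values scaled to [0, 1]
    assert colors.shape == (4, 3)
    return colors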
def checkerboard_geometry(
length=12.0,
color0=[0.8, 0.9, 0.9],
color1=[0.6, 0.7, 0.7],
tile_width=0.5,
alpha=1.0,
up="y",
c1=0.0,
c2=0.0,
):
assert up == "y" or up == "z"
color0 = np.array(color0 + [alpha])
color1 = np.array(color1 + [alpha])
radius = length / 2.0
num_rows = num_cols = max(2, int(length / tile_width))
vertices = []
vert_colors = []
faces = []
face_colors = []
for i in range(num_rows):
for j in range(num_cols):
u0, v0 = j * tile_width - radius, i * tile_width - radius
us = np.array([u0, u0, u0 + tile_width, u0 + tile_width])
vs = np.array([v0, v0 + tile_width, v0 + tile_width, v0])
zs = np.zeros(4)
if up == "y":
cur_verts = np.stack([us, zs, vs], axis=-1) # (4, 3)
cur_verts[:, 0] += c1
cur_verts[:, 2] += c2
else:
cur_verts = np.stack([us, vs, zs], axis=-1) # (4, 3)
cur_verts[:, 0] += c1
cur_verts[:, 1] += c2
cur_faces = np.array(
[[0, 1, 3], [1, 2, 3], [0, 3, 1], [1, 3, 2]], dtype=np.int64
)
cur_faces += 4 * (i * num_cols + j) # the number of previously added verts
use_color0 = (i % 2 == 0 and j % 2 == 0) or (i % 2 == 1 and j % 2 == 1)
cur_color = color0 if use_color0 else color1
cur_colors = np.array([cur_color, cur_color, cur_color, cur_color])
vertices.append(cur_verts)
faces.append(cur_faces)
vert_colors.append(cur_colors)
face_colors.append(cur_colors)
vertices = np.concatenate(vertices, axis=0).astype(np.float32)
vert_colors = np.concatenate(vert_colors, axis=0).astype(np.float32)
    faces = np.concatenate(faces, axis=0).astype(np.int64)  # keep face indices integral
face_colors = np.concatenate(face_colors, axis=0).astype(np.float32)
return vertices, faces, vert_colors, face_colors
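# The checkerboard helper returns raw numpy buffers; one possible way to turn
# them into a renderable ground plane is via trimesh, which is an assumed
# optional dependency and not otherwise used in this file.
def _example_checkerboard_mesh():
    import trimesh  # assumed to be installed; illustrative only
    verts, faces, vert_colors, _ = checkerboard_geometry(length=4.0, up="y")
    return trimesh.Trimesh(
        vertices=verts,
        faces=faces.astype(np.int64),
        vertex_colors=vert_colors,
        process=False,
    )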
def camera_marker_geometry(radius, height, up):
assert up == "y" or up == "z"
if up == "y":
vertices = np.array(
[
[-radius, -radius, 0],
[radius, -radius, 0],
[radius, radius, 0],
[-radius, radius, 0],
[0, 0, height],
]
)
else:
vertices = np.array(
[
[-radius, 0, -radius],
[radius, 0, -radius],
[radius, 0, radius],
[-radius, 0, radius],
[0, -height, 0],
]
)
faces = np.array(
[[0, 3, 1], [1, 3, 2], [0, 1, 4], [1, 2, 4], [2, 3, 4], [3, 0, 4],]
)
face_colors = np.array(
[
[1.0, 1.0, 1.0, 1.0],
[1.0, 1.0, 1.0, 1.0],
[0.0, 1.0, 0.0, 1.0],
[1.0, 0.0, 0.0, 1.0],
[0.0, 1.0, 0.0, 1.0],
[1.0, 0.0, 0.0, 1.0],
]
)
return vertices, faces, face_colors
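# The camera marker is a small square-based pyramid with per-face colors; a
# quick shape check (the radius and height values are illustrative only):
def _example_camera_marker():
    verts, faces, face_colors = camera_marker_geometry(radius=0.1, height=0.2, up="y")
    assert verts.shape == (5, 3) and faces.shape == (6, 3) and face_colors.shape == (6, 4)
    return verts, faces, face_colors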
def vis_keypoints(
keypts_list,
img_size,
radius=6,
thickness=3,
kpt_score_thr=0.3,
dataset="TopDownCocoDataset",
):
"""
Visualize keypoints
From ViTPose/mmpose/apis/inference.py
"""
palette = np.array(
[
[255, 128, 0],
[255, 153, 51],
[255, 178, 102],
[230, 230, 0],
[255, 153, 255],
[153, 204, 255],
[255, 102, 255],
[255, 51, 255],
[102, 178, 255],
[51, 153, 255],
[255, 153, 153],
[255, 102, 102],
[255, 51, 51],
[153, 255, 153],
[102, 255, 102],
[51, 255, 51],
[0, 255, 0],
[0, 0, 255],
[255, 0, 0],
[255, 255, 255],
]
)
if dataset in (
"TopDownCocoDataset",
"BottomUpCocoDataset",
"TopDownOCHumanDataset",
"AnimalMacaqueDataset",
):
# show the results
skeleton = [
[15, 13],
[13, 11],
[16, 14],
[14, 12],
[11, 12],
[5, 11],
[6, 12],
[5, 6],
[5, 7],
[6, 8],
[7, 9],
[8, 10],
[1, 2],
[0, 1],
[0, 2],
[1, 3],
[2, 4],
[3, 5],
[4, 6],
]
pose_link_color = palette[
[0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16]
]
pose_kpt_color = palette[
[16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0]
]
elif dataset == "TopDownCocoWholeBodyDataset":
# show the results
skeleton = [
[15, 13],
[13, 11],
[16, 14],
[14, 12],
[11, 12],
[5, 11],
[6, 12],
[5, 6],
[5, 7],
[6, 8],
[7, 9],
[8, 10],
[1, 2],
[0, 1],
[0, 2],
[1, 3],
[2, 4],
[3, 5],
[4, 6],
[15, 17],
[15, 18],
[15, 19],
[16, 20],
[16, 21],
[16, 22],
[91, 92],
[92, 93],
[93, 94],
[94, 95],
[91, 96],
[96, 97],
[97, 98],
[98, 99],
[91, 100],
[100, 101],
[101, 102],
[102, 103],
[91, 104],
[104, 105],
[105, 106],
[106, 107],
[91, 108],
[108, 109],
[109, 110],
[110, 111],
[112, 113],
[113, 114],
[114, 115],
[115, 116],
[112, 117],
[117, 118],
[118, 119],
[119, 120],
[112, 121],
[121, 122],
[122, 123],
[123, 124],
[112, 125],
[125, 126],
[126, 127],
[127, 128],
[112, 129],
[129, 130],
[130, 131],
[131, 132],
]
pose_link_color = palette[
[0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16]
+ [16, 16, 16, 16, 16, 16]
+ [0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16]
+ [0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16]
]
pose_kpt_color = palette[
[16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0]
+ [0, 0, 0, 0, 0, 0]
+ [19] * (68 + 42)
]
elif dataset == "TopDownAicDataset":
skeleton = [
[2, 1],
[1, 0],
[0, 13],
[13, 3],
[3, 4],
[4, 5],
[8, 7],
[7, 6],
[6, 9],
[9, 10],
[10, 11],
[12, 13],
[0, 6],
[3, 9],
]
pose_link_color = palette[[9, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 0, 7, 7]]
pose_kpt_color = palette[[9, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 0, 0]]
elif dataset == "TopDownMpiiDataset":
skeleton = [
[0, 1],
[1, 2],
[2, 6],
[6, 3],
[3, 4],
[4, 5],
[6, 7],
[7, 8],
[8, 9],
[8, 12],
[12, 11],
[11, 10],
[8, 13],
[13, 14],
[14, 15],
]
pose_link_color = palette[[16, 16, 16, 16, 16, 16, 7, 7, 0, 9, 9, 9, 9, 9, 9]]
pose_kpt_color = palette[[16, 16, 16, 16, 16, 16, 7, 7, 0, 0, 9, 9, 9, 9, 9, 9]]
elif dataset == "TopDownMpiiTrbDataset":
skeleton = [
[12, 13],
[13, 0],
[13, 1],
[0, 2],
[1, 3],
[2, 4],
[3, 5],
[0, 6],
[1, 7],
[6, 7],
[6, 8],
[7, 9],
[8, 10],
[9, 11],
[14, 15],
[16, 17],
[18, 19],
[20, 21],
[22, 23],
[24, 25],
[26, 27],
[28, 29],
[30, 31],
[32, 33],
[34, 35],
[36, 37],
[38, 39],
]
pose_link_color = palette[[16] * 14 + [19] * 13]
pose_kpt_color = palette[[16] * 14 + [0] * 26]
elif dataset in ("OneHand10KDataset", "FreiHandDataset", "PanopticDataset"):
skeleton = [
[0, 1],
[1, 2],
[2, 3],
[3, 4],
[0, 5],
[5, 6],
[6, 7],
[7, 8],
[0, 9],
[9, 10],
[10, 11],
[11, 12],
[0, 13],
[13, 14],
[14, 15],
[15, 16],
[0, 17],
[17, 18],
[18, 19],
[19, 20],
]
pose_link_color = palette[
[0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16]
]
pose_kpt_color = palette[
[0, 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16]
]
elif dataset == "InterHand2DDataset":
skeleton = [
[0, 1],
[1, 2],
[2, 3],
[4, 5],
[5, 6],
[6, 7],
[8, 9],
[9, 10],
[10, 11],
[12, 13],
[13, 14],
[14, 15],
[16, 17],
[17, 18],
[18, 19],
[3, 20],
[7, 20],
[11, 20],
[15, 20],
[19, 20],
]
pose_link_color = palette[
[0, 0, 0, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16, 0, 4, 8, 12, 16]
]
pose_kpt_color = palette[
[0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 0]
]
elif dataset == "Face300WDataset":
# show the results
skeleton = []
pose_link_color = palette[[]]
pose_kpt_color = palette[[19] * 68]
kpt_score_thr = 0
elif dataset == "FaceAFLWDataset":
# show the results
skeleton = []
pose_link_color = palette[[]]
pose_kpt_color = palette[[19] * 19]
kpt_score_thr = 0
elif dataset == "FaceCOFWDataset":
# show the results
skeleton = []
pose_link_color = palette[[]]
pose_kpt_color = palette[[19] * 29]
kpt_score_thr = 0
elif dataset == "FaceWFLWDataset":
# show the results
skeleton = []
pose_link_color = palette[[]]
pose_kpt_color = palette[[19] * 98]
kpt_score_thr = 0
elif dataset == "AnimalHorse10Dataset":
skeleton = [
[0, 1],
[1, 12],
[12, 16],
[16, 21],
[21, 17],
[17, 11],
[11, 10],
[10, 8],
[8, 9],
[9, 12],
[2, 3],
[3, 4],
[5, 6],
[6, 7],
[13, 14],
[14, 15],
[18, 19],
[19, 20],
]
pose_link_color = palette[[4] * 10 + [6] * 2 + [6] * 2 + [7] * 2 + [7] * 2]
pose_kpt_color = palette[
[4, 4, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 7, 7, 7, 4, 4, 7, 7, 7, 4]
]
elif dataset == "AnimalFlyDataset":
skeleton = [
[1, 0],
[2, 0],
[3, 0],
[4, 3],
[5, 4],
[7, 6],
[8, 7],
[9, 8],
[11, 10],
[12, 11],
[13, 12],
[15, 14],
[16, 15],
[17, 16],
[19, 18],
[20, 19],
[21, 20],
[23, 22],
[24, 23],
[25, 24],
[27, 26],
[28, 27],
[29, 28],
[30, 3],
[31, 3],
]
pose_link_color = palette[[0] * 25]
pose_kpt_color = palette[[0] * 32]
elif dataset == "AnimalLocustDataset":
skeleton = [
[1, 0],
[2, 1],
[3, 2],
[4, 3],
[6, 5],
[7, 6],
[9, 8],
[10, 9],
[11, 10],
[13, 12],
[14, 13],
[15, 14],
[17, 16],
[18, 17],
[19, 18],
[21, 20],
[22, 21],
[24, 23],
[25, 24],
[26, 25],
[28, 27],
[29, 28],
[30, 29],
[32, 31],
[33, 32],
[34, 33],
]
pose_link_color = palette[[0] * 26]
pose_kpt_color = palette[[0] * 35]
elif dataset == "AnimalZebraDataset":
skeleton = [[1, 0], [2, 1], [3, 2], [4, 2], [5, 7], [6, 7], [7, 2], [8, 7]]
pose_link_color = palette[[0] * 8]
pose_kpt_color = palette[[0] * 9]
    elif dataset == "AnimalPoseDataset":
skeleton = [
[0, 1],
[0, 2],
[1, 3],
[0, 4],
[1, 4],
[4, 5],
[5, 7],
[6, 7],
[5, 8],
[8, 12],
[12, 16],
[5, 9],
[9, 13],
[13, 17],
[6, 10],
[10, 14],
[14, 18],
[6, 11],
[11, 15],
[15, 19],
]
pose_link_color = palette[[0] * 20]
pose_kpt_color = palette[[0] * 20]
else:
        raise NotImplementedError(f"Unsupported dataset: {dataset}")
img_w, img_h = img_size
img = 255 * np.ones((img_h, img_w, 3), dtype=np.uint8)
img = imshow_keypoints(
img,
keypts_list,
skeleton,
kpt_score_thr,
pose_kpt_color,
pose_link_color,
radius,
thickness,
)
alpha = 255 * (img != 255).any(axis=-1, keepdims=True).astype(np.uint8)
return np.concatenate([img, alpha], axis=-1)
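# Smoke-test sketch for vis_keypoints. Because imshow_keypoints reindexes its
# input with a hard-coded OpenPose BODY_25-style mapping, each per-person array
# needs at least 19 joints; the 25 random joints and scores below are purely
# illustrative. The result is an RGBA canvas with the skeleton drawn on it.
def _example_vis_keypoints():
    img_w, img_h = 640, 480
    rng = np.random.default_rng(0)
    xy = rng.uniform([50, 50], [img_w - 50, img_h - 50], size=(25, 2))
    kpts = np.concatenate([xy, np.ones((25, 1))], axis=1)  # columns: x, y, score
    rgba = vis_keypoints([kpts], (img_w, img_h))
    assert rgba.shape == (img_h, img_w, 4)
    return rgba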
def imshow_keypoints(
img,
pose_result,
skeleton=None,
kpt_score_thr=0.3,
pose_kpt_color=None,
pose_link_color=None,
radius=4,
thickness=1,
show_keypoint_weight=False,
):
"""Draw keypoints and links on an image.
From ViTPose/mmpose/core/visualization/image.py
Args:
img (H, W, 3) array
pose_result (list[kpts]): The poses to draw. Each element kpts is
a set of K keypoints as an Kx3 numpy.ndarray, where each
keypoint is represented as x, y, score.
kpt_score_thr (float, optional): Minimum score of keypoints
to be shown. Default: 0.3.
pose_kpt_color (np.array[Nx3]`): Color of N keypoints. If None,
the keypoint will not be drawn.
pose_link_color (np.array[Mx3]): Color of M links. If None, the
links will not be drawn.
thickness (int): Thickness of lines.
show_keypoint_weight (bool): If True, opacity indicates keypoint score
"""
import math
img_h, img_w, _ = img.shape
    # Repo-specific reindexing: the incoming keypoints (an OpenPose BODY_25-style
    # ordering with at least 19 joints) are remapped onto the 17 COCO joints that
    # the skeleton/color tables expect.
    idcs = [0, 16, 15, 18, 17, 5, 2, 6, 3, 7, 4, 12, 9, 13, 10, 14, 11]
    for kpts in pose_result:
        kpts = np.array(kpts, copy=False)[idcs]
# draw each point on image
if pose_kpt_color is not None:
assert len(pose_kpt_color) == len(kpts)
for kid, kpt in enumerate(kpts):
x_coord, y_coord, kpt_score = int(kpt[0]), int(kpt[1]), kpt[2]
if kpt_score > kpt_score_thr:
color = tuple(int(c) for c in pose_kpt_color[kid])
if show_keypoint_weight:
img_copy = img.copy()
cv2.circle(
img_copy, (int(x_coord), int(y_coord)), radius, color, -1
)
transparency = max(0, min(1, kpt_score))
cv2.addWeighted(
img_copy, transparency, img, 1 - transparency, 0, dst=img
)
else:
cv2.circle(img, (int(x_coord), int(y_coord)), radius, color, -1)
# draw links
if skeleton is not None and pose_link_color is not None:
assert len(pose_link_color) == len(skeleton)
for sk_id, sk in enumerate(skeleton):
pos1 = (int(kpts[sk[0], 0]), int(kpts[sk[0], 1]))
pos2 = (int(kpts[sk[1], 0]), int(kpts[sk[1], 1]))
if (
pos1[0] > 0
and pos1[0] < img_w
and pos1[1] > 0
and pos1[1] < img_h
and pos2[0] > 0
and pos2[0] < img_w
and pos2[1] > 0
and pos2[1] < img_h
and kpts[sk[0], 2] > kpt_score_thr
and kpts[sk[1], 2] > kpt_score_thr
):
color = tuple(int(c) for c in pose_link_color[sk_id])
if show_keypoint_weight:
img_copy = img.copy()
X = (pos1[0], pos2[0])
Y = (pos1[1], pos2[1])
mX = np.mean(X)
mY = np.mean(Y)
length = ((Y[0] - Y[1]) ** 2 + (X[0] - X[1]) ** 2) ** 0.5
angle = math.degrees(math.atan2(Y[0] - Y[1], X[0] - X[1]))
stickwidth = 2
polygon = cv2.ellipse2Poly(
(int(mX), int(mY)),
(int(length / 2), int(stickwidth)),
int(angle),
0,
360,
1,
)
cv2.fillConvexPoly(img_copy, polygon, color)
transparency = max(
0, min(1, 0.5 * (kpts[sk[0], 2] + kpts[sk[1], 2]))
)
cv2.addWeighted(
img_copy, transparency, img, 1 - transparency, 0, dst=img
)
else:
cv2.line(img, pos1, pos2, color, thickness=thickness)
return img