Spaces:
Sleeping
Sleeping
import os | |
import sys | |
import os.path as osp | |
import argparse | |
import numpy as np | |
import torchvision.transforms as transforms | |
import torch.backends.cudnn as cudnn | |
import torch | |
CUR_DIR = osp.dirname(os.path.abspath(__file__)) | |
sys.path.insert(0, osp.join(CUR_DIR, '..', 'main')) | |
sys.path.insert(0, osp.join(CUR_DIR , '..', 'common')) | |
from config import cfg | |
import cv2 | |
from mmdet.apis import init_detector, inference_detector | |
from utils.inference_utils import process_mmdet_results, non_max_suppression | |
from postometro_utils.smpl import SMPL | |
import data.config as smpl_cfg | |
from postometro import get_model | |
class Inferer:
    """Detect people in an image and overlay a recovered body mesh on each.

    Detection is done with an mmdet Faster R-CNN model; every kept box is
    cropped, normalised and passed to the PostoMETRO model returned by
    ``get_model``. The predicted vertices are rendered onto a copy of the
    input image with ``render_mesh``.
    """

    def __init__(self, pretrained_model: str, num_gpus: int, output_folder: str):
        """Load the config, the SMPL helper, the mesh model and the detector.

        Args:
            pretrained_model: Name used to pick ``config/config_<name>.py``
                next to this file.
            num_gpus: CUDA is used when this is greater than zero, CPU
                otherwise.
            output_folder: Stored on the instance and forwarded to
                ``cfg.update_config``.
        """
        self.output_folder = output_folder
        self.device = torch.device('cuda') if (num_gpus > 0) else torch.device('cpu')
        print("Infer using device: ", self.device)

        # Load and update the model config. No checkpoint path is handed to
        # the config; the model weights are loaded separately below.
        config_path = osp.join(CUR_DIR, './config', f'config_{pretrained_model}.py')
        ckpt_path = None
        cfg.get_config_fromfile(config_path)
        cfg.update_config(num_gpus, ckpt_path, output_folder, self.device)
        self.cfg = cfg
        cudnn.benchmark = True

        # SMPL helper: provides the mesh faces and the H36M joint regressor.
        self.smpl = SMPL().to(self.device)
        self.faces = self.smpl.faces.cpu().numpy()

        # Mesh recovery model.
        hmr_model_checkpoint_file = osp.join(CUR_DIR, '../pretrained_models/postometro/resnet_state_dict.bin')
        self.hmr_model = get_model(backbone_str='resnet50', device=self.device,
                                   checkpoint_file=hmr_model_checkpoint_file)

        # Faster R-CNN human detector.
        checkpoint_file = osp.join(CUR_DIR, '../pretrained_models/mmdet/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth')
        config_file = osp.join(CUR_DIR, '../pretrained_models/mmdet/mmdet_faster_rcnn_r50_fpn_coco.py')
        self.model = init_detector(config_file, checkpoint_file, device=self.device)

    @staticmethod
    def _xyxy_to_xywh(box) -> np.ndarray:
        """Convert the first four entries of an (x1, y1, x2, y2, ...) box to float64 xywh."""
        return np.array(
            [box[0], box[1], abs(box[2] - box[0]), abs(box[3] - box[1])],
            dtype=np.float64,
        )

    @staticmethod
    def _camera_for_full_image(pred_cam, bbox, img_width, img_height) -> list:
        """Build the four camera values passed to ``render_mesh`` for one box.

        ``pred_cam`` is indexed as three values and ``bbox`` as (x, y, w, h).
        NOTE(review): presumably ``pred_cam`` is a (scale, tx, ty) camera
        predicted in crop space and this re-expresses it for the whole image;
        confirm against ``render_mesh``.
        """
        # Scale shrinks by the ratio of box size to image size on each axis.
        scale_x = pred_cam[0] / (img_width / bbox[2])
        scale_y = pred_cam[0] / (img_height / bbox[3])
        # Offset of the box centre from the image centre, in units of half
        # the image size.
        cx_delta = (bbox[0] + bbox[2] / 2) / (img_width / 2) - 1
        cy_delta = (bbox[1] + bbox[3] / 2) / (img_height / 2) - 1
        return [
            scale_x,
            scale_y,
            pred_cam[1] + cx_delta / scale_x,
            pred_cam[2] + cy_delta / scale_y,
        ]

    def infer(self, original_img: np.ndarray, iou_thr: float, multi_person: bool = False,
              mesh_as_vertices: bool = False, *, min_bbox_width: float = 50,
              min_bbox_height: float = 150) -> tuple:
        """Run detection and mesh recovery on one image.

        Args:
            original_img: Image array; its first two dimensions are read as
                (height, width). It is normalised with 3-channel statistics,
                so it is expected to have three channels.
            iou_thr: IoU threshold given to ``non_max_suppression``; only
                used when ``multi_person`` is true.
            multi_person: When false only the first detected box is
                processed; when true every box kept by NMS is processed.
            mesh_as_vertices: Forwarded unchanged to ``render_mesh``.
            min_bbox_width: Boxes narrower than this many pixels are skipped.
            min_bbox_height: Boxes shorter than this many pixels are skipped.

        Returns:
            ``(vis_img, num_boxes, boxes)``: the annotated image, the number
            of boxes that were processed, and those boxes as returned by
            ``process_bbox``. When nothing is detected the input image is
            returned unchanged with ``0`` and ``[]``.
        """
        # Imported lazily, as in the original code.
        from utils.preprocessing import process_bbox, generate_patch_image
        from utils.vis import render_mesh

        transform = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        vis_img = original_img.copy()
        original_img_height, original_img_width = original_img.shape[:2]

        # Person detection (category id 0).
        mmdet_results = inference_detector(self.model, original_img)
        mmdet_box = process_mmdet_results(mmdet_results, cat_id=0, multi_person=True)

        # Nothing detected: hand back the untouched image with the same
        # (image, count, boxes) shape as the normal return.
        if len(mmdet_box[0]) < 1:
            return original_img, 0, []

        if not multi_person:
            # Only the first box is used.
            # NOTE(review): the original comment calls this "the largest
            # bbox"; that depends on the detector's ordering - confirm.
            mmdet_box = mmdet_box[0]
            num_bbox = 1
        else:
            # Keep boxes that survive NMS at iou_thr.
            mmdet_box = non_max_suppression(mmdet_box[0], iou_thr)
            num_bbox = len(mmdet_box)

        ok_bboxes = []
        for raw_box in mmdet_box[:num_bbox]:
            box_xywh = self._xyxy_to_xywh(raw_box)

            # Skip boxes that are too small in pixels.
            if box_xywh[2] < min_bbox_width or box_xywh[3] < min_bbox_height:
                continue

            bbox = process_bbox(box_xywh, original_img_width, original_img_height)
            # The box must be skipped here when process_bbox yields None,
            # otherwise None would be recorded and then used for cropping.
            if bbox is None:
                continue
            ok_bboxes.append(bbox)

            # Draw the processed box on the visualisation.
            top_left = (int(bbox[0]), int(bbox[1]))
            bottom_right = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
            cv2.rectangle(vis_img, top_left, bottom_right, (0, 0, 255), 2)

            # Crop the person patch and turn it into a normalised 1xCxHxW tensor.
            img, _, _ = generate_patch_image(original_img, bbox, 1.0, 0.0, False, self.cfg.input_img_shape)
            img = img.transpose((2, 0, 1))  # h,w,c -> c,h,w
            img = torch.from_numpy(img).float() / 255.0
            img = transform(img)
            # Use the device the model was loaded on.
            img = img.to(self.device)[None, :, :, :]

            # Mesh recovery.
            with torch.no_grad():
                out = self.hmr_model(img)
            pred_cam, pred_3d_vertices_fine = out['pred_cam'], out['pred_3d_vertices_fine']

            # Centre the vertices on the pelvis joint regressed from them.
            pred_3d_joints_from_smpl = self.smpl.get_h36m_joints(pred_3d_vertices_fine)
            pelvis = pred_3d_joints_from_smpl[:, smpl_cfg.H36M_J17_NAME.index('Pelvis'), :]
            pred_3d_vertices_fine = pred_3d_vertices_fine - pelvis[:, None, :]

            # First (only) batch element, as numpy.
            pred_3d_vertices_fine = pred_3d_vertices_fine.detach().cpu().numpy()[0]
            pred_cam = pred_cam.detach().cpu().numpy()[0]

            # Render this person's mesh on top of the running visualisation.
            camera = self._camera_for_full_image(pred_cam, bbox, original_img_width, original_img_height)
            vis_img = render_mesh(vis_img, pred_3d_vertices_fine, self.faces, camera,
                                  mesh_as_vertices=mesh_as_vertices)
            vis_img = vis_img.astype('uint8')

        return vis_img, len(ok_bboxes), ok_bboxes
if __name__ == '__main__':
    # Demo: run the pipeline on one sample image and save the visualisation.
    from PIL import Image

    inferer = Inferer('postometro', 1, './out_folder')  # gpu
    image_path = '../assets/07.jpg'

    # Close the file handle once the pixels are read, and force 3-channel
    # RGB because infer() normalises with 3-channel mean/std.
    with Image.open(image_path) as image:
        image_np = np.array(image.convert('RGB'))

    vis_img, _, _ = inferer.infer(image_np, 0.2, multi_person=True, mesh_as_vertices=True)
    save_path = './saved_vis_07.jpg'

    # PIL expects uint8 data when building an image from an array.
    if vis_img.dtype != np.uint8:
        vis_img = vis_img.astype('uint8')

    Image.fromarray(vis_img).save(save_path)