import os
import sys
import os.path as osp
from pathlib import Path

import cv2
import gradio as gr
import torch
import math
import spaces

try:
    import mmpose
except ImportError:
    # On the Space: install the bundled transformer_utils package and patch
    # torchgeometry's conversions module with the copy shipped in assets/.
    os.system('pip install /home/user/app/main/transformer_utils')
    os.system('cp -rf /home/user/app/assets/conversions.py /home/user/.pyenv/versions/3.9.18/lib/python3.9/site-packages/torchgeometry/core/conversions.py')

DEFAULT_MODEL = 'postometro'  # config name passed to the Inferer
OUT_FOLDER = '/home/user/app/demo_out'
os.makedirs(OUT_FOLDER, exist_ok=True)


@spaces.GPU(enable_queue=True)
def infer(image_input, in_threshold=0.5, num_people="Single person", render_mesh=False):
    num_gpus = 1 if torch.cuda.is_available() else -1
    # CUDA debug information (disabled):
    # print("!!! torch.cuda.is_available: ", torch.cuda.is_available())
    # print("!!! torch.cuda.device_count: ", torch.cuda.device_count())
    # print("CUDA version: ", torch.version.cuda)
    # index = torch.cuda.current_device()
    # print("CUDA current_device: ", index)
    # print("CUDA device_name: ", torch.cuda.get_device_name(index))
    from main.inference import Inferer
    inferer = Inferer(DEFAULT_MODEL, num_gpus, OUT_FOLDER)
    os.system(f'rm -rf {OUT_FOLDER}/*')  # clear previous outputs
    multi_person = (num_people != "Single person")
    vis_img, num_bbox, mmdet_box = inferer.infer(image_input, in_threshold, multi_person, not render_mesh)
    return vis_img, "bbox num: {}, bbox meta: {}".format(num_bbox, mmdet_box)


TITLE = '''
Note: You can drop an image onto the panel (or select one of the examples) to obtain the 3D parametric reconstructions of the detected humans.
'''

with gr.Blocks(title="PostoMETRO", css=".gradio-container") as demo:
    gr.Markdown(TITLE)
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="Input image", elem_classes="Image")
            threshold = gr.Slider(
                0, 1.0, value=0.2, label='BBox detection threshold',
                info="PostoMETRO takes cropped bboxes as input to produce human meshes. "
                     "A higher threshold suppresses redundant low-confidence bboxes; a lower one keeps more detections.")
            num_people = gr.Radio(
                choices=["Single person", "Multiple people"],
                value="Single person",
                label="Number of people",
                info="Choose how many people are in the image. Choose 'Single person' for faster inference.",
                interactive=True,
                scale=1,)
            mesh_as_vertices = gr.Checkbox(
                label="Render as mesh",
                info="By default, the estimated SMPL parameters are rendered as vertices for faster visualization. "
                     "Check this option to visualize meshes instead.",
                interactive=True,
                scale=1,)
            send_button = gr.Button("Infer")
        with gr.Column():
            processed_frames = gr.Image(label="Rendered Results")
            debug_textbox = gr.Textbox(label="Debug information")

    # example_images = gr.Examples([])
    send_button.click(fn=infer,
                      inputs=[image_input, threshold, num_people, mesh_as_vertices],
                      outputs=[processed_frames, debug_textbox])

    # with gr.Row():
    example_images = gr.Examples([
        ['/home/user/app/assets/01.jpg'],
        ['/home/user/app/assets/02.jpg'],
        ['/home/user/app/assets/03.jpg'],
        ['/home/user/app/assets/04.jpg'],
        ['/home/user/app/assets/05.jpg'],
        ['/home/user/app/assets/06.jpg'],
        ['/home/user/app/assets/07.jpg'],
    ], inputs=[image_input])

# demo.queue()
demo.queue().launch(debug=True)
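# --- Local debugging sketch (an assumption, not part of the original app) ---------
# A minimal way to exercise the inference path without the web UI, assuming the
# bundled example images are present and that `infer` accepts an RGB NumPy array,
# as Gradio's Image component would pass. Kept commented out because launch()
# above blocks, and @spaces.GPU expects the Space's GPU environment.
#
# if __name__ == '__main__':
#     bgr = cv2.imread('/home/user/app/assets/01.jpg')
#     rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
#     rendered, bbox_info = infer(rgb, in_threshold=0.2,
#                                 num_people="Single person", render_mesh=False)
#     print(bbox_info, getattr(rendered, 'shape', None))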