Spaces:
Runtime error
Runtime error
Upload hy3dgen/texgen/utils/multiview_utils.py with huggingface_hub
Browse files
hy3dgen/texgen/utils/multiview_utils.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Open Source Model Licensed under the Apache License Version 2.0
|
2 |
+
# and Other Licenses of the Third-Party Components therein:
|
3 |
+
# The below Model in this distribution may have been modified by THL A29 Limited
|
4 |
+
# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
|
5 |
+
|
6 |
+
# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
|
7 |
+
# The below software and/or models in this distribution may have been
|
8 |
+
# modified by THL A29 Limited ("Tencent Modifications").
|
9 |
+
# All Tencent Modifications are Copyright (C) THL A29 Limited.
|
10 |
+
|
11 |
+
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
|
12 |
+
# except for the third-party components listed below.
|
13 |
+
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
|
14 |
+
# in the respective licenses of these third-party components.
|
15 |
+
# Users must comply with all terms and conditions of original licenses of these third-party
|
16 |
+
# components and must ensure that the usage of the third party components adheres to
|
17 |
+
# all relevant laws and regulations.
|
18 |
+
|
19 |
+
# For avoidance of doubts, Hunyuan 3D means the large language models and
|
20 |
+
# their software and algorithms, including trained model weights, parameters (including
|
21 |
+
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
|
22 |
+
# fine-tuning enabling code and other elements of the foregoing made publicly available
|
23 |
+
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
|
24 |
+
|
25 |
+
import os
|
26 |
+
import random
|
27 |
+
|
28 |
+
import numpy as np
|
29 |
+
import torch
|
30 |
+
from diffusers import DiffusionPipeline
|
31 |
+
from diffusers import EulerAncestralDiscreteScheduler
|
32 |
+
|
33 |
+
|
34 |
+
class Multiview_Diffusion_Net():
    """Wrapper around the custom HunyuanPaint multiview diffusion pipeline.

    Loads a ``DiffusionPipeline`` from a checkpoint (using the local
    ``hunyuanpaint`` custom-pipeline implementation) and exposes a callable
    that renders multiview images conditioned on normal/position control
    images and camera info.
    """

    def __init__(self, config) -> None:
        """Build the pipeline.

        Args:
            config: object exposing ``device`` (target torch device) and
                ``multiview_ckpt_path`` (checkpoint directory/id).
        """
        self.device = config.device
        self.view_size = 512  # every generated/conditioning view is 512x512

        # The custom pipeline code lives next to this package:
        # <dir of this file>/../hunyuanpaint
        pipeline_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), '..', 'hunyuanpaint')

        pipe = DiffusionPipeline.from_pretrained(
            config.multiview_ckpt_path,
            custom_pipeline=pipeline_dir,
            torch_dtype=torch.float16,
        )
        # Swap in an Euler-Ancestral scheduler with 'trailing' timestep
        # spacing, keeping the rest of the original scheduler config.
        pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
            pipe.scheduler.config, timestep_spacing='trailing')
        pipe.set_progress_bar_config(disable=True)
        self.pipeline = pipe.to(self.device)

    def seed_everything(self, seed):
        """Seed python, numpy and torch RNGs (and export PL_GLOBAL_SEED)."""
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        os.environ["PL_GLOBAL_SEED"] = str(seed)

    def __call__(self, input_image, control_images, camera_info):
        """Generate multiview images.

        Args:
            input_image: PIL reference image; resized to the view size.
            control_images: list of PIL images — first half normals, second
                half positions. NOTE: this list is resized/binarized IN PLACE.
            camera_info: camera metadata forwarded to the pipeline.

        Returns:
            The ``images`` attribute of the pipeline output.
        """
        # Fixed seed so repeated calls are deterministic.
        self.seed_everything(0)

        target = (self.view_size, self.view_size)
        input_image = input_image.resize(target)

        # Resize all control images; binarize grayscale ('L') masks so any
        # value > 1 becomes white (mode '1'). Mutates the caller's list,
        # matching the original behavior.
        for idx, img in enumerate(control_images):
            img = img.resize(target)
            if img.mode == 'L':
                img = img.point(lambda x: 255 if x > 1 else 0, mode='1')
            control_images[idx] = img

        num_view = len(control_images) // 2
        pipe_kwargs = {
            'generator': torch.Generator(device=self.pipeline.device).manual_seed(0),
            'width': self.view_size,
            'height': self.view_size,
            'num_in_batch': num_view,
            'camera_info_gen': [camera_info],
            'camera_info_ref': [[0]],
            # Batched as a single-element outer list, as the custom
            # pipeline expects.
            "normal_imgs": [control_images[:num_view]],
            "position_imgs": [control_images[num_view:num_view * 2]],
        }

        return self.pipeline(input_image, num_inference_steps=30, **pipe_kwargs).images