davidvgilmore committed
Commit 60f06d0 · verified · 1 Parent(s): 83f798d

Upload hy3dgen/texgen/utils/multiview_utils.py with huggingface_hub
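
A commit message of this form is the default that huggingface_hub generates when a file is pushed through its upload API. For context, a minimal sketch of such an upload (the repo_id below is a placeholder, not taken from this commit):

    from huggingface_hub import HfApi

    api = HfApi()  # assumes prior authentication, e.g. via huggingface-cli login
    api.upload_file(
        path_or_fileobj="hy3dgen/texgen/utils/multiview_utils.py",  # local file to push
        path_in_repo="hy3dgen/texgen/utils/multiview_utils.py",     # destination path in the repo
        repo_id="user/repo",                                        # placeholder repository id
    )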

hy3dgen/texgen/utils/multiview_utils.py ADDED
@@ -0,0 +1,86 @@
+ # Open Source Model Licensed under the Apache License Version 2.0
+ # and Other Licenses of the Third-Party Components therein:
+ # The below Model in this distribution may have been modified by THL A29 Limited
+ # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
+
+ # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
+ # The below software and/or models in this distribution may have been
+ # modified by THL A29 Limited ("Tencent Modifications").
+ # All Tencent Modifications are Copyright (C) THL A29 Limited.
+
+ # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
+ # except for the third-party components listed below.
+ # Hunyuan 3D does not impose any additional limitations beyond what is outlined
+ # in the respective licenses of these third-party components.
+ # Users must comply with all terms and conditions of original licenses of these third-party
+ # components and must ensure that the usage of the third party components adheres to
+ # all relevant laws and regulations.
+
+ # For avoidance of doubts, Hunyuan 3D means the large language models and
+ # their software and algorithms, including trained model weights, parameters (including
+ # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
+ # fine-tuning enabling code and other elements of the foregoing made publicly available
+ # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
+
+ import os
+ import random
+
+ import numpy as np
+ import torch
+ from diffusers import DiffusionPipeline
+ from diffusers import EulerAncestralDiscreteScheduler
+
+
+ class Multiview_Diffusion_Net():
+     def __init__(self, config) -> None:
+         self.device = config.device
+         self.view_size = 512
+         multiview_ckpt_path = config.multiview_ckpt_path
+
+         current_file_path = os.path.abspath(__file__)
+         custom_pipeline_path = os.path.join(os.path.dirname(current_file_path), '..', 'hunyuanpaint')
+
+         pipeline = DiffusionPipeline.from_pretrained(
+             multiview_ckpt_path,
+             custom_pipeline=custom_pipeline_path, torch_dtype=torch.float16)
+
+         pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config,
+                                                                          timestep_spacing='trailing')
+
+         pipeline.set_progress_bar_config(disable=True)
+         self.pipeline = pipeline.to(self.device)
+
+     def seed_everything(self, seed):
+         random.seed(seed)
+         np.random.seed(seed)
+         torch.manual_seed(seed)
+         os.environ["PL_GLOBAL_SEED"] = str(seed)
+
+     def __call__(self, input_image, control_images, camera_info):
+
+         self.seed_everything(0)
+
+         input_image = input_image.resize((self.view_size, self.view_size))
+         for i in range(len(control_images)):
+             control_images[i] = control_images[i].resize((self.view_size, self.view_size))
+             if control_images[i].mode == 'L':
+                 control_images[i] = control_images[i].point(lambda x: 255 if x > 1 else 0, mode='1')
+
+         kwargs = dict(generator=torch.Generator(device=self.pipeline.device).manual_seed(0))
+
+         num_view = len(control_images) // 2
+         normal_image = [[control_images[i] for i in range(num_view)]]
+         position_image = [[control_images[i + num_view] for i in range(num_view)]]
+
+         camera_info_gen = [camera_info]
+         camera_info_ref = [[0]]
+         kwargs['width'] = self.view_size
+         kwargs['height'] = self.view_size
+         kwargs['num_in_batch'] = num_view
+         kwargs['camera_info_gen'] = camera_info_gen
+         kwargs['camera_info_ref'] = camera_info_ref
+         kwargs["normal_imgs"] = normal_image
+         kwargs["position_imgs"] = position_image
+
+         mvd_image = self.pipeline(input_image, num_inference_steps=30, **kwargs).images
+         return mvd_image
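
For readers who want to exercise this class, a minimal usage sketch follows. The two config attributes (device, multiview_ckpt_path) are the ones __init__ reads; everything else here, including the file names, the six-view count, and the integer encoding of camera_info, is a hypothetical illustration, since the exact camera_info format is defined by the custom hunyuanpaint pipeline rather than by this file.

    from types import SimpleNamespace
    from PIL import Image

    # Config carrying the two attributes the constructor reads; the checkpoint path is a placeholder.
    config = SimpleNamespace(device='cuda', multiview_ckpt_path='path/to/multiview_checkpoint')
    net = Multiview_Diffusion_Net(config)

    input_image = Image.open('reference.png')                           # conditioning image
    normal_maps = [Image.open(f'normal_{i}.png') for i in range(6)]     # hypothetical 6 views
    position_maps = [Image.open(f'position_{i}.png') for i in range(6)]
    control_images = normal_maps + position_maps                        # normals first, then positions

    camera_info = [0, 1, 2, 3, 4, 5]  # assumed per-view indices; actual encoding is pipeline-specific
    views = net(input_image, control_images, camera_info)               # list of generated view images

Two behaviors worth noting from the code above: __call__ reseeds all RNGs to 0 and uses a fixed torch.Generator seed, so repeated calls with the same inputs are deterministic, and any control image in single-channel 'L' mode is thresholded into a binary mask before being handed to the pipeline.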