Update pipeline.py
Browse files- pipeline.py +14 -7
pipeline.py
CHANGED
@@ -660,12 +660,12 @@ class MultiViewUNetModel(ModelMixin, ConfigMixin):
|
|
660 |
|
661 |
def __init__(
|
662 |
self,
|
663 |
-
image_size,
|
664 |
-
in_channels,
|
665 |
-
model_channels,
|
666 |
-
out_channels,
|
667 |
num_res_blocks,
|
668 |
attention_resolutions,
|
|
|
|
|
|
|
|
|
669 |
dropout=0,
|
670 |
channel_mult=(1, 2, 4, 8),
|
671 |
conv_resample=True,
|
@@ -687,6 +687,7 @@ class MultiViewUNetModel(ModelMixin, ConfigMixin):
|
|
687 |
**kwargs,
|
688 |
):
|
689 |
super().__init__()
|
|
|
690 |
assert context_dim is not None
|
691 |
|
692 |
if num_heads_upsample == -1:
|
@@ -1439,8 +1440,8 @@ class MVDreamPipeline(DiffusionPipeline):
|
|
1439 |
self,
|
1440 |
prompt: str = "",
|
1441 |
image: Optional[np.ndarray] = None,
|
1442 |
-
height: int =
|
1443 |
-
width: int =
|
1444 |
elevation: float = 0,
|
1445 |
num_inference_steps: int = 50,
|
1446 |
guidance_scale: float = 7.0,
|
@@ -1511,6 +1512,7 @@ class MVDreamPipeline(DiffusionPipeline):
|
|
1511 |
|
1512 |
# Denoising loop
|
1513 |
num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
|
|
|
1514 |
with self.progress_bar(total=num_inference_steps) as progress_bar:
|
1515 |
for i, t in enumerate(timesteps):
|
1516 |
# expand the latents if we are doing classifier free guidance
|
@@ -1553,6 +1555,11 @@ class MVDreamPipeline(DiffusionPipeline):
|
|
1553 |
noise_pred = noise_pred_uncond + guidance_scale * (
|
1554 |
noise_pred_text - noise_pred_uncond
|
1555 |
)
|
|
|
|
|
|
|
|
|
|
|
1556 |
|
1557 |
# compute the previous noisy sample x_t -> x_t-1
|
1558 |
latents: torch.Tensor = self.scheduler.step(
|
@@ -1580,4 +1587,4 @@ class MVDreamPipeline(DiffusionPipeline):
|
|
1580 |
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
|
1581 |
self.final_offload_hook.offload()
|
1582 |
|
1583 |
-
return image
|
|
|
660 |
|
661 |
def __init__(
|
662 |
self,
|
|
|
|
|
|
|
|
|
663 |
num_res_blocks,
|
664 |
attention_resolutions,
|
665 |
+
image_size=512,
|
666 |
+
in_channels=4,
|
667 |
+
model_channels=320,
|
668 |
+
out_channels=4,
|
669 |
dropout=0,
|
670 |
channel_mult=(1, 2, 4, 8),
|
671 |
conv_resample=True,
|
|
|
687 |
**kwargs,
|
688 |
):
|
689 |
super().__init__()
|
690 |
+
self.image_size = image_size
|
691 |
assert context_dim is not None
|
692 |
|
693 |
if num_heads_upsample == -1:
|
|
|
1440 |
self,
|
1441 |
prompt: str = "",
|
1442 |
image: Optional[np.ndarray] = None,
|
1443 |
+
height: int = 512,
|
1444 |
+
width: int = 512,
|
1445 |
elevation: float = 0,
|
1446 |
num_inference_steps: int = 50,
|
1447 |
guidance_scale: float = 7.0,
|
|
|
1512 |
|
1513 |
# Denoising loop
|
1514 |
num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
|
1515 |
+
total_loss = 0.0
|
1516 |
with self.progress_bar(total=num_inference_steps) as progress_bar:
|
1517 |
for i, t in enumerate(timesteps):
|
1518 |
# expand the latents if we are doing classifier free guidance
|
|
|
1555 |
noise_pred = noise_pred_uncond + guidance_scale * (
|
1556 |
noise_pred_text - noise_pred_uncond
|
1557 |
)
|
1558 |
+
loss = F.mse_loss(noise_pred_uncond, noise_pred_text)
|
1559 |
+
else:
|
1560 |
+
loss = F.mse_loss(noise_pred, torch.zeros_like(noise_pred))
|
1561 |
+
|
1562 |
+
total_loss += loss.item()
|
1563 |
|
1564 |
# compute the previous noisy sample x_t -> x_t-1
|
1565 |
latents: torch.Tensor = self.scheduler.step(
|
|
|
1587 |
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
|
1588 |
self.final_offload_hook.offload()
|
1589 |
|
1590 |
+
return image, total_loss/len(timesteps)
|