Abdualkader
/

Multi-View

Image-to-3D

Diffusers

Safetensors

MVDreamPipeline

Model card Files and versions Community

Abdualkader committed 9 days ago

Commit

28aea7c

verified ·

1 Parent(s): 8505c8f

Update pipeline.py

Browse files

Files changed (1) hide show

pipeline.py +14 -7

pipeline.py CHANGED Viewed

@@ -660,12 +660,12 @@ class MultiViewUNetModel(ModelMixin, ConfigMixin):
     def __init__(
         self,
-        image_size,
-        in_channels,
-        model_channels,
-        out_channels,
         num_res_blocks,
         attention_resolutions,
         dropout=0,
         channel_mult=(1, 2, 4, 8),
         conv_resample=True,
@@ -687,6 +687,7 @@ class MultiViewUNetModel(ModelMixin, ConfigMixin):
         **kwargs,
     ):
         super().__init__()
         assert context_dim is not None
         if num_heads_upsample == -1:
@@ -1439,8 +1440,8 @@ class MVDreamPipeline(DiffusionPipeline):
         self,
         prompt: str = "",
         image: Optional[np.ndarray] = None,
-        height: int = 256,
-        width: int = 256,
         elevation: float = 0,
         num_inference_steps: int = 50,
         guidance_scale: float = 7.0,
@@ -1511,6 +1512,7 @@ class MVDreamPipeline(DiffusionPipeline):
         # Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         with self.progress_bar(total=num_inference_steps) as progress_bar:
             for i, t in enumerate(timesteps):
                 # expand the latents if we are doing classifier free guidance
@@ -1553,6 +1555,11 @@ class MVDreamPipeline(DiffusionPipeline):
                     noise_pred = noise_pred_uncond + guidance_scale * (
                         noise_pred_text - noise_pred_uncond
                     )
                 # compute the previous noisy sample x_t -> x_t-1
                 latents: torch.Tensor = self.scheduler.step(
@@ -1580,4 +1587,4 @@ class MVDreamPipeline(DiffusionPipeline):
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
             self.final_offload_hook.offload()
-        return image

     def __init__(
         self,
         num_res_blocks,
         attention_resolutions,
+        image_size=512,
+        in_channels=4,
+        model_channels=320,
+        out_channels=4,
         dropout=0,
         channel_mult=(1, 2, 4, 8),
         conv_resample=True,
         **kwargs,
     ):
         super().__init__()
+        self.image_size = image_size
         assert context_dim is not None
         if num_heads_upsample == -1:
         self,
         prompt: str = "",
         image: Optional[np.ndarray] = None,
+        height: int = 512,
+        width: int = 512,
         elevation: float = 0,
         num_inference_steps: int = 50,
         guidance_scale: float = 7.0,
         # Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
+        total_loss = 0.0
         with self.progress_bar(total=num_inference_steps) as progress_bar:
             for i, t in enumerate(timesteps):
                 # expand the latents if we are doing classifier free guidance
                     noise_pred = noise_pred_uncond + guidance_scale * (
                         noise_pred_text - noise_pred_uncond
                     )
+                    loss = F.mse_loss(noise_pred_uncond, noise_pred_text)
+                else:
+                    loss = F.mse_loss(noise_pred, torch.zeros_like(noise_pred))
+                total_loss += loss.item()
                 # compute the previous noisy sample x_t -> x_t-1
                 latents: torch.Tensor = self.scheduler.step(
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
             self.final_offload_hook.offload()
+        return image, total_loss/len(timesteps)