Spaces:

Lightricks
/

LTX-Video-Playground

Running on A100

App Files Files Community

guysrn committed on Nov 12, 2024

Commit

d5e984f

1 Parent(s): cda00c1

causal_video_autoencoder: add option to halve channels in depth-to-space upsample block

Browse files

Files changed (1) hide show

xora/models/autoencoders/causal_video_autoencoder.py +13 -3

xora/models/autoencoders/causal_video_autoencoder.py CHANGED Viewed

@@ -455,6 +455,8 @@ class Decoder(nn.Module):
             block_params = block_params if isinstance(block_params, dict) else {}
             if block_name == "res_x_y":
                 output_channel = output_channel * block_params.get("multiplier", 2)
         self.conv_in = make_conv_nd(
             dims,
@@ -501,11 +503,13 @@ class Decoder(nn.Module):
                     dims=dims, in_channels=input_channel, stride=(1, 2, 2)
                 )
             elif block_name == "compress_all":
                 block = DepthToSpaceUpsample(
                     dims=dims,
                     in_channels=input_channel,
                     stride=(2, 2, 2),
                     residual=block_params.get("residual", False),
                 )
             else:
                 raise ValueError(f"unknown layer: {block_name}")
@@ -614,10 +618,14 @@ class UNetMidBlock3D(nn.Module):
 class DepthToSpaceUpsample(nn.Module):
-    def __init__(self, dims, in_channels, stride, residual=False):
         super().__init__()
         self.stride = stride
-        self.out_channels = np.prod(stride) * in_channels
         self.conv = make_conv_nd(
             dims=dims,
             in_channels=in_channels,
@@ -627,6 +635,7 @@ class DepthToSpaceUpsample(nn.Module):
             causal=True,
         )
         self.residual = residual
     def forward(self, x, causal: bool = True):
         if self.residual:
@@ -638,7 +647,8 @@ class DepthToSpaceUpsample(nn.Module):
                 p2=self.stride[1],
                 p3=self.stride[2],
             )
-            x_in = x_in.repeat(1, np.prod(self.stride), 1, 1, 1)
             if self.stride[0] == 2:
                 x_in = x_in[:, :, 1:, :, :]
         x = self.conv(x, causal=causal)

             block_params = block_params if isinstance(block_params, dict) else {}
             if block_name == "res_x_y":
                 output_channel = output_channel * block_params.get("multiplier", 2)
+            if block_name == "compress_all":
+                output_channel = output_channel * block_params.get("multiplier", 1)
         self.conv_in = make_conv_nd(
             dims,
                     dims=dims, in_channels=input_channel, stride=(1, 2, 2)
                 )
             elif block_name == "compress_all":
+                output_channel = output_channel // block_params.get("multiplier", 1)
                 block = DepthToSpaceUpsample(
                     dims=dims,
                     in_channels=input_channel,
                     stride=(2, 2, 2),
                     residual=block_params.get("residual", False),
+                    out_channels_reduction_factor=block_params.get("multiplier", 1),
                 )
             else:
                 raise ValueError(f"unknown layer: {block_name}")
 class DepthToSpaceUpsample(nn.Module):
+    def __init__(
+        self, dims, in_channels, stride, residual=False, out_channels_reduction_factor=1
+    ):
         super().__init__()
         self.stride = stride
+        self.out_channels = (
+            np.prod(stride) * in_channels // out_channels_reduction_factor
+        )
         self.conv = make_conv_nd(
             dims=dims,
             in_channels=in_channels,
             causal=True,
         )
         self.residual = residual
+        self.out_channels_reduction_factor = out_channels_reduction_factor
     def forward(self, x, causal: bool = True):
         if self.residual:
                 p2=self.stride[1],
                 p3=self.stride[2],
             )
+            num_repeat = np.prod(self.stride) // self.out_channels_reduction_factor
+            x_in = x_in.repeat(1, num_repeat, 1, 1, 1)
             if self.stride[0] == 2:
                 x_in = x_in[:, :, 1:, :, :]
         x = self.conv(x, causal=causal)