Kian Kenyon-Dean
committed
Reformat and add comments (#9)

- README.md +1 -1
- config.yaml +1 -0
- loss.py +13 -4
- mae_modules.py +1 -0
- mae_utils.py +8 -2
- masking.py +7 -2
- vit.py +34 -9
- vit_encoder.py +1 -0
README.md
CHANGED
@@ -34,7 +34,7 @@ def vit_base_patch16_256(**kwargs):
 ```
 
 ## Provided models
-A publicly available model for research can be found via Nvidia's BioNemo platform, which handles inference and auto-scaling
+A publicly available model for research can be found via Nvidia's BioNemo platform, which handles inference and auto-scaling: https://www.rxrx.ai/phenom
 
 We have partnered with Nvidia to host a publicly-available smaller and more flexible version of the MAE phenomics foundation model, called Phenom-Beta. Interested parties can access it directly through the Nvidia BioNemo API:
 - https://blogs.nvidia.com/blog/drug-discovery-bionemo-generative-ai/
config.yaml
CHANGED
@@ -1,3 +1,4 @@
+# © Recursion Pharmaceuticals 2024
 loss:
   _target_: torch.nn.MSELoss # combine with fourier loss weighted at 0.01 mixing factor for best results
   reduction: none
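The `_target_` key indicates the loss is built from config by an instantiation utility such as Hydra's `instantiate`; that consumer is not shown in this diff, so the loading code below is a minimal sketch under that assumption, not part of the repo.

```python
import torch
from hydra.utils import instantiate
from omegaconf import OmegaConf

# Load the YAML above and build the configured loss object from its _target_.
cfg = OmegaConf.load("config.yaml")
loss_fn = instantiate(cfg.loss)  # -> torch.nn.MSELoss(reduction="none")

pred, target = torch.rand(2, 6, 256, 256), torch.rand(2, 6, 256, 256)
per_pixel = loss_fn(pred, target)  # unreduced: same shape as the inputs
print(per_pixel.shape)  # torch.Size([2, 6, 256, 256])
```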
loss.py
CHANGED
@@ -1,3 +1,4 @@
+# © Recursion Pharmaceuticals 2024
 import torch
 import torch.nn as nn
 
@@ -16,7 +17,9 @@ class FourierLoss(nn.Module):
         output of this loss be managed by the model under question.
         """
         super().__init__()
-        self.loss = nn.L1Loss(reduction="none") if use_l1_loss else nn.MSELoss(reduction="none")
+        self.loss = (
+            nn.L1Loss(reduction="none") if use_l1_loss else nn.MSELoss(reduction="none")
+        )
         self.num_modalities = num_multimodal_modalities
 
     def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
@@ -34,7 +37,9 @@ class FourierLoss(nn.Module):
         H_W = h * w
 
         if len(input.shape) != len(target.shape) != 4:
-            raise ValueError(f"Invalid input shape: got {input.shape} and {target.shape}.")
+            raise ValueError(
+                f"Invalid input shape: got {input.shape} and {target.shape}."
+            )
 
         fft_reconstructed = torch.fft.fft2(input)
         fft_original = torch.fft.fft2(target)
@@ -42,9 +47,13 @@ class FourierLoss(nn.Module):
         magnitude_reconstructed = torch.abs(fft_reconstructed)
         magnitude_original = torch.abs(fft_original)
 
-        loss_tensor: torch.Tensor = self.loss(magnitude_reconstructed, magnitude_original)
+        loss_tensor: torch.Tensor = self.loss(
+            magnitude_reconstructed, magnitude_original
+        )
 
-        if flattened_images and not self.num_bins:  # then output loss should be reshaped
+        if (
+            flattened_images and not self.num_bins
+        ):  # then output loss should be reshaped
             loss_tensor = loss_tensor.reshape(B, H_W * self.num_modalities, C)
 
         return loss_tensor
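The reformatted lines spell out the core of `FourierLoss.forward`: an unreduced L1 or MSE loss between the FFT magnitudes of the reconstruction and the target. Below is a standalone sketch of that idea with the binning and reshape branches omitted; the function name and signature are illustrative, not the repo's API.

```python
import torch
import torch.nn as nn


def fourier_magnitude_loss(
    input: torch.Tensor, target: torch.Tensor, use_l1_loss: bool = False
) -> torch.Tensor:
    # elementwise loss so the caller decides how to reduce/mask it
    loss = nn.L1Loss(reduction="none") if use_l1_loss else nn.MSELoss(reduction="none")
    # compare images in frequency space: 2D FFT, then magnitude, then elementwise loss
    magnitude_reconstructed = torch.abs(torch.fft.fft2(input))
    magnitude_original = torch.abs(torch.fft.fft2(target))
    return loss(magnitude_reconstructed, magnitude_original)


pred, target = torch.rand(2, 6, 32, 32), torch.rand(2, 6, 32, 32)
print(fourier_magnitude_loss(pred, target).shape)  # torch.Size([2, 6, 32, 32])
```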
mae_modules.py
CHANGED
@@ -1,3 +1,4 @@
+# © Recursion Pharmaceuticals 2024
 from functools import partial
 from typing import Tuple, Union
 
mae_utils.py
CHANGED
@@ -1,9 +1,12 @@
+# © Recursion Pharmaceuticals 2024
 import math
 
 import torch
 
 
-def flatten_images(img: torch.Tensor, patch_size: int, channel_agnostic: bool = False) -> torch.Tensor:
+def flatten_images(
+    img: torch.Tensor, patch_size: int, channel_agnostic: bool = False
+) -> torch.Tensor:
     """
     Flattens 2D images into tokens with the same pixel values
 
@@ -33,7 +36,10 @@ def flatten_images(img: torch.Tensor, patch_size: int, channel_agnostic: bool =
 
 
 def unflatten_tokens(
-    tokens: torch.Tensor, patch_size: int, num_modalities: int = 1, channel_agnostic: bool = False
+    tokens: torch.Tensor,
+    patch_size: int,
+    num_modalities: int = 1,
+    channel_agnostic: bool = False,
 ) -> torch.Tensor:
     """
     Unflattens tokens (N,L,patch_size**2 * C) into image tensor (N,C,H,W) with the pixel values
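`flatten_images` and `unflatten_tokens` are the patchify/unpatchify pair used by the MAE: images `(N, C, H, W)` become tokens `(N, L, patch_size**2 * C)` and back. The round trip below is a minimal sketch of that operation assuming a plain (non-channel-agnostic, single-modality) layout; the names and exact dimension ordering are illustrative, not the repo's implementation.

```python
import torch


def patchify(img: torch.Tensor, patch_size: int) -> torch.Tensor:
    N, C, H, W = img.shape
    h, w = H // patch_size, W // patch_size
    x = img.reshape(N, C, h, patch_size, w, patch_size)
    x = x.permute(0, 2, 4, 3, 5, 1)                 # N, h, w, p, p, C
    return x.reshape(N, h * w, patch_size**2 * C)   # N, L, p*p*C


def unpatchify(tokens: torch.Tensor, patch_size: int, channels: int) -> torch.Tensor:
    N, L, _ = tokens.shape
    h = w = int(L**0.5)                             # assumes a square token grid
    x = tokens.reshape(N, h, w, patch_size, patch_size, channels)
    x = x.permute(0, 5, 1, 3, 2, 4)                 # N, C, h, p, w, p
    return x.reshape(N, channels, h * patch_size, w * patch_size)


img = torch.rand(2, 6, 64, 64)
assert torch.allclose(unpatchify(patchify(img, 16), 16, 6), img)
```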
masking.py
CHANGED
@@ -1,3 +1,4 @@
+# © Recursion Pharmaceuticals 2024
 from typing import Tuple, Union
 
 import torch
@@ -36,11 +37,15 @@ def transformer_random_masking(
 
     # get masked input
    tokens_to_keep = shuffled_tokens[:, :len_keep]  # keep the first len_keep indices
-    x_masked = torch.gather(x, dim=1, index=tokens_to_keep.unsqueeze(-1).repeat(1, 1, D))
+    x_masked = torch.gather(
+        x, dim=1, index=tokens_to_keep.unsqueeze(-1).repeat(1, 1, D)
+    )
 
     # get binary mask used for loss masking: 0 is keep, 1 is remove
     mask = torch.ones([N, L], device=x.device)
     mask[:, :len_keep] = 0
-    mask = torch.gather(mask, dim=1, index=ind_restore)  # unshuffle to get the binary mask
+    mask = torch.gather(
+        mask, dim=1, index=ind_restore
+    )  # unshuffle to get the binary mask
 
     return x_masked, mask, ind_restore
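The gathers above implement the standard MAE random-masking pattern: shuffle token indices, keep the first `len_keep`, and unshuffle a binary mask for the loss. A self-contained sketch of that pattern follows; the names mirror the diff, while the shuffle setup (random noise plus `argsort`) is an assumption since it sits outside the shown hunk.

```python
import torch


def random_masking(x: torch.Tensor, mask_ratio: float):
    N, L, D = x.shape
    len_keep = int(L * (1 - mask_ratio))

    noise = torch.rand(N, L, device=x.device)             # one random score per token
    shuffled_tokens = torch.argsort(noise, dim=1)         # random permutation of indices
    ind_restore = torch.argsort(shuffled_tokens, dim=1)   # inverse permutation

    # get masked input: keep the first len_keep shuffled indices
    tokens_to_keep = shuffled_tokens[:, :len_keep]
    x_masked = torch.gather(
        x, dim=1, index=tokens_to_keep.unsqueeze(-1).repeat(1, 1, D)
    )

    # binary mask in original token order: 0 is keep, 1 is remove
    mask = torch.ones([N, L], device=x.device)
    mask[:, :len_keep] = 0
    mask = torch.gather(mask, dim=1, index=ind_restore)   # unshuffle the mask
    return x_masked, mask, ind_restore


x = torch.rand(2, 196, 768)
x_masked, mask, ind_restore = random_masking(x, mask_ratio=0.75)
print(x_masked.shape, mask.shape)  # torch.Size([2, 49, 768]) torch.Size([2, 196])
```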
vit.py
CHANGED
@@ -1,9 +1,14 @@
+# © Recursion Pharmaceuticals 2024
 import timm.models.vision_transformer as vit
 import torch
 
 
 def generate_2d_sincos_pos_embeddings(
-    embedding_dim: int, length: int, scale: float = 10000.0, use_class_token: bool = True, num_modality: int = 1
+    embedding_dim: int,
+    length: int,
+    scale: float = 10000.0,
+    use_class_token: bool = True,
+    num_modality: int = 1,
 ) -> torch.nn.Parameter:
     """
     Generate 2Dimensional sin/cosine positional embeddings
@@ -30,16 +35,25 @@ def generate_2d_sincos_pos_embeddings(
     """
 
     linear_positions = torch.arange(length, dtype=torch.float32)
-    height_mesh, width_mesh = torch.meshgrid(linear_positions, linear_positions, indexing="ij")
+    height_mesh, width_mesh = torch.meshgrid(
+        linear_positions, linear_positions, indexing="ij"
+    )
     positional_dim = embedding_dim // 4  # accomodate h and w x cos and sin embeddings
-    positional_weights = torch.arange(positional_dim, dtype=torch.float32) / positional_dim
+    positional_weights = (
+        torch.arange(positional_dim, dtype=torch.float32) / positional_dim
+    )
     positional_weights = 1.0 / (scale**positional_weights)
 
     height_weights = torch.outer(height_mesh.flatten(), positional_weights)
     width_weights = torch.outer(width_mesh.flatten(), positional_weights)
 
     positional_encoding = torch.cat(
-        [torch.sin(height_weights), torch.cos(height_weights), torch.sin(width_weights), torch.cos(width_weights)],
+        [
+            torch.sin(height_weights),
+            torch.cos(height_weights),
+            torch.sin(width_weights),
+            torch.cos(width_weights),
+        ],
         dim=1,
     )[None, :, :]
 
@@ -73,11 +87,15 @@ class ChannelAgnosticPatchEmbed(vit.PatchEmbed):  # type: ignore[misc]
             bias=bias,
         )
         # channel-agnostic MAE has a single projection for all chans
-        self.proj = torch.nn.Conv2d(1, embed_dim, kernel_size=patch_size, stride=patch_size, bias=bias)
+        self.proj = torch.nn.Conv2d(
+            1, embed_dim, kernel_size=patch_size, stride=patch_size, bias=bias
+        )
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         in_chans = x.shape[1]
-        x = torch.stack([self.proj(x[:, i : i + 1]) for i in range(in_chans)], dim=2)  # single project for all chans
+        x = torch.stack(
+            [self.proj(x[:, i : i + 1]) for i in range(in_chans)], dim=2
+        )  # single project for all chans
         x = x.flatten(2).transpose(1, 2)  # BCMHW -> BNC
         return x
 
@@ -106,7 +124,9 @@ class ChannelAgnosticViT(vit.VisionTransformer):  # type: ignore[misc]
         return self.pos_drop(x)  # type: ignore[no-any-return]
 
 
-def channel_agnostic_vit(vit_backbone: vit.VisionTransformer, max_in_chans: int) -> vit.VisionTransformer:
+def channel_agnostic_vit(
+    vit_backbone: vit.VisionTransformer, max_in_chans: int
+) -> vit.VisionTransformer:
     # replace patch embedding with channel-agnostic version
     vit_backbone.patch_embed = ChannelAgnosticPatchEmbed(
         img_size=vit_backbone.patch_embed.img_size[0],
@@ -145,9 +165,14 @@ def sincos_positional_encoding_vit(
         the same ViT but with fixed no-grad positional encodings to add to vit patch encodings
     """
     # length: number of tokens along height or width of image after patching (assuming square)
-    length = vit_backbone.patch_embed.img_size[0] // vit_backbone.patch_embed.patch_size[0]
+    length = (
+        vit_backbone.patch_embed.img_size[0] // vit_backbone.patch_embed.patch_size[0]
+    )
     pos_embeddings = generate_2d_sincos_pos_embeddings(
-        vit_backbone.embed_dim, length=length, scale=scale, use_class_token=vit_backbone.cls_token is not None
+        vit_backbone.embed_dim,
+        length=length,
+        scale=scale,
+        use_class_token=vit_backbone.cls_token is not None,
    )
     # note, if the model had weight_init == 'skip', this might get overwritten
     vit_backbone.pos_embed = pos_embeddings
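Most of the vit.py churn is formatting of `generate_2d_sincos_pos_embeddings`, which builds fixed 2D sin/cos positional embeddings from outer products of grid positions and inverse-frequency weights. A standalone sketch of that core construction, with the `use_class_token` and `num_modality` handling omitted (the wrapper name is illustrative):

```python
import torch


def sincos_2d(embedding_dim: int, length: int, scale: float = 10000.0) -> torch.Tensor:
    positions = torch.arange(length, dtype=torch.float32)
    height_mesh, width_mesh = torch.meshgrid(positions, positions, indexing="ij")

    positional_dim = embedding_dim // 4  # h and w, each with a sin and a cos part
    freqs = torch.arange(positional_dim, dtype=torch.float32) / positional_dim
    freqs = 1.0 / (scale**freqs)        # inverse-frequency weights

    height_weights = torch.outer(height_mesh.flatten(), freqs)
    width_weights = torch.outer(width_mesh.flatten(), freqs)
    return torch.cat(
        [
            torch.sin(height_weights),
            torch.cos(height_weights),
            torch.sin(width_weights),
            torch.cos(width_weights),
        ],
        dim=1,
    )[None, :, :]  # (1, length * length, embedding_dim)


print(sincos_2d(768, 16).shape)  # torch.Size([1, 256, 768])
```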
vit_encoder.py
CHANGED
@@ -1,3 +1,4 @@
+# © Recursion Pharmaceuticals 2024
 from typing import Dict
 
 import timm.models.vision_transformer as vit