from dataclasses import dataclass, field
from typing import Any

import torch
import torch.nn as nn
import torch.nn.functional as F
from jaxtyping import Float
from torch import Tensor

from spar3d.models.illumination.reni.env_map import RENIEnvMap
from spar3d.models.utils import BaseModule


def rotation_6d_to_matrix(d6: torch.Tensor) -> torch.Tensor:
    """Convert the 6D rotation representation of Zhou et al. (CVPR 2019) to a
    batch of 3x3 rotation matrices via Gram-Schmidt orthogonalization.

    d6: (..., 6) tensor holding two raw 3D vectors per rotation.
    Returns a (..., 3, 3) tensor whose columns are the orthonormal basis
    vectors x, y, z.
    """
    assert d6.shape[-1] == 6, "Input tensor must have shape (..., 6)"

    def proj_u2a(u, a):
        r"""Project vector a onto vector u.

        u: batch x 3
        a: batch x 3
        """
        inner_prod = torch.sum(u * a, dim=-1, keepdim=True)
        norm2 = torch.sum(u**2, dim=-1, keepdim=True)
        # Clamp the squared norm so degenerate inputs cannot divide by zero.
        norm2 = torch.clamp(norm2, min=1e-8)
        return (inner_prod / norm2) * u

    x_raw, y_raw = d6[..., :3], d6[..., 3:]

    # Gram-Schmidt: normalize x, strip its component from y, then complete a
    # right-handed basis with the cross product.
    x = F.normalize(x_raw, dim=-1)
    y = F.normalize(y_raw - proj_u2a(x, y_raw), dim=-1)
    z = torch.cross(x, y, dim=-1)

    return torch.stack((x, y, z), dim=-1)
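
# Illustrative sanity check (hypothetical values, not part of the original
# code; a valid rotation matrix satisfies R @ R^T = I with det(R) = +1):
#   d6 = torch.randn(4, 6)
#   R = rotation_6d_to_matrix(d6)  # -> (4, 3, 3)
#   assert torch.allclose(
#       R @ R.transpose(-2, -1), torch.eye(3).expand(4, 3, 3), atol=1e-5
#   )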


class ReniLatentCodeEstimator(BaseModule):
    @dataclass
    class Config(BaseModule.Config):
        triplane_features: int = 40

        n_layers: int = 5
        hidden_features: int = 512
        activation: str = "relu"

        pool: str = "mean"

        reni_env_config: dict = field(default_factory=dict)

    cfg: Config

    def configure(self):
        layers = []
        # The three triplane planes are folded into the channel dimension.
        cur_features = self.cfg.triplane_features * 3
        for _ in range(self.cfg.n_layers):
            layers.append(
                nn.Conv2d(
                    cur_features,
                    self.cfg.hidden_features,
                    kernel_size=3,
                    padding=0,
                    stride=2,
                )
            )
            layers.append(self.make_activation(self.cfg.activation))
            cur_features = self.cfg.hidden_features

        self.layers = nn.Sequential(*layers)

        self.reni_env_map = RENIEnvMap(self.cfg.reni_env_config)
        self.latent_dim = self.reni_env_map.field.latent_dim

        # Prediction heads for the RENI latent code, a 6D rotation, and a
        # scalar scale; the rotation and scale heads use small-weight,
        # zero-bias initialization.
        self.fc_latents = nn.Linear(self.cfg.hidden_features, self.latent_dim * 3)
        nn.init.normal_(self.fc_latents.weight, mean=0.0, std=0.3)

        self.fc_rotations = nn.Linear(self.cfg.hidden_features, 6)
        nn.init.constant_(self.fc_rotations.bias, 0.0)
        nn.init.normal_(self.fc_rotations.weight, mean=0.0, std=0.01)

        self.fc_scale = nn.Linear(self.cfg.hidden_features, 1)
        nn.init.constant_(self.fc_scale.bias, 0.0)
        nn.init.normal_(self.fc_scale.weight, mean=0.0, std=0.01)
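
    # Shape note (a sketch, not part of the original code): each Conv2d above
    # uses kernel_size=3, padding=0, stride=2, so one layer maps spatial size
    # s to floor((s - 3) / 2) + 1. For a hypothetical 64x64 triplane plane the
    # default n_layers=5 gives 64 -> 31 -> 15 -> 7 -> 3 -> 1 before the global
    # pooling in forward(); the actual input resolution depends on the
    # upstream triplane generator.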

    def make_activation(self, activation):
        if activation == "relu":
            return nn.ReLU(inplace=True)
        elif activation == "silu":
            return nn.SiLU(inplace=True)
        else:
            raise NotImplementedError(f"Unsupported activation: {activation}")

    def forward(
        self,
        triplane: Float[Tensor, "B 3 F Ht Wt"],
    ) -> dict[str, Any]:
        # Merge the three planes into channels: (B, 3*F, Ht, Wt).
        x = self.layers(
            triplane.reshape(
                triplane.shape[0], -1, triplane.shape[-2], triplane.shape[-1]
            )
        )
        # Global pooling over the remaining spatial dimensions; only "mean"
        # (the Config default) is implemented.
        if self.cfg.pool == "mean":
            x = x.mean(dim=[-2, -1])
        else:
            raise NotImplementedError(f"Unsupported pool: {self.cfg.pool}")

        latents = self.fc_latents(x).reshape(-1, self.latent_dim, 3)
        rotations = self.fc_rotations(x)
        scale = self.fc_scale(x)

        env_map = self.reni_env_map(latents, rotation_6d_to_matrix(rotations), scale)

        return {"illumination": env_map["rgb"]}
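

# Usage sketch (hypothetical values; the real reni_env_config and triplane
# resolution come from the surrounding SPAR3D pipeline and its checkpoint):
#   estimator = ReniLatentCodeEstimator({"reni_env_config": {...}})
#   triplane = torch.randn(2, 3, 40, 64, 64)  # (B, 3, F, Ht, Wt)
#   out = estimator(triplane)
#   out["illumination"]  # predicted environment map RGB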