# climateGAN / climategan/trainer.py
# Hosting-page residue kept for provenance: "vict0rsch's picture",
# last commit "update from climategan space" (3d5f935).
"""
Main component: the trainer handles everything:
* initializations
* training
* saving
"""
import inspect
import warnings
from copy import deepcopy
from pathlib import Path
from time import time
import numpy as np
from comet_ml import ExistingExperiment, Experiment
warnings.simplefilter("ignore", UserWarning)
import torch
import torch.nn as nn
from addict import Dict
from torch import autograd, sigmoid, softmax
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm
from climategan.data import get_all_loaders, decode_segmap_merged_labels
from climategan.discriminator import OmniDiscriminator, create_discriminator
from climategan.eval_metrics import accuracy, mIOU
from climategan.fid import compute_val_fid
from climategan.fire import add_fire
from climategan.generator import OmniGenerator, create_generator
from climategan.logger import Logger
from climategan.losses import get_losses
from climategan.optim import get_optimizer
from climategan.transforms import DiffTransforms
from climategan.tutils import (
divide_pred,
get_num_params,
get_WGAN_gradient,
lrgb2srgb,
normalize,
print_num_parameters,
shuffle_batch_tuple,
srgb2lrgb,
tensor_to_uint8_numpy_image,
vgg_preprocess,
zero_grad,
)
from climategan.utils import (
comet_kwargs,
div_dict,
find_target_size,
flatten_opts,
get_display_indices,
get_existing_comet_id,
get_latest_opts,
merge,
resolve,
sum_dict,
Timer,
)
try:
import torch_xla.core.xla_model as xm # type: ignore
except ImportError:
pass
class Trainer:
"""Main trainer class"""
def __init__(self, opts, comet_exp=None, verbose=0, device=None):
    """Build an un-setup trainer: store options and initialise bookkeeping state.

    Models, loaders, optimizers and losses are NOT created here; call
    ``setup()`` for that.

    Args:
        opts (addict.Dict): options configuring the trainer, the data and
            the models.
        comet_exp (comet_ml.Experiment, optional): experiment to log to.
            Anything that is not an ``Experiment`` instance is ignored and
            ``self.exp`` stays ``None``. Defaults to None.
        verbose (int, optional): printing level to debug. Defaults to 0.
        device (torch.device, optional): device to use. When falsy, the
            first CUDA device is used if available, the CPU otherwise.

    Raises:
        ValueError: if ``opts.train.amp`` is set while the generator's or
            the discriminator's optimizer is ExtraAdam.
    """
    super().__init__()

    self.opts = opts
    self.verbose = verbose
    self.logger = Logger(self)

    # nothing is built before setup()
    self.losses = None
    self.G = None
    self.D = None
    self.real_val_fid_stats = None
    self.use_pl4m = False
    self.is_setup = False
    self.loaders = None
    self.all_loaders = None
    self.exp = None
    self.current_mode = "train"
    self.diff_transforms = None

    self.kitti_pretrain = self.opts.train.kitti.pretrain
    self.pseudo_training_tasks = set(self.opts.train.pseudo.tasks)

    self.lr_names = {}
    self.base_display_images = {}
    self.kitty_display_images = {}
    self.domain_labels = {"s": 0, "r": 1}

    if device:
        self.device = device
    else:
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu"
        )

    if isinstance(comet_exp, Experiment):
        self.exp = comet_exp

    if self.opts.train.amp:
        # mixed precision: one gradient scaler per optimized model
        optimizers = [
            self.opts.gen.opt.optimizer.lower(),
            self.opts.dis.opt.optimizer.lower(),
        ]
        if "extraadam" in optimizers:
            raise ValueError(
                "AMP does not work with ExtraAdam ({})".format(optimizers)
            )
        self.grad_scaler_d = GradScaler()
        self.grad_scaler_g = GradScaler()

    # -------------------------------
    # -----  Legacy Overwrites  -----
    # -------------------------------
    # either depth-fusion flag implies use_dada
    seg_opts = self.opts.gen.s
    if seg_opts.depth_feat_fusion is True or seg_opts.depth_dada_fusion is True:
        seg_opts.use_dada = True
@torch.no_grad()
def paint_and_mask(self, image_batch, mask_batch=None, resolution="approx"):
    """
    Paints a batch of images (or a single image with a batch dim of 1). If
    masks are not provided, they are inferred from the masker.

    Operations are performed without gradient. If the trainer is in "train"
    mode it is switched to eval mode for the call and switched back
    afterwards, even if painting raises.

    ``resolution`` selects the output size:

    * "approx": the painter's latent shape is temporarily set to
      ``dim // 2 ** spade_n_up`` for dim in [height, width]; per the original
      documentation the output then has shape
      ``(dim // 2 ** spade_n_up) * 2 ** spade_n_up``,
      eg: (1000, 1300) => (896, 1280) for spade_n_up = 7
    * "exact": processed as "approx", then bilinearly interpolated to the
      input's height and width
    * "basic": painted with the painter's current latent shape (the
      train-time one, typically 640x640 according to the original notes)
    * "upsample": painted as "basic", then bilinearly interpolated to the
      input's height and width

    Args:
        image_batch (torch.Tensor): 4D batch of images to flood
        mask_batch (torch.Tensor, optional): Masks for the images.
            Defaults to None (infer with Masker).
        resolution (str, optional): "approx", "exact", "basic" or
            "upsample". Defaults to "approx".

    Returns:
        torch.Tensor: N x C x H x W where H and W depend on `resolution`
    """
    assert resolution in {"approx", "exact", "basic", "upsample"}

    previous_mode = self.current_mode
    if previous_mode == "train":
        self.eval_mode()

    try:
        if mask_batch is None:
            mask_batch = self.G.mask(x=image_batch)
        else:
            assert len(image_batch) == len(mask_batch)
            assert image_batch.shape[-2:] == mask_batch.shape[-2:]

        if resolution in {"basic", "upsample"}:
            painted = self.G.paint(mask_batch, image_batch)
        else:
            painter = self.G.painter
            # save latent shape
            saved_latent_shape = (painter.z_h, painter.z_w)
            # adapt latent shape to approximately keep the resolution
            factor = 2 ** self.opts.gen.p.spade_n_up
            painter.z_h = image_batch.shape[-2] // factor
            painter.z_w = image_batch.shape[-1] // factor
            try:
                painted = self.G.paint(mask_batch, image_batch)
            finally:
                # never leave the painter with a per-call latent shape
                painter.z_h, painter.z_w = saved_latent_shape

        if resolution in {"exact", "upsample"}:
            painted = nn.functional.interpolate(
                painted, size=image_batch.shape[-2:], mode="bilinear"
            )
    finally:
        if previous_mode == "train":
            self.train_mode()

    return painted
def _p(self, *args, **kwargs):
"""
verbose-dependant print util
"""
if self.verbose > 0:
print(*args, **kwargs)
@torch.no_grad()
def infer_all(
    self,
    x,
    numpy=True,
    stores={},
    bin_value=-1,
    half=False,
    xla=False,
    cloudy=True,
    auto_resize_640=False,
    ignore_event=set(),
    return_intermediates=False,
):
    """
    Create a dictionary of events from a numpy or tensor,
    single or batch image data.

    The input is converted to a tensor on ``self.device``, given a batch
    dimension if it has 3 dimensions, and permuted to channels-first when
    its second dimension is not 3 (the last one must then be 3).

    Args:
        x (numpy.ndarray | torch.Tensor): 3D or 4D image data.
        numpy (bool, optional): return uint8 numpy arrays instead of the
            tensors produced by the model. Defaults to True.
        stores (dict, optional): dictionary of time stores for the Timer
            class, looked up by stage name ("all events", "encode",
            "depth", "segmentation", "mask", "wildfire", "smog", "flood",
            "numpy"). Only read here, never mutated.
        bin_value (float, optional): used to binarize (or not) flood masks;
            forwarded to ``compute_flood`` and used as threshold for the
            numpy "mask" intermediate. Defaults to -1.
        half (bool, optional): cast the input to half precision.
        xla (bool, optional): call ``xm.mark_step()`` after each stage
            (requires torch_xla to have been imported).
        cloudy (bool, optional): forwarded to ``compute_flood``.
        auto_resize_640 (bool, optional): bilinearly resize the input to
            640x640 when it has another size.
        ignore_event (set, optional): names among "flood", "wildfire" and
            "smog" that should not be computed. Only read here.
        return_intermediates (bool, optional): also return "mask", "depth"
            and "segmentation".

    Returns:
        dict: one entry per non-ignored event (plus the intermediates if
            requested). Event values are uint8 numpy images if ``numpy``,
            otherwise the tensors returned by the ``compute_*`` methods.
    """
    assert self.is_setup
    assert len(x.shape) in {3, 4}, f"Unknown Data shape {x.shape}"

    # convert numpy to tensor
    if not isinstance(x, torch.Tensor):
        x = torch.tensor(x, device=self.device)

    # add batch dimension (out-of-place: do not reshape the caller's tensor)
    if len(x.shape) == 3:
        x = x.unsqueeze(0)

    # permute channels as second dimension
    if x.shape[1] != 3:
        assert x.shape[-1] == 3, f"Unknown x shape to permute {x.shape}"
        x = x.permute(0, 3, 1, 2)

    # send to device
    if x.device != self.device:
        x = x.to(self.device)

    # interpolate to standard input size
    if auto_resize_640 and (x.shape[-1] != 640 or x.shape[-2] != 640):
        x = torch.nn.functional.interpolate(x, (640, 640), mode="bilinear")

    if half:
        x = x.half()

    # adjust painter's latent vector
    self.G.painter.set_latent_shape(x.shape, True)

    # event name -> tensor, only for the events that are not ignored
    events = {}

    with Timer(store=stores.get("all events", [])):
        # encode
        with Timer(store=stores.get("encode", [])):
            z = self.G.encode(x)
            if xla:
                xm.mark_step()

        # predict from masker
        with Timer(store=stores.get("depth", [])):
            depth, z_depth = self.G.decoders["d"](z)
            if xla:
                xm.mark_step()
        with Timer(store=stores.get("segmentation", [])):
            segmentation = self.G.decoders["s"](z, z_depth)
            if xla:
                xm.mark_step()
        with Timer(store=stores.get("mask", [])):
            cond = self.G.make_m_cond(depth, segmentation, x)
            mask = self.G.mask(z=z, cond=cond, z_depth=z_depth)
            if xla:
                xm.mark_step()

        # apply events
        if "wildfire" not in ignore_event:
            with Timer(store=stores.get("wildfire", [])):
                events["wildfire"] = self.compute_fire(x, seg_preds=segmentation)
        if "smog" not in ignore_event:
            with Timer(store=stores.get("smog", [])):
                events["smog"] = self.compute_smog(x, d=depth, s=segmentation)
        if "flood" not in ignore_event:
            with Timer(store=stores.get("flood", [])):
                events["flood"] = self.compute_flood(
                    x,
                    m=mask,
                    s=segmentation,
                    cloudy=cloudy,
                    bin_value=bin_value,
                )

    if xla:
        xm.mark_step()

    output_data = {}
    present = [name for name in ("flood", "wildfire", "smog") if name in events]

    if numpy:
        with Timer(store=stores.get("numpy", [])):
            for name in present:
                output_data[name] = tensor_to_uint8_numpy_image(events[name])
    else:
        # tensors were previously computed and then dropped in this mode
        for name in present:
            output_data[name] = events[name]

    if return_intermediates:
        if numpy:
            output_data["mask"] = (
                ((mask > bin_value) * 255).cpu().numpy().astype(np.uint8)
            )
            output_data["depth"] = tensor_to_uint8_numpy_image(depth)
            output_data["segmentation"] = (
                decode_segmap_merged_labels(segmentation, "r", False)
                .cpu()
                .permute(0, 2, 3, 1)
                .numpy()
                .astype(np.uint8)
            )
        else:
            output_data["mask"] = mask
            output_data["depth"] = depth
            output_data["segmentation"] = segmentation

    return output_data
@classmethod
def resume_from_path(
    cls,
    path,
    overrides={},
    setup=True,
    inference=False,
    new_exp=False,
    device=None,
    verbose=1,
):
    """
    Build a trainer from an existing run directory, using the latest opts
    found there, and optionally set it up (which resumes the checkpoint,
    since ``opts.train.resume`` is forced to True).

    The directory must exist and contain a ``checkpoints/`` sub-directory.

    Args:
        path (str | pathlib.Path): Run directory of the trainer to resume
        overrides (dict, optional): Merged with the loaded opts.
            Defaults to {}.
        setup (bool, optional): Whether or not to call ``setup()`` on the
            trainer before returning it. Defaults to True.
        inference (bool, optional): Forwarded to ``setup()``.
            Defaults to False.
        new_exp (bool | None, optional): ``None`` -> no comet experiment,
            ``True`` -> create a new one, anything else -> re-use the
            existing experiment whose id is stored in ``path``.
            Defaults to False.
        device (torch.device, optional): Device to use
        verbose (int, optional): Trainer verbosity. Defaults to 1.

    Returns:
        climategan.Trainer: Loaded (and possibly set up) trainer
    """
    run_dir = resolve(path)
    assert run_dir.exists()
    ckpt_dir = run_dir / "checkpoints"
    assert ckpt_dir.exists() and ckpt_dir.is_dir()

    latest_opts = get_latest_opts(run_dir)
    opts = Dict(merge(overrides, latest_opts))
    opts.train.resume = True

    exp = None
    if new_exp is True:
        # brand new experiment: log the package's code and the options
        exp = Experiment(project_name="climategan", **comet_kwargs)
        package_dir = str(resolve(Path(__file__)).parent)
        exp.log_asset_folder(package_dir, recursive=True, log_file_name=True)
        exp.log_parameters(flatten_opts(opts))
    elif new_exp is not None:
        # continue logging to the run's previous experiment
        previous_id = get_existing_comet_id(run_dir)
        exp = ExistingExperiment(previous_experiment=previous_id, **comet_kwargs)

    trainer = cls(opts, comet_exp=exp, device=device, verbose=verbose)
    if setup:
        trainer.setup(inference=inference)
    return trainer
def save(self):
    """Write the current training state to ``<output_path>/checkpoints``.

    The checkpoint always holds the generator, its optimizer, the epoch and
    the global step; the discriminator and its optimizer are added when a
    discriminator with parameters exists.

    ``latest_ckpt.pth`` is overwritten on every call. An additional
    ``epoch_<epoch>_ckpt.pth`` copy is written when the epoch is at least
    ``opts.train.min_save_epoch`` and a multiple of
    ``opts.train.save_n_epochs``.
    """
    save_dir = Path(self.opts.output_path) / "checkpoints"
    # parents=True: do not fail when the output directory does not exist yet
    save_dir.mkdir(parents=True, exist_ok=True)
    save_path = save_dir / "latest_ckpt.pth"

    # Construct relevant state dicts / optims:
    # Save at least G
    epoch = self.logger.epoch
    save_dict = {
        "epoch": epoch,
        "G": self.G.state_dict(),
        "g_opt": self.g_opt.state_dict(),
        "step": self.logger.global_step,
    }

    if self.D is not None and get_num_params(self.D) > 0:
        save_dict["D"] = self.D.state_dict()
        save_dict["d_opt"] = self.d_opt.state_dict()

    if (
        epoch >= self.opts.train.min_save_epoch
        and epoch % self.opts.train.save_n_epochs == 0
    ):
        torch.save(save_dict, save_dir / f"epoch_{epoch}_ckpt.pth")

    torch.save(save_dict, save_path)
def resume(self, inference=False):
    """Load a checkpoint and restore the trainer's state from it.

    Which file(s) are loaded depends on ``opts.tasks`` and on
    ``opts.load_paths`` (``m``, ``p``, ``pm``; the string "none" means "not
    provided"). Paths may be run directories (``checkpoints/latest_ckpt.pth``
    is then appended) or checkpoint files. When nothing is provided the
    checkpoint is read from ``opts.output_path``.

    Args:
        inference (bool, optional): only restore the generator, non-strictly
            (missing / unexpected keys are printed, not raised), and return.
            Otherwise also restore the optimizers, the discriminator (when
            it has parameters) and the logger's epoch and step.
            Defaults to False.

    Raises:
        ValueError: if the provided load_paths are inconsistent with
            ``opts.tasks``.
    """
    tpu = "xla" in str(self.device)
    if tpu:
        print("Resuming on TPU:", self.device)

    m_path = Path(self.opts.load_paths.m)
    p_path = Path(self.opts.load_paths.p)
    pm_path = Path(self.opts.load_paths.pm)
    output_path = Path(self.opts.output_path)

    map_loc = self.device if not tpu else "cpu"

    if "m" in self.opts.tasks and "p" in self.opts.tasks:
        # ----------------------------------------
        # -----  Masker and Painter Loading  -----
        # ----------------------------------------

        # want to resume a pm model but no path was provided:
        # resume a single pm model from output_path
        if all(str(p) == "none" for p in (m_path, p_path, pm_path)):
            checkpoint_path = output_path / "checkpoints/latest_ckpt.pth"
            print("Resuming P+M model from", str(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=map_loc)

        # want to resume a pm model with a pm_path provided:
        # resume a single pm model from load_paths.pm
        # depending on whether a dir or a file is specified
        elif str(pm_path) != "none":
            assert pm_path.exists()

            if pm_path.is_dir():
                checkpoint_path = pm_path / "checkpoints/latest_ckpt.pth"
            else:
                assert pm_path.suffix == ".pth"
                checkpoint_path = pm_path

            print("Resuming P+M model from", str(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=map_loc)

        # want to resume a pm model, pm_path not provided:
        # m_path and p_path must be provided as dirs or pth files
        elif m_path != p_path:
            assert m_path.exists()
            assert p_path.exists()

            if m_path.is_dir():
                m_path = m_path / "checkpoints/latest_ckpt.pth"

            if p_path.is_dir():
                p_path = p_path / "checkpoints/latest_ckpt.pth"

            assert m_path.suffix == ".pth"
            assert p_path.suffix == ".pth"

            print(f"Resuming P+M model from \n -{p_path} \nand \n -{m_path}")
            m_checkpoint = torch.load(m_path, map_location=map_loc)
            p_checkpoint = torch.load(p_path, map_location=map_loc)
            checkpoint = merge(m_checkpoint, p_checkpoint)

        else:
            raise ValueError(
                "Cannot resume a P+M model with provided load_paths:\n{}".format(
                    self.opts.load_paths
                )
            )

    else:
        # ----------------------------------
        # -----  Single Model Loading  -----
        # ----------------------------------

        # cannot specify both paths
        if str(m_path) != "none" and str(p_path) != "none":
            raise ValueError(
                "Opts tasks are {} but received 2 values for the load_paths".format(
                    self.opts.tasks
                )
            )

        # specified m
        elif str(m_path) != "none":
            assert m_path.exists()
            assert "m" in self.opts.tasks
            model = "M"
            if m_path.is_dir():
                m_path = m_path / "checkpoints/latest_ckpt.pth"
            checkpoint_path = m_path

        # specified p
        elif str(p_path) != "none":
            assert p_path.exists()
            assert "p" in self.opts.tasks
            model = "P"
            if p_path.is_dir():
                p_path = p_path / "checkpoints/latest_ckpt.pth"
            checkpoint_path = p_path

        # specified neither p nor m: resume from output_path
        else:
            model = "P" if "p" in self.opts.tasks else "M"
            checkpoint_path = output_path / "checkpoints/latest_ckpt.pth"

        print(f"Resuming {model} model from {checkpoint_path}")
        checkpoint = torch.load(checkpoint_path, map_location=map_loc)

    # On TPUs must send the data to the xla device as it cannot be mapped
    # there directly from torch.load
    if tpu:
        checkpoint = xm.send_cpu_data_to_device(checkpoint, self.device)

    # -----------------------
    # -----  Restore G  -----
    # -----------------------
    if inference:
        incompatible_keys = self.G.load_state_dict(checkpoint["G"], strict=False)
        if incompatible_keys.missing_keys:
            print("WARNING: Missing keys in self.G.load_state_dict, keeping inits")
            print(incompatible_keys.missing_keys)
        if incompatible_keys.unexpected_keys:
            print("WARNING: Ignoring Unexpected keys in self.G.load_state_dict")
            print(incompatible_keys.unexpected_keys)
    else:
        self.G.load_state_dict(checkpoint["G"])

    if inference:
        # only G is needed to infer
        print("Done loading checkpoints.")
        return

    self.g_opt.load_state_dict(checkpoint["g_opt"])

    # ------------------------------
    # -----  Resume scheduler  -----
    # ------------------------------
    # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822
    # NOTE(review): this runs before self.logger.epoch is restored from the
    # checkpoint below, so it uses the logger's pre-resume epoch -- confirm
    # that this is intended.
    for _ in range(self.logger.epoch + 1):
        self.update_learning_rates()

    # -----------------------
    # -----  Restore D  -----
    # -----------------------
    if self.D is not None and get_num_params(self.D) > 0:
        self.D.load_state_dict(checkpoint["D"])
        self.d_opt.load_state_dict(checkpoint["d_opt"])

    # ----------------------------
    # -----  Restore logger  -----
    # ----------------------------
    self.logger.epoch = checkpoint["epoch"]
    self.logger.global_step = checkpoint["step"]
    # the comet experiment is optional: do not crash when there is none
    if self.exp is not None:
        self.exp.log_text(
            "Resuming from epoch {} & step {}".format(
                checkpoint["epoch"], checkpoint["step"]
            )
        )
    # Round step to even number for extraGradient
    if self.logger.global_step % 2 != 0:
        self.logger.global_step += 1
def eval_mode(self):
    """
    Put every existing model (G, then D) in eval mode and record it in
    ``self.current_mode``.
    """
    for model in (self.G, self.D):
        if model is None:
            continue
        model.eval()
    self.current_mode = "eval"
def train_mode(self):
    """
    Put every existing model (G, then D) in train mode and record it in
    ``self.current_mode``.
    """
    for model in (self.G, self.D):
        if model is None:
            continue
        model.train()
    self.current_mode = "train"
def assert_z_matches_x(self, x, z):
    """Assert that ``x`` and ``z`` have the same first (batch) dimension.

    When ``z`` is a list or a tuple, its first element is compared.
    """
    reference = z[0] if isinstance(z, (list, tuple)) else z
    assert x.shape[0] == reference.shape[0], "x-> {}, z->{}".format(
        x.shape, reference.shape
    )
def batch_to_device(self, b):
    """Move every entry of ``b["data"]`` to ``self.device``, in place.

    Args:
        b (dict): the batch dictionary; ``b["data"]`` maps task names to
            objects exposing ``.to(device)``

    Returns:
        dict: the same batch dictionary, with its "data" entries replaced by
            their on-device versions
    """
    data = b["data"]
    for task in list(data):
        data[task] = data[task].to(self.device)
    return b
def sample_painter_z(self, batch_size):
    """Delegate to the generator's ``sample_painter_z`` on ``self.device``."""
    sampler = self.G.sample_painter_z
    return sampler(batch_size, self.device)
@property
def train_loaders(self):
    """Iterate over all the training loaders in lockstep.

    Returns:
        zip: yields one tuple per step holding one batch from each loader in
            ``self.loaders["train"]``; stops with the shortest loader
    """
    per_domain_loaders = self.loaders["train"].values()
    return zip(*per_domain_loaders)
@property
def val_loaders(self):
    """Iterate over all the validation loaders in lockstep.

    Returns:
        zip: yields one tuple per step holding one batch from each loader in
            ``self.loaders["val"]``; stops with the shortest loader
    """
    per_domain_loaders = self.loaders["val"].values()
    return zip(*per_domain_loaders)
def compute_latent_shape(self):
    """Compute the latent shape, i.e. the Encoder's output shape,
    from the first sample of the first available loader.

    Raises:
        ValueError: If there is no loader, the latent shape cannot be
            inferred

    Returns:
        tuple: the encoder output's shape without its batch dimension
            (that of its first element if the encoder returns a list or a
            tuple)
    """
    x = None
    # all_loaders is None until setup() has loaded the data
    all_loaders = self.all_loaders if self.all_loaders is not None else {}
    for mode in all_loaders:
        # loaders are indexed as all_loaders[mode][domain]
        for domain in all_loaders[mode]:
            x = all_loaders[mode][domain].dataset[0]["data"]["x"].to(self.device)
            break
        if x is not None:
            break

    if x is None:
        raise ValueError("No batch found to compute_latent_shape")

    # add the batch dimension expected by the encoder
    x = x.unsqueeze(0)
    z = self.G.encode(x)
    return z.shape[1:] if not isinstance(z, (list, tuple)) else z[0].shape[1:]
def g_opt_step(self):
    """Advance the generator's optimizer.

    With an "extra" optimizer (ExtraAdam), even global steps run an
    extrapolation step and odd ones a regular step; any other optimizer
    always runs a regular step.
    """
    uses_extra = "extra" in self.opts.gen.opt.optimizer.lower()
    if uses_extra and self.logger.global_step % 2 == 0:
        self.g_opt.extrapolation()
        return
    self.g_opt.step()
def d_opt_step(self):
    """Advance the discriminator's optimizer.

    With an "extra" optimizer (ExtraAdam), even global steps run an
    extrapolation step and odd ones a regular step; any other optimizer
    always runs a regular step.
    """
    uses_extra = "extra" in self.opts.dis.opt.optimizer.lower()
    if uses_extra and self.logger.global_step % 2 == 0:
        self.d_opt.extrapolation()
        return
    self.d_opt.step()
def update_learning_rates(self):
    """Step the generator's scheduler, then the discriminator's, skipping
    any that is ``None``."""
    g_scheduler = self.g_scheduler
    if g_scheduler is not None:
        g_scheduler.step()

    d_scheduler = self.d_scheduler
    if d_scheduler is not None:
        d_scheduler.step()
def setup(self, inference=False):
    """Prepare the trainer before it can be used to train or infer.

    In inference mode only the generator is created (with ``no_init=True``),
    the data source is set to "base", the checkpoint is resumed if
    ``opts.train.resume`` is set and the trainer is left in eval mode.

    Otherwise this method, in order:
        * loads all the data loaders
        * creates G and D
        * creates the G optimizer, and the D one if D has parameters
        * creates the losses and, if enabled, the painter's
          differentiable augmentations
        * selects the display images and logs the architectures
        * selects the data source ("kitti" when pre-training, else "base")
        * resumes from a checkpoint if ``opts.train.resume`` is set

    In both cases ``self.is_setup`` is True on return.

    Args:
        inference (bool, optional): set the trainer up for inference only.
            Defaults to False.
    """
    self.logger.global_step = 0
    start_time = time()
    self.logger.time.start_time = start_time
    verbose = self.verbose

    # no data is needed to infer
    if not inference:
        self.all_loaders = get_all_loaders(self.opts)

    # -----------------------
    # -----  Generator  -----
    # -----------------------
    __t = time()
    print("Creating generator...")

    self.G: OmniGenerator = create_generator(
        self.opts, device=self.device, no_init=inference, verbose=verbose
    )

    # a painter is available if G has painter parameters or if
    # G.load_val_painter() returns something truthy
    self.has_painter = get_num_params(self.G.painter) or self.G.load_val_painter()

    if self.has_painter:
        self.G.painter.set_latent_shape(find_target_size(self.opts, "x"), True)

    print(f"Generator OK in {time() - __t:.1f}s.")

    if inference:  # Inference mode: no more than a Generator needed
        print("Inference mode: no Discriminator, no optimizers")
        print_num_parameters(self)
        self.switch_data(to="base")
        if self.opts.train.resume:
            self.resume(True)
        self.eval_mode()
        print("Trainer is in evaluation mode.")
        print("Setup done.")
        self.is_setup = True
        return

    # ---------------------------
    # -----  Discriminator  -----
    # ---------------------------

    self.D: OmniDiscriminator = create_discriminator(
        self.opts, self.device, verbose=verbose
    )
    print("Discriminator OK.")

    print_num_parameters(self)

    # --------------------------
    # -----  Optimization  -----
    # --------------------------
    # Get different optimizers for each task (different learning rates)
    self.g_opt, self.g_scheduler, self.lr_names["G"] = get_optimizer(
        self.G, self.opts.gen.opt, self.opts.tasks
    )

    # D may have no parameter at all, in which case it is not optimized
    if get_num_params(self.D) > 0:
        self.d_opt, self.d_scheduler, self.lr_names["D"] = get_optimizer(
            self.D, self.opts.dis.opt, self.opts.tasks, True
        )
    else:
        self.d_opt, self.d_scheduler = None, None

    self.losses = get_losses(self.opts, verbose, device=self.device)

    if "p" in self.opts.tasks and self.opts.gen.p.diff_aug.use:
        self.diff_transforms = DiffTransforms(self.opts.gen.p.diff_aug)

    # print the size of each loader's dataset
    if verbose > 0:
        for mode, mode_dict in self.all_loaders.items():
            for domain, domain_loader in mode_dict.items():
                print(
                    "Loader {} {} : {}".format(
                        mode, domain, len(domain_loader.dataset)
                    )
                )

    # ----------------------------
    # -----  Display images  -----
    # ----------------------------
    self.set_display_images()

    # -------------------------------
    # -----  Log Architectures  -----
    # -------------------------------
    self.logger.log_architecture()

    # -----------------------------
    # -----  Set data source  -----
    # -----------------------------
    if self.kitti_pretrain:
        self.switch_data(to="kitti")
    else:
        self.switch_data(to="base")

    # -------------------------
    # -----  Setup Done.  -----
    # -------------------------
    # overwrite the "\r"-terminated progress line left by set_display_images
    print(" " * 50, end="\r")
    print("Done creating display images")

    if self.opts.train.resume:
        print("Resuming Model (inference: False)")
        self.resume(False)
    else:
        print("Not resuming: starting a new model")

    print("Setup done.")
    self.is_setup = True
def switch_data(self, to="kitti"):
    """Select which loaders and display images the trainer works with.

    Args:
        to (str, optional): "kitti" keeps only the "kitti" loader of each
            mode, exposed under the "s" domain; any other value keeps every
            loader but the "kitti" ones. Defaults to "kitti".

    ``self.loaders`` is only rebuilt when ``self.all_loaders`` exists. With
    an "extra" discriminator optimizer, an odd global step is bumped to the
    next even one so that training restarts on an extrapolation step.
    """
    # name of the calling function, for the log line only
    caller = inspect.stack()[1].function
    print(f"[{caller}] Switching data source to", to)
    self.data_source = to

    to_kitti = to == "kitti"
    if to_kitti:
        self.display_images = self.kitty_display_images
    else:
        self.display_images = self.base_display_images

    if self.all_loaders is not None:
        if to_kitti:
            selected = {}
            for mode in self.all_loaders:
                selected[mode] = {"s": self.all_loaders[mode]["kitti"]}
        else:
            selected = {}
            for mode in self.all_loaders:
                selected[mode] = {
                    domain: self.all_loaders[mode][domain]
                    for domain in self.all_loaders[mode]
                    if domain != "kitti"
                }
        self.loaders = selected

    step_is_odd = self.logger.global_step % 2 != 0
    if step_is_odd and "extra" in self.opts.dis.opt.optimizer.lower():
        print(
            "Warning: artificially bumping step to run an extrapolation step first."
        )
        self.logger.global_step += 1
def set_display_images(self, use_all=False):
    """Select, per mode and domain, the dataset items used as display images.

    "kitti" items go to ``self.kitty_display_images`` and are only selected
    while ``self.kitti_pretrain`` is set; every other domain goes to
    ``self.base_display_images``. When a comet experiment is attached, the
    ``paths`` entry of each selected item is logged as a parameter.

    Args:
        use_all (bool, optional): use every item of each dataset instead of
            the indices given by ``get_display_indices``. Defaults to False.
    """
    for mode, mode_dict in self.all_loaders.items():
        if self.kitti_pretrain:
            self.kitty_display_images[mode] = {}
        self.base_display_images[mode] = {}

        for domain in mode_dict:
            if domain == "kitti":
                if not self.kitti_pretrain:
                    continue
                target_dict = self.kitty_display_images
            else:
                target_dict = self.base_display_images

            dataset = self.all_loaders[mode][domain].dataset
            if use_all:
                display_indices = list(range(len(dataset)))
            else:
                display_indices = get_display_indices(
                    self.opts, domain, len(dataset)
                )
            n_images = len(display_indices)
            print(
                f" Creating {n_images} {mode} {domain} display images...",
                end="\r",
                flush=True,
            )

            # progress is printed for every index, out-of-range ones are skipped
            selected = []
            for i in display_indices:
                print(f"({i})", end="\r")
                if i < len(dataset):
                    selected.append(Dict(dataset[i]))
            target_dict[mode][domain] = selected

            if self.exp is None:
                continue
            for im_id, d in enumerate(target_dict[mode][domain]):
                self.exp.log_parameter(
                    "display_image_{}_{}_{}".format(mode, domain, im_id),
                    d["paths"],
                )
def train(self):
    """Run ``opts.train.epochs`` epochs starting from the logger's current
    epoch. For each epoch:
        * enable pl4m when its starting epoch is reached
        * train
        * eval
        * save
        * end the kitti pre-training / the pseudo training when their last
          epoch is reached
    """
    assert self.is_setup

    first_epoch = self.logger.epoch
    last_epoch = self.logger.epoch + self.opts.train.epochs

    for epoch in range(first_epoch, last_epoch):
        # the logger's epoch is the loop variable
        self.logger.epoch = epoch

        # backprop painter's disc loss to masker
        if (
            self.logger.epoch == self.opts.gen.p.pl4m_epoch
            and get_num_params(self.G.painter) > 0
            and "p" in self.opts.tasks
            and self.opts.gen.m.use_pl4m
        ):
            print(
                "\n\n >>> Enabling pl4m at epoch {}\n\n".format(self.logger.epoch)
            )
            self.use_pl4m = True

        self.run_epoch()
        self.run_evaluation(verbose=1)
        self.save()

        # end vkitti2 pre-training
        kitti_last_epoch = self.opts.train.kitti.epochs - 1
        if self.logger.epoch == kitti_last_epoch:
            self.switch_data(to="base")
            self.kitti_pretrain = False

        # end pseudo training
        pseudo_last_epoch = self.opts.train.pseudo.epochs - 1
        if self.logger.epoch == pseudo_last_epoch:
            self.pseudo_training_tasks = set()
def run_epoch(self):
    """Train for one epoch:
    * checks the trainer is setup and puts it in train mode
    * iterates over tuples holding one batch per domain
    * sends the batches to the device
    * updates G with D frozen, then (outside of kitti pre-training) D
    * steps the learning-rate schedulers once at the end, outside of kitti
      pre-training
    """
    assert self.is_setup
    self.train_mode()

    if self.exp is not None:
        self.exp.log_parameter("epoch", self.logger.epoch)

    # zip() stops with the shortest loader
    n_steps = min(len(loader) for loader in self.loaders["train"].values())
    description = "Epoch {}".format(self.logger.epoch)
    self.logger.time.epoch_start = time()

    progress = tqdm(
        self.train_loaders,
        desc=description,
        total=n_steps,
        mininterval=0.5,
        unit="batch",
    )
    for batches in progress:
        self.logger.time.step_start = time()
        batches = shuffle_batch_tuple(batches)

        # The `[0]` is because the domain is contained in a list
        multi_domain_batch = {
            batch["domain"][0]: self.batch_to_device(batch) for batch in batches
        }

        has_d_opt = self.d_opt is not None

        # ------------------------------
        # -----  Update Generator  -----
        # ------------------------------

        # freeze params of the discriminator
        if has_d_opt:
            for param in self.D.parameters():
                param.requires_grad = False

        self.update_G(multi_domain_batch)

        # ----------------------------------
        # -----  Update Discriminator  -----
        # ----------------------------------

        # unfreeze params of the discriminator
        if has_d_opt and not self.kitti_pretrain:
            for param in self.D.parameters():
                param.requires_grad = True

            self.update_D(multi_domain_batch)

        # -------------------------
        # -----  Log Metrics  -----
        # -------------------------
        self.logger.global_step += 1
        self.logger.log_step_time(time())

    if not self.kitti_pretrain:
        self.update_learning_rates()

    self.logger.log_learning_rates()
    self.logger.log_epoch_time(time())
def update_G(self, multi_domain_batch, verbose=0):
    """Perform one update of G from multi_domain_batch, a dictionary
    domain => batch:

    * zero G's gradients
    * compute the generator's loss with ``get_G_loss``
    * with ``opts.train.amp``: forward under autocast, then backward / step /
      update through the generator's gradient scaler
    * otherwise: ``loss.backward()`` then ``g_opt_step()`` (regular or
      extrapolation step depending on ``self.logger.global_step``)
    * log the losses with ``self.logger.log_losses(model_to_update="G")``

    Args:
        multi_domain_batch (dict): dictionary of domain batches
        verbose (int, optional): forwarded to ``get_G_loss``. Defaults to 0.
    """
    zero_grad(self.G)

    if not self.opts.train.amp:
        g_loss = self.get_G_loss(multi_domain_batch, verbose)
        g_loss.backward()
        self.g_opt_step()
    else:
        with autocast():
            g_loss = self.get_G_loss(multi_domain_batch, verbose)
        scaler = self.grad_scaler_g
        scaler.scale(g_loss).backward()
        scaler.step(self.g_opt)
        scaler.update()

    self.logger.log_losses(model_to_update="G", mode="train")
def update_D(self, multi_domain_batch, verbose=0):
    """Perform one update of D from multi_domain_batch, a dictionary
    domain => batch:

    * zero D's gradients
    * compute the discriminators' loss with ``get_D_loss``
    * with ``opts.train.amp``: forward under autocast, then backward / step /
      update through the discriminator's gradient scaler
    * otherwise: ``loss.backward()`` then ``d_opt_step()``
    * store the total loss in the logger and log the losses

    Args:
        multi_domain_batch (dict): dictionary of domain batches
        verbose (int, optional): forwarded to ``get_D_loss``. Defaults to 0.
    """
    zero_grad(self.D)

    if not self.opts.train.amp:
        d_loss = self.get_D_loss(multi_domain_batch, verbose)
        d_loss.backward()
        self.d_opt_step()
    else:
        with autocast():
            d_loss = self.get_D_loss(multi_domain_batch, verbose)
        scaler = self.grad_scaler_d
        scaler.scale(d_loss).backward()
        scaler.step(self.d_opt)
        scaler.update()

    self.logger.losses.disc.total_loss = d_loss.item()
    self.logger.log_losses(model_to_update="D", mode="train")
def get_D_loss(self, multi_domain_batch, verbose=0):
    """Compute the discriminators' total loss over all domain batches.

    Losses are accumulated per task in a dictionary: ``"Advent"`` for the
    masker ("m") and segmentation ("s") tasks, and for the painter ("p")
    either ``"global"`` / ``"local"`` (local discriminator enabled) or
    ``"gan"``.

    For each domain-specific batch:
        * "rf" domain, when a painter is available: paint a fake image
          without gradient (optionally applying the differentiable
          augmentations to both the fake and the real image), then compute
          the painter discriminator's loss on fake and real data
        * any other case: encode the image, then, for each of "s" and "m"
          present in the batch's data, compute the loss returned by
          ``masker_s_loss`` / ``masker_m_loss`` with ``for_="D"``, scaled
          by ``opts.train.lambdas.advent.adv_main``

    The per-task values are stored in ``self.logger.losses.disc``.

    Args:
        multi_domain_batch (dict): dictionary mapping domain names to batches
        verbose (int, optional): not used in this method. Defaults to 0.

    Returns:
        the sum of all the accumulated per-task losses (a tensor as soon as
        one of them is a tensor)
    """

    disc_loss = {
        "m": {"Advent": 0},
        "s": {"Advent": 0},
    }
    if self.opts.dis.p.use_local_discriminator:
        disc_loss["p"] = {"global": 0, "local": 0}
    else:
        disc_loss["p"] = {"gan": 0}

    for domain, batch in multi_domain_batch.items():
        x = batch["data"]["x"]

        # ---------------------
        # -----  Painter  -----
        # ---------------------
        if domain == "rf" and self.has_painter:
            m = batch["data"]["m"]
            # the fake image is generated without gradient
            with torch.no_grad():
                # see spade compute_discriminator_loss
                fake = self.G.paint(m, x)
                if self.opts.gen.p.diff_aug.use:
                    fake = self.diff_transforms(fake)
                    x = self.diff_transforms(x)
                fake = fake.detach()
                fake.requires_grad_()

            if self.opts.dis.p.use_local_discriminator:
                # global D sees the full images, local D the masked ones
                fake_d_global = self.D["p"]["global"](fake)
                real_d_global = self.D["p"]["global"](x)

                fake_d_local = self.D["p"]["local"](fake * m)
                real_d_local = self.D["p"]["local"](x * m)

                global_loss = self.losses["D"]["p"](fake_d_global, False, True)
                global_loss += self.losses["D"]["p"](real_d_global, True, True)

                local_loss = self.losses["D"]["p"](fake_d_local, False, True)
                local_loss += self.losses["D"]["p"](real_d_local, True, True)

                disc_loss["p"]["global"] += global_loss
                disc_loss["p"]["local"] += local_loss
            else:
                # D is conditioned on the mask: concatenate it with the
                # image on dim 1, then stack real and fake on the batch dim
                # so that a single forward pass handles both
                real_cat = torch.cat([m, x], axis=1)
                fake_cat = torch.cat([m, fake], axis=1)
                real_fake_cat = torch.cat([real_cat, fake_cat], dim=0)

                real_fake_d = self.D["p"](real_fake_cat)
                real_d, fake_d = divide_pred(real_fake_d)

                disc_loss["p"]["gan"] = self.losses["D"]["p"](fake_d, False, True)
                disc_loss["p"]["gan"] += self.losses["D"]["p"](real_d, True, True)

        # --------------------
        # -----  Masker  -----
        # --------------------
        else:
            z = self.G.encode(x)
            s_pred = d_pred = cond = z_depth = None

            if "s" in batch["data"]:
                # depth is only decoded here when segmentation uses it
                if "d" in self.opts.tasks and self.opts.gen.s.use_dada:
                    d_pred, z_depth = self.G.decoders["d"](z)

                step_loss, s_pred = self.masker_s_loss(
                    x, z, d_pred, z_depth, None, domain, for_="D"
                )
                step_loss *= self.opts.train.lambdas.advent.adv_main
                disc_loss["s"]["Advent"] += step_loss

            if "m" in batch["data"]:
                if "d" in self.opts.tasks:
                    if self.opts.gen.m.use_spade:
                        # re-use the depth prediction if already computed
                        if d_pred is None:
                            d_pred, z_depth = self.G.decoders["d"](z)
                        cond = self.G.make_m_cond(d_pred, s_pred, x)
                    elif self.opts.gen.m.use_dada:
                        if d_pred is None:
                            d_pred, z_depth = self.G.decoders["d"](z)

                step_loss, _ = self.masker_m_loss(
                    x,
                    z,
                    None,
                    domain,
                    for_="D",
                    cond=cond,
                    z_depth=z_depth,
                    depth_preds=d_pred,
                )
                step_loss *= self.opts.train.lambdas.advent.adv_main
                disc_loss["m"]["Advent"] += step_loss

    # log plain Python numbers: tensors are converted with .item()
    self.logger.losses.disc.update(
        {
            dom: {
                k: v.item() if isinstance(v, torch.Tensor) else v
                for k, v in d.items()
            }
            for dom, d in disc_loss.items()
        }
    )

    loss = sum(v for d in disc_loss.values() for k, v in d.items())
    return loss
def get_G_loss(self, multi_domain_batch, verbose=0):
    """Compute the generator's total loss: the masker loss when any of the
    "m", "s" or "d" tasks is enabled, plus the painter loss when "p" is
    enabled outside of kitti pre-training.

    Each component and the total are stored in ``self.logger.losses.gen``.

    Args:
        multi_domain_batch (dict): dictionary mapping domain names to batches
        verbose (int, optional): not used in this method. Defaults to 0.

    Returns:
        the summed loss; an assertion fails if no loss was computed
    """
    m_loss = p_loss = None
    tasks = self.opts.tasks

    # For now, always compute "representation loss"
    g_loss = 0

    masker_enabled = any(t in tasks for t in "msd")
    if masker_enabled:
        m_loss = self.get_masker_loss(multi_domain_batch)
        self.logger.losses.gen.masker = m_loss.item()
        g_loss += m_loss

    painter_enabled = "p" in tasks and not self.kitti_pretrain
    if painter_enabled:
        p_loss = self.get_painter_loss(multi_domain_batch)
        self.logger.losses.gen.painter = p_loss.item()
        g_loss += p_loss

    assert g_loss != 0 and not isinstance(g_loss, int), "No update in get_G_loss!"

    self.logger.losses.gen.total_loss = g_loss.item()

    return g_loss
def get_masker_loss(self, multi_domain_batch):  # TODO update docstrings
    """Sum the masker's task losses over every domain but "rf".

    For each remaining batch the image is encoded once, then the depth
    ("d"), segmentation ("s") and mask ("m") losses are computed, in that
    order, for the tasks that have a target in the batch's data. Depth
    predictions feed the segmentation loss, and both feed the mask's
    conditioning when ``opts.gen.m.use_spade`` is set. Each loss value is
    stored in ``self.logger.losses.gen.task[task][domain]``.

    Args:
        multi_domain_batch (dict): dictionary mapping domain names to batches
            from the trainer's loaders

    Returns:
        the accumulated loss (the integer 0 if no task loss was computed)
    """
    m_loss = 0
    task_log = self.logger.losses.gen.task

    for domain, batch in multi_domain_batch.items():
        # We don't care about the flooded domain here
        if domain == "rf":
            continue

        data = batch["data"]
        x = data["x"]
        z = self.G.encode(x)

        # --------------------------------------
        # -----  task-specific losses (2)  -----
        # --------------------------------------
        d_pred = s_pred = z_depth = None

        if "d" in data:
            loss, d_pred, z_depth = self.masker_d_loss(
                x, z, data["d"], domain, "G"
            )
            m_loss += loss
            task_log["d"][domain] = loss.item()

        if "s" in data:
            loss, s_pred = self.masker_s_loss(
                x, z, d_pred, z_depth, data["s"], domain, "G"
            )
            m_loss += loss
            task_log["s"][domain] = loss.item()

        if "m" in data:
            target = data["m"]
            cond = None
            if self.opts.gen.m.use_spade:
                if not self.opts.gen.m.detach:
                    d_pred = d_pred.clone()
                    s_pred = s_pred.clone()
                cond = self.G.make_m_cond(d_pred, s_pred, x)

            loss, _ = self.masker_m_loss(
                x,
                z,
                target,
                domain,
                "G",
                cond=cond,
                z_depth=z_depth,
                depth_preds=d_pred,
            )
            m_loss += loss
            task_log["m"][domain] = loss.item()

    return m_loss
def get_painter_loss(self, multi_domain_batch):
    """Compute the painter's loss on the flooded ("rf") batch.

    The painter re-paints the area selected by the batch's mask and the
    result is compared to the input image through the VGG, TV, context and
    reconstruction losses (each skipped when its lambda is 0), followed by
    the GAN and feature-matching losses. Every term is also stored in
    ``self.logger.losses.gen.p``.

    Args:
        multi_domain_batch (dict): dictionary mapping domain names to batches
            from the trainer's loaders; only the "rf" entry is read

    Returns:
        torch.Tensor: scalar loss tensor, weighted according to opts.train.lambdas
    """
    lambdas = self.opts.train.lambdas
    data = multi_domain_batch["rf"]["data"]

    x = data["x"]
    # ! different mask: hides water to be reconstructed
    # ! 1 for water, 0 otherwise
    m = data["m"]
    fake_flooded = self.G.paint(m, x)

    painter_losses = self.losses["G"]["p"]
    total = 0

    # -----  VGG Loss (masked area only)  -----
    if lambdas.G.p.vgg != 0:
        vgg = painter_losses["vgg"](
            vgg_preprocess(fake_flooded * m), vgg_preprocess(x * m)
        )
        vgg *= lambdas.G.p.vgg
        self.logger.losses.gen.p.vgg = vgg.item()
        total += vgg

    # -----  TV Loss (masked area only)  -----
    if lambdas.G.p.tv != 0:
        tv = painter_losses["tv"](fake_flooded * m)
        tv *= lambdas.G.p.tv
        self.logger.losses.gen.p.tv = tv.item()
        total += tv

    # -----  Context then Reconstruction Loss (same call signature)  -----
    for name in ("context", "reconstruction"):
        weight = lambdas.G.p[name]
        if weight != 0:
            term = painter_losses[name](fake_flooded, x, m)
            term *= weight
            self.logger.losses.gen.p[name] = term.item()
            total += term

    # differentiable augmentations only affect what the discriminators see
    if self.opts.gen.p.diff_aug.use:
        fake_flooded = self.diff_transforms(fake_flooded)
        x = self.diff_transforms(x)

    if self.opts.dis.p.use_local_discriminator:
        # -----  Local & Global GAN Loss  -----
        fake_d_global = self.D["p"]["global"](fake_flooded)
        fake_d_local = self.D["p"]["local"](fake_flooded * m)
        real_d_global = self.D["p"]["global"](x)

        # Note: discriminator returns [out_1,...,out_num_D] outputs
        # Each out_i is a list [feat1, feat2, ..., pred_i]
        self.logger.losses.gen.p.gan = 0

        gan = painter_losses["gan"](fake_d_global, True, False)
        gan += painter_losses["gan"](fake_d_local, True, False)
        gan *= lambdas.G["p"]["gan"]
        self.logger.losses.gen.p.gan = gan.item()
        total += gan

        # -----  Feature Matching Loss (global discriminator only)  -----
        # Order must be real, fake
        if self.opts.dis.p.get_intermediate_features:
            featmatch = painter_losses["featmatch"](real_d_global, fake_d_global)
            featmatch *= lambdas.G["p"]["featmatch"]
            # the feature matching loss may come back as a plain float
            self.logger.losses.gen.p.featmatch = (
                featmatch if isinstance(featmatch, float) else featmatch.item()
            )
            total += featmatch
    else:
        # -----  Single Discriminator GAN Loss  -----
        # real and fake pairs go through the discriminator in a single batch
        real_cat = torch.cat([m, x], axis=1)
        fake_cat = torch.cat([m, fake_flooded], axis=1)
        real_fake_d = self.D["p"](torch.cat([real_cat, fake_cat], dim=0))
        real_d, fake_d = divide_pred(real_fake_d)

        # NOTE(review): unlike the local/global branch, this GAN term is not
        # multiplied by lambdas.G.p.gan -- confirm this is intended
        gan = painter_losses["gan"](fake_d, True, False)
        self.logger.losses.gen.p.gan = gan.item()
        total += gan

        # -----  Feature Matching Loss  -----
        if self.opts.dis.p.get_intermediate_features and lambdas.G.p.featmatch != 0:
            featmatch = painter_losses["featmatch"](real_d, fake_d)
            featmatch *= lambdas.G.p.featmatch
            self.logger.losses.gen.p.featmatch = (
                featmatch if isinstance(featmatch, float) else featmatch.item()
            )
            total += featmatch

    return total
def masker_d_loss(self, x, z, target, domain, for_="G"):
    """Run the depth decoder on ``z`` and compute its weighted loss.

    The loss is zero when ``opts.train.lambdas.G.d.main`` is 0 or when the
    batch comes from the real domain ("r") and "d" is not part of
    ``self.pseudo_training_tasks``. In those cases the loss function is not
    evaluated at all since its value would be discarded.

    ``target`` is never modified: when ``opts.gen.d.classify.enable`` is set,
    a squeezed view is passed to the loss instead of squeezing the caller's
    batch tensor in place.

    Args:
        x (torch.Tensor): input images, only used for sanity checks
        z: latent representation of ``x`` from the encoder
        target (torch.Tensor): depth target, same batch size as ``x``
        domain (str): domain the batch comes from
        for_ (str, optional): "G" or "D". Defaults to "G".

    Returns:
        tuple: (loss tensor, depth prediction, depth decoder's ``z_depth``)
    """
    assert for_ in {"G", "D"}
    self.assert_z_matches_x(x, z)
    assert x.shape[0] == target.shape[0]

    weight = self.opts.train.lambdas.G.d.main

    # the prediction is always needed: downstream tasks consume it
    prediction, z_depth = self.G.decoders["d"](z)

    if weight == 0 or (domain == "r" and "d" not in self.pseudo_training_tasks):
        zero_loss = torch.tensor(0.0, device=self.device)
        return zero_loss, prediction, z_depth

    if self.opts.gen.d.classify.enable:
        # out-of-place squeeze: do not alter the batch owned by the caller
        target = target.squeeze(1)

    full_loss = self.losses["G"]["tasks"]["d"](prediction, target)
    full_loss *= weight

    return full_loss, prediction, z_depth
def masker_s_loss(self, x, z, depth_preds, z_depth, target, domain, for_="G"):
    """Compute the segmentation decoder's loss, for the generator or for the
    ADVENT discriminator.

    When ``for_ == "G"``:
        * cross-entropy against ``target`` in the sim domain ("s"), or in the
          real domain ("r") if "s" is in ``self.pseudo_training_tasks``
        * entropy minimization in the real domain if ``opts.gen.s.use_minent``
        * ADVENT loss in the real domain if ``opts.gen.s.use_advent``

    When ``for_ == "D"`` (and ``opts.gen.s.use_advent``): ADVENT loss on the
    detached prediction, followed by the handling required by
    ``opts.dis.s.gan_type`` (weight clipping for "WGAN", gradient penalty for
    "WGAN_gp", nothing for "GAN" and "WGAN_norm").

    Every term with a zero lambda is skipped.

    Args:
        x (torch.Tensor): input images, only used for sanity checks
        z: latent representation of ``x`` from the encoder
        depth_preds (torch.Tensor | None): depth predictions, forwarded
            (detached) to the ADVENT loss when ``opts.gen.s.use_dada``
        z_depth: depth decoder's features forwarded to the segmentation decoder
        target (torch.Tensor | None): segmentation labels
        domain (str): "r" or "s"
        for_ (str, optional): "G" or "D". Defaults to "G".

    Raises:
        NotImplementedError: when updating the discriminator with an unknown
            ``opts.dis.s.gan_type``

    Returns:
        tuple: (loss tensor, segmentation prediction); the prediction is
            ``None`` if the decoder was not run and is detached for "D"
    """
    assert for_ in {"G", "D"}
    assert domain in {"r", "s"}
    self.assert_z_matches_x(x, z)
    assert x.shape[0] == target.shape[0] if target is not None else True
    full_loss = torch.tensor(0.0, device=self.device)
    softmax_preds = None
    # --------------------------
    # -----  Segmentation  -----
    # --------------------------
    pred = None
    if for_ == "G" or self.opts.gen.s.use_advent:
        pred = self.G.decoders["s"](z, z_depth)

    # Supervised segmentation loss: crossent for sim domain,
    # crossent_pseudo for real ; loss is crossent in any case
    if for_ == "G":
        if domain == "s" or "s" in self.pseudo_training_tasks:
            if domain == "s":
                logger = self.logger.losses.gen.task["s"]["crossent"]
                weight = self.opts.train.lambdas.G["s"]["crossent"]
            else:
                logger = self.logger.losses.gen.task["s"]["crossent_pseudo"]
                weight = self.opts.train.lambdas.G["s"]["crossent_pseudo"]

            if weight != 0:
                # Cross-Entropy loss
                loss_func = self.losses["G"]["tasks"]["s"]["crossent"]
                loss = loss_func(pred, target.squeeze(1))
                loss *= weight
                full_loss += loss
                logger[domain] = loss.item()

        if domain == "r":
            weight = self.opts.train.lambdas.G["s"]["minent"]
            if self.opts.gen.s.use_minent and weight != 0:
                softmax_preds = softmax(pred, dim=1)
                # Entropy minimization loss
                loss = self.losses["G"]["tasks"]["s"]["minent"](softmax_preds)
                loss *= weight
                full_loss += loss
                self.logger.losses.gen.task["s"]["minent"]["r"] = loss.item()

    # Fool ADVENT discriminator
    if self.opts.gen.s.use_advent:
        if self.opts.gen.s.use_dada and depth_preds is not None:
            depth_preds = depth_preds.detach()
        else:
            depth_preds = None

        if for_ == "D":
            # the discriminator learns the actual domain of the batch
            domain_label = domain
            logger = {}  # throw-away: nothing is logged from here for D
            loss_func = self.losses["D"]["advent"]
            pred = pred.detach()
            weight = self.opts.train.lambdas.advent.adv_main
        else:
            # the generator wants real predictions to be labelled as sim
            domain_label = "s"
            logger = self.logger.losses.gen.task["s"]["advent"]
            loss_func = self.losses["G"]["tasks"]["s"]["advent"]
            weight = self.opts.train.lambdas.G["s"]["advent"]

        if (for_ == "D" or domain == "r") and weight != 0:
            if softmax_preds is None:
                softmax_preds = softmax(pred, dim=1)
            loss = loss_func(
                softmax_preds,
                self.domain_labels[domain_label],
                self.D["s"]["Advent"],
                depth_preds,
            )
            loss *= weight
            full_loss += loss
            logger[domain] = loss.item()

            if for_ == "D":
                # WGAN: clipping or GP
                # (the former `== "GAN" or "WGAN_norm"` test was always true,
                # which made every branch below unreachable)
                gan_type = self.opts.dis.s.gan_type
                if gan_type in ("GAN", "WGAN_norm"):
                    pass
                elif gan_type == "WGAN":
                    for p in self.D["s"]["Advent"].parameters():
                        p.data.clamp_(
                            self.opts.dis.s.wgan_clamp_lower,
                            self.opts.dis.s.wgan_clamp_upper,
                        )
                elif gan_type == "WGAN_gp":
                    prob_need_grad = autograd.Variable(pred, requires_grad=True)
                    d_out = self.D["s"]["Advent"](prob_need_grad)
                    gp = get_WGAN_gradient(prob_need_grad, d_out)
                    gp_loss = gp * self.opts.train.lambdas.advent.WGAN_gp
                    full_loss += gp_loss
                else:
                    raise NotImplementedError

    return full_loss, pred
def masker_m_loss(
    self, x, z, target, domain, for_="G", cond=None, z_depth=None, depth_preds=None
):
    """Compute the mask decoder's loss, for the generator or for the ADVENT
    discriminator.

    The mask decoder's logits go through a sigmoid; the probability map and
    its complement are concatenated on dim 1 into ``prob``.

    When ``for_ == "G"``:
        * TV loss on the probability map
        * BCE against ``target`` in the sim domain ("s")
        * in the real domain ("r"): ground-intersection, painter (pl4m) and
          entropy-minimization losses, each behind its option flag
        * ADVENT loss in the real domain if ``opts.gen.m.use_advent``

    When ``for_ == "D"`` (and ``opts.gen.m.use_advent``): ADVENT loss on the
    detached ``prob``, followed by the handling required by
    ``opts.dis.m.gan_type`` (weight clipping for "WGAN", gradient penalty for
    "WGAN_gp", nothing for "GAN" and "WGAN_norm"), applied to the mask
    discriminator ``self.D["m"]["Advent"]``.

    Every term with a zero lambda is skipped.

    Args:
        x (torch.Tensor): input images
        z: latent representation of ``x`` from the encoder
        target (torch.Tensor | None): mask target
        domain (str): "r" or "s"
        for_ (str, optional): "G" or "D". Defaults to "G".
        cond (optional): conditioning forwarded to the mask decoder
        z_depth (optional): depth decoder's features forwarded to the decoder
        depth_preds (torch.Tensor, optional): depth predictions, detached and
            resized to ``x``'s spatial size for the ADVENT loss when
            ``opts.gen.m.use_dada``

    Raises:
        NotImplementedError: when updating the discriminator with an unknown
            ``opts.dis.m.gan_type``

    Returns:
        tuple: (loss tensor, ``prob``); ``prob`` is detached for "D" when
            ``opts.gen.m.use_advent`` is set
    """
    assert for_ in {"G", "D"}
    assert domain in {"r", "s"}
    self.assert_z_matches_x(x, z)
    assert x.shape[0] == target.shape[0] if target is not None else True
    full_loss = torch.tensor(0.0, device=self.device)

    pred_logits = self.G.decoders["m"](z, cond=cond, z_depth=z_depth)
    pred_prob = sigmoid(pred_logits)
    pred_prob_complementary = 1 - pred_prob
    prob = torch.cat([pred_prob, pred_prob_complementary], dim=1)

    if for_ == "G":
        # TV loss
        weight = self.opts.train.lambdas.G.m.tv
        if weight != 0:
            loss = self.losses["G"]["tasks"]["m"]["tv"](pred_prob)
            loss *= weight
            full_loss += loss
            self.logger.losses.gen.task["m"]["tv"][domain] = loss.item()

        weight = self.opts.train.lambdas.G.m.bce
        if domain == "s" and weight != 0:
            # CrossEnt Loss
            loss = self.losses["G"]["tasks"]["m"]["bce"](pred_logits, target)
            loss *= weight
            full_loss += loss
            self.logger.losses.gen.task["m"]["bce"]["s"] = loss.item()

        if domain == "r":
            weight = self.opts.train.lambdas.G["m"]["gi"]
            if self.opts.gen.m.use_ground_intersection and weight != 0:
                # GroundIntersection loss
                loss = self.losses["G"]["tasks"]["m"]["gi"](pred_prob, target)
                loss *= weight
                full_loss += loss
                self.logger.losses.gen.task["m"]["gi"]["r"] = loss.item()

            weight = self.opts.train.lambdas.G.m.pl4m
            if self.use_pl4m and weight != 0:
                # Painter loss
                pl4m_loss = self.painter_loss_for_masker(x, pred_prob)
                pl4m_loss *= weight
                full_loss += pl4m_loss
                self.logger.losses.gen.task.m.pl4m.r = pl4m_loss.item()

            weight = self.opts.train.lambdas.advent.ent_main
            if self.opts.gen.m.use_minent and weight != 0:
                # MinEnt loss
                loss = self.losses["G"]["tasks"]["m"]["minent"](prob)
                loss *= weight
                full_loss += loss
                self.logger.losses.gen.task["m"]["minent"]["r"] = loss.item()

    if self.opts.gen.m.use_advent:
        # AdvEnt loss
        if self.opts.gen.m.use_dada and depth_preds is not None:
            depth_preds = depth_preds.detach()
            depth_preds = torch.nn.functional.interpolate(
                depth_preds, size=x.shape[-2:], mode="nearest"
            )
        else:
            depth_preds = None

        # same lambda for both the generator and the discriminator
        weight = self.opts.train.lambdas.advent.adv_main
        if for_ == "D":
            # the discriminator learns the actual domain of the batch
            domain_label = domain
            logger = {}  # throw-away: nothing is logged from here for D
            loss_func = self.losses["D"]["advent"]
            prob = prob.detach()
        else:
            # the generator wants real predictions to be labelled as sim
            domain_label = "s"
            logger = self.logger.losses.gen.task["m"]["advent"]
            loss_func = self.losses["G"]["tasks"]["m"]["advent"]

        if (for_ == "D" or domain == "r") and weight != 0:
            loss = loss_func(
                prob.to(self.device),
                self.domain_labels[domain_label],
                self.D["m"]["Advent"],
                depth_preds,
            )
            loss *= weight
            full_loss += loss
            logger[domain] = loss.item()

            if for_ == "D":
                # WGAN: clipping or GP
                # (the former `== "GAN" or "WGAN_norm"` test was always true,
                # which made every branch below unreachable; those branches
                # also targeted the segmentation discriminator D["s"])
                gan_type = self.opts.dis.m.gan_type
                if gan_type in ("GAN", "WGAN_norm"):
                    pass
                elif gan_type == "WGAN":
                    for p in self.D["m"]["Advent"].parameters():
                        p.data.clamp_(
                            self.opts.dis.m.wgan_clamp_lower,
                            self.opts.dis.m.wgan_clamp_upper,
                        )
                elif gan_type == "WGAN_gp":
                    prob_need_grad = autograd.Variable(prob, requires_grad=True)
                    d_out = self.D["m"]["Advent"](prob_need_grad)
                    gp = get_WGAN_gradient(prob_need_grad, d_out)
                    gp_loss = self.opts.train.lambdas.advent.WGAN_gp * gp
                    full_loss += gp_loss
                else:
                    raise NotImplementedError

    return full_loss, prob
def painter_loss_for_masker(self, x, m):
    """Painter's GAN loss used as a training signal for the masker (pl4m).

    The painter paints ``x`` with the mask ``m`` while its parameters are
    frozen, and the painter's discriminator(s) score the result with the
    generator's GAN loss. The painter's parameters are made trainable again
    afterwards only if "p" is part of ``opts.tasks``.

    Args:
        x (torch.Tensor): input images
        m (torch.Tensor): mask given to the painter

    Returns:
        torch.Tensor: unweighted painter GAN loss
    """
    # painter should not be updated
    for frozen in self.G.painter.parameters():
        frozen.requires_grad = False
    # TODO for param in self.D.painter.parameters():
    #    param.requires_grad = False

    fake_flooded = self.G.paint(m, x)
    gan_loss = self.losses["G"]["p"]["gan"]

    if not self.opts.dis.p.use_local_discriminator:
        # single discriminator: real and fake pairs share one forward pass
        real_cat = torch.cat([m, x], axis=1)
        fake_cat = torch.cat([m, fake_flooded], axis=1)
        real_fake_d = self.D["p"](torch.cat([real_cat, fake_cat], dim=0))
        _, fake_d = divide_pred(real_fake_d)
        pl4m_loss = gan_loss(fake_d, True, False)
    else:
        fake_d_global = self.D["p"]["global"](fake_flooded)
        fake_d_local = self.D["p"]["local"](fake_flooded * m)
        # Note: discriminator returns [out_1,...,out_num_D] outputs
        # Each out_i is a list [feat1, feat2, ..., pred_i]
        pl4m_loss = gan_loss(fake_d_global, True, False)
        pl4m_loss += gan_loss(fake_d_local, True, False)

    if "p" in self.opts.tasks:
        for trainable in self.G.painter.parameters():
            trainable.requires_grad = True

    return pl4m_loss
@torch.no_grad()
def run_evaluation(self, verbose=0):
    """Evaluate the generator on the validation loaders and log the results.

    Steps:
        * switch to eval mode
        * compute the generator's loss on every validation multi-domain
          batch and average the logged losses over the number of batches
        * log the averaged losses, then the train/val images of every domain
        * compute the mask / segmentation metrics and the painter's
          validation FID when the matching tasks are enabled
        * switch back to train mode

    The losses are read from ``self.logger.losses.gen``, which is where the
    loss methods of this class write them (they used to be read from the
    never-written ``losses.generator`` entry, so nothing was averaged).
    With no validation batch at all, the loss averaging and logging are
    skipped instead of failing.

    Args:
        verbose (int, optional): forwarded to ``get_G_loss``. Defaults to 0.
    """
    print("******************* Running Evaluation ***********************")
    start_time = time()
    self.eval_mode()
    val_logger = None
    nb_of_batches = 0
    for multi_batch_tuple in self.val_loaders:
        # create a dictionary (domain => batch) from tuple
        # (batch_domain_0, ..., batch_domain_i)
        # and send it to self.device
        nb_of_batches += 1
        multi_domain_batch = {
            batch["domain"][0]: self.batch_to_device(batch)
            for batch in multi_batch_tuple
        }
        self.get_G_loss(multi_domain_batch, verbose)

        if val_logger is None:
            # copy: the logger's entries are overwritten by the next batch
            val_logger = deepcopy(self.logger.losses.gen)
        else:
            val_logger = sum_dict(val_logger, self.logger.losses.gen)

    if nb_of_batches > 0:
        self.logger.losses.gen = div_dict(val_logger, nb_of_batches)
        self.logger.log_losses(model_to_update="G", mode="val")
    else:
        print("No validation batch: skipping validation losses")

    for d in self.opts.domains:
        self.logger.log_comet_images("train", d)
        self.logger.log_comet_images("val", d)

    if "m" in self.opts.tasks and self.has_painter and not self.kitti_pretrain:
        self.logger.log_comet_combined_images("train", "r")
        self.logger.log_comet_combined_images("val", "r")

    if self.exp is not None:
        print()

    if "m" in self.opts.tasks or "s" in self.opts.tasks:
        self.eval_images("val", "r")
        self.eval_images("val", "s")

    if "p" in self.opts.tasks and not self.kitti_pretrain:
        val_fid = compute_val_fid(self)
        if self.exp is not None:
            self.exp.log_metric("val_fid", val_fid, step=self.logger.global_step)
        else:
            print("Validation FID Score", val_fid)

    self.train_mode()
    timing = int(time() - start_time)
    print("****************** Done in {}s *********************".format(timing))
def eval_images(self, mode, domain):
    """Compute accuracy and mIOU on the display images of a domain.

    For every image of ``self.display_images[mode][domain]`` the enabled
    decoders are run and their predictions are scored against the image's
    labels:
        * "d": only in the sim domain when depth is a classification task
        * "s": when "s" is in ``opts.tasks``
        * "m": when "m" is in ``opts.tasks`` (this used to test
          ``"m" in self.opts``, i.e. the top-level options, so the mask
          metrics were never computed)

    Scores are averaged per task and metric; a metric without any value is
    reported as -1. Results are logged to ``self.exp`` when available and
    printed otherwise.

    Args:
        mode (str): key into ``self.display_images``, e.g. "val"
        domain (str): domain to evaluate; "s" is replaced by "kitti" when
            ``self.kitti_pretrain`` is set

    Returns:
        int | None: ``0`` once metrics have been reported, ``None`` when the
            domain is "rf" or has no display images for ``mode``
    """
    if domain == "s" and self.kitti_pretrain:
        domain = "kitti"
    if domain == "rf" or domain not in self.display_images[mode]:
        return

    metric_funcs = {"accuracy": accuracy, "mIOU": mIOU}
    metric_avg_scores = {"m": {}}
    if "s" in self.opts.tasks:
        metric_avg_scores["s"] = {}
    if "d" in self.opts.tasks and domain == "s" and self.opts.gen.d.classify.enable:
        metric_avg_scores["d"] = {}

    for task_scores in metric_avg_scores.values():
        for key in metric_funcs:
            task_scores[key] = []

    for im_set in self.display_images[mode][domain]:
        x = im_set["data"]["x"].unsqueeze(0).to(self.device)
        z = self.G.encode(x)

        s_pred = d_pred = z_depth = None

        if "d" in metric_avg_scores:
            d_pred, z_depth = self.G.decoders["d"](z)
            d_pred = d_pred.detach().cpu()

            if domain == "s":
                d = im_set["data"]["d"].unsqueeze(0).detach()
                for metric, func in metric_funcs.items():
                    metric_avg_scores["d"][metric].append(func(d_pred, d))

        if "s" in metric_avg_scores:
            if z_depth is None:
                if self.opts.gen.s.use_dada and "d" in self.opts.tasks:
                    _, z_depth = self.G.decoders["d"](z)
            s_pred = self.G.decoders["s"](z, z_depth).detach().cpu()
            s = im_set["data"]["s"].unsqueeze(0).detach()

            for metric, func in metric_funcs.items():
                metric_avg_scores["s"][metric].append(func(s_pred, s))

        if "m" in self.opts.tasks:
            cond = None
            if s_pred is not None and d_pred is not None:
                # predictions were moved to the cpu for the metrics: bring
                # them back next to x before conditioning the mask decoder
                cond = self.G.make_m_cond(
                    d_pred.to(x.device), s_pred.to(x.device), x
                )
            if z_depth is None:
                if self.opts.gen.m.use_dada and "d" in self.opts.tasks:
                    _, z_depth = self.G.decoders["d"](z)

            pred_mask = (
                (self.G.mask(z=z, cond=cond, z_depth=z_depth)).detach().cpu()
            )
            # binarize, then build a 2-channel map for mIOU
            pred_mask = (pred_mask > 0.5).to(torch.float32)
            pred_prob = torch.cat([1 - pred_mask, pred_mask], dim=1)

            m = im_set["data"]["m"].unsqueeze(0).detach()

            for metric, func in metric_funcs.items():
                if metric != "mIOU":
                    metric_score = func(pred_mask, m)
                else:
                    metric_score = func(pred_prob, m)
                metric_avg_scores["m"][metric].append(metric_score)

    # average the scores; tasks without any value get nan ...
    metric_avg_scores = {
        task: {
            metric: np.mean(values) if values else float("nan")
            for metric, values in met_dict.items()
        }
        for task, met_dict in metric_avg_scores.items()
    }
    # ... which is then reported as -1
    metric_avg_scores = {
        task: {
            metric: value if not np.isnan(value) else -1
            for metric, value in met_dict.items()
        }
        for task, met_dict in metric_avg_scores.items()
    }
    if self.exp is not None:
        self.exp.log_metrics(
            flatten_opts(metric_avg_scores),
            prefix=f"metrics_{mode}_{domain}",
            step=self.logger.global_step,
        )
    else:
        print(f"metrics_{mode}_{domain}")
        print(flatten_opts(metric_avg_scores))

    return 0
def functional_test_mode(self):
    """Redirect the trainer's outputs to a disposable functional-test folder.

    ``opts.output_path`` is set to ``~/climategan/functional_tests``; the
    folder is created and tagged with an ``is_functional.test`` marker file.
    The experiment (if any) is flagged as a functional test and
    ``self.del_output_path`` is registered to run when the interpreter exits.
    """
    import atexit

    self.opts.output_path = (
        Path("~").expanduser() / "climategan" / "functional_tests"
    )
    out_dir = Path(self.opts.output_path)
    out_dir.mkdir(parents=True, exist_ok=True)

    # marker file: del_output_path only deletes folders which contain it
    marker = out_dir / "is_functional.test"
    with marker.open("w") as marker_file:
        marker_file.write("trainer functional test - delete this dir")

    if self.exp is not None:
        self.exp.log_parameter("is_functional_test", True)

    atexit.register(self.del_output_path)
def del_output_path(self, force=False):
    """Delete ``opts.output_path`` if it belongs to a functional test.

    Nothing happens if the folder does not exist. Otherwise it is removed
    recursively when it contains the ``is_functional.test`` marker file, or
    unconditionally when ``force`` is set.

    Args:
        force (bool, optional): delete the folder even without the marker
            file. Defaults to False.
    """
    import shutil

    out_dir = Path(self.opts.output_path)
    if not out_dir.exists():
        return

    is_functional_test = (out_dir / "is_functional.test").exists()
    if is_functional_test or force:
        shutil.rmtree(self.opts.output_path)
def compute_fire(self, x, seg_preds=None, z=None, z_depth=None):
    """
    Apply the wildfire event to an input tensor with ``add_fire``

    Args:
        x (torch.Tensor): Input tensor
        seg_preds (torch.Tensor): Semantic segmentation
            predictions for input tensor. Computed with the
            segmentation decoder when not provided.
        z (torch.Tensor): Latent vector of encoded "x".
            Can be None if seg_preds is given; computed from "x"
            when needed and missing.
        z_depth (optional): forwarded to the segmentation decoder
            when seg_preds has to be computed

    Returns:
        torch.Tensor: Wildfire version of input tensor
    """
    fire_opts = self.opts.events.fire

    if seg_preds is not None:
        return add_fire(x, seg_preds, fire_opts)

    # no segmentation provided: infer it (encoding x first if required)
    latent = self.G.encode(x) if z is None else z
    inferred_seg = self.G.decoders["s"](latent, z_depth)
    return add_fire(x, inferred_seg, fire_opts)
def compute_flood(
    self, x, z=None, z_depth=None, m=None, s=None, cloudy=None, bin_value=-1
):
    """
    Applies a flood (mask + paint) to an input image, with optionally
    pre-computed masker z or mask

    Args:
        x (torch.Tensor): B x C x H x W -1:1 input image
        z (torch.Tensor, optional): B x C x H x W Masker latent vector.
            Defaults to None (computed from x when the mask is missing).
        z_depth (optional): depth decoder's features given to the masker.
            When None, they are computed if "d" is in the tasks and
            opts.gen.m.use_dada is set. Defaults to None.
        m (torch.Tensor, optional): B x 1 x H x W Mask. Defaults to None
            (inferred by the masker).
        s (optional): extra input of ``G.paint_cloudy``, required when
            ``cloudy`` is set. Defaults to None.
        cloudy (bool, optional): paint with ``G.paint_cloudy`` instead of
            ``G.paint``. Defaults to None.
        bin_value (float, optional): Mask binarization value.
            Set to -1 to use smooth masks (no binarization)

    Returns:
        torch.Tensor: B x 3 x H x W -1:1 flooded image
    """
    mask = m
    if mask is None:
        latent = z if z is not None else self.G.encode(x)
        wants_depth = "d" in self.opts.tasks and self.opts.gen.m.use_dada
        if wants_depth and z_depth is None:
            _, z_depth = self.G.decoders["d"](latent)
        mask = self.G.mask(x=x, z=latent, z_depth=z_depth)

    if bin_value >= 0:
        # hard mask, in the same dtype as the smooth one
        mask = (mask > bin_value).to(mask.dtype)

    if not cloudy:
        return self.G.paint(mask, x)

    assert s is not None
    return self.G.paint_cloudy(mask, x, s)
def compute_smog(self, x, z=None, d=None, s=None, use_sky_seg=False):
    """Add smog to an image from its depth map.

    Implementation from the paper:
    HazeRD: An outdoor scene dataset and benchmark for single image dehazing

    The depth map is normalized, inverted and normalized again, then resized
    to the image's size to compute a per-pixel transmission. The image (in
    linear RGB) is blended with the airlight according to that transmission,
    converted back to sRGB and blended with a yellow filter. All parameters
    come from ``opts.events.smog``.

    Args:
        x (torch.Tensor): input image
        z (optional): latent representation of x, computed if needed
        d (torch.Tensor, optional): depth map, inferred by the depth decoder
            when missing
        s (optional): segmentation, inferred when missing and
            ``use_sky_seg`` is set (currently not used any further)
        use_sky_seg (bool, optional): whether to infer the segmentation for
            a sky mask. Defaults to False.

    Raises:
        ValueError: if a segmentation must be inferred but "s" is not in
            ``opts.tasks``

    Returns:
        torch.Tensor: smogged image
    """
    sky_mask = None
    depth_missing = d is None
    seg_missing = use_sky_seg and s is None

    if depth_missing or seg_missing:
        if z is None:
            z = self.G.encode(x)
        if depth_missing:
            d, _ = self.G.decoders["d"](z)
        if seg_missing:
            if "s" not in self.opts.tasks:
                raise ValueError(
                    "Cannot have (use_sky_seg is True and s is None and 's' not in tasks)"
                )
            s = self.G.decoders["s"](z)
            # TODO: s to sky mask
            # TODO: interpolate to d's size

    params = self.opts.events.smog

    # per-channel constants, broadcastable over the batch and spatial dims
    airlight = (params.airlight * torch.ones(3)).view(1, -1, 1, 1).to(self.device)
    irradiance = srgb2lrgb(x)
    beta = torch.tensor([params.beta / params.vr] * 3)
    beta = beta.view(1, -1, 1, 1).to(self.device)

    # depth: normalize, invert, normalize again
    d = normalize(d, mini=0.3, maxi=1.0)
    d = 1.0 / d
    d = normalize(d, mini=0.1, maxi=1)

    if sky_mask is not None:
        d[sky_mask] = 1

    target_size = x.shape[-2:]
    d = torch.nn.functional.interpolate(
        d, size=target_size, mode="bilinear", align_corners=True
    )
    d = d.repeat(1, 3, 1, 1)

    transmission = torch.exp(d * -beta)
    hazy = transmission * irradiance + (1 - transmission) * airlight
    hazy = lrgb2srgb(hazy)

    # add yellow filter
    alpha = params.alpha / 255
    yellow_mask = torch.Tensor([params.yellow_color]) / 255
    height, width = hazy.shape[-2], hazy.shape[-1]
    yellow_filter = yellow_mask.unsqueeze(2).unsqueeze(2)
    yellow_filter = yellow_filter.repeat(1, 1, height, width).to(self.device)

    return hazy * (1 - alpha) + yellow_filter * alpha