# Base directory for this run; `output_path / checkpoints / latest_ckpt.pth` is the
# checkpoint used when the `load_paths` entries are left at none.
output_path: /miniscratch/_groups/ccai/trash
# README on load_paths
# 1/ any path which leads to a dir will be loaded as `path / checkpoints / latest_ckpt.pth`
# 2/ if you want to specify a specific checkpoint, it MUST be a `.pth` file
# 3/ resuming a P OR an M model, you may only specify 1 of `load_paths.p` OR `load_paths.m`.
#    You may also leave BOTH at none, in which case `output_path / checkpoints / latest_ckpt.pth`
#    will be used
# 4/ resuming a P+M model, you may specify (`p` AND `m`) OR `pm` OR leave all at none,
#    in which case `output_path / checkpoints / latest_ckpt.pth` will be used to load from
#    a single checkpoint
# NOTE: `none` is parsed by YAML as the plain string "none", not as a null value.
load_paths:
  p: none # Painter weights: none will use `output_path / checkpoints / latest_ckpt.pth`
  m: none # Masker weights: none will use `output_path / checkpoints / latest_ckpt.pth`
  pm: none # Painter and Masker weights: none will use `output_path / checkpoints / latest_ckpt.pth`
# -------------------
# ----- Tasks -----
# -------------------
# List of task letters; the same letters name the `d`, `s`, `m` and `p` sections
# of the Generator configuration further down.
tasks: [d, s, m, p] # [p] [m, s, d]
# ----------------
# ----- Data -----
# ----------------
# NOTE(review): nesting below was reconstructed from a copy of this file whose
# indentation was lost — confirm key paths against the code that reads them.
data:
  max_samples: -1 # -1 for all, otherwise set to an int to crop the training data size
  files: # if one is not none it will override the dirs location
    base: /miniscratch/_groups/ccai/data/jsons
    train:
      r: train_r_full.json
      s: train_s_fixedholes.json
      rf: train_rf.json
      kitti: train_kitti.json
    val:
      r: val_r_full.json
      s: val_s_fixedholes.json
      rf: val_rf_labelbox.json
      kitti: val_kitti.json
  check_samples: false
  loaders:
    batch_size: 6
    num_workers: 6
  # can be "default" or "HRNet" for now.
  #   default: mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]
  #   HRNet: mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
  normalization: default
  # Each entry names one transform; entries are listed in this order:
  # hflip, resize, crop, brightness, saturation, contrast, resize.
  transforms:
    - name: hflip
      ignore: val
      p: 0.5
    - name: resize
      ignore: false
      new_size: 640
      # smallest dimension will be `new_size` and the other will be computed to keep aspect ratio
      keep_aspect_ratio: true
    - name: crop
      ignore: false
      center: val # disable randomness, crop around the image's center
      height: 600
      width: 600
    - name: brightness
      ignore: val
    - name: saturation
      ignore: val
    - name: contrast
      ignore: val
    - name: resize
      ignore: false
      new_size:
        default: 640
        d: 160
        s: 160
# ---------------------
# ----- Generator -----
# ---------------------
# NOTE(review): nesting below was reconstructed from a copy of this file whose
# indentation was lost — confirm key paths against the code that reads them.
gen:
  opt:
    optimizer: ExtraAdam # one in [Adam, ExtraAdam] default: Adam
    beta1: 0.9
    lr:
      default: 0.00005 # 0.00001 for dlv2, 0.00005 for dlv3
    lr_policy: step
    # lr_policy can be constant, step or multi_step; if step, specify lr_step_size and lr_gamma
    # if multi_step specify lr_step_size lr_gamma and lr_milestones:
    #   if lr_milestones is a list:
    #     the learning rate will be multiplied by gamma each time the epoch reaches an
    #     item in the list (no need for lr_step_size).
    #   if lr_milestones is an int:
    #     a list of milestones is created from `range(lr_milestones, train.epochs, lr_step_size)`
    lr_step_size: 5 # for linear decay : period of learning rate decay (epochs)
    lr_milestones: 15
    lr_gamma: 0.5 # Multiplicative factor of learning rate decay
  # Shared defaults, merged into the encoder and every decoder below via `<<: *default-gen`;
  # keys set explicitly in a section override the merged-in value.
  default: &default-gen # default parameters for the generator (encoder and decoders)
    activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh]
    init_gain: 0.02
    init_type: xavier
    n_res: 1 # number of residual blocks before upsampling
    n_downsample: &n_downsample 3 # number of downsampling layers in encoder | dim 32 + down 3 => z = 256 x 32 x 32
    n_upsample: *n_downsample # upsampling in spade decoder ; should match encoder.n_downsample
    pad_type: reflect # padding type [zero/reflect]
    norm: spectral # ResBlock normalization ; one of {"batch", "instance", "layer", "adain", "spectral", "none"}
    proj_dim: 32 # Dim of projection from latent space
  encoder: # specific params for the encoder
    <<: *default-gen
    dim: 32
    architecture: deeplabv3 # deeplabv2/v3 resnet -> res_dim=2048 | dlv3 mobilenet -> res_dim=320
    input_dim: 3 # input number of channels
    n_res: 0 # number of residual blocks in content encoder/decoder
    norm: spectral # ConvBlock normalization ; one of {"batch", "instance", "layer", "adain", "spectral", "none"}
  #! Don't change!!!
  deeplabv2:
    nblocks: [3, 4, 23, 3]
    use_pretrained: true
    pretrained_model: "/miniscratch/_groups/ccai/data/pretrained_models/deeplabv2/DeepLab_resnet_pretrained_imagenet.pth"
  deeplabv3:
    backbone: resnet # resnet or mobilenet
    output_stride: 8 # 8 or 16
    use_pretrained: true
    pretrained_model: # one checkpoint path per backbone
      mobilenet: "/miniscratch/_groups/ccai/data/pretrained_models/deeplabv3/deeplabv3_plus_mobilenetv2_segmentron.pth"
      resnet: "/miniscratch/_groups/ccai/data/pretrained_models/deeplabv3/model_CoinCheungDeepLab-v3-plus.pth"
  d: # specific params for the depth estimation decoder
    <<: *default-gen
    output_dim: 1
    norm: batch
    loss: sigm # dada or sigm | /!\ ignored if classify.enable
    upsample_featuremaps: true # upsamples from 80x80 to 160x160 intermediate feature maps
    architecture: dada # dada or base | must be base for classif
    classify: # classify log-depth instead of regression
      enable: false
      linspace:
        min: 0.35
        max: 6.95
        buckets: 256
  s: # specific params for the semantic segmentation decoder
    <<: *default-gen
    num_classes: 11
    output_dim: 11
    use_advent: true
    use_minent: true
    architecture: deeplabv3
    upsample_featuremaps: false # upsamples from 80x80 to 160x160 intermediate feature maps
    use_dada: true
  p: # specific params for the SPADE painter
    <<: *default-gen
    latent_dim: 640
    loss: gan # gan or hinge
    no_z: true # <=> use_vae=False in the SPADE repo
    output_dim: 3 # output dimension
    pad_type: reflect # padding type [zero/reflect]
    # only select the water painted to backprop through the network, not the whole
    # generated image: fake_flooded = masked_x + m * fake_flooded
    paste_original_content: true
    pl4m_epoch: 49 # epoch from which we introduce a new loss to the masker: the painter's discriminator's loss
    spade_kernel_size: 3 # kernel size within SPADE norm layers
    # number of upsampling layers in the translation decoder is equal to number of
    # downsamplings in the encoder. output's h and w are z's h and w x 2^spade_num_upsampling_layers
    # | z:32 and spade_n_up:4 => output 512
    spade_n_up: 7
    spade_param_free_norm: instance # what param-free normalization to apply in SPADE normalization
    spade_use_spectral_norm: true
    # if true, the last spade block does not get the masked input as conditioning but the
    # prediction of the previous layer (passed through a conv to match dims) in order to
    # lighten the masking restrictions and have smoother edges
    use_final_shortcut: false
    diff_aug:
      use: false
      do_color_jittering: false
      do_cutout: false
      cutout_ratio: 0.5
      do_translation: false
      translation_ratio: 0.125
  m: # specific params for the mask-generation decoder
    <<: *default-gen
    use_spade: false
    output_dim: 1
    use_minent: true # directly minimize the entropy of the image
    use_minent_var: true # add variance of entropy map in the measure of entropy for a certain picture
    use_advent: true # minimize the entropy of the image by adversarial training
    use_ground_intersection: true
    use_proj: true
    proj_dim: 64
    use_pl4m: false
    n_res: 3
    use_low_level_feats: true
    use_dada: false
    spade:
      latent_dim: 128
      detach: false # detach s_pred and d_pred conditioning tensors
      cond_nc: 15 # 12 without x, 15 with x
      spade_use_spectral_norm: true
      spade_param_free_norm: batch
      num_layers: 3
      activations:
        all_lrelu: true
# -------------------------
# ----- Discriminator -----
# -------------------------
# NOTE(review): nesting below was reconstructed from a copy of this file whose
# indentation was lost — confirm key paths against the code that reads them.
dis:
  soft_shift: 0.2 # label smoothing: real in U(1-soft_shift, 1), fake in U(0, soft_shift) # ! one-sided label smoothing
  flip_prob: 0.05 # label flipping
  opt:
    optimizer: ExtraAdam # one in [Adam, ExtraAdam] default: Adam
    beta1: 0.5
    lr:
      default: 0.00002 # 0.0001 for dlv2, 0.00002 for dlv3
    lr_policy: step
    # lr_policy can be constant, step or multi_step; if step, specify lr_step_size and lr_gamma
    # if multi_step specify lr_step_size lr_gamma and lr_milestones:
    #   if lr_milestones is a list:
    #     the learning rate will be multiplied by gamma each time the epoch reaches an
    #     item in the list (no need for lr_step_size).
    #   if lr_milestones is an int:
    #     a list of milestones is created from `range(lr_milestones, train.epochs, lr_step_size)`
    lr_step_size: 15 # for linear decay : period of learning rate decay (epochs)
    lr_milestones: 5
    lr_gamma: 0.5 # Multiplicative factor of learning rate decay
  # Shared defaults, merged into `p`, `m` and `s` below via `<<: *default-dis`;
  # keys set explicitly in a section override the merged-in value.
  default: &default-dis # default setting for discriminators (there are 4 of them for rn rf sn sf)
    input_nc: 3
    ndf: 64
    n_layers: 4
    norm: instance
    init_type: xavier
    init_gain: 0.02
    use_sigmoid: false
    num_D: 1 # Number of discriminators to use (>1 means multi-scale)
    get_intermediate_features: false
  p:
    <<: *default-dis
    num_D: 3
    get_intermediate_features: true
    use_local_discriminator: false
    # ttur: false # two time-scale update rule (see SPADE repo)
  m:
    <<: *default-dis
    multi_level: false
    architecture: base # can be [base | OmniDiscriminator]
    gan_type: WGAN_norm # can be [GAN | WGAN | WGAN_gp | WGAN_norm]
    wgan_clamp_lower: -0.01 # used in WGAN, WGAN clamps the params in dis to [wgan_clamp_lower, wgan_clamp_upper] for every update
    wgan_clamp_upper: 0.01 # used in WGAN
  s:
    <<: *default-dis
    gan_type: WGAN_norm # can be [GAN | WGAN | WGAN_gp | WGAN_norm]
    wgan_clamp_lower: -0.01 # used in WGAN, WGAN clamps the params in dis to [wgan_clamp_lower, wgan_clamp_upper] for every update
    wgan_clamp_upper: 0.01 # used in WGAN
# -------------------------------
# ----- Domain Classifier -----
# -------------------------------
# NOTE(review): nesting below was reconstructed from a copy of this file whose
# indentation was lost — confirm key paths against the code that reads them.
classifier:
  opt:
    optimizer: ExtraAdam # one in [Adam, ExtraAdam] default: Adam
    beta1: 0.5
    lr:
      default: 0.0005
    lr_policy: step # constant or step ; if step, specify step_size and gamma
    lr_step_size: 30 # for linear decay
    lr_gamma: 0.5
  loss: l2 # Loss can be l1, l2, cross_entropy. default cross_entropy
  layers: [100, 100, 20, 20, 4] # number of units per hidden layer ; last number is output_dim
  dropout: 0.4 # probability of being set to 0
  init_type: kaiming
  init_gain: 0.2
  proj_dim: 128 # Dim of projection from latent space
# ------------------------
# ----- Train Params -----
# ------------------------
# NOTE(review): nesting below was reconstructed from a copy of this file whose
# indentation was lost — confirm key paths against the code that reads them.
train:
  kitti:
    pretrain: false
    epochs: 10
    batch_size: 6
  amp: false
  pseudo:
    tasks: [] # list of tasks for which to use pseudo labels (empty list to disable)
    epochs: 10 # disable pseudo training after n epochs (set to -1 to never disable)
  epochs: 300
  fid:
    n_images: 57 # val_rf.json has 57 images
    batch_size: 50 # inception inference batch size, not painter's
    dims: 2048 # what Inception block to compute the stats from (see BLOCK_INDEX_BY_DIM in fid.py)
  # whether or not to do domain adaptation on the latent vectors
  # Needs to be turned off if use_advent is True
  latent_domain_adaptation: false
  lambdas: # scaling factors in the total loss
    G:
      d:
        main: 1
        gml: 0.5
      s:
        crossent: 1
        crossent_pseudo: 0.001
        minent: 0.001
        advent: 0.001
      m:
        bce: 1 # Main prediction loss, i.e. GAN or BCE
        tv: 1 # Total variational loss (for smoothing)
        gi: 0.05
        pl4m: 1 # painter loss for the masker (end-to-end)
      p:
        context: 0
        dm: 1 # depth matching
        featmatch: 10
        gan: 1 # gan loss
        reconstruction: 0
        tv: 0
        vgg: 10
      classifier: 1
    C: 1
    advent:
      ent_main: 0.5 # the coefficient of the MinEnt loss that directly minimize the entropy of the image
      ent_aux: 0.0 # the corresponding coefficient of the MinEnt loss of second output
      ent_var: 0.1 # the proportion of variance of entropy map in the entropy measure for a certain picture
      adv_main: 1.0 # the coefficient of the AdvEnt loss that minimize the entropy of the image by adversarial training
      adv_aux: 0.0 # the corresponding coefficient of the AdvEnt loss of second output
      dis_main: 1.0 # the discriminator take care of the first output in the adversarial training
      dis_aux: 0.0 # the discriminator take care of the second output in the adversarial training
      WGAN_gp: 10 # used in WGAN_gp, it's the hyperparameters for the gradient penalty
  log_level: 2 # 0: no log, 1: only aggregated losses, >1 detailed losses
  save_n_epochs: 25 # Save `latest_ckpt.pth` every epoch, `epoch_{epoch}_ckpt.pth` model every n epochs if epoch >= min_save_epoch
  min_save_epoch: 28 # Save extra intermediate checkpoints when epoch > min_save_epoch
  resume: false # Load latest_ckpt.pth checkpoint from `output_path` #TODO Make this path of checkpoint to load
  # automatically looks for similar output paths and exact same jobID to resume
  # training automatically even if resume is false.
  auto_resume: true
# -----------------------------
# ----- Validation Params -----
# -----------------------------
val:
  store_images: false # write to disk on top of comet logging
  # Path to a painter checkpoint (`.pth`).
  val_painter: /miniscratch/_groups/ccai/checkpoints/painter/victor/good_large_lr/checkpoints/latest_ckpt.pth
# -----------------------------
# ----- Comet Params ----------
# -----------------------------
comet:
  display_size: 20
  # number of samples (rows) in a logged grid image.
  # Number of total logged images: display_size // rows_per_log
  rows_per_log: 5
  im_per_row: # how many columns (3 = x, target, pred)
    p: 4
    m: 6
    s: 4
    d: 4