Respair
/

stts_test_ckpt

Model card Files Files and versions Metrics Training metrics Community

stts_test_ckpt / StyleTTS_Accelerate /Models /Anispeech_with_DIFF /config.yml

Respair's picture

Upload folder using huggingface_hub

9d7032c verified about 1 month ago

2.44 kB

	# Output directory and first-stage checkpoint for this run.
	log_dir: 'Models/Anispeech_with_DIFF'
	first_stage_path: '/home/ubuntu/StyleTTS_Accelerate_44khz/Models/Anispeech/epoch_1st_00020.pth'
	save_freq: 1
	log_interval: 10
	device: 'cuda'
	multigpu: false
	# Number of epochs for first-stage training.
	epochs_1st: 200
	# Number of epochs for second-stage training.
	epochs_2nd: 100
	batch_size: 32
	pretrained_model: ''
	# Set to true if the pre-trained model is for the 2nd stage.
	second_stage_load_pretrained: true
	# Set to true to skip loading epoch numbers and optimizer parameters.
	load_only_params: false

	diff_epoch: 5

	# Training and validation data files.
	train_data: '/home/ubuntu/StyleTTS_Accelerate_44khz/Data/ani_train_only_longs.csv'
	val_data: '/home/ubuntu/StyleTTS_Accelerate_44khz/Data/val_list_libritts.txt'

	# F0 and ASR resources, given relative to the Utils/ directory.
	F0_path: 'Utils/JDC/bst.t7'
	ASR_config: 'Utils/ASR/config.yml'
	ASR_path: 'Utils/ASR/epoch_00080.pth'

	# Audio preprocessing settings: sample rate plus spectrogram parameters.
	# NOTE(review): sr is 24000 while the data/checkpoint paths in this config
	# live under a "..._44khz" directory - confirm the intended sample rate.
	preprocess_params:
	  sr: 24000
	  # Spectrogram settings, nested under preprocess_params.
	  # NOTE(review): presumably win_length/hop_length are in samples
	  # (1200 / 300 would be 50 ms / 12.5 ms at sr 24000) - confirm.
	  spect_params:
	    n_fft: 2048
	    win_length: 1200
	    hop_length: 300

	# Model hyperparameters. The keys below are children of model_params;
	# they are indented so the mapping is not parsed as null with its
	# settings leaking to the top level.
	model_params:
	  hidden_dim: 512
	  n_token: 178
	  style_dim: 128
	  n_layer: 3
	  dim_in: 64
	  max_conv_dim: 512
	  n_mels: 80
	  dropout: 0.2


	# Diffusion settings, with nested transformer and distribution sections.
	# NOTE(review): upstream StyleTTS2 configs nest `diffusion` under
	# `model_params`; the flattened source gives no way to tell which level
	# was intended here - confirm against the code that reads this config.
	diffusion:
	  embedding_mask_proba: 0.1
	  # transformer config
	  transformer:
	    num_layers: 3
	    num_heads: 8
	    head_features: 64
	    multiplier: 2

	  # diffusion distribution config
	  dist:
	    sigma_data: 0.2  # fallback value, used when estimate_sigma_data is false
	    estimate_sigma_data: true  # estimate sigma_data from the current batch if set to true
	    mean: -3.0
	    std: 1.0


	# Loss weights and related switches. All keys below are children of
	# loss_params; floats are written canonically (10.0 rather than 10.).
	loss_params:
	  lambda_mel: 10.0  # mel reconstruction loss (1st & 2nd stage)
	  lambda_adv: 1.0  # adversarial loss (1st & 2nd stage)
	  lambda_reg: 1.0  # adversarial regularization loss (1st & 2nd stage)
	  lambda_fm: 0.1  # feature matching loss (1st & 2nd stage)

	  lambda_mono: 1.0  # monotonic alignment loss (1st stage, TMA)
	  lambda_s2s: 1.0  # sequence-to-sequence loss (1st stage, TMA)
	  TMA_epoch: 2  # TMA starting epoch (1st stage)

	  # https://github.com/yl4579/StyleTTS/issues/7
	  TMA_CEloss: false  # whether to use cross-entropy (CE) loss for TMA

	  lambda_F0: 1.0  # F0 reconstruction loss (2nd stage)
	  lambda_norm: 1.0  # norm reconstruction loss (2nd stage)
	  lambda_dur: 1.0  # duration loss (2nd stage)
	  lambda_ce: 20.0  # duration predictor probability output CE loss (2nd stage)

	  lambda_sty: 1.0  # style reconstruction loss (2nd stage)
	  lambda_diff: 1.0  # score matching loss (2nd stage)

	# Optimizer settings; lr is nested under optimizer_params.
	optimizer_params:
	  lr: 0.0001