# litagin's picture
# init
# 70c3683
"""
Style-Bert-VITS2 モデルのハイパーパラメータを表す Pydantic モデル。
デフォルト値は configs/config_jp_extra.json 内の定義と概ね同一で、
万が一ロードした config.json に存在しないキーがあった際のフェイルセーフとして適用される。
"""
from pathlib import Path
from typing import Optional, Union
from pydantic import BaseModel, ConfigDict
class HyperParametersTrain(BaseModel):
    """Training-related hyperparameters (the ``train`` field of ``HyperParameters``).

    Every field carries a default, so a key that is missing from the
    validated input falls back to the value declared here.
    """

    # Logging / evaluation cadence and run length.
    # NOTE(review): units (steps vs. epochs) are not visible here — confirm
    # against the training loop.
    log_interval: int = 200
    eval_interval: int = 1_000
    seed: int = 42
    epochs: int = 1_000

    # Optimizer-style settings.
    # NOTE(review): names look like Adam-family arguments — confirm with the
    # code that consumes them.
    learning_rate: float = 1e-4
    betas: tuple[float, float] = (0.8, 0.99)
    eps: float = 1e-9
    batch_size: int = 2

    # Reduced-precision switches.
    bf16_run: bool = False
    fp16_run: bool = False

    # Schedule and segment settings.
    lr_decay: float = 0.99996
    segment_size: int = 16_384
    init_lr_ratio: int = 1
    warmup_epochs: int = 0

    # Coefficients, presumably loss weights — TODO confirm.
    c_mel: int = 45
    c_kl: float = 1.0
    c_commit: int = 100

    skip_optimizer: bool = False

    # Per-component freeze flags; all disabled by default.
    freeze_ZH_bert: bool = False
    freeze_JP_bert: bool = False
    freeze_EN_bert: bool = False
    freeze_emo: bool = False
    freeze_style: bool = False
    freeze_decoder: bool = False
class HyperParametersData(BaseModel):
    """Dataset / audio-frontend hyperparameters (the ``data`` field of ``HyperParameters``).

    Every field carries a default, so a key that is missing from the
    validated input falls back to the value declared here.
    """

    use_jp_extra: bool = True

    # Placeholder list-file paths used when the input does not supply them.
    training_files: str = "Data/Dummy/train.list"
    validation_files: str = "Data/Dummy/val.list"

    # Audio / spectrogram settings.
    # NOTE(review): presumably STFT and mel parameters — confirm with the
    # feature-extraction code.
    max_wav_value: float = 32_768.0
    sampling_rate: int = 44_100
    filter_length: int = 2_048
    hop_length: int = 512
    win_length: int = 2_048
    n_mel_channels: int = 128
    mel_fmin: float = 0.0
    # None is an accepted value; its interpretation is up to the consumer.
    mel_fmax: Optional[float] = None

    add_blank: bool = True
    n_speakers: int = 1
    cleaned_text: bool = True

    # Name -> integer id lookup tables; the defaults hold a single entry each.
    spk2id: dict[str, int] = {"Dummy": 0}
    num_styles: int = 1
    style2id: dict[str, int] = {"Neutral": 0}
class HyperParametersModelSLM(BaseModel):
    """Settings stored under the ``slm`` field of ``HyperParametersModel``.

    Every field carries a default, so a key that is missing from the
    validated input falls back to the value declared here.
    """

    # Default points at a local WavLM directory (path taken verbatim).
    model: str = "./slm/wavlm-base-plus"
    # NOTE(review): "sr" is presumably a sample rate in Hz — confirm.
    sr: int = 16_000
    hidden: int = 768
    nlayers: int = 13
    initial_channel: int = 64
class HyperParametersModel(BaseModel):
    """Network-architecture hyperparameters (the ``model`` field of ``HyperParameters``).

    Every field carries a default, so a key that is missing from the
    validated input falls back to the value declared here.
    """

    # Feature switches.
    use_spk_conditioned_encoder: bool = True
    use_noise_scaled_mas: bool = True
    use_mel_posterior_encoder: bool = False
    use_duration_discriminator: bool = False
    use_wavlm_discriminator: bool = True

    # Channel widths and layer settings.
    # NOTE(review): which sub-networks read these is not visible here.
    inter_channels: int = 192
    hidden_channels: int = 192
    filter_channels: int = 768
    n_heads: int = 2
    n_layers: int = 6
    kernel_size: int = 3
    p_dropout: float = 0.1

    # Residual-block configuration; note that `resblock` is a string.
    resblock: str = "1"
    resblock_kernel_sizes: list[int] = [3, 7, 11]
    # Three independent inner lists (deliberately not built by repetition).
    resblock_dilation_sizes: list[list[int]] = [[1, 3, 5], [1, 3, 5], [1, 3, 5]]

    # Upsampling configuration; the two lists have five entries each.
    upsample_rates: list[int] = [8, 8, 2, 2, 2]
    upsample_initial_channel: int = 512
    upsample_kernel_sizes: list[int] = [16, 16, 8, 2, 2]

    n_layers_q: int = 3
    use_spectral_norm: bool = False
    gin_channels: int = 512

    # Nested sub-model; defaults to an all-default HyperParametersModelSLM.
    slm: HyperParametersModelSLM = HyperParametersModelSLM()
class HyperParameters(BaseModel):
    """Top-level hyperparameter model for a Style-Bert-VITS2 model.

    Groups the ``train``, ``data`` and ``model`` sections together with a few
    top-level fields. Every field carries a default, so a key that is missing
    from the loaded JSON falls back to the value declared here.
    """

    # Several fields below start with "model_" (model_name, model_dir), a
    # prefix Pydantic protects by default; clear the protected namespaces so
    # those field names are accepted.
    model_config = ConfigDict(protected_namespaces=())

    model_name: str = "Dummy"
    version: str = "2.0-JP-Extra"
    train: HyperParametersTrain = HyperParametersTrain()
    data: HyperParametersData = HyperParametersData()
    model: HyperParametersModel = HyperParametersModel()

    # The following parameters are set dynamically only during training
    # (they normally do not exist in config.json).
    model_dir: Optional[str] = None
    speedup: bool = False
    repo_id: Optional[str] = None

    @classmethod
    def load_from_json(cls, json_path: Union[str, Path]) -> "HyperParameters":
        """Load hyperparameters from the given JSON file.

        Implemented as a classmethod so that calling it on a subclass
        validates into (and returns) that subclass instead of always
        building the base ``HyperParameters``.

        Args:
            json_path (Union[str, Path]): Path to the JSON file.

        Returns:
            HyperParameters: The validated hyperparameters.

        Raises:
            OSError: If the file cannot be opened or read.
            pydantic.ValidationError: If the file content is not valid JSON
                or does not match the model's field types.
        """
        # The file is always decoded as UTF-8, regardless of the platform default.
        with open(json_path, encoding="utf-8") as f:
            return cls.model_validate_json(f.read())