Spaces:
Sleeping
Sleeping
File size: 3,965 Bytes
70c3683 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
"""
Style-Bert-VITS2 モデルのハイパーパラメータを表す Pydantic モデル。
デフォルト値は configs/config_jp_extra.json 内の定義と概ね同一で、
万が一ロードした config.json に存在しないキーがあった際のフェイルセーフとして適用される。
"""
from pathlib import Path
from typing import Optional, Union
from pydantic import BaseModel, ConfigDict
class HyperParametersTrain(BaseModel):
log_interval: int = 200
eval_interval: int = 1000
seed: int = 42
epochs: int = 1000
learning_rate: float = 0.0001
betas: tuple[float, float] = (0.8, 0.99)
eps: float = 1e-9
batch_size: int = 2
bf16_run: bool = False
fp16_run: bool = False
lr_decay: float = 0.99996
segment_size: int = 16384
init_lr_ratio: int = 1
warmup_epochs: int = 0
c_mel: int = 45
c_kl: float = 1.0
c_commit: int = 100
skip_optimizer: bool = False
freeze_ZH_bert: bool = False
freeze_JP_bert: bool = False
freeze_EN_bert: bool = False
freeze_emo: bool = False
freeze_style: bool = False
freeze_decoder: bool = False
class HyperParametersData(BaseModel):
use_jp_extra: bool = True
training_files: str = "Data/Dummy/train.list"
validation_files: str = "Data/Dummy/val.list"
max_wav_value: float = 32768.0
sampling_rate: int = 44100
filter_length: int = 2048
hop_length: int = 512
win_length: int = 2048
n_mel_channels: int = 128
mel_fmin: float = 0.0
mel_fmax: Optional[float] = None
add_blank: bool = True
n_speakers: int = 1
cleaned_text: bool = True
spk2id: dict[str, int] = {
"Dummy": 0,
}
num_styles: int = 1
style2id: dict[str, int] = {
"Neutral": 0,
}
class HyperParametersModelSLM(BaseModel):
model: str = "./slm/wavlm-base-plus"
sr: int = 16000
hidden: int = 768
nlayers: int = 13
initial_channel: int = 64
class HyperParametersModel(BaseModel):
use_spk_conditioned_encoder: bool = True
use_noise_scaled_mas: bool = True
use_mel_posterior_encoder: bool = False
use_duration_discriminator: bool = False
use_wavlm_discriminator: bool = True
inter_channels: int = 192
hidden_channels: int = 192
filter_channels: int = 768
n_heads: int = 2
n_layers: int = 6
kernel_size: int = 3
p_dropout: float = 0.1
resblock: str = "1"
resblock_kernel_sizes: list[int] = [3, 7, 11]
resblock_dilation_sizes: list[list[int]] = [
[1, 3, 5],
[1, 3, 5],
[1, 3, 5],
]
upsample_rates: list[int] = [8, 8, 2, 2, 2]
upsample_initial_channel: int = 512
upsample_kernel_sizes: list[int] = [16, 16, 8, 2, 2]
n_layers_q: int = 3
use_spectral_norm: bool = False
gin_channels: int = 512
slm: HyperParametersModelSLM = HyperParametersModelSLM()
class HyperParameters(BaseModel):
model_name: str = "Dummy"
version: str = "2.0-JP-Extra"
train: HyperParametersTrain = HyperParametersTrain()
data: HyperParametersData = HyperParametersData()
model: HyperParametersModel = HyperParametersModel()
# 以下は学習時にのみ動的に設定されるパラメータ (通常 config.json には存在しない)
model_dir: Optional[str] = None
speedup: bool = False
repo_id: Optional[str] = None
# model_ 以下を Pydantic の保護対象から除外する
model_config = ConfigDict(protected_namespaces=())
@staticmethod
def load_from_json(json_path: Union[str, Path]) -> "HyperParameters":
"""
与えられた JSON ファイルからハイパーパラメータを読み込む。
Args:
json_path (Union[str, Path]): JSON ファイルのパス
Returns:
HyperParameters: ハイパーパラメータ
"""
with open(json_path, encoding="utf-8") as f:
return HyperParameters.model_validate_json(f.read())
|