Spaces:
Build error
Build error
from pathlib import Path | |
import time | |
import os | |
from contextlib import contextmanager | |
import random | |
import numpy as np | |
import audiotools as at | |
from audiotools import AudioSignal | |
import argbind | |
import shutil | |
import torch | |
import yaml | |
from vampnet.interface import Interface, signal_concat | |
from vampnet import mask as pmask | |
from ttutil import log | |
# TODO: incorporate discord bot (if mem allows) | |
# in a separate thread, send audio samples for listening | |
# and send back the results | |
# as well as the params for sampling | |
# also a command that lets you clear the current signal | |
# if you want to start over | |
# Device string used for the models: the GPU when torch reports CUDA as
# available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Resolved (absolute) form of the directory the process was started in.
# load_interface() switches back to it before touching relative paths.
VAMPNET_DIR = Path(".").resolve()
@contextmanager
def chdir(path):
    """Temporarily switch the process working directory to ``path``.

    Meant to be used in a ``with`` statement: the working directory is
    changed on entry and the previous one is restored on exit, including
    when the body raises.

    Args:
        path: Directory to change into; anything ``os.chdir`` accepts.

    Yields:
        None.
    """
    previous_dir = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        # Restore the caller's working directory no matter how the body exits.
        os.chdir(previous_dir)
def load_interface(model_choice="default") -> Interface:
    """Build an ``Interface`` for ``model_choice`` and move it to ``device``.

    Runs with the working directory set to ``VAMPNET_DIR`` so that the
    relative checkpoint and config paths below resolve against it.

    The available model choices are the built-in ``"default"`` entry plus one
    entry per ``conf/generated/*/interface.yml`` file whose three checkpoint
    paths all exist on disk; each such entry is keyed by the name of the
    directory holding the file. The resulting mapping is attached to the
    returned object as ``interface.model_choices``.

    Args:
        model_choice: Key of the model configuration to load.

    Returns:
        The constructed ``Interface``.

    Raises:
        KeyError: If ``model_choice`` is not one of the available choices.
    """
    ckpt_keys = (
        "Interface.coarse_ckpt",
        "Interface.coarse2fine_ckpt",
        "Interface.codec_ckpt",
    )

    with chdir(VAMPNET_DIR):
        # Populate the model choices with any interface.yml files in the
        # generated confs.
        MODEL_CHOICES = {
            "default": {
                "Interface.coarse_ckpt": "models/vampnet/coarse.pth",
                "Interface.coarse2fine_ckpt": "models/vampnet/c2f.pth",
                "Interface.codec_ckpt": "models/vampnet/codec.pth",
            }
        }

        for conf_file in Path("conf/generated").glob("*/interface.yml"):
            with open(conf_file) as f:
                _conf = yaml.safe_load(f)

            # An empty or non-mapping YAML document cannot describe a model.
            if not isinstance(_conf, dict):
                continue

            # Only offer this choice when the coarse, c2f, and codec
            # checkpoints are all declared and exist; otherwise skip it
            # rather than failing the whole load.
            ckpt_paths = [_conf.get(key) for key in ckpt_keys]
            if not all(
                isinstance(ckpt, str) and Path(ckpt).exists()
                for ckpt in ckpt_paths
            ):
                continue

            MODEL_CHOICES[conf_file.parent.name] = _conf

        if model_choice not in MODEL_CHOICES:
            raise KeyError(
                f"unknown model choice {model_choice!r}; "
                f"available choices: {sorted(MODEL_CHOICES)}"
            )
        chosen = MODEL_CHOICES[model_choice]

        interface = Interface(
            device=device,
            coarse_ckpt=chosen["Interface.coarse_ckpt"],
            coarse2fine_ckpt=chosen["Interface.coarse2fine_ckpt"],
            codec_ckpt=chosen["Interface.codec_ckpt"],
        )
        interface.model_choices = MODEL_CHOICES
        # Reuse the module-level device string instead of recomputing it.
        interface.to(device)
        return interface
def load_model(interface: Interface, model_choice: str):
    """Reload ``interface`` with the checkpoints registered for ``model_choice``.

    Looks ``model_choice`` up in ``interface.model_choices`` and passes that
    entry's coarse and coarse-to-fine checkpoint paths to
    ``interface.reload``. The entry's codec checkpoint is not used here.

    Args:
        interface: Object exposing a ``model_choices`` mapping and a
            ``reload(coarse_ckpt, coarse2fine_ckpt)`` method.
        model_choice: Key into ``interface.model_choices``.

    Raises:
        KeyError: If ``model_choice`` is not a known choice; the message
            lists the available ones.
    """
    choices = interface.model_choices
    if model_choice not in choices:
        raise KeyError(
            f"unknown model choice {model_choice!r}; "
            f"available choices: {sorted(choices)}"
        )

    conf = choices[model_choice]
    interface.reload(
        conf["Interface.coarse_ckpt"],
        conf["Interface.coarse2fine_ckpt"],
    )
def ez_variation(
    interface,
    sig: AudioSignal,
    seed: int = None,
    model_choice: str = None,
):
    """Produce one variation of ``sig`` by running it through ``interface.vamp``.

    Args:
        interface: Object providing ``set_chunk_size`` and ``vamp``; when
            ``model_choice`` is given it is also handed to ``load_model``.
        sig: Input signal passed to ``interface.vamp``.
        seed: Seed for ``at.util.seed`` and for the ``vamp`` call. When
            ``None``, one is drawn with ``torch.randint`` from ``[0, 2**32)``.
        model_choice: When not ``None``, the model is reloaded through
            ``load_model`` before sampling.

    Returns:
        The first element of the triple returned by ``interface.vamp``.
    """
    started_at = time.time()

    if seed is None:
        seed = int(torch.randint(0, 2**32, (1,)).item())
    at.util.seed(seed)

    # Reload the model if one was requested.
    if model_choice is not None:
        load_model(interface, model_choice)

    # Sampling/mask parameters are hard-coded for now; the plan is to move to
    # a small set of presets cycled in a predictable order so the outcome is
    # predictable too.
    periodic_p = random.choice([3])
    n_mask_codebooks = 3
    sampletemp = random.choice([1.0,])
    dropout = random.choice([0.0, 0.0])
    # NOTE: top p may be the culprit behind the collapse into single pitches.
    top_p = None

    period = int(periodic_p)
    codebook_cutoff = int(n_mask_codebooks)

    # Arguments forwarded to the mask builder.
    mask_params = {
        "rand_mask_intensity": 1.0,
        "prefix_s": 0.0,
        "suffix_s": 0.0,
        "periodic_prompt": period,
        "periodic_prompt2": period,
        "periodic_prompt_width": 1,
        "_dropout": dropout,
        "upper_codebook_mask": codebook_cutoff,
        "upper_codebook_mask_2": codebook_cutoff,
    }

    # Arguments forwarded to the sampler.
    sampling_params = {
        "temperature": sampletemp,
        "typical_filtering": True,
        "typical_mass": 0.15,
        "typical_min_tokens": 64,
        "top_p": top_p,
        "seed": seed,
        "sample_cutoff": 1.0,
    }

    interface.set_chunk_size(10.0)
    # The mask and codes are returned as well, but only the signal is used.
    sig, _mask, _codes = interface.vamp(
        sig,
        batch_size=1,
        feedback_steps=1,
        time_stretch_factor=1,
        build_mask_kwargs=mask_params,
        vamp_kwargs=sampling_params,
        return_mask=True,
    )

    log(f"vamp took {time.time() - started_at} seconds")
    return sig
def main():
    """Repeatedly feed a signal through ``ez_variation`` and save every step.

    Loads the interface, takes a 7-second excerpt of ``assets/example.wav``,
    preprocesses it and writes it to ``ttout/in.wav``. It then runs 1000
    iterations in which the previous output becomes the next input, writing
    each result to ``ttout/out{i}.wav``.

    Every iteration requests the ``"orchestral"`` model choice, so that key
    must be present in ``interface.model_choices``.
    """
    import tqdm

    # Create the output directory up front so the writes below do not depend
    # on it already existing.
    Path("ttout").mkdir(parents=True, exist_ok=True)

    interface = load_interface()
    sig = AudioSignal.excerpt("assets/example.wav", duration=7.0)
    sig = interface.preprocess(sig)
    sig.write('ttout/in.wav')

    # NOTE(review): passing model_choice makes ez_variation reload the model
    # on every iteration; loading once before the loop may be enough -
    # confirm that interface.reload is safe to hoist.
    for i in tqdm.tqdm(range(1000)):
        sig = ez_variation(interface, sig, model_choice="orchestral")
        sig.write(f'ttout/out{i}.wav')


if __name__ == "__main__":
    main()