Dionyssos's picture
distortion on barging DEBUG
e70ad00
raw
history blame
1.64 kB
import audiofile
import numpy as np
import torch
from audiocraft.loaders import load_compression_model, load_lm_model
from audiocraft.conditioners import ConditioningAttributes
class AudioGen():
def __init__(self,
compression_model=None,
lm=None,
duration=.74):
self.compression_model = compression_model
self.lm = lm
self.duration = duration
@property
def frame_rate(self):
return self.compression_model.frame_rate
def generate(self,
descriptions):
with torch.no_grad():
attributes = [
ConditioningAttributes(text={'description': d}) for d in descriptions]
gen_tokens = self.lm.generate(
conditions=attributes,
max_gen_len=int(self.duration * self.frame_rate)) #[n_draw, 4, 37]
x = self.compression_model.decode(gen_tokens, None) #[n_draw, 1, 11840]
n_draw, _, n_time_samples = x.shape
x = x.reshape(1, n_draw * n_time_samples) # linearise n_draw
return x
device = 'cuda:0'
# https://huggingface.co/facebook/audiogen-medium
sound_generator = AudioGen(
compression_model=load_compression_model('facebook/audiogen-medium', device=device).eval(),
lm=load_lm_model('facebook/audiogen-medium', device=device).to(torch.float).eval(),
duration=.74)
print('\n\n\n\n___________________')
txt = 'dogs barging in the street'
x = sound_generator.generate([txt])[0].detach().cpu().numpy()
x /= np.abs(x).max() + 1e-7
audiofile.write('del_seane.wav', x, 16000)