Spaces:
Runtime error
Runtime error
File size: 1,957 Bytes
f34acd1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import torch
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
import soundfile as sf
class TTSModel:
    """Text-to-speech wrapper around the ``ai4bharat/indic-parler-tts`` checkpoint.

    Constructing an instance loads the model and its two tokenizers once;
    call :meth:`generate_audio` afterwards to synthesise audio.
    """

    def __init__(self) -> None:
        """Load the model and both tokenizers, on CUDA when available, else CPU."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_name = "ai4bharat/indic-parler-tts"

        print(f"Loading model on device: {self.device}")

        # Model and tokenizers are initialised the way the model
        # documentation shows.
        self.model = ParlerTTSForConditionalGeneration.from_pretrained(
            self.model_name
        ).to(self.device)
        # Tokenizer applied to the text prompt in generate_audio().
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        # The description uses a separate tokenizer, looked up from the
        # text-encoder entry of the loaded model's config.
        self.description_tokenizer = AutoTokenizer.from_pretrained(
            self.model.config.text_encoder._name_or_path
        )

        print("Model loaded successfully")

    def generate_audio(self, text: str, description: str):
        """Generate audio for *text*, conditioned on *description*.

        Args:
            text: Prompt to synthesise; tokenized with ``self.tokenizer`` and
                passed to the model as ``prompt_input_ids``.
            description: Conditioning text; tokenized with
                ``self.description_tokenizer`` and passed as ``input_ids``.
                Presumably a voice/style description -- confirm against the
                model documentation.

        Returns:
            The model output moved to the CPU and converted to a NumPy array
            with singleton dimensions squeezed out.

        Raises:
            ValueError: If *text* is ``None`` or a blank string.
            Exception: Any error raised during tokenization or generation is
                printed and then re-raised unchanged.
        """
        # Fail fast on a missing/blank prompt instead of handing it to the
        # tokenizer and model, where the failure would be far less obvious.
        if text is None or (isinstance(text, str) and not text.strip()):
            raise ValueError("text must be a non-empty string")

        try:
            # Tokenize exactly as shown in the model documentation.
            description_inputs = self.description_tokenizer(
                description,
                return_tensors="pt",
            ).to(self.device)
            prompt_inputs = self.tokenizer(
                text,
                return_tensors="pt",
            ).to(self.device)

            # Gradients are not needed for generation.
            with torch.no_grad():
                generation = self.model.generate(
                    input_ids=description_inputs.input_ids,
                    attention_mask=description_inputs.attention_mask,
                    prompt_input_ids=prompt_inputs.input_ids,
                    prompt_attention_mask=prompt_inputs.attention_mask,
                )

            # Move to host memory before converting to NumPy.
            return generation.cpu().numpy().squeeze()
        except Exception as e:
            # Report the failure, then let the caller see the original error.
            print(f"Error in speech generation: {e}")
            raise
|