Spaces: Sleeping
Add variable audio lengths (#3), opened by ylacombe
app.py CHANGED
@@ -30,7 +30,7 @@ model = ParlerTTSForConditionalGeneration.from_pretrained(
|
|
30 |
client = InferenceClient()
|
31 |
|
32 |
description_tokenizer = AutoTokenizer.from_pretrained(repo_id)
|
33 |
-
prompt_tokenizer = AutoTokenizer.from_pretrained(repo_id)
|
34 |
feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)
|
35 |
|
36 |
SAMPLE_RATE = feature_extractor.sampling_rate
|
@@ -78,7 +78,7 @@ def generate_story(subject: str, setting: str) -> str:
|
|
78 |
return None, None, story
|
79 |
|
80 |
|
81 |
-
@spaces.GPU
|
82 |
def generate_base(story):
|
83 |
|
84 |
|
@@ -95,8 +95,10 @@ def generate_base(story):
|
|
95 |
speech_output = model.generate(input_ids=description_tokens.input_ids,
|
96 |
prompt_input_ids=story_tokens.input_ids,
|
97 |
attention_mask=description_tokens.attention_mask,
|
98 |
-
prompt_attention_mask=story_tokens.attention_mask
|
99 |
-
|
|
|
|
|
100 |
return None, None, speech_output
|
101 |
|
102 |
|
|
|
30 |
client = InferenceClient()
|
31 |
|
32 |
description_tokenizer = AutoTokenizer.from_pretrained(repo_id)
|
33 |
+
prompt_tokenizer = AutoTokenizer.from_pretrained(repo_id, padding_side="left")
|
34 |
feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)
|
35 |
|
36 |
SAMPLE_RATE = feature_extractor.sampling_rate
|
|
|
78 |
return None, None, story
|
79 |
|
80 |
|
81 |
+
@spaces.GPU(duration=120)
|
82 |
def generate_base(story):
|
83 |
|
84 |
|
|
|
95 |
speech_output = model.generate(input_ids=description_tokens.input_ids,
|
96 |
prompt_input_ids=story_tokens.input_ids,
|
97 |
attention_mask=description_tokens.attention_mask,
|
98 |
+
prompt_attention_mask=story_tokens.attention_mask,
|
99 |
+
return_dict_in_generate=True,
|
100 |
+
)
|
101 |
+
speech_output = [output.cpu().numpy()[:output_length] for (output, output_length) in zip(speech_output.sequences, speech_output.audios_length)]
|
102 |
return None, None, speech_output
|
103 |
|
104 |
|