Spaces:
Runtime error
Runtime error
File size: 5,048 Bytes
f34acd1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
# Language name -> speaker names offered for that language.
# Every speaker listed here also has an entry in VOICE_CHARACTERISTICS.
LANGUAGE_VOICE_MAPPING = {
    "Assamese": ["Amit", "Sita"],
    "Bengali": ["Arjun", "Aditi"],
    "Bodo": ["Bikram", "Maya"],
    "Chhattisgarhi": ["Bhanu", "Champa"],
    "Dogri": ["Karan"],
    "English": ["Thoma", "Mary"],
    "Gujarati": ["Yash", "Neha"],
    "Hindi": ["Rohit", "Divya"],
    "Kannada": ["Suresh", "Anu"],
    "Malayalam": ["Anjali", "Harish"],
    "Manipuri": ["Laishram", "Ranjit"],
    "Marathi": ["Sanjay", "Sunita"],
    "Nepali": ["Amrita"],
    "Odia": ["Manas", "Debjani"],
    "Punjabi": ["Divjot", "Gurpreet"],
    "Sanskrit": ["Aryan"],
    "Tamil": ["Jaya", "Kavitha"],
    "Telugu": ["Prakash", "Lalitha"],
}
# Speaker name -> short phrase describing that speaker's voice.
# Entries are grouped by the language each speaker is listed under in
# LANGUAGE_VOICE_MAPPING and kept in that table's order.
VOICE_CHARACTERISTICS = {
    # Assamese
    "Amit": "slightly deep and resonant",
    "Sita": "clear and well-paced",
    # Bengali
    "Arjun": "moderate and clear",
    "Aditi": "high-pitched and expressive",
    # Bodo
    "Bikram": "higher-pitched and energetic",
    "Maya": "balanced and pleasant",
    # Chhattisgarhi
    "Bhanu": "warm and measured",
    "Champa": "clear and gentle",
    # Dogri
    "Karan": "high-pitched and engaging",
    # English
    "Thoma": "clear and well-articulated",
    "Mary": "pleasant and measured",
    # Gujarati
    "Yash": "warm and balanced",
    "Neha": "clear and dynamic",
    # Hindi
    "Rohit": "moderate and expressive",
    "Divya": "pleasant and well-paced",
    # Kannada
    "Suresh": "clear and precise",
    "Anu": "warm and melodious",
    # Malayalam
    "Anjali": "high-pitched and pleasant",
    "Harish": "deep and measured",
    # Manipuri
    "Laishram": "balanced and smooth",
    "Ranjit": "clear and authoritative",
    # Marathi
    "Sanjay": "deep and authoritative",
    "Sunita": "high-pitched and pleasant",
    # Nepali
    "Amrita": "high-pitched and gentle",
    # Odia
    "Manas": "moderate and measured",
    "Debjani": "clear and pleasant",
    # Punjabi
    "Divjot": "clear and dynamic",
    "Gurpreet": "warm and balanced",
    # Sanskrit
    "Aryan": "resonant and measured",
    # Tamil
    "Jaya": "high-pitched and melodious",
    "Kavitha": "clear and expressive",
    # Telugu
    "Prakash": "clear and well-paced",
    "Lalitha": "pleasant and melodious",
}
# Emotion option -> phrase describing the emotional tone of the delivery.
EMOTION_DESC = {
    "Neutral": "maintaining a balanced and natural tone",
    "Happy": "with a warm and positive energy",
    "Sad": "with a gentle and somber tone",
    "Angry": "with intense and strong delivery",
    "Highly Expressive": "with dynamic and vibrant emotional delivery",
    "Monotone": "with minimal tonal variation",
}
# Speed option -> phrase describing the speaking pace, slowest to fastest.
SPEED_DESC = {
    "Very Slow": "at an extremely measured pace",
    "Slow": "at a measured, deliberate pace",
    "Normal": "at a natural, comfortable pace",
    "Fast": "at a swift, dynamic pace",
    "Very Fast": "at a rapid, accelerated pace",
}
# Pitch option -> phrase describing the vocal register, lowest to highest.
PITCH_DESC = {
    "Very Low": "in an extremely deep register",
    "Low": "in a deeper register",
    "Medium": "in a natural pitch range",
    "High": "in a higher register",
    "Very High": "in an extremely high register",
}
# Background-noise option -> phrase describing how much ambient noise the
# recording contains, quietest to noisiest.
BACKGROUND_NOISE_DESC = {
    "None": "with absolutely no background noise",
    "Minimal": "with minimal background noise",
    "Moderate": "with moderate ambient noise",
    "Noticeable": "with noticeable background sounds",
}
# Reverberation option -> phrase describing the acoustic space of the
# recording, closest to most distant.
REVERBERATION_DESC = {
    "Very Close": "in an extremely intimate setting",
    "Close": "in a close-sounding environment",
    "Moderate": "in a moderately spacious environment",
    "Distant": "in a spacious, reverberant setting",
    "Very Distant": "in a very large, echoing space",
}
# Quality option -> phrase describing the audio quality of the recording,
# lowest to highest.
QUALITY_DESC = {
    "Basic": "in basic audio quality",
    "Good": "in good audio quality",
    "High": "in high audio quality",
    "Studio": "in professional studio quality",
}
def construct_description(
    speaker,
    language,
    emotion="Neutral",
    speed="Normal",
    pitch="Medium",
    background_noise="Minimal",
    reverberation="Close",
    quality="High"
):
    """
    Build the two-sentence natural-language description passed to the TTS model.

    The first sentence covers the delivery (speaker, language, voice, pitch,
    emotion, speed); the second covers the recording (reverberation,
    background noise, audio quality).

    Args:
        speaker (str): Speaker name. Names missing from VOICE_CHARACTERISTICS
            fall back to the phrase 'with clear articulation'.
        language (str): Language name, inserted into the text as given.
        emotion (str): Key of EMOTION_DESC.
        speed (str): Key of SPEED_DESC.
        pitch (str): Key of PITCH_DESC.
        background_noise (str): Key of BACKGROUND_NOISE_DESC.
        reverberation (str): Key of REVERBERATION_DESC.
        quality (str): Key of QUALITY_DESC.

    Returns:
        str: The assembled description.

    Raises:
        KeyError: If any option other than ``speaker`` is not a key of its
            lookup table.
    """
    # Resolve every phrase up front, in the order the phrases appear in the
    # final text. Only the speaker lookup is lenient; the rest are strict.
    voice_phrase = VOICE_CHARACTERISTICS.get(speaker, 'with clear articulation')
    pitch_phrase = PITCH_DESC[pitch]
    emotion_phrase = EMOTION_DESC[emotion]
    speed_phrase = SPEED_DESC[speed]
    room_phrase = REVERBERATION_DESC[reverberation]
    noise_phrase = BACKGROUND_NOISE_DESC[background_noise]
    quality_phrase = QUALITY_DESC[quality]

    delivery_sentence = (
        f"{speaker} speaks in {language} {voice_phrase} "
        f"{pitch_phrase}, {emotion_phrase} {speed_phrase}."
    )
    recording_sentence = (
        f"The recording is {room_phrase}, {noise_phrase}, "
        f"captured {quality_phrase}."
    )
    return f"{delivery_sentence} {recording_sentence}"
def get_speakers_for_language(language):
    """
    Return the recommended speakers for a given language.

    Args:
        language (str): Language name to look up in LANGUAGE_VOICE_MAPPING.

    Returns:
        list: Speaker names for the language, or an empty list when the
            language is not in the table. The list is a fresh copy, so
            callers may modify it freely.
    """
    # Copy instead of handing out the list stored in the module-level table;
    # otherwise a caller that appends to or sorts the result would silently
    # change what every later lookup returns.
    return list(LANGUAGE_VOICE_MAPPING.get(language, ()))