Spaces:

abhishekrajpurohit
/

generate_local_lan

Runtime error

File size: 5,048 Bytes

195bb33

# Speaker names available for each language, keyed by the language name.
# get_speakers_for_language() looks languages up here and describes the
# values as the recommended speakers; some languages list only one speaker.
# Every speaker named here also has an entry in VOICE_CHARACTERISTICS.
LANGUAGE_VOICE_MAPPING = {
    "Assamese": ["Amit", "Sita"],
    "Bengali": ["Arjun", "Aditi"],
    "Bodo": ["Bikram", "Maya"],
    "Chhattisgarhi": ["Bhanu", "Champa"],
    "Dogri": ["Karan"],
    "English": ["Thoma", "Mary"],
    "Gujarati": ["Yash", "Neha"],
    "Hindi": ["Rohit", "Divya"],
    "Kannada": ["Suresh", "Anu"],
    "Malayalam": ["Anjali", "Harish"],
    "Manipuri": ["Laishram", "Ranjit"],
    "Marathi": ["Sanjay", "Sunita"],
    "Nepali": ["Amrita"],
    "Odia": ["Manas", "Debjani"],
    "Punjabi": ["Divjot", "Gurpreet"],
    "Sanskrit": ["Aryan"],
    "Tamil": ["Jaya", "Kavitha"],
    "Telugu": ["Prakash", "Lalitha"]
}

# Voice characteristics for each speaker, keyed by speaker name.
# Each value is a bare adjective phrase (no leading preposition) that
# construct_description() embeds in the sentence about the speaker.
# Speakers that are not listed here get a generic fallback phrase there.
VOICE_CHARACTERISTICS = {
    "Amit": "slightly deep and resonant",
    "Sita": "clear and well-paced",
    "Arjun": "moderate and clear",
    "Aditi": "high-pitched and expressive",
    "Bikram": "higher-pitched and energetic",
    "Maya": "balanced and pleasant",
    "Bhanu": "warm and measured",
    "Champa": "clear and gentle",
    "Karan": "high-pitched and engaging",
    "Thoma": "clear and well-articulated",
    "Mary": "pleasant and measured",
    "Yash": "warm and balanced",
    "Neha": "clear and dynamic",
    "Rohit": "moderate and expressive",
    "Divya": "pleasant and well-paced",
    "Suresh": "clear and precise",
    "Anu": "warm and melodious",
    "Anjali": "high-pitched and pleasant",
    "Harish": "deep and measured",
    "Laishram": "balanced and smooth",
    "Ranjit": "clear and authoritative",
    "Sanjay": "deep and authoritative",
    "Sunita": "high-pitched and pleasant",
    "Amrita": "high-pitched and gentle",
    "Manas": "moderate and measured",
    "Debjani": "clear and pleasant",
    "Divjot": "clear and dynamic",
    "Gurpreet": "warm and balanced",
    "Aryan": "resonant and measured",
    "Jaya": "high-pitched and melodious",
    "Kavitha": "clear and expressive",
    "Prakash": "clear and well-paced",
    "Lalitha": "pleasant and melodious"
}

# Emotion descriptions: the keys are the values accepted by the `emotion`
# argument of construct_description() (which defaults to "Neutral"); each
# value is the phrase interpolated into the generated description.
EMOTION_DESC = {
    "Neutral": "maintaining a balanced and natural tone",
    "Happy": "with a warm and positive energy",
    "Sad": "with a gentle and somber tone",
    "Angry": "with intense and strong delivery",
    "Highly Expressive": "with dynamic and vibrant emotional delivery",
    "Monotone": "with minimal tonal variation"
}

# Speed descriptions: the keys are the values accepted by the `speed`
# argument of construct_description() (which defaults to "Normal"); each
# value is the pace phrase interpolated into the generated description.
SPEED_DESC = {
    "Very Slow": "at an extremely measured pace",
    "Slow": "at a measured, deliberate pace",
    "Normal": "at a natural, comfortable pace",
    "Fast": "at a swift, dynamic pace",
    "Very Fast": "at a rapid, accelerated pace"
}

# Pitch modifiers: the keys are the values accepted by the `pitch` argument
# of construct_description() (which defaults to "Medium"); each value is the
# register phrase interpolated into the generated description.
PITCH_DESC = {
    "Very Low": "in an extremely deep register",
    "Low": "in a deeper register",
    "Medium": "in a natural pitch range",
    "High": "in a higher register",
    "Very High": "in an extremely high register"
}

# Background-noise descriptions: the keys are the values accepted by the
# `background_noise` argument of construct_description() (which defaults to
# "Minimal"). Note that "None" is the string key, not the None object.
BACKGROUND_NOISE_DESC = {
    "None": "with absolutely no background noise",
    "Minimal": "with minimal background noise",
    "Moderate": "with moderate ambient noise",
    "Noticeable": "with noticeable background sounds"
}

# Reverberation descriptions: the keys are the values accepted by the
# `reverberation` argument of construct_description() (which defaults to
# "Close"); each value completes the sentence "The recording is ...".
REVERBERATION_DESC = {
    "Very Close": "in an extremely intimate setting",
    "Close": "in a close-sounding environment",
    "Moderate": "in a moderately spacious environment",
    "Distant": "in a spacious, reverberant setting",
    "Very Distant": "in a very large, echoing space"
}

# Audio-quality descriptions: the keys are the values accepted by the
# `quality` argument of construct_description() (which defaults to "High");
# each value follows the word "captured" in the generated description.
QUALITY_DESC = {
    "Basic": "in basic audio quality",
    "Good": "in good audio quality",
    "High": "in high audio quality",
    "Studio": "in professional studio quality"
}

def construct_description(
    speaker,
    language,
    emotion="Neutral",
    speed="Normal",
    pitch="Medium",
    background_noise="Minimal",
    reverberation="Close",
    quality="High",
):
    """
    Build the natural-language voice description handed to the TTS model.

    The result is two sentences: one about the speaker (language, voice
    characteristic, pitch, emotion, speed) and one about the recording
    (reverberation, background noise, audio quality).

    Args:
        speaker (str): The name of the speaker. Names missing from
            VOICE_CHARACTERISTICS are still accepted and are described
            as speaking "with clear articulation".
        language (str): The language being spoken; inserted verbatim.
        emotion (str): The emotional tone; a key of EMOTION_DESC.
        speed (str): The speaking speed; a key of SPEED_DESC.
        pitch (str): The pitch level; a key of PITCH_DESC.
        background_noise (str): Level of background noise; a key of
            BACKGROUND_NOISE_DESC.
        reverberation (str): Distance/space effect; a key of
            REVERBERATION_DESC.
        quality (str): Audio quality level; a key of QUALITY_DESC.

    Returns:
        str: A detailed description for the TTS model

    Raises:
        KeyError: If emotion, speed, pitch, background_noise,
            reverberation or quality is not a key of its lookup table.
    """
    # VOICE_CHARACTERISTICS holds bare adjective phrases such as
    # "warm and balanced". Wrapping them as "with a ... voice" keeps the
    # sentence grammatical and parallel to the fallback used for unknown
    # speakers, instead of splicing the adjectives right after the language.
    trait = VOICE_CHARACTERISTICS.get(speaker)
    if trait is None:
        voice_phrase = "with clear articulation"
    else:
        voice_phrase = f"with a {trait} voice"

    speaker_sentence = (
        f"{speaker} speaks in {language} {voice_phrase} "
        f"{PITCH_DESC[pitch]}, {EMOTION_DESC[emotion]} {SPEED_DESC[speed]}."
    )
    recording_sentence = (
        f"The recording is {REVERBERATION_DESC[reverberation]}, "
        f"{BACKGROUND_NOISE_DESC[background_noise]}, "
        f"captured {QUALITY_DESC[quality]}."
    )

    return f"{speaker_sentence} {recording_sentence}"

def get_speakers_for_language(language):
    """
    Get the list of recommended speakers for a given language.

    Args:
        language (str): The language to get speakers for

    Returns:
        list: A new list of the recommended speakers for the language, in
            the order they appear in LANGUAGE_VOICE_MAPPING. Empty when the
            language is not in the mapping.
    """
    # Return a copy: handing out the module-level list itself would let a
    # caller's in-place edits (append/sort/remove) silently change the
    # speakers reported for this language on every later call.
    return list(LANGUAGE_VOICE_MAPPING.get(language, []))