File size: 8,760 Bytes
ed6a185
 
 
 
 
 
 
 
 
 
 
 
fb59a53
ed6a185
 
 
 
fb59a53
ed6a185
 
 
 
c1f0266
 
 
 
 
fb59a53
c1f0266
 
 
 
 
 
ed6a185
 
 
 
 
 
 
 
 
 
1485adb
ed6a185
1485adb
 
 
 
ed6a185
 
 
 
 
4447788
ed6a185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
import streamlit as st
import requests
import os
import PyPDF2
import docx
import time

#------------------------------------------------------------------------
# Configurations
#------------------------------------------------------------------------
# Streamlit page setup: browser-tab title/icon, centered layout, and the
# entries shown in the app's built-in menu.
st.set_page_config(
    page_title="Text Translator", 
    page_icon=":speech_balloon:", 
    layout="centered", 
    initial_sidebar_state="auto",
    menu_items={
        # NOTE(review): the address after "mailto:" looks like a redaction
        # placeholder rather than a real e-mail address -- confirm the
        # intended contact address.
        'Get Help': 'mailto:[email protected]',
        'About': "This app is built to support translation tasks"
    }
)

#------------------------------------------------------------------------
# Title
#------------------------------------------------------------------------

# Main-page heading.
st.title("Text Translator")

# Short usage description rendered directly under the heading.
st.write("""
Choose a target language, enter your text or upload a document, and click **Translate** to get the translated text.
""")

#------------------------------------------------------------------------
# Sidebar
#------------------------------------------------------------------------
# Everything created inside this block is rendered in the sidebar: branding
# title, a collapsible contact section, and step-by-step user instructions.
with st.sidebar:
    # Password input field (currently disabled)
    # password = st.text_input("Enter Password:", type="password")
    
    # Desired logo width in pixels. Only the commented-out st.image call
    # below refers to this value, so it is unused while the logo is disabled.
    image_width = 300  
    # Path to the logo image (currently disabled)
    # image_path = "MTSSai_logo.png"
    # Display the logo (currently disabled)
    # st.image(image_path, width=image_width)

    # Sidebar heading
    st.title("MTSS.ai")
    
    # Collapsible section with contact details for help and bug reports.
    # NOTE(review): the e-mail text in the string below looks like a redaction
    # placeholder -- confirm the intended address.
    with st.expander("Need help and report a bug"):
        st.write("""
        **Contact**: Cheyne LeVesseur, PhD  
        **Email**: [email protected]
        """)
    st.divider()
    st.subheader('User Instructions')
    
    # Usage steps, written as a Markdown bullet list and rendered below.
    User_Instructions = """

    - **Step 1**: Provide either text input or upload a document for translation.
    - **Step 2**: Click Translate.
    - **Step 3**: Sit back, relax, and let the magic happen!

    """
    st.markdown(User_Instructions)

#------------------------------------------------------------------------
# Functions
#------------------------------------------------------------------------

# Language to model mapping.
# Keys are the language names offered in the target-language dropdown; values
# are the model identifiers that translate_text() inserts into the inference
# API URL. The "opus-mt-en-<code>" naming suggests every model translates from
# English into the named language -- presumably so; verify against the model
# cards before adding entries.
language_model_mapping = {
    "Spanish": "Helsinki-NLP/opus-mt-en-es",
    "Arabic": "Helsinki-NLP/opus-mt-en-ar",
    "Chinese": "Helsinki-NLP/opus-mt-en-zh",
    "Albanian": "Helsinki-NLP/opus-mt-en-sq",
    "French": "Helsinki-NLP/opus-mt-en-fr",
    "German": "Helsinki-NLP/opus-mt-en-de",
    "Japanese": "Helsinki-NLP/opus-mt-en-jap",
    "Italian": "Helsinki-NLP/opus-mt-en-it",
    "Dutch": "Helsinki-NLP/opus-mt-en-nl",
    "Hindi": "Helsinki-NLP/opus-mt-en-hi",
    "Russian": "Helsinki-NLP/opus-mt-en-ru",
    "Indonesian": "Helsinki-NLP/opus-mt-en-id",
    "Greek": "Helsinki-NLP/opus-mt-en-el",
    "Danish": "Helsinki-NLP/opus-mt-en-da",
    "Swedish": "Helsinki-NLP/opus-mt-en-sv",
    "Czech": "Helsinki-NLP/opus-mt-en-cs",
    "Catalan": "Helsinki-NLP/opus-mt-en-ca",
    "Bulgarian": "Helsinki-NLP/opus-mt-en-bg",
    "Estonian": "Helsinki-NLP/opus-mt-en-et",
    "Basque": "Helsinki-NLP/opus-mt-en-eu",
    "Vietnamese": "Helsinki-NLP/opus-mt-en-vi",
    "Finnish": "Helsinki-NLP/opus-mt-en-fi",
    "Hebrew": "Helsinki-NLP/opus-mt-en-he",
    "Azerbaijani": "Helsinki-NLP/opus-mt-en-az",
    "Afrikaans": "Helsinki-NLP/opus-mt-en-af",
    "Armenian": "Helsinki-NLP/opus-mt-en-hy",
    "Hungarian": "Helsinki-NLP/opus-mt-en-hu"
}

# Dropdown for language selection; the options are the mapping's keys, so the
# chosen value can be looked up in language_model_mapping directly.
language = st.selectbox(
    "Select target language",
    list(language_model_mapping.keys())
)

# Input method selection: typed text or an uploaded document.
input_option = st.radio("Select input method:", ("Text Input", "Upload Document"))

# Text to translate. Stays empty until the chosen input method supplies
# content further down the script.
input_text = ""

# Functions to extract text from files
def extract_text_from_pdf(pdf_file):
    """Return the extractable text of every page of a PDF.

    Args:
        pdf_file: The object handed to ``PyPDF2.PdfReader``; this app passes
            the file returned by the Streamlit uploader.

    Returns:
        The page texts in document order, each followed by a newline. Pages
        whose extraction yields nothing are skipped. If anything raises, the
        error is shown with ``st.error`` and an empty string is returned.
    """
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        page_texts = (page.extract_text() for page in reader.pages)
        # Only pages that produced text contribute a line to the result.
        return "".join(content + "\n" for content in page_texts if content)
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
        return ""

def extract_text_from_docx(docx_file):
    """Return the paragraph text of a Word document.

    Args:
        docx_file: The object handed to ``docx.Document``; this app passes
            the file returned by the Streamlit uploader.

    Returns:
        Every paragraph's text in document order, each followed by a newline
        (empty paragraphs therefore contribute a blank line). If anything
        raises, the error is shown with ``st.error`` and an empty string is
        returned.
    """
    try:
        document = docx.Document(docx_file)
        return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)
    except Exception as e:
        st.error(f"Error extracting text from Word document: {e}")
        return ""

# Obtain the text to translate from whichever input method was selected.
if input_option == "Text Input":
    input_text = st.text_area("Enter text to translate", height=200)
elif input_option == "Upload Document":
    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "docx"])
    if uploaded_file is not None:
        # Pick the extractor from the lower-cased file-name extension.
        _, file_extension = os.path.splitext(uploaded_file.name)
        file_extension = file_extension.lower()
        extractors_by_extension = {
            ".pdf": ("Extracting text from PDF...", extract_text_from_pdf),
            ".docx": ("Extracting text from Word document...", extract_text_from_docx),
        }
        selected_extractor = extractors_by_extension.get(file_extension)
        if selected_extractor is None:
            st.error("Unsupported file type.")
            input_text = ""
        else:
            spinner_message, extract_text = selected_extractor
            with st.spinner(spinner_message):
                input_text = extract_text(uploaded_file)

# Function to split text into chunks
def split_text_into_chunks(text, max_chunk_size):
    """Split *text* into consecutive chunks of at most *max_chunk_size* characters.

    Chunks are cut just after a whitespace character whenever the window
    contains one, so words are not split in half. A hard cut at
    *max_chunk_size* is used only when the window holds no usable whitespace
    (for example a single word longer than the limit). No characters are
    dropped: ``"".join(chunks) == text``.

    Args:
        text: The string to split.
        max_chunk_size: Maximum length of each chunk; must be positive.

    Returns:
        A list of non-empty strings, or an empty list when *text* is empty.

    Raises:
        ValueError: If *max_chunk_size* is not positive.
    """
    if max_chunk_size <= 0:
        raise ValueError("max_chunk_size must be a positive integer")

    chunks = []
    text_length = len(text)
    start = 0
    while start < text_length:
        end = min(start + max_chunk_size, text_length)
        if end < text_length:
            # Walk back from the hard limit to the nearest whitespace and cut
            # right after it. The chunk's first character is excluded so a
            # chunk never collapses to a lone leading whitespace character.
            for index in range(end - 1, start, -1):
                if text[index].isspace():
                    end = index + 1
                    break
        chunks.append(text[start:end])
        start = end
    return chunks

# Function to perform the translation with retry mechanism
def _extract_translation(result):
    """Return the translated string found in a decoded API response.

    Accepts either a non-empty list whose first element is a dict, or a dict
    carrying ``"translation_text"``. Any other shape yields a placeholder
    message instead of raising.
    """
    if isinstance(result, list) and result:
        first = result[0]
        if isinstance(first, dict):
            return first.get("translation_text", "No translation found.")
    elif isinstance(result, dict) and "translation_text" in result:
        return result["translation_text"]
    return "Unexpected response format from the API."


def translate_text(text, target_lang, max_retries=5, backoff_factor=2, timeout=30):
    """Translate *text* into *target_lang* through the Hugging Face inference API.

    The text is split into chunks of at most 500 characters; each chunk is
    posted separately and the translated chunks are joined with single spaces.
    A request that returns HTTP 503 or raises a ``requests`` exception is
    retried, waiting ``backoff_factor ** attempt`` seconds between attempts.

    Args:
        text: The text to translate.
        target_lang: A key of ``language_model_mapping`` selecting the model.
        max_retries: Maximum number of attempts per chunk.
        backoff_factor: Base of the exponential wait between attempts.
        timeout: Seconds to wait for each HTTP request before treating it as
            a failed attempt.

    Returns:
        The translated text, or ``None`` when the language is unsupported, the
        ``HF_API_KEY`` environment variable is unset, the API reports an
        error, or a chunk still fails after *max_retries* attempts. Every
        ``None`` return is preceded by an ``st.error`` message.
    """
    model = language_model_mapping.get(target_lang)
    if not model:
        st.error("Unsupported language selected.")
        return None

    # Retrieve Hugging Face API key from environment variables
    hf_api_key = os.getenv('HF_API_KEY')
    if not hf_api_key:
        st.error("Hugging Face API key not set in environment variables.")
        return None

    API_URL = f"https://api-inference.huggingface.co/models/{model}"
    headers = {
        "Authorization": f"Bearer {hf_api_key}"  # Use the API key from environment variables
    }

    # Split the text into manageable chunks
    max_chunk_size = 500  # Adjust based on API limitations
    text_chunks = split_text_into_chunks(text, max_chunk_size)
    translated_chunks = []

    for chunk_index, chunk in enumerate(text_chunks):
        payload = {"inputs": chunk}
        last_error = None

        for attempt in range(1, max_retries + 1):
            result = None
            try:
                response = requests.post(
                    API_URL, headers=headers, json=payload, timeout=timeout
                )
                if response.status_code == 503:
                    # Service Unavailable: treat as transient and retry.
                    last_error = "service unavailable (HTTP 503)"
                    should_retry = True
                else:
                    response.raise_for_status()  # Raise an error for bad status codes
                    result = response.json()
                    should_retry = False
            except requests.exceptions.RequestException as e:
                last_error = e
                should_retry = True

            if should_retry:
                # No point in waiting after the final attempt; the loop is
                # about to give up anyway.
                if attempt < max_retries:
                    time.sleep(backoff_factor ** attempt)
                continue

            # Handle possible errors from the API
            if isinstance(result, dict) and result.get("error"):
                st.error(f"Error from translation API: {result['error']}")
                return None

            translated_chunks.append(_extract_translation(result))
            break  # Exit the retry loop if successful
        else:
            # All retry attempts failed for this chunk
            message = f"Failed to translate chunk {chunk_index + 1} after {max_retries} attempts."
            if last_error is not None:
                message += f" Last error: {last_error}"
            st.error(message)
            return None

    return " ".join(translated_chunks)

# Translate button: run the translation only when there is non-blank input,
# otherwise ask the user for text.
if st.button("Translate"):
    if input_text.strip():
        with st.spinner("Translation service loading..."):
            translated = translate_text(input_text, language)
            if not translated:
                # None or an empty result both count as a failed translation.
                st.error("Translation failed. Please try again later.")
            else:
                st.subheader("Translated Text:")
                st.write(translated)
    else:
        st.warning("Please enter some text to translate.")