# Page-scrape residue ("Spaces: Sleeping") from the hosting page; not part of the program.
import streamlit as st | |
import requests | |
import os | |
import PyPDF2 | |
import docx | |
import time | |
#------------------------------------------------------------------------ | |
# Configurations | |
#------------------------------------------------------------------------ | |
# Streamlit page setup | |
# Page-level Streamlit settings: title, icon, layout, sidebar state and the
# "Get Help" / "About" menu entries.
# NOTE(review): the "Get Help" address looks redacted by the page scrape --
# confirm and restore the real contact e-mail.
_menu_items = {
    'Get Help': 'mailto:[email protected]',
    'About': "This app is built to support translation tasks",
}
st.set_page_config(
    page_title="Text Translator",
    page_icon=":speech_balloon:",
    layout="centered",
    initial_sidebar_state="auto",
    menu_items=_menu_items,
)
#------------------------------------------------------------------------ | |
# Title | |
#------------------------------------------------------------------------ | |
# Set the title of the app | |
# Main heading followed by a one-paragraph description of how to use the app.
_app_title = "Text Translator"
_app_description = (
    "\n"
    "Choose a target language, enter your text or upload a document, and click **Translate** to get the translated text.\n"
)
st.title(_app_title)
st.write(_app_description)
#------------------------------------------------------------------------ | |
# Sidebar | |
#------------------------------------------------------------------------ | |
# Sidebar: branding title, a collapsible contact panel, and usage instructions.
with st.sidebar:
    # Password input field (disabled)
    # password = st.text_input("Enter Password:", type="password")
    # Logo width in pixels; only referenced by the disabled st.image call below.
    image_width = 300
    # image_path = "MTSSai_logo.png"
    # st.image(image_path, width=image_width)
    st.title("MTSS.ai")

    # Collapsible panel with contact details for help and bug reports.
    # NOTE(review): the e-mail address looks redacted by the page scrape -- confirm.
    with st.expander("Need help and report a bug"):
        st.write(
            "\n"
            "**Contact**: Cheyne LeVesseur, PhD\n"
            "**Email**: [email protected]\n"
        )

    st.divider()
    st.subheader('User Instructions')
    # Step-by-step instructions, rendered as a Markdown bullet list.
    User_Instructions = (
        "\n"
        "- **Step 1**: Provide either text input or upload a document for translation.\n"
        "- **Step 2**: Click Translate.\n"
        "- **Step 3**: Sit back, relax, and let the magic happen!\n"
    )
    st.markdown(User_Instructions)
#------------------------------------------------------------------------ | |
# Functions | |
#------------------------------------------------------------------------ | |
# Language to model mapping | |
# Maps each selectable target language to the model id used to translate into
# it. Every id has the form "Helsinki-NLP/opus-mt-en-<code>", so only the
# per-language code is listed; insertion order is the order shown to the user.
language_model_mapping = {
    language_name: f"Helsinki-NLP/opus-mt-en-{model_code}"
    for language_name, model_code in (
        ("Spanish", "es"),
        ("Arabic", "ar"),
        ("Chinese", "zh"),
        ("Albanian", "sq"),
        ("French", "fr"),
        ("German", "de"),
        ("Japanese", "jap"),
        ("Italian", "it"),
        ("Dutch", "nl"),
        ("Hindi", "hi"),
        ("Russian", "ru"),
        ("Indonesian", "id"),
        ("Greek", "el"),
        ("Danish", "da"),
        ("Swedish", "sv"),
        ("Czech", "cs"),
        ("Catalan", "ca"),
        ("Bulgarian", "bg"),
        ("Estonian", "et"),
        ("Basque", "eu"),
        ("Vietnamese", "vi"),
        ("Finnish", "fi"),
        ("Hebrew", "he"),
        ("Azerbaijani", "az"),
        ("Afrikaans", "af"),
        ("Armenian", "hy"),
        ("Hungarian", "hu"),
    )
}
# Dropdown for language selection | |
# Target-language dropdown; the options are the mapping's keys in insertion order.
language = st.selectbox(
    "Select target language",
    list(language_model_mapping),
)
# Radio buttons choosing between typed text and an uploaded document.
input_option = st.radio(
    "Select input method:",
    ("Text Input", "Upload Document"),
)
# Starts empty so later code can call .strip() on it even when nothing was provided.
input_text = ""
# Functions to extract text from files | |
def extract_text_from_pdf(pdf_file):
    """Return the text extracted from every page of a PDF.

    Args:
        pdf_file: Object handed directly to ``PyPDF2.PdfReader``.

    Returns:
        The text of each page followed by a newline, in page order; pages
        whose extraction yields nothing are skipped. If anything raises, the
        error is shown with ``st.error`` and an empty string is returned.
    """
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        page_texts = (page.extract_text() for page in reader.pages)
        return "".join(page_text + "\n" for page_text in page_texts if page_text)
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
        return ""
def extract_text_from_docx(docx_file):
    """Return the text of every paragraph in a Word document.

    Args:
        docx_file: Object handed directly to ``docx.Document``.

    Returns:
        Each paragraph's text followed by a newline, in document order. If
        anything raises, the error is shown with ``st.error`` and an empty
        string is returned.
    """
    try:
        document = docx.Document(docx_file)
        return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)
    except Exception as e:
        st.error(f"Error extracting text from Word document: {e}")
        return ""
# Text area or file uploader based on input method | |
# Collect the text to translate from whichever input method was selected.
if input_option == "Upload Document":
    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "docx"])
    if uploaded_file is not None:
        file_extension = os.path.splitext(uploaded_file.name)[1].lower()
        # extension -> (spinner message, extractor function)
        _extractors = {
            ".pdf": ("Extracting text from PDF...", extract_text_from_pdf),
            ".docx": ("Extracting text from Word document...", extract_text_from_docx),
        }
        if file_extension in _extractors:
            _spinner_message, _extract = _extractors[file_extension]
            with st.spinner(_spinner_message):
                input_text = _extract(uploaded_file)
        else:
            st.error("Unsupported file type.")
            input_text = ""
elif input_option == "Text Input":
    input_text = st.text_area("Enter text to translate", height=200)
# Function to split text into chunks | |
def split_text_into_chunks(text, max_chunk_size):
    """Split ``text`` into consecutive chunks of at most ``max_chunk_size`` characters.

    When a plain cut at ``max_chunk_size`` would land in the middle of a word,
    the boundary is moved back to just after the last whitespace character in
    the current window, so words are not split across chunks. If the window
    contains no whitespace (a single very long word), the chunk is cut at
    exactly ``max_chunk_size`` characters. No characters are dropped:
    concatenating the returned chunks reproduces ``text``.

    Args:
        text: The string to split.
        max_chunk_size: Maximum number of characters per chunk; must be positive.

    Returns:
        A list of non-empty strings; an empty list when ``text`` is empty.

    Raises:
        ValueError: If ``max_chunk_size`` is not positive.
    """
    if max_chunk_size <= 0:
        raise ValueError("max_chunk_size must be a positive integer")

    chunks = []
    start, length = 0, len(text)
    while start < length:
        end = min(start + max_chunk_size, length)
        if end < length and not (text[end - 1].isspace() or text[end].isspace()):
            # The cut would fall inside a word: back up to just after the
            # last whitespace in this window, if there is one.
            for cut in range(end - 1, start, -1):
                if text[cut - 1].isspace():
                    end = cut
                    break
        chunks.append(text[start:end])
        start = end
    return chunks
# Function to perform the translation with retry mechanism | |
def translate_text(text, target_lang, max_retries=5, backoff_factor=2, timeout=60):
    """Translate ``text`` into ``target_lang`` through the Hugging Face Inference API.

    The text is split into chunks with ``split_text_into_chunks``, each chunk
    is POSTed to the model that ``language_model_mapping`` associates with
    ``target_lang``, and the translated chunks are joined with single spaces.

    Args:
        text: The text to translate.
        target_lang: A key of ``language_model_mapping``.
        max_retries: Maximum number of requests attempted per chunk.
        backoff_factor: Base of the exponential back-off; after the k-th
            failed attempt the function sleeps ``backoff_factor ** k`` seconds.
        timeout: Seconds to wait for each HTTP request before counting it as
            a failed attempt.

    Returns:
        The translated text, or ``None`` (after reporting via ``st.error``)
        when the language is unsupported, the ``HF_API_KEY`` environment
        variable is unset, the API returns an error payload, or a chunk still
        fails after ``max_retries`` attempts.
    """
    model = language_model_mapping.get(target_lang)
    if not model:
        st.error("Unsupported language selected.")
        return None

    # Retrieve Hugging Face API key from environment variables
    hf_api_key = os.getenv('HF_API_KEY')
    if not hf_api_key:
        st.error("Hugging Face API key not set in environment variables.")
        return None

    api_url = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {hf_api_key}"}

    # Split the text into manageable chunks
    max_chunk_size = 500  # Adjust based on API limitations
    translated_chunks = []
    for chunk_index, chunk in enumerate(split_text_into_chunks(text, max_chunk_size)):
        payload = {"inputs": chunk}
        for attempt in range(1, max_retries + 1):
            got_result = False
            result = None
            try:
                # A timeout keeps a stalled connection from hanging the app forever.
                response = requests.post(
                    api_url, headers=headers, json=payload, timeout=timeout
                )
                # 503 (Service Unavailable) is retried after a delay.
                if response.status_code != 503:
                    response.raise_for_status()  # Raise an error for bad status codes
                    result = response.json()
                    got_result = True
            except (requests.exceptions.RequestException, ValueError):
                # Network failure, timeout, HTTP error status, or a body that
                # is not valid JSON: count it as a failed attempt.
                got_result = False

            if not got_result:
                # Back off before the next attempt; no point sleeping after the last one.
                if attempt < max_retries:
                    time.sleep(backoff_factor ** attempt)
                continue

            # Handle possible errors from the API
            if isinstance(result, dict) and result.get("error"):
                st.error(f"Error from translation API: {result['error']}")
                return None

            # The API might return a list of translations
            if isinstance(result, list) and result and isinstance(result[0], dict):
                translated_text = result[0].get("translation_text", "No translation found.")
            elif isinstance(result, dict) and "translation_text" in result:
                translated_text = result["translation_text"]
            else:
                translated_text = "Unexpected response format from the API."
            translated_chunks.append(translated_text)
            break  # Exit the retry loop if successful
        else:
            # All retry attempts failed for this chunk
            st.error(f"Failed to translate chunk {chunk_index + 1} after {max_retries} attempts.")
            return None

    return " ".join(translated_chunks)
# Translate button | |
# Run the translation when the button is pressed and show the result or an error.
if st.button("Translate"):
    if input_text.strip():
        with st.spinner("Translation service loading..."):
            translated = translate_text(input_text, language)
        if translated:
            st.subheader("Translated Text:")
            st.write(translated)
        else:
            st.error("Translation failed. Please try again later.")
    else:
        # Nothing (or only whitespace) was provided.
        st.warning("Please enter some text to translate.")