Spaces:
Sleeping
Sleeping
File size: 8,760 Bytes
ed6a185 fb59a53 ed6a185 fb59a53 ed6a185 c1f0266 fb59a53 c1f0266 ed6a185 1485adb ed6a185 1485adb ed6a185 4447788 ed6a185 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 |
import streamlit as st
import requests
import os
import PyPDF2
import docx
import time
#------------------------------------------------------------------------
# Configurations
#------------------------------------------------------------------------
# Entries handed to Streamlit through the ``menu_items`` option.
_menu_items = {
    'Get Help': 'mailto:[email protected]',
    'About': "This app is built to support translation tasks",
}

# Page-level setup; this stays ahead of every other ``st.*`` call in the script.
st.set_page_config(
    page_title="Text Translator",
    page_icon=":speech_balloon:",
    layout="centered",
    initial_sidebar_state="auto",
    menu_items=_menu_items,
)
#------------------------------------------------------------------------
# Title
#------------------------------------------------------------------------
# Short usage blurb rendered directly under the page heading.
_app_description = """
Choose a target language, enter your text or upload a document, and click **Translate** to get the translated text.
"""

# Page heading followed by the description.
st.title("Text Translator")
st.write(_app_description)
#------------------------------------------------------------------------
# Sidebar
#------------------------------------------------------------------------
# Markdown shown inside the help expander.
_contact_details = """
**Contact**: Cheyne LeVesseur, PhD
**Email**: [email protected]
"""

# Step-by-step usage notes shown at the bottom of the sidebar.
User_Instructions = """
- **Step 1**: Provide either text input or upload a document for translation.
- **Step 2**: Click Translate.
- **Step 3**: Sit back, relax, and let the magic happen!
"""

with st.sidebar:
    # Disabled: optional password gate.
    # password = st.text_input("Enter Password:", type="password")

    # Logo width in pixels; only used by the disabled image lines below.
    image_width = 300
    # Disabled: logo rendering.
    # image_path = "MTSSai_logo.png"
    # st.image(image_path, width=image_width)

    # Sidebar heading.
    st.title("MTSS.ai")

    # Collapsible contact section for help requests and bug reports.
    with st.expander("Need help and report a bug"):
        st.write(_contact_details)

    st.divider()

    # Usage instructions, rendered as a Markdown bullet list.
    st.subheader('User Instructions')
    st.markdown(User_Instructions)
#------------------------------------------------------------------------
# Functions
#------------------------------------------------------------------------
# Target-language display name -> model identifier used for that language.
# The dict's insertion order is also the order of the dropdown options.
language_model_mapping = {
    "Spanish": "Helsinki-NLP/opus-mt-en-es",
    "Arabic": "Helsinki-NLP/opus-mt-en-ar",
    "Chinese": "Helsinki-NLP/opus-mt-en-zh",
    "Albanian": "Helsinki-NLP/opus-mt-en-sq",
    "French": "Helsinki-NLP/opus-mt-en-fr",
    "German": "Helsinki-NLP/opus-mt-en-de",
    "Japanese": "Helsinki-NLP/opus-mt-en-jap",
    "Italian": "Helsinki-NLP/opus-mt-en-it",
    "Dutch": "Helsinki-NLP/opus-mt-en-nl",
    "Hindi": "Helsinki-NLP/opus-mt-en-hi",
    "Russian": "Helsinki-NLP/opus-mt-en-ru",
    "Indonesian": "Helsinki-NLP/opus-mt-en-id",
    "Greek": "Helsinki-NLP/opus-mt-en-el",
    "Danish": "Helsinki-NLP/opus-mt-en-da",
    "Swedish": "Helsinki-NLP/opus-mt-en-sv",
    "Czech": "Helsinki-NLP/opus-mt-en-cs",
    "Catalan": "Helsinki-NLP/opus-mt-en-ca",
    "Bulgarian": "Helsinki-NLP/opus-mt-en-bg",
    "Estonian": "Helsinki-NLP/opus-mt-en-et",
    "Basque": "Helsinki-NLP/opus-mt-en-eu",
    "Vietnamese": "Helsinki-NLP/opus-mt-en-vi",
    "Finnish": "Helsinki-NLP/opus-mt-en-fi",
    "Hebrew": "Helsinki-NLP/opus-mt-en-he",
    "Azerbaijani": "Helsinki-NLP/opus-mt-en-az",
    "Afrikaans": "Helsinki-NLP/opus-mt-en-af",
    "Armenian": "Helsinki-NLP/opus-mt-en-hy",
    "Hungarian": "Helsinki-NLP/opus-mt-en-hu",
}

# Text to translate; stays empty until one of the input widgets supplies it.
input_text = ""

# Target-language dropdown, one option per mapping key.
_language_choices = list(language_model_mapping)
language = st.selectbox("Select target language", _language_choices)

# Choice between typing text and uploading a document.
_input_methods = ("Text Input", "Upload Document")
input_option = st.radio("Select input method:", _input_methods)
# Functions to extract text from files
def extract_text_from_pdf(pdf_file):
    """Return the text of every page in *pdf_file*, one page per line group.

    Args:
        pdf_file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The extracted page texts, each followed by a newline, concatenated in
        page order. Pages for which ``extract_text()`` yields nothing are
        skipped. Returns ``""`` if reading or extraction raises; the error is
        reported to the user with ``st.error`` instead of propagating.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Iterate over the pages directly rather than by index, and build the
        # result with a single join instead of repeated string concatenation.
        page_texts = (page.extract_text() for page in pdf_reader.pages)
        return "".join(f"{page_text}\n" for page_text in page_texts if page_text)
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any parser failure
        # is shown to the user and the caller simply sees "no text".
        st.error(f"Error extracting text from PDF: {e}")
        return ""
def extract_text_from_docx(docx_file):
    """Return the text of every paragraph in a Word document.

    Args:
        docx_file: A path or binary file-like object accepted by
            ``docx.Document``.

    Returns:
        The text of each entry in ``doc.paragraphs``, each followed by a
        newline, concatenated in document order. Returns ``""`` if opening or
        reading raises; the error is reported to the user with ``st.error``
        instead of propagating.
    """
    try:
        doc = docx.Document(docx_file)
        # One join instead of growing a string paragraph by paragraph.
        return "".join(f"{para.text}\n" for para in doc.paragraphs)
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any parser failure
        # is shown to the user and the caller simply sees "no text".
        st.error(f"Error extracting text from Word document: {e}")
        return ""
# Text area or file uploader based on input method
# File extension -> (spinner message, extractor function) for uploaded documents.
_document_extractors = {
    ".pdf": ("Extracting text from PDF...", extract_text_from_pdf),
    ".docx": ("Extracting text from Word document...", extract_text_from_docx),
}

if input_option == "Text Input":
    input_text = st.text_area("Enter text to translate", height=200)
elif input_option == "Upload Document":
    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "docx"])
    if uploaded_file is not None:
        # Dispatch on the lower-cased extension of the uploaded file's name.
        file_extension = os.path.splitext(uploaded_file.name)[1].lower()
        _extractor_entry = _document_extractors.get(file_extension)
        if _extractor_entry is None:
            st.error("Unsupported file type.")
            input_text = ""
        else:
            _spinner_message, _extract = _extractor_entry
            with st.spinner(_spinner_message):
                input_text = _extract(uploaded_file)
# Function to split text into chunks
def split_text_into_chunks(text, max_chunk_size):
    """Split *text* into consecutive chunks of at most *max_chunk_size* characters.

    Chunks end just after a whitespace character whenever the limit would
    otherwise fall in the middle of a word, so words are not cut in half.
    A hard cut at exactly *max_chunk_size* is used only when the window holds
    no usable whitespace boundary (for example, a single very long word).

    No characters are added or removed: ``"".join(chunks) == text``.

    Args:
        text: The string to split.
        max_chunk_size: Maximum length of each chunk; must be positive.

    Returns:
        A list of chunks in order; an empty list for empty *text*.

    Raises:
        ValueError: If *max_chunk_size* is not positive.
    """
    if max_chunk_size <= 0:
        raise ValueError("max_chunk_size must be a positive integer")

    chunks = []
    length = len(text)
    start = 0
    while start < length:
        end = start + max_chunk_size
        # Only look for a better boundary when the hard cut would land
        # between two non-whitespace characters, i.e. inside a word.
        if end < length and not text[end].isspace():
            cut = end
            while cut > start and not text[cut - 1].isspace():
                cut -= 1
            # Accept the boundary only if the chunk still carries real
            # content; otherwise fall back to the hard cut so progress is
            # guaranteed and no whitespace-only chunk is produced here.
            if text[start:cut].strip():
                end = cut
        chunks.append(text[start:end])
        start = end
    return chunks
# Function to perform the translation with retry mechanism
def translate_text(text, target_lang, max_retries=5, backoff_factor=2, timeout=30):
    """Translate *text* into *target_lang* through the Hugging Face Inference API.

    The text is split into chunks, each chunk is posted to the model mapped to
    *target_lang* in ``language_model_mapping``, and the translated chunks are
    joined with single spaces.

    Args:
        text: The text to translate.
        target_lang: A key of ``language_model_mapping``.
        max_retries: Maximum number of attempts per chunk.
        backoff_factor: Base of the exponential wait between attempts; the
            wait after attempt ``n`` is ``backoff_factor ** n`` seconds.
        timeout: Seconds to wait for each HTTP request before treating it as
            a failed attempt.

    Returns:
        The translated text, or ``None`` on failure. Every failure path
        reports its reason to the user with ``st.error`` before returning.
    """
    model = language_model_mapping.get(target_lang)
    if not model:
        st.error("Unsupported language selected.")
        return None

    # Retrieve Hugging Face API key from environment variables
    hf_api_key = os.getenv('HF_API_KEY')
    if not hf_api_key:
        st.error("Hugging Face API key not set in environment variables.")
        return None

    API_URL = f"https://api-inference.huggingface.co/models/{model}"
    headers = {
        "Authorization": f"Bearer {hf_api_key}"  # Use the API key from environment variables
    }

    # Split the text into manageable chunks
    max_chunk_size = 500  # Adjust based on API limitations
    text_chunks = split_text_into_chunks(text, max_chunk_size)

    translated_chunks = []
    for chunk_index, chunk in enumerate(text_chunks):
        payload = {"inputs": chunk}
        last_error = None  # Most recent failure, surfaced if every attempt fails

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.post(
                    API_URL, headers=headers, json=payload, timeout=timeout
                )
                if response.status_code == 503:
                    # Service Unavailable: treat as transient and retry after a delay
                    last_error = "HTTP 503 Service Unavailable"
                else:
                    response.raise_for_status()  # Raise an error for bad status codes
                    result = response.json()

                    # Handle possible errors from the API
                    if isinstance(result, dict) and result.get("error"):
                        st.error(f"Error from translation API: {result['error']}")
                        return None

                    # The API might return a list of translations
                    if isinstance(result, list) and result and isinstance(result[0], dict):
                        translated_text = result[0].get("translation_text", "No translation found.")
                    elif isinstance(result, dict) and "translation_text" in result:
                        translated_text = result["translation_text"]
                    else:
                        # NOTE(review): this placeholder (like "No translation found."
                        # above) ends up inside the returned translation; kept
                        # as-is to preserve existing behavior.
                        translated_text = "Unexpected response format from the API."

                    translated_chunks.append(translated_text)
                    break  # Exit the retry loop if successful
            except requests.exceptions.RequestException as e:
                last_error = e

            # Back off before the next attempt, but do not sleep after the
            # final one: there is nothing left to wait for.
            if attempt < max_retries:
                time.sleep(backoff_factor ** attempt)
        else:
            # All retry attempts failed for this chunk
            message = f"Failed to translate chunk {chunk_index + 1} after {max_retries} attempts."
            if last_error is not None:
                message += f" Last error: {last_error}"
            st.error(message)
            return None

    return " ".join(translated_chunks)
# Translate button
# Runs the translation when the button is clicked and renders the outcome.
if st.button("Translate"):
    if not input_text.strip():
        # Nothing typed, or no text could be extracted from the upload.
        st.warning("Please enter some text to translate.")
    else:
        # Keep the spinner visible only while the API calls are in flight.
        with st.spinner("Translation service loading..."):
            translated = translate_text(input_text, language)

        # translate_text returns None on failure; an empty translation is
        # treated as a failure as well.
        if translated:
            st.subheader("Translated Text:")
            st.write(translated)
        else:
            st.error("Translation failed. Please try again later.")