# VisionTexts / app.py
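"""Streamlit app that turns an uploaded image into accessibility alt text:
an image-captioning model produces a caption via the Hugging Face Inference
API, and a chat LLM expands that caption into a concise, factual description."""
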
import streamlit as st
import requests
from PIL import Image
import io
from huggingface_hub import InferenceClient

# Streamlit page setup
st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered")

# Add the logo image with a specified width
image_width = 300  # Set the desired width in pixels
st.image('MTSS.ai_Logo.png', width=image_width)

st.header('VisionTexts™ | Accessibility')
st.subheader('Image Alt Text Creator')

# Retrieve the Hugging Face API key from secrets
huggingface_api_key = st.secrets["huggingface_api_key"]

# Initialize the Hugging Face inference client
client = InferenceClient(token=huggingface_api_key)
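# The same client handles both inference calls below: image_to_text for the
# caption and the chat-completion endpoint for the final alt text.
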
# File uploader allows user to add their own image
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if uploaded_file:
    # Display the uploaded image
    image = Image.open(uploaded_file).convert('RGB')
    image_width = 200  # Set the desired width in pixels
    with st.expander("Image", expanded=True):
        st.image(image, caption=uploaded_file.name, width=image_width, use_column_width=False)
else:
    st.warning("Please upload an image.")

# Option for adding additional details
show_details = st.checkbox("Add additional details about the image.", value=False)
if show_details:
    # Text input for additional details about the image
    additional_details = st.text_area(
        "Provide specific information that is important to include in the alt text or reflect why the image is being used:"
    )
else:
    additional_details = ""

# Button to trigger the analysis
analyze_button = st.button("Analyze the Image", type="secondary")
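# Note: st.button returns True only on the script rerun triggered by the click,
# so the analysis block at the bottom runs once per press.
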
# Prompt for complex image description
complex_image_prompt_text = (
    "As an expert in image accessibility and alternative text, thoroughly describe the image caption provided. "
    "Provide a detailed description using not more than 500 characters that conveys the essential information in eight or fewer clear and concise sentences. "
    "Skip phrases like 'image of' or 'picture of.' "
    "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative. "
    "Importantly, only describe what is visibly present in the image and avoid making assumptions or adding extraneous information. "
    "Stick to the facts and ensure the description is accurate and reliable."
)
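
# The prompt above is combined further below with the model-generated caption
# (and any user-supplied details) into a single user message for the chat model.
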
# Functions to query the Hugging Face Inference API
def query_image_caption(image):
    # Convert the PIL image to JPEG bytes
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    image_bytes = buffered.getvalue()

    # Use the InferenceClient's image_to_text method
    response = client.image_to_text(
        # model="Salesforce/blip-image-captioning-large",
        model="nlpconnect/vit-gpt2-image-captioning",
        image=image_bytes,
    )
    return response
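
# Note: depending on the installed huggingface_hub version, image_to_text may
# return either a plain string or an ImageToTextOutput object with a
# generated_text attribute; both cases are handled in the analysis block below.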

def query_llm(prompt):
    # System prompt for the chat model
    system_prompt = "You are an expert in image accessibility and alternative text."

    # Generate the response using the InferenceClient's OpenAI-compatible chat interface
    response = client.chat.completions.create(
        model="meta-llama/Llama-2-7b-chat-hf",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ],
        stream=True,
        temperature=0.5,
        max_tokens=1024,
        top_p=0.7
    )

    # Collect the streamed response chunks
    response_content = ""
    for chunk in response:
        if chunk.choices:
            # The final chunk may carry no content, so fall back to an empty string
            response_content += chunk.choices[0].delta.content or ""
        # Optionally, partial progress could be streamed to the user here
    return response_content.strip()

# Check if an image has been uploaded and if the button has been pressed
if uploaded_file is not None and analyze_button:
    with st.spinner("Analyzing the image..."):
        # Get the caption from the image using the image captioning model
        caption_response = query_image_caption(image)

        # Handle potential errors from the API
        if isinstance(caption_response, dict) and caption_response.get("error"):
            st.error(f"Error with image captioning model: {caption_response['error']}")
        else:
            # The caption may arrive as a plain string or as an output object with
            # a generated_text attribute, depending on the huggingface_hub version
            image_caption = getattr(caption_response, "generated_text", caption_response)

            # Use the complex image prompt text
            prompt_text = complex_image_prompt_text

            # Include additional details if provided
            if additional_details:
                prompt_text += f"\n\nAdditional context provided by the user:\n{additional_details}"

            # Create the full prompt
            full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"

            # Use the language model to generate the alt text description
            llm_response = query_llm(full_prompt)

            # Display the generated alt text
            st.markdown("### Generated Alt Text:")
            st.write(llm_response)

            st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
else:
    st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")