import streamlit as st
import requests
from PIL import Image
import io
from huggingface_hub import InferenceClient

# Streamlit page setup
st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered")

# Add the logo image with a specified width
image_width = 300  # Set the desired width in pixels
st.image('MTSS.ai_Logo.png', width=image_width)

st.header('VisionTexts™ | Accessibility')
st.subheader('Image Alt Text Creator')

# Retrieve the Hugging Face API Key from secrets
huggingface_api_key = st.secrets["huggingface_api_key"]

# Initialize the Hugging Face inference client
client = InferenceClient(token=huggingface_api_key)
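
# `st.secrets` looks the key up in Streamlit's secrets store (typically a
# .streamlit/secrets.toml file, or the hosting platform's secrets mechanism);
# the token is then sent with every request the InferenceClient makes to the
# hosted Hugging Face Inference API.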

# File uploader allows user to add their own image
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if uploaded_file:
    # Display the uploaded image
    image = Image.open(uploaded_file).convert('RGB')
    image_width = 200  # Set the desired width in pixels
    with st.expander("Image", expanded=True):
        st.image(image, caption=uploaded_file.name, width=image_width, use_column_width=False)
else:
    st.warning("Please upload an image.")

# Option for adding additional details
show_details = st.checkbox("Add additional details about the image.", value=False)

if show_details:
    # Text input for additional details about the image
    additional_details = st.text_area(
        "Provide specific information that is important to include in the alt text or reflect why the image is being used:"
    )
else:
    additional_details = ""

# Button to trigger the analysis
analyze_button = st.button("Analyze the Image", type="secondary")

# Prompt for complex image description
complex_image_prompt_text = (
    "As an expert in image accessibility and alternative text, expand the image caption provided into a thorough description of the image. "
    "Provide a detailed description using no more than 500 characters that conveys the essential information in eight or fewer clear and concise sentences. "
    "Skip phrases like 'image of' or 'picture of.' "
    "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative. "
    "Importantly, only describe what is visibly present in the image and avoid making assumptions or adding extraneous information. "
    "Stick to the facts and ensure the description is accurate and reliable."
)

# Functions to query the Hugging Face Inference API
def query_image_caption(image):
    # Convert PIL image to bytes
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    image_bytes = buffered.getvalue()

    # Use the InferenceClient's image_to_text method
    response = client.image_to_text(
        # model="Salesforce/blip-image-captioning-large",
        model="nlpconnect/vit-gpt2-image-captioning",
        image=image_bytes,
    )
    return response
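
# Depending on the installed huggingface_hub version, image_to_text returns either a
# plain string or an ImageToTextOutput object whose caption sits in `.generated_text`;
# the handling further down accepts both (a version-specific detail worth verifying
# against the installed release).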


def query_llm(prompt):
    # System prompt
    system_prompt = "You are an expert in image accessibility and alternative text."

    # Generate the response using the Hugging Face InferenceClient's
    # OpenAI-compatible chat completion interface
    response = client.chat.completions.create(
        model="meta-llama/Llama-2-7b-chat-hf",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ],
        stream=True,
        temperature=0.5,
        max_tokens=1024,
        top_p=0.7
    )

    # Collect the streamed response; each chunk carries an incremental piece of
    # text in choices[0].delta.content (progress could be surfaced to the user here)
    response_content = ""
    for chunk in response:
        if chunk.choices and chunk.choices[0].delta.content:
            response_content += chunk.choices[0].delta.content

    return response_content.strip()
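
# `client.chat.completions.create(...)` is the OpenAI-compatible alias exposed by recent
# huggingface_hub releases; the same request can be written with the underlying method,
# e.g. a non-streaming sketch using the parameters above:
#     result = client.chat_completion(
#         messages=[...], model="meta-llama/Llama-2-7b-chat-hf",
#         max_tokens=1024, temperature=0.5, top_p=0.7,
#     )
#     text = result.choices[0].message.content
# Whether a given model (including gated ones such as Llama 2) is actually served
# depends on the Inference API and the account's access rights.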


# Check if an image has been uploaded and if the button has been pressed
if uploaded_file is not None and analyze_button:
    with st.spinner("Analyzing the image..."):
        # Get the caption from the image using the image captioning model;
        # the InferenceClient raises an exception if the API call fails
        try:
            caption_response = query_image_caption(image)
        except Exception as e:
            st.error(f"Error with image captioning model: {e}")
            st.stop()

        # Accept either a plain string or an ImageToTextOutput object
        image_caption = getattr(caption_response, "generated_text", caption_response)

        # Use the complex image prompt text
        prompt_text = complex_image_prompt_text

        # Include additional details if provided
        if additional_details:
            prompt_text += f"\n\nAdditional context provided by the user:\n{additional_details}"

        # Create the full prompt
        full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"

        # Use the language model to generate the alt text description
        llm_response = query_llm(full_prompt)

        # Display the generated alt text
        st.markdown("### Generated Alt Text:")
        st.write(llm_response)

        st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
else:
    st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")