# Standard library imports
import os
import re
import io

# External libraries for image processing and visualization
from PIL import Image

# Azure AI services
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient

# Gradio for creating interfaces
import gradio as gr

# Other libraries
import numpy as np
import openai  # legacy pre-1.0 OpenAI SDK interface (openai.ChatCompletion)

# Azure Cognitive Services endpoint and key
endpoint = "https://herbariumsamplerecognition.cognitiveservices.azure.com/"
key = os.environ['KEY1']


def sanitize_filename(filename):
    # Keep only word characters (letters, digits, underscores), whitespace,
    # dots, and hyphens; not currently called in the demo flow below
    return re.sub(r'[^\w\s\.-]', '', filename)


def extract_info(text):
    # Set your OpenAI API key
    openai.api_key = os.environ['KEY2']

    # Prepare the prompt for the API
    prompt = (
        "From the provided text, return only the relevant information in a JSON "
        "format according to the Darwin Core standard for biodiversity specimens. "
        "Note: make sure that each output has a 'country' field. If you do not "
        "find an explicit country, make your best guess at the country using the "
        f"context of the other text.\n{text}"
    )

    try:
        # Send the request to the API
        response = openai.ChatCompletion.create(
            model="gpt-4-1106-preview",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
        )
        # Extract the response text
        return response.choices[0].message['content'] if response.choices else "No response from the API."
    except Exception as e:
        return f"An error occurred: {str(e)}"


def analyze_read(image_stream):
    try:
        document_analysis_client = DocumentAnalysisClient(
            endpoint=endpoint, credential=AzureKeyCredential(key)
        )
        # Run Azure's prebuilt "read" (OCR) model on the image bytes
        poller = document_analysis_client.begin_analyze_document(
            "prebuilt-read", image_stream
        )
        result = poller.result()

        # Collect the recognized text from the document and structure it with GPT
        document_content = result.content
        extracted_info = extract_info(document_content)
        return extracted_info
    except Exception as e:
        return f"An error occurred: {str(e)}"


def model_function(image):
    # Convert the NumPy array supplied by Gradio to a PIL Image object
    image = Image.fromarray(np.uint8(image)).convert('RGB')

    # Serialize the image to a JPEG byte string for the Azure client
    image_bytes = io.BytesIO()
    image.save(image_bytes, format='JPEG')  # using 'JPEG' as the format
    image_bytes = image_bytes.getvalue()

    output_text = analyze_read(image_bytes)
    return output_text


title = "HerbariaOCR"
description = (
    "Upload your Herbaria specimen and let the Azure-GPT pipeline work its magic!\n"
    "You will find all the textual data from the image extracted in the Darwin Core JSON format standard."
)
article = "Check out [the GitHub repository](https://github.com/BU-Spark/HerbariaOCR) that this demo is based on."

iface = gr.Interface(
    fn=model_function,
    title=title,
    description=description,
    article=article,
    inputs="image",
    outputs="text",
)
iface.launch(share=True)
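
# ---------------------------------------------------------------------------
# Usage note (a minimal sketch, not part of the original pipeline): with KEY1
# set to an Azure Form Recognizer key and KEY2 to an OpenAI API key, the demo
# runs as a plain script, e.g. `python app.py` (the filename is hypothetical).
# Gradio prints a local URL and, because of share=True, a temporary public link.
#
# To smoke-test the pipeline without the UI, the handler can be called
# directly on a NumPy image ("sample_specimen.jpg" is a hypothetical path):
#
#   sample = np.array(Image.open("sample_specimen.jpg"))
#   print(model_function(sample))
# ---------------------------------------------------------------------------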