File size: 3,137 Bytes
d4916cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Standard library imports
import os
import re
import io

# External libraries for image processing and visualization
from PIL import Image

# Azure AI services
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient

# Gradio for creating interfaces
import gradio as gr

# Other libraries
import numpy as np
import openai

# Azure Cognitive Services endpoint and key
# Endpoint for the Form Recognizer / Document Intelligence resource used below.
endpoint = "https://herbariumsamplerecognition.cognitiveservices.azure.com/"
# Key is read from the environment so it is never committed to source control;
# raises KeyError at import time if KEY1 is not set.
key = os.environ['KEY1']


def sanitize_filename(filename):
    """Return *filename* with every character removed except word
    characters (letters, digits, underscore), whitespace, dots, and hyphens."""
    disallowed = re.compile(r'[^\w\s\.-]')
    return disallowed.sub('', filename)

def extract_info(text):
    """Ask the OpenAI chat API to reformat OCR text as Darwin Core JSON.

    Parameters
    ----------
    text : str
        Raw text extracted from a herbarium specimen image.

    Returns
    -------
    str
        The model's response content, a "No response" notice, or an
        error message if the API call raised.
    """
    # Read the key on every call so a rotated environment value takes
    # effect without restarting the app. Raises KeyError if KEY2 unset.
    openai.api_key = os.environ['KEY2']

    # Bug fix: the original appended the specimen text twice
    # ("\n{text}\n{text}"), duplicating content and doubling token
    # usage for no benefit — include it exactly once.
    prompt = f"From the provided text, return only the relevant information in a JSON format according to the Darwin Core standard for biodiversity specimen. Note: make sure that each output has a 'country' field. If you do not find an explicit country, make your best guess at the country using the context of the other text.\n{text}"

    try:
        # Send the request to the API
        response = openai.ChatCompletion.create(
            model="gpt-4-1106-preview",
            messages=[{"role": "system", "content": "You are a helpful assistant."}, 
                      {"role": "user", "content": prompt}]
        )

        # Extract the response
        return response.choices[0].message['content'] if response.choices else "No response from the API."

    except Exception as e:
        return f"An error occurred: {str(e)}"

def analyze_read(image_stream):
    """Run Azure's prebuilt-read OCR model on an image byte stream and
    forward the recognized text to extract_info for Darwin Core formatting.

    Returns the extracted-info string, or an error message on failure.
    """
    try:
        # Client is built per call from the module-level endpoint/key.
        client = DocumentAnalysisClient(
            endpoint=endpoint,
            credential=AzureKeyCredential(key),
        )

        # Kick off the read operation and block until it completes.
        poller = client.begin_analyze_document("prebuilt-read", image_stream)
        ocr_result = poller.result()

        # Hand the full recognized text to the LLM post-processing step.
        return extract_info(ocr_result.content)

    except Exception as e:
        return f"An error occurred: {str(e)}"

def model_function(image):
    """Gradio entry point: take a NumPy image array, OCR it via Azure,
    and return the extracted Darwin Core text."""
    # Normalize the incoming array into an 8-bit RGB PIL image.
    pil_image = Image.fromarray(np.uint8(image)).convert('RGB')

    # Serialize to JPEG bytes, the stream format the Azure client accepts.
    buffer = io.BytesIO()
    pil_image.save(buffer, format='JPEG')

    return analyze_read(buffer.getvalue())

# UI copy displayed on the Gradio page.
title = "HerbariaOCR"
description = (
    "Upload your Herbaria specimen and let the Azure-GPT pipeline work its magic!\n"
    "You will find all the textual data from the image extracted in the DarwinCore json format standard."
)
article = "Check out [the GitHub repository](https://github.com/BU-Spark/HerbariaOCR) that this demo is based off of."

# Wire the pipeline into a simple image-in / text-out interface and
# launch it with a public share link.
iface = gr.Interface(
    fn=model_function,
    inputs="image",
    outputs="text",
    title=title,
    description=description,
    article=article,
)
iface.launch(share=True)