|
import json

import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer
|
|
|
|
|
# Load the sentence-embedding model and its tokenizer once at start-up so
# every request reuses the same in-memory instances.
MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)
|
|
|
def text_to_vector(texts):
    """Embed a batch of sentences and pair each one with its vector.

    Parameters
    ----------
    texts : str | list[str]
        Either a JSON-encoded array of sentences (what the Gradio
        Textbox delivers as a single string) or an already-parsed list
        of sentences for programmatic callers.

    Returns
    -------
    list[dict]
        One dict per sentence: ``{"sentence": <input>, "vector": <list
        of floats>}``.  On a per-sentence failure, ``"vector"`` holds an
        error string instead, so one bad item does not abort the batch.
    """
    # BUG FIX: the UI passes a single JSON string; iterating it directly
    # would loop over individual *characters* and embed each one.  Parse
    # the string into a real list first.
    if isinstance(texts, str):
        try:
            texts = json.loads(texts)
        except json.JSONDecodeError as e:
            return [{
                "sentence": texts,
                "vector": f"Error: invalid JSON input ({e})"
            }]
    # Tolerate a single parsed item (e.g. input was '"hello"').
    if not isinstance(texts, list):
        texts = [texts]

    results = []
    for sentence in texts:
        try:
            inputs = tokenizer(
                sentence, return_tensors="pt", padding=True, truncation=True
            )

            # Guard against degenerate tokenizations (e.g. empty string).
            if inputs['input_ids'].shape[1] == 0:
                raise ValueError(f"Tokenization failed for sentence: '{sentence}'")

            # Inference only — no gradients needed.
            with torch.no_grad():
                outputs = model(**inputs)

            if outputs.pooler_output is None:
                raise ValueError(f"No vector generated for sentence: '{sentence}'")

            # Drop the batch dimension and convert to a JSON-serializable list.
            vector = outputs.pooler_output.squeeze().numpy().tolist()

            results.append({
                "sentence": sentence,
                "vector": vector
            })
        except Exception as e:
            # Best-effort batch: record the failure and keep processing
            # the remaining sentences.
            results.append({
                "sentence": sentence,
                "vector": f"Error: {str(e)}"
            })

    return results
|
|
|
# Wire the embedding function into a simple Gradio UI: one textbox in,
# structured JSON out.
sentence_input = gr.Textbox(
    label="Enter JSON array",
    placeholder="Enter an array of sentences as a JSON string",
)
vector_output = gr.JSON(label="Sentence and Vector Pairs")

demo = gr.Interface(
    fn=text_to_vector,
    inputs=sentence_input,
    outputs=vector_output,
    title="Batch Text to Vector",
    description="This demo converts an array of sentences to vectors and returns objects with sentence and vector.",
)

demo.launch()
|
|
|
|