"""Gradio demo that turns a JSON array of sentences into embedding vectors."""

import json

import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer

# Load a small CPU model for text to vector processing
model_name = "sentence-transformers/all-mpnet-base-v2"
model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)


def _parse_sentences(texts):
    """Normalize the raw input into a list of sentences.

    Args:
        texts: Either an already-built list/tuple of sentences (programmatic
            use) or the raw text typed into the Textbox, expected to hold a
            JSON array of strings.

    Returns:
        A list of sentences. ``None`` and blank text yield an empty list; a
        bare JSON string is treated as a single sentence.

    Raises:
        ValueError: If the text is not valid JSON, or decodes to something
            other than an array or a string.
    """
    if texts is None:
        return []
    if isinstance(texts, (list, tuple)):
        return list(texts)
    if not isinstance(texts, str):
        raise ValueError(
            f"Expected a JSON array of sentences, got {type(texts).__name__}"
        )
    if not texts.strip():
        return []

    try:
        parsed = json.loads(texts)
    except json.JSONDecodeError as exc:
        raise ValueError(
            f"Input is not valid JSON ({exc.msg} at position {exc.pos}); "
            'expected an array such as ["first sentence", "second sentence"]'
        ) from exc

    if isinstance(parsed, str):
        return [parsed]
    if isinstance(parsed, list):
        return parsed
    raise ValueError(
        f"Expected a JSON array of sentences, got {type(parsed).__name__}"
    )


def text_to_vector(texts):
    """Embed every sentence of *texts* and pair it with its vector.

    The Textbox hands this function one string, so the JSON array has to be
    decoded first; iterating the raw string would embed single characters.

    Args:
        texts: JSON-encoded array of sentences (as typed into the UI), or a
            list/tuple of sentences when called directly.

    Returns:
        A list of ``{"sentence": ..., "vector": ...}`` dicts in input order.
        ``vector`` is a list of floats, or an ``"Error: ..."`` string when
        that particular sentence could not be processed.

    Raises:
        gr.Error: If the input cannot be decoded into a list of sentences.
    """
    try:
        sentences = _parse_sentences(texts)
    except ValueError as exc:
        # gr.Error surfaces the message in the UI instead of a generic failure.
        raise gr.Error(str(exc)) from exc

    results = []

    # Process each sentence individually so one failure does not abort the batch
    for sentence in sentences:
        try:
            # Tokenize the sentence
            inputs = tokenizer(
                sentence, return_tensors="pt", padding=True, truncation=True
            )

            # Check if tokenization results in valid tokens
            if inputs["input_ids"].shape[1] == 0:
                raise ValueError(
                    f"Tokenization failed for sentence: '{sentence}'"
                )

            # Pass through the model without tracking gradients
            with torch.no_grad():
                outputs = model(**inputs)

            # NOTE(review): the model card for this checkpoint describes
            # attention-masked mean pooling over the token embeddings rather
            # than pooler_output — confirm which embedding is intended here.
            if outputs.pooler_output is None:
                raise ValueError(
                    f"No vector generated for sentence: '{sentence}'"
                )

            # Drop the batch dimension and convert to a list of floats
            vector = outputs.pooler_output.squeeze(0).tolist()
        except Exception as e:
            # Deliberate best-effort: report the failure for this sentence
            # in-band and keep processing the remaining ones.
            results.append({"sentence": sentence, "vector": f"Error: {str(e)}"})
        else:
            # Append result as sentence and vector pair
            results.append({"sentence": sentence, "vector": vector})

    return results


demo = gr.Interface(
    fn=text_to_vector,
    inputs=gr.Textbox(
        label="Enter JSON array",
        placeholder="Enter an array of sentences as a JSON string",
    ),
    outputs=gr.JSON(label="Sentence and Vector Pairs"),
    title="Batch Text to Vector",
    description=(
        "This demo converts an array of sentences to vectors and returns "
        "objects with sentence and vector."
    ),
)

if __name__ == "__main__":
    # Only start the web server when executed as a script, not on import.
    demo.launch()