|
import gradio as gr |
|
import requests |
|
import os |
|
import json |
|
import time |
|
import transformers |
|
import re |
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
|
# Hugging Face access token, read from the environment (None when unset).
hf_token = os.getenv("HF_AUTH_TOKEN")

# Hosted inference endpoint for Vectara's hallucination evaluation model.
vapi_url = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"

# Bearer-token header sent with every request to ``vapi_url``.
headers = {"Authorization": f"Bearer {hf_token}"}
|
|
|
|
|
# Checkpoint used for text generation. Both the tokenizer and the model are
# loaded once at import time and shared by ``generate_text``.
model_name = "allenai/OLMo-1B"

# NOTE(review): trust_remote_code=True is passed to both loaders — presumably
# the checkpoint ships custom modelling code; confirm this is still required.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
)
|
|
|
def generate_text(prompt, max_new_tokens=100, do_sample=False, top_k=50, top_p=0.95):
    """Generate text from ``prompt`` with the module-level tokenizer and model.

    Args:
        prompt: Text handed to the tokenizer.
        max_new_tokens: Forwarded to ``model.generate``.
        do_sample: Forwarded to ``model.generate``.
        top_k: Forwarded to ``model.generate``.
        top_p: Forwarded to ``model.generate``.

    Returns:
        The first sequence returned by ``model.generate``, decoded with
        special tokens skipped.
    """
    encoded = tokenizer(prompt, return_tensors='pt', return_token_type_ids=False)
    generation_options = {
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        "top_k": top_k,
        "top_p": top_p,
    }
    sequences = model.generate(**encoded, **generation_options)
    decoded = tokenizer.batch_decode(sequences, skip_special_tokens=True)
    return decoded[0]
|
|
|
|
|
|
|
def query(payload, timeout=120):
    """POST ``payload`` as JSON to ``vapi_url`` and return the decoded reply.

    Args:
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely; the default matches
            the value used by ``check_hallucination``.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    response = requests.post(vapi_url, headers=headers, json=payload, timeout=timeout)
    return response.json()
|
|
|
def check_hallucination(assertion, citation, max_attempts=3, wait_time=180):
    """Score ``assertion`` against ``citation`` with the hosted evaluation model.

    The two texts are joined as ``"{assertion} [SEP] {citation}"`` and posted
    to the Hugging Face inference endpoint. Failed attempts are retried after
    a pause, since the endpoint may need time to wake up.

    Args:
        assertion: Text whose faithfulness is being checked.
        citation: Reference text the assertion is compared with.
        max_attempts: Maximum number of requests to send.
        wait_time: Seconds to sleep between failed attempts.

    Returns:
        ``"**hallucination score:** <score>"`` on success, otherwise a string
        starting with ``"Error:"``.
    """
    api_url = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
    header = {"Authorization": f"Bearer {hf_token}"}
    payload = {"inputs": f"{assertion} [SEP] {citation}"}

    attempts = 0
    while attempts < max_attempts:
        try:
            response = requests.post(api_url, headers=header, json=payload, timeout=120)
            response.raise_for_status()
            output = response.json()
            # The score is read from the first entry of the first result list.
            output = output[0][0]["score"]
            return f"**hallucination score:** {output}"
        except requests.exceptions.HTTPError as http_err:
            print(f"HTTP error occurred: {http_err}")
        except requests.exceptions.RequestException as err:
            print(f"Other error occurred: {err}")
        except (KeyError, IndexError, TypeError, ValueError) as parse_err:
            # Any unexpected payload shape (error dict, empty list, non-JSON
            # body) is treated as a failed attempt rather than a crash.
            print(
                f"{type(parse_err).__name__}: The expected key was not found in the response. "
                "The endpoint might be waking up."
            )

        attempts += 1
        if attempts < max_attempts:
            print(f"Attempt {attempts} failed. Waiting for {wait_time} seconds before retrying...")
            time.sleep(wait_time)
        else:
            print("Maximum attempts reached. Please try again later.")
            return "Error: Unable to retrieve hallucination score after multiple attempts."

    # Only reached when max_attempts is not positive, so no request was sent.
    return "Error: Unable to process the hallucination check."
|
|
|
|
|
def _extract_summary(query_data):
    """Return the first summary text of the first response set, or '' if absent."""
    try:
        return query_data['responseSet'][0]['summary'][0]['text']
    except (KeyError, IndexError, TypeError):
        return ""


def _extract_sources(query_data, max_sources=5):
    """Collect title/author/page metadata for the leading results of each response set."""
    # Maps Vectara metadata names to the keys used in the returned source dicts.
    wanted_fields = {
        'title': 'title',
        'author': 'author',
        'pageNumber': 'page number',
    }
    sources_info = []
    for response_set in query_data.get('responseSet', []):
        for source in response_set.get('response', [])[:max_sources]:
            source_info = {}
            for metadata in source.get('metadata', []):
                metadata_name = metadata.get('name', '')
                if metadata_name in wanted_fields:
                    source_info[wanted_fields[metadata_name]] = metadata.get('value', '')
            # Results without any of the wanted fields are left out.
            if source_info:
                sources_info.append(source_info)
    return sources_info


def query_vectara(text, timeout=120):
    """Query the Vectara corpus for ``text`` and return a summary with its sources.

    Credentials and corpus selection come from the ``CUSTOMER_ID``,
    ``CORPUS_ID`` and ``API_KEY`` environment variables.

    Args:
        text: The user's query string.
        timeout: Seconds to wait for the Vectara API before giving up.

    Returns:
        On success, a JSON string (indent 2) with keys ``"summary"`` and
        ``"sources"``. On failure, a plain-text message: ``"Error: ..."`` for
        a request error or non-200 status, or
        ``"No data found in the response."`` for an empty body.
    """
    customer_id = os.getenv('CUSTOMER_ID')
    corpus_id = os.getenv('CORPUS_ID')
    api_key = os.getenv('API_KEY')

    api_key_header = {
        "customer-id": customer_id,
        "x-api-key": api_key
    }
    request_body = {
        "query": [
            {
                "query": text,
                "queryContext": "",
                "start": 1,
                "numResults": 25,
                "contextConfig": {
                    "charsBefore": 0,
                    "charsAfter": 0,
                    "sentencesBefore": 2,
                    "sentencesAfter": 2,
                    "startTag": "%START_SNIPPET%",
                    "endTag": "%END_SNIPPET%",
                },
                "rerankingConfig": {
                    # NOTE(review): presumably the id of Vectara's MMR
                    # reranker, given the mmrConfig below — confirm.
                    "rerankerId": 272725718,
                    "mmrConfig": {
                        "diversityBias": 0.35
                    }
                },
                "corpusKey": [
                    {
                        "customerId": customer_id,
                        "corpusId": corpus_id,
                        "semantics": 0,
                        "metadataFilter": "",
                        "lexicalInterpolationConfig": {
                            "lambda": 0
                        },
                        "dim": []
                    }
                ],
                "summary": [
                    {
                        "maxSummarizedResults": 5,
                        "responseLang": "auto",
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0"
                    }
                ]
            }
        ]
    }

    try:
        response = requests.post(
            "https://api.vectara.io/v1/query",
            json=request_body,
            verify=True,
            headers=api_key_header,
            timeout=timeout,
        )
    except requests.exceptions.RequestException as err:
        # Report network failures the same way as HTTP failures: as a string.
        return f"Error: {err}"

    if response.status_code != 200:
        return f"Error: {response.status_code}"

    query_data = response.json()
    if not query_data:
        return "No data found in the response."

    result = {
        "summary": _extract_summary(query_data),
        "sources": _extract_sources(query_data),
    }
    return json.dumps(result, indent=2)
|
|
|
def remove_references(text):
    """Return ``text`` with bracketed numeric markers such as ``[12]`` removed.

    A match is ``[``, one or more digits, then one or more ``]`` characters;
    every match is replaced by the empty string.
    """
    reference_marker = re.compile(r'\[\d+\]+')
    return reference_marker.sub('', text)
|
|
|
def clean_text(text):
    """Return ``text`` keeping only ASCII letters, digits and whitespace.

    Every other character (punctuation, symbols, non-ASCII letters) is
    dropped; nothing is inserted in its place.
    """
    disallowed = re.compile(r'[^a-zA-Z0-9\s]')
    return disallowed.sub('', text)
|
|
|
def evaluate_content(user_input):
    """Run the full pipeline: retrieve, generate, then score for hallucination.

    Steps:
        1. Query Vectara for a summary and its sources.
        2. Strip reference markers and non-alphanumeric characters from the
           summary.
        3. Feed the cleaned summary to the language model.
        4. Score the raw generated text against the raw summary.

    Args:
        user_input: The user's query text.

    Returns:
        A 4-tuple ``(summary, sources, generated_text, hallucination_score)``
        of strings. If the Vectara query did not yield JSON, the first element
        carries its plain-text message and the other three are empty.
    """
    vectara_response = query_vectara(user_input)
    try:
        vectara_response_json = json.loads(vectara_response)
    except json.JSONDecodeError:
        # query_vectara reports failures as plain text ("Error: ...",
        # "No data found in the response."); surface that instead of crashing.
        return vectara_response, "", "", ""

    summary = vectara_response_json.get("summary", "")
    sources = vectara_response_json.get("sources", [])

    summary_clean = clean_text(remove_references(summary))

    # One line per source; the page number is shown as received.
    sources_info = "".join(
        "Title: {title}, Author: {author}, Page: {page}\n".format(
            title=clean_text(source.get("title", "No title")),
            author=clean_text(source.get("author", "No author")),
            page=source.get("page number", "N/A"),
        )
        for source in sources
    )

    olmo_output = generate_text(summary_clean)
    olmo_output_clean = clean_text(olmo_output)

    # The check uses the uncleaned generation and the uncleaned summary.
    hallucination_score = check_hallucination(olmo_output, summary)

    return summary_clean, sources_info, olmo_output_clean, hallucination_score
|
|
|
|
|
# Gradio front end: one text input wired to ``evaluate_content``, whose four
# return values fill the four output boxes in order.
iface = gr.Interface(
    title="👋🏻Welcome to 🌟Team Tonic's 🧠🌈SureRAG🔴🟢",
    description="Nothing is more important than reputation. However you can create automated content pipelines for public facing content. How can businesses grow their reputation while mitigating risks due to AI? How it works : vectara rag retrieval reranking and summarization is used to return content. then an LLM generates content based on these returns. this content is checked for hallucination before being validated for publishing on twitter. SureRAG is fixed on Tonic-AI's README files as a Demo, provide input to generate a response. This response is checked by Vectara's HHME. Check out the model [vectara/hallucination_evaluation_model](https://huggingface.co/vectara/hallucination_evaluation_model) Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to 🌟 [DataTonic](https://github.com/Tonic-AI/DataTonic)",
    fn=evaluate_content,
    inputs=[gr.Textbox(label="User Input")],
    outputs=[
        gr.Textbox(lines=10, label="Vectara Summary"),
        gr.Textbox(lines=10, label="Vectara Sources"),
        gr.Textbox(lines=10, label="Generated Text"),
        gr.Textbox(label="Hallucination Score"),
    ],
    live=False,
)

# Start the web server for the interface.
iface.launch()