File size: 9,413 Bytes
297437a
e760939
5b45d10
e21a9f1
bb6e5e5
5d65dfe
e62dd99
e21a9f1
297437a
0c37b52
e93a46f
 
 
 
e21a9f1
de59cef
 
e21a9f1
5eddd0b
e21a9f1
c0d4e9c
e21a9f1
 
 
e93a46f
 
 
 
 
fa0faa4
 
 
 
 
bb6e5e5
 
 
fa0faa4
bb6e5e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa0faa4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e93a46f
fa0faa4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e760939
fa0faa4
e760939
22a3dc8
 
 
 
 
e62dd99
e21a9f1
cd4bee1
 
 
 
 
e62dd99
 
 
 
 
 
 
22a3dc8
 
 
 
 
e62dd99
 
 
 
 
 
 
 
 
 
 
 
 
 
22a3dc8
e62dd99
 
 
 
 
 
 
 
 
e760939
e21a9f1
 
affd952
e62dd99
affd952
e62dd99
affd952
 
dc99d7c
e93a46f
 
e760939
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import gradio as gr
import requests
import os  
import json
import time
import transformers
import re
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face credentials and the Inference API endpoint of Vectara's
# hallucination evaluation model. The token is read from the environment so it
# never lives in the source; it is None when HF_AUTH_TOKEN is unset.
hf_token = os.getenv("HF_AUTH_TOKEN")
vapi_url = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
headers = {"Authorization": f"Bearer {hf_token}"}


# Load the text-generation model and its tokenizer once, at import time;
# generate_text() below reuses these module-level objects on every call.
# NOTE(review): trust_remote_code=True lets the loaders run Python code shipped
# with the model repository -- confirm that this repository is trusted.
model_name = "allenai/OLMo-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

def generate_text(prompt, max_new_tokens=100, do_sample=False, top_k=50, top_p=0.95):
    """Run `prompt` through the module-level model and return the decoded text.

    Args:
        prompt: Text handed to the tokenizer.
        max_new_tokens: Forwarded to ``model.generate``.
        do_sample: Forwarded to ``model.generate``.
        top_k: Forwarded to ``model.generate``.
        top_p: Forwarded to ``model.generate``.

    Returns:
        The first decoded sequence, with special tokens skipped.
    """
    encoded_prompt = tokenizer(prompt, return_tensors='pt', return_token_type_ids=False)
    generation_options = {
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        "top_k": top_k,
        "top_p": top_p,
    }
    sequences = model.generate(**encoded_prompt, **generation_options)
    decoded_sequences = tokenizer.batch_decode(sequences, skip_special_tokens=True)
    return decoded_sequences[0]


def query(payload):
    """POST `payload` as JSON to `vapi_url` and return the decoded JSON reply.

    The request carries the module-level `headers` (bearer token).
    """
    reply = requests.post(vapi_url, json=payload, headers=headers)
    return reply.json()

def check_hallucination(assertion, citation, max_attempts=3, wait_time=180):
    """Score `assertion` against `citation` with the hallucination model.

    Posts ``"{assertion} [SEP] {citation}"`` to the Hugging Face Inference API
    endpoint of ``vectara/hallucination_evaluation_model`` and reads
    ``[0][0]["score"]`` from the JSON reply. A failed attempt (HTTP error,
    transport error, or a reply without the expected shape) is logged and
    retried after `wait_time` seconds.

    Args:
        assertion: Text to be scored.
        citation: Text the assertion is compared with.
        max_attempts: Total number of requests made before giving up.
        wait_time: Seconds slept between failed attempts (default: 3 minutes).

    Returns:
        ``"**hallucination score:** <score>"`` on success, otherwise a string
        starting with ``"Error:"``.
    """
    api_url = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
    header = {"Authorization": f"Bearer {hf_token}"}
    payload = {"inputs": f"{assertion} [SEP] {citation}"}

    for attempt in range(1, max_attempts + 1):
        try:
            response = requests.post(api_url, headers=header, json=payload, timeout=120)
            response.raise_for_status()  # Turn HTTP error codes into exceptions
            score = response.json()[0][0]["score"]
            return f"**hallucination score:** {score}"
        except requests.exceptions.HTTPError as http_err:
            print(f"HTTP error occurred: {http_err}")
        except requests.exceptions.RequestException as err:
            print(f"Other error occurred: {err}")
        except (KeyError, IndexError, TypeError, ValueError):
            # The reply was not the expected [[{"score": ...}]] structure
            # (e.g. an error object or an empty list); treat it as retryable.
            print("Unexpected response: the score was not found in the response. The endpoint might be waking up.")

        if attempt < max_attempts:
            print(f"Attempt {attempt} failed. Waiting for {wait_time} seconds before retrying...")
            time.sleep(wait_time)
        else:
            print("Maximum attempts reached. Please try again later.")
            return "Error: Unable to retrieve hallucination score after multiple attempts."

    # Only reached when max_attempts is zero or negative (no request was made).
    return "Error: Unable to process the hallucination check."


def query_vectara(text):
    """Query the Vectara corpus for `text` and return summary and sources.

    Credentials and corpus are read from the ``CUSTOMER_ID``, ``CORPUS_ID``
    and ``API_KEY`` environment variables. The request asks for 25 results
    and a summary over at most 5 of them.

    Args:
        text: The user's query string.

    Returns:
        On success, a JSON string (indent 2) of the form
        ``{"summary": str, "sources": [{"title", "author", "page number"}]}``
        where each source holds only the metadata fields that were present and
        at most 5 sources are taken per response set. The summary is ``""``
        when the reply carries none. On failure, a plain-text string:
        ``"Error: <status code or request error>"`` or
        ``"No data found in the response."``.
    """
    customer_id = os.getenv('CUSTOMER_ID')
    corpus_id = os.getenv('CORPUS_ID')
    api_key = os.getenv('API_KEY')
    api_key_header = {
        "customer-id": customer_id,
        "x-api-key": api_key
    }
    request_body = {
        "query": [
            {
                "query": text,
                "queryContext": "",
                "start": 1,
                "numResults": 25,
                "contextConfig": {
                    "charsBefore": 0,
                    "charsAfter": 0,
                    "sentencesBefore": 2,
                    "sentencesAfter": 2,
                    "startTag": "%START_SNIPPET%",
                    "endTag": "%END_SNIPPET%",
                },
                "rerankingConfig": {
                    "rerankerId": 272725718,
                    "mmrConfig": {
                        "diversityBias": 0.35
                    }
                },
                "corpusKey": [
                    {
                        "customerId": customer_id,
                        "corpusId": corpus_id,
                        "semantics": 0,
                        "metadataFilter": "",
                        "lexicalInterpolationConfig": {
                            "lambda": 0
                        },
                        "dim": []
                    }
                ],
                "summary": [
                    {
                        "maxSummarizedResults": 5,
                        "responseLang": "auto",
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0"
                    }
                ]
            }
        ]
    }

    try:
        # A timeout keeps the UI handler from hanging forever on a stalled
        # connection; transport failures are reported as an error string.
        response = requests.post(
            "https://api.vectara.io/v1/query",
            json=request_body,
            verify=True,
            headers=api_key_header,
            timeout=120,
        )
    except requests.exceptions.RequestException as err:
        return f"Error: {err}"

    if response.status_code != 200:
        return f"Error: {response.status_code}"

    query_data = response.json()
    if not query_data:
        return "No data found in the response."

    response_sets = query_data.get('responseSet', [])

    # The summary lives in the first response set; tolerate replies that
    # carry no response set or no summary instead of raising.
    try:
        summary = response_sets[0]['summary'][0]['text']
    except (KeyError, IndexError, TypeError):
        summary = ""

    # Metadata names of interest, mapped to the keys used in the output.
    wanted_fields = {
        'title': 'title',
        'author': 'author',
        'pageNumber': 'page number',
    }

    sources_info = []
    for response_set in response_sets:
        # Limit to top 5 sources.
        for source in response_set.get('response', [])[:5]:
            source_info = {
                wanted_fields[entry.get('name', '')]: entry.get('value', '')
                for entry in source.get('metadata', [])
                if entry.get('name', '') in wanted_fields
            }
            if source_info:
                sources_info.append(source_info)

    result = {"summary": summary, "sources": sources_info}
    return json.dumps(result, indent=2)

def remove_references(text):
    """Return `text` with bracketed numeric markers such as ``[1]`` removed.

    Consecutive markers like ``[1][2]`` are all removed; brackets that do
    not enclose digits are left alone.
    """
    reference_marker = re.compile(r'\[\d+\]+')
    return reference_marker.sub('', text)

def clean_text(text):
    """Return `text` stripped of everything except ASCII letters, digits and whitespace."""
    disallowed_characters = re.compile(r'[^a-zA-Z0-9\s]')
    return disallowed_characters.sub('', text)

def evaluate_content(user_input):
    """Run the full pipeline for one user query and return the four UI outputs.

    Steps: retrieve a summary and sources via ``query_vectara``, strip
    reference markers and special characters from the summary, generate text
    from the cleaned summary with ``generate_text``, then score the raw
    generated text against the raw summary with ``check_hallucination``.

    Args:
        user_input: The query typed by the user.

    Returns:
        A 4-tuple ``(summary_clean, sources_info, olmo_output_clean,
        hallucination_score)``. When the retrieval step fails, its plain-text
        message is returned in the first slot and the other three are empty
        strings.
    """
    vectara_response = query_vectara(user_input)
    try:
        vectara_response_json = json.loads(vectara_response)
    except json.JSONDecodeError:
        # query_vectara reports failures as plain text (e.g. "Error: 500"),
        # not JSON; surface that message instead of crashing the handler.
        return vectara_response, "", "", ""

    summary = vectara_response_json.get("summary", "")
    sources = vectara_response_json.get("sources", [])

    # Remove references, then special characters, from the summary text
    summary_clean = clean_text(remove_references(summary))

    # One line per source, with title and author cleaned of special characters
    sources_info = "".join(
        "Title: {}, Author: {}, Page: {}\n".format(
            clean_text(source.get("title", "No title")),
            clean_text(source.get("author", "No author")),
            source.get("page number", "N/A"),
        )
        for source in sources
    )

    # Generate text based on the cleaned and reference-removed summary
    olmo_output = generate_text(summary_clean)
    olmo_output_clean = clean_text(olmo_output)

    # Check hallucination based on the original output and summary
    hallucination_score = check_hallucination(olmo_output, summary)

    return summary_clean, sources_info, olmo_output_clean, hallucination_score

# Gradio UI: one input textbox wired to evaluate_content. The four output
# textboxes are listed in the same order as the values evaluate_content
# returns (cleaned summary, sources, cleaned generated text, hallucination
# score). The interface is launched at module level, i.e. as soon as this file
# is executed.
iface = gr.Interface(
    fn=evaluate_content,
    inputs=[gr.Textbox(label="User Input")],
    outputs=[
        gr.Textbox(label="Vectara Summary", lines=10),
        gr.Textbox(label="Vectara Sources", lines=10),
        gr.Textbox(label="Generated Text", lines=10),
        gr.Textbox(label="Hallucination Score")
    ],
    live=False,
    title="👋🏻Welcome to 🌟Team Tonic's 🧠🌈SureRAG🔴🟢",
    description="Nothing is more important than reputation. However you can create automated content pipelines for public facing content. How can businesses grow their reputation while mitigating risks due to AI? How it works : vectara rag retrieval reranking and summarization is used to return content. then an LLM generates content based on these returns. this content is checked for hallucination before being validated for publishing on twitter. SureRAG is fixed on Tonic-AI's README files as a Demo, provide input to generate a response. This response is checked by Vectara's HHME. Check out the model [vectara/hallucination_evaluation_model](https://huggingface.co/vectara/hallucination_evaluation_model) Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community 👻  [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to 🌟 [DataTonic](https://github.com/Tonic-AI/DataTonic)",
)
iface.launch()