import argparse
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate


# Prompt template: the bot answers the question using only the "context" text
# supplied later; that context determines what information the answer may draw on.
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
import openai
from dotenv import load_dotenv
import os
import shutil
# custom embedding model
from sentence_transformers import SentenceTransformer
from langchain_experimental.text_splitter import SemanticChunker
from typing import List


class MyEmbeddings:
    """Minimal wrapper exposing the embed_documents / embed_query interface."""

    def __init__(self):
        self.model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [self.model.encode(t).tolist() for t in texts]

    def embed_query(self, query: str) -> List[float]:
        return self.model.encode(query).tolist()


embeddings = MyEmbeddings()
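# MyEmbeddings duck-types LangChain's embeddings interface (embed_documents /
# embed_query), so this instance can be passed anywhere an embedding function is
# expected. Illustrative check (all-MiniLM-L6-v2 produces 384-dimensional vectors):
#   embeddings.embed_query("hello")  # -> list of 384 floats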

splitter = SemanticChunker(embeddings)
CHROMA_PATH = "chroma8"
# Load the Chroma vector store previously persisted to this directory.
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
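# Assumption (not shown in this script): the Chroma index at CHROMA_PATH is
# built in a separate step. A minimal sketch of that step, using the
# DirectoryLoader and SemanticChunker imported above, might look like the
# function below; the "data" path and "*.txt" glob are hypothetical.
def build_chroma_index(data_path: str = "data", glob: str = "*.txt") -> None:
    documents = DirectoryLoader(data_path, glob=glob).load()
    chunks = splitter.split_documents(documents)
    Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_PATH)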


from huggingface_hub import login

# Retrieve the token from the environment variable
token = os.getenv('HF_Token')

# Log in using the token
login(token=token)
from transformers import AutoTokenizer
import transformers
import torch

model = "tiiuae/falcon-7b-instruct" # meta-llama/Llama-2-7b-chat-hf

tokenizer = AutoTokenizer.from_pretrained(model, token=True)  # use_auth_token is deprecated




from transformers import pipeline

llama_pipeline = pipeline(
    "text-generation",  # LLM task
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)
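# Note: device_map="auto" requires the `accelerate` package, and float16 weights
# assume a GPU is available (on CPU, float32 would be the safer choice).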



def get_response(prompt: str) -> None:
    """
    Generate a response from the Llama model.

    Parameters:
        prompt (str): The user's input/question for the model.

    Returns:
        None: Prints the model's response.
    """
    sequences = llama_pipeline(
        prompt,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=256,
    )
    print("Chatbot:", sequences[0]['generated_text'])




template = """Answer the query based only the provided context, and if the answer is not contained within the context below, say "I don't knowwwww"

Context:
{context}

{query}""".strip()

from langchain.prompts import PromptTemplate

prompt_template = PromptTemplate(
    input_variables=["query", "context"],
    template=template
)
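# Illustration (hypothetical values) of how the final prompt is rendered:
#   prompt_template.format(
#       query="What is the book about?",
#       context="...passages retrieved from the Chroma store...",
#   )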











# Generate a response from the Llama model
def get_llama_response(message: str, history: list) -> str:
    """
    Generates a conversational response from the Llama model.

    Parameters:
        message (str): User's input message.
        history (list): Past conversation history (not used when building the prompt).

    Returns:
        str: Generated response from the Llama model.
    """
    print("User message:", message)

    # Search the vector store for the chunks most similar to the query.
    results = db.similarity_search_with_relevance_scores(message, k=3)
    if len(results) == 0 or results[0][1] < 0.5:
        print("Unable to find matching results.")

    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])

    # Build the final prompt from the retrieved context and the user question.
    query = prompt_template.format(query=message, context=context_text)

    print("Generating response...")
    sequences = llama_pipeline(
        query,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=1024,
    )

    # The pipeline echoes the prompt, so strip it from the generated text.
    generated_text = sequences[0]['generated_text']
    response = generated_text[len(query):]

    print("Chatbot:", response.strip())
    return response.strip()
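# Example call (illustrative; the empty list stands in for Gradio's chat history):
#   get_llama_response("How does Alice meet the Mad Hatter?", [])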



import gradio as gr

# Launch a Gradio chat UI wired to the retrieval-augmented responder.
gr.ChatInterface(get_llama_response).launch()