import os
from typing import List

from dotenv import load_dotenv
from langchain_community.vectorstores import Chroma

# Template by which the bot answers a question according to the "context"
# retrieved later; it restricts the answer to that information only.
PROMPT_TEMPLATE = """
Answer the question based only on the following context:
{context}
---
Answer the question based on the above context: {question}
"""

# Custom embeddings backed by sentence-transformers.
from sentence_transformers import SentenceTransformer
from langchain_experimental.text_splitter import SemanticChunker

class MyEmbeddings:
    """Minimal adapter exposing the embed_documents / embed_query interface
    that Chroma expects, backed by sentence-transformers."""

    def __init__(self):
        self.model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [self.model.encode(t).tolist() for t in texts]

    def embed_query(self, query: str) -> List[float]:
        return self.model.encode(query).tolist()

embeddings = MyEmbeddings()
# SemanticChunker is only needed when building the index; it is unused at
# query time.
splitter = SemanticChunker(embeddings)
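
# Quick sanity check of the adapter (illustrative only; all-MiniLM-L6-v2
# embeds into 384 dimensions):
#   vec = embeddings.embed_query("hello world")
#   assert len(vec) == 384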

CHROMA_PATH = "chroma8"
# Load the Chroma index persisted in this directory.
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
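
# The index is assumed to have been built offline by a separate ingestion
# step, roughly (sketch, assuming `chunks` is a list of langchain Documents):
#   db = Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_PATH)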

from huggingface_hub import login

# Load .env (if present) and retrieve the Hugging Face token from the
# HF_Token environment variable (e.g. a Space secret), then log in.
load_dotenv()
token = os.getenv('HF_Token')
login(token=token)

from transformers import AutoTokenizer, pipeline
import torch

model = "tiiuae/falcon-7b-instruct"  # alternative: meta-llama/Llama-2-7b-chat-hf
tokenizer = AutoTokenizer.from_pretrained(model, token=token)

# Text-generation pipeline: fp16 weights, devices assigned automatically.
llama_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

def get_response(prompt: str) -> None:
    """
    Generate a response from the model and print it.

    Parameters:
        prompt (str): The user's input/question for the model.

    Returns:
        None: Prints the model's response.
    """
    sequences = llama_pipeline(
        prompt,
        do_sample=True,
        top_k=10,  # sample from the 10 most likely next tokens
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=256,
    )
    print("Chatbot:", sequences[0]['generated_text'])
template = """Answer the query based only the provided context, and if the answer is not contained within the context below, say "I don't knowwwww"
Context:
{context}
{query}""".strip()
from langchain.prompts import PromptTemplate
prompt_template = PromptTemplate(
    input_variables=["query", "context"],
    template=template,
)
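
# e.g. prompt_template.format(query="Who is Alice?", context="Alice is ...")
# returns the template above with both placeholders filled in.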

# Generate a retrieval-augmented response from the model.
def get_llama_response(message: str, history: list) -> str:
    """
    Generates a conversational response grounded in documents retrieved
    from the Chroma index.

    Parameters:
        message (str): User's input message.
        history (list): Past conversation history (unused; required by Gradio).

    Returns:
        str: Generated response from the model.
    """
    # Search the DB for the chunks most relevant to the query.
    results = db.similarity_search_with_relevance_scores(message, k=3)
    if len(results) == 0 or results[0][1] < 0.5:
        print("Unable to find matching results.")
        return "I don't know."
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])

    # Fill the prompt with the retrieved context and the user's question.
    query = prompt_template.format(query=message, context=context_text)

    sequences = llama_pipeline(
        query,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=1024,
    )
    generated_text = sequences[0]['generated_text']
    response = generated_text[len(query):]  # strip the echoed prompt
    print("Chatbot:", response.strip())
    return response.strip()
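
# Example call (Gradio passes an empty history on the first turn):
#   get_llama_response("how does alice meet the mad hatter?", [])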

import gradio as gr

# Launch a simple chat UI backed by the retrieval pipeline.
gr.ChatInterface(get_llama_response).launch()
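
# On a Hugging Face Space the app is served automatically; when running
# locally, launch(share=True) exposes a temporary public link (a standard
# Gradio option).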