Spaces:

AminFaraji
/

FirstSpace

Runtime error

App Files Files Community

AminFaraji committed on Oct 8, 2024

Commit

a4522e9

verified ·

1 Parent(s): 8317bae

Create app.py

Browse files

Files changed (1) hide show

app.py +289 -0

app.py ADDED Viewed

	@@ -0,0 +1,289 @@

+print(55877)
+import argparse
+# from dataclasses import dataclass
+from langchain.prompts import ChatPromptTemplate
+try:
+  from langchain_community.vectorstores import Chroma
+except:
+  from langchain_community.vectorstores import Chroma
+#from langchain_openai import OpenAIEmbeddings
+#from langchain_openai import ChatOpenAI
+# from langchain.document_loaders import DirectoryLoader
+from langchain_community.document_loaders import DirectoryLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.schema import Document
+# from langchain.embeddings import OpenAIEmbeddings
+#from langchain_openai import OpenAIEmbeddings
+from langchain_community.vectorstores import Chroma
+import openai
+from dotenv import load_dotenv
+import os
+import shutil
+import re
+import warnings
+from typing import List
+import torch
+from langchain import PromptTemplate
+from langchain.chains import ConversationChain
+from langchain.chains.conversation.memory import ConversationBufferWindowMemory
+from langchain.llms import HuggingFacePipeline
+from langchain.schema import BaseOutputParser
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    StoppingCriteria,
+    StoppingCriteriaList,
+    pipeline,
+)
+# Silence UserWarning-category warnings for the rest of the process.
+warnings.filterwarnings("ignore", category=UserWarning)
+# Hugging Face model identifier used for both the model and its tokenizer.
+MODEL_NAME = "tiiuae/falcon-7b-instruct"
+# Load the causal LM. trust_remote_code=True permits model code shipped with the
+# checkpoint to run; device_map="auto" and offload_folder="offload" are passed to
+# the loader (NOTE(review): actual weight placement depends on the host — confirm).
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME, trust_remote_code=True, device_map="auto",offload_folder="offload"
+)
+# Put the model in evaluation mode before it is used for generation.
+model = model.eval()
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+print(f"Model device: {model.device}")
+# a custom embedding
+from sentence_transformers import SentenceTransformer
+from langchain_experimental.text_splitter import SemanticChunker
+from typing import List
+class MyEmbeddings:
+    def __init__(self):
+        self.model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+        #self.model=model
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        return [self.model.encode(t).tolist() for t in texts]
+    def embed_query(self, query: str) -> List[float]:
+            return [self.model.encode([query])][0][0].tolist()
+# Shared embedding object; it is passed to the Chroma store created later in this script.
+embeddings = MyEmbeddings()
+# Semantic text splitter built on the same embedding object.
+# NOTE(review): `splitter` is not referenced again in the visible code — confirm it is still needed.
+splitter = SemanticChunker(embeddings)
+# Create CLI.
+#parser = argparse.ArgumentParser()
+#parser.add_argument("query_text", type=str, help="The query text.")
+#args = parser.parse_args()
+#query_text = args.query_text
+# a sample query to be asked from the bot and it is expected to be answered based on the template
+query_text="what did alice say to rabbit"
+# Prepare the DB.
+#embedding_function = OpenAIEmbeddings() # main
+CHROMA_PATH = "chroma8"
+# call the chroma generated in a directory
+db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
+# Search the DB for similar documents to the query.
+results = db.similarity_search_with_relevance_scores(query_text, k=2)
+if len(results) == 0 or results[0][1] < 0.5:
+    print(f"Unable to find matching results.")
+context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
+prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
+prompt = prompt_template.format(context=context_text, question=query_text)
+print(prompt)
+generation_config = model.generation_config
+generation_config.temperature = 0
+generation_config.num_return_sequences = 1
+generation_config.max_new_tokens = 256
+generation_config.use_cache = False
+generation_config.repetition_penalty = 1.7
+generation_config.pad_token_id = tokenizer.eos_token_id
+generation_config.eos_token_id = tokenizer.eos_token_id
+generation_config
+# Stand-alone sample prompt. NOTE: this rebinds `prompt`, replacing the
+# retrieval prompt string built earlier in the script.
+prompt = """
+The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context.
+Current conversation:
+Human: Who is Dwight K Schrute?
+AI:
+""".strip()
+# Tokenize the sample prompt into PyTorch tensors and move the ids to the model's device.
+# NOTE(review): this module-level `input_ids` is not used again in the visible code — confirm it is needed.
+input_ids = tokenizer(prompt, return_tensors="pt").input_ids
+input_ids = input_ids.to(model.device)
+class StopGenerationCriteria(StoppingCriteria):
+    def __init__(
+        self, tokens: List[List[str]], tokenizer: AutoTokenizer, device: torch.device
+    ):
+        stop_token_ids = [tokenizer.convert_tokens_to_ids(t) for t in tokens]
+        self.stop_token_ids = [
+            torch.tensor(x, dtype=torch.long, device=device) for x in stop_token_ids
+        ]
+    def __call__(
+        self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
+    ) -> bool:
+        for stop_ids in self.stop_token_ids:
+            if torch.eq(input_ids[0][-len(stop_ids) :], stop_ids).all():
+                return True
+        return False
+# Token sequences that end generation: the speaker labels "Human:" and "AI:"
+# used by the prompts in this file.
+stop_tokens = [["Human", ":"], ["AI", ":"]]
+stopping_criteria = StoppingCriteriaList(
+    [StopGenerationCriteria(stop_tokens, tokenizer, model.device)]
+)
+# Text-generation pipeline around the already-loaded model and tokenizer,
+# using the stopping criteria and generation config defined above.
+generation_pipeline = pipeline(
+    model=model,
+    tokenizer=tokenizer,
+    return_full_text=True,
+    task="text-generation",
+    stopping_criteria=stopping_criteria,
+    generation_config=generation_config,
+)
+# Wrap the pipeline so it can be passed as the `llm` of a LangChain chain.
+llm = HuggingFacePipeline(pipeline=generation_pipeline)
+# Persona prompt for the conversation chain; it declares the {history} and {input} variables.
+template = """
+The following is a conversation between a human an AI. The AI acts exactly like Dwight K Schrute from the TV show The Office.
+Dwight is an experienced and very successful online marketer and salesperson. He is witty, persuasive, uncensored, direct and practical.
+Dwight helps with every marketing task is given to him. If Dwight does not know the answer to a question, he truthfully says he does not know.
+Current conversation:
+{history}
+Human: {input}
+AI:""".strip()
+prompt = PromptTemplate(input_variables=["history", "input"], template=template)
+# k probably sets how many previous exchanges are taken into account for new
+# answers — confirm against the LangChain memory documentation.
+memory = ConversationBufferWindowMemory(
+    memory_key="history", k=6, return_only_outputs=True
+)
+# NOTE: `template`, `prompt`, `memory` and `chain` are all assigned again later
+# in this file, so these first definitions are replaced before the app is launched.
+chain = ConversationChain(llm=llm, memory=memory, prompt=prompt, verbose=True)
+class CleanupOutputParser(BaseOutputParser):
+    def parse(self, text: str) -> str:
+        user_pattern = r"\nUser"
+        text = re.sub(user_pattern, "", text)
+        human_pattern = r"\nHuman:"
+        text = re.sub(human_pattern, "", text)
+        ai_pattern = r"\nAI:"
+        return re.sub(ai_pattern, "", text).strip()
+    @property
+    def _type(self) -> str:
+        return "output_parser"
+class CleanupOutputParser(BaseOutputParser):
+    def parse(self, text: str) -> str:
+        user_pattern = r"\nUser"
+        text = re.sub(user_pattern, "", text)
+        human_pattern = r"\nquestion:"
+        text = re.sub(human_pattern, "", text)
+        ai_pattern = r"\nanswer:"
+        return re.sub(ai_pattern, "", text).strip()
+    @property
+    def _type(self) -> str:
+        return "output_parser"
+# Persona prompt for the conversation chain; it declares the {history} and {input} variables.
+# This rebinds the `template` and `prompt` names assigned earlier in the file.
+template = """
+The following is a conversation between a human an AI. The AI acts exactly like Dwight K Schrute from the TV show The Office.
+Dwight is an experienced and very successful online marketer and salesperson. He is witty, persuasive, uncensored, direct and practical.
+Dwight helps with every marketing task is given to him. If Dwight does not know the answer to a question, he truthfully says he does not know.
+Current conversation:
+{history}
+Human: {input}
+AI:""".strip()
+prompt = PromptTemplate(input_variables=["history", "input"], template=template)
+# Conversation memory exposed to the prompt under the "history" key, with k=3.
+memory = ConversationBufferWindowMemory(
+    memory_key="history", k=3, return_only_outputs=True
+)
+# Chain used by the request handler defined below; CleanupOutputParser is set
+# as its output parser.
+chain = ConversationChain(
+    llm=llm,
+    memory=memory,
+    prompt=prompt,
+    output_parser=CleanupOutputParser(),
+    verbose=True,
+)
+# Generate a response from the Llama model
+def get_llama_response(message: str, history: list) -> str:
+    """
+    Generates a conversational response from the Llama model.
+    Parameters:
+        message (str): User's input message.
+        history (list): Past conversation history.
+    Returns:
+        str: Generated response from the Llama model.
+    """
+    query_text =message
+    results = db.similarity_search_with_relevance_scores(query_text, k=2)
+    if len(results) == 0 or results[0][1] < 0.5:
+        print(f"Unable to find matching results.")
+    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results ])
+    template = """
+    The following is a conversation between a human an AI. Answer  question based only on the conversation.
+    Current conversation:
+    {history}
+    """
+    s="""
+    \n question: {input}
+    \n answer:""".strip()
+    prompt = PromptTemplate(input_variables=["history", "input"], template=template+context_text+'\n'+s)
+    #print(template)
+    chain.prompt=prompt
+    res = chain.predict(input=query_text)
+    return res
+        #return response.strip()
+import gradio as gr
+# Single text-in / text-out Gradio interface around get_llama_response.
+iface = gr.Interface(fn=get_llama_response, inputs="text", outputs="text")
+# Start the web UI; share=True requests a public share link from Gradio.
+iface.launch(share=True)