dinhquangson committed: Update app.py
app.py CHANGED
@@ -17,7 +17,9 @@ from langchain.chat_models import ChatOpenAI
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from htmlTemplates import css, bot_template, user_template
-from langchain.llms import HuggingFaceHub
+#from langchain.llms import HuggingFaceHub
+from llama_index.llms import LlamaCPP
+
 
 
 def get_pdf_pages(pdf_docs):
@@ -108,11 +110,28 @@ def get_conversation_chain(vectorstore):
     ConversationalRetrievalChain
         A conversational retrieval chain for generating responses.
     """
-
+    """
     llm = HuggingFaceHub(
         repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
         model_kwargs={"temperature": 0.5, "max_new_tokens": 1024, "max_length": 1048, "top_k": 3, "trust_remote_code": True, "torch_dtype": "auto"},
     )
+    """
+    llm = LlamaCPP(
+        model_url=None,  # load the model from a local file instead of downloading it
+        # trying a small quantized build of an already small model
+        model_path='phi-2.Q4_K_M.gguf',
+        temperature=0.1,
+        max_new_tokens=512,
+        context_window=2048,  # Phi-2's 2K context window; a possible limitation for RAG, since retrieved content must fit inside it
+        generate_kwargs={},
+        # n_gpu_layers must be at least 1 to use the GPU; this is a small model,
+        # and the load log gives no indication that layers were offloaded to the GPU
+        model_kwargs={"n_gpu_layers": 32},
+        messages_to_prompt=messages_to_prompt,
+        completion_to_prompt=completion_to_prompt,
+        verbose=True
+    )
+
     # llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
 
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
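
Note: the new LlamaCPP(...) call passes messages_to_prompt and completion_to_prompt, but neither is imported or defined in the hunks shown above. The llama_index releases that still expose "from llama_index.llms import LlamaCPP" ship Llama-2-style defaults in llama_index.llms.llama_utils, but Phi-2 uses a different template. Below is a minimal sketch of Phi-2-style formatters, assuming the "Instruct:/Output:" format from the Phi-2 model card; the function bodies are illustrative, not code from this commit.

# Minimal sketch, not from this commit: Phi-2-style prompt formatters for
# LlamaCPP's messages_to_prompt / completion_to_prompt hooks. The
# "Instruct:/Output:" template follows the Phi-2 model card.

def completion_to_prompt(completion: str) -> str:
    # Phi-2 continues generation after "Output:".
    return f"Instruct: {completion}\nOutput:"

def messages_to_prompt(messages) -> str:
    # Flatten a chat history (llama_index ChatMessage objects) into the same
    # single-turn template; the role-prefixed transcript is an assumption.
    transcript = "\n".join(f"{m.role.value}: {m.content}" for m in messages)
    return f"Instruct: {transcript}\nOutput:"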
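
Separately, LlamaCPP here is llama_index's wrapper, while get_conversation_chain goes on to build a LangChain ConversationalRetrievalChain; a llama_index LLM object is not a drop-in LangChain LLM. If the intent is to stay inside LangChain, its own llama-cpp-python wrapper accepts the same local GGUF file. A sketch under that assumption, mirroring the values chosen in this commit (the from_llm wiring at the end is the usual pattern, not code shown in the diff):

from langchain.llms import LlamaCpp
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

def get_conversation_chain(vectorstore):
    # LangChain's llama-cpp-python wrapper, mirroring the commit's settings.
    llm = LlamaCpp(
        model_path="phi-2.Q4_K_M.gguf",  # same local GGUF file as above
        temperature=0.1,
        max_tokens=512,   # LangChain's counterpart of max_new_tokens
        n_ctx=2048,       # counterpart of context_window
        n_gpu_layers=32,  # >= 1 offloads layers to the GPU
        verbose=True,
    )
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    # The hunk above ends at the memory line; from_llm is the usual way such a
    # chain is assembled, though this commit does not show that part.
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )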
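
On the GPU comment in the diff: with verbose=True, llama-cpp-python echoes llama.cpp's load log, which normally includes a line reporting how many layers were offloaded to the GPU. If no such line appears, the installed wheel was most likely built without GPU support; for llama-cpp-python versions of this era the usual fix was a CUDA-enabled reinstall along the lines of CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install --force-reinstall --no-cache-dir llama-cpp-python (the exact CMake flag depends on the library version).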