from langchain_community.llms import LlamaCpp
from langchain.chains import ConversationalRetrievalChain
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient
from src.prompt import *
from src.helper import download_hugging_face_embedding
import gradio as gr

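# Gradio chat UI over a retrieval-augmented pipeline: a local llama.cpp model
# answers medical questions using context retrieved from a Qdrant vector store.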

# Path to the locally quantized GGUF model, loaded through llama.cpp.
local_llm = r"C:\Users\irath\Documents\docs\Projects\End_to_End_Medical_Chatbot_using_Langchain\End-to-End-Medical-Chatbot-using-Langchain\Model\qwen2-1.5b-medical_qa-finetune-q4_k_m.gguf"
# Low temperature keeps answers close to the retrieved context.
llm = LlamaCpp(model_path=local_llm, temperature=0.1, max_tokens=2048, top_p=1, n_ctx=2048)

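# The embedding model must match the one used when the collection was indexed.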
embeddings = download_hugging_face_embedding()

# Connect to a locally running Qdrant instance and wrap the existing
# "vector_db" collection as a LangChain vector store.
url = "http://localhost:6333"
client = QdrantClient(url=url, prefer_grpc=False)
db = Qdrant(client=client, embeddings=embeddings, collection_name="vector_db")

# Return only the single most similar chunk per query.
retriever = db.as_retriever(search_kwargs={"k": 1})

if llm is not None and db is not None:
    chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever)
else:
    # Fail fast instead of leaving `chain` undefined for later requests.
    raise RuntimeError("LLM or vector database not initialized")
    
def predict(message, history):
    # Gradio passes `history` as [user, bot] pairs; convert them to the
    # (question, answer) tuples that ConversationalRetrievalChain expects.
    # Building a fresh tuple per turn keeps every history entry independent,
    # and using Gradio's per-session history avoids shared global state.
    history_langchain_format = [(user_msg, bot_msg) for user_msg, bot_msg in history]

    response = chain.invoke({"question": message, "chat_history": history_langchain_format})
    return response["answer"]

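# ChatInterface wires predict(message, history) to a chat UI; launch() blocks
# and serves on http://127.0.0.1:7860 by default.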
gr.ChatInterface(predict).launch()