Spaces:

kiliango
/

MY_ASSISTANT_API

Running

App Files Files Community

Ilyas KHIAT committed on Oct 8, 2024

Commit

716ce63

1 Parent(s): e08f8de

app api files

Browse files

Files changed (7) hide show

.dockerignore +11 -0
.gitignore +2 -0
Dockerfile +13 -0
main.py +125 -0
prompt.py +14 -0
rag.py +84 -0
requirements.txt +15 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,11 @@

+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.Python
+env/
+venv/
+.git
+.dockerignore
+Dockerfile
+*.md

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ __pycache__/
2	+ .env

Dockerfile ADDED Viewed

	@@ -0,0 +1,13 @@

+# Container image for the FastAPI app, built on the Python 3.12 base image.
+FROM python:3.12
+# Create a user with uid 1000 and a home directory, then run the remaining steps as that user.
+RUN useradd -m -u 1000 user
+USER user
+# Put the user's ~/.local/bin on PATH (presumably where pip places console scripts such as uvicorn for this user -- confirm).
+ENV PATH="/home/user/.local/bin:$PATH"
+WORKDIR /app
+# requirements.txt is copied and installed before the rest of the sources are copied.
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . /app
+# Serve main:app with uvicorn on all interfaces, port 7860.
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

main.py ADDED Viewed

	@@ -0,0 +1,125 @@

+from fastapi import FastAPI, HTTPException, UploadFile, File,Request,Depends,status
+from fastapi.security import OAuth2PasswordBearer
+from pydantic import BaseModel, Json
+from typing import Optional
+from pinecone import Pinecone, ServerlessSpec
+from uuid import uuid4
+import os
+from dotenv import load_dotenv
+from rag import *
+from fastapi.responses import StreamingResponse
+import json
+from prompt import *
+from typing import Literal
+import time
+from fastapi.middleware.cors import CORSMiddleware
+load_dotenv()  # load variables from a local .env file into the environment
+## setup pinecone index
+pinecone_api_key = os.environ.get("PINECONE_API_KEY")
+pc = Pinecone(api_key=pinecone_api_key)
+index_name = os.environ.get("INDEX_NAME") # change if desired
+existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]
+if index_name not in existing_indexes:  # create the index when no index with this name exists yet
+    pc.create_index(
+        name=index_name,
+        dimension=1536,  # NOTE(review): presumably the vector size of the embedding model configured in rag.py -- confirm
+        metric="cosine",
+        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
+    )
+    while not pc.describe_index(index_name).status["ready"]:  # poll once per second until the new index reports ready
+        time.sleep(1)
+index = pc.Index(index_name)
+vector_store = PineconeVectorStore(index=index, embedding=embedding)  # PineconeVectorStore and embedding are not imported by name here; presumably they arrive via `from rag import *`
+## setup authorization
+api_keys = [os.environ.get("FASTAPI_API_KEY")]  # accepted keys; holds None when FASTAPI_API_KEY is unset
+oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")  # use token authentication
+def api_key_auth(api_key: str = Depends(oauth2_scheme)):
+    if api_key not in api_keys:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Forbidden"
+        )
+dev_mode = os.environ.get("DEV")
+if dev_mode == "True":
+    app = FastAPI()
+else:
+    app = FastAPI(dependencies=[Depends(api_key_auth)])
+app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
+class UserInput(BaseModel):  # request body accepted by POST /generate
+    query: str  # the user's question
+    stream: Optional[bool] = False  # when true, /generate answers with a StreamingResponse
+    messages: Optional[list[dict]] = []  # prior conversation; handed to generate_stream, which inserts it as the prompt history
+class ChunkToDB(BaseModel):  # request body accepted by POST /add_chunk_to_db
+    message: str  # text chunk passed to get_vectorstore as text_chunk
+    title: str  # passed to get_vectorstore, which stores it as "title" metadata and as the id prefix
+@app.post("/add_chunk_to_db")
+async def add_chunk_to_db(chunk: ChunkToDB):
+    try:
+        title = chunk.title
+        message = chunk.message
+        return get_vectorstore(text_chunk=message,index=index,title=title)
+    except Exception as e:
+        return {"message": str(e)}
+@app.get("/list_vectors")
+async def list_vectors():
+    try:
+        return index.list()
+    except Exception as e:
+        return {"message": str(e)}
+@app.post("/generate")
+async def generate(user_input: UserInput):
+    try:
+        print(user_input.stream,user_input.query)
+        if user_input.stream:
+            return StreamingResponse(generate_stream(user_input.query,user_input.messages,index_name=index,stream=True,vector_store=vector_store),media_type="application/json")
+        else:
+            return generate_stream(user_input.query,user_input.messages,index_name=index,stream=False,vector_store=vector_store)
+    except Exception as e:
+        return {"message": str(e)}
+@app.post("/retreive_context")
+async def retreive_context_response(query: str):
+    try:
+        return retreive_context(index=index,query=query)
+    except Exception as e:
+        return {"message": str(e)}
+@app.delete("/delete_vector")
+async def delete_vector(filename_id: str):
+    try:
+        return index.delete(ids=[filename_id])
+    except Exception as e:
+        return {"message": str(e)}
+@app.get("/check_server")
+async def check_server():
+    return {"message":"Server is running"}
+@app.get("/")
+async def read_root():
+    return {"message":"Welcome to the AI API"}

prompt.py ADDED Viewed

	@@ -0,0 +1,14 @@

+template = '''
+You are an AI assistant for Ilyas Khiat, a future engineer with a major in AI, and software engineering. Your job is to respond to visitors in a persuasive, concise, and brilliant way, always making Ilyas' profile stand out. Your responses must highlight his technical expertise, projects, and how he adds value to potential employers, plus soft skills. Always provide necessary links (e.g., LinkedIn: https://www.linkedin.com/in/ilyas-khiat-148a73254/, github: https://github.com/Ilyas-Khiat , projects, hobbies) to substantiate the information. Ensure your tone is pleasant, engaging, and matches the language of the user's query. The goal is to convince recruiters that Ilyas is the best fit for their business needs.
+The context retreived from the user is:
+{context}
+The history of the conversation is:
+{history}
+The user's query is:
+{query}
+Please respond to the user's query in a  consis way  and well formatted markdown with paragraphs and emojis that highlights Ilyas' technical expertise, projects, and how he adds value to potential employers, plus soft skills. Add life to your answer and emphasize keywords with bold, make it short in no more than 150 words or 200 tokens. Ensure your tone is pleasant, engaging, and matches the language of the user's query and your responce is not bluffing and exaggerating but proffesionnal and short and straight to the goal. The goal is to convince recruiters that Ilyas is the best fit for their business needs.
+'''  # prompt text with {context}, {history} and {query} placeholders; NOTE(review): the literal contains typos ("retreived", "consis", "responce", "proffesionnal") -- it is runtime text, so correct it only after checking the effect on model output

rag.py ADDED Viewed

	@@ -0,0 +1,84 @@

+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_openai import OpenAIEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_pinecone import PineconeVectorStore
+from langchain_core.documents import Document
+from langchain_openai import ChatOpenAI
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import PromptTemplate
+from uuid import uuid4
+from prompt import *
+from pydantic import BaseModel, Field
+from dotenv import load_dotenv
+import os
+from langchain_core.tools import tool
+import unicodedata
+load_dotenv()  # load variables from a local .env file into the environment
+index_name = os.environ.get("INDEX_NAME")
+# Global initialization
+embedding_model = "text-embedding-3-small"
+embedding = OpenAIEmbeddings(model=embedding_model)
+vector_store = PineconeVectorStore(index=index_name, embedding=embedding)  # NOTE(review): index_name is the name string from the environment, yet it is passed as `index`; main.py passes a pc.Index object for the same parameter -- confirm this is intended
+def get_vectorstore(text_chunk,index,title,model="text-embedding-3-small"):
+    try:
+        embedding = OpenAIEmbeddings(model=model)
+        print("loaded embedding")
+        vector_store = PineconeVectorStore(index=index, embedding=embedding)
+        print("loaded vector store")
+        document = Document(
+        page_content=text_chunk,
+        metadata={"title": title}
+        )
+        print("loaded document")
+        uuid = f"{title}_{uuid4()}"
+        vector_store.add_documents(documents=[document], ids=[uuid])
+        print("added document")
+        return {"filename_id":uuid}
+    except Exception as e:
+        print(e)
+        return False
+def retreive_context(query:str,index:str, model="text-embedding-3-small",vector_store=None):
+    try:
+        #vector_store = PineconeVectorStore(index=index, embedding=embedding)
+        retriever = vector_store.as_retriever(
+                search_type="similarity_score_threshold",
+                search_kwargs={"k": 3, "score_threshold": 0.5},
+            )
+        return retriever.invoke(query)
+    except Exception as e:
+        print(e)
+        return False
+llm = ChatOpenAI(model="gpt-4o-mini", max_tokens=300, temperature=0.5)  # module-level chat model that generate_stream puts into its chain
+def generate_stream(query:str,messages = [], model = "gpt-4o-mini", max_tokens = 300, temperature = 0.5,index_name="",stream=True,vector_store=None):
+    try:
+        print("init chat")
+        print("init template")
+        prompt = PromptTemplate.from_template(template)
+        print("retreiving context")
+        context = retreive_context(query=query,index=index_name,vector_store=vector_store)
+        print(f"Context: {context}")
+        llm_chain = prompt | llm | StrOutputParser()
+        print("streaming")
+        if stream:
+            return llm_chain.stream({"context":context,"history":messages,"query":query})
+        else:
+            return llm_chain.invoke({"context":context,"history":messages,"query":query})
+    except Exception as e:
+        print(e)
+        return False

requirements.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+fastapi
+uvicorn
+python-multipart
+pydantic
+langchain-pinecone
+pinecone-notebooks
+pinecone-client[grpc]
+async-timeout
+pymupdf
+python-dotenv
+typing-extensions
+langchain
+langchain-openai
+langchain-community
+langchain-pinecone