# LLM-dock / app.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from llama_cpp import Llama

# Initialize the LLM once when the application starts
llm = Llama.from_pretrained(
    repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
    filename="llama-3.2-1b-instruct-q4_k_m.gguf"
)
app = FastAPI()


class ChatRequest(BaseModel):
    message: str
@app.post("/chat")
async def chat_completion(request: ChatRequest):
    try:
        # Run a chat completion against the locally loaded GGUF model
        response = llm.create_chat_completion(
            messages=[
                {"role": "user", "content": request.message}
            ]
        )
        # Return only the assistant's reply text
        return {
            "response": response['choices'][0]['message']['content']
        }
    except Exception as e:
        # Surface model or runtime errors as an HTTP 500 response
        raise HTTPException(status_code=500, detail=str(e))
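

# Illustrative sketch, not part of the original file: one way to launch the
# API for local testing. It assumes uvicorn is installed and that port 8000
# is free; both are assumptions, not stated anywhere above.
if __name__ == "__main__":
    import uvicorn

    # Serve the FastAPI app defined above
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example request once the server is running (illustrative):
#   curl -X POST http://localhost:8000/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Hello, who are you?"}'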