from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import os
import requests
# from langchain.llms.huggingface_pipeline import HuggingFacePipeline

# API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-v0.1"
# headers = {"Authorization": f"Bearer {key}"}

# def query(payload):
# 	response = requests.post(API_URL, headers=headers, json=payload)
# 	return response.json()

def LLM(llm_name, length):
    """Load a causal LM from the Hugging Face Hub and wrap it in a text-generation pipeline."""
    print(f"Loading model: {llm_name}")
    tokenizer = AutoTokenizer.from_pretrained(llm_name)
    model = AutoModelForCausalLM.from_pretrained(llm_name, trust_remote_code=True)
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=length,
        do_sample=True,
        top_p=0.95,
        repetition_penalty=1.2,
    )
    return pipe
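# Example usage (a sketch; the model name and prompt below are illustrative,
# not part of this service):
#   demo_pipe = LLM("WizardLM/WizardCoder-1B-V1.0", 512)
#   print(demo_pipe("Write a hello-world function in Python.")[0]["generated_text"])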


# tokenizer = AutoTokenizer.from_pretrained("WizardLM/WizardCoder-1B-V1.0")
# base_model = AutoModelForCausalLM.from_pretrained("WizardLM/WizardCoder-1B-V1.0")
# Mistral 7B
# mistral_llm = LLM("mistralai/Mistral-7B-v0.1",30000)
# mistral_llm = LLM("microsoft/phi-2",2000)

# WizardCoder 13B
# wizard_llm = LLM("WizardLM/WizardCoder-Python-13B-V1.0",8000)
wizard_llm = LLM("WizardLM/WizardCoder-3B-V1.0", 4000)
# Both endpoints currently share the same pipeline; load a Mistral model above to separate them.
mistral_llm = wizard_llm
# hf_llm = HuggingFacePipeline(pipeline=pipe)

def ask_model(model, prompt):
    """Route a prompt to the requested pipeline; returns None for unknown model names."""
    if model == 'mistral':
        return mistral_llm(prompt)
    if model == 'wizard':
        return wizard_llm(prompt)
    return None


# API keys come from the environment; `key` is only used by the commented-out
# Hugging Face Inference API snippet above.
key = os.environ.get("huggingface_key")
openai_api_key = os.environ.get("openai_key")
app = FastAPI(openapi_url="/api/v1/LLM/openapi.json", docs_url="/api/v1/LLM/docs")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # wide-open CORS; note browsers ignore credentials with a wildcard origin
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)


@app.get("/")
def root():
    return {"message": "R&D LLM API"}
    
# @app.get("/get")
# def get():
#     result = pipe("name 5 programming languages",do_sample=False)
#     print(result)
#     return {"message": result}


@app.post("/ask_llm")
async def ask_llm_endpoint(model:str, prompt: str):
    result = ask_model(model,prompt)
    return {"result": result}


# APIs

# @app.post("/ask_HFAPI")
# def ask_HFAPI_endpoint(prompt: str):
#     result = query(prompt)
#     return {"result": result}
    
from langchain.llms import OpenAI

# Legacy OpenAI completion model, configured via the `openai_key` environment variable.
llm = OpenAI(model_name="text-davinci-003", temperature=0.5, openai_api_key=openai_api_key)

@app.post("/ask_GPT")
def ask_GPT_endpoint(prompt: str):
    result = llm(prompt)
    return {"result": result}