from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import os
import requests
# from langchain.llms.huggingface_pipeline import HuggingFacePipeline

# API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-v0.1"
# headers = {"Authorization": f"Bearer {key}"}

# def query(payload):
# 	response = requests.post(API_URL, headers=headers, json=payload)
# 	return response.json()
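# Example payload for the hosted Inference API (shape per the HF docs; the
# text-generation task returns a list of dicts):
#   query({"inputs": "def hello():"})  # -> [{"generated_text": "..."}]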

# def LLM(llm_name, length):
#     print(llm_name)
#     tokenizer = AutoTokenizer.from_pretrained(llm_name)
#     model = AutoModelForCausalLM.from_pretrained(llm_name,
#                                                  trust_remote_code=True, 
#                                                  device_map="auto",
#                                                  load_in_8bit=True)
#     pipe = pipeline("text-generation",
#                     model=model,
#                     tokenizer=tokenizer,
#                     max_length=length,
#                     do_sample=True,
#                     top_p=0.95,
#                     repetition_penalty=1.2,
#                    )
#     return pipe
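# Note: the load_in_8bit path above additionally requires the bitsandbytes
# package and a CUDA-capable GPU.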
# Load the Replit code-completion model locally; trust_remote_code is required
# because the checkpoint ships custom model code.
tokenizer = AutoTokenizer.from_pretrained('replit/replit-code-v1_5-3b', trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained('replit/replit-code-v1_5-3b', trust_remote_code=True)

# Smoke test: complete a small prompt to confirm the model loaded correctly.
x = tokenizer.encode('def fibonacci(n): ', return_tensors='pt')
y = model.generate(x, max_length=100, do_sample=True, top_p=0.95, top_k=4, temperature=0.2, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)

# Decode the single returned sequence and log it.
generated_code = tokenizer.decode(y[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
print(generated_code)

# Wrap the model in a callable text-generation pipeline. (The original code
# assigned the generated string here, so mistral_llm(prompt) below would have
# raised "str is not callable".) Sampling settings mirror the commented-out
# LLM() helper above; max_length=512 is an arbitrary cap.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer,
                max_length=512, do_sample=True, top_p=0.95, repetition_penalty=1.2)
# tokenizer = AutoTokenizer.from_pretrained("WizardLM/WizardCoder-1B-V1.0")
# base_model = AutoModelForCausalLM.from_pretrained("WizardLM/WizardCoder-1B-V1.0")

# Both backends currently alias the same local Replit pipeline; the
# commented-out calls show the intended Mistral / WizardCoder setups.
# Mistral 7B
# mistral_llm = LLM("mistralai/Mistral-7B-v0.1", 30000)
mistral_llm = pipe

# WizardCoder 13B
# wizard_llm = LLM("WizardLM/WizardCoder-Python-13B-V1.0", 8000)
wizard_llm = pipe
# hf_llm = HuggingFacePipeline(pipeline=pipe)

def ask_model(model, prompt):
    # Route the prompt to the requested backend.
    if model == 'mistral':
        return mistral_llm(prompt)
    if model == 'wizard':
        return wizard_llm(prompt)
    return None  # unknown model name falls through explicitly
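# Usage sketch (the HF pipeline returns a list of dicts):
#   ask_model('mistral', 'def quicksort(arr):')
#   # -> [{'generated_text': 'def quicksort(arr): ...'}]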


# Credentials come from the environment; the HF key is only used by the
# commented-out Inference API path above.
key = os.environ.get("huggingface_key")
openai_api_key = os.environ.get("openai_key")
app = FastAPI(openapi_url="/api/v1/LLM/openapi.json", docs_url="/api/v1/LLM/docs")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
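# Note: per the FastAPI docs, allow_origins=["*"] cannot be combined with
# allow_credentials=True for credentialed requests; a sketch with an explicit
# (hypothetical) frontend origin instead:
# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=["http://localhost:3000"],  # hypothetical frontend URL
#     allow_methods=["*"],
#     allow_headers=["*"],
#     allow_credentials=True,
# )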


@app.get("/")
def root():
    return {"message": "R&D LLM API"}
    
# @app.get("/get")
# def get():
#     result = pipe("name 5 programming languages",do_sample=False)
#     print(result)
#     return {"message": result}


@app.post("/ask_llm")
async def ask_llm_endpoint(model: str, prompt: str):
    result = ask_model(model, prompt)
    return {"result": result}
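# Example request (hypothetical host/port; model and prompt are query params):
#   curl -X POST "http://localhost:8000/ask_llm?model=mistral&prompt=hello"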


# APIs

# @app.post("/ask_HFAPI")
# def ask_HFAPI_endpoint(prompt: str):
#     result = query(prompt)
#     return {"result": result}
    
from langchain.llms import OpenAI

# OpenAI completion backend for the /ask_GPT endpoint below.
llm = OpenAI(model_name="text-davinci-003", temperature=0.5, openai_api_key=openai_api_key)
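# Note: OpenAI has retired "text-davinci-003"; swapping in a chat model via
# langchain's ChatOpenAI would be the modern equivalent (not done here).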

@app.post("/ask_GPT")
def ask_GPT_endpoint(prompt: str):
    result = llm(prompt)
    return {"result": result}
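
# To serve the app locally (assuming this file is named main.py):
#   uvicorn main:app --host 0.0.0.0 --port 8000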