from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import os
import requests
# from langchain.llms.huggingface_pipeline import HuggingFacePipeline

# API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-v0.1"
# headers = {"Authorization": f"Bearer {key}"}
# def query(payload):
#     response = requests.post(API_URL, headers=headers, json=payload)
#     return response.json()
def LLM(llm_name, length):
    """Load a causal LM from the Hub and wrap it in a text-generation pipeline."""
    print(llm_name)
    tokenizer = AutoTokenizer.from_pretrained(llm_name)
    model = AutoModelForCausalLM.from_pretrained(llm_name, trust_remote_code=True)
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=length,       # cap on prompt + generated tokens
        do_sample=True,
        top_p=0.95,
        repetition_penalty=1.2,
    )
    return pipe
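
# Illustrative usage (not part of the served app): a text-generation pipeline
# returns a list of dicts, each carrying the output under "generated_text".
#   pipe = LLM("WizardLM/WizardCoder-3B-V1.0", 4000)
#   print(pipe("name 5 programming languages")[0]["generated_text"])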
# tokenizer = AutoTokenizer.from_pretrained("WizardLM/WizardCoder-1B-V1.0")
# base_model = AutoModelForCausalLM.from_pretrained("WizardLM/WizardCoder-1B-V1.0")
# Mistral 7B
# mistral_llm = LLM("mistralai/Mistral-7B-v0.1",30000)
# mistral_llm = LLM("microsoft/phi-2",2000)
# WizardCoder 13B
# wizard_llm = LLM("WizardLM/WizardCoder-Python-13B-V1.0",8000)
# Both names currently point at the same WizardCoder-3B pipeline.
wizard_llm = LLM("WizardLM/WizardCoder-3B-V1.0", 4000)
mistral_llm = wizard_llm
# hf_llm = HuggingFacePipeline(pipeline=pipe)
def ask_model(model, prompt):
    """Route the prompt to the pipeline selected by `model`."""
    if model == 'mistral':
        return mistral_llm(prompt)
    if model == 'wizard':
        return wizard_llm(prompt)
    return None  # unknown model name
key = os.environ.get("huggingface_key")
openai_api_key = os.environ.get("openai_key")

app = FastAPI(openapi_url="/api/v1/LLM/openapi.json", docs_url="/api/v1/LLM/docs")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
@app.get("/")
def root():
return {"message": "R&D LLM API"}
# @app.get("/get")
# def get():
# result = pipe("name 5 programming languages",do_sample=False)
# print(result)
# return {"message": result}
@app.post("/ask_llm")
async def ask_llm_endpoint(model:str, prompt: str):
result = ask_model(model,prompt)
return {"result": result}
# APIs
# @app.post("/ask_HFAPI")
# def ask_HFAPI_endpoint(prompt: str):
#     result = query(prompt)
#     return {"result": result}
from langchain.llms import OpenAI

llm = OpenAI(model_name="text-davinci-003", temperature=0.5, openai_api_key=openai_api_key)

@app.post("/ask_GPT")
def ask_GPT_endpoint(prompt: str):
    result = llm(prompt)
    return {"result": result}