from fastapi import FastAPI, Response
from fastapi.middleware.cors import CORSMiddleware
from transformers import AutoModelForCausalLM, AutoTokenizer

app = FastAPI()

# Model and tokenizer are loaded lazily by the (currently disabled)
# startup hook below; None means inference is unavailable.
MODEL = None
TOKENIZER = None

# Example query string for the endpoint: ?input=%22Name%203%20shows%22
# Note: a CORS origin must match the browser's Origin header exactly,
# which never includes a trailing slash.
origins = ['https://aiforall.netlify.app']

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/")
def llama():
    # prompt = [{'role': 'user', 'content': ""+input}]
    # inputs = TOKENIZER.apply_chat_template( prompt, add_generation_prompt=True,     return_tensors='pt' )

    # tokens = MODEL.generate( inputs.to(MODEL.device), max_new_tokens=1024, temperature=0.3, do_sample=True)

    # tresponse = TOKENIZER.decode(tokens[0], skip_special_tokens=False)
    # print(tresponse)

    return Response(content="hello world", media_type="application/json")

# Commented out so the app can start without downloading the 3B-parameter
# model; re-enable to serve real generations from the endpoint above.
# @app.on_event("startup")
# def init_model():
#     global MODEL
#     global TOKENIZER
#     if MODEL is None:
#         print("loading model")
#         TOKENIZER = AutoTokenizer.from_pretrained('stabilityai/stablelm-zephyr-3b')
#         MODEL = AutoModelForCausalLM.from_pretrained('stabilityai/stablelm-zephyr-3b', device_map="auto")
#         print("loaded model")