Spaces:
Runtime error
Runtime error
File size: 1,304 Bytes
9bf2007 dcd2d54 ea58bd6 9bf2007 5ed2b9f e48a0c0 7338a55 e5e2748 21e7dd1 fdc39d2 1720d8c 7338a55 3e6fc0f 1720d8c 724ddd6 72231f4 724ddd6 6254e11 724ddd6 21e7dd1 ea58bd6 21e7dd1 724ddd6 1720d8c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import transformers
import torch
from fastapi import FastAPI, Response
from transformers import AutoModelForCausalLM, AutoTokenizer
from fastapi.middleware.cors import CORSMiddleware
# FastAPI application object; routes are registered against it below.
app = FastAPI()

# Handles for the LLM model/tokenizer, meant to be populated by the
# (currently disabled) startup hook further down in this file.
MODEL = None
TOKENIZER = None

# Example query string for the planned inference endpoint:
# ?input=%22Name%203%20shows%22

# CORS allow-list for the frontend.
# FIX: a browser's Origin header never carries a trailing slash, and
# Starlette's CORSMiddleware compares origins exactly — the previous value
# 'https://aiforall.netlify.app/' could never match, so every cross-origin
# request from the frontend would be rejected. Trailing slash removed.
origins = ['https://aiforall.netlify.app']

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/")
def llama():
    """Placeholder root endpoint; model inference is currently disabled.

    Returns a small JSON body. FIX: the response declares
    ``media_type="application/json"`` but the previous body, bare
    ``hello world``, was not valid JSON — clients parsing the declared
    content type would fail. The body is now a valid JSON string literal.
    """
    # Disabled inference sketch — depends on MODEL/TOKENIZER being loaded
    # by the commented-out startup hook below; kept for future re-enabling.
    # prompt = [{'role': 'user', 'content': ""+input}]
    # inputs = TOKENIZER.apply_chat_template( prompt, add_generation_prompt=True, return_tensors='pt' )
    # tokens = MODEL.generate( inputs.to(MODEL.device), max_new_tokens=1024, temperature=0.3, do_sample=True)
    # tresponse = TOKENIZER.decode(tokens[0], skip_special_tokens=False)
    # print(tresponse)
    return Response(content='"hello world"', media_type="application/json")
# @app.on_event("startup")
# def init_model():
# global MODEL
# global TOKENIZER
# if not MODEL:
# print("loading model")
# TOKENIZER = AutoTokenizer.from_pretrained('stabilityai/stablelm-zephyr-3b')
# MODEL = AutoModelForCausalLM.from_pretrained('stabilityai/stablelm-zephyr-3b', device_map="auto")
# print("loaded model")
|