MekkCyber committed on
Commit 21e7b6b · 1 Parent(s): a6b0228
Files changed (3)
  1. .gitignore +1 -0
  2. README.md +1 -1
  3. app.py +12 -9
.gitignore ADDED
@@ -0,0 +1 @@
+models
README.md CHANGED
@@ -2,7 +2,7 @@
 title: BitNet.cpp
 emoji: 💻
 colorFrom: blue
-colorTo: red
+colorTo: white
 sdk: docker
 app_file: app.py
 app_port: 7860
app.py CHANGED
@@ -36,6 +36,8 @@ def setup_bitnet(model_name):
 def run_inference(model_name, input_text, num_tokens=6):
     try:
         # Call the `run_inference.py` script with the model and input
+
+        model_name = model_name.split("/")[1]
         start_time = time.time()
         if input_text is None :
             return "Please provide an input text for the model"
@@ -61,8 +63,9 @@ def run_transformers(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken
     # if oauth_token is None :
     #     return "Error : To Compare please login to your HF account and make sure you have access to the used Llama models"
     # Load the model and tokenizer dynamically if needed (commented out for performance)
-    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=oauth_token.token)
-    model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=oauth_token.token)
+    if model_name=="TinyLlama/TinyLlama-1.1B-Chat-v1.0" :
+        tokenizer = AutoTokenizer.from_pretrained('./models/tinyllama')
+        model = AutoModelForCausalLM.from_pretrained('./models/tinyllama')
     if input_text is None :
         return "Please provide an input text for the model", None
     # Encode the input text
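For context: the replaced lines downloaded the model from the Hub using `oauth_token.token`, while the new ones load a pre-downloaded copy from `./models/tinyllama`, so this comparison path no longer needs an OAuth login. A sketch of how that directory could be populated ahead of time (not part of this commit; assumes `huggingface_hub` is installed and the model is publicly accessible):

    # Sketch, not in this commit: pre-download TinyLlama into ./models/tinyllama.
    from huggingface_hub import snapshot_download

    snapshot_download(
        repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        local_dir="./models/tinyllama",
    )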
@@ -145,14 +148,14 @@ demo = interface()
 # fastapi_app.add_middleware(SessionMiddleware, secret_key="secret_key") # Use a secure, random secret key
 
 # # Launch the app
-# demo.launch()
+demo.launch()
 
-from fastapi import FastAPI
+# from fastapi import FastAPI
 
-app = FastAPI()
+# app = FastAPI()
 
-# Add SessionMiddleware for sessions handling
-app.add_middleware(SessionMiddleware, secret_key="secure_secret_key")
+# # Add SessionMiddleware for sessions handling
+# app.add_middleware(SessionMiddleware, secret_key="secure_secret_key")
 
-# Mount Gradio app to FastAPI at the root
-app.mount("/", demo)
+# # Mount Gradio app to FastAPI at the root
+# app.mount("/", demo)
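This hunk switches the Space from the FastAPI-mounted setup back to a plain `demo.launch()`, the standard entry point for a Gradio app. For reference, if the FastAPI variant were revived, Gradio's documented mounting helper is `gr.mount_gradio_app` rather than mounting the Blocks object directly; a minimal sketch under that assumption:

    # Sketch of the FastAPI variant this commit comments out, using
    # gr.mount_gradio_app instead of app.mount("/", demo).
    import gradio as gr
    from fastapi import FastAPI

    demo = interface()  # as defined in app.py
    app = FastAPI()
    app = gr.mount_gradio_app(app, demo, path="/")
    # Serve with, e.g.: uvicorn app:app --host 0.0.0.0 --port 7860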