BitNet.cpp

Sleeping

MekkCyber committed on Oct 28, 2024

Commit

21e7b6b

1 Parent(s): a6b0228

update

Files changed (3) hide show

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ models

README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 title: BitNet.cpp
 emoji: 💻
 colorFrom: blue
-colorTo: red
 sdk: docker
 app_file: app.py
 app_port: 7860

 title: BitNet.cpp
 emoji: 💻
 colorFrom: blue
+colorTo: white
 sdk: docker
 app_file: app.py
 app_port: 7860

app.py CHANGED Viewed

@@ -36,6 +36,8 @@ def setup_bitnet(model_name):
 def run_inference(model_name, input_text, num_tokens=6):
     try:
         # Call the `run_inference.py` script with the model and input
         start_time = time.time()
         if input_text is None :
             return "Please provide an input text for the model"
@@ -61,8 +63,9 @@ def run_transformers(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken
     # if oauth_token is None :
     #     return "Error : To Compare please login to your HF account and make sure you have access to the used Llama models"
     # Load the model and tokenizer dynamically if needed (commented out for performance)
-    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=oauth_token.token)
-    model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=oauth_token.token)
     if input_text is None :
         return "Please provide an input text for the model", None
     # Encode the input text
@@ -145,14 +148,14 @@ demo = interface()
 # fastapi_app.add_middleware(SessionMiddleware, secret_key="secret_key")  # Use a secure, random secret key
 # # Launch the app
-# demo.launch()
-from fastapi import FastAPI
-app = FastAPI()
-# Add SessionMiddleware for sessions handling
-app.add_middleware(SessionMiddleware, secret_key="secure_secret_key")
-# Mount Gradio app to FastAPI at the root
-app.mount("/", demo)

 def run_inference(model_name, input_text, num_tokens=6):
     try:
         # Call the `run_inference.py` script with the model and input
+        model_name = model_name.split("/")[1]
         start_time = time.time()
         if input_text is None :
             return "Please provide an input text for the model"
     # if oauth_token is None :
     #     return "Error : To Compare please login to your HF account and make sure you have access to the used Llama models"
     # Load the model and tokenizer dynamically if needed (commented out for performance)
+    if model_name=="TinyLlama/TinyLlama-1.1B-Chat-v1.0" :
+        tokenizer = AutoTokenizer.from_pretrained('./models/tinyllama')
+        model = AutoModelForCausalLM.from_pretrained('./models/tinyllama')
     if input_text is None :
         return "Please provide an input text for the model", None
     # Encode the input text
 # fastapi_app.add_middleware(SessionMiddleware, secret_key="secret_key")  # Use a secure, random secret key
 # # Launch the app
+demo.launch()
+# from fastapi import FastAPI
+# app = FastAPI()
+# # Add SessionMiddleware for sessions handling
+# app.add_middleware(SessionMiddleware, secret_key="secure_secret_key")
+# # Mount Gradio app to FastAPI at the root
+# app.mount("/", demo)