MekkCyber committed on
Commit ec1b552 · Parent(s): 74e1421

initial_commit

Files changed (5)
  1. Dockerfile +49 -0
  2. README.md +15 -3
  3. app.py +135 -0
  4. requirements.txt +6 -0
  5. setup.sh +2 -0
Dockerfile ADDED
@@ -0,0 +1,49 @@
+ # Use an official Python runtime as a parent image
+ FROM python:3.10-slim
+
+ # Install system packages as root
+ RUN apt-get update && \
+     apt-get install -y wget gnupg nmap && \
+     # Add the LLVM repository for clang
+     wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
+     echo "deb http://apt.llvm.org/bullseye/ llvm-toolchain-bullseye-15 main" > /etc/apt/sources.list.d/llvm-toolchain.list && \
+     apt-get update && \
+     apt-get install -y git cmake clang && \
+     rm -rf /var/lib/apt/lists/*
+
+ # Create a non-root user (do this as root)
+ RUN useradd -ms /bin/bash myuser
+
+ # Set the working directory as root before switching users
+ WORKDIR /home/myuser/app
+
+ # Copy files and change permissions as root
+ COPY requirements.txt /home/myuser/app/requirements.txt
+ COPY setup.sh /home/myuser/app/setup.sh
+ RUN chmod +x /home/myuser/app/setup.sh
+
+ # Install Python dependencies as root
+ RUN pip install --no-cache-dir -r /home/myuser/app/requirements.txt
+
+ # Run setup.sh as root (it clones BitNet and installs its requirements)
+ RUN /home/myuser/app/setup.sh
+
+ # Change ownership of the directory to myuser
+ RUN chown -R myuser:myuser /home/myuser/app
+
+ # Switch to the non-root user
+ USER myuser
+
+ # List the app directory contents (build-time sanity check)
+ RUN ls /home/myuser/app
+ # Copy the rest of the application code to the container, owned by myuser
+ COPY --chown=myuser:myuser . /home/myuser/app
+
+ # Expose the necessary port
+ EXPOSE 7860
+
+ # Set environment variable for Gradio
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
+
+ # Command to run the application
+ CMD ["python", "app.py"]
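
Note: Gradio reads the GRADIO_SERVER_NAME variable set above when the app calls demo.launch(), so the container serves on all interfaces at the exposed port 7860 without extra flags. A minimal sketch of the equivalent explicit call (illustrative only, not part of this commit):

    # same effect as GRADIO_SERVER_NAME="0.0.0.0" plus the default port 7860
    demo.launch(server_name="0.0.0.0", server_port=7860)
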
README.md CHANGED
@@ -1,10 +1,22 @@
  ---
  title: BitNet.cpp
- emoji: 🌖
+ emoji: 💻
  colorFrom: blue
- colorTo: blue
- sdk: docker
+ colorTo: red
+ sdk: gradio
+ sdk_version: 4.27.0
+ app_file: app.py
  pinned: false
+
+ hf_oauth: true
+ # optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
+ hf_oauth_expiration_minutes: 480
+ # optional, see "Scopes" below. "openid profile" is always included.
+ hf_oauth_scopes:
+   - read-repos
+   - write-repos
+   - manage-repos
+   - inference-api
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
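
The hf_oauth block enables "Sign in with Hugging Face" for the Space; Gradio then hands the app a gr.OAuthToken whose access is limited to the scopes listed above. A hedged sketch of inspecting such a token with huggingface_hub (whoami is the library's public API; the handler itself is hypothetical, not part of this commit):

    import gradio as gr
    from huggingface_hub import whoami

    def show_identity(oauth_token: gr.OAuthToken | None):
        # Gradio injects oauth_token automatically when the user is logged in
        if oauth_token is None:
            return "Not logged in"
        return whoami(token=oauth_token.token)["name"]
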
app.py ADDED
@@ -0,0 +1,132 @@
+ import gradio as gr
+ import subprocess
+ import os
+ import time
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import logging
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+
+ # Paths into the cloned BitNet repository
+ BITNET_REPO_PATH = "/home/myuser/app/BitNet"
+ SETUP_SCRIPT = os.path.join(BITNET_REPO_PATH, "setup_env.py")
+ INFERENCE_SCRIPT = os.path.join(BITNET_REPO_PATH, "run_inference.py")
+
+ # Set up the environment by running setup_env.py for the selected model
+ def setup_bitnet(model_name):
+     try:
+         # Argument-list form avoids shell quoting issues with the repo id
+         result = subprocess.run(
+             ["python", SETUP_SCRIPT, "--hf-repo", model_name, "-q", "i2_s"],
+             cwd=BITNET_REPO_PATH,
+             capture_output=True,
+             text=True
+         )
+         if result.returncode == 0:
+             return "Setup completed successfully!"
+         else:
+             return f"Error in setup: {result.stderr}"
+     except Exception as e:
+         return str(e)
+
+ # Run inference through the BitNet `run_inference.py` script
+ def run_inference(model_name, input_text, num_tokens=6):
+     try:
+         # The setup step stores the quantized model under models/<repo-name>/
+         model_path = os.path.join("models", model_name.split("/")[-1], "ggml-model-i2_s.gguf")
+         start_time = time.time()
+         result = subprocess.run(
+             ["python", INFERENCE_SCRIPT, "-m", model_path, "-p", input_text, "-n", str(num_tokens), "-temp", "0"],
+             cwd=BITNET_REPO_PATH,
+             capture_output=True,
+             text=True
+         )
+         end_time = time.time()
+
+         if result.returncode == 0:
+             inference_time = round(end_time - start_time, 2)
+             return result.stdout, f"Inference took {inference_time} seconds."
+         else:
+             return f"Error during inference: {result.stderr}", None
+     except Exception as e:
+         return str(e), None
+
+ def run_transformers(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, input_text, num_tokens):
+     if oauth_token is None:
+         return "Error: to compare, please log in to your HF account and make sure you have access to the gated Llama models.", None
+
+     # Load the model and tokenizer with the user's OAuth token
+     tokenizer = AutoTokenizer.from_pretrained(model_name, token=oauth_token.token)
+     model = AutoModelForCausalLM.from_pretrained(model_name, token=oauth_token.token)
+
+     # Encode the input text
+     input_ids = tokenizer.encode(input_text, return_tensors="pt")
+
+     # Time the generation of the requested number of new tokens
+     start_time = time.time()
+     output = model.generate(input_ids, max_new_tokens=num_tokens, num_return_sequences=1)
+     inference_time = time.time() - start_time
+
+     # Decode the generated output
+     generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+
+     return generated_text, f"{inference_time:.2f} seconds"
+
+ # Gradio interface
+ def interface():
+     with gr.Blocks(css=".gr-button {background-color: #5C6BC0; color: white;} .gr-button:hover {background-color: #3F51B5;}") as demo:
+         gr.LoginButton(elem_id="login-button", elem_classes="center-button")
+
+         gr.Markdown(
+             """
+             <h1 style="text-align: center; color: #4A148C;">BitNet.cpp Speed Demonstration</h1>
+             <p style="text-align: center; color: #6A1B9A;">Compare the speed and performance of BitNet with Transformers!</p>
+             """,
+             elem_id="header"
+         )
+
+         # Model selection and setup row
+         with gr.Row():
+             model_dropdown = gr.Dropdown(
+                 label="Select Model",
+                 choices=["HF1BitLLM/Llama3-8B-1.58-100B-tokens", "1bitLLM/bitnet_b1_58-3B", "1bitLLM/bitnet_b1_58-large"],
+                 value="HF1BitLLM/Llama3-8B-1.58-100B-tokens",
+                 interactive=True,
+                 elem_id="model-dropdown"
+             )
+             setup_button = gr.Button("Run Setup", elem_id="setup-button")
+             setup_status = gr.Textbox(label="Setup Status", interactive=False, placeholder="Setup status will appear here...")
+
+         # Inference row
+         with gr.Row():
+             num_tokens = gr.Slider(minimum=1, maximum=100, label="Number of Tokens to Generate", value=50, step=1)
+             input_text = gr.Textbox(label="Input Text", placeholder="Enter your input text here...")
+             infer_button = gr.Button("Run Inference", elem_id="infer-button")
+             result_output = gr.Textbox(label="Output", interactive=False, placeholder="Inference output will appear here...")
+             time_output = gr.Textbox(label="Inference Time", interactive=False, placeholder="Inference time will appear here...")
+
+         # Comparison with Transformers
+         with gr.Row():
+             transformer_model_dropdown = gr.Dropdown(
+                 label="Select Transformers Model",
+                 choices=["meta-llama/Llama-3.1-8B", "meta-llama/Llama-3.2-3B", "meta-llama/Llama-3.2-1B"],
+                 value="meta-llama/Llama-3.1-8B",
+                 interactive=True
+             )
+             compare_button = gr.Button("Run Transformers Inference", elem_id="compare-button")
+             transformer_result_output = gr.Textbox(label="Transformers Output", interactive=False, placeholder="Transformers output will appear here...")
+             transformer_time_output = gr.Textbox(label="Transformers Inference Time", interactive=False, placeholder="Transformers inference time will appear here...")
+
+         # Wire up actions; the OAuth profile/token arguments of run_transformers are injected by Gradio from the type hints
+         setup_button.click(setup_bitnet, inputs=model_dropdown, outputs=setup_status)
+         infer_button.click(run_inference, inputs=[model_dropdown, input_text, num_tokens], outputs=[result_output, time_output])
+         compare_button.click(run_transformers, inputs=[transformer_model_dropdown, input_text, num_tokens], outputs=[transformer_result_output, transformer_time_output])
+
+     return demo
+
+ demo = interface()
+
+ # Launch the Gradio app when executed directly (e.g. `python app.py`)
+ if __name__ == "__main__":
+     demo.launch()
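
For a quick smoke test of the BitNet path outside the UI (the functions and model id come from app.py above; the prompt and token count are arbitrary, and the setup step must complete first because it downloads the GGUF model):

    from app import setup_bitnet, run_inference

    print(setup_bitnet("HF1BitLLM/Llama3-8B-1.58-100B-tokens"))
    output, timing = run_inference(
        "HF1BitLLM/Llama3-8B-1.58-100B-tokens",
        "The capital of France is",
        num_tokens=8,
    )
    print(output, timing)
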
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ https://gradio-builds.s3.amazonaws.com/4485dd46a8e4b3f5b35e42d52f291b72fdc1a952/gradio-4.39.0-py3-none-any.whl
+ pydantic==2.8.2
+ pydantic-core==2.20.1
+ fastapi==0.112.4
+ huggingface-hub
+ transformers
setup.sh ADDED
@@ -0,0 +1,2 @@
+ git clone --recursive https://github.com/microsoft/BitNet.git
+ pip install -r BitNet/requirements.txt