Ali-C137 committed on
Commit
fe89a6e
·
verified ·
1 Parent(s): facb321

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -25
app.py CHANGED
@@ -1,5 +1,5 @@
1
  # -*- coding: utf-8 -*-
2
- """Untitled2.ipynb
3
 
4
  Automatically generated by Colaboratory.
5
 
@@ -9,25 +9,42 @@ Original file is located at
9
 
10
  # !pip install gradio
11
 
12
- def estimate_training_cost(number_of_parameters, number_of_tokens, gpu_throughput=312e12, utilization_rate=0.5, overhead=1.10, cost_per_gpu_hour=1.85):
 
 
13
  """
14
- Estimates the training cost of a large language model.
15
 
16
  Args:
 
 
17
  - number_of_parameters (int): The number of parameters in the model.
18
  - number_of_tokens (int): The number of tokens to train on.
19
- - gpu_throughput (float, optional): The peak throughput of the GPU in FLOPs/sec. Default is 312 TFLOPs/sec for A100 GPUs.
20
  - utilization_rate (float, optional): The utilization rate of the GPU (0 < utilization_rate ≤ 1). Default is 0.5 (50%).
21
  - overhead (float, optional): Multiplier to account for overhead and additional costs (1 + overhead percentage). Default is 1.10 (10% overhead).
22
  - cost_per_gpu_hour (float, optional): The cost per hour of using the GPU. Default is $1.85/hour.
23
 
24
  Returns:
25
  - float: The estimated total cost of training the model.
 
 
26
  """
 
 
 
 
 
 
 
 
 
 
 
 
27
  # Calculate the total number of FLOPs required for training
28
  total_flops = 6 * number_of_parameters * number_of_tokens
29
 
30
- # Calculate the number of hours required on the A100 GPUs
31
  gpu_hours = total_flops / (gpu_throughput * 3600)
32
 
33
  # Adjust for the actual utilization of the GPUs
@@ -41,38 +58,40 @@ def estimate_training_cost(number_of_parameters, number_of_tokens, gpu_throughpu
41
 
42
  return total_cost
43
 
44
- # Example usage:
45
- # Let's say we have a model with 70 billion parameters and it's trained on 2 trillion tokens
46
- # The default values for the other parameters are used in this example
47
- total_cost = estimate_training_cost(number_of_parameters=70e9, number_of_tokens=2e12)
48
- total_cost
49
-
50
- import gradio as gr
51
-
52
- # Assume the function estimate_training_cost is already defined as per the previous discussion.
53
-
54
- def gradio_interface(number_of_parameters, number_of_tokens, utilization_rate, overhead, cost_per_gpu_hour):
55
- # Convert string inputs to correct types
56
  number_of_parameters = float(number_of_parameters) * 1e9 # Convert from billions to actual number
57
  number_of_tokens = float(number_of_tokens) * 1e12 # Convert from trillions to actual number
58
  utilization_rate = float(utilization_rate)
59
  overhead = float(overhead)
60
  cost_per_gpu_hour = float(cost_per_gpu_hour)
61
 
62
- # Estimate the cost
63
- cost = estimate_training_cost(number_of_parameters, number_of_tokens, utilization_rate=utilization_rate, overhead=overhead, cost_per_gpu_hour=cost_per_gpu_hour)
64
-
65
- # Return the result as a formatted string
66
  return f"The estimated training cost is ${cost:,.2f}"
67
 
 
 
 
68
  # Define the title and description for the Gradio app
69
  title = "<h2 style='text-align: center;'>LLM Training Cost Calculator</h2>"
70
- description = "<p style='text-align: center;'>Estimate the cost of training large language models (LLM). This tool helps you to calculate the cost based on model parameters and tokens. We plan to extend this calculator to include the cost of fine-tuning models using strategies like LoRA or QLoRA. Stay tuned for updates where you'll be able to input the model ID from the Hugging Face Hub, select the fine-tuning strategy, and specify quantization details if QLoRA is chosen.</p>"
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
- # Create the Gradio interface with title and description
73
  iface = gr.Interface(
74
  fn=gradio_interface,
75
  inputs=[
 
 
76
  gr.Textbox(label="Number of Parameters (in billions)", value="70"),
77
  gr.Textbox(label="Number of Tokens (in trillions)", value="2"),
78
  gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="GPU Utilization Rate"),
@@ -85,6 +104,4 @@ iface = gr.Interface(
85
  article="<p style='text-align: center;'>Developed with ❤️ by Elfilali Ali</p>"
86
  )
87
 
88
- # Run the interface
89
  iface.launch()
90
-
 
1
  # -*- coding: utf-8 -*-
2
+ """LLM Training Cost Calculator App.ipynb
3
 
4
  Automatically generated by Colaboratory.
5
 
 
9
 
10
  # !pip install gradio
11
 
12
+ import gradio as gr
13
+
14
+ def estimate_training_cost(gpu_choice, precision, number_of_parameters, number_of_tokens, utilization_rate=0.5, overhead=1.10, cost_per_gpu_hour=1.85):
15
  """
16
+ Estimates the training cost of a large language model based on the selected GPU and precision.
17
 
18
  Args:
19
+ - gpu_choice (str): The choice of GPU, e.g., 'A100 80GB PCIe', 'V100', etc.
20
+ - precision (str): The precision level for the GPU, e.g., 'bf16', 'tf32', 'tensor'.
21
  - number_of_parameters (int): The number of parameters in the model.
22
  - number_of_tokens (int): The number of tokens to train on.
 
23
  - utilization_rate (float, optional): The utilization rate of the GPU (0 < utilization_rate ≤ 1). Default is 0.5 (50%).
24
  - overhead (float, optional): Multiplier to account for overhead and additional costs (1 + overhead percentage). Default is 1.10 (10% overhead).
25
  - cost_per_gpu_hour (float, optional): The cost per hour of using the GPU. Default is $1.85/hour.
26
 
27
  Returns:
28
  - float: The estimated total cost of training the model.
29
+
30
+ The function dynamically adjusts the GPU throughput based on the selected GPU and precision. The throughput values are predefined for each GPU and precision combination. This estimation assumes a linear scaling of training cost with the number of parameters and tokens.
31
  """
32
+
33
+ gpu_throughputs = {
34
+ 'A100 80GB PCIe': {'bf16': 312e12, 'tf32': 156e12},
35
+ 'A100 80GB SXM': {'bf16': 624e12, 'tf32': 312e12},
36
+ 'V100': {'tensor': 130e12}, # Assuming only the deep learning performance for V100
37
+ 'H100 SXM': {'bf16': 1979e12, 'tf32': 989e12},
38
+ 'H100 PCIe': {'bf16': 1513e12, 'tf32': 756e12}
39
+ }
40
+
41
+ # Get the correct GPU throughput
42
+ gpu_throughput = gpu_throughputs[gpu_choice][precision]
43
+
44
  # Calculate the total number of FLOPs required for training
45
  total_flops = 6 * number_of_parameters * number_of_tokens
46
 
47
+ # Calculate the number of hours required on the selected GPU
48
  gpu_hours = total_flops / (gpu_throughput * 3600)
49
 
50
  # Adjust for the actual utilization of the GPUs
 
58
 
59
  return total_cost
60
 
61
+ def gradio_interface(gpu_choice, precision, number_of_parameters, number_of_tokens, utilization_rate, overhead, cost_per_gpu_hour):
 
 
 
 
 
 
 
 
 
 
 
62
  number_of_parameters = float(number_of_parameters) * 1e9 # Convert from billions to actual number
63
  number_of_tokens = float(number_of_tokens) * 1e12 # Convert from trillions to actual number
64
  utilization_rate = float(utilization_rate)
65
  overhead = float(overhead)
66
  cost_per_gpu_hour = float(cost_per_gpu_hour)
67
 
68
+ cost = estimate_training_cost(gpu_choice, precision, number_of_parameters, number_of_tokens, utilization_rate=utilization_rate, overhead=overhead, cost_per_gpu_hour=cost_per_gpu_hour)
 
 
 
69
  return f"The estimated training cost is ${cost:,.2f}"
70
 
71
+ gpu_choices = ["A100 80GB PCIe", "A100 80GB SXM", "V100", "H100 SXM", "H100 PCIe"]
72
+ default_precisions = ['bf16', 'tf32', 'tensor', 'bf16', 'bf16'] # Default precision for each GPU
73
+
74
  # Define the title and description for the Gradio app
75
  title = "<h2 style='text-align: center;'>LLM Training Cost Calculator</h2>"
76
+ description = """
77
+ <p style='text-align: center;'>Estimate the cost of training large language models (LLM). This tool helps you calculate the cost based on model parameters, tokens, and GPU selections with various precision options. Select a GPU and the precision level to get an accurate cost estimate.</p>
78
+ <p><strong>Available GPUs and Precisions:</strong></p>
79
+ <ul>
80
+ <li><strong>A100 80GB PCIe:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
81
+ <li><strong>A100 80GB SXM:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
82
+ <li><strong>V100:</strong> Uses Deep Learning performance with Tensor Cores (tensor) as the default and only precision.</li>
83
+ <li><strong>H100 SXM:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
84
+ <li><strong>H100 PCIe:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
85
+ </ul>
86
+ <p>The choice of GPU and precision impacts the throughput, affecting training time and cost. BFLOAT16 is generally faster and more cost-effective, while Tensor Float 32 offers higher precision. The V100 GPU is optimized for Deep Learning with Tensor Cores.</p>
87
+ <p style='text-align: center;'>We plan to extend this calculator to include calculating the cost of fine-tuning models using strategies like LoRA or QLoRA. Stay tuned for updates where you'll be able to input the model ID from the Hugging Face Hub, select the fine-tuning strategy, and specify quantization details if QLoRA is chosen.</p>
88
+ """
89
 
 
90
  iface = gr.Interface(
91
  fn=gradio_interface,
92
  inputs=[
93
+ gr.Dropdown(choices=gpu_choices, label="Select GPU", value='A100 80GB PCIe'),
94
+ gr.Dropdown(choices=['bf16', 'tf32', 'tensor'], label="Select Precision", value='bf16'),
95
  gr.Textbox(label="Number of Parameters (in billions)", value="70"),
96
  gr.Textbox(label="Number of Tokens (in trillions)", value="2"),
97
  gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="GPU Utilization Rate"),
 
104
  article="<p style='text-align: center;'>Developed with ❤️ by Elfilali Ali</p>"
105
  )
106
 
 
107
  iface.launch()