Spaces:
Running
Running
Jae-Won Chung
committed on
Commit
·
315ec00
1
Parent(s):
7aacedb
Add load testing
Browse files
spitfight/colosseum/client.py
CHANGED
@@ -27,7 +27,7 @@ class ControllerClient:
|
|
27 |
"""Initialize the controller client."""
|
28 |
self.controller_addr = controller_addr
|
29 |
self.timeout = timeout
|
30 |
-
self.request_id = str(
|
31 |
|
32 |
def fork(self) -> ControllerClient:
|
33 |
"""Return a copy of the client with a new request ID."""
|
|
|
27 |
"""Initialize the controller client."""
|
28 |
self.controller_addr = controller_addr
|
29 |
self.timeout = timeout
|
30 |
+
self.request_id = str(uuid4()) if request_id is None else str(request_id)
|
31 |
|
32 |
def fork(self) -> ControllerClient:
|
33 |
"""Return a copy of the client with a new request ID."""
|
tests/colosseum/controller_load_test.py
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import time
|
3 |
+
import random
|
4 |
+
import itertools
|
5 |
+
import multiprocessing as mp
|
6 |
+
|
7 |
+
import tyro
|
8 |
+
|
9 |
+
from spitfight.colosseum.client import ControllerClient
|
10 |
+
|
11 |
+
# Controller address handed to every ControllerClient this script creates.
# Read at import time: a missing COLOSSEUM_CONTROLLER_ADDR raises KeyError.
CONTROLLER_ADDR = os.environ["COLOSSEUM_CONTROLLER_ADDR"]
|
12 |
+
|
13 |
+
# The distinct prompts used by the load test, in submission order.
_DISTINCT_PROMPTS = (
    "What is Deep Learning?",
    "Write a poem about life.",
    "What is the basics of Rust?",
    "What is Python's GIL?",
    "What are Go channels and how do they compare with Rust flume channels?",
    "What is the difference between a list and a tuple in Python?",
    "How do I use Python's asyncio.wait?",
    "How do I accurately measure the execution time of a function in Python?",
    "How do I use Python's multiprocessing module?",
    "What is Python's built-in dataclasses module?",
    "How is Python's async/await different from Rust's async/await?",
    "What is Hugging Face Transformers?",
    "Tell me about your capabilities.",
    "When is your knowledge cutoff, and what does it mean?",
    "Explain Machine Learning in simple terms.",
    "Write a song that welcomes new students to the University of Michigan.",
    "Explain how to use the Pydantic library with a single code block.",
    "Write a poem about Jae-Won Chung, God of Computer Science.",
    "Write a poem about the University of Michigan.",
    "How do I get my new AI startup funded?",
    "Explain the notion of zero copy in programming.",
    "Explain the notion of zero knowledge proofs.",
    "Explain the notion of zero trust in cybersecurity.",
    "What is a monad in functional programming?",
    "What is a monad in category theory?",
    "How are monads implemented in both Haskell and OCaml?",
    "What is the difference between a monad and a functor?",
    "What is the difference between a monad and a monoid?",
    "How are monads used in Rust?",
    "What is a good name for a software library that makes ML energy efficient?",
    "What would be some good naming criteria for a tech startup?",
    "What is the opposite of democracy? Explain in detail.",
    "Why are people scared to be contacted by the IRS?",
    "What is fingerstyle guitar?",
    "How do I practice and play fingerstyle guitar?",
    "What is the difference between fingerstyle and classical guitar?",
    "What is the difference between classical and flamenco guitar?",
    "What is the difference between classical and jazz guitar?",
    "Explain the basics of the Django web framework.",
    "Explain the basics of the Flask web framework.",
    "Explain the basics of the FastAPI web framework.",
    "I really need to pee. What should I do?",
    "Why would one use Python's abc module?",
    "Explain Python type annotations and why they are useful.",
    "How do I create an immutable list in Python?",
    "How do I create a mutable tuple in Python?",
    "When does dropping out of a Computer Science PhD program make sense?",
    "What is the difference between a PhD and a Masters in Computer Science?",
    "How are software engineers and software developers different?",
    "Hi",
    "What's up",
    "How are you?",
    "What am I supposed to type here",
    "Is indoor vaping legal?",
    "What are the key points of the 14th amendment?",
    "I'm new to the US. What are some social taboos I should be aware of?",
)

# Workload for the load test: the full prompt set, followed by a second
# identical pass over it, so every prompt appears exactly twice.
PROMPTS = [*_DISTINCT_PROMPTS, *_DISTINCT_PROMPTS]
|
71 |
+
|
72 |
+
|
73 |
+
def request(prompt: str) -> tuple[str, str, str, str, float]:
    """Issue one prompt through a fresh controller client and time it.

    The call first sleeps for a random duration below five seconds, then
    creates a new ``ControllerClient`` and consumes the iterables returned
    by ``client.prompt(prompt, index=0)`` and ``client.prompt(prompt,
    index=1)`` in lockstep until both are exhausted.

    Args:
        prompt: Prompt text forwarded to both ``client.prompt`` calls.

    Returns:
        A tuple ``(request_id, prompt, response_a, response_b, latency)``.
        ``response_a`` and ``response_b`` are the concatenated chunks of
        the index-0 and index-1 iterables. ``latency`` is the elapsed
        ``time.monotonic()`` time in seconds, measured from just before
        the ``client.prompt`` calls until both iterables are drained; the
        initial random sleep is not included.
    """
    # Random start delay so the pool's workers do not all fire at once.
    time.sleep(random.random() * 5)
    client = ControllerClient(CONTROLLER_ADDR, timeout=30)

    pieces_a: list[str] = []
    pieces_b: list[str] = []

    began = time.monotonic()
    # NOTE(review): index 0/1 presumably selects one of two responders;
    # confirm against ControllerClient.prompt.
    stream_a = client.prompt(prompt, index=0)
    stream_b = client.prompt(prompt, index=1)
    # zip_longest pads the shorter stream with None once it runs out.
    for piece_a, piece_b in itertools.zip_longest(stream_a, stream_b):
        if piece_a is not None:
            pieces_a.append(piece_a)
        if piece_b is not None:
            pieces_b.append(piece_b)
    response_a = "".join(pieces_a)
    response_b = "".join(pieces_b)
    latency = time.monotonic() - began

    return client.request_id, prompt, response_a, response_b, latency
|
90 |
+
|
91 |
+
|
92 |
+
def main(concurrency: int = len(PROMPTS), logfile: str = "load_test_results.csv"):
    """Run the load test and report aggregate timing.

    Every entry of ``PROMPTS`` is passed to ``request`` through a
    ``multiprocessing`` pool. Per-request latencies are printed as results
    arrive (in completion order), then the total wall time, average latency
    and throughput are printed and appended to ``logfile`` as one CSV row.

    Args:
        concurrency: Number of worker processes in the pool. Defaults to
            the number of prompts.
        logfile: Path of the file that receives the appended row
            ``concurrency,total_time,average_latency,requests_per_second``.
    """
    latencies = []

    began = time.monotonic()
    with mp.Pool(processes=concurrency) as pool:
        results = pool.imap_unordered(request, PROMPTS)
        # Only the request ID and latency are reported; the prompt and the
        # two responses are received but not used here.
        for request_id, _prompt, _response_a, _response_b, latency in results:
            latencies.append(latency)
            print(f"Request ID {request_id} finished, {latency=:.2f}s")

    elapsed = time.monotonic() - began
    completed = len(latencies)
    mean_latency = sum(latencies) / completed
    throughput = completed / elapsed

    print(f"Total time: {elapsed:.2f}s")
    print(f"Average latency: {mean_latency:.2f}s")
    print(f"Requests per second: {throughput:.2f}")

    # Append mode keeps rows from earlier runs.
    with open(logfile, "a") as log:
        log.write(f"{concurrency},{elapsed},{mean_latency},{throughput}\n")
|
109 |
+
|
110 |
+
|
111 |
+
# Script entry point: tyro builds the command-line interface from main's
# signature, so `concurrency` and `logfile` can be set from the shell.
if __name__ == "__main__":
    tyro.cli(main)
|