justinxzhao committed · Commit 1afb9ca · Parent(s): a0dca54

Add token usage tracking for openai and fix token usage tracking for anthropic.

Files changed:
- app.py +39 -34
- constants.py +2 -1
app.py
CHANGED
@@ -61,6 +61,19 @@ def anthropic_streamlit_streamer(stream, llm):
     """
     for event in stream:
         if hasattr(event, "type"):
+            # Count input token usage.
+            if event.type == "message_start":
+                st.session_state["input_token_usage"][
+                    llm
+                ] += event.message.usage.input_tokens
+                st.session_state["output_token_usage"][
+                    llm
+                ] += event.message.usage.output_tokens
+
+            # Count output token usage.
+            if event.type == "message_delta":
+                st.session_state["output_token_usage"][llm] += event.usage.output_tokens
+
             # Handle content blocks
             if event.type == "content_block_delta" and hasattr(event, "delta"):
                 # Extract text delta from the event
@@ -68,18 +81,6 @@ def anthropic_streamlit_streamer(stream, llm):
                 if text_delta:
                     yield text_delta
 
-            # Count input token usage.
-            if event.type == "message_start":
-                input_token_usage = event["usage"]["input_tokens"]
-                output_token_usage = event["usage"]["output_tokens"]
-                st.session_state["input_token_usage"][llm] += input_token_usage
-                st.session_state["output_token_usage"][llm] += output_token_usage
-
-            # Count output token usage.
-            if event.type == "message_delta":
-                output_token_usage = event["usage"]["output_tokens"]
-                st.session_state["output_token_usage"][llm] += output_token_usage
-
             # Handle message completion events (optional if needed)
             elif event.type == "message_stop":
                 break  # End of message, stop streaming
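Note on the fix above: the Anthropic Python SDK yields typed event objects, so the old dict-style access (event["usage"]["input_tokens"]) fails. On message_start the counts live on the nested message object (event.message.usage); the later message_delta carries event.usage.output_tokens. A minimal standalone sketch of the corrected pattern, with a plain dict standing in for the app's session-state counters:

    import anthropic

    # Hypothetical local counters standing in for st.session_state["..._token_usage"].
    usage = {"input_tokens": 0, "output_tokens": 0}

    client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
    stream = client.messages.create(
        model="claude-3-haiku-20240307",
        max_tokens=256,
        messages=[{"role": "user", "content": "Say hello."}],
        stream=True,
    )
    for event in stream:
        if event.type == "message_start":
            # Attribute access on the nested Message object, not event["usage"].
            usage["input_tokens"] += event.message.usage.input_tokens
            usage["output_tokens"] += event.message.usage.output_tokens
        elif event.type == "message_delta":
            # The final delta event carries top-level usage.
            usage["output_tokens"] += event.usage.output_tokens
        elif event.type == "content_block_delta":
            print(event.delta.text, end="")  # assumes a text_delta block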
@@ -101,6 +102,17 @@ def google_streamlit_streamer(stream):
         yield chunk.text
 
 
+def openai_streamlit_streamer(stream, llm):
+    # https://platform.openai.com/docs/api-reference/streaming
+    for event in stream:
+        if event.usage:
+            st.session_state["input_token_usage"][llm] += event.usage.prompt_tokens
+            st.session_state["output_token_usage"][llm] += event.usage.completion_tokens
+        if event.choices:
+            if event.choices[0].delta.content:
+                yield event.choices[0].delta.content
+
+
 def together_streamlit_streamer(stream, llm):
     # https://docs.together.ai/docs/chat-overview#streaming-responses
     for chunk in stream:
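The two guards in the new openai_streamlit_streamer reflect how OpenAI streams report usage: with stream_options={"include_usage": True} (set on the request in a later hunk), every content chunk has usage set to None, and the stream ends with one extra chunk whose choices list is empty and whose usage is populated. A minimal sketch, again with a local dict standing in for session state:

    from openai import OpenAI

    usage = {"prompt_tokens": 0, "completion_tokens": 0}

    client = OpenAI()  # reads OPENAI_API_KEY from the environment
    stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Say hello."}],
        stream=True,
        stream_options={"include_usage": True},
    )
    for chunk in stream:
        if chunk.usage:  # populated only on the final, choices-empty chunk
            usage["prompt_tokens"] += chunk.usage.prompt_tokens
            usage["completion_tokens"] += chunk.usage.completion_tokens
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")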
@@ -111,21 +123,6 @@ def together_streamlit_streamer(stream, llm):
             yield chunk.choices[0].delta.content
 
 
-def llm_streamlit_streamer(stream, llm):
-    if llm.startswith("anthropic"):
-        print(f"Using Anthropic streamer for llm: {llm}")
-        return anthropic_streamlit_streamer(stream, llm)
-    elif llm.startswith("vertex"):
-        print(f"Using Vertex streamer for llm: {llm}")
-        return google_streamlit_streamer(stream)
-    elif llm.startswith("together"):
-        print(f"Using Together streamer for llm: {llm}")
-        return together_streamlit_streamer(stream, llm)
-    else:
-        print(f"Using OpenAI streamer for llm: {llm}")
-        return openai_streamlit_streamer(stream, llm)
-
-
 # Helper functions for LLM council and aggregator selection
 def llm_council_selector():
     selected_council = st.radio(
@@ -144,6 +141,7 @@ def get_openai_response(model_name, prompt):
         model=model_name,
         messages=[{"role": "user", "content": prompt}],
         stream=True,
+        stream_options={"include_usage": True},
     )
 
 
@@ -175,7 +173,9 @@ def get_llm_response_stream(model_identifier, prompt):
     """Returns a streamlit-friendly stream of response tokens from the LLM."""
     provider, model_name = model_identifier.split("://")
     if provider == "openai":
-        return
+        return openai_streamlit_streamer(
+            get_openai_response(model_name, prompt), model_identifier
+        )
     elif provider == "anthropic":
         return anthropic_streamlit_streamer(
             get_anthropic_response(model_name, prompt), model_identifier
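The routing above keys off the provider://model identifier convention used throughout constants.py, for example:

    provider, model_name = "openai://gpt-4o-mini".split("://")
    # provider == "openai", model_name == "gpt-4o-mini"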
@@ -360,6 +360,13 @@ def parse_judging_responses(
         ],
         response_format=DirectAssessmentJudgingResponse,
     )
+    # Track token usage.
+    st.session_state["input_token_usage"][
+        "gpt-4o-mini"
+    ] += completion.usage.prompt_tokens
+    st.session_state["output_token_usage"][
+        "gpt-4o-mini"
+    ] += completion.usage.completion_tokens
     return completion.choices[0].message.parsed
 
 
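This judging call is non-streaming, so usage is available directly on the completion object and no stream_options are needed. A sketch of the pattern, with a hypothetical Pydantic model standing in for DirectAssessmentJudgingResponse:

    from openai import OpenAI
    from pydantic import BaseModel

    class Verdict(BaseModel):
        # Hypothetical stand-in for the app's DirectAssessmentJudgingResponse.
        score: int
        rationale: str

    client = OpenAI()
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Rate the answer 1-5 and explain."}],
        response_format=Verdict,
    )
    prompt_tokens = completion.usage.prompt_tokens  # available without streaming
    completion_tokens = completion.usage.completion_tokens
    verdict = completion.choices[0].message.parsed  # an instance of Verdict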
@@ -582,12 +589,12 @@ def st_render_responses(user_prompt):
         else:
             message_placeholder = st.empty()
             aggregator_stream = get_llm_response_stream(
-                selected_aggregator, aggregator_prompt
+                st.session_state.selected_aggregator, aggregator_prompt
             )
             if aggregator_stream:
-                st.session_state.responses[
-
-                )
+                st.session_state.responses[
+                    get_aggregator_key(st.session_state.selected_aggregator)
+                ] = message_placeholder.write_stream(aggregator_stream)
 
     st.session_state.responses_collected = True
 
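Every hunk in this commit increments st.session_state["input_token_usage"] or st.session_state["output_token_usage"]; the diff does not show where those counters are initialized. One plausible initialization, hypothetical rather than taken from the app:

    from collections import defaultdict
    import streamlit as st

    # Hypothetical setup; the app's actual init code is not part of this diff.
    # defaultdict(int) lets the += updates work for models not yet seen.
    if "input_token_usage" not in st.session_state:
        st.session_state["input_token_usage"] = defaultdict(int)
    if "output_token_usage" not in st.session_state:
        st.session_state["output_token_usage"] = defaultdict(int)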
@@ -825,8 +832,6 @@ def main():
     )
 
     if submit_button:
-        st.markdown("#### Responses")
-
         # Udpate state.
         st.session_state.selected_models = selected_models
         st.session_state.selected_aggregator = selected_aggregator
constants.py
CHANGED
@@ -9,7 +9,8 @@ if os.getenv("DEBUG_MODE") == "True":
     "Smalls": [
         "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
         "together://meta-llama/Llama-3.2-3B-Instruct-Turbo",
-
+        "anthropic://claude-3-haiku-20240307",
+        "openai://gpt-4o-mini",
     ],
     "Flagships": [
         "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",