justinxzhao committed
Commit 1afb9ca · Parent(s): a0dca54

Add token usage tracking for openai and fix token usage tracking for anthropic.

Files changed (2):
  1. app.py +39 -34
  2. constants.py +2 -1
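
For context on the OpenAI side of the change: with stream_options={"include_usage": True}, the streaming API emits one final chunk whose choices list is empty and whose usage field carries the token counts, which is why the new openai_streamlit_streamer checks event.usage and event.choices separately. A minimal standalone sketch of that pattern (the client setup and prompt here are illustrative, not taken from this repo):

from openai import OpenAI

# Illustrative sketch (assumed setup, not part of this commit): stream a chat
# completion with usage reporting enabled and tally tokens from the final chunk.
client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment
stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Say hello."}],
    stream=True,
    stream_options={"include_usage": True},
)
input_tokens = output_tokens = 0
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
    if chunk.usage:  # only the last chunk carries usage; its choices list is empty
        input_tokens += chunk.usage.prompt_tokens
        output_tokens += chunk.usage.completion_tokens
print(f"\nprompt tokens: {input_tokens}, completion tokens: {output_tokens}")
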
app.py CHANGED
@@ -61,6 +61,19 @@ def anthropic_streamlit_streamer(stream, llm):
     """
     for event in stream:
         if hasattr(event, "type"):
+            # Count input token usage.
+            if event.type == "message_start":
+                st.session_state["input_token_usage"][
+                    llm
+                ] += event.message.usage.input_tokens
+                st.session_state["output_token_usage"][
+                    llm
+                ] += event.message.usage.output_tokens
+
+            # Count output token usage.
+            if event.type == "message_delta":
+                st.session_state["output_token_usage"][llm] += event.usage.output_tokens
+
             # Handle content blocks
             if event.type == "content_block_delta" and hasattr(event, "delta"):
                 # Extract text delta from the event
@@ -68,18 +81,6 @@ def anthropic_streamlit_streamer(stream, llm):
                 if text_delta:
                     yield text_delta
 
-            # Count input token usage.
-            if event.type == "message_start":
-                input_token_usage = event["usage"]["input_tokens"]
-                output_token_usage = event["usage"]["output_tokens"]
-                st.session_state["input_token_usage"][llm] += input_token_usage
-                st.session_state["output_token_usage"][llm] += output_token_usage
-
-            # Count output token usage.
-            if event.type == "message_delta":
-                output_token_usage = event["usage"]["output_tokens"]
-                st.session_state["output_token_usage"][llm] += output_token_usage
-
             # Handle message completion events (optional if needed)
             elif event.type == "message_stop":
                 break  # End of message, stop streaming
@@ -101,6 +102,17 @@ def google_streamlit_streamer(stream):
         yield chunk.text
 
 
+def openai_streamlit_streamer(stream, llm):
+    # https://platform.openai.com/docs/api-reference/streaming
+    for event in stream:
+        if event.usage:
+            st.session_state["input_token_usage"][llm] += event.usage.prompt_tokens
+            st.session_state["output_token_usage"][llm] += event.usage.completion_tokens
+        if event.choices:
+            if event.choices[0].delta.content:
+                yield event.choices[0].delta.content
+
+
 def together_streamlit_streamer(stream, llm):
     # https://docs.together.ai/docs/chat-overview#streaming-responses
     for chunk in stream:
@@ -111,21 +123,6 @@ def together_streamlit_streamer(stream, llm):
             yield chunk.choices[0].delta.content
 
 
-def llm_streamlit_streamer(stream, llm):
-    if llm.startswith("anthropic"):
-        print(f"Using Anthropic streamer for llm: {llm}")
-        return anthropic_streamlit_streamer(stream, llm)
-    elif llm.startswith("vertex"):
-        print(f"Using Vertex streamer for llm: {llm}")
-        return google_streamlit_streamer(stream)
-    elif llm.startswith("together"):
-        print(f"Using Together streamer for llm: {llm}")
-        return together_streamlit_streamer(stream, llm)
-    else:
-        print(f"Using OpenAI streamer for llm: {llm}")
-        return openai_streamlit_streamer(stream, llm)
-
-
 # Helper functions for LLM council and aggregator selection
 def llm_council_selector():
     selected_council = st.radio(
@@ -144,6 +141,7 @@ def get_openai_response(model_name, prompt):
         model=model_name,
         messages=[{"role": "user", "content": prompt}],
         stream=True,
+        stream_options={"include_usage": True},
     )
 
 
@@ -175,7 +173,9 @@ def get_llm_response_stream(model_identifier, prompt):
     """Returns a streamlit-friendly stream of response tokens from the LLM."""
     provider, model_name = model_identifier.split("://")
     if provider == "openai":
-        return get_openai_response(model_name, prompt)
+        return openai_streamlit_streamer(
+            get_openai_response(model_name, prompt), model_identifier
+        )
     elif provider == "anthropic":
         return anthropic_streamlit_streamer(
             get_anthropic_response(model_name, prompt), model_identifier
@@ -360,6 +360,13 @@ def parse_judging_responses(
         ],
         response_format=DirectAssessmentJudgingResponse,
     )
+    # Track token usage.
+    st.session_state["input_token_usage"][
+        "gpt-4o-mini"
+    ] += completion.usage.prompt_tokens
+    st.session_state["output_token_usage"][
+        "gpt-4o-mini"
+    ] += completion.usage.completion_tokens
     return completion.choices[0].message.parsed
 
 
@@ -582,12 +589,12 @@ def st_render_responses(user_prompt):
             else:
                 message_placeholder = st.empty()
                 aggregator_stream = get_llm_response_stream(
-                    selected_aggregator, aggregator_prompt
+                    st.session_state.selected_aggregator, aggregator_prompt
                 )
                 if aggregator_stream:
-                    st.session_state.responses[get_aggregator_key(selected_aggregator)] = (
-                        message_placeholder.write_stream(aggregator_stream)
-                    )
+                    st.session_state.responses[
+                        get_aggregator_key(st.session_state.selected_aggregator)
+                    ] = message_placeholder.write_stream(aggregator_stream)
 
     st.session_state.responses_collected = True
 
@@ -825,8 +832,6 @@ def main():
     )
 
     if submit_button:
-        st.markdown("#### Responses")
-
         # Udpate state.
         st.session_state.selected_models = selected_models
         st.session_state.selected_aggregator = selected_aggregator
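
Note that the streamers and parse_judging_responses all increment st.session_state["input_token_usage"] and st.session_state["output_token_usage"] keyed by model; that presumes the counters are initialized elsewhere in app.py, which is not part of this diff. One way that setup could look, purely as an assumption:

import streamlit as st
from collections import defaultdict

# Assumed initialization of the per-model token counters incremented above;
# the real setup lives elsewhere in app.py and is not shown in this commit.
if "input_token_usage" not in st.session_state:
    st.session_state["input_token_usage"] = defaultdict(int)
if "output_token_usage" not in st.session_state:
    st.session_state["output_token_usage"] = defaultdict(int)
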
constants.py CHANGED
@@ -9,7 +9,8 @@ if os.getenv("DEBUG_MODE") == "True":
         "Smalls": [
             "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
             "together://meta-llama/Llama-3.2-3B-Instruct-Turbo",
-            # "anthropic://claude-3-haiku-20240307",
+            "anthropic://claude-3-haiku-20240307",
+            "openai://gpt-4o-mini",
         ],
         "Flagships": [
             "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",