richardblythman committed on
Commit
ada32cf
·
verified ·
1 Parent(s): 463094c

Upload folder using huggingface_hub

Browse files
Files changed (39) hide show
  1. .venv/lib/python3.12/site-packages/__pycache__/_virtualenv.cpython-312.pyc +0 -0
  2. .venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/INSTALLER +1 -0
  3. .venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/LICENSE +26 -0
  4. .venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/METADATA +395 -0
  5. .venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/RECORD +577 -0
  6. .venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/WHEEL +4 -0
  7. .venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/entry_points.txt +3 -0
  8. .venv/lib/python3.12/site-packages/litellm/cost_calculator.py +8 -3
  9. .venv/lib/python3.12/site-packages/litellm/integrations/prometheus.py +47 -5
  10. .venv/lib/python3.12/site-packages/litellm/litellm_core_utils/litellm_logging.py +3 -2
  11. .venv/lib/python3.12/site-packages/litellm/llms/anthropic.py +5 -3
  12. .venv/lib/python3.12/site-packages/litellm/llms/prompt_templates/factory.py +15 -3
  13. .venv/lib/python3.12/site-packages/litellm/llms/vertex_httpx.py +19 -2
  14. .venv/lib/python3.12/site-packages/litellm/model_prices_and_context_window_backup.json +18 -0
  15. .venv/lib/python3.12/site-packages/litellm/proxy/_new_secret_config.yaml +2 -4
  16. .venv/lib/python3.12/site-packages/litellm/proxy/_types.py +4 -0
  17. .venv/lib/python3.12/site-packages/litellm/proxy/auth/auth_checks.py +43 -6
  18. .venv/lib/python3.12/site-packages/litellm/proxy/auth/auth_utils.py +17 -0
  19. .venv/lib/python3.12/site-packages/litellm/proxy/common_utils/callback_utils.py +297 -0
  20. .venv/lib/python3.12/site-packages/litellm/proxy/guardrails/init_guardrails.py +1 -1
  21. .venv/lib/python3.12/site-packages/litellm/proxy/hooks/parallel_request_limiter.py +117 -0
  22. .venv/lib/python3.12/site-packages/litellm/proxy/management_endpoints/key_management_endpoints.py +35 -1
  23. .venv/lib/python3.12/site-packages/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py +125 -23
  24. .venv/lib/python3.12/site-packages/litellm/proxy/proxy_config.yaml +5 -3
  25. .venv/lib/python3.12/site-packages/litellm/proxy/proxy_server.py +35 -1
  26. .venv/lib/python3.12/site-packages/litellm/proxy/vertex_ai_endpoints/google_ai_studio_endpoints.py +138 -0
  27. .venv/lib/python3.12/site-packages/litellm/router.py +17 -0
  28. .venv/lib/python3.12/site-packages/litellm/tests/test_anthropic_completion.py +70 -1
  29. .venv/lib/python3.12/site-packages/litellm/tests/test_key_generate_prisma.py +165 -0
  30. .venv/lib/python3.12/site-packages/litellm/tests/test_least_busy_routing.py +12 -3
  31. .venv/lib/python3.12/site-packages/litellm/tests/test_parallel_request_limiter.py +270 -0
  32. .venv/lib/python3.12/site-packages/litellm/tests/test_pass_through_endpoints.py +4 -1
  33. .venv/lib/python3.12/site-packages/litellm/tests/test_proxy_server.py +49 -0
  34. .venv/lib/python3.12/site-packages/naptha_sdk-0.1.0.dist-info/RECORD +1 -1
  35. .venv/lib/python3.12/site-packages/naptha_sdk-0.1.0.dist-info/direct_url.json +1 -1
  36. .venv/lib/python3.12/site-packages/naptha_sdk/agent_service_engine.py +1 -1
  37. .venv/src/naptha-sdk/naptha_sdk/agent_service_engine.py +1 -1
  38. poetry.lock +5 -5
  39. pyproject.toml +1 -1
.venv/lib/python3.12/site-packages/__pycache__/_virtualenv.cpython-312.pyc CHANGED
Binary files a/.venv/lib/python3.12/site-packages/__pycache__/_virtualenv.cpython-312.pyc and b/.venv/lib/python3.12/site-packages/__pycache__/_virtualenv.cpython-312.pyc differ
 
.venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ Poetry 1.8.3
.venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/LICENSE ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Portions of this software are licensed as follows:
2
+
3
+ * All content that resides under the "enterprise/" directory of this repository, if that directory exists, is licensed under the license defined in "enterprise/LICENSE".
4
+ * Content outside of the above mentioned directories or restrictions above is available under the MIT license as defined below.
5
+ ---
6
+ MIT License
7
+
8
+ Copyright (c) 2023 Berri AI
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
.venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/METADATA ADDED
@@ -0,0 +1,395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.1
2
+ Name: litellm
3
+ Version: 1.43.18
4
+ Summary: Library to easily interface with LLM API providers
5
+ License: MIT
6
+ Author: BerriAI
7
+ Requires-Python: >=3.8, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.9
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Provides-Extra: extra-proxy
15
+ Provides-Extra: proxy
16
+ Requires-Dist: PyJWT (>=2.8.0,<3.0.0) ; extra == "proxy"
17
+ Requires-Dist: aiohttp
18
+ Requires-Dist: apscheduler (>=3.10.4,<4.0.0) ; extra == "proxy"
19
+ Requires-Dist: azure-identity (>=1.15.0,<2.0.0) ; extra == "extra-proxy"
20
+ Requires-Dist: azure-keyvault-secrets (>=4.8.0,<5.0.0) ; extra == "extra-proxy"
21
+ Requires-Dist: backoff ; extra == "proxy"
22
+ Requires-Dist: click
23
+ Requires-Dist: cryptography (>=42.0.5,<43.0.0) ; extra == "proxy"
24
+ Requires-Dist: fastapi (>=0.111.0,<0.112.0) ; extra == "proxy"
25
+ Requires-Dist: fastapi-sso (>=0.10.0,<0.11.0) ; extra == "proxy"
26
+ Requires-Dist: google-cloud-kms (>=2.21.3,<3.0.0) ; extra == "extra-proxy"
27
+ Requires-Dist: gunicorn (>=22.0.0,<23.0.0) ; extra == "proxy"
28
+ Requires-Dist: importlib-metadata (>=6.8.0)
29
+ Requires-Dist: jinja2 (>=3.1.2,<4.0.0)
30
+ Requires-Dist: jsonschema (>=4.22.0,<5.0.0)
31
+ Requires-Dist: openai (>=1.40.0)
32
+ Requires-Dist: orjson (>=3.9.7,<4.0.0) ; extra == "proxy"
33
+ Requires-Dist: prisma (==0.11.0) ; extra == "extra-proxy"
34
+ Requires-Dist: pydantic (>=2.0.0,<3.0.0)
35
+ Requires-Dist: pynacl (>=1.5.0,<2.0.0) ; extra == "extra-proxy"
36
+ Requires-Dist: python-dotenv (>=0.2.0)
37
+ Requires-Dist: python-multipart (>=0.0.9,<0.0.10) ; extra == "proxy"
38
+ Requires-Dist: pyyaml (>=6.0.1,<7.0.0) ; extra == "proxy"
39
+ Requires-Dist: requests (>=2.31.0,<3.0.0)
40
+ Requires-Dist: resend (>=0.8.0,<0.9.0) ; extra == "extra-proxy"
41
+ Requires-Dist: rq ; extra == "proxy"
42
+ Requires-Dist: tiktoken (>=0.7.0)
43
+ Requires-Dist: tokenizers
44
+ Requires-Dist: uvicorn (>=0.22.0,<0.23.0) ; extra == "proxy"
45
+ Project-URL: documentation, https://docs.litellm.ai
46
+ Project-URL: homepage, https://litellm.ai
47
+ Project-URL: repository, https://github.com/BerriAI/litellm
48
+ Description-Content-Type: text/markdown
49
+
50
+ <h1 align="center">
51
+ 🚅 LiteLLM
52
+ </h1>
53
+ <p align="center">
54
+ <p align="center">
55
+ <a href="https://render.com/deploy?repo=https://github.com/BerriAI/litellm" target="_blank" rel="nofollow"><img src="https://render.com/images/deploy-to-render-button.svg" alt="Deploy to Render"></a>
56
+ <a href="https://railway.app/template/HLP0Ub?referralCode=jch2ME">
57
+ <img src="https://railway.app/button.svg" alt="Deploy on Railway">
58
+ </a>
59
+ </p>
60
+ <p align="center">Call all LLM APIs using the OpenAI format [Bedrock, Huggingface, VertexAI, TogetherAI, Azure, OpenAI, Groq etc.]
61
+ <br>
62
+ </p>
63
+ <h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">LiteLLM Proxy Server (LLM Gateway)</a> | <a href="https://docs.litellm.ai/docs/hosted" target="_blank"> Hosted Proxy (Preview)</a> | <a href="https://docs.litellm.ai/docs/enterprise" target="_blank">Enterprise Tier</a></h4>
64
+ <h4 align="center">
65
+ <a href="https://pypi.org/project/litellm/" target="_blank">
66
+ <img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
67
+ </a>
68
+ <a href="https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main" target="_blank">
69
+ <img src="https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg" alt="CircleCI">
70
+ </a>
71
+ <a href="https://www.ycombinator.com/companies/berriai">
72
+ <img src="https://img.shields.io/badge/Y%20Combinator-W23-orange?style=flat-square" alt="Y Combinator W23">
73
+ </a>
74
+ <a href="https://wa.link/huol9n">
75
+ <img src="https://img.shields.io/static/v1?label=Chat%20on&message=WhatsApp&color=success&logo=WhatsApp&style=flat-square" alt="Whatsapp">
76
+ </a>
77
+ <a href="https://discord.gg/wuPM9dRgDw">
78
+ <img src="https://img.shields.io/static/v1?label=Chat%20on&message=Discord&color=blue&logo=Discord&style=flat-square" alt="Discord">
79
+ </a>
80
+ </h4>
81
+
82
+ LiteLLM manages:
83
+
84
+ - Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
85
+ - [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']`
86
+ - Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
87
+ - Set Budgets & Rate limits per project, api key, model [LiteLLM Proxy Server (LLM Gateway)](https://docs.litellm.ai/docs/simple_proxy)
88
+
89
+ [**Jump to LiteLLM Proxy (LLM Gateway) Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
90
+ [**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs)
91
+
92
+ 🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12-hour load tests before being published.
93
+
94
+ Need support for more providers? If a provider or LLM platform is missing, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+).
95
+
96
+ # Usage ([**Docs**](https://docs.litellm.ai/docs/))
97
+
98
+ > [!IMPORTANT]
99
+ > LiteLLM v1.0.0 now requires `openai>=1.0.0`. Migration guide [here](https://docs.litellm.ai/docs/migration)
100
+ > LiteLLM v1.40.14+ now requires `pydantic>=2.0.0`. No changes required.
101
+
102
+ <a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
103
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
104
+ </a>
105
+
106
+ ```shell
107
+ pip install litellm
108
+ ```
109
+
110
+ ```python
111
+ from litellm import completion
112
+ import os
113
+
114
+ ## set ENV variables
115
+ os.environ["OPENAI_API_KEY"] = "your-openai-key"
116
+ os.environ["COHERE_API_KEY"] = "your-cohere-key"
117
+
118
+ messages = [{ "content": "Hello, how are you?","role": "user"}]
119
+
120
+ # openai call
121
+ response = completion(model="gpt-3.5-turbo", messages=messages)
122
+
123
+ # cohere call
124
+ response = completion(model="command-nightly", messages=messages)
125
+ print(response)
126
+ ```
127
+
128
+ Call any model supported by a provider, with `model=<provider_name>/<model_name>`. There might be provider-specific details here, so refer to [provider docs for more information](https://docs.litellm.ai/docs/providers)
129
+
130
+ ## Async ([Docs](https://docs.litellm.ai/docs/completion/stream#async-completion))
131
+
132
+ ```python
133
+ from litellm import acompletion
134
+ import asyncio
135
+
136
+ async def test_get_response():
137
+ user_message = "Hello, how are you?"
138
+ messages = [{"content": user_message, "role": "user"}]
139
+ response = await acompletion(model="gpt-3.5-turbo", messages=messages)
140
+ return response
141
+
142
+ response = asyncio.run(test_get_response())
143
+ print(response)
144
+ ```
145
+
146
+ ## Streaming ([Docs](https://docs.litellm.ai/docs/completion/stream))
147
+
148
+ LiteLLM supports streaming the model response back. Pass `stream=True` to get a streaming iterator in response.
149
+ Streaming is supported for all models (Bedrock, Huggingface, TogetherAI, Azure, OpenAI, etc.)
150
+
151
+ ```python
152
+ from litellm import completion
153
+ response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
154
+ for part in response:
155
+ print(part.choices[0].delta.content or "")
156
+
157
+ # claude 2
158
+ response = completion('claude-2', messages, stream=True)
159
+ for part in response:
160
+ print(part.choices[0].delta.content or "")
161
+ ```
162
+
163
+ ## Logging Observability ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
164
+
165
+ LiteLLM exposes predefined callbacks to send data to Lunary, Langfuse, DynamoDB, s3 Buckets, Helicone, Promptlayer, Traceloop, Athina, Slack
166
+
167
+ ```python
168
+ from litellm import completion
169
+
170
+ ## set env variables for logging tools
171
+ os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
172
+ os.environ["HELICONE_API_KEY"] = "your-helicone-auth-key"
173
+ os.environ["LANGFUSE_PUBLIC_KEY"] = ""
174
+ os.environ["LANGFUSE_SECRET_KEY"] = ""
175
+ os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
176
+
177
+ os.environ["OPENAI_API_KEY"]
178
+
179
+ # set callbacks
180
+ litellm.success_callback = ["lunary", "langfuse", "athina", "helicone"] # log input/output to lunary, langfuse, supabase, athina, helicone etc
181
+
182
+ #openai call
183
+ response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
184
+ ```
185
+
186
+ # LiteLLM Proxy Server (LLM Gateway) - ([Docs](https://docs.litellm.ai/docs/simple_proxy))
187
+
188
+ Track spend + Load Balance across multiple projects
189
+
190
+ [Hosted Proxy (Preview)](https://docs.litellm.ai/docs/hosted)
191
+
192
+ The proxy provides:
193
+
194
+ 1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)
195
+ 2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)
196
+ 3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)
197
+ 4. [Rate Limiting](https://docs.litellm.ai/docs/proxy/users#set-rate-limits)
198
+
199
+ ## 📖 Proxy Endpoints - [Swagger Docs](https://litellm-api.up.railway.app/)
200
+
201
+
202
+ ## Quick Start Proxy - CLI
203
+
204
+ ```shell
205
+ pip install 'litellm[proxy]'
206
+ ```
207
+
208
+ ### Step 1: Start litellm proxy
209
+
210
+ ```shell
211
+ $ litellm --model huggingface/bigcode/starcoder
212
+
213
+ #INFO: Proxy running on http://0.0.0.0:4000
214
+ ```
215
+
216
+ ### Step 2: Make ChatCompletions Request to Proxy
217
+
218
+
219
+ > [!IMPORTANT]
220
+ > 💡 [Use LiteLLM Proxy with Langchain (Python, JS), OpenAI SDK (Python, JS) Anthropic SDK, Mistral SDK, LlamaIndex, Instructor, Curl](https://docs.litellm.ai/docs/proxy/user_keys)
221
+
222
+ ```python
223
+ import openai # openai v1.0.0+
224
+ client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:4000") # set proxy to base_url
225
+ # request sent to model set on litellm proxy, `litellm --model`
226
+ response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
227
+ {
228
+ "role": "user",
229
+ "content": "this is a test request, write a short poem"
230
+ }
231
+ ])
232
+
233
+ print(response)
234
+ ```
235
+
236
+ ## Proxy Key Management ([Docs](https://docs.litellm.ai/docs/proxy/virtual_keys))
237
+
238
+ Connect the proxy with a Postgres DB to create proxy keys
239
+
240
+ ```bash
241
+ # Get the code
242
+ git clone https://github.com/BerriAI/litellm
243
+
244
+ # Go to folder
245
+ cd litellm
246
+
247
+ # Add the master key - you can change this after setup
248
+ echo 'LITELLM_MASTER_KEY="sk-1234"' > .env
249
+
250
+ # Add the litellm salt key - you cannot change this after adding a model
251
+ # It is used to encrypt / decrypt your LLM API Key credentials
252
+ # We recommned - https://1password.com/password-generator/
253
+ # password generator to get a random hash for litellm salt key
254
+ echo 'LITELLM_SALT_KEY="sk-1234"' >> .env
255
+
256
+ source .env
257
+
258
+ # Start
259
+ docker-compose up
260
+ ```
261
+
262
+
263
+ UI on `/ui` on your proxy server
264
+ ![ui_3](https://github.com/BerriAI/litellm/assets/29436595/47c97d5e-b9be-4839-b28c-43d7f4f10033)
265
+
266
+ Set budgets and rate limits across multiple projects
267
+ `POST /key/generate`
268
+
269
+ ### Request
270
+
271
+ ```shell
272
+ curl 'http://0.0.0.0:4000/key/generate' \
273
+ --header 'Authorization: Bearer sk-1234' \
274
+ --header 'Content-Type: application/json' \
275
+ --data-raw '{"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], "duration": "20m","metadata": {"user": "[email protected]", "team": "core-infra"}}'
276
+ ```
277
+
278
+ ### Expected Response
279
+
280
+ ```shell
281
+ {
282
+ "key": "sk-kdEXbIqZRwEeEiHwdg7sFA", # Bearer token
283
+ "expires": "2023-11-19T01:38:25.838000+00:00" # datetime object
284
+ }
285
+ ```
286
+
287
+ ## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers))
288
+
289
+ | Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) |
290
+ |-------------------------------------------------------------------------------------|---------------------------------------------------------|---------------------------------------------------------------------------------|-------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------|-------------------------------------------------------------------------------|-------------------------------------------------------------------------|
291
+ | [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
292
+ | [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
293
+ | [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | |
294
+ | [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | |
295
+ | [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
296
+ | [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | | |
297
+ | [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | | |
298
+ | [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | |
299
+ | [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | | |
300
+ | [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | |
301
+ | [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | | |
302
+ | [empower](https://docs.litellm.ai/docs/providers/empower) | ✅ | ✅ | ✅ | ✅ | | |
303
+ | [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | |
304
+ | [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | | |
305
+ | [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ | | |
306
+ | [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ | | |
307
+ | [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | | |
308
+ | [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | | |
309
+ | [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | | |
310
+ | [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | | |
311
+ | [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | | |
312
+ | [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | | |
313
+ | [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | |
314
+ | [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | |
315
+ | [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | |
316
+ | [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | | |
317
+ | [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | |
318
+ | [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | |
319
+ | [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | |
320
+ | [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | |
321
+ | [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | |
322
+ | [FriendliAI](https://docs.litellm.ai/docs/providers/friendliai) | ✅ | ✅ | ✅ | ✅ | | |
323
+
324
+ [**Read the Docs**](https://docs.litellm.ai/docs/)
325
+
326
+ ## Contributing
327
+
328
+ To contribute: Clone the repo locally -> Make a change -> Submit a PR with the change.
329
+
330
+ Here's how to modify the repo locally:
331
+ Step 1: Clone the repo
332
+
333
+ ```
334
+ git clone https://github.com/BerriAI/litellm.git
335
+ ```
336
+
337
+ Step 2: Navigate into the project, and install dependencies:
338
+
339
+ ```
340
+ cd litellm
341
+ poetry install -E extra_proxy -E proxy
342
+ ```
343
+
344
+ Step 3: Test your change:
345
+
346
+ ```
347
+ cd litellm/tests # pwd: Documents/litellm/litellm/tests
348
+ poetry run flake8
349
+ poetry run pytest .
350
+ ```
351
+
352
+ Step 4: Submit a PR with your changes! 🚀
353
+
354
+ - push your fork to your GitHub repo
355
+ - submit a PR from there
356
+
357
+ # Enterprise
358
+ For companies that need better security, user management and professional support
359
+
360
+ [Talk to founders](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
361
+
362
+ This covers:
363
+ - ✅ **Features under the [LiteLLM Commercial License](https://docs.litellm.ai/docs/proxy/enterprise):**
364
+ - ✅ **Feature Prioritization**
365
+ - ✅ **Custom Integrations**
366
+ - ✅ **Professional Support - Dedicated discord + slack**
367
+ - ✅ **Custom SLAs**
368
+ - ✅ **Secure access with Single Sign-On**
369
+
370
+ # Support / talk with founders
371
+
372
+ - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
373
+ - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
374
+ - Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬
375
+ - Our emails ✉️ [email protected] / [email protected]
376
+
377
+ # Why did we build this
378
+
379
+ - **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI and Cohere.
380
+
381
+ # Contributors
382
+
383
+ <!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
384
+ <!-- prettier-ignore-start -->
385
+ <!-- markdownlint-disable -->
386
+
387
+ <!-- markdownlint-restore -->
388
+ <!-- prettier-ignore-end -->
389
+
390
+ <!-- ALL-CONTRIBUTORS-LIST:END -->
391
+
392
+ <a href="https://github.com/BerriAI/litellm/graphs/contributors">
393
+ <img src="https://contrib.rocks/image?repo=BerriAI/litellm" />
394
+ </a>
395
+
.venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/RECORD ADDED
@@ -0,0 +1,577 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ../../../bin/litellm,sha256=C8U9cdkxUI1nR-SrwEkSOXFmeppVfM5f1V9oXW1nlx8,283
2
+ litellm/__init__.py,sha256=RgjXw3jbyF2m_WUVLIrcH0Uykz_pJC0FcdcYzy0l0n4,31281
3
+ litellm/_logging.py,sha256=cE1K1nljhW8yjV_-s-vJ0YQ8DJsNHvzWc1MFECIE3QY,2963
4
+ litellm/_redis.py,sha256=d9i9_Q1EJyQ3dRsgLhC18I792BiVxKFL9p0mqVv8aoU,5207
5
+ litellm/_service_logger.py,sha256=oxrLpjtssQN49A2QbwVdqBpaLoXtHfeQuEeVhaMolBI,6712
6
+ litellm/_version.py,sha256=ydZafSCEtu5V7o42UV_a8DTJyuLk-TgDZ5C5BUS6NTQ,101
7
+ litellm/adapters/anthropic_adapter.py,sha256=ySQ4Zsgat2lDZIR392QAyRFRciWeHVvz_4_fjS6HsPY,7356
8
+ litellm/assistants/main.py,sha256=-LRjZQz97ZYdlSMPAAq6s9IDLM5DfxbxpuzYOly03VA,47544
9
+ litellm/assistants/utils.py,sha256=3Ru97LHqMZDaibntq4Gczl9J2txLIUsK9lZBrNmEnmk,5729
10
+ litellm/batches/main.py,sha256=hq1kNNtubnPvJUv4ay4ZK-gYchf2VzXeGtqzMUmn7C8,15438
11
+ litellm/budget_manager.py,sha256=ed2TJr3-t0VFsEYehB7vFleJB5pnkfvvoRsYOkrc8K0,8304
12
+ litellm/caching.py,sha256=DQrrQyh_2Sizb932fZLlJAVoV_7rSzpC1du7gWBjtJo,91470
13
+ litellm/cost.json,sha256=GJEXQcWy9ZvA5DhsPlWnolw-0gK_JG6PQRC67EO6VmQ,108
14
+ litellm/cost_calculator.py,sha256=oA7kbg-_-Kg8ty88bRo7OxkH8L3Yn6qQfRr4TQvAq2g,33078
15
+ litellm/deprecated_litellm_server/.env.template,sha256=CO7AgKScAzceHKw711IG7q0_mlI_DIf-P8i3j9jdMbE,941
16
+ litellm/deprecated_litellm_server/Dockerfile,sha256=uduHN-pR8EqaWTpSgrU9gs1fFsMhIi1m_kCLovmtKtY,224
17
+ litellm/deprecated_litellm_server/README.md,sha256=TieXW_VcgnNoSt6juEbXn02I846Z7mJgDqxTThNsjYM,62
18
+ litellm/deprecated_litellm_server/__init__.py,sha256=U1MJ3xxMEDLZx1GTB6IFV0IsmIo0Ri0uAxXBIQPB2z0,52
19
+ litellm/deprecated_litellm_server/main.py,sha256=gjh5DHJ2n4UQVcd6bNjMcBHxlsOgl8ARvHnsigQXUjE,8350
20
+ litellm/deprecated_litellm_server/requirements.txt,sha256=ocISAeyHpH-UgvlL0557UXL6S1XJJ-bxh1P4uU2lTI8,70
21
+ litellm/deprecated_litellm_server/server_utils.py,sha256=vQze7bSC-11yd2GvQ_LatrKLXzrUamr7vRIuAM5PvrM,3221
22
+ litellm/exceptions.py,sha256=fyP2enX3vQo9w6P-R3wwrII1qe9h6gzv1oN8RiJAMjA,27887
23
+ litellm/files/main.py,sha256=sbpq7OjubHiSt2rBUO98UDiuYcdVAEqoGJ97VEwSds8,26036
24
+ litellm/fine_tuning/main.py,sha256=9hhRKJEytPaCRjiGJqKlM_IaNAA34q2LETzGwc8HTL4,22114
25
+ litellm/integrations/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
26
+ litellm/integrations/_types/open_inference.py,sha256=OKr0ZQIozlmyNhQh1fCv1ONrVAEUuDj-pZZv_JgK7Ao,7402
27
+ litellm/integrations/aispend.py,sha256=XfwmWvaDAgU7OCipI_nAYpJTOrNX8mZv3BrZMymRnSY,6423
28
+ litellm/integrations/arize_ai.py,sha256=knvcaUlnMCokm3OhNttM5mxjA6v4avZgj9cIV-t9VUQ,4034
29
+ litellm/integrations/athina.py,sha256=ozijkeM4qnfCIla8ZDvDgS0oejfz2vzj8noloZxypt8,3560
30
+ litellm/integrations/berrispend.py,sha256=FrL04R7bMLSre-dwt8GM9jA62Aqhyf2XJ7iiXok_5rQ,6577
31
+ litellm/integrations/braintrust_logging.py,sha256=xcjjzALBrxtE24w2w0umPOxg7wuJrRx6ej6Gow_DXdo,14128
32
+ litellm/integrations/clickhouse.py,sha256=fsJ33CcXnK700mpxTpUeEp5Yp_TIEgbPNE5t6tSAAIg,10305
33
+ litellm/integrations/custom_logger.py,sha256=VUi431sVQsKDOaPB1f7cDoHnuyH7uDjSCt4IqeqeSts,6875
34
+ litellm/integrations/datadog.py,sha256=Kk_D2JaP8UnpcUNk0fEv34Du6pbvqYBgOVUOpo5YNto,5400
35
+ litellm/integrations/dynamodb.py,sha256=HPh_L5n55hkNqnRp7z1PtaCYSL7Po7YNl_eRmg3_uNM,3229
36
+ litellm/integrations/email_alerting.py,sha256=aIP_Q03X60PSqJQ3rerWsOC4qS12Fh21GoSAGqF2nbc,4460
37
+ litellm/integrations/email_templates/templates.py,sha256=aLw_bBXNBImuTN5u7w6Z4_WKBWU_p1zKOOi48-nWhuY,2277
38
+ litellm/integrations/galileo.py,sha256=NhdpG1lyqHqbz_po_tPWKDfcRJfyEpUHILadIibhQwo,5613
39
+ litellm/integrations/gcs_bucket.py,sha256=d5xEZAO9g7M0G4oZZOpltZlk4CuLupry7eH_TqAad94,11043
40
+ litellm/integrations/greenscale.py,sha256=HBghButcuhRuP_cjl5Qxi9m2fitGfoDMIqh5W4RFyNA,2560
41
+ litellm/integrations/helicone.py,sha256=-My_KVQdg5XTewGqk_AwiP5akUNZ7UsekhoQI1otlEY,6894
42
+ litellm/integrations/lago.py,sha256=eob9SSnjehQSnEPqphAnUMm3PLtJUu3Z-La5Q3iCYlE,6407
43
+ litellm/integrations/langfuse.py,sha256=rVHyEY_md8hE8gGFXGqQgj5w9SdKssVSes8t_dSsBdU,28471
44
+ litellm/integrations/langsmith.py,sha256=25fd5HZvDhJtwrBJ3mXVsFDIfb9s6Oud1AILmGaxu6o,8279
45
+ litellm/integrations/litedebugger.py,sha256=VPfy6gIcZ1ahQIB9rJwniCO0zlWcLPTssg3mr8H9w0w,11088
46
+ litellm/integrations/logfire_logger.py,sha256=9-r9IvD8etmckjsKGmj5XpTY3dbUiewR6Gxhqb-_ftc,6155
47
+ litellm/integrations/lunary.py,sha256=Eyh9D0pPVGPIcDSCT9RoZ0rRGSwKmzy1L-3Ko4QAQnw,5229
48
+ litellm/integrations/openmeter.py,sha256=aX3SCNaoYiXd2oqKr6-00gzv7hS80mVKdluO64qmmSw,4534
49
+ litellm/integrations/opentelemetry.py,sha256=iEfd1tS95_sZAwQxgvm5vCLD4sElyzyqsgzhUUSVFZg,30286
50
+ litellm/integrations/prometheus.py,sha256=fVet5dwx4jqB4sE7Wgrp2jzdu-jxIejn5etfU7GStAo,23534
51
+ litellm/integrations/prometheus_helpers/prometheus_api.py,sha256=4Dc05zzdTPGHAIy1kV629Emr-gxr0g129x_IbmEd4tc,2527
52
+ litellm/integrations/prometheus_services.py,sha256=HiauxN2ZeEpGX0iiAWatxq24gAahQwqnuHr5KUeNnlI,7375
53
+ litellm/integrations/prompt_layer.py,sha256=RmCBDOOYXXx5X0jc-zIzpdDIcOGaMewgzr8cPu7ailo,3572
54
+ litellm/integrations/s3.py,sha256=EQlNn8dneZ66zl07laP1MU7E4lwIMtzzoZ4zHSPBjUU,6877
55
+ litellm/integrations/slack_alerting.py,sha256=rvYArR8JGX5_m-GV2Gl8lc4Ac4CLB-osnJRb0_u7TCk,68224
56
+ litellm/integrations/supabase.py,sha256=0hGcHrWR-rmIucLxUy2AM1DhKSGiibAqgID25O39nxo,4042
57
+ litellm/integrations/test_httpx.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
+ litellm/integrations/traceloop.py,sha256=Whgy-4so47PlyHT-ej6N5WYInLPvvccbBoWe1oOYTp8,5913
59
+ litellm/integrations/weights_biases.py,sha256=tZGuknLmre98aElgA9vqZ8dhTUzyMR3gEfn82vZILcA,7813
60
+ litellm/litellm_core_utils/core_helpers.py,sha256=JMWnc4mZ8FzyA5mYIOndVVK_phhpPc7EK16J3GZFbTk,3814
61
+ litellm/litellm_core_utils/exception_mapping_utils.py,sha256=mbETVEpgENAq8nVHcNyiXQWiBkKAfZuy1-dUU1zUuww,1535
62
+ litellm/litellm_core_utils/json_validation_rule.py,sha256=rtDKG_1vyTUsDp2BCUN6mj7jf_EDtOnQYxAfXcKTuz0,790
63
+ litellm/litellm_core_utils/litellm_logging.py,sha256=zk4pEaqBaxbWvoQmnRQ2g8oNYOAdggTHRPcJ_gnszYE,109711
64
+ litellm/litellm_core_utils/llm_cost_calc/google.py,sha256=Jbqg0Ur1gMNKMrGFlcw3rNx9gGdqnEmWmSrkTVRo-64,8276
65
+ litellm/litellm_core_utils/llm_cost_calc/utils.py,sha256=oE-V0I0SQjqwUn2mm42WnNCFP8kk_OS2MGQBgLrRbYs,2991
66
+ litellm/litellm_core_utils/llm_request_utils.py,sha256=DunH2Xg73qKcqkv0yh_TcxuNsZeP3CeL_Zzl1NK_0Yw,986
67
+ litellm/litellm_core_utils/logging_utils.py,sha256=BzKIdfYfFieznqyZ176lVcOdRhhaRTOv1tW4XKUrTmA,531
68
+ litellm/litellm_core_utils/redact_messages.py,sha256=rk7A2i6GVtTYvPg-GufOsVViz7zJ-T4PlyBi3wXvPgs,3768
69
+ litellm/litellm_core_utils/streaming_utils.py,sha256=1te4DqCt590w7yPE4qWev9FSCqRT9AxPu_NUsp84oWo,600
70
+ litellm/litellm_core_utils/token_counter.py,sha256=BF4xHo4ut6b5DBVd5r4bMfLyW_81_AvDT5n_0hKDQh4,3144
71
+ litellm/llms/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
72
+ litellm/llms/ai21.py,sha256=LBVSkxybKuR3B4iJEHrZE0YhtIykq3SDW5TRaSGToaA,7898
73
+ litellm/llms/aleph_alpha.py,sha256=QfkTSNzPZgzJtO72Q3j7baq_SKPEkApvU3u4skWkoDE,12754
74
+ litellm/llms/anthropic.py,sha256=ydcc0elNtEPRCm6QxqgZgLrh1cYp5uM3o4Y_dnTL3kw,52574
75
+ litellm/llms/anthropic_text.py,sha256=_a1uvk8rp1PumeSntgIrRS40oc0uPpZiKEPzE_IQuZI,11144
76
+ litellm/llms/azure.py,sha256=WRyjarQH1cK0lqDsIqbtIP8ljMC0O-Pyl-hrOi_j90A,96911
77
+ litellm/llms/azure_text.py,sha256=ke6yLeKEVtxvqNUuEOBzp06KqCocScCMSSyuqsyGNfE,20074
78
+ litellm/llms/base.py,sha256=iDcvsYjKW28jLdXHHI0izlJD0tQ7sJy8KKrye6916b8,2609
79
+ litellm/llms/base_aws_llm.py,sha256=yIvyVNXn3koJRsZ5VSD-yfunQ9hrDT4geM7QN3y4_BY,7874
80
+ litellm/llms/baseten.py,sha256=I43FimMq1qnPlHh1cbUIssVDN_h_i3sgG84jV2VP7Rc,6062
81
+ litellm/llms/bedrock.py,sha256=ngnu3qF1YtzHxCRHb5Eek1d2VqNK4w2ZRtcHdUZIJDw,56661
82
+ litellm/llms/bedrock_httpx.py,sha256=3OurzBkxF-Gqvd0w9grj36S9wfO4by5TIAFgOf0AEsU,79410
83
+ litellm/llms/clarifai.py,sha256=MYjU5UyNbMy5kpOFPkq0ql1izHXOadgztX8esrmUaHs,10716
84
+ litellm/llms/cloudflare.py,sha256=6p4TlxvxUNRwtMTAwUDlKDC0HHmaym3SMXLtQVVvr0s,5598
85
+ litellm/llms/cohere.py,sha256=C8ZxdOI5WdWgvDU0dhKtF5xT7Ca_cpZRq4S8-oUUHQo,13375
86
+ litellm/llms/cohere_chat.py,sha256=KeDi07XnQWgEHC6HHPfqvYG4LmGYcrhjLZP6FiksLFE,12093
87
+ litellm/llms/custom_httpx/http_handler.py,sha256=aclVKWDD1jd97qVcIwE7OYuoUBqJ5zZreDUdRdLpbpk,11252
88
+ litellm/llms/custom_httpx/httpx_handler.py,sha256=v8HYYvxAywJCZubWqYGvdiBF6fH6B8Y-XjDWZLv0fus,1249
89
+ litellm/llms/custom_llm.py,sha256=GHEInUvjyz5MOShpSnPa3RQMGcYUIAT0YLLXVrXfj5E,4264
90
+ litellm/llms/databricks.py,sha256=JwWopK46bwSUG8RjEPs28f0oV9dxqg6efokJCjSfvxI,25779
91
+ litellm/llms/files_apis/azure.py,sha256=fFAIkze3mxRES9VWuMTwS0eGGTIF77fSOrUc3UXMgRo,11098
92
+ litellm/llms/fine_tuning_apis/azure.py,sha256=6TRtrnDoQn8jp5B0dm9Lop5jnCFc0H3_8xLGT6xSn9E,6857
93
+ litellm/llms/fine_tuning_apis/openai.py,sha256=X_kZVX9nwyKv7FcjagStRfNSLCFdbw6M8ssoW78c-ZE,7226
94
+ litellm/llms/fine_tuning_apis/vertex_ai.py,sha256=y3s64IovOF-oRrelxtx88-ohI5xkUggQSi1r44LJDfw,11674
95
+ litellm/llms/fireworks_ai.py,sha256=UE0BPONbFez4T0RrN-kxW62BRlqd4_IZKex2zkuCrBg,3595
96
+ litellm/llms/gemini.py,sha256=lld1t-vN8LiB5ESpZWcS4sCYZgfFrsTSaEj4bzDU73k,15616
97
+ litellm/llms/huggingface_llms_metadata/hf_conversational_models.txt,sha256=-KennA-85KE2N-dTyR2TG4v30NvWc6IAE6zCIEngjZQ,76183
98
+ litellm/llms/huggingface_llms_metadata/hf_text_generation_models.txt,sha256=IskID-RI7HHQTqVB8cTAHoAOIVoeGm8vhFz5opYL8Kk,1288358
99
+ litellm/llms/huggingface_restapi.py,sha256=UJX4hkW2DrKeeVWFtebCa3EOUfjuhI7MFE1ACj3iYKI,44377
100
+ litellm/llms/maritalk.py,sha256=i3BGq_CJYB8z30dpb3PHt0gpeBCimBQhbNvg_HcT_pQ,5930
101
+ litellm/llms/nlp_cloud.py,sha256=AJSyYEv3JVAB6j9BPm61nkHx0LeZqHChU4maEZ1tjRs,8004
102
+ litellm/llms/nvidia_nim.py,sha256=pYVGYZeQF-VAB__bov-OIjG3C5Nok_ZZDnEXmi6Xt7Y,4619
103
+ litellm/llms/ollama.py,sha256=iHOz2ICaBWFUJLLHok5TXcg0EVK06rQTYT5pTBVD3cY,22684
104
+ litellm/llms/ollama_chat.py,sha256=_CjoCTnK4nvN7L0VU9xyU-y0CwFdvMFlP_Ny7F2WB4U,22396
105
+ litellm/llms/oobabooga.py,sha256=tygqCpcBiev_1ojYz-e84Gblzic3r2DJBszYMhkh99U,5474
106
+ litellm/llms/openai.py,sha256=ib3LSyUEwp6m7TWnn00aCBkniEPxfZUm9KXLPsfb8GM,126187
107
+ litellm/llms/openrouter.py,sha256=2KIepjHPqDXseisZ37ZjWQTgGDqDA8TDLBtb0hTGmqU,1026
108
+ litellm/llms/palm.py,sha256=jeuHIYmUvXnzmudYsxZrFhW03HoYNnhP4vWTPEIVGXo,7042
109
+ litellm/llms/petals.py,sha256=GPhhmIXqftEs3PLYg6k0IJvazTb9CzPyhBouTg6DXi0,7032
110
+ litellm/llms/predibase.py,sha256=_hNXV1RvEr9wpiNAFDbVW68Hq8y0wqw9_hcvHEWOxrw,22616
111
+ litellm/llms/prompt_templates/factory.py,sha256=dAGCJU0qOIm0EL5Nl-3CplK1IU1izuI_XhFAmYoAWj4,104166
112
+ litellm/llms/replicate.py,sha256=GWxD-VJcgWEo_iuavL-RiWIlUFoa_BL9dIenMCaLEj4,22090
113
+ litellm/llms/sagemaker.py,sha256=4DTZquhoUbOG72DOtpfpE1T6QZ1EbxCsRwr0eNtZTuQ,35170
114
+ litellm/llms/text_completion_codestral.py,sha256=itv5aHanYOi_OXH77gf-IS6VL5s3XWXPHogY_ZpILMQ,17906
115
+ litellm/llms/together_ai.py,sha256=GiTyexiXGQ7CFh5rRYl4dXjmFfqpFQWjJgCgHG5pMmY,9310
116
+ litellm/llms/tokenizers/9b5ad71b2ce5302211f9c61530b329a4922fc6a4,sha256=Ijkht27pm96ZW3_3OFE-7xAPtR0YyTWXoRO8_-hlsqc,1681126
117
+ litellm/llms/tokenizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
118
+ litellm/llms/tokenizers/anthropic_tokenizer.json,sha256=wkFzffJLTn98mvT9zuKaDKkD3LKIqLdTvDRqMJKRF2c,1774213
119
+ litellm/llms/tokenizers/ec7223a39ce59f226a68acc30dc1af2788490e15,sha256=lLXKff9NAHZ7wlb90bJ-Wxc2HXuKX5aFR_nyPrcNIGk,836186
120
+ litellm/llms/tokenizers/fb374d419588a4632f3f557e76b4b70aebbca790,sha256=RGqVOMtsNI41FhINfAiwn1fDZJXirP_-WaW_iwz7Gi0,3613922
121
+ litellm/llms/triton.py,sha256=TXjx0dE6OhZozq0XifUwShaR540UkR4ASSDzAST1Xg4,11123
122
+ litellm/llms/vertex_ai.py,sha256=B-AJJ5a7LYu27jW_oUEIhrwVMZO7kYBlhMGkbqtTXlA,58900
123
+ litellm/llms/vertex_ai_anthropic.py,sha256=NHSVl9EItKtYxrCuZWcd-mwrXteXW1I_BbkWcfgbu9U,15364
124
+ litellm/llms/vertex_ai_partner.py,sha256=kCkcbcQV4ABKheTG35QxrJCiLHjji588f0BhtFUBlMo,8481
125
+ litellm/llms/vertex_httpx.py,sha256=yEoOuO0cXYG5RdGdld-nIp319_3t32z0ExItCkMpqDw,66479
126
+ litellm/llms/vllm.py,sha256=p_m0E4E_C5UKMjqfN1TiAt7oFsFoqS3yZgn3EaQSBbs,6122
127
+ litellm/llms/volcengine.py,sha256=f4CyIpbxff8trhkB6SEFNh-VmwwxkN_CD6Z32U3we9Y,2689
128
+ litellm/llms/watsonx.py,sha256=BqWLNjcKHhVZUZ1uF9KwZcT-zi74pE_E8z3QAe5o8NA,31464
129
+ litellm/main.py,sha256=2gZaGO9GOtTjWfnpvf1nQ-n4nF5lY03CWzpoXKiTzDg,205901
130
+ litellm/model_prices_and_context_window_backup.json,sha256=d56iJoU85W4Sa44iRXTW2X8flxjbX4-y0IpjJsybQrg,181982
131
+ litellm/proxy/.gitignore,sha256=v2ZocUpppVuVfYJh1Bd1JpjpYSLxifJdClMEo0oOdT0,17
132
+ litellm/proxy/README.md,sha256=MiZkO5ggaGLYVglWypp43xUgnWCL08XsqQAMSY8m5h8,787
133
+ litellm/proxy/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
134
+ litellm/proxy/_experimental/out/_next/static/chunks/131-cb6bfe24e23e121b.js,sha256=PuBjUDy1cjBTTKd60Ctu6FBVzkqDzrEtQYg7B7atiDA,681365
135
+ litellm/proxy/_experimental/out/_next/static/chunks/2f6dbc85-cac2949a76539886.js,sha256=L-FNXiIqsiBouoQ_haADmDIgNst-SPCs1iQhLFTNSKU,31064
136
+ litellm/proxy/_experimental/out/_next/static/chunks/3014691f-b24e8254c7593934.js,sha256=5vmdZfOJ4jrMbHo5TPKda4yuzYkai8ohJGTW4V2B2oI,725
137
+ litellm/proxy/_experimental/out/_next/static/chunks/505-5ff3c318fddfa35c.js,sha256=RlyUAsnfKQvWgPhkR2YKjvG6FiXNaMwIMdge7oC6UM0,327591
138
+ litellm/proxy/_experimental/out/_next/static/chunks/605-8e4b96f972af8eaf.js,sha256=P0WKRKtSxBAoWrYXjst0lQNpkP-145DaytbbGgrzeus,1015030
139
+ litellm/proxy/_experimental/out/_next/static/chunks/684-16b194c83a169f6d.js,sha256=cNjHfykyxpnQyi-NpQMUID_fTfjX7qozLPEa0LRtGSY,49660
140
+ litellm/proxy/_experimental/out/_next/static/chunks/69-8316d07d1f41e39f.js,sha256=lAalMsBcPUObqwncSYLplvSN_d0ercbP0lsldZBedlc,113335
141
+ litellm/proxy/_experimental/out/_next/static/chunks/777-50d836152fad178b.js,sha256=Xzm7fyD3wuRH1iPlPxn8qKtNaOmb8kRu_sQlfT4TdGc,28836
142
+ litellm/proxy/_experimental/out/_next/static/chunks/app/_not-found-4163791cb6a88df1.js,sha256=vRsMkO0OtbtzFDUnTLKV0chuVk6FfeQxtwfnvu8HyiQ,1775
143
+ litellm/proxy/_experimental/out/_next/static/chunks/app/layout-f84e1a2dc32a2b83.js,sha256=-f5KFlm1rJ9y4O780hHPtxWb-Ctj7ONFugdZyfU0d3w,420
144
+ litellm/proxy/_experimental/out/_next/static/chunks/app/model_hub/page-79eee78ed9fccf89.js,sha256=EN-k7S4w9EFNHNp6q-Bdn_-ZTzUH8tETMHWkcU1_DPA,7568
145
+ litellm/proxy/_experimental/out/_next/static/chunks/app/onboarding/page-8be9c2a4a5c886c5.js,sha256=YX_2ILOyKtNvizwJvoQjWrJi_90y_rPpfTzZd5OwH_U,2788
146
+ litellm/proxy/_experimental/out/_next/static/chunks/app/page-539019eb3e09177c.js,sha256=kxrgf_yf2YsngNq-oze8SRT3vD4r1gAeLPzgoXdmMlA,166748
147
+ litellm/proxy/_experimental/out/_next/static/chunks/fd9d1056-f593049e31b05aeb.js,sha256=QI0CKrpO8VBSIcDy-QBfPdQhTKoEXsIhfBXiQbnI16M,172192
148
+ litellm/proxy/_experimental/out/_next/static/chunks/framework-b370f160bb96059c.js,sha256=rdVv5xb7IDglnNf8MXneJb9zkELY_b3ITKCE5ThunrQ,141006
149
+ litellm/proxy/_experimental/out/_next/static/chunks/main-a61244f130fbf565.js,sha256=zlIcVUCT3XuOxqLsd5zTJ0JGu0Lwt0TLGBIG9mPbPqM,109705
150
+ litellm/proxy/_experimental/out/_next/static/chunks/main-app-9b4fb13a7db53edf.js,sha256=zl4knUp_PI9M9cnbWbJql9TmKbAJAc0Y_R6SPOzb_MI,470
151
+ litellm/proxy/_experimental/out/_next/static/chunks/pages/_app-d21e88acd55d90f1.js,sha256=6Tfqn-C_NCH5A9z56noXA-XhmIzoyHHa8hNefPRJVwE,284
152
+ litellm/proxy/_experimental/out/_next/static/chunks/pages/_error-d6107f1aac0c574c.js,sha256=eCsfmitISoEXoWoho7nSup51aVxMHxTUS5HZqTniPMg,249
153
+ litellm/proxy/_experimental/out/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js,sha256=AiXrA00CSgO9yQ6mx59WGTZi58Pu6QlpYpiCDlF8u4M,91460
154
+ litellm/proxy/_experimental/out/_next/static/chunks/webpack-193a7eac80c8baba.js,sha256=1Psq5CSz-83kmJjbVL11T7-RpQgTka0WR1yvRkBvMyU,3833
155
+ litellm/proxy/_experimental/out/_next/static/css/cd10067a0a3408b4.css,sha256=UMww9uOUjZbSIH7JFsZO5N9Ahf37hebjGfNnzmaCBqw,435002
156
+ litellm/proxy/_experimental/out/_next/static/fV1QC9ULIrhC5SUk7IqET/_buildManifest.js,sha256=1ZIrpmVMAUNAG9YfFRaxhQ_0tNDBUuM6anTEgxedIAU,224
157
+ litellm/proxy/_experimental/out/_next/static/fV1QC9ULIrhC5SUk7IqET/_ssgManifest.js,sha256=Z49s4suAsf5y_GfnQSvm4qtq2ggxEbZPfEDTXjy6XgA,80
158
+ litellm/proxy/_experimental/out/_next/static/media/26a46d62cd723877-s.woff2,sha256=lOXII-cuccwg9L-imwQ08iYAQJZdnQZsDny13Jn_1sM,18820
159
+ litellm/proxy/_experimental/out/_next/static/media/55c55f0601d81cf3-s.woff2,sha256=zhKUdvQpmyFbVbHu0vxDJLRr2yZB4R1Z4kgbdEpH0RQ,25908
160
+ litellm/proxy/_experimental/out/_next/static/media/581909926a08bbc8-s.woff2,sha256=6sXLry_RZ9cH4ezy5o4q8jK-nPyVJXQWRBfzKllMXvw,19072
161
+ litellm/proxy/_experimental/out/_next/static/media/6d93bde91c0c2823-s.woff2,sha256=MuUklqJWCJ8nnGFQGu-7Q4D3ksk_Aex6bnNWeP1is_E,74316
162
+ litellm/proxy/_experimental/out/_next/static/media/97e0cb1ae144a2a9-s.woff2,sha256=PSMwBhtNmlpSF4kzHs5Rp1X9plNe2ybXQJtxBSizzQ0,11220
163
+ litellm/proxy/_experimental/out/_next/static/media/a34f9d1faa5f3315-s.p.woff2,sha256=yI2yQBvvfhID4JM8xVJaD4GGO_0HZ1bbEqzqVZbwiew,48556
164
+ litellm/proxy/_experimental/out/_next/static/media/df0a9ae256c0569c-s.woff2,sha256=jbAP9Gxnsizai-2GWs9wd2UcrI0oQdW0CYBVa0iWGTE,10280
165
+ litellm/proxy/_experimental/out/favicon.ico,sha256=Ikbq6HOjEekHeOnx68AzqSl19Y5YPKwdQv7FyKK6Q8M,15406
166
+ litellm/proxy/_experimental/out/index.html,sha256=irBRWUnm0PrWadX8OEpsFwI1-G2sf__G_R9-qZas-VM,4970
167
+ litellm/proxy/_experimental/out/index.txt,sha256=Mg3w2XuTUbt99MQwxoErqmg176uK0d6IHKh6FZuW3IA,2814
168
+ litellm/proxy/_experimental/out/model_hub.txt,sha256=4q9pFlK2rrDn26rNEGprA70dAS5SKqF-xfq6ROAICJE,3121
169
+ litellm/proxy/_experimental/out/next.svg,sha256=VZld-tbstJRaHoVt3KA8XhaqW_E_0htN9qdK55NXvPw,1375
170
+ litellm/proxy/_experimental/out/onboarding.txt,sha256=oUHO_ARLk_YTrYB_r_GGNDxlXVtfcwytHrMwjqQ2dWI,3174
171
+ litellm/proxy/_experimental/out/vercel.svg,sha256=P6XNdXtBjhivxo3eutVfRDIG5BAyeSHdsr8b5zFliIA,629
172
+ litellm/proxy/_experimental/post_call_rules.py,sha256=0tMsQ8ViObIH2wJcEfdWt9CZ2FAkj6HoBIrAr59VvFc,170
173
+ litellm/proxy/_logging.py,sha256=3zwPYBRv2EL1OB8Tk7_O6qU6lXCL2zSBNshe7rfyZbU,1055
174
+ litellm/proxy/_new_secret_config.yaml,sha256=lpsdzQZiPUUd_hJRxQMgcRDNMJoCp9VBS4vDxoSyFD4,68
175
+ litellm/proxy/_super_secret_config.yaml,sha256=go-txuGiBfjn8vrxTYrB9Sto_BRWjnMiTrStJcSh5Xw,3480
176
+ litellm/proxy/_types.py,sha256=IFnuSZv92fBsHPUyl4p1DBmeppbyK-FH8bRGf_4H8ds,55813
177
+ litellm/proxy/admin_ui.py,sha256=x1z0jm_HxQbsOqvv0QE4SXe8HNo_atKTpmhcDnaA8zo,7562
178
+ litellm/proxy/analytics_endpoints/analytics_endpoints.py,sha256=qk2Onr8qR38dp7_JXPSb1xKsWRGgl9IvETrWIFQnTmQ,3480
179
+ litellm/proxy/auth/auth_checks.py,sha256=l6d2-NdiG4XNNgP4EoJQ-ofv9PCKr5brllLPd0lunKU,22437
180
+ litellm/proxy/auth/auth_utils.py,sha256=mqFAyC8PIOoCdYJhkYiLgGe6NazVVOsMFCy4fKzMMFs,7513
181
+ litellm/proxy/auth/handle_jwt.py,sha256=cO1BQ98C_aQvwzEduUmMtq8cztbJkRs0bwR4zTb7rQ4,9052
182
+ litellm/proxy/auth/litellm_license.py,sha256=tBwvC1giDYrcVBBFXZN_ofxozEcIfA3RerH3lLX0TZc,5064
183
+ litellm/proxy/auth/model_checks.py,sha256=GVnSDURnKs0hREyJtSFgBmxyjUJR-1fQdFMmtnqetEg,2397
184
+ litellm/proxy/auth/oauth2_check.py,sha256=vDqfzUnR_ybjdnVhczoz4dOzObqhvzXtzUL2xEh8EoU,2875
185
+ litellm/proxy/auth/public_key.pem,sha256=KlTCQCWViTHUwzzxCu9KyFCX8YTdnIfGJlx7jiotak4,451
186
+ litellm/proxy/auth/rds_iam_token.py,sha256=D7pBrNyjUPhYslBVuV7Wzfs8iotOnSR4d3lTybMveaQ,6198
187
+ litellm/proxy/auth/user_api_key_auth.py,sha256=_MTcomGi6dS53Krw9i5AnPjriQ0UkQdUiOtXkKFcCSQ,55138
188
+ litellm/proxy/cached_logo.jpg,sha256=KQhlR-OWdfaNr0Cz6lQYbMRVnU1I7o-19IKOO92EovM,15974
189
+ litellm/proxy/caching_routes.py,sha256=jWUJQoTX4TcnS7PjPP7H2RlS96xddkpKV-lmFnRK6Kc,6234
190
+ litellm/proxy/common_utils/admin_ui_utils.py,sha256=HMC3onZmlFuidqBBx1qoMXcsHie-JFoBEgLkDGPVJfQ,5908
191
+ litellm/proxy/common_utils/callback_utils.py,sha256=XZfd1Rc8Utqwcv56lFFRP1lPefPwBvLOSEyS6dZICFE,12526
192
+ litellm/proxy/common_utils/debug_utils.py,sha256=_0DIq80jHcX8hwrj-JQyoYP6RtbD4z2CwxOYEtvSNwg,7865
193
+ litellm/proxy/common_utils/encrypt_decrypt_utils.py,sha256=JG0Bq-P4RyCQxEGkryDG1u2U9OykDm4DseTVt8T_RAI,2815
194
+ litellm/proxy/common_utils/http_parsing_utils.py,sha256=PGpF474ULWwu41d28ckpJeyG_gTA-FqYgbyZssPKH1s,3415
195
+ litellm/proxy/common_utils/load_config_utils.py,sha256=OTB-yO5u7-5oVbRMQImYy1KVWn-HcNjRe78kj7Q6OdE,2100
196
+ litellm/proxy/common_utils/openai_endpoint_utils.py,sha256=cKo5fw41r09HDscxibDKUqpI7bUd1QQjd306NI1N_C0,721
197
+ litellm/proxy/config_management_endpoints/pass_through_endpoints.py,sha256=f7RT0Q-mioZ3E-Khzgpb5g47gd8nq6DpVfd6HqeqnR8,1057
198
+ litellm/proxy/custom_callbacks.py,sha256=cOVm_N7CZ7lJWJe0azBeeObGNmv55yddGxEsur0GqDQ,2673
199
+ litellm/proxy/custom_callbacks1.py,sha256=IJzB6S5HysU3kcpkhKuZ7lQTHFOVBrHvMs3XqIflwjo,1779
200
+ litellm/proxy/custom_handler.py,sha256=XCMLmhsbB1GL46Lih0V_1qHQF30KcCAQv6JTaI2enSs,675
201
+ litellm/proxy/db/base_client.py,sha256=JAg-ghx1qLNuxSRSn0B6Y_BB7a1ZIINNuvjOTJ_aByQ,1129
202
+ litellm/proxy/db/dynamo_db.py,sha256=_UxzvCgnmZt90iR7u9kF0rGE_v3Gzhr986QZ5QkBmFA,16392
203
+ litellm/proxy/example_config_yaml/_health_check_test_config.yaml,sha256=DcUpvUly3ASBh57fdv51uZ5Nr7a3o7f7j1sQebILtjQ,512
204
+ litellm/proxy/example_config_yaml/aliases_config.yaml,sha256=mN_iQHMZBv6CWXLF3BAOc-sdRrLKcFnWRbJIDXePXcA,1225
205
+ litellm/proxy/example_config_yaml/azure_config.yaml,sha256=swb4kZv8EN6IfTW8G_uOFqjzXtcMxUpbf7Lz7G_GHS8,747
206
+ litellm/proxy/example_config_yaml/custom_auth.py,sha256=4Gm2Jk3BtuGxSAargCdoR8qBQvVYJkseR0MomBJFXlk,1586
207
+ litellm/proxy/example_config_yaml/custom_callbacks.py,sha256=BHiYN-Jtpf42_d5mc_G34yRuKMsQCGNYCq0RfhP3x2c,2252
208
+ litellm/proxy/example_config_yaml/langfuse_config.yaml,sha256=jkBz0zM8bUEBb_gmHi5P0TuFyC0WYlyGa37-WVRdsAo,181
209
+ litellm/proxy/example_config_yaml/load_balancer.yaml,sha256=hz5tnS6TvE8P-qU3pZ-SspqMB280EtrSwMZvjEca3sg,886
210
+ litellm/proxy/example_config_yaml/opentelemetry_config.yaml,sha256=u7-6jPVmj2Yca7nTeu1ykDZzzdtGKcGj3v5Y557Fc00,192
211
+ litellm/proxy/example_config_yaml/otel_test_config.yaml,sha256=brqGCEvPSIMPBJTwF70M7zdbv1XXNb2PmdQFHmkv-fk,247
212
+ litellm/proxy/example_config_yaml/simple_config.yaml,sha256=OBODVvCc0814U8-YTmiwT7C4UkSjLN51Bd0HxDenTVg,88
213
+ litellm/proxy/fine_tuning_endpoints/endpoints.py,sha256=IM4JEwayis5UwIArgnNf06fzohyfafvbRGxGmJN-1W4,14199
214
+ litellm/proxy/guardrails/guardrail_helpers.py,sha256=24g8oO47L5Hd_vFz1Q7uycd6Yc4HaDdzSGGkesDvyZo,3975
215
+ litellm/proxy/guardrails/init_guardrails.py,sha256=Q7VdOyUe6WBNcK835IRkeVh5NChnB7RPSgn3N6VtDpI,2674
216
+ litellm/proxy/health_check.py,sha256=DFXFyT249ksTDYQUsSLdm0jzStVOoOBmMDcFs_IYfpU,3271
217
+ litellm/proxy/health_endpoints/_health_endpoints.py,sha256=cBFpZJcxeUaA9GMN3ByUkzDDP6mxzUxtcXFThUCVl2A,20440
218
+ litellm/proxy/hooks/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
219
+ litellm/proxy/hooks/azure_content_safety.py,sha256=NI82k0Lu8X8xHHNDiifMQFhl4bAcPZlCt93J6zOPQgc,5620
220
+ litellm/proxy/hooks/batch_redis_get.py,sha256=1NkleF6uwY5OkncUWhhceWqWZ1abjCJgBwtOdHWp1HY,5263
221
+ litellm/proxy/hooks/cache_control_check.py,sha256=_2oyTBCOsyS8pJKis8U4boIA2DqXc9d15rCWQpANG6k,2213
222
+ litellm/proxy/hooks/dynamic_rate_limiter.py,sha256=vIXBFsKQ7F76_7QZ1Rc7_Nsxas3kqYDQ-iDSKqO-BO8,11850
223
+ litellm/proxy/hooks/example_presidio_ad_hoc_recognizer.json,sha256=VZLbOsMKjmQRdigSjZ3Rn5PJiizWV0If4_kGq_gH9DE,756
224
+ litellm/proxy/hooks/max_budget_limiter.py,sha256=tUZGcX8P6jgpxbSxrEMUoRDH-pTqpQjwGO3cGphuIm0,1640
225
+ litellm/proxy/hooks/parallel_request_limiter.py,sha256=WbTNCB1yn49jUipUf8AOBhFr_uheZvTnwwuaLWTEd_k,27226
226
+ litellm/proxy/hooks/presidio_pii_masking.py,sha256=R8J2dZpRqx18AqOiWsalC4FH5RIiUQLPxdhVS2gmXaA,14810
227
+ litellm/proxy/hooks/prompt_injection_detection.py,sha256=IApaMoXa7lcCAm-u856cu6y0InjgXVkZDK9zWUdpkxk,10251
228
+ litellm/proxy/lambda.py,sha256=h_06oqJhK3tkvnKOmxe7VLtPuIJIsosJE07BFXzF7sQ,107
229
+ litellm/proxy/litellm_pre_call_utils.py,sha256=mvuCOsURF0pMJAVOc9vnw-2Zdcs-XVMLYERB2rjbsDs,13345
230
+ litellm/proxy/llamaguard_prompt.txt,sha256=tCel8OPpD7IybjAulUqEg4QhJBdXKGThiv6J4DoKJFk,3300
231
+ litellm/proxy/logo.jpg,sha256=ZnPgg_2nBqNuMuqW2ZSrWNISVaK6HiSuNB4e5xttQto,24694
232
+ litellm/proxy/management_endpoints/internal_user_endpoints.py,sha256=4EGdTFEBuGCaP2Ici7k84Nbv2IbRNc2IkOXmHQ3bhYQ,29723
233
+ litellm/proxy/management_endpoints/key_management_endpoints.py,sha256=0K0Ig2iTQNybl_-3NRaS5SuGXOQFnYDQXAsDsB4H_gg,39494
234
+ litellm/proxy/management_endpoints/team_callback_endpoints.py,sha256=-EXxZRkJ1zptyhh1OW0QD5mLmrSng1KF5hMP5BjOW2c,14318
235
+ litellm/proxy/management_endpoints/team_endpoints.py,sha256=-grVzx0Vp1_yNuwpcaaYhM4DEeDr5rwb8uDj5gMrCLo,32827
236
+ litellm/proxy/management_helpers/utils.py,sha256=NvnpYaPBopH_nym7gWhiCFLaaVLE_pQ-8j0P7hIjABI,12559
237
+ litellm/proxy/openai_files_endpoints/files_endpoints.py,sha256=MG_NJAa_CTgbP9HRzrH7r9FtHUDBH3awSo7roer17EQ,20800
238
+ litellm/proxy/openapi.json,sha256=MJrfO9l1MFZmvPnXC77LzUJojMwTkAiFU4whrntKA-4,7163
239
+ litellm/proxy/otel_config.yaml,sha256=Fnu-KSsikVFOve9ev6mjKNRMTisOCn-SjTCRoLe1dZ4,625
240
+ litellm/proxy/out/404.html,sha256=QqEaqZOsCgQ-bNIz0UkOKQkATFA0_ZSNw8N_dWRhSEo,6645
241
+ litellm/proxy/out/_next/static/bNZcj3BOVnvWu7auSxFn-/_buildManifest.js,sha256=1ZIrpmVMAUNAG9YfFRaxhQ_0tNDBUuM6anTEgxedIAU,224
242
+ litellm/proxy/out/_next/static/bNZcj3BOVnvWu7auSxFn-/_ssgManifest.js,sha256=Z49s4suAsf5y_GfnQSvm4qtq2ggxEbZPfEDTXjy6XgA,80
243
+ litellm/proxy/out/_next/static/chunks/131-cb6bfe24e23e121b.js,sha256=PuBjUDy1cjBTTKd60Ctu6FBVzkqDzrEtQYg7B7atiDA,681365
244
+ litellm/proxy/out/_next/static/chunks/2f6dbc85-cac2949a76539886.js,sha256=L-FNXiIqsiBouoQ_haADmDIgNst-SPCs1iQhLFTNSKU,31064
245
+ litellm/proxy/out/_next/static/chunks/3014691f-b24e8254c7593934.js,sha256=5vmdZfOJ4jrMbHo5TPKda4yuzYkai8ohJGTW4V2B2oI,725
246
+ litellm/proxy/out/_next/static/chunks/505-5ff3c318fddfa35c.js,sha256=RlyUAsnfKQvWgPhkR2YKjvG6FiXNaMwIMdge7oC6UM0,327591
247
+ litellm/proxy/out/_next/static/chunks/605-8e4b96f972af8eaf.js,sha256=P0WKRKtSxBAoWrYXjst0lQNpkP-145DaytbbGgrzeus,1015030
248
+ litellm/proxy/out/_next/static/chunks/684-16b194c83a169f6d.js,sha256=cNjHfykyxpnQyi-NpQMUID_fTfjX7qozLPEa0LRtGSY,49660
249
+ litellm/proxy/out/_next/static/chunks/69-8316d07d1f41e39f.js,sha256=lAalMsBcPUObqwncSYLplvSN_d0ercbP0lsldZBedlc,113335
250
+ litellm/proxy/out/_next/static/chunks/777-50d836152fad178b.js,sha256=Xzm7fyD3wuRH1iPlPxn8qKtNaOmb8kRu_sQlfT4TdGc,28836
251
+ litellm/proxy/out/_next/static/chunks/app/_not-found-4163791cb6a88df1.js,sha256=vRsMkO0OtbtzFDUnTLKV0chuVk6FfeQxtwfnvu8HyiQ,1775
252
+ litellm/proxy/out/_next/static/chunks/app/layout-e379310fa648921d.js,sha256=beGf6MhgLrjZ6TnrQM6xeDmS00RJGpoIF07wkU0v6dA,420
253
+ litellm/proxy/out/_next/static/chunks/app/model_hub/page-39740ed413adc0ec.js,sha256=jK3HuUKJiH_BdodqtJpJ6gA9-OjHX1xgcqUVSQQCO18,7568
254
+ litellm/proxy/out/_next/static/chunks/app/onboarding/page-cd0662400289603c.js,sha256=ZG7ANPSvcOZWIPOS0kiJXrB_mMYtMcbkCnvaUN7v0-E,2788
255
+ litellm/proxy/out/_next/static/chunks/app/page-5b7e51de07f1b89a.js,sha256=knKqEGuXueppVaBnD9EC4vivpRCA2w4_ETDKvfT6Oew,166748
256
+ litellm/proxy/out/_next/static/chunks/fd9d1056-f593049e31b05aeb.js,sha256=QI0CKrpO8VBSIcDy-QBfPdQhTKoEXsIhfBXiQbnI16M,172192
257
+ litellm/proxy/out/_next/static/chunks/framework-b370f160bb96059c.js,sha256=rdVv5xb7IDglnNf8MXneJb9zkELY_b3ITKCE5ThunrQ,141006
258
+ litellm/proxy/out/_next/static/chunks/main-a61244f130fbf565.js,sha256=zlIcVUCT3XuOxqLsd5zTJ0JGu0Lwt0TLGBIG9mPbPqM,109705
259
+ litellm/proxy/out/_next/static/chunks/main-app-096338c8e1915716.js,sha256=6rTapQL-ul47TUSRitJ-HOTm4mJHSX5T5si3T-gYj-8,470
260
+ litellm/proxy/out/_next/static/chunks/pages/_app-d21e88acd55d90f1.js,sha256=6Tfqn-C_NCH5A9z56noXA-XhmIzoyHHa8hNefPRJVwE,284
261
+ litellm/proxy/out/_next/static/chunks/pages/_error-d6107f1aac0c574c.js,sha256=eCsfmitISoEXoWoho7nSup51aVxMHxTUS5HZqTniPMg,249
262
+ litellm/proxy/out/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js,sha256=AiXrA00CSgO9yQ6mx59WGTZi58Pu6QlpYpiCDlF8u4M,91460
263
+ litellm/proxy/out/_next/static/chunks/webpack-2d07f7d2618f9dfc.js,sha256=CHjpYfsFltVNfHacPI3qkHBCjF953WspddbcjL4-G5c,3833
264
+ litellm/proxy/out/_next/static/css/051d7321f11572d4.css,sha256=ZAr7WQ7FH8lTC0yesXVuh_vMky3_6lwGITrPD74M3hk,435002
265
+ litellm/proxy/out/_next/static/media/05a31a2ca4975f99-s.woff2,sha256=aWbbPArZ7qC4X5FSGEnMlyRNgDekNM95lriseseglDc,10496
266
+ litellm/proxy/out/_next/static/media/513657b02c5c193f-s.woff2,sha256=KG1HaV7h2Ir6RPfhBeM8I_RblOuOc97RtgqRQ5ZR5uA,17612
267
+ litellm/proxy/out/_next/static/media/51ed15f9841b9f9d-s.woff2,sha256=biS-z0aPL0ZtGGo4W6reWB_jwASs7DXZipKkSSFgnHw,22524
268
+ litellm/proxy/out/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2,sha256=O88EyjAeRPE_QEyKBKpK5wf2epUOEu8wwjj5bnhCZqE,46552
269
+ litellm/proxy/out/_next/static/media/d6b16ce4a6175f26-s.woff2,sha256=Lo2OA4Fs4kgf_PLDbklFXlDfaFQg56qwljRJCa1pTY4,80044
270
+ litellm/proxy/out/_next/static/media/ec159349637c90ad-s.woff2,sha256=In0Dqjy59njqtSDIZhWICXhLuULHbySMImbKI87FhUA,27316
271
+ litellm/proxy/out/_next/static/media/fd4db3eb5472fc27-s.woff2,sha256=ZEFW4NV-s7WzXwBryUCTH_cG3DoOLQkL9bRwuf_ojRY,12768
272
+ litellm/proxy/out/favicon.ico,sha256=Ikbq6HOjEekHeOnx68AzqSl19Y5YPKwdQv7FyKK6Q8M,15406
273
+ litellm/proxy/out/index.html,sha256=nIBZl0GeNsWHxd6s6LuNu-V_3XC5tFj4vNeH7mkSt_s,4970
274
+ litellm/proxy/out/index.txt,sha256=HBcRQgyWV7LfgmB1SeCbZUu7gnverOd0KKGxo6OeHmQ,2814
275
+ litellm/proxy/out/model_hub.html,sha256=fH59PU_X4fOmPxxl3Kd-Qd0C-_06jnn7nh6CGunuFZM,5344
276
+ litellm/proxy/out/model_hub.txt,sha256=hsvGZaMdDhcckP22g6FZBor2cg4HfHrTk6un4beB0Ao,3121
277
+ litellm/proxy/out/next.svg,sha256=VZld-tbstJRaHoVt3KA8XhaqW_E_0htN9qdK55NXvPw,1375
278
+ litellm/proxy/out/onboarding.html,sha256=EpT9xGR3e9fzchtZeDcg5GXQEUJbXr_VAJdHKDgC2rs,5402
279
+ litellm/proxy/out/onboarding.txt,sha256=-4Vps5sF6CVeOqIZXS4Cqan0FdzMpOZp0VDb7Rug_Lg,3174
280
+ litellm/proxy/out/vercel.svg,sha256=P6XNdXtBjhivxo3eutVfRDIG5BAyeSHdsr8b5zFliIA,629
281
+ litellm/proxy/pass_through_endpoints/pass_through_endpoints.py,sha256=HIAuLXBIy-kYRj2_ror_HKHLaf0Q9CGyHDRDekzeb1Q,27074
282
+ litellm/proxy/post_call_rules.py,sha256=bbnqX3BXhKjvbRN6LdZIwndKMCh88i4a9BXkTzsaHVk,359
283
+ litellm/proxy/prisma_migration.py,sha256=_Rsx8d8J7zF4pdVXo5vqP0OsNzoGg7fXXyZKqDxLht0,2264
284
+ litellm/proxy/proxy_cli.py,sha256=bXMgaLCbCO88qR6icLfreszo7FBjk4BqXj_NVt6NQgQ,25901
285
+ litellm/proxy/proxy_config.yaml,sha256=yv8uOLwPzGviTvtYRawjOxQaQ8sqbHbgcNNJ6g3mN9E,1637
286
+ litellm/proxy/proxy_load_test/litellm_proxy_config.yaml,sha256=S5cDYau2btdo-VKyBaGqjOl9hPPolttvKx45M78e07c,158
287
+ litellm/proxy/proxy_load_test/litellm_router_proxy/Dockerfile,sha256=ojfWGjipZHfYWaaxUYV1Tygah8UHR_iC5dTloBvkqME,457
288
+ litellm/proxy/proxy_load_test/litellm_router_proxy/main.py,sha256=lc8tA_IYqIhOE1JVu7dtAKtnEYObmohjjsGksfoydFk,1567
289
+ litellm/proxy/proxy_load_test/locustfile.py,sha256=r5Y93HQeiuBlsiN73jwb3uNJOG2HfroBpZq9S-_TKc0,1082
290
+ litellm/proxy/proxy_load_test/openai_endpoint.py,sha256=d16VI9g_554QCovlRUm9EFRk7KfMrIVW5SZhXPHGbN4,1412
291
+ litellm/proxy/proxy_load_test/simple_litellm_proxy.py,sha256=v4ywNlkgZ2GQH6XHiwPTR-LbTuEqb0PBBXa0IFliDps,1301
292
+ litellm/proxy/proxy_load_test/simple_litellm_router_proxy.py,sha256=lc8tA_IYqIhOE1JVu7dtAKtnEYObmohjjsGksfoydFk,1567
293
+ litellm/proxy/proxy_load_test/simple_proxy.py,sha256=a9FuXHZq_10zIXkW7JtIEtO2ORo3dyoXcf-33zunE20,1267
294
+ litellm/proxy/proxy_server.py,sha256=MVJlTtCxwgHPG7tJ7bIWiRtBXqSxKCFe8yKq1QTiaiY,362456
295
+ litellm/proxy/queue/celery_app.py,sha256=biT-emisvGRb697qPMsAXyphfFpCUHlzRGesBeZ3Nv8,2612
296
+ litellm/proxy/queue/celery_worker.py,sha256=Sz5zAZXcU96WbkFOsGxFaaPKJtmz8F4JIsWA477eMwQ,324
297
+ litellm/proxy/queue/rq_worker.py,sha256=oOD_1LuoJw2pc-D8aHBg7kN0uCR-piXqBGZbfJSdbLo,957
298
+ litellm/proxy/route_llm_request.py,sha256=C_NDJs5Nst9PPr5s018jCF3isu9uGdLoRpAf9YehYb8,3387
299
+ litellm/proxy/schema.prisma,sha256=xoC6o4xE-A33Bb_45ztnnWE2kP3ED5gmda1DrmR1jsU,10345
300
+ litellm/proxy/secret_managers/aws_secret_manager.py,sha256=oS0pYj6Wpu0PkwlEifDlJ9Q7FsP3mDAZjoRAvVGZxvI,5306
301
+ litellm/proxy/secret_managers/google_kms.py,sha256=QbvkHgeZE3PC4OmVOvlqhT5HOuG1B-3WvcLFconQdk0,1284
302
+ litellm/proxy/spend_tracking/spend_management_endpoints.py,sha256=pxa8yzSbpgxfZymtje3k5ybTOjQ0Bv7Go8QQAQ4Qah4,67309
303
+ litellm/proxy/spend_tracking/spend_tracking_utils.py,sha256=zVWVpV6SZ5pQoh20Wv4zyl6J2U4LNZy_8z8MFisSExY,4376
304
+ litellm/proxy/start.sh,sha256=qFUFqvhcEIMyL3Bp9vtAtLvY0zjyLw6lHTocHqpLE5w,32
305
+ litellm/proxy/tests/bursty_load_test_completion.py,sha256=qdH9bbEO-semCr4rqZa5fz-NdO5G9IYx-okxmwUXec4,1506
306
+ litellm/proxy/tests/error_log.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
307
+ litellm/proxy/tests/large_text.py,sha256=jNJ7qOSbKv7awjMWUvnyjL-x3fa8Uu6tRbR_VYViZtE,24566
308
+ litellm/proxy/tests/llama_index_data/essay.txt,sha256=8GEx5TIILYFGfoemBcNEITbObOO-f0cR7dyLC0wYDMg,75041
309
+ litellm/proxy/tests/load_test_completion.py,sha256=j23t4zg3Q2BFhSy563wqauadC7TRLjkC5NhE4KE9pw4,2103
310
+ litellm/proxy/tests/load_test_embedding.py,sha256=1_5D3sIOKu9l1Y41b-SB_al7-HTViwdyKStJVB9C7pU,2972
311
+ litellm/proxy/tests/load_test_embedding_100.py,sha256=qMiabdAchfp2PkbqxXyEohZmKDGxUu3ZobV1YSIr52M,1573
312
+ litellm/proxy/tests/load_test_embedding_proxy.py,sha256=u3PJSuaVwex1cY_7SrhePMHD1Eu2nOZBE9pCK1Pd8mY,3014
313
+ litellm/proxy/tests/load_test_q.py,sha256=lu5ima-DhIN5zy_VkuxK2CiexNUANI7j1bvJFsMWHEY,3939
314
+ litellm/proxy/tests/request_log.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
315
+ litellm/proxy/tests/test_anthropic_context_caching.py,sha256=XdO18lk5YyHSmTMCQXefHsgYtLRsXt2YTIelifAlrpk,1039
316
+ litellm/proxy/tests/test_anthropic_sdk.py,sha256=JUSv3o3UuHXf_FYClerSsTigR-vbadtBiY6cIS7kxm0,481
317
+ litellm/proxy/tests/test_async.py,sha256=yZoDqz8JI9JDIo5dGAk1ipIDncy2-KL3jvSll0QF-vg,739
318
+ litellm/proxy/tests/test_gemini_context_caching.py,sha256=3fWPDtTqwmwDgdoBrlGlO2kSnhnP66a2svouP_w5fBE,1575
319
+ litellm/proxy/tests/test_langchain_request.py,sha256=tkOxaPitlISJt570nseQahU0MQLZX0RFZg7HaahmNos,1187
320
+ litellm/proxy/tests/test_llamaindex.py,sha256=uKSIB18t1fny0rr_JuagEg4zJnl-DBGTZT2uI14bF4w,1041
321
+ litellm/proxy/tests/test_mistral_sdk.py,sha256=_j6p3_4bPZh6o7yfwO5pG5cnfbWNuRALK2ycOjvsGtg,403
322
+ litellm/proxy/tests/test_openai_embedding.py,sha256=KiDpGjzOJFqr_uEKjDY0XyyQPYfL8PsMUynZH4lPnFg,2541
323
+ litellm/proxy/tests/test_openai_exception_request.py,sha256=Ka6yYycaDruKIw7ckBKSw2-Bqsp1XeTP8JeTfGTlyDk,1391
324
+ litellm/proxy/tests/test_openai_js.js,sha256=R5kHG96bQIWpJSNMRgtjr-vK0eUSURv-GBc6GeEpEOs,945
325
+ litellm/proxy/tests/test_openai_request.py,sha256=RaUPtc6341adnYyQo4b9KVW42dAbnAYlrgFn2MNBIBM,1763
326
+ litellm/proxy/tests/test_openai_request_with_traceparent.py,sha256=FrTdzhWyUBc8f4P15Z7OlWDI50LQD5C4waIerNJgEuM,1653
327
+ litellm/proxy/tests/test_openai_simple_embedding.py,sha256=41OITmgUD_RLb84O56ym55cyosZKU0mCAZMQbEfpmYA,289
328
+ litellm/proxy/tests/test_pass_through_langfuse.py,sha256=jfFlFyisJv36vX1UAkTEEiadQn_V4Of0H3Quhbbden8,338
329
+ litellm/proxy/tests/test_q.py,sha256=B3C8Z5vhnF19FmA682XtKqJcRuL1K9xFZvubbdElQfo,2514
330
+ litellm/proxy/tests/test_simple_traceparent_openai.py,sha256=8aW31Ui2Lpz3hf6pIP0E0odMoRdPOeYL8pglj84nI-g,595
331
+ litellm/proxy/ui_crud_endpoints/proxy_setting_endpoints.py,sha256=RIo26sszKZqhy2-h8OpT3r3eIiNahMtyTgQfQ54xw6o,3515
332
+ litellm/proxy/utils.py,sha256=LrVV2SMJrPlgX1vcy-4jVEV8HOb7HOQm-sS7004QCO4,115344
333
+ litellm/proxy/vertex_ai_endpoints/google_ai_studio_endpoints.py,sha256=lpqD5GyibzJCJnMDvcSkCOqLmU7Xgb234AuyIED5QVg,4149
334
+ litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py,sha256=3Go1gGVvk3B0sDRWxznTKlhS_HGuYKCgMQjcsIvbLWI,10460
335
+ litellm/py.typed,sha256=bKPUECNwNtN5PZk1JYgrtM4QFbfIniqsIVGDL0oKMuQ,129
336
+ litellm/router.py,sha256=H2t9Kqrtz5gDQsvSA2qTAlcDAqhcs8Sc7oItloqNDlw,211688
337
+ litellm/router_strategy/least_busy.py,sha256=wfupoyg5nvuWovelCE5aULs4XsOZ02sAfsQTRid66ww,8093
338
+ litellm/router_strategy/lowest_cost.py,sha256=ZEU4KpR4rXzR6n1EDuNirWxQ9NotvqbeK73wJT86n_0,12888
339
+ litellm/router_strategy/lowest_latency.py,sha256=cD-qnLTp2RK2zKGI6E0J837GHvuDEkNy0o5fPkc9omk,20210
340
+ litellm/router_strategy/lowest_tpm_rpm.py,sha256=_U3FESx3sg-YimJPG1zx3j2GPvmSoE-fnau48JJLFgg,9281
341
+ litellm/router_strategy/lowest_tpm_rpm_v2.py,sha256=Qz_U9oR7JAyOdeMorv615DG81frUOaQga4Ruog34CuQ,24904
342
+ litellm/router_strategy/tag_based_routing.py,sha256=nOMA6MWnp9HxdJamlpuJ3HPgdYS71SCC3YWHTNTJjls,2814
343
+ litellm/router_utils/client_initalization_utils.py,sha256=kIDmvYHkocnstX_uioH-GQKhkGx1L9ydDDIwaarwUDg,21660
344
+ litellm/router_utils/cooldown_callbacks.py,sha256=r1UhQ4MlfPjwiUASIwnL1Fvkrt0zy5NQS4D01-pfNIY,1827
345
+ litellm/router_utils/fallback_event_handlers.py,sha256=mh35Olzu2b7e54ezcPyrxda7iyCneVwjvil_lxK6KxA,1227
346
+ litellm/router_utils/handle_error.py,sha256=5qMHc2XrCnmRsqeEtNdIGFJiYqEiqLtXtfE9Jm9QrVU,1531
347
+ litellm/scheduler.py,sha256=V9-Ucc-E9CzjvTJiM1hBdgks8KjGaT--lOy1-bTNFxM,4476
348
+ litellm/tests/.litellm_cache/cache.db,sha256=baRpphicsnVHsgqvlDbqxqB7hftsK3sAmkMLoJygBxA,32768
349
+ litellm/tests/adroit-crow-413218-bc47f303efc9.json,sha256=usPH8ctWP1zQoyDl2u1u0BIbikk-YJWqHngQUQrA9c4,608
350
+ litellm/tests/azure_fine_tune.jsonl,sha256=dzUe6F_pSfA5b3T1MueC7wcquoJYxBiNV9WcebADRhw,3151
351
+ litellm/tests/conftest.py,sha256=hODXEXJP6x-7xweXZBKzdnljZS7-MpgAW7JxydI-Ouw,1575
352
+ litellm/tests/data_map.txt,sha256=-4_fGs0ltj5Hl3btweqXE3aDxcMi-Dpc8a2iW26aG1A,3939
353
+ litellm/tests/eagle.wav,sha256=TAdGabgjNoGNszBsSuYbOzjqaNczI9XyNf6rMTSafu0,55852
354
+ litellm/tests/example_config_yaml/aliases_config.yaml,sha256=LdCrc0xUjYAa4h0ssz7zjkMXSwUYK2Vv_0LLTNRDzts,1231
355
+ litellm/tests/example_config_yaml/azure_config.yaml,sha256=ulSpfDLCLdiNoTcAjg6ejTms_wuOlseIfYbpHzWCC5U,443
356
+ litellm/tests/example_config_yaml/cache_no_params.yaml,sha256=r0TniRcIZgkzj_vPpkCn5wzRXaWOI8Ct62pCXlhfjIY,127
357
+ litellm/tests/example_config_yaml/cache_with_params.yaml,sha256=EYsKHuqGwI_I6zF294Bq9sYa-KMFAVeLSmXtG7Q18AU,248
358
+ litellm/tests/example_config_yaml/langfuse_config.yaml,sha256=jkBz0zM8bUEBb_gmHi5P0TuFyC0WYlyGa37-WVRdsAo,181
359
+ litellm/tests/example_config_yaml/load_balancer.yaml,sha256=hz5tnS6TvE8P-qU3pZ-SspqMB280EtrSwMZvjEca3sg,886
360
+ litellm/tests/example_config_yaml/opentelemetry_config.yaml,sha256=u7-6jPVmj2Yca7nTeu1ykDZzzdtGKcGj3v5Y557Fc00,192
361
+ litellm/tests/example_config_yaml/simple_config.yaml,sha256=OBODVvCc0814U8-YTmiwT7C4UkSjLN51Bd0HxDenTVg,88
362
+ litellm/tests/gettysburg.wav,sha256=djDa_7Lyjyck2B8f8gOetppfo2DbORlyGncDKljbDUY,775192
363
+ litellm/tests/langfuse.log,sha256=_RcDsToj93hWVmFtgC97cKfNNJYtPiGewDzDLdZFrME,50483
364
+ litellm/tests/large_text.py,sha256=jNJ7qOSbKv7awjMWUvnyjL-x3fa8Uu6tRbR_VYViZtE,24566
365
+ litellm/tests/litellm_uuid.txt,sha256=wsz3BXCEmkNYCru3g5llvm_aimwDqXZ4L4w70Q1XmRQ,36
366
+ litellm/tests/messages_with_counts.py,sha256=SNLS89VExANkR5wHFbXi1WUGbnSQWZA7WC5EaPyy3BI,20196
367
+ litellm/tests/model_cost.json,sha256=G9vH2an8oTeVxoCsrpzuq_-sIDZkav_xUzC6UwOqFWw,37
368
+ litellm/tests/openai_batch_completions.jsonl,sha256=o0X_l2BJ1cMY5Nclmemt2oZUPq3TMd7bpTSfvVHl4H0,512
369
+ litellm/tests/stream_chunk_testdata.py,sha256=BNhI5wfTOagJner4oCvv91HJd6rJdSFyJMPACz9X3v0,16955
370
+ litellm/tests/test_acompletion.py,sha256=sGw8TUWNENtFSj7ouBg1bqfz9OYhRHbI-xIIYKvE478,1059
371
+ litellm/tests/test_acooldowns_router.py,sha256=aCE0F_3DOx4-IinDR4pV6MhV3GbHA4LRZUgaWVATKIA,7026
372
+ litellm/tests/test_add_function_to_prompt.py,sha256=YpA1hc46_ICm8C8UgYZnNECsAk4gcoOGC8jgeSG8MK4,2719
373
+ litellm/tests/test_add_update_models.py,sha256=qiQA5fbTEISRcMcOzWk-L61Ioi1N-H-Mn3rr6v56Xh8,6458
374
+ litellm/tests/test_alangfuse.py,sha256=lewv4YUQQAz7ie4Q5Y6x2P3wW__VgIqeioaaHLjsucc,32791
375
+ litellm/tests/test_alerting.py,sha256=iV42tXjOgCu8qGdTS46_V6Ad_P0MBUnMkYOpQTvLglA,27059
376
+ litellm/tests/test_amazing_s3_logs.py,sha256=XQLgBrx7PcnGfo7oe1oaYOiTMKwJgVglX7cJ-nwXHR0,8509
377
+ litellm/tests/test_amazing_vertex_completion.py,sha256=OtpzvkxLiCG0aB2efz9qLddT4Cr08qMa0vUzjPmQs-o,77276
378
+ litellm/tests/test_anthropic_completion.py,sha256=a--VDKfZv88KbtpEZHaZBXfcqpiNCvtX0dK09EduP4c,15483
379
+ litellm/tests/test_anthropic_prompt_caching.py,sha256=mDiMeksMbiMwJuq-etiWl74VrAkC71DuJQjYomOqhLY,11137
380
+ litellm/tests/test_aproxy_startup.py,sha256=B2ght7sETpuR7ey779uxeRbdNM-TS0upWUW0H3g6fQc,3111
381
+ litellm/tests/test_arize_ai.py,sha256=tDjDN9MTBtCWDXdcoo2guDyRqhFbZHukuqbodOtABrU,720
382
+ litellm/tests/test_assistants.py,sha256=7VihcCFIzmYbLyYO7ZhpE2ZFMuieIgTxYR_dZpAQ2bA,9611
383
+ litellm/tests/test_async_fn.py,sha256=GQ0tlSx7CEAVhonQPKE1KVU1eTElpnW3txnl_Tb7xro,10796
384
+ litellm/tests/test_async_opentelemetry.py,sha256=aGLaHT_KzyjSx2u-ZHfmtPNeFgPHdoGDBde9RtHvOyk,2510
385
+ litellm/tests/test_audio_speech.py,sha256=QPmpEDxcLyH4g-Pb5MS00X8W3PewhTsMxvPvaS1wUns,2865
386
+ litellm/tests/test_auth_checks.py,sha256=SV5fUV8Saylsqv62joCmpEtHpyH6gAQgrfF_kXTWu-s,1993
387
+ litellm/tests/test_azure_content_safety.py,sha256=HdfcdCqmiofvukvBgvhkgNrqbZkMxwEc-QpiGezhRuw,9358
388
+ litellm/tests/test_azure_perf.py,sha256=JyzeufbSBdom-bCNqWm2Rdt_s8qqlFULpQjnNaeGXUY,4050
389
+ litellm/tests/test_bad_params.py,sha256=-hnBLmuw-UDT6SUnvjMHJqJaWyUBk7O4TjdO2NU7sgc,4776
390
+ litellm/tests/test_banned_keyword_list.py,sha256=ufooccsKEhWoNAvlTO6GIZdG2yvQwUpQq-r4SCjNst0,1969
391
+ litellm/tests/test_batch_completion_return_exceptions.py,sha256=HajZCg0M_1kfyoBeamBCSbm3Cdqm1zGV2qJzHazvk1Y,665
392
+ litellm/tests/test_batch_completions.py,sha256=f2tQwHXLb83-zqOdKhCRc2naIMsl6gPD9Qkc7S6T_TQ,2087
393
+ litellm/tests/test_bedrock_completion.py,sha256=3jO7kWBcJduRwLuvHqpiLQyl_JWdx9G2MtBNSizqCxE,43650
394
+ litellm/tests/test_blocked_user_list.py,sha256=W3C0zh7B4PQ8UEecwSlSqUoJ08anjH-0ts2m12UsiGE,4593
395
+ litellm/tests/test_braintrust.py,sha256=_US0Ru-24jGsIG21zSQQ_pHOVkRCF5GTyzlL7xwQLCY,1174
396
+ litellm/tests/test_budget_manager.py,sha256=LukzeqTWLAHJknTOwezGfpxj1Zgxmh9L233VNFkVQjo,5087
397
+ litellm/tests/test_caching.py,sha256=5_Ic5DRYc9_K6Kh21qNOLJAcfLBJ4PC5rIhMKb83IZ4,53148
398
+ litellm/tests/test_caching_ssl.py,sha256=-MCNRgOCwp_Ji58RwfQMkrhGRL8W7Eu6p84HRVJHYuU,3366
399
+ litellm/tests/test_clarifai_completion.py,sha256=ZvZpv3AMocLCwBaigVMHz6BByVaG3USRiGWegkFd9Qw,2867
400
+ litellm/tests/test_class.py,sha256=ZEx56yqsgbXZvsd7xNSYXHZFfA7XZ8o1n1Ga_vruk3Q,3141
401
+ litellm/tests/test_clickhouse_logger.py,sha256=B4dXn1bF2GgcS6VLzFxI9cYPdn0CK7IXysvG4B3dwdc,1055
402
+ litellm/tests/test_cohere_completion.py,sha256=OKwuVTAc7-Fc0kxP6JEEMm9TVXZgPMOiQjyYf6drVu8,7575
403
+ litellm/tests/test_completion.py,sha256=bGMPqDhDzGVc-m4Voh7ctKxs2O3pGSJA3fxd8Ysdpy8,138792
404
+ litellm/tests/test_completion_cost.py,sha256=cgYk65bHW-jYIp8aoOvOTxvFYzH4TfSITVppZ5iiigY,35202
405
+ litellm/tests/test_completion_with_retries.py,sha256=F4V2Y81JeuZ9E2uJZYYF88zHvOCFmbTZoDdWpAnRdx0,1475
406
+ litellm/tests/test_config.py,sha256=Mm9fxgrVugZoEpNIxr8onWPp7lMq58koReBA9hX6RHc,8412
407
+ litellm/tests/test_configs/custom_auth.py,sha256=udRnEgzdTut_bjuXpA7bFP6xjFVuJpVulof_pWxN1mk,679
408
+ litellm/tests/test_configs/custom_callbacks.py,sha256=GPYybpwRV-EqtYwHZGQe_Y4_FQN3IOsC0RTGMJZDQj4,4483
409
+ litellm/tests/test_configs/test_bad_config.yaml,sha256=0r2cBcZlr3uEow6TEOHCzR-1ITuxi-4ZounqYaBhK7M,603
410
+ litellm/tests/test_configs/test_cloudflare_azure_with_cache_config.yaml,sha256=kllqMcmc9lju8KnXmXC6HgHBg6MCNJVB6n6P4Z0rl-s,779
411
+ litellm/tests/test_configs/test_config.yaml,sha256=eKoeKY-hllVq8SVgKIgWQCstsVjKfWikg6cYPIQ8frY,869
412
+ litellm/tests/test_configs/test_config_custom_auth.yaml,sha256=wHVCsrxViy45xUFof76Ao_ZKzHO4kLuIx3UTqczDfBs,216
413
+ litellm/tests/test_configs/test_config_no_auth.yaml,sha256=DNlGwiHczgrFeCz--bhVMN9SJKvHOF8XlZlw69xIrVw,3916
414
+ litellm/tests/test_configs/test_custom_logger.yaml,sha256=sGHWRf-Go-TfMqw-TlQeSeP_ImGIYSA-_G5LbHTLP_M,723
415
+ litellm/tests/test_configs/test_guardrails_config.yaml,sha256=YzDM9bWMmF4MFUl3563BeJ2Qv5TcgbS0VsE9BnAbDf4,879
416
+ litellm/tests/test_cost_calc.py,sha256=AOcd8tnPE7jdJW8psXWMQEL5na2Vu0KG2HED94HWDLI,2866
417
+ litellm/tests/test_custom_api_logger.py,sha256=0FbDI1zvtmM853ROJ6yLDlFShhQgNj6rNywqcYid5j4,1140
418
+ litellm/tests/test_custom_callback_input.py,sha256=Zpf2Nevxr5um3Ds8YZ7TnCiCTF8LNzIZRLuI9cTffJs,46162
419
+ litellm/tests/test_custom_callback_router.py,sha256=vf96FJKKVzU3ajWf-447FBy3uRC20W5zkTLtfB64CjM,27016
420
+ litellm/tests/test_custom_llm.py,sha256=nkyz9F3QtzqEaxlz9TTwPU9ek6TjZf2_AYt9sSep6vU,8778
421
+ litellm/tests/test_custom_logger.py,sha256=KO2tp2_9zYJAoQqtp17aJU5v9NOcYhJ-X5JuE-AhHA4,20233
422
+ litellm/tests/test_datadog.py,sha256=pOCHxQIyLyFCe1UtcCs5dBFfqcXxuKq-FzlsRLMt7W8,604
423
+ litellm/tests/test_deployed_proxy_keygen.py,sha256=V46OXA2ai5qb4Z_--uDYuxRvlIFTrTLWJ2pGCCT4VAs,2075
424
+ litellm/tests/test_dynamic_rate_limit_handler.py,sha256=vGf6D7EQW7TebJiKOmYbte48q0YSGsIufqCZP1rIaqw,16646
425
+ litellm/tests/test_dynamodb_logs.py,sha256=CVtPM8yfxcppNGBGQ4_RH-HSqfvnhyo8TSXpL0Tfo9Q,3850
426
+ litellm/tests/test_embedding.py,sha256=RNVUhVxohD2GeqHedY_M7xpKaBWtvA2uNQTSJiQIEIY,25131
427
+ litellm/tests/test_exceptions.py,sha256=HQj72sw3vBSFF3u-4TX1sfYq4bXP660-GBw-9Pdn-II,28774
428
+ litellm/tests/test_file_types.py,sha256=AWhh4MG5ZXey-62NyvvfXIJub15kUTbeyAogUEIPV28,2267
429
+ litellm/tests/test_fine_tuning_api.py,sha256=KtNlZE3KtbRYoTBc05v_bIoqFT_f5qw00O5Pvh9O-os,9254
430
+ litellm/tests/test_fireworks_ai.py,sha256=jfIsD2reaTyOvnasOkbmYPEm_lX7QESsHszFdteKAI0,958
431
+ litellm/tests/test_function_call_parsing.py,sha256=2tTLgOURuk1mnU0T3ZhzrseIJtGrba_thD0nzHDHpjs,4532
432
+ litellm/tests/test_function_calling.py,sha256=nsa03fvWgYzn_YvYMMAoQom3shUZK2iqoZZAV8kqr7o,13187
433
+ litellm/tests/test_function_setup.py,sha256=4-kKCVlXSDkcMg9zUwl0f554WlIyq9YSAtZ5-WEwKY8,734
434
+ litellm/tests/test_gcs_bucket.py,sha256=Y942HopIJEkLx7XXFRDEr807I8dsHk4oNxr4fSfaKtE,10830
435
+ litellm/tests/test_get_llm_provider.py,sha256=w3z0ofaBMjbAqBDDXq0hfInsl0ydovYqmv-MPKmkCEY,1940
436
+ litellm/tests/test_get_model_file.py,sha256=R2BTCsbA5T3K3ze-AMQXW923aJph-TL6QSWpEBuz8mg,285
437
+ litellm/tests/test_get_model_info.py,sha256=iQWkbueabmcj5XN0UoPYttudMO3xxEhUJ5v6fMPFjeI,1729
438
+ litellm/tests/test_get_model_list.py,sha256=EfSrl6kM5JeUNnO_3qNi-RX6TtkGGf8MRzPdI1sEwGk,253
439
+ litellm/tests/test_get_optional_params_embeddings.py,sha256=TmRFUSxyA8vkIIFi7TPlthIXfyE5h-Hi7H6c-nfoXD0,2051
440
+ litellm/tests/test_get_optional_params_functions_not_supported.py,sha256=tOEV9HXTALvGsAbpMjp-oMeiyqbKed6oW6kTVJpx8gI,926
441
+ litellm/tests/test_google_ai_studio_gemini.py,sha256=XSMVwiokSeDLaonEAt3jpTLevOa9-zws2bxgv_pkwZs,1113
442
+ litellm/tests/test_guardrails_config.py,sha256=6UApgEBo0DdLe3z8wGTugL7DSCFb5rdUZpPBhWbNLEo,2275
443
+ litellm/tests/test_health_check.py,sha256=MC6i8hA5r4zPFJVb5kUHIDh_gR3bPS5L7MbQVgfZa1k,2943
444
+ litellm/tests/test_helicone_integration.py,sha256=2qz_g2DpBJBFUrrHe5rahQmOuyIu8845foICIJZqGWg,3511
445
+ litellm/tests/test_hf_prompt_templates.py,sha256=NEtp_-cHEC0aZGCMBfvcRfbhz1YvFn9oLN2NCzaGrvw,2546
446
+ litellm/tests/test_image_generation.py,sha256=P2Rfl9tM3PFU3Ez9ewMf0LVSeFaXyWD5xyf9VDZ3MIQ,7461
447
+ litellm/tests/test_img_resize.py,sha256=kqW2Z3cdmOicmpMSz5l2-6j7eaYgKggm2Ez7d0BbIsI,2873
448
+ litellm/tests/test_jwt.py,sha256=vi6Il9_wOzmZtT5rGLBi7L1t86UZ4yI6b-TzuGGyXkw,26872
449
+ litellm/tests/test_key_generate_dynamodb.py,sha256=2H52x6RFvN_XcR1OKJ72yKWGEU71apN5z5sFazkxqo0,20093
450
+ litellm/tests/test_key_generate_prisma.py,sha256=TTyyWtM5WWQy0KROBcQoN-B2-YeftKEImQH9tZi03Sk,102714
451
+ litellm/tests/test_lakera_ai_prompt_injection.py,sha256=uRvHa9NoIOnD8icWGwIKj421Rw7wsSO_vdsCKWbDAw4,13730
452
+ litellm/tests/test_langchain_ChatLiteLLM.py,sha256=kapU65kUxf1U1K8QJ7ny6J5CGncDLLYd02mku2XsPK0,2796
453
+ litellm/tests/test_langsmith.py,sha256=uoiUlNZ0JzF8ABeD5bu5as7DFQtADKUh9XEYGevnI6g,6183
454
+ litellm/tests/test_least_busy_routing.py,sha256=0KECP868dA_08ImKPckX6tWcC5BPjyxOxpAsYthgpjs,8676
455
+ litellm/tests/test_litellm_max_budget.py,sha256=je8LIJbKm5-VCk1lAld_2hTgzfPMflgCQPxcDDO56ZQ,1113
456
+ litellm/tests/test_llm_guard.py,sha256=36qOYyOsvfcQNl9HCWR-uq-cplaM36_9Uzi1CICo8Hs,4546
457
+ litellm/tests/test_load_test_router_s3.py,sha256=aes22PJe9Hd_KUa-8MqJDK68lObVVS1Wv6nKdZq7O1E,3042
458
+ litellm/tests/test_loadtest_router.py,sha256=AFJyZyWNBaCIhz-46ZHMZXnTRIfUbBVaMq35O_ECy-E,2831
459
+ litellm/tests/test_logfire.py,sha256=SGVs3OtyEYl1N51pIPNDeNjRZYmMZ3C88xVO-HkWqqI,1998
460
+ litellm/tests/test_logging.py,sha256=Nzv-pyERvVaYSYYd_ifth0tCy6o9deIxBkwv1zNw0mI,13682
461
+ litellm/tests/test_longer_context_fallback.py,sha256=84kK5QeafD3CjXGro4U5tOhSZj2v9cyDgynBGniMcXU,324
462
+ litellm/tests/test_lowest_cost_routing.py,sha256=I0_92uS1OXPz6s-CTSsv3vm0CKOEKQ0g2L4q10BMFf8,6288
463
+ litellm/tests/test_lowest_latency_routing.py,sha256=ukCHn1m5LLLTq2IHdHGMkO6zBSiqW6KjyO2lFOT0KRw,29987
464
+ litellm/tests/test_lunary.py,sha256=bUDusFAiMHT-KuZvrB2i-XGEnGbjct58riaLjXNwIsI,3187
465
+ litellm/tests/test_max_tpm_rpm_limiter.py,sha256=9-ZkpfphcNLkvKJ7ZzkBN3YOBfC-vNzlTw8hhGIQU9E,5234
466
+ litellm/tests/test_mem_usage.py,sha256=WYM-TrNEs-1_XPSZ-gZzl-LqaZ_bwTcPcQjoNNYze-c,4438
467
+ litellm/tests/test_mock_request.py,sha256=6zhrKY6mS0FUbXW79I9Gwah7GmJi_mq1pzbkcYHormQ,2789
468
+ litellm/tests/test_model_alias_map.py,sha256=epVVqwnI-JFL67A8sOfRPfi_Dq3vdq5eMKmjtZ3NDRo,1140
469
+ litellm/tests/test_model_max_token_adjust.py,sha256=Z64EssBLYSL-U8mUuV06FSYpxBjG13CozA00A9eiSXQ,771
470
+ litellm/tests/test_model_response_typing/server.py,sha256=TZUoo_zMl-6l-RX3OX_U0ESQvKd7x3iW5KS8bsoUm1E,626
471
+ litellm/tests/test_model_response_typing/test.py,sha256=cTofJzFIDczLwLVeEq7uOt7I3lnwkbm6ayIFbwiGgJo,495
472
+ litellm/tests/test_multiple_deployments.py,sha256=nMYKiALlVOEwbgzszijl_nTff5ksFGvHKrtM9bSMCWU,1643
473
+ litellm/tests/test_ollama.py,sha256=uhsQrEyjDCihbAa7ECm70PdqL--ulnVEWrHbdqUXRpw,3651
474
+ litellm/tests/test_ollama_local.py,sha256=wi44ljEQ7J0DcB0eZbDNAK0fFk3xp0Fm58Va7qeu9qk,15508
475
+ litellm/tests/test_ollama_local_chat.py,sha256=trhkw-ZpxcaC6dejel2hU1HrrrQBJDU74EwhyPikKi4,14951
476
+ litellm/tests/test_openai_batches_and_files.py,sha256=1AxCKfecQzfXvmu2k6Juxy3ze6MPpz1tdrOwVoXVO90,5923
477
+ litellm/tests/test_openai_moderations_hook.py,sha256=PZYOADhQLFZg0vDBBowzcOvKCJt8u68kIX42tJDRMZI,2162
478
+ litellm/tests/test_optional_params.py,sha256=ngm9Nxmz2T6sH5k3pz8MeqPDUkykmkiKw8LL7Ih7GP4,14467
479
+ litellm/tests/test_parallel_request_limiter.py,sha256=mWFWby3rpy40iJLOL7KqQWmgZ4D7_PBRN22Xd-bAW_0,35831
480
+ litellm/tests/test_pass_through_endpoints.py,sha256=xuIijwzVogrfwTw-sFZSedTEV8DWxz2dxAWZniGpEn8,11535
481
+ litellm/tests/test_presidio_masking.py,sha256=saDN_vZEtVmg4DTmUpocSD2_SbLy5C-c4b1ln6Qg58w,9042
482
+ litellm/tests/test_profiling_router.py,sha256=wBUfRXJo4o0XQmoIY1YK_wZeMPuXfSnSXPlg_hq0gFQ,5335
483
+ litellm/tests/test_prometheus.py,sha256=mf7IiWnTAtLuySNoTZrCF1KSJX7aAJsc-z40DhaTJRE,2546
484
+ litellm/tests/test_prometheus_service.py,sha256=HAlYs_349HJxCt10v4dZujxNTdjmHV848UzAfuk-3sw,4319
485
+ litellm/tests/test_prompt_factory.py,sha256=ksMgDy8E_ISOCvzMSEa6N30fi4JiFQ92HNozszMaprw,12869
486
+ litellm/tests/test_prompt_injection_detection.py,sha256=_EpHsnKvyVjMFWkK5Qs7OqW5lF8MzIASA_oTh3VLSHw,4441
487
+ litellm/tests/test_promptlayer_integration.py,sha256=SW6DcI1od1o4XKmCISU5QmrBV2RPGbefS4n4S8pF_Hk,3288
488
+ litellm/tests/test_provider_specific_config.py,sha256=Njr02kQTgxMYZcKHBaRdKA1ZOmzNpQYoSiFJ9sB2cm4,27043
489
+ litellm/tests/test_proxy_custom_auth.py,sha256=PyoAyFuh2QJWEJ2gBlVdbiyqvDO68iBKAOK30b8b_yc,3058
490
+ litellm/tests/test_proxy_custom_logger.py,sha256=0Zm8bef8shEEyo5iLiA0-pes2TZOsjnghsbL-eV0QGM,10705
491
+ litellm/tests/test_proxy_encrypt_decrypt.py,sha256=Xg1Gt1Kq6rpRzWIi6EEMMYMuhVFqWXZyaXwtElWHclc,1513
492
+ litellm/tests/test_proxy_exception_mapping.py,sha256=Q6StwyBpgf8Sazoge8uy3Jv40JhNpjIbwc_35SpdGBQ,10934
493
+ litellm/tests/test_proxy_gunicorn.py,sha256=5uHJ7xJDjrxvEnJ_7y_g5qbqWgI8tr2LEhdzrPZLKSY,2077
494
+ litellm/tests/test_proxy_pass_user_config.py,sha256=7xzdKfvsLrYtZCZfCyRoWtI88kvVPgd7KX_QJ-Ezy3A,3502
495
+ litellm/tests/test_proxy_reject_logging.py,sha256=3PVx42s0JTnDfzFjyb0S2ecJ9BencG_rT9wa_xVPKWc,5228
496
+ litellm/tests/test_proxy_routes.py,sha256=k7BSKIWpEPufWPlt_9RubIkX78TNZkpD2pdOjWIDZNg,4503
497
+ litellm/tests/test_proxy_server.py,sha256=66hJsCEXQlwz5NH2rMDvj1_ZE_qUY1ui8wgqlj9sfi4,39332
498
+ litellm/tests/test_proxy_server_caching.py,sha256=_77xqgmFtdrvkwbTgPK6vmHP0RAGU6aj_zgdNTEk0fQ,3276
499
+ litellm/tests/test_proxy_server_cost.py,sha256=xvp0zhjwvtzcZTmxAvwmzzBfqTEduwZe39X_DBSRAZ8,4750
500
+ litellm/tests/test_proxy_server_keys.py,sha256=t_E-h3XpSOx-JmPKezeT2tXkkHWZ0lYW3RU1LEc8ELQ,9545
501
+ litellm/tests/test_proxy_server_langfuse.py,sha256=jnQq6x2v0VMrMSlRlO9AxndNHgVWT4tdvLZMJ-LdNbY,2514
502
+ litellm/tests/test_proxy_server_spend.py,sha256=JPW7Fs4a8vU3J2lCraJfV6eLdpzfWGV0MNyvmAY1N2Y,2501
503
+ litellm/tests/test_proxy_setting_guardrails.py,sha256=el0phiXreqDrtlrI_twzZL-J3uJ1IOcB6uTAVjcE6Lo,1943
504
+ litellm/tests/test_proxy_token_counter.py,sha256=49AtDQb1OKo_h6NsZ9ibEDsW-GGoQBcTqqMPoSmjBPY,3543
505
+ litellm/tests/test_proxy_utils.py,sha256=hhxYr9hb608HFq4KXPHMDtgXWxHLhBfY349NcgI3W_c,3073
506
+ litellm/tests/test_pydantic.py,sha256=pawjEkMf8qLXLVYaMGZ6bNKoFIIQRZQpmYs0JBiK2pw,1579
507
+ litellm/tests/test_pydantic_namespaces.py,sha256=lqJyzMqEqnruYPPH42EV_YAtgLYMyTgzZouPGYocpi4,469
508
+ litellm/tests/test_python_38.py,sha256=H0VyVx2fazjLXgsdZyE1zg9Z1g_biwCXObvFSPHf4aQ,2279
509
+ litellm/tests/test_register_model.py,sha256=Y7xcALoGS6Ju5DFa98Yt8l6qPFm0umDvX7erzHYTEaA,1921
510
+ litellm/tests/test_router.py,sha256=41gLzfWLRXdEsegeZgYZHyUjBZ_iOL376SRavlpZQHQ,71634
511
+ litellm/tests/test_router_batch_completion.py,sha256=eGo8KsVbSzmYT4TOESWzRFpj-UwdloFiasVfexbZAvc,5864
512
+ litellm/tests/test_router_caching.py,sha256=z7E1Zs5inphbO3Mhg5nabF3OXDerjoooVEDXcg0bSIo,10811
513
+ litellm/tests/test_router_client_init.py,sha256=lgHcK0MT4EFRCKW2Dow9sFTssJbembI6dpcyU4xDiwA,2385
514
+ litellm/tests/test_router_cooldowns.py,sha256=TkKyea5W_ERiwe91AwuMeD-iocjjig5Dj_eBVKqKlSU,2781
515
+ litellm/tests/test_router_custom_routing.py,sha256=vRHi_UJL3XVB5D6n3d4wRMFZDCfpqJxWRDPuhF9F1V8,5141
516
+ litellm/tests/test_router_debug_logs.py,sha256=HVhEkrzpRl7DOBriCdwe9EWWwrr8M-XjCxKLdDmhCZQ,3245
517
+ litellm/tests/test_router_fallbacks.py,sha256=0mWDuXcR-aYI0NIrIxhSJaNiblPm8PphudWEC86J_o8,43623
518
+ litellm/tests/test_router_get_deployments.py,sha256=a-igGS5P_CqANKNCADOJMSqYwZ4UcwKBQXpYG_quamc,21075
519
+ litellm/tests/test_router_init.py,sha256=KyDenTjpRcCeOtk1XDkUXldhAvJg0txxTQaOJmfqkg4,21745
520
+ litellm/tests/test_router_max_parallel_requests.py,sha256=1nPeseBcoOWb2_ph_DbgkuiR2TYrsPvLUoQv8oTfp54,3659
521
+ litellm/tests/test_router_policy_violation.py,sha256=L6LebYL03cal-VV-r3MNPBxnimXtPAwFOaJIQupYR-U,4721
522
+ litellm/tests/test_router_retries.py,sha256=4HUzE2--14hZSyXKa2lzOnAL_eJb9IgExtx_myi1Zk0,18650
523
+ litellm/tests/test_router_tag_routing.py,sha256=HaRScjQhSUCEkT_SOEncfCj_sX9T2DM-dfBwNybiQzw,2603
524
+ litellm/tests/test_router_timeout.py,sha256=IqOJzrOlMwAHwjhLrLYymeOoEi3jDEm664FUbgER4NU,3789
525
+ litellm/tests/test_router_utils.py,sha256=9a5B69qzBB1cLw0Wm-uB0FXWhPH2U-9L7re4y7eQJLc,3024
526
+ litellm/tests/test_router_with_fallbacks.py,sha256=SgDOn464fDOwwAW-8XfsvGgRaNWHw6IXW3rMbBmAtxI,1704
527
+ litellm/tests/test_rules.py,sha256=OxlwkWvmNHFZRTQv74Qm_ewnCeip7KsPR36OQLelI-8,4573
528
+ litellm/tests/test_sagemaker.py,sha256=mqThmBOPQyIqZjZssnM6HYG4LSt1y3Ss7j8iMYP0Jj8,10377
529
+ litellm/tests/test_scheduler.py,sha256=NZxqGtR5KvsGUhVGYLnsTNwbr_Z3lLvjPAQzPC7OqsY,2669
530
+ litellm/tests/test_secret_detect_hook.py,sha256=AVdaGz4IRKKWovDjY8-st3UArPM-pS-3AXbcKPWrGkE,9257
531
+ litellm/tests/test_secret_manager.py,sha256=Da3YnLGbzbXQerjLDxntJn1sqLLJAuLnxyyd1mf6nMU,6182
532
+ litellm/tests/test_simple_shuffle.py,sha256=9BvKKOxa52NbQcqqAemgH8aVYBO0xK89d_DUgPrYuZE,1338
533
+ litellm/tests/test_spend_calculate_endpoint.py,sha256=Lbp6cb1-LdAc-hHuCxx2hXhMZXGCXDv4LmcJqS1kAFA,3830
534
+ litellm/tests/test_spend_logs.py,sha256=dNXRjoQLvSMMMHO_-KOPNg1fY5o5p4BXQPSOrBHohfo,12832
535
+ litellm/tests/test_stream_chunk_builder.py,sha256=iWGZNHbcRXqgzcRs1ScrwSw--JAnkgiFkc38aJAM9SI,6751
536
+ litellm/tests/test_streaming.py,sha256=qB4WSqHDKTUQZ-ZivsjRD6O8ZL8tpKpg5PCYPrLnvjM,130734
537
+ litellm/tests/test_supabase_integration.py,sha256=Q3NPcTRvYUH7z-DkGOHPGliijQpzjt-wnGGqUhemXaM,2084
538
+ litellm/tests/test_team_config.py,sha256=wsrpHa2C0QaiWFuWqLBJG1_XhM713Y257AN55YeClOo,1095
539
+ litellm/tests/test_text_completion.py,sha256=_cVtQGxWAx13FOJoYdXaEIvpNMKPrLoHobNQbfbKwBo,88234
540
+ litellm/tests/test_timeout.py,sha256=rhT-0M-yQv9_F7Nzyk5lFIzIqmdGgcCi7ts7gokt5NI,8548
541
+ litellm/tests/test_together_ai.py,sha256=ITgIYYEVVCj6bo06bBthmOvAvZ1wZ-ImWdmmLeRPBA8,2046
542
+ litellm/tests/test_token_counter.py,sha256=xz2nZ-cqtm64Z-1LJXlj81m5m5scuDgtqYJOk_gPo4c,14943
543
+ litellm/tests/test_tpm_rpm_routing_v2.py,sha256=OavM6EBgApkgPuHPnSL7uQ_A_IRMUixLQ2RKuw_aDDQ,13629
544
+ litellm/tests/test_traceloop.py,sha256=bRI1Q7nKgz3gDnZ_TsXKowp9mKkwOecwSZTgn3eW9xg,855
545
+ litellm/tests/test_triton.py,sha256=VEbBkutgoaXGM1-J-w2kU0B3zmvl12_yjb7Lhfj98D8,845
546
+ litellm/tests/test_update_spend.py,sha256=KNk22PJc3RcVw3Ig4qZUSJ7jOqJ3jt-HMyqLMBrcThk,2972
547
+ litellm/tests/test_user_api_key_auth.py,sha256=oKuuilrO6NtCj3hQwcrvu9EyY2usUV4dvnhAqxdF5zk,6985
548
+ litellm/tests/test_utils.py,sha256=tWHN2wMTvO7iB89mjs9if1Rtx5DIPEBp3T5680bND-E,24588
549
+ litellm/tests/test_validate_environment.py,sha256=WHTVUmjJsjV83IShZxkM46kWXFhyNQQ0viTFj6nafFI,297
550
+ litellm/tests/test_wandb.py,sha256=BKvRTVfLGed_2XVeNCXrz2JlMsgzqG8cb8h6Oi86anQ,1784
551
+ litellm/tests/test_whisper.py,sha256=SquJQoF5uyiXnfRY8GKUntiu2DxqD2Jkp9pkoDkDuwc,5631
552
+ litellm/tests/user_cost.json,sha256=1qlG-BK_Gi55K5QZdyQJ_EW8Srfhvrgj6t1BlDanQt4,208
553
+ litellm/tests/vertex_ai.jsonl,sha256=2APsDSMZLO45Jqi1yg_f0c2B5S0wrQ_wvz8RCRWOTz4,654
554
+ litellm/tests/vertex_key.json,sha256=usPH8ctWP1zQoyDl2u1u0BIbikk-YJWqHngQUQrA9c4,608
555
+ litellm/timeout.py,sha256=x_Rxjhmn08v3rh_m-vcf0hYo4BS8jRuwSEsjYtbR61Q,4319
556
+ litellm/types/adapter.py,sha256=VLUBbZaxxVWPTzOVbks1x4xSm1icGSW3b7abid_I3y0,222
557
+ litellm/types/completion.py,sha256=KmlFzmPxZUuMpg_RtYxOvVsK9OoZi9VtWtUBoRvOHbg,5869
558
+ litellm/types/embedding.py,sha256=-I4LM4kGCRwNtw0SiSngM8OePTRnrIjIiwNfwGY2slg,615
559
+ litellm/types/files.py,sha256=nhTV3IKYO1XoVxuyWJ7_qr96cAOlG0QuZmnYGWU883A,7202
560
+ litellm/types/guardrails.py,sha256=oz_0qcABCzxEY5d90-9hOVbwl2vSnod5kXOBZsBerMk,1678
561
+ litellm/types/llms/anthropic.py,sha256=C-QAHxEHIaqEWdul95zz6pREe6miMK32z5S4mZVBrvY,6749
562
+ litellm/types/llms/bedrock.py,sha256=KoGL-JGHS5C6CbQqmQEzVabNHmKrVhqmd7iXFqrpjq4,4546
563
+ litellm/types/llms/cohere.py,sha256=68RmYVreF11AOBiHRIxOFdIKGpp1Ljp_gue4uwiphS8,1001
564
+ litellm/types/llms/custom_llm.py,sha256=BQiianU1zrlBtLpjB6zaXe5-xeVZrsRZNb25go4gnqI,220
565
+ litellm/types/llms/databricks.py,sha256=31TnMzir4nkDYFWuUhpI5aTDzZlg_1npPqQvZfkOjOs,493
566
+ litellm/types/llms/openai.py,sha256=WbFdhQTQQuxzSo0M4yKL0bV4TYOaZHN-Dv1itOljc_0,13702
567
+ litellm/types/llms/vertex_ai.py,sha256=rDvSfLIdegNQJlpXhFPEJMliFwsUDFirhE8rbdjaHWg,6855
568
+ litellm/types/router.py,sha256=IBVNLSmyRZnyjK7VmhpsJl8ZNv2N5Koae4yWu2nCXZw,19141
569
+ litellm/types/services.py,sha256=4gqWoMDYvFHjebAhTvW8ytt9QfpmrvLNDncfi4FyAIo,1008
570
+ litellm/types/utils.py,sha256=xuEwz_lFPs97ebtwqOEJk5kJd3MzJQRT_CencgTeeok,36546
571
+ litellm/utils.py,sha256=F8jNrTMREfORWmUEkxpbIpNCZ3XVcJ18PABCOcf-Cxo,500678
572
+ litellm-1.43.18.dist-info/LICENSE,sha256=sXDWv46INd01fgEWgdsCj01R4vsOqJIFj1bgH7ObgnM,1419
573
+ litellm-1.43.18.dist-info/METADATA,sha256=O31V_WebyjzPiWyLsPGTqdPXn1cL0F4sc26T-BaPd5k,32293
574
+ litellm-1.43.18.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
575
+ litellm-1.43.18.dist-info/entry_points.txt,sha256=FGIGsq4hBWP2nfWEtKPIwxv67GXhoegZK_AF2oK447M,46
576
+ litellm-1.43.18.dist-info/INSTALLER,sha256=HLHRd3rVxZqLVn0Nby492_jJUNACT5LifwfFYrwaW0E,12
577
+ litellm-1.43.18.dist-info/RECORD,,
.venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 1.9.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
.venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/entry_points.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [console_scripts]
2
+ litellm=litellm:run_server
3
+
.venv/lib/python3.12/site-packages/litellm/cost_calculator.py CHANGED
@@ -412,7 +412,7 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0):
412
 
413
  def _select_model_name_for_cost_calc(
414
  model: Optional[str],
415
- completion_response: Union[BaseModel, dict],
416
  base_model: Optional[str] = None,
417
  custom_pricing: Optional[bool] = None,
418
  ) -> Optional[str]:
@@ -428,7 +428,12 @@ def _select_model_name_for_cost_calc(
428
  if base_model is not None:
429
  return base_model
430
 
431
- return_model = model or completion_response.get("model", "") # type: ignore
 
 
 
 
 
432
  if hasattr(completion_response, "_hidden_params"):
433
  if (
434
  completion_response._hidden_params.get("model", None) is not None
@@ -660,7 +665,7 @@ def completion_cost(
660
 
661
  if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
662
  # Calculate the prompt characters + response characters
663
- if len("messages") > 0:
664
  prompt_string = litellm.utils.get_formatted_prompt(
665
  data={"messages": messages}, call_type="completion"
666
  )
 
412
 
413
  def _select_model_name_for_cost_calc(
414
  model: Optional[str],
415
+ completion_response: Union[BaseModel, dict, str],
416
  base_model: Optional[str] = None,
417
  custom_pricing: Optional[bool] = None,
418
  ) -> Optional[str]:
 
428
  if base_model is not None:
429
  return base_model
430
 
431
+ return_model = model
432
+ if isinstance(completion_response, str):
433
+ return return_model
434
+
435
+ elif return_model is None:
436
+ return_model = completion_response.get("model", "") # type: ignore
437
  if hasattr(completion_response, "_hidden_params"):
438
  if (
439
  completion_response._hidden_params.get("model", None) is not None
 
665
 
666
  if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
667
  # Calculate the prompt characters + response characters
668
+ if len(messages) > 0:
669
  prompt_string = litellm.utils.get_formatted_prompt(
670
  data={"messages": messages}, call_type="completion"
671
  )
.venv/lib/python3.12/site-packages/litellm/integrations/prometheus.py CHANGED
@@ -103,13 +103,30 @@ class PrometheusLogger(CustomLogger):
103
  "Remaining budget for api key",
104
  labelnames=["hashed_api_key", "api_key_alias"],
105
  )
106
-
107
- ########################################
108
- # LLM API Deployment Metrics / analytics
109
- ########################################
110
-
111
  # Litellm-Enterprise Metrics
112
  if premium_user is True:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  # Remaining Rate Limit for model
114
  self.litellm_remaining_requests_metric = Gauge(
115
  "litellm_remaining_requests",
@@ -187,6 +204,9 @@ class PrometheusLogger(CustomLogger):
187
 
188
  async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
189
  # Define prometheus client
 
 
 
190
  from litellm.proxy.proxy_server import premium_user
191
 
192
  verbose_logger.debug(
@@ -197,6 +217,7 @@ class PrometheusLogger(CustomLogger):
197
  model = kwargs.get("model", "")
198
  response_cost = kwargs.get("response_cost", 0.0) or 0
199
  litellm_params = kwargs.get("litellm_params", {}) or {}
 
200
  proxy_server_request = litellm_params.get("proxy_server_request") or {}
201
  end_user_id = proxy_server_request.get("body", {}).get("user", None)
202
  user_id = litellm_params.get("metadata", {}).get("user_api_key_user_id", None)
@@ -286,6 +307,27 @@ class PrometheusLogger(CustomLogger):
286
  user_api_key, user_api_key_alias
287
  ).set(_remaining_api_key_budget)
288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  # set x-ratelimit headers
290
  if premium_user is True:
291
  self.set_llm_deployment_success_metrics(
 
103
  "Remaining budget for api key",
104
  labelnames=["hashed_api_key", "api_key_alias"],
105
  )
 
 
 
 
 
106
  # Litellm-Enterprise Metrics
107
  if premium_user is True:
108
+
109
+ ########################################
110
+ # LiteLLM Virtual API KEY metrics
111
+ ########################################
112
+ # Remaining MODEL RPM limit for API Key
113
+ self.litellm_remaining_api_key_requests_for_model = Gauge(
114
+ "litellm_remaining_api_key_requests_for_model",
115
+ "Remaining Requests API Key can make for model (model based rpm limit on key)",
116
+ labelnames=["hashed_api_key", "api_key_alias", "model"],
117
+ )
118
+
119
+ # Remaining MODEL TPM limit for API Key
120
+ self.litellm_remaining_api_key_tokens_for_model = Gauge(
121
+ "litellm_remaining_api_key_tokens_for_model",
122
+ "Remaining Tokens API Key can make for model (model based tpm limit on key)",
123
+ labelnames=["hashed_api_key", "api_key_alias", "model"],
124
+ )
125
+
126
+ ########################################
127
+ # LLM API Deployment Metrics / analytics
128
+ ########################################
129
+
130
  # Remaining Rate Limit for model
131
  self.litellm_remaining_requests_metric = Gauge(
132
  "litellm_remaining_requests",
 
204
 
205
  async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
206
  # Define prometheus client
207
+ from litellm.proxy.common_utils.callback_utils import (
208
+ get_model_group_from_litellm_kwargs,
209
+ )
210
  from litellm.proxy.proxy_server import premium_user
211
 
212
  verbose_logger.debug(
 
217
  model = kwargs.get("model", "")
218
  response_cost = kwargs.get("response_cost", 0.0) or 0
219
  litellm_params = kwargs.get("litellm_params", {}) or {}
220
+ _metadata = litellm_params.get("metadata", {})
221
  proxy_server_request = litellm_params.get("proxy_server_request") or {}
222
  end_user_id = proxy_server_request.get("body", {}).get("user", None)
223
  user_id = litellm_params.get("metadata", {}).get("user_api_key_user_id", None)
 
307
  user_api_key, user_api_key_alias
308
  ).set(_remaining_api_key_budget)
309
 
310
+ # Set remaining rpm/tpm for API Key + model
311
+ # see parallel_request_limiter.py - variables are set there
312
+ model_group = get_model_group_from_litellm_kwargs(kwargs)
313
+ remaining_requests_variable_name = (
314
+ f"litellm-key-remaining-requests-{model_group}"
315
+ )
316
+ remaining_tokens_variable_name = f"litellm-key-remaining-tokens-{model_group}"
317
+
318
+ remaining_requests = _metadata.get(
319
+ remaining_requests_variable_name, sys.maxsize
320
+ )
321
+ remaining_tokens = _metadata.get(remaining_tokens_variable_name, sys.maxsize)
322
+
323
+ self.litellm_remaining_api_key_requests_for_model.labels(
324
+ user_api_key, user_api_key_alias, model_group
325
+ ).set(remaining_requests)
326
+
327
+ self.litellm_remaining_api_key_tokens_for_model.labels(
328
+ user_api_key, user_api_key_alias, model_group
329
+ ).set(remaining_tokens)
330
+
331
  # set x-ratelimit headers
332
  if premium_user is True:
333
  self.set_llm_deployment_success_metrics(
.venv/lib/python3.12/site-packages/litellm/litellm_core_utils/litellm_logging.py CHANGED
@@ -274,6 +274,7 @@ class Logging:
274
  headers = {}
275
  data = additional_args.get("complete_input_dict", {})
276
  api_base = str(additional_args.get("api_base", ""))
 
277
  if "key=" in api_base:
278
  # Find the position of "key=" in the string
279
  key_index = api_base.find("key=") + 4
@@ -2320,7 +2321,7 @@ def get_standard_logging_object_payload(
2320
  model_map_value=_model_cost_information,
2321
  )
2322
  except Exception:
2323
- verbose_logger.warning(
2324
  "Model is not mapped in model cost map. Defaulting to None model_cost_information for standard_logging_payload"
2325
  )
2326
  model_cost_information = StandardLoggingModelInformation(
@@ -2362,7 +2363,7 @@ def get_standard_logging_object_payload(
2362
 
2363
  return payload
2364
  except Exception as e:
2365
- verbose_logger.error(
2366
  "Error creating standard logging object - {}".format(str(e))
2367
  )
2368
  return None
 
274
  headers = {}
275
  data = additional_args.get("complete_input_dict", {})
276
  api_base = str(additional_args.get("api_base", ""))
277
+ query_params = additional_args.get("query_params", {})
278
  if "key=" in api_base:
279
  # Find the position of "key=" in the string
280
  key_index = api_base.find("key=") + 4
 
2321
  model_map_value=_model_cost_information,
2322
  )
2323
  except Exception:
2324
+ verbose_logger.debug( # keep in debug otherwise it will trigger on every call
2325
  "Model is not mapped in model cost map. Defaulting to None model_cost_information for standard_logging_payload"
2326
  )
2327
  model_cost_information = StandardLoggingModelInformation(
 
2363
 
2364
  return payload
2365
  except Exception as e:
2366
+ verbose_logger.exception(
2367
  "Error creating standard logging object - {}".format(str(e))
2368
  )
2369
  return None
.venv/lib/python3.12/site-packages/litellm/llms/anthropic.py CHANGED
@@ -1122,6 +1122,7 @@ class ModelResponseIterator:
1122
  self.streaming_response = streaming_response
1123
  self.response_iterator = self.streaming_response
1124
  self.content_blocks: List[ContentBlockDelta] = []
 
1125
 
1126
  def check_empty_tool_call_args(self) -> bool:
1127
  """
@@ -1171,7 +1172,7 @@ class ModelResponseIterator:
1171
  "name": None,
1172
  "arguments": content_block["delta"]["partial_json"],
1173
  },
1174
- "index": content_block["index"],
1175
  }
1176
  elif type_chunk == "content_block_start":
1177
  """
@@ -1183,6 +1184,7 @@ class ModelResponseIterator:
1183
  if content_block_start["content_block"]["type"] == "text":
1184
  text = content_block_start["content_block"]["text"]
1185
  elif content_block_start["content_block"]["type"] == "tool_use":
 
1186
  tool_use = {
1187
  "id": content_block_start["content_block"]["id"],
1188
  "type": "function",
@@ -1190,7 +1192,7 @@ class ModelResponseIterator:
1190
  "name": content_block_start["content_block"]["name"],
1191
  "arguments": "",
1192
  },
1193
- "index": content_block_start["index"],
1194
  }
1195
  elif type_chunk == "content_block_stop":
1196
  content_block_stop = ContentBlockStop(**chunk) # type: ignore
@@ -1204,7 +1206,7 @@ class ModelResponseIterator:
1204
  "name": None,
1205
  "arguments": "{}",
1206
  },
1207
- "index": content_block_stop["index"],
1208
  }
1209
  elif type_chunk == "message_delta":
1210
  """
 
1122
  self.streaming_response = streaming_response
1123
  self.response_iterator = self.streaming_response
1124
  self.content_blocks: List[ContentBlockDelta] = []
1125
+ self.tool_index = -1
1126
 
1127
  def check_empty_tool_call_args(self) -> bool:
1128
  """
 
1172
  "name": None,
1173
  "arguments": content_block["delta"]["partial_json"],
1174
  },
1175
+ "index": self.tool_index,
1176
  }
1177
  elif type_chunk == "content_block_start":
1178
  """
 
1184
  if content_block_start["content_block"]["type"] == "text":
1185
  text = content_block_start["content_block"]["text"]
1186
  elif content_block_start["content_block"]["type"] == "tool_use":
1187
+ self.tool_index += 1
1188
  tool_use = {
1189
  "id": content_block_start["content_block"]["id"],
1190
  "type": "function",
 
1192
  "name": content_block_start["content_block"]["name"],
1193
  "arguments": "",
1194
  },
1195
+ "index": self.tool_index,
1196
  }
1197
  elif type_chunk == "content_block_stop":
1198
  content_block_stop = ContentBlockStop(**chunk) # type: ignore
 
1206
  "name": None,
1207
  "arguments": "{}",
1208
  },
1209
+ "index": self.tool_index,
1210
  }
1211
  elif type_chunk == "message_delta":
1212
  """
.venv/lib/python3.12/site-packages/litellm/llms/prompt_templates/factory.py CHANGED
@@ -1010,6 +1010,9 @@ def convert_to_gemini_tool_call_invoke(
1010
  name = tool["function"].get("name", "")
1011
  arguments = tool["function"].get("arguments", "")
1012
  arguments_dict = json.loads(arguments)
 
 
 
1013
  for k, v in arguments_dict.items():
1014
  inferred_protocol_value = infer_protocol_value(value=v)
1015
  _field = litellm.types.llms.vertex_ai.Field(
@@ -1022,9 +1025,18 @@ def convert_to_gemini_tool_call_invoke(
1022
  name=name,
1023
  args=_fields,
1024
  )
1025
- _parts_list.append(
1026
- litellm.types.llms.vertex_ai.PartType(function_call=function_call)
1027
- )
 
 
 
 
 
 
 
 
 
1028
  return _parts_list
1029
  except Exception as e:
1030
  raise Exception(
 
1010
  name = tool["function"].get("name", "")
1011
  arguments = tool["function"].get("arguments", "")
1012
  arguments_dict = json.loads(arguments)
1013
+ function_call: Optional[litellm.types.llms.vertex_ai.FunctionCall] = (
1014
+ None
1015
+ )
1016
  for k, v in arguments_dict.items():
1017
  inferred_protocol_value = infer_protocol_value(value=v)
1018
  _field = litellm.types.llms.vertex_ai.Field(
 
1025
  name=name,
1026
  args=_fields,
1027
  )
1028
+ if function_call is not None:
1029
+ _parts_list.append(
1030
+ litellm.types.llms.vertex_ai.PartType(
1031
+ function_call=function_call
1032
+ )
1033
+ )
1034
+ else: # don't silently drop params. Make it clear to user what's happening.
1035
+ raise Exception(
1036
+ "function_call missing. Received tool call with 'type': 'function'. No function call in argument - {}".format(
1037
+ tool
1038
+ )
1039
+ )
1040
  return _parts_list
1041
  except Exception as e:
1042
  raise Exception(
.venv/lib/python3.12/site-packages/litellm/llms/vertex_httpx.py CHANGED
@@ -491,6 +491,16 @@ class VertexGeminiConfig:
491
  "SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.",
492
  }
493
 
 
 
 
 
 
 
 
 
 
 
494
 
495
  async def make_call(
496
  client: Optional[AsyncHTTPHandler],
@@ -504,8 +514,15 @@ async def make_call(
504
  if client is None:
505
  client = AsyncHTTPHandler() # Create a new client if none provided
506
 
507
- response = await client.post(api_base, headers=headers, data=data, stream=True)
508
-
 
 
 
 
 
 
 
509
  if response.status_code != 200:
510
  raise VertexAIError(status_code=response.status_code, message=response.text)
511
 
 
491
  "SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.",
492
  }
493
 
494
def translate_exception_str(self, exception_string: str) -> str:
    """Rewrite a known Vertex/Gemini error message into an OpenAI-style hint.

    Args:
        exception_string: Raw error text received from the provider.

    Returns:
        A message phrased in terms of the OpenAI ``tools`` schema (with the
        provider text appended) when the provider complains about an empty
        ``properties`` object on the first function declaration; otherwise
        ``exception_string`` unchanged.
    """
    # Exact substring matched against the provider error. Only the first
    # tool / first function declaration is recognised.
    empty_properties_marker = "GenerateContentRequest.tools[0].function_declarations[0].parameters.properties: should be non-empty for OBJECT type"

    if empty_properties_marker not in exception_string:
        return exception_string

    return "'properties' field in tools[0]['function']['parameters'] cannot be empty if 'type' == 'object'. Received error from provider - {}".format(
        exception_string
    )
503
+
504
 
505
  async def make_call(
506
  client: Optional[AsyncHTTPHandler],
 
514
  if client is None:
515
  client = AsyncHTTPHandler() # Create a new client if none provided
516
 
517
+ try:
518
+ response = await client.post(api_base, headers=headers, data=data, stream=True)
519
+ response.raise_for_status()
520
+ except httpx.HTTPStatusError as e:
521
+ exception_string = str(await e.response.aread())
522
+ raise VertexAIError(
523
+ status_code=e.response.status_code,
524
+ message=VertexGeminiConfig().translate_exception_str(exception_string),
525
+ )
526
  if response.status_code != 200:
527
  raise VertexAIError(status_code=response.status_code, message=response.text)
528
 
.venv/lib/python3.12/site-packages/litellm/model_prices_and_context_window_backup.json CHANGED
@@ -4201,6 +4201,15 @@
4201
  "litellm_provider": "ollama",
4202
  "mode": "completion"
4203
  },
 
 
 
 
 
 
 
 
 
4204
  "ollama/llama2:13b": {
4205
  "max_tokens": 4096,
4206
  "max_input_tokens": 4096,
@@ -4237,6 +4246,15 @@
4237
  "litellm_provider": "ollama",
4238
  "mode": "chat"
4239
  },
 
 
 
 
 
 
 
 
 
4240
  "ollama/llama3:70b": {
4241
  "max_tokens": 8192,
4242
  "max_input_tokens": 8192,
 
4201
  "litellm_provider": "ollama",
4202
  "mode": "completion"
4203
  },
4204
+ "ollama/llama2:7b": {
4205
+ "max_tokens": 4096,
4206
+ "max_input_tokens": 4096,
4207
+ "max_output_tokens": 4096,
4208
+ "input_cost_per_token": 0.0,
4209
+ "output_cost_per_token": 0.0,
4210
+ "litellm_provider": "ollama",
4211
+ "mode": "completion"
4212
+ },
4213
  "ollama/llama2:13b": {
4214
  "max_tokens": 4096,
4215
  "max_input_tokens": 4096,
 
4246
  "litellm_provider": "ollama",
4247
  "mode": "chat"
4248
  },
4249
+ "ollama/llama3:8b": {
4250
+ "max_tokens": 8192,
4251
+ "max_input_tokens": 8192,
4252
+ "max_output_tokens": 8192,
4253
+ "input_cost_per_token": 0.0,
4254
+ "output_cost_per_token": 0.0,
4255
+ "litellm_provider": "ollama",
4256
+ "mode": "chat"
4257
+ },
4258
  "ollama/llama3:70b": {
4259
  "max_tokens": 8192,
4260
  "max_input_tokens": 8192,
.venv/lib/python3.12/site-packages/litellm/proxy/_new_secret_config.yaml CHANGED
@@ -1,6 +1,4 @@
1
  model_list:
2
- - model_name: "text-embedding-ada-002"
3
  litellm_params:
4
- model: "azure/azure-embedding-model"
5
- api_base: os.environ/AZURE_API_BASE
6
- api_key: os.environ/AZURE_API_KEY
 
1
  model_list:
2
+ - model_name: "*"
3
  litellm_params:
4
+ model: "*"
 
 
.venv/lib/python3.12/site-packages/litellm/proxy/_types.py CHANGED
@@ -585,6 +585,8 @@ class GenerateKeyRequest(GenerateRequestBase):
585
 
586
  model_config = ConfigDict(protected_namespaces=())
587
  send_invite_email: Optional[bool] = None
 
 
588
 
589
 
590
  class GenerateKeyResponse(GenerateKeyRequest):
@@ -1337,6 +1339,8 @@ class UserAPIKeyAuth(
1337
  ] = None
1338
  allowed_model_region: Optional[Literal["eu"]] = None
1339
  parent_otel_span: Optional[Span] = None
 
 
1340
 
1341
  @model_validator(mode="before")
1342
  @classmethod
 
585
 
586
  model_config = ConfigDict(protected_namespaces=())
587
  send_invite_email: Optional[bool] = None
588
+ model_rpm_limit: Optional[dict] = None
589
+ model_tpm_limit: Optional[dict] = None
590
 
591
 
592
  class GenerateKeyResponse(GenerateKeyRequest):
 
1339
  ] = None
1340
  allowed_model_region: Optional[Literal["eu"]] = None
1341
  parent_otel_span: Optional[Span] = None
1342
+ rpm_limit_per_model: Optional[Dict[str, int]] = None
1343
+ tpm_limit_per_model: Optional[Dict[str, int]] = None
1344
 
1345
  @model_validator(mode="before")
1346
  @classmethod
.venv/lib/python3.12/site-packages/litellm/proxy/auth/auth_checks.py CHANGED
@@ -10,7 +10,7 @@ Run checks for:
10
  """
11
  import time
12
  from datetime import datetime
13
- from typing import TYPE_CHECKING, Any, Literal, Optional
14
 
15
  import litellm
16
  from litellm._logging import verbose_proxy_logger
@@ -77,6 +77,11 @@ def common_checks(
77
  if "all-proxy-models" in team_object.models:
78
  # this means the team has access to all models on the proxy
79
  pass
 
 
 
 
 
80
  else:
81
  raise Exception(
82
  f"Team={team_object.team_id} not allowed to call model={_model}. Allowed team models = {team_object.models}"
@@ -327,6 +332,39 @@ async def get_end_user_object(
327
  return None
328
 
329
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  @log_to_opentelemetry
331
  async def get_user_object(
332
  user_id: str,
@@ -543,12 +581,11 @@ async def can_key_call_model(
543
  )
544
  from collections import defaultdict
545
 
 
 
546
  access_groups = defaultdict(list)
547
- if llm_model_list is not None:
548
- for m in llm_model_list:
549
- for group in m.get("model_info", {}).get("access_groups", []):
550
- model_name = m["model_name"]
551
- access_groups[group].append(model_name)
552
 
553
  models_in_current_access_groups = []
554
  if len(access_groups) > 0: # check if token contains any model access groups
 
10
  """
11
  import time
12
  from datetime import datetime
13
+ from typing import TYPE_CHECKING, Any, List, Literal, Optional
14
 
15
  import litellm
16
  from litellm._logging import verbose_proxy_logger
 
77
  if "all-proxy-models" in team_object.models:
78
  # this means the team has access to all models on the proxy
79
  pass
80
+ # check if the team model is an access_group
81
+ elif model_in_access_group(_model, team_object.models) is True:
82
+ pass
83
+ elif _model and "*" in _model:
84
+ pass
85
  else:
86
  raise Exception(
87
  f"Team={team_object.team_id} not allowed to call model={_model}. Allowed team models = {team_object.models}"
 
332
  return None
333
 
334
 
335
def model_in_access_group(model: str, team_models: Optional[List[str]]) -> bool:
    """Check whether ``model`` is allowed by ``team_models``.

    Entries in ``team_models`` may be plain model names or names of router
    access groups; an access group grants every model listed in that group.

    Args:
        model: Model name being requested.
        team_models: Models / access-group names configured on the team.
            ``None`` is treated as "no restriction".

    Returns:
        True when ``team_models`` is None, when ``model`` is listed directly,
        or when ``model`` belongs to an access group named in ``team_models``;
        False otherwise.
    """
    # Imported lazily inside the function, as the original did.
    from litellm.proxy.proxy_server import llm_router

    if team_models is None:
        return True
    if model in team_models:
        return True

    # Without a router there are no access groups to expand.
    access_groups = llm_router.get_model_access_groups() if llm_router else {}
    if len(access_groups) == 0:
        return False

    # `model` is not listed directly (checked above), so it can only be
    # allowed through one of the team's access groups.
    return any(
        model in access_groups[entry]
        for entry in team_models
        if entry in access_groups
    )
366
+
367
+
368
  @log_to_opentelemetry
369
  async def get_user_object(
370
  user_id: str,
 
581
  )
582
  from collections import defaultdict
583
 
584
+ from litellm.proxy.proxy_server import llm_router
585
+
586
  access_groups = defaultdict(list)
587
+ if llm_router:
588
+ access_groups = llm_router.get_model_access_groups()
 
 
 
589
 
590
  models_in_current_access_groups = []
591
  if len(access_groups) > 0: # check if token contains any model access groups
.venv/lib/python3.12/site-packages/litellm/proxy/auth/auth_utils.py CHANGED
@@ -210,3 +210,20 @@ def bytes_to_mb(bytes_value: int):
210
  Helper to convert bytes to MB
211
  """
212
  return bytes_value / (1024 * 1024)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  Helper to convert bytes to MB
211
  """
212
  return bytes_value / (1024 * 1024)
213
+
214
+
215
+ # helpers used by parallel request limiter to handle model rpm/tpm limits for a given api key
216
def get_key_model_rpm_limit(user_api_key_dict: UserAPIKeyAuth) -> Optional[dict]:
    """Return the per-model RPM limits stored in the key's metadata.

    Args:
        user_api_key_dict: Auth object for the calling API key.

    Returns:
        The value stored under ``metadata["model_rpm_limit"]``, or None when
        the key has no metadata or no such entry.
    """
    metadata = user_api_key_dict.metadata
    if metadata and "model_rpm_limit" in metadata:
        return metadata["model_rpm_limit"]
    return None
222
+
223
+
224
def get_key_model_tpm_limit(user_api_key_dict: UserAPIKeyAuth) -> Optional[dict]:
    """Return the per-model TPM limits stored in the key's metadata.

    Args:
        user_api_key_dict: Auth object for the calling API key.

    Returns:
        The value stored under ``metadata["model_tpm_limit"]``, or None when
        the key has no metadata or no such entry.
    """
    metadata = user_api_key_dict.metadata
    if metadata and "model_tpm_limit" in metadata:
        return metadata["model_tpm_limit"]
    return None
.venv/lib/python3.12/site-packages/litellm/proxy/common_utils/callback_utils.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from typing import Any, Dict, List, Optional, get_args
3
+
4
+ import litellm
5
+ from litellm._logging import verbose_proxy_logger
6
+ from litellm.proxy._types import CommonProxyErrors, LiteLLMPromptInjectionParams
7
+ from litellm.proxy.utils import get_instance_fn
8
+
9
+ blue_color_code = "\033[94m"
10
+ reset_color_code = "\033[0m"
11
+
12
+
13
def _require_premium_user(premium_user: bool, feature: str) -> None:
    """Raise when an enterprise-only callback is requested without premium.

    ``feature`` is the message prefix (e.g. "Trying to use Llama Guard"); the
    shared "not a premium user" text is appended to it.
    """
    if premium_user is not True:
        raise Exception(feature + CommonProxyErrors.not_premium_user.value)


def initialize_callbacks_on_proxy(
    value: Any,
    premium_user: bool,
    config_file_path: str,
    litellm_settings: dict,
    callback_specific_params: Optional[dict] = None,
):
    """Instantiate the configured callbacks and register them on ``litellm.callbacks``.

    Args:
        value: Either a list of callbacks (known callback names or references
            to custom callbacks) or a single custom callback reference.
        premium_user: Whether enterprise-only callbacks may be enabled.
        config_file_path: Passed to ``get_instance_fn`` when importing custom
            callbacks.
        litellm_settings: Settings dict; read for ``presidio_logging_only``,
            ``prompt_injection_params`` and ``azure_content_safety_params``.
        callback_specific_params: Optional per-callback constructor kwargs,
            keyed by callback name. Defaults to no extra params.

    Raises:
        Exception: If an enterprise-only callback is requested and
            ``premium_user`` is not True.
        KeyError: If "azure_content_safety" is requested but
            ``litellm_settings`` has no ``azure_content_safety_params``.

    Side effects:
        For a list ``value``, extends ``litellm.callbacks`` (or assigns it when
        it is not a list); otherwise replaces ``litellm.callbacks`` with a
        single-element list. "otel" additionally registers a service callback
        and sets ``proxy_server.open_telemetry_logger``.
    """
    from litellm.proxy.proxy_server import prisma_client

    # Avoid a shared mutable default; None means "no callback-specific params".
    if callback_specific_params is None:
        callback_specific_params = {}

    verbose_proxy_logger.debug(
        f"{blue_color_code}initializing callbacks={value} on proxy{reset_color_code}"
    )
    if isinstance(value, list):
        imported_list: List[Any] = []
        for callback in value:  # ["presidio", <my-custom-callback>]
            # Only string entries can match a built-in callback name.
            name = callback if isinstance(callback, str) else None

            if (
                name is not None
                and name in litellm._known_custom_logger_compatible_callbacks
            ):
                imported_list.append(callback)
            elif name == "otel":
                from litellm.integrations.opentelemetry import OpenTelemetry
                from litellm.proxy import proxy_server

                open_telemetry_logger = OpenTelemetry()

                # Add Otel as a service callback
                if "otel" not in litellm.service_callback:
                    litellm.service_callback.append("otel")

                imported_list.append(open_telemetry_logger)
                setattr(proxy_server, "open_telemetry_logger", open_telemetry_logger)
            elif name == "presidio":
                from litellm.proxy.hooks.presidio_pii_masking import (
                    _OPTIONAL_PresidioPIIMasking,
                )

                presidio_logging_only: Optional[bool] = litellm_settings.get(
                    "presidio_logging_only", None
                )
                if presidio_logging_only is not None:
                    # coerce whatever was configured into a real boolean
                    presidio_logging_only = bool(presidio_logging_only)

                params = {
                    "logging_only": presidio_logging_only,
                    **callback_specific_params.get("presidio", {}),
                }
                imported_list.append(_OPTIONAL_PresidioPIIMasking(**params))
            elif name == "llamaguard_moderations":
                # Enterprise imports run before the premium check, so a missing
                # enterprise package still surfaces as an ImportError first.
                from enterprise.enterprise_hooks.llama_guard import (
                    _ENTERPRISE_LlamaGuard,
                )

                _require_premium_user(premium_user, "Trying to use Llama Guard")
                imported_list.append(_ENTERPRISE_LlamaGuard())
            elif name == "hide_secrets":
                from enterprise.enterprise_hooks.secret_detection import (
                    _ENTERPRISE_SecretDetection,
                )

                _require_premium_user(premium_user, "Trying to use secret hiding")
                imported_list.append(_ENTERPRISE_SecretDetection())
            elif name == "openai_moderations":
                from enterprise.enterprise_hooks.openai_moderation import (
                    _ENTERPRISE_OpenAI_Moderation,
                )

                _require_premium_user(
                    premium_user, "Trying to use OpenAI Moderations Check"
                )
                imported_list.append(_ENTERPRISE_OpenAI_Moderation())
            elif name == "lakera_prompt_injection":
                from enterprise.enterprise_hooks.lakera_ai import (
                    _ENTERPRISE_lakeraAI_Moderation,
                )

                _require_premium_user(
                    premium_user, "Trying to use LakeraAI Prompt Injection"
                )
                init_params = {}
                if "lakera_prompt_injection" in callback_specific_params:
                    init_params = callback_specific_params["lakera_prompt_injection"]
                imported_list.append(_ENTERPRISE_lakeraAI_Moderation(**init_params))
            elif name == "aporio_prompt_injection":
                from enterprise.enterprise_hooks.aporio_ai import _ENTERPRISE_Aporio

                _require_premium_user(premium_user, "Trying to use Aporio AI Guardrail")
                imported_list.append(_ENTERPRISE_Aporio())
            elif name == "google_text_moderation":
                from enterprise.enterprise_hooks.google_text_moderation import (
                    _ENTERPRISE_GoogleTextModeration,
                )

                _require_premium_user(
                    premium_user, "Trying to use Google Text Moderation"
                )
                imported_list.append(_ENTERPRISE_GoogleTextModeration())
            elif name == "llmguard_moderations":
                from enterprise.enterprise_hooks.llm_guard import _ENTERPRISE_LLMGuard

                _require_premium_user(premium_user, "Trying to use Llm Guard")
                imported_list.append(_ENTERPRISE_LLMGuard())
            elif name == "blocked_user_check":
                from enterprise.enterprise_hooks.blocked_user_list import (
                    _ENTERPRISE_BlockedUserList,
                )

                _require_premium_user(
                    premium_user, "Trying to use ENTERPRISE BlockedUser"
                )
                imported_list.append(
                    _ENTERPRISE_BlockedUserList(prisma_client=prisma_client)
                )
            elif name == "banned_keywords":
                from enterprise.enterprise_hooks.banned_keywords import (
                    _ENTERPRISE_BannedKeywords,
                )

                _require_premium_user(
                    premium_user, "Trying to use ENTERPRISE BannedKeyword"
                )
                imported_list.append(_ENTERPRISE_BannedKeywords())
            elif name == "detect_prompt_injection":
                from litellm.proxy.hooks.prompt_injection_detection import (
                    _OPTIONAL_PromptInjectionDetection,
                )

                prompt_injection_params = None
                if "prompt_injection_params" in litellm_settings:
                    prompt_injection_params = LiteLLMPromptInjectionParams(
                        **litellm_settings["prompt_injection_params"]
                    )

                imported_list.append(
                    _OPTIONAL_PromptInjectionDetection(
                        prompt_injection_params=prompt_injection_params,
                    )
                )
            elif name == "batch_redis_requests":
                from litellm.proxy.hooks.batch_redis_get import (
                    _PROXY_BatchRedisRequests,
                )

                imported_list.append(_PROXY_BatchRedisRequests())
            elif name == "azure_content_safety":
                from litellm.proxy.hooks.azure_content_safety import (
                    _PROXY_AzureContentSafety,
                )

                azure_content_safety_params = litellm_settings[
                    "azure_content_safety_params"
                ]
                # Resolve "os.environ/<NAME>" references in place. Only values
                # of existing keys are replaced, so iterating while assigning
                # is safe. NOTE: this writes resolved values back into
                # litellm_settings, as the original code did.
                for k, v in azure_content_safety_params.items():
                    if (
                        v is not None
                        and isinstance(v, str)
                        and v.startswith("os.environ/")
                    ):
                        azure_content_safety_params[k] = litellm.get_secret(v)

                imported_list.append(
                    _PROXY_AzureContentSafety(**azure_content_safety_params)
                )
            else:
                verbose_proxy_logger.debug(
                    f"{blue_color_code} attempting to import custom calback={callback} {reset_color_code}"
                )
                imported_list.append(
                    get_instance_fn(
                        value=callback,
                        config_file_path=config_file_path,
                    )
                )
        if isinstance(litellm.callbacks, list):
            litellm.callbacks.extend(imported_list)
        else:
            litellm.callbacks = imported_list  # type: ignore
    else:
        litellm.callbacks = [
            get_instance_fn(
                value=value,
                config_file_path=config_file_path,
            )
        ]
    verbose_proxy_logger.debug(
        f"{blue_color_code} Initialized Callbacks - {litellm.callbacks} {reset_color_code}"
    )
253
+
254
+
255
def get_model_group_from_litellm_kwargs(kwargs: dict) -> Optional[str]:
    """Return ``kwargs["litellm_params"]["metadata"]["model_group"]`` if present.

    Missing or None ``litellm_params`` / ``metadata`` are treated as empty
    dicts, so the function returns None instead of raising.
    """
    litellm_params = kwargs.get("litellm_params") or {}
    metadata = litellm_params.get("metadata") or {}
    return metadata.get("model_group")
263
+
264
+
265
def get_model_group_from_request_data(data: dict) -> Optional[str]:
    """Return ``data["metadata"]["model_group"]`` if present, else None.

    A missing or None ``metadata`` entry is treated as an empty dict.
    """
    metadata = data.get("metadata") or {}
    return metadata.get("model_group")
272
+
273
+
274
def get_remaining_tokens_and_requests_from_request_data(data: Dict) -> Dict[str, str]:
    """
    Helper function to return x-litellm-key-remaining-tokens-{model_group} and x-litellm-key-remaining-requests-{model_group}

    Reads ``litellm-key-remaining-requests-{model_group}`` and
    ``litellm-key-remaining-tokens-{model_group}`` from ``data["metadata"]``
    and copies each value that is set into an ``x-``-prefixed response header.

    Returns {} when api_key + model rpm/tpm limit is not set

    A remaining count of 0 is still reported: the key is only skipped when
    its metadata value is missing or None.
    NOTE(review): values are passed through as stored in metadata; confirm
    callers stringify them before sending as HTTP headers.
    """
    headers = {}
    _metadata = data.get("metadata", None) or {}
    # Same lookup as get_model_group_from_request_data, reusing _metadata.
    model_group = _metadata.get("model_group", None)

    # Remaining Requests
    remaining_requests = _metadata.get(
        f"litellm-key-remaining-requests-{model_group}", None
    )
    if remaining_requests is not None:
        headers[f"x-litellm-key-remaining-requests-{model_group}"] = remaining_requests

    # Remaining Tokens
    remaining_tokens = _metadata.get(
        f"litellm-key-remaining-tokens-{model_group}", None
    )
    if remaining_tokens is not None:
        headers[f"x-litellm-key-remaining-tokens-{model_group}"] = remaining_tokens

    return headers
.venv/lib/python3.12/site-packages/litellm/proxy/guardrails/init_guardrails.py CHANGED
@@ -5,7 +5,7 @@ from pydantic import BaseModel, RootModel
5
 
6
  import litellm
7
  from litellm._logging import verbose_proxy_logger
8
- from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy
9
  from litellm.types.guardrails import GuardrailItem, GuardrailItemSpec
10
 
11
  all_guardrails: List[GuardrailItem] = []
 
5
 
6
  import litellm
7
  from litellm._logging import verbose_proxy_logger
8
+ from litellm.proxy.common_utils.callback_utils import initialize_callbacks_on_proxy
9
  from litellm.types.guardrails import GuardrailItem, GuardrailItemSpec
10
 
11
  all_guardrails: List[GuardrailItem] = []
.venv/lib/python3.12/site-packages/litellm/proxy/hooks/parallel_request_limiter.py CHANGED
@@ -11,6 +11,10 @@ from litellm._logging import verbose_proxy_logger
11
  from litellm.caching import DualCache
12
  from litellm.integrations.custom_logger import CustomLogger
13
  from litellm.proxy._types import UserAPIKeyAuth
 
 
 
 
14
 
15
 
16
  class _PROXY_MaxParallelRequestsHandler(CustomLogger):
@@ -202,6 +206,85 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
202
  additional_details=f"Hit limit for api_key: {api_key}. tpm_limit: {tpm_limit}, current_tpm {current['current_tpm']} , rpm_limit: {rpm_limit} current rpm {current['current_rpm']} "
203
  )
204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  # check if REQUEST ALLOWED for user_id
206
  user_id = user_api_key_dict.user_id
207
  if user_id is not None:
@@ -299,6 +382,10 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
299
  return
300
 
301
  async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
 
 
 
 
302
  try:
303
  self.print_verbose("INSIDE parallel request limiter ASYNC SUCCESS LOGGING")
304
  global_max_parallel_requests = kwargs["litellm_params"]["metadata"].get(
@@ -365,6 +452,36 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
365
  request_count_api_key, new_val, ttl=60
366
  ) # store in cache for 1 min.
367
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
  # ------------
369
  # Update usage - User
370
  # ------------
 
11
  from litellm.caching import DualCache
12
  from litellm.integrations.custom_logger import CustomLogger
13
  from litellm.proxy._types import UserAPIKeyAuth
14
+ from litellm.proxy.auth.auth_utils import (
15
+ get_key_model_rpm_limit,
16
+ get_key_model_tpm_limit,
17
+ )
18
 
19
 
20
  class _PROXY_MaxParallelRequestsHandler(CustomLogger):
 
206
  additional_details=f"Hit limit for api_key: {api_key}. tpm_limit: {tpm_limit}, current_tpm {current['current_tpm']} , rpm_limit: {rpm_limit} current rpm {current['current_rpm']} "
207
  )
208
 
209
+ # Check if request under RPM/TPM per model for a given API Key
210
+ if (
211
+ get_key_model_tpm_limit(user_api_key_dict) is not None
212
+ or get_key_model_rpm_limit(user_api_key_dict) is not None
213
+ ):
214
+ _model = data.get("model", None)
215
+ request_count_api_key = (
216
+ f"{api_key}::{_model}::{precise_minute}::request_count"
217
+ )
218
+
219
+ current = await self.internal_usage_cache.async_get_cache(
220
+ key=request_count_api_key
221
+ ) # {"current_requests": 1, "current_tpm": 1, "current_rpm": 10}
222
+
223
+ tpm_limit_for_model = None
224
+ rpm_limit_for_model = None
225
+
226
+ _tpm_limit_for_key_model = get_key_model_tpm_limit(user_api_key_dict)
227
+ _rpm_limit_for_key_model = get_key_model_rpm_limit(user_api_key_dict)
228
+
229
+ if _model is not None:
230
+
231
+ if _tpm_limit_for_key_model:
232
+ tpm_limit_for_model = _tpm_limit_for_key_model.get(_model)
233
+
234
+ if _rpm_limit_for_key_model:
235
+ rpm_limit_for_model = _rpm_limit_for_key_model.get(_model)
236
+ if current is None:
237
+ new_val = {
238
+ "current_requests": 1,
239
+ "current_tpm": 0,
240
+ "current_rpm": 0,
241
+ }
242
+ await self.internal_usage_cache.async_set_cache(
243
+ request_count_api_key, new_val
244
+ )
245
+ elif tpm_limit_for_model is not None or rpm_limit_for_model is not None:
246
+ # Increase count for this token
247
+ new_val = {
248
+ "current_requests": current["current_requests"] + 1,
249
+ "current_tpm": current["current_tpm"],
250
+ "current_rpm": current["current_rpm"],
251
+ }
252
+ if (
253
+ tpm_limit_for_model is not None
254
+ and current["current_tpm"] >= tpm_limit_for_model
255
+ ):
256
+ return self.raise_rate_limit_error(
257
+ additional_details=f"Hit TPM limit for model: {_model} on api_key: {api_key}. tpm_limit: {tpm_limit_for_model}, current_tpm {current['current_tpm']} "
258
+ )
259
+ elif (
260
+ rpm_limit_for_model is not None
261
+ and current["current_rpm"] >= rpm_limit_for_model
262
+ ):
263
+ return self.raise_rate_limit_error(
264
+ additional_details=f"Hit RPM limit for model: {_model} on api_key: {api_key}. rpm_limit: {rpm_limit_for_model}, current_rpm {current['current_rpm']} "
265
+ )
266
+ else:
267
+ await self.internal_usage_cache.async_set_cache(
268
+ request_count_api_key, new_val
269
+ )
270
+
271
+ _remaining_tokens = None
272
+ _remaining_requests = None
273
+ # Add remaining tokens, requests to metadata
274
+ if tpm_limit_for_model is not None:
275
+ _remaining_tokens = tpm_limit_for_model - new_val["current_tpm"]
276
+ if rpm_limit_for_model is not None:
277
+ _remaining_requests = rpm_limit_for_model - new_val["current_rpm"]
278
+
279
+ _remaining_limits_data = {
280
+ f"litellm-key-remaining-tokens-{_model}": _remaining_tokens,
281
+ f"litellm-key-remaining-requests-{_model}": _remaining_requests,
282
+ }
283
+
284
+ if "metadata" not in data:
285
+ data["metadata"] = {}
286
+ data["metadata"].update(_remaining_limits_data)
287
+
288
  # check if REQUEST ALLOWED for user_id
289
  user_id = user_api_key_dict.user_id
290
  if user_id is not None:
 
382
  return
383
 
384
  async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
385
+ from litellm.proxy.common_utils.callback_utils import (
386
+ get_model_group_from_litellm_kwargs,
387
+ )
388
+
389
  try:
390
  self.print_verbose("INSIDE parallel request limiter ASYNC SUCCESS LOGGING")
391
  global_max_parallel_requests = kwargs["litellm_params"]["metadata"].get(
 
452
  request_count_api_key, new_val, ttl=60
453
  ) # store in cache for 1 min.
454
 
455
+ # ------------
456
+ # Update usage - model group + API Key
457
+ # ------------
458
+ model_group = get_model_group_from_litellm_kwargs(kwargs)
459
+ if user_api_key is not None and model_group is not None:
460
+ request_count_api_key = (
461
+ f"{user_api_key}::{model_group}::{precise_minute}::request_count"
462
+ )
463
+
464
+ current = await self.internal_usage_cache.async_get_cache(
465
+ key=request_count_api_key
466
+ ) or {
467
+ "current_requests": 1,
468
+ "current_tpm": total_tokens,
469
+ "current_rpm": 1,
470
+ }
471
+
472
+ new_val = {
473
+ "current_requests": max(current["current_requests"] - 1, 0),
474
+ "current_tpm": current["current_tpm"] + total_tokens,
475
+ "current_rpm": current["current_rpm"] + 1,
476
+ }
477
+
478
+ self.print_verbose(
479
+ f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
480
+ )
481
+ await self.internal_usage_cache.async_set_cache(
482
+ request_count_api_key, new_val, ttl=60
483
+ )
484
+
485
  # ------------
486
  # Update usage - User
487
  # ------------
.venv/lib/python3.12/site-packages/litellm/proxy/management_endpoints/key_management_endpoints.py CHANGED
@@ -68,7 +68,8 @@ async def generate_key_fn(
68
  - metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "[email protected]" }
69
  - permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
70
  - model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget.
71
-
 
72
  Examples:
73
 
74
  1. Allow users to turn on/off pii masking
@@ -323,6 +324,9 @@ async def update_key_fn(
323
  # get non default values for key
324
  non_default_values = {}
325
  for k, v in data_json.items():
 
 
 
326
  if v is not None and v not in (
327
  [],
328
  {},
@@ -343,6 +347,25 @@ async def update_key_fn(
343
  key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
344
  non_default_values["budget_reset_at"] = key_reset_at
345
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  response = await prisma_client.update_data(
347
  token=key, data={**non_default_values, "token": key}
348
  )
@@ -709,6 +732,8 @@ async def generate_key_helper_fn(
709
  allowed_cache_controls: Optional[list] = [],
710
  permissions: Optional[dict] = {},
711
  model_max_budget: Optional[dict] = {},
 
 
712
  teams: Optional[list] = None,
713
  organization_id: Optional[str] = None,
714
  table_name: Optional[Literal["key", "user"]] = None,
@@ -750,6 +775,15 @@ async def generate_key_helper_fn(
750
  aliases_json = json.dumps(aliases)
751
  config_json = json.dumps(config)
752
  permissions_json = json.dumps(permissions)
 
 
 
 
 
 
 
 
 
753
  metadata_json = json.dumps(metadata)
754
  model_max_budget_json = json.dumps(model_max_budget)
755
  user_role = user_role
 
68
  - metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "[email protected]" }
69
  - permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
70
  - model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget.
71
+ - model_rpm_limit: Optional[dict] - key-specific model rpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific rpm limit.
72
+ - model_tpm_limit: Optional[dict] - key-specific model tpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific tpm limit.
73
  Examples:
74
 
75
  1. Allow users to turn on/off pii masking
 
324
  # get non default values for key
325
  non_default_values = {}
326
  for k, v in data_json.items():
327
+ # this field gets stored in metadata
328
+ if key == "model_rpm_limit" or key == "model_tpm_limit":
329
+ continue
330
  if v is not None and v not in (
331
  [],
332
  {},
 
347
  key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
348
  non_default_values["budget_reset_at"] = key_reset_at
349
 
350
+ # Update metadata for virtual Key
351
+ if data.model_tpm_limit:
352
+ _metadata = existing_key_row.metadata or {}
353
+ if "model_tpm_limit" not in _metadata:
354
+ _metadata["model_tpm_limit"] = {}
355
+
356
+ _metadata["model_tpm_limit"].update(data.model_tpm_limit)
357
+ non_default_values["metadata"] = _metadata
358
+ non_default_values.pop("model_tpm_limit", None)
359
+
360
+ if data.model_rpm_limit:
361
+ _metadata = existing_key_row.metadata or {}
362
+ if "model_rpm_limit" not in _metadata:
363
+ _metadata["model_rpm_limit"] = {}
364
+
365
+ _metadata["model_rpm_limit"].update(data.model_rpm_limit)
366
+ non_default_values["metadata"] = _metadata
367
+ non_default_values.pop("model_rpm_limit", None)
368
+
369
  response = await prisma_client.update_data(
370
  token=key, data={**non_default_values, "token": key}
371
  )
 
732
  allowed_cache_controls: Optional[list] = [],
733
  permissions: Optional[dict] = {},
734
  model_max_budget: Optional[dict] = {},
735
+ model_rpm_limit: Optional[dict] = {},
736
+ model_tpm_limit: Optional[dict] = {},
737
  teams: Optional[list] = None,
738
  organization_id: Optional[str] = None,
739
  table_name: Optional[Literal["key", "user"]] = None,
 
775
  aliases_json = json.dumps(aliases)
776
  config_json = json.dumps(config)
777
  permissions_json = json.dumps(permissions)
778
+
779
+ # Add model_rpm_limit and model_tpm_limit to metadata
780
+ if model_rpm_limit is not None:
781
+ metadata = metadata or {}
782
+ metadata["model_rpm_limit"] = model_rpm_limit
783
+ if model_tpm_limit is not None:
784
+ metadata = metadata or {}
785
+ metadata["model_tpm_limit"] = model_tpm_limit
786
+
787
  metadata_json = json.dumps(metadata)
788
  model_max_budget_json = json.dumps(model_max_budget)
789
  user_role = user_role
.venv/lib/python3.12/site-packages/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py CHANGED
@@ -3,7 +3,7 @@ import asyncio
3
  import json
4
  import traceback
5
  from base64 import b64encode
6
- from typing import List, Optional
7
 
8
  import httpx
9
  from fastapi import (
@@ -267,12 +267,25 @@ def forward_headers_from_request(
267
  return headers
268
 
269
 
 
 
 
 
 
 
 
 
 
 
 
270
  async def pass_through_request(
271
  request: Request,
272
  target: str,
273
  custom_headers: dict,
274
  user_api_key_dict: UserAPIKeyAuth,
275
  forward_headers: Optional[bool] = False,
 
 
276
  ):
277
  try:
278
  import time
@@ -291,7 +304,7 @@ async def pass_through_request(
291
  body_str = request_body.decode()
292
  try:
293
  _parsed_body = ast.literal_eval(body_str)
294
- except:
295
  _parsed_body = json.loads(body_str)
296
 
297
  verbose_proxy_logger.debug(
@@ -307,25 +320,10 @@ async def pass_through_request(
307
  call_type="pass_through_endpoint",
308
  )
309
 
310
- async_client = httpx.AsyncClient()
311
-
312
- response = await async_client.request(
313
- method=request.method,
314
- url=url,
315
- headers=headers,
316
- params=request.query_params,
317
- json=_parsed_body,
318
- )
319
-
320
- if response.status_code >= 300:
321
- raise HTTPException(status_code=response.status_code, detail=response.text)
322
-
323
- content = await response.aread()
324
 
325
- ## LOG SUCCESS
326
- start_time = time.time()
327
- end_time = time.time()
328
  # create logging object
 
329
  logging_obj = Logging(
330
  model="unknown",
331
  messages=[{"role": "user", "content": "no-message-pass-through-endpoint"}],
@@ -335,6 +333,7 @@ async def pass_through_request(
335
  litellm_call_id=str(uuid.uuid4()),
336
  function_id="1245",
337
  )
 
338
  # done for supporting 'parallel_request_limiter.py' with pass-through endpoints
339
  kwargs = {
340
  "litellm_params": {
@@ -355,6 +354,103 @@ async def pass_through_request(
355
  call_type="pass_through_endpoint",
356
  )
357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  await logging_obj.async_success_handler(
359
  result="",
360
  start_time=start_time,
@@ -365,7 +461,7 @@ async def pass_through_request(
365
  return Response(
366
  content=content,
367
  status_code=response.status_code,
368
- headers=dict(response.headers),
369
  )
370
  except Exception as e:
371
  verbose_proxy_logger.exception(
@@ -423,19 +519,25 @@ def create_pass_through_route(
423
  )
424
 
425
  except Exception:
426
- verbose_proxy_logger.warning("Defaulting to target being a url.")
427
 
428
- async def endpoint_func(
429
  request: Request,
430
  fastapi_response: Response,
431
  user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 
 
 
 
432
  ):
433
- return await pass_through_request(
434
  request=request,
435
  target=target,
436
  custom_headers=custom_headers or {},
437
  user_api_key_dict=user_api_key_dict,
438
  forward_headers=_forward_headers,
 
 
439
  )
440
 
441
  return endpoint_func
 
3
  import json
4
  import traceback
5
  from base64 import b64encode
6
+ from typing import AsyncIterable, List, Optional
7
 
8
  import httpx
9
  from fastapi import (
 
267
  return headers
268
 
269
 
270
def get_response_headers(headers: httpx.Headers) -> dict:
    """Return the upstream response headers that are safe to relay to the client.

    The pass-through proxy re-sends the upstream body itself, so headers that
    describe how the *upstream* framed or compressed that body must not be
    copied onto the proxied response.

    Args:
        headers: Headers of the upstream response (any mapping with
            ``keys()`` / ``items()`` works).

    Returns:
        A plain ``dict`` with the framing headers removed. Header names are
        matched case-insensitively; the surviving keys keep the spelling the
        input mapping yields.
    """
    # Always dropped: the proxied response is framed by our own server.
    excluded_headers = {"transfer-encoding", "content-encoding"}

    # httpx hands back *decoded* bytes for compressed responses, so when the
    # upstream declared a content-encoding its content-length refers to the
    # compressed size and no longer matches the body we forward. Relaying it
    # makes the declared and actual body lengths disagree.
    if any(key.lower() == "content-encoding" for key in headers.keys()):
        excluded_headers.add("content-length")

    return {
        key: value
        for key, value in headers.items()
        if key.lower() not in excluded_headers
    }
279
+
280
+
281
  async def pass_through_request(
282
  request: Request,
283
  target: str,
284
  custom_headers: dict,
285
  user_api_key_dict: UserAPIKeyAuth,
286
  forward_headers: Optional[bool] = False,
287
+ query_params: Optional[dict] = None,
288
+ stream: Optional[bool] = None,
289
  ):
290
  try:
291
  import time
 
304
  body_str = request_body.decode()
305
  try:
306
  _parsed_body = ast.literal_eval(body_str)
307
+ except Exception:
308
  _parsed_body = json.loads(body_str)
309
 
310
  verbose_proxy_logger.debug(
 
320
  call_type="pass_through_endpoint",
321
  )
322
 
323
+ async_client = httpx.AsyncClient(timeout=600)
 
 
 
 
 
 
 
 
 
 
 
 
 
324
 
 
 
 
325
  # create logging object
326
+ start_time = time.time()
327
  logging_obj = Logging(
328
  model="unknown",
329
  messages=[{"role": "user", "content": "no-message-pass-through-endpoint"}],
 
333
  litellm_call_id=str(uuid.uuid4()),
334
  function_id="1245",
335
  )
336
+
337
  # done for supporting 'parallel_request_limiter.py' with pass-through endpoints
338
  kwargs = {
339
  "litellm_params": {
 
354
  call_type="pass_through_endpoint",
355
  )
356
 
357
+ # combine url with query params for logging
358
+
359
+ requested_query_params = query_params or request.query_params.__dict__
360
+ requested_query_params_str = "&".join(
361
+ f"{k}={v}" for k, v in requested_query_params.items()
362
+ )
363
+
364
+ if "?" in str(url):
365
+ logging_url = str(url) + "&" + requested_query_params_str
366
+ else:
367
+ logging_url = str(url) + "?" + requested_query_params_str
368
+
369
+ logging_obj.pre_call(
370
+ input=[{"role": "user", "content": "no-message-pass-through-endpoint"}],
371
+ api_key="",
372
+ additional_args={
373
+ "complete_input_dict": _parsed_body,
374
+ "api_base": logging_url,
375
+ "headers": headers,
376
+ },
377
+ )
378
+
379
+ if stream:
380
+ req = async_client.build_request(
381
+ "POST",
382
+ url,
383
+ json=_parsed_body,
384
+ params=requested_query_params,
385
+ headers=headers,
386
+ )
387
+
388
+ response = await async_client.send(req, stream=stream)
389
+
390
+ try:
391
+ response.raise_for_status()
392
+ except httpx.HTTPStatusError as e:
393
+ raise HTTPException(
394
+ status_code=e.response.status_code, detail=await e.response.aread()
395
+ )
396
+
397
+ # Create an async generator to yield the response content
398
+ async def stream_response() -> AsyncIterable[bytes]:
399
+ async for chunk in response.aiter_bytes():
400
+ yield chunk
401
+
402
+ return StreamingResponse(
403
+ stream_response(),
404
+ headers=get_response_headers(response.headers),
405
+ status_code=response.status_code,
406
+ )
407
+
408
+ response = await async_client.request(
409
+ method=request.method,
410
+ url=url,
411
+ headers=headers,
412
+ params=requested_query_params,
413
+ json=_parsed_body,
414
+ )
415
+
416
+ if (
417
+ response.headers.get("content-type") is not None
418
+ and response.headers["content-type"] == "text/event-stream"
419
+ ):
420
+ try:
421
+ response.raise_for_status()
422
+ except httpx.HTTPStatusError as e:
423
+ raise HTTPException(
424
+ status_code=e.response.status_code, detail=await e.response.aread()
425
+ )
426
+
427
+ # streaming response
428
+ # Create an async generator to yield the response content
429
+ async def stream_response() -> AsyncIterable[bytes]:
430
+ async for chunk in response.aiter_bytes():
431
+ yield chunk
432
+
433
+ return StreamingResponse(
434
+ stream_response(),
435
+ headers=get_response_headers(response.headers),
436
+ status_code=response.status_code,
437
+ )
438
+
439
+ try:
440
+ response.raise_for_status()
441
+ except httpx.HTTPStatusError as e:
442
+ raise HTTPException(
443
+ status_code=e.response.status_code, detail=e.response.text
444
+ )
445
+
446
+ if response.status_code >= 300:
447
+ raise HTTPException(status_code=response.status_code, detail=response.text)
448
+
449
+ content = await response.aread()
450
+
451
+ ## LOG SUCCESS
452
+ end_time = time.time()
453
+
454
  await logging_obj.async_success_handler(
455
  result="",
456
  start_time=start_time,
 
461
  return Response(
462
  content=content,
463
  status_code=response.status_code,
464
+ headers=get_response_headers(response.headers),
465
  )
466
  except Exception as e:
467
  verbose_proxy_logger.exception(
 
519
  )
520
 
521
  except Exception:
522
+ verbose_proxy_logger.debug("Defaulting to target being a url.")
523
 
524
+ async def endpoint_func( # type: ignore
525
  request: Request,
526
  fastapi_response: Response,
527
  user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
528
+ query_params: Optional[dict] = None,
529
+ stream: Optional[
530
+ bool
531
+ ] = None, # if pass-through endpoint is a streaming request
532
  ):
533
+ return await pass_through_request( # type: ignore
534
  request=request,
535
  target=target,
536
  custom_headers=custom_headers or {},
537
  user_api_key_dict=user_api_key_dict,
538
  forward_headers=_forward_headers,
539
+ query_params=query_params,
540
+ stream=stream,
541
  )
542
 
543
  return endpoint_func
.venv/lib/python3.12/site-packages/litellm/proxy/proxy_config.yaml CHANGED
@@ -4,10 +4,14 @@ model_list:
4
  model: openai/fake
5
  api_key: fake-key
6
  api_base: https://exampleopenaiendpoint-production.up.railway.app/
 
 
7
  - model_name: fireworks-llama-v3-70b-instruct
8
  litellm_params:
9
  model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
10
  api_key: "os.environ/FIREWORKS"
 
 
11
  - model_name: "*"
12
  litellm_params:
13
  model: "*"
@@ -42,7 +46,5 @@ general_settings:
42
 
43
  litellm_settings:
44
  fallbacks: [{"gemini-1.5-pro-001": ["gpt-4o"]}]
45
- callbacks: ["gcs_bucket"]
46
- success_callback: ["langfuse"]
47
  langfuse_default_tags: ["cache_hit", "cache_key", "user_api_key_alias", "user_api_key_team_alias"]
48
- cache: True
 
4
  model: openai/fake
5
  api_key: fake-key
6
  api_base: https://exampleopenaiendpoint-production.up.railway.app/
7
+ model_info:
8
+ access_groups: ["beta-models"]
9
  - model_name: fireworks-llama-v3-70b-instruct
10
  litellm_params:
11
  model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
12
  api_key: "os.environ/FIREWORKS"
13
+ model_info:
14
+ access_groups: ["beta-models"]
15
  - model_name: "*"
16
  litellm_params:
17
  model: "*"
 
46
 
47
  litellm_settings:
48
  fallbacks: [{"gemini-1.5-pro-001": ["gpt-4o"]}]
49
+ success_callback: ["langfuse", "prometheus"]
 
50
  langfuse_default_tags: ["cache_hit", "cache_key", "user_api_key_alias", "user_api_key_team_alias"]
 
.venv/lib/python3.12/site-packages/litellm/proxy/proxy_server.py CHANGED
@@ -148,6 +148,10 @@ from litellm.proxy.common_utils.admin_ui_utils import (
148
  html_form,
149
  show_missing_vars_in_env,
150
  )
 
 
 
 
151
  from litellm.proxy.common_utils.debug_utils import init_verbose_loggers
152
  from litellm.proxy.common_utils.debug_utils import router as debugging_endpoints_router
153
  from litellm.proxy.common_utils.encrypt_decrypt_utils import (
@@ -158,7 +162,6 @@ from litellm.proxy.common_utils.http_parsing_utils import (
158
  _read_request_body,
159
  check_file_size_under_limit,
160
  )
161
- from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy
162
  from litellm.proxy.common_utils.load_config_utils import get_file_contents_from_s3
163
  from litellm.proxy.common_utils.openai_endpoint_utils import (
164
  remove_sensitive_info_from_deployment,
@@ -227,6 +230,9 @@ from litellm.proxy.utils import (
227
  send_email,
228
  update_spend,
229
  )
 
 
 
230
  from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import router as vertex_router
231
  from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import set_default_vertex_config
232
  from litellm.router import (
@@ -503,6 +509,7 @@ def get_custom_headers(
503
  model_region: Optional[str] = None,
504
  response_cost: Optional[Union[float, str]] = None,
505
  fastest_response_batch_completion: Optional[bool] = None,
 
506
  **kwargs,
507
  ) -> dict:
508
  exclude_values = {"", None}
@@ -523,6 +530,12 @@ def get_custom_headers(
523
  ),
524
  **{k: str(v) for k, v in kwargs.items()},
525
  }
 
 
 
 
 
 
526
  try:
527
  return {
528
  key: value for key, value in headers.items() if value not in exclude_values
@@ -3107,6 +3120,7 @@ async def chat_completion(
3107
  response_cost=response_cost,
3108
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3109
  fastest_response_batch_completion=fastest_response_batch_completion,
 
3110
  **additional_headers,
3111
  )
3112
  selected_data_generator = select_data_generator(
@@ -3141,6 +3155,7 @@ async def chat_completion(
3141
  response_cost=response_cost,
3142
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3143
  fastest_response_batch_completion=fastest_response_batch_completion,
 
3144
  **additional_headers,
3145
  )
3146
  )
@@ -3322,6 +3337,7 @@ async def completion(
3322
  api_base=api_base,
3323
  version=version,
3324
  response_cost=response_cost,
 
3325
  )
3326
  selected_data_generator = select_data_generator(
3327
  response=response,
@@ -3343,6 +3359,7 @@ async def completion(
3343
  api_base=api_base,
3344
  version=version,
3345
  response_cost=response_cost,
 
3346
  )
3347
  )
3348
  await check_response_size_is_safe(response=response)
@@ -3550,6 +3567,7 @@ async def embeddings(
3550
  response_cost=response_cost,
3551
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3552
  call_id=litellm_call_id,
 
3553
  )
3554
  )
3555
  await check_response_size_is_safe(response=response)
@@ -3676,6 +3694,7 @@ async def image_generation(
3676
  response_cost=response_cost,
3677
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3678
  call_id=litellm_call_id,
 
3679
  )
3680
  )
3681
 
@@ -3797,6 +3816,7 @@ async def audio_speech(
3797
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3798
  fastest_response_batch_completion=None,
3799
  call_id=litellm_call_id,
 
3800
  )
3801
 
3802
  selected_data_generator = select_data_generator(
@@ -3934,6 +3954,7 @@ async def audio_transcriptions(
3934
  response_cost=response_cost,
3935
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3936
  call_id=litellm_call_id,
 
3937
  )
3938
  )
3939
 
@@ -4037,6 +4058,7 @@ async def get_assistants(
4037
  api_base=api_base,
4038
  version=version,
4039
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4040
  )
4041
  )
4042
 
@@ -4132,6 +4154,7 @@ async def create_assistant(
4132
  api_base=api_base,
4133
  version=version,
4134
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4135
  )
4136
  )
4137
 
@@ -4227,6 +4250,7 @@ async def delete_assistant(
4227
  api_base=api_base,
4228
  version=version,
4229
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4230
  )
4231
  )
4232
 
@@ -4322,6 +4346,7 @@ async def create_threads(
4322
  api_base=api_base,
4323
  version=version,
4324
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4325
  )
4326
  )
4327
 
@@ -4416,6 +4441,7 @@ async def get_thread(
4416
  api_base=api_base,
4417
  version=version,
4418
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4419
  )
4420
  )
4421
 
@@ -4513,6 +4539,7 @@ async def add_messages(
4513
  api_base=api_base,
4514
  version=version,
4515
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4516
  )
4517
  )
4518
 
@@ -4606,6 +4633,7 @@ async def get_messages(
4606
  api_base=api_base,
4607
  version=version,
4608
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4609
  )
4610
  )
4611
 
@@ -4713,6 +4741,7 @@ async def run_thread(
4713
  api_base=api_base,
4714
  version=version,
4715
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4716
  )
4717
  )
4718
 
@@ -4835,6 +4864,7 @@ async def create_batch(
4835
  api_base=api_base,
4836
  version=version,
4837
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4838
  )
4839
  )
4840
 
@@ -4930,6 +4960,7 @@ async def retrieve_batch(
4930
  api_base=api_base,
4931
  version=version,
4932
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4933
  )
4934
  )
4935
 
@@ -5148,6 +5179,7 @@ async def moderations(
5148
  api_base=api_base,
5149
  version=version,
5150
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
5151
  )
5152
  )
5153
 
@@ -5317,6 +5349,7 @@ async def anthropic_response(
5317
  api_base=api_base,
5318
  version=version,
5319
  response_cost=response_cost,
 
5320
  )
5321
  )
5322
 
@@ -9704,6 +9737,7 @@ def cleanup_router_config_variables():
9704
  app.include_router(router)
9705
  app.include_router(fine_tuning_router)
9706
  app.include_router(vertex_router)
 
9707
  app.include_router(pass_through_router)
9708
  app.include_router(health_router)
9709
  app.include_router(key_management_router)
 
148
  html_form,
149
  show_missing_vars_in_env,
150
  )
151
+ from litellm.proxy.common_utils.callback_utils import (
152
+ get_remaining_tokens_and_requests_from_request_data,
153
+ initialize_callbacks_on_proxy,
154
+ )
155
  from litellm.proxy.common_utils.debug_utils import init_verbose_loggers
156
  from litellm.proxy.common_utils.debug_utils import router as debugging_endpoints_router
157
  from litellm.proxy.common_utils.encrypt_decrypt_utils import (
 
162
  _read_request_body,
163
  check_file_size_under_limit,
164
  )
 
165
  from litellm.proxy.common_utils.load_config_utils import get_file_contents_from_s3
166
  from litellm.proxy.common_utils.openai_endpoint_utils import (
167
  remove_sensitive_info_from_deployment,
 
230
  send_email,
231
  update_spend,
232
  )
233
+ from litellm.proxy.vertex_ai_endpoints.google_ai_studio_endpoints import (
234
+ router as gemini_router,
235
+ )
236
  from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import router as vertex_router
237
  from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import set_default_vertex_config
238
  from litellm.router import (
 
509
  model_region: Optional[str] = None,
510
  response_cost: Optional[Union[float, str]] = None,
511
  fastest_response_batch_completion: Optional[bool] = None,
512
+ request_data: Optional[dict] = {},
513
  **kwargs,
514
  ) -> dict:
515
  exclude_values = {"", None}
 
530
  ),
531
  **{k: str(v) for k, v in kwargs.items()},
532
  }
533
+ if request_data:
534
+ remaining_tokens_header = get_remaining_tokens_and_requests_from_request_data(
535
+ request_data
536
+ )
537
+ headers.update(remaining_tokens_header)
538
+
539
  try:
540
  return {
541
  key: value for key, value in headers.items() if value not in exclude_values
 
3120
  response_cost=response_cost,
3121
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3122
  fastest_response_batch_completion=fastest_response_batch_completion,
3123
+ request_data=data,
3124
  **additional_headers,
3125
  )
3126
  selected_data_generator = select_data_generator(
 
3155
  response_cost=response_cost,
3156
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3157
  fastest_response_batch_completion=fastest_response_batch_completion,
3158
+ request_data=data,
3159
  **additional_headers,
3160
  )
3161
  )
 
3337
  api_base=api_base,
3338
  version=version,
3339
  response_cost=response_cost,
3340
+ request_data=data,
3341
  )
3342
  selected_data_generator = select_data_generator(
3343
  response=response,
 
3359
  api_base=api_base,
3360
  version=version,
3361
  response_cost=response_cost,
3362
+ request_data=data,
3363
  )
3364
  )
3365
  await check_response_size_is_safe(response=response)
 
3567
  response_cost=response_cost,
3568
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3569
  call_id=litellm_call_id,
3570
+ request_data=data,
3571
  )
3572
  )
3573
  await check_response_size_is_safe(response=response)
 
3694
  response_cost=response_cost,
3695
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3696
  call_id=litellm_call_id,
3697
+ request_data=data,
3698
  )
3699
  )
3700
 
 
3816
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3817
  fastest_response_batch_completion=None,
3818
  call_id=litellm_call_id,
3819
+ request_data=data,
3820
  )
3821
 
3822
  selected_data_generator = select_data_generator(
 
3954
  response_cost=response_cost,
3955
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3956
  call_id=litellm_call_id,
3957
+ request_data=data,
3958
  )
3959
  )
3960
 
 
4058
  api_base=api_base,
4059
  version=version,
4060
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4061
+ request_data=data,
4062
  )
4063
  )
4064
 
 
4154
  api_base=api_base,
4155
  version=version,
4156
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4157
+ request_data=data,
4158
  )
4159
  )
4160
 
 
4250
  api_base=api_base,
4251
  version=version,
4252
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4253
+ request_data=data,
4254
  )
4255
  )
4256
 
 
4346
  api_base=api_base,
4347
  version=version,
4348
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4349
+ request_data=data,
4350
  )
4351
  )
4352
 
 
4441
  api_base=api_base,
4442
  version=version,
4443
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4444
+ request_data=data,
4445
  )
4446
  )
4447
 
 
4539
  api_base=api_base,
4540
  version=version,
4541
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4542
+ request_data=data,
4543
  )
4544
  )
4545
 
 
4633
  api_base=api_base,
4634
  version=version,
4635
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4636
+ request_data=data,
4637
  )
4638
  )
4639
 
 
4741
  api_base=api_base,
4742
  version=version,
4743
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4744
+ request_data=data,
4745
  )
4746
  )
4747
 
 
4864
  api_base=api_base,
4865
  version=version,
4866
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4867
+ request_data=data,
4868
  )
4869
  )
4870
 
 
4960
  api_base=api_base,
4961
  version=version,
4962
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4963
+ request_data=data,
4964
  )
4965
  )
4966
 
 
5179
  api_base=api_base,
5180
  version=version,
5181
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
5182
+ request_data=data,
5183
  )
5184
  )
5185
 
 
5349
  api_base=api_base,
5350
  version=version,
5351
  response_cost=response_cost,
5352
+ request_data=data,
5353
  )
5354
  )
5355
 
 
9737
  app.include_router(router)
9738
  app.include_router(fine_tuning_router)
9739
  app.include_router(vertex_router)
9740
+ app.include_router(gemini_router)
9741
  app.include_router(pass_through_router)
9742
  app.include_router(health_router)
9743
  app.include_router(key_management_router)
.venv/lib/python3.12/site-packages/litellm/proxy/vertex_ai_endpoints/google_ai_studio_endpoints.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ What is this?
3
+
4
+ Google AI Studio Pass-Through Endpoints
5
+ """
6
+
7
+ """
8
+ 1. Create pass-through endpoints for any LITELLM_BASE_URL/gemini/<endpoint> map to https://generativelanguage.googleapis.com/<endpoint>
9
+ """
10
+
11
+ import ast
12
+ import asyncio
13
+ import traceback
14
+ from datetime import datetime, timedelta, timezone
15
+ from typing import List, Optional
16
+ from urllib.parse import urlencode
17
+
18
+ import fastapi
19
+ import httpx
20
+ from fastapi import (
21
+ APIRouter,
22
+ Depends,
23
+ File,
24
+ Form,
25
+ Header,
26
+ HTTPException,
27
+ Request,
28
+ Response,
29
+ UploadFile,
30
+ status,
31
+ )
32
+ from starlette.datastructures import QueryParams
33
+
34
+ import litellm
35
+ from litellm._logging import verbose_proxy_logger
36
+ from litellm.batches.main import FileObject
37
+ from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance
38
+ from litellm.proxy._types import *
39
+ from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
40
+ from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
41
+ create_pass_through_route,
42
+ )
43
+
44
+ router = APIRouter()
45
+ default_vertex_config = None
46
+
47
+
48
@router.api_route("/gemini/{endpoint:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def gemini_proxy_route(
    endpoint: str,
    request: Request,
    fastapi_response: Response,
):
    """Forward ``/gemini/<endpoint>`` to ``https://generativelanguage.googleapis.com/<endpoint>``.

    The caller authenticates with a LiteLLM key supplied as the ``key`` query
    parameter. That value is checked through ``user_api_key_auth`` and is then
    replaced by the ``GEMINI_API_KEY`` secret on the outgoing request.

    Args:
        endpoint: Path captured after ``/gemini/``.
        request: Incoming request; its query parameters are forwarded.
        fastapi_response: Response object handed through to the pass-through
            handler.

    Returns:
        Whatever the dynamically created pass-through handler returns.
    """
    # The LiteLLM key arrives as ?key=... and is presented to the auth check
    # in bearer form.
    litellm_key = request.query_params.get("key")
    user_api_key_dict = await user_api_key_auth(
        request=request, api_key=f"Bearer {litellm_key}"
    )

    # Normalise the captured path so it always begins with '/'.
    target_path = httpx.URL(endpoint).path
    if not target_path.startswith("/"):
        target_path = "/" + target_path

    target_url = httpx.URL("https://generativelanguage.googleapis.com").copy_with(
        path=target_path
    )

    # Keep the caller's query parameters, but overwrite "key" with the real
    # Gemini credential.
    gemini_api_key = litellm.utils.get_secret(secret_name="GEMINI_API_KEY")
    outgoing_params = dict(request.query_params)
    outgoing_params["key"] = gemini_api_key

    # Any target URL containing "stream" is handled as a streaming request.
    is_streaming_request = "stream" in str(target_url)

    # Build the pass-through handler for this specific path and invoke it.
    endpoint_func = create_pass_through_route(
        endpoint=endpoint,
        target=str(target_url),
    )
    return await endpoint_func(
        request,
        fastapi_response,
        user_api_key_dict,
        query_params=outgoing_params,
        stream=is_streaming_request,
    )
97
+
98
+
99
@router.api_route("/cohere/{endpoint:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def cohere_proxy_route(
    endpoint: str,
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """Forward ``/cohere/<endpoint>`` to ``https://api.cohere.com/<endpoint>``.

    The outgoing request carries an ``Authorization: Bearer`` header built from
    the ``COHERE_API_KEY`` secret.

    Args:
        endpoint: Path captured after ``/cohere/``.
        request: Incoming request.
        fastapi_response: Response object handed through to the pass-through
            handler.
        user_api_key_dict: Auth result resolved by the ``user_api_key_auth``
            dependency.

    Returns:
        Whatever the dynamically created pass-through handler returns.
    """
    # Normalise the captured path so it always begins with '/'.
    target_path = httpx.URL(endpoint).path
    if not target_path.startswith("/"):
        target_path = "/" + target_path

    target_url = httpx.URL("https://api.cohere.com").copy_with(path=target_path)

    # Credential sent upstream via the Authorization header.
    cohere_api_key = litellm.utils.get_secret(secret_name="COHERE_API_KEY")

    # Any target URL containing "stream" is handled as a streaming request.
    is_streaming_request = "stream" in str(target_url)

    # Build the pass-through handler for this specific path and invoke it.
    endpoint_func = create_pass_through_route(
        endpoint=endpoint,
        target=str(target_url),
        custom_headers={"Authorization": f"Bearer {cohere_api_key}"},
    )
    return await endpoint_func(
        request,
        fastapi_response,
        user_api_key_dict,
        stream=is_streaming_request,
    )
.venv/lib/python3.12/site-packages/litellm/router.py CHANGED
@@ -421,6 +421,7 @@ class Router:
421
  routing_strategy=routing_strategy,
422
  routing_strategy_args=routing_strategy_args,
423
  )
 
424
  ## USAGE TRACKING ##
425
  if isinstance(litellm._async_success_callback, list):
426
  litellm._async_success_callback.append(self.deployment_callback_on_success)
@@ -4116,6 +4117,22 @@ class Router:
4116
  return self.model_list
4117
  return None
4118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4119
  def get_settings(self):
4120
  """
4121
  Get router settings method, returns a dictionary of the settings and their values.
 
421
  routing_strategy=routing_strategy,
422
  routing_strategy_args=routing_strategy_args,
423
  )
424
+ self.access_groups = None
425
  ## USAGE TRACKING ##
426
  if isinstance(litellm._async_success_callback, list):
427
  litellm._async_success_callback.append(self.deployment_callback_on_success)
 
4117
  return self.model_list
4118
  return None
4119
 
4120
+ def get_model_access_groups(self):
4121
+ from collections import defaultdict
4122
+
4123
+ access_groups = defaultdict(list)
4124
+ if self.access_groups:
4125
+ return self.access_groups
4126
+
4127
+ if self.model_list:
4128
+ for m in self.model_list:
4129
+ for group in m.get("model_info", {}).get("access_groups", []):
4130
+ model_name = m["model_name"]
4131
+ access_groups[group].append(model_name)
4132
+ # set access groups
4133
+ self.access_groups = access_groups
4134
+ return access_groups
4135
+
4136
  def get_settings(self):
4137
  """
4138
  Get router settings method, returns a dictionary of the settings and their values.
.venv/lib/python3.12/site-packages/litellm/tests/test_anthropic_completion.py CHANGED
@@ -10,6 +10,7 @@ from dotenv import load_dotenv
10
 
11
  import litellm.types
12
  import litellm.types.utils
 
13
 
14
  load_dotenv()
15
  import io
@@ -150,6 +151,74 @@ def test_anthropic_completion_e2e(stream):
150
  assert message_stop_received is True
151
 
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  @pytest.mark.asyncio
154
  async def test_anthropic_router_completion_e2e():
155
  litellm.set_verbose = True
@@ -275,4 +344,4 @@ def test_anthropic_tool_calling_translation():
275
  print(translated_params["messages"])
276
 
277
  assert len(translated_params["messages"]) > 0
278
- assert translated_params["messages"][0]["role"] == "user"
 
10
 
11
  import litellm.types
12
  import litellm.types.utils
13
+ from litellm.llms.anthropic import ModelResponseIterator
14
 
15
  load_dotenv()
16
  import io
 
151
  assert message_stop_received is True
152
 
153
 
154
+ anthropic_chunk_list = [
155
+ {"type": "content_block_start", "index": 0, "content_block": {"type": "text", "text": ""}},
156
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "To"}},
157
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " answer"}},
158
+ {"type": "content_block_delta", "index": 0,
159
+ "delta": {"type": "text_delta", "text": " your question about the weather"}},
160
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " in Boston and Los"}},
161
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " Angeles today, I'll"}},
162
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " need to"}},
163
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " use"}},
164
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " the"}},
165
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " get_current_weather"}},
166
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " function"}},
167
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " for"}},
168
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " both"}},
169
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " cities"}},
170
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": ". Let"}},
171
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " me fetch"}},
172
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " that"}},
173
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " information"}},
174
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " for"}},
175
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " you."}},
176
+ {"type": "content_block_stop", "index": 0},
177
+ {"type": "content_block_start", "index": 1,
178
+ "content_block": {"type": "tool_use", "id": "toolu_12345", "name": "get_current_weather", "input": {}}},
179
+ {"type": "content_block_delta", "index": 1, "delta": {"type": "input_json_delta", "partial_json": ""}},
180
+ {"type": "content_block_delta", "index": 1, "delta": {"type": "input_json_delta", "partial_json": "{\"locat"}},
181
+ {"type": "content_block_delta", "index": 1, "delta": {"type": "input_json_delta", "partial_json": "ion\": \"Bos"}},
182
+ {"type": "content_block_delta", "index": 1, "delta": {"type": "input_json_delta", "partial_json": "ton, MA\"}"}},
183
+ {"type": "content_block_stop", "index": 1},
184
+ {"type": "content_block_start", "index": 2,
185
+ "content_block": {"type": "tool_use", "id": "toolu_023423423", "name": "get_current_weather", "input": {}}},
186
+ {"type": "content_block_delta", "index": 2, "delta": {"type": "input_json_delta", "partial_json": ""}},
187
+ {"type": "content_block_delta", "index": 2, "delta": {"type": "input_json_delta", "partial_json": "{\"l"}},
188
+ {"type": "content_block_delta", "index": 2, "delta": {"type": "input_json_delta", "partial_json": "oca"}},
189
+ {"type": "content_block_delta", "index": 2, "delta": {"type": "input_json_delta", "partial_json": "tio"}},
190
+ {"type": "content_block_delta", "index": 2, "delta": {"type": "input_json_delta", "partial_json": "n\": \"Lo"}},
191
+ {"type": "content_block_delta", "index": 2, "delta": {"type": "input_json_delta", "partial_json": "s Angel"}},
192
+ {"type": "content_block_delta", "index": 2, "delta": {"type": "input_json_delta", "partial_json": "es, CA\"}"}},
193
+ {"type": "content_block_stop", "index": 2},
194
+ {"type": "message_delta", "delta": {"stop_reason": "tool_use", "stop_sequence": None},
195
+ "usage": {"output_tokens": 137}},
196
+ {"type": "message_stop"}
197
+ ]
198
+
199
+
200
+ def test_anthropic_tool_streaming():
201
+ """
202
+ OpenAI starts tool_use indexes at 0 for the first tool, regardless of preceding text.
203
+
204
+ Anthropic gives tool_use indexes starting at the first chunk, meaning they often start at 1
205
+ when they should start at 0
206
+ """
207
+ litellm.set_verbose = True
208
+ response_iter = ModelResponseIterator([], False)
209
+
210
+ # First index is 0, we'll start earlier because incrementing is easier
211
+ correct_tool_index = -1
212
+ for chunk in anthropic_chunk_list:
213
+ parsed_chunk = response_iter.chunk_parser(chunk)
214
+ if tool_use := parsed_chunk.get('tool_use'):
215
+
216
+ # We only increment when a new block starts
217
+ if tool_use.get('id') is not None:
218
+ correct_tool_index += 1
219
+ assert tool_use['index'] == correct_tool_index
220
+
221
+
222
  @pytest.mark.asyncio
223
  async def test_anthropic_router_completion_e2e():
224
  litellm.set_verbose = True
 
344
  print(translated_params["messages"])
345
 
346
  assert len(translated_params["messages"]) > 0
347
+ assert translated_params["messages"][0]["role"] == "user"
.venv/lib/python3.12/site-packages/litellm/tests/test_key_generate_prisma.py CHANGED
@@ -2710,3 +2710,168 @@ async def test_custom_api_key_header_name(prisma_client):
2710
  pass
2711
 
2712
  # this should pass because X-Litellm-Key is valid
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2710
  pass
2711
 
2712
  # this should pass because X-Litellm-Key is valid
2713
+
2714
+
2715
+ @pytest.mark.asyncio()
2716
+ async def test_generate_key_with_model_tpm_limit(prisma_client):
2717
+ print("prisma client=", prisma_client)
2718
+
2719
+ setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
2720
+ setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
2721
+ await litellm.proxy.proxy_server.prisma_client.connect()
2722
+ request = GenerateKeyRequest(
2723
+ metadata={
2724
+ "team": "litellm-team3",
2725
+ "model_tpm_limit": {"gpt-4": 100},
2726
+ "model_rpm_limit": {"gpt-4": 2},
2727
+ }
2728
+ )
2729
+ key = await generate_key_fn(
2730
+ data=request,
2731
+ user_api_key_dict=UserAPIKeyAuth(
2732
+ user_role=LitellmUserRoles.PROXY_ADMIN,
2733
+ api_key="sk-1234",
2734
+ user_id="1234",
2735
+ ),
2736
+ )
2737
+ print(key)
2738
+
2739
+ generated_key = key.key
2740
+
2741
+ # use generated key to auth in
2742
+ result = await info_key_fn(key=generated_key)
2743
+ print("result from info_key_fn", result)
2744
+ assert result["key"] == generated_key
2745
+ print("\n info for key=", result["info"])
2746
+ assert result["info"]["metadata"] == {
2747
+ "team": "litellm-team3",
2748
+ "model_tpm_limit": {"gpt-4": 100},
2749
+ "model_rpm_limit": {"gpt-4": 2},
2750
+ }
2751
+
2752
+ # Update model tpm_limit and rpm_limit
2753
+ request = UpdateKeyRequest(
2754
+ key=generated_key,
2755
+ model_tpm_limit={"gpt-4": 200},
2756
+ model_rpm_limit={"gpt-4": 3},
2757
+ )
2758
+ _request = Request(scope={"type": "http"})
2759
+ _request._url = URL(url="/update/key")
2760
+
2761
+ await update_key_fn(data=request, request=_request)
2762
+ result = await info_key_fn(key=generated_key)
2763
+ print("result from info_key_fn", result)
2764
+ assert result["key"] == generated_key
2765
+ print("\n info for key=", result["info"])
2766
+ assert result["info"]["metadata"] == {
2767
+ "team": "litellm-team3",
2768
+ "model_tpm_limit": {"gpt-4": 200},
2769
+ "model_rpm_limit": {"gpt-4": 3},
2770
+ }
2771
+
2772
+
2773
+ @pytest.mark.asyncio()
2774
+ async def test_team_access_groups(prisma_client):
2775
+ """
2776
+ Test team based model access groups
2777
+
2778
+ - Test calling a model in the access group -> pass
2779
+ - Test calling a model not in the access group -> fail
2780
+ """
2781
+ litellm.set_verbose = True
2782
+ setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
2783
+ setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
2784
+ await litellm.proxy.proxy_server.prisma_client.connect()
2785
+ # create router with access groups
2786
+ litellm_router = litellm.Router(
2787
+ model_list=[
2788
+ {
2789
+ "model_name": "gemini-pro-vision",
2790
+ "litellm_params": {
2791
+ "model": "vertex_ai/gemini-1.0-pro-vision-001",
2792
+ },
2793
+ "model_info": {"access_groups": ["beta-models"]},
2794
+ },
2795
+ {
2796
+ "model_name": "gpt-4o",
2797
+ "litellm_params": {
2798
+ "model": "gpt-4o",
2799
+ },
2800
+ "model_info": {"access_groups": ["beta-models"]},
2801
+ },
2802
+ ]
2803
+ )
2804
+ setattr(litellm.proxy.proxy_server, "llm_router", litellm_router)
2805
+
2806
+ # Create team with models=["beta-models"]
2807
+ team_request = NewTeamRequest(
2808
+ team_alias="testing-team",
2809
+ models=["beta-models"],
2810
+ )
2811
+
2812
+ new_team_response = await new_team(
2813
+ data=team_request,
2814
+ user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
2815
+ http_request=Request(scope={"type": "http"}),
2816
+ )
2817
+ print("new_team_response", new_team_response)
2818
+ created_team_id = new_team_response["team_id"]
2819
+
2820
+ # create key with team_id=created_team_id
2821
+ request = GenerateKeyRequest(
2822
+ team_id=created_team_id,
2823
+ )
2824
+
2825
+ key = await generate_key_fn(
2826
+ data=request,
2827
+ user_api_key_dict=UserAPIKeyAuth(
2828
+ user_role=LitellmUserRoles.PROXY_ADMIN,
2829
+ api_key="sk-1234",
2830
+ user_id="1234",
2831
+ ),
2832
+ )
2833
+ print(key)
2834
+
2835
+ generated_key = key.key
2836
+ bearer_token = "Bearer " + generated_key
2837
+
2838
+ request = Request(scope={"type": "http"})
2839
+ request._url = URL(url="/chat/completions")
2840
+
2841
+ for model in ["gpt-4o", "gemini-pro-vision"]:
2842
+ # Expect these to pass
2843
+ async def return_body():
2844
+ return_string = f'{{"model": "{model}"}}'
2845
+ # return string as bytes
2846
+ return return_string.encode()
2847
+
2848
+ request.body = return_body
2849
+
2850
+ # use generated key to auth in
2851
+ print(
2852
+ "Bearer token being sent to user_api_key_auth() - {}".format(bearer_token)
2853
+ )
2854
+ result = await user_api_key_auth(request=request, api_key=bearer_token)
2855
+
2856
+ for model in ["gpt-4", "gpt-4o-mini", "gemini-experimental"]:
2857
+ # Expect these to fail
2858
+ async def return_body_2():
2859
+ return_string = f'{{"model": "{model}"}}'
2860
+ # return string as bytes
2861
+ return return_string.encode()
2862
+
2863
+ request.body = return_body_2
2864
+
2865
+ # use generated key to auth in
2866
+ print(
2867
+ "Bearer token being sent to user_api_key_auth() - {}".format(bearer_token)
2868
+ )
2869
+ try:
2870
+ result = await user_api_key_auth(request=request, api_key=bearer_token)
2871
+ pytest.fail(f"This should have failed!. IT's an invalid model")
2872
+ except Exception as e:
2873
+ print("got exception", e)
2874
+ assert (
2875
+ "not allowed to call model" in e.message
2876
+ and "Allowed team models" in e.message
2877
+ )
.venv/lib/python3.12/site-packages/litellm/tests/test_least_busy_routing.py CHANGED
@@ -1,8 +1,13 @@
1
  #### What this tests ####
2
  # This tests the router's ability to identify the least busy deployment
3
 
4
- import sys, os, asyncio, time, random
 
 
 
 
5
  import traceback
 
6
  from dotenv import load_dotenv
7
 
8
  load_dotenv()
@@ -12,10 +17,11 @@ sys.path.insert(
12
  0, os.path.abspath("../..")
13
  ) # Adds the parent directory to the system path
14
  import pytest
15
- from litellm import Router
16
  import litellm
17
- from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
18
  from litellm.caching import DualCache
 
19
 
20
  ### UNIT TESTS FOR LEAST BUSY LOGGING ###
21
 
@@ -123,6 +129,9 @@ def test_router_get_available_deployments():
123
 
124
  return_dict = router.cache.get_cache(key=cache_key)
125
 
 
 
 
126
  assert router.leastbusy_logger.logged_success == 1
127
  assert return_dict[1] == 10
128
  assert return_dict[2] == 54
 
1
  #### What this tests ####
2
  # This tests the router's ability to identify the least busy deployment
3
 
4
+ import asyncio
5
+ import os
6
+ import random
7
+ import sys
8
+ import time
9
  import traceback
10
+
11
  from dotenv import load_dotenv
12
 
13
  load_dotenv()
 
17
  0, os.path.abspath("../..")
18
  ) # Adds the parent directory to the system path
19
  import pytest
20
+
21
  import litellm
22
+ from litellm import Router
23
  from litellm.caching import DualCache
24
+ from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
25
 
26
  ### UNIT TESTS FOR LEAST BUSY LOGGING ###
27
 
 
129
 
130
  return_dict = router.cache.get_cache(key=cache_key)
131
 
132
+ # wait 2 seconds
133
+ time.sleep(2)
134
+
135
  assert router.leastbusy_logger.logged_success == 1
136
  assert return_dict[1] == 10
137
  assert return_dict[2] == 54
.venv/lib/python3.12/site-packages/litellm/tests/test_parallel_request_limiter.py CHANGED
@@ -908,3 +908,273 @@ async def test_bad_router_tpm_limit():
908
  )["current_tpm"]
909
  == 0
910
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
908
  )["current_tpm"]
909
  == 0
910
  )
911
+
912
+
913
+ @pytest.mark.asyncio
914
+ async def test_bad_router_tpm_limit_per_model():
915
+ model_list = [
916
+ {
917
+ "model_name": "azure-model",
918
+ "litellm_params": {
919
+ "model": "azure/gpt-turbo",
920
+ "api_key": "os.environ/AZURE_FRANCE_API_KEY",
921
+ "api_base": "https://openai-france-1234.openai.azure.com",
922
+ "rpm": 1440,
923
+ },
924
+ "model_info": {"id": 1},
925
+ },
926
+ {
927
+ "model_name": "azure-model",
928
+ "litellm_params": {
929
+ "model": "azure/gpt-35-turbo",
930
+ "api_key": "os.environ/AZURE_EUROPE_API_KEY",
931
+ "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com",
932
+ "rpm": 6,
933
+ },
934
+ "model_info": {"id": 2},
935
+ },
936
+ ]
937
+ router = Router(
938
+ model_list=model_list,
939
+ set_verbose=False,
940
+ num_retries=3,
941
+ ) # type: ignore
942
+
943
+ _api_key = "sk-12345"
944
+ _api_key = hash_token(_api_key)
945
+ model = "azure-model"
946
+
947
+ user_api_key_dict = UserAPIKeyAuth(
948
+ api_key=_api_key,
949
+ max_parallel_requests=10,
950
+ tpm_limit=10,
951
+ metadata={
952
+ "model_rpm_limit": {model: 5},
953
+ "model_tpm_limit": {model: 5},
954
+ },
955
+ )
956
+ local_cache = DualCache()
957
+ pl = ProxyLogging(user_api_key_cache=local_cache)
958
+ pl._init_litellm_callbacks()
959
+ print(f"litellm callbacks: {litellm.callbacks}")
960
+ parallel_request_handler = pl.max_parallel_request_limiter
961
+
962
+ await parallel_request_handler.async_pre_call_hook(
963
+ user_api_key_dict=user_api_key_dict,
964
+ cache=local_cache,
965
+ data={"model": model},
966
+ call_type="",
967
+ )
968
+
969
+ current_date = datetime.now().strftime("%Y-%m-%d")
970
+ current_hour = datetime.now().strftime("%H")
971
+ current_minute = datetime.now().strftime("%M")
972
+ precise_minute = f"{current_date}-{current_hour}-{current_minute}"
973
+ request_count_api_key = f"{_api_key}::{model}::{precise_minute}::request_count"
974
+
975
+ print(
976
+ "internal usage cache: ",
977
+ parallel_request_handler.internal_usage_cache.in_memory_cache.cache_dict,
978
+ )
979
+
980
+ assert (
981
+ parallel_request_handler.internal_usage_cache.get_cache(
982
+ key=request_count_api_key
983
+ )["current_requests"]
984
+ == 1
985
+ )
986
+
987
+ # bad call
988
+ try:
989
+ response = await router.acompletion(
990
+ model=model,
991
+ messages=[{"role": "user2", "content": "Write me a paragraph on the moon"}],
992
+ stream=True,
993
+ metadata={"user_api_key": _api_key},
994
+ )
995
+ except:
996
+ pass
997
+ await asyncio.sleep(1) # success is done in a separate thread
998
+
999
+ assert (
1000
+ parallel_request_handler.internal_usage_cache.get_cache(
1001
+ key=request_count_api_key
1002
+ )["current_tpm"]
1003
+ == 0
1004
+ )
1005
+
1006
+
1007
+ @pytest.mark.asyncio
1008
+ async def test_pre_call_hook_rpm_limits_per_model():
1009
+ """
1010
+ Test if error raised on hitting rpm limits for a given model
1011
+ """
1012
+ import logging
1013
+
1014
+ from litellm._logging import (
1015
+ verbose_logger,
1016
+ verbose_proxy_logger,
1017
+ verbose_router_logger,
1018
+ )
1019
+
1020
+ verbose_logger.setLevel(logging.DEBUG)
1021
+ verbose_proxy_logger.setLevel(logging.DEBUG)
1022
+ verbose_router_logger.setLevel(logging.DEBUG)
1023
+
1024
+ _api_key = "sk-12345"
1025
+ _api_key = hash_token(_api_key)
1026
+ user_api_key_dict = UserAPIKeyAuth(
1027
+ api_key=_api_key,
1028
+ max_parallel_requests=100,
1029
+ tpm_limit=900000,
1030
+ rpm_limit=100000,
1031
+ metadata={
1032
+ "model_rpm_limit": {"azure-model": 1},
1033
+ },
1034
+ )
1035
+ local_cache = DualCache()
1036
+ pl = ProxyLogging(user_api_key_cache=local_cache)
1037
+ pl._init_litellm_callbacks()
1038
+ print(f"litellm callbacks: {litellm.callbacks}")
1039
+ parallel_request_handler = pl.max_parallel_request_limiter
1040
+
1041
+ await parallel_request_handler.async_pre_call_hook(
1042
+ user_api_key_dict=user_api_key_dict, cache=local_cache, data={}, call_type=""
1043
+ )
1044
+
1045
+ model = "azure-model"
1046
+
1047
+ kwargs = {
1048
+ "model": model,
1049
+ "litellm_params": {
1050
+ "metadata": {"user_api_key": _api_key, "model_group": model}
1051
+ },
1052
+ }
1053
+
1054
+ await parallel_request_handler.async_log_success_event(
1055
+ kwargs=kwargs,
1056
+ response_obj="",
1057
+ start_time="",
1058
+ end_time="",
1059
+ )
1060
+
1061
+ ## Expected cache val: {"current_requests": 0, "current_tpm": 0, "current_rpm": 1}
1062
+
1063
+ try:
1064
+ await parallel_request_handler.async_pre_call_hook(
1065
+ user_api_key_dict=user_api_key_dict,
1066
+ cache=local_cache,
1067
+ data={"model": model},
1068
+ call_type="",
1069
+ )
1070
+
1071
+ pytest.fail(f"Expected call to fail")
1072
+ except Exception as e:
1073
+ assert e.status_code == 429
1074
+ print("got error=", e)
1075
+ assert (
1076
+ "limit reached Hit RPM limit for model: azure-model on api_key: c11e7177eb60c80cf983ddf8ca98f2dc1272d4c612204ce9bedd2460b18939cc"
1077
+ in str(e)
1078
+ )
1079
+
1080
+
1081
+ @pytest.mark.asyncio
1082
+ async def test_pre_call_hook_tpm_limits_per_model():
1083
+ """
1084
+ Test if error raised on hitting tpm limits for a given model
1085
+ """
1086
+ import logging
1087
+
1088
+ from litellm._logging import (
1089
+ verbose_logger,
1090
+ verbose_proxy_logger,
1091
+ verbose_router_logger,
1092
+ )
1093
+
1094
+ verbose_logger.setLevel(logging.DEBUG)
1095
+ verbose_proxy_logger.setLevel(logging.DEBUG)
1096
+ verbose_router_logger.setLevel(logging.DEBUG)
1097
+
1098
+ _api_key = "sk-12345"
1099
+ _api_key = hash_token(_api_key)
1100
+ user_api_key_dict = UserAPIKeyAuth(
1101
+ api_key=_api_key,
1102
+ max_parallel_requests=100,
1103
+ tpm_limit=900000,
1104
+ rpm_limit=100000,
1105
+ metadata={
1106
+ "model_tpm_limit": {"azure-model": 1},
1107
+ "model_rpm_limit": {"azure-model": 100},
1108
+ },
1109
+ )
1110
+ local_cache = DualCache()
1111
+ pl = ProxyLogging(user_api_key_cache=local_cache)
1112
+ pl._init_litellm_callbacks()
1113
+ print(f"litellm callbacks: {litellm.callbacks}")
1114
+ parallel_request_handler = pl.max_parallel_request_limiter
1115
+ model = "azure-model"
1116
+
1117
+ await parallel_request_handler.async_pre_call_hook(
1118
+ user_api_key_dict=user_api_key_dict,
1119
+ cache=local_cache,
1120
+ data={"model": model},
1121
+ call_type="",
1122
+ )
1123
+
1124
+ kwargs = {
1125
+ "model": model,
1126
+ "litellm_params": {
1127
+ "metadata": {"user_api_key": _api_key, "model_group": model}
1128
+ },
1129
+ }
1130
+
1131
+ await parallel_request_handler.async_log_success_event(
1132
+ kwargs=kwargs,
1133
+ response_obj=litellm.ModelResponse(usage=litellm.Usage(total_tokens=11)),
1134
+ start_time="",
1135
+ end_time="",
1136
+ )
1137
+
1138
+ current_date = datetime.now().strftime("%Y-%m-%d")
1139
+ current_hour = datetime.now().strftime("%H")
1140
+ current_minute = datetime.now().strftime("%M")
1141
+ precise_minute = f"{current_date}-{current_hour}-{current_minute}"
1142
+ request_count_api_key = f"{_api_key}::{model}::{precise_minute}::request_count"
1143
+
1144
+ print(
1145
+ "internal usage cache: ",
1146
+ parallel_request_handler.internal_usage_cache.in_memory_cache.cache_dict,
1147
+ )
1148
+
1149
+ assert (
1150
+ parallel_request_handler.internal_usage_cache.get_cache(
1151
+ key=request_count_api_key
1152
+ )["current_tpm"]
1153
+ == 11
1154
+ )
1155
+
1156
+ assert (
1157
+ parallel_request_handler.internal_usage_cache.get_cache(
1158
+ key=request_count_api_key
1159
+ )["current_rpm"]
1160
+ == 1
1161
+ )
1162
+
1163
+ ## Expected cache val: {"current_requests": 0, "current_tpm": 11, "current_rpm": "1"}
1164
+
1165
+ try:
1166
+ await parallel_request_handler.async_pre_call_hook(
1167
+ user_api_key_dict=user_api_key_dict,
1168
+ cache=local_cache,
1169
+ data={"model": model},
1170
+ call_type="",
1171
+ )
1172
+
1173
+ pytest.fail(f"Expected call to fail")
1174
+ except Exception as e:
1175
+ assert e.status_code == 429
1176
+ print("got error=", e)
1177
+ assert (
1178
+ "request limit reached Hit TPM limit for model: azure-model on api_key"
1179
+ in str(e)
1180
+ )
.venv/lib/python3.12/site-packages/litellm/tests/test_pass_through_endpoints.py CHANGED
@@ -11,6 +11,7 @@ sys.path.insert(
11
  ) # Adds-the parent directory to the system path
12
 
13
  import asyncio
 
14
 
15
  import httpx
16
 
@@ -19,7 +20,9 @@ from litellm.proxy.proxy_server import app, initialize_pass_through_endpoints
19
 
20
  # Mock the async_client used in the pass_through_request function
21
  async def mock_request(*args, **kwargs):
22
- return httpx.Response(200, json={"message": "Mocked response"})
 
 
23
 
24
 
25
  @pytest.fixture
 
11
  ) # Adds-the parent directory to the system path
12
 
13
  import asyncio
14
+ from unittest.mock import Mock
15
 
16
  import httpx
17
 
 
20
 
21
  # Mock the async_client used in the pass_through_request function
22
  async def mock_request(*args, **kwargs):
23
+ mock_response = httpx.Response(200, json={"message": "Mocked response"})
24
+ mock_response.request = Mock(spec=httpx.Request)
25
+ return mock_response
26
 
27
 
28
  @pytest.fixture
.venv/lib/python3.12/site-packages/litellm/tests/test_proxy_server.py CHANGED
@@ -1166,3 +1166,52 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
1166
  assert new_data["success_callback"] == ["langfuse"]
1167
  assert "langfuse_public_key" in new_data
1168
  assert "langfuse_secret_key" in new_data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1166
  assert new_data["success_callback"] == ["langfuse"]
1167
  assert "langfuse_public_key" in new_data
1168
  assert "langfuse_secret_key" in new_data
1169
+
1170
+
1171
+ @pytest.mark.asyncio
1172
+ async def test_gemini_pass_through_endpoint():
1173
+ from starlette.datastructures import URL
1174
+
1175
+ from litellm.proxy.vertex_ai_endpoints.google_ai_studio_endpoints import (
1176
+ Request,
1177
+ Response,
1178
+ gemini_proxy_route,
1179
+ )
1180
+
1181
+ body = b"""
1182
+ {
1183
+ "contents": [{
1184
+ "parts":[{
1185
+ "text": "The quick brown fox jumps over the lazy dog."
1186
+ }]
1187
+ }]
1188
+ }
1189
+ """
1190
+
1191
+ # Construct the scope dictionary
1192
+ scope = {
1193
+ "type": "http",
1194
+ "method": "POST",
1195
+ "path": "/gemini/v1beta/models/gemini-1.5-flash:countTokens",
1196
+ "query_string": b"key=sk-1234",
1197
+ "headers": [
1198
+ (b"content-type", b"application/json"),
1199
+ ],
1200
+ }
1201
+
1202
+ # Create a new Request object
1203
+ async def async_receive():
1204
+ return {"type": "http.request", "body": body, "more_body": False}
1205
+
1206
+ request = Request(
1207
+ scope=scope,
1208
+ receive=async_receive,
1209
+ )
1210
+
1211
+ resp = await gemini_proxy_route(
1212
+ endpoint="v1beta/models/gemini-1.5-flash:countTokens?key=sk-1234",
1213
+ request=request,
1214
+ fastapi_response=Response(),
1215
+ )
1216
+
1217
+ print(resp.body)
.venv/lib/python3.12/site-packages/naptha_sdk-0.1.0.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
1
  ../../../bin/naptha,sha256=caDahce1fdPiWx_e7NogUNj_x-F3pfMRk20TJJoEoxg,265
2
  naptha_sdk/agent_service.py,sha256=BtyqmyaxRVN0DeSUMgIO8XBLbMe2vGBEpwNcdQbLXgk,2538
3
- naptha_sdk/agent_service_engine.py,sha256=mhoVx1PEqPrsP87EPqea_k4awHFBybP26xK7kDoDlTA,6604
4
  naptha_sdk/app.py,sha256=CpI8ZCnt4nKBkNujORSAeOLJIKuInqrKtl6IOeynZXo,4116
5
  naptha_sdk/client/comms/http_client.py,sha256=JKz-EUHKVIIAaFDB3w5WHJ_Bv4og4YDGBXGglyDMFSE,8390
6
  naptha_sdk/client/comms/ws_client.py,sha256=KPSNNQ0pwaJBRUNPBZ4WAREqSo2QFejjtfXYaLAzMs0,10577
 
1
  ../../../bin/naptha,sha256=caDahce1fdPiWx_e7NogUNj_x-F3pfMRk20TJJoEoxg,265
2
  naptha_sdk/agent_service.py,sha256=BtyqmyaxRVN0DeSUMgIO8XBLbMe2vGBEpwNcdQbLXgk,2538
3
+ naptha_sdk/agent_service_engine.py,sha256=vfHDoYNM1lP8hvQ5SmTP9qhKiqlyaLAktqsgIzw8mgU,6605
4
  naptha_sdk/app.py,sha256=CpI8ZCnt4nKBkNujORSAeOLJIKuInqrKtl6IOeynZXo,4116
5
  naptha_sdk/client/comms/http_client.py,sha256=JKz-EUHKVIIAaFDB3w5WHJ_Bv4og4YDGBXGglyDMFSE,8390
6
  naptha_sdk/client/comms/ws_client.py,sha256=KPSNNQ0pwaJBRUNPBZ4WAREqSo2QFejjtfXYaLAzMs0,10577
.venv/lib/python3.12/site-packages/naptha_sdk-0.1.0.dist-info/direct_url.json CHANGED
@@ -1 +1 @@
1
- {"url": "https://github.com/NapthaAI/naptha-sdk.git", "vcs_info": {"vcs": "git", "requested_revision": "feat/single-file", "commit_id": "d3a067838b217740a1c513a9565b1cb98d92fc5b"}}
 
1
+ {"url": "https://github.com/NapthaAI/naptha-sdk.git", "vcs_info": {"vcs": "git", "requested_revision": "feat/single-file", "commit_id": "2e4060d3d4fc493f57133b8253ed5c78335e9ef0"}}
.venv/lib/python3.12/site-packages/naptha_sdk/agent_service_engine.py CHANGED
@@ -22,7 +22,7 @@ async def run_agent_service(agent_service, flow_run, parameters) -> None:
22
  await agent_service_engine.complete()
23
  break
24
  time.sleep(3)
25
- return agent_service_engine.agent_serviceresult[-1]
26
  except Exception as e:
27
  logger.error(f"An error occurred: {str(e)}")
28
  await agent_service_engine.fail()
 
22
  await agent_service_engine.complete()
23
  break
24
  time.sleep(3)
25
+ return agent_service_engine.agent_service_result[-1]
26
  except Exception as e:
27
  logger.error(f"An error occurred: {str(e)}")
28
  await agent_service_engine.fail()
.venv/src/naptha-sdk/naptha_sdk/agent_service_engine.py CHANGED
@@ -22,7 +22,7 @@ async def run_agent_service(agent_service, flow_run, parameters) -> None:
22
  await agent_service_engine.complete()
23
  break
24
  time.sleep(3)
25
- return agent_service_engine.agent_serviceresult[-1]
26
  except Exception as e:
27
  logger.error(f"An error occurred: {str(e)}")
28
  await agent_service_engine.fail()
 
22
  await agent_service_engine.complete()
23
  break
24
  time.sleep(3)
25
+ return agent_service_engine.agent_service_result[-1]
26
  except Exception as e:
27
  logger.error(f"An error occurred: {str(e)}")
28
  await agent_service_engine.fail()
poetry.lock CHANGED
@@ -720,13 +720,13 @@ referencing = ">=0.31.0"
720
 
721
  [[package]]
722
  name = "litellm"
723
- version = "1.43.17"
724
  description = "Library to easily interface with LLM API providers"
725
  optional = false
726
  python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
727
  files = [
728
- {file = "litellm-1.43.17-py3-none-any.whl", hash = "sha256:f5d68c812f087b49266631e09ae78b48b3ea03cd2e04e7760162a5919c5ccec7"},
729
- {file = "litellm-1.43.17.tar.gz", hash = "sha256:8ac82b18bf6ae7c29627e8e5d89b183f075b32fb7027b17d2fb7d7d0b7cf8b7f"},
730
  ]
731
 
732
  [package.dependencies]
@@ -937,7 +937,7 @@ surrealdb = "^0.3.2"
937
  type = "git"
938
  url = "https://github.com/NapthaAI/naptha-sdk.git"
939
  reference = "feat/single-file"
940
- resolved_reference = "d3a067838b217740a1c513a9565b1cb98d92fc5b"
941
 
942
  [[package]]
943
  name = "openai"
@@ -1891,4 +1891,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
1891
  [metadata]
1892
  lock-version = "2.0"
1893
  python-versions = "^3.12"
1894
- content-hash = "a28a3e26361a34d07cfbc6466d87f5ceeefdc61b5446e19c521c782568dc42bb"
 
720
 
721
  [[package]]
722
  name = "litellm"
723
+ version = "1.43.18"
724
  description = "Library to easily interface with LLM API providers"
725
  optional = false
726
  python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
727
  files = [
728
+ {file = "litellm-1.43.18-py3-none-any.whl", hash = "sha256:68d853b4a0198a16e2260e4406a20f8d2e59bd903e019b7f3ba5a9f35ecc3e62"},
729
+ {file = "litellm-1.43.18.tar.gz", hash = "sha256:e22b20065b62663dd060be9da1e84ca05903931c41c49d35a98649ed09e79d29"},
730
  ]
731
 
732
  [package.dependencies]
 
937
  type = "git"
938
  url = "https://github.com/NapthaAI/naptha-sdk.git"
939
  reference = "feat/single-file"
940
+ resolved_reference = "2e4060d3d4fc493f57133b8253ed5c78335e9ef0"
941
 
942
  [[package]]
943
  name = "openai"
 
1891
  [metadata]
1892
  lock-version = "2.0"
1893
  python-versions = "^3.12"
1894
+ content-hash = "1cc6d2a25c4edcd20480814792d461fd48fd6f8ee09a6b9d721e77327e6bc9e1"
pyproject.toml CHANGED
@@ -7,8 +7,8 @@ readme = "README.md"
7
 
8
  [tool.poetry.dependencies]
9
  python = "^3.12"
10
- litellm = "^1.43.17"
11
  naptha-sdk = {git = "https://github.com/NapthaAI/naptha-sdk.git", rev = "feat/single-file"}
 
12
 
13
 
14
  [build-system]
 
7
 
8
  [tool.poetry.dependencies]
9
  python = "^3.12"
 
10
  naptha-sdk = {git = "https://github.com/NapthaAI/naptha-sdk.git", rev = "feat/single-file"}
11
+ litellm = "^1.43.18"
12
 
13
 
14
  [build-system]