import threading

import httpx
import requests

from typing import Callable, List, Optional, Dict, Union, Any

from litellm.caching import Cache

# callback hooks: the input/success/failure lists accept integration names
# (str) or user-supplied callables
input_callback: List[Union[str, Callable]] = []
success_callback: List[Union[str, Callable]] = []
failure_callback: List[Union[str, Callable]] = []
callbacks: List[Callable] = []
_async_success_callback: List[Callable] = []
# rules evaluated before / after each LLM API call
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
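
# A minimal usage sketch: strings name built-in logging integrations,
# callables are invoked by litellm's logging layer after each call.
# `log_duration` is a hypothetical user function, not part of this module.
#
#   import litellm
#
#   def log_duration(kwargs, completion_response, start_time, end_time):
#       print(f"model={kwargs.get('model')} took {end_time - start_time}")
#
#   litellm.success_callback = ["langfuse", log_duration]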

set_verbose = False
email: Optional[str] = None
token: Optional[str] = None
telemetry = True
max_tokens = 256  # default completion token budget
drop_params = False  # if True, drop request params a provider doesn't support instead of raising
retry = True

# provider API keys; all optional, typically sourced from environment variables
api_key: Optional[str] = None
openai_key: Optional[str] = None
azure_key: Optional[str] = None
anthropic_key: Optional[str] = None
replicate_key: Optional[str] = None
cohere_key: Optional[str] = None
maritalk_key: Optional[str] = None
ai21_key: Optional[str] = None
openrouter_key: Optional[str] = None
huggingface_key: Optional[str] = None
vertex_project: Optional[str] = None
vertex_location: Optional[str] = None
togetherai_api_key: Optional[str] = None
baseten_key: Optional[str] = None
aleph_alpha_key: Optional[str] = None
nlp_cloud_key: Optional[str] = None

use_client: bool = False
logging: bool = True
caching: bool = False
caching_with_models: bool = False
cache: Optional[Cache] = None  # assign a litellm.caching.Cache instance to enable response caching
model_alias_map: Dict[str, str] = {}  # map custom aliases to real model names
max_budget: float = 0.0  # maximum spend across all providers; 0.0 disables the check
_current_cost = 0  # private: running cost, used when max_budget is set
error_logs: Dict = {}
add_function_to_prompt: bool = False  # if True, inject function definitions into the prompt for providers without native function calling
client_session: Optional[httpx.Client] = None
aclient_session: Optional[httpx.AsyncClient] = None
model_fallbacks: Optional[List] = None
model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
suppress_debug_info = False

# reliability settings, read at request time
request_timeout: Optional[float] = 6000  # seconds
num_retries: Optional[int] = None
fallbacks: Optional[List] = None
context_window_fallbacks: Optional[List] = None
allowed_fails: int = 0

# secret manager integration; any configured client object may be assigned
secret_manager_client: Optional[Any] = None
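
# A sketch of configuring the reliability knobs above; the values shown are
# illustrative, not defaults.
#
#   import litellm
#   litellm.num_retries = 3                        # retry transient failures
#   litellm.fallbacks = ["gpt-3.5-turbo", "claude-instant-1"]
#   litellm.context_window_fallbacks = ["gpt-3.5-turbo-16k"]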


def get_model_cost_map(url: str):
    """Fetch the model pricing/context-window map, falling back to the bundled copy on any error."""
    try:
        with requests.get(url, timeout=5) as response:
            response.raise_for_status()  # raise on 4xx/5xx
            content = response.json()
            return content
    except Exception:
        # offline or fetch failed: load the backup JSON shipped with the package
        import importlib.resources
        import json

        with importlib.resources.open_text(
            "litellm", "model_prices_and_context_window_backup.json"
        ) as f:
            content = json.load(f)
            return content


model_cost = get_model_cost_map(url=model_cost_map_url)
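
# Assumed shape of one model_cost entry (numbers are illustrative):
#
#   model_cost["gpt-3.5-turbo"] == {
#       "max_tokens": 4097,
#       "input_cost_per_token": 1.5e-06,
#       "output_cost_per_token": 2e-06,
#       "litellm_provider": "openai",
#       "mode": "chat",
#   }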

custom_prompt_dict: Dict[str, dict] = {}
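
# Assumed shape of a custom prompt template entry (keys are illustrative; see
# register_prompt_template, imported below, for the supported way to add one):
#
#   custom_prompt_dict["my-model"] = {
#       "roles": {
#           "system": {"pre_message": "<<SYS>>\n", "post_message": "\n<</SYS>>\n"},
#           "user": {"pre_message": "[INST] ", "post_message": " [/INST]"},
#       },
#       "initial_prompt_value": "",
#       "final_prompt_value": "",
#   }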


class MyLocal(threading.local):
    """Thread-local storage; each thread sees its own `user` value."""

    def __init__(self):
        self.user = "Hello World"


_thread_context = MyLocal()


def identify(event_details):
    """Attach user metadata (e.g. a user id) to the current thread's context."""
    # overwrite the thread's user if one is supplied
    if "user" in event_details:
        _thread_context.user = event_details["user"]
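
# Example: tag subsequent calls made on this thread with a user id.
#
#   import litellm
#   litellm.identify({"user": "user_123"})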


api_base = None
headers = None
api_version = None
organization = None
config_path = None

# provider model lists, populated below from the model_cost map
open_ai_chat_completion_models: List = []
open_ai_text_completion_models: List = []
cohere_models: List = []
anthropic_models: List = []
openrouter_models: List = []
vertex_chat_models: List = []
vertex_code_chat_models: List = []
vertex_text_models: List = []
vertex_code_text_models: List = []
ai21_models: List = []
nlp_cloud_models: List = []
aleph_alpha_models: List = []
bedrock_models: List = []
deepinfra_models: List = []
perplexity_models: List = []

# bucket every model in the cost map by its provider
for key, value in model_cost.items():
    if value.get("litellm_provider") == "openai":
        open_ai_chat_completion_models.append(key)
    elif value.get("litellm_provider") == "text-completion-openai":
        open_ai_text_completion_models.append(key)
    elif value.get("litellm_provider") == "cohere":
        cohere_models.append(key)
    elif value.get("litellm_provider") == "anthropic":
        anthropic_models.append(key)
    elif value.get("litellm_provider") == "openrouter":
        # strip the leading "openrouter/" prefix, keeping the rest of the route
        split_string = key.split("/", 1)
        openrouter_models.append(split_string[1])
    elif value.get("litellm_provider") == "vertex_ai-text-models":
        vertex_text_models.append(key)
    elif value.get("litellm_provider") == "vertex_ai-code-text-models":
        vertex_code_text_models.append(key)
    elif value.get("litellm_provider") == "vertex_ai-chat-models":
        vertex_chat_models.append(key)
    elif value.get("litellm_provider") == "vertex_ai-code-chat-models":
        vertex_code_chat_models.append(key)
    elif value.get("litellm_provider") == "ai21":
        ai21_models.append(key)
    elif value.get("litellm_provider") == "nlp_cloud":
        nlp_cloud_models.append(key)
    elif value.get("litellm_provider") == "aleph_alpha":
        aleph_alpha_models.append(key)
    elif value.get("litellm_provider") == "bedrock":
        bedrock_models.append(key)
    elif value.get("litellm_provider") == "deepinfra":
        deepinfra_models.append(key)
    elif value.get("litellm_provider") == "perplexity":
        perplexity_models.append(key)


# providers with OpenAI-compatible APIs; requests to these base URLs are
# routed through the OpenAI client
openai_compatible_endpoints: List = [
    "api.perplexity.ai",
    "api.endpoints.anyscale.com/v1",
    "api.deepinfra.com/v1/openai",
]
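
# Usage sketch for one of these endpoints; the model string is illustrative.
#
#   from litellm import completion
#   response = completion(
#       model="perplexity/mistral-7b-instruct",
#       messages=[{"role": "user", "content": "hi"}],
#   )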


replicate_models: List = [
    # llama-family chat / code models
    "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
    "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
    "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
    # vicuna
    "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
    "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
    # flan-t5
    "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f",
    # others
    "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
    "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
]


huggingface_models: List = [
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-13b-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-2-70b-hf",
    "meta-llama/Llama-2-70b-chat-hf",
    "meta-llama/Llama-2-7b",
    "meta-llama/Llama-2-7b-chat",
    "meta-llama/Llama-2-13b",
    "meta-llama/Llama-2-13b-chat",
    "meta-llama/Llama-2-70b",
    "meta-llama/Llama-2-70b-chat",
]


together_ai_models: List = [
    # llama-2 chat
    "togethercomputer/llama-2-70b-chat",
    # llama-2 base / long-context
    "togethercomputer/llama-2-70b",
    "togethercomputer/LLaMA-2-7B-32K",
    "togethercomputer/Llama-2-7B-32K-Instruct",
    "togethercomputer/llama-2-7b",
    # falcon
    "togethercomputer/falcon-40b-instruct",
    "togethercomputer/falcon-7b-instruct",
    # alpaca
    "togethercomputer/alpaca-7b",
    # chat
    "HuggingFaceH4/starchat-alpha",
    # code
    "togethercomputer/CodeLlama-34b",
    "togethercomputer/CodeLlama-34b-Instruct",
    "togethercomputer/CodeLlama-34b-Python",
    "defog/sqlcoder",
    "NumbersStation/nsql-llama-2-7B",
    "WizardLM/WizardCoder-15B-V1.0",
    "WizardLM/WizardCoder-Python-34B-V1.0",
    # language / instruct
    "NousResearch/Nous-Hermes-Llama2-13b",
    "Austism/chronos-hermes-13b",
    "upstage/SOLAR-0-70b-16bit",
    "WizardLM/WizardLM-70B-V1.0",
]


baseten_models: List = ["qvv0xeq", "q841o8w", "31dxrj3"]  # opaque Baseten deployment IDs

petals_models = [
    "petals-team/StableBeluga2",
]

ollama_models = [
    "llama2",
]

maritalk_models = [
    "maritalk",
]


model_list = (
    open_ai_chat_completion_models
    + open_ai_text_completion_models
    + cohere_models
    + anthropic_models
    + replicate_models
    + openrouter_models
    + huggingface_models
    + vertex_chat_models
    + vertex_text_models
    + ai21_models
    + together_ai_models
    + baseten_models
    + aleph_alpha_models
    + nlp_cloud_models
    + ollama_models
    + bedrock_models
    + deepinfra_models
    + perplexity_models
    + maritalk_models
)
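
# Example: membership check against every known model string.
#
#   import litellm
#   assert "gpt-3.5-turbo" in litellm.model_list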


provider_list: List = [
    "openai",
    "custom_openai",
    "cohere",
    "anthropic",
    "replicate",
    "huggingface",
    "together_ai",
    "openrouter",
    "vertex_ai",
    "palm",
    "ai21",
    "baseten",
    "azure",
    "sagemaker",
    "bedrock",
    "vllm",
    "nlp_cloud",
    "petals",
    "oobabooga",
    "ollama",
    "deepinfra",
    "perplexity",
    "anyscale",
    "maritalk",
    "custom",
]


models_by_provider: dict = {
    "openai": open_ai_chat_completion_models + open_ai_text_completion_models,
    "cohere": cohere_models,
    "anthropic": anthropic_models,
    "replicate": replicate_models,
    "huggingface": huggingface_models,
    "together_ai": together_ai_models,
    "baseten": baseten_models,
    "openrouter": openrouter_models,
    "vertex_ai": vertex_chat_models + vertex_text_models,
    "ai21": ai21_models,
    "bedrock": bedrock_models,
    "petals": petals_models,
    "ollama": ollama_models,
    "deepinfra": deepinfra_models,
    "perplexity": perplexity_models,
    "maritalk": maritalk_models,
}
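
# Example: list every model string registered for a single provider.
#
#   import litellm
#   print(litellm.models_by_provider["anthropic"])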


# maps models to their larger-context equivalents, for use when a request
# exceeds the smaller model's context window
longer_context_model_fallback_dict: dict = {
    # openai
    "gpt-3.5-turbo": "gpt-3.5-turbo-16k",
    "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301",
    "gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613",
    "gpt-4": "gpt-4-32k",
    "gpt-4-0314": "gpt-4-32k-0314",
    "gpt-4-0613": "gpt-4-32k-0613",
    # anthropic
    "claude-instant-1": "claude-2",
    "claude-instant-1.2": "claude-2",
    # vertex ai
    "chat-bison": "chat-bison-32k",
    "chat-bison@001": "chat-bison-32k",
    "codechat-bison": "codechat-bison-32k",
    "codechat-bison@001": "codechat-bison-32k",
    # openrouter
    "openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k",
    "openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2",
}
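
# Example: consult the map to retry with a larger-context model, e.g. after a
# ContextWindowExceededError (imported below).
#
#   fallback = longer_context_model_fallback_dict.get("gpt-3.5-turbo")
#   # -> "gpt-3.5-turbo-16k"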


open_ai_embedding_models: List = ["text-embedding-ada-002"]
cohere_embedding_models: List = [
    "embed-english-v3.0",
    "embed-english-light-v3.0",
    "embed-multilingual-v3.0",
    "embed-english-v2.0",
    "embed-english-light-v2.0",
    "embed-multilingual-v2.0",
]
bedrock_embedding_models: List = ["amazon.titan-embed-text-v1"]

all_embedding_models = (
    open_ai_embedding_models + cohere_embedding_models + bedrock_embedding_models
)

from .timeout import timeout
from .utils import (
    client,
    exception_type,
    get_optional_params,
    modify_integration,
    token_counter,
    cost_per_token,
    completion_cost,
    get_litellm_params,
    Logging,
    acreate,
    get_model_list,
    get_max_tokens,
    get_model_info,
    register_prompt_template,
    validate_environment,
    check_valid_key,
    get_llm_provider,
    completion_with_config,
    register_model,
    encode,
    decode,
    _calculate_retry_after,
    _should_retry,
    get_secret,
)
from .llms.huggingface_restapi import HuggingfaceConfig
from .llms.anthropic import AnthropicConfig
from .llms.replicate import ReplicateConfig
from .llms.cohere import CohereConfig
from .llms.ai21 import AI21Config
from .llms.together_ai import TogetherAIConfig
from .llms.palm import PalmConfig
from .llms.nlp_cloud import NLPCloudConfig
from .llms.aleph_alpha import AlephAlphaConfig
from .llms.petals import PetalsConfig
from .llms.vertex_ai import VertexAIConfig
from .llms.sagemaker import SagemakerConfig
from .llms.ollama import OllamaConfig
from .llms.maritalk import MaritTalkConfig
from .llms.bedrock import (
    AmazonTitanConfig,
    AmazonAI21Config,
    AmazonAnthropicConfig,
    AmazonCohereConfig,
    AmazonLlamaConfig,
)
from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig
from .llms.azure import AzureOpenAIConfig
from .main import *
from .integrations import *
from .exceptions import (
    AuthenticationError,
    InvalidRequestError,
    BadRequestError,
    RateLimitError,
    ServiceUnavailableError,
    OpenAIError,
    ContextWindowExceededError,
    BudgetExceededError,
    APIError,
    Timeout,
    APIConnectionError,
    APIResponseValidationError,
)
from .budget_manager import BudgetManager
from .proxy.proxy_cli import run_server
from .router import Router