yym68686 committed
Commit 4f0c23d · 1 Parent(s): 1e88a66

🪞 Frontend: Add request model statistics bar chart

🐛 Bug: Fix the error reason not being displayed when a request times out.

✨ Feature: Support custom timeouts for different models.

Files changed (4)

1. README.md +40 -32
2. README_CN.md +8 -0
3. main.py +222 -44
4. utils.py +0 -17
README.md CHANGED

@@ -59,36 +59,36 @@ Detailed advanced configuration of `api.yaml`:
 
 ```yaml
 providers:
-  - provider: provider_name # Service provider name, such as openai, anthropic, gemini, openrouter, deepbricks; any name can be given, required
-    base_url: https://api.your.com/v1/chat/completions # API address of the backend service, required
+  - provider: provider_name # Service provider name, such as openai, anthropic, gemini, openrouter, deepbricks; any name is fine, required
+    base_url: https://api.your.com/v1/chat/completions # Backend service API address, required
     api: sk-YgS6GTi0b4bEabc4C # Provider's API Key, required
-    model: # Optional; if model is not configured, all available models will be obtained automatically via base_url and api through the /v1/models endpoint.
-      - gpt-4o # Model name that can be used, required
-      - claude-3-5-sonnet-20240620: claude-3-5-sonnet # Renamed model: claude-3-5-sonnet-20240620 is the provider's model name, claude-3-5-sonnet is the new name; a simple name can replace the original complex one, optional
+    model: # Optional; if model is not configured, all available models are obtained automatically through the /v1/models endpoint via base_url and api.
+      - gpt-4o # Usable model name, required
+      - claude-3-5-sonnet-20240620: claude-3-5-sonnet # Rename a model: claude-3-5-sonnet-20240620 is the provider's model name, claude-3-5-sonnet is the new name; a simpler name can replace the original complex one, optional
       - dall-e-3
 
   - provider: anthropic
     base_url: https://api.anthropic.com/v1/messages
-    api: # Supports multiple API Keys; multiple keys automatically enable polling load balancing, at least one key, required
+    api: # Supports multiple API Keys; multiple keys automatically enable round-robin load balancing, at least one key, required
       - sk-ant-api03-bNnAOJyA-xQw_twAA
       - sk-ant-api02-bNnxxxx
     model:
-      - claude-3-5-sonnet-20240620: claude-3-5-sonnet # Renamed model: claude-3-5-sonnet-20240620 is the provider's model name, claude-3-5-sonnet is the new name; a simple name can replace the original complex one, optional
-    tools: true # Whether to support tools such as generating code, generating documents, etc.; default is true, optional
+      - claude-3-5-sonnet-20240620: claude-3-5-sonnet # Rename a model: claude-3-5-sonnet-20240620 is the provider's model name, claude-3-5-sonnet is the new name; a simpler name can replace the original complex one, optional
+    tools: true # Whether to support tools such as code generation, document generation, etc.; default is true, optional
 
   - provider: gemini
     base_url: https://generativelanguage.googleapis.com/v1beta # base_url supports v1beta/v1, only for Gemini models, required
     api: AIzaSyAN2k6IRdgw
     model:
       - gemini-1.5-pro
-      - gemini-1.5-flash-exp-0827: gemini-1.5-flash # After renaming, the original model name gemini-1.5-flash-exp-0827 cannot be used; if you want to use the original name, add it to model as well, just add the line below
-      - gemini-1.5-flash-exp-0827 # Add this line and both gemini-1.5-flash-exp-0827 and gemini-1.5-flash can be requested
+      - gemini-1.5-flash-exp-0827: gemini-1.5-flash # After renaming, the original model name gemini-1.5-flash-exp-0827 cannot be used. To keep using the original name, add it to model as well; just add the following line.
+      - gemini-1.5-flash-exp-0827 # Adding this line allows both gemini-1.5-flash-exp-0827 and gemini-1.5-flash to be requested
     tools: true
 
   - provider: vertex
-    project_id: gen-lang-client-xxxxxxxxxxxxxx # Description: your Google Cloud project ID. Format: a string, usually composed of lowercase letters, numbers, and hyphens. How to obtain: you can find your project ID in the project selector of the Google Cloud Console.
-    private_key: "-----BEGIN PRIVATE KEY-----\nxxxxx\n-----END PRIVATE" # Description: the private key of the Google Cloud Vertex AI service account. Format: a JSON-formatted string containing the service account's private key information. How to obtain: create a service account in the Google Cloud Console, generate a JSON key file, and set its content as the value of this environment variable.
-    client_email: [email protected] # Description: the email address of the Google Cloud Vertex AI service account. Format: usually a string like "[email protected]". How to obtain: generated when the service account is created, or found in the service account details under "IAM and Admin" in the Google Cloud Console.
+    project_id: gen-lang-client-xxxxxxxxxxxxxx # Description: your Google Cloud project ID. Format: a string, usually consisting of lowercase letters, numbers, and hyphens. How to obtain: you can find your project ID in the project selector of the Google Cloud Console.
+    private_key: "-----BEGIN PRIVATE KEY-----\nxxxxx\n-----END PRIVATE" # Description: the private key of the Google Cloud Vertex AI service account. Format: a JSON-formatted string containing the service account's private key information. How to obtain: create a service account in the Google Cloud Console, generate a JSON-format key file, and set its content as the value of this environment variable.
+    client_email: [email protected] # Description: the email address of the Google Cloud Vertex AI service account. Format: usually a string like "[email protected]". How to obtain: generated when the service account is created, or found in the service account details under "IAM & Admin" in the Google Cloud Console.
     model:
       - gemini-1.5-pro
       - gemini-1.5-flash
@@ -97,14 +97,14 @@ providers:
       - claude-3-sonnet@20240229: claude-3-sonnet
       - claude-3-haiku@20240307: claude-3-haiku
     tools: true
-    notes: https://xxxxx.com/ # You can put the provider's website, notes, or official documentation here, optional
+    notes: https://xxxxx.com/ # Can hold the provider's website, remarks, or official documentation, optional
 
   - provider: cloudflare
     api: f42b3xxxxxxxxxxq4aoGAh # Cloudflare API Key, required
     cf_account_id: 8ec0xxxxxxxxxxxxe721 # Cloudflare Account ID, required
     model:
-      - '@cf/meta/llama-3.1-8b-instruct': llama-3.1-8b # Renamed model: @cf/meta/llama-3.1-8b-instruct is the provider's original model name and must be enclosed in quotes, otherwise it is a YAML syntax error; llama-3.1-8b is the new name, a simple name can replace the original complex one, optional
-      - '@cf/meta/llama-3.1-8b-instruct' # The model name must be enclosed in quotes, otherwise it is a YAML syntax error
+      - '@cf/meta/llama-3.1-8b-instruct': llama-3.1-8b # Rename a model: @cf/meta/llama-3.1-8b-instruct is the provider's original model name and must be enclosed in quotes to avoid a YAML syntax error; llama-3.1-8b is the new name, a simpler name can replace the original complex one, optional
+      - '@cf/meta/llama-3.1-8b-instruct' # Must be enclosed in quotes to avoid a YAML syntax error
 
   - provider: other-provider
     base_url: https://api.xxx.com/v1/messages
@@ -113,39 +113,47 @@ providers:
       - causallm-35b-beta2ep-q6k: causallm-35b
       - anthropic/claude-3-5-sonnet
     tools: false
-    engine: openrouter # Force the use of a specific message format; currently supports the gpt, claude, gemini, and openrouter native formats, optional
+    engine: openrouter # Force a specific message format; currently supports the gpt, claude, gemini, and openrouter native formats, optional
 
 api_keys:
   - api: sk-KjjI60Yf0JFWxfgRmXqFWyGtWUd9GZnmi3KlvowmRWpWpQRo # API Key; users need an API key to use this service, required
-    model: # The models this API Key can use, required. Channel-level polling load balancing is enabled by default, and each requested model is tried in the order configured in model; this is unrelated to the original channel order in providers. You can therefore set a different request order for each API key.
-      - gpt-4o # Model name that can be used; can use the gpt-4o model provided by all providers
-      - claude-3-5-sonnet # Model name that can be used; can use the claude-3-5-sonnet model provided by all providers
-      - gemini/* # Model name that can be used; can only use the models provided by the provider named gemini, where gemini is the provider name and * stands for all models
+    model: # The models this API Key can use, required. Channel-level round-robin load balancing is enabled by default, and each request is made in the order configured in model; this is unrelated to the original channel order in providers. You can therefore set a different request order for each API key.
+      - gpt-4o # Usable model name; can use the gpt-4o model provided by all providers
+      - claude-3-5-sonnet # Usable model name; can use the claude-3-5-sonnet model provided by all providers
+      - gemini/* # Usable model name; can only use the models provided by the provider named gemini, where gemini is the provider name and * stands for all models
    role: admin
 
   - api: sk-pkhf60Yf0JGyJxgRmXqFQyTgWUd9GZnmi3KlvowmRWpWqrhy
     model:
-      - anthropic/claude-3-5-sonnet # Model name that can be used; can only use the claude-3-5-sonnet model provided by the provider named anthropic, and claude-3-5-sonnet models from other providers cannot be used. This way of writing will not match the model named anthropic/claude-3-5-sonnet provided by other-provider.
-      - <anthropic/claude-3-5-sonnet> # Adding angle brackets around the model name means it will not look for the claude-3-5-sonnet model under the channel named anthropic, but will use the entire string anthropic/claude-3-5-sonnet as the model name. This way of writing matches the model named anthropic/claude-3-5-sonnet provided by other-provider, but not the claude-3-5-sonnet model under anthropic.
-      - openai-test/text-moderation-latest # When message moderation is enabled, the text-moderation-latest model under the channel named openai-test can be used for moral review.
+      - anthropic/claude-3-5-sonnet # Usable model name; can only use the claude-3-5-sonnet model provided by the provider named anthropic, and claude-3-5-sonnet models from other providers cannot be used. This syntax will not match the model named anthropic/claude-3-5-sonnet provided by other-provider.
+      - <anthropic/claude-3-5-sonnet> # With angle brackets around the model name, it will not search for the claude-3-5-sonnet model under the channel named anthropic but will treat the entire string anthropic/claude-3-5-sonnet as the model name. This syntax matches the model named anthropic/claude-3-5-sonnet provided by other-provider, but not the claude-3-5-sonnet model under anthropic.
+      - openai-test/text-moderation-latest # When message moderation is enabled, the text-moderation-latest model under the channel named openai-test can be used for moderation.
     preferences:
-      SCHEDULING_ALGORITHM: fixed_priority # When SCHEDULING_ALGORITHM is fixed_priority, fixed priority scheduling is used: always execute the first channel that has the requested model. Enabled by default; the default value of SCHEDULING_ALGORITHM is fixed_priority. Optional values for SCHEDULING_ALGORITHM are: fixed_priority, round_robin, weighted_round_robin, lottery, random.
-      # When SCHEDULING_ALGORITHM is random, random polling load balancing is used: a channel that has the requested model is picked at random.
-      # When SCHEDULING_ALGORITHM is round_robin, polling load balancing is used: the channels that have the requested model are requested in order.
+      SCHEDULING_ALGORITHM: fixed_priority # When SCHEDULING_ALGORITHM is fixed_priority, fixed priority scheduling is used: the first channel that has the requested model is always used. Enabled by default; the default value of SCHEDULING_ALGORITHM is fixed_priority. Optional values for SCHEDULING_ALGORITHM are: fixed_priority, round_robin, weighted_round_robin, lottery, random.
+      # When SCHEDULING_ALGORITHM is random, random load balancing is used: a channel that has the requested model is picked at random.
+      # When SCHEDULING_ALGORITHM is round_robin, round-robin load balancing is used: the channels that have the requested model are requested in order.
       AUTO_RETRY: true # Whether to retry automatically (retry the next provider); true to retry automatically, false not to, default is true
-      RATE_LIMIT: 2/min # Supports rate limiting: the maximum number of requests per time unit, such as 2/min (2 per minute), 5/hour (5 per hour), 10/day, 10/month, or 10/year. Default is 60/min, optional
-      ENABLE_MODERATION: true # Whether to enable message moderation; true to enable, false to disable, default is false. When enabled, messages are morally reviewed, and an error message is returned if inappropriate messages are found.
+      RATE_LIMIT: 2/min # Supports rate limiting: the maximum number of requests per time unit, such as 2/min (2 per minute), 5/hour (5 per hour), 10/day, 10/month, or 10/year. Default 60/min, optional
+      ENABLE_MODERATION: true # Whether to enable message moderation; true to enable, false to disable, default is false. When enabled, messages are moderated, and inappropriate messages return an error.
 
   # Channel-level weighted load balancing configuration example
   - api: sk-KjjI60Yd0JFWtxxxxxxxxxxxxxxwmRWpWpQRo
     model:
-      - gcp1/*: 5 # The number after the colon is the weight; weight only supports positive integers.
-      - gcp2/*: 3 # The size of the number represents the weight; the larger the number, the greater the probability of the request.
+      - gcp1/*: 5 # The number after the colon is the weight; weights only support positive integers.
+      - gcp2/*: 3 # The size of the number represents the weight; the larger the number, the greater the probability of being requested.
       - gcp3/*: 2 # In this example, the weights across all channels total 10, so 5 out of 10 requests go to the gcp1/* models, 3 to gcp2/*, and 2 to gcp3/*.
 
     preferences:
-      SCHEDULING_ALGORITHM: weighted_round_robin # Only when SCHEDULING_ALGORITHM is weighted_round_robin and the channels above have weights are requests made in the weighted order: weighted polling load balancing requests the channels that have the requested model in weight order. When SCHEDULING_ALGORITHM is lottery, lottery polling load balancing is used, randomly picking a channel that has the requested model according to the weights. Channels without weights automatically fall back to round_robin polling load balancing.
+      SCHEDULING_ALGORITHM: weighted_round_robin # Only when SCHEDULING_ALGORITHM is weighted_round_robin and the channels above have weights are requests made in the weighted order: weighted round-robin load balancing requests the channels that have the requested model in weight order. When SCHEDULING_ALGORITHM is lottery, lottery load balancing is used, randomly picking a channel that has the requested model according to the weights. Channels without weights automatically fall back to round_robin load balancing.
       AUTO_RETRY: true
+
+preferences: # Global configuration
+  model_timeout: # Model timeout in seconds, default 100 seconds, optional
+    gpt-4o: 10 # The timeout for model gpt-4o is 10 seconds; gpt-4o is the model name, and requests for models such as gpt-4o-2024-08-06 also time out after 10 seconds
+    claude-3-5-sonnet: 10 # The timeout for model claude-3-5-sonnet is 10 seconds; requests for models such as claude-3-5-sonnet-20240620 also time out after 10 seconds
+    default: 10 # Models without a configured timeout use the default of 10 seconds; requests for models not listed in model_timeout default to 10 seconds. If default is not set, uni-api falls back to the timeout from the TIMEOUT environment variable, which defaults to 100 seconds
+    o1-mini: 30 # The timeout for model o1-mini is 30 seconds; requests for models whose names start with o1-mini time out after 30 seconds
+    o1-preview: 100 # The timeout for model o1-preview is 100 seconds; requests for models whose names start with o1-preview time out after 100 seconds
 ```
 
 Mount the configuration file and start the uni-api docker container:
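
The `model_timeout` block added above resolves a timeout in a fixed order: exact model name first, then a fuzzy name match (so `gpt-4o` also covers `gpt-4o-2024-08-06`), then the `default` entry, then the `TIMEOUT` environment variable. A minimal standalone sketch of that lookup order, using hypothetical config values; the authoritative implementation is in the main.py diff below:

```python
import os

# Hypothetical model_timeout values, mirroring the YAML example above.
timeouts = {"gpt-4o": 10, "claude-3-5-sonnet": 10, "o1-mini": 30, "default": 10}
DEFAULT_TIMEOUT = float(os.getenv("TIMEOUT", 100))  # fallback when "default" is absent

def resolve_timeout(model: str) -> float:
    if model in timeouts:  # 1. exact match
        return timeouts[model]
    for name, value in timeouts.items():  # 2. fuzzy match, e.g. "gpt-4o" in "gpt-4o-2024-08-06"
        if name != "default" and name in model:
            return value
    return timeouts.get("default", DEFAULT_TIMEOUT)  # 3. default entry, else TIMEOUT env var

print(resolve_timeout("gpt-4o-2024-08-06"))   # 10
print(resolve_timeout("some-unknown-model"))  # 10 (from "default")
```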
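The weighted load balancing example in the README diff above fixes the semantics: a channel's share of requests is its weight over the total (5/10, 3/10, and 2/10 here). The scheduler itself is not part of this commit, so the following is only a hypothetical sketch of lottery-style selection consistent with those comments:

```python
import random

# Hypothetical weights, matching the gcp1/gcp2/gcp3 example above.
weights = {"gcp1/*": 5, "gcp2/*": 3, "gcp3/*": 2}

def lottery_pick(weights: dict[str, int]) -> str:
    # probability of each channel = its weight / total weight
    channels = list(weights)
    return random.choices(channels, weights=[weights[c] for c in channels], k=1)[0]

counts = {c: 0 for c in weights}
for _ in range(10_000):
    counts[lottery_pick(weights)] += 1
print(counts)  # roughly 5000 / 3000 / 2000
```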
README_CN.md CHANGED

@@ -146,6 +146,14 @@ api_keys:
     preferences:
       SCHEDULING_ALGORITHM: weighted_round_robin # Only when SCHEDULING_ALGORITHM is weighted_round_robin and the channels above have weights are requests made in the weighted order: weighted round-robin load balancing requests the channels that have the requested model in weight order. When SCHEDULING_ALGORITHM is lottery, lottery load balancing is used, randomly picking a channel that has the requested model according to the weights. Channels without weights automatically fall back to round_robin load balancing.
       AUTO_RETRY: true
+
+preferences: # Global configuration
+  model_timeout: # Model timeout in seconds, default 100 seconds, optional
+    gpt-4o: 10 # The timeout for model gpt-4o is 10 seconds; gpt-4o is the model name, and requests for models such as gpt-4o-2024-08-06 also time out after 10 seconds
+    claude-3-5-sonnet: 10 # The timeout for model claude-3-5-sonnet is 10 seconds; requests for models such as claude-3-5-sonnet-20240620 also time out after 10 seconds
+    default: 10 # Models without a configured timeout use the default of 10 seconds; requests for models not listed in model_timeout default to 10 seconds. If default is not set, uni-api falls back to the timeout from the TIMEOUT environment variable, which defaults to 100 seconds
+    o1-mini: 30 # The timeout for model o1-mini is 30 seconds; requests for models whose names start with o1-mini time out after 30 seconds
+    o1-preview: 100 # The timeout for model o1-preview is 100 seconds; requests for models whose names start with o1-preview time out after 100 seconds
 ```
 
 Mount the configuration file and start the uni-api docker container:
main.py CHANGED

@@ -29,6 +29,7 @@ import os
 import string
 import json
 
+DEFAULT_TIMEOUT = float(os.getenv("TIMEOUT", 100))
 is_debug = bool(os.getenv("DEBUG", False))
 # is_debug = False
 
@@ -97,7 +98,9 @@ async def lifespan(app: FastAPI):
 
     yield
     # code to run on shutdown
-    await app.state.client.aclose()
+    # await app.state.client.aclose()
+    if hasattr(app.state, 'client_manager'):
+        await app.state.client_manager.close()
 
 app = FastAPI(lifespan=lifespan, debug=is_debug)
 
@@ -493,6 +496,49 @@ app.add_middleware(
 
 app.add_middleware(StatsMiddleware)
 
+class ClientManager:
+    def __init__(self, pool_size=100):
+        self.pool_size = pool_size
+        self.clients = {}  # {timeout_value: AsyncClient}
+        self.locks = {}    # {timeout_value: Lock}
+
+    async def init(self, default_config):
+        self.default_config = default_config
+
+    @asynccontextmanager
+    async def get_client(self, timeout_value):
+        # one lock per timeout value
+        if timeout_value not in self.locks:
+            self.locks[timeout_value] = asyncio.Lock()
+
+        async with self.locks[timeout_value]:
+            # get or create the client for this timeout value
+            if timeout_value not in self.clients:
+                timeout = httpx.Timeout(
+                    connect=15.0,
+                    read=timeout_value,
+                    write=30.0,
+                    pool=self.pool_size
+                )
+                self.clients[timeout_value] = httpx.AsyncClient(
+                    timeout=timeout,
+                    limits=httpx.Limits(max_connections=self.pool_size),
+                    **self.default_config
+                )
+
+        try:
+            yield self.clients[timeout_value]
+        except Exception as e:
+            # if the client runs into trouble, close it and recreate it on next use
+            await self.clients[timeout_value].aclose()
+            del self.clients[timeout_value]
+            raise e
+
+    async def close(self):
+        for client in self.clients.values():
+            await client.aclose()
+        self.clients.clear()
+
 @app.middleware("http")
 async def ensure_config(request: Request, call_next):
     if not hasattr(app.state, 'config'):
@@ -508,6 +554,32 @@ async def ensure_config(request: Request, call_next):
     else:
         raise Exception("No admin API key found")
 
+    if app and not hasattr(app.state, 'client_manager'):
+
+        default_config = {
+            "headers": {
+                "User-Agent": "curl/7.68.0",
+                "Accept": "*/*",
+            },
+            "http2": True,
+            "verify": True,
+            "follow_redirects": True
+        }
+
+        # initialize the client manager
+        app.state.client_manager = ClientManager(pool_size=200)
+        await app.state.client_manager.init(default_config)
+
+        # store the timeout configuration
+        app.state.timeouts = {}
+        if app.state.config and 'preferences' in app.state.config:
+            for model_name, timeout_value in app.state.config['preferences'].get('model_timeout', {}).items():
+                app.state.timeouts[model_name] = timeout_value
+            if "default" not in app.state.config['preferences'].get('model_timeout', {}):
+                app.state.timeouts["default"] = DEFAULT_TIMEOUT
+
+        print("app.state.timeouts", app.state.timeouts)
+
     return await call_next(request)
 
 # update success and failure counts in process_request
@@ -578,32 +650,51 @@ async def process_request(request: Union[RequestModel, ImageGenerationRequest, A
         pass
     else:
         logger.info(json.dumps(payload, indent=4, ensure_ascii=False))
+
     current_info = request_info.get()
+    model = model_dict[request.model]
+
+    timeout_value = None
+    # try an exact match first
+    if model in app.state.timeouts:
+        timeout_value = app.state.timeouts[model]
+    else:
+        # no exact match, so try a fuzzy (substring) match
+        for timeout_model in app.state.timeouts:
+            if timeout_model in model:
+                timeout_value = app.state.timeouts[timeout_model]
+                break
+
+    # if nothing matched, use the default value
+    if timeout_value is None:
+        timeout_value = app.state.timeouts.get("default", DEFAULT_TIMEOUT)
+
     try:
-        model = model_dict[request.model]
-        if request.stream:
-            generator = fetch_response_stream(app.state.client, url, headers, payload, engine, model)
-            wrapped_generator, first_response_time = await error_handling_wrapper(generator)
-            response = StarletteStreamingResponse(wrapped_generator, media_type="text/event-stream")
-        else:
-            generator = fetch_response(app.state.client, url, headers, payload, engine, model)
-            wrapped_generator, first_response_time = await error_handling_wrapper(generator)
-            first_element = await anext(wrapped_generator)
-            first_element = first_element.lstrip("data: ")
-            # print("first_element", first_element)
-            first_element = json.loads(first_element)
-            response = StarletteStreamingResponse(iter([json.dumps(first_element)]), media_type="application/json")
-            # response = JSONResponse(first_element)
-
-        # update the success count and first response time
-        await update_channel_stats(current_info["request_id"], provider['provider'], request.model, token, success=True)
-        # await app.middleware_stack.app.update_channel_stats(current_info["request_id"], provider['provider'], request.model, token, success=True)
-        current_info["first_response_time"] = first_response_time
-        current_info["success"] = True
-        current_info["provider"] = provider['provider']
-
-        return response
+        async with app.state.client_manager.get_client(timeout_value) as client:
+            if request.stream:
+                generator = fetch_response_stream(client, url, headers, payload, engine, model)
+                wrapped_generator, first_response_time = await error_handling_wrapper(generator)
+                response = StarletteStreamingResponse(wrapped_generator, media_type="text/event-stream")
+            else:
+                generator = fetch_response(client, url, headers, payload, engine, model)
+                wrapped_generator, first_response_time = await error_handling_wrapper(generator)
+                first_element = await anext(wrapped_generator)
+                first_element = first_element.lstrip("data: ")
+                # print("first_element", first_element)
+                first_element = json.loads(first_element)
+                response = StarletteStreamingResponse(iter([json.dumps(first_element)]), media_type="application/json")
+                # response = JSONResponse(first_element)
+
+            # update the success count and first response time
+            await update_channel_stats(current_info["request_id"], provider['provider'], request.model, token, success=True)
+            # await app.middleware_stack.app.update_channel_stats(current_info["request_id"], provider['provider'], request.model, token, success=True)
+            current_info["first_response_time"] = first_response_time
+            current_info["success"] = True
+            current_info["provider"] = provider['provider']
+            return response
 
-    except (Exception, HTTPException, asyncio.CancelledError, httpx.ReadError, httpx.RemoteProtocolError) as e:
+    except (Exception, HTTPException, asyncio.CancelledError, httpx.ReadError, httpx.RemoteProtocolError, httpx.ReadTimeout) as e:
         await update_channel_stats(current_info["request_id"], provider['provider'], request.model, token, success=False)
         # await app.middleware_stack.app.update_channel_stats(current_info["request_id"], provider['provider'], request.model, token, success=False)
 
@@ -823,25 +914,36 @@ class ModelRequestHandler:
             try:
                 response = await process_request(request, provider, endpoint, token)
                 return response
-            except HTTPException as e:
-                logger.error(f"Error with provider {provider['provider']}: {str(e)}")
-                status_code = e.status_code
-                error_message = e.detail
-
-                if auto_retry:
-                    continue
+            except (Exception, HTTPException, asyncio.CancelledError, httpx.ReadError, httpx.RemoteProtocolError, httpx.ReadTimeout) as e:
+
+                # set the status code and error message based on the exception type
+                if isinstance(e, httpx.ReadTimeout):
+                    status_code = 504  # Gateway Timeout
+                    error_message = "Request timed out"
+                elif isinstance(e, httpx.ReadError):
+                    status_code = 502  # Bad Gateway
+                    error_message = "Network read error"
+                elif isinstance(e, httpx.RemoteProtocolError):
+                    status_code = 502  # Bad Gateway
+                    error_message = "Remote protocol error"
+                elif isinstance(e, asyncio.CancelledError):
+                    status_code = 499  # Client Closed Request
+                    error_message = "Request was cancelled"
+                elif isinstance(e, HTTPException):
+                    status_code = e.status_code
+                    error_message = str(e.detail)
                 else:
-                    raise HTTPException(status_code=500, detail=f"Error: Current provider response failed: {error_message}")
-            except (Exception, asyncio.CancelledError, httpx.ReadError, httpx.RemoteProtocolError) as e:
-                logger.error(f"Error with provider {provider['provider']}: {str(e)}")
+                    status_code = 500  # Internal Server Error
+                    error_message = str(e) or f"Unknown error: {e.__class__.__name__}"
+
+                logger.error(f"Error {status_code} with provider {provider['provider']}: {error_message}")
                 if is_debug:
                     import traceback
                     traceback.print_exc()
-                error_message = str(e)
                 if auto_retry:
                     continue
                 else:
-                    raise HTTPException(status_code=500, detail=f"Error: Current provider response failed: {error_message}")
+                    raise HTTPException(status_code=status_code, detail=f"Error: Current provider response failed: {error_message}")
 
         current_info = request_info.get()
         current_info["first_response_time"] = -1
@@ -1155,7 +1257,7 @@ from xue.components.menubar import (
     Menubar, MenubarMenu, MenubarTrigger, MenubarContent,
     MenubarItem, MenubarSeparator
 )
-from xue.components import input, dropdown, sheet, form, button, checkbox, sidebar
+from xue.components import input, dropdown, sheet, form, button, checkbox, sidebar, chart
 from xue.components.model_config_row import model_config_row
 # import sys
 # import os
@@ -1277,13 +1379,13 @@ sidebar_items = [
     #     "value": "settings",
     #     "hx": {"get": "/settings", "target": "#main-content"}
     # },
-    # {
-    #     "icon": "database",
-    #     # "label": "数据",
-    #     "label": "Data",
-    #     "value": "data",
-    #     "hx": {"get": "/data", "target": "#main-content"}
-    # },
+    {
+        "icon": "database",
+        # "label": "数据",
+        "label": "Data",
+        "value": "data",
+        "hx": {"get": "/data", "target": "#main-content"}
+    },
     # {
     #     "icon": "scroll-text",
     #     # "label": "日志",
@@ -1342,6 +1444,82 @@ async def toggle_sidebar(is_collapsed: bool = False):
         active_item="dashboard"
     ).render()
 
+@frontend_router.get("/data", response_class=HTMLResponse, dependencies=[Depends(frontend_rate_limit_dependency)])
+async def data_page(x_api_key: str = Depends(get_api_key)):
+    if not x_api_key:
+        return RedirectResponse(url="/login", status_code=303)
+
+    if DISABLE_DATABASE:
+        return HTMLResponse("数据库已禁用")
+
+    async with async_session() as session:
+        # start of the past 24 hours
+        start_time = datetime.now(timezone.utc) - timedelta(hours=24)
+
+        # fetch request counts per model and provider
+        model_stats = await session.execute(
+            select(
+                RequestStat.model,
+                RequestStat.provider,
+                func.count().label('count')
+            )
+            .where(RequestStat.timestamp >= start_time)
+            .group_by(RequestStat.model, RequestStat.provider)
+            .order_by(desc('count'))
+        )
+        model_stats = model_stats.fetchall()
+
+        # reshape the data for the chart
+        chart_data = []
+        providers = list(set(stat.provider for stat in model_stats))
+        models = list(set(stat.model for stat in model_stats))
+
+        for model in models:
+            data_point = {"model": model}
+            for provider in providers:
+                count = next(
+                    (stat.count for stat in model_stats
+                     if stat.model == model and stat.provider == provider),
+                    0
+                )
+                data_point[provider] = count
+            chart_data.append(data_point)
+
+        # define the chart series
+        series = [
+            {"name": provider, "data_key": provider}
+            for provider in providers
+        ]
+
+        # chart configuration
+        chart_config = {
+            "stacked": True,  # stacked bar chart
+            "horizontal": False,
+            "colors": [f"hsl({i * 360 / len(providers)}, 70%, 50%)" for i in range(len(providers))],  # generate distinct colors
+            "grid": True,
+            "legend": True,
+            "tooltip": True
+        }
+
+        result = HTML(
+            Head(title="数据统计"),
+            Body(
+                Div(
+                    Div(
+                        "模型使用统计 (24小时)",
+                        class_="text-2xl font-bold mb-4"
+                    ),
+                    Div(
+                        chart.bar_chart("model-usage-chart", chart_data, "model", series, chart_config),
+                        class_="h-[600px]"  # chart height
+                    ),
+                    class_="container mx-auto p-4"
+                )
+            )
+        ).render()
+
+        return result
+
 @frontend_router.get("/dropdown-menu/{menu_id}/{row_id}", response_class=HTMLResponse, dependencies=[Depends(frontend_rate_limit_dependency)])
 async def get_columns_menu(menu_id: str, row_id: str):
     columns = [
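
One consequence of the ClientManager design above is that clients are cached per timeout value: every request that resolves to the same read timeout shares a single httpx.AsyncClient (and its connection pool), while a new timeout value lazily creates a new client. A small sketch of that behavior, assuming ClientManager as defined in this diff and httpx installed:

```python
import asyncio

async def demo():
    manager = ClientManager(pool_size=200)
    await manager.init({"follow_redirects": True})

    async with manager.get_client(10) as a:  # first use of a 10s read timeout: client created
        pass
    async with manager.get_client(10) as b:  # same timeout value: same cached client
        pass
    async with manager.get_client(30) as c:  # new timeout value: separate client
        pass

    assert a is b and a is not c
    await manager.close()  # closes every cached client, as lifespan() does on shutdown

asyncio.run(demo())
```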
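The reshaping loop in the data_page handler above turns grouped (model, provider, count) rows into one record per model with one key per provider, which is the layout a stacked bar chart consumes. A standalone sketch with hypothetical rows:

```python
# Hypothetical grouped rows: (model, provider, count)
rows = [("gpt-4o", "openai", 12), ("gpt-4o", "azure", 3), ("claude-3-5-sonnet", "anthropic", 7)]

providers = sorted({p for _, p, _ in rows})
models = sorted({m for m, _, _ in rows})

# one data point per model, one key per provider, missing combinations filled with 0
chart_data = [
    {"model": m, **{p: next((c for m2, p2, c in rows if m2 == m and p2 == p), 0) for p in providers}}
    for m in models
]
print(chart_data)
# [{'model': 'claude-3-5-sonnet', 'anthropic': 7, 'azure': 0, 'openai': 0},
#  {'model': 'gpt-4o', 'anthropic': 0, 'azure': 3, 'openai': 12}]
```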
utils.py CHANGED

@@ -130,23 +130,6 @@ def update_config(config_data, use_config_url=False):
 
 # read the YAML configuration file
 async def load_config(app=None):
-
-    if app and not hasattr(app.state, 'client'):
-        import os
-        TIMEOUT = float(os.getenv("TIMEOUT", 100))
-        timeout = httpx.Timeout(connect=15.0, read=TIMEOUT, write=30.0, pool=30.0)
-        default_headers = {
-            "User-Agent": "curl/7.68.0",  # mimic curl's User-Agent
-            "Accept": "*/*",  # curl's default Accept header
-        }
-        app.state.client = httpx.AsyncClient(
-            timeout=timeout,
-            headers=default_headers,
-            http2=True,  # enable HTTP/2
-            verify=True,  # keep SSL verification (set to False to disable, not recommended)
-            follow_redirects=True,  # follow redirects automatically
-        )
-
     try:
         with open(API_YAML_PATH, 'r', encoding='utf-8') as file:
             conf = yaml.load(file)