🪞 Frontend: Add request model statistics bar chart
🐛 Bug: Fix the bug where the error reason is not displayed when a request times out
✨ Feature: Support custom timeouts for different models
README.md
CHANGED

Detailed advanced configuration of `api.yaml` (this commit extends the section with a global `preferences.model_timeout` block at the end):

```yaml
providers:
  - provider: provider_name # Service provider name, such as openai, anthropic, gemini, openrouter, deepbricks; any name is fine, required
    base_url: https://api.your.com/v1/chat/completions # Backend service API address, required
    api: sk-YgS6GTi0b4bEabc4C # Provider's API Key, required
    model: # Optional; if model is not configured, all available models are fetched automatically through the /v1/models endpoint using base_url and api
      - gpt-4o # Usable model name, required
      - claude-3-5-sonnet-20240620: claude-3-5-sonnet # Rename a model: claude-3-5-sonnet-20240620 is the provider's model name and claude-3-5-sonnet is the new name; a simpler name can replace the original complex one, optional
      - dall-e-3

  - provider: anthropic
    base_url: https://api.anthropic.com/v1/messages
    api: # Supports multiple API Keys; multiple keys automatically enable round-robin load balancing; at least one key, required
      - sk-ant-api03-bNnAOJyA-xQw_twAA
      - sk-ant-api02-bNnxxxx
    model:
      - claude-3-5-sonnet-20240620: claude-3-5-sonnet # Rename a model, as above, optional
    tools: true # Whether tools are supported (code generation, document generation, etc.); default is true, optional

  - provider: gemini
    base_url: https://generativelanguage.googleapis.com/v1beta # base_url supports v1beta/v1, only for Gemini models, required
    api: AIzaSyAN2k6IRdgw
    model:
      - gemini-1.5-pro
      - gemini-1.5-flash-exp-0827: gemini-1.5-flash # After renaming, the original model name gemini-1.5-flash-exp-0827 cannot be used; to keep the original name available, add it as its own entry, as on the next line
      - gemini-1.5-flash-exp-0827 # With this line, both gemini-1.5-flash-exp-0827 and gemini-1.5-flash can be requested
    tools: true

  - provider: vertex
    project_id: gen-lang-client-xxxxxxxxxxxxxx # Description: your Google Cloud project ID. Format: a string, usually lowercase letters, numbers, and hyphens. How to obtain: find it in the project selector of the Google Cloud Console.
    private_key: "-----BEGIN PRIVATE KEY-----\nxxxxx\n-----END PRIVATE" # Description: the private key of the Google Cloud Vertex AI service account. Format: a JSON-formatted string containing the service account's private key information. How to obtain: create a service account in the Google Cloud Console, generate a JSON key file, and use its content as this value.
    client_email: [email protected] # Description: the email address of the Google Cloud Vertex AI service account. Format: usually a string like "[email protected]". How to obtain: generated when the service account is created, or viewable under "IAM & Admin" in the Google Cloud Console.
    model:
      - gemini-1.5-pro
      - gemini-1.5-flash
      - claude-3-sonnet@20240229: claude-3-sonnet
      - claude-3-haiku@20240307: claude-3-haiku
    tools: true
    notes: https://xxxxx.com/ # Can hold the provider's website, remarks, official documentation, optional

  - provider: cloudflare
    api: f42b3xxxxxxxxxxq4aoGAh # Cloudflare API Key, required
    cf_account_id: 8ec0xxxxxxxxxxxxe721 # Cloudflare Account ID, required
    model:
      - '@cf/meta/llama-3.1-8b-instruct': llama-3.1-8b # Rename a model: @cf/meta/llama-3.1-8b-instruct is the provider's original model name and must be quoted to avoid a YAML syntax error; llama-3.1-8b is the new name, optional
      - '@cf/meta/llama-3.1-8b-instruct' # Must be quoted to avoid a YAML syntax error

  - provider: other-provider
    base_url: https://api.xxx.com/v1/messages
    model:
      - causallm-35b-beta2ep-q6k: causallm-35b
      - anthropic/claude-3-5-sonnet
    tools: false
    engine: openrouter # Force a specific message format; currently supports gpt, claude, gemini, and openrouter native formats, optional

api_keys:
  - api: sk-KjjI60Yf0JFWxfgRmXqFWyGtWUd9GZnmi3KlvowmRWpWpQRo # API Key; users need one to use this service, required
    model: # Models this API Key may use, required. Channel-level round-robin load balancing is enabled by default, and each request tries models in the order configured here, independent of the channel order under providers, so each API key can have its own request order.
      - gpt-4o # Usable model name; may use gpt-4o from every provider that offers it
      - claude-3-5-sonnet # Usable model name; may use claude-3-5-sonnet from every provider that offers it
      - gemini/* # Usable model name; may use only models from the provider named gemini, where gemini is the provider name and * matches all of its models
    role: admin

  - api: sk-pkhf60Yf0JGyJxgRmXqFQyTgWUd9GZnmi3KlvowmRWpWqrhy
    model:
      - anthropic/claude-3-5-sonnet # May use only the claude-3-5-sonnet model from the provider named anthropic; claude-3-5-sonnet models from other providers cannot be used. This syntax does not match a model literally named anthropic/claude-3-5-sonnet offered by other-provider.
      - <anthropic/claude-3-5-sonnet> # Angle brackets skip the lookup of claude-3-5-sonnet under the channel named anthropic and treat the whole string anthropic/claude-3-5-sonnet as the model name. This syntax matches the model named anthropic/claude-3-5-sonnet offered by other-provider, but not the claude-3-5-sonnet model under anthropic.
      - openai-test/text-moderation-latest # When message moderation is enabled, the text-moderation-latest model under the channel named openai-test can be used for moderation.
    preferences:
      SCHEDULING_ALGORITHM: fixed_priority # With fixed_priority, fixed-priority scheduling always executes the first channel that offers the requested model. Enabled by default; fixed_priority is the default value. Possible values: fixed_priority, round_robin, weighted_round_robin, lottery, random.
      # With random, channels offering the requested model are picked at random.
      # With round_robin, channels offering the requested model are tried in turn.
      AUTO_RETRY: true # Whether to automatically retry the next provider: true to retry automatically, false not to; default is true
      RATE_LIMIT: 2/min # Rate limit: maximum requests per time window, e.g. 2/min (2 per minute), 5/hour, 10/day, 10/month, 10/year. Default 60/min, optional
      ENABLE_MODERATION: true # Whether to enable message moderation: true to enable, false to disable; default is false. When enabled, messages are moderated and inappropriate ones return an error.

  # Channel-level weighted load balancing configuration example
  - api: sk-KjjI60Yd0JFWtxxxxxxxxxxxxxxwmRWpWpQRo
    model:
      - gcp1/*: 5 # The number after the colon is the weight; only positive integers are supported
      - gcp2/*: 3 # A larger number means a greater probability of being requested
      - gcp3/*: 2 # In this example the channels carry 10 weight in total, so out of 10 requests, 5 go to the gcp1/* model, 3 to gcp2/*, and 2 to gcp3/* (see the scheduling sketch after this block)
    preferences:
      SCHEDULING_ALGORITHM: weighted_round_robin # Only when SCHEDULING_ALGORITHM is weighted_round_robin and the channels above carry weights are requests made in weighted order: weighted round-robin requests the channels offering the requested model in order of weight. With lottery, channels offering the requested model are drawn at random in proportion to weight. Channels without weights automatically fall back to round_robin.
      AUTO_RETRY: true

preferences: # Global configuration
  model_timeout: # Model timeouts in seconds; default 100 seconds, optional
    gpt-4o: 10 # Timeout for model gpt-4o is 10 seconds; gpt-4o is the model name, and requests for models like gpt-4o-2024-08-06 also use 10 seconds
    claude-3-5-sonnet: 10 # Timeout for model claude-3-5-sonnet is 10 seconds; requests for models like claude-3-5-sonnet-20240620 also use 10 seconds
    default: 10 # Models without a configured timeout use the default of 10 seconds; requests for models not listed in model_timeout default to 10 seconds. If default is not set, uni-api falls back to the timeout from the environment variable TIMEOUT, which defaults to 100 seconds
    o1-mini: 30 # Timeout for model o1-mini is 30 seconds; requests for models whose names start with o1-mini use 30 seconds
    o1-preview: 100 # Timeout for model o1-preview is 100 seconds; requests for models whose names start with o1-preview use 100 seconds
```

Mount the configuration file and start the uni-api docker container:
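To make the weight semantics concrete, here is a minimal standalone sketch (not uni-api's actual scheduler) of how positive-integer weights can drive `weighted_round_robin` and `lottery` selection; the channel names mirror the hypothetical gcp1/gcp2/gcp3 example above:

```python
import itertools
import random

# Hypothetical channel weights, mirroring the gcp1/gcp2/gcp3 example above.
weights = {"gcp1": 5, "gcp2": 3, "gcp3": 2}

def weighted_round_robin(weights):
    """Yield channel names deterministically, in proportion to their weights."""
    expanded = [name for name, w in weights.items() for _ in range(w)]
    return itertools.cycle(expanded)

def lottery(weights):
    """Pick one channel at random, with probability proportional to its weight."""
    names, ws = zip(*weights.items())
    return random.choices(names, weights=ws, k=1)[0]

scheduler = weighted_round_robin(weights)
picks = [next(scheduler) for _ in range(10)]
print(picks)  # 5 of the 10 picks are gcp1, 3 are gcp2, 2 are gcp3
```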
README_CN.md
CHANGED

The Chinese README gains the same `model_timeout` documentation (translated here):

```yaml
    preferences:
      SCHEDULING_ALGORITHM: weighted_round_robin # Only when SCHEDULING_ALGORITHM is weighted_round_robin and the channels above carry weights are requests made in weighted order: weighted round-robin requests the channels offering the requested model in order of weight. With lottery, channels offering the requested model are drawn at random in proportion to weight. Channels without weights automatically fall back to round_robin.
      AUTO_RETRY: true

preferences: # Global configuration
  model_timeout: # Model timeouts in seconds; default 100 seconds, optional
    gpt-4o: 10 # Timeout for model gpt-4o is 10 seconds; gpt-4o is the model name, and requests for models like gpt-4o-2024-08-06 also use 10 seconds
    claude-3-5-sonnet: 10 # Timeout for model claude-3-5-sonnet is 10 seconds; requests for models like claude-3-5-sonnet-20240620 also use 10 seconds
    default: 10 # Models without a configured timeout use the default of 10 seconds; requests for models not listed in model_timeout default to 10 seconds. If default is not set, uni-api falls back to the timeout from the environment variable TIMEOUT, which defaults to 100 seconds
    o1-mini: 30 # Timeout for model o1-mini is 30 seconds; requests for models whose names start with o1-mini use 30 seconds
    o1-preview: 100 # Timeout for model o1-preview is 100 seconds; requests for models whose names start with o1-preview use 100 seconds
```

Mount the configuration file and start the uni-api docker container:
main.py
CHANGED
The default timeout is now read from the TIMEOUT environment variable once at import time, falling back to 100 seconds:

```python
import string
import json

DEFAULT_TIMEOUT = float(os.getenv("TIMEOUT", 100))
is_debug = bool(os.getenv("DEBUG", False))
# is_debug = False
```
On shutdown, the lifespan hook closes the client manager instead of the old single shared client:

```python
    yield
    # Shutdown cleanup
    # await app.state.client.aclose()
    if hasattr(app.state, 'client_manager'):
        await app.state.client_manager.close()

app = FastAPI(lifespan=lifespan, debug=is_debug)
```
|
497 |
app.add_middleware(StatsMiddleware)
|
498 |
|
499 |
+
class ClientManager:
|
500 |
+
def __init__(self, pool_size=100):
|
501 |
+
self.pool_size = pool_size
|
502 |
+
self.clients = {} # {timeout_value: AsyncClient}
|
503 |
+
self.locks = {} # {timeout_value: Lock}
|
504 |
+
|
505 |
+
async def init(self, default_config):
|
506 |
+
self.default_config = default_config
|
507 |
+
|
508 |
+
@asynccontextmanager
|
509 |
+
async def get_client(self, timeout_value):
|
510 |
+
# 对同一超时值的客户端加锁
|
511 |
+
if timeout_value not in self.locks:
|
512 |
+
self.locks[timeout_value] = asyncio.Lock()
|
513 |
+
|
514 |
+
async with self.locks[timeout_value]:
|
515 |
+
# 获取或创建指定超时值的客户端
|
516 |
+
if timeout_value not in self.clients:
|
517 |
+
timeout = httpx.Timeout(
|
518 |
+
connect=15.0,
|
519 |
+
read=timeout_value,
|
520 |
+
write=30.0,
|
521 |
+
pool=self.pool_size
|
522 |
+
)
|
523 |
+
self.clients[timeout_value] = httpx.AsyncClient(
|
524 |
+
timeout=timeout,
|
525 |
+
limits=httpx.Limits(max_connections=self.pool_size),
|
526 |
+
**self.default_config
|
527 |
+
)
|
528 |
+
|
529 |
+
try:
|
530 |
+
yield self.clients[timeout_value]
|
531 |
+
except Exception as e:
|
532 |
+
# 如果客户端出现问题,关闭并重新创建
|
533 |
+
await self.clients[timeout_value].aclose()
|
534 |
+
del self.clients[timeout_value]
|
535 |
+
raise e
|
536 |
+
|
537 |
+
async def close(self):
|
538 |
+
for client in self.clients.values():
|
539 |
+
await client.aclose()
|
540 |
+
self.clients.clear()
|
541 |
+
|
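A sketch of the intended usage (the real call site is in `process_request` below, and this assumes the running FastAPI `app`): repeated requests that resolve to the same timeout value reuse one cached client and its connection pool.

```python
# Sketch: requests that resolve to the same timeout share one AsyncClient.
async def probe(url: str):
    async with app.state.client_manager.get_client(10) as client_a:
        r1 = await client_a.get(url)
    async with app.state.client_manager.get_client(10) as client_b:
        r2 = await client_b.get(url)
    assert client_a is client_b  # same cached client for timeout value 10
    return r1.status_code, r2.status_code
```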
The `ensure_config` middleware initializes the client manager on first request and loads the per-model timeouts from `preferences.model_timeout` into `app.state.timeouts`:

```python
@app.middleware("http")
async def ensure_config(request: Request, call_next):
    # ... unchanged: load the config; raises "No admin API key found" if none is configured ...

    if app and not hasattr(app.state, 'client_manager'):
        default_config = {
            "headers": {
                "User-Agent": "curl/7.68.0",
                "Accept": "*/*",
            },
            "http2": True,
            "verify": True,
            "follow_redirects": True
        }

        # Initialize the client manager
        app.state.client_manager = ClientManager(pool_size=200)
        await app.state.client_manager.init(default_config)

        # Store the timeout configuration
        app.state.timeouts = {}
        if app.state.config and 'preferences' in app.state.config:
            for model_name, timeout_value in app.state.config['preferences'].get('model_timeout', {}).items():
                app.state.timeouts[model_name] = timeout_value
            if "default" not in app.state.config['preferences'].get('model_timeout', {}):
                app.state.timeouts["default"] = DEFAULT_TIMEOUT

        print("app.state.timeouts", app.state.timeouts)

    return await call_next(request)

# Success and failure counts are updated in process_request
```
In `process_request`, the timeout for the requested model is resolved (exact match, then fuzzy match, then default), a client with that timeout is borrowed from the manager, and `httpx.ReadTimeout` is now caught explicitly:

```python
    else:
        logger.info(json.dumps(payload, indent=4, ensure_ascii=False))

    current_info = request_info.get()
    model = model_dict[request.model]

    timeout_value = None
    # Try an exact match first
    if model in app.state.timeouts:
        timeout_value = app.state.timeouts[model]
    else:
        # No exact match: fall back to fuzzy matching (configured name contained in the requested name)
        for timeout_model in app.state.timeouts:
            if timeout_model in model:
                timeout_value = app.state.timeouts[timeout_model]
                break

    # Nothing matched: use the default
    if timeout_value is None:
        timeout_value = app.state.timeouts.get("default", DEFAULT_TIMEOUT)

    try:
        async with app.state.client_manager.get_client(timeout_value) as client:
            if request.stream:
                generator = fetch_response_stream(client, url, headers, payload, engine, model)
                wrapped_generator, first_response_time = await error_handling_wrapper(generator)
                response = StarletteStreamingResponse(wrapped_generator, media_type="text/event-stream")
            else:
                generator = fetch_response(client, url, headers, payload, engine, model)
                wrapped_generator, first_response_time = await error_handling_wrapper(generator)
                first_element = await anext(wrapped_generator)
                first_element = first_element.lstrip("data: ")
                # print("first_element", first_element)
                first_element = json.loads(first_element)
                response = StarletteStreamingResponse(iter([json.dumps(first_element)]), media_type="application/json")
                # response = JSONResponse(first_element)

            # Update the success count and the first response time
            await update_channel_stats(current_info["request_id"], provider['provider'], request.model, token, success=True)
            # await app.middleware_stack.app.update_channel_stats(current_info["request_id"], provider['provider'], request.model, token, success=True)
            current_info["first_response_time"] = first_response_time
            current_info["success"] = True
            current_info["provider"] = provider['provider']
            return response

    except (Exception, HTTPException, asyncio.CancelledError, httpx.ReadError, httpx.RemoteProtocolError, httpx.ReadTimeout) as e:
        await update_channel_stats(current_info["request_id"], provider['provider'], request.model, token, success=False)
        # await app.middleware_stack.app.update_channel_stats(current_info["request_id"], provider['provider'], request.model, token, success=False)
```
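Extracted as a standalone sketch (a hypothetical helper, with the `model_timeout` values from the README example), the lookup shows why gpt-4o-2024-08-06 inherits gpt-4o's 10-second timeout:

```python
timeouts = {"gpt-4o": 10, "claude-3-5-sonnet": 10, "default": 10,
            "o1-mini": 30, "o1-preview": 100}

def resolve_timeout(model: str, timeouts: dict, fallback: float = 100.0) -> float:
    if model in timeouts:                  # exact match first
        return timeouts[model]
    for name, value in timeouts.items():   # fuzzy: configured name contained in the requested name
        if name in model:
            return value
    return timeouts.get("default", fallback)

print(resolve_timeout("gpt-4o-2024-08-06", timeouts))   # 10, via the "gpt-4o" entry
print(resolve_timeout("o1-mini-2024-09-12", timeouts))  # 30
print(resolve_timeout("some-other-model", timeouts))    # 10, via "default"
```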
In `ModelRequestHandler`, the broad `except HTTPException` becomes a typed mapping from exception class to HTTP status code, so the error reason (including timeouts) is propagated to the caller instead of being swallowed:

```python
        try:
            response = await process_request(request, provider, endpoint, token)
            return response
        except (Exception, HTTPException, asyncio.CancelledError, httpx.ReadError, httpx.RemoteProtocolError, httpx.ReadTimeout) as e:
            # Map the exception type to a status code and an error message
            if isinstance(e, httpx.ReadTimeout):
                status_code = 504  # Gateway Timeout
                error_message = "Request timed out"
            elif isinstance(e, httpx.ReadError):
                status_code = 502  # Bad Gateway
                error_message = "Network read error"
            elif isinstance(e, httpx.RemoteProtocolError):
                status_code = 502  # Bad Gateway
                error_message = "Remote protocol error"
            elif isinstance(e, asyncio.CancelledError):
                status_code = 499  # Client Closed Request
                error_message = "Request was cancelled"
            elif isinstance(e, HTTPException):
                status_code = e.status_code
                error_message = str(e.detail)
            else:
                status_code = 500  # Internal Server Error
                error_message = str(e) or f"Unknown error: {e.__class__.__name__}"

            logger.error(f"Error {status_code} with provider {provider['provider']}: {error_message}")
            if is_debug:
                import traceback
                traceback.print_exc()
            if auto_retry:
                continue
            else:
                raise HTTPException(status_code=status_code, detail=f"Error: Current provider response failed: {error_message}")

        current_info = request_info.get()
        current_info["first_response_time"] = -1
```
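Once retries are exhausted, the reason now reaches the caller: a read timeout surfaces as a 504 whose JSON body carries the message (FastAPI serializes `HTTPException.detail` into a `detail` field). A hedged client-side sketch, assuming a local deployment on port 8000 and uni-api's OpenAI-compatible chat endpoint:

```python
import httpx

# Hypothetical call against a local uni-api instance; host, port, and the
# Bearer auth scheme are assumptions, the key is the README example key.
resp = httpx.post(
    "http://localhost:8000/v1/chat/completions",
    headers={"Authorization": "Bearer sk-pkhf60Yf0JGyJxgRmXqFQyTgWUd9GZnmi3KlvowmRWpWqrhy"},
    json={"model": "gpt-4o", "messages": [{"role": "user", "content": "hi"}]},
)
print(resp.status_code)  # e.g. 504 if every channel timed out
print(resp.json())       # {"detail": "Error: Current provider response failed: Request timed out"}
```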
The chart component is added to the frontend imports:

```python
from xue.components.menubar import (
    Menubar, MenubarMenu, MenubarTrigger, MenubarContent,
    MenubarItem, MenubarSeparator
)
from xue.components import input, dropdown, sheet, form, button, checkbox, sidebar, chart
from xue.components.model_config_row import model_config_row
# import sys
# import os
```
A "Data" entry is added to the sidebar:

```python
    # {
    #     "value": "settings",
    #     "hx": {"get": "/settings", "target": "#main-content"}
    # },
    {
        "icon": "database",
        # "label": "数据",
        "label": "Data",
        "value": "data",
        "hx": {"get": "/data", "target": "#main-content"}
    },
    # {
    #     "icon": "scroll-text",
    #     # "label": "日志",
```
A new `/data` page renders the per-model request counts from the past 24 hours as a stacked bar chart:

```python
        active_item="dashboard"
    ).render()

@frontend_router.get("/data", response_class=HTMLResponse, dependencies=[Depends(frontend_rate_limit_dependency)])
async def data_page(x_api_key: str = Depends(get_api_key)):
    if not x_api_key:
        return RedirectResponse(url="/login", status_code=303)

    if DISABLE_DATABASE:
        return HTMLResponse("Database is disabled")

    async with async_session() as session:
        # Start of the 24-hour window
        start_time = datetime.now(timezone.utc) - timedelta(hours=24)

        # Per-model request counts, grouped by provider
        model_stats = await session.execute(
            select(
                RequestStat.model,
                RequestStat.provider,
                func.count().label('count')
            )
            .where(RequestStat.timestamp >= start_time)
            .group_by(RequestStat.model, RequestStat.provider)
            .order_by(desc('count'))
        )
        model_stats = model_stats.fetchall()

        # Shape the rows for the chart component
        chart_data = []
        providers = list(set(stat.provider for stat in model_stats))
        models = list(set(stat.model for stat in model_stats))

        for model in models:
            data_point = {"model": model}
            for provider in providers:
                count = next(
                    (stat.count for stat in model_stats
                     if stat.model == model and stat.provider == provider),
                    0
                )
                data_point[provider] = count
            chart_data.append(data_point)

        # Chart series: one per provider
        series = [
            {"name": provider, "data_key": provider}
            for provider in providers
        ]

        # Chart configuration
        chart_config = {
            "stacked": True,  # stacked bar chart
            "horizontal": False,
            "colors": [f"hsl({i * 360 / len(providers)}, 70%, 50%)" for i in range(len(providers))],  # distinct hue per provider
            "grid": True,
            "legend": True,
            "tooltip": True
        }

        result = HTML(
            Head(title="Data statistics"),
            Body(
                Div(
                    Div(
                        "Model usage statistics (24 hours)",
                        class_="text-2xl font-bold mb-4"
                    ),
                    Div(
                        chart.bar_chart("model-usage-chart", chart_data, "model", series, chart_config),
                        class_="h-[600px]"  # chart height
                    ),
                    class_="container mx-auto p-4"
                )
            )
        ).render()

        return result

@frontend_router.get("/dropdown-menu/{menu_id}/{row_id}", response_class=HTMLResponse, dependencies=[Depends(frontend_rate_limit_dependency)])
async def get_columns_menu(menu_id: str, row_id: str):
    columns = [
```
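The shaping loop in `data_page` produces one row per model with one key per provider, which is what `chart.bar_chart` stacks. For two hypothetical providers it yields something like:

```python
# Hypothetical shape of the chart inputs (provider names and counts invented):
chart_data = [
    {"model": "gpt-4o", "openai": 12, "anthropic": 0},
    {"model": "claude-3-5-sonnet", "openai": 0, "anthropic": 7},
]
series = [
    {"name": "openai", "data_key": "openai"},
    {"name": "anthropic", "data_key": "anthropic"},
]
```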
utils.py
CHANGED

The ad-hoc shared httpx client is removed from `load_config`; the `ClientManager` in main.py now owns client construction:

```diff
 # Read the YAML configuration file
 async def load_config(app=None):
-    if app and not hasattr(app.state, 'client'):
-        import os
-        TIMEOUT = float(os.getenv("TIMEOUT", 100))
-        timeout = httpx.Timeout(connect=15.0, read=TIMEOUT, write=30.0, pool=30.0)
-        default_headers = {
-            "User-Agent": "curl/7.68.0",  # mimic curl's User-Agent
-            "Accept": "*/*",  # curl's default Accept header
-        }
-        app.state.client = httpx.AsyncClient(
-            timeout=timeout,
-            headers=default_headers,
-            http2=True,  # enable HTTP/2
-            verify=True,  # keep SSL verification (set to False to disable, not recommended)
-            follow_redirects=True,  # follow redirects automatically
-        )
     try:
         with open(API_YAML_PATH, 'r', encoding='utf-8') as file:
             conf = yaml.load(file)
```