zzc0208 committed on
Commit
d5de47d
·
verified ·
1 Parent(s): db7e968

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +749 -752
app.py CHANGED
@@ -1,752 +1,749 @@
1
- # ruff: noqa
2
- import random
3
- import requests
4
- import io
5
- import gradio as gr
6
- import pandas as pd
7
- from PIL import Image
8
- from useapi import *
9
- from utils import *
10
-
11
-
12
- def set_interface_language(lang):
13
- if lang == "Chinese":
14
- return {
15
- "title": "# LLM角色扮演竞技场:在角色扮演场景中评估LLMs的表现",
16
- "intro": """
17
- ## 📜 规则
18
- #### · 与两个匿名的模型(例如 Claude, Gemini, Llama )同时进行角色扮演(他们会成为一个相同的角色),投票选出更好的那个
19
- #### · 你可以一直对话直到选出赢家(单轮对话上限是5轮)
20
- #### · [角色来自Rubii.ai, 想要和角色进行更长久的角色扮演?来Rubii.ai吧。](https://rubii.ai/)
21
- #### · 想要开始对话,您需要先在"选择角色"中选择一个角色,之后您需要在"选择时刻"中选择一个时刻,时刻是给角色选一个场景和开场白,这样可以与角色在想要的场景中进行对话。
22
- """,
23
- "avatar_label": "角色图片",
24
- "char_choice_label": "选择角色",
25
- "preset_prompt_label": "选择时刻",
26
- "refresh_button": "刷新角色列表",
27
- "bio_label": "输入你的自设",
28
- "bio_placeholder": "我的名字叫Rubii",
29
- "chatbox1_label": "Model 1 的答复",
30
- "chatbox2_label": "Model 2 的答复",
31
- "user_input_placeholder": "在此输入对话",
32
- "battle_button": "发送",
33
- "score_instruction": "在获得答复后可使用以下四个按钮对本轮对话打分",
34
- "model1win_button": "Model 1 效果更好",
35
- "model2win_button": "Model 2 效果更好",
36
- "tie_button": "效果一样好",
37
- "bothbad_button": "效果都不好",
38
- "result_placeholder": "结果: ",
39
- "refresh_chat_button": "刷新对话",
40
- "ranking_tab": "🏆 模型排行",
41
- "model_name_header": "模型名称",
42
- "contest_count_header": "参赛次数",
43
- "win_rate_header": "模型胜率",
44
- "random_model": "⚔️ 随机模型",
45
- "select_language": "选择语言(语言很重要,选择语言决定了角色扮演中AI用的语言)",
46
- "select_language2": "选择语言",
47
- "contant": """
48
- ## 联系我们
49
- ### 我们正在积极寻求更多模型愿意参与我们排行榜,
50
- ### 我们也在寻求合作,如果您有意的话,欢迎请联系我们。**Email:** [[email protected]](mailto:[email protected])
51
- ### 您可以在我们的[Discord](https://discord.gg/jwcTfTpYj5)反馈遇到的BUG和问题
52
- ## 服务条款
53
- ### 用户在使用服务之前需同意以下条款:
54
- ### 该服务为研究预览版。它仅提供有限的安全措施,可能会生成冒犯性内容。不得将该服务用于任何非法、有害、暴力、种族主义或性相关的目的。请勿上传任何私人信息。该服务会收集用户的对话数据,包括文字和图像,并保留在创作共用署名(CC-BY)或类似许可证下分发这些数据的权利。
55
- """,
56
- }
57
- elif lang == "English":
58
- return {
59
- "title": "# Roleplay LLM Arena: Benchmarking LLMs in the Roleplay Scenario",
60
- "intro": """
61
- ## 📜 Rules
62
- #### · Engage in roleplay with two anonymous models (e.g., Claude, Gemini, Llama) simultaneously, as they take on the same character. Vote for the one that performs better.
63
- #### · You can continue the conversation until you select a winner (the maximum number of dialogue rounds per session is 5).
64
- #### · [The character comes from Rubii.ai. Want to engage in longer roleplay sessions with the character? Come to Rubii.ai.](https://rubii.ai/)
65
- #### · To start the comparison, you need to first select a character in "Choose Character." Then, you need to select a "Moment" in "Choose Moment." A moment is used to set a scene and opening line for the character, allowing you to converse with the character in the desired scenario.
66
- """,
67
- "avatar_label": "Character Image",
68
- "char_choice_label": "Select Character",
69
- "preset_prompt_label": "Select Moment",
70
- "refresh_button": "Refresh Character List",
71
- "bio_label": "Enter your bio",
72
- "bio_placeholder": "My name is Rubii.",
73
- "chatbox1_label": "Model 1's Response",
74
- "chatbox2_label": "Model 2's Response",
75
- "user_input_placeholder": "Enter conversation here",
76
- "battle_button": "Send",
77
- "score_instruction": "Use the following four buttons to score this round after receiving the response",
78
- "model1win_button": "Model 1 is better",
79
- "model2win_button": "Model 2 is better",
80
- "tie_button": "Both are equally good",
81
- "bothbad_button": "Both are bad",
82
- "result_placeholder": "Result: ",
83
- "refresh_chat_button": "Refresh Conversation",
84
- "ranking_tab": "🏆 Model Ranking",
85
- "model_name_header": "Model Name",
86
- "contest_count_header": "Contest Count",
87
- "win_rate_header": "Win Rate",
88
- "random_model": "⚔️ Random Model",
89
- "select_language": "Select a language (Language is important; the choice of language determines the language the AI will use in the roleplay)",
90
- "select_language2": "Select a language",
91
- "contant": """
92
- ## Contact Us
93
- ### We are actively seeking more models willing to participate in our leaderboard.
94
- ### We are also looking for collaboration opportunities. If you are interested, please contact us. **Email:** [[email protected]](mailto:[email protected]).
95
- ### You can report any bugs and issues on our [Discord](https://discord.gg/jwcTfTpYj5).
96
- ## Terms of Service
97
- ### Users must agree to the following terms before using the service:
98
- ### This service is a research preview. It provides limited safety measures and may generate offensive content. The service should not be used for any illegal, harmful, violent, racist, or sexually related purposes. Please do not upload any personal information. The service collects user conversation data, including text and images, and reserves the right to distribute this data under Creative Commons Attribution (CC-BY) or similar licenses.
99
- """,
100
- }
101
- elif lang == "Japanese":
102
- return {
103
- "title": "# LLMロールプレイアリーナ:ロールプレイシナリオでのLLMのパフォーマンスを評価",
104
- "intro": """
105
- ## 📜 ルール
106
- #### · 2つの匿名モデル(例: Claude, Gemini, Llama)と同時にロールプレイを行い(彼らは同じキャラクターになります)、より良い方に投票してください。
107
- #### · 勝者が決まるまで会話を続けることができます(1ターンあたりの会話の上限は5ターンです)。
108
- #### · [キャラクターはRubii.aiから来ました。キャラクターともっと長いロールプレイをしたいですか?Rubii.aiに来てください。](https://rubii.ai/)]
109
- #### · 会話を始めるには、まず「キャラクターを選択」でキャラクターを選択し、「時刻を選択」でシーンとオープニングを選択してください。これにより、キャラクターと望むシーンで会話ができます。
110
- """,
111
- "avatar_label": "キャラクター画像",
112
- "char_choice_label": "キャラクターを選択",
113
- "preset_prompt_label": "時刻を選択",
114
- "refresh_button": "キャラクターリストを更新",
115
- "bio_label": "あなたのプロフィールを入力",
116
- "bio_placeholder": "私の名前はRubii",
117
- "chatbox1_label": "Model 1 の応答",
118
- "chatbox2_label": "Model 2 の応答",
119
- "user_input_placeholder": "ここにメッセージを入力",
120
- "battle_button": "送信",
121
- "score_instruction": "応答を受け取った後、以下の4つのボタンでこのターンの会話を評価できます",
122
- "model1win_button": "Model 1 がより良い",
123
- "model2win_button": "Model 2 がより良い",
124
- "tie_button": "同じくらい良い",
125
- "bothbad_button": "どちらも良くない",
126
- "result_placeholder": "結果: ",
127
- "refresh_chat_button": "会話を更新",
128
- "ranking_tab": "🏆 モデルランキング",
129
- "model_name_header": "モデル名",
130
- "contest_count_header": "参加回数",
131
- "win_rate_header": "モデル勝率",
132
- "random_model": "⚔️ ランダムモデル",
133
- "select_language": "言語を選択してください(言語は非常に重要です。選択した言語はロールプレイでAIが使用する言語を決定します)",
134
- "select_language2": "言語を選択してください",
135
- "contant": """
136
- ## お問い合わせ
137
- ### 私たちは、リーダーボードに参加したいモデルを積極的に探しています。
138
- ### 私たちはコラボレーションの機会も探しています。興味がある方は、ぜひご連絡ください。**メール:** [[email protected]](mailto:[email protected])。
139
- ### バグや問題が発生した場合は、[Discord](https://discord.gg/jwcTfTpYj5)で報告できます。
140
- ## 利用規約
141
- ### サービスを利用する前に、ユーザーは以下の規約に同意する必要があります:
142
- ### 本サービスは研究プレビュー版です。限られた安全対策を提供しており、攻撃的な内容を生成する可能性があります。本サービスを違法、有害、暴力的、人種差別的、または性的な目的で使用しないでください。個人情報のアップロードは避けてください。本サービスはユーザーの会話データ(テキストおよび画像)を収集し、クリエイティブ・コモンズ・ライセンス(CC-BY)または同様のライセンスの下でこれらのデータを配布する権利を保有します。
143
- """,
144
- }
145
-
146
- elif lang == "Korean":
147
- return {
148
- "title": "# LLM 역할 수행 경기장: 역할 수행 시나리오에서 LLM의 성능 평가",
149
- "intro": """
150
- ## 📜 규칙
151
- #### · 두 개의 익명의 모델(예: Claude, Gemini, Llama)과 동시에 역할 수행을 진행하고, 더 나은 모델을 선택하세요.
152
- #### · 우승자를 선택할 때까지 계속 대화를 진행할 수 있습니다(최대 5라운드).
153
- #### · [캐릭터는 Rubii.ai에서 왔습니다. 캐릭터와 더 긴 롤플레이를 하고 싶으신가요? Rubii.ai로 오세요.](https://rubii.ai/)
154
- #### · 대화를 시작하려면 먼저 "캐릭터 선택"에서 캐릭터를 선택해야 하며, 그 다음 "시나리오 선택"에서 시나리오를 선택해야 합니다. 시나리오는 캐릭터에게 장면과 오프닝을 제공하여 원하는 시나리오에서 대화를 진행할 수 있게 합니다.
155
- """,
156
- "avatar_label": "캐릭터 이미지",
157
- "char_choice_label": "캐릭터 선택",
158
- "preset_prompt_label": "시나리오 선택",
159
- "refresh_button": "캐릭터 목록 새로고침",
160
- "bio_label": "자신의 설정 입력",
161
- "bio_placeholder": "제 이름은 루비입니다.",
162
- "chatbox1_label": "Model 1의 응답",
163
- "chatbox2_label": "Model 2의 응답",
164
- "user_input_placeholder": "여기에 대화 입력",
165
- "battle_button": "보내기",
166
- "score_instruction": "응답을 받은 후 아래 네 개의 버튼을 사용하여 이번 라운드를 평가할 수 있습니다.",
167
- "model1win_button": "Model 1이 더 나음",
168
- "model2win_button": "Model 2가 더 나음",
169
- "tie_button": "똑같이 좋음",
170
- "bothbad_button": "둘 다 별로임",
171
- "result_placeholder": "결과: ",
172
- "refresh_chat_button": "대화 새로고침",
173
- "ranking_tab": "🏆 모델 순위",
174
- "model_name_header": "모델 이름",
175
- "contest_count_header": "참가 횟수",
176
- "win_rate_header": "모델 승률",
177
- "random_model": "⚔️ 랜덤 모델",
178
- "select_language": "언어를 선택하세요 (언어는 매우 중요합니다. 선택한 언어는 역할 놀이에서 AI가 사용할 언어를 결정합니다)",
179
- "select_language2": "언어를 선택하세요",
180
- "contant": """
181
- ## 문의하기
182
- ### 우리는 리더보드에 참여할 의향이 있는 모델을 적극적으로 찾고 있습니다.
183
- ### 우리는 또한 협력 기회를 모색하고 있습니다. 관심이 있으시면 연락해 주세요. **이메일:** [[email protected]](mailto:[email protected])
184
- ### 버그 및 문제는 [Discord](https://discord.gg/jwcTfTpYj5)에서 보고할 수 있습니다.
185
- ## 이용 약관
186
- ### 사용자는 서비스를 사용하기 전에 다음 약관에 동의해야 합니다:
187
- ### 이 서비스는 연구 미리보기 버전입니다. 제한된 안전 조치를 제공하며, 불쾌한 콘텐츠를 생성할 수 있습니다. 이 서비스를 불법적, 해롭거나, 폭력적이거나, 인종차별적이거나, 성적으로 관련된 목적으로 사용하지 마십시오. 개인 정보를 업로드하지 마십시오. 이 서비스는 사용자 대화 데이터(텍스트 및 이미지)를 수집하며, 크리에이티브 커먼즈 저작자 표시(CC-BY) 또는 유사한 라이선스 하에 이 데이터를 배포할 권리를 보유합니다.
188
- """,
189
- }
190
-
191
-
192
- async def run_battle(
193
- user_input,
194
- chatbox1,
195
- chatbox2,
196
- session_id1,
197
- session_id2,
198
- chat_count,
199
- bio,
200
- preset_prompt,
201
- selected_models,
202
- ):
203
- if chat_count >= 5:
204
- chatbox1 = chatbox1 + [
205
- (
206
- "您已经在此体验了多次模型效果了,前往 rubii.ai 继续对话吧",
207
- "您已经在此体验了多次模型效果了,前往 rubii.ai 继续对话吧",
208
- )
209
- ]
210
- chatbox2 = chatbox2 + [
211
- (
212
- "您已经在此体验了多次模型效果了,前往 rubii.ai 继续对话吧",
213
- "您已经在此体验了多次模型效果了,前往 rubii.ai 继续对话吧",
214
- )
215
- ]
216
- yield (
217
- chatbox1,
218
- chatbox2,
219
- selected_models[0],
220
- selected_models[1],
221
- gr.update(interactive=True),
222
- gr.update(interactive=True),
223
- gr.update(interactive=True),
224
- gr.update(interactive=True),
225
- session_id1,
226
- session_id2,
227
- chat_count,
228
- gr.update(interactive=False),
229
- gr.update(value=""),
230
- )
231
- return
232
- chat_count += 1
233
- chatbox1 = chatbox1 + [(user_input, "")]
234
- chatbox2 = chatbox2 + [(user_input, "")]
235
- yield (
236
- chatbox1,
237
- chatbox2,
238
- selected_models[0],
239
- selected_models[1],
240
- gr.update(interactive=True),
241
- gr.update(interactive=True),
242
- gr.update(interactive=True),
243
- gr.update(interactive=True),
244
- session_id1,
245
- session_id2,
246
- chat_count,
247
- gr.update(interactive=True),
248
- gr.update(value=""),
249
- )
250
- response1 = ""
251
- response2 = ""
252
- async for chunk in combine_streams(
253
- user_input,
254
- user_input,
255
- selected_models[0],
256
- selected_models[1],
257
- preset_prompt["_id"],
258
- preset_prompt["_id"],
259
- session_id1,
260
- session_id2,
261
- bio,
262
- bio,
263
- language,
264
- ):
265
- if "requestA_header" in chunk:
266
- session_id1 = chunk["requestA_header"]["x-session-id"]
267
- if "requestB_header" in chunk:
268
- session_id2 = chunk["requestB_header"]["x-session-id"]
269
- if "requestA" in chunk:
270
- response1 += chunk["requestA"]
271
- if "requestB" in chunk:
272
- response2 += chunk["requestB"]
273
- chatbox1 = chatbox1[:-1] + [(user_input, response1)]
274
- chatbox2 = chatbox2[:-1] + [(user_input, response2)]
275
- yield (
276
- chatbox1,
277
- chatbox2,
278
- selected_models[0],
279
- selected_models[1],
280
- gr.update(interactive=True),
281
- gr.update(interactive=True),
282
- gr.update(interactive=True),
283
- gr.update(interactive=True),
284
- session_id1,
285
- session_id2,
286
- chat_count,
287
- gr.update(interactive=True),
288
- gr.update(value=""),
289
- )
290
-
291
-
292
- def select_winner(model1_name, model2_name, state, turn, anony, Language):
293
- if Language == "Chinese":
294
- if state == "Model 1":
295
- result = f"感谢您的投票,你选择了 {state} - {model1_name} 效果更好,{model2_name} 效果更差,刷新以进行下一轮测试"
296
- elif state == "Model 2":
297
- result = f"感谢您的投票,你选择了 {state} - {model2_name} 效果更好,{model1_name} 效果更差,刷新以进行下一轮测试"
298
- elif state == "tie":
299
- result = f"感谢您的投票,你选择了 {model1_name} 与 {model2_name} 效果都很好,刷新以进行下一轮测试"
300
- elif state == "bothbad":
301
- result = f"感谢您的投票,你选择了 {model1_name} 与 {model2_name} 效果都不好,刷新以进行下一轮测试"
302
- elif Language == "English":
303
- if state == "Model 1":
304
- result = f"Thank you for your vote. You chose {state} - {model1_name} performed better, {model2_name} performed worse. Refresh to proceed to the next round of testing."
305
- elif state == "Model 2":
306
- result = f"Thank you for your vote. You chose {state} - {model2_name} performed better, {model1_name} performed worse. Refresh to proceed to the next round of testing."
307
- elif state == "tie":
308
- result = f"Thank you for your vote. You selected that both {model1_name} and {model2_name} performed well. Refresh to proceed to the next round of testing."
309
- elif state == "bothbad":
310
- result = f"Thank you for your vote. You chose that both {model1_name} and {model2_name} performed poorly. Refresh to proceed to the next round of testing."
311
- elif Language == "Japanese":
312
- if state == "Model 1":
313
- result = f"投票ありがとうございます。あなたは {state} - {model1_name} の方が良く、{model2_name} は劣っていると選びました。次のテストを行うにはリフレッシュしてください。"
314
- elif state == "Model 2":
315
- result = f"投票ありがとうございます。あなたは {state} - {model2_name} の方が良く、{model1_name} は劣っていると選びました。次のテストを行うにはリフレッシュしてください。"
316
- elif state == "tie":
317
- result = f"投票ありがとうございます。あなたは {model1_name} と {model2_name} の両方が良いと選びました。次のテストを行うにはリフレッシュしてください。"
318
- elif state == "bothbad":
319
- result = f"投票ありがとうございます。あなたは {model1_name} と {model2_name} の両方が良くないと選びました。次のテストを行うにはリフレッシュしてください。"
320
- elif Language == "Korean":
321
- if state == "Model 1":
322
- result = f"투표해 주셔서 감사합니다. {state} - {model1_name} 이(가) 더 좋다고 선택하셨습니다. {model2_name} 이(가) 더 나쁩니다. 다음 테스트를 위해 새로 고침하세요."
323
- elif state == "Model 2":
324
- result = f"투표해 주셔서 감사합니다. {state} - {model2_name} 이(가) 더 좋다고 선택하셨습니다. {model1_name} 이(가) 더 나쁩니다. 다음 테스트를 위해 새로 고침하세요."
325
- elif state == "tie":
326
- result = f"투표해 주셔서 감사합니다. {model1_name} 과(와) {model2_name} 둘 다 좋다고 선택하셨습니다. 다음 테스트를 위해 새로 고침하세요."
327
- elif state == "bothbad":
328
- result = f"투표해 주셔서 감사합니다. {model1_name} 과(와) {model2_name} 둘 다 나쁘다고 선택하셨습니다. 다음 테스트를 위해 새로 고침하세요."
329
- update_model_stats(model1_name, model2_name, state, turn, anony, Language)
330
- # 返回结果并让打分按钮置灰
331
- return (
332
- result,
333
- gr.update(interactive=False),
334
- gr.update(interactive=False),
335
- gr.update(interactive=False),
336
- gr.update(interactive=False),
337
- gr.update(interactive=False),
338
- )
339
-
340
-
341
- async def get_preset_prompts(char_id, language):
342
- recommand_data = await recommand(char_id, language)
343
- return [(item["name"], item) for item in recommand_data]
344
-
345
-
346
- async def update_preset_prompt(char_id, language):
347
- preset_prompts = await get_preset_prompts(char_id, language)
348
- avatar_image_url = id_to_avatar(char_id)
349
- response = requests.get(avatar_image_url)
350
- image = Image.open(io.BytesIO(response.content))
351
- resized_image = image.resize((224, 224))
352
- return gr.update(choices=preset_prompts), resized_image
353
-
354
-
355
- def update_chat_and_avatar(moment):
356
- opening = [(None, moment["opening"])]
357
- selected_models = random.sample(models, 2)
358
- while selected_models[0] == selected_models[1]:
359
- selected_models = random.sample(models, 2)
360
- print(selected_models)
361
- return opening, opening, moment["image_url"], "", "", selected_models
362
-
363
-
364
- def refresh_data(language):
365
- characters = recommand_character(language)
366
- characters = [(item["name"], item["_id"]) for item in characters]
367
- return gr.update(choices=characters)
368
-
369
-
370
- def refresh_chat(moment):
371
- chatbox1, chatbox2, avatar_image, session_id1, session_id2, selected_models = (
372
- update_chat_and_avatar(moment)
373
- )
374
- return (
375
- chatbox1,
376
- chatbox2,
377
- avatar_image,
378
- session_id1,
379
- session_id2,
380
- selected_models,
381
- gr.update(interactive=False),
382
- gr.update(interactive=False),
383
- gr.update(interactive=False),
384
- gr.update(interactive=False),
385
- gr.update(interactive=True),
386
- gr.update(value="结果:"),
387
- gr.update(value=0),
388
- )
389
-
390
-
391
- def update_language(lang):
392
- print("update_language", lang)
393
- text = set_interface_language(lang)
394
- characters = recommand_character(lang)
395
- characters = [(item["name"], item["_id"]) for item in characters]
396
- return (
397
- text["title"],
398
- text["intro"],
399
- None,
400
- gr.update(label=text["char_choice_label"], choices=characters),
401
- gr.update(label=text["preset_prompt_label"]),
402
- gr.update(value=text["refresh_button"]),
403
- gr.update(placeholder=text["bio_placeholder"], label=text["bio_label"]),
404
- gr.update(label=text["chatbox1_label"]),
405
- gr.update(label=text["chatbox2_label"]),
406
- gr.update(placeholder=text["user_input_placeholder"]),
407
- gr.update(value=text["battle_button"]),
408
- gr.update(placeholder=text["result_placeholder"]),
409
- gr.update(value=text["refresh_chat_button"]),
410
- gr.update(value=text["model1win_button"]),
411
- gr.update(value=text["model2win_button"]),
412
- gr.update(value=text["tie_button"]),
413
- gr.update(value=text["bothbad_button"]),
414
- gr.update(label=text["random_model"]),
415
- gr.update(label=text["ranking_tab"]),
416
- gr.update(label=text["select_language"], value=lang),
417
- text["score_instruction"],
418
- text["contant"],
419
- gr.update(value=lang,label=text["select_language2"])
420
- )
421
-
422
-
423
- def auto_i18n(request: gr.Request):
424
- print(request.headers["Accept-Language"])
425
- if request.headers["Accept-Language"].split(",")[0].lower().startswith("zh"):
426
- language = "Chinese"
427
- elif request.headers["Accept-Language"].split(",")[0].lower().startswith("en"):
428
- language = "English"
429
- elif request.headers["Accept-Language"].split(",")[0].lower().startswith("ja"):
430
- language = "Japanese"
431
- elif request.headers["Accept-Language"].split(",")[0].lower().startswith("ko"):
432
- language = "Korean"
433
- else:
434
- language = "Chinese"
435
- return language
436
-
437
-
438
- def init_and_update(request: gr.Request):
439
- detected_lang = auto_i18n(request)
440
- return [detected_lang] + list(update_language(detected_lang))
441
- def passive_language_change(lang):
442
- return gr.update(value=lang)
443
-
444
-
445
- with gr.Blocks() as demo:
446
- # load 的时候就会刷新掉default_language
447
- default_language = gr.State("Chinese")
448
- language = "Chinese"
449
- characters = recommand_character(language)
450
- characters = [(item["name"], item["_id"]) for item in characters]
451
- text = set_interface_language(default_language.value)
452
- models = get_models()
453
- with gr.Tab(text["random_model"]) as random_model_tab:
454
- with gr.Column():
455
- title = gr.Markdown(f"{text['title']}")
456
- with gr.Column(scale=10):
457
- intro = gr.Markdown(f"{text['intro']}")
458
- with gr.Column(scale=1):
459
- language = gr.Radio(
460
- ["English", "Chinese", "Japanese", "Korean"],
461
- label=text["select_language"],
462
- value=default_language.value,
463
- )
464
- with gr.Row():
465
- with gr.Column(scale=1):
466
- avatar_image = gr.Image(scale=1, label=text["avatar_label"])
467
- with gr.Column(scale=7):
468
- with gr.Row():
469
- char_choice = gr.Dropdown(
470
- choices=characters,
471
- label=text["char_choice_label"],
472
- scale=3,
473
- )
474
- preset_prompt = gr.Dropdown(
475
- label=text["preset_prompt_label"], scale=3
476
- )
477
- refresh_button = gr.Button(
478
- text["refresh_button"], scale=1, variant="primary"
479
- )
480
- with gr.Row():
481
- bio = gr.Textbox(
482
- show_label=True,
483
- label=text["bio_label"],
484
- placeholder=text["bio_placeholder"],
485
- )
486
- with gr.Row():
487
- chatbox1 = gr.Chatbot(label=text["chatbox1_label"])
488
- chatbox2 = gr.Chatbot(label=text["chatbox2_label"])
489
- with gr.Row():
490
- user_input = gr.Textbox(
491
- placeholder=text["user_input_placeholder"], scale=3, show_label=False
492
- )
493
- battle_button = gr.Button(text["battle_button"], scale=1, variant="primary")
494
- with gr.Column():
495
- score_instruction = gr.Markdown(f"{text['score_instruction']}")
496
- with gr.Row():
497
- model1win_button = gr.Button(
498
- text["model1win_button"], variant="primary", interactive=False
499
- )
500
- model2win_button = gr.Button(
501
- text["model2win_button"], variant="primary", interactive=False
502
- )
503
- tie_button = gr.Button(text["tie_button"], interactive=False)
504
- bothbad_button = gr.Button(text["bothbad_button"], interactive=False)
505
- with gr.Row():
506
- result_output = gr.Textbox(
507
- placeholder=text["result_placeholder"], scale=3, show_label=False
508
- )
509
- refresh_chat_button = gr.Button(
510
- text["refresh_chat_button"], variant="secondary", scale=1
511
- )
512
-
513
- with gr.Tab(text["ranking_tab"]) as ranking_tab:
514
- language2 = gr.Radio(
515
- ["English", "Chinese", "Japanese", "Korean"],
516
- label=text["select_language"],
517
- value=default_language.value,
518
- )
519
- gr.DataFrame(
520
- load_dataframe,
521
- datatype=["str", "str", "str", "str", "str"],
522
- every=gr.Timer(10),
523
- )
524
- # 插入 CSS 样式,用于隐藏底部的“通过 API 使用”链接
525
- gr.HTML("""
526
- <style>
527
- footer {display: none !important;}
528
- </style>
529
- """)
530
- with gr.Row():
531
- with gr.Column(scale=5):
532
- contant = gr.Markdown(f"{text['contant']}")
533
- with gr.Column(scale=1):
534
- gr.Image("group.jpg")
535
-
536
- selected_models = gr.State([])
537
- model1_state = gr.State("")
538
- model2_state = gr.State("")
539
- chat_count = gr.State(0)
540
- session_id1 = gr.State("")
541
- session_id2 = gr.State("")
542
-
543
- refresh_button.click(fn=refresh_data, inputs=language, outputs=char_choice)
544
- refresh_chat_button.click(
545
- fn=refresh_chat,
546
- inputs=[preset_prompt],
547
- outputs=[
548
- chatbox1,
549
- chatbox2,
550
- avatar_image,
551
- session_id1,
552
- session_id2,
553
- selected_models,
554
- model1win_button,
555
- model2win_button,
556
- tie_button,
557
- bothbad_button,
558
- battle_button,
559
- result_output,
560
- chat_count,
561
- ],
562
- )
563
- language.change(
564
- fn=update_language,
565
- inputs=language,
566
- outputs=[
567
- title,
568
- intro,
569
- avatar_image,
570
- char_choice,
571
- preset_prompt,
572
- refresh_button,
573
- bio,
574
- chatbox1,
575
- chatbox2,
576
- user_input,
577
- battle_button,
578
- result_output,
579
- refresh_chat_button,
580
- model1win_button,
581
- model2win_button,
582
- tie_button,
583
- bothbad_button,
584
- random_model_tab,
585
- ranking_tab,
586
- language,
587
- score_instruction,
588
- contant,
589
- language2
590
- ],
591
- )
592
- language2.change(
593
- fn=passive_language_change,
594
- inputs=language2,
595
- outputs=language
596
- )
597
- char_choice.change(
598
- fn=update_preset_prompt,
599
- inputs=[char_choice, language],
600
- outputs=[preset_prompt, avatar_image],
601
- )
602
- preset_prompt.change(
603
- fn=update_chat_and_avatar,
604
- inputs=[preset_prompt],
605
- outputs=[
606
- chatbox1,
607
- chatbox2,
608
- avatar_image,
609
- session_id1,
610
- session_id2,
611
- selected_models,
612
- ],
613
- )
614
- model1win_button.click(
615
- fn=select_winner,
616
- inputs=[
617
- model1_state,
618
- model2_state,
619
- gr.State("Model 1"),
620
- chat_count,
621
- gr.State(True),
622
- language,
623
- ],
624
- outputs=[
625
- result_output,
626
- model1win_button,
627
- model2win_button,
628
- tie_button,
629
- bothbad_button,
630
- battle_button,
631
- ],
632
- )
633
- model2win_button.click(
634
- fn=select_winner,
635
- inputs=[
636
- model1_state,
637
- model2_state,
638
- gr.State("Model 2"),
639
- chat_count,
640
- gr.State(True),
641
- language,
642
- ],
643
- outputs=[
644
- result_output,
645
- model1win_button,
646
- model2win_button,
647
- tie_button,
648
- bothbad_button,
649
- battle_button,
650
- ],
651
- )
652
- tie_button.click(
653
- fn=select_winner,
654
- inputs=[
655
- model1_state,
656
- model2_state,
657
- gr.State("tie"),
658
- chat_count,
659
- gr.State(True),
660
- language,
661
- ],
662
- outputs=[
663
- result_output,
664
- model1win_button,
665
- model2win_button,
666
- tie_button,
667
- bothbad_button,
668
- battle_button,
669
- ],
670
- )
671
- bothbad_button.click(
672
- fn=select_winner,
673
- inputs=[
674
- model1_state,
675
- model2_state,
676
- gr.State("bothbad"),
677
- chat_count,
678
- gr.State(True),
679
- language,
680
- ],
681
- outputs=[
682
- result_output,
683
- model1win_button,
684
- model2win_button,
685
- tie_button,
686
- bothbad_button,
687
- battle_button,
688
- ],
689
- )
690
- battle_button.click(
691
- run_battle,
692
- inputs=[
693
- user_input,
694
- chatbox1,
695
- chatbox2,
696
- session_id1,
697
- session_id2,
698
- chat_count,
699
- bio,
700
- preset_prompt,
701
- selected_models,
702
- ],
703
- outputs=[
704
- chatbox1,
705
- chatbox2,
706
- model1_state,
707
- model2_state,
708
- model1win_button,
709
- model2win_button,
710
- tie_button,
711
- bothbad_button,
712
- session_id1,
713
- session_id2,
714
- chat_count,
715
- battle_button,
716
- user_input,
717
- ],
718
- )
719
- demo.load(
720
- init_and_update,
721
- outputs=[
722
- default_language,
723
- title,
724
- intro,
725
- avatar_image,
726
- char_choice,
727
- preset_prompt,
728
- refresh_button,
729
- bio,
730
- chatbox1,
731
- chatbox2,
732
- user_input,
733
- battle_button,
734
- result_output,
735
- refresh_chat_button,
736
- model1win_button,
737
- model2win_button,
738
- tie_button,
739
- bothbad_button,
740
- random_model_tab,
741
- ranking_tab,
742
- language,
743
- score_instruction,
744
- contant,
745
- ],
746
- )
747
-
748
- if __name__ == "__main__":
749
- demo.queue(default_concurrency_limit=8).launch(
750
- server_name="0.0.0.0",
751
- server_port=7860,
752
- )
 
1
+ # ruff: noqa
2
+ import random
3
+ import requests
4
+ import io
5
+ import gradio as gr
6
+ import pandas as pd
7
+ from PIL import Image
8
+ from useapi import *
9
+ from utils import *
10
+
11
+
12
+ def set_interface_language(lang):
13
+ if lang == "Chinese":
14
+ return {
15
+ "title": "# LLM角色扮演竞技场:在角色扮演场景中评估LLMs的表现",
16
+ "intro": """
17
+ ## 📜 规则
18
+ #### · 与两个匿名的模型(例如 Claude, Gemini, Llama )同时进行角色扮演(他们会成为一个相同的角色),投票选出更好的那个
19
+ #### · 你可以一直对话直到选出赢家(单轮对话上限是5轮)
20
+ #### · [角色来自Rubii.ai, 想要和角色进行更长久的角色扮演?来Rubii.ai吧。](https://rubii.ai/)
21
+ #### · 想要开始对话,您需要先在"选择角色"中选择一个角色,之后您需要在"选择时刻"中选择一个时刻,时刻是给角色选一个场景和开场白,这样可以与角色在想要的场景中进行对话。
22
+ """,
23
+ "avatar_label": "角色图片",
24
+ "char_choice_label": "选择角色",
25
+ "preset_prompt_label": "选择时刻",
26
+ "refresh_button": "刷新角色列表",
27
+ "bio_label": "输入你的自设",
28
+ "bio_placeholder": "我的名字叫Rubii",
29
+ "chatbox1_label": "Model 1 的答复",
30
+ "chatbox2_label": "Model 2 的答复",
31
+ "user_input_placeholder": "在此输入对话",
32
+ "battle_button": "发送",
33
+ "score_instruction": "在获得答复后可使用以下四个按钮对本轮对话打分",
34
+ "model1win_button": "Model 1 效果更好",
35
+ "model2win_button": "Model 2 效果更好",
36
+ "tie_button": "效果一样好",
37
+ "bothbad_button": "效果都不好",
38
+ "result_placeholder": "结果: ",
39
+ "refresh_chat_button": "刷新对话",
40
+ "ranking_tab": "🏆 模型排行",
41
+ "model_name_header": "模型名称",
42
+ "contest_count_header": "参赛次数",
43
+ "win_rate_header": "模型胜率",
44
+ "random_model": "⚔️ 随机模型",
45
+ "select_language": "选择语言(语言很重要,选择语言决定了角色扮演中AI用的语言)",
46
+ "select_language2": "选择语言",
47
+ "contant": """
48
+ ## 联系我们
49
+ ### 我们正在积极寻求更多模型愿意参与我们排行榜,
50
+ ### 我们也在寻求合作,如果您有意的话,欢迎请联系我们。**Email:** [[email protected]](mailto:[email protected])
51
+ ### 您可以在我们的[Discord](https://discord.gg/jwcTfTpYj5)反馈遇到的BUG和问题
52
+ ## 服务条款
53
+ ### 用户在使用服务之前需同意以下条款:
54
+ ### 该服务为研究预览版。它仅提供有限的安全措施,可能会生成冒犯性内容。不得将该服务用于任何非法、有害、暴力、种族主义或性相关的目的。请勿上传任何私人信息。该服务会收集用户的对话数据,包括文字和图像,并保留在创作共用署名(CC-BY)或类似许可证下分发这些数据的权利。
55
+ """,
56
+ }
57
+ elif lang == "English":
58
+ return {
59
+ "title": "# Roleplay LLM Arena: Benchmarking LLMs in the Roleplay Scenario",
60
+ "intro": """
61
+ ## 📜 Rules
62
+ #### · Engage in roleplay with two anonymous models (e.g., Claude, Gemini, Llama) simultaneously, as they take on the same character. Vote for the one that performs better.
63
+ #### · You can continue the conversation until you select a winner (the maximum number of dialogue rounds per session is 5).
64
+ #### · [The character comes from Rubii.ai. Want to engage in longer roleplay sessions with the character? Come to Rubii.ai.](https://rubii.ai/)
65
+ #### · To start the comparison, you need to first select a character in "Choose Character." Then, you need to select a "Moment" in "Choose Moment." A moment is used to set a scene and opening line for the character, allowing you to converse with the character in the desired scenario.
66
+ """,
67
+ "avatar_label": "Character Image",
68
+ "char_choice_label": "Select Character",
69
+ "preset_prompt_label": "Select Moment",
70
+ "refresh_button": "Refresh Character List",
71
+ "bio_label": "Enter your bio",
72
+ "bio_placeholder": "My name is Rubii.",
73
+ "chatbox1_label": "Model 1's Response",
74
+ "chatbox2_label": "Model 2's Response",
75
+ "user_input_placeholder": "Enter conversation here",
76
+ "battle_button": "Send",
77
+ "score_instruction": "Use the following four buttons to score this round after receiving the response",
78
+ "model1win_button": "Model 1 is better",
79
+ "model2win_button": "Model 2 is better",
80
+ "tie_button": "Both are equally good",
81
+ "bothbad_button": "Both are bad",
82
+ "result_placeholder": "Result: ",
83
+ "refresh_chat_button": "Refresh Conversation",
84
+ "ranking_tab": "🏆 Model Ranking",
85
+ "model_name_header": "Model Name",
86
+ "contest_count_header": "Contest Count",
87
+ "win_rate_header": "Win Rate",
88
+ "random_model": "⚔️ Random Model",
89
+ "select_language": "Select a language (Language is important; the choice of language determines the language the AI will use in the roleplay)",
90
+ "select_language2": "Select a language",
91
+ "contant": """
92
+ ## Contact Us
93
+ ### We are actively seeking more models willing to participate in our leaderboard.
94
+ ### We are also looking for collaboration opportunities. If you are interested, please contact us. **Email:** [[email protected]](mailto:[email protected]).
95
+ ### You can report any bugs and issues on our [Discord](https://discord.gg/jwcTfTpYj5).
96
+ ## Terms of Service
97
+ ### Users must agree to the following terms before using the service:
98
+ ### This service is a research preview. It provides limited safety measures and may generate offensive content. The service should not be used for any illegal, harmful, violent, racist, or sexually related purposes. Please do not upload any personal information. The service collects user conversation data, including text and images, and reserves the right to distribute this data under Creative Commons Attribution (CC-BY) or similar licenses.
99
+ """,
100
+ }
101
+ elif lang == "Japanese":
102
+ return {
103
+ "title": "# LLMロールプレイアリーナ:ロールプレイシナリオでのLLMのパフォーマンスを評価",
104
+ "intro": """
105
+ ## 📜 ルール
106
+ #### · 2つの匿名モデル(例: Claude, Gemini, Llama)と同時にロールプレイを行い(彼らは同じキャラクターになります)、より良い方に投票してください。
107
+ #### · 勝者が決まるまで会話を続けることができます(1ターンあたりの会話の上限は5ターンです)。
108
+ #### · [キャラクターはRubii.aiから来ました。キャラクターともっと長いロールプレイをしたいですか?Rubii.aiに来てください。](https://rubii.ai/)]
109
+ #### · 会話を始めるには、まず「キャラクターを選択」でキャラクターを選択し、「時刻を選択」でシーンとオープニングを選択してください。これにより、キャラクターと望むシーンで会話ができます。
110
+ """,
111
+ "avatar_label": "キャラクター画像",
112
+ "char_choice_label": "キャラクターを選択",
113
+ "preset_prompt_label": "時刻を選択",
114
+ "refresh_button": "キャラクターリストを更新",
115
+ "bio_label": "あなたのプロフィールを入力",
116
+ "bio_placeholder": "私の名前はRubii",
117
+ "chatbox1_label": "Model 1 の応答",
118
+ "chatbox2_label": "Model 2 の応答",
119
+ "user_input_placeholder": "ここにメッセージを入力",
120
+ "battle_button": "送信",
121
+ "score_instruction": "応答を受け取った後、以下の4つのボタンでこのターンの会話を評価できます",
122
+ "model1win_button": "Model 1 がより良い",
123
+ "model2win_button": "Model 2 がより良い",
124
+ "tie_button": "同じくらい良い",
125
+ "bothbad_button": "どちらも良くない",
126
+ "result_placeholder": "結果: ",
127
+ "refresh_chat_button": "会話を更新",
128
+ "ranking_tab": "🏆 モデルランキング",
129
+ "model_name_header": "モデル名",
130
+ "contest_count_header": "参加回数",
131
+ "win_rate_header": "モデル勝率",
132
+ "random_model": "⚔️ ランダムモデル",
133
+ "select_language": "言語を選択してください(言語は非常に重要です。選択した言語はロールプレイでAIが使用する言語を決定します)",
134
+ "select_language2": "言語を選択してください",
135
+ "contant": """
136
+ ## お問い合わせ
137
+ ### 私たちは、リーダーボードに参加したいモデルを積極的に探しています。
138
+ ### 私たちはコラボレーションの機会も探しています。興味がある方は、ぜひご連絡ください。**メール:** [[email protected]](mailto:[email protected])。
139
+ ### バグや問題が発生した場合は、[Discord](https://discord.gg/jwcTfTpYj5)で報告できます。
140
+ ## 利用規約
141
+ ### サービスを利用する前に、ユーザーは以下の規約に同意する必要があります:
142
+ ### 本サービスは研究プレビュー版です。限られた安全対策を提供しており、攻撃的な内容を生成する可能性があります。本サービスを違法、有害、暴力的、人種差別的、または性的な目的で使用しないでください。個人情報のアップロードは避けてください。本サービスはユーザーの会話データ(テキストおよび画像)を収集し、クリエイティブ・コモンズ・ライセンス(CC-BY)または同様のライセンスの下でこれらのデータを配布する権利を保有します。
143
+ """,
144
+ }
145
+
146
+ elif lang == "Korean":
147
+ return {
148
+ "title": "# LLM 역할 수행 경기장: 역할 수행 시나리오에서 LLM의 성능 평가",
149
+ "intro": """
150
+ ## 📜 규칙
151
+ #### · 두 개의 익명의 모델(예: Claude, Gemini, Llama)과 동시에 역할 수행을 진행하고, 더 나은 모델을 선택하세요.
152
+ #### · 우승자를 선택할 때까지 계속 대화를 진행할 수 있습니다(최대 5라운드).
153
+ #### · [캐릭터는 Rubii.ai에서 왔습니다. 캐릭터와 더 긴 롤플레이를 하고 싶으신가요? Rubii.ai로 오세요.](https://rubii.ai/)
154
+ #### · 대화를 시작하려면 먼저 "캐릭터 선택"에서 캐릭터를 선택해야 하며, 그 다음 "시나리오 선택"에서 시나리오를 선택해야 합니다. 시나리오는 캐릭터에게 장면과 오프닝을 제공하여 원하는 시나리오에서 대화를 진행할 수 있게 합니다.
155
+ """,
156
+ "avatar_label": "캐릭터 이미지",
157
+ "char_choice_label": "캐릭터 선택",
158
+ "preset_prompt_label": "시나리오 선택",
159
+ "refresh_button": "캐릭터 목록 새로고침",
160
+ "bio_label": "자신의 설정 입력",
161
+ "bio_placeholder": "제 이름은 루비입니다.",
162
+ "chatbox1_label": "Model 1의 응답",
163
+ "chatbox2_label": "Model 2의 응답",
164
+ "user_input_placeholder": "여기에 대화 입력",
165
+ "battle_button": "보내기",
166
+ "score_instruction": "응답을 받은 후 아래 네 개의 버튼을 사용하여 이번 라운드를 평가할 수 있습니다.",
167
+ "model1win_button": "Model 1이 더 나음",
168
+ "model2win_button": "Model 2가 더 나음",
169
+ "tie_button": "똑같이 좋음",
170
+ "bothbad_button": "둘 다 별로임",
171
+ "result_placeholder": "결과: ",
172
+ "refresh_chat_button": "대화 새로고침",
173
+ "ranking_tab": "🏆 모델 순위",
174
+ "model_name_header": "모델 이름",
175
+ "contest_count_header": "참가 횟수",
176
+ "win_rate_header": "모델 승률",
177
+ "random_model": "⚔️ 랜덤 모델",
178
+ "select_language": "언어를 선택하세요 (언어는 매우 중요합니다. 선택한 언어는 역할 놀이에서 AI가 사용할 언어를 결정합니다)",
179
+ "select_language2": "언어를 선택하세요",
180
+ "contant": """
181
+ ## 문의하기
182
+ ### 우리는 리더보드에 참여할 의향이 있는 모델을 적극적으로 찾고 있습니다.
183
+ ### 우리는 또한 협력 기회를 모색하고 있습니다. 관심이 있으시면 연락해 주세요. **이메일:** [[email protected]](mailto:[email protected])
184
+ ### 버그 및 문제는 [Discord](https://discord.gg/jwcTfTpYj5)에서 보고할 수 있습니다.
185
+ ## 이용 약관
186
+ ### 사용자는 서비스를 사용하기 전에 다음 약관에 동의해야 합니다:
187
+ ### 이 서비스는 연구 미리보기 버전입니다. 제한된 안전 조치를 제공하며, 불쾌한 콘텐츠를 생성할 수 있습니다. 이 서비스를 불법적, 해롭거나, 폭력적이거나, 인종차별적이거나, 성적으로 관련된 목적으로 사용하지 마십시오. 개인 정보를 업로드하지 마십시오. 이 서비스는 사용자 대화 데이터(텍스트 및 이미지)를 수집하며, 크리에이티브 커먼즈 저작자 표시(CC-BY) 또는 유사한 라이선스 하에 이 데이터를 배포할 권리를 보유합니다.
188
+ """,
189
+ }
190
+
191
+
192
# Number of user turns after which a battle stops accepting new messages.
_MAX_BATTLE_TURNS = 5

# Notice appended to both chat panes once the turn limit has been reached.
_TURN_LIMIT_NOTICE = "您已经在此体验了多次模型效果了,前往 rubii.ai 继续对话吧"


def _battle_outputs(
    chatbox1,
    chatbox2,
    selected_models,
    session_id1,
    session_id2,
    chat_count,
    *,
    can_send,
):
    """Build the 13-value tuple that ``run_battle`` yields on every update.

    The four unconditional ``interactive=True`` updates enable the scoring
    buttons; ``can_send`` controls whether the send button stays usable; the
    final update clears the user input box.
    """
    return (
        chatbox1,
        chatbox2,
        selected_models[0],
        selected_models[1],
        gr.update(interactive=True),
        gr.update(interactive=True),
        gr.update(interactive=True),
        gr.update(interactive=True),
        session_id1,
        session_id2,
        chat_count,
        gr.update(interactive=can_send),
        gr.update(value=""),
    )


async def run_battle(
    user_input,
    chatbox1,
    chatbox2,
    session_id1,
    session_id2,
    chat_count,
    bio,
    preset_prompt,
    selected_models,
):
    """Send one user message to both models and stream their replies.

    This is an async generator: it yields the full output tuple once right
    after echoing the user message, and then again for every chunk received
    from ``combine_streams`` so both chat panes fill in incrementally.

    Args:
        user_input: Message typed by the user.
        chatbox1: History of the first chat pane, a list of
            ``(user, reply)`` tuples.
        chatbox2: History of the second chat pane, same shape.
        session_id1: Session id for the first model; replaced by the
            ``x-session-id`` header value when a chunk carries one.
        session_id2: Session id for the second model, handled the same way.
        chat_count: Number of turns already played in this battle.
        bio: User self-description, forwarded to both requests.
        preset_prompt: Selected moment; only its ``"_id"`` entry is read.
        selected_models: The two models taking part in this battle.

    Yields:
        The 13-value tuple assembled by ``_battle_outputs``.
    """
    if chat_count >= _MAX_BATTLE_TURNS:
        # Turn limit reached: show the notice in both panes and disable the
        # send button without contacting the models.
        notice = [(_TURN_LIMIT_NOTICE, _TURN_LIMIT_NOTICE)]
        yield _battle_outputs(
            chatbox1 + notice,
            chatbox2 + notice,
            selected_models,
            session_id1,
            session_id2,
            chat_count,
            can_send=False,
        )
        return

    chat_count += 1
    # Echo the user message immediately with an empty reply placeholder.
    chatbox1 = chatbox1 + [(user_input, "")]
    chatbox2 = chatbox2 + [(user_input, "")]
    yield _battle_outputs(
        chatbox1,
        chatbox2,
        selected_models,
        session_id1,
        session_id2,
        chat_count,
        can_send=True,
    )

    response1 = ""
    response2 = ""
    # NOTE(review): `language` is not a parameter of this function. It
    # resolves to the module-level name, which the Blocks section rebinds to
    # the gr.Radio component rather than the selected language string.
    # Confirm what combine_streams expects and pass the value in as an input.
    async for chunk in combine_streams(
        user_input,
        user_input,
        selected_models[0],
        selected_models[1],
        preset_prompt["_id"],
        preset_prompt["_id"],
        session_id1,
        session_id2,
        bio,
        bio,
        language,
    ):
        if "requestA_header" in chunk:
            session_id1 = chunk["requestA_header"]["x-session-id"]
        if "requestB_header" in chunk:
            session_id2 = chunk["requestB_header"]["x-session-id"]
        if "requestA" in chunk:
            response1 += chunk["requestA"]
        if "requestB" in chunk:
            response2 += chunk["requestB"]
        # Replace the placeholder entry with the text accumulated so far.
        chatbox1 = chatbox1[:-1] + [(user_input, response1)]
        chatbox2 = chatbox2[:-1] + [(user_input, response2)]
        yield _battle_outputs(
            chatbox1,
            chatbox2,
            selected_models,
            session_id1,
            session_id2,
            chat_count,
            can_send=True,
        )
290
+
291
+
292
# Vote confirmation templates, keyed by interface language. "win" is used for
# both the "Model 1" and "Model 2" verdicts; {winner}/{loser} are filled with
# the matching model names.
_VOTE_MESSAGES = {
    "Chinese": {
        "win": "感谢您的投票,你选择了 {state} - {winner} 效果更好,{loser} 效果更差,刷新以进行下一轮测试",
        "tie": "感谢您的投票,你选择了 {model1_name} 与 {model2_name} 效果都很好,刷新以进行下一轮测试",
        "bothbad": "感谢您的投票,你选择了 {model1_name} 与 {model2_name} 效果都不好,刷新以进行下一轮测试",
    },
    "English": {
        "win": "Thank you for your vote. You chose {state} - {winner} performed better, {loser} performed worse. Refresh to proceed to the next round of testing.",
        "tie": "Thank you for your vote. You selected that both {model1_name} and {model2_name} performed well. Refresh to proceed to the next round of testing.",
        "bothbad": "Thank you for your vote. You chose that both {model1_name} and {model2_name} performed poorly. Refresh to proceed to the next round of testing.",
    },
    "Japanese": {
        "win": "投票ありがとうございます。あなたは {state} - {winner} の方が良く、{loser} は劣っていると選びました。次のテストを行うにはリフレッシュしてください。",
        "tie": "投票ありがとうございます。あなたは {model1_name} と {model2_name} の両方が良いと選びました。次のテストを行うにはリフレッシュしてください。",
        "bothbad": "投票ありがとうございます。あなたは {model1_name} と {model2_name} の両方が良くないと選びました。次のテストを行うにはリフレッシュしてください。",
    },
    "Korean": {
        "win": "투표해 주셔서 감사합니다. {state} - {winner} 이(가) 더 좋다고 선택하셨습니다. {loser} 이(가) 더 나쁩니다. 다음 테스트를 위해 새로 고침하세요.",
        "tie": "투표해 주셔서 감사합니다. {model1_name} 과(와) {model2_name} 둘 다 좋다고 선택하셨습니다. 다음 테스트를 위해 새로 고침하세요.",
        "bothbad": "투표해 주셔서 감사합니다. {model1_name} 과(와) {model2_name} 둘 다 나쁘다고 선택하셨습니다. 다음 테스트를 위해 새로 고침하세요.",
    },
}


def select_winner(model1_name, model2_name, state, turn, anony, Language):
    """Record a vote and return the confirmation message plus button updates.

    Args:
        model1_name: Name of the first model.
        model2_name: Name of the second model.
        state: Verdict, one of ``"Model 1"``, ``"Model 2"``, ``"tie"`` or
            ``"bothbad"``.
        turn: Forwarded unchanged to ``update_model_stats``.
        anony: Forwarded unchanged to ``update_model_stats``.
        Language: Interface language used for the message. A language without
            templates falls back to English instead of failing.

    Returns:
        A 6-tuple: the confirmation message followed by five
        ``gr.update(interactive=False)`` values.

    Raises:
        ValueError: If ``state`` is not one of the four known verdicts. The
            check runs before any statistics are recorded.
    """
    templates = _VOTE_MESSAGES.get(Language, _VOTE_MESSAGES["English"])
    if state == "Model 1":
        winner, loser = model1_name, model2_name
        template = templates["win"]
    elif state == "Model 2":
        winner, loser = model2_name, model1_name
        template = templates["win"]
    elif state in ("tie", "bothbad"):
        winner = loser = None  # not referenced by the tie/bothbad templates
        template = templates[state]
    else:
        raise ValueError(f"Unknown vote state: {state!r}")

    result = template.format(
        state=state,
        winner=winner,
        loser=loser,
        model1_name=model1_name,
        model2_name=model2_name,
    )
    update_model_stats(model1_name, model2_name, state, turn, anony, Language)
    # Return the message and grey out the scoring buttons.
    return (
        result,
        gr.update(interactive=False),
        gr.update(interactive=False),
        gr.update(interactive=False),
        gr.update(interactive=False),
        gr.update(interactive=False),
    )
339
+
340
+
341
async def get_preset_prompts(char_id, language):
    """Fetch the moments for a character as ``(name, item)`` dropdown choices.

    Awaits ``recommand(char_id, language)`` and pairs every returned item with
    its ``"name"`` entry, so the dropdown shows the name and carries the whole
    item as its value.
    """
    choices = []
    for moment in await recommand(char_id, language):
        choices.append((moment["name"], moment))
    return choices
344
+
345
+
346
async def update_preset_prompt(char_id, language):
    """Refresh the moment dropdown and avatar for the selected character.

    Args:
        char_id: Id of the character chosen in the character dropdown.
        language: Language passed on to ``get_preset_prompts``.

    Returns:
        A 2-tuple: a ``gr.update`` carrying the new moment choices, and the
        character avatar resized to 224x224.

    Raises:
        requests.RequestException: If the avatar download times out or the
            server answers with an HTTP error status.
    """
    preset_prompts = await get_preset_prompts(char_id, language)
    avatar_image_url = id_to_avatar(char_id)
    # Without a timeout a stalled avatar host would block this handler
    # indefinitely; fail fast instead.
    response = requests.get(avatar_image_url, timeout=10)
    # Surface HTTP errors here rather than handing an error page to PIL.
    response.raise_for_status()
    image = Image.open(io.BytesIO(response.content))
    resized_image = image.resize((224, 224))
    return gr.update(choices=preset_prompts), resized_image
353
+
354
+
355
def update_chat_and_avatar(moment):
    """Reset both chats to the moment's opening line and draw two models.

    Returns a 6-tuple: the opening history for each chat pane (the same list
    object twice), the moment's image URL, two empty session ids, and the pair
    of models sampled from the module-level ``models``.
    """
    greeting = [(None, moment["opening"])]
    # Keep drawing until the two picks compare unequal.
    while True:
        pair = random.sample(models, 2)
        if pair[0] != pair[1]:
            break
    print(pair)
    return greeting, greeting, moment["image_url"], "", "", pair
362
+
363
+
364
def refresh_data(language):
    """Reload the character dropdown choices for the given language.

    Each entry returned by ``recommand_character`` becomes a
    ``(name, _id)`` choice.
    """
    choices = [
        (entry["name"], entry["_id"]) for entry in recommand_character(language)
    ]
    return gr.update(choices=choices)
368
+
369
+
370
def refresh_chat(moment):
    """Start a fresh battle for the currently selected moment.

    Returns 13 values: the six values produced by ``update_chat_and_avatar``,
    four ``interactive=False`` updates, one ``interactive=True`` update, the
    reset result text, and a turn counter reset to 0.
    """
    (
        pane1,
        pane2,
        avatar,
        sid1,
        sid2,
        picked_models,
    ) = update_chat_and_avatar(moment)
    disabled = tuple(gr.update(interactive=False) for _ in range(4))
    return (
        pane1,
        pane2,
        avatar,
        sid1,
        sid2,
        picked_models,
        *disabled,
        gr.update(interactive=True),
        gr.update(value="结果:"),
        gr.update(value=0),
    )
389
+
390
+
391
def update_language(lang):
    """Produce the updates that switch the whole interface to ``lang``.

    Looks up the translated strings with ``set_interface_language`` and the
    character list with ``recommand_character``, then returns a 23-value tuple
    of markdown texts and ``gr.update`` objects. The third value is ``None``.
    """
    print("update_language", lang)
    text = set_interface_language(lang)
    characters = [
        (entry["name"], entry["_id"]) for entry in recommand_character(lang)
    ]

    def relabel(key):
        return gr.update(label=text[key])

    def revalue(key):
        return gr.update(value=text[key])

    def rehint(key):
        return gr.update(placeholder=text[key])

    return (
        text["title"],
        text["intro"],
        None,
        gr.update(label=text["char_choice_label"], choices=characters),
        relabel("preset_prompt_label"),
        revalue("refresh_button"),
        gr.update(placeholder=text["bio_placeholder"], label=text["bio_label"]),
        relabel("chatbox1_label"),
        relabel("chatbox2_label"),
        rehint("user_input_placeholder"),
        revalue("battle_button"),
        rehint("result_placeholder"),
        revalue("refresh_chat_button"),
        revalue("model1win_button"),
        revalue("model2win_button"),
        revalue("tie_button"),
        revalue("bothbad_button"),
        relabel("random_model"),
        relabel("ranking_tab"),
        gr.update(label=text["select_language"], value=lang),
        text["score_instruction"],
        text["contant"],
        gr.update(value=lang, label=text["select_language2"]),
    )
421
+
422
+
423
# Accept-Language prefix -> interface language, checked in this order.
_ACCEPT_LANGUAGE_PREFIXES = (
    ("zh", "Chinese"),
    ("en", "English"),
    ("ja", "Japanese"),
    ("ko", "Korean"),
)


def auto_i18n(request: gr.Request):
    """Pick the interface language from the request's Accept-Language header.

    Only the first entry of the header is considered. Requests without the
    header, or with an unrecognised first entry, get ``"Chinese"``.

    Args:
        request: The incoming request.

    Returns:
        One of ``"Chinese"``, ``"English"``, ``"Japanese"`` or ``"Korean"``.
    """
    try:
        accept_language = request.headers["Accept-Language"]
    except KeyError:
        # Not every client sends the header; fall through to the default.
        accept_language = ""
    print(accept_language)
    primary = accept_language.split(",")[0].strip().lower()
    for prefix, name in _ACCEPT_LANGUAGE_PREFIXES:
        if primary.startswith(prefix):
            return name
    return "Chinese"
436
+
437
+
438
def init_and_update(request: gr.Request):
    """Detect the visitor's language and build the initial interface updates.

    Returns a list whose first element is the detected language, followed by
    every value returned by ``update_language`` for that language.
    """
    detected_lang = auto_i18n(request)
    return [detected_lang, *update_language(detected_lang)]
441
def passive_language_change(lang):
    """Return an update that sets the target component's value to ``lang``."""
    mirrored = gr.update(value=lang)
    return mirrored
443
+
444
+
445
with gr.Blocks() as demo:
    # default_language is overwritten as soon as the page loads (demo.load).
    default_language = gr.State("Chinese")
    language = "Chinese"
    characters = recommand_character(language)
    characters = [(item["name"], item["_id"]) for item in characters]
    text = set_interface_language(default_language.value)
    models = get_models()
    with gr.Tab(text["random_model"]) as random_model_tab:
        with gr.Column():
            title = gr.Markdown(f"{text['title']}")
            with gr.Column(scale=10):
                intro = gr.Markdown(f"{text['intro']}")
            with gr.Column(scale=1):
                # From here on the module-level name `language` refers to
                # this Radio component, not to the "Chinese" string above.
                language = gr.Radio(
                    ["English", "Chinese", "Japanese", "Korean"],
                    label=text["select_language"],
                    value=default_language.value,
                )
        with gr.Row():
            with gr.Column(scale=1):
                avatar_image = gr.Image(scale=1, label=text["avatar_label"])
            with gr.Column(scale=7):
                with gr.Row():
                    char_choice = gr.Dropdown(
                        choices=characters,
                        label=text["char_choice_label"],
                        scale=3,
                    )
                    preset_prompt = gr.Dropdown(
                        label=text["preset_prompt_label"], scale=3
                    )
                    refresh_button = gr.Button(
                        text["refresh_button"], scale=1, variant="primary"
                    )
                with gr.Row():
                    bio = gr.Textbox(
                        show_label=True,
                        label=text["bio_label"],
                        placeholder=text["bio_placeholder"],
                    )
        with gr.Row():
            chatbox1 = gr.Chatbot(label=text["chatbox1_label"])
            chatbox2 = gr.Chatbot(label=text["chatbox2_label"])
        with gr.Row():
            user_input = gr.Textbox(
                placeholder=text["user_input_placeholder"], scale=3, show_label=False
            )
            battle_button = gr.Button(text["battle_button"], scale=1, variant="primary")
        with gr.Column():
            score_instruction = gr.Markdown(f"{text['score_instruction']}")
            with gr.Row():
                model1win_button = gr.Button(
                    text["model1win_button"], variant="primary", interactive=False
                )
                model2win_button = gr.Button(
                    text["model2win_button"], variant="primary", interactive=False
                )
                tie_button = gr.Button(text["tie_button"], interactive=False)
                bothbad_button = gr.Button(text["bothbad_button"], interactive=False)
        with gr.Row():
            result_output = gr.Textbox(
                placeholder=text["result_placeholder"], scale=3, show_label=False
            )
            refresh_chat_button = gr.Button(
                text["refresh_chat_button"], variant="secondary", scale=1
            )

    with gr.Tab(text["ranking_tab"]) as ranking_tab:
        # Use the same label key that update_language applies to this radio.
        language2 = gr.Radio(
            ["English", "Chinese", "Japanese", "Korean"],
            label=text["select_language2"],
            value=default_language.value,
        )
        gr.DataFrame(
            load_dataframe,
            datatype=["str", "str", "str", "str", "str"],
            every=gr.Timer(10),
        )
    # Inject CSS that hides the page footer (where the "Use via API" link is).
    gr.HTML("""
    <style>
    footer {display: none !important;}
    </style>
    """)
    with gr.Row():
        with gr.Column(scale=5):
            contant = gr.Markdown(f"{text['contant']}")
        with gr.Column(scale=1):
            gr.Image("group.jpg")

    selected_models = gr.State([])
    model1_state = gr.State("")
    model2_state = gr.State("")
    chat_count = gr.State(0)
    session_id1 = gr.State("")
    session_id2 = gr.State("")

    # Components updated by update_language, in the exact order of the tuple
    # it returns. Shared by language.change and demo.load so the two
    # registrations cannot drift apart.
    i18n_outputs = [
        title,
        intro,
        avatar_image,
        char_choice,
        preset_prompt,
        refresh_button,
        bio,
        chatbox1,
        chatbox2,
        user_input,
        battle_button,
        result_output,
        refresh_chat_button,
        model1win_button,
        model2win_button,
        tie_button,
        bothbad_button,
        random_model_tab,
        ranking_tab,
        language,
        score_instruction,
        contant,
        language2,
    ]
    # Components updated by select_winner: the message plus every button that
    # must be disabled once a vote has been cast.
    vote_outputs = [
        result_output,
        model1win_button,
        model2win_button,
        tie_button,
        bothbad_button,
        battle_button,
    ]

    refresh_button.click(fn=refresh_data, inputs=language, outputs=char_choice)
    refresh_chat_button.click(
        fn=refresh_chat,
        inputs=[preset_prompt],
        outputs=[
            chatbox1,
            chatbox2,
            avatar_image,
            session_id1,
            session_id2,
            selected_models,
            model1win_button,
            model2win_button,
            tie_button,
            bothbad_button,
            battle_button,
            result_output,
            chat_count,
        ],
    )
    language.change(fn=update_language, inputs=language, outputs=i18n_outputs)
    language2.change(
        fn=passive_language_change,
        inputs=language2,
        outputs=language,
    )
    char_choice.change(
        fn=update_preset_prompt,
        inputs=[char_choice, language],
        outputs=[preset_prompt, avatar_image],
    )
    preset_prompt.change(
        fn=update_chat_and_avatar,
        inputs=[preset_prompt],
        outputs=[
            chatbox1,
            chatbox2,
            avatar_image,
            session_id1,
            session_id2,
            selected_models,
        ],
    )
    # The four vote buttons differ only in the verdict string they submit.
    for vote_button, verdict in (
        (model1win_button, "Model 1"),
        (model2win_button, "Model 2"),
        (tie_button, "tie"),
        (bothbad_button, "bothbad"),
    ):
        vote_button.click(
            fn=select_winner,
            inputs=[
                model1_state,
                model2_state,
                gr.State(verdict),
                chat_count,
                gr.State(True),
                language,
            ],
            outputs=vote_outputs,
        )
    battle_button.click(
        run_battle,
        inputs=[
            user_input,
            chatbox1,
            chatbox2,
            session_id1,
            session_id2,
            chat_count,
            bio,
            preset_prompt,
            selected_models,
        ],
        outputs=[
            chatbox1,
            chatbox2,
            model1_state,
            model2_state,
            model1win_button,
            model2win_button,
            tie_button,
            bothbad_button,
            session_id1,
            session_id2,
            chat_count,
            battle_button,
            user_input,
        ],
    )
    # init_and_update returns the detected language followed by everything
    # update_language returns, so the outputs are default_language plus the
    # full i18n list (including language2, which was previously missing).
    demo.load(
        init_and_update,
        outputs=[default_language, *i18n_outputs],
    )

if __name__ == "__main__":
    demo.queue(default_concurrency_limit=8).launch()