Kang Suhyun committed
[#67] Check models at the start of the app (#68)
* [#67] Check models at the start of the app
This change adds a check for the models at the start of the app. If any model is not available, the app raises an error and fails to start (see the sketch below).
* Apply code review
* Update
* Refactor completion function and fix error handling
* Add verbose logging in litellm module
* Refactor model and response code
* Swap content and role order in completion messages
- app.py +3 -0
- model.py +77 -0
- response.py +18 -40
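To make the described behavior concrete, here is a minimal, self-contained sketch of the fail-fast check; the model names and the simulated API failure are hypothetical stand-ins for `model.supported_models` and the one-token litellm call in the new model.py:

```python
# Hypothetical stand-in for the startup check in model.py: models are
# plain strings and the simulated failure replaces a real API call.
def check_models(models):
    for model in models:
        print(f"Checking model {model}...")
        try:
            if model.endswith("-unavailable"):  # simulated failed API call
                raise ConnectionError("connection refused")
            print(f"Model {model} is available.")
        except Exception as e:
            # Any failure aborts startup with a descriptive error, so a
            # misconfigured model is caught before the app serves traffic.
            raise RuntimeError(f"Model {model} is not available: {e}") from e


check_models(["model-a", "model-b"])  # both pass
# check_models(["model-a", "model-unavailable"])  # would raise RuntimeError
```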
app.py CHANGED

```diff
@@ -13,6 +13,8 @@ from credentials import set_credentials
 from leaderboard import build_leaderboard
 from leaderboard import db
 from leaderboard import SUPPORTED_TRANSLATION_LANGUAGES
+from model import check_models
+from model import supported_models
 import response
 from response import get_responses
 
@@ -189,6 +191,7 @@ with gr.Blocks(title="Arena", css=css) as app:
 
 if __name__ == "__main__":
   set_credentials(credentials.CREDENTIALS, credentials.CREDENTIALS_PATH)
+  check_models(supported_models)
 
   # We need to enable queue to use generators.
   app.queue()
```
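As the retained comment notes, Gradio streams generator output only when the queue is enabled, which is why `app.queue()` runs before the app is launched. A minimal illustration, with a hypothetical handler unrelated to this app:

```python
import gradio as gr


def stream():
    # Yields progressively longer strings, like get_responses does.
    text = ""
    for char in "hello":
        text += char
        yield text


with gr.Blocks() as demo:
    button = gr.Button("Go")
    output = gr.Textbox()
    button.click(fn=stream, inputs=None, outputs=output)

demo.queue()  # generator handlers require the queue to stream
demo.launch()
```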
model.py ADDED

```diff
@@ -0,0 +1,77 @@
+"""
+This module contains functions to interact with the models.
+"""
+
+import json
+import os
+from typing import List
+
+from google.cloud import secretmanager
+from google.oauth2 import service_account
+import litellm
+
+from credentials import get_credentials_json
+
+GOOGLE_CLOUD_PROJECT = os.environ.get("GOOGLE_CLOUD_PROJECT")
+MODELS_SECRET = os.environ.get("MODELS_SECRET")
+
+secretmanager_client = secretmanager.SecretManagerServiceClient(
+    credentials=service_account.Credentials.from_service_account_info(
+        get_credentials_json()))
+models_secret = secretmanager_client.access_secret_version(
+    name=secretmanager_client.secret_version_path(GOOGLE_CLOUD_PROJECT,
+                                                  MODELS_SECRET, "latest"))
+decoded_secret = models_secret.payload.data.decode("UTF-8")
+
+supported_models_json = json.loads(decoded_secret)
+
+
+class Model:
+
+  def __init__(
+      self,
+      name: str,
+      provider: str = None,
+      # The JSON keys are in camelCase. To unpack these keys into
+      # Model attributes, we need to use the same camelCase names.
+      apiKey: str = None,  # pylint: disable=invalid-name
+      apiBase: str = None):  # pylint: disable=invalid-name
+    self.name = name
+    self.provider = provider
+    self.api_key = apiKey
+    self.api_base = apiBase
+
+
+supported_models: List[Model] = [
+    Model(name=model_name, **model_config)
+    for model_name, model_config in supported_models_json.items()
+]
+
+
+def completion(model: Model, messages: List, max_tokens: float = None) -> str:
+  response = litellm.completion(model=model.provider + "/" +
+                                model.name if model.provider else model.name,
+                                api_key=model.api_key,
+                                api_base=model.api_base,
+                                messages=messages,
+                                max_tokens=max_tokens)
+
+  return response.choices[0].message.content
+
+
+def check_models(models: List[Model]):
+  for model in models:
+    print(f"Checking model {model.name}...")
+    try:
+      completion(model=model,
+                 messages=[{
+                     "content": "Hello.",
+                     "role": "user"
+                 }],
+                 max_tokens=5)
+      print(f"Model {model.name} is available.")
+
+    # This check is designed to verify the availability of the models
+    # without any issues. Therefore, we need to catch all exceptions.
+    except Exception as e:  # pylint: disable=broad-except
+      raise RuntimeError(f"Model {model.name} is not available: {e}") from e
```
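To make the secret format concrete, here is a hypothetical `MODELS_SECRET` payload (every name, key, and URL below is invented). The camelCase JSON keys deliberately match the `Model` constructor's parameter names, so each entry can be unpacked as `Model(name=model_name, **model_config)`; `completion()` then joins provider and name into a litellm-style identifier such as `openai/gpt-3.5-turbo`:

```python
import json

# Invented payload; the real one is read from Secret Manager at startup.
decoded_secret = """
{
  "gpt-3.5-turbo": {"provider": "openai", "apiKey": "sk-example"},
  "local-model": {"provider": "openai",
                  "apiBase": "http://localhost:8000/v1",
                  "apiKey": "unused"}
}
"""

for model_name, model_config in json.loads(decoded_secret).items():
    # model.py unpacks each entry as Model(name=model_name, **model_config),
    # so these JSON keys must match the constructor's parameter names.
    provider = model_config.get("provider")
    litellm_id = f"{provider}/{model_name}" if provider else model_name
    print(litellm_id, "->", model_config.get("apiBase"))
```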
response.py CHANGED

```diff
@@ -3,32 +3,17 @@ This module contains functions for generating responses using LLMs.
 """
 
 import enum
-import json
-import os
 from random import sample
+from typing import List
 from uuid import uuid4
 
 from firebase_admin import firestore
-from google.cloud import secretmanager
-from google.oauth2 import service_account
 import gradio as gr
-from litellm import completion
 
-from credentials import get_credentials_json
 from leaderboard import db
-
-
-
-
-secretmanager_client = secretmanager.SecretManagerServiceClient(
-    credentials=service_account.Credentials.from_service_account_info(
-        get_credentials_json()))
-models_secret = secretmanager_client.access_secret_version(
-    name=secretmanager_client.secret_version_path(GOOGLE_CLOUD_PROJECT,
-                                                  MODELS_SECRET, "latest"))
-decoded_secret = models_secret.payload.data.decode("UTF-8")
-
-supported_models = json.loads(decoded_secret)
+from model import completion
+from model import Model
+from model import supported_models
 
 
 def create_history(model_name: str, instruction: str, prompt: str,
@@ -69,42 +54,35 @@ def get_responses(user_prompt, category, source_lang, target_lang):
       not target_lang):
     raise gr.Error("Please select source and target languages.")
 
-  models = sample(list(supported_models), 2)
+  models: List[Model] = sample(list(supported_models), 2)
   instruction = get_instruction(category, source_lang, target_lang)
 
   responses = []
   for model in models:
-    model_config = supported_models[model]
-
-    model_name = model_config[
-        "provider"] + "/" + model if "provider" in model_config else model
-    api_key = model_config.get("apiKey", None)
-    api_base = model_config.get("apiBase", None)
-
     try:
       # TODO(#1): Allow user to set configuration.
-      response = completion(model=model_name,
-                            api_key=api_key,
-                            api_base=api_base,
+      response = completion(model=model,
                             messages=[{
-                                "content": instruction,
-                                "role": "system"
+                                "role": "system",
+                                "content": instruction
                            }, {
-                                "content": user_prompt,
-                                "role": "user"
+                                "role": "user",
+                                "content": user_prompt
                            }])
-
-      content = response.choices[0].message.content
-      responses.append(content)
+      create_history(model.name, instruction, user_prompt, response)
+      responses.append(response)
 
    # TODO(#1): Narrow down the exception type.
    except Exception as e:  # pylint: disable=broad-except
-      print(f"Error with model {model}: {e}")
+      print(f"Error with model {model.name}: {e}")
      raise gr.Error("Failed to get response. Please try again.")
 
+  model_names = [model.name for model in models]
+
  # It simulates concurrent stream response generation.
  max_response_length = max(len(response) for response in responses)
  for i in range(max_response_length):
-    yield [response[:i + 1] for response in responses] + models + [instruction]
+    yield [response[:i + 1] for response in responses
+          ] + model_names + [instruction]
 
-  yield responses + models + [instruction]
+  yield responses + model_names + [instruction]
```
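For reference, a small sketch of the shape `get_responses` now streams (the responses, model names, and instruction below are invented). Each intermediate yield carries the growing partial responses followed by the model names and the instruction; the final yield carries the complete responses:

```python
# Invented data standing in for two sampled models' outputs.
responses = ["Bonjour.", "Salut !"]
model_names = ["model-a", "model-b"]
instruction = "Translate the following English text into French."

# Mirrors the streaming loop in get_responses: progressively longer
# prefixes of each response, plus the model names and the instruction.
max_response_length = max(len(response) for response in responses)
for i in range(max_response_length):
    print([response[:i + 1] for response in responses] + model_names +
          [instruction])

print(responses + model_names + [instruction])  # final, complete yield
```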