diff --git a/.env.example b/.env.example new file mode 100644 index 0000000000000000000000000000000000000000..aa96e1820a29689a9163b6a202ac15bf5d81fd8e --- /dev/null +++ b/.env.example @@ -0,0 +1,3 @@ +ENVIRONMENT=development +HF_TOKEN=xxx +HF_HOME=.cache diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index 3b6ab3cb9f296564e0dc782b3473700bbc89190d..0000000000000000000000000000000000000000 --- a/.gitattributes +++ /dev/null @@ -1,36 +0,0 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ckpt filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -*.safetensors filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text -scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text -gif.gif filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index 4249057d6718f3acdda39a5efbf7a32599f4a522..08e57889adbc8cb31f2809bb3232c4f42e283a21 100644 --- a/.gitignore +++ b/.gitignore @@ -1,22 +1,45 @@ -venv/ -.venv/ -__pycache__/ -.env -.ipynb_checkpoints -*ipynb -.vscode/ +# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. + +__pycache__ +.cache/ + +# dependencies + +frontend/node_modules +/.pnp +.pnp.js + +# testing + +/coverage + +# production + +/build + +# misc + .DS_Store -.ruff_cache/ -.python-version -.profile_app.python -*pstats -*.lock - -eval-queue/ -eval-results/ -dynamic-info/ -downloads/ -model-votes/ -open-llm-leaderboard___contents/ - -src/assets/model_counts.html +.env.local +.env.development.local +.env.test.local +.env.production.local + +npm-debug.log* +yarn-debug.log* +yarn-error.log\* + +src/dataframe.json + +yarn.lock +package-lock.json + +/public + +.claudesync/ + +# Environment variables +.env +.env.* +!.env.example + diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 0710dad252bda2ac9fd5b7e4e2e4dc0afeff43cf..0000000000000000000000000000000000000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -default_language_version: - python: python3 - -ci: - autofix_prs: true - autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions' - autoupdate_schedule: quarterly - -repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 - hooks: - - id: check-yaml - - id: check-case-conflict - - id: detect-private-key - - id: check-added-large-files - args: ['--maxkb=1000'] - - id: requirements-txt-fixer - - id: end-of-file-fixer - - id: trailing-whitespace - - - repo: https://github.com/PyCQA/isort - rev: 5.12.0 - hooks: - - id: isort - name: Format imports - - - repo: https://github.com/psf/black - rev: 22.12.0 - hooks: - - id: black - name: Format code - additional_dependencies: ['click==8.0.2'] - - - repo: https://github.com/charliermarsh/ruff-pre-commit - # Ruff version. - rev: 'v0.0.267' - hooks: - - id: ruff diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..d79f28215d40c83a9e623b7dec58aa019da6d91a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,63 @@ +# Build frontend +FROM node:18 as frontend-build +WORKDIR /app +COPY frontend/package*.json ./ +RUN npm install +COPY frontend/ ./ + +RUN npm run build + +# Build backend +FROM python:3.9-slim +WORKDIR /app + +# Create non-root user +RUN useradd -m -u 1000 user + +# Install poetry +RUN pip install poetry + +# Create and configure cache directory +RUN mkdir -p /app/.cache && \ + chown -R user:user /app + +# Copy and install backend dependencies +COPY backend/pyproject.toml backend/poetry.lock* ./ +RUN poetry config virtualenvs.create false \ + && poetry install --no-interaction --no-ansi --no-root --only main + +# Copy backend code +COPY backend/ . + +# Install Node.js and npm +RUN apt-get update && apt-get install -y \ + curl \ + netcat-openbsd \ + && curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \ + && apt-get install -y nodejs \ + && rm -rf /var/lib/apt/lists/* + +# Copy frontend server and build +COPY --from=frontend-build /app/build ./frontend/build +COPY --from=frontend-build /app/package*.json ./frontend/ +COPY --from=frontend-build /app/server.js ./frontend/ + +# Install frontend production dependencies +WORKDIR /app/frontend +RUN npm install --production +WORKDIR /app + +# Environment variables +ENV HF_HOME=/app/.cache \ + TRANSFORMERS_CACHE=/app/.cache \ + HF_DATASETS_CACHE=/app/.cache \ + INTERNAL_API_PORT=7861 \ + PORT=7860 \ + NODE_ENV=production + +# Note: HF_TOKEN should be provided at runtime, not build time +USER user +EXPOSE 7860 + +# Start both servers with wait-for +CMD ["sh", "-c", "uvicorn app.asgi:app --host 0.0.0.0 --port 7861 & while ! 
nc -z localhost 7861; do sleep 1; done && cd frontend && npm run serve"] \ No newline at end of file diff --git a/Makefile b/Makefile deleted file mode 100644 index a99bb53049b7022e7de973aeb72e3b9740c45436..0000000000000000000000000000000000000000 --- a/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -.PHONY: style format quality all - -# Applies code style fixes to the specified file or directory -style: - @echo "Applying style fixes to $(file)" - ruff format $(file) - ruff check --fix $(file) --line-length 119 - -# Checks code quality for the specified file or directory -quality: - @echo "Checking code quality for $(file)" - ruff check $(file) --line-length 119 - -# Applies PEP8 formatting and checks the entire codebase -all: - @echo "Formatting and checking the entire codebase" - ruff format . - ruff check --fix . --line-length 119 diff --git a/README.md b/README.md index 38baa0ab70407aacc0c1a23d8fbb6abef254662f..27a351d18d4f377ad9b0a5eb46c2f8f4b895505f 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,85 @@ --- title: Open LLM Leaderboard 2 emoji: 🏆 -colorFrom: green -colorTo: indigo -sdk: gradio -sdk_version: 4.44.0 -app_file: app.py +colorFrom: blue +colorTo: red +sdk: docker +hf_oauth: true pinned: true license: apache-2.0 duplicated_from: open-llm-leaderboard/open_llm_leaderboard -fullWidth: true -startup_duration_timeout: 1h -hf_oauth: true -space_ci: - private: true - secrets: - - HF_TOKEN - - WEBHOOK_SECRET tags: - leaderboard short_description: Track, rank and evaluate open LLMs and chatbots --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference \ No newline at end of file +# Open LLM Leaderboard + +Modern React interface for comparing Large Language Models (LLMs) in an open and reproducible way. 
+ +## Features + +- 📊 Interactive table with advanced sorting and filtering +- 🔍 Semantic model search +- 📌 Pin models for comparison +- 📱 Responsive and modern interface +- 🎨 Dark/Light mode +- ⚡️ Optimized performance with virtualization + +## Architecture + +The project is split into two main parts: + +### Frontend (React) + +``` +frontend/ +├── src/ +│ ├── components/ # Reusable UI components +│ ├── pages/ # Application pages +│ ├── hooks/ # Custom React hooks +│ ├── context/ # React contexts +│ └── constants/ # Constants and configurations +├── public/ # Static assets +└── server.js # Express server for production +``` + +### Backend (FastAPI) + +``` +backend/ +├── app/ +│ ├── api/ # API router and endpoints +│ │ └── endpoints/ # Specific API endpoints +│ ├── core/ # Core functionality +│ ├── config/ # Configuration +│ └── services/ # Business logic services +│ ├── leaderboard.py +│ ├── models.py +│ ├── votes.py +│ └── hf_service.py +└── utils/ # Utility functions +``` + +## Technologies + +### Frontend + +- React +- Material-UI +- TanStack Table & Virtual +- Express.js + +### Backend + +- FastAPI +- Hugging Face API +- Docker + +## Development + +The application is containerized using Docker and can be run using: + +```bash +docker-compose up +``` diff --git a/app.py b/app.py deleted file mode 100644 index 26e87c58351ac092351fc4e4ba243af0cd3e8eda..0000000000000000000000000000000000000000 --- a/app.py +++ /dev/null @@ -1,492 +0,0 @@ -import logging -import time -import schedule -import datetime -import gradio as gr -from threading import Thread -import datasets -from huggingface_hub import snapshot_download, WebhooksServer, WebhookPayload, RepoCard -from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns -from apscheduler.schedulers.background import BackgroundScheduler - -# Start ephemeral Spaces on PRs (see config in README.md) -from gradio_space_ci.webhook import IS_EPHEMERAL_SPACE, SPACE_ID, configure_space_ci - -from src.display.about import ( - CITATION_BUTTON_LABEL, - CITATION_BUTTON_TEXT, - EVALUATION_QUEUE_TEXT, - INTRODUCTION_TEXT, - TITLE, -) -from src.display.css_html_js import custom_css -from src.display.utils import ( - BENCHMARK_COLS, - COLS, - EVAL_COLS, - EVAL_TYPES, - AutoEvalColumn, - ModelType, - Precision, - WeightType, - fields, - EvalQueueColumn -) -from src.envs import ( - API, - EVAL_REQUESTS_PATH, - AGGREGATED_REPO, - HF_TOKEN, - QUEUE_REPO, - REPO_ID, - VOTES_REPO, - VOTES_PATH, - HF_HOME, -) -from src.populate import get_evaluation_queue_df, get_leaderboard_df -from src.submission.submit import add_new_eval -from src.voting.vote_system import VoteManager, run_scheduler - -# Configure logging -logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") - -# Start ephemeral Spaces on PRs (see config in README.md) -from gradio_space_ci.webhook import IS_EPHEMERAL_SPACE, SPACE_ID, configure_space_ci - -# Convert the environment variable "LEADERBOARD_FULL_INIT" to a boolean value, defaulting to True if the variable is not set. -# This controls whether a full initialization should be performed. 
-DO_FULL_INIT = True # os.getenv("LEADERBOARD_FULL_INIT", "True") == "True" -NEW_DATA_ON_LEADERBOARD = True -LEADERBOARD_DF = None - -def restart_space(): - logging.info(f"Restarting space with repo ID: {REPO_ID}") - try: - # Check if new data is pending and download if necessary - if NEW_DATA_ON_LEADERBOARD: - logging.info("Fetching latest leaderboard data before restart.") - get_latest_data_leaderboard() - - # Now restart the space - API.restart_space(repo_id=REPO_ID, token=HF_TOKEN) - logging.info("Space restarted successfully.") - except Exception as e: - logging.error(f"Failed to restart space: {e}") - -def time_diff_wrapper(func): - def wrapper(*args, **kwargs): - start_time = time.time() - result = func(*args, **kwargs) - end_time = time.time() - diff = end_time - start_time - logging.info(f"Time taken for {func.__name__}: {diff} seconds") - return result - - return wrapper - - -@time_diff_wrapper -def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5): - """Download dataset with exponential backoff retries.""" - attempt = 0 - while attempt < max_attempts: - try: - logging.info(f"Downloading {repo_id} to {local_dir}") - snapshot_download( - repo_id=repo_id, - local_dir=local_dir, - repo_type=repo_type, - tqdm_class=None, - etag_timeout=30, - max_workers=8, - ) - logging.info("Download successful") - return - except Exception as e: - wait_time = backoff_factor**attempt - logging.error(f"Error downloading {repo_id}: {e}, retrying in {wait_time}s") - time.sleep(wait_time) - attempt += 1 - raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts") - -def get_latest_data_leaderboard(leaderboard_initial_df=None): - global NEW_DATA_ON_LEADERBOARD - global LEADERBOARD_DF - if NEW_DATA_ON_LEADERBOARD: - logging.info("Leaderboard updated at reload!") - try: - leaderboard_dataset = datasets.load_dataset( - AGGREGATED_REPO, - "default", - split="train", - cache_dir=HF_HOME, - download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD, # Always download fresh data - verification_mode="no_checks" - ) - LEADERBOARD_DF = get_leaderboard_df( - leaderboard_dataset=leaderboard_dataset, - cols=COLS, - benchmark_cols=BENCHMARK_COLS, - ) - logging.info("Leaderboard dataset successfully downloaded.") - except Exception as e: - logging.error(f"Failed to download leaderboard dataset: {e}") - return - - # Reset the flag after successful download - NEW_DATA_ON_LEADERBOARD = False - else: - LEADERBOARD_DF = leaderboard_initial_df - logging.info("Using cached leaderboard dataset.") - return LEADERBOARD_DF - - -def get_latest_data_queue(): - eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS) - return eval_queue_dfs - -def init_space(): - """Initializes the application space, loading only necessary data.""" - global NEW_DATA_ON_LEADERBOARD - NEW_DATA_ON_LEADERBOARD = True # Ensure new data is always pulled on restart - - if DO_FULL_INIT: - # These downloads only occur on full initialization - try: - download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH) - download_dataset(VOTES_REPO, VOTES_PATH) - except Exception: - restart_space() - - # Always redownload the leaderboard DataFrame - global LEADERBOARD_DF - LEADERBOARD_DF = get_latest_data_leaderboard() - - # Evaluation queue DataFrame retrieval is independent of initialization detail level - eval_queue_dfs = get_latest_data_queue() - - return LEADERBOARD_DF, eval_queue_dfs - -# Initialize VoteManager -vote_manager = VoteManager(VOTES_PATH, EVAL_REQUESTS_PATH, VOTES_REPO) - - -# Schedule the 
upload_votes method to run every 15 minutes -schedule.every(15).minutes.do(vote_manager.upload_votes) - -# Start the scheduler in a separate thread -scheduler_thread = Thread(target=run_scheduler, args=(vote_manager,), daemon=True) -scheduler_thread.start() - -# Calls the init_space function with the `full_init` parameter determined by the `do_full_init` variable. -# This initializes various DataFrames used throughout the application, with the level of initialization detail controlled by the `do_full_init` flag. -LEADERBOARD_DF, eval_queue_dfs = init_space() -finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queue_dfs - - -def init_leaderboard(dataframe): - if dataframe is None or dataframe.empty: - raise ValueError("Leaderboard DataFrame is empty or None.") - return Leaderboard( - value=dataframe, - datatype=[c.type for c in fields(AutoEvalColumn)], - select_columns=SelectColumns( - default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default], - cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy], - label="Select Columns to Display:", - ), - search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.fullname.name, AutoEvalColumn.license.name], - hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden], - filter_columns=[ - ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"), - ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"), - ColumnFilter( - AutoEvalColumn.params.name, - type="slider", - min=0.01, - max=150, - label="Select the number of parameters (B)", - ), - ColumnFilter( - AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True - ), - ColumnFilter( - AutoEvalColumn.merged.name, type="boolean", label="Merge/MoErge", default=False - ), - ColumnFilter(AutoEvalColumn.moe.name, type="boolean", label="MoE", default=False), - ColumnFilter(AutoEvalColumn.not_flagged.name, type="boolean", label="Flagged", default=True), - ColumnFilter(AutoEvalColumn.official_providers.name, type="boolean", label="Show only official providers", default=False), - ], - bool_checkboxgroup_label="Hide models", - interactive=False, - ) - -main_block = gr.Blocks(css=custom_css) -with main_block: - with gr.Row(elem_id="header-row"): - gr.HTML(TITLE) - - gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text") - - with gr.Tabs(elem_classes="tab-buttons") as tabs: - with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0): - leaderboard = init_leaderboard(LEADERBOARD_DF) - - with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=5): - with gr.Column(): - with gr.Row(): - gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text") - - with gr.Row(): - gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text") - login_button = gr.LoginButton(elem_id="oauth-button") - - with gr.Row(): - with gr.Column(): - model_name_textbox = gr.Textbox(label="Model name") - revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="latest") - with gr.Row(): - model_type = gr.Dropdown( - choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown], - label="Model type", - multiselect=False, - value=ModelType.FT.to_str(" : "), - interactive=True, - ) - chat_template_toggle = gr.Checkbox( - label="Use chat template", - value=False, - info="Is your model a chat model?", - ) - - with gr.Column(): - precision = gr.Dropdown( - choices=[i.value.name for i in Precision if 
i != Precision.Unknown], - label="Precision", - multiselect=False, - value="float16", - interactive=True, - ) - weight_type = gr.Dropdown( - choices=[i.value.name for i in WeightType], - label="Weights type", - multiselect=False, - value=WeightType.Original.value.name, - interactive=True, - ) - base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)", interactive=False) - - with gr.Column(): - with gr.Accordion( - f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", - open=False, - ): - with gr.Row(): - finished_eval_table = gr.components.Dataframe( - value=finished_eval_queue_df, - headers=EVAL_COLS, - datatype=EVAL_TYPES, - row_count=5, - interactive=False, - ) - with gr.Accordion( - f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", - open=False, - ): - with gr.Row(): - running_eval_table = gr.components.Dataframe( - value=running_eval_queue_df, - headers=EVAL_COLS, - datatype=EVAL_TYPES, - row_count=5, - interactive=False, - ) - - with gr.Accordion( - f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", - open=False, - ): - with gr.Row(): - pending_eval_table = gr.components.Dataframe( - value=pending_eval_queue_df, - headers=EVAL_COLS, - datatype=EVAL_TYPES, - row_count=5, - interactive=False, - ) - - submit_button = gr.Button("Submit Eval") - submission_result = gr.Markdown() - - # The chat template checkbox update function - def update_chat_checkbox(model_type_value): - return ModelType.from_str(model_type_value) == ModelType.chat - - model_type.change( - fn=update_chat_checkbox, - inputs=[model_type], # Pass the current checkbox value - outputs=chat_template_toggle, - ) - - # The base_model_name_textbox interactivity and value reset function - def update_base_model_name_textbox(weight_type_value): - # Convert the dropdown value back to the corresponding WeightType Enum - weight_type_enum = WeightType[weight_type_value] - - # Determine if the textbox should be interactive - interactive = weight_type_enum in [WeightType.Adapter, WeightType.Delta] - - # Reset the value if weight type is "Original" - reset_value = "" if not interactive else None - - return gr.update(interactive=interactive, value=reset_value) - - weight_type.change( - fn=update_base_model_name_textbox, - inputs=[weight_type], - outputs=[base_model_name_textbox], - ) - - submit_button.click( - add_new_eval, - [ - model_name_textbox, - base_model_name_textbox, - revision_name_textbox, - precision, - weight_type, - model_type, - chat_template_toggle, - ], - submission_result, - ) - - # Ensure the values in 'pending_eval_queue_df' are correct and ready for the DataFrame component - with gr.TabItem("🆙 Model Vote"): - with gr.Row(): - gr.Markdown( - "## Vote for the models which should be evaluated first! \nYou'll need to sign in with the button above first. 
All votes are recorded.", - elem_classes="markdown-text" - ) - login_button = gr.LoginButton(elem_id="oauth-button") - - - with gr.Row(): - pending_models = pending_eval_queue_df[EvalQueueColumn.model_name.name].to_list() - - with gr.Column(): - selected_model = gr.Dropdown( - choices=pending_models, - label="Models", - multiselect=False, - value="str", - interactive=True, - ) - - vote_button = gr.Button("Vote", variant="primary") - - with gr.Row(): - with gr.Accordion( - f"Available models pending ({len(pending_eval_queue_df)})", - open=True, - ): - with gr.Row(): - pending_eval_table_votes = gr.components.Dataframe( - value=vote_manager.create_request_vote_df( - pending_eval_queue_df - ), - headers=EVAL_COLS, - datatype=EVAL_TYPES, - row_count=5, - interactive=False - ) - - # Set the click event for the vote button - vote_button.click( - vote_manager.add_vote, - inputs=[selected_model, pending_eval_table], - outputs=[pending_eval_table_votes] - ) - - - with gr.Row(): - with gr.Accordion("📙 Citation", open=False): - citation_button = gr.Textbox( - value=CITATION_BUTTON_TEXT, - label=CITATION_BUTTON_LABEL, - lines=20, - elem_id="citation-button", - show_copy_button=True, - ) - - main_block.load(fn=get_latest_data_leaderboard, inputs=[leaderboard], outputs=[leaderboard]) - leaderboard.change(fn=get_latest_data_queue, inputs=None, outputs=[finished_eval_table, running_eval_table, pending_eval_table]) - pending_eval_table.change(fn=vote_manager.create_request_vote_df, inputs=[pending_eval_table], outputs=[pending_eval_table_votes]) - -main_block.queue(default_concurrency_limit=40) - - -def enable_space_ci_and_return_server(ui: gr.Blocks) -> WebhooksServer: - # Taken from https://huggingface.co/spaces/Wauplin/gradio-space-ci/blob/075119aee75ab5e7150bf0814eec91c83482e790/src/gradio_space_ci/webhook.py#L61 - # Compared to original, this one do not monkeypatch Gradio which allows us to define more webhooks. - # ht to Lucain! 
- if SPACE_ID is None: - print("Not in a Space: Space CI disabled.") - return WebhooksServer(ui=main_block) - - if IS_EPHEMERAL_SPACE: - print("In an ephemeral Space: Space CI disabled.") - return WebhooksServer(ui=main_block) - - card = RepoCard.load(repo_id_or_path=SPACE_ID, repo_type="space") - config = card.data.get("space_ci", {}) - print(f"Enabling Space CI with config from README: {config}") - - return configure_space_ci( - blocks=ui, - trusted_authors=config.get("trusted_authors"), - private=config.get("private", "auto"), - variables=config.get("variables", "auto"), - secrets=config.get("secrets"), - hardware=config.get("hardware"), - storage=config.get("storage"), - ) - -# Create webhooks server (with CI url if in Space and not ephemeral) -webhooks_server = enable_space_ci_and_return_server(ui=main_block) - -# Add webhooks -@webhooks_server.add_webhook -def update_leaderboard(payload: WebhookPayload) -> None: - """Redownloads the leaderboard dataset each time it updates""" - if payload.repo.type == "dataset" and payload.event.action == "update": - global NEW_DATA_ON_LEADERBOARD - logging.info("New data detected, downloading updated leaderboard dataset.") - - # Mark the flag for new data - NEW_DATA_ON_LEADERBOARD = True - - # Now actually download the latest data immediately - get_latest_data_leaderboard() - -# The below code is not used at the moment, as we can manage the queue file locally -LAST_UPDATE_QUEUE = datetime.datetime.now() -@webhooks_server.add_webhook -def update_queue(payload: WebhookPayload) -> None: - """Redownloads the queue dataset each time it updates""" - if payload.repo.type == "dataset" and payload.event.action == "update": - current_time = datetime.datetime.now() - global LAST_UPDATE_QUEUE - if current_time - LAST_UPDATE_QUEUE > datetime.timedelta(minutes=10): - print("Would have updated the queue") - # We only redownload is last update was more than 10 minutes ago, as the queue is - # updated regularly and heavy to download - download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH) - LAST_UPDATE_QUEUE = datetime.datetime.now() - -webhooks_server.launch() - -scheduler = BackgroundScheduler() -scheduler.add_job(restart_space, "interval", hours=1) # Restart every 1h -logging.info("Scheduler initialized to restart space every 1 hour.") -scheduler.start() \ No newline at end of file diff --git a/backend/Dockerfile.dev b/backend/Dockerfile.dev new file mode 100644 index 0000000000000000000000000000000000000000..5b2bd73b0585d1bc1332699c62cc2ff11f2b8032 --- /dev/null +++ b/backend/Dockerfile.dev @@ -0,0 +1,25 @@ +FROM python:3.9-slim + +WORKDIR /app + +# Install required system dependencies +RUN apt-get update && apt-get install -y \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# Install poetry +RUN pip install poetry + +# Copy Poetry configuration files +COPY pyproject.toml poetry.lock* ./ + +# Install dependencies +RUN poetry config virtualenvs.create false && \ + poetry install --no-interaction --no-ansi --no-root + +# Environment variables configuration for logs +ENV PYTHONUNBUFFERED=1 +ENV LOG_LEVEL=INFO + +# In dev, mount volume directly +CMD ["uvicorn", "app.asgi:app", "--host", "0.0.0.0", "--port", "7860", "--reload", "--log-level", "warning", "--no-access-log"] \ No newline at end of file diff --git a/backend/README.md b/backend/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c20ccbd93d20e85f7724880eb0f4ab22cf9067f8 --- /dev/null +++ b/backend/README.md @@ -0,0 +1,352 @@ +# Backend - Open LLM Leaderboard 🏆 + +FastAPI 
backend for the Open LLM Leaderboard. This service is part of a larger architecture that includes a React frontend. For complete project installation, see the [main README](../README.md). + +## ✨ Features + +- 📊 REST API for LLM models leaderboard management +- 🗳️ Voting and ranking system +- 🔄 HuggingFace Hub integration +- 🚀 Caching and performance optimizations + +## 🏗 Architecture + +```mermaid +flowchart TD + Client(["**Frontend**

<br><br>React Application"]) --> API["**API Server**<br><br>FastAPI REST Endpoints"] + + subgraph Backend + API --> Core["**Core Layer**<br><br>• Middleware<br>• Cache<br>• Rate Limiting"] + Core --> Services["**Services Layer**<br><br>• Business Logic<br>• Data Processing"] + + subgraph Services Layer + Services --> Models["**Model Service**<br><br>• Model Submission<br>• Evaluation Pipeline"] + Services --> Votes["**Vote Service**<br><br>• Vote Management<br>• Data Synchronization"] + Services --> Board["**Leaderboard Service**<br><br>• Rankings<br>• Performance Metrics"] + end + + Models --> Cache["**Cache Layer**<br><br>• In-Memory Store<br>• Auto Invalidation"] + Votes --> Cache + Board --> Cache + + Models --> HF["**HuggingFace Hub**<br><br>• Models Repository<br>
• Datasets Access"] + Votes --> HF + Board --> HF + end + + style Client fill:#f9f,stroke:#333,stroke-width:2px + style Models fill:#bbf,stroke:#333,stroke-width:2px + style Votes fill:#bbf,stroke:#333,stroke-width:2px + style Board fill:#bbf,stroke:#333,stroke-width:2px + style HF fill:#bfb,stroke:#333,stroke-width:2px +``` + +## 🛠️ HuggingFace Datasets + +The application uses several datasets on the HuggingFace Hub: + +### 1. Requests Dataset (`{HF_ORGANIZATION}/requests`) + +- **Operations**: + - 📤 `POST /api/models/submit`: Adds a JSON file for each new model submission + - 📥 `GET /api/models/status`: Reads files to get models status +- **Format**: One JSON file per model with submission details +- **Updates**: On each new model submission + +### 2. Votes Dataset (`{HF_ORGANIZATION}/votes`) + +- **Operations**: + - 📤 `POST /api/votes/{model_id}`: Adds a new vote + - 📥 `GET /api/votes/model/{provider}/{model}`: Reads model votes + - 📥 `GET /api/votes/user/{user_id}`: Reads user votes +- **Format**: JSONL with one vote per line +- **Sync**: Bidirectional between local cache and Hub + +### 3. Contents Dataset (`{HF_ORGANIZATION}/contents`) + +- **Operations**: + - 📥 `GET /api/leaderboard`: Reads raw data + - 📥 `GET /api/leaderboard/formatted`: Reads and formats data +- **Format**: Main dataset containing all scores and metrics +- **Updates**: Automatic after model evaluations + +### 4. Maintainers Highlight Dataset (`{HF_ORGANIZATION}/maintainers-highlight`) + +- **Operations**: + - 📥 Read-only access for highlighted models +- **Format**: List of models selected by maintainers +- **Updates**: Manual by maintainers + +## 🛠 Local Development + +### Prerequisites + +- Python 3.9+ +- [Poetry](https://python-poetry.org/docs/#installation) + +### Standalone Installation (without Docker) + +```bash +# Install dependencies +poetry install + +# Setup configuration +cp .env.example .env + +# Start development server +poetry run uvicorn app.asgi:app --host 0.0.0.0 --port 7860 --reload +``` + +Server will be available at http://localhost:7860 + +## ⚙️ Configuration + +| Variable | Description | Default | +| ------------ | ------------------------------------ | ----------- | +| ENVIRONMENT | Environment (development/production) | development | +| HF_TOKEN | HuggingFace authentication token | - | +| PORT | Server port | 7860 | +| LOG_LEVEL | Logging level (INFO/DEBUG/WARNING) | INFO | +| CORS_ORIGINS | Allowed CORS origins | ["*"] | +| CACHE_TTL | Cache Time To Live in seconds | 300 | + +## 🔧 Middleware + +The backend uses several middleware layers for optimal performance and security: + +- **CORS Middleware**: Handles Cross-Origin Resource Sharing +- **GZIP Middleware**: Compresses responses > 500 bytes +- **Rate Limiting**: Prevents API abuse +- **Caching**: In-memory caching with automatic invalidation + +## 📝 Logging + +The application uses a structured logging system with: + +- Formatted console output +- Different log levels per component +- Request/Response logging +- Performance metrics +- Error tracking + +## 📁 File Structure + +``` +backend/ +├── app/ # Source code +│ ├── api/ # Routes and endpoints +│ │ └── endpoints/ # Endpoint handlers +│ ├── core/ # Configurations +│ ├── services/ # Business logic +│ └── utils/ # Utilities +└── tests/ # Tests +``` + +## 📚 API + +Swagger documentation available at http://localhost:7860/docs + +### Main Endpoints & Data Structures + +#### Leaderboard + +- `GET /api/leaderboard/formatted` - Formatted data with computed fields and metadata + + ```typescript + 
Response { + models: [{ + id: string, // eval_name + model: { + name: string, // fullname + sha: string, // Model sha + precision: string, // e.g. "fp16", "int8" + type: string, // e.g. "fined-tuned-on-domain-specific-dataset" + weight_type: string, + architecture: string, + average_score: number, + has_chat_template: boolean + }, + evaluations: { + ifeval: { + name: "IFEval", + value: number, // Raw score + normalized_score: number + }, + bbh: { + name: "BBH", + value: number, + normalized_score: number + }, + math: { + name: "MATH Level 5", + value: number, + normalized_score: number + }, + gpqa: { + name: "GPQA", + value: number, + normalized_score: number + }, + musr: { + name: "MUSR", + value: number, + normalized_score: number + }, + mmlu_pro: { + name: "MMLU-PRO", + value: number, + normalized_score: number + } + }, + features: { + is_not_available_on_hub: boolean, + is_merged: boolean, + is_moe: boolean, + is_flagged: boolean, + is_highlighted_by_maintainer: boolean + }, + metadata: { + upload_date: string, + submission_date: string, + generation: string, + base_model: string, + hub_license: string, + hub_hearts: number, + params_billions: number, + co2_cost: number // CO₂ cost in kg + } + }] + } + ``` + +- `GET /api/leaderboard` - Raw data from the HuggingFace dataset + ```typescript + Response { + models: [{ + eval_name: string, + Precision: string, + Type: string, + "Weight type": string, + Architecture: string, + Model: string, + fullname: string, + "Model sha": string, + "Average ⬆️": number, + "Hub License": string, + "Hub ❤️": number, + "#Params (B)": number, + "Available on the hub": boolean, + Not_Merged: boolean, + MoE: boolean, + Flagged: boolean, + "Chat Template": boolean, + "CO₂ cost (kg)": number, + "IFEval Raw": number, + IFEval: number, + "BBH Raw": number, + BBH: number, + "MATH Lvl 5 Raw": number, + "MATH Lvl 5": number, + "GPQA Raw": number, + GPQA: number, + "MUSR Raw": number, + MUSR: number, + "MMLU-PRO Raw": number, + "MMLU-PRO": number, + "Maintainer's Highlight": boolean, + "Upload To Hub Date": string, + "Submission Date": string, + Generation: string, + "Base Model": string + }] + } + ``` + +#### Models + +- `GET /api/models/status` - Get all models grouped by status + ```typescript + Response { + pending: [{ + name: string, + submitter: string, + revision: string, + wait_time: string, + submission_time: string, + status: "PENDING" | "EVALUATING" | "FINISHED", + precision: string + }], + evaluating: Array, + finished: Array + } + ``` +- `GET /api/models/pending` - Get pending models only +- `POST /api/models/submit` - Submit model + + ```typescript + Request { + user_id: string, + model_id: string, + base_model?: string, + precision?: string, + model_type: string + } + + Response { + status: string, + message: string + } + ``` + +- `GET /api/models/{model_id}/status` - Get model status + +#### Votes + +- `POST /api/votes/{model_id}` - Vote + + ```typescript + Request { + vote_type: "up" | "down", + user_id: string // HuggingFace username + } + + Response { + success: boolean, + message: string + } + ``` + +- `GET /api/votes/model/{provider}/{model}` - Get model votes + ```typescript + Response { + total_votes: number, + up_votes: number, + down_votes: number + } + ``` +- `GET /api/votes/user/{user_id}` - Get user votes + ```typescript + Response Array<{ + model_id: string, + vote_type: string, + timestamp: string + }> + ``` + +## 🔒 Authentication + +The backend uses HuggingFace token-based authentication for secure API access. Make sure to: + +1. 
Set your HF_TOKEN in the .env file +2. Include the token in API requests via Bearer authentication +3. Keep your token secure and never commit it to version control + +## 🚀 Performance + +The backend implements several optimizations: + +- In-memory caching with configurable TTL (Time To Live) +- Batch processing for model evaluations +- Rate limiting for API endpoints +- Efficient database queries with proper indexing +- Automatic cache invalidation for votes diff --git a/backend/app/api/__init__.py b/backend/app/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..41bd81293794127ec484666c9a9bf3b2cd0bbe3c --- /dev/null +++ b/backend/app/api/__init__.py @@ -0,0 +1,5 @@ +""" +API package initialization +""" + +__all__ = ["endpoints"] diff --git a/backend/app/api/dependencies.py b/backend/app/api/dependencies.py new file mode 100644 index 0000000000000000000000000000000000000000..89b98e0cdea59cf0b395722bdb473c71aee48496 --- /dev/null +++ b/backend/app/api/dependencies.py @@ -0,0 +1,34 @@ +from fastapi import Depends, HTTPException +import logging +from app.services.models import ModelService +from app.services.votes import VoteService +from app.utils.logging import LogFormatter + +logger = logging.getLogger(__name__) + +model_service = ModelService() +vote_service = VoteService() + +async def get_model_service() -> ModelService: + """Dependency to get ModelService instance""" + try: + logger.info(LogFormatter.info("Initializing model service dependency")) + await model_service.initialize() + logger.info(LogFormatter.success("Model service initialized")) + return model_service + except Exception as e: + error_msg = "Failed to initialize model service" + logger.error(LogFormatter.error(error_msg, e)) + raise HTTPException(status_code=500, detail=str(e)) + +async def get_vote_service() -> VoteService: + """Dependency to get VoteService instance""" + try: + logger.info(LogFormatter.info("Initializing vote service dependency")) + await vote_service.initialize() + logger.info(LogFormatter.success("Vote service initialized")) + return vote_service + except Exception as e: + error_msg = "Failed to initialize vote service" + logger.error(LogFormatter.error(error_msg, e)) + raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/backend/app/api/endpoints/leaderboard.py b/backend/app/api/endpoints/leaderboard.py new file mode 100644 index 0000000000000000000000000000000000000000..cf15be68b9d5e36b5a1b3f97b533ff3a7766764a --- /dev/null +++ b/backend/app/api/endpoints/leaderboard.py @@ -0,0 +1,49 @@ +from fastapi import APIRouter +from typing import List, Dict, Any +from app.services.leaderboard import LeaderboardService +from app.core.fastapi_cache import cached, build_cache_key +import logging +from app.utils.logging import LogFormatter + +logger = logging.getLogger(__name__) +router = APIRouter() +leaderboard_service = LeaderboardService() + +def leaderboard_key_builder(func, namespace: str = "leaderboard", **kwargs): + """Build cache key for leaderboard data""" + key_type = "raw" if func.__name__ == "get_leaderboard" else "formatted" + key = build_cache_key(namespace, key_type) + logger.debug(LogFormatter.info(f"Built leaderboard cache key: {key}")) + return key + +@router.get("") +@cached(expire=300, key_builder=leaderboard_key_builder) +async def get_leaderboard() -> List[Dict[str, Any]]: + """ + Get raw leaderboard data + Response will be automatically GZIP compressed if size > 500 bytes + """ + try: + 
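# Delegate to LeaderboardService; the @cached decorator above holds this response for 300 seconds, so the underlying HF dataset is only re-read once the cache expires +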
logger.info(LogFormatter.info("Fetching raw leaderboard data")) + data = await leaderboard_service.fetch_raw_data() + logger.info(LogFormatter.success(f"Retrieved {len(data)} leaderboard entries")) + return data + except Exception as e: + logger.error(LogFormatter.error("Failed to fetch raw leaderboard data", e)) + raise + +@router.get("/formatted") +@cached(expire=300, key_builder=leaderboard_key_builder) +async def get_formatted_leaderboard() -> List[Dict[str, Any]]: + """ + Get formatted leaderboard data with restructured objects + Response will be automatically GZIP compressed if size > 500 bytes + """ + try: + logger.info(LogFormatter.info("Fetching formatted leaderboard data")) + data = await leaderboard_service.get_formatted_data() + logger.info(LogFormatter.success(f"Retrieved {len(data)} formatted entries")) + return data + except Exception as e: + logger.error(LogFormatter.error("Failed to fetch formatted leaderboard data", e)) + raise \ No newline at end of file diff --git a/backend/app/api/endpoints/models.py b/backend/app/api/endpoints/models.py new file mode 100644 index 0000000000000000000000000000000000000000..e8361c35ab2abfd3a75f6232bff725404b43b377 --- /dev/null +++ b/backend/app/api/endpoints/models.py @@ -0,0 +1,103 @@ +from fastapi import APIRouter, HTTPException, Depends +from typing import Dict, Any, List +import logging +from app.services.models import ModelService +from app.api.dependencies import get_model_service +from app.core.fastapi_cache import cached +from app.utils.logging import LogFormatter + +logger = logging.getLogger(__name__) +router = APIRouter(tags=["models"]) + +@router.get("/status") +@cached(expire=300) +async def get_models_status( + model_service: ModelService = Depends(get_model_service) +) -> Dict[str, List[Dict[str, Any]]]: + """Get all models grouped by status""" + try: + logger.info(LogFormatter.info("Fetching status for all models")) + result = await model_service.get_models() + stats = { + status: len(models) for status, models in result.items() + } + for line in LogFormatter.stats(stats, "Models by Status"): + logger.info(line) + return result + except Exception as e: + logger.error(LogFormatter.error("Failed to get models status", e)) + raise HTTPException(status_code=500, detail=str(e)) + +@router.get("/pending") +@cached(expire=60) +async def get_pending_models( + model_service: ModelService = Depends(get_model_service) +) -> List[Dict[str, Any]]: + """Get all models waiting for evaluation""" + try: + logger.info(LogFormatter.info("Fetching pending models")) + models = await model_service.get_models() + pending = models.get("pending", []) + logger.info(LogFormatter.success(f"Found {len(pending)} pending models")) + return pending + except Exception as e: + logger.error(LogFormatter.error("Failed to get pending models", e)) + raise HTTPException(status_code=500, detail=str(e)) + +@router.post("/submit") +async def submit_model( + model_data: Dict[str, Any], + model_service: ModelService = Depends(get_model_service) +) -> Dict[str, Any]: + try: + logger.info(LogFormatter.section("MODEL SUBMISSION")) + + user_id = model_data.pop('user_id', None) + if not user_id: + error_msg = "user_id is required" + logger.error(LogFormatter.error("Validation failed", error_msg)) + raise ValueError(error_msg) + + # Log submission details + submission_info = { + "Model_ID": model_data.get("model_id"), + "User": user_id, + "Base_Model": model_data.get("base_model"), + "Precision": model_data.get("precision"), + "Model_Type": model_data.get("model_type") 
+ } + for line in LogFormatter.tree(submission_info, "Submission Details"): + logger.info(line) + + result = await model_service.submit_model(model_data, user_id) + logger.info(LogFormatter.success("Model submitted successfully")) + return result + + except ValueError as e: + logger.error(LogFormatter.error("Invalid submission data", e)) + raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + logger.error(LogFormatter.error("Submission failed", e)) + raise HTTPException(status_code=500, detail=str(e)) + +@router.get("/{model_id}/status") +async def get_model_status( + model_id: str, + model_service: ModelService = Depends(get_model_service) +) -> Dict[str, Any]: + try: + logger.info(LogFormatter.info(f"Checking status for model: {model_id}")) + status = await model_service.get_model_status(model_id) + + if status["status"] != "not_found": + logger.info(LogFormatter.success("Status found")) + for line in LogFormatter.tree(status, "Model Status"): + logger.info(line) + else: + logger.warning(LogFormatter.warning(f"No status found for model: {model_id}")) + + return status + + except Exception as e: + logger.error(LogFormatter.error("Failed to get model status", e)) + raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/backend/app/api/endpoints/votes.py b/backend/app/api/endpoints/votes.py new file mode 100644 index 0000000000000000000000000000000000000000..6d2a3b64add9b8ef0950366b38eca469b3788bf6 --- /dev/null +++ b/backend/app/api/endpoints/votes.py @@ -0,0 +1,105 @@ +from fastapi import APIRouter, HTTPException, Query, Depends +from typing import Dict, Any, List +from app.services.votes import VoteService +from app.core.fastapi_cache import cached, build_cache_key, invalidate_cache_key +import logging +from app.utils.logging import LogFormatter + +logger = logging.getLogger(__name__) +router = APIRouter() +vote_service = VoteService() + +def model_votes_key_builder(func, namespace: str = "model_votes", **kwargs): + """Build cache key for model votes""" + provider = kwargs.get('provider') + model = kwargs.get('model') + key = build_cache_key(namespace, provider, model) + logger.debug(LogFormatter.info(f"Built model votes cache key: {key}")) + return key + +def user_votes_key_builder(func, namespace: str = "user_votes", **kwargs): + """Build cache key for user votes""" + user_id = kwargs.get('user_id') + key = build_cache_key(namespace, user_id) + logger.debug(LogFormatter.info(f"Built user votes cache key: {key}")) + return key + +@router.post("/{model_id:path}") +async def add_vote( + model_id: str, + vote_type: str = Query(..., description="Type of vote (up/down)"), + user_id: str = Query(..., description="HuggingFace username") +) -> Dict[str, Any]: + try: + logger.info(LogFormatter.section("ADDING VOTE")) + stats = { + "Model": model_id, + "User": user_id, + "Type": vote_type + } + for line in LogFormatter.tree(stats, "Vote Details"): + logger.info(line) + + await vote_service.initialize() + result = await vote_service.add_vote(model_id, user_id, vote_type) + + # Invalidate affected caches + try: + logger.info(LogFormatter.subsection("CACHE INVALIDATION")) + provider, model = model_id.split('/', 1) + + # Build and invalidate cache keys + model_cache_key = build_cache_key("model_votes", provider, model) + user_cache_key = build_cache_key("user_votes", user_id) + + invalidate_cache_key(model_cache_key) + invalidate_cache_key(user_cache_key) + + cache_stats = { + "Model_Cache": model_cache_key, + "User_Cache": user_cache_key 
+ } + for line in LogFormatter.tree(cache_stats, "Invalidated Caches"): + logger.info(line) + + except Exception as e: + logger.error(LogFormatter.error("Failed to invalidate cache", e)) + + return result + except Exception as e: + logger.error(LogFormatter.error("Failed to add vote", e)) + raise HTTPException(status_code=400, detail=str(e)) + +@router.get("/model/{provider}/{model}") +@cached(expire=60, key_builder=model_votes_key_builder) +async def get_model_votes( + provider: str, + model: str +) -> Dict[str, Any]: + """Get all votes for a specific model""" + try: + logger.info(LogFormatter.info(f"Fetching votes for model: {provider}/{model}")) + await vote_service.initialize() + model_id = f"{provider}/{model}" + result = await vote_service.get_model_votes(model_id) + logger.info(LogFormatter.success(f"Found {result.get('total_votes', 0)} votes")) + return result + except Exception as e: + logger.error(LogFormatter.error("Failed to get model votes", e)) + raise HTTPException(status_code=400, detail=str(e)) + +@router.get("/user/{user_id}") +@cached(expire=60, key_builder=user_votes_key_builder) +async def get_user_votes( + user_id: str +) -> List[Dict[str, Any]]: + """Get all votes from a specific user""" + try: + logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}")) + await vote_service.initialize() + votes = await vote_service.get_user_votes(user_id) + logger.info(LogFormatter.success(f"Found {len(votes)} votes")) + return votes + except Exception as e: + logger.error(LogFormatter.error("Failed to get user votes", e)) + raise HTTPException(status_code=400, detail=str(e)) \ No newline at end of file diff --git a/backend/app/api/router.py b/backend/app/api/router.py new file mode 100644 index 0000000000000000000000000000000000000000..a2c952105c729b92abc72d59ae5882ee4394c017 --- /dev/null +++ b/backend/app/api/router.py @@ -0,0 +1,9 @@ +from fastapi import APIRouter + +from app.api.endpoints import leaderboard, votes, models + +router = APIRouter() + +router.include_router(leaderboard.router, prefix="/leaderboard", tags=["leaderboard"]) +router.include_router(votes.router, prefix="/votes", tags=["votes"]) +router.include_router(models.router, prefix="/models", tags=["models"]) \ No newline at end of file diff --git a/backend/app/asgi.py b/backend/app/asgi.py new file mode 100644 index 0000000000000000000000000000000000000000..4bf3d8b59d8fe2bb0244ddcc9bc7130f94ec1870 --- /dev/null +++ b/backend/app/asgi.py @@ -0,0 +1,106 @@ +""" +ASGI entry point for the Open LLM Leaderboard API. 
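+Configures logging, CORS and GZip middleware, mounts the API router under /api, and initializes the in-memory cache and HF configuration logging at startup.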
+""" +import os +import uvicorn +import logging +import logging.config +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +from fastapi.middleware.gzip import GZipMiddleware +import sys + +from app.api.router import router +from app.core.fastapi_cache import setup_cache +from app.utils.logging import LogFormatter +from app.config import hf_config + +# Configure logging before anything else +LOGGING_CONFIG = { + "version": 1, + "disable_existing_loggers": True, + "formatters": { + "default": { + "format": "%(name)s - %(levelname)s - %(message)s", + } + }, + "handlers": { + "default": { + "formatter": "default", + "class": "logging.StreamHandler", + "stream": "ext://sys.stdout", + } + }, + "loggers": { + "uvicorn": { + "handlers": ["default"], + "level": "WARNING", + "propagate": False, + }, + "uvicorn.error": { + "level": "WARNING", + "handlers": ["default"], + "propagate": False, + }, + "uvicorn.access": { + "handlers": ["default"], + "level": "INFO", + "propagate": False, + }, + "app": { + "handlers": ["default"], + "level": "INFO", + "propagate": False, + } + }, + "root": { + "handlers": ["default"], + "level": "INFO", + } +} + +# Apply logging configuration +logging.config.dictConfig(LOGGING_CONFIG) +logger = logging.getLogger("app") + +# Create FastAPI application +app = FastAPI( + title="Open LLM Leaderboard", + version="1.0.0", + docs_url="/docs", +) + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Add GZIP compression +app.add_middleware(GZipMiddleware, minimum_size=500) + +# Include API router +app.include_router(router, prefix="/api") + +@app.on_event("startup") +async def startup_event(): + """Initialize services on startup""" + logger.info("\n") + logger.info(LogFormatter.section("APPLICATION STARTUP")) + + # Log HF configuration + logger.info(LogFormatter.section("HUGGING FACE CONFIGURATION")) + logger.info(LogFormatter.info(f"Organization: {hf_config.HF_ORGANIZATION}")) + logger.info(LogFormatter.info(f"Token Status: {'Present' if hf_config.HF_TOKEN else 'Missing'}")) + logger.info(LogFormatter.info(f"Using repositories:")) + logger.info(LogFormatter.info(f" - Queue: {hf_config.QUEUE_REPO}")) + logger.info(LogFormatter.info(f" - Aggregated: {hf_config.AGGREGATED_REPO}")) + logger.info(LogFormatter.info(f" - Votes: {hf_config.VOTES_REPO}")) + logger.info(LogFormatter.info(f" - Maintainers Highlight: {hf_config.MAINTAINERS_HIGHLIGHT_REPO}")) + + # Setup cache + setup_cache() + logger.info(LogFormatter.success("FastAPI Cache initialized with in-memory backend")) \ No newline at end of file diff --git a/backend/app/config/__init__.py b/backend/app/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9a8cea98b9ddb1daaf3c9e8e5d2c9be1fc94657e --- /dev/null +++ b/backend/app/config/__init__.py @@ -0,0 +1,6 @@ +""" +Configuration module for the Open LLM Leaderboard backend. +All configuration values are imported from base.py to avoid circular dependencies. 
+""" + +from .base import * diff --git a/backend/app/config/base.py b/backend/app/config/base.py new file mode 100644 index 0000000000000000000000000000000000000000..89a7e65b155fe2d781bc6178fdf2ecea163554b5 --- /dev/null +++ b/backend/app/config/base.py @@ -0,0 +1,38 @@ +import os +from pathlib import Path + +# Server configuration +HOST = "0.0.0.0" +PORT = 7860 +WORKERS = 4 +RELOAD = True if os.environ.get("ENVIRONMENT") == "development" else False + +# CORS configuration +ORIGINS = ["http://localhost:3000"] if os.getenv("ENVIRONMENT") == "development" else ["*"] + +# Cache configuration +CACHE_TTL = int(os.environ.get("CACHE_TTL", 300)) # 5 minutes default + +# Rate limiting +RATE_LIMIT_PERIOD = 7 # days +RATE_LIMIT_QUOTA = 5 +HAS_HIGHER_RATE_LIMIT = [] + +# HuggingFace configuration +HF_TOKEN = os.environ.get("HF_TOKEN") +HF_ORGANIZATION = "open-llm-leaderboard" +API = { + "INFERENCE": "/static-proxy?url=https%3A%2F%2Fapi-inference.huggingface.co%2Fmodels", + "HUB": "https://huggingface.co" +} + +# Cache paths +CACHE_ROOT = Path(os.environ.get("HF_HOME", ".cache")) +DATASETS_CACHE = CACHE_ROOT / "datasets" +MODELS_CACHE = CACHE_ROOT / "models" +VOTES_CACHE = CACHE_ROOT / "votes" +EVAL_CACHE = CACHE_ROOT / "eval-queue" + +# Repository configuration +QUEUE_REPO = f"{HF_ORGANIZATION}/requests" +EVAL_REQUESTS_PATH = EVAL_CACHE / "eval_requests.jsonl" \ No newline at end of file diff --git a/backend/app/config/hf_config.py b/backend/app/config/hf_config.py new file mode 100644 index 0000000000000000000000000000000000000000..3e01bec392db2fa6513ea7ab638e4269593da25c --- /dev/null +++ b/backend/app/config/hf_config.py @@ -0,0 +1,35 @@ +""" +Hugging Face configuration module +""" +import os +import logging +from typing import Optional +from huggingface_hub import HfApi +from pathlib import Path +from app.core.cache import cache_config +from app.utils.logging import LogFormatter + +logger = logging.getLogger(__name__) + +# Organization or user who owns the datasets +HF_ORGANIZATION = "open-llm-leaderboard" +# HF_ORGANIZATION = "open-llm-leaderboard" + +# Get HF token directly from environment +HF_TOKEN = os.environ.get("HF_TOKEN") +if not HF_TOKEN: + logger.warning("HF_TOKEN not found in environment variables. Some features may be limited.") + +# Initialize HF API +API = HfApi(token=HF_TOKEN) + +# Repository configuration +QUEUE_REPO = f"{HF_ORGANIZATION}/requests" +AGGREGATED_REPO = f"{HF_ORGANIZATION}/contents" +VOTES_REPO = f"{HF_ORGANIZATION}/votes" +MAINTAINERS_HIGHLIGHT_REPO = f"{HF_ORGANIZATION}/maintainers-highlight" + +# File paths from cache config +VOTES_PATH = cache_config.votes_file +EVAL_REQUESTS_PATH = cache_config.eval_requests_file +MODEL_CACHE_DIR = cache_config.models_cache \ No newline at end of file diff --git a/backend/app/config/logging_config.py b/backend/app/config/logging_config.py new file mode 100644 index 0000000000000000000000000000000000000000..96be6f6749cdd79defb975141d857ff216aac420 --- /dev/null +++ b/backend/app/config/logging_config.py @@ -0,0 +1,38 @@ +import logging +import sys +from tqdm import tqdm + +def get_tqdm_handler(): + """ + Creates a special handler for tqdm that doesn't interfere with other logs. + """ + class TqdmLoggingHandler(logging.Handler): + def emit(self, record): + try: + msg = self.format(record) + tqdm.write(msg) + self.flush() + except Exception: + self.handleError(record) + + return TqdmLoggingHandler() + +def setup_service_logger(service_name: str) -> logging.Logger: + """ + Configure a specific logger for a given service. 
+ """ + logger = logging.getLogger(f"app.services.{service_name}") + + # If the logger already has handlers, don't reconfigure it + if logger.handlers: + return logger + + # Add tqdm handler for this service + tqdm_handler = get_tqdm_handler() + tqdm_handler.setFormatter(logging.Formatter('%(name)s - %(levelname)s - %(message)s')) + logger.addHandler(tqdm_handler) + + # Don't propagate logs to parent loggers + logger.propagate = False + + return logger \ No newline at end of file diff --git a/backend/app/core/cache.py b/backend/app/core/cache.py new file mode 100644 index 0000000000000000000000000000000000000000..28da6c2f09ef08b0df3393c6c97f9fc412ae13eb --- /dev/null +++ b/backend/app/core/cache.py @@ -0,0 +1,109 @@ +import os +import shutil +from pathlib import Path +from datetime import timedelta +import logging +from app.utils.logging import LogFormatter +from app.config.base import ( + CACHE_ROOT, + DATASETS_CACHE, + MODELS_CACHE, + VOTES_CACHE, + EVAL_CACHE, + CACHE_TTL +) + +logger = logging.getLogger(__name__) + +class CacheConfig: + def __init__(self): + # Get cache paths from config + self.cache_root = CACHE_ROOT + self.datasets_cache = DATASETS_CACHE + self.models_cache = MODELS_CACHE + self.votes_cache = VOTES_CACHE + self.eval_cache = EVAL_CACHE + + # Specific files + self.votes_file = self.votes_cache / "votes_data.jsonl" + self.eval_requests_file = self.eval_cache / "eval_requests.jsonl" + + # Cache TTL + self.cache_ttl = timedelta(seconds=CACHE_TTL) + + self._initialize_cache_dirs() + self._setup_environment() + + def _initialize_cache_dirs(self): + """Initialize all necessary cache directories""" + try: + logger.info(LogFormatter.section("CACHE INITIALIZATION")) + + cache_dirs = { + "Root": self.cache_root, + "Datasets": self.datasets_cache, + "Models": self.models_cache, + "Votes": self.votes_cache, + "Eval": self.eval_cache + } + + for name, cache_dir in cache_dirs.items(): + cache_dir.mkdir(parents=True, exist_ok=True) + logger.info(LogFormatter.success(f"{name} cache directory: {cache_dir}")) + + except Exception as e: + logger.error(LogFormatter.error("Failed to create cache directories", e)) + raise + + def _setup_environment(self): + """Configure HuggingFace environment variables""" + logger.info(LogFormatter.subsection("ENVIRONMENT SETUP")) + + env_vars = { + "HF_HOME": str(self.cache_root), + "TRANSFORMERS_CACHE": str(self.models_cache), + "HF_DATASETS_CACHE": str(self.datasets_cache) + } + + for var, value in env_vars.items(): + os.environ[var] = value + logger.info(LogFormatter.info(f"Set {var}={value}")) + + def get_cache_path(self, cache_type: str) -> Path: + """Returns the path for a specific cache type""" + cache_paths = { + "datasets": self.datasets_cache, + "models": self.models_cache, + "votes": self.votes_cache, + "eval": self.eval_cache + } + return cache_paths.get(cache_type, self.cache_root) + + def flush_cache(self, cache_type: str = None): + """Flush specified cache or all caches if no type is specified""" + try: + if cache_type: + logger.info(LogFormatter.section(f"FLUSHING {cache_type.upper()} CACHE")) + cache_dir = self.get_cache_path(cache_type) + if cache_dir.exists(): + stats = { + "Cache_Type": cache_type, + "Directory": str(cache_dir) + } + for line in LogFormatter.tree(stats, "Cache Details"): + logger.info(line) + shutil.rmtree(cache_dir) + cache_dir.mkdir(parents=True, exist_ok=True) + logger.info(LogFormatter.success("Cache cleared successfully")) + else: + logger.info(LogFormatter.section("FLUSHING ALL CACHES")) + for cache_type in 
["datasets", "models", "votes", "eval"]: + self.flush_cache(cache_type) + logger.info(LogFormatter.success("All caches cleared successfully")) + + except Exception as e: + logger.error(LogFormatter.error("Failed to flush cache", e)) + raise + +# Singleton instance of cache configuration +cache_config = CacheConfig() \ No newline at end of file diff --git a/backend/app/core/fastapi_cache.py b/backend/app/core/fastapi_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..51bd95ceccde3e3b613980266938ac190a296b30 --- /dev/null +++ b/backend/app/core/fastapi_cache.py @@ -0,0 +1,48 @@ +from fastapi_cache import FastAPICache +from fastapi_cache.backends.inmemory import InMemoryBackend +from fastapi_cache.decorator import cache +from datetime import timedelta +from app.config import CACHE_TTL +import logging +from app.utils.logging import LogFormatter + +logger = logging.getLogger(__name__) + +def setup_cache(): + """Initialize FastAPI Cache with in-memory backend""" + FastAPICache.init( + backend=InMemoryBackend(), + prefix="fastapi-cache", + expire=CACHE_TTL + ) + logger.info(LogFormatter.success("FastAPI Cache initialized with in-memory backend")) + +def invalidate_cache_key(key: str): + """Invalidate a specific cache key""" + try: + backend = FastAPICache.get_backend() + if hasattr(backend, 'delete'): + backend.delete(key) + logger.info(LogFormatter.success(f"Cache invalidated for key: {key}")) + else: + logger.warning(LogFormatter.warning("Cache backend does not support deletion")) + except Exception as e: + logger.error(LogFormatter.error(f"Failed to invalidate cache key: {key}", e)) + +def build_cache_key(namespace: str, *args) -> str: + """Build a consistent cache key""" + key = f"fastapi-cache:{namespace}:{':'.join(str(arg) for arg in args)}" + logger.debug(LogFormatter.info(f"Built cache key: {key}")) + return key + +def cached(expire: int = CACHE_TTL, key_builder=None): + """Decorator for caching endpoint responses + + Args: + expire (int): Cache TTL in seconds + key_builder (callable, optional): Custom key builder function + """ + return cache( + expire=expire, + key_builder=key_builder + ) \ No newline at end of file diff --git a/backend/app/main.py b/backend/app/main.py new file mode 100644 index 0000000000000000000000000000000000000000..86a00401700d1a97f9c7e3cd67509f51d7808c84 --- /dev/null +++ b/backend/app/main.py @@ -0,0 +1,18 @@ +from fastapi import FastAPI +from app.config.logging_config import setup_logging +import logging + +# Initialize logging configuration +setup_logging() +logger = logging.getLogger(__name__) + +app = FastAPI(title="Open LLM Leaderboard API") + +@app.on_event("startup") +async def startup_event(): + logger.info("Starting up the application...") + +# Import and include routers after app initialization +from app.api import models, votes +app.include_router(models.router, prefix="/api", tags=["models"]) +app.include_router(votes.router, prefix="/api", tags=["votes"]) \ No newline at end of file diff --git a/backend/app/services/__init__.py b/backend/app/services/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..399192f82143e7bf446fa183fa9e7779adab2bd7 --- /dev/null +++ b/backend/app/services/__init__.py @@ -0,0 +1,3 @@ +from . 
import hf_service, leaderboard, votes, models + +__all__ = ["hf_service", "leaderboard", "votes", "models"] diff --git a/backend/app/services/hf_service.py b/backend/app/services/hf_service.py new file mode 100644 index 0000000000000000000000000000000000000000..c8e2a2bc09562cbae0cab56f5f7f8c74445adf53 --- /dev/null +++ b/backend/app/services/hf_service.py @@ -0,0 +1,50 @@ +from typing import Optional +from huggingface_hub import HfApi +from app.config import HF_TOKEN, API +from app.core.cache import cache_config +from app.utils.logging import LogFormatter +import logging + +logger = logging.getLogger(__name__) + +class HuggingFaceService: + def __init__(self): + self.api = API + self.token = HF_TOKEN + self.cache_dir = cache_config.models_cache + + async def check_authentication(self) -> bool: + """Check if the HF token is valid""" + if not self.token: + return False + try: + logger.info(LogFormatter.info("Checking HF token validity...")) + self.api.get_token_permission() + logger.info(LogFormatter.success("HF token is valid")) + return True + except Exception as e: + logger.error(LogFormatter.error("HF token validation failed", e)) + return False + + async def get_user_info(self) -> Optional[dict]: + """Get information about the authenticated user""" + try: + logger.info(LogFormatter.info("Fetching user information...")) + info = self.api.get_token_permission() + logger.info(LogFormatter.success(f"User info retrieved for: {info.get('user', 'Unknown')}")) + return info + except Exception as e: + logger.error(LogFormatter.error("Failed to get user info", e)) + return None + + def _log_repo_operation(self, operation: str, repo: str, details: str = None): + """Helper to log repository operations""" + logger.info(LogFormatter.section(f"HF REPOSITORY OPERATION - {operation.upper()}")) + stats = { + "Operation": operation, + "Repository": repo, + } + if details: + stats["Details"] = details + for line in LogFormatter.tree(stats): + logger.info(line) \ No newline at end of file diff --git a/backend/app/services/leaderboard.py b/backend/app/services/leaderboard.py new file mode 100644 index 0000000000000000000000000000000000000000..49930fce526c6cad2c7c7ddd3a4d741a4bbd47ad --- /dev/null +++ b/backend/app/services/leaderboard.py @@ -0,0 +1,205 @@ +from app.core.cache import cache_config +from datetime import datetime +from typing import List, Dict, Any +import datasets +from fastapi import HTTPException +import logging +from app.config.base import HF_ORGANIZATION +from app.utils.logging import LogFormatter + +logger = logging.getLogger(__name__) + +class LeaderboardService: + def __init__(self): + pass + + async def fetch_raw_data(self) -> List[Dict[str, Any]]: + """Fetch raw leaderboard data from HuggingFace dataset""" + try: + logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA")) + logger.info(LogFormatter.info(f"Loading dataset from {HF_ORGANIZATION}/contents")) + + dataset = datasets.load_dataset( + f"{HF_ORGANIZATION}/contents", + cache_dir=cache_config.get_cache_path("datasets") + )["train"] + + df = dataset.to_pandas() + data = df.to_dict('records') + + stats = { + "Total_Entries": len(data), + "Dataset_Size": f"{df.memory_usage(deep=True).sum() / 1024 / 1024:.1f}MB" + } + for line in LogFormatter.stats(stats, "Dataset Statistics"): + logger.info(line) + + return data + + except Exception as e: + logger.error(LogFormatter.error("Failed to fetch leaderboard data", e)) + raise HTTPException(status_code=500, detail=str(e)) + + async def get_formatted_data(self) -> List[Dict[str, Any]]: 
+ """Get formatted leaderboard data""" + try: + logger.info(LogFormatter.section("FORMATTING LEADERBOARD DATA")) + + raw_data = await self.fetch_raw_data() + formatted_data = [] + type_counts = {} + error_count = 0 + + # Initialize progress tracking + total_items = len(raw_data) + logger.info(LogFormatter.info(f"Processing {total_items:,} entries...")) + + for i, item in enumerate(raw_data, 1): + try: + formatted_item = await self.transform_data(item) + formatted_data.append(formatted_item) + + # Count model types + model_type = formatted_item["model"]["type"] + type_counts[model_type] = type_counts.get(model_type, 0) + 1 + + except Exception as e: + error_count += 1 + logger.error(LogFormatter.error(f"Failed to format entry {i}/{total_items}", e)) + continue + + # Log progress every 10% + if i % max(1, total_items // 10) == 0: + progress = (i / total_items) * 100 + logger.info(LogFormatter.info(f"Progress: {LogFormatter.progress_bar(i, total_items)}")) + + # Log final statistics + stats = { + "Total_Processed": total_items, + "Successful": len(formatted_data), + "Failed": error_count + } + logger.info(LogFormatter.section("PROCESSING SUMMARY")) + for line in LogFormatter.stats(stats, "Processing Statistics"): + logger.info(line) + + # Log model type distribution + type_stats = {f"Type_{k}": v for k, v in type_counts.items()} + logger.info(LogFormatter.subsection("MODEL TYPE DISTRIBUTION")) + for line in LogFormatter.stats(type_stats): + logger.info(line) + + return formatted_data + + except Exception as e: + logger.error(LogFormatter.error("Failed to format leaderboard data", e)) + raise HTTPException(status_code=500, detail=str(e)) + + async def transform_data(self, data: Dict[str, Any]) -> Dict[str, Any]: + """Transform raw data into the format expected by the frontend""" + try: + # Extract model name for logging + model_name = data.get("fullname", "Unknown") + logger.debug(LogFormatter.info(f"Transforming data for model: {model_name}")) + + evaluations = { + "ifeval": { + "name": "IFEval", + "value": data.get("IFEval Raw", 0), + "normalized_score": data.get("IFEval", 0) + }, + "bbh": { + "name": "BBH", + "value": data.get("BBH Raw", 0), + "normalized_score": data.get("BBH", 0) + }, + "math": { + "name": "MATH Level 5", + "value": data.get("MATH Lvl 5 Raw", 0), + "normalized_score": data.get("MATH Lvl 5", 0) + }, + "gpqa": { + "name": "GPQA", + "value": data.get("GPQA Raw", 0), + "normalized_score": data.get("GPQA", 0) + }, + "musr": { + "name": "MUSR", + "value": data.get("MUSR Raw", 0), + "normalized_score": data.get("MUSR", 0) + }, + "mmlu_pro": { + "name": "MMLU-PRO", + "value": data.get("MMLU-PRO Raw", 0), + "normalized_score": data.get("MMLU-PRO", 0) + } + } + + features = { + "is_not_available_on_hub": not data.get("Available on the hub", False), + "is_merged": not data.get("Not_Merged", False), + "is_moe": not data.get("MoE", False), + "is_flagged": data.get("Flagged", False), + "is_highlighted_by_maintainer": data.get("Official Providers", False) + } + + metadata = { + "upload_date": data.get("Upload To Hub Date"), + "submission_date": data.get("Submission Date"), + "generation": data.get("Generation"), + "base_model": data.get("Base Model"), + "hub_license": data.get("Hub License"), + "hub_hearts": data.get("Hub ❤️"), + "params_billions": data.get("#Params (B)"), + "co2_cost": data.get("CO₂ cost (kg)", 0) + } + + # Clean model type by removing emojis if present + original_type = data.get("Type", "") + model_type = original_type.lower().strip() + + # Remove emojis and 
parentheses + if "(" in model_type: + model_type = model_type.split("(")[0].strip() + model_type = ''.join(c for c in model_type if not c in '🔶🟢🟩💬🤝🌸 ') + + # Map old model types to new ones + model_type_mapping = { + "fine-tuned": "fined-tuned-on-domain-specific-dataset", + "fine tuned": "fined-tuned-on-domain-specific-dataset", + "finetuned": "fined-tuned-on-domain-specific-dataset", + "fine_tuned": "fined-tuned-on-domain-specific-dataset", + "ft": "fined-tuned-on-domain-specific-dataset", + "finetuning": "fined-tuned-on-domain-specific-dataset", + "fine tuning": "fined-tuned-on-domain-specific-dataset", + "fine-tuning": "fined-tuned-on-domain-specific-dataset" + } + + mapped_type = model_type_mapping.get(model_type.lower().strip(), model_type) + + if mapped_type != model_type: + logger.debug(LogFormatter.info(f"Model type mapped: {original_type} -> {mapped_type}")) + + transformed_data = { + "id": data.get("eval_name"), + "model": { + "name": data.get("fullname"), + "sha": data.get("Model sha"), + "precision": data.get("Precision"), + "type": mapped_type, + "weight_type": data.get("Weight type"), + "architecture": data.get("Architecture"), + "average_score": data.get("Average ⬆️"), + "has_chat_template": data.get("Chat Template", False) + }, + "evaluations": evaluations, + "features": features, + "metadata": metadata + } + + logger.debug(LogFormatter.success(f"Successfully transformed data for {model_name}")) + return transformed_data + + except Exception as e: + logger.error(LogFormatter.error(f"Failed to transform data for {data.get('fullname', 'Unknown')}", e)) + raise \ No newline at end of file diff --git a/backend/app/services/models.py b/backend/app/services/models.py new file mode 100644 index 0000000000000000000000000000000000000000..87b3dffddfc40a794c976668a161390817ecd45d --- /dev/null +++ b/backend/app/services/models.py @@ -0,0 +1,559 @@ +from datetime import datetime, timezone +from typing import Dict, Any, Optional, List +import json +import os +from pathlib import Path +import logging +import aiohttp +import asyncio +import time +from huggingface_hub import HfApi, CommitOperationAdd +from huggingface_hub.utils import build_hf_headers +import datasets +from datasets import load_dataset, disable_progress_bar +import sys +import contextlib +from concurrent.futures import ThreadPoolExecutor +import tempfile + +from app.config import ( + QUEUE_REPO, + HF_TOKEN, + EVAL_REQUESTS_PATH +) +from app.config.hf_config import HF_ORGANIZATION +from app.services.hf_service import HuggingFaceService +from app.utils.model_validation import ModelValidator +from app.services.votes import VoteService +from app.core.cache import cache_config +from app.utils.logging import LogFormatter + +# Disable datasets progress bars globally +disable_progress_bar() + +logger = logging.getLogger(__name__) + +# Context manager to temporarily disable stdout and stderr +@contextlib.contextmanager +def suppress_output(): + stdout = sys.stdout + stderr = sys.stderr + devnull = open(os.devnull, 'w') + try: + sys.stdout = devnull + sys.stderr = devnull + yield + finally: + sys.stdout = stdout + sys.stderr = stderr + devnull.close() + +class ProgressTracker: + def __init__(self, total: int, desc: str = "Progress", update_frequency: int = 10): + self.total = total + self.current = 0 + self.desc = desc + self.start_time = time.time() + self.update_frequency = update_frequency # Percentage steps + self.last_update = -1 + + # Initial log with fancy formatting + logger.info(LogFormatter.section(desc)) + 
logger.info(LogFormatter.info(f"Starting processing of {total:,} items...")) + sys.stdout.flush() + + def update(self, n: int = 1): + self.current += n + current_percentage = (self.current * 100) // self.total + + # Only update on frequency steps (e.g., 0%, 10%, 20%, etc.) + if current_percentage >= self.last_update + self.update_frequency or current_percentage == 100: + elapsed = time.time() - self.start_time + rate = self.current / elapsed if elapsed > 0 else 0 + remaining = (self.total - self.current) / rate if rate > 0 else 0 + + # Create progress stats + stats = { + "Progress": LogFormatter.progress_bar(self.current, self.total), + "Items": f"{self.current:,}/{self.total:,}", + "Time": f"⏱️ {elapsed:.1f}s elapsed, {remaining:.1f}s remaining", + "Rate": f"🚀 {rate:.1f} items/s" + } + + # Log progress using tree format + for line in LogFormatter.tree(stats): + logger.info(line) + sys.stdout.flush() + + self.last_update = (current_percentage // self.update_frequency) * self.update_frequency + + def close(self): + elapsed = time.time() - self.start_time + rate = self.total / elapsed if elapsed > 0 else 0 + + # Final summary with fancy formatting + logger.info(LogFormatter.section("COMPLETED")) + stats = { + "Total": f"{self.total:,} items", + "Time": f"{elapsed:.1f}s", + "Rate": f"{rate:.1f} items/s" + } + for line in LogFormatter.stats(stats): + logger.info(line) + logger.info("="*50) + sys.stdout.flush() + +class ModelService(HuggingFaceService): + _instance: Optional['ModelService'] = None + _initialized = False + + def __new__(cls): + if cls._instance is None: + logger.info(LogFormatter.info("Creating new ModelService instance")) + cls._instance = super(ModelService, cls).__new__(cls) + return cls._instance + + def __init__(self): + if not hasattr(self, '_init_done'): + logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION")) + super().__init__() + self.validator = ModelValidator() + self.vote_service = VoteService() + self.eval_requests_path = cache_config.eval_requests_file + logger.info(LogFormatter.info(f"Using eval requests path: {self.eval_requests_path}")) + + self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True) + self.hf_api = HfApi(token=HF_TOKEN) + self.cached_models = None + self.last_cache_update = 0 + self.cache_ttl = cache_config.cache_ttl.total_seconds() + self._init_done = True + logger.info(LogFormatter.success("Initialization complete")) + + async def _download_and_process_file(self, file: str, session: aiohttp.ClientSession, progress: ProgressTracker) -> Optional[Dict]: + """Download and process a file asynchronously""" + try: + # Build file URL + url = f"https://huggingface.co/datasets/{QUEUE_REPO}/resolve/main/{file}" + headers = build_hf_headers(token=self.token) + + # Download file + async with session.get(url, headers=headers) as response: + if response.status != 200: + logger.error(LogFormatter.error(f"Failed to download {file}", f"HTTP {response.status}")) + progress.update() + return None + + try: + # First read content as text + text_content = await response.text() + # Then parse JSON + content = json.loads(text_content) + except json.JSONDecodeError as e: + logger.error(LogFormatter.error(f"Failed to decode JSON from {file}", e)) + progress.update() + return None + + # Get status and determine target status + status = content.get("status", "PENDING").upper() + target_status = None + status_map = { + "PENDING": ["PENDING", "RERUN"], + "EVALUATING": ["RUNNING"], + "FINISHED": ["FINISHED", "PENDING_NEW_EVAL"] + } + + for target, 
source_statuses in status_map.items(): + if status in source_statuses: + target_status = target + break + + if not target_status: + progress.update() + return None + + # Calculate wait time + try: + submit_time = datetime.fromisoformat(content["submitted_time"].replace("Z", "+00:00")) + if submit_time.tzinfo is None: + submit_time = submit_time.replace(tzinfo=timezone.utc) + current_time = datetime.now(timezone.utc) + wait_time = current_time - submit_time + + model_info = { + "name": content["model"], + "submitter": content.get("sender", "Unknown"), + "revision": content["revision"], + "wait_time": f"{wait_time.total_seconds():.1f}s", + "submission_time": content["submitted_time"], + "status": target_status, + "precision": content.get("precision", "Unknown") + } + + progress.update() + return model_info + + except (ValueError, TypeError) as e: + logger.error(LogFormatter.error(f"Failed to process {file}", e)) + progress.update() + return None + + except Exception as e: + logger.error(LogFormatter.error(f"Failed to load {file}", e)) + progress.update() + return None + + async def _refresh_models_cache(self): + """Refresh the models cache""" + try: + logger.info(LogFormatter.section("CACHE REFRESH")) + self._log_repo_operation("read", f"{HF_ORGANIZATION}/requests", "Refreshing models cache") + + # Initialize models dictionary + models = { + "finished": [], + "evaluating": [], + "pending": [] + } + + try: + logger.info(LogFormatter.subsection("DATASET LOADING")) + logger.info(LogFormatter.info("Loading dataset files...")) + + # List files in repository + with suppress_output(): + files = self.hf_api.list_repo_files( + repo_id=QUEUE_REPO, + repo_type="dataset", + token=self.token + ) + + # Filter JSON files + json_files = [f for f in files if f.endswith('.json')] + total_files = len(json_files) + + # Log repository stats + stats = { + "Total_Files": len(files), + "JSON_Files": total_files, + } + for line in LogFormatter.stats(stats, "Repository Statistics"): + logger.info(line) + + if not json_files: + raise Exception("No JSON files found in repository") + + # Initialize progress tracker + progress = ProgressTracker(total_files, "PROCESSING FILES") + + try: + # Create aiohttp session to reuse connections + async with aiohttp.ClientSession() as session: + # Process files in chunks + chunk_size = 50 + + for i in range(0, len(json_files), chunk_size): + chunk = json_files[i:i + chunk_size] + chunk_tasks = [ + self._download_and_process_file(file, session, progress) + for file in chunk + ] + results = await asyncio.gather(*chunk_tasks) + + # Process results + for result in results: + if result: + status = result.pop("status") + models[status.lower()].append(result) + + finally: + progress.close() + + # Final summary with fancy formatting + logger.info(LogFormatter.section("CACHE SUMMARY")) + stats = { + "Finished": len(models["finished"]), + "Evaluating": len(models["evaluating"]), + "Pending": len(models["pending"]) + } + for line in LogFormatter.stats(stats, "Models by Status"): + logger.info(line) + logger.info("="*50) + + except Exception as e: + logger.error(LogFormatter.error("Error processing files", e)) + raise + + # Update cache + self.cached_models = models + self.last_cache_update = time.time() + logger.info(LogFormatter.success("Cache updated successfully")) + + return models + + except Exception as e: + logger.error(LogFormatter.error("Cache refresh failed", e)) + raise + + async def initialize(self): + """Initialize the model service""" + if self._initialized: + 
logger.info(LogFormatter.info("Service already initialized, using cached data")) + return + + try: + logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION")) + + # Check if cache already exists + cache_path = cache_config.get_cache_path("datasets") + if not cache_path.exists() or not any(cache_path.iterdir()): + logger.info(LogFormatter.info("No existing cache found, initializing datasets cache...")) + cache_config.flush_cache("datasets") + else: + logger.info(LogFormatter.info("Using existing datasets cache")) + + # Ensure eval requests directory exists + self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True) + logger.info(LogFormatter.info(f"Eval requests directory: {self.eval_requests_path}")) + + # List existing files + if self.eval_requests_path.exists(): + files = list(self.eval_requests_path.glob("**/*.json")) + stats = { + "Total_Files": len(files), + "Directory": str(self.eval_requests_path) + } + for line in LogFormatter.stats(stats, "Eval Requests"): + logger.info(line) + + # Load initial cache + await self._refresh_models_cache() + + self._initialized = True + logger.info(LogFormatter.success("Model service initialization complete")) + + except Exception as e: + logger.error(LogFormatter.error("Initialization failed", e)) + raise + + async def get_models(self) -> Dict[str, List[Dict[str, Any]]]: + """Get all models with their status""" + if not self._initialized: + logger.info(LogFormatter.info("Service not initialized, initializing now...")) + await self.initialize() + + current_time = time.time() + cache_age = current_time - self.last_cache_update + + # Check if cache needs refresh + if not self.cached_models: + logger.info(LogFormatter.info("No cached data available, refreshing cache...")) + return await self._refresh_models_cache() + elif cache_age > self.cache_ttl: + logger.info(LogFormatter.info(f"Cache expired ({cache_age:.1f}s old, TTL: {self.cache_ttl}s)")) + return await self._refresh_models_cache() + else: + logger.info(LogFormatter.info(f"Using cached data ({cache_age:.1f}s old)")) + return self.cached_models + + async def submit_model( + self, + model_data: Dict[str, Any], + user_id: str + ) -> Dict[str, Any]: + logger.info(LogFormatter.section("MODEL SUBMISSION")) + self._log_repo_operation("write", f"{HF_ORGANIZATION}/requests", f"Submitting model {model_data['model_id']} by {user_id}") + stats = { + "Model": model_data["model_id"], + "User": user_id, + "Revision": model_data["revision"], + "Precision": model_data["precision"], + "Type": model_data["model_type"] + } + for line in LogFormatter.tree(stats, "Submission Details"): + logger.info(line) + + # Validate required fields + required_fields = [ + "model_id", "base_model", "revision", "precision", + "weight_type", "model_type", "use_chat_template" + ] + for field in required_fields: + if field not in model_data: + raise ValueError(f"Missing required field: {field}") + + # Check if model already exists in the system + try: + logger.info(LogFormatter.subsection("CHECKING EXISTING SUBMISSIONS")) + existing_models = await self.get_models() + + # Check in all statuses (pending, evaluating, finished) + for status, models in existing_models.items(): + for model in models: + if model["name"] == model_data["model_id"]: + error_msg = f"Model {model_data['model_id']} is already in the system with status: {status}" + logger.error(LogFormatter.error("Submission rejected", error_msg)) + raise ValueError(error_msg) + + logger.info(LogFormatter.success("No existing submission found")) + except ValueError: 
+ raise + except Exception as e: + logger.error(LogFormatter.error("Failed to check existing submissions", e)) + raise + + # Get model info and validate it exists on HuggingFace + try: + logger.info(LogFormatter.subsection("MODEL VALIDATION")) + + # Get the model info to check if it exists + model_info = self.hf_api.model_info( + model_data["model_id"], + revision=model_data["revision"], + token=self.token + ) + if not model_info: + raise Exception(f"Model {model_data['model_id']} not found on HuggingFace Hub") + + logger.info(LogFormatter.success("Model exists on HuggingFace Hub")) + + except Exception as e: + logger.error(LogFormatter.error("Model validation failed", e)) + raise + + # Validate model card + valid, error, model_card = await self.validator.check_model_card( + model_data["model_id"] + ) + if not valid: + logger.error(LogFormatter.error("Model card validation failed", error)) + raise Exception(error) + logger.info(LogFormatter.success("Model card validation passed")) + + # Check size limits + model_size, error = await self.validator.get_model_size( + model_info, + model_data["precision"], + model_data["base_model"] + ) + if model_size is None: + logger.error(LogFormatter.error("Model size validation failed", error)) + raise Exception(error) + logger.info(LogFormatter.success(f"Model size validation passed: {model_size:.1f}GB")) + + # Size limits based on precision + if model_data["precision"] in ["float16", "bfloat16"] and model_size > 100: + error_msg = f"Model too large for {model_data['precision']} (limit: 100GB)" + logger.error(LogFormatter.error("Size limit exceeded", error_msg)) + raise Exception(error_msg) + + # Chat template validation if requested + if model_data["use_chat_template"]: + valid, error = await self.validator.check_chat_template( + model_data["model_id"], + model_data["revision"] + ) + if not valid: + logger.error(LogFormatter.error("Chat template validation failed", error)) + raise Exception(error) + logger.info(LogFormatter.success("Chat template validation passed")) + + # Create eval entry + eval_entry = { + "model": model_data["model_id"], + "base_model": model_data["base_model"], + "revision": model_info.sha, + "precision": model_data["precision"], + "params": model_size, + "architectures": model_info.pipeline_tag if hasattr(model_info, 'pipeline_tag') else None, + "weight_type": model_data["weight_type"], + "status": "PENDING", + "submitted_time": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), + "model_type": model_data["model_type"], + "job_id": -1, + "job_start_time": None, + "use_chat_template": model_data["use_chat_template"], + "sender": user_id + } + + logger.info(LogFormatter.subsection("EVALUATION ENTRY")) + for line in LogFormatter.tree(eval_entry): + logger.info(line) + + # Upload to HF dataset + try: + logger.info(LogFormatter.subsection("UPLOADING TO HUGGINGFACE")) + logger.info(LogFormatter.info(f"Uploading to {HF_ORGANIZATION}/requests...")) + + # Construct the path in the dataset + org_or_user = model_data["model_id"].split("/")[0] if "/" in model_data["model_id"] else "" + model_path = model_data["model_id"].split("/")[-1] + relative_path = f"{org_or_user}/{model_path}_eval_request_False_{model_data['precision']}_{model_data['weight_type']}.json" + + # Create a temporary file with the request + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file: + json.dump(eval_entry, temp_file, indent=2) + temp_file.flush() + temp_path = temp_file.name + + # Upload file directly + 
self.hf_api.upload_file( + path_or_fileobj=temp_path, + path_in_repo=relative_path, + repo_id=f"{HF_ORGANIZATION}/requests", + repo_type="dataset", + commit_message=f"Add {model_data['model_id']} to eval queue", + token=self.token + ) + + # Clean up temp file + os.unlink(temp_path) + + logger.info(LogFormatter.success("Upload successful")) + + except Exception as e: + logger.error(LogFormatter.error("Upload failed", e)) + raise + + # Add automatic vote + try: + logger.info(LogFormatter.subsection("AUTOMATIC VOTE")) + logger.info(LogFormatter.info(f"Adding upvote for {model_data['model_id']} by {user_id}")) + await self.vote_service.add_vote( + model_data["model_id"], + user_id, + "up" + ) + logger.info(LogFormatter.success("Vote recorded successfully")) + except Exception as e: + logger.error(LogFormatter.error("Failed to record vote", e)) + # Don't raise here as the main submission was successful + + return { + "status": "success", + "message": "Model submitted successfully and vote recorded" + } + + async def get_model_status(self, model_id: str) -> Dict[str, Any]: + """Get evaluation status of a model""" + logger.info(LogFormatter.info(f"Checking status for model: {model_id}")) + eval_path = self.eval_requests_path + + for user_folder in eval_path.iterdir(): + if user_folder.is_dir(): + for file in user_folder.glob("*.json"): + with open(file, "r") as f: + data = json.load(f) + if data["model"] == model_id: + status = { + "status": data["status"], + "submitted_time": data["submitted_time"], + "job_id": data.get("job_id", -1) + } + logger.info(LogFormatter.success("Status found")) + for line in LogFormatter.tree(status, "Model Status"): + logger.info(line) + return status + + logger.warning(LogFormatter.warning(f"No status found for model: {model_id}")) + return {"status": "not_found"} \ No newline at end of file diff --git a/backend/app/services/rate_limiter.py b/backend/app/services/rate_limiter.py new file mode 100644 index 0000000000000000000000000000000000000000..988c68e2f7d7f3847d6691c70f55975648aa3c8f --- /dev/null +++ b/backend/app/services/rate_limiter.py @@ -0,0 +1,72 @@ +""" +import logging +from datetime import datetime, timedelta, timezone +from typing import Tuple, Dict, List + +logger = logging.getLogger(__name__) + +class RateLimiter: + def __init__(self, period_days: int = 7, quota: int = 5): + self.period_days = period_days + self.quota = quota + self.submission_history: Dict[str, List[datetime]] = {} + self.higher_quota_users = set() # Users with higher quotas + self.unlimited_users = set() # Users with no quota limits + + def add_unlimited_user(self, user_id: str): + """Add a user to the unlimited users list""" + self.unlimited_users.add(user_id) + + def add_higher_quota_user(self, user_id: str): + """Add a user to the higher quota users list""" + self.higher_quota_users.add(user_id) + + def record_submission(self, user_id: str): + """Record a new submission for a user""" + current_time = datetime.now(timezone.utc) + if user_id not in self.submission_history: + self.submission_history[user_id] = [] + self.submission_history[user_id].append(current_time) + + def clean_old_submissions(self, user_id: str): + """Remove submissions older than the period""" + if user_id not in self.submission_history: + return + + current_time = datetime.now(timezone.utc) + cutoff_time = current_time - timedelta(days=self.period_days) + + self.submission_history[user_id] = [ + time for time in self.submission_history[user_id] + if time > cutoff_time + ] + + async def check_rate_limit(self, 
user_id: str) -> Tuple[bool, str]: + """Check if a user has exceeded their rate limit + + Returns: + Tuple[bool, str]: (is_allowed, error_message) + """ + # Unlimited users bypass all checks + if user_id in self.unlimited_users: + return True, "" + + # Clean old submissions + self.clean_old_submissions(user_id) + + # Get current submission count + submission_count = len(self.submission_history.get(user_id, [])) + + # Calculate user's quota + user_quota = self.quota * 2 if user_id in self.higher_quota_users else self.quota + + # Check if user has exceeded their quota + if submission_count >= user_quota: + error_msg = ( + f"User '{user_id}' has reached the limit of {user_quota} submissions " + f"in the last {self.period_days} days. Please wait before submitting again." + ) + return False, error_msg + + return True, "" +""" \ No newline at end of file diff --git a/backend/app/services/votes.py b/backend/app/services/votes.py new file mode 100644 index 0000000000000000000000000000000000000000..03ab371cf6d6c73d7dddacab1746b045ba65805e --- /dev/null +++ b/backend/app/services/votes.py @@ -0,0 +1,391 @@ +from datetime import datetime, timezone +from typing import Dict, Any, List, Set, Tuple, Optional +import json +import logging +import asyncio +from pathlib import Path +import os +import aiohttp +from huggingface_hub import HfApi +import datasets + +from app.services.hf_service import HuggingFaceService +from app.config import HF_TOKEN, API +from app.config.hf_config import HF_ORGANIZATION +from app.core.cache import cache_config +from app.utils.logging import LogFormatter + +logger = logging.getLogger(__name__) + +class VoteService(HuggingFaceService): + _instance: Optional['VoteService'] = None + _initialized = False + + def __new__(cls): + if cls._instance is None: + cls._instance = super(VoteService, cls).__new__(cls) + return cls._instance + + def __init__(self): + if not hasattr(self, '_init_done'): + super().__init__() + self.votes_file = cache_config.votes_file + self.votes_to_upload: List[Dict[str, Any]] = [] + self.vote_check_set: Set[Tuple[str, str, str]] = set() + self._votes_by_model: Dict[str, List[Dict[str, Any]]] = {} + self._votes_by_user: Dict[str, List[Dict[str, Any]]] = {} + self._upload_lock = asyncio.Lock() + self._last_sync = None + self._sync_interval = 300 # 5 minutes + self._total_votes = 0 + self._last_vote_timestamp = None + self._max_retries = 3 + self._retry_delay = 1 # seconds + self._upload_batch_size = 10 + self.hf_api = HfApi(token=HF_TOKEN) + self._init_done = True + + async def initialize(self): + """Initialize the vote service""" + if self._initialized: + await self._check_for_new_votes() + return + + try: + logger.info(LogFormatter.section("VOTE SERVICE INITIALIZATION")) + + # Ensure votes directory exists + self.votes_file.parent.mkdir(parents=True, exist_ok=True) + + # Load existing votes if file exists + local_vote_count = 0 + if self.votes_file.exists(): + logger.info(LogFormatter.info(f"Loading votes from {self.votes_file}")) + local_vote_count = await self._count_local_votes() + logger.info(LogFormatter.info(f"Found {local_vote_count:,} local votes")) + + # Check remote votes count + remote_vote_count = await self._count_remote_votes() + logger.info(LogFormatter.info(f"Found {remote_vote_count:,} remote votes")) + + if remote_vote_count > local_vote_count: + logger.info(LogFormatter.info(f"Fetching {remote_vote_count - local_vote_count:,} new votes")) + await self._sync_with_hub() + elif remote_vote_count < local_vote_count: + 
logger.warning(LogFormatter.warning(f"Local votes ({local_vote_count:,}) > Remote votes ({remote_vote_count:,})")) + await self._load_existing_votes() + else: + logger.info(LogFormatter.success("Local and remote votes are in sync")) + if local_vote_count > 0: + await self._load_existing_votes() + else: + logger.info(LogFormatter.info("No votes found")) + + self._initialized = True + self._last_sync = datetime.now(timezone.utc) + + # Final summary + stats = { + "Total_Votes": self._total_votes, + "Last_Sync": self._last_sync.strftime("%Y-%m-%d %H:%M:%S UTC") + } + logger.info(LogFormatter.section("INITIALIZATION COMPLETE")) + for line in LogFormatter.stats(stats): + logger.info(line) + + except Exception as e: + logger.error(LogFormatter.error("Initialization failed", e)) + raise + + async def _count_local_votes(self) -> int: + """Count votes in local file""" + if not self.votes_file.exists(): + return 0 + + count = 0 + try: + with open(self.votes_file, 'r') as f: + for _ in f: + count += 1 + return count + except Exception as e: + logger.error(f"Error counting local votes: {str(e)}") + return 0 + + async def _count_remote_votes(self) -> int: + """Count votes in remote file""" + url = f"https://huggingface.co/datasets/{HF_ORGANIZATION}/votes/raw/main/votes_data.jsonl" + headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {} + + try: + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=headers) as response: + if response.status == 200: + count = 0 + async for line in response.content: + if line.strip(): # Skip empty lines + count += 1 + return count + else: + logger.error(f"Failed to get remote votes: HTTP {response.status}") + return 0 + except Exception as e: + logger.error(f"Error counting remote votes: {str(e)}") + return 0 + + async def _sync_with_hub(self): + """Sync votes with HuggingFace hub using datasets""" + try: + logger.info(LogFormatter.section("VOTE SYNC")) + self._log_repo_operation("sync", f"{HF_ORGANIZATION}/votes", "Syncing local votes with HF hub") + logger.info(LogFormatter.info("Syncing with HuggingFace hub...")) + + # Load votes from HF dataset + dataset = datasets.load_dataset( + f"{HF_ORGANIZATION}/votes", + split="train", + cache_dir=cache_config.get_cache_path("datasets") + ) + + remote_votes = len(dataset) + logger.info(LogFormatter.info(f"Dataset loaded with {remote_votes:,} votes")) + + # Convert to list of dictionaries + df = dataset.to_pandas() + if 'timestamp' in df.columns: + df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%dT%H:%M:%SZ') + remote_votes = df.to_dict('records') + + # If we have more remote votes than local + if len(remote_votes) > self._total_votes: + new_votes = len(remote_votes) - self._total_votes + logger.info(LogFormatter.info(f"Processing {new_votes:,} new votes...")) + + # Save votes to local file + with open(self.votes_file, 'w') as f: + for vote in remote_votes: + f.write(json.dumps(vote) + '\n') + + # Reload votes in memory + await self._load_existing_votes() + logger.info(LogFormatter.success("Sync completed successfully")) + else: + logger.info(LogFormatter.success("Local votes are up to date")) + + self._last_sync = datetime.now(timezone.utc) + + except Exception as e: + logger.error(LogFormatter.error("Sync failed", e)) + raise + + async def _check_for_new_votes(self): + """Check for new votes on the hub""" + try: + self._log_repo_operation("check", f"{HF_ORGANIZATION}/votes", "Checking for new votes") + # Load only dataset metadata + dataset_info = 
datasets.load_dataset(f"{HF_ORGANIZATION}/votes", split="train") + remote_vote_count = len(dataset_info) + + if remote_vote_count > self._total_votes: + logger.info(f"Found {remote_vote_count - self._total_votes} new votes on hub") + await self._sync_with_hub() + else: + logger.info("No new votes found on hub") + + except Exception as e: + logger.error(f"Error checking for new votes: {str(e)}") + + async def _load_existing_votes(self): + """Load existing votes from file""" + if not self.votes_file.exists(): + logger.warning(LogFormatter.warning("No votes file found")) + return + + try: + logger.info(LogFormatter.section("LOADING VOTES")) + + # Clear existing data structures + self.vote_check_set.clear() + self._votes_by_model.clear() + self._votes_by_user.clear() + + vote_count = 0 + latest_timestamp = None + + with open(self.votes_file, "r") as f: + for line in f: + try: + vote = json.loads(line.strip()) + vote_count += 1 + + # Track latest timestamp + try: + vote_timestamp = datetime.fromisoformat(vote["timestamp"].replace("Z", "+00:00")) + if not latest_timestamp or vote_timestamp > latest_timestamp: + latest_timestamp = vote_timestamp + vote["timestamp"] = vote_timestamp.strftime("%Y-%m-%dT%H:%M:%SZ") + except (KeyError, ValueError) as e: + logger.warning(LogFormatter.warning(f"Invalid timestamp in vote: {str(e)}")) + continue + + if vote_count % 1000 == 0: + logger.info(LogFormatter.info(f"Processed {vote_count:,} votes...")) + + self._add_vote_to_memory(vote) + + except json.JSONDecodeError as e: + logger.error(LogFormatter.error("Vote parsing failed", e)) + continue + except Exception as e: + logger.error(LogFormatter.error("Vote processing failed", e)) + continue + + self._total_votes = vote_count + self._last_vote_timestamp = latest_timestamp + + # Final summary + stats = { + "Total_Votes": vote_count, + "Latest_Vote": latest_timestamp.strftime("%Y-%m-%d %H:%M:%S UTC") if latest_timestamp else "None", + "Unique_Models": len(self._votes_by_model), + "Unique_Users": len(self._votes_by_user) + } + + logger.info(LogFormatter.section("VOTE SUMMARY")) + for line in LogFormatter.stats(stats): + logger.info(line) + + except Exception as e: + logger.error(LogFormatter.error("Failed to load votes", e)) + raise + + def _add_vote_to_memory(self, vote: Dict[str, Any]): + """Add vote to memory structures""" + try: + check_tuple = (vote["model"], vote["revision"], vote["username"]) + + # Skip if we already have this vote + if check_tuple in self.vote_check_set: + return + + self.vote_check_set.add(check_tuple) + + # Update model votes + if vote["model"] not in self._votes_by_model: + self._votes_by_model[vote["model"]] = [] + self._votes_by_model[vote["model"]].append(vote) + + # Update user votes + if vote["username"] not in self._votes_by_user: + self._votes_by_user[vote["username"]] = [] + self._votes_by_user[vote["username"]].append(vote) + + except KeyError as e: + logger.error(f"Malformed vote data, missing key: {str(e)}") + except Exception as e: + logger.error(f"Error adding vote to memory: {str(e)}") + + async def get_user_votes(self, user_id: str) -> List[Dict[str, Any]]: + """Get all votes from a specific user""" + logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}")) + votes = self._votes_by_user.get(user_id, []) + logger.info(LogFormatter.success(f"Found {len(votes):,} votes")) + return votes + + async def get_model_votes(self, model_id: str) -> Dict[str, Any]: + """Get all votes for a specific model""" + logger.info(LogFormatter.info(f"Fetching votes for model: 
{model_id}")) + votes = self._votes_by_model.get(model_id, []) + + # Group votes by revision + votes_by_revision = {} + for vote in votes: + revision = vote["revision"] + if revision not in votes_by_revision: + votes_by_revision[revision] = 0 + votes_by_revision[revision] += 1 + + stats = { + "Total_Votes": len(votes), + **{f"Revision_{k}": v for k, v in votes_by_revision.items()} + } + + logger.info(LogFormatter.section("VOTE STATISTICS")) + for line in LogFormatter.stats(stats): + logger.info(line) + + return { + "total_votes": len(votes), + "votes_by_revision": votes_by_revision, + "votes": votes + } + + async def _get_model_revision(self, model_id: str) -> str: + """Get current revision of a model with retries""" + logger.info(f"Getting revision for model: {model_id}") + for attempt in range(self._max_retries): + try: + model_info = await asyncio.to_thread(self.hf_api.model_info, model_id) + logger.info(f"Successfully got revision {model_info.sha} for model {model_id}") + return model_info.sha + except Exception as e: + logger.error(f"Error getting model revision for {model_id} (attempt {attempt + 1}): {str(e)}") + if attempt < self._max_retries - 1: + retry_delay = self._retry_delay * (attempt + 1) + logger.info(f"Retrying in {retry_delay} seconds...") + await asyncio.sleep(retry_delay) + else: + logger.warning(f"Using 'main' as fallback revision for {model_id} after {self._max_retries} failed attempts") + return "main" + + async def add_vote(self, model_id: str, user_id: str, vote_type: str) -> Dict[str, Any]: + """Add a vote for a model""" + try: + self._log_repo_operation("add", f"{HF_ORGANIZATION}/votes", f"Adding {vote_type} vote for {model_id} by {user_id}") + logger.info(LogFormatter.section("NEW VOTE")) + stats = { + "Model": model_id, + "User": user_id, + "Type": vote_type + } + for line in LogFormatter.tree(stats, "Vote Details"): + logger.info(line) + + revision = await self._get_model_revision(model_id) + check_tuple = (model_id, revision, user_id) + + if check_tuple in self.vote_check_set: + raise ValueError("Vote already recorded for this model") + + vote = { + "model": model_id, + "revision": revision, + "username": user_id, + "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), + "vote_type": vote_type + } + + # Update local storage + with open(self.votes_file, "a") as f: + f.write(json.dumps(vote) + "\n") + + self._add_vote_to_memory(vote) + self.votes_to_upload.append(vote) + + stats = { + "Status": "Success", + "Queue_Size": len(self.votes_to_upload) + } + for line in LogFormatter.stats(stats): + logger.info(line) + + # Try to upload if batch size reached + if len(self.votes_to_upload) >= self._upload_batch_size: + logger.info(LogFormatter.info(f"Upload batch size reached ({self._upload_batch_size}), triggering sync")) + await self._sync_with_hub() + + return {"status": "success", "message": "Vote added successfully"} + + except Exception as e: + logger.error(LogFormatter.error("Failed to add vote", e)) + raise \ No newline at end of file diff --git a/backend/app/utils/__init__.py b/backend/app/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..69a93acb760828c13400cfcd19da2822dfd83e5e --- /dev/null +++ b/backend/app/utils/__init__.py @@ -0,0 +1,3 @@ +from . 
import model_validation + +__all__ = ["model_validation"] diff --git a/backend/app/utils/logging.py b/backend/app/utils/logging.py new file mode 100644 index 0000000000000000000000000000000000000000..f7e56730181304a799bdc61eae9a7b5d8a1c005a --- /dev/null +++ b/backend/app/utils/logging.py @@ -0,0 +1,105 @@ +import logging +import sys +from typing import Dict, Any, List, Optional + +logger = logging.getLogger(__name__) + +class LogFormatter: + """Utility class for consistent log formatting across the application""" + + @staticmethod + def section(title: str) -> str: + """Create a section header""" + return f"\n{'='*20} {title.upper()} {'='*20}" + + @staticmethod + def subsection(title: str) -> str: + """Create a subsection header""" + return f"\n{'─'*20} {title} {'─'*20}" + + @staticmethod + def tree(items: Dict[str, Any], title: str = None) -> List[str]: + """Create a tree view of dictionary data""" + lines = [] + if title: + lines.append(f"📊 {title}:") + + # Get the maximum length for alignment + max_key_length = max(len(str(k)) for k in items.keys()) + + # Format each item + for i, (key, value) in enumerate(items.items()): + prefix = "└──" if i == len(items) - 1 else "├──" + if isinstance(value, (int, float)): + value = f"{value:,}" # Add thousand separators + lines.append(f"{prefix} {str(key):<{max_key_length}}: {value}") + + return lines + + @staticmethod + def stats(stats: Dict[str, int], title: str = None) -> List[str]: + """Format statistics with icons""" + lines = [] + if title: + lines.append(f"📊 {title}:") + + # Get the maximum length for alignment + max_key_length = max(len(str(k)) for k in stats.keys()) + + # Format each stat with an appropriate icon + icons = { + "total": "📌", + "success": "✅", + "error": "❌", + "pending": "⏳", + "processing": "⚙️", + "finished": "✨", + "evaluating": "🔄", + "downloads": "⬇️", + "files": "📁", + "cached": "💾", + "size": "📏", + "time": "⏱️", + "rate": "🚀" + } + + # Format each item + for i, (key, value) in enumerate(stats.items()): + prefix = "└──" if i == len(stats) - 1 else "├──" + icon = icons.get(key.lower().split('_')[0], "•") + if isinstance(value, (int, float)): + value = f"{value:,}" # Add thousand separators + lines.append(f"{prefix} {icon} {str(key):<{max_key_length}}: {value}") + + return lines + + @staticmethod + def progress_bar(current: int, total: int, width: int = 20) -> str: + """Create a progress bar""" + percentage = (current * 100) // total + filled = "█" * (percentage * width // 100) + empty = "░" * (width - len(filled)) + return f"{filled}{empty} {percentage:3d}%" + + @staticmethod + def error(message: str, error: Optional[Exception] = None) -> str: + """Format error message""" + error_msg = f"\n❌ Error: {message}" + if error: + error_msg += f"\n └── Details: {str(error)}" + return error_msg + + @staticmethod + def success(message: str) -> str: + """Format success message""" + return f"✅ {message}" + + @staticmethod + def warning(message: str) -> str: + """Format warning message""" + return f"⚠️ {message}" + + @staticmethod + def info(message: str) -> str: + """Format info message""" + return f"ℹ️ {message}" \ No newline at end of file diff --git a/backend/app/utils/model_validation.py b/backend/app/utils/model_validation.py new file mode 100644 index 0000000000000000000000000000000000000000..a0d7079adaedd4579b6cb2d85da12046e51229c0 --- /dev/null +++ b/backend/app/utils/model_validation.py @@ -0,0 +1,208 @@ +import json +import logging +import asyncio +import re +from typing import Tuple, Optional, Dict, Any +import aiohttp 
+from huggingface_hub import HfApi, ModelCard, hf_hub_download +from transformers import AutoConfig, AutoTokenizer +from app.config.base import HF_TOKEN, API +from app.utils.logging import LogFormatter + +logger = logging.getLogger(__name__) + +class ModelValidator: + def __init__(self): + self.token = HF_TOKEN + self.api = HfApi(token=self.token) + self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {} + + async def check_model_card(self, model_id: str) -> Tuple[bool, str, Optional[Dict[str, Any]]]: + """Check if model has a valid model card""" + try: + logger.info(LogFormatter.info(f"Checking model card for {model_id}")) + + # Get model card content using ModelCard.load + try: + model_card = await asyncio.to_thread( + ModelCard.load, + model_id + ) + logger.info(LogFormatter.success("Model card found")) + except Exception as e: + error_msg = "Please add a model card to your model to explain how you trained/fine-tuned it." + logger.error(LogFormatter.error(error_msg, e)) + return False, error_msg, None + + # Check license in model card data + if model_card.data.license is None and not ("license_name" in model_card.data and "license_link" in model_card.data): + error_msg = "License not found. Please add a license to your model card using the `license` metadata or a `license_name`/`license_link` pair." + logger.warning(LogFormatter.warning(error_msg)) + return False, error_msg, None + + # Enforce card content length + if len(model_card.text) < 200: + error_msg = "Please add a description to your model card, it is too short." + logger.warning(LogFormatter.warning(error_msg)) + return False, error_msg, None + + logger.info(LogFormatter.success("Model card validation passed")) + return True, "", model_card + + except Exception as e: + error_msg = "Failed to validate model card" + logger.error(LogFormatter.error(error_msg, e)) + return False, str(e), None + + async def get_safetensors_metadata(self, model_id: str, filename: str = "model.safetensors") -> Optional[Dict]: + """Get metadata from a safetensors file""" + try: + url = f"{API['HUB']}/{model_id}/raw/main/{filename}" + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=self.headers) as response: + if response.status == 200: + # Read only the first 32KB to get the metadata + header = await response.content.read(32768) + # Parse metadata length from the first 8 bytes + metadata_len = int.from_bytes(header[:8], byteorder='little') + metadata_bytes = header[8:8+metadata_len] + return json.loads(metadata_bytes) + return None + except Exception as e: + logger.warning(f"Failed to get safetensors metadata: {str(e)}") + return None + + async def get_model_size( + self, + model_info: Any, + precision: str, + base_model: str + ) -> Tuple[Optional[float], Optional[str]]: + """Get model size in billions of parameters""" + try: + logger.info(LogFormatter.info(f"Checking model size for {model_info.modelId}")) + + # Check if model is adapter + is_adapter = any(s.rfilename == "adapter_config.json" for s in model_info.siblings if hasattr(s, 'rfilename')) + + # Try to get size from safetensors first + model_size = None + + if is_adapter and base_model: + # For adapters, we need both adapter and base model sizes + adapter_meta = await self.get_safetensors_metadata(model_info.id, "adapter_model.safetensors") + base_meta = await self.get_safetensors_metadata(base_model) + + if adapter_meta and base_meta: + adapter_size = sum(int(v.split(',')[0]) for v in adapter_meta.get("tensor_metadata", 
{}).values()) + base_size = sum(int(v.split(',')[0]) for v in base_meta.get("tensor_metadata", {}).values()) + model_size = (adapter_size + base_size) / (2 * 1e9) # Convert to billions, assuming float16 + else: + # For regular models, just get the model size + meta = await self.get_safetensors_metadata(model_info.id) + if meta: + total_params = sum(int(v.split(',')[0]) for v in meta.get("tensor_metadata", {}).values()) + model_size = total_params / (2 * 1e9) # Convert to billions, assuming float16 + + if model_size is None: + # Fallback: Try to get size from model name + size_pattern = re.compile(r"(\d+\.?\d*)b") # Matches patterns like "7b", "13b", "1.1b" + size_match = re.search(size_pattern, model_info.id.lower()) + + if size_match: + size_str = size_match.group(1) + model_size = float(size_str) + else: + return None, "Could not determine model size from safetensors or model name" + + # Adjust size for GPTQ models + size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1 + model_size = round(size_factor * model_size, 3) + + logger.info(LogFormatter.success(f"Model size: {model_size}B parameters")) + return model_size, None + + except Exception as e: + error_msg = "Failed to get model size" + logger.error(LogFormatter.error(error_msg, e)) + return None, str(e) + + async def check_chat_template( + self, + model_id: str, + revision: str + ) -> Tuple[bool, Optional[str]]: + """Check if model has a valid chat template""" + try: + logger.info(LogFormatter.info(f"Checking chat template for {model_id}")) + + try: + config_file = await asyncio.to_thread( + hf_hub_download, + repo_id=model_id, + filename="tokenizer_config.json", + revision=revision, + repo_type="model" + ) + + with open(config_file, 'r') as f: + tokenizer_config = json.load(f) + + if 'chat_template' not in tokenizer_config: + error_msg = f"The model {model_id} doesn't have a chat_template in its tokenizer_config.json. Please add a chat_template before submitting or submit without it." + logger.error(LogFormatter.error(error_msg)) + return False, error_msg + + logger.info(LogFormatter.success("Valid chat template found")) + return True, None + + except Exception as e: + error_msg = f"Error checking chat_template: {str(e)}" + logger.error(LogFormatter.error(error_msg)) + return False, error_msg + + except Exception as e: + error_msg = "Failed to check chat template" + logger.error(LogFormatter.error(error_msg, e)) + return False, str(e) + + async def is_model_on_hub( + self, + model_name: str, + revision: str, + test_tokenizer: bool = False, + trust_remote_code: bool = False + ) -> Tuple[bool, Optional[str], Optional[Any]]: + """Check if model exists and is properly configured on the Hub""" + try: + config = await asyncio.to_thread( + AutoConfig.from_pretrained, + model_name, + revision=revision, + trust_remote_code=trust_remote_code, + token=self.token, + force_download=True + ) + + if test_tokenizer: + try: + await asyncio.to_thread( + AutoTokenizer.from_pretrained, + model_name, + revision=revision, + trust_remote_code=trust_remote_code, + token=self.token + ) + except ValueError as e: + return False, f"uses a tokenizer which is not in a transformers release: {e}", None + except Exception: + return False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None + + return True, None, config + + except ValueError: + return False, "needs to be launched with `trust_remote_code=True`. 
For safety reason, we do not allow these models to be automatically submitted to the leaderboard.", None + except Exception as e: + if "You are trying to access a gated repo." in str(e): + return True, "uses a gated model.", None + return False, f"was not found or misconfigured on the hub! Error raised was {e.args[0]}", None \ No newline at end of file diff --git a/backend/pyproject.toml b/backend/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..a50475aeea429e5a303ebaade104445ee07797bc --- /dev/null +++ b/backend/pyproject.toml @@ -0,0 +1,30 @@ +[tool.poetry] +name = "llm-leaderboard-backend" +version = "0.1.0" +description = "Backend for the Open LLM Leaderboard" +authors = ["Your Name "] + +[tool.poetry.dependencies] +python = "^3.9" +fastapi = "^0.104.1" +uvicorn = {extras = ["standard"], version = "^0.24.0"} +numpy = "1.24.3" +pandas = "^2.0.0" +datasets = "^2.0.0" +pyarrow = "^14.0.1" +python-multipart = "^0.0.6" +huggingface-hub = "^0.19.0" +transformers = "^4.35.0" +safetensors = "^0.4.0" +aiofiles = "^24.1.0" +fastapi-cache2 = "^0.2.1" + +[tool.poetry.group.dev.dependencies] +pytest = "^7.4.0" +black = "^23.7.0" +isort = "^5.12.0" +flake8 = "^6.1.0" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/backend/utils/analyze_prod_datasets.py b/backend/utils/analyze_prod_datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..f05b1c257045bdbc6ef4d782626cae37aa76c14a --- /dev/null +++ b/backend/utils/analyze_prod_datasets.py @@ -0,0 +1,170 @@ +import os +import json +import logging +from datetime import datetime +from pathlib import Path +from typing import Dict, Any, List +from huggingface_hub import HfApi +from dotenv import load_dotenv +from app.config.hf_config import HF_ORGANIZATION + +# Get the backend directory path +BACKEND_DIR = Path(__file__).parent.parent +ROOT_DIR = BACKEND_DIR.parent + +# Load environment variables from .env file in root directory +load_dotenv(ROOT_DIR / ".env") + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(message)s' +) +logger = logging.getLogger(__name__) + +# Initialize Hugging Face API +HF_TOKEN = os.getenv("HF_TOKEN") +if not HF_TOKEN: + raise ValueError("HF_TOKEN not found in environment variables") +api = HfApi(token=HF_TOKEN) + +def analyze_dataset(repo_id: str) -> Dict[str, Any]: + """Analyze a dataset and return statistics""" + try: + # Get dataset info + dataset_info = api.dataset_info(repo_id=repo_id) + + # Get file list + files = api.list_repo_files(repo_id, repo_type="dataset") + + # Get last commit info + commits = api.list_repo_commits(repo_id, repo_type="dataset") + last_commit = next(commits, None) + + # Count lines in jsonl files + total_entries = 0 + for file in files: + if file.endswith('.jsonl'): + try: + # Download file content + content = api.hf_hub_download( + repo_id=repo_id, + filename=file, + repo_type="dataset" + ) + + # Count lines + with open(content, 'r') as f: + for _ in f: + total_entries += 1 + + except Exception as e: + logger.error(f"Error processing file {file}: {str(e)}") + continue + + # Special handling for requests dataset + if repo_id == f"{HF_ORGANIZATION}/requests": + pending_count = 0 + completed_count = 0 + + try: + content = api.hf_hub_download( + repo_id=repo_id, + filename="eval_requests.jsonl", + repo_type="dataset" + ) + + with open(content, 'r') as f: + for line in f: + try: + entry = json.loads(line) + if entry.get("status") == 
"pending": + pending_count += 1 + elif entry.get("status") == "completed": + completed_count += 1 + except json.JSONDecodeError: + continue + + except Exception as e: + logger.error(f"Error analyzing requests: {str(e)}") + + # Build response + response = { + "id": repo_id, + "last_modified": last_commit.created_at if last_commit else None, + "total_entries": total_entries, + "file_count": len(files), + "size_bytes": dataset_info.size_in_bytes, + "downloads": dataset_info.downloads, + } + + # Add request-specific info if applicable + if repo_id == f"{HF_ORGANIZATION}/requests": + response.update({ + "pending_requests": pending_count, + "completed_requests": completed_count + }) + + return response + + except Exception as e: + logger.error(f"Error analyzing dataset {repo_id}: {str(e)}") + return { + "id": repo_id, + "error": str(e) + } + +def main(): + """Main function to analyze all datasets""" + try: + # List of datasets to analyze + datasets = [ + { + "id": f"{HF_ORGANIZATION}/contents", + "description": "Aggregated results" + }, + { + "id": f"{HF_ORGANIZATION}/requests", + "description": "Evaluation requests" + }, + { + "id": f"{HF_ORGANIZATION}/votes", + "description": "User votes" + }, + { + "id": f"{HF_ORGANIZATION}/maintainers-highlight", + "description": "Highlighted models" + } + ] + + # Analyze each dataset + results = [] + for dataset in datasets: + logger.info(f"\nAnalyzing {dataset['description']} ({dataset['id']})...") + result = analyze_dataset(dataset['id']) + results.append(result) + + if 'error' in result: + logger.error(f"❌ Error: {result['error']}") + else: + logger.info(f"✓ {result['total_entries']} entries") + logger.info(f"✓ {result['file_count']} files") + logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB") + logger.info(f"✓ {result['downloads']} downloads") + + if 'pending_requests' in result: + logger.info(f"✓ {result['pending_requests']} pending requests") + logger.info(f"✓ {result['completed_requests']} completed requests") + + if result['last_modified']: + last_modified = datetime.fromisoformat(result['last_modified'].replace('Z', '+00:00')) + logger.info(f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}") + + return results + + except Exception as e: + logger.error(f"Global error: {str(e)}") + return [] + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/backend/utils/analyze_prod_models.py b/backend/utils/analyze_prod_models.py new file mode 100644 index 0000000000000000000000000000000000000000..479dd103755a4c3a3805dbd49c2de8a94739dfb8 --- /dev/null +++ b/backend/utils/analyze_prod_models.py @@ -0,0 +1,105 @@ +import os +import json +import logging +from pathlib import Path +from huggingface_hub import HfApi +from dotenv import load_dotenv +from app.config.hf_config import HF_ORGANIZATION + +# Get the backend directory path +BACKEND_DIR = Path(__file__).parent.parent +ROOT_DIR = BACKEND_DIR.parent + +# Load environment variables from .env file in root directory +load_dotenv(ROOT_DIR / ".env") + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(message)s' +) +logger = logging.getLogger(__name__) + +# Initialize Hugging Face API +HF_TOKEN = os.getenv("HF_TOKEN") +if not HF_TOKEN: + raise ValueError("HF_TOKEN not found in environment variables") +api = HfApi(token=HF_TOKEN) + +def count_evaluated_models(): + """Count the number of evaluated models""" + try: + # Get dataset info + dataset_info = api.dataset_info(repo_id=f"{HF_ORGANIZATION}/contents", repo_type="dataset") + + # Get file list + 
files = api.list_repo_files(f"{HF_ORGANIZATION}/contents", repo_type="dataset") + + # Get last commit info + commits = api.list_repo_commits(f"{HF_ORGANIZATION}/contents", repo_type="dataset") + last_commit = next(commits, None) + + # Count lines in jsonl files + total_entries = 0 + for file in files: + if file.endswith('.jsonl'): + try: + # Download file content + content = api.hf_hub_download( + repo_id=f"{HF_ORGANIZATION}/contents", + filename=file, + repo_type="dataset" + ) + + # Count lines + with open(content, 'r') as f: + for _ in f: + total_entries += 1 + + except Exception as e: + logger.error(f"Error processing file {file}: {str(e)}") + continue + + # Build response + response = { + "total_models": total_entries, + "last_modified": last_commit.created_at if last_commit else None, + "file_count": len(files), + "size_bytes": dataset_info.size_in_bytes, + "downloads": dataset_info.downloads + } + + return response + + except Exception as e: + logger.error(f"Error counting evaluated models: {str(e)}") + return { + "error": str(e) + } + +def main(): + """Main function to count evaluated models""" + try: + logger.info("\nAnalyzing evaluated models...") + result = count_evaluated_models() + + if 'error' in result: + logger.error(f"❌ Error: {result['error']}") + else: + logger.info(f"✓ {result['total_models']} models evaluated") + logger.info(f"✓ {result['file_count']} files") + logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB") + logger.info(f"✓ {result['downloads']} downloads") + + if result['last_modified']: + last_modified = datetime.fromisoformat(result['last_modified'].replace('Z', '+00:00')) + logger.info(f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}") + + return result + + except Exception as e: + logger.error(f"Global error: {str(e)}") + return {"error": str(e)} + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/backend/utils/last_activity.py b/backend/utils/last_activity.py new file mode 100644 index 0000000000000000000000000000000000000000..9f403ef0d223f79c9f7d2633ecbee5c3044ed5ae --- /dev/null +++ b/backend/utils/last_activity.py @@ -0,0 +1,164 @@ +import os +import json +import logging +from datetime import datetime +from pathlib import Path +from typing import Dict, Any, List, Tuple +from huggingface_hub import HfApi +from dotenv import load_dotenv + +# Get the backend directory path +BACKEND_DIR = Path(__file__).parent.parent +ROOT_DIR = BACKEND_DIR.parent + +# Load environment variables from .env file in root directory +load_dotenv(ROOT_DIR / ".env") + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(message)s' +) +logger = logging.getLogger(__name__) + +# Initialize Hugging Face API +HF_TOKEN = os.getenv("HF_TOKEN") +if not HF_TOKEN: + raise ValueError("HF_TOKEN not found in environment variables") +api = HfApi(token=HF_TOKEN) + +# Default organization +HF_ORGANIZATION = os.getenv('HF_ORGANIZATION', 'open-llm-leaderboard') + +def get_last_votes(limit: int = 5) -> List[Dict]: + """Get the last votes from the votes dataset""" + try: + logger.info("\nFetching last votes...") + + # Download and read votes file + logger.info("Downloading votes file...") + votes_file = api.hf_hub_download( + repo_id=f"{HF_ORGANIZATION}/votes", + filename="votes_data.jsonl", + repo_type="dataset" + ) + + logger.info("Reading votes file...") + votes = [] + with open(votes_file, 'r') as f: + for line in f: + try: + vote = json.loads(line) + votes.append(vote) + except json.JSONDecodeError: + continue + + # Sort by 
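The analysis scripts above share one pattern: list the files of a dataset repository, download every `.jsonl` file and count its lines to estimate the number of entries, then read the latest commit for a last-modified date. The sketch below condenses that pattern into a single standalone helper; the function name and the returned fields are illustrative only, and it assumes `HF_TOKEN` is available in the environment whenever the dataset requires authentication.

```python
import os
from typing import Optional

from huggingface_hub import HfApi


def count_jsonl_entries(repo_id: str, token: Optional[str] = None) -> dict:
    """Illustrative helper: count entries across all .jsonl files of a dataset repo."""
    api = HfApi(token=token or os.getenv("HF_TOKEN"))

    # List every file in the dataset repository
    files = api.list_repo_files(repo_id, repo_type="dataset")

    total_entries = 0
    for file in files:
        if not file.endswith(".jsonl"):
            continue
        # hf_hub_download returns the path of a locally cached copy of the file
        local_path = api.hf_hub_download(repo_id=repo_id, filename=file, repo_type="dataset")
        with open(local_path, "r", encoding="utf-8") as f:
            total_entries += sum(1 for _ in f)

    # list_repo_commits returns a list ordered newest-first
    commits = api.list_repo_commits(repo_id, repo_type="dataset")
    last_modified = commits[0].created_at if commits else None

    return {
        "id": repo_id,
        "total_entries": total_entries,
        "file_count": len(files),
        "last_modified": last_modified,
    }


if __name__ == "__main__":
    print(count_jsonl_entries("open-llm-leaderboard/contents"))
```

Because `list_repo_commits` returns a plain list with the newest commit first, indexing `commits[0]` is enough to obtain the most recent commit and its `created_at` timestamp.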
timestamp and get last n votes + logger.info("Sorting votes...") + votes.sort(key=lambda x: x.get('timestamp', ''), reverse=True) + last_votes = votes[:limit] + + logger.info(f"✓ Found {len(last_votes)} recent votes") + return last_votes + + except Exception as e: + logger.error(f"Error reading votes: {str(e)}") + return [] + +def get_last_models(limit: int = 5) -> List[Dict]: + """Get the last models from the requests dataset using commit history""" + try: + logger.info("\nFetching last model submissions...") + + # Get commit history + logger.info("Getting commit history...") + commits = list(api.list_repo_commits( + repo_id=f"{HF_ORGANIZATION}/requests", + repo_type="dataset" + )) + logger.info(f"Found {len(commits)} commits") + + # Track processed files to avoid duplicates + processed_files = set() + models = [] + + # Process commits until we have enough models + for i, commit in enumerate(commits): + logger.info(f"Processing commit {i+1}/{len(commits)} ({commit.created_at})") + + # Look at added/modified files in this commit + files_to_process = [f for f in (commit.added + commit.modified) if f.endswith('.json')] + if files_to_process: + logger.info(f"Found {len(files_to_process)} JSON files in commit") + + for file in files_to_process: + if file in processed_files: + continue + + processed_files.add(file) + logger.info(f"Downloading {file}...") + + try: + # Download and read the file + content = api.hf_hub_download( + repo_id=f"{HF_ORGANIZATION}/requests", + filename=file, + repo_type="dataset" + ) + + with open(content, 'r') as f: + model_data = json.load(f) + models.append(model_data) + logger.info(f"✓ Added model {model_data.get('model', 'Unknown')}") + + if len(models) >= limit: + logger.info("Reached desired number of models") + break + + except Exception as e: + logger.error(f"Error reading file {file}: {str(e)}") + continue + + if len(models) >= limit: + break + + logger.info(f"✓ Found {len(models)} recent model submissions") + return models + + except Exception as e: + logger.error(f"Error reading models: {str(e)}") + return [] + +def main(): + """Display last activities from the leaderboard""" + try: + # Get last votes + logger.info("\n=== Last Votes ===") + last_votes = get_last_votes() + if last_votes: + for vote in last_votes: + logger.info(f"\nModel: {vote.get('model')}") + logger.info(f"User: {vote.get('username')}") + logger.info(f"Timestamp: {vote.get('timestamp')}") + else: + logger.info("No votes found") + + # Get last model submissions + logger.info("\n=== Last Model Submissions ===") + last_models = get_last_models() + if last_models: + for model in last_models: + logger.info(f"\nModel: {model.get('model')}") + logger.info(f"Submitter: {model.get('sender', 'Unknown')}") + logger.info(f"Status: {model.get('status', 'Unknown')}") + logger.info(f"Submission Time: {model.get('submitted_time', 'Unknown')}") + logger.info(f"Precision: {model.get('precision', 'Unknown')}") + logger.info(f"Weight Type: {model.get('weight_type', 'Unknown')}") + else: + logger.info("No models found") + + except Exception as e: + logger.error(f"Global error: {str(e)}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/backend/utils/sync_datasets_locally.py b/backend/utils/sync_datasets_locally.py new file mode 100644 index 0000000000000000000000000000000000000000..e1c01ba9e138c99b4d041f7c382ec9d071c9a1f0 --- /dev/null +++ b/backend/utils/sync_datasets_locally.py @@ -0,0 +1,130 @@ +import os +import shutil +import tempfile +import logging +from pathlib import Path 
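`get_last_models` above discovers recent submissions by walking the commit history of the requests dataset and then downloading the individual request JSON files; each record carries at least the fields printed in `main()`. A minimal sketch of reading one such record is shown below. The file name passed in the example is hypothetical, since real request files are only known from the commit history, and the field list simply mirrors the printout above.

```python
import json
import os

from huggingface_hub import HfApi

# Field names mirror the printout in last_activity.py; the file name used in the
# __main__ block is hypothetical, real request files are found via commit history.
REQUEST_FIELDS = ("model", "sender", "status", "submitted_time", "precision", "weight_type")


def read_request(filename: str) -> dict:
    api = HfApi(token=os.getenv("HF_TOKEN"))
    local_path = api.hf_hub_download(
        repo_id="open-llm-leaderboard/requests",
        filename=filename,
        repo_type="dataset",
    )
    with open(local_path, "r", encoding="utf-8") as f:
        record = json.load(f)
    # Keep only the fields displayed by the activity report, with a fallback value
    return {field: record.get(field, "Unknown") for field in REQUEST_FIELDS}


if __name__ == "__main__":
    print(read_request("some-org/some-model_eval_request.json"))  # hypothetical path
```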
+from huggingface_hub import HfApi, snapshot_download, upload_folder, create_repo +from dotenv import load_dotenv + +# Configure source and destination usernames +SOURCE_USERNAME = "open-llm-leaderboard" +DESTINATION_USERNAME = "tfrere" + +# Get the backend directory path +BACKEND_DIR = Path(__file__).parent.parent +ROOT_DIR = BACKEND_DIR.parent + +# Load environment variables from .env file in root directory +load_dotenv(ROOT_DIR / ".env") + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(message)s' +) +logger = logging.getLogger(__name__) + +# List of dataset names to sync +DATASET_NAMES = [ + "votes", + "results", + "requests", + "contents", + "maintainers-highlight", +] + +# Build list of datasets with their source and destination paths +DATASETS = [ + (name, f"{SOURCE_USERNAME}/{name}", f"{DESTINATION_USERNAME}/{name}") + for name in DATASET_NAMES +] + +# Initialize Hugging Face API +api = HfApi() + +def ensure_repo_exists(repo_id, token): + """Ensure the repository exists, create it if it doesn't""" + try: + api.repo_info(repo_id=repo_id, repo_type="dataset") + logger.info(f"✓ Repository {repo_id} already exists") + except Exception: + logger.info(f"Creating repository {repo_id}...") + create_repo( + repo_id=repo_id, + repo_type="dataset", + token=token, + private=True + ) + logger.info(f"✓ Repository {repo_id} created") + +def process_dataset(dataset_info, token): + """Process a single dataset""" + name, source_dataset, destination_dataset = dataset_info + try: + logger.info(f"\n📥 Processing dataset: {name}") + + # Ensure destination repository exists + ensure_repo_exists(destination_dataset, token) + + # Create a temporary directory for this dataset + with tempfile.TemporaryDirectory() as temp_dir: + try: + # List files in source dataset + logger.info(f"Listing files in {source_dataset}...") + files = api.list_repo_files(source_dataset, repo_type="dataset") + logger.info(f"Detected structure: {len(files)} files") + + # Download dataset + logger.info(f"Downloading from {source_dataset}...") + local_dir = snapshot_download( + repo_id=source_dataset, + repo_type="dataset", + local_dir=temp_dir, + token=token + ) + logger.info(f"✓ Download complete") + + # Upload to destination while preserving structure + logger.info(f"📤 Uploading to {destination_dataset}...") + api.upload_folder( + folder_path=local_dir, + repo_id=destination_dataset, + repo_type="dataset", + token=token + ) + logger.info(f"✅ {name} copied successfully!") + return True + + except Exception as e: + logger.error(f"❌ Error processing {name}: {str(e)}") + return False + + except Exception as e: + logger.error(f"❌ Error for {name}: {str(e)}") + return False + +def copy_datasets(): + try: + logger.info("🔑 Checking authentication...") + # Get token from .env file + token = os.getenv("HF_TOKEN") + if not token: + raise ValueError("HF_TOKEN not found in .env file") + + # Process datasets sequentially + results = [] + for dataset_info in DATASETS: + success = process_dataset(dataset_info, token) + results.append((dataset_info[0], success)) + + # Print final summary + logger.info("\n📊 Final summary:") + for dataset, success in results: + status = "✅ Success" if success else "❌ Failure" + logger.info(f"{dataset}: {status}") + + except Exception as e: + logger.error(f"❌ Global error: {str(e)}") + +if __name__ == "__main__": + copy_datasets() \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 
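`sync_datasets_locally.py` above performs the same three steps for every dataset: make sure the destination repository exists, download a full snapshot of the source, and re-upload the folder with its structure preserved. A condensed sketch of that per-dataset copy is shown below, using one source/destination pair from the list above; it assumes `HF_TOKEN` grants read access to the source and write access to the destination.

```python
import os
import tempfile

from huggingface_hub import HfApi, create_repo, snapshot_download


def mirror_dataset(source: str, destination: str) -> None:
    """Copy a dataset repo to another namespace, preserving its file layout."""
    token = os.getenv("HF_TOKEN")
    api = HfApi(token=token)

    # Create the private destination repository if it does not exist yet
    try:
        api.repo_info(repo_id=destination, repo_type="dataset")
    except Exception:
        create_repo(repo_id=destination, repo_type="dataset", token=token, private=True)

    with tempfile.TemporaryDirectory() as temp_dir:
        # Download the full snapshot, then re-upload it with the same structure
        local_dir = snapshot_download(
            repo_id=source, repo_type="dataset", local_dir=temp_dir, token=token
        )
        api.upload_folder(
            folder_path=local_dir, repo_id=destination, repo_type="dataset", token=token
        )


if __name__ == "__main__":
    mirror_dataset("open-llm-leaderboard/votes", "tfrere/votes")
```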
0000000000000000000000000000000000000000..a37db75cf2bd7023fb390ac025cad4bccbb327a5 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,35 @@ +version: '3.8' + +services: + backend: + build: + context: ./backend + dockerfile: Dockerfile.dev + args: + - HF_TOKEN=${HF_TOKEN} + ports: + - "${BACKEND_PORT:-8000}:8000" + volumes: + - ./backend:/app + environment: + - ENVIRONMENT=${ENVIRONMENT:-development} + - HF_TOKEN=${HF_TOKEN} + - HF_HOME=${HF_HOME:-/.cache} + command: uvicorn app.asgi:app --host 0.0.0.0 --port 8000 --reload + + frontend: + build: + context: ./frontend + dockerfile: Dockerfile.dev + ports: + - "${FRONTEND_PORT:-7860}:7860" + volumes: + - ./frontend:/app + - /app/node_modules + environment: + - NODE_ENV=${ENVIRONMENT:-development} + - CHOKIDAR_USEPOLLING=true + - PORT=${FRONTEND_PORT:-7860} + command: npm start + stdin_open: true + tty: true \ No newline at end of file diff --git a/frontend/Dockerfile.dev b/frontend/Dockerfile.dev new file mode 100644 index 0000000000000000000000000000000000000000..259f7c9d8746db26bee8ee531d96cbe0d619321e --- /dev/null +++ b/frontend/Dockerfile.dev @@ -0,0 +1,15 @@ +FROM node:18 + +WORKDIR /app + +# Install required global dependencies +RUN npm install -g react-scripts + +# Copy package.json and package-lock.json +COPY package*.json ./ + +# Install project dependencies +RUN npm install + +# Volume will be mounted here, no need for COPY +CMD ["npm", "start"] \ No newline at end of file diff --git a/frontend/README.md b/frontend/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7ef4ff265f3c870efce128f47bdda8d266689a88 --- /dev/null +++ b/frontend/README.md @@ -0,0 +1,80 @@ +# Frontend - Open LLM Leaderboard 🏆 + +React interface for exploring and comparing open-source language models. + +## 🏗 Architecture + +```mermaid +flowchart TD + Client(["User Browser"]) --> Components["React Components"] + + subgraph Frontend + Components --> Context["Context Layer
<br>• LeaderboardContext<br>• Global State"] + + API["API Layer<br>• /api/leaderboard/formatted<br>• TanStack Query"] --> |Data Feed| Context + + Context --> Hooks["Hooks Layer<br>• Data Processing<br>• Filtering<br>• Caching"] + + Hooks --> Features["Features<br>• Table Management<br>• Search & Filters<br>• Display Options"] + Features --> Cache["Cache Layer<br>• LocalStorage<br>
• URL State"] + end + + API --> Backend["Backend Server"] + + style Backend fill:#f96,stroke:#333,stroke-width:2px +``` + +## ✨ Core Features + +- 🔍 **Search & Filters**: Real-time filtering, regex search, advanced filters +- 📊 **Data Visualization**: Interactive table, customizable columns, sorting +- 🔄 **State Management**: URL sync, client-side caching (5min TTL) +- 📱 **Responsive Design**: Mobile-friendly, dark/light themes + +## 🛠 Tech Stack + +- React 18 + Material-UI +- TanStack Query & Table +- React Router v6 + +## 📁 Project Structure + +``` +src/ +├── pages/ +│ └── LeaderboardPage/ +│ ├── components/ # UI Components +│ ├── context/ # Global State +│ └── hooks/ # Data Processing +├── components/ # Shared Components +└── utils/ # Helper Functions +``` + +## 🚀 Development + +```bash +# Install dependencies +npm install + +# Start development server +npm start + +# Production build +npm run build +``` + +## 🔧 Environment Variables + +```env +# API Configuration +REACT_APP_API_URL=http://localhost:8000 +REACT_APP_CACHE_DURATION=300000 # 5 minutes +``` + +## 🔄 Data Flow + +1. API fetches leaderboard data from backend +2. Context stores and manages global state +3. Hooks handle data processing and filtering +4. Components render based on processed data +5. Cache maintains user preferences and URL state diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000000000000000000000000000000000000..93de14fd49415a97be66fa06310e2a1249b85ad6 --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,55 @@ +{ + "name": "open-llm-leaderboard", + "version": "0.1.0", + "private": true, + "dependencies": { + "@emotion/react": "^11.13.3", + "@emotion/styled": "^11.13.0", + "@huggingface/hub": "^0.14.0", + "@mui/icons-material": "^6.1.7", + "@mui/lab": "^6.0.0-beta.16", + "@mui/material": "^6.1.6", + "@mui/x-data-grid": "^7.22.2", + "@tanstack/react-query": "^5.62.2", + "@tanstack/react-table": "^8.20.5", + "@tanstack/react-virtual": "^3.10.9", + "@testing-library/jest-dom": "^5.17.0", + "@testing-library/react": "^13.4.0", + "@testing-library/user-event": "^13.5.0", + "compression": "^1.7.4", + "cors": "^2.8.5", + "express": "^4.18.2", + "react": "^18.3.1", + "react-dom": "^18.3.1", + "react-router-dom": "^6.28.0", + "react-scripts": "5.0.1", + "serve-static": "^1.15.0", + "web-vitals": "^2.1.4" + }, + "scripts": { + "start": "react-scripts start", + "build": "react-scripts build", + "test": "react-scripts test", + "eject": "react-scripts eject", + "serve": "node server.js" + }, + "eslintConfig": { + "extends": [ + "react-app", + "react-app/jest" + ] + }, + "browserslist": { + "production": [ + ">0.2%", + "not dead", + "not op_mini all" + ], + "development": [ + "last 1 chrome version", + "last 1 firefox version", + "last 1 safari version" + ] + }, + "proxy": "http://backend:8000" +} diff --git a/frontend/public/index.html b/frontend/public/index.html new file mode 100644 index 0000000000000000000000000000000000000000..8a32753d8f0c23e7ef7a0b5bcb159a68dbafeddc --- /dev/null +++ b/frontend/public/index.html @@ -0,0 +1,74 @@ + + + + + + + + + + + + + + + + + + + + + + + + Open LLM Leaderboard - Compare Open Source Large Language Models + + + + + +
+ + + diff --git a/frontend/public/logo256.png b/frontend/public/logo256.png new file mode 100644 index 0000000000000000000000000000000000000000..58547e134af0ac1200a4608fb1c800b3e8e9ddf1 Binary files /dev/null and b/frontend/public/logo256.png differ diff --git a/frontend/public/logo32.png b/frontend/public/logo32.png new file mode 100644 index 0000000000000000000000000000000000000000..1b6e8fbd42dd1bcc599649bf6f230fde89a6908a Binary files /dev/null and b/frontend/public/logo32.png differ diff --git a/frontend/public/og-image.jpg b/frontend/public/og-image.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1d4a3f3cb7d838489ef0a5dde1ce7c493273f98d Binary files /dev/null and b/frontend/public/og-image.jpg differ diff --git a/frontend/public/robots.txt b/frontend/public/robots.txt new file mode 100644 index 0000000000000000000000000000000000000000..e9e57dc4d41b9b46e05112e9f45b7ea6ac0ba15e --- /dev/null +++ b/frontend/public/robots.txt @@ -0,0 +1,3 @@ +# https://www.robotstxt.org/robotstxt.html +User-agent: * +Disallow: diff --git a/frontend/server.js b/frontend/server.js new file mode 100644 index 0000000000000000000000000000000000000000..653befea69419568b117ce809871639d86d65581 --- /dev/null +++ b/frontend/server.js @@ -0,0 +1,85 @@ +const express = require("express"); +const cors = require("cors"); +const compression = require("compression"); +const path = require("path"); +const serveStatic = require("serve-static"); +const { createProxyMiddleware } = require("http-proxy-middleware"); + +const app = express(); +const port = process.env.PORT || 7860; +const apiPort = process.env.INTERNAL_API_PORT || 7861; + +// Enable CORS for all routes +app.use(cors()); + +// Enable GZIP compression +app.use(compression()); + +// Proxy all API requests to the Python backend +app.use( + "/api", + createProxyMiddleware({ + target: `http://127.0.0.1:${apiPort}`, + changeOrigin: true, + onError: (err, req, res) => { + console.error("Proxy Error:", err); + res.status(500).json({ error: "Proxy Error", details: err.message }); + }, + }) +); + +// Serve static files from the build directory +app.use( + express.static(path.join(__dirname, "build"), { + // Don't cache HTML files + setHeaders: (res, path) => { + if (path.endsWith(".html")) { + res.setHeader("Cache-Control", "no-cache, no-store, must-revalidate"); + res.setHeader("Pragma", "no-cache"); + res.setHeader("Expires", "0"); + } else { + // Cache other static resources for 1 year + res.setHeader("Cache-Control", "public, max-age=31536000"); + } + }, + }) +); + +// Middleware to preserve URL parameters +app.use((req, res, next) => { + // Don't interfere with API requests + if (req.url.startsWith("/api")) { + return next(); + } + + // Preserve original URL parameters + req.originalUrl = req.url; + next(); +}); + +// Handle all other routes by serving index.html +app.get("*", (req, res) => { + // Don't interfere with API requests + if (req.url.startsWith("/api")) { + return next(); + } + + // Headers for client-side routing + res.set({ + "Cache-Control": "no-cache, no-store, must-revalidate", + Pragma: "no-cache", + Expires: "0", + }); + + // Send index.html for all other routes + res.sendFile(path.join(__dirname, "build", "index.html")); +}); + +app.listen(port, "0.0.0.0", () => { + console.log( + `Frontend server is running on port ${port} in ${ + process.env.NODE_ENV || "development" + } mode` + ); + console.log(`API proxy target: http://127.0.0.1:${apiPort}`); +}); diff --git a/frontend/src/App.js b/frontend/src/App.js new file mode 
100644 index 0000000000000000000000000000000000000000..a04b2c31654a5252dcb5c590bdd73ee3c9f046ac --- /dev/null +++ b/frontend/src/App.js @@ -0,0 +1,115 @@ +import React, { useEffect } from "react"; +import { + HashRouter as Router, + Routes, + Route, + useSearchParams, + useLocation, +} from "react-router-dom"; +import { ThemeProvider } from "@mui/material/styles"; +import { Box, CssBaseline } from "@mui/material"; +import Navigation from "./components/Navigation/Navigation"; +import LeaderboardPage from "./pages/LeaderboardPage/LeaderboardPage"; +import AddModelPage from "./pages/AddModelPage/AddModelPage"; +import QuotePage from "./pages/QuotePage/QuotePage"; +import VoteModelPage from "./pages/VoteModelPage/VoteModelPage"; +import Footer from "./components/Footer/Footer"; +import getTheme from "./config/theme"; +import { useThemeMode } from "./hooks/useThemeMode"; +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import LeaderboardProvider from "./pages/LeaderboardPage/components/Leaderboard/context/LeaderboardContext"; + +const queryClient = new QueryClient({ + defaultOptions: { + queries: { + retry: 1, + refetchOnWindowFocus: false, + }, + }, +}); + +function UrlHandler() { + const location = useLocation(); + const [searchParams] = useSearchParams(); + + // Synchroniser l'URL avec la page parente HF + useEffect(() => { + // Vérifier si nous sommes dans un iframe HF Space + const isHFSpace = window.location !== window.parent.location; + if (!isHFSpace) return; + + // Sync query and hash from this embedded app to the parent page URL + const queryString = window.location.search; + const hash = window.location.hash; + + // HF Spaces' special message type to update the query string and the hash in the parent page URL + window.parent.postMessage( + { + queryString, + hash, + }, + "https://huggingface.co" + ); + }, [location, searchParams]); + + // Read the updated hash reactively + useEffect(() => { + const handleHashChange = (event) => { + console.log("hash change event", event); + }; + + window.addEventListener("hashchange", handleHashChange); + return () => window.removeEventListener("hashchange", handleHashChange); + }, []); + + return null; +} + +function App() { + const { mode, toggleTheme } = useThemeMode(); + const theme = getTheme(mode); + + return ( + + + + + + + + + + + } /> + } /> + } /> + } /> + + +
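Once the app above is built and served by the Express server shown earlier, HTML responses are sent with `no-cache` headers, other static assets (such as `robots.txt` from the public folder) carry a one-year `max-age`, and `/api` requests are proxied to the internal Python backend. The sketch below is one way to sanity-check that behaviour against a running instance; it assumes the default port 7860 and uses the `requests` library.

```python
import requests

BASE = "http://localhost:7860"  # default frontend port from server.js


def check_serving_behaviour():
    # index.html must never be cached so new deployments are picked up immediately
    html = requests.get(f"{BASE}/", timeout=10)
    print("HTML Cache-Control:", html.headers.get("Cache-Control"))

    # robots.txt is a non-HTML static file, so it should carry the long max-age
    asset = requests.get(f"{BASE}/robots.txt", timeout=10)
    print("Asset Cache-Control:", asset.headers.get("Cache-Control"))

    # /api requests are forwarded to the backend by the proxy middleware
    api = requests.get(f"{BASE}/api/leaderboard/formatted", timeout=30)
    print("API status:", api.status_code)


if __name__ == "__main__":
    check_serving_behaviour()
```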