"""Gradio app that searches the Hugging Face Hub and summarises the results."""

import html
from typing import Any, Dict, List, Optional

import gradio as gr
import httpx
import pandas as pd
from huggingface_hub import HfApi, ModelCard

HUB_URL = "https://huggingface.co"
# Upper bound on fetched results; without one the listing endpoints are
# paginated until exhausted, which can mean the entire Hub for a broad query.
DEFAULT_SEARCH_LIMIT = 100


def _is_missing(value: Any) -> bool:
    """Return True for ``None``/NaN-style placeholders, False otherwise."""
    try:
        return bool(pd.isna(value))
    except (TypeError, ValueError):
        # Containers make ``pd.isna`` return an array; that is not "missing".
        return False


def _hub_record(info: Any, url_prefix: str, with_downloads: bool) -> Dict[str, Any]:
    """Convert one Hub listing entry into a flat row for the results table."""
    # Prefer ``id``; fall back to ``modelId`` in case the entry only has that.
    repo_id = getattr(info, "id", None) or getattr(info, "modelId", None)
    record: Dict[str, Any] = {"id": repo_id, "author": getattr(info, "author", None)}
    if with_downloads:
        record["downloads"] = getattr(info, "downloads", None)
    record["link"] = f"{url_prefix}/{repo_id}"
    return record


def search_hub(
    query: str,
    search_type: str,
    limit: Optional[int] = DEFAULT_SEARCH_LIMIT,
) -> pd.DataFrame:
    """
    Search the Hub and return the matches as a table.

    :param query: Free-text search string forwarded to the Hub API
    :param search_type: One of "Models", "Datasets" or "Spaces"
    :param limit: Maximum number of results to fetch; ``None`` fetches all
    :return: DataFrame with columns id, author, (downloads,) link; an empty
        DataFrame without columns when ``search_type`` is not recognised
    """
    api = HfApi()
    if search_type == "Models":
        results = api.list_models(search=query, limit=limit)
        url_prefix, with_downloads = HUB_URL, True
    elif search_type == "Datasets":
        results = api.list_datasets(search=query, limit=limit)
        url_prefix, with_downloads = f"{HUB_URL}/datasets", True
    elif search_type == "Spaces":
        results = api.list_spaces(search=query, limit=limit)
        # Space rows carry no "downloads" column.
        url_prefix, with_downloads = f"{HUB_URL}/spaces", False
    else:
        return pd.DataFrame()

    columns = ["id", "author", "downloads", "link"] if with_downloads else ["id", "author", "link"]
    records = [_hub_record(info, url_prefix, with_downloads) for info in results]
    # Explicit columns keep the table header stable when nothing matched.
    return pd.DataFrame(records, columns=columns)


def open_url(row):
    """
    Build a clickable link for the first row of ``row``.

    :param row: DataFrame whose first row is the item of interest
    :return: HTML anchor pointing at the row's ``link``, or "" when there is
        no usable link
    """
    if row is None or row.empty or "link" not in row.columns:
        return ""
    url = row.iloc[0]["link"]
    if not isinstance(url, str) or not url:
        return ""
    # The results table is editable, so the value is escaped before being
    # embedded in markup.
    safe_url = html.escape(url, quote=True)
    return f'<a href="{safe_url}" target="_blank" rel="noopener noreferrer">{safe_url}</a>'


def load_metadata(row, search_type):
    """
    Load a textual description of the first item in ``row`` from the Hub.

    :param row: DataFrame whose first row is the item of interest
    :param search_type: One of "Models", "Datasets" or "Spaces"
    :return: Model card text or the stringified dataset/space info; an error
        message if the lookup fails; "" when there is nothing to look up
    """
    if row is None or row.empty or "id" not in row.columns:
        return ""
    item_id = row.iloc[0]["id"]
    if _is_missing(item_id) or item_id == "":
        return ""

    # This function feeds a UI text box, so failures are reported as text
    # instead of being raised.
    if search_type == "Models":
        try:
            return str(ModelCard.load(item_id))
        except Exception as e:
            return f"Error loading model card: {str(e)}"
    if search_type == "Datasets":
        try:
            return str(HfApi().dataset_info(item_id))
        except Exception as e:
            return f"Error loading dataset metadata: {str(e)}"
    if search_type == "Spaces":
        try:
            return str(HfApi().space_info(item_id))
        except Exception as e:
            return f"Error loading space metadata: {str(e)}"
    return ""


def _item_type(item: Dict) -> str:
    """Classify a result record as "Models", "Datasets" or "Spaces"."""
    link = item.get("link")
    if isinstance(link, str) and link:
        # Records built by search_hub encode the type in the link prefix.
        if link.startswith(f"{HUB_URL}/datasets/"):
            return "Datasets"
        if link.startswith(f"{HUB_URL}/spaces/"):
            return "Spaces"
        return "Models"
    # Records without a link: fall back to key/id heuristics.
    if "modelId" in item:
        return "Models"
    if "dataset" in str(item.get("id", "")):
        return "Datasets"
    return "Spaces"


def _download_count(value: Any) -> int:
    """Coerce a downloads cell to an int, treating missing/invalid as 0."""
    if _is_missing(value):
        return 0
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return 0


def SwarmyTime(data: List[Dict]) -> Dict:
    """
    Aggregates all content from the given data.

    :param data: List of dictionaries containing the search results
    :return: Dictionary with aggregated content: ``total_items``,
        ``unique_authors`` (a count), ``total_downloads`` and ``item_types``
        (per-type counts)
    """
    authors = set()
    total_downloads = 0
    item_types = {"Models": 0, "Datasets": 0, "Spaces": 0}

    for item in data:
        author = item.get("author", "Unknown")
        # A missing author (None/NaN/"") is reported as "Unknown".
        authors.add("Unknown" if _is_missing(author) or author == "" else author)
        total_downloads += _download_count(item.get("downloads", 0))
        item_types[_item_type(item)] += 1

    return {
        "total_items": len(data),
        "unique_authors": len(authors),
        "total_downloads": total_downloads,
        "item_types": item_types,
    }


def _selected_rows(frame, evt) -> Optional[pd.DataFrame]:
    """Return a one-row DataFrame for the cell selected in ``evt``, or None."""
    if not isinstance(frame, pd.DataFrame) or frame.empty:
        return None
    index = getattr(evt, "index", None)
    # For a DataFrame selection the index is expected to be (row, column).
    if isinstance(index, (list, tuple)):
        position = index[0] if index else None
    else:
        position = index
    if not isinstance(position, int) or not 0 <= position < len(frame):
        return None
    return frame.iloc[[position]]


with gr.Blocks() as demo:
    gr.Markdown("## Search the Hugging Face Hub")
    with gr.Row():
        search_query = gr.Textbox(label="Search Query")
        search_type = gr.Radio(["Models", "Datasets", "Spaces"], label="Search Type", value="Models")
        search_button = gr.Button("Search")
    results_df = gr.DataFrame(label="Search Results", wrap=True, interactive=True)
    url_output = gr.HTML(label="URL")
    metadata_output = gr.Textbox(label="Metadata", lines=10)
    aggregated_output = gr.JSON(label="Aggregated Content")

    def search_and_aggregate(query, kind):
        """Run the search and return the result table plus its summary."""
        try:
            frame = search_hub(query, kind)
        except Exception as exc:
            # Surface the failure in the UI with its message.
            raise gr.Error(f"Search failed: {exc}") from exc
        return frame, SwarmyTime(frame.to_dict("records"))

    # The event parameter must be annotated with gr.SelectData (and have no
    # default) so Gradio injects the selection event into it.
    def show_link(frame, evt: gr.SelectData):
        """Render the link of the row the user clicked."""
        return open_url(_selected_rows(frame, evt))

    def show_metadata(frame, kind, evt: gr.SelectData):
        """Load metadata for the row the user clicked."""
        return load_metadata(_selected_rows(frame, evt), kind)

    search_button.click(
        search_and_aggregate,
        inputs=[search_query, search_type],
        outputs=[results_df, aggregated_output],
    )
    results_df.select(show_link, inputs=[results_df], outputs=[url_output])
    results_df.select(show_metadata, inputs=[results_df, search_type], outputs=[metadata_output])

if __name__ == "__main__":
    demo.launch(debug=True)