I have the same problem. The Model Hub Search API only provides a lastModified field, which is useless when trying to find trending models: some old model repositories merely update their README.md, which in turn updates the whole repository's lastModified date. Also, a lastModified date for the model files alone would be useful within the Python search API:
the siblings array only returns file names, without any lastModified date, and nobody should have to scrape file dates:
siblings: [ModelFile(rfilename='.gitattributes'), ModelFile(rfilename='README.md'), ModelFile(rfilename='config.json'), ModelFile(rfilename='pytorch_model.bin'), ModelFile(rfilename='sentencepiece.bpe.model'), ModelFile(rfilename='special_tokens_map.json'), ModelFile(rfilename='tokenizer.json'), ModelFile(rfilename='tokenizer_config.json')]
Docs
I’ve built a small Gradio UI for Jupyter notebooks or Google Colab that at least lists some popular transformers models and calculates a downloads-per-day metric:
import gradio as gr
from huggingface_hub import HfApi, ModelFilter
from datetime import datetime
import pandas as pd
# Module-level slot holding the most recent value passed to process_output().
output_data = None


def process_output(data):
    """Store *data* in the module-level ``output_data`` variable.

    Args:
        data: Any object; it is kept by reference, not copied.
    """
    global output_data
    output_data = data
def get_models(task, model_library, date_cutoff, text_search, author):
    """Return a DataFrame of Hub models modified on or after a cutoff date.

    Lists models through ``HfApi.list_models`` and, for every model whose
    ``lastModified`` timestamp is not older than ``date_cutoff``, records the
    model id, that timestamp, the total download count and the downloads per
    day elapsed since the last modification.

    Args:
        task: Task name forwarded to ``ModelFilter(task=...)``.
        model_library: Library name forwarded to ``ModelFilter(library=...)``.
        date_cutoff: Cutoff date as a ``YYYY-MM-DD`` string. An empty or
            blank value (e.g. an untouched textbox) disables the date filter.
        text_search: Forwarded to ``list_models`` as ``search``.
        author: Forwarded to ``list_models`` as ``author``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Model``, ``lastModified``,
        ``totalDownloads`` and ``downloadsPerDay``, sorted by ``lastModified``
        with the newest entry first.

    Raises:
        ValueError: If ``date_cutoff`` is non-blank and not ``YYYY-MM-DD``.
    """
    # Local import: the file-level import only brings in ``datetime`` itself.
    from datetime import timezone

    api = HfApi()
    models = api.list_models(
        filter=ModelFilter(
            task=task,
            library=model_library,
        ),
        search=text_search,
        author=author,
    )

    # A blank cutoff means "no date filter" instead of a strptime crash.
    cutoff_text = (date_cutoff or '').strip()
    cutoff = datetime.strptime(cutoff_text, '%Y-%m-%d') if cutoff_text else None

    # The timestamp format below ends in a literal "Z" (UTC designator), so
    # the age must be measured against the current UTC time, not local time.
    # tzinfo is dropped so it can be subtracted from the naive parsed values.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    model_data = []
    for model in models:
        raw_timestamp = getattr(model, 'lastModified', None)
        if not raw_timestamp:
            # Nothing to parse or compare against the cutoff; skip the entry.
            continue

        last_modified = datetime.strptime(raw_timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')
        if cutoff is not None and last_modified < cutoff:
            continue

        days_online = (now_utc - last_modified).days
        # Less than one full day (or a timestamp slightly in the future due
        # to clock skew): report the total instead of dividing by <= 0.
        if days_online > 0:
            downloads_per_day = model.downloads / days_online
        else:
            downloads_per_day = model.downloads

        model_data.append(
            [model.modelId, last_modified, model.downloads, downloads_per_day]
        )

    # Create a DataFrame and sort by lastModified date, newest first.
    df = pd.DataFrame(
        model_data,
        columns=['Model', 'lastModified', 'totalDownloads', 'downloadsPerDay'],
    )
    df.sort_values(by='lastModified', ascending=False, inplace=True)
    return df
# Build the Gradio form: five inputs feeding get_models, one table as output.
# NOTE(review): the gr.inputs / gr.outputs namespaces and the layout= argument
# look like they target an older Gradio release — confirm against the
# installed version.
iface = gr.Interface(
    fn=get_models,
    inputs=[
        gr.inputs.Dropdown(choices=['text-generation'], label='Task'),
        gr.inputs.Dropdown(choices=['transformers'], label='Libraries'),
        gr.inputs.Textbox(lines=1, label='Date Cutoff (YYYY-MM-DD)'),
        gr.inputs.Textbox(lines=1, label='Text Search'),
        gr.inputs.Textbox(lines=1, label='Author'),
    ],
    outputs=gr.outputs.Dataframe(type='pandas'),
    layout='vertical',
    title='Hugging Face Model Explorer',
    description='Explore Hugging Face models by task, architecture, and date.',
)

# Alternative launch with debug output and a public share link:
# iface.launch(debug=True, share=True)
iface.launch()