burtenshaw
committed on
Commit
•
ec60e9a
1
Parent(s):
afaf730
use percentile boundaries in app
Browse files
- app.py +40 -25
- default.jpg +0 -0
- default.png +0 -0
- images/empty.png +0 -0
- images/space.png +0 -0
- percentiles.json +1 -0
- pyproject.toml +3 -0
- stats_dataset.ipynb +166 -0
app.py
CHANGED
@@ -3,13 +3,28 @@ import gradio as gr
|
|
3 |
from urllib.parse import urlencode
|
4 |
import os
|
5 |
from datetime import datetime
|
|
|
6 |
|
7 |
# Load environment variables
|
8 |
|
9 |
DEFAULT_IMAGE = "https://hub-recap.imglab-cdn.net/default.jpg?width=1200&text=%3Cspan+size%3D%2212pt%22+weight%3D%22bold%22%3EHugging+Face++%E2%9D%A4%EF%B8%8F+bartowski+in+2024%3C%2Fspan%3E%0A%0A%3Cspan+weight%3D%22bold%22%3E2%2C020%2C552%3C%2Fspan%3E+model+downloads%0A%3Cspan+weight%3D%22bold%22%3E5%2C407%3C%2Fspan%3E+model+likes%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+downloads%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+likes%0A%0A%3Cspan+size%3D%2210pt%22%3EMost+Popular+Contributions%3A%3C%2Fspan%3E%0AModel%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgemma-2-9b-it-GGUF%3C%2Fspan%3E%0A++%2843%2C949+downloads%2C+196+likes%29%0ADataset%3A+%3Cspan+weight%3D%22bold%22%3ENone%3C%2Fspan%3E%0A++%280+downloads%2C+0+likes%29%0ASpace%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgguf-metadata-updater%3C%2Fspan%3E%0A++%287+likes%29&text-width=800&text-height=600&text-padding=60&text-color=39%2C71%2C111&text-x=460&text-y=40&format=png&dpr=2"
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
|
15 |
def create_image(stats, username):
|
@@ -19,39 +34,39 @@ def create_image(stats, username):
|
|
19 |
dataset_activity = total_stats["Dataset Downloads"] + total_stats["Dataset Likes"]
|
20 |
space_activity = total_stats["Space Likes"]
|
21 |
|
22 |
-
# Calculate percentiles based on
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
)
|
27 |
-
dataset_percentile = round(
|
28 |
-
(top_items["Top Dataset"]["likes"] / MAX_DATASET_ACTIVITY) * 100, 2
|
29 |
-
)
|
30 |
-
space_percentile = round(
|
31 |
-
(top_items["Top Space"]["likes"] / MAX_SPACE_ACTIVITY) * 100, 2
|
32 |
-
)
|
33 |
|
34 |
-
# Choose base image URL based on highest activity
|
35 |
-
# check if no activity in any category
|
36 |
-
# if everything is 0, we show the empty image
|
37 |
if model_activity == 0 and dataset_activity == 0 and space_activity == 0:
|
38 |
url = "https://hub-recap.imglab-cdn.net/images/empty.png"
|
39 |
avatar = "new! We couldn't find your stats on the Hub, maybe in 2025?"
|
40 |
elif model_activity >= max(dataset_activity, space_activity):
|
41 |
-
url = "https://hub-recap.imglab-cdn.net/images/
|
42 |
-
avatar = f"Model Pro (
|
|
|
|
|
43 |
elif dataset_activity >= max(model_activity, space_activity):
|
44 |
-
url = "https://hub-recap.imglab-cdn.net/images/
|
45 |
-
avatar = f"Dataset Guru (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
else:
|
47 |
-
url = "https://hub-recap.imglab-cdn.net/images/
|
48 |
-
avatar =
|
49 |
|
50 |
# Build text content with proper formatting
|
51 |
text_parts = []
|
52 |
|
53 |
text_parts.append(
|
54 |
-
f'<span size="11pt" weight="bold">Hugging Face
|
55 |
)
|
56 |
text_parts.append("") # Empty line for spacing
|
57 |
|
@@ -117,7 +132,7 @@ def create_image(stats, username):
|
|
117 |
|
118 |
# Update the avatar message with percentile
|
119 |
text_parts.append("") # Empty line for spacing
|
120 |
-
text_parts.append(f'<span size="9pt">You are a {avatar}
|
121 |
|
122 |
# Add additional percentile info if other categories are significant
|
123 |
other_percentiles = []
|
|
|
3 |
from urllib.parse import urlencode
|
4 |
import os
|
5 |
from datetime import datetime
|
6 |
+
import json
|
7 |
|
8 |
# Load environment variables
|
9 |
|
10 |
DEFAULT_IMAGE = "https://hub-recap.imglab-cdn.net/default.jpg?width=1200&text=%3Cspan+size%3D%2212pt%22+weight%3D%22bold%22%3EHugging+Face++%E2%9D%A4%EF%B8%8F+bartowski+in+2024%3C%2Fspan%3E%0A%0A%3Cspan+weight%3D%22bold%22%3E2%2C020%2C552%3C%2Fspan%3E+model+downloads%0A%3Cspan+weight%3D%22bold%22%3E5%2C407%3C%2Fspan%3E+model+likes%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+downloads%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+likes%0A%0A%3Cspan+size%3D%2210pt%22%3EMost+Popular+Contributions%3A%3C%2Fspan%3E%0AModel%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgemma-2-9b-it-GGUF%3C%2Fspan%3E%0A++%2843%2C949+downloads%2C+196+likes%29%0ADataset%3A+%3Cspan+weight%3D%22bold%22%3ENone%3C%2Fspan%3E%0A++%280+downloads%2C+0+likes%29%0ASpace%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgguf-metadata-updater%3C%2Fspan%3E%0A++%287+likes%29&text-width=800&text-height=600&text-padding=60&text-color=39%2C71%2C111&text-x=460&text-y=40&format=png&dpr=2"
|
11 |
+
|
12 |
+
# Load percentiles data
|
13 |
+
with open("percentiles.json") as f:
|
14 |
+
PERCENTILES = json.load(f)
|
15 |
+
|
16 |
+
|
17 |
+
def get_percentile_rank(likes, category):
|
18 |
+
if likes == 0:
|
19 |
+
return 0
|
20 |
+
percentiles = PERCENTILES[f"{category}_percentiles"]
|
21 |
+
if likes >= percentiles["p_99999"]:
|
22 |
+
return 99.999
|
23 |
+
elif likes >= percentiles["p_9999"]:
|
24 |
+
return 99.99
|
25 |
+
elif likes >= percentiles["p_999"]:
|
26 |
+
return 99.9
|
27 |
+
return 0
|
28 |
|
29 |
|
30 |
def create_image(stats, username):
|
|
|
34 |
dataset_activity = total_stats["Dataset Downloads"] + total_stats["Dataset Likes"]
|
35 |
space_activity = total_stats["Space Likes"]
|
36 |
|
37 |
+
# Calculate percentiles based on likes
|
38 |
+
model_percentile = get_percentile_rank(total_stats["Model Likes"], "model")
|
39 |
+
dataset_percentile = get_percentile_rank(total_stats["Dataset Likes"], "dataset")
|
40 |
+
space_percentile = get_percentile_rank(space_activity, "space")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
+
# Choose base image URL based on highest activity (keep using activity for image selection)
|
|
|
|
|
43 |
if model_activity == 0 and dataset_activity == 0 and space_activity == 0:
|
44 |
url = "https://hub-recap.imglab-cdn.net/images/empty.png"
|
45 |
avatar = "new! We couldn't find your stats on the Hub, maybe in 2025?"
|
46 |
elif model_activity >= max(dataset_activity, space_activity):
|
47 |
+
url = "https://hub-recap.imglab-cdn.net/images/model.png"
|
48 |
+
avatar = f"Model Pro" + (
|
49 |
+
f" (top {model_percentile}%)" if model_percentile > 0 else ""
|
50 |
+
)
|
51 |
elif dataset_activity >= max(model_activity, space_activity):
|
52 |
+
url = "https://hub-recap.imglab-cdn.net/images/dataset.png"
|
53 |
+
avatar = f"Dataset Guru" + (
|
54 |
+
f" (top {dataset_percentile}%)" if dataset_percentile > 0 else ""
|
55 |
+
)
|
56 |
+
elif space_activity >= max(model_activity, dataset_activity):
|
57 |
+
url = "https://hub-recap.imglab-cdn.net/images/space.png"
|
58 |
+
avatar = f"Space Artiste" + (
|
59 |
+
f" (top {space_percentile}%)" if space_percentile > 0 else ""
|
60 |
+
)
|
61 |
else:
|
62 |
+
url = "https://hub-recap.imglab-cdn.net/images/empty.png"
|
63 |
+
avatar = "new! We couldn't find your stats on the Hub, maybe in 2025?"
|
64 |
|
65 |
# Build text content with proper formatting
|
66 |
text_parts = []
|
67 |
|
68 |
text_parts.append(
|
69 |
+
f'<span size="11pt" weight="bold">Hugging Face ❤️ {username} in 2024</span>'
|
70 |
)
|
71 |
text_parts.append("") # Empty line for spacing
|
72 |
|
|
|
132 |
|
133 |
# Update the avatar message with percentile
|
134 |
text_parts.append("") # Empty line for spacing
|
135 |
+
text_parts.append(f'<span size="9pt">You are a {avatar}!</span>')
|
136 |
|
137 |
# Add additional percentile info if other categories are significant
|
138 |
other_percentiles = []
|
default.jpg
DELETED
Binary file (247 kB)
|
|
default.png
ADDED
images/empty.png
CHANGED
images/space.png
CHANGED
percentiles.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dataset_percentiles": {"p_99999": 1299, "p_9999": 491, "p_999": 125}, "model_percentiles": {"p_99999": 3698, "p_9999": 949, "p_999": 143}, "space_percentiles": {"p_99999": 6040, "p_9999": 1552, "p_999": 326}}
|
pyproject.toml
CHANGED
@@ -5,6 +5,9 @@ description = "Add your description here"
|
|
5 |
readme = "README.md"
|
6 |
requires-python = ">=3.11"
|
7 |
dependencies = [
|
|
|
8 |
"gradio>=5.9.1",
|
|
|
|
|
9 |
"requests>=2.32.3",
|
10 |
]
|
|
|
5 |
readme = "README.md"
|
6 |
requires-python = ">=3.11"
|
7 |
dependencies = [
|
8 |
+
"datasets>=3.2.0",
|
9 |
"gradio>=5.9.1",
|
10 |
+
"ipykernel>=6.29.5",
|
11 |
+
"pandas>=2.2.3",
|
12 |
"requests>=2.32.3",
|
13 |
]
|
stats_dataset.ipynb
ADDED
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 2,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stderr",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"/Users/ben/code/hub-recap/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
13 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
14 |
+
]
|
15 |
+
}
|
16 |
+
],
|
17 |
+
"source": [
|
18 |
+
"from datasets import load_dataset\n",
|
19 |
+
"\n",
|
20 |
+
"ds = load_dataset(\"cfahlgren1/hub-stats\", \"datasets\")\n",
|
21 |
+
"ds_df = ds[\"train\"].to_pandas()"
|
22 |
+
]
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"cell_type": "code",
|
26 |
+
"execution_count": 3,
|
27 |
+
"metadata": {},
|
28 |
+
"outputs": [],
|
29 |
+
"source": [
|
30 |
+
"ds = load_dataset(\"cfahlgren1/hub-stats\", \"models\")\n",
|
31 |
+
"md_df = ds[\"train\"].to_pandas()"
|
32 |
+
]
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"cell_type": "code",
|
36 |
+
"execution_count": 30,
|
37 |
+
"metadata": {},
|
38 |
+
"outputs": [
|
39 |
+
{
|
40 |
+
"name": "stderr",
|
41 |
+
"output_type": "stream",
|
42 |
+
"text": [
|
43 |
+
"Generating train split: 100%|██████████| 309714/309714 [00:00<00:00, 353713.86 examples/s]\n"
|
44 |
+
]
|
45 |
+
}
|
46 |
+
],
|
47 |
+
"source": [
|
48 |
+
"ds = load_dataset(\"cfahlgren1/hub-stats\", \"spaces\")\n",
|
49 |
+
"sp_df = ds[\"train\"].to_pandas()"
|
50 |
+
]
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"cell_type": "code",
|
54 |
+
"execution_count": 40,
|
55 |
+
"metadata": {},
|
56 |
+
"outputs": [
|
57 |
+
{
|
58 |
+
"name": "stdout",
|
59 |
+
"output_type": "stream",
|
60 |
+
"text": [
|
61 |
+
"{'p_99999': 1299, 'p_9999': 491, 'p_999': 125}\n"
|
62 |
+
]
|
63 |
+
}
|
64 |
+
],
|
65 |
+
"source": [
|
66 |
+
"dataset_percentiles = {\n",
|
67 |
+
" \"p_99999\": int(ds_df[\"likes\"].quantile(0.99999)),\n",
|
68 |
+
" \"p_9999\": int(ds_df[\"likes\"].quantile(0.9999)),\n",
|
69 |
+
" \"p_999\": int(ds_df[\"likes\"].quantile(0.999)),\n",
|
70 |
+
"}\n",
|
71 |
+
"print(dataset_percentiles)"
|
72 |
+
]
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"cell_type": "code",
|
76 |
+
"execution_count": 41,
|
77 |
+
"metadata": {},
|
78 |
+
"outputs": [
|
79 |
+
{
|
80 |
+
"name": "stdout",
|
81 |
+
"output_type": "stream",
|
82 |
+
"text": [
|
83 |
+
"{'p_99999': 3698, 'p_9999': 949, 'p_999': 143}\n"
|
84 |
+
]
|
85 |
+
}
|
86 |
+
],
|
87 |
+
"source": [
|
88 |
+
"model_percentiles = {\n",
|
89 |
+
" \"p_99999\": int(md_df[\"likes\"].quantile(0.99999)),\n",
|
90 |
+
" \"p_9999\": int(md_df[\"likes\"].quantile(0.9999)),\n",
|
91 |
+
" \"p_999\": int(md_df[\"likes\"].quantile(0.999)),\n",
|
92 |
+
"}\n",
|
93 |
+
"print(model_percentiles)"
|
94 |
+
]
|
95 |
+
},
|
96 |
+
{
|
97 |
+
"cell_type": "code",
|
98 |
+
"execution_count": 42,
|
99 |
+
"metadata": {},
|
100 |
+
"outputs": [
|
101 |
+
{
|
102 |
+
"name": "stdout",
|
103 |
+
"output_type": "stream",
|
104 |
+
"text": [
|
105 |
+
"{'p_99999': 6040, 'p_9999': 1552, 'p_999': 326}\n"
|
106 |
+
]
|
107 |
+
}
|
108 |
+
],
|
109 |
+
"source": [
|
110 |
+
"space_percentiles = {\n",
|
111 |
+
" \"p_99999\": int(sp_df[\"likes\"].quantile(0.99999)),\n",
|
112 |
+
" \"p_9999\": int(sp_df[\"likes\"].quantile(0.9999)),\n",
|
113 |
+
" \"p_999\": int(sp_df[\"likes\"].quantile(0.999)),\n",
|
114 |
+
"}\n",
|
115 |
+
"print(space_percentiles)"
|
116 |
+
]
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"cell_type": "code",
|
120 |
+
"execution_count": 43,
|
121 |
+
"metadata": {},
|
122 |
+
"outputs": [],
|
123 |
+
"source": [
|
124 |
+
"import json\n",
|
125 |
+
"\n",
|
126 |
+
"with open(\"percentiles.json\", \"w\") as f:\n",
|
127 |
+
" json.dump(\n",
|
128 |
+
" {\n",
|
129 |
+
" \"dataset_percentiles\": dataset_percentiles,\n",
|
130 |
+
" \"model_percentiles\": model_percentiles,\n",
|
131 |
+
" \"space_percentiles\": space_percentiles,\n",
|
132 |
+
" },\n",
|
133 |
+
" f,\n",
|
134 |
+
" )"
|
135 |
+
]
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"cell_type": "code",
|
139 |
+
"execution_count": null,
|
140 |
+
"metadata": {},
|
141 |
+
"outputs": [],
|
142 |
+
"source": []
|
143 |
+
}
|
144 |
+
],
|
145 |
+
"metadata": {
|
146 |
+
"kernelspec": {
|
147 |
+
"display_name": ".venv",
|
148 |
+
"language": "python",
|
149 |
+
"name": "python3"
|
150 |
+
},
|
151 |
+
"language_info": {
|
152 |
+
"codemirror_mode": {
|
153 |
+
"name": "ipython",
|
154 |
+
"version": 3
|
155 |
+
},
|
156 |
+
"file_extension": ".py",
|
157 |
+
"mimetype": "text/x-python",
|
158 |
+
"name": "python",
|
159 |
+
"nbconvert_exporter": "python",
|
160 |
+
"pygments_lexer": "ipython3",
|
161 |
+
"version": "3.11.10"
|
162 |
+
}
|
163 |
+
},
|
164 |
+
"nbformat": 4,
|
165 |
+
"nbformat_minor": 2
|
166 |
+
}
|