lewtun HF staff committed on
Commit
bf5cb56
·
1 Parent(s): 06a86d3

Delete Untitled.ipynb

Browse files
Files changed (1) hide show
  1. Untitled.ipynb +0 -266
Untitled.ipynb DELETED
@@ -1,266 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "id": "ed2ddd96-57f3-452e-9d28-e44654edbb65",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "from huggingface_hub import DatasetFilter, list_datasets, HfApi, ModelFilter, DatasetSearchArguments\n",
11
- "from pathlib import Path\n",
12
- "from dotenv import load_dotenv\n",
13
- "import os\n"
14
- ]
15
- },
16
- {
17
- "cell_type": "code",
18
- "execution_count": 2,
19
- "id": "c45ae63c-4e02-47e3-a3e9-895a7bc2702d",
20
- "metadata": {},
21
- "outputs": [],
22
- "source": [
23
- "if Path(\".env\").is_file():\n",
24
- " load_dotenv(\".env\")\n",
25
- "\n",
26
- "auth_token = os.getenv(\"HF_HUB_TOKEN\")"
27
- ]
28
- },
29
- {
30
- "cell_type": "code",
31
- "execution_count": 7,
32
- "id": "23e088a3-276a-45bf-9373-4dfe934b5556",
33
- "metadata": {},
34
- "outputs": [],
35
- "source": [
36
- "filt = DatasetFilter(benchmark=\"raft\")\n",
37
- "submissions = list_datasets(filter=filt, full=True, use_auth_token=auth_token)"
38
- ]
39
- },
40
- {
41
- "cell_type": "code",
42
- "execution_count": 8,
43
- "id": "641c4060",
44
- "metadata": {},
45
- "outputs": [
46
- {
47
- "name": "stdout",
48
- "output_type": "stream",
49
- "text": [
50
- "\u001b[0;31mSignature:\u001b[0m\n",
51
- "\u001b[0mlist_datasets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
52
- "\u001b[0;34m\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
53
- "\u001b[0;34m\u001b[0m \u001b[0mfilter\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mhuggingface_hub\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mendpoint_helpers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDatasetFilter\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mIterable\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
54
- "\u001b[0;34m\u001b[0m \u001b[0mauthor\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
55
- "\u001b[0;34m\u001b[0m \u001b[0msearch\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
56
- "\u001b[0;34m\u001b[0m \u001b[0msort\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mLiteral\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'lastModified'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
57
- "\u001b[0;34m\u001b[0m \u001b[0mdirection\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mLiteral\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
58
- "\u001b[0;34m\u001b[0m \u001b[0mlimit\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
59
- "\u001b[0;34m\u001b[0m \u001b[0mcardData\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbool\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
60
- "\u001b[0;34m\u001b[0m \u001b[0mfull\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbool\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
61
- "\u001b[0;34m\u001b[0m \u001b[0muse_auth_token\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
62
- "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mhuggingface_hub\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhf_api\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDatasetInfo\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
63
- "\u001b[0;31mDocstring:\u001b[0m\n",
64
- "Get the public list of all the datasets on huggingface.co\n",
65
- "\n",
66
- "Args:\n",
67
- " filter ([`DatasetFilter`] or `str` or `Iterable`, *optional*):\n",
68
- " A string or [`DatasetFilter`] which can be used to identify\n",
69
- " datasets on the hub.\n",
70
- " author (`str`, *optional*):\n",
71
- " A string which identify the author of the returned models\n",
72
- " search (`str`, *optional*):\n",
73
- " A string that will be contained in the returned models.\n",
74
- " sort (`Literal[\"lastModified\"]` or `str`, *optional*):\n",
75
- " The key with which to sort the resulting datasets. Possible\n",
76
- " values are the properties of the [`huggingface_hub.hf_api.DatasetInfo`] class.\n",
77
- " direction (`Literal[-1]` or `int`, *optional*):\n",
78
- " Direction in which to sort. The value `-1` sorts by descending\n",
79
- " order while all other values sort by ascending order.\n",
80
- " limit (`int`, *optional*):\n",
81
- " The limit on the number of datasets fetched. Leaving this option\n",
82
- " to `None` fetches all datasets.\n",
83
- " cardData (`bool`, *optional*):\n",
84
- " Whether to grab the metadata for the dataset as well. Can\n",
85
- " contain useful information such as the PapersWithCode ID.\n",
86
- " full (`bool`, *optional*):\n",
87
- " Whether to fetch all dataset data, including the `lastModified`\n",
88
- " and the `cardData`.\n",
89
- " use_auth_token (`bool` or `str`, *optional*):\n",
90
- " Whether to use the `auth_token` provided from the\n",
91
- " `huggingface_hub` cli. If not logged in, a valid `auth_token`\n",
92
- " can be passed in as a string.\n",
93
- "\n",
94
- "Example usage with the `filter` argument:\n",
95
- "\n",
96
- "```python\n",
97
- ">>> from huggingface_hub import HfApi\n",
98
- "\n",
99
- ">>> api = HfApi()\n",
100
- "\n",
101
- ">>> # List all datasets\n",
102
- ">>> api.list_datasets()\n",
103
- "\n",
104
- ">>> # Get all valid search arguments\n",
105
- ">>> args = DatasetSearchArguments()\n",
106
- "\n",
107
- ">>> # List only the text classification datasets\n",
108
- ">>> api.list_datasets(filter=\"task_categories:text-classification\")\n",
109
- ">>> # Using the `DatasetFilter`\n",
110
- ">>> filt = DatasetFilter(task_categories=\"text-classification\")\n",
111
- ">>> # With `DatasetSearchArguments`\n",
112
- ">>> filt = DatasetFilter(task=args.task_categories.text_classification)\n",
113
- ">>> api.list_models(filter=filt)\n",
114
- "\n",
115
- ">>> # List only the datasets in russian for language modeling\n",
116
- ">>> api.list_datasets(\n",
117
- "... filter=(\"languages:ru\", \"task_ids:language-modeling\")\n",
118
- "... )\n",
119
- ">>> # Using the `DatasetFilter`\n",
120
- ">>> filt = DatasetFilter(languages=\"ru\", task_ids=\"language-modeling\")\n",
121
- ">>> # With `DatasetSearchArguments`\n",
122
- ">>> filt = DatasetFilter(\n",
123
- "... languages=args.languages.ru,\n",
124
- "... task_ids=args.task_ids.language_modeling,\n",
125
- "... )\n",
126
- ">>> api.list_datasets(filter=filt)\n",
127
- "```\n",
128
- "\n",
129
- "Example usage with the `search` argument:\n",
130
- "\n",
131
- "```python\n",
132
- ">>> from huggingface_hub import HfApi\n",
133
- "\n",
134
- ">>> api = HfApi()\n",
135
- "\n",
136
- ">>> # List all datasets with \"text\" in their name\n",
137
- ">>> api.list_datasets(search=\"text\")\n",
138
- "\n",
139
- ">>> # List all datasets with \"text\" in their name made by google\n",
140
- ">>> api.list_datasets(search=\"text\", author=\"google\")\n",
141
- "```\n",
142
- "\u001b[0;31mFile:\u001b[0m ~/miniconda3/envs/raft-leaderboard/lib/python3.8/site-packages/huggingface_hub/hf_api.py\n",
143
- "\u001b[0;31mType:\u001b[0m method\n"
144
- ]
145
- }
146
- ],
147
- "source": [
148
- "?list_datasets"
149
- ]
150
- },
151
- {
152
- "cell_type": "code",
153
- "execution_count": 9,
154
- "id": "228750aa-6d92-4d26-971f-5248e056f54b",
155
- "metadata": {},
156
- "outputs": [
157
- {
158
- "data": {
159
- "text/plain": [
160
- "5"
161
- ]
162
- },
163
- "execution_count": 9,
164
- "metadata": {},
165
- "output_type": "execute_result"
166
- }
167
- ],
168
- "source": [
169
- "len(submissions)"
170
- ]
171
- },
172
- {
173
- "cell_type": "code",
174
- "execution_count": 11,
175
- "id": "6dc34fa3-be44-4170-8daf-39f87aae5b34",
176
- "metadata": {},
177
- "outputs": [
178
- {
179
- "name": "stdout",
180
- "output_type": "stream",
181
- "text": [
182
- "benchmark\n",
183
- "type\n",
184
- "submission_name\n"
185
- ]
186
- }
187
- ],
188
- "source": [
189
- "for k,v in submissions[3].cardData.items():\n",
190
- " print(k)"
191
- ]
192
- },
193
- {
194
- "cell_type": "code",
195
- "execution_count": 12,
196
- "id": "f4dd2dbc",
197
- "metadata": {},
198
- "outputs": [
199
- {
200
- "data": {
201
- "text/plain": [
202
- "DatasetInfo: {\n",
203
- "\tid: moshew/my_raft\n",
204
- "\tsha: 534086adc3aec801687316b3fe162e4231ab0a6b\n",
205
- "\tlastModified: 2022-07-16T17:01:04.000Z\n",
206
- "\ttags: ['benchmark:raft']\n",
207
- "\tprivate: False\n",
208
- "\tauthor: moshew\n",
209
- "\tdescription: \n",
210
- "\tcitation: @InProceedings{huggingface:dataset,\n",
211
- "title = {A great new dataset},\n",
212
- "author={huggingface, Inc.\n",
213
- "},\n",
214
- "year={2020}\n",
215
- "}\n",
216
- "\tcardData: {'benchmark': 'raft', 'type': 'prediction', 'submission_name': 'SetFit300'}\n",
217
- "\tsiblings: None\n",
218
- "\t_id: 621ffdd236468d709f183ac3\n",
219
- "\tdisabled: False\n",
220
- "\tgated: auto\n",
221
- "\tgitalyUid: 0d29a8b3b8364fb2d86b3ad56d62ea4aaf13a5cf95884aa0381b966d79b045e1\n",
222
- "\tlikes: 0\n",
223
- "\tdownloads: 0\n",
224
- "}"
225
- ]
226
- },
227
- "execution_count": 12,
228
- "metadata": {},
229
- "output_type": "execute_result"
230
- }
231
- ],
232
- "source": [
233
- "submissions[0]"
234
- ]
235
- },
236
- {
237
- "cell_type": "code",
238
- "execution_count": null,
239
- "id": "27079f0b",
240
- "metadata": {},
241
- "outputs": [],
242
- "source": []
243
- }
244
- ],
245
- "metadata": {
246
- "kernelspec": {
247
- "display_name": "raft-leaderboard",
248
- "language": "python",
249
- "name": "python3"
250
- },
251
- "language_info": {
252
- "codemirror_mode": {
253
- "name": "ipython",
254
- "version": 3
255
- },
256
- "file_extension": ".py",
257
- "mimetype": "text/x-python",
258
- "name": "python",
259
- "nbconvert_exporter": "python",
260
- "pygments_lexer": "ipython3",
261
- "version": "3.8.15"
262
- }
263
- },
264
- "nbformat": 4,
265
- "nbformat_minor": 5
266
- }