reach-vb HF staff committed on
Commit
c8e7ce2
·
1 Parent(s): 254a3c6

87245978eac49d491b540e2a86047c183ef44b5025e4ace6bf1f58653aed56a8

Browse files
Files changed (50) hide show
  1. lib/python3.11/site-packages/huggingface_hub/templates/datasetcard_template.md +143 -0
  2. lib/python3.11/site-packages/huggingface_hub/templates/modelcard_template.md +203 -0
  3. lib/python3.11/site-packages/huggingface_hub/utils/__init__.py +108 -0
  4. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  5. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_cache_assets.cpython-311.pyc +0 -0
  6. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_cache_manager.cpython-311.pyc +0 -0
  7. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_chunk_utils.cpython-311.pyc +0 -0
  8. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_datetime.cpython-311.pyc +0 -0
  9. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_deprecation.cpython-311.pyc +0 -0
  10. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_errors.cpython-311.pyc +0 -0
  11. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_experimental.cpython-311.pyc +0 -0
  12. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_fixes.cpython-311.pyc +0 -0
  13. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_git_credential.cpython-311.pyc +0 -0
  14. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_headers.cpython-311.pyc +0 -0
  15. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_hf_folder.cpython-311.pyc +0 -0
  16. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_http.cpython-311.pyc +0 -0
  17. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_pagination.cpython-311.pyc +0 -0
  18. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_paths.cpython-311.pyc +0 -0
  19. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_runtime.cpython-311.pyc +0 -0
  20. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_safetensors.cpython-311.pyc +0 -0
  21. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_subprocess.cpython-311.pyc +0 -0
  22. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_telemetry.cpython-311.pyc +0 -0
  23. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_token.cpython-311.pyc +0 -0
  24. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_typing.cpython-311.pyc +0 -0
  25. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_validators.cpython-311.pyc +0 -0
  26. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/endpoint_helpers.cpython-311.pyc +0 -0
  27. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/insecure_hashlib.cpython-311.pyc +0 -0
  28. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/logging.cpython-311.pyc +0 -0
  29. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/sha.cpython-311.pyc +0 -0
  30. lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/tqdm.cpython-311.pyc +0 -0
  31. lib/python3.11/site-packages/huggingface_hub/utils/_cache_assets.py +135 -0
  32. lib/python3.11/site-packages/huggingface_hub/utils/_cache_manager.py +806 -0
  33. lib/python3.11/site-packages/huggingface_hub/utils/_chunk_utils.py +64 -0
  34. lib/python3.11/site-packages/huggingface_hub/utils/_datetime.py +68 -0
  35. lib/python3.11/site-packages/huggingface_hub/utils/_deprecation.py +136 -0
  36. lib/python3.11/site-packages/huggingface_hub/utils/_errors.py +359 -0
  37. lib/python3.11/site-packages/huggingface_hub/utils/_experimental.py +65 -0
  38. lib/python3.11/site-packages/huggingface_hub/utils/_fixes.py +77 -0
  39. lib/python3.11/site-packages/huggingface_hub/utils/_git_credential.py +120 -0
  40. lib/python3.11/site-packages/huggingface_hub/utils/_headers.py +234 -0
  41. lib/python3.11/site-packages/huggingface_hub/utils/_hf_folder.py +114 -0
  42. lib/python3.11/site-packages/huggingface_hub/utils/_http.py +307 -0
  43. lib/python3.11/site-packages/huggingface_hub/utils/_pagination.py +51 -0
  44. lib/python3.11/site-packages/huggingface_hub/utils/_paths.py +117 -0
  45. lib/python3.11/site-packages/huggingface_hub/utils/_runtime.py +344 -0
  46. lib/python3.11/site-packages/huggingface_hub/utils/_safetensors.py +124 -0
  47. lib/python3.11/site-packages/huggingface_hub/utils/_subprocess.py +142 -0
  48. lib/python3.11/site-packages/huggingface_hub/utils/_telemetry.py +118 -0
  49. lib/python3.11/site-packages/huggingface_hub/utils/_token.py +129 -0
  50. lib/python3.11/site-packages/huggingface_hub/utils/_typing.py +22 -0
lib/python3.11/site-packages/huggingface_hub/templates/datasetcard_template.md ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ # For reference on dataset card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/datasetcard.md?plain=1
3
+ # Doc / guide: https://huggingface.co/docs/hub/datasets-cards
4
+ {{ card_data }}
5
+ ---
6
+
7
+ # Dataset Card for {{ pretty_name | default("Dataset Name", true) }}
8
+
9
+ <!-- Provide a quick summary of the dataset. -->
10
+
11
+ {{ dataset_summary | default("", true) }}
12
+
13
+ ## Dataset Details
14
+
15
+ ### Dataset Description
16
+
17
+ <!-- Provide a longer summary of what this dataset is. -->
18
+
19
+ {{ dataset_description | default("", true) }}
20
+
21
+ - **Curated by:** {{ curators | default("[More Information Needed]", true)}}
22
+ - **Funded by [optional]:** {{ funded_by | default("[More Information Needed]", true)}}
23
+ - **Shared by [optional]:** {{ shared_by | default("[More Information Needed]", true)}}
24
+ - **Language(s) (NLP):** {{ language | default("[More Information Needed]", true)}}
25
+ - **License:** {{ license | default("[More Information Needed]", true)}}
26
+
27
+ ### Dataset Sources [optional]
28
+
29
+ <!-- Provide the basic links for the dataset. -->
30
+
31
+ - **Repository:** {{ repo | default("[More Information Needed]", true)}}
32
+ - **Paper [optional]:** {{ paper | default("[More Information Needed]", true)}}
33
+ - **Demo [optional]:** {{ demo | default("[More Information Needed]", true)}}
34
+
35
+ ## Uses
36
+
37
+ <!-- Address questions around how the dataset is intended to be used. -->
38
+
39
+ ### Direct Use
40
+
41
+ <!-- This section describes suitable use cases for the dataset. -->
42
+
43
+ {{ direct_use | default("[More Information Needed]", true)}}
44
+
45
+ ### Out-of-Scope Use
46
+
47
+ <!-- This section addresses misuse, malicious use, and uses that the dataset will not work well for. -->
48
+
49
+ {{ out_of_scope_use | default("[More Information Needed]", true)}}
50
+
51
+ ## Dataset Structure
52
+
53
+ <!-- This section provides a description of the dataset fields, and additional information about the dataset structure such as criteria used to create the splits, relationships between data points, etc. -->
54
+
55
+ {{ dataset_structure | default("[More Information Needed]", true)}}
56
+
57
+ ## Dataset Creation
58
+
59
+ ### Curation Rationale
60
+
61
+ <!-- Motivation for the creation of this dataset. -->
62
+
63
+ {{ curation_rationale_section | default("[More Information Needed]", true)}}
64
+
65
+ ### Source Data
66
+
67
+ <!-- This section describes the source data (e.g. news text and headlines, social media posts, translated sentences, ...). -->
68
+
69
+ #### Data Collection and Processing
70
+
71
+ <!-- This section describes how the data was collected and processed, such as data selection criteria, filtering and normalization methods, tools and libraries used, etc. -->
72
+
73
+ {{ data_collection_and_processing_section | default("[More Information Needed]", true)}}
74
+
75
+ #### Who are the source data producers?
76
+
77
+ <!-- This section describes the people or systems who originally created the data. It should also include self-reported demographic or identity information for the source data creators if this information is available. -->
78
+
79
+ {{ source_data_producers_section | default("[More Information Needed]", true)}}
80
+
81
+ ### Annotations [optional]
82
+
83
+ <!-- If the dataset contains annotations which are not part of the initial data collection, use this section to describe them. -->
84
+
85
+ #### Annotation process
86
+
87
+ <!-- This section describes the annotation process such as annotation tools used in the process, the amount of data annotated, annotation guidelines provided to the annotators, interannotator statistics, annotation validation, etc. -->
88
+
89
+ {{ annotation_process_section | default("[More Information Needed]", true)}}
90
+
91
+ #### Who are the annotators?
92
+
93
+ <!-- This section describes the people or systems who created the annotations. -->
94
+
95
+ {{ who_are_annotators_section | default("[More Information Needed]", true)}}
96
+
97
+ #### Personal and Sensitive Information
98
+
99
+ <!-- State whether the dataset contains data that might be considered personal, sensitive, or private (e.g., data that reveals addresses, uniquely identifiable names or aliases, racial or ethnic origins, sexual orientations, religious beliefs, political opinions, financial or health data, etc.). If efforts were made to anonymize the data, describe the anonymization process. -->
100
+
101
+ {{ personal_and_sensitive_information | default("[More Information Needed]", true)}}
102
+
103
+ ## Bias, Risks, and Limitations
104
+
105
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
106
+
107
+ {{ bias_risks_limitations | default("[More Information Needed]", true)}}
108
+
109
+ ### Recommendations
110
+
111
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
112
+
113
+ {{ bias_recommendations | default("Users should be made aware of the risks, biases and limitations of the dataset. More information needed for further recommendations.", true)}}
114
+
115
+ ## Citation [optional]
116
+
117
+ <!-- If there is a paper or blog post introducing the dataset, the APA and BibTeX information for that should go in this section. -->
118
+
119
+ **BibTeX:**
120
+
121
+ {{ citation_bibtex | default("[More Information Needed]", true)}}
122
+
123
+ **APA:**
124
+
125
+ {{ citation_apa | default("[More Information Needed]", true)}}
126
+
127
+ ## Glossary [optional]
128
+
129
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the dataset or dataset card. -->
130
+
131
+ {{ glossary | default("[More Information Needed]", true)}}
132
+
133
+ ## More Information [optional]
134
+
135
+ {{ more_information | default("[More Information Needed]", true)}}
136
+
137
+ ## Dataset Card Authors [optional]
138
+
139
+ {{ dataset_card_authors | default("[More Information Needed]", true)}}
140
+
141
+ ## Dataset Card Contact
142
+
143
+ {{ dataset_card_contact | default("[More Information Needed]", true)}}
lib/python3.11/site-packages/huggingface_hub/templates/modelcard_template.md ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ # For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
3
+ # Doc / guide: https://huggingface.co/docs/hub/model-cards
4
+ {{ card_data }}
5
+ ---
6
+
7
+ # Model Card for {{ model_id | default("Model ID", true) }}
8
+
9
+ <!-- Provide a quick summary of what the model is/does. -->
10
+
11
+ {{ model_summary | default("", true) }}
12
+
13
+ ## Model Details
14
+
15
+ ### Model Description
16
+
17
+ <!-- Provide a longer summary of what this model is. -->
18
+
19
+ {{ model_description | default("", true) }}
20
+
21
+ - **Developed by:** {{ developers | default("[More Information Needed]", true)}}
22
+ - **Funded by [optional]:** {{ funded_by | default("[More Information Needed]", true)}}
23
+ - **Shared by [optional]:** {{ shared_by | default("[More Information Needed]", true)}}
24
+ - **Model type:** {{ model_type | default("[More Information Needed]", true)}}
25
+ - **Language(s) (NLP):** {{ language | default("[More Information Needed]", true)}}
26
+ - **License:** {{ license | default("[More Information Needed]", true)}}
27
+ - **Finetuned from model [optional]:** {{ base_model | default("[More Information Needed]", true)}}
28
+
29
+ ### Model Sources [optional]
30
+
31
+ <!-- Provide the basic links for the model. -->
32
+
33
+ - **Repository:** {{ repo | default("[More Information Needed]", true)}}
34
+ - **Paper [optional]:** {{ paper | default("[More Information Needed]", true)}}
35
+ - **Demo [optional]:** {{ demo | default("[More Information Needed]", true)}}
36
+
37
+ ## Uses
38
+
39
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
40
+
41
+ ### Direct Use
42
+
43
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
44
+
45
+ {{ direct_use | default("[More Information Needed]", true)}}
46
+
47
+ ### Downstream Use [optional]
48
+
49
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
50
+
51
+ {{ downstream_use | default("[More Information Needed]", true)}}
52
+
53
+ ### Out-of-Scope Use
54
+
55
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
56
+
57
+ {{ out_of_scope_use | default("[More Information Needed]", true)}}
58
+
59
+ ## Bias, Risks, and Limitations
60
+
61
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
62
+
63
+ {{ bias_risks_limitations | default("[More Information Needed]", true)}}
64
+
65
+ ### Recommendations
66
+
67
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
68
+
69
+ {{ bias_recommendations | default("Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.", true)}}
70
+
71
+ ## How to Get Started with the Model
72
+
73
+ Use the code below to get started with the model.
74
+
75
+ {{ get_started_code | default("[More Information Needed]", true)}}
76
+
77
+ ## Training Details
78
+
79
+ ### Training Data
80
+
81
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
82
+
83
+ {{ training_data | default("[More Information Needed]", true)}}
84
+
85
+ ### Training Procedure
86
+
87
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
88
+
89
+ #### Preprocessing [optional]
90
+
91
+ {{ preprocessing | default("[More Information Needed]", true)}}
92
+
93
+
94
+ #### Training Hyperparameters
95
+
96
+ - **Training regime:** {{ training_regime | default("[More Information Needed]", true)}} <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
97
+
98
+ #### Speeds, Sizes, Times [optional]
99
+
100
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
101
+
102
+ {{ speeds_sizes_times | default("[More Information Needed]", true)}}
103
+
104
+ ## Evaluation
105
+
106
+ <!-- This section describes the evaluation protocols and provides the results. -->
107
+
108
+ ### Testing Data, Factors & Metrics
109
+
110
+ #### Testing Data
111
+
112
+ <!-- This should link to a Dataset Card if possible. -->
113
+
114
+ {{ testing_data | default("[More Information Needed]", true)}}
115
+
116
+ #### Factors
117
+
118
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
119
+
120
+ {{ testing_factors | default("[More Information Needed]", true)}}
121
+
122
+ #### Metrics
123
+
124
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
125
+
126
+ {{ testing_metrics | default("[More Information Needed]", true)}}
127
+
128
+ ### Results
129
+
130
+ {{ results | default("[More Information Needed]", true)}}
131
+
132
+ #### Summary
133
+
134
+ {{ results_summary | default("", true) }}
135
+
136
+ ## Model Examination [optional]
137
+
138
+ <!-- Relevant interpretability work for the model goes here -->
139
+
140
+ {{ model_examination | default("[More Information Needed]", true)}}
141
+
142
+ ## Environmental Impact
143
+
144
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
145
+
146
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
147
+
148
+ - **Hardware Type:** {{ hardware_type | default("[More Information Needed]", true)}}
149
+ - **Hours used:** {{ hours_used | default("[More Information Needed]", true)}}
150
+ - **Cloud Provider:** {{ cloud_provider | default("[More Information Needed]", true)}}
151
+ - **Compute Region:** {{ cloud_region | default("[More Information Needed]", true)}}
152
+ - **Carbon Emitted:** {{ co2_emitted | default("[More Information Needed]", true)}}
153
+
154
+ ## Technical Specifications [optional]
155
+
156
+ ### Model Architecture and Objective
157
+
158
+ {{ model_specs | default("[More Information Needed]", true)}}
159
+
160
+ ### Compute Infrastructure
161
+
162
+ {{ compute_infrastructure | default("[More Information Needed]", true)}}
163
+
164
+ #### Hardware
165
+
166
+ {{ hardware_requirements | default("[More Information Needed]", true)}}
167
+
168
+ #### Software
169
+
170
+ {{ software | default("[More Information Needed]", true)}}
171
+
172
+ ## Citation [optional]
173
+
174
+ <!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
175
+
176
+ **BibTeX:**
177
+
178
+ {{ citation_bibtex | default("[More Information Needed]", true)}}
179
+
180
+ **APA:**
181
+
182
+ {{ citation_apa | default("[More Information Needed]", true)}}
183
+
184
+ ## Glossary [optional]
185
+
186
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
187
+
188
+ {{ glossary | default("[More Information Needed]", true)}}
189
+
190
+ ## More Information [optional]
191
+
192
+ {{ more_information | default("[More Information Needed]", true)}}
193
+
194
+ ## Model Card Authors [optional]
195
+
196
+ {{ model_card_authors | default("[More Information Needed]", true)}}
197
+
198
+ ## Model Card Contact
199
+
200
+ {{ model_card_contact | default("[More Information Needed]", true)}}
201
+
202
+
203
+
lib/python3.11/site-packages/huggingface_hub/utils/__init__.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # flake8: noqa
2
+ #!/usr/bin/env python
3
+ # coding=utf-8
4
+ # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License
17
+
18
+ from . import tqdm as _tqdm # _tqdm is the module
19
+ from ._cache_assets import cached_assets_path
20
+ from ._cache_manager import (
21
+ CachedFileInfo,
22
+ CachedRepoInfo,
23
+ CachedRevisionInfo,
24
+ CacheNotFound,
25
+ CorruptedCacheException,
26
+ DeleteCacheStrategy,
27
+ HFCacheInfo,
28
+ scan_cache_dir,
29
+ )
30
+ from ._chunk_utils import chunk_iterable
31
+ from ._datetime import parse_datetime
32
+ from ._errors import (
33
+ BadRequestError,
34
+ EntryNotFoundError,
35
+ FileMetadataError,
36
+ GatedRepoError,
37
+ HfHubHTTPError,
38
+ LocalEntryNotFoundError,
39
+ RepositoryNotFoundError,
40
+ RevisionNotFoundError,
41
+ hf_raise_for_status,
42
+ )
43
+ from ._token import get_token
44
+ from ._fixes import SoftTemporaryDirectory, yaml_dump
45
+ from ._git_credential import list_credential_helpers, set_git_credential, unset_git_credential
46
+ from ._headers import build_hf_headers, get_token_to_send, LocalTokenNotFoundError
47
+ from ._hf_folder import HfFolder
48
+ from ._http import configure_http_backend, get_session, http_backoff, reset_sessions, OfflineModeIsEnabled
49
+ from ._pagination import paginate
50
+ from ._paths import filter_repo_objects, IGNORE_GIT_FOLDER_PATTERNS
51
+ from ._experimental import experimental
52
+ from ._runtime import (
53
+ dump_environment_info,
54
+ get_aiohttp_version,
55
+ get_fastai_version,
56
+ get_fastcore_version,
57
+ get_gradio_version,
58
+ get_graphviz_version,
59
+ get_hf_hub_version,
60
+ get_hf_transfer_version,
61
+ get_jinja_version,
62
+ get_numpy_version,
63
+ get_pillow_version,
64
+ get_pydantic_version,
65
+ get_pydot_version,
66
+ get_python_version,
67
+ get_tensorboard_version,
68
+ get_tf_version,
69
+ get_torch_version,
70
+ is_aiohttp_available,
71
+ is_fastai_available,
72
+ is_fastcore_available,
73
+ is_numpy_available,
74
+ is_google_colab,
75
+ is_gradio_available,
76
+ is_graphviz_available,
77
+ is_hf_transfer_available,
78
+ is_jinja_available,
79
+ is_notebook,
80
+ is_pillow_available,
81
+ is_pydantic_available,
82
+ is_pydot_available,
83
+ is_tensorboard_available,
84
+ is_tf_available,
85
+ is_torch_available,
86
+ )
87
+ from ._safetensors import (
88
+ SafetensorsFileMetadata,
89
+ SafetensorsRepoMetadata,
90
+ TensorInfo,
91
+ SafetensorsParsingError,
92
+ NotASafetensorsRepoError,
93
+ )
94
+ from ._subprocess import capture_output, run_interactive_subprocess, run_subprocess
95
+ from ._validators import (
96
+ HFValidationError,
97
+ smoothly_deprecate_use_auth_token,
98
+ validate_hf_hub_args,
99
+ validate_repo_id,
100
+ )
101
+ from .tqdm import (
102
+ are_progress_bars_disabled,
103
+ disable_progress_bars,
104
+ enable_progress_bars,
105
+ tqdm,
106
+ tqdm_stream_file,
107
+ )
108
+ from ._telemetry import send_telemetry
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (4.4 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_cache_assets.cpython-311.pyc ADDED
Binary file (5.81 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_cache_manager.cpython-311.pyc ADDED
Binary file (35.4 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_chunk_utils.cpython-311.pyc ADDED
Binary file (2.27 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_datetime.cpython-311.pyc ADDED
Binary file (2.44 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_deprecation.cpython-311.pyc ADDED
Binary file (7.49 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_errors.cpython-311.pyc ADDED
Binary file (15.9 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_experimental.cpython-311.pyc ADDED
Binary file (2.45 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_fixes.cpython-311.pyc ADDED
Binary file (3.03 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_git_credential.cpython-311.pyc ADDED
Binary file (5.73 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_headers.cpython-311.pyc ADDED
Binary file (10.4 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_hf_folder.cpython-311.pyc ADDED
Binary file (5.06 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_http.cpython-311.pyc ADDED
Binary file (14.8 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_pagination.cpython-311.pyc ADDED
Binary file (2.3 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_paths.cpython-311.pyc ADDED
Binary file (5.04 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_runtime.cpython-311.pyc ADDED
Binary file (13.4 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_safetensors.cpython-311.pyc ADDED
Binary file (7.38 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_subprocess.cpython-311.pyc ADDED
Binary file (5.44 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_telemetry.cpython-311.pyc ADDED
Binary file (6.15 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_token.cpython-311.pyc ADDED
Binary file (5.88 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_typing.cpython-311.pyc ADDED
Binary file (574 Bytes). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_validators.cpython-311.pyc ADDED
Binary file (9.81 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/endpoint_helpers.cpython-311.pyc ADDED
Binary file (9.87 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/insecure_hashlib.cpython-311.pyc ADDED
Binary file (665 Bytes). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/logging.cpython-311.pyc ADDED
Binary file (6.57 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/sha.cpython-311.pyc ADDED
Binary file (1.44 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/tqdm.cpython-311.pyc ADDED
Binary file (7.3 kB). View file
 
lib/python3.11/site-packages/huggingface_hub/utils/_cache_assets.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2019-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ from pathlib import Path
16
+ from typing import Union
17
+
18
+ from ..constants import HF_ASSETS_CACHE
19
+
20
+
21
def cached_assets_path(
    library_name: str,
    namespace: str = "default",
    subfolder: str = "default",
    *,
    assets_dir: Union[str, Path, None] = None,
) -> Path:
    """Return a folder path to cache arbitrary files.

    `huggingface_hub` provides a canonical folder path to store assets. This is the
    recommended way to integrate cache in a downstream library as it will benefit from
    the built-in tools to scan and delete the cache properly.

    The distinction is made between files cached from the Hub and assets. Files from the
    Hub are cached in a git-aware manner and entirely managed by `huggingface_hub`. See
    [related documentation](https://huggingface.co/docs/huggingface_hub/how-to-cache).
    All other files that a downstream library caches are considered to be "assets"
    (files downloaded from external sources, extracted from a .tar archive, preprocessed
    for training,...).

    Once the folder path is generated, it is guaranteed to exist and to be a directory.
    The path is based on 3 levels of depth: the library name, a namespace and a
    subfolder. Those 3 levels grant flexibility while allowing `huggingface_hub` to
    expect folders when scanning/deleting parts of the assets cache. Within a library,
    it is expected that all namespaces share the same subset of subfolder names but this
    is not a mandatory rule. The downstream library has then full control on which file
    structure to adopt within its cache. Namespace and subfolder are optional (would
    default to a `"default/"` subfolder) but library name is mandatory as we want every
    downstream library to manage its own cache.

    Expected tree:
    ```text
        assets/
        └── datasets/
        │   ├── SQuAD/
        │   │   ├── downloaded/
        │   │   ├── extracted/
        │   │   └── processed/
        │   ├── Helsinki-NLP--tatoeba_mt/
        │       ├── downloaded/
        │       ├── extracted/
        │       └── processed/
        └── transformers/
            ├── default/
            │   ├── something/
            ├── bert-base-cased/
            │   ├── default/
            │   └── training/
        hub/
        └── models--julien-c--EsperBERTo-small/
            ├── blobs/
            │   ├── (...)
            │   ├── (...)
            ├── refs/
            │   └── (...)
            └── [ 128]  snapshots/
                ├── 2439f60ef33a0d46d85da5001d52aeda5b00ce9f/
                │   ├── (...)
                └── bbc77c8132af1cc5cf678da3f1ddf2de43606d48/
                    └── (...)
    ```


    Args:
        library_name (`str`):
            Name of the library that will manage the cache folder. Example: `"datasets"`.
        namespace (`str`, *optional*, defaults to `"default"`):
            Namespace to which the data belongs. Example: `"SQuAD"`.
        subfolder (`str`, *optional*, defaults to `"default"`):
            Subfolder in which the data will be stored. Example: `"extracted"`.
        assets_dir (`str`, `Path`, *optional*):
            Path to the folder where assets are cached. This must not be the same folder
            where Hub files are cached. Defaults to `HF_HOME / "assets"` if not provided.
            Can also be set with `HF_ASSETS_CACHE` environment variable.

    Returns:
        Path to the cache folder (`Path`).

    Raises:
        `ValueError`:
            If the directory cannot be created because a regular file already exists
            at the target path or at one of its parents. The underlying `OSError` is
            attached as `__cause__`.

    Example:
    ```py
    >>> from huggingface_hub import cached_assets_path

    >>> cached_assets_path(library_name="datasets", namespace="SQuAD", subfolder="download")
    PosixPath('/home/wauplin/.cache/huggingface/assets/datasets/SQuAD/download')

    >>> cached_assets_path(library_name="datasets", namespace="SQuAD", subfolder="extracted")
    PosixPath('/home/wauplin/.cache/huggingface/assets/datasets/SQuAD/extracted')

    >>> cached_assets_path(library_name="datasets", namespace="Helsinki-NLP/tatoeba_mt")
    PosixPath('/home/wauplin/.cache/huggingface/assets/datasets/Helsinki-NLP--tatoeba_mt/default')

    >>> cached_assets_path(library_name="datasets", assets_dir="/tmp/tmp123456")
    PosixPath('/tmp/tmp123456/datasets/default/default')
    ```
    """
    # Resolve assets_dir: fall back to the package-wide default only when the caller
    # did not provide one, then normalize to an absolute path.
    if assets_dir is None:
        assets_dir = HF_ASSETS_CACHE
    assets_dir = Path(assets_dir).expanduser().resolve()

    # Avoid names that could create path issues: spaces and both kinds of path
    # separators are replaced so that each level maps to exactly one folder.
    for part in (" ", "/", "\\"):
        library_name = library_name.replace(part, "--")
        namespace = namespace.replace(part, "--")
        subfolder = subfolder.replace(part, "--")

    # Path to subfolder is created
    path = assets_dir / library_name / namespace / subfolder
    try:
        path.mkdir(exist_ok=True, parents=True)
    except (FileExistsError, NotADirectoryError) as err:
        # Chain the original OSError so the offending filesystem entry stays visible
        # in the traceback.
        raise ValueError(
            f"Corrupted assets folder: cannot create directory because of an existing file ({path})."
        ) from err

    return path
lib/python3.11/site-packages/huggingface_hub/utils/_cache_manager.py ADDED
@@ -0,0 +1,806 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to manage the HF cache directory."""
16
+ import os
17
+ import shutil
18
+ import time
19
+ from collections import defaultdict
20
+ from dataclasses import dataclass
21
+ from pathlib import Path
22
+ from typing import Dict, FrozenSet, List, Literal, Optional, Set, Union
23
+
24
+ from ..constants import HF_HUB_CACHE
25
+ from . import logging
26
+
27
+
28
+ logger = logging.get_logger(__name__)
29
+
30
+ REPO_TYPE_T = Literal["model", "dataset", "space"]
31
+
32
+
33
class CacheNotFound(Exception):
    """Raised when the Hugging Face cache directory cannot be found.

    Attributes:
        cache_dir (`str` or `Path`):
            The location that was looked up, exactly as given to the constructor.
    """

    cache_dir: Union[str, Path]

    def __init__(self, msg: str, cache_dir: Union[str, Path], *args, **kwargs):
        # Keep the looked-up location available on the exception instance.
        self.cache_dir = cache_dir
        super().__init__(msg, *args, **kwargs)
41
+
42
+
43
class CorruptedCacheException(Exception):
    """Raised whenever the Huggingface cache-system has an unexpected structure."""
45
+
46
+
47
@dataclass(frozen=True)
class CachedFileInfo:
    """Immutable record describing one file of a cached snapshot.

    Args:
        file_name (`str`):
            Name of the file. Example: `config.json`.
        file_path (`Path`):
            Location of the file inside the `snapshots` directory. This path is a
            symlink pointing to a blob stored in the `blobs` folder.
        blob_path (`Path`):
            Location of the blob file, i.e. `file_path.resolve()`.
        size_on_disk (`int`):
            Size of the blob file, in bytes.
        blob_last_accessed (`float`):
            Timestamp of the most recent access to the blob file (from any
            revision).
        blob_last_modified (`float`):
            Timestamp at which the blob file was last modified/created.

    <Tip warning={true}>

    How reliable `blob_last_accessed` and `blob_last_modified` are can depend on the OS
    you are using. See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
    for more details.

    </Tip>
    """

    file_name: str
    file_path: Path
    blob_path: Path
    size_on_disk: int

    blob_last_accessed: float
    blob_last_modified: float

    @property
    def blob_last_accessed_str(self) -> str:
        """
        (property) Human-readable version of `blob_last_accessed`, relative to now.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.blob_last_accessed)

    @property
    def blob_last_modified_str(self) -> str:
        """
        (property) Human-readable version of `blob_last_modified`, relative to now.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.blob_last_modified)

    @property
    def size_on_disk_str(self) -> str:
        """
        (property) Human-readable version of `size_on_disk`.

        Example: "42.2K".
        """
        return _format_size(self.size_on_disk)
112
+
113
+
114
@dataclass(frozen=True)
class CachedRevisionInfo:
    """Immutable record describing one cached revision of a repo.

    A revision corresponds to a folder inside the `snapshots` folder. That folder
    mirrors the tree structure of the repo on the Hub but contains only symlinks. A
    revision is either pointed to by one or more `refs`, or "detached" (no refs).

    Args:
        commit_hash (`str`):
            Hash of the revision (unique).
            Example: `"9338f7b671827df886678df2bdd7cc7b4f36dffd"`.
        snapshot_path (`Path`):
            Path to the revision directory in the `snapshots` folder. It contains the
            exact tree structure as the repo on the Hub.
        files (`FrozenSet[CachedFileInfo]`):
            Set of [`~CachedFileInfo`] describing all files contained in the snapshot.
        refs (`FrozenSet[str]`):
            Set of `refs` pointing to this revision. A revision without any `refs`
            is considered detached.
            Example: `{"main", "2.4.0"}` or `{"refs/pr/1"}`.
        size_on_disk (`int`):
            Sum of the sizes of the blob files symlink-ed by the revision.
        last_modified (`float`):
            Timestamp at which the revision was last created/modified.

    <Tip warning={true}>

    `last_accessed` cannot be determined correctly for a single revision because blob
    files are shared across revisions.

    </Tip>

    <Tip warning={true}>

    `size_on_disk` is not necessarily the sum of all file sizes because files may be
    duplicated. Besides, only blobs are counted, not the (negligible) size of folders
    and symlinks.

    </Tip>
    """

    commit_hash: str
    snapshot_path: Path
    size_on_disk: int
    files: FrozenSet[CachedFileInfo]
    refs: FrozenSet[str]

    last_modified: float

    @property
    def last_modified_str(self) -> str:
        """
        (property) Human-readable version of `last_modified`, relative to now.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.last_modified)

    @property
    def size_on_disk_str(self) -> str:
        """
        (property) Human-readable version of `size_on_disk`.

        Example: "42.2K".
        """
        return _format_size(self.size_on_disk)

    @property
    def nb_files(self) -> int:
        """
        (property) Number of entries in `files`.
        """
        return len(self.files)
189
+
190
+
191
@dataclass(frozen=True)
class CachedRepoInfo:
    """Immutable record describing one cached repository.

    Args:
        repo_id (`str`):
            Repo id of the repo on the Hub. Example: `"google/fleurs"`.
        repo_type (`Literal["dataset", "model", "space"]`):
            Type of the cached repo.
        repo_path (`Path`):
            Local path to the cached repo.
        size_on_disk (`int`):
            Sum of the blob file sizes in the cached repo.
        nb_files (`int`):
            Total number of blob files in the cached repo.
        revisions (`FrozenSet[CachedRevisionInfo]`):
            Set of [`~CachedRevisionInfo`] describing all revisions cached in the repo.
        last_accessed (`float`):
            Timestamp of the most recent access to a blob file of the repo.
        last_modified (`float`):
            Timestamp at which a blob file of the repo was last modified/created.

    <Tip warning={true}>

    `size_on_disk` is not necessarily the sum of all revision sizes because files may
    be shared between revisions. Besides, only blobs are counted, not the (negligible)
    size of folders and symlinks.

    </Tip>

    <Tip warning={true}>

    How reliable `last_accessed` and `last_modified` are can depend on the OS you are
    using. See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
    for more details.

    </Tip>
    """

    repo_id: str
    repo_type: REPO_TYPE_T
    repo_path: Path
    size_on_disk: int
    nb_files: int
    revisions: FrozenSet[CachedRevisionInfo]

    last_accessed: float
    last_modified: float

    @property
    def last_accessed_str(self) -> str:
        """
        (property) Human-readable version of `last_accessed`, relative to now.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.last_accessed)

    @property
    def last_modified_str(self) -> str:
        """
        (property) Human-readable version of `last_modified`, relative to now.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.last_modified)

    @property
    def size_on_disk_str(self) -> str:
        """
        (property) Human-readable version of `size_on_disk`.

        Example: "42.2K".
        """
        return _format_size(self.size_on_disk)

    @property
    def refs(self) -> Dict[str, CachedRevisionInfo]:
        """
        (property) Mapping from each `ref` name to the revision it points to.
        """
        revision_by_ref: Dict[str, CachedRevisionInfo] = {}
        for revision in self.revisions:
            for ref in revision.refs:
                revision_by_ref[ref] = revision
        return revision_by_ref
275
+
276
+
277
@dataclass(frozen=True)
class DeleteCacheStrategy:
    """Frozen data structure holding the strategy to delete cached revisions.

    This object is not meant to be instantiated programmatically but to be returned by
    [`~utils.HFCacheInfo.delete_revisions`]. See documentation for usage example.

    Args:
        expected_freed_size (`int`):
            Expected freed size (in bytes) once strategy is executed.
        blobs (`FrozenSet[Path]`):
            Set of blob file paths to be deleted.
        refs (`FrozenSet[Path]`):
            Set of reference file paths to be deleted.
        repos (`FrozenSet[Path]`):
            Set of entire repo paths to be deleted.
        snapshots (`FrozenSet[Path]`):
            Set of snapshots to be deleted (directory of symlinks).
    """

    expected_freed_size: int
    blobs: FrozenSet[Path]
    refs: FrozenSet[Path]
    repos: FrozenSet[Path]
    snapshots: FrozenSet[Path]

    @property
    def expected_freed_size_str(self) -> str:
        """
        (property) Expected size that will be freed as a human-readable string.

        Example: "42.2K".
        """
        return _format_size(self.expected_freed_size)

    def execute(self) -> None:
        """Execute the defined strategy.

        Paths are removed in this order: entire repos, refs, snapshots, blobs.

        <Tip warning={true}>

        If this method is interrupted, the cache might get corrupted. Deletion order is
        implemented so that references and symlinks are deleted before the actual blob
        files.

        </Tip>

        <Tip warning={true}>

        This method is irreversible. If executed, cached files are erased and must be
        downloaded again.

        </Tip>
        """
        # Deletion order matters. Each item is removed before the thing it points to:
        # refs go before snapshots so that a `ref` never refers to a missing
        # snapshot, and blobs go last so that a snapshot symlink never refers to a
        # deleted blob.

        # Delete entire repos
        for path in self.repos:
            _try_delete_path(path, path_type="repo")

        # Delete refs files (before the snapshots they point to)
        for path in self.refs:
            _try_delete_path(path, path_type="ref")

        # Delete snapshot directories (before the blobs their symlinks point to)
        for path in self.snapshots:
            _try_delete_path(path, path_type="snapshot")

        # Delete blob files
        for path in self.blobs:
            _try_delete_path(path, path_type="blob")

        logger.info(f"Cache deletion done. Saved {self.expected_freed_size_str}.")
351
+
352
+
353
@dataclass(frozen=True)
class HFCacheInfo:
    """Frozen data structure holding information about the entire cache-system.

    This data structure is returned by [`scan_cache_dir`] and is immutable.

    Args:
        size_on_disk (`int`):
            Sum of all valid repo sizes in the cache-system.
        repos (`FrozenSet[CachedRepoInfo]`):
            Set of [`~CachedRepoInfo`] describing all valid cached repos found on the
            cache-system while scanning.
        warnings (`List[CorruptedCacheException]`):
            List of [`~CorruptedCacheException`] that occurred while scanning the cache.
            Those exceptions are captured so that the scan can continue. Corrupted repos
            are skipped from the scan.

    <Tip warning={true}>

    Here `size_on_disk` is equal to the sum of all repo sizes (only blobs). However if
    some cached repos are corrupted, their sizes are not taken into account.

    </Tip>
    """

    size_on_disk: int
    repos: FrozenSet[CachedRepoInfo]
    warnings: List[CorruptedCacheException]

    @property
    def size_on_disk_str(self) -> str:
        """
        (property) Sum of all valid repo sizes in the cache-system as a human-readable
        string.

        Example: "42.2K".
        """
        return _format_size(self.size_on_disk)

    def delete_revisions(self, *revisions: str) -> DeleteCacheStrategy:
        """Prepare the strategy to delete one or more revisions cached locally.

        Input revisions can be any revision hash. If a revision hash is not found in the
        local cache, a warning is logged but no error is raised. Revisions can be from
        different cached repos since hashes are unique across repos.

        Nothing is deleted by this method: it only computes which paths would be
        removed and how many bytes would be freed.

        Examples:
        ```py
        >>> from huggingface_hub import scan_cache_dir
        >>> cache_info = scan_cache_dir()
        >>> delete_strategy = cache_info.delete_revisions(
        ...     "81fd1d6e7847c99f5862c9fb81387956d99ec7aa"
        ... )
        >>> print(f"Will free {delete_strategy.expected_freed_size_str}.")
        Will free 7.9K.
        >>> delete_strategy.execute()
        Cache deletion done. Saved 7.9K.
        ```

        ```py
        >>> from huggingface_hub import scan_cache_dir
        >>> scan_cache_dir().delete_revisions(
        ...     "81fd1d6e7847c99f5862c9fb81387956d99ec7aa",
        ...     "e2983b237dccf3ab4937c97fa717319a9ca1a96d",
        ...     "6c0e6080953db56375760c0471a8c5f2929baf11",
        ... ).execute()
        Cache deletion done. Saved 8.6G.
        ```

        Args:
            *revisions (`str`):
                Commit hashes of the revisions to delete.

        Returns:
            A [`~utils.DeleteCacheStrategy`] describing the deletion.

        <Tip warning={true}>

        `delete_revisions` returns a [`~utils.DeleteCacheStrategy`] object that needs to
        be executed. The [`~utils.DeleteCacheStrategy`] is not meant to be modified but
        allows having a dry run before actually executing the deletion.

        </Tip>
        """
        hashes_to_delete: Set[str] = set(revisions)

        # Group the requested revisions by the repo that contains them.
        repos_with_revisions: Dict[CachedRepoInfo, Set[CachedRevisionInfo]] = defaultdict(set)

        for repo in self.repos:
            for revision in repo.revisions:
                if revision.commit_hash in hashes_to_delete:
                    repos_with_revisions[repo].add(revision)
                    hashes_to_delete.remove(revision.commit_hash)

        # Whatever is left was not matched by any cached revision.
        if len(hashes_to_delete) > 0:
            logger.warning(f"Revision(s) not found - cannot delete them: {', '.join(hashes_to_delete)}")

        delete_strategy_blobs: Set[Path] = set()
        delete_strategy_refs: Set[Path] = set()
        delete_strategy_repos: Set[Path] = set()
        delete_strategy_snapshots: Set[Path] = set()
        delete_strategy_expected_freed_size = 0

        for affected_repo, revisions_to_delete in repos_with_revisions.items():
            other_revisions = affected_repo.revisions - revisions_to_delete

            # If no other revisions, it means all revisions are deleted
            # -> delete the entire cached repo
            if len(other_revisions) == 0:
                delete_strategy_repos.add(affected_repo.repo_path)
                delete_strategy_expected_freed_size += affected_repo.size_on_disk
                continue

            # Some revisions of the repo will be deleted but not all. Blobs still
            # linked by a remaining revision must be kept: collect them once so each
            # candidate blob is checked with a set lookup instead of a nested scan.
            blobs_still_in_use: Set[Path] = {
                kept_file.blob_path for kept_revision in other_revisions for kept_file in kept_revision.files
            }

            for revision_to_delete in revisions_to_delete:
                # Snapshot dir
                delete_strategy_snapshots.add(revision_to_delete.snapshot_path)

                # Refs dir
                for ref in revision_to_delete.refs:
                    delete_strategy_refs.add(affected_repo.repo_path / "refs" / ref)

                # Blobs dir
                for file in revision_to_delete.files:
                    blob_path = file.blob_path
                    # Skip blobs already scheduled (counted once) or still referenced.
                    if blob_path in delete_strategy_blobs or blob_path in blobs_still_in_use:
                        continue
                    delete_strategy_blobs.add(blob_path)
                    delete_strategy_expected_freed_size += file.size_on_disk

        # Return the strategy instead of executing it.
        return DeleteCacheStrategy(
            blobs=frozenset(delete_strategy_blobs),
            refs=frozenset(delete_strategy_refs),
            repos=frozenset(delete_strategy_repos),
            snapshots=frozenset(delete_strategy_snapshots),
            expected_freed_size=delete_strategy_expected_freed_size,
        )
494
+
495
+
496
def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
    """Scan the whole HF cache-system and report it as a [`~HFCacheInfo`] structure.

    Use `scan_cache_dir` to inspect your cache-system programmatically. The cache is
    scanned repo by repo. When a repo is corrupted, the [`~CorruptedCacheException`]
    raised internally is captured and returned in the `warnings` of the
    [`~HFCacheInfo`] structure. Only valid repos get a proper report.

    ```py
    >>> from huggingface_hub import scan_cache_dir

    >>> hf_cache_info = scan_cache_dir()
    HFCacheInfo(
        size_on_disk=3398085269,
        repos=frozenset({
            CachedRepoInfo(
                repo_id='t5-small',
                repo_type='model',
                repo_path=PosixPath(...),
                size_on_disk=970726914,
                nb_files=11,
                revisions=frozenset({
                    CachedRevisionInfo(
                        commit_hash='d78aea13fa7ecd06c29e3e46195d6341255065d5',
                        size_on_disk=970726339,
                        snapshot_path=PosixPath(...),
                        files=frozenset({
                            CachedFileInfo(
                                file_name='config.json',
                                size_on_disk=1197,
                                file_path=PosixPath(...),
                                blob_path=PosixPath(...),
                            ),
                            CachedFileInfo(...),
                            ...
                        }),
                    ),
                    CachedRevisionInfo(...),
                    ...
                }),
            ),
            CachedRepoInfo(...),
            ...
        }),
        warnings=[
            CorruptedCacheException("Snapshots dir doesn't exist in cached repo: ..."),
            CorruptedCacheException(...),
            ...
        ],
    )
    ```

    You can also print a detailed report directly from the `huggingface-cli` using:
    ```text
    > huggingface-cli scan-cache
    REPO ID                     REPO TYPE SIZE ON DISK NB FILES REFS                LOCAL PATH
    --------------------------- --------- ------------ -------- ------------------- -------------------------------------------------------------------------
    glue                        dataset         116.3K       15 1.17.0, main, 2.4.0 /Users/lucain/.cache/huggingface/hub/datasets--glue
    google/fleurs               dataset          64.9M        6 main, refs/pr/1     /Users/lucain/.cache/huggingface/hub/datasets--google--fleurs
    Jean-Baptiste/camembert-ner model           441.0M        7 main                /Users/lucain/.cache/huggingface/hub/models--Jean-Baptiste--camembert-ner
    bert-base-cased             model             1.9G       13 main                /Users/lucain/.cache/huggingface/hub/models--bert-base-cased
    t5-base                     model            10.1K        3 main                /Users/lucain/.cache/huggingface/hub/models--t5-base
    t5-small                    model           970.7M       11 refs/pr/1, main     /Users/lucain/.cache/huggingface/hub/models--t5-small

    Done in 0.0s. Scanned 6 repo(s) for a total of 3.4G.
    Got 1 warning(s) while scanning. Use -vvv to print details.
    ```

    Args:
        cache_dir (`str` or `Path`, `optional`):
            Cache directory to scan. Defaults to the default HF cache directory.

    Raises:
        `CacheNotFound`
            If the cache directory does not exist.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the cache directory is a file, instead of a directory.

    Returns: a [`~HFCacheInfo`] object.
    """
    # Fall back to the default location, then normalize to an absolute path.
    cache_dir = Path(HF_HUB_CACHE if cache_dir is None else cache_dir).expanduser().resolve()

    if not cache_dir.exists():
        raise CacheNotFound(
            f"Cache directory not found: {cache_dir}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable.",
            cache_dir=cache_dir,
        )

    if cache_dir.is_file():
        raise ValueError(
            f"Scan cache expects a directory but found a file: {cache_dir}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable."
        )

    valid_repos: Set[CachedRepoInfo] = set()
    scan_warnings: List[CorruptedCacheException] = []
    for entry in cache_dir.iterdir():
        # The './.locks/' folder is not a cached repo.
        if entry.name == ".locks":
            continue
        try:
            repo_info = _scan_cached_repo(entry)
        except CorruptedCacheException as error:
            # A corrupted repo is reported but must not abort the whole scan.
            scan_warnings.append(error)
        else:
            valid_repos.add(repo_info)

    total_size = sum(repo.size_on_disk for repo in valid_repos)
    return HFCacheInfo(
        repos=frozenset(valid_repos),
        size_on_disk=total_size,
        warnings=scan_warnings,
    )
612
+
613
+
614
def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
    """Scan a single cache repo and return information about it.

    The folder name is expected to look like `<type>s--<repo id with "--" for "/">`
    (e.g. `datasets--google--fleurs`) and to contain a `snapshots` directory and,
    optionally, a `refs` directory.

    Args:
        repo_path (`Path`):
            Path to the cached repo folder.

    Returns:
        A [`~CachedRepoInfo`] describing the repo, its revisions and their files.

    Raises:
        [`~CorruptedCacheException`]
            On any unexpected structure: path is not a directory, folder name is not
            a valid cache name, unknown repo type, missing `snapshots` directory,
            `refs` being a file, a file directly under `snapshots`, a broken symlink,
            or a ref pointing to a commit hash that has no snapshot.
    """
    if not repo_path.is_dir():
        raise CorruptedCacheException(f"Repo path is not a directory: {repo_path}")

    if "--" not in repo_path.name:
        raise CorruptedCacheException(f"Repo path is not a valid HuggingFace cache directory: {repo_path}")

    repo_type, repo_id = repo_path.name.split("--", maxsplit=1)
    repo_type = repo_type[:-1]  # "models" -> "model"
    repo_id = repo_id.replace("--", "/")  # "google--fleurs" -> "google/fleurs"

    if repo_type not in {"dataset", "model", "space"}:
        raise CorruptedCacheException(
            f"Repo type must be `dataset`, `model` or `space`, found `{repo_type}` ({repo_path})."
        )

    # Key is blob_path, value is blob stats. Shared by all revisions so that each
    # blob is stat-ed only once and counted only once at repo level.
    blob_stats: Dict[Path, os.stat_result] = {}

    snapshots_path = repo_path / "snapshots"
    refs_path = repo_path / "refs"

    # `is_dir()` is already False when the path does not exist.
    if not snapshots_path.is_dir():
        raise CorruptedCacheException(f"Snapshots dir doesn't exist in cached repo: {snapshots_path}")

    # Scan over `refs` directory

    # key is revision hash, value is set of refs
    refs_by_hash: Dict[str, Set[str]] = defaultdict(set)
    if refs_path.exists():
        # Example of `refs` directory
        # ── refs
        #     ├── main
        #     └── refs
        #         └── pr
        #             └── 1
        if refs_path.is_file():
            raise CorruptedCacheException(f"Refs directory cannot be a file: {refs_path}")

        for ref_path in refs_path.glob("**/*"):
            # glob("**/*") iterates over all files and directories -> skip directories
            if ref_path.is_dir():
                continue

            ref_name = str(ref_path.relative_to(refs_path))
            # Strip surrounding whitespace: a trailing newline in the ref file must
            # not prevent the hash from matching its snapshot folder name.
            commit_hash = ref_path.read_text().strip()

            refs_by_hash[commit_hash].add(ref_name)

    # Scan snapshots directory
    cached_revisions: Set[CachedRevisionInfo] = set()
    for revision_path in snapshots_path.iterdir():
        if revision_path.is_file():
            raise CorruptedCacheException(f"Snapshots folder corrupted. Found a file: {revision_path}")

        cached_files: Set[CachedFileInfo] = set()
        for file_path in revision_path.glob("**/*"):
            # glob("**/*") iterates over all files and directories -> skip directories
            if file_path.is_dir():
                continue

            # Snapshot entries are symlinks: resolve to reach the actual blob.
            blob_path = Path(file_path).resolve()
            if not blob_path.exists():
                raise CorruptedCacheException(f"Blob missing (broken symlink): {blob_path}")

            if blob_path not in blob_stats:
                blob_stats[blob_path] = blob_path.stat()

            cached_files.add(
                CachedFileInfo(
                    file_name=file_path.name,
                    file_path=file_path,
                    size_on_disk=blob_stats[blob_path].st_size,
                    blob_path=blob_path,
                    blob_last_accessed=blob_stats[blob_path].st_atime,
                    blob_last_modified=blob_stats[blob_path].st_mtime,
                )
            )

        # Last modified is either the last modified blob file or the revision folder
        # itself if it is empty
        if len(cached_files) > 0:
            revision_last_modified = max(blob_stats[file.blob_path].st_mtime for file in cached_files)
        else:
            revision_last_modified = revision_path.stat().st_mtime

        cached_revisions.add(
            CachedRevisionInfo(
                commit_hash=revision_path.name,
                files=frozenset(cached_files),
                # `pop` so that only refs without a snapshot remain afterwards.
                refs=frozenset(refs_by_hash.pop(revision_path.name, set())),
                # Deduplicate blobs: several files may link to the same blob.
                size_on_disk=sum(
                    blob_stats[blob_path].st_size for blob_path in {file.blob_path for file in cached_files}
                ),
                snapshot_path=revision_path,
                last_modified=revision_last_modified,
            )
        )

    # Check that all refs referred to an existing revision
    if len(refs_by_hash) > 0:
        raise CorruptedCacheException(
            f"Reference(s) refer to missing commit hashes: {dict(refs_by_hash)} ({repo_path})."
        )

    # Last modified is either the last modified blob file or the repo folder itself if
    # no blob files has been found. Same for last accessed.
    if len(blob_stats) > 0:
        repo_last_accessed = max(stat.st_atime for stat in blob_stats.values())
        repo_last_modified = max(stat.st_mtime for stat in blob_stats.values())
    else:
        repo_stats = repo_path.stat()
        repo_last_accessed = repo_stats.st_atime
        repo_last_modified = repo_stats.st_mtime

    # Build and return frozen structure
    return CachedRepoInfo(
        nb_files=len(blob_stats),
        repo_id=repo_id,
        repo_path=repo_path,
        repo_type=repo_type,  # type: ignore
        revisions=frozenset(cached_revisions),
        size_on_disk=sum(stat.st_size for stat in blob_stats.values()),
        last_accessed=repo_last_accessed,
        last_modified=repo_last_modified,
    )
744
+
745
+
746
+ def _format_size(num: int) -> str:
747
+ """Format size in bytes into a human-readable string.
748
+
749
+ Taken from https://stackoverflow.com/a/1094933
750
+ """
751
+ num_f = float(num)
752
+ for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
753
+ if abs(num_f) < 1000.0:
754
+ return f"{num_f:3.1f}{unit}"
755
+ num_f /= 1000.0
756
+ return f"{num_f:.1f}Y"
757
+
758
+
759
+ _TIMESINCE_CHUNKS = (
760
+ # Label, divider, max value
761
+ ("second", 1, 60),
762
+ ("minute", 60, 60),
763
+ ("hour", 60 * 60, 24),
764
+ ("day", 60 * 60 * 24, 6),
765
+ ("week", 60 * 60 * 24 * 7, 6),
766
+ ("month", 60 * 60 * 24 * 30, 11),
767
+ ("year", 60 * 60 * 24 * 365, None),
768
+ )
769
+
770
+
771
+ def _format_timesince(ts: float) -> str:
772
+ """Format timestamp in seconds into a human-readable string, relative to now.
773
+
774
+ Vaguely inspired by Django's `timesince` formatter.
775
+ """
776
+ delta = time.time() - ts
777
+ if delta < 20:
778
+ return "a few seconds ago"
779
+ for label, divider, max_value in _TIMESINCE_CHUNKS: # noqa: B007
780
+ value = round(delta / divider)
781
+ if max_value is not None and value <= max_value:
782
+ break
783
+ return f"{value} {label}{'s' if value > 1 else ''} ago"
784
+
785
+
786
def _try_delete_path(path: Path, path_type: str) -> None:
    """Attempt to remove a local file or folder.

    A missing path or a permission error is logged as a warning (with traceback) and
    then ignored. Other errors are not handled here.

    Args:
        path (`Path`)
            Path to delete. Can be a file or a folder.
        path_type (`str`)
            Kind of path being deleted. Only used in log messages. Example: "snapshot".
    """
    logger.info(f"Delete {path_type}: {path}")
    try:
        # Anything that is not a regular file is removed as a directory tree.
        if not path.is_file():
            shutil.rmtree(path)
        else:
            os.remove(path)
    except (FileNotFoundError, PermissionError) as error:
        reason = "file not found" if isinstance(error, FileNotFoundError) else "permission denied"
        logger.warning(f"Couldn't delete {path_type}: {reason} ({path})", exc_info=True)
lib/python3.11/site-packages/huggingface_hub/utils/_chunk_utils.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains a utility to iterate by chunks over an iterator."""
16
+ import itertools
17
+ from typing import Iterable, TypeVar
18
+
19
+
20
+ T = TypeVar("T")
21
+
22
+
23
def chunk_iterable(iterable: Iterable[T], chunk_size: int) -> Iterable[Iterable[T]]:
    """Yield the items of an iterable as successive chunks.

    Each chunk is itself a lazy iterator reading from the same underlying iterator as
    the following chunks.

    Taken from https://stackoverflow.com/a/8998040.
    See also https://github.com/huggingface/huggingface_hub/pull/920#discussion_r938793088.

    Args:
        iterable (`Iterable`):
            The iterable on which we want to iterate.
        chunk_size (`int`):
            Size of the chunks. Must be a strictly positive integer (e.g. >0).

    Example:

    ```python
    >>> from huggingface_hub.utils import chunk_iterable

    >>> for items in chunk_iterable(range(17), chunk_size=8):
    ...     print(list(items))
    # [0, 1, 2, 3, 4, 5, 6, 7]
    # [8, 9, 10, 11, 12, 13, 14, 15]
    # [16] # smaller last chunk
    ```

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `chunk_size` <= 0.

    <Tip warning={true}>
        The last chunk can be smaller than `chunk_size`.
    </Tip>
    """
    if not (isinstance(chunk_size, int) and chunk_size > 0):
        raise ValueError("`chunk_size` must be a strictly positive integer (>0).")

    source = iter(iterable)
    # Pulling the first item of each chunk here is what ends the generator once the
    # source is exhausted; the rest of the chunk is read lazily from `source`.
    for first_item in source:
        yield itertools.chain((first_item,), itertools.islice(source, chunk_size - 1))
lib/python3.11/site-packages/huggingface_hub/utils/_datetime.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to handle datetimes in Huggingface Hub."""
16
+ from datetime import datetime, timedelta, timezone
17
+
18
+
19
# Local machine offset compared to UTC, computed once at import time.
# Taken from https://stackoverflow.com/a/3168394.
# `utcoffset()` returns `None` if no offset -> empty timedelta.
# NOTE: `parse_datetime` does not rely on this value (an import-time offset is wrong
# for dates falling in a different DST period). The constant is kept so that any
# code importing it keeps working.
UTC_OFFSET = datetime.now(timezone.utc).astimezone().utcoffset() or timedelta()


def parse_datetime(date_string: str) -> datetime:
    """
    Parses a date_string returned from the server to a datetime object.

    This parser is a weak parser in the sense that it handles only a single format of
    date_string. It is expected that the server format will never change. The
    implementation depends only on the standard lib to avoid an external dependency
    (python-dateutil). See full discussion about this decision on PR:
    https://github.com/huggingface/huggingface_hub/pull/999.

    Example:
        ```py
        > parse_datetime('2022-08-19T07:19:38.123Z')
        datetime.datetime(2022, 8, 19, 7, 19, 38, 123000, tzinfo=timezone.utc)
        ```

    Args:
        date_string (`str`):
            A string representing a datetime returned by the Hub server.
            String is expected to follow '%Y-%m-%dT%H:%M:%S.%fZ' pattern. A
            30-character value (nanosecond precision) is truncated to microseconds.

    Returns:
        A timezone-aware python datetime object, expressed in UTC.

    Raises:
        :class:`ValueError`:
            If `date_string` cannot be parsed.
    """
    normalized = date_string
    if len(normalized) == 30:
        # Means timezoned-timestamp with nanoseconds precision. `%f` accepts at most
        # 6 digits, so drop the last 3 digits and restore the trailing "Z".
        normalized = normalized[:-4] + "Z"

    try:
        dt = datetime.strptime(normalized, "%Y-%m-%dT%H:%M:%S.%fZ")
    except ValueError as e:
        raise ValueError(
            f"Cannot parse '{date_string}' as a datetime. Date string is expected to"
            " follow '%Y-%m-%dT%H:%M:%S.%fZ' pattern."
        ) from e

    # A trailing "Z" means UTC, see
    # https://en.wikipedia.org/wiki/ISO_8601#Coordinated_Universal_Time_(UTC).
    # The parsed value is naive, so attach UTC directly. Shifting by the local offset
    # and converting back is off by the DST delta whenever the local offset at the
    # parsed date differs from the offset at import time.
    return dt.replace(tzinfo=timezone.utc)
lib/python3.11/site-packages/huggingface_hub/utils/_deprecation.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ from functools import wraps
3
+ from inspect import Parameter, signature
4
+ from typing import Iterable, Optional
5
+
6
+
7
+ def _deprecate_positional_args(*, version: str):
8
+ """Decorator for methods that issues warnings for positional arguments.
9
+ Using the keyword-only argument syntax in pep 3102, arguments after the
10
+ * will issue a warning when passed as a positional argument.
11
+
12
+ Args:
13
+ version (`str`):
14
+ The version when positional arguments will result in error.
15
+ """
16
+
17
+ def _inner_deprecate_positional_args(f):
18
+ sig = signature(f)
19
+ kwonly_args = []
20
+ all_args = []
21
+ for name, param in sig.parameters.items():
22
+ if param.kind == Parameter.POSITIONAL_OR_KEYWORD:
23
+ all_args.append(name)
24
+ elif param.kind == Parameter.KEYWORD_ONLY:
25
+ kwonly_args.append(name)
26
+
27
+ @wraps(f)
28
+ def inner_f(*args, **kwargs):
29
+ extra_args = len(args) - len(all_args)
30
+ if extra_args <= 0:
31
+ return f(*args, **kwargs)
32
+ # extra_args > 0
33
+ args_msg = [
34
+ f"{name}='{arg}'" if isinstance(arg, str) else f"{name}={arg}"
35
+ for name, arg in zip(kwonly_args[:extra_args], args[-extra_args:])
36
+ ]
37
+ args_msg = ", ".join(args_msg)
38
+ warnings.warn(
39
+ f"Deprecated positional argument(s) used in '{f.__name__}': pass"
40
+ f" {args_msg} as keyword args. From version {version} passing these"
41
+ " as positional arguments will result in an error,",
42
+ FutureWarning,
43
+ )
44
+ kwargs.update(zip(sig.parameters, args))
45
+ return f(**kwargs)
46
+
47
+ return inner_f
48
+
49
+ return _inner_deprecate_positional_args
50
+
51
+
52
+ def _deprecate_arguments(
53
+ *,
54
+ version: str,
55
+ deprecated_args: Iterable[str],
56
+ custom_message: Optional[str] = None,
57
+ ):
58
+ """Decorator to issue warnings when using deprecated arguments.
59
+
60
+ TODO: could be useful to be able to set a custom error message.
61
+
62
+ Args:
63
+ version (`str`):
64
+ The version when deprecated arguments will result in error.
65
+ deprecated_args (`List[str]`):
66
+ List of the arguments to be deprecated.
67
+ custom_message (`str`, *optional*):
68
+ Warning message that is raised. If not passed, a default warning message
69
+ will be created.
70
+ """
71
+
72
+ def _inner_deprecate_positional_args(f):
73
+ sig = signature(f)
74
+
75
+ @wraps(f)
76
+ def inner_f(*args, **kwargs):
77
+ # Check for used deprecated arguments
78
+ used_deprecated_args = []
79
+ for _, parameter in zip(args, sig.parameters.values()):
80
+ if parameter.name in deprecated_args:
81
+ used_deprecated_args.append(parameter.name)
82
+ for kwarg_name, kwarg_value in kwargs.items():
83
+ if (
84
+ # If argument is deprecated but still used
85
+ kwarg_name in deprecated_args
86
+ # And then the value is not the default value
87
+ and kwarg_value != sig.parameters[kwarg_name].default
88
+ ):
89
+ used_deprecated_args.append(kwarg_name)
90
+
91
+ # Warn and proceed
92
+ if len(used_deprecated_args) > 0:
93
+ message = (
94
+ f"Deprecated argument(s) used in '{f.__name__}':"
95
+ f" {', '.join(used_deprecated_args)}. Will not be supported from"
96
+ f" version '{version}'."
97
+ )
98
+ if custom_message is not None:
99
+ message += "\n\n" + custom_message
100
+ warnings.warn(message, FutureWarning)
101
+ return f(*args, **kwargs)
102
+
103
+ return inner_f
104
+
105
+ return _inner_deprecate_positional_args
106
+
107
+
108
+ def _deprecate_method(*, version: str, message: Optional[str] = None):
109
+ """Decorator to issue warnings when using a deprecated method.
110
+
111
+ Args:
112
+ version (`str`):
113
+ The version when deprecated arguments will result in error.
114
+ message (`str`, *optional*):
115
+ Warning message that is raised. If not passed, a default warning message
116
+ will be created.
117
+ """
118
+
119
+ def _inner_deprecate_method(f):
120
+ name = f.__name__
121
+ if name == "__init__":
122
+ name = f.__qualname__.split(".")[0] # class name instead of method name
123
+
124
+ @wraps(f)
125
+ def inner_f(*args, **kwargs):
126
+ warning_message = (
127
+ f"'{name}' (from '{f.__module__}') is deprecated and will be removed from version '{version}'."
128
+ )
129
+ if message is not None:
130
+ warning_message += " " + message
131
+ warnings.warn(warning_message, FutureWarning)
132
+ return f(*args, **kwargs)
133
+
134
+ return inner_f
135
+
136
+ return _inner_deprecate_method
lib/python3.11/site-packages/huggingface_hub/utils/_errors.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import Optional
3
+
4
+ from requests import HTTPError, Response
5
+
6
+ from ._fixes import JSONDecodeError
7
+
8
+
9
# Matches Hub URLs that target a repository, either through the API or through a
# `resolve` (file download) route. Used to decide whether a 401 response should be
# reported as a "repository not found" error.
# Dots in the host names are escaped so that they match a literal "." only.
REPO_API_REGEX = re.compile(
    r"""
        # staging or production endpoint
        ^https://(hub-ci\.)?huggingface\.co
        (
            # on /api/repo_type/repo_id
            /api/(models|datasets|spaces)/(.+)
            |
            # or /repo_id/resolve/revision/...
            /(.+)/resolve/(.+)
        )
    """,
    flags=re.VERBOSE,
)
23
+
24
+
25
class FileMetadataError(OSError):
    """Raised when a file's metadata cannot be retrieved from the Hub.

    Typical causes are a missing ETag or a missing commit hash.

    Subclasses `OSError` for backward compatibility.
    """
30
+
31
+
32
class HfHubHTTPError(HTTPError):
    """
    HTTPError to inherit from for any custom HTTP Error raised in HF Hub.

    Any HTTPError is converted at least into a `HfHubHTTPError`. If some information is
    sent back by the server, it will be added to the error message.

    Added details:
    - Request id from "X-Request-Id" header if exists.
    - Server error message from the header "X-Error-Message".
    - Server error message if we can find one in the response body.

    Example:
    ```py
        import requests
        from huggingface_hub.utils import get_session, hf_raise_for_status, HfHubHTTPError

        response = get_session().post(...)
        try:
            hf_raise_for_status(response)
        except HfHubHTTPError as e:
            print(str(e)) # formatted message
            e.request_id, e.server_message # details returned by server

            # Complete the error message with additional information once it's raised
            e.append_to_message("\n`create_commit` expects the repository to exist.")
            raise
    ```
    """

    # Value of the "X-Request-Id" response header, if any.
    request_id: Optional[str] = None
    # Error message(s) collected from the response headers and body, if any.
    server_message: Optional[str] = None

    def __init__(self, message: str, response: Optional[Response] = None):
        """Build the error, enriching `message` with details found in `response`.

        Args:
            message (`str`):
                Base error message.
            response (`Response`, *optional*):
                Server response from which the request id and server-side error
                messages are extracted.
        """
        # Parse server information if any.
        if response is not None:
            self.request_id = response.headers.get("X-Request-Id")
            try:
                server_data = response.json()
            except JSONDecodeError:
                server_data = {}
            # A valid JSON body is not necessarily an object (it can be a list, a
            # string, ...). Only a dict can carry "error"/"errors" keys.
            if not isinstance(server_data, dict):
                server_data = {}

            # Retrieve server error message from multiple sources
            server_message_from_headers = response.headers.get("X-Error-Message")
            server_message_from_body = server_data.get("error")
            raw_errors = server_data.get("errors")
            if not isinstance(raw_errors, list):
                raw_errors = []
            server_multiple_messages_from_body = "\n".join(
                str(error["message"]) for error in raw_errors if isinstance(error, dict) and "message" in error
            )

            # Concatenate error messages, skipping any text already present
            _server_message = ""
            if server_message_from_headers is not None:  # from headers
                _server_message += server_message_from_headers + "\n"
            if server_message_from_body is not None:  # from body "error"
                if isinstance(server_message_from_body, list):
                    server_message_from_body = "\n".join(str(item) for item in server_message_from_body)
                elif not isinstance(server_message_from_body, str):
                    # Unexpected payload type: keep it readable rather than crash
                    server_message_from_body = str(server_message_from_body)
                if server_message_from_body not in _server_message:
                    _server_message += server_message_from_body + "\n"
            # from body "errors" (an empty string is always "in" and thus skipped)
            if server_multiple_messages_from_body not in _server_message:
                _server_message += server_multiple_messages_from_body + "\n"
            _server_message = _server_message.strip()

            # Set message to `HfHubHTTPError` (if any)
            if _server_message != "":
                self.server_message = _server_message

        super().__init__(
            _format_error_message(
                message,
                request_id=self.request_id,
                server_message=self.server_message,
            ),
            response=response,  # type: ignore
            request=response.request if response is not None else None,  # type: ignore
        )

    def append_to_message(self, additional_message: str) -> None:
        """Append additional information to the `HfHubHTTPError` initial message."""
        # The message is stored as the first element of `args`.
        self.args = (self.args[0] + additional_message,) + self.args[1:]
112
+
113
+
114
class RepositoryNotFoundError(HfHubHTTPError):
    """
    Raised when a hf.co URL points to a repository that cannot be found: either the
    repository name is invalid, or the repository is private and the user does not
    have access to it.

    Example:

    ```py
    >>> from huggingface_hub import model_info
    >>> model_info("<non_existent_repository>")
    (...)
    huggingface_hub.utils._errors.RepositoryNotFoundError: 401 Client Error. (Request ID: PvMw_VjBMjVdMz53WKIzP)

    Repository Not Found for url: https://huggingface.co/api/models/%3Cnon_existent_repository%3E.
    Please make sure you specified the correct `repo_id` and `repo_type`.
    If you are trying to access a private or gated repo, make sure you are authenticated.
    Invalid username or password.
    ```
    """
133
+
134
+
135
class GatedRepoError(RepositoryNotFoundError):
    """
    Raised when the requested repository is gated and the user is not on its
    authorized list.

    Note: subclasses `RepositoryNotFoundError` to ensure backward compatibility.

    Example:

    ```py
    >>> from huggingface_hub import model_info
    >>> model_info("<gated_repository>")
    (...)
    huggingface_hub.utils._errors.GatedRepoError: 403 Client Error. (Request ID: ViT1Bf7O_026LGSQuVqfa)

    Cannot access gated repo for url https://huggingface.co/api/models/ardent-figment/gated-model.
    Access to model ardent-figment/gated-model is restricted and you are not in the authorized list.
    Visit https://huggingface.co/ardent-figment/gated-model to ask for access.
    ```
    """
155
+
156
+
157
class RevisionNotFoundError(HfHubHTTPError):
    """
    Raised when a hf.co URL targets an existing repository but a revision that
    does not exist.

    Example:

    ```py
    >>> from huggingface_hub import hf_hub_download
    >>> hf_hub_download('bert-base-cased', 'config.json', revision='<non-existent-revision>')
    (...)
    huggingface_hub.utils._errors.RevisionNotFoundError: 404 Client Error. (Request ID: Mwhe_c3Kt650GcdKEFomX)

    Revision Not Found for url: https://huggingface.co/bert-base-cased/resolve/%3Cnon-existent-revision%3E/config.json.
    ```
    """
173
+
174
+
175
class EntryNotFoundError(HfHubHTTPError):
    """
    Raised when a hf.co URL targets an existing repository and revision but a
    filename that does not exist.

    Example:

    ```py
    >>> from huggingface_hub import hf_hub_download
    >>> hf_hub_download('bert-base-cased', '<non-existent-file>')
    (...)
    huggingface_hub.utils._errors.EntryNotFoundError: 404 Client Error. (Request ID: 53pNl6M0MxsnG5Sw8JA6x)

    Entry Not Found for url: https://huggingface.co/bert-base-cased/resolve/main/%3Cnon-existent-file%3E.
    ```
    """
191
+
192
+
193
class LocalEntryNotFoundError(EntryNotFoundError, FileNotFoundError, ValueError):
    """
    Raised when a file or snapshot is requested while the network is disabled or
    unavailable (connection issue) and the entry is not on the disk. The entry may
    still exist on the Hub.

    Note: inheriting from `ValueError` ensures backward compatibility.
    Note: because of `EntryNotFoundError`, this class derives from `HTTPError` even
          when the failure is not a network issue.

    Example:

    ```py
    >>> from huggingface_hub import hf_hub_download
    >>> hf_hub_download('bert-base-cased', '<non-cached-file>', local_files_only=True)
    (...)
    huggingface_hub.utils._errors.LocalEntryNotFoundError: Cannot find the requested files in the disk cache and outgoing traffic has been disabled. To enable hf.co look-ups and downloads online, set 'local_files_only' to False.
    ```
    """

    def __init__(self, message: str):
        """Create the error from `message` only: no server response is attached."""
        super().__init__(message, response=None)
214
+
215
+
216
class BadRequestError(HfHubHTTPError, ValueError):
    """
    Raised by `hf_raise_for_status` when the server answers with an HTTP 400 error.

    Example:

    ```py
    >>> resp = requests.post("hf.co/api/check", ...)
    >>> hf_raise_for_status(resp, endpoint_name="check")
    huggingface_hub.utils._errors.BadRequestError: Bad request for check endpoint: {details} (Request ID: XXX)
    ```
    """
228
+
229
+
230
def hf_raise_for_status(response: Response, endpoint_name: Optional[str] = None) -> None:
    """
    Internal version of `response.raise_for_status()` that refines a potential
    HTTPError. The raised exception is always an instance of `HfHubHTTPError`.

    This helper is meant to be the unique method to raise_for_status when making a call
    to the Hugging Face Hub.

    Example:
    ```py
        import requests
        from huggingface_hub.utils import get_session, hf_raise_for_status, HfHubHTTPError

        response = get_session().post(...)
        try:
            hf_raise_for_status(response)
        except HfHubHTTPError as e:
            print(str(e)) # formatted message
            e.request_id, e.server_message # details returned by server

            # Complete the error message with additional information once it's raised
            e.append_to_message("\n`create_commit` expects the repository to exist.")
            raise
    ```

    Args:
        response (`Response`):
            Response from the server.
        endpoint_name (`str`, *optional*):
            Name of the endpoint that has been called. If provided, the error message
            will be more complete.

    <Tip warning={true}>

    Raises when the request has failed:

        - [`~utils.RepositoryNotFoundError`]
            If the repository to download from cannot be found. This may be because it
            doesn't exist, because `repo_type` is not set correctly, or because the repo
            is `private` and you do not have access.
        - [`~utils.GatedRepoError`]
            If the repository exists but is gated and the user is not on the authorized
            list.
        - [`~utils.RevisionNotFoundError`]
            If the repository exists but the revision couldn't be found.
        - [`~utils.EntryNotFoundError`]
            If the repository exists but the entry (e.g. the requested file) couldn't be
            found.
        - [`~utils.BadRequestError`]
            If request failed with a HTTP 400 BadRequest error.
        - [`~utils.HfHubHTTPError`]
            If request failed for a reason not listed above.

    </Tip>
    """
    try:
        response.raise_for_status()
    except HTTPError as e:
        # The server tells which kind of failure occurred through this header.
        error_code = response.headers.get("X-Error-Code")

        if error_code == "RevisionNotFound":
            message = f"{response.status_code} Client Error.\n\nRevision Not Found for url: {response.url}."
            raise RevisionNotFoundError(message, response) from e

        if error_code == "EntryNotFound":
            message = f"{response.status_code} Client Error.\n\nEntry Not Found for url: {response.url}."
            raise EntryNotFoundError(message, response) from e

        if error_code == "GatedRepo":
            message = f"{response.status_code} Client Error.\n\nCannot access gated repo for url {response.url}."
            raise GatedRepoError(message, response) from e

        if error_code == "RepoNotFound" or (
            response.status_code == 401
            and response.request is not None
            and response.request.url is not None
            and REPO_API_REGEX.search(response.request.url) is not None
        ):
            # 401 is misleading as it is returned for:
            #    - private and gated repos if user is not authenticated
            #    - missing repos
            # => for now, we process them as `RepoNotFound` anyway.
            # See https://gist.github.com/Wauplin/46c27ad266b15998ce56a6603796f0b9
            message = (
                f"{response.status_code} Client Error.\n\n"
                f"Repository Not Found for url: {response.url}."
                "\nPlease make sure you specified the correct `repo_id` and"
                " `repo_type`.\nIf you are trying to access a private or gated repo,"
                " make sure you are authenticated."
            )
            raise RepositoryNotFoundError(message, response) from e

        if response.status_code == 400:
            if endpoint_name is not None:
                message = f"\n\nBad request for {endpoint_name} endpoint:"
            else:
                message = "\n\nBad request:"
            raise BadRequestError(message, response=response) from e

        # Convert `HTTPError` into a `HfHubHTTPError` to display request information
        # as well (request id and/or server error message)
        raise HfHubHTTPError(str(e), response=response) from e
334
+
335
+
336
+ def _format_error_message(message: str, request_id: Optional[str], server_message: Optional[str]) -> str:
337
+ """
338
+ Format the `HfHubHTTPError` error message based on initial message and information
339
+ returned by the server.
340
+
341
+ Used when initializing `HfHubHTTPError`.
342
+ """
343
+ # Add message from response body
344
+ if server_message is not None and len(server_message) > 0 and server_message.lower() not in message.lower():
345
+ if "\n\n" in message:
346
+ message += "\n" + server_message
347
+ else:
348
+ message += "\n\n" + server_message
349
+
350
+ # Add Request ID
351
+ if request_id is not None and str(request_id).lower() not in message.lower():
352
+ request_id_message = f" (Request ID: {request_id})"
353
+ if "\n" in message:
354
+ newline_index = message.index("\n")
355
+ message = message[:newline_index] + request_id_message + message[newline_index:]
356
+ else:
357
+ message += request_id_message
358
+
359
+ return message
lib/python3.11/site-packages/huggingface_hub/utils/_experimental.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2023-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to flag a feature as "experimental" in Huggingface Hub."""
16
+ import warnings
17
+ from functools import wraps
18
+ from typing import Callable
19
+
20
+ from .. import constants
21
+
22
+
23
def experimental(fn: Callable) -> Callable:
    """Decorator to flag a feature as experimental.

    An experimental feature triggers a `UserWarning` each time it is called, as it
    might be subject to breaking changes in the future. No warning is emitted when
    `constants.HF_HUB_DISABLE_EXPERIMENTAL_WARNING` is truthy; the warning text tells
    users to set `HF_HUB_DISABLE_EXPERIMENTAL_WARNING=1` as environment variable.

    Args:
        fn (`Callable`):
            The function to flag as experimental.

    Returns:
        `Callable`: The decorated function.

    Example:

    ```python
    >>> from huggingface_hub.utils import experimental

    >>> @experimental
    ... def my_function():
    ...     print("Hello world!")

    >>> my_function()
    UserWarning: 'my_function' is experimental and might be subject to breaking changes in the future. You can disable
    this warning by setting `HF_HUB_DISABLE_EXPERIMENTAL_WARNING=1` as environment variable.
    Hello world!
    ```
    """
    # For classes, the decorator is put around the "__new__" method: strip that
    # suffix so that the warning message mentions the class itself.
    new_suffix = ".__new__"
    qualified_name = fn.__qualname__
    if qualified_name.endswith(new_suffix):
        name = qualified_name[: -len(new_suffix)]
    else:
        name = qualified_name

    @wraps(fn)
    def _inner_fn(*args, **kwargs):
        # The flag is read on every call, not once at decoration time.
        if constants.HF_HUB_DISABLE_EXPERIMENTAL_WARNING:
            return fn(*args, **kwargs)
        warnings.warn(
            f"'{name}' is experimental and might be subject to breaking changes in the future."
            " You can disable this warning by setting `HF_HUB_DISABLE_EXPERIMENTAL_WARNING=1` as environment"
            " variable.",
            UserWarning,
        )
        return fn(*args, **kwargs)

    return _inner_fn
lib/python3.11/site-packages/huggingface_hub/utils/_fixes.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # JSONDecodeError was introduced in requests=2.27 released in 2022.
2
+ # This allows us to support older requests for users
3
+ # More information: https://github.com/psf/requests/pull/5856
4
+ try:
5
+ from requests import JSONDecodeError # type: ignore # noqa: F401
6
+ except ImportError:
7
+ try:
8
+ from simplejson import JSONDecodeError # type: ignore # noqa: F401
9
+ except ImportError:
10
+ from json import JSONDecodeError # type: ignore # noqa: F401
11
+
12
+ import contextlib
13
+ import os
14
+ import shutil
15
+ import stat
16
+ import tempfile
17
+ from functools import partial
18
+ from pathlib import Path
19
+ from typing import Callable, Generator, Optional, Union
20
+
21
+ import yaml
22
+
23
+
24
# Wrap `yaml.dump` to set `allow_unicode=True` by default.
# `stream=None` is preset as well; the annotation below declares a `str` result.
# Being a `functools.partial`, both presets can still be overridden by keyword.
#
# Example:
# ```py
# >>> yaml.dump({"emoji": "👀", "some unicode": "日本か"})
# 'emoji: "\\U0001F440"\nsome unicode: "\\u65E5\\u672C\\u304B"\n'
#
# >>> yaml_dump({"emoji": "👀", "some unicode": "日本か"})
# 'emoji: "👀"\nsome unicode: "日本か"\n'
# ```
yaml_dump: Callable[..., str] = partial(yaml.dump, stream=None, allow_unicode=True)  # type: ignore
35
+
36
+
37
@contextlib.contextmanager
def SoftTemporaryDirectory(
    suffix: Optional[str] = None,
    prefix: Optional[str] = None,
    dir: Optional[Union[Path, str]] = None,
    **kwargs,
) -> Generator[str, None, None]:
    """
    Context manager to create a temporary directory and safely delete it.

    If tmp directory cannot be deleted normally, we set the WRITE permission and retry.
    If cleanup still fails, we give up but don't raise an exception. This is equivalent
    to `tempfile.TemporaryDirectory(..., ignore_cleanup_errors=True)` introduced in
    Python 3.10.

    The cleanup also runs when the body of the `with` statement raises; the original
    exception then propagates unchanged.

    See https://www.scivision.dev/python-tempfile-permission-error-windows/.

    Args:
        suffix (`str`, *optional*):
            Forwarded to `tempfile.TemporaryDirectory`.
        prefix (`str`, *optional*):
            Forwarded to `tempfile.TemporaryDirectory`.
        dir (`str` or `Path`, *optional*):
            Forwarded to `tempfile.TemporaryDirectory`.
        **kwargs:
            Any other keyword argument forwarded to `tempfile.TemporaryDirectory`.

    Yields:
        `str`: Path of the temporary directory.
    """
    tmpdir = tempfile.TemporaryDirectory(prefix=prefix, suffix=suffix, dir=dir, **kwargs)
    try:
        yield tmpdir.name
    finally:
        # `finally` guarantees the best-effort cleanup below even if the caller's
        # block raised.
        try:
            # First once with normal cleanup
            shutil.rmtree(tmpdir.name)
        except Exception:
            # If failed, try to set write permission and retry
            try:
                shutil.rmtree(tmpdir.name, onerror=_set_write_permission_and_retry)
            except Exception:
                pass

        # And finally, cleanup the tmpdir.
        # If it fails again, give up but do not throw error
        try:
            tmpdir.cleanup()
        except Exception:
            pass
73
+
74
+
75
+ def _set_write_permission_and_retry(func, path, excinfo):
76
+ os.chmod(path, stat.S_IWRITE)
77
+ func(path)
lib/python3.11/site-packages/huggingface_hub/utils/_git_credential.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to manage Git credentials."""
16
+ import re
17
+ import subprocess
18
+ from typing import List, Optional
19
+
20
+ from ..constants import ENDPOINT
21
+ from ._subprocess import run_interactive_subprocess, run_subprocess
22
+
23
+
24
# Extracts the helper name from `credential.helper = <name>` lines.
# Matching is multi-line and case-insensitive; whitespace and `#` comments inside the
# pattern are ignored (verbose mode).
# NOTE(review): `\w+` does not match "-" or "/", so a helper such as `manager-core`
# or one configured with a path is not captured. Confirm whether this is intended.
GIT_CREDENTIAL_REGEX = re.compile(
    r"""
        ^\s* # start of line
        credential\.helper # credential.helper value
        \s*=\s* # separator
        (\w+) # the helper name (group 1)
        (\s|$) # whitespace or end of line
    """,
    flags=re.MULTILINE | re.IGNORECASE | re.VERBOSE,
)
34
+
35
+
36
def list_credential_helpers(folder: Optional[str] = None) -> List[str]:
    """Return the list of git credential helpers configured.

    See https://git-scm.com/docs/gitcredentials.

    Runs "`git config --list`" and extracts the `credential.helper` entries from its
    output.

    Args:
        folder (`str`, *optional*):
            The folder in which to check the configured helpers.

    Returns:
        `List[str]`: Names of the configured helpers, as returned by
        `_parse_credential_output`.

    Raises:
        `EnvironmentError`:
            If the git command fails. The message is the stderr of the command.
    """
    try:
        output = run_subprocess("git config --list", folder=folder).stdout
    except subprocess.CalledProcessError as exc:
        # Chain explicitly so that the failed command stays visible as the cause.
        raise EnvironmentError(exc.stderr) from exc
    return _parse_credential_output(output)
54
+
55
+
56
def set_git_credential(token: str, username: str = "hf_user", folder: Optional[str] = None) -> None:
    """Save a username/token pair in git credential for HF Hub registry.

    Credentials are saved in all configured helpers (store, cache, macOS keychain,...).
    Calls "`git credential approve`" internally. See https://git-scm.com/docs/git-credential.

    Args:
        token (`str`):
            A git password. In practice, the User Access Token for the Hub.
            See https://huggingface.co/settings/tokens.
        username (`str`, defaults to `"hf_user"`):
            A git username. Defaults to `"hf_user"`, the default user used in the Hub.
            It is lowercased before being sent to git.
        folder (`str`, *optional*):
            The folder in which to check the configured helpers.
    """
    # Credential description written to the command's stdin, ended by a blank line.
    payload = f"url={ENDPOINT}\nusername={username.lower()}\npassword={token}\n\n"
    with run_interactive_subprocess("git credential approve", folder=folder) as (stdin, _):
        stdin.write(payload)
        stdin.flush()
77
+
78
+
79
def unset_git_credential(username: str = "hf_user", folder: Optional[str] = None) -> None:
    """Erase credentials from git credential for HF Hub registry.

    Credentials are erased from the configured helpers (store, cache, macOS
    keychain,...), if any. If `username` is `None`, no username is sent to git, so
    any credential configured for the HF Hub endpoint is targeted.
    Calls "`git credential reject`" internally. See https://git-scm.com/docs/git-credential.

    Args:
        username (`str`, defaults to `"hf_user"`):
            A git username. Defaults to `"hf_user"`, the default user used in the Hub.
            It is lowercased before being sent to git.
        folder (`str`, *optional*):
            The folder in which to check the configured helpers.
    """
    with run_interactive_subprocess("git credential reject", folder=folder) as (stdin, _):
        lines = [f"url={ENDPOINT}\n"]
        if username is not None:
            lines.append(f"username={username.lower()}\n")
        # A blank line ends the credential description.
        lines.append("\n")

        stdin.write("".join(lines))
        stdin.flush()
104
+
105
+
106
def _parse_credential_output(output: str) -> List[str]:
    """Extract the configured credential helper names from a git config listing.

    Args:
        output (`str`):
            Text searched with `GIT_CREDENTIAL_REGEX` (the output of
            `git config --list` when called from `list_credential_helpers`).

    Returns:
        `List[str]`: The helper names, without duplicates, sorted alphabetically.
    """
    # NOTE: If user has set an helper for a custom URL, it will not be caught here.
    #       Example: `credential.https://huggingface.co.helper=store`
    #       See: https://github.com/huggingface/huggingface_hub/pull/1138#discussion_r1013324508
    # Group 1 of each match is the helper name; the set removes duplicates.
    helper_names = {found.group(1) for found in GIT_CREDENTIAL_REGEX.finditer(output)}
    return sorted(helper_names)  # Sort for nice printing
lib/python3.11/site-packages/huggingface_hub/utils/_headers.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to handle headers to send in calls to Huggingface Hub."""
16
+ from typing import Dict, Optional, Union
17
+
18
+ from .. import constants
19
+ from ._runtime import (
20
+ get_fastai_version,
21
+ get_fastcore_version,
22
+ get_hf_hub_version,
23
+ get_python_version,
24
+ get_tf_version,
25
+ get_torch_version,
26
+ is_fastai_available,
27
+ is_fastcore_available,
28
+ is_tf_available,
29
+ is_torch_available,
30
+ )
31
+ from ._token import get_token
32
+ from ._validators import validate_hf_hub_args
33
+
34
+
35
class LocalTokenNotFoundError(EnvironmentError):
    """Error signalling that a locally stored token was required but could not be found."""
37
+
38
+
39
@validate_hf_hub_args
def build_hf_headers(
    *,
    token: Optional[Union[bool, str]] = None,
    is_write_action: bool = False,
    library_name: Optional[str] = None,
    library_version: Optional[str] = None,
    user_agent: Union[Dict, str, None] = None,
) -> Dict[str, str]:
    """
    Assemble the HTTP headers to send with a call to the HF Hub.

    The returned dictionary always contains a `"user-agent"` entry describing the
    calling library and the installed packages. An `"authorization"` entry
    (`"Bearer <token>"`) is added whenever a token has been selected.

    Token selection follows these rules:
        - a string is used as-is (explicit use);
        - `True` forces the use of the locally stored token and fails if there is none;
        - `False` disables authentication: no authorization header is set;
        - `None` uses the locally stored token (implicit use), unless the
          `HF_HUB_DISABLE_IMPLICIT_TOKEN` environment variable is set.

    When the call requires write access, the selected token is validated: it cannot
    be `None` and cannot be an organization token (starting with `"api_org"`).

    Args:
        token (`str`, `bool`, *optional*):
            The token to be sent in the authorization header, see rules above.
        is_write_action (`bool`, default to `False`):
            Set to `True` if the API call requires write access, which triggers the
            token validation.
        library_name (`str`, *optional*):
            The name of the library that is making the HTTP request. Will be added to
            the user-agent header.
        library_version (`str`, *optional*):
            The version of the library that is making the HTTP request. Will be added
            to the user-agent header.
        user_agent (`str`, `dict`, *optional*):
            Extra user agent info, as a dictionary or a single string. It is
            completed with information about the installed packages.

    Returns:
        A `Dict` of headers to pass in your API call.

    Example:
    ```py
        >>> build_hf_headers(token="hf_***") # explicit token
        {"authorization": "Bearer hf_***", "user-agent": ...}

        >>> build_hf_headers(token=False) # explicitly don't use cached token
        {"user-agent": ...}

        >>> build_hf_headers() # implicit use of the cached token
        {"authorization": "Bearer hf_***", "user-agent": ...}

        >>> build_hf_headers(token="api_org_***", is_write_action=True)
        ValueError: You must use your personal account token for write-access methods.

        >>> build_hf_headers(library_name="transformers", library_version="1.2.3")
        {"authorization": ..., "user-agent": "transformers/1.2.3; hf_hub/0.10.2; python/3.10.4; tensorflow/1.55"}
    ```

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If organization token is passed and "write" access is required.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If "write" access is required but token is not passed and not saved locally.
        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
            If `token=True` but token is not saved locally.
    """
    # Pick the token first: validation must run before anything is returned.
    selected_token = get_token_to_send(token)
    _validate_token_to_send(selected_token, is_write_action=is_write_action)

    formatted_user_agent = _http_user_agent(
        library_name=library_name,
        library_version=library_version,
        user_agent=user_agent,
    )

    # Unauthenticated call: only the user-agent is sent.
    if selected_token is None:
        return {"user-agent": formatted_user_agent}
    return {
        "user-agent": formatted_user_agent,
        "authorization": f"Bearer {selected_token}",
    }
135
+
136
+
137
def get_token_to_send(token: Optional[Union[bool, str]]) -> Optional[str]:
    """Resolve which token (if any) must be sent, given the user-provided `token` value.

    Args:
        token (`str`, `bool`, *optional*):
            A string is returned unchanged, `False` disables the token, `True`
            requires the locally stored token, and `None` falls back to the
            locally stored token unless implicit use is disabled.

    Returns:
        `str` or `None`: the token to send, `None` if no token must be sent.

    Raises:
        `LocalTokenNotFoundError`: if `token=True` but no local token is found.
    """
    # Explicit token: use it as-is.
    if isinstance(token, str):
        return token

    # Explicit opt-out: never send a token.
    if token is False:
        return None

    # From here on, the locally stored token is the only candidate.
    local_token = get_token()

    # Explicit opt-in: a local token is mandatory.
    if token is True:
        if local_token is not None:
            return local_token
        raise LocalTokenNotFoundError(
            "Token is required (`token=True`), but no token found. You"
            " need to provide a token or be logged in to Hugging Face with"
            " `huggingface-cli login` or `huggingface_hub.login`. See"
            " https://huggingface.co/settings/tokens."
        )

    # Implicit use: allowed unless forbidden by the environment variable.
    return None if constants.HF_HUB_DISABLE_IMPLICIT_TOKEN else local_token
167
+
168
+
169
+ def _validate_token_to_send(token: Optional[str], is_write_action: bool) -> None:
170
+ if is_write_action:
171
+ if token is None:
172
+ raise ValueError(
173
+ "Token is required (write-access action) but no token found. You need"
174
+ " to provide a token or be logged in to Hugging Face with"
175
+ " `huggingface-cli login` or `huggingface_hub.login`. See"
176
+ " https://huggingface.co/settings/tokens."
177
+ )
178
+ if token.startswith("api_org"):
179
+ raise ValueError(
180
+ "You must use your personal account token for write-access methods. To"
181
+ " generate a write-access token, go to"
182
+ " https://huggingface.co/settings/tokens"
183
+ )
184
+
185
+
186
def _http_user_agent(
    *,
    library_name: Optional[str] = None,
    library_version: Optional[str] = None,
    user_agent: Union[Dict, str, None] = None,
) -> str:
    """Build the user-agent string describing the caller and the installed packages.

    The string is a `"; "`-separated list of `name/version` entries: the calling
    library first (or `unknown/None`), then `hf_hub` and `python`, then (unless
    telemetry is disabled) the available frameworks, and finally the extra
    `user_agent` info. Duplicated entries are removed.

    Args:
        library_name (`str`, *optional*):
            The name of the library that is making the HTTP request.
        library_version (`str`, *optional*):
            The version of the library that is making the HTTP request.
        user_agent (`str`, `dict`, *optional*):
            The user agent info in the form of a dictionary or a single string.

    Returns:
        The formatted user-agent string.
    """
    caller = "unknown/None" if library_name is None else f"{library_name}/{library_version}"
    entries = [
        caller,
        f"hf_hub/{get_hf_hub_version()}",
        f"python/{get_python_version()}",
    ]

    # Framework versions are only reported when telemetry is enabled.
    if not constants.HF_HUB_DISABLE_TELEMETRY:
        if is_torch_available():
            entries.append(f"torch/{get_torch_version()}")
        if is_tf_available():
            entries.append(f"tensorflow/{get_tf_version()}")
        if is_fastai_available():
            entries.append(f"fastai/{get_fastai_version()}")
        if is_fastcore_available():
            entries.append(f"fastcore/{get_fastcore_version()}")

    # Extra info provided by the caller comes last.
    if isinstance(user_agent, dict):
        entries.append("; ".join(f"{k}/{v}" for k, v in user_agent.items()))
    elif isinstance(user_agent, str):
        entries.append(user_agent)

    return _deduplicate_user_agent("; ".join(entries))
228
+
229
+
230
+ def _deduplicate_user_agent(user_agent: str) -> str:
231
+ """Deduplicate redundant information in the generated user-agent."""
232
+ # Split around ";" > Strip whitespaces > Store as dict keys (ensure unicity) > format back as string
233
+ # Order is implicitly preserved by dictionary structure (see https://stackoverflow.com/a/53657523).
234
+ return "; ".join({key.strip(): None for key in user_agent.split(";")}.keys())
lib/python3.11/site-packages/huggingface_hub/utils/_hf_folder.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contain helper class to retrieve/store token from/to local cache."""
16
+ import os
17
+ import warnings
18
+ from pathlib import Path
19
+ from typing import Optional
20
+
21
+ from .. import constants
22
+
23
+
24
class HfFolder:
    """Helper class to save, retrieve and delete the Hugging Face token stored locally."""

    path_token = Path(constants.HF_TOKEN_PATH)
    # Private attribute. Will be removed in v0.15
    _old_path_token = Path(constants._OLD_HF_TOKEN_PATH)

    # TODO: deprecate when adapted in transformers/datasets/gradio
    # @_deprecate_method(version="1.0", message="Use `huggingface_hub.login` instead.")
    @classmethod
    def save_token(cls, token: str) -> None:
        """
        Save token, creating folder as needed.

        Token is saved in the huggingface home folder. You can configure it by setting
        the `HF_HOME` environment variable.

        Args:
            token (`str`):
                The token to save to the [`HfFolder`]
        """
        cls.path_token.parent.mkdir(parents=True, exist_ok=True)
        cls.path_token.write_text(token)

    # TODO: deprecate when adapted in transformers/datasets/gradio
    # @_deprecate_method(version="1.0", message="Use `huggingface_hub.get_token` instead.")
    @classmethod
    def get_token(cls) -> Optional[str]:
        """
        Get token or None if not existent.

        Note that a token can be also provided using the `HF_TOKEN` environment variable.

        Token is saved in the huggingface home folder. You can configure it by setting
        the `HF_HOME` environment variable. Previous location was `~/.huggingface/token`.
        If token is found in old location but not in new location, it is copied there first.
        For more details, see https://github.com/huggingface/huggingface_hub/issues/1232.

        Returns:
            `str` or `None`: The token, `None` if it doesn't exist or if the stored
            value is empty once newlines and surrounding whitespace are removed.
        """
        # 0. Check if token exist in old path but not new location
        try:
            cls._copy_to_new_path_and_warn()
        except Exception:  # if not possible (e.g. PermissionError), do not raise
            # Deliberate best-effort: the migration must never prevent reading a token.
            pass

        # 1. Is it set by environment variable ?
        token: Optional[str] = os.environ.get("HF_TOKEN")
        if token is None:  # Ensure backward compatibility but doesn't have priority
            token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
        if token is not None:
            token = token.replace("\r", "").replace("\n", "").strip()
            if token != "":
                return token

        # 2. Is it set in token path ?
        try:
            token = cls.path_token.read_text()
        except FileNotFoundError:
            return None
        token = token.replace("\r", "").replace("\n", "").strip()
        # An empty token file is treated like a missing one, consistently with the
        # handling of empty environment variables above.
        return token or None

    # TODO: deprecate when adapted in transformers/datasets/gradio
    # @_deprecate_method(version="1.0", message="Use `huggingface_hub.logout` instead.")
    @classmethod
    def delete_token(cls) -> None:
        """
        Deletes the token from storage. Does not fail if token does not exist.

        Both the current and the old token locations are cleaned up.
        """
        try:
            cls.path_token.unlink()
        except FileNotFoundError:
            pass

        try:
            cls._old_path_token.unlink()
        except FileNotFoundError:
            pass

    @classmethod
    def _copy_to_new_path_and_warn(cls):
        """Copy the token from the old location to the new one, if only the old one exists, and warn."""
        if cls._old_path_token.exists() and not cls.path_token.exists():
            cls.save_token(cls._old_path_token.read_text())
            warnings.warn(
                f"A token has been found in `{cls._old_path_token}`. This is the old"
                " path where tokens were stored. The new location is"
                f" `{cls.path_token}` which is configurable using `HF_HOME` environment"
                " variable. Your token has been copied to this new location. You can"
                " now safely delete the old token file manually or use"
                " `huggingface-cli logout`."
            )
lib/python3.11/site-packages/huggingface_hub/utils/_http.py ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to handle HTTP requests in Huggingface Hub."""
16
+ import io
17
+ import os
18
+ import threading
19
+ import time
20
+ import uuid
21
+ from functools import lru_cache
22
+ from http import HTTPStatus
23
+ from typing import Callable, Tuple, Type, Union
24
+
25
+ import requests
26
+ from requests import Response
27
+ from requests.adapters import HTTPAdapter
28
+ from requests.models import PreparedRequest
29
+
30
+ from .. import constants
31
+ from . import logging
32
+ from ._typing import HTTP_METHOD_T
33
+
34
+
35
# Module-level logger used for request debug logs and retry warnings.
logger = logging.get_logger(__name__)

# Both headers are used by the Hub to debug failed requests.
# `X_AMZN_TRACE_ID` is better as it also works to debug on Cloudfront and ALB.
# If `X_AMZN_TRACE_ID` is set, the Hub will use it as well.
X_AMZN_TRACE_ID = "X-Amzn-Trace-Id"
X_REQUEST_ID = "x-request-id"
42
+
43
+
44
class OfflineModeIsEnabled(ConnectionError):
    """Error raised when an HTTP request is attempted while `HF_HUB_OFFLINE=1` is set in the environment."""
46
+
47
+
48
class UniqueRequestIdAdapter(HTTPAdapter):
    """HTTP adapter that tags each request with a trace id and reports it on failure."""

    X_AMZN_TRACE_ID = "X-Amzn-Trace-Id"

    def add_headers(self, request, **kwargs):
        """Make sure the request carries a trace id header, then log the request."""
        super().add_headers(request, **kwargs)

        # Add random request ID => easier for server-side debug.
        # An existing `x-request-id` value is reused when there is one.
        headers = request.headers
        if X_AMZN_TRACE_ID not in headers:
            headers[X_AMZN_TRACE_ID] = headers.get(X_REQUEST_ID) or str(uuid.uuid4())

        # Add debug log (only whether a token is set is logged, never its value).
        authorization = str(headers.get("authorization", ""))
        has_token = authorization.startswith("Bearer hf_")
        logger.debug(
            f"Request {request.headers[X_AMZN_TRACE_ID]}: {request.method} {request.url} (authenticated: {has_token})"
        )

    def send(self, request: PreparedRequest, *args, **kwargs) -> Response:
        """Send the request; on `RequestException`, append the request id to the error before re-raising."""
        try:
            return super().send(request, *args, **kwargs)
        except requests.RequestException as error:
            trace_id = request.headers.get(X_AMZN_TRACE_ID)
            if trace_id is not None:
                # Taken from https://stackoverflow.com/a/58270258
                error.args = (*error.args, f"(Request ID: {trace_id})")
            raise
74
+
75
+
76
class OfflineAdapter(HTTPAdapter):
    """HTTP adapter that refuses to send any request, used when offline mode is enabled."""

    def send(self, request: PreparedRequest, *args, **kwargs) -> Response:
        """Always raise `OfflineModeIsEnabled`, mentioning the URL that was requested."""
        message = f"Cannot reach {request.url}: offline mode is enabled. To disable it, please unset the `HF_HUB_OFFLINE` environment variable."
        raise OfflineModeIsEnabled(message)
81
+
82
+
83
def _default_backend_factory() -> requests.Session:
    """Create the default `requests.Session` used by `huggingface_hub`.

    Both `http://` and `https://` are mounted with an `OfflineAdapter` when
    `constants.HF_HUB_OFFLINE` is set, and with a `UniqueRequestIdAdapter` otherwise.
    """
    session = requests.Session()
    adapter_class = OfflineAdapter if constants.HF_HUB_OFFLINE else UniqueRequestIdAdapter
    # One adapter instance per scheme.
    for scheme in ("http://", "https://"):
        session.mount(scheme, adapter_class())
    return session
92
+
93
+
94
# Type of a session factory: a callable without argument returning a `requests.Session`.
BACKEND_FACTORY_T = Callable[[], requests.Session]
# Factory currently used to create sessions. Replaced by `configure_http_backend`.
_GLOBAL_BACKEND_FACTORY: BACKEND_FACTORY_T = _default_backend_factory
96
+
97
+
98
def configure_http_backend(backend_factory: BACKEND_FACTORY_T = _default_backend_factory) -> None:
    """
    Set the factory used to create the `requests.Session` objects of `huggingface_hub`.

    Every HTTP call made by `huggingface_hub` goes through a Session instantiated by
    `backend_factory`. This is useful in environments requiring a custom configuration
    (e.g. custom proxy or certificates). Calling this function without argument
    restores the default factory.

    Sessions are retrieved with [`get_session`]. Since `requests.Session` is not
    guaranteed to be thread-safe, 1 Session instance is created per thread, all of
    them using the same `backend_factory`. Created sessions (and connections) are kept
    in a LRU cache between calls. Max size is 128 to avoid memory leaks if thousands
    of threads are spawned. The cache is reset each time the backend is configured.

    See [this issue](https://github.com/psf/requests/issues/2766) to know more about thread-safety in `requests`.

    Example:
    ```py
    import requests
    from huggingface_hub import configure_http_backend, get_session

    # Create a factory function that returns a Session with configured proxies
    def backend_factory() -> requests.Session:
        session = requests.Session()
        session.proxies = {"http": "http://10.10.1.10:3128", "https": "https://10.10.1.11:1080"}
        return session

    # Set it as the default session factory
    configure_http_backend(backend_factory=backend_factory)

    # In practice, this is mostly done internally in `huggingface_hub`
    session = get_session()
    ```
    """
    global _GLOBAL_BACKEND_FACTORY

    _GLOBAL_BACKEND_FACTORY = backend_factory
    # Sessions created by the previous factory must not be served anymore.
    reset_sessions()
132
+
133
+
134
def get_session() -> requests.Session:
    """
    Return the `requests.Session` of the current thread, created by the configured factory.

    Since `requests.Session` is not guaranteed to be thread-safe, 1 Session instance
    is created per (process, thread) pair. They are all instantiated using the
    `backend_factory` set in [`configure_http_backend`]. Created sessions (and
    connections) are kept in a LRU cache between calls. Max size is 128 to avoid
    memory leaks if thousands of threads are spawned.

    See [this issue](https://github.com/psf/requests/issues/2766) to know more about thread-safety in `requests`.

    Example:
    ```py
    import requests
    from huggingface_hub import configure_http_backend, get_session

    # Create a factory function that returns a Session with configured proxies
    def backend_factory() -> requests.Session:
        session = requests.Session()
        session.proxies = {"http": "http://10.10.1.10:3128", "https": "https://10.10.1.11:1080"}
        return session

    # Set it as the default session factory
    configure_http_backend(backend_factory=backend_factory)

    # In practice, this is mostly done internally in `huggingface_hub`
    session = get_session()
    ```
    """
    # The cache key identifies the caller: current process and current thread.
    current_process = os.getpid()
    current_thread = threading.get_ident()
    return _get_session_from_cache(process_id=current_process, thread_id=current_thread)
164
+
165
+
166
def reset_sessions() -> None:
    """Drop every cached session.

    New sessions are created by the configured factory on the next call to
    [`get_session`]. Mostly used internally when sessions are reconfigured or an
    SSLError is raised. See [`configure_http_backend`] for more details.
    """
    # Sessions live in the LRU cache of `_get_session_from_cache`.
    _get_session_from_cache.cache_clear()
173
+
174
+
175
@lru_cache(maxsize=128)
def _get_session_from_cache(process_id: int, thread_id: int) -> requests.Session:
    """
    Return the session attached to a (process, thread) pair, creating it with the global factory if needed.

    The arguments are only used as cache key. The LRU cache is bounded (maxsize 128)
    to avoid memory leaks when using thousands of threads. Cache is cleared when
    `configure_http_backend` is called.
    """
    new_session = _GLOBAL_BACKEND_FACTORY()
    return new_session
182
+
183
+
184
def http_backoff(
    method: HTTP_METHOD_T,
    url: str,
    *,
    max_retries: int = 5,
    base_wait_time: float = 1,
    max_wait_time: float = 8,
    retry_on_exceptions: Union[Type[Exception], Tuple[Type[Exception], ...]] = (
        requests.Timeout,
        requests.ConnectionError,
    ),
    retry_on_status_codes: Union[int, Tuple[int, ...]] = HTTPStatus.SERVICE_UNAVAILABLE,
    **kwargs,
) -> Response:
    """Wrapper around requests to retry calls on an endpoint, with exponential backoff.

    Endpoint call is retried on exceptions (ex: connection timeout, proxy error,...)
    and/or on specific status codes (ex: service unavailable). If the call failed more
    than `max_retries`, the exception is thrown or `raise_for_status` is called on the
    response object.

    Re-implement mechanisms from the `backoff` library to avoid adding an external
    dependencies to `hugging_face_hub`. See https://github.com/litl/backoff.

    Args:
        method (`Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]`):
            HTTP method to perform.
        url (`str`):
            The URL of the resource to fetch.
        max_retries (`int`, *optional*, defaults to `5`):
            Maximum number of retries. The request is attempted at most
            `max_retries + 1` times. Set to `0` to disable retries.
        base_wait_time (`float`, *optional*, defaults to `1`):
            Duration (in seconds) to wait before retrying the first time.
            Wait time between retries then grows exponentially, capped by
            `max_wait_time`.
        max_wait_time (`float`, *optional*, defaults to `8`):
            Maximum duration (in seconds) to wait before retrying.
        retry_on_exceptions (`Type[Exception]` or `Tuple[Type[Exception]]`, *optional*):
            Define which exceptions must be caught to retry the request. Can be a single type or a tuple of types.
            By default, retry on `requests.Timeout` and `requests.ConnectionError`.
        retry_on_status_codes (`int` or `Tuple[int]`, *optional*, defaults to `503`):
            Define on which status codes the request must be retried. By default, only
            HTTP 503 Service Unavailable is retried.
        **kwargs (`dict`, *optional*):
            kwargs to pass to `requests.request`.

    Returns:
        `Response`: the first response whose status code is not in
        `retry_on_status_codes`, or the last response received if retries are
        exhausted and `raise_for_status` does not raise for it.

    Example:
    ```
    >>> from huggingface_hub.utils import http_backoff

    # Same usage as "requests.request".
    >>> response = http_backoff("GET", "https://www.google.com")
    >>> response.raise_for_status()

    # If you expect a Gateway Timeout from time to time
    >>> response = http_backoff("PUT", upload_url, data=data, retry_on_status_codes=504)
    >>> response.raise_for_status()
    ```

    <Tip warning={true}>

    When using `requests` it is possible to stream data by passing an iterator to the
    `data` argument. On http backoff this is a problem as the iterator is not reset
    after a failed call. This issue is mitigated for file objects or any IO streams
    by saving the initial position of the cursor (with `data.tell()`) and resetting the
    cursor between each call (with `data.seek()`). For arbitrary iterators, http backoff
    will fail. If this is a hard constraint for you, please let us know by opening an
    issue on [Github](https://github.com/huggingface/huggingface_hub).

    </Tip>
    """
    if isinstance(retry_on_exceptions, type):  # Tuple from single exception type
        retry_on_exceptions = (retry_on_exceptions,)

    if isinstance(retry_on_status_codes, int):  # Tuple from single status code
        retry_on_status_codes = (retry_on_status_codes,)

    nb_tries = 0
    sleep_time = base_wait_time

    # If `data` is used and is a file object (or any IO), it will be consumed on the
    # first HTTP request. We need to save the initial position so that the full content
    # of the file is re-sent on http backoff. See warning tip in docstring.
    io_obj_initial_pos = None
    if "data" in kwargs and isinstance(kwargs["data"], io.IOBase):
        io_obj_initial_pos = kwargs["data"].tell()

    session = get_session()
    while True:
        nb_tries += 1
        try:
            # If `data` is used and is a file object (or any IO), set back cursor to
            # initial position.
            if io_obj_initial_pos is not None:
                kwargs["data"].seek(io_obj_initial_pos)

            # Perform request and return if status_code is not in the retry list.
            response = session.request(method=method, url=url, **kwargs)
            if response.status_code not in retry_on_status_codes:
                return response

            # Wrong status code returned (HTTP 503 for instance)
            logger.warning(f"HTTP Error {response.status_code} thrown while requesting {method} {url}")
            if nb_tries > max_retries:
                response.raise_for_status()  # Will raise uncaught exception
                # We return response to avoid infinite loop in the corner case where the
                # user ask for retry on a status code that doesn't raise_for_status.
                return response

        except retry_on_exceptions as err:
            logger.warning(f"'{err}' thrown while requesting {method} {url}")

            if isinstance(err, requests.ConnectionError):
                reset_sessions()  # In case of SSLError it's best to reset the shared requests.Session objects
                # The local `session` was created before the reset: fetch a fresh one
                # so that the next attempt does not reuse the discarded session.
                session = get_session()

            if nb_tries > max_retries:
                raise err

        # Sleep for X seconds
        logger.warning(f"Retrying in {sleep_time}s [Retry {nb_tries}/{max_retries}].")
        time.sleep(sleep_time)

        # Update sleep time for next retry
        sleep_time = min(max_wait_time, sleep_time * 2)  # Exponential backoff
lib/python3.11/site-packages/huggingface_hub/utils/_pagination.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to handle pagination on Huggingface Hub."""
16
+ from typing import Dict, Iterable, Optional
17
+
18
+ import requests
19
+
20
+ from . import get_session, hf_raise_for_status, logging
21
+
22
+
23
# Module-level logger, used to report when extra pages are requested.
logger = logging.get_logger(__name__)
24
+
25
+
26
def paginate(path: str, params: Dict, headers: Dict) -> Iterable:
    """Yield the items of a paginated endpoint, requesting the following pages as needed.

    The first request is sent to `path` with `params`. Following pages are found
    using the same "Link" header format as GitHub: the "next" link is requested,
    with the same `headers`, until a response has no such link.
    See:
    - https://requests.readthedocs.io/en/latest/api/#requests.Response.links
    - https://docs.github.com/en/rest/guides/traversing-with-pagination#link-header
    """
    session = get_session()
    response = session.get(path, params=params, headers=headers)
    while True:
        hf_raise_for_status(response)
        yield from response.json()

        # Follow pages
        next_url = _get_next_page(response)
        if next_url is None:
            return

        # Next link already contains query params
        logger.debug(f"Pagination detected. Requesting next page: {next_url}")
        response = session.get(next_url, headers=headers)
48
+
49
+
50
def _get_next_page(response: requests.Response) -> Optional[str]:
    """Return the URL of the "next" link of `response`, or `None` if there is none."""
    next_link = response.links.get("next", {})
    return next_link.get("url")
lib/python3.11/site-packages/huggingface_hub/utils/_paths.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to handle paths in Huggingface Hub."""
16
+ from fnmatch import fnmatch
17
+ from pathlib import Path
18
+ from typing import Callable, Generator, Iterable, List, Optional, TypeVar, Union
19
+
20
+
21
T = TypeVar("T")

IGNORE_GIT_FOLDER_PATTERNS = [".git", ".git/*", "*/.git", "**/.git/**"]


def filter_repo_objects(
    items: Iterable[T],
    *,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
    key: Optional[Callable[[T], str]] = None,
) -> Generator[T, None, None]:
    """Lazily keep only the items whose path passes an allowlist and a denylist.

    Items are either paths (`str` or `Path`) or arbitrary objects. In the latter case, `key`
    must be provided: it is a single-argument function returning the path of an item.

    Patterns are Unix shell-style wildcards which are NOT regular expressions. Matching is
    delegated to `fnmatch.fnmatch`. See https://docs.python.org/3/library/fnmatch.html for
    more details.

    Args:
        items (`Iterable`):
            Items to filter.
        allow_patterns (`str` or `List[str]`, *optional*):
            Patterns constituting the allowlist. If provided, an item path must match at
            least one of them to be kept.
        ignore_patterns (`str` or `List[str]`, *optional*):
            Patterns constituting the denylist. If provided, an item path matching any of
            them is dropped.
        key (`Callable[[T], str]`, *optional*):
            Single-argument function to extract a path from each item. If not provided,
            the `items` must already be `str` or `Path`.

    Returns:
        The kept items, in their original order, as a generator.

    Raises:
        :class:`ValueError`:
            If `key` is not provided and an item is neither a `str` nor a `Path`. As the
            function is a generator, the error is raised while iterating over the result.

    Example usage with paths:
    ```python
    >>> # Filter only PDFs that are not hidden.
    >>> list(filter_repo_objects(
    ...     ["aaa.pdf", "bbb.jpg", ".ccc.pdf", ".ddd.png"],
    ...     allow_patterns=["*.pdf"],
    ...     ignore_patterns=[".*"],
    ... ))
    ["aaa.pdf"]
    ```

    Example usage with objects:
    ```python
    >>> list(filter_repo_objects(
    ...     [
    ...         CommitOperationAdd(path_or_fileobj="/tmp/aaa.pdf", path_in_repo="aaa.pdf"),
    ...         CommitOperationAdd(path_or_fileobj="/tmp/bbb.jpg", path_in_repo="bbb.jpg"),
    ...         CommitOperationAdd(path_or_fileobj="/tmp/.ccc.pdf", path_in_repo=".ccc.pdf"),
    ...         CommitOperationAdd(path_or_fileobj="/tmp/.ddd.png", path_in_repo=".ddd.png"),
    ...     ],
    ...     allow_patterns=["*.pdf"],
    ...     ignore_patterns=[".*"],
    ...     key=lambda x: x.path_in_repo
    ... ))
    [CommitOperationAdd(path_or_fileobj="/tmp/aaa.pdf", path_in_repo="aaa.pdf")]
    ```
    """
    # A single pattern given as a string is treated as a one-element list.
    allowlist = [allow_patterns] if isinstance(allow_patterns, str) else allow_patterns
    denylist = [ignore_patterns] if isinstance(ignore_patterns, str) else ignore_patterns

    def _path_from_item(item: T) -> str:
        # Default extractor: items must be `str` or `Path`, otherwise raise ValueError
        if isinstance(item, str):
            return item
        if isinstance(item, Path):
            return str(item)
        raise ValueError(f"Please provide `key` argument in `filter_repo_objects`: `{item}` is not a string.")

    extract_path = _path_from_item if key is None else key

    for item in items:
        path = extract_path(item)

        # With an allowlist, the path must match at least one of its patterns.
        is_allowed = allowlist is None or any(fnmatch(path, pattern) for pattern in allowlist)
        if not is_allowed:
            continue

        # With a denylist, the path must not match any of its patterns.
        is_ignored = denylist is not None and any(fnmatch(path, pattern) for pattern in denylist)
        if not is_ignored:
            yield item
lib/python3.11/site-packages/huggingface_hub/utils/_runtime.py ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Check presence of installed packages at runtime."""
16
+ import importlib.metadata
17
+ import platform
18
+ import sys
19
+ import warnings
20
+ from typing import Any, Dict
21
+
22
+ from .. import __version__, constants
23
+
24
+
25
+ _PY_VERSION: str = sys.version.split()[0].rstrip("+")
26
+
27
+ _package_versions = {}
28
+
29
+ _CANDIDATES = {
30
+ "aiohttp": {"aiohttp"},
31
+ "fastai": {"fastai"},
32
+ "fastcore": {"fastcore"},
33
+ "gradio": {"gradio"},
34
+ "graphviz": {"graphviz"},
35
+ "hf_transfer": {"hf_transfer"},
36
+ "jinja": {"Jinja2"},
37
+ "numpy": {"numpy"},
38
+ "pillow": {"Pillow"},
39
+ "pydantic": {"pydantic"},
40
+ "pydot": {"pydot"},
41
+ "tensorboard": {"tensorboardX"},
42
+ "tensorflow": (
43
+ "tensorflow",
44
+ "tensorflow-cpu",
45
+ "tensorflow-gpu",
46
+ "tf-nightly",
47
+ "tf-nightly-cpu",
48
+ "tf-nightly-gpu",
49
+ "intel-tensorflow",
50
+ "intel-tensorflow-avx512",
51
+ "tensorflow-rocm",
52
+ "tensorflow-macos",
53
+ ),
54
+ "torch": {"torch"},
55
+ }
56
+
57
+ # Check once at runtime
58
+ for candidate_name, package_names in _CANDIDATES.items():
59
+ _package_versions[candidate_name] = "N/A"
60
+ for name in package_names:
61
+ try:
62
+ _package_versions[candidate_name] = importlib.metadata.version(name)
63
+ break
64
+ except importlib.metadata.PackageNotFoundError:
65
+ pass
66
+
67
+
68
def _get_version(package_name: str) -> str:
    """Return the version detected for `package_name`, or "N/A" if none was recorded."""
    version = _package_versions.get(package_name, "N/A")
    return version
70
+
71
+
72
def _is_available(package_name: str) -> bool:
    """Return `True` if a version (anything but "N/A") is known for `package_name`."""
    version = _get_version(package_name)
    return version != "N/A"
74
+
75
+
76
+ # Python
77
def get_python_version() -> str:
    """Return the version of the running Python interpreter, as computed at import time."""
    return _PY_VERSION
79
+
80
+
81
+ # Huggingface Hub
82
def get_hf_hub_version() -> str:
    """Return the `__version__` of the parent package."""
    return __version__
84
+
85
+
86
+ # aiohttp
87
def is_aiohttp_available() -> bool:
    """Return `True` if an installed `aiohttp` was detected at import time."""
    return _is_available("aiohttp")
89
+
90
+
91
def get_aiohttp_version() -> str:
    """Return the detected `aiohttp` version, or "N/A" if it is not installed."""
    return _get_version("aiohttp")
93
+
94
+
95
+ # FastAI
96
def is_fastai_available() -> bool:
    """Return `True` if an installed `fastai` was detected at import time."""
    return _is_available("fastai")
98
+
99
+
100
def get_fastai_version() -> str:
    """Return the detected `fastai` version, or "N/A" if it is not installed."""
    return _get_version("fastai")
102
+
103
+
104
+ # Fastcore
105
def is_fastcore_available() -> bool:
    """Return `True` if an installed `fastcore` was detected at import time."""
    return _is_available("fastcore")
107
+
108
+
109
def get_fastcore_version() -> str:
    """Return the detected `fastcore` version, or "N/A" if it is not installed."""
    return _get_version("fastcore")
111
+
112
+
113
# Gradio
def is_gradio_available() -> bool:
    """Return `True` if an installed `gradio` was detected at import time."""
    return _is_available("gradio")
116
+
117
+
118
def get_gradio_version() -> str:
    """Return the detected `gradio` version, or "N/A" if it is not installed."""
    return _get_version("gradio")
120
+
121
+
122
+ # Graphviz
123
def is_graphviz_available() -> bool:
    """Return `True` if an installed `graphviz` package was detected at import time."""
    return _is_available("graphviz")
125
+
126
+
127
def get_graphviz_version() -> str:
    """Return the detected `graphviz` package version, or "N/A" if it is not installed."""
    return _get_version("graphviz")
129
+
130
+
131
+ # hf_transfer
132
def is_hf_transfer_available() -> bool:
    """Return `True` if an installed `hf_transfer` was detected at import time."""
    return _is_available("hf_transfer")
134
+
135
+
136
def get_hf_transfer_version() -> str:
    """Return the detected `hf_transfer` version, or "N/A" if it is not installed."""
    return _get_version("hf_transfer")
138
+
139
+
140
+ # Numpy
141
def is_numpy_available() -> bool:
    """Return `True` if an installed `numpy` was detected at import time."""
    return _is_available("numpy")
143
+
144
+
145
def get_numpy_version() -> str:
    """Return the detected `numpy` version, or "N/A" if it is not installed."""
    return _get_version("numpy")
147
+
148
+
149
+ # Jinja
150
def is_jinja_available() -> bool:
    """Return `True` if an installed `Jinja2` distribution was detected at import time."""
    return _is_available("jinja")
152
+
153
+
154
def get_jinja_version() -> str:
    """Return the detected `Jinja2` version, or "N/A" if it is not installed."""
    return _get_version("jinja")
156
+
157
+
158
+ # Pillow
159
def is_pillow_available() -> bool:
    """Return `True` if an installed `Pillow` distribution was detected at import time."""
    return _is_available("pillow")
161
+
162
+
163
def get_pillow_version() -> str:
    """Return the detected `Pillow` version, or "N/A" if it is not installed."""
    return _get_version("pillow")
165
+
166
+
167
+ # Pydantic
168
def is_pydantic_available() -> bool:
    """Return `True` if pydantic is installed AND can actually be imported.

    When pydantic is installed but importing it fails, a warning containing the import error
    is emitted and `False` is returned.
    """
    if not _is_available("pydantic"):
        return False
    # For Pydantic, we add an extra check to test whether it is correctly installed or not. If both pydantic 2.x and
    # typing_extensions<=4.5.0 are installed, then pydantic will fail at import time. This should not happen when
    # it is installed with `pip install huggingface_hub[inference]` but it can happen when it is installed manually
    # by the user in an environment that we don't control.
    #
    # Usually we won't need to do this kind of check on optional dependencies. However, pydantic is a special case
    # as it is automatically imported when doing `from huggingface_hub import ...` even if the user doesn't use it.
    #
    # See https://github.com/huggingface/huggingface_hub/pull/1829 for more details.
    try:
        from pydantic import validator  # noqa: F401
    except ImportError as e:
        # Example: "ImportError: cannot import name 'TypeAliasType' from 'typing_extensions'"
        # The exception is bound and interpolated so that the warning shows the real cause.
        warnings.warn(
            "Pydantic is installed but cannot be imported. Please check your installation. `huggingface_hub` will "
            f"default to not using Pydantic. Error message: '{e}'"
        )
        return False
    return True
190
+
191
+
192
def get_pydantic_version() -> str:
    """Return the detected `pydantic` version, or "N/A" if it is not installed."""
    return _get_version("pydantic")
194
+
195
+
196
+ # Pydot
197
def is_pydot_available() -> bool:
    """Return `True` if an installed `pydot` was detected at import time."""
    return _is_available("pydot")
199
+
200
+
201
def get_pydot_version() -> str:
    """Return the detected `pydot` version, or "N/A" if it is not installed."""
    return _get_version("pydot")
203
+
204
+
205
+ # Tensorboard
206
def is_tensorboard_available() -> bool:
    """Return `True` if an installed `tensorboardX` distribution was detected at import time."""
    return _is_available("tensorboard")
208
+
209
+
210
def get_tensorboard_version() -> str:
    """Return the detected `tensorboardX` version, or "N/A" if it is not installed."""
    return _get_version("tensorboard")
212
+
213
+
214
+ # Tensorflow
215
def is_tf_available() -> bool:
    """Return `True` if any of the known TensorFlow distributions was detected at import time."""
    return _is_available("tensorflow")
217
+
218
+
219
def get_tf_version() -> str:
    """Return the version of the detected TensorFlow distribution, or "N/A" if none is installed."""
    return _get_version("tensorflow")
221
+
222
+
223
+ # Torch
224
def is_torch_available() -> bool:
    """Return `True` if an installed `torch` was detected at import time."""
    return _is_available("torch")
226
+
227
+
228
def get_torch_version() -> str:
    """Return the detected `torch` version, or "N/A" if it is not installed."""
    return _get_version("torch")
230
+
231
+
232
+ # Shell-related helpers
233
+ try:
234
+ # Set to `True` if script is running in a Google Colab notebook.
235
+ # If running in Google Colab, git credential store is set globally which makes the
236
+ # warning disappear. See https://github.com/huggingface/huggingface_hub/issues/1043
237
+ #
238
+ # Taken from https://stackoverflow.com/a/63519730.
239
+ _is_google_colab = "google.colab" in str(get_ipython()) # type: ignore # noqa: F821
240
+ except NameError:
241
+ _is_google_colab = False
242
+
243
+
244
def is_notebook() -> bool:
    """Return `True` if code is executed in a notebook (Jupyter, Colab, QTconsole).

    The check looks for a class named "ZMQInteractiveShell" among the classes the current
    IPython shell derives from.

    Taken from https://stackoverflow.com/a/39662359.
    Adapted to make it work with Google colab as well.
    """
    try:
        shell_class = get_ipython().__class__  # type: ignore # noqa: F821
    except NameError:
        return False  # Probably standard Python interpreter

    # Walking the MRO is equivalent to an "is subclass of" check by class name.
    # A match means Jupyter notebook, Google colab or qtconsole.
    return any(ancestor.__name__ == "ZMQInteractiveShell" for ancestor in shell_class.__mro__)
258
+
259
+
260
def is_google_colab() -> bool:
    """Return `True` if code is executed in a Google colab.

    The value is the module-level flag computed once when this module is imported.

    Taken from https://stackoverflow.com/a/63519730.
    """
    return _is_google_colab
266
+
267
+
268
def dump_environment_info() -> Dict[str, Any]:
    """Collect, print and return information about the machine to help debugging issues.

    The report covers the library/platform/Python versions, the interpreter (iPython,
    notebook, Google Colab), the login state, the versions of optional dependencies and the
    values of the configuration constants. It is printed as a bullet list and also returned.

    Similar helper exist in:
    - `datasets` (https://github.com/huggingface/datasets/blob/main/src/datasets/commands/env.py)
    - `diffusers` (https://github.com/huggingface/diffusers/blob/main/src/diffusers/commands/env.py)
    - `transformers` (https://github.com/huggingface/transformers/blob/main/src/transformers/commands/env.py)

    Returns:
        `Dict[str, Any]`: the collected properties, in the order they are printed.
    """
    from huggingface_hub import get_token, whoami
    from huggingface_hub.utils import list_credential_helpers

    token = get_token()

    # Generic machine info
    info: Dict[str, Any] = {}
    info["huggingface_hub version"] = get_hf_hub_version()
    info["Platform"] = platform.platform()
    info["Python version"] = get_python_version()

    # Interpreter info
    try:
        shell_class = get_ipython().__class__  # type: ignore # noqa: F821
    except NameError:
        info["Running in iPython ?"] = "No"
    else:
        info["Running in iPython ?"] = "Yes"
        info["iPython shell"] = shell_class.__name__
    info["Running in notebook ?"] = "Yes" if is_notebook() else "No"
    info["Running in Google Colab ?"] = "Yes" if is_google_colab() else "No"

    # Login info
    info["Token path ?"] = constants.HF_TOKEN_PATH
    info["Has saved token ?"] = token is not None
    if token is not None:
        # Best effort: the entry is simply omitted if the user cannot be resolved.
        try:
            info["Who am I ?"] = whoami()["name"]
        except Exception:
            pass

    # Best effort as well: the entry is omitted if the helpers cannot be listed.
    try:
        info["Configured git credential helpers"] = ", ".join(list_credential_helpers())
    except Exception:
        pass

    # Installed dependencies
    dependency_getters = (
        ("FastAI", get_fastai_version),
        ("Tensorflow", get_tf_version),
        ("Torch", get_torch_version),
        ("Jinja2", get_jinja_version),
        ("Graphviz", get_graphviz_version),
        ("Pydot", get_pydot_version),
        ("Pillow", get_pillow_version),
        ("hf_transfer", get_hf_transfer_version),
        ("gradio", get_gradio_version),
        ("tensorboard", get_tensorboard_version),
        ("numpy", get_numpy_version),
        ("pydantic", get_pydantic_version),
        ("aiohttp", get_aiohttp_version),
    )
    for label, get_version in dependency_getters:
        info[label] = get_version()

    # Environment variables: each entry is reported under the name of its constant.
    constant_names = (
        "ENDPOINT",
        "HF_HUB_CACHE",
        "HF_ASSETS_CACHE",
        "HF_TOKEN_PATH",
        "HF_HUB_OFFLINE",
        "HF_HUB_DISABLE_TELEMETRY",
        "HF_HUB_DISABLE_PROGRESS_BARS",
        "HF_HUB_DISABLE_SYMLINKS_WARNING",
        "HF_HUB_DISABLE_EXPERIMENTAL_WARNING",
        "HF_HUB_DISABLE_IMPLICIT_TOKEN",
        "HF_HUB_ENABLE_HF_TRANSFER",
        "HF_HUB_ETAG_TIMEOUT",
        "HF_HUB_DOWNLOAD_TIMEOUT",
    )
    for constant_name in constant_names:
        info[constant_name] = getattr(constants, constant_name)

    print("\nCopy-and-paste the text below in your GitHub issue.\n")
    report_lines = [f"- {prop}: {val}" for prop, val in info.items()]
    print("\n".join(report_lines) + "\n")
    return info
lib/python3.11/site-packages/huggingface_hub/utils/_safetensors.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import functools
2
+ import operator
3
+ from collections import defaultdict
4
+ from dataclasses import dataclass, field
5
+ from typing import Dict, List, Literal, Optional, Tuple
6
+
7
+
8
# Type aliases used in the annotations of the metadata classes below.
FILENAME_T = str  # name of a file
TENSOR_NAME_T = str  # name of a tensor
# Data types accepted for a tensor.
DTYPE_T = Literal["F64", "F32", "F16", "BF16", "I64", "I32", "I16", "I8", "U8", "BOOL"]
11
+
12
+
13
class SafetensorsParsingError(Exception):
    """Raised when the metadata of a safetensors file cannot be parsed.

    This happens when the file is not a safetensors file or when it does not respect the
    specification.
    """
18
+
19
+
20
class NotASafetensorsRepoError(Exception):
    """Raised when a repo is not a Safetensors repo.

    A repo is not a Safetensors repo when it has neither a `model.safetensors` nor a
    `model.safetensors.index.json` file.
    """
24
+
25
+
26
@dataclass
class TensorInfo:
    """Description of a single tensor stored in a safetensors file.

    For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.

    Attributes:
        dtype (`str`):
            The data type of the tensor ("F64", "F32", "F16", "BF16", "I64", "I32", "I16", "I8", "U8", "BOOL").
        shape (`List[int]`):
            The shape of the tensor.
        data_offsets (`Tuple[int, int]`):
            The offsets of the data in the file as a tuple `[BEGIN, END]`.
        parameter_count (`int`):
            The number of parameters in the tensor. Not an init argument: it is computed from `shape`.
    """

    dtype: DTYPE_T
    shape: List[int]
    data_offsets: Tuple[int, int]
    parameter_count: int = field(init=False)

    def __post_init__(self) -> None:
        # The number of parameters is the product of all dimensions.
        # Taken from https://stackoverflow.com/a/13840436
        try:
            count = functools.reduce(operator.mul, self.shape)
        except TypeError:
            # `reduce` has no initial value: an empty shape raises. A scalar value has no shape
            # and counts as one parameter.
            count = 1
        self.parameter_count = count
54
+
55
+
56
@dataclass
class SafetensorsFileMetadata:
    """Metadata of a single Safetensors file hosted on the Hub.

    This class is returned by [`parse_safetensors_file_metadata`].

    For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.

    Attributes:
        metadata (`Dict`):
            The metadata contained in the file.
        tensors (`Dict[str, TensorInfo]`):
            A map of all tensors. Keys are tensor names and values are information about the corresponding tensor, as a
            [`TensorInfo`] object.
        parameter_count (`Dict[str, int]`):
            A map of the number of parameters per data type. Keys are data types and values are the number of parameters
            of that data type. Not an init argument: it is computed from `tensors`.
    """

    metadata: Dict[str, str]
    tensors: Dict[TENSOR_NAME_T, TensorInfo]
    parameter_count: Dict[DTYPE_T, int] = field(init=False)

    def __post_init__(self) -> None:
        # Sum the parameters of all tensors, grouped by data type.
        totals: Dict[DTYPE_T, int] = {}
        for tensor in self.tensors.values():
            totals[tensor.dtype] = totals.get(tensor.dtype, 0) + tensor.parameter_count
        self.parameter_count = totals
84
+
85
+
86
@dataclass
class SafetensorsRepoMetadata:
    """Metadata of a Safetensors repo.

    A repo is considered to be a Safetensors repo if it contains either a 'model.safetensors' weight file (non-sharded
    model) or a 'model.safetensors.index.json' index file (sharded model) at its root.

    This class is returned by [`get_safetensors_metadata`].

    For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.

    Attributes:
        metadata (`Dict`, *optional*):
            The metadata contained in the 'model.safetensors.index.json' file, if it exists. Only populated for sharded
            models.
        sharded (`bool`):
            Whether the repo contains a sharded model or not.
        weight_map (`Dict[str, str]`):
            A map of all weights. Keys are tensor names and values are filenames of the files containing the tensors.
        files_metadata (`Dict[str, SafetensorsFileMetadata]`):
            A map of all files metadata. Keys are filenames and values are the metadata of the corresponding file, as
            a [`SafetensorsFileMetadata`] object.
        parameter_count (`Dict[str, int]`):
            A map of the number of parameters per data type. Keys are data types and values are the number of parameters
            of that data type. Not an init argument: it is computed from `files_metadata`.
    """

    metadata: Optional[Dict]
    sharded: bool
    weight_map: Dict[TENSOR_NAME_T, FILENAME_T]  # tensor name -> filename
    files_metadata: Dict[FILENAME_T, SafetensorsFileMetadata]  # filename -> metadata
    parameter_count: Dict[DTYPE_T, int] = field(init=False)

    def __post_init__(self) -> None:
        # Merge the per-dtype counts of every file into repo-wide totals.
        totals: Dict[DTYPE_T, int] = {}
        for file_metadata in self.files_metadata.values():
            for dtype, nb_parameters_ in file_metadata.parameter_count.items():
                totals[dtype] = totals.get(dtype, 0) + nb_parameters_
        self.parameter_count = totals
lib/python3.11/site-packages/huggingface_hub/utils/_subprocess.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+ # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License
16
+ """Contains utilities to easily handle subprocesses in `huggingface_hub`."""
17
+ import os
18
+ import subprocess
19
+ import sys
20
+ from contextlib import contextmanager
21
+ from io import StringIO
22
+ from pathlib import Path
23
+ from typing import IO, Generator, List, Optional, Tuple, Union
24
+
25
+ from .logging import get_logger
26
+
27
+
28
+ logger = get_logger(__name__)
29
+
30
+
31
@contextmanager
def capture_output() -> Generator[StringIO, None, None]:
    """Capture output that is printed to terminal.

    While the context is active, `sys.stdout` is replaced by an in-memory buffer. The previous
    `sys.stdout` is always restored on exit, even if the body of the `with` block raises.

    Taken from https://stackoverflow.com/a/34738440

    Yields:
        `StringIO`: the buffer receiving everything written to `sys.stdout`.

    Example:
    ```py
    >>> with capture_output() as output:
    ...     print("hello world")
    >>> assert output.getvalue() == "hello world\n"
    ```
    """
    output = StringIO()
    previous_output = sys.stdout
    sys.stdout = output
    try:
        yield output
    finally:
        # Without `finally`, an exception in the `with` body would leave stdout redirected.
        sys.stdout = previous_output
49
+
50
+
51
def run_subprocess(
    command: Union[str, List[str]],
    folder: Optional[Union[str, Path]] = None,
    check=True,
    **kwargs,
) -> subprocess.CompletedProcess:
    """
    Run a command in a subprocess and wait for it to complete.

    Both `stderr` and `stdout` are captured and decoded as utf-8 text. Please call
    `subprocess.run` manually in case you would like for them not to be captured.

    Args:
        command (`str` or `List[str]`):
            The command to execute as a string or list of strings. A string is split on
            whitespace to build the list of arguments.
        folder (`str`, *optional*):
            The folder in which to run the command. Defaults to current working
            directory (from `os.getcwd()`).
        check (`bool`, *optional*, defaults to `True`):
            Setting `check` to `True` will raise a `subprocess.CalledProcessError`
            when the subprocess has a non-zero exit code.
        kwargs (`Dict[str]`):
            Keyword arguments to be passed to the `subprocess.run` underlying command.

    Returns:
        `subprocess.CompletedProcess`: The completed process.
    """
    argv = command.split() if isinstance(command, str) else command
    working_dir = str(folder) if isinstance(folder, Path) else folder

    return subprocess.run(
        argv,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        check=check,
        encoding="utf-8",
        errors="replace",  # if not utf-8, replace char by �
        cwd=working_dir or os.getcwd(),
        **kwargs,
    )
93
+
94
+
95
@contextmanager
def run_interactive_subprocess(
    command: Union[str, List[str]],
    folder: Optional[Union[str, Path]] = None,
    **kwargs,
) -> Generator[Tuple[IO[str], IO[str]], None, None]:
    """Run a subprocess in an interactive mode in a context manager.

    The process is started with piped `stdin` and `stdout`, and its `stderr` is redirected
    to `stdout`. Both streams are text streams (utf-8).

    Args:
        command (`str` or `List[str]`):
            The command to execute as a string or list of strings. A string is split on
            whitespace to build the list of arguments.
        folder (`str`, *optional*):
            The folder in which to run the command. Defaults to current working
            directory (from `os.getcwd()`).
        kwargs (`Dict[str]`):
            Keyword arguments to be passed to the underlying `subprocess.Popen` call.

    Returns:
        `Tuple[IO[str], IO[str]]`: A tuple with `stdin` and `stdout` to interact
        with the process (input and output are utf-8 encoded).

    Example:
    ```python
    with run_interactive_subprocess("git credential-store get") as (stdin, stdout):
        # Write to stdin
        stdin.write("url=hf.co\nusername=obama\n")
        stdin.flush()

        # Read from stdout
        output = stdout.read()
    ```
    """
    argv = command.split() if isinstance(command, str) else command

    process = subprocess.Popen(
        argv,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        encoding="utf-8",
        errors="replace",  # if not utf-8, replace char by �
        cwd=folder or os.getcwd(),
        **kwargs,
    )
    with process:
        assert process.stdin is not None, "subprocess is opened as subprocess.PIPE"
        assert process.stdout is not None, "subprocess is opened as subprocess.PIPE"
        yield process.stdin, process.stdout
lib/python3.11/site-packages/huggingface_hub/utils/_telemetry.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from queue import Queue
2
+ from threading import Lock, Thread
3
+ from typing import Dict, Optional, Union
4
+ from urllib.parse import quote
5
+
6
+ from .. import constants, logging
7
+ from . import build_hf_headers, get_session, hf_raise_for_status
8
+
9
+
10
+ logger = logging.get_logger(__name__)
11
+
12
+ # Telemetry is sent by a separate thread to avoid blocking the main thread.
13
+ # A daemon thread is started once and consume tasks from the _TELEMETRY_QUEUE.
14
+ # If the thread stops for some reason -shouldn't happen-, we restart a new one.
15
+ _TELEMETRY_THREAD: Optional[Thread] = None
16
+ _TELEMETRY_THREAD_LOCK = Lock() # Lock to avoid starting multiple threads in parallel
17
+ _TELEMETRY_QUEUE: Queue = Queue()
18
+
19
+
20
def send_telemetry(
    topic: str,
    *,
    library_name: Optional[str] = None,
    library_version: Optional[str] = None,
    user_agent: Union[Dict, str, None] = None,
) -> None:
    """
    Sends telemetry that helps tracking usage of different HF libraries.

    This usage data helps us debug issues and prioritize new features. However, we understand that not everyone wants
    to share additional information, and we respect your privacy. You can disable telemetry collection by setting the
    `HF_HUB_DISABLE_TELEMETRY=1` as environment variable. Telemetry is also disabled in offline mode (i.e. when setting
    `HF_HUB_OFFLINE=1`).

    Telemetry collection is run in a separate thread to minimize impact for the user: this function only enqueues
    the task and returns.

    Args:
        topic (`str`):
            Name of the topic that is monitored. The topic is directly used to build the URL. If you want to monitor
            subtopics, just use "/" separation. Examples: "gradio", "transformers/examples",...
        library_name (`str`, *optional*):
            The name of the library that is making the HTTP request. Will be added to the user-agent header.
        library_version (`str`, *optional*):
            The version of the library that is making the HTTP request. Will be added to the user-agent header.
        user_agent (`str`, `dict`, *optional*):
            The user agent info in the form of a dictionary or a single string. It will be completed with information about the installed packages.

    Example:
    ```py
    >>> from huggingface_hub.utils import send_telemetry

    # Send telemetry without library information
    >>> send_telemetry("ping")

    # Send telemetry to subtopic with library information
    >>> send_telemetry("gradio/local_link", library_name="gradio", library_version="3.22.1")

    # Send telemetry with additional data
    >>> send_telemetry(
    ...     topic="examples",
    ...     library_name="transformers",
    ...     library_version="4.26.0",
    ...     user_agent={"pipeline": "text_classification", "framework": "flax"},
    ... )
    ```
    """
    telemetry_disabled = constants.HF_HUB_OFFLINE or constants.HF_HUB_DISABLE_TELEMETRY
    if telemetry_disabled:
        return

    _start_telemetry_thread()  # starts thread only if doesn't exist yet
    task = {
        "topic": topic,
        "library_name": library_name,
        "library_version": library_version,
        "user_agent": user_agent,
    }
    _TELEMETRY_QUEUE.put(task)
74
+
75
+
76
def _start_telemetry_thread():
    """Make sure a daemon thread is consuming tasks from the telemetry queue.

    A new thread is started if none exists yet or if the previous one is no longer alive.
    """
    global _TELEMETRY_THREAD

    with _TELEMETRY_THREAD_LOCK:  # avoid to start multiple threads if called concurrently
        thread_is_running = _TELEMETRY_THREAD is not None and _TELEMETRY_THREAD.is_alive()
        if thread_is_running:
            return
        _TELEMETRY_THREAD = Thread(target=_telemetry_worker, daemon=True)
        _TELEMETRY_THREAD.start()
86
+
87
+
88
def _telemetry_worker():
    """Loop forever: wait for a task from the telemetry queue, send it, mark it as done."""
    while True:
        task = _TELEMETRY_QUEUE.get()
        _send_telemetry_in_thread(**task)
        _TELEMETRY_QUEUE.task_done()
94
+
95
+
96
def _send_telemetry_in_thread(
    topic: str,
    *,
    library_name: Optional[str] = None,
    library_version: Optional[str] = None,
    user_agent: Union[Dict, str, None] = None,
) -> None:
    """Send the telemetry data to the Hub with a HEAD request.

    The URL path is built from the non-empty "/"-separated parts of `topic`, each of them
    being URL-quoted. Any error raised while sending is logged at debug level and ignored.
    """
    segments = [quote(part) for part in topic.split("/") if part]
    path = "/".join(segments)
    try:
        session = get_session()
        url = f"{constants.ENDPOINT}/api/telemetry/{path}"
        headers = build_hf_headers(
            token=False,  # no need to send a token for telemetry
            library_name=library_name,
            library_version=library_version,
            user_agent=user_agent,
        )
        r = session.head(url, headers=headers)
        hf_raise_for_status(r)
    except Exception as e:
        # We don't want to error in case of connection errors of any kind.
        logger.debug(f"Error while sending telemetry: {e}")
lib/python3.11/site-packages/huggingface_hub/utils/_token.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2023 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Contains a helper to get the token from the machine (env variable, secret or config file)."""
15
+ import os
16
+ import warnings
17
+ from pathlib import Path
18
+ from threading import Lock
19
+ from typing import Optional
20
+
21
+ from .. import constants
22
+ from ._runtime import is_google_colab
23
+
24
+
25
# Set to True once the Colab secrets vault has been queried, whether or not a token was obtained from it.
_IS_GOOGLE_COLAB_CHECKED = False
# Held while querying the Colab vault and while reading/updating the two cache variables around it.
_GOOGLE_COLAB_SECRET_LOCK = Lock()
# Cached, cleaned value of the `HF_TOKEN` Colab secret; stays None when no usable secret was retrieved.
_GOOGLE_COLAB_SECRET: Optional[str] = None
28
+
29
+
30
def get_token() -> Optional[str]:
    """
    Get token if user is logged in.

    Note: in most cases, you should use [`huggingface_hub.utils.build_hf_headers`] instead. This method is only useful
    if you want to retrieve the token for other purposes than sending an HTTP request.

    Sources are tried in this order and the first non-empty token wins:
        1. the `HF_TOKEN` secret of the Google Colab vault,
        2. the `HF_TOKEN` environment variable (or the legacy `HUGGING_FACE_HUB_TOKEN`),
        3. the token file located in the Hugging Face home folder.

    Returns None if user is not logged in. To log in, use [`login`] or `huggingface-cli login`.

    Returns:
        `str` or `None`: The token, `None` if it doesn't exist.
    """
    token = _get_token_from_google_colab()
    if not token:
        token = _get_token_from_environment()
    if not token:
        token = _get_token_from_file()
    return token
45
+
46
+
47
def _get_token_from_google_colab() -> Optional[str]:
    """Read the `HF_TOKEN` secret from the Google Colab vault using `google.colab.userdata.get(...)`.

    The vault is queried at most once per session: the outcome (a cleaned token, or `None` when access was refused,
    the secret does not exist or the lookup failed) is cached in a module-level variable and returned by subsequent
    calls, so that access to the vault is not requested again.

    Returns:
        `str` or `None`: The cleaned token, or `None` when not running in Google Colab, when the `google.colab`
        modules cannot be imported, or when no token could be read from the vault.
    """
    global _GOOGLE_COLAB_SECRET
    global _IS_GOOGLE_COLAB_CHECKED

    if not is_google_colab():
        return None

    # `google.colab.userdata` is not thread-safe.
    # This can lead to a deadlock if multiple threads try to access it at the same time
    # (typically when using `snapshot_download`), hence the lock.
    # See https://github.com/huggingface/huggingface_hub/issues/1952 for more details.
    with _GOOGLE_COLAB_SECRET_LOCK:
        if _IS_GOOGLE_COLAB_CHECKED:  # request access only once
            return _GOOGLE_COLAB_SECRET

        try:
            from google.colab import userdata
            from google.colab.errors import Error as ColabError
        except ImportError:
            return None

        # Stays None unless the vault lookup succeeds.
        secret: Optional[str] = None
        try:
            secret = _clean_token(userdata.get("HF_TOKEN"))
        except userdata.NotebookAccessError:
            # The user has a secret called `HF_TOKEN`, got a popup asking to grant access to it and refused
            # => warn user but ignore error => do not re-request access to user
            warnings.warn(
                "\nAccess to the secret `HF_TOKEN` has not been granted on this notebook."
                "\nYou will not be requested again."
                "\nPlease restart the session if you want to be prompted again."
            )
        except userdata.SecretNotFoundError:
            # The user did not define a `HF_TOKEN` secret => warn
            warnings.warn(
                "\nThe secret `HF_TOKEN` does not exist in your Colab secrets."
                "\nTo authenticate with the Hugging Face Hub, create a token in your settings tab "
                "(https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session."
                "\nYou will be able to reuse this secret in all of your notebooks."
                "\nPlease note that authentication is recommended but still optional to access public models or datasets."
            )
        except ColabError as e:
            # Something happened but we don't know what => recommend opening a GitHub issue
            warnings.warn(
                f"\nError while fetching `HF_TOKEN` secret value from your vault: '{str(e)}'."
                "\nYou are not authenticated with the Hugging Face Hub in this notebook."
                "\nIf the error persists, please let us know by opening an issue on GitHub "
                "(https://github.com/huggingface/huggingface_hub/issues/new)."
            )

        # Cache the outcome (token or None) so the vault is not queried again during this session.
        _GOOGLE_COLAB_SECRET = secret
        _IS_GOOGLE_COLAB_CHECKED = True
        return secret
108
+
109
+
110
def _get_token_from_environment() -> Optional[str]:
    """Get the token from environment variables.

    `HF_TOKEN` has priority (`HUGGING_FACE_HUB_TOKEN` is kept for backward compatibility). Each variable is cleaned
    before being considered, so an `HF_TOKEN` that is set but blank (e.g. only whitespace or a newline) does not hide
    a valid `HUGGING_FACE_HUB_TOKEN`.

    Returns:
        `str` or `None`: The cleaned token, `None` if neither variable holds a non-blank value.
    """
    return _clean_token(os.environ.get("HF_TOKEN")) or _clean_token(os.environ.get("HUGGING_FACE_HUB_TOKEN"))
113
+
114
+
115
def _get_token_from_file() -> Optional[str]:
    """Get the token stored in the file located at `constants.HF_TOKEN_PATH`.

    Returns:
        `str` or `None`: The cleaned content of the token file, `None` if the file does not exist.
    """
    token_file = Path(constants.HF_TOKEN_PATH)
    try:
        raw_token = token_file.read_text()
    except FileNotFoundError:
        # No token file on this machine.
        return None
    return _clean_token(raw_token)
120
+
121
+
122
+ def _clean_token(token: Optional[str]) -> Optional[str]:
123
+ """Clean token by removing trailing and leading spaces and newlines.
124
+
125
+ If token is an empty string, return None.
126
+ """
127
+ if token is None:
128
+ return None
129
+ return token.replace("\r", "").replace("\n", "").strip() or None
lib/python3.11/site-packages/huggingface_hub/utils/_typing.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Handle typing imports based on system compatibility."""
16
+ from typing import Callable, Literal, TypeVar
17
+
18
+
19
# Type alias restricting a string to one of these HTTP method names.
HTTP_METHOD_T = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]

# type hint meaning "function signature not changed by decorator"
CallableT = TypeVar("CallableT", bound=Callable)