87245978eac49d491b540e2a86047c183ef44b5025e4ace6bf1f58653aed56a8
Browse files- lib/python3.11/site-packages/huggingface_hub/templates/datasetcard_template.md +143 -0
- lib/python3.11/site-packages/huggingface_hub/templates/modelcard_template.md +203 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__init__.py +108 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_cache_assets.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_cache_manager.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_chunk_utils.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_datetime.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_deprecation.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_errors.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_experimental.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_fixes.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_git_credential.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_headers.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_hf_folder.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_http.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_pagination.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_paths.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_runtime.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_safetensors.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_subprocess.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_telemetry.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_token.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_typing.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_validators.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/endpoint_helpers.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/insecure_hashlib.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/logging.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/sha.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/tqdm.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_cache_assets.py +135 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_cache_manager.py +806 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_chunk_utils.py +64 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_datetime.py +68 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_deprecation.py +136 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_errors.py +359 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_experimental.py +65 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_fixes.py +77 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_git_credential.py +120 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_headers.py +234 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_hf_folder.py +114 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_http.py +307 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_pagination.py +51 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_paths.py +117 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_runtime.py +344 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_safetensors.py +124 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_subprocess.py +142 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_telemetry.py +118 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_token.py +129 -0
- lib/python3.11/site-packages/huggingface_hub/utils/_typing.py +22 -0
lib/python3.11/site-packages/huggingface_hub/templates/datasetcard_template.md
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
# For reference on dataset card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/datasetcard.md?plain=1
|
3 |
+
# Doc / guide: https://huggingface.co/docs/hub/datasets-cards
|
4 |
+
{{ card_data }}
|
5 |
+
---
|
6 |
+
|
7 |
+
# Dataset Card for {{ pretty_name | default("Dataset Name", true) }}
|
8 |
+
|
9 |
+
<!-- Provide a quick summary of the dataset. -->
|
10 |
+
|
11 |
+
{{ dataset_summary | default("", true) }}
|
12 |
+
|
13 |
+
## Dataset Details
|
14 |
+
|
15 |
+
### Dataset Description
|
16 |
+
|
17 |
+
<!-- Provide a longer summary of what this dataset is. -->
|
18 |
+
|
19 |
+
{{ dataset_description | default("", true) }}
|
20 |
+
|
21 |
+
- **Curated by:** {{ curators | default("[More Information Needed]", true)}}
|
22 |
+
- **Funded by [optional]:** {{ funded_by | default("[More Information Needed]", true)}}
|
23 |
+
- **Shared by [optional]:** {{ shared_by | default("[More Information Needed]", true)}}
|
24 |
+
- **Language(s) (NLP):** {{ language | default("[More Information Needed]", true)}}
|
25 |
+
- **License:** {{ license | default("[More Information Needed]", true)}}
|
26 |
+
|
27 |
+
### Dataset Sources [optional]
|
28 |
+
|
29 |
+
<!-- Provide the basic links for the dataset. -->
|
30 |
+
|
31 |
+
- **Repository:** {{ repo | default("[More Information Needed]", true)}}
|
32 |
+
- **Paper [optional]:** {{ paper | default("[More Information Needed]", true)}}
|
33 |
+
- **Demo [optional]:** {{ demo | default("[More Information Needed]", true)}}
|
34 |
+
|
35 |
+
## Uses
|
36 |
+
|
37 |
+
<!-- Address questions around how the dataset is intended to be used. -->
|
38 |
+
|
39 |
+
### Direct Use
|
40 |
+
|
41 |
+
<!-- This section describes suitable use cases for the dataset. -->
|
42 |
+
|
43 |
+
{{ direct_use | default("[More Information Needed]", true)}}
|
44 |
+
|
45 |
+
### Out-of-Scope Use
|
46 |
+
|
47 |
+
<!-- This section addresses misuse, malicious use, and uses that the dataset will not work well for. -->
|
48 |
+
|
49 |
+
{{ out_of_scope_use | default("[More Information Needed]", true)}}
|
50 |
+
|
51 |
+
## Dataset Structure
|
52 |
+
|
53 |
+
<!-- This section provides a description of the dataset fields, and additional information about the dataset structure such as criteria used to create the splits, relationships between data points, etc. -->
|
54 |
+
|
55 |
+
{{ dataset_structure | default("[More Information Needed]", true)}}
|
56 |
+
|
57 |
+
## Dataset Creation
|
58 |
+
|
59 |
+
### Curation Rationale
|
60 |
+
|
61 |
+
<!-- Motivation for the creation of this dataset. -->
|
62 |
+
|
63 |
+
{{ curation_rationale_section | default("[More Information Needed]", true)}}
|
64 |
+
|
65 |
+
### Source Data
|
66 |
+
|
67 |
+
<!-- This section describes the source data (e.g. news text and headlines, social media posts, translated sentences, ...). -->
|
68 |
+
|
69 |
+
#### Data Collection and Processing
|
70 |
+
|
71 |
+
<!-- This section describes the data collection and processing process such as data selection criteria, filtering and normalization methods, tools and libraries used, etc. -->
|
72 |
+
|
73 |
+
{{ data_collection_and_processing_section | default("[More Information Needed]", true)}}
|
74 |
+
|
75 |
+
#### Who are the source data producers?
|
76 |
+
|
77 |
+
<!-- This section describes the people or systems who originally created the data. It should also include self-reported demographic or identity information for the source data creators if this information is available. -->
|
78 |
+
|
79 |
+
{{ source_data_producers_section | default("[More Information Needed]", true)}}
|
80 |
+
|
81 |
+
### Annotations [optional]
|
82 |
+
|
83 |
+
<!-- If the dataset contains annotations which are not part of the initial data collection, use this section to describe them. -->
|
84 |
+
|
85 |
+
#### Annotation process
|
86 |
+
|
87 |
+
<!-- This section describes the annotation process such as annotation tools used in the process, the amount of data annotated, annotation guidelines provided to the annotators, interannotator statistics, annotation validation, etc. -->
|
88 |
+
|
89 |
+
{{ annotation_process_section | default("[More Information Needed]", true)}}
|
90 |
+
|
91 |
+
#### Who are the annotators?
|
92 |
+
|
93 |
+
<!-- This section describes the people or systems who created the annotations. -->
|
94 |
+
|
95 |
+
{{ who_are_annotators_section | default("[More Information Needed]", true)}}
|
96 |
+
|
97 |
+
#### Personal and Sensitive Information
|
98 |
+
|
99 |
+
<!-- State whether the dataset contains data that might be considered personal, sensitive, or private (e.g., data that reveals addresses, uniquely identifiable names or aliases, racial or ethnic origins, sexual orientations, religious beliefs, political opinions, financial or health data, etc.). If efforts were made to anonymize the data, describe the anonymization process. -->
|
100 |
+
|
101 |
+
{{ personal_and_sensitive_information | default("[More Information Needed]", true)}}
|
102 |
+
|
103 |
+
## Bias, Risks, and Limitations
|
104 |
+
|
105 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
106 |
+
|
107 |
+
{{ bias_risks_limitations | default("[More Information Needed]", true)}}
|
108 |
+
|
109 |
+
### Recommendations
|
110 |
+
|
111 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
112 |
+
|
113 |
+
{{ bias_recommendations | default("Users should be made aware of the risks, biases and limitations of the dataset. More information needed for further recommendations.", true)}}
|
114 |
+
|
115 |
+
## Citation [optional]
|
116 |
+
|
117 |
+
<!-- If there is a paper or blog post introducing the dataset, the APA and Bibtex information for that should go in this section. -->
|
118 |
+
|
119 |
+
**BibTeX:**
|
120 |
+
|
121 |
+
{{ citation_bibtex | default("[More Information Needed]", true)}}
|
122 |
+
|
123 |
+
**APA:**
|
124 |
+
|
125 |
+
{{ citation_apa | default("[More Information Needed]", true)}}
|
126 |
+
|
127 |
+
## Glossary [optional]
|
128 |
+
|
129 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the dataset or dataset card. -->
|
130 |
+
|
131 |
+
{{ glossary | default("[More Information Needed]", true)}}
|
132 |
+
|
133 |
+
## More Information [optional]
|
134 |
+
|
135 |
+
{{ more_information | default("[More Information Needed]", true)}}
|
136 |
+
|
137 |
+
## Dataset Card Authors [optional]
|
138 |
+
|
139 |
+
{{ dataset_card_authors | default("[More Information Needed]", true)}}
|
140 |
+
|
141 |
+
## Dataset Card Contact
|
142 |
+
|
143 |
+
{{ dataset_card_contact | default("[More Information Needed]", true)}}
|
lib/python3.11/site-packages/huggingface_hub/templates/modelcard_template.md
ADDED
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
|
3 |
+
# Doc / guide: https://huggingface.co/docs/hub/model-cards
|
4 |
+
{{ card_data }}
|
5 |
+
---
|
6 |
+
|
7 |
+
# Model Card for {{ model_id | default("Model ID", true) }}
|
8 |
+
|
9 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
10 |
+
|
11 |
+
{{ model_summary | default("", true) }}
|
12 |
+
|
13 |
+
## Model Details
|
14 |
+
|
15 |
+
### Model Description
|
16 |
+
|
17 |
+
<!-- Provide a longer summary of what this model is. -->
|
18 |
+
|
19 |
+
{{ model_description | default("", true) }}
|
20 |
+
|
21 |
+
- **Developed by:** {{ developers | default("[More Information Needed]", true)}}
|
22 |
+
- **Funded by [optional]:** {{ funded_by | default("[More Information Needed]", true)}}
|
23 |
+
- **Shared by [optional]:** {{ shared_by | default("[More Information Needed]", true)}}
|
24 |
+
- **Model type:** {{ model_type | default("[More Information Needed]", true)}}
|
25 |
+
- **Language(s) (NLP):** {{ language | default("[More Information Needed]", true)}}
|
26 |
+
- **License:** {{ license | default("[More Information Needed]", true)}}
|
27 |
+
- **Finetuned from model [optional]:** {{ base_model | default("[More Information Needed]", true)}}
|
28 |
+
|
29 |
+
### Model Sources [optional]
|
30 |
+
|
31 |
+
<!-- Provide the basic links for the model. -->
|
32 |
+
|
33 |
+
- **Repository:** {{ repo | default("[More Information Needed]", true)}}
|
34 |
+
- **Paper [optional]:** {{ paper | default("[More Information Needed]", true)}}
|
35 |
+
- **Demo [optional]:** {{ demo | default("[More Information Needed]", true)}}
|
36 |
+
|
37 |
+
## Uses
|
38 |
+
|
39 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
40 |
+
|
41 |
+
### Direct Use
|
42 |
+
|
43 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
44 |
+
|
45 |
+
{{ direct_use | default("[More Information Needed]", true)}}
|
46 |
+
|
47 |
+
### Downstream Use [optional]
|
48 |
+
|
49 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
50 |
+
|
51 |
+
{{ downstream_use | default("[More Information Needed]", true)}}
|
52 |
+
|
53 |
+
### Out-of-Scope Use
|
54 |
+
|
55 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
56 |
+
|
57 |
+
{{ out_of_scope_use | default("[More Information Needed]", true)}}
|
58 |
+
|
59 |
+
## Bias, Risks, and Limitations
|
60 |
+
|
61 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
62 |
+
|
63 |
+
{{ bias_risks_limitations | default("[More Information Needed]", true)}}
|
64 |
+
|
65 |
+
### Recommendations
|
66 |
+
|
67 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
68 |
+
|
69 |
+
{{ bias_recommendations | default("Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.", true)}}
|
70 |
+
|
71 |
+
## How to Get Started with the Model
|
72 |
+
|
73 |
+
Use the code below to get started with the model.
|
74 |
+
|
75 |
+
{{ get_started_code | default("[More Information Needed]", true)}}
|
76 |
+
|
77 |
+
## Training Details
|
78 |
+
|
79 |
+
### Training Data
|
80 |
+
|
81 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
82 |
+
|
83 |
+
{{ training_data | default("[More Information Needed]", true)}}
|
84 |
+
|
85 |
+
### Training Procedure
|
86 |
+
|
87 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
88 |
+
|
89 |
+
#### Preprocessing [optional]
|
90 |
+
|
91 |
+
{{ preprocessing | default("[More Information Needed]", true)}}
|
92 |
+
|
93 |
+
|
94 |
+
#### Training Hyperparameters
|
95 |
+
|
96 |
+
- **Training regime:** {{ training_regime | default("[More Information Needed]", true)}} <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
97 |
+
|
98 |
+
#### Speeds, Sizes, Times [optional]
|
99 |
+
|
100 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
101 |
+
|
102 |
+
{{ speeds_sizes_times | default("[More Information Needed]", true)}}
|
103 |
+
|
104 |
+
## Evaluation
|
105 |
+
|
106 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
107 |
+
|
108 |
+
### Testing Data, Factors & Metrics
|
109 |
+
|
110 |
+
#### Testing Data
|
111 |
+
|
112 |
+
<!-- This should link to a Dataset Card if possible. -->
|
113 |
+
|
114 |
+
{{ testing_data | default("[More Information Needed]", true)}}
|
115 |
+
|
116 |
+
#### Factors
|
117 |
+
|
118 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
119 |
+
|
120 |
+
{{ testing_factors | default("[More Information Needed]", true)}}
|
121 |
+
|
122 |
+
#### Metrics
|
123 |
+
|
124 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
125 |
+
|
126 |
+
{{ testing_metrics | default("[More Information Needed]", true)}}
|
127 |
+
|
128 |
+
### Results
|
129 |
+
|
130 |
+
{{ results | default("[More Information Needed]", true)}}
|
131 |
+
|
132 |
+
#### Summary
|
133 |
+
|
134 |
+
{{ results_summary | default("", true) }}
|
135 |
+
|
136 |
+
## Model Examination [optional]
|
137 |
+
|
138 |
+
<!-- Relevant interpretability work for the model goes here -->
|
139 |
+
|
140 |
+
{{ model_examination | default("[More Information Needed]", true)}}
|
141 |
+
|
142 |
+
## Environmental Impact
|
143 |
+
|
144 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
145 |
+
|
146 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
147 |
+
|
148 |
+
- **Hardware Type:** {{ hardware_type | default("[More Information Needed]", true)}}
|
149 |
+
- **Hours used:** {{ hours_used | default("[More Information Needed]", true)}}
|
150 |
+
- **Cloud Provider:** {{ cloud_provider | default("[More Information Needed]", true)}}
|
151 |
+
- **Compute Region:** {{ cloud_region | default("[More Information Needed]", true)}}
|
152 |
+
- **Carbon Emitted:** {{ co2_emitted | default("[More Information Needed]", true)}}
|
153 |
+
|
154 |
+
## Technical Specifications [optional]
|
155 |
+
|
156 |
+
### Model Architecture and Objective
|
157 |
+
|
158 |
+
{{ model_specs | default("[More Information Needed]", true)}}
|
159 |
+
|
160 |
+
### Compute Infrastructure
|
161 |
+
|
162 |
+
{{ compute_infrastructure | default("[More Information Needed]", true)}}
|
163 |
+
|
164 |
+
#### Hardware
|
165 |
+
|
166 |
+
{{ hardware_requirements | default("[More Information Needed]", true)}}
|
167 |
+
|
168 |
+
#### Software
|
169 |
+
|
170 |
+
{{ software | default("[More Information Needed]", true)}}
|
171 |
+
|
172 |
+
## Citation [optional]
|
173 |
+
|
174 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
175 |
+
|
176 |
+
**BibTeX:**
|
177 |
+
|
178 |
+
{{ citation_bibtex | default("[More Information Needed]", true)}}
|
179 |
+
|
180 |
+
**APA:**
|
181 |
+
|
182 |
+
{{ citation_apa | default("[More Information Needed]", true)}}
|
183 |
+
|
184 |
+
## Glossary [optional]
|
185 |
+
|
186 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
187 |
+
|
188 |
+
{{ glossary | default("[More Information Needed]", true)}}
|
189 |
+
|
190 |
+
## More Information [optional]
|
191 |
+
|
192 |
+
{{ more_information | default("[More Information Needed]", true)}}
|
193 |
+
|
194 |
+
## Model Card Authors [optional]
|
195 |
+
|
196 |
+
{{ model_card_authors | default("[More Information Needed]", true)}}
|
197 |
+
|
198 |
+
## Model Card Contact
|
199 |
+
|
200 |
+
{{ model_card_contact | default("[More Information Needed]", true)}}
|
201 |
+
|
202 |
+
|
203 |
+
|
lib/python3.11/site-packages/huggingface_hub/utils/__init__.py
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# flake8: noqa
|
2 |
+
#!/usr/bin/env python
|
3 |
+
# coding=utf-8
|
4 |
+
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
|
5 |
+
#
|
6 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
7 |
+
# you may not use this file except in compliance with the License.
|
8 |
+
# You may obtain a copy of the License at
|
9 |
+
#
|
10 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11 |
+
#
|
12 |
+
# Unless required by applicable law or agreed to in writing, software
|
13 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15 |
+
# See the License for the specific language governing permissions and
|
16 |
+
# limitations under the License
|
17 |
+
|
18 |
+
from . import tqdm as _tqdm # _tqdm is the module
|
19 |
+
from ._cache_assets import cached_assets_path
|
20 |
+
from ._cache_manager import (
|
21 |
+
CachedFileInfo,
|
22 |
+
CachedRepoInfo,
|
23 |
+
CachedRevisionInfo,
|
24 |
+
CacheNotFound,
|
25 |
+
CorruptedCacheException,
|
26 |
+
DeleteCacheStrategy,
|
27 |
+
HFCacheInfo,
|
28 |
+
scan_cache_dir,
|
29 |
+
)
|
30 |
+
from ._chunk_utils import chunk_iterable
|
31 |
+
from ._datetime import parse_datetime
|
32 |
+
from ._errors import (
|
33 |
+
BadRequestError,
|
34 |
+
EntryNotFoundError,
|
35 |
+
FileMetadataError,
|
36 |
+
GatedRepoError,
|
37 |
+
HfHubHTTPError,
|
38 |
+
LocalEntryNotFoundError,
|
39 |
+
RepositoryNotFoundError,
|
40 |
+
RevisionNotFoundError,
|
41 |
+
hf_raise_for_status,
|
42 |
+
)
|
43 |
+
from ._token import get_token
|
44 |
+
from ._fixes import SoftTemporaryDirectory, yaml_dump
|
45 |
+
from ._git_credential import list_credential_helpers, set_git_credential, unset_git_credential
|
46 |
+
from ._headers import build_hf_headers, get_token_to_send, LocalTokenNotFoundError
|
47 |
+
from ._hf_folder import HfFolder
|
48 |
+
from ._http import configure_http_backend, get_session, http_backoff, reset_sessions, OfflineModeIsEnabled
|
49 |
+
from ._pagination import paginate
|
50 |
+
from ._paths import filter_repo_objects, IGNORE_GIT_FOLDER_PATTERNS
|
51 |
+
from ._experimental import experimental
|
52 |
+
from ._runtime import (
|
53 |
+
dump_environment_info,
|
54 |
+
get_aiohttp_version,
|
55 |
+
get_fastai_version,
|
56 |
+
get_fastcore_version,
|
57 |
+
get_gradio_version,
|
58 |
+
get_graphviz_version,
|
59 |
+
get_hf_hub_version,
|
60 |
+
get_hf_transfer_version,
|
61 |
+
get_jinja_version,
|
62 |
+
get_numpy_version,
|
63 |
+
get_pillow_version,
|
64 |
+
get_pydantic_version,
|
65 |
+
get_pydot_version,
|
66 |
+
get_python_version,
|
67 |
+
get_tensorboard_version,
|
68 |
+
get_tf_version,
|
69 |
+
get_torch_version,
|
70 |
+
is_aiohttp_available,
|
71 |
+
is_fastai_available,
|
72 |
+
is_fastcore_available,
|
73 |
+
is_numpy_available,
|
74 |
+
is_google_colab,
|
75 |
+
is_gradio_available,
|
76 |
+
is_graphviz_available,
|
77 |
+
is_hf_transfer_available,
|
78 |
+
is_jinja_available,
|
79 |
+
is_notebook,
|
80 |
+
is_pillow_available,
|
81 |
+
is_pydantic_available,
|
82 |
+
is_pydot_available,
|
83 |
+
is_tensorboard_available,
|
84 |
+
is_tf_available,
|
85 |
+
is_torch_available,
|
86 |
+
)
|
87 |
+
from ._safetensors import (
|
88 |
+
SafetensorsFileMetadata,
|
89 |
+
SafetensorsRepoMetadata,
|
90 |
+
TensorInfo,
|
91 |
+
SafetensorsParsingError,
|
92 |
+
NotASafetensorsRepoError,
|
93 |
+
)
|
94 |
+
from ._subprocess import capture_output, run_interactive_subprocess, run_subprocess
|
95 |
+
from ._validators import (
|
96 |
+
HFValidationError,
|
97 |
+
smoothly_deprecate_use_auth_token,
|
98 |
+
validate_hf_hub_args,
|
99 |
+
validate_repo_id,
|
100 |
+
)
|
101 |
+
from .tqdm import (
|
102 |
+
are_progress_bars_disabled,
|
103 |
+
disable_progress_bars,
|
104 |
+
enable_progress_bars,
|
105 |
+
tqdm,
|
106 |
+
tqdm_stream_file,
|
107 |
+
)
|
108 |
+
from ._telemetry import send_telemetry
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (4.4 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_cache_assets.cpython-311.pyc
ADDED
Binary file (5.81 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_cache_manager.cpython-311.pyc
ADDED
Binary file (35.4 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_chunk_utils.cpython-311.pyc
ADDED
Binary file (2.27 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_datetime.cpython-311.pyc
ADDED
Binary file (2.44 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_deprecation.cpython-311.pyc
ADDED
Binary file (7.49 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_errors.cpython-311.pyc
ADDED
Binary file (15.9 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_experimental.cpython-311.pyc
ADDED
Binary file (2.45 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_fixes.cpython-311.pyc
ADDED
Binary file (3.03 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_git_credential.cpython-311.pyc
ADDED
Binary file (5.73 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_headers.cpython-311.pyc
ADDED
Binary file (10.4 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_hf_folder.cpython-311.pyc
ADDED
Binary file (5.06 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_http.cpython-311.pyc
ADDED
Binary file (14.8 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_pagination.cpython-311.pyc
ADDED
Binary file (2.3 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_paths.cpython-311.pyc
ADDED
Binary file (5.04 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_runtime.cpython-311.pyc
ADDED
Binary file (13.4 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_safetensors.cpython-311.pyc
ADDED
Binary file (7.38 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_subprocess.cpython-311.pyc
ADDED
Binary file (5.44 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_telemetry.cpython-311.pyc
ADDED
Binary file (6.15 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_token.cpython-311.pyc
ADDED
Binary file (5.88 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_typing.cpython-311.pyc
ADDED
Binary file (574 Bytes). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/_validators.cpython-311.pyc
ADDED
Binary file (9.81 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/endpoint_helpers.cpython-311.pyc
ADDED
Binary file (9.87 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/insecure_hashlib.cpython-311.pyc
ADDED
Binary file (665 Bytes). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/logging.cpython-311.pyc
ADDED
Binary file (6.57 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/sha.cpython-311.pyc
ADDED
Binary file (1.44 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/__pycache__/tqdm.cpython-311.pyc
ADDED
Binary file (7.3 kB). View file
|
|
lib/python3.11/site-packages/huggingface_hub/utils/_cache_assets.py
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2019-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
from pathlib import Path
|
16 |
+
from typing import Union
|
17 |
+
|
18 |
+
from ..constants import HF_ASSETS_CACHE
|
19 |
+
|
20 |
+
|
21 |
+
def cached_assets_path(
    library_name: str,
    namespace: str = "default",
    subfolder: str = "default",
    *,
    assets_dir: Union[str, Path, None] = None,
):
    """Return a guaranteed-to-exist directory where a downstream library can cache assets.

    `huggingface_hub` distinguishes files cached from the Hub (git-aware, fully managed
    by `huggingface_hub`) from "assets": anything else a downstream library caches
    (files downloaded from external sources, extracted archives, preprocessed data, ...).
    See [related documentation](https://huggingface.co/docs/huggingface_hub/how-to-cache).

    The returned path is `<assets_dir>/<library_name>/<namespace>/<subfolder>`. The fixed
    3-level layout lets `huggingface_hub` scan and delete parts of the assets cache while
    leaving the downstream library full control of the file structure below it. Namespace
    and subfolder are optional (both default to `"default"`); the library name is
    mandatory so each library manages its own cache.

    Args:
        library_name (`str`):
            Name of the library that will manage the cache folder. Example: `"dataset"`.
        namespace (`str`, *optional*, defaults to "default"):
            Namespace to which the data belongs. Example: `"SQuAD"`.
        subfolder (`str`, *optional*, defaults to "default"):
            Subfolder in which the data will be stored. Example: `extracted`.
        assets_dir (`str`, `Path`, *optional*):
            Path to the folder where assets are cached. This must not be the same folder
            where Hub files are cached. Defaults to `HF_HOME / "assets"` if not provided.
            Can also be set with `HF_ASSETS_CACHE` environment variable.

    Returns:
        Path to the cache folder (`Path`). The directory is created if it does not exist.

    Raises:
        `ValueError`: if a regular file already exists where a directory is expected.

    Example:
    ```py
    >>> from huggingface_hub import cached_assets_path

    >>> cached_assets_path(library_name="datasets", namespace="SQuAD", subfolder="download")
    PosixPath('/home/wauplin/.cache/huggingface/extra/datasets/SQuAD/download')

    >>> cached_assets_path(library_name="datasets", namespace="SQuAD", subfolder="extracted")
    PosixPath('/home/wauplin/.cache/huggingface/extra/datasets/SQuAD/extracted')

    >>> cached_assets_path(library_name="datasets", namespace="Helsinki-NLP/tatoeba_mt")
    PosixPath('/home/wauplin/.cache/huggingface/extra/datasets/Helsinki-NLP--tatoeba_mt/default')

    >>> cached_assets_path(library_name="datasets", assets_dir="/tmp/tmp123456")
    PosixPath('/tmp/tmp123456/datasets/default/default')
    ```
    """
    # Fall back on the environment-configured assets cache when no explicit dir is given.
    root = Path(HF_ASSETS_CACHE if assets_dir is None else assets_dir).expanduser().resolve()

    def _sanitize(part: str) -> str:
        # Characters that would break the expected 3-level folder layout are replaced
        # by "--" (e.g. "Helsinki-NLP/tatoeba_mt" -> "Helsinki-NLP--tatoeba_mt").
        for forbidden in (" ", "/", "\\"):
            part = part.replace(forbidden, "--")
        return part

    path = root / _sanitize(library_name) / _sanitize(namespace) / _sanitize(subfolder)

    # Make sure the directory exists before handing it to the caller.
    try:
        path.mkdir(exist_ok=True, parents=True)
    except (FileExistsError, NotADirectoryError):
        raise ValueError(f"Corrupted assets folder: cannot create directory because of an existing file ({path}).")
    return path
|
lib/python3.11/site-packages/huggingface_hub/utils/_cache_manager.py
ADDED
@@ -0,0 +1,806 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains utilities to manage the HF cache directory."""
|
16 |
+
import os
|
17 |
+
import shutil
|
18 |
+
import time
|
19 |
+
from collections import defaultdict
|
20 |
+
from dataclasses import dataclass
|
21 |
+
from pathlib import Path
|
22 |
+
from typing import Dict, FrozenSet, List, Literal, Optional, Set, Union
|
23 |
+
|
24 |
+
from ..constants import HF_HUB_CACHE
|
25 |
+
from . import logging
|
26 |
+
|
27 |
+
|
28 |
+
logger = logging.get_logger(__name__)
|
29 |
+
|
30 |
+
REPO_TYPE_T = Literal["model", "dataset", "space"]
|
31 |
+
|
32 |
+
|
33 |
+
class CacheNotFound(Exception):
    """Raised when the Huggingface cache directory cannot be located on disk."""

    # Directory that was looked up; kept so callers can report or create it.
    cache_dir: Union[str, Path]

    def __init__(self, msg: str, cache_dir: Union[str, Path], *args, **kwargs):
        self.cache_dir = cache_dir
        super().__init__(msg, *args, **kwargs)
|
41 |
+
|
42 |
+
|
43 |
+
class CorruptedCacheException(Exception):
    """Raised when the on-disk layout of the Huggingface cache-system is not the expected one."""
|
45 |
+
|
46 |
+
|
47 |
+
@dataclass(frozen=True)
class CachedFileInfo:
    """Immutable description of a single file cached in a snapshot.

    Args:
        file_name (`str`):
            Name of the file. Example: `config.json`.
        file_path (`Path`):
            Path of the file in the `snapshots` directory. The file path is a symlink
            referring to a blob in the `blobs` folder.
        blob_path (`Path`):
            Path of the blob file. This is equivalent to `file_path.resolve()`.
        size_on_disk (`int`):
            Size of the blob file in bytes.
        blob_last_accessed (`float`):
            Timestamp of the last time the blob file has been accessed (from any
            revision).
        blob_last_modified (`float`):
            Timestamp of the last time the blob file has been modified/created.

    <Tip warning={true}>

    `blob_last_accessed` and `blob_last_modified` reliability can depend on the OS you
    are using. See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
    for more details.

    </Tip>
    """

    file_name: str
    file_path: Path
    blob_path: Path
    size_on_disk: int

    blob_last_accessed: float
    blob_last_modified: float

    @property
    def size_on_disk_str(self) -> str:
        """
        (property) Size of the blob file rendered as a human-readable string.

        Example: "42.2K".
        """
        return _format_size(self.size_on_disk)

    @property
    def blob_last_accessed_str(self) -> str:
        """
        (property) Last access time of the blob file (from any revision), rendered as a
        human-readable string.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.blob_last_accessed)

    @property
    def blob_last_modified_str(self) -> str:
        """
        (property) Last modification time of the blob file, rendered as a human-readable
        string.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.blob_last_modified)
|
112 |
+
|
113 |
+
|
114 |
+
@dataclass(frozen=True)
class CachedRevisionInfo:
    """Immutable report about one cached revision (one directory under `snapshots/`).

    A revision is populated with the exact tree structure as the repo on the Hub but
    contains only symlinks. It can be referenced by 1 or more `refs`, or be "detached"
    (no refs at all).

    Args:
        commit_hash (`str`):
            Hash of the revision (unique).
            Example: `"9338f7b671827df886678df2bdd7cc7b4f36dffd"`.
        snapshot_path (`Path`):
            Path to the revision directory in the `snapshots` folder. It contains the
            exact tree structure as the repo on the Hub.
        files: (`FrozenSet[CachedFileInfo]`):
            Set of [`~CachedFileInfo`] describing all files contained in the snapshot.
        refs (`FrozenSet[str]`):
            Set of `refs` pointing to this revision. If the revision has no `refs`, it
            is considered detached.
            Example: `{"main", "2.4.0"}` or `{"refs/pr/1"}`.
        size_on_disk (`int`):
            Sum of the blob file sizes that are symlink-ed by the revision.
        last_modified (`float`):
            Timestamp of the last time the revision has been created/modified.

    <Tip warning={true}>

    `last_accessed` cannot be determined correctly on a single revision as blob files
    are shared across revisions.

    </Tip>

    <Tip warning={true}>

    `size_on_disk` is not necessarily the sum of all file sizes because of possible
    duplicated files. Besides, only blobs are taken into account, not the (negligible)
    size of folders and symlinks.

    </Tip>
    """

    commit_hash: str
    snapshot_path: Path
    size_on_disk: int
    files: FrozenSet[CachedFileInfo]
    refs: FrozenSet[str]

    last_modified: float

    @property
    def nb_files(self) -> int:
        """
        (property) Total number of files in the revision.
        """
        return len(self.files)

    @property
    def last_modified_str(self) -> str:
        """
        (property) Last modification time of the revision, rendered as a human-readable
        string.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.last_modified)

    @property
    def size_on_disk_str(self) -> str:
        """
        (property) Sum of the blob file sizes rendered as a human-readable string.

        Example: "42.2K".
        """
        return _format_size(self.size_on_disk)
|
189 |
+
|
190 |
+
|
191 |
+
@dataclass(frozen=True)
class CachedRepoInfo:
    """Immutable report about one cached repository.

    Args:
        repo_id (`str`):
            Repo id of the repo on the Hub. Example: `"google/fleurs"`.
        repo_type (`Literal["dataset", "model", "space"]`):
            Type of the cached repo.
        repo_path (`Path`):
            Local path to the cached repo.
        size_on_disk (`int`):
            Sum of the blob file sizes in the cached repo.
        nb_files (`int`):
            Total number of blob files in the cached repo.
        revisions (`FrozenSet[CachedRevisionInfo]`):
            Set of [`~CachedRevisionInfo`] describing all revisions cached in the repo.
        last_accessed (`float`):
            Timestamp of the last time a blob file of the repo has been accessed.
        last_modified (`float`):
            Timestamp of the last time a blob file of the repo has been modified/created.

    <Tip warning={true}>

    `size_on_disk` is not necessarily the sum of all revisions sizes because of
    duplicated files. Besides, only blobs are taken into account, not the (negligible)
    size of folders and symlinks.

    </Tip>

    <Tip warning={true}>

    `last_accessed` and `last_modified` reliability can depend on the OS you are using.
    See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
    for more details.

    </Tip>
    """

    repo_id: str
    repo_type: REPO_TYPE_T
    repo_path: Path
    size_on_disk: int
    nb_files: int
    revisions: FrozenSet[CachedRevisionInfo]

    last_accessed: float
    last_modified: float

    @property
    def refs(self) -> Dict[str, CachedRevisionInfo]:
        """
        (property) Mapping from each ref name to the revision it points to.
        """
        mapping: Dict[str, CachedRevisionInfo] = {}
        for revision in self.revisions:
            for ref in revision.refs:
                mapping[ref] = revision
        return mapping

    @property
    def size_on_disk_str(self) -> str:
        """
        (property) Sum of the blob file sizes rendered as a human-readable string.

        Example: "42.2K".
        """
        return _format_size(self.size_on_disk)

    @property
    def last_accessed_str(self) -> str:
        """
        (property) Last time a blob file of the repo has been accessed, rendered as a
        human-readable string.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.last_accessed)

    @property
    def last_modified_str(self) -> str:
        """
        (property) Last time a blob file of the repo has been modified, rendered as a
        human-readable string.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.last_modified)
|
275 |
+
|
276 |
+
|
277 |
+
@dataclass(frozen=True)
class DeleteCacheStrategy:
    """Immutable plan describing which cached revisions will be deleted.

    This object is not meant to be instantiated programmatically but to be returned by
    [`~utils.HFCacheInfo.delete_revisions`]. See documentation for usage example.

    Args:
        expected_freed_size (`float`):
            Expected freed size once strategy is executed.
        blobs (`FrozenSet[Path]`):
            Set of blob file paths to be deleted.
        refs (`FrozenSet[Path]`):
            Set of reference file paths to be deleted.
        repos (`FrozenSet[Path]`):
            Set of entire repo paths to be deleted.
        snapshots (`FrozenSet[Path]`):
            Set of snapshots to be deleted (directory of symlinks).
    """

    expected_freed_size: int
    blobs: FrozenSet[Path]
    refs: FrozenSet[Path]
    repos: FrozenSet[Path]
    snapshots: FrozenSet[Path]

    @property
    def expected_freed_size_str(self) -> str:
        """
        (property) Expected size that will be freed, rendered as a human-readable string.

        Example: "42.2K".
        """
        return _format_size(self.expected_freed_size)

    def execute(self) -> None:
        """Execute the defined strategy.

        <Tip warning={true}>

        If this method is interrupted, the cache might get corrupted. Deletion order is
        implemented so that references and symlinks are deleted before the actual blob
        files.

        </Tip>

        <Tip warning={true}>

        This method is irreversible. If executed, cached files are erased and must be
        downloaded again.

        </Tip>
        """
        # Deletion order matters: repos first, then snapshots, then refs, and blobs
        # last, so an interruption never leaves a `ref` pointing at a missing snapshot
        # or a snapshot symlink pointing at a deleted blob.
        for path_type, paths in (
            ("repo", self.repos),
            ("snapshot", self.snapshots),
            ("ref", self.refs),
            ("blob", self.blobs),
        ):
            for path in paths:
                _try_delete_path(path, path_type=path_type)

        logger.info(f"Cache deletion done. Saved {self.expected_freed_size_str}.")
|
351 |
+
|
352 |
+
|
353 |
+
@dataclass(frozen=True)
class HFCacheInfo:
    """Frozen data structure holding information about the entire cache-system.

    This data structure is returned by [`scan_cache_dir`] and is immutable.

    Args:
        size_on_disk (`int`):
            Sum of all valid repo sizes in the cache-system.
        repos (`FrozenSet[CachedRepoInfo]`):
            Set of [`~CachedRepoInfo`] describing all valid cached repos found on the
            cache-system while scanning.
        warnings (`List[CorruptedCacheException]`):
            List of [`~CorruptedCacheException`] that occurred while scanning the cache.
            Those exceptions are captured so that the scan can continue. Corrupted repos
            are skipped from the scan.

    <Tip warning={true}>

    Here `size_on_disk` is equal to the sum of all repo sizes (only blobs). However if
    some cached repos are corrupted, their sizes are not taken into account.

    </Tip>
    """

    size_on_disk: int
    repos: FrozenSet[CachedRepoInfo]
    warnings: List[CorruptedCacheException]

    @property
    def size_on_disk_str(self) -> str:
        """
        (property) Sum of all valid repo sizes in the cache-system as a human-readable
        string.

        Example: "42.2K".
        """
        return _format_size(self.size_on_disk)

    def delete_revisions(self, *revisions: str) -> DeleteCacheStrategy:
        """Prepare the strategy to delete one or more revisions cached locally.

        Input revisions can be any revision hash. If a revision hash is not found in the
        local cache, a warning is thrown but no error is raised. Revisions can be from
        different cached repos since hashes are unique across repos,

        Examples:
        ```py
        >>> from huggingface_hub import scan_cache_dir
        >>> cache_info = scan_cache_dir()
        >>> delete_strategy = cache_info.delete_revisions(
        ...     "81fd1d6e7847c99f5862c9fb81387956d99ec7aa"
        ... )
        >>> print(f"Will free {delete_strategy.expected_freed_size_str}.")
        Will free 7.9K.
        >>> delete_strategy.execute()
        Cache deletion done. Saved 7.9K.
        ```

        ```py
        >>> from huggingface_hub import scan_cache_dir
        >>> scan_cache_dir().delete_revisions(
        ...     "81fd1d6e7847c99f5862c9fb81387956d99ec7aa",
        ...     "e2983b237dccf3ab4937c97fa717319a9ca1a96d",
        ...     "6c0e6080953db56375760c0471a8c5f2929baf11",
        ... ).execute()
        Cache deletion done. Saved 8.6G.
        ```

        <Tip warning={true}>

        `delete_revisions` returns a [`~utils.DeleteCacheStrategy`] object that needs to
        be executed. The [`~utils.DeleteCacheStrategy`] is not meant to be modified but
        allows having a dry run before actually executing the deletion.

        </Tip>
        """
        hashes_to_delete: Set[str] = set(revisions)

        # Group the requested revisions by the repo that owns them. Hashes found in
        # the cache are removed from `hashes_to_delete` so leftovers can be reported.
        repos_with_revisions: Dict[CachedRepoInfo, Set[CachedRevisionInfo]] = defaultdict(set)

        for repo in self.repos:
            for revision in repo.revisions:
                if revision.commit_hash in hashes_to_delete:
                    repos_with_revisions[repo].add(revision)
                    hashes_to_delete.remove(revision.commit_hash)

        if len(hashes_to_delete) > 0:
            logger.warning(f"Revision(s) not found - cannot delete them: {', '.join(hashes_to_delete)}")

        delete_strategy_blobs: Set[Path] = set()
        delete_strategy_refs: Set[Path] = set()
        delete_strategy_repos: Set[Path] = set()
        delete_strategy_snapshots: Set[Path] = set()
        delete_strategy_expected_freed_size = 0

        for affected_repo, revisions_to_delete in repos_with_revisions.items():
            other_revisions = affected_repo.revisions - revisions_to_delete

            # If no other revisions, it means all revisions are deleted
            # -> delete the entire cached repo
            if len(other_revisions) == 0:
                delete_strategy_repos.add(affected_repo.repo_path)
                delete_strategy_expected_freed_size += affected_repo.size_on_disk
                continue

            # Some revisions of the repo will be deleted but not all. We need to filter
            # which blob files will not be linked anymore.
            for revision_to_delete in revisions_to_delete:
                # Snapshot dir
                delete_strategy_snapshots.add(revision_to_delete.snapshot_path)

                # Refs dir
                for ref in revision_to_delete.refs:
                    delete_strategy_refs.add(affected_repo.repo_path / "refs" / ref)

                # Blobs dir
                for file in revision_to_delete.files:
                    if file.blob_path not in delete_strategy_blobs:
                        # A blob may be shared by several revisions: only schedule it
                        # for deletion if no *kept* revision still links to it.
                        # NOTE(review): this scan is O(revisions * files) per blob —
                        # presumably fine for typical cache sizes.
                        is_file_alone = True
                        for revision in other_revisions:
                            for rev_file in revision.files:
                                if file.blob_path == rev_file.blob_path:
                                    is_file_alone = False
                                    break
                            if not is_file_alone:
                                break

                        # Blob file not referenced by remaining revisions -> delete
                        if is_file_alone:
                            delete_strategy_blobs.add(file.blob_path)
                            delete_strategy_expected_freed_size += file.size_on_disk

        # Return the strategy instead of executing it.
        return DeleteCacheStrategy(
            blobs=frozenset(delete_strategy_blobs),
            refs=frozenset(delete_strategy_refs),
            repos=frozenset(delete_strategy_repos),
            snapshots=frozenset(delete_strategy_snapshots),
            expected_freed_size=delete_strategy_expected_freed_size,
        )
|
494 |
+
|
495 |
+
|
496 |
+
def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
    """Scan the entire HF cache-system and return a [`~HFCacheInfo`] structure.

    The cache is scanned repo by repo. A corrupted repo raises a
    [`~CorruptedCacheException`] internally, which is captured and collected in the
    returned [`~HFCacheInfo`] so the scan can continue: only valid repos get a proper
    report.

    ```py
    >>> from huggingface_hub import scan_cache_dir

    >>> hf_cache_info = scan_cache_dir()
    HFCacheInfo(
        size_on_disk=3398085269,
        repos=frozenset({
            CachedRepoInfo(
                repo_id='t5-small',
                repo_type='model',
                repo_path=PosixPath(...),
                size_on_disk=970726914,
                nb_files=11,
                revisions=frozenset({CachedRevisionInfo(...), ...}),
            ),
            CachedRepoInfo(...),
            ...
        }),
        warnings=[
            CorruptedCacheException("Snapshots dir doesn't exist in cached repo: ..."),
            ...
        ],
    )
    ```

    You can also print a detailed report directly from the `huggingface-cli` using
    `huggingface-cli scan-cache`.

    Args:
        cache_dir (`str` or `Path`, `optional`):
            Cache directory to cache. Defaults to the default HF cache directory.

    <Tip warning={true}>

    Raises:

        `CacheNotFound`
          If the cache directory does not exist.

        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
          If the cache directory is a file, instead of a directory.

    </Tip>

    Returns: a [`~HFCacheInfo`] object.
    """
    # Fall back on the default HF cache directory when none is provided.
    cache_dir = Path(HF_HUB_CACHE if cache_dir is None else cache_dir).expanduser().resolve()

    if not cache_dir.exists():
        raise CacheNotFound(
            f"Cache directory not found: {cache_dir}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable.",
            cache_dir=cache_dir,
        )
    if cache_dir.is_file():
        raise ValueError(
            f"Scan cache expects a directory but found a file: {cache_dir}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable."
        )

    repos: Set[CachedRepoInfo] = set()
    warnings: List[CorruptedCacheException] = []
    for repo_path in cache_dir.iterdir():
        # './.locks/' holds lock files, not cached repos -> skip it.
        if repo_path.name == ".locks":
            continue
        try:
            repos.add(_scan_cached_repo(repo_path))
        except CorruptedCacheException as scan_error:
            # Keep scanning; corrupted repos are reported, not fatal.
            warnings.append(scan_error)

    return HFCacheInfo(
        repos=frozenset(repos),
        size_on_disk=sum(repo.size_on_disk for repo in repos),
        warnings=warnings,
    )
|
612 |
+
|
613 |
+
|
614 |
+
def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
    """Scan a single cache repo and return information about it.

    Expects `repo_path` to be a folder following the HF cache layout: a name of
    the form `<repo_type>s--<org>--<name>` (e.g. `models--t5-small`) containing
    a `snapshots/` directory and, optionally, a `refs/` directory.

    Args:
        repo_path (`Path`):
            Path to the cached repo folder.

    Returns:
        A frozen [`~CachedRepoInfo`] aggregating per-revision and per-file data.

    Raises:
        [`~CorruptedCacheException`]:
            On any unexpected layout (not a directory, invalid name, missing
            snapshots dir, broken symlink, ref pointing to a missing revision...).
    """
    if not repo_path.is_dir():
        raise CorruptedCacheException(f"Repo path is not a directory: {repo_path}")

    if "--" not in repo_path.name:
        raise CorruptedCacheException(f"Repo path is not a valid HuggingFace cache directory: {repo_path}")

    repo_type, repo_id = repo_path.name.split("--", maxsplit=1)
    repo_type = repo_type[:-1]  # "models" -> "model"
    repo_id = repo_id.replace("--", "/")  # google/fleurs -> "google/fleurs"

    if repo_type not in {"dataset", "model", "space"}:
        raise CorruptedCacheException(
            f"Repo type must be `dataset`, `model` or `space`, found `{repo_type}` ({repo_path})."
        )

    # Each blob is stat'ed exactly once, even if referenced by several revisions:
    # this is both a perf win and what makes `size_on_disk` deduplicated.
    blob_stats: Dict[Path, os.stat_result] = {}  # Key is blob_path, value is blob stats

    snapshots_path = repo_path / "snapshots"
    refs_path = repo_path / "refs"

    if not snapshots_path.exists() or not snapshots_path.is_dir():
        raise CorruptedCacheException(f"Snapshots dir doesn't exist in cached repo: {snapshots_path}")

    # Scan over `refs` directory

    # key is revision hash, value is set of refs
    refs_by_hash: Dict[str, Set[str]] = defaultdict(set)
    if refs_path.exists():
        # Example of `refs` directory
        # ── refs
        #    ├── main
        #    └── refs
        #        └── pr
        #            └── 1
        if refs_path.is_file():
            raise CorruptedCacheException(f"Refs directory cannot be a file: {refs_path}")

        for ref_path in refs_path.glob("**/*"):
            # glob("**/*") iterates over all files and directories -> skip directories
            if ref_path.is_dir():
                continue

            # Ref name is the path relative to `refs/` (e.g. "main", "refs/pr/1").
            ref_name = str(ref_path.relative_to(refs_path))
            with ref_path.open() as f:
                commit_hash = f.read()

            refs_by_hash[commit_hash].add(ref_name)

    # Scan snapshots directory
    cached_revisions: Set[CachedRevisionInfo] = set()
    for revision_path in snapshots_path.iterdir():
        if revision_path.is_file():
            raise CorruptedCacheException(f"Snapshots folder corrupted. Found a file: {revision_path}")

        cached_files: Set[CachedFileInfo] = set()
        for file_path in revision_path.glob("**/*"):
            # glob("**/*") iterates over all files and directories -> skip directories
            if file_path.is_dir():
                continue

            # Snapshot entries are symlinks into `blobs/`; resolve to the target.
            blob_path = Path(file_path).resolve()
            if not blob_path.exists():
                raise CorruptedCacheException(f"Blob missing (broken symlink): {blob_path}")

            if blob_path not in blob_stats:
                blob_stats[blob_path] = blob_path.stat()

            cached_files.add(
                CachedFileInfo(
                    file_name=file_path.name,
                    file_path=file_path,
                    size_on_disk=blob_stats[blob_path].st_size,
                    blob_path=blob_path,
                    blob_last_accessed=blob_stats[blob_path].st_atime,
                    blob_last_modified=blob_stats[blob_path].st_mtime,
                )
            )

        # Last modified is either the last modified blob file or the revision folder
        # itself if it is empty
        if len(cached_files) > 0:
            revision_last_modified = max(blob_stats[file.blob_path].st_mtime for file in cached_files)
        else:
            revision_last_modified = revision_path.stat().st_mtime

        cached_revisions.add(
            CachedRevisionInfo(
                commit_hash=revision_path.name,
                files=frozenset(cached_files),
                # `pop` (not `get`) so that, after the loop, whatever remains in
                # `refs_by_hash` is a ref pointing to a missing revision.
                refs=frozenset(refs_by_hash.pop(revision_path.name, set())),
                size_on_disk=sum(
                    blob_stats[blob_path].st_size for blob_path in set(file.blob_path for file in cached_files)
                ),
                snapshot_path=revision_path,
                last_modified=revision_last_modified,
            )
        )

    # Check that all refs referred to an existing revision
    if len(refs_by_hash) > 0:
        raise CorruptedCacheException(
            f"Reference(s) refer to missing commit hashes: {dict(refs_by_hash)} ({repo_path})."
        )

    # Last modified is either the last modified blob file or the repo folder itself if
    # no blob files has been found. Same for last accessed.
    if len(blob_stats) > 0:
        repo_last_accessed = max(stat.st_atime for stat in blob_stats.values())
        repo_last_modified = max(stat.st_mtime for stat in blob_stats.values())
    else:
        repo_stats = repo_path.stat()
        repo_last_accessed = repo_stats.st_atime
        repo_last_modified = repo_stats.st_mtime

    # Build and return frozen structure
    return CachedRepoInfo(
        nb_files=len(blob_stats),
        repo_id=repo_id,
        repo_path=repo_path,
        repo_type=repo_type,  # type: ignore
        revisions=frozenset(cached_revisions),
        size_on_disk=sum(stat.st_size for stat in blob_stats.values()),
        last_accessed=repo_last_accessed,
        last_modified=repo_last_modified,
    )
|
744 |
+
|
745 |
+
|
746 |
+
def _format_size(num: int) -> str:
|
747 |
+
"""Format size in bytes into a human-readable string.
|
748 |
+
|
749 |
+
Taken from https://stackoverflow.com/a/1094933
|
750 |
+
"""
|
751 |
+
num_f = float(num)
|
752 |
+
for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
|
753 |
+
if abs(num_f) < 1000.0:
|
754 |
+
return f"{num_f:3.1f}{unit}"
|
755 |
+
num_f /= 1000.0
|
756 |
+
return f"{num_f:.1f}Y"
|
757 |
+
|
758 |
+
|
759 |
+
# Unit table used by `_format_timesince`, ordered from smallest to largest.
# Each entry is (label, divider in seconds, max value before moving on to the
# next unit). The last entry ("year") has no cap (`None`).
_TIMESINCE_CHUNKS = (
    # Label, divider, max value
    ("second", 1, 60),
    ("minute", 60, 60),
    ("hour", 60 * 60, 24),
    ("day", 60 * 60 * 24, 6),
    ("week", 60 * 60 * 24 * 7, 6),
    ("month", 60 * 60 * 24 * 30, 11),
    ("year", 60 * 60 * 24 * 365, None),
)
|
769 |
+
|
770 |
+
|
771 |
+
def _format_timesince(ts: float) -> str:
    """Render how long ago `ts` (epoch seconds) was, as a human-readable string.

    Vaguely inspired by Django's `timesince` formatter.
    """
    elapsed = time.time() - ts
    if elapsed < 20:
        return "a few seconds ago"
    # Walk units from smallest to largest and stop at the first whose rounded
    # value fits under its cap; the last unit ("year") is uncapped.
    for label, divider, max_value in _TIMESINCE_CHUNKS:  # noqa: B007
        value = round(elapsed / divider)
        if max_value is None or value <= max_value:
            break
    plural = "s" if value > 1 else ""
    return f"{value} {label}{plural} ago"
|
784 |
+
|
785 |
+
|
786 |
+
def _try_delete_path(path: Path, path_type: str) -> None:
    """Try to delete a local file or folder.

    If the path does not exists, error is logged as a warning and then ignored.
    Same for permission errors: deletion is strictly best-effort.

    Args:
        path (`Path`)
            Path to delete. Can be a file or a folder.
        path_type (`str`)
            What path are we deleting ? Only for logging purposes. Example: "snapshot".
    """
    logger.info(f"Delete {path_type}: {path}")
    try:
        # Files go through `os.remove`, anything else is removed recursively.
        (os.remove if path.is_file() else shutil.rmtree)(path)
    except FileNotFoundError:
        logger.warning(f"Couldn't delete {path_type}: file not found ({path})", exc_info=True)
    except PermissionError:
        logger.warning(f"Couldn't delete {path_type}: permission denied ({path})", exc_info=True)
|
lib/python3.11/site-packages/huggingface_hub/utils/_chunk_utils.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains a utility to iterate by chunks over an iterator."""
|
16 |
+
import itertools
|
17 |
+
from typing import Iterable, TypeVar
|
18 |
+
|
19 |
+
|
20 |
+
T = TypeVar("T")
|
21 |
+
|
22 |
+
|
23 |
+
def chunk_iterable(iterable: Iterable[T], chunk_size: int) -> Iterable[Iterable[T]]:
    """Yield successive chunks of `chunk_size` items from `iterable`.

    Taken from https://stackoverflow.com/a/8998040.
    See also https://github.com/huggingface/huggingface_hub/pull/920#discussion_r938793088.

    Args:
        iterable (`Iterable`):
            The iterable on which we want to iterate.
        chunk_size (`int`):
            Size of the chunks. Must be a strictly positive integer (e.g. >0).

    Example:

    ```python
    >>> from huggingface_hub.utils import chunk_iterable

    >>> for items in chunk_iterable(range(17), chunk_size=8):
    ...     print(items)
    # [0, 1, 2, 3, 4, 5, 6, 7]
    # [8, 9, 10, 11, 12, 13, 14, 15]
    # [16] # smaller last chunk
    ```

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `chunk_size` <= 0.

    <Tip warning={true}>
    The last chunk can be smaller than `chunk_size`.
    </Tip>
    """
    if not isinstance(chunk_size, int) or chunk_size <= 0:
        raise ValueError("`chunk_size` must be a strictly positive integer (>0).")

    source = iter(iterable)
    _sentinel = object()
    while True:
        # Pull one item eagerly; the sentinel (instead of a StopIteration
        # handler) signals exhaustion of the underlying iterator.
        first = next(source, _sentinel)
        if first is _sentinel:
            return
        # Lazily chain the first item with up to `chunk_size - 1` more.
        yield itertools.chain((first,), itertools.islice(source, chunk_size - 1))
|
lib/python3.11/site-packages/huggingface_hub/utils/_datetime.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains utilities to handle datetimes in Huggingface Hub."""
|
16 |
+
from datetime import datetime, timedelta, timezone
|
17 |
+
|
18 |
+
|
19 |
+
# Local machine offset compared to UTC.
# Taken from https://stackoverflow.com/a/3168394.
# `utcoffset()` returns `None` if no offset -> empty timedelta.
# NOTE: kept for backward compatibility with code importing it; `parse_datetime`
# no longer relies on it (see below).
UTC_OFFSET = datetime.now(timezone.utc).astimezone().utcoffset() or timedelta()


def parse_datetime(date_string: str) -> datetime:
    """
    Parses a date_string returned from the server to a datetime object.

    This parser is a weak-parser is the sense that it handles only a single format of
    date_string. It is expected that the server format will never change. The
    implementation depends only on the standard lib to avoid an external dependency
    (python-dateutil). See full discussion about this decision on PR:
    https://github.com/huggingface/huggingface_hub/pull/999.

    Example:
        ```py
        > parse_datetime('2022-08-19T07:19:38.123Z')
        datetime.datetime(2022, 8, 19, 7, 19, 38, 123000, tzinfo=timezone.utc)
        ```

    Args:
        date_string (`str`):
            A string representing a datetime returned by the Hub server.
            String is expected to follow '%Y-%m-%dT%H:%M:%S.%fZ' pattern.

    Returns:
        A python datetime object.

    Raises:
        :class:`ValueError`:
            If `date_string` cannot be parsed.
    """
    try:
        # A trailing "Z" means "UTC". Parse the value as a naive datetime and
        # attach the UTC timezone directly with `replace(tzinfo=...)`.
        # This replaces the previous "add UTC_OFFSET then astimezone(utc)"
        # round-trip, which captured the machine offset once at import time and
        # silently produced shifted datetimes if the local offset changed
        # afterwards (e.g. across a DST transition). Result is identical when
        # the offset is stable.
        if len(date_string) == 30:
            # Means timezoned-timestamp with nanoseconds precision. We need to truncate the last 3 digits.
            date_string = date_string[:-4] + "Z"
        dt = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ")
        return dt.replace(tzinfo=timezone.utc)  # Set explicit timezone
    except ValueError as e:
        raise ValueError(
            f"Cannot parse '{date_string}' as a datetime. Date string is expected to"
            " follow '%Y-%m-%dT%H:%M:%S.%fZ' pattern."
        ) from e
|
lib/python3.11/site-packages/huggingface_hub/utils/_deprecation.py
ADDED
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import warnings
|
2 |
+
from functools import wraps
|
3 |
+
from inspect import Parameter, signature
|
4 |
+
from typing import Iterable, Optional
|
5 |
+
|
6 |
+
|
7 |
+
def _deprecate_positional_args(*, version: str):
|
8 |
+
"""Decorator for methods that issues warnings for positional arguments.
|
9 |
+
Using the keyword-only argument syntax in pep 3102, arguments after the
|
10 |
+
* will issue a warning when passed as a positional argument.
|
11 |
+
|
12 |
+
Args:
|
13 |
+
version (`str`):
|
14 |
+
The version when positional arguments will result in error.
|
15 |
+
"""
|
16 |
+
|
17 |
+
def _inner_deprecate_positional_args(f):
|
18 |
+
sig = signature(f)
|
19 |
+
kwonly_args = []
|
20 |
+
all_args = []
|
21 |
+
for name, param in sig.parameters.items():
|
22 |
+
if param.kind == Parameter.POSITIONAL_OR_KEYWORD:
|
23 |
+
all_args.append(name)
|
24 |
+
elif param.kind == Parameter.KEYWORD_ONLY:
|
25 |
+
kwonly_args.append(name)
|
26 |
+
|
27 |
+
@wraps(f)
|
28 |
+
def inner_f(*args, **kwargs):
|
29 |
+
extra_args = len(args) - len(all_args)
|
30 |
+
if extra_args <= 0:
|
31 |
+
return f(*args, **kwargs)
|
32 |
+
# extra_args > 0
|
33 |
+
args_msg = [
|
34 |
+
f"{name}='{arg}'" if isinstance(arg, str) else f"{name}={arg}"
|
35 |
+
for name, arg in zip(kwonly_args[:extra_args], args[-extra_args:])
|
36 |
+
]
|
37 |
+
args_msg = ", ".join(args_msg)
|
38 |
+
warnings.warn(
|
39 |
+
f"Deprecated positional argument(s) used in '{f.__name__}': pass"
|
40 |
+
f" {args_msg} as keyword args. From version {version} passing these"
|
41 |
+
" as positional arguments will result in an error,",
|
42 |
+
FutureWarning,
|
43 |
+
)
|
44 |
+
kwargs.update(zip(sig.parameters, args))
|
45 |
+
return f(**kwargs)
|
46 |
+
|
47 |
+
return inner_f
|
48 |
+
|
49 |
+
return _inner_deprecate_positional_args
|
50 |
+
|
51 |
+
|
52 |
+
def _deprecate_arguments(
|
53 |
+
*,
|
54 |
+
version: str,
|
55 |
+
deprecated_args: Iterable[str],
|
56 |
+
custom_message: Optional[str] = None,
|
57 |
+
):
|
58 |
+
"""Decorator to issue warnings when using deprecated arguments.
|
59 |
+
|
60 |
+
TODO: could be useful to be able to set a custom error message.
|
61 |
+
|
62 |
+
Args:
|
63 |
+
version (`str`):
|
64 |
+
The version when deprecated arguments will result in error.
|
65 |
+
deprecated_args (`List[str]`):
|
66 |
+
List of the arguments to be deprecated.
|
67 |
+
custom_message (`str`, *optional*):
|
68 |
+
Warning message that is raised. If not passed, a default warning message
|
69 |
+
will be created.
|
70 |
+
"""
|
71 |
+
|
72 |
+
def _inner_deprecate_positional_args(f):
|
73 |
+
sig = signature(f)
|
74 |
+
|
75 |
+
@wraps(f)
|
76 |
+
def inner_f(*args, **kwargs):
|
77 |
+
# Check for used deprecated arguments
|
78 |
+
used_deprecated_args = []
|
79 |
+
for _, parameter in zip(args, sig.parameters.values()):
|
80 |
+
if parameter.name in deprecated_args:
|
81 |
+
used_deprecated_args.append(parameter.name)
|
82 |
+
for kwarg_name, kwarg_value in kwargs.items():
|
83 |
+
if (
|
84 |
+
# If argument is deprecated but still used
|
85 |
+
kwarg_name in deprecated_args
|
86 |
+
# And then the value is not the default value
|
87 |
+
and kwarg_value != sig.parameters[kwarg_name].default
|
88 |
+
):
|
89 |
+
used_deprecated_args.append(kwarg_name)
|
90 |
+
|
91 |
+
# Warn and proceed
|
92 |
+
if len(used_deprecated_args) > 0:
|
93 |
+
message = (
|
94 |
+
f"Deprecated argument(s) used in '{f.__name__}':"
|
95 |
+
f" {', '.join(used_deprecated_args)}. Will not be supported from"
|
96 |
+
f" version '{version}'."
|
97 |
+
)
|
98 |
+
if custom_message is not None:
|
99 |
+
message += "\n\n" + custom_message
|
100 |
+
warnings.warn(message, FutureWarning)
|
101 |
+
return f(*args, **kwargs)
|
102 |
+
|
103 |
+
return inner_f
|
104 |
+
|
105 |
+
return _inner_deprecate_positional_args
|
106 |
+
|
107 |
+
|
108 |
+
def _deprecate_method(*, version: str, message: Optional[str] = None):
|
109 |
+
"""Decorator to issue warnings when using a deprecated method.
|
110 |
+
|
111 |
+
Args:
|
112 |
+
version (`str`):
|
113 |
+
The version when deprecated arguments will result in error.
|
114 |
+
message (`str`, *optional*):
|
115 |
+
Warning message that is raised. If not passed, a default warning message
|
116 |
+
will be created.
|
117 |
+
"""
|
118 |
+
|
119 |
+
def _inner_deprecate_method(f):
|
120 |
+
name = f.__name__
|
121 |
+
if name == "__init__":
|
122 |
+
name = f.__qualname__.split(".")[0] # class name instead of method name
|
123 |
+
|
124 |
+
@wraps(f)
|
125 |
+
def inner_f(*args, **kwargs):
|
126 |
+
warning_message = (
|
127 |
+
f"'{name}' (from '{f.__module__}') is deprecated and will be removed from version '{version}'."
|
128 |
+
)
|
129 |
+
if message is not None:
|
130 |
+
warning_message += " " + message
|
131 |
+
warnings.warn(warning_message, FutureWarning)
|
132 |
+
return f(*args, **kwargs)
|
133 |
+
|
134 |
+
return inner_f
|
135 |
+
|
136 |
+
return _inner_deprecate_method
|
lib/python3.11/site-packages/huggingface_hub/utils/_errors.py
ADDED
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
from typing import Optional
|
3 |
+
|
4 |
+
from requests import HTTPError, Response
|
5 |
+
|
6 |
+
from ._fixes import JSONDecodeError
|
7 |
+
|
8 |
+
|
9 |
+
# Matches Hub URLs targeting a repository, either through the API
# (`/api/{models|datasets|spaces}/<repo_id>`) or through a resolve URL
# (`/<repo_id>/resolve/<revision>/...`), on both the production
# (huggingface.co) and staging (hub-ci.huggingface.co) endpoints.
# Inline comments below are inside the pattern itself (re.VERBOSE mode).
REPO_API_REGEX = re.compile(
    r"""
    # staging or production endpoint
    ^https://(hub-ci.)?huggingface.co
    (
        # on /api/repo_type/repo_id
        /api/(models|datasets|spaces)/(.+)
        |
        # or /repo_id/resolve/revision/...
        /(.+)/resolve/(.+)
    )
    """,
    flags=re.VERBOSE,
)
|
23 |
+
|
24 |
+
|
25 |
+
class FileMetadataError(OSError):
    """Raised when the metadata of a file on the Hub cannot be retrieved
    (missing ETag or commit_hash).

    Subclasses `OSError` so that pre-existing callers catching `OSError` keep
    working (backward compatibility).
    """
|
30 |
+
|
31 |
+
|
32 |
+
class HfHubHTTPError(HTTPError):
    """
    HTTPError to inherit from for any custom HTTP Error raised in HF Hub.

    Any HTTPError is converted at least into a `HfHubHTTPError`. If some information is
    sent back by the server, it will be added to the error message.

    Added details:
    - Request id from "X-Request-Id" header if exists.
    - Server error message from the header "X-Error-Message".
    - Server error message if we can found one in the response body.

    Example:
    ```py
        import requests
        from huggingface_hub.utils import get_session, hf_raise_for_status, HfHubHTTPError

        response = get_session().post(...)
        try:
            hf_raise_for_status(response)
        except HfHubHTTPError as e:
            print(str(e)) # formatted message
            e.request_id, e.server_message # details returned by server

            # Complete the error message with additional information once it's raised
            e.append_to_message("\n`create_commit` expects the repository to exist.")
            raise
    ```
    """

    # "X-Request-Id" header value, if the server sent one.
    request_id: Optional[str] = None
    # Deduplicated concatenation of the server-provided error messages, if any.
    server_message: Optional[str] = None

    def __init__(self, message: str, response: Optional[Response] = None):
        # Parse server information if any.
        if response is not None:
            self.request_id = response.headers.get("X-Request-Id")
            try:
                server_data = response.json()
            except JSONDecodeError:
                # Non-JSON body (e.g. HTML error page) -> no extra details.
                server_data = {}

            # Retrieve server error message from multiple sources
            server_message_from_headers = response.headers.get("X-Error-Message")
            server_message_from_body = server_data.get("error")
            # NOTE: this is always a `str` ("" when no messages), never `None`.
            server_multiple_messages_from_body = "\n".join(
                error["message"] for error in server_data.get("errors", []) if "message" in error
            )

            # Concatenate error messages. The `not in` checks deduplicate
            # messages repeated across sources; they also keep an empty join
            # out, since "" is a substring of any string.
            _server_message = ""
            if server_message_from_headers is not None:  # from headers
                _server_message += server_message_from_headers + "\n"
            if server_message_from_body is not None:  # from body "error"
                if isinstance(server_message_from_body, list):
                    server_message_from_body = "\n".join(server_message_from_body)
                if server_message_from_body not in _server_message:
                    _server_message += server_message_from_body + "\n"
            if server_multiple_messages_from_body is not None:  # from body "errors"
                if server_multiple_messages_from_body not in _server_message:
                    _server_message += server_multiple_messages_from_body + "\n"
            _server_message = _server_message.strip()

            # Set message to `HfHubHTTPError` (if any)
            if _server_message != "":
                self.server_message = _server_message

        super().__init__(
            _format_error_message(
                message,
                request_id=self.request_id,
                server_message=self.server_message,
            ),
            response=response,  # type: ignore
            request=response.request if response is not None else None,  # type: ignore
        )

    def append_to_message(self, additional_message: str) -> None:
        """Append additional information to the `HfHubHTTPError` initial message."""
        # Exceptions store their message in `args[0]`; rebuild the tuple since
        # it is immutable.
        self.args = (self.args[0] + additional_message,) + self.args[1:]
|
112 |
+
|
113 |
+
|
114 |
+
class RepositoryNotFoundError(HfHubHTTPError):
    """
    Raised when trying to access a hf.co URL with an invalid repository name, or
    with a private repo name the user does not have access to.

    Note: as the example below shows, an authentication problem on a private
    repo can surface as this error (401 with "Invalid username or password.").

    Example:

    ```py
    >>> from huggingface_hub import model_info
    >>> model_info("<non_existent_repository>")
    (...)
    huggingface_hub.utils._errors.RepositoryNotFoundError: 401 Client Error. (Request ID: PvMw_VjBMjVdMz53WKIzP)

    Repository Not Found for url: https://huggingface.co/api/models/%3Cnon_existent_repository%3E.
    Please make sure you specified the correct `repo_id` and `repo_type`.
    If the repo is private, make sure you are authenticated.
    Invalid username or password.
    ```
    """
|
133 |
+
|
134 |
+
|
135 |
+
class GatedRepoError(RepositoryNotFoundError):
    """
    Raised when trying to access a gated repository for which the user is not on the
    authorized list. Surfaces as a 403 Client Error (see example below).

    Note: derives from `RepositoryNotFoundError` to ensure backward compatibility.

    Example:

    ```py
    >>> from huggingface_hub import model_info
    >>> model_info("<gated_repository>")
    (...)
    huggingface_hub.utils._errors.GatedRepoError: 403 Client Error. (Request ID: ViT1Bf7O_026LGSQuVqfa)

    Cannot access gated repo for url https://huggingface.co/api/models/ardent-figment/gated-model.
    Access to model ardent-figment/gated-model is restricted and you are not in the authorized list.
    Visit https://huggingface.co/ardent-figment/gated-model to ask for access.
    ```
    """
|
155 |
+
|
156 |
+
|
157 |
+
class RevisionNotFoundError(HfHubHTTPError):
    """
    Raised when trying to access a hf.co URL with a valid repository but an invalid
    revision (e.g. a branch, tag or commit hash that does not exist). Surfaces as
    a 404 Client Error (see example below).

    Example:

    ```py
    >>> from huggingface_hub import hf_hub_download
    >>> hf_hub_download('bert-base-cased', 'config.json', revision='<non-existent-revision>')
    (...)
    huggingface_hub.utils._errors.RevisionNotFoundError: 404 Client Error. (Request ID: Mwhe_c3Kt650GcdKEFomX)

    Revision Not Found for url: https://huggingface.co/bert-base-cased/resolve/%3Cnon-existent-revision%3E/config.json.
    ```
    """
|
173 |
+
|
174 |
+
|
175 |
+
class EntryNotFoundError(HfHubHTTPError):
    """
    Raised when trying to access a hf.co URL with a valid repository and revision
    but an invalid filename. Surfaces as a 404 Client Error (see example below).

    Example:

    ```py
    >>> from huggingface_hub import hf_hub_download
    >>> hf_hub_download('bert-base-cased', '<non-existent-file>')
    (...)
    huggingface_hub.utils._errors.EntryNotFoundError: 404 Client Error. (Request ID: 53pNl6M0MxsnG5Sw8JA6x)

    Entry Not Found for url: https://huggingface.co/bert-base-cased/resolve/main/%3Cnon-existent-file%3E.
    ```
    """
|
191 |
+
|
192 |
+
|
193 |
+
class LocalEntryNotFoundError(EntryNotFoundError, FileNotFoundError, ValueError):
    """
    Raised when trying to access a file or snapshot that is not on the disk when network is
    disabled or unavailable (connection issue). The entry may exist on the Hub.

    Note: `ValueError` type is to ensure backward compatibility.
    Note: `LocalEntryNotFoundError` derives from `HTTPError` because of `EntryNotFoundError`
    even when it is not a network issue.

    Example:

    ```py
    >>> from huggingface_hub import hf_hub_download
    >>> hf_hub_download('bert-base-cased', '<non-cached-file>', local_files_only=True)
    (...)
    huggingface_hub.utils._errors.LocalEntryNotFoundError: Cannot find the requested files in the disk cache and outgoing traffic has been disabled. To enable hf.co look-ups and downloads online, set 'local_files_only' to False.
    ```
    """

    def __init__(self, message: str):
        # This error is raised locally (offline mode / connection failure), so
        # there is no HTTP response to attach to the parent `HfHubHTTPError`.
        super().__init__(message, response=None)
|
214 |
+
|
215 |
+
|
216 |
+
class BadRequestError(HfHubHTTPError, ValueError):
    """
    Raised by `hf_raise_for_status` when the server returns a HTTP 400 error.
    Also subclasses `ValueError` so legacy callers catching `ValueError` keep
    working.

    Example:

    ```py
    >>> resp = requests.post("hf.co/api/check", ...)
    >>> hf_raise_for_status(resp, endpoint_name="check")
    huggingface_hub.utils._errors.BadRequestError: Bad request for check endpoint: {details} (Request ID: XXX)
    ```
    """
|
228 |
+
|
229 |
+
|
230 |
+
def hf_raise_for_status(response: Response, endpoint_name: Optional[str] = None) -> None:
    """
    Internal version of `response.raise_for_status()` that refines a potential
    `HTTPError`. The raised exception is always an instance of `HfHubHTTPError`.

    This helper is meant to be the unique method to raise_for_status when making a call
    to the Hugging Face Hub.

    Example:
    ```py
    import requests
    from huggingface_hub.utils import get_session, hf_raise_for_status, HfHubHTTPError

    response = get_session().post(...)
    try:
        hf_raise_for_status(response)
    except HfHubHTTPError as e:
        print(str(e)) # formatted message
        e.request_id, e.server_message # details returned by server

        # Complete the error message with additional information once it's raised
        e.append_to_message("\n`create_commit` expects the repository to exist.")
        raise
    ```

    Args:
        response (`Response`):
            Response from the server.
        endpoint_name (`str`, *optional*):
            Name of the endpoint that has been called. If provided, the error message
            will be more complete.

    <Tip warning={true}>

    Raises when the request has failed:

        - [`~utils.RepositoryNotFoundError`]
            If the repository to download from cannot be found. This may be because it
            doesn't exist, because `repo_type` is not set correctly, or because the repo
            is `private` and you do not have access.
        - [`~utils.GatedRepoError`]
            If the repository exists but is gated and the user is not on the authorized
            list.
        - [`~utils.RevisionNotFoundError`]
            If the repository exists but the revision couldn't be found.
        - [`~utils.EntryNotFoundError`]
            If the repository exists but the entry (e.g. the requested file) couldn't be
            found.
        - [`~utils.BadRequestError`]
            If request failed with a HTTP 400 BadRequest error.
        - [`~utils.HfHubHTTPError`]
            If request failed for a reason not listed above.

    </Tip>
    """
    try:
        response.raise_for_status()
    except HTTPError as e:
        status = response.status_code
        error_code = response.headers.get("X-Error-Code")

        if error_code == "RevisionNotFound":
            raise RevisionNotFoundError(
                f"{status} Client Error.\n\nRevision Not Found for url: {response.url}.",
                response,
            ) from e

        if error_code == "EntryNotFound":
            raise EntryNotFoundError(
                f"{status} Client Error.\n\nEntry Not Found for url: {response.url}.",
                response,
            ) from e

        if error_code == "GatedRepo":
            raise GatedRepoError(
                f"{status} Client Error.\n\nCannot access gated repo for url {response.url}.",
                response,
            ) from e

        # A 401 on a repo API url is ambiguous: the server returns it both for
        # private/gated repos when the user is not authenticated and for missing
        # repos. For now, process both as `RepoNotFound` anyway.
        # See https://gist.github.com/Wauplin/46c27ad266b15998ce56a6603796f0b9
        is_ambiguous_401 = (
            status == 401
            and response.request is not None
            and response.request.url is not None
            and REPO_API_REGEX.search(response.request.url) is not None
        )
        if error_code == "RepoNotFound" or is_ambiguous_401:
            raise RepositoryNotFoundError(
                f"{status} Client Error.\n\nRepository Not Found for url: {response.url}."
                "\nPlease make sure you specified the correct `repo_id` and"
                " `repo_type`.\nIf you are trying to access a private or gated repo,"
                " make sure you are authenticated.",
                response,
            ) from e

        if status == 400:
            message = (
                f"\n\nBad request for {endpoint_name} endpoint:" if endpoint_name is not None else "\n\nBad request:"
            )
            raise BadRequestError(message, response=response) from e

        # Fallback: convert `HTTPError` into a `HfHubHTTPError` to display request
        # information as well (request id and/or server error message)
        raise HfHubHTTPError(str(e), response=response) from e
|
334 |
+
|
335 |
+
|
336 |
+
def _format_error_message(message: str, request_id: Optional[str], server_message: Optional[str]) -> str:
|
337 |
+
"""
|
338 |
+
Format the `HfHubHTTPError` error message based on initial message and information
|
339 |
+
returned by the server.
|
340 |
+
|
341 |
+
Used when initializing `HfHubHTTPError`.
|
342 |
+
"""
|
343 |
+
# Add message from response body
|
344 |
+
if server_message is not None and len(server_message) > 0 and server_message.lower() not in message.lower():
|
345 |
+
if "\n\n" in message:
|
346 |
+
message += "\n" + server_message
|
347 |
+
else:
|
348 |
+
message += "\n\n" + server_message
|
349 |
+
|
350 |
+
# Add Request ID
|
351 |
+
if request_id is not None and str(request_id).lower() not in message.lower():
|
352 |
+
request_id_message = f" (Request ID: {request_id})"
|
353 |
+
if "\n" in message:
|
354 |
+
newline_index = message.index("\n")
|
355 |
+
message = message[:newline_index] + request_id_message + message[newline_index:]
|
356 |
+
else:
|
357 |
+
message += request_id_message
|
358 |
+
|
359 |
+
return message
|
lib/python3.11/site-packages/huggingface_hub/utils/_experimental.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2023-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains utilities to flag a feature as "experimental" in Huggingface Hub."""
|
16 |
+
import warnings
|
17 |
+
from functools import wraps
|
18 |
+
from typing import Callable
|
19 |
+
|
20 |
+
from .. import constants
|
21 |
+
|
22 |
+
|
23 |
+
def experimental(fn: Callable) -> Callable:
    """Decorator to flag a feature as experimental.

    An experimental feature triggers a warning when used, as it might be subject to
    breaking changes in the future. Warnings can be disabled by setting the environment
    variable `HF_HUB_DISABLE_EXPERIMENTAL_WARNING` to `1`.

    Args:
        fn (`Callable`):
            The function to flag as experimental.

    Returns:
        `Callable`: The decorated function.

    Example:

    ```python
    >>> from huggingface_hub.utils import experimental

    >>> @experimental
    ... def my_function():
    ...     print("Hello world!")

    >>> my_function()
    UserWarning: 'my_function' is experimental and might be subject to breaking changes in the future. You can disable
    this warning by setting `HF_HUB_DISABLE_EXPERIMENTAL_WARNING=1` as environment variable.
    Hello world!
    ```
    """
    # For classes, the decorator wraps `__new__`: strip the suffix so the warning
    # names the class itself rather than `Cls.__new__`.
    display_name = fn.__qualname__.removesuffix(".__new__")

    @wraps(fn)
    def wrapper(*args, **kwargs):
        if not constants.HF_HUB_DISABLE_EXPERIMENTAL_WARNING:
            warnings.warn(
                f"'{display_name}' is experimental and might be subject to breaking changes in the future."
                " You can disable this warning by setting `HF_HUB_DISABLE_EXPERIMENTAL_WARNING=1` as environment"
                " variable.",
                UserWarning,
            )
        return fn(*args, **kwargs)

    return wrapper
|
lib/python3.11/site-packages/huggingface_hub/utils/_fixes.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# JSONDecodeError was introduced in requests=2.27 released in 2022.
|
2 |
+
# This allows us to support older requests for users
|
3 |
+
# More information: https://github.com/psf/requests/pull/5856
|
4 |
+
try:
|
5 |
+
from requests import JSONDecodeError # type: ignore # noqa: F401
|
6 |
+
except ImportError:
|
7 |
+
try:
|
8 |
+
from simplejson import JSONDecodeError # type: ignore # noqa: F401
|
9 |
+
except ImportError:
|
10 |
+
from json import JSONDecodeError # type: ignore # noqa: F401
|
11 |
+
|
12 |
+
import contextlib
|
13 |
+
import os
|
14 |
+
import shutil
|
15 |
+
import stat
|
16 |
+
import tempfile
|
17 |
+
from functools import partial
|
18 |
+
from pathlib import Path
|
19 |
+
from typing import Callable, Generator, Optional, Union
|
20 |
+
|
21 |
+
import yaml
|
22 |
+
|
23 |
+
|
24 |
+
# Wrap `yaml.dump` to set `allow_unicode=True` by default.
#
# `stream=None` makes `yaml.dump` return the serialized document as a string
# instead of writing to a stream, hence the `Callable[..., str]` annotation.
#
# Example:
# ```py
# >>> yaml.dump({"emoji": "👀", "some unicode": "日本か"})
# 'emoji: "\\U0001F440"\nsome unicode: "\\u65E5\\u672C\\u304B"\n'
#
# >>> yaml_dump({"emoji": "👀", "some unicode": "日本か"})
# 'emoji: "👀"\nsome unicode: "日本か"\n'
# ```
yaml_dump: Callable[..., str] = partial(yaml.dump, stream=None, allow_unicode=True)  # type: ignore
|
35 |
+
|
36 |
+
|
37 |
+
@contextlib.contextmanager
def SoftTemporaryDirectory(
    suffix: Optional[str] = None,
    prefix: Optional[str] = None,
    dir: Optional[Union[Path, str]] = None,
    **kwargs,
) -> Generator[str, None, None]:
    """
    Context manager to create a temporary directory and safely delete it.

    If the tmp directory cannot be deleted normally, we set the WRITE permission and
    retry. If cleanup still fails, we give up but don't raise an exception. This is
    equivalent to `tempfile.TemporaryDirectory(..., ignore_cleanup_errors=True)`
    introduced in Python 3.10.

    See https://www.scivision.dev/python-tempfile-permission-error-windows/.

    Args:
        suffix (`str`, *optional*):
            Suffix forwarded to `tempfile.TemporaryDirectory`.
        prefix (`str`, *optional*):
            Prefix forwarded to `tempfile.TemporaryDirectory`.
        dir (`Path` or `str`, *optional*):
            Parent directory in which to create the temporary directory.

    Yields:
        `str`: path of the created temporary directory.
    """
    tmpdir = tempfile.TemporaryDirectory(prefix=prefix, suffix=suffix, dir=dir, **kwargs)
    try:
        yield tmpdir.name
    finally:
        # BUGFIX: cleanup must run even if the `with` body raised. The previous
        # version did not wrap the `yield` in try/finally, so an exception in the
        # caller's block leaked the temporary directory on disk.
        try:
            # First attempt: normal cleanup.
            shutil.rmtree(tmpdir.name)
        except Exception:
            # If it failed (e.g. read-only files on Windows), set write permission
            # and retry. If that fails too, give up silently.
            try:
                shutil.rmtree(tmpdir.name, onerror=_set_write_permission_and_retry)
            except Exception:
                pass

        # And finally, let the TemporaryDirectory object clean itself up.
        # If it fails again, give up but do not throw an error.
        try:
            tmpdir.cleanup()
        except Exception:
            pass
|
73 |
+
|
74 |
+
|
75 |
+
def _set_write_permission_and_retry(func, path, excinfo):
    """`shutil.rmtree` `onerror` callback: grant WRITE permission on `path`, then retry `func` once.

    Typically needed on Windows where removing a read-only file fails; see the
    `SoftTemporaryDirectory` docstring for context.
    """
    os.chmod(path, stat.S_IWRITE)
    func(path)
|
lib/python3.11/site-packages/huggingface_hub/utils/_git_credential.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains utilities to manage Git credentials."""
|
16 |
+
import re
|
17 |
+
import subprocess
|
18 |
+
from typing import List, Optional
|
19 |
+
|
20 |
+
from ..constants import ENDPOINT
|
21 |
+
from ._subprocess import run_interactive_subprocess, run_subprocess
|
22 |
+
|
23 |
+
|
24 |
+
# Matches `credential.helper = <name>` lines in `git config --list` output.
# Verbose regex: each part is documented inline.
GIT_CREDENTIAL_REGEX = re.compile(
    r"""
        ^\s* # start of line
        credential\.helper # credential.helper value
        \s*=\s* # separator
        (\w+) # the helper name (group 1)
        (\s|$) # whitespace or end of line
    """,
    flags=re.MULTILINE | re.IGNORECASE | re.VERBOSE,
)
|
34 |
+
|
35 |
+
|
36 |
+
def list_credential_helpers(folder: Optional[str] = None) -> List[str]:
    """List the git credential helpers currently configured.

    See https://git-scm.com/docs/gitcredentials.

    Runs `git config --list` and extracts the `credential.helper` entries.

    Args:
        folder (`str`, *optional*):
            The folder in which to check the configured helpers.

    Raises:
        `EnvironmentError`: if the `git config` call fails (stderr is forwarded).
    """
    try:
        config_output = run_subprocess("git config --list", folder=folder).stdout
    except subprocess.CalledProcessError as exc:
        raise EnvironmentError(exc.stderr)
    return _parse_credential_output(config_output)
|
54 |
+
|
55 |
+
|
56 |
+
def set_git_credential(token: str, username: str = "hf_user", folder: Optional[str] = None) -> None:
    """Save a username/token pair in git credential for the HF Hub registry.

    Credentials are saved in all configured helpers (store, cache, macOS keychain,...).
    Calls "`git credential approve`" internally. See https://git-scm.com/docs/git-credential.

    Args:
        token (`str`):
            A git password. In practice, the User Access Token for the Hub.
            See https://huggingface.co/settings/tokens.
        username (`str`, defaults to `"hf_user"`):
            A git username. Defaults to `"hf_user"`, the default user used in the Hub.
        folder (`str`, *optional*):
            The folder in which to check the configured helpers.
    """
    # Payload follows the git-credential input format: key=value lines, blank line to end.
    credential_payload = f"url={ENDPOINT}\nusername={username.lower()}\npassword={token}\n\n"
    with run_interactive_subprocess("git credential approve", folder=folder) as (stdin, _):
        stdin.write(credential_payload)
        stdin.flush()
|
77 |
+
|
78 |
+
|
79 |
+
def unset_git_credential(username: str = "hf_user", folder: Optional[str] = None) -> None:
    """Erase credentials for the HF Hub registry from git credential helpers.

    Credentials are erased from the configured helpers (store, cache, macOS
    keychain,...), if any.
    Calls "`git credential reject`" internally. See https://git-scm.com/docs/git-credential.

    Args:
        username (`str`, defaults to `"hf_user"`):
            A git username. Defaults to `"hf_user"`, the default user used in the Hub.
        folder (`str`, *optional*):
            The folder in which to check the configured helpers.
    """
    # Build the git-credential input: key=value lines terminated by a blank line.
    input_lines = [f"url={ENDPOINT}\n"]
    if username is not None:
        input_lines.append(f"username={username.lower()}\n")
    input_lines.append("\n")

    with run_interactive_subprocess("git credential reject", folder=folder) as (stdin, _):
        stdin.write("".join(input_lines))
        stdin.flush()
|
104 |
+
|
105 |
+
|
106 |
+
def _parse_credential_output(output: str) -> List[str]:
    """Extract the configured credential helper names from `git config --list` output.

    Args:
        output (`str`):
            Raw stdout of `git config --list`.

    Returns:
        Sorted, de-duplicated list of helper names.
    """
    # NOTE: if the user has set a helper for a custom URL (e.g.
    # `credential.https://huggingface.co.helper=store`), it is not caught here.
    # See: https://github.com/huggingface/huggingface_hub/pull/1138#discussion_r1013324508
    helpers = {match[0] for match in GIT_CREDENTIAL_REGEX.findall(output)}  # set: drop duplicates
    return sorted(helpers)  # sort for nice printing
|
lib/python3.11/site-packages/huggingface_hub/utils/_headers.py
ADDED
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains utilities to handle headers to send in calls to Huggingface Hub."""
|
16 |
+
from typing import Dict, Optional, Union
|
17 |
+
|
18 |
+
from .. import constants
|
19 |
+
from ._runtime import (
|
20 |
+
get_fastai_version,
|
21 |
+
get_fastcore_version,
|
22 |
+
get_hf_hub_version,
|
23 |
+
get_python_version,
|
24 |
+
get_tf_version,
|
25 |
+
get_torch_version,
|
26 |
+
is_fastai_available,
|
27 |
+
is_fastcore_available,
|
28 |
+
is_tf_available,
|
29 |
+
is_torch_available,
|
30 |
+
)
|
31 |
+
from ._token import get_token
|
32 |
+
from ._validators import validate_hf_hub_args
|
33 |
+
|
34 |
+
|
35 |
+
class LocalTokenNotFoundError(EnvironmentError):
    """
    Raised if a local token is required but none is found.

    Happens e.g. when `token=True` is passed but no token is saved on the machine.
    """
|
37 |
+
|
38 |
+
|
39 |
+
@validate_hf_hub_args
def build_hf_headers(
    *,
    token: Optional[Union[bool, str]] = None,
    is_write_action: bool = False,
    library_name: Optional[str] = None,
    library_version: Optional[str] = None,
    user_agent: Union[Dict, str, None] = None,
) -> Dict[str, str]:
    """
    Build the headers dictionary to send in a HF Hub call.

    By default, the authorization token is always provided, either from the argument
    (explicit use) or retrieved from the cache (implicit use). To explicitly avoid
    sending the token to the Hub, set `token=False` or set the
    `HF_HUB_DISABLE_IMPLICIT_TOKEN` environment variable.

    In case of an API call that requires write access, an error is thrown if token is
    `None` or token is an organization token (starting with `"api_org***"`).

    In addition to the auth header, a user-agent is added to provide information about
    the installed packages (versions of python, huggingface_hub, torch, tensorflow,
    fastai and fastcore).

    Args:
        token (`str`, `bool`, *optional*):
            The token to be sent in authorization header for the Hub call:
                - if a string, it is used as the Hugging Face token
                - if `True`, the token is read from the machine (cache or env variable)
                - if `False`, authorization header is not set
                - if `None`, the token is read from the machine only except if
                  `HF_HUB_DISABLE_IMPLICIT_TOKEN` env variable is set.
        is_write_action (`bool`, default to `False`):
            Set to True if the API call requires a write access. If `True`, the token
            will be validated (cannot be `None`, cannot start by `"api_org***"`).
        library_name (`str`, *optional*):
            The name of the library that is making the HTTP request. Will be added to
            the user-agent header.
        library_version (`str`, *optional*):
            The version of the library that is making the HTTP request. Will be added
            to the user-agent header.
        user_agent (`str`, `dict`, *optional*):
            The user agent info in the form of a dictionary or a single string. It will
            be completed with information about the installed packages.

    Returns:
        A `Dict` of headers to pass in your API call.

    Example:
    ```py
    >>> build_hf_headers(token="hf_***") # explicit token
    {"authorization": "Bearer hf_***", "user-agent": ""}

    >>> build_hf_headers(token=True) # explicitly use cached token
    {"authorization": "Bearer hf_***",...}

    >>> build_hf_headers(token=False) # explicitly don't use cached token
    {"user-agent": ...}

    >>> build_hf_headers() # implicit use of the cached token
    {"authorization": "Bearer hf_***",...}

    # HF_HUB_DISABLE_IMPLICIT_TOKEN=True # to set as env variable
    >>> build_hf_headers() # token is not sent
    {"user-agent": ...}

    >>> build_hf_headers(token="api_org_***", is_write_action=True)
    ValueError: You must use your personal account token for write-access methods.

    >>> build_hf_headers(library_name="transformers", library_version="1.2.3")
    {"authorization": ..., "user-agent": "transformers/1.2.3; hf_hub/0.10.2; python/3.10.4; tensorflow/1.55"}
    ```

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If organization token is passed and "write" access is required.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If "write" access is required but token is not passed and not saved locally.
        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
            If `token=True` but token is not saved locally.
    """
    # Resolve and validate the auth token to send.
    resolved_token = get_token_to_send(token)
    _validate_token_to_send(resolved_token, is_write_action=is_write_action)

    # Combine headers: the user-agent is always set, the auth header only when a
    # token is available.
    user_agent_header = _http_user_agent(
        library_name=library_name,
        library_version=library_version,
        user_agent=user_agent,
    )
    headers = {"user-agent": user_agent_header}
    if resolved_token is not None:
        headers["authorization"] = f"Bearer {resolved_token}"
    return headers
|
135 |
+
|
136 |
+
|
137 |
+
def get_token_to_send(token: Optional[Union[bool, str]]) -> Optional[str]:
    """Resolve the token to send, from the `token` argument or the local cache."""
    # Explicit token provided as a string: use it as-is.
    if isinstance(token, str):
        return token

    # Explicitly forbidden: send no token.
    if token is False:
        return None

    # Not provided explicitly: read from the local cache.
    cached_token = get_token()

    # Explicitly required: fail loudly when no cached token exists.
    if token is True:
        if cached_token is None:
            raise LocalTokenNotFoundError(
                "Token is required (`token=True`), but no token found. You"
                " need to provide a token or be logged in to Hugging Face with"
                " `huggingface-cli login` or `huggingface_hub.login`. See"
                " https://huggingface.co/settings/tokens."
            )
        return cached_token

    # `token is None`: implicit use of the cached token, unless disabled by env variable.
    return None if constants.HF_HUB_DISABLE_IMPLICIT_TOKEN else cached_token
|
167 |
+
|
168 |
+
|
169 |
+
def _validate_token_to_send(token: Optional[str], is_write_action: bool) -> None:
|
170 |
+
if is_write_action:
|
171 |
+
if token is None:
|
172 |
+
raise ValueError(
|
173 |
+
"Token is required (write-access action) but no token found. You need"
|
174 |
+
" to provide a token or be logged in to Hugging Face with"
|
175 |
+
" `huggingface-cli login` or `huggingface_hub.login`. See"
|
176 |
+
" https://huggingface.co/settings/tokens."
|
177 |
+
)
|
178 |
+
if token.startswith("api_org"):
|
179 |
+
raise ValueError(
|
180 |
+
"You must use your personal account token for write-access methods. To"
|
181 |
+
" generate a write-access token, go to"
|
182 |
+
" https://huggingface.co/settings/tokens"
|
183 |
+
)
|
184 |
+
|
185 |
+
|
186 |
+
def _http_user_agent(
    *,
    library_name: Optional[str] = None,
    library_version: Optional[str] = None,
    user_agent: Union[Dict, str, None] = None,
) -> str:
    """Format a user-agent string containing information about the installed packages.

    Args:
        library_name (`str`, *optional*):
            The name of the library that is making the HTTP request.
        library_version (`str`, *optional*):
            The version of the library that is making the HTTP request.
        user_agent (`str`, `dict`, *optional*):
            The user agent info in the form of a dictionary or a single string.

    Returns:
        The formatted user-agent string.
    """
    # Leading entry: the calling library (or a placeholder), then core versions.
    parts = []
    if library_name is not None:
        parts.append(f"{library_name}/{library_version}")
    else:
        parts.append("unknown/None")
    parts.append(f"hf_hub/{get_hf_hub_version()}")
    parts.append(f"python/{get_python_version()}")

    # Framework versions are only reported when telemetry is enabled.
    if not constants.HF_HUB_DISABLE_TELEMETRY:
        if is_torch_available():
            parts.append(f"torch/{get_torch_version()}")
        if is_tf_available():
            parts.append(f"tensorflow/{get_tf_version()}")
        if is_fastai_available():
            parts.append(f"fastai/{get_fastai_version()}")
        if is_fastcore_available():
            parts.append(f"fastcore/{get_fastcore_version()}")

    # Caller-provided extra info, appended last.
    if isinstance(user_agent, dict):
        parts.extend(f"{k}/{v}" for k, v in user_agent.items())
    elif isinstance(user_agent, str):
        parts.append(user_agent)

    return _deduplicate_user_agent("; ".join(parts))
|
228 |
+
|
229 |
+
|
230 |
+
def _deduplicate_user_agent(user_agent: str) -> str:
|
231 |
+
"""Deduplicate redundant information in the generated user-agent."""
|
232 |
+
# Split around ";" > Strip whitespaces > Store as dict keys (ensure unicity) > format back as string
|
233 |
+
# Order is implicitly preserved by dictionary structure (see https://stackoverflow.com/a/53657523).
|
234 |
+
return "; ".join({key.strip(): None for key in user_agent.split(";")}.keys())
|
lib/python3.11/site-packages/huggingface_hub/utils/_hf_folder.py
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contain helper class to retrieve/store token from/to local cache."""
|
16 |
+
import os
|
17 |
+
import warnings
|
18 |
+
from pathlib import Path
|
19 |
+
from typing import Optional
|
20 |
+
|
21 |
+
from .. import constants
|
22 |
+
|
23 |
+
|
24 |
+
class HfFolder:
    """Helper class to save/retrieve the Hugging Face token from the local cache."""

    # Current token location (configurable through `HF_HOME`).
    path_token = Path(constants.HF_TOKEN_PATH)
    # Private attribute. Will be removed in v0.15
    _old_path_token = Path(constants._OLD_HF_TOKEN_PATH)

    # TODO: deprecate when adapted in transformers/datasets/gradio
    # @_deprecate_method(version="1.0", message="Use `huggingface_hub.login` instead.")
    @classmethod
    def save_token(cls, token: str) -> None:
        """
        Save token, creating folder as needed.

        Token is saved in the huggingface home folder. You can configure it by setting
        the `HF_HOME` environment variable.

        Args:
            token (`str`):
                The token to save to the [`HfFolder`]
        """
        cls.path_token.parent.mkdir(parents=True, exist_ok=True)
        cls.path_token.write_text(token)

    # TODO: deprecate when adapted in transformers/datasets/gradio
    # @_deprecate_method(version="1.0", message="Use `huggingface_hub.get_token` instead.")
    @classmethod
    def get_token(cls) -> Optional[str]:
        """
        Get token or None if not existent.

        Note that a token can be also provided using the `HF_TOKEN` environment variable.

        Token is saved in the huggingface home folder. You can configure it by setting
        the `HF_HOME` environment variable. Previous location was `~/.huggingface/token`.
        If token is found in old location but not in new location, it is copied there first.
        For more details, see https://github.com/huggingface/huggingface_hub/issues/1232.

        Returns:
            `str` or `None`: The token, `None` if it doesn't exist.
        """
        # 0. Check if token exist in old path but not new location
        try:
            cls._copy_to_new_path_and_warn()
        except Exception:  # if not possible (e.g. PermissionError), do not raise
            pass

        # 1. Is it set by environment variable ?
        token: Optional[str] = os.environ.get("HF_TOKEN")
        if token is None:  # Ensure backward compatibility but doesn't have priority
            token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
        if token is not None:
            # Strip CR/LF and surrounding whitespace (e.g. from copy-pasted values).
            token = token.replace("\r", "").replace("\n", "").strip()
            if token != "":
                return token

        # 2. Is it set in token path ?
        try:
            token = cls.path_token.read_text()
            token = token.replace("\r", "").replace("\n", "").strip()
            return token
        except FileNotFoundError:
            return None

    # TODO: deprecate when adapted in transformers/datasets/gradio
    # @_deprecate_method(version="1.0", message="Use `huggingface_hub.logout` instead.")
    @classmethod
    def delete_token(cls) -> None:
        """
        Deletes the token from storage. Does not fail if token does not exist.
        """
        # Remove the token from both the current and the legacy location.
        try:
            cls.path_token.unlink()
        except FileNotFoundError:
            pass

        try:
            cls._old_path_token.unlink()
        except FileNotFoundError:
            pass

    @classmethod
    def _copy_to_new_path_and_warn(cls):
        # One-time migration: if a token only exists at the legacy path, copy it to
        # the new location and tell the user the old file can be removed.
        if cls._old_path_token.exists() and not cls.path_token.exists():
            cls.save_token(cls._old_path_token.read_text())
            warnings.warn(
                f"A token has been found in `{cls._old_path_token}`. This is the old"
                " path where tokens were stored. The new location is"
                f" `{cls.path_token}` which is configurable using `HF_HOME` environment"
                " variable. Your token has been copied to this new location. You can"
                " now safely delete the old token file manually or use"
                " `huggingface-cli logout`."
            )
|
lib/python3.11/site-packages/huggingface_hub/utils/_http.py
ADDED
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains utilities to handle HTTP requests in Huggingface Hub."""
|
16 |
+
import io
|
17 |
+
import os
|
18 |
+
import threading
|
19 |
+
import time
|
20 |
+
import uuid
|
21 |
+
from functools import lru_cache
|
22 |
+
from http import HTTPStatus
|
23 |
+
from typing import Callable, Tuple, Type, Union
|
24 |
+
|
25 |
+
import requests
|
26 |
+
from requests import Response
|
27 |
+
from requests.adapters import HTTPAdapter
|
28 |
+
from requests.models import PreparedRequest
|
29 |
+
|
30 |
+
from .. import constants
|
31 |
+
from . import logging
|
32 |
+
from ._typing import HTTP_METHOD_T
|
33 |
+
|
34 |
+
|
35 |
+
logger = logging.get_logger(__name__)

# Both headers are used by the Hub to debug failed requests.
# `X_AMZN_TRACE_ID` is better as it also works to debug on Cloudfront and ALB.
# If `X_AMZN_TRACE_ID` is set, the Hub will use it as well.
X_AMZN_TRACE_ID = "X-Amzn-Trace-Id"
X_REQUEST_ID = "x-request-id"
|
42 |
+
|
43 |
+
|
44 |
+
class OfflineModeIsEnabled(ConnectionError):
    """Raised when a request is made but `HF_HUB_OFFLINE=1` is set as environment variable."""

    # Subclasses `ConnectionError` so callers already catching connection errors
    # also catch offline-mode failures.
|
46 |
+
|
47 |
+
|
48 |
+
class UniqueRequestIdAdapter(HTTPAdapter):
    """HTTP adapter that tags each outgoing request with a unique trace id and
    appends that id to any raised `RequestException` for easier debugging."""

    # NOTE(review): this class attribute duplicates the module-level constant of the
    # same name/value; the methods below resolve the module-level one.
    X_AMZN_TRACE_ID = "X-Amzn-Trace-Id"

    def add_headers(self, request, **kwargs):
        super().add_headers(request, **kwargs)

        # Add random request ID => easier for server-side debug
        # Reuse an existing `x-request-id` header if present, else generate a UUID.
        if X_AMZN_TRACE_ID not in request.headers:
            request.headers[X_AMZN_TRACE_ID] = request.headers.get(X_REQUEST_ID) or str(uuid.uuid4())

        # Add debug log
        has_token = str(request.headers.get("authorization", "")).startswith("Bearer hf_")
        logger.debug(
            f"Request {request.headers[X_AMZN_TRACE_ID]}: {request.method} {request.url} (authenticated: {has_token})"
        )

    def send(self, request: PreparedRequest, *args, **kwargs) -> Response:
        """Catch any RequestException to append request id to the error message for debugging."""
        try:
            return super().send(request, *args, **kwargs)
        except requests.RequestException as e:
            request_id = request.headers.get(X_AMZN_TRACE_ID)
            if request_id is not None:
                # Taken from https://stackoverflow.com/a/58270258
                e.args = (*e.args, f"(Request ID: {request_id})")
            raise
|
74 |
+
|
75 |
+
|
76 |
+
class OfflineAdapter(HTTPAdapter):
    """HTTP adapter that fails fast: every send raises `OfflineModeIsEnabled`."""

    def send(self, request: PreparedRequest, *args, **kwargs) -> Response:
        # Never hits the network; mounted when `HF_HUB_OFFLINE` is set.
        raise OfflineModeIsEnabled(
            f"Cannot reach {request.url}: offline mode is enabled. To disable it, please unset the `HF_HUB_OFFLINE` environment variable."
        )
|
81 |
+
|
82 |
+
|
83 |
+
def _default_backend_factory() -> requests.Session:
    """Build the default `requests.Session` used by `huggingface_hub`.

    When `HF_HUB_OFFLINE` is set, both schemes are mounted with an
    `OfflineAdapter` so every request fails fast; otherwise each request goes
    through `UniqueRequestIdAdapter`, which tags it with a unique trace id.
    """
    adapter_cls = OfflineAdapter if constants.HF_HUB_OFFLINE else UniqueRequestIdAdapter
    session = requests.Session()
    # One adapter instance per scheme, mirroring requests' default setup.
    for scheme in ("http://", "https://"):
        session.mount(scheme, adapter_cls())
    return session
|
92 |
+
|
93 |
+
|
94 |
+
# Type of a zero-argument factory producing a configured `requests.Session`.
BACKEND_FACTORY_T = Callable[[], requests.Session]
# Currently-configured factory; replaced via `configure_http_backend`.
_GLOBAL_BACKEND_FACTORY: BACKEND_FACTORY_T = _default_backend_factory
|
96 |
+
|
97 |
+
|
98 |
+
def configure_http_backend(backend_factory: BACKEND_FACTORY_T = _default_backend_factory) -> None:
    """
    Configure the HTTP backend by providing a `backend_factory`. Any HTTP calls made by `huggingface_hub` will use a
    Session object instantiated by this factory. This can be useful if you are running your scripts in a specific
    environment requiring custom configuration (e.g. custom proxy or certifications).

    Use [`get_session`] to get a configured Session. Since `requests.Session` is not guaranteed to be thread-safe,
    `huggingface_hub` creates 1 Session instance per thread. They are all instantiated using the same `backend_factory`
    set in [`configure_http_backend`]. A LRU cache is used to cache the created sessions (and connections) between
    calls. Max size is 128 to avoid memory leaks if thousands of threads are spawned.

    See [this issue](https://github.com/psf/requests/issues/2766) to know more about thread-safety in `requests`.

    Args:
        backend_factory (`Callable[[], requests.Session]`, *optional*):
            Zero-argument callable returning a new, configured `requests.Session`.
            Defaults to the library's built-in factory.

    Example:
    ```py
    import requests
    from huggingface_hub import configure_http_backend, get_session

    # Create a factory function that returns a Session with configured proxies
    def backend_factory() -> requests.Session:
        session = requests.Session()
        session.proxies = {"http": "http://10.10.1.10:3128", "https": "https://10.10.1.11:1080"}
        return session

    # Set it as the default session factory
    configure_http_backend(backend_factory=backend_factory)

    # In practice, this is mostly done internally in `huggingface_hub`
    session = get_session()
    ```
    """
    global _GLOBAL_BACKEND_FACTORY
    _GLOBAL_BACKEND_FACTORY = backend_factory
    # Drop already-cached sessions so new ones are built with the new factory.
    reset_sessions()
|
132 |
+
|
133 |
+
|
134 |
+
def get_session() -> requests.Session:
    """
    Get a `requests.Session` object, using the session factory from the user.

    Use [`get_session`] to get a configured Session. Since `requests.Session` is not guaranteed to be thread-safe,
    `huggingface_hub` creates 1 Session instance per thread. They are all instantiated using the same `backend_factory`
    set in [`configure_http_backend`]. A LRU cache is used to cache the created sessions (and connections) between
    calls. Max size is 128 to avoid memory leaks if thousands of threads are spawned.

    See [this issue](https://github.com/psf/requests/issues/2766) to know more about thread-safety in `requests`.

    Returns:
        `requests.Session`: the Session cached for the current (process, thread) pair.

    Example:
    ```py
    import requests
    from huggingface_hub import configure_http_backend, get_session

    # Create a factory function that returns a Session with configured proxies
    def backend_factory() -> requests.Session:
        session = requests.Session()
        session.proxies = {"http": "http://10.10.1.10:3128", "https": "https://10.10.1.11:1080"}
        return session

    # Set it as the default session factory
    configure_http_backend(backend_factory=backend_factory)

    # In practice, this is mostly done internally in `huggingface_hub`
    session = get_session()
    ```
    """
    # Keyed by process id as well so sessions are not shared across forked processes.
    return _get_session_from_cache(process_id=os.getpid(), thread_id=threading.get_ident())
|
164 |
+
|
165 |
+
|
166 |
+
def reset_sessions() -> None:
    """Reset the cache of sessions.

    Mostly used internally when sessions are reconfigured or an SSLError is raised.
    See [`configure_http_backend`] for more details.
    """
    # NOTE(review): cached Session objects are dropped, not explicitly closed;
    # their connections are released on garbage collection — confirm acceptable.
    _get_session_from_cache.cache_clear()
|
173 |
+
|
174 |
+
|
175 |
+
@lru_cache
def _get_session_from_cache(process_id: int, thread_id: int) -> requests.Session:
    """
    Create a new session per thread using global factory. Using LRU cache (maxsize 128) to avoid memory leaks when
    using thousands of threads. Cache is cleared when `configure_http_backend` is called.
    """
    # `process_id` and `thread_id` are unused in the body: they only serve as the
    # cache key so each (process, thread) pair gets its own Session.
    return _GLOBAL_BACKEND_FACTORY()
|
182 |
+
|
183 |
+
|
184 |
+
def http_backoff(
    method: HTTP_METHOD_T,
    url: str,
    *,
    max_retries: int = 5,
    base_wait_time: float = 1,
    max_wait_time: float = 8,
    retry_on_exceptions: Union[Type[Exception], Tuple[Type[Exception], ...]] = (
        requests.Timeout,
        requests.ConnectionError,
    ),
    retry_on_status_codes: Union[int, Tuple[int, ...]] = HTTPStatus.SERVICE_UNAVAILABLE,
    **kwargs,
) -> Response:
    """Wrapper around requests to retry calls on an endpoint, with exponential backoff.

    Endpoint call is retried on exceptions (ex: connection timeout, proxy error,...)
    and/or on specific status codes (ex: service unavailable). If the call failed more
    than `max_retries`, the exception is thrown or `raise_for_status` is called on the
    response object.

    Re-implement mechanisms from the `backoff` library to avoid adding an external
    dependency to `huggingface_hub`. See https://github.com/litl/backoff.

    Args:
        method (`Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]`):
            HTTP method to perform.
        url (`str`):
            The URL of the resource to fetch.
        max_retries (`int`, *optional*, defaults to `5`):
            Maximum number of retries. Pass `0` to disable retries.
        base_wait_time (`float`, *optional*, defaults to `1`):
            Duration (in seconds) to wait before retrying the first time.
            Wait time between retries then grows exponentially, capped by
            `max_wait_time`.
        max_wait_time (`float`, *optional*, defaults to `8`):
            Maximum duration (in seconds) to wait before retrying.
        retry_on_exceptions (`Type[Exception]` or `Tuple[Type[Exception]]`, *optional*):
            Define which exceptions must be caught to retry the request. Can be a single type or a tuple of types.
            By default, retry on `requests.Timeout` and `requests.ConnectionError`.
        retry_on_status_codes (`int` or `Tuple[int]`, *optional*, defaults to `503`):
            Define on which status codes the request must be retried. By default, only
            HTTP 503 Service Unavailable is retried.
        **kwargs (`dict`, *optional*):
            kwargs to pass to `requests.request`.

    Example:
    ```
    >>> from huggingface_hub.utils import http_backoff

    # Same usage as "requests.request".
    >>> response = http_backoff("GET", "https://www.google.com")
    >>> response.raise_for_status()

    # If you expect a Gateway Timeout from time to time
    >>> response = http_backoff("PUT", upload_url, data=data, retry_on_status_codes=504)
    >>> response.raise_for_status()
    ```

    <Tip warning={true}>

    When using `requests` it is possible to stream data by passing an iterator to the
    `data` argument. On http backoff this is a problem as the iterator is not reset
    after a failed call. This issue is mitigated for file objects or any IO streams
    by saving the initial position of the cursor (with `data.tell()`) and resetting the
    cursor between each call (with `data.seek()`). For arbitrary iterators, http backoff
    will fail. If this is a hard constraint for you, please let us know by opening an
    issue on [Github](https://github.com/huggingface/huggingface_hub).

    </Tip>
    """
    if isinstance(retry_on_exceptions, type):  # Tuple from single exception type
        retry_on_exceptions = (retry_on_exceptions,)

    if isinstance(retry_on_status_codes, int):  # Tuple from single status code
        retry_on_status_codes = (retry_on_status_codes,)

    nb_tries = 0
    sleep_time = base_wait_time

    # If `data` is used and is a file object (or any IO), it will be consumed on the
    # first HTTP request. We need to save the initial position so that the full content
    # of the file is re-sent on http backoff. See warning tip in docstring.
    io_obj_initial_pos = None
    if "data" in kwargs and isinstance(kwargs["data"], io.IOBase):
        io_obj_initial_pos = kwargs["data"].tell()

    session = get_session()
    while True:
        nb_tries += 1
        try:
            # If `data` is used and is a file object (or any IO), set back cursor to
            # initial position.
            if io_obj_initial_pos is not None:
                kwargs["data"].seek(io_obj_initial_pos)

            # Perform request and return if status_code is not in the retry list.
            response = session.request(method=method, url=url, **kwargs)
            if response.status_code not in retry_on_status_codes:
                return response

            # Wrong status code returned (HTTP 503 for instance)
            logger.warning(f"HTTP Error {response.status_code} thrown while requesting {method} {url}")
            if nb_tries > max_retries:
                response.raise_for_status()  # Will raise uncaught exception
                # We return response to avoid infinite loop in the corner case where the
                # user ask for retry on a status code that doesn't raise_for_status.
                return response

        except retry_on_exceptions as err:
            logger.warning(f"'{err}' thrown while requesting {method} {url}")

            if isinstance(err, requests.ConnectionError):
                reset_sessions()  # In case of SSLError it's best to reset the shared requests.Session objects

            if nb_tries > max_retries:
                raise err

        # Sleep for X seconds
        logger.warning(f"Retrying in {sleep_time}s [Retry {nb_tries}/{max_retries}].")
        time.sleep(sleep_time)

        # Update sleep time for next retry
        sleep_time = min(max_wait_time, sleep_time * 2)  # Exponential backoff
|
lib/python3.11/site-packages/huggingface_hub/utils/_pagination.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains utilities to handle pagination on Huggingface Hub."""
|
16 |
+
from typing import Dict, Iterable, Optional
|
17 |
+
|
18 |
+
import requests
|
19 |
+
|
20 |
+
from . import get_session, hf_raise_for_status, logging
|
21 |
+
|
22 |
+
|
23 |
+
logger = logging.get_logger(__name__)
|
24 |
+
|
25 |
+
|
26 |
+
def paginate(path: str, params: Dict, headers: Dict) -> Iterable:
    """Iterate over every result of a paginated Hub listing endpoint.

    Yields the items of the first page, then follows the GitHub-style "Link"
    response header until no next page remains.
    See:
    - https://requests.readthedocs.io/en/latest/api/#requests.Response.links
    - https://docs.github.com/en/rest/guides/traversing-with-pagination#link-header
    """
    session = get_session()
    response = session.get(path, params=params, headers=headers)
    while True:
        hf_raise_for_status(response)
        yield from response.json()
        next_page = _get_next_page(response)
        if next_page is None:
            break
        logger.debug(f"Pagination detected. Requesting next page: {next_page}")
        # The next link already contains the query params — don't re-send them.
        response = session.get(next_page, headers=headers)
|
48 |
+
|
49 |
+
|
50 |
+
def _get_next_page(response: requests.Response) -> Optional[str]:
    """Return the URL of the next page from the "Link" header, or `None`."""
    next_link = response.links.get("next", {})
    return next_link.get("url")
|
lib/python3.11/site-packages/huggingface_hub/utils/_paths.py
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains utilities to handle paths in Huggingface Hub."""
|
16 |
+
from fnmatch import fnmatch
|
17 |
+
from pathlib import Path
|
18 |
+
from typing import Callable, Generator, Iterable, List, Optional, TypeVar, Union
|
19 |
+
|
20 |
+
|
21 |
+
# Generic item type accepted by `filter_repo_objects`.
T = TypeVar("T")

# fnmatch patterns matching a `.git` folder and its contents at any depth.
IGNORE_GIT_FOLDER_PATTERNS = [".git", ".git/*", "*/.git", "**/.git/**"]
|
24 |
+
|
25 |
+
|
26 |
+
def filter_repo_objects(
    items: Iterable[T],
    *,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
    key: Optional[Callable[[T], str]] = None,
) -> Generator[T, None, None]:
    """Yield the items whose path passes an allowlist and a denylist.

    `items` is either an iterable of paths (`str` / `Path`) or of arbitrary
    objects, in which case `key` must map each object to its path. Patterns are
    Unix shell-style wildcards (see https://docs.python.org/3/library/fnmatch.html),
    NOT regular expressions.

    Args:
        items (`Iterable`):
            Items to filter.
        allow_patterns (`str` or `List[str]`, *optional*):
            If provided, an item is kept only if its path matches at least one
            of these patterns.
        ignore_patterns (`str` or `List[str]`, *optional*):
            If provided, an item is dropped if its path matches any of these
            patterns.
        key (`Callable[[T], str]`, *optional*):
            Function extracting the path from an item. If not provided, items
            must already be `str` or `Path`.

    Returns:
        Generator over the kept items, in input order.

    Raises:
        :class:`ValueError`:
            If `key` is not provided and an item is neither `str` nor `Path`.

    Example:
        ```python
        >>> # Keep only PDFs that are not hidden.
        >>> list(filter_repo_objects(
        ...     ["aaa.pdf", "bbb.jpg", ".ccc.pdf", ".ddd.png"],
        ...     allow_patterns=["*.pdf"],
        ...     ignore_patterns=[".*"],
        ... ))
        ["aaa.pdf"]
        ```
    """
    # Normalize single-pattern arguments into lists.
    if isinstance(allow_patterns, str):
        allow_patterns = [allow_patterns]
    if isinstance(ignore_patterns, str):
        ignore_patterns = [ignore_patterns]

    if key is None:
        # Default extractor: items must already be paths.
        def _identity(item: T) -> str:
            if isinstance(item, str):
                return item
            if isinstance(item, Path):
                return str(item)
            raise ValueError(f"Please provide `key` argument in `filter_repo_objects`: `{item}` is not a string.")

        key = _identity

    for item in items:
        path = key(item)
        # Keep the item iff it passes the allowlist (when given) and is not denied.
        is_allowed = allow_patterns is None or any(fnmatch(path, pattern) for pattern in allow_patterns)
        is_ignored = ignore_patterns is not None and any(fnmatch(path, pattern) for pattern in ignore_patterns)
        if is_allowed and not is_ignored:
            yield item
|
lib/python3.11/site-packages/huggingface_hub/utils/_runtime.py
ADDED
@@ -0,0 +1,344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Check presence of installed packages at runtime."""
|
16 |
+
import importlib.metadata
|
17 |
+
import platform
|
18 |
+
import sys
|
19 |
+
import warnings
|
20 |
+
from typing import Any, Dict
|
21 |
+
|
22 |
+
from .. import __version__, constants
|
23 |
+
|
24 |
+
|
25 |
+
# Interpreter version, e.g. "3.11.4"; `rstrip("+")` drops the trailing "+" that
# locally-built interpreters append (e.g. "3.11.4+").
_PY_VERSION: str = sys.version.split()[0].rstrip("+")

# Candidate name -> installed version string ("N/A" when not installed).
_package_versions = {}

# Candidate name -> distribution names to probe (several flavors for tensorflow).
_CANDIDATES = {
    "aiohttp": {"aiohttp"},
    "fastai": {"fastai"},
    "fastcore": {"fastcore"},
    "gradio": {"gradio"},
    "graphviz": {"graphviz"},
    "hf_transfer": {"hf_transfer"},
    "jinja": {"Jinja2"},
    "numpy": {"numpy"},
    "pillow": {"Pillow"},
    "pydantic": {"pydantic"},
    "pydot": {"pydot"},
    "tensorboard": {"tensorboardX"},
    "tensorflow": (
        "tensorflow",
        "tensorflow-cpu",
        "tensorflow-gpu",
        "tf-nightly",
        "tf-nightly-cpu",
        "tf-nightly-gpu",
        "intel-tensorflow",
        "intel-tensorflow-avx512",
        "tensorflow-rocm",
        "tensorflow-macos",
    ),
    "torch": {"torch"},
}

# Check once at runtime
for candidate_name, package_names in _CANDIDATES.items():
    _package_versions[candidate_name] = "N/A"
    for name in package_names:
        try:
            # First distribution found wins.
            _package_versions[candidate_name] = importlib.metadata.version(name)
            break
        except importlib.metadata.PackageNotFoundError:
            pass
|
66 |
+
|
67 |
+
|
68 |
+
def _get_version(package_name: str) -> str:
    """Return the cached version of `package_name`, or "N/A" if not detected."""
    try:
        return _package_versions[package_name]
    except KeyError:
        return "N/A"
|
70 |
+
|
71 |
+
|
72 |
+
def _is_available(package_name: str) -> bool:
    """Whether `package_name` was detected at import time."""
    version = _get_version(package_name)
    return version != "N/A"
|
74 |
+
|
75 |
+
|
76 |
+
# Python
|
77 |
+
def get_python_version() -> str:
    """Return the running Python interpreter version (e.g. "3.11.4")."""
    return _PY_VERSION
|
79 |
+
|
80 |
+
|
81 |
+
# Huggingface Hub
|
82 |
+
def get_hf_hub_version() -> str:
    """Return the installed `huggingface_hub` version."""
    return __version__
|
84 |
+
|
85 |
+
|
86 |
+
# aiohttp
|
87 |
+
def is_aiohttp_available() -> bool:
    """Whether the `aiohttp` package is installed."""
    return _is_available("aiohttp")
|
89 |
+
|
90 |
+
|
91 |
+
def get_aiohttp_version() -> str:
    """Return the installed `aiohttp` version ("N/A" if not installed)."""
    return _get_version("aiohttp")
|
93 |
+
|
94 |
+
|
95 |
+
# FastAI
|
96 |
+
def is_fastai_available() -> bool:
    """Whether the `fastai` package is installed."""
    return _is_available("fastai")
|
98 |
+
|
99 |
+
|
100 |
+
def get_fastai_version() -> str:
    """Return the installed `fastai` version ("N/A" if not installed)."""
    return _get_version("fastai")
|
102 |
+
|
103 |
+
|
104 |
+
# Fastcore
|
105 |
+
def is_fastcore_available() -> bool:
    """Whether the `fastcore` package is installed."""
    return _is_available("fastcore")
|
107 |
+
|
108 |
+
|
109 |
+
def get_fastcore_version() -> str:
    """Return the installed `fastcore` version ("N/A" if not installed)."""
    return _get_version("fastcore")
|
111 |
+
|
112 |
+
|
113 |
+
# Gradio
|
114 |
+
def is_gradio_available() -> bool:
    """Whether the `gradio` package is installed."""
    return _is_available("gradio")
|
116 |
+
|
117 |
+
|
118 |
+
def get_gradio_version() -> str:
    """Return the installed `gradio` version ("N/A" if not installed)."""
    return _get_version("gradio")
|
120 |
+
|
121 |
+
|
122 |
+
# Graphviz
|
123 |
+
def is_graphviz_available() -> bool:
    """Whether the `graphviz` package is installed."""
    return _is_available("graphviz")
|
125 |
+
|
126 |
+
|
127 |
+
def get_graphviz_version() -> str:
    """Return the installed `graphviz` version ("N/A" if not installed)."""
    return _get_version("graphviz")
|
129 |
+
|
130 |
+
|
131 |
+
# hf_transfer
def is_hf_transfer_available() -> bool:
    """Return `True` if the `hf_transfer` package is installed."""
    return _is_available("hf_transfer")


def get_hf_transfer_version() -> str:
    """Return the installed `hf_transfer` version, or `"N/A"` if not installed."""
    return _get_version("hf_transfer")
|
138 |
+
|
139 |
+
|
140 |
+
# Numpy
def is_numpy_available() -> bool:
    """Return `True` if the `numpy` package is installed."""
    return _is_available("numpy")


def get_numpy_version() -> str:
    """Return the installed `numpy` version, or `"N/A"` if not installed."""
    return _get_version("numpy")
|
147 |
+
|
148 |
+
|
149 |
+
# Jinja
def is_jinja_available() -> bool:
    """Return `True` if the Jinja templating package is installed."""
    # NOTE(review): "jinja" is a key of `_CANDIDATES`; presumably it maps to the
    # `Jinja2` distribution name — confirm against the candidates mapping above.
    return _is_available("jinja")


def get_jinja_version() -> str:
    """Return the installed Jinja version, or `"N/A"` if not installed."""
    return _get_version("jinja")
|
156 |
+
|
157 |
+
|
158 |
+
# Pillow
def is_pillow_available() -> bool:
    """Return `True` if the Pillow imaging package is installed."""
    return _is_available("pillow")


def get_pillow_version() -> str:
    """Return the installed Pillow version, or `"N/A"` if not installed."""
    return _get_version("pillow")
|
165 |
+
|
166 |
+
|
167 |
+
# Pydantic
def is_pydantic_available() -> bool:
    """Return `True` if `pydantic` is installed AND actually importable.

    For Pydantic, we add an extra check to test whether it is correctly installed or not. If both pydantic 2.x and
    typing_extensions<=4.5.0 are installed, then pydantic will fail at import time. This should not happen when
    it is installed with `pip install huggingface_hub[inference]` but it can happen when it is installed manually
    by the user in an environment that we don't control.

    Usually we won't need to do this kind of check on optional dependencies. However, pydantic is a special case
    as it is automatically imported when doing `from huggingface_hub import ...` even if the user doesn't use it.

    See https://github.com/huggingface/huggingface_hub/pull/1829 for more details.
    """
    if not _is_available("pydantic"):
        return False
    try:
        from pydantic import validator  # noqa: F401
    except ImportError as e:
        # Example: "ImportError: cannot import name 'TypeAliasType' from 'typing_extensions'"
        # Bug fix: the original used a plain (non-f) string containing '{e}' and never bound
        # the exception, so the literal text "{e}" was shown instead of the actual error.
        warnings.warn(
            "Pydantic is installed but cannot be imported. Please check your installation. `huggingface_hub` will "
            f"default to not using Pydantic. Error message: '{e}'"
        )
        return False
    return True


def get_pydantic_version() -> str:
    """Return the installed `pydantic` version, or `"N/A"` if not installed."""
    return _get_version("pydantic")
|
194 |
+
|
195 |
+
|
196 |
+
# Pydot
def is_pydot_available() -> bool:
    """Return `True` if the `pydot` package is installed."""
    return _is_available("pydot")


def get_pydot_version() -> str:
    """Return the installed `pydot` version, or `"N/A"` if not installed."""
    return _get_version("pydot")
|
203 |
+
|
204 |
+
|
205 |
+
# Tensorboard
def is_tensorboard_available() -> bool:
    """Return `True` if the `tensorboard` package is installed."""
    return _is_available("tensorboard")


def get_tensorboard_version() -> str:
    """Return the installed `tensorboard` version, or `"N/A"` if not installed."""
    return _get_version("tensorboard")
|
212 |
+
|
213 |
+
|
214 |
+
# Tensorflow
def is_tf_available() -> bool:
    """Return `True` if the `tensorflow` package is installed."""
    return _is_available("tensorflow")


def get_tf_version() -> str:
    """Return the installed `tensorflow` version, or `"N/A"` if not installed."""
    return _get_version("tensorflow")
|
221 |
+
|
222 |
+
|
223 |
+
# Torch
def is_torch_available() -> bool:
    """Return `True` if the `torch` package is installed."""
    return _is_available("torch")


def get_torch_version() -> str:
    """Return the installed `torch` version, or `"N/A"` if not installed."""
    return _get_version("torch")
|
230 |
+
|
231 |
+
|
232 |
+
# Shell-related helpers
try:
    # Set to `True` if script is running in a Google Colab notebook.
    # If running in Google Colab, git credential store is set globally which makes the
    # warning disappear. See https://github.com/huggingface/huggingface_hub/issues/1043
    #
    # Taken from https://stackoverflow.com/a/63519730.
    _is_google_colab = "google.colab" in str(get_ipython())  # type: ignore # noqa: F821
except NameError:
    # `get_ipython` is only defined when running under IPython; a plain Python
    # interpreter raises NameError, which means we are definitely not in Colab.
    _is_google_colab = False
|
242 |
+
|
243 |
+
|
244 |
+
def is_notebook() -> bool:
    """Return `True` if code is executed in a notebook (Jupyter, Colab, QTconsole).

    Taken from https://stackoverflow.com/a/39662359.
    Adapted to make it work with Google colab as well.
    """
    try:
        interpreter_class = get_ipython().__class__  # type: ignore # noqa: F821
    except NameError:
        # `get_ipython` undefined => standard Python interpreter
        return False
    # Jupyter notebook, Google colab and qtconsole shells all inherit from ZMQInteractiveShell.
    return any(base.__name__ == "ZMQInteractiveShell" for base in interpreter_class.__mro__)
|
258 |
+
|
259 |
+
|
260 |
+
def is_google_colab() -> bool:
    """Return `True` if code is executed in a Google colab.

    Taken from https://stackoverflow.com/a/63519730.
    """
    # Detection happens once at import time (module-level `_is_google_colab`): the
    # runtime environment cannot change within a session, so a cached flag is enough.
    return _is_google_colab
|
266 |
+
|
267 |
+
|
268 |
+
def dump_environment_info() -> Dict[str, Any]:
    """Dump information about the machine to help debugging issues.

    Prints a copy-pastable report to stdout and also returns it as a dict
    (keys are human-readable labels, values are the corresponding settings).

    Similar helper exist in:
    - `datasets` (https://github.com/huggingface/datasets/blob/main/src/datasets/commands/env.py)
    - `diffusers` (https://github.com/huggingface/diffusers/blob/main/src/diffusers/commands/env.py)
    - `transformers` (https://github.com/huggingface/transformers/blob/main/src/transformers/commands/env.py)
    """
    # Imported locally — presumably to avoid a circular import at module load; confirm.
    from huggingface_hub import get_token, whoami
    from huggingface_hub.utils import list_credential_helpers

    token = get_token()

    # Generic machine info
    info: Dict[str, Any] = {
        "huggingface_hub version": get_hf_hub_version(),
        "Platform": platform.platform(),
        "Python version": get_python_version(),
    }

    # Interpreter info: `get_ipython` is only defined when running under IPython.
    try:
        shell_class = get_ipython().__class__  # type: ignore # noqa: F821
        info["Running in iPython ?"] = "Yes"
        info["iPython shell"] = shell_class.__name__
    except NameError:
        info["Running in iPython ?"] = "No"
    info["Running in notebook ?"] = "Yes" if is_notebook() else "No"
    info["Running in Google Colab ?"] = "Yes" if is_google_colab() else "No"

    # Login info
    info["Token path ?"] = constants.HF_TOKEN_PATH
    info["Has saved token ?"] = token is not None
    if token is not None:
        # Best-effort: a network/auth failure must not break the report.
        try:
            info["Who am I ?"] = whoami()["name"]
        except Exception:
            pass

    # Best-effort: `git` may not be installed / configured.
    try:
        info["Configured git credential helpers"] = ", ".join(list_credential_helpers())
    except Exception:
        pass

    # Installed dependencies (each is "N/A" when the package is not installed)
    info["FastAI"] = get_fastai_version()
    info["Tensorflow"] = get_tf_version()
    info["Torch"] = get_torch_version()
    info["Jinja2"] = get_jinja_version()
    info["Graphviz"] = get_graphviz_version()
    info["Pydot"] = get_pydot_version()
    info["Pillow"] = get_pillow_version()
    info["hf_transfer"] = get_hf_transfer_version()
    info["gradio"] = get_gradio_version()
    info["tensorboard"] = get_tensorboard_version()
    info["numpy"] = get_numpy_version()
    info["pydantic"] = get_pydantic_version()
    info["aiohttp"] = get_aiohttp_version()

    # Environment variables (resolved values as seen by `huggingface_hub.constants`)
    info["ENDPOINT"] = constants.ENDPOINT
    info["HF_HUB_CACHE"] = constants.HF_HUB_CACHE
    info["HF_ASSETS_CACHE"] = constants.HF_ASSETS_CACHE
    info["HF_TOKEN_PATH"] = constants.HF_TOKEN_PATH
    info["HF_HUB_OFFLINE"] = constants.HF_HUB_OFFLINE
    info["HF_HUB_DISABLE_TELEMETRY"] = constants.HF_HUB_DISABLE_TELEMETRY
    info["HF_HUB_DISABLE_PROGRESS_BARS"] = constants.HF_HUB_DISABLE_PROGRESS_BARS
    info["HF_HUB_DISABLE_SYMLINKS_WARNING"] = constants.HF_HUB_DISABLE_SYMLINKS_WARNING
    info["HF_HUB_DISABLE_EXPERIMENTAL_WARNING"] = constants.HF_HUB_DISABLE_EXPERIMENTAL_WARNING
    info["HF_HUB_DISABLE_IMPLICIT_TOKEN"] = constants.HF_HUB_DISABLE_IMPLICIT_TOKEN
    info["HF_HUB_ENABLE_HF_TRANSFER"] = constants.HF_HUB_ENABLE_HF_TRANSFER
    info["HF_HUB_ETAG_TIMEOUT"] = constants.HF_HUB_ETAG_TIMEOUT
    info["HF_HUB_DOWNLOAD_TIMEOUT"] = constants.HF_HUB_DOWNLOAD_TIMEOUT

    print("\nCopy-and-paste the text below in your GitHub issue.\n")
    print("\n".join([f"- {prop}: {val}" for prop, val in info.items()]) + "\n")
    return info
|
lib/python3.11/site-packages/huggingface_hub/utils/_safetensors.py
ADDED
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import functools
|
2 |
+
import operator
|
3 |
+
from collections import defaultdict
|
4 |
+
from dataclasses import dataclass, field
|
5 |
+
from typing import Dict, List, Literal, Optional, Tuple
|
6 |
+
|
7 |
+
|
8 |
+
# Type aliases used throughout this module.
FILENAME_T = str  # name of a file hosted in the repo
TENSOR_NAME_T = str  # name of a tensor inside a safetensors file
DTYPE_T = Literal["F64", "F32", "F16", "BF16", "I64", "I32", "I16", "I8", "U8", "BOOL"]  # safetensors dtypes
|
11 |
+
|
12 |
+
|
13 |
+
class SafetensorsParsingError(Exception):
    """Raised when failing to parse the metadata of a safetensors file.

    This can be the case if the file is not a safetensors file or does not respect the specification.
    """
|
18 |
+
|
19 |
+
|
20 |
+
class NotASafetensorsRepoError(Exception):
    """Raised when a repo is not a Safetensors repo i.e. doesn't have either a `model.safetensors` or a
    `model.safetensors.index.json` file.
    """
|
24 |
+
|
25 |
+
|
26 |
+
@dataclass
class TensorInfo:
    """Information about a tensor.

    For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.

    Attributes:
        dtype (`str`):
            The data type of the tensor ("F64", "F32", "F16", "BF16", "I64", "I32", "I16", "I8", "U8", "BOOL").
        shape (`List[int]`):
            The shape of the tensor.
        data_offsets (`Tuple[int, int]`):
            The offsets of the data in the file as a tuple `[BEGIN, END]`.
        parameter_count (`int`):
            The number of parameters in the tensor (computed, not passed to the constructor).
    """

    dtype: DTYPE_T
    shape: List[int]
    data_offsets: Tuple[int, int]
    parameter_count: int = field(init=False)

    def __post_init__(self) -> None:
        # Product of all dimensions; a scalar (empty shape) counts as one parameter.
        if self.shape:
            self.parameter_count = functools.reduce(operator.mul, self.shape)
        else:
            self.parameter_count = 1
|
54 |
+
|
55 |
+
|
56 |
+
@dataclass
class SafetensorsFileMetadata:
    """Metadata for a Safetensors file hosted on the Hub.

    This class is returned by [`parse_safetensors_file_metadata`].

    For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.

    Attributes:
        metadata (`Dict`):
            The metadata contained in the file.
        tensors (`Dict[str, TensorInfo]`):
            A map of all tensors. Keys are tensor names and values are information about the corresponding tensor, as a
            [`TensorInfo`] object.
        parameter_count (`Dict[str, int]`):
            A map of the number of parameters per data type. Keys are data types and values are the number of parameters
            of that data type (computed, not passed to the constructor).
    """

    metadata: Dict[str, str]
    tensors: Dict[TENSOR_NAME_T, TensorInfo]
    parameter_count: Dict[DTYPE_T, int] = field(init=False)

    def __post_init__(self) -> None:
        # Aggregate per-dtype parameter counts over all tensors of the file.
        counts: Dict[DTYPE_T, int] = {}
        for tensor_info in self.tensors.values():
            counts[tensor_info.dtype] = counts.get(tensor_info.dtype, 0) + tensor_info.parameter_count
        self.parameter_count = counts
|
84 |
+
|
85 |
+
|
86 |
+
@dataclass
class SafetensorsRepoMetadata:
    """Metadata for a Safetensors repo.

    A repo is considered to be a Safetensors repo if it contains either a 'model.safetensors' weight file (non-shared
    model) or a 'model.safetensors.index.json' index file (sharded model) at its root.

    This class is returned by [`get_safetensors_metadata`].

    For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.

    Attributes:
        metadata (`Dict`, *optional*):
            The metadata contained in the 'model.safetensors.index.json' file, if it exists. Only populated for sharded
            models.
        sharded (`bool`):
            Whether the repo contains a sharded model or not.
        weight_map (`Dict[str, str]`):
            A map of all weights. Keys are tensor names and values are filenames of the files containing the tensors.
        files_metadata (`Dict[str, SafetensorsFileMetadata]`):
            A map of all files metadata. Keys are filenames and values are the metadata of the corresponding file, as
            a [`SafetensorsFileMetadata`] object.
        parameter_count (`Dict[str, int]`):
            A map of the number of parameters per data type. Keys are data types and values are the number of parameters
            of that data type (computed, not passed to the constructor).
    """

    metadata: Optional[Dict]
    sharded: bool
    weight_map: Dict[TENSOR_NAME_T, FILENAME_T]  # tensor name -> filename
    files_metadata: Dict[FILENAME_T, SafetensorsFileMetadata]  # filename -> metadata
    parameter_count: Dict[DTYPE_T, int] = field(init=False)

    def __post_init__(self) -> None:
        # Sum the per-file, per-dtype parameter counts across the whole repo.
        totals: Dict[DTYPE_T, int] = {}
        for file_metadata in self.files_metadata.values():
            for dtype, count in file_metadata.parameter_count.items():
                totals[dtype] = totals.get(dtype, 0) + count
        self.parameter_count = totals
|
lib/python3.11/site-packages/huggingface_hub/utils/_subprocess.py
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# coding=utf-8
|
3 |
+
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
|
4 |
+
#
|
5 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
6 |
+
# you may not use this file except in compliance with the License.
|
7 |
+
# You may obtain a copy of the License at
|
8 |
+
#
|
9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10 |
+
#
|
11 |
+
# Unless required by applicable law or agreed to in writing, software
|
12 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14 |
+
# See the License for the specific language governing permissions and
|
15 |
+
# limitations under the License
|
16 |
+
"""Contains utilities to easily handle subprocesses in `huggingface_hub`."""
|
17 |
+
import os
|
18 |
+
import subprocess
|
19 |
+
import sys
|
20 |
+
from contextlib import contextmanager
|
21 |
+
from io import StringIO
|
22 |
+
from pathlib import Path
|
23 |
+
from typing import IO, Generator, List, Optional, Tuple, Union
|
24 |
+
|
25 |
+
from .logging import get_logger
|
26 |
+
|
27 |
+
|
28 |
+
logger = get_logger(__name__)
|
29 |
+
|
30 |
+
|
31 |
+
@contextmanager
def capture_output() -> Generator[StringIO, None, None]:
    """Capture output that is printed to terminal.

    Taken from https://stackoverflow.com/a/34738440

    Example:
    ```py
    >>> with capture_output() as output:
    ...     print("hello world")
    >>> assert output.getvalue() == "hello world\n"
    ```
    """
    output = StringIO()
    previous_output = sys.stdout
    sys.stdout = output
    try:
        yield output
    finally:
        # Bug fix: always restore the real stdout, even if the `with` body raised.
        # The original skipped restoration on error, leaving stdout captured forever.
        sys.stdout = previous_output
|
49 |
+
|
50 |
+
|
51 |
+
def run_subprocess(
    command: Union[str, List[str]],
    folder: Optional[Union[str, Path]] = None,
    check=True,
    **kwargs,
) -> subprocess.CompletedProcess:
    """
    Method to run subprocesses. Calling this will capture the `stderr` and `stdout`,
    please call `subprocess.run` manually in case you would like for them not to
    be captured.

    Args:
        command (`str` or `List[str]`):
            The command to execute as a string or list of strings.
        folder (`str`, *optional*):
            The folder in which to run the command. Defaults to current working
            directory (from `os.getcwd()`).
        check (`bool`, *optional*, defaults to `True`):
            Setting `check` to `True` will raise a `subprocess.CalledProcessError`
            when the subprocess has a non-zero exit code.
        kwargs (`Dict[str]`):
            Keyword arguments to be passed to the `subprocess.run` underlying command.

    Returns:
        `subprocess.CompletedProcess`: The completed process.
    """
    # Normalize the command to a list of arguments and the folder to a string path.
    args = command.split() if isinstance(command, str) else command
    cwd = str(folder) if isinstance(folder, Path) else folder

    return subprocess.run(
        args,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        check=check,
        encoding="utf-8",
        errors="replace",  # if not utf-8, replace char by �
        cwd=cwd or os.getcwd(),
        **kwargs,
    )
|
93 |
+
|
94 |
+
|
95 |
+
@contextmanager
def run_interactive_subprocess(
    command: Union[str, List[str]],
    folder: Optional[Union[str, Path]] = None,
    **kwargs,
) -> Generator[Tuple[IO[str], IO[str]], None, None]:
    """Run a subprocess in an interactive mode in a context manager.

    Args:
        command (`str` or `List[str]`):
            The command to execute as a string or list of strings.
        folder (`str`, *optional*):
            The folder in which to run the command. Defaults to current working
            directory (from `os.getcwd()`).
        kwargs (`Dict[str]`):
            Keyword arguments to be passed to the `subprocess.run` underlying command.

    Returns:
        `Tuple[IO[str], IO[str]]`: A tuple with `stdin` and `stdout` to interact
        with the process. Both streams are text-mode (`encoding="utf-8"`), so pass
        and receive `str`, not bytes.

    Example:
    ```python
    with run_interactive_subprocess("git credential-store get") as (stdin, stdout):
        # Write to stdin (text mode: pass `str`, not bytes)
        stdin.write("url=hf.co\nusername=obama\n")
        stdin.flush()

        # Read from stdout (already decoded as utf-8)
        output = stdout.read()
    ```
    """
    if isinstance(command, str):
        command = command.split()

    with subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave stderr into stdout
        encoding="utf-8",
        errors="replace",  # if not utf-8, replace char by �
        cwd=folder or os.getcwd(),
        **kwargs,
    ) as process:
        # Both pipes are guaranteed non-None because stdin/stdout are PIPE above.
        assert process.stdin is not None, "subprocess is opened as subprocess.PIPE"
        assert process.stdout is not None, "subprocess is opened as subprocess.PIPE"
        yield process.stdin, process.stdout
|
lib/python3.11/site-packages/huggingface_hub/utils/_telemetry.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from queue import Queue
|
2 |
+
from threading import Lock, Thread
|
3 |
+
from typing import Dict, Optional, Union
|
4 |
+
from urllib.parse import quote
|
5 |
+
|
6 |
+
from .. import constants, logging
|
7 |
+
from . import build_hf_headers, get_session, hf_raise_for_status
|
8 |
+
|
9 |
+
|
10 |
+
logger = logging.get_logger(__name__)
|
11 |
+
|
12 |
+
# Telemetry is sent by a separate thread to avoid blocking the main thread.
# A daemon thread is started once and consumes tasks from the _TELEMETRY_QUEUE.
# If the thread stops for some reason -shouldn't happen-, we restart a new one.
_TELEMETRY_THREAD: Optional[Thread] = None  # singleton worker thread, created lazily
_TELEMETRY_THREAD_LOCK = Lock()  # Lock to avoid starting multiple threads in parallel
_TELEMETRY_QUEUE: Queue = Queue()  # pending telemetry payloads consumed by the worker
|
18 |
+
|
19 |
+
|
20 |
+
def send_telemetry(
    topic: str,
    *,
    library_name: Optional[str] = None,
    library_version: Optional[str] = None,
    user_agent: Union[Dict, str, None] = None,
) -> None:
    """
    Sends telemetry that helps tracking usage of different HF libraries.

    This usage data helps us debug issues and prioritize new features. However, we understand that not everyone wants
    to share additional information, and we respect your privacy. You can disable telemetry collection by setting the
    `HF_HUB_DISABLE_TELEMETRY=1` as environment variable. Telemetry is also disabled in offline mode (i.e. when setting
    `HF_HUB_OFFLINE=1`).

    Telemetry collection is run in a separate thread to minimize impact for the user.

    Args:
        topic (`str`):
            Name of the topic that is monitored. The topic is directly used to build the URL. If you want to monitor
            subtopics, just use "/" separation. Examples: "gradio", "transformers/examples",...
        library_name (`str`, *optional*):
            The name of the library that is making the HTTP request. Will be added to the user-agent header.
        library_version (`str`, *optional*):
            The version of the library that is making the HTTP request. Will be added to the user-agent header.
        user_agent (`str`, `dict`, *optional*):
            The user agent info in the form of a dictionary or a single string. It will be completed with information about the installed packages.

    Example:
    ```py
    >>> from huggingface_hub.utils import send_telemetry

    # Send telemetry without library information
    >>> send_telemetry("ping")

    # Send telemetry to subtopic with library information
    >>> send_telemetry("gradio/local_link", library_name="gradio", library_version="3.22.1")

    # Send telemetry with additional data
    >>> send_telemetry(
    ...     topic="examples",
    ...     library_name="transformers",
    ...     library_version="4.26.0",
    ...     user_agent={"pipeline": "text_classification", "framework": "flax"},
    ... )
    ```
    """
    telemetry_disabled = constants.HF_HUB_OFFLINE or constants.HF_HUB_DISABLE_TELEMETRY
    if telemetry_disabled:
        return

    _start_telemetry_thread()  # no-op if the worker thread is already running
    payload = {
        "topic": topic,
        "library_name": library_name,
        "library_version": library_version,
        "user_agent": user_agent,
    }
    _TELEMETRY_QUEUE.put(payload)
|
74 |
+
|
75 |
+
|
76 |
+
def _start_telemetry_thread():
    """Start a daemon thread to consume tasks from the telemetry queue.

    If the thread is interrupted, start a new one.
    """
    global _TELEMETRY_THREAD
    with _TELEMETRY_THREAD_LOCK:  # avoid starting multiple threads if called concurrently
        needs_new_thread = _TELEMETRY_THREAD is None or not _TELEMETRY_THREAD.is_alive()
        if needs_new_thread:
            # Daemon thread: will not block interpreter shutdown.
            _TELEMETRY_THREAD = Thread(target=_telemetry_worker, daemon=True)
            _TELEMETRY_THREAD.start()
|
86 |
+
|
87 |
+
|
88 |
+
def _telemetry_worker():
    """Endless loop: block on the queue, send each telemetry payload as it arrives."""
    while True:
        task = _TELEMETRY_QUEUE.get()  # blocks until a payload is available
        _send_telemetry_in_thread(**task)
        _TELEMETRY_QUEUE.task_done()
|
94 |
+
|
95 |
+
|
96 |
+
def _send_telemetry_in_thread(
    topic: str,
    *,
    library_name: Optional[str] = None,
    library_version: Optional[str] = None,
    user_agent: Union[Dict, str, None] = None,
) -> None:
    """Contains the actual data sending data to the Hub.

    Best-effort: any failure (network, HTTP error, ...) is logged at debug level
    and swallowed so telemetry can never break the caller.
    """
    # URL-encode each non-empty path segment of the topic.
    segments = [quote(part) for part in topic.split("/") if part]
    path = "/".join(segments)
    try:
        headers = build_hf_headers(
            token=False,  # no need to send a token for telemetry
            library_name=library_name,
            library_version=library_version,
            user_agent=user_agent,
        )
        response = get_session().head(f"{constants.ENDPOINT}/api/telemetry/{path}", headers=headers)
        hf_raise_for_status(response)
    except Exception as e:
        # We don't want to error in case of connection errors of any kind.
        logger.debug(f"Error while sending telemetry: {e}")
|
lib/python3.11/site-packages/huggingface_hub/utils/_token.py
ADDED
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2023 The HuggingFace Team. All rights reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
"""Contains an helper to get the token from machine (env variable, secret or config file)."""
|
15 |
+
import os
|
16 |
+
import warnings
|
17 |
+
from pathlib import Path
|
18 |
+
from threading import Lock
|
19 |
+
from typing import Optional
|
20 |
+
|
21 |
+
from .. import constants
|
22 |
+
from ._runtime import is_google_colab
|
23 |
+
|
24 |
+
|
25 |
+
# Cache for the token stored in Google Colab's secret vault: the vault is queried at
# most once per session (guarded by a lock for thread-safety).
_IS_GOOGLE_COLAB_CHECKED = False  # True once the vault has been queried (success or failure)
_GOOGLE_COLAB_SECRET_LOCK = Lock()  # `google.colab.userdata` is not thread-safe
_GOOGLE_COLAB_SECRET: Optional[str] = None  # cached secret value (None if absent or access denied)
|
28 |
+
|
29 |
+
|
30 |
+
def get_token() -> Optional[str]:
    """
    Get token if user is logged in.

    Note: in most cases, you should use [`huggingface_hub.utils.build_hf_headers`] instead. This method is only useful
    if you want to retrieve the token for other purposes than sending an HTTP request.

    Token is retrieved in priority from the `HF_TOKEN` environment variable. Otherwise, we read the token file located
    in the Hugging Face home folder. Returns None if user is not logged in. To log in, use [`login`] or
    `huggingface-cli login`.

    Returns:
        `str` or `None`: The token, `None` if it doesn't exist.
    """
    # Sources are checked in priority order; the first one yielding a token wins.
    for token_source in (_get_token_from_google_colab, _get_token_from_environment, _get_token_from_file):
        token = token_source()
        if token:
            return token
    return None
|
45 |
+
|
46 |
+
|
47 |
+
def _get_token_from_google_colab() -> Optional[str]:
    """Get token from Google Colab secrets vault using `google.colab.userdata.get(...)`.

    Token is read from the vault only once per session and then stored in a global variable to avoid re-requesting
    access to the vault.

    Returns:
        `str` or `None`: the token if found in the vault, `None` otherwise (not in Colab,
        secret missing, or access denied).
    """
    if not is_google_colab():
        return None

    # `google.colab.userdata` is not thread-safe
    # This can lead to a deadlock if multiple threads try to access it at the same time
    # (typically when using `snapshot_download`)
    # => use a lock
    # See https://github.com/huggingface/huggingface_hub/issues/1952 for more details.
    with _GOOGLE_COLAB_SECRET_LOCK:
        global _GOOGLE_COLAB_SECRET
        global _IS_GOOGLE_COLAB_CHECKED

        if _IS_GOOGLE_COLAB_CHECKED:  # request access only once
            return _GOOGLE_COLAB_SECRET

        try:
            from google.colab import userdata
            from google.colab.errors import Error as ColabError
        except ImportError:
            # `google.colab` not importable despite `is_google_colab()` returning True
            # => silently fall back to the other token sources.
            return None

        try:
            token = userdata.get("HF_TOKEN")
            _GOOGLE_COLAB_SECRET = _clean_token(token)
        except userdata.NotebookAccessError:
            # Means the user has a secret called `HF_TOKEN`, got the "grant access to HF_TOKEN?" popup and refused it
            # => warn user but ignore error => do not re-request access from the user
            warnings.warn(
                "\nAccess to the secret `HF_TOKEN` has not been granted on this notebook."
                "\nYou will not be requested again."
                "\nPlease restart the session if you want to be prompted again."
            )
            _GOOGLE_COLAB_SECRET = None
        except userdata.SecretNotFoundError:
            # Means the user did not define a `HF_TOKEN` secret => warn
            warnings.warn(
                "\nThe secret `HF_TOKEN` does not exist in your Colab secrets."
                "\nTo authenticate with the Hugging Face Hub, create a token in your settings tab "
                "(https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session."
                "\nYou will be able to reuse this secret in all of your notebooks."
                "\nPlease note that authentication is recommended but still optional to access public models or datasets."
            )
            _GOOGLE_COLAB_SECRET = None
        except ColabError as e:
            # Something happened but we don't know what => recommend opening a GitHub issue
            warnings.warn(
                f"\nError while fetching `HF_TOKEN` secret value from your vault: '{str(e)}'."
                "\nYou are not authenticated with the Hugging Face Hub in this notebook."
                "\nIf the error persists, please let us know by opening an issue on GitHub "
                "(https://github.com/huggingface/huggingface_hub/issues/new)."
            )
            _GOOGLE_COLAB_SECRET = None

        # Mark the vault as queried whatever the outcome, so we never prompt twice.
        _IS_GOOGLE_COLAB_CHECKED = True
        return _GOOGLE_COLAB_SECRET
|
108 |
+
|
109 |
+
|
110 |
+
def _get_token_from_environment() -> Optional[str]:
    """Read the Hugging Face token from environment variables.

    `HF_TOKEN` takes priority; `HUGGING_FACE_HUB_TOKEN` is kept as a
    fallback for backward compatibility.
    """
    primary = os.environ.get("HF_TOKEN")
    fallback = os.environ.get("HUGGING_FACE_HUB_TOKEN")
    return _clean_token(primary or fallback)
|
113 |
+
|
114 |
+
|
115 |
+
def _get_token_from_file() -> Optional[str]:
    """Read the token stored on disk at `constants.HF_TOKEN_PATH`.

    Returns None when the token file does not exist.
    """
    token_path = Path(constants.HF_TOKEN_PATH)
    try:
        content = token_path.read_text()
    except FileNotFoundError:
        return None
    return _clean_token(content)
|
120 |
+
|
121 |
+
|
122 |
+
def _clean_token(token: Optional[str]) -> Optional[str]:
|
123 |
+
"""Clean token by removing trailing and leading spaces and newlines.
|
124 |
+
|
125 |
+
If token is an empty string, return None.
|
126 |
+
"""
|
127 |
+
if token is None:
|
128 |
+
return None
|
129 |
+
return token.replace("\r", "").replace("\n", "").strip() or None
|
lib/python3.11/site-packages/huggingface_hub/utils/_typing.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Handle typing imports based on system compatibility."""
|
16 |
+
from typing import Callable, Literal, TypeVar
|
17 |
+
|
18 |
+
|
19 |
+
# Type alias for the standard HTTP method names accepted by the request helpers.
HTTP_METHOD_T = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]

# type hint meaning "function signature not changed by decorator"
CallableT = TypeVar("CallableT", bound=Callable)
|