Add files using upload-large-folder tool
This view is limited to 50 files because it contains too many changes.
See raw diff
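The commit was made with the `upload-large-folder` tool from `huggingface_hub`, which is intended for pushing large folders in resumable chunks. A minimal Python sketch of an equivalent call is below; the repo id and local path are hypothetical placeholders, not values taken from this diff:

    # Sketch only: push a local folder to a Hub repo with huggingface_hub.
    # "username/experiment-77b" and "./merged" are hypothetical placeholders.
    from huggingface_hub import HfApi

    api = HfApi()
    api.upload_large_folder(
        repo_id="username/experiment-77b",
        folder_path="./merged",
        repo_type="model",  # upload_large_folder expects the repo type explicitly
    )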
- README.md +3 -0
- added_tokens.json +26 -0
- config.json +29 -0
- generation_config.json +12 -0
- meow/.gitignore +2 -0
- meow/bin/Activate.ps1 +248 -0
- meow/bin/activate +75 -0
- meow/bin/activate.csh +27 -0
- meow/bin/activate.fish +69 -0
- meow/bin/huggingface-cli +8 -0
- meow/bin/normalizer +8 -0
- meow/bin/pip +8 -0
- meow/bin/pip3 +8 -0
- meow/bin/pip3.13 +8 -0
- meow/bin/python +0 -0
- meow/bin/python3 +0 -0
- meow/bin/python3.13 +0 -0
- meow/bin/tqdm +8 -0
- meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/INSTALLER +1 -0
- meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/METADATA +59 -0
- meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/RECORD +24 -0
- meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/WHEEL +4 -0
- meow/lib/python3.13/site-packages/huggingface_hub/_commit_api.py +729 -0
- meow/lib/python3.13/site-packages/huggingface_hub/_space_api.py +160 -0
- meow/lib/python3.13/site-packages/huggingface_hub/_tensorboard_logger.py +194 -0
- meow/lib/python3.13/site-packages/huggingface_hub/_webhooks_payload.py +137 -0
- meow/lib/python3.13/site-packages/huggingface_hub/_webhooks_server.py +386 -0
- meow/lib/python3.13/site-packages/huggingface_hub/constants.py +225 -0
- meow/lib/python3.13/site-packages/huggingface_hub/fastai_utils.py +425 -0
- meow/lib/python3.13/site-packages/huggingface_hub/hf_file_system.py +1140 -0
- meow/lib/python3.13/site-packages/huggingface_hub/hub_mixin.py +833 -0
- meow/lib/python3.13/site-packages/huggingface_hub/keras_mixin.py +500 -0
- meow/lib/python3.13/site-packages/huggingface_hub/repocard.py +830 -0
- meow/lib/python3.13/site-packages/huggingface_hub/repocard_data.py +749 -0
- meow/lib/python3.13/site-packages/huggingface_hub/repository.py +1477 -0
- meow/lib/python3.13/site-packages/idna-3.10.dist-info/INSTALLER +1 -0
- meow/lib/python3.13/site-packages/idna-3.10.dist-info/LICENSE.md +31 -0
- meow/lib/python3.13/site-packages/idna-3.10.dist-info/METADATA +250 -0
- meow/lib/python3.13/site-packages/idna-3.10.dist-info/RECORD +22 -0
- meow/lib/python3.13/site-packages/idna-3.10.dist-info/WHEEL +4 -0
- meow/lib/python3.13/site-packages/packaging/__init__.py +15 -0
- meow/lib/python3.13/site-packages/packaging/_elffile.py +110 -0
- meow/lib/python3.13/site-packages/packaging/_manylinux.py +263 -0
- meow/lib/python3.13/site-packages/packaging/_musllinux.py +85 -0
- meow/lib/python3.13/site-packages/packaging/_parser.py +354 -0
- meow/lib/python3.13/site-packages/packaging/_structures.py +61 -0
- meow/lib/python3.13/site-packages/packaging/_tokenizer.py +194 -0
- meow/lib/python3.13/site-packages/packaging/markers.py +331 -0
- meow/lib/python3.13/site-packages/packaging/metadata.py +863 -0
- meow/lib/python3.13/site-packages/packaging/py.typed +0 -0
README.md
ADDED
@@ -0,0 +1,3 @@
---
base_model: Qwen/Qwen2.5-72B-Instruct
---
added_tokens.json
ADDED
@@ -0,0 +1,26 @@
{
  "</tool_call>": 151658,
  "<bot_end>": 151666,
  "<human_end>": 151665,
  "<tool_call>": 151657,
  "<|box_end|>": 151649,
  "<|box_start|>": 151648,
  "<|endoftext|>": 151643,
  "<|file_sep|>": 151664,
  "<|fim_middle|>": 151660,
  "<|fim_pad|>": 151662,
  "<|fim_prefix|>": 151659,
  "<|fim_suffix|>": 151661,
  "<|im_end|>": 151645,
  "<|im_start|>": 151644,
  "<|image_pad|>": 151655,
  "<|object_ref_end|>": 151647,
  "<|object_ref_start|>": 151646,
  "<|quad_end|>": 151651,
  "<|quad_start|>": 151650,
  "<|repo_name|>": 151663,
  "<|video_pad|>": 151656,
  "<|vision_end|>": 151653,
  "<|vision_pad|>": 151654,
  "<|vision_start|>": 151652
}
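These entries map each added special token to its fixed id in the tokenizer vocabulary. A small sketch of how the mapping can be checked once the tokenizer files are available locally (assumes the `transformers` library; the tokenizer files themselves are not shown in this truncated diff):

    # Sketch: verify the special-token ids declared in added_tokens.json.
    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained(".")  # directory containing added_tokens.json etc.
    print(tok.convert_tokens_to_ids("<|im_start|>"))  # expected: 151644
    print(tok.convert_tokens_to_ids("<|im_end|>"))    # expected: 151645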
config.json
ADDED
@@ -0,0 +1,29 @@
{
  "_name_or_path": "nisten/experiment-77b",
  "architectures": [
    "Qwen2ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 151643,
  "eos_token_id": 151645,
  "hidden_act": "silu",
  "hidden_size": 8192,
  "initializer_range": 0.02,
  "intermediate_size": 29568,
  "max_position_embeddings": 32768,
  "max_window_layers": 70,
  "model_type": "qwen2",
  "num_attention_heads": 64,
  "num_hidden_layers": 86,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.47.1",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 151672
}
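The config describes a Qwen2-architecture causal LM with 86 hidden layers, a hidden size of 8192, and a 151,672-token vocabulary. A minimal sketch of inspecting it with `transformers` (assumes the library is installed; "." stands for whatever directory holds this config.json):

    # Sketch: inspect the architecture described by config.json without loading weights.
    from transformers import AutoConfig

    cfg = AutoConfig.from_pretrained(".")  # directory containing config.json
    print(cfg.model_type, cfg.num_hidden_layers, cfg.hidden_size)  # qwen2 86 8192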
generation_config.json
ADDED
@@ -0,0 +1,12 @@
{
  "bos_token_id": 151643,
  "do_sample": true,
  "eos_token_id": [
    151645,
    151643
  ],
  "pad_token_id": 151643,
  "temperature": 0.7,
  "top_p": 1.0,
  "transformers_version": "4.46.2"
}
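These defaults (sampling enabled, temperature 0.7, top_p 1.0, two end-of-sequence ids) are picked up automatically by `transformers` at generation time. A sketch of reading them directly (assumption: the file sits in the current directory):

    # Sketch: load the generation defaults declared above.
    from transformers import GenerationConfig

    gen = GenerationConfig.from_pretrained(".")  # reads generation_config.json
    print(gen.do_sample, gen.temperature, gen.eos_token_id)  # True 0.7 [151645, 151643]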
meow/.gitignore
ADDED
@@ -0,0 +1,2 @@
# Created by venv; see https://docs.python.org/3/library/venv.html
*
meow/bin/Activate.ps1
ADDED
@@ -0,0 +1,248 @@
<#
.Synopsis
Activate a Python virtual environment for the current PowerShell session.

.Description
Pushes the python executable for a virtual environment to the front of the
$Env:PATH environment variable and sets the prompt to signify that you are
in a Python virtual environment. Makes use of the command line switches as
well as the `pyvenv.cfg` file values present in the virtual environment.

.Parameter VenvDir
Path to the directory that contains the virtual environment to activate. The
default value for this is the parent of the directory that the Activate.ps1
script is located within.

.Parameter Prompt
The prompt prefix to display when this virtual environment is activated. By
default, this prompt is the name of the virtual environment folder (VenvDir)
surrounded by parentheses and followed by a single space (ie. '(.venv) ').

.Example
Activate.ps1
Activates the Python virtual environment that contains the Activate.ps1 script.

.Example
Activate.ps1 -Verbose
Activates the Python virtual environment that contains the Activate.ps1 script,
and shows extra information about the activation as it executes.

.Example
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
Activates the Python virtual environment located in the specified location.

.Example
Activate.ps1 -Prompt "MyPython"
Activates the Python virtual environment that contains the Activate.ps1 script,
and prefixes the current prompt with the specified string (surrounded in
parentheses) while the virtual environment is active.

.Notes
On Windows, it may be required to enable this Activate.ps1 script by setting the
execution policy for the user. You can do this by issuing the following PowerShell
command:

PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser

For more information on Execution Policies:
https://go.microsoft.com/fwlink/?LinkID=135170

#>
Param(
    [Parameter(Mandatory = $false)]
    [String]
    $VenvDir,
    [Parameter(Mandatory = $false)]
    [String]
    $Prompt
)

<# Function declarations --------------------------------------------------- #>

<#
.Synopsis
Remove all shell session elements added by the Activate script, including the
addition of the virtual environment's Python executable from the beginning of
the PATH variable.

.Parameter NonDestructive
If present, do not remove this function from the global namespace for the
session.

#>
function global:deactivate ([switch]$NonDestructive) {
    # Revert to original values

    # The prior prompt:
    if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
        Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
        Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
    }

    # The prior PYTHONHOME:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
        Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
    }

    # The prior PATH:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
        Remove-Item -Path Env:_OLD_VIRTUAL_PATH
    }

    # Just remove the VIRTUAL_ENV altogether:
    if (Test-Path -Path Env:VIRTUAL_ENV) {
        Remove-Item -Path env:VIRTUAL_ENV
    }

    # Just remove VIRTUAL_ENV_PROMPT altogether.
    if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
        Remove-Item -Path env:VIRTUAL_ENV_PROMPT
    }

    # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
    }

    # Leave deactivate function in the global namespace if requested:
    if (-not $NonDestructive) {
        Remove-Item -Path function:deactivate
    }
}

<#
.Description
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
given folder, and returns them in a map.

For each line in the pyvenv.cfg file, if that line can be parsed into exactly
two strings separated by `=` (with any amount of whitespace surrounding the =)
then it is considered a `key = value` line. The left hand string is the key,
the right hand is the value.

If the value starts with a `'` or a `"` then the first and last character is
stripped from the value before being captured.

.Parameter ConfigDir
Path to the directory that contains the `pyvenv.cfg` file.
#>
function Get-PyVenvConfig(
    [String]
    $ConfigDir
) {
    Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"

    # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
    $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue

    # An empty map will be returned if no config file is found.
    $pyvenvConfig = @{ }

    if ($pyvenvConfigPath) {

        Write-Verbose "File exists, parse `key = value` lines"
        $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath

        $pyvenvConfigContent | ForEach-Object {
            $keyval = $PSItem -split "\s*=\s*", 2
            if ($keyval[0] -and $keyval[1]) {
                $val = $keyval[1]

                # Remove extraneous quotations around a string value.
                if ("'""".Contains($val.Substring(0, 1))) {
                    $val = $val.Substring(1, $val.Length - 2)
                }

                $pyvenvConfig[$keyval[0]] = $val
                Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
            }
        }
    }
    return $pyvenvConfig
}


<# Begin Activate script --------------------------------------------------- #>

# Determine the containing directory of this script
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
$VenvExecDir = Get-Item -Path $VenvExecPath

Write-Verbose "Activation script is located in path: '$VenvExecPath'"
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"

# Set values required in priority: CmdLine, ConfigFile, Default
# First, get the location of the virtual environment, it might not be
# VenvExecDir if specified on the command line.
if ($VenvDir) {
    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
}
else {
    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
    Write-Verbose "VenvDir=$VenvDir"
}

# Next, read the `pyvenv.cfg` file to determine any required value such
# as `prompt`.
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir

# Next, set the prompt from the command line, or the config file, or
# just use the name of the virtual environment folder.
if ($Prompt) {
    Write-Verbose "Prompt specified as argument, using '$Prompt'"
}
else {
    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
        Write-Verbose "  Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
        $Prompt = $pyvenvCfg['prompt'];
    }
    else {
        Write-Verbose "  Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
        Write-Verbose "  Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
        $Prompt = Split-Path -Path $venvDir -Leaf
    }
}

Write-Verbose "Prompt = '$Prompt'"
Write-Verbose "VenvDir='$VenvDir'"

# Deactivate any currently active virtual environment, but leave the
# deactivate function in place.
deactivate -nondestructive

# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
# that there is an activated venv.
$env:VIRTUAL_ENV = $VenvDir

$env:VIRTUAL_ENV_PROMPT = $Prompt

if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {

    Write-Verbose "Setting prompt to '$Prompt'"

    # Set the prompt to include the env name
    # Make sure _OLD_VIRTUAL_PROMPT is global
    function global:_OLD_VIRTUAL_PROMPT { "" }
    Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
    New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt

    function global:prompt {
        Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
        _OLD_VIRTUAL_PROMPT
    }
}

# Clear PYTHONHOME
if (Test-Path -Path Env:PYTHONHOME) {
    Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
    Remove-Item -Path Env:PYTHONHOME
}

# Add the venv to the PATH
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
meow/bin/activate
ADDED
@@ -0,0 +1,75 @@
# This file must be used with "source bin/activate" *from bash*
# You cannot run it directly

deactivate () {
    # reset old environment variables
    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
        PATH="${_OLD_VIRTUAL_PATH:-}"
        export PATH
        unset _OLD_VIRTUAL_PATH
    fi
    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
        export PYTHONHOME
        unset _OLD_VIRTUAL_PYTHONHOME
    fi

    # Call hash to forget past commands. Without forgetting
    # past commands the $PATH changes we made may not be respected
    hash -r 2> /dev/null

    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
        PS1="${_OLD_VIRTUAL_PS1:-}"
        export PS1
        unset _OLD_VIRTUAL_PS1
    fi

    unset VIRTUAL_ENV
    unset VIRTUAL_ENV_PROMPT
    if [ ! "${1:-}" = "nondestructive" ] ; then
    # Self destruct!
        unset -f deactivate
    fi
}

# unset irrelevant variables
deactivate nondestructive

# on Windows, a path can contain colons and backslashes and has to be converted:
case "$(uname)" in
    CYGWIN*|MSYS*)
        # transform D:\path\to\venv to /d/path/to/venv on MSYS
        # and to /cygdrive/d/path/to/venv on Cygwin
        VIRTUAL_ENV=$(cygpath "/Users/n/mergekit/merged/meow")
        export VIRTUAL_ENV
        ;;
    *)
        # use the path as-is
        export VIRTUAL_ENV="/Users/n/mergekit/merged/meow"
        ;;
esac

_OLD_VIRTUAL_PATH="$PATH"
PATH="$VIRTUAL_ENV/bin:$PATH"
export PATH

VIRTUAL_ENV_PROMPT="meow"
export VIRTUAL_ENV_PROMPT

# unset PYTHONHOME if set
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
# could use `if (set -u; : $PYTHONHOME) ;` in bash
if [ -n "${PYTHONHOME:-}" ] ; then
    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
    unset PYTHONHOME
fi

if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
    _OLD_VIRTUAL_PS1="${PS1:-}"
    PS1="(meow) ${PS1:-}"
    export PS1
fi

# Call hash to forget past commands. Without forgetting
# past commands the $PATH changes we made may not be respected
hash -r 2> /dev/null
meow/bin/activate.csh
ADDED
@@ -0,0 +1,27 @@
# This file must be used with "source bin/activate.csh" *from csh*.
# You cannot run it directly.

# Created by Davide Di Blasi <[email protected]>.
# Ported to Python 3.3 venv by Andrew Svetlov <[email protected]>

alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'

# Unset irrelevant variables.
deactivate nondestructive

setenv VIRTUAL_ENV "/Users/n/mergekit/merged/meow"

set _OLD_VIRTUAL_PATH="$PATH"
setenv PATH "$VIRTUAL_ENV/bin:$PATH"
setenv VIRTUAL_ENV_PROMPT "meow"


set _OLD_VIRTUAL_PROMPT="$prompt"

if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
    set prompt = "(meow) $prompt"
endif

alias pydoc python -m pydoc

rehash
meow/bin/activate.fish
ADDED
@@ -0,0 +1,69 @@
# This file must be used with "source <venv>/bin/activate.fish" *from fish*
# (https://fishshell.com/). You cannot run it directly.

function deactivate -d "Exit virtual environment and return to normal shell environment"
    # reset old environment variables
    if test -n "$_OLD_VIRTUAL_PATH"
        set -gx PATH $_OLD_VIRTUAL_PATH
        set -e _OLD_VIRTUAL_PATH
    end
    if test -n "$_OLD_VIRTUAL_PYTHONHOME"
        set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
        set -e _OLD_VIRTUAL_PYTHONHOME
    end

    if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
        set -e _OLD_FISH_PROMPT_OVERRIDE
        # prevents error when using nested fish instances (Issue #93858)
        if functions -q _old_fish_prompt
            functions -e fish_prompt
            functions -c _old_fish_prompt fish_prompt
            functions -e _old_fish_prompt
        end
    end

    set -e VIRTUAL_ENV
    set -e VIRTUAL_ENV_PROMPT
    if test "$argv[1]" != "nondestructive"
        # Self-destruct!
        functions -e deactivate
    end
end

# Unset irrelevant variables.
deactivate nondestructive

set -gx VIRTUAL_ENV "/Users/n/mergekit/merged/meow"

set -gx _OLD_VIRTUAL_PATH $PATH
set -gx PATH "$VIRTUAL_ENV/bin" $PATH
set -gx VIRTUAL_ENV_PROMPT "meow"

# Unset PYTHONHOME if set.
if set -q PYTHONHOME
    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
    set -e PYTHONHOME
end

if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
    # fish uses a function instead of an env var to generate the prompt.

    # Save the current fish_prompt function as the function _old_fish_prompt.
    functions -c fish_prompt _old_fish_prompt

    # With the original prompt function renamed, we can override with our own.
    function fish_prompt
        # Save the return status of the last command.
        set -l old_status $status

        # Output the venv prompt; color taken from the blue of the Python logo.
        printf "%s(%s)%s " (set_color 4B8BBE) "meow" (set_color normal)

        # Restore the return status of the previous command.
        echo "exit $old_status" | .
        # Output the original/"old" prompt.
        _old_fish_prompt
    end

    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
end
meow/bin/huggingface-cli
ADDED
@@ -0,0 +1,8 @@
#!/Users/n/mergekit/merged/meow/bin/python3.13
# -*- coding: utf-8 -*-
import re
import sys
from huggingface_hub.commands.huggingface_cli import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
meow/bin/normalizer
ADDED
@@ -0,0 +1,8 @@
#!/Users/n/mergekit/merged/meow/bin/python3.13
# -*- coding: utf-8 -*-
import re
import sys
from charset_normalizer import cli
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(cli.cli_detect())
meow/bin/pip
ADDED
@@ -0,0 +1,8 @@
#!/Users/n/mergekit/merged/meow/bin/python3.13
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
meow/bin/pip3
ADDED
@@ -0,0 +1,8 @@
#!/Users/n/mergekit/merged/meow/bin/python3.13
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
meow/bin/pip3.13
ADDED
@@ -0,0 +1,8 @@
#!/Users/n/mergekit/merged/meow/bin/python3.13
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
meow/bin/python
ADDED
Binary file (52.6 kB).
meow/bin/python3
ADDED
Binary file (52.6 kB).
meow/bin/python3.13
ADDED
Binary file (52.6 kB).
meow/bin/tqdm
ADDED
@@ -0,0 +1,8 @@
#!/Users/n/mergekit/merged/meow/bin/python3.13
# -*- coding: utf-8 -*-
import re
import sys
from tqdm.cli import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
pip
meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/METADATA
ADDED
@@ -0,0 +1,59 @@
Metadata-Version: 2.3
Name: filelock
Version: 3.16.1
Summary: A platform independent file lock.
Project-URL: Documentation, https://py-filelock.readthedocs.io
Project-URL: Homepage, https://github.com/tox-dev/py-filelock
Project-URL: Source, https://github.com/tox-dev/py-filelock
Project-URL: Tracker, https://github.com/tox-dev/py-filelock/issues
Maintainer-email: Bernát Gábor <[email protected]>
License-Expression: Unlicense
License-File: LICENSE
Keywords: application,cache,directory,log,user
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: The Unlicense (Unlicense)
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Topic :: Internet
Classifier: Topic :: Software Development :: Libraries
Classifier: Topic :: System
Requires-Python: >=3.8
Provides-Extra: docs
Requires-Dist: furo>=2024.8.6; extra == 'docs'
Requires-Dist: sphinx-autodoc-typehints>=2.4.1; extra == 'docs'
Requires-Dist: sphinx>=8.0.2; extra == 'docs'
Provides-Extra: testing
Requires-Dist: covdefaults>=2.3; extra == 'testing'
Requires-Dist: coverage>=7.6.1; extra == 'testing'
Requires-Dist: diff-cover>=9.2; extra == 'testing'
Requires-Dist: pytest-asyncio>=0.24; extra == 'testing'
Requires-Dist: pytest-cov>=5; extra == 'testing'
Requires-Dist: pytest-mock>=3.14; extra == 'testing'
Requires-Dist: pytest-timeout>=2.3.1; extra == 'testing'
Requires-Dist: pytest>=8.3.3; extra == 'testing'
Requires-Dist: virtualenv>=20.26.4; extra == 'testing'
Provides-Extra: typing
Requires-Dist: typing-extensions>=4.12.2; (python_version < '3.11') and extra == 'typing'
Description-Content-Type: text/markdown

# filelock

[![PyPI](https://img.shields.io/pypi/v/filelock)](https://pypi.org/project/filelock/)
[![Supported Python versions](https://img.shields.io/pypi/pyversions/filelock.svg)](https://pypi.org/project/filelock/)
[![Documentation status](https://readthedocs.org/projects/py-filelock/badge/?version=latest)](https://py-filelock.readthedocs.io/en/latest/?badge=latest)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![Downloads](https://static.pepy.tech/badge/filelock/month)](https://pepy.tech/project/filelock)
[![check](https://github.com/tox-dev/py-filelock/actions/workflows/check.yml/badge.svg)](https://github.com/tox-dev/py-filelock/actions/workflows/check.yml)

For more information checkout the [official documentation](https://py-filelock.readthedocs.io/en/latest/index.html).
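filelock, vendored here inside the committed virtual environment, provides a platform-independent inter-process lock. A minimal usage sketch (the lock file name and resource path are arbitrary examples):

    # Sketch: serialize access to a shared resource across processes with filelock.
    from filelock import FileLock

    lock = FileLock("shared_resource.lock")
    with lock:  # blocks until the lock can be acquired
        with open("shared_resource.txt", "a") as f:
            f.write("one writer at a time\n")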
meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/RECORD
ADDED
@@ -0,0 +1,24 @@
filelock-3.16.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
filelock-3.16.1.dist-info/METADATA,sha256=LXL5-XQe_eTKkdNs76A6jSicQ1DBSTXqkDcjsprWvIM,2944
filelock-3.16.1.dist-info/RECORD,,
filelock-3.16.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
filelock-3.16.1.dist-info/licenses/LICENSE,sha256=iNm062BXnBkew5HKBMFhMFctfu3EqG2qWL8oxuFMm80,1210
filelock/__init__.py,sha256=_t_-OAGXo_qyPa9lNQ1YnzVYEvSW3I0onPqzpomsVVg,1769
filelock/__pycache__/__init__.cpython-313.pyc,,
filelock/__pycache__/_api.cpython-313.pyc,,
filelock/__pycache__/_error.cpython-313.pyc,,
filelock/__pycache__/_soft.cpython-313.pyc,,
filelock/__pycache__/_unix.cpython-313.pyc,,
filelock/__pycache__/_util.cpython-313.pyc,,
filelock/__pycache__/_windows.cpython-313.pyc,,
filelock/__pycache__/asyncio.cpython-313.pyc,,
filelock/__pycache__/version.cpython-313.pyc,,
filelock/_api.py,sha256=GVeBEGjpDD8S1bYqG6_u0MZfbYHS6XrHs_n3PVKq-h0,14541
filelock/_error.py,sha256=-5jMcjTu60YAvAO1UbqDD1GIEjVkwr8xCFwDBtMeYDg,787
filelock/_soft.py,sha256=haqtc_TB_KJbYv2a8iuEAclKuM4fMG1vTcp28sK919c,1711
filelock/_unix.py,sha256=-FXP0tjInBHUYygOlMpp4taUmD87QOkrD_4ybg_iT7Q,2259
filelock/_util.py,sha256=QHBoNFIYfbAThhotH3Q8E2acFc84wpG49-T-uu017ZE,1715
filelock/_windows.py,sha256=eMKL8dZKrgekf5VYVGR14an29JGEInRtUO8ui9ABywg,2177
filelock/asyncio.py,sha256=3D4JP4Ms5IXTGib5eOekyr6uH6rZlieV_moVGY36juA,12463
filelock/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
filelock/version.py,sha256=KSOBzuLwiqiVWDPGfMj1ntr25YrY6JBDr8RvinQX_FM,413
meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/WHEEL
ADDED
@@ -0,0 +1,4 @@
Wheel-Version: 1.0
Generator: hatchling 1.25.0
Root-Is-Purelib: true
Tag: py3-none-any
meow/lib/python3.13/site-packages/huggingface_hub/_commit_api.py
ADDED
@@ -0,0 +1,729 @@
"""
Type definitions and utilities for the `create_commit` API
"""

import base64
import io
import os
import warnings
from collections import defaultdict
from contextlib import contextmanager
from dataclasses import dataclass, field
from itertools import groupby
from pathlib import Path, PurePosixPath
from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Iterable, Iterator, List, Literal, Optional, Tuple, Union

from tqdm.contrib.concurrent import thread_map

from . import constants
from .errors import EntryNotFoundError
from .file_download import hf_hub_url
from .lfs import UploadInfo, lfs_upload, post_lfs_batch_info
from .utils import (
    FORBIDDEN_FOLDERS,
    chunk_iterable,
    get_session,
    hf_raise_for_status,
    logging,
    sha,
    tqdm_stream_file,
    validate_hf_hub_args,
)
from .utils import tqdm as hf_tqdm


if TYPE_CHECKING:
    from .hf_api import RepoFile


logger = logging.get_logger(__name__)


UploadMode = Literal["lfs", "regular"]

# Max is 1,000 per request on the Hub for HfApi.get_paths_info
# Otherwise we get:
# HfHubHTTPError: 413 Client Error: Payload Too Large for url: https://huggingface.co/api/datasets/xxx (Request ID: xxx)\n\ntoo many parameters
# See https://github.com/huggingface/huggingface_hub/issues/1503
FETCH_LFS_BATCH_SIZE = 500


@dataclass
class CommitOperationDelete:
    """
    Data structure holding necessary info to delete a file or a folder from a repository
    on the Hub.

    Args:
        path_in_repo (`str`):
            Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
            for a file or `"checkpoints/1fec34a/"` for a folder.
        is_folder (`bool` or `Literal["auto"]`, *optional*)
            Whether the Delete Operation applies to a folder or not. If "auto", the path
            type (file or folder) is guessed automatically by looking if path ends with
            a "/" (folder) or not (file). To explicitly set the path type, you can set
            `is_folder=True` or `is_folder=False`.
    """

    path_in_repo: str
    is_folder: Union[bool, Literal["auto"]] = "auto"

    def __post_init__(self):
        self.path_in_repo = _validate_path_in_repo(self.path_in_repo)

        if self.is_folder == "auto":
            self.is_folder = self.path_in_repo.endswith("/")
        if not isinstance(self.is_folder, bool):
            raise ValueError(
                f"Wrong value for `is_folder`. Must be one of [`True`, `False`, `'auto'`]. Got '{self.is_folder}'."
            )


@dataclass
class CommitOperationCopy:
    """
    Data structure holding necessary info to copy a file in a repository on the Hub.

    Limitations:
      - Only LFS files can be copied. To copy a regular file, you need to download it locally and re-upload it
      - Cross-repository copies are not supported.

    Note: you can combine a [`CommitOperationCopy`] and a [`CommitOperationDelete`] to rename an LFS file on the Hub.

    Args:
        src_path_in_repo (`str`):
            Relative filepath in the repo of the file to be copied, e.g. `"checkpoints/1fec34a/weights.bin"`.
        path_in_repo (`str`):
            Relative filepath in the repo where to copy the file, e.g. `"checkpoints/1fec34a/weights_copy.bin"`.
        src_revision (`str`, *optional*):
            The git revision of the file to be copied. Can be any valid git revision.
            Default to the target commit revision.
    """

    src_path_in_repo: str
    path_in_repo: str
    src_revision: Optional[str] = None

    def __post_init__(self):
        self.src_path_in_repo = _validate_path_in_repo(self.src_path_in_repo)
        self.path_in_repo = _validate_path_in_repo(self.path_in_repo)


@dataclass
class CommitOperationAdd:
    """
    Data structure holding necessary info to upload a file to a repository on the Hub.

    Args:
        path_in_repo (`str`):
            Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
        path_or_fileobj (`str`, `Path`, `bytes`, or `BinaryIO`):
            Either:
            - a path to a local file (as `str` or `pathlib.Path`) to upload
            - a buffer of bytes (`bytes`) holding the content of the file to upload
            - a "file object" (subclass of `io.BufferedIOBase`), typically obtained
                with `open(path, "rb")`. It must support `seek()` and `tell()` methods.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is not one of `str`, `Path`, `bytes` or `io.BufferedIOBase`.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is a `str` or `Path` but not a path to an existing file.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is a `io.BufferedIOBase` but it doesn't support both
            `seek()` and `tell()`.
    """

    path_in_repo: str
    path_or_fileobj: Union[str, Path, bytes, BinaryIO]
    upload_info: UploadInfo = field(init=False, repr=False)

    # Internal attributes

    # set to "lfs" or "regular" once known
    _upload_mode: Optional[UploadMode] = field(init=False, repr=False, default=None)

    # set to True if .gitignore rules prevent the file from being uploaded as LFS
    # (server-side check)
    _should_ignore: Optional[bool] = field(init=False, repr=False, default=None)

    # set to the remote OID of the file if it has already been uploaded
    # useful to determine if a commit will be empty or not
    _remote_oid: Optional[str] = field(init=False, repr=False, default=None)

    # set to True once the file has been uploaded as LFS
    _is_uploaded: bool = field(init=False, repr=False, default=False)

    # set to True once the file has been committed
    _is_committed: bool = field(init=False, repr=False, default=False)

    def __post_init__(self) -> None:
        """Validates `path_or_fileobj` and compute `upload_info`."""
        self.path_in_repo = _validate_path_in_repo(self.path_in_repo)

        # Validate `path_or_fileobj` value
        if isinstance(self.path_or_fileobj, Path):
            self.path_or_fileobj = str(self.path_or_fileobj)
        if isinstance(self.path_or_fileobj, str):
            path_or_fileobj = os.path.normpath(os.path.expanduser(self.path_or_fileobj))
            if not os.path.isfile(path_or_fileobj):
                raise ValueError(f"Provided path: '{path_or_fileobj}' is not a file on the local file system")
        elif not isinstance(self.path_or_fileobj, (io.BufferedIOBase, bytes)):
            # ^^ Inspired from: https://stackoverflow.com/questions/44584829/how-to-determine-if-file-is-opened-in-binary-or-text-mode
            raise ValueError(
                "path_or_fileobj must be either an instance of str, bytes or"
                " io.BufferedIOBase. If you passed a file-like object, make sure it is"
                " in binary mode."
            )
        if isinstance(self.path_or_fileobj, io.BufferedIOBase):
            try:
                self.path_or_fileobj.tell()
                self.path_or_fileobj.seek(0, os.SEEK_CUR)
            except (OSError, AttributeError) as exc:
                raise ValueError(
                    "path_or_fileobj is a file-like object but does not implement seek() and tell()"
                ) from exc

        # Compute "upload_info" attribute
        if isinstance(self.path_or_fileobj, str):
            self.upload_info = UploadInfo.from_path(self.path_or_fileobj)
        elif isinstance(self.path_or_fileobj, bytes):
            self.upload_info = UploadInfo.from_bytes(self.path_or_fileobj)
        else:
            self.upload_info = UploadInfo.from_fileobj(self.path_or_fileobj)

    @contextmanager
    def as_file(self, with_tqdm: bool = False) -> Iterator[BinaryIO]:
        """
        A context manager that yields a file-like object allowing to read the underlying
        data behind `path_or_fileobj`.

        Args:
            with_tqdm (`bool`, *optional*, defaults to `False`):
                If True, iterating over the file object will display a progress bar. Only
                works if the file-like object is a path to a file. Pure bytes and buffers
                are not supported.

        Example:

        ```python
        >>> operation = CommitOperationAdd(
        ...        path_in_repo="remote/dir/weights.h5",
        ...        path_or_fileobj="./local/weights.h5",
        ... )
        CommitOperationAdd(path_in_repo='remote/dir/weights.h5', path_or_fileobj='./local/weights.h5')

        >>> with operation.as_file() as file:
        ...     content = file.read()

        >>> with operation.as_file(with_tqdm=True) as file:
        ...     while True:
        ...         data = file.read(1024)
        ...         if not data:
        ...              break
        config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]

        >>> with operation.as_file(with_tqdm=True) as file:
        ...     requests.put(..., data=file)
        config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
        ```
        """
        if isinstance(self.path_or_fileobj, str) or isinstance(self.path_or_fileobj, Path):
            if with_tqdm:
                with tqdm_stream_file(self.path_or_fileobj) as file:
                    yield file
            else:
                with open(self.path_or_fileobj, "rb") as file:
                    yield file
        elif isinstance(self.path_or_fileobj, bytes):
            yield io.BytesIO(self.path_or_fileobj)
        elif isinstance(self.path_or_fileobj, io.BufferedIOBase):
            prev_pos = self.path_or_fileobj.tell()
            yield self.path_or_fileobj
            self.path_or_fileobj.seek(prev_pos, io.SEEK_SET)

    def b64content(self) -> bytes:
        """
        The base64-encoded content of `path_or_fileobj`

        Returns: `bytes`
        """
        with self.as_file() as file:
            return base64.b64encode(file.read())

    @property
    def _local_oid(self) -> Optional[str]:
        """Return the OID of the local file.

        This OID is then compared to `self._remote_oid` to check if the file has changed compared to the remote one.
        If the file did not change, we won't upload it again to prevent empty commits.

        For LFS files, the OID corresponds to the SHA256 of the file content (used a LFS ref).
        For regular files, the OID corresponds to the SHA1 of the file content.
        Note: this is slightly different to git OID computation since the oid of an LFS file is usually the git-SHA1 of the
        pointer file content (not the actual file content). However, using the SHA256 is enough to detect changes
        and more convenient client-side.
        """
        if self._upload_mode is None:
            return None
        elif self._upload_mode == "lfs":
            return self.upload_info.sha256.hex()
        else:
            # Regular file => compute sha1
            # => no need to read by chunk since the file is guaranteed to be <=5MB.
            with self.as_file() as file:
                return sha.git_hash(file.read())


def _validate_path_in_repo(path_in_repo: str) -> str:
    # Validate `path_in_repo` value to prevent a server-side issue
    if path_in_repo.startswith("/"):
        path_in_repo = path_in_repo[1:]
    if path_in_repo == "." or path_in_repo == ".." or path_in_repo.startswith("../"):
        raise ValueError(f"Invalid `path_in_repo` in CommitOperation: '{path_in_repo}'")
    if path_in_repo.startswith("./"):
        path_in_repo = path_in_repo[2:]
    for forbidden in FORBIDDEN_FOLDERS:
        if any(part == forbidden for part in path_in_repo.split("/")):
            raise ValueError(
                f"Invalid `path_in_repo` in CommitOperation: cannot update files under a '{forbidden}/' folder (path:"
                f" '{path_in_repo}')."
            )
    return path_in_repo


CommitOperation = Union[CommitOperationAdd, CommitOperationCopy, CommitOperationDelete]


def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None:
    """
    Warn user when a list of operations is expected to overwrite itself in a single
    commit.

    Rules:
    - If a filepath is updated by multiple `CommitOperationAdd` operations, a warning
      message is triggered.
    - If a filepath is updated at least once by a `CommitOperationAdd` and then deleted
      by a `CommitOperationDelete`, a warning is triggered.
    - If a `CommitOperationDelete` deletes a filepath that is then updated by a
      `CommitOperationAdd`, no warning is triggered. This is usually useless (no need to
      delete before upload) but can happen if a user deletes an entire folder and then
      add new files to it.
    """
    nb_additions_per_path: Dict[str, int] = defaultdict(int)
    for operation in operations:
        path_in_repo = operation.path_in_repo
        if isinstance(operation, CommitOperationAdd):
            if nb_additions_per_path[path_in_repo] > 0:
                warnings.warn(
                    "About to update multiple times the same file in the same commit:"
                    f" '{path_in_repo}'. This can cause undesired inconsistencies in"
                    " your repo."
                )
            nb_additions_per_path[path_in_repo] += 1
            for parent in PurePosixPath(path_in_repo).parents:
                # Also keep track of number of updated files per folder
                # => warns if deleting a folder overwrite some contained files
                nb_additions_per_path[str(parent)] += 1
        if isinstance(operation, CommitOperationDelete):
            if nb_additions_per_path[str(PurePosixPath(path_in_repo))] > 0:
                if operation.is_folder:
                    warnings.warn(
                        "About to delete a folder containing files that have just been"
                        f" updated within the same commit: '{path_in_repo}'. This can"
                        " cause undesired inconsistencies in your repo."
                    )
                else:
                    warnings.warn(
                        "About to delete a file that have just been updated within the"
                        f" same commit: '{path_in_repo}'. This can cause undesired"
                        " inconsistencies in your repo."
                    )


@validate_hf_hub_args
def _upload_lfs_files(
    *,
    additions: List[CommitOperationAdd],
    repo_type: str,
    repo_id: str,
    headers: Dict[str, str],
    endpoint: Optional[str] = None,
    num_threads: int = 5,
    revision: Optional[str] = None,
):
    """
    Uploads the content of `additions` to the Hub using the large file storage protocol.

    Relevant external documentation:
        - LFS Batch API: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md

    Args:
        additions (`List` of `CommitOperationAdd`):
            The files to be uploaded
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        headers (`Dict[str, str]`):
            Headers to use for the request, including authorization headers and user agent.
        num_threads (`int`, *optional*):
            The number of concurrent threads to use when uploading. Defaults to 5.
        revision (`str`, *optional*):
            The git revision to upload to.

    Raises:
        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
            If an upload failed for any reason
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the server returns malformed responses
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
            If the LFS batch endpoint returned an HTTP error.
    """
    # Step 1: retrieve upload instructions from the LFS batch endpoint.
    #         Upload instructions are retrieved by chunk of 256 files to avoid reaching
    #         the payload limit.
    batch_actions: List[Dict] = []
    for chunk in chunk_iterable(additions, chunk_size=256):
        batch_actions_chunk, batch_errors_chunk = post_lfs_batch_info(
            upload_infos=[op.upload_info for op in chunk],
            repo_id=repo_id,
            repo_type=repo_type,
            revision=revision,
            endpoint=endpoint,
            headers=headers,
            token=None,  # already passed in 'headers'
        )

        # If at least 1 error, we do not retrieve information for other chunks
        if batch_errors_chunk:
            message = "\n".join(
                [
                    f'Encountered error for file with OID {err.get("oid")}: `{err.get("error", {}).get("message")}'
                    for err in batch_errors_chunk
                ]
            )
            raise ValueError(f"LFS batch endpoint returned errors:\n{message}")

        batch_actions += batch_actions_chunk
    oid2addop = {add_op.upload_info.sha256.hex(): add_op for add_op in additions}

    # Step 2: ignore files that have already been uploaded
    filtered_actions = []
    for action in batch_actions:
        if action.get("actions") is None:
            logger.debug(
                f"Content of file {oid2addop[action['oid']].path_in_repo} is already"
                " present upstream - skipping upload."
            )
        else:
            filtered_actions.append(action)

    if len(filtered_actions) == 0:
        logger.debug("No LFS files to upload.")
        return

    # Step 3: upload files concurrently according to these instructions
    def _wrapped_lfs_upload(batch_action) -> None:
        try:
            operation = oid2addop[batch_action["oid"]]
            lfs_upload(operation=operation, lfs_batch_action=batch_action, headers=headers, endpoint=endpoint)
        except Exception as exc:
            raise RuntimeError(f"Error while uploading '{operation.path_in_repo}' to the Hub.") from exc

    if constants.HF_HUB_ENABLE_HF_TRANSFER:
        logger.debug(f"Uploading {len(filtered_actions)} LFS files to the Hub using `hf_transfer`.")
        for action in hf_tqdm(filtered_actions, name="huggingface_hub.lfs_upload"):
            _wrapped_lfs_upload(action)
    elif len(filtered_actions) == 1:
        logger.debug("Uploading 1 LFS file to the Hub")
        _wrapped_lfs_upload(filtered_actions[0])
    else:
        logger.debug(
            f"Uploading {len(filtered_actions)} LFS files to the Hub using up to {num_threads} threads concurrently"
        )
        thread_map(
            _wrapped_lfs_upload,
            filtered_actions,
            desc=f"Upload {len(filtered_actions)} LFS files",
            max_workers=num_threads,
            tqdm_class=hf_tqdm,
        )


def _validate_preupload_info(preupload_info: dict):
    files = preupload_info.get("files")
    if not isinstance(files, list):
        raise ValueError("preupload_info is improperly formatted")
    for file_info in files:
        if not (
            isinstance(file_info, dict)
            and isinstance(file_info.get("path"), str)
            and isinstance(file_info.get("uploadMode"), str)
            and (file_info["uploadMode"] in ("lfs", "regular"))
        ):
            raise ValueError("preupload_info is improperly formatted:")
    return preupload_info


@validate_hf_hub_args
def _fetch_upload_modes(
    additions: Iterable[CommitOperationAdd],
    repo_type: str,
    repo_id: str,
    headers: Dict[str, str],
    revision: str,
    endpoint: Optional[str] = None,
    create_pr: bool = False,
    gitignore_content: Optional[str] = None,
) -> None:
    """
    Requests the Hub "preupload" endpoint to determine whether each input file should be uploaded as a regular git blob
    or as git LFS blob. Input `additions` are mutated in-place with the upload mode.

    Args:
        additions (`Iterable` of :class:`CommitOperationAdd`):
            Iterable of :class:`CommitOperationAdd` describing the files to
            upload to the Hub.
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        headers (`Dict[str, str]`):
            Headers to use for the request, including authorization headers and user agent.
        revision (`str`):
            The git revision to upload the files to. Can be any valid git revision.
        gitignore_content (`str`, *optional*):
            The content of the `.gitignore` file to know which files should be ignored. The order of priority
            is to first check if `gitignore_content` is passed, then check if the `.gitignore` file is present
            in the list of files to commit and finally default to the `.gitignore` file already hosted on the Hub
            (if any).
    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the Hub API response is improperly formatted.
    """
    endpoint = endpoint if endpoint is not None else constants.ENDPOINT

    # Fetch upload mode (LFS or regular) chunk by chunk.
    upload_modes: Dict[str, UploadMode] = {}
    should_ignore_info: Dict[str, bool] = {}
    oid_info: Dict[str, Optional[str]] = {}

    for chunk in chunk_iterable(additions, 256):
        payload: Dict = {
            "files": [
                {
                    "path": op.path_in_repo,
                    "sample": base64.b64encode(op.upload_info.sample).decode("ascii"),
                    "size": op.upload_info.size,
                }
                for op in chunk
            ]
        }
        if gitignore_content is not None:
            payload["gitIgnore"] = gitignore_content

        resp = get_session().post(
            f"{endpoint}/api/{repo_type}s/{repo_id}/preupload/{revision}",
            json=payload,
            headers=headers,
            params={"create_pr": "1"} if create_pr else None,
        )
        hf_raise_for_status(resp)
        preupload_info = _validate_preupload_info(resp.json())
        upload_modes.update(**{file["path"]: file["uploadMode"] for file in preupload_info["files"]})
        should_ignore_info.update(**{file["path"]: file["shouldIgnore"] for file in preupload_info["files"]})
        oid_info.update(**{file["path"]: file.get("oid") for file in preupload_info["files"]})

    # Set upload mode for each addition operation
    for addition in additions:
        addition._upload_mode = upload_modes[addition.path_in_repo]
        addition._should_ignore = should_ignore_info[addition.path_in_repo]
        addition._remote_oid = oid_info[addition.path_in_repo]

    # Empty files cannot be uploaded as LFS (S3 would fail with a 501 Not Implemented)
    # => empty files are uploaded as "regular" to still allow users to commit them.
    for addition in additions:
|
551 |
+
if addition.upload_info.size == 0:
|
552 |
+
addition._upload_mode = "regular"
|
553 |
+
|
554 |
+
|
555 |
+
@validate_hf_hub_args
|
556 |
+
def _fetch_files_to_copy(
|
557 |
+
copies: Iterable[CommitOperationCopy],
|
558 |
+
repo_type: str,
|
559 |
+
repo_id: str,
|
560 |
+
headers: Dict[str, str],
|
561 |
+
revision: str,
|
562 |
+
endpoint: Optional[str] = None,
|
563 |
+
) -> Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]]:
|
564 |
+
"""
|
565 |
+
Fetch information about the files to copy.
|
566 |
+
|
567 |
+
For LFS files, we only need their metadata (file size and sha256) while for regular files
|
568 |
+
we need to download the raw content from the Hub.
|
569 |
+
|
570 |
+
Args:
|
571 |
+
copies (`Iterable` of :class:`CommitOperationCopy`):
|
572 |
+
Iterable of :class:`CommitOperationCopy` describing the files to
|
573 |
+
copy on the Hub.
|
574 |
+
repo_type (`str`):
|
575 |
+
Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
|
576 |
+
repo_id (`str`):
|
577 |
+
A namespace (user or an organization) and a repo name separated
|
578 |
+
by a `/`.
|
579 |
+
headers (`Dict[str, str]`):
|
580 |
+
Headers to use for the request, including authorization headers and user agent.
|
581 |
+
revision (`str`):
|
582 |
+
The git revision to upload the files to. Can be any valid git revision.
|
583 |
+
|
584 |
+
Returns: `Dict[Tuple[str, Optional[str]], Union[RepoFile, bytes]]]`
|
585 |
+
Key is the file path and revision of the file to copy.
|
586 |
+
Value is the raw content as bytes (for regular files) or the file information as a RepoFile (for LFS files).
|
587 |
+
|
588 |
+
Raises:
|
589 |
+
[`~utils.HfHubHTTPError`]
|
590 |
+
If the Hub API returned an error.
|
591 |
+
[`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
|
592 |
+
If the Hub API response is improperly formatted.
|
593 |
+
"""
|
594 |
+
from .hf_api import HfApi, RepoFolder
|
595 |
+
|
596 |
+
hf_api = HfApi(endpoint=endpoint, headers=headers)
|
597 |
+
files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]] = {}
|
598 |
+
for src_revision, operations in groupby(copies, key=lambda op: op.src_revision):
|
599 |
+
operations = list(operations) # type: ignore
|
600 |
+
paths = [op.src_path_in_repo for op in operations]
|
601 |
+
for offset in range(0, len(paths), FETCH_LFS_BATCH_SIZE):
|
602 |
+
src_repo_files = hf_api.get_paths_info(
|
603 |
+
repo_id=repo_id,
|
604 |
+
paths=paths[offset : offset + FETCH_LFS_BATCH_SIZE],
|
605 |
+
revision=src_revision or revision,
|
606 |
+
repo_type=repo_type,
|
607 |
+
)
|
608 |
+
for src_repo_file in src_repo_files:
|
609 |
+
if isinstance(src_repo_file, RepoFolder):
|
610 |
+
raise NotImplementedError("Copying a folder is not implemented.")
|
611 |
+
if src_repo_file.lfs:
|
612 |
+
files_to_copy[(src_repo_file.path, src_revision)] = src_repo_file
|
613 |
+
else:
|
614 |
+
# TODO: (optimization) download regular files to copy concurrently
|
615 |
+
url = hf_hub_url(
|
616 |
+
endpoint=endpoint,
|
617 |
+
repo_type=repo_type,
|
618 |
+
repo_id=repo_id,
|
619 |
+
revision=src_revision or revision,
|
620 |
+
filename=src_repo_file.path,
|
621 |
+
)
|
622 |
+
response = get_session().get(url, headers=headers)
|
623 |
+
hf_raise_for_status(response)
|
624 |
+
files_to_copy[(src_repo_file.path, src_revision)] = response.content
|
625 |
+
for operation in operations:
|
626 |
+
if (operation.src_path_in_repo, src_revision) not in files_to_copy:
|
627 |
+
raise EntryNotFoundError(
|
628 |
+
f"Cannot copy {operation.src_path_in_repo} at revision "
|
629 |
+
f"{src_revision or revision}: file is missing on repo."
|
630 |
+
)
|
631 |
+
return files_to_copy
|
632 |
+
|
633 |
+
|
634 |
+
def _prepare_commit_payload(
|
635 |
+
operations: Iterable[CommitOperation],
|
636 |
+
files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]],
|
637 |
+
commit_message: str,
|
638 |
+
commit_description: Optional[str] = None,
|
639 |
+
parent_commit: Optional[str] = None,
|
640 |
+
) -> Iterable[Dict[str, Any]]:
|
641 |
+
"""
|
642 |
+
Builds the payload to POST to the `/commit` API of the Hub.
|
643 |
+
|
644 |
+
Payload is returned as an iterator so that it can be streamed as a ndjson in the
|
645 |
+
POST request.
|
646 |
+
|
647 |
+
For more information, see:
|
648 |
+
- https://github.com/huggingface/huggingface_hub/issues/1085#issuecomment-1265208073
|
649 |
+
- http://ndjson.org/
|
650 |
+
"""
|
651 |
+
commit_description = commit_description if commit_description is not None else ""
|
652 |
+
|
653 |
+
# 1. Send a header item with the commit metadata
|
654 |
+
header_value = {"summary": commit_message, "description": commit_description}
|
655 |
+
if parent_commit is not None:
|
656 |
+
header_value["parentCommit"] = parent_commit
|
657 |
+
yield {"key": "header", "value": header_value}
|
658 |
+
|
659 |
+
nb_ignored_files = 0
|
660 |
+
|
661 |
+
# 2. Send operations, one per line
|
662 |
+
for operation in operations:
|
663 |
+
# Skip ignored files
|
664 |
+
if isinstance(operation, CommitOperationAdd) and operation._should_ignore:
|
665 |
+
logger.debug(f"Skipping file '{operation.path_in_repo}' in commit (ignored by gitignore file).")
|
666 |
+
nb_ignored_files += 1
|
667 |
+
continue
|
668 |
+
|
669 |
+
# 2.a. Case adding a regular file
|
670 |
+
if isinstance(operation, CommitOperationAdd) and operation._upload_mode == "regular":
|
671 |
+
yield {
|
672 |
+
"key": "file",
|
673 |
+
"value": {
|
674 |
+
"content": operation.b64content().decode(),
|
675 |
+
"path": operation.path_in_repo,
|
676 |
+
"encoding": "base64",
|
677 |
+
},
|
678 |
+
}
|
679 |
+
# 2.b. Case adding an LFS file
|
680 |
+
elif isinstance(operation, CommitOperationAdd) and operation._upload_mode == "lfs":
|
681 |
+
yield {
|
682 |
+
"key": "lfsFile",
|
683 |
+
"value": {
|
684 |
+
"path": operation.path_in_repo,
|
685 |
+
"algo": "sha256",
|
686 |
+
"oid": operation.upload_info.sha256.hex(),
|
687 |
+
"size": operation.upload_info.size,
|
688 |
+
},
|
689 |
+
}
|
690 |
+
# 2.c. Case deleting a file or folder
|
691 |
+
elif isinstance(operation, CommitOperationDelete):
|
692 |
+
yield {
|
693 |
+
"key": "deletedFolder" if operation.is_folder else "deletedFile",
|
694 |
+
"value": {"path": operation.path_in_repo},
|
695 |
+
}
|
696 |
+
# 2.d. Case copying a file or folder
|
697 |
+
elif isinstance(operation, CommitOperationCopy):
|
698 |
+
file_to_copy = files_to_copy[(operation.src_path_in_repo, operation.src_revision)]
|
699 |
+
if isinstance(file_to_copy, bytes):
|
700 |
+
yield {
|
701 |
+
"key": "file",
|
702 |
+
"value": {
|
703 |
+
"content": base64.b64encode(file_to_copy).decode(),
|
704 |
+
"path": operation.path_in_repo,
|
705 |
+
"encoding": "base64",
|
706 |
+
},
|
707 |
+
}
|
708 |
+
elif file_to_copy.lfs:
|
709 |
+
yield {
|
710 |
+
"key": "lfsFile",
|
711 |
+
"value": {
|
712 |
+
"path": operation.path_in_repo,
|
713 |
+
"algo": "sha256",
|
714 |
+
"oid": file_to_copy.lfs.sha256,
|
715 |
+
},
|
716 |
+
}
|
717 |
+
else:
|
718 |
+
raise ValueError(
|
719 |
+
"Malformed files_to_copy (should be raw file content as bytes or RepoFile objects with LFS info."
|
720 |
+
)
|
721 |
+
# 2.e. Never expected to happen
|
722 |
+
else:
|
723 |
+
raise ValueError(
|
724 |
+
f"Unknown operation to commit. Operation: {operation}. Upload mode:"
|
725 |
+
f" {getattr(operation, '_upload_mode', None)}"
|
726 |
+
)
|
727 |
+
|
728 |
+
if nb_ignored_files > 0:
|
729 |
+
logger.info(f"Skipped {nb_ignored_files} file(s) in commit (ignored by gitignore file).")
|
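Note: `_prepare_commit_payload` above returns an iterator of dicts that the client streams as newline-delimited JSON (ndjson) to the `/commit` endpoint. A minimal, illustrative sketch of that serialization step follows; the helper `to_ndjson` and the sample items are hypothetical and not part of the file above.

```python
import json
from typing import Any, Dict, Iterable, Iterator


def to_ndjson(items: Iterable[Dict[str, Any]]) -> Iterator[bytes]:
    # Encode each payload item as one JSON object per line, as an ndjson body expects.
    for item in items:
        yield json.dumps(item).encode("utf-8") + b"\n"


# Hypothetical payload items mirroring the shapes yielded above ("header" first, then one item per operation).
items = [
    {"key": "header", "value": {"summary": "Upload folder using huggingface_hub", "description": ""}},
    {"key": "deletedFile", "value": {"path": "old_checkpoint.bin"}},
]
body = b"".join(to_ndjson(items))  # or pass the generator directly as a streaming request body
```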
meow/lib/python3.13/site-packages/huggingface_hub/_space_api.py
ADDED
@@ -0,0 +1,160 @@
# coding=utf-8
# Copyright 2019-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Dict, Optional

from huggingface_hub.utils import parse_datetime


class SpaceStage(str, Enum):
    """
    Enumeration of possible stage of a Space on the Hub.

    Value can be compared to a string:
    ```py
    assert SpaceStage.BUILDING == "BUILDING"
    ```

    Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceInfo.ts#L61 (private url).
    """

    # Copied from moon-landing > server > repo_types > SpaceInfo.ts (private repo)
    NO_APP_FILE = "NO_APP_FILE"
    CONFIG_ERROR = "CONFIG_ERROR"
    BUILDING = "BUILDING"
    BUILD_ERROR = "BUILD_ERROR"
    RUNNING = "RUNNING"
    RUNNING_BUILDING = "RUNNING_BUILDING"
    RUNTIME_ERROR = "RUNTIME_ERROR"
    DELETING = "DELETING"
    STOPPED = "STOPPED"
    PAUSED = "PAUSED"


class SpaceHardware(str, Enum):
    """
    Enumeration of hardwares available to run your Space on the Hub.

    Value can be compared to a string:
    ```py
    assert SpaceHardware.CPU_BASIC == "cpu-basic"
    ```

    Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceInfo.ts#L73 (private url).
    """

    CPU_BASIC = "cpu-basic"
    CPU_UPGRADE = "cpu-upgrade"
    T4_SMALL = "t4-small"
    T4_MEDIUM = "t4-medium"
    L4X1 = "l4x1"
    L4X4 = "l4x4"
    ZERO_A10G = "zero-a10g"
    A10G_SMALL = "a10g-small"
    A10G_LARGE = "a10g-large"
    A10G_LARGEX2 = "a10g-largex2"
    A10G_LARGEX4 = "a10g-largex4"
    A100_LARGE = "a100-large"
    V5E_1X1 = "v5e-1x1"
    V5E_2X2 = "v5e-2x2"
    V5E_2X4 = "v5e-2x4"


class SpaceStorage(str, Enum):
    """
    Enumeration of persistent storage available for your Space on the Hub.

    Value can be compared to a string:
    ```py
    assert SpaceStorage.SMALL == "small"
    ```

    Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceHardwareFlavor.ts#L24 (private url).
    """

    SMALL = "small"
    MEDIUM = "medium"
    LARGE = "large"


@dataclass
class SpaceRuntime:
    """
    Contains information about the current runtime of a Space.

    Args:
        stage (`str`):
            Current stage of the space. Example: RUNNING.
        hardware (`str` or `None`):
            Current hardware of the space. Example: "cpu-basic". Can be `None` if Space
            is `BUILDING` for the first time.
        requested_hardware (`str` or `None`):
            Requested hardware. Can be different than `hardware` especially if the request
            has just been made. Example: "t4-medium". Can be `None` if no hardware has
            been requested yet.
        sleep_time (`int` or `None`):
            Number of seconds the Space will be kept alive after the last request. By default (if value is `None`), the
            Space will never go to sleep if it's running on an upgraded hardware, while it will go to sleep after 48
            hours on a free 'cpu-basic' hardware. For more details, see https://huggingface.co/docs/hub/spaces-gpus#sleep-time.
        raw (`dict`):
            Raw response from the server. Contains more information about the Space
            runtime like number of replicas, number of cpu, memory size,...
    """

    stage: SpaceStage
    hardware: Optional[SpaceHardware]
    requested_hardware: Optional[SpaceHardware]
    sleep_time: Optional[int]
    storage: Optional[SpaceStorage]
    raw: Dict

    def __init__(self, data: Dict) -> None:
        self.stage = data["stage"]
        self.hardware = data.get("hardware", {}).get("current")
        self.requested_hardware = data.get("hardware", {}).get("requested")
        self.sleep_time = data.get("gcTimeout")
        self.storage = data.get("storage")
        self.raw = data


@dataclass
class SpaceVariable:
    """
    Contains information about the current variables of a Space.

    Args:
        key (`str`):
            Variable key. Example: `"MODEL_REPO_ID"`
        value (`str`):
            Variable value. Example: `"the_model_repo_id"`.
        description (`str` or None):
            Description of the variable. Example: `"Model Repo ID of the implemented model"`.
        updatedAt (`datetime` or None):
            datetime of the last update of the variable (if the variable has been updated at least once).
    """

    key: str
    value: str
    description: Optional[str]
    updated_at: Optional[datetime]

    def __init__(self, key: str, values: Dict) -> None:
        self.key = key
        self.value = values["value"]
        self.description = values.get("description")
        updated_at = values.get("updatedAt")
        self.updated_at = parse_datetime(updated_at) if updated_at is not None else None
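A quick, hedged usage sketch for the `SpaceRuntime` dataclass defined above; the runtime payload shown is invented for illustration (real responses come from the Space runtime API):

```python
from huggingface_hub._space_api import SpaceRuntime, SpaceStage

# Hypothetical server response for a running Space.
data = {
    "stage": "RUNNING",
    "hardware": {"current": "cpu-basic", "requested": "t4-small"},
    "gcTimeout": 3600,
    "storage": "small",
}
runtime = SpaceRuntime(data)
assert runtime.stage == SpaceStage.RUNNING  # str-based enum compares equal to its value
print(runtime.hardware, runtime.requested_hardware, runtime.sleep_time)  # cpu-basic t4-small 3600
```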
meow/lib/python3.13/site-packages/huggingface_hub/_tensorboard_logger.py
ADDED
@@ -0,0 +1,194 @@
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains a logger to push training logs to the Hub, using Tensorboard."""

from pathlib import Path
from typing import TYPE_CHECKING, List, Optional, Union

from ._commit_scheduler import CommitScheduler
from .errors import EntryNotFoundError
from .repocard import ModelCard
from .utils import experimental


# Depending on user's setup, SummaryWriter can come either from 'tensorboardX'
# or from 'torch.utils.tensorboard'. Both are compatible so let's try to load
# from either of them.
try:
    from tensorboardX import SummaryWriter

    is_summary_writer_available = True

except ImportError:
    try:
        from torch.utils.tensorboard import SummaryWriter

        is_summary_writer_available = False
    except ImportError:
        # Dummy class to avoid failing at import. Will raise on instance creation.
        SummaryWriter = object
        is_summary_writer_available = False

if TYPE_CHECKING:
    from tensorboardX import SummaryWriter


class HFSummaryWriter(SummaryWriter):
    """
    Wrapper around the tensorboard's `SummaryWriter` to push training logs to the Hub.

    Data is logged locally and then pushed to the Hub asynchronously. Pushing data to the Hub is done in a separate
    thread to avoid blocking the training script. In particular, if the upload fails for any reason (e.g. a connection
    issue), the main script will not be interrupted. Data is automatically pushed to the Hub every `commit_every`
    minutes (default to every 5 minutes).

    <Tip warning={true}>

    `HFSummaryWriter` is experimental. Its API is subject to change in the future without prior notice.

    </Tip>

    Args:
        repo_id (`str`):
            The id of the repo to which the logs will be pushed.
        logdir (`str`, *optional*):
            The directory where the logs will be written. If not specified, a local directory will be created by the
            underlying `SummaryWriter` object.
        commit_every (`int` or `float`, *optional*):
            The frequency (in minutes) at which the logs will be pushed to the Hub. Defaults to 5 minutes.
        squash_history (`bool`, *optional*):
            Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
            useful to avoid degraded performances on the repo when it grows too large.
        repo_type (`str`, *optional*):
            The type of the repo to which the logs will be pushed. Defaults to "model".
        repo_revision (`str`, *optional*):
            The revision of the repo to which the logs will be pushed. Defaults to "main".
        repo_private (`bool`, *optional*):
            Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
        path_in_repo (`str`, *optional*):
            The path to the folder in the repo where the logs will be pushed. Defaults to "tensorboard/".
        repo_allow_patterns (`List[str]` or `str`, *optional*):
            A list of patterns to include in the upload. Defaults to `"*.tfevents.*"`. Check out the
            [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
        repo_ignore_patterns (`List[str]` or `str`, *optional*):
            A list of patterns to exclude in the upload. Check out the
            [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
        token (`str`, *optional*):
            Authentication token. Will default to the stored token. See https://huggingface.co/settings/token for more
            details
        kwargs:
            Additional keyword arguments passed to `SummaryWriter`.

    Examples:
    ```diff
    # Taken from https://pytorch.org/docs/stable/tensorboard.html
    - from torch.utils.tensorboard import SummaryWriter
    + from huggingface_hub import HFSummaryWriter

    import numpy as np

    - writer = SummaryWriter()
    + writer = HFSummaryWriter(repo_id="username/my-trained-model")

    for n_iter in range(100):
        writer.add_scalar('Loss/train', np.random.random(), n_iter)
        writer.add_scalar('Loss/test', np.random.random(), n_iter)
        writer.add_scalar('Accuracy/train', np.random.random(), n_iter)
        writer.add_scalar('Accuracy/test', np.random.random(), n_iter)
    ```

    ```py
    >>> from huggingface_hub import HFSummaryWriter

    # Logs are automatically pushed every 15 minutes (5 by default) + when exiting the context manager
    >>> with HFSummaryWriter(repo_id="test_hf_logger", commit_every=15) as logger:
    ...     logger.add_scalar("a", 1)
    ...     logger.add_scalar("b", 2)
    ```
    """

    @experimental
    def __new__(cls, *args, **kwargs) -> "HFSummaryWriter":
        if not is_summary_writer_available:
            raise ImportError(
                "You must have `tensorboard` installed to use `HFSummaryWriter`. Please run `pip install --upgrade"
                " tensorboardX` first."
            )
        return super().__new__(cls)

    def __init__(
        self,
        repo_id: str,
        *,
        logdir: Optional[str] = None,
        commit_every: Union[int, float] = 5,
        squash_history: bool = False,
        repo_type: Optional[str] = None,
        repo_revision: Optional[str] = None,
        repo_private: Optional[bool] = None,
        path_in_repo: Optional[str] = "tensorboard",
        repo_allow_patterns: Optional[Union[List[str], str]] = "*.tfevents.*",
        repo_ignore_patterns: Optional[Union[List[str], str]] = None,
        token: Optional[str] = None,
        **kwargs,
    ):
        # Initialize SummaryWriter
        super().__init__(logdir=logdir, **kwargs)

        # Check logdir has been correctly initialized and fail early otherwise. In practice, SummaryWriter takes care of it.
        if not isinstance(self.logdir, str):
            raise ValueError(f"`self.logdir` must be a string. Got '{self.logdir}' of type {type(self.logdir)}.")

        # Append logdir name to `path_in_repo`
        if path_in_repo is None or path_in_repo == "":
            path_in_repo = Path(self.logdir).name
        else:
            path_in_repo = path_in_repo.strip("/") + "/" + Path(self.logdir).name

        # Initialize scheduler
        self.scheduler = CommitScheduler(
            folder_path=self.logdir,
            path_in_repo=path_in_repo,
            repo_id=repo_id,
            repo_type=repo_type,
            revision=repo_revision,
            private=repo_private,
            token=token,
            allow_patterns=repo_allow_patterns,
            ignore_patterns=repo_ignore_patterns,
            every=commit_every,
            squash_history=squash_history,
        )

        # Exposing some high-level info at root level
        self.repo_id = self.scheduler.repo_id
        self.repo_type = self.scheduler.repo_type
        self.repo_revision = self.scheduler.revision

        # Add `hf-summary-writer` tag to the model card metadata
        try:
            card = ModelCard.load(repo_id_or_path=self.repo_id, repo_type=self.repo_type)
        except EntryNotFoundError:
            card = ModelCard("")
        tags = card.data.get("tags", [])
        if "hf-summary-writer" not in tags:
            tags.append("hf-summary-writer")
            card.data["tags"] = tags
            card.push_to_hub(repo_id=self.repo_id, repo_type=self.repo_type)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Push to hub in a non-blocking way when exiting the logger's context manager."""
        super().__exit__(exc_type, exc_val, exc_tb)
        future = self.scheduler.trigger()
        future.result()
meow/lib/python3.13/site-packages/huggingface_hub/_webhooks_payload.py
ADDED
@@ -0,0 +1,137 @@
# coding=utf-8
# Copyright 2023-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains data structures to parse the webhooks payload."""

from typing import List, Literal, Optional

from .utils import is_pydantic_available


if is_pydantic_available():
    from pydantic import BaseModel
else:
    # Define a dummy BaseModel to avoid import errors when pydantic is not installed
    # Import error will be raised when trying to use the class

    class BaseModel:  # type: ignore [no-redef]
        def __init__(self, *args, **kwargs) -> None:
            raise ImportError(
                "You must have `pydantic` installed to use `WebhookPayload`. This is an optional dependency that"
                " should be installed separately. Please run `pip install --upgrade pydantic` and retry."
            )


# This is an adaptation of the ReportV3 interface implemented in moon-landing. V0, V1 and V2 have been ignored as they
# are not in used anymore. To keep in sync when format is updated in
# https://github.com/huggingface/moon-landing/blob/main/server/lib/HFWebhooks.ts (internal link).


WebhookEvent_T = Literal[
    "create",
    "delete",
    "move",
    "update",
]
RepoChangeEvent_T = Literal[
    "add",
    "move",
    "remove",
    "update",
]
RepoType_T = Literal[
    "dataset",
    "model",
    "space",
]
DiscussionStatus_T = Literal[
    "closed",
    "draft",
    "open",
    "merged",
]
SupportedWebhookVersion = Literal[3]


class ObjectId(BaseModel):
    id: str


class WebhookPayloadUrl(BaseModel):
    web: str
    api: Optional[str] = None


class WebhookPayloadMovedTo(BaseModel):
    name: str
    owner: ObjectId


class WebhookPayloadWebhook(ObjectId):
    version: SupportedWebhookVersion


class WebhookPayloadEvent(BaseModel):
    action: WebhookEvent_T
    scope: str


class WebhookPayloadDiscussionChanges(BaseModel):
    base: str
    mergeCommitId: Optional[str] = None


class WebhookPayloadComment(ObjectId):
    author: ObjectId
    hidden: bool
    content: Optional[str] = None
    url: WebhookPayloadUrl


class WebhookPayloadDiscussion(ObjectId):
    num: int
    author: ObjectId
    url: WebhookPayloadUrl
    title: str
    isPullRequest: bool
    status: DiscussionStatus_T
    changes: Optional[WebhookPayloadDiscussionChanges] = None
    pinned: Optional[bool] = None


class WebhookPayloadRepo(ObjectId):
    owner: ObjectId
    head_sha: Optional[str] = None
    name: str
    private: bool
    subdomain: Optional[str] = None
    tags: Optional[List[str]] = None
    type: Literal["dataset", "model", "space"]
    url: WebhookPayloadUrl


class WebhookPayloadUpdatedRef(BaseModel):
    ref: str
    oldSha: Optional[str] = None
    newSha: Optional[str] = None


class WebhookPayload(BaseModel):
    event: WebhookPayloadEvent
    repo: WebhookPayloadRepo
    discussion: Optional[WebhookPayloadDiscussion] = None
    comment: Optional[WebhookPayloadComment] = None
    webhook: WebhookPayloadWebhook
    movedTo: Optional[WebhookPayloadMovedTo] = None
    updatedRefs: Optional[List[WebhookPayloadUpdatedRef]] = None
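A small, hedged sketch of how the pydantic models above validate an incoming webhook body; the field values are invented and only the required fields of `WebhookPayload` are filled in:

```python
from huggingface_hub._webhooks_payload import WebhookPayload

# Hypothetical webhook body with the minimum required fields.
sample = {
    "event": {"action": "update", "scope": "repo.content"},
    "repo": {
        "id": "1234567890",  # invented id
        "owner": {"id": "0987654321"},
        "name": "username/some-model",
        "private": False,
        "type": "model",
        "url": {"web": "https://huggingface.co/username/some-model"},
    },
    "webhook": {"id": "abcdef", "version": 3},
}
payload = WebhookPayload(**sample)  # pydantic coerces the nested dicts into the models above
print(payload.event.action, payload.repo.name)
```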
meow/lib/python3.13/site-packages/huggingface_hub/_webhooks_server.py
ADDED
@@ -0,0 +1,386 @@
# coding=utf-8
# Copyright 2023-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains `WebhooksServer` and `webhook_endpoint` to create a webhook server easily."""

import atexit
import inspect
import os
from functools import wraps
from typing import TYPE_CHECKING, Any, Callable, Dict, Optional

from .utils import experimental, is_fastapi_available, is_gradio_available


if TYPE_CHECKING:
    import gradio as gr
    from fastapi import Request

if is_fastapi_available():
    from fastapi import FastAPI, Request
    from fastapi.responses import JSONResponse
else:
    # Will fail at runtime if FastAPI is not available
    FastAPI = Request = JSONResponse = None  # type: ignore [misc, assignment]


_global_app: Optional["WebhooksServer"] = None
_is_local = os.environ.get("SPACE_ID") is None


@experimental
class WebhooksServer:
    """
    The [`WebhooksServer`] class lets you create an instance of a Gradio app that can receive Huggingface webhooks.
    These webhooks can be registered using the [`~WebhooksServer.add_webhook`] decorator. Webhook endpoints are added to
    the app as a POST endpoint to the FastAPI router. Once all the webhooks are registered, the `launch` method has to be
    called to start the app.

    It is recommended to accept [`WebhookPayload`] as the first argument of the webhook function. It is a Pydantic
    model that contains all the information about the webhook event. The data will be parsed automatically for you.

    Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to setup your
    WebhooksServer and deploy it on a Space.

    <Tip warning={true}>

    `WebhooksServer` is experimental. Its API is subject to change in the future.

    </Tip>

    <Tip warning={true}>

    You must have `gradio` installed to use `WebhooksServer` (`pip install --upgrade gradio`).

    </Tip>

    Args:
        ui (`gradio.Blocks`, optional):
            A Gradio UI instance to be used as the Space landing page. If `None`, a UI displaying instructions
            about the configured webhooks is created.
        webhook_secret (`str`, optional):
            A secret key to verify incoming webhook requests. You can set this value to any secret you want as long as
            you also configure it in your [webhooks settings panel](https://huggingface.co/settings/webhooks). You
            can also set this value as the `WEBHOOK_SECRET` environment variable. If no secret is provided, the
            webhook endpoints are opened without any security.

    Example:

    ```python
    import gradio as gr
    from huggingface_hub import WebhooksServer, WebhookPayload

    with gr.Blocks() as ui:
        ...

    app = WebhooksServer(ui=ui, webhook_secret="my_secret_key")

    @app.add_webhook("/say_hello")
    async def hello(payload: WebhookPayload):
        return {"message": "hello"}

    app.launch()
    ```
    """

    def __new__(cls, *args, **kwargs) -> "WebhooksServer":
        if not is_gradio_available():
            raise ImportError(
                "You must have `gradio` installed to use `WebhooksServer`. Please run `pip install --upgrade gradio`"
                " first."
            )
        if not is_fastapi_available():
            raise ImportError(
                "You must have `fastapi` installed to use `WebhooksServer`. Please run `pip install --upgrade fastapi`"
                " first."
            )
        return super().__new__(cls)

    def __init__(
        self,
        ui: Optional["gr.Blocks"] = None,
        webhook_secret: Optional[str] = None,
    ) -> None:
        self._ui = ui

        self.webhook_secret = webhook_secret or os.getenv("WEBHOOK_SECRET")
        self.registered_webhooks: Dict[str, Callable] = {}
        _warn_on_empty_secret(self.webhook_secret)

    def add_webhook(self, path: Optional[str] = None) -> Callable:
        """
        Decorator to add a webhook to the [`WebhooksServer`] server.

        Args:
            path (`str`, optional):
                The URL path to register the webhook function. If not provided, the function name will be used as the
                path. In any case, all webhooks are registered under `/webhooks`.

        Raises:
            ValueError: If the provided path is already registered as a webhook.

        Example:
        ```python
        from huggingface_hub import WebhooksServer, WebhookPayload

        app = WebhooksServer()

        @app.add_webhook
        async def trigger_training(payload: WebhookPayload):
            if payload.repo.type == "dataset" and payload.event.action == "update":
                # Trigger a training job if a dataset is updated
                ...

        app.launch()
        ```
        """
        # Usage: directly as decorator. Example: `@app.add_webhook`
        if callable(path):
            # If path is a function, it means it was used as a decorator without arguments
            return self.add_webhook()(path)

        # Usage: provide a path. Example: `@app.add_webhook(...)`
        @wraps(FastAPI.post)
        def _inner_post(*args, **kwargs):
            func = args[0]
            abs_path = f"/webhooks/{(path or func.__name__).strip('/')}"
            if abs_path in self.registered_webhooks:
                raise ValueError(f"Webhook {abs_path} already exists.")
            self.registered_webhooks[abs_path] = func

        return _inner_post

    def launch(self, prevent_thread_lock: bool = False, **launch_kwargs: Any) -> None:
        """Launch the Gradio app and register webhooks to the underlying FastAPI server.

        Input parameters are forwarded to Gradio when launching the app.
        """
        ui = self._ui or self._get_default_ui()

        # Start Gradio App
        #   - as non-blocking so that webhooks can be added afterwards
        #   - as shared if launch locally (to debug webhooks)
        launch_kwargs.setdefault("share", _is_local)
        self.fastapi_app, _, _ = ui.launch(prevent_thread_lock=True, **launch_kwargs)

        # Register webhooks to FastAPI app
        for path, func in self.registered_webhooks.items():
            # Add secret check if required
            if self.webhook_secret is not None:
                func = _wrap_webhook_to_check_secret(func, webhook_secret=self.webhook_secret)

            # Add route to FastAPI app
            self.fastapi_app.post(path)(func)

        # Print instructions and block main thread
        space_host = os.environ.get("SPACE_HOST")
        url = "https://" + space_host if space_host is not None else (ui.share_url or ui.local_url)
        url = url.strip("/")
        message = "\nWebhooks are correctly setup and ready to use:"
        message += "\n" + "\n".join(f"  - POST {url}{webhook}" for webhook in self.registered_webhooks)
        message += "\nGo to https://huggingface.co/settings/webhooks to setup your webhooks."
        print(message)

        if not prevent_thread_lock:
            ui.block_thread()

    def _get_default_ui(self) -> "gr.Blocks":
        """Default UI if not provided (lists webhooks and provides basic instructions)."""
        import gradio as gr

        with gr.Blocks() as ui:
            gr.Markdown("# This is an app to process 🤗 Webhooks")
            gr.Markdown(
                "Webhooks are a foundation for MLOps-related features. They allow you to listen for new changes on"
                " specific repos or to all repos belonging to particular set of users/organizations (not just your"
                " repos, but any repo). Check out this [guide](https://huggingface.co/docs/hub/webhooks) to get to"
                " know more about webhooks on the Huggingface Hub."
            )
            gr.Markdown(
                f"{len(self.registered_webhooks)} webhook(s) are registered:"
                + "\n\n"
                + "\n ".join(
                    f"- [{webhook_path}]({_get_webhook_doc_url(webhook.__name__, webhook_path)})"
                    for webhook_path, webhook in self.registered_webhooks.items()
                )
            )
            gr.Markdown(
                "Go to https://huggingface.co/settings/webhooks to setup your webhooks."
                + "\nYou app is running locally. Please look at the logs to check the full URL you need to set."
                if _is_local
                else (
                    "\nThis app is running on a Space. You can find the corresponding URL in the options menu"
                    " (top-right) > 'Embed the Space'. The URL looks like 'https://{username}-{repo_name}.hf.space'."
                )
            )
        return ui


@experimental
def webhook_endpoint(path: Optional[str] = None) -> Callable:
    """Decorator to start a [`WebhooksServer`] and register the decorated function as a webhook endpoint.

    This is a helper to get started quickly. If you need more flexibility (custom landing page or webhook secret),
    you can use [`WebhooksServer`] directly. You can register multiple webhook endpoints (to the same server) by using
    this decorator multiple times.

    Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to setup your
    server and deploy it on a Space.

    <Tip warning={true}>

    `webhook_endpoint` is experimental. Its API is subject to change in the future.

    </Tip>

    <Tip warning={true}>

    You must have `gradio` installed to use `webhook_endpoint` (`pip install --upgrade gradio`).

    </Tip>

    Args:
        path (`str`, optional):
            The URL path to register the webhook function. If not provided, the function name will be used as the path.
            In any case, all webhooks are registered under `/webhooks`.

    Examples:
        The default usage is to register a function as a webhook endpoint. The function name will be used as the path.
        The server will be started automatically at exit (i.e. at the end of the script).

        ```python
        from huggingface_hub import webhook_endpoint, WebhookPayload

        @webhook_endpoint
        async def trigger_training(payload: WebhookPayload):
            if payload.repo.type == "dataset" and payload.event.action == "update":
                # Trigger a training job if a dataset is updated
                ...

        # Server is automatically started at the end of the script.
        ```

        Advanced usage: register a function as a webhook endpoint and start the server manually. This is useful if you
        are running it in a notebook.

        ```python
        from huggingface_hub import webhook_endpoint, WebhookPayload

        @webhook_endpoint
        async def trigger_training(payload: WebhookPayload):
            if payload.repo.type == "dataset" and payload.event.action == "update":
                # Trigger a training job if a dataset is updated
                ...

        # Start the server manually
        trigger_training.launch()
        ```
    """
    if callable(path):
        # If path is a function, it means it was used as a decorator without arguments
        return webhook_endpoint()(path)

    @wraps(WebhooksServer.add_webhook)
    def _inner(func: Callable) -> Callable:
        app = _get_global_app()
        app.add_webhook(path)(func)
        if len(app.registered_webhooks) == 1:
            # Register `app.launch` to run at exit (only once)
            atexit.register(app.launch)

        @wraps(app.launch)
        def _launch_now():
            # Run the app directly (without waiting atexit)
            atexit.unregister(app.launch)
            app.launch()

        func.launch = _launch_now  # type: ignore
        return func

    return _inner


def _get_global_app() -> WebhooksServer:
    global _global_app
    if _global_app is None:
        _global_app = WebhooksServer()
    return _global_app


def _warn_on_empty_secret(webhook_secret: Optional[str]) -> None:
    if webhook_secret is None:
        print("Webhook secret is not defined. This means your webhook endpoints will be open to everyone.")
        print(
            "To add a secret, set `WEBHOOK_SECRET` as environment variable or pass it at initialization: "
            "\n\t`app = WebhooksServer(webhook_secret='my_secret', ...)`"
        )
        print(
            "For more details about webhook secrets, please refer to"
            " https://huggingface.co/docs/hub/webhooks#webhook-secret."
        )
    else:
        print("Webhook secret is correctly defined.")


def _get_webhook_doc_url(webhook_name: str, webhook_path: str) -> str:
    """Returns the anchor to a given webhook in the docs (experimental)"""
    return "/docs#/default/" + webhook_name + webhook_path.replace("/", "_") + "_post"


def _wrap_webhook_to_check_secret(func: Callable, webhook_secret: str) -> Callable:
    """Wraps a webhook function to check the webhook secret before calling the function.

    This is a hacky way to add the `request` parameter to the function signature. Since FastAPI based itself on route
    parameters to inject the values to the function, we need to hack the function signature to retrieve the `Request`
    object (and hence the headers). A far cleaner solution would be to use a middleware. However, since
    `fastapi==0.90.1`, a middleware cannot be added once the app has started. And since the FastAPI app is started by
    Gradio internals (and not by us), we cannot add a middleware.

    This method is called only when a secret has been defined by the user. If a request is sent without the
    "x-webhook-secret", the function will return a 401 error (unauthorized). If the header is sent but is incorrect,
    the function will return a 403 error (forbidden).

    Inspired by https://stackoverflow.com/a/33112180.
    """
    initial_sig = inspect.signature(func)

    @wraps(func)
    async def _protected_func(request: Request, **kwargs):
        request_secret = request.headers.get("x-webhook-secret")
        if request_secret is None:
            return JSONResponse({"error": "x-webhook-secret header not set."}, status_code=401)
        if request_secret != webhook_secret:
            return JSONResponse({"error": "Invalid webhook secret."}, status_code=403)

        # Inject `request` in kwargs if required
        if "request" in initial_sig.parameters:
            kwargs["request"] = request

        # Handle both sync and async routes
        if inspect.iscoroutinefunction(func):
            return await func(**kwargs)
        else:
            return func(**kwargs)

    # Update signature to include request
    if "request" not in initial_sig.parameters:
        _protected_func.__signature__ = initial_sig.replace(  # type: ignore
            parameters=(
                inspect.Parameter(name="request", kind=inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=Request),
            )
            + tuple(initial_sig.parameters.values())
        )

    # Return protected route
    return _protected_func
meow/lib/python3.13/site-packages/huggingface_hub/constants.py
ADDED
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import typing
|
4 |
+
from typing import Literal, Optional, Tuple
|
5 |
+
|
6 |
+
|
7 |
+
# Possible values for env variables
|
8 |
+
|
9 |
+
|
10 |
+
ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
|
11 |
+
ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})
|
12 |
+
|
13 |
+
|
14 |
+
def _is_true(value: Optional[str]) -> bool:
|
15 |
+
if value is None:
|
16 |
+
return False
|
17 |
+
return value.upper() in ENV_VARS_TRUE_VALUES
|
18 |
+
|
19 |
+
|
20 |
+
def _as_int(value: Optional[str]) -> Optional[int]:
|
21 |
+
if value is None:
|
22 |
+
return None
|
23 |
+
return int(value)
|
24 |
+
|
25 |
+
|
26 |
+
# Constants for file downloads
|
27 |
+
|
28 |
+
PYTORCH_WEIGHTS_NAME = "pytorch_model.bin"
|
29 |
+
TF2_WEIGHTS_NAME = "tf_model.h5"
|
30 |
+
TF_WEIGHTS_NAME = "model.ckpt"
|
31 |
+
FLAX_WEIGHTS_NAME = "flax_model.msgpack"
|
32 |
+
CONFIG_NAME = "config.json"
|
33 |
+
REPOCARD_NAME = "README.md"
|
34 |
+
DEFAULT_ETAG_TIMEOUT = 10
|
35 |
+
DEFAULT_DOWNLOAD_TIMEOUT = 10
|
36 |
+
DEFAULT_REQUEST_TIMEOUT = 10
|
37 |
+
DOWNLOAD_CHUNK_SIZE = 10 * 1024 * 1024
|
38 |
+
HF_TRANSFER_CONCURRENCY = 100
|
39 |
+
|
40 |
+
# Constants for serialization
|
41 |
+
|
42 |
+
PYTORCH_WEIGHTS_FILE_PATTERN = "pytorch_model{suffix}.bin" # Unsafe pickle: use safetensors instead
|
43 |
+
SAFETENSORS_WEIGHTS_FILE_PATTERN = "model{suffix}.safetensors"
|
44 |
+
TF2_WEIGHTS_FILE_PATTERN = "tf_model{suffix}.h5"
|
45 |
+
|
46 |
+
# Constants for safetensors repos
|
47 |
+
|
48 |
+
SAFETENSORS_SINGLE_FILE = "model.safetensors"
|
49 |
+
SAFETENSORS_INDEX_FILE = "model.safetensors.index.json"
|
50 |
+
SAFETENSORS_MAX_HEADER_LENGTH = 25_000_000
|
51 |
+
|
52 |
+
# Timeout of aquiring file lock and logging the attempt
|
53 |
+
FILELOCK_LOG_EVERY_SECONDS = 10
|
54 |
+
|
55 |
+
# Git-related constants
|
56 |
+
|
57 |
+
DEFAULT_REVISION = "main"
|
58 |
+
REGEX_COMMIT_OID = re.compile(r"[A-Fa-f0-9]{5,40}")
|
59 |
+
|
60 |
+
HUGGINGFACE_CO_URL_HOME = "https://huggingface.co/"
|
61 |
+
|
62 |
+
_staging_mode = _is_true(os.environ.get("HUGGINGFACE_CO_STAGING"))
|
63 |
+
|
64 |
+
_HF_DEFAULT_ENDPOINT = "https://huggingface.co"
|
65 |
+
_HF_DEFAULT_STAGING_ENDPOINT = "https://hub-ci.huggingface.co"
|
66 |
+
ENDPOINT = os.getenv("HF_ENDPOINT") or (_HF_DEFAULT_STAGING_ENDPOINT if _staging_mode else _HF_DEFAULT_ENDPOINT)
|
67 |
+
|
68 |
+
HUGGINGFACE_CO_URL_TEMPLATE = ENDPOINT + "/{repo_id}/resolve/{revision}/{filename}"
|
69 |
+
HUGGINGFACE_HEADER_X_REPO_COMMIT = "X-Repo-Commit"
|
70 |
+
HUGGINGFACE_HEADER_X_LINKED_ETAG = "X-Linked-Etag"
|
71 |
+
HUGGINGFACE_HEADER_X_LINKED_SIZE = "X-Linked-Size"
|
72 |
+
|
73 |
+
INFERENCE_ENDPOINT = os.environ.get("HF_INFERENCE_ENDPOINT", "https://api-inference.huggingface.co")
|
74 |
+
|
75 |
+
# See https://huggingface.co/docs/inference-endpoints/index
|
76 |
+
INFERENCE_ENDPOINTS_ENDPOINT = "https://api.endpoints.huggingface.cloud/v2"
|
77 |
+
|
78 |
+
|
79 |
+
REPO_ID_SEPARATOR = "--"
|
80 |
+
# ^ this substring is not allowed in repo_ids on hf.co
|
81 |
+
# and is the canonical one we use for serialization of repo ids elsewhere.
|
82 |
+
|
83 |
+
|
84 |
+
REPO_TYPE_DATASET = "dataset"
|
85 |
+
REPO_TYPE_SPACE = "space"
|
86 |
+
REPO_TYPE_MODEL = "model"
|
87 |
+
REPO_TYPES = [None, REPO_TYPE_MODEL, REPO_TYPE_DATASET, REPO_TYPE_SPACE]
|
88 |
+
SPACES_SDK_TYPES = ["gradio", "streamlit", "docker", "static"]
|
89 |
+
|
90 |
+
REPO_TYPES_URL_PREFIXES = {
|
91 |
+
REPO_TYPE_DATASET: "datasets/",
|
92 |
+
REPO_TYPE_SPACE: "spaces/",
|
93 |
+
}
|
94 |
+
REPO_TYPES_MAPPING = {
|
95 |
+
"datasets": REPO_TYPE_DATASET,
|
96 |
+
"spaces": REPO_TYPE_SPACE,
|
97 |
+
"models": REPO_TYPE_MODEL,
|
98 |
+
}
|
99 |
+
|
100 |
+
DiscussionTypeFilter = Literal["all", "discussion", "pull_request"]
|
101 |
+
DISCUSSION_TYPES: Tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionTypeFilter)
|
102 |
+
DiscussionStatusFilter = Literal["all", "open", "closed"]
|
103 |
+
DISCUSSION_STATUS: Tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionStatusFilter)
|
104 |
+
|
105 |
+
# Webhook subscription types
|
106 |
+
WEBHOOK_DOMAIN_T = Literal["repo", "discussions"]
|
107 |
+
|
108 |
+
# default cache
|
109 |
+
default_home = os.path.join(os.path.expanduser("~"), ".cache")
|
110 |
+
HF_HOME = os.path.expanduser(
|
111 |
+
os.getenv(
|
112 |
+
"HF_HOME",
|
113 |
+
os.path.join(os.getenv("XDG_CACHE_HOME", default_home), "huggingface"),
|
114 |
+
)
|
115 |
+
)
|
116 |
+
hf_cache_home = HF_HOME # for backward compatibility. TODO: remove this in 1.0.0
|
117 |
+
|
118 |
+
default_cache_path = os.path.join(HF_HOME, "hub")
|
119 |
+
default_assets_cache_path = os.path.join(HF_HOME, "assets")
|
120 |
+
|
121 |
+
# Legacy env variables
|
122 |
+
HUGGINGFACE_HUB_CACHE = os.getenv("HUGGINGFACE_HUB_CACHE", default_cache_path)
|
123 |
+
HUGGINGFACE_ASSETS_CACHE = os.getenv("HUGGINGFACE_ASSETS_CACHE", default_assets_cache_path)
|
124 |
+
|
125 |
+
# New env variables
|
126 |
+
HF_HUB_CACHE = os.getenv("HF_HUB_CACHE", HUGGINGFACE_HUB_CACHE)
|
127 |
+
HF_ASSETS_CACHE = os.getenv("HF_ASSETS_CACHE", HUGGINGFACE_ASSETS_CACHE)
|
128 |
+
|
129 |
+
HF_HUB_OFFLINE = _is_true(os.environ.get("HF_HUB_OFFLINE") or os.environ.get("TRANSFORMERS_OFFLINE"))
|
130 |
+
|
131 |
+
# Opt-out from telemetry requests
|
132 |
+
HF_HUB_DISABLE_TELEMETRY = (
|
133 |
+
_is_true(os.environ.get("HF_HUB_DISABLE_TELEMETRY")) # HF-specific env variable
|
134 |
+
or _is_true(os.environ.get("DISABLE_TELEMETRY"))
|
135 |
+
or _is_true(os.environ.get("DO_NOT_TRACK")) # https://consoledonottrack.com/
|
136 |
+
)
|
137 |
+
|
138 |
+
# In the past, token was stored in a hardcoded location
|
139 |
+
# `_OLD_HF_TOKEN_PATH` is deprecated and will be removed "at some point".
|
140 |
+
# See https://github.com/huggingface/huggingface_hub/issues/1232
|
141 |
+
_OLD_HF_TOKEN_PATH = os.path.expanduser("~/.huggingface/token")
|
142 |
+
HF_TOKEN_PATH = os.environ.get("HF_TOKEN_PATH", os.path.join(HF_HOME, "token"))
|
143 |
+
HF_STORED_TOKENS_PATH = os.path.join(os.path.dirname(HF_TOKEN_PATH), "stored_tokens")
|
144 |
+
|
145 |
+
if _staging_mode:
|
146 |
+
# In staging mode, we use a different cache to ensure we don't mix up production and staging data or tokens
|
147 |
+
_staging_home = os.path.join(os.path.expanduser("~"), ".cache", "huggingface_staging")
|
148 |
+
HUGGINGFACE_HUB_CACHE = os.path.join(_staging_home, "hub")
|
149 |
+
_OLD_HF_TOKEN_PATH = os.path.join(_staging_home, "_old_token")
|
150 |
+
HF_TOKEN_PATH = os.path.join(_staging_home, "token")
|
151 |
+
|
152 |
+
# Here, `True` will disable progress bars globally without possibility of enabling it
|
153 |
+
# programmatically. `False` will enable them without possibility of disabling them.
|
154 |
+
# If environment variable is not set (None), then the user is free to enable/disable
|
155 |
+
# them programmatically.
|
156 |
+
# TL;DR: env variable has priority over code
|
157 |
+
__HF_HUB_DISABLE_PROGRESS_BARS = os.environ.get("HF_HUB_DISABLE_PROGRESS_BARS")
|
158 |
+
HF_HUB_DISABLE_PROGRESS_BARS: Optional[bool] = (
|
159 |
+
_is_true(__HF_HUB_DISABLE_PROGRESS_BARS) if __HF_HUB_DISABLE_PROGRESS_BARS is not None else None
|
160 |
+
)
|
161 |
+
|
162 |
+
# Disable warning on machines that do not support symlinks (e.g. Windows non-developer)
|
163 |
+
HF_HUB_DISABLE_SYMLINKS_WARNING: bool = _is_true(os.environ.get("HF_HUB_DISABLE_SYMLINKS_WARNING"))
|
164 |
+
|
165 |
+
# Disable warning when using experimental features
|
166 |
+
HF_HUB_DISABLE_EXPERIMENTAL_WARNING: bool = _is_true(os.environ.get("HF_HUB_DISABLE_EXPERIMENTAL_WARNING"))
|
167 |
+
|
168 |
+
# Disable sending the cached token by default in all HTTP requests to the Hub
|
169 |
+
HF_HUB_DISABLE_IMPLICIT_TOKEN: bool = _is_true(os.environ.get("HF_HUB_DISABLE_IMPLICIT_TOKEN"))
|
170 |
+
|
171 |
+
# Enable fast-download using external dependency "hf_transfer"
|
172 |
+
# See:
|
173 |
+
# - https://pypi.org/project/hf-transfer/
|
174 |
+
# - https://github.com/huggingface/hf_transfer (private)
|
175 |
+
HF_HUB_ENABLE_HF_TRANSFER: bool = _is_true(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER"))
|
176 |
+
|
177 |
+
|
178 |
+
# UNUSED
|
179 |
+
# We don't use symlinks in local dir anymore.
|
180 |
+
HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD: int = (
|
181 |
+
_as_int(os.environ.get("HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD")) or 5 * 1024 * 1024
|
182 |
+
)
|
183 |
+
|
184 |
+
# Used to override the etag timeout on a system level
|
185 |
+
HF_HUB_ETAG_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT")) or DEFAULT_ETAG_TIMEOUT
|
186 |
+
|
187 |
+
# Used to override the get request timeout on a system level
|
188 |
+
HF_HUB_DOWNLOAD_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_DOWNLOAD_TIMEOUT")) or DEFAULT_DOWNLOAD_TIMEOUT
|
189 |
+
|
190 |
+
# List frameworks that are handled by the InferenceAPI service. Useful to scan endpoints and check which models are
|
191 |
+
# deployed and running. Since 95% of the models are using the top 4 frameworks listed below, we scan only those by
|
192 |
+
# default. We still keep the full list of supported frameworks in case we want to scan all of them.
|
193 |
+
MAIN_INFERENCE_API_FRAMEWORKS = [
|
194 |
+
"diffusers",
|
195 |
+
"sentence-transformers",
|
196 |
+
"text-generation-inference",
|
197 |
+
"transformers",
|
198 |
+
]
|
199 |
+
|
200 |
+
ALL_INFERENCE_API_FRAMEWORKS = MAIN_INFERENCE_API_FRAMEWORKS + [
|
201 |
+
"adapter-transformers",
|
202 |
+
"allennlp",
|
203 |
+
"asteroid",
|
204 |
+
"bertopic",
|
205 |
+
"doctr",
|
206 |
+
"espnet",
|
207 |
+
"fairseq",
|
208 |
+
"fastai",
|
209 |
+
"fasttext",
|
210 |
+
"flair",
|
211 |
+
"k2",
|
212 |
+
"keras",
|
213 |
+
"mindspore",
|
214 |
+
"nemo",
|
215 |
+
"open_clip",
|
216 |
+
"paddlenlp",
|
217 |
+
"peft",
|
218 |
+
"pyannote-audio",
|
219 |
+
"sklearn",
|
220 |
+
"spacy",
|
221 |
+
"span-marker",
|
222 |
+
"speechbrain",
|
223 |
+
"stanza",
|
224 |
+
"timm",
|
225 |
+
]
|
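
The constants above resolve entirely from environment variables at import time. As a rough standalone sketch (stdlib only, not the module itself, and with the library's `_is_true` helper approximated by a simple truthy-string check), the cache-home and endpoint resolution order looks like this:

```python
import os

# Cache home: HF_HOME > $XDG_CACHE_HOME/huggingface > ~/.cache/huggingface
default_home = os.path.join(os.path.expanduser("~"), ".cache")
hf_home = os.path.expanduser(
    os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", default_home), "huggingface"))
)

# Endpoint: HF_ENDPOINT wins; otherwise staging (hub-ci) when HUGGINGFACE_CO_STAGING is truthy,
# else production. The truthiness check below approximates `_is_true`.
staging = (os.environ.get("HUGGINGFACE_CO_STAGING") or "").lower() in {"1", "true", "yes", "on"}
endpoint = os.getenv("HF_ENDPOINT") or (
    "https://hub-ci.huggingface.co" if staging else "https://huggingface.co"
)

print("cache home:", hf_home)
print("hub cache :", os.path.join(hf_home, "hub"))
print("endpoint  :", endpoint)
```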
meow/lib/python3.13/site-packages/huggingface_hub/fastai_utils.py
ADDED
@@ -0,0 +1,425 @@
1 |
+
import json
|
2 |
+
import os
|
3 |
+
from pathlib import Path
|
4 |
+
from pickle import DEFAULT_PROTOCOL, PicklingError
|
5 |
+
from typing import Any, Dict, List, Optional, Union
|
6 |
+
|
7 |
+
from packaging import version
|
8 |
+
|
9 |
+
from huggingface_hub import constants, snapshot_download
|
10 |
+
from huggingface_hub.hf_api import HfApi
|
11 |
+
from huggingface_hub.utils import (
|
12 |
+
SoftTemporaryDirectory,
|
13 |
+
get_fastai_version,
|
14 |
+
get_fastcore_version,
|
15 |
+
get_python_version,
|
16 |
+
)
|
17 |
+
|
18 |
+
from .utils import logging, validate_hf_hub_args
|
19 |
+
from .utils._runtime import _PY_VERSION # noqa: F401 # for backward compatibility...
|
20 |
+
|
21 |
+
|
22 |
+
logger = logging.get_logger(__name__)
|
23 |
+
|
24 |
+
|
25 |
+
def _check_fastai_fastcore_versions(
|
26 |
+
fastai_min_version: str = "2.4",
|
27 |
+
fastcore_min_version: str = "1.3.27",
|
28 |
+
):
|
29 |
+
"""
|
30 |
+
Checks that the installed fastai and fastcore versions are compatible for pickle serialization.
|
31 |
+
|
32 |
+
Args:
|
33 |
+
fastai_min_version (`str`, *optional*):
|
34 |
+
The minimum fastai version supported.
|
35 |
+
fastcore_min_version (`str`, *optional*):
|
36 |
+
The minimum fastcore version supported.
|
37 |
+
|
38 |
+
<Tip>
|
39 |
+
Raises the following error:
|
40 |
+
|
41 |
+
- [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError)
|
42 |
+
if the fastai or fastcore libraries are not available or are of an invalid version.
|
43 |
+
|
44 |
+
</Tip>
|
45 |
+
"""
|
46 |
+
|
47 |
+
if (get_fastcore_version() or get_fastai_version()) == "N/A":
|
48 |
+
raise ImportError(
|
49 |
+
f"fastai>={fastai_min_version} and fastcore>={fastcore_min_version} are"
|
50 |
+
f" required. Currently using fastai=={get_fastai_version()} and"
|
51 |
+
f" fastcore=={get_fastcore_version()}."
|
52 |
+
)
|
53 |
+
|
54 |
+
current_fastai_version = version.Version(get_fastai_version())
|
55 |
+
current_fastcore_version = version.Version(get_fastcore_version())
|
56 |
+
|
57 |
+
if current_fastai_version < version.Version(fastai_min_version):
|
58 |
+
raise ImportError(
|
59 |
+
"`push_to_hub_fastai` and `from_pretrained_fastai` require a"
|
60 |
+
f" fastai>={fastai_min_version} version, but you are using fastai version"
|
61 |
+
f" {get_fastai_version()} which is incompatible. Upgrade with `pip install"
|
62 |
+
" fastai==2.5.6`."
|
63 |
+
)
|
64 |
+
|
65 |
+
if current_fastcore_version < version.Version(fastcore_min_version):
|
66 |
+
raise ImportError(
|
67 |
+
"`push_to_hub_fastai` and `from_pretrained_fastai` require a"
|
68 |
+
f" fastcore>={fastcore_min_version} version, but you are using fastcore"
|
69 |
+
f" version {get_fastcore_version()} which is incompatible. Upgrade with"
|
70 |
+
" `pip install fastcore==1.3.27`."
|
71 |
+
)
|
72 |
+
|
73 |
+
|
74 |
+
def _check_fastai_fastcore_pyproject_versions(
|
75 |
+
storage_folder: str,
|
76 |
+
fastai_min_version: str = "2.4",
|
77 |
+
fastcore_min_version: str = "1.3.27",
|
78 |
+
):
|
79 |
+
"""
|
80 |
+
Checks that the `pyproject.toml` file in the directory `storage_folder` has fastai and fastcore versions
|
81 |
+
that are compatible with `from_pretrained_fastai` and `push_to_hub_fastai`. If `pyproject.toml` does not exist
|
82 |
+
or does not contain versions for fastai and fastcore, then it logs a warning.
|
83 |
+
|
84 |
+
Args:
|
85 |
+
storage_folder (`str`):
|
86 |
+
Folder to look for the `pyproject.toml` file.
|
87 |
+
fastai_min_version (`str`, *optional*):
|
88 |
+
The minimum fastai version supported.
|
89 |
+
fastcore_min_version (`str`, *optional*):
|
90 |
+
The minimum fastcore version supported.
|
91 |
+
|
92 |
+
<Tip>
|
93 |
+
Raises the following errors:
|
94 |
+
|
95 |
+
- [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError)
|
96 |
+
if the `toml` module is not installed.
|
97 |
+
- [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError)
|
98 |
+
if the `pyproject.toml` indicates a lower than minimum supported version of fastai or fastcore.
|
99 |
+
|
100 |
+
</Tip>
|
101 |
+
"""
|
102 |
+
|
103 |
+
try:
|
104 |
+
import toml
|
105 |
+
except ModuleNotFoundError:
|
106 |
+
raise ImportError(
|
107 |
+
"`push_to_hub_fastai` and `from_pretrained_fastai` require the toml module."
|
108 |
+
" Install it with `pip install toml`."
|
109 |
+
)
|
110 |
+
|
111 |
+
# Checks that a `pyproject.toml`, with `build-system` and `requires` sections, exists in the repository. If so, get a list of required packages.
|
112 |
+
if not os.path.isfile(f"{storage_folder}/pyproject.toml"):
|
113 |
+
logger.warning(
|
114 |
+
"There is no `pyproject.toml` in the repository that contains the fastai"
|
115 |
+
" `Learner`. The `pyproject.toml` would allow us to verify that your fastai"
|
116 |
+
" and fastcore versions are compatible with those of the model you want to"
|
117 |
+
" load."
|
118 |
+
)
|
119 |
+
return
|
120 |
+
pyproject_toml = toml.load(f"{storage_folder}/pyproject.toml")
|
121 |
+
|
122 |
+
if "build-system" not in pyproject_toml.keys():
|
123 |
+
logger.warning(
|
124 |
+
"There is no `build-system` section in the pyproject.toml of the repository"
|
125 |
+
" that contains the fastai `Learner`. The `build-system` would allow us to"
|
126 |
+
" verify that your fastai and fastcore versions are compatible with those"
|
127 |
+
" of the model you want to load."
|
128 |
+
)
|
129 |
+
return
|
130 |
+
build_system_toml = pyproject_toml["build-system"]
|
131 |
+
|
132 |
+
if "requires" not in build_system_toml.keys():
|
133 |
+
logger.warning(
|
134 |
+
"There is no `requires` section in the pyproject.toml of the repository"
|
135 |
+
" that contains the fastai `Learner`. The `requires` would allow us to"
|
136 |
+
" verify that your fastai and fastcore versions are compatible with those"
|
137 |
+
" of the model you want to load."
|
138 |
+
)
|
139 |
+
return
|
140 |
+
package_versions = build_system_toml["requires"]
|
141 |
+
|
142 |
+
# Extracts contains fastai and fastcore versions from `pyproject.toml` if available.
|
143 |
+
# If a package is specified without a version (e.g. "fastai" instead of "fastai=2.4"), the version check is skipped.
|
144 |
+
fastai_packages = [pck for pck in package_versions if pck.startswith("fastai")]
|
145 |
+
if len(fastai_packages) == 0:
|
146 |
+
logger.warning("The repository does not have a fastai version specified in the `pyproject.toml`.")
|
147 |
+
# fastai_version is an empty string if not specified
|
148 |
+
else:
|
149 |
+
fastai_version = str(fastai_packages[0]).partition("=")[2]
|
150 |
+
if fastai_version != "" and version.Version(fastai_version) < version.Version(fastai_min_version):
|
151 |
+
raise ImportError(
|
152 |
+
"`from_pretrained_fastai` requires"
|
153 |
+
f" fastai>={fastai_min_version} version but the model to load uses"
|
154 |
+
f" {fastai_version} which is incompatible."
|
155 |
+
)
|
156 |
+
|
157 |
+
fastcore_packages = [pck for pck in package_versions if pck.startswith("fastcore")]
|
158 |
+
if len(fastcore_packages) == 0:
|
159 |
+
logger.warning("The repository does not have a fastcore version specified in the `pyproject.toml`.")
|
160 |
+
# fastcore_version is an empty string if not specified
|
161 |
+
else:
|
162 |
+
fastcore_version = str(fastcore_packages[0]).partition("=")[2]
|
163 |
+
if fastcore_version != "" and version.Version(fastcore_version) < version.Version(fastcore_min_version):
|
164 |
+
raise ImportError(
|
165 |
+
"`from_pretrained_fastai` requires"
|
166 |
+
f" fastcore>={fastcore_min_version} version, but you are using fastcore"
|
167 |
+
f" version {fastcore_version} which is incompatible."
|
168 |
+
)
|
169 |
+
|
170 |
+
|
171 |
+
README_TEMPLATE = """---
|
172 |
+
tags:
|
173 |
+
- fastai
|
174 |
+
---
|
175 |
+
|
176 |
+
# Amazing!
|
177 |
+
|
178 |
+
🥳 Congratulations on hosting your fastai model on the Hugging Face Hub!
|
179 |
+
|
180 |
+
# Some next steps
|
181 |
+
1. Fill out this model card with more information (see the template below and the [documentation here](https://huggingface.co/docs/hub/model-repos))!
|
182 |
+
|
183 |
+
2. Create a demo in Gradio or Streamlit using 🤗 Spaces ([documentation here](https://huggingface.co/docs/hub/spaces)).
|
184 |
+
|
185 |
+
3. Join the fastai community on the [Fastai Discord](https://discord.com/invite/YKrxeNn)!
|
186 |
+
|
187 |
+
Greetings fellow fastlearner 🤝! Don't forget to delete this content from your model card.
|
188 |
+
|
189 |
+
|
190 |
+
---
|
191 |
+
|
192 |
+
|
193 |
+
# Model card
|
194 |
+
|
195 |
+
## Model description
|
196 |
+
More information needed
|
197 |
+
|
198 |
+
## Intended uses & limitations
|
199 |
+
More information needed
|
200 |
+
|
201 |
+
## Training and evaluation data
|
202 |
+
More information needed
|
203 |
+
"""
|
204 |
+
|
205 |
+
PYPROJECT_TEMPLATE = f"""[build-system]
|
206 |
+
requires = ["setuptools>=40.8.0", "wheel", "python={get_python_version()}", "fastai={get_fastai_version()}", "fastcore={get_fastcore_version()}"]
|
207 |
+
build-backend = "setuptools.build_meta:__legacy__"
|
208 |
+
"""
|
209 |
+
|
210 |
+
|
211 |
+
def _create_model_card(repo_dir: Path):
|
212 |
+
"""
|
213 |
+
Creates a model card for the repository.
|
214 |
+
|
215 |
+
Args:
|
216 |
+
repo_dir (`Path`):
|
217 |
+
Directory where model card is created.
|
218 |
+
"""
|
219 |
+
readme_path = repo_dir / "README.md"
|
220 |
+
|
221 |
+
if not readme_path.exists():
|
222 |
+
with readme_path.open("w", encoding="utf-8") as f:
|
223 |
+
f.write(README_TEMPLATE)
|
224 |
+
|
225 |
+
|
226 |
+
def _create_model_pyproject(repo_dir: Path):
|
227 |
+
"""
|
228 |
+
Creates a `pyproject.toml` for the repository.
|
229 |
+
|
230 |
+
Args:
|
231 |
+
repo_dir (`Path`):
|
232 |
+
Directory where `pyproject.toml` is created.
|
233 |
+
"""
|
234 |
+
pyproject_path = repo_dir / "pyproject.toml"
|
235 |
+
|
236 |
+
if not pyproject_path.exists():
|
237 |
+
with pyproject_path.open("w", encoding="utf-8") as f:
|
238 |
+
f.write(PYPROJECT_TEMPLATE)
|
239 |
+
|
240 |
+
|
241 |
+
def _save_pretrained_fastai(
|
242 |
+
learner,
|
243 |
+
save_directory: Union[str, Path],
|
244 |
+
config: Optional[Dict[str, Any]] = None,
|
245 |
+
):
|
246 |
+
"""
|
247 |
+
Saves a fastai learner to `save_directory` in pickle format using the default pickle protocol for the version of python used.
|
248 |
+
|
249 |
+
Args:
|
250 |
+
learner (`Learner`):
|
251 |
+
The `fastai.Learner` you'd like to save.
|
252 |
+
save_directory (`str` or `Path`):
|
253 |
+
Specific directory in which you want to save the fastai learner.
|
254 |
+
config (`dict`, *optional*):
|
255 |
+
Configuration object. Will be uploaded as a .json file. Example: 'https://huggingface.co/espejelomar/fastai-pet-breeds-classification/blob/main/config.json'.
|
256 |
+
|
257 |
+
<Tip>
|
258 |
+
|
259 |
+
Raises the following error:
|
260 |
+
|
261 |
+
- [`RuntimeError`](https://docs.python.org/3/library/exceptions.html#RuntimeError)
|
262 |
+
if the config file provided is not a dictionary.
|
263 |
+
|
264 |
+
</Tip>
|
265 |
+
"""
|
266 |
+
_check_fastai_fastcore_versions()
|
267 |
+
|
268 |
+
os.makedirs(save_directory, exist_ok=True)
|
269 |
+
|
270 |
+
# if the user provides config then we update it with the fastai and fastcore versions in CONFIG_TEMPLATE.
|
271 |
+
if config is not None:
|
272 |
+
if not isinstance(config, dict):
|
273 |
+
raise RuntimeError(f"Provided config should be a dict. Got: '{type(config)}'")
|
274 |
+
path = os.path.join(save_directory, constants.CONFIG_NAME)
|
275 |
+
with open(path, "w") as f:
|
276 |
+
json.dump(config, f)
|
277 |
+
|
278 |
+
_create_model_card(Path(save_directory))
|
279 |
+
_create_model_pyproject(Path(save_directory))
|
280 |
+
|
281 |
+
# learner.export saves the model in `self.path`.
|
282 |
+
learner.path = Path(save_directory)
|
283 |
+
os.makedirs(save_directory, exist_ok=True)
|
284 |
+
try:
|
285 |
+
learner.export(
|
286 |
+
fname="model.pkl",
|
287 |
+
pickle_protocol=DEFAULT_PROTOCOL,
|
288 |
+
)
|
289 |
+
except PicklingError:
|
290 |
+
raise PicklingError(
|
291 |
+
"You are using a lambda function, i.e., an anonymous function. `pickle`"
|
292 |
+
" cannot pickle function objects and requires that all functions have"
|
293 |
+
" names. One possible solution is to name the function."
|
294 |
+
)
|
295 |
+
|
296 |
+
|
297 |
+
@validate_hf_hub_args
|
298 |
+
def from_pretrained_fastai(
|
299 |
+
repo_id: str,
|
300 |
+
revision: Optional[str] = None,
|
301 |
+
):
|
302 |
+
"""
|
303 |
+
Load pretrained fastai model from the Hub or from a local directory.
|
304 |
+
|
305 |
+
Args:
|
306 |
+
repo_id (`str`):
|
307 |
+
The location where the pickled fastai.Learner is. It can be either of the two:
|
308 |
+
- Hosted on the Hugging Face Hub. E.g.: 'espejelomar/fastai-pet-breeds-classification' or 'distilgpt2'.
|
309 |
+
You can add a `revision` by appending `@` at the end of `repo_id`. E.g.: `dbmdz/bert-base-german-cased@main`.
|
310 |
+
Revision is the specific model version to use. Since we use a git-based system for storing models and other
|
311 |
+
artifacts on the Hugging Face Hub, it can be a branch name, a tag name, or a commit id.
|
312 |
+
- Hosted locally. `repo_id` would be a directory containing the pickle and a pyproject.toml
|
313 |
+
indicating the fastai and fastcore versions used to build the `fastai.Learner`. E.g.: `./my_model_directory/`.
|
314 |
+
revision (`str`, *optional*):
|
315 |
+
Revision at which the repo's files are downloaded. See documentation of `snapshot_download`.
|
316 |
+
|
317 |
+
Returns:
|
318 |
+
The `fastai.Learner` model in the `repo_id` repo.
|
319 |
+
"""
|
320 |
+
_check_fastai_fastcore_versions()
|
321 |
+
|
322 |
+
# Load the `repo_id` repo.
|
323 |
+
# `snapshot_download` returns the folder where the model was stored.
|
324 |
+
# `cache_dir` will be the default '/root/.cache/huggingface/hub'
|
325 |
+
if not os.path.isdir(repo_id):
|
326 |
+
storage_folder = snapshot_download(
|
327 |
+
repo_id=repo_id,
|
328 |
+
revision=revision,
|
329 |
+
library_name="fastai",
|
330 |
+
library_version=get_fastai_version(),
|
331 |
+
)
|
332 |
+
else:
|
333 |
+
storage_folder = repo_id
|
334 |
+
|
335 |
+
_check_fastai_fastcore_pyproject_versions(storage_folder)
|
336 |
+
|
337 |
+
from fastai.learner import load_learner # type: ignore
|
338 |
+
|
339 |
+
return load_learner(os.path.join(storage_folder, "model.pkl"))
|
340 |
+
|
341 |
+
|
342 |
+
@validate_hf_hub_args
|
343 |
+
def push_to_hub_fastai(
|
344 |
+
learner,
|
345 |
+
*,
|
346 |
+
repo_id: str,
|
347 |
+
commit_message: str = "Push FastAI model using huggingface_hub.",
|
348 |
+
private: Optional[bool] = None,
|
349 |
+
token: Optional[str] = None,
|
350 |
+
config: Optional[dict] = None,
|
351 |
+
branch: Optional[str] = None,
|
352 |
+
create_pr: Optional[bool] = None,
|
353 |
+
allow_patterns: Optional[Union[List[str], str]] = None,
|
354 |
+
ignore_patterns: Optional[Union[List[str], str]] = None,
|
355 |
+
delete_patterns: Optional[Union[List[str], str]] = None,
|
356 |
+
api_endpoint: Optional[str] = None,
|
357 |
+
):
|
358 |
+
"""
|
359 |
+
Upload learner checkpoint files to the Hub.
|
360 |
+
|
361 |
+
Use `allow_patterns` and `ignore_patterns` to precisely filter which files should be pushed to the hub. Use
|
362 |
+
`delete_patterns` to delete existing remote files in the same commit. See [`upload_folder`] reference for more
|
363 |
+
details.
|
364 |
+
|
365 |
+
Args:
|
366 |
+
learner (`Learner`):
|
367 |
+
The `fastai.Learner` you'd like to push to the Hub.
|
368 |
+
repo_id (`str`):
|
369 |
+
The repository id for your model in Hub in the format of "namespace/repo_name". The namespace can be your individual account or an organization to which you have write access (for example, 'stanfordnlp/stanza-de').
|
370 |
+
commit_message (`str`, *optional*):
|
371 |
+
Message to commit while pushing. Defaults to `"Push FastAI model using huggingface_hub."`.
|
372 |
+
private (`bool`, *optional*):
|
373 |
+
Whether or not the repository created should be private.
|
374 |
+
If `None` (default), the repo will be public unless the organization's default is private.
|
375 |
+
token (`str`, *optional*):
|
376 |
+
The Hugging Face account token to use as HTTP bearer authorization for remote files. If `None`, you will be prompted for the token.
|
377 |
+
config (`dict`, *optional*):
|
378 |
+
Configuration object to be saved alongside the model weights.
|
379 |
+
branch (`str`, *optional*):
|
380 |
+
The git branch on which to push the model. This defaults to
|
381 |
+
the default branch as specified in your repository, which
|
382 |
+
defaults to `"main"`.
|
383 |
+
create_pr (`boolean`, *optional*):
|
384 |
+
Whether or not to create a Pull Request from `branch` with that commit.
|
385 |
+
Defaults to `False`.
|
386 |
+
api_endpoint (`str`, *optional*):
|
387 |
+
The API endpoint to use when pushing the model to the hub.
|
388 |
+
allow_patterns (`List[str]` or `str`, *optional*):
|
389 |
+
If provided, only files matching at least one pattern are pushed.
|
390 |
+
ignore_patterns (`List[str]` or `str`, *optional*):
|
391 |
+
If provided, files matching any of the patterns are not pushed.
|
392 |
+
delete_patterns (`List[str]` or `str`, *optional*):
|
393 |
+
If provided, remote files matching any of the patterns will be deleted from the repo.
|
394 |
+
|
395 |
+
Returns:
|
396 |
+
The url of the commit of your model in the given repository.
|
397 |
+
|
398 |
+
<Tip>
|
399 |
+
|
400 |
+
Raises the following error:
|
401 |
+
|
402 |
+
- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
|
403 |
+
if the user is not logged in to the Hugging Face Hub.
|
404 |
+
|
405 |
+
</Tip>
|
406 |
+
"""
|
407 |
+
_check_fastai_fastcore_versions()
|
408 |
+
api = HfApi(endpoint=api_endpoint)
|
409 |
+
repo_id = api.create_repo(repo_id=repo_id, token=token, private=private, exist_ok=True).repo_id
|
410 |
+
|
411 |
+
# Push the files to the repo in a single commit
|
412 |
+
with SoftTemporaryDirectory() as tmp:
|
413 |
+
saved_path = Path(tmp) / repo_id
|
414 |
+
_save_pretrained_fastai(learner, saved_path, config=config)
|
415 |
+
return api.upload_folder(
|
416 |
+
repo_id=repo_id,
|
417 |
+
token=token,
|
418 |
+
folder_path=saved_path,
|
419 |
+
commit_message=commit_message,
|
420 |
+
revision=branch,
|
421 |
+
create_pr=create_pr,
|
422 |
+
allow_patterns=allow_patterns,
|
423 |
+
ignore_patterns=ignore_patterns,
|
424 |
+
delete_patterns=delete_patterns,
|
425 |
+
)
|
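
Putting the helpers above together gives a push/load round trip. Below is a minimal usage sketch; the training code, repo id, and `config` payload are illustrative assumptions (not part of this module), and it presumes a recent fastai (>=2.7 for `vision_learner`), fastcore>=1.3.27, and a logged-in Hub account:

```python
from fastai.vision.all import (
    ImageDataLoaders, Resize, URLs, error_rate, resnet18, untar_data, vision_learner,
)
from huggingface_hub import from_pretrained_fastai, push_to_hub_fastai

# Train a tiny Learner on fastai's bundled MNIST sample (a few MB download).
path = untar_data(URLs.MNIST_SAMPLE)
dls = ImageDataLoaders.from_folder(path, train="train", valid="valid", item_tfms=Resize(28))
learn = vision_learner(dls, resnet18, metrics=error_rate)
learn.fine_tune(1)

# Push model.pkl, README.md, pyproject.toml (and config.json, since `config` is given) in one commit.
push_to_hub_fastai(
    learn,
    repo_id="my-username/mnist-sample-fastai",   # hypothetical namespace/repo_name
    config={"task": "image-classification"},     # optional, saved alongside the weights
)

# Later (or elsewhere), reload the Learner from the Hub or from a local directory containing model.pkl.
learn = from_pretrained_fastai("my-username/mnist-sample-fastai")
```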
meow/lib/python3.13/site-packages/huggingface_hub/hf_file_system.py
ADDED
@@ -0,0 +1,1140 @@
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import tempfile
|
4 |
+
from collections import deque
|
5 |
+
from dataclasses import dataclass, field
|
6 |
+
from datetime import datetime
|
7 |
+
from itertools import chain
|
8 |
+
from pathlib import Path
|
9 |
+
from typing import Any, Dict, Iterator, List, NoReturn, Optional, Tuple, Union
|
10 |
+
from urllib.parse import quote, unquote
|
11 |
+
|
12 |
+
import fsspec
|
13 |
+
from fsspec.callbacks import _DEFAULT_CALLBACK, NoOpCallback, TqdmCallback
|
14 |
+
from fsspec.utils import isfilelike
|
15 |
+
from requests import Response
|
16 |
+
|
17 |
+
from . import constants
|
18 |
+
from ._commit_api import CommitOperationCopy, CommitOperationDelete
|
19 |
+
from .errors import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError
|
20 |
+
from .file_download import hf_hub_url, http_get
|
21 |
+
from .hf_api import HfApi, LastCommitInfo, RepoFile
|
22 |
+
from .utils import HFValidationError, hf_raise_for_status, http_backoff
|
23 |
+
|
24 |
+
|
25 |
+
# Regex used to match special revisions with "/" in them (see #1710)
|
26 |
+
SPECIAL_REFS_REVISION_REGEX = re.compile(
|
27 |
+
r"""
|
28 |
+
(^refs\/convert\/\w+) # `refs/convert/parquet` revisions
|
29 |
+
|
|
30 |
+
(^refs\/pr\/\d+) # PR revisions
|
31 |
+
""",
|
32 |
+
re.VERBOSE,
|
33 |
+
)
|
34 |
+
|
35 |
+
|
36 |
+
@dataclass
|
37 |
+
class HfFileSystemResolvedPath:
|
38 |
+
"""Data structure containing information about a resolved Hugging Face file system path."""
|
39 |
+
|
40 |
+
repo_type: str
|
41 |
+
repo_id: str
|
42 |
+
revision: str
|
43 |
+
path_in_repo: str
|
44 |
+
# The part placed after '@' in the initial path. It can be a quoted or unquoted refs revision.
|
45 |
+
# Used to reconstruct the unresolved path to return to the user.
|
46 |
+
_raw_revision: Optional[str] = field(default=None, repr=False)
|
47 |
+
|
48 |
+
def unresolve(self) -> str:
|
49 |
+
repo_path = constants.REPO_TYPES_URL_PREFIXES.get(self.repo_type, "") + self.repo_id
|
50 |
+
if self._raw_revision:
|
51 |
+
return f"{repo_path}@{self._raw_revision}/{self.path_in_repo}".rstrip("/")
|
52 |
+
elif self.revision != constants.DEFAULT_REVISION:
|
53 |
+
return f"{repo_path}@{safe_revision(self.revision)}/{self.path_in_repo}".rstrip("/")
|
54 |
+
else:
|
55 |
+
return f"{repo_path}/{self.path_in_repo}".rstrip("/")
|
56 |
+
|
57 |
+
|
58 |
+
class HfFileSystem(fsspec.AbstractFileSystem):
|
59 |
+
"""
|
60 |
+
Access a remote Hugging Face Hub repository as if it were a local file system.
|
61 |
+
|
62 |
+
<Tip warning={true}>
|
63 |
+
|
64 |
+
[`HfFileSystem`] provides fsspec compatibility, which is useful for libraries that require it (e.g., reading
|
65 |
+
Hugging Face datasets directly with `pandas`). However, it introduces additional overhead due to this compatibility
|
66 |
+
layer. For better performance and reliability, it's recommended to use `HfApi` methods when possible.
|
67 |
+
|
68 |
+
</Tip>
|
69 |
+
|
70 |
+
Args:
|
71 |
+
token (`str` or `bool`, *optional*):
|
72 |
+
A valid user access token (string). Defaults to the locally saved
|
73 |
+
token, which is the recommended method for authentication (see
|
74 |
+
https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
|
75 |
+
To disable authentication, pass `False`.
|
76 |
+
endpoint (`str`, *optional*):
|
77 |
+
Endpoint of the Hub. Defaults to <https://huggingface.co>.
|
78 |
+
Usage:
|
79 |
+
|
80 |
+
```python
|
81 |
+
>>> from huggingface_hub import HfFileSystem
|
82 |
+
|
83 |
+
>>> fs = HfFileSystem()
|
84 |
+
|
85 |
+
>>> # List files
|
86 |
+
>>> fs.glob("my-username/my-model/*.bin")
|
87 |
+
['my-username/my-model/pytorch_model.bin']
|
88 |
+
>>> fs.ls("datasets/my-username/my-dataset", detail=False)
|
89 |
+
['datasets/my-username/my-dataset/.gitattributes', 'datasets/my-username/my-dataset/README.md', 'datasets/my-username/my-dataset/data.json']
|
90 |
+
|
91 |
+
>>> # Read/write files
|
92 |
+
>>> with fs.open("my-username/my-model/pytorch_model.bin") as f:
|
93 |
+
... data = f.read()
|
94 |
+
>>> with fs.open("my-username/my-model/pytorch_model.bin", "wb") as f:
|
95 |
+
... f.write(data)
|
96 |
+
```
|
97 |
+
"""
|
98 |
+
|
99 |
+
root_marker = ""
|
100 |
+
protocol = "hf"
|
101 |
+
|
102 |
+
def __init__(
|
103 |
+
self,
|
104 |
+
*args,
|
105 |
+
endpoint: Optional[str] = None,
|
106 |
+
token: Union[bool, str, None] = None,
|
107 |
+
**storage_options,
|
108 |
+
):
|
109 |
+
super().__init__(*args, **storage_options)
|
110 |
+
self.endpoint = endpoint or constants.ENDPOINT
|
111 |
+
self.token = token
|
112 |
+
self._api = HfApi(endpoint=endpoint, token=token)
|
113 |
+
# Maps (repo_type, repo_id, revision) to a 2-tuple with:
|
114 |
+
# * the 1st element indicating whether the repository and the revision exist
|
115 |
+
# * the 2nd element being the exception raised if the repository or revision doesn't exist
|
116 |
+
self._repo_and_revision_exists_cache: Dict[
|
117 |
+
Tuple[str, str, Optional[str]], Tuple[bool, Optional[Exception]]
|
118 |
+
] = {}
|
119 |
+
|
120 |
+
def _repo_and_revision_exist(
|
121 |
+
self, repo_type: str, repo_id: str, revision: Optional[str]
|
122 |
+
) -> Tuple[bool, Optional[Exception]]:
|
123 |
+
if (repo_type, repo_id, revision) not in self._repo_and_revision_exists_cache:
|
124 |
+
try:
|
125 |
+
self._api.repo_info(
|
126 |
+
repo_id, revision=revision, repo_type=repo_type, timeout=constants.HF_HUB_ETAG_TIMEOUT
|
127 |
+
)
|
128 |
+
except (RepositoryNotFoundError, HFValidationError) as e:
|
129 |
+
self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] = False, e
|
130 |
+
self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = False, e
|
131 |
+
except RevisionNotFoundError as e:
|
132 |
+
self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] = False, e
|
133 |
+
self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = True, None
|
134 |
+
else:
|
135 |
+
self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] = True, None
|
136 |
+
self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = True, None
|
137 |
+
return self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)]
|
138 |
+
|
139 |
+
def resolve_path(self, path: str, revision: Optional[str] = None) -> HfFileSystemResolvedPath:
|
140 |
+
"""
|
141 |
+
Resolve a Hugging Face file system path into its components.
|
142 |
+
|
143 |
+
Args:
|
144 |
+
path (`str`):
|
145 |
+
Path to resolve.
|
146 |
+
revision (`str`, *optional*):
|
147 |
+
The revision of the repo to resolve. Defaults to the revision specified in the path.
|
148 |
+
|
149 |
+
Returns:
|
150 |
+
[`HfFileSystemResolvedPath`]: Resolved path information containing `repo_type`, `repo_id`, `revision` and `path_in_repo`.
|
151 |
+
|
152 |
+
Raises:
|
153 |
+
`ValueError`:
|
154 |
+
If path contains conflicting revision information.
|
155 |
+
`NotImplementedError`:
|
156 |
+
If trying to list repositories.
|
157 |
+
"""
|
158 |
+
|
159 |
+
def _align_revision_in_path_with_revision(
|
160 |
+
revision_in_path: Optional[str], revision: Optional[str]
|
161 |
+
) -> Optional[str]:
|
162 |
+
if revision is not None:
|
163 |
+
if revision_in_path is not None and revision_in_path != revision:
|
164 |
+
raise ValueError(
|
165 |
+
f'Revision specified in path ("{revision_in_path}") and in `revision` argument ("{revision}")'
|
166 |
+
" are not the same."
|
167 |
+
)
|
168 |
+
else:
|
169 |
+
revision = revision_in_path
|
170 |
+
return revision
|
171 |
+
|
172 |
+
path = self._strip_protocol(path)
|
173 |
+
if not path:
|
174 |
+
# can't list repositories at root
|
175 |
+
raise NotImplementedError("Access to repositories lists is not implemented.")
|
176 |
+
elif path.split("/")[0] + "/" in constants.REPO_TYPES_URL_PREFIXES.values():
|
177 |
+
if "/" not in path:
|
178 |
+
# can't list repositories at the repository type level
|
179 |
+
raise NotImplementedError("Access to repositories lists is not implemented.")
|
180 |
+
repo_type, path = path.split("/", 1)
|
181 |
+
repo_type = constants.REPO_TYPES_MAPPING[repo_type]
|
182 |
+
else:
|
183 |
+
repo_type = constants.REPO_TYPE_MODEL
|
184 |
+
if path.count("/") > 0:
|
185 |
+
if "@" in path:
|
186 |
+
repo_id, revision_in_path = path.split("@", 1)
|
187 |
+
if "/" in revision_in_path:
|
188 |
+
match = SPECIAL_REFS_REVISION_REGEX.search(revision_in_path)
|
189 |
+
if match is not None and revision in (None, match.group()):
|
190 |
+
# Handle `refs/convert/parquet` and PR revisions separately
|
191 |
+
path_in_repo = SPECIAL_REFS_REVISION_REGEX.sub("", revision_in_path).lstrip("/")
|
192 |
+
revision_in_path = match.group()
|
193 |
+
else:
|
194 |
+
revision_in_path, path_in_repo = revision_in_path.split("/", 1)
|
195 |
+
else:
|
196 |
+
path_in_repo = ""
|
197 |
+
revision = _align_revision_in_path_with_revision(unquote(revision_in_path), revision)
|
198 |
+
repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
|
199 |
+
if not repo_and_revision_exist:
|
200 |
+
_raise_file_not_found(path, err)
|
201 |
+
else:
|
202 |
+
revision_in_path = None
|
203 |
+
repo_id_with_namespace = "/".join(path.split("/")[:2])
|
204 |
+
path_in_repo_with_namespace = "/".join(path.split("/")[2:])
|
205 |
+
repo_id_without_namespace = path.split("/")[0]
|
206 |
+
path_in_repo_without_namespace = "/".join(path.split("/")[1:])
|
207 |
+
repo_id = repo_id_with_namespace
|
208 |
+
path_in_repo = path_in_repo_with_namespace
|
209 |
+
repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
|
210 |
+
if not repo_and_revision_exist:
|
211 |
+
if isinstance(err, (RepositoryNotFoundError, HFValidationError)):
|
212 |
+
repo_id = repo_id_without_namespace
|
213 |
+
path_in_repo = path_in_repo_without_namespace
|
214 |
+
repo_and_revision_exist, _ = self._repo_and_revision_exist(repo_type, repo_id, revision)
|
215 |
+
if not repo_and_revision_exist:
|
216 |
+
_raise_file_not_found(path, err)
|
217 |
+
else:
|
218 |
+
_raise_file_not_found(path, err)
|
219 |
+
else:
|
220 |
+
repo_id = path
|
221 |
+
path_in_repo = ""
|
222 |
+
if "@" in path:
|
223 |
+
repo_id, revision_in_path = path.split("@", 1)
|
224 |
+
revision = _align_revision_in_path_with_revision(unquote(revision_in_path), revision)
|
225 |
+
else:
|
226 |
+
revision_in_path = None
|
227 |
+
repo_and_revision_exist, _ = self._repo_and_revision_exist(repo_type, repo_id, revision)
|
228 |
+
if not repo_and_revision_exist:
|
229 |
+
raise NotImplementedError("Access to repositories lists is not implemented.")
|
230 |
+
|
231 |
+
revision = revision if revision is not None else constants.DEFAULT_REVISION
|
232 |
+
return HfFileSystemResolvedPath(repo_type, repo_id, revision, path_in_repo, _raw_revision=revision_in_path)
|
233 |
+
|
234 |
+
def invalidate_cache(self, path: Optional[str] = None) -> None:
|
235 |
+
"""
|
236 |
+
Clear the cache for a given path.
|
237 |
+
|
238 |
+
For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.invalidate_cache).
|
239 |
+
|
240 |
+
Args:
|
241 |
+
path (`str`, *optional*):
|
242 |
+
Path to clear from cache. If not provided, clear the entire cache.
|
243 |
+
|
244 |
+
"""
|
245 |
+
if not path:
|
246 |
+
self.dircache.clear()
|
247 |
+
self._repo_and_revision_exists_cache.clear()
|
248 |
+
else:
|
249 |
+
resolved_path = self.resolve_path(path)
|
250 |
+
path = resolved_path.unresolve()
|
251 |
+
while path:
|
252 |
+
self.dircache.pop(path, None)
|
253 |
+
path = self._parent(path)
|
254 |
+
|
255 |
+
# Only clear repo cache if path is to repo root
|
256 |
+
if not resolved_path.path_in_repo:
|
257 |
+
self._repo_and_revision_exists_cache.pop((resolved_path.repo_type, resolved_path.repo_id, None), None)
|
258 |
+
self._repo_and_revision_exists_cache.pop(
|
259 |
+
(resolved_path.repo_type, resolved_path.repo_id, resolved_path.revision), None
|
260 |
+
)
|
261 |
+
|
262 |
+
def _open(
|
263 |
+
self,
|
264 |
+
path: str,
|
265 |
+
mode: str = "rb",
|
266 |
+
revision: Optional[str] = None,
|
267 |
+
block_size: Optional[int] = None,
|
268 |
+
**kwargs,
|
269 |
+
) -> "HfFileSystemFile":
|
270 |
+
if "a" in mode:
|
271 |
+
raise NotImplementedError("Appending to remote files is not yet supported.")
|
272 |
+
if block_size == 0:
|
273 |
+
return HfFileSystemStreamFile(self, path, mode=mode, revision=revision, block_size=block_size, **kwargs)
|
274 |
+
else:
|
275 |
+
return HfFileSystemFile(self, path, mode=mode, revision=revision, block_size=block_size, **kwargs)
|
276 |
+
|
277 |
+
def _rm(self, path: str, revision: Optional[str] = None, **kwargs) -> None:
|
278 |
+
resolved_path = self.resolve_path(path, revision=revision)
|
279 |
+
self._api.delete_file(
|
280 |
+
path_in_repo=resolved_path.path_in_repo,
|
281 |
+
repo_id=resolved_path.repo_id,
|
282 |
+
token=self.token,
|
283 |
+
repo_type=resolved_path.repo_type,
|
284 |
+
revision=resolved_path.revision,
|
285 |
+
commit_message=kwargs.get("commit_message"),
|
286 |
+
commit_description=kwargs.get("commit_description"),
|
287 |
+
)
|
288 |
+
self.invalidate_cache(path=resolved_path.unresolve())
|
289 |
+
|
290 |
+
def rm(
|
291 |
+
self,
|
292 |
+
path: str,
|
293 |
+
recursive: bool = False,
|
294 |
+
maxdepth: Optional[int] = None,
|
295 |
+
revision: Optional[str] = None,
|
296 |
+
**kwargs,
|
297 |
+
) -> None:
|
298 |
+
"""
|
299 |
+
Delete files from a repository.
|
300 |
+
|
301 |
+
For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.rm).
|
302 |
+
|
303 |
+
<Tip warning={true}>
|
304 |
+
|
305 |
+
Note: When possible, use `HfApi.delete_file()` for better performance.
|
306 |
+
|
307 |
+
</Tip>
|
308 |
+
|
309 |
+
Args:
|
310 |
+
path (`str`):
|
311 |
+
Path to delete.
|
312 |
+
recursive (`bool`, *optional*):
|
313 |
+
If True, delete directory and all its contents. Defaults to False.
|
314 |
+
maxdepth (`int`, *optional*):
|
315 |
+
Maximum number of subdirectories to visit when deleting recursively.
|
316 |
+
revision (`str`, *optional*):
|
317 |
+
The git revision to delete from.
|
318 |
+
|
319 |
+
"""
|
320 |
+
resolved_path = self.resolve_path(path, revision=revision)
|
321 |
+
paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth, revision=revision)
|
322 |
+
paths_in_repo = [self.resolve_path(path).path_in_repo for path in paths if not self.isdir(path)]
|
323 |
+
operations = [CommitOperationDelete(path_in_repo=path_in_repo) for path_in_repo in paths_in_repo]
|
324 |
+
commit_message = f"Delete {path} "
|
325 |
+
commit_message += "recursively " if recursive else ""
|
326 |
+
commit_message += f"up to depth {maxdepth} " if maxdepth is not None else ""
|
327 |
+
# TODO: use `commit_description` to list all the deleted paths?
|
328 |
+
self._api.create_commit(
|
329 |
+
repo_id=resolved_path.repo_id,
|
330 |
+
repo_type=resolved_path.repo_type,
|
331 |
+
token=self.token,
|
332 |
+
operations=operations,
|
333 |
+
revision=resolved_path.revision,
|
334 |
+
commit_message=kwargs.get("commit_message", commit_message),
|
335 |
+
commit_description=kwargs.get("commit_description"),
|
336 |
+
)
|
337 |
+
self.invalidate_cache(path=resolved_path.unresolve())
|
338 |
+
|
339 |
+
def ls(
|
340 |
+
self, path: str, detail: bool = True, refresh: bool = False, revision: Optional[str] = None, **kwargs
|
341 |
+
) -> List[Union[str, Dict[str, Any]]]:
|
342 |
+
"""
|
343 |
+
List the contents of a directory.
|
344 |
+
|
345 |
+
For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.ls).
|
346 |
+
|
347 |
+
<Tip warning={true}>
|
348 |
+
|
349 |
+
Note: When possible, use `HfApi.list_repo_tree()` for better performance.
|
350 |
+
|
351 |
+
</Tip>
|
352 |
+
|
353 |
+
Args:
|
354 |
+
path (`str`):
|
355 |
+
Path to the directory.
|
356 |
+
detail (`bool`, *optional*):
|
357 |
+
If True, returns a list of dictionaries containing file information. If False,
|
358 |
+
returns a list of file paths. Defaults to True.
|
359 |
+
refresh (`bool`, *optional*):
|
360 |
+
If True, bypass the cache and fetch the latest data. Defaults to False.
|
361 |
+
revision (`str`, *optional*):
|
362 |
+
The git revision to list from.
|
363 |
+
|
364 |
+
Returns:
|
365 |
+
`List[Union[str, Dict[str, Any]]]`: List of file paths (if detail=False) or list of file information
|
366 |
+
dictionaries (if detail=True).
|
367 |
+
"""
|
368 |
+
resolved_path = self.resolve_path(path, revision=revision)
|
369 |
+
path = resolved_path.unresolve()
|
370 |
+
kwargs = {"expand_info": detail, **kwargs}
|
371 |
+
try:
|
372 |
+
out = self._ls_tree(path, refresh=refresh, revision=revision, **kwargs)
|
373 |
+
except EntryNotFoundError:
|
374 |
+
# Path could be a file
|
375 |
+
if not resolved_path.path_in_repo:
|
376 |
+
_raise_file_not_found(path, None)
|
377 |
+
out = self._ls_tree(self._parent(path), refresh=refresh, revision=revision, **kwargs)
|
378 |
+
out = [o for o in out if o["name"] == path]
|
379 |
+
if len(out) == 0:
|
380 |
+
_raise_file_not_found(path, None)
|
381 |
+
return out if detail else [o["name"] for o in out]
|
382 |
+
|
383 |
+
def _ls_tree(
|
384 |
+
self,
|
385 |
+
path: str,
|
386 |
+
recursive: bool = False,
|
387 |
+
refresh: bool = False,
|
388 |
+
revision: Optional[str] = None,
|
389 |
+
expand_info: bool = True,
|
390 |
+
):
|
391 |
+
resolved_path = self.resolve_path(path, revision=revision)
|
392 |
+
path = resolved_path.unresolve()
|
393 |
+
root_path = HfFileSystemResolvedPath(
|
394 |
+
resolved_path.repo_type,
|
395 |
+
resolved_path.repo_id,
|
396 |
+
resolved_path.revision,
|
397 |
+
path_in_repo="",
|
398 |
+
_raw_revision=resolved_path._raw_revision,
|
399 |
+
).unresolve()
|
400 |
+
|
401 |
+
out = []
|
402 |
+
if path in self.dircache and not refresh:
|
403 |
+
cached_path_infos = self.dircache[path]
|
404 |
+
out.extend(cached_path_infos)
|
405 |
+
dirs_not_in_dircache = []
|
406 |
+
if recursive:
|
407 |
+
# Use BFS to traverse the cache and build the "recursive" output
|
408 |
+
# (The Hub uses a so-called "tree first" strategy for the tree endpoint but we sort the output to follow the spec so the result is (eventually) the same)
|
409 |
+
dirs_to_visit = deque(
|
410 |
+
[path_info for path_info in cached_path_infos if path_info["type"] == "directory"]
|
411 |
+
)
|
412 |
+
while dirs_to_visit:
|
413 |
+
dir_info = dirs_to_visit.popleft()
|
414 |
+
if dir_info["name"] not in self.dircache:
|
415 |
+
dirs_not_in_dircache.append(dir_info["name"])
|
416 |
+
else:
|
417 |
+
cached_path_infos = self.dircache[dir_info["name"]]
|
418 |
+
out.extend(cached_path_infos)
|
419 |
+
dirs_to_visit.extend(
|
420 |
+
[path_info for path_info in cached_path_infos if path_info["type"] == "directory"]
|
421 |
+
)
|
422 |
+
|
423 |
+
dirs_not_expanded = []
|
424 |
+
if expand_info:
|
425 |
+
# Check if there are directories with non-expanded entries
|
426 |
+
dirs_not_expanded = [self._parent(o["name"]) for o in out if o["last_commit"] is None]
|
427 |
+
|
428 |
+
if (recursive and dirs_not_in_dircache) or (expand_info and dirs_not_expanded):
|
429 |
+
# If the dircache is incomplete, find the common path of the missing and non-expanded entries
|
430 |
+
# and extend the output with the result of `_ls_tree(common_path, recursive=True)`
|
431 |
+
common_prefix = os.path.commonprefix(dirs_not_in_dircache + dirs_not_expanded)
|
432 |
+
# Get the parent directory if the common prefix itself is not a directory
|
433 |
+
common_path = (
|
434 |
+
common_prefix.rstrip("/")
|
435 |
+
if common_prefix.endswith("/")
|
436 |
+
or common_prefix == root_path
|
437 |
+
or common_prefix in chain(dirs_not_in_dircache, dirs_not_expanded)
|
438 |
+
else self._parent(common_prefix)
|
439 |
+
)
|
440 |
+
out = [o for o in out if not o["name"].startswith(common_path + "/")]
|
441 |
+
for cached_path in self.dircache:
|
442 |
+
if cached_path.startswith(common_path + "/"):
|
443 |
+
self.dircache.pop(cached_path, None)
|
444 |
+
self.dircache.pop(common_path, None)
|
445 |
+
out.extend(
|
446 |
+
self._ls_tree(
|
447 |
+
common_path,
|
448 |
+
recursive=recursive,
|
449 |
+
refresh=True,
|
450 |
+
revision=revision,
|
451 |
+
expand_info=expand_info,
|
452 |
+
)
|
453 |
+
)
|
454 |
+
else:
|
455 |
+
tree = self._api.list_repo_tree(
|
456 |
+
resolved_path.repo_id,
|
457 |
+
resolved_path.path_in_repo,
|
458 |
+
recursive=recursive,
|
459 |
+
expand=expand_info,
|
460 |
+
revision=resolved_path.revision,
|
461 |
+
repo_type=resolved_path.repo_type,
|
462 |
+
)
|
463 |
+
for path_info in tree:
|
464 |
+
if isinstance(path_info, RepoFile):
|
465 |
+
cache_path_info = {
|
466 |
+
"name": root_path + "/" + path_info.path,
|
467 |
+
"size": path_info.size,
|
468 |
+
"type": "file",
|
469 |
+
"blob_id": path_info.blob_id,
|
470 |
+
"lfs": path_info.lfs,
|
471 |
+
"last_commit": path_info.last_commit,
|
472 |
+
"security": path_info.security,
|
473 |
+
}
|
474 |
+
else:
|
475 |
+
cache_path_info = {
|
476 |
+
"name": root_path + "/" + path_info.path,
|
477 |
+
"size": 0,
|
478 |
+
"type": "directory",
|
479 |
+
"tree_id": path_info.tree_id,
|
480 |
+
"last_commit": path_info.last_commit,
|
481 |
+
}
|
482 |
+
parent_path = self._parent(cache_path_info["name"])
|
483 |
+
self.dircache.setdefault(parent_path, []).append(cache_path_info)
|
484 |
+
out.append(cache_path_info)
|
485 |
+
return out
|
486 |
+
|
487 |
+
def walk(self, path: str, *args, **kwargs) -> Iterator[Tuple[str, List[str], List[str]]]:
|
488 |
+
"""
|
489 |
+
Return all files below the given path.
|
490 |
+
|
491 |
+
For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.walk).
|
492 |
+
|
493 |
+
Args:
|
494 |
+
path (`str`):
|
495 |
+
Root path to list files from.
|
496 |
+
|
497 |
+
Returns:
|
498 |
+
`Iterator[Tuple[str, List[str], List[str]]]`: An iterator of (path, list of directory names, list of file names) tuples.
|
499 |
+
"""
|
500 |
+
# Set expand_info=False by default to get a ~10x speed boost
|
501 |
+
kwargs = {"expand_info": kwargs.get("detail", False), **kwargs}
|
502 |
+
path = self.resolve_path(path, revision=kwargs.get("revision")).unresolve()
|
503 |
+
yield from super().walk(path, *args, **kwargs)
|
504 |
+
|
505 |
+
def glob(self, path: str, **kwargs) -> List[str]:
|
506 |
+
"""
|
507 |
+
Find files by glob-matching.
|
508 |
+
|
509 |
+
For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.glob).
|
510 |
+
|
511 |
+
Args:
|
512 |
+
path (`str`):
|
513 |
+
Path pattern to match.
|
514 |
+
|
515 |
+
Returns:
|
516 |
+
`List[str]`: List of paths matching the pattern.
|
517 |
+
"""
|
518 |
+
# Set expand_info=False by default to get a ~10x speed boost
|
519 |
+
kwargs = {"expand_info": kwargs.get("detail", False), **kwargs}
|
520 |
+
path = self.resolve_path(path, revision=kwargs.get("revision")).unresolve()
|
521 |
+
return super().glob(path, **kwargs)
|
522 |
+
|
523 |
+
def find(
|
524 |
+
self,
|
525 |
+
path: str,
|
526 |
+
maxdepth: Optional[int] = None,
|
527 |
+
withdirs: bool = False,
|
528 |
+
detail: bool = False,
|
529 |
+
refresh: bool = False,
|
530 |
+
revision: Optional[str] = None,
|
531 |
+
**kwargs,
|
532 |
+
) -> Union[List[str], Dict[str, Dict[str, Any]]]:
|
533 |
+
"""
|
534 |
+
List all files below path.
|
535 |
+
|
536 |
+
For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.find).
|
537 |
+
|
538 |
+
Args:
|
539 |
+
path (`str`):
|
540 |
+
Root path to list files from.
|
541 |
+
maxdepth (`int`, *optional*):
|
542 |
+
Maximum depth to descend into subdirectories.
|
543 |
+
withdirs (`bool`, *optional*):
|
544 |
+
Include directory paths in the output. Defaults to False.
|
545 |
+
detail (`bool`, *optional*):
|
546 |
+
If True, returns a dict mapping paths to file information. Defaults to False.
|
547 |
+
refresh (`bool`, *optional*):
|
548 |
+
If True, bypass the cache and fetch the latest data. Defaults to False.
|
549 |
+
revision (`str`, *optional*):
|
550 |
+
The git revision to list from.
|
551 |
+
|
552 |
+
Returns:
|
553 |
+
`Union[List[str], Dict[str, Dict[str, Any]]]`: List of paths or dict of file information.
|
554 |
+
"""
|
555 |
+
if maxdepth:
|
556 |
+
return super().find(
|
557 |
+
path, maxdepth=maxdepth, withdirs=withdirs, detail=detail, refresh=refresh, revision=revision, **kwargs
|
558 |
+
)
|
559 |
+
resolved_path = self.resolve_path(path, revision=revision)
|
560 |
+
path = resolved_path.unresolve()
|
561 |
+
kwargs = {"expand_info": detail, **kwargs}
|
562 |
+
try:
|
563 |
+
out = self._ls_tree(path, recursive=True, refresh=refresh, revision=resolved_path.revision, **kwargs)
|
564 |
+
except EntryNotFoundError:
|
565 |
+
# Path could be a file
|
566 |
+
if self.info(path, revision=revision, **kwargs)["type"] == "file":
|
567 |
+
out = {path: {}}
|
568 |
+
else:
|
569 |
+
out = {}
|
570 |
+
else:
|
571 |
+
if not withdirs:
|
572 |
+
out = [o for o in out if o["type"] != "directory"]
|
573 |
+
else:
|
574 |
+
# If `withdirs=True`, include the directory itself to be consistent with the spec
|
575 |
+
path_info = self.info(path, revision=resolved_path.revision, **kwargs)
|
576 |
+
out = [path_info] + out if path_info["type"] == "directory" else out
|
577 |
+
out = {o["name"]: o for o in out}
|
578 |
+
names = sorted(out)
|
579 |
+
if not detail:
|
580 |
+
return names
|
581 |
+
else:
|
582 |
+
return {name: out[name] for name in names}
|
583 |
+
|
584 |
+
def cp_file(self, path1: str, path2: str, revision: Optional[str] = None, **kwargs) -> None:
|
585 |
+
"""
|
586 |
+
Copy a file within or between repositories.
|
587 |
+
|
588 |
+
<Tip warning={true}>
|
589 |
+
|
590 |
+
Note: When possible, use `HfApi.upload_file()` for better performance.
|
591 |
+
|
592 |
+
</Tip>
|
593 |
+
|
594 |
+
Args:
|
595 |
+
path1 (`str`):
|
596 |
+
Source path to copy from.
|
597 |
+
path2 (`str`):
|
598 |
+
Destination path to copy to.
|
599 |
+
revision (`str`, *optional*):
|
600 |
+
The git revision to copy from.
|
601 |
+
|
602 |
+
"""
|
603 |
+
resolved_path1 = self.resolve_path(path1, revision=revision)
|
604 |
+
resolved_path2 = self.resolve_path(path2, revision=revision)
|
605 |
+
|
606 |
+
same_repo = (
|
607 |
+
resolved_path1.repo_type == resolved_path2.repo_type and resolved_path1.repo_id == resolved_path2.repo_id
|
608 |
+
)
|
609 |
+
|
610 |
+
if same_repo:
|
611 |
+
commit_message = f"Copy {path1} to {path2}"
|
612 |
+
self._api.create_commit(
|
613 |
+
repo_id=resolved_path1.repo_id,
|
614 |
+
repo_type=resolved_path1.repo_type,
|
615 |
+
revision=resolved_path2.revision,
|
616 |
+
commit_message=kwargs.get("commit_message", commit_message),
|
617 |
+
commit_description=kwargs.get("commit_description", ""),
|
618 |
+
operations=[
|
619 |
+
CommitOperationCopy(
|
620 |
+
src_path_in_repo=resolved_path1.path_in_repo,
|
621 |
+
path_in_repo=resolved_path2.path_in_repo,
|
622 |
+
src_revision=resolved_path1.revision,
|
623 |
+
)
|
624 |
+
],
|
625 |
+
)
|
626 |
+
else:
|
627 |
+
with self.open(path1, "rb", revision=resolved_path1.revision) as f:
|
628 |
+
content = f.read()
|
629 |
+
commit_message = f"Copy {path1} to {path2}"
|
630 |
+
self._api.upload_file(
|
631 |
+
path_or_fileobj=content,
|
632 |
+
path_in_repo=resolved_path2.path_in_repo,
|
633 |
+
repo_id=resolved_path2.repo_id,
|
634 |
+
token=self.token,
|
635 |
+
repo_type=resolved_path2.repo_type,
|
636 |
+
revision=resolved_path2.revision,
|
637 |
+
commit_message=kwargs.get("commit_message", commit_message),
|
638 |
+
commit_description=kwargs.get("commit_description"),
|
639 |
+
)
|
640 |
+
self.invalidate_cache(path=resolved_path1.unresolve())
|
641 |
+
self.invalidate_cache(path=resolved_path2.unresolve())
|
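A usage sketch of `cp_file` (hypothetical repo ids; write access to the destination repo is assumed):

```python
from huggingface_hub import HfFileSystem

fs = HfFileSystem()

# Same repo: performed server-side as a single commit with CommitOperationCopy
fs.cp_file("my-username/my-model/config.json", "my-username/my-model/config.backup.json")

# Different repos: the source file is read and re-uploaded to the destination repo
fs.cp_file("my-username/my-model/config.json", "my-username/other-model/config.json")
```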
642 |
+
|
643 |
+
def modified(self, path: str, **kwargs) -> datetime:
|
644 |
+
"""
|
645 |
+
Get the last modified time of a file.
|
646 |
+
|
647 |
+
For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.modified).
|
648 |
+
|
649 |
+
Args:
|
650 |
+
path (`str`):
|
651 |
+
Path to the file.
|
652 |
+
|
653 |
+
Returns:
|
654 |
+
`datetime`: Last commit date of the file.
|
655 |
+
"""
|
656 |
+
info = self.info(path, **kwargs)
|
657 |
+
return info["last_commit"]["date"]
|
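A usage sketch of `modified` (hypothetical repo id):

```python
from huggingface_hub import HfFileSystem

fs = HfFileSystem()

# Date of the last commit that touched the file, as a datetime object
last_modified = fs.modified("my-username/my-model/config.json")
print(last_modified.isoformat())
```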
658 |
+
|
659 |
+
def info(self, path: str, refresh: bool = False, revision: Optional[str] = None, **kwargs) -> Dict[str, Any]:
|
660 |
+
"""
|
661 |
+
Get information about a file or directory.
|
662 |
+
|
663 |
+
For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.info).
|
664 |
+
|
665 |
+
<Tip warning={true}>
|
666 |
+
|
667 |
+
Note: When possible, use `HfApi.get_paths_info()` or `HfApi.repo_info()` for better performance.
|
668 |
+
|
669 |
+
</Tip>
|
670 |
+
|
671 |
+
Args:
|
672 |
+
path (`str`):
|
673 |
+
Path to get info for.
|
674 |
+
refresh (`bool`, *optional*):
|
675 |
+
If True, bypass the cache and fetch the latest data. Defaults to False.
|
676 |
+
revision (`str`, *optional*):
|
677 |
+
The git revision to get info from.
|
678 |
+
|
679 |
+
Returns:
|
680 |
+
`Dict[str, Any]`: Dictionary containing file information (type, size, commit info, etc.).
|
681 |
+
|
682 |
+
"""
|
683 |
+
resolved_path = self.resolve_path(path, revision=revision)
|
684 |
+
path = resolved_path.unresolve()
|
685 |
+
expand_info = kwargs.get(
|
686 |
+
"expand_info", True
|
687 |
+
) # don't expose it as a parameter in the public API to follow the spec
|
688 |
+
if not resolved_path.path_in_repo:
|
689 |
+
# Path is the root directory
|
690 |
+
out = {
|
691 |
+
"name": path,
|
692 |
+
"size": 0,
|
693 |
+
"type": "directory",
|
694 |
+
}
|
695 |
+
if expand_info:
|
696 |
+
last_commit = self._api.list_repo_commits(
|
697 |
+
resolved_path.repo_id, repo_type=resolved_path.repo_type, revision=resolved_path.revision
|
698 |
+
)[-1]
|
699 |
+
out = {
|
700 |
+
**out,
|
701 |
+
"tree_id": None, # TODO: tree_id of the root directory?
|
702 |
+
"last_commit": LastCommitInfo(
|
703 |
+
oid=last_commit.commit_id, title=last_commit.title, date=last_commit.created_at
|
704 |
+
),
|
705 |
+
}
|
706 |
+
else:
|
707 |
+
out = None
|
708 |
+
parent_path = self._parent(path)
|
709 |
+
if not expand_info and parent_path not in self.dircache:
|
710 |
+
# Fill the cache with a cheap call
|
711 |
+
self.ls(parent_path, expand_info=False)
|
712 |
+
if parent_path in self.dircache:
|
713 |
+
# Check if the path is in the cache
|
714 |
+
out1 = [o for o in self.dircache[parent_path] if o["name"] == path]
|
715 |
+
if not out1:
|
716 |
+
_raise_file_not_found(path, None)
|
717 |
+
out = out1[0]
|
718 |
+
if refresh or out is None or (expand_info and out and out["last_commit"] is None):
|
719 |
+
paths_info = self._api.get_paths_info(
|
720 |
+
resolved_path.repo_id,
|
721 |
+
resolved_path.path_in_repo,
|
722 |
+
expand=expand_info,
|
723 |
+
revision=resolved_path.revision,
|
724 |
+
repo_type=resolved_path.repo_type,
|
725 |
+
)
|
726 |
+
if not paths_info:
|
727 |
+
_raise_file_not_found(path, None)
|
728 |
+
path_info = paths_info[0]
|
729 |
+
root_path = HfFileSystemResolvedPath(
|
730 |
+
resolved_path.repo_type,
|
731 |
+
resolved_path.repo_id,
|
732 |
+
resolved_path.revision,
|
733 |
+
path_in_repo="",
|
734 |
+
_raw_revision=resolved_path._raw_revision,
|
735 |
+
).unresolve()
|
736 |
+
if isinstance(path_info, RepoFile):
|
737 |
+
out = {
|
738 |
+
"name": root_path + "/" + path_info.path,
|
739 |
+
"size": path_info.size,
|
740 |
+
"type": "file",
|
741 |
+
"blob_id": path_info.blob_id,
|
742 |
+
"lfs": path_info.lfs,
|
743 |
+
"last_commit": path_info.last_commit,
|
744 |
+
"security": path_info.security,
|
745 |
+
}
|
746 |
+
else:
|
747 |
+
out = {
|
748 |
+
"name": root_path + "/" + path_info.path,
|
749 |
+
"size": 0,
|
750 |
+
"type": "directory",
|
751 |
+
"tree_id": path_info.tree_id,
|
752 |
+
"last_commit": path_info.last_commit,
|
753 |
+
}
|
754 |
+
if not expand_info:
|
755 |
+
out = {k: out[k] for k in ["name", "size", "type"]}
|
756 |
+
assert out is not None
|
757 |
+
return out
|
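A usage sketch of `info` (hypothetical repo id); the public call returns expanded information by default, so `last_commit` is populated:

```python
from huggingface_hub import HfFileSystem

fs = HfFileSystem()

file_info = fs.info("my-username/my-model/model.safetensors")
print(file_info["type"], file_info["size"])   # "file" and the size in bytes
print(file_info["last_commit"])               # commit metadata (oid, title, date)

repo_info = fs.info("my-username/my-model")   # the repo root is reported as a directory
print(repo_info["type"])                      # "directory"
```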
758 |
+
|
759 |
+
def exists(self, path, **kwargs):
|
760 |
+
"""
|
761 |
+
Check if a file exists.
|
762 |
+
|
763 |
+
For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.exists).
|
764 |
+
|
765 |
+
<Tip warning={true}>
|
766 |
+
|
767 |
+
Note: When possible, use `HfApi.file_exists()` for better performance.
|
768 |
+
|
769 |
+
</Tip>
|
770 |
+
|
771 |
+
Args:
|
772 |
+
path (`str`):
|
773 |
+
Path to check.
|
774 |
+
|
775 |
+
Returns:
|
776 |
+
`bool`: True if file exists, False otherwise.
|
777 |
+
"""
|
778 |
+
try:
|
779 |
+
if kwargs.get("refresh", False):
|
780 |
+
self.invalidate_cache(path)
|
781 |
+
|
782 |
+
self.info(path, **{**kwargs, "expand_info": False})
|
783 |
+
return True
|
784 |
+
except: # noqa: E722
|
785 |
+
return False
|
786 |
+
|
787 |
+
def isdir(self, path):
|
788 |
+
"""
|
789 |
+
Check if a path is a directory.
|
790 |
+
|
791 |
+
For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.isdir).
|
792 |
+
|
793 |
+
Args:
|
794 |
+
path (`str`):
|
795 |
+
Path to check.
|
796 |
+
|
797 |
+
Returns:
|
798 |
+
`bool`: True if path is a directory, False otherwise.
|
799 |
+
"""
|
800 |
+
try:
|
801 |
+
return self.info(path, expand_info=False)["type"] == "directory"
|
802 |
+
except OSError:
|
803 |
+
return False
|
804 |
+
|
805 |
+
def isfile(self, path):
|
806 |
+
"""
|
807 |
+
Check if a path is a file.
|
808 |
+
|
809 |
+
For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.isfile).
|
810 |
+
|
811 |
+
Args:
|
812 |
+
path (`str`):
|
813 |
+
Path to check.
|
814 |
+
|
815 |
+
Returns:
|
816 |
+
`bool`: True if path is a file, False otherwise.
|
817 |
+
"""
|
818 |
+
try:
|
819 |
+
return self.info(path, expand_info=False)["type"] == "file"
|
820 |
+
except: # noqa: E722
|
821 |
+
return False
|
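The three predicates above are typically used as cheap existence checks (hypothetical paths):

```python
from huggingface_hub import HfFileSystem

fs = HfFileSystem()

fs.exists("my-username/my-model/config.json")  # True if the path resolves at all
fs.isdir("my-username/my-model")               # True: a repo root behaves like a directory
fs.isfile("my-username/my-model/config.json")  # True only for regular files
```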
822 |
+
|
823 |
+
def url(self, path: str) -> str:
|
824 |
+
"""
|
825 |
+
Get the HTTP URL of the given path.
|
826 |
+
|
827 |
+
Args:
|
828 |
+
path (`str`):
|
829 |
+
Path to get URL for.
|
830 |
+
|
831 |
+
Returns:
|
832 |
+
`str`: HTTP URL to access the file or directory on the Hub.
|
833 |
+
"""
|
834 |
+
resolved_path = self.resolve_path(path)
|
835 |
+
url = hf_hub_url(
|
836 |
+
resolved_path.repo_id,
|
837 |
+
resolved_path.path_in_repo,
|
838 |
+
repo_type=resolved_path.repo_type,
|
839 |
+
revision=resolved_path.revision,
|
840 |
+
endpoint=self.endpoint,
|
841 |
+
)
|
842 |
+
if self.isdir(path):
|
843 |
+
url = url.replace("/resolve/", "/tree/", 1)
|
844 |
+
return url
|
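A usage sketch of `url` (hypothetical repo id):

```python
from huggingface_hub import HfFileSystem

fs = HfFileSystem()

print(fs.url("my-username/my-model/config.json"))
# e.g. https://huggingface.co/my-username/my-model/resolve/main/config.json
print(fs.url("my-username/my-model"))
# directories point at the /tree/ view instead of /resolve/
```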
845 |
+
|
846 |
+
def get_file(self, rpath, lpath, callback=_DEFAULT_CALLBACK, outfile=None, **kwargs) -> None:
|
847 |
+
"""
|
848 |
+
Copy single remote file to local.
|
849 |
+
|
850 |
+
<Tip warning={true}>
|
851 |
+
|
852 |
+
Note: When possible, use `HfApi.hf_hub_download()` for better performance.
|
853 |
+
|
854 |
+
</Tip>
|
855 |
+
|
856 |
+
Args:
|
857 |
+
rpath (`str`):
|
858 |
+
Remote path to download from.
|
859 |
+
lpath (`str`):
|
860 |
+
Local path to download to.
|
861 |
+
callback (`Callback`, *optional*):
|
862 |
+
Optional callback to track download progress. Defaults to no callback.
|
863 |
+
outfile (`IO`, *optional*):
|
864 |
+
Optional file-like object to write to. If provided, `lpath` is ignored.
|
865 |
+
|
866 |
+
"""
|
867 |
+
revision = kwargs.get("revision")
|
868 |
+
unhandled_kwargs = set(kwargs.keys()) - {"revision"}
|
869 |
+
if not isinstance(callback, (NoOpCallback, TqdmCallback)) or len(unhandled_kwargs) > 0:
|
870 |
+
# for now, let's not handle custom callbacks
|
871 |
+
# and let's not handle custom kwargs
|
872 |
+
return super().get_file(rpath, lpath, callback=callback, outfile=outfile, **kwargs)
|
873 |
+
|
874 |
+
# Taken from https://github.com/fsspec/filesystem_spec/blob/47b445ae4c284a82dd15e0287b1ffc410e8fc470/fsspec/spec.py#L883
|
875 |
+
if isfilelike(lpath):
|
876 |
+
outfile = lpath
|
877 |
+
elif self.isdir(rpath):
|
878 |
+
os.makedirs(lpath, exist_ok=True)
|
879 |
+
return None
|
880 |
+
|
881 |
+
if isinstance(lpath, (str, Path)): # otherwise, let's assume it's a file-like object
|
882 |
+
os.makedirs(os.path.dirname(lpath), exist_ok=True)
|
883 |
+
|
884 |
+
# Open file if not already open
|
885 |
+
close_file = False
|
886 |
+
if outfile is None:
|
887 |
+
outfile = open(lpath, "wb")
|
888 |
+
close_file = True
|
889 |
+
initial_pos = outfile.tell()
|
890 |
+
|
891 |
+
# Custom implementation of `get_file` to use `http_get`.
|
892 |
+
resolve_remote_path = self.resolve_path(rpath, revision=revision)
|
893 |
+
expected_size = self.info(rpath, revision=revision)["size"]
|
894 |
+
callback.set_size(expected_size)
|
895 |
+
try:
|
896 |
+
http_get(
|
897 |
+
url=hf_hub_url(
|
898 |
+
repo_id=resolve_remote_path.repo_id,
|
899 |
+
revision=resolve_remote_path.revision,
|
900 |
+
filename=resolve_remote_path.path_in_repo,
|
901 |
+
repo_type=resolve_remote_path.repo_type,
|
902 |
+
endpoint=self.endpoint,
|
903 |
+
),
|
904 |
+
temp_file=outfile,
|
905 |
+
displayed_filename=rpath,
|
906 |
+
expected_size=expected_size,
|
907 |
+
resume_size=0,
|
908 |
+
headers=self._api._build_hf_headers(),
|
909 |
+
_tqdm_bar=callback.tqdm if isinstance(callback, TqdmCallback) else None,
|
910 |
+
)
|
911 |
+
outfile.seek(initial_pos)
|
912 |
+
finally:
|
913 |
+
# Close file only if we opened it ourselves
|
914 |
+
if close_file:
|
915 |
+
outfile.close()
|
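A usage sketch of `get_file` (hypothetical repo id and local paths):

```python
from huggingface_hub import HfFileSystem

fs = HfFileSystem()

# Download a single file to a local path; progress is reported through the callback
fs.get_file("my-username/my-model/config.json", "/tmp/config.json")

# The generic fsspec `get` also works, e.g. for recursive downloads
fs.get("my-username/my-model", "/tmp/my-model", recursive=True)
```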
916 |
+
|
917 |
+
@property
|
918 |
+
def transaction(self):
|
919 |
+
"""A context within which files are committed together upon exit
|
920 |
+
|
921 |
+
Requires the file class to implement `.commit()` and `.discard()`
|
922 |
+
for the normal and exception cases.
|
923 |
+
"""
|
924 |
+
# Taken from https://github.com/fsspec/filesystem_spec/blob/3fbb6fee33b46cccb015607630843dea049d3243/fsspec/spec.py#L231
|
925 |
+
# See https://github.com/huggingface/huggingface_hub/issues/1733
|
926 |
+
raise NotImplementedError("Transactional commits are not supported.")
|
927 |
+
|
928 |
+
def start_transaction(self):
|
929 |
+
"""Begin write transaction for deferring files, non-context version"""
|
930 |
+
# Taken from https://github.com/fsspec/filesystem_spec/blob/3fbb6fee33b46cccb015607630843dea049d3243/fsspec/spec.py#L241
|
931 |
+
# See https://github.com/huggingface/huggingface_hub/issues/1733
|
932 |
+
raise NotImplementedError("Transactional commits are not supported.")
|
933 |
+
|
934 |
+
|
935 |
+
class HfFileSystemFile(fsspec.spec.AbstractBufferedFile):
|
936 |
+
def __init__(self, fs: HfFileSystem, path: str, revision: Optional[str] = None, **kwargs):
|
937 |
+
try:
|
938 |
+
self.resolved_path = fs.resolve_path(path, revision=revision)
|
939 |
+
except FileNotFoundError as e:
|
940 |
+
if "w" in kwargs.get("mode", ""):
|
941 |
+
raise FileNotFoundError(
|
942 |
+
f"{e}.\nMake sure the repository and revision exist before writing data."
|
943 |
+
) from e
|
944 |
+
raise
|
945 |
+
# avoid an unnecessary .info() call with expensive expand_info=True to instantiate .details
|
946 |
+
if kwargs.get("mode", "rb") == "rb":
|
947 |
+
self.details = fs.info(self.resolved_path.unresolve(), expand_info=False)
|
948 |
+
super().__init__(fs, self.resolved_path.unresolve(), **kwargs)
|
949 |
+
self.fs: HfFileSystem
|
950 |
+
|
951 |
+
def __del__(self):
|
952 |
+
if not hasattr(self, "resolved_path"):
|
953 |
+
# Means that the constructor failed. Nothing to do.
|
954 |
+
return
|
955 |
+
return super().__del__()
|
956 |
+
|
957 |
+
def _fetch_range(self, start: int, end: int) -> bytes:
|
958 |
+
headers = {
|
959 |
+
"range": f"bytes={start}-{end - 1}",
|
960 |
+
**self.fs._api._build_hf_headers(),
|
961 |
+
}
|
962 |
+
url = hf_hub_url(
|
963 |
+
repo_id=self.resolved_path.repo_id,
|
964 |
+
revision=self.resolved_path.revision,
|
965 |
+
filename=self.resolved_path.path_in_repo,
|
966 |
+
repo_type=self.resolved_path.repo_type,
|
967 |
+
endpoint=self.fs.endpoint,
|
968 |
+
)
|
969 |
+
r = http_backoff(
|
970 |
+
"GET",
|
971 |
+
url,
|
972 |
+
headers=headers,
|
973 |
+
retry_on_status_codes=(500, 502, 503, 504),
|
974 |
+
timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
|
975 |
+
)
|
976 |
+
hf_raise_for_status(r)
|
977 |
+
return r.content
|
978 |
+
|
979 |
+
def _initiate_upload(self) -> None:
|
980 |
+
self.temp_file = tempfile.NamedTemporaryFile(prefix="hffs-", delete=False)
|
981 |
+
|
982 |
+
def _upload_chunk(self, final: bool = False) -> None:
|
983 |
+
self.buffer.seek(0)
|
984 |
+
block = self.buffer.read()
|
985 |
+
self.temp_file.write(block)
|
986 |
+
if final:
|
987 |
+
self.temp_file.close()
|
988 |
+
self.fs._api.upload_file(
|
989 |
+
path_or_fileobj=self.temp_file.name,
|
990 |
+
path_in_repo=self.resolved_path.path_in_repo,
|
991 |
+
repo_id=self.resolved_path.repo_id,
|
992 |
+
token=self.fs.token,
|
993 |
+
repo_type=self.resolved_path.repo_type,
|
994 |
+
revision=self.resolved_path.revision,
|
995 |
+
commit_message=self.kwargs.get("commit_message"),
|
996 |
+
commit_description=self.kwargs.get("commit_description"),
|
997 |
+
)
|
998 |
+
os.remove(self.temp_file.name)
|
999 |
+
self.fs.invalidate_cache(
|
1000 |
+
path=self.resolved_path.unresolve(),
|
1001 |
+
)
|
1002 |
+
|
1003 |
+
def read(self, length=-1):
|
1004 |
+
"""Read remote file.
|
1005 |
+
|
1006 |
+
If `length` is not provided or is -1, the entire file is downloaded and read. On POSIX systems and if
|
1007 |
+
`hf_transfer` is not enabled, the file is loaded in memory directly. Otherwise, the file is downloaded to a
|
1008 |
+
temporary file and read from there.
|
1009 |
+
"""
|
1010 |
+
if self.mode == "rb" and (length is None or length == -1) and self.loc == 0:
|
1011 |
+
with self.fs.open(self.path, "rb", block_size=0) as f: # block_size=0 enables fast streaming
|
1012 |
+
return f.read()
|
1013 |
+
return super().read(length)
|
1014 |
+
|
1015 |
+
def url(self) -> str:
|
1016 |
+
return self.fs.url(self.path)
|
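`HfFileSystemFile` is normally reached through `fs.open` rather than instantiated directly. A short sketch (hypothetical repo id; writing requires push access):

```python
from huggingface_hub import HfFileSystem

fs = HfFileSystem()

# Reading with the default block_size uses HfFileSystemFile and ranged HTTP requests
with fs.open("my-username/my-model/config.json", "rb") as f:
    data = f.read()

# Writing buffers to a temporary file and commits it via upload_file on close
with fs.open("my-username/my-model/notes.txt", "w") as f:
    f.write("training notes")
```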
1017 |
+
|
1018 |
+
|
1019 |
+
class HfFileSystemStreamFile(fsspec.spec.AbstractBufferedFile):
|
1020 |
+
def __init__(
|
1021 |
+
self,
|
1022 |
+
fs: HfFileSystem,
|
1023 |
+
path: str,
|
1024 |
+
mode: str = "rb",
|
1025 |
+
revision: Optional[str] = None,
|
1026 |
+
block_size: int = 0,
|
1027 |
+
cache_type: str = "none",
|
1028 |
+
**kwargs,
|
1029 |
+
):
|
1030 |
+
if block_size != 0:
|
1031 |
+
raise ValueError(f"HfFileSystemStreamFile only supports block_size=0 but got {block_size}")
|
1032 |
+
if cache_type != "none":
|
1033 |
+
raise ValueError(f"HfFileSystemStreamFile only supports cache_type='none' but got {cache_type}")
|
1034 |
+
if "w" in mode:
|
1035 |
+
raise ValueError(f"HfFileSystemStreamFile only supports reading but got mode='{mode}'")
|
1036 |
+
try:
|
1037 |
+
self.resolved_path = fs.resolve_path(path, revision=revision)
|
1038 |
+
except FileNotFoundError as e:
|
1039 |
+
if "w" in kwargs.get("mode", ""):
|
1040 |
+
raise FileNotFoundError(
|
1041 |
+
f"{e}.\nMake sure the repository and revision exist before writing data."
|
1042 |
+
) from e
|
1043 |
+
# avoid an unnecessary .info() call to instantiate .details
|
1044 |
+
self.details = {"name": self.resolved_path.unresolve(), "size": None}
|
1045 |
+
super().__init__(
|
1046 |
+
fs, self.resolved_path.unresolve(), mode=mode, block_size=block_size, cache_type=cache_type, **kwargs
|
1047 |
+
)
|
1048 |
+
self.response: Optional[Response] = None
|
1049 |
+
self.fs: HfFileSystem
|
1050 |
+
|
1051 |
+
def seek(self, loc: int, whence: int = 0):
|
1052 |
+
if loc == 0 and whence == 1:
|
1053 |
+
return
|
1054 |
+
if loc == self.loc and whence == 0:
|
1055 |
+
return
|
1056 |
+
raise ValueError("Cannot seek streaming HF file")
|
1057 |
+
|
1058 |
+
def read(self, length: int = -1):
|
1059 |
+
read_args = (length,) if length >= 0 else ()
|
1060 |
+
if self.response is None or self.response.raw.isclosed():
|
1061 |
+
url = hf_hub_url(
|
1062 |
+
repo_id=self.resolved_path.repo_id,
|
1063 |
+
revision=self.resolved_path.revision,
|
1064 |
+
filename=self.resolved_path.path_in_repo,
|
1065 |
+
repo_type=self.resolved_path.repo_type,
|
1066 |
+
endpoint=self.fs.endpoint,
|
1067 |
+
)
|
1068 |
+
self.response = http_backoff(
|
1069 |
+
"GET",
|
1070 |
+
url,
|
1071 |
+
headers=self.fs._api._build_hf_headers(),
|
1072 |
+
retry_on_status_codes=(500, 502, 503, 504),
|
1073 |
+
stream=True,
|
1074 |
+
timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
|
1075 |
+
)
|
1076 |
+
hf_raise_for_status(self.response)
|
1077 |
+
try:
|
1078 |
+
out = self.response.raw.read(*read_args)
|
1079 |
+
except Exception:
|
1080 |
+
self.response.close()
|
1081 |
+
|
1082 |
+
# Retry by recreating the connection
|
1083 |
+
url = hf_hub_url(
|
1084 |
+
repo_id=self.resolved_path.repo_id,
|
1085 |
+
revision=self.resolved_path.revision,
|
1086 |
+
filename=self.resolved_path.path_in_repo,
|
1087 |
+
repo_type=self.resolved_path.repo_type,
|
1088 |
+
endpoint=self.fs.endpoint,
|
1089 |
+
)
|
1090 |
+
self.response = http_backoff(
|
1091 |
+
"GET",
|
1092 |
+
url,
|
1093 |
+
headers={"Range": "bytes=%d-" % self.loc, **self.fs._api._build_hf_headers()},
|
1094 |
+
retry_on_status_codes=(500, 502, 503, 504),
|
1095 |
+
stream=True,
|
1096 |
+
timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
|
1097 |
+
)
|
1098 |
+
hf_raise_for_status(self.response)
|
1099 |
+
try:
|
1100 |
+
out = self.response.raw.read(*read_args)
|
1101 |
+
except Exception:
|
1102 |
+
self.response.close()
|
1103 |
+
raise
|
1104 |
+
self.loc += len(out)
|
1105 |
+
return out
|
1106 |
+
|
1107 |
+
def url(self) -> str:
|
1108 |
+
return self.fs.url(self.path)
|
1109 |
+
|
1110 |
+
def __del__(self):
|
1111 |
+
if not hasattr(self, "resolved_path"):
|
1112 |
+
# Means that the constructor failed. Nothing to do.
|
1113 |
+
return
|
1114 |
+
return super().__del__()
|
1115 |
+
|
1116 |
+
def __reduce__(self):
|
1117 |
+
return reopen, (self.fs, self.path, self.mode, self.blocksize, self.cache.name)
|
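`HfFileSystemStreamFile` is selected by passing `block_size=0` to `fs.open`; it streams the file in a single GET without random access. A sketch (hypothetical repo id):

```python
from huggingface_hub import HfFileSystem

fs = HfFileSystem()

total = 0
with fs.open("my-username/my-model/data.bin", "rb", block_size=0) as f:
    while chunk := f.read(8 * 1024 * 1024):  # read in 8 MiB chunks until EOF
        total += len(chunk)
print(f"streamed {total} bytes")
```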
1118 |
+
|
1119 |
+
|
1120 |
+
def safe_revision(revision: str) -> str:
|
1121 |
+
return revision if SPECIAL_REFS_REVISION_REGEX.match(revision) else safe_quote(revision)
|
1122 |
+
|
1123 |
+
|
1124 |
+
def safe_quote(s: str) -> str:
|
1125 |
+
return quote(s, safe="")
|
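Illustrative behavior of the two helpers above, assuming `SPECIAL_REFS_REVISION_REGEX` matches special refs such as `refs/pr/1`:

```python
safe_revision("refs/pr/1")    # returned unchanged: special refs keep their slashes
safe_revision("my/branch")    # "my%2Fbranch": regular revisions are fully URL-quoted
safe_quote("weights/v1.bin")  # "weights%2Fv1.bin": quote() with safe="" escapes "/" too
```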
1126 |
+
|
1127 |
+
|
1128 |
+
def _raise_file_not_found(path: str, err: Optional[Exception]) -> NoReturn:
|
1129 |
+
msg = path
|
1130 |
+
if isinstance(err, RepositoryNotFoundError):
|
1131 |
+
msg = f"{path} (repository not found)"
|
1132 |
+
elif isinstance(err, RevisionNotFoundError):
|
1133 |
+
msg = f"{path} (revision not found)"
|
1134 |
+
elif isinstance(err, HFValidationError):
|
1135 |
+
msg = f"{path} (invalid repository id)"
|
1136 |
+
raise FileNotFoundError(msg) from err
|
1137 |
+
|
1138 |
+
|
1139 |
+
def reopen(fs: HfFileSystem, path: str, mode: str, block_size: int, cache_type: str):
|
1140 |
+
return fs.open(path, mode=mode, block_size=block_size, cache_type=cache_type)
|
meow/lib/python3.13/site-packages/huggingface_hub/hub_mixin.py
ADDED
@@ -0,0 +1,833 @@
1 |
+
import inspect
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
from dataclasses import asdict, dataclass, is_dataclass
|
5 |
+
from pathlib import Path
|
6 |
+
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union
|
7 |
+
|
8 |
+
import packaging.version
|
9 |
+
|
10 |
+
from . import constants
|
11 |
+
from .errors import EntryNotFoundError, HfHubHTTPError
|
12 |
+
from .file_download import hf_hub_download
|
13 |
+
from .hf_api import HfApi
|
14 |
+
from .repocard import ModelCard, ModelCardData
|
15 |
+
from .utils import (
|
16 |
+
SoftTemporaryDirectory,
|
17 |
+
is_jsonable,
|
18 |
+
is_safetensors_available,
|
19 |
+
is_simple_optional_type,
|
20 |
+
is_torch_available,
|
21 |
+
logging,
|
22 |
+
unwrap_simple_optional_type,
|
23 |
+
validate_hf_hub_args,
|
24 |
+
)
|
25 |
+
|
26 |
+
|
27 |
+
if TYPE_CHECKING:
|
28 |
+
from _typeshed import DataclassInstance
|
29 |
+
|
30 |
+
if is_torch_available():
|
31 |
+
import torch # type: ignore
|
32 |
+
|
33 |
+
if is_safetensors_available():
|
34 |
+
import safetensors
|
35 |
+
from safetensors.torch import load_model as load_model_as_safetensor
|
36 |
+
from safetensors.torch import save_model as save_model_as_safetensor
|
37 |
+
|
38 |
+
|
39 |
+
logger = logging.get_logger(__name__)
|
40 |
+
|
41 |
+
# Generic variable that is either ModelHubMixin or a subclass thereof
|
42 |
+
T = TypeVar("T", bound="ModelHubMixin")
|
43 |
+
# Generic variable to represent an args type
|
44 |
+
ARGS_T = TypeVar("ARGS_T")
|
45 |
+
ENCODER_T = Callable[[ARGS_T], Any]
|
46 |
+
DECODER_T = Callable[[Any], ARGS_T]
|
47 |
+
CODER_T = Tuple[ENCODER_T, DECODER_T]
|
48 |
+
|
49 |
+
|
50 |
+
DEFAULT_MODEL_CARD = """
|
51 |
+
---
|
52 |
+
# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
|
53 |
+
# Doc / guide: https://huggingface.co/docs/hub/model-cards
|
54 |
+
{{ card_data }}
|
55 |
+
---
|
56 |
+
|
57 |
+
This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
|
58 |
+
- Library: {{ repo_url | default("[More Information Needed]", true) }}
|
59 |
+
- Docs: {{ docs_url | default("[More Information Needed]", true) }}
|
60 |
+
"""
|
61 |
+
|
62 |
+
|
63 |
+
@dataclass
|
64 |
+
class MixinInfo:
|
65 |
+
model_card_template: str
|
66 |
+
model_card_data: ModelCardData
|
67 |
+
repo_url: Optional[str] = None
|
68 |
+
docs_url: Optional[str] = None
|
69 |
+
|
70 |
+
|
71 |
+
class ModelHubMixin:
|
72 |
+
"""
|
73 |
+
A generic mixin to integrate ANY machine learning framework with the Hub.
|
74 |
+
|
75 |
+
To integrate your framework, your model class must inherit from this class. Custom logic for saving/loading models
|
76 |
+
has to be overwritten in [`_from_pretrained`] and [`_save_pretrained`]. [`PyTorchModelHubMixin`] is a good example
|
77 |
+
of mixin integration with the Hub. Check out our [integration guide](../guides/integrations) for more instructions.
|
78 |
+
|
79 |
+
When inheriting from [`ModelHubMixin`], you can define class-level attributes. These attributes are not passed to
|
80 |
+
`__init__` but to the class definition itself. This is useful to define metadata about the library integrating
|
81 |
+
[`ModelHubMixin`].
|
82 |
+
|
83 |
+
For more details on how to integrate the mixin with your library, check out the [integration guide](../guides/integrations).
|
84 |
+
|
85 |
+
Args:
|
86 |
+
repo_url (`str`, *optional*):
|
87 |
+
URL of the library repository. Used to generate model card.
|
88 |
+
docs_url (`str`, *optional*):
|
89 |
+
URL of the library documentation. Used to generate model card.
|
90 |
+
model_card_template (`str`, *optional*):
|
91 |
+
Template of the model card. Used to generate model card. Defaults to a generic template.
|
92 |
+
language (`str` or `List[str]`, *optional*):
|
93 |
+
Language supported by the library. Used to generate model card.
|
94 |
+
library_name (`str`, *optional*):
|
95 |
+
Name of the library integrating ModelHubMixin. Used to generate model card.
|
96 |
+
license (`str`, *optional*):
|
97 |
+
License of the library integrating ModelHubMixin. Used to generate model card.
|
98 |
+
E.g: "apache-2.0"
|
99 |
+
license_name (`str`, *optional*):
|
100 |
+
Name of the license. Used to generate model card.
|
101 |
+
Only used if `license` is set to `other`.
|
102 |
+
E.g: "coqui-public-model-license".
|
103 |
+
license_link (`str`, *optional*):
|
104 |
+
URL to the license of the library integrating ModelHubMixin. Used to generate model card.
|
105 |
+
Only used if `license` is set to `other` and `license_name` is set.
|
106 |
+
E.g: "https://coqui.ai/cpml".
|
107 |
+
pipeline_tag (`str`, *optional*):
|
108 |
+
Tag of the pipeline. Used to generate model card. E.g. "text-classification".
|
109 |
+
tags (`List[str]`, *optional*):
|
110 |
+
Tags to be added to the model card. Used to generate model card. E.g. ["x-custom-tag", "arxiv:2304.12244"]
|
111 |
+
coders (`Dict[Type, Tuple[Callable, Callable]]`, *optional*):
|
112 |
+
Dictionary of custom types and their encoders/decoders. Used to encode/decode arguments that are not
|
113 |
+
jsonable by default. E.g. dataclasses, argparse.Namespace, OmegaConf, etc.
|
114 |
+
|
115 |
+
Example:
|
116 |
+
|
117 |
+
```python
|
118 |
+
>>> from huggingface_hub import ModelHubMixin
|
119 |
+
|
120 |
+
# Inherit from ModelHubMixin
|
121 |
+
>>> class MyCustomModel(
|
122 |
+
... ModelHubMixin,
|
123 |
+
... library_name="my-library",
|
124 |
+
... tags=["x-custom-tag", "arxiv:2304.12244"],
|
125 |
+
... repo_url="https://github.com/huggingface/my-cool-library",
|
126 |
+
... docs_url="https://huggingface.co/docs/my-cool-library",
|
127 |
+
... # ^ optional metadata to generate model card
|
128 |
+
... ):
|
129 |
+
... def __init__(self, size: int = 512, device: str = "cpu"):
|
130 |
+
... # define how to initialize your model
|
131 |
+
... super().__init__()
|
132 |
+
... ...
|
133 |
+
...
|
134 |
+
... def _save_pretrained(self, save_directory: Path) -> None:
|
135 |
+
... # define how to serialize your model
|
136 |
+
... ...
|
137 |
+
...
|
138 |
+
... @classmethod
|
139 |
+
... def from_pretrained(
|
140 |
+
... cls: Type[T],
|
141 |
+
... pretrained_model_name_or_path: Union[str, Path],
|
142 |
+
... *,
|
143 |
+
... force_download: bool = False,
|
144 |
+
... resume_download: Optional[bool] = None,
|
145 |
+
... proxies: Optional[Dict] = None,
|
146 |
+
... token: Optional[Union[str, bool]] = None,
|
147 |
+
... cache_dir: Optional[Union[str, Path]] = None,
|
148 |
+
... local_files_only: bool = False,
|
149 |
+
... revision: Optional[str] = None,
|
150 |
+
... **model_kwargs,
|
151 |
+
... ) -> T:
|
152 |
+
... # define how to deserialize your model
|
153 |
+
... ...
|
154 |
+
|
155 |
+
>>> model = MyCustomModel(size=256, device="gpu")
|
156 |
+
|
157 |
+
# Save model weights to local directory
|
158 |
+
>>> model.save_pretrained("my-awesome-model")
|
159 |
+
|
160 |
+
# Push model weights to the Hub
|
161 |
+
>>> model.push_to_hub("my-awesome-model")
|
162 |
+
|
163 |
+
# Download and initialize weights from the Hub
|
164 |
+
>>> reloaded_model = MyCustomModel.from_pretrained("username/my-awesome-model")
|
165 |
+
>>> reloaded_model.size
|
166 |
+
256
|
167 |
+
|
168 |
+
# Model card has been correctly populated
|
169 |
+
>>> from huggingface_hub import ModelCard
|
170 |
+
>>> card = ModelCard.load("username/my-awesome-model")
|
171 |
+
>>> card.data.tags
|
172 |
+
["x-custom-tag", "pytorch_model_hub_mixin", "model_hub_mixin"]
|
173 |
+
>>> card.data.library_name
|
174 |
+
"my-library"
|
175 |
+
```
|
176 |
+
"""
|
177 |
+
|
178 |
+
_hub_mixin_config: Optional[Union[dict, "DataclassInstance"]] = None
|
179 |
+
# ^ optional config attribute automatically set in `from_pretrained`
|
180 |
+
_hub_mixin_info: MixinInfo
|
181 |
+
# ^ information about the library integrating ModelHubMixin (used to generate model card)
|
182 |
+
_hub_mixin_inject_config: bool # whether `_from_pretrained` expects `config` or not
|
183 |
+
_hub_mixin_init_parameters: Dict[str, inspect.Parameter] # __init__ parameters
|
184 |
+
_hub_mixin_jsonable_default_values: Dict[str, Any] # default values for __init__ parameters
|
185 |
+
_hub_mixin_jsonable_custom_types: Tuple[Type, ...] # custom types that can be encoded/decoded
|
186 |
+
_hub_mixin_coders: Dict[Type, CODER_T] # encoders/decoders for custom types
|
187 |
+
# ^ internal values to handle config
|
188 |
+
|
189 |
+
def __init_subclass__(
|
190 |
+
cls,
|
191 |
+
*,
|
192 |
+
# Generic info for model card
|
193 |
+
repo_url: Optional[str] = None,
|
194 |
+
docs_url: Optional[str] = None,
|
195 |
+
# Model card template
|
196 |
+
model_card_template: str = DEFAULT_MODEL_CARD,
|
197 |
+
# Model card metadata
|
198 |
+
language: Optional[List[str]] = None,
|
199 |
+
library_name: Optional[str] = None,
|
200 |
+
license: Optional[str] = None,
|
201 |
+
license_name: Optional[str] = None,
|
202 |
+
license_link: Optional[str] = None,
|
203 |
+
pipeline_tag: Optional[str] = None,
|
204 |
+
tags: Optional[List[str]] = None,
|
205 |
+
# How to encode/decode arguments with custom type into a JSON config?
|
206 |
+
coders: Optional[
|
207 |
+
Dict[Type, CODER_T]
|
208 |
+
# Key is a type.
|
209 |
+
# Value is a tuple (encoder, decoder).
|
210 |
+
# Example: {MyCustomType: (lambda x: x.value, lambda data: MyCustomType(data))}
|
211 |
+
] = None,
|
212 |
+
) -> None:
|
213 |
+
"""Inspect __init__ signature only once when subclassing + handle modelcard."""
|
214 |
+
super().__init_subclass__()
|
215 |
+
|
216 |
+
# Will be reused when creating modelcard
|
217 |
+
tags = tags or []
|
218 |
+
tags.append("model_hub_mixin")
|
219 |
+
|
220 |
+
# Initialize MixinInfo if not existent
|
221 |
+
info = MixinInfo(model_card_template=model_card_template, model_card_data=ModelCardData())
|
222 |
+
|
223 |
+
# If parent class has a MixinInfo, inherit from it as a copy
|
224 |
+
if hasattr(cls, "_hub_mixin_info"):
|
225 |
+
# Inherit model card template from parent class if not explicitly set
|
226 |
+
if model_card_template == DEFAULT_MODEL_CARD:
|
227 |
+
info.model_card_template = cls._hub_mixin_info.model_card_template
|
228 |
+
|
229 |
+
# Inherit from parent model card data
|
230 |
+
info.model_card_data = ModelCardData(**cls._hub_mixin_info.model_card_data.to_dict())
|
231 |
+
|
232 |
+
# Inherit other info
|
233 |
+
info.docs_url = cls._hub_mixin_info.docs_url
|
234 |
+
info.repo_url = cls._hub_mixin_info.repo_url
|
235 |
+
cls._hub_mixin_info = info
|
236 |
+
|
237 |
+
# Update MixinInfo with metadata
|
238 |
+
if model_card_template is not None and model_card_template != DEFAULT_MODEL_CARD:
|
239 |
+
info.model_card_template = model_card_template
|
240 |
+
if repo_url is not None:
|
241 |
+
info.repo_url = repo_url
|
242 |
+
if docs_url is not None:
|
243 |
+
info.docs_url = docs_url
|
244 |
+
if language is not None:
|
245 |
+
info.model_card_data.language = language
|
246 |
+
if library_name is not None:
|
247 |
+
info.model_card_data.library_name = library_name
|
248 |
+
if license is not None:
|
249 |
+
info.model_card_data.license = license
|
250 |
+
if license_name is not None:
|
251 |
+
info.model_card_data.license_name = license_name
|
252 |
+
if license_link is not None:
|
253 |
+
info.model_card_data.license_link = license_link
|
254 |
+
if pipeline_tag is not None:
|
255 |
+
info.model_card_data.pipeline_tag = pipeline_tag
|
256 |
+
if tags is not None:
|
257 |
+
if info.model_card_data.tags is not None:
|
258 |
+
info.model_card_data.tags.extend(tags)
|
259 |
+
else:
|
260 |
+
info.model_card_data.tags = tags
|
261 |
+
|
262 |
+
info.model_card_data.tags = sorted(set(info.model_card_data.tags))
|
263 |
+
|
264 |
+
# Handle encoders/decoders for args
|
265 |
+
cls._hub_mixin_coders = coders or {}
|
266 |
+
cls._hub_mixin_jsonable_custom_types = tuple(cls._hub_mixin_coders.keys())
|
267 |
+
|
268 |
+
# Inspect __init__ signature to handle config
|
269 |
+
cls._hub_mixin_init_parameters = dict(inspect.signature(cls.__init__).parameters)
|
270 |
+
cls._hub_mixin_jsonable_default_values = {
|
271 |
+
param.name: cls._encode_arg(param.default)
|
272 |
+
for param in cls._hub_mixin_init_parameters.values()
|
273 |
+
if param.default is not inspect.Parameter.empty and cls._is_jsonable(param.default)
|
274 |
+
}
|
275 |
+
cls._hub_mixin_inject_config = "config" in inspect.signature(cls._from_pretrained).parameters
|
276 |
+
|
277 |
+
def __new__(cls: Type[T], *args, **kwargs) -> T:
|
278 |
+
"""Create a new instance of the class and handle config.
|
279 |
+
|
280 |
+
3 cases:
|
281 |
+
- If `self._hub_mixin_config` is already set, do nothing.
|
282 |
+
- If `config` is passed as a dataclass, set it as `self._hub_mixin_config`.
|
283 |
+
- Otherwise, build `self._hub_mixin_config` from default values and passed values.
|
284 |
+
"""
|
285 |
+
instance = super().__new__(cls)
|
286 |
+
|
287 |
+
# If `config` is already set, return early
|
288 |
+
if instance._hub_mixin_config is not None:
|
289 |
+
return instance
|
290 |
+
|
291 |
+
# Infer passed values
|
292 |
+
passed_values = {
|
293 |
+
**{
|
294 |
+
key: value
|
295 |
+
for key, value in zip(
|
296 |
+
# [1:] to skip `self` parameter
|
297 |
+
list(cls._hub_mixin_init_parameters)[1:],
|
298 |
+
args,
|
299 |
+
)
|
300 |
+
},
|
301 |
+
**kwargs,
|
302 |
+
}
|
303 |
+
|
304 |
+
# If config passed as dataclass => set it and return early
|
305 |
+
if is_dataclass(passed_values.get("config")):
|
306 |
+
instance._hub_mixin_config = passed_values["config"]
|
307 |
+
return instance
|
308 |
+
|
309 |
+
# Otherwise, build config from default + passed values
|
310 |
+
init_config = {
|
311 |
+
# default values
|
312 |
+
**cls._hub_mixin_jsonable_default_values,
|
313 |
+
# passed values
|
314 |
+
**{
|
315 |
+
key: cls._encode_arg(value) # Encode custom types as jsonable value
|
316 |
+
for key, value in passed_values.items()
|
317 |
+
if instance._is_jsonable(value) # Only if jsonable or we have a custom encoder
|
318 |
+
},
|
319 |
+
}
|
320 |
+
passed_config = init_config.pop("config", {})
|
321 |
+
|
322 |
+
# Populate `init_config` with provided config
|
323 |
+
if isinstance(passed_config, dict):
|
324 |
+
init_config.update(passed_config)
|
325 |
+
|
326 |
+
# Set `config` attribute and return
|
327 |
+
if init_config != {}:
|
328 |
+
instance._hub_mixin_config = init_config
|
329 |
+
return instance
|
330 |
+
|
331 |
+
@classmethod
|
332 |
+
def _is_jsonable(cls, value: Any) -> bool:
|
333 |
+
"""Check if a value is JSON serializable."""
|
334 |
+
if isinstance(value, cls._hub_mixin_jsonable_custom_types):
|
335 |
+
return True
|
336 |
+
return is_jsonable(value)
|
337 |
+
|
338 |
+
@classmethod
|
339 |
+
def _encode_arg(cls, arg: Any) -> Any:
|
340 |
+
"""Encode an argument into a JSON serializable format."""
|
341 |
+
for type_, (encoder, _) in cls._hub_mixin_coders.items():
|
342 |
+
if isinstance(arg, type_):
|
343 |
+
if arg is None:
|
344 |
+
return None
|
345 |
+
return encoder(arg)
|
346 |
+
return arg
|
347 |
+
|
348 |
+
@classmethod
|
349 |
+
def _decode_arg(cls, expected_type: Type[ARGS_T], value: Any) -> Optional[ARGS_T]:
|
350 |
+
"""Decode a JSON serializable value into an argument."""
|
351 |
+
if is_simple_optional_type(expected_type):
|
352 |
+
if value is None:
|
353 |
+
return None
|
354 |
+
expected_type = unwrap_simple_optional_type(expected_type)
|
355 |
+
# Dataclass => handle it
|
356 |
+
if is_dataclass(expected_type):
|
357 |
+
return _load_dataclass(expected_type, value) # type: ignore[return-value]
|
358 |
+
# Otherwise => check custom decoders
|
359 |
+
for type_, (_, decoder) in cls._hub_mixin_coders.items():
|
360 |
+
if inspect.isclass(expected_type) and issubclass(expected_type, type_):
|
361 |
+
return decoder(value)
|
362 |
+
# Otherwise => don't decode
|
363 |
+
return value
|
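A minimal sketch of the `coders` mechanism handled by `_encode_arg`/`_decode_arg`, using `argparse.Namespace` as the custom config type (class name and serialization details are illustrative):

```python
import argparse
from pathlib import Path

from huggingface_hub import ModelHubMixin


class MyModel(
    ModelHubMixin,
    coders={
        argparse.Namespace: (
            lambda ns: vars(ns),                      # encoder: Namespace -> jsonable dict
            lambda data: argparse.Namespace(**data),  # decoder: dict -> Namespace
        )
    },
):
    def __init__(self, config: argparse.Namespace):
        super().__init__()
        self.config = config

    def _save_pretrained(self, save_directory: Path) -> None:
        ...  # framework-specific serialization, omitted in this sketch
```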
364 |
+
|
365 |
+
def save_pretrained(
|
366 |
+
self,
|
367 |
+
save_directory: Union[str, Path],
|
368 |
+
*,
|
369 |
+
config: Optional[Union[dict, "DataclassInstance"]] = None,
|
370 |
+
repo_id: Optional[str] = None,
|
371 |
+
push_to_hub: bool = False,
|
372 |
+
model_card_kwargs: Optional[Dict[str, Any]] = None,
|
373 |
+
**push_to_hub_kwargs,
|
374 |
+
) -> Optional[str]:
|
375 |
+
"""
|
376 |
+
Save weights in local directory.
|
377 |
+
|
378 |
+
Args:
|
379 |
+
save_directory (`str` or `Path`):
|
380 |
+
Path to directory in which the model weights and configuration will be saved.
|
381 |
+
config (`dict` or `DataclassInstance`, *optional*):
|
382 |
+
Model configuration specified as a key/value dictionary or a dataclass instance.
|
383 |
+
push_to_hub (`bool`, *optional*, defaults to `False`):
|
384 |
+
Whether or not to push your model to the Huggingface Hub after saving it.
|
385 |
+
repo_id (`str`, *optional*):
|
386 |
+
ID of your repository on the Hub. Used only if `push_to_hub=True`. Will default to the folder name if
|
387 |
+
not provided.
|
388 |
+
model_card_kwargs (`Dict[str, Any]`, *optional*):
|
389 |
+
Additional arguments passed to the model card template to customize the model card.
|
390 |
+
push_to_hub_kwargs:
|
391 |
+
Additional keyword arguments passed along to the [`~ModelHubMixin.push_to_hub`] method.
|
392 |
+
Returns:
|
393 |
+
`str` or `None`: url of the commit on the Hub if `push_to_hub=True`, `None` otherwise.
|
394 |
+
"""
|
395 |
+
save_directory = Path(save_directory)
|
396 |
+
save_directory.mkdir(parents=True, exist_ok=True)
|
397 |
+
|
398 |
+
# Remove config.json if already exists. After `_save_pretrained` we don't want to overwrite config.json
|
399 |
+
# as it might have been saved by the custom `_save_pretrained` already. However we do want to overwrite
|
400 |
+
# an existing config.json if it was not saved by `_save_pretrained`.
|
401 |
+
config_path = save_directory / constants.CONFIG_NAME
|
402 |
+
config_path.unlink(missing_ok=True)
|
403 |
+
|
404 |
+
# save model weights/files (framework-specific)
|
405 |
+
self._save_pretrained(save_directory)
|
406 |
+
|
407 |
+
# save config (if provided and if not serialized yet in `_save_pretrained`)
|
408 |
+
if config is None:
|
409 |
+
config = self._hub_mixin_config
|
410 |
+
if config is not None:
|
411 |
+
if is_dataclass(config):
|
412 |
+
config = asdict(config) # type: ignore[arg-type]
|
413 |
+
if not config_path.exists():
|
414 |
+
config_str = json.dumps(config, sort_keys=True, indent=2)
|
415 |
+
config_path.write_text(config_str)
|
416 |
+
|
417 |
+
# save model card
|
418 |
+
model_card_path = save_directory / "README.md"
|
419 |
+
model_card_kwargs = model_card_kwargs if model_card_kwargs is not None else {}
|
420 |
+
if not model_card_path.exists(): # do not overwrite if already exists
|
421 |
+
self.generate_model_card(**model_card_kwargs).save(save_directory / "README.md")
|
422 |
+
|
423 |
+
# push to the Hub if required
|
424 |
+
if push_to_hub:
|
425 |
+
kwargs = push_to_hub_kwargs.copy() # soft-copy to avoid mutating input
|
426 |
+
if config is not None: # kwarg for `push_to_hub`
|
427 |
+
kwargs["config"] = config
|
428 |
+
if repo_id is None:
|
429 |
+
repo_id = save_directory.name # Defaults to `save_directory` name
|
430 |
+
return self.push_to_hub(repo_id=repo_id, model_card_kwargs=model_card_kwargs, **kwargs)
|
431 |
+
return None
|
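Assuming a `MyCustomModel` class defined as in the class docstring above (hypothetical repo id), `save_pretrained` is used like this:

```python
model = MyCustomModel(size=256, device="cpu")

# Local save: writes the weights, a config.json built from the tracked init kwargs, and a README.md
model.save_pretrained("my-awesome-model")

# Save and push in one call; repo_id defaults to the directory name when omitted
model.save_pretrained("my-awesome-model", push_to_hub=True, repo_id="my-username/my-awesome-model")
```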
432 |
+
|
433 |
+
def _save_pretrained(self, save_directory: Path) -> None:
|
434 |
+
"""
|
435 |
+
Overwrite this method in subclass to define how to save your model.
|
436 |
+
Check out our [integration guide](../guides/integrations) for instructions.
|
437 |
+
|
438 |
+
Args:
|
439 |
+
save_directory (`str` or `Path`):
|
440 |
+
Path to directory in which the model weights and configuration will be saved.
|
441 |
+
"""
|
442 |
+
raise NotImplementedError
|
443 |
+
|
444 |
+
@classmethod
|
445 |
+
@validate_hf_hub_args
|
446 |
+
def from_pretrained(
|
447 |
+
cls: Type[T],
|
448 |
+
pretrained_model_name_or_path: Union[str, Path],
|
449 |
+
*,
|
450 |
+
force_download: bool = False,
|
451 |
+
resume_download: Optional[bool] = None,
|
452 |
+
proxies: Optional[Dict] = None,
|
453 |
+
token: Optional[Union[str, bool]] = None,
|
454 |
+
cache_dir: Optional[Union[str, Path]] = None,
|
455 |
+
local_files_only: bool = False,
|
456 |
+
revision: Optional[str] = None,
|
457 |
+
**model_kwargs,
|
458 |
+
) -> T:
|
459 |
+
"""
|
460 |
+
Download a model from the Huggingface Hub and instantiate it.
|
461 |
+
|
462 |
+
Args:
|
463 |
+
pretrained_model_name_or_path (`str`, `Path`):
|
464 |
+
- Either the `model_id` (string) of a model hosted on the Hub, e.g. `bigscience/bloom`.
|
465 |
+
- Or a path to a `directory` containing model weights saved using
|
466 |
+
[`~transformers.PreTrainedModel.save_pretrained`], e.g., `../path/to/my_model_directory/`.
|
467 |
+
revision (`str`, *optional*):
|
468 |
+
Revision of the model on the Hub. Can be a branch name, a git tag or any commit id.
|
469 |
+
Defaults to the latest commit on `main` branch.
|
470 |
+
force_download (`bool`, *optional*, defaults to `False`):
|
471 |
+
Whether to force (re-)downloading the model weights and configuration files from the Hub, overriding
|
472 |
+
the existing cache.
|
473 |
+
proxies (`Dict[str, str]`, *optional*):
|
474 |
+
A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
|
475 |
+
'http://hostname': 'foo.bar:4012'}`. The proxies are used on every request.
|
476 |
+
token (`str` or `bool`, *optional*):
|
477 |
+
The token to use as HTTP bearer authorization for remote files. By default, it will use the token
|
478 |
+
cached when running `huggingface-cli login`.
|
479 |
+
cache_dir (`str`, `Path`, *optional*):
|
480 |
+
Path to the folder where cached files are stored.
|
481 |
+
local_files_only (`bool`, *optional*, defaults to `False`):
|
482 |
+
If `True`, avoid downloading the file and return the path to the local cached file if it exists.
|
483 |
+
model_kwargs (`Dict`, *optional*):
|
484 |
+
Additional kwargs to pass to the model during initialization.
|
485 |
+
"""
|
486 |
+
model_id = str(pretrained_model_name_or_path)
|
487 |
+
config_file: Optional[str] = None
|
488 |
+
if os.path.isdir(model_id):
|
489 |
+
if constants.CONFIG_NAME in os.listdir(model_id):
|
490 |
+
config_file = os.path.join(model_id, constants.CONFIG_NAME)
|
491 |
+
else:
|
492 |
+
logger.warning(f"{constants.CONFIG_NAME} not found in {Path(model_id).resolve()}")
|
493 |
+
else:
|
494 |
+
try:
|
495 |
+
config_file = hf_hub_download(
|
496 |
+
repo_id=model_id,
|
497 |
+
filename=constants.CONFIG_NAME,
|
498 |
+
revision=revision,
|
499 |
+
cache_dir=cache_dir,
|
500 |
+
force_download=force_download,
|
501 |
+
proxies=proxies,
|
502 |
+
resume_download=resume_download,
|
503 |
+
token=token,
|
504 |
+
local_files_only=local_files_only,
|
505 |
+
)
|
506 |
+
except HfHubHTTPError as e:
|
507 |
+
logger.info(f"{constants.CONFIG_NAME} not found on the HuggingFace Hub: {str(e)}")
|
508 |
+
|
509 |
+
# Read config
|
510 |
+
config = None
|
511 |
+
if config_file is not None:
|
512 |
+
with open(config_file, "r", encoding="utf-8") as f:
|
513 |
+
config = json.load(f)
|
514 |
+
|
515 |
+
# Decode custom types in config
|
516 |
+
for key, value in config.items():
|
517 |
+
if key in cls._hub_mixin_init_parameters:
|
518 |
+
expected_type = cls._hub_mixin_init_parameters[key].annotation
|
519 |
+
if expected_type is not inspect.Parameter.empty:
|
520 |
+
config[key] = cls._decode_arg(expected_type, value)
|
521 |
+
|
522 |
+
# Populate model_kwargs from config
|
523 |
+
for param in cls._hub_mixin_init_parameters.values():
|
524 |
+
if param.name not in model_kwargs and param.name in config:
|
525 |
+
model_kwargs[param.name] = config[param.name]
|
526 |
+
|
527 |
+
# Check if `config` argument was passed at init
|
528 |
+
if "config" in cls._hub_mixin_init_parameters and "config" not in model_kwargs:
|
529 |
+
# Decode `config` argument if it was passed
|
530 |
+
config_annotation = cls._hub_mixin_init_parameters["config"].annotation
|
531 |
+
config = cls._decode_arg(config_annotation, config)
|
532 |
+
|
533 |
+
# Forward config to model initialization
|
534 |
+
model_kwargs["config"] = config
|
535 |
+
|
536 |
+
# Inject config if `**kwargs` are expected
|
537 |
+
if is_dataclass(cls):
|
538 |
+
for key in cls.__dataclass_fields__:
|
539 |
+
if key not in model_kwargs and key in config:
|
540 |
+
model_kwargs[key] = config[key]
|
541 |
+
elif any(param.kind == inspect.Parameter.VAR_KEYWORD for param in cls._hub_mixin_init_parameters.values()):
|
542 |
+
for key, value in config.items():
|
543 |
+
if key not in model_kwargs:
|
544 |
+
model_kwargs[key] = value
|
545 |
+
|
546 |
+
# Finally, also inject if `_from_pretrained` expects it
|
547 |
+
if cls._hub_mixin_inject_config and "config" not in model_kwargs:
|
548 |
+
model_kwargs["config"] = config
|
549 |
+
|
550 |
+
instance = cls._from_pretrained(
|
551 |
+
model_id=str(model_id),
|
552 |
+
revision=revision,
|
553 |
+
cache_dir=cache_dir,
|
554 |
+
force_download=force_download,
|
555 |
+
proxies=proxies,
|
556 |
+
resume_download=resume_download,
|
557 |
+
local_files_only=local_files_only,
|
558 |
+
token=token,
|
559 |
+
**model_kwargs,
|
560 |
+
)
|
561 |
+
|
562 |
+
# Implicitly set the config as instance attribute if not already set by the class
|
563 |
+
# This way `config` will be available when calling `save_pretrained` or `push_to_hub`.
|
564 |
+
if config is not None and (getattr(instance, "_hub_mixin_config", None) in (None, {})):
|
565 |
+
instance._hub_mixin_config = config
|
566 |
+
|
567 |
+
return instance
|
568 |
+
|
569 |
+
@classmethod
|
570 |
+
def _from_pretrained(
|
571 |
+
cls: Type[T],
|
572 |
+
*,
|
573 |
+
model_id: str,
|
574 |
+
revision: Optional[str],
|
575 |
+
cache_dir: Optional[Union[str, Path]],
|
576 |
+
force_download: bool,
|
577 |
+
proxies: Optional[Dict],
|
578 |
+
resume_download: Optional[bool],
|
579 |
+
local_files_only: bool,
|
580 |
+
token: Optional[Union[str, bool]],
|
581 |
+
**model_kwargs,
|
582 |
+
) -> T:
|
583 |
+
"""Overwrite this method in subclass to define how to load your model from pretrained.
|
584 |
+
|
585 |
+
Use [`hf_hub_download`] or [`snapshot_download`] to download files from the Hub before loading them. Most
|
586 |
+
args taken as input can be directly passed to those 2 methods. If needed, you can add more arguments to this
|
587 |
+
method using "model_kwargs". For example [`PyTorchModelHubMixin._from_pretrained`] takes as input a `map_location`
|
588 |
+
parameter to set on which device the model should be loaded.
|
589 |
+
|
590 |
+
Check out our [integration guide](../guides/integrations) for more instructions.
|
591 |
+
|
592 |
+
Args:
|
593 |
+
model_id (`str`):
|
594 |
+
ID of the model to load from the Huggingface Hub (e.g. `bigscience/bloom`).
|
595 |
+
revision (`str`, *optional*):
|
596 |
+
Revision of the model on the Hub. Can be a branch name, a git tag or any commit id. Defaults to the
|
597 |
+
latest commit on `main` branch.
|
598 |
+
force_download (`bool`, *optional*, defaults to `False`):
|
599 |
+
Whether to force (re-)downloading the model weights and configuration files from the Hub, overriding
|
600 |
+
the existing cache.
|
601 |
+
proxies (`Dict[str, str]`, *optional*):
|
602 |
+
A dictionary of proxy servers to use by protocol or endpoint (e.g., `{'http': 'foo.bar:3128',
|
603 |
+
'http://hostname': 'foo.bar:4012'}`).
|
604 |
+
token (`str` or `bool`, *optional*):
|
605 |
+
The token to use as HTTP bearer authorization for remote files. By default, it will use the token
|
606 |
+
cached when running `huggingface-cli login`.
|
607 |
+
cache_dir (`str`, `Path`, *optional*):
|
608 |
+
Path to the folder where cached files are stored.
|
609 |
+
local_files_only (`bool`, *optional*, defaults to `False`):
|
610 |
+
If `True`, avoid downloading the file and return the path to the local cached file if it exists.
|
611 |
+
model_kwargs:
|
612 |
+
Additional keyword arguments passed along to the [`~ModelHubMixin._from_pretrained`] method.
|
613 |
+
"""
|
614 |
+
raise NotImplementedError
|
615 |
+
|
616 |
+
@validate_hf_hub_args
|
617 |
+
def push_to_hub(
|
618 |
+
self,
|
619 |
+
repo_id: str,
|
620 |
+
*,
|
621 |
+
config: Optional[Union[dict, "DataclassInstance"]] = None,
|
622 |
+
commit_message: str = "Push model using huggingface_hub.",
|
623 |
+
private: Optional[bool] = None,
|
624 |
+
token: Optional[str] = None,
|
625 |
+
branch: Optional[str] = None,
|
626 |
+
create_pr: Optional[bool] = None,
|
627 |
+
allow_patterns: Optional[Union[List[str], str]] = None,
|
628 |
+
ignore_patterns: Optional[Union[List[str], str]] = None,
|
629 |
+
delete_patterns: Optional[Union[List[str], str]] = None,
|
630 |
+
model_card_kwargs: Optional[Dict[str, Any]] = None,
|
631 |
+
) -> str:
|
632 |
+
"""
|
633 |
+
Upload model checkpoint to the Hub.
|
634 |
+
|
635 |
+
Use `allow_patterns` and `ignore_patterns` to precisely filter which files should be pushed to the hub. Use
|
636 |
+
`delete_patterns` to delete existing remote files in the same commit. See [`upload_folder`] reference for more
|
637 |
+
details.
|
638 |
+
|
639 |
+
Args:
|
640 |
+
repo_id (`str`):
|
641 |
+
ID of the repository to push to (example: `"username/my-model"`).
|
642 |
+
config (`dict` or `DataclassInstance`, *optional*):
|
643 |
+
Model configuration specified as a key/value dictionary or a dataclass instance.
|
644 |
+
commit_message (`str`, *optional*):
|
645 |
+
Message to commit while pushing.
|
646 |
+
private (`bool`, *optional*):
|
647 |
+
Whether the repository created should be private.
|
648 |
+
If `None` (default), the repo will be public unless the organization's default is private.
|
649 |
+
token (`str`, *optional*):
|
650 |
+
The token to use as HTTP bearer authorization for remote files. By default, it will use the token
|
651 |
+
cached when running `huggingface-cli login`.
|
652 |
+
branch (`str`, *optional*):
|
653 |
+
The git branch on which to push the model. This defaults to `"main"`.
|
654 |
+
create_pr (`boolean`, *optional*):
|
655 |
+
Whether or not to create a Pull Request from `branch` with that commit. Defaults to `False`.
|
656 |
+
allow_patterns (`List[str]` or `str`, *optional*):
|
657 |
+
If provided, only files matching at least one pattern are pushed.
|
658 |
+
ignore_patterns (`List[str]` or `str`, *optional*):
|
659 |
+
If provided, files matching any of the patterns are not pushed.
|
660 |
+
delete_patterns (`List[str]` or `str`, *optional*):
|
661 |
+
If provided, remote files matching any of the patterns will be deleted from the repo.
|
662 |
+
model_card_kwargs (`Dict[str, Any]`, *optional*):
|
663 |
+
Additional arguments passed to the model card template to customize the model card.
|
664 |
+
|
665 |
+
Returns:
|
666 |
+
The url of the commit of your model in the given repository.
|
667 |
+
"""
|
668 |
+
api = HfApi(token=token)
|
669 |
+
repo_id = api.create_repo(repo_id=repo_id, private=private, exist_ok=True).repo_id
|
670 |
+
|
671 |
+
# Push the files to the repo in a single commit
|
672 |
+
with SoftTemporaryDirectory() as tmp:
|
673 |
+
saved_path = Path(tmp) / repo_id
|
674 |
+
self.save_pretrained(saved_path, config=config, model_card_kwargs=model_card_kwargs)
|
675 |
+
return api.upload_folder(
|
676 |
+
repo_id=repo_id,
|
677 |
+
repo_type="model",
|
678 |
+
folder_path=saved_path,
|
679 |
+
commit_message=commit_message,
|
680 |
+
revision=branch,
|
681 |
+
create_pr=create_pr,
|
682 |
+
allow_patterns=allow_patterns,
|
683 |
+
ignore_patterns=ignore_patterns,
|
684 |
+
delete_patterns=delete_patterns,
|
685 |
+
)
|
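Continuing the same hypothetical example, `push_to_hub` uploads everything in a single commit:

```python
model.push_to_hub(
    "my-username/my-awesome-model",
    commit_message="Upload fine-tuned weights",
    private=True,
)
```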
686 |
+
|
687 |
+
def generate_model_card(self, *args, **kwargs) -> ModelCard:
|
688 |
+
card = ModelCard.from_template(
|
689 |
+
card_data=self._hub_mixin_info.model_card_data,
|
690 |
+
template_str=self._hub_mixin_info.model_card_template,
|
691 |
+
repo_url=self._hub_mixin_info.repo_url,
|
692 |
+
docs_url=self._hub_mixin_info.docs_url,
|
693 |
+
**kwargs,
|
694 |
+
)
|
695 |
+
return card
|
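The generated card can also be inspected or edited before saving or pushing (a small sketch):

```python
card = model.generate_model_card()
print(card.data.tags)   # always contains "model_hub_mixin"; the PyTorch mixin adds "pytorch_model_hub_mixin"
card.save("README.md")  # write the rendered card to disk
```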
696 |
+
|
697 |
+
|
698 |
+
class PyTorchModelHubMixin(ModelHubMixin):
|
699 |
+
"""
|
700 |
+
Implementation of [`ModelHubMixin`] to provide model Hub upload/download capabilities to PyTorch models. The model
|
701 |
+
is set in evaluation mode by default using `model.eval()` (dropout modules are deactivated). To train the model,
|
702 |
+
you should first set it back in training mode with `model.train()`.
|
703 |
+
|
704 |
+
See [`ModelHubMixin`] for more details on how to use the mixin.
|
705 |
+
|
706 |
+
Example:
|
707 |
+
|
708 |
+
```python
|
709 |
+
>>> import torch
|
710 |
+
>>> import torch.nn as nn
|
711 |
+
>>> from huggingface_hub import PyTorchModelHubMixin
|
712 |
+
|
713 |
+
>>> class MyModel(
|
714 |
+
... nn.Module,
|
715 |
+
... PyTorchModelHubMixin,
|
716 |
+
... library_name="keras-nlp",
|
717 |
+
... repo_url="https://github.com/keras-team/keras-nlp",
|
718 |
+
... docs_url="https://keras.io/keras_nlp/",
|
719 |
+
... # ^ optional metadata to generate model card
|
720 |
+
... ):
|
721 |
+
... def __init__(self, hidden_size: int = 512, vocab_size: int = 30000, output_size: int = 4):
|
722 |
+
... super().__init__()
|
723 |
+
... self.param = nn.Parameter(torch.rand(hidden_size, vocab_size))
|
724 |
+
... self.linear = nn.Linear(output_size, vocab_size)
|
725 |
+
|
726 |
+
... def forward(self, x):
|
727 |
+
... return self.linear(x + self.param)
|
728 |
+
>>> model = MyModel(hidden_size=256)
|
729 |
+
|
730 |
+
# Save model weights to local directory
|
731 |
+
>>> model.save_pretrained("my-awesome-model")
|
732 |
+
|
733 |
+
# Push model weights to the Hub
|
734 |
+
>>> model.push_to_hub("my-awesome-model")
|
735 |
+
|
736 |
+
# Download and initialize weights from the Hub
|
737 |
+
>>> model = MyModel.from_pretrained("username/my-awesome-model")
|
738 |
+
>>> model.hidden_size
|
739 |
+
256
|
740 |
+
```
|
741 |
+
"""
|
742 |
+
|
743 |
+
def __init_subclass__(cls, *args, tags: Optional[List[str]] = None, **kwargs) -> None:
|
744 |
+
tags = tags or []
|
745 |
+
tags.append("pytorch_model_hub_mixin")
|
746 |
+
kwargs["tags"] = tags
|
747 |
+
return super().__init_subclass__(*args, **kwargs)
|
748 |
+
|
749 |
+
def _save_pretrained(self, save_directory: Path) -> None:
|
750 |
+
"""Save weights from a Pytorch model to a local directory."""
|
751 |
+
model_to_save = self.module if hasattr(self, "module") else self # type: ignore
|
752 |
+
save_model_as_safetensor(model_to_save, str(save_directory / constants.SAFETENSORS_SINGLE_FILE))
|
753 |
+
|
754 |
+
@classmethod
|
755 |
+
def _from_pretrained(
|
756 |
+
cls,
|
757 |
+
*,
|
758 |
+
model_id: str,
|
759 |
+
revision: Optional[str],
|
760 |
+
cache_dir: Optional[Union[str, Path]],
|
761 |
+
force_download: bool,
|
762 |
+
proxies: Optional[Dict],
|
763 |
+
resume_download: Optional[bool],
|
764 |
+
local_files_only: bool,
|
765 |
+
token: Union[str, bool, None],
|
766 |
+
map_location: str = "cpu",
|
767 |
+
strict: bool = False,
|
768 |
+
**model_kwargs,
|
769 |
+
):
|
770 |
+
"""Load Pytorch pretrained weights and return the loaded model."""
|
771 |
+
model = cls(**model_kwargs)
|
772 |
+
if os.path.isdir(model_id):
|
773 |
+
print("Loading weights from local directory")
|
774 |
+
model_file = os.path.join(model_id, constants.SAFETENSORS_SINGLE_FILE)
|
775 |
+
return cls._load_as_safetensor(model, model_file, map_location, strict)
|
776 |
+
else:
|
777 |
+
try:
|
778 |
+
model_file = hf_hub_download(
|
779 |
+
repo_id=model_id,
|
780 |
+
filename=constants.SAFETENSORS_SINGLE_FILE,
|
781 |
+
revision=revision,
|
782 |
+
cache_dir=cache_dir,
|
783 |
+
force_download=force_download,
|
784 |
+
proxies=proxies,
|
785 |
+
resume_download=resume_download,
|
786 |
+
token=token,
|
787 |
+
local_files_only=local_files_only,
|
788 |
+
)
|
789 |
+
return cls._load_as_safetensor(model, model_file, map_location, strict)
|
790 |
+
except EntryNotFoundError:
|
791 |
+
model_file = hf_hub_download(
|
792 |
+
repo_id=model_id,
|
793 |
+
filename=constants.PYTORCH_WEIGHTS_NAME,
|
794 |
+
revision=revision,
|
795 |
+
cache_dir=cache_dir,
|
796 |
+
force_download=force_download,
|
797 |
+
proxies=proxies,
|
798 |
+
resume_download=resume_download,
|
799 |
+
token=token,
|
800 |
+
local_files_only=local_files_only,
|
801 |
+
)
|
802 |
+
return cls._load_as_pickle(model, model_file, map_location, strict)
|
803 |
+
|
804 |
+
@classmethod
|
805 |
+
def _load_as_pickle(cls, model: T, model_file: str, map_location: str, strict: bool) -> T:
|
806 |
+
state_dict = torch.load(model_file, map_location=torch.device(map_location), weights_only=True)
|
807 |
+
model.load_state_dict(state_dict, strict=strict) # type: ignore
|
808 |
+
model.eval() # type: ignore
|
809 |
+
return model
|
810 |
+
|
811 |
+
@classmethod
|
812 |
+
def _load_as_safetensor(cls, model: T, model_file: str, map_location: str, strict: bool) -> T:
|
813 |
+
if packaging.version.parse(safetensors.__version__) < packaging.version.parse("0.4.3"): # type: ignore [attr-defined]
|
814 |
+
load_model_as_safetensor(model, model_file, strict=strict) # type: ignore [arg-type]
|
815 |
+
if map_location != "cpu":
|
816 |
+
logger.warning(
|
817 |
+
"Loading model weights on other devices than 'cpu' is not supported natively in your version of safetensors."
|
818 |
+
" This means that the model is loaded on 'cpu' first and then copied to the device."
|
819 |
+
" This leads to a slower loading time."
|
820 |
+
" Please update safetensors to version 0.4.3 or above for improved performance."
|
821 |
+
)
|
822 |
+
model.to(map_location) # type: ignore [attr-defined]
|
823 |
+
else:
|
824 |
+
safetensors.torch.load_model(model, model_file, strict=strict, device=map_location) # type: ignore [arg-type]
|
825 |
+
return model
|
826 |
+
|
827 |
+
|
828 |
+
def _load_dataclass(datacls: Type["DataclassInstance"], data: dict) -> "DataclassInstance":
|
829 |
+
"""Load a dataclass instance from a dictionary.
|
830 |
+
|
831 |
+
Fields not expected by the dataclass are ignored.
|
832 |
+
"""
|
833 |
+
return datacls(**{k: v for k, v in data.items() if k in datacls.__dataclass_fields__})
|
meow/lib/python3.13/site-packages/huggingface_hub/keras_mixin.py
ADDED
@@ -0,0 +1,500 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import collections.abc as collections
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
import warnings
|
5 |
+
from functools import wraps
|
6 |
+
from pathlib import Path
|
7 |
+
from shutil import copytree
|
8 |
+
from typing import Any, Dict, List, Optional, Union
|
9 |
+
|
10 |
+
from huggingface_hub import ModelHubMixin, snapshot_download
|
11 |
+
from huggingface_hub.utils import (
|
12 |
+
get_tf_version,
|
13 |
+
is_graphviz_available,
|
14 |
+
is_pydot_available,
|
15 |
+
is_tf_available,
|
16 |
+
yaml_dump,
|
17 |
+
)
|
18 |
+
|
19 |
+
from . import constants
|
20 |
+
from .hf_api import HfApi
|
21 |
+
from .utils import SoftTemporaryDirectory, logging, validate_hf_hub_args
|
22 |
+
from .utils._typing import CallableT
|
23 |
+
|
24 |
+
|
25 |
+
logger = logging.get_logger(__name__)
|
26 |
+
|
27 |
+
keras = None
|
28 |
+
if is_tf_available():
|
29 |
+
# Depending on which version of TensorFlow is installed, we need to import
|
30 |
+
# keras from the correct location.
|
31 |
+
# See https://github.com/tensorflow/tensorflow/releases/tag/v2.16.1.
|
32 |
+
# Note: saving a keras model only works with Keras<3.0.
|
33 |
+
try:
|
34 |
+
import tf_keras as keras # type: ignore
|
35 |
+
except ImportError:
|
36 |
+
import tensorflow as tf # type: ignore
|
37 |
+
|
38 |
+
keras = tf.keras
|
39 |
+
|
40 |
+
|
41 |
+
def _requires_keras_2_model(fn: CallableT) -> CallableT:
|
42 |
+
# Wrapper to raise if user tries to save a Keras 3.x model
|
43 |
+
@wraps(fn)
|
44 |
+
def _inner(model, *args, **kwargs):
|
45 |
+
if not hasattr(model, "history"): # hacky way to check if model is Keras 2.x
|
46 |
+
raise NotImplementedError(
|
47 |
+
f"Cannot use '{fn.__name__}': Keras 3.x is not supported."
|
48 |
+
" Please save models manually and upload them using `upload_folder` or `huggingface-cli upload`."
|
49 |
+
)
|
50 |
+
return fn(model, *args, **kwargs)
|
51 |
+
|
52 |
+
return _inner # type: ignore [return-value]
|
53 |
+
|
54 |
+
|
55 |
+
def _flatten_dict(dictionary, parent_key=""):
|
56 |
+
"""Flatten a nested dictionary.
|
57 |
+
Reference: https://stackoverflow.com/a/6027615/10319735
|
58 |
+
|
59 |
+
Args:
|
60 |
+
dictionary (`dict`):
|
61 |
+
The nested dictionary to be flattened.
|
62 |
+
parent_key (`str`):
|
63 |
+
The parent key to be prefixed to the children keys.
|
64 |
+
Necessary for recursing over the nested dictionary.
|
65 |
+
|
66 |
+
Returns:
|
67 |
+
The flattened dictionary.
|
68 |
+
"""
|
69 |
+
items = []
|
70 |
+
for key, value in dictionary.items():
|
71 |
+
new_key = f"{parent_key}.{key}" if parent_key else key
|
72 |
+
if isinstance(value, collections.MutableMapping):
|
73 |
+
items.extend(
|
74 |
+
_flatten_dict(
|
75 |
+
value,
|
76 |
+
new_key,
|
77 |
+
).items()
|
78 |
+
)
|
79 |
+
else:
|
80 |
+
items.append((new_key, value))
|
81 |
+
return dict(items)
|
82 |
+
|
83 |
+
|
84 |
+
def _create_hyperparameter_table(model):
|
85 |
+
"""Parse hyperparameter dictionary into a markdown table."""
|
86 |
+
table = None
|
87 |
+
if model.optimizer is not None:
|
88 |
+
optimizer_params = model.optimizer.get_config()
|
89 |
+
# flatten the configuration
|
90 |
+
optimizer_params = _flatten_dict(optimizer_params)
|
91 |
+
optimizer_params["training_precision"] = keras.mixed_precision.global_policy().name
|
92 |
+
table = "| Hyperparameters | Value |\n| :-- | :-- |\n"
|
93 |
+
for key, value in optimizer_params.items():
|
94 |
+
table += f"| {key} | {value} |\n"
|
95 |
+
return table
|
96 |
+
|
97 |
+
|
98 |
+
def _plot_network(model, save_directory):
|
99 |
+
keras.utils.plot_model(
|
100 |
+
model,
|
101 |
+
to_file=f"{save_directory}/model.png",
|
102 |
+
show_shapes=False,
|
103 |
+
show_dtype=False,
|
104 |
+
show_layer_names=True,
|
105 |
+
rankdir="TB",
|
106 |
+
expand_nested=False,
|
107 |
+
dpi=96,
|
108 |
+
layer_range=None,
|
109 |
+
)
|
110 |
+
|
111 |
+
|
112 |
+
def _create_model_card(
|
113 |
+
model,
|
114 |
+
repo_dir: Path,
|
115 |
+
plot_model: bool = True,
|
116 |
+
metadata: Optional[dict] = None,
|
117 |
+
):
|
118 |
+
"""
|
119 |
+
Creates a model card for the repository.
|
120 |
+
|
121 |
+
Do not overwrite an existing README.md file.
|
122 |
+
"""
|
123 |
+
readme_path = repo_dir / "README.md"
|
124 |
+
if readme_path.exists():
|
125 |
+
return
|
126 |
+
|
127 |
+
hyperparameters = _create_hyperparameter_table(model)
|
128 |
+
if plot_model and is_graphviz_available() and is_pydot_available():
|
129 |
+
_plot_network(model, repo_dir)
|
130 |
+
if metadata is None:
|
131 |
+
metadata = {}
|
132 |
+
metadata["library_name"] = "keras"
|
133 |
+
model_card: str = "---\n"
|
134 |
+
model_card += yaml_dump(metadata, default_flow_style=False)
|
135 |
+
model_card += "---\n"
|
136 |
+
model_card += "\n## Model description\n\nMore information needed\n"
|
137 |
+
model_card += "\n## Intended uses & limitations\n\nMore information needed\n"
|
138 |
+
model_card += "\n## Training and evaluation data\n\nMore information needed\n"
|
139 |
+
if hyperparameters is not None:
|
140 |
+
model_card += "\n## Training procedure\n"
|
141 |
+
model_card += "\n### Training hyperparameters\n"
|
142 |
+
model_card += "\nThe following hyperparameters were used during training:\n\n"
|
143 |
+
model_card += hyperparameters
|
144 |
+
model_card += "\n"
|
145 |
+
if plot_model and os.path.exists(f"{repo_dir}/model.png"):
|
146 |
+
model_card += "\n ## Model Plot\n"
|
147 |
+
model_card += "\n<details>"
|
148 |
+
model_card += "\n<summary>View Model Plot</summary>\n"
|
149 |
+
path_to_plot = "./model.png"
|
150 |
+
model_card += f"\n![Model Image]({path_to_plot})\n"
|
151 |
+
model_card += "\n</details>"
|
152 |
+
|
153 |
+
readme_path.write_text(model_card)
|
154 |
+
|
155 |
+
|
156 |
+
@_requires_keras_2_model
|
157 |
+
def save_pretrained_keras(
|
158 |
+
model,
|
159 |
+
save_directory: Union[str, Path],
|
160 |
+
config: Optional[Dict[str, Any]] = None,
|
161 |
+
include_optimizer: bool = False,
|
162 |
+
plot_model: bool = True,
|
163 |
+
tags: Optional[Union[list, str]] = None,
|
164 |
+
**model_save_kwargs,
|
165 |
+
):
|
166 |
+
"""
|
167 |
+
Saves a Keras model to save_directory in SavedModel format. Use this if
|
168 |
+
you're using the Functional or Sequential APIs.
|
169 |
+
|
170 |
+
Args:
|
171 |
+
model (`Keras.Model`):
|
172 |
+
The [Keras
|
173 |
+
model](https://www.tensorflow.org/api_docs/python/tf/keras/Model)
|
174 |
+
you'd like to save. The model must be compiled and built.
|
175 |
+
save_directory (`str` or `Path`):
|
176 |
+
Specify directory in which you want to save the Keras model.
|
177 |
+
config (`dict`, *optional*):
|
178 |
+
Configuration object to be saved alongside the model weights.
|
179 |
+
include_optimizer(`bool`, *optional*, defaults to `False`):
|
180 |
+
Whether or not to include optimizer in serialization.
|
181 |
+
plot_model (`bool`, *optional*, defaults to `True`):
|
182 |
+
Setting this to `True` will plot the model and put it in the model
|
183 |
+
card. Requires graphviz and pydot to be installed.
|
184 |
+
tags (Union[`str`,`list`], *optional*):
|
185 |
+
List of tags that are related to model or string of a single tag. See example tags
|
186 |
+
[here](https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1).
|
187 |
+
model_save_kwargs(`dict`, *optional*):
|
188 |
+
model_save_kwargs will be passed to
|
189 |
+
[`tf.keras.models.save_model()`](https://www.tensorflow.org/api_docs/python/tf/keras/models/save_model).
|
190 |
+
"""
|
191 |
+
if keras is None:
|
192 |
+
raise ImportError("Called a Tensorflow-specific function but could not import it.")
|
193 |
+
|
194 |
+
if not model.built:
|
195 |
+
raise ValueError("Model should be built before trying to save")
|
196 |
+
|
197 |
+
save_directory = Path(save_directory)
|
198 |
+
save_directory.mkdir(parents=True, exist_ok=True)
|
199 |
+
|
200 |
+
# saving config
|
201 |
+
if config:
|
202 |
+
if not isinstance(config, dict):
|
203 |
+
raise RuntimeError(f"Provided config to save_pretrained_keras should be a dict. Got: '{type(config)}'")
|
204 |
+
|
205 |
+
with (save_directory / constants.CONFIG_NAME).open("w") as f:
|
206 |
+
json.dump(config, f)
|
207 |
+
|
208 |
+
metadata = {}
|
209 |
+
if isinstance(tags, list):
|
210 |
+
metadata["tags"] = tags
|
211 |
+
elif isinstance(tags, str):
|
212 |
+
metadata["tags"] = [tags]
|
213 |
+
|
214 |
+
task_name = model_save_kwargs.pop("task_name", None)
|
215 |
+
if task_name is not None:
|
216 |
+
warnings.warn(
|
217 |
+
"`task_name` input argument is deprecated. Pass `tags` instead.",
|
218 |
+
FutureWarning,
|
219 |
+
)
|
220 |
+
if "tags" in metadata:
|
221 |
+
metadata["tags"].append(task_name)
|
222 |
+
else:
|
223 |
+
metadata["tags"] = [task_name]
|
224 |
+
|
225 |
+
if model.history is not None:
|
226 |
+
if model.history.history != {}:
|
227 |
+
path = save_directory / "history.json"
|
228 |
+
if path.exists():
|
229 |
+
warnings.warn(
|
230 |
+
"`history.json` file already exists, it will be overwritten by the history of this version.",
|
231 |
+
UserWarning,
|
232 |
+
)
|
233 |
+
with path.open("w", encoding="utf-8") as f:
|
234 |
+
json.dump(model.history.history, f, indent=2, sort_keys=True)
|
235 |
+
|
236 |
+
_create_model_card(model, save_directory, plot_model, metadata)
|
237 |
+
keras.models.save_model(model, save_directory, include_optimizer=include_optimizer, **model_save_kwargs)
|
238 |
+
|
239 |
+
|
240 |
+
def from_pretrained_keras(*args, **kwargs) -> "KerasModelHubMixin":
|
241 |
+
r"""
|
242 |
+
Instantiate a pretrained Keras model from a pre-trained model from the Hub.
|
243 |
+
The model is expected to be in `SavedModel` format.
|
244 |
+
|
245 |
+
Args:
|
246 |
+
pretrained_model_name_or_path (`str` or `os.PathLike`):
|
247 |
+
Can be either:
|
248 |
+
- A string, the `model id` of a pretrained model hosted inside a
|
249 |
+
model repo on huggingface.co. Valid model ids can be located
|
250 |
+
at the root-level, like `bert-base-uncased`, or namespaced
|
251 |
+
under a user or organization name, like
|
252 |
+
`dbmdz/bert-base-german-cased`.
|
253 |
+
- You can add `revision` by appending `@` at the end of model_id
|
254 |
+
simply like this: `dbmdz/bert-base-german-cased@main` Revision
|
255 |
+
is the specific model version to use. It can be a branch name,
|
256 |
+
a tag name, or a commit id, since we use a git-based system
|
257 |
+
for storing models and other artifacts on huggingface.co, so
|
258 |
+
`revision` can be any identifier allowed by git.
|
259 |
+
- A path to a `directory` containing model weights saved using
|
260 |
+
[`~transformers.PreTrainedModel.save_pretrained`], e.g.,
|
261 |
+
`./my_model_directory/`.
|
262 |
+
- `None` if you are both providing the configuration and state
|
263 |
+
dictionary (resp. with keyword arguments `config` and
|
264 |
+
`state_dict`).
|
265 |
+
force_download (`bool`, *optional*, defaults to `False`):
|
266 |
+
Whether to force the (re-)download of the model weights and
|
267 |
+
configuration files, overriding the cached versions if they exist.
|
268 |
+
proxies (`Dict[str, str]`, *optional*):
|
269 |
+
A dictionary of proxy servers to use by protocol or endpoint, e.g.,
|
270 |
+
`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The
|
271 |
+
proxies are used on each request.
|
272 |
+
token (`str` or `bool`, *optional*):
|
273 |
+
The token to use as HTTP bearer authorization for remote files. If
|
274 |
+
`True`, will use the token generated when running `transformers-cli
|
275 |
+
login` (stored in `~/.huggingface`).
|
276 |
+
cache_dir (`Union[str, os.PathLike]`, *optional*):
|
277 |
+
Path to a directory in which a downloaded pretrained model
|
278 |
+
configuration should be cached if the standard cache should not be
|
279 |
+
used.
|
280 |
+
local_files_only(`bool`, *optional*, defaults to `False`):
|
281 |
+
Whether to only look at local files (i.e., do not try to download
|
282 |
+
the model).
|
283 |
+
model_kwargs (`Dict`, *optional*):
|
284 |
+
model_kwargs will be passed to the model during initialization
|
285 |
+
|
286 |
+
<Tip>
|
287 |
+
|
288 |
+
Passing `token=True` is required when you want to use a private
|
289 |
+
model.
|
290 |
+
|
291 |
+
</Tip>
|
292 |
+
"""
|
293 |
+
return KerasModelHubMixin.from_pretrained(*args, **kwargs)
|
294 |
+
|
295 |
+
|
296 |
+
@validate_hf_hub_args
|
297 |
+
@_requires_keras_2_model
|
298 |
+
def push_to_hub_keras(
|
299 |
+
model,
|
300 |
+
repo_id: str,
|
301 |
+
*,
|
302 |
+
config: Optional[dict] = None,
|
303 |
+
commit_message: str = "Push Keras model using huggingface_hub.",
|
304 |
+
private: Optional[bool] = None,
|
305 |
+
api_endpoint: Optional[str] = None,
|
306 |
+
token: Optional[str] = None,
|
307 |
+
branch: Optional[str] = None,
|
308 |
+
create_pr: Optional[bool] = None,
|
309 |
+
allow_patterns: Optional[Union[List[str], str]] = None,
|
310 |
+
ignore_patterns: Optional[Union[List[str], str]] = None,
|
311 |
+
delete_patterns: Optional[Union[List[str], str]] = None,
|
312 |
+
log_dir: Optional[str] = None,
|
313 |
+
include_optimizer: bool = False,
|
314 |
+
tags: Optional[Union[list, str]] = None,
|
315 |
+
plot_model: bool = True,
|
316 |
+
**model_save_kwargs,
|
317 |
+
):
|
318 |
+
"""
|
319 |
+
Upload model checkpoint to the Hub.
|
320 |
+
|
321 |
+
Use `allow_patterns` and `ignore_patterns` to precisely filter which files should be pushed to the hub. Use
|
322 |
+
`delete_patterns` to delete existing remote files in the same commit. See [`upload_folder`] reference for more
|
323 |
+
details.
|
324 |
+
|
325 |
+
Args:
|
326 |
+
model (`Keras.Model`):
|
327 |
+
The [Keras model](`https://www.tensorflow.org/api_docs/python/tf/keras/Model`) you'd like to push to the
|
328 |
+
Hub. The model must be compiled and built.
|
329 |
+
repo_id (`str`):
|
330 |
+
ID of the repository to push to (example: `"username/my-model"`).
|
331 |
+
commit_message (`str`, *optional*, defaults to "Add Keras model"):
|
332 |
+
Message to commit while pushing.
|
333 |
+
private (`bool`, *optional*):
|
334 |
+
Whether the repository created should be private.
|
335 |
+
If `None` (default), the repo will be public unless the organization's default is private.
|
336 |
+
api_endpoint (`str`, *optional*):
|
337 |
+
The API endpoint to use when pushing the model to the hub.
|
338 |
+
token (`str`, *optional*):
|
339 |
+
The token to use as HTTP bearer authorization for remote files. If
|
340 |
+
not set, will use the token set when logging in with
|
341 |
+
`huggingface-cli login` (stored in `~/.huggingface`).
|
342 |
+
branch (`str`, *optional*):
|
343 |
+
The git branch on which to push the model. This defaults to
|
344 |
+
the default branch as specified in your repository, which
|
345 |
+
defaults to `"main"`.
|
346 |
+
create_pr (`boolean`, *optional*):
|
347 |
+
Whether or not to create a Pull Request from `branch` with that commit.
|
348 |
+
Defaults to `False`.
|
349 |
+
config (`dict`, *optional*):
|
350 |
+
Configuration object to be saved alongside the model weights.
|
351 |
+
allow_patterns (`List[str]` or `str`, *optional*):
|
352 |
+
If provided, only files matching at least one pattern are pushed.
|
353 |
+
ignore_patterns (`List[str]` or `str`, *optional*):
|
354 |
+
If provided, files matching any of the patterns are not pushed.
|
355 |
+
delete_patterns (`List[str]` or `str`, *optional*):
|
356 |
+
If provided, remote files matching any of the patterns will be deleted from the repo.
|
357 |
+
log_dir (`str`, *optional*):
|
358 |
+
TensorBoard logging directory to be pushed. The Hub automatically
|
359 |
+
hosts and displays a TensorBoard instance if log files are included
|
360 |
+
in the repository.
|
361 |
+
include_optimizer (`bool`, *optional*, defaults to `False`):
|
362 |
+
Whether or not to include optimizer during serialization.
|
363 |
+
tags (Union[`list`, `str`], *optional*):
|
364 |
+
List of tags that are related to model or string of a single tag. See example tags
|
365 |
+
[here](https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1).
|
366 |
+
plot_model (`bool`, *optional*, defaults to `True`):
|
367 |
+
Setting this to `True` will plot the model and put it in the model
|
368 |
+
card. Requires graphviz and pydot to be installed.
|
369 |
+
model_save_kwargs(`dict`, *optional*):
|
370 |
+
model_save_kwargs will be passed to
|
371 |
+
[`tf.keras.models.save_model()`](https://www.tensorflow.org/api_docs/python/tf/keras/models/save_model).
|
372 |
+
|
373 |
+
Returns:
|
374 |
+
The url of the commit of your model in the given repository.
|
375 |
+
"""
|
376 |
+
api = HfApi(endpoint=api_endpoint)
|
377 |
+
repo_id = api.create_repo(repo_id=repo_id, token=token, private=private, exist_ok=True).repo_id
|
378 |
+
|
379 |
+
# Push the files to the repo in a single commit
|
380 |
+
with SoftTemporaryDirectory() as tmp:
|
381 |
+
saved_path = Path(tmp) / repo_id
|
382 |
+
save_pretrained_keras(
|
383 |
+
model,
|
384 |
+
saved_path,
|
385 |
+
config=config,
|
386 |
+
include_optimizer=include_optimizer,
|
387 |
+
tags=tags,
|
388 |
+
plot_model=plot_model,
|
389 |
+
**model_save_kwargs,
|
390 |
+
)
|
391 |
+
|
392 |
+
# If `log_dir` provided, delete remote logs and upload new ones
|
393 |
+
if log_dir is not None:
|
394 |
+
delete_patterns = (
|
395 |
+
[]
|
396 |
+
if delete_patterns is None
|
397 |
+
else (
|
398 |
+
[delete_patterns] # convert `delete_patterns` to a list
|
399 |
+
if isinstance(delete_patterns, str)
|
400 |
+
else delete_patterns
|
401 |
+
)
|
402 |
+
)
|
403 |
+
delete_patterns.append("logs/*")
|
404 |
+
copytree(log_dir, saved_path / "logs")
|
405 |
+
|
406 |
+
return api.upload_folder(
|
407 |
+
repo_type="model",
|
408 |
+
repo_id=repo_id,
|
409 |
+
folder_path=saved_path,
|
410 |
+
commit_message=commit_message,
|
411 |
+
token=token,
|
412 |
+
revision=branch,
|
413 |
+
create_pr=create_pr,
|
414 |
+
allow_patterns=allow_patterns,
|
415 |
+
ignore_patterns=ignore_patterns,
|
416 |
+
delete_patterns=delete_patterns,
|
417 |
+
)
|
418 |
+
|
419 |
+
|
420 |
+
class KerasModelHubMixin(ModelHubMixin):
|
421 |
+
"""
|
422 |
+
Implementation of [`ModelHubMixin`] to provide model Hub upload/download
|
423 |
+
capabilities to Keras models.
|
424 |
+
|
425 |
+
|
426 |
+
```python
|
427 |
+
>>> import tensorflow as tf
|
428 |
+
>>> from huggingface_hub import KerasModelHubMixin
|
429 |
+
|
430 |
+
|
431 |
+
>>> class MyModel(tf.keras.Model, KerasModelHubMixin):
|
432 |
+
... def __init__(self, **kwargs):
|
433 |
+
... super().__init__()
|
434 |
+
... self.config = kwargs.pop("config", None)
|
435 |
+
... self.dummy_inputs = ...
|
436 |
+
... self.layer = ...
|
437 |
+
|
438 |
+
... def call(self, *args):
|
439 |
+
... return ...
|
440 |
+
|
441 |
+
|
442 |
+
>>> # Initialize and compile the model as you normally would
|
443 |
+
>>> model = MyModel()
|
444 |
+
>>> model.compile(...)
|
445 |
+
>>> # Build the graph by training it or passing dummy inputs
|
446 |
+
>>> _ = model(model.dummy_inputs)
|
447 |
+
>>> # Save model weights to local directory
|
448 |
+
>>> model.save_pretrained("my-awesome-model")
|
449 |
+
>>> # Push model weights to the Hub
|
450 |
+
>>> model.push_to_hub("my-awesome-model")
|
451 |
+
>>> # Download and initialize weights from the Hub
|
452 |
+
>>> model = MyModel.from_pretrained("username/super-cool-model")
|
453 |
+
```
|
454 |
+
"""
|
455 |
+
|
456 |
+
def _save_pretrained(self, save_directory):
|
457 |
+
save_pretrained_keras(self, save_directory)
|
458 |
+
|
459 |
+
@classmethod
|
460 |
+
def _from_pretrained(
|
461 |
+
cls,
|
462 |
+
model_id,
|
463 |
+
revision,
|
464 |
+
cache_dir,
|
465 |
+
force_download,
|
466 |
+
proxies,
|
467 |
+
resume_download,
|
468 |
+
local_files_only,
|
469 |
+
token,
|
470 |
+
config: Optional[Dict[str, Any]] = None,
|
471 |
+
**model_kwargs,
|
472 |
+
):
|
473 |
+
"""Here we just call [`from_pretrained_keras`] function so both the mixin and
|
474 |
+
functional APIs stay in sync.
|
475 |
+
|
476 |
+
TODO - Some args above aren't used since we are calling
|
477 |
+
snapshot_download instead of hf_hub_download.
|
478 |
+
"""
|
479 |
+
if keras is None:
|
480 |
+
raise ImportError("Called a TensorFlow-specific function but could not import it.")
|
481 |
+
|
482 |
+
# Root is either a local filepath matching model_id or a cached snapshot
|
483 |
+
if not os.path.isdir(model_id):
|
484 |
+
storage_folder = snapshot_download(
|
485 |
+
repo_id=model_id,
|
486 |
+
revision=revision,
|
487 |
+
cache_dir=cache_dir,
|
488 |
+
library_name="keras",
|
489 |
+
library_version=get_tf_version(),
|
490 |
+
)
|
491 |
+
else:
|
492 |
+
storage_folder = model_id
|
493 |
+
|
494 |
+
# TODO: change this in a future PR. We are not returning a KerasModelHubMixin instance here...
|
495 |
+
model = keras.models.load_model(storage_folder)
|
496 |
+
|
497 |
+
# For now, we add a new attribute, config, to store the config loaded from the hub/a local dir.
|
498 |
+
model.config = config
|
499 |
+
|
500 |
+
return model
|
meow/lib/python3.13/site-packages/huggingface_hub/repocard.py
ADDED
@@ -0,0 +1,830 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
from pathlib import Path
|
4 |
+
from typing import Any, Dict, Literal, Optional, Type, Union
|
5 |
+
|
6 |
+
import requests
|
7 |
+
import yaml
|
8 |
+
|
9 |
+
from huggingface_hub.file_download import hf_hub_download
|
10 |
+
from huggingface_hub.hf_api import upload_file
|
11 |
+
from huggingface_hub.repocard_data import (
|
12 |
+
CardData,
|
13 |
+
DatasetCardData,
|
14 |
+
EvalResult,
|
15 |
+
ModelCardData,
|
16 |
+
SpaceCardData,
|
17 |
+
eval_results_to_model_index,
|
18 |
+
model_index_to_eval_results,
|
19 |
+
)
|
20 |
+
from huggingface_hub.utils import get_session, is_jinja_available, yaml_dump
|
21 |
+
|
22 |
+
from . import constants
|
23 |
+
from .errors import EntryNotFoundError
|
24 |
+
from .utils import SoftTemporaryDirectory, logging, validate_hf_hub_args
|
25 |
+
|
26 |
+
|
27 |
+
logger = logging.get_logger(__name__)
|
28 |
+
|
29 |
+
|
30 |
+
TEMPLATE_MODELCARD_PATH = Path(__file__).parent / "templates" / "modelcard_template.md"
|
31 |
+
TEMPLATE_DATASETCARD_PATH = Path(__file__).parent / "templates" / "datasetcard_template.md"
|
32 |
+
|
33 |
+
# exact same regex as in the Hub server. Please keep in sync.
|
34 |
+
# See https://github.com/huggingface/moon-landing/blob/main/server/lib/ViewMarkdown.ts#L18
|
35 |
+
REGEX_YAML_BLOCK = re.compile(r"^(\s*---[\r\n]+)([\S\s]*?)([\r\n]+---(\r\n|\n|$))")
|
36 |
+
|
37 |
+
|
38 |
+
class RepoCard:
|
39 |
+
card_data_class = CardData
|
40 |
+
default_template_path = TEMPLATE_MODELCARD_PATH
|
41 |
+
repo_type = "model"
|
42 |
+
|
43 |
+
def __init__(self, content: str, ignore_metadata_errors: bool = False):
|
44 |
+
"""Initialize a RepoCard from string content. The content should be a
|
45 |
+
Markdown file with a YAML block at the beginning and a Markdown body.
|
46 |
+
|
47 |
+
Args:
|
48 |
+
content (`str`): The content of the Markdown file.
|
49 |
+
|
50 |
+
Example:
|
51 |
+
```python
|
52 |
+
>>> from huggingface_hub.repocard import RepoCard
|
53 |
+
>>> text = '''
|
54 |
+
... ---
|
55 |
+
... language: en
|
56 |
+
... license: mit
|
57 |
+
... ---
|
58 |
+
...
|
59 |
+
... # My repo
|
60 |
+
... '''
|
61 |
+
>>> card = RepoCard(text)
|
62 |
+
>>> card.data.to_dict()
|
63 |
+
{'language': 'en', 'license': 'mit'}
|
64 |
+
>>> card.text
|
65 |
+
'\\n# My repo\\n'
|
66 |
+
|
67 |
+
```
|
68 |
+
<Tip>
|
69 |
+
Raises the following error:
|
70 |
+
|
71 |
+
- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
|
72 |
+
when the content of the repo card metadata is not a dictionary.
|
73 |
+
|
74 |
+
</Tip>
|
75 |
+
"""
|
76 |
+
|
77 |
+
# Set the content of the RepoCard, as well as underlying .data and .text attributes.
|
78 |
+
# See the `content` property setter for more details.
|
79 |
+
self.ignore_metadata_errors = ignore_metadata_errors
|
80 |
+
self.content = content
|
81 |
+
|
82 |
+
@property
|
83 |
+
def content(self):
|
84 |
+
"""The content of the RepoCard, including the YAML block and the Markdown body."""
|
85 |
+
line_break = _detect_line_ending(self._content) or "\n"
|
86 |
+
return f"---{line_break}{self.data.to_yaml(line_break=line_break, original_order=self._original_order)}{line_break}---{line_break}{self.text}"
|
87 |
+
|
88 |
+
@content.setter
|
89 |
+
def content(self, content: str):
|
90 |
+
"""Set the content of the RepoCard."""
|
91 |
+
self._content = content
|
92 |
+
|
93 |
+
match = REGEX_YAML_BLOCK.search(content)
|
94 |
+
if match:
|
95 |
+
# Metadata found in the YAML block
|
96 |
+
yaml_block = match.group(2)
|
97 |
+
self.text = content[match.end() :]
|
98 |
+
data_dict = yaml.safe_load(yaml_block)
|
99 |
+
|
100 |
+
if data_dict is None:
|
101 |
+
data_dict = {}
|
102 |
+
|
103 |
+
# The YAML block's data should be a dictionary
|
104 |
+
if not isinstance(data_dict, dict):
|
105 |
+
raise ValueError("repo card metadata block should be a dict")
|
106 |
+
else:
|
107 |
+
# Model card without metadata... create empty metadata
|
108 |
+
logger.warning("Repo card metadata block was not found. Setting CardData to empty.")
|
109 |
+
data_dict = {}
|
110 |
+
self.text = content
|
111 |
+
|
112 |
+
self.data = self.card_data_class(**data_dict, ignore_metadata_errors=self.ignore_metadata_errors)
|
113 |
+
self._original_order = list(data_dict.keys())
|
114 |
+
|
115 |
+
def __str__(self):
|
116 |
+
return self.content
|
117 |
+
|
118 |
+
def save(self, filepath: Union[Path, str]):
|
119 |
+
r"""Save a RepoCard to a file.
|
120 |
+
|
121 |
+
Args:
|
122 |
+
filepath (`Union[Path, str]`): Filepath to the markdown file to save.
|
123 |
+
|
124 |
+
Example:
|
125 |
+
```python
|
126 |
+
>>> from huggingface_hub.repocard import RepoCard
|
127 |
+
>>> card = RepoCard("---\nlanguage: en\n---\n# This is a test repo card")
|
128 |
+
>>> card.save("/tmp/test.md")
|
129 |
+
|
130 |
+
```
|
131 |
+
"""
|
132 |
+
filepath = Path(filepath)
|
133 |
+
filepath.parent.mkdir(parents=True, exist_ok=True)
|
134 |
+
# Preserve newlines as in the existing file.
|
135 |
+
with open(filepath, mode="w", newline="", encoding="utf-8") as f:
|
136 |
+
f.write(str(self))
|
137 |
+
|
138 |
+
@classmethod
|
139 |
+
def load(
|
140 |
+
cls,
|
141 |
+
repo_id_or_path: Union[str, Path],
|
142 |
+
repo_type: Optional[str] = None,
|
143 |
+
token: Optional[str] = None,
|
144 |
+
ignore_metadata_errors: bool = False,
|
145 |
+
):
|
146 |
+
"""Initialize a RepoCard from a Hugging Face Hub repo's README.md or a local filepath.
|
147 |
+
|
148 |
+
Args:
|
149 |
+
repo_id_or_path (`Union[str, Path]`):
|
150 |
+
The repo ID associated with a Hugging Face Hub repo or a local filepath.
|
151 |
+
repo_type (`str`, *optional*):
|
152 |
+
The type of Hugging Face repo to push to. Defaults to None, which will use use "model". Other options
|
153 |
+
are "dataset" and "space". Not used when loading from a local filepath. If this is called from a child
|
154 |
+
class, the default value will be the child class's `repo_type`.
|
155 |
+
token (`str`, *optional*):
|
156 |
+
Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to the stored token.
|
157 |
+
ignore_metadata_errors (`str`):
|
158 |
+
If True, errors while parsing the metadata section will be ignored. Some information might be lost during
|
159 |
+
the process. Use it at your own risk.
|
160 |
+
|
161 |
+
Returns:
|
162 |
+
[`huggingface_hub.repocard.RepoCard`]: The RepoCard (or subclass) initialized from the repo's
|
163 |
+
README.md file or filepath.
|
164 |
+
|
165 |
+
Example:
|
166 |
+
```python
|
167 |
+
>>> from huggingface_hub.repocard import RepoCard
|
168 |
+
>>> card = RepoCard.load("nateraw/food")
|
169 |
+
>>> assert card.data.tags == ["generated_from_trainer", "image-classification", "pytorch"]
|
170 |
+
|
171 |
+
```
|
172 |
+
"""
|
173 |
+
|
174 |
+
if Path(repo_id_or_path).exists():
|
175 |
+
card_path = Path(repo_id_or_path)
|
176 |
+
elif isinstance(repo_id_or_path, str):
|
177 |
+
card_path = Path(
|
178 |
+
hf_hub_download(
|
179 |
+
repo_id_or_path,
|
180 |
+
constants.REPOCARD_NAME,
|
181 |
+
repo_type=repo_type or cls.repo_type,
|
182 |
+
token=token,
|
183 |
+
)
|
184 |
+
)
|
185 |
+
else:
|
186 |
+
raise ValueError(f"Cannot load RepoCard: path not found on disk ({repo_id_or_path}).")
|
187 |
+
|
188 |
+
# Preserve newlines in the existing file.
|
189 |
+
with card_path.open(mode="r", newline="", encoding="utf-8") as f:
|
190 |
+
return cls(f.read(), ignore_metadata_errors=ignore_metadata_errors)
|
191 |
+
|
192 |
+
def validate(self, repo_type: Optional[str] = None):
|
193 |
+
"""Validates card against Hugging Face Hub's card validation logic.
|
194 |
+
Using this function requires access to the internet, so it is only called
|
195 |
+
internally by [`huggingface_hub.repocard.RepoCard.push_to_hub`].
|
196 |
+
|
197 |
+
Args:
|
198 |
+
repo_type (`str`, *optional*, defaults to "model"):
|
199 |
+
The type of Hugging Face repo to push to. Options are "model", "dataset", and "space".
|
200 |
+
If this function is called from a child class, the default will be the child class's `repo_type`.
|
201 |
+
|
202 |
+
<Tip>
|
203 |
+
Raises the following errors:
|
204 |
+
|
205 |
+
- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
|
206 |
+
if the card fails validation checks.
|
207 |
+
- [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
|
208 |
+
if the request to the Hub API fails for any other reason.
|
209 |
+
|
210 |
+
</Tip>
|
211 |
+
"""
|
212 |
+
|
213 |
+
# If repo type is provided, otherwise, use the repo type of the card.
|
214 |
+
repo_type = repo_type or self.repo_type
|
215 |
+
|
216 |
+
body = {
|
217 |
+
"repoType": repo_type,
|
218 |
+
"content": str(self),
|
219 |
+
}
|
220 |
+
headers = {"Accept": "text/plain"}
|
221 |
+
|
222 |
+
try:
|
223 |
+
r = get_session().post("https://huggingface.co/api/validate-yaml", body, headers=headers)
|
224 |
+
r.raise_for_status()
|
225 |
+
except requests.exceptions.HTTPError as exc:
|
226 |
+
if r.status_code == 400:
|
227 |
+
raise ValueError(r.text)
|
228 |
+
else:
|
229 |
+
raise exc
|
230 |
+
|
231 |
+
def push_to_hub(
|
232 |
+
self,
|
233 |
+
repo_id: str,
|
234 |
+
token: Optional[str] = None,
|
235 |
+
repo_type: Optional[str] = None,
|
236 |
+
commit_message: Optional[str] = None,
|
237 |
+
commit_description: Optional[str] = None,
|
238 |
+
revision: Optional[str] = None,
|
239 |
+
create_pr: Optional[bool] = None,
|
240 |
+
parent_commit: Optional[str] = None,
|
241 |
+
):
|
242 |
+
"""Push a RepoCard to a Hugging Face Hub repo.
|
243 |
+
|
244 |
+
Args:
|
245 |
+
repo_id (`str`):
|
246 |
+
The repo ID of the Hugging Face Hub repo to push to. Example: "nateraw/food".
|
247 |
+
token (`str`, *optional*):
|
248 |
+
Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to
|
249 |
+
the stored token.
|
250 |
+
repo_type (`str`, *optional*, defaults to "model"):
|
251 |
+
The type of Hugging Face repo to push to. Options are "model", "dataset", and "space". If this
|
252 |
+
function is called by a child class, it will default to the child class's `repo_type`.
|
253 |
+
commit_message (`str`, *optional*):
|
254 |
+
The summary / title / first line of the generated commit.
|
255 |
+
commit_description (`str`, *optional*)
|
256 |
+
The description of the generated commit.
|
257 |
+
revision (`str`, *optional*):
|
258 |
+
The git revision to commit from. Defaults to the head of the `"main"` branch.
|
259 |
+
create_pr (`bool`, *optional*):
|
260 |
+
Whether or not to create a Pull Request with this commit. Defaults to `False`.
|
261 |
+
parent_commit (`str`, *optional*):
|
262 |
+
The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
|
263 |
+
If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
|
264 |
+
If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
|
265 |
+
Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
|
266 |
+
especially useful if the repo is updated / committed to concurrently.
|
267 |
+
Returns:
|
268 |
+
`str`: URL of the commit which updated the card metadata.
|
269 |
+
"""
|
270 |
+
|
271 |
+
# If repo type is provided, otherwise, use the repo type of the card.
|
272 |
+
repo_type = repo_type or self.repo_type
|
273 |
+
|
274 |
+
# Validate card before pushing to hub
|
275 |
+
self.validate(repo_type=repo_type)
|
276 |
+
|
277 |
+
with SoftTemporaryDirectory() as tmpdir:
|
278 |
+
tmp_path = Path(tmpdir) / constants.REPOCARD_NAME
|
279 |
+
tmp_path.write_text(str(self))
|
280 |
+
url = upload_file(
|
281 |
+
path_or_fileobj=str(tmp_path),
|
282 |
+
path_in_repo=constants.REPOCARD_NAME,
|
283 |
+
repo_id=repo_id,
|
284 |
+
token=token,
|
285 |
+
repo_type=repo_type,
|
286 |
+
commit_message=commit_message,
|
287 |
+
commit_description=commit_description,
|
288 |
+
create_pr=create_pr,
|
289 |
+
revision=revision,
|
290 |
+
parent_commit=parent_commit,
|
291 |
+
)
|
292 |
+
return url
|
293 |
+
|
294 |
+
@classmethod
|
295 |
+
def from_template(
|
296 |
+
cls,
|
297 |
+
card_data: CardData,
|
298 |
+
template_path: Optional[str] = None,
|
299 |
+
template_str: Optional[str] = None,
|
300 |
+
**template_kwargs,
|
301 |
+
):
|
302 |
+
"""Initialize a RepoCard from a template. By default, it uses the default template.
|
303 |
+
|
304 |
+
Templates are Jinja2 templates that can be customized by passing keyword arguments.
|
305 |
+
|
306 |
+
Args:
|
307 |
+
card_data (`huggingface_hub.CardData`):
|
308 |
+
A huggingface_hub.CardData instance containing the metadata you want to include in the YAML
|
309 |
+
header of the repo card on the Hugging Face Hub.
|
310 |
+
template_path (`str`, *optional*):
|
311 |
+
A path to a markdown file with optional Jinja template variables that can be filled
|
312 |
+
in with `template_kwargs`. Defaults to the default template.
|
313 |
+
|
314 |
+
Returns:
|
315 |
+
[`huggingface_hub.repocard.RepoCard`]: A RepoCard instance with the specified card data and content from the
|
316 |
+
template.
|
317 |
+
"""
|
318 |
+
if is_jinja_available():
|
319 |
+
import jinja2
|
320 |
+
else:
|
321 |
+
raise ImportError(
|
322 |
+
"Using RepoCard.from_template requires Jinja2 to be installed. Please"
|
323 |
+
" install it with `pip install Jinja2`."
|
324 |
+
)
|
325 |
+
|
326 |
+
kwargs = card_data.to_dict().copy()
|
327 |
+
kwargs.update(template_kwargs) # Template_kwargs have priority
|
328 |
+
|
329 |
+
if template_path is not None:
|
330 |
+
template_str = Path(template_path).read_text()
|
331 |
+
if template_str is None:
|
332 |
+
template_str = Path(cls.default_template_path).read_text()
|
333 |
+
template = jinja2.Template(template_str)
|
334 |
+
content = template.render(card_data=card_data.to_yaml(), **kwargs)
|
335 |
+
return cls(content)
|
336 |
+
|
337 |
+
|
338 |
+
class ModelCard(RepoCard):
|
339 |
+
card_data_class = ModelCardData
|
340 |
+
default_template_path = TEMPLATE_MODELCARD_PATH
|
341 |
+
repo_type = "model"
|
342 |
+
|
343 |
+
@classmethod
|
344 |
+
def from_template( # type: ignore # violates Liskov property but easier to use
|
345 |
+
cls,
|
346 |
+
card_data: ModelCardData,
|
347 |
+
template_path: Optional[str] = None,
|
348 |
+
template_str: Optional[str] = None,
|
349 |
+
**template_kwargs,
|
350 |
+
):
|
351 |
+
"""Initialize a ModelCard from a template. By default, it uses the default template, which can be found here:
|
352 |
+
https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md
|
353 |
+
|
354 |
+
Templates are Jinja2 templates that can be customized by passing keyword arguments.
|
355 |
+
|
356 |
+
Args:
|
357 |
+
card_data (`huggingface_hub.ModelCardData`):
|
358 |
+
A huggingface_hub.ModelCardData instance containing the metadata you want to include in the YAML
|
359 |
+
header of the model card on the Hugging Face Hub.
|
360 |
+
template_path (`str`, *optional*):
|
361 |
+
A path to a markdown file with optional Jinja template variables that can be filled
|
362 |
+
in with `template_kwargs`. Defaults to the default template.
|
363 |
+
|
364 |
+
Returns:
|
365 |
+
[`huggingface_hub.ModelCard`]: A ModelCard instance with the specified card data and content from the
|
366 |
+
template.
|
367 |
+
|
368 |
+
Example:
|
369 |
+
```python
|
370 |
+
>>> from huggingface_hub import ModelCard, ModelCardData, EvalResult
|
371 |
+
|
372 |
+
>>> # Using the Default Template
|
373 |
+
>>> card_data = ModelCardData(
|
374 |
+
... language='en',
|
375 |
+
... license='mit',
|
376 |
+
... library_name='timm',
|
377 |
+
... tags=['image-classification', 'resnet'],
|
378 |
+
... datasets=['beans'],
|
379 |
+
... metrics=['accuracy'],
|
380 |
+
... )
|
381 |
+
>>> card = ModelCard.from_template(
|
382 |
+
... card_data,
|
383 |
+
... model_description='This model does x + y...'
|
384 |
+
... )
|
385 |
+
|
386 |
+
>>> # Including Evaluation Results
|
387 |
+
>>> card_data = ModelCardData(
|
388 |
+
... language='en',
|
389 |
+
... tags=['image-classification', 'resnet'],
|
390 |
+
... eval_results=[
|
391 |
+
... EvalResult(
|
392 |
+
... task_type='image-classification',
|
393 |
+
... dataset_type='beans',
|
394 |
+
... dataset_name='Beans',
|
395 |
+
... metric_type='accuracy',
|
396 |
+
... metric_value=0.9,
|
397 |
+
... ),
|
398 |
+
... ],
|
399 |
+
... model_name='my-cool-model',
|
400 |
+
... )
|
401 |
+
>>> card = ModelCard.from_template(card_data)
|
402 |
+
|
403 |
+
>>> # Using a Custom Template
|
404 |
+
>>> card_data = ModelCardData(
|
405 |
+
... language='en',
|
406 |
+
... tags=['image-classification', 'resnet']
|
407 |
+
... )
|
408 |
+
>>> card = ModelCard.from_template(
|
409 |
+
... card_data=card_data,
|
410 |
+
... template_path='./src/huggingface_hub/templates/modelcard_template.md',
|
411 |
+
... custom_template_var='custom value', # will be replaced in template if it exists
|
412 |
+
... )
|
413 |
+
|
414 |
+
```
|
415 |
+
"""
|
416 |
+
return super().from_template(card_data, template_path, template_str, **template_kwargs)
|
417 |
+
|
418 |
+
|
419 |
+
class DatasetCard(RepoCard):
|
420 |
+
card_data_class = DatasetCardData
|
421 |
+
default_template_path = TEMPLATE_DATASETCARD_PATH
|
422 |
+
repo_type = "dataset"
|
423 |
+
|
424 |
+
@classmethod
|
425 |
+
def from_template( # type: ignore # violates Liskov property but easier to use
|
426 |
+
cls,
|
427 |
+
card_data: DatasetCardData,
|
428 |
+
template_path: Optional[str] = None,
|
429 |
+
template_str: Optional[str] = None,
|
430 |
+
**template_kwargs,
|
431 |
+
):
|
432 |
+
"""Initialize a DatasetCard from a template. By default, it uses the default template, which can be found here:
|
433 |
+
https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/datasetcard_template.md
|
434 |
+
|
435 |
+
Templates are Jinja2 templates that can be customized by passing keyword arguments.
|
436 |
+
|
437 |
+
Args:
|
438 |
+
card_data (`huggingface_hub.DatasetCardData`):
|
439 |
+
A huggingface_hub.DatasetCardData instance containing the metadata you want to include in the YAML
|
440 |
+
header of the dataset card on the Hugging Face Hub.
|
441 |
+
template_path (`str`, *optional*):
|
442 |
+
A path to a markdown file with optional Jinja template variables that can be filled
|
443 |
+
in with `template_kwargs`. Defaults to the default template.
|
444 |
+
|
445 |
+
Returns:
|
446 |
+
[`huggingface_hub.DatasetCard`]: A DatasetCard instance with the specified card data and content from the
|
447 |
+
template.
|
448 |
+
|
449 |
+
Example:
|
450 |
+
```python
|
451 |
+
>>> from huggingface_hub import DatasetCard, DatasetCardData
|
452 |
+
|
453 |
+
>>> # Using the Default Template
|
454 |
+
>>> card_data = DatasetCardData(
|
455 |
+
... language='en',
|
456 |
+
... license='mit',
|
457 |
+
... annotations_creators='crowdsourced',
|
458 |
+
... task_categories=['text-classification'],
|
459 |
+
... task_ids=['sentiment-classification', 'text-scoring'],
|
460 |
+
... multilinguality='monolingual',
|
461 |
+
... pretty_name='My Text Classification Dataset',
|
462 |
+
... )
|
463 |
+
>>> card = DatasetCard.from_template(
|
464 |
+
... card_data,
|
465 |
+
... pretty_name=card_data.pretty_name,
|
466 |
+
... )
|
467 |
+
|
468 |
+
>>> # Using a Custom Template
|
469 |
+
>>> card_data = DatasetCardData(
|
470 |
+
... language='en',
|
471 |
+
... license='mit',
|
472 |
+
... )
|
473 |
+
>>> card = DatasetCard.from_template(
|
474 |
+
... card_data=card_data,
|
475 |
+
... template_path='./src/huggingface_hub/templates/datasetcard_template.md',
|
476 |
+
... custom_template_var='custom value', # will be replaced in template if it exists
|
477 |
+
... )
|
478 |
+
|
479 |
+
```
|
480 |
+
"""
|
481 |
+
return super().from_template(card_data, template_path, template_str, **template_kwargs)
|
482 |
+
|
483 |
+
|
484 |
+
class SpaceCard(RepoCard):
|
485 |
+
card_data_class = SpaceCardData
|
486 |
+
default_template_path = TEMPLATE_MODELCARD_PATH
|
487 |
+
repo_type = "space"
|
488 |
+
|
489 |
+
|
490 |
+
def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]: # noqa: F722
|
491 |
+
"""Detect the line ending of a string. Used by RepoCard to avoid making huge diff on newlines.
|
492 |
+
|
493 |
+
Uses same implementation as in Hub server, keep it in sync.
|
494 |
+
|
495 |
+
Returns:
|
496 |
+
str: The detected line ending of the string.
|
497 |
+
"""
|
498 |
+
cr = content.count("\r")
|
499 |
+
lf = content.count("\n")
|
500 |
+
crlf = content.count("\r\n")
|
501 |
+
if cr + lf == 0:
|
502 |
+
return None
|
503 |
+
if crlf == cr and crlf == lf:
|
504 |
+
return "\r\n"
|
505 |
+
if cr > lf:
|
506 |
+
return "\r"
|
507 |
+
else:
|
508 |
+
return "\n"
|
509 |
+
|
510 |
+
|
511 |
+
def metadata_load(local_path: Union[str, Path]) -> Optional[Dict]:
|
512 |
+
content = Path(local_path).read_text()
|
513 |
+
match = REGEX_YAML_BLOCK.search(content)
|
514 |
+
if match:
|
515 |
+
yaml_block = match.group(2)
|
516 |
+
data = yaml.safe_load(yaml_block)
|
517 |
+
if data is None or isinstance(data, dict):
|
518 |
+
return data
|
519 |
+
raise ValueError("repo card metadata block should be a dict")
|
520 |
+
else:
|
521 |
+
return None
|
522 |
+
|
523 |
+
|
524 |
+
def metadata_save(local_path: Union[str, Path], data: Dict) -> None:
|
525 |
+
"""
|
526 |
+
Save the metadata dict in the upper YAML part Trying to preserve newlines as
|
527 |
+
in the existing file. Docs about open() with newline="" parameter:
|
528 |
+
https://docs.python.org/3/library/functions.html?highlight=open#open Does
|
529 |
+
not work with "^M" linebreaks, which are replaced by \n
|
530 |
+
"""
|
531 |
+
line_break = "\n"
|
532 |
+
content = ""
|
533 |
+
# try to detect existing newline character
|
534 |
+
if os.path.exists(local_path):
|
535 |
+
with open(local_path, "r", newline="", encoding="utf8") as readme:
|
536 |
+
content = readme.read()
|
537 |
+
if isinstance(readme.newlines, tuple):
|
538 |
+
line_break = readme.newlines[0]
|
539 |
+
elif isinstance(readme.newlines, str):
|
540 |
+
line_break = readme.newlines
|
541 |
+
|
542 |
+
# creates a new file if it not
|
543 |
+
with open(local_path, "w", newline="", encoding="utf8") as readme:
|
544 |
+
data_yaml = yaml_dump(data, sort_keys=False, line_break=line_break)
|
545 |
+
# sort_keys: keep dict order
|
546 |
+
match = REGEX_YAML_BLOCK.search(content)
|
547 |
+
if match:
|
548 |
+
output = content[: match.start()] + f"---{line_break}{data_yaml}---{line_break}" + content[match.end() :]
|
549 |
+
else:
|
550 |
+
output = f"---{line_break}{data_yaml}---{line_break}{content}"
|
551 |
+
|
552 |
+
readme.write(output)
|
553 |
+
readme.close()
|
554 |
+
|
555 |
+
|
556 |
+
def metadata_eval_result(
|
557 |
+
*,
|
558 |
+
model_pretty_name: str,
|
559 |
+
task_pretty_name: str,
|
560 |
+
task_id: str,
|
561 |
+
metrics_pretty_name: str,
|
562 |
+
metrics_id: str,
|
563 |
+
metrics_value: Any,
|
564 |
+
dataset_pretty_name: str,
|
565 |
+
dataset_id: str,
|
566 |
+
metrics_config: Optional[str] = None,
|
567 |
+
metrics_verified: bool = False,
|
568 |
+
dataset_config: Optional[str] = None,
|
569 |
+
dataset_split: Optional[str] = None,
|
570 |
+
dataset_revision: Optional[str] = None,
|
571 |
+
metrics_verification_token: Optional[str] = None,
|
572 |
+
) -> Dict:
|
573 |
+
"""
|
574 |
+
Creates a metadata dict with the result from a model evaluated on a dataset.
|
575 |
+
|
576 |
+
Args:
|
577 |
+
model_pretty_name (`str`):
|
578 |
+
The name of the model in natural language.
|
579 |
+
task_pretty_name (`str`):
|
580 |
+
The name of a task in natural language.
|
581 |
+
task_id (`str`):
|
582 |
+
Example: automatic-speech-recognition. A task id.
|
583 |
+
metrics_pretty_name (`str`):
|
584 |
+
A name for the metric in natural language. Example: Test WER.
|
585 |
+
metrics_id (`str`):
|
586 |
+
Example: wer. A metric id from https://hf.co/metrics.
|
587 |
+
metrics_value (`Any`):
|
588 |
+
The value from the metric. Example: 20.0 or "20.0 ± 1.2".
|
589 |
+
dataset_pretty_name (`str`):
|
590 |
+
The name of the dataset in natural language.
|
591 |
+
dataset_id (`str`):
|
592 |
+
Example: common_voice. A dataset id from https://hf.co/datasets.
|
593 |
+
metrics_config (`str`, *optional*):
|
594 |
+
The name of the metric configuration used in `load_metric()`.
|
595 |
+
Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
|
596 |
+
metrics_verified (`bool`, *optional*, defaults to `False`):
|
597 |
+
Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
|
598 |
+
dataset_config (`str`, *optional*):
|
599 |
+
Example: fr. The name of the dataset configuration used in `load_dataset()`.
|
600 |
+
dataset_split (`str`, *optional*):
|
601 |
+
Example: test. The name of the dataset split used in `load_dataset()`.
|
602 |
+
dataset_revision (`str`, *optional*):
|
603 |
+
Example: 5503434ddd753f426f4b38109466949a1217c2bb. The name of the dataset revision
|
604 |
+
used in `load_dataset()`.
|
605 |
+
metrics_verification_token (`str`, *optional*):
|
606 |
+
A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.
|
607 |
+
|
608 |
+
Returns:
|
609 |
+
`dict`: a metadata dict with the result from a model evaluated on a dataset.
|
610 |
+
|
611 |
+
Example:
|
612 |
+
```python
|
613 |
+
>>> from huggingface_hub import metadata_eval_result
|
614 |
+
>>> results = metadata_eval_result(
|
615 |
+
... model_pretty_name="RoBERTa fine-tuned on ReactionGIF",
|
616 |
+
... task_pretty_name="Text Classification",
|
617 |
+
... task_id="text-classification",
|
618 |
+
... metrics_pretty_name="Accuracy",
|
619 |
+
... metrics_id="accuracy",
|
620 |
+
... metrics_value=0.2662102282047272,
|
621 |
+
... dataset_pretty_name="ReactionJPEG",
|
622 |
+
... dataset_id="julien-c/reactionjpeg",
|
623 |
+
... dataset_config="default",
|
624 |
+
... dataset_split="test",
|
625 |
+
... )
|
626 |
+
>>> results == {
|
627 |
+
... 'model-index': [
|
628 |
+
... {
|
629 |
+
... 'name': 'RoBERTa fine-tuned on ReactionGIF',
|
630 |
+
... 'results': [
|
631 |
+
... {
|
632 |
+
... 'task': {
|
633 |
+
... 'type': 'text-classification',
|
634 |
+
... 'name': 'Text Classification'
|
635 |
+
... },
|
636 |
+
... 'dataset': {
|
637 |
+
... 'name': 'ReactionJPEG',
|
638 |
+
... 'type': 'julien-c/reactionjpeg',
|
639 |
+
... 'config': 'default',
|
640 |
+
... 'split': 'test'
|
641 |
+
... },
|
642 |
+
... 'metrics': [
|
643 |
+
... {
|
644 |
+
... 'type': 'accuracy',
|
645 |
+
... 'value': 0.2662102282047272,
|
646 |
+
... 'name': 'Accuracy',
|
647 |
+
... 'verified': False
|
648 |
+
... }
|
649 |
+
... ]
|
650 |
+
... }
|
651 |
+
... ]
|
652 |
+
... }
|
653 |
+
... ]
|
654 |
+
... }
|
655 |
+
True
|
656 |
+
|
657 |
+
```
|
658 |
+
"""
|
659 |
+
|
660 |
+
return {
|
661 |
+
"model-index": eval_results_to_model_index(
|
662 |
+
model_name=model_pretty_name,
|
663 |
+
eval_results=[
|
664 |
+
EvalResult(
|
665 |
+
task_name=task_pretty_name,
|
666 |
+
task_type=task_id,
|
667 |
+
metric_name=metrics_pretty_name,
|
668 |
+
metric_type=metrics_id,
|
669 |
+
metric_value=metrics_value,
|
670 |
+
dataset_name=dataset_pretty_name,
|
671 |
+
dataset_type=dataset_id,
|
672 |
+
metric_config=metrics_config,
|
673 |
+
verified=metrics_verified,
|
674 |
+
verify_token=metrics_verification_token,
|
675 |
+
dataset_config=dataset_config,
|
676 |
+
dataset_split=dataset_split,
|
677 |
+
dataset_revision=dataset_revision,
|
678 |
+
)
|
679 |
+
],
|
680 |
+
)
|
681 |
+
}
|
682 |
+
|
683 |
+
|
684 |
+
@validate_hf_hub_args
|
685 |
+
def metadata_update(
|
686 |
+
repo_id: str,
|
687 |
+
metadata: Dict,
|
688 |
+
*,
|
689 |
+
repo_type: Optional[str] = None,
|
690 |
+
overwrite: bool = False,
|
691 |
+
token: Optional[str] = None,
|
692 |
+
commit_message: Optional[str] = None,
|
693 |
+
commit_description: Optional[str] = None,
|
694 |
+
revision: Optional[str] = None,
|
695 |
+
create_pr: bool = False,
|
696 |
+
parent_commit: Optional[str] = None,
|
697 |
+
) -> str:
|
698 |
+
"""
|
699 |
+
Updates the metadata in the README.md of a repository on the Hugging Face Hub.
|
700 |
+
If the README.md file doesn't exist yet, a new one is created with metadata and
|
701 |
+
the default ModelCard or DatasetCard template. For a `space` repo, an error is thrown
|
702 |
+
as a Space cannot exist without a `README.md` file.
|
703 |
+
|
704 |
+
Args:
|
705 |
+
repo_id (`str`):
|
706 |
+
The name of the repository.
|
707 |
+
metadata (`dict`):
|
708 |
+
A dictionary containing the metadata to be updated.
|
709 |
+
repo_type (`str`, *optional*):
|
710 |
+
Set to `"dataset"` or `"space"` if updating to a dataset or space,
|
711 |
+
`None` or `"model"` if updating to a model. Default is `None`.
|
712 |
+
overwrite (`bool`, *optional*, defaults to `False`):
|
713 |
+
If set to `True` an existing field can be overwritten, otherwise
|
714 |
+
attempting to overwrite an existing field will cause an error.
|
715 |
+
token (`str`, *optional*):
|
716 |
+
The Hugging Face authentication token.
|
717 |
+
commit_message (`str`, *optional*):
|
718 |
+
The summary / title / first line of the generated commit. Defaults to
|
719 |
+
`f"Update metadata with huggingface_hub"`
|
720 |
+
commit_description (`str`, *optional*):
|
721 |
+
The description of the generated commit
|
722 |
+
revision (`str`, *optional*):
|
723 |
+
The git revision to commit from. Defaults to the head of the
|
724 |
+
`"main"` branch.
|
725 |
+
create_pr (`bool`, *optional*):
|
726 |
+
Whether or not to create a Pull Request from `revision` with that commit.
|
727 |
+
Defaults to `False`.
|
728 |
+
parent_commit (`str`, *optional*):
|
729 |
+
The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
|
730 |
+
If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
|
731 |
+
If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
|
732 |
+
Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
|
733 |
+
especially useful if the repo is updated / committed to concurrently.
|
734 |
+
Returns:
|
735 |
+
`str`: URL of the commit which updated the card metadata.
|
736 |
+
|
737 |
+
Example:
|
738 |
+
```python
|
739 |
+
>>> from huggingface_hub import metadata_update
|
740 |
+
>>> metadata = {'model-index': [{'name': 'RoBERTa fine-tuned on ReactionGIF',
|
741 |
+
... 'results': [{'dataset': {'name': 'ReactionGIF',
|
742 |
+
... 'type': 'julien-c/reactiongif'},
|
743 |
+
... 'metrics': [{'name': 'Recall',
|
744 |
+
... 'type': 'recall',
|
745 |
+
... 'value': 0.7762102282047272}],
|
746 |
+
... 'task': {'name': 'Text Classification',
|
747 |
+
... 'type': 'text-classification'}}]}]}
|
748 |
+
>>> url = metadata_update("hf-internal-testing/reactiongif-roberta-card", metadata)
|
749 |
+
|
750 |
+
```
|
751 |
+
"""
|
752 |
+
commit_message = commit_message if commit_message is not None else "Update metadata with huggingface_hub"
|
753 |
+
|
754 |
+
# Card class given repo_type
|
755 |
+
card_class: Type[RepoCard]
|
756 |
+
if repo_type is None or repo_type == "model":
|
757 |
+
card_class = ModelCard
|
758 |
+
elif repo_type == "dataset":
|
759 |
+
card_class = DatasetCard
|
760 |
+
elif repo_type == "space":
|
761 |
+
card_class = RepoCard
|
762 |
+
else:
|
763 |
+
raise ValueError(f"Unknown repo_type: {repo_type}")
|
764 |
+
|
765 |
+
# Either load repo_card from the Hub or create an empty one.
|
766 |
+
# NOTE: Will not create the repo if it doesn't exist.
|
767 |
+
try:
|
768 |
+
card = card_class.load(repo_id, token=token, repo_type=repo_type)
|
769 |
+
except EntryNotFoundError:
|
770 |
+
if repo_type == "space":
|
771 |
+
raise ValueError("Cannot update metadata on a Space that doesn't contain a `README.md` file.")
|
772 |
+
|
773 |
+
# Initialize a ModelCard or DatasetCard from default template and no data.
|
774 |
+
card = card_class.from_template(CardData())
|
775 |
+
|
776 |
+
for key, value in metadata.items():
|
777 |
+
if key == "model-index":
|
778 |
+
# if the new metadata doesn't include a name, either use existing one or repo name
|
779 |
+
if "name" not in value[0]:
|
780 |
+
value[0]["name"] = getattr(card, "model_name", repo_id)
|
781 |
+
model_name, new_results = model_index_to_eval_results(value)
|
782 |
+
if card.data.eval_results is None:
|
783 |
+
card.data.eval_results = new_results
|
784 |
+
card.data.model_name = model_name
|
785 |
+
else:
|
786 |
+
existing_results = card.data.eval_results
|
787 |
+
|
788 |
+
# Iterate over new results
|
789 |
+
# Iterate over existing results
|
790 |
+
# If both results describe the same metric but value is different:
|
791 |
+
# If overwrite=True: overwrite the metric value
|
792 |
+
# Else: raise ValueError
|
793 |
+
# Else: append new result to existing ones.
|
794 |
+
for new_result in new_results:
|
795 |
+
result_found = False
|
796 |
+
for existing_result in existing_results:
|
797 |
+
if new_result.is_equal_except_value(existing_result):
|
798 |
+
if new_result != existing_result and not overwrite:
|
799 |
+
raise ValueError(
|
800 |
+
"You passed a new value for the existing metric"
|
801 |
+
f" 'name: {new_result.metric_name}, type: "
|
802 |
+
f"{new_result.metric_type}'. Set `overwrite=True`"
|
803 |
+
" to overwrite existing metrics."
|
804 |
+
)
|
805 |
+
result_found = True
|
806 |
+
existing_result.metric_value = new_result.metric_value
|
807 |
+
if existing_result.verified is True:
|
808 |
+
existing_result.verify_token = new_result.verify_token
|
809 |
+
if not result_found:
|
810 |
+
card.data.eval_results.append(new_result)
|
811 |
+
else:
|
812 |
+
# Any metadata that is not a result metric
|
813 |
+
if card.data.get(key) is not None and not overwrite and card.data.get(key) != value:
|
814 |
+
raise ValueError(
|
815 |
+
f"You passed a new value for the existing meta data field '{key}'."
|
816 |
+
" Set `overwrite=True` to overwrite existing metadata."
|
817 |
+
)
|
818 |
+
else:
|
819 |
+
card.data[key] = value
|
820 |
+
|
821 |
+
return card.push_to_hub(
|
822 |
+
repo_id,
|
823 |
+
token=token,
|
824 |
+
repo_type=repo_type,
|
825 |
+
commit_message=commit_message,
|
826 |
+
commit_description=commit_description,
|
827 |
+
create_pr=create_pr,
|
828 |
+
revision=revision,
|
829 |
+
parent_commit=parent_commit,
|
830 |
+
)
|
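The `metadata_eval_result` and `metadata_update` helpers above are typically used together: the first builds a `model-index` dictionary, the second merges it into the YAML block at the top of a repo's README.md. Below is a minimal sketch, assuming `huggingface_hub` is installed and a write token is configured; the repo id `username/my-model`, the model name, and the metric value are purely hypothetical.

```python
from huggingface_hub import metadata_eval_result, metadata_update

# Build a `model-index` block for one metric evaluated on one dataset.
metadata = metadata_eval_result(
    model_pretty_name="My fine-tuned model",  # hypothetical display name
    task_pretty_name="Text Classification",
    task_id="text-classification",
    metrics_pretty_name="Accuracy",
    metrics_id="accuracy",
    metrics_value=0.91,                       # hypothetical value
    dataset_pretty_name="IMDB",
    dataset_id="imdb",
    dataset_split="test",
)

# Merge it into the README.md of a (hypothetical) repo on the Hub.
# overwrite=True replaces an existing value for the same metric instead of
# raising a ValueError.
commit_url = metadata_update("username/my-model", metadata, overwrite=True)
print(commit_url)
```

Note that `metadata_update` commits directly to the repo (or opens a pull request when `create_pr=True`), so it needs write access via `token` or a cached login.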
meow/lib/python3.13/site-packages/huggingface_hub/repocard_data.py
ADDED
@@ -0,0 +1,749 @@
1 |
+
import copy
|
2 |
+
from collections import defaultdict
|
3 |
+
from dataclasses import dataclass
|
4 |
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
5 |
+
|
6 |
+
from huggingface_hub.utils import logging, yaml_dump
|
7 |
+
|
8 |
+
|
9 |
+
logger = logging.get_logger(__name__)
|
10 |
+
|
11 |
+
|
12 |
+
@dataclass
|
13 |
+
class EvalResult:
|
14 |
+
"""
|
15 |
+
Flattened representation of individual evaluation results found in model-index of Model Cards.
|
16 |
+
|
17 |
+
For more information on the model-index spec, see https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1.
|
18 |
+
|
19 |
+
Args:
|
20 |
+
task_type (`str`):
|
21 |
+
The task identifier. Example: "image-classification".
|
22 |
+
dataset_type (`str`):
|
23 |
+
The dataset identifier. Example: "common_voice". Use dataset id from https://hf.co/datasets.
|
24 |
+
dataset_name (`str`):
|
25 |
+
A pretty name for the dataset. Example: "Common Voice (French)".
|
26 |
+
metric_type (`str`):
|
27 |
+
The metric identifier. Example: "wer". Use metric id from https://hf.co/metrics.
|
28 |
+
metric_value (`Any`):
|
29 |
+
The metric value. Example: 0.9 or "20.0 ± 1.2".
|
30 |
+
task_name (`str`, *optional*):
|
31 |
+
A pretty name for the task. Example: "Speech Recognition".
|
32 |
+
dataset_config (`str`, *optional*):
|
33 |
+
The name of the dataset configuration used in `load_dataset()`.
|
34 |
+
Example: fr in `load_dataset("common_voice", "fr")`. See the `datasets` docs for more info:
|
35 |
+
https://hf.co/docs/datasets/package_reference/loading_methods#datasets.load_dataset.name
|
36 |
+
dataset_split (`str`, *optional*):
|
37 |
+
The split used in `load_dataset()`. Example: "test".
|
38 |
+
dataset_revision (`str`, *optional*):
|
39 |
+
The revision (AKA Git Sha) of the dataset used in `load_dataset()`.
|
40 |
+
Example: 5503434ddd753f426f4b38109466949a1217c2bb
|
41 |
+
dataset_args (`Dict[str, Any]`, *optional*):
|
42 |
+
The arguments passed during `Metric.compute()`. Example for `bleu`: `{"max_order": 4}`
|
43 |
+
metric_name (`str`, *optional*):
|
44 |
+
A pretty name for the metric. Example: "Test WER".
|
45 |
+
metric_config (`str`, *optional*):
|
46 |
+
The name of the metric configuration used in `load_metric()`.
|
47 |
+
Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
|
48 |
+
See the `datasets` docs for more info: https://huggingface.co/docs/datasets/v2.1.0/en/loading#load-configurations
|
49 |
+
metric_args (`Dict[str, Any]`, *optional*):
|
50 |
+
The arguments passed during `Metric.compute()`. Example for `bleu`: max_order: 4
|
51 |
+
verified (`bool`, *optional*):
|
52 |
+
Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
|
53 |
+
verify_token (`str`, *optional*):
|
54 |
+
A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.
|
55 |
+
source_name (`str`, *optional*):
|
56 |
+
The name of the source of the evaluation result. Example: "Open LLM Leaderboard".
|
57 |
+
source_url (`str`, *optional*):
|
58 |
+
The URL of the source of the evaluation result. Example: "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard".
|
59 |
+
"""
|
60 |
+
|
61 |
+
# Required
|
62 |
+
|
63 |
+
# The task identifier
|
64 |
+
# Example: automatic-speech-recognition
|
65 |
+
task_type: str
|
66 |
+
|
67 |
+
# The dataset identifier
|
68 |
+
# Example: common_voice. Use dataset id from https://hf.co/datasets
|
69 |
+
dataset_type: str
|
70 |
+
|
71 |
+
# A pretty name for the dataset.
|
72 |
+
# Example: Common Voice (French)
|
73 |
+
dataset_name: str
|
74 |
+
|
75 |
+
# The metric identifier
|
76 |
+
# Example: wer. Use metric id from https://hf.co/metrics
|
77 |
+
metric_type: str
|
78 |
+
|
79 |
+
# Value of the metric.
|
80 |
+
# Example: 20.0 or "20.0 ± 1.2"
|
81 |
+
metric_value: Any
|
82 |
+
|
83 |
+
# Optional
|
84 |
+
|
85 |
+
# A pretty name for the task.
|
86 |
+
# Example: Speech Recognition
|
87 |
+
task_name: Optional[str] = None
|
88 |
+
|
89 |
+
# The name of the dataset configuration used in `load_dataset()`.
|
90 |
+
# Example: fr in `load_dataset("common_voice", "fr")`.
|
91 |
+
# See the `datasets` docs for more info:
|
92 |
+
# https://huggingface.co/docs/datasets/package_reference/loading_methods#datasets.load_dataset.name
|
93 |
+
dataset_config: Optional[str] = None
|
94 |
+
|
95 |
+
# The split used in `load_dataset()`.
|
96 |
+
# Example: test
|
97 |
+
dataset_split: Optional[str] = None
|
98 |
+
|
99 |
+
# The revision (AKA Git Sha) of the dataset used in `load_dataset()`.
|
100 |
+
# Example: 5503434ddd753f426f4b38109466949a1217c2bb
|
101 |
+
dataset_revision: Optional[str] = None
|
102 |
+
|
103 |
+
# The arguments passed during `Metric.compute()`.
|
104 |
+
# Example for `bleu`: max_order: 4
|
105 |
+
dataset_args: Optional[Dict[str, Any]] = None
|
106 |
+
|
107 |
+
# A pretty name for the metric.
|
108 |
+
# Example: Test WER
|
109 |
+
metric_name: Optional[str] = None
|
110 |
+
|
111 |
+
# The name of the metric configuration used in `load_metric()`.
|
112 |
+
# Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
|
113 |
+
# See the `datasets` docs for more info: https://huggingface.co/docs/datasets/v2.1.0/en/loading#load-configurations
|
114 |
+
metric_config: Optional[str] = None
|
115 |
+
|
116 |
+
# The arguments passed during `Metric.compute()`.
|
117 |
+
# Example for `bleu`: max_order: 4
|
118 |
+
metric_args: Optional[Dict[str, Any]] = None
|
119 |
+
|
120 |
+
# Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
|
121 |
+
verified: Optional[bool] = None
|
122 |
+
|
123 |
+
# A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.
|
124 |
+
verify_token: Optional[str] = None
|
125 |
+
|
126 |
+
# The name of the source of the evaluation result.
|
127 |
+
# Example: Open LLM Leaderboard
|
128 |
+
source_name: Optional[str] = None
|
129 |
+
|
130 |
+
# The URL of the source of the evaluation result.
|
131 |
+
# Example: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard
|
132 |
+
source_url: Optional[str] = None
|
133 |
+
|
134 |
+
@property
|
135 |
+
def unique_identifier(self) -> tuple:
|
136 |
+
"""Returns a tuple that uniquely identifies this evaluation."""
|
137 |
+
return (
|
138 |
+
self.task_type,
|
139 |
+
self.dataset_type,
|
140 |
+
self.dataset_config,
|
141 |
+
self.dataset_split,
|
142 |
+
self.dataset_revision,
|
143 |
+
)
|
144 |
+
|
145 |
+
def is_equal_except_value(self, other: "EvalResult") -> bool:
|
146 |
+
"""
|
147 |
+
Return True if `self` and `other` describe exactly the same metric but with a
|
148 |
+
different value.
|
149 |
+
"""
|
150 |
+
for key, _ in self.__dict__.items():
|
151 |
+
if key == "metric_value":
|
152 |
+
continue
|
153 |
+
# For metrics computed by Hugging Face's evaluation service, `verify_token` is derived from `metric_value`,
|
154 |
+
# so we exclude it here in the comparison.
|
155 |
+
if key != "verify_token" and getattr(self, key) != getattr(other, key):
|
156 |
+
return False
|
157 |
+
return True
|
158 |
+
|
159 |
+
def __post_init__(self) -> None:
|
160 |
+
if self.source_name is not None and self.source_url is None:
|
161 |
+
raise ValueError("If `source_name` is provided, `source_url` must also be provided.")
|
162 |
+
|
163 |
+
|
164 |
+
@dataclass
|
165 |
+
class CardData:
|
166 |
+
"""Structure containing metadata from a RepoCard.
|
167 |
+
|
168 |
+
[`CardData`] is the parent class of [`ModelCardData`] and [`DatasetCardData`].
|
169 |
+
|
170 |
+
Metadata can be exported as a dictionary or YAML. Export can be customized to alter the representation of the data
|
171 |
+
(example: flatten evaluation results). `CardData` behaves as a dictionary (can get, pop, set values) but does not
|
172 |
+
inherit from `dict` to allow this export step.
|
173 |
+
"""
|
174 |
+
|
175 |
+
def __init__(self, ignore_metadata_errors: bool = False, **kwargs):
|
176 |
+
self.__dict__.update(kwargs)
|
177 |
+
|
178 |
+
def to_dict(self):
|
179 |
+
"""Converts CardData to a dict.
|
180 |
+
|
181 |
+
Returns:
|
182 |
+
`dict`: CardData represented as a dictionary ready to be dumped to a YAML
|
183 |
+
block for inclusion in a README.md file.
|
184 |
+
"""
|
185 |
+
|
186 |
+
data_dict = copy.deepcopy(self.__dict__)
|
187 |
+
self._to_dict(data_dict)
|
188 |
+
return {key: value for key, value in data_dict.items() if value is not None}
|
189 |
+
|
190 |
+
def _to_dict(self, data_dict):
|
191 |
+
"""Use this method in child classes to alter the dict representation of the data. Alter the dict in-place.
|
192 |
+
|
193 |
+
Args:
|
194 |
+
data_dict (`dict`): The raw dict representation of the card data.
|
195 |
+
"""
|
196 |
+
pass
|
197 |
+
|
198 |
+
def to_yaml(self, line_break=None, original_order: Optional[List[str]] = None) -> str:
|
199 |
+
"""Dumps CardData to a YAML block for inclusion in a README.md file.
|
200 |
+
|
201 |
+
Args:
|
202 |
+
line_break (`str`, *optional*):
|
203 |
+
The line break to use when dumping to yaml.
|
204 |
+
|
205 |
+
Returns:
|
206 |
+
`str`: CardData represented as a YAML block.
|
207 |
+
"""
|
208 |
+
if original_order:
|
209 |
+
self.__dict__ = {
|
210 |
+
k: self.__dict__[k]
|
211 |
+
for k in original_order + list(set(self.__dict__.keys()) - set(original_order))
|
212 |
+
if k in self.__dict__
|
213 |
+
}
|
214 |
+
return yaml_dump(self.to_dict(), sort_keys=False, line_break=line_break).strip()
|
215 |
+
|
216 |
+
def __repr__(self):
|
217 |
+
return repr(self.__dict__)
|
218 |
+
|
219 |
+
def __str__(self):
|
220 |
+
return self.to_yaml()
|
221 |
+
|
222 |
+
def get(self, key: str, default: Any = None) -> Any:
|
223 |
+
"""Get value for a given metadata key."""
|
224 |
+
return self.__dict__.get(key, default)
|
225 |
+
|
226 |
+
def pop(self, key: str, default: Any = None) -> Any:
|
227 |
+
"""Pop value for a given metadata key."""
|
228 |
+
return self.__dict__.pop(key, default)
|
229 |
+
|
230 |
+
def __getitem__(self, key: str) -> Any:
|
231 |
+
"""Get value for a given metadata key."""
|
232 |
+
return self.__dict__[key]
|
233 |
+
|
234 |
+
def __setitem__(self, key: str, value: Any) -> None:
|
235 |
+
"""Set value for a given metadata key."""
|
236 |
+
self.__dict__[key] = value
|
237 |
+
|
238 |
+
def __contains__(self, key: str) -> bool:
|
239 |
+
"""Check if a given metadata key is set."""
|
240 |
+
return key in self.__dict__
|
241 |
+
|
242 |
+
def __len__(self) -> int:
|
243 |
+
"""Return the number of metadata keys set."""
|
244 |
+
return len(self.__dict__)
|
245 |
+
|
246 |
+
|
247 |
+
class ModelCardData(CardData):
|
248 |
+
"""Model Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
|
249 |
+
|
250 |
+
Args:
|
251 |
+
base_model (`str` or `List[str]`, *optional*):
|
252 |
+
The identifier of the base model from which the model derives. This is applicable for example if your model is a
|
253 |
+
fine-tune or adapter of an existing model. The value must be the ID of a model on the Hub (or a list of IDs
|
254 |
+
if your model derives from multiple models). Defaults to None.
|
255 |
+
datasets (`Union[str, List[str]]`, *optional*):
|
256 |
+
Dataset or list of datasets that were used to train this model. Should be a dataset ID
|
257 |
+
found on https://hf.co/datasets. Defaults to None.
|
258 |
+
eval_results (`Union[List[EvalResult], EvalResult]`, *optional*):
|
259 |
+
List of `huggingface_hub.EvalResult` that define evaluation results of the model. If provided,
|
260 |
+
`model_name` is used as a name on PapersWithCode's leaderboards. Defaults to `None`.
|
261 |
+
language (`Union[str, List[str]]`, *optional*):
|
262 |
+
Language of model's training data or metadata. It must be an ISO 639-1, 639-2 or
|
263 |
+
639-3 code (two/three letters), or a special value like "code", "multilingual". Defaults to `None`.
|
264 |
+
library_name (`str`, *optional*):
|
265 |
+
Name of library used by this model. Example: keras or any library from
|
266 |
+
https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries.ts.
|
267 |
+
Defaults to None.
|
268 |
+
license (`str`, *optional*):
|
269 |
+
License of this model. Example: apache-2.0 or any license from
|
270 |
+
https://huggingface.co/docs/hub/repositories-licenses. Defaults to None.
|
271 |
+
license_name (`str`, *optional*):
|
272 |
+
Name of the license of this model. Defaults to None. To be used in conjunction with `license_link`.
|
273 |
+
Common licenses (Apache-2.0, MIT, CC-BY-SA-4.0) do not need a name. In that case, use `license` instead.
|
274 |
+
license_link (`str`, *optional*):
|
275 |
+
Link to the license of this model. Defaults to None. To be used in conjunction with `license_name`.
|
276 |
+
Common licenses (Apache-2.0, MIT, CC-BY-SA-4.0) do not need a link. In that case, use `license` instead.
|
277 |
+
metrics (`List[str]`, *optional*):
|
278 |
+
List of metrics used to evaluate this model. Should be a metric name that can be found
|
279 |
+
at https://hf.co/metrics. Example: 'accuracy'. Defaults to None.
|
280 |
+
model_name (`str`, *optional*):
|
281 |
+
A name for this model. It is used along with
|
282 |
+
`eval_results` to construct the `model-index` within the card's metadata. The name
|
283 |
+
you supply here is what will be used on PapersWithCode's leaderboards. If None is provided
|
284 |
+
then the repo name is used as a default. Defaults to None.
|
285 |
+
pipeline_tag (`str`, *optional*):
|
286 |
+
The pipeline tag associated with the model. Example: "text-classification".
|
287 |
+
tags (`List[str]`, *optional*):
|
288 |
+
List of tags to add to your model that can be used when filtering on the Hugging
|
289 |
+
Face Hub. Defaults to None.
|
290 |
+
ignore_metadata_errors (`bool`):
|
291 |
+
If True, errors while parsing the metadata section will be ignored. Some information might be lost during
|
292 |
+
the process. Use it at your own risk.
|
293 |
+
kwargs (`dict`, *optional*):
|
294 |
+
Additional metadata that will be added to the model card. Defaults to None.
|
295 |
+
|
296 |
+
Example:
|
297 |
+
```python
|
298 |
+
>>> from huggingface_hub import ModelCardData
|
299 |
+
>>> card_data = ModelCardData(
|
300 |
+
... language="en",
|
301 |
+
... license="mit",
|
302 |
+
... library_name="timm",
|
303 |
+
... tags=['image-classification', 'resnet'],
|
304 |
+
... )
|
305 |
+
>>> card_data.to_dict()
|
306 |
+
{'language': 'en', 'license': 'mit', 'library_name': 'timm', 'tags': ['image-classification', 'resnet']}
|
307 |
+
|
308 |
+
```
|
309 |
+
"""
|
310 |
+
|
311 |
+
def __init__(
|
312 |
+
self,
|
313 |
+
*,
|
314 |
+
base_model: Optional[Union[str, List[str]]] = None,
|
315 |
+
datasets: Optional[Union[str, List[str]]] = None,
|
316 |
+
eval_results: Optional[List[EvalResult]] = None,
|
317 |
+
language: Optional[Union[str, List[str]]] = None,
|
318 |
+
library_name: Optional[str] = None,
|
319 |
+
license: Optional[str] = None,
|
320 |
+
license_name: Optional[str] = None,
|
321 |
+
license_link: Optional[str] = None,
|
322 |
+
metrics: Optional[List[str]] = None,
|
323 |
+
model_name: Optional[str] = None,
|
324 |
+
pipeline_tag: Optional[str] = None,
|
325 |
+
tags: Optional[List[str]] = None,
|
326 |
+
ignore_metadata_errors: bool = False,
|
327 |
+
**kwargs,
|
328 |
+
):
|
329 |
+
self.base_model = base_model
|
330 |
+
self.datasets = datasets
|
331 |
+
self.eval_results = eval_results
|
332 |
+
self.language = language
|
333 |
+
self.library_name = library_name
|
334 |
+
self.license = license
|
335 |
+
self.license_name = license_name
|
336 |
+
self.license_link = license_link
|
337 |
+
self.metrics = metrics
|
338 |
+
self.model_name = model_name
|
339 |
+
self.pipeline_tag = pipeline_tag
|
340 |
+
self.tags = _to_unique_list(tags)
|
341 |
+
|
342 |
+
model_index = kwargs.pop("model-index", None)
|
343 |
+
if model_index:
|
344 |
+
try:
|
345 |
+
model_name, eval_results = model_index_to_eval_results(model_index)
|
346 |
+
self.model_name = model_name
|
347 |
+
self.eval_results = eval_results
|
348 |
+
except (KeyError, TypeError) as error:
|
349 |
+
if ignore_metadata_errors:
|
350 |
+
logger.warning("Invalid model-index. Not loading eval results into CardData.")
|
351 |
+
else:
|
352 |
+
raise ValueError(
|
353 |
+
f"Invalid `model_index` in metadata cannot be parsed: {error.__class__} {error}. Pass"
|
354 |
+
" `ignore_metadata_errors=True` to ignore this error while loading a Model Card. Warning:"
|
355 |
+
" some information will be lost. Use it at your own risk."
|
356 |
+
)
|
357 |
+
|
358 |
+
super().__init__(**kwargs)
|
359 |
+
|
360 |
+
if self.eval_results:
|
361 |
+
if isinstance(self.eval_results, EvalResult):
|
362 |
+
self.eval_results = [self.eval_results]
|
363 |
+
if self.model_name is None:
|
364 |
+
raise ValueError("Passing `eval_results` requires `model_name` to be set.")
|
365 |
+
|
366 |
+
def _to_dict(self, data_dict):
|
367 |
+
"""Format the internal data dict. In this case, we convert eval results to a valid model index"""
|
368 |
+
if self.eval_results is not None:
|
369 |
+
data_dict["model-index"] = eval_results_to_model_index(self.model_name, self.eval_results)
|
370 |
+
del data_dict["eval_results"], data_dict["model_name"]
|
371 |
+
|
372 |
+
|
373 |
+
class DatasetCardData(CardData):
|
374 |
+
"""Dataset Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
|
375 |
+
|
376 |
+
Args:
|
377 |
+
language (`List[str]`, *optional*):
|
378 |
+
Language of dataset's data or metadata. It must be an ISO 639-1, 639-2 or
|
379 |
+
639-3 code (two/three letters), or a special value like "code", "multilingual".
|
380 |
+
license (`Union[str, List[str]]`, *optional*):
|
381 |
+
License(s) of this dataset. Example: apache-2.0 or any license from
|
382 |
+
https://huggingface.co/docs/hub/repositories-licenses.
|
383 |
+
annotations_creators (`Union[str, List[str]]`, *optional*):
|
384 |
+
How the annotations for the dataset were created.
|
385 |
+
Options are: 'found', 'crowdsourced', 'expert-generated', 'machine-generated', 'no-annotation', 'other'.
|
386 |
+
language_creators (`Union[str, List[str]]`, *optional*):
|
387 |
+
How the text-based data in the dataset was created.
|
388 |
+
Options are: 'found', 'crowdsourced', 'expert-generated', 'machine-generated', 'other'
|
389 |
+
multilinguality (`Union[str, List[str]]`, *optional*):
|
390 |
+
Whether the dataset is multilingual.
|
391 |
+
Options are: 'monolingual', 'multilingual', 'translation', 'other'.
|
392 |
+
size_categories (`Union[str, List[str]]`, *optional*):
|
393 |
+
The number of examples in the dataset. Options are: 'n<1K', '1K<n<10K', '10K<n<100K',
|
394 |
+
'100K<n<1M', '1M<n<10M', '10M<n<100M', '100M<n<1B', '1B<n<10B', '10B<n<100B', '100B<n<1T', 'n>1T', and 'other'.
|
395 |
+
source_datasets (`List[str]]`, *optional*):
|
396 |
+
Indicates whether the dataset is an original dataset or extended from another existing dataset.
|
397 |
+
Options are: 'original' and 'extended'.
|
398 |
+
task_categories (`Union[str, List[str]]`, *optional*):
|
399 |
+
What categories of task does the dataset support?
|
400 |
+
task_ids (`Union[str, List[str]]`, *optional*):
|
401 |
+
What specific tasks does the dataset support?
|
402 |
+
paperswithcode_id (`str`, *optional*):
|
403 |
+
ID of the dataset on PapersWithCode.
|
404 |
+
pretty_name (`str`, *optional*):
|
405 |
+
A more human-readable name for the dataset. (ex. "Cats vs. Dogs")
|
406 |
+
train_eval_index (`Dict`, *optional*):
|
407 |
+
A dictionary that describes the necessary spec for doing evaluation on the Hub.
|
408 |
+
If not provided, it will be gathered from the 'train-eval-index' key of the kwargs.
|
409 |
+
config_names (`Union[str, List[str]]`, *optional*):
|
410 |
+
A list of the available dataset configs for the dataset.
|
411 |
+
"""
|
412 |
+
|
413 |
+
def __init__(
|
414 |
+
self,
|
415 |
+
*,
|
416 |
+
language: Optional[Union[str, List[str]]] = None,
|
417 |
+
license: Optional[Union[str, List[str]]] = None,
|
418 |
+
annotations_creators: Optional[Union[str, List[str]]] = None,
|
419 |
+
language_creators: Optional[Union[str, List[str]]] = None,
|
420 |
+
multilinguality: Optional[Union[str, List[str]]] = None,
|
421 |
+
size_categories: Optional[Union[str, List[str]]] = None,
|
422 |
+
source_datasets: Optional[List[str]] = None,
|
423 |
+
task_categories: Optional[Union[str, List[str]]] = None,
|
424 |
+
task_ids: Optional[Union[str, List[str]]] = None,
|
425 |
+
paperswithcode_id: Optional[str] = None,
|
426 |
+
pretty_name: Optional[str] = None,
|
427 |
+
train_eval_index: Optional[Dict] = None,
|
428 |
+
config_names: Optional[Union[str, List[str]]] = None,
|
429 |
+
ignore_metadata_errors: bool = False,
|
430 |
+
**kwargs,
|
431 |
+
):
|
432 |
+
self.annotations_creators = annotations_creators
|
433 |
+
self.language_creators = language_creators
|
434 |
+
self.language = language
|
435 |
+
self.license = license
|
436 |
+
self.multilinguality = multilinguality
|
437 |
+
self.size_categories = size_categories
|
438 |
+
self.source_datasets = source_datasets
|
439 |
+
self.task_categories = task_categories
|
440 |
+
self.task_ids = task_ids
|
441 |
+
self.paperswithcode_id = paperswithcode_id
|
442 |
+
self.pretty_name = pretty_name
|
443 |
+
self.config_names = config_names
|
444 |
+
|
445 |
+
# TODO - maybe handle this similarly to EvalResult?
|
446 |
+
self.train_eval_index = train_eval_index or kwargs.pop("train-eval-index", None)
|
447 |
+
super().__init__(**kwargs)
|
448 |
+
|
449 |
+
def _to_dict(self, data_dict):
|
450 |
+
data_dict["train-eval-index"] = data_dict.pop("train_eval_index")
|
451 |
+
|
452 |
+
|
453 |
+
class SpaceCardData(CardData):
|
454 |
+
"""Space Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
|
455 |
+
|
456 |
+
To get an exhaustive reference of Spaces configuration, please visit https://huggingface.co/docs/hub/spaces-config-reference#spaces-configuration-reference.
|
457 |
+
|
458 |
+
Args:
|
459 |
+
title (`str`, *optional*)
|
460 |
+
Title of the Space.
|
461 |
+
sdk (`str`, *optional*)
|
462 |
+
SDK of the Space (one of `gradio`, `streamlit`, `docker`, or `static`).
|
463 |
+
sdk_version (`str`, *optional*)
|
464 |
+
Version of the used SDK (if Gradio/Streamlit sdk).
|
465 |
+
python_version (`str`, *optional*)
|
466 |
+
Python version used in the Space (if Gradio/Streamlit sdk).
|
467 |
+
app_file (`str`, *optional*)
|
468 |
+
Path to your main application file (which contains either gradio or streamlit Python code, or static html code).
|
469 |
+
Path is relative to the root of the repository.
|
470 |
+
app_port (`int`, *optional*)
|
471 |
+
Port on which your application is running. Used only if sdk is `docker`.
|
472 |
+
license (`str`, *optional*)
|
473 |
+
License of this model. Example: apache-2.0 or any license from
|
474 |
+
https://huggingface.co/docs/hub/repositories-licenses.
|
475 |
+
duplicated_from (`str`, *optional*)
|
476 |
+
ID of the original Space if this is a duplicated Space.
|
477 |
+
models (List[`str`], *optional*)
|
478 |
+
List of models related to this Space. Should be a model ID found on https://hf.co/models.
|
479 |
+
datasets (`List[str]`, *optional*)
|
480 |
+
List of datasets related to this Space. Should be a dataset ID found on https://hf.co/datasets.
|
481 |
+
tags (`List[str]`, *optional*)
|
482 |
+
List of tags to add to your Space that can be used when filtering on the Hub.
|
483 |
+
ignore_metadata_errors (`bool`):
|
484 |
+
If True, errors while parsing the metadata section will be ignored. Some information might be lost during
|
485 |
+
the process. Use it at your own risk.
|
486 |
+
kwargs (`dict`, *optional*):
|
487 |
+
Additional metadata that will be added to the space card.
|
488 |
+
|
489 |
+
Example:
|
490 |
+
```python
|
491 |
+
>>> from huggingface_hub import SpaceCardData
|
492 |
+
>>> card_data = SpaceCardData(
|
493 |
+
... title="Dreambooth Training",
|
494 |
+
... license="mit",
|
495 |
+
... sdk="gradio",
|
496 |
+
... duplicated_from="multimodalart/dreambooth-training"
|
497 |
+
... )
|
498 |
+
>>> card_data.to_dict()
|
499 |
+
{'title': 'Dreambooth Training', 'sdk': 'gradio', 'license': 'mit', 'duplicated_from': 'multimodalart/dreambooth-training'}
|
500 |
+
```
|
501 |
+
"""
|
502 |
+
|
503 |
+
def __init__(
|
504 |
+
self,
|
505 |
+
*,
|
506 |
+
title: Optional[str] = None,
|
507 |
+
sdk: Optional[str] = None,
|
508 |
+
sdk_version: Optional[str] = None,
|
509 |
+
python_version: Optional[str] = None,
|
510 |
+
app_file: Optional[str] = None,
|
511 |
+
app_port: Optional[int] = None,
|
512 |
+
license: Optional[str] = None,
|
513 |
+
duplicated_from: Optional[str] = None,
|
514 |
+
models: Optional[List[str]] = None,
|
515 |
+
datasets: Optional[List[str]] = None,
|
516 |
+
tags: Optional[List[str]] = None,
|
517 |
+
ignore_metadata_errors: bool = False,
|
518 |
+
**kwargs,
|
519 |
+
):
|
520 |
+
self.title = title
|
521 |
+
self.sdk = sdk
|
522 |
+
self.sdk_version = sdk_version
|
523 |
+
self.python_version = python_version
|
524 |
+
self.app_file = app_file
|
525 |
+
self.app_port = app_port
|
526 |
+
self.license = license
|
527 |
+
self.duplicated_from = duplicated_from
|
528 |
+
self.models = models
|
529 |
+
self.datasets = datasets
|
530 |
+
self.tags = _to_unique_list(tags)
|
531 |
+
super().__init__(**kwargs)
|
532 |
+
|
533 |
+
|
534 |
+
def model_index_to_eval_results(model_index: List[Dict[str, Any]]) -> Tuple[str, List[EvalResult]]:
|
535 |
+
"""Takes in a model index and returns the model name and a list of `huggingface_hub.EvalResult` objects.
|
536 |
+
|
537 |
+
A detailed spec of the model index can be found here:
|
538 |
+
https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
|
539 |
+
|
540 |
+
Args:
|
541 |
+
model_index (`List[Dict[str, Any]]`):
|
542 |
+
A model index data structure, likely coming from a README.md file on the
|
543 |
+
Hugging Face Hub.
|
544 |
+
|
545 |
+
Returns:
|
546 |
+
model_name (`str`):
|
547 |
+
The name of the model as found in the model index. This is used as the
|
548 |
+
identifier for the model on leaderboards like PapersWithCode.
|
549 |
+
eval_results (`List[EvalResult]`):
|
550 |
+
A list of `huggingface_hub.EvalResult` objects containing the metrics
|
551 |
+
reported in the provided model_index.
|
552 |
+
|
553 |
+
Example:
|
554 |
+
```python
|
555 |
+
>>> from huggingface_hub.repocard_data import model_index_to_eval_results
|
556 |
+
>>> # Define a minimal model index
|
557 |
+
>>> model_index = [
|
558 |
+
... {
|
559 |
+
... "name": "my-cool-model",
|
560 |
+
... "results": [
|
561 |
+
... {
|
562 |
+
... "task": {
|
563 |
+
... "type": "image-classification"
|
564 |
+
... },
|
565 |
+
... "dataset": {
|
566 |
+
... "type": "beans",
|
567 |
+
... "name": "Beans"
|
568 |
+
... },
|
569 |
+
... "metrics": [
|
570 |
+
... {
|
571 |
+
... "type": "accuracy",
|
572 |
+
... "value": 0.9
|
573 |
+
... }
|
574 |
+
... ]
|
575 |
+
... }
|
576 |
+
... ]
|
577 |
+
... }
|
578 |
+
... ]
|
579 |
+
>>> model_name, eval_results = model_index_to_eval_results(model_index)
|
580 |
+
>>> model_name
|
581 |
+
'my-cool-model'
|
582 |
+
>>> eval_results[0].task_type
|
583 |
+
'image-classification'
|
584 |
+
>>> eval_results[0].metric_type
|
585 |
+
'accuracy'
|
586 |
+
|
587 |
+
```
|
588 |
+
"""
|
589 |
+
|
590 |
+
eval_results = []
|
591 |
+
for elem in model_index:
|
592 |
+
name = elem["name"]
|
593 |
+
results = elem["results"]
|
594 |
+
for result in results:
|
595 |
+
task_type = result["task"]["type"]
|
596 |
+
task_name = result["task"].get("name")
|
597 |
+
dataset_type = result["dataset"]["type"]
|
598 |
+
dataset_name = result["dataset"]["name"]
|
599 |
+
dataset_config = result["dataset"].get("config")
|
600 |
+
dataset_split = result["dataset"].get("split")
|
601 |
+
dataset_revision = result["dataset"].get("revision")
|
602 |
+
dataset_args = result["dataset"].get("args")
|
603 |
+
source_name = result.get("source", {}).get("name")
|
604 |
+
source_url = result.get("source", {}).get("url")
|
605 |
+
|
606 |
+
for metric in result["metrics"]:
|
607 |
+
metric_type = metric["type"]
|
608 |
+
metric_value = metric["value"]
|
609 |
+
metric_name = metric.get("name")
|
610 |
+
metric_args = metric.get("args")
|
611 |
+
metric_config = metric.get("config")
|
612 |
+
verified = metric.get("verified")
|
613 |
+
verify_token = metric.get("verifyToken")
|
614 |
+
|
615 |
+
eval_result = EvalResult(
|
616 |
+
task_type=task_type, # Required
|
617 |
+
dataset_type=dataset_type, # Required
|
618 |
+
dataset_name=dataset_name, # Required
|
619 |
+
metric_type=metric_type, # Required
|
620 |
+
metric_value=metric_value, # Required
|
621 |
+
task_name=task_name,
|
622 |
+
dataset_config=dataset_config,
|
623 |
+
dataset_split=dataset_split,
|
624 |
+
dataset_revision=dataset_revision,
|
625 |
+
dataset_args=dataset_args,
|
626 |
+
metric_name=metric_name,
|
627 |
+
metric_args=metric_args,
|
628 |
+
metric_config=metric_config,
|
629 |
+
verified=verified,
|
630 |
+
verify_token=verify_token,
|
631 |
+
source_name=source_name,
|
632 |
+
source_url=source_url,
|
633 |
+
)
|
634 |
+
eval_results.append(eval_result)
|
635 |
+
return name, eval_results
|
636 |
+
|
637 |
+
|
638 |
+
def _remove_none(obj):
|
639 |
+
"""
|
640 |
+
Recursively remove `None` values from a dict. Borrowed from: https://stackoverflow.com/a/20558778
|
641 |
+
"""
|
642 |
+
if isinstance(obj, (list, tuple, set)):
|
643 |
+
return type(obj)(_remove_none(x) for x in obj if x is not None)
|
644 |
+
elif isinstance(obj, dict):
|
645 |
+
return type(obj)((_remove_none(k), _remove_none(v)) for k, v in obj.items() if k is not None and v is not None)
|
646 |
+
else:
|
647 |
+
return obj
|
648 |
+
|
649 |
+
|
650 |
+
def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult]) -> List[Dict[str, Any]]:
|
651 |
+
"""Takes in given model name and list of `huggingface_hub.EvalResult` and returns a
|
652 |
+
valid model-index that will be compatible with the format expected by the
|
653 |
+
Hugging Face Hub.
|
654 |
+
|
655 |
+
Args:
|
656 |
+
model_name (`str`):
|
657 |
+
Name of the model (ex. "my-cool-model"). This is used as the identifier
|
658 |
+
for the model on leaderboards like PapersWithCode.
|
659 |
+
eval_results (`List[EvalResult]`):
|
660 |
+
List of `huggingface_hub.EvalResult` objects containing the metrics to be
|
661 |
+
reported in the model-index.
|
662 |
+
|
663 |
+
Returns:
|
664 |
+
model_index (`List[Dict[str, Any]]`): The eval_results converted to a model-index.
|
665 |
+
|
666 |
+
Example:
|
667 |
+
```python
|
668 |
+
>>> from huggingface_hub.repocard_data import eval_results_to_model_index, EvalResult
|
669 |
+
>>> # Define minimal eval_results
|
670 |
+
>>> eval_results = [
|
671 |
+
... EvalResult(
|
672 |
+
... task_type="image-classification", # Required
|
673 |
+
... dataset_type="beans", # Required
|
674 |
+
... dataset_name="Beans", # Required
|
675 |
+
... metric_type="accuracy", # Required
|
676 |
+
... metric_value=0.9, # Required
|
677 |
+
... )
|
678 |
+
... ]
|
679 |
+
>>> eval_results_to_model_index("my-cool-model", eval_results)
|
680 |
+
[{'name': 'my-cool-model', 'results': [{'task': {'type': 'image-classification'}, 'dataset': {'name': 'Beans', 'type': 'beans'}, 'metrics': [{'type': 'accuracy', 'value': 0.9}]}]}]
|
681 |
+
|
682 |
+
```
|
683 |
+
"""
|
684 |
+
|
685 |
+
# Metrics are reported on a unique task-and-dataset basis.
|
686 |
+
# Here, we make a map of those pairs and the associated EvalResults.
|
687 |
+
task_and_ds_types_map: Dict[Any, List[EvalResult]] = defaultdict(list)
|
688 |
+
for eval_result in eval_results:
|
689 |
+
task_and_ds_types_map[eval_result.unique_identifier].append(eval_result)
|
690 |
+
|
691 |
+
# Use the map from above to generate the model index data.
|
692 |
+
model_index_data = []
|
693 |
+
for results in task_and_ds_types_map.values():
|
694 |
+
# All items from `results` share the same metadata
|
695 |
+
sample_result = results[0]
|
696 |
+
data = {
|
697 |
+
"task": {
|
698 |
+
"type": sample_result.task_type,
|
699 |
+
"name": sample_result.task_name,
|
700 |
+
},
|
701 |
+
"dataset": {
|
702 |
+
"name": sample_result.dataset_name,
|
703 |
+
"type": sample_result.dataset_type,
|
704 |
+
"config": sample_result.dataset_config,
|
705 |
+
"split": sample_result.dataset_split,
|
706 |
+
"revision": sample_result.dataset_revision,
|
707 |
+
"args": sample_result.dataset_args,
|
708 |
+
},
|
709 |
+
"metrics": [
|
710 |
+
{
|
711 |
+
"type": result.metric_type,
|
712 |
+
"value": result.metric_value,
|
713 |
+
"name": result.metric_name,
|
714 |
+
"config": result.metric_config,
|
715 |
+
"args": result.metric_args,
|
716 |
+
"verified": result.verified,
|
717 |
+
"verifyToken": result.verify_token,
|
718 |
+
}
|
719 |
+
for result in results
|
720 |
+
],
|
721 |
+
}
|
722 |
+
if sample_result.source_url is not None:
|
723 |
+
source = {
|
724 |
+
"url": sample_result.source_url,
|
725 |
+
}
|
726 |
+
if sample_result.source_name is not None:
|
727 |
+
source["name"] = sample_result.source_name
|
728 |
+
data["source"] = source
|
729 |
+
model_index_data.append(data)
|
730 |
+
|
731 |
+
# TODO - Check if there are cases where this list is longer than one?
|
732 |
+
# Finally, the model index itself is list of dicts.
|
733 |
+
model_index = [
|
734 |
+
{
|
735 |
+
"name": model_name,
|
736 |
+
"results": model_index_data,
|
737 |
+
}
|
738 |
+
]
|
739 |
+
return _remove_none(model_index)
|
740 |
+
|
741 |
+
|
742 |
+
def _to_unique_list(tags: Optional[List[str]]) -> Optional[List[str]]:
|
743 |
+
if tags is None:
|
744 |
+
return tags
|
745 |
+
unique_tags = [] # make tags unique + keep order explicitly
|
746 |
+
for tag in tags:
|
747 |
+
if tag not in unique_tags:
|
748 |
+
unique_tags.append(tag)
|
749 |
+
return unique_tags
|
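To tie the classes in `repocard_data.py` together: `ModelCardData` accepts `EvalResult` objects and flattens them back into a `model-index` block when serialized. A minimal sketch mirroring the docstring examples above (the model name and metric value are hypothetical):

```python
from huggingface_hub import EvalResult, ModelCardData

card_data = ModelCardData(
    language="en",
    license="mit",
    model_name="my-cool-model",  # required whenever eval_results is passed
    eval_results=[
        EvalResult(
            task_type="image-classification",
            dataset_type="beans",
            dataset_name="Beans",
            metric_type="accuracy",
            metric_value=0.9,
        )
    ],
)

# to_yaml() calls eval_results_to_model_index() under the hood, producing the
# `model-index` section expected at the top of a README.md.
print(card_data.to_yaml())
```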
meow/lib/python3.13/site-packages/huggingface_hub/repository.py
ADDED
@@ -0,0 +1,1477 @@
1 |
+
import atexit
|
2 |
+
import os
|
3 |
+
import re
|
4 |
+
import subprocess
|
5 |
+
import threading
|
6 |
+
import time
|
7 |
+
from contextlib import contextmanager
|
8 |
+
from pathlib import Path
|
9 |
+
from typing import Callable, Dict, Iterator, List, Optional, Tuple, TypedDict, Union
|
10 |
+
from urllib.parse import urlparse
|
11 |
+
|
12 |
+
from huggingface_hub import constants
|
13 |
+
from huggingface_hub.repocard import metadata_load, metadata_save
|
14 |
+
|
15 |
+
from .hf_api import HfApi, repo_type_and_id_from_hf_id
|
16 |
+
from .lfs import LFS_MULTIPART_UPLOAD_COMMAND
|
17 |
+
from .utils import (
|
18 |
+
SoftTemporaryDirectory,
|
19 |
+
get_token,
|
20 |
+
logging,
|
21 |
+
run_subprocess,
|
22 |
+
tqdm,
|
23 |
+
validate_hf_hub_args,
|
24 |
+
)
|
25 |
+
from .utils._deprecation import _deprecate_method
|
26 |
+
|
27 |
+
|
28 |
+
logger = logging.get_logger(__name__)
|
29 |
+
|
30 |
+
|
31 |
+
class CommandInProgress:
|
32 |
+
"""
|
33 |
+
Utility to follow commands launched asynchronously.
|
34 |
+
"""
|
35 |
+
|
36 |
+
def __init__(
|
37 |
+
self,
|
38 |
+
title: str,
|
39 |
+
is_done_method: Callable,
|
40 |
+
status_method: Callable,
|
41 |
+
process: subprocess.Popen,
|
42 |
+
post_method: Optional[Callable] = None,
|
43 |
+
):
|
44 |
+
self.title = title
|
45 |
+
self._is_done = is_done_method
|
46 |
+
self._status = status_method
|
47 |
+
self._process = process
|
48 |
+
self._stderr = ""
|
49 |
+
self._stdout = ""
|
50 |
+
self._post_method = post_method
|
51 |
+
|
52 |
+
@property
|
53 |
+
def is_done(self) -> bool:
|
54 |
+
"""
|
55 |
+
Whether the process is done.
|
56 |
+
"""
|
57 |
+
result = self._is_done()
|
58 |
+
|
59 |
+
if result and self._post_method is not None:
|
60 |
+
self._post_method()
|
61 |
+
self._post_method = None
|
62 |
+
|
63 |
+
return result
|
64 |
+
|
65 |
+
@property
|
66 |
+
def status(self) -> int:
|
67 |
+
"""
|
68 |
+
The exit code/status of the current action. Will return `0` if the
|
69 |
+
command has completed successfully, and a number between 1 and 255 if
|
70 |
+
the process errored-out.
|
71 |
+
|
72 |
+
Will return -1 if the command is still ongoing.
|
73 |
+
"""
|
74 |
+
return self._status()
|
75 |
+
|
76 |
+
@property
|
77 |
+
def failed(self) -> bool:
|
78 |
+
"""
|
79 |
+
Whether the process errored-out.
|
80 |
+
"""
|
81 |
+
return self.status > 0
|
82 |
+
|
83 |
+
@property
|
84 |
+
def stderr(self) -> str:
|
85 |
+
"""
|
86 |
+
The current output message on the standard error.
|
87 |
+
"""
|
88 |
+
if self._process.stderr is not None:
|
89 |
+
self._stderr += self._process.stderr.read()
|
90 |
+
return self._stderr
|
91 |
+
|
92 |
+
@property
|
93 |
+
def stdout(self) -> str:
|
94 |
+
"""
|
95 |
+
The current output message on the standard output.
|
96 |
+
"""
|
97 |
+
if self._process.stdout is not None:
|
98 |
+
self._stdout += self._process.stdout.read()
|
99 |
+
return self._stdout
|
100 |
+
|
101 |
+
def __repr__(self):
|
102 |
+
status = self.status
|
103 |
+
|
104 |
+
if status == -1:
|
105 |
+
status = "running"
|
106 |
+
|
107 |
+
return (
|
108 |
+
f"[{self.title} command, status code: {status},"
|
109 |
+
f" {'in progress.' if not self.is_done else 'finished.'} PID:"
|
110 |
+
f" {self._process.pid}]"
|
111 |
+
)
|
112 |
+
|
113 |
+
|
114 |
+
def is_git_repo(folder: Union[str, Path]) -> bool:
|
115 |
+
"""
|
116 |
+
Check if the folder is the root or part of a git repository
|
117 |
+
|
118 |
+
Args:
|
119 |
+
folder (`str`):
|
120 |
+
The folder in which to run the command.
|
121 |
+
|
122 |
+
Returns:
|
123 |
+
`bool`: `True` if the folder is the root of, or part of, a git repository, `False`
|
124 |
+
otherwise.
|
125 |
+
"""
|
126 |
+
folder_exists = os.path.exists(os.path.join(folder, ".git"))
|
127 |
+
git_branch = subprocess.run("git branch".split(), cwd=folder, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
128 |
+
return folder_exists and git_branch.returncode == 0
|
129 |
+
|
130 |
+
|
131 |
+
def is_local_clone(folder: Union[str, Path], remote_url: str) -> bool:
|
132 |
+
"""
|
133 |
+
Check if the folder is a local clone of the remote_url
|
134 |
+
|
135 |
+
Args:
|
136 |
+
folder (`str` or `Path`):
|
137 |
+
The folder in which to run the command.
|
138 |
+
remote_url (`str`):
|
139 |
+
The url of a git repository.
|
140 |
+
|
141 |
+
Returns:
|
142 |
+
`bool`: `True` if the repository is a local clone of the remote
|
143 |
+
repository specified, `False` otherwise.
|
144 |
+
"""
|
145 |
+
if not is_git_repo(folder):
|
146 |
+
return False
|
147 |
+
|
148 |
+
remotes = run_subprocess("git remote -v", folder).stdout
|
149 |
+
|
150 |
+
# Remove token for the test with remotes.
|
151 |
+
remote_url = re.sub(r"https://.*@", "https://", remote_url)
|
152 |
+
remotes = [re.sub(r"https://.*@", "https://", remote) for remote in remotes.split()]
|
153 |
+
return remote_url in remotes
|
154 |
+
|
155 |
+
|
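# Illustrative sketch, not part of the upstream file: how these helpers can be
# combined before reusing an existing folder as a clone. The local path and the
# repo URL below are made-up placeholders.
#
#     if is_git_repo("./my-model") and is_local_clone("./my-model", "https://huggingface.co/user/my-model"):
#         pass  # safe to `git pull` instead of re-cloning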
156 |
+
def is_tracked_with_lfs(filename: Union[str, Path]) -> bool:
|
157 |
+
"""
|
158 |
+
Check if the file passed is tracked with git-lfs.
|
159 |
+
|
160 |
+
Args:
|
161 |
+
filename (`str` or `Path`):
|
162 |
+
The filename to check.
|
163 |
+
|
164 |
+
Returns:
|
165 |
+
`bool`: `True` if the file passed is tracked with git-lfs, `False`
|
166 |
+
otherwise.
|
167 |
+
"""
|
168 |
+
folder = Path(filename).parent
|
169 |
+
filename = Path(filename).name
|
170 |
+
|
171 |
+
try:
|
172 |
+
p = run_subprocess("git check-attr -a".split() + [filename], folder)
|
173 |
+
attributes = p.stdout.strip()
|
174 |
+
except subprocess.CalledProcessError as exc:
|
175 |
+
if not is_git_repo(folder):
|
176 |
+
return False
|
177 |
+
else:
|
178 |
+
raise OSError(exc.stderr)
|
179 |
+
|
180 |
+
if len(attributes) == 0:
|
181 |
+
return False
|
182 |
+
|
183 |
+
found_lfs_tag = {"diff": False, "merge": False, "filter": False}
|
184 |
+
|
185 |
+
for attribute in attributes.split("\n"):
|
186 |
+
for tag in found_lfs_tag.keys():
|
187 |
+
if tag in attribute and "lfs" in attribute:
|
188 |
+
found_lfs_tag[tag] = True
|
189 |
+
|
190 |
+
return all(found_lfs_tag.values())
|
191 |
+
|
192 |
+
|
193 |
+
def is_git_ignored(filename: Union[str, Path]) -> bool:
|
194 |
+
"""
|
195 |
+
Check if file is git-ignored. Supports nested .gitignore files.
|
196 |
+
|
197 |
+
Args:
|
198 |
+
filename (`str` or `Path`):
|
199 |
+
The filename to check.
|
200 |
+
|
201 |
+
Returns:
|
202 |
+
`bool`: `True` if the file passed is ignored by `git`, `False`
|
203 |
+
otherwise.
|
204 |
+
"""
|
205 |
+
folder = Path(filename).parent
|
206 |
+
filename = Path(filename).name
|
207 |
+
|
208 |
+
try:
|
209 |
+
p = run_subprocess("git check-ignore".split() + [filename], folder, check=False)
|
210 |
+
# Will return exit code 1 if not gitignored
|
211 |
+
is_ignored = not bool(p.returncode)
|
212 |
+
except subprocess.CalledProcessError as exc:
|
213 |
+
raise OSError(exc.stderr)
|
214 |
+
|
215 |
+
return is_ignored
|
216 |
+
|
217 |
+
|
218 |
+
def is_binary_file(filename: Union[str, Path]) -> bool:
|
219 |
+
"""
|
220 |
+
Check if file is a binary file.
|
221 |
+
|
222 |
+
Args:
|
223 |
+
filename (`str` or `Path`):
|
224 |
+
The filename to check.
|
225 |
+
|
226 |
+
Returns:
|
227 |
+
`bool`: `True` if the file passed is a binary file, `False` otherwise.
|
228 |
+
"""
|
229 |
+
try:
|
230 |
+
with open(filename, "rb") as f:
|
231 |
+
content = f.read(10 * (1024**2)) # Read a maximum of 10MB
|
232 |
+
|
233 |
+
# Code sample taken from the following stack overflow thread
|
234 |
+
# https://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python/7392391#7392391
|
235 |
+
text_chars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F})
|
236 |
+
return bool(content.translate(None, text_chars))
|
237 |
+
except UnicodeDecodeError:
|
238 |
+
return True
|
239 |
+
|
240 |
+
|
241 |
+
def files_to_be_staged(pattern: str = ".", folder: Union[str, Path, None] = None) -> List[str]:
|
242 |
+
"""
|
243 |
+
Returns a list of filenames that are to be staged.
|
244 |
+
|
245 |
+
Args:
|
246 |
+
pattern (`str` or `Path`):
|
247 |
+
The pattern of filenames to check. Put `.` to get all files.
|
248 |
+
folder (`str` or `Path`):
|
249 |
+
The folder in which to run the command.
|
250 |
+
|
251 |
+
Returns:
|
252 |
+
`List[str]`: List of files that are to be staged.
|
253 |
+
"""
|
254 |
+
try:
|
255 |
+
p = run_subprocess("git ls-files --exclude-standard -mo".split() + [pattern], folder)
|
256 |
+
if len(p.stdout.strip()):
|
257 |
+
files = p.stdout.strip().split("\n")
|
258 |
+
else:
|
259 |
+
files = []
|
260 |
+
except subprocess.CalledProcessError as exc:
|
261 |
+
raise EnvironmentError(exc.stderr)
|
262 |
+
|
263 |
+
return files
|
264 |
+
|
265 |
+
|
266 |
+
def is_tracked_upstream(folder: Union[str, Path]) -> bool:
|
267 |
+
"""
|
268 |
+
Check if the current checked-out branch is tracked upstream.
|
269 |
+
|
270 |
+
Args:
|
271 |
+
folder (`str` or `Path`):
|
272 |
+
The folder in which to run the command.
|
273 |
+
|
274 |
+
Returns:
|
275 |
+
`bool`: `True` if the current checked-out branch is tracked upstream,
|
276 |
+
`False` otherwise.
|
277 |
+
"""
|
278 |
+
try:
|
279 |
+
run_subprocess("git rev-parse --symbolic-full-name --abbrev-ref @{u}", folder)
|
280 |
+
return True
|
281 |
+
except subprocess.CalledProcessError as exc:
|
282 |
+
if "HEAD" in exc.stderr:
|
283 |
+
raise OSError("No branch checked out")
|
284 |
+
|
285 |
+
return False
|
286 |
+
|
287 |
+
|
288 |
+
def commits_to_push(folder: Union[str, Path], upstream: Optional[str] = None) -> int:
|
289 |
+
"""
|
290 |
+
Check the number of commits that would be pushed upstream
|
291 |
+
|
292 |
+
Args:
|
293 |
+
folder (`str` or `Path`):
|
294 |
+
The folder in which to run the command.
|
295 |
+
upstream (`str`, *optional*):
|
296 |
+
The name of the upstream repository with which the comparison should be
|
297 |
+
made.
|
298 |
+
|
299 |
+
Returns:
|
300 |
+
`int`: Number of commits that would be pushed upstream were a `git
|
301 |
+
push` to proceed.
|
302 |
+
"""
|
303 |
+
try:
|
304 |
+
result = run_subprocess(f"git cherry -v {upstream or ''}", folder)
|
305 |
+
return len(result.stdout.split("\n")) - 1
|
306 |
+
except subprocess.CalledProcessError as exc:
|
307 |
+
raise EnvironmentError(exc.stderr)
|
308 |
+
|
309 |
+
|
310 |
+
class PbarT(TypedDict):
|
311 |
+
# Used to store an opened progress bar in `_lfs_log_progress`
|
312 |
+
bar: tqdm
|
313 |
+
past_bytes: int
|
314 |
+
|
315 |
+
|
316 |
+
@contextmanager
|
317 |
+
def _lfs_log_progress():
|
318 |
+
"""
|
319 |
+
This is a context manager that will log the Git LFS progress of cleaning,
|
320 |
+
smudging, pulling and pushing.
|
321 |
+
"""
|
322 |
+
|
323 |
+
if logger.getEffectiveLevel() >= logging.ERROR:
|
324 |
+
try:
|
325 |
+
yield
|
326 |
+
except Exception:
|
327 |
+
pass
|
328 |
+
return
|
329 |
+
|
330 |
+
def output_progress(stopping_event: threading.Event):
|
331 |
+
"""
|
332 |
+
To be launched as a separate thread; setting the event signals that it
should stop tailing the progress file.
|
334 |
+
"""
|
335 |
+
# Key is tuple(state, filename), value is a dict(tqdm bar and a previous value)
|
336 |
+
pbars: Dict[Tuple[str, str], PbarT] = {}
|
337 |
+
|
338 |
+
def close_pbars():
|
339 |
+
for pbar in pbars.values():
|
340 |
+
pbar["bar"].update(pbar["bar"].total - pbar["past_bytes"])
|
341 |
+
pbar["bar"].refresh()
|
342 |
+
pbar["bar"].close()
|
343 |
+
|
344 |
+
def tail_file(filename) -> Iterator[str]:
|
345 |
+
"""
|
346 |
+
Creates a generator to be iterated through, which will return each
|
347 |
+
line one by one. Will stop tailing the file if the stopping_event is
|
348 |
+
set.
|
349 |
+
"""
|
350 |
+
with open(filename, "r") as file:
|
351 |
+
current_line = ""
|
352 |
+
while True:
|
353 |
+
if stopping_event.is_set():
|
354 |
+
close_pbars()
|
355 |
+
break
|
356 |
+
|
357 |
+
line_bit = file.readline()
|
358 |
+
if line_bit is not None and not len(line_bit.strip()) == 0:
|
359 |
+
current_line += line_bit
|
360 |
+
if current_line.endswith("\n"):
|
361 |
+
yield current_line
|
362 |
+
current_line = ""
|
363 |
+
else:
|
364 |
+
time.sleep(1)
|
365 |
+
|
366 |
+
# If the file isn't created yet, wait for a few seconds before trying again.
|
367 |
+
# Can be interrupted with the stopping_event.
|
368 |
+
while not os.path.exists(os.environ["GIT_LFS_PROGRESS"]):
|
369 |
+
if stopping_event.is_set():
|
370 |
+
close_pbars()
|
371 |
+
return
|
372 |
+
|
373 |
+
time.sleep(2)
|
374 |
+
|
375 |
+
for line in tail_file(os.environ["GIT_LFS_PROGRESS"]):
|
376 |
+
try:
|
377 |
+
state, file_progress, byte_progress, filename = line.split()
|
378 |
+
except ValueError as error:
|
379 |
+
# Try/except to ease debugging. See https://github.com/huggingface/huggingface_hub/issues/1373.
|
380 |
+
raise ValueError(f"Cannot unpack LFS progress line:\n{line}") from error
|
381 |
+
description = f"{state.capitalize()} file {filename}"
|
382 |
+
|
383 |
+
current_bytes, total_bytes = byte_progress.split("/")
|
384 |
+
current_bytes_int = int(current_bytes)
|
385 |
+
total_bytes_int = int(total_bytes)
|
386 |
+
|
387 |
+
pbar = pbars.get((state, filename))
|
388 |
+
if pbar is None:
|
389 |
+
# Initialize progress bar
|
390 |
+
pbars[(state, filename)] = {
|
391 |
+
"bar": tqdm(
|
392 |
+
desc=description,
|
393 |
+
initial=current_bytes_int,
|
394 |
+
total=total_bytes_int,
|
395 |
+
unit="B",
|
396 |
+
unit_scale=True,
|
397 |
+
unit_divisor=1024,
|
398 |
+
name="huggingface_hub.lfs_upload",
|
399 |
+
),
|
400 |
+
"past_bytes": int(current_bytes),
|
401 |
+
}
|
402 |
+
else:
|
403 |
+
# Update progress bar
|
404 |
+
pbar["bar"].update(current_bytes_int - pbar["past_bytes"])
|
405 |
+
pbar["past_bytes"] = current_bytes_int
|
406 |
+
|
407 |
+
current_lfs_progress_value = os.environ.get("GIT_LFS_PROGRESS", "")
|
408 |
+
|
409 |
+
with SoftTemporaryDirectory() as tmpdir:
|
410 |
+
os.environ["GIT_LFS_PROGRESS"] = os.path.join(tmpdir, "lfs_progress")
|
411 |
+
logger.debug(f"Following progress in {os.environ['GIT_LFS_PROGRESS']}")
|
412 |
+
|
413 |
+
exit_event = threading.Event()
|
414 |
+
x = threading.Thread(target=output_progress, args=(exit_event,), daemon=True)
|
415 |
+
x.start()
|
416 |
+
|
417 |
+
try:
|
418 |
+
yield
|
419 |
+
finally:
|
420 |
+
exit_event.set()
|
421 |
+
x.join()
|
422 |
+
|
423 |
+
os.environ["GIT_LFS_PROGRESS"] = current_lfs_progress_value
|
424 |
+
|
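# Illustrative note, not part of the upstream file: each line that git-lfs appends
# to the GIT_LFS_PROGRESS file is expected to look roughly like
#
#     download 1/3 262144/1048576 pytorch_model.bin
#
# i.e. "<state> <file progress> <current bytes>/<total bytes> <filename>", which is
# what `output_progress` above unpacks with `line.split()`. The filename and byte
# counts here are made-up examples.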
425 |
+
|
426 |
+
class Repository:
|
427 |
+
"""
|
428 |
+
Helper class to wrap the git and git-lfs commands.
|
429 |
+
|
430 |
+
The aim is to facilitate interacting with huggingface.co hosted model or
|
431 |
+
dataset repos, though not a lot here (if any) is actually specific to
|
432 |
+
huggingface.co.
|
433 |
+
|
434 |
+
<Tip warning={true}>
|
435 |
+
|
436 |
+
[`Repository`] is deprecated in favor of the http-based alternatives implemented in
|
437 |
+
[`HfApi`]. Given its large adoption in legacy code, the complete removal of
|
438 |
+
[`Repository`] will only happen in release `v1.0`. For more details, please read
|
439 |
+
https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
|
440 |
+
|
441 |
+
</Tip>
|
442 |
+
"""
|
443 |
+
|
444 |
+
command_queue: List[CommandInProgress]
|
445 |
+
|
446 |
+
@validate_hf_hub_args
|
447 |
+
@_deprecate_method(
|
448 |
+
version="1.0",
|
449 |
+
message=(
|
450 |
+
"Please prefer the http-based alternatives instead. Given its large adoption in legacy code, the complete"
|
451 |
+
" removal is only planned on next major release.\nFor more details, please read"
|
452 |
+
" https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http."
|
453 |
+
),
|
454 |
+
)
|
455 |
+
def __init__(
|
456 |
+
self,
|
457 |
+
local_dir: Union[str, Path],
|
458 |
+
clone_from: Optional[str] = None,
|
459 |
+
repo_type: Optional[str] = None,
|
460 |
+
token: Union[bool, str] = True,
|
461 |
+
git_user: Optional[str] = None,
|
462 |
+
git_email: Optional[str] = None,
|
463 |
+
revision: Optional[str] = None,
|
464 |
+
skip_lfs_files: bool = False,
|
465 |
+
client: Optional[HfApi] = None,
|
466 |
+
):
|
467 |
+
"""
|
468 |
+
Instantiate a local clone of a git repo.
|
469 |
+
|
470 |
+
If `clone_from` is set, the repo will be cloned from an existing remote repository.
|
471 |
+
If the remote repo does not exist, an `EnvironmentError` exception will be thrown.
|
472 |
+
Please create the remote repo first using [`create_repo`].
|
473 |
+
|
474 |
+
`Repository` uses the local git credentials by default. If explicitly set, the `token`
|
475 |
+
or the `git_user`/`git_email` pair will be used instead.
|
476 |
+
|
477 |
+
Args:
|
478 |
+
local_dir (`str` or `Path`):
|
479 |
+
path (e.g. `'my_trained_model/'`) to the local directory, where
|
480 |
+
the `Repository` will be initialized.
|
481 |
+
clone_from (`str`, *optional*):
|
482 |
+
Either a repository url or `repo_id`.
|
483 |
+
Example:
|
484 |
+
- `"https://huggingface.co/philschmid/playground-tests"`
|
485 |
+
- `"philschmid/playground-tests"`
|
486 |
+
repo_type (`str`, *optional*):
|
487 |
+
To set when cloning a repo from a repo_id. Default is model.
|
488 |
+
token (`bool` or `str`, *optional*):
|
489 |
+
A valid authentication token (see https://huggingface.co/settings/token).
|
490 |
+
If `None` or `True` and the machine is logged in (through `huggingface-cli login`
|
491 |
+
or [`~huggingface_hub.login`]), token will be retrieved from the cache.
|
492 |
+
If `False`, token is not sent in the request header.
|
493 |
+
git_user (`str`, *optional*):
|
494 |
+
will override the `git config user.name` for committing and
|
495 |
+
pushing files to the hub.
|
496 |
+
git_email (`str`, *optional*):
|
497 |
+
will override the `git config user.email` for committing and
|
498 |
+
pushing files to the hub.
|
499 |
+
revision (`str`, *optional*):
|
500 |
+
Revision to checkout after initializing the repository. If the
|
501 |
+
revision doesn't exist, a branch will be created with that
|
502 |
+
revision name from the default branch's current HEAD.
|
503 |
+
skip_lfs_files (`bool`, *optional*, defaults to `False`):
|
504 |
+
whether to skip git-LFS files or not.
|
505 |
+
client (`HfApi`, *optional*):
|
506 |
+
Instance of [`HfApi`] to use when calling the HF Hub API. A new
|
507 |
+
instance will be created if this is left to `None`.
|
508 |
+
|
509 |
+
Raises:
|
510 |
+
[`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
|
511 |
+
If the remote repository set in `clone_from` does not exist.
|
512 |
+
"""
|
513 |
+
if isinstance(local_dir, Path):
|
514 |
+
local_dir = str(local_dir)
|
515 |
+
os.makedirs(local_dir, exist_ok=True)
|
516 |
+
self.local_dir = os.path.join(os.getcwd(), local_dir)
|
517 |
+
self._repo_type = repo_type
|
518 |
+
self.command_queue = []
|
519 |
+
self.skip_lfs_files = skip_lfs_files
|
520 |
+
self.client = client if client is not None else HfApi()
|
521 |
+
|
522 |
+
self.check_git_versions()
|
523 |
+
|
524 |
+
if isinstance(token, str):
|
525 |
+
self.huggingface_token: Optional[str] = token
|
526 |
+
elif token is False:
|
527 |
+
self.huggingface_token = None
|
528 |
+
else:
|
529 |
+
# if `True` -> explicit use of the cached token
|
530 |
+
# if `None` -> implicit use of the cached token
|
531 |
+
self.huggingface_token = get_token()
|
532 |
+
|
533 |
+
if clone_from is not None:
|
534 |
+
self.clone_from(repo_url=clone_from)
|
535 |
+
else:
|
536 |
+
if is_git_repo(self.local_dir):
|
537 |
+
logger.debug("[Repository] is a valid git repo")
|
538 |
+
else:
|
539 |
+
raise ValueError("If not specifying `clone_from`, you need to pass Repository a valid git clone.")
|
540 |
+
|
541 |
+
if self.huggingface_token is not None and (git_email is None or git_user is None):
|
542 |
+
user = self.client.whoami(self.huggingface_token)
|
543 |
+
|
544 |
+
if git_email is None:
|
545 |
+
git_email = user.get("email")
|
546 |
+
|
547 |
+
if git_user is None:
|
548 |
+
git_user = user.get("fullname")
|
549 |
+
|
550 |
+
if git_user is not None or git_email is not None:
|
551 |
+
self.git_config_username_and_email(git_user, git_email)
|
552 |
+
|
553 |
+
self.lfs_enable_largefiles()
|
554 |
+
self.git_credential_helper_store()
|
555 |
+
|
556 |
+
if revision is not None:
|
557 |
+
self.git_checkout(revision, create_branch_ok=True)
|
558 |
+
|
559 |
+
# This ensures that all commands exit before exiting the Python runtime.
|
560 |
+
# This will ensure all pushes register on the hub, even if other errors happen in subsequent operations.
|
561 |
+
atexit.register(self.wait_for_commands)
|
562 |
+
|
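# Illustrative usage sketch, not part of the upstream file; the local path and
# repo_id are made-up placeholders:
#
#     from huggingface_hub.repository import Repository
#     repo = Repository("my-model", clone_from="username/my-model", token=True)
#     repo.git_pull()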
563 |
+
@property
|
564 |
+
def current_branch(self) -> str:
|
565 |
+
"""
|
566 |
+
Returns the current checked out branch.
|
567 |
+
|
568 |
+
Returns:
|
569 |
+
`str`: Current checked out branch.
|
570 |
+
"""
|
571 |
+
try:
|
572 |
+
result = run_subprocess("git rev-parse --abbrev-ref HEAD", self.local_dir).stdout.strip()
|
573 |
+
except subprocess.CalledProcessError as exc:
|
574 |
+
raise EnvironmentError(exc.stderr)
|
575 |
+
|
576 |
+
return result
|
577 |
+
|
578 |
+
def check_git_versions(self):
|
579 |
+
"""
|
580 |
+
Checks that `git` and `git-lfs` can be run.
|
581 |
+
|
582 |
+
Raises:
|
583 |
+
[`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
|
584 |
+
If `git` or `git-lfs` are not installed.
|
585 |
+
"""
|
586 |
+
try:
|
587 |
+
git_version = run_subprocess("git --version", self.local_dir).stdout.strip()
|
588 |
+
except FileNotFoundError:
|
589 |
+
raise EnvironmentError("Looks like you do not have git installed, please install.")
|
590 |
+
|
591 |
+
try:
|
592 |
+
lfs_version = run_subprocess("git-lfs --version", self.local_dir).stdout.strip()
|
593 |
+
except FileNotFoundError:
|
594 |
+
raise EnvironmentError(
|
595 |
+
"Looks like you do not have git-lfs installed, please install."
|
596 |
+
" You can install from https://git-lfs.github.com/."
|
597 |
+
" Then run `git lfs install` (you only have to do this once)."
|
598 |
+
)
|
599 |
+
logger.info(git_version + "\n" + lfs_version)
|
600 |
+
|
601 |
+
@validate_hf_hub_args
|
602 |
+
def clone_from(self, repo_url: str, token: Union[bool, str, None] = None):
|
603 |
+
"""
|
604 |
+
Clone from a remote. If the folder already exists, will try to clone the
|
605 |
+
repository within it.
|
606 |
+
|
607 |
+
If this folder is a git repository with linked history, will try to
|
608 |
+
update the repository.
|
609 |
+
|
610 |
+
Args:
|
611 |
+
repo_url (`str`):
|
612 |
+
The URL from which to clone the repository
|
613 |
+
token (`Union[str, bool]`, *optional*):
|
614 |
+
Whether to use the authentication token. It can be:
|
615 |
+
- a string which is the token itself
|
616 |
+
- `False`, which would not use the authentication token
|
617 |
+
- `True`, which would fetch the authentication token from the
|
618 |
+
local folder and use it (you should be logged in for this to
|
619 |
+
work).
|
620 |
+
- `None`, which would retrieve the value of
|
621 |
+
`self.huggingface_token`.
|
622 |
+
|
623 |
+
<Tip>
|
624 |
+
|
625 |
+
Raises the following error:
|
626 |
+
|
627 |
+
- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
|
628 |
+
if an organization token (starts with "api_org") is passed. You must use
|
629 |
+
your own personal access token (see https://hf.co/settings/tokens).
|
630 |
+
|
631 |
+
- [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
|
632 |
+
if you are trying to clone the repository in a non-empty folder, or if the
|
633 |
+
`git` operations raise errors.
|
634 |
+
|
635 |
+
</Tip>
|
636 |
+
"""
|
637 |
+
token = (
|
638 |
+
token # str -> use it
|
639 |
+
if isinstance(token, str)
|
640 |
+
else (
|
641 |
+
None # `False` -> explicit no token
|
642 |
+
if token is False
|
643 |
+
else self.huggingface_token # `None` or `True` -> use default
|
644 |
+
)
|
645 |
+
)
|
646 |
+
if token is not None and token.startswith("api_org"):
|
647 |
+
raise ValueError(
|
648 |
+
"You must use your personal access token, not an Organization token"
|
649 |
+
" (see https://hf.co/settings/tokens)."
|
650 |
+
)
|
651 |
+
|
652 |
+
hub_url = self.client.endpoint
|
653 |
+
if hub_url in repo_url or ("http" not in repo_url and len(repo_url.split("/")) <= 2):
|
654 |
+
repo_type, namespace, repo_name = repo_type_and_id_from_hf_id(repo_url, hub_url=hub_url)
|
655 |
+
repo_id = f"{namespace}/{repo_name}" if namespace is not None else repo_name
|
656 |
+
|
657 |
+
if repo_type is not None:
|
658 |
+
self._repo_type = repo_type
|
659 |
+
|
660 |
+
repo_url = hub_url + "/"
|
661 |
+
|
662 |
+
if self._repo_type in constants.REPO_TYPES_URL_PREFIXES:
|
663 |
+
repo_url += constants.REPO_TYPES_URL_PREFIXES[self._repo_type]
|
664 |
+
|
665 |
+
if token is not None:
|
666 |
+
# Add token in git url when provided
|
667 |
+
scheme = urlparse(repo_url).scheme
|
668 |
+
repo_url = repo_url.replace(f"{scheme}://", f"{scheme}://user:{token}@")
|
669 |
+
|
670 |
+
repo_url += repo_id
|
671 |
+
|
672 |
+
# For error messages, it's cleaner to show the repo url without the token.
|
673 |
+
clean_repo_url = re.sub(r"(https?)://.*@", r"\1://", repo_url)
|
674 |
+
try:
|
675 |
+
run_subprocess("git lfs install", self.local_dir)
|
676 |
+
|
677 |
+
# checks if the repository is being initialized in an empty folder or in one with files
|
678 |
+
if len(os.listdir(self.local_dir)) == 0:
|
679 |
+
logger.warning(f"Cloning {clean_repo_url} into local empty directory.")
|
680 |
+
|
681 |
+
with _lfs_log_progress():
|
682 |
+
env = os.environ.copy()
|
683 |
+
|
684 |
+
if self.skip_lfs_files:
|
685 |
+
env.update({"GIT_LFS_SKIP_SMUDGE": "1"})
|
686 |
+
|
687 |
+
run_subprocess(
|
688 |
+
# 'git lfs clone' is deprecated (will display a warning in the terminal)
|
689 |
+
# but we still use it as it provides a nicer UX when downloading large
|
690 |
+
# files (shows progress).
|
691 |
+
f"{'git clone' if self.skip_lfs_files else 'git lfs clone'} {repo_url} .",
|
692 |
+
self.local_dir,
|
693 |
+
env=env,
|
694 |
+
)
|
695 |
+
else:
|
696 |
+
# Check if the folder is the root of a git repository
|
697 |
+
if not is_git_repo(self.local_dir):
|
698 |
+
raise EnvironmentError(
|
699 |
+
"Tried to clone a repository in a non-empty folder that isn't"
|
700 |
+
f" a git repository ('{self.local_dir}'). If you really want to"
|
701 |
+
f" do this, do it manually:\n cd {self.local_dir} && git init"
|
702 |
+
" && git remote add origin && git pull origin main\n or clone"
|
703 |
+
" repo to a new folder and move your existing files there"
|
704 |
+
" afterwards."
|
705 |
+
)
|
706 |
+
|
707 |
+
if is_local_clone(self.local_dir, repo_url):
|
708 |
+
logger.warning(
|
709 |
+
f"{self.local_dir} is already a clone of {clean_repo_url}."
|
710 |
+
" Make sure you pull the latest changes with"
|
711 |
+
" `repo.git_pull()`."
|
712 |
+
)
|
713 |
+
else:
|
714 |
+
output = run_subprocess("git remote get-url origin", self.local_dir, check=False)
|
715 |
+
|
716 |
+
error_msg = (
|
717 |
+
f"Tried to clone {clean_repo_url} in an unrelated git"
|
718 |
+
" repository.\nIf you believe this is an error, please add"
|
719 |
+
f" a remote with the following URL: {clean_repo_url}."
|
720 |
+
)
|
721 |
+
if output.returncode == 0:
|
722 |
+
clean_local_remote_url = re.sub(r"https://.*@", "https://", output.stdout)
|
723 |
+
error_msg += f"\nLocal path has its origin defined as: {clean_local_remote_url}"
|
724 |
+
raise EnvironmentError(error_msg)
|
725 |
+
|
726 |
+
except subprocess.CalledProcessError as exc:
|
727 |
+
raise EnvironmentError(exc.stderr)
|
728 |
+
|
729 |
+
def git_config_username_and_email(self, git_user: Optional[str] = None, git_email: Optional[str] = None):
|
730 |
+
"""
|
731 |
+
Sets git username and email (only in the current repo).
|
732 |
+
|
733 |
+
Args:
|
734 |
+
git_user (`str`, *optional*):
|
735 |
+
The username to register through `git`.
|
736 |
+
git_email (`str`, *optional*):
|
737 |
+
The email to register through `git`.
|
738 |
+
"""
|
739 |
+
try:
|
740 |
+
if git_user is not None:
|
741 |
+
run_subprocess("git config user.name".split() + [git_user], self.local_dir)
|
742 |
+
|
743 |
+
if git_email is not None:
|
744 |
+
run_subprocess(f"git config user.email {git_email}".split(), self.local_dir)
|
745 |
+
except subprocess.CalledProcessError as exc:
|
746 |
+
raise EnvironmentError(exc.stderr)
|
747 |
+
|
748 |
+
def git_credential_helper_store(self):
|
749 |
+
"""
|
750 |
+
Sets the git credential helper to `store`
|
751 |
+
"""
|
752 |
+
try:
|
753 |
+
run_subprocess("git config credential.helper store", self.local_dir)
|
754 |
+
except subprocess.CalledProcessError as exc:
|
755 |
+
raise EnvironmentError(exc.stderr)
|
756 |
+
|
757 |
+
def git_head_hash(self) -> str:
|
758 |
+
"""
|
759 |
+
Get commit sha on top of HEAD.
|
760 |
+
|
761 |
+
Returns:
|
762 |
+
`str`: The current checked out commit SHA.
|
763 |
+
"""
|
764 |
+
try:
|
765 |
+
p = run_subprocess("git rev-parse HEAD", self.local_dir)
|
766 |
+
return p.stdout.strip()
|
767 |
+
except subprocess.CalledProcessError as exc:
|
768 |
+
raise EnvironmentError(exc.stderr)
|
769 |
+
|
770 |
+
def git_remote_url(self) -> str:
|
771 |
+
"""
|
772 |
+
Get URL to origin remote.
|
773 |
+
|
774 |
+
Returns:
|
775 |
+
`str`: The URL of the `origin` remote.
|
776 |
+
"""
|
777 |
+
try:
|
778 |
+
p = run_subprocess("git config --get remote.origin.url", self.local_dir)
|
779 |
+
url = p.stdout.strip()
|
780 |
+
# Strip basic auth info.
|
781 |
+
return re.sub(r"https://.*@", "https://", url)
|
782 |
+
except subprocess.CalledProcessError as exc:
|
783 |
+
raise EnvironmentError(exc.stderr)
|
784 |
+
|
785 |
+
def git_head_commit_url(self) -> str:
|
786 |
+
"""
|
787 |
+
Get URL to last commit on HEAD. We assume it's been pushed, and the url
|
788 |
+
scheme is the same one as for GitHub or HuggingFace.
|
789 |
+
|
790 |
+
Returns:
|
791 |
+
`str`: The URL to the current checked-out commit.
|
792 |
+
"""
|
793 |
+
sha = self.git_head_hash()
|
794 |
+
url = self.git_remote_url()
|
795 |
+
if url.endswith("/"):
|
796 |
+
url = url[:-1]
|
797 |
+
return f"{url}/commit/{sha}"
|
798 |
+
|
799 |
+
def list_deleted_files(self) -> List[str]:
|
800 |
+
"""
|
801 |
+
Returns a list of the files that are deleted in the working directory or
|
802 |
+
index.
|
803 |
+
|
804 |
+
Returns:
|
805 |
+
`List[str]`: A list of files that have been deleted in the working
|
806 |
+
directory or index.
|
807 |
+
"""
|
808 |
+
try:
|
809 |
+
git_status = run_subprocess("git status -s", self.local_dir).stdout.strip()
|
810 |
+
except subprocess.CalledProcessError as exc:
|
811 |
+
raise EnvironmentError(exc.stderr)
|
812 |
+
|
813 |
+
if len(git_status) == 0:
|
814 |
+
return []
|
815 |
+
|
816 |
+
# Receives a status like the following
|
817 |
+
# D .gitignore
|
818 |
+
# D new_file.json
|
819 |
+
# AD new_file1.json
|
820 |
+
# ?? new_file2.json
|
821 |
+
# ?? new_file4.json
|
822 |
+
|
823 |
+
# Strip each line of whitespaces
|
824 |
+
modified_files_statuses = [status.strip() for status in git_status.split("\n")]
|
825 |
+
|
826 |
+
# Only keep files that are deleted using the D prefix
|
827 |
+
deleted_files_statuses = [status for status in modified_files_statuses if "D" in status.split()[0]]
|
828 |
+
|
829 |
+
# Remove the D prefix and strip to keep only the relevant filename
|
830 |
+
deleted_files = [status.split()[-1].strip() for status in deleted_files_statuses]
|
831 |
+
|
832 |
+
return deleted_files
|
833 |
+
|
834 |
+
def lfs_track(self, patterns: Union[str, List[str]], filename: bool = False):
|
835 |
+
"""
|
836 |
+
Tell git-lfs to track files according to a pattern.
|
837 |
+
|
838 |
+
Setting the `filename` argument to `True` will treat the arguments as
|
839 |
+
literal filenames, not as patterns. Any special glob characters in the
|
840 |
+
filename will be escaped when writing to the `.gitattributes` file.
|
841 |
+
|
842 |
+
Args:
|
843 |
+
patterns (`Union[str, List[str]]`):
|
844 |
+
The pattern, or list of patterns, to track with git-lfs.
|
845 |
+
filename (`bool`, *optional*, defaults to `False`):
|
846 |
+
Whether to use the patterns as literal filenames.
|
847 |
+
"""
|
848 |
+
if isinstance(patterns, str):
|
849 |
+
patterns = [patterns]
|
850 |
+
try:
|
851 |
+
for pattern in patterns:
|
852 |
+
run_subprocess(
|
853 |
+
f"git lfs track {'--filename' if filename else ''} {pattern}",
|
854 |
+
self.local_dir,
|
855 |
+
)
|
856 |
+
except subprocess.CalledProcessError as exc:
|
857 |
+
raise EnvironmentError(exc.stderr)
|
858 |
+
|
859 |
+
def lfs_untrack(self, patterns: Union[str, List[str]]):
|
860 |
+
"""
|
861 |
+
Tell git-lfs to untrack those files.
|
862 |
+
|
863 |
+
Args:
|
864 |
+
patterns (`Union[str, List[str]]`):
|
865 |
+
The pattern, or list of patterns, to untrack with git-lfs.
|
866 |
+
"""
|
867 |
+
if isinstance(patterns, str):
|
868 |
+
patterns = [patterns]
|
869 |
+
try:
|
870 |
+
for pattern in patterns:
|
871 |
+
run_subprocess("git lfs untrack".split() + [pattern], self.local_dir)
|
872 |
+
except subprocess.CalledProcessError as exc:
|
873 |
+
raise EnvironmentError(exc.stderr)
|
874 |
+
|
875 |
+
def lfs_enable_largefiles(self):
|
876 |
+
"""
|
877 |
+
HF-specific. This enables upload support of files >5GB.
|
878 |
+
"""
|
879 |
+
try:
|
880 |
+
lfs_config = "git config lfs.customtransfer.multipart"
|
881 |
+
run_subprocess(f"{lfs_config}.path huggingface-cli", self.local_dir)
|
882 |
+
run_subprocess(
|
883 |
+
f"{lfs_config}.args {LFS_MULTIPART_UPLOAD_COMMAND}",
|
884 |
+
self.local_dir,
|
885 |
+
)
|
886 |
+
except subprocess.CalledProcessError as exc:
|
887 |
+
raise EnvironmentError(exc.stderr)
|
888 |
+
|
889 |
+
def auto_track_binary_files(self, pattern: str = ".") -> List[str]:
|
890 |
+
"""
|
891 |
+
Automatically track binary files with git-lfs.
|
892 |
+
|
893 |
+
Args:
|
894 |
+
pattern (`str`, *optional*, defaults to "."):
|
895 |
+
The pattern with which to track files that are binary.
|
896 |
+
|
897 |
+
Returns:
|
898 |
+
`List[str]`: List of filenames that are now tracked due to being
|
899 |
+
binary files
|
900 |
+
"""
|
901 |
+
files_to_be_tracked_with_lfs = []
|
902 |
+
|
903 |
+
deleted_files = self.list_deleted_files()
|
904 |
+
|
905 |
+
for filename in files_to_be_staged(pattern, folder=self.local_dir):
|
906 |
+
if filename in deleted_files:
|
907 |
+
continue
|
908 |
+
|
909 |
+
path_to_file = os.path.join(os.getcwd(), self.local_dir, filename)
|
910 |
+
|
911 |
+
if not (is_tracked_with_lfs(path_to_file) or is_git_ignored(path_to_file)):
|
912 |
+
size_in_mb = os.path.getsize(path_to_file) / (1024 * 1024)
|
913 |
+
|
914 |
+
if size_in_mb >= 10:
|
915 |
+
logger.warning(
|
916 |
+
"Parsing a large file to check if binary or not. Tracking large"
|
917 |
+
" files using `repository.auto_track_large_files` is"
|
918 |
+
" recommended so as to not load the full file in memory."
|
919 |
+
)
|
920 |
+
|
921 |
+
is_binary = is_binary_file(path_to_file)
|
922 |
+
|
923 |
+
if is_binary:
|
924 |
+
self.lfs_track(filename)
|
925 |
+
files_to_be_tracked_with_lfs.append(filename)
|
926 |
+
|
927 |
+
# Cleanup the .gitattributes if files were deleted
|
928 |
+
self.lfs_untrack(deleted_files)
|
929 |
+
|
930 |
+
return files_to_be_tracked_with_lfs
|
931 |
+
|
932 |
+
def auto_track_large_files(self, pattern: str = ".") -> List[str]:
|
933 |
+
"""
|
934 |
+
Automatically track large files (files that weigh more than 10MBs) with
|
935 |
+
git-lfs.
|
936 |
+
|
937 |
+
Args:
|
938 |
+
pattern (`str`, *optional*, defaults to "."):
|
939 |
+
The pattern with which to track files that are above 10MBs.
|
940 |
+
|
941 |
+
Returns:
|
942 |
+
`List[str]`: List of filenames that are now tracked due to their
|
943 |
+
size.
|
944 |
+
"""
|
945 |
+
files_to_be_tracked_with_lfs = []
|
946 |
+
|
947 |
+
deleted_files = self.list_deleted_files()
|
948 |
+
|
949 |
+
for filename in files_to_be_staged(pattern, folder=self.local_dir):
|
950 |
+
if filename in deleted_files:
|
951 |
+
continue
|
952 |
+
|
953 |
+
path_to_file = os.path.join(os.getcwd(), self.local_dir, filename)
|
954 |
+
size_in_mb = os.path.getsize(path_to_file) / (1024 * 1024)
|
955 |
+
|
956 |
+
if size_in_mb >= 10 and not is_tracked_with_lfs(path_to_file) and not is_git_ignored(path_to_file):
|
957 |
+
self.lfs_track(filename)
|
958 |
+
files_to_be_tracked_with_lfs.append(filename)
|
959 |
+
|
960 |
+
# Cleanup the .gitattributes if files were deleted
|
961 |
+
self.lfs_untrack(deleted_files)
|
962 |
+
|
963 |
+
return files_to_be_tracked_with_lfs
|
964 |
+
|
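# Illustrative sketch, not part of the upstream file: tracking patterns with git-lfs
# explicitly instead of relying on the auto-tracking helpers above (the patterns and
# the filename are made-up placeholders):
#
#     repo.lfs_track(["*.bin", "*.safetensors"])
#     repo.lfs_track("weights checkpoint.bin", filename=True)  # literal filename, glob chars escaped
#     repo.git_add(auto_lfs_track=False)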
965 |
+
def lfs_prune(self, recent=False):
|
966 |
+
"""
|
967 |
+
git lfs prune
|
968 |
+
|
969 |
+
Args:
|
970 |
+
recent (`bool`, *optional*, defaults to `False`):
|
971 |
+
Whether to prune files even if they were referenced by recent
|
972 |
+
commits. See the following
|
973 |
+
[link](https://github.com/git-lfs/git-lfs/blob/f3d43f0428a84fc4f1e5405b76b5a73ec2437e65/docs/man/git-lfs-prune.1.ronn#recent-files)
|
974 |
+
for more information.
|
975 |
+
"""
|
976 |
+
try:
|
977 |
+
with _lfs_log_progress():
|
978 |
+
result = run_subprocess(f"git lfs prune {'--recent' if recent else ''}", self.local_dir)
|
979 |
+
logger.info(result.stdout)
|
980 |
+
except subprocess.CalledProcessError as exc:
|
981 |
+
raise EnvironmentError(exc.stderr)
|
982 |
+
|
983 |
+
def git_pull(self, rebase: bool = False, lfs: bool = False):
|
984 |
+
"""
|
985 |
+
git pull
|
986 |
+
|
987 |
+
Args:
|
988 |
+
rebase (`bool`, *optional*, defaults to `False`):
|
989 |
+
Whether to rebase the current branch on top of the upstream
|
990 |
+
branch after fetching.
|
991 |
+
lfs (`bool`, *optional*, defaults to `False`):
|
992 |
+
Whether to fetch the LFS files too. This option only changes the
|
993 |
+
behavior when a repository was cloned without fetching the LFS
|
994 |
+
files; calling `repo.git_pull(lfs=True)` will then fetch the LFS
|
995 |
+
file from the remote repository.
|
996 |
+
"""
|
997 |
+
command = "git pull" if not lfs else "git lfs pull"
|
998 |
+
if rebase:
|
999 |
+
command += " --rebase"
|
1000 |
+
try:
|
1001 |
+
with _lfs_log_progress():
|
1002 |
+
result = run_subprocess(command, self.local_dir)
|
1003 |
+
logger.info(result.stdout)
|
1004 |
+
except subprocess.CalledProcessError as exc:
|
1005 |
+
raise EnvironmentError(exc.stderr)
|
1006 |
+
|
1007 |
+
def git_add(self, pattern: str = ".", auto_lfs_track: bool = False):
|
1008 |
+
"""
|
1009 |
+
git add
|
1010 |
+
|
1011 |
+
Setting the `auto_lfs_track` parameter to `True` will automatically
|
1012 |
+
track files that are larger than 10MB with `git-lfs`.
|
1013 |
+
|
1014 |
+
Args:
|
1015 |
+
pattern (`str`, *optional*, defaults to "."):
|
1016 |
+
The pattern with which to add files to staging.
|
1017 |
+
auto_lfs_track (`bool`, *optional*, defaults to `False`):
|
1018 |
+
Whether to automatically track large and binary files with
|
1019 |
+
git-lfs. Any file over 10MB in size, or in binary format, will
|
1020 |
+
be automatically tracked.
|
1021 |
+
"""
|
1022 |
+
if auto_lfs_track:
|
1023 |
+
# Track files according to their size (>=10MB)
|
1024 |
+
tracked_files = self.auto_track_large_files(pattern)
|
1025 |
+
|
1026 |
+
# Read the remaining files and track them if they're binary
|
1027 |
+
tracked_files.extend(self.auto_track_binary_files(pattern))
|
1028 |
+
|
1029 |
+
if tracked_files:
|
1030 |
+
logger.warning(
|
1031 |
+
f"Adding files tracked by Git LFS: {tracked_files}. This may take a"
|
1032 |
+
" bit of time if the files are large."
|
1033 |
+
)
|
1034 |
+
|
1035 |
+
try:
|
1036 |
+
result = run_subprocess("git add -v".split() + [pattern], self.local_dir)
|
1037 |
+
logger.info(f"Adding to index:\n{result.stdout}\n")
|
1038 |
+
except subprocess.CalledProcessError as exc:
|
1039 |
+
raise EnvironmentError(exc.stderr)
|
1040 |
+
|
1041 |
+
def git_commit(self, commit_message: str = "commit files to HF hub"):
|
1042 |
+
"""
|
1043 |
+
git commit
|
1044 |
+
|
1045 |
+
Args:
|
1046 |
+
commit_message (`str`, *optional*, defaults to "commit files to HF hub"):
|
1047 |
+
The message attributed to the commit.
|
1048 |
+
"""
|
1049 |
+
try:
|
1050 |
+
result = run_subprocess("git commit -v -m".split() + [commit_message], self.local_dir)
|
1051 |
+
logger.info(f"Committed:\n{result.stdout}\n")
|
1052 |
+
except subprocess.CalledProcessError as exc:
|
1053 |
+
if len(exc.stderr) > 0:
|
1054 |
+
raise EnvironmentError(exc.stderr)
|
1055 |
+
else:
|
1056 |
+
raise EnvironmentError(exc.stdout)
|
1057 |
+
|
1058 |
+
def git_push(
|
1059 |
+
self,
|
1060 |
+
upstream: Optional[str] = None,
|
1061 |
+
blocking: bool = True,
|
1062 |
+
auto_lfs_prune: bool = False,
|
1063 |
+
) -> Union[str, Tuple[str, CommandInProgress]]:
|
1064 |
+
"""
|
1065 |
+
git push
|
1066 |
+
|
1067 |
+
If used with `blocking=True` (the default), returns the url to the commit on the
remote repo once the push has finished. If used with `blocking=False`, returns a
tuple containing that url and a `CommandInProgress` object that can be used to
follow the push asynchronously.
|
1071 |
+
|
1072 |
+
Args:
|
1073 |
+
upstream (`str`, *optional*):
|
1074 |
+
Upstream to which this should push. If not specified, will push
|
1075 |
+
to the lastly defined upstream or to the default one (`origin
|
1076 |
+
main`).
|
1077 |
+
blocking (`bool`, *optional*, defaults to `True`):
|
1078 |
+
Whether the function should return only when the push has
|
1079 |
+
finished. Setting this to `False` will return a
|
1080 |
+
`CommandInProgress` object which has an `is_done` property. This
|
1081 |
+
property will be set to `True` when the push is finished.
|
1082 |
+
auto_lfs_prune (`bool`, *optional*, defaults to `False`):
|
1083 |
+
Whether to automatically prune files once they have been pushed
|
1084 |
+
to the remote.
|
1085 |
+
"""
|
1086 |
+
command = "git push"
|
1087 |
+
|
1088 |
+
if upstream:
|
1089 |
+
command += f" --set-upstream {upstream}"
|
1090 |
+
|
1091 |
+
number_of_commits = commits_to_push(self.local_dir, upstream)
|
1092 |
+
|
1093 |
+
if number_of_commits > 1:
|
1094 |
+
logger.warning(f"Several commits ({number_of_commits}) will be pushed upstream.")
|
1095 |
+
if blocking:
|
1096 |
+
logger.warning("The progress bars may be unreliable.")
|
1097 |
+
|
1098 |
+
try:
|
1099 |
+
with _lfs_log_progress():
|
1100 |
+
process = subprocess.Popen(
|
1101 |
+
command.split(),
|
1102 |
+
stderr=subprocess.PIPE,
|
1103 |
+
stdout=subprocess.PIPE,
|
1104 |
+
encoding="utf-8",
|
1105 |
+
cwd=self.local_dir,
|
1106 |
+
)
|
1107 |
+
|
1108 |
+
if blocking:
|
1109 |
+
stdout, stderr = process.communicate()
|
1110 |
+
return_code = process.poll()
|
1111 |
+
process.kill()
|
1112 |
+
|
1113 |
+
if len(stderr):
|
1114 |
+
logger.warning(stderr)
|
1115 |
+
|
1116 |
+
if return_code:
|
1117 |
+
raise subprocess.CalledProcessError(return_code, process.args, output=stdout, stderr=stderr)
|
1118 |
+
|
1119 |
+
except subprocess.CalledProcessError as exc:
|
1120 |
+
raise EnvironmentError(exc.stderr)
|
1121 |
+
|
1122 |
+
if not blocking:
|
1123 |
+
|
1124 |
+
def status_method():
|
1125 |
+
status = process.poll()
|
1126 |
+
if status is None:
|
1127 |
+
return -1
|
1128 |
+
else:
|
1129 |
+
return status
|
1130 |
+
|
1131 |
+
command_in_progress = CommandInProgress(
|
1132 |
+
"push",
|
1133 |
+
is_done_method=lambda: process.poll() is not None,
|
1134 |
+
status_method=status_method,
|
1135 |
+
process=process,
|
1136 |
+
post_method=self.lfs_prune if auto_lfs_prune else None,
|
1137 |
+
)
|
1138 |
+
|
1139 |
+
self.command_queue.append(command_in_progress)
|
1140 |
+
|
1141 |
+
return self.git_head_commit_url(), command_in_progress
|
1142 |
+
|
1143 |
+
if auto_lfs_prune:
|
1144 |
+
self.lfs_prune()
|
1145 |
+
|
1146 |
+
return self.git_head_commit_url()
|
1147 |
+
|
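# Illustrative sketch, not part of the upstream file: following a non-blocking push
# through the returned `CommandInProgress` object (`repo` is a hypothetical
# Repository instance; `time` is already imported at the top of this module):
#
#     url, push_command = repo.git_push(blocking=False)
#     while not push_command.is_done:
#         time.sleep(1)
#     if push_command.failed:
#         print(push_command.stderr)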
1148 |
+
def git_checkout(self, revision: str, create_branch_ok: bool = False):
|
1149 |
+
"""
|
1150 |
+
git checkout a given revision
|
1151 |
+
|
1152 |
+
Specifying `create_branch_ok` to `True` will create the branch to the
|
1153 |
+
given revision if that revision doesn't exist.
|
1154 |
+
|
1155 |
+
Args:
|
1156 |
+
revision (`str`):
|
1157 |
+
The revision to checkout.
|
1158 |
+
create_branch_ok (`bool`, *optional*, defaults to `False`):
Whether to create a branch named `revision`, starting from the currently
checked-out reference, if `revision` is not an existing revision.
|
1162 |
+
"""
|
1163 |
+
try:
|
1164 |
+
result = run_subprocess(f"git checkout {revision}", self.local_dir)
|
1165 |
+
logger.warning(f"Checked out {revision} from {self.current_branch}.")
|
1166 |
+
logger.warning(result.stdout)
|
1167 |
+
except subprocess.CalledProcessError as exc:
|
1168 |
+
if not create_branch_ok:
|
1169 |
+
raise EnvironmentError(exc.stderr)
|
1170 |
+
else:
|
1171 |
+
try:
|
1172 |
+
result = run_subprocess(f"git checkout -b {revision}", self.local_dir)
|
1173 |
+
logger.warning(
|
1174 |
+
f"Revision `{revision}` does not exist. Created and checked out branch `{revision}`."
|
1175 |
+
)
|
1176 |
+
logger.warning(result.stdout)
|
1177 |
+
except subprocess.CalledProcessError as exc:
|
1178 |
+
raise EnvironmentError(exc.stderr)
|
1179 |
+
|
1180 |
+
def tag_exists(self, tag_name: str, remote: Optional[str] = None) -> bool:
|
1181 |
+
"""
|
1182 |
+
Check if a tag exists or not.
|
1183 |
+
|
1184 |
+
Args:
|
1185 |
+
tag_name (`str`):
|
1186 |
+
The name of the tag to check.
|
1187 |
+
remote (`str`, *optional*):
|
1188 |
+
The remote on which to check whether the tag exists. This parameter
should be the identifier of the remote (for instance `"origin"`).
|
1190 |
+
|
1191 |
+
Returns:
|
1192 |
+
`bool`: Whether the tag exists.
|
1193 |
+
"""
|
1194 |
+
if remote:
|
1195 |
+
try:
|
1196 |
+
result = run_subprocess(f"git ls-remote origin refs/tags/{tag_name}", self.local_dir).stdout.strip()
|
1197 |
+
except subprocess.CalledProcessError as exc:
|
1198 |
+
raise EnvironmentError(exc.stderr)
|
1199 |
+
|
1200 |
+
return len(result) != 0
|
1201 |
+
else:
|
1202 |
+
try:
|
1203 |
+
git_tags = run_subprocess("git tag", self.local_dir).stdout.strip()
|
1204 |
+
except subprocess.CalledProcessError as exc:
|
1205 |
+
raise EnvironmentError(exc.stderr)
|
1206 |
+
|
1207 |
+
git_tags = git_tags.split("\n")
|
1208 |
+
return tag_name in git_tags
|
1209 |
+
|
1210 |
+
def delete_tag(self, tag_name: str, remote: Optional[str] = None) -> bool:
|
1211 |
+
"""
|
1212 |
+
Delete a tag, both local and remote, if it exists
|
1213 |
+
|
1214 |
+
Args:
|
1215 |
+
tag_name (`str`):
|
1216 |
+
The tag name to delete.
|
1217 |
+
remote (`str`, *optional*):
|
1218 |
+
The remote on which to delete the tag.
|
1219 |
+
|
1220 |
+
Returns:
|
1221 |
+
`bool`: `True` if the tag was deleted, `False` if the tag didn't exist.
If `remote` is not passed, the tag is only deleted locally.
|
1223 |
+
"""
|
1224 |
+
delete_locally = True
|
1225 |
+
delete_remotely = True
|
1226 |
+
|
1227 |
+
if not self.tag_exists(tag_name):
|
1228 |
+
delete_locally = False
|
1229 |
+
|
1230 |
+
if not self.tag_exists(tag_name, remote=remote):
|
1231 |
+
delete_remotely = False
|
1232 |
+
|
1233 |
+
if delete_locally:
|
1234 |
+
try:
|
1235 |
+
run_subprocess(["git", "tag", "-d", tag_name], self.local_dir).stdout.strip()
|
1236 |
+
except subprocess.CalledProcessError as exc:
|
1237 |
+
raise EnvironmentError(exc.stderr)
|
1238 |
+
|
1239 |
+
if remote and delete_remotely:
|
1240 |
+
try:
|
1241 |
+
run_subprocess(f"git push {remote} --delete {tag_name}", self.local_dir).stdout.strip()
|
1242 |
+
except subprocess.CalledProcessError as exc:
|
1243 |
+
raise EnvironmentError(exc.stderr)
|
1244 |
+
|
1245 |
+
return True
|
1246 |
+
|
1247 |
+
def add_tag(self, tag_name: str, message: Optional[str] = None, remote: Optional[str] = None):
|
1248 |
+
"""
|
1249 |
+
Add a tag at the current head and push it
|
1250 |
+
|
1251 |
+
If remote is None, will just be updated locally
|
1252 |
+
|
1253 |
+
If no message is provided, the tag will be lightweight. If a message is
|
1254 |
+
provided, the tag will be annotated.
|
1255 |
+
|
1256 |
+
Args:
|
1257 |
+
tag_name (`str`):
|
1258 |
+
The name of the tag to be added.
|
1259 |
+
message (`str`, *optional*):
|
1260 |
+
The message that accompanies the tag. The tag will turn into an
|
1261 |
+
annotated tag if a message is passed.
|
1262 |
+
remote (`str`, *optional*):
|
1263 |
+
The remote on which to add the tag.
|
1264 |
+
"""
|
1265 |
+
if message:
|
1266 |
+
tag_args = ["git", "tag", "-a", tag_name, "-m", message]
|
1267 |
+
else:
|
1268 |
+
tag_args = ["git", "tag", tag_name]
|
1269 |
+
|
1270 |
+
try:
|
1271 |
+
run_subprocess(tag_args, self.local_dir).stdout.strip()
|
1272 |
+
except subprocess.CalledProcessError as exc:
|
1273 |
+
raise EnvironmentError(exc.stderr)
|
1274 |
+
|
1275 |
+
if remote:
|
1276 |
+
try:
|
1277 |
+
run_subprocess(f"git push {remote} {tag_name}", self.local_dir).stdout.strip()
|
1278 |
+
except subprocess.CalledProcessError as exc:
|
1279 |
+
raise EnvironmentError(exc.stderr)
|
1280 |
+
|
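# Illustrative sketch, not part of the upstream file: a tag round-trip using the
# methods above (the tag name and remote are made-up placeholders):
#
#     repo.add_tag("v0.1", message="first release", remote="origin")
#     assert repo.tag_exists("v0.1", remote="origin")
#     repo.delete_tag("v0.1", remote="origin")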
1281 |
+
def is_repo_clean(self) -> bool:
|
1282 |
+
"""
|
1283 |
+
Return whether or not the git status is clean.
|
1284 |
+
|
1285 |
+
Returns:
|
1286 |
+
`bool`: `True` if the git status is clean, `False` otherwise.
|
1287 |
+
"""
|
1288 |
+
try:
|
1289 |
+
git_status = run_subprocess("git status --porcelain", self.local_dir).stdout.strip()
|
1290 |
+
except subprocess.CalledProcessError as exc:
|
1291 |
+
raise EnvironmentError(exc.stderr)
|
1292 |
+
|
1293 |
+
return len(git_status) == 0
|
1294 |
+
|
1295 |
+
def push_to_hub(
|
1296 |
+
self,
|
1297 |
+
commit_message: str = "commit files to HF hub",
|
1298 |
+
blocking: bool = True,
|
1299 |
+
clean_ok: bool = True,
|
1300 |
+
auto_lfs_prune: bool = False,
|
1301 |
+
) -> Union[None, str, Tuple[str, CommandInProgress]]:
|
1302 |
+
"""
|
1303 |
+
Helper to add, commit, and push files to remote repository on the
|
1304 |
+
HuggingFace Hub. Will automatically track large files (>10MB).
|
1305 |
+
|
1306 |
+
Args:
|
1307 |
+
commit_message (`str`):
|
1308 |
+
Message to use for the commit.
|
1309 |
+
blocking (`bool`, *optional*, defaults to `True`):
|
1310 |
+
Whether the function should return only when the `git push` has
|
1311 |
+
finished.
|
1312 |
+
clean_ok (`bool`, *optional*, defaults to `True`):
|
1313 |
+
If `True`, this function will return `None` when the repo is untouched
(nothing to commit). If `False`, an untouched repo makes the underlying
`git commit` fail and an error is raised.
|
1316 |
+
auto_lfs_prune (`bool`, *optional*, defaults to `False`):
|
1317 |
+
Whether to automatically prune files once they have been pushed
|
1318 |
+
to the remote.
|
1319 |
+
"""
|
1320 |
+
if clean_ok and self.is_repo_clean():
|
1321 |
+
logger.info("Repo currently clean. Ignoring push_to_hub")
|
1322 |
+
return None
|
1323 |
+
self.git_add(auto_lfs_track=True)
|
1324 |
+
self.git_commit(commit_message)
|
1325 |
+
return self.git_push(
|
1326 |
+
upstream=f"origin {self.current_branch}",
|
1327 |
+
blocking=blocking,
|
1328 |
+
auto_lfs_prune=auto_lfs_prune,
|
1329 |
+
)
|
1330 |
+
|
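# Illustrative usage sketch, not part of the upstream file; the local path, repo_id
# and commit message are made-up placeholders:
#
#     from huggingface_hub.repository import Repository
#     repo = Repository("my-model", clone_from="username/my-model")
#     # ... write or update files inside "my-model" ...
#     repo.push_to_hub(commit_message="Upload new weights")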
1331 |
+
@contextmanager
|
1332 |
+
def commit(
|
1333 |
+
self,
|
1334 |
+
commit_message: str,
|
1335 |
+
branch: Optional[str] = None,
|
1336 |
+
track_large_files: bool = True,
|
1337 |
+
blocking: bool = True,
|
1338 |
+
auto_lfs_prune: bool = False,
|
1339 |
+
):
|
1340 |
+
"""
|
1341 |
+
Context manager utility to handle committing to a repository. This
|
1342 |
+
automatically tracks large files (>10Mb) with git-lfs. Set the
|
1343 |
+
`track_large_files` argument to `False` if you wish to ignore that
|
1344 |
+
behavior.
|
1345 |
+
|
1346 |
+
Args:
|
1347 |
+
commit_message (`str`):
|
1348 |
+
Message to use for the commit.
|
1349 |
+
branch (`str`, *optional*):
|
1350 |
+
The branch on which the commit will appear. This branch will be
|
1351 |
+
checked-out before any operation.
|
1352 |
+
track_large_files (`bool`, *optional*, defaults to `True`):
|
1353 |
+
Whether to automatically track large files or not. Will do so by
|
1354 |
+
default.
|
1355 |
+
blocking (`bool`, *optional*, defaults to `True`):
|
1356 |
+
Whether the function should return only when the `git push` has
|
1357 |
+
finished.
|
1358 |
+
auto_lfs_prune (`bool`, *optional*, defaults to `False`):
|
1359 |
+
Whether to automatically prune files once they have been pushed
|
1360 |
+
to the remote.
|
1361 |
+
|
1362 |
+
Examples:
|
1363 |
+
|
1364 |
+
```python
|
1365 |
+
>>> with Repository(
|
1366 |
+
... "text-files",
|
1367 |
+
... clone_from="<user>/text-files",
|
1368 |
+
... token=True,
|
1369 |
+
>>> ).commit("My first file :)"):
|
1370 |
+
... with open("file.txt", "w+") as f:
|
1371 |
+
... f.write(json.dumps({"hey": 8}))
|
1372 |
+
|
1373 |
+
>>> import torch
|
1374 |
+
|
1375 |
+
>>> model = torch.nn.Transformer()
|
1376 |
+
>>> with Repository(
|
1377 |
+
... "torch-model",
|
1378 |
+
... clone_from="<user>/torch-model",
|
1379 |
+
... token=True,
|
1380 |
+
>>> ).commit("My cool model :)"):
|
1381 |
+
... torch.save(model.state_dict(), "model.pt")
|
1382 |
+
```
|
1383 |
+
|
1384 |
+
"""
|
1385 |
+
|
1386 |
+
files_to_stage = files_to_be_staged(".", folder=self.local_dir)
|
1387 |
+
|
1388 |
+
if len(files_to_stage):
|
1389 |
+
files_in_msg = str(files_to_stage[:5])[:-1] + ", ...]" if len(files_to_stage) > 5 else str(files_to_stage)
|
1390 |
+
logger.error(
"There are some updated files in the local repository that are not"
|
1392 |
+
f" committed: {files_in_msg}. This may lead to errors if checking out"
|
1393 |
+
" a branch. These files and their modifications will be added to the"
|
1394 |
+
" current commit."
|
1395 |
+
)
|
1396 |
+
|
1397 |
+
if branch is not None:
|
1398 |
+
self.git_checkout(branch, create_branch_ok=True)
|
1399 |
+
|
1400 |
+
if is_tracked_upstream(self.local_dir):
|
1401 |
+
logger.warning("Pulling changes ...")
|
1402 |
+
self.git_pull(rebase=True)
|
1403 |
+
else:
|
1404 |
+
logger.warning(f"The current branch has no upstream branch. Will push to 'origin {self.current_branch}'")
|
1405 |
+
|
1406 |
+
current_working_directory = os.getcwd()
|
1407 |
+
os.chdir(os.path.join(current_working_directory, self.local_dir))
|
1408 |
+
|
1409 |
+
try:
|
1410 |
+
yield self
|
1411 |
+
finally:
|
1412 |
+
self.git_add(auto_lfs_track=track_large_files)
|
1413 |
+
|
1414 |
+
try:
|
1415 |
+
self.git_commit(commit_message)
|
1416 |
+
except OSError as e:
|
1417 |
+
# If no changes are detected, there is nothing to commit.
|
1418 |
+
if "nothing to commit" not in str(e):
|
1419 |
+
raise e
|
1420 |
+
|
1421 |
+
try:
|
1422 |
+
self.git_push(
|
1423 |
+
upstream=f"origin {self.current_branch}",
|
1424 |
+
blocking=blocking,
|
1425 |
+
auto_lfs_prune=auto_lfs_prune,
|
1426 |
+
)
|
1427 |
+
except OSError as e:
|
1428 |
+
# If the push fails because git could not read credentials, raise a clearer authentication error.
|
1429 |
+
if "could not read Username" in str(e):
|
1430 |
+
raise OSError("Couldn't authenticate user for push. Did you set `token` to `True`?") from e
|
1431 |
+
else:
|
1432 |
+
raise e
|
1433 |
+
|
1434 |
+
os.chdir(current_working_directory)
|
1435 |
+
|
1436 |
+
def repocard_metadata_load(self) -> Optional[Dict]:
|
1437 |
+
filepath = os.path.join(self.local_dir, constants.REPOCARD_NAME)
|
1438 |
+
if os.path.isfile(filepath):
|
1439 |
+
return metadata_load(filepath)
|
1440 |
+
return None
|
1441 |
+
|
1442 |
+
def repocard_metadata_save(self, data: Dict) -> None:
|
1443 |
+
return metadata_save(os.path.join(self.local_dir, constants.REPOCARD_NAME), data)
|
1444 |
+
|
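# Illustrative sketch, not part of the upstream file: reading and updating the repo
# card metadata (the YAML front matter of README.md) through the two helpers above
# (the license value is a made-up example):
#
#     metadata = repo.repocard_metadata_load() or {}
#     metadata["license"] = "apache-2.0"
#     repo.repocard_metadata_save(metadata)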
1445 |
+
@property
|
1446 |
+
def commands_failed(self):
|
1447 |
+
"""
|
1448 |
+
Returns the asynchronous commands that failed.
|
1449 |
+
"""
|
1450 |
+
return [c for c in self.command_queue if c.status > 0]
|
1451 |
+
|
1452 |
+
@property
|
1453 |
+
def commands_in_progress(self):
|
1454 |
+
"""
|
1455 |
+
Returns the asynchronous commands that are currently in progress.
|
1456 |
+
"""
|
1457 |
+
return [c for c in self.command_queue if not c.is_done]
|
1458 |
+
|
1459 |
+
def wait_for_commands(self):
|
1460 |
+
"""
|
1461 |
+
Blocking method: blocks all subsequent execution until all commands have
|
1462 |
+
been processed.
|
1463 |
+
"""
|
1464 |
+
index = 0
|
1465 |
+
for command_failed in self.commands_failed:
|
1466 |
+
logger.error(f"The {command_failed.title} command with PID {command_failed._process.pid} failed.")
|
1467 |
+
logger.error(command_failed.stderr)
|
1468 |
+
|
1469 |
+
while self.commands_in_progress:
|
1470 |
+
if index % 10 == 0:
|
1471 |
+
logger.warning(
|
1472 |
+
f"Waiting for the following commands to finish before shutting down: {self.commands_in_progress}."
|
1473 |
+
)
|
1474 |
+
|
1475 |
+
index += 1
|
1476 |
+
|
1477 |
+
time.sleep(1)
|
meow/lib/python3.13/site-packages/idna-3.10.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
1 |
+
pip
|
meow/lib/python3.13/site-packages/idna-3.10.dist-info/LICENSE.md
ADDED
@@ -0,0 +1,31 @@
1 |
+
BSD 3-Clause License
|
2 |
+
|
3 |
+
Copyright (c) 2013-2024, Kim Davies and contributors.
|
4 |
+
All rights reserved.
|
5 |
+
|
6 |
+
Redistribution and use in source and binary forms, with or without
|
7 |
+
modification, are permitted provided that the following conditions are
|
8 |
+
met:
|
9 |
+
|
10 |
+
1. Redistributions of source code must retain the above copyright
|
11 |
+
notice, this list of conditions and the following disclaimer.
|
12 |
+
|
13 |
+
2. Redistributions in binary form must reproduce the above copyright
|
14 |
+
notice, this list of conditions and the following disclaimer in the
|
15 |
+
documentation and/or other materials provided with the distribution.
|
16 |
+
|
17 |
+
3. Neither the name of the copyright holder nor the names of its
|
18 |
+
contributors may be used to endorse or promote products derived from
|
19 |
+
this software without specific prior written permission.
|
20 |
+
|
21 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
22 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
23 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
24 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
25 |
+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
26 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
27 |
+
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
28 |
+
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
29 |
+
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
30 |
+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
31 |
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
meow/lib/python3.13/site-packages/idna-3.10.dist-info/METADATA
ADDED
@@ -0,0 +1,250 @@
1 |
+
Metadata-Version: 2.1
|
2 |
+
Name: idna
|
3 |
+
Version: 3.10
|
4 |
+
Summary: Internationalized Domain Names in Applications (IDNA)
|
5 |
+
Author-email: Kim Davies <[email protected]>
|
6 |
+
Requires-Python: >=3.6
|
7 |
+
Description-Content-Type: text/x-rst
|
8 |
+
Classifier: Development Status :: 5 - Production/Stable
|
9 |
+
Classifier: Intended Audience :: Developers
|
10 |
+
Classifier: Intended Audience :: System Administrators
|
11 |
+
Classifier: License :: OSI Approved :: BSD License
|
12 |
+
Classifier: Operating System :: OS Independent
|
13 |
+
Classifier: Programming Language :: Python
|
14 |
+
Classifier: Programming Language :: Python :: 3
|
15 |
+
Classifier: Programming Language :: Python :: 3 :: Only
|
16 |
+
Classifier: Programming Language :: Python :: 3.6
|
17 |
+
Classifier: Programming Language :: Python :: 3.7
|
18 |
+
Classifier: Programming Language :: Python :: 3.8
|
19 |
+
Classifier: Programming Language :: Python :: 3.9
|
20 |
+
Classifier: Programming Language :: Python :: 3.10
|
21 |
+
Classifier: Programming Language :: Python :: 3.11
|
22 |
+
Classifier: Programming Language :: Python :: 3.12
|
23 |
+
Classifier: Programming Language :: Python :: 3.13
|
24 |
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
25 |
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
26 |
+
Classifier: Topic :: Internet :: Name Service (DNS)
|
27 |
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
28 |
+
Classifier: Topic :: Utilities
|
29 |
+
Requires-Dist: ruff >= 0.6.2 ; extra == "all"
|
30 |
+
Requires-Dist: mypy >= 1.11.2 ; extra == "all"
|
31 |
+
Requires-Dist: pytest >= 8.3.2 ; extra == "all"
|
32 |
+
Requires-Dist: flake8 >= 7.1.1 ; extra == "all"
|
33 |
+
Project-URL: Changelog, https://github.com/kjd/idna/blob/master/HISTORY.rst
|
34 |
+
Project-URL: Issue tracker, https://github.com/kjd/idna/issues
|
35 |
+
Project-URL: Source, https://github.com/kjd/idna
|
36 |
+
Provides-Extra: all
|
37 |
+
|
38 |
+
Internationalized Domain Names in Applications (IDNA)
|
39 |
+
=====================================================
|
40 |
+
|
41 |
+
Support for the Internationalized Domain Names in
|
42 |
+
Applications (IDNA) protocol as specified in `RFC 5891
|
43 |
+
<https://tools.ietf.org/html/rfc5891>`_. This is the latest version of
|
44 |
+
the protocol and is sometimes referred to as “IDNA 2008”.
|
45 |
+
|
46 |
+
This library also provides support for Unicode Technical
|
47 |
+
Standard 46, `Unicode IDNA Compatibility Processing
|
48 |
+
<https://unicode.org/reports/tr46/>`_.
|
49 |
+
|
50 |
+
This acts as a suitable replacement for the “encodings.idna”
|
51 |
+
module that comes with the Python standard library, but which
|
52 |
+
only supports the older superseded IDNA specification (`RFC 3490
|
53 |
+
<https://tools.ietf.org/html/rfc3490>`_).
|
54 |
+
|
55 |
+
Basic functions are simply executed:
|
56 |
+
|
57 |
+
.. code-block:: pycon
|
58 |
+
|
59 |
+
>>> import idna
|
60 |
+
>>> idna.encode('ドメイン.テスト')
|
61 |
+
b'xn--eckwd4c7c.xn--zckzah'
|
62 |
+
>>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
|
63 |
+
ドメイン.テスト
|
64 |
+
|
65 |
+
|
66 |
+
Installation
|
67 |
+
------------
|
68 |
+
|
69 |
+
This package is available for installation from PyPI:
|
70 |
+
|
71 |
+
.. code-block:: bash
|
72 |
+
|
73 |
+
$ python3 -m pip install idna
|
74 |
+
|
75 |
+
|
76 |
+
Usage
|
77 |
+
-----
|
78 |
+
|
79 |
+
For typical usage, the ``encode`` and ``decode`` functions will take a
|
80 |
+
domain name argument and perform a conversion to A-labels or U-labels
|
81 |
+
respectively.
|
82 |
+
|
83 |
+
.. code-block:: pycon
|
84 |
+
|
85 |
+
>>> import idna
|
86 |
+
>>> idna.encode('ドメイン.テスト')
|
87 |
+
b'xn--eckwd4c7c.xn--zckzah'
|
88 |
+
>>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
|
89 |
+
ドメイン.テスト
|
90 |
+
|
91 |
+
You may use the codec encoding and decoding methods using the
|
92 |
+
``idna.codec`` module:
|
93 |
+
|
94 |
+
.. code-block:: pycon
|
95 |
+
|
96 |
+
>>> import idna.codec
|
97 |
+
>>> print('домен.испытание'.encode('idna2008'))
|
98 |
+
b'xn--d1acufc.xn--80akhbyknj4f'
|
99 |
+
>>> print(b'xn--d1acufc.xn--80akhbyknj4f'.decode('idna2008'))
|
100 |
+
домен.испытание
|
101 |
+
|
102 |
+
Conversions can be applied at a per-label basis using the ``ulabel`` or
|
103 |
+
``alabel`` functions if necessary:
|
104 |
+
|
105 |
+
.. code-block:: pycon
|
106 |
+
|
107 |
+
>>> idna.alabel('测试')
|
108 |
+
b'xn--0zwm56d'
|
109 |
+
|
110 |
+
Compatibility Mapping (UTS #46)
|
111 |
+
+++++++++++++++++++++++++++++++
|
112 |
+
|
113 |
+
As described in `RFC 5895 <https://tools.ietf.org/html/rfc5895>`_, the
|
114 |
+
IDNA specification does not normalize input from different potential
|
115 |
+
ways a user may input a domain name. This functionality, known as
|
116 |
+
a “mapping”, is considered by the specification to be a local
|
117 |
+
user-interface issue distinct from IDNA conversion functionality.
|
118 |
+
|
119 |
+
This library provides one such mapping that was developed by the
|
120 |
+
Unicode Consortium. Known as `Unicode IDNA Compatibility Processing
|
121 |
+
<https://unicode.org/reports/tr46/>`_, it provides for both a regular
|
122 |
+
mapping for typical applications, as well as a transitional mapping to
|
123 |
+
help migrate from older IDNA 2003 applications. Strings are
|
124 |
+
preprocessed according to Section 4.4 “Preprocessing for IDNA2008”
|
125 |
+
prior to the IDNA operations.
|
126 |
+
|
127 |
+
For example, “Königsgäßchen” is not a permissible label as *LATIN
|
128 |
+
CAPITAL LETTER K* is not allowed (nor are capital letters in general).
|
129 |
+
UTS 46 will convert this into lower case prior to applying the IDNA
|
130 |
+
conversion.
|
131 |
+
|
132 |
+
.. code-block:: pycon
|
133 |
+
|
134 |
+
>>> import idna
|
135 |
+
>>> idna.encode('Königsgäßchen')
|
136 |
+
...
|
137 |
+
idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'Königsgäßchen' not allowed
|
138 |
+
>>> idna.encode('Königsgäßchen', uts46=True)
|
139 |
+
b'xn--knigsgchen-b4a3dun'
|
140 |
+
>>> print(idna.decode('xn--knigsgchen-b4a3dun'))
|
141 |
+
königsgäßchen
|
142 |
+
|
143 |
+
Transitional processing provides conversions to help transition from
|
144 |
+
the older 2003 standard to the current standard. For example, in the
|
145 |
+
original IDNA specification, the *LATIN SMALL LETTER SHARP S* (ß) was
|
146 |
+
converted into two *LATIN SMALL LETTER S* (ss), whereas in the current
|
147 |
+
IDNA specification this conversion is not performed.
|
148 |
+
|
149 |
+
.. code-block:: pycon
|
150 |
+
|
151 |
+
>>> idna.encode('Königsgäßchen', uts46=True, transitional=True)
|
152 |
+
'xn--knigsgsschen-lcb0w'
|
153 |
+
|
154 |
+
Implementers should use transitional processing with caution, only in
|
155 |
+
rare cases where conversion from legacy labels to current labels must be
|
156 |
+
performed (i.e. IDNA implementations that pre-date 2008). For typical
|
157 |
+
applications that just need to convert labels, transitional processing
|
158 |
+
is unlikely to be beneficial and could produce unexpected incompatible
|
159 |
+
results.
|
160 |
+
|
161 |
+
``encodings.idna`` Compatibility
|
162 |
+
++++++++++++++++++++++++++++++++
|
163 |
+
|
164 |
+
Function calls from the Python built-in ``encodings.idna`` module are
|
165 |
+
mapped to their IDNA 2008 equivalents using the ``idna.compat`` module.
|
166 |
+
Simply substitute the ``import`` clause in your code to refer to the new
|
167 |
+
module name.
|
168 |
+
|
169 |
+
Exceptions
|
170 |
+
----------
|
171 |
+
|
172 |
+
All errors raised during the conversion following the specification
|
173 |
+
should raise an exception derived from the ``idna.IDNAError`` base
|
174 |
+
class.
|
175 |
+
|
176 |
+
More specific exceptions that may be generated as ``idna.IDNABidiError``
|
177 |
+
when the error reflects an illegal combination of left-to-right and
|
178 |
+
right-to-left characters in a label; ``idna.InvalidCodepoint`` when
|
179 |
+
a specific codepoint is an illegal character in an IDN label (i.e.
|
180 |
+
INVALID); and ``idna.InvalidCodepointContext`` when the codepoint is
|
181 |
+
illegal based on its positional context (i.e. it is CONTEXTO or CONTEXTJ
|
182 |
+
but the contextual requirements are not satisfied.)
|
183 |
+
|
184 |
+
Building and Diagnostics
|
185 |
+
------------------------
|
186 |
+
|
187 |
+
The IDNA and UTS 46 functionality relies upon pre-calculated lookup
|
188 |
+
tables for performance. These tables are derived from computing against
|
189 |
+
eligibility criteria in the respective standards. These tables are
|
190 |
+
computed using the command-line script ``tools/idna-data``.
|
191 |
+
|
192 |
+
This tool will fetch relevant codepoint data from the Unicode repository
|
193 |
+
and perform the required calculations to identify eligibility. There are
|
194 |
+
three main modes:
|
195 |
+
|
196 |
+
* ``idna-data make-libdata``. Generates ``idnadata.py`` and
|
197 |
+
``uts46data.py``, the pre-calculated lookup tables used for IDNA and
|
198 |
+
UTS 46 conversions. Implementers who wish to track this library against
|
199 |
+
a different Unicode version may use this tool to manually generate a
|
200 |
+
different version of the ``idnadata.py`` and ``uts46data.py`` files.
|
201 |
+
|
202 |
+
* ``idna-data make-table``. Generate a table of the IDNA disposition
|
203 |
+
(e.g. PVALID, CONTEXTJ, CONTEXTO) in the format found in Appendix
|
204 |
+
B.1 of RFC 5892 and the pre-computed tables published by `IANA
|
205 |
+
<https://www.iana.org/>`_.
|
206 |
+
|
207 |
+
* ``idna-data U+0061``. Prints debugging output on the various
|
208 |
+
properties associated with an individual Unicode codepoint (in this
|
209 |
+
case, U+0061), that are used to assess the IDNA and UTS 46 status of a
|
210 |
+
codepoint. This is helpful in debugging or analysis.
|
211 |
+
|
212 |
+
The tool accepts a number of arguments, described using ``idna-data
|
213 |
+
-h``. Most notably, the ``--version`` argument allows the specification
|
214 |
+
of the version of Unicode to be used in computing the table data. For
|
215 |
+
example, ``idna-data --version 9.0.0 make-libdata`` will generate
|
216 |
+
library data against Unicode 9.0.0.
|
217 |
+
|
218 |
+
|
219 |
+
Additional Notes
|
220 |
+
----------------
|
221 |
+
|
222 |
+
* **Packages**. The latest tagged release version is published in the
|
223 |
+
`Python Package Index <https://pypi.org/project/idna/>`_.
|
224 |
+
|
225 |
+
* **Version support**. This library supports Python 3.6 and higher.
|
226 |
+
As this library serves as a low-level toolkit for a variety of
|
227 |
+
applications, many of which strive for broad compatibility with older
|
228 |
+
Python versions, there is no rush to remove older interpreter support.
|
229 |
+
Removing support for older versions should be well justified in that the
|
230 |
+
maintenance burden has become too high.
|
231 |
+
|
232 |
+
* **Python 2**. Python 2 is supported by version 2.x of this library.
|
233 |
+
Use "idna<3" in your requirements file if you need this library for
|
234 |
+
a Python 2 application. Be advised that these versions are no longer
|
235 |
+
actively developed.
|
236 |
+
|
237 |
+
* **Testing**. The library has a test suite based on each rule of the
|
238 |
+
IDNA specification, as well as tests that are provided as part of the
|
239 |
+
Unicode Technical Standard 46, `Unicode IDNA Compatibility Processing
|
240 |
+
<https://unicode.org/reports/tr46/>`_.
|
241 |
+
|
242 |
+
* **Emoji**. It is an occasional request to support emoji domains in
|
243 |
+
this library. Encoding of symbols like emoji is expressly prohibited by
|
244 |
+
the technical standard IDNA 2008 and emoji domains are broadly phased
|
245 |
+
out across the domain industry due to associated security risks. For
|
246 |
+
now, applications that need to support these non-compliant labels
|
247 |
+
may wish to consider trying the encode/decode operation in this library
|
248 |
+
first, and then falling back to using `encodings.idna`. See `the Github
|
249 |
+
project <https://github.com/kjd/idna/issues/18>`_ for more discussion.
|
250 |
+
|
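The METADATA above documents the idna API: ``encode``/``decode``, the ``uts46`` preprocessing switch, and the ``IDNAError`` exception hierarchy. The snippet below is a minimal usage sketch based only on that documentation, assuming the vendored ``idna`` package in this upload is importable; it is illustrative and not part of the uploaded files.

```python
import idna

# A-label/U-label round trip, exactly as documented above.
assert idna.encode("ドメイン.テスト") == b"xn--eckwd4c7c.xn--zckzah"
assert idna.decode("xn--eckwd4c7c.xn--zckzah") == "ドメイン.テスト"

# Mixed-case input is rejected by strict IDNA 2008, but accepted once
# UTS 46 compatibility mapping is enabled.
try:
    idna.encode("Königsgäßchen")
except idna.IDNAError as exc:  # base class for all conversion errors
    print("strict IDNA rejected it:", exc)

print(idna.encode("Königsgäßchen", uts46=True))  # b'xn--knigsgchen-b4a3dun'
```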
meow/lib/python3.13/site-packages/idna-3.10.dist-info/RECORD
ADDED
@@ -0,0 +1,22 @@
1 |
+
idna-3.10.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
2 |
+
idna-3.10.dist-info/LICENSE.md,sha256=pZ8LDvNjWHQQmkRhykT_enDVBpboFHZ7-vch1Mmw2w8,1541
|
3 |
+
idna-3.10.dist-info/METADATA,sha256=URR5ZyDfQ1PCEGhkYoojqfi2Ra0tau2--lhwG4XSfjI,10158
|
4 |
+
idna-3.10.dist-info/RECORD,,
|
5 |
+
idna-3.10.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
6 |
+
idna/__init__.py,sha256=MPqNDLZbXqGaNdXxAFhiqFPKEQXju2jNQhCey6-5eJM,868
|
7 |
+
idna/__pycache__/__init__.cpython-313.pyc,,
|
8 |
+
idna/__pycache__/codec.cpython-313.pyc,,
|
9 |
+
idna/__pycache__/compat.cpython-313.pyc,,
|
10 |
+
idna/__pycache__/core.cpython-313.pyc,,
|
11 |
+
idna/__pycache__/idnadata.cpython-313.pyc,,
|
12 |
+
idna/__pycache__/intranges.cpython-313.pyc,,
|
13 |
+
idna/__pycache__/package_data.cpython-313.pyc,,
|
14 |
+
idna/__pycache__/uts46data.cpython-313.pyc,,
|
15 |
+
idna/codec.py,sha256=PEew3ItwzjW4hymbasnty2N2OXvNcgHB-JjrBuxHPYY,3422
|
16 |
+
idna/compat.py,sha256=RzLy6QQCdl9784aFhb2EX9EKGCJjg0P3PilGdeXXcx8,316
|
17 |
+
idna/core.py,sha256=YJYyAMnwiQEPjVC4-Fqu_p4CJ6yKKuDGmppBNQNQpFs,13239
|
18 |
+
idna/idnadata.py,sha256=W30GcIGvtOWYwAjZj4ZjuouUutC6ffgNuyjJy7fZ-lo,78306
|
19 |
+
idna/intranges.py,sha256=amUtkdhYcQG8Zr-CoMM_kVRacxkivC1WgxN1b63KKdU,1898
|
20 |
+
idna/package_data.py,sha256=q59S3OXsc5VI8j6vSD0sGBMyk6zZ4vWFREE88yCJYKs,21
|
21 |
+
idna/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22 |
+
idna/uts46data.py,sha256=rt90K9J40gUSwppDPCrhjgi5AA6pWM65dEGRSf6rIhM,239289
|
meow/lib/python3.13/site-packages/idna-3.10.dist-info/WHEEL
ADDED
@@ -0,0 +1,4 @@
1 |
+
Wheel-Version: 1.0
|
2 |
+
Generator: flit 3.9.0
|
3 |
+
Root-Is-Purelib: true
|
4 |
+
Tag: py3-none-any
|
meow/lib/python3.13/site-packages/packaging/__init__.py
ADDED
@@ -0,0 +1,15 @@
1 |
+
# This file is dual licensed under the terms of the Apache License, Version
|
2 |
+
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
|
3 |
+
# for complete details.
|
4 |
+
|
5 |
+
__title__ = "packaging"
|
6 |
+
__summary__ = "Core utilities for Python packages"
|
7 |
+
__uri__ = "https://github.com/pypa/packaging"
|
8 |
+
|
9 |
+
__version__ = "24.2"
|
10 |
+
|
11 |
+
__author__ = "Donald Stufft and individual contributors"
|
12 |
+
__email__ = "[email protected]"
|
13 |
+
|
14 |
+
__license__ = "BSD-2-Clause or Apache-2.0"
|
15 |
+
__copyright__ = f"2014 {__author__}"
|
meow/lib/python3.13/site-packages/packaging/_elffile.py
ADDED
@@ -0,0 +1,110 @@
1 |
+
"""
|
2 |
+
ELF file parser.
|
3 |
+
|
4 |
+
This provides a class ``ELFFile`` that parses an ELF executable in a similar
|
5 |
+
interface to ``ZipFile``. Only the read interface is implemented.
|
6 |
+
|
7 |
+
Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
|
8 |
+
ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
|
9 |
+
"""
|
10 |
+
|
11 |
+
from __future__ import annotations
|
12 |
+
|
13 |
+
import enum
|
14 |
+
import os
|
15 |
+
import struct
|
16 |
+
from typing import IO
|
17 |
+
|
18 |
+
|
19 |
+
class ELFInvalid(ValueError):
|
20 |
+
pass
|
21 |
+
|
22 |
+
|
23 |
+
class EIClass(enum.IntEnum):
|
24 |
+
C32 = 1
|
25 |
+
C64 = 2
|
26 |
+
|
27 |
+
|
28 |
+
class EIData(enum.IntEnum):
|
29 |
+
Lsb = 1
|
30 |
+
Msb = 2
|
31 |
+
|
32 |
+
|
33 |
+
class EMachine(enum.IntEnum):
|
34 |
+
I386 = 3
|
35 |
+
S390 = 22
|
36 |
+
Arm = 40
|
37 |
+
X8664 = 62
|
38 |
+
AArc64 = 183
|
39 |
+
|
40 |
+
|
41 |
+
class ELFFile:
|
42 |
+
"""
|
43 |
+
Representation of an ELF executable.
|
44 |
+
"""
|
45 |
+
|
46 |
+
def __init__(self, f: IO[bytes]) -> None:
|
47 |
+
self._f = f
|
48 |
+
|
49 |
+
try:
|
50 |
+
ident = self._read("16B")
|
51 |
+
except struct.error as e:
|
52 |
+
raise ELFInvalid("unable to parse identification") from e
|
53 |
+
magic = bytes(ident[:4])
|
54 |
+
if magic != b"\x7fELF":
|
55 |
+
raise ELFInvalid(f"invalid magic: {magic!r}")
|
56 |
+
|
57 |
+
self.capacity = ident[4] # Format for program header (bitness).
|
58 |
+
self.encoding = ident[5] # Data structure encoding (endianness).
|
59 |
+
|
60 |
+
try:
|
61 |
+
# e_fmt: Format for program header.
|
62 |
+
# p_fmt: Format for section header.
|
63 |
+
# p_idx: Indexes to find p_type, p_offset, and p_filesz.
|
64 |
+
e_fmt, self._p_fmt, self._p_idx = {
|
65 |
+
(1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)), # 32-bit LSB.
|
66 |
+
(1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB.
|
67 |
+
(2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)), # 64-bit LSB.
|
68 |
+
(2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB.
|
69 |
+
}[(self.capacity, self.encoding)]
|
70 |
+
except KeyError as e:
|
71 |
+
raise ELFInvalid(
|
72 |
+
f"unrecognized capacity ({self.capacity}) or "
|
73 |
+
f"encoding ({self.encoding})"
|
74 |
+
) from e
|
75 |
+
|
76 |
+
try:
|
77 |
+
(
|
78 |
+
_,
|
79 |
+
self.machine, # Architecture type.
|
80 |
+
_,
|
81 |
+
_,
|
82 |
+
self._e_phoff, # Offset of program header.
|
83 |
+
_,
|
84 |
+
self.flags, # Processor-specific flags.
|
85 |
+
_,
|
86 |
+
self._e_phentsize, # Size of section.
|
87 |
+
self._e_phnum, # Number of sections.
|
88 |
+
) = self._read(e_fmt)
|
89 |
+
except struct.error as e:
|
90 |
+
raise ELFInvalid("unable to parse machine and section information") from e
|
91 |
+
|
92 |
+
def _read(self, fmt: str) -> tuple[int, ...]:
|
93 |
+
return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))
|
94 |
+
|
95 |
+
@property
|
96 |
+
def interpreter(self) -> str | None:
|
97 |
+
"""
|
98 |
+
The path recorded in the ``PT_INTERP`` section header.
|
99 |
+
"""
|
100 |
+
for index in range(self._e_phnum):
|
101 |
+
self._f.seek(self._e_phoff + self._e_phentsize * index)
|
102 |
+
try:
|
103 |
+
data = self._read(self._p_fmt)
|
104 |
+
except struct.error:
|
105 |
+
continue
|
106 |
+
if data[self._p_idx[0]] != 3: # Not PT_INTERP.
|
107 |
+
continue
|
108 |
+
self._f.seek(data[self._p_idx[1]])
|
109 |
+
return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
|
110 |
+
return None
|
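For orientation only (not part of the diff): the ``ELFFile`` reader added above exposes the architecture and the ``PT_INTERP`` path of an ELF binary. A rough sketch, assuming the vendored ``packaging._elffile`` module is importable and the interpreter is a Linux ELF build:

```python
import sys

from packaging._elffile import ELFFile, ELFInvalid  # private helper module

try:
    with open(sys.executable, "rb") as f:
        elf = ELFFile(f)
        print("machine:", elf.machine)          # e.g. 62 (EMachine.X8664)
        print("interpreter:", elf.interpreter)  # e.g. /lib64/ld-linux-x86-64.so.2
except (ELFInvalid, OSError):
    # Not an ELF binary (e.g. macOS/Windows) or unreadable.
    print("not an ELF executable")
```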
meow/lib/python3.13/site-packages/packaging/_manylinux.py
ADDED
@@ -0,0 +1,263 @@
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import collections
|
4 |
+
import contextlib
|
5 |
+
import functools
|
6 |
+
import os
|
7 |
+
import re
|
8 |
+
import sys
|
9 |
+
import warnings
|
10 |
+
from typing import Generator, Iterator, NamedTuple, Sequence
|
11 |
+
|
12 |
+
from ._elffile import EIClass, EIData, ELFFile, EMachine
|
13 |
+
|
14 |
+
EF_ARM_ABIMASK = 0xFF000000
|
15 |
+
EF_ARM_ABI_VER5 = 0x05000000
|
16 |
+
EF_ARM_ABI_FLOAT_HARD = 0x00000400
|
17 |
+
|
18 |
+
|
19 |
+
# `os.PathLike` not a generic type until Python 3.9, so sticking with `str`
|
20 |
+
# as the type for `path` until then.
|
21 |
+
@contextlib.contextmanager
|
22 |
+
def _parse_elf(path: str) -> Generator[ELFFile | None, None, None]:
|
23 |
+
try:
|
24 |
+
with open(path, "rb") as f:
|
25 |
+
yield ELFFile(f)
|
26 |
+
except (OSError, TypeError, ValueError):
|
27 |
+
yield None
|
28 |
+
|
29 |
+
|
30 |
+
def _is_linux_armhf(executable: str) -> bool:
|
31 |
+
# hard-float ABI can be detected from the ELF header of the running
|
32 |
+
# process
|
33 |
+
# https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
|
34 |
+
with _parse_elf(executable) as f:
|
35 |
+
return (
|
36 |
+
f is not None
|
37 |
+
and f.capacity == EIClass.C32
|
38 |
+
and f.encoding == EIData.Lsb
|
39 |
+
and f.machine == EMachine.Arm
|
40 |
+
and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5
|
41 |
+
and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD
|
42 |
+
)
|
43 |
+
|
44 |
+
|
45 |
+
def _is_linux_i686(executable: str) -> bool:
|
46 |
+
with _parse_elf(executable) as f:
|
47 |
+
return (
|
48 |
+
f is not None
|
49 |
+
and f.capacity == EIClass.C32
|
50 |
+
and f.encoding == EIData.Lsb
|
51 |
+
and f.machine == EMachine.I386
|
52 |
+
)
|
53 |
+
|
54 |
+
|
55 |
+
def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool:
|
56 |
+
if "armv7l" in archs:
|
57 |
+
return _is_linux_armhf(executable)
|
58 |
+
if "i686" in archs:
|
59 |
+
return _is_linux_i686(executable)
|
60 |
+
allowed_archs = {
|
61 |
+
"x86_64",
|
62 |
+
"aarch64",
|
63 |
+
"ppc64",
|
64 |
+
"ppc64le",
|
65 |
+
"s390x",
|
66 |
+
"loongarch64",
|
67 |
+
"riscv64",
|
68 |
+
}
|
69 |
+
return any(arch in allowed_archs for arch in archs)
|
70 |
+
|
71 |
+
|
72 |
+
# If glibc ever changes its major version, we need to know what the last
|
73 |
+
# minor version was, so we can build the complete list of all versions.
|
74 |
+
# For now, guess what the highest minor version might be, assume it will
|
75 |
+
# be 50 for testing. Once this actually happens, update the dictionary
|
76 |
+
# with the actual value.
|
77 |
+
_LAST_GLIBC_MINOR: dict[int, int] = collections.defaultdict(lambda: 50)
|
78 |
+
|
79 |
+
|
80 |
+
class _GLibCVersion(NamedTuple):
|
81 |
+
major: int
|
82 |
+
minor: int
|
83 |
+
|
84 |
+
|
85 |
+
def _glibc_version_string_confstr() -> str | None:
|
86 |
+
"""
|
87 |
+
Primary implementation of glibc_version_string using os.confstr.
|
88 |
+
"""
|
89 |
+
# os.confstr is quite a bit faster than ctypes.DLL. It's also less likely
|
90 |
+
# to be broken or missing. This strategy is used in the standard library
|
91 |
+
# platform module.
|
92 |
+
# https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
|
93 |
+
try:
|
94 |
+
# Should be a string like "glibc 2.17".
|
95 |
+
version_string: str | None = os.confstr("CS_GNU_LIBC_VERSION")
|
96 |
+
assert version_string is not None
|
97 |
+
_, version = version_string.rsplit()
|
98 |
+
except (AssertionError, AttributeError, OSError, ValueError):
|
99 |
+
# os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)...
|
100 |
+
return None
|
101 |
+
return version
|
102 |
+
|
103 |
+
|
104 |
+
def _glibc_version_string_ctypes() -> str | None:
|
105 |
+
"""
|
106 |
+
Fallback implementation of glibc_version_string using ctypes.
|
107 |
+
"""
|
108 |
+
try:
|
109 |
+
import ctypes
|
110 |
+
except ImportError:
|
111 |
+
return None
|
112 |
+
|
113 |
+
# ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
|
114 |
+
# manpage says, "If filename is NULL, then the returned handle is for the
|
115 |
+
# main program". This way we can let the linker do the work to figure out
|
116 |
+
# which libc our process is actually using.
|
117 |
+
#
|
118 |
+
# We must also handle the special case where the executable is not a
|
119 |
+
# dynamically linked executable. This can occur when using musl libc,
|
120 |
+
# for example. In this situation, dlopen() will error, leading to an
|
121 |
+
# OSError. Interestingly, at least in the case of musl, there is no
|
122 |
+
# errno set on the OSError. The single string argument used to construct
|
123 |
+
# OSError comes from libc itself and is therefore not portable to
|
124 |
+
# hard code here. In any case, failure to call dlopen() means we
|
125 |
+
# can proceed, so we bail on our attempt.
|
126 |
+
try:
|
127 |
+
process_namespace = ctypes.CDLL(None)
|
128 |
+
except OSError:
|
129 |
+
return None
|
130 |
+
|
131 |
+
try:
|
132 |
+
gnu_get_libc_version = process_namespace.gnu_get_libc_version
|
133 |
+
except AttributeError:
|
134 |
+
# Symbol doesn't exist -> therefore, we are not linked to
|
135 |
+
# glibc.
|
136 |
+
return None
|
137 |
+
|
138 |
+
# Call gnu_get_libc_version, which returns a string like "2.5"
|
139 |
+
gnu_get_libc_version.restype = ctypes.c_char_p
|
140 |
+
version_str: str = gnu_get_libc_version()
|
141 |
+
# py2 / py3 compatibility:
|
142 |
+
if not isinstance(version_str, str):
|
143 |
+
version_str = version_str.decode("ascii")
|
144 |
+
|
145 |
+
return version_str
|
146 |
+
|
147 |
+
|
148 |
+
def _glibc_version_string() -> str | None:
|
149 |
+
"""Returns glibc version string, or None if not using glibc."""
|
150 |
+
return _glibc_version_string_confstr() or _glibc_version_string_ctypes()
|
151 |
+
|
152 |
+
|
153 |
+
def _parse_glibc_version(version_str: str) -> tuple[int, int]:
|
154 |
+
"""Parse glibc version.
|
155 |
+
|
156 |
+
We use a regexp instead of str.split because we want to discard any
|
157 |
+
random junk that might come after the minor version -- this might happen
|
158 |
+
in patched/forked versions of glibc (e.g. Linaro's version of glibc
|
159 |
+
uses version strings like "2.20-2014.11"). See gh-3588.
|
160 |
+
"""
|
161 |
+
m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
|
162 |
+
if not m:
|
163 |
+
warnings.warn(
|
164 |
+
f"Expected glibc version with 2 components major.minor,"
|
165 |
+
f" got: {version_str}",
|
166 |
+
RuntimeWarning,
|
167 |
+
stacklevel=2,
|
168 |
+
)
|
169 |
+
return -1, -1
|
170 |
+
return int(m.group("major")), int(m.group("minor"))
|
171 |
+
|
172 |
+
|
173 |
+
@functools.lru_cache
|
174 |
+
def _get_glibc_version() -> tuple[int, int]:
|
175 |
+
version_str = _glibc_version_string()
|
176 |
+
if version_str is None:
|
177 |
+
return (-1, -1)
|
178 |
+
return _parse_glibc_version(version_str)
|
179 |
+
|
180 |
+
|
181 |
+
# From PEP 513, PEP 600
|
182 |
+
def _is_compatible(arch: str, version: _GLibCVersion) -> bool:
|
183 |
+
sys_glibc = _get_glibc_version()
|
184 |
+
if sys_glibc < version:
|
185 |
+
return False
|
186 |
+
# Check for presence of _manylinux module.
|
187 |
+
try:
|
188 |
+
import _manylinux
|
189 |
+
except ImportError:
|
190 |
+
return True
|
191 |
+
if hasattr(_manylinux, "manylinux_compatible"):
|
192 |
+
result = _manylinux.manylinux_compatible(version[0], version[1], arch)
|
193 |
+
if result is not None:
|
194 |
+
return bool(result)
|
195 |
+
return True
|
196 |
+
if version == _GLibCVersion(2, 5):
|
197 |
+
if hasattr(_manylinux, "manylinux1_compatible"):
|
198 |
+
return bool(_manylinux.manylinux1_compatible)
|
199 |
+
if version == _GLibCVersion(2, 12):
|
200 |
+
if hasattr(_manylinux, "manylinux2010_compatible"):
|
201 |
+
return bool(_manylinux.manylinux2010_compatible)
|
202 |
+
if version == _GLibCVersion(2, 17):
|
203 |
+
if hasattr(_manylinux, "manylinux2014_compatible"):
|
204 |
+
return bool(_manylinux.manylinux2014_compatible)
|
205 |
+
return True
|
206 |
+
|
207 |
+
|
208 |
+
_LEGACY_MANYLINUX_MAP = {
|
209 |
+
# CentOS 7 w/ glibc 2.17 (PEP 599)
|
210 |
+
(2, 17): "manylinux2014",
|
211 |
+
# CentOS 6 w/ glibc 2.12 (PEP 571)
|
212 |
+
(2, 12): "manylinux2010",
|
213 |
+
# CentOS 5 w/ glibc 2.5 (PEP 513)
|
214 |
+
(2, 5): "manylinux1",
|
215 |
+
}
|
216 |
+
|
217 |
+
|
218 |
+
def platform_tags(archs: Sequence[str]) -> Iterator[str]:
|
219 |
+
"""Generate manylinux tags compatible to the current platform.
|
220 |
+
|
221 |
+
:param archs: Sequence of compatible architectures.
|
222 |
+
The first one shall be the closest to the actual architecture and be the part of
|
223 |
+
platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
|
224 |
+
The ``linux_`` prefix is assumed as a prerequisite for the current platform to
|
225 |
+
be manylinux-compatible.
|
226 |
+
|
227 |
+
:returns: An iterator of compatible manylinux tags.
|
228 |
+
"""
|
229 |
+
if not _have_compatible_abi(sys.executable, archs):
|
230 |
+
return
|
231 |
+
# Oldest glibc to be supported regardless of architecture is (2, 17).
|
232 |
+
too_old_glibc2 = _GLibCVersion(2, 16)
|
233 |
+
if set(archs) & {"x86_64", "i686"}:
|
234 |
+
# On x86/i686 also oldest glibc to be supported is (2, 5).
|
235 |
+
too_old_glibc2 = _GLibCVersion(2, 4)
|
236 |
+
current_glibc = _GLibCVersion(*_get_glibc_version())
|
237 |
+
glibc_max_list = [current_glibc]
|
238 |
+
# We can assume compatibility across glibc major versions.
|
239 |
+
# https://sourceware.org/bugzilla/show_bug.cgi?id=24636
|
240 |
+
#
|
241 |
+
# Build a list of maximum glibc versions so that we can
|
242 |
+
# output the canonical list of all glibc from current_glibc
|
243 |
+
# down to too_old_glibc2, including all intermediary versions.
|
244 |
+
for glibc_major in range(current_glibc.major - 1, 1, -1):
|
245 |
+
glibc_minor = _LAST_GLIBC_MINOR[glibc_major]
|
246 |
+
glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor))
|
247 |
+
for arch in archs:
|
248 |
+
for glibc_max in glibc_max_list:
|
249 |
+
if glibc_max.major == too_old_glibc2.major:
|
250 |
+
min_minor = too_old_glibc2.minor
|
251 |
+
else:
|
252 |
+
# For other glibc major versions oldest supported is (x, 0).
|
253 |
+
min_minor = -1
|
254 |
+
for glibc_minor in range(glibc_max.minor, min_minor, -1):
|
255 |
+
glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
|
256 |
+
tag = "manylinux_{}_{}".format(*glibc_version)
|
257 |
+
if _is_compatible(arch, glibc_version):
|
258 |
+
yield f"{tag}_{arch}"
|
259 |
+
# Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
|
260 |
+
if glibc_version in _LEGACY_MANYLINUX_MAP:
|
261 |
+
legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version]
|
262 |
+
if _is_compatible(arch, glibc_version):
|
263 |
+
yield f"{legacy_tag}_{arch}"
|
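Again as an illustrative sketch rather than part of the uploaded files: the ``platform_tags`` generator in ``_manylinux.py`` above can be exercised directly. On a glibc-based x86-64 Linux host it yields ``manylinux_2_NN_x86_64`` tags from the detected glibc down to the legacy ``manylinux1_x86_64``; on non-glibc or non-Linux platforms it yields nothing.

```python
from packaging._manylinux import platform_tags  # private helper module

# The first arch should match the platform tag suffix after "linux_".
for tag in platform_tags(["x86_64"]):
    print(tag)
# e.g. manylinux_2_39_x86_64 ... manylinux2014_x86_64 ... manylinux1_x86_64
```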
meow/lib/python3.13/site-packages/packaging/_musllinux.py
ADDED
@@ -0,0 +1,85 @@
1 |
+
"""PEP 656 support.
|
2 |
+
|
3 |
+
This module implements logic to detect if the currently running Python is
|
4 |
+
linked against musl, and what musl version is used.
|
5 |
+
"""
|
6 |
+
|
7 |
+
from __future__ import annotations
|
8 |
+
|
9 |
+
import functools
|
10 |
+
import re
|
11 |
+
import subprocess
|
12 |
+
import sys
|
13 |
+
from typing import Iterator, NamedTuple, Sequence
|
14 |
+
|
15 |
+
from ._elffile import ELFFile
|
16 |
+
|
17 |
+
|
18 |
+
class _MuslVersion(NamedTuple):
|
19 |
+
major: int
|
20 |
+
minor: int
|
21 |
+
|
22 |
+
|
23 |
+
def _parse_musl_version(output: str) -> _MuslVersion | None:
|
24 |
+
lines = [n for n in (n.strip() for n in output.splitlines()) if n]
|
25 |
+
if len(lines) < 2 or lines[0][:4] != "musl":
|
26 |
+
return None
|
27 |
+
m = re.match(r"Version (\d+)\.(\d+)", lines[1])
|
28 |
+
if not m:
|
29 |
+
return None
|
30 |
+
return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2)))
|
31 |
+
|
32 |
+
|
33 |
+
@functools.lru_cache
|
34 |
+
def _get_musl_version(executable: str) -> _MuslVersion | None:
|
35 |
+
"""Detect currently-running musl runtime version.
|
36 |
+
|
37 |
+
This is done by checking the specified executable's dynamic linking
|
38 |
+
information, and invoking the loader to parse its output for a version
|
39 |
+
string. If the loader is musl, the output would be something like::
|
40 |
+
|
41 |
+
musl libc (x86_64)
|
42 |
+
Version 1.2.2
|
43 |
+
Dynamic Program Loader
|
44 |
+
"""
|
45 |
+
try:
|
46 |
+
with open(executable, "rb") as f:
|
47 |
+
ld = ELFFile(f).interpreter
|
48 |
+
except (OSError, TypeError, ValueError):
|
49 |
+
return None
|
50 |
+
if ld is None or "musl" not in ld:
|
51 |
+
return None
|
52 |
+
proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True)
|
53 |
+
return _parse_musl_version(proc.stderr)
|
54 |
+
|
55 |
+
|
56 |
+
def platform_tags(archs: Sequence[str]) -> Iterator[str]:
|
57 |
+
"""Generate musllinux tags compatible to the current platform.
|
58 |
+
|
59 |
+
:param archs: Sequence of compatible architectures.
|
60 |
+
The first one shall be the closest to the actual architecture and be the part of
|
61 |
+
platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
|
62 |
+
The ``linux_`` prefix is assumed as a prerequisite for the current platform to
|
63 |
+
be musllinux-compatible.
|
64 |
+
|
65 |
+
:returns: An iterator of compatible musllinux tags.
|
66 |
+
"""
|
67 |
+
sys_musl = _get_musl_version(sys.executable)
|
68 |
+
if sys_musl is None: # Python not dynamically linked against musl.
|
69 |
+
return
|
70 |
+
for arch in archs:
|
71 |
+
for minor in range(sys_musl.minor, -1, -1):
|
72 |
+
yield f"musllinux_{sys_musl.major}_{minor}_{arch}"
|
73 |
+
|
74 |
+
|
75 |
+
if __name__ == "__main__": # pragma: no cover
|
76 |
+
import sysconfig
|
77 |
+
|
78 |
+
plat = sysconfig.get_platform()
|
79 |
+
assert plat.startswith("linux-"), "not linux"
|
80 |
+
|
81 |
+
print("plat:", plat)
|
82 |
+
print("musl:", _get_musl_version(sys.executable))
|
83 |
+
print("tags:", end=" ")
|
84 |
+
for t in platform_tags(re.sub(r"[.-]", "_", plat.split("-", 1)[-1])):
|
85 |
+
print(t, end="\n ")
|
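A small sketch of the banner parsing done by ``_parse_musl_version`` above, fed with the loader output format quoted in the module's docstring (illustrative only, not part of the diff):

```python
from packaging._musllinux import _parse_musl_version  # private helper module

banner = "musl libc (x86_64)\nVersion 1.2.2\nDynamic Program Loader"
print(_parse_musl_version(banner))   # _MuslVersion(major=1, minor=2)
print(_parse_musl_version("glibc"))  # None -- not a musl loader banner
```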
meow/lib/python3.13/site-packages/packaging/_parser.py
ADDED
@@ -0,0 +1,354 @@
1 |
+
"""Handwritten parser of dependency specifiers.
|
2 |
+
|
3 |
+
The docstring for each __parse_* function contains EBNF-inspired grammar representing
|
4 |
+
the implementation.
|
5 |
+
"""
|
6 |
+
|
7 |
+
from __future__ import annotations
|
8 |
+
|
9 |
+
import ast
|
10 |
+
from typing import NamedTuple, Sequence, Tuple, Union
|
11 |
+
|
12 |
+
from ._tokenizer import DEFAULT_RULES, Tokenizer
|
13 |
+
|
14 |
+
|
15 |
+
class Node:
|
16 |
+
def __init__(self, value: str) -> None:
|
17 |
+
self.value = value
|
18 |
+
|
19 |
+
def __str__(self) -> str:
|
20 |
+
return self.value
|
21 |
+
|
22 |
+
def __repr__(self) -> str:
|
23 |
+
return f"<{self.__class__.__name__}('{self}')>"
|
24 |
+
|
25 |
+
def serialize(self) -> str:
|
26 |
+
raise NotImplementedError
|
27 |
+
|
28 |
+
|
29 |
+
class Variable(Node):
|
30 |
+
def serialize(self) -> str:
|
31 |
+
return str(self)
|
32 |
+
|
33 |
+
|
34 |
+
class Value(Node):
|
35 |
+
def serialize(self) -> str:
|
36 |
+
return f'"{self}"'
|
37 |
+
|
38 |
+
|
39 |
+
class Op(Node):
|
40 |
+
def serialize(self) -> str:
|
41 |
+
return str(self)
|
42 |
+
|
43 |
+
|
44 |
+
MarkerVar = Union[Variable, Value]
|
45 |
+
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
|
46 |
+
MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]]
|
47 |
+
MarkerList = Sequence[Union["MarkerList", MarkerAtom, str]]
|
48 |
+
|
49 |
+
|
50 |
+
class ParsedRequirement(NamedTuple):
|
51 |
+
name: str
|
52 |
+
url: str
|
53 |
+
extras: list[str]
|
54 |
+
specifier: str
|
55 |
+
marker: MarkerList | None
|
56 |
+
|
57 |
+
|
58 |
+
# --------------------------------------------------------------------------------------
|
59 |
+
# Recursive descent parser for dependency specifier
|
60 |
+
# --------------------------------------------------------------------------------------
|
61 |
+
def parse_requirement(source: str) -> ParsedRequirement:
|
62 |
+
return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
|
63 |
+
|
64 |
+
|
65 |
+
def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
|
66 |
+
"""
|
67 |
+
requirement = WS? IDENTIFIER WS? extras WS? requirement_details
|
68 |
+
"""
|
69 |
+
tokenizer.consume("WS")
|
70 |
+
|
71 |
+
name_token = tokenizer.expect(
|
72 |
+
"IDENTIFIER", expected="package name at the start of dependency specifier"
|
73 |
+
)
|
74 |
+
name = name_token.text
|
75 |
+
tokenizer.consume("WS")
|
76 |
+
|
77 |
+
extras = _parse_extras(tokenizer)
|
78 |
+
tokenizer.consume("WS")
|
79 |
+
|
80 |
+
url, specifier, marker = _parse_requirement_details(tokenizer)
|
81 |
+
tokenizer.expect("END", expected="end of dependency specifier")
|
82 |
+
|
83 |
+
return ParsedRequirement(name, url, extras, specifier, marker)
|
84 |
+
|
85 |
+
|
86 |
+
def _parse_requirement_details(
|
87 |
+
tokenizer: Tokenizer,
|
88 |
+
) -> tuple[str, str, MarkerList | None]:
|
89 |
+
"""
|
90 |
+
requirement_details = AT URL (WS requirement_marker?)?
|
91 |
+
| specifier WS? (requirement_marker)?
|
92 |
+
"""
|
93 |
+
|
94 |
+
specifier = ""
|
95 |
+
url = ""
|
96 |
+
marker = None
|
97 |
+
|
98 |
+
if tokenizer.check("AT"):
|
99 |
+
tokenizer.read()
|
100 |
+
tokenizer.consume("WS")
|
101 |
+
|
102 |
+
url_start = tokenizer.position
|
103 |
+
url = tokenizer.expect("URL", expected="URL after @").text
|
104 |
+
if tokenizer.check("END", peek=True):
|
105 |
+
return (url, specifier, marker)
|
106 |
+
|
107 |
+
tokenizer.expect("WS", expected="whitespace after URL")
|
108 |
+
|
109 |
+
# The input might end after whitespace.
|
110 |
+
if tokenizer.check("END", peek=True):
|
111 |
+
return (url, specifier, marker)
|
112 |
+
|
113 |
+
marker = _parse_requirement_marker(
|
114 |
+
tokenizer, span_start=url_start, after="URL and whitespace"
|
115 |
+
)
|
116 |
+
else:
|
117 |
+
specifier_start = tokenizer.position
|
118 |
+
specifier = _parse_specifier(tokenizer)
|
119 |
+
tokenizer.consume("WS")
|
120 |
+
|
121 |
+
if tokenizer.check("END", peek=True):
|
122 |
+
return (url, specifier, marker)
|
123 |
+
|
124 |
+
marker = _parse_requirement_marker(
|
125 |
+
tokenizer,
|
126 |
+
span_start=specifier_start,
|
127 |
+
after=(
|
128 |
+
"version specifier"
|
129 |
+
if specifier
|
130 |
+
else "name and no valid version specifier"
|
131 |
+
),
|
132 |
+
)
|
133 |
+
|
134 |
+
return (url, specifier, marker)
|
135 |
+
|
136 |
+
|
137 |
+
def _parse_requirement_marker(
|
138 |
+
tokenizer: Tokenizer, *, span_start: int, after: str
|
139 |
+
) -> MarkerList:
|
140 |
+
"""
|
141 |
+
requirement_marker = SEMICOLON marker WS?
|
142 |
+
"""
|
143 |
+
|
144 |
+
if not tokenizer.check("SEMICOLON"):
|
145 |
+
tokenizer.raise_syntax_error(
|
146 |
+
f"Expected end or semicolon (after {after})",
|
147 |
+
span_start=span_start,
|
148 |
+
)
|
149 |
+
tokenizer.read()
|
150 |
+
|
151 |
+
marker = _parse_marker(tokenizer)
|
152 |
+
tokenizer.consume("WS")
|
153 |
+
|
154 |
+
return marker
|
155 |
+
|
156 |
+
|
157 |
+
def _parse_extras(tokenizer: Tokenizer) -> list[str]:
|
158 |
+
"""
|
159 |
+
extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
|
160 |
+
"""
|
161 |
+
if not tokenizer.check("LEFT_BRACKET", peek=True):
|
162 |
+
return []
|
163 |
+
|
164 |
+
with tokenizer.enclosing_tokens(
|
165 |
+
"LEFT_BRACKET",
|
166 |
+
"RIGHT_BRACKET",
|
167 |
+
around="extras",
|
168 |
+
):
|
169 |
+
tokenizer.consume("WS")
|
170 |
+
extras = _parse_extras_list(tokenizer)
|
171 |
+
tokenizer.consume("WS")
|
172 |
+
|
173 |
+
return extras
|
174 |
+
|
175 |
+
|
176 |
+
def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
|
177 |
+
"""
|
178 |
+
extras_list = identifier (wsp* ',' wsp* identifier)*
|
179 |
+
"""
|
180 |
+
extras: list[str] = []
|
181 |
+
|
182 |
+
if not tokenizer.check("IDENTIFIER"):
|
183 |
+
return extras
|
184 |
+
|
185 |
+
extras.append(tokenizer.read().text)
|
186 |
+
|
187 |
+
while True:
|
188 |
+
tokenizer.consume("WS")
|
189 |
+
if tokenizer.check("IDENTIFIER", peek=True):
|
190 |
+
tokenizer.raise_syntax_error("Expected comma between extra names")
|
191 |
+
elif not tokenizer.check("COMMA"):
|
192 |
+
break
|
193 |
+
|
194 |
+
tokenizer.read()
|
195 |
+
tokenizer.consume("WS")
|
196 |
+
|
197 |
+
extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
|
198 |
+
extras.append(extra_token.text)
|
199 |
+
|
200 |
+
return extras
|
201 |
+
|
202 |
+
|
203 |
+
def _parse_specifier(tokenizer: Tokenizer) -> str:
|
204 |
+
"""
|
205 |
+
specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
|
206 |
+
| WS? version_many WS?
|
207 |
+
"""
|
208 |
+
with tokenizer.enclosing_tokens(
|
209 |
+
"LEFT_PARENTHESIS",
|
210 |
+
"RIGHT_PARENTHESIS",
|
211 |
+
around="version specifier",
|
212 |
+
):
|
213 |
+
tokenizer.consume("WS")
|
214 |
+
parsed_specifiers = _parse_version_many(tokenizer)
|
215 |
+
tokenizer.consume("WS")
|
216 |
+
|
217 |
+
return parsed_specifiers
|
218 |
+
|
219 |
+
|
220 |
+
def _parse_version_many(tokenizer: Tokenizer) -> str:
|
221 |
+
"""
|
222 |
+
version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
|
223 |
+
"""
|
224 |
+
parsed_specifiers = ""
|
225 |
+
while tokenizer.check("SPECIFIER"):
|
226 |
+
span_start = tokenizer.position
|
227 |
+
parsed_specifiers += tokenizer.read().text
|
228 |
+
if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
|
229 |
+
tokenizer.raise_syntax_error(
|
230 |
+
".* suffix can only be used with `==` or `!=` operators",
|
231 |
+
span_start=span_start,
|
232 |
+
span_end=tokenizer.position + 1,
|
233 |
+
)
|
234 |
+
if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
|
235 |
+
tokenizer.raise_syntax_error(
|
236 |
+
"Local version label can only be used with `==` or `!=` operators",
|
237 |
+
span_start=span_start,
|
238 |
+
span_end=tokenizer.position,
|
239 |
+
)
|
240 |
+
tokenizer.consume("WS")
|
241 |
+
if not tokenizer.check("COMMA"):
|
242 |
+
break
|
243 |
+
parsed_specifiers += tokenizer.read().text
|
244 |
+
tokenizer.consume("WS")
|
245 |
+
|
246 |
+
return parsed_specifiers
|
247 |
+
|
248 |
+
|
249 |
+
# --------------------------------------------------------------------------------------
|
250 |
+
# Recursive descent parser for marker expression
|
251 |
+
# --------------------------------------------------------------------------------------
|
252 |
+
def parse_marker(source: str) -> MarkerList:
|
253 |
+
return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
|
254 |
+
|
255 |
+
|
256 |
+
def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
|
257 |
+
retval = _parse_marker(tokenizer)
|
258 |
+
tokenizer.expect("END", expected="end of marker expression")
|
259 |
+
return retval
|
260 |
+
|
261 |
+
|
262 |
+
def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
|
263 |
+
"""
|
264 |
+
marker = marker_atom (BOOLOP marker_atom)+
|
265 |
+
"""
|
266 |
+
expression = [_parse_marker_atom(tokenizer)]
|
267 |
+
while tokenizer.check("BOOLOP"):
|
268 |
+
token = tokenizer.read()
|
269 |
+
expr_right = _parse_marker_atom(tokenizer)
|
270 |
+
expression.extend((token.text, expr_right))
|
271 |
+
return expression
|
272 |
+
|
273 |
+
|
274 |
+
def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
|
275 |
+
"""
|
276 |
+
marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
|
277 |
+
| WS? marker_item WS?
|
278 |
+
"""
|
279 |
+
|
280 |
+
tokenizer.consume("WS")
|
281 |
+
if tokenizer.check("LEFT_PARENTHESIS", peek=True):
|
282 |
+
with tokenizer.enclosing_tokens(
|
283 |
+
"LEFT_PARENTHESIS",
|
284 |
+
"RIGHT_PARENTHESIS",
|
285 |
+
around="marker expression",
|
286 |
+
):
|
287 |
+
tokenizer.consume("WS")
|
288 |
+
marker: MarkerAtom = _parse_marker(tokenizer)
|
289 |
+
tokenizer.consume("WS")
|
290 |
+
else:
|
291 |
+
marker = _parse_marker_item(tokenizer)
|
292 |
+
tokenizer.consume("WS")
|
293 |
+
return marker
|
294 |
+
|
295 |
+
|
296 |
+
def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
|
297 |
+
"""
|
298 |
+
marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
|
299 |
+
"""
|
300 |
+
tokenizer.consume("WS")
|
301 |
+
marker_var_left = _parse_marker_var(tokenizer)
|
302 |
+
tokenizer.consume("WS")
|
303 |
+
marker_op = _parse_marker_op(tokenizer)
|
304 |
+
tokenizer.consume("WS")
|
305 |
+
marker_var_right = _parse_marker_var(tokenizer)
|
306 |
+
tokenizer.consume("WS")
|
307 |
+
return (marker_var_left, marker_op, marker_var_right)
|
308 |
+
|
309 |
+
|
310 |
+
def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
|
311 |
+
"""
|
312 |
+
marker_var = VARIABLE | QUOTED_STRING
|
313 |
+
"""
|
314 |
+
if tokenizer.check("VARIABLE"):
|
315 |
+
return process_env_var(tokenizer.read().text.replace(".", "_"))
|
316 |
+
elif tokenizer.check("QUOTED_STRING"):
|
317 |
+
return process_python_str(tokenizer.read().text)
|
318 |
+
else:
|
319 |
+
tokenizer.raise_syntax_error(
|
320 |
+
message="Expected a marker variable or quoted string"
|
321 |
+
)
|
322 |
+
|
323 |
+
|
324 |
+
def process_env_var(env_var: str) -> Variable:
|
325 |
+
if env_var in ("platform_python_implementation", "python_implementation"):
|
326 |
+
return Variable("platform_python_implementation")
|
327 |
+
else:
|
328 |
+
return Variable(env_var)
|
329 |
+
|
330 |
+
|
331 |
+
def process_python_str(python_str: str) -> Value:
|
332 |
+
value = ast.literal_eval(python_str)
|
333 |
+
return Value(str(value))
|
334 |
+
|
335 |
+
|
336 |
+
def _parse_marker_op(tokenizer: Tokenizer) -> Op:
|
337 |
+
"""
|
338 |
+
marker_op = IN | NOT IN | OP
|
339 |
+
"""
|
340 |
+
if tokenizer.check("IN"):
|
341 |
+
tokenizer.read()
|
342 |
+
return Op("in")
|
343 |
+
elif tokenizer.check("NOT"):
|
344 |
+
tokenizer.read()
|
345 |
+
tokenizer.expect("WS", expected="whitespace after 'not'")
|
346 |
+
tokenizer.expect("IN", expected="'in' after 'not'")
|
347 |
+
return Op("not in")
|
348 |
+
elif tokenizer.check("OP"):
|
349 |
+
return Op(tokenizer.read().text)
|
350 |
+
else:
|
351 |
+
return tokenizer.raise_syntax_error(
|
352 |
+
"Expected marker operator, one of "
|
353 |
+
"<=, <, !=, ==, >=, >, ~=, ===, in, not in"
|
354 |
+
)
|
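The hand-written parser above is what ``packaging`` uses internally for PEP 508 dependency specifiers. A quick sketch of calling it directly (normally the public ``packaging.requirements.Requirement`` class wraps this); illustrative, not part of the uploaded files:

```python
from packaging._parser import parse_requirement  # private helper module

parsed = parse_requirement('requests[security] >=2.8.1 ; python_version >= "3.6"')
print(parsed.name)       # requests
print(parsed.extras)     # ['security']
print(parsed.specifier)  # >=2.8.1
print(parsed.marker)     # nested (Variable, Op, Value) tuples for the marker
```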
meow/lib/python3.13/site-packages/packaging/_structures.py
ADDED
@@ -0,0 +1,61 @@
1 |
+
# This file is dual licensed under the terms of the Apache License, Version
|
2 |
+
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
|
3 |
+
# for complete details.
|
4 |
+
|
5 |
+
|
6 |
+
class InfinityType:
|
7 |
+
def __repr__(self) -> str:
|
8 |
+
return "Infinity"
|
9 |
+
|
10 |
+
def __hash__(self) -> int:
|
11 |
+
return hash(repr(self))
|
12 |
+
|
13 |
+
def __lt__(self, other: object) -> bool:
|
14 |
+
return False
|
15 |
+
|
16 |
+
def __le__(self, other: object) -> bool:
|
17 |
+
return False
|
18 |
+
|
19 |
+
def __eq__(self, other: object) -> bool:
|
20 |
+
return isinstance(other, self.__class__)
|
21 |
+
|
22 |
+
def __gt__(self, other: object) -> bool:
|
23 |
+
return True
|
24 |
+
|
25 |
+
def __ge__(self, other: object) -> bool:
|
26 |
+
return True
|
27 |
+
|
28 |
+
def __neg__(self: object) -> "NegativeInfinityType":
|
29 |
+
return NegativeInfinity
|
30 |
+
|
31 |
+
|
32 |
+
Infinity = InfinityType()
|
33 |
+
|
34 |
+
|
35 |
+
class NegativeInfinityType:
|
36 |
+
def __repr__(self) -> str:
|
37 |
+
return "-Infinity"
|
38 |
+
|
39 |
+
def __hash__(self) -> int:
|
40 |
+
return hash(repr(self))
|
41 |
+
|
42 |
+
def __lt__(self, other: object) -> bool:
|
43 |
+
return True
|
44 |
+
|
45 |
+
def __le__(self, other: object) -> bool:
|
46 |
+
return True
|
47 |
+
|
48 |
+
def __eq__(self, other: object) -> bool:
|
49 |
+
return isinstance(other, self.__class__)
|
50 |
+
|
51 |
+
def __gt__(self, other: object) -> bool:
|
52 |
+
return False
|
53 |
+
|
54 |
+
def __ge__(self, other: object) -> bool:
|
55 |
+
return False
|
56 |
+
|
57 |
+
def __neg__(self: object) -> InfinityType:
|
58 |
+
return Infinity
|
59 |
+
|
60 |
+
|
61 |
+
NegativeInfinity = NegativeInfinityType()
|
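The two sentinels above exist so that version components can be padded with values that compare above or below everything else. A tiny illustration (not part of the diff):

```python
from packaging._structures import Infinity, NegativeInfinity  # private module

print(Infinity > 10**9)                # True  -- compares above everything
print(NegativeInfinity < "anything")   # True  -- compares below everything
print(-Infinity == NegativeInfinity)   # True, via InfinityType.__neg__
```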
meow/lib/python3.13/site-packages/packaging/_tokenizer.py
ADDED
@@ -0,0 +1,194 @@
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import contextlib
|
4 |
+
import re
|
5 |
+
from dataclasses import dataclass
|
6 |
+
from typing import Iterator, NoReturn
|
7 |
+
|
8 |
+
from .specifiers import Specifier
|
9 |
+
|
10 |
+
|
11 |
+
@dataclass
|
12 |
+
class Token:
|
13 |
+
name: str
|
14 |
+
text: str
|
15 |
+
position: int
|
16 |
+
|
17 |
+
|
18 |
+
class ParserSyntaxError(Exception):
|
19 |
+
"""The provided source text could not be parsed correctly."""
|
20 |
+
|
21 |
+
def __init__(
|
22 |
+
self,
|
23 |
+
message: str,
|
24 |
+
*,
|
25 |
+
source: str,
|
26 |
+
span: tuple[int, int],
|
27 |
+
) -> None:
|
28 |
+
self.span = span
|
29 |
+
self.message = message
|
30 |
+
self.source = source
|
31 |
+
|
32 |
+
super().__init__()
|
33 |
+
|
34 |
+
def __str__(self) -> str:
|
35 |
+
marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^"
|
36 |
+
return "\n ".join([self.message, self.source, marker])
|
37 |
+
|
38 |
+
|
39 |
+
DEFAULT_RULES: dict[str, str | re.Pattern[str]] = {
|
40 |
+
"LEFT_PARENTHESIS": r"\(",
|
41 |
+
"RIGHT_PARENTHESIS": r"\)",
|
42 |
+
"LEFT_BRACKET": r"\[",
|
43 |
+
"RIGHT_BRACKET": r"\]",
|
44 |
+
"SEMICOLON": r";",
|
45 |
+
"COMMA": r",",
|
46 |
+
"QUOTED_STRING": re.compile(
|
47 |
+
r"""
|
48 |
+
(
|
49 |
+
('[^']*')
|
50 |
+
|
|
51 |
+
("[^"]*")
|
52 |
+
)
|
53 |
+
""",
|
54 |
+
re.VERBOSE,
|
55 |
+
),
|
56 |
+
"OP": r"(===|==|~=|!=|<=|>=|<|>)",
|
57 |
+
"BOOLOP": r"\b(or|and)\b",
|
58 |
+
"IN": r"\bin\b",
|
59 |
+
"NOT": r"\bnot\b",
|
60 |
+
"VARIABLE": re.compile(
|
61 |
+
r"""
|
62 |
+
\b(
|
63 |
+
python_version
|
64 |
+
|python_full_version
|
65 |
+
|os[._]name
|
66 |
+
|sys[._]platform
|
67 |
+
|platform_(release|system)
|
68 |
+
|platform[._](version|machine|python_implementation)
|
69 |
+
|python_implementation
|
70 |
+
|implementation_(name|version)
|
71 |
+
|extra
|
72 |
+
)\b
|
73 |
+
""",
|
74 |
+
re.VERBOSE,
|
75 |
+
),
|
76 |
+
"SPECIFIER": re.compile(
|
77 |
+
Specifier._operator_regex_str + Specifier._version_regex_str,
|
78 |
+
re.VERBOSE | re.IGNORECASE,
|
79 |
+
),
|
80 |
+
"AT": r"\@",
|
81 |
+
"URL": r"[^ \t]+",
|
82 |
+
"IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
|
83 |
+
"VERSION_PREFIX_TRAIL": r"\.\*",
|
84 |
+
"VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
|
85 |
+
"WS": r"[ \t]+",
|
86 |
+
"END": r"$",
|
87 |
+
}
|
88 |
+
|
89 |
+
|
90 |
+
class Tokenizer:
|
91 |
+
"""Context-sensitive token parsing.
|
92 |
+
|
93 |
+
Provides methods to examine the input stream to check whether the next token
|
94 |
+
matches.
|
95 |
+
"""
|
96 |
+
|
97 |
+
def __init__(
|
98 |
+
self,
|
99 |
+
source: str,
|
100 |
+
*,
|
101 |
+
rules: dict[str, str | re.Pattern[str]],
|
102 |
+
) -> None:
|
103 |
+
self.source = source
|
104 |
+
self.rules: dict[str, re.Pattern[str]] = {
|
105 |
+
name: re.compile(pattern) for name, pattern in rules.items()
|
106 |
+
}
|
107 |
+
self.next_token: Token | None = None
|
108 |
+
self.position = 0
|
109 |
+
|
110 |
+
def consume(self, name: str) -> None:
|
111 |
+
"""Move beyond provided token name, if at current position."""
|
112 |
+
if self.check(name):
|
113 |
+
self.read()
|
114 |
+
|
115 |
+
def check(self, name: str, *, peek: bool = False) -> bool:
|
116 |
+
"""Check whether the next token has the provided name.
|
117 |
+
|
118 |
+
By default, if the check succeeds, the token *must* be read before
|
119 |
+
another check. If `peek` is set to `True`, the token is not loaded and
|
120 |
+
would need to be checked again.
|
121 |
+
"""
|
122 |
+
assert (
|
123 |
+
self.next_token is None
|
124 |
+
), f"Cannot check for {name!r}, already have {self.next_token!r}"
|
125 |
+
assert name in self.rules, f"Unknown token name: {name!r}"
|
126 |
+
|
127 |
+
expression = self.rules[name]
|
128 |
+
|
129 |
+
match = expression.match(self.source, self.position)
|
130 |
+
if match is None:
|
131 |
+
return False
|
132 |
+
if not peek:
|
133 |
+
self.next_token = Token(name, match[0], self.position)
|
134 |
+
return True
|
135 |
+
|
136 |
+
def expect(self, name: str, *, expected: str) -> Token:
|
137 |
+
"""Expect a certain token name next, failing with a syntax error otherwise.
|
138 |
+
|
139 |
+
The token is *not* read.
|
140 |
+
"""
|
141 |
+
if not self.check(name):
|
142 |
+
raise self.raise_syntax_error(f"Expected {expected}")
|
143 |
+
return self.read()
|
144 |
+
|
145 |
+
def read(self) -> Token:
|
146 |
+
"""Consume the next token and return it."""
|
147 |
+
token = self.next_token
|
148 |
+
assert token is not None
|
149 |
+
|
150 |
+
self.position += len(token.text)
|
151 |
+
self.next_token = None
|
152 |
+
|
153 |
+
return token
|
154 |
+
|
155 |
+
def raise_syntax_error(
|
156 |
+
self,
|
157 |
+
message: str,
|
158 |
+
*,
|
159 |
+
span_start: int | None = None,
|
160 |
+
span_end: int | None = None,
|
161 |
+
) -> NoReturn:
|
162 |
+
"""Raise ParserSyntaxError at the given position."""
|
163 |
+
span = (
|
164 |
+
self.position if span_start is None else span_start,
|
165 |
+
self.position if span_end is None else span_end,
|
166 |
+
)
|
167 |
+
raise ParserSyntaxError(
|
168 |
+
message,
|
169 |
+
source=self.source,
|
170 |
+
span=span,
|
171 |
+
)
|
172 |
+
|
173 |
+
@contextlib.contextmanager
|
174 |
+
def enclosing_tokens(
|
175 |
+
self, open_token: str, close_token: str, *, around: str
|
176 |
+
) -> Iterator[None]:
|
177 |
+
if self.check(open_token):
|
178 |
+
open_position = self.position
|
179 |
+
self.read()
|
180 |
+
else:
|
181 |
+
open_position = None
|
182 |
+
|
183 |
+
yield
|
184 |
+
|
185 |
+
if open_position is None:
|
186 |
+
return
|
187 |
+
|
188 |
+
if not self.check(close_token):
|
189 |
+
self.raise_syntax_error(
|
190 |
+
f"Expected matching {close_token} for {open_token}, after {around}",
|
191 |
+
span_start=open_position,
|
192 |
+
)
|
193 |
+
|
194 |
+
self.read()
|
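A short sketch of driving the ``Tokenizer`` above by hand with the ``DEFAULT_RULES`` table it ships with (the ``_parser`` module shown earlier is the real consumer); illustrative only:

```python
from packaging._tokenizer import DEFAULT_RULES, Tokenizer  # private helper module

tok = Tokenizer("idna >=3.0", rules=DEFAULT_RULES)
name = tok.expect("IDENTIFIER", expected="package name")      # Token 'idna' at 0
tok.consume("WS")                                             # skip the space
spec = tok.expect("SPECIFIER", expected="version specifier")  # Token '>=3.0'
tok.expect("END", expected="end of input")
print(name.text, spec.text)  # idna >=3.0
```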
meow/lib/python3.13/site-packages/packaging/markers.py
ADDED
@@ -0,0 +1,331 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.

from __future__ import annotations

import operator
import os
import platform
import sys
from typing import Any, Callable, TypedDict, cast

from ._parser import MarkerAtom, MarkerList, Op, Value, Variable
from ._parser import parse_marker as _parse_marker
from ._tokenizer import ParserSyntaxError
from .specifiers import InvalidSpecifier, Specifier
from .utils import canonicalize_name

__all__ = [
    "InvalidMarker",
    "Marker",
    "UndefinedComparison",
    "UndefinedEnvironmentName",
    "default_environment",
]

Operator = Callable[[str, str], bool]


class InvalidMarker(ValueError):
    """
    An invalid marker was found, users should refer to PEP 508.
    """


class UndefinedComparison(ValueError):
    """
    An invalid operation was attempted on a value that doesn't support it.
    """


class UndefinedEnvironmentName(ValueError):
    """
    A name was attempted to be used that does not exist inside of the
    environment.
    """


class Environment(TypedDict):
    implementation_name: str
    """The implementation's identifier, e.g. ``'cpython'``."""

    implementation_version: str
    """
    The implementation's version, e.g. ``'3.13.0a2'`` for CPython 3.13.0a2, or
    ``'7.3.13'`` for PyPy3.10 v7.3.13.
    """

    os_name: str
    """
    The value of :py:data:`os.name`. The name of the operating system dependent module
    imported, e.g. ``'posix'``.
    """

    platform_machine: str
    """
    Returns the machine type, e.g. ``'i386'``.

    An empty string if the value cannot be determined.
    """

    platform_release: str
    """
    The system's release, e.g. ``'2.2.0'`` or ``'NT'``.

    An empty string if the value cannot be determined.
    """

    platform_system: str
    """
    The system/OS name, e.g. ``'Linux'``, ``'Windows'`` or ``'Java'``.

    An empty string if the value cannot be determined.
    """

    platform_version: str
    """
    The system's release version, e.g. ``'#3 on degas'``.

    An empty string if the value cannot be determined.
    """

    python_full_version: str
    """
    The Python version as string ``'major.minor.patchlevel'``.

    Note that unlike the Python :py:data:`sys.version`, this value will always include
    the patchlevel (it defaults to 0).
    """

    platform_python_implementation: str
    """
    A string identifying the Python implementation, e.g. ``'CPython'``.
    """

    python_version: str
    """The Python version as string ``'major.minor'``."""

    sys_platform: str
    """
    This string contains a platform identifier that can be used to append
    platform-specific components to :py:data:`sys.path`, for instance.

    For Unix systems, except on Linux and AIX, this is the lowercased OS name as
    returned by ``uname -s`` with the first part of the version as returned by
    ``uname -r`` appended, e.g. ``'sunos5'`` or ``'freebsd8'``, at the time when Python
    was built.
    """


def _normalize_extra_values(results: Any) -> Any:
    """
    Normalize extra values.
    """
    if isinstance(results[0], tuple):
        lhs, op, rhs = results[0]
        if isinstance(lhs, Variable) and lhs.value == "extra":
            normalized_extra = canonicalize_name(rhs.value)
            rhs = Value(normalized_extra)
        elif isinstance(rhs, Variable) and rhs.value == "extra":
            normalized_extra = canonicalize_name(lhs.value)
            lhs = Value(normalized_extra)
        results[0] = lhs, op, rhs
    return results


def _format_marker(
    marker: list[str] | MarkerAtom | str, first: bool | None = True
) -> str:
    assert isinstance(marker, (list, tuple, str))

    # Sometimes we have a structure like [[...]] which is a single item list
    # where the single item is itself it's own list. In that case we want skip
    # the rest of this function so that we don't get extraneous () on the
    # outside.
    if (
        isinstance(marker, list)
        and len(marker) == 1
        and isinstance(marker[0], (list, tuple))
    ):
        return _format_marker(marker[0])

    if isinstance(marker, list):
        inner = (_format_marker(m, first=False) for m in marker)
        if first:
            return " ".join(inner)
        else:
            return "(" + " ".join(inner) + ")"
    elif isinstance(marker, tuple):
        return " ".join([m.serialize() for m in marker])
    else:
        return marker


_operators: dict[str, Operator] = {
    "in": lambda lhs, rhs: lhs in rhs,
    "not in": lambda lhs, rhs: lhs not in rhs,
    "<": operator.lt,
    "<=": operator.le,
    "==": operator.eq,
    "!=": operator.ne,
    ">=": operator.ge,
    ">": operator.gt,
}


def _eval_op(lhs: str, op: Op, rhs: str) -> bool:
    try:
        spec = Specifier("".join([op.serialize(), rhs]))
    except InvalidSpecifier:
        pass
    else:
        return spec.contains(lhs, prereleases=True)

    oper: Operator | None = _operators.get(op.serialize())
    if oper is None:
        raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.")

    return oper(lhs, rhs)


def _normalize(*values: str, key: str) -> tuple[str, ...]:
    # PEP 685 – Comparison of extra names for optional distribution dependencies
    # https://peps.python.org/pep-0685/
    # > When comparing extra names, tools MUST normalize the names being
    # > compared using the semantics outlined in PEP 503 for names
    if key == "extra":
        return tuple(canonicalize_name(v) for v in values)

    # other environment markers don't have such standards
    return values


def _evaluate_markers(markers: MarkerList, environment: dict[str, str]) -> bool:
    groups: list[list[bool]] = [[]]

    for marker in markers:
        assert isinstance(marker, (list, tuple, str))

        if isinstance(marker, list):
            groups[-1].append(_evaluate_markers(marker, environment))
        elif isinstance(marker, tuple):
            lhs, op, rhs = marker

            if isinstance(lhs, Variable):
                environment_key = lhs.value
                lhs_value = environment[environment_key]
                rhs_value = rhs.value
            else:
                lhs_value = lhs.value
                environment_key = rhs.value
                rhs_value = environment[environment_key]

            lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key)
            groups[-1].append(_eval_op(lhs_value, op, rhs_value))
        else:
            assert marker in ["and", "or"]
            if marker == "or":
                groups.append([])

    return any(all(item) for item in groups)


def format_full_version(info: sys._version_info) -> str:
    version = f"{info.major}.{info.minor}.{info.micro}"
    kind = info.releaselevel
    if kind != "final":
        version += kind[0] + str(info.serial)
    return version


def default_environment() -> Environment:
    iver = format_full_version(sys.implementation.version)
    implementation_name = sys.implementation.name
    return {
        "implementation_name": implementation_name,
        "implementation_version": iver,
        "os_name": os.name,
        "platform_machine": platform.machine(),
        "platform_release": platform.release(),
        "platform_system": platform.system(),
        "platform_version": platform.version(),
        "python_full_version": platform.python_version(),
        "platform_python_implementation": platform.python_implementation(),
        "python_version": ".".join(platform.python_version_tuple()[:2]),
        "sys_platform": sys.platform,
    }


class Marker:
    def __init__(self, marker: str) -> None:
        # Note: We create a Marker object without calling this constructor in
        #       packaging.requirements.Requirement. If any additional logic is
        #       added here, make sure to mirror/adapt Requirement.
        try:
            self._markers = _normalize_extra_values(_parse_marker(marker))
            # The attribute `_markers` can be described in terms of a recursive type:
            # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
            #
            # For example, the following expression:
            # python_version > "3.6" or (python_version == "3.6" and os_name == "unix")
            #
            # is parsed into:
            # [
            #     (<Variable('python_version')>, <Op('>')>, <Value('3.6')>),
            #     'and',
            #     [
            #         (<Variable('python_version')>, <Op('==')>, <Value('3.6')>),
            #         'or',
            #         (<Variable('os_name')>, <Op('==')>, <Value('unix')>)
            #     ]
            # ]
        except ParserSyntaxError as e:
            raise InvalidMarker(str(e)) from e

    def __str__(self) -> str:
        return _format_marker(self._markers)

    def __repr__(self) -> str:
        return f"<Marker('{self}')>"

    def __hash__(self) -> int:
        return hash((self.__class__.__name__, str(self)))

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, Marker):
            return NotImplemented

        return str(self) == str(other)

    def evaluate(self, environment: dict[str, str] | None = None) -> bool:
        """Evaluate a marker.

        Return the boolean from evaluating the given marker against the
        environment. environment is an optional argument to override all or
        part of the determined environment.

        The environment is determined from the current Python process.
        """
        current_environment = cast("dict[str, str]", default_environment())
        current_environment["extra"] = ""
        if environment is not None:
            current_environment.update(environment)
            # The API used to allow setting extra to None. We need to handle this
            # case for backwards compatibility.
            if current_environment["extra"] is None:
                current_environment["extra"] = ""

        return _evaluate_markers(
            self._markers, _repair_python_full_version(current_environment)
        )


def _repair_python_full_version(env: dict[str, str]) -> dict[str, str]:
    """
    Work around platform.python_version() returning something that is not PEP 440
    compliant for non-tagged Python builds.
    """
    if env["python_full_version"].endswith("+"):
        env["python_full_version"] += "local"
    return env
meow/lib/python3.13/site-packages/packaging/metadata.py
ADDED
@@ -0,0 +1,863 @@
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import email.feedparser
|
4 |
+
import email.header
|
5 |
+
import email.message
|
6 |
+
import email.parser
|
7 |
+
import email.policy
|
8 |
+
import pathlib
|
9 |
+
import sys
|
10 |
+
import typing
|
11 |
+
from typing import (
|
12 |
+
Any,
|
13 |
+
Callable,
|
14 |
+
Generic,
|
15 |
+
Literal,
|
16 |
+
TypedDict,
|
17 |
+
cast,
|
18 |
+
)
|
19 |
+
|
20 |
+
from . import licenses, requirements, specifiers, utils
|
21 |
+
from . import version as version_module
|
22 |
+
from .licenses import NormalizedLicenseExpression
|
23 |
+
|
24 |
+
T = typing.TypeVar("T")
|
25 |
+
|
26 |
+
|
27 |
+
if sys.version_info >= (3, 11): # pragma: no cover
|
28 |
+
ExceptionGroup = ExceptionGroup
|
29 |
+
else: # pragma: no cover
|
30 |
+
|
31 |
+
class ExceptionGroup(Exception):
|
32 |
+
"""A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11.
|
33 |
+
|
34 |
+
If :external:exc:`ExceptionGroup` is already defined by Python itself,
|
35 |
+
that version is used instead.
|
36 |
+
"""
|
37 |
+
|
38 |
+
message: str
|
39 |
+
exceptions: list[Exception]
|
40 |
+
|
41 |
+
def __init__(self, message: str, exceptions: list[Exception]) -> None:
|
42 |
+
self.message = message
|
43 |
+
self.exceptions = exceptions
|
44 |
+
|
45 |
+
def __repr__(self) -> str:
|
46 |
+
return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})"
|
47 |
+
|
48 |
+
|
49 |
+
class InvalidMetadata(ValueError):
|
50 |
+
"""A metadata field contains invalid data."""
|
51 |
+
|
52 |
+
field: str
|
53 |
+
"""The name of the field that contains invalid data."""
|
54 |
+
|
55 |
+
def __init__(self, field: str, message: str) -> None:
|
56 |
+
self.field = field
|
57 |
+
super().__init__(message)
|
58 |
+
|
59 |
+
|
60 |
+
# The RawMetadata class attempts to make as few assumptions about the underlying
|
61 |
+
# serialization formats as possible. The idea is that as long as a serialization
|
62 |
+
# formats offer some very basic primitives in *some* way then we can support
|
63 |
+
# serializing to and from that format.
|
64 |
+
class RawMetadata(TypedDict, total=False):
|
65 |
+
"""A dictionary of raw core metadata.
|
66 |
+
|
67 |
+
Each field in core metadata maps to a key of this dictionary (when data is
|
68 |
+
provided). The key is lower-case and underscores are used instead of dashes
|
69 |
+
compared to the equivalent core metadata field. Any core metadata field that
|
70 |
+
can be specified multiple times or can hold multiple values in a single
|
71 |
+
field have a key with a plural name. See :class:`Metadata` whose attributes
|
72 |
+
match the keys of this dictionary.
|
73 |
+
|
74 |
+
Core metadata fields that can be specified multiple times are stored as a
|
75 |
+
list or dict depending on which is appropriate for the field. Any fields
|
76 |
+
which hold multiple values in a single field are stored as a list.
|
77 |
+
|
78 |
+
"""
|
79 |
+
|
80 |
+
# Metadata 1.0 - PEP 241
|
81 |
+
metadata_version: str
|
82 |
+
name: str
|
83 |
+
version: str
|
84 |
+
platforms: list[str]
|
85 |
+
summary: str
|
86 |
+
description: str
|
87 |
+
keywords: list[str]
|
88 |
+
home_page: str
|
89 |
+
author: str
|
90 |
+
author_email: str
|
91 |
+
license: str
|
92 |
+
|
93 |
+
# Metadata 1.1 - PEP 314
|
94 |
+
supported_platforms: list[str]
|
95 |
+
download_url: str
|
96 |
+
classifiers: list[str]
|
97 |
+
requires: list[str]
|
98 |
+
provides: list[str]
|
99 |
+
obsoletes: list[str]
|
100 |
+
|
101 |
+
# Metadata 1.2 - PEP 345
|
102 |
+
maintainer: str
|
103 |
+
maintainer_email: str
|
104 |
+
requires_dist: list[str]
|
105 |
+
provides_dist: list[str]
|
106 |
+
obsoletes_dist: list[str]
|
107 |
+
requires_python: str
|
108 |
+
requires_external: list[str]
|
109 |
+
project_urls: dict[str, str]
|
110 |
+
|
111 |
+
# Metadata 2.0
|
112 |
+
# PEP 426 attempted to completely revamp the metadata format
|
113 |
+
# but got stuck without ever being able to build consensus on
|
114 |
+
# it and ultimately ended up withdrawn.
|
115 |
+
#
|
116 |
+
# However, a number of tools had started emitting METADATA with
|
117 |
+
# `2.0` Metadata-Version, so for historical reasons, this version
|
118 |
+
# was skipped.
|
119 |
+
|
120 |
+
# Metadata 2.1 - PEP 566
|
121 |
+
description_content_type: str
|
122 |
+
provides_extra: list[str]
|
123 |
+
|
124 |
+
# Metadata 2.2 - PEP 643
|
125 |
+
dynamic: list[str]
|
126 |
+
|
127 |
+
# Metadata 2.3 - PEP 685
|
128 |
+
# No new fields were added in PEP 685, just some edge case were
|
129 |
+
# tightened up to provide better interoptability.
|
130 |
+
|
131 |
+
# Metadata 2.4 - PEP 639
|
132 |
+
license_expression: str
|
133 |
+
license_files: list[str]
|
134 |
+
|
135 |
+
|
136 |
+
_STRING_FIELDS = {
|
137 |
+
"author",
|
138 |
+
"author_email",
|
139 |
+
"description",
|
140 |
+
"description_content_type",
|
141 |
+
"download_url",
|
142 |
+
"home_page",
|
143 |
+
"license",
|
144 |
+
"license_expression",
|
145 |
+
"maintainer",
|
146 |
+
"maintainer_email",
|
147 |
+
"metadata_version",
|
148 |
+
"name",
|
149 |
+
"requires_python",
|
150 |
+
"summary",
|
151 |
+
"version",
|
152 |
+
}
|
153 |
+
|
154 |
+
_LIST_FIELDS = {
|
155 |
+
"classifiers",
|
156 |
+
"dynamic",
|
157 |
+
"license_files",
|
158 |
+
"obsoletes",
|
159 |
+
"obsoletes_dist",
|
160 |
+
"platforms",
|
161 |
+
"provides",
|
162 |
+
"provides_dist",
|
163 |
+
"provides_extra",
|
164 |
+
"requires",
|
165 |
+
"requires_dist",
|
166 |
+
"requires_external",
|
167 |
+
"supported_platforms",
|
168 |
+
}
|
169 |
+
|
170 |
+
_DICT_FIELDS = {
|
171 |
+
"project_urls",
|
172 |
+
}
|
173 |
+
|
174 |
+
|
175 |
+
def _parse_keywords(data: str) -> list[str]:
|
176 |
+
"""Split a string of comma-separated keywords into a list of keywords."""
|
177 |
+
return [k.strip() for k in data.split(",")]
|
178 |
+
|
179 |
+
|
180 |
+
def _parse_project_urls(data: list[str]) -> dict[str, str]:
|
181 |
+
"""Parse a list of label/URL string pairings separated by a comma."""
|
182 |
+
urls = {}
|
183 |
+
for pair in data:
|
184 |
+
# Our logic is slightly tricky here as we want to try and do
|
185 |
+
# *something* reasonable with malformed data.
|
186 |
+
#
|
187 |
+
# The main thing that we have to worry about, is data that does
|
188 |
+
# not have a ',' at all to split the label from the Value. There
|
189 |
+
# isn't a singular right answer here, and we will fail validation
|
190 |
+
# later on (if the caller is validating) so it doesn't *really*
|
191 |
+
# matter, but since the missing value has to be an empty str
|
192 |
+
# and our return value is dict[str, str], if we let the key
|
193 |
+
# be the missing value, then they'd have multiple '' values that
|
194 |
+
# overwrite each other in a accumulating dict.
|
195 |
+
#
|
196 |
+
# The other potentional issue is that it's possible to have the
|
197 |
+
# same label multiple times in the metadata, with no solid "right"
|
198 |
+
# answer with what to do in that case. As such, we'll do the only
|
199 |
+
# thing we can, which is treat the field as unparseable and add it
|
200 |
+
# to our list of unparsed fields.
|
201 |
+
parts = [p.strip() for p in pair.split(",", 1)]
|
202 |
+
parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items
|
203 |
+
|
204 |
+
# TODO: The spec doesn't say anything about if the keys should be
|
205 |
+
# considered case sensitive or not... logically they should
|
206 |
+
# be case-preserving and case-insensitive, but doing that
|
207 |
+
# would open up more cases where we might have duplicate
|
208 |
+
# entries.
|
209 |
+
label, url = parts
|
210 |
+
if label in urls:
|
211 |
+
# The label already exists in our set of urls, so this field
|
212 |
+
# is unparseable, and we can just add the whole thing to our
|
213 |
+
# unparseable data and stop processing it.
|
214 |
+
raise KeyError("duplicate labels in project urls")
|
215 |
+
urls[label] = url
|
216 |
+
|
217 |
+
return urls
|
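A small illustration of the label/URL splitting that _parse_project_urls() performs on Project-URL entries; the example strings below are invented.

example = [
    "Homepage, https://example.org",
    "Bug Tracker, https://example.org/issues",
]
urls = {}
for pair in example:
    # Split on the first comma only, then pad so a missing URL becomes "".
    parts = [p.strip() for p in pair.split(",", 1)]
    parts.extend([""] * (max(0, 2 - len(parts))))
    label, url = parts
    urls[label] = url
print(urls)
# {'Homepage': 'https://example.org', 'Bug Tracker': 'https://example.org/issues'}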
218 |
+
|
219 |
+
|
220 |
+
def _get_payload(msg: email.message.Message, source: bytes | str) -> str:
|
221 |
+
"""Get the body of the message."""
|
222 |
+
# If our source is a str, then our caller has managed encodings for us,
|
223 |
+
# and we don't need to deal with it.
|
224 |
+
if isinstance(source, str):
|
225 |
+
payload = msg.get_payload()
|
226 |
+
assert isinstance(payload, str)
|
227 |
+
return payload
|
228 |
+
# If our source is a bytes, then we're managing the encoding and we need
|
229 |
+
# to deal with it.
|
230 |
+
else:
|
231 |
+
bpayload = msg.get_payload(decode=True)
|
232 |
+
assert isinstance(bpayload, bytes)
|
233 |
+
try:
|
234 |
+
return bpayload.decode("utf8", "strict")
|
235 |
+
except UnicodeDecodeError as exc:
|
236 |
+
raise ValueError("payload in an invalid encoding") from exc
|
237 |
+
|
238 |
+
|
239 |
+
# The various parse_FORMAT functions here are intended to be as lenient as
|
240 |
+
# possible in their parsing, while still returning a correctly typed
|
241 |
+
# RawMetadata.
|
242 |
+
#
|
243 |
+
# To aid in this, we also generally want to do as little touching of the
|
244 |
+
# data as possible, except where there are possibly some historic holdovers
|
245 |
+
# that make valid data awkward to work with.
|
246 |
+
#
|
247 |
+
# While this is a lower level, intermediate format than our ``Metadata``
|
248 |
+
# class, some light touch ups can make a massive difference in usability.
|
249 |
+
|
250 |
+
# Map METADATA fields to RawMetadata.
|
251 |
+
_EMAIL_TO_RAW_MAPPING = {
|
252 |
+
"author": "author",
|
253 |
+
"author-email": "author_email",
|
254 |
+
"classifier": "classifiers",
|
255 |
+
"description": "description",
|
256 |
+
"description-content-type": "description_content_type",
|
257 |
+
"download-url": "download_url",
|
258 |
+
"dynamic": "dynamic",
|
259 |
+
"home-page": "home_page",
|
260 |
+
"keywords": "keywords",
|
261 |
+
"license": "license",
|
262 |
+
"license-expression": "license_expression",
|
263 |
+
"license-file": "license_files",
|
264 |
+
"maintainer": "maintainer",
|
265 |
+
"maintainer-email": "maintainer_email",
|
266 |
+
"metadata-version": "metadata_version",
|
267 |
+
"name": "name",
|
268 |
+
"obsoletes": "obsoletes",
|
269 |
+
"obsoletes-dist": "obsoletes_dist",
|
270 |
+
"platform": "platforms",
|
271 |
+
"project-url": "project_urls",
|
272 |
+
"provides": "provides",
|
273 |
+
"provides-dist": "provides_dist",
|
274 |
+
"provides-extra": "provides_extra",
|
275 |
+
"requires": "requires",
|
276 |
+
"requires-dist": "requires_dist",
|
277 |
+
"requires-external": "requires_external",
|
278 |
+
"requires-python": "requires_python",
|
279 |
+
"summary": "summary",
|
280 |
+
"supported-platform": "supported_platforms",
|
281 |
+
"version": "version",
|
282 |
+
}
|
283 |
+
_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}
|
284 |
+
|
285 |
+
|
286 |
+
def parse_email(data: bytes | str) -> tuple[RawMetadata, dict[str, list[str]]]:
|
287 |
+
"""Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).
|
288 |
+
|
289 |
+
This function returns a two-item tuple of dicts. The first dict is of
|
290 |
+
recognized fields from the core metadata specification. Fields that can be
|
291 |
+
parsed and translated into Python's built-in types are converted
|
292 |
+
appropriately. All other fields are left as-is. Fields that are allowed to
|
293 |
+
appear multiple times are stored as lists.
|
294 |
+
|
295 |
+
The second dict contains all other fields from the metadata. This includes
|
296 |
+
any unrecognized fields. It also includes any fields which are expected to
|
297 |
+
be parsed into a built-in type but were not formatted appropriately. Finally,
|
298 |
+
any fields that are expected to appear only once but are repeated are
|
299 |
+
included in this dict.
|
300 |
+
|
301 |
+
"""
|
302 |
+
raw: dict[str, str | list[str] | dict[str, str]] = {}
|
303 |
+
unparsed: dict[str, list[str]] = {}
|
304 |
+
|
305 |
+
if isinstance(data, str):
|
306 |
+
parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
|
307 |
+
else:
|
308 |
+
parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)
|
309 |
+
|
310 |
+
# We have to wrap parsed.keys() in a set, because in the case of multiple
|
311 |
+
# values for a key (a list), the key will appear multiple times in the
|
312 |
+
# list of keys, but we're avoiding that by using get_all().
|
313 |
+
for name in frozenset(parsed.keys()):
|
314 |
+
# Header names in RFC are case insensitive, so we'll normalize to all
|
315 |
+
# lower case to make comparisons easier.
|
316 |
+
name = name.lower()
|
317 |
+
|
318 |
+
# We use get_all() here, even for fields that aren't multiple use,
|
319 |
+
# because otherwise someone could have e.g. two Name fields, and we
|
320 |
+
# would just silently ignore it rather than doing something about it.
|
321 |
+
headers = parsed.get_all(name) or []
|
322 |
+
|
323 |
+
# The way the email module works when parsing bytes is that it
|
324 |
+
# unconditionally decodes the bytes as ascii using the surrogateescape
|
325 |
+
# handler. When you pull that data back out (such as with get_all() ),
|
326 |
+
# it looks to see if the str has any surrogate escapes, and if it does
|
327 |
+
# it wraps it in a Header object instead of returning the string.
|
328 |
+
#
|
329 |
+
# As such, we'll look for those Header objects, and fix up the encoding.
|
330 |
+
value = []
|
331 |
+
# Flag if we have run into any issues processing the headers, thus
|
332 |
+
# signalling that the data belongs in 'unparsed'.
|
333 |
+
valid_encoding = True
|
334 |
+
for h in headers:
|
335 |
+
# It's unclear if this can return more types than just a Header or
|
336 |
+
# a str, so we'll just assert here to make sure.
|
337 |
+
assert isinstance(h, (email.header.Header, str))
|
338 |
+
|
339 |
+
# If it's a header object, we need to do our little dance to get
|
340 |
+
# the real data out of it. In cases where there is invalid data
|
341 |
+
# we're going to end up with mojibake, but there's no obvious, good
|
342 |
+
# way around that without reimplementing parts of the Header object
|
343 |
+
# ourselves.
|
344 |
+
#
|
345 |
+
# That should be fine since, if mojibacked happens, this key is
|
346 |
+
# going into the unparsed dict anyways.
|
347 |
+
if isinstance(h, email.header.Header):
|
348 |
+
# The Header object stores it's data as chunks, and each chunk
|
349 |
+
# can be independently encoded, so we'll need to check each
|
350 |
+
# of them.
|
351 |
+
chunks: list[tuple[bytes, str | None]] = []
|
352 |
+
for bin, encoding in email.header.decode_header(h):
|
353 |
+
try:
|
354 |
+
bin.decode("utf8", "strict")
|
355 |
+
except UnicodeDecodeError:
|
356 |
+
# Enable mojibake.
|
357 |
+
encoding = "latin1"
|
358 |
+
valid_encoding = False
|
359 |
+
else:
|
360 |
+
encoding = "utf8"
|
361 |
+
chunks.append((bin, encoding))
|
362 |
+
|
363 |
+
# Turn our chunks back into a Header object, then let that
|
364 |
+
# Header object do the right thing to turn them into a
|
365 |
+
# string for us.
|
366 |
+
value.append(str(email.header.make_header(chunks)))
|
367 |
+
# This is already a string, so just add it.
|
368 |
+
else:
|
369 |
+
value.append(h)
|
370 |
+
|
371 |
+
# We've processed all of our values to get them into a list of str,
|
372 |
+
# but we may have mojibake data, in which case this is an unparsed
|
373 |
+
# field.
|
374 |
+
if not valid_encoding:
|
375 |
+
unparsed[name] = value
|
376 |
+
continue
|
377 |
+
|
378 |
+
raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
|
379 |
+
if raw_name is None:
|
380 |
+
# This is a bit of a weird situation, we've encountered a key that
|
381 |
+
# we don't know what it means, so we don't know whether it's meant
|
382 |
+
# to be a list or not.
|
383 |
+
#
|
384 |
+
# Since we can't really tell one way or another, we'll just leave it
|
385 |
+
# as a list, even though it may be a single item list, because that's
|
386 |
+
# what makes the most sense for email headers.
|
387 |
+
unparsed[name] = value
|
388 |
+
continue
|
389 |
+
|
390 |
+
# If this is one of our string fields, then we'll check to see if our
|
391 |
+
# value is a list of a single item. If it is then we'll assume that
|
392 |
+
# it was emitted as a single string, and unwrap the str from inside
|
393 |
+
# the list.
|
394 |
+
#
|
395 |
+
# If it's any other kind of data, then we haven't the faintest clue
|
396 |
+
# what we should parse it as, and we have to just add it to our list
|
397 |
+
# of unparsed stuff.
|
398 |
+
if raw_name in _STRING_FIELDS and len(value) == 1:
|
399 |
+
raw[raw_name] = value[0]
|
400 |
+
# If this is one of our list of string fields, then we can just assign
|
401 |
+
# the value, since email *only* has strings, and our get_all() call
|
402 |
+
# above ensures that this is a list.
|
403 |
+
elif raw_name in _LIST_FIELDS:
|
404 |
+
raw[raw_name] = value
|
405 |
+
# Special Case: Keywords
|
406 |
+
# The keywords field is implemented in the metadata spec as a str,
|
407 |
+
# but it conceptually is a list of strings, and is serialized using
|
408 |
+
# ", ".join(keywords), so we'll do some light data massaging to turn
|
409 |
+
# this into what it logically is.
|
410 |
+
elif raw_name == "keywords" and len(value) == 1:
|
411 |
+
raw[raw_name] = _parse_keywords(value[0])
|
412 |
+
# Special Case: Project-URL
|
413 |
+
# The project urls is implemented in the metadata spec as a list of
|
414 |
+
# specially-formatted strings that represent a key and a value, which
|
415 |
+
# is fundamentally a mapping, however the email format doesn't support
|
416 |
+
# mappings in a sane way, so it was crammed into a list of strings
|
417 |
+
# instead.
|
418 |
+
#
|
419 |
+
# We will do a little light data massaging to turn this into a map as
|
420 |
+
# it logically should be.
|
421 |
+
elif raw_name == "project_urls":
|
422 |
+
try:
|
423 |
+
raw[raw_name] = _parse_project_urls(value)
|
424 |
+
except KeyError:
|
425 |
+
unparsed[name] = value
|
426 |
+
# Nothing that we've done has managed to parse this, so it'll just
|
427 |
+
# throw it in our unparseable data and move on.
|
428 |
+
else:
|
429 |
+
unparsed[name] = value
|
430 |
+
|
431 |
+
# We need to support getting the Description from the message payload in
|
432 |
+
# addition to getting it from the the headers. This does mean, though, there
|
433 |
+
# is the possibility of it being set both ways, in which case we put both
|
434 |
+
# in 'unparsed' since we don't know which is right.
|
435 |
+
try:
|
436 |
+
payload = _get_payload(parsed, data)
|
437 |
+
except ValueError:
|
438 |
+
unparsed.setdefault("description", []).append(
|
439 |
+
parsed.get_payload(decode=isinstance(data, bytes)) # type: ignore[call-overload]
|
440 |
+
)
|
441 |
+
else:
|
442 |
+
if payload:
|
443 |
+
# Check to see if we've already got a description, if so then both
|
444 |
+
# it, and this body move to unparseable.
|
445 |
+
if "description" in raw:
|
446 |
+
description_header = cast(str, raw.pop("description"))
|
447 |
+
unparsed.setdefault("description", []).extend(
|
448 |
+
[description_header, payload]
|
449 |
+
)
|
450 |
+
elif "description" in unparsed:
|
451 |
+
unparsed["description"].append(payload)
|
452 |
+
else:
|
453 |
+
raw["description"] = payload
|
454 |
+
|
455 |
+
# We need to cast our `raw` to a metadata, because a TypedDict only support
|
456 |
+
# literal key names, but we're computing our key names on purpose, but the
|
457 |
+
# way this function is implemented, our `TypedDict` can only have valid key
|
458 |
+
# names.
|
459 |
+
return cast(RawMetadata, raw), unparsed
|
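A hedged usage sketch for parse_email(); the METADATA text is a minimal invented example rather than the contents of any real distribution.

from packaging.metadata import parse_email

METADATA = """\
Metadata-Version: 2.1
Name: example-package
Version: 1.0.0
Summary: An invented example
Requires-Dist: requests>=2.0
Provides-Extra: test
"""

raw, unparsed = parse_email(METADATA)
print(raw["name"], raw["version"])  # example-package 1.0.0
print(raw["requires_dist"])         # ['requests>=2.0'] -- still plain strings at this layer
print(unparsed)                     # {} when every field is recognized and well-formed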
460 |
+
|
461 |
+
|
462 |
+
_NOT_FOUND = object()
|
463 |
+
|
464 |
+
|
465 |
+
# Keep the two values in sync.
|
466 |
+
_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
|
467 |
+
_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
|
468 |
+
|
469 |
+
_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])
|
470 |
+
|
471 |
+
|
472 |
+
class _Validator(Generic[T]):
|
473 |
+
"""Validate a metadata field.
|
474 |
+
|
475 |
+
All _process_*() methods correspond to a core metadata field. The method is
|
476 |
+
called with the field's raw value. If the raw value is valid it is returned
|
477 |
+
in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
|
478 |
+
If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
|
479 |
+
as appropriate).
|
480 |
+
"""
|
481 |
+
|
482 |
+
name: str
|
483 |
+
raw_name: str
|
484 |
+
added: _MetadataVersion
|
485 |
+
|
486 |
+
def __init__(
|
487 |
+
self,
|
488 |
+
*,
|
489 |
+
added: _MetadataVersion = "1.0",
|
490 |
+
) -> None:
|
491 |
+
self.added = added
|
492 |
+
|
493 |
+
def __set_name__(self, _owner: Metadata, name: str) -> None:
|
494 |
+
self.name = name
|
495 |
+
self.raw_name = _RAW_TO_EMAIL_MAPPING[name]
|
496 |
+
|
497 |
+
def __get__(self, instance: Metadata, _owner: type[Metadata]) -> T:
|
498 |
+
# With Python 3.8, the caching can be replaced with functools.cached_property().
|
499 |
+
# No need to check the cache as attribute lookup will resolve into the
|
500 |
+
# instance's __dict__ before __get__ is called.
|
501 |
+
cache = instance.__dict__
|
502 |
+
value = instance._raw.get(self.name)
|
503 |
+
|
504 |
+
# To make the _process_* methods easier, we'll check if the value is None
|
505 |
+
# and if this field is NOT a required attribute, and if both of those
|
506 |
+
# things are true, we'll skip the the converter. This will mean that the
|
507 |
+
# converters never have to deal with the None union.
|
508 |
+
if self.name in _REQUIRED_ATTRS or value is not None:
|
509 |
+
try:
|
510 |
+
converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
|
511 |
+
except AttributeError:
|
512 |
+
pass
|
513 |
+
else:
|
514 |
+
value = converter(value)
|
515 |
+
|
516 |
+
cache[self.name] = value
|
517 |
+
try:
|
518 |
+
del instance._raw[self.name] # type: ignore[misc]
|
519 |
+
except KeyError:
|
520 |
+
pass
|
521 |
+
|
522 |
+
return cast(T, value)
|
523 |
+
|
524 |
+
def _invalid_metadata(
|
525 |
+
self, msg: str, cause: Exception | None = None
|
526 |
+
) -> InvalidMetadata:
|
527 |
+
exc = InvalidMetadata(
|
528 |
+
self.raw_name, msg.format_map({"field": repr(self.raw_name)})
|
529 |
+
)
|
530 |
+
exc.__cause__ = cause
|
531 |
+
return exc
|
532 |
+
|
533 |
+
def _process_metadata_version(self, value: str) -> _MetadataVersion:
|
534 |
+
# Implicitly makes Metadata-Version required.
|
535 |
+
if value not in _VALID_METADATA_VERSIONS:
|
536 |
+
raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
|
537 |
+
return cast(_MetadataVersion, value)
|
538 |
+
|
539 |
+
def _process_name(self, value: str) -> str:
|
540 |
+
if not value:
|
541 |
+
raise self._invalid_metadata("{field} is a required field")
|
542 |
+
# Validate the name as a side-effect.
|
543 |
+
try:
|
544 |
+
utils.canonicalize_name(value, validate=True)
|
545 |
+
except utils.InvalidName as exc:
|
546 |
+
raise self._invalid_metadata(
|
547 |
+
f"{value!r} is invalid for {{field}}", cause=exc
|
548 |
+
) from exc
|
549 |
+
else:
|
550 |
+
return value
|
551 |
+
|
552 |
+
def _process_version(self, value: str) -> version_module.Version:
|
553 |
+
if not value:
|
554 |
+
raise self._invalid_metadata("{field} is a required field")
|
555 |
+
try:
|
556 |
+
return version_module.parse(value)
|
557 |
+
except version_module.InvalidVersion as exc:
|
558 |
+
raise self._invalid_metadata(
|
559 |
+
f"{value!r} is invalid for {{field}}", cause=exc
|
560 |
+
) from exc
|
561 |
+
|
562 |
+
def _process_summary(self, value: str) -> str:
|
563 |
+
"""Check the field contains no newlines."""
|
564 |
+
if "\n" in value:
|
565 |
+
raise self._invalid_metadata("{field} must be a single line")
|
566 |
+
return value
|
567 |
+
|
568 |
+
def _process_description_content_type(self, value: str) -> str:
|
569 |
+
content_types = {"text/plain", "text/x-rst", "text/markdown"}
|
570 |
+
message = email.message.EmailMessage()
|
571 |
+
message["content-type"] = value
|
572 |
+
|
573 |
+
content_type, parameters = (
|
574 |
+
# Defaults to `text/plain` if parsing failed.
|
575 |
+
message.get_content_type().lower(),
|
576 |
+
message["content-type"].params,
|
577 |
+
)
|
578 |
+
# Check if content-type is valid or defaulted to `text/plain` and thus was
|
579 |
+
# not parseable.
|
580 |
+
if content_type not in content_types or content_type not in value.lower():
|
581 |
+
raise self._invalid_metadata(
|
582 |
+
f"{{field}} must be one of {list(content_types)}, not {value!r}"
|
583 |
+
)
|
584 |
+
|
585 |
+
charset = parameters.get("charset", "UTF-8")
|
586 |
+
if charset != "UTF-8":
|
587 |
+
raise self._invalid_metadata(
|
588 |
+
f"{{field}} can only specify the UTF-8 charset, not {list(charset)}"
|
589 |
+
)
|
590 |
+
|
591 |
+
markdown_variants = {"GFM", "CommonMark"}
|
592 |
+
variant = parameters.get("variant", "GFM") # Use an acceptable default.
|
593 |
+
if content_type == "text/markdown" and variant not in markdown_variants:
|
594 |
+
raise self._invalid_metadata(
|
595 |
+
f"valid Markdown variants for {{field}} are {list(markdown_variants)}, "
|
596 |
+
f"not {variant!r}",
|
597 |
+
)
|
598 |
+
return value
|
599 |
+
|
600 |
+
def _process_dynamic(self, value: list[str]) -> list[str]:
|
601 |
+
for dynamic_field in map(str.lower, value):
|
602 |
+
if dynamic_field in {"name", "version", "metadata-version"}:
|
603 |
+
raise self._invalid_metadata(
|
604 |
+
f"{dynamic_field!r} is not allowed as a dynamic field"
|
605 |
+
)
|
606 |
+
elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
|
607 |
+
raise self._invalid_metadata(
|
608 |
+
f"{dynamic_field!r} is not a valid dynamic field"
|
609 |
+
)
|
610 |
+
return list(map(str.lower, value))
|
611 |
+
|
612 |
+
def _process_provides_extra(
|
613 |
+
self,
|
614 |
+
value: list[str],
|
615 |
+
) -> list[utils.NormalizedName]:
|
616 |
+
normalized_names = []
|
617 |
+
try:
|
618 |
+
for name in value:
|
619 |
+
normalized_names.append(utils.canonicalize_name(name, validate=True))
|
620 |
+
except utils.InvalidName as exc:
|
621 |
+
raise self._invalid_metadata(
|
622 |
+
f"{name!r} is invalid for {{field}}", cause=exc
|
623 |
+
) from exc
|
624 |
+
else:
|
625 |
+
return normalized_names
|
626 |
+
|
627 |
+
def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
|
628 |
+
try:
|
629 |
+
return specifiers.SpecifierSet(value)
|
630 |
+
except specifiers.InvalidSpecifier as exc:
|
631 |
+
raise self._invalid_metadata(
|
632 |
+
f"{value!r} is invalid for {{field}}", cause=exc
|
633 |
+
) from exc
|
634 |
+
|
635 |
+
def _process_requires_dist(
|
636 |
+
self,
|
637 |
+
value: list[str],
|
638 |
+
) -> list[requirements.Requirement]:
|
639 |
+
reqs = []
|
640 |
+
try:
|
641 |
+
for req in value:
|
642 |
+
reqs.append(requirements.Requirement(req))
|
643 |
+
except requirements.InvalidRequirement as exc:
|
644 |
+
raise self._invalid_metadata(
|
645 |
+
f"{req!r} is invalid for {{field}}", cause=exc
|
646 |
+
) from exc
|
647 |
+
else:
|
648 |
+
return reqs
|
649 |
+
|
650 |
+
def _process_license_expression(
|
651 |
+
self, value: str
|
652 |
+
) -> NormalizedLicenseExpression | None:
|
653 |
+
try:
|
654 |
+
return licenses.canonicalize_license_expression(value)
|
655 |
+
except ValueError as exc:
|
656 |
+
raise self._invalid_metadata(
|
657 |
+
f"{value!r} is invalid for {{field}}", cause=exc
|
658 |
+
) from exc
|
659 |
+
|
660 |
+
def _process_license_files(self, value: list[str]) -> list[str]:
|
661 |
+
paths = []
|
662 |
+
for path in value:
|
663 |
+
if ".." in path:
|
664 |
+
raise self._invalid_metadata(
|
665 |
+
f"{path!r} is invalid for {{field}}, "
|
666 |
+
"parent directory indicators are not allowed"
|
667 |
+
)
|
668 |
+
if "*" in path:
|
669 |
+
raise self._invalid_metadata(
|
670 |
+
f"{path!r} is invalid for {{field}}, paths must be resolved"
|
671 |
+
)
|
672 |
+
if (
|
673 |
+
pathlib.PurePosixPath(path).is_absolute()
|
674 |
+
or pathlib.PureWindowsPath(path).is_absolute()
|
675 |
+
):
|
676 |
+
raise self._invalid_metadata(
|
677 |
+
f"{path!r} is invalid for {{field}}, paths must be relative"
|
678 |
+
)
|
679 |
+
if pathlib.PureWindowsPath(path).as_posix() != path:
|
680 |
+
raise self._invalid_metadata(
|
681 |
+
f"{path!r} is invalid for {{field}}, "
|
682 |
+
"paths must use '/' delimiter"
|
683 |
+
)
|
684 |
+
paths.append(path)
|
685 |
+
return paths
|
686 |
+
|
687 |
+
|
688 |
+
class Metadata:
|
689 |
+
"""Representation of distribution metadata.
|
690 |
+
|
691 |
+
Compared to :class:`RawMetadata`, this class provides objects representing
|
692 |
+
metadata fields instead of only using built-in types. Any invalid metadata
|
693 |
+
will cause :exc:`InvalidMetadata` to be raised (with a
|
694 |
+
:py:attr:`~BaseException.__cause__` attribute as appropriate).
|
695 |
+
"""
|
696 |
+
|
697 |
+
_raw: RawMetadata
|
698 |
+
|
699 |
+
@classmethod
|
700 |
+
def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> Metadata:
|
701 |
+
"""Create an instance from :class:`RawMetadata`.
|
702 |
+
|
703 |
+
If *validate* is true, all metadata will be validated. All exceptions
|
704 |
+
related to validation will be gathered and raised as an :class:`ExceptionGroup`.
|
705 |
+
"""
|
706 |
+
ins = cls()
|
707 |
+
ins._raw = data.copy() # Mutations occur due to caching enriched values.
|
708 |
+
|
709 |
+
if validate:
|
710 |
+
exceptions: list[Exception] = []
|
711 |
+
try:
|
712 |
+
metadata_version = ins.metadata_version
|
713 |
+
metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
|
714 |
+
except InvalidMetadata as metadata_version_exc:
|
715 |
+
exceptions.append(metadata_version_exc)
|
716 |
+
metadata_version = None
|
717 |
+
|
718 |
+
# Make sure to check for the fields that are present, the required
|
719 |
+
# fields (so their absence can be reported).
|
720 |
+
fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
|
721 |
+
# Remove fields that have already been checked.
|
722 |
+
fields_to_check -= {"metadata_version"}
|
723 |
+
|
724 |
+
for key in fields_to_check:
|
725 |
+
try:
|
726 |
+
if metadata_version:
|
727 |
+
# Can't use getattr() as that triggers descriptor protocol which
|
728 |
+
# will fail due to no value for the instance argument.
|
729 |
+
try:
|
730 |
+
field_metadata_version = cls.__dict__[key].added
|
731 |
+
except KeyError:
|
732 |
+
exc = InvalidMetadata(key, f"unrecognized field: {key!r}")
|
733 |
+
exceptions.append(exc)
|
734 |
+
continue
|
735 |
+
field_age = _VALID_METADATA_VERSIONS.index(
|
736 |
+
field_metadata_version
|
737 |
+
)
|
738 |
+
if field_age > metadata_age:
|
739 |
+
field = _RAW_TO_EMAIL_MAPPING[key]
|
740 |
+
exc = InvalidMetadata(
|
741 |
+
field,
|
742 |
+
f"{field} introduced in metadata version "
|
743 |
+
f"{field_metadata_version}, not {metadata_version}",
|
744 |
+
)
|
745 |
+
exceptions.append(exc)
|
746 |
+
continue
|
747 |
+
getattr(ins, key)
|
748 |
+
except InvalidMetadata as exc:
|
749 |
+
exceptions.append(exc)
|
750 |
+
|
751 |
+
if exceptions:
|
752 |
+
raise ExceptionGroup("invalid metadata", exceptions)
|
753 |
+
|
754 |
+
return ins
|
755 |
+
|
756 |
+
@classmethod
|
757 |
+
def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata:
|
758 |
+
"""Parse metadata from email headers.
|
759 |
+
|
760 |
+
If *validate* is true, the metadata will be validated. All exceptions
|
761 |
+
related to validation will be gathered and raised as an :class:`ExceptionGroup`.
|
762 |
+
"""
|
763 |
+
raw, unparsed = parse_email(data)
|
764 |
+
|
765 |
+
if validate:
|
766 |
+
exceptions: list[Exception] = []
|
767 |
+
for unparsed_key in unparsed:
|
768 |
+
if unparsed_key in _EMAIL_TO_RAW_MAPPING:
|
769 |
+
message = f"{unparsed_key!r} has invalid data"
|
770 |
+
else:
|
771 |
+
message = f"unrecognized field: {unparsed_key!r}"
|
772 |
+
exceptions.append(InvalidMetadata(unparsed_key, message))
|
773 |
+
|
774 |
+
if exceptions:
|
775 |
+
raise ExceptionGroup("unparsed", exceptions)
|
776 |
+
|
777 |
+
try:
|
778 |
+
return cls.from_raw(raw, validate=validate)
|
779 |
+
except ExceptionGroup as exc_group:
|
780 |
+
raise ExceptionGroup(
|
781 |
+
"invalid or unparsed metadata", exc_group.exceptions
|
782 |
+
) from None
|
783 |
+
|
784 |
+
metadata_version: _Validator[_MetadataVersion] = _Validator()
|
785 |
+
""":external:ref:`core-metadata-metadata-version`
|
786 |
+
(required; validated to be a valid metadata version)"""
|
787 |
+
# `name` is not normalized/typed to NormalizedName so as to provide access to
|
788 |
+
# the original/raw name.
|
789 |
+
name: _Validator[str] = _Validator()
|
790 |
+
""":external:ref:`core-metadata-name`
|
791 |
+
(required; validated using :func:`~packaging.utils.canonicalize_name` and its
|
792 |
+
*validate* parameter)"""
|
793 |
+
version: _Validator[version_module.Version] = _Validator()
|
794 |
+
""":external:ref:`core-metadata-version` (required)"""
|
795 |
+
dynamic: _Validator[list[str] | None] = _Validator(
|
796 |
+
added="2.2",
|
797 |
+
)
|
798 |
+
""":external:ref:`core-metadata-dynamic`
|
799 |
+
(validated against core metadata field names and lowercased)"""
|
800 |
+
platforms: _Validator[list[str] | None] = _Validator()
|
801 |
+
""":external:ref:`core-metadata-platform`"""
|
802 |
+
supported_platforms: _Validator[list[str] | None] = _Validator(added="1.1")
|
803 |
+
""":external:ref:`core-metadata-supported-platform`"""
|
804 |
+
summary: _Validator[str | None] = _Validator()
|
805 |
+
""":external:ref:`core-metadata-summary` (validated to contain no newlines)"""
|
806 |
+
description: _Validator[str | None] = _Validator() # TODO 2.1: can be in body
|
807 |
+
""":external:ref:`core-metadata-description`"""
|
808 |
+
description_content_type: _Validator[str | None] = _Validator(added="2.1")
|
809 |
+
""":external:ref:`core-metadata-description-content-type` (validated)"""
|
810 |
+
keywords: _Validator[list[str] | None] = _Validator()
|
811 |
+
""":external:ref:`core-metadata-keywords`"""
|
812 |
+
home_page: _Validator[str | None] = _Validator()
|
813 |
+
""":external:ref:`core-metadata-home-page`"""
|
814 |
+
download_url: _Validator[str | None] = _Validator(added="1.1")
|
815 |
+
""":external:ref:`core-metadata-download-url`"""
|
816 |
+
author: _Validator[str | None] = _Validator()
|
817 |
+
""":external:ref:`core-metadata-author`"""
|
818 |
+
author_email: _Validator[str | None] = _Validator()
|
819 |
+
""":external:ref:`core-metadata-author-email`"""
|
820 |
+
maintainer: _Validator[str | None] = _Validator(added="1.2")
|
821 |
+
""":external:ref:`core-metadata-maintainer`"""
|
822 |
+
maintainer_email: _Validator[str | None] = _Validator(added="1.2")
|
823 |
+
""":external:ref:`core-metadata-maintainer-email`"""
|
824 |
+
license: _Validator[str | None] = _Validator()
|
825 |
+
""":external:ref:`core-metadata-license`"""
|
826 |
+
license_expression: _Validator[NormalizedLicenseExpression | None] = _Validator(
|
827 |
+
added="2.4"
|
828 |
+
)
|
829 |
+
""":external:ref:`core-metadata-license-expression`"""
|
830 |
+
license_files: _Validator[list[str] | None] = _Validator(added="2.4")
|
831 |
+
""":external:ref:`core-metadata-license-file`"""
|
832 |
+
classifiers: _Validator[list[str] | None] = _Validator(added="1.1")
|
833 |
+
""":external:ref:`core-metadata-classifier`"""
|
834 |
+
requires_dist: _Validator[list[requirements.Requirement] | None] = _Validator(
|
835 |
+
added="1.2"
|
836 |
+
)
|
837 |
+
""":external:ref:`core-metadata-requires-dist`"""
|
838 |
+
requires_python: _Validator[specifiers.SpecifierSet | None] = _Validator(
|
839 |
+
added="1.2"
|
840 |
+
)
|
841 |
+
""":external:ref:`core-metadata-requires-python`"""
|
842 |
+
# Because `Requires-External` allows for non-PEP 440 version specifiers, we
|
843 |
+
# don't do any processing on the values.
|
844 |
+
requires_external: _Validator[list[str] | None] = _Validator(added="1.2")
|
845 |
+
""":external:ref:`core-metadata-requires-external`"""
|
846 |
+
project_urls: _Validator[dict[str, str] | None] = _Validator(added="1.2")
|
847 |
+
""":external:ref:`core-metadata-project-url`"""
|
848 |
+
# PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
|
849 |
+
# regardless of metadata version.
|
850 |
+
provides_extra: _Validator[list[utils.NormalizedName] | None] = _Validator(
|
851 |
+
added="2.1",
|
852 |
+
)
|
853 |
+
""":external:ref:`core-metadata-provides-extra`"""
|
854 |
+
provides_dist: _Validator[list[str] | None] = _Validator(added="1.2")
|
855 |
+
""":external:ref:`core-metadata-provides-dist`"""
|
856 |
+
obsoletes_dist: _Validator[list[str] | None] = _Validator(added="1.2")
|
857 |
+
""":external:ref:`core-metadata-obsoletes-dist`"""
|
858 |
+
requires: _Validator[list[str] | None] = _Validator(added="1.1")
|
859 |
+
"""``Requires`` (deprecated)"""
|
860 |
+
provides: _Validator[list[str] | None] = _Validator(added="1.1")
|
861 |
+
"""``Provides`` (deprecated)"""
|
862 |
+
obsoletes: _Validator[list[str] | None] = _Validator(added="1.1")
|
863 |
+
"""``Obsoletes`` (deprecated)"""
|
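To round off the metadata module, a sketch of the higher-level Metadata API defined above; the METADATA strings are the same kind of invented example, and the enriched attribute types (Version, SpecifierSet, Requirement) replace the plain strings returned by parse_email().

from packaging.metadata import Metadata

GOOD = "Metadata-Version: 2.1\nName: example-package\nVersion: 1.0.0\nRequires-Dist: requests>=2.0\n"
meta = Metadata.from_email(GOOD)
print(meta.metadata_version)  # 2.1
print(meta.version)           # 1.0.0 (a version.Version, not a str)
print(meta.requires_dist)     # [<Requirement('requests>=2.0')>]

# With validate=True (the default) all field problems are gathered into one ExceptionGroup.
BAD = "Metadata-Version: 2.1\nName: example-package\nVersion: not a version\n"
try:
    Metadata.from_email(BAD)
except Exception as exc:  # ExceptionGroup on 3.11+, the shim defined above otherwise
    print(type(exc).__name__, getattr(exc, "exceptions", exc))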
meow/lib/python3.13/site-packages/packaging/py.typed
ADDED
File without changes