nisten committed · Commit dbe1b18 · verified · 1 parent: 1a8f5f5

Add files using upload-large-folder tool

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. README.md +3 -0
  2. added_tokens.json +26 -0
  3. config.json +29 -0
  4. generation_config.json +12 -0
  5. meow/.gitignore +2 -0
  6. meow/bin/Activate.ps1 +248 -0
  7. meow/bin/activate +75 -0
  8. meow/bin/activate.csh +27 -0
  9. meow/bin/activate.fish +69 -0
  10. meow/bin/huggingface-cli +8 -0
  11. meow/bin/normalizer +8 -0
  12. meow/bin/pip +8 -0
  13. meow/bin/pip3 +8 -0
  14. meow/bin/pip3.13 +8 -0
  15. meow/bin/python +0 -0
  16. meow/bin/python3 +0 -0
  17. meow/bin/python3.13 +0 -0
  18. meow/bin/tqdm +8 -0
  19. meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/INSTALLER +1 -0
  20. meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/METADATA +59 -0
  21. meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/RECORD +24 -0
  22. meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/WHEEL +4 -0
  23. meow/lib/python3.13/site-packages/huggingface_hub/_commit_api.py +729 -0
  24. meow/lib/python3.13/site-packages/huggingface_hub/_space_api.py +160 -0
  25. meow/lib/python3.13/site-packages/huggingface_hub/_tensorboard_logger.py +194 -0
  26. meow/lib/python3.13/site-packages/huggingface_hub/_webhooks_payload.py +137 -0
  27. meow/lib/python3.13/site-packages/huggingface_hub/_webhooks_server.py +386 -0
  28. meow/lib/python3.13/site-packages/huggingface_hub/constants.py +225 -0
  29. meow/lib/python3.13/site-packages/huggingface_hub/fastai_utils.py +425 -0
  30. meow/lib/python3.13/site-packages/huggingface_hub/hf_file_system.py +1140 -0
  31. meow/lib/python3.13/site-packages/huggingface_hub/hub_mixin.py +833 -0
  32. meow/lib/python3.13/site-packages/huggingface_hub/keras_mixin.py +500 -0
  33. meow/lib/python3.13/site-packages/huggingface_hub/repocard.py +830 -0
  34. meow/lib/python3.13/site-packages/huggingface_hub/repocard_data.py +749 -0
  35. meow/lib/python3.13/site-packages/huggingface_hub/repository.py +1477 -0
  36. meow/lib/python3.13/site-packages/idna-3.10.dist-info/INSTALLER +1 -0
  37. meow/lib/python3.13/site-packages/idna-3.10.dist-info/LICENSE.md +31 -0
  38. meow/lib/python3.13/site-packages/idna-3.10.dist-info/METADATA +250 -0
  39. meow/lib/python3.13/site-packages/idna-3.10.dist-info/RECORD +22 -0
  40. meow/lib/python3.13/site-packages/idna-3.10.dist-info/WHEEL +4 -0
  41. meow/lib/python3.13/site-packages/packaging/__init__.py +15 -0
  42. meow/lib/python3.13/site-packages/packaging/_elffile.py +110 -0
  43. meow/lib/python3.13/site-packages/packaging/_manylinux.py +263 -0
  44. meow/lib/python3.13/site-packages/packaging/_musllinux.py +85 -0
  45. meow/lib/python3.13/site-packages/packaging/_parser.py +354 -0
  46. meow/lib/python3.13/site-packages/packaging/_structures.py +61 -0
  47. meow/lib/python3.13/site-packages/packaging/_tokenizer.py +194 -0
  48. meow/lib/python3.13/site-packages/packaging/markers.py +331 -0
  49. meow/lib/python3.13/site-packages/packaging/metadata.py +863 -0
  50. meow/lib/python3.13/site-packages/packaging/py.typed +0 -0
README.md ADDED
@@ -0,0 +1,3 @@
+ ---
+ base_model: Qwen/Qwen2.5-72B-Instruct
+ ---
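The three `+` lines above are the model card's YAML front matter; `base_model` is the field the Hub uses to link this merge back to Qwen/Qwen2.5-72B-Instruct. As a minimal sketch (assuming `huggingface_hub` is installed; `"user/repo"` is a placeholder for wherever this README lands), the metadata can also be read programmatically:

```python
# Minimal sketch: parse the YAML front matter of a model card.
# "user/repo" is a placeholder; substitute the repo this commit targets.
from huggingface_hub import ModelCard

card = ModelCard.load("user/repo")
print(card.data.base_model)  # -> "Qwen/Qwen2.5-72B-Instruct"
```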
added_tokens.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "</tool_call>": 151658,
+   "<bot_end>": 151666,
+   "<human_end>": 151665,
+   "<tool_call>": 151657,
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|file_sep|>": 151664,
+   "<|fim_middle|>": 151660,
+   "<|fim_pad|>": 151662,
+   "<|fim_prefix|>": 151659,
+   "<|fim_suffix|>": 151661,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|repo_name|>": 151663,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
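These are the stock Qwen2.5 special tokens plus two extras (`<bot_end>`, `<human_end>`) appended at IDs 151665-151666. A minimal sketch to confirm the mapping, assuming the full tokenizer files ship alongside this JSON in the checkpoint directory (`"."` is a placeholder):

```python
# Minimal sketch: check that added tokens resolve to the IDs listed above.
# Assumes the checkpoint directory (".") also contains tokenizer.json etc.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(".")
assert tok.convert_tokens_to_ids("<|im_start|>") == 151644
assert tok.convert_tokens_to_ids("<bot_end>") == 151666
```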
config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "_name_or_path": "nisten/experiment-77b",
+   "architectures": [
+     "Qwen2ForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 151643,
+   "eos_token_id": 151645,
+   "hidden_act": "silu",
+   "hidden_size": 8192,
+   "initializer_range": 0.02,
+   "intermediate_size": 29568,
+   "max_position_embeddings": 32768,
+   "max_window_layers": 70,
+   "model_type": "qwen2",
+   "num_attention_heads": 64,
+   "num_hidden_layers": 86,
+   "num_key_value_heads": 8,
+   "rms_norm_eps": 1e-06,
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": null,
+   "rope_theta": 1000000.0,
+   "sliding_window": null,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.47.1",
+   "use_cache": true,
+   "use_sliding_window": false,
+   "vocab_size": 151672
+ }
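The config keeps Qwen2.5-72B's width (8192 hidden, 64 heads, 8 KV heads) but declares 86 hidden layers, consistent with a depth-extended mergekit merge (hence the `experiment-77b` name). A minimal sketch to inspect it, assuming `transformers` is installed and `"."` stands in for the checkpoint directory:

```python
# Minimal sketch: load this config and read the model geometry.
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained(".")
print(cfg.model_type, cfg.num_hidden_layers)  # qwen2 86
print(cfg.hidden_size, cfg.num_attention_heads, cfg.num_key_value_heads)  # 8192 64 8
```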
generation_config.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "bos_token_id": 151643,
+   "do_sample": true,
+   "eos_token_id": [
+     151645,
+     151643
+   ],
+   "pad_token_id": 151643,
+   "temperature": 0.7,
+   "top_p": 1.0,
+   "transformers_version": "4.46.2"
+ }
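`model.generate()` picks these defaults up automatically when this file sits next to the weights; the equivalent object can also be built by hand. A minimal sketch, assuming `transformers` is installed:

```python
# Minimal sketch: the same defaults as generation_config.json, built in code.
from transformers import GenerationConfig

gen = GenerationConfig(
    bos_token_id=151643,
    eos_token_id=[151645, 151643],  # stop on <|im_end|> or <|endoftext|>
    pad_token_id=151643,
    do_sample=True,
    temperature=0.7,
    top_p=1.0,
)
```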
meow/.gitignore ADDED
@@ -0,0 +1,2 @@
+ # Created by venv; see https://docs.python.org/3/library/venv.html
+ *
meow/bin/Activate.ps1 ADDED
@@ -0,0 +1,248 @@
+ <#
+ .Synopsis
+ Activate a Python virtual environment for the current PowerShell session.
+
+ .Description
+ Pushes the python executable for a virtual environment to the front of the
+ $Env:PATH environment variable and sets the prompt to signify that you are
+ in a Python virtual environment. Makes use of the command line switches as
+ well as the `pyvenv.cfg` file values present in the virtual environment.
+
+ .Parameter VenvDir
+ Path to the directory that contains the virtual environment to activate. The
+ default value for this is the parent of the directory that the Activate.ps1
+ script is located within.
+
+ .Parameter Prompt
+ The prompt prefix to display when this virtual environment is activated. By
+ default, this prompt is the name of the virtual environment folder (VenvDir)
+ surrounded by parentheses and followed by a single space (ie. '(.venv) ').
+
+ .Example
+ Activate.ps1
+ Activates the Python virtual environment that contains the Activate.ps1 script.
+
+ .Example
+ Activate.ps1 -Verbose
+ Activates the Python virtual environment that contains the Activate.ps1 script,
+ and shows extra information about the activation as it executes.
+
+ .Example
+ Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
+ Activates the Python virtual environment located in the specified location.
+
+ .Example
+ Activate.ps1 -Prompt "MyPython"
+ Activates the Python virtual environment that contains the Activate.ps1 script,
+ and prefixes the current prompt with the specified string (surrounded in
+ parentheses) while the virtual environment is active.
+
+ .Notes
+ On Windows, it may be required to enable this Activate.ps1 script by setting the
+ execution policy for the user. You can do this by issuing the following PowerShell
+ command:
+
+ PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
+
+ For more information on Execution Policies:
+ https://go.microsoft.com/fwlink/?LinkID=135170
+
+ #>
+ Param(
+     [Parameter(Mandatory = $false)]
+     [String]
+     $VenvDir,
+     [Parameter(Mandatory = $false)]
+     [String]
+     $Prompt
+ )
+
+ <# Function declarations --------------------------------------------------- #>
+
+ <#
+ .Synopsis
+ Remove all shell session elements added by the Activate script, including the
+ addition of the virtual environment's Python executable from the beginning of
+ the PATH variable.
+
+ .Parameter NonDestructive
+ If present, do not remove this function from the global namespace for the
+ session.
+
+ #>
+ function global:deactivate ([switch]$NonDestructive) {
+     # Revert to original values
+
+     # The prior prompt:
+     if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
+         Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
+         Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
+     }
+
+     # The prior PYTHONHOME:
+     if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
+         Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
+         Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
+     }
+
+     # The prior PATH:
+     if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
+         Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
+         Remove-Item -Path Env:_OLD_VIRTUAL_PATH
+     }
+
+     # Just remove the VIRTUAL_ENV altogether:
+     if (Test-Path -Path Env:VIRTUAL_ENV) {
+         Remove-Item -Path env:VIRTUAL_ENV
+     }
+
+     # Just remove VIRTUAL_ENV_PROMPT altogether.
+     if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
+         Remove-Item -Path env:VIRTUAL_ENV_PROMPT
+     }
+
+     # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
+     if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
+         Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
+     }
+
+     # Leave deactivate function in the global namespace if requested:
+     if (-not $NonDestructive) {
+         Remove-Item -Path function:deactivate
+     }
+ }
+
+ <#
+ .Description
+ Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
+ given folder, and returns them in a map.
+
+ For each line in the pyvenv.cfg file, if that line can be parsed into exactly
+ two strings separated by `=` (with any amount of whitespace surrounding the =)
+ then it is considered a `key = value` line. The left hand string is the key,
+ the right hand is the value.
+
+ If the value starts with a `'` or a `"` then the first and last character is
+ stripped from the value before being captured.
+
+ .Parameter ConfigDir
+ Path to the directory that contains the `pyvenv.cfg` file.
+ #>
+ function Get-PyVenvConfig(
+     [String]
+     $ConfigDir
+ ) {
+     Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
+
+     # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
+     $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
+
+     # An empty map will be returned if no config file is found.
+     $pyvenvConfig = @{ }
+
+     if ($pyvenvConfigPath) {
+
+         Write-Verbose "File exists, parse `key = value` lines"
+         $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
+
+         $pyvenvConfigContent | ForEach-Object {
+             $keyval = $PSItem -split "\s*=\s*", 2
+             if ($keyval[0] -and $keyval[1]) {
+                 $val = $keyval[1]
+
+                 # Remove extraneous quotations around a string value.
+                 if ("'""".Contains($val.Substring(0, 1))) {
+                     $val = $val.Substring(1, $val.Length - 2)
+                 }
+
+                 $pyvenvConfig[$keyval[0]] = $val
+                 Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
+             }
+         }
+     }
+     return $pyvenvConfig
+ }
+
+
+ <# Begin Activate script --------------------------------------------------- #>
+
+ # Determine the containing directory of this script
+ $VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
+ $VenvExecDir = Get-Item -Path $VenvExecPath
+
+ Write-Verbose "Activation script is located in path: '$VenvExecPath'"
+ Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
+ Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
+
+ # Set values required in priority: CmdLine, ConfigFile, Default
+ # First, get the location of the virtual environment, it might not be
+ # VenvExecDir if specified on the command line.
+ if ($VenvDir) {
+     Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
+ }
+ else {
+     Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
+     $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
+     Write-Verbose "VenvDir=$VenvDir"
+ }
+
+ # Next, read the `pyvenv.cfg` file to determine any required value such
+ # as `prompt`.
+ $pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
+
+ # Next, set the prompt from the command line, or the config file, or
+ # just use the name of the virtual environment folder.
+ if ($Prompt) {
+     Write-Verbose "Prompt specified as argument, using '$Prompt'"
+ }
+ else {
+     Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
+     if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
+         Write-Verbose "  Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
+         $Prompt = $pyvenvCfg['prompt'];
+     }
+     else {
+         Write-Verbose "  Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
+         Write-Verbose "  Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
+         $Prompt = Split-Path -Path $venvDir -Leaf
+     }
+ }
+
+ Write-Verbose "Prompt = '$Prompt'"
+ Write-Verbose "VenvDir='$VenvDir'"
+
+ # Deactivate any currently active virtual environment, but leave the
+ # deactivate function in place.
+ deactivate -nondestructive
+
+ # Now set the environment variable VIRTUAL_ENV, used by many tools to determine
+ # that there is an activated venv.
+ $env:VIRTUAL_ENV = $VenvDir
+
+ $env:VIRTUAL_ENV_PROMPT = $Prompt
+
+ if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
+
+     Write-Verbose "Setting prompt to '$Prompt'"
+
+     # Set the prompt to include the env name
+     # Make sure _OLD_VIRTUAL_PROMPT is global
+     function global:_OLD_VIRTUAL_PROMPT { "" }
+     Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
+     New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
+
+     function global:prompt {
+         Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
+         _OLD_VIRTUAL_PROMPT
+     }
+ }
+
+ # Clear PYTHONHOME
+ if (Test-Path -Path Env:PYTHONHOME) {
+     Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
+     Remove-Item -Path Env:PYTHONHOME
+ }
+
+ # Add the venv to the PATH
+ Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
+ $Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
meow/bin/activate ADDED
@@ -0,0 +1,75 @@
+ # This file must be used with "source bin/activate" *from bash*
+ # You cannot run it directly
+
+ deactivate () {
+     # reset old environment variables
+     if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
+         PATH="${_OLD_VIRTUAL_PATH:-}"
+         export PATH
+         unset _OLD_VIRTUAL_PATH
+     fi
+     if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
+         PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
+         export PYTHONHOME
+         unset _OLD_VIRTUAL_PYTHONHOME
+     fi
+
+     # Call hash to forget past commands. Without forgetting
+     # past commands the $PATH changes we made may not be respected
+     hash -r 2> /dev/null
+
+     if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
+         PS1="${_OLD_VIRTUAL_PS1:-}"
+         export PS1
+         unset _OLD_VIRTUAL_PS1
+     fi
+
+     unset VIRTUAL_ENV
+     unset VIRTUAL_ENV_PROMPT
+     if [ ! "${1:-}" = "nondestructive" ] ; then
+         # Self destruct!
+         unset -f deactivate
+     fi
+ }
+
+ # unset irrelevant variables
+ deactivate nondestructive
+
+ # on Windows, a path can contain colons and backslashes and has to be converted:
+ case "$(uname)" in
+     CYGWIN*|MSYS*)
+         # transform D:\path\to\venv to /d/path/to/venv on MSYS
+         # and to /cygdrive/d/path/to/venv on Cygwin
+         VIRTUAL_ENV=$(cygpath "/Users/n/mergekit/merged/meow")
+         export VIRTUAL_ENV
+         ;;
+     *)
+         # use the path as-is
+         export VIRTUAL_ENV="/Users/n/mergekit/merged/meow"
+         ;;
+ esac
+
+ _OLD_VIRTUAL_PATH="$PATH"
+ PATH="$VIRTUAL_ENV/bin:$PATH"
+ export PATH
+
+ VIRTUAL_ENV_PROMPT="meow"
+ export VIRTUAL_ENV_PROMPT
+
+ # unset PYTHONHOME if set
+ # this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
+ # could use `if (set -u; : $PYTHONHOME) ;` in bash
+ if [ -n "${PYTHONHOME:-}" ] ; then
+     _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
+     unset PYTHONHOME
+ fi
+
+ if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
+     _OLD_VIRTUAL_PS1="${PS1:-}"
+     PS1="(meow) ${PS1:-}"
+     export PS1
+ fi
+
+ # Call hash to forget past commands. Without forgetting
+ # past commands the $PATH changes we made may not be respected
+ hash -r 2> /dev/null
meow/bin/activate.csh ADDED
@@ -0,0 +1,27 @@
+ # This file must be used with "source bin/activate.csh" *from csh*.
+ # You cannot run it directly.
+
+ # Created by Davide Di Blasi <[email protected]>.
+ # Ported to Python 3.3 venv by Andrew Svetlov <[email protected]>
+
+ alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'
+
+ # Unset irrelevant variables.
+ deactivate nondestructive
+
+ setenv VIRTUAL_ENV "/Users/n/mergekit/merged/meow"
+
+ set _OLD_VIRTUAL_PATH="$PATH"
+ setenv PATH "$VIRTUAL_ENV/bin:$PATH"
+ setenv VIRTUAL_ENV_PROMPT "meow"
+
+
+ set _OLD_VIRTUAL_PROMPT="$prompt"
+
+ if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
+     set prompt = "(meow) $prompt"
+ endif
+
+ alias pydoc python -m pydoc
+
+ rehash
meow/bin/activate.fish ADDED
@@ -0,0 +1,69 @@
+ # This file must be used with "source <venv>/bin/activate.fish" *from fish*
+ # (https://fishshell.com/). You cannot run it directly.
+
+ function deactivate -d "Exit virtual environment and return to normal shell environment"
+     # reset old environment variables
+     if test -n "$_OLD_VIRTUAL_PATH"
+         set -gx PATH $_OLD_VIRTUAL_PATH
+         set -e _OLD_VIRTUAL_PATH
+     end
+     if test -n "$_OLD_VIRTUAL_PYTHONHOME"
+         set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
+         set -e _OLD_VIRTUAL_PYTHONHOME
+     end
+
+     if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
+         set -e _OLD_FISH_PROMPT_OVERRIDE
+         # prevents error when using nested fish instances (Issue #93858)
+         if functions -q _old_fish_prompt
+             functions -e fish_prompt
+             functions -c _old_fish_prompt fish_prompt
+             functions -e _old_fish_prompt
+         end
+     end
+
+     set -e VIRTUAL_ENV
+     set -e VIRTUAL_ENV_PROMPT
+     if test "$argv[1]" != "nondestructive"
+         # Self-destruct!
+         functions -e deactivate
+     end
+ end
+
+ # Unset irrelevant variables.
+ deactivate nondestructive
+
+ set -gx VIRTUAL_ENV "/Users/n/mergekit/merged/meow"
+
+ set -gx _OLD_VIRTUAL_PATH $PATH
+ set -gx PATH "$VIRTUAL_ENV/bin" $PATH
+ set -gx VIRTUAL_ENV_PROMPT "meow"
+
+ # Unset PYTHONHOME if set.
+ if set -q PYTHONHOME
+     set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
+     set -e PYTHONHOME
+ end
+
+ if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
+     # fish uses a function instead of an env var to generate the prompt.
+
+     # Save the current fish_prompt function as the function _old_fish_prompt.
+     functions -c fish_prompt _old_fish_prompt
+
+     # With the original prompt function renamed, we can override with our own.
+     function fish_prompt
+         # Save the return status of the last command.
+         set -l old_status $status
+
+         # Output the venv prompt; color taken from the blue of the Python logo.
+         printf "%s(%s)%s " (set_color 4B8BBE) "meow" (set_color normal)
+
+         # Restore the return status of the previous command.
+         echo "exit $old_status" | .
+         # Output the original/"old" prompt.
+         _old_fish_prompt
+     end
+
+     set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
+ end
meow/bin/huggingface-cli ADDED
@@ -0,0 +1,8 @@
+ #!/Users/n/mergekit/merged/meow/bin/python3.13
+ # -*- coding: utf-8 -*-
+ import re
+ import sys
+ from huggingface_hub.commands.huggingface_cli import main
+ if __name__ == '__main__':
+     sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+     sys.exit(main())
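This is the standard console-script wrapper pip generates for every entry point (the `normalizer`, `pip*`, and `tqdm` files below follow the same template). The `re.sub` strips the `-script.pyw`/`.exe` suffixes pip appends on Windows so `argv[0]` carries a clean program name; a quick sketch of what it does:

```python
# What the wrapper's re.sub does to argv[0]:
import re

for name in ("huggingface-cli", "huggingface-cli.exe", "huggingface-cli-script.pyw"):
    print(re.sub(r"(-script\.pyw|\.exe)?$", "", name))
# all three print: huggingface-cli
```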
meow/bin/normalizer ADDED
@@ -0,0 +1,8 @@
+ #!/Users/n/mergekit/merged/meow/bin/python3.13
+ # -*- coding: utf-8 -*-
+ import re
+ import sys
+ from charset_normalizer import cli
+ if __name__ == '__main__':
+     sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+     sys.exit(cli.cli_detect())
meow/bin/pip ADDED
@@ -0,0 +1,8 @@
+ #!/Users/n/mergekit/merged/meow/bin/python3.13
+ # -*- coding: utf-8 -*-
+ import re
+ import sys
+ from pip._internal.cli.main import main
+ if __name__ == '__main__':
+     sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+     sys.exit(main())
meow/bin/pip3 ADDED
@@ -0,0 +1,8 @@
+ #!/Users/n/mergekit/merged/meow/bin/python3.13
+ # -*- coding: utf-8 -*-
+ import re
+ import sys
+ from pip._internal.cli.main import main
+ if __name__ == '__main__':
+     sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+     sys.exit(main())
meow/bin/pip3.13 ADDED
@@ -0,0 +1,8 @@
+ #!/Users/n/mergekit/merged/meow/bin/python3.13
+ # -*- coding: utf-8 -*-
+ import re
+ import sys
+ from pip._internal.cli.main import main
+ if __name__ == '__main__':
+     sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+     sys.exit(main())
meow/bin/python ADDED
Binary file (52.6 kB).
meow/bin/python3 ADDED
Binary file (52.6 kB).
meow/bin/python3.13 ADDED
Binary file (52.6 kB).
meow/bin/tqdm ADDED
@@ -0,0 +1,8 @@
+ #!/Users/n/mergekit/merged/meow/bin/python3.13
+ # -*- coding: utf-8 -*-
+ import re
+ import sys
+ from tqdm.cli import main
+ if __name__ == '__main__':
+     sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+     sys.exit(main())
meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
+ pip
meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/METADATA ADDED
@@ -0,0 +1,59 @@
+ Metadata-Version: 2.3
+ Name: filelock
+ Version: 3.16.1
+ Summary: A platform independent file lock.
+ Project-URL: Documentation, https://py-filelock.readthedocs.io
+ Project-URL: Homepage, https://github.com/tox-dev/py-filelock
+ Project-URL: Source, https://github.com/tox-dev/py-filelock
+ Project-URL: Tracker, https://github.com/tox-dev/py-filelock/issues
+ Maintainer-email: Bernát Gábor <[email protected]>
+ License-Expression: Unlicense
+ License-File: LICENSE
+ Keywords: application,cache,directory,log,user
+ Classifier: Development Status :: 5 - Production/Stable
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: The Unlicense (Unlicense)
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Topic :: Internet
+ Classifier: Topic :: Software Development :: Libraries
+ Classifier: Topic :: System
+ Requires-Python: >=3.8
+ Provides-Extra: docs
+ Requires-Dist: furo>=2024.8.6; extra == 'docs'
+ Requires-Dist: sphinx-autodoc-typehints>=2.4.1; extra == 'docs'
+ Requires-Dist: sphinx>=8.0.2; extra == 'docs'
+ Provides-Extra: testing
+ Requires-Dist: covdefaults>=2.3; extra == 'testing'
+ Requires-Dist: coverage>=7.6.1; extra == 'testing'
+ Requires-Dist: diff-cover>=9.2; extra == 'testing'
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'testing'
+ Requires-Dist: pytest-cov>=5; extra == 'testing'
+ Requires-Dist: pytest-mock>=3.14; extra == 'testing'
+ Requires-Dist: pytest-timeout>=2.3.1; extra == 'testing'
+ Requires-Dist: pytest>=8.3.3; extra == 'testing'
+ Requires-Dist: virtualenv>=20.26.4; extra == 'testing'
+ Provides-Extra: typing
+ Requires-Dist: typing-extensions>=4.12.2; (python_version < '3.11') and extra == 'typing'
+ Description-Content-Type: text/markdown
+
+ # filelock
+
+ [![PyPI](https://img.shields.io/pypi/v/filelock)](https://pypi.org/project/filelock/)
+ [![Supported Python
+ versions](https://img.shields.io/pypi/pyversions/filelock.svg)](https://pypi.org/project/filelock/)
+ [![Documentation
+ status](https://readthedocs.org/projects/py-filelock/badge/?version=latest)](https://py-filelock.readthedocs.io/en/latest/?badge=latest)
+ [![Code style:
+ black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+ [![Downloads](https://static.pepy.tech/badge/filelock/month)](https://pepy.tech/project/filelock)
+ [![check](https://github.com/tox-dev/py-filelock/actions/workflows/check.yml/badge.svg)](https://github.com/tox-dev/py-filelock/actions/workflows/check.yml)
+
+ For more information checkout the [official documentation](https://py-filelock.readthedocs.io/en/latest/index.html).
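For orientation, the package this metadata describes exposes a single core primitive. A minimal sketch of its documented API (the lock path is a placeholder):

```python
# Minimal sketch of filelock's core API: an inter-process lock.
from filelock import FileLock, Timeout

lock = FileLock("app.lock", timeout=5)  # "app.lock" is a placeholder path
try:
    with lock:
        ...  # critical section: one process at a time
except Timeout:
    print("another process is holding the lock")
```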
meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/RECORD ADDED
@@ -0,0 +1,24 @@
+ filelock-3.16.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+ filelock-3.16.1.dist-info/METADATA,sha256=LXL5-XQe_eTKkdNs76A6jSicQ1DBSTXqkDcjsprWvIM,2944
+ filelock-3.16.1.dist-info/RECORD,,
+ filelock-3.16.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+ filelock-3.16.1.dist-info/licenses/LICENSE,sha256=iNm062BXnBkew5HKBMFhMFctfu3EqG2qWL8oxuFMm80,1210
+ filelock/__init__.py,sha256=_t_-OAGXo_qyPa9lNQ1YnzVYEvSW3I0onPqzpomsVVg,1769
+ filelock/__pycache__/__init__.cpython-313.pyc,,
+ filelock/__pycache__/_api.cpython-313.pyc,,
+ filelock/__pycache__/_error.cpython-313.pyc,,
+ filelock/__pycache__/_soft.cpython-313.pyc,,
+ filelock/__pycache__/_unix.cpython-313.pyc,,
+ filelock/__pycache__/_util.cpython-313.pyc,,
+ filelock/__pycache__/_windows.cpython-313.pyc,,
+ filelock/__pycache__/asyncio.cpython-313.pyc,,
+ filelock/__pycache__/version.cpython-313.pyc,,
+ filelock/_api.py,sha256=GVeBEGjpDD8S1bYqG6_u0MZfbYHS6XrHs_n3PVKq-h0,14541
+ filelock/_error.py,sha256=-5jMcjTu60YAvAO1UbqDD1GIEjVkwr8xCFwDBtMeYDg,787
+ filelock/_soft.py,sha256=haqtc_TB_KJbYv2a8iuEAclKuM4fMG1vTcp28sK919c,1711
+ filelock/_unix.py,sha256=-FXP0tjInBHUYygOlMpp4taUmD87QOkrD_4ybg_iT7Q,2259
+ filelock/_util.py,sha256=QHBoNFIYfbAThhotH3Q8E2acFc84wpG49-T-uu017ZE,1715
+ filelock/_windows.py,sha256=eMKL8dZKrgekf5VYVGR14an29JGEInRtUO8ui9ABywg,2177
+ filelock/asyncio.py,sha256=3D4JP4Ms5IXTGib5eOekyr6uH6rZlieV_moVGY36juA,12463
+ filelock/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ filelock/version.py,sha256=KSOBzuLwiqiVWDPGfMj1ntr25YrY6JBDr8RvinQX_FM,413
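Each RECORD row is `path,sha256=<urlsafe-base64 digest, unpadded>,size` per the wheel spec. A minimal sketch of how an entry can be recomputed (the `py.typed` row above, size 0, is the hash of an empty file):

```python
# Minimal sketch: recompute a RECORD-style hash for an installed file.
import base64
import hashlib

def record_hash(path: str) -> str:
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()

# record_hash("filelock/py.typed") -> "sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU"
```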
meow/lib/python3.13/site-packages/filelock-3.16.1.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.25.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any
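`py3-none-any` is a standard compatibility tag: pure Python 3, no ABI requirement, any platform. As a sketch, the `packaging` library (itself vendored later in this listing) can decode it:

```python
# Minimal sketch: decode the WHEEL "Tag" field with packaging.
from packaging.tags import parse_tag

for tag in parse_tag("py3-none-any"):
    print(tag.interpreter, tag.abi, tag.platform)  # py3 none any
```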
meow/lib/python3.13/site-packages/huggingface_hub/_commit_api.py ADDED
@@ -0,0 +1,729 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Type definitions and utilities for the `create_commit` API
3
+ """
4
+
5
+ import base64
6
+ import io
7
+ import os
8
+ import warnings
9
+ from collections import defaultdict
10
+ from contextlib import contextmanager
11
+ from dataclasses import dataclass, field
12
+ from itertools import groupby
13
+ from pathlib import Path, PurePosixPath
14
+ from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Iterable, Iterator, List, Literal, Optional, Tuple, Union
15
+
16
+ from tqdm.contrib.concurrent import thread_map
17
+
18
+ from . import constants
19
+ from .errors import EntryNotFoundError
20
+ from .file_download import hf_hub_url
21
+ from .lfs import UploadInfo, lfs_upload, post_lfs_batch_info
22
+ from .utils import (
23
+ FORBIDDEN_FOLDERS,
24
+ chunk_iterable,
25
+ get_session,
26
+ hf_raise_for_status,
27
+ logging,
28
+ sha,
29
+ tqdm_stream_file,
30
+ validate_hf_hub_args,
31
+ )
32
+ from .utils import tqdm as hf_tqdm
33
+
34
+
35
+ if TYPE_CHECKING:
36
+ from .hf_api import RepoFile
37
+
38
+
39
+ logger = logging.get_logger(__name__)
40
+
41
+
42
+ UploadMode = Literal["lfs", "regular"]
43
+
44
+ # Max is 1,000 per request on the Hub for HfApi.get_paths_info
45
+ # Otherwise we get:
46
+ # HfHubHTTPError: 413 Client Error: Payload Too Large for url: https://huggingface.co/api/datasets/xxx (Request ID: xxx)\n\ntoo many parameters
47
+ # See https://github.com/huggingface/huggingface_hub/issues/1503
48
+ FETCH_LFS_BATCH_SIZE = 500
49
+
50
+
51
+ @dataclass
52
+ class CommitOperationDelete:
53
+ """
54
+ Data structure holding necessary info to delete a file or a folder from a repository
55
+ on the Hub.
56
+
57
+ Args:
58
+ path_in_repo (`str`):
59
+ Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
60
+ for a file or `"checkpoints/1fec34a/"` for a folder.
61
+ is_folder (`bool` or `Literal["auto"]`, *optional*)
62
+ Whether the Delete Operation applies to a folder or not. If "auto", the path
63
+ type (file or folder) is guessed automatically by looking if path ends with
64
+ a "/" (folder) or not (file). To explicitly set the path type, you can set
65
+ `is_folder=True` or `is_folder=False`.
66
+ """
67
+
68
+ path_in_repo: str
69
+ is_folder: Union[bool, Literal["auto"]] = "auto"
70
+
71
+ def __post_init__(self):
72
+ self.path_in_repo = _validate_path_in_repo(self.path_in_repo)
73
+
74
+ if self.is_folder == "auto":
75
+ self.is_folder = self.path_in_repo.endswith("/")
76
+ if not isinstance(self.is_folder, bool):
77
+ raise ValueError(
78
+ f"Wrong value for `is_folder`. Must be one of [`True`, `False`, `'auto'`]. Got '{self.is_folder}'."
79
+ )
80
+
81
+
82
+ @dataclass
83
+ class CommitOperationCopy:
84
+ """
85
+ Data structure holding necessary info to copy a file in a repository on the Hub.
86
+
87
+ Limitations:
88
+ - Only LFS files can be copied. To copy a regular file, you need to download it locally and re-upload it
89
+ - Cross-repository copies are not supported.
90
+
91
+ Note: you can combine a [`CommitOperationCopy`] and a [`CommitOperationDelete`] to rename an LFS file on the Hub.
92
+
93
+ Args:
94
+ src_path_in_repo (`str`):
95
+ Relative filepath in the repo of the file to be copied, e.g. `"checkpoints/1fec34a/weights.bin"`.
96
+ path_in_repo (`str`):
97
+ Relative filepath in the repo where to copy the file, e.g. `"checkpoints/1fec34a/weights_copy.bin"`.
98
+ src_revision (`str`, *optional*):
99
+ The git revision of the file to be copied. Can be any valid git revision.
100
+ Default to the target commit revision.
101
+ """
102
+
103
+ src_path_in_repo: str
104
+ path_in_repo: str
105
+ src_revision: Optional[str] = None
106
+
107
+ def __post_init__(self):
108
+ self.src_path_in_repo = _validate_path_in_repo(self.src_path_in_repo)
109
+ self.path_in_repo = _validate_path_in_repo(self.path_in_repo)
110
+
111
+
112
+ @dataclass
113
+ class CommitOperationAdd:
114
+ """
115
+ Data structure holding necessary info to upload a file to a repository on the Hub.
116
+
117
+ Args:
118
+ path_in_repo (`str`):
119
+ Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
120
+ path_or_fileobj (`str`, `Path`, `bytes`, or `BinaryIO`):
121
+ Either:
122
+ - a path to a local file (as `str` or `pathlib.Path`) to upload
123
+ - a buffer of bytes (`bytes`) holding the content of the file to upload
124
+ - a "file object" (subclass of `io.BufferedIOBase`), typically obtained
125
+ with `open(path, "rb")`. It must support `seek()` and `tell()` methods.
126
+
127
+ Raises:
128
+ [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
129
+ If `path_or_fileobj` is not one of `str`, `Path`, `bytes` or `io.BufferedIOBase`.
130
+ [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
131
+ If `path_or_fileobj` is a `str` or `Path` but not a path to an existing file.
132
+ [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
133
+ If `path_or_fileobj` is a `io.BufferedIOBase` but it doesn't support both
134
+ `seek()` and `tell()`.
135
+ """
136
+
137
+ path_in_repo: str
138
+ path_or_fileobj: Union[str, Path, bytes, BinaryIO]
139
+ upload_info: UploadInfo = field(init=False, repr=False)
140
+
141
+ # Internal attributes
142
+
143
+ # set to "lfs" or "regular" once known
144
+ _upload_mode: Optional[UploadMode] = field(init=False, repr=False, default=None)
145
+
146
+ # set to True if .gitignore rules prevent the file from being uploaded as LFS
147
+ # (server-side check)
148
+ _should_ignore: Optional[bool] = field(init=False, repr=False, default=None)
149
+
150
+ # set to the remote OID of the file if it has already been uploaded
151
+ # useful to determine if a commit will be empty or not
152
+ _remote_oid: Optional[str] = field(init=False, repr=False, default=None)
153
+
154
+ # set to True once the file has been uploaded as LFS
155
+ _is_uploaded: bool = field(init=False, repr=False, default=False)
156
+
157
+ # set to True once the file has been committed
158
+ _is_committed: bool = field(init=False, repr=False, default=False)
159
+
160
+ def __post_init__(self) -> None:
161
+ """Validates `path_or_fileobj` and compute `upload_info`."""
162
+ self.path_in_repo = _validate_path_in_repo(self.path_in_repo)
163
+
164
+ # Validate `path_or_fileobj` value
165
+ if isinstance(self.path_or_fileobj, Path):
166
+ self.path_or_fileobj = str(self.path_or_fileobj)
167
+ if isinstance(self.path_or_fileobj, str):
168
+ path_or_fileobj = os.path.normpath(os.path.expanduser(self.path_or_fileobj))
169
+ if not os.path.isfile(path_or_fileobj):
170
+ raise ValueError(f"Provided path: '{path_or_fileobj}' is not a file on the local file system")
171
+ elif not isinstance(self.path_or_fileobj, (io.BufferedIOBase, bytes)):
172
+ # ^^ Inspired from: https://stackoverflow.com/questions/44584829/how-to-determine-if-file-is-opened-in-binary-or-text-mode
173
+ raise ValueError(
174
+ "path_or_fileobj must be either an instance of str, bytes or"
175
+ " io.BufferedIOBase. If you passed a file-like object, make sure it is"
176
+ " in binary mode."
177
+ )
178
+ if isinstance(self.path_or_fileobj, io.BufferedIOBase):
179
+ try:
180
+ self.path_or_fileobj.tell()
181
+ self.path_or_fileobj.seek(0, os.SEEK_CUR)
182
+ except (OSError, AttributeError) as exc:
183
+ raise ValueError(
184
+ "path_or_fileobj is a file-like object but does not implement seek() and tell()"
185
+ ) from exc
186
+
187
+ # Compute "upload_info" attribute
188
+ if isinstance(self.path_or_fileobj, str):
189
+ self.upload_info = UploadInfo.from_path(self.path_or_fileobj)
190
+ elif isinstance(self.path_or_fileobj, bytes):
191
+ self.upload_info = UploadInfo.from_bytes(self.path_or_fileobj)
192
+ else:
193
+ self.upload_info = UploadInfo.from_fileobj(self.path_or_fileobj)
194
+
195
+ @contextmanager
196
+ def as_file(self, with_tqdm: bool = False) -> Iterator[BinaryIO]:
197
+ """
198
+ A context manager that yields a file-like object allowing to read the underlying
199
+ data behind `path_or_fileobj`.
200
+
201
+ Args:
202
+ with_tqdm (`bool`, *optional*, defaults to `False`):
203
+ If True, iterating over the file object will display a progress bar. Only
204
+ works if the file-like object is a path to a file. Pure bytes and buffers
205
+ are not supported.
206
+
207
+ Example:
208
+
209
+ ```python
210
+ >>> operation = CommitOperationAdd(
211
+ ... path_in_repo="remote/dir/weights.h5",
212
+ ... path_or_fileobj="./local/weights.h5",
213
+ ... )
214
+ CommitOperationAdd(path_in_repo='remote/dir/weights.h5', path_or_fileobj='./local/weights.h5')
215
+
216
+ >>> with operation.as_file() as file:
217
+ ... content = file.read()
218
+
219
+ >>> with operation.as_file(with_tqdm=True) as file:
220
+ ... while True:
221
+ ... data = file.read(1024)
222
+ ... if not data:
223
+ ... break
224
+ config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
225
+
226
+ >>> with operation.as_file(with_tqdm=True) as file:
227
+ ... requests.put(..., data=file)
228
+ config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
229
+ ```
230
+ """
231
+ if isinstance(self.path_or_fileobj, str) or isinstance(self.path_or_fileobj, Path):
232
+ if with_tqdm:
233
+ with tqdm_stream_file(self.path_or_fileobj) as file:
234
+ yield file
235
+ else:
236
+ with open(self.path_or_fileobj, "rb") as file:
237
+ yield file
238
+ elif isinstance(self.path_or_fileobj, bytes):
239
+ yield io.BytesIO(self.path_or_fileobj)
240
+ elif isinstance(self.path_or_fileobj, io.BufferedIOBase):
241
+ prev_pos = self.path_or_fileobj.tell()
242
+ yield self.path_or_fileobj
243
+ self.path_or_fileobj.seek(prev_pos, io.SEEK_SET)
244
+
245
+ def b64content(self) -> bytes:
246
+ """
247
+ The base64-encoded content of `path_or_fileobj`
248
+
249
+ Returns: `bytes`
250
+ """
251
+ with self.as_file() as file:
252
+ return base64.b64encode(file.read())
253
+
254
+ @property
255
+ def _local_oid(self) -> Optional[str]:
256
+ """Return the OID of the local file.
257
+
258
+ This OID is then compared to `self._remote_oid` to check if the file has changed compared to the remote one.
259
+ If the file did not change, we won't upload it again to prevent empty commits.
260
+
261
+ For LFS files, the OID corresponds to the SHA256 of the file content (used a LFS ref).
262
+ For regular files, the OID corresponds to the SHA1 of the file content.
263
+ Note: this is slightly different to git OID computation since the oid of an LFS file is usually the git-SHA1 of the
264
+ pointer file content (not the actual file content). However, using the SHA256 is enough to detect changes
265
+ and more convenient client-side.
266
+ """
267
+ if self._upload_mode is None:
268
+ return None
269
+ elif self._upload_mode == "lfs":
270
+ return self.upload_info.sha256.hex()
271
+ else:
272
+ # Regular file => compute sha1
273
+ # => no need to read by chunk since the file is guaranteed to be <=5MB.
274
+ with self.as_file() as file:
275
+ return sha.git_hash(file.read())
276
+
277
+
278
+ def _validate_path_in_repo(path_in_repo: str) -> str:
279
+ # Validate `path_in_repo` value to prevent a server-side issue
280
+ if path_in_repo.startswith("/"):
281
+ path_in_repo = path_in_repo[1:]
282
+ if path_in_repo == "." or path_in_repo == ".." or path_in_repo.startswith("../"):
283
+ raise ValueError(f"Invalid `path_in_repo` in CommitOperation: '{path_in_repo}'")
284
+ if path_in_repo.startswith("./"):
285
+ path_in_repo = path_in_repo[2:]
286
+ for forbidden in FORBIDDEN_FOLDERS:
287
+ if any(part == forbidden for part in path_in_repo.split("/")):
288
+ raise ValueError(
289
+ f"Invalid `path_in_repo` in CommitOperation: cannot update files under a '{forbidden}/' folder (path:"
290
+ f" '{path_in_repo}')."
291
+ )
292
+ return path_in_repo
293
+
294
+
295
+ CommitOperation = Union[CommitOperationAdd, CommitOperationCopy, CommitOperationDelete]
296
+
297
+
298
+ def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None:
299
+ """
300
+ Warn user when a list of operations is expected to overwrite itself in a single
301
+ commit.
302
+
303
+ Rules:
304
+ - If a filepath is updated by multiple `CommitOperationAdd` operations, a warning
305
+ message is triggered.
306
+ - If a filepath is updated at least once by a `CommitOperationAdd` and then deleted
307
+ by a `CommitOperationDelete`, a warning is triggered.
308
+ - If a `CommitOperationDelete` deletes a filepath that is then updated by a
309
+ `CommitOperationAdd`, no warning is triggered. This is usually useless (no need to
310
+ delete before upload) but can happen if a user deletes an entire folder and then
311
+ add new files to it.
312
+ """
313
+ nb_additions_per_path: Dict[str, int] = defaultdict(int)
314
+ for operation in operations:
315
+ path_in_repo = operation.path_in_repo
316
+ if isinstance(operation, CommitOperationAdd):
317
+ if nb_additions_per_path[path_in_repo] > 0:
318
+ warnings.warn(
319
+ "About to update multiple times the same file in the same commit:"
320
+ f" '{path_in_repo}'. This can cause undesired inconsistencies in"
321
+ " your repo."
322
+ )
323
+ nb_additions_per_path[path_in_repo] += 1
324
+ for parent in PurePosixPath(path_in_repo).parents:
325
+ # Also keep track of number of updated files per folder
326
+ # => warns if deleting a folder overwrite some contained files
327
+ nb_additions_per_path[str(parent)] += 1
328
+ if isinstance(operation, CommitOperationDelete):
329
+ if nb_additions_per_path[str(PurePosixPath(path_in_repo))] > 0:
330
+ if operation.is_folder:
331
+ warnings.warn(
332
+ "About to delete a folder containing files that have just been"
333
+ f" updated within the same commit: '{path_in_repo}'. This can"
334
+ " cause undesired inconsistencies in your repo."
335
+ )
336
+ else:
337
+ warnings.warn(
338
+ "About to delete a file that have just been updated within the"
339
+ f" same commit: '{path_in_repo}'. This can cause undesired"
340
+ " inconsistencies in your repo."
341
+ )
342
+
343
+
344
+ @validate_hf_hub_args
345
+ def _upload_lfs_files(
346
+ *,
347
+ additions: List[CommitOperationAdd],
348
+ repo_type: str,
349
+ repo_id: str,
350
+ headers: Dict[str, str],
351
+ endpoint: Optional[str] = None,
352
+ num_threads: int = 5,
353
+ revision: Optional[str] = None,
354
+ ):
355
+ """
356
+ Uploads the content of `additions` to the Hub using the large file storage protocol.
357
+
358
+ Relevant external documentation:
359
+ - LFS Batch API: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md
360
+
361
+ Args:
362
+ additions (`List` of `CommitOperationAdd`):
363
+ The files to be uploaded
364
+ repo_type (`str`):
365
+ Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
366
+ repo_id (`str`):
367
+ A namespace (user or an organization) and a repo name separated
368
+ by a `/`.
369
+ headers (`Dict[str, str]`):
370
+ Headers to use for the request, including authorization headers and user agent.
371
+ num_threads (`int`, *optional*):
372
+ The number of concurrent threads to use when uploading. Defaults to 5.
373
+ revision (`str`, *optional*):
374
+ The git revision to upload to.
375
+
376
+ Raises:
377
+ [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
378
+ If an upload failed for any reason
379
+ [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
380
+ If the server returns malformed responses
381
+ [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
382
+ If the LFS batch endpoint returned an HTTP error.
383
+ """
384
+ # Step 1: retrieve upload instructions from the LFS batch endpoint.
385
+ # Upload instructions are retrieved by chunk of 256 files to avoid reaching
386
+ # the payload limit.
387
+ batch_actions: List[Dict] = []
388
+ for chunk in chunk_iterable(additions, chunk_size=256):
389
+ batch_actions_chunk, batch_errors_chunk = post_lfs_batch_info(
390
+ upload_infos=[op.upload_info for op in chunk],
391
+ repo_id=repo_id,
392
+ repo_type=repo_type,
393
+ revision=revision,
394
+ endpoint=endpoint,
395
+ headers=headers,
396
+ token=None, # already passed in 'headers'
397
+ )
398
+
399
+ # If at least 1 error, we do not retrieve information for other chunks
400
+ if batch_errors_chunk:
401
+ message = "\n".join(
402
+ [
403
+ f'Encountered error for file with OID {err.get("oid")}: `{err.get("error", {}).get("message")}'
404
+ for err in batch_errors_chunk
405
+ ]
406
+ )
407
+ raise ValueError(f"LFS batch endpoint returned errors:\n{message}")
408
+
409
+ batch_actions += batch_actions_chunk
410
+ oid2addop = {add_op.upload_info.sha256.hex(): add_op for add_op in additions}
411
+
412
+ # Step 2: ignore files that have already been uploaded
413
+ filtered_actions = []
414
+ for action in batch_actions:
415
+ if action.get("actions") is None:
416
+ logger.debug(
417
+ f"Content of file {oid2addop[action['oid']].path_in_repo} is already"
418
+ " present upstream - skipping upload."
419
+ )
420
+ else:
421
+ filtered_actions.append(action)
422
+
423
+ if len(filtered_actions) == 0:
424
+ logger.debug("No LFS files to upload.")
425
+ return
426
+
427
+ # Step 3: upload files concurrently according to these instructions
428
+ def _wrapped_lfs_upload(batch_action) -> None:
429
+ try:
430
+ operation = oid2addop[batch_action["oid"]]
431
+ lfs_upload(operation=operation, lfs_batch_action=batch_action, headers=headers, endpoint=endpoint)
432
+ except Exception as exc:
433
+ raise RuntimeError(f"Error while uploading '{operation.path_in_repo}' to the Hub.") from exc
434
+
435
+ if constants.HF_HUB_ENABLE_HF_TRANSFER:
436
+ logger.debug(f"Uploading {len(filtered_actions)} LFS files to the Hub using `hf_transfer`.")
437
+ for action in hf_tqdm(filtered_actions, name="huggingface_hub.lfs_upload"):
438
+ _wrapped_lfs_upload(action)
439
+ elif len(filtered_actions) == 1:
440
+ logger.debug("Uploading 1 LFS file to the Hub")
441
+ _wrapped_lfs_upload(filtered_actions[0])
442
+ else:
443
+ logger.debug(
444
+ f"Uploading {len(filtered_actions)} LFS files to the Hub using up to {num_threads} threads concurrently"
445
+ )
446
+ thread_map(
447
+ _wrapped_lfs_upload,
448
+ filtered_actions,
449
+ desc=f"Upload {len(filtered_actions)} LFS files",
450
+ max_workers=num_threads,
451
+ tqdm_class=hf_tqdm,
452
+ )
453
+
454
+
455
+ def _validate_preupload_info(preupload_info: dict):
456
+ files = preupload_info.get("files")
457
+ if not isinstance(files, list):
458
+ raise ValueError("preupload_info is improperly formatted")
459
+ for file_info in files:
460
+ if not (
461
+ isinstance(file_info, dict)
462
+ and isinstance(file_info.get("path"), str)
463
+ and isinstance(file_info.get("uploadMode"), str)
464
+ and (file_info["uploadMode"] in ("lfs", "regular"))
465
+ ):
466
+ raise ValueError("preupload_info is improperly formatted:")
467
+ return preupload_info
468
+
469
+
470
+ @validate_hf_hub_args
471
+ def _fetch_upload_modes(
472
+ additions: Iterable[CommitOperationAdd],
473
+ repo_type: str,
474
+ repo_id: str,
475
+ headers: Dict[str, str],
476
+ revision: str,
477
+ endpoint: Optional[str] = None,
478
+ create_pr: bool = False,
479
+ gitignore_content: Optional[str] = None,
480
+ ) -> None:
481
+ """
482
+ Requests the Hub "preupload" endpoint to determine whether each input file should be uploaded as a regular git blob
483
+ or as git LFS blob. Input `additions` are mutated in-place with the upload mode.
484
+
485
+ Args:
486
+ additions (`Iterable` of :class:`CommitOperationAdd`):
487
+ Iterable of :class:`CommitOperationAdd` describing the files to
488
+ upload to the Hub.
489
+ repo_type (`str`):
490
+ Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
491
+ repo_id (`str`):
492
+ A namespace (user or an organization) and a repo name separated
493
+ by a `/`.
494
+ headers (`Dict[str, str]`):
495
+ Headers to use for the request, including authorization headers and user agent.
496
+ revision (`str`):
497
+ The git revision to upload the files to. Can be any valid git revision.
498
+ gitignore_content (`str`, *optional*):
499
+ The content of the `.gitignore` file to know which files should be ignored. The order of priority
500
+ is to first check if `gitignore_content` is passed, then check if the `.gitignore` file is present
501
+ in the list of files to commit and finally default to the `.gitignore` file already hosted on the Hub
502
+ (if any).
503
+ Raises:
504
+ [`~utils.HfHubHTTPError`]
505
+ If the Hub API returned an error.
506
+ [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
507
+ If the Hub API response is improperly formatted.
508
+ """
509
+ endpoint = endpoint if endpoint is not None else constants.ENDPOINT
510
+
511
+ # Fetch upload mode (LFS or regular) chunk by chunk.
512
+ upload_modes: Dict[str, UploadMode] = {}
513
+ should_ignore_info: Dict[str, bool] = {}
514
+ oid_info: Dict[str, Optional[str]] = {}
515
+
516
+ for chunk in chunk_iterable(additions, 256):
517
+ payload: Dict = {
518
+ "files": [
519
+ {
520
+ "path": op.path_in_repo,
521
+ "sample": base64.b64encode(op.upload_info.sample).decode("ascii"),
522
+ "size": op.upload_info.size,
523
+ }
524
+ for op in chunk
525
+ ]
526
+ }
527
+ if gitignore_content is not None:
528
+ payload["gitIgnore"] = gitignore_content
529
+
530
+ resp = get_session().post(
531
+ f"{endpoint}/api/{repo_type}s/{repo_id}/preupload/{revision}",
532
+ json=payload,
533
+ headers=headers,
534
+ params={"create_pr": "1"} if create_pr else None,
535
+ )
536
+ hf_raise_for_status(resp)
537
+ preupload_info = _validate_preupload_info(resp.json())
538
+ upload_modes.update(**{file["path"]: file["uploadMode"] for file in preupload_info["files"]})
539
+ should_ignore_info.update(**{file["path"]: file["shouldIgnore"] for file in preupload_info["files"]})
540
+ oid_info.update(**{file["path"]: file.get("oid") for file in preupload_info["files"]})
541
+
542
+ # Set upload mode for each addition operation
543
+ for addition in additions:
544
+ addition._upload_mode = upload_modes[addition.path_in_repo]
545
+ addition._should_ignore = should_ignore_info[addition.path_in_repo]
546
+ addition._remote_oid = oid_info[addition.path_in_repo]
547
+
548
+ # Empty files cannot be uploaded as LFS (S3 would fail with a 501 Not Implemented)
549
+ # => empty files are uploaded as "regular" to still allow users to commit them.
550
+ for addition in additions:
551
+ if addition.upload_info.size == 0:
552
+ addition._upload_mode = "regular"
553
+
554
+
555
+ @validate_hf_hub_args
+ def _fetch_files_to_copy(
+     copies: Iterable[CommitOperationCopy],
+     repo_type: str,
+     repo_id: str,
+     headers: Dict[str, str],
+     revision: str,
+     endpoint: Optional[str] = None,
+ ) -> Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]]:
+     """
+     Fetch information about the files to copy.
+
+     For LFS files, we only need their metadata (file size and sha256), while for regular files
+     we need to download the raw content from the Hub.
+
+     Args:
+         copies (`Iterable` of :class:`CommitOperationCopy`):
+             Iterable of :class:`CommitOperationCopy` describing the files to
+             copy on the Hub.
+         repo_type (`str`):
+             Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
+         repo_id (`str`):
+             A namespace (user or an organization) and a repo name separated
+             by a `/`.
+         headers (`Dict[str, str]`):
+             Headers to use for the request, including authorization headers and user agent.
+         revision (`str`):
+             The git revision to upload the files to. Can be any valid git revision.
+
+     Returns: `Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]]`
+         Keys are the (file path, revision) pairs of the files to copy.
+         Values are the raw content as bytes (for regular files) or the file information as a `RepoFile` (for LFS files).
+
+     Raises:
+         [`~utils.HfHubHTTPError`]
+             If the Hub API returned an error.
+         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+             If the Hub API response is improperly formatted.
+     """
+     from .hf_api import HfApi, RepoFolder
+
+     hf_api = HfApi(endpoint=endpoint, headers=headers)
+     files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]] = {}
+     for src_revision, operations in groupby(copies, key=lambda op: op.src_revision):
+         operations = list(operations)  # type: ignore
+         paths = [op.src_path_in_repo for op in operations]
+         for offset in range(0, len(paths), FETCH_LFS_BATCH_SIZE):
+             src_repo_files = hf_api.get_paths_info(
+                 repo_id=repo_id,
+                 paths=paths[offset : offset + FETCH_LFS_BATCH_SIZE],
+                 revision=src_revision or revision,
+                 repo_type=repo_type,
+             )
+             for src_repo_file in src_repo_files:
+                 if isinstance(src_repo_file, RepoFolder):
+                     raise NotImplementedError("Copying a folder is not implemented.")
+                 if src_repo_file.lfs:
+                     files_to_copy[(src_repo_file.path, src_revision)] = src_repo_file
+                 else:
+                     # TODO: (optimization) download regular files to copy concurrently
+                     url = hf_hub_url(
+                         endpoint=endpoint,
+                         repo_type=repo_type,
+                         repo_id=repo_id,
+                         revision=src_revision or revision,
+                         filename=src_repo_file.path,
+                     )
+                     response = get_session().get(url, headers=headers)
+                     hf_raise_for_status(response)
+                     files_to_copy[(src_repo_file.path, src_revision)] = response.content
+         for operation in operations:
+             if (operation.src_path_in_repo, src_revision) not in files_to_copy:
+                 raise EntryNotFoundError(
+                     f"Cannot copy {operation.src_path_in_repo} at revision "
+                     f"{src_revision or revision}: file is missing on repo."
+                 )
+     return files_to_copy
+
+
+ def _prepare_commit_payload(
+     operations: Iterable[CommitOperation],
+     files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]],
+     commit_message: str,
+     commit_description: Optional[str] = None,
+     parent_commit: Optional[str] = None,
+ ) -> Iterable[Dict[str, Any]]:
+     """
+     Builds the payload to POST to the `/commit` API of the Hub.
+
+     The payload is returned as an iterator so that it can be streamed as ndjson in the
+     POST request.
+
+     For more information, see:
+         - https://github.com/huggingface/huggingface_hub/issues/1085#issuecomment-1265208073
+         - http://ndjson.org/
+     """
+     commit_description = commit_description if commit_description is not None else ""
+
+     # 1. Send a header item with the commit metadata
+     header_value = {"summary": commit_message, "description": commit_description}
+     if parent_commit is not None:
+         header_value["parentCommit"] = parent_commit
+     yield {"key": "header", "value": header_value}
+
+     nb_ignored_files = 0
+
+     # 2. Send operations, one per line
+     for operation in operations:
+         # Skip ignored files
+         if isinstance(operation, CommitOperationAdd) and operation._should_ignore:
+             logger.debug(f"Skipping file '{operation.path_in_repo}' in commit (ignored by gitignore file).")
+             nb_ignored_files += 1
+             continue
+
+         # 2.a. Case adding a regular file
+         if isinstance(operation, CommitOperationAdd) and operation._upload_mode == "regular":
+             yield {
+                 "key": "file",
+                 "value": {
+                     "content": operation.b64content().decode(),
+                     "path": operation.path_in_repo,
+                     "encoding": "base64",
+                 },
+             }
+         # 2.b. Case adding an LFS file
+         elif isinstance(operation, CommitOperationAdd) and operation._upload_mode == "lfs":
+             yield {
+                 "key": "lfsFile",
+                 "value": {
+                     "path": operation.path_in_repo,
+                     "algo": "sha256",
+                     "oid": operation.upload_info.sha256.hex(),
+                     "size": operation.upload_info.size,
+                 },
+             }
+         # 2.c. Case deleting a file or folder
+         elif isinstance(operation, CommitOperationDelete):
+             yield {
+                 "key": "deletedFolder" if operation.is_folder else "deletedFile",
+                 "value": {"path": operation.path_in_repo},
+             }
+         # 2.d. Case copying a file or folder
+         elif isinstance(operation, CommitOperationCopy):
+             file_to_copy = files_to_copy[(operation.src_path_in_repo, operation.src_revision)]
+             if isinstance(file_to_copy, bytes):
+                 yield {
+                     "key": "file",
+                     "value": {
+                         "content": base64.b64encode(file_to_copy).decode(),
+                         "path": operation.path_in_repo,
+                         "encoding": "base64",
+                     },
+                 }
+             elif file_to_copy.lfs:
+                 yield {
+                     "key": "lfsFile",
+                     "value": {
+                         "path": operation.path_in_repo,
+                         "algo": "sha256",
+                         "oid": file_to_copy.lfs.sha256,
+                     },
+                 }
+             else:
+                 raise ValueError(
+                     "Malformed files_to_copy (should be raw file content as bytes or RepoFile objects with LFS info)."
+                 )
+         # 2.e. Never expected to happen
+         else:
+             raise ValueError(
+                 f"Unknown operation to commit. Operation: {operation}. Upload mode:"
+                 f" {getattr(operation, '_upload_mode', None)}"
+             )
+
+     if nb_ignored_files > 0:
+         logger.info(f"Skipped {nb_ignored_files} file(s) in commit (ignored by gitignore file).")
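The two helpers above pair up: `_fetch_files_to_copy` resolves the copy sources, and `_prepare_commit_payload` emits one JSON-serializable dict per line of the request body. A minimal sketch (not part of the diff; it reuses the names defined above) of how that iterator could be streamed as ndjson:

```python
# Sketch only: serialize the payload iterator as ndjson for the /commit POST.
# Assumes `operations` has already been preprocessed (upload modes resolved,
# LFS files uploaded) and `files_to_copy` was built by _fetch_files_to_copy.
import json

def payload_as_ndjson(operations, files_to_copy):
    for item in _prepare_commit_payload(
        operations=operations,
        files_to_copy=files_to_copy,
        commit_message="Upload files",
    ):
        # One JSON object per line, newline-delimited (ndjson).
        yield (json.dumps(item) + "\n").encode("utf-8")
```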
meow/lib/python3.13/site-packages/huggingface_hub/_space_api.py ADDED
@@ -0,0 +1,160 @@
+ # coding=utf-8
+ # Copyright 2019-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ from dataclasses import dataclass
+ from datetime import datetime
+ from enum import Enum
+ from typing import Dict, Optional
+
+ from huggingface_hub.utils import parse_datetime
+
+
+ class SpaceStage(str, Enum):
+     """
+     Enumeration of the possible stages of a Space on the Hub.
+
+     Value can be compared to a string:
+     ```py
+     assert SpaceStage.BUILDING == "BUILDING"
+     ```
+
+     Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceInfo.ts#L61 (private url).
+     """
+
+     # Copied from moon-landing > server > repo_types > SpaceInfo.ts (private repo)
+     NO_APP_FILE = "NO_APP_FILE"
+     CONFIG_ERROR = "CONFIG_ERROR"
+     BUILDING = "BUILDING"
+     BUILD_ERROR = "BUILD_ERROR"
+     RUNNING = "RUNNING"
+     RUNNING_BUILDING = "RUNNING_BUILDING"
+     RUNTIME_ERROR = "RUNTIME_ERROR"
+     DELETING = "DELETING"
+     STOPPED = "STOPPED"
+     PAUSED = "PAUSED"
+
+
+ class SpaceHardware(str, Enum):
+     """
+     Enumeration of the hardware options available to run your Space on the Hub.
+
+     Value can be compared to a string:
+     ```py
+     assert SpaceHardware.CPU_BASIC == "cpu-basic"
+     ```
+
+     Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceInfo.ts#L73 (private url).
+     """
+
+     CPU_BASIC = "cpu-basic"
+     CPU_UPGRADE = "cpu-upgrade"
+     T4_SMALL = "t4-small"
+     T4_MEDIUM = "t4-medium"
+     L4X1 = "l4x1"
+     L4X4 = "l4x4"
+     ZERO_A10G = "zero-a10g"
+     A10G_SMALL = "a10g-small"
+     A10G_LARGE = "a10g-large"
+     A10G_LARGEX2 = "a10g-largex2"
+     A10G_LARGEX4 = "a10g-largex4"
+     A100_LARGE = "a100-large"
+     V5E_1X1 = "v5e-1x1"
+     V5E_2X2 = "v5e-2x2"
+     V5E_2X4 = "v5e-2x4"
+
+
+ class SpaceStorage(str, Enum):
+     """
+     Enumeration of the persistent storage options available for your Space on the Hub.
+
+     Value can be compared to a string:
+     ```py
+     assert SpaceStorage.SMALL == "small"
+     ```
+
+     Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceHardwareFlavor.ts#L24 (private url).
+     """
+
+     SMALL = "small"
+     MEDIUM = "medium"
+     LARGE = "large"
+
+
+ @dataclass
+ class SpaceRuntime:
+     """
+     Contains information about the current runtime of a Space.
+
+     Args:
+         stage (`str`):
+             Current stage of the Space. Example: RUNNING.
+         hardware (`str` or `None`):
+             Current hardware of the Space. Example: "cpu-basic". Can be `None` if the Space
+             is `BUILDING` for the first time.
+         requested_hardware (`str` or `None`):
+             Requested hardware. Can be different from `hardware`, especially if the request
+             has just been made. Example: "t4-medium". Can be `None` if no hardware has
+             been requested yet.
+         sleep_time (`int` or `None`):
+             Number of seconds the Space will be kept alive after the last request. By default (if value is `None`), the
+             Space will never go to sleep if it's running on upgraded hardware, while it will go to sleep after 48
+             hours on free 'cpu-basic' hardware. For more details, see https://huggingface.co/docs/hub/spaces-gpus#sleep-time.
+         raw (`dict`):
+             Raw response from the server. Contains more information about the Space
+             runtime like number of replicas, number of cpus, memory size, ...
+     """
+
+     stage: SpaceStage
+     hardware: Optional[SpaceHardware]
+     requested_hardware: Optional[SpaceHardware]
+     sleep_time: Optional[int]
+     storage: Optional[SpaceStorage]
+     raw: Dict
+
+     def __init__(self, data: Dict) -> None:
+         self.stage = data["stage"]
+         self.hardware = data.get("hardware", {}).get("current")
+         self.requested_hardware = data.get("hardware", {}).get("requested")
+         self.sleep_time = data.get("gcTimeout")
+         self.storage = data.get("storage")
+         self.raw = data
+
+
+ @dataclass
+ class SpaceVariable:
+     """
+     Contains information about the current variables of a Space.
+
+     Args:
+         key (`str`):
+             Variable key. Example: `"MODEL_REPO_ID"`
+         value (`str`):
+             Variable value. Example: `"the_model_repo_id"`.
+         description (`str` or `None`):
+             Description of the variable. Example: `"Model Repo ID of the implemented model"`.
+         updated_at (`datetime` or `None`):
+             datetime of the last update of the variable (if the variable has been updated at least once).
+     """
+
+     key: str
+     value: str
+     description: Optional[str]
+     updated_at: Optional[datetime]
+
+     def __init__(self, key: str, values: Dict) -> None:
+         self.key = key
+         self.value = values["value"]
+         self.description = values.get("description")
+         updated_at = values.get("updatedAt")
+         self.updated_at = parse_datetime(updated_at) if updated_at is not None else None
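For orientation, a hedged usage sketch of the `SpaceRuntime` dataclass above; the dict below is hand-written to mirror the fields its `__init__` reads, not a real server response:

```python
# Illustrative only: parse a runtime payload shaped like the /runtime response.
runtime = SpaceRuntime(
    {
        "stage": "RUNNING",
        "hardware": {"current": "cpu-basic", "requested": "t4-small"},
        "gcTimeout": 3600,  # maps to sleep_time
    }
)
assert runtime.stage == SpaceStage.RUNNING  # str-enums compare equal to plain strings
assert runtime.storage is None  # no persistent storage in the payload
```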
meow/lib/python3.13/site-packages/huggingface_hub/_tensorboard_logger.py ADDED
@@ -0,0 +1,194 @@
+ # Copyright 2023 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains a logger to push training logs to the Hub, using Tensorboard."""
+
+ from pathlib import Path
+ from typing import TYPE_CHECKING, List, Optional, Union
+
+ from ._commit_scheduler import CommitScheduler
+ from .errors import EntryNotFoundError
+ from .repocard import ModelCard
+ from .utils import experimental
+
+
+ # Depending on user's setup, SummaryWriter can come either from 'tensorboardX'
+ # or from 'torch.utils.tensorboard'. Both are compatible so let's try to load
+ # from either of them.
+ try:
+     from tensorboardX import SummaryWriter
+
+     is_summary_writer_available = True
+
+ except ImportError:
+     try:
+         from torch.utils.tensorboard import SummaryWriter
+
+         is_summary_writer_available = True
+     except ImportError:
+         # Dummy class to avoid failing at import. Will raise on instance creation.
+         SummaryWriter = object
+         is_summary_writer_available = False
+
+ if TYPE_CHECKING:
+     from tensorboardX import SummaryWriter
+
+
+ class HFSummaryWriter(SummaryWriter):
+     """
+     Wrapper around tensorboard's `SummaryWriter` to push training logs to the Hub.
+
+     Data is logged locally and then pushed to the Hub asynchronously. Pushing data to the Hub is done in a separate
+     thread to avoid blocking the training script. In particular, if the upload fails for any reason (e.g. a connection
+     issue), the main script will not be interrupted. Data is automatically pushed to the Hub every `commit_every`
+     minutes (defaults to every 5 minutes).
+
+     <Tip warning={true}>
+
+     `HFSummaryWriter` is experimental. Its API is subject to change in the future without prior notice.
+
+     </Tip>
+
+     Args:
+         repo_id (`str`):
+             The id of the repo to which the logs will be pushed.
+         logdir (`str`, *optional*):
+             The directory where the logs will be written. If not specified, a local directory will be created by the
+             underlying `SummaryWriter` object.
+         commit_every (`int` or `float`, *optional*):
+             The frequency (in minutes) at which the logs will be pushed to the Hub. Defaults to 5 minutes.
+         squash_history (`bool`, *optional*):
+             Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
+             useful to avoid degraded performance on the repo when it grows too large.
+         repo_type (`str`, *optional*):
+             The type of the repo to which the logs will be pushed. Defaults to "model".
+         repo_revision (`str`, *optional*):
+             The revision of the repo to which the logs will be pushed. Defaults to "main".
+         repo_private (`bool`, *optional*):
+             Whether to make the repo private. If `None` (default), the repo will be public unless the organization's
+             default is private. This value is ignored if the repo already exists.
+         path_in_repo (`str`, *optional*):
+             The path to the folder in the repo where the logs will be pushed. Defaults to "tensorboard/".
+         repo_allow_patterns (`List[str]` or `str`, *optional*):
+             A list of patterns to include in the upload. Defaults to `"*.tfevents.*"`. Check out the
+             [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
+         repo_ignore_patterns (`List[str]` or `str`, *optional*):
+             A list of patterns to exclude from the upload. Check out the
+             [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
+         token (`str`, *optional*):
+             Authentication token. Will default to the stored token. See https://huggingface.co/settings/token for more
+             details.
+         kwargs:
+             Additional keyword arguments passed to `SummaryWriter`.
+
+     Examples:
+     ```diff
+     # Taken from https://pytorch.org/docs/stable/tensorboard.html
+     - from torch.utils.tensorboard import SummaryWriter
+     + from huggingface_hub import HFSummaryWriter
+
+     import numpy as np
+
+     - writer = SummaryWriter()
+     + writer = HFSummaryWriter(repo_id="username/my-trained-model")
+
+     for n_iter in range(100):
+         writer.add_scalar('Loss/train', np.random.random(), n_iter)
+         writer.add_scalar('Loss/test', np.random.random(), n_iter)
+         writer.add_scalar('Accuracy/train', np.random.random(), n_iter)
+         writer.add_scalar('Accuracy/test', np.random.random(), n_iter)
+     ```
+
+     ```py
+     >>> from huggingface_hub import HFSummaryWriter
+
+     # Logs are automatically pushed every 15 minutes (5 by default) + when exiting the context manager
+     >>> with HFSummaryWriter(repo_id="test_hf_logger", commit_every=15) as logger:
+     ...     logger.add_scalar("a", 1)
+     ...     logger.add_scalar("b", 2)
+     ```
+     """
+
+     @experimental
+     def __new__(cls, *args, **kwargs) -> "HFSummaryWriter":
+         if not is_summary_writer_available:
+             raise ImportError(
+                 "You must have `tensorboard` installed to use `HFSummaryWriter`. Please run `pip install --upgrade"
+                 " tensorboardX` first."
+             )
+         return super().__new__(cls)
+
+     def __init__(
+         self,
+         repo_id: str,
+         *,
+         logdir: Optional[str] = None,
+         commit_every: Union[int, float] = 5,
+         squash_history: bool = False,
+         repo_type: Optional[str] = None,
+         repo_revision: Optional[str] = None,
+         repo_private: Optional[bool] = None,
+         path_in_repo: Optional[str] = "tensorboard",
+         repo_allow_patterns: Optional[Union[List[str], str]] = "*.tfevents.*",
+         repo_ignore_patterns: Optional[Union[List[str], str]] = None,
+         token: Optional[str] = None,
+         **kwargs,
+     ):
+         # Initialize SummaryWriter
+         super().__init__(logdir=logdir, **kwargs)
+
+         # Check logdir has been correctly initialized and fail early otherwise. In practice, SummaryWriter takes care of it.
+         if not isinstance(self.logdir, str):
+             raise ValueError(f"`self.logdir` must be a string. Got '{self.logdir}' of type {type(self.logdir)}.")
+
+         # Append logdir name to `path_in_repo`
+         if path_in_repo is None or path_in_repo == "":
+             path_in_repo = Path(self.logdir).name
+         else:
+             path_in_repo = path_in_repo.strip("/") + "/" + Path(self.logdir).name
+
+         # Initialize scheduler
+         self.scheduler = CommitScheduler(
+             folder_path=self.logdir,
+             path_in_repo=path_in_repo,
+             repo_id=repo_id,
+             repo_type=repo_type,
+             revision=repo_revision,
+             private=repo_private,
+             token=token,
+             allow_patterns=repo_allow_patterns,
+             ignore_patterns=repo_ignore_patterns,
+             every=commit_every,
+             squash_history=squash_history,
+         )
+
+         # Exposing some high-level info at root level
+         self.repo_id = self.scheduler.repo_id
+         self.repo_type = self.scheduler.repo_type
+         self.repo_revision = self.scheduler.revision
+
+         # Add `hf-summary-writer` tag to the model card metadata
+         try:
+             card = ModelCard.load(repo_id_or_path=self.repo_id, repo_type=self.repo_type)
+         except EntryNotFoundError:
+             card = ModelCard("")
+         tags = card.data.get("tags", [])
+         if "hf-summary-writer" not in tags:
+             tags.append("hf-summary-writer")
+             card.data["tags"] = tags
+             card.push_to_hub(repo_id=self.repo_id, repo_type=self.repo_type)
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         """Push to hub in a non-blocking way when exiting the logger's context manager."""
+         super().__exit__(exc_type, exc_val, exc_tb)
+         future = self.scheduler.trigger()
+         future.result()
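One detail worth noting in `__init__` above: the logdir's base name is appended to `path_in_repo`, so several runs can log to the same repo without overwriting each other. A quick illustrative check with assumed values:

```python
# Illustrative: how path_in_repo is derived in HFSummaryWriter.__init__ above.
from pathlib import Path

logdir = "runs/exp1"          # hypothetical local log directory
path_in_repo = "tensorboard"  # the default prefix
assert path_in_repo.strip("/") + "/" + Path(logdir).name == "tensorboard/exp1"
```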
meow/lib/python3.13/site-packages/huggingface_hub/_webhooks_payload.py ADDED
@@ -0,0 +1,137 @@
+ # coding=utf-8
+ # Copyright 2023-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains data structures to parse the webhooks payload."""
+
+ from typing import List, Literal, Optional
+
+ from .utils import is_pydantic_available
+
+
+ if is_pydantic_available():
+     from pydantic import BaseModel
+ else:
+     # Define a dummy BaseModel to avoid import errors when pydantic is not installed
+     # Import error will be raised when trying to use the class
+
+     class BaseModel:  # type: ignore [no-redef]
+         def __init__(self, *args, **kwargs) -> None:
+             raise ImportError(
+                 "You must have `pydantic` installed to use `WebhookPayload`. This is an optional dependency that"
+                 " should be installed separately. Please run `pip install --upgrade pydantic` and retry."
+             )
+
+
+ # This is an adaptation of the ReportV3 interface implemented in moon-landing. V0, V1 and V2 have been ignored as they
+ # are no longer in use. Keep this in sync when the format is updated in
+ # https://github.com/huggingface/moon-landing/blob/main/server/lib/HFWebhooks.ts (internal link).
+
+
+ WebhookEvent_T = Literal[
+     "create",
+     "delete",
+     "move",
+     "update",
+ ]
+ RepoChangeEvent_T = Literal[
+     "add",
+     "move",
+     "remove",
+     "update",
+ ]
+ RepoType_T = Literal[
+     "dataset",
+     "model",
+     "space",
+ ]
+ DiscussionStatus_T = Literal[
+     "closed",
+     "draft",
+     "open",
+     "merged",
+ ]
+ SupportedWebhookVersion = Literal[3]
+
+
+ class ObjectId(BaseModel):
+     id: str
+
+
+ class WebhookPayloadUrl(BaseModel):
+     web: str
+     api: Optional[str] = None
+
+
+ class WebhookPayloadMovedTo(BaseModel):
+     name: str
+     owner: ObjectId
+
+
+ class WebhookPayloadWebhook(ObjectId):
+     version: SupportedWebhookVersion
+
+
+ class WebhookPayloadEvent(BaseModel):
+     action: WebhookEvent_T
+     scope: str
+
+
+ class WebhookPayloadDiscussionChanges(BaseModel):
+     base: str
+     mergeCommitId: Optional[str] = None
+
+
+ class WebhookPayloadComment(ObjectId):
+     author: ObjectId
+     hidden: bool
+     content: Optional[str] = None
+     url: WebhookPayloadUrl
+
+
+ class WebhookPayloadDiscussion(ObjectId):
+     num: int
+     author: ObjectId
+     url: WebhookPayloadUrl
+     title: str
+     isPullRequest: bool
+     status: DiscussionStatus_T
+     changes: Optional[WebhookPayloadDiscussionChanges] = None
+     pinned: Optional[bool] = None
+
+
+ class WebhookPayloadRepo(ObjectId):
+     owner: ObjectId
+     head_sha: Optional[str] = None
+     name: str
+     private: bool
+     subdomain: Optional[str] = None
+     tags: Optional[List[str]] = None
+     type: Literal["dataset", "model", "space"]
+     url: WebhookPayloadUrl
+
+
+ class WebhookPayloadUpdatedRef(BaseModel):
+     ref: str
+     oldSha: Optional[str] = None
+     newSha: Optional[str] = None
+
+
+ class WebhookPayload(BaseModel):
+     event: WebhookPayloadEvent
+     repo: WebhookPayloadRepo
+     discussion: Optional[WebhookPayloadDiscussion] = None
+     comment: Optional[WebhookPayloadComment] = None
+     webhook: WebhookPayloadWebhook
+     movedTo: Optional[WebhookPayloadMovedTo] = None
+     updatedRefs: Optional[List[WebhookPayloadUpdatedRef]] = None
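A hedged sketch of parsing a payload with the models above; it requires pydantic, the dict is hand-written (not a real Hub event), and it uses the v1-style `parse_obj` entry point:

```python
# Illustrative only: validate a hand-written payload dict against WebhookPayload.
payload = WebhookPayload.parse_obj(
    {
        "event": {"action": "update", "scope": "repo.content"},
        "repo": {
            "id": "000000000000000000000000",
            "owner": {"id": "111111111111111111111111"},
            "name": "user/repo",
            "private": False,
            "type": "model",
            "url": {"web": "https://huggingface.co/user/repo"},
        },
        "webhook": {"id": "222222222222222222222222", "version": 3},
    }
)
assert payload.repo.type == "model"
assert payload.discussion is None  # optional fields default to None
```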
meow/lib/python3.13/site-packages/huggingface_hub/_webhooks_server.py ADDED
@@ -0,0 +1,386 @@
+ # coding=utf-8
+ # Copyright 2023-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains `WebhooksServer` and `webhook_endpoint` to create a webhook server easily."""
+
+ import atexit
+ import inspect
+ import os
+ from functools import wraps
+ from typing import TYPE_CHECKING, Any, Callable, Dict, Optional
+
+ from .utils import experimental, is_fastapi_available, is_gradio_available
+
+
+ if TYPE_CHECKING:
+     import gradio as gr
+     from fastapi import Request
+
+ if is_fastapi_available():
+     from fastapi import FastAPI, Request
+     from fastapi.responses import JSONResponse
+ else:
+     # Will fail at runtime if FastAPI is not available
+     FastAPI = Request = JSONResponse = None  # type: ignore [misc, assignment]
+
+
+ _global_app: Optional["WebhooksServer"] = None
+ _is_local = os.environ.get("SPACE_ID") is None
+
+
+ @experimental
+ class WebhooksServer:
+     """
+     The [`WebhooksServer`] class lets you create an instance of a Gradio app that can receive Huggingface webhooks.
+     These webhooks can be registered using the [`~WebhooksServer.add_webhook`] decorator. Webhook endpoints are added to
+     the app as POST endpoints on the FastAPI router. Once all the webhooks are registered, the `launch` method has to be
+     called to start the app.
+
+     It is recommended to accept [`WebhookPayload`] as the first argument of the webhook function. It is a Pydantic
+     model that contains all the information about the webhook event. The data will be parsed automatically for you.
+
+     Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to set up your
+     WebhooksServer and deploy it on a Space.
+
+     <Tip warning={true}>
+
+     `WebhooksServer` is experimental. Its API is subject to change in the future.
+
+     </Tip>
+
+     <Tip warning={true}>
+
+     You must have `gradio` installed to use `WebhooksServer` (`pip install --upgrade gradio`).
+
+     </Tip>
+
+     Args:
+         ui (`gradio.Blocks`, optional):
+             A Gradio UI instance to be used as the Space landing page. If `None`, a UI displaying instructions
+             about the configured webhooks is created.
+         webhook_secret (`str`, optional):
+             A secret key to verify incoming webhook requests. You can set this value to any secret you want as long as
+             you also configure it in your [webhooks settings panel](https://huggingface.co/settings/webhooks). You
+             can also set this value as the `WEBHOOK_SECRET` environment variable. If no secret is provided, the
+             webhook endpoints are opened without any security.
+
+     Example:
+
+     ```python
+     import gradio as gr
+     from huggingface_hub import WebhooksServer, WebhookPayload
+
+     with gr.Blocks() as ui:
+         ...
+
+     app = WebhooksServer(ui=ui, webhook_secret="my_secret_key")
+
+     @app.add_webhook("/say_hello")
+     async def hello(payload: WebhookPayload):
+         return {"message": "hello"}
+
+     app.launch()
+     ```
+     """
+
+     def __new__(cls, *args, **kwargs) -> "WebhooksServer":
+         if not is_gradio_available():
+             raise ImportError(
+                 "You must have `gradio` installed to use `WebhooksServer`. Please run `pip install --upgrade gradio`"
+                 " first."
+             )
+         if not is_fastapi_available():
+             raise ImportError(
+                 "You must have `fastapi` installed to use `WebhooksServer`. Please run `pip install --upgrade fastapi`"
+                 " first."
+             )
+         return super().__new__(cls)
+
+     def __init__(
+         self,
+         ui: Optional["gr.Blocks"] = None,
+         webhook_secret: Optional[str] = None,
+     ) -> None:
+         self._ui = ui
+
+         self.webhook_secret = webhook_secret or os.getenv("WEBHOOK_SECRET")
+         self.registered_webhooks: Dict[str, Callable] = {}
+         _warn_on_empty_secret(self.webhook_secret)
+
+     def add_webhook(self, path: Optional[str] = None) -> Callable:
+         """
+         Decorator to add a webhook to the [`WebhooksServer`] server.
+
+         Args:
+             path (`str`, optional):
+                 The URL path to register the webhook function. If not provided, the function name will be used as the
+                 path. In any case, all webhooks are registered under `/webhooks`.
+
+         Raises:
+             ValueError: If the provided path is already registered as a webhook.
+
+         Example:
+             ```python
+             from huggingface_hub import WebhooksServer, WebhookPayload
+
+             app = WebhooksServer()
+
+             @app.add_webhook
+             async def trigger_training(payload: WebhookPayload):
+                 if payload.repo.type == "dataset" and payload.event.action == "update":
+                     # Trigger a training job if a dataset is updated
+                     ...
+
+             app.launch()
+             ```
+         """
+         # Usage: directly as decorator. Example: `@app.add_webhook`
+         if callable(path):
+             # If path is a function, it means it was used as a decorator without arguments
+             return self.add_webhook()(path)
+
+         # Usage: provide a path. Example: `@app.add_webhook(...)`
+         @wraps(FastAPI.post)
+         def _inner_post(*args, **kwargs):
+             func = args[0]
+             abs_path = f"/webhooks/{(path or func.__name__).strip('/')}"
+             if abs_path in self.registered_webhooks:
+                 raise ValueError(f"Webhook {abs_path} already exists.")
+             self.registered_webhooks[abs_path] = func
+
+         return _inner_post
+
+     def launch(self, prevent_thread_lock: bool = False, **launch_kwargs: Any) -> None:
+         """Launch the Gradio app and register webhooks to the underlying FastAPI server.
+
+         Input parameters are forwarded to Gradio when launching the app.
+         """
+         ui = self._ui or self._get_default_ui()
+
+         # Start Gradio App
+         #   - as non-blocking so that webhooks can be added afterwards
+         #   - as shared if launched locally (to debug webhooks)
+         launch_kwargs.setdefault("share", _is_local)
+         self.fastapi_app, _, _ = ui.launch(prevent_thread_lock=True, **launch_kwargs)
+
+         # Register webhooks to FastAPI app
+         for path, func in self.registered_webhooks.items():
+             # Add secret check if required
+             if self.webhook_secret is not None:
+                 func = _wrap_webhook_to_check_secret(func, webhook_secret=self.webhook_secret)
+
+             # Add route to FastAPI app
+             self.fastapi_app.post(path)(func)
+
+         # Print instructions and block main thread
+         space_host = os.environ.get("SPACE_HOST")
+         url = "https://" + space_host if space_host is not None else (ui.share_url or ui.local_url)
+         url = url.strip("/")
+         message = "\nWebhooks are correctly set up and ready to use:"
+         message += "\n" + "\n".join(f"  - POST {url}{webhook}" for webhook in self.registered_webhooks)
+         message += "\nGo to https://huggingface.co/settings/webhooks to set up your webhooks."
+         print(message)
+
+         if not prevent_thread_lock:
+             ui.block_thread()
+
+     def _get_default_ui(self) -> "gr.Blocks":
+         """Default UI if not provided (lists webhooks and provides basic instructions)."""
+         import gradio as gr
+
+         with gr.Blocks() as ui:
+             gr.Markdown("# This is an app to process 🤗 Webhooks")
+             gr.Markdown(
+                 "Webhooks are a foundation for MLOps-related features. They allow you to listen for new changes on"
+                 " specific repos or to all repos belonging to a particular set of users/organizations (not just your"
+                 " repos, but any repo). Check out this [guide](https://huggingface.co/docs/hub/webhooks) to get to"
+                 " know more about webhooks on the Huggingface Hub."
+             )
+             gr.Markdown(
+                 f"{len(self.registered_webhooks)} webhook(s) are registered:"
+                 + "\n\n"
+                 + "\n ".join(
+                     f"- [{webhook_path}]({_get_webhook_doc_url(webhook.__name__, webhook_path)})"
+                     for webhook_path, webhook in self.registered_webhooks.items()
+                 )
+             )
+             gr.Markdown(
+                 "Go to https://huggingface.co/settings/webhooks to set up your webhooks."
+                 + "\nYour app is running locally. Please look at the logs to check the full URL you need to set."
+                 if _is_local
+                 else (
+                     "\nThis app is running on a Space. You can find the corresponding URL in the options menu"
+                     " (top-right) > 'Embed the Space'. The URL looks like 'https://{username}-{repo_name}.hf.space'."
+                 )
+             )
+         return ui
+
+
+ @experimental
+ def webhook_endpoint(path: Optional[str] = None) -> Callable:
+     """Decorator to start a [`WebhooksServer`] and register the decorated function as a webhook endpoint.
+
+     This is a helper to get started quickly. If you need more flexibility (custom landing page or webhook secret),
+     you can use [`WebhooksServer`] directly. You can register multiple webhook endpoints (to the same server) by using
+     this decorator multiple times.
+
+     Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to set up your
+     server and deploy it on a Space.
+
+     <Tip warning={true}>
+
+     `webhook_endpoint` is experimental. Its API is subject to change in the future.
+
+     </Tip>
+
+     <Tip warning={true}>
+
+     You must have `gradio` installed to use `webhook_endpoint` (`pip install --upgrade gradio`).
+
+     </Tip>
+
+     Args:
+         path (`str`, optional):
+             The URL path to register the webhook function. If not provided, the function name will be used as the path.
+             In any case, all webhooks are registered under `/webhooks`.
+
+     Examples:
+         The default usage is to register a function as a webhook endpoint. The function name will be used as the path.
+         The server will be started automatically at exit (i.e. at the end of the script).
+
+         ```python
+         from huggingface_hub import webhook_endpoint, WebhookPayload
+
+         @webhook_endpoint
+         async def trigger_training(payload: WebhookPayload):
+             if payload.repo.type == "dataset" and payload.event.action == "update":
+                 # Trigger a training job if a dataset is updated
+                 ...
+
+         # Server is automatically started at the end of the script.
+         ```
+
+         Advanced usage: register a function as a webhook endpoint and start the server manually. This is useful if you
+         are running it in a notebook.
+
+         ```python
+         from huggingface_hub import webhook_endpoint, WebhookPayload
+
+         @webhook_endpoint
+         async def trigger_training(payload: WebhookPayload):
+             if payload.repo.type == "dataset" and payload.event.action == "update":
+                 # Trigger a training job if a dataset is updated
+                 ...
+
+         # Start the server manually
+         trigger_training.launch()
+         ```
+     """
+     if callable(path):
+         # If path is a function, it means it was used as a decorator without arguments
+         return webhook_endpoint()(path)
+
+     @wraps(WebhooksServer.add_webhook)
+     def _inner(func: Callable) -> Callable:
+         app = _get_global_app()
+         app.add_webhook(path)(func)
+         if len(app.registered_webhooks) == 1:
+             # Register `app.launch` to run at exit (only once)
+             atexit.register(app.launch)
+
+         @wraps(app.launch)
+         def _launch_now():
+             # Run the app directly (without waiting for atexit)
+             atexit.unregister(app.launch)
+             app.launch()
+
+         func.launch = _launch_now  # type: ignore
+         return func
+
+     return _inner
+
+
+ def _get_global_app() -> WebhooksServer:
+     global _global_app
+     if _global_app is None:
+         _global_app = WebhooksServer()
+     return _global_app
+
+
+ def _warn_on_empty_secret(webhook_secret: Optional[str]) -> None:
+     if webhook_secret is None:
+         print("Webhook secret is not defined. This means your webhook endpoints will be open to everyone.")
+         print(
+             "To add a secret, set `WEBHOOK_SECRET` as environment variable or pass it at initialization: "
+             "\n\t`app = WebhooksServer(webhook_secret='my_secret', ...)`"
+         )
+         print(
+             "For more details about webhook secrets, please refer to"
+             " https://huggingface.co/docs/hub/webhooks#webhook-secret."
+         )
+     else:
+         print("Webhook secret is correctly defined.")
+
+
+ def _get_webhook_doc_url(webhook_name: str, webhook_path: str) -> str:
+     """Returns the anchor to a given webhook in the docs (experimental)"""
+     return "/docs#/default/" + webhook_name + webhook_path.replace("/", "_") + "_post"
+
+
+ def _wrap_webhook_to_check_secret(func: Callable, webhook_secret: str) -> Callable:
+     """Wraps a webhook function to check the webhook secret before calling the function.
+
+     This is a hacky way to add the `request` parameter to the function signature. Since FastAPI relies on route
+     parameters to inject values into the function, we need to hack the function signature to retrieve the `Request`
+     object (and hence the headers). A far cleaner solution would be to use a middleware. However, since
+     `fastapi==0.90.1`, a middleware cannot be added once the app has started. And since the FastAPI app is started by
+     Gradio internals (and not by us), we cannot add a middleware.
+
+     This method is called only when a secret has been defined by the user. If a request is sent without the
+     "x-webhook-secret" header, the function will return a 401 error (unauthorized). If the header is sent but is
+     incorrect, the function will return a 403 error (forbidden).
+
+     Inspired by https://stackoverflow.com/a/33112180.
+     """
+     initial_sig = inspect.signature(func)
+
+     @wraps(func)
+     async def _protected_func(request: Request, **kwargs):
+         request_secret = request.headers.get("x-webhook-secret")
+         if request_secret is None:
+             return JSONResponse({"error": "x-webhook-secret header not set."}, status_code=401)
+         if request_secret != webhook_secret:
+             return JSONResponse({"error": "Invalid webhook secret."}, status_code=403)
+
+         # Inject `request` in kwargs if required
+         if "request" in initial_sig.parameters:
+             kwargs["request"] = request
+
+         # Handle both sync and async routes
+         if inspect.iscoroutinefunction(func):
+             return await func(**kwargs)
+         else:
+             return func(**kwargs)
+
+     # Update signature to include request
+     if "request" not in initial_sig.parameters:
+         _protected_func.__signature__ = initial_sig.replace(  # type: ignore
+             parameters=(
+                 inspect.Parameter(name="request", kind=inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=Request),
+             )
+             + tuple(initial_sig.parameters.values())
+         )
+
+     # Return protected route
+     return _protected_func
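Client side, the secret check above means every request must carry the `x-webhook-secret` header. An illustrative call, assuming the `requests` package and a hypothetical Space URL and endpoint registered as in the docstrings above:

```python
# Sketch only: call a protected endpoint registered as /webhooks/say_hello.
import requests

response = requests.post(
    "https://username-my-space.hf.space/webhooks/say_hello",  # hypothetical URL
    headers={"x-webhook-secret": "my_secret_key"},
    json={},
)
# Per _wrap_webhook_to_check_secret above: 401 if the header is missing,
# 403 if it is wrong, otherwise the webhook function runs.
print(response.status_code)
```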
meow/lib/python3.13/site-packages/huggingface_hub/constants.py ADDED
@@ -0,0 +1,225 @@
+ import os
+ import re
+ import typing
+ from typing import Literal, Optional, Tuple
+
+
+ # Possible values for env variables
+
+
+ ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
+ ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})
+
+
+ def _is_true(value: Optional[str]) -> bool:
+     if value is None:
+         return False
+     return value.upper() in ENV_VARS_TRUE_VALUES
+
+
+ def _as_int(value: Optional[str]) -> Optional[int]:
+     if value is None:
+         return None
+     return int(value)
+
+
+ # Constants for file downloads
+
+ PYTORCH_WEIGHTS_NAME = "pytorch_model.bin"
+ TF2_WEIGHTS_NAME = "tf_model.h5"
+ TF_WEIGHTS_NAME = "model.ckpt"
+ FLAX_WEIGHTS_NAME = "flax_model.msgpack"
+ CONFIG_NAME = "config.json"
+ REPOCARD_NAME = "README.md"
+ DEFAULT_ETAG_TIMEOUT = 10
+ DEFAULT_DOWNLOAD_TIMEOUT = 10
+ DEFAULT_REQUEST_TIMEOUT = 10
+ DOWNLOAD_CHUNK_SIZE = 10 * 1024 * 1024
+ HF_TRANSFER_CONCURRENCY = 100
+
+ # Constants for serialization
+
+ PYTORCH_WEIGHTS_FILE_PATTERN = "pytorch_model{suffix}.bin"  # Unsafe pickle: use safetensors instead
+ SAFETENSORS_WEIGHTS_FILE_PATTERN = "model{suffix}.safetensors"
+ TF2_WEIGHTS_FILE_PATTERN = "tf_model{suffix}.h5"
+
+ # Constants for safetensors repos
+
+ SAFETENSORS_SINGLE_FILE = "model.safetensors"
+ SAFETENSORS_INDEX_FILE = "model.safetensors.index.json"
+ SAFETENSORS_MAX_HEADER_LENGTH = 25_000_000
+
+ # Interval (in seconds) at which to log attempts to acquire a file lock
+ FILELOCK_LOG_EVERY_SECONDS = 10
+
+ # Git-related constants
+
+ DEFAULT_REVISION = "main"
+ REGEX_COMMIT_OID = re.compile(r"[A-Fa-f0-9]{5,40}")
+
+ HUGGINGFACE_CO_URL_HOME = "https://huggingface.co/"
+
+ _staging_mode = _is_true(os.environ.get("HUGGINGFACE_CO_STAGING"))
+
+ _HF_DEFAULT_ENDPOINT = "https://huggingface.co"
+ _HF_DEFAULT_STAGING_ENDPOINT = "https://hub-ci.huggingface.co"
+ ENDPOINT = os.getenv("HF_ENDPOINT") or (_HF_DEFAULT_STAGING_ENDPOINT if _staging_mode else _HF_DEFAULT_ENDPOINT)
+
+ HUGGINGFACE_CO_URL_TEMPLATE = ENDPOINT + "/{repo_id}/resolve/{revision}/{filename}"
+ HUGGINGFACE_HEADER_X_REPO_COMMIT = "X-Repo-Commit"
+ HUGGINGFACE_HEADER_X_LINKED_ETAG = "X-Linked-Etag"
+ HUGGINGFACE_HEADER_X_LINKED_SIZE = "X-Linked-Size"
+
+ INFERENCE_ENDPOINT = os.environ.get("HF_INFERENCE_ENDPOINT", "https://api-inference.huggingface.co")
+
+ # See https://huggingface.co/docs/inference-endpoints/index
+ INFERENCE_ENDPOINTS_ENDPOINT = "https://api.endpoints.huggingface.cloud/v2"
+
+
+ REPO_ID_SEPARATOR = "--"
+ # ^ this substring is not allowed in repo_ids on hf.co
+ # and is the canonical one we use for serialization of repo ids elsewhere.
+
+
+ REPO_TYPE_DATASET = "dataset"
+ REPO_TYPE_SPACE = "space"
+ REPO_TYPE_MODEL = "model"
+ REPO_TYPES = [None, REPO_TYPE_MODEL, REPO_TYPE_DATASET, REPO_TYPE_SPACE]
+ SPACES_SDK_TYPES = ["gradio", "streamlit", "docker", "static"]
+
+ REPO_TYPES_URL_PREFIXES = {
+     REPO_TYPE_DATASET: "datasets/",
+     REPO_TYPE_SPACE: "spaces/",
+ }
+ REPO_TYPES_MAPPING = {
+     "datasets": REPO_TYPE_DATASET,
+     "spaces": REPO_TYPE_SPACE,
+     "models": REPO_TYPE_MODEL,
+ }
+
+ DiscussionTypeFilter = Literal["all", "discussion", "pull_request"]
+ DISCUSSION_TYPES: Tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionTypeFilter)
+ DiscussionStatusFilter = Literal["all", "open", "closed"]
+ DISCUSSION_STATUS: Tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionStatusFilter)
+
+ # Webhook subscription types
+ WEBHOOK_DOMAIN_T = Literal["repo", "discussions"]
+
+ # default cache
+ default_home = os.path.join(os.path.expanduser("~"), ".cache")
+ HF_HOME = os.path.expanduser(
+     os.getenv(
+         "HF_HOME",
+         os.path.join(os.getenv("XDG_CACHE_HOME", default_home), "huggingface"),
+     )
+ )
+ hf_cache_home = HF_HOME  # for backward compatibility. TODO: remove this in 1.0.0
+
+ default_cache_path = os.path.join(HF_HOME, "hub")
+ default_assets_cache_path = os.path.join(HF_HOME, "assets")
+
+ # Legacy env variables
+ HUGGINGFACE_HUB_CACHE = os.getenv("HUGGINGFACE_HUB_CACHE", default_cache_path)
+ HUGGINGFACE_ASSETS_CACHE = os.getenv("HUGGINGFACE_ASSETS_CACHE", default_assets_cache_path)
+
+ # New env variables
+ HF_HUB_CACHE = os.getenv("HF_HUB_CACHE", HUGGINGFACE_HUB_CACHE)
+ HF_ASSETS_CACHE = os.getenv("HF_ASSETS_CACHE", HUGGINGFACE_ASSETS_CACHE)
+
+ HF_HUB_OFFLINE = _is_true(os.environ.get("HF_HUB_OFFLINE") or os.environ.get("TRANSFORMERS_OFFLINE"))
+
+ # Opt-out from telemetry requests
+ HF_HUB_DISABLE_TELEMETRY = (
+     _is_true(os.environ.get("HF_HUB_DISABLE_TELEMETRY"))  # HF-specific env variable
+     or _is_true(os.environ.get("DISABLE_TELEMETRY"))
+     or _is_true(os.environ.get("DO_NOT_TRACK"))  # https://consoledonottrack.com/
+ )
+
+ # In the past, token was stored in a hardcoded location
+ # `_OLD_HF_TOKEN_PATH` is deprecated and will be removed "at some point".
+ # See https://github.com/huggingface/huggingface_hub/issues/1232
+ _OLD_HF_TOKEN_PATH = os.path.expanduser("~/.huggingface/token")
+ HF_TOKEN_PATH = os.environ.get("HF_TOKEN_PATH", os.path.join(HF_HOME, "token"))
+ HF_STORED_TOKENS_PATH = os.path.join(os.path.dirname(HF_TOKEN_PATH), "stored_tokens")
+
+ if _staging_mode:
+     # In staging mode, we use a different cache to ensure we don't mix up production and staging data or tokens
+     _staging_home = os.path.join(os.path.expanduser("~"), ".cache", "huggingface_staging")
+     HUGGINGFACE_HUB_CACHE = os.path.join(_staging_home, "hub")
+     _OLD_HF_TOKEN_PATH = os.path.join(_staging_home, "_old_token")
+     HF_TOKEN_PATH = os.path.join(_staging_home, "token")
+
+ # Here, `True` will disable progress bars globally without the possibility of enabling them
+ # programmatically. `False` will enable them without the possibility of disabling them.
+ # If the environment variable is not set (None), then the user is free to enable/disable
+ # them programmatically.
+ # TL;DR: env variable has priority over code
+ __HF_HUB_DISABLE_PROGRESS_BARS = os.environ.get("HF_HUB_DISABLE_PROGRESS_BARS")
+ HF_HUB_DISABLE_PROGRESS_BARS: Optional[bool] = (
+     _is_true(__HF_HUB_DISABLE_PROGRESS_BARS) if __HF_HUB_DISABLE_PROGRESS_BARS is not None else None
+ )
+
+ # Disable warning on machines that do not support symlinks (e.g. Windows non-developer)
+ HF_HUB_DISABLE_SYMLINKS_WARNING: bool = _is_true(os.environ.get("HF_HUB_DISABLE_SYMLINKS_WARNING"))
+
+ # Disable warning when using experimental features
+ HF_HUB_DISABLE_EXPERIMENTAL_WARNING: bool = _is_true(os.environ.get("HF_HUB_DISABLE_EXPERIMENTAL_WARNING"))
+
+ # Disable sending the cached token by default in all HTTP requests to the Hub
+ HF_HUB_DISABLE_IMPLICIT_TOKEN: bool = _is_true(os.environ.get("HF_HUB_DISABLE_IMPLICIT_TOKEN"))
+
+ # Enable fast-download using external dependency "hf_transfer"
+ # See:
+ # - https://pypi.org/project/hf-transfer/
+ # - https://github.com/huggingface/hf_transfer (private)
+ HF_HUB_ENABLE_HF_TRANSFER: bool = _is_true(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER"))
+
+
+ # UNUSED
+ # We don't use symlinks in local dir anymore.
+ HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD: int = (
+     _as_int(os.environ.get("HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD")) or 5 * 1024 * 1024
+ )
+
+ # Used to override the etag timeout on a system level
+ HF_HUB_ETAG_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT")) or DEFAULT_ETAG_TIMEOUT
+
+ # Used to override the get request timeout on a system level
+ HF_HUB_DOWNLOAD_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_DOWNLOAD_TIMEOUT")) or DEFAULT_DOWNLOAD_TIMEOUT
+
+ # List frameworks that are handled by the InferenceAPI service. Useful to scan endpoints and check which models are
+ # deployed and running. Since 95% of the models are using the top 4 frameworks listed below, we scan only those by
+ # default. We still keep the full list of supported frameworks in case we want to scan all of them.
+ MAIN_INFERENCE_API_FRAMEWORKS = [
+     "diffusers",
+     "sentence-transformers",
+     "text-generation-inference",
+     "transformers",
+ ]
+
+ ALL_INFERENCE_API_FRAMEWORKS = MAIN_INFERENCE_API_FRAMEWORKS + [
+     "adapter-transformers",
+     "allennlp",
+     "asteroid",
+     "bertopic",
+     "doctr",
+     "espnet",
+     "fairseq",
+     "fastai",
+     "fasttext",
+     "flair",
+     "k2",
+     "keras",
+     "mindspore",
+     "nemo",
+     "open_clip",
+     "paddlenlp",
+     "peft",
+     "pyannote-audio",
+     "sklearn",
+     "spacy",
+     "span-marker",
+     "speechbrain",
+     "stanza",
+     "timm",
+ ]
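The env-var helpers at the top of this file drive most of the flags below. Their behavior, illustratively:

```python
# Illustrative checks of the helpers defined in constants.py above.
assert _is_true("1") and _is_true("yes") and _is_true("TRUE")  # case-insensitive truthy values
assert not _is_true("0") and not _is_true(None)                # unset or falsy -> False
assert _as_int("512") == 512 and _as_int(None) is None         # unset -> None, else int()
```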
meow/lib/python3.13/site-packages/huggingface_hub/fastai_utils.py ADDED
@@ -0,0 +1,425 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+ from pickle import DEFAULT_PROTOCOL, PicklingError
5
+ from typing import Any, Dict, List, Optional, Union
6
+
7
+ from packaging import version
8
+
9
+ from huggingface_hub import constants, snapshot_download
10
+ from huggingface_hub.hf_api import HfApi
11
+ from huggingface_hub.utils import (
12
+ SoftTemporaryDirectory,
13
+ get_fastai_version,
14
+ get_fastcore_version,
15
+ get_python_version,
16
+ )
17
+
18
+ from .utils import logging, validate_hf_hub_args
19
+ from .utils._runtime import _PY_VERSION # noqa: F401 # for backward compatibility...
20
+
21
+
22
+ logger = logging.get_logger(__name__)
23
+
24
+
25
+ def _check_fastai_fastcore_versions(
26
+ fastai_min_version: str = "2.4",
27
+ fastcore_min_version: str = "1.3.27",
28
+ ):
29
+ """
30
+ Checks that the installed fastai and fastcore versions are compatible for pickle serialization.
31
+
32
+ Args:
33
+ fastai_min_version (`str`, *optional*):
34
+ The minimum fastai version supported.
35
+ fastcore_min_version (`str`, *optional*):
36
+ The minimum fastcore version supported.
37
+
38
+ <Tip>
39
+ Raises the following error:
40
+
41
+ - [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError)
42
+ if the fastai or fastcore libraries are not available or are of an invalid version.
43
+
44
+ </Tip>
45
+ """
46
+
47
+ if (get_fastcore_version() or get_fastai_version()) == "N/A":
48
+ raise ImportError(
49
+ f"fastai>={fastai_min_version} and fastcore>={fastcore_min_version} are"
50
+ f" required. Currently using fastai=={get_fastai_version()} and"
51
+ f" fastcore=={get_fastcore_version()}."
52
+ )
53
+
54
+ current_fastai_version = version.Version(get_fastai_version())
55
+ current_fastcore_version = version.Version(get_fastcore_version())
56
+
57
+ if current_fastai_version < version.Version(fastai_min_version):
58
+ raise ImportError(
59
+ "`push_to_hub_fastai` and `from_pretrained_fastai` require a"
60
+ f" fastai>={fastai_min_version} version, but you are using fastai version"
61
+ f" {get_fastai_version()} which is incompatible. Upgrade with `pip install"
62
+ " fastai==2.5.6`."
63
+ )
64
+
65
+ if current_fastcore_version < version.Version(fastcore_min_version):
66
+ raise ImportError(
67
+ "`push_to_hub_fastai` and `from_pretrained_fastai` require a"
68
+ f" fastcore>={fastcore_min_version} version, but you are using fastcore"
69
+ f" version {get_fastcore_version()} which is incompatible. Upgrade with"
70
+ " `pip install fastcore==1.3.27`."
71
+ )
72
+
73
+
74
+ def _check_fastai_fastcore_pyproject_versions(
75
+ storage_folder: str,
76
+ fastai_min_version: str = "2.4",
77
+ fastcore_min_version: str = "1.3.27",
78
+ ):
79
+ """
80
+ Checks that the `pyproject.toml` file in the directory `storage_folder` has fastai and fastcore versions
81
+ that are compatible with `from_pretrained_fastai` and `push_to_hub_fastai`. If `pyproject.toml` does not exist
82
+ or does not contain versions for fastai and fastcore, then it logs a warning.
83
+
84
+ Args:
85
+ storage_folder (`str`):
86
+ Folder to look for the `pyproject.toml` file.
87
+ fastai_min_version (`str`, *optional*):
88
+ The minimum fastai version supported.
89
+ fastcore_min_version (`str`, *optional*):
90
+ The minimum fastcore version supported.
91
+
92
+ <Tip>
93
+ Raises the following errors:
94
+
95
+ - [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError)
96
+ if the `toml` module is not installed.
97
+ - [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError)
98
+ if the `pyproject.toml` indicates a lower than minimum supported version of fastai or fastcore.
99
+
100
+ </Tip>
101
+ """
102
+
103
+ try:
104
+ import toml
105
+ except ModuleNotFoundError:
106
+ raise ImportError(
107
+ "`push_to_hub_fastai` and `from_pretrained_fastai` require the toml module."
108
+ " Install it with `pip install toml`."
109
+ )
110
+
111
+ # Checks that a `pyproject.toml`, with `build-system` and `requires` sections, exists in the repository. If so, get a list of required packages.
112
+ if not os.path.isfile(f"{storage_folder}/pyproject.toml"):
113
+ logger.warning(
114
+ "There is no `pyproject.toml` in the repository that contains the fastai"
115
+ " `Learner`. The `pyproject.toml` would allow us to verify that your fastai"
116
+ " and fastcore versions are compatible with those of the model you want to"
117
+ " load."
118
+ )
119
+ return
120
+ pyproject_toml = toml.load(f"{storage_folder}/pyproject.toml")
121
+
122
+ if "build-system" not in pyproject_toml.keys():
123
+ logger.warning(
124
+ "There is no `build-system` section in the pyproject.toml of the repository"
125
+ " that contains the fastai `Learner`. The `build-system` would allow us to"
126
+ " verify that your fastai and fastcore versions are compatible with those"
127
+ " of the model you want to load."
128
+ )
129
+ return
130
+ build_system_toml = pyproject_toml["build-system"]
131
+
132
+ if "requires" not in build_system_toml.keys():
133
+ logger.warning(
134
+ "There is no `requires` section in the pyproject.toml of the repository"
135
+ " that contains the fastai `Learner`. The `requires` would allow us to"
136
+ " verify that your fastai and fastcore versions are compatible with those"
137
+ " of the model you want to load."
138
+ )
139
+ return
140
+ package_versions = build_system_toml["requires"]
141
+
142
+ # Extracts contains fastai and fastcore versions from `pyproject.toml` if available.
143
+ # If the package is specified but not the version (e.g. "fastai" instead of "fastai=2.4"), the default versions are the highest.
144
+ fastai_packages = [pck for pck in package_versions if pck.startswith("fastai")]
145
+ if len(fastai_packages) == 0:
146
+ logger.warning("The repository does not have a fastai version specified in the `pyproject.toml`.")
147
+ # fastai_version is an empty string if not specified
148
+ else:
149
+ fastai_version = str(fastai_packages[0]).partition("=")[2]
150
+ if fastai_version != "" and version.Version(fastai_version) < version.Version(fastai_min_version):
151
+ raise ImportError(
152
+ "`from_pretrained_fastai` requires"
153
+ f" fastai>={fastai_min_version} version but the model to load uses"
154
+ f" {fastai_version} which is incompatible."
155
+ )
156
+
157
+ fastcore_packages = [pck for pck in package_versions if pck.startswith("fastcore")]
158
+ if len(fastcore_packages) == 0:
159
+ logger.warning("The repository does not have a fastcore version specified in the `pyproject.toml`.")
160
+ # fastcore_version is an empty string if not specified
161
+ else:
162
+ fastcore_version = str(fastcore_packages[0]).partition("=")[2]
163
+ if fastcore_version != "" and version.Version(fastcore_version) < version.Version(fastcore_min_version):
164
+ raise ImportError(
165
+ "`from_pretrained_fastai` requires"
166
+ f" fastcore>={fastcore_min_version} version, but you are using fastcore"
167
+ f" version {fastcore_version} which is incompatible."
168
+ )
169
+
170
+
171
+ README_TEMPLATE = """---
172
+ tags:
173
+ - fastai
174
+ ---
175
+
176
+ # Amazing!
177
+
178
+ 🥳 Congratulations on hosting your fastai model on the Hugging Face Hub!
179
+
180
+ # Some next steps
181
+ 1. Fill out this model card with more information (see the template below and the [documentation here](https://huggingface.co/docs/hub/model-repos))!
182
+
183
+ 2. Create a demo in Gradio or Streamlit using 🤗 Spaces ([documentation here](https://huggingface.co/docs/hub/spaces)).
184
+
185
+ 3. Join the fastai community on the [Fastai Discord](https://discord.com/invite/YKrxeNn)!
186
+
187
+ Greetings fellow fastlearner 🤝! Don't forget to delete this content from your model card.
188
+
189
+
190
+ ---
191
+
192
+
193
+ # Model card
194
+
195
+ ## Model description
196
+ More information needed
197
+
198
+ ## Intended uses & limitations
199
+ More information needed
200
+
201
+ ## Training and evaluation data
202
+ More information needed
203
+ """
204
+
205
+ PYPROJECT_TEMPLATE = f"""[build-system]
206
+ requires = ["setuptools>=40.8.0", "wheel", "python={get_python_version()}", "fastai={get_fastai_version()}", "fastcore={get_fastcore_version()}"]
207
+ build-backend = "setuptools.build_meta:__legacy__"
208
+ """
209
+
210
+
211
+ def _create_model_card(repo_dir: Path):
212
+ """
213
+ Creates a model card for the repository.
214
+
215
+ Args:
216
+ repo_dir (`Path`):
217
+ Directory where model card is created.
218
+ """
219
+ readme_path = repo_dir / "README.md"
220
+
221
+ if not readme_path.exists():
222
+ with readme_path.open("w", encoding="utf-8") as f:
223
+ f.write(README_TEMPLATE)
224
+
225
+
226
+ def _create_model_pyproject(repo_dir: Path):
227
+ """
228
+ Creates a `pyproject.toml` for the repository.
229
+
230
+ Args:
231
+ repo_dir (`Path`):
232
+ Directory where `pyproject.toml` is created.
233
+ """
234
+ pyproject_path = repo_dir / "pyproject.toml"
235
+
236
+ if not pyproject_path.exists():
237
+ with pyproject_path.open("w", encoding="utf-8") as f:
238
+ f.write(PYPROJECT_TEMPLATE)
239
+
240
+
241
+ def _save_pretrained_fastai(
242
+ learner,
243
+ save_directory: Union[str, Path],
244
+ config: Optional[Dict[str, Any]] = None,
245
+ ):
246
+ """
247
+ Saves a fastai learner to `save_directory` in pickle format using the default pickle protocol for the version of Python used.
248
+
249
+ Args:
250
+ learner (`Learner`):
251
+ The `fastai.Learner` you'd like to save.
252
+ save_directory (`str` or `Path`):
253
+ Specific directory in which you want to save the fastai learner.
254
+ config (`dict`, *optional*):
255
+ Configuration object. Will be uploaded as a .json file. Example: 'https://huggingface.co/espejelomar/fastai-pet-breeds-classification/blob/main/config.json'.
256
+
257
+ <Tip>
258
+
259
+ Raises the following error:
260
+
261
+ - [`RuntimeError`](https://docs.python.org/3/library/exceptions.html#RuntimeError)
262
+ if the config file provided is not a dictionary.
263
+
264
+ </Tip>
265
+ """
266
+ _check_fastai_fastcore_versions()
267
+
268
+ os.makedirs(save_directory, exist_ok=True)
269
+
270
+ # if the user provides config then we update it with the fastai and fastcore versions in CONFIG_TEMPLATE.
271
+ if config is not None:
272
+ if not isinstance(config, dict):
273
+ raise RuntimeError(f"Provided config should be a dict. Got: '{type(config)}'")
274
+ path = os.path.join(save_directory, constants.CONFIG_NAME)
275
+ with open(path, "w") as f:
276
+ json.dump(config, f)
277
+
278
+ _create_model_card(Path(save_directory))
279
+ _create_model_pyproject(Path(save_directory))
280
+
281
+ # learner.export saves the model in `self.path`.
282
+ learner.path = Path(save_directory)
283
+ os.makedirs(save_directory, exist_ok=True)
284
+ try:
285
+ learner.export(
286
+ fname="model.pkl",
287
+ pickle_protocol=DEFAULT_PROTOCOL,
288
+ )
289
+ except PicklingError:
290
+ raise PicklingError(
291
+ "You are using a lambda function, i.e., an anonymous function. `pickle`"
292
+ " cannot pickle function objects and requires that all functions have"
293
+ " names. One possible solution is to name the function."
294
+ )
295
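+
+ # Illustrative usage (editor's sketch; the learner, directory and config below are hypothetical):
+ # _save_pretrained_fastai(learner, "./my_model_directory", config={"labels": ["cat", "dog"]})
+ # This writes model.pkl, README.md, pyproject.toml and (optionally) config.json.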
+
296
+
297
+ @validate_hf_hub_args
298
+ def from_pretrained_fastai(
299
+ repo_id: str,
300
+ revision: Optional[str] = None,
301
+ ):
302
+ """
303
+ Load pretrained fastai model from the Hub or from a local directory.
304
+
305
+ Args:
306
+ repo_id (`str`):
307
+ The location of the pickled fastai.Learner. It can be either:
308
+ - Hosted on the Hugging Face Hub. E.g.: 'espejelomar/fastai-pet-breeds-classification' or 'distilgpt2'.
309
+ You can add a `revision` by appending `@` at the end of `repo_id`. E.g.: `dbmdz/bert-base-german-cased@main`.
310
+ Revision is the specific model version to use. Since we use a git-based system for storing models and other
311
+ artifacts on the Hugging Face Hub, it can be a branch name, a tag name, or a commit id.
312
+ - Hosted locally. `repo_id` would be a directory containing the pickle and a pyproject.toml
313
+ indicating the fastai and fastcore versions used to build the `fastai.Learner`. E.g.: `./my_model_directory/`.
314
+ revision (`str`, *optional*):
315
+ Revision at which the repo's files are downloaded. See documentation of `snapshot_download`.
316
+
317
+ Returns:
318
+ The `fastai.Learner` model in the `repo_id` repo.
319
+ """
320
+ _check_fastai_fastcore_versions()
321
+
322
+ # Load the `repo_id` repo.
323
+ # `snapshot_download` returns the folder where the model was stored.
324
+ # `cache_dir` will be the default '/root/.cache/huggingface/hub'
325
+ if not os.path.isdir(repo_id):
326
+ storage_folder = snapshot_download(
327
+ repo_id=repo_id,
328
+ revision=revision,
329
+ library_name="fastai",
330
+ library_version=get_fastai_version(),
331
+ )
332
+ else:
333
+ storage_folder = repo_id
334
+
335
+ _check_fastai_fastcore_pyproject_versions(storage_folder)
336
+
337
+ from fastai.learner import load_learner # type: ignore
338
+
339
+ return load_learner(os.path.join(storage_folder, "model.pkl"))
340
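+
+ # Illustrative usage (editor's sketch, hypothetical repo id and input):
+ # learner = from_pretrained_fastai("espejelomar/fastai-pet-breeds-classification")
+ # prediction = learner.predict("path/to/image.jpg")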
+
341
+
342
+ @validate_hf_hub_args
343
+ def push_to_hub_fastai(
344
+ learner,
345
+ *,
346
+ repo_id: str,
347
+ commit_message: str = "Push FastAI model using huggingface_hub.",
348
+ private: Optional[bool] = None,
349
+ token: Optional[str] = None,
350
+ config: Optional[dict] = None,
351
+ branch: Optional[str] = None,
352
+ create_pr: Optional[bool] = None,
353
+ allow_patterns: Optional[Union[List[str], str]] = None,
354
+ ignore_patterns: Optional[Union[List[str], str]] = None,
355
+ delete_patterns: Optional[Union[List[str], str]] = None,
356
+ api_endpoint: Optional[str] = None,
357
+ ):
358
+ """
359
+ Upload learner checkpoint files to the Hub.
360
+
361
+ Use `allow_patterns` and `ignore_patterns` to precisely filter which files should be pushed to the hub. Use
362
+ `delete_patterns` to delete existing remote files in the same commit. See [`upload_folder`] reference for more
363
+ details.
364
+
365
+ Args:
366
+ learner (`Learner`):
367
+ The `fastai.Learner` you'd like to push to the Hub.
368
+ repo_id (`str`):
369
+ The repository id for your model in Hub in the format of "namespace/repo_name". The namespace can be your individual account or an organization to which you have write access (for example, 'stanfordnlp/stanza-de').
370
+ commit_message (`str`, *optional*):
371
+ Message to commit while pushing. Defaults to `"Push FastAI model using huggingface_hub."`.
372
+ private (`bool`, *optional*):
373
+ Whether or not the repository created should be private.
374
+ If `None` (default), the repository will be public unless the organization's default is private.
375
+ token (`str`, *optional*):
376
+ The Hugging Face account token to use as HTTP bearer authorization for remote files. If `None`, you will be prompted for a token.
377
+ config (`dict`, *optional*):
378
+ Configuration object to be saved alongside the model weights.
379
+ branch (`str`, *optional*):
380
+ The git branch on which to push the model. This defaults to
381
+ the default branch as specified in your repository, which
382
+ defaults to `"main"`.
383
+ create_pr (`boolean`, *optional*):
384
+ Whether or not to create a Pull Request from `branch` with that commit.
385
+ Defaults to `False`.
386
+ api_endpoint (`str`, *optional*):
387
+ The API endpoint to use when pushing the model to the hub.
388
+ allow_patterns (`List[str]` or `str`, *optional*):
389
+ If provided, only files matching at least one pattern are pushed.
390
+ ignore_patterns (`List[str]` or `str`, *optional*):
391
+ If provided, files matching any of the patterns are not pushed.
392
+ delete_patterns (`List[str]` or `str`, *optional*):
393
+ If provided, remote files matching any of the patterns will be deleted from the repo.
394
+
395
+ Returns:
396
+ The url of the commit of your model in the given repository.
397
+
398
+ <Tip>
399
+
400
+ Raises the following error:
401
+
402
+ - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
403
+ if the user is not logged in to the Hugging Face Hub.
404
+
405
+ </Tip>
406
+ """
407
+ _check_fastai_fastcore_versions()
408
+ api = HfApi(endpoint=api_endpoint)
409
+ repo_id = api.create_repo(repo_id=repo_id, token=token, private=private, exist_ok=True).repo_id
410
+
411
+ # Push the files to the repo in a single commit
412
+ with SoftTemporaryDirectory() as tmp:
413
+ saved_path = Path(tmp) / repo_id
414
+ _save_pretrained_fastai(learner, saved_path, config=config)
415
+ return api.upload_folder(
416
+ repo_id=repo_id,
417
+ token=token,
418
+ folder_path=saved_path,
419
+ commit_message=commit_message,
420
+ revision=branch,
421
+ create_pr=create_pr,
422
+ allow_patterns=allow_patterns,
423
+ ignore_patterns=ignore_patterns,
424
+ delete_patterns=delete_patterns,
425
+ )
meow/lib/python3.13/site-packages/huggingface_hub/hf_file_system.py ADDED
@@ -0,0 +1,1140 @@
1
+ import os
2
+ import re
3
+ import tempfile
4
+ from collections import deque
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime
7
+ from itertools import chain
8
+ from pathlib import Path
9
+ from typing import Any, Dict, Iterator, List, NoReturn, Optional, Tuple, Union
10
+ from urllib.parse import quote, unquote
11
+
12
+ import fsspec
13
+ from fsspec.callbacks import _DEFAULT_CALLBACK, NoOpCallback, TqdmCallback
14
+ from fsspec.utils import isfilelike
15
+ from requests import Response
16
+
17
+ from . import constants
18
+ from ._commit_api import CommitOperationCopy, CommitOperationDelete
19
+ from .errors import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError
20
+ from .file_download import hf_hub_url, http_get
21
+ from .hf_api import HfApi, LastCommitInfo, RepoFile
22
+ from .utils import HFValidationError, hf_raise_for_status, http_backoff
23
+
24
+
25
+ # Regex used to match special revisions with "/" in them (see #1710)
26
+ SPECIAL_REFS_REVISION_REGEX = re.compile(
27
+ r"""
28
+ (^refs\/convert\/\w+) # `refs/convert/parquet` revisions
29
+ |
30
+ (^refs\/pr\/\d+) # PR revisions
31
+ """,
32
+ re.VERBOSE,
33
+ )
34
+
35
+
36
+ @dataclass
37
+ class HfFileSystemResolvedPath:
38
+ """Data structure containing information about a resolved Hugging Face file system path."""
39
+
40
+ repo_type: str
41
+ repo_id: str
42
+ revision: str
43
+ path_in_repo: str
44
+ # The part placed after '@' in the initial path. It can be a quoted or unquoted refs revision.
45
+ # Used to reconstruct the unresolved path to return to the user.
46
+ _raw_revision: Optional[str] = field(default=None, repr=False)
47
+
48
+ def unresolve(self) -> str:
49
+ repo_path = constants.REPO_TYPES_URL_PREFIXES.get(self.repo_type, "") + self.repo_id
50
+ if self._raw_revision:
51
+ return f"{repo_path}@{self._raw_revision}/{self.path_in_repo}".rstrip("/")
52
+ elif self.revision != constants.DEFAULT_REVISION:
53
+ return f"{repo_path}@{safe_revision(self.revision)}/{self.path_in_repo}".rstrip("/")
54
+ else:
55
+ return f"{repo_path}/{self.path_in_repo}".rstrip("/")
56
+
57
+
58
+ class HfFileSystem(fsspec.AbstractFileSystem):
59
+ """
60
+ Access a remote Hugging Face Hub repository as if it were a local file system.
61
+
62
+ <Tip warning={true}>
63
+
64
+ [`HfFileSystem`] provides fsspec compatibility, which is useful for libraries that require it (e.g., reading
65
+ Hugging Face datasets directly with `pandas`). However, it introduces additional overhead due to this compatibility
66
+ layer. For better performance and reliability, it's recommended to use `HfApi` methods when possible.
67
+
68
+ </Tip>
69
+
70
+ Args:
71
+ token (`str` or `bool`, *optional*):
72
+ A valid user access token (string). Defaults to the locally saved
73
+ token, which is the recommended method for authentication (see
74
+ https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
75
+ To disable authentication, pass `False`.
76
+ endpoint (`str`, *optional*):
77
+ Endpoint of the Hub. Defaults to <https://huggingface.co>.
78
+ Usage:
79
+
80
+ ```python
81
+ >>> from huggingface_hub import HfFileSystem
82
+
83
+ >>> fs = HfFileSystem()
84
+
85
+ >>> # List files
86
+ >>> fs.glob("my-username/my-model/*.bin")
87
+ ['my-username/my-model/pytorch_model.bin']
88
+ >>> fs.ls("datasets/my-username/my-dataset", detail=False)
89
+ ['datasets/my-username/my-dataset/.gitattributes', 'datasets/my-username/my-dataset/README.md', 'datasets/my-username/my-dataset/data.json']
90
+
91
+ >>> # Read/write files
92
+ >>> with fs.open("my-username/my-model/pytorch_model.bin") as f:
93
+ ... data = f.read()
94
+ >>> with fs.open("my-username/my-model/pytorch_model.bin", "wb") as f:
95
+ ... f.write(data)
96
+ ```
97
+ """
98
+
99
+ root_marker = ""
100
+ protocol = "hf"
101
+
102
+ def __init__(
103
+ self,
104
+ *args,
105
+ endpoint: Optional[str] = None,
106
+ token: Union[bool, str, None] = None,
107
+ **storage_options,
108
+ ):
109
+ super().__init__(*args, **storage_options)
110
+ self.endpoint = endpoint or constants.ENDPOINT
111
+ self.token = token
112
+ self._api = HfApi(endpoint=endpoint, token=token)
113
+ # Maps (repo_type, repo_id, revision) to a 2-tuple with:
114
+ # * the 1st element indicating whether the repository and the revision exist
115
+ # * the 2nd element being the exception raised if the repository or revision doesn't exist
116
+ self._repo_and_revision_exists_cache: Dict[
117
+ Tuple[str, str, Optional[str]], Tuple[bool, Optional[Exception]]
118
+ ] = {}
119
+
120
+ def _repo_and_revision_exist(
121
+ self, repo_type: str, repo_id: str, revision: Optional[str]
122
+ ) -> Tuple[bool, Optional[Exception]]:
123
+ if (repo_type, repo_id, revision) not in self._repo_and_revision_exists_cache:
124
+ try:
125
+ self._api.repo_info(
126
+ repo_id, revision=revision, repo_type=repo_type, timeout=constants.HF_HUB_ETAG_TIMEOUT
127
+ )
128
+ except (RepositoryNotFoundError, HFValidationError) as e:
129
+ self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] = False, e
130
+ self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = False, e
131
+ except RevisionNotFoundError as e:
132
+ self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] = False, e
133
+ self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = True, None
134
+ else:
135
+ self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] = True, None
136
+ self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = True, None
137
+ return self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)]
138
+
139
+ def resolve_path(self, path: str, revision: Optional[str] = None) -> HfFileSystemResolvedPath:
140
+ """
141
+ Resolve a Hugging Face file system path into its components.
142
+
143
+ Args:
144
+ path (`str`):
145
+ Path to resolve.
146
+ revision (`str`, *optional*):
147
+ The revision of the repo to resolve. Defaults to the revision specified in the path.
148
+
149
+ Returns:
150
+ [`HfFileSystemResolvedPath`]: Resolved path information containing `repo_type`, `repo_id`, `revision` and `path_in_repo`.
151
+
152
+ Raises:
153
+ `ValueError`:
154
+ If path contains conflicting revision information.
155
+ `NotImplementedError`:
156
+ If trying to list repositories.
157
+ """
158
+
159
+ def _align_revision_in_path_with_revision(
160
+ revision_in_path: Optional[str], revision: Optional[str]
161
+ ) -> Optional[str]:
162
+ if revision is not None:
163
+ if revision_in_path is not None and revision_in_path != revision:
164
+ raise ValueError(
165
+ f'Revision specified in path ("{revision_in_path}") and in `revision` argument ("{revision}")'
166
+ " are not the same."
167
+ )
168
+ else:
169
+ revision = revision_in_path
170
+ return revision
171
+
172
+ path = self._strip_protocol(path)
173
+ if not path:
174
+ # can't list repositories at root
175
+ raise NotImplementedError("Access to repositories lists is not implemented.")
176
+ elif path.split("/")[0] + "/" in constants.REPO_TYPES_URL_PREFIXES.values():
177
+ if "/" not in path:
178
+ # can't list repositories at the repository type level
179
+ raise NotImplementedError("Access to repositories lists is not implemented.")
180
+ repo_type, path = path.split("/", 1)
181
+ repo_type = constants.REPO_TYPES_MAPPING[repo_type]
182
+ else:
183
+ repo_type = constants.REPO_TYPE_MODEL
184
+ if path.count("/") > 0:
185
+ if "@" in path:
186
+ repo_id, revision_in_path = path.split("@", 1)
187
+ if "/" in revision_in_path:
188
+ match = SPECIAL_REFS_REVISION_REGEX.search(revision_in_path)
189
+ if match is not None and revision in (None, match.group()):
190
+ # Handle `refs/convert/parquet` and PR revisions separately
191
+ path_in_repo = SPECIAL_REFS_REVISION_REGEX.sub("", revision_in_path).lstrip("/")
192
+ revision_in_path = match.group()
193
+ else:
194
+ revision_in_path, path_in_repo = revision_in_path.split("/", 1)
195
+ else:
196
+ path_in_repo = ""
197
+ revision = _align_revision_in_path_with_revision(unquote(revision_in_path), revision)
198
+ repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
199
+ if not repo_and_revision_exist:
200
+ _raise_file_not_found(path, err)
201
+ else:
202
+ revision_in_path = None
203
+ repo_id_with_namespace = "/".join(path.split("/")[:2])
204
+ path_in_repo_with_namespace = "/".join(path.split("/")[2:])
205
+ repo_id_without_namespace = path.split("/")[0]
206
+ path_in_repo_without_namespace = "/".join(path.split("/")[1:])
207
+ repo_id = repo_id_with_namespace
208
+ path_in_repo = path_in_repo_with_namespace
209
+ repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
210
+ if not repo_and_revision_exist:
211
+ if isinstance(err, (RepositoryNotFoundError, HFValidationError)):
212
+ repo_id = repo_id_without_namespace
213
+ path_in_repo = path_in_repo_without_namespace
214
+ repo_and_revision_exist, _ = self._repo_and_revision_exist(repo_type, repo_id, revision)
215
+ if not repo_and_revision_exist:
216
+ _raise_file_not_found(path, err)
217
+ else:
218
+ _raise_file_not_found(path, err)
219
+ else:
220
+ repo_id = path
221
+ path_in_repo = ""
222
+ if "@" in path:
223
+ repo_id, revision_in_path = path.split("@", 1)
224
+ revision = _align_revision_in_path_with_revision(unquote(revision_in_path), revision)
225
+ else:
226
+ revision_in_path = None
227
+ repo_and_revision_exist, _ = self._repo_and_revision_exist(repo_type, repo_id, revision)
228
+ if not repo_and_revision_exist:
229
+ raise NotImplementedError("Access to repositories lists is not implemented.")
230
+
231
+ revision = revision if revision is not None else constants.DEFAULT_REVISION
232
+ return HfFileSystemResolvedPath(repo_type, repo_id, revision, path_in_repo, _raw_revision=revision_in_path)
233
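+
+ # Illustrative resolution (editor's sketch, hypothetical repo):
+ # resolve_path("datasets/user/data@refs%2Fpr%2F1/train.csv") returns repo_type="dataset",
+ # repo_id="user/data", revision="refs/pr/1" (unquoted) and path_in_repo="train.csv".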
+
234
+ def invalidate_cache(self, path: Optional[str] = None) -> None:
235
+ """
236
+ Clear the cache for a given path.
237
+
238
+ For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.invalidate_cache).
239
+
240
+ Args:
241
+ path (`str`, *optional*):
242
+ Path to clear from cache. If not provided, clear the entire cache.
243
+
244
+ """
245
+ if not path:
246
+ self.dircache.clear()
247
+ self._repo_and_revision_exists_cache.clear()
248
+ else:
249
+ resolved_path = self.resolve_path(path)
250
+ path = resolved_path.unresolve()
251
+ while path:
252
+ self.dircache.pop(path, None)
253
+ path = self._parent(path)
254
+
255
+ # Only clear repo cache if path is to repo root
256
+ if not resolved_path.path_in_repo:
257
+ self._repo_and_revision_exists_cache.pop((resolved_path.repo_type, resolved_path.repo_id, None), None)
258
+ self._repo_and_revision_exists_cache.pop(
259
+ (resolved_path.repo_type, resolved_path.repo_id, resolved_path.revision), None
260
+ )
261
+
262
+ def _open(
263
+ self,
264
+ path: str,
265
+ mode: str = "rb",
266
+ revision: Optional[str] = None,
267
+ block_size: Optional[int] = None,
268
+ **kwargs,
269
+ ) -> "HfFileSystemFile":
270
+ if "a" in mode:
271
+ raise NotImplementedError("Appending to remote files is not yet supported.")
272
+ if block_size == 0:
273
+ return HfFileSystemStreamFile(self, path, mode=mode, revision=revision, block_size=block_size, **kwargs)
274
+ else:
275
+ return HfFileSystemFile(self, path, mode=mode, revision=revision, block_size=block_size, **kwargs)
276
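+
+ # Illustrative usage (editor's sketch, hypothetical repo): passing block_size=0 opens a
+ # streaming file, the fast path for reading a whole file sequentially:
+ # with fs.open("user/model/model.safetensors", "rb", block_size=0) as f:
+ #     data = f.read()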
+
277
+ def _rm(self, path: str, revision: Optional[str] = None, **kwargs) -> None:
278
+ resolved_path = self.resolve_path(path, revision=revision)
279
+ self._api.delete_file(
280
+ path_in_repo=resolved_path.path_in_repo,
281
+ repo_id=resolved_path.repo_id,
282
+ token=self.token,
283
+ repo_type=resolved_path.repo_type,
284
+ revision=resolved_path.revision,
285
+ commit_message=kwargs.get("commit_message"),
286
+ commit_description=kwargs.get("commit_description"),
287
+ )
288
+ self.invalidate_cache(path=resolved_path.unresolve())
289
+
290
+ def rm(
291
+ self,
292
+ path: str,
293
+ recursive: bool = False,
294
+ maxdepth: Optional[int] = None,
295
+ revision: Optional[str] = None,
296
+ **kwargs,
297
+ ) -> None:
298
+ """
299
+ Delete files from a repository.
300
+
301
+ For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.rm).
302
+
303
+ <Tip warning={true}>
304
+
305
+ Note: When possible, use `HfApi.delete_file()` for better performance.
306
+
307
+ </Tip>
308
+
309
+ Args:
310
+ path (`str`):
311
+ Path to delete.
312
+ recursive (`bool`, *optional*):
313
+ If True, delete directory and all its contents. Defaults to False.
314
+ maxdepth (`int`, *optional*):
315
+ Maximum depth to descend into subdirectories when deleting recursively.
316
+ revision (`str`, *optional*):
317
+ The git revision to delete from.
318
+
319
+ """
320
+ resolved_path = self.resolve_path(path, revision=revision)
321
+ paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth, revision=revision)
322
+ paths_in_repo = [self.resolve_path(path).path_in_repo for path in paths if not self.isdir(path)]
323
+ operations = [CommitOperationDelete(path_in_repo=path_in_repo) for path_in_repo in paths_in_repo]
324
+ commit_message = f"Delete {path} "
325
+ commit_message += "recursively " if recursive else ""
326
+ commit_message += f"up to depth {maxdepth} " if maxdepth is not None else ""
327
+ # TODO: use `commit_description` to list all the deleted paths?
328
+ self._api.create_commit(
329
+ repo_id=resolved_path.repo_id,
330
+ repo_type=resolved_path.repo_type,
331
+ token=self.token,
332
+ operations=operations,
333
+ revision=resolved_path.revision,
334
+ commit_message=kwargs.get("commit_message", commit_message),
335
+ commit_description=kwargs.get("commit_description"),
336
+ )
337
+ self.invalidate_cache(path=resolved_path.unresolve())
338
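+
+ # Illustrative usage (editor's sketch, hypothetical path): all matched files are deleted
+ # in a single commit, e.g. fs.rm("user/model/checkpoints", recursive=True).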
+
339
+ def ls(
340
+ self, path: str, detail: bool = True, refresh: bool = False, revision: Optional[str] = None, **kwargs
341
+ ) -> List[Union[str, Dict[str, Any]]]:
342
+ """
343
+ List the contents of a directory.
344
+
345
+ For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.ls).
346
+
347
+ <Tip warning={true}>
348
+
349
+ Note: When possible, use `HfApi.list_repo_tree()` for better performance.
350
+
351
+ </Tip>
352
+
353
+ Args:
354
+ path (`str`):
355
+ Path to the directory.
356
+ detail (`bool`, *optional*):
357
+ If True, returns a list of dictionaries containing file information. If False,
358
+ returns a list of file paths. Defaults to True.
359
+ refresh (`bool`, *optional*):
360
+ If True, bypass the cache and fetch the latest data. Defaults to False.
361
+ revision (`str`, *optional*):
362
+ The git revision to list from.
363
+
364
+ Returns:
365
+ `List[Union[str, Dict[str, Any]]]`: List of file paths (if detail=False) or list of file information
366
+ dictionaries (if detail=True).
367
+ """
368
+ resolved_path = self.resolve_path(path, revision=revision)
369
+ path = resolved_path.unresolve()
370
+ kwargs = {"expand_info": detail, **kwargs}
371
+ try:
372
+ out = self._ls_tree(path, refresh=refresh, revision=revision, **kwargs)
373
+ except EntryNotFoundError:
374
+ # Path could be a file
375
+ if not resolved_path.path_in_repo:
376
+ _raise_file_not_found(path, None)
377
+ out = self._ls_tree(self._parent(path), refresh=refresh, revision=revision, **kwargs)
378
+ out = [o for o in out if o["name"] == path]
379
+ if len(out) == 0:
380
+ _raise_file_not_found(path, None)
381
+ return out if detail else [o["name"] for o in out]
382
+
383
+ def _ls_tree(
384
+ self,
385
+ path: str,
386
+ recursive: bool = False,
387
+ refresh: bool = False,
388
+ revision: Optional[str] = None,
389
+ expand_info: bool = True,
390
+ ):
391
+ resolved_path = self.resolve_path(path, revision=revision)
392
+ path = resolved_path.unresolve()
393
+ root_path = HfFileSystemResolvedPath(
394
+ resolved_path.repo_type,
395
+ resolved_path.repo_id,
396
+ resolved_path.revision,
397
+ path_in_repo="",
398
+ _raw_revision=resolved_path._raw_revision,
399
+ ).unresolve()
400
+
401
+ out = []
402
+ if path in self.dircache and not refresh:
403
+ cached_path_infos = self.dircache[path]
404
+ out.extend(cached_path_infos)
405
+ dirs_not_in_dircache = []
406
+ if recursive:
407
+ # Use BFS to traverse the cache and build the "recursive" output
408
+ # (The Hub uses a so-called "tree first" strategy for the tree endpoint but we sort the output to follow the spec so the result is (eventually) the same)
409
+ dirs_to_visit = deque(
410
+ [path_info for path_info in cached_path_infos if path_info["type"] == "directory"]
411
+ )
412
+ while dirs_to_visit:
413
+ dir_info = dirs_to_visit.popleft()
414
+ if dir_info["name"] not in self.dircache:
415
+ dirs_not_in_dircache.append(dir_info["name"])
416
+ else:
417
+ cached_path_infos = self.dircache[dir_info["name"]]
418
+ out.extend(cached_path_infos)
419
+ dirs_to_visit.extend(
420
+ [path_info for path_info in cached_path_infos if path_info["type"] == "directory"]
421
+ )
422
+
423
+ dirs_not_expanded = []
424
+ if expand_info:
425
+ # Check if there are directories with non-expanded entries
426
+ dirs_not_expanded = [self._parent(o["name"]) for o in out if o["last_commit"] is None]
427
+
428
+ if (recursive and dirs_not_in_dircache) or (expand_info and dirs_not_expanded):
429
+ # If the dircache is incomplete, find the common path of the missing and non-expanded entries
430
+ # and extend the output with the result of `_ls_tree(common_path, recursive=True)`
431
+ common_prefix = os.path.commonprefix(dirs_not_in_dircache + dirs_not_expanded)
432
+ # Get the parent directory if the common prefix itself is not a directory
433
+ common_path = (
434
+ common_prefix.rstrip("/")
435
+ if common_prefix.endswith("/")
436
+ or common_prefix == root_path
437
+ or common_prefix in chain(dirs_not_in_dircache, dirs_not_expanded)
438
+ else self._parent(common_prefix)
439
+ )
440
+ out = [o for o in out if not o["name"].startswith(common_path + "/")]
441
+ for cached_path in self.dircache:
442
+ if cached_path.startswith(common_path + "/"):
443
+ self.dircache.pop(cached_path, None)
444
+ self.dircache.pop(common_path, None)
445
+ out.extend(
446
+ self._ls_tree(
447
+ common_path,
448
+ recursive=recursive,
449
+ refresh=True,
450
+ revision=revision,
451
+ expand_info=expand_info,
452
+ )
453
+ )
454
+ else:
455
+ tree = self._api.list_repo_tree(
456
+ resolved_path.repo_id,
457
+ resolved_path.path_in_repo,
458
+ recursive=recursive,
459
+ expand=expand_info,
460
+ revision=resolved_path.revision,
461
+ repo_type=resolved_path.repo_type,
462
+ )
463
+ for path_info in tree:
464
+ if isinstance(path_info, RepoFile):
465
+ cache_path_info = {
466
+ "name": root_path + "/" + path_info.path,
467
+ "size": path_info.size,
468
+ "type": "file",
469
+ "blob_id": path_info.blob_id,
470
+ "lfs": path_info.lfs,
471
+ "last_commit": path_info.last_commit,
472
+ "security": path_info.security,
473
+ }
474
+ else:
475
+ cache_path_info = {
476
+ "name": root_path + "/" + path_info.path,
477
+ "size": 0,
478
+ "type": "directory",
479
+ "tree_id": path_info.tree_id,
480
+ "last_commit": path_info.last_commit,
481
+ }
482
+ parent_path = self._parent(cache_path_info["name"])
483
+ self.dircache.setdefault(parent_path, []).append(cache_path_info)
484
+ out.append(cache_path_info)
485
+ return out
486
+
487
+ def walk(self, path: str, *args, **kwargs) -> Iterator[Tuple[str, List[str], List[str]]]:
488
+ """
489
+ Return all files below the given path.
490
+
491
+ For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.walk).
492
+
493
+ Args:
494
+ path (`str`):
495
+ Root path to list files from.
496
+
497
+ Returns:
498
+ `Iterator[Tuple[str, List[str], List[str]]]`: An iterator of (path, list of directory names, list of file names) tuples.
499
+ """
500
+ # Set expand_info=False by default for a ~10x speed boost
501
+ kwargs = {"expand_info": kwargs.get("detail", False), **kwargs}
502
+ path = self.resolve_path(path, revision=kwargs.get("revision")).unresolve()
503
+ yield from super().walk(path, *args, **kwargs)
504
+
505
+ def glob(self, path: str, **kwargs) -> List[str]:
506
+ """
507
+ Find files by glob-matching.
508
+
509
+ For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.glob).
510
+
511
+ Args:
512
+ path (`str`):
513
+ Path pattern to match.
514
+
515
+ Returns:
516
+ `List[str]`: List of paths matching the pattern.
517
+ """
518
+ # Set expand_info=False by default for a ~10x speed boost
519
+ kwargs = {"expand_info": kwargs.get("detail", False), **kwargs}
520
+ path = self.resolve_path(path, revision=kwargs.get("revision")).unresolve()
521
+ return super().glob(path, **kwargs)
522
+
523
+ def find(
524
+ self,
525
+ path: str,
526
+ maxdepth: Optional[int] = None,
527
+ withdirs: bool = False,
528
+ detail: bool = False,
529
+ refresh: bool = False,
530
+ revision: Optional[str] = None,
531
+ **kwargs,
532
+ ) -> Union[List[str], Dict[str, Dict[str, Any]]]:
533
+ """
534
+ List all files below path.
535
+
536
+ For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.find).
537
+
538
+ Args:
539
+ path (`str`):
540
+ Root path to list files from.
541
+ maxdepth (`int`, *optional*):
542
+ Maximum depth to descend into subdirectories.
543
+ withdirs (`bool`, *optional*):
544
+ Include directory paths in the output. Defaults to False.
545
+ detail (`bool`, *optional*):
546
+ If True, returns a dict mapping paths to file information. Defaults to False.
547
+ refresh (`bool`, *optional*):
548
+ If True, bypass the cache and fetch the latest data. Defaults to False.
549
+ revision (`str`, *optional*):
550
+ The git revision to list from.
551
+
552
+ Returns:
553
+ `Union[List[str], Dict[str, Dict[str, Any]]]`: List of paths or dict of file information.
554
+ """
555
+ if maxdepth:
556
+ return super().find(
557
+ path, maxdepth=maxdepth, withdirs=withdirs, detail=detail, refresh=refresh, revision=revision, **kwargs
558
+ )
559
+ resolved_path = self.resolve_path(path, revision=revision)
560
+ path = resolved_path.unresolve()
561
+ kwargs = {"expand_info": detail, **kwargs}
562
+ try:
563
+ out = self._ls_tree(path, recursive=True, refresh=refresh, revision=resolved_path.revision, **kwargs)
564
+ except EntryNotFoundError:
565
+ # Path could be a file
566
+ if self.info(path, revision=revision, **kwargs)["type"] == "file":
567
+ out = {path: {}}
568
+ else:
569
+ out = {}
570
+ else:
571
+ if not withdirs:
572
+ out = [o for o in out if o["type"] != "directory"]
573
+ else:
574
+ # If `withdirs=True`, include the directory itself to be consistent with the spec
575
+ path_info = self.info(path, revision=resolved_path.revision, **kwargs)
576
+ out = [path_info] + out if path_info["type"] == "directory" else out
577
+ out = {o["name"]: o for o in out}
578
+ names = sorted(out)
579
+ if not detail:
580
+ return names
581
+ else:
582
+ return {name: out[name] for name in names}
583
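+
+ # Illustrative usage (editor's sketch, hypothetical repo):
+ # fs.find("user/model", detail=False) lists every file in the repo recursively,
+ # e.g. ["user/model/README.md", "user/model/pytorch_model.bin"].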
+
584
+ def cp_file(self, path1: str, path2: str, revision: Optional[str] = None, **kwargs) -> None:
585
+ """
586
+ Copy a file within or between repositories.
587
+
588
+ <Tip warning={true}>
589
+
590
+ Note: When possible, use `HfApi.upload_file()` for better performance.
591
+
592
+ </Tip>
593
+
594
+ Args:
595
+ path1 (`str`):
596
+ Source path to copy from.
597
+ path2 (`str`):
598
+ Destination path to copy to.
599
+ revision (`str`, *optional*):
600
+ The git revision to copy from.
601
+
602
+ """
603
+ resolved_path1 = self.resolve_path(path1, revision=revision)
604
+ resolved_path2 = self.resolve_path(path2, revision=revision)
605
+
606
+ same_repo = (
607
+ resolved_path1.repo_type == resolved_path2.repo_type and resolved_path1.repo_id == resolved_path2.repo_id
608
+ )
609
+
610
+ if same_repo:
611
+ commit_message = f"Copy {path1} to {path2}"
612
+ self._api.create_commit(
613
+ repo_id=resolved_path1.repo_id,
614
+ repo_type=resolved_path1.repo_type,
615
+ revision=resolved_path2.revision,
616
+ commit_message=kwargs.get("commit_message", commit_message),
617
+ commit_description=kwargs.get("commit_description", ""),
618
+ operations=[
619
+ CommitOperationCopy(
620
+ src_path_in_repo=resolved_path1.path_in_repo,
621
+ path_in_repo=resolved_path2.path_in_repo,
622
+ src_revision=resolved_path1.revision,
623
+ )
624
+ ],
625
+ )
626
+ else:
627
+ with self.open(path1, "rb", revision=resolved_path1.revision) as f:
628
+ content = f.read()
629
+ commit_message = f"Copy {path1} to {path2}"
630
+ self._api.upload_file(
631
+ path_or_fileobj=content,
632
+ path_in_repo=resolved_path2.path_in_repo,
633
+ repo_id=resolved_path2.repo_id,
634
+ token=self.token,
635
+ repo_type=resolved_path2.repo_type,
636
+ revision=resolved_path2.revision,
637
+ commit_message=kwargs.get("commit_message", commit_message),
638
+ commit_description=kwargs.get("commit_description"),
639
+ )
640
+ self.invalidate_cache(path=resolved_path1.unresolve())
641
+ self.invalidate_cache(path=resolved_path2.unresolve())
642
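+
+ # Illustrative note (editor's sketch, hypothetical paths): a same-repo copy is committed
+ # server-side via `CommitOperationCopy`, while a cross-repo copy downloads and re-uploads:
+ # fs.cp_file("user/model/weights.bin", "user/model/backup/weights.bin")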
+
643
+ def modified(self, path: str, **kwargs) -> datetime:
644
+ """
645
+ Get the last modified time of a file.
646
+
647
+ For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.modified).
648
+
649
+ Args:
650
+ path (`str`):
651
+ Path to the file.
652
+
653
+ Returns:
654
+ `datetime`: Last commit date of the file.
655
+ """
656
+ info = self.info(path, **kwargs)
657
+ return info["last_commit"]["date"]
658
+
659
+ def info(self, path: str, refresh: bool = False, revision: Optional[str] = None, **kwargs) -> Dict[str, Any]:
660
+ """
661
+ Get information about a file or directory.
662
+
663
+ For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.info).
664
+
665
+ <Tip warning={true}>
666
+
667
+ Note: When possible, use `HfApi.get_paths_info()` or `HfApi.repo_info()` for better performance.
668
+
669
+ </Tip>
670
+
671
+ Args:
672
+ path (`str`):
673
+ Path to get info for.
674
+ refresh (`bool`, *optional*):
675
+ If True, bypass the cache and fetch the latest data. Defaults to False.
676
+ revision (`str`, *optional*):
677
+ The git revision to get info from.
678
+
679
+ Returns:
680
+ `Dict[str, Any]`: Dictionary containing file information (type, size, commit info, etc.).
681
+
682
+ """
683
+ resolved_path = self.resolve_path(path, revision=revision)
684
+ path = resolved_path.unresolve()
685
+ expand_info = kwargs.get(
686
+ "expand_info", True
687
+ ) # don't expose it as a parameter in the public API to follow the spec
688
+ if not resolved_path.path_in_repo:
689
+ # Path is the root directory
690
+ out = {
691
+ "name": path,
692
+ "size": 0,
693
+ "type": "directory",
694
+ }
695
+ if expand_info:
696
+ last_commit = self._api.list_repo_commits(
697
+ resolved_path.repo_id, repo_type=resolved_path.repo_type, revision=resolved_path.revision
698
+ )[-1]
699
+ out = {
700
+ **out,
701
+ "tree_id": None, # TODO: tree_id of the root directory?
702
+ "last_commit": LastCommitInfo(
703
+ oid=last_commit.commit_id, title=last_commit.title, date=last_commit.created_at
704
+ ),
705
+ }
706
+ else:
707
+ out = None
708
+ parent_path = self._parent(path)
709
+ if not expand_info and parent_path not in self.dircache:
710
+ # Fill the cache with cheap call
711
+ self.ls(parent_path, expand_info=False)
712
+ if parent_path in self.dircache:
713
+ # Check if the path is in the cache
714
+ out1 = [o for o in self.dircache[parent_path] if o["name"] == path]
715
+ if not out1:
716
+ _raise_file_not_found(path, None)
717
+ out = out1[0]
718
+ if refresh or out is None or (expand_info and out and out["last_commit"] is None):
719
+ paths_info = self._api.get_paths_info(
720
+ resolved_path.repo_id,
721
+ resolved_path.path_in_repo,
722
+ expand=expand_info,
723
+ revision=resolved_path.revision,
724
+ repo_type=resolved_path.repo_type,
725
+ )
726
+ if not paths_info:
727
+ _raise_file_not_found(path, None)
728
+ path_info = paths_info[0]
729
+ root_path = HfFileSystemResolvedPath(
730
+ resolved_path.repo_type,
731
+ resolved_path.repo_id,
732
+ resolved_path.revision,
733
+ path_in_repo="",
734
+ _raw_revision=resolved_path._raw_revision,
735
+ ).unresolve()
736
+ if isinstance(path_info, RepoFile):
737
+ out = {
738
+ "name": root_path + "/" + path_info.path,
739
+ "size": path_info.size,
740
+ "type": "file",
741
+ "blob_id": path_info.blob_id,
742
+ "lfs": path_info.lfs,
743
+ "last_commit": path_info.last_commit,
744
+ "security": path_info.security,
745
+ }
746
+ else:
747
+ out = {
748
+ "name": root_path + "/" + path_info.path,
749
+ "size": 0,
750
+ "type": "directory",
751
+ "tree_id": path_info.tree_id,
752
+ "last_commit": path_info.last_commit,
753
+ }
754
+ if not expand_info:
755
+ out = {k: out[k] for k in ["name", "size", "type"]}
756
+ assert out is not None
757
+ return out
758
+
759
+ def exists(self, path, **kwargs):
760
+ """
761
+ Check if a file exists.
762
+
763
+ For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.exists).
764
+
765
+ <Tip warning={true}>
766
+
767
+ Note: When possible, use `HfApi.file_exists()` for better performance.
768
+
769
+ </Tip>
770
+
771
+ Args:
772
+ path (`str`):
773
+ Path to check.
774
+
775
+ Returns:
776
+ `bool`: True if file exists, False otherwise.
777
+ """
778
+ try:
779
+ if kwargs.get("refresh", False):
780
+ self.invalidate_cache(path)
781
+
782
+ self.info(path, **{**kwargs, "expand_info": False})
783
+ return True
784
+ except: # noqa: E722
785
+ return False
786
+
787
+ def isdir(self, path):
788
+ """
789
+ Check if a path is a directory.
790
+
791
+ For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.isdir).
792
+
793
+ Args:
794
+ path (`str`):
795
+ Path to check.
796
+
797
+ Returns:
798
+ `bool`: True if path is a directory, False otherwise.
799
+ """
800
+ try:
801
+ return self.info(path, expand_info=False)["type"] == "directory"
802
+ except OSError:
803
+ return False
804
+
805
+ def isfile(self, path):
806
+ """
807
+ Check if a path is a file.
808
+
809
+ For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.isfile).
810
+
811
+ Args:
812
+ path (`str`):
813
+ Path to check.
814
+
815
+ Returns:
816
+ `bool`: True if path is a file, False otherwise.
817
+ """
818
+ try:
819
+ return self.info(path, expand_info=False)["type"] == "file"
820
+ except: # noqa: E722
821
+ return False
822
+
823
+ def url(self, path: str) -> str:
824
+ """
825
+ Get the HTTP URL of the given path.
826
+
827
+ Args:
828
+ path (`str`):
829
+ Path to get URL for.
830
+
831
+ Returns:
832
+ `str`: HTTP URL to access the file or directory on the Hub.
833
+ """
834
+ resolved_path = self.resolve_path(path)
835
+ url = hf_hub_url(
836
+ resolved_path.repo_id,
837
+ resolved_path.path_in_repo,
838
+ repo_type=resolved_path.repo_type,
839
+ revision=resolved_path.revision,
840
+ endpoint=self.endpoint,
841
+ )
842
+ if self.isdir(path):
843
+ url = url.replace("/resolve/", "/tree/", 1)
844
+ return url
845
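+
+ # Illustrative output (editor's sketch): fs.url("gpt2/config.json") returns
+ # "https://huggingface.co/gpt2/resolve/main/config.json"; for a directory the
+ # "/resolve/" segment is replaced with "/tree/".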
+
846
+ def get_file(self, rpath, lpath, callback=_DEFAULT_CALLBACK, outfile=None, **kwargs) -> None:
847
+ """
848
+ Copy single remote file to local.
849
+
850
+ <Tip warning={true}>
851
+
852
+ Note: When possible, use `HfApi.hf_hub_download()` for better performance.
853
+
854
+ </Tip>
855
+
856
+ Args:
857
+ rpath (`str`):
858
+ Remote path to download from.
859
+ lpath (`str`):
860
+ Local path to download to.
861
+ callback (`Callback`, *optional*):
862
+ Optional callback to track download progress. Defaults to no callback.
863
+ outfile (`IO`, *optional*):
864
+ Optional file-like object to write to. If provided, `lpath` is ignored.
865
+
866
+ """
867
+ revision = kwargs.get("revision")
868
+ unhandled_kwargs = set(kwargs.keys()) - {"revision"}
869
+ if not isinstance(callback, (NoOpCallback, TqdmCallback)) or len(unhandled_kwargs) > 0:
870
+ # for now, let's not handle custom callbacks
871
+ # and let's not handle custom kwargs
872
+ return super().get_file(rpath, lpath, callback=callback, outfile=outfile, **kwargs)
873
+
874
+ # Taken from https://github.com/fsspec/filesystem_spec/blob/47b445ae4c284a82dd15e0287b1ffc410e8fc470/fsspec/spec.py#L883
875
+ if isfilelike(lpath):
876
+ outfile = lpath
877
+ elif self.isdir(rpath):
878
+ os.makedirs(lpath, exist_ok=True)
879
+ return None
880
+
881
+ if isinstance(lpath, (str, Path)): # otherwise, let's assume it's a file-like object
882
+ os.makedirs(os.path.dirname(lpath), exist_ok=True)
883
+
884
+ # Open file if not already open
885
+ close_file = False
886
+ if outfile is None:
887
+ outfile = open(lpath, "wb")
888
+ close_file = True
889
+ initial_pos = outfile.tell()
890
+
891
+ # Custom implementation of `get_file` to use `http_get`.
892
+ resolve_remote_path = self.resolve_path(rpath, revision=revision)
893
+ expected_size = self.info(rpath, revision=revision)["size"]
894
+ callback.set_size(expected_size)
895
+ try:
896
+ http_get(
897
+ url=hf_hub_url(
898
+ repo_id=resolve_remote_path.repo_id,
899
+ revision=resolve_remote_path.revision,
900
+ filename=resolve_remote_path.path_in_repo,
901
+ repo_type=resolve_remote_path.repo_type,
902
+ endpoint=self.endpoint,
903
+ ),
904
+ temp_file=outfile,
905
+ displayed_filename=rpath,
906
+ expected_size=expected_size,
907
+ resume_size=0,
908
+ headers=self._api._build_hf_headers(),
909
+ _tqdm_bar=callback.tqdm if isinstance(callback, TqdmCallback) else None,
910
+ )
911
+ outfile.seek(initial_pos)
912
+ finally:
913
+ # Close file only if we opened it ourselves
914
+ if close_file:
915
+ outfile.close()
916
+
917
+ @property
918
+ def transaction(self):
919
+ """A context within which files are committed together upon exit
920
+
921
+ Requires the file class to implement `.commit()` and `.discard()`
922
+ for the normal and exception cases.
923
+ """
924
+ # Taken from https://github.com/fsspec/filesystem_spec/blob/3fbb6fee33b46cccb015607630843dea049d3243/fsspec/spec.py#L231
925
+ # See https://github.com/huggingface/huggingface_hub/issues/1733
926
+ raise NotImplementedError("Transactional commits are not supported.")
927
+
928
+ def start_transaction(self):
929
+ """Begin write transaction for deferring files, non-context version"""
930
+ # Taken from https://github.com/fsspec/filesystem_spec/blob/3fbb6fee33b46cccb015607630843dea049d3243/fsspec/spec.py#L241
931
+ # See https://github.com/huggingface/huggingface_hub/issues/1733
932
+ raise NotImplementedError("Transactional commits are not supported.")
933
+
934
+
935
+ class HfFileSystemFile(fsspec.spec.AbstractBufferedFile):
936
+ def __init__(self, fs: HfFileSystem, path: str, revision: Optional[str] = None, **kwargs):
937
+ try:
938
+ self.resolved_path = fs.resolve_path(path, revision=revision)
939
+ except FileNotFoundError as e:
940
+ if "w" in kwargs.get("mode", ""):
941
+ raise FileNotFoundError(
942
+ f"{e}.\nMake sure the repository and revision exist before writing data."
943
+ ) from e
944
+ raise
945
+ # avoid an unnecessary .info() call with expensive expand_info=True to instantiate .details
946
+ if kwargs.get("mode", "rb") == "rb":
947
+ self.details = fs.info(self.resolved_path.unresolve(), expand_info=False)
948
+ super().__init__(fs, self.resolved_path.unresolve(), **kwargs)
949
+ self.fs: HfFileSystem
950
+
951
+ def __del__(self):
952
+ if not hasattr(self, "resolved_path"):
953
+ # Means that the constructor failed. Nothing to do.
954
+ return
955
+ return super().__del__()
956
+
957
+ def _fetch_range(self, start: int, end: int) -> bytes:
958
+ headers = {
959
+ "range": f"bytes={start}-{end - 1}",
960
+ **self.fs._api._build_hf_headers(),
961
+ }
962
+ url = hf_hub_url(
963
+ repo_id=self.resolved_path.repo_id,
964
+ revision=self.resolved_path.revision,
965
+ filename=self.resolved_path.path_in_repo,
966
+ repo_type=self.resolved_path.repo_type,
967
+ endpoint=self.fs.endpoint,
968
+ )
969
+ r = http_backoff(
970
+ "GET",
971
+ url,
972
+ headers=headers,
973
+ retry_on_status_codes=(500, 502, 503, 504),
974
+ timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
975
+ )
976
+ hf_raise_for_status(r)
977
+ return r.content
978
+
979
+ def _initiate_upload(self) -> None:
980
+ self.temp_file = tempfile.NamedTemporaryFile(prefix="hffs-", delete=False)
981
+
982
+ def _upload_chunk(self, final: bool = False) -> None:
983
+ self.buffer.seek(0)
984
+ block = self.buffer.read()
985
+ self.temp_file.write(block)
986
+ if final:
987
+ self.temp_file.close()
988
+ self.fs._api.upload_file(
989
+ path_or_fileobj=self.temp_file.name,
990
+ path_in_repo=self.resolved_path.path_in_repo,
991
+ repo_id=self.resolved_path.repo_id,
992
+ token=self.fs.token,
993
+ repo_type=self.resolved_path.repo_type,
994
+ revision=self.resolved_path.revision,
995
+ commit_message=self.kwargs.get("commit_message"),
996
+ commit_description=self.kwargs.get("commit_description"),
997
+ )
998
+ os.remove(self.temp_file.name)
999
+ self.fs.invalidate_cache(
1000
+ path=self.resolved_path.unresolve(),
1001
+ )
1002
+
1003
+ def read(self, length=-1):
1004
+ """Read remote file.
1005
+
1006
+ If `length` is not provided or is -1, the entire file is downloaded and read. On POSIX systems and if
1007
+ `hf_transfer` is not enabled, the file is loaded in memory directly. Otherwise, the file is downloaded to a
1008
+ temporary file and read from there.
1009
+ """
1010
+ if self.mode == "rb" and (length is None or length == -1) and self.loc == 0:
1011
+ with self.fs.open(self.path, "rb", block_size=0) as f: # block_size=0 enables fast streaming
1012
+ return f.read()
1013
+ return super().read(length)
1014
+
1015
+ def url(self) -> str:
1016
+ return self.fs.url(self.path)
1017
+
1018
+
1019
+ class HfFileSystemStreamFile(fsspec.spec.AbstractBufferedFile):
1020
+ def __init__(
1021
+ self,
1022
+ fs: HfFileSystem,
1023
+ path: str,
1024
+ mode: str = "rb",
1025
+ revision: Optional[str] = None,
1026
+ block_size: int = 0,
1027
+ cache_type: str = "none",
1028
+ **kwargs,
1029
+ ):
1030
+ if block_size != 0:
1031
+ raise ValueError(f"HfFileSystemStreamFile only supports block_size=0 but got {block_size}")
1032
+ if cache_type != "none":
1033
+ raise ValueError(f"HfFileSystemStreamFile only supports cache_type='none' but got {cache_type}")
1034
+ if "w" in mode:
1035
+ raise ValueError(f"HfFileSystemStreamFile only supports reading but got mode='{mode}'")
1036
+ try:
1037
+ self.resolved_path = fs.resolve_path(path, revision=revision)
1038
+ except FileNotFoundError as e:
1039
+ if "w" in kwargs.get("mode", ""):
1040
+ raise FileNotFoundError(
1041
+ f"{e}.\nMake sure the repository and revision exist before writing data."
1042
+ ) from e
+ raise
1043
+ # avoid an unnecessary .info() call to instantiate .details
1044
+ self.details = {"name": self.resolved_path.unresolve(), "size": None}
1045
+ super().__init__(
1046
+ fs, self.resolved_path.unresolve(), mode=mode, block_size=block_size, cache_type=cache_type, **kwargs
1047
+ )
1048
+ self.response: Optional[Response] = None
1049
+ self.fs: HfFileSystem
1050
+
1051
+ def seek(self, loc: int, whence: int = 0):
1052
+ if loc == 0 and whence == 1:
1053
+ return
1054
+ if loc == self.loc and whence == 0:
1055
+ return
1056
+ raise ValueError("Cannot seek streaming HF file")
1057
+
1058
+ def read(self, length: int = -1):
1059
+ read_args = (length,) if length >= 0 else ()
1060
+ if self.response is None or self.response.raw.isclosed():
1061
+ url = hf_hub_url(
1062
+ repo_id=self.resolved_path.repo_id,
1063
+ revision=self.resolved_path.revision,
1064
+ filename=self.resolved_path.path_in_repo,
1065
+ repo_type=self.resolved_path.repo_type,
1066
+ endpoint=self.fs.endpoint,
1067
+ )
1068
+ self.response = http_backoff(
1069
+ "GET",
1070
+ url,
1071
+ headers=self.fs._api._build_hf_headers(),
1072
+ retry_on_status_codes=(500, 502, 503, 504),
1073
+ stream=True,
1074
+ timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
1075
+ )
1076
+ hf_raise_for_status(self.response)
1077
+ try:
1078
+ out = self.response.raw.read(*read_args)
1079
+ except Exception:
1080
+ self.response.close()
1081
+
1082
+ # Retry by recreating the connection
1083
+ url = hf_hub_url(
1084
+ repo_id=self.resolved_path.repo_id,
1085
+ revision=self.resolved_path.revision,
1086
+ filename=self.resolved_path.path_in_repo,
1087
+ repo_type=self.resolved_path.repo_type,
1088
+ endpoint=self.fs.endpoint,
1089
+ )
1090
+ self.response = http_backoff(
1091
+ "GET",
1092
+ url,
1093
+ headers={"Range": "bytes=%d-" % self.loc, **self.fs._api._build_hf_headers()},
1094
+ retry_on_status_codes=(500, 502, 503, 504),
1095
+ stream=True,
1096
+ timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
1097
+ )
1098
+ hf_raise_for_status(self.response)
1099
+ try:
1100
+ out = self.response.raw.read(*read_args)
1101
+ except Exception:
1102
+ self.response.close()
1103
+ raise
1104
+ self.loc += len(out)
1105
+ return out
1106
+
1107
+ def url(self) -> str:
1108
+ return self.fs.url(self.path)
1109
+
1110
+ def __del__(self):
1111
+ if not hasattr(self, "resolved_path"):
1112
+ # Means that the constructor failed. Nothing to do.
1113
+ return
1114
+ return super().__del__()
1115
+
1116
+ def __reduce__(self):
1117
+ return reopen, (self.fs, self.path, self.mode, self.blocksize, self.cache.name)
1118
+
1119
+
1120
+ def safe_revision(revision: str) -> str:
1121
+ return revision if SPECIAL_REFS_REVISION_REGEX.match(revision) else safe_quote(revision)
1122
+
1123
+
1124
+ def safe_quote(s: str) -> str:
1125
+ return quote(s, safe="")
1126
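+
+ # Illustrative behaviour (editor's sketch): safe_revision("refs/pr/1") returns the special
+ # ref unchanged, while safe_revision("my/branch") is fully quoted to "my%2Fbranch".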
+
1127
+
1128
+ def _raise_file_not_found(path: str, err: Optional[Exception]) -> NoReturn:
1129
+ msg = path
1130
+ if isinstance(err, RepositoryNotFoundError):
1131
+ msg = f"{path} (repository not found)"
1132
+ elif isinstance(err, RevisionNotFoundError):
1133
+ msg = f"{path} (revision not found)"
1134
+ elif isinstance(err, HFValidationError):
1135
+ msg = f"{path} (invalid repository id)"
1136
+ raise FileNotFoundError(msg) from err
1137
+
1138
+
1139
+ def reopen(fs: HfFileSystem, path: str, mode: str, block_size: int, cache_type: str):
1140
+ return fs.open(path, mode=mode, block_size=block_size, cache_type=cache_type)
meow/lib/python3.13/site-packages/huggingface_hub/hub_mixin.py ADDED
@@ -0,0 +1,833 @@
1
+ import inspect
2
+ import json
3
+ import os
4
+ from dataclasses import asdict, dataclass, is_dataclass
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union
7
+
8
+ import packaging.version
9
+
10
+ from . import constants
11
+ from .errors import EntryNotFoundError, HfHubHTTPError
12
+ from .file_download import hf_hub_download
13
+ from .hf_api import HfApi
14
+ from .repocard import ModelCard, ModelCardData
15
+ from .utils import (
16
+ SoftTemporaryDirectory,
17
+ is_jsonable,
18
+ is_safetensors_available,
19
+ is_simple_optional_type,
20
+ is_torch_available,
21
+ logging,
22
+ unwrap_simple_optional_type,
23
+ validate_hf_hub_args,
24
+ )
25
+
26
+
27
+ if TYPE_CHECKING:
28
+ from _typeshed import DataclassInstance
29
+
30
+ if is_torch_available():
31
+ import torch # type: ignore
32
+
33
+ if is_safetensors_available():
34
+ import safetensors
35
+ from safetensors.torch import load_model as load_model_as_safetensor
36
+ from safetensors.torch import save_model as save_model_as_safetensor
37
+
38
+
39
+ logger = logging.get_logger(__name__)
40
+
41
+ # Generic variable that is either ModelHubMixin or a subclass thereof
42
+ T = TypeVar("T", bound="ModelHubMixin")
43
+ # Generic variable to represent an args type
44
+ ARGS_T = TypeVar("ARGS_T")
45
+ ENCODER_T = Callable[[ARGS_T], Any]
46
+ DECODER_T = Callable[[Any], ARGS_T]
47
+ CODER_T = Tuple[ENCODER_T, DECODER_T]
48
+
49
+
50
+ DEFAULT_MODEL_CARD = """
51
+ ---
52
+ # For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
53
+ # Doc / guide: https://huggingface.co/docs/hub/model-cards
54
+ {{ card_data }}
55
+ ---
56
+
57
+ This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
58
+ - Library: {{ repo_url | default("[More Information Needed]", true) }}
59
+ - Docs: {{ docs_url | default("[More Information Needed]", true) }}
60
+ """
61
+
62
+
63
+ @dataclass
64
+ class MixinInfo:
65
+ model_card_template: str
66
+ model_card_data: ModelCardData
67
+ repo_url: Optional[str] = None
68
+ docs_url: Optional[str] = None
69
+
70
+
71
+ class ModelHubMixin:
72
+ """
73
+ A generic mixin to integrate ANY machine learning framework with the Hub.
74
+
75
+ To integrate your framework, your model class must inherit from this class. Custom logic for saving/loading models
76
+ has to be implemented by overriding [`_from_pretrained`] and [`_save_pretrained`]. [`PyTorchModelHubMixin`] is a good example
77
+ of mixin integration with the Hub. Check out our [integration guide](../guides/integrations) for more instructions.
78
+
79
+ When inheriting from [`ModelHubMixin`], you can define class-level attributes. These attributes are not passed to
80
+ `__init__` but to the class definition itself. This is useful to define metadata about the library integrating
81
+ [`ModelHubMixin`].
82
+
83
+ For more details on how to integrate the mixin with your library, check out the [integration guide](../guides/integrations).
84
+
85
+ Args:
86
+ repo_url (`str`, *optional*):
87
+ URL of the library repository. Used to generate model card.
88
+ docs_url (`str`, *optional*):
89
+ URL of the library documentation. Used to generate model card.
90
+ model_card_template (`str`, *optional*):
91
+ Template of the model card. Used to generate model card. Defaults to a generic template.
92
+ language (`str` or `List[str]`, *optional*):
93
+ Language supported by the library. Used to generate model card.
94
+ library_name (`str`, *optional*):
95
+ Name of the library integrating ModelHubMixin. Used to generate model card.
96
+ license (`str`, *optional*):
97
+ License of the library integrating ModelHubMixin. Used to generate model card.
98
+ E.g: "apache-2.0"
99
+ license_name (`str`, *optional*):
100
+ Name of the license. Used to generate model card.
101
+ Only used if `license` is set to `other`.
102
+ E.g: "coqui-public-model-license".
103
+ license_link (`str`, *optional*):
104
+ URL to the license of the library integrating ModelHubMixin. Used to generate model card.
105
+ Only used if `license` is set to `other` and `license_name` is set.
106
+ E.g: "https://coqui.ai/cpml".
107
+ pipeline_tag (`str`, *optional*):
108
+ Tag of the pipeline. Used to generate model card. E.g. "text-classification".
109
+ tags (`List[str]`, *optional*):
110
+ Tags to be added to the model card. Used to generate model card. E.g. ["x-custom-tag", "arxiv:2304.12244"]
111
+ coders (`Dict[Type, Tuple[Callable, Callable]]`, *optional*):
112
+ Dictionary of custom types and their encoders/decoders. Used to encode/decode arguments that are not
113
+ jsonable by default. E.g. dataclasses, argparse.Namespace, OmegaConf, etc.
114
+
115
+ Example:
116
+
117
+ ```python
118
+ >>> from huggingface_hub import ModelHubMixin
119
+
120
+ # Inherit from ModelHubMixin
121
+ >>> class MyCustomModel(
122
+ ... ModelHubMixin,
123
+ ... library_name="my-library",
124
+ ... tags=["x-custom-tag", "arxiv:2304.12244"],
125
+ ... repo_url="https://github.com/huggingface/my-cool-library",
126
+ ... docs_url="https://huggingface.co/docs/my-cool-library",
127
+ ... # ^ optional metadata to generate model card
128
+ ... ):
129
+ ... def __init__(self, size: int = 512, device: str = "cpu"):
130
+ ... # define how to initialize your model
131
+ ... super().__init__()
132
+ ... ...
133
+ ...
134
+ ... def _save_pretrained(self, save_directory: Path) -> None:
135
+ ... # define how to serialize your model
136
+ ... ...
137
+ ...
138
+ ... @classmethod
139
+ ... def from_pretrained(
140
+ ... cls: Type[T],
141
+ ... pretrained_model_name_or_path: Union[str, Path],
142
+ ... *,
143
+ ... force_download: bool = False,
144
+ ... resume_download: Optional[bool] = None,
145
+ ... proxies: Optional[Dict] = None,
146
+ ... token: Optional[Union[str, bool]] = None,
147
+ ... cache_dir: Optional[Union[str, Path]] = None,
148
+ ... local_files_only: bool = False,
149
+ ... revision: Optional[str] = None,
150
+ ... **model_kwargs,
151
+ ... ) -> T:
152
+ ... # define how to deserialize your model
153
+ ... ...
154
+
155
+ >>> model = MyCustomModel(size=256, device="gpu")
156
+
157
+ # Save model weights to local directory
158
+ >>> model.save_pretrained("my-awesome-model")
159
+
160
+ # Push model weights to the Hub
161
+ >>> model.push_to_hub("my-awesome-model")
162
+
163
+ # Download and initialize weights from the Hub
164
+ >>> reloaded_model = MyCustomModel.from_pretrained("username/my-awesome-model")
165
+ >>> reloaded_model.size
166
+ 256
167
+
168
+ # Model card has been correctly populated
169
+ >>> from huggingface_hub import ModelCard
170
+ >>> card = ModelCard.load("username/my-awesome-model")
171
+ >>> card.data.tags
172
+ ["x-custom-tag", "pytorch_model_hub_mixin", "model_hub_mixin"]
173
+ >>> card.data.library_name
174
+ "my-library"
175
+ ```
176
+ """
177
+
178
+ _hub_mixin_config: Optional[Union[dict, "DataclassInstance"]] = None
179
+ # ^ optional config attribute automatically set in `from_pretrained`
180
+ _hub_mixin_info: MixinInfo
181
+ # ^ information about the library integrating ModelHubMixin (used to generate model card)
182
+ _hub_mixin_inject_config: bool # whether `_from_pretrained` expects `config` or not
183
+ _hub_mixin_init_parameters: Dict[str, inspect.Parameter] # __init__ parameters
184
+ _hub_mixin_jsonable_default_values: Dict[str, Any] # default values for __init__ parameters
185
+ _hub_mixin_jsonable_custom_types: Tuple[Type, ...] # custom types that can be encoded/decoded
186
+ _hub_mixin_coders: Dict[Type, CODER_T] # encoders/decoders for custom types
187
+ # ^ internal values to handle config
188
+
189
+ def __init_subclass__(
190
+ cls,
191
+ *,
192
+ # Generic info for model card
193
+ repo_url: Optional[str] = None,
194
+ docs_url: Optional[str] = None,
195
+ # Model card template
196
+ model_card_template: str = DEFAULT_MODEL_CARD,
197
+ # Model card metadata
198
+ language: Optional[List[str]] = None,
199
+ library_name: Optional[str] = None,
200
+ license: Optional[str] = None,
201
+ license_name: Optional[str] = None,
202
+ license_link: Optional[str] = None,
203
+ pipeline_tag: Optional[str] = None,
204
+ tags: Optional[List[str]] = None,
205
+ # How to encode/decode arguments with custom type into a JSON config?
206
+ coders: Optional[
207
+ Dict[Type, CODER_T]
208
+ # Key is a type.
209
+ # Value is a tuple (encoder, decoder).
210
+ # Example: {MyCustomType: (lambda x: x.value, lambda data: MyCustomType(data))}
211
+ ] = None,
212
+ ) -> None:
213
+ """Inspect __init__ signature only once when subclassing + handle modelcard."""
214
+ super().__init_subclass__()
215
+
216
+ # Will be reused when creating modelcard
217
+ tags = tags or []
218
+ tags.append("model_hub_mixin")
219
+
220
+ # Initialize MixinInfo if not existent
221
+ info = MixinInfo(model_card_template=model_card_template, model_card_data=ModelCardData())
222
+
223
+ # If parent class has a MixinInfo, inherit from it as a copy
224
+ if hasattr(cls, "_hub_mixin_info"):
225
+ # Inherit model card template from parent class if not explicitly set
226
+ if model_card_template == DEFAULT_MODEL_CARD:
227
+ info.model_card_template = cls._hub_mixin_info.model_card_template
228
+
229
+ # Inherit from parent model card data
230
+ info.model_card_data = ModelCardData(**cls._hub_mixin_info.model_card_data.to_dict())
231
+
232
+ # Inherit other info
233
+ info.docs_url = cls._hub_mixin_info.docs_url
234
+ info.repo_url = cls._hub_mixin_info.repo_url
235
+ cls._hub_mixin_info = info
236
+
237
+ # Update MixinInfo with metadata
238
+ if model_card_template is not None and model_card_template != DEFAULT_MODEL_CARD:
239
+ info.model_card_template = model_card_template
240
+ if repo_url is not None:
241
+ info.repo_url = repo_url
242
+ if docs_url is not None:
243
+ info.docs_url = docs_url
244
+ if language is not None:
245
+ info.model_card_data.language = language
246
+ if library_name is not None:
247
+ info.model_card_data.library_name = library_name
248
+ if license is not None:
249
+ info.model_card_data.license = license
250
+ if license_name is not None:
251
+ info.model_card_data.license_name = license_name
252
+ if license_link is not None:
253
+ info.model_card_data.license_link = license_link
254
+ if pipeline_tag is not None:
255
+ info.model_card_data.pipeline_tag = pipeline_tag
256
+ if tags is not None:
257
+ if info.model_card_data.tags is not None:
258
+ info.model_card_data.tags.extend(tags)
259
+ else:
260
+ info.model_card_data.tags = tags
261
+
262
+ info.model_card_data.tags = sorted(set(info.model_card_data.tags))
263
+
264
+ # Handle encoders/decoders for args
265
+ cls._hub_mixin_coders = coders or {}
266
+ cls._hub_mixin_jsonable_custom_types = tuple(cls._hub_mixin_coders.keys())
267
+
268
+ # Inspect __init__ signature to handle config
269
+ cls._hub_mixin_init_parameters = dict(inspect.signature(cls.__init__).parameters)
270
+ cls._hub_mixin_jsonable_default_values = {
271
+ param.name: cls._encode_arg(param.default)
272
+ for param in cls._hub_mixin_init_parameters.values()
273
+ if param.default is not inspect.Parameter.empty and cls._is_jsonable(param.default)
274
+ }
275
+ cls._hub_mixin_inject_config = "config" in inspect.signature(cls._from_pretrained).parameters
276
+
277
+ def __new__(cls: Type[T], *args, **kwargs) -> T:
278
+ """Create a new instance of the class and handle config.
279
+
280
+ 3 cases:
281
+ - If `self._hub_mixin_config` is already set, do nothing.
282
+ - If `config` is passed as a dataclass, set it as `self._hub_mixin_config`.
283
+ - Otherwise, build `self._hub_mixin_config` from default values and passed values.
284
+ """
285
+ instance = super().__new__(cls)
286
+
287
+ # If `config` is already set, return early
288
+ if instance._hub_mixin_config is not None:
289
+ return instance
290
+
291
+ # Infer passed values
292
+ passed_values = {
293
+ **{
294
+ key: value
295
+ for key, value in zip(
296
+ # [1:] to skip `self` parameter
297
+ list(cls._hub_mixin_init_parameters)[1:],
298
+ args,
299
+ )
300
+ },
301
+ **kwargs,
302
+ }
303
+
304
+ # If config passed as dataclass => set it and return early
305
+ if is_dataclass(passed_values.get("config")):
306
+ instance._hub_mixin_config = passed_values["config"]
307
+ return instance
308
+
309
+ # Otherwise, build config from default + passed values
310
+ init_config = {
311
+ # default values
312
+ **cls._hub_mixin_jsonable_default_values,
313
+ # passed values
314
+ **{
315
+ key: cls._encode_arg(value) # Encode custom types as jsonable value
316
+ for key, value in passed_values.items()
317
+ if instance._is_jsonable(value) # Only if jsonable or we have a custom encoder
318
+ },
319
+ }
320
+ passed_config = init_config.pop("config", {})
321
+
322
+ # Populate `init_config` with provided config
323
+ if isinstance(passed_config, dict):
324
+ init_config.update(passed_config)
325
+
326
+ # Set `config` attribute and return
327
+ if init_config != {}:
328
+ instance._hub_mixin_config = init_config
329
+ return instance
330
+
331
+ @classmethod
332
+ def _is_jsonable(cls, value: Any) -> bool:
333
+ """Check if a value is JSON serializable."""
334
+ if isinstance(value, cls._hub_mixin_jsonable_custom_types):
335
+ return True
336
+ return is_jsonable(value)
337
+
338
+ @classmethod
339
+ def _encode_arg(cls, arg: Any) -> Any:
340
+ """Encode an argument into a JSON serializable format."""
341
+ for type_, (encoder, _) in cls._hub_mixin_coders.items():
342
+ if isinstance(arg, type_):
343
+ if arg is None:
344
+ return None
345
+ return encoder(arg)
346
+ return arg
347
+
348
+ @classmethod
349
+ def _decode_arg(cls, expected_type: Type[ARGS_T], value: Any) -> Optional[ARGS_T]:
350
+ """Decode a JSON serializable value into an argument."""
351
+ if is_simple_optional_type(expected_type):
352
+ if value is None:
353
+ return None
354
+ expected_type = unwrap_simple_optional_type(expected_type)
355
+ # Dataclass => handle it
356
+ if is_dataclass(expected_type):
357
+ return _load_dataclass(expected_type, value) # type: ignore[return-value]
358
+ # Otherwise => check custom decoders
359
+ for type_, (_, decoder) in cls._hub_mixin_coders.items():
360
+ if inspect.isclass(expected_type) and issubclass(expected_type, type_):
361
+ return decoder(value)
362
+ # Otherwise => don't decode
363
+ return value
364
+
365
+ def save_pretrained(
366
+ self,
367
+ save_directory: Union[str, Path],
368
+ *,
369
+ config: Optional[Union[dict, "DataclassInstance"]] = None,
370
+ repo_id: Optional[str] = None,
371
+ push_to_hub: bool = False,
372
+ model_card_kwargs: Optional[Dict[str, Any]] = None,
373
+ **push_to_hub_kwargs,
374
+ ) -> Optional[str]:
375
+ """
376
+ Save weights in local directory.
377
+
378
+ Args:
379
+ save_directory (`str` or `Path`):
380
+ Path to directory in which the model weights and configuration will be saved.
381
+ config (`dict` or `DataclassInstance`, *optional*):
382
+ Model configuration specified as a key/value dictionary or a dataclass instance.
383
+ push_to_hub (`bool`, *optional*, defaults to `False`):
384
+ Whether or not to push your model to the Huggingface Hub after saving it.
385
+ repo_id (`str`, *optional*):
386
+ ID of your repository on the Hub. Used only if `push_to_hub=True`. Will default to the folder name if
387
+ not provided.
388
+ model_card_kwargs (`Dict[str, Any]`, *optional*):
389
+ Additional arguments passed to the model card template to customize the model card.
390
+ push_to_hub_kwargs:
391
+ Additional keyword arguments passed along to the [`~ModelHubMixin.push_to_hub`] method.
392
+ Returns:
393
+ `str` or `None`: url of the commit on the Hub if `push_to_hub=True`, `None` otherwise.
394
+ """
395
+ save_directory = Path(save_directory)
396
+ save_directory.mkdir(parents=True, exist_ok=True)
397
+
398
+ # Remove config.json if already exists. After `_save_pretrained` we don't want to overwrite config.json
399
+ # as it might have been saved by the custom `_save_pretrained` already. However we do want to overwrite
400
+ # an existing config.json if it was not saved by `_save_pretrained`.
401
+ config_path = save_directory / constants.CONFIG_NAME
402
+ config_path.unlink(missing_ok=True)
403
+
404
+ # save model weights/files (framework-specific)
405
+ self._save_pretrained(save_directory)
406
+
407
+ # save config (if provided and if not serialized yet in `_save_pretrained`)
408
+ if config is None:
409
+ config = self._hub_mixin_config
410
+ if config is not None:
411
+ if is_dataclass(config):
412
+ config = asdict(config) # type: ignore[arg-type]
413
+ if not config_path.exists():
414
+ config_str = json.dumps(config, sort_keys=True, indent=2)
415
+ config_path.write_text(config_str)
416
+
417
+ # save model card
418
+ model_card_path = save_directory / "README.md"
419
+ model_card_kwargs = model_card_kwargs if model_card_kwargs is not None else {}
420
+ if not model_card_path.exists(): # do not overwrite if already exists
421
+ self.generate_model_card(**model_card_kwargs).save(save_directory / "README.md")
422
+
423
+ # push to the Hub if required
424
+ if push_to_hub:
425
+ kwargs = push_to_hub_kwargs.copy() # soft-copy to avoid mutating input
426
+ if config is not None: # kwarg for `push_to_hub`
427
+ kwargs["config"] = config
428
+ if repo_id is None:
429
+ repo_id = save_directory.name # Defaults to `save_directory` name
430
+ return self.push_to_hub(repo_id=repo_id, model_card_kwargs=model_card_kwargs, **kwargs)
431
+ return None
432
+
433
+ def _save_pretrained(self, save_directory: Path) -> None:
434
+ """
435
+ Overwrite this method in subclass to define how to save your model.
436
+ Check out our [integration guide](../guides/integrations) for instructions.
437
+
438
+ Args:
439
+ save_directory (`str` or `Path`):
440
+ Path to directory in which the model weights and configuration will be saved.
441
+ """
442
+ raise NotImplementedError
443
+
444
+ @classmethod
445
+ @validate_hf_hub_args
446
+ def from_pretrained(
447
+ cls: Type[T],
448
+ pretrained_model_name_or_path: Union[str, Path],
449
+ *,
450
+ force_download: bool = False,
451
+ resume_download: Optional[bool] = None,
452
+ proxies: Optional[Dict] = None,
453
+ token: Optional[Union[str, bool]] = None,
454
+ cache_dir: Optional[Union[str, Path]] = None,
455
+ local_files_only: bool = False,
456
+ revision: Optional[str] = None,
457
+ **model_kwargs,
458
+ ) -> T:
459
+ """
460
+ Download a model from the Huggingface Hub and instantiate it.
461
+
462
+ Args:
463
+ pretrained_model_name_or_path (`str`, `Path`):
464
+ - Either the `model_id` (string) of a model hosted on the Hub, e.g. `bigscience/bloom`.
465
+ - Or a path to a `directory` containing model weights saved using
466
+ [`~transformers.PreTrainedModel.save_pretrained`], e.g., `../path/to/my_model_directory/`.
467
+ revision (`str`, *optional*):
468
+ Revision of the model on the Hub. Can be a branch name, a git tag or any commit id.
469
+ Defaults to the latest commit on `main` branch.
470
+ force_download (`bool`, *optional*, defaults to `False`):
471
+ Whether to force (re-)downloading the model weights and configuration files from the Hub, overriding
472
+ the existing cache.
473
+ proxies (`Dict[str, str]`, *optional*):
474
+ A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
475
+ 'http://hostname': 'foo.bar:4012'}`. The proxies are used on every request.
476
+ token (`str` or `bool`, *optional*):
477
+ The token to use as HTTP bearer authorization for remote files. By default, it will use the token
478
+ cached when running `huggingface-cli login`.
479
+ cache_dir (`str`, `Path`, *optional*):
480
+ Path to the folder where cached files are stored.
481
+ local_files_only (`bool`, *optional*, defaults to `False`):
482
+ If `True`, avoid downloading the file and return the path to the local cached file if it exists.
483
+ model_kwargs (`Dict`, *optional*):
484
+ Additional kwargs to pass to the model during initialization.
485
+ """
486
+ model_id = str(pretrained_model_name_or_path)
487
+ config_file: Optional[str] = None
488
+ if os.path.isdir(model_id):
489
+ if constants.CONFIG_NAME in os.listdir(model_id):
490
+ config_file = os.path.join(model_id, constants.CONFIG_NAME)
491
+ else:
492
+ logger.warning(f"{constants.CONFIG_NAME} not found in {Path(model_id).resolve()}")
493
+ else:
494
+ try:
495
+ config_file = hf_hub_download(
496
+ repo_id=model_id,
497
+ filename=constants.CONFIG_NAME,
498
+ revision=revision,
499
+ cache_dir=cache_dir,
500
+ force_download=force_download,
501
+ proxies=proxies,
502
+ resume_download=resume_download,
503
+ token=token,
504
+ local_files_only=local_files_only,
505
+ )
506
+ except HfHubHTTPError as e:
507
+ logger.info(f"{constants.CONFIG_NAME} not found on the HuggingFace Hub: {str(e)}")
508
+
509
+ # Read config
510
+ config = None
511
+ if config_file is not None:
512
+ with open(config_file, "r", encoding="utf-8") as f:
513
+ config = json.load(f)
514
+
515
+ # Decode custom types in config
516
+ for key, value in config.items():
517
+ if key in cls._hub_mixin_init_parameters:
518
+ expected_type = cls._hub_mixin_init_parameters[key].annotation
519
+ if expected_type is not inspect.Parameter.empty:
520
+ config[key] = cls._decode_arg(expected_type, value)
521
+
522
+ # Populate model_kwargs from config
523
+ for param in cls._hub_mixin_init_parameters.values():
524
+ if param.name not in model_kwargs and param.name in config:
525
+ model_kwargs[param.name] = config[param.name]
526
+
527
+ # Check if `config` argument was passed at init
528
+ if "config" in cls._hub_mixin_init_parameters and "config" not in model_kwargs:
529
+ # Decode `config` argument if it was passed
530
+ config_annotation = cls._hub_mixin_init_parameters["config"].annotation
531
+ config = cls._decode_arg(config_annotation, config)
532
+
533
+ # Forward config to model initialization
534
+ model_kwargs["config"] = config
535
+
536
+ # Inject config if `**kwargs` are expected
537
+ if is_dataclass(cls):
538
+ for key in cls.__dataclass_fields__:
539
+ if key not in model_kwargs and key in config:
540
+ model_kwargs[key] = config[key]
541
+ elif any(param.kind == inspect.Parameter.VAR_KEYWORD for param in cls._hub_mixin_init_parameters.values()):
542
+ for key, value in config.items():
543
+ if key not in model_kwargs:
544
+ model_kwargs[key] = value
545
+
546
+ # Finally, also inject if `_from_pretrained` expects it
547
+ if cls._hub_mixin_inject_config and "config" not in model_kwargs:
548
+ model_kwargs["config"] = config
549
+
550
+ instance = cls._from_pretrained(
551
+ model_id=str(model_id),
552
+ revision=revision,
553
+ cache_dir=cache_dir,
554
+ force_download=force_download,
555
+ proxies=proxies,
556
+ resume_download=resume_download,
557
+ local_files_only=local_files_only,
558
+ token=token,
559
+ **model_kwargs,
560
+ )
561
+
562
+ # Implicitly set the config as instance attribute if not already set by the class
563
+ # This way `config` will be available when calling `save_pretrained` or `push_to_hub`.
564
+ if config is not None and (getattr(instance, "_hub_mixin_config", None) in (None, {})):
565
+ instance._hub_mixin_config = config
566
+
567
+ return instance
568
+
569
+ @classmethod
570
+ def _from_pretrained(
571
+ cls: Type[T],
572
+ *,
573
+ model_id: str,
574
+ revision: Optional[str],
575
+ cache_dir: Optional[Union[str, Path]],
576
+ force_download: bool,
577
+ proxies: Optional[Dict],
578
+ resume_download: Optional[bool],
579
+ local_files_only: bool,
580
+ token: Optional[Union[str, bool]],
581
+ **model_kwargs,
582
+ ) -> T:
583
+ """Overwrite this method in subclass to define how to load your model from pretrained.
584
+
585
+ Use [`hf_hub_download`] or [`snapshot_download`] to download files from the Hub before loading them. Most
586
+ args taken as input can be directly passed to those 2 methods. If needed, you can add more arguments to this
587
+ method using "model_kwargs". For example [`PyTorchModelHubMixin._from_pretrained`] takes as input a `map_location`
588
+ parameter to set on which device the model should be loaded.
589
+
590
+ Check out our [integration guide](../guides/integrations) for more instructions.
591
+
592
+ Args:
593
+ model_id (`str`):
594
+ ID of the model to load from the Huggingface Hub (e.g. `bigscience/bloom`).
595
+ revision (`str`, *optional*):
596
+ Revision of the model on the Hub. Can be a branch name, a git tag or any commit id. Defaults to the
597
+ latest commit on `main` branch.
598
+ force_download (`bool`, *optional*, defaults to `False`):
599
+ Whether to force (re-)downloading the model weights and configuration files from the Hub, overriding
600
+ the existing cache.
601
+ proxies (`Dict[str, str]`, *optional*):
602
+ A dictionary of proxy servers to use by protocol or endpoint (e.g., `{'http': 'foo.bar:3128',
603
+ 'http://hostname': 'foo.bar:4012'}`).
604
+ token (`str` or `bool`, *optional*):
605
+ The token to use as HTTP bearer authorization for remote files. By default, it will use the token
606
+ cached when running `huggingface-cli login`.
607
+ cache_dir (`str`, `Path`, *optional*):
608
+ Path to the folder where cached files are stored.
609
+ local_files_only (`bool`, *optional*, defaults to `False`):
610
+ If `True`, avoid downloading the file and return the path to the local cached file if it exists.
611
+ model_kwargs:
612
+ Additional keyword arguments passed along to the [`~ModelHubMixin._from_pretrained`] method.
613
+ """
614
+ raise NotImplementedError
615
+
616
+ @validate_hf_hub_args
617
+ def push_to_hub(
618
+ self,
619
+ repo_id: str,
620
+ *,
621
+ config: Optional[Union[dict, "DataclassInstance"]] = None,
622
+ commit_message: str = "Push model using huggingface_hub.",
623
+ private: Optional[bool] = None,
624
+ token: Optional[str] = None,
625
+ branch: Optional[str] = None,
626
+ create_pr: Optional[bool] = None,
627
+ allow_patterns: Optional[Union[List[str], str]] = None,
628
+ ignore_patterns: Optional[Union[List[str], str]] = None,
629
+ delete_patterns: Optional[Union[List[str], str]] = None,
630
+ model_card_kwargs: Optional[Dict[str, Any]] = None,
631
+ ) -> str:
632
+ """
633
+ Upload model checkpoint to the Hub.
634
+
635
+ Use `allow_patterns` and `ignore_patterns` to precisely filter which files should be pushed to the hub. Use
636
+ `delete_patterns` to delete existing remote files in the same commit. See [`upload_folder`] reference for more
637
+ details.
638
+
639
+ Args:
640
+ repo_id (`str`):
641
+ ID of the repository to push to (example: `"username/my-model"`).
642
+ config (`dict` or `DataclassInstance`, *optional*):
643
+ Model configuration specified as a key/value dictionary or a dataclass instance.
644
+ commit_message (`str`, *optional*):
645
+ Message to commit while pushing.
646
+ private (`bool`, *optional*):
647
+ Whether the repository created should be private.
648
+ If `None` (default), the repo will be public unless the organization's default is private.
649
+ token (`str`, *optional*):
650
+ The token to use as HTTP bearer authorization for remote files. By default, it will use the token
651
+ cached when running `huggingface-cli login`.
652
+ branch (`str`, *optional*):
653
+ The git branch on which to push the model. This defaults to `"main"`.
654
+ create_pr (`boolean`, *optional*):
655
+ Whether or not to create a Pull Request from `branch` with that commit. Defaults to `False`.
656
+ allow_patterns (`List[str]` or `str`, *optional*):
657
+ If provided, only files matching at least one pattern are pushed.
658
+ ignore_patterns (`List[str]` or `str`, *optional*):
659
+ If provided, files matching any of the patterns are not pushed.
660
+ delete_patterns (`List[str]` or `str`, *optional*):
661
+ If provided, remote files matching any of the patterns will be deleted from the repo.
662
+ model_card_kwargs (`Dict[str, Any]`, *optional*):
663
+ Additional arguments passed to the model card template to customize the model card.
664
+
665
+ Returns:
666
+ The url of the commit of your model in the given repository.
667
+ """
668
+ api = HfApi(token=token)
669
+ repo_id = api.create_repo(repo_id=repo_id, private=private, exist_ok=True).repo_id
670
+
671
+ # Push the files to the repo in a single commit
672
+ with SoftTemporaryDirectory() as tmp:
673
+ saved_path = Path(tmp) / repo_id
674
+ self.save_pretrained(saved_path, config=config, model_card_kwargs=model_card_kwargs)
675
+ return api.upload_folder(
676
+ repo_id=repo_id,
677
+ repo_type="model",
678
+ folder_path=saved_path,
679
+ commit_message=commit_message,
680
+ revision=branch,
681
+ create_pr=create_pr,
682
+ allow_patterns=allow_patterns,
683
+ ignore_patterns=ignore_patterns,
684
+ delete_patterns=delete_patterns,
685
+ )
686
+
687
+ def generate_model_card(self, *args, **kwargs) -> ModelCard:
688
+ card = ModelCard.from_template(
689
+ card_data=self._hub_mixin_info.model_card_data,
690
+ template_str=self._hub_mixin_info.model_card_template,
691
+ repo_url=self._hub_mixin_info.repo_url,
692
+ docs_url=self._hub_mixin_info.docs_url,
693
+ **kwargs,
694
+ )
695
+ return card
696
+
697
+
698
+ class PyTorchModelHubMixin(ModelHubMixin):
699
+ """
700
+ Implementation of [`ModelHubMixin`] to provide model Hub upload/download capabilities to PyTorch models. The model
701
+ is set in evaluation mode by default using `model.eval()` (dropout modules are deactivated). To train the model,
702
+ you should first set it back in training mode with `model.train()`.
703
+
704
+ See [`ModelHubMixin`] for more details on how to use the mixin.
705
+
706
+ Example:
707
+
708
+ ```python
709
+ >>> import torch
710
+ >>> import torch.nn as nn
711
+ >>> from huggingface_hub import PyTorchModelHubMixin
712
+
713
+ >>> class MyModel(
714
+ ... nn.Module,
715
+ ... PyTorchModelHubMixin,
716
+ ... library_name="keras-nlp",
717
+ ... repo_url="https://github.com/keras-team/keras-nlp",
718
+ ... docs_url="https://keras.io/keras_nlp/",
719
+ ... # ^ optional metadata to generate model card
720
+ ... ):
721
+ ... def __init__(self, hidden_size: int = 512, vocab_size: int = 30000, output_size: int = 4):
722
+ ... super().__init__()
723
+ ... self.param = nn.Parameter(torch.rand(hidden_size, vocab_size))
724
+ ... self.linear = nn.Linear(output_size, vocab_size)
725
+
726
+ ... def forward(self, x):
727
+ ... return self.linear(x + self.param)
728
+ >>> model = MyModel(hidden_size=256)
729
+
730
+ # Save model weights to local directory
731
+ >>> model.save_pretrained("my-awesome-model")
732
+
733
+ # Push model weights to the Hub
734
+ >>> model.push_to_hub("my-awesome-model")
735
+
736
+ # Download and initialize weights from the Hub
737
+ >>> model = MyModel.from_pretrained("username/my-awesome-model")
738
+ >>> model.hidden_size
739
+ 256
740
+ ```
741
+ """
742
+
743
+ def __init_subclass__(cls, *args, tags: Optional[List[str]] = None, **kwargs) -> None:
744
+ tags = tags or []
745
+ tags.append("pytorch_model_hub_mixin")
746
+ kwargs["tags"] = tags
747
+ return super().__init_subclass__(*args, **kwargs)
748
+
749
+ def _save_pretrained(self, save_directory: Path) -> None:
750
+ """Save weights from a Pytorch model to a local directory."""
751
+ model_to_save = self.module if hasattr(self, "module") else self # type: ignore
752
+ save_model_as_safetensor(model_to_save, str(save_directory / constants.SAFETENSORS_SINGLE_FILE))
753
+
754
+ @classmethod
755
+ def _from_pretrained(
756
+ cls,
757
+ *,
758
+ model_id: str,
759
+ revision: Optional[str],
760
+ cache_dir: Optional[Union[str, Path]],
761
+ force_download: bool,
762
+ proxies: Optional[Dict],
763
+ resume_download: Optional[bool],
764
+ local_files_only: bool,
765
+ token: Union[str, bool, None],
766
+ map_location: str = "cpu",
767
+ strict: bool = False,
768
+ **model_kwargs,
769
+ ):
770
+ """Load Pytorch pretrained weights and return the loaded model."""
771
+ model = cls(**model_kwargs)
772
+ if os.path.isdir(model_id):
773
+ print("Loading weights from local directory")
774
+ model_file = os.path.join(model_id, constants.SAFETENSORS_SINGLE_FILE)
775
+ return cls._load_as_safetensor(model, model_file, map_location, strict)
776
+ else:
777
+ try:
778
+ model_file = hf_hub_download(
779
+ repo_id=model_id,
780
+ filename=constants.SAFETENSORS_SINGLE_FILE,
781
+ revision=revision,
782
+ cache_dir=cache_dir,
783
+ force_download=force_download,
784
+ proxies=proxies,
785
+ resume_download=resume_download,
786
+ token=token,
787
+ local_files_only=local_files_only,
788
+ )
789
+ return cls._load_as_safetensor(model, model_file, map_location, strict)
790
+ except EntryNotFoundError:
791
+ model_file = hf_hub_download(
792
+ repo_id=model_id,
793
+ filename=constants.PYTORCH_WEIGHTS_NAME,
794
+ revision=revision,
795
+ cache_dir=cache_dir,
796
+ force_download=force_download,
797
+ proxies=proxies,
798
+ resume_download=resume_download,
799
+ token=token,
800
+ local_files_only=local_files_only,
801
+ )
802
+ return cls._load_as_pickle(model, model_file, map_location, strict)
803
+
804
+ @classmethod
805
+ def _load_as_pickle(cls, model: T, model_file: str, map_location: str, strict: bool) -> T:
806
+ state_dict = torch.load(model_file, map_location=torch.device(map_location), weights_only=True)
807
+ model.load_state_dict(state_dict, strict=strict) # type: ignore
808
+ model.eval() # type: ignore
809
+ return model
810
+
811
+ @classmethod
812
+ def _load_as_safetensor(cls, model: T, model_file: str, map_location: str, strict: bool) -> T:
813
+ if packaging.version.parse(safetensors.__version__) < packaging.version.parse("0.4.3"): # type: ignore [attr-defined]
814
+ load_model_as_safetensor(model, model_file, strict=strict) # type: ignore [arg-type]
815
+ if map_location != "cpu":
816
+ logger.warning(
817
+ "Loading model weights on other devices than 'cpu' is not supported natively in your version of safetensors."
818
+ " This means that the model is loaded on 'cpu' first and then copied to the device."
819
+ " This leads to a slower loading time."
820
+ " Please update safetensors to version 0.4.3 or above for improved performance."
821
+ )
822
+ model.to(map_location) # type: ignore [attr-defined]
823
+ else:
824
+ safetensors.torch.load_model(model, model_file, strict=strict, device=map_location) # type: ignore [arg-type]
825
+ return model
826
+
827
+
828
+ def _load_dataclass(datacls: Type["DataclassInstance"], data: dict) -> "DataclassInstance":
829
+ """Load a dataclass instance from a dictionary.
830
+
831
+ Fields not expected by the dataclass are ignored.
832
+ """
833
+ return datacls(**{k: v for k, v in data.items() if k in datacls.__dataclass_fields__})
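The `coders` class argument documented above is the one piece of `ModelHubMixin` that the in-file examples never exercise. A hedged sketch of how it could be used — `Resolution` and `MyModel` are illustrative names, not part of the library:

```python
from huggingface_hub import ModelHubMixin


class Resolution:
    """A toy type that json.dumps cannot serialize on its own."""

    def __init__(self, width: int, height: int):
        self.width = width
        self.height = height


class MyModel(
    ModelHubMixin,
    coders={
        # encoder turns the value into something jsonable;
        # decoder rebuilds it when the config is read back.
        Resolution: (lambda r: [r.width, r.height], lambda v: Resolution(*v)),
    },
):
    def __init__(self, resolution: Resolution = Resolution(224, 224)):
        self.resolution = resolution

    def _save_pretrained(self, save_directory):
        pass  # framework-specific weight serialization would go here

    @classmethod
    def _from_pretrained(cls, *, model_id, revision, cache_dir, force_download,
                         proxies, resume_download, local_files_only, token,
                         **model_kwargs):
        return cls(**model_kwargs)  # weights would be loaded here
```

With this in place, `MyModel().save_pretrained("out")` writes `{"resolution": [224, 224]}` into `config.json`, and `MyModel.from_pretrained("out")` rebuilds a `Resolution` instance because the `__init__` annotation matches a registered decoder.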
meow/lib/python3.13/site-packages/huggingface_hub/keras_mixin.py ADDED
@@ -0,0 +1,500 @@
1
+ import collections.abc as collections
2
+ import json
3
+ import os
4
+ import warnings
5
+ from functools import wraps
6
+ from pathlib import Path
7
+ from shutil import copytree
8
+ from typing import Any, Dict, List, Optional, Union
9
+
10
+ from huggingface_hub import ModelHubMixin, snapshot_download
11
+ from huggingface_hub.utils import (
12
+ get_tf_version,
13
+ is_graphviz_available,
14
+ is_pydot_available,
15
+ is_tf_available,
16
+ yaml_dump,
17
+ )
18
+
19
+ from . import constants
20
+ from .hf_api import HfApi
21
+ from .utils import SoftTemporaryDirectory, logging, validate_hf_hub_args
22
+ from .utils._typing import CallableT
23
+
24
+
25
+ logger = logging.get_logger(__name__)
26
+
27
+ keras = None
28
+ if is_tf_available():
29
+ # Depending on which version of TensorFlow is installed, we need to import
30
+ # keras from the correct location.
31
+ # See https://github.com/tensorflow/tensorflow/releases/tag/v2.16.1.
32
+ # Note: saving a keras model only works with Keras<3.0.
33
+ try:
34
+ import tf_keras as keras # type: ignore
35
+ except ImportError:
36
+ import tensorflow as tf # type: ignore
37
+
38
+ keras = tf.keras
39
+
40
+
41
+ def _requires_keras_2_model(fn: CallableT) -> CallableT:
42
+ # Wrapper to raise if user tries to save a Keras 3.x model
43
+ @wraps(fn)
44
+ def _inner(model, *args, **kwargs):
45
+ if not hasattr(model, "history"): # hacky way to check if model is Keras 2.x
46
+ raise NotImplementedError(
47
+ f"Cannot use '{fn.__name__}': Keras 3.x is not supported."
48
+ " Please save models manually and upload them using `upload_folder` or `huggingface-cli upload`."
49
+ )
50
+ return fn(model, *args, **kwargs)
51
+
52
+ return _inner # type: ignore [return-value]
53
+
54
+
55
+ def _flatten_dict(dictionary, parent_key=""):
56
+ """Flatten a nested dictionary.
57
+ Reference: https://stackoverflow.com/a/6027615/10319735
58
+
59
+ Args:
60
+ dictionary (`dict`):
61
+ The nested dictionary to be flattened.
62
+ parent_key (`str`):
63
+ The parent key to be prefixed to the children keys.
64
+ Necessary for recursing over the nested dictionary.
65
+
66
+ Returns:
67
+ The flattened dictionary.
68
+ """
69
+ items = []
70
+ for key, value in dictionary.items():
71
+ new_key = f"{parent_key}.{key}" if parent_key else key
72
+ if isinstance(value, collections.MutableMapping):
73
+ items.extend(
74
+ _flatten_dict(
75
+ value,
76
+ new_key,
77
+ ).items()
78
+ )
79
+ else:
80
+ items.append((new_key, value))
81
+ return dict(items)
82
+
83
+
84
+ def _create_hyperparameter_table(model):
85
+ """Parse hyperparameter dictionary into a markdown table."""
86
+ table = None
87
+ if model.optimizer is not None:
88
+ optimizer_params = model.optimizer.get_config()
89
+ # flatten the configuration
90
+ optimizer_params = _flatten_dict(optimizer_params)
91
+ optimizer_params["training_precision"] = keras.mixed_precision.global_policy().name
92
+ table = "| Hyperparameters | Value |\n| :-- | :-- |\n"
93
+ for key, value in optimizer_params.items():
94
+ table += f"| {key} | {value} |\n"
95
+ return table
96
+
97
+
98
+ def _plot_network(model, save_directory):
99
+ keras.utils.plot_model(
100
+ model,
101
+ to_file=f"{save_directory}/model.png",
102
+ show_shapes=False,
103
+ show_dtype=False,
104
+ show_layer_names=True,
105
+ rankdir="TB",
106
+ expand_nested=False,
107
+ dpi=96,
108
+ layer_range=None,
109
+ )
110
+
111
+
112
+ def _create_model_card(
113
+ model,
114
+ repo_dir: Path,
115
+ plot_model: bool = True,
116
+ metadata: Optional[dict] = None,
117
+ ):
118
+ """
119
+ Creates a model card for the repository.
120
+
121
+ Do not overwrite an existing README.md file.
122
+ """
123
+ readme_path = repo_dir / "README.md"
124
+ if readme_path.exists():
125
+ return
126
+
127
+ hyperparameters = _create_hyperparameter_table(model)
128
+ if plot_model and is_graphviz_available() and is_pydot_available():
129
+ _plot_network(model, repo_dir)
130
+ if metadata is None:
131
+ metadata = {}
132
+ metadata["library_name"] = "keras"
133
+ model_card: str = "---\n"
134
+ model_card += yaml_dump(metadata, default_flow_style=False)
135
+ model_card += "---\n"
136
+ model_card += "\n## Model description\n\nMore information needed\n"
137
+ model_card += "\n## Intended uses & limitations\n\nMore information needed\n"
138
+ model_card += "\n## Training and evaluation data\n\nMore information needed\n"
139
+ if hyperparameters is not None:
140
+ model_card += "\n## Training procedure\n"
141
+ model_card += "\n### Training hyperparameters\n"
142
+ model_card += "\nThe following hyperparameters were used during training:\n\n"
143
+ model_card += hyperparameters
144
+ model_card += "\n"
145
+ if plot_model and os.path.exists(f"{repo_dir}/model.png"):
146
+ model_card += "\n ## Model Plot\n"
147
+ model_card += "\n<details>"
148
+ model_card += "\n<summary>View Model Plot</summary>\n"
149
+ path_to_plot = "./model.png"
150
+ model_card += f"\n![Model Image]({path_to_plot})\n"
151
+ model_card += "\n</details>"
152
+
153
+ readme_path.write_text(model_card)
154
+
155
+
156
+ @_requires_keras_2_model
157
+ def save_pretrained_keras(
158
+ model,
159
+ save_directory: Union[str, Path],
160
+ config: Optional[Dict[str, Any]] = None,
161
+ include_optimizer: bool = False,
162
+ plot_model: bool = True,
163
+ tags: Optional[Union[list, str]] = None,
164
+ **model_save_kwargs,
165
+ ):
166
+ """
167
+ Saves a Keras model to save_directory in SavedModel format. Use this if
168
+ you're using the Functional or Sequential APIs.
169
+
170
+ Args:
171
+ model (`Keras.Model`):
172
+ The [Keras
173
+ model](https://www.tensorflow.org/api_docs/python/tf/keras/Model)
174
+ you'd like to save. The model must be compiled and built.
175
+ save_directory (`str` or `Path`):
176
+ Specify directory in which you want to save the Keras model.
177
+ config (`dict`, *optional*):
178
+ Configuration object to be saved alongside the model weights.
179
+ include_optimizer(`bool`, *optional*, defaults to `False`):
180
+ Whether or not to include optimizer in serialization.
181
+ plot_model (`bool`, *optional*, defaults to `True`):
182
+ Setting this to `True` will plot the model and put it in the model
183
+ card. Requires graphviz and pydot to be installed.
184
+ tags (Union[`str`,`list`], *optional*):
185
+ List of tags that are related to model or string of a single tag. See example tags
186
+ [here](https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1).
187
+ model_save_kwargs(`dict`, *optional*):
188
+ model_save_kwargs will be passed to
189
+ [`tf.keras.models.save_model()`](https://www.tensorflow.org/api_docs/python/tf/keras/models/save_model).
190
+ """
191
+ if keras is None:
192
+ raise ImportError("Called a Tensorflow-specific function but could not import it.")
193
+
194
+ if not model.built:
195
+ raise ValueError("Model should be built before trying to save")
196
+
197
+ save_directory = Path(save_directory)
198
+ save_directory.mkdir(parents=True, exist_ok=True)
199
+
200
+ # saving config
201
+ if config:
202
+ if not isinstance(config, dict):
203
+ raise RuntimeError(f"Provided config to save_pretrained_keras should be a dict. Got: '{type(config)}'")
204
+
205
+ with (save_directory / constants.CONFIG_NAME).open("w") as f:
206
+ json.dump(config, f)
207
+
208
+ metadata = {}
209
+ if isinstance(tags, list):
210
+ metadata["tags"] = tags
211
+ elif isinstance(tags, str):
212
+ metadata["tags"] = [tags]
213
+
214
+ task_name = model_save_kwargs.pop("task_name", None)
215
+ if task_name is not None:
216
+ warnings.warn(
217
+ "`task_name` input argument is deprecated. Pass `tags` instead.",
218
+ FutureWarning,
219
+ )
220
+ if "tags" in metadata:
221
+ metadata["tags"].append(task_name)
222
+ else:
223
+ metadata["tags"] = [task_name]
224
+
225
+ if model.history is not None:
226
+ if model.history.history != {}:
227
+ path = save_directory / "history.json"
228
+ if path.exists():
229
+ warnings.warn(
230
+ "`history.json` file already exists, it will be overwritten by the history of this version.",
231
+ UserWarning,
232
+ )
233
+ with path.open("w", encoding="utf-8") as f:
234
+ json.dump(model.history.history, f, indent=2, sort_keys=True)
235
+
236
+ _create_model_card(model, save_directory, plot_model, metadata)
237
+ keras.models.save_model(model, save_directory, include_optimizer=include_optimizer, **model_save_kwargs)
238
+
239
+
240
+ def from_pretrained_keras(*args, **kwargs) -> "KerasModelHubMixin":
241
+ r"""
242
+ Instantiate a pretrained Keras model from a pre-trained model from the Hub.
243
+ The model is expected to be in `SavedModel` format.
244
+
245
+ Args:
246
+ pretrained_model_name_or_path (`str` or `os.PathLike`):
247
+ Can be either:
248
+ - A string, the `model id` of a pretrained model hosted inside a
249
+ model repo on huggingface.co. Valid model ids can be located
250
+ at the root-level, like `bert-base-uncased`, or namespaced
251
+ under a user or organization name, like
252
+ `dbmdz/bert-base-german-cased`.
253
+ - You can add `revision` by appending `@` at the end of model_id
254
+ simply like this: `dbmdz/bert-base-german-cased@main` Revision
255
+ is the specific model version to use. It can be a branch name,
256
+ a tag name, or a commit id, since we use a git-based system
257
+ for storing models and other artifacts on huggingface.co, so
258
+ `revision` can be any identifier allowed by git.
259
+ - A path to a `directory` containing model weights saved using
260
+ [`~transformers.PreTrainedModel.save_pretrained`], e.g.,
261
+ `./my_model_directory/`.
262
+ - `None` if you are both providing the configuration and state
263
+ dictionary (resp. with keyword arguments `config` and
264
+ `state_dict`).
265
+ force_download (`bool`, *optional*, defaults to `False`):
266
+ Whether to force the (re-)download of the model weights and
267
+ configuration files, overriding the cached versions if they exist.
268
+ proxies (`Dict[str, str]`, *optional*):
269
+ A dictionary of proxy servers to use by protocol or endpoint, e.g.,
270
+ `{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The
271
+ proxies are used on each request.
272
+ token (`str` or `bool`, *optional*):
273
+ The token to use as HTTP bearer authorization for remote files. If
274
+ `True`, will use the token generated when running `transformers-cli
275
+ login` (stored in `~/.huggingface`).
276
+ cache_dir (`Union[str, os.PathLike]`, *optional*):
277
+ Path to a directory in which a downloaded pretrained model
278
+ configuration should be cached if the standard cache should not be
279
+ used.
280
+ local_files_only(`bool`, *optional*, defaults to `False`):
281
+ Whether to only look at local files (i.e., do not try to download
282
+ the model).
283
+ model_kwargs (`Dict`, *optional*):
284
+ model_kwargs will be passed to the model during initialization
285
+
286
+ <Tip>
287
+
288
+ Passing `token=True` is required when you want to use a private
289
+ model.
290
+
291
+ </Tip>
292
+ """
293
+ return KerasModelHubMixin.from_pretrained(*args, **kwargs)
294
+
295
+
296
+ @validate_hf_hub_args
297
+ @_requires_keras_2_model
298
+ def push_to_hub_keras(
299
+ model,
300
+ repo_id: str,
301
+ *,
302
+ config: Optional[dict] = None,
303
+ commit_message: str = "Push Keras model using huggingface_hub.",
304
+ private: Optional[bool] = None,
305
+ api_endpoint: Optional[str] = None,
306
+ token: Optional[str] = None,
307
+ branch: Optional[str] = None,
308
+ create_pr: Optional[bool] = None,
309
+ allow_patterns: Optional[Union[List[str], str]] = None,
310
+ ignore_patterns: Optional[Union[List[str], str]] = None,
311
+ delete_patterns: Optional[Union[List[str], str]] = None,
312
+ log_dir: Optional[str] = None,
313
+ include_optimizer: bool = False,
314
+ tags: Optional[Union[list, str]] = None,
315
+ plot_model: bool = True,
316
+ **model_save_kwargs,
317
+ ):
318
+ """
319
+ Upload model checkpoint to the Hub.
320
+
321
+ Use `allow_patterns` and `ignore_patterns` to precisely filter which files should be pushed to the hub. Use
322
+ `delete_patterns` to delete existing remote files in the same commit. See [`upload_folder`] reference for more
323
+ details.
324
+
325
+ Args:
326
+ model (`Keras.Model`):
327
+ The [Keras model](`https://www.tensorflow.org/api_docs/python/tf/keras/Model`) you'd like to push to the
328
+ Hub. The model must be compiled and built.
329
+ repo_id (`str`):
330
+ ID of the repository to push to (example: `"username/my-model"`).
331
+ commit_message (`str`, *optional*, defaults to "Add Keras model"):
332
+ Message to commit while pushing.
333
+ private (`bool`, *optional*):
334
+ Whether the repository created should be private.
335
+ If `None` (default), the repo will be public unless the organization's default is private.
336
+ api_endpoint (`str`, *optional*):
337
+ The API endpoint to use when pushing the model to the hub.
338
+ token (`str`, *optional*):
339
+ The token to use as HTTP bearer authorization for remote files. If
340
+ not set, will use the token set when logging in with
341
+ `huggingface-cli login` (stored in `~/.huggingface`).
342
+ branch (`str`, *optional*):
343
+ The git branch on which to push the model. This defaults to
344
+ the default branch as specified in your repository, which
345
+ defaults to `"main"`.
346
+ create_pr (`boolean`, *optional*):
347
+ Whether or not to create a Pull Request from `branch` with that commit.
348
+ Defaults to `False`.
349
+ config (`dict`, *optional*):
350
+ Configuration object to be saved alongside the model weights.
351
+ allow_patterns (`List[str]` or `str`, *optional*):
352
+ If provided, only files matching at least one pattern are pushed.
353
+ ignore_patterns (`List[str]` or `str`, *optional*):
354
+ If provided, files matching any of the patterns are not pushed.
355
+ delete_patterns (`List[str]` or `str`, *optional*):
356
+ If provided, remote files matching any of the patterns will be deleted from the repo.
357
+ log_dir (`str`, *optional*):
358
+ TensorBoard logging directory to be pushed. The Hub automatically
359
+ hosts and displays a TensorBoard instance if log files are included
360
+ in the repository.
361
+ include_optimizer (`bool`, *optional*, defaults to `False`):
362
+ Whether or not to include optimizer during serialization.
363
+ tags (Union[`list`, `str`], *optional*):
364
+ List of tags that are related to model or string of a single tag. See example tags
365
+ [here](https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1).
366
+ plot_model (`bool`, *optional*, defaults to `True`):
367
+ Setting this to `True` will plot the model and put it in the model
368
+ card. Requires graphviz and pydot to be installed.
369
+ model_save_kwargs(`dict`, *optional*):
370
+ model_save_kwargs will be passed to
371
+ [`tf.keras.models.save_model()`](https://www.tensorflow.org/api_docs/python/tf/keras/models/save_model).
372
+
373
+ Returns:
374
+ The url of the commit of your model in the given repository.
375
+ """
376
+ api = HfApi(endpoint=api_endpoint)
377
+ repo_id = api.create_repo(repo_id=repo_id, token=token, private=private, exist_ok=True).repo_id
378
+
379
+ # Push the files to the repo in a single commit
380
+ with SoftTemporaryDirectory() as tmp:
381
+ saved_path = Path(tmp) / repo_id
382
+ save_pretrained_keras(
383
+ model,
384
+ saved_path,
385
+ config=config,
386
+ include_optimizer=include_optimizer,
387
+ tags=tags,
388
+ plot_model=plot_model,
389
+ **model_save_kwargs,
390
+ )
391
+
392
+ # If `log_dir` provided, delete remote logs and upload new ones
393
+ if log_dir is not None:
394
+ delete_patterns = (
395
+ []
396
+ if delete_patterns is None
397
+ else (
398
+ [delete_patterns] # convert `delete_patterns` to a list
399
+ if isinstance(delete_patterns, str)
400
+ else delete_patterns
401
+ )
402
+ )
403
+ delete_patterns.append("logs/*")
404
+ copytree(log_dir, saved_path / "logs")
405
+
406
+ return api.upload_folder(
407
+ repo_type="model",
408
+ repo_id=repo_id,
409
+ folder_path=saved_path,
410
+ commit_message=commit_message,
411
+ token=token,
412
+ revision=branch,
413
+ create_pr=create_pr,
414
+ allow_patterns=allow_patterns,
415
+ ignore_patterns=ignore_patterns,
416
+ delete_patterns=delete_patterns,
417
+ )
418
+
419
+
420
+ class KerasModelHubMixin(ModelHubMixin):
421
+ """
422
+ Implementation of [`ModelHubMixin`] to provide model Hub upload/download
423
+ capabilities to Keras models.
424
+
425
+
426
+ ```python
427
+ >>> import tensorflow as tf
428
+ >>> from huggingface_hub import KerasModelHubMixin
429
+
430
+
431
+ >>> class MyModel(tf.keras.Model, KerasModelHubMixin):
432
+ ... def __init__(self, **kwargs):
433
+ ... super().__init__()
434
+ ... self.config = kwargs.pop("config", None)
435
+ ... self.dummy_inputs = ...
436
+ ... self.layer = ...
437
+
438
+ ... def call(self, *args):
439
+ ... return ...
440
+
441
+
442
+ >>> # Initialize and compile the model as you normally would
443
+ >>> model = MyModel()
444
+ >>> model.compile(...)
445
+ >>> # Build the graph by training it or passing dummy inputs
446
+ >>> _ = model(model.dummy_inputs)
447
+ >>> # Save model weights to local directory
448
+ >>> model.save_pretrained("my-awesome-model")
449
+ >>> # Push model weights to the Hub
450
+ >>> model.push_to_hub("my-awesome-model")
451
+ >>> # Download and initialize weights from the Hub
452
+ >>> model = MyModel.from_pretrained("username/super-cool-model")
453
+ ```
454
+ """
455
+
456
+ def _save_pretrained(self, save_directory):
457
+ save_pretrained_keras(self, save_directory)
458
+
459
+ @classmethod
460
+ def _from_pretrained(
461
+ cls,
462
+ model_id,
463
+ revision,
464
+ cache_dir,
465
+ force_download,
466
+ proxies,
467
+ resume_download,
468
+ local_files_only,
469
+ token,
470
+ config: Optional[Dict[str, Any]] = None,
471
+ **model_kwargs,
472
+ ):
473
+ """Here we just call [`from_pretrained_keras`] function so both the mixin and
474
+ functional APIs stay in sync.
475
+
476
+ TODO - Some args above aren't used since we are calling
477
+ snapshot_download instead of hf_hub_download.
478
+ """
479
+ if keras is None:
480
+ raise ImportError("Called a TensorFlow-specific function but could not import it.")
481
+
482
+ # Root is either a local filepath matching model_id or a cached snapshot
483
+ if not os.path.isdir(model_id):
484
+ storage_folder = snapshot_download(
485
+ repo_id=model_id,
486
+ revision=revision,
487
+ cache_dir=cache_dir,
488
+ library_name="keras",
489
+ library_version=get_tf_version(),
490
+ )
491
+ else:
492
+ storage_folder = model_id
493
+
494
+ # TODO: change this in a future PR. We are not returning a KerasModelHubMixin instance here...
495
+ model = keras.models.load_model(storage_folder)
496
+
497
+ # For now, we add a new attribute, config, to store the config loaded from the hub/a local dir.
498
+ model.config = config
499
+
500
+ return model
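A hedged end-to-end sketch of the functional API defined in this file — the repo id is illustrative, and Keras 2.x (`tf_keras` or TensorFlow's bundled Keras 2) is required, as enforced by `_requires_keras_2_model` above:

```python
import tensorflow as tf
from huggingface_hub import from_pretrained_keras, push_to_hub_keras

# The model must be compiled and built before saving.
model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(3,))])
model.compile(optimizer="adam", loss="mse")

# Serializes in SavedModel format, writes a model card (with the optional
# hyperparameter table and network plot), and uploads in a single commit.
push_to_hub_keras(model, "username/my-keras-model", tags=["toy-example"])

reloaded = from_pretrained_keras("username/my-keras-model")
```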
meow/lib/python3.13/site-packages/huggingface_hub/repocard.py ADDED
@@ -0,0 +1,830 @@
1
+ import os
2
+ import re
3
+ from pathlib import Path
4
+ from typing import Any, Dict, Literal, Optional, Type, Union
5
+
6
+ import requests
7
+ import yaml
8
+
9
+ from huggingface_hub.file_download import hf_hub_download
10
+ from huggingface_hub.hf_api import upload_file
11
+ from huggingface_hub.repocard_data import (
12
+ CardData,
13
+ DatasetCardData,
14
+ EvalResult,
15
+ ModelCardData,
16
+ SpaceCardData,
17
+ eval_results_to_model_index,
18
+ model_index_to_eval_results,
19
+ )
20
+ from huggingface_hub.utils import get_session, is_jinja_available, yaml_dump
21
+
22
+ from . import constants
23
+ from .errors import EntryNotFoundError
24
+ from .utils import SoftTemporaryDirectory, logging, validate_hf_hub_args
25
+
26
+
27
+ logger = logging.get_logger(__name__)
28
+
29
+
30
+ TEMPLATE_MODELCARD_PATH = Path(__file__).parent / "templates" / "modelcard_template.md"
31
+ TEMPLATE_DATASETCARD_PATH = Path(__file__).parent / "templates" / "datasetcard_template.md"
32
+
33
+ # exact same regex as in the Hub server. Please keep in sync.
34
+ # See https://github.com/huggingface/moon-landing/blob/main/server/lib/ViewMarkdown.ts#L18
35
+ REGEX_YAML_BLOCK = re.compile(r"^(\s*---[\r\n]+)([\S\s]*?)([\r\n]+---(\r\n|\n|$))")
36
+
37
+
38
+ class RepoCard:
39
+ card_data_class = CardData
40
+ default_template_path = TEMPLATE_MODELCARD_PATH
41
+ repo_type = "model"
42
+
43
+ def __init__(self, content: str, ignore_metadata_errors: bool = False):
44
+ """Initialize a RepoCard from string content. The content should be a
45
+ Markdown file with a YAML block at the beginning and a Markdown body.
46
+
47
+ Args:
48
+ content (`str`): The content of the Markdown file.
49
+
50
+ Example:
51
+ ```python
52
+ >>> from huggingface_hub.repocard import RepoCard
53
+ >>> text = '''
54
+ ... ---
55
+ ... language: en
56
+ ... license: mit
57
+ ... ---
58
+ ...
59
+ ... # My repo
60
+ ... '''
61
+ >>> card = RepoCard(text)
62
+ >>> card.data.to_dict()
63
+ {'language': 'en', 'license': 'mit'}
64
+ >>> card.text
65
+ '\\n# My repo\\n'
66
+
67
+ ```
68
+ <Tip>
69
+ Raises the following error:
70
+
71
+ - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
72
+ when the content of the repo card metadata is not a dictionary.
73
+
74
+ </Tip>
75
+ """
76
+
77
+ # Set the content of the RepoCard, as well as underlying .data and .text attributes.
78
+ # See the `content` property setter for more details.
79
+ self.ignore_metadata_errors = ignore_metadata_errors
80
+ self.content = content
81
+
82
+ @property
83
+ def content(self):
84
+ """The content of the RepoCard, including the YAML block and the Markdown body."""
85
+ line_break = _detect_line_ending(self._content) or "\n"
86
+ return f"---{line_break}{self.data.to_yaml(line_break=line_break, original_order=self._original_order)}{line_break}---{line_break}{self.text}"
87
+
88
+ @content.setter
89
+ def content(self, content: str):
90
+ """Set the content of the RepoCard."""
91
+ self._content = content
92
+
93
+ match = REGEX_YAML_BLOCK.search(content)
94
+ if match:
95
+ # Metadata found in the YAML block
96
+ yaml_block = match.group(2)
97
+ self.text = content[match.end() :]
98
+ data_dict = yaml.safe_load(yaml_block)
99
+
100
+ if data_dict is None:
101
+ data_dict = {}
102
+
103
+ # The YAML block's data should be a dictionary
104
+ if not isinstance(data_dict, dict):
105
+ raise ValueError("repo card metadata block should be a dict")
106
+ else:
107
+ # Model card without metadata... create empty metadata
108
+ logger.warning("Repo card metadata block was not found. Setting CardData to empty.")
109
+ data_dict = {}
110
+ self.text = content
111
+
112
+ self.data = self.card_data_class(**data_dict, ignore_metadata_errors=self.ignore_metadata_errors)
113
+ self._original_order = list(data_dict.keys())
114
+
115
+ def __str__(self):
116
+ return self.content
117
+
118
+ def save(self, filepath: Union[Path, str]):
119
+ r"""Save a RepoCard to a file.
120
+
121
+ Args:
122
+ filepath (`Union[Path, str]`): Filepath to the markdown file to save.
123
+
124
+ Example:
125
+ ```python
126
+ >>> from huggingface_hub.repocard import RepoCard
127
+ >>> card = RepoCard("---\nlanguage: en\n---\n# This is a test repo card")
128
+ >>> card.save("/tmp/test.md")
129
+
130
+ ```
131
+ """
132
+ filepath = Path(filepath)
133
+ filepath.parent.mkdir(parents=True, exist_ok=True)
134
+ # Preserve newlines as in the existing file.
135
+ with open(filepath, mode="w", newline="", encoding="utf-8") as f:
136
+ f.write(str(self))
137
+
138
+ @classmethod
139
+ def load(
140
+ cls,
141
+ repo_id_or_path: Union[str, Path],
142
+ repo_type: Optional[str] = None,
143
+ token: Optional[str] = None,
144
+ ignore_metadata_errors: bool = False,
145
+ ):
146
+ """Initialize a RepoCard from a Hugging Face Hub repo's README.md or a local filepath.
147
+
148
+ Args:
149
+ repo_id_or_path (`Union[str, Path]`):
150
+ The repo ID associated with a Hugging Face Hub repo or a local filepath.
151
+ repo_type (`str`, *optional*):
152
+ The type of Hugging Face repo to push to. Defaults to None, which will use "model". Other options
153
+ are "dataset" and "space". Not used when loading from a local filepath. If this is called from a child
154
+ class, the default value will be the child class's `repo_type`.
155
+ token (`str`, *optional*):
156
+ Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to the stored token.
157
+ ignore_metadata_errors (`bool`):
158
+ If True, errors while parsing the metadata section will be ignored. Some information might be lost during
159
+ the process. Use it at your own risk.
160
+
161
+ Returns:
162
+ [`huggingface_hub.repocard.RepoCard`]: The RepoCard (or subclass) initialized from the repo's
163
+ README.md file or filepath.
164
+
165
+ Example:
166
+ ```python
167
+ >>> from huggingface_hub.repocard import RepoCard
168
+ >>> card = RepoCard.load("nateraw/food")
169
+ >>> assert card.data.tags == ["generated_from_trainer", "image-classification", "pytorch"]
170
+
171
+ ```
172
+ """
173
+
174
+ if Path(repo_id_or_path).exists():
175
+ card_path = Path(repo_id_or_path)
176
+ elif isinstance(repo_id_or_path, str):
177
+ card_path = Path(
178
+ hf_hub_download(
179
+ repo_id_or_path,
180
+ constants.REPOCARD_NAME,
181
+ repo_type=repo_type or cls.repo_type,
182
+ token=token,
183
+ )
184
+ )
185
+ else:
186
+ raise ValueError(f"Cannot load RepoCard: path not found on disk ({repo_id_or_path}).")
187
+
188
+ # Preserve newlines in the existing file.
189
+ with card_path.open(mode="r", newline="", encoding="utf-8") as f:
190
+ return cls(f.read(), ignore_metadata_errors=ignore_metadata_errors)
191
+
192
+ def validate(self, repo_type: Optional[str] = None):
193
+ """Validates card against Hugging Face Hub's card validation logic.
194
+ Using this function requires access to the internet, so it is only called
195
+ internally by [`huggingface_hub.repocard.RepoCard.push_to_hub`].
196
+
197
+ Args:
198
+ repo_type (`str`, *optional*, defaults to "model"):
199
+ The type of Hugging Face repo to push to. Options are "model", "dataset", and "space".
200
+ If this function is called from a child class, the default will be the child class's `repo_type`.
201
+
202
+ <Tip>
203
+ Raises the following errors:
204
+
205
+ - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
206
+ if the card fails validation checks.
207
+ - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
208
+ if the request to the Hub API fails for any other reason.
209
+
210
+ </Tip>
211
+ """
212
+
213
+ # Use the provided repo_type if given; otherwise fall back to the card's repo_type.
214
+ repo_type = repo_type or self.repo_type
215
+
216
+ body = {
217
+ "repoType": repo_type,
218
+ "content": str(self),
219
+ }
220
+ headers = {"Accept": "text/plain"}
221
+
222
+ try:
223
+ r = get_session().post("https://huggingface.co/api/validate-yaml", body, headers=headers)
224
+ r.raise_for_status()
225
+ except requests.exceptions.HTTPError as exc:
226
+ if r.status_code == 400:
227
+ raise ValueError(r.text)
228
+ else:
229
+ raise exc
230
+
231
+ def push_to_hub(
232
+ self,
233
+ repo_id: str,
234
+ token: Optional[str] = None,
235
+ repo_type: Optional[str] = None,
236
+ commit_message: Optional[str] = None,
237
+ commit_description: Optional[str] = None,
238
+ revision: Optional[str] = None,
239
+ create_pr: Optional[bool] = None,
240
+ parent_commit: Optional[str] = None,
241
+ ):
242
+ """Push a RepoCard to a Hugging Face Hub repo.
243
+
244
+ Args:
245
+ repo_id (`str`):
246
+ The repo ID of the Hugging Face Hub repo to push to. Example: "nateraw/food".
247
+ token (`str`, *optional*):
248
+ Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to
249
+ the stored token.
250
+ repo_type (`str`, *optional*, defaults to "model"):
251
+ The type of Hugging Face repo to push to. Options are "model", "dataset", and "space". If this
252
+ function is called by a child class, it will default to the child class's `repo_type`.
253
+ commit_message (`str`, *optional*):
254
+ The summary / title / first line of the generated commit.
255
+ commit_description (`str`, *optional*):
256
+ The description of the generated commit.
257
+ revision (`str`, *optional*):
258
+ The git revision to commit from. Defaults to the head of the `"main"` branch.
259
+ create_pr (`bool`, *optional*):
260
+ Whether or not to create a Pull Request with this commit. Defaults to `False`.
261
+ parent_commit (`str`, *optional*):
262
+ The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
263
+ If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
264
+ If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
265
+ Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
266
+ especially useful if the repo is updated / committed to concurrently.
267
+ Returns:
268
+ `str`: URL of the commit which updated the card metadata.
269
+ """
270
+
271
+ # Use the provided repo_type if given; otherwise fall back to the card's repo_type.
272
+ repo_type = repo_type or self.repo_type
273
+
274
+ # Validate card before pushing to hub
275
+ self.validate(repo_type=repo_type)
276
+
277
+ with SoftTemporaryDirectory() as tmpdir:
278
+ tmp_path = Path(tmpdir) / constants.REPOCARD_NAME
279
+ tmp_path.write_text(str(self))
280
+ url = upload_file(
281
+ path_or_fileobj=str(tmp_path),
282
+ path_in_repo=constants.REPOCARD_NAME,
283
+ repo_id=repo_id,
284
+ token=token,
285
+ repo_type=repo_type,
286
+ commit_message=commit_message,
287
+ commit_description=commit_description,
288
+ create_pr=create_pr,
289
+ revision=revision,
290
+ parent_commit=parent_commit,
291
+ )
292
+ return url
293
+
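
Putting `load`, the dict-style `data` access, and `push_to_hub` together, a typical edit round-trip looks like the sketch below (assuming a stored token with write access to the target repo; opening a PR avoids pushing straight to `main`):

```python
from huggingface_hub import RepoCard

card = RepoCard.load("nateraw/food")       # repo id used in the docstrings above
card.data["license"] = "mit"               # CardData supports dict-style access
card.push_to_hub("nateraw/food", create_pr=True)  # validates first, then commits
```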
294
+ @classmethod
295
+ def from_template(
296
+ cls,
297
+ card_data: CardData,
298
+ template_path: Optional[str] = None,
299
+ template_str: Optional[str] = None,
300
+ **template_kwargs,
301
+ ):
302
+ """Initialize a RepoCard from a template. By default, it uses the default template.
303
+
304
+ Templates are Jinja2 templates that can be customized by passing keyword arguments.
305
+
306
+ Args:
307
+ card_data (`huggingface_hub.CardData`):
308
+ A huggingface_hub.CardData instance containing the metadata you want to include in the YAML
309
+ header of the repo card on the Hugging Face Hub.
310
+ template_path (`str`, *optional*):
311
+ A path to a markdown file with optional Jinja template variables that can be filled
312
+ in with `template_kwargs`. Defaults to the default template.
313
+
314
+ Returns:
315
+ [`huggingface_hub.repocard.RepoCard`]: A RepoCard instance with the specified card data and content from the
316
+ template.
317
+ """
318
+ if is_jinja_available():
319
+ import jinja2
320
+ else:
321
+ raise ImportError(
322
+ "Using RepoCard.from_template requires Jinja2 to be installed. Please"
323
+ " install it with `pip install Jinja2`."
324
+ )
325
+
326
+ kwargs = card_data.to_dict().copy()
327
+ kwargs.update(template_kwargs) # Template_kwargs have priority
328
+
329
+ if template_path is not None:
330
+ template_str = Path(template_path).read_text()
331
+ if template_str is None:
332
+ template_str = Path(cls.default_template_path).read_text()
333
+ template = jinja2.Template(template_str)
334
+ content = template.render(card_data=card_data.to_yaml(), **kwargs)
335
+ return cls(content)
336
+
337
+
338
+ class ModelCard(RepoCard):
339
+ card_data_class = ModelCardData
340
+ default_template_path = TEMPLATE_MODELCARD_PATH
341
+ repo_type = "model"
342
+
343
+ @classmethod
344
+ def from_template( # type: ignore # violates Liskov property but easier to use
345
+ cls,
346
+ card_data: ModelCardData,
347
+ template_path: Optional[str] = None,
348
+ template_str: Optional[str] = None,
349
+ **template_kwargs,
350
+ ):
351
+ """Initialize a ModelCard from a template. By default, it uses the default template, which can be found here:
352
+ https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md
353
+
354
+ Templates are Jinja2 templates that can be customized by passing keyword arguments.
355
+
356
+ Args:
357
+ card_data (`huggingface_hub.ModelCardData`):
358
+ A huggingface_hub.ModelCardData instance containing the metadata you want to include in the YAML
359
+ header of the model card on the Hugging Face Hub.
360
+ template_path (`str`, *optional*):
361
+ A path to a markdown file with optional Jinja template variables that can be filled
362
+ in with `template_kwargs`. Defaults to the default template.
363
+
364
+ Returns:
365
+ [`huggingface_hub.ModelCard`]: A ModelCard instance with the specified card data and content from the
366
+ template.
367
+
368
+ Example:
369
+ ```python
370
+ >>> from huggingface_hub import ModelCard, ModelCardData, EvalResult
371
+
372
+ >>> # Using the Default Template
373
+ >>> card_data = ModelCardData(
374
+ ... language='en',
375
+ ... license='mit',
376
+ ... library_name='timm',
377
+ ... tags=['image-classification', 'resnet'],
378
+ ... datasets=['beans'],
379
+ ... metrics=['accuracy'],
380
+ ... )
381
+ >>> card = ModelCard.from_template(
382
+ ... card_data,
383
+ ... model_description='This model does x + y...'
384
+ ... )
385
+
386
+ >>> # Including Evaluation Results
387
+ >>> card_data = ModelCardData(
388
+ ... language='en',
389
+ ... tags=['image-classification', 'resnet'],
390
+ ... eval_results=[
391
+ ... EvalResult(
392
+ ... task_type='image-classification',
393
+ ... dataset_type='beans',
394
+ ... dataset_name='Beans',
395
+ ... metric_type='accuracy',
396
+ ... metric_value=0.9,
397
+ ... ),
398
+ ... ],
399
+ ... model_name='my-cool-model',
400
+ ... )
401
+ >>> card = ModelCard.from_template(card_data)
402
+
403
+ >>> # Using a Custom Template
404
+ >>> card_data = ModelCardData(
405
+ ... language='en',
406
+ ... tags=['image-classification', 'resnet']
407
+ ... )
408
+ >>> card = ModelCard.from_template(
409
+ ... card_data=card_data,
410
+ ... template_path='./src/huggingface_hub/templates/modelcard_template.md',
411
+ ... custom_template_var='custom value', # will be replaced in template if it exists
412
+ ... )
413
+
414
+ ```
415
+ """
416
+ return super().from_template(card_data, template_path, template_str, **template_kwargs)
417
+
418
+
419
+ class DatasetCard(RepoCard):
420
+ card_data_class = DatasetCardData
421
+ default_template_path = TEMPLATE_DATASETCARD_PATH
422
+ repo_type = "dataset"
423
+
424
+ @classmethod
425
+ def from_template( # type: ignore # violates Liskov property but easier to use
426
+ cls,
427
+ card_data: DatasetCardData,
428
+ template_path: Optional[str] = None,
429
+ template_str: Optional[str] = None,
430
+ **template_kwargs,
431
+ ):
432
+ """Initialize a DatasetCard from a template. By default, it uses the default template, which can be found here:
433
+ https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/datasetcard_template.md
434
+
435
+ Templates are Jinja2 templates that can be customized by passing keyword arguments.
436
+
437
+ Args:
438
+ card_data (`huggingface_hub.DatasetCardData`):
439
+ A huggingface_hub.DatasetCardData instance containing the metadata you want to include in the YAML
440
+ header of the dataset card on the Hugging Face Hub.
441
+ template_path (`str`, *optional*):
442
+ A path to a markdown file with optional Jinja template variables that can be filled
443
+ in with `template_kwargs`. Defaults to the default template.
444
+
445
+ Returns:
446
+ [`huggingface_hub.DatasetCard`]: A DatasetCard instance with the specified card data and content from the
447
+ template.
448
+
449
+ Example:
450
+ ```python
451
+ >>> from huggingface_hub import DatasetCard, DatasetCardData
452
+
453
+ >>> # Using the Default Template
454
+ >>> card_data = DatasetCardData(
455
+ ... language='en',
456
+ ... license='mit',
457
+ ... annotations_creators='crowdsourced',
458
+ ... task_categories=['text-classification'],
459
+ ... task_ids=['sentiment-classification', 'text-scoring'],
460
+ ... multilinguality='monolingual',
461
+ ... pretty_name='My Text Classification Dataset',
462
+ ... )
463
+ >>> card = DatasetCard.from_template(
464
+ ... card_data,
465
+ ... pretty_name=card_data.pretty_name,
466
+ ... )
467
+
468
+ >>> # Using a Custom Template
469
+ >>> card_data = DatasetCardData(
470
+ ... language='en',
471
+ ... license='mit',
472
+ ... )
473
+ >>> card = DatasetCard.from_template(
474
+ ... card_data=card_data,
475
+ ... template_path='./src/huggingface_hub/templates/datasetcard_template.md',
476
+ ... custom_template_var='custom value', # will be replaced in template if it exists
477
+ ... )
478
+
479
+ ```
480
+ """
481
+ return super().from_template(card_data, template_path, template_str, **template_kwargs)
482
+
483
+
484
+ class SpaceCard(RepoCard):
485
+ card_data_class = SpaceCardData
486
+ default_template_path = TEMPLATE_MODELCARD_PATH
487
+ repo_type = "space"
488
+
489
+
490
+ def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]: # noqa: F722
491
+ """Detect the line ending of a string. Used by RepoCard to avoid making huge diff on newlines.
492
+
493
+ Uses the same implementation as the Hub server; please keep them in sync.
494
+
495
+ Returns:
496
+ str: The detected line ending of the string.
497
+ """
498
+ cr = content.count("\r")
499
+ lf = content.count("\n")
500
+ crlf = content.count("\r\n")
501
+ if cr + lf == 0:
502
+ return None
503
+ if crlf == cr and crlf == lf:
504
+ return "\r\n"
505
+ if cr > lf:
506
+ return "\r"
507
+ else:
508
+ return "\n"
509
+
510
+
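
A quick standalone check of the counting logic above (assuming the function is in scope, e.g. imported from this module):

```python
assert _detect_line_ending("a\r\nb\r\n") == "\r\n"  # every \r is part of a \r\n pair
assert _detect_line_ending("a\nb\n") == "\n"
assert _detect_line_ending("a\rb\r") == "\r"
assert _detect_line_ending("no newline at all") is None
```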
511
+ def metadata_load(local_path: Union[str, Path]) -> Optional[Dict]:
512
+ content = Path(local_path).read_text()
513
+ match = REGEX_YAML_BLOCK.search(content)
514
+ if match:
515
+ yaml_block = match.group(2)
516
+ data = yaml.safe_load(yaml_block)
517
+ if data is None or isinstance(data, dict):
518
+ return data
519
+ raise ValueError("repo card metadata block should be a dict")
520
+ else:
521
+ return None
522
+
523
+
524
+ def metadata_save(local_path: Union[str, Path], data: Dict) -> None:
525
+ """
526
+ Save the metadata dict in the upper YAML part, trying to preserve newlines as
527
+ in the existing file. Docs about open() with newline="" parameter:
528
+ https://docs.python.org/3/library/functions.html?highlight=open#open. Does
529
+ not work with "^M" linebreaks, which are replaced by \n.
530
+ """
531
+ line_break = "\n"
532
+ content = ""
533
+ # try to detect existing newline character
534
+ if os.path.exists(local_path):
535
+ with open(local_path, "r", newline="", encoding="utf8") as readme:
536
+ content = readme.read()
537
+ if isinstance(readme.newlines, tuple):
538
+ line_break = readme.newlines[0]
539
+ elif isinstance(readme.newlines, str):
540
+ line_break = readme.newlines
541
+
542
+ # creates a new file if it does not exist yet
543
+ with open(local_path, "w", newline="", encoding="utf8") as readme:
544
+ data_yaml = yaml_dump(data, sort_keys=False, line_break=line_break)
545
+ # sort_keys: keep dict order
546
+ match = REGEX_YAML_BLOCK.search(content)
547
+ if match:
548
+ output = content[: match.start()] + f"---{line_break}{data_yaml}---{line_break}" + content[match.end() :]
549
+ else:
550
+ output = f"---{line_break}{data_yaml}---{line_break}{content}"
551
+
552
+ readme.write(output)
553
+ # no explicit close() needed: the `with` block closes the file
554
+
555
+
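
`metadata_load` and `metadata_save` are designed to round-trip; a minimal sketch (using a throwaway path, with both functions assumed in scope):

```python
from pathlib import Path

path = Path("/tmp/README.md")  # throwaway file for the sketch
metadata_save(path, {"language": "en", "license": "mit"})
assert metadata_load(path) == {"language": "en", "license": "mit"}

# A second save replaces the YAML block in place and keeps any Markdown body.
metadata_save(path, {"language": "fr"})
assert metadata_load(path) == {"language": "fr"}
```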
556
+ def metadata_eval_result(
557
+ *,
558
+ model_pretty_name: str,
559
+ task_pretty_name: str,
560
+ task_id: str,
561
+ metrics_pretty_name: str,
562
+ metrics_id: str,
563
+ metrics_value: Any,
564
+ dataset_pretty_name: str,
565
+ dataset_id: str,
566
+ metrics_config: Optional[str] = None,
567
+ metrics_verified: bool = False,
568
+ dataset_config: Optional[str] = None,
569
+ dataset_split: Optional[str] = None,
570
+ dataset_revision: Optional[str] = None,
571
+ metrics_verification_token: Optional[str] = None,
572
+ ) -> Dict:
573
+ """
574
+ Creates a metadata dict with the result from a model evaluated on a dataset.
575
+
576
+ Args:
577
+ model_pretty_name (`str`):
578
+ The name of the model in natural language.
579
+ task_pretty_name (`str`):
580
+ The name of a task in natural language.
581
+ task_id (`str`):
582
+ Example: automatic-speech-recognition. A task id.
583
+ metrics_pretty_name (`str`):
584
+ A name for the metric in natural language. Example: Test WER.
585
+ metrics_id (`str`):
586
+ Example: wer. A metric id from https://hf.co/metrics.
587
+ metrics_value (`Any`):
588
+ The value from the metric. Example: 20.0 or "20.0 ± 1.2".
589
+ dataset_pretty_name (`str`):
590
+ The name of the dataset in natural language.
591
+ dataset_id (`str`):
592
+ Example: common_voice. A dataset id from https://hf.co/datasets.
593
+ metrics_config (`str`, *optional*):
594
+ The name of the metric configuration used in `load_metric()`.
595
+ Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
596
+ metrics_verified (`bool`, *optional*, defaults to `False`):
597
+ Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
598
+ dataset_config (`str`, *optional*):
599
+ Example: fr. The name of the dataset configuration used in `load_dataset()`.
600
+ dataset_split (`str`, *optional*):
601
+ Example: test. The name of the dataset split used in `load_dataset()`.
602
+ dataset_revision (`str`, *optional*):
603
+ Example: 5503434ddd753f426f4b38109466949a1217c2bb. The name of the dataset revision
604
+ used in `load_dataset()`.
605
+ metrics_verification_token (`str`, *optional*):
606
+ A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.
607
+
608
+ Returns:
609
+ `dict`: a metadata dict with the result from a model evaluated on a dataset.
610
+
611
+ Example:
612
+ ```python
613
+ >>> from huggingface_hub import metadata_eval_result
614
+ >>> results = metadata_eval_result(
615
+ ... model_pretty_name="RoBERTa fine-tuned on ReactionGIF",
616
+ ... task_pretty_name="Text Classification",
617
+ ... task_id="text-classification",
618
+ ... metrics_pretty_name="Accuracy",
619
+ ... metrics_id="accuracy",
620
+ ... metrics_value=0.2662102282047272,
621
+ ... dataset_pretty_name="ReactionJPEG",
622
+ ... dataset_id="julien-c/reactionjpeg",
623
+ ... dataset_config="default",
624
+ ... dataset_split="test",
625
+ ... )
626
+ >>> results == {
627
+ ... 'model-index': [
628
+ ... {
629
+ ... 'name': 'RoBERTa fine-tuned on ReactionGIF',
630
+ ... 'results': [
631
+ ... {
632
+ ... 'task': {
633
+ ... 'type': 'text-classification',
634
+ ... 'name': 'Text Classification'
635
+ ... },
636
+ ... 'dataset': {
637
+ ... 'name': 'ReactionJPEG',
638
+ ... 'type': 'julien-c/reactionjpeg',
639
+ ... 'config': 'default',
640
+ ... 'split': 'test'
641
+ ... },
642
+ ... 'metrics': [
643
+ ... {
644
+ ... 'type': 'accuracy',
645
+ ... 'value': 0.2662102282047272,
646
+ ... 'name': 'Accuracy',
647
+ ... 'verified': False
648
+ ... }
649
+ ... ]
650
+ ... }
651
+ ... ]
652
+ ... }
653
+ ... ]
654
+ ... }
655
+ True
656
+
657
+ ```
658
+ """
659
+
660
+ return {
661
+ "model-index": eval_results_to_model_index(
662
+ model_name=model_pretty_name,
663
+ eval_results=[
664
+ EvalResult(
665
+ task_name=task_pretty_name,
666
+ task_type=task_id,
667
+ metric_name=metrics_pretty_name,
668
+ metric_type=metrics_id,
669
+ metric_value=metrics_value,
670
+ dataset_name=dataset_pretty_name,
671
+ dataset_type=dataset_id,
672
+ metric_config=metrics_config,
673
+ verified=metrics_verified,
674
+ verify_token=metrics_verification_token,
675
+ dataset_config=dataset_config,
676
+ dataset_split=dataset_split,
677
+ dataset_revision=dataset_revision,
678
+ )
679
+ ],
680
+ )
681
+ }
682
+
683
+
684
+ @validate_hf_hub_args
685
+ def metadata_update(
686
+ repo_id: str,
687
+ metadata: Dict,
688
+ *,
689
+ repo_type: Optional[str] = None,
690
+ overwrite: bool = False,
691
+ token: Optional[str] = None,
692
+ commit_message: Optional[str] = None,
693
+ commit_description: Optional[str] = None,
694
+ revision: Optional[str] = None,
695
+ create_pr: bool = False,
696
+ parent_commit: Optional[str] = None,
697
+ ) -> str:
698
+ """
699
+ Updates the metadata in the README.md of a repository on the Hugging Face Hub.
700
+ If the README.md file doesn't exist yet, a new one is created from the metadata and
701
+ the default ModelCard or DatasetCard template. For a `space` repo, an error is thrown,
702
+ as a Space cannot exist without a `README.md` file.
703
+
704
+ Args:
705
+ repo_id (`str`):
706
+ The name of the repository.
707
+ metadata (`dict`):
708
+ A dictionary containing the metadata to be updated.
709
+ repo_type (`str`, *optional*):
710
+ Set to `"dataset"` or `"space"` if updating to a dataset or space,
711
+ `None` or `"model"` if updating to a model. Default is `None`.
712
+ overwrite (`bool`, *optional*, defaults to `False`):
713
+ If set to `True` an existing field can be overwritten, otherwise
714
+ attempting to overwrite an existing field will cause an error.
715
+ token (`str`, *optional*):
716
+ The Hugging Face authentication token.
717
+ commit_message (`str`, *optional*):
718
+ The summary / title / first line of the generated commit. Defaults to
719
+ `f"Update metadata with huggingface_hub"`
720
+ commit_description (`str`, *optional*):
721
+ The description of the generated commit
722
+ revision (`str`, *optional*):
723
+ The git revision to commit from. Defaults to the head of the
724
+ `"main"` branch.
725
+ create_pr (`bool`, *optional*):
726
+ Whether or not to create a Pull Request from `revision` with that commit.
727
+ Defaults to `False`.
728
+ parent_commit (`str`, *optional*):
729
+ The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
730
+ If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
731
+ If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
732
+ Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
733
+ especially useful if the repo is updated / committed to concurrently.
734
+ Returns:
735
+ `str`: URL of the commit which updated the card metadata.
736
+
737
+ Example:
738
+ ```python
739
+ >>> from huggingface_hub import metadata_update
740
+ >>> metadata = {'model-index': [{'name': 'RoBERTa fine-tuned on ReactionGIF',
741
+ ... 'results': [{'dataset': {'name': 'ReactionGIF',
742
+ ... 'type': 'julien-c/reactiongif'},
743
+ ... 'metrics': [{'name': 'Recall',
744
+ ... 'type': 'recall',
745
+ ... 'value': 0.7762102282047272}],
746
+ ... 'task': {'name': 'Text Classification',
747
+ ... 'type': 'text-classification'}}]}]}
748
+ >>> url = metadata_update("hf-internal-testing/reactiongif-roberta-card", metadata)
749
+
750
+ ```
751
+ """
752
+ commit_message = commit_message if commit_message is not None else "Update metadata with huggingface_hub"
753
+
754
+ # Card class given repo_type
755
+ card_class: Type[RepoCard]
756
+ if repo_type is None or repo_type == "model":
757
+ card_class = ModelCard
758
+ elif repo_type == "dataset":
759
+ card_class = DatasetCard
760
+ elif repo_type == "space":
761
+ card_class = RepoCard
762
+ else:
763
+ raise ValueError(f"Unknown repo_type: {repo_type}")
764
+
765
+ # Either load repo_card from the Hub or create an empty one.
766
+ # NOTE: Will not create the repo if it doesn't exist.
767
+ try:
768
+ card = card_class.load(repo_id, token=token, repo_type=repo_type)
769
+ except EntryNotFoundError:
770
+ if repo_type == "space":
771
+ raise ValueError("Cannot update metadata on a Space that doesn't contain a `README.md` file.")
772
+
773
+ # Initialize a ModelCard or DatasetCard from default template and no data.
774
+ card = card_class.from_template(CardData())
775
+
776
+ for key, value in metadata.items():
777
+ if key == "model-index":
778
+ # if the new metadata doesn't include a name, use the existing one or the repo name
779
+ if "name" not in value[0]:
780
+ value[0]["name"] = getattr(card, "model_name", repo_id)
781
+ model_name, new_results = model_index_to_eval_results(value)
782
+ if card.data.eval_results is None:
783
+ card.data.eval_results = new_results
784
+ card.data.model_name = model_name
785
+ else:
786
+ existing_results = card.data.eval_results
787
+
788
+ # Iterate over new results
789
+ # Iterate over existing results
790
+ # If both results describe the same metric but value is different:
791
+ # If overwrite=True: overwrite the metric value
792
+ # Else: raise ValueError
793
+ # Else: append new result to existing ones.
794
+ for new_result in new_results:
795
+ result_found = False
796
+ for existing_result in existing_results:
797
+ if new_result.is_equal_except_value(existing_result):
798
+ if new_result != existing_result and not overwrite:
799
+ raise ValueError(
800
+ "You passed a new value for the existing metric"
801
+ f" 'name: {new_result.metric_name}, type: "
802
+ f"{new_result.metric_type}'. Set `overwrite=True`"
803
+ " to overwrite existing metrics."
804
+ )
805
+ result_found = True
806
+ existing_result.metric_value = new_result.metric_value
807
+ if existing_result.verified is True:
808
+ existing_result.verify_token = new_result.verify_token
809
+ if not result_found:
810
+ card.data.eval_results.append(new_result)
811
+ else:
812
+ # Any metadata that is not a result metric
813
+ if card.data.get(key) is not None and not overwrite and card.data.get(key) != value:
814
+ raise ValueError(
815
+ f"You passed a new value for the existing meta data field '{key}'."
816
+ " Set `overwrite=True` to overwrite existing metadata."
817
+ )
818
+ else:
819
+ card.data[key] = value
820
+
821
+ return card.push_to_hub(
822
+ repo_id,
823
+ token=token,
824
+ repo_type=repo_type,
825
+ commit_message=commit_message,
826
+ commit_description=commit_description,
827
+ create_pr=create_pr,
828
+ revision=revision,
829
+ parent_commit=parent_commit,
830
+ )
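
As a usage note for `metadata_update`: plain (non-`model-index`) fields follow the `overwrite` rule above, so updating an already-set field requires `overwrite=True`. A hedged sketch (the repo id is hypothetical):

```python
from huggingface_hub import metadata_update

url = metadata_update(
    "user/my-model",            # hypothetical repo id
    {"license": "apache-2.0"},
    overwrite=True,             # required if `license` is already set in the card
)
print(url)                      # URL of the commit that updated the metadata
```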
meow/lib/python3.13/site-packages/huggingface_hub/repocard_data.py ADDED
@@ -0,0 +1,749 @@
1
+ import copy
2
+ from collections import defaultdict
3
+ from dataclasses import dataclass
4
+ from typing import Any, Dict, List, Optional, Tuple, Union
5
+
6
+ from huggingface_hub.utils import logging, yaml_dump
7
+
8
+
9
+ logger = logging.get_logger(__name__)
10
+
11
+
12
+ @dataclass
13
+ class EvalResult:
14
+ """
15
+ Flattened representation of individual evaluation results found in model-index of Model Cards.
16
+
17
+ For more information on the model-index spec, see https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1.
18
+
19
+ Args:
20
+ task_type (`str`):
21
+ The task identifier. Example: "image-classification".
22
+ dataset_type (`str`):
23
+ The dataset identifier. Example: "common_voice". Use dataset id from https://hf.co/datasets.
24
+ dataset_name (`str`):
25
+ A pretty name for the dataset. Example: "Common Voice (French)".
26
+ metric_type (`str`):
27
+ The metric identifier. Example: "wer". Use metric id from https://hf.co/metrics.
28
+ metric_value (`Any`):
29
+ The metric value. Example: 0.9 or "20.0 ± 1.2".
30
+ task_name (`str`, *optional*):
31
+ A pretty name for the task. Example: "Speech Recognition".
32
+ dataset_config (`str`, *optional*):
33
+ The name of the dataset configuration used in `load_dataset()`.
34
+ Example: fr in `load_dataset("common_voice", "fr")`. See the `datasets` docs for more info:
35
+ https://hf.co/docs/datasets/package_reference/loading_methods#datasets.load_dataset.name
36
+ dataset_split (`str`, *optional*):
37
+ The split used in `load_dataset()`. Example: "test".
38
+ dataset_revision (`str`, *optional*):
39
+ The revision (AKA Git Sha) of the dataset used in `load_dataset()`.
40
+ Example: 5503434ddd753f426f4b38109466949a1217c2bb
41
+ dataset_args (`Dict[str, Any]`, *optional*):
42
+ The arguments passed during `Metric.compute()`. Example for `bleu`: `{"max_order": 4}`
43
+ metric_name (`str`, *optional*):
44
+ A pretty name for the metric. Example: "Test WER".
45
+ metric_config (`str`, *optional*):
46
+ The name of the metric configuration used in `load_metric()`.
47
+ Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
48
+ See the `datasets` docs for more info: https://huggingface.co/docs/datasets/v2.1.0/en/loading#load-configurations
49
+ metric_args (`Dict[str, Any]`, *optional*):
50
+ The arguments passed during `Metric.compute()`. Example for `bleu`: max_order: 4
51
+ verified (`bool`, *optional*):
52
+ Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
53
+ verify_token (`str`, *optional*):
54
+ A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.
55
+ source_name (`str`, *optional*):
56
+ The name of the source of the evaluation result. Example: "Open LLM Leaderboard".
57
+ source_url (`str`, *optional*):
58
+ The URL of the source of the evaluation result. Example: "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard".
59
+ """
60
+
61
+ # Required
62
+
63
+ # The task identifier
64
+ # Example: automatic-speech-recognition
65
+ task_type: str
66
+
67
+ # The dataset identifier
68
+ # Example: common_voice. Use dataset id from https://hf.co/datasets
69
+ dataset_type: str
70
+
71
+ # A pretty name for the dataset.
72
+ # Example: Common Voice (French)
73
+ dataset_name: str
74
+
75
+ # The metric identifier
76
+ # Example: wer. Use metric id from https://hf.co/metrics
77
+ metric_type: str
78
+
79
+ # Value of the metric.
80
+ # Example: 20.0 or "20.0 ± 1.2"
81
+ metric_value: Any
82
+
83
+ # Optional
84
+
85
+ # A pretty name for the task.
86
+ # Example: Speech Recognition
87
+ task_name: Optional[str] = None
88
+
89
+ # The name of the dataset configuration used in `load_dataset()`.
90
+ # Example: fr in `load_dataset("common_voice", "fr")`.
91
+ # See the `datasets` docs for more info:
92
+ # https://huggingface.co/docs/datasets/package_reference/loading_methods#datasets.load_dataset.name
93
+ dataset_config: Optional[str] = None
94
+
95
+ # The split used in `load_dataset()`.
96
+ # Example: test
97
+ dataset_split: Optional[str] = None
98
+
99
+ # The revision (AKA Git Sha) of the dataset used in `load_dataset()`.
100
+ # Example: 5503434ddd753f426f4b38109466949a1217c2bb
101
+ dataset_revision: Optional[str] = None
102
+
103
+ # The arguments passed during `Metric.compute()`.
104
+ # Example for `bleu`: max_order: 4
105
+ dataset_args: Optional[Dict[str, Any]] = None
106
+
107
+ # A pretty name for the metric.
108
+ # Example: Test WER
109
+ metric_name: Optional[str] = None
110
+
111
+ # The name of the metric configuration used in `load_metric()`.
112
+ # Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
113
+ # See the `datasets` docs for more info: https://huggingface.co/docs/datasets/v2.1.0/en/loading#load-configurations
114
+ metric_config: Optional[str] = None
115
+
116
+ # The arguments passed during `Metric.compute()`.
117
+ # Example for `bleu`: max_order: 4
118
+ metric_args: Optional[Dict[str, Any]] = None
119
+
120
+ # Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
121
+ verified: Optional[bool] = None
122
+
123
+ # A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.
124
+ verify_token: Optional[str] = None
125
+
126
+ # The name of the source of the evaluation result.
127
+ # Example: Open LLM Leaderboard
128
+ source_name: Optional[str] = None
129
+
130
+ # The URL of the source of the evaluation result.
131
+ # Example: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard
132
+ source_url: Optional[str] = None
133
+
134
+ @property
135
+ def unique_identifier(self) -> tuple:
136
+ """Returns a tuple that uniquely identifies this evaluation."""
137
+ return (
138
+ self.task_type,
139
+ self.dataset_type,
140
+ self.dataset_config,
141
+ self.dataset_split,
142
+ self.dataset_revision,
143
+ )
144
+
145
+ def is_equal_except_value(self, other: "EvalResult") -> bool:
146
+ """
147
+ Return True if `self` and `other` describe exactly the same metric but with a
148
+ different value.
149
+ """
150
+ for key in self.__dict__:
151
+ if key == "metric_value":
152
+ continue
153
+ # For metrics computed by Hugging Face's evaluation service, `verify_token` is derived from `metric_value`,
154
+ # so we exclude it here in the comparison.
155
+ if key != "verify_token" and getattr(self, key) != getattr(other, key):
156
+ return False
157
+ return True
158
+
159
+ def __post_init__(self) -> None:
160
+ if self.source_name is not None and self.source_url is None:
161
+ raise ValueError("If `source_name` is provided, `source_url` must also be provided.")
162
+
163
+
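
The `is_equal_except_value` helper is what `metadata_update` relies on when deciding between overwriting and appending; a small sketch (assuming `EvalResult` is in scope):

```python
a = EvalResult(task_type="image-classification", dataset_type="beans",
               dataset_name="Beans", metric_type="accuracy", metric_value=0.9)
b = EvalResult(task_type="image-classification", dataset_type="beans",
               dataset_name="Beans", metric_type="accuracy", metric_value=0.7)
assert a.is_equal_except_value(b)  # same metric, different value
assert a != b                      # dataclass equality still sees the value
```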
164
+ @dataclass
165
+ class CardData:
166
+ """Structure containing metadata from a RepoCard.
167
+
168
+ [`CardData`] is the parent class of [`ModelCardData`] and [`DatasetCardData`].
169
+
170
+ Metadata can be exported as a dictionary or YAML. Export can be customized to alter the representation of the data
171
+ (example: flatten evaluation results). `CardData` behaves as a dictionary (can get, pop, set values) but does not
172
+ inherit from `dict` to allow this export step.
173
+ """
174
+
175
+ def __init__(self, ignore_metadata_errors: bool = False, **kwargs):
176
+ self.__dict__.update(kwargs)
177
+
178
+ def to_dict(self):
179
+ """Converts CardData to a dict.
180
+
181
+ Returns:
182
+ `dict`: CardData represented as a dictionary ready to be dumped to a YAML
183
+ block for inclusion in a README.md file.
184
+ """
185
+
186
+ data_dict = copy.deepcopy(self.__dict__)
187
+ self._to_dict(data_dict)
188
+ return {key: value for key, value in data_dict.items() if value is not None}
189
+
190
+ def _to_dict(self, data_dict):
191
+ """Use this method in child classes to alter the dict representation of the data. Alter the dict in-place.
192
+
193
+ Args:
194
+ data_dict (`dict`): The raw dict representation of the card data.
195
+ """
196
+ pass
197
+
198
+ def to_yaml(self, line_break=None, original_order: Optional[List[str]] = None) -> str:
199
+ """Dumps CardData to a YAML block for inclusion in a README.md file.
200
+
201
+ Args:
202
+ line_break (str, *optional*):
203
+ The line break to use when dumping to yaml.
204
+
205
+ Returns:
206
+ `str`: CardData represented as a YAML block.
207
+ """
208
+ if original_order:
209
+ self.__dict__ = {
210
+ k: self.__dict__[k]
211
+ for k in original_order + list(set(self.__dict__.keys()) - set(original_order))
212
+ if k in self.__dict__
213
+ }
214
+ return yaml_dump(self.to_dict(), sort_keys=False, line_break=line_break).strip()
215
+
216
+ def __repr__(self):
217
+ return repr(self.__dict__)
218
+
219
+ def __str__(self):
220
+ return self.to_yaml()
221
+
222
+ def get(self, key: str, default: Any = None) -> Any:
223
+ """Get value for a given metadata key."""
224
+ return self.__dict__.get(key, default)
225
+
226
+ def pop(self, key: str, default: Any = None) -> Any:
227
+ """Pop value for a given metadata key."""
228
+ return self.__dict__.pop(key, default)
229
+
230
+ def __getitem__(self, key: str) -> Any:
231
+ """Get value for a given metadata key."""
232
+ return self.__dict__[key]
233
+
234
+ def __setitem__(self, key: str, value: Any) -> None:
235
+ """Set value for a given metadata key."""
236
+ self.__dict__[key] = value
237
+
238
+ def __contains__(self, key: str) -> bool:
239
+ """Check if a given metadata key is set."""
240
+ return key in self.__dict__
241
+
242
+ def __len__(self) -> int:
243
+ """Return the number of metadata keys set."""
244
+ return len(self.__dict__)
245
+
246
+
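
Because `CardData` mimics a dict without inheriting from it (so `_to_dict` can rewrite fields on export), the usual dict operations work; a quick sketch:

```python
data = CardData(language="en", license="mit")
assert data["language"] == "en"
assert "license" in data
data["tags"] = ["demo"]
assert data.pop("license") == "mit"
assert len(data) == 2  # `language` and `tags` remain
```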
247
+ class ModelCardData(CardData):
248
+ """Model Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
249
+
250
+ Args:
251
+ base_model (`str` or `List[str]`, *optional*):
252
+ The identifier of the base model from which the model derives. This is applicable for example if your model is a
253
+ fine-tune or adapter of an existing model. The value must be the ID of a model on the Hub (or a list of IDs
254
+ if your model derives from multiple models). Defaults to None.
255
+ datasets (`Union[str, List[str]]`, *optional*):
256
+ Dataset or list of datasets that were used to train this model. Should be a dataset ID
257
+ found on https://hf.co/datasets. Defaults to None.
258
+ eval_results (`Union[List[EvalResult], EvalResult]`, *optional*):
259
+ List of `huggingface_hub.EvalResult` that define evaluation results of the model. If provided,
260
+ `model_name` is used as a name on PapersWithCode's leaderboards. Defaults to `None`.
261
+ language (`Union[str, List[str]]`, *optional*):
262
+ Language of model's training data or metadata. It must be an ISO 639-1, 639-2 or
263
+ 639-3 code (two/three letters), or a special value like "code", "multilingual". Defaults to `None`.
264
+ library_name (`str`, *optional*):
265
+ Name of library used by this model. Example: keras or any library from
266
+ https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries.ts.
267
+ Defaults to None.
268
+ license (`str`, *optional*):
269
+ License of this model. Example: apache-2.0 or any license from
270
+ https://huggingface.co/docs/hub/repositories-licenses. Defaults to None.
271
+ license_name (`str`, *optional*):
272
+ Name of the license of this model. Defaults to None. To be used in conjunction with `license_link`.
273
+ Common licenses (Apache-2.0, MIT, CC-BY-SA-4.0) do not need a name. In that case, use `license` instead.
274
+ license_link (`str`, *optional*):
275
+ Link to the license of this model. Defaults to None. To be used in conjunction with `license_name`.
276
+ Common licenses (Apache-2.0, MIT, CC-BY-SA-4.0) do not need a link. In that case, use `license` instead.
277
+ metrics (`List[str]`, *optional*):
278
+ List of metrics used to evaluate this model. Should be a metric name that can be found
279
+ at https://hf.co/metrics. Example: 'accuracy'. Defaults to None.
280
+ model_name (`str`, *optional*):
281
+ A name for this model. It is used along with
282
+ `eval_results` to construct the `model-index` within the card's metadata. The name
283
+ you supply here is what will be used on PapersWithCode's leaderboards. If None is provided
284
+ then the repo name is used as a default. Defaults to None.
285
+ pipeline_tag (`str`, *optional*):
286
+ The pipeline tag associated with the model. Example: "text-classification".
287
+ tags (`List[str]`, *optional*):
288
+ List of tags to add to your model that can be used when filtering on the Hugging
289
+ Face Hub. Defaults to None.
290
+ ignore_metadata_errors (`bool`):
291
+ If True, errors while parsing the metadata section will be ignored. Some information might be lost during
292
+ the process. Use it at your own risk.
293
+ kwargs (`dict`, *optional*):
294
+ Additional metadata that will be added to the model card. Defaults to None.
295
+
296
+ Example:
297
+ ```python
298
+ >>> from huggingface_hub import ModelCardData
299
+ >>> card_data = ModelCardData(
300
+ ... language="en",
301
+ ... license="mit",
302
+ ... library_name="timm",
303
+ ... tags=['image-classification', 'resnet'],
304
+ ... )
305
+ >>> card_data.to_dict()
306
+ {'language': 'en', 'license': 'mit', 'library_name': 'timm', 'tags': ['image-classification', 'resnet']}
307
+
308
+ ```
309
+ """
310
+
311
+ def __init__(
312
+ self,
313
+ *,
314
+ base_model: Optional[Union[str, List[str]]] = None,
315
+ datasets: Optional[Union[str, List[str]]] = None,
316
+ eval_results: Optional[List[EvalResult]] = None,
317
+ language: Optional[Union[str, List[str]]] = None,
318
+ library_name: Optional[str] = None,
319
+ license: Optional[str] = None,
320
+ license_name: Optional[str] = None,
321
+ license_link: Optional[str] = None,
322
+ metrics: Optional[List[str]] = None,
323
+ model_name: Optional[str] = None,
324
+ pipeline_tag: Optional[str] = None,
325
+ tags: Optional[List[str]] = None,
326
+ ignore_metadata_errors: bool = False,
327
+ **kwargs,
328
+ ):
329
+ self.base_model = base_model
330
+ self.datasets = datasets
331
+ self.eval_results = eval_results
332
+ self.language = language
333
+ self.library_name = library_name
334
+ self.license = license
335
+ self.license_name = license_name
336
+ self.license_link = license_link
337
+ self.metrics = metrics
338
+ self.model_name = model_name
339
+ self.pipeline_tag = pipeline_tag
340
+ self.tags = _to_unique_list(tags)
341
+
342
+ model_index = kwargs.pop("model-index", None)
343
+ if model_index:
344
+ try:
345
+ model_name, eval_results = model_index_to_eval_results(model_index)
346
+ self.model_name = model_name
347
+ self.eval_results = eval_results
348
+ except (KeyError, TypeError) as error:
349
+ if ignore_metadata_errors:
350
+ logger.warning("Invalid model-index. Not loading eval results into CardData.")
351
+ else:
352
+ raise ValueError(
353
+ f"Invalid `model_index` in metadata cannot be parsed: {error.__class__} {error}. Pass"
354
+ " `ignore_metadata_errors=True` to ignore this error while loading a Model Card. Warning:"
355
+ " some information will be lost. Use it at your own risk."
356
+ )
357
+
358
+ super().__init__(**kwargs)
359
+
360
+ if self.eval_results:
361
+ if isinstance(self.eval_results, EvalResult):
362
+ self.eval_results = [self.eval_results]
363
+ if self.model_name is None:
364
+ raise ValueError("Passing `eval_results` requires `model_name` to be set.")
365
+
366
+ def _to_dict(self, data_dict):
367
+ """Format the internal data dict. In this case, we convert eval results to a valid model index"""
368
+ if self.eval_results is not None:
369
+ data_dict["model-index"] = eval_results_to_model_index(self.model_name, self.eval_results)
370
+ del data_dict["eval_results"], data_dict["model_name"]
371
+
372
+
373
+ class DatasetCardData(CardData):
374
+ """Dataset Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
375
+
376
+ Args:
377
+ language (`List[str]`, *optional*):
378
+ Language of dataset's data or metadata. It must be an ISO 639-1, 639-2 or
379
+ 639-3 code (two/three letters), or a special value like "code", "multilingual".
380
+ license (`Union[str, List[str]]`, *optional*):
381
+ License(s) of this dataset. Example: apache-2.0 or any license from
382
+ https://huggingface.co/docs/hub/repositories-licenses.
383
+ annotations_creators (`Union[str, List[str]]`, *optional*):
384
+ How the annotations for the dataset were created.
385
+ Options are: 'found', 'crowdsourced', 'expert-generated', 'machine-generated', 'no-annotation', 'other'.
386
+ language_creators (`Union[str, List[str]]`, *optional*):
387
+ How the text-based data in the dataset was created.
388
+ Options are: 'found', 'crowdsourced', 'expert-generated', 'machine-generated', 'other'
389
+ multilinguality (`Union[str, List[str]]`, *optional*):
390
+ Whether the dataset is multilingual.
391
+ Options are: 'monolingual', 'multilingual', 'translation', 'other'.
392
+ size_categories (`Union[str, List[str]]`, *optional*):
393
+ The number of examples in the dataset. Options are: 'n<1K', '1K<n<10K', '10K<n<100K',
394
+ '100K<n<1M', '1M<n<10M', '10M<n<100M', '100M<n<1B', '1B<n<10B', '10B<n<100B', '100B<n<1T', 'n>1T', and 'other'.
395
+ source_datasets (`List[str]`, *optional*):
396
+ Indicates whether the dataset is an original dataset or extended from another existing dataset.
397
+ Options are: 'original' and 'extended'.
398
+ task_categories (`Union[str, List[str]]`, *optional*):
399
+ What categories of task does the dataset support?
400
+ task_ids (`Union[str, List[str]]`, *optional*):
401
+ What specific tasks does the dataset support?
402
+ paperswithcode_id (`str`, *optional*):
403
+ ID of the dataset on PapersWithCode.
404
+ pretty_name (`str`, *optional*):
405
+ A more human-readable name for the dataset. (ex. "Cats vs. Dogs")
406
+ train_eval_index (`Dict`, *optional*):
407
+ A dictionary that describes the necessary spec for doing evaluation on the Hub.
408
+ If not provided, it will be gathered from the 'train-eval-index' key of the kwargs.
409
+ config_names (`Union[str, List[str]]`, *optional*):
410
+ A list of the available dataset configs for the dataset.
411
+ """
412
+
413
+ def __init__(
414
+ self,
415
+ *,
416
+ language: Optional[Union[str, List[str]]] = None,
417
+ license: Optional[Union[str, List[str]]] = None,
418
+ annotations_creators: Optional[Union[str, List[str]]] = None,
419
+ language_creators: Optional[Union[str, List[str]]] = None,
420
+ multilinguality: Optional[Union[str, List[str]]] = None,
421
+ size_categories: Optional[Union[str, List[str]]] = None,
422
+ source_datasets: Optional[List[str]] = None,
423
+ task_categories: Optional[Union[str, List[str]]] = None,
424
+ task_ids: Optional[Union[str, List[str]]] = None,
425
+ paperswithcode_id: Optional[str] = None,
426
+ pretty_name: Optional[str] = None,
427
+ train_eval_index: Optional[Dict] = None,
428
+ config_names: Optional[Union[str, List[str]]] = None,
429
+ ignore_metadata_errors: bool = False,
430
+ **kwargs,
431
+ ):
432
+ self.annotations_creators = annotations_creators
433
+ self.language_creators = language_creators
434
+ self.language = language
435
+ self.license = license
436
+ self.multilinguality = multilinguality
437
+ self.size_categories = size_categories
438
+ self.source_datasets = source_datasets
439
+ self.task_categories = task_categories
440
+ self.task_ids = task_ids
441
+ self.paperswithcode_id = paperswithcode_id
442
+ self.pretty_name = pretty_name
443
+ self.config_names = config_names
444
+
445
+ # TODO - maybe handle this similarly to EvalResult?
446
+ self.train_eval_index = train_eval_index or kwargs.pop("train-eval-index", None)
447
+ super().__init__(**kwargs)
448
+
449
+ def _to_dict(self, data_dict):
450
+ data_dict["train-eval-index"] = data_dict.pop("train_eval_index")
451
+
452
+
453
+ class SpaceCardData(CardData):
454
+ """Space Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
455
+
456
+ To get an exhaustive reference of Spaces configuration, please visit https://huggingface.co/docs/hub/spaces-config-reference#spaces-configuration-reference.
457
+
458
+ Args:
459
+ title (`str`, *optional*)
460
+ Title of the Space.
461
+ sdk (`str`, *optional*)
462
+ SDK of the Space (one of `gradio`, `streamlit`, `docker`, or `static`).
463
+ sdk_version (`str`, *optional*)
464
+ Version of the used SDK (if Gradio/Streamlit sdk).
465
+ python_version (`str`, *optional*)
466
+ Python version used in the Space (if Gradio/Streamlit sdk).
467
+ app_file (`str`, *optional*)
468
+ Path to your main application file (which contains either gradio or streamlit Python code, or static html code).
469
+ Path is relative to the root of the repository.
470
+ app_port (`int`, *optional*)
471
+ Port on which your application is running. Used only if sdk is `docker`.
472
+ license (`str`, *optional*)
473
+ License of this model. Example: apache-2.0 or any license from
474
+ https://huggingface.co/docs/hub/repositories-licenses.
475
+ duplicated_from (`str`, *optional*)
476
+ ID of the original Space if this is a duplicated Space.
477
+ models (`List[str]`, *optional*)
478
+ List of models related to this Space. Should be a model ID found on https://hf.co/models.
479
+ datasets (`List[str]`, *optional*)
480
+ List of datasets related to this Space. Should be a dataset ID found on https://hf.co/datasets.
481
+ tags (`List[str]`, *optional*)
482
+ List of tags to add to your Space that can be used when filtering on the Hub.
483
+ ignore_metadata_errors (`bool`):
484
+ If True, errors while parsing the metadata section will be ignored. Some information might be lost during
485
+ the process. Use it at your own risk.
486
+ kwargs (`dict`, *optional*):
487
+ Additional metadata that will be added to the space card.
488
+
489
+ Example:
490
+ ```python
491
+ >>> from huggingface_hub import SpaceCardData
492
+ >>> card_data = SpaceCardData(
493
+ ... title="Dreambooth Training",
494
+ ... license="mit",
495
+ ... sdk="gradio",
496
+ ... duplicated_from="multimodalart/dreambooth-training"
497
+ ... )
498
+ >>> card_data.to_dict()
499
+ {'title': 'Dreambooth Training', 'sdk': 'gradio', 'license': 'mit', 'duplicated_from': 'multimodalart/dreambooth-training'}
500
+ ```
501
+ """
502
+
503
+ def __init__(
504
+ self,
505
+ *,
506
+ title: Optional[str] = None,
507
+ sdk: Optional[str] = None,
508
+ sdk_version: Optional[str] = None,
509
+ python_version: Optional[str] = None,
510
+ app_file: Optional[str] = None,
511
+ app_port: Optional[int] = None,
512
+ license: Optional[str] = None,
513
+ duplicated_from: Optional[str] = None,
514
+ models: Optional[List[str]] = None,
515
+ datasets: Optional[List[str]] = None,
516
+ tags: Optional[List[str]] = None,
517
+ ignore_metadata_errors: bool = False,
518
+ **kwargs,
519
+ ):
520
+ self.title = title
521
+ self.sdk = sdk
522
+ self.sdk_version = sdk_version
523
+ self.python_version = python_version
524
+ self.app_file = app_file
525
+ self.app_port = app_port
526
+ self.license = license
527
+ self.duplicated_from = duplicated_from
528
+ self.models = models
529
+ self.datasets = datasets
530
+ self.tags = _to_unique_list(tags)
531
+ super().__init__(**kwargs)
532
+
533
+
534
+ def model_index_to_eval_results(model_index: List[Dict[str, Any]]) -> Tuple[str, List[EvalResult]]:
535
+ """Takes in a model index and returns the model name and a list of `huggingface_hub.EvalResult` objects.
536
+
537
+ A detailed spec of the model index can be found here:
538
+ https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
539
+
540
+ Args:
541
+ model_index (`List[Dict[str, Any]]`):
542
+ A model index data structure, likely coming from a README.md file on the
543
+ Hugging Face Hub.
544
+
545
+ Returns:
546
+ model_name (`str`):
547
+ The name of the model as found in the model index. This is used as the
548
+ identifier for the model on leaderboards like PapersWithCode.
549
+ eval_results (`List[EvalResult]`):
550
+ A list of `huggingface_hub.EvalResult` objects containing the metrics
551
+ reported in the provided model_index.
552
+
553
+ Example:
554
+ ```python
555
+ >>> from huggingface_hub.repocard_data import model_index_to_eval_results
556
+ >>> # Define a minimal model index
557
+ >>> model_index = [
558
+ ... {
559
+ ... "name": "my-cool-model",
560
+ ... "results": [
561
+ ... {
562
+ ... "task": {
563
+ ... "type": "image-classification"
564
+ ... },
565
+ ... "dataset": {
566
+ ... "type": "beans",
567
+ ... "name": "Beans"
568
+ ... },
569
+ ... "metrics": [
570
+ ... {
571
+ ... "type": "accuracy",
572
+ ... "value": 0.9
573
+ ... }
574
+ ... ]
575
+ ... }
576
+ ... ]
577
+ ... }
578
+ ... ]
579
+ >>> model_name, eval_results = model_index_to_eval_results(model_index)
580
+ >>> model_name
581
+ 'my-cool-model'
582
+ >>> eval_results[0].task_type
583
+ 'image-classification'
584
+ >>> eval_results[0].metric_type
585
+ 'accuracy'
586
+
587
+ ```
588
+ """
589
+
590
+ eval_results = []
591
+ for elem in model_index:
592
+ name = elem["name"]
593
+ results = elem["results"]
594
+ for result in results:
595
+ task_type = result["task"]["type"]
596
+ task_name = result["task"].get("name")
597
+ dataset_type = result["dataset"]["type"]
598
+ dataset_name = result["dataset"]["name"]
599
+ dataset_config = result["dataset"].get("config")
600
+ dataset_split = result["dataset"].get("split")
601
+ dataset_revision = result["dataset"].get("revision")
602
+ dataset_args = result["dataset"].get("args")
603
+ source_name = result.get("source", {}).get("name")
604
+ source_url = result.get("source", {}).get("url")
605
+
606
+ for metric in result["metrics"]:
607
+ metric_type = metric["type"]
608
+ metric_value = metric["value"]
609
+ metric_name = metric.get("name")
610
+ metric_args = metric.get("args")
611
+ metric_config = metric.get("config")
612
+ verified = metric.get("verified")
613
+ verify_token = metric.get("verifyToken")
614
+
615
+ eval_result = EvalResult(
616
+ task_type=task_type, # Required
617
+ dataset_type=dataset_type, # Required
618
+ dataset_name=dataset_name, # Required
619
+ metric_type=metric_type, # Required
620
+ metric_value=metric_value, # Required
621
+ task_name=task_name,
622
+ dataset_config=dataset_config,
623
+ dataset_split=dataset_split,
624
+ dataset_revision=dataset_revision,
625
+ dataset_args=dataset_args,
626
+ metric_name=metric_name,
627
+ metric_args=metric_args,
628
+ metric_config=metric_config,
629
+ verified=verified,
630
+ verify_token=verify_token,
631
+ source_name=source_name,
632
+ source_url=source_url,
633
+ )
634
+ eval_results.append(eval_result)
635
+ return name, eval_results
636
+
637
+
638
+ def _remove_none(obj):
639
+ """
640
+ Recursively remove `None` values from a dict. Borrowed from: https://stackoverflow.com/a/20558778
641
+ """
642
+ if isinstance(obj, (list, tuple, set)):
643
+ return type(obj)(_remove_none(x) for x in obj if x is not None)
644
+ elif isinstance(obj, dict):
645
+ return type(obj)((_remove_none(k), _remove_none(v)) for k, v in obj.items() if k is not None and v is not None)
646
+ else:
647
+ return obj
648
+
649
+
650
+ def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult]) -> List[Dict[str, Any]]:
651
+ """Takes in a model name and a list of `huggingface_hub.EvalResult` objects and returns a
652
+ valid model-index that will be compatible with the format expected by the
653
+ Hugging Face Hub.
654
+
655
+ Args:
656
+ model_name (`str`):
657
+ Name of the model (ex. "my-cool-model"). This is used as the identifier
658
+ for the model on leaderboards like PapersWithCode.
659
+ eval_results (`List[EvalResult]`):
660
+ List of `huggingface_hub.EvalResult` objects containing the metrics to be
661
+ reported in the model-index.
662
+
663
+ Returns:
664
+ model_index (`List[Dict[str, Any]]`): The eval_results converted to a model-index.
665
+
666
+ Example:
667
+ ```python
668
+ >>> from huggingface_hub.repocard_data import eval_results_to_model_index, EvalResult
669
+ >>> # Define minimal eval_results
670
+ >>> eval_results = [
671
+ ... EvalResult(
672
+ ... task_type="image-classification", # Required
673
+ ... dataset_type="beans", # Required
674
+ ... dataset_name="Beans", # Required
675
+ ... metric_type="accuracy", # Required
676
+ ... metric_value=0.9, # Required
677
+ ... )
678
+ ... ]
679
+ >>> eval_results_to_model_index("my-cool-model", eval_results)
680
+ [{'name': 'my-cool-model', 'results': [{'task': {'type': 'image-classification'}, 'dataset': {'name': 'Beans', 'type': 'beans'}, 'metrics': [{'type': 'accuracy', 'value': 0.9}]}]}]
681
+
682
+ ```
683
+ """
684
+
685
+ # Metrics are reported on a unique task-and-dataset basis.
686
+ # Here, we make a map of those pairs and the associated EvalResults.
687
+ task_and_ds_types_map: Dict[Any, List[EvalResult]] = defaultdict(list)
688
+ for eval_result in eval_results:
689
+ task_and_ds_types_map[eval_result.unique_identifier].append(eval_result)
690
+
691
+ # Use the map from above to generate the model index data.
692
+ model_index_data = []
693
+ for results in task_and_ds_types_map.values():
694
+ # All items from `results` share same metadata
695
+ sample_result = results[0]
696
+ data = {
697
+ "task": {
698
+ "type": sample_result.task_type,
699
+ "name": sample_result.task_name,
700
+ },
701
+ "dataset": {
702
+ "name": sample_result.dataset_name,
703
+ "type": sample_result.dataset_type,
704
+ "config": sample_result.dataset_config,
705
+ "split": sample_result.dataset_split,
706
+ "revision": sample_result.dataset_revision,
707
+ "args": sample_result.dataset_args,
708
+ },
709
+ "metrics": [
710
+ {
711
+ "type": result.metric_type,
712
+ "value": result.metric_value,
713
+ "name": result.metric_name,
714
+ "config": result.metric_config,
715
+ "args": result.metric_args,
716
+ "verified": result.verified,
717
+ "verifyToken": result.verify_token,
718
+ }
719
+ for result in results
720
+ ],
721
+ }
722
+ if sample_result.source_url is not None:
723
+ source = {
724
+ "url": sample_result.source_url,
725
+ }
726
+ if sample_result.source_name is not None:
727
+ source["name"] = sample_result.source_name
728
+ data["source"] = source
729
+ model_index_data.append(data)
730
+
731
+ # TODO - Check if there are cases where this list is longer than one?
732
+ # Finally, the model index itself is a list of dicts.
733
+ model_index = [
734
+ {
735
+ "name": model_name,
736
+ "results": model_index_data,
737
+ }
738
+ ]
739
+ return _remove_none(model_index)
740
+
741
+
742
+ def _to_unique_list(tags: Optional[List[str]]) -> Optional[List[str]]:
743
+ if tags is None:
744
+ return tags
745
+ unique_tags = [] # make tags unique + keep order explicitly
746
+ for tag in tags:
747
+ if tag not in unique_tags:
748
+ unique_tags.append(tag)
749
+ return unique_tags
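For quick reference, a minimal sketch of how the two private helpers above behave (assuming they are imported from `huggingface_hub.repocard_data`):

```python
from huggingface_hub.repocard_data import _remove_none, _to_unique_list

# Order-preserving de-duplication, as used for Space tags.
assert _to_unique_list(["nlp", "vision", "nlp"]) == ["nlp", "vision"]
assert _to_unique_list(None) is None

# `None` values are pruned recursively before a model-index is returned.
assert _remove_none({"a": 1, "b": None, "c": {"d": None}}) == {"a": 1, "c": {}}
```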
meow/lib/python3.13/site-packages/huggingface_hub/repository.py ADDED
@@ -0,0 +1,1477 @@
1
+ import atexit
2
+ import os
3
+ import re
4
+ import subprocess
5
+ import threading
6
+ import time
7
+ from contextlib import contextmanager
8
+ from pathlib import Path
9
+ from typing import Callable, Dict, Iterator, List, Optional, Tuple, TypedDict, Union
10
+ from urllib.parse import urlparse
11
+
12
+ from huggingface_hub import constants
13
+ from huggingface_hub.repocard import metadata_load, metadata_save
14
+
15
+ from .hf_api import HfApi, repo_type_and_id_from_hf_id
16
+ from .lfs import LFS_MULTIPART_UPLOAD_COMMAND
17
+ from .utils import (
18
+ SoftTemporaryDirectory,
19
+ get_token,
20
+ logging,
21
+ run_subprocess,
22
+ tqdm,
23
+ validate_hf_hub_args,
24
+ )
25
+ from .utils._deprecation import _deprecate_method
26
+
27
+
28
+ logger = logging.get_logger(__name__)
29
+
30
+
31
+ class CommandInProgress:
32
+ """
33
+ Utility to follow commands launched asynchronously.
34
+ """
35
+
36
+ def __init__(
37
+ self,
38
+ title: str,
39
+ is_done_method: Callable,
40
+ status_method: Callable,
41
+ process: subprocess.Popen,
42
+ post_method: Optional[Callable] = None,
43
+ ):
44
+ self.title = title
45
+ self._is_done = is_done_method
46
+ self._status = status_method
47
+ self._process = process
48
+ self._stderr = ""
49
+ self._stdout = ""
50
+ self._post_method = post_method
51
+
52
+ @property
53
+ def is_done(self) -> bool:
54
+ """
55
+ Whether the process is done.
56
+ """
57
+ result = self._is_done()
58
+
59
+ if result and self._post_method is not None:
60
+ self._post_method()
61
+ self._post_method = None
62
+
63
+ return result
64
+
65
+ @property
66
+ def status(self) -> int:
67
+ """
68
+ The exit code/status of the current action. Will return `0` if the
69
+ command has completed successfully, and a number between 1 and 255 if
70
+ the process errored-out.
71
+
72
+ Will return -1 if the command is still ongoing.
73
+ """
74
+ return self._status()
75
+
76
+ @property
77
+ def failed(self) -> bool:
78
+ """
79
+ Whether the process errored-out.
80
+ """
81
+ return self.status > 0
82
+
83
+ @property
84
+ def stderr(self) -> str:
85
+ """
86
+ The current output message on the standard error.
87
+ """
88
+ if self._process.stderr is not None:
89
+ self._stderr += self._process.stderr.read()
90
+ return self._stderr
91
+
92
+ @property
93
+ def stdout(self) -> str:
94
+ """
95
+ The current output message on the standard output.
96
+ """
97
+ if self._process.stdout is not None:
98
+ self._stdout += self._process.stdout.read()
99
+ return self._stdout
100
+
101
+ def __repr__(self):
102
+ status = self.status
103
+
104
+ if status == -1:
105
+ status = "running"
106
+
107
+ return (
108
+ f"[{self.title} command, status code: {status},"
109
+ f" {'in progress.' if not self.is_done else 'finished.'} PID:"
110
+ f" {self._process.pid}]"
111
+ )
112
+
113
+
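For illustration, a hedged sketch of how this class is driven; the constructor arguments mirror the internal usage in `Repository.git_push(blocking=False)` further down:

```python
import subprocess
import time

# Wrap a short-lived subprocess and poll it until completion.
proc = subprocess.Popen(
    ["git", "--version"],  # any quick command with small output works here
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    encoding="utf-8",
)
cmd = CommandInProgress(
    title="git-version",
    is_done_method=lambda: proc.poll() is not None,
    status_method=lambda: -1 if proc.poll() is None else proc.poll(),
    process=proc,
)
while not cmd.is_done:
    time.sleep(0.1)
print(cmd.status, cmd.stdout)  # 0 and the git version string on success
```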
114
+ def is_git_repo(folder: Union[str, Path]) -> bool:
115
+ """
116
+ Check if the folder is the root or part of a git repository
117
+
118
+ Args:
119
+ folder (`str` or `Path`):
120
+ The folder in which to run the command.
121
+
122
+ Returns:
123
+ `bool`: `True` if the folder is the root or part of a git repository, `False`
124
+ otherwise.
125
+ """
126
+ folder_exists = os.path.exists(os.path.join(folder, ".git"))
127
+ git_branch = subprocess.run("git branch".split(), cwd=folder, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
128
+ return folder_exists and git_branch.returncode == 0
129
+
130
+
131
+ def is_local_clone(folder: Union[str, Path], remote_url: str) -> bool:
132
+ """
133
+ Check if the folder is a local clone of the remote_url
134
+
135
+ Args:
136
+ folder (`str` or `Path`):
137
+ The folder in which to run the command.
138
+ remote_url (`str`):
139
+ The url of a git repository.
140
+
141
+ Returns:
142
+ `bool`: `True` if the repository is a local clone of the remote
143
+ repository specified, `False` otherwise.
144
+ """
145
+ if not is_git_repo(folder):
146
+ return False
147
+
148
+ remotes = run_subprocess("git remote -v", folder).stdout
149
+
150
+ # Strip any auth token from the URLs before comparing remotes.
151
+ remote_url = re.sub(r"https://.*@", "https://", remote_url)
152
+ remotes = [re.sub(r"https://.*@", "https://", remote) for remote in remotes.split()]
153
+ return remote_url in remotes
154
+
155
+
156
+ def is_tracked_with_lfs(filename: Union[str, Path]) -> bool:
157
+ """
158
+ Check if the file passed is tracked with git-lfs.
159
+
160
+ Args:
161
+ filename (`str` or `Path`):
162
+ The filename to check.
163
+
164
+ Returns:
165
+ `bool`: `True` if the file passed is tracked with git-lfs, `False`
166
+ otherwise.
167
+ """
168
+ folder = Path(filename).parent
169
+ filename = Path(filename).name
170
+
171
+ try:
172
+ p = run_subprocess("git check-attr -a".split() + [filename], folder)
173
+ attributes = p.stdout.strip()
174
+ except subprocess.CalledProcessError as exc:
175
+ if not is_git_repo(folder):
176
+ return False
177
+ else:
178
+ raise OSError(exc.stderr)
179
+
180
+ if len(attributes) == 0:
181
+ return False
182
+
183
+ found_lfs_tag = {"diff": False, "merge": False, "filter": False}
184
+
185
+ for attribute in attributes.split("\n"):
186
+ for tag in found_lfs_tag.keys():
187
+ if tag in attribute and "lfs" in attribute:
188
+ found_lfs_tag[tag] = True
189
+
190
+ return all(found_lfs_tag.values())
191
+
192
+
193
+ def is_git_ignored(filename: Union[str, Path]) -> bool:
194
+ """
195
+ Check if file is git-ignored. Supports nested .gitignore files.
196
+
197
+ Args:
198
+ filename (`str` or `Path`):
199
+ The filename to check.
200
+
201
+ Returns:
202
+ `bool`: `True` if the file passed is ignored by `git`, `False`
203
+ otherwise.
204
+ """
205
+ folder = Path(filename).parent
206
+ filename = Path(filename).name
207
+
208
+ try:
209
+ p = run_subprocess("git check-ignore".split() + [filename], folder, check=False)
210
+ # Will return exit code 1 if not gitignored
211
+ is_ignored = not bool(p.returncode)
212
+ except subprocess.CalledProcessError as exc:
213
+ raise OSError(exc.stderr)
214
+
215
+ return is_ignored
216
+
217
+
218
+ def is_binary_file(filename: Union[str, Path]) -> bool:
219
+ """
220
+ Check if file is a binary file.
221
+
222
+ Args:
223
+ filename (`str` or `Path`):
224
+ The filename to check.
225
+
226
+ Returns:
227
+ `bool`: `True` if the file passed is a binary file, `False` otherwise.
228
+ """
229
+ try:
230
+ with open(filename, "rb") as f:
231
+ content = f.read(10 * (1024**2)) # Read a maximum of 10MB
232
+
233
+ # Code sample taken from the following stack overflow thread
234
+ # https://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python/7392391#7392391
235
+ text_chars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F})
236
+ return bool(content.translate(None, text_chars))
237
+ except UnicodeDecodeError:
238
+ return True
239
+
240
+
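A quick sketch of these file-level predicates, which drive the auto-LFS-tracking logic further down (the path is a placeholder):

```python
path = "my-model/pytorch_model.bin"  # placeholder file inside a local clone

print(is_binary_file(path))       # True for most serialized weights
print(is_tracked_with_lfs(path))  # whether .gitattributes routes it to LFS
print(is_git_ignored(path))       # whether a .gitignore rule excludes it
```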
241
+ def files_to_be_staged(pattern: str = ".", folder: Union[str, Path, None] = None) -> List[str]:
242
+ """
243
+ Returns a list of filenames that are to be staged.
244
+
245
+ Args:
246
+ pattern (`str`, *optional*, defaults to "."):
247
+ The pattern of filenames to check. Put `.` to get all files.
248
+ folder (`str` or `Path`):
249
+ The folder in which to run the command.
250
+
251
+ Returns:
252
+ `List[str]`: List of files that are to be staged.
253
+ """
254
+ try:
255
+ p = run_subprocess("git ls-files --exclude-standard -mo".split() + [pattern], folder)
256
+ if len(p.stdout.strip()):
257
+ files = p.stdout.strip().split("\n")
258
+ else:
259
+ files = []
260
+ except subprocess.CalledProcessError as exc:
261
+ raise EnvironmentError(exc.stderr)
262
+
263
+ return files
264
+
265
+
266
+ def is_tracked_upstream(folder: Union[str, Path]) -> bool:
267
+ """
268
+ Check if the current checked-out branch is tracked upstream.
269
+
270
+ Args:
271
+ folder (`str` or `Path`):
272
+ The folder in which to run the command.
273
+
274
+ Returns:
275
+ `bool`: `True` if the current checked-out branch is tracked upstream,
276
+ `False` otherwise.
277
+ """
278
+ try:
279
+ run_subprocess("git rev-parse --symbolic-full-name --abbrev-ref @{u}", folder)
280
+ return True
281
+ except subprocess.CalledProcessError as exc:
282
+ if "HEAD" in exc.stderr:
283
+ raise OSError("No branch checked out")
284
+
285
+ return False
286
+
287
+
288
+ def commits_to_push(folder: Union[str, Path], upstream: Optional[str] = None) -> int:
289
+ """
290
+ Check the number of commits that would be pushed upstream
291
+
292
+ Args:
293
+ folder (`str` or `Path`):
294
+ The folder in which to run the command.
295
+ upstream (`str`, *optional*):
296
+ The name of the upstream repository with which the comparison should be
297
+ made.
298
+
299
+ Returns:
300
+ `int`: Number of commits that would be pushed upstream were a `git
301
+ push` to proceed.
302
+ """
303
+ try:
304
+ result = run_subprocess(f"git cherry -v {upstream or ''}", folder)
305
+ return len(result.stdout.split("\n")) - 1
306
+ except subprocess.CalledProcessError as exc:
307
+ raise EnvironmentError(exc.stderr)
308
+
309
+
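Taken together, these module-level helpers can be used standalone against any local checkout; a hedged sketch (the path below is a placeholder for a clone with an upstream branch):

```python
folder = "."  # placeholder path to a local git checkout

# Inspect the clone without instantiating Repository.
if is_git_repo(folder) and is_tracked_upstream(folder):
    print(f"{commits_to_push(folder)} commit(s) ahead of upstream")
    print("unstaged/untracked files:", files_to_be_staged(".", folder=folder))
```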
310
+ class PbarT(TypedDict):
311
+ # Used to store an opened progress bar in `_lfs_log_progress`
312
+ bar: tqdm
313
+ past_bytes: int
314
+
315
+
316
+ @contextmanager
317
+ def _lfs_log_progress():
318
+ """
319
+ This is a context manager that will log the Git LFS progress of cleaning,
320
+ smudging, pulling and pushing.
321
+ """
322
+
323
+ if logger.getEffectiveLevel() >= logging.ERROR:
324
+ try:
325
+ yield
326
+ except Exception:
327
+ pass
328
+ return
329
+
330
+ def output_progress(stopping_event: threading.Event):
331
+ """
332
+ To be launched as a separate thread with an event meaning it should stop
333
+ the tail.
334
+ """
335
+ # Key is tuple(state, filename), value is a dict(tqdm bar and a previous value)
336
+ pbars: Dict[Tuple[str, str], PbarT] = {}
337
+
338
+ def close_pbars():
339
+ for pbar in pbars.values():
340
+ pbar["bar"].update(pbar["bar"].total - pbar["past_bytes"])
341
+ pbar["bar"].refresh()
342
+ pbar["bar"].close()
343
+
344
+ def tail_file(filename) -> Iterator[str]:
345
+ """
346
+ Creates a generator to be iterated through, which will return each
347
+ line one by one. Will stop tailing the file if the stopping_event is
348
+ set.
349
+ """
350
+ with open(filename, "r") as file:
351
+ current_line = ""
352
+ while True:
353
+ if stopping_event.is_set():
354
+ close_pbars()
355
+ break
356
+
357
+ line_bit = file.readline()
358
+ if line_bit is not None and not len(line_bit.strip()) == 0:
359
+ current_line += line_bit
360
+ if current_line.endswith("\n"):
361
+ yield current_line
362
+ current_line = ""
363
+ else:
364
+ time.sleep(1)
365
+
366
+ # If the file isn't created yet, wait for a few seconds before trying again.
367
+ # Can be interrupted with the stopping_event.
368
+ while not os.path.exists(os.environ["GIT_LFS_PROGRESS"]):
369
+ if stopping_event.is_set():
370
+ close_pbars()
371
+ return
372
+
373
+ time.sleep(2)
374
+
375
+ for line in tail_file(os.environ["GIT_LFS_PROGRESS"]):
376
+ try:
377
+ state, file_progress, byte_progress, filename = line.split()
378
+ except ValueError as error:
379
+ # Try/except to ease debugging. See https://github.com/huggingface/huggingface_hub/issues/1373.
380
+ raise ValueError(f"Cannot unpack LFS progress line:\n{line}") from error
381
+ description = f"{state.capitalize()} file {filename}"
382
+
383
+ current_bytes, total_bytes = byte_progress.split("/")
384
+ current_bytes_int = int(current_bytes)
385
+ total_bytes_int = int(total_bytes)
386
+
387
+ pbar = pbars.get((state, filename))
388
+ if pbar is None:
389
+ # Initialize progress bar
390
+ pbars[(state, filename)] = {
391
+ "bar": tqdm(
392
+ desc=description,
393
+ initial=current_bytes_int,
394
+ total=total_bytes_int,
395
+ unit="B",
396
+ unit_scale=True,
397
+ unit_divisor=1024,
398
+ name="huggingface_hub.lfs_upload",
399
+ ),
400
+ "past_bytes": int(current_bytes),
401
+ }
402
+ else:
403
+ # Update progress bar
404
+ pbar["bar"].update(current_bytes_int - pbar["past_bytes"])
405
+ pbar["past_bytes"] = current_bytes_int
406
+
407
+ current_lfs_progress_value = os.environ.get("GIT_LFS_PROGRESS", "")
408
+
409
+ with SoftTemporaryDirectory() as tmpdir:
410
+ os.environ["GIT_LFS_PROGRESS"] = os.path.join(tmpdir, "lfs_progress")
411
+ logger.debug(f"Following progress in {os.environ['GIT_LFS_PROGRESS']}")
412
+
413
+ exit_event = threading.Event()
414
+ x = threading.Thread(target=output_progress, args=(exit_event,), daemon=True)
415
+ x.start()
416
+
417
+ try:
418
+ yield
419
+ finally:
420
+ exit_event.set()
421
+ x.join()
422
+
423
+ os.environ["GIT_LFS_PROGRESS"] = current_lfs_progress_value
424
+
425
+
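A hedged sketch of how this internal context manager is used: it only has an effect around git commands that honour the `GIT_LFS_PROGRESS` protocol, such as LFS pulls and pushes (this mirrors the pattern in `git_pull` below).

```python
# Show per-file LFS transfer progress while pulling.
with _lfs_log_progress():
    run_subprocess("git lfs pull", ".")  # "." is a placeholder clone path
```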
426
+ class Repository:
427
+ """
428
+ Helper class to wrap the git and git-lfs commands.
429
+
430
+ The aim is to facilitate interacting with huggingface.co hosted model or
431
+ dataset repos, though not a lot here (if any) is actually specific to
432
+ huggingface.co.
433
+
434
+ <Tip warning={true}>
435
+
436
+ [`Repository`] is deprecated in favor of the http-based alternatives implemented in
437
+ [`HfApi`]. Given its large adoption in legacy code, the complete removal of
438
+ [`Repository`] will only happen in release `v1.0`. For more details, please read
439
+ https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
440
+
441
+ </Tip>
442
+ """
443
+
444
+ command_queue: List[CommandInProgress]
445
+
446
+ @validate_hf_hub_args
447
+ @_deprecate_method(
448
+ version="1.0",
449
+ message=(
450
+ "Please prefer the http-based alternatives instead. Given its large adoption in legacy code, the complete"
451
+ " removal is only planned on next major release.\nFor more details, please read"
452
+ " https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http."
453
+ ),
454
+ )
455
+ def __init__(
456
+ self,
457
+ local_dir: Union[str, Path],
458
+ clone_from: Optional[str] = None,
459
+ repo_type: Optional[str] = None,
460
+ token: Union[bool, str] = True,
461
+ git_user: Optional[str] = None,
462
+ git_email: Optional[str] = None,
463
+ revision: Optional[str] = None,
464
+ skip_lfs_files: bool = False,
465
+ client: Optional[HfApi] = None,
466
+ ):
467
+ """
468
+ Instantiate a local clone of a git repo.
469
+
470
+ If `clone_from` is set, the repo will be cloned from an existing remote repository.
471
+ If the remote repo does not exist, an `EnvironmentError` exception will be thrown.
472
+ Please create the remote repo first using [`create_repo`].
473
+
474
+ `Repository` uses the local git credentials by default. If explicitly set, the `token`
475
+ or the `git_user`/`git_email` pair will be used instead.
476
+
477
+ Args:
478
+ local_dir (`str` or `Path`):
479
+ path (e.g. `'my_trained_model/'`) to the local directory, where
480
+ the `Repository` will be initialized.
481
+ clone_from (`str`, *optional*):
482
+ Either a repository url or `repo_id`.
483
+ Example:
484
+ - `"https://huggingface.co/philschmid/playground-tests"`
485
+ - `"philschmid/playground-tests"`
486
+ repo_type (`str`, *optional*):
487
+ To set when cloning a repo from a repo_id. Default is model.
488
+ token (`bool` or `str`, *optional*):
489
+ A valid authentication token (see https://huggingface.co/settings/token).
490
+ If `None` or `True` and machine is logged in (through `huggingface-cli login`
491
+ or [`~huggingface_hub.login`]), token will be retrieved from the cache.
492
+ If `False`, token is not sent in the request header.
493
+ git_user (`str`, *optional*):
494
+ will override the `git config user.name` for committing and
495
+ pushing files to the hub.
496
+ git_email (`str`, *optional*):
497
+ will override the `git config user.email` for committing and
498
+ pushing files to the hub.
499
+ revision (`str`, *optional*):
500
+ Revision to checkout after initializing the repository. If the
501
+ revision doesn't exist, a branch will be created with that
502
+ revision name from the default branch's current HEAD.
503
+ skip_lfs_files (`bool`, *optional*, defaults to `False`):
504
+ whether to skip git-LFS files or not.
505
+ client (`HfApi`, *optional*):
506
+ Instance of [`HfApi`] to use when calling the HF Hub API. A new
507
+ instance will be created if this is left to `None`.
508
+
509
+ Raises:
510
+ [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
511
+ If the remote repository set in `clone_from` does not exist.
512
+ """
513
+ if isinstance(local_dir, Path):
514
+ local_dir = str(local_dir)
515
+ os.makedirs(local_dir, exist_ok=True)
516
+ self.local_dir = os.path.join(os.getcwd(), local_dir)
517
+ self._repo_type = repo_type
518
+ self.command_queue = []
519
+ self.skip_lfs_files = skip_lfs_files
520
+ self.client = client if client is not None else HfApi()
521
+
522
+ self.check_git_versions()
523
+
524
+ if isinstance(token, str):
525
+ self.huggingface_token: Optional[str] = token
526
+ elif token is False:
527
+ self.huggingface_token = None
528
+ else:
529
+ # if `True` -> explicit use of the cached token
530
+ # if `None` -> implicit use of the cached token
531
+ self.huggingface_token = get_token()
532
+
533
+ if clone_from is not None:
534
+ self.clone_from(repo_url=clone_from)
535
+ else:
536
+ if is_git_repo(self.local_dir):
537
+ logger.debug("[Repository] is a valid git repo")
538
+ else:
539
+ raise ValueError("If not specifying `clone_from`, you need to pass Repository a valid git clone.")
540
+
541
+ if self.huggingface_token is not None and (git_email is None or git_user is None):
542
+ user = self.client.whoami(self.huggingface_token)
543
+
544
+ if git_email is None:
545
+ git_email = user.get("email")
546
+
547
+ if git_user is None:
548
+ git_user = user.get("fullname")
549
+
550
+ if git_user is not None or git_email is not None:
551
+ self.git_config_username_and_email(git_user, git_email)
552
+
553
+ self.lfs_enable_largefiles()
554
+ self.git_credential_helper_store()
555
+
556
+ if revision is not None:
557
+ self.git_checkout(revision, create_branch_ok=True)
558
+
559
+ # This ensures that all commands exit before exiting the Python runtime.
560
+ # This will ensure all pushes register on the hub, even if other errors happen in subsequent operations.
561
+ atexit.register(self.wait_for_commands)
562
+
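A minimal usage sketch (repo id, directory, and branch below are placeholders; as the deprecation notice above says, new code should prefer the `HfApi`-based workflow):

```python
# Clone an existing Hub repo into ./my-model and check out a work branch.
repo = Repository(
    local_dir="my-model",        # placeholder local directory
    clone_from="user/my-model",  # placeholder repo_id; the remote must already exist
    revision="dev",              # created from the current HEAD if it doesn't exist
)
```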
563
+ @property
564
+ def current_branch(self) -> str:
565
+ """
566
+ Returns the current checked out branch.
567
+
568
+ Returns:
569
+ `str`: Current checked out branch.
570
+ """
571
+ try:
572
+ result = run_subprocess("git rev-parse --abbrev-ref HEAD", self.local_dir).stdout.strip()
573
+ except subprocess.CalledProcessError as exc:
574
+ raise EnvironmentError(exc.stderr)
575
+
576
+ return result
577
+
578
+ def check_git_versions(self):
579
+ """
580
+ Checks that `git` and `git-lfs` can be run.
581
+
582
+ Raises:
583
+ [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
584
+ If `git` or `git-lfs` are not installed.
585
+ """
586
+ try:
587
+ git_version = run_subprocess("git --version", self.local_dir).stdout.strip()
588
+ except FileNotFoundError:
589
+ raise EnvironmentError("Looks like you do not have git installed, please install.")
590
+
591
+ try:
592
+ lfs_version = run_subprocess("git-lfs --version", self.local_dir).stdout.strip()
593
+ except FileNotFoundError:
594
+ raise EnvironmentError(
595
+ "Looks like you do not have git-lfs installed, please install."
596
+ " You can install from https://git-lfs.github.com/."
597
+ " Then run `git lfs install` (you only have to do this once)."
598
+ )
599
+ logger.info(git_version + "\n" + lfs_version)
600
+
601
+ @validate_hf_hub_args
602
+ def clone_from(self, repo_url: str, token: Union[bool, str, None] = None):
603
+ """
604
+ Clone from a remote. If the folder already exists, will try to clone the
605
+ repository within it.
606
+
607
+ If this folder is a git repository with linked history, will try to
608
+ update the repository.
609
+
610
+ Args:
611
+ repo_url (`str`):
612
+ The URL from which to clone the repository
613
+ token (`Union[str, bool]`, *optional*):
614
+ Whether to use the authentication token. It can be:
615
+ - a string which is the token itself
616
+ - `False`, which would not use the authentication token
617
+ - `True`, which would fetch the authentication token from the
618
+ local folder and use it (you should be logged in for this to
619
+ work).
620
+ - `None`, which would retrieve the value of
621
+ `self.huggingface_token`.
622
+
623
+ <Tip>
624
+
625
+ Raises the following error:
626
+
627
+ - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
628
+ if an organization token (starts with "api_org") is passed. You must use
629
+ your own personal access token (see https://hf.co/settings/tokens).
630
+
631
+ - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
632
+ if you are trying to clone the repository in a non-empty folder, or if the
633
+ `git` operations raise errors.
634
+
635
+ </Tip>
636
+ """
637
+ token = (
638
+ token # str -> use it
639
+ if isinstance(token, str)
640
+ else (
641
+ None # `False` -> explicit no token
642
+ if token is False
643
+ else self.huggingface_token # `None` or `True` -> use default
644
+ )
645
+ )
646
+ if token is not None and token.startswith("api_org"):
647
+ raise ValueError(
648
+ "You must use your personal access token, not an Organization token"
649
+ " (see https://hf.co/settings/tokens)."
650
+ )
651
+
652
+ hub_url = self.client.endpoint
653
+ if hub_url in repo_url or ("http" not in repo_url and len(repo_url.split("/")) <= 2):
654
+ repo_type, namespace, repo_name = repo_type_and_id_from_hf_id(repo_url, hub_url=hub_url)
655
+ repo_id = f"{namespace}/{repo_name}" if namespace is not None else repo_name
656
+
657
+ if repo_type is not None:
658
+ self._repo_type = repo_type
659
+
660
+ repo_url = hub_url + "/"
661
+
662
+ if self._repo_type in constants.REPO_TYPES_URL_PREFIXES:
663
+ repo_url += constants.REPO_TYPES_URL_PREFIXES[self._repo_type]
664
+
665
+ if token is not None:
666
+ # Add token in git url when provided
667
+ scheme = urlparse(repo_url).scheme
668
+ repo_url = repo_url.replace(f"{scheme}://", f"{scheme}://user:{token}@")
669
+
670
+ repo_url += repo_id
671
+
672
+ # For error messages, it's cleaner to show the repo url without the token.
673
+ clean_repo_url = re.sub(r"(https?)://.*@", r"\1://", repo_url)
674
+ try:
675
+ run_subprocess("git lfs install", self.local_dir)
676
+
677
+ # Check if the repository is initialized in an empty folder or in one with existing files
678
+ if len(os.listdir(self.local_dir)) == 0:
679
+ logger.warning(f"Cloning {clean_repo_url} into local empty directory.")
680
+
681
+ with _lfs_log_progress():
682
+ env = os.environ.copy()
683
+
684
+ if self.skip_lfs_files:
685
+ env.update({"GIT_LFS_SKIP_SMUDGE": "1"})
686
+
687
+ run_subprocess(
688
+ # 'git lfs clone' is deprecated (will display a warning in the terminal)
689
+ # but we still use it as it provides a nicer UX when downloading large
690
+ # files (shows progress).
691
+ f"{'git clone' if self.skip_lfs_files else 'git lfs clone'} {repo_url} .",
692
+ self.local_dir,
693
+ env=env,
694
+ )
695
+ else:
696
+ # Check if the folder is the root of a git repository
697
+ if not is_git_repo(self.local_dir):
698
+ raise EnvironmentError(
699
+ "Tried to clone a repository in a non-empty folder that isn't"
700
+ f" a git repository ('{self.local_dir}'). If you really want to"
701
+ f" do this, do it manually:\n cd {self.local_dir} && git init"
702
+ " && git remote add origin && git pull origin main\n or clone"
703
+ " repo to a new folder and move your existing files there"
704
+ " afterwards."
705
+ )
706
+
707
+ if is_local_clone(self.local_dir, repo_url):
708
+ logger.warning(
709
+ f"{self.local_dir} is already a clone of {clean_repo_url}."
710
+ " Make sure you pull the latest changes with"
711
+ " `repo.git_pull()`."
712
+ )
713
+ else:
714
+ output = run_subprocess("git remote get-url origin", self.local_dir, check=False)
715
+
716
+ error_msg = (
717
+ f"Tried to clone {clean_repo_url} in an unrelated git"
718
+ " repository.\nIf you believe this is an error, please add"
719
+ f" a remote with the following URL: {clean_repo_url}."
720
+ )
721
+ if output.returncode == 0:
722
+ clean_local_remote_url = re.sub(r"https://.*@", "https://", output.stdout)
723
+ error_msg += f"\nLocal path has its origin defined as: {clean_local_remote_url}"
724
+ raise EnvironmentError(error_msg)
725
+
726
+ except subprocess.CalledProcessError as exc:
727
+ raise EnvironmentError(exc.stderr)
728
+
729
+ def git_config_username_and_email(self, git_user: Optional[str] = None, git_email: Optional[str] = None):
730
+ """
731
+ Sets git username and email (only in the current repo).
732
+
733
+ Args:
734
+ git_user (`str`, *optional*):
735
+ The username to register through `git`.
736
+ git_email (`str`, *optional*):
737
+ The email to register through `git`.
738
+ """
739
+ try:
740
+ if git_user is not None:
741
+ run_subprocess("git config user.name".split() + [git_user], self.local_dir)
742
+
743
+ if git_email is not None:
744
+ run_subprocess(f"git config user.email {git_email}".split(), self.local_dir)
745
+ except subprocess.CalledProcessError as exc:
746
+ raise EnvironmentError(exc.stderr)
747
+
748
+ def git_credential_helper_store(self):
749
+ """
750
+ Sets the git credential helper to `store`
751
+ """
752
+ try:
753
+ run_subprocess("git config credential.helper store", self.local_dir)
754
+ except subprocess.CalledProcessError as exc:
755
+ raise EnvironmentError(exc.stderr)
756
+
757
+ def git_head_hash(self) -> str:
758
+ """
759
+ Get commit sha on top of HEAD.
760
+
761
+ Returns:
762
+ `str`: The current checked out commit SHA.
763
+ """
764
+ try:
765
+ p = run_subprocess("git rev-parse HEAD", self.local_dir)
766
+ return p.stdout.strip()
767
+ except subprocess.CalledProcessError as exc:
768
+ raise EnvironmentError(exc.stderr)
769
+
770
+ def git_remote_url(self) -> str:
771
+ """
772
+ Get URL to origin remote.
773
+
774
+ Returns:
775
+ `str`: The URL of the `origin` remote.
776
+ """
777
+ try:
778
+ p = run_subprocess("git config --get remote.origin.url", self.local_dir)
779
+ url = p.stdout.strip()
780
+ # Strip basic auth info.
781
+ return re.sub(r"https://.*@", "https://", url)
782
+ except subprocess.CalledProcessError as exc:
783
+ raise EnvironmentError(exc.stderr)
784
+
785
+ def git_head_commit_url(self) -> str:
786
+ """
787
+ Get URL to last commit on HEAD. We assume it's been pushed, and the url
788
+ scheme is the same one as for GitHub or HuggingFace.
789
+
790
+ Returns:
791
+ `str`: The URL to the current checked-out commit.
792
+ """
793
+ sha = self.git_head_hash()
794
+ url = self.git_remote_url()
795
+ if url.endswith("/"):
796
+ url = url[:-1]
797
+ return f"{url}/commit/{sha}"
798
+
799
+ def list_deleted_files(self) -> List[str]:
800
+ """
801
+ Returns a list of the files that are deleted in the working directory or
802
+ index.
803
+
804
+ Returns:
805
+ `List[str]`: A list of files that have been deleted in the working
806
+ directory or index.
807
+ """
808
+ try:
809
+ git_status = run_subprocess("git status -s", self.local_dir).stdout.strip()
810
+ except subprocess.CalledProcessError as exc:
811
+ raise EnvironmentError(exc.stderr)
812
+
813
+ if len(git_status) == 0:
814
+ return []
815
+
816
+ # Receives a status like the following
817
+ # D .gitignore
818
+ # D new_file.json
819
+ # AD new_file1.json
820
+ # ?? new_file2.json
821
+ # ?? new_file4.json
822
+
823
+ # Strip each line of whitespace
824
+ modified_files_statuses = [status.strip() for status in git_status.split("\n")]
825
+
826
+ # Only keep files that are deleted using the D prefix
827
+ deleted_files_statuses = [status for status in modified_files_statuses if "D" in status.split()[0]]
828
+
829
+ # Remove the D prefix and strip to keep only the relevant filename
830
+ deleted_files = [status.split()[-1].strip() for status in deleted_files_statuses]
831
+
832
+ return deleted_files
833
+
834
+ def lfs_track(self, patterns: Union[str, List[str]], filename: bool = False):
835
+ """
836
+ Tell git-lfs to track files according to a pattern.
837
+
838
+ Setting the `filename` argument to `True` will treat the arguments as
839
+ literal filenames, not as patterns. Any special glob characters in the
840
+ filename will be escaped when writing to the `.gitattributes` file.
841
+
842
+ Args:
843
+ patterns (`Union[str, List[str]]`):
844
+ The pattern, or list of patterns, to track with git-lfs.
845
+ filename (`bool`, *optional*, defaults to `False`):
846
+ Whether to use the patterns as literal filenames.
847
+ """
848
+ if isinstance(patterns, str):
849
+ patterns = [patterns]
850
+ try:
851
+ for pattern in patterns:
852
+ run_subprocess(
853
+ f"git lfs track {'--filename' if filename else ''} {pattern}",
854
+ self.local_dir,
855
+ )
856
+ except subprocess.CalledProcessError as exc:
857
+ raise EnvironmentError(exc.stderr)
858
+
859
+ def lfs_untrack(self, patterns: Union[str, List[str]]):
860
+ """
861
+ Tell git-lfs to untrack those files.
862
+
863
+ Args:
864
+ patterns (`Union[str, List[str]]`):
865
+ The pattern, or list of patterns, to untrack with git-lfs.
866
+ """
867
+ if isinstance(patterns, str):
868
+ patterns = [patterns]
869
+ try:
870
+ for pattern in patterns:
871
+ run_subprocess("git lfs untrack".split() + [pattern], self.local_dir)
872
+ except subprocess.CalledProcessError as exc:
873
+ raise EnvironmentError(exc.stderr)
874
+
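For example (the patterns below are illustrative):

```python
repo.lfs_track("*.bin")  # track by glob pattern
repo.lfs_track("weights (final).bin", filename=True)  # literal name; glob chars escaped
repo.lfs_untrack("*.bin")  # stop tracking the pattern
```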
875
+ def lfs_enable_largefiles(self):
876
+ """
877
+ HF-specific. This enables upload support of files >5GB.
878
+ """
879
+ try:
880
+ lfs_config = "git config lfs.customtransfer.multipart"
881
+ run_subprocess(f"{lfs_config}.path huggingface-cli", self.local_dir)
882
+ run_subprocess(
883
+ f"{lfs_config}.args {LFS_MULTIPART_UPLOAD_COMMAND}",
884
+ self.local_dir,
885
+ )
886
+ except subprocess.CalledProcessError as exc:
887
+ raise EnvironmentError(exc.stderr)
888
+
889
+ def auto_track_binary_files(self, pattern: str = ".") -> List[str]:
890
+ """
891
+ Automatically track binary files with git-lfs.
892
+
893
+ Args:
894
+ pattern (`str`, *optional*, defaults to "."):
895
+ The pattern with which to track files that are binary.
896
+
897
+ Returns:
898
+ `List[str]`: List of filenames that are now tracked due to being
899
+ binary files
900
+ """
901
+ files_to_be_tracked_with_lfs = []
902
+
903
+ deleted_files = self.list_deleted_files()
904
+
905
+ for filename in files_to_be_staged(pattern, folder=self.local_dir):
906
+ if filename in deleted_files:
907
+ continue
908
+
909
+ path_to_file = os.path.join(os.getcwd(), self.local_dir, filename)
910
+
911
+ if not (is_tracked_with_lfs(path_to_file) or is_git_ignored(path_to_file)):
912
+ size_in_mb = os.path.getsize(path_to_file) / (1024 * 1024)
913
+
914
+ if size_in_mb >= 10:
915
+ logger.warning(
916
+ "Parsing a large file to check if binary or not. Tracking large"
917
+ " files using `repository.auto_track_large_files` is"
918
+ " recommended so as to not load the full file in memory."
919
+ )
920
+
921
+ is_binary = is_binary_file(path_to_file)
922
+
923
+ if is_binary:
924
+ self.lfs_track(filename)
925
+ files_to_be_tracked_with_lfs.append(filename)
926
+
927
+ # Cleanup the .gitattributes if files were deleted
928
+ self.lfs_untrack(deleted_files)
929
+
930
+ return files_to_be_tracked_with_lfs
931
+
932
+ def auto_track_large_files(self, pattern: str = ".") -> List[str]:
933
+ """
934
+ Automatically track large files (files that weigh more than 10MBs) with
935
+ git-lfs.
936
+
937
+ Args:
938
+ pattern (`str`, *optional*, defaults to "."):
939
+ The pattern with which to track files that are above 10MBs.
940
+
941
+ Returns:
942
+ `List[str]`: List of filenames that are now tracked due to their
943
+ size.
944
+ """
945
+ files_to_be_tracked_with_lfs = []
946
+
947
+ deleted_files = self.list_deleted_files()
948
+
949
+ for filename in files_to_be_staged(pattern, folder=self.local_dir):
950
+ if filename in deleted_files:
951
+ continue
952
+
953
+ path_to_file = os.path.join(os.getcwd(), self.local_dir, filename)
954
+ size_in_mb = os.path.getsize(path_to_file) / (1024 * 1024)
955
+
956
+ if size_in_mb >= 10 and not is_tracked_with_lfs(path_to_file) and not is_git_ignored(path_to_file):
957
+ self.lfs_track(filename)
958
+ files_to_be_tracked_with_lfs.append(filename)
959
+
960
+ # Cleanup the .gitattributes if files were deleted
961
+ self.lfs_untrack(deleted_files)
962
+
963
+ return files_to_be_tracked_with_lfs
964
+
965
+ def lfs_prune(self, recent=False):
966
+ """
967
+ git lfs prune
968
+
969
+ Args:
970
+ recent (`bool`, *optional*, defaults to `False`):
971
+ Whether to prune files even if they were referenced by recent
972
+ commits. See the following
973
+ [link](https://github.com/git-lfs/git-lfs/blob/f3d43f0428a84fc4f1e5405b76b5a73ec2437e65/docs/man/git-lfs-prune.1.ronn#recent-files)
974
+ for more information.
975
+ """
976
+ try:
977
+ with _lfs_log_progress():
978
+ result = run_subprocess(f"git lfs prune {'--recent' if recent else ''}", self.local_dir)
979
+ logger.info(result.stdout)
980
+ except subprocess.CalledProcessError as exc:
981
+ raise EnvironmentError(exc.stderr)
982
+
983
+ def git_pull(self, rebase: bool = False, lfs: bool = False):
984
+ """
985
+ git pull
986
+
987
+ Args:
988
+ rebase (`bool`, *optional*, defaults to `False`):
989
+ Whether to rebase the current branch on top of the upstream
990
+ branch after fetching.
991
+ lfs (`bool`, *optional*, defaults to `False`):
992
+ Whether to fetch the LFS files too. This option only changes the
993
+ behavior when a repository was cloned without fetching the LFS
994
+ files; calling `repo.git_pull(lfs=True)` will then fetch the LFS
995
+ file from the remote repository.
996
+ """
997
+ command = "git pull" if not lfs else "git lfs pull"
998
+ if rebase:
999
+ command += " --rebase"
1000
+ try:
1001
+ with _lfs_log_progress():
1002
+ result = run_subprocess(command, self.local_dir)
1003
+ logger.info(result.stdout)
1004
+ except subprocess.CalledProcessError as exc:
1005
+ raise EnvironmentError(exc.stderr)
1006
+
1007
+ def git_add(self, pattern: str = ".", auto_lfs_track: bool = False):
1008
+ """
1009
+ git add
1010
+
1011
+ Setting the `auto_lfs_track` parameter to `True` will automatically
1012
+ track files that are larger than 10MB with `git-lfs`.
1013
+
1014
+ Args:
1015
+ pattern (`str`, *optional*, defaults to "."):
1016
+ The pattern with which to add files to staging.
1017
+ auto_lfs_track (`bool`, *optional*, defaults to `False`):
1018
+ Whether to automatically track large and binary files with
1019
+ git-lfs. Any file over 10MB in size, or in binary format, will
1020
+ be automatically tracked.
1021
+ """
1022
+ if auto_lfs_track:
1023
+ # Track files according to their size (>=10MB)
1024
+ tracked_files = self.auto_track_large_files(pattern)
1025
+
1026
+ # Read the remaining files and track them if they're binary
1027
+ tracked_files.extend(self.auto_track_binary_files(pattern))
1028
+
1029
+ if tracked_files:
1030
+ logger.warning(
1031
+ f"Adding files tracked by Git LFS: {tracked_files}. This may take a"
1032
+ " bit of time if the files are large."
1033
+ )
1034
+
1035
+ try:
1036
+ result = run_subprocess("git add -v".split() + [pattern], self.local_dir)
1037
+ logger.info(f"Adding to index:\n{result.stdout}\n")
1038
+ except subprocess.CalledProcessError as exc:
1039
+ raise EnvironmentError(exc.stderr)
1040
+
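For instance, staging with automatic LFS routing before committing:

```python
# Stage everything; files >10MB or binary are tracked through git-lfs first.
repo.git_add(pattern=".", auto_lfs_track=True)
repo.git_commit("Add training artifacts")  # commit message is illustrative
```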
1041
+ def git_commit(self, commit_message: str = "commit files to HF hub"):
1042
+ """
1043
+ git commit
1044
+
1045
+ Args:
1046
+ commit_message (`str`, *optional*, defaults to "commit files to HF hub"):
1047
+ The message attributed to the commit.
1048
+ """
1049
+ try:
1050
+ result = run_subprocess("git commit -v -m".split() + [commit_message], self.local_dir)
1051
+ logger.info(f"Committed:\n{result.stdout}\n")
1052
+ except subprocess.CalledProcessError as exc:
1053
+ if len(exc.stderr) > 0:
1054
+ raise EnvironmentError(exc.stderr)
1055
+ else:
1056
+ raise EnvironmentError(exc.stdout)
1057
+
1058
+ def git_push(
1059
+ self,
1060
+ upstream: Optional[str] = None,
1061
+ blocking: bool = True,
1062
+ auto_lfs_prune: bool = False,
1063
+ ) -> Union[str, Tuple[str, CommandInProgress]]:
1064
+ """
1065
+ git push
1066
+
1067
+ When `blocking=True` (the default), returns the url to the commit on the
1068
+ remote repo. When `blocking=False`, returns a tuple containing that url
1069
+ and a `CommandInProgress` object to follow for information about the
1070
+ push process.
1071
+
1072
+ Args:
1073
+ upstream (`str`, *optional*):
1074
+ Upstream to which this should push. If not specified, will push
1075
+ to the lastly defined upstream or to the default one (`origin
1076
+ main`).
1077
+ blocking (`bool`, *optional*, defaults to `True`):
1078
+ Whether the function should return only when the push has
1079
+ finished. Setting this to `False` will return an
1080
+ `CommandInProgress` object which has an `is_done` property. This
1081
+ property will be set to `True` when the push is finished.
1082
+ auto_lfs_prune (`bool`, *optional*, defaults to `False`):
1083
+ Whether to automatically prune files once they have been pushed
1084
+ to the remote.
1085
+ """
1086
+ command = "git push"
1087
+
1088
+ if upstream:
1089
+ command += f" --set-upstream {upstream}"
1090
+
1091
+ number_of_commits = commits_to_push(self.local_dir, upstream)
1092
+
1093
+ if number_of_commits > 1:
1094
+ logger.warning(f"Several commits ({number_of_commits}) will be pushed upstream.")
1095
+ if blocking:
1096
+ logger.warning("The progress bars may be unreliable.")
1097
+
1098
+ try:
1099
+ with _lfs_log_progress():
1100
+ process = subprocess.Popen(
1101
+ command.split(),
1102
+ stderr=subprocess.PIPE,
1103
+ stdout=subprocess.PIPE,
1104
+ encoding="utf-8",
1105
+ cwd=self.local_dir,
1106
+ )
1107
+
1108
+ if blocking:
1109
+ stdout, stderr = process.communicate()
1110
+ return_code = process.poll()
1111
+ process.kill()
1112
+
1113
+ if len(stderr):
1114
+ logger.warning(stderr)
1115
+
1116
+ if return_code:
1117
+ raise subprocess.CalledProcessError(return_code, process.args, output=stdout, stderr=stderr)
1118
+
1119
+ except subprocess.CalledProcessError as exc:
1120
+ raise EnvironmentError(exc.stderr)
1121
+
1122
+ if not blocking:
1123
+
1124
+ def status_method():
1125
+ status = process.poll()
1126
+ if status is None:
1127
+ return -1
1128
+ else:
1129
+ return status
1130
+
1131
+ command_in_progress = CommandInProgress(
1132
+ "push",
1133
+ is_done_method=lambda: process.poll() is not None,
1134
+ status_method=status_method,
1135
+ process=process,
1136
+ post_method=self.lfs_prune if auto_lfs_prune else None,
1137
+ )
1138
+
1139
+ self.command_queue.append(command_in_progress)
1140
+
1141
+ return self.git_head_commit_url(), command_in_progress
1142
+
1143
+ if auto_lfs_prune:
1144
+ self.lfs_prune()
1145
+
1146
+ return self.git_head_commit_url()
1147
+
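A sketch of the non-blocking path, which hands back the `CommandInProgress` object described earlier:

```python
import time

# Fire-and-poll push; `url` is the head commit url, `push` tracks the process.
url, push = repo.git_push(blocking=False)
while not push.is_done:
    time.sleep(1)
if push.failed:
    print(push.stderr)
```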
1148
+ def git_checkout(self, revision: str, create_branch_ok: bool = False):
1149
+ """
1150
+ git checkout a given revision
1151
+
1152
+ Setting `create_branch_ok` to `True` will create a branch at the
1153
+ given revision if that revision doesn't exist.
1154
+
1155
+ Args:
1156
+ revision (`str`):
1157
+ The revision to checkout.
1158
+ create_branch_ok (`bool`, *optional*, defaults to `False`):
1159
+ Whether to create a branch named `revision`, pointing at the
1160
+ current checked-out reference, if `revision` is not an
1161
+ existing revision.
1162
+ """
1163
+ try:
1164
+ result = run_subprocess(f"git checkout {revision}", self.local_dir)
1165
+ logger.warning(f"Checked out {revision} from {self.current_branch}.")
1166
+ logger.warning(result.stdout)
1167
+ except subprocess.CalledProcessError as exc:
1168
+ if not create_branch_ok:
1169
+ raise EnvironmentError(exc.stderr)
1170
+ else:
1171
+ try:
1172
+ result = run_subprocess(f"git checkout -b {revision}", self.local_dir)
1173
+ logger.warning(
1174
+ f"Revision `{revision}` does not exist. Created and checked out branch `{revision}`."
1175
+ )
1176
+ logger.warning(result.stdout)
1177
+ except subprocess.CalledProcessError as exc:
1178
+ raise EnvironmentError(exc.stderr)
1179
+
1180
+ def tag_exists(self, tag_name: str, remote: Optional[str] = None) -> bool:
1181
+ """
1182
+ Check if a tag exists or not.
1183
+
1184
+ Args:
1185
+ tag_name (`str`):
1186
+ The name of the tag to check.
1187
+ remote (`str`, *optional*):
1188
+ Whether to check if the tag exists on a remote. This parameter
1189
+ should be the identifier of the remote.
1190
+
1191
+ Returns:
1192
+ `bool`: Whether the tag exists.
1193
+ """
1194
+ if remote:
1195
+ try:
1196
+ result = run_subprocess(f"git ls-remote {remote} refs/tags/{tag_name}", self.local_dir).stdout.strip()
1197
+ except subprocess.CalledProcessError as exc:
1198
+ raise EnvironmentError(exc.stderr)
1199
+
1200
+ return len(result) != 0
1201
+ else:
1202
+ try:
1203
+ git_tags = run_subprocess("git tag", self.local_dir).stdout.strip()
1204
+ except subprocess.CalledProcessError as exc:
1205
+ raise EnvironmentError(exc.stderr)
1206
+
1207
+ git_tags = git_tags.split("\n")
1208
+ return tag_name in git_tags
1209
+
1210
+ def delete_tag(self, tag_name: str, remote: Optional[str] = None) -> bool:
1211
+ """
1212
+ Delete a tag, both local and remote, if it exists
1213
+
1214
+ Args:
1215
+ tag_name (`str`):
1216
+ The tag name to delete.
1217
+ remote (`str`, *optional*):
1218
+ The remote on which to delete the tag.
1219
+
1220
+ Returns:
1221
+ `bool`: `True` if the tag was deleted, `False` if it didn't exist.
1222
+ If `remote` is not passed, the tag is only deleted locally.
1223
+ """
1224
+ delete_locally = True
1225
+ delete_remotely = True
1226
+
1227
+ if not self.tag_exists(tag_name):
1228
+ delete_locally = False
1229
+
1230
+ if not self.tag_exists(tag_name, remote=remote):
1231
+ delete_remotely = False
1232
+
1233
+ if delete_locally:
1234
+ try:
1235
+ run_subprocess(["git", "tag", "-d", tag_name], self.local_dir).stdout.strip()
1236
+ except subprocess.CalledProcessError as exc:
1237
+ raise EnvironmentError(exc.stderr)
1238
+
1239
+ if remote and delete_remotely:
1240
+ try:
1241
+ run_subprocess(f"git push {remote} --delete {tag_name}", self.local_dir).stdout.strip()
1242
+ except subprocess.CalledProcessError as exc:
1243
+ raise EnvironmentError(exc.stderr)
1244
+
1245
+ return True
1246
+
1247
+ def add_tag(self, tag_name: str, message: Optional[str] = None, remote: Optional[str] = None):
1248
+ """
1249
+ Add a tag at the current head and push it
1250
+
1251
+ If remote is None, will just be updated locally
1252
+
1253
+ If no message is provided, the tag will be lightweight. If a message is
1254
+ provided, the tag will be annotated.
1255
+
1256
+ Args:
1257
+ tag_name (`str`):
1258
+ The name of the tag to be added.
1259
+ message (`str`, *optional*):
1260
+ The message that accompanies the tag. The tag will turn into an
1261
+ annotated tag if a message is passed.
1262
+ remote (`str`, *optional*):
1263
+ The remote on which to add the tag.
1264
+ """
1265
+ if message:
1266
+ tag_args = ["git", "tag", "-a", tag_name, "-m", message]
1267
+ else:
1268
+ tag_args = ["git", "tag", tag_name]
1269
+
1270
+ try:
1271
+ run_subprocess(tag_args, self.local_dir).stdout.strip()
1272
+ except subprocess.CalledProcessError as exc:
1273
+ raise EnvironmentError(exc.stderr)
1274
+
1275
+ if remote:
1276
+ try:
1277
+ run_subprocess(f"git push {remote} {tag_name}", self.local_dir).stdout.strip()
1278
+ except subprocess.CalledProcessError as exc:
1279
+ raise EnvironmentError(exc.stderr)
1280
+
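For example (the tag name and remote are illustrative):

```python
repo.add_tag("v1.0", message="first release", remote="origin")  # annotated + pushed
if repo.tag_exists("v1.0", remote="origin"):
    repo.delete_tag("v1.0", remote="origin")  # removes it locally and on origin
```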
1281
+ def is_repo_clean(self) -> bool:
1282
+ """
1283
+ Return whether the git status is clean
1284
+
1285
+ Returns:
1286
+ `bool`: `True` if the git status is clean, `False` otherwise.
1287
+ """
1288
+ try:
1289
+ git_status = run_subprocess("git status --porcelain", self.local_dir).stdout.strip()
1290
+ except subprocess.CalledProcessError as exc:
1291
+ raise EnvironmentError(exc.stderr)
1292
+
1293
+ return len(git_status) == 0
1294
+
1295
+ def push_to_hub(
1296
+ self,
1297
+ commit_message: str = "commit files to HF hub",
1298
+ blocking: bool = True,
1299
+ clean_ok: bool = True,
1300
+ auto_lfs_prune: bool = False,
1301
+ ) -> Union[None, str, Tuple[str, CommandInProgress]]:
1302
+ """
1303
+ Helper to add, commit, and push files to remote repository on the
1304
+ HuggingFace Hub. Will automatically track large files (>10MB).
1305
+
1306
+ Args:
1307
+ commit_message (`str`):
1308
+ Message to use for the commit.
1309
+ blocking (`bool`, *optional*, defaults to `True`):
1310
+ Whether the function should return only when the `git push` has
1311
+ finished.
1312
+ clean_ok (`bool`, *optional*, defaults to `True`):
1313
+ If True, this function will return None if the repo is
1314
+ untouched. Default behavior is to fail because the git command
1315
+ fails.
1316
+ auto_lfs_prune (`bool`, *optional*, defaults to `False`):
1317
+ Whether to automatically prune files once they have been pushed
1318
+ to the remote.
1319
+ """
1320
+ if clean_ok and self.is_repo_clean():
1321
+ logger.info("Repo currently clean. Ignoring push_to_hub")
1322
+ return None
1323
+ self.git_add(auto_lfs_track=True)
1324
+ self.git_commit(commit_message)
1325
+ return self.git_push(
1326
+ upstream=f"origin {self.current_branch}",
1327
+ blocking=blocking,
1328
+ auto_lfs_prune=auto_lfs_prune,
1329
+ )
1330
+
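In the common case this collapses the add/commit/push cycle into one call:

```python
# Stage everything (auto-tracking large/binary files with LFS), commit, push.
repo.push_to_hub(commit_message="Add fine-tuned weights")  # message is illustrative
```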
1331
+ @contextmanager
1332
+ def commit(
1333
+ self,
1334
+ commit_message: str,
1335
+ branch: Optional[str] = None,
1336
+ track_large_files: bool = True,
1337
+ blocking: bool = True,
1338
+ auto_lfs_prune: bool = False,
1339
+ ):
1340
+ """
1341
+ Context manager utility to handle committing to a repository. This
1342
+ automatically tracks large files (>10Mb) with git-lfs. Set the
1343
+ `track_large_files` argument to `False` if you wish to ignore that
1344
+ behavior.
1345
+
1346
+ Args:
1347
+ commit_message (`str`):
1348
+ Message to use for the commit.
1349
+ branch (`str`, *optional*):
1350
+ The branch on which the commit will appear. This branch will be
1351
+ checked-out before any operation.
1352
+ track_large_files (`bool`, *optional*, defaults to `True`):
1353
+ Whether to automatically track large files or not. Will do so by
1354
+ default.
1355
+ blocking (`bool`, *optional*, defaults to `True`):
1356
+ Whether the function should return only when the `git push` has
1357
+ finished.
1358
+ auto_lfs_prune (`bool`, *optional*, defaults to `False`):
1359
+ Whether to automatically prune files once they have been pushed
1360
+ to the remote.
1361
+
1362
+ Examples:
1363
+
1364
+ ```python
1365
+ >>> with Repository(
1366
+ ... "text-files",
1367
+ ... clone_from="<user>/text-files",
1368
+ ... token=True,
1369
+ >>> ).commit("My first file :)"):
1370
+ ... with open("file.txt", "w+") as f:
1371
+ ... f.write(json.dumps({"hey": 8}))
1372
+
1373
+ >>> import torch
1374
+
1375
+ >>> model = torch.nn.Transformer()
1376
+ >>> with Repository(
1377
+ ... "torch-model",
1378
+ ... clone_from="<user>/torch-model",
1379
+ ... token=True,
1380
+ >>> ).commit("My cool model :)"):
1381
+ ... torch.save(model.state_dict(), "model.pt")
1382
+ ```
1383
+
1384
+ """
1385
+
1386
+ files_to_stage = files_to_be_staged(".", folder=self.local_dir)
1387
+
1388
+ if len(files_to_stage):
1389
+ files_in_msg = str(files_to_stage[:5])[:-1] + ", ...]" if len(files_to_stage) > 5 else str(files_to_stage)
1390
+ logger.error(
1391
+ "There are some updated files in the local repository that are not"
1392
+ f" committed: {files_in_msg}. This may lead to errors if checking out"
1393
+ " a branch. These files and their modifications will be added to the"
1394
+ " current commit."
1395
+ )
1396
+
1397
+ if branch is not None:
1398
+ self.git_checkout(branch, create_branch_ok=True)
1399
+
1400
+ if is_tracked_upstream(self.local_dir):
1401
+ logger.warning("Pulling changes ...")
1402
+ self.git_pull(rebase=True)
1403
+ else:
1404
+ logger.warning(f"The current branch has no upstream branch. Will push to 'origin {self.current_branch}'")
1405
+
1406
+ current_working_directory = os.getcwd()
1407
+ os.chdir(os.path.join(current_working_directory, self.local_dir))
1408
+
1409
+ try:
1410
+ yield self
1411
+ finally:
1412
+ self.git_add(auto_lfs_track=track_large_files)
1413
+
1414
+ try:
1415
+ self.git_commit(commit_message)
1416
+ except OSError as e:
1417
+ # If no changes are detected, there is nothing to commit.
1418
+ if "nothing to commit" not in str(e):
1419
+ raise e
1420
+
1421
+ try:
1422
+ self.git_push(
1423
+ upstream=f"origin {self.current_branch}",
1424
+ blocking=blocking,
1425
+ auto_lfs_prune=auto_lfs_prune,
1426
+ )
1427
+ except OSError as e:
1428
+ # If no changes are detected, there is nothing to commit.
1429
+ if "could not read Username" in str(e):
1430
+ raise OSError("Couldn't authenticate user for push. Did you set `token` to `True`?") from e
1431
+ else:
1432
+ raise e
1433
+
1434
+ os.chdir(current_working_directory)
1435
+
1436
+ def repocard_metadata_load(self) -> Optional[Dict]:
1437
+ filepath = os.path.join(self.local_dir, constants.REPOCARD_NAME)
1438
+ if os.path.isfile(filepath):
1439
+ return metadata_load(filepath)
1440
+ return None
1441
+
1442
+ def repocard_metadata_save(self, data: Dict) -> None:
1443
+ return metadata_save(os.path.join(self.local_dir, constants.REPOCARD_NAME), data)
1444
+
1445
+ @property
1446
+ def commands_failed(self):
1447
+ """
1448
+ Returns the asynchronous commands that failed.
1449
+ """
1450
+ return [c for c in self.command_queue if c.status > 0]
1451
+
1452
+ @property
1453
+ def commands_in_progress(self):
1454
+ """
1455
+ Returns the asynchronous commands that are currently in progress.
1456
+ """
1457
+ return [c for c in self.command_queue if not c.is_done]
1458
+
1459
+ def wait_for_commands(self):
1460
+ """
1461
+ Blocking method: blocks all subsequent execution until all commands have
1462
+ been processed.
1463
+ """
1464
+ index = 0
1465
+ for command_failed in self.commands_failed:
1466
+ logger.error(f"The {command_failed.title} command with PID {command_failed._process.pid} failed.")
1467
+ logger.error(command_failed.stderr)
1468
+
1469
+ while self.commands_in_progress:
1470
+ if index % 10 == 0:
1471
+ logger.warning(
1472
+ f"Waiting for the following commands to finish before shutting down: {self.commands_in_progress}."
1473
+ )
1474
+
1475
+ index += 1
1476
+
1477
+ time.sleep(1)
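
To make the two helpers above concrete, here is a minimal usage sketch, assuming a Hub repo already exists; the repo id `<user>/text-files` is a placeholder:

```python
from huggingface_hub import Repository

# Clone (or reuse) a local checkout; `token=True` uses the token cached by
# `huggingface-cli login`.
repo = Repository("text-files", clone_from="<user>/text-files", token=True)

# Modify the working tree in any way...
with open("text-files/file.txt", "w") as f:
    f.write("hello")

# Stage everything (large files go through git-lfs), commit, and push.
# With clean_ok=True this returns None when there is nothing to commit.
repo.push_to_hub(commit_message="Add file.txt")
```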
meow/lib/python3.13/site-packages/idna-3.10.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
+ pip
meow/lib/python3.13/site-packages/idna-3.10.dist-info/LICENSE.md ADDED
@@ -0,0 +1,31 @@
+ BSD 3-Clause License
+ 
+ Copyright (c) 2013-2024, Kim Davies and contributors.
+ All rights reserved.
+ 
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ 
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+ 
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+ 
+ 3. Neither the name of the copyright holder nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
+ 
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
meow/lib/python3.13/site-packages/idna-3.10.dist-info/METADATA ADDED
@@ -0,0 +1,250 @@
+ Metadata-Version: 2.1
+ Name: idna
+ Version: 3.10
+ Summary: Internationalized Domain Names in Applications (IDNA)
+ Author-email: Kim Davies <[email protected]>
+ Requires-Python: >=3.6
+ Description-Content-Type: text/x-rst
+ Classifier: Development Status :: 5 - Production/Stable
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: System Administrators
+ Classifier: License :: OSI Approved :: BSD License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Programming Language :: Python :: 3.6
+ Classifier: Programming Language :: Python :: 3.7
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: Implementation :: CPython
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
+ Classifier: Topic :: Internet :: Name Service (DNS)
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Classifier: Topic :: Utilities
+ Requires-Dist: ruff >= 0.6.2 ; extra == "all"
+ Requires-Dist: mypy >= 1.11.2 ; extra == "all"
+ Requires-Dist: pytest >= 8.3.2 ; extra == "all"
+ Requires-Dist: flake8 >= 7.1.1 ; extra == "all"
+ Project-URL: Changelog, https://github.com/kjd/idna/blob/master/HISTORY.rst
+ Project-URL: Issue tracker, https://github.com/kjd/idna/issues
+ Project-URL: Source, https://github.com/kjd/idna
+ Provides-Extra: all
+ 
+ Internationalized Domain Names in Applications (IDNA)
+ =====================================================
+ 
+ Support for the Internationalized Domain Names in
+ Applications (IDNA) protocol as specified in `RFC 5891
+ <https://tools.ietf.org/html/rfc5891>`_. This is the latest version of
+ the protocol and is sometimes referred to as “IDNA 2008”.
+ 
+ This library also provides support for Unicode Technical
+ Standard 46, `Unicode IDNA Compatibility Processing
+ <https://unicode.org/reports/tr46/>`_.
+ 
+ This acts as a suitable replacement for the “encodings.idna”
+ module that comes with the Python standard library, but which
+ only supports the older superseded IDNA specification (`RFC 3490
+ <https://tools.ietf.org/html/rfc3490>`_).
+ 
+ Basic functions are simply executed:
+ 
+ .. code-block:: pycon
+ 
+     >>> import idna
+     >>> idna.encode('ドメイン.テスト')
+     b'xn--eckwd4c7c.xn--zckzah'
+     >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
+     ドメイン.テスト
+ 
+ 
+ Installation
+ ------------
+ 
+ This package is available for installation from PyPI:
+ 
+ .. code-block:: bash
+ 
+     $ python3 -m pip install idna
+ 
+ 
+ Usage
+ -----
+ 
+ For typical usage, the ``encode`` and ``decode`` functions will take a
+ domain name argument and perform a conversion to A-labels or U-labels
+ respectively.
+ 
+ .. code-block:: pycon
+ 
+     >>> import idna
+     >>> idna.encode('ドメイン.テスト')
+     b'xn--eckwd4c7c.xn--zckzah'
+     >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
+     ドメイン.テスト
+ 
+ You may use the codec encoding and decoding methods using the
+ ``idna.codec`` module:
+ 
+ .. code-block:: pycon
+ 
+     >>> import idna.codec
+     >>> print('домен.испытание'.encode('idna2008'))
+     b'xn--d1acufc.xn--80akhbyknj4f'
+     >>> print(b'xn--d1acufc.xn--80akhbyknj4f'.decode('idna2008'))
+     домен.испытание
+ 
+ Conversions can be applied on a per-label basis using the ``ulabel`` or
+ ``alabel`` functions if necessary:
+ 
+ .. code-block:: pycon
+ 
+     >>> idna.alabel('测试')
+     b'xn--0zwm56d'
+ 
+ Compatibility Mapping (UTS #46)
+ +++++++++++++++++++++++++++++++
+ 
+ As described in `RFC 5895 <https://tools.ietf.org/html/rfc5895>`_, the
+ IDNA specification does not normalize input from different potential
+ ways a user may input a domain name. This functionality, known as
+ a “mapping”, is considered by the specification to be a local
+ user-interface issue distinct from IDNA conversion functionality.
+ 
+ This library provides one such mapping that was developed by the
+ Unicode Consortium. Known as `Unicode IDNA Compatibility Processing
+ <https://unicode.org/reports/tr46/>`_, it provides for both a regular
+ mapping for typical applications, as well as a transitional mapping to
+ help migrate from older IDNA 2003 applications. Strings are
+ preprocessed according to Section 4.4 “Preprocessing for IDNA2008”
+ prior to the IDNA operations.
+ 
+ For example, “Königsgäßchen” is not a permissible label as *LATIN
+ CAPITAL LETTER K* is not allowed (nor are capital letters in general).
+ UTS 46 will convert this into lower case prior to applying the IDNA
+ conversion.
+ 
+ .. code-block:: pycon
+ 
+     >>> import idna
+     >>> idna.encode('Königsgäßchen')
+     ...
+     idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'Königsgäßchen' not allowed
+     >>> idna.encode('Königsgäßchen', uts46=True)
+     b'xn--knigsgchen-b4a3dun'
+     >>> print(idna.decode('xn--knigsgchen-b4a3dun'))
+     königsgäßchen
+ 
+ Transitional processing provides conversions to help transition from
+ the older 2003 standard to the current standard. For example, in the
+ original IDNA specification, the *LATIN SMALL LETTER SHARP S* (ß) was
+ converted into two *LATIN SMALL LETTER S* (ss), whereas in the current
+ IDNA specification this conversion is not performed.
+ 
+ .. code-block:: pycon
+ 
+     >>> idna.encode('Königsgäßchen', uts46=True, transitional=True)
+     b'xn--knigsgsschen-lcb0w'
+ 
+ Implementers should use transitional processing with caution, only in
+ rare cases where conversion from legacy labels to current labels must be
+ performed (i.e. IDNA implementations that pre-date 2008). For typical
+ applications that just need to convert labels, transitional processing
+ is unlikely to be beneficial and could produce unexpected incompatible
+ results.
+ 
+ ``encodings.idna`` Compatibility
+ ++++++++++++++++++++++++++++++++
+ 
+ Function calls from the Python built-in ``encodings.idna`` module are
+ mapped to their IDNA 2008 equivalents using the ``idna.compat`` module.
+ Simply substitute the ``import`` clause in your code to refer to the new
+ module name.
+ 
+ Exceptions
+ ----------
+ 
+ All errors raised during the conversion following the specification
+ should raise an exception derived from the ``idna.IDNAError`` base
+ class.
+ 
+ More specific exceptions may be generated: ``idna.IDNABidiError``
+ when the error reflects an illegal combination of left-to-right and
+ right-to-left characters in a label; ``idna.InvalidCodepoint`` when
+ a specific codepoint is an illegal character in an IDN label (i.e.
+ INVALID); and ``idna.InvalidCodepointContext`` when the codepoint is
+ illegal based on its positional context (i.e. it is CONTEXTO or CONTEXTJ
+ but the contextual requirements are not satisfied).
+ 
+ Building and Diagnostics
+ ------------------------
+ 
+ The IDNA and UTS 46 functionality relies upon pre-calculated lookup
+ tables for performance. These tables are derived from computing against
+ eligibility criteria in the respective standards. These tables are
+ computed using the command-line script ``tools/idna-data``.
+ 
+ This tool will fetch relevant codepoint data from the Unicode repository
+ and perform the required calculations to identify eligibility. There are
+ three main modes:
+ 
+ * ``idna-data make-libdata``. Generates ``idnadata.py`` and
+   ``uts46data.py``, the pre-calculated lookup tables used for IDNA and
+   UTS 46 conversions. Implementers who wish to track this library against
+   a different Unicode version may use this tool to manually generate a
+   different version of the ``idnadata.py`` and ``uts46data.py`` files.
+ 
+ * ``idna-data make-table``. Generate a table of the IDNA disposition
+   (e.g. PVALID, CONTEXTJ, CONTEXTO) in the format found in Appendix
+   B.1 of RFC 5892 and the pre-computed tables published by `IANA
+   <https://www.iana.org/>`_.
+ 
+ * ``idna-data U+0061``. Prints debugging output on the various
+   properties associated with an individual Unicode codepoint (in this
+   case, U+0061), that are used to assess the IDNA and UTS 46 status of a
+   codepoint. This is helpful in debugging or analysis.
+ 
+ The tool accepts a number of arguments, described using ``idna-data
+ -h``. Most notably, the ``--version`` argument allows the specification
+ of the version of Unicode to be used in computing the table data. For
+ example, ``idna-data --version 9.0.0 make-libdata`` will generate
+ library data against Unicode 9.0.0.
+ 
+ 
+ Additional Notes
+ ----------------
+ 
+ * **Packages**. The latest tagged release version is published in the
+   `Python Package Index <https://pypi.org/project/idna/>`_.
+ 
+ * **Version support**. This library supports Python 3.6 and higher.
+   As this library serves as a low-level toolkit for a variety of
+   applications, many of which strive for broad compatibility with older
+   Python versions, there is no rush to remove older interpreter support.
+   Removing support for older versions should be well justified in that the
+   maintenance burden has become too high.
+ 
+ * **Python 2**. Python 2 is supported by version 2.x of this library.
+   Use "idna<3" in your requirements file if you need this library for
+   a Python 2 application. Be advised that these versions are no longer
+   actively developed.
+ 
+ * **Testing**. The library has a test suite based on each rule of the
+   IDNA specification, as well as tests that are provided as part of the
+   Unicode Technical Standard 46, `Unicode IDNA Compatibility Processing
+   <https://unicode.org/reports/tr46/>`_.
+ 
+ * **Emoji**. It is an occasional request to support emoji domains in
+   this library. Encoding of symbols like emoji is expressly prohibited by
+   the technical standard IDNA 2008 and emoji domains are broadly phased
+   out across the domain industry due to associated security risks. For
+   now, applications that need to support these non-compliant labels
+   may wish to consider trying the encode/decode operation in this library
+   first, and then falling back to using `encodings.idna`. See `the Github
+   project <https://github.com/kjd/idna/issues/18>`_ for more discussion.
+ 
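
Since all conversion errors derive from ``idna.IDNAError``, a single handler can cover the whole hierarchy. A small sketch (the `to_ascii` helper is ours, not part of the library, and the failing input is the `Königsgäßchen` example from above):

```python
import idna

def to_ascii(domain: str) -> bytes:
    """Encode a domain, retrying with the UTS 46 mapping on failure."""
    try:
        return idna.encode(domain)
    except idna.IDNAError:
        # IDNABidiError, InvalidCodepoint and InvalidCodepointContext all
        # subclass IDNAError, so one except clause covers them.
        return idna.encode(domain, uts46=True)

print(to_ascii('Königsgäßchen'))  # b'xn--knigsgchen-b4a3dun'
```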
meow/lib/python3.13/site-packages/idna-3.10.dist-info/RECORD ADDED
@@ -0,0 +1,22 @@
+ idna-3.10.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+ idna-3.10.dist-info/LICENSE.md,sha256=pZ8LDvNjWHQQmkRhykT_enDVBpboFHZ7-vch1Mmw2w8,1541
+ idna-3.10.dist-info/METADATA,sha256=URR5ZyDfQ1PCEGhkYoojqfi2Ra0tau2--lhwG4XSfjI,10158
+ idna-3.10.dist-info/RECORD,,
+ idna-3.10.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+ idna/__init__.py,sha256=MPqNDLZbXqGaNdXxAFhiqFPKEQXju2jNQhCey6-5eJM,868
+ idna/__pycache__/__init__.cpython-313.pyc,,
+ idna/__pycache__/codec.cpython-313.pyc,,
+ idna/__pycache__/compat.cpython-313.pyc,,
+ idna/__pycache__/core.cpython-313.pyc,,
+ idna/__pycache__/idnadata.cpython-313.pyc,,
+ idna/__pycache__/intranges.cpython-313.pyc,,
+ idna/__pycache__/package_data.cpython-313.pyc,,
+ idna/__pycache__/uts46data.cpython-313.pyc,,
+ idna/codec.py,sha256=PEew3ItwzjW4hymbasnty2N2OXvNcgHB-JjrBuxHPYY,3422
+ idna/compat.py,sha256=RzLy6QQCdl9784aFhb2EX9EKGCJjg0P3PilGdeXXcx8,316
+ idna/core.py,sha256=YJYyAMnwiQEPjVC4-Fqu_p4CJ6yKKuDGmppBNQNQpFs,13239
+ idna/idnadata.py,sha256=W30GcIGvtOWYwAjZj4ZjuouUutC6ffgNuyjJy7fZ-lo,78306
+ idna/intranges.py,sha256=amUtkdhYcQG8Zr-CoMM_kVRacxkivC1WgxN1b63KKdU,1898
+ idna/package_data.py,sha256=q59S3OXsc5VI8j6vSD0sGBMyk6zZ4vWFREE88yCJYKs,21
+ idna/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ idna/uts46data.py,sha256=rt90K9J40gUSwppDPCrhjgi5AA6pWM65dEGRSf6rIhM,239289
meow/lib/python3.13/site-packages/idna-3.10.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: flit 3.9.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any
meow/lib/python3.13/site-packages/packaging/__init__.py ADDED
@@ -0,0 +1,15 @@
+ # This file is dual licensed under the terms of the Apache License, Version
+ # 2.0, and the BSD License. See the LICENSE file in the root of this repository
+ # for complete details.
+ 
+ __title__ = "packaging"
+ __summary__ = "Core utilities for Python packages"
+ __uri__ = "https://github.com/pypa/packaging"
+ 
+ __version__ = "24.2"
+ 
+ __author__ = "Donald Stufft and individual contributors"
+ __email__ = "[email protected]"
+ 
+ __license__ = "BSD-2-Clause or Apache-2.0"
+ __copyright__ = f"2014 {__author__}"
meow/lib/python3.13/site-packages/packaging/_elffile.py ADDED
@@ -0,0 +1,110 @@
+ """
+ ELF file parser.
+ 
+ This provides a class ``ELFFile`` that parses an ELF executable in a similar
+ interface to ``ZipFile``. Only the read interface is implemented.
+ 
+ Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
+ ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
+ """
+ 
+ from __future__ import annotations
+ 
+ import enum
+ import os
+ import struct
+ from typing import IO
+ 
+ 
+ class ELFInvalid(ValueError):
+     pass
+ 
+ 
+ class EIClass(enum.IntEnum):
+     C32 = 1
+     C64 = 2
+ 
+ 
+ class EIData(enum.IntEnum):
+     Lsb = 1
+     Msb = 2
+ 
+ 
+ class EMachine(enum.IntEnum):
+     I386 = 3
+     S390 = 22
+     Arm = 40
+     X8664 = 62
+     AArc64 = 183
+ 
+ 
+ class ELFFile:
+     """
+     Representation of an ELF executable.
+     """
+ 
+     def __init__(self, f: IO[bytes]) -> None:
+         self._f = f
+ 
+         try:
+             ident = self._read("16B")
+         except struct.error as e:
+             raise ELFInvalid("unable to parse identification") from e
+         magic = bytes(ident[:4])
+         if magic != b"\x7fELF":
+             raise ELFInvalid(f"invalid magic: {magic!r}")
+ 
+         self.capacity = ident[4]  # Format for program header (bitness).
+         self.encoding = ident[5]  # Data structure encoding (endianness).
+ 
+         try:
+             # e_fmt: Format for the rest of the ELF header.
+             # p_fmt: Format for each program header entry.
+             # p_idx: Indexes to find p_type, p_offset, and p_filesz.
+             e_fmt, self._p_fmt, self._p_idx = {
+                 (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)),  # 32-bit LSB.
+                 (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)),  # 32-bit MSB.
+                 (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)),  # 64-bit LSB.
+                 (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)),  # 64-bit MSB.
+             }[(self.capacity, self.encoding)]
+         except KeyError as e:
+             raise ELFInvalid(
+                 f"unrecognized capacity ({self.capacity}) or "
+                 f"encoding ({self.encoding})"
+             ) from e
+ 
+         try:
+             (
+                 _,
+                 self.machine,  # Architecture type.
+                 _,
+                 _,
+                 self._e_phoff,  # Offset of program header.
+                 _,
+                 self.flags,  # Processor-specific flags.
+                 _,
+                 self._e_phentsize,  # Size of each program header entry.
+                 self._e_phnum,  # Number of program header entries.
+             ) = self._read(e_fmt)
+         except struct.error as e:
+             raise ELFInvalid("unable to parse machine and section information") from e
+ 
+     def _read(self, fmt: str) -> tuple[int, ...]:
+         return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))
+ 
+     @property
+     def interpreter(self) -> str | None:
+         """
+         The path recorded in the ``PT_INTERP`` program header.
+         """
+         for index in range(self._e_phnum):
+             self._f.seek(self._e_phoff + self._e_phentsize * index)
+             try:
+                 data = self._read(self._p_fmt)
+             except struct.error:
+                 continue
+             if data[self._p_idx[0]] != 3:  # Not PT_INTERP.
+                 continue
+             self._f.seek(data[self._p_idx[1]])
+             return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
+         return None
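
For orientation, a small sketch of the read-only interface above, run against the current interpreter binary (Linux-specific; on Mach-O or PE binaries the parser raises ``ELFInvalid``):

```python
import sys

from packaging._elffile import ELFFile, ELFInvalid

try:
    with open(sys.executable, "rb") as f:
        elf = ELFFile(f)
        # On a glibc x86_64 system this prints something like
        # "62 /lib64/ld-linux-x86-64.so.2"; on musl, "/lib/ld-musl-*.so.1".
        print(elf.machine, elf.interpreter)
except ELFInvalid:
    print("not an ELF executable")
```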
meow/lib/python3.13/site-packages/packaging/_manylinux.py ADDED
@@ -0,0 +1,263 @@
+ from __future__ import annotations
+ 
+ import collections
+ import contextlib
+ import functools
+ import os
+ import re
+ import sys
+ import warnings
+ from typing import Generator, Iterator, NamedTuple, Sequence
+ 
+ from ._elffile import EIClass, EIData, ELFFile, EMachine
+ 
+ EF_ARM_ABIMASK = 0xFF000000
+ EF_ARM_ABI_VER5 = 0x05000000
+ EF_ARM_ABI_FLOAT_HARD = 0x00000400
+ 
+ 
+ # `os.PathLike` not a generic type until Python 3.9, so sticking with `str`
+ # as the type for `path` until then.
+ @contextlib.contextmanager
+ def _parse_elf(path: str) -> Generator[ELFFile | None, None, None]:
+     try:
+         with open(path, "rb") as f:
+             yield ELFFile(f)
+     except (OSError, TypeError, ValueError):
+         yield None
+ 
+ 
+ def _is_linux_armhf(executable: str) -> bool:
+     # hard-float ABI can be detected from the ELF header of the running
+     # process
+     # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
+     with _parse_elf(executable) as f:
+         return (
+             f is not None
+             and f.capacity == EIClass.C32
+             and f.encoding == EIData.Lsb
+             and f.machine == EMachine.Arm
+             and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5
+             and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD
+         )
+ 
+ 
+ def _is_linux_i686(executable: str) -> bool:
+     with _parse_elf(executable) as f:
+         return (
+             f is not None
+             and f.capacity == EIClass.C32
+             and f.encoding == EIData.Lsb
+             and f.machine == EMachine.I386
+         )
+ 
+ 
+ def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool:
+     if "armv7l" in archs:
+         return _is_linux_armhf(executable)
+     if "i686" in archs:
+         return _is_linux_i686(executable)
+     allowed_archs = {
+         "x86_64",
+         "aarch64",
+         "ppc64",
+         "ppc64le",
+         "s390x",
+         "loongarch64",
+         "riscv64",
+     }
+     return any(arch in allowed_archs for arch in archs)
+ 
+ 
+ # If glibc ever changes its major version, we need to know what the last
+ # minor version was, so we can build the complete list of all versions.
+ # For now, guess what the highest minor version might be, assume it will
+ # be 50 for testing. Once this actually happens, update the dictionary
+ # with the actual value.
+ _LAST_GLIBC_MINOR: dict[int, int] = collections.defaultdict(lambda: 50)
+ 
+ 
+ class _GLibCVersion(NamedTuple):
+     major: int
+     minor: int
+ 
+ 
+ def _glibc_version_string_confstr() -> str | None:
+     """
+     Primary implementation of glibc_version_string using os.confstr.
+     """
+     # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely
+     # to be broken or missing. This strategy is used in the standard library
+     # platform module.
+     # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
+     try:
+         # Should be a string like "glibc 2.17".
+         version_string: str | None = os.confstr("CS_GNU_LIBC_VERSION")
+         assert version_string is not None
+         _, version = version_string.rsplit()
+     except (AssertionError, AttributeError, OSError, ValueError):
+         # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)...
+         return None
+     return version
+ 
+ 
+ def _glibc_version_string_ctypes() -> str | None:
+     """
+     Fallback implementation of glibc_version_string using ctypes.
+     """
+     try:
+         import ctypes
+     except ImportError:
+         return None
+ 
+     # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
+     # manpage says, "If filename is NULL, then the returned handle is for the
+     # main program". This way we can let the linker do the work to figure out
+     # which libc our process is actually using.
+     #
+     # We must also handle the special case where the executable is not a
+     # dynamically linked executable. This can occur when using musl libc,
+     # for example. In this situation, dlopen() will error, leading to an
+     # OSError. Interestingly, at least in the case of musl, there is no
+     # errno set on the OSError. The single string argument used to construct
+     # OSError comes from libc itself and is therefore not portable to
+     # hard code here. In any case, failure to call dlopen() means we
+     # can proceed, so we bail on our attempt.
+     try:
+         process_namespace = ctypes.CDLL(None)
+     except OSError:
+         return None
+ 
+     try:
+         gnu_get_libc_version = process_namespace.gnu_get_libc_version
+     except AttributeError:
+         # Symbol doesn't exist -> therefore, we are not linked to
+         # glibc.
+         return None
+ 
+     # Call gnu_get_libc_version, which returns a string like "2.5"
+     gnu_get_libc_version.restype = ctypes.c_char_p
+     version_str: str = gnu_get_libc_version()
+     # py2 / py3 compatibility:
+     if not isinstance(version_str, str):
+         version_str = version_str.decode("ascii")
+ 
+     return version_str
+ 
+ 
+ def _glibc_version_string() -> str | None:
+     """Returns glibc version string, or None if not using glibc."""
+     return _glibc_version_string_confstr() or _glibc_version_string_ctypes()
+ 
+ 
+ def _parse_glibc_version(version_str: str) -> tuple[int, int]:
+     """Parse glibc version.
+ 
+     We use a regexp instead of str.split because we want to discard any
+     random junk that might come after the minor version -- this might happen
+     in patched/forked versions of glibc (e.g. Linaro's version of glibc
+     uses version strings like "2.20-2014.11"). See gh-3588.
+     """
+     m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
+     if not m:
+         warnings.warn(
+             f"Expected glibc version with 2 components major.minor,"
+             f" got: {version_str}",
+             RuntimeWarning,
+             stacklevel=2,
+         )
+         return -1, -1
+     return int(m.group("major")), int(m.group("minor"))
+ 
+ 
+ @functools.lru_cache
+ def _get_glibc_version() -> tuple[int, int]:
+     version_str = _glibc_version_string()
+     if version_str is None:
+         return (-1, -1)
+     return _parse_glibc_version(version_str)
+ 
+ 
+ # From PEP 513, PEP 600
+ def _is_compatible(arch: str, version: _GLibCVersion) -> bool:
+     sys_glibc = _get_glibc_version()
+     if sys_glibc < version:
+         return False
+     # Check for presence of _manylinux module.
+     try:
+         import _manylinux
+     except ImportError:
+         return True
+     if hasattr(_manylinux, "manylinux_compatible"):
+         result = _manylinux.manylinux_compatible(version[0], version[1], arch)
+         if result is not None:
+             return bool(result)
+         return True
+     if version == _GLibCVersion(2, 5):
+         if hasattr(_manylinux, "manylinux1_compatible"):
+             return bool(_manylinux.manylinux1_compatible)
+     if version == _GLibCVersion(2, 12):
+         if hasattr(_manylinux, "manylinux2010_compatible"):
+             return bool(_manylinux.manylinux2010_compatible)
+     if version == _GLibCVersion(2, 17):
+         if hasattr(_manylinux, "manylinux2014_compatible"):
+             return bool(_manylinux.manylinux2014_compatible)
+     return True
+ 
+ 
+ _LEGACY_MANYLINUX_MAP = {
+     # CentOS 7 w/ glibc 2.17 (PEP 599)
+     (2, 17): "manylinux2014",
+     # CentOS 6 w/ glibc 2.12 (PEP 571)
+     (2, 12): "manylinux2010",
+     # CentOS 5 w/ glibc 2.5 (PEP 513)
+     (2, 5): "manylinux1",
+ }
+ 
+ 
+ def platform_tags(archs: Sequence[str]) -> Iterator[str]:
+     """Generate manylinux tags compatible with the current platform.
+ 
+     :param archs: Sequence of compatible architectures.
+         The first one shall be the closest to the actual architecture and be the part of
+         platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
+         The ``linux_`` prefix is assumed as a prerequisite for the current platform to
+         be manylinux-compatible.
+ 
+     :returns: An iterator of compatible manylinux tags.
+     """
+     if not _have_compatible_abi(sys.executable, archs):
+         return
+     # Oldest glibc to be supported regardless of architecture is (2, 17).
+     too_old_glibc2 = _GLibCVersion(2, 16)
+     if set(archs) & {"x86_64", "i686"}:
+         # On x86/i686 also oldest glibc to be supported is (2, 5).
+         too_old_glibc2 = _GLibCVersion(2, 4)
+     current_glibc = _GLibCVersion(*_get_glibc_version())
+     glibc_max_list = [current_glibc]
+     # We can assume compatibility across glibc major versions.
+     # https://sourceware.org/bugzilla/show_bug.cgi?id=24636
+     #
+     # Build a list of maximum glibc versions so that we can
+     # output the canonical list of all glibc from current_glibc
+     # down to too_old_glibc2, including all intermediary versions.
+     for glibc_major in range(current_glibc.major - 1, 1, -1):
+         glibc_minor = _LAST_GLIBC_MINOR[glibc_major]
+         glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor))
+     for arch in archs:
+         for glibc_max in glibc_max_list:
+             if glibc_max.major == too_old_glibc2.major:
+                 min_minor = too_old_glibc2.minor
+             else:
+                 # For other glibc major versions oldest supported is (x, 0).
+                 min_minor = -1
+             for glibc_minor in range(glibc_max.minor, min_minor, -1):
+                 glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
+                 tag = "manylinux_{}_{}".format(*glibc_version)
+                 if _is_compatible(arch, glibc_version):
+                     yield f"{tag}_{arch}"
+                 # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
+                 if glibc_version in _LEGACY_MANYLINUX_MAP:
+                     legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version]
+                     if _is_compatible(arch, glibc_version):
+                         yield f"{legacy_tag}_{arch}"
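
A sketch of what the generator yields; the exact tags depend on the interpreter's glibc, so the values in the comment are illustrative:

```python
import itertools

from packaging._manylinux import platform_tags

# On an x86_64 box running glibc 2.31 this begins with
# manylinux_2_31_x86_64 and counts down toward manylinux_2_5_x86_64
# (interleaving the legacy manylinux2014/2010/1 aliases along the way);
# on non-glibc systems the iterator is simply empty.
for tag in itertools.islice(platform_tags(["x86_64"]), 5):
    print(tag)
```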
meow/lib/python3.13/site-packages/packaging/_musllinux.py ADDED
@@ -0,0 +1,85 @@
+ """PEP 656 support.
+ 
+ This module implements logic to detect if the currently running Python is
+ linked against musl, and what musl version is used.
+ """
+ 
+ from __future__ import annotations
+ 
+ import functools
+ import re
+ import subprocess
+ import sys
+ from typing import Iterator, NamedTuple, Sequence
+ 
+ from ._elffile import ELFFile
+ 
+ 
+ class _MuslVersion(NamedTuple):
+     major: int
+     minor: int
+ 
+ 
+ def _parse_musl_version(output: str) -> _MuslVersion | None:
+     lines = [n for n in (n.strip() for n in output.splitlines()) if n]
+     if len(lines) < 2 or lines[0][:4] != "musl":
+         return None
+     m = re.match(r"Version (\d+)\.(\d+)", lines[1])
+     if not m:
+         return None
+     return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2)))
+ 
+ 
+ @functools.lru_cache
+ def _get_musl_version(executable: str) -> _MuslVersion | None:
+     """Detect currently-running musl runtime version.
+ 
+     This is done by checking the specified executable's dynamic linking
+     information, and invoking the loader to parse its output for a version
+     string. If the loader is musl, the output would be something like::
+ 
+         musl libc (x86_64)
+         Version 1.2.2
+         Dynamic Program Loader
+     """
+     try:
+         with open(executable, "rb") as f:
+             ld = ELFFile(f).interpreter
+     except (OSError, TypeError, ValueError):
+         return None
+     if ld is None or "musl" not in ld:
+         return None
+     proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True)
+     return _parse_musl_version(proc.stderr)
+ 
+ 
+ def platform_tags(archs: Sequence[str]) -> Iterator[str]:
+     """Generate musllinux tags compatible with the current platform.
+ 
+     :param archs: Sequence of compatible architectures.
+         The first one shall be the closest to the actual architecture and be the part of
+         platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
+         The ``linux_`` prefix is assumed as a prerequisite for the current platform to
+         be musllinux-compatible.
+ 
+     :returns: An iterator of compatible musllinux tags.
+     """
+     sys_musl = _get_musl_version(sys.executable)
+     if sys_musl is None:  # Python not dynamically linked against musl.
+         return
+     for arch in archs:
+         for minor in range(sys_musl.minor, -1, -1):
+             yield f"musllinux_{sys_musl.major}_{minor}_{arch}"
+ 
+ 
+ if __name__ == "__main__":  # pragma: no cover
+     import sysconfig
+ 
+     plat = sysconfig.get_platform()
+     assert plat.startswith("linux-"), "not linux"
+ 
+     print("plat:", plat)
+     print("musl:", _get_musl_version(sys.executable))
+     print("tags:", end=" ")
+     # `platform_tags` expects a sequence of architecture strings, so wrap the
+     # single derived arch in a list rather than passing a bare str (which
+     # would be iterated character by character).
+     for t in platform_tags([re.sub(r"[.-]", "_", plat.split("-", 1)[-1])]):
+         print(t, end="\n      ")
meow/lib/python3.13/site-packages/packaging/_parser.py ADDED
@@ -0,0 +1,354 @@
+ """Handwritten parser of dependency specifiers.
+ 
+ The docstring for each __parse_* function contains EBNF-inspired grammar representing
+ the implementation.
+ """
+ 
+ from __future__ import annotations
+ 
+ import ast
+ from typing import NamedTuple, Sequence, Tuple, Union
+ 
+ from ._tokenizer import DEFAULT_RULES, Tokenizer
+ 
+ 
+ class Node:
+     def __init__(self, value: str) -> None:
+         self.value = value
+ 
+     def __str__(self) -> str:
+         return self.value
+ 
+     def __repr__(self) -> str:
+         return f"<{self.__class__.__name__}('{self}')>"
+ 
+     def serialize(self) -> str:
+         raise NotImplementedError
+ 
+ 
+ class Variable(Node):
+     def serialize(self) -> str:
+         return str(self)
+ 
+ 
+ class Value(Node):
+     def serialize(self) -> str:
+         return f'"{self}"'
+ 
+ 
+ class Op(Node):
+     def serialize(self) -> str:
+         return str(self)
+ 
+ 
+ MarkerVar = Union[Variable, Value]
+ MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
+ MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]]
+ MarkerList = Sequence[Union["MarkerList", MarkerAtom, str]]
+ 
+ 
+ class ParsedRequirement(NamedTuple):
+     name: str
+     url: str
+     extras: list[str]
+     specifier: str
+     marker: MarkerList | None
+ 
+ 
+ # --------------------------------------------------------------------------------------
+ # Recursive descent parser for dependency specifier
+ # --------------------------------------------------------------------------------------
+ def parse_requirement(source: str) -> ParsedRequirement:
+     return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
+ 
+ 
+ def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
+     """
+     requirement = WS? IDENTIFIER WS? extras WS? requirement_details
+     """
+     tokenizer.consume("WS")
+ 
+     name_token = tokenizer.expect(
+         "IDENTIFIER", expected="package name at the start of dependency specifier"
+     )
+     name = name_token.text
+     tokenizer.consume("WS")
+ 
+     extras = _parse_extras(tokenizer)
+     tokenizer.consume("WS")
+ 
+     url, specifier, marker = _parse_requirement_details(tokenizer)
+     tokenizer.expect("END", expected="end of dependency specifier")
+ 
+     return ParsedRequirement(name, url, extras, specifier, marker)
+ 
+ 
+ def _parse_requirement_details(
+     tokenizer: Tokenizer,
+ ) -> tuple[str, str, MarkerList | None]:
+     """
+     requirement_details = AT URL (WS requirement_marker?)?
+                         | specifier WS? (requirement_marker)?
+     """
+ 
+     specifier = ""
+     url = ""
+     marker = None
+ 
+     if tokenizer.check("AT"):
+         tokenizer.read()
+         tokenizer.consume("WS")
+ 
+         url_start = tokenizer.position
+         url = tokenizer.expect("URL", expected="URL after @").text
+         if tokenizer.check("END", peek=True):
+             return (url, specifier, marker)
+ 
+         tokenizer.expect("WS", expected="whitespace after URL")
+ 
+         # The input might end after whitespace.
+         if tokenizer.check("END", peek=True):
+             return (url, specifier, marker)
+ 
+         marker = _parse_requirement_marker(
+             tokenizer, span_start=url_start, after="URL and whitespace"
+         )
+     else:
+         specifier_start = tokenizer.position
+         specifier = _parse_specifier(tokenizer)
+         tokenizer.consume("WS")
+ 
+         if tokenizer.check("END", peek=True):
+             return (url, specifier, marker)
+ 
+         marker = _parse_requirement_marker(
+             tokenizer,
+             span_start=specifier_start,
+             after=(
+                 "version specifier"
+                 if specifier
+                 else "name and no valid version specifier"
+             ),
+         )
+ 
+     return (url, specifier, marker)
+ 
+ 
+ def _parse_requirement_marker(
+     tokenizer: Tokenizer, *, span_start: int, after: str
+ ) -> MarkerList:
+     """
+     requirement_marker = SEMICOLON marker WS?
+     """
+ 
+     if not tokenizer.check("SEMICOLON"):
+         tokenizer.raise_syntax_error(
+             f"Expected end or semicolon (after {after})",
+             span_start=span_start,
+         )
+     tokenizer.read()
+ 
+     marker = _parse_marker(tokenizer)
+     tokenizer.consume("WS")
+ 
+     return marker
+ 
+ 
+ def _parse_extras(tokenizer: Tokenizer) -> list[str]:
+     """
+     extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
+     """
+     if not tokenizer.check("LEFT_BRACKET", peek=True):
+         return []
+ 
+     with tokenizer.enclosing_tokens(
+         "LEFT_BRACKET",
+         "RIGHT_BRACKET",
+         around="extras",
+     ):
+         tokenizer.consume("WS")
+         extras = _parse_extras_list(tokenizer)
+         tokenizer.consume("WS")
+ 
+     return extras
+ 
+ 
+ def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
+     """
+     extras_list = identifier (wsp* ',' wsp* identifier)*
+     """
+     extras: list[str] = []
+ 
+     if not tokenizer.check("IDENTIFIER"):
+         return extras
+ 
+     extras.append(tokenizer.read().text)
+ 
+     while True:
+         tokenizer.consume("WS")
+         if tokenizer.check("IDENTIFIER", peek=True):
+             tokenizer.raise_syntax_error("Expected comma between extra names")
+         elif not tokenizer.check("COMMA"):
+             break
+ 
+         tokenizer.read()
+         tokenizer.consume("WS")
+ 
+         extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
+         extras.append(extra_token.text)
+ 
+     return extras
+ 
+ 
+ def _parse_specifier(tokenizer: Tokenizer) -> str:
+     """
+     specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
+               | WS? version_many WS?
+     """
+     with tokenizer.enclosing_tokens(
+         "LEFT_PARENTHESIS",
+         "RIGHT_PARENTHESIS",
+         around="version specifier",
+     ):
+         tokenizer.consume("WS")
+         parsed_specifiers = _parse_version_many(tokenizer)
+         tokenizer.consume("WS")
+ 
+     return parsed_specifiers
+ 
+ 
+ def _parse_version_many(tokenizer: Tokenizer) -> str:
+     """
+     version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
+     """
+     parsed_specifiers = ""
+     while tokenizer.check("SPECIFIER"):
+         span_start = tokenizer.position
+         parsed_specifiers += tokenizer.read().text
+         if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
+             tokenizer.raise_syntax_error(
+                 ".* suffix can only be used with `==` or `!=` operators",
+                 span_start=span_start,
+                 span_end=tokenizer.position + 1,
+             )
+         if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
+             tokenizer.raise_syntax_error(
+                 "Local version label can only be used with `==` or `!=` operators",
+                 span_start=span_start,
+                 span_end=tokenizer.position,
+             )
+         tokenizer.consume("WS")
+         if not tokenizer.check("COMMA"):
+             break
+         parsed_specifiers += tokenizer.read().text
+         tokenizer.consume("WS")
+ 
+     return parsed_specifiers
+ 
+ 
+ # --------------------------------------------------------------------------------------
+ # Recursive descent parser for marker expression
+ # --------------------------------------------------------------------------------------
+ def parse_marker(source: str) -> MarkerList:
+     return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
+ 
+ 
+ def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
+     retval = _parse_marker(tokenizer)
+     tokenizer.expect("END", expected="end of marker expression")
+     return retval
+ 
+ 
+ def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
+     """
+     marker = marker_atom (BOOLOP marker_atom)+
+     """
+     expression = [_parse_marker_atom(tokenizer)]
+     while tokenizer.check("BOOLOP"):
+         token = tokenizer.read()
+         expr_right = _parse_marker_atom(tokenizer)
+         expression.extend((token.text, expr_right))
+     return expression
+ 
+ 
+ def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
+     """
+     marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
+                 | WS? marker_item WS?
+     """
+ 
+     tokenizer.consume("WS")
+     if tokenizer.check("LEFT_PARENTHESIS", peek=True):
+         with tokenizer.enclosing_tokens(
+             "LEFT_PARENTHESIS",
+             "RIGHT_PARENTHESIS",
+             around="marker expression",
+         ):
+             tokenizer.consume("WS")
+             marker: MarkerAtom = _parse_marker(tokenizer)
+             tokenizer.consume("WS")
+     else:
+         marker = _parse_marker_item(tokenizer)
+     tokenizer.consume("WS")
+     return marker
+ 
+ 
+ def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
+     """
+     marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
+     """
+     tokenizer.consume("WS")
+     marker_var_left = _parse_marker_var(tokenizer)
+     tokenizer.consume("WS")
+     marker_op = _parse_marker_op(tokenizer)
+     tokenizer.consume("WS")
+     marker_var_right = _parse_marker_var(tokenizer)
+     tokenizer.consume("WS")
+     return (marker_var_left, marker_op, marker_var_right)
+ 
+ 
+ def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
+     """
+     marker_var = VARIABLE | QUOTED_STRING
+     """
+     if tokenizer.check("VARIABLE"):
+         return process_env_var(tokenizer.read().text.replace(".", "_"))
+     elif tokenizer.check("QUOTED_STRING"):
+         return process_python_str(tokenizer.read().text)
+     else:
+         tokenizer.raise_syntax_error(
+             message="Expected a marker variable or quoted string"
+         )
+ 
+ 
+ def process_env_var(env_var: str) -> Variable:
+     if env_var in ("platform_python_implementation", "python_implementation"):
+         return Variable("platform_python_implementation")
+     else:
+         return Variable(env_var)
+ 
+ 
+ def process_python_str(python_str: str) -> Value:
+     value = ast.literal_eval(python_str)
+     return Value(str(value))
+ 
+ 
+ def _parse_marker_op(tokenizer: Tokenizer) -> Op:
+     """
+     marker_op = IN | NOT IN | OP
+     """
+     if tokenizer.check("IN"):
+         tokenizer.read()
+         return Op("in")
+     elif tokenizer.check("NOT"):
+         tokenizer.read()
+         tokenizer.expect("WS", expected="whitespace after 'not'")
+         tokenizer.expect("IN", expected="'in' after 'not'")
+         return Op("not in")
+     elif tokenizer.check("OP"):
+         return Op(tokenizer.read().text)
+     else:
+         return tokenizer.raise_syntax_error(
+             "Expected marker operator, one of "
+             "<=, <, !=, ==, >=, >, ~=, ===, in, not in"
+         )
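
A quick sketch of the two entry points; the printed reprs follow the `Node.__repr__` defined above, though exact output may vary by version:

```python
from packaging._parser import parse_marker, parse_requirement

req = parse_requirement("requests[security] >=2.8.1, ==2.8.* ; python_version < '3.11'")
print(req.name)       # requests
print(req.extras)     # ['security']
print(req.specifier)  # >=2.8.1,==2.8.*
print(req.marker)     # [(<Variable('python_version')>, <Op('<')>, <Value('3.11')>)]

print(parse_marker("os_name == 'posix' and extra == 'test'"))
```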
meow/lib/python3.13/site-packages/packaging/_structures.py ADDED
@@ -0,0 +1,61 @@
+ # This file is dual licensed under the terms of the Apache License, Version
+ # 2.0, and the BSD License. See the LICENSE file in the root of this repository
+ # for complete details.
+ 
+ 
+ class InfinityType:
+     def __repr__(self) -> str:
+         return "Infinity"
+ 
+     def __hash__(self) -> int:
+         return hash(repr(self))
+ 
+     def __lt__(self, other: object) -> bool:
+         return False
+ 
+     def __le__(self, other: object) -> bool:
+         return False
+ 
+     def __eq__(self, other: object) -> bool:
+         return isinstance(other, self.__class__)
+ 
+     def __gt__(self, other: object) -> bool:
+         return True
+ 
+     def __ge__(self, other: object) -> bool:
+         return True
+ 
+     def __neg__(self: object) -> "NegativeInfinityType":
+         return NegativeInfinity
+ 
+ 
+ Infinity = InfinityType()
+ 
+ 
+ class NegativeInfinityType:
+     def __repr__(self) -> str:
+         return "-Infinity"
+ 
+     def __hash__(self) -> int:
+         return hash(repr(self))
+ 
+     def __lt__(self, other: object) -> bool:
+         return True
+ 
+     def __le__(self, other: object) -> bool:
+         return True
+ 
+     def __eq__(self, other: object) -> bool:
+         return isinstance(other, self.__class__)
+ 
+     def __gt__(self, other: object) -> bool:
+         return False
+ 
+     def __ge__(self, other: object) -> bool:
+         return False
+ 
+     def __neg__(self: object) -> InfinityType:
+         return Infinity
+ 
+ 
+ NegativeInfinity = NegativeInfinityType()
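
These sentinels exist so that comparison keys always have an element ordering above or below anything else; a tiny sketch of the hard-coded comparison behavior:

```python
from packaging._structures import Infinity, NegativeInfinity

# Every comparison is hard-coded, so the singletons order against any type.
print(Infinity > (99, "post"))        # True
print(NegativeInfinity < "anything")  # True
print(sorted([3, Infinity, NegativeInfinity, 7]))  # [-Infinity, 3, 7, Infinity]
```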
meow/lib/python3.13/site-packages/packaging/_tokenizer.py ADDED
@@ -0,0 +1,194 @@
+ from __future__ import annotations
+ 
+ import contextlib
+ import re
+ from dataclasses import dataclass
+ from typing import Iterator, NoReturn
+ 
+ from .specifiers import Specifier
+ 
+ 
+ @dataclass
+ class Token:
+     name: str
+     text: str
+     position: int
+ 
+ 
+ class ParserSyntaxError(Exception):
+     """The provided source text could not be parsed correctly."""
+ 
+     def __init__(
+         self,
+         message: str,
+         *,
+         source: str,
+         span: tuple[int, int],
+     ) -> None:
+         self.span = span
+         self.message = message
+         self.source = source
+ 
+         super().__init__()
+ 
+     def __str__(self) -> str:
+         marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^"
+         return "\n    ".join([self.message, self.source, marker])
+ 
+ 
+ DEFAULT_RULES: dict[str, str | re.Pattern[str]] = {
+     "LEFT_PARENTHESIS": r"\(",
+     "RIGHT_PARENTHESIS": r"\)",
+     "LEFT_BRACKET": r"\[",
+     "RIGHT_BRACKET": r"\]",
+     "SEMICOLON": r";",
+     "COMMA": r",",
+     "QUOTED_STRING": re.compile(
+         r"""
+             (
+                 ('[^']*')
+                 |
+                 ("[^"]*")
+             )
+         """,
+         re.VERBOSE,
+     ),
+     "OP": r"(===|==|~=|!=|<=|>=|<|>)",
+     "BOOLOP": r"\b(or|and)\b",
+     "IN": r"\bin\b",
+     "NOT": r"\bnot\b",
+     "VARIABLE": re.compile(
+         r"""
+             \b(
+                 python_version
+                 |python_full_version
+                 |os[._]name
+                 |sys[._]platform
+                 |platform_(release|system)
+                 |platform[._](version|machine|python_implementation)
+                 |python_implementation
+                 |implementation_(name|version)
+                 |extra
+             )\b
+         """,
+         re.VERBOSE,
+     ),
+     "SPECIFIER": re.compile(
+         Specifier._operator_regex_str + Specifier._version_regex_str,
+         re.VERBOSE | re.IGNORECASE,
+     ),
+     "AT": r"\@",
+     "URL": r"[^ \t]+",
+     "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
+     "VERSION_PREFIX_TRAIL": r"\.\*",
+     "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
+     "WS": r"[ \t]+",
+     "END": r"$",
+ }
+ 
+ 
+ class Tokenizer:
+     """Context-sensitive token parsing.
+ 
+     Provides methods to examine the input stream to check whether the next token
+     matches.
+     """
+ 
+     def __init__(
+         self,
+         source: str,
+         *,
+         rules: dict[str, str | re.Pattern[str]],
+     ) -> None:
+         self.source = source
+         self.rules: dict[str, re.Pattern[str]] = {
+             name: re.compile(pattern) for name, pattern in rules.items()
+         }
+         self.next_token: Token | None = None
+         self.position = 0
+ 
+     def consume(self, name: str) -> None:
+         """Move beyond provided token name, if at current position."""
+         if self.check(name):
+             self.read()
+ 
+     def check(self, name: str, *, peek: bool = False) -> bool:
+         """Check whether the next token has the provided name.
+ 
+         By default, if the check succeeds, the token *must* be read before
+         another check. If `peek` is set to `True`, the token is not loaded and
+         would need to be checked again.
+         """
+         assert (
+             self.next_token is None
+         ), f"Cannot check for {name!r}, already have {self.next_token!r}"
+         assert name in self.rules, f"Unknown token name: {name!r}"
+ 
+         expression = self.rules[name]
+ 
+         match = expression.match(self.source, self.position)
+         if match is None:
+             return False
+         if not peek:
+             self.next_token = Token(name, match[0], self.position)
+         return True
+ 
+     def expect(self, name: str, *, expected: str) -> Token:
+         """Expect a certain token name next, failing with a syntax error otherwise.
+ 
+         The token is read and returned.
+         """
+         if not self.check(name):
+             raise self.raise_syntax_error(f"Expected {expected}")
+         return self.read()
+ 
+     def read(self) -> Token:
+         """Consume the next token and return it."""
+         token = self.next_token
+         assert token is not None
+ 
+         self.position += len(token.text)
+         self.next_token = None
+ 
+         return token
+ 
+     def raise_syntax_error(
+         self,
+         message: str,
+         *,
+         span_start: int | None = None,
+         span_end: int | None = None,
+     ) -> NoReturn:
+         """Raise ParserSyntaxError at the given position."""
+         span = (
+             self.position if span_start is None else span_start,
+             self.position if span_end is None else span_end,
+         )
+         raise ParserSyntaxError(
+             message,
+             source=self.source,
+             span=span,
+         )
+ 
+     @contextlib.contextmanager
+     def enclosing_tokens(
+         self, open_token: str, close_token: str, *, around: str
+     ) -> Iterator[None]:
+         if self.check(open_token):
+             open_position = self.position
+             self.read()
+         else:
+             open_position = None
+ 
+         yield
+ 
+         if open_position is None:
+             return
+ 
+         if not self.check(close_token):
+             self.raise_syntax_error(
+                 f"Expected matching {close_token} for {open_token}, after {around}",
+                 span_start=open_position,
+             )
+ 
+         self.read()
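
To see the check/read protocol in action, a short driving sketch over `DEFAULT_RULES` (whitespace handling is the caller's job, via the `WS` rule):

```python
from packaging._tokenizer import DEFAULT_RULES, Tokenizer

t = Tokenizer("foo >=1.0", rules=DEFAULT_RULES)
name = t.expect("IDENTIFIER", expected="package name")      # Token 'foo'
t.consume("WS")                                             # skip whitespace
spec = t.expect("SPECIFIER", expected="version specifier")  # Token '>=1.0'
t.expect("END", expected="end of input")
print(name.text, spec.text)  # foo >=1.0
```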
meow/lib/python3.13/site-packages/packaging/markers.py ADDED
@@ -0,0 +1,331 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is dual licensed under the terms of the Apache License, Version
2
+ # 2.0, and the BSD License. See the LICENSE file in the root of this repository
3
+ # for complete details.
4
+
5
+ from __future__ import annotations
6
+
7
+ import operator
8
+ import os
9
+ import platform
10
+ import sys
11
+ from typing import Any, Callable, TypedDict, cast
12
+
13
+ from ._parser import MarkerAtom, MarkerList, Op, Value, Variable
14
+ from ._parser import parse_marker as _parse_marker
15
+ from ._tokenizer import ParserSyntaxError
16
+ from .specifiers import InvalidSpecifier, Specifier
17
+ from .utils import canonicalize_name
18
+
19
+ __all__ = [
20
+ "InvalidMarker",
21
+ "Marker",
22
+ "UndefinedComparison",
23
+ "UndefinedEnvironmentName",
24
+ "default_environment",
25
+ ]
26
+
27
+ Operator = Callable[[str, str], bool]
28
+
29
+
30
+ class InvalidMarker(ValueError):
31
+ """
32
+ An invalid marker was found, users should refer to PEP 508.
33
+ """
34
+
35
+
36
+ class UndefinedComparison(ValueError):
37
+ """
38
+ An invalid operation was attempted on a value that doesn't support it.
39
+ """
40
+
41
+
42
+ class UndefinedEnvironmentName(ValueError):
43
+ """
44
+ A name was attempted to be used that does not exist inside of the
45
+ environment.
46
+ """
47
+
48
+
49
+ class Environment(TypedDict):
50
+ implementation_name: str
51
+ """The implementation's identifier, e.g. ``'cpython'``."""
52
+
53
+ implementation_version: str
54
+ """
55
+ The implementation's version, e.g. ``'3.13.0a2'`` for CPython 3.13.0a2, or
56
+ ``'7.3.13'`` for PyPy3.10 v7.3.13.
57
+ """
58
+
59
+ os_name: str
60
+ """
61
+ The value of :py:data:`os.name`. The name of the operating system dependent module
62
+ imported, e.g. ``'posix'``.
63
+ """
64
+
65
+ platform_machine: str
66
+ """
67
+ Returns the machine type, e.g. ``'i386'``.
68
+
69
+ An empty string if the value cannot be determined.
70
+ """
71
+
72
+ platform_release: str
73
+ """
74
+ The system's release, e.g. ``'2.2.0'`` or ``'NT'``.
75
+
76
+ An empty string if the value cannot be determined.
77
+ """
78
+
79
+ platform_system: str
80
+ """
81
+ The system/OS name, e.g. ``'Linux'``, ``'Windows'`` or ``'Java'``.
82
+
83
+ An empty string if the value cannot be determined.
84
+ """
85
+
86
+ platform_version: str
87
+ """
88
+ The system's release version, e.g. ``'#3 on degas'``.
89
+
90
+ An empty string if the value cannot be determined.
91
+ """
92
+
93
+ python_full_version: str
94
+ """
95
+ The Python version as string ``'major.minor.patchlevel'``.
96
+
97
+ Note that unlike the Python :py:data:`sys.version`, this value will always include
98
+ the patchlevel (it defaults to 0).
99
+ """
100
+
101
+ platform_python_implementation: str
102
+ """
103
+ A string identifying the Python implementation, e.g. ``'CPython'``.
104
+ """
105
+
106
+ python_version: str
107
+ """The Python version as string ``'major.minor'``."""
108
+
109
+ sys_platform: str
110
+ """
111
+ This string contains a platform identifier that can be used to append
112
+ platform-specific components to :py:data:`sys.path`, for instance.
113
+
114
+ For Unix systems, except on Linux and AIX, this is the lowercased OS name as
115
+ returned by ``uname -s`` with the first part of the version as returned by
116
+ ``uname -r`` appended, e.g. ``'sunos5'`` or ``'freebsd8'``, at the time when Python
117
+ was built.
118
+ """
119
+
120
+
121
+ def _normalize_extra_values(results: Any) -> Any:
122
+ """
123
+ Normalize extra values.
124
+ """
125
+ if isinstance(results[0], tuple):
126
+ lhs, op, rhs = results[0]
127
+ if isinstance(lhs, Variable) and lhs.value == "extra":
128
+ normalized_extra = canonicalize_name(rhs.value)
129
+ rhs = Value(normalized_extra)
130
+ elif isinstance(rhs, Variable) and rhs.value == "extra":
131
+ normalized_extra = canonicalize_name(lhs.value)
132
+ lhs = Value(normalized_extra)
133
+ results[0] = lhs, op, rhs
134
+ return results
135
+
136
+
137
+ def _format_marker(
138
+ marker: list[str] | MarkerAtom | str, first: bool | None = True
139
+ ) -> str:
140
+ assert isinstance(marker, (list, tuple, str))
141
+
142
+ # Sometimes we have a structure like [[...]] which is a single item list
143
+ # where the single item is itself its own list. In that case we want to skip
144
+ # the rest of this function so that we don't get extraneous () on the
145
+ # outside.
146
+ if (
147
+ isinstance(marker, list)
148
+ and len(marker) == 1
149
+ and isinstance(marker[0], (list, tuple))
150
+ ):
151
+ return _format_marker(marker[0])
152
+
153
+ if isinstance(marker, list):
154
+ inner = (_format_marker(m, first=False) for m in marker)
155
+ if first:
156
+ return " ".join(inner)
157
+ else:
158
+ return "(" + " ".join(inner) + ")"
159
+ elif isinstance(marker, tuple):
160
+ return " ".join([m.serialize() for m in marker])
161
+ else:
162
+ return marker
163
+
164
+
165
+ _operators: dict[str, Operator] = {
166
+ "in": lambda lhs, rhs: lhs in rhs,
167
+ "not in": lambda lhs, rhs: lhs not in rhs,
168
+ "<": operator.lt,
169
+ "<=": operator.le,
170
+ "==": operator.eq,
171
+ "!=": operator.ne,
172
+ ">=": operator.ge,
173
+ ">": operator.gt,
174
+ }
175
+
176
+
177
+ def _eval_op(lhs: str, op: Op, rhs: str) -> bool:
178
+ try:
179
+ spec = Specifier("".join([op.serialize(), rhs]))
180
+ except InvalidSpecifier:
181
+ pass
182
+ else:
183
+ return spec.contains(lhs, prereleases=True)
184
+
185
+ oper: Operator | None = _operators.get(op.serialize())
186
+ if oper is None:
187
+ raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.")
188
+
189
+ return oper(lhs, rhs)
190
+
191
+
192
+ def _normalize(*values: str, key: str) -> tuple[str, ...]:
193
+ # PEP 685 – Comparison of extra names for optional distribution dependencies
194
+ # https://peps.python.org/pep-0685/
195
+ # > When comparing extra names, tools MUST normalize the names being
196
+ # > compared using the semantics outlined in PEP 503 for names
197
+ if key == "extra":
198
+ return tuple(canonicalize_name(v) for v in values)
199
+
200
+ # other environment markers don't have such standards
201
+ return values
202
+
203
+
204
+ def _evaluate_markers(markers: MarkerList, environment: dict[str, str]) -> bool:
205
+ groups: list[list[bool]] = [[]]
206
+
207
+ for marker in markers:
208
+ assert isinstance(marker, (list, tuple, str))
209
+
210
+ if isinstance(marker, list):
211
+ groups[-1].append(_evaluate_markers(marker, environment))
212
+ elif isinstance(marker, tuple):
213
+ lhs, op, rhs = marker
214
+
215
+ if isinstance(lhs, Variable):
216
+ environment_key = lhs.value
217
+ lhs_value = environment[environment_key]
218
+ rhs_value = rhs.value
219
+ else:
220
+ lhs_value = lhs.value
221
+ environment_key = rhs.value
222
+ rhs_value = environment[environment_key]
223
+
224
+ lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key)
225
+ groups[-1].append(_eval_op(lhs_value, op, rhs_value))
226
+ else:
227
+ assert marker in ["and", "or"]
228
+ if marker == "or":
229
+ groups.append([])
230
+
231
+ return any(all(item) for item in groups)
232
+
233
+
234
+ def format_full_version(info: sys._version_info) -> str:
235
+ version = f"{info.major}.{info.minor}.{info.micro}"
236
+ kind = info.releaselevel
237
+ if kind != "final":
238
+ version += kind[0] + str(info.serial)
239
+ return version
240
+
241
+
242
+ def default_environment() -> Environment:
243
+ iver = format_full_version(sys.implementation.version)
244
+ implementation_name = sys.implementation.name
245
+ return {
246
+ "implementation_name": implementation_name,
247
+ "implementation_version": iver,
248
+ "os_name": os.name,
249
+ "platform_machine": platform.machine(),
250
+ "platform_release": platform.release(),
251
+ "platform_system": platform.system(),
252
+ "platform_version": platform.version(),
253
+ "python_full_version": platform.python_version(),
254
+ "platform_python_implementation": platform.python_implementation(),
255
+ "python_version": ".".join(platform.python_version_tuple()[:2]),
256
+ "sys_platform": sys.platform,
257
+ }
258
+
259
+
260
+ class Marker:
261
+ def __init__(self, marker: str) -> None:
262
+ # Note: We create a Marker object without calling this constructor in
263
+ # packaging.requirements.Requirement. If any additional logic is
264
+ # added here, make sure to mirror/adapt Requirement.
265
+ try:
266
+ self._markers = _normalize_extra_values(_parse_marker(marker))
267
+ # The attribute `_markers` can be described in terms of a recursive type:
268
+ # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
269
+ #
270
+ # For example, the following expression:
271
+ # python_version > "3.6" or (python_version == "3.6" and os_name == "unix")
272
+ #
273
+ # is parsed into:
274
+ # [
275
+ # (<Variable('python_version')>, <Op('>')>, <Value('3.6')>),
276
+ # 'and',
277
+ # [
278
+ # (<Variable('python_version')>, <Op('==')>, <Value('3.6')>),
279
+ # 'or',
280
+ # (<Variable('os_name')>, <Op('==')>, <Value('unix')>)
281
+ # ]
282
+ # ]
283
+ except ParserSyntaxError as e:
284
+ raise InvalidMarker(str(e)) from e
285
+
286
+ def __str__(self) -> str:
287
+ return _format_marker(self._markers)
288
+
289
+ def __repr__(self) -> str:
290
+ return f"<Marker('{self}')>"
291
+
292
+ def __hash__(self) -> int:
293
+ return hash((self.__class__.__name__, str(self)))
294
+
295
+ def __eq__(self, other: Any) -> bool:
296
+ if not isinstance(other, Marker):
297
+ return NotImplemented
298
+
299
+ return str(self) == str(other)
300
+
301
+ def evaluate(self, environment: dict[str, str] | None = None) -> bool:
302
+ """Evaluate a marker.
303
+
304
+ Return the boolean from evaluating the given marker against the
305
+ environment. environment is an optional argument to override all or
306
+ part of the determined environment.
307
+
308
+ The environment is determined from the current Python process.
309
+ """
310
+ current_environment = cast("dict[str, str]", default_environment())
311
+ current_environment["extra"] = ""
312
+ if environment is not None:
313
+ current_environment.update(environment)
314
+ # The API used to allow setting extra to None. We need to handle this
315
+ # case for backwards compatibility.
316
+ if current_environment["extra"] is None:
317
+ current_environment["extra"] = ""
318
+
319
+ return _evaluate_markers(
320
+ self._markers, _repair_python_full_version(current_environment)
321
+ )
322
+
323
+
324
+ def _repair_python_full_version(env: dict[str, str]) -> dict[str, str]:
325
+ """
326
+ Work around platform.python_version() returning something that is not PEP 440
327
+ compliant for non-tagged Python builds.
328
+ """
329
+ if env["python_full_version"].endswith("+"):
330
+ env["python_full_version"] += "local"
331
+ return env
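
A minimal usage sketch (not part of the diff) for the Marker class above; the marker string and the environment override are illustrative:

    from packaging.markers import Marker

    m = Marker('python_version >= "3.8" and os_name == "posix"')
    print(m.evaluate())                    # evaluated against default_environment()
    print(m.evaluate({"os_name": "nt"}))   # override part of the environment
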
meow/lib/python3.13/site-packages/packaging/metadata.py ADDED
@@ -0,0 +1,863 @@
1
+ from __future__ import annotations
2
+
3
+ import email.feedparser
4
+ import email.header
5
+ import email.message
6
+ import email.parser
7
+ import email.policy
8
+ import pathlib
9
+ import sys
10
+ import typing
11
+ from typing import (
12
+ Any,
13
+ Callable,
14
+ Generic,
15
+ Literal,
16
+ TypedDict,
17
+ cast,
18
+ )
19
+
20
+ from . import licenses, requirements, specifiers, utils
21
+ from . import version as version_module
22
+ from .licenses import NormalizedLicenseExpression
23
+
24
+ T = typing.TypeVar("T")
25
+
26
+
27
+ if sys.version_info >= (3, 11): # pragma: no cover
28
+ ExceptionGroup = ExceptionGroup
29
+ else: # pragma: no cover
30
+
31
+ class ExceptionGroup(Exception):
32
+ """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11.
33
+
34
+ If :external:exc:`ExceptionGroup` is already defined by Python itself,
35
+ that version is used instead.
36
+ """
37
+
38
+ message: str
39
+ exceptions: list[Exception]
40
+
41
+ def __init__(self, message: str, exceptions: list[Exception]) -> None:
42
+ self.message = message
43
+ self.exceptions = exceptions
44
+
45
+ def __repr__(self) -> str:
46
+ return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})"
47
+
48
+
49
+ class InvalidMetadata(ValueError):
50
+ """A metadata field contains invalid data."""
51
+
52
+ field: str
53
+ """The name of the field that contains invalid data."""
54
+
55
+ def __init__(self, field: str, message: str) -> None:
56
+ self.field = field
57
+ super().__init__(message)
58
+
59
+
60
+ # The RawMetadata class attempts to make as few assumptions about the underlying
61
+ # serialization formats as possible. The idea is that as long as a serialization
62
+ # format offers some very basic primitives in *some* way, then we can support
63
+ # serializing to and from that format.
64
+ class RawMetadata(TypedDict, total=False):
65
+ """A dictionary of raw core metadata.
66
+
67
+ Each field in core metadata maps to a key of this dictionary (when data is
68
+ provided). The key is lower-case and underscores are used instead of dashes
69
+ compared to the equivalent core metadata field. Any core metadata field that
70
+ can be specified multiple times or can hold multiple values in a single
71
+ field has a key with a plural name. See :class:`Metadata` whose attributes
72
+ match the keys of this dictionary.
73
+
74
+ Core metadata fields that can be specified multiple times are stored as a
75
+ list or dict depending on which is appropriate for the field. Any fields
76
+ which hold multiple values in a single field are stored as a list.
77
+
78
+ """
79
+
80
+ # Metadata 1.0 - PEP 241
81
+ metadata_version: str
82
+ name: str
83
+ version: str
84
+ platforms: list[str]
85
+ summary: str
86
+ description: str
87
+ keywords: list[str]
88
+ home_page: str
89
+ author: str
90
+ author_email: str
91
+ license: str
92
+
93
+ # Metadata 1.1 - PEP 314
94
+ supported_platforms: list[str]
95
+ download_url: str
96
+ classifiers: list[str]
97
+ requires: list[str]
98
+ provides: list[str]
99
+ obsoletes: list[str]
100
+
101
+ # Metadata 1.2 - PEP 345
102
+ maintainer: str
103
+ maintainer_email: str
104
+ requires_dist: list[str]
105
+ provides_dist: list[str]
106
+ obsoletes_dist: list[str]
107
+ requires_python: str
108
+ requires_external: list[str]
109
+ project_urls: dict[str, str]
110
+
111
+ # Metadata 2.0
112
+ # PEP 426 attempted to completely revamp the metadata format
113
+ # but got stuck without ever being able to build consensus on
114
+ # it and ultimately ended up withdrawn.
115
+ #
116
+ # However, a number of tools had started emitting METADATA with
117
+ # `2.0` Metadata-Version, so for historical reasons, this version
118
+ # was skipped.
119
+
120
+ # Metadata 2.1 - PEP 566
121
+ description_content_type: str
122
+ provides_extra: list[str]
123
+
124
+ # Metadata 2.2 - PEP 643
125
+ dynamic: list[str]
126
+
127
+ # Metadata 2.3 - PEP 685
128
+ # No new fields were added in PEP 685, just some edge cases were
129
+ # tightened up to provide better interoperability.
130
+
131
+ # Metadata 2.4 - PEP 639
132
+ license_expression: str
133
+ license_files: list[str]
134
+
135
+
136
+ _STRING_FIELDS = {
137
+ "author",
138
+ "author_email",
139
+ "description",
140
+ "description_content_type",
141
+ "download_url",
142
+ "home_page",
143
+ "license",
144
+ "license_expression",
145
+ "maintainer",
146
+ "maintainer_email",
147
+ "metadata_version",
148
+ "name",
149
+ "requires_python",
150
+ "summary",
151
+ "version",
152
+ }
153
+
154
+ _LIST_FIELDS = {
155
+ "classifiers",
156
+ "dynamic",
157
+ "license_files",
158
+ "obsoletes",
159
+ "obsoletes_dist",
160
+ "platforms",
161
+ "provides",
162
+ "provides_dist",
163
+ "provides_extra",
164
+ "requires",
165
+ "requires_dist",
166
+ "requires_external",
167
+ "supported_platforms",
168
+ }
169
+
170
+ _DICT_FIELDS = {
171
+ "project_urls",
172
+ }
173
+
174
+
175
+ def _parse_keywords(data: str) -> list[str]:
176
+ """Split a string of comma-separated keywords into a list of keywords."""
177
+ return [k.strip() for k in data.split(",")]
178
+
179
+
180
+ def _parse_project_urls(data: list[str]) -> dict[str, str]:
181
+ """Parse a list of label/URL string pairings separated by a comma."""
182
+ urls = {}
183
+ for pair in data:
184
+ # Our logic is slightly tricky here as we want to try and do
185
+ # *something* reasonable with malformed data.
186
+ #
187
+ # The main thing that we have to worry about is data that does
188
+ # not have a ',' at all to split the label from the value. There
189
+ # isn't a singular right answer here, and we will fail validation
190
+ # later on (if the caller is validating) so it doesn't *really*
191
+ # matter, but since the missing value has to be an empty str
192
+ # and our return value is dict[str, str], if we let the key
193
+ # be the missing value, then they'd have multiple '' values that
194
+ # overwrite each other in an accumulating dict.
195
+ #
196
+ # The other potential issue is that it's possible to have the
197
+ # same label multiple times in the metadata, with no solid "right"
198
+ # answer with what to do in that case. As such, we'll do the only
199
+ # thing we can, which is treat the field as unparseable and add it
200
+ # to our list of unparsed fields.
201
+ parts = [p.strip() for p in pair.split(",", 1)]
202
+ parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items
203
+
204
+ # TODO: The spec doesn't say anything about if the keys should be
205
+ # considered case sensitive or not... logically they should
206
+ # be case-preserving and case-insensitive, but doing that
207
+ # would open up more cases where we might have duplicate
208
+ # entries.
209
+ label, url = parts
210
+ if label in urls:
211
+ # The label already exists in our set of urls, so this field
212
+ # is unparseable, and we can just add the whole thing to our
213
+ # unparseable data and stop processing it.
214
+ raise KeyError("duplicate labels in project urls")
215
+ urls[label] = url
216
+
217
+ return urls
218
+
219
+
220
+ def _get_payload(msg: email.message.Message, source: bytes | str) -> str:
221
+ """Get the body of the message."""
222
+ # If our source is a str, then our caller has managed encodings for us,
223
+ # and we don't need to deal with it.
224
+ if isinstance(source, str):
225
+ payload = msg.get_payload()
226
+ assert isinstance(payload, str)
227
+ return payload
228
+ # If our source is a bytes, then we're managing the encoding and we need
229
+ # to deal with it.
230
+ else:
231
+ bpayload = msg.get_payload(decode=True)
232
+ assert isinstance(bpayload, bytes)
233
+ try:
234
+ return bpayload.decode("utf8", "strict")
235
+ except UnicodeDecodeError as exc:
236
+ raise ValueError("payload in an invalid encoding") from exc
237
+
238
+
239
+ # The various parse_FORMAT functions here are intended to be as lenient as
240
+ # possible in their parsing, while still returning a correctly typed
241
+ # RawMetadata.
242
+ #
243
+ # To aid in this, we also generally want to do as little touching of the
244
+ # data as possible, except where there are possibly some historic holdovers
245
+ # that make valid data awkward to work with.
246
+ #
247
+ # While this is a lower level, intermediate format than our ``Metadata``
248
+ # class, some light touch ups can make a massive difference in usability.
249
+
250
+ # Map METADATA fields to RawMetadata.
251
+ _EMAIL_TO_RAW_MAPPING = {
252
+ "author": "author",
253
+ "author-email": "author_email",
254
+ "classifier": "classifiers",
255
+ "description": "description",
256
+ "description-content-type": "description_content_type",
257
+ "download-url": "download_url",
258
+ "dynamic": "dynamic",
259
+ "home-page": "home_page",
260
+ "keywords": "keywords",
261
+ "license": "license",
262
+ "license-expression": "license_expression",
263
+ "license-file": "license_files",
264
+ "maintainer": "maintainer",
265
+ "maintainer-email": "maintainer_email",
266
+ "metadata-version": "metadata_version",
267
+ "name": "name",
268
+ "obsoletes": "obsoletes",
269
+ "obsoletes-dist": "obsoletes_dist",
270
+ "platform": "platforms",
271
+ "project-url": "project_urls",
272
+ "provides": "provides",
273
+ "provides-dist": "provides_dist",
274
+ "provides-extra": "provides_extra",
275
+ "requires": "requires",
276
+ "requires-dist": "requires_dist",
277
+ "requires-external": "requires_external",
278
+ "requires-python": "requires_python",
279
+ "summary": "summary",
280
+ "supported-platform": "supported_platforms",
281
+ "version": "version",
282
+ }
283
+ _RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}
284
+
285
+
286
+ def parse_email(data: bytes | str) -> tuple[RawMetadata, dict[str, list[str]]]:
287
+ """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).
288
+
289
+ This function returns a two-item tuple of dicts. The first dict is of
290
+ recognized fields from the core metadata specification. Fields that can be
291
+ parsed and translated into Python's built-in types are converted
292
+ appropriately. All other fields are left as-is. Fields that are allowed to
293
+ appear multiple times are stored as lists.
294
+
295
+ The second dict contains all other fields from the metadata. This includes
296
+ any unrecognized fields. It also includes any fields which are expected to
297
+ be parsed into a built-in type but were not formatted appropriately. Finally,
298
+ any fields that are expected to appear only once but are repeated are
299
+ included in this dict.
300
+
301
+ """
302
+ raw: dict[str, str | list[str] | dict[str, str]] = {}
303
+ unparsed: dict[str, list[str]] = {}
304
+
305
+ if isinstance(data, str):
306
+ parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
307
+ else:
308
+ parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)
309
+
310
+ # We have to wrap parsed.keys() in a set, because in the case of multiple
311
+ # values for a key (a list), the key will appear multiple times in the
312
+ # list of keys, but we're avoiding that by using get_all().
313
+ for name in frozenset(parsed.keys()):
314
+ # Header names in RFC are case insensitive, so we'll normalize to all
315
+ # lower case to make comparisons easier.
316
+ name = name.lower()
317
+
318
+ # We use get_all() here, even for fields that aren't multiple use,
319
+ # because otherwise someone could have e.g. two Name fields, and we
320
+ # would just silently ignore it rather than doing something about it.
321
+ headers = parsed.get_all(name) or []
322
+
323
+ # The way the email module works when parsing bytes is that it
324
+ # unconditionally decodes the bytes as ascii using the surrogateescape
325
+ # handler. When you pull that data back out (such as with get_all()),
326
+ # it looks to see if the str has any surrogate escapes, and if it does
327
+ # it wraps it in a Header object instead of returning the string.
328
+ #
329
+ # As such, we'll look for those Header objects, and fix up the encoding.
330
+ value = []
331
+ # Flag if we have run into any issues processing the headers, thus
332
+ # signalling that the data belongs in 'unparsed'.
333
+ valid_encoding = True
334
+ for h in headers:
335
+ # It's unclear if this can return more types than just a Header or
336
+ # a str, so we'll just assert here to make sure.
337
+ assert isinstance(h, (email.header.Header, str))
338
+
339
+ # If it's a header object, we need to do our little dance to get
340
+ # the real data out of it. In cases where there is invalid data
341
+ # we're going to end up with mojibake, but there's no obvious, good
342
+ # way around that without reimplementing parts of the Header object
343
+ # ourselves.
344
+ #
345
+ # That should be fine since, if mojibake happens, this key is
346
+ # going into the unparsed dict anyways.
347
+ if isinstance(h, email.header.Header):
348
+ # The Header object stores its data as chunks, and each chunk
349
+ # can be independently encoded, so we'll need to check each
350
+ # of them.
351
+ chunks: list[tuple[bytes, str | None]] = []
352
+ for bin, encoding in email.header.decode_header(h):
353
+ try:
354
+ bin.decode("utf8", "strict")
355
+ except UnicodeDecodeError:
356
+ # Enable mojibake.
357
+ encoding = "latin1"
358
+ valid_encoding = False
359
+ else:
360
+ encoding = "utf8"
361
+ chunks.append((bin, encoding))
362
+
363
+ # Turn our chunks back into a Header object, then let that
364
+ # Header object do the right thing to turn them into a
365
+ # string for us.
366
+ value.append(str(email.header.make_header(chunks)))
367
+ # This is already a string, so just add it.
368
+ else:
369
+ value.append(h)
370
+
371
+ # We've processed all of our values to get them into a list of str,
372
+ # but we may have mojibake data, in which case this is an unparsed
373
+ # field.
374
+ if not valid_encoding:
375
+ unparsed[name] = value
376
+ continue
377
+
378
+ raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
379
+ if raw_name is None:
380
+ # This is a bit of a weird situation: we've encountered a key that
381
+ # we don't recognize, so we don't know whether it's meant
382
+ # to be a list or not.
383
+ #
384
+ # Since we can't really tell one way or another, we'll just leave it
385
+ # as a list, even though it may be a single item list, because that's
386
+ # what makes the most sense for email headers.
387
+ unparsed[name] = value
388
+ continue
389
+
390
+ # If this is one of our string fields, then we'll check to see if our
391
+ # value is a list of a single item. If it is then we'll assume that
392
+ # it was emitted as a single string, and unwrap the str from inside
393
+ # the list.
394
+ #
395
+ # If it's any other kind of data, then we haven't the faintest clue
396
+ # what we should parse it as, and we have to just add it to our list
397
+ # of unparsed stuff.
398
+ if raw_name in _STRING_FIELDS and len(value) == 1:
399
+ raw[raw_name] = value[0]
400
+ # If this is one of our list of string fields, then we can just assign
401
+ # the value, since email *only* has strings, and our get_all() call
402
+ # above ensures that this is a list.
403
+ elif raw_name in _LIST_FIELDS:
404
+ raw[raw_name] = value
405
+ # Special Case: Keywords
406
+ # The keywords field is implemented in the metadata spec as a str,
407
+ # but it conceptually is a list of strings, and is serialized using
408
+ # ", ".join(keywords), so we'll do some light data massaging to turn
409
+ # this into what it logically is.
410
+ elif raw_name == "keywords" and len(value) == 1:
411
+ raw[raw_name] = _parse_keywords(value[0])
412
+ # Special Case: Project-URL
413
+ # The project urls is implemented in the metadata spec as a list of
414
+ # specially-formatted strings that represent a key and a value, which
415
+ # is fundamentally a mapping, however the email format doesn't support
416
+ # mappings in a sane way, so it was crammed into a list of strings
417
+ # instead.
418
+ #
419
+ # We will do a little light data massaging to turn this into a map as
420
+ # it logically should be.
421
+ elif raw_name == "project_urls":
422
+ try:
423
+ raw[raw_name] = _parse_project_urls(value)
424
+ except KeyError:
425
+ unparsed[name] = value
426
+ # Nothing that we've done has managed to parse this, so it'll just
427
+ # throw it in our unparseable data and move on.
428
+ else:
429
+ unparsed[name] = value
430
+
431
+ # We need to support getting the Description from the message payload in
432
+ # addition to getting it from the headers. This does mean, though, there
433
+ # is the possibility of it being set both ways, in which case we put both
434
+ # in 'unparsed' since we don't know which is right.
435
+ try:
436
+ payload = _get_payload(parsed, data)
437
+ except ValueError:
438
+ unparsed.setdefault("description", []).append(
439
+ parsed.get_payload(decode=isinstance(data, bytes)) # type: ignore[call-overload]
440
+ )
441
+ else:
442
+ if payload:
443
+ # Check to see if we've already got a description, if so then both
444
+ # it, and this body move to unparseable.
445
+ if "description" in raw:
446
+ description_header = cast(str, raw.pop("description"))
447
+ unparsed.setdefault("description", []).extend(
448
+ [description_header, payload]
449
+ )
450
+ elif "description" in unparsed:
451
+ unparsed["description"].append(payload)
452
+ else:
453
+ raw["description"] = payload
454
+
455
+ # We need to cast our `raw` to RawMetadata because a TypedDict only supports
456
+ # literal key names while we compute our key names dynamically; still, the
457
+ # way this function is implemented means our `TypedDict` can only have valid key
458
+ # names.
459
+ return cast(RawMetadata, raw), unparsed
460
+
461
+
462
+ _NOT_FOUND = object()
463
+
464
+
465
+ # Keep the two values in sync.
466
+ _VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
467
+ _MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
468
+
469
+ _REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])
470
+
471
+
472
+ class _Validator(Generic[T]):
473
+ """Validate a metadata field.
474
+
475
+ All _process_*() methods correspond to a core metadata field. The method is
476
+ called with the field's raw value. If the raw value is valid it is returned
477
+ in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
478
+ If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
479
+ as appropriate).
480
+ """
481
+
482
+ name: str
483
+ raw_name: str
484
+ added: _MetadataVersion
485
+
486
+ def __init__(
487
+ self,
488
+ *,
489
+ added: _MetadataVersion = "1.0",
490
+ ) -> None:
491
+ self.added = added
492
+
493
+ def __set_name__(self, _owner: Metadata, name: str) -> None:
494
+ self.name = name
495
+ self.raw_name = _RAW_TO_EMAIL_MAPPING[name]
496
+
497
+ def __get__(self, instance: Metadata, _owner: type[Metadata]) -> T:
498
+ # With Python 3.8, the caching can be replaced with functools.cached_property().
499
+ # No need to check the cache as attribute lookup will resolve into the
500
+ # instance's __dict__ before __get__ is called.
501
+ cache = instance.__dict__
502
+ value = instance._raw.get(self.name)
503
+
504
+ # To make the _process_* methods easier, we'll check if the value is None
505
+ # and if this field is NOT a required attribute, and if both of those
506
+ # things are true, we'll skip the converter. This will mean that the
507
+ # converters never have to deal with the None union.
508
+ if self.name in _REQUIRED_ATTRS or value is not None:
509
+ try:
510
+ converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
511
+ except AttributeError:
512
+ pass
513
+ else:
514
+ value = converter(value)
515
+
516
+ cache[self.name] = value
517
+ try:
518
+ del instance._raw[self.name] # type: ignore[misc]
519
+ except KeyError:
520
+ pass
521
+
522
+ return cast(T, value)
523
+
524
+ def _invalid_metadata(
525
+ self, msg: str, cause: Exception | None = None
526
+ ) -> InvalidMetadata:
527
+ exc = InvalidMetadata(
528
+ self.raw_name, msg.format_map({"field": repr(self.raw_name)})
529
+ )
530
+ exc.__cause__ = cause
531
+ return exc
532
+
533
+ def _process_metadata_version(self, value: str) -> _MetadataVersion:
534
+ # Implicitly makes Metadata-Version required.
535
+ if value not in _VALID_METADATA_VERSIONS:
536
+ raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
537
+ return cast(_MetadataVersion, value)
538
+
539
+ def _process_name(self, value: str) -> str:
540
+ if not value:
541
+ raise self._invalid_metadata("{field} is a required field")
542
+ # Validate the name as a side-effect.
543
+ try:
544
+ utils.canonicalize_name(value, validate=True)
545
+ except utils.InvalidName as exc:
546
+ raise self._invalid_metadata(
547
+ f"{value!r} is invalid for {{field}}", cause=exc
548
+ ) from exc
549
+ else:
550
+ return value
551
+
552
+ def _process_version(self, value: str) -> version_module.Version:
553
+ if not value:
554
+ raise self._invalid_metadata("{field} is a required field")
555
+ try:
556
+ return version_module.parse(value)
557
+ except version_module.InvalidVersion as exc:
558
+ raise self._invalid_metadata(
559
+ f"{value!r} is invalid for {{field}}", cause=exc
560
+ ) from exc
561
+
562
+ def _process_summary(self, value: str) -> str:
563
+ """Check the field contains no newlines."""
564
+ if "\n" in value:
565
+ raise self._invalid_metadata("{field} must be a single line")
566
+ return value
567
+
568
+ def _process_description_content_type(self, value: str) -> str:
569
+ content_types = {"text/plain", "text/x-rst", "text/markdown"}
570
+ message = email.message.EmailMessage()
571
+ message["content-type"] = value
572
+
573
+ content_type, parameters = (
574
+ # Defaults to `text/plain` if parsing failed.
575
+ message.get_content_type().lower(),
576
+ message["content-type"].params,
577
+ )
578
+ # Check if content-type is valid or defaulted to `text/plain` and thus was
579
+ # not parseable.
580
+ if content_type not in content_types or content_type not in value.lower():
581
+ raise self._invalid_metadata(
582
+ f"{{field}} must be one of {list(content_types)}, not {value!r}"
583
+ )
584
+
585
+ charset = parameters.get("charset", "UTF-8")
586
+ if charset != "UTF-8":
587
+ raise self._invalid_metadata(
588
+ f"{{field}} can only specify the UTF-8 charset, not {charset!r}"
589
+ )
590
+
591
+ markdown_variants = {"GFM", "CommonMark"}
592
+ variant = parameters.get("variant", "GFM") # Use an acceptable default.
593
+ if content_type == "text/markdown" and variant not in markdown_variants:
594
+ raise self._invalid_metadata(
595
+ f"valid Markdown variants for {{field}} are {list(markdown_variants)}, "
596
+ f"not {variant!r}",
597
+ )
598
+ return value
599
+
600
+ def _process_dynamic(self, value: list[str]) -> list[str]:
601
+ for dynamic_field in map(str.lower, value):
602
+ if dynamic_field in {"name", "version", "metadata-version"}:
603
+ raise self._invalid_metadata(
604
+ f"{dynamic_field!r} is not allowed as a dynamic field"
605
+ )
606
+ elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
607
+ raise self._invalid_metadata(
608
+ f"{dynamic_field!r} is not a valid dynamic field"
609
+ )
610
+ return list(map(str.lower, value))
611
+
612
+ def _process_provides_extra(
613
+ self,
614
+ value: list[str],
615
+ ) -> list[utils.NormalizedName]:
616
+ normalized_names = []
617
+ try:
618
+ for name in value:
619
+ normalized_names.append(utils.canonicalize_name(name, validate=True))
620
+ except utils.InvalidName as exc:
621
+ raise self._invalid_metadata(
622
+ f"{name!r} is invalid for {{field}}", cause=exc
623
+ ) from exc
624
+ else:
625
+ return normalized_names
626
+
627
+ def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
628
+ try:
629
+ return specifiers.SpecifierSet(value)
630
+ except specifiers.InvalidSpecifier as exc:
631
+ raise self._invalid_metadata(
632
+ f"{value!r} is invalid for {{field}}", cause=exc
633
+ ) from exc
634
+
635
+ def _process_requires_dist(
636
+ self,
637
+ value: list[str],
638
+ ) -> list[requirements.Requirement]:
639
+ reqs = []
640
+ try:
641
+ for req in value:
642
+ reqs.append(requirements.Requirement(req))
643
+ except requirements.InvalidRequirement as exc:
644
+ raise self._invalid_metadata(
645
+ f"{req!r} is invalid for {{field}}", cause=exc
646
+ ) from exc
647
+ else:
648
+ return reqs
649
+
650
+ def _process_license_expression(
651
+ self, value: str
652
+ ) -> NormalizedLicenseExpression | None:
653
+ try:
654
+ return licenses.canonicalize_license_expression(value)
655
+ except ValueError as exc:
656
+ raise self._invalid_metadata(
657
+ f"{value!r} is invalid for {{field}}", cause=exc
658
+ ) from exc
659
+
660
+ def _process_license_files(self, value: list[str]) -> list[str]:
661
+ paths = []
662
+ for path in value:
663
+ if ".." in path:
664
+ raise self._invalid_metadata(
665
+ f"{path!r} is invalid for {{field}}, "
666
+ "parent directory indicators are not allowed"
667
+ )
668
+ if "*" in path:
669
+ raise self._invalid_metadata(
670
+ f"{path!r} is invalid for {{field}}, paths must be resolved"
671
+ )
672
+ if (
673
+ pathlib.PurePosixPath(path).is_absolute()
674
+ or pathlib.PureWindowsPath(path).is_absolute()
675
+ ):
676
+ raise self._invalid_metadata(
677
+ f"{path!r} is invalid for {{field}}, paths must be relative"
678
+ )
679
+ if pathlib.PureWindowsPath(path).as_posix() != path:
680
+ raise self._invalid_metadata(
681
+ f"{path!r} is invalid for {{field}}, "
682
+ "paths must use '/' delimiter"
683
+ )
684
+ paths.append(path)
685
+ return paths
686
+
687
+
688
+ class Metadata:
689
+ """Representation of distribution metadata.
690
+
691
+ Compared to :class:`RawMetadata`, this class provides objects representing
692
+ metadata fields instead of only using built-in types. Any invalid metadata
693
+ will cause :exc:`InvalidMetadata` to be raised (with a
694
+ :py:attr:`~BaseException.__cause__` attribute as appropriate).
695
+ """
696
+
697
+ _raw: RawMetadata
698
+
699
+ @classmethod
700
+ def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> Metadata:
701
+ """Create an instance from :class:`RawMetadata`.
702
+
703
+ If *validate* is true, all metadata will be validated. All exceptions
704
+ related to validation will be gathered and raised as an :class:`ExceptionGroup`.
705
+ """
706
+ ins = cls()
707
+ ins._raw = data.copy() # Mutations occur due to caching enriched values.
708
+
709
+ if validate:
710
+ exceptions: list[Exception] = []
711
+ try:
712
+ metadata_version = ins.metadata_version
713
+ metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
714
+ except InvalidMetadata as metadata_version_exc:
715
+ exceptions.append(metadata_version_exc)
716
+ metadata_version = None
717
+
718
+ # Make sure to check for the fields that are present, and the required
719
+ # fields (so their absence can be reported).
720
+ fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
721
+ # Remove fields that have already been checked.
722
+ fields_to_check -= {"metadata_version"}
723
+
724
+ for key in fields_to_check:
725
+ try:
726
+ if metadata_version:
727
+ # Can't use getattr() as that triggers the descriptor protocol, which
728
+ # will fail due to no value for the instance argument.
729
+ try:
730
+ field_metadata_version = cls.__dict__[key].added
731
+ except KeyError:
732
+ exc = InvalidMetadata(key, f"unrecognized field: {key!r}")
733
+ exceptions.append(exc)
734
+ continue
735
+ field_age = _VALID_METADATA_VERSIONS.index(
736
+ field_metadata_version
737
+ )
738
+ if field_age > metadata_age:
739
+ field = _RAW_TO_EMAIL_MAPPING[key]
740
+ exc = InvalidMetadata(
741
+ field,
742
+ f"{field} introduced in metadata version "
743
+ f"{field_metadata_version}, not {metadata_version}",
744
+ )
745
+ exceptions.append(exc)
746
+ continue
747
+ getattr(ins, key)
748
+ except InvalidMetadata as exc:
749
+ exceptions.append(exc)
750
+
751
+ if exceptions:
752
+ raise ExceptionGroup("invalid metadata", exceptions)
753
+
754
+ return ins
755
+
756
+ @classmethod
757
+ def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata:
758
+ """Parse metadata from email headers.
759
+
760
+ If *validate* is true, the metadata will be validated. All exceptions
761
+ related to validation will be gathered and raised as an :class:`ExceptionGroup`.
762
+ """
763
+ raw, unparsed = parse_email(data)
764
+
765
+ if validate:
766
+ exceptions: list[Exception] = []
767
+ for unparsed_key in unparsed:
768
+ if unparsed_key in _EMAIL_TO_RAW_MAPPING:
769
+ message = f"{unparsed_key!r} has invalid data"
770
+ else:
771
+ message = f"unrecognized field: {unparsed_key!r}"
772
+ exceptions.append(InvalidMetadata(unparsed_key, message))
773
+
774
+ if exceptions:
775
+ raise ExceptionGroup("unparsed", exceptions)
776
+
777
+ try:
778
+ return cls.from_raw(raw, validate=validate)
779
+ except ExceptionGroup as exc_group:
780
+ raise ExceptionGroup(
781
+ "invalid or unparsed metadata", exc_group.exceptions
782
+ ) from None
783
+
784
+ metadata_version: _Validator[_MetadataVersion] = _Validator()
785
+ """:external:ref:`core-metadata-metadata-version`
786
+ (required; validated to be a valid metadata version)"""
787
+ # `name` is not normalized/typed to NormalizedName so as to provide access to
788
+ # the original/raw name.
789
+ name: _Validator[str] = _Validator()
790
+ """:external:ref:`core-metadata-name`
791
+ (required; validated using :func:`~packaging.utils.canonicalize_name` and its
792
+ *validate* parameter)"""
793
+ version: _Validator[version_module.Version] = _Validator()
794
+ """:external:ref:`core-metadata-version` (required)"""
795
+ dynamic: _Validator[list[str] | None] = _Validator(
796
+ added="2.2",
797
+ )
798
+ """:external:ref:`core-metadata-dynamic`
799
+ (validated against core metadata field names and lowercased)"""
800
+ platforms: _Validator[list[str] | None] = _Validator()
801
+ """:external:ref:`core-metadata-platform`"""
802
+ supported_platforms: _Validator[list[str] | None] = _Validator(added="1.1")
803
+ """:external:ref:`core-metadata-supported-platform`"""
804
+ summary: _Validator[str | None] = _Validator()
805
+ """:external:ref:`core-metadata-summary` (validated to contain no newlines)"""
806
+ description: _Validator[str | None] = _Validator() # TODO 2.1: can be in body
807
+ """:external:ref:`core-metadata-description`"""
808
+ description_content_type: _Validator[str | None] = _Validator(added="2.1")
809
+ """:external:ref:`core-metadata-description-content-type` (validated)"""
810
+ keywords: _Validator[list[str] | None] = _Validator()
811
+ """:external:ref:`core-metadata-keywords`"""
812
+ home_page: _Validator[str | None] = _Validator()
813
+ """:external:ref:`core-metadata-home-page`"""
814
+ download_url: _Validator[str | None] = _Validator(added="1.1")
815
+ """:external:ref:`core-metadata-download-url`"""
816
+ author: _Validator[str | None] = _Validator()
817
+ """:external:ref:`core-metadata-author`"""
818
+ author_email: _Validator[str | None] = _Validator()
819
+ """:external:ref:`core-metadata-author-email`"""
820
+ maintainer: _Validator[str | None] = _Validator(added="1.2")
821
+ """:external:ref:`core-metadata-maintainer`"""
822
+ maintainer_email: _Validator[str | None] = _Validator(added="1.2")
823
+ """:external:ref:`core-metadata-maintainer-email`"""
824
+ license: _Validator[str | None] = _Validator()
825
+ """:external:ref:`core-metadata-license`"""
826
+ license_expression: _Validator[NormalizedLicenseExpression | None] = _Validator(
827
+ added="2.4"
828
+ )
829
+ """:external:ref:`core-metadata-license-expression`"""
830
+ license_files: _Validator[list[str] | None] = _Validator(added="2.4")
831
+ """:external:ref:`core-metadata-license-file`"""
832
+ classifiers: _Validator[list[str] | None] = _Validator(added="1.1")
833
+ """:external:ref:`core-metadata-classifier`"""
834
+ requires_dist: _Validator[list[requirements.Requirement] | None] = _Validator(
835
+ added="1.2"
836
+ )
837
+ """:external:ref:`core-metadata-requires-dist`"""
838
+ requires_python: _Validator[specifiers.SpecifierSet | None] = _Validator(
839
+ added="1.2"
840
+ )
841
+ """:external:ref:`core-metadata-requires-python`"""
842
+ # Because `Requires-External` allows for non-PEP 440 version specifiers, we
843
+ # don't do any processing on the values.
844
+ requires_external: _Validator[list[str] | None] = _Validator(added="1.2")
845
+ """:external:ref:`core-metadata-requires-external`"""
846
+ project_urls: _Validator[dict[str, str] | None] = _Validator(added="1.2")
847
+ """:external:ref:`core-metadata-project-url`"""
848
+ # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
849
+ # regardless of metadata version.
850
+ provides_extra: _Validator[list[utils.NormalizedName] | None] = _Validator(
851
+ added="2.1",
852
+ )
853
+ """:external:ref:`core-metadata-provides-extra`"""
854
+ provides_dist: _Validator[list[str] | None] = _Validator(added="1.2")
855
+ """:external:ref:`core-metadata-provides-dist`"""
856
+ obsoletes_dist: _Validator[list[str] | None] = _Validator(added="1.2")
857
+ """:external:ref:`core-metadata-obsoletes-dist`"""
858
+ requires: _Validator[list[str] | None] = _Validator(added="1.1")
859
+ """``Requires`` (deprecated)"""
860
+ provides: _Validator[list[str] | None] = _Validator(added="1.1")
861
+ """``Provides`` (deprecated)"""
862
+ obsoletes: _Validator[list[str] | None] = _Validator(added="1.1")
863
+ """``Obsoletes`` (deprecated)"""
meow/lib/python3.13/site-packages/packaging/py.typed ADDED
File without changes
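
py.typed is intentionally empty: it is the PEP 561 marker file that tells type checkers the packaging distribution ships inline type annotations.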