royleibov committed on
Commit
fc0c277
·
1 Parent(s): 9e890cc

Add new zipnn scripts

Browse files
zipnn_compress_file.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import sys
4
+ import argparse
5
+ import time
6
+
7
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
8
+
9
# Binary size multipliers, expressed in bytes.
KB = 1 << 10
MB = 1 << 20
GB = 1 << 30
12
+
13
+
14
def check_and_install_zipnn():
    """Make sure the ``zipnn`` package can be imported, installing it if needed.

    Tries to import ``zipnn``. On ``ImportError`` it runs
    ``python -m pip install zipnn --upgrade`` with the current interpreter and
    then imports the package again.

    Raises:
        subprocess.CalledProcessError: If the pip command exits with a
            non-zero status.
        ImportError: If ``zipnn`` still cannot be imported after installation.
    """
    try:
        import zipnn  # noqa: F401  (imported only to probe availability)
    except ImportError:
        import importlib

        print("zipnn not found. Installing...")
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "zipnn", "--upgrade"]
        )
        # The package appeared on disk after the interpreter started, so the
        # import system's cached directory listings have to be refreshed
        # before the retry can reliably see it.
        importlib.invalidate_caches()
        import zipnn  # noqa: F401
30
+
31
+
32
def parse_streaming_chunk_size(
    streaming_chunk_size,
):
    """Convert a chunk-size specification into a number of bytes.

    Args:
        streaming_chunk_size: A non-negative integer (or a string of digits)
            giving the size in bytes, or a string made of an integer followed
            by a unit: ``k``/``kb``, ``m``/``mb`` or ``g``/``gb`` in any
            letter case (for example ``"64KB"``, ``"1m"``, ``"2 GB"``).

    Returns:
        int: The size in bytes.

    Raises:
        ValueError: If the unit is not k, m or g, or if the numeric part is
            not a non-negative integer.
    """
    # Spelled out locally so the parser is self-contained.
    multipliers = {"k": 1024, "m": 1024**2, "g": 1024**3}

    text = str(streaming_chunk_size).strip().lower()
    # A trailing "b" is optional: "64kb" and "64k" mean the same thing, and a
    # bare "512b" is simply a byte count.
    if text.endswith("b"):
        text = text[:-1].rstrip()
    if text.isdecimal():
        return int(text)

    number, unit = text[:-1].rstrip(), text[-1:]
    if unit not in multipliers:
        raise ValueError(f"Invalid size unit: {unit}. Use 'k', 'm', or 'g'.")
    if not number.isdecimal():
        raise ValueError(
            f"Invalid streaming chunk size: {streaming_chunk_size!r}. "
            "Expected an integer optionally followed by KB, MB or GB."
        )
    return multipliers[unit] * int(number)
51
+
52
+
53
def compress_file(
    input_file,
    dtype="",
    streaming_chunk_size=1048576,
    delete=False,
    force=False,
):
    """Compress ``input_file`` into ``input_file + ".znn"`` using ZipNN.

    Args:
        input_file: Path of the file to compress.
        dtype: Any truthy value selects ``bytearray_dtype="float32"`` for the
            compressor; a falsy value leaves the ZipNN default in place.
        streaming_chunk_size: Chunk size as an int or a string understood by
            ``parse_streaming_chunk_size``; the parsed value is forwarded to
            ZipNN as ``streaming_chunk_kb``.
        delete: When true, the input file is removed and nothing is
            compressed.
        force: When true, an existing ``.znn`` file is overwritten without
            asking for confirmation.

    Returns:
        None. Progress and size statistics are printed to stdout.
    """
    streaming_chunk_size = parse_streaming_chunk_size(streaming_chunk_size)
    if not os.path.exists(input_file):
        print("File not found")
        return
    if delete:
        print(f"Deleting {input_file}...")
        os.remove(input_file)
        return

    output_file = input_file + ".znn"
    if not force and os.path.exists(output_file):
        user_input = (
            input(f"{output_file} already exists; overwrite (y/n)? ").strip().lower()
        )
        if user_input not in ("yes", "y"):
            print(f"Skipping {input_file}...")
            return
    print(f"Compressing {input_file}...")

    # Imported only once compression is really going to happen, so the
    # delete / missing-file / skip paths do not need the package.
    import zipnn

    zipnn_options = {
        "is_streaming": True,
        "streaming_chunk_kb": streaming_chunk_size,
    }
    if dtype:
        zipnn_options["bytearray_dtype"] = "float32"
    zpn = zipnn.ZipNN(**zipnn_options)

    start_time = time.time()
    # The whole input is read and compressed in one call.
    with open(input_file, "rb") as infile:
        data = infile.read()
    compressed = zpn.compress(data)
    # The output is opened only after compression succeeded, so a failure
    # does not truncate a previously existing .znn file.
    with open(output_file, "wb") as outfile:
        if compressed:
            outfile.write(compressed)
    elapsed = time.time() - start_time

    file_size_before = len(data)
    file_size_after = len(compressed) if compressed else 0
    # An empty input has no meaningful ratio; avoid dividing by zero.
    if file_size_before:
        remaining = f"{file_size_after/file_size_before*100:.02f}%"
    else:
        remaining = "n/a"
    print(f"Compressed {input_file} to {output_file}")
    print(
        f"Original size: {file_size_before/GB:.02f}GB size after compression: {file_size_after/GB:.02f}GB, Remaining size is {remaining} of original, time: {elapsed:.02f}"
    )
108
+
109
+
110
if __name__ == "__main__":
    # Command-line entry point: compress (or, with --delete, remove) one file.
    # With no arguments at all, print a short usage hint and exit with
    # status 1 before argparse gets involved.
    if len(sys.argv) < 2:
        print("Usage: python zipnn_compress_file.py <input_file>")
        print("Example: python zipnn_compress_file.py 'model.safetensors'")
        sys.exit(1)

    parser = argparse.ArgumentParser(description="Enter a file path to compress.")
    parser.add_argument(
        "input_file",
        type=str,
        help="Specify the path to the file to compress.",
    )
    parser.add_argument(
        "--float32",
        action="store_true",
        help="A flag that triggers float32 compression",
    )
    parser.add_argument(
        "--streaming_chunk_size",
        type=str,
        help="An optional streaming chunk size. The format is int (for size in Bytes) or int+KB/MB/GB. Default is 1MB",
    )
    parser.add_argument(
        "--delete",
        action="store_true",
        help="A flag that triggers deletion of a single file instead of compression",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="A flag that forces overwriting when compressing.",
    )
    args = parser.parse_args()

    # Only options that were actually given are forwarded, so the defaults
    # declared on compress_file() stay in effect otherwise.
    optional_kwargs = {}
    if args.float32:
        optional_kwargs["dtype"] = 32
    if args.streaming_chunk_size is not None:
        optional_kwargs["streaming_chunk_size"] = args.streaming_chunk_size
    if args.delete:
        optional_kwargs["delete"] = args.delete
    if args.force:
        optional_kwargs["force"] = args.force

    check_and_install_zipnn()
    compress_file(args.input_file, **optional_kwargs)
zipnn_compress_path.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import sys
4
+ import argparse
5
+ from concurrent.futures import (
6
+ ProcessPoolExecutor,
7
+ as_completed,
8
+ )
9
+ from zipnn_compress_file import compress_file
10
+ import zipnn
11
+
12
+ sys.path.append(
13
+ os.path.abspath(
14
+ os.path.join(
15
+ os.path.dirname(__file__), ".."
16
+ )
17
+ )
18
+ )
19
+
20
+
21
# Binary size multipliers, expressed in bytes.
KB = 2**10
MB = 2**20
GB = 2**30
24
+
25
+
26
def check_and_install_zipnn():
    """Make sure the ``zipnn`` package can be imported, installing it if needed.

    Tries to import ``zipnn``. On ``ImportError`` it runs
    ``python -m pip install zipnn --upgrade`` with the current interpreter and
    then imports the package again.

    Raises:
        subprocess.CalledProcessError: If the pip command exits with a
            non-zero status.
        ImportError: If ``zipnn`` still cannot be imported after installation.
    """
    try:
        import zipnn  # noqa: F401  (imported only to probe availability)
    except ImportError:
        import importlib

        print("zipnn not found. Installing...")
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "zipnn", "--upgrade"]
        )
        # The package appeared on disk after the interpreter started, so the
        # import system's cached directory listings have to be refreshed
        # before the retry can reliably see it.
        importlib.invalidate_caches()
        import zipnn  # noqa: F401
42
+
43
+
44
def parse_streaming_chunk_size(
    streaming_chunk_size,
):
    """Convert a chunk-size specification into a number of bytes.

    Args:
        streaming_chunk_size: A non-negative integer (or a string of digits)
            giving the size in bytes, or a string made of an integer followed
            by a unit: ``k``/``kb``, ``m``/``mb`` or ``g``/``gb`` in any
            letter case (for example ``"64KB"``, ``"1m"``, ``"2 GB"``).

    Returns:
        int: The size in bytes.

    Raises:
        ValueError: If the unit is not k, m or g, or if the numeric part is
            not a non-negative integer.
    """
    # Spelled out locally so the parser is self-contained.
    multipliers = {"k": 1024, "m": 1024**2, "g": 1024**3}

    text = str(streaming_chunk_size).strip().lower()
    # A trailing "b" is optional: "64kb" and "64k" mean the same thing, and a
    # bare "512b" is simply a byte count.
    if text.endswith("b"):
        text = text[:-1].rstrip()
    if text.isdecimal():
        return int(text)

    number, unit = text[:-1].rstrip(), text[-1:]
    if unit not in multipliers:
        raise ValueError(f"Invalid size unit: {unit}. Use 'k', 'm', or 'g'.")
    if not number.isdecimal():
        raise ValueError(
            f"Invalid streaming chunk size: {streaming_chunk_size!r}. "
            "Expected an integer optionally followed by KB, MB or GB."
        )
    return multipliers[unit] * int(number)
69
+
70
+
71
def compress_files_with_suffix(
    suffix,
    dtype="",
    streaming_chunk_size=1048576,
    path=".",
    delete=False,
    r=False,
    force=False,
    max_processes=1,
):
    """Run ``compress_file`` on every file under ``path`` ending in ``suffix``.

    Args:
        suffix: File-name ending used to select files.
        dtype: Forwarded unchanged to ``compress_file``.
        streaming_chunk_size: Int or string understood by
            ``parse_streaming_chunk_size``; the parsed value is forwarded to
            ``compress_file``.
        path: Directory to search.
        delete: Forwarded unchanged to ``compress_file``.
        r: When true, sub-directories are searched as well (``os.walk``);
            otherwise only the entries directly inside ``path`` are checked.
        force: When true, existing ``.znn`` files are overwritten without
            asking for confirmation.
        max_processes: Number of worker processes in the pool.

    Returns:
        None. Worker exceptions are caught and reported on stdout.
    """
    streaming_chunk_size = parse_streaming_chunk_size(streaming_chunk_size)
    directories_to_search = os.walk(path) if r else [(path, [], os.listdir(path))]

    file_list = []
    for root, _, files in directories_to_search:
        for file_name in files:
            if not file_name.endswith(suffix):
                continue
            full_path = os.path.join(root, file_name)
            # The existing-output check must look next to the input file, not
            # in the current working directory.
            compressed_path = full_path + ".znn"
            if not force and os.path.exists(compressed_path):
                user_input = (
                    input(f"{compressed_path} already exists; overwrite (y/n)? ")
                    .strip()
                    .lower()
                )
                if user_input not in ("y", "yes"):
                    print(f"Skipping {file_name}...")
                    continue
            file_list.append(full_path)

    if not file_list:
        print(f"No files with the suffix '{suffix}' found.")
        return

    with ProcessPoolExecutor(max_workers=max_processes) as executor:
        # Every file is queued once; the pool itself limits how many run at
        # the same time. force=True because overwrite confirmation already
        # happened above and worker processes must not prompt.
        future_to_file = {
            executor.submit(
                compress_file,
                file_path,
                dtype,
                streaming_chunk_size,
                delete,
                True,
            ): file_path
            for file_path in file_list
        }
        for future in as_completed(future_to_file):
            try:
                future.result()
            except Exception as exc:
                # One failing file must not stop the remaining ones.
                print(f"File {future_to_file[future]} generated an exception: {exc}")
169
+
170
+
171
if __name__ == "__main__":
    # Command-line entry point: compress every file with a given suffix.
    # With no arguments at all, print a short usage hint and exit with
    # status 1 before argparse gets involved.
    if len(sys.argv) < 2:
        print("Usage: python zipnn_compress_path.py <suffix>")
        print("Example: python zipnn_compress_path.py 'safetensors'")
        sys.exit(1)

    parser = argparse.ArgumentParser(
        description="Enter a suffix to compress, (optional) dtype, (optional) streaming chunk size, (optional) path to files."
    )
    parser.add_argument(
        "suffix",
        type=str,
        help="Specify the file suffix to compress all files with that suffix. If a single file name is provided, only that file will be compressed.",
    )
    parser.add_argument(
        "--float32",
        action="store_true",
        help="A flag that triggers float32 compression",
    )
    parser.add_argument(
        "--streaming_chunk_size",
        type=str,
        help="An optional streaming chunk size. The format is int (for size in Bytes) or int+KB/MB/GB. Default is 1MB",
    )
    parser.add_argument(
        "--path",
        type=str,
        help="Path to files to compress",
    )
    parser.add_argument(
        "--delete",
        action="store_true",
        help="A flag that triggers deletion of a single file instead of compression",
    )
    # Both spellings share one destination so that either of them enables
    # the recursive search.
    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help="A flag that triggers recursive search on all subdirectories",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="A flag that forces overwriting when compressing.",
    )
    parser.add_argument(
        "--max_processes",
        type=int,
        help="The amount of maximum processes.",
    )
    args = parser.parse_args()

    # Only options that were actually given are forwarded, so the defaults
    # declared on compress_files_with_suffix() stay in effect otherwise.
    optional_kwargs = {}
    if args.float32:
        optional_kwargs["dtype"] = 32
    if args.streaming_chunk_size is not None:
        optional_kwargs["streaming_chunk_size"] = args.streaming_chunk_size
    if args.path is not None:
        optional_kwargs["path"] = args.path
    if args.delete:
        optional_kwargs["delete"] = args.delete
    if args.recursive:
        optional_kwargs["r"] = True
    if args.force:
        optional_kwargs["force"] = args.force
    if args.max_processes:
        optional_kwargs["max_processes"] = args.max_processes

    check_and_install_zipnn()
    compress_files_with_suffix(args.suffix, **optional_kwargs)
zipnn_decompress_file.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import sys
4
+ import argparse
5
+
6
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
7
+
8
+
9
def check_and_install_zipnn():
    """Make sure the ``zipnn`` package can be imported, installing it if needed.

    Tries to import ``zipnn``. On ``ImportError`` it runs
    ``python -m pip install zipnn`` with the current interpreter and then
    imports the package again.

    Raises:
        subprocess.CalledProcessError: If the pip command exits with a
            non-zero status.
        ImportError: If ``zipnn`` still cannot be imported after installation.
    """
    try:
        import zipnn  # noqa: F401  (imported only to probe availability)
    except ImportError:
        import importlib

        print("zipnn not found. Installing...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "zipnn"])
        # The package appeared on disk after the interpreter started, so the
        # import system's cached directory listings have to be refreshed
        # before the retry can reliably see it.
        importlib.invalidate_caches()
        import zipnn  # noqa: F401
16
+
17
+
18
def decompress_file(input_file, dtype="", delete=False, force=False):
    """Decompress a ``.znn`` file into the same path without the suffix.

    Args:
        input_file: Path of the compressed file; it must end in ``.znn``.
        dtype: Any truthy value selects ``bytearray_dtype="float32"`` for the
            decompressor; a falsy value leaves the ZipNN default in place.
        delete: When true, the compressed file is removed and nothing is
            decompressed.
        force: When true, an existing output file is overwritten without
            asking for confirmation.

    Returns:
        None. Progress is printed to stdout; a missing input file is reported
        with a message rather than an exception.

    Raises:
        ValueError: If ``input_file`` does not end in ``.znn``.
    """
    if not input_file.endswith(".znn"):
        raise ValueError("Input file does not have the '.znn' suffix")

    if not os.path.exists(input_file):
        print(f"Error: The file {input_file} does not exist.")
        return

    if delete:
        print(f"Deleting {input_file}...")
        os.remove(input_file)
        return

    output_file = input_file[: -len(".znn")]
    if not force and os.path.exists(output_file):
        user_input = (
            input(f"{output_file} already exists; overwrite (y/n)? ").strip().lower()
        )
        if user_input not in ("yes", "y"):
            print(f"Skipping {input_file}...")
            return
    print(f"Decompressing {input_file}...")

    # Imported only once decompression is really going to happen, so the
    # validation / delete / skip paths do not need the package.
    import zipnn

    if dtype:
        zpn = zipnn.ZipNN(is_streaming=True, bytearray_dtype="float32")
    else:
        zpn = zipnn.ZipNN(is_streaming=True)

    with open(input_file, "rb") as infile:
        compressed = infile.read()
    decompressed = zpn.decompress(compressed)
    # The output is opened only after decompression succeeded, so a corrupt
    # input does not truncate a previously existing output file.
    with open(output_file, "wb") as outfile:
        outfile.write(decompressed)
    print(f"Decompressed {input_file} to {output_file}")
57
+
58
+
59
if __name__ == "__main__":
    # Command-line entry point: decompress (or, with --delete, remove) one
    # .znn file. The dependency check runs before the arguments are parsed.
    check_and_install_zipnn()

    cli = argparse.ArgumentParser(description="Enter a file path to decompress.")
    cli.add_argument(
        "input_file",
        type=str,
        help="Specify the path to the file to decompress.",
    )
    cli.add_argument(
        "--float32",
        action="store_true",
        help="A flag that triggers float32 compression.",
    )
    cli.add_argument(
        "--delete",
        action="store_true",
        help="A flag that triggers deletion of a single compressed file instead of decompression",
    )
    cli.add_argument(
        "--force",
        action="store_true",
        help="A flag that forces overwriting when decompressing.",
    )
    parsed = cli.parse_args()

    # Forward only the switches that were set, so the defaults declared on
    # decompress_file() apply to everything else.
    call_options = {}
    if parsed.float32:
        call_options["dtype"] = 32
    for switch in ("delete", "force"):
        value = getattr(parsed, switch)
        if value:
            call_options[switch] = value

    decompress_file(parsed.input_file, **call_options)
zipnn_decompress_path.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import argparse
4
+ import subprocess
5
+ import zipnn
6
+ from concurrent.futures import (
7
+ ProcessPoolExecutor,
8
+ as_completed,
9
+ )
10
+ from zipnn_decompress_file import (
11
+ decompress_file,
12
+ )
13
+
14
+ sys.path.append(
15
+ os.path.abspath(
16
+ os.path.join(
17
+ os.path.dirname(__file__),
18
+ "..",
19
+ )
20
+ )
21
+ )
22
+
23
+
24
def check_and_install_zipnn():
    """Make sure the ``zipnn`` package can be imported, installing it if needed.

    Tries to import ``zipnn``. On ``ImportError`` it runs
    ``python -m pip install zipnn`` with the current interpreter and then
    imports the package again.

    Raises:
        subprocess.CalledProcessError: If the pip command exits with a
            non-zero status.
        ImportError: If ``zipnn`` still cannot be imported after installation.
    """
    try:
        import zipnn  # noqa: F401  (imported only to probe availability)
    except ImportError:
        import importlib

        print("zipnn not found. Installing...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "zipnn"])
        # The package appeared on disk after the interpreter started, so the
        # import system's cached directory listings have to be refreshed
        # before the retry can reliably see it.
        importlib.invalidate_caches()
        import zipnn  # noqa: F401
39
+
40
+
41
def decompress_zpn_files(
    dtype="",
    path=".",
    delete=False,
    force=False,
    max_processes=1,
):
    """Run ``decompress_file`` on every ``.znn`` file directly inside ``path``.

    Args:
        dtype: Forwarded unchanged to ``decompress_file``.
        path: Directory whose entries are checked; sub-directories are not
            searched.
        delete: Forwarded unchanged to ``decompress_file``.
        force: When true, existing output files are overwritten without
            asking for confirmation.
        max_processes: Number of worker processes in the pool.

    Returns:
        None. Worker exceptions are caught and reported on stdout.
    """
    file_list = []
    for file_name in os.listdir(path):
        if not file_name.endswith(".znn"):
            continue
        full_path = os.path.join(path, file_name)
        # The existing-output check must look next to the compressed file,
        # not in the current working directory.
        decompressed_path = full_path[: -len(".znn")]
        if not force and os.path.exists(decompressed_path):
            user_input = (
                input(f"{decompressed_path} already exists; overwrite (y/n)? ")
                .strip()
                .lower()
            )
            if user_input not in ("y", "yes"):
                print(f"Skipping {file_name}...")
                continue
        file_list.append(full_path)

    if not file_list:
        # Nothing to do; do not start a process pool.
        return

    with ProcessPoolExecutor(max_workers=max_processes) as executor:
        # Every file is queued exactly once; the pool itself limits how many
        # run at the same time. force=True because overwrite confirmation
        # already happened above and worker processes must not prompt.
        future_to_file = {
            executor.submit(
                decompress_file,
                file_path,
                dtype,
                delete,
                True,
            ): file_path
            for file_path in file_list
        }
        for future in as_completed(future_to_file):
            try:
                future.result()
            except Exception as exc:
                # One failing file must not stop the remaining ones.
                print(f"File {future_to_file[future]} generated an exception: {exc}")
136
+
137
+
138
if __name__ == "__main__":
    # Command-line entry point: decompress every .znn file in a directory.
    check_and_install_zipnn()

    parser = argparse.ArgumentParser(
        description="Decompresses all .znn files. (optional) dtype."
    )
    parser.add_argument(
        "--float32",
        action="store_true",
        help="A flag that triggers float32 compression.",
    )
    parser.add_argument(
        "--path",
        type=str,
        help="Path to folder of files to decompress. If left empty, checks current folder.",
    )
    parser.add_argument(
        "--delete",
        action="store_true",
        help="A flag that triggers deletion of a single compressed file instead of decompression",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="A flag that forces overwriting when decompressing.",
    )
    parser.add_argument(
        "--max_processes",
        type=int,
        help="The amount of maximum processes.",
    )
    args = parser.parse_args()

    # Only options that were actually given are forwarded, so the defaults
    # declared on decompress_zpn_files() stay in effect otherwise.
    optional_kwargs = {}
    if args.float32:
        optional_kwargs["dtype"] = 32
    if args.path is not None:
        optional_kwargs["path"] = args.path
    if args.delete:
        optional_kwargs["delete"] = args.delete
    if args.force:
        optional_kwargs["force"] = args.force
    if args.max_processes:
        optional_kwargs["max_processes"] = args.max_processes

    decompress_zpn_files(**optional_kwargs)
+ decompress_zpn_files(**optional_kwargs)