File size: 6,090 Bytes
5c5a02d
 
 
e3b994d
5c5a02d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00f8773
 
 
 
 
 
 
5c5a02d
 
00f8773
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3b994d
00f8773
 
 
 
e3b994d
00f8773
 
 
 
 
 
 
 
 
5c5a02d
e3b994d
5c5a02d
 
 
 
 
 
 
 
00f8773
5c5a02d
 
 
 
00f8773
5c5a02d
 
 
e3b994d
 
 
 
 
 
 
 
 
 
5c5a02d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3b994d
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# Recipes rely on bash-only syntax (e.g. the C-style `for ((...))` loops in
# progress_bar), so the shell is pinned explicitly.
SHELL := /bin/bash

# Every explicit target in this file is a task name, not a file it creates.
# Declaring them phony keeps a stray file called e.g. `export` or `slim` from
# making the task look "up to date". The `quantize-%` pattern targets are
# deliberately NOT listed: make skips implicit-rule search for phony targets,
# which would stop `quantize-A` etc. from matching the pattern rule.
.PHONY: export-merged-source-models \
        export-merged-source-models-first-pass \
        export-merged-source-models-second-pass \
        all-in-one export export-abcd export-e \
        slim quantize clean-large-files fix-gpu-buffers

# Configuration variables
# All values are static, so they are expanded once (`:=`). Each one only
# references variables defined above it.
NATIVE_ANDROID := $(abspath ../Native-LLM-for-Android)
QWEN_VL_DIR := $(NATIVE_ANDROID)/Export_ONNX/QwenVL
ONNX_SRC_DIR := $(QWEN_VL_DIR)/onnx
ONNX_DEST_DIR := $(QWEN_VL_DIR)/onnx-dist
STAGING_DIR := /tmp/transformers.js/staging
TRANSFORMERS_JS_PATH := ../transformers.js
ONNX_TOOLS_PATH := $(NATIVE_ANDROID)/ONNX_Tools

# Python paths from venvs
NATIVE_PYTHON := $(NATIVE_ANDROID)/.venv/bin/python3
TRANSFORMERS_PYTHON := $(TRANSFORMERS_JS_PATH)/.venv/bin/python3

# Model parts (each one is quantized through the `quantize-<part>` pattern rule)
PARTS := A B C D E

# progress_bar: shell fragment that redraws a 20-character progress bar on the
# current terminal line.
#   $(1)  number of items processed so far
#   $(2)  total number of items (callers only invoke this inside a loop over a
#         non-empty list, so it is never 0 there)
#   $(3)  label printed after "Processing:"
# Usage: $(call progress_bar,$$current,$$total,$$item)
# The fragment uses bash arithmetic `for` loops and sets the shell variables
# _filled, _blank and k in the calling shell.
define progress_bar
	printf "\r   Progress: \033[1;32m["; \
	_filled=$$(($1 * 20 / $2)); \
	for ((k=0; k<_filled; k++)); do printf "="; done; \
	printf "\033[0m"; \
	_blank=$$((20 - _filled)); \
	for ((k=0; k<_blank; k++)); do printf " "; done; \
	printf "\033[1;32m]\033[0m $1/$2  Processing: \033[1;34m%s\033[K\033[0m\r" "$3"
endef

# See https://github.com/pytorch/pytorch/issues/94280#issuecomment-2089196400
# The original export scripts write many separate tensor files per model; these
# two passes re-save each model so its tensors end up in one or two files.
#
# The second pass reads what the first pass wrote into $(ONNX_DEST_DIR), so the
# passes must run strictly one after the other. They are therefore invoked
# sequentially through $(MAKE) (the same idiom the `quantize` target uses)
# instead of being listed as prerequisites, which `make -j` may run in parallel.
export-merged-source-models:
	@for pass in first-pass second-pass; do \
		$(MAKE) export-merged-source-models-$$pass || exit 1; \
	done
	@echo "βœ… Exporting merged source models complete"

# First pass: load every *.onnx found under $(ONNX_SRC_DIR) and save it into
# $(ONNX_DEST_DIR) under the same base name, with all tensors written as
# external data to a single `<name>.onnx.data` file next to it.
#
# The model path and destination directory are handed to Python through
# sys.argv rather than pasted into the Python source, so a quote or backslash
# in a path cannot break the snippet. The file list is still word-split by the
# shell, so paths containing whitespace are not supported.
export-merged-source-models-first-pass:
	@echo "πŸ’Ύ First pass: Export all models with merged tensors..."
	@mkdir -p $(ONNX_DEST_DIR)
	@files=`find $(ONNX_SRC_DIR) -name "*.onnx"`; \
	total=`echo "$$files" | wc -w | tr -d ' '`; \
	echo "Files found (first pass): $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		$(NATIVE_PYTHON) -u -c "import onnx, os, sys; \
			src, dest_dir = sys.argv[1:3]; \
			m = onnx.load(src); \
			d = os.path.join(dest_dir, os.path.basename(src)); \
			onnx.save_model(m, d, all_tensors_to_one_file=True, save_as_external_data=True, location=os.path.basename(d)+'.data')" \
			"$$item" "$(ONNX_DEST_DIR)" || exit 1; \
	done; \
	echo "βœ… Done first pass"

# Second pass: re-save every *.onnx found under $(ONNX_DEST_DIR).
# For each model the size of the .onnx file plus its `<name>.onnx.data`
# sidecar (if present) is summed:
#   - above 2e9 bytes: saved with external data in a single `<name>.onnx.data`;
#   - otherwise: saved as one self-contained file and the now-unneeded sidecar
#     next to the source model is deleted.
#
# The Python snippet previously used `dest_dir` without defining it, so it
# raised NameError on the first model. The destination is now passed in
# explicitly as $(ONNX_DEST_DIR), matching the first pass, and both paths
# arrive via sys.argv instead of being spliced into the Python source.
export-merged-source-models-second-pass:
	@echo "πŸ’Ύ Second pass: Converting large models to external data format..."
	@files=`find $(ONNX_DEST_DIR) -name "*.onnx"`; \
	total=`echo "$$files" | wc -w | tr -d ' '`; \
	echo "Files found (second pass): $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		$(NATIVE_PYTHON) -c "import onnx, os, sys; \
			src, dest_dir = sys.argv[1:3]; \
			data = src + '.data'; \
			total_size = os.path.getsize(src); \
			total_size += os.path.getsize(data) if os.path.exists(data) else 0; \
			needs_external = total_size > 2e9; \
			d = os.path.join(dest_dir, os.path.basename(src)); \
			onnx.save_model(onnx.load(src), d, save_as_external_data=needs_external, all_tensors_to_one_file=True, location=(os.path.basename(src) + '.data') if needs_external else None); \
			not needs_external and os.path.exists(data) and os.remove(data)" \
			"$$item" "$(ONNX_DEST_DIR)" || exit 1; \
	done; \
	echo "βœ… Done second models"


# Full pipeline: export -> quantize -> clean-large-files -> fix-gpu-buffers ->
# export-merged-source-models.
#
# Each stage consumes files produced by an earlier one, so the order is part of
# the contract. Plain prerequisites carry no ordering guarantee under `make -j`,
# so the stages are run one at a time through $(MAKE), stopping at the first
# failure (same idiom as the `quantize` target).
# NOTE(review): the final message says "slimmed", but the `slim` target is not
# part of this pipeline - confirm whether that is intentional.
all-in-one:
	@for stage in export quantize clean-large-files fix-gpu-buffers export-merged-source-models; do \
		$(MAKE) $$stage || exit 1; \
	done
	@echo "✨ All done! ONNX models exported, slimmed, quantized and fixed"

# Umbrella target: runs both export steps (parts A-D, then part E) and reports
# completion once both prerequisites have finished.
export: \
  export-abcd \
  export-e
	@echo "βœ… Export complete"

# Run QwenVL_Export_ABCD.py for Qwen/Qwen2-VL-2B-Instruct from inside the QwenVL
# export directory, using the Native-LLM-for-Android virtualenv interpreter.
# The working directory comes from $(QWEN_VL_DIR) rather than a hard-coded
# relative path, so it always agrees with $(NATIVE_PYTHON) - including when
# NATIVE_ANDROID is overridden on the command line.
export-abcd:
	@echo "πŸš€ Exporting parts A, B, C, D..."
	cd "$(QWEN_VL_DIR)" && \
	$(NATIVE_PYTHON) QwenVL_Export_ABCD.py "Qwen/Qwen2-VL-2B-Instruct"

# Run QwenVL_Export_E.py for Qwen/Qwen2-VL-2B-Instruct from inside the QwenVL
# export directory, using the Native-LLM-for-Android virtualenv interpreter.
# The working directory comes from $(QWEN_VL_DIR) rather than a hard-coded
# relative path, so it always agrees with $(NATIVE_PYTHON) - including when
# NATIVE_ANDROID is overridden on the command line.
export-e:
	@echo "πŸš€ Exporting part E..."
	cd "$(QWEN_VL_DIR)" && \
	$(NATIVE_PYTHON) QwenVL_Export_E.py "Qwen/Qwen2-VL-2B-Instruct"

# Run `onnxslim --verbose` in place (input path == output path) on every
# regular *.onnx file under $(ONNX_SRC_DIR), skipping QwenVL_E.onnx.
# Stops at the first model that onnxslim fails on.
slim:
	@echo "πŸ—œοΈ  Slimming ONNX models..."
	@models=$$(find $(ONNX_SRC_DIR) -name "*.onnx" -type f ! -name "QwenVL_E.onnx"); \
	count=$$(echo "$$models" | wc -w | tr -d ' '); \
	echo "Files found: $$count"; \
	idx=0; \
	for model in $$models; do \
		idx=$$((idx + 1)); \
		$(call progress_bar,$$idx,$$count,$$model); \
		if ! onnxslim --verbose "$$model" "$$model"; then exit 1; fi; \
	done; \
	echo "βœ… Slimming complete"

# Quantize every part listed in $(PARTS), one sub-make per part
# (`quantize-A`, `quantize-B`, ...), aborting as soon as one of them fails.
quantize:
	@echo "⚑ Starting quantization..."
	for letter in $(PARTS); do \
		if ! $(MAKE) quantize-$$letter; then exit 1; fi; \
	done
	@echo "βœ… Quantization complete"

# quantize-<PART>: quantize one model part (the stem $* is the part letter).
#
# Steps, all run from inside $(TRANSFORMERS_JS_PATH):
#   1. empty $(STAGING_DIR) and symlink everything from $(ONNX_SRC_DIR) into it;
#   2. drop staged entries whose names contain two or more underscores
#      (`*_*_*.onnx` / `*_*_*.onnx_data`), then every *.onnx that does not
#      match QwenVL_<PART>*.onnx, so only this part's model(s) remain;
#   3. run `python -m scripts.quantize --mode q4f16` from staging into
#      $(ONNX_DEST_DIR). Part A additionally passes an --op_block_list.
#
# The whole sequence is a single `&&` chain. Previously a `;` after
# EXTRA_FLAGS="" split it in two, so the quantizer still ran after a failed
# `cd`, `ln` or `find` - against whatever directory and staging contents were
# left behind.
# $$EXTRA_FLAGS is intentionally unquoted so it splits into separate arguments.
quantize-%:
	@echo "⚑ Quantizing part $*..."
	mkdir -p $(ONNX_DEST_DIR)
	cd $(TRANSFORMERS_JS_PATH) && \
	mkdir -p $(STAGING_DIR) && \
	rm -f $(STAGING_DIR)/* && \
	ln -sf $$(realpath $(ONNX_SRC_DIR))/* $(STAGING_DIR)/ && \
	find $(STAGING_DIR) -name "*_*_*.onnx_data" -delete && \
	find $(STAGING_DIR) -name "*_*_*.onnx" -delete && \
	find $(STAGING_DIR) -name "*.onnx" ! -name "QwenVL_$**.onnx" -delete && \
	EXTRA_FLAGS="" && \
	if [ "$*" = "A" ]; then EXTRA_FLAGS="--op_block_list Conv DynamicQuantizeLinear DequantizeLinear Resize"; fi && \
	echo "Extra Flags for part $*: $$EXTRA_FLAGS" && \
	PYTHONPATH=$(TRANSFORMERS_JS_PATH) .venv/bin/python3 -m scripts.quantize \
		--input_folder '$(STAGING_DIR)' \
		--output_folder '$(ONNX_DEST_DIR)' \
		--mode q4f16 $$EXTRA_FLAGS

# Delete every model in $(ONNX_DEST_DIR) whose on-disk size is >= 2147483648
# bytes (2 GiB). A model's size is the .onnx file plus one external-data
# sidecar: `<model>.data` if it exists, otherwise `<model>_data` if it exists.
# When a model is removed, both possible sidecars are removed with it.
#
# Sizes are read with `wc -c`, which is specified by POSIX. The previous
# `stat -f %z` is the BSD/macOS spelling; GNU stat interprets `-f` as
# "filesystem status" and fails, so the target could not run on Linux.
# The extra $$(( ... )) around wc strips the leading blanks some wc
# implementations print.
clean-large-files:
	@echo "🧹 Removing ONNX files over 2GB..."
	cd $(ONNX_DEST_DIR) && \
	for f in $$(find . -name "*.onnx" -type f); do \
		if [ -f "$$f.data" ]; then \
			sidecar="$$f.data"; \
		elif [ -f "$${f}_data" ]; then \
			sidecar="$${f}_data"; \
		else \
			sidecar=""; \
		fi; \
		total_size=$$(( $$(wc -c < "$$f") )); \
		if [ -n "$$sidecar" ]; then \
			total_size=$$(( total_size + $$(wc -c < "$$sidecar") )); \
		fi; \
		size_mb=$$(( total_size / 1048576 )); \
		if [ $$total_size -ge 2147483648 ]; then \
			echo "   Removing $$f (size: $$size_mb MB)..."; \
			rm -f "$$f" "$$f.data" "$${f}_data"; \
		fi; \
	done
	@echo "βœ… Large file cleanup complete"

# Run ONNX_Tools/clamp_for_gpu_buffers.py with --overwrite on every regular
# file matching QwenVL_E_*.onnx under $(ONNX_DEST_DIR). The script is started
# from $(NATIVE_ANDROID) with that checkout's virtualenv interpreter; the
# first failure aborts the recipe.
fix-gpu-buffers:
	@echo "πŸ”§ Fixing GPU buffers for E models..."
	@targets=$$(find $(ONNX_DEST_DIR) -name "QwenVL_E_*.onnx" -type f); \
	count=$$(echo "$$targets" | wc -w | tr -d ' '); \
	echo "Files found: $$count"; \
	n=0; \
	for model in $$targets; do \
		n=$$((n + 1)); \
		$(call progress_bar,$$n,$$count,$$model); \
		{ cd $(NATIVE_ANDROID) && .venv/bin/python3 ONNX_Tools/clamp_for_gpu_buffers.py --overwrite "$$model"; } || exit 1; \
	done; \
	echo "βœ… GPU buffer fixes complete"