File size: 6,090 Bytes
5c5a02d e3b994d 5c5a02d 00f8773 5c5a02d 00f8773 e3b994d 00f8773 e3b994d 00f8773 5c5a02d e3b994d 5c5a02d 00f8773 5c5a02d 00f8773 5c5a02d e3b994d 5c5a02d e3b994d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
# Recipes below use bash-only syntax (C-style for loops, arithmetic expansion).
SHELL := /bin/bash

# The targets in this file are ordered pipeline steps that share on-disk state
# (the staging directory and the onnx-dist directory), so keep the build serial
# even when make is invoked with -j.
.NOTPARALLEL:

# Every explicit target here is a command, not a file to build.
# quantize-% is a pattern rule and cannot be declared phony; listing concrete
# quantize-X names here would stop make from applying the pattern rule to them.
.PHONY: all-in-one export export-abcd export-e slim quantize \
        clean-large-files fix-gpu-buffers export-merged-source-models \
        export-merged-source-models-first-pass \
        export-merged-source-models-second-pass

# Configuration variables
# Simply-expanded (:=) so each value is computed once, in definition order.
NATIVE_ANDROID := $(abspath ../Native-LLM-for-Android)
QWEN_VL_DIR := $(NATIVE_ANDROID)/Export_ONNX/QwenVL
ONNX_SRC_DIR := $(QWEN_VL_DIR)/onnx
ONNX_DEST_DIR := $(QWEN_VL_DIR)/onnx-dist
STAGING_DIR := /tmp/transformers.js/staging
TRANSFORMERS_JS_PATH := ../transformers.js
ONNX_TOOLS_PATH := $(NATIVE_ANDROID)/ONNX_Tools

# Python paths from venvs
NATIVE_PYTHON := $(NATIVE_ANDROID)/.venv/bin/python3
TRANSFORMERS_PYTHON := $(TRANSFORMERS_JS_PATH)/.venv/bin/python3

# Model parts; the quantize target runs quantize-<part> for each entry.
PARTS := A B C D E
# progress_bar: shell snippet that redraws a one-line, 20-cell progress bar.
#   $(1)  count of items handled so far
#   $(2)  total number of items
#   $(3)  label printed after "Processing:"
# Expand it with $(call progress_bar,...) inside a bash recipe line. The body
# is a single logical line; it starts and ends with a carriage return so the
# next draw overwrites it. Do not put '#' comments inside the define: they
# would become part of the expanded shell text.
define progress_bar
_filled=$$(( $(1) * 20 / $(2) )); \
_empty=$$(( 20 - _filled )); \
printf "\r Progress: \033[1;32m["; \
for (( _n = 0; _n < _filled; _n++ )); do printf "="; done; \
printf "\033[0m"; \
for (( _n = 0; _n < _empty; _n++ )); do printf " "; done; \
printf "\033[1;32m]\033[0m $(1)/$(2) Processing: \033[1;34m%s\033[K\033[0m\r" "$(3)"
endef
# See https://github.com/pytorch/pytorch/issues/94280#issuecomment-2089196400
# The original export scripts write a large number of separate tensor files,
# so we merge them into one or two files per model instead.
#
# Runs the first pass and then the second pass (prerequisites are processed
# left to right in a serial build) and reports completion.
export-merged-source-models: export-merged-source-models-first-pass export-merged-source-models-second-pass
	@echo "✅ Exporting merged source models complete"
# First pass: re-save every *.onnx file found under $(ONNX_SRC_DIR) into
# $(ONNX_DEST_DIR) (flat, keyed by base name), with all tensors of a model
# stored in one external file named <model>.onnx.data.
# Stops at the first model that fails to convert.
# NOTE(review): the glyph at the start of the first message looks like a
# mis-decoded emoji; restore it from the original file.
export-merged-source-models-first-pass:
	@echo "πΎ First pass: Export all models with merged tensors..."
	@mkdir -p $(ONNX_DEST_DIR)
	@files=`find $(ONNX_SRC_DIR) -name "*.onnx" -type f`; \
	total=`echo "$$files" | wc -w | tr -d ' '`; \
	echo "Files found (first pass): $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		$(NATIVE_PYTHON) -u -c "import onnx, os, sys; src='$$item'; dest_dir='$(ONNX_DEST_DIR)'; \
			m = onnx.load(src); \
			d = os.path.join(dest_dir, os.path.basename(src)); \
			onnx.save_model(m, d, all_tensors_to_one_file=True, save_as_external_data=True, location=os.path.basename(d)+'.data')" || exit 1; \
	done; \
	echo "✅ Done first pass"
# Second pass: for every *.onnx file under $(ONNX_DEST_DIR), add the size of
# the model file and its <model>.onnx.data companion (if present). Models whose
# combined size exceeds 2e9 bytes are re-saved with external data; smaller ones
# are re-saved as a single file and their leftover .data file is removed.
# Stops at the first model that fails.
#
# The inline Python is one logical line: statements are separated by ';' and
# joined with backslash continuations, so it must stay free of '#' comments.
# dest_dir is set to $(ONNX_DEST_DIR), the same destination the first pass
# uses; it was previously referenced without being defined.
# NOTE(review): the glyph at the start of the first message looks like a
# mis-decoded emoji; restore it from the original file.
export-merged-source-models-second-pass:
	@echo "πΎ Second pass: Converting large models to external data format..."
	@files=`find $(ONNX_DEST_DIR) -name "*.onnx" -type f`; \
	total=`echo "$$files" | wc -w | tr -d ' '`; \
	echo "Files found (second pass): $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		$(NATIVE_PYTHON) -c 'import onnx, os, sys; \
			src = """'"$$item"'"""; \
			dest_dir = """'"$(ONNX_DEST_DIR)"'"""; \
			total_size = os.path.getsize(src); \
			d = os.path.join(dest_dir, os.path.basename(src)); \
			total_size += os.path.getsize(src + ".data") if os.path.exists(src + ".data") else 0; \
			needs_external = total_size > 2e9; \
			onnx.save_model( \
				onnx.load(src), \
				d, \
				save_as_external_data=needs_external, \
				all_tensors_to_one_file=True, \
				location=(os.path.basename(src) + ".data") if needs_external else None \
			); \
			not needs_external and os.path.exists(src + ".data") and os.remove(src + ".data")' || exit 1; \
	done; \
	echo "✅ Done second models"
# Full pipeline. In a serial build the prerequisites run left to right:
# export, quantize, clean-large-files, fix-gpu-buffers, then
# export-merged-source-models.
# NOTE(review): the message says "slimmed" but the slim target is not among
# the prerequisites — confirm whether that is intended.
all-in-one: export quantize clean-large-files fix-gpu-buffers export-merged-source-models
	@echo "✨ All done! ONNX models exported, slimmed, quantized and fixed"
# Runs both export steps (parts A-D, then part E) and reports completion.
export: export-abcd export-e
	@echo "✅ Export complete"
# Runs QwenVL_Export_ABCD.py from $(QWEN_VL_DIR) with the native venv's Python,
# passing the Hugging Face model id as its only argument.
# NOTE(review): the glyph at the start of the message looks like a mis-decoded
# emoji; restore it from the original file.
export-abcd:
	@echo "π Exporting parts A, B, C, D..."
	cd "$(QWEN_VL_DIR)" && \
	$(NATIVE_PYTHON) QwenVL_Export_ABCD.py "Qwen/Qwen2-VL-2B-Instruct"
# Runs QwenVL_Export_E.py from $(QWEN_VL_DIR) with the native venv's Python,
# passing the Hugging Face model id as its only argument.
# NOTE(review): the glyph at the start of the message looks like a mis-decoded
# emoji; restore it from the original file.
export-e:
	@echo "π Exporting part E..."
	cd "$(QWEN_VL_DIR)" && \
	$(NATIVE_PYTHON) QwenVL_Export_E.py "Qwen/Qwen2-VL-2B-Instruct"
# Runs onnxslim in place (input path == output path) on every *.onnx file
# under $(ONNX_SRC_DIR) except QwenVL_E.onnx. Stops at the first failure.
# NOTE(review): the glyph at the start of the first message looks like a
# mis-decoded emoji; restore it from the original file.
slim:
	@echo "ποΈ Slimming ONNX models..."
	@files=`find $(ONNX_SRC_DIR) -name "*.onnx" -type f ! -name "QwenVL_E.onnx"`; \
	total=`echo "$$files" | wc -w | tr -d ' '`; \
	echo "Files found: $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		onnxslim --verbose "$$item" "$$item" || exit 1; \
	done; \
	echo "✅ Slimming complete"
# Quantizes every part listed in $(PARTS), one sub-make per part, in order.
# A shell loop is used instead of prerequisites because every quantize-<part>
# run empties and refills the same $(STAGING_DIR). Stops at the first part
# that fails.
quantize:
	@echo "⚡ Starting quantization..."
	for part in $(PARTS); do \
		$(MAKE) quantize-$$part || exit 1; \
	done
	@echo "✅ Quantization complete"
# quantize-<part>: quantize a single model part ($* is the part letter).
# Steps, all run from $(TRANSFORMERS_JS_PATH):
#   1. empty $(STAGING_DIR) and symlink everything from $(ONNX_SRC_DIR) into it;
#   2. delete staged names matching *_*_*.onnx / *_*_*.onnx_data, then every
#      *.onnx that does not match QwenVL_<part>*.onnx;
#   3. run scripts.quantize in q4f16 mode from staging into $(ONNX_DEST_DIR).
# Part A additionally passes an --op_block_list.
#
# The whole sequence is a single && chain, so a failed cd/ln/find aborts the
# recipe instead of falling through to the quantizer. $$EXTRA_FLAGS is left
# unquoted on purpose so it splits into separate arguments.
quantize-%:
	@echo "⚡ Quantizing part $*..."
	mkdir -p $(ONNX_DEST_DIR)
	cd $(TRANSFORMERS_JS_PATH) && \
	mkdir -p $(STAGING_DIR) && \
	rm -f $(STAGING_DIR)/* && \
	ln -sf $$(realpath $(ONNX_SRC_DIR))/* $(STAGING_DIR)/ && \
	find $(STAGING_DIR) -name "*_*_*.onnx_data" -delete && \
	find $(STAGING_DIR) -name "*_*_*.onnx" -delete && \
	find $(STAGING_DIR) -name "*.onnx" ! -name "QwenVL_$**.onnx" -delete && \
	EXTRA_FLAGS="" && \
	if [ "$*" = "A" ]; then EXTRA_FLAGS="--op_block_list Conv DynamicQuantizeLinear DequantizeLinear Resize"; fi && \
	echo "Extra Flags for part $*: $$EXTRA_FLAGS" && \
	PYTHONPATH=$(TRANSFORMERS_JS_PATH) .venv/bin/python3 -m scripts.quantize \
		--input_folder '$(STAGING_DIR)' \
		--output_folder '$(ONNX_DEST_DIR)' \
		--mode q4f16 $$EXTRA_FLAGS
# Deletes every *.onnx model under $(ONNX_DEST_DIR) whose size, together with
# its external data file, is at least 2147483648 bytes. The companion is
# <model>.onnx.data if it exists, otherwise <model>.onnx_data; the model and
# both possible companions are removed together.
#
# Sizes are read with `wc -c` rather than `stat -f %z`, which only works with
# BSD/macOS stat. Wrapping the result in $$(( )) drops the leading blanks that
# some wc implementations print.
clean-large-files:
	@echo "🧹 Removing ONNX files over 2GB..."
	cd $(ONNX_DEST_DIR) && \
	for f in $$(find . -name "*.onnx" -type f); do \
		data_file=""; \
		if [ -f "$$f.data" ]; then \
			data_file="$$f.data"; \
		elif [ -f "$${f}_data" ]; then \
			data_file="$${f}_data"; \
		fi; \
		total_size=$$(( $$(wc -c < "$$f") )); \
		if [ -n "$$data_file" ]; then \
			total_size=$$(( total_size + $$(wc -c < "$$data_file") )); \
		fi; \
		size_mb=$$(( total_size / 1048576 )); \
		if [ "$$total_size" -ge 2147483648 ]; then \
			echo " Removing $$f (size: $$size_mb MB)..."; \
			rm -f "$$f" "$$f.data" "$${f}_data"; \
		fi; \
	done
	@echo "✅ Large file cleanup complete"
# Runs clamp_for_gpu_buffers.py --overwrite on every QwenVL_E_*.onnx file
# under $(ONNX_DEST_DIR), from the $(NATIVE_ANDROID) directory with the native
# venv's Python. Stops at the first failure.
# The interpreter and script are addressed through $(NATIVE_PYTHON) and
# $(ONNX_TOOLS_PATH), which resolve to the same files the relative paths did.
# NOTE(review): the glyph at the start of the first message looks like a
# mis-decoded emoji; restore it from the original file.
fix-gpu-buffers:
	@echo "π§ Fixing GPU buffers for E models..."
	@files=`find $(ONNX_DEST_DIR) -name "QwenVL_E_*.onnx" -type f`; \
	total=`echo "$$files" | wc -w | tr -d ' '`; \
	echo "Files found: $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		cd $(NATIVE_ANDROID) && $(NATIVE_PYTHON) $(ONNX_TOOLS_PATH)/clamp_for_gpu_buffers.py --overwrite "$$item" || exit 1; \
	done; \
	echo "✅ GPU buffer fixes complete"
|