In [1]:
%cd /home/mmnga
!source .venv/bin/activate
%cd /home/mmnga/hdd/llm-data/

/home/mmnga
/home/mmnga/hdd/llm-data


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [3]:
!pip install -Uqq torch safetensors transformers 

In [2]:
# chat-vectorの作成
model_name_or_path_base = "/home/mmnga/hdd/llm-data/Meta-Llama-3-8B-Instruct"
model_name_or_path_target = "/home/mmnga/hdd/llm-data/suzume-llama-3-8B-japanese"
save_vector_name_or_path = "/home/mmnga/hdd/llm-data/llama-3-8B-chat-vector_2"



In [55]:
from safetensors import safe_open
from safetensors.torch import save_file

import pathlib, os, json, transformers
from string import Template
import torch

class ChatVectorManager:
    def __init__(self, model_name_or_path_base, model_name_or_path_target, save_vector_name_or_path):
        self.model_name_or_path_base = model_name_or_path_base
        self.model_name_or_path_target = model_name_or_path_target
        self.save_vector_name_or_path = save_vector_name_or_path
        self.config_base = {}
        self.config_target = {}
        self.n_layers = 0
        self.layer_weight_templates = []
        self.base_weight_map = {}
        self.target_weight_map = {}
        self.current_weight_file_base = ""
        self.current_weight_file_target = ""
        self.base_weights = {}
        self.target_weights = {}
        self.save_weights = {}
        self.save_index_weights = {}
        self.save_size = 0
        self.save_total_size = 0
        self.save_byte_size = int(4.9 * 1024 * 1024 * 1024)
        self.save_counter = 0

    def load_setitngs(self, layer_num_config_name):

        # load config
        with open(self.model_name_or_path_base + "/config.json", "r") as f:
            self.config_base = json.load(f)

        with open(self.model_name_or_path_target + "/config.json", "r") as f:
            self.config_target = json.load(f)

        # load weight map
        with open(self.model_name_or_path_base + "/model.safetensors.index.json", "r") as f:
            self.base_weight_map = json.load(f)["weight_map"]

        with open(self.model_name_or_path_target + "/model.safetensors.index.json", "r") as f:
            self.target_weight_map = json.load(f)["weight_map"]

        self.n_layers = int(self.config_base[layer_num_config_name])
        

    def add_layer_weight_template_name(self, weight_template_name):
        self.layer_weight_templates.append(Template(weight_template_name))

    def load_base_weight(self, weight_name):
        if self.current_weight_file_base == self.base_weight_map[weight_name]:
            return
        else:
            file_name = self.base_weight_map[weight_name]
            self.base_weights = safe_open(f"{self.model_name_or_path_base}/{file_name}", framework="pt")
            self.current_weight_file_base = file_name

    def load_target_weight(self, weight_name):
        if self.current_weight_file_target == self.target_weight_map[weight_name]:
            return
        else:
            file_name = self.target_weight_map[weight_name]
            self.target_weights = safe_open(f"{self.model_name_or_path_target}/{file_name}", framework="pt")
            self.current_weight_file_target = file_name

    def layer_weight_iter(self):
        for i in range(self.n_layers):
            base_layer_weights = {}
            target_layer_weights = {}
            for t in self.layer_weight_templates:
                weight_name = t.substitute(i=i)
                self.load_base_weight(weight_name)
                self.load_target_weight(weight_name)
                base_layer_weights[weight_name] = self.base_weights.get_tensor(weight_name)
                target_layer_weights[weight_name] = self.target_weights.get_tensor(weight_name)

                yield i, weight_name, base_layer_weights[weight_name], target_layer_weights[weight_name]

    def get_weight_byte_size(self, weight):

        if isinstance(weight, torch.Tensor):
            weight_byte_size = weight.nelement() * weight.element_size()
        else:
            weight_byte_size = sum(p.nelement() * p.element_size() for p in weight.parameters())

        return weight_byte_size


    def save_weights_split(self):
        if len(self.save_weights.keys()) == 0:
            return 

        file_name = f"{self.save_vector_name_or_path}/model-{self.save_counter:05}.safetensors"

        for weight_name in self.save_weights.keys():
            self.save_index_weights[weight_name] = file_name.split("/")[-1]

        save_file(self.save_weights, file_name, metadata={"format":"pt"})
        self.save_size = 0
        self.save_counter += 1
        self.save_weights = {}
        print(f"save: {file_name}")

    def push_weight(self, weight_name, weight):
        weight_size = self.get_weight_byte_size(weight)
        self.save_weights[weight_name] = weight
        self.save_size += weight_size
        self.save_total_size += weight_size

        print(f"vector: {weight_name} {weight_size}")
        if self.save_size > self.save_byte_size:
            self.save_weights_split()
    
    def save_weight_map(self):
        new_weight_map = {
            "metadata": {
                "total_size": self.save_total_size
            },
            "weight_map": self.save_index_weights
        }
        with open(f"{self.save_vector_name_or_path}/model.safetensors.index.json", "w") as f:
            json.dump(new_weight_map, f, indent=4)

        print("make model.safetensors.index.json")

    def save_config(self):
        with open(f"{self.save_vector_name_or_path}/config.json", "w") as f:
            json.dump(self.config_target, f, indent=4)

    def make_vector(self):

        os.makedirs(self.save_vector_name_or_path, exist_ok=True)

        # 数値が含まれないweight
        for weight_name in [k for k in self.target_weight_map if not any(c.isdigit() for c in k)]:
            self.load_base_weight(weight_name)
            self.load_target_weight(weight_name)
            base_weight = self.base_weights.get_tensor(weight_name)
            target_weight = self.target_weights.get_tensor(weight_name)
            diff = target_weight - base_weight
            self.push_weight(weight_name, diff)

        for i, weight_name, base_weight, target_weight in self.layer_weight_iter():
            diff = target_weight - base_weight
            self.push_weight(weight_name, diff)

        self.save_weights_split()
        self.save_weight_map()
        self.save_config()
        print("Done!")


In [56]:
cvm = ChatVectorManager(model_name_or_path_base, model_name_or_path_target, save_vector_name_or_path)
cvm.load_setitngs(layer_num_config_name="num_hidden_layers")
cvm.add_layer_weight_template_name("model.layers.${i}.input_layernorm.weight")
cvm.add_layer_weight_template_name("model.layers.${i}.mlp.down_proj.weight")
cvm.add_layer_weight_template_name("model.layers.${i}.mlp.gate_proj.weight")
cvm.add_layer_weight_template_name("model.layers.${i}.mlp.up_proj.weight")
cvm.add_layer_weight_template_name("model.layers.${i}.post_attention_layernorm.weight")
cvm.add_layer_weight_template_name("model.layers.${i}.self_attn.k_proj.weight")
cvm.add_layer_weight_template_name("model.layers.${i}.self_attn.o_proj.weight")
cvm.add_layer_weight_template_name("model.layers.${i}.self_attn.q_proj.weight")
cvm.add_layer_weight_template_name("model.layers.${i}.self_attn.v_proj.weight")
cvm.make_vector()


vector: lm_head.weight 1050673152
vector: model.embed_tokens.weight 1050673152
vector: model.norm.weight 8192
vector: model.layers.0.input_layernorm.weight 8192
vector: model.layers.0.mlp.down_proj.weight 117440512
vector: model.layers.0.mlp.gate_proj.weight 117440512
vector: model.layers.0.mlp.up_proj.weight 117440512
vector: model.layers.0.post_attention_layernorm.weight 8192
vector: model.layers.0.self_attn.k_proj.weight 8388608
vector: model.layers.0.self_attn.o_proj.weight 33554432
vector: model.layers.0.self_attn.q_proj.weight 33554432
vector: model.layers.0.self_attn.v_proj.weight 8388608
vector: model.layers.1.input_layernorm.weight 8192
vector: model.layers.1.mlp.down_proj.weight 117440512
vector: model.layers.1.mlp.gate_proj.weight 117440512
vector: model.layers.1.mlp.up_proj.weight 117440512
vector: model.layers.1.post_attention_layernorm.weight 8192
vector: model.layers.1.self_attn.k_proj.weight 8388608
vector: model.layers.1.self_attn.o_proj.weight 33554432
vector: model.l

In [3]:
merge_target_model_path = "/home/mmnga/hdd/llm-data/Meta-Llama-3-70B-Instruct"
vector_path = "/home/mmnga/hdd/llm-data/llama-3-8B-chat-vector_2"
save_merged_model_path = "/home/mmnga/hdd/llm-data/Llama-3-70B-suzume-vector"

In [59]:
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer
from safetensors import safe_open
from safetensors.torch import save_file
import os
import json
from string import Template

class ExpandChatVectorMerger:
    def __init__(self, merge_target_model_path, vector_path, save_merged_model_path):
        self.merge_target_model_path = merge_target_model_path
        self.vector_path = vector_path
        self.save_merged_model_path = save_merged_model_path

        self.hold_layers_front = 8 # この0からこのlayerまではvectorをそのまま適用する
        self.hold_layers_later = -8 # 最後からこのlayerまではvectorをそのまま適用する
        self.apply_layer_map = {} # targetのlayerにvectorのlayerを適用するmap
        self.config_target = {}
        self.config_vector = {}
        self.n_layers_vector = 0
        self.n_layers = 0
        self.layer_weight_templates = []

        self.vector_weight_map = {}
        self.target_weight_map = {}
        self.current_weight_file_vector = ""
        self.current_weight_file_target = ""

        self.vector_weights = {}
        self.target_weights = {}
        self.save_weights = {}
        self.save_index_weights = {}
        self.save_size = 0
        self.save_total_size = 0
        self.save_byte_size = int(4.9 * 1024 * 1024 * 1024)
        self.save_counter = 0

    def load_setitngs(self, layer_num_config_name):

        # load config
        with open(self.vector_path + "/config.json", "r") as f:
            self.config_vector = json.load(f)

        with open(self.merge_target_model_path + "/config.json", "r") as f:
            self.config_target = json.load(f)

        # load weight map
        with open(self.vector_path + "/model.safetensors.index.json", "r") as f:
            self.vector_weight_map = json.load(f)["weight_map"]

        with open(self.merge_target_model_path + "/model.safetensors.index.json", "r") as f:
            self.target_weight_map = json.load(f)["weight_map"]

        self.n_layers = int(self.config_target[layer_num_config_name])
        self.n_layers_vector = int(self.config_vector[layer_num_config_name])

    def add_layer_weight_template_name(self, weight_template_name):
        self.layer_weight_templates.append(Template(weight_template_name))

    def make_apply_layer_map(self):
        target_from = self.hold_layers_front
        target_to = self.n_layers + self.hold_layers_later
        vector_to = self.n_layers_vector + self.hold_layers_later

        expand_count_target = target_to - target_from
        expand_count_vector = (vector_to - target_from) +1

        print("vector_to", vector_to)
        

        for i in range(self.n_layers):
            if i < self.hold_layers_front:
                self.apply_layer_map[str(i)] = i
                print("front", i, self.apply_layer_map[str(i)])
            elif i > self.n_layers + self.hold_layers_later:
                self.apply_layer_map[str(i)] = (i - self.n_layers) + self.n_layers_vector
                print("later", i, self.apply_layer_map[str(i)])
            else:
                index_in_vector = int(((i - self.hold_layers_front) / expand_count_target) * expand_count_vector)
                self.apply_layer_map[str(i)] = min(self.hold_layers_front + index_in_vector, vector_to)
            
                print("expand", i, self.apply_layer_map[str(i)])


    def get_merge_weight(self, vector_weight, target_weight):
        reshaped_tensor = vector_weight.unsqueeze(0).unsqueeze(0)
        
        if len(target_weight.shape) == 2:
            upsampled_tensor = F.interpolate(reshaped_tensor, size=target_weight.shape, mode='bilinear', align_corners=False)
        elif len(target_weight.shape) == 1:
            upsampled_tensor = F.interpolate(reshaped_tensor, size=target_weight.shape, mode='linear', align_corners=False)

        vw = upsampled_tensor.squeeze(0).squeeze(0)

        return target_weight + vw

    def add_layer_weight_template_name(self, weight_template_name):
        self.layer_weight_templates.append(Template(weight_template_name))

    def load_vector_weight(self, weight_name):
        if self.current_weight_file_vector == self.vector_weight_map[weight_name]:
            return
        else:
            file_name = self.vector_weight_map[weight_name]
            self.vector_weights = safe_open(f"{self.vector_path}/{file_name}", framework="pt")
            self.current_weight_file_vector = file_name

    def load_target_weight(self, weight_name):
        if self.current_weight_file_target == self.target_weight_map[weight_name]:
            return
        else:
            file_name = self.target_weight_map[weight_name]
            self.target_weights = safe_open(f"{self.merge_target_model_path}/{file_name}", framework="pt")
            self.current_weight_file_target = file_name

    def layer_weight_iter(self):

        for i in range(self.n_layers):
            target_layer_weights = {}
            vector_layer_weights = {}

            vector_layer_index = self.apply_layer_map[str(i)]

            for t in self.layer_weight_templates:
                vector_weight_name = t.substitute(i=vector_layer_index)
                target_weight_name = t.substitute(i=i)
                self.load_vector_weight(vector_weight_name)
                self.load_target_weight(target_weight_name)
                
                vector_layer_weights[vector_weight_name] = self.vector_weights.get_tensor(vector_weight_name)
                target_layer_weights[target_weight_name] = self.target_weights.get_tensor(target_weight_name)

                yield i, target_weight_name, vector_layer_weights[vector_weight_name], target_layer_weights[target_weight_name]

    def get_weight_byte_size(self, weight):

        if isinstance(weight, torch.Tensor):
            weight_byte_size = weight.nelement() * weight.element_size()
        else:
            weight_byte_size = sum(p.nelement() * p.element_size() for p in weight.parameters())

        return weight_byte_size


    def save_weights_split(self):
        if len(self.save_weights.keys()) == 0:
            return 

        file_name = f"{self.save_merged_model_path}/model-{self.save_counter:05}.safetensors"

        for weight_name in self.save_weights.keys():
            self.save_index_weights[weight_name] = file_name.split("/")[-1]

        save_file(self.save_weights, file_name, metadata={"format":"pt"})
        self.save_size = 0
        self.save_counter += 1
        self.save_weights = {}
        print(f"save: {file_name}")

    def push_weight(self, weight_name, weight):
        weight_size = self.get_weight_byte_size(weight)
        self.save_weights[weight_name] = weight
        self.save_size += weight_size
        self.save_total_size += weight_size

        print(f"vector: {weight_name} {weight_size}")
        if self.save_size > self.save_byte_size:
            self.save_weights_split()
    
    def save_weight_map(self):
        new_weight_map = {
            "metadata": {
                "total_size": self.save_total_size
            },
            "weight_map": self.save_index_weights
        }
        with open(f"{self.save_merged_model_path}/model.safetensors.index.json", "w") as f:
            json.dump(new_weight_map, f, indent=4)

        print("make model.safetensors.index.json")

    def save_config(self):
        with open(f"{self.save_merged_model_path}/config.json", "w") as f:
            json.dump(self.config_target, f, indent=4)
    
    def save_tokenizer(self):
        tokenizer = AutoTokenizer.from_pretrained(self.merge_target_model_path)
        tokenizer.save_pretrained(self.save_merged_model_path)

    def merge(self):

        os.makedirs(self.save_merged_model_path, exist_ok=True)
        self.save_tokenizer()
        self.make_apply_layer_map()

        # 数値が含まれないweight
        for weight_name in [k for k in self.target_weight_map if not any(c.isdigit() for c in k)]:
            self.load_target_weight(weight_name)
            self.load_vector_weight(weight_name)
            target_weight = self.target_weights.get_tensor(weight_name)
            vector_weight = self.vector_weights.get_tensor(weight_name)
            merge_weight = self.get_merge_weight(vector_weight, target_weight)
            self.push_weight(weight_name, merge_weight)

        # layers
        for i, target_weight_name, vector_weight, target_weight in self.layer_weight_iter():
            merge_weight = self.get_merge_weight(vector_weight, target_weight)
            self.push_weight(target_weight_name, merge_weight)

        self.save_weights_split()
        self.save_weight_map()
        self.save_config()
        print("Done!")


In [60]:
ecvm = ExpandChatVectorMerger(merge_target_model_path, vector_path, save_merged_model_path)
ecvm.load_setitngs("num_hidden_layers")
print("config_target", ecvm.n_layers)
print("config_vector", ecvm.n_layers_vector)
ecvm.load_setitngs(layer_num_config_name="num_hidden_layers")
ecvm.add_layer_weight_template_name("model.layers.${i}.input_layernorm.weight")
ecvm.add_layer_weight_template_name("model.layers.${i}.mlp.down_proj.weight")
ecvm.add_layer_weight_template_name("model.layers.${i}.mlp.gate_proj.weight")
ecvm.add_layer_weight_template_name("model.layers.${i}.mlp.up_proj.weight")
ecvm.add_layer_weight_template_name("model.layers.${i}.post_attention_layernorm.weight")
ecvm.add_layer_weight_template_name("model.layers.${i}.self_attn.k_proj.weight")
ecvm.add_layer_weight_template_name("model.layers.${i}.self_attn.o_proj.weight")
ecvm.add_layer_weight_template_name("model.layers.${i}.self_attn.q_proj.weight")
ecvm.add_layer_weight_template_name("model.layers.${i}.self_attn.v_proj.weight")
ecvm.merge()



Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config_target 80
config_vector 32
vector_to 24
front 0 0
front 1 1
front 2 2
front 3 3
front 4 4
front 5 5
front 6 6
front 7 7
expand 8 8
expand 9 8
expand 10 8
expand 11 8
expand 12 9
expand 13 9
expand 14 9
expand 15 9
expand 16 10
expand 17 10
expand 18 10
expand 19 10
expand 20 11
expand 21 11
expand 22 11
expand 23 11
expand 24 12
expand 25 12
expand 26 12
expand 27 13
expand 28 13
expand 29 13
expand 30 13
expand 31 14
expand 32 14
expand 33 14
expand 34 14
expand 35 15
expand 36 15
expand 37 15
expand 38 15
expand 39 16
expand 40 16
expand 41 16
expand 42 17
expand 43 17
expand 44 17
expand 45 17
expand 46 18
expand 47 18
expand 48 18
expand 49 18
expand 50 19
expand 51 19
expand 52 19
expand 53 19
expand 54 20
expand 55 20
expand 56 20
expand 57 21
expand 58 21
expand 59 21
expand 60 21
expand 61 22
expand 62 22
expand 63 22
expand 64 22
expand 65 23
expand 66 23
expand 67 23
expand 68 23
expand 69 24
expand 70 24
expand 71 24
expand 72 24
later 73 25
later 74 26
later 75 27
la

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = ecvm.save_merged_model_path
re_save_path = "/home/mmnga/hdd/llm-data/Llama-3-70B-suzume-vector_re"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="cpu")

messages = [
    {"role": "system", "content": "日本語で返答してください。"},
    {"role": "user", "content": "東京のおすすめの観光スポットを教えて下さい"},
]
prompt = tokenizer.apply_chat_template(
        messages, 
        tokenize=False, 
        add_generation_prompt=True
)

inputs = tokenizer([prompt], return_tensors="pt")

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

# model.save_pretrained(re_save_path)
# tokenizer.save_pretrained(re_save_path)

outputs = model.generate(**inputs, 
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    )

print(tokenizer.decode(outputs[0]))


In [4]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "/home/mmnga/hdd/llm-data/Llama-3-70B-suzume-vector_re"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="cpu")

messages = [
    {"role": "system", "content": "日本語で返答してください。"},
    {"role": "user", "content": "東京のおすすめの観光スポットを教えて下さい"},
]
prompt = tokenizer.apply_chat_template(
        messages, 
        tokenize=False, 
        add_generation_prompt=True
)

inputs = tokenizer([prompt], return_tensors="pt")

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

outputs = model.generate(**inputs, 
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    )

print(tokenizer.decode(outputs[0]))


  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 30/30 [01:53<00:00,  3.77s/it]
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


<|begin_of_text|><|begin_of_text|><|start_header_id|>system<|end_header_id|>

日本語で返答してください。<|eot_id|><|start_header_id|>user<|end_header_id|>

東京のおすすめの観光スポットを教えて下さい<|eot_id|><|start_header_id|>assistant<|end_header_id|>

東京は観光スポットが非常に多く、どれを選ぶか迷ってしまうほどです！以下は、東京のおすすめの観光スポット10選です。

1. **東京スカイツリー**：東京都心部にある高さ634mの超高層タワーの展望台から、東京のパノラマを眺めることができます。
2. **浅草寺**：浅草区にある古い寺院で、雷門（浅草門）や仲見世通りが有名です。
3. **渋谷スクランブルクロス**：渋谷区にある世界的に有名な交差点で、流行の最先端を感じることができます。
4. **東京タワー**：港区にある高さ333mのタワーで、夜はライトアップされます。
5. **新宿御苑**：新宿区にある大きな公園で、桜のシーズンには非常に人気があります。
6. **築地市場**：中央区にある世界最大の魚市場で、寿司や海老の朝食を味わうことができます。
7. **明治神


In [None]:
# !pip install -Uqq huggingface-hub

# !huggingface-cli login --token $HF_TOKEN

# tokenizer.push_to_hub("Llama-3-70B-japanese-suzume-vector", use_auth_token=True, private=True)
# model.push_to_hub("Llama-3-70B-japanese-suzume-vector", use_auth_token=True, private=True)
