File size: 842 Bytes
e7993e4
 
ba381a3
e85dbaa
504b127
e7993e4
 
504b127
 
 
 
 
 
 
 
 
 
 
 
 
 
e85dbaa
504b127
 
6de0aed
504b127
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Checkpoint identifier passed to both `from_pretrained` calls below.
MODEL_NAME = "RPW/NIH1.3_Llama-3.2-11B"

# Load the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Register the special tokens on the tokenizer.
special_tokens = {
    "bos_token": "<|begin_of_text|>",
    "eos_token": "<|end_of_text|>",
    "additional_special_tokens": [
        "<|reserved_special_token_0|>",
        "<|reserved_special_token_1|>",
        "<|finetune_right_pad_id|>",
        "<|step_id|>",
        "<|start_header_id|>",
        "<|end_header_id|>",
    ],
}
tokenizer.add_special_tokens(special_tokens)

# Load the model.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Grow the embedding matrix only when the tokenizer now has more entries than
# the model has embedding rows. Resizing unconditionally would *shrink* the
# matrix (dropping pretrained rows from the end) whenever the checkpoint ships
# with more embedding rows than `len(tokenizer)`.
if len(tokenizer) > model.get_input_embeddings().weight.shape[0]:
    model.resize_token_embeddings(len(tokenizer))