NIH1.2_Llama3.2 / app.py
RPW's picture
Update app.py
ba381a3 verified
raw
history blame
842 Bytes
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Hub identifier of the checkpoint used for both the tokenizer and the model.
MODEL_NAME = "RPW/NIH1.3_Llama-3.2-11B"

# Load the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Configure the special tokens that must be registered with the tokenizer.
special_tokens = {
    "bos_token": "<|begin_of_text|>",
    "eos_token": "<|end_of_text|>",
    "additional_special_tokens": [
        "<|reserved_special_token_0|>",
        "<|reserved_special_token_1|>",
        "<|finetune_right_pad_id|>",
        "<|step_id|>",
        "<|start_header_id|>",
        "<|end_header_id|>",
    ],
}
tokenizer.add_special_tokens(special_tokens)
# NOTE(review): no pad token is configured here; sequence-classification heads
# typically need one for batched inputs -- confirm whether the checkpoint's
# config already defines pad_token_id.

# Load the model.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Resize the embeddings to accommodate the special tokens, but only when the
# tokenizer has actually outgrown the embedding matrix. Resizing
# unconditionally would shrink a matrix that is larger than the tokenizer
# (e.g. padded/reserved rows) and throw away trained rows.
if len(tokenizer) > model.get_input_embeddings().weight.shape[0]:
    model.resize_token_embeddings(len(tokenizer))