Python code with Pipeline

import transformers
import torch

# Hugging Face Hub repository id of the checkpoint to load.
model_id = "VIRNECT/llama-3-Korean-8B-V2"

# Build a text-generation pipeline for the checkpoint. The weights are
# requested in bfloat16 and device placement is left to device_map="auto".
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    device_map="auto",
    model_kwargs=dict(torch_dtype=torch.bfloat16),
)

# Put the wrapped model into evaluation mode before generating.
pipeline.model.eval()

# System prompt (Korean). English gloss: "You are a friendly chatbot that
# converses with humans. You provide detailed information about the question
# as appropriate to the situation. If you do not know the answer to a
# question, you say that you do not know."
# NOTE: the Korean text was restored from a mis-encoded (mojibake) copy.
PROMPT = '''당신은 인간과 대화하는 친절한 챗봇입니다. 질문에 대한 정보를 상황에 맞게 자세히 제공합니다. 당신이 질문에 대한 답을 모른다면, 사실은 모른다고 말합니다.'''

# Example user question (Korean). English gloss: "How does chemical
# engineering differ from other engineering fields?"
instruction = "화학공학이 다른 공학 분야와 어떻게 다른가요?"

# Chat history as role/content records: the system prompt first, then the
# user's question.
messages = [
    {"role": "system", "content": PROMPT},
    {"role": "user", "content": instruction},
]

tokenizer = pipeline.tokenizer

# Render the chat messages into one prompt string (not token ids), with the
# generation prompt appended.
prompt = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=False,
)

# Token ids passed as eos_token_id: the tokenizer's EOS token and the
# "<|eot_id|>" token.
eot_token_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
terminators = [tokenizer.eos_token_id, eot_token_id]

# Sampling configuration: up to 2048 new tokens, temperature 0.6, top-p 0.9.
generation_kwargs = {
    "max_new_tokens": 2048,
    "eos_token_id": terminators,
    "do_sample": True,
    "temperature": 0.6,
    "top_p": 0.9,
}
outputs = pipeline(prompt, **generation_kwargs)

# Drop the first len(prompt) characters of the generated text before printing
# (presumably the echoed prompt, leaving only the model's reply).
generated_text = outputs[0]["generated_text"]
print(generated_text[len(prompt):])
Downloads last month
2,685
Safetensors
Model size
8.03B params
Tensor type
FP16
·
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Model tree for VIRNECT/llama-3-Korean-8B-V2

Quantizations
1 model

Spaces using VIRNECT/llama-3-Korean-8B-V2 6