---
library_name: ctranslate2
license: apache-2.0
base_model:
- internlm/internlm3-8b-instruct
base_model_relation: quantized
tags:
- ctranslate2
- internlm3
- chat
---

### CTranslate2 conversion of InternLM3-8B-Instruct into "int8"

[Original model here](https://huggingface.co/internlm/internlm3-8b-instruct)
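
The conversion itself can be reproduced with CTranslate2's Transformers converter, either through the `ct2-transformers-converter` CLI or from Python. Below is a minimal sketch, assuming your installed CTranslate2 release has a loader for this architecture; the output directory name is only an example:

```python
# One-time conversion: loads the original Hugging Face weights and writes
# a CTranslate2 model directory quantized to int8 (matching this repo).
from ctranslate2.converters import TransformersConverter

converter = TransformersConverter(
    "internlm/internlm3-8b-instruct",
    trust_remote_code=True,  # InternLM3 ships custom modeling/tokenizer code
)
converter.convert("internlm3-8b-instruct-ct2-int8", quantization="int8")
```

Since the usage example below loads the tokenizer from the converted directory, you may also want the converter's `copy_files` option to carry the tokenizer files over.
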
# Example Usage

<details><summary>Non-Streaming Example:</summary>

```python
import ctranslate2
from transformers import AutoTokenizer


def generate_response(prompt: str, system_message: str, model_path: str) -> str:
    generator = ctranslate2.Generator(
        model_path,
        device="cuda",
        compute_type="int8"
    )
    # trust_remote_code belongs here: InternLM3 ships a custom tokenizer class
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    # InternLM3 uses the ChatML-style <|im_start|>/<|im_end|> chat format
    formatted_prompt = f"""<s><|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant
"""
    tokens = tokenizer.tokenize(formatted_prompt)
    results = generator.generate_batch(
        [tokens],
        max_length=1024,
        sampling_temperature=0.7,
        include_prompt_in_result=False,
        end_token="<|im_end|>",
        return_end_token=False,
    )
    response = tokenizer.decode(results[0].sequences_ids[0], skip_special_tokens=True)
    return response


if __name__ == "__main__":
    model_path = "path/to/your/internlm3-8b-instruct-ct2-int8"
    system_message = "You are a helpful AI assistant."
    user_prompt = "Write a short poem about a cat."
    response = generate_response(user_prompt, system_message, model_path)
    print("\nGenerated response:")
    print(response)
```

</details>
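
<details><summary>Streaming Example:</summary>

A streaming counterpart, for completeness: `Generator.generate_tokens` yields one result per generated token instead of returning a finished batch. This is a sketch under the same assumptions as above (placeholder model path, ChatML-style prompt); check your CTranslate2 version's documentation for the exact `generate_tokens` options.

```python
import ctranslate2
from transformers import AutoTokenizer

model_path = "path/to/your/internlm3-8b-instruct-ct2-int8"
generator = ctranslate2.Generator(model_path, device="cuda", compute_type="int8")
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

prompt = """<s><|im_start|>system
You are a helpful AI assistant.<|im_end|>
<|im_start|>user
Write a short poem about a cat.<|im_end|>
<|im_start|>assistant
"""

# Each iteration yields a GenerationStepResult for one new token.
token_ids = []
for step in generator.generate_tokens(
    tokenizer.tokenize(prompt),
    max_length=1024,
    sampling_temperature=0.7,
    end_token="<|im_end|>",
):
    token_ids.append(step.token_id)

# Decoding the accumulated ids at the end avoids splitting multi-byte
# characters; for true incremental printing, re-decode on each step.
print(tokenizer.decode(token_ids, skip_special_tokens=True))
```

</details>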