ctranslate2-4you committed 991fa15 (verified) · 1 parent: f900fd1

Create README.md

Files changed (1): README.md (+60 −0)
---
library_name: ctranslate2
license: apache-2.0
base_model:
- internlm/internlm3-8b-instruct
base_model_relation: quantized
tags:
- ctranslate2
- awq
- internlm3
- chat
---

### CTranslate2 conversion of InternLM3-8B

[Original model here](https://huggingface.co/internlm/internlm3-8b-instruct-awq)
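
The exact converter invocation used for this repo isn't recorded here. For reference, a typical conversion with CTranslate2's Transformers converter looks like the sketch below; the source checkpoint, output directory, and quantization setting are illustrative assumptions, not the recorded settings:

```python
import ctranslate2

# Illustrative conversion sketch (assumed settings, not the ones used for this repo):
# load the Hugging Face checkpoint and write out a CTranslate2 model directory.
converter = ctranslate2.converters.TransformersConverter(
    "internlm/internlm3-8b-instruct",  # assumed source checkpoint
    trust_remote_code=True,            # InternLM3 ships custom modeling code
)
converter.convert(
    "internlm3-8b-instruct-ct2",       # illustrative output directory
    quantization="int8",               # illustrative; "float16" etc. also work
)
```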

# Example Usage

<details><summary>Non-Streaming Example:</summary>

```python
import ctranslate2
from transformers import AutoTokenizer

def generate_response(prompt: str, system_message: str, model_path: str) -> str:
    # Load the converted CTranslate2 model on GPU with int8 compute.
    generator = ctranslate2.Generator(
        model_path,
        device="cuda",
        compute_type="int8"
    )
    # InternLM3 ships custom tokenizer code, so trust_remote_code is needed here.
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    # Build the InternLM3 chat template by hand.
    formatted_prompt = f"""<s><|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant
"""
    tokens = tokenizer.tokenize(formatted_prompt)
    results = generator.generate_batch(
        [tokens],
        max_length=1024,
        sampling_temperature=0.7,
        include_prompt_in_result=False,
        end_token="<|im_end|>",
        return_end_token=False,
    )
    # Decode only the generated continuation (the prompt was excluded above).
    response = tokenizer.decode(results[0].sequences_ids[0], skip_special_tokens=True)
    return response

if __name__ == "__main__":
    model_path = "path/to/your/internlm3-ct2-model"
    system_message = "You are a helpful AI assistant."
    user_prompt = "Write a short poem about a cat."
    response = generate_response(user_prompt, system_message, model_path)
    print("\nGenerated response:")
    print(response)

```
</details>
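
For token-by-token output, CTranslate2's `Generator.generate_tokens` yields one result per decoded token. The sketch below is a minimal streaming counterpart to the example above; the `stream_response` helper and the re-decode loop (re-decoding all accumulated ids each step so subword pieces render as complete text) are illustrative choices, not the only way to do this.

<details><summary>Streaming Example (sketch):</summary>

```python
import ctranslate2
from transformers import AutoTokenizer

def stream_response(prompt: str, system_message: str, model_path: str) -> str:
    generator = ctranslate2.Generator(
        model_path,
        device="cuda",
        compute_type="int8"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    # Same hand-built InternLM3 chat template as the non-streaming example.
    formatted_prompt = f"""<s><|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant
"""
    tokens = tokenizer.tokenize(formatted_prompt)
    output_ids = []
    printed = 0
    # generate_tokens yields a GenerationStepResult per token as it is decoded.
    for step in generator.generate_tokens(
        tokens,
        max_length=1024,
        sampling_temperature=0.7,
        end_token="<|im_end|>",
    ):
        output_ids.append(step.token_id)
        # Re-decode everything so far, then print only the new suffix.
        text = tokenizer.decode(output_ids, skip_special_tokens=True)
        print(text[printed:], end="", flush=True)
        printed = len(text)
    print()
    return tokenizer.decode(output_ids, skip_special_tokens=True)

if __name__ == "__main__":
    stream_response(
        "Write a short poem about a cat.",
        "You are a helpful AI assistant.",
        "path/to/your/internlm3-ct2-model",
    )
```
</details>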