---
library_name: ctranslate2
license: mit
base_model:
- microsoft/phi-4
base_model_relation: quantized
tags:
- ctranslate2
- AWQ
- phi-4
- phi
- chat
---

CTranslate2-based version of Phi-4, produced in two steps (sketched below):
1) First converted to AWQ format, using the [cosmopedia-100k dataset](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia-100k) for calibration.
2) Then converted to the CTranslate2 format.

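The exact conversion settings are not recorded in this card. For reference, here is a minimal sketch of such a pipeline, assuming AutoAWQ for the quantization step and CTranslate2's `TransformersConverter` (the Python equivalent of the `ct2-transformers-converter` CLI) for the conversion step. The quantization config, output paths, and `calib_data` wiring are illustrative, not the settings actually used:

```python
import ctranslate2
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

# Step 1: AWQ quantization with AutoAWQ (illustrative config; the actual
# calibration here used cosmopedia-100k, which is assumed to expose a
# "text" column that AutoAWQ can consume).
model = AutoAWQForCausalLM.from_pretrained("microsoft/phi-4")
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-4")
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}
model.quantize(tokenizer, quant_config=quant_config, calib_data="HuggingFaceTB/cosmopedia-100k")
model.save_quantized("phi-4-awq")
tokenizer.save_pretrained("phi-4-awq")

# Step 2: convert the AWQ checkpoint to the CTranslate2 format
# (AWQ checkpoints are recognized by recent CTranslate2 releases).
converter = ctranslate2.converters.TransformersConverter("phi-4-awq")
converter.convert("phi-4-ct2")
```
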
### For inference, the main difference is that you do not pass the "compute_type" parameter; the quantization is already baked into the converted weights (see the comparison below).

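As a point of comparison, a sketch of the two load calls (model paths are placeholders):

```python
import ctranslate2

# Typical CTranslate2 model: the compute type is chosen at load time.
generator = ctranslate2.Generator("some-ct2-model", device="cuda", compute_type="int8")

# This AWQ-converted model: omit compute_type entirely.
generator = ctranslate2.Generator("path/to/your/phi-4-ct2-model", device="cuda")
```
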
# Example Usage

<details><summary>Non-Streaming Example:</summary>

```python
import ctranslate2
from transformers import AutoTokenizer


def generate_response(prompt: str, system_message: str, model_path: str) -> str:
    # No compute_type argument: the AWQ quantization is baked into the model.
    generator = ctranslate2.Generator(model_path, device="cuda")
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # Phi-4 chat format.
    formatted_prompt = f"""<|im_start|>system<|im_sep|>{system_message}<|im_end|>
<|im_start|>user<|im_sep|>{prompt}<|im_end|>
<|im_start|>assistant<|im_sep|>"""
    tokens = tokenizer.tokenize(formatted_prompt)
    results = generator.generate_batch(
        [tokens],
        max_length=1024,
        sampling_temperature=0.7,
        include_prompt_in_result=False,
    )
    response = tokenizer.decode(results[0].sequences_ids[0], skip_special_tokens=True)
    return response


if __name__ == "__main__":
    model_path = "path/to/your/phi-4-ct2-model"
    system_message = "You are a helpful AI assistant."
    user_prompt = "Write a short poem about a cat."
    response = generate_response(user_prompt, system_message, model_path)
    print("\nGenerated response:")
    print(response)
```
</details>

<details><summary>Streaming Example:</summary>

```python
import ctranslate2
from transformers import AutoTokenizer


def generate_response(prompt: str, system_message: str, model_path: str) -> None:
    generator = ctranslate2.Generator(model_path, device="cuda")
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    formatted_prompt = f"""<|im_start|>system<|im_sep|>{system_message}<|im_end|>
<|im_start|>user<|im_sep|>{prompt}<|im_end|>
<|im_start|>assistant<|im_sep|>"""
    tokens: list[str] = tokenizer.tokenize(formatted_prompt)
    # generate_tokens yields one step result at a time.
    for step in generator.generate_tokens([tokens], max_length=1024, sampling_temperature=0.7):
        token: str = step.token
        # Stop at the end-of-sequence token or any other special token.
        if token == tokenizer.eos_token or token in tokenizer.all_special_tokens:
            break
        decoded_token: str = tokenizer.decode([step.token_id])
        print(decoded_token, end="", flush=True)
    print()


if __name__ == "__main__":
    model_path = "path/to/your/phi-4-ct2-model"
    system_message = "You are a helpful AI assistant."
    user_prompt = "Write a short poem about a cat."
    print("\nGenerating response:")
    generate_response(user_prompt, system_message, model_path)
```
</details>
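
Both examples hard-code Phi-4's chat format in an f-string. If the bundled tokenizer ships a chat template (an assumption; check `tokenizer.chat_template`), the prompt can instead be built with `apply_chat_template`:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("path/to/your/phi-4-ct2-model")
messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "Write a short poem about a cat."},
]
# Renders the same <|im_start|>...<|im_sep|> layout as the manual f-string,
# provided the tokenizer's template matches Phi-4's chat format.
formatted_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
tokens = tokenizer.tokenize(formatted_prompt)  # then pass to the generator as above
```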