alonsosilva committed on
Commit
88d564e
·
1 Parent(s): 080d490
Files changed (4) hide show
  1. Dockerfile +28 -0
  2. LICENSE +21 -0
  3. app.py +195 -0
  4. requirements.txt +3 -0
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory and expose user-level pip scripts on PATH
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the app directory inside the user's home
WORKDIR $HOME/app

# Run pip after `USER user` to avoid permission issues with Python
RUN pip install --no-cache-dir --upgrade pip

# Copy only the dependency manifest first so the install layer below stays
# cached until requirements.txt itself changes.
COPY --chown=user requirements.txt .

RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the rest of the build context (app.py included) into $HOME/app,
# owned by the user. Done last so source edits do not trigger a reinstall.
COPY --chown=user . $HOME/app

ENTRYPOINT ["solara", "run", "app.py", "--host=0.0.0.0", "--port", "7860"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Alonso Silva Allende
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
app.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer
2
+
3
# Hugging Face Hub id of the checkpoint; shared by the model and its tokenizer
# so the two can never drift apart.
MODEL_ID = "Qwen/Qwen2-1.5B-Instruct"

# Both objects are created once, when this module is imported.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
5
+
6
class BaseStreamer:
    """
    Base class from which `.generate()` streamers should inherit.

    Subclasses must override both `put` and `end`; the implementations here
    only raise `NotImplementedError`.
    """

    def put(self, value):
        """Function that is called by `.generate()` to push new tokens.

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError()

    def end(self):
        """Function that is called by `.generate()` to signal the end of generation.

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError()
18
+
19
class TextStreamer(BaseStreamer):
    """
    Text streamer that appends decoded text to the chat history as soon as entire words are formed.

    Tokens pushed by `.generate()` are accumulated, decoded with the tokenizer, and the part of the
    decoded text that is considered final is handed to `on_finalized_text`. In this file that method
    does not print: it appends the text to the content of the last entry of the module-level
    `messages` reactive, which must therefore exist and be non-empty before text is emitted.

    Parameters:
        tokenizer (`AutoTokenizer`):
            The tokenizer used to decode the tokens.
        skip_prompt (`bool`, *optional*, defaults to `False`):
            Whether to skip the prompt to `.generate()` or not. Useful e.g. for chatbots.
        decode_kwargs (`dict`, *optional*):
            Additional keyword arguments to pass to the tokenizer's `decode` method.
    """

    def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = False, **decode_kwargs):
        self.tokenizer = tokenizer
        self.skip_prompt = skip_prompt
        self.decode_kwargs = decode_kwargs

        # variables used in the streaming process
        self.token_cache = []  # token ids received since the last flush
        self.print_len = 0  # number of characters of the decoded cache already emitted
        self.next_tokens_are_prompt = True  # the first `put` of a generation carries the prompt

    def put(self, value):
        """
        Receives tokens, decodes them, and emits the text as soon as it forms entire words.

        `value` is expected to expose `.shape` and `.tolist()` (e.g. a tensor of token ids).

        Raises:
            ValueError: if `value` has more than one dimension and a batch size above 1.
        """
        if len(value.shape) > 1 and value.shape[0] > 1:
            raise ValueError("TextStreamer only supports batch size 1")
        elif len(value.shape) > 1:
            value = value[0]

        # The first call of a generation is treated as the prompt and dropped when requested.
        if self.skip_prompt and self.next_tokens_are_prompt:
            self.next_tokens_are_prompt = False
            return

        # Add the new token(s) to the cache and decode the entire thing.
        self.token_cache.extend(value.tolist())
        text = self.tokenizer.decode(self.token_cache, **self.decode_kwargs)

        # After the symbol for a new line, we flush the cache.
        if text.endswith("\n"):
            printable_text = text[self.print_len :]
            self.token_cache = []
            self.print_len = 0
        # If the last character is a CJK character, we emit everything decoded so far.
        elif len(text) > 0 and self._is_chinese_char(ord(text[-1])):
            printable_text = text[self.print_len :]
            self.print_len += len(printable_text)
        # Otherwise, emit until the last space char (simple heuristic to avoid emitting incomplete
        # words, which may change with the subsequent token).
        else:
            printable_text = text[self.print_len : text.rfind(" ") + 1]
            self.print_len += len(printable_text)

        self.on_finalized_text(printable_text)

    def end(self):
        """Flushes any remaining cache and resets the streamer for the next generation."""
        # Flush the cache, if it exists
        if len(self.token_cache) > 0:
            text = self.tokenizer.decode(self.token_cache, **self.decode_kwargs)
            printable_text = text[self.print_len :]
            self.token_cache = []
            self.print_len = 0
        else:
            printable_text = ""

        self.next_tokens_are_prompt = True
        self.on_finalized_text(printable_text, stream_end=True)

    def on_finalized_text(self, text: str, stream_end: bool = False):
        """
        Appends `text` to the content of the last message in the module-level `messages` reactive.

        The last entry is replaced by a new dict with role "assistant" rather than mutated in place.
        `stream_end` is accepted so `end()` can signal completion, but it is not used here.
        """
        # Many `put` calls finalize no text (mid-word tokens); skip the pointless state write.
        if not text:
            return

        history = messages.value
        messages.value = [
            *history[:-1],
            {
                "role": "assistant",
                "content": history[-1]["content"] + text,
            },
        ]

    def _is_chinese_char(self, cp):
        """Checks whether CP is the codepoint of a CJK character."""
        # This defines a "chinese character" as anything in the CJK Unicode block:
        #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
        #
        # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
        # despite its name. The modern Korean Hangul alphabet is a different block,
        # as is Japanese Hiragana and Katakana. Those alphabets are used to write
        # space-separated words, so they are not treated specially and are handled
        # like all of the other languages.
        cjk_ranges = (
            (0x4E00, 0x9FFF),
            (0x3400, 0x4DBF),
            (0x20000, 0x2A6DF),
            (0x2A700, 0x2B73F),
            (0x2B740, 0x2B81F),
            (0x2B820, 0x2CEAF),
            (0xF900, 0xFAFF),
            (0x2F800, 0x2FA1F),
        )
        return any(low <= cp <= high for low, high in cjk_ranges)
145
+
146
# Single module-level streamer handed to `model.generate`.
# `skip_prompt=True` keeps the prompt out of the streamed reply.
# `skip_special_tokens=True` is forwarded to `tokenizer.decode` so control
# tokens such as `<|im_end|>` are dropped at decode time.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
147
+
148
+ import re
149
+ import solara
150
+ from typing import List
151
+ from typing_extensions import TypedDict
152
+
153
class MessageDict(TypedDict):
    """Shape of one chat-history entry."""

    # Author of the message; this file uses "user" and "assistant".
    role: str
    # Text of the message.
    content: str
156
+
157
# Chat history, oldest message first. Starts empty and is always replaced
# with a new list rather than mutated in place.
messages: solara.Reactive[List[MessageDict]] = solara.reactive([])
158
@solara.component
def Page():
    """Render the chat page and generate a model reply for each new user message.

    The page consists of a scrollable chat box showing every entry of `messages`
    and a fixed input box at the bottom. Submitting text appends a user message;
    a background task keyed on the number of user messages then runs
    `model.generate`, whose output is streamed through `streamer`.
    """
    # Same blue accent for the light and the dark theme.
    for palette in (solara.lab.theme.themes.light, solara.lab.theme.themes.dark):
        palette.primary = "#0000ff"
        palette.secondary = "#0000ff"

    title = "Qwen2-1.5B-Instruct"
    with solara.Head():
        solara.Title(title)

    with solara.Column(align="center"):
        user_message_count = sum(1 for m in messages.value if m["role"] == "user")

        def send(message):
            """Append the submitted text to the history as a user message."""
            messages.value = [*messages.value, {"role": "user", "content": message}]

        def response(message):
            """Generate a reply to `message`, streamed into a new assistant entry."""
            # Empty placeholder that the streamer extends as text is finalized.
            messages.value = [*messages.value, {"role": "assistant", "content": ""}]
            # Only the latest user message is sent to the model, not the full history.
            text = tokenizer.apply_chat_template(
                [{"role": "user", "content": message}],
                tokenize=False,
                add_generation_prompt=True,
            )
            inputs = tokenizer(text, return_tensors="pt")
            model.generate(**inputs, streamer=streamer, max_new_tokens=512)

        def result():
            """Reply to the newest message, but only if it came from the user."""
            history = messages.value
            if history and history[-1]["role"] == "user":
                response(history[-1]["content"])

        # Re-run the task whenever a new user message has been added.
        solara.lab.use_task(result, dependencies=[user_message_count])

        with solara.lab.ChatBox(
            style={
                "position": "fixed",
                "overflow-y": "scroll",
                "scrollbar-width": "none",
                "-ms-overflow-style": "none",
                "top": "0",
                "bottom": "10rem",
                "width": "70%",
            }
        ):
            for item in messages.value:
                is_user = item["role"] == "user"
                with solara.lab.ChatMessage(
                    user=is_user,
                    # Label assistant messages with the model that is actually loaded.
                    name="User" if is_user else title,
                    avatar_background_color="#33cccc" if item["role"] == "assistant" else "#ff991f",
                    border_radius="20px",
                    style="background-color:darkgrey!important;"
                    if solara.lab.theme.dark_effective
                    else "background-color:lightgrey!important;",
                ):
                    # Strip the end-of-turn marker for display only; the stored
                    # message is left untouched.
                    solara.Markdown(item["content"].replace("<|im_end|>", ""))

        solara.lab.ChatInput(
            send_callback=send,
            style={"position": "fixed", "bottom": "3rem", "width": "70%"},
        )
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch==2.3.1
2
+ transformers==4.41.2
3
+ solara==1.33.0