nchen909 committed on
Commit 7f07f26 · verified · 1 Parent(s): 76f0323

Upload folder using huggingface_hub

Files changed (2)
  1. README.md +3 -9
  2. app_new.py +205 -0
README.md CHANGED
@@ -1,12 +1,6 @@
  ---
- title: Try
- emoji: 📈
- colorFrom: pink
- colorTo: gray
+ title: try
+ app_file: app_new.py
  sdk: gradio
- sdk_version: 4.21.0
- app_file: app.py
- pinned: false
+ sdk_version: 5.6.0
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
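After this change, the entire README.md reduces to the Space front matter:

```yaml
---
title: try
app_file: app_new.py
sdk: gradio
sdk_version: 5.6.0
---
```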
 
 
app_new.py ADDED
@@ -0,0 +1,205 @@
+ import gradio as gr
+
+ import os
+
+ from huggingface_hub.file_download import http_get
+ from llama_cpp import Llama
+
+
+ SYSTEM_PROMPT = "You are Apollo, a multilingual medical model. You communicate with people and assist them."
+
+
+ def get_message_tokens(model, role, content):
+     # Serialize one chat turn as "<role>\n<content>\n</s>" and tokenize it.
+     content = f"{role}\n{content}\n</s>"
+     content = content.encode("utf-8")
+     return model.tokenize(content, special=True)
+
+
+ def get_system_tokens(model):
+     system_message = {"role": "system", "content": SYSTEM_PROMPT}
+     return get_message_tokens(model, **system_message)
+
+
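The template above serializes every turn as the role name, a newline, the content, and a closing `</s>`. Purely for illustration (`preview` is not a variable in the file), the system preamble that gets tokenized is:

```python
# Illustrative only: the exact string that get_system_tokens() tokenizes.
preview = f"system\n{SYSTEM_PROMPT}\n</s>"
# -> "system\nYou are Apollo, a multilingual medical model. ...\n</s>"
```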
+ def load_model(
+     directory: str = "/Users/nchen/Code/Mistral-7B-Instruct-GGUF-Run-On-CPU-Basic",
+     model_name: str = "apollo2-7b-q4_k_m.gguf",
+     # Must be a /resolve/ URL: the /blob/ form returns an HTML page, not the file.
+     model_url: str = "https://huggingface.co/nchen909/Apollo2-7B-Q4_K_M-GGUF/resolve/main/apollo2-7b-q4_k_m.gguf"
+ ):
+     os.makedirs(directory, exist_ok=True)  # the default directory may not exist on a Space
+     final_model_path = os.path.join(directory, model_name)
+
+     print("Downloading all files...")
+     if not os.path.exists(final_model_path):
+         with open(final_model_path, "wb") as f:
+             http_get(model_url, f)
+         os.chmod(final_model_path, 0o777)
+     print("Files downloaded!")
+
+     model = Llama(
+         model_path=final_model_path,
+         n_ctx=1024
+     )
+
+     print("Model loaded!")
+     return model
+
+
+ MODEL = load_model()
+
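Since the raw-file download is the only reason for the manual http_get call, an equivalent sketch using huggingface_hub's standard download helper would also work (load_model_cached is a hypothetical name; hf_hub_download caches the file and returns its local path):

```python
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

def load_model_cached(model_name: str = "apollo2-7b-q4_k_m.gguf"):
    # Download once into the HF cache, then point llama.cpp at the cached path.
    path = hf_hub_download(
        repo_id="nchen909/Apollo2-7B-Q4_K_M-GGUF",
        filename=model_name,
    )
    return Llama(model_path=path, n_ctx=1024)
```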
+
+ def user(message, history):
+     # Append the new user message with an empty bot slot to stream into.
+     new_history = history + [[message, None]]
+     return "", new_history
+
+
+ def bot(
+     history,
+     system_prompt,  # wired in from the UI but unused; the fixed SYSTEM_PROMPT is applied
+     top_p,
+     top_k,
+     temp
+ ):
+     model = MODEL
+     tokens = get_system_tokens(model)[:]
+
+     # Replay every completed turn into the prompt.
+     for user_message, bot_message in history[:-1]:
+         message_tokens = get_message_tokens(model=model, role="user", content=user_message)
+         tokens.extend(message_tokens)
+         if bot_message:
+             message_tokens = get_message_tokens(model=model, role="bot", content=bot_message)
+             tokens.extend(message_tokens)
+
+     last_user_message = history[-1][0]
+     message_tokens = get_message_tokens(model=model, role="user", content=last_user_message)
+     tokens.extend(message_tokens)
+
+     # Open the bot turn, then stream tokens until end-of-sequence.
+     role_tokens = model.tokenize("bot\n".encode("utf-8"), special=True)
+     tokens.extend(role_tokens)
+     generator = model.generate(
+         tokens,
+         top_k=top_k,
+         top_p=top_p,
+         temp=temp
+     )
+
+     partial_text = ""
+     for i, token in enumerate(generator):
+         if token == model.token_eos():
+             break
+         partial_text += model.detokenize([token]).decode("utf-8", "ignore")
+         history[-1][1] = partial_text
+         yield history
+
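For a quick sanity check of the prompt assembly and sampling outside Gradio, a hypothetical helper along the same lines as bot() could look like this (generate_once is not part of the file; sampling values mirror the UI defaults):

```python
def generate_once(prompt: str, max_new_tokens: int = 256) -> str:
    # Mirrors bot(): system preamble, one user turn, then an opened bot turn.
    tokens = get_system_tokens(MODEL)[:]
    tokens.extend(get_message_tokens(model=MODEL, role="user", content=prompt))
    tokens.extend(MODEL.tokenize("bot\n".encode("utf-8"), special=True))
    pieces = []
    for i, token in enumerate(MODEL.generate(tokens, top_k=30, top_p=0.9, temp=0.01)):
        if token == MODEL.token_eos() or i >= max_new_tokens:
            break
        pieces.append(MODEL.detokenize([token]).decode("utf-8", "ignore"))
    return "".join(pieces)
```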
+
+ with gr.Blocks(
+     theme=gr.themes.Soft()
+ ) as demo:
+     favicon = '<img src="https://cdn.midjourney.com/b88e5beb-6324-4820-8504-a1a37a9ba36d/0_1.png" width="48px" style="display: inline">'
+     gr.Markdown(
+         f"""<h1><center>{favicon}Apollo2 7B GGUF Q4_K_M</center></h1>
+
+         This is a demo of [Apollo2-7B](https://huggingface.co/nchen909/Apollo2-7B-Q4_K_M-GGUF), a multilingual medical model, served here as a quantized GGUF build running on CPU.
+         """
+     )
+     with gr.Row():
+         with gr.Column(scale=5):
+             system_prompt = gr.Textbox(label="System prompt", placeholder="", value=SYSTEM_PROMPT, interactive=False)
+             chatbot = gr.Chatbot(label="Chat")
+         with gr.Column(min_width=80, scale=1):
+             with gr.Tab(label="Generation parameters"):
+                 top_p = gr.Slider(
+                     minimum=0.0,
+                     maximum=1.0,
+                     value=0.9,
+                     step=0.05,
+                     interactive=True,
+                     label="Top-p",
+                 )
+                 top_k = gr.Slider(
+                     minimum=10,
+                     maximum=100,
+                     value=30,
+                     step=5,
+                     interactive=True,
+                     label="Top-k",
+                 )
+                 temp = gr.Slider(
+                     minimum=0.0,
+                     maximum=2.0,
+                     value=0.01,
+                     step=0.01,
+                     interactive=True,
+                     label="Temperature"
+                 )
+     with gr.Row():
+         with gr.Column():
+             msg = gr.Textbox(
+                 label="Send a message",
+                 placeholder="Send a message",
+                 show_label=False,
+             )
+         with gr.Column():
+             with gr.Row():
+                 submit = gr.Button("Send")
+                 stop = gr.Button("Stop")
+                 clear = gr.Button("Clear")
+     with gr.Row():
+         gr.Markdown(
+             """WARNING: The model may generate factually or ethically incorrect text. We take no responsibility for this."""
+         )
+
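One compatibility note: the app streams into tuple-style `[[user, bot], ...]` history, which recent Gradio releases flag as the legacy Chatbot format. If the deprecation warning matters, pinning the format explicitly is one option (a sketch, assuming the `type` parameter available in recent Gradio versions):

```python
chatbot = gr.Chatbot(label="Chat", type="tuples")  # keep the legacy pair-style history
```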
+
+     # Pressing Enter
+     submit_event = msg.submit(
+         fn=user,
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=False,
+     ).success(
+         fn=bot,
+         inputs=[
+             chatbot,
+             system_prompt,
+             top_p,
+             top_k,
+             temp
+         ],
+         outputs=chatbot,
+         queue=True,
+     )
+
+     # Pressing the button
+     submit_click_event = submit.click(
+         fn=user,
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=False,
+     ).success(
+         fn=bot,
+         inputs=[
+             chatbot,
+             system_prompt,
+             top_p,
+             top_k,
+             temp
+         ],
+         outputs=chatbot,
+         queue=True,
+     )
+
+     # Stop generation
+     stop.click(
+         fn=None,
+         inputs=None,
+         outputs=None,
+         cancels=[submit_event, submit_click_event],
+         queue=False,
+     )
+
+     # Clear history
+     clear.click(lambda: None, None, chatbot, queue=False)
+
+ demo.queue(max_size=128)
+ demo.launch(show_error=True, share=True)
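To reproduce this outside the Space, the file presumably needs gradio (the README pins sdk_version 5.6.0), llama-cpp-python, and huggingface_hub installed; `python app_new.py` then serves the demo, with `share=True` additionally requesting a temporary public Gradio link.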