Update app.py
app.py CHANGED

@@ -4,31 +4,31 @@ import json
 import time
 
 # Initialize the pipeline with the new model
-pipe = pipeline("text-generation", model="Blexus/…
+pipe = pipeline("text-generation", model="Blexus/originos-icn-savant-instruct")
 
-def format_prompt(message, …
-    prompt = …
+def format_prompt(message, history):
+    prompt = ""
 
-    for entry in history:
-        if len(entry) == 2:
-            user_prompt, bot_response = entry
-            prompt += f"USER: {user_prompt} <|endofuser|>\nASSISTANT: {bot_response}<|endoftext|>\n"
+    #for entry in history:
+    #    if len(entry) == 2:
+    #        user_prompt, bot_response = entry
+    #        prompt += f"USER: {user_prompt} <|endofuser|>\nASSISTANT: {bot_response}<|endoftext|>\n"
 
-    prompt += f"…
+    prompt += f"<|in|> {message} <|out|>"
     return prompt
 
-def generate(prompt, …
+def generate(prompt, history, temperature=0.9, max_new_tokens=4096, top_p=0.9, repetition_penalty=1.2):
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
     top_p = float(top_p)
 
-    formatted_prompt = format_prompt(prompt, …
-    response_text = "We are sorry but …
+    formatted_prompt = format_prompt(prompt, history)
+    response_text = "We are sorry but ICN savant doesn't know how to answer."
     # Generate the response without streaming
     try:
         response = pipe(formatted_prompt, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty)[0]["generated_text"]
-        response_text = response.split("…
+        response_text = response.split("<|end|>")[-1].strip()
 
         # Simulate streaming by yielding parts of the response
         accumulated_response = ""  # To keep track of the full response

@@ -48,12 +48,6 @@ customCSS = """
 """
 
 additional_inputs = [
-    gr.Textbox(
-        label="System prompt",
-        value="You are a helpful intelligent assistant. Your name is Quble.",
-        info="System prompt",
-        interactive=True,
-    ),
     gr.Slider(
         label="Temperature",
         value=0.9,
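
For context, a minimal sketch of the round trip the updated code performs. The model id and the <|in|>/<|out|>/<|end|> markers come from the diff; the example question, the sampling values, and the variable names here are illustrative assumptions, not part of the commit:

from transformers import pipeline

# Model id as in the diff; loading assumes the weights are available on the Hub.
pipe = pipeline("text-generation", model="Blexus/originos-icn-savant-instruct")

# format_prompt() in the diff wraps the user message in <|in|> ... <|out|>.
prompt = "<|in|> What is your name? <|out|>"  # hypothetical user message

out = pipe(prompt, max_new_tokens=256, temperature=0.9, top_p=0.9,
           repetition_penalty=1.2)[0]["generated_text"]

# generate() in the diff keeps whatever follows the last "<|end|>" marker.
reply = out.split("<|end|>")[-1].strip()
print(reply)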
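The comment "Simulate streaming by yielding parts of the response" refers to a common Gradio pattern: generate the full text up front, then yield progressively longer prefixes so the chat UI appears to stream. A minimal sketch of that pattern, assuming word-level chunking and a small delay (both illustrative; the actual app may slice the text differently):

import time

def stream_simulated(full_response):
    accumulated_response = ""  # keeps track of the full response, as in the diff
    for word in full_response.split():
        accumulated_response += word + " "
        time.sleep(0.02)  # hypothetical delay so the UI visibly updates
        yield accumulated_response.strip()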