sync with main version
- app.py +55 -35
- requirements.txt +3 -3
app.py
CHANGED
@@ -12,13 +12,38 @@ fix_pytorch_int8()
 
 
 import torch
+import logging
 import gradio as gr
 from transformers import AutoTokenizer, GenerationConfig, AutoModel
 
 
+gr_title = """<h1 align="center">KumaGLM Lite</h1>
+<h3 align='center'>这是<a href="https://huggingface.co/spaces/KumaTea/KumaGLM" target="_blank">另一个</a> AI Kuma,你可以与他聊天,或者直接在文本框按下Enter</h3>
+<p align='center'>采用 INT4 量化,速度很慢,仅作备用</p>
+<p align='center'>GitHub Repo: <a class="github-button" href="https://github.com/KumaTea/ChatGLM" aria-label="Star KumaTea/ChatGLM on GitHub">KumaTea/ChatGLM</a></p>
+<script async defer src="https://buttons.github.io/buttons.js"></script>
+"""
+gr_footer = """<p align='center'>
+本项目基于
+<a href='https://github.com/ljsabc/Fujisaki' target='_blank'>ljsabc/Fujisaki</a>
+,模型采用
+<a href='https://huggingface.co/THUDM/chatglm-6b' target='_blank'>THUDM/chatglm-6b</a>
+。
+</p>
+<p align='center'>
+<em>每天起床第一句!</em>
+</p>"""
+default_start = ["你是谁?", "我是 kuma"]
+
+
 # device = torch.device('cpu')
 # torch.cuda.current_device = lambda : device
 
+logging.basicConfig(
+    format='%(asctime)s %(levelname)-8s %(message)s',
+    level=logging.INFO,
+    datefmt='%m/%d %H:%M:%S')
+
 model = AutoModel.from_pretrained(
     "KumaTea/twitter-int4",
     trust_remote_code=True,
@@ -35,25 +60,32 @@ model.eval()
 torch.set_default_tensor_type(torch.FloatTensor)
 
 
-def evaluate(context, temperature, top_p, top_k):
+def evaluate(context, temperature, top_p, top_k=None):
     generation_config = GenerationConfig(
         temperature=temperature,
         top_p=top_p,
-        top_k=top_k,
+        # top_k=top_k,
         #repetition_penalty=1.1,
         num_beams=1,
         do_sample=True,
     )
     with torch.no_grad():
-        input_text = f"Context: {context}Answer: "
-
-
+        # input_text = f"Context: {context}Answer: "
+        input_text = '||'.join(default_start) + '||'
+        input_text += context + '||'
+        logging.info('[API] Incoming request: ' + input_text)
+        ids = tokenizer([input_text], return_tensors="pt")
+        inputs = ids.to("cpu")
         out = model.generate(
-
-            max_length=
+            **inputs,
+            max_length=224,
             generation_config=generation_config
         )
-
+        out = out.tolist()[0]
+        decoder_output = tokenizer.decode(out)
+        # out_text = decoder_output.split("Answer: ")[1]
+        out_text = decoder_output
+        logging.info('[API] Result: ' + out_text)
         return out_text
 
 
@@ -65,10 +97,12 @@ def evaluate_stream(msg, history, temperature, top_p):
         num_beams=1,
         do_sample=True,
     )
+    if not msg:
+        msg = '……'
 
-    history.append([msg,
+    history.append([msg, ""])
 
-    context =
+    context = '||'.join(default_start) + '||'
     if len(history) > 4:
         history.pop(0)
 
@@ -79,7 +113,7 @@ def evaluate_stream(msg, history, temperature, top_p):
     for h in history[:-1]:
         context += h[0] + "||" + h[1] + "||"
 
-    context += history[-1][0]
+    context += history[-1][0] + "||"
     context = context.replace(r'<br>', '')
 
     # TODO: Avoid the tokens are too long.
@@ -89,37 +123,20 @@ def evaluate_stream(msg, history, temperature, top_p):
         context = context[15:]
 
     h = []
-
-    print("Context:", context)
+    logging.info('[UI] Incoming request: ' + context)
     for response, h in model.stream_chat(tokenizer, context, h, max_length=CUTOFF, top_p=top_p, temperature=temperature):
         history[-1][1] = response
         yield history, ""
 
-    #return response
-
-
-title = """<h1 align="center">KumaGLM</h1>
-<h3 align='center'>这是一个 AI Kuma,你可以与他聊天,或者直接在文本框按下Enter</h3>
-<p align='center'>采用 INT4 量化,速度很慢,仅作备用</p>"""
-footer = """<p align='center'>
-本项目基于
-<a href='https://github.com/ljsabc/Fujisaki' target='_blank'>ljsabc/Fujisaki</a>
-,模型采用
-<a href='https://huggingface.co/THUDM/chatglm-6b' target='_blank'>THUDM/chatglm-6b</a>
-。
-</p>
-<p align='center'>
-<em>每天起床第一句!</em>
-</p>"""
 
 with gr.Blocks() as demo:
-    gr.HTML(
-    state = gr.State()
+    gr.HTML(gr_title)
+    # state = gr.State()
    with gr.Row():
         with gr.Column(scale=2):
-            temp = gr.components.Slider(minimum=0, maximum=1.1, value=0.
+            temp = gr.components.Slider(minimum=0, maximum=1.1, value=0.5, label="Temperature",
                                         info="温度参数,越高的温度生成的内容越丰富,但是有可能出现语法问题。小的温度也能帮助生成更相关的回答。")
-            top_p = gr.components.Slider(minimum=0.5, maximum=1.0, value=0.
+            top_p = gr.components.Slider(minimum=0.5, maximum=1.0, value=0.8, label="Top-p",
                                         info="top-p参数,只输出前p>top-p的文字,越大生成的内容越丰富,但也可能出现语法问题。数字越小似乎上下文的衔接性越好。")
         #code = gr.Textbox(label="temp_output", info="解码器输出")
         #top_k = gr.components.Slider(minimum=1, maximum=200, step=1, value=25, label="Top k",
@@ -128,12 +145,15 @@ with gr.Blocks() as demo:
         with gr.Column(scale=3):
             chatbot = gr.Chatbot(label="聊天框", info="")
             msg = gr.Textbox(label="输入框", placeholder="最近过得怎么样?",
-                             info="输入你的内容,按[Enter]
+                             info="输入你的内容,按 [Enter] 发送。什么都不填经常会出错。")
             clear = gr.Button("清除聊天")
+            api_handler = gr.Button("API", visible=False)
+            textbox_for_api = gr.Textbox(visible=False)
 
     msg.submit(evaluate_stream, [msg, chatbot, temp, top_p], [chatbot, msg])
     clear.click(lambda: None, None, chatbot, queue=False)
-
+    api_handler.click(evaluate, [textbox_for_api, temp, top_p], [textbox_for_api], api_name='chat')
+    gr.HTML(gr_footer)
 
 demo.queue()
 demo.launch(debug=False)
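The hidden `api_handler` button is how this commit exposes a programmatic endpoint: `api_name='chat'` registers `evaluate` under `/chat` even though the button and its textbox are invisible in the UI. A minimal client-side sketch of calling that endpoint, assuming the `gradio_client` package and a Space id of `KumaTea/KumaGLM-Lite` (the id is an assumption for illustration, not part of the diff):

# Sketch: call the hidden endpoint registered with api_name='chat' above.
# The Space id below is hypothetical; substitute the real one.
from gradio_client import Client

client = Client("KumaTea/KumaGLM-Lite")  # hypothetical Space id
reply = client.predict(
    "最近过得怎么样?",  # textbox_for_api: the message to send
    0.5,                 # temp: temperature slider value
    0.8,                 # top_p: top-p slider value
    api_name="/chat",
)
print(reply)

Because `evaluate` prepends `'||'.join(default_start) + '||'`, the endpoint answers as if the conversation had already opened with the `default_start` exchange.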
requirements.txt
CHANGED
@@ -15,6 +15,6 @@ datasets>=2.10.1
 git+https://github.com/huggingface/peft.git # 最新版本 >=0.3.0.dev0
 
 --extra-index-url https://download.pytorch.org/whl/cpu
-torch
-torchvision
-torchaudio
+torch>=2.0.0+cpu
+torchvision>=0.15.1+cpu
+torchaudio>=2.0.1+cpu
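The new specifiers pin the CPU-only wheels from the extra index. A quick sanity check after installing — a sketch assuming the packages did resolve from download.pytorch.org/whl/cpu:

# Sketch: confirm the CPU-only PyTorch build was installed.
import torch

print(torch.__version__)          # CPU wheels carry a "+cpu" local version, e.g. "2.0.0+cpu"
print(torch.cuda.is_available())  # expected False for the CPU-only build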
|