YanshekWoo committed
Commit 02f0e9a · 1 Parent(s): 5a9366e

ADD history processor

Files changed (1)
  1. app.py +11 -6
app.py CHANGED
@@ -4,10 +4,10 @@ from transformers import BertTokenizer, BartForConditionalGeneration
 
 title = "HIT-TMG/dialogue-bart-large-chinese"
 description = """
-This is a seq2seq model fine-tuned on several Chinese dialogue datasets, from bart-large-chinese.
-See some details of model card at https://huggingface.co/HIT-TMG/dialogue-bart-large-chinese .
-Besides starting the conversation from scratch, you can also input the whole dialogue history utterance by utterance seperated by '[SEP]' (e.g. "可以认识一下吗[SEP]当然可以啦,你好。[SEP]嘿嘿你好,请问你最近在忙什么呢?[SEP]我最近养了一只狗狗,我在训练它呢。").
-Please be careful that the history utterance turn should be odd, since this demo begins from user instead of the chatbot.
+This is a seq2seq model fine-tuned on several Chinese dialogue datasets, from bart-large-chinese. \n
+See some details of model card at https://huggingface.co/HIT-TMG/dialogue-bart-large-chinese . \n\n
+Besides starting the conversation from scratch, you can also input the whole dialogue history utterance by utterance seperated by '[SEP]'. \n
+(e.g. "可以认识一下吗[SEP]当然可以啦,你好。[SEP]嘿嘿你好,请问你最近在忙什么呢?[SEP]我最近养了一只狗狗,我在训练它呢。") \n
 """
 
 
@@ -31,12 +31,17 @@ def chat_func(input_utterance: str, history: Optional[List[str]] = None):
                           truncation=True,
                           max_length=max_length).input_ids
 
-    output_ids = model.generate(input_ids)[0]
+    output_ids = model.generate(input_ids,
+                                max_new_tokens=30)[0]
     response = tokenizer.decode(output_ids, skip_special_tokens=True)
 
     history.append(response)
 
-    display_utterances = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]
+
+    if len(history) % 2 == 0:
+        display_utterances = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]
+    else:
+        display_utterances = [("", history[0])] + [(history[i], history[i + 1]) for i in range(1, len(history) - 1, 2)]
 
     return display_utterances, history
 
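
The "history processor" added by this commit is the odd/even branch above: it pairs the stored utterances into (user, bot) tuples for display. Below is a minimal standalone sketch of that pairing step, assuming the same alternating-utterance convention as chat_func; the pair_history helper name and the sample strings are illustrative and not part of the repository.

from typing import List, Tuple


def pair_history(history: List[str]) -> List[Tuple[str, str]]:
    """Pair alternating utterances into (user, bot) tuples, mirroring chat_func's display logic."""
    if len(history) % 2 == 0:
        # Even length: utterances pair off cleanly as user/bot turns.
        return [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]
    # Odd length: the first utterance has no user turn to pair with,
    # so show it with an empty user slot and pair the rest.
    return [("", history[0])] + [(history[i], history[i + 1]) for i in range(1, len(history) - 1, 2)]


if __name__ == "__main__":
    print(pair_history(["user turn 1", "bot reply 1"]))
    # -> [('user turn 1', 'bot reply 1')]
    print(pair_history(["bot opener", "user turn 1", "bot reply 1"]))
    # -> [('', 'bot opener'), ('user turn 1', 'bot reply 1')]

In a Gradio Chatbot component, which this Space appears to use, each such tuple renders as one user/bot exchange, so the empty-string padding keeps an unpaired leading utterance on the bot side of the widget.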