nxphi47 committed on
Commit
f38f5d9
·
1 Parent(s): 5919bed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -31
app.py CHANGED
@@ -178,6 +178,7 @@ Your response should adapt to the norms and customs of the respective language a
178
  # ============ CONSTANT ============
179
  # https://github.com/gradio-app/gradio/issues/884
180
  MODEL_NAME = "SeaLLM-13B"
 
181
 
182
  MODEL_TITLE = """
183
  <div class="container" style="
@@ -231,21 +232,24 @@ MODEL_TITLE = """
231
  # </span>
232
  # """.strip()
233
 
234
- MODEL_DESC = """
 
 
 
235
  <div style='display:flex; gap: 0.25rem; '>
236
  <a href='https://github.com/SeaLLMs/SeaLLMs'><img src='https://img.shields.io/badge/Github-Code-success'></a>
237
  <a href='https://huggingface.co/spaces/SeaLLMs/SeaLLM-Chat-13b'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue'></a>
238
  <a href='https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue'></a>
239
  </div>
240
  <span style="font-size: larger">
241
- This is <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">SeaLLM-13B-Chat</a> - a chatbot assistant optimized for Southeast Asian Languages. It produces helpful responses in English 🇬🇧, Vietnamese 🇻🇳, Indonesian 🇮🇩 and Thai 🇹🇭.
242
- Explore <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">our article</a> for more details.
243
  </span>
244
  <br>
245
  <span>
246
- <span style="color: red">NOTE:</span> The chatbot may produce inaccurate and harmful information.
247
- By using our service, you are required to <span style="color: red">agree to our <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b/blob/main/LICENSE" target="_blank" style="color: red">Terms Of Use</a>,</span> which includes
248
- not to use our service to generate any harmful, inappropriate or unethical or illegal content that violates locally applicable and international laws and regulations.
249
  The service collects user dialogue data for testing and performance improvement, and reserves the right to distribute it under
250
  <a href="https://creativecommons.org/licenses/by/4.0/">(CC-BY)</a> or similar license. So do not enter any personal information!
251
  </span>
@@ -731,17 +735,6 @@ def llama_chat_sys_input_seq_constructor(text, sys_prompt=SYSTEM_PROMPT_1, bos_t
731
  return f"{bos_token}{B_INST} {B_SYS} {sys_prompt} {E_SYS} {text} {E_INST}"
732
 
733
 
734
- def few_shot_prompt(
735
- message: str,
736
- history: List[Tuple[str, str]],
737
- sys_prompt=SYSTEM_PROMPT_1,
738
- bos_token=BOS_TOKEN,
739
- eos_token=EOS_TOKEN,
740
- include_end_instruct=True,
741
- ):
742
- return f"{bos_token} {message}"
743
-
744
-
745
  def llama_chat_multiturn_sys_input_seq_constructor(
746
  message: str,
747
  history: List[Tuple[str, str]],
@@ -1572,10 +1565,9 @@ def batch_inference(
1572
  prompt_format_fn = llama_chat_multiturn_sys_input_seq_constructor
1573
  elif prompt_mode == 'few-shot':
1574
  from functools import partial
1575
- # prompt_format_fn = partial(
1576
- # llama_chat_multiturn_sys_input_seq_constructor, include_end_instruct=False
1577
- # )
1578
- prompt_format_fn = few_shot_prompt
1579
  else:
1580
  raise gr.Error(f'Wrong mode {prompt_mode}')
1581
 
@@ -1607,7 +1599,6 @@ def batch_inference(
1607
  for res, item in zip(responses, all_items):
1608
  item['response'] = res
1609
 
1610
- # save_path = "/mnt/workspace/workgroup/phi/test.json"
1611
  save_path = BATCH_INFER_SAVE_TMP_FILE
1612
  os.makedirs(os.path.dirname(save_path), exist_ok=True)
1613
  with open(save_path, 'w', encoding='utf-8') as f:
@@ -1629,6 +1620,15 @@ each item has `prompt` key. We put guardrails to enhance safety, so do not input
1629
  ```
1630
  """
1631
 
 
 
 
 
 
 
 
 
 
1632
 
1633
  def launch():
1634
  global demo, llm, DEBUG, LOG_FILE
@@ -1701,7 +1701,7 @@ def launch():
1701
 
1702
  if QUANTIZATION == 'awq':
1703
  print(F'Load model in int4 quantization')
1704
- llm = LLM(model=model_path, dtype=dtype, tensor_parallel_size=tensor_parallel, gpu_memory_utilization=gpu_memory_utilization, quantization="awq")
1705
  else:
1706
  llm = LLM(model=model_path, dtype=dtype, tensor_parallel_size=tensor_parallel, gpu_memory_utilization=gpu_memory_utilization)
1707
 
@@ -1751,7 +1751,7 @@ def launch():
1751
  ["upload_chat.json", "chat", 0.2, 1024, 0.5, 0, "[STOP],[END],<s>,</s>"],
1752
  ["upload_few_shot.json", "few-shot", 0.2, 128, 0.5, 0, "[STOP],[END],<s>,</s>,\\n"]
1753
  ],
1754
- cache_examples=False,
1755
  )
1756
 
1757
  demo_chat = gr.ChatInterface(
@@ -1765,7 +1765,7 @@ def launch():
1765
  ],
1766
  show_copy_button=True,
1767
  ),
1768
- textbox=gr.Textbox(placeholder='Type message', lines=8, max_lines=128, min_width=200),
1769
  submit_btn=gr.Button(value='Submit', variant="primary", scale=0),
1770
  # ! consider preventing the stop button
1771
  # stop_btn=None,
@@ -1780,26 +1780,42 @@ def launch():
1780
  # ! Remove the system prompt textbox to avoid jailbreaking
1781
  # gr.Textbox(value=sys_prompt, label='System prompt', lines=8)
1782
  ],
 
 
1783
  )
 
 
 
 
1784
  demo = CustomTabbedInterface(
1785
  interface_list=[demo_chat, demo_file_upload],
1786
  tab_names=["Chat Interface", "Batch Inference"],
1787
  title=f"{model_title}",
1788
- description=f"{model_desc}",
1789
  )
1790
  demo.title = MODEL_NAME
 
1791
  with demo:
 
 
 
 
 
 
 
1792
  gr.Markdown(cite_markdown)
1793
- if DISPLAY_MODEL_PATH:
1794
- gr.Markdown(path_markdown.format(model_path=model_path))
1795
 
1796
  if ENABLE_AGREE_POPUP:
1797
  demo.load(None, None, None, _js=AGREE_POP_SCRIPTS)
1798
 
1799
-
1800
  demo.queue()
1801
  demo.launch(server_port=PORT)
1802
  else:
 
 
 
1803
  demo = gr.ChatInterface(
1804
  response_fn,
1805
  chatbot=ChatBot(
@@ -1811,12 +1827,12 @@ def launch():
1811
  ],
1812
  show_copy_button=True,
1813
  ),
1814
- textbox=gr.Textbox(placeholder='Type message', lines=8, max_lines=128, min_width=200),
1815
  submit_btn=gr.Button(value='Submit', variant="primary", scale=0),
1816
  # ! consider preventing the stop button
1817
  # stop_btn=None,
1818
  title=f"{model_title}",
1819
- description=f"{model_desc}",
1820
  additional_inputs=[
1821
  gr.Number(value=temperature, label='Temperature (higher -> more random)'),
1822
  gr.Number(value=max_tokens, label='Max generated tokens (increase if want more generation)'),
@@ -1826,6 +1842,8 @@ def launch():
1826
  # ! Remove the system prompt textbox to avoid jailbreaking
1827
  # gr.Textbox(value=sys_prompt, label='System prompt', lines=8)
1828
  ],
 
 
1829
  )
1830
  demo.title = MODEL_NAME
1831
  with demo:
 
178
  # ============ CONSTANT ============
179
  # https://github.com/gradio-app/gradio/issues/884
180
  MODEL_NAME = "SeaLLM-13B"
181
+ MODEL_NAME = str(os.environ.get("MODEL_NAME", "SeaLLM-13B"))
182
 
183
  MODEL_TITLE = """
184
  <div class="container" style="
 
232
  # </span>
233
  # """.strip()
234
 
235
+ # <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">SeaLLM-13B-Chat</a> - a helpful chatbot assistant for Southeast Asian Languages. It supports English 🇬🇧, Vietnamese 🇻🇳, Indonesian 🇮🇩, Thai 🇹🇭, Malay 🇲🇾, Khmer🇰🇭, Lao🇱🇦, Tagalog🇵🇭 and Burmese🇲🇲.
236
+
237
+
238
+ MODEL_DESC = f"""
239
  <div style='display:flex; gap: 0.25rem; '>
240
  <a href='https://github.com/SeaLLMs/SeaLLMs'><img src='https://img.shields.io/badge/Github-Code-success'></a>
241
  <a href='https://huggingface.co/spaces/SeaLLMs/SeaLLM-Chat-13b'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue'></a>
242
  <a href='https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue'></a>
243
  </div>
244
  <span style="font-size: larger">
245
+ <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">{MODEL_NAME}</a> - a helpful assistant for Southeast Asian Languages. It supports English 🇬🇧, Vietnamese 🇻🇳, Indonesian 🇮🇩, Thai 🇹🇭, Malay 🇲🇾, Khmer🇰🇭, Lao🇱🇦, Tagalog🇵🇭 and Burmese🇲🇲.
246
+ Explore <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">our article</a> for more.
247
  </span>
248
  <br>
249
  <span>
250
+ <span style="color: red">NOTE: The chatbot may produce false and harmful content and does not have up-to-date knowledge.</span>
251
+ By using our service, you are required to agree to our <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b/blob/main/LICENSE" target="_blank" style="color: red">Terms Of Use</a>, which includes
252
+ not to use our service to generate any harmful, inappropriate or illegal content that violates local and international laws.
253
  The service collects user dialogue data for testing and performance improvement, and reserves the right to distribute it under
254
  <a href="https://creativecommons.org/licenses/by/4.0/">(CC-BY)</a> or similar license. So do not enter any personal information!
255
  </span>
 
735
  return f"{bos_token}{B_INST} {B_SYS} {sys_prompt} {E_SYS} {text} {E_INST}"
736
 
737
 
 
 
 
 
 
 
 
 
 
 
 
738
  def llama_chat_multiturn_sys_input_seq_constructor(
739
  message: str,
740
  history: List[Tuple[str, str]],
 
1565
  prompt_format_fn = llama_chat_multiturn_sys_input_seq_constructor
1566
  elif prompt_mode == 'few-shot':
1567
  from functools import partial
1568
+ prompt_format_fn = partial(
1569
+ llama_chat_multiturn_sys_input_seq_constructor, include_end_instruct=False
1570
+ )
 
1571
  else:
1572
  raise gr.Error(f'Wrong mode {prompt_mode}')
1573
 
 
1599
  for res, item in zip(responses, all_items):
1600
  item['response'] = res
1601
 
 
1602
  save_path = BATCH_INFER_SAVE_TMP_FILE
1603
  os.makedirs(os.path.dirname(save_path), exist_ok=True)
1604
  with open(save_path, 'w', encoding='utf-8') as f:
 
1620
  ```
1621
  """
1622
 
1623
+ CHAT_EXAMPLES = [
1624
+ ["Hãy giải thích thuyết tương đối rộng."],
1625
+ ["Tolong bantu saya menulis email ke lembaga pemerintah untuk mencari dukungan finansial untuk penelitian AI."],
1626
+ ["ຂໍແຈ້ງ 5 ສະຖານທີ່ທ່ອງທ່ຽວໃນນະຄອນຫຼວງວຽງຈັນ"],
1627
+ ]
1628
+
1629
+
1630
+ # performance items
1631
+
1632
 
1633
  def launch():
1634
  global demo, llm, DEBUG, LOG_FILE
 
1701
 
1702
  if QUANTIZATION == 'awq':
1703
  print(F'Load model in int4 quantization')
1704
+ llm = LLM(model=model_path, dtype="float16", tensor_parallel_size=tensor_parallel, gpu_memory_utilization=gpu_memory_utilization, quantization="awq")
1705
  else:
1706
  llm = LLM(model=model_path, dtype=dtype, tensor_parallel_size=tensor_parallel, gpu_memory_utilization=gpu_memory_utilization)
1707
 
 
1751
  ["upload_chat.json", "chat", 0.2, 1024, 0.5, 0, "[STOP],[END],<s>,</s>"],
1752
  ["upload_few_shot.json", "few-shot", 0.2, 128, 0.5, 0, "[STOP],[END],<s>,</s>,\\n"]
1753
  ],
1754
+ # cache_examples=True,
1755
  )
1756
 
1757
  demo_chat = gr.ChatInterface(
 
1765
  ],
1766
  show_copy_button=True,
1767
  ),
1768
+ textbox=gr.Textbox(placeholder='Type message', lines=4, max_lines=128, min_width=200),
1769
  submit_btn=gr.Button(value='Submit', variant="primary", scale=0),
1770
  # ! consider preventing the stop button
1771
  # stop_btn=None,
 
1780
  # ! Remove the system prompt textbox to avoid jailbreaking
1781
  # gr.Textbox(value=sys_prompt, label='System prompt', lines=8)
1782
  ],
1783
+ examples=CHAT_EXAMPLES,
1784
+ cache_examples=False
1785
  )
1786
+ descriptions = model_desc
1787
+ if DISPLAY_MODEL_PATH:
1788
+ descriptions += f"<br> {path_markdown.format(model_path=model_path)}"
1789
+
1790
  demo = CustomTabbedInterface(
1791
  interface_list=[demo_chat, demo_file_upload],
1792
  tab_names=["Chat Interface", "Batch Inference"],
1793
  title=f"{model_title}",
1794
+ description=descriptions,
1795
  )
1796
  demo.title = MODEL_NAME
1797
+ callback = None
1798
  with demo:
1799
+ if DATA_SET_REPO_PATH != "":
1800
+ try:
1801
+ from performance_plot import attach_plot_to_demo
1802
+ attach_plot_to_demo(demo)
1803
+ except Exception as e:
1804
+ print(f'Fail to load DEMO plot: {str(e)}')
1805
+
1806
  gr.Markdown(cite_markdown)
1807
+ # if DISPLAY_MODEL_PATH:
1808
+ # gr.Markdown(path_markdown.format(model_path=model_path))
1809
 
1810
  if ENABLE_AGREE_POPUP:
1811
  demo.load(None, None, None, _js=AGREE_POP_SCRIPTS)
1812
 
 
1813
  demo.queue()
1814
  demo.launch(server_port=PORT)
1815
  else:
1816
+ descriptions = model_desc
1817
+ if DISPLAY_MODEL_PATH:
1818
+ descriptions += f"<br> {path_markdown.format(model_path=model_path)}"
1819
  demo = gr.ChatInterface(
1820
  response_fn,
1821
  chatbot=ChatBot(
 
1827
  ],
1828
  show_copy_button=True,
1829
  ),
1830
+ textbox=gr.Textbox(placeholder='Type message', lines=4, max_lines=128, min_width=200),
1831
  submit_btn=gr.Button(value='Submit', variant="primary", scale=0),
1832
  # ! consider preventing the stop button
1833
  # stop_btn=None,
1834
  title=f"{model_title}",
1835
+ description=descriptions,
1836
  additional_inputs=[
1837
  gr.Number(value=temperature, label='Temperature (higher -> more random)'),
1838
  gr.Number(value=max_tokens, label='Max generated tokens (increase if want more generation)'),
 
1842
  # ! Remove the system prompt textbox to avoid jailbreaking
1843
  # gr.Textbox(value=sys_prompt, label='System prompt', lines=8)
1844
  ],
1845
+ examples=CHAT_EXAMPLES,
1846
+ cache_examples=False
1847
  )
1848
  demo.title = MODEL_NAME
1849
  with demo: