Spaces: Running on Zero
NGUYEN, Xuan Phi committed
Commit · 5622434
1 Parent(s): c14f353
update
app.py CHANGED
@@ -470,7 +470,7 @@ def chat_response(message, history, temperature: float, max_tokens: int, system_
     return f'{out}'
 
 
-def vllm_abort(self: LLM):
+def vllm_abort(self: Any):
     scheduler = self.llm_engine.scheduler
     for state_queue in [scheduler.waiting, scheduler.running, scheduler.swapped]:
         for seq_group in state_queue:
@@ -482,7 +482,8 @@ def vllm_abort(self: LLM):
                 continue
             scheduler.free_seq(seq, SequenceStatus.FINISHED_ABORTED)
 
-def _vllm_run_engine(self: LLM, use_tqdm: bool = False) -> Dict[str, RequestOutput]:
+# def _vllm_run_engine(self: LLM, use_tqdm: bool = False) -> Dict[str, RequestOutput]:
+def _vllm_run_engine(self: Any, use_tqdm: bool = False) -> Dict[str, Any]:
     # Initialize tqdm.
     if use_tqdm:
         num_requests = self.llm_engine.get_num_unfinished_requests()
@@ -512,10 +513,10 @@ def _vllm_run_engine(self: LLM, use_tqdm: bool = False) -> Dict[str, RequestOutp
 def vllm_generate_stream(
     self: LLM,
     prompts: Optional[Union[str, List[str]]] = None,
-    sampling_params: Optional[SamplingParams] = None,
+    sampling_params: Optional[Any] = None,
     prompt_token_ids: Optional[List[List[int]]] = None,
     use_tqdm: bool = False,
-) -> Dict[str, RequestOutput]:
+) -> Dict[str, Any]:
     """Generates the completions for the input prompts.
 
     NOTE: This class automatically batches the given prompts, considering
@@ -661,7 +662,7 @@ def debug_chat_response_echo(
     frequency_penalty: float = 0.4,
     system_prompt: str = SYSTEM_PROMPT_1,
 ) -> str:
-    yield message
+    yield f"repeat: {message}"
 
 
 # ============ CONSTANT ============
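For context, the functions touched by this commit read like helpers that take a vLLM engine as their first argument. Below is a minimal, hypothetical usage sketch, not part of the commit: it assumes vllm is installed, that vllm_generate_stream and vllm_abort are importable from this module, and that vllm_generate_stream yields intermediate outputs as the engine steps. The model id and sampling values are placeholders.

from vllm import LLM, SamplingParams

# Placeholder model id and sampling values, for illustration only.
engine = LLM(model="path/to/model")
params = SamplingParams(temperature=0.7, max_tokens=128)

# Assumed streaming pattern: iterate over partial outputs, then abort any
# requests still sitting in the scheduler queues (vllm_abort walks
# scheduler.waiting / running / swapped, as the diff above shows).
for partial in vllm_generate_stream(engine, prompts="Hello", sampling_params=params):
    pass  # e.g. push the latest decoded text to the UI

vllm_abort(engine)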