hysts HF staff committed on
Commit
5af899b
·
1 Parent(s): 143e2b3
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -9,7 +9,7 @@ import spaces
9
  import torch
10
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
11
 
12
- DESCRIPTION = "# Mistral-7B"
13
 
14
  if not torch.cuda.is_available():
15
  DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
@@ -19,7 +19,7 @@ DEFAULT_MAX_NEW_TOKENS = 1024
19
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
20
 
21
  if torch.cuda.is_available():
22
- model_id = "mistralai/Mistral-7B-Instruct-v0.1"
23
  model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
24
  tokenizer = AutoTokenizer.from_pretrained(model_id)
25
 
@@ -45,7 +45,7 @@ def generate(
45
  gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
46
  input_ids = input_ids.to(model.device)
47
 
48
- streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
49
  generate_kwargs = dict(
50
  {"input_ids": input_ids},
51
  streamer=streamer,
 
9
  import torch
10
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
11
 
12
+ DESCRIPTION = "# Mistral-7B v0.2"
13
 
14
  if not torch.cuda.is_available():
15
  DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 
19
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
20
 
21
  if torch.cuda.is_available():
22
+ model_id = "mistralai/Mistral-7B-Instruct-v0.2"
23
  model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
24
  tokenizer = AutoTokenizer.from_pretrained(model_id)
25
 
 
45
  gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
46
  input_ids = input_ids.to(model.device)
47
 
48
+ streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
49
  generate_kwargs = dict(
50
  {"input_ids": input_ids},
51
  streamer=streamer,