philschmid committed · Commit e124d2a · 1 Parent(s): 02124fa

Update app.py

Files changed (1):
  1. app.py +11 -3
app.py CHANGED
@@ -31,18 +31,18 @@ else:
 # torch_dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
 # model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, device_map="auto")
 model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_8bit=True)
-
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 prompt_template = f"### Anweisung:\n{{input}}\n\n### Antwort:"
 
 
-def generate(instruction, temperature, max_new_tokens, top_p, length_penalty):
+def generate(instruction, temperature=1, max_new_tokens=256, top_p=0.9, length_penalty=1.0):
     formatted_instruction = prompt_template.format(input=instruction)
     # COMMENT IN FOR NON STREAMING
     # generation_config = GenerationConfig(
     #     do_sample=True,
     #     top_p=top_p,
+    #     top_k=0,
     #     temperature=temperature,
     #     max_new_tokens=max_new_tokens,
     #     early_stopping=True,
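The first hunk gives generate() explicit defaults and keeps the 8-bit model load. A minimal standalone sketch of that loading path, assuming bitsandbytes and accelerate are installed; the model id below is a placeholder, since app.py sets the real one outside this diff:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "some-org/some-instruct-model"  # placeholder; the real id is defined earlier in app.py

# load_in_8bit=True quantizes the linear layers via bitsandbytes (roughly half
# the memory of fp16); device_map="auto" lets accelerate spread the weights
# over the available devices.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)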
@@ -71,7 +71,9 @@ def generate(instruction, temperature, max_new_tokens, top_p, length_penalty):
 
     generate_kwargs = dict(
         top_p=top_p,
+        top_k=0,
         temperature=temperature,
+        do_sample=True,
         max_new_tokens=max_new_tokens,
         early_stopping=True,
         length_penalty=length_penalty,
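In the second hunk, top_k=0 turns off top-k filtering so nucleus (top_p) sampling acts alone, and do_sample=True is what actually activates sampling; without it, generate() ignores top_p and temperature. The "COMMENT IN FOR NON STREAMING" note suggests the live path streams tokens, so here is a hedged sketch of how such kwargs typically feed a streaming loop, assuming a transformers TextIteratorStreamer plus a background thread, which this diff does not show:

from threading import Thread
from transformers import TextIteratorStreamer

def stream_generate(model, tokenizer, formatted_instruction,
                    temperature=1.0, max_new_tokens=256, top_p=0.9, length_penalty=1.0):
    inputs = tokenizer(formatted_instruction, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        top_p=top_p,
        top_k=0,               # 0 disables top-k filtering; only top-p remains
        temperature=temperature,
        do_sample=True,        # without this, generate() ignores the sampling kwargs
        max_new_tokens=max_new_tokens,
        early_stopping=True,   # these two only affect beam search; kept to mirror the diff
        length_penalty=length_penalty,
    )
    # generate() blocks, so run it in a thread and yield partial text as it arrives
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    text = ""
    for new_text in streamer:
        text += new_text
        yield text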
@@ -142,7 +144,13 @@ with gr.Blocks(theme=theme) as demo:
                 placeholder="Hier Antwort erscheint...",
             )
             submit = gr.Button("Generate", variant="primary")
-            gr.Examples(examples=examples, inputs=[instruction])
+            gr.Examples(
+                examples=examples,
+                inputs=[instruction],
+                # cache_examples=True,
+                # fn=generate,
+                # outputs=[output],
+            )
 
         with gr.Column(scale=1):
             temperature = gr.Slider(
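The third hunk expands gr.Examples into keyword-per-line form with caching left commented out; enabling cache_examples=True requires fn and outputs, because Gradio runs the function over every example at startup and serves the stored results. A self-contained sketch with stand-in names (echo_upper and the labels here are illustrative, not from the app):

import gradio as gr

def echo_upper(text):  # stand-in for the app's generate()
    return text.upper()

with gr.Blocks() as demo:
    instruction = gr.Textbox(label="Anweisung")
    output = gr.Textbox(label="Antwort")
    gr.Examples(
        examples=["Schreibe ein Gedicht über den Sommer."],
        inputs=[instruction],
        fn=echo_upper,
        outputs=[output],
        cache_examples=True,  # runs fn on each example once at launch, then serves the cache
    )

demo.launch()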
 