Spaces:

nroggendorff
/

dolphin

Paused

nroggendorff commited on Apr 10, 2024

Commit

b7d4c4e

verified ·

1 Parent(s): b0f4d21

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,26 +1,14 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-tokenizer = AutoTokenizer.from_pretrained('stabilityai/stablelm-2-zephyr-1_6b')
-model = AutoModelForCausalLM.from_pretrained(
-    'stabilityai/stablelm-2-zephyr-1_6b',
-    device_map="auto"
-)
 def pipe(text: str):
-    tokens = model.generate(
-        inputs.to(model.device),
-        max_new_tokens=1024,
-        temperature=0.5,
-        do_sample=True
-    )
-    inputs = tokenizer.apply_chat_template(
-        text,
-        add_generation_prompt=True,
-        return_tensors='pt'
-    )
-    return tokenizer.decode(tokens[0], skip_special_tokens=False)
 if __name__ == "__main__":
     interface = gr.Interface(pipe, gr.Textbox(label="Prompt"), gr.Textbox(label="Response"), title="Text Completion")

 import gradio as gr
+from vllm import LLM, SamplingParams
+llm = LLM(model="meta-llama/Llama-2-7B-Chat-hf")
+sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
 def pipe(text: str):
+    prompt = [text]
+    tokens = llm.generate(prompt, sampling_params)
+    output = (output.outputs[0].text for output in tokens)
+    return output[0]
 if __name__ == "__main__":
     interface = gr.Interface(pipe, gr.Textbox(label="Prompt"), gr.Textbox(label="Response"), title="Text Completion")