Spaces:
Runtime error
Runtime error
File size: 2,884 Bytes
9551276 3be135a 5e72808 ec21d4f 2bcefc7 ec21d4f 2bcefc7 ec21d4f 14f07e7 ec21d4f 14f07e7 2bcefc7 3be135a 14f07e7 5e72808 3be135a 5e72808 2bcefc7 110c323 bb25d5e 5e72808 e42b84a 5d31a12 5e72808 110c323 ec21d4f c5df1c5 ec21d4f 110c323 5e72808 110c323 ec21d4f 110c323 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import os
import random
import gradio as gr
from groq import Groq
# Shared Groq client used by every request; the API key is read from the
# "Groq_Api_Key" environment variable.
client = Groq(api_key=os.getenv("Groq_Api_Key"))
def create_history_messages(history):
    """Flatten (user, assistant) history pairs into a chat message list.

    Args:
        history: Iterable of two-item turns, each holding the user's
            message followed by the assistant's reply.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts in which every
        turn contributes its user message immediately followed by its
        assistant message, preserving the order of ``history``.
    """
    return [
        {"role": role, "content": text}
        # Unpacking each turn into exactly two names keeps the strict
        # "pairs only" contract: a malformed turn still raises.
        for user_text, assistant_text in history
        for role, text in (("user", user_text), ("assistant", assistant_text))
    ]
def generate_response(prompt, history, model, temperature, max_tokens, top_p, seed):
    """Stream a chat completion for ``prompt`` and yield the growing reply.

    Args:
        prompt: The new user message to answer.
        history: Earlier (user, assistant) turns, converted with
            ``create_history_messages``.
        model: Model identifier forwarded to the completion request.
        temperature: Sampling temperature forwarded to the request.
        max_tokens: Token limit forwarded to the request.
        top_p: Nucleus-sampling value forwarded to the request.
        seed: Sampling seed; ``0`` means "pick a random seed".

    Yields:
        The response text accumulated so far, once for every streamed
        chunk that carries content.
    """
    conversation = create_history_messages(history)
    conversation.append({"role": "user", "content": prompt})
    print(conversation)

    # Zero is the sentinel for "random": swap it for a fresh seed.
    if seed == 0:
        seed = random.randint(1, 100000)

    completion_stream = client.chat.completions.create(
        messages=conversation,
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        seed=seed,
        stop=None,
        stream=True,
    )

    reply_so_far = ""
    for chunk in completion_stream:
        piece = chunk.choices[0].delta.content
        if piece is None:
            # Chunks without text contribute nothing and emit no update.
            continue
        reply_so_far += piece
        yield reply_so_far
# Extra controls passed to the chat interface. Their order matches the
# parameters of generate_response that follow (prompt, history):
# model, temperature, max_tokens, top_p, seed.
additional_inputs = [
    # Model selector.
    gr.Dropdown(
        label="Model",
        value="llama-3.1-70b-versatile",
        choices=[
            "llama-3.2-3b-preview",
            "llama-3.2-1b-preview",
            "llama-3.1-70b-versatile",
            "llama-3.1-8b-instant",
            "llama3-70b-8192",
            "llama3-8b-8192",
            "mixtral-8x7b-32768",
            "gemma2-9b-it",
            "gemma-7b-it",
        ],
    ),
    # Sampling temperature.
    gr.Slider(
        label="Temperature",
        info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative.",
        value=0.5,
        minimum=0.0,
        maximum=1.0,
        step=0.01,
    ),
    # Response token limit.
    gr.Slider(
        label="Max Tokens",
        info="The maximum number of tokens that the model can process in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama 7b & 70b, 32k for mixtral 8x7b, 132k for llama 3.1.",
        value=8100,
        minimum=1,
        maximum=131000,
        step=1,
    ),
    # Nucleus-sampling probability.
    gr.Slider(
        label="Top P",
        info="A method of text generation where a model will only consider the most probable next tokens that make up the probability p.",
        value=0.5,
        minimum=0.0,
        maximum=1.0,
        step=0.01,
    ),
    # Integer seed; 0 asks generate_response to choose one at random.
    gr.Number(
        label="Seed",
        info="A starting point to initiate generation, use 0 for random",
        value=0,
        precision=0,
    ),
]
# Chat display: panel layout with copy and like controls, no label and no
# share button.
chat_display = gr.Chatbot(
    layout="panel",
    likeable=True,
    show_copy_button=True,
    show_share_button=False,
    show_label=False,
)

# Wire the streaming generator and the extra controls into a chat UI,
# then start serving it.
demo = gr.ChatInterface(
    fn=generate_response,
    chatbot=chat_display,
    additional_inputs=additional_inputs,
    theme="Nymbo/Alyx_Theme",
)
demo.launch()