import os
import gradio as gr
from functools import partial
from huggingface_hub import InferenceClient
css = """
#generate_button {
transition: background-color 1s ease-out, color 1s ease-out; border-color 1s ease-out;
}
"""
def generate(prompt: str, hf_token: str, model: str):
    messages = [{"role": "user", "content": prompt}]
    # Fall back to the environment variable when no token was supplied
    if hf_token is None or not hf_token.strip():
        hf_token = os.getenv("HUGGINGFACE_API_KEY")
    client = InferenceClient(model, token=hf_token)
    model_name = model.split("/")[1]  # model name without the org prefix
    response = f"**{model_name}**\n\n"
    for msg in client.chat_completion(messages, max_tokens=600, stream=True):
        token = msg.choices[0].delta.content
        if token:  # the final streamed chunk may carry no content
            response += token
        yield response
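# Because generate() is a generator that yields the accumulated text, Gradio
# streams every intermediate string to the bound output component, so the
# Markdown panel fills in as tokens arrive.
#
# A quick smoke test outside the UI (a sketch; assumes HUGGINGFACE_API_KEY is
# set in your environment):
#
#     for partial_response in generate(
#         "Write Bubble Sort in Python", "", "codellama/CodeLlama-34b-Instruct-hf"
#     ):
#         pass  # each yielded value is the full response so far
#     print(partial_response)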
def clear_token():
    # Returning a Textbox with visible=False updates the hf_token
    # widget, hiding it once generation starts
    return gr.Textbox(visible=False)
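# Hiding the textbox does not clear its value, so the chained generate() call
# can still read the token the user typed in.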
with gr.Blocks(css=css, theme="gradio/soft") as demo:
gr.Markdown("
Code LLM Explorer
")
    prompt = gr.Textbox(
        label="Prompt",
        lines=2,  # starts at a height of two lines
        max_lines=5,  # the Textbox extends up to 5 lines as you type
        info="Type your Prompt here",
        show_label=False,
        value="Write Bubble Sort in Python",
    )
    hf_token = gr.Textbox(
        label="HuggingFace Token",
        type="password",
        placeholder="Your Hugging Face Token",
        show_label=False,
    )
    # gr.Group() groups the two buttons together,
    # so there is no gap between them
    with gr.Group():
        with gr.Row() as button_row:
            # variant: 'primary' for the main call-to-action, 'secondary' for a
            # more subdued style, 'stop' for a stop button
            generate_btn = gr.Button(
                "Run", elem_id="generate_button", variant="primary", size="sm"
            )
            view_code = gr.Button(
                "View Code", elem_id="generate_button", variant="secondary", size="sm"
            )
    with gr.Row() as output_row:
        codellama_output = gr.Markdown("codellama/CodeLlama-34b-Instruct-hf")
        stablecode_output = gr.Markdown("stabilityai/stable-code-instruct-3b")
        deepseek_output = gr.Markdown("deepseek-ai/deepseek-coder-33b-instruct")
    gr.on(
        [prompt.submit, generate_btn.click], clear_token, inputs=None, outputs=hf_token
    ).then(
        fn=partial(generate, model="codellama/CodeLlama-34b-Instruct-hf"),
        inputs=[prompt, hf_token],
        outputs=codellama_output,
    )
    gr.on(
        [prompt.submit, generate_btn.click], clear_token, inputs=None, outputs=hf_token
    ).then(
        fn=partial(generate, model="stabilityai/stable-code-instruct-3b"),
        inputs=[prompt, hf_token],
        outputs=stablecode_output,
    )
    gr.on(
        [prompt.submit, generate_btn.click], clear_token, inputs=None, outputs=hf_token
    ).then(
        fn=partial(generate, model="deepseek-ai/deepseek-coder-33b-instruct"),
        inputs=[prompt, hf_token],
        outputs=deepseek_output,
    )
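    # gr.on() binds one handler to several triggers at once (pressing Enter in
    # the prompt or clicking Run), and .then() runs generate only after
    # clear_token has finished. One such chain is wired up per model, all
    # listening to the same two triggers.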
demo.launch()