import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Initialize the Inference API client
client = InferenceClient("01-ai/Yi-Coder-9B-Chat")
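# The hosted Inference API path streams tokens remotely; depending on the deployment, an HF access token may be required.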
# Initialize the local tokenizer and model
model_path = "01-ai/Yi-Coder-9B-Chat"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto").eval()
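# Note: loading the 9B model locally is memory-heavy (roughly 18 GB of GPU memory in fp16/bf16), so the hosted API path is the lighter default.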
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    use_local_model: bool,
):
    # Assemble the conversation context
    messages = [{"role": "system", "content": system_message}]
    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    if use_local_model:
        # Run generation on the locally loaded model
        # Flatten the chat into a plain-text prompt and cue the assistant turn
        input_text = "\n".join([f"{m['role']}: {m['content']}" for m in messages]) + "\nassistant:"
        input_ids = tokenizer.encode(input_text, return_tensors="pt")
        input_ids = input_ids.to(model.device)
        with torch.no_grad():
            output = model.generate(
                input_ids,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )
        # Decode only the newly generated tokens, not the echoed prompt
        response = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
        yield response.strip()
    else:
        # Stream the response from the Hugging Face Inference API
        prompt = "\n".join([f"{m['role']}: {m['content']}" for m in messages]) + "\nassistant:"
        response = ""
        for chunk in client.text_generation(
            prompt,
            max_new_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            response += chunk
            # Strip any echoed "assistant:" prefix before yielding the running text
            yield response.split("assistant:")[-1].strip()
# Build the Gradio chat interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You respond in Polish. You are a Coder/Developer/Programmer and you produce complete code.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Maximum new tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
        gr.Checkbox(label="Use local model", value=False),
    ],
    title="Advanced AI chat interface",
    description="Chat with an AI model via the Hugging Face Inference API or a local model.",
)
if __name__ == "__main__":
    demo.launch()