patrol114 committed on
Commit
c60e96c
·
verified ·
1 Parent(s): 3acefb5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -89
app.py CHANGED
@@ -1,151 +1,87 @@
1
  import gradio as gr
2
-
3
  from huggingface_hub import InferenceClient
4
-
5
  from transformers import AutoTokenizer, AutoModelForCausalLM
6
-
7
  import torch
8
 
9
- # Initialize the InferenceClient
10
-
11
  client = InferenceClient("01-ai/Yi-Coder-9B-Chat")
12
 
13
- # Initialize tokenizer and model
14
-
15
- model_path = "01-ai/Yi-Coder-9B-Chat" # Make sure this is correct
16
-
17
  tokenizer = AutoTokenizer.from_pretrained(model_path)
18
-
19
  model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto").eval()
20
 
21
  def respond(
22
-
23
  message,
24
-
25
  history: list[tuple[str, str]],
26
-
27
  system_message,
28
-
29
  max_tokens,
30
-
31
  temperature,
32
-
33
  top_p,
34
-
35
  use_local_model: bool,
36
-
37
  ):
38
-
39
  messages = [{"role": "system", "content": system_message}]
40
-
41
  for user, assistant in history:
42
-
43
  if user:
44
-
45
  messages.append({"role": "user", "content": user})
46
-
47
  if assistant:
48
-
49
  messages.append({"role": "assistant", "content": assistant})
50
-
51
  messages.append({"role": "user", "content": message})
52
 
53
  if use_local_model:
54
-
55
- # Use local model
56
-
57
- input_ids = tokenizer.encode("".join([m["content"] for m in messages]), return_tensors="pt")
58
-
59
  input_ids = input_ids.to(model.device)
60
-
61
 
62
-
63
  with torch.no_grad():
64
-
65
  output = model.generate(
66
-
67
  input_ids,
68
-
69
  max_new_tokens=max_tokens,
70
-
71
  temperature=temperature,
72
-
73
  top_p=top_p,
74
-
75
  do_sample=True,
76
-
77
  pad_token_id=tokenizer.eos_token_id,
78
-
79
  )
80
-
81
 
82
-
83
  response = tokenizer.decode(output[0], skip_special_tokens=True)
84
-
85
- yield response
86
-
87
  else:
88
-
89
- # Use Hugging Face Inference API
90
-
91
  response = ""
92
-
93
- for message in client.text_generation(
94
-
95
- "".join([m["content"] for m in messages]),
96
-
97
  max_new_tokens=max_tokens,
98
-
99
  stream=True,
100
-
101
  temperature=temperature,
102
-
103
  top_p=top_p,
104
-
105
  ):
 
 
106
 
107
- response += message
108
-
109
- yield response
110
-
111
- # Create Gradio interface
112
-
113
  demo = gr.ChatInterface(
114
-
115
  respond,
116
-
117
  additional_inputs=[
118
-
119
- gr.Textbox(value="Odpowiadasz w Jezyku Polskim jesteś Coder/Developer/Programista tworzysz pełny kod..", label="System message"),
120
-
121
- gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens"),
122
-
123
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
124
-
125
  gr.Slider(
126
-
127
  minimum=0.1,
128
-
129
  maximum=1.0,
130
-
131
  value=0.95,
132
-
133
  step=0.05,
134
-
135
- label="Top-p (nucleus sampling)",
136
-
137
  ),
138
-
139
- gr.Checkbox(label="Use Local Model", value=False),
140
-
141
  ],
142
-
143
- title="Advanced Chat Interface",
144
-
145
- description="Chat with an AI model using either the Hugging Face Inference API or a local model.",
146
-
147
  )
148
 
149
- if name == "__main__":
150
-
151
  demo.launch()
 
1
  import gradio as gr
 
2
  from huggingface_hub import InferenceClient
 
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
4
  import torch
5
 
6
+ # Inicjalizacja InferenceClient
 
7
  client = InferenceClient("01-ai/Yi-Coder-9B-Chat")
8
 
9
+ # Inicjalizacja tokenizera i modelu
10
+ model_path = "01-ai/Yi-Coder-9B-Chat"
 
 
11
  tokenizer = AutoTokenizer.from_pretrained(model_path)
 
12
  model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto").eval()
13
 
14
  def respond(
 
15
  message,
 
16
  history: list[tuple[str, str]],
 
17
  system_message,
 
18
  max_tokens,
 
19
  temperature,
 
20
  top_p,
 
21
  use_local_model: bool,
 
22
  ):
23
+ # Przygotowanie wiadomości do kontekstu
24
  messages = [{"role": "system", "content": system_message}]
 
25
  for user, assistant in history:
 
26
  if user:
 
27
  messages.append({"role": "user", "content": user})
 
28
  if assistant:
 
29
  messages.append({"role": "assistant", "content": assistant})
 
30
  messages.append({"role": "user", "content": message})
31
 
32
  if use_local_model:
33
+ # Użycie lokalnego modelu
34
+ input_text = "\n".join([f"{m['role']}: {m['content']}" for m in messages])
35
+ input_ids = tokenizer.encode(input_text, return_tensors="pt")
 
 
36
  input_ids = input_ids.to(model.device)
 
37
 
 
38
  with torch.no_grad():
 
39
  output = model.generate(
 
40
  input_ids,
 
41
  max_new_tokens=max_tokens,
 
42
  temperature=temperature,
 
43
  top_p=top_p,
 
44
  do_sample=True,
 
45
  pad_token_id=tokenizer.eos_token_id,
 
46
  )
 
47
 
 
48
  response = tokenizer.decode(output[0], skip_special_tokens=True)
49
+ yield response.split("assistant:")[-1].strip()
 
 
50
  else:
51
+ # Użycie Hugging Face Inference API
 
 
52
  response = ""
53
+ for chunk in client.text_generation(
54
+ "\n".join([f"{m['role']}: {m['content']}" for m in messages]),
 
 
 
55
  max_new_tokens=max_tokens,
 
56
  stream=True,
 
57
  temperature=temperature,
 
58
  top_p=top_p,
 
59
  ):
60
+ response += chunk
61
+ yield response.split("assistant:")[-1].strip()
62
 
63
+ # Tworzenie interfejsu Gradio
 
 
 
 
 
64
  demo = gr.ChatInterface(
 
65
  respond,
 
66
  additional_inputs=[
67
+ gr.Textbox(
68
+ value="Odpowiadasz w języku polskim. Jesteś Coder/Developer/Programista i tworzysz pełny kod.",
69
+ label="Wiadomość systemowa"
70
+ ),
71
+ gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Maksymalna liczba nowych tokenów"),
72
+ gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperatura"),
 
73
  gr.Slider(
 
74
  minimum=0.1,
 
75
  maximum=1.0,
 
76
  value=0.95,
 
77
  step=0.05,
78
+ label="Top-p (próbkowanie nucleus)",
 
 
79
  ),
80
+ gr.Checkbox(label="Użyj lokalnego modelu", value=False),
 
 
81
  ],
82
+ title="Zaawansowany interfejs czatu AI",
83
+ description="Czatuj z modelem AI, korzystając z Hugging Face Inference API lub lokalnego modelu.",
 
 
 
84
  )
85
 
86
+ if __name__ == "__main__":
 
87
  demo.launch()