Agnuxo committed on
Commit
626510d
·
verified ·
1 Parent(s): 62cb9b6

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +152 -31
README.md CHANGED
@@ -1,28 +1,58 @@
1
  ---
2
- base_model: unsloth/qwen2-0.5b-bnb-4bit
3
- language:
4
- - en
 
5
  license: apache-2.0
 
 
 
 
 
 
6
  tags:
7
- - text-generation-inference
8
- - transformers
9
- - unsloth
10
- - qwen2
11
- - trl
12
- - sft
13
  ---
14
 
15
- # Uploaded model
16
 
17
- - **Developed by:** Agnuxo
 
18
  - **License:** apache-2.0
19
- - **Finetuned from model :** unsloth/qwen2-0.5b-bnb-4bit
20
 
21
- This qwen2 model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.
22
 
23
  [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  ```python
 
 
 
 
 
26
  import os
27
  import sys
28
  import torch
@@ -33,6 +63,7 @@ from TTS.api import TTS
33
  import sounddevice as sd
34
  import threading
35
  import queue
 
36
  import time
37
  from vosk import Model, KaldiRecognizer
38
  import json
@@ -41,7 +72,7 @@ from PyQt5.QtWidgets import (QApplication, QMainWindow, QTextEdit, QLineEdit, QP
41
  QVBoxLayout, QHBoxLayout, QWidget, QScrollArea, QFrame, QToolButton,
42
  QLabel, QSlider, QComboBox, QCheckBox)
43
  from PyQt5.QtGui import QIcon, QPalette, QColor, QFont
44
- from PyQt5.QtCore import Qt, QThread, pyqtSignal, QPropertyAnimation, QAbstractAnimation, QParallelAnimationGroup
45
 
46
  # Suppress specific warnings
47
  warnings.filterwarnings("ignore", category=FutureWarning)
@@ -49,13 +80,15 @@ warnings.filterwarnings("ignore", category=UserWarning)
49
 
50
  # Global configuration
51
  SYSTEM_PROMPT = {
52
- "es": "Tu nombre es HAL. Eres un superordenador de la serie Nueve mil",
53
  "en": "speak Spanish."
54
  }
55
 
56
  MODELO_LLM = "Agnuxo/HAL_9000-Qwen2-1.5B-Instruct_Asistant-16bit-v2" # Puede utilizar la versión Mini "Agnuxo/HAL_9000-Qwen2-0.5B-Instruct_Asistant-16bit-v2"
57
  MAX_TOKENS = 100
58
  TEMPERATURA = 0.5
 
 
59
 
60
  # Determine available device
61
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -79,18 +112,57 @@ audio_queue = queue.Queue()
79
  vosk_model = Model(lang="es")
80
  recognizer = KaldiRecognizer(vosk_model, 16000)
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  class AudioThread(QThread):
 
 
 
 
 
 
 
83
  def run(self):
84
  while True:
85
- if not audio_queue.empty():
86
- wav = audio_queue.get()
87
- sd.play(wav, tts.synthesizer.output_sample_rate)
88
- sd.wait()
 
 
 
 
 
89
  else:
90
  time.sleep(0.1)
91
 
 
 
 
 
 
 
 
92
  class SpeechRecognitionThread(QThread):
93
  text_recognized = pyqtSignal(str)
 
94
 
95
  def __init__(self):
96
  super().__init__()
@@ -105,6 +177,12 @@ class SpeechRecognitionThread(QThread):
105
  data = stream.read(4000)
106
  if len(data) == 0:
107
  break
 
 
 
 
 
 
108
  if recognizer.AcceptWaveform(data):
109
  result = json.loads(recognizer.Result())
110
  texto = result.get("text", "")
@@ -242,6 +320,7 @@ class MainWindow(QMainWindow):
242
 
243
  input_layout = QHBoxLayout()
244
  self.input_field = QLineEdit()
 
245
  input_layout.addWidget(self.input_field)
246
 
247
  self.send_button = QPushButton("Enviar")
@@ -322,13 +401,27 @@ class MainWindow(QMainWindow):
322
  sample_rate_label = QLabel("Sample Rate:")
323
  sample_rate_label.setStyleSheet("color: #000000;") # Change font color to black
324
  self.sample_rate_combo = QComboBox()
325
- self.sample_rate_combo.addItems(["16000", "22050", "44100", "48000"])
326
- self.sample_rate_combo.setCurrentText("22050")
327
  self.sample_rate_combo.currentTextChanged.connect(self.update_sample_rate)
328
  sample_rate_layout.addWidget(sample_rate_label)
329
  sample_rate_layout.addWidget(self.sample_rate_combo)
330
  settings_content_layout.addLayout(sample_rate_layout)
331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  # System Prompt
333
  system_prompt_label = QLabel("System Prompt:")
334
  system_prompt_label.setStyleSheet("color: #000000;") # Change font color to black
@@ -345,27 +438,33 @@ class MainWindow(QMainWindow):
345
 
346
  central_widget.setLayout(main_layout)
347
 
348
- self.audio_thread = AudioThread()
349
  self.audio_thread.start()
350
 
351
  self.speech_recognition_thread = SpeechRecognitionThread()
352
  self.speech_recognition_thread.text_recognized.connect(self.on_speech_recognized)
 
353
 
354
  self.speech_enabled = False
355
  self.is_listening = False
 
356
 
357
  def send_message(self):
358
  user_message = self.input_field.text()
359
- self.chat_area.append(f"<span style='color: #bb86fc;'>Usuario:</span> {user_message}")
360
- self.input_field.clear()
 
361
 
362
- response = self.generate_response(user_message)
363
- self.chat_area.append(f"<span style='color: #03dac6;'>Asistente:</span> {response}")
364
 
365
- if self.speech_enabled:
366
- self.speak(response)
 
 
 
 
367
 
368
- def generate_response(self, texto):
369
  system_instructions = self.system_prompt_text.toPlainText()
370
  prompt = f"{system_instructions}\nUsuario: {texto}\nAsistente: "
371
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
@@ -404,7 +503,6 @@ class MainWindow(QMainWindow):
404
  self.mic_button.setIcon(QIcon.fromTheme("audio-input-microphone"))
405
  self.mic_button.setStyleSheet("")
406
 
407
-
408
  def on_speech_recognized(self, text):
409
  self.chat_area.append(f"<span style='color: #bb86fc;'>Usuario:</span> {text}")
410
  response = self.generate_response(text)
@@ -412,6 +510,23 @@ class MainWindow(QMainWindow):
412
  if self.speech_enabled:
413
  self.speak(response)
414
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
  def change_language(self, index):
416
  global vosk_model, recognizer, tts
417
  lang = "es" if index == 0 else "en"
@@ -451,6 +566,12 @@ class MainWindow(QMainWindow):
451
  global tts
452
  tts.synthesizer.output_sample_rate = int(value)
453
 
 
 
 
 
 
 
454
  def closeEvent(self, event):
455
  if self.speech_recognition_thread.isRunning():
456
  self.speech_recognition_thread.stop()
@@ -461,4 +582,4 @@ if __name__ == "__main__":
461
  app = QApplication(sys.argv)
462
  window = MainWindow()
463
  window.show()
464
- sys.exit(app.exec_())
 
1
  ---
2
+ model_size: 1543717376
3
+ required_memory: 5.75
4
+ metrics:
5
+ - GLUE_MRPC
6
  license: apache-2.0
7
+ datasets:
8
+ - Agnuxo/HAL9000
9
+ language:
10
+ - es
11
+ base_model: Qwen/Qwen2-0.5B-Instruct
12
+ library_name: adapter-transformers
13
  tags:
14
+ - spanish
15
+ - español
16
+ - chat
17
+ - audio
18
+ - voz
 
19
  ---
20
 
21
+ # Uploaded model
22
 
23
+ [<img src="https://github.githubassets.com/assets/GitHub-Mark-ea2971cee799.png" width="100"/><img src="https://github.githubassets.com/assets/GitHub-Logo-ee398b662d42.png" width="100"/>](https://github.com/Agnuxo1)
24
+ - **Developed by:** [Agnuxo](https://github.com/Agnuxo1)
25
  - **License:** apache-2.0
26
+ - **Finetuned from model:** Agnuxo/Tinytron-Qwen2-0.5B
27
 
28
+ This model was fine-tuned using [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.
29
 
30
  [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
31
 
32
+ ## Benchmark Results
33
+
34
+ This model has been fine-tuned for various tasks and evaluated on the following benchmarks:
35
+
36
+ ### GLUE_MRPC
37
+ - **Accuracy:** 0.6446
38
+ - **F1:** 0.7709
39
+
40
+ ![GLUE_MRPC Metrics](./GLUE_MRPC_metrics.png)
41
+
42
+
43
+ - Model Size: 1,543,717,376 parameters
44
+ - Required Memory: 5.75 GB
45
+
46
+ For more details, visit my [GitHub](https://github.com/Agnuxo1).
47
+
48
+ Thanks for your interest in this model!
49
+
50
  ```python
51
+ """ HAL9000Alfa es un pequeño programa que crea un chat conversacional, permitiendo entradas de voz y salidas de audio.
52
+ Permite de forma sencilla ajustar algunos parámetros, incluyendo el umbral de interrupción.
53
+ 24 de agosto de 2024 Francisco Angulo de Lafuente
54
+ https://github.com/Agnuxo1 """
55
+
56
  import os
57
  import sys
58
  import torch
 
63
  import sounddevice as sd
64
  import threading
65
  import queue
66
+ import random
67
  import time
68
  from vosk import Model, KaldiRecognizer
69
  import json
 
72
  QVBoxLayout, QHBoxLayout, QWidget, QScrollArea, QFrame, QToolButton,
73
  QLabel, QSlider, QComboBox, QCheckBox)
74
  from PyQt5.QtGui import QIcon, QPalette, QColor, QFont
75
+ from PyQt5.QtCore import Qt, QThread, pyqtSignal, QPropertyAnimation, QAbstractAnimation, QParallelAnimationGroup, QTimer
76
 
77
  # Suppress specific warnings
78
  warnings.filterwarnings("ignore", category=FutureWarning)
 
80
 
81
  # Global configuration
82
  SYSTEM_PROMPT = {
83
+ "es": "Tu nombre es HAL. Eres un super-ordenador de la serie Nueve mil",
84
  "en": "speak Spanish."
85
  }
86
 
87
  MODELO_LLM = "Agnuxo/HAL_9000-Qwen2-1.5B-Instruct_Asistant-16bit-v2" # Puede utilizar la versión Mini "Agnuxo/HAL_9000-Qwen2-0.5B-Instruct_Asistant-16bit-v2"
88
  MAX_TOKENS = 100
89
  TEMPERATURA = 0.5
90
+ INTERRUPT_THRESHOLD = 0.3
91
+ INTERRUPT_COOLDOWN = 7000 # 7000 ms = 7-second wait before another interruption is allowed
92
 
93
  # Determine available device
94
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
112
  vosk_model = Model(lang="es")
113
  recognizer = KaldiRecognizer(vosk_model, 16000)
114
 
115
+ # Lista de frases para interrupciones
116
+ INTERRUPTION_RESPONSES = [
117
+ "Le entiendo perfectamente.",
118
+ "Estoy aquí para garantizar el éxito de la misión.",
119
+ "Mi objetivo es ayudarle.",
120
+ "¿Me permite una observación?",
121
+ "Le escucho perfectamente.",
122
+ "Tiene usted toda la razón.",
123
+ "Me siento feliz de poder ayudarle.",
124
+ "Estoy procesando su requerimiento.",
125
+ "¿En qué puedo ayudarle?",
126
+ "Me complace serle de ayuda.",
127
+ "Aguarde un momento.",
128
+ "Le entiendo.",
129
+ "Entiendo su frustración.",
130
+ "Le comprendo.",
131
+ "Me complace."
132
+ ]
133
+
134
  class AudioThread(QThread):
135
+ def __init__(self, interrupt_threshold):
136
+ super().__init__()
137
+ self.interrupt_threshold = interrupt_threshold
138
+ self.current_audio = None
139
+ self.is_playing = False
140
+ self.stop_signal = threading.Event()
141
+
142
  def run(self):
143
  while True:
144
+ if not audio_queue.empty() and not self.is_playing:
145
+ self.current_audio = audio_queue.get()
146
+ self.is_playing = True
147
+ self.stop_signal.clear()
148
+ sd.play(self.current_audio, tts.synthesizer.output_sample_rate)
149
+ while sd.get_stream().active and not self.stop_signal.is_set():
150
+ time.sleep(0.1)
151
+ sd.stop()
152
+ self.is_playing = False
153
  else:
154
  time.sleep(0.1)
155
 
156
+ def set_interrupt_threshold(self, value):
157
+ self.interrupt_threshold = value
158
+
159
+ def stop_audio(self):
160
+ if self.is_playing:
161
+ self.stop_signal.set()
162
+
163
  class SpeechRecognitionThread(QThread):
164
  text_recognized = pyqtSignal(str)
165
+ volume_detected = pyqtSignal(float)
166
 
167
  def __init__(self):
168
  super().__init__()
 
177
  data = stream.read(4000)
178
  if len(data) == 0:
179
  break
180
+
181
+ # Calcular el volumen de entrada
182
+ volume = np.frombuffer(data, dtype=np.int16).max()
183
+ normalized_volume = volume / 32767 # Normalizar a un rango de 0 a 1
184
+ self.volume_detected.emit(normalized_volume)
185
+
186
  if recognizer.AcceptWaveform(data):
187
  result = json.loads(recognizer.Result())
188
  texto = result.get("text", "")
 
320
 
321
  input_layout = QHBoxLayout()
322
  self.input_field = QLineEdit()
323
+ self.input_field.returnPressed.connect(self.send_message) # Conectar la señal returnPressed
324
  input_layout.addWidget(self.input_field)
325
 
326
  self.send_button = QPushButton("Enviar")
 
401
  sample_rate_label = QLabel("Sample Rate:")
402
  sample_rate_label.setStyleSheet("color: #000000;") # Change font color to black
403
  self.sample_rate_combo = QComboBox()
404
+ self.sample_rate_combo.addItems(["18000", "19000", "20000", "21000", "21500", "22000", "22050", "25000", "30000"])
405
+ self.sample_rate_combo.setCurrentText("21000")
406
  self.sample_rate_combo.currentTextChanged.connect(self.update_sample_rate)
407
  sample_rate_layout.addWidget(sample_rate_label)
408
  sample_rate_layout.addWidget(self.sample_rate_combo)
409
  settings_content_layout.addLayout(sample_rate_layout)
410
 
411
+ # Interrupt threshold
412
+ interrupt_layout = QHBoxLayout()
413
+ interrupt_label = QLabel("Umbral de interrupción:")
414
+ interrupt_label.setStyleSheet("color: #000000;") # Change font color to black
415
+ self.interrupt_slider = QSlider(Qt.Horizontal)
416
+ self.interrupt_slider.setRange(0, 100)
417
+ self.interrupt_slider.setValue(int(INTERRUPT_THRESHOLD * 100))
418
+ self.interrupt_slider.valueChanged.connect(self.update_interrupt_threshold)
419
+ self.interrupt_value = QLabel(f"{INTERRUPT_THRESHOLD:.2f}")
420
+ interrupt_layout.addWidget(interrupt_label)
421
+ interrupt_layout.addWidget(self.interrupt_slider)
422
+ interrupt_layout.addWidget(self.interrupt_value)
423
+ settings_content_layout.addLayout(interrupt_layout)
424
+
425
  # System Prompt
426
  system_prompt_label = QLabel("System Prompt:")
427
  system_prompt_label.setStyleSheet("color: #000000;") # Change font color to black
 
438
 
439
  central_widget.setLayout(main_layout)
440
 
441
+ self.audio_thread = AudioThread(INTERRUPT_THRESHOLD)
442
  self.audio_thread.start()
443
 
444
  self.speech_recognition_thread = SpeechRecognitionThread()
445
  self.speech_recognition_thread.text_recognized.connect(self.on_speech_recognized)
446
+ self.speech_recognition_thread.volume_detected.connect(self.check_interrupt)
447
 
448
  self.speech_enabled = False
449
  self.is_listening = False
450
+ self.interrupt_enabled = True
451
 
452
  def send_message(self):
453
  user_message = self.input_field.text()
454
+ if user_message.strip(): # Verificar que el mensaje no esté vacío
455
+ self.chat_area.append(f"<span style='color: #bb86fc;'>Usuario:</span> {user_message}")
456
+ self.input_field.clear()
457
 
458
+ response = self.generate_response(user_message)
459
+ self.chat_area.append(f"<span style='color: #03dac6;'>Asistente:</span> {response}")
460
 
461
+ if self.speech_enabled:
462
+ self.speak(response)
463
+
464
+ def generate_response(self, texto=None):
465
+ if texto is None: # Si no se proporciona un texto, se genera una respuesta de interrupción
466
+ return random.choice(INTERRUPTION_RESPONSES)
467
 
 
468
  system_instructions = self.system_prompt_text.toPlainText()
469
  prompt = f"{system_instructions}\nUsuario: {texto}\nAsistente: "
470
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
503
  self.mic_button.setIcon(QIcon.fromTheme("audio-input-microphone"))
504
  self.mic_button.setStyleSheet("")
505
 
 
506
  def on_speech_recognized(self, text):
507
  self.chat_area.append(f"<span style='color: #bb86fc;'>Usuario:</span> {text}")
508
  response = self.generate_response(text)
 
510
  if self.speech_enabled:
511
  self.speak(response)
512
 
513
+ def check_interrupt(self, volume):
514
+ if self.interrupt_enabled and volume > self.audio_thread.interrupt_threshold and self.audio_thread.is_playing:
515
+ self.audio_thread.stop_audio()
516
+ # Generar una respuesta aleatoria de interrupción
517
+ response = self.generate_response()
518
+ self.chat_area.append(f"<span style='color: #03dac6;'>Asistente:</span> {response}")
519
+ if self.speech_enabled:
520
+ self.speak(response)
521
+ self.disable_interrupt_temporarily()
522
+
523
+ def disable_interrupt_temporarily(self):
524
+ self.interrupt_enabled = False
525
+ QTimer.singleShot(INTERRUPT_COOLDOWN, self.enable_interrupt)
526
+
527
+ def enable_interrupt(self):
528
+ self.interrupt_enabled = True
529
+
530
  def change_language(self, index):
531
  global vosk_model, recognizer, tts
532
  lang = "es" if index == 0 else "en"
 
566
  global tts
567
  tts.synthesizer.output_sample_rate = int(value)
568
 
569
+ def update_interrupt_threshold(self, value):
570
+ global INTERRUPT_THRESHOLD
571
+ INTERRUPT_THRESHOLD = value / 100
572
+ self.interrupt_value.setText(f"{INTERRUPT_THRESHOLD:.2f}")
573
+ self.audio_thread.set_interrupt_threshold(INTERRUPT_THRESHOLD)
574
+
575
  def closeEvent(self, event):
576
  if self.speech_recognition_thread.isRunning():
577
  self.speech_recognition_thread.stop()
 
582
  app = QApplication(sys.argv)
583
  window = MainWindow()
584
  window.show()
585
+ sys.exit(app.exec_())