RealSanjay
committed on
Create app.py
app.py
ADDED
@@ -0,0 +1,212 @@
import gradio as gr
from faster_whisper import WhisperModel
import numpy as np
import os
import statistics
from transformers import pipeline
from textblob import TextBlob
import torch
import time

class WebAITranscriber:
    def __init__(self):
        # Check if CUDA is available
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.compute_type = "float16" if torch.cuda.is_available() else "int8"

        # Initialize Whisper Model with optimized settings
        print(f"Initializing Whisper Model on {self.device}...")
        self.model = WhisperModel(
            "base",  # base model for a good speed/accuracy balance (model_size_or_path)
            device=self.device,
            compute_type=self.compute_type,
            cpu_threads=min(os.cpu_count() or 1, 4),  # cap thread count; os.cpu_count() may return None
            download_root=None,  # use default cache directory
            local_files_only=False  # allow downloading if needed
        )

        # Optimize model settings
        self.model_settings = {
            'beam_size': 1,  # Reduced beam size for speed
            'best_of': 1,  # Take first result
            'temperature': 0,  # Reduce randomness
            'compression_ratio_threshold': 2.4,
            'condition_on_previous_text': True,
            'no_speech_threshold': 0.6,
            'initial_prompt': None
        }

        # AI Detection Markers (optimized for speed)
        self.ai_markers = {
            'formal_phrases': [
                'moreover', 'furthermore', 'consequently',
                'in conclusion', 'it is worth noting'
            ],
            'ai_disclaimers': [
                'as an ai', 'i want to be clear',
                'it is important to note'
            ]
        }

        # Initialize AI Detector with optimized settings
        print("Initializing AI Detection...")
        self.ai_detector = pipeline(
            'text-classification',
            model='roberta-base-openai-detector',
            device=self.device
        )

        # Optimized buffer settings
        self.min_analysis_words = 10  # Minimum words before analysis
        self.max_buffer_size = 1000  # Maximum buffer size in words
        self.analysis_interval = 3  # Minimum seconds between analyses
        self.last_analysis_time = time.time()
        self.transcript_buffer = []

    def process_realtime_audio(self, audio, state):
        """Process real-time audio with optimized settings."""
        if audio is None:
            return "", state

        try:
            # Initialize state if needed
            if state is None:
                state = {"full_transcript": "", "buffer": [], "pending_analysis": False}

            # Gradio streams numpy audio as a (sample_rate, int16 array) tuple,
            # while faster-whisper expects a float32 waveform sampled at 16 kHz.
            sample_rate, audio_data = audio
            audio_data = audio_data.astype(np.float32) / 32768.0
            if audio_data.ndim > 1:
                audio_data = audio_data.mean(axis=1)  # down-mix to mono
            if sample_rate != 16000:
                # Lightweight linear resampling to 16 kHz
                target_length = int(round(len(audio_data) * 16000 / sample_rate))
                audio_data = np.interp(
                    np.linspace(0, len(audio_data) - 1, target_length),
                    np.arange(len(audio_data)),
                    audio_data,
                )

            # Process audio in smaller chunks for real-time performance
            segments, _ = self.model.transcribe(
                audio_data,
                language="en",  # Specify language for faster processing
                vad_filter=True,  # Use Voice Activity Detection
                **self.model_settings
            )

            # Process segments
            current_transcript = ""
            for segment in segments:
                current_transcript += segment.text + " "

            if not current_transcript.strip():
                return state["full_transcript"], state

            # Update state
            state["full_transcript"] += " " + current_transcript
            state["buffer"].append(current_transcript)

            # Check if we should perform analysis
            current_time = time.time()
            buffer_text = " ".join(state["buffer"])
            word_count = len(buffer_text.split())

            if (word_count >= self.min_analysis_words and
                    (current_time - self.last_analysis_time) >= self.analysis_interval):

                # Perform AI analysis
                if len(buffer_text.strip()) > 0:
                    classification, probability, confidence = self.analyze_ai_content(buffer_text)
                    analysis_result = (
                        f"\n\n---AI Analysis---\n"
                        f"Classification: {classification}\n"
                        f"Probability: {probability:.2f}\n"
                        f"Confidence: {confidence}\n---\n"
                    )
                    state["full_transcript"] += analysis_result
                    state["buffer"] = []
                    self.last_analysis_time = current_time

            # Trim the running transcript if it gets too large
            if word_count > self.max_buffer_size:
                words = state["full_transcript"].split()
                state["full_transcript"] = " ".join(words[-self.max_buffer_size:])

            return state["full_transcript"], state

        except Exception as e:
            return f"Error processing audio: {str(e)}", state

    def analyze_ai_content(self, text):
        """Optimized AI content analysis."""
        if not text or len(text.split()) < self.min_analysis_words:
            return "Insufficient text", 0.0, "None"

        try:
            # The detector returns the winning label ("Fake" = AI-generated, "Real" = human)
            # with its score; convert that into a probability that the text is AI-generated.
            roberta_result = self.ai_detector(text[:512])[0]  # Limit text length for speed
            if roberta_result['label'].lower() == 'fake':
                ai_prob = roberta_result['score']
            else:
                ai_prob = 1.0 - roberta_result['score']

            # Quick linguistic analysis
            linguistic_score = self.analyze_linguistic_patterns(text)

            # Calculate final score
            final_score = (ai_prob + linguistic_score) / 2

            # Fast classification
            if final_score > 0.7:
                return "AI Generated", final_score, "High"
            elif final_score > 0.5:
                return "Likely AI", final_score, "Medium"
            elif final_score > 0.3:
                return "Possibly AI", final_score, "Low"
            return "Likely Human", final_score, "High"

        except Exception as e:
            print(f"Analysis error: {e}")
            return "Analysis Error", 0.0, "None"

    def analyze_linguistic_patterns(self, text):
        """Optimized linguistic pattern analysis."""
        text_lower = text.lower()
        ai_phrase_count = sum(1 for category in self.ai_markers.values()
                              for phrase in category if phrase in text_lower)

        unique_words = len(set(text.split()))
        total_words = len(text.split())

        return min((ai_phrase_count * 0.3) + (1 - (unique_words / total_words)) * 0.4, 1.0)

def create_gradio_interface():
    transcriber = WebAITranscriber()

    # Create the interface
    with gr.Blocks(title="Real-time AI Speech Analyzer") as interface:
        gr.Markdown("""
        # Real-time AI Speech Analyzer
        This app uses Faster Whisper for real-time speech recognition and AI detection.
        """)

        with gr.Tab("Real-time Analysis"):
            with gr.Row():
                with gr.Column():
                    audio_input = gr.Audio(
                        source="microphone",
                        streaming=True,
                        type="numpy",
                        label="🎤 Speak into your microphone"
                    )
                    gr.Markdown("""
                    ### Tips for best performance:
                    - Speak clearly and at a moderate pace
                    - Minimize background noise
                    - Wait a few seconds for initial processing
                    """)
                with gr.Column():
                    realtime_output = gr.Textbox(
                        label="Real-time Transcript and Analysis",
                        lines=15,
                        max_lines=30
                    )

            # The callback takes and returns a state dict, so a gr.State component
            # must be wired into both inputs and outputs.
            transcript_state = gr.State()

            audio_input.stream(
                transcriber.process_realtime_audio,
                inputs=[audio_input, transcript_state],
                outputs=[realtime_output, transcript_state],
                show_progress=False
            )

        gr.Markdown("""
        ### Technical Details:
        - Using Faster Whisper for optimized speech recognition
        - Real-time AI content analysis
        - Automatic voice activity detection
        - Optimized for low-latency processing
        """)

    return interface

# Launch the app
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch()
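The commit adds only app.py; on Spaces the dependencies also have to be declared. A minimal requirements.txt sketch derived from the imports above — the version pin is an assumption (the gr.Audio(source=..., streaming=...) call matches the Gradio 3.x API), not part of this commit:

gradio==3.50.2     # assumed 3.x pin for source="microphone" streaming audio
faster-whisper     # WhisperModel speech recognition
transformers       # text-classification pipeline for the AI detector
torch              # device selection and model backend
textblob           # imported by app.py
numpy              # audio array handling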