RealSanjay committed on
Commit
17dc2a5
·
verified ·
1 Parent(s): 7a046be

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +212 -0
app.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from faster_whisper import WhisperModel
3
+ import numpy as np
4
+ import os
5
+ import statistics
6
+ from transformers import pipeline
7
+ from textblob import TextBlob
8
+ import torch
9
+ import time
10
+
11
class WebAITranscriber:
    """Transcribe streamed microphone audio and score the text for AI authorship.

    Speech recognition is done with a faster-whisper ``WhisperModel``; the
    AI-likelihood score averages a RoBERTa text classifier with a small
    phrase/repetition heuristic.
    """

    # Sample rate (Hz) of the waveform handed to Whisper. faster-whisper does
    # not resample arrays passed to ``transcribe``; it assumes 16 kHz audio.
    WHISPER_SAMPLE_RATE = 16000

    # Detector labels (lower-cased) that denote human-written text. For these
    # the reported score is the *human* probability and must be inverted.
    _HUMAN_LABELS = frozenset({"real", "human"})

    def __init__(self):
        """Load the Whisper model and the AI-text detector, and set tuning knobs."""
        # Prefer the GPU when one is available; fall back to int8 on CPU.
        cuda_available = torch.cuda.is_available()
        self.device = "cuda" if cuda_available else "cpu"
        self.compute_type = "float16" if cuda_available else "int8"

        # Initialize Whisper Model with optimized settings
        print(f"Initializing Whisper Model on {self.device}...")
        self.model = WhisperModel(
            # Passed positionally: the parameter is named `model_size_or_path`,
            # so the previous `model_size=` keyword raised a TypeError.
            "base",  # base model for a speed/accuracy balance
            device=self.device,
            compute_type=self.compute_type,
            # os.cpu_count() may return None; never pass None to min().
            cpu_threads=min(os.cpu_count() or 1, 4),
            download_root=None,  # use default cache directory
            local_files_only=False,  # allow downloading if needed
        )

        # Decoding options forwarded verbatim to `WhisperModel.transcribe`.
        self.model_settings = {
            'beam_size': 1,  # reduced beam size for speed
            'best_of': 1,  # take first result
            'temperature': 0,  # reduce randomness
            'compression_ratio_threshold': 2.4,
            'condition_on_previous_text': True,
            'no_speech_threshold': 0.6,
            'initial_prompt': None,
        }

        # Phrases whose presence nudges the linguistic score towards "AI".
        self.ai_markers = {
            'formal_phrases': [
                'moreover', 'furthermore', 'consequently',
                'in conclusion', 'it is worth noting',
            ],
            'ai_disclaimers': [
                'as an ai', 'i want to be clear',
                'it is important to note',
            ],
        }

        # Initialize AI Detector
        print("Initializing AI Detection...")
        self.ai_detector = pipeline(
            'text-classification',
            model='roberta-base-openai-detector',
            device=self.device,
        )

        # Buffer / rate-limit settings
        self.min_analysis_words = 10  # minimum words before analysis
        self.max_buffer_size = 1000  # maximum transcript size in words
        self.analysis_interval = 3  # minimum seconds between analyses
        self.last_analysis_time = time.time()
        self.transcript_buffer = []

    def _prepare_audio(self, sample_rate, samples):
        """Convert a Gradio audio chunk to a mono float32 waveform at 16 kHz.

        Integer PCM is scaled into roughly [-1, 1], multi-channel audio is
        averaged down to mono, and the signal is linearly resampled to
        ``WHISPER_SAMPLE_RATE`` when the source rate differs.
        """
        data = np.asarray(samples)
        if np.issubdtype(data.dtype, np.integer):
            # Scale by the dtype's full-scale value (32767 for int16).
            full_scale = float(np.iinfo(data.dtype).max)
            data = data.astype(np.float32) / full_scale
        else:
            data = data.astype(np.float32)

        if data.ndim > 1:
            # (samples, channels) -> (samples,)
            data = data.mean(axis=1)

        target_rate = self.WHISPER_SAMPLE_RATE
        if data.size and sample_rate != target_rate:
            target_len = max(int(round(data.size * target_rate / sample_rate)), 1)
            source_times = np.arange(data.size) / sample_rate
            target_times = np.arange(target_len) / target_rate
            data = np.interp(target_times, source_times, data)

        return data.astype(np.float32)

    def process_realtime_audio(self, audio, state):
        """Transcribe one streamed audio chunk and update the session state.

        Args:
            audio: ``(sample_rate, samples)`` tuple as produced by a Gradio
                ``Audio`` component with ``type="numpy"``, or ``None``.
            state: Per-session dict with keys ``full_transcript``, ``buffer``
                and ``pending_analysis``; ``None`` on the first call.

        Returns:
            ``(text, state)`` where ``text`` is the accumulated transcript
            (including any appended analysis blocks) or an error message.
        """
        if audio is None:
            # Keep showing what we already have instead of blanking the output.
            return (state["full_transcript"] if state else ""), state

        try:
            # Initialize state if needed
            if state is None:
                state = {"full_transcript": "", "buffer": [], "pending_analysis": False}

            sample_rate, samples = audio
            waveform = self._prepare_audio(sample_rate, samples)
            if waveform.size == 0:
                return state["full_transcript"], state

            segments, _ = self.model.transcribe(
                waveform,
                language="en",  # fixed language skips language detection
                vad_filter=True,  # drop non-speech via voice activity detection
                **self.model_settings
            )

            current_transcript = "".join(f"{segment.text} " for segment in segments)
            if not current_transcript.strip():
                return state["full_transcript"], state

            # Update state
            state["full_transcript"] += " " + current_transcript
            state["buffer"].append(current_transcript)

            # Analyse only once enough new words have accumulated and the
            # rate-limit interval has elapsed.
            current_time = time.time()
            buffer_text = " ".join(state["buffer"])
            word_count = len(buffer_text.split())
            interval_elapsed = (current_time - self.last_analysis_time) >= self.analysis_interval

            if word_count >= self.min_analysis_words and interval_elapsed and buffer_text.strip():
                classification, probability, confidence = self.analyze_ai_content(buffer_text)
                analysis_result = f"\n\n---AI Analysis---\nClassification: {classification}\nProbability: {probability:.2f}\nConfidence: {confidence}\n---\n"
                state["full_transcript"] += analysis_result
                state["buffer"] = []
                self.last_analysis_time = current_time

            # Bound the transcript itself: the pending buffer is cleared after
            # every analysis, so its size says nothing about transcript growth.
            transcript_words = state["full_transcript"].split()
            if len(transcript_words) > self.max_buffer_size:
                state["full_transcript"] = " ".join(transcript_words[-self.max_buffer_size:])

            return state["full_transcript"], state

        except Exception as e:
            # UI boundary: surface the failure in the textbox rather than crash the stream.
            return f"Error processing audio: {str(e)}", state

    def analyze_ai_content(self, text):
        """Classify ``text`` as AI- or human-written.

        Returns:
            ``(classification, score, confidence)``. ``score`` is the mean of
            the detector's AI probability and the linguistic heuristic.
            ``("Insufficient text", 0.0, "None")`` is returned for text shorter
            than ``min_analysis_words`` and ``("Analysis Error", 0.0, "None")``
            if the detector raises.
        """
        if not text or len(text.split()) < self.min_analysis_words:
            return "Insufficient text", 0.0, "None"

        try:
            # Only the first 512 characters are scored, to bound latency.
            roberta_result = self.ai_detector(text[:512])[0]
            score = roberta_result['score']
            label = str(roberta_result.get('label', '')).lower()
            # The pipeline reports the score of the *predicted* label. When
            # that label is the human class, the AI probability is 1 - score.
            ai_prob = 1.0 - score if label in self._HUMAN_LABELS else score

            # Quick linguistic analysis
            linguistic_score = self.analyze_linguistic_patterns(text)

            # Calculate final score
            final_score = (ai_prob + linguistic_score) / 2

            if final_score > 0.7:
                return "AI Generated", final_score, "High"
            elif final_score > 0.5:
                return "Likely AI", final_score, "Medium"
            elif final_score > 0.3:
                return "Possibly AI", final_score, "Low"
            return "Likely Human", final_score, "High"

        except Exception as e:
            print(f"Analysis error: {e}")
            return "Analysis Error", 0.0, "None"

    def analyze_linguistic_patterns(self, text):
        """Return a heuristic AI-likeness score in ``[0, 1]``.

        Each marker phrase found contributes 0.3 and the fraction of repeated
        words contributes up to 0.4; the sum is capped at 1.0. Empty or
        whitespace-only text scores 0.0.
        """
        words = text.split()
        if not words:
            # Avoid dividing by zero below.
            return 0.0

        text_lower = text.lower()
        ai_phrase_count = sum(
            1
            for category in self.ai_markers.values()
            for phrase in category
            if phrase in text_lower
        )
        repetition = 1 - (len(set(words)) / len(words))

        return min((ai_phrase_count * 0.3) + repetition * 0.4, 1.0)
159
+
160
def create_gradio_interface():
    """Build the Gradio Blocks UI for live transcription and AI analysis.

    Creates a single ``WebAITranscriber`` and streams microphone chunks into
    its ``process_realtime_audio`` method, writing the returned text into a
    textbox.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    transcriber = WebAITranscriber()

    # Create the interface
    with gr.Blocks(title="Real-time AI Speech Analyzer") as interface:
        gr.Markdown(
            "\n"
            "# Real-time AI Speech Analyzer\n"
            "This app uses Faster Whisper for real-time speech recognition and AI detection.\n"
        )

        # Transcript state threaded through every stream call. The handler
        # takes (audio, state) and returns (text, state), so the state must be
        # listed in both inputs and outputs.
        session_state = gr.State(value=None)

        with gr.Tab("Real-time Analysis"):
            with gr.Row():
                with gr.Column():
                    # NOTE(review): `source=` is the Gradio 3.x spelling; Gradio 4+
                    # uses `sources=["microphone"]` — confirm the pinned version.
                    audio_input = gr.Audio(
                        source="microphone",
                        streaming=True,
                        type="numpy",
                        label="🎤 Speak into your microphone"
                    )
                    gr.Markdown(
                        "\n"
                        "### Tips for best performance:\n"
                        "- Speak clearly and at a moderate pace\n"
                        "- Minimize background noise\n"
                        "- Wait a few seconds for initial processing\n"
                    )
                with gr.Column():
                    realtime_output = gr.Textbox(
                        label="Real-time Transcript and Analysis",
                        lines=15,
                        max_lines=30
                    )

            audio_input.stream(
                transcriber.process_realtime_audio,
                inputs=[audio_input, session_state],
                outputs=[realtime_output, session_state],
                show_progress=False
            )

        gr.Markdown(
            "\n"
            "### Technical Details:\n"
            "- Using Faster Whisper for optimized speech recognition\n"
            "- Real-time AI content analysis\n"
            "- Automatic voice activity detection\n"
            "- Optimized for low-latency processing\n"
        )

    return interface
208
+
209
# Script entry point: build the UI and start serving it.
if __name__ == "__main__":
    create_gradio_interface().launch()