import os
import time

import gradio as gr
import numpy as np
import torch
from faster_whisper import WhisperModel
from transformers import pipeline
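
# Assumed runtime dependencies (for requirements.txt): gradio, faster-whisper,
# transformers, torch, numpy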


class WebAITranscriber:
    def __init__(self):
        # Use the GPU when available; otherwise fall back to quantized CPU inference
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.compute_type = "float16" if self.device == "cuda" else "int8"

        # Initialize the Whisper model with latency-oriented settings
        print(f"Initializing Whisper model on {self.device}...")
        self.model = WhisperModel(
            "base",                                    # model size identifier
            device=self.device,
            compute_type=self.compute_type,
            cpu_threads=min(os.cpu_count() or 1, 4),   # cap CPU threads
            download_root=None,                        # default cache directory
            local_files_only=False,                    # allow downloading if needed
        )
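        # "base" favors latency; larger checkpoints ("small", "medium") trade
        # speed for accuracy if the Space has the resources.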

        # Decoding settings tuned for low latency
        self.model_settings = {
            'beam_size': 1,                        # greedy decoding for speed
            'best_of': 1,                          # take the first candidate
            'temperature': 0,                      # deterministic decoding
            'compression_ratio_threshold': 2.4,
            'condition_on_previous_text': True,
            'no_speech_threshold': 0.6,
            'initial_prompt': None,
        }

        # Lightweight phrase markers often associated with generated text
        self.ai_markers = {
            'formal_phrases': [
                'moreover', 'furthermore', 'consequently',
                'in conclusion', 'it is worth noting',
            ],
            'ai_disclaimers': [
                'as an ai', 'i want to be clear',
                'it is important to note',
            ],
        }

        # Text-classification pipeline for AI-generated-text detection;
        # transformers expects a device index here (0 = first GPU, -1 = CPU)
        print("Initializing AI Detection...")
        self.ai_detector = pipeline(
            'text-classification',
            model='roberta-base-openai-detector',
            device=0 if self.device == "cuda" else -1,
        )

        # Buffering thresholds that decide when to run the AI analysis
        self.min_analysis_words = 10      # minimum words before analysis
        self.max_buffer_size = 1000       # maximum transcript size in words
        self.analysis_interval = 3        # minimum seconds between analyses
        self.last_analysis_time = time.time()
        self.transcript_buffer = []
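
    # Gradio invokes process_realtime_audio once per streamed chunk; the dict held
    # in `state` persists across calls within a session, so the transcript and the
    # analysis buffer accumulate between chunks.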
    def process_realtime_audio(self, audio, state):
        """Transcribe a streamed audio chunk and append it to the session state."""
        if audio is None:
            return "", state
        try:
            # Initialize per-session state on the first chunk
            if state is None:
                state = {"full_transcript": "", "buffer": []}

            # Gradio streams numpy audio as a (sample_rate, samples) tuple, while
            # faster-whisper expects mono float32 samples at 16 kHz
            sample_rate, audio_data = audio
            audio_data = audio_data.astype(np.float32)
            if audio_data.ndim > 1:
                audio_data = audio_data.mean(axis=1)
            if np.abs(audio_data).max() > 1.0:
                audio_data /= 32768.0              # int16 range -> [-1, 1]
            if sample_rate != 16000:
                # crude linear resample to 16 kHz
                new_len = int(len(audio_data) * 16000 / sample_rate)
                audio_data = np.interp(np.linspace(0, len(audio_data), new_len, endpoint=False),
                                       np.arange(len(audio_data)), audio_data)

            segments, _ = self.model.transcribe(
                audio_data,
                language="en",       # fixed language skips detection for speed
                vad_filter=True,     # voice activity detection trims silence
                **self.model_settings,
            )

            # Collect the text from the decoded segments
            current_transcript = ""
            for segment in segments:
                current_transcript += segment.text + " "

            if not current_transcript.strip():
                return state["full_transcript"], state

            # Append the new text to the running transcript and analysis buffer
            state["full_transcript"] += " " + current_transcript
            state["buffer"].append(current_transcript)

            # Run an AI analysis once enough new words have accumulated and the
            # minimum interval has elapsed
            current_time = time.time()
            buffer_text = " ".join(state["buffer"])
            word_count = len(buffer_text.split())

            if (word_count >= self.min_analysis_words and
                    (current_time - self.last_analysis_time) >= self.analysis_interval):
                if len(buffer_text.strip()) > 0:
                    classification, probability, confidence = self.analyze_ai_content(buffer_text)
                    analysis_result = (
                        f"\n\n---AI Analysis---\n"
                        f"Classification: {classification}\n"
                        f"Probability: {probability:.2f}\n"
                        f"Confidence: {confidence}\n---\n"
                    )
                    state["full_transcript"] += analysis_result
                    state["buffer"] = []
                    self.last_analysis_time = current_time

            # Keep the displayed transcript from growing without bound
            transcript_words = state["full_transcript"].split()
            if len(transcript_words) > self.max_buffer_size:
                state["full_transcript"] = " ".join(transcript_words[-self.max_buffer_size:])

            return state["full_transcript"], state
        except Exception as e:
            return f"Error processing audio: {str(e)}", state

    def analyze_ai_content(self, text):
        """Score the text with the RoBERTa detector plus a linguistic heuristic."""
        if not text or len(text.split()) < self.min_analysis_words:
            return "Insufficient text", 0.0, "None"
        try:
            # Truncate to keep classification fast; the detector returns a label
            # (assumed to be "Fake" for machine-generated text, "Real" otherwise)
            # together with the score for that label
            result = self.ai_detector(text[:512])[0]
            if result['label'].lower() == 'fake':
                ai_prob = result['score']
            else:
                ai_prob = 1.0 - result['score']

            # Quick linguistic analysis
            linguistic_score = self.analyze_linguistic_patterns(text)

            # Average the two signals into a final score
            final_score = (ai_prob + linguistic_score) / 2

            if final_score > 0.7:
                return "AI Generated", final_score, "High"
            elif final_score > 0.5:
                return "Likely AI", final_score, "Medium"
            elif final_score > 0.3:
                return "Possibly AI", final_score, "Low"
            return "Likely Human", final_score, "High"
        except Exception as e:
            print(f"Analysis error: {e}")
            return "Analysis Error", 0.0, "None"
    def analyze_linguistic_patterns(self, text):
        """Fast phrase- and diversity-based heuristic score."""
        text_lower = text.lower()
        ai_phrase_count = sum(1 for category in self.ai_markers.values()
                              for phrase in category if phrase in text_lower)

        words = text.split()
        if not words:
            return 0.0
        unique_ratio = len(set(words)) / len(words)
        return min((ai_phrase_count * 0.3) + (1 - unique_ratio) * 0.4, 1.0)
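

# The Blocks UI below streams microphone audio into process_realtime_audio and
# keeps the per-session transcript in a gr.State component alongside the output box.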
def create_gradio_interface():
    transcriber = WebAITranscriber()

    with gr.Blocks(title="Real-time AI Speech Analyzer") as interface:
        gr.Markdown("""
        # Real-time AI Speech Analyzer
        This app uses Faster Whisper for real-time speech recognition and AI detection.
        """)

        with gr.Tab("Real-time Analysis"):
            with gr.Row():
                with gr.Column():
                    audio_input = gr.Audio(
                        sources=["microphone"],   # Gradio 4.x takes a list of sources
                        streaming=True,
                        type="numpy",
                        label="🎤 Speak into your microphone",
                    )
                    gr.Markdown("""
                    ### Tips for best performance:
                    - Speak clearly and at a moderate pace
                    - Minimize background noise
                    - Wait a few seconds for initial processing
                    """)
                with gr.Column():
                    realtime_output = gr.Textbox(
                        label="Real-time Transcript and Analysis",
                        lines=15,
                        max_lines=30,
                    )

            # Per-session state that persists between streamed chunks; the handler
            # both receives and returns it, so it must be wired as input and output
            state = gr.State()

            audio_input.stream(
                transcriber.process_realtime_audio,
                inputs=[audio_input, state],
                outputs=[realtime_output, state],
                show_progress="hidden",
            )
gr.Markdown(""" | |
### Technical Details: | |
- Using Faster Whisper for optimized speech recognition | |
- Real-time AI content analysis | |
- Automatic voice activity detection | |
- Optimized for low-latency processing | |
""") | |
return interface | |


# Launch the app
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch()
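

# Minimal sketch of exercising the detector without the UI (assumes the model
# weights can be downloaded); uncomment to try locally:
#
#   transcriber = WebAITranscriber()
#   print(transcriber.analyze_ai_content(
#       "Moreover, it is important to note that this sentence is rather formal."
#   ))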