import os
import time

import gradio as gr
import numpy as np
import torch
from faster_whisper import WhisperModel
from transformers import pipeline
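
# Assumed runtime dependencies (for requirements.txt): gradio, faster-whisper,
# transformers, torch, numpy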


class WebAITranscriber:
    def __init__(self):
        # Use the GPU when available; otherwise fall back to quantized CPU inference
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.compute_type = "float16" if self.device == "cuda" else "int8"

        # Initialize the Whisper model with latency-oriented settings
        print(f"Initializing Whisper model on {self.device}...")
        self.model = WhisperModel(
            "base",                                    # model size identifier
            device=self.device,
            compute_type=self.compute_type,
            cpu_threads=min(os.cpu_count() or 1, 4),   # cap CPU threads
            download_root=None,                        # default cache directory
            local_files_only=False,                    # allow downloading if needed
        )
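        # "base" favors latency; larger checkpoints ("small", "medium") trade
        # speed for accuracy if the Space has the resources.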

        # Decoding settings tuned for low latency
        self.model_settings = {
            'beam_size': 1,                        # greedy decoding for speed
            'best_of': 1,                          # take the first candidate
            'temperature': 0,                      # deterministic decoding
            'compression_ratio_threshold': 2.4,
            'condition_on_previous_text': True,
            'no_speech_threshold': 0.6,
            'initial_prompt': None,
        }

        # Lightweight phrase markers often associated with generated text
        self.ai_markers = {
            'formal_phrases': [
                'moreover', 'furthermore', 'consequently',
                'in conclusion', 'it is worth noting',
            ],
            'ai_disclaimers': [
                'as an ai', 'i want to be clear',
                'it is important to note',
            ],
        }

        # Text-classification pipeline for AI-generated-text detection;
        # transformers expects a device index here (0 = first GPU, -1 = CPU)
        print("Initializing AI Detection...")
        self.ai_detector = pipeline(
            'text-classification',
            model='roberta-base-openai-detector',
            device=0 if self.device == "cuda" else -1,
        )

        # Buffering thresholds that decide when to run the AI analysis
        self.min_analysis_words = 10      # minimum words before analysis
        self.max_buffer_size = 1000       # maximum transcript size in words
        self.analysis_interval = 3        # minimum seconds between analyses
        self.last_analysis_time = time.time()
        self.transcript_buffer = []
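
    # Gradio invokes process_realtime_audio once per streamed chunk; the dict held
    # in `state` persists across calls within a session, so the transcript and the
    # analysis buffer accumulate between chunks.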
    def process_realtime_audio(self, audio, state):
        """Transcribe a streamed audio chunk and append it to the session state."""
        if audio is None:
            return "", state
        try:
            # Initialize per-session state on the first chunk
            if state is None:
                state = {"full_transcript": "", "buffer": []}

            # Gradio streams numpy audio as a (sample_rate, samples) tuple, while
            # faster-whisper expects mono float32 samples at 16 kHz
            sample_rate, audio_data = audio
            audio_data = audio_data.astype(np.float32)
            if audio_data.ndim > 1:
                audio_data = audio_data.mean(axis=1)
            if np.abs(audio_data).max() > 1.0:
                audio_data /= 32768.0              # int16 range -> [-1, 1]
            if sample_rate != 16000:
                # crude linear resample to 16 kHz
                new_len = int(len(audio_data) * 16000 / sample_rate)
                audio_data = np.interp(np.linspace(0, len(audio_data), new_len, endpoint=False),
                                       np.arange(len(audio_data)), audio_data)

            segments, _ = self.model.transcribe(
                audio_data,
                language="en",       # fixed language skips detection for speed
                vad_filter=True,     # voice activity detection trims silence
                **self.model_settings,
            )

            # Collect the text from the decoded segments
            current_transcript = ""
            for segment in segments:
                current_transcript += segment.text + " "

            if not current_transcript.strip():
                return state["full_transcript"], state

            # Append the new text to the running transcript and analysis buffer
            state["full_transcript"] += " " + current_transcript
            state["buffer"].append(current_transcript)

            # Run an AI analysis once enough new words have accumulated and the
            # minimum interval has elapsed
            current_time = time.time()
            buffer_text = " ".join(state["buffer"])
            word_count = len(buffer_text.split())

            if (word_count >= self.min_analysis_words and
                    (current_time - self.last_analysis_time) >= self.analysis_interval):
                if len(buffer_text.strip()) > 0:
                    classification, probability, confidence = self.analyze_ai_content(buffer_text)
                    analysis_result = (
                        f"\n\n---AI Analysis---\n"
                        f"Classification: {classification}\n"
                        f"Probability: {probability:.2f}\n"
                        f"Confidence: {confidence}\n---\n"
                    )
                    state["full_transcript"] += analysis_result
                    state["buffer"] = []
                    self.last_analysis_time = current_time

            # Keep the displayed transcript from growing without bound
            transcript_words = state["full_transcript"].split()
            if len(transcript_words) > self.max_buffer_size:
                state["full_transcript"] = " ".join(transcript_words[-self.max_buffer_size:])

            return state["full_transcript"], state
        except Exception as e:
            return f"Error processing audio: {str(e)}", state

    def analyze_ai_content(self, text):
        """Score the text with the RoBERTa detector plus a linguistic heuristic."""
        if not text or len(text.split()) < self.min_analysis_words:
            return "Insufficient text", 0.0, "None"
        try:
            # Truncate to keep classification fast; the detector returns a label
            # (assumed to be "Fake" for machine-generated text, "Real" otherwise)
            # together with the score for that label
            result = self.ai_detector(text[:512])[0]
            if result['label'].lower() == 'fake':
                ai_prob = result['score']
            else:
                ai_prob = 1.0 - result['score']

            # Quick linguistic analysis
            linguistic_score = self.analyze_linguistic_patterns(text)

            # Average the two signals into a final score
            final_score = (ai_prob + linguistic_score) / 2

            if final_score > 0.7:
                return "AI Generated", final_score, "High"
            elif final_score > 0.5:
                return "Likely AI", final_score, "Medium"
            elif final_score > 0.3:
                return "Possibly AI", final_score, "Low"
            return "Likely Human", final_score, "High"
        except Exception as e:
            print(f"Analysis error: {e}")
            return "Analysis Error", 0.0, "None"
    def analyze_linguistic_patterns(self, text):
        """Fast phrase- and diversity-based heuristic score."""
        text_lower = text.lower()
        ai_phrase_count = sum(1 for category in self.ai_markers.values()
                              for phrase in category if phrase in text_lower)

        words = text.split()
        if not words:
            return 0.0
        unique_ratio = len(set(words)) / len(words)
        return min((ai_phrase_count * 0.3) + (1 - unique_ratio) * 0.4, 1.0)
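

# The Blocks UI below streams microphone audio into process_realtime_audio and
# keeps the per-session transcript in a gr.State component alongside the output box.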
def create_gradio_interface():
    transcriber = WebAITranscriber()

    with gr.Blocks(title="Real-time AI Speech Analyzer") as interface:
        gr.Markdown("""
        # Real-time AI Speech Analyzer
        This app uses Faster Whisper for real-time speech recognition and AI detection.
        """)

        with gr.Tab("Real-time Analysis"):
            with gr.Row():
                with gr.Column():
                    audio_input = gr.Audio(
                        sources=["microphone"],   # Gradio 4.x takes a list of sources
                        streaming=True,
                        type="numpy",
                        label="🎤 Speak into your microphone",
                    )
                    gr.Markdown("""
                    ### Tips for best performance:
                    - Speak clearly and at a moderate pace
                    - Minimize background noise
                    - Wait a few seconds for initial processing
                    """)
                with gr.Column():
                    realtime_output = gr.Textbox(
                        label="Real-time Transcript and Analysis",
                        lines=15,
                        max_lines=30,
                    )

            # Per-session state that persists between streamed chunks; the handler
            # both receives and returns it, so it must be wired as input and output
            state = gr.State()

            audio_input.stream(
                transcriber.process_realtime_audio,
                inputs=[audio_input, state],
                outputs=[realtime_output, state],
                show_progress="hidden",
            )
gr.Markdown(""" | |
### Technical Details: | |
- Using Faster Whisper for optimized speech recognition | |
- Real-time AI content analysis | |
- Automatic voice activity detection | |
- Optimized for low-latency processing | |
""") | |
return interface | |


# Launch the app
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch()
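

# Minimal sketch of exercising the detector without the UI (assumes the model
# weights can be downloaded); uncomment to try locally:
#
#   transcriber = WebAITranscriber()
#   print(transcriber.analyze_ai_content(
#       "Moreover, it is important to note that this sentence is rather formal."
#   ))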