RealSanjay
committed on
Create app.py
app.py
ADDED
@@ -0,0 +1,212 @@
import gradio as gr
from faster_whisper import WhisperModel
import numpy as np
import os
import statistics
from transformers import pipeline
from textblob import TextBlob
import torch
import time

class WebAITranscriber:
    def __init__(self):
        # Check if CUDA is available
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.compute_type = "float16" if torch.cuda.is_available() else "int8"

        # Initialize Whisper Model with optimized settings
        print(f"Initializing Whisper Model on {self.device}...")
        self.model = WhisperModel(
            "base",  # base model for a good speed/accuracy balance (model_size_or_path)
            device=self.device,
            compute_type=self.compute_type,
            cpu_threads=min(os.cpu_count() or 1, 4),  # cap thread count; os.cpu_count() may return None
            download_root=None,  # use default cache directory
            local_files_only=False  # allow downloading if needed
        )

        # Optimize model settings
        self.model_settings = {
            'beam_size': 1,  # Reduced beam size for speed
            'best_of': 1,  # Take first result
            'temperature': 0,  # Reduce randomness
            'compression_ratio_threshold': 2.4,
            'condition_on_previous_text': True,
            'no_speech_threshold': 0.6,
            'initial_prompt': None
        }

        # AI Detection Markers (optimized for speed)
        self.ai_markers = {
            'formal_phrases': [
                'moreover', 'furthermore', 'consequently',
                'in conclusion', 'it is worth noting'
            ],
            'ai_disclaimers': [
                'as an ai', 'i want to be clear',
                'it is important to note'
            ]
        }

        # Initialize AI Detector with optimized settings
        print("Initializing AI Detection...")
        self.ai_detector = pipeline(
            'text-classification',
            model='roberta-base-openai-detector',
            device=self.device
        )

        # Optimized buffer settings
        self.min_analysis_words = 10  # Minimum words before analysis
        self.max_buffer_size = 1000  # Maximum buffer size in words
        self.analysis_interval = 3  # Minimum seconds between analyses
        self.last_analysis_time = time.time()
        self.transcript_buffer = []

    def process_realtime_audio(self, audio, state):
        """Process real-time audio with optimized settings."""
        if audio is None:
            return "", state

        try:
            # Initialize state if needed
            if state is None:
                state = {"full_transcript": "", "buffer": [], "pending_analysis": False}

            # Gradio streams numpy audio as a (sample_rate, int16 array) tuple,
            # while faster-whisper expects a float32 waveform sampled at 16 kHz.
            sample_rate, audio_data = audio
            audio_data = audio_data.astype(np.float32) / 32768.0
            if audio_data.ndim > 1:
                audio_data = audio_data.mean(axis=1)  # down-mix to mono
            if sample_rate != 16000:
                # Lightweight linear resampling to 16 kHz
                target_length = int(round(len(audio_data) * 16000 / sample_rate))
                audio_data = np.interp(
                    np.linspace(0, len(audio_data) - 1, target_length),
                    np.arange(len(audio_data)),
                    audio_data,
                )

            # Process audio in smaller chunks for real-time performance
            segments, _ = self.model.transcribe(
                audio_data,
                language="en",  # Specify language for faster processing
                vad_filter=True,  # Use Voice Activity Detection
                **self.model_settings
            )

            # Process segments
            current_transcript = ""
            for segment in segments:
                current_transcript += segment.text + " "

            if not current_transcript.strip():
                return state["full_transcript"], state

            # Update state
            state["full_transcript"] += " " + current_transcript
            state["buffer"].append(current_transcript)

            # Check if we should perform analysis
            current_time = time.time()
            buffer_text = " ".join(state["buffer"])
            word_count = len(buffer_text.split())

            if (word_count >= self.min_analysis_words and
                    (current_time - self.last_analysis_time) >= self.analysis_interval):

                # Perform AI analysis
                if len(buffer_text.strip()) > 0:
                    classification, probability, confidence = self.analyze_ai_content(buffer_text)
                    analysis_result = (
                        f"\n\n---AI Analysis---\n"
                        f"Classification: {classification}\n"
                        f"Probability: {probability:.2f}\n"
                        f"Confidence: {confidence}\n---\n"
                    )
                    state["full_transcript"] += analysis_result
                    state["buffer"] = []
                    self.last_analysis_time = current_time

            # Trim the running transcript if it gets too large
            if word_count > self.max_buffer_size:
                words = state["full_transcript"].split()
                state["full_transcript"] = " ".join(words[-self.max_buffer_size:])

            return state["full_transcript"], state

        except Exception as e:
            return f"Error processing audio: {str(e)}", state

    def analyze_ai_content(self, text):
        """Optimized AI content analysis."""
        if not text or len(text.split()) < self.min_analysis_words:
            return "Insufficient text", 0.0, "None"

        try:
            # The detector returns the winning label ("Fake" = AI-generated, "Real" = human)
            # with its score; convert that into a probability that the text is AI-generated.
            roberta_result = self.ai_detector(text[:512])[0]  # Limit text length for speed
            if roberta_result['label'].lower() == 'fake':
                ai_prob = roberta_result['score']
            else:
                ai_prob = 1.0 - roberta_result['score']

            # Quick linguistic analysis
            linguistic_score = self.analyze_linguistic_patterns(text)

            # Calculate final score
            final_score = (ai_prob + linguistic_score) / 2

            # Fast classification
            if final_score > 0.7:
                return "AI Generated", final_score, "High"
            elif final_score > 0.5:
                return "Likely AI", final_score, "Medium"
            elif final_score > 0.3:
                return "Possibly AI", final_score, "Low"
            return "Likely Human", final_score, "High"

        except Exception as e:
            print(f"Analysis error: {e}")
            return "Analysis Error", 0.0, "None"

    def analyze_linguistic_patterns(self, text):
        """Optimized linguistic pattern analysis."""
        text_lower = text.lower()
        ai_phrase_count = sum(1 for category in self.ai_markers.values()
                              for phrase in category if phrase in text_lower)

        unique_words = len(set(text.split()))
        total_words = len(text.split())

        return min((ai_phrase_count * 0.3) + (1 - (unique_words / total_words)) * 0.4, 1.0)

def create_gradio_interface():
    transcriber = WebAITranscriber()

    # Create the interface
    with gr.Blocks(title="Real-time AI Speech Analyzer") as interface:
        gr.Markdown("""
        # Real-time AI Speech Analyzer
        This app uses Faster Whisper for real-time speech recognition and AI detection.
        """)

        with gr.Tab("Real-time Analysis"):
            with gr.Row():
                with gr.Column():
                    audio_input = gr.Audio(
                        source="microphone",
                        streaming=True,
                        type="numpy",
                        label="🎤 Speak into your microphone"
                    )
                    gr.Markdown("""
                    ### Tips for best performance:
                    - Speak clearly and at a moderate pace
                    - Minimize background noise
                    - Wait a few seconds for initial processing
                    """)
                with gr.Column():
                    realtime_output = gr.Textbox(
                        label="Real-time Transcript and Analysis",
                        lines=15,
                        max_lines=30
                    )

            # The callback takes and returns a state dict, so a gr.State component
            # must be wired into both inputs and outputs.
            transcript_state = gr.State()

            audio_input.stream(
                transcriber.process_realtime_audio,
                inputs=[audio_input, transcript_state],
                outputs=[realtime_output, transcript_state],
                show_progress=False
            )

        gr.Markdown("""
        ### Technical Details:
        - Using Faster Whisper for optimized speech recognition
        - Real-time AI content analysis
        - Automatic voice activity detection
        - Optimized for low-latency processing
        """)

    return interface

# Launch the app
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch()
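The commit adds only app.py; on Spaces the dependencies also have to be declared. A minimal requirements.txt sketch derived from the imports above — the version pin is an assumption (the gr.Audio(source=..., streaming=...) call matches the Gradio 3.x API), not part of this commit:

gradio==3.50.2     # assumed 3.x pin for source="microphone" streaming audio
faster-whisper     # WhisperModel speech recognition
transformers       # text-classification pipeline for the AI detector
torch              # device selection and model backend
textblob           # imported by app.py
numpy              # audio array handling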