import streamlit as st
import torch
import clip  # openai/CLIP package: pip install git+https://github.com/openai/CLIP.git
from PIL import Image
import face_recognition
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import cv2
import numpy as np
import subprocess
import tempfile
import os
import yt_dlp
# Helper functions
def get_video_id(url):
    # Handle both youtube.com/watch?v=<id> and youtu.be/<id> URLs.
    if "v=" in url:
        return url.split("v=")[1].split("&")[0]
    return url.rstrip("/").split("/")[-1].split("?")[0]
def download_youtube_video(url, output_path):
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
        'outtmpl': os.path.join(output_path, '%(id)s.%(ext)s'),
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        filename = ydl.prepare_filename(info)
    return filename
def process_video(video_url, output_dir, video_id):
    # Placeholder for the full processing pipeline. This should eventually
    # include face detection, object detection, transcription, etc.
    # For now, download the video and sample one frame per second.
    video_path = download_youtube_video(video_url, output_dir)
    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    duration = frame_count / fps if fps else 0
    frames = []
    frame_times = []
    step = max(int(fps), 1)  # guard against streams that report fps < 1
    for i in range(0, frame_count, step):  # extract one frame per second
        video.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = video.read()
        if ret:
            frames.append(frame)
            frame_times.append(i / fps)
    video.release()
    return {
        'video_path': video_path,
        'frames': frames,
        'frame_times': frame_times,
        'duration': duration,
        'fps': fps
    }
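
# A minimal sketch (not part of the original pipeline) of how the sampled
# frames could be embedded with the CLIP model loaded below, to back the
# image and text similarity search. The helper name and in-memory storage
# are assumptions for illustration only.
def embed_frames_sketch(frames, clip_model, preprocess, device):
    embeddings = []
    with torch.no_grad():
        for frame in frames:
            # OpenCV frames are BGR; CLIP's preprocess expects an RGB PIL image.
            pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            image_tensor = preprocess(pil_image).unsqueeze(0).to(device)
            features = clip_model.encode_image(image_tensor)
            features = features / features.norm(dim=-1, keepdim=True)  # unit-length
            embeddings.append(features.cpu().numpy()[0])
    return np.stack(embeddings).astype("float32")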
def search(query, index_path, metadata_path, model):
    # Placeholder for search functionality.
    # This should use FAISS for efficient similarity search; a hedged sketch follows below.
    return [], []
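
# A hedged sketch of the intended FAISS search: build a flat inner-product
# index over L2-normalized embeddings (inner product on unit vectors equals
# cosine similarity) and return the timestamps of the best matches. It assumes
# the embeddings were produced by the same encoder used on the query (e.g.
# sentence-transformer embeddings of transcript segments); CLIP frame
# embeddings would need CLIP's own text encoder instead. The in-memory layout
# is an assumption; the real app may persist to index_path/metadata_path.
def search_sketch(query, embeddings, timestamps, model, top_k=5):
    query_vec = model.encode([query], convert_to_numpy=True).astype("float32")
    faiss.normalize_L2(query_vec)
    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(embeddings)
    scores, ids = index.search(query_vec, min(top_k, len(timestamps)))
    return [timestamps[i] for i in ids[0]], scores[0].tolist()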
# Load models
@st.cache_resource
def load_models():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # OpenAI CLIP is not distributed via torch.hub; load it through the clip package instead.
    clip_model, preprocess = clip.load("ViT-B/32", device=device)
    text_model = SentenceTransformer("all-MiniLM-L6-v2").to(device)
    qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", device=0 if torch.cuda.is_available() else -1)
    return clip_model, preprocess, text_model, qa_model
clip_model, preprocess, text_model, qa_model = load_models()
# Streamlit UI
st.title("Enhanced YouTube Video Analysis")

video_url = st.text_input("Enter YouTube Video URL")

if st.button("Analyze"):
    with st.spinner("Processing video..."):
        video_id = get_video_id(video_url)
        results = process_video(video_url, "output_dir", video_id)

    if results:
        st.success("Video processed successfully!")
        # Text search and question answering
        st.subheader("Text Search and Q&A")
        query = st.text_input("Enter a search query or question")
        if query:
            # Placeholder for text search and QA; a hedged sketch follows below.
            st.write("Text search and QA functionality to be implemented")
        # Image upload and similarity search
        st.subheader("Image Search")
        uploaded_image = st.file_uploader("Upload an image to find similar frames", type=["jpg", "jpeg", "png"])
        if uploaded_image:
            # Placeholder for image search; a hedged sketch follows below.
            st.write("Image search functionality to be implemented")
        # Face upload and recognition
        st.subheader("Face Search")
        uploaded_face = st.file_uploader("Upload a face image to find appearances", type=["jpg", "jpeg", "png"])
        if uploaded_face:
            face_image = face_recognition.load_image_file(uploaded_face)
            uploaded_encodings = face_recognition.face_encodings(face_image)
            if not uploaded_encodings:
                st.error("No face was detected in the uploaded image.")
            else:
                face_encoding = uploaded_encodings[0]
                face_appearances = []
                face_frames = []
                for i, frame in enumerate(results['frames']):
                    # OpenCV frames are BGR; face_recognition expects RGB.
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    face_locations = face_recognition.face_locations(rgb_frame)
                    face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)
                    for encoding in face_encodings:
                        if face_recognition.compare_faces([face_encoding], encoding)[0]:
                            face_appearances.append(results['frame_times'][i])
                            face_frames.append(frame)
                            break  # one match per frame is enough
                st.write(f"Face appearances found at {len(face_appearances)} timestamps.")
                if face_frames:
                    # Create a temporary directory to store the matching frames
                    with tempfile.TemporaryDirectory() as temp_dir:
                        # Save frames as images
                        for i, frame in enumerate(face_frames):
                            cv2.imwrite(os.path.join(temp_dir, f"frame_{i:04d}.jpg"), frame)
                        # Use FFmpeg to create a video from the frames
                        output_video = "face_appearances.mp4"
                        ffmpeg_command = [
                            "ffmpeg", "-y",
                            "-framerate", str(results['fps']),
                            "-i", os.path.join(temp_dir, "frame_%04d.jpg"),
                            "-c:v", "libx264",
                            "-pix_fmt", "yuv420p",
                            output_video
                        ]
                        subprocess.run(ffmpeg_command, check=True)
                    # Display the generated video
                    st.video(output_video)
                    # Provide a download link for the video
                    with open(output_video, "rb") as file:
                        st.download_button(
                            label="Download Face Appearances Video",
                            data=file,
                            file_name="face_appearances.mp4",
                            mime="video/mp4"
                        )
                else:
                    st.write("No frames with the uploaded face were found in the video.")
        # Display original video
        st.subheader("Original Video")
        st.video(results['video_path'])
    else:
        st.warning("Please enter a valid YouTube URL and click 'Analyze'")