Pradheep1647
committed on
Commit
·
cbf53ef
1
Parent(s):
9f703fc
updates the analyze video func
Browse files
app.py
CHANGED
@@ -11,12 +11,11 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
|
11 |
from transformers import BlipProcessor, BlipForConditionalGeneration
|
12 |
import cv2
|
13 |
|
14 |
-
YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY')
|
15 |
-
|
16 |
def download_youtube_video(video_url, api_key):
|
17 |
ydl_opts = {
|
18 |
'format': 'bestvideo+bestaudio',
|
19 |
'outtmpl': os.path.join('./', '%(title)s.%(ext)s'),
|
|
|
20 |
}
|
21 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
22 |
ydl.download([video_url])
|
@@ -91,11 +90,8 @@ emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
|
|
91 |
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
|
92 |
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
|
93 |
|
94 |
-
def analyze_video(video_url):
|
95 |
-
|
96 |
-
output_path = './'
|
97 |
-
|
98 |
-
video_path = download_youtube_video(video_url, YOUTUBE_API_KEY)
|
99 |
mp4_path = convert_to_mp4(video_path)
|
100 |
audio_path = extract_audio_from_video(mp4_path)
|
101 |
audio_wav_path = convert_mp3_to_wav(audio_path)
|
@@ -122,7 +118,7 @@ def analyze_video(video_url):
|
|
122 |
if frame_count_video % n_frame_interval == 0:
|
123 |
pixel_values_video = preprocess_frame(frame_video)
|
124 |
caption_video = generate_caption(pixel_values_video)
|
125 |
-
predicted_emotions_video
|
126 |
emotion_vectors_video.append(np.array(list(predicted_emotions_video.values())))
|
127 |
|
128 |
frame_count_video += 1
|
@@ -148,7 +144,7 @@ with gr.Blocks() as iface:
|
|
148 |
|
149 |
with gr.Row():
|
150 |
transcript_output = gr.Textbox(label="Transcript", interactive=False)
|
151 |
-
audio_emotion_output = gr.Textbox(label="Emotion from Audio", interactive=False)
|
152 |
visual_emotion_output = gr.Textbox(label="Emotion from Video", interactive=False)
|
153 |
|
154 |
submit_button.click(analyze_video, inputs=[video_url, api_key], outputs=[transcript_output, audio_emotion_output, visual_emotion_output])
|
|
|
11 |
from transformers import BlipProcessor, BlipForConditionalGeneration
|
12 |
import cv2
|
13 |
|
|
|
|
|
14 |
def download_youtube_video(video_url, api_key):
|
15 |
ydl_opts = {
|
16 |
'format': 'bestvideo+bestaudio',
|
17 |
'outtmpl': os.path.join('./', '%(title)s.%(ext)s'),
|
18 |
+
'quiet': True,
|
19 |
}
|
20 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
21 |
ydl.download([video_url])
|
|
|
90 |
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
|
91 |
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
|
92 |
|
93 |
+
def analyze_video(video_url, api_key):
|
94 |
+
video_path = download_youtube_video(video_url, api_key)
|
|
|
|
|
|
|
95 |
mp4_path = convert_to_mp4(video_path)
|
96 |
audio_path = extract_audio_from_video(mp4_path)
|
97 |
audio_wav_path = convert_mp3_to_wav(audio_path)
|
|
|
118 |
if frame_count_video % n_frame_interval == 0:
|
119 |
pixel_values_video = preprocess_frame(frame_video)
|
120 |
caption_video = generate_caption(pixel_values_video)
|
121 |
+
predicted_emotions_video = predict_emotions(caption_video)
|
122 |
emotion_vectors_video.append(np.array(list(predicted_emotions_video.values())))
|
123 |
|
124 |
frame_count_video += 1
|
|
|
144 |
|
145 |
with gr.Row():
|
146 |
transcript_output = gr.Textbox(label="Transcript", interactive=False)
|
147 |
+
audio_emotion_output = gr.Textbox(label="Emotion from Audio and Text", interactive=False)
|
148 |
visual_emotion_output = gr.Textbox(label="Emotion from Video", interactive=False)
|
149 |
|
150 |
submit_button.click(analyze_video, inputs=[video_url, api_key], outputs=[transcript_output, audio_emotion_output, visual_emotion_output])
|