Spaces:
Sleeping
Sleeping
Upload app.py with huggingface_hub
Browse files
app.py
ADDED
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
sys.path.append("../")
|
4 |
+
|
5 |
+
import gradio as gr
|
6 |
+
import torch
|
7 |
+
import numpy as np
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
plt.rcParams["font.family"] = "serif"
|
10 |
+
import decord
|
11 |
+
import PIL, PIL.Image
|
12 |
+
import librosa
|
13 |
+
from IPython.display import Markdown, display
|
14 |
+
import pandas as pd
|
15 |
+
|
16 |
+
from util import *
|
17 |
+
|
18 |
+
|
19 |
+
# Inline stylesheet (wrapped in <style> tags) that is prepended to the
# header and footer HTML snippets defined in this file.
css = """
<style>
body {
font-family: 'Arial', serif;
margin: 0;
padding: 0;
color: black;
}
.header {
display: flex;
align-items: center;
justify-content: center;
margin-top: 5px;
color: black;
}
.footer {
display: flex;
align-items: center;
justify-content: center;
margin-top: 5px;
}
.image {
margin-right: 20px;
}
.content {
text-align: center;
color: black;
}
.title {
font-size: 2.5em;
font-weight: bold;
margin-bottom: 10px;
}
.authors {
color: #4a90e2;
font-size: 1.05em;
margin: 10px 0;
}
.affiliations {
font-size: 1em;
margin-bottom: 20px;
}
.buttons {
display: flex;
justify-content: center;
gap: 10px;
}
.button {
background-color: #545758;
text-decoration: none;
padding: 8px 16px;
border-radius: 5px;
font-size: 1.05em;
}
.button:hover {
background-color: #333;
}
</style>
"""
|
78 |
+
|
79 |
+
|
80 |
+
# Page header: title, subtitle, author links, affiliations and link buttons.
# The stylesheet in `css` is prepended so the class names below resolve.
# Each author <a> tag closes its `style` attribute before `href`; without the
# closing quote the browser folds `href` into the style value and the link
# is dead.
header = css + """
<div class="header">
<!-- <div class="image">
<img src="./media_assets/pouring-water-logo5.png" alt="logo" width="100">
</div> -->
<div class="content">
<img src="https://bpiyush.github.io/pouring-water-website/assets/pouring-water-logo5.png" alt="logo" width="80" style="margin-bottom: -50px; margin-right: 30px;">
<div class="title" style="font-size: 44px; margin-left: -30px;">The Sound of Water</div>
<div style="font-size: 30px; margin-left: -30px;"><b>Inferring Physical Properties from Pouring Liquids</b></div>
<div class="authors">
<a style="color: #92eaff;" href="https://bpiyush.github.io/">Piyush Bagad</a><sup>1</sup>,
<a style="color: #92eaff;" href="https://makarandtapaswi.github.io/">Makarand Tapaswi</a><sup>2</sup>,
<a style="color: #92eaff;" href="https://www.ceessnoek.info/">Cees G. M. Snoek</a><sup>3</sup>,
<a style="color: #92eaff;" href="https://www.robots.ox.ac.uk/~az/">Andrew Zisserman</a><sup>1</sup>,
</div>
<div class="affiliations">
<sup>1</sup>University of Oxford, <sup>2</sup>IIIT Hyderabad, <sup>3</sup>University of Amsterdam
</div>

<div class="buttons">
<a href="#" style="color: #92eaff;" class="button">arXiv</a>
<a href="https://bpiyush.github.io/pouring-water-website/" style="color: #92eaff;" class="button">🌐 Project</a>
<a href="https://github.com/bpiyush/SoundOfWater" style="color: #92eaff;" class="button"> <img src="https://bpiyush.github.io/pouring-water-website/assets/github-logo.png" alt="logo" style="height:16px; float: left;"> Code</a>
<a href="https://huggingface.co/datasets/bpiyush/sound-of-water" style="color: #92eaff;" class="button">🤗 Data</a>
<a href="https://huggingface.co/bpiyush/sound-of-water-models" style="color: #92eaff;" class="button">🤗 Models</a>
<a href="#" style="color: #92eaff;" class="button">🎯 Demo</a>
</div>
</div>
</div>
"""
|
110 |
+
|
111 |
+
# Page footer: call-to-action plus usage tips, shown below the interface.
# The stylesheet in `css` is prepended so the class names below resolve.
footer = css + """
<div class="header" style="justify-content: left;">
<div class="content" style="font-size: 16px;">
Please give us a 🌟 on <a href='https://github.com/bpiyush/SoundOfWater'>Github</a> if you like our work!
Tips to get better results:
<br><br>
<ol style="text-align: left; font-size: 14px; margin-left: 30px">
<li>Make sure there is not too much noise such that the pouring is audible.</li>
<li>Note that the video is not used during the inference. Only the audio must be clear enough.</li>
</ol>
</div>
</div>
"""
|
124 |
+
|
125 |
+
# def process_input(video=None, youtube_link=None, start_time=None, end_time=None):
|
126 |
+
# if video:
|
127 |
+
# return f"Video file uploaded: {video.name}"
|
128 |
+
# elif youtube_link and start_time and end_time:
|
129 |
+
# return f"YouTube link: {youtube_link} (Start: {start_time}, End: {end_time})"
|
130 |
+
# else:
|
131 |
+
# return "Please upload a video or provide a YouTube link with start and end times."
|
132 |
+
|
133 |
+
|
134 |
+
def configure_input():
    """Create the input widgets for the demo.

    Emits an instruction line as Markdown, then builds the video upload
    widget and the two YouTube-link textboxes.

    Returns:
        A list ``[video, start-link textbox, end-link textbox]``, in the
        same order as the parameters of ``process_input``.
    """
    gr.Markdown(
        "#### Either upload a video file or provide a YouTube link with start and end times."
    )
    # The list literal is evaluated left to right, so the widgets are
    # created in the order: video, start link, end link.
    components = [
        gr.Video(label="Upload Video", height=480),
        gr.Textbox(label="YouTube Link (Start time)"),
        gr.Textbox(label="YouTube Link (End time)"),
    ]
    return components
|
142 |
+
|
143 |
+
|
144 |
+
# Example usage in a Gradio interface
|
145 |
+
def process_input(video, youtube_link_start, youtube_link_end):
    """Run the model on an uploaded video, or validate a pair of YouTube links.

    Args:
        video: Path of the uploaded video file, or ``None`` if nothing was
            uploaded.
        youtube_link_start: YouTube link carrying the start time as ``t=``
            (e.g. ``https://youtu.be/6-HVn8Jzzuk?t=10``); may be ``None`` or
            empty.
        youtube_link_end: YouTube link to the same video carrying the end
            time as ``t=``; may be ``None`` or empty.

    Returns:
        A 4-tuple ``(image, df_show, gr.Markdown(note), tsne_image)``, in the
        same order as the components returned by ``configure_outputs``.

    Raises:
        gr.Error: If no usable input was provided, the two links point to
            different videos, or a ``t=`` time cannot be parsed.
        NotImplementedError: For otherwise valid YouTube links, since that
            path is not implemented yet.
    """
    if video is not None:
        print(video)

        # NOTE(review): the model is (re)loaded on every request; caching it
        # would be faster if `load_model` is safe to share — confirm.
        model = load_model()

        # The input is a video file path
        video_path = video

        # First frame, spectrogram and audio tensor of the clip
        frame = load_frame(video_path)
        S = load_spectrogram(video_path)
        audio = load_audio_tensor(video_path)

        # Get output
        z_audio, y_audio = get_model_output(audio, model)

        # Show image output
        image, df_show, tsne_image = show_output(frame, S, y_audio, z_audio)

        return image, df_show, gr.Markdown(note), tsne_image

    # Truthiness (not `is not None`) so that empty textbox strings count as
    # "not provided". A bare string return would not match the four output
    # components, so the condition is surfaced as a UI error instead.
    if not (youtube_link_start and youtube_link_end):
        raise gr.Error("No input provided")

    # Using the provided YouTube link
    # Example: https://youtu.be/6-HVn8Jzzuk?t=10
    video_id = youtube_link_start.split("/")[-1].split("?")[0]
    if video_id != youtube_link_end.split("/")[-1].split("?")[0]:
        raise gr.Error("Video IDs do not match")

    try:
        # Parsed for validation only until the YouTube path is implemented.
        start_time = float(youtube_link_start.split("t=")[-1])
        end_time = float(youtube_link_end.split("t=")[-1])
    except ValueError as err:
        raise gr.Error(
            "Could not parse start/end time from the YouTube links (expected '?t=<seconds>')"
        ) from err

    raise NotImplementedError("YouTube link processing is not implemented yet")
|
187 |
+
|
188 |
+
|
189 |
+
def greet(name, is_morning, temperature):
    """Build a greeting and convert a Fahrenheit temperature to Celsius.

    Args:
        name: Name to greet.
        is_morning: Truthy selects "Good morning", falsy "Good evening".
        temperature: Temperature in degrees Fahrenheit.

    Returns:
        A tuple ``(greeting, celsius)`` with ``celsius`` rounded to two
        decimal places.
    """
    if is_morning:
        salutation = "Good morning"
    else:
        salutation = "Good evening"
    message = f"{salutation} {name}. It is {temperature} degrees today"
    return message, round((temperature - 32) * 5 / 9, 2)
|
194 |
+
|
195 |
+
|
196 |
+
|
197 |
+
# Markdown note shown next to the results. A raw string is used so the LaTeX
# backslashes (\frac, \lambda, "\ ") are taken literally rather than relying
# on invalid escape sequences being passed through.
note = r"""
**Note**: Radius (as well as height) estimation depends on accurate wavelength estimation towards the end.
Thus, it may not be accurate if the wavelength is not estimated correctly at the end.

$$
H = l(0) = \frac{\lambda(0) - \lambda(T)}{4} \ \ \text{and} \ \ R = \frac{\lambda(T)}{4\beta}
$$
"""
|
205 |
+
|
206 |
+
|
207 |
+
def configure_outputs():
    """Create the output widgets for the demo.

    Returns:
        A list ``[pitch image, properties dataframe, note markdown,
        t-SNE image]``, matching the order of the tuple returned by
        ``process_input``.
    """
    pitch_plot = gr.Image(label="Estimated pitch")
    properties_table = gr.DataFrame(label="Estimated physical properties")
    tsne_plot = gr.Image(label="TSNE of features", width=300)
    note_box = gr.Markdown(label="Note")
    # Returned order: image, dataframe, markdown, image (the t-SNE image is
    # created before the note but listed after it).
    return [pitch_plot, properties_table, note_box, tsne_plot]
|
214 |
+
|
215 |
+
|
216 |
+
# Configure pre-defined examples
|
217 |
+
# One row per bundled example clip: [video path, start link, end link].
# The link columns are None because the examples are local files.
examples = [
    [f"./media_assets/{clip}", None, None]
    for clip in (
        "example_video.mp4",
        "ayNzH0uygFw_9.0_21.0.mp4",
        "biDn0Gi6V8U_7.0_15.0.mp4",
        "goWgiQQMugA_2.5_9.0.mp4",
        "K87g4RvO-9k_254.0_259.0.mp4",
    )
]
|
224 |
+
|
225 |
+
|
226 |
+
# Define Gradio interface
|
227 |
+
# Page layout: header HTML, the main Interface, then footer HTML.
# NOTE(review): `custom_css` is not defined in this file (only `css` is);
# presumably it is provided by `from util import *` — confirm.
with gr.Blocks(
    css=custom_css,
    theme=gr.themes.Default(),
) as demo:

    # Add the header
    gr.HTML(header)

    # Main interface: wires the input/output components to process_input
    # and registers the bundled example clips.
    gr.Interface(
        fn=process_input,
        inputs=configure_input(),
        outputs=configure_outputs(),
        examples=examples,
    )

    # Add the footer
    gr.HTML(footer)


# Launch the interface
# Runs at import time; the current directory is passed as an allowed path
# and a public share link is requested.
demo.launch(allowed_paths=["."], share=True)
|