bpiyush commited on
Commit
15e05b1
Β·
verified Β·
1 Parent(s): a0be511

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +247 -0
app.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ sys.path.append("../")
4
+
5
+ import gradio as gr
6
+ import torch
7
+ import numpy as np
8
+ import matplotlib.pyplot as plt
9
+ plt.rcParams["font.family"] = "serif"
10
+ import decord
11
+ import PIL, PIL.Image
12
+ import librosa
13
+ from IPython.display import Markdown, display
14
+ import pandas as pd
15
+
16
+ from util import *
17
+
18
+
19
# Shared inline <style> fragment prepended to the `header` and `footer` HTML
# snippets below (rendered via gr.HTML, not via the gr.Blocks css= argument).
# BUG FIX: `.affiliations` used `font-size: 1.em;` — `1.` is not a valid CSS
# number, so browsers dropped the declaration. Corrected to `1em`.
css = """
<style>
body {
    font-family: 'Arial', serif;
    margin: 0;
    padding: 0;
    color: black;
}
.header {
    display: flex;
    align-items: center;
    justify-content: center;
    margin-top: 5px;
    color: black;
}
.footer {
    display: flex;
    align-items: center;
    justify-content: center;
    margin-top: 5px;
}
.image {
    margin-right: 20px;
}
.content {
    text-align: center;
    color: black;
}
.title {
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 10px;
}
.authors {
    color: #4a90e2;
    font-size: 1.05em;
    margin: 10px 0;
}
.affiliations {
    font-size: 1em;
    margin-bottom: 20px;
}
.buttons {
    display: flex;
    justify-content: center;
    gap: 10px;
}
.button {
    background-color: #545758;
    text-decoration: none;
    padding: 8px 16px;
    border-radius: 5px;
    font-size: 1.05em;
}
.button:hover {
    background-color: #333;
}
</style>
"""
78
+
79
+
80
# Page header: logo, title, author list, and navigation buttons.
# BUG FIX: the four author links were written as
#   <a style="color: #92eaff; href="...">
# i.e. the style attribute's closing quote was missing, so `href` was parsed
# as part of the style value and the links were dead. Quoting corrected.
header = css + """
<div class="header">
    <!-- <div class="image">
        <img src="./media_assets/pouring-water-logo5.png" alt="logo" width="100">
    </div> -->
    <div class="content">
        <img src="https://bpiyush.github.io/pouring-water-website/assets/pouring-water-logo5.png" alt="logo" width="80" style="margin-bottom: -50px; margin-right: 30px;">
        <div class="title" style="font-size: 44px; margin-left: -30px;">The Sound of Water</div>
        <div style="font-size: 30px; margin-left: -30px;"><b>Inferring Physical Properties from Pouring Liquids</b></div>
        <div class="authors">
            <a style="color: #92eaff;" href="https://bpiyush.github.io/">Piyush Bagad</a><sup>1</sup>,
            <a style="color: #92eaff;" href="https://makarandtapaswi.github.io/">Makarand Tapaswi</a><sup>2</sup>,
            <a style="color: #92eaff;" href="https://www.ceessnoek.info/">Cees G. M. Snoek</a><sup>3</sup>,
            <a style="color: #92eaff;" href="https://www.robots.ox.ac.uk/~az/">Andrew Zisserman</a><sup>1</sup>,
        </div>
        <div class="affiliations">
            <sup>1</sup>University of Oxford, <sup>2</sup>IIIT Hyderabad, <sup>3</sup>University of Amsterdam
        </div>

        <div class="buttons">
            <a href="#" style="color: #92eaff;" class="button">arXiv</a>
            <a href="https://bpiyush.github.io/pouring-water-website/" style="color: #92eaff;" class="button">🌐 Project</a>
            <a href="https://github.com/bpiyush/SoundOfWater" style="color: #92eaff;" class="button"> <img src="https://bpiyush.github.io/pouring-water-website/assets/github-logo.png" alt="logo" style="height:16px; float: left;"> &nbsp;Code</a>
            <a href="https://huggingface.co/datasets/bpiyush/sound-of-water" style="color: #92eaff;" class="button">🤗 Data</a>
            <a href="https://huggingface.co/bpiyush/sound-of-water-models" style="color: #92eaff;" class="button">🤗 Models</a>
            <a href="#" style="color: #92eaff;" class="button">🎯 Demo</a>
        </div>
    </div>
</div>
"""
110
+
111
# Page footer: star-us plug plus usage tips shown below the interface.
# Shares the same inline <style> block as the header.
footer = css + """
<div class="header" style="justify-content: left;">
    <div class="content" style="font-size: 16px;">
        Please give us a 🌟 on <a href='https://github.com/bpiyush/SoundOfWater'>Github</a> if you like our work!
        Tips to get better results:
        <br><br>
        <ol style="text-align: left; font-size: 14px; margin-left: 30px">
            <li>Make sure there is not too much noise such that the pouring is audible.</li>
            <li>Note that the video is not used during the inference. Only the audio must be clear enough.</li>
        </ol>
    </div>
</div>
"""
124
+
125
+ # def process_input(video=None, youtube_link=None, start_time=None, end_time=None):
126
+ # if video:
127
+ # return f"Video file uploaded: {video.name}"
128
+ # elif youtube_link and start_time and end_time:
129
+ # return f"YouTube link: {youtube_link} (Start: {start_time}, End: {end_time})"
130
+ # else:
131
+ # return "Please upload a video or provide a YouTube link with start and end times."
132
+
133
+
134
def configure_input():
    """Create the Gradio input widgets.

    Returns:
        list: [video upload, YouTube start-link textbox, YouTube end-link
        textbox], in the order expected by ``process_input``.
    """
    gr.Markdown(
        "#### Either upload a video file or provide a YouTube link with start and end times."
    )
    uploaded_video = gr.Video(label="Upload Video", height=480)
    yt_start = gr.Textbox(label="YouTube Link (Start time)")
    yt_end = gr.Textbox(label="YouTube Link (End time)")
    return [uploaded_video, yt_start, yt_end]
142
+
143
+
144
+ # Example usage in a Gradio interface
145
def process_input(video, youtube_link_start, youtube_link_end):
    """Run pitch / physical-property inference on a pouring-water clip.

    Args:
        video: Path of an uploaded video file, or None.
        youtube_link_start: YouTube link carrying a ``t=`` start timestamp, or None.
        youtube_link_end: YouTube link carrying a ``t=`` end timestamp, or None.

    Returns:
        On the video path branch: (pitch image, properties dataframe,
        markdown note, TSNE image) matching ``configure_outputs``.
        A plain string when no input is provided.

    Raises:
        ValueError: if the two YouTube links refer to different videos.
        NotImplementedError: YouTube processing is not implemented yet.
    """
    if video is not None:
        # NOTE(review): the model is loaded on every request despite the
        # original "Load model globally" comment — presumably load_model()
        # (from util) caches internally; confirm, else hoist to module level.
        model = load_model()

        # The input is a video file path.
        video_path = video

        # First frame is only used for display; inference relies on audio alone.
        frame = load_frame(video_path)

        # Spectrogram for visualisation.
        S = load_spectrogram(video_path)

        # Audio tensor fed to the model.
        audio = load_audio_tensor(video_path)

        # Forward pass: embeddings and predictions.
        z_audio, y_audio = get_model_output(audio, model)

        # Compose the displayed outputs.
        image, df_show, tsne_image = show_output(frame, S, y_audio, z_audio)

        return image, df_show, gr.Markdown(note), tsne_image

    elif (youtube_link_start is not None) and (youtube_link_end is not None):
        # Expected link format, e.g.: https://youtu.be/6-HVn8Jzzuk?t=10
        video_id = youtube_link_start.split("/")[-1].split("?")[0]
        # Explicit exception instead of `assert`: asserts are stripped
        # under `python -O`, silently skipping this validation.
        if video_id != youtube_link_end.split("/")[-1].split("?")[0]:
            raise ValueError("Video IDs do not match")
        start_time = float(youtube_link_start.split("t=")[-1])
        end_time = float(youtube_link_end.split("t=")[-1])

        raise NotImplementedError("YouTube link processing is not implemented yet")
    else:
        return "No input provided"
187
+
188
+
189
def greet(name, is_morning, temperature):
    """Greet `name` and convert `temperature` from Fahrenheit to Celsius.

    Args:
        name: Person to address.
        is_morning: Selects "Good morning" vs "Good evening".
        temperature: Temperature in degrees Fahrenheit.

    Returns:
        tuple: (greeting string, Celsius value rounded to 2 decimal places).
    """
    part_of_day = "morning" if is_morning else "evening"
    message = f"Good {part_of_day} {name}. It is {temperature} degrees today"
    in_celsius = round((temperature - 32) * 5 / 9, 2)
    return message, in_celsius
194
+
195
+
196
+
197
# Caveat rendered (as Markdown + MathJax) alongside the results.
# BUG FIX: the original used a non-raw string mixing `\\frac` with bare
# `\lambda` / `\ ` / `\beta` — invalid escape sequences that raise
# SyntaxWarning on modern CPython. A raw string yields the exact same
# runtime text without relying on undefined-escape fallthrough.
note = r"""
**Note**: Radius (as well as height) estimation depends on accurate wavelength estimation towards the end.
Thus, it may not be accurate if the wavelength is not estimated correctly at the end.

$$
H = l(0) = \frac{\lambda(0) - \lambda(T)}{4} \ \ \text{and} \ \ R = \frac{\lambda(T)}{4\beta}
$$
"""
205
+
206
+
207
def configure_outputs():
    """Create the Gradio output widgets.

    Returns:
        list: [pitch image, properties dataframe, note markdown, TSNE image]
        — the order mirrors what ``process_input`` returns.
    """
    pitch_image = gr.Image(label="Estimated pitch")
    properties_table = gr.DataFrame(label="Estimated physical properties")
    tsne_plot = gr.Image(label="TSNE of features", width=300)
    note_markdown = gr.Markdown(label="Note")
    # Order matters: image, dataframe, markdown, then the TSNE image.
    return [pitch_image, properties_table, note_markdown, tsne_plot]
214
+
215
+
216
+ # Configure pre-defined examples
217
# Pre-defined examples for the Gradio interface. Each entry matches the
# configure_input() widget order: [video path, YouTube start link, YouTube
# end link]. The link fields are None because only bundled local clips are
# used as examples.
examples = [
    ["./media_assets/example_video.mp4", None, None],
    ["./media_assets/ayNzH0uygFw_9.0_21.0.mp4", None, None],
    ["./media_assets/biDn0Gi6V8U_7.0_15.0.mp4", None, None],
    ["./media_assets/goWgiQQMugA_2.5_9.0.mp4", None, None],
    ["./media_assets/K87g4RvO-9k_254.0_259.0.mp4", None, None],
]
224
+
225
+
226
+ # Define Gradio interface
227
# Define and launch the Gradio interface.
# BUG FIX: the original passed `css=custom_css`, but no `custom_css` variable
# exists anywhere in this file, so the app crashed with a NameError at import
# time. The page styling is already injected via the <style> fragment embedded
# in `header` and `footer` (rendered with gr.HTML), so no Blocks-level CSS is
# needed here.
with gr.Blocks(
    theme=gr.themes.Default(),
) as demo:

    # Add the header
    gr.HTML(header)

    gr.Interface(
        fn=process_input,
        inputs=configure_input(),
        outputs=configure_outputs(),
        examples=examples,
    )

    # Add the footer
    gr.HTML(footer)


# Launch the interface. allowed_paths lets Gradio serve the bundled example
# assets from the working directory.
demo.launch(allowed_paths=["."], share=True)