Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
1littlecoder
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
import os
|
2 |
import tempfile
|
3 |
import shutil
|
|
|
|
|
4 |
import google.generativeai as genai
|
5 |
import gradio as gr
|
6 |
-
import requests
|
7 |
-
import numpy as np
|
8 |
import subprocess
|
9 |
import matplotlib.pyplot as plt
|
10 |
from matplotlib.animation import FuncAnimation
|
@@ -18,6 +18,7 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
|
|
18 |
API_KEY = os.getenv('PLAY_API_KEY')
|
19 |
USER_ID = os.getenv('PLAY_USER_ID')
|
20 |
|
|
|
21 |
def upload_to_gemini(path, mime_type="image/jpeg"):
    """Upload the file at *path* through ``genai.upload_file``.

    Args:
        path: Location of the file to upload.
        mime_type: MIME type forwarded to the upload call
            (defaults to ``"image/jpeg"``).

    Returns:
        Whatever ``genai.upload_file`` returns for the uploaded file.
    """
    return genai.upload_file(path, mime_type=mime_type)
|
@@ -62,16 +63,97 @@ def text_to_speech(text):
|
|
62 |
else:
|
63 |
raise ValueError(f"Error: {response.status_code} - {response.text}")
|
64 |
|
65 |
-
# Generate waveform
|
66 |
-
def
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
return output_video_path
|
69 |
|
70 |
-
# Full Gradio
|
71 |
def process_image(image):
|
72 |
roast_text = generate_roast(image)
|
73 |
audio_path = text_to_speech(roast_text)
|
74 |
-
final_video_path =
|
75 |
return roast_text, final_video_path
|
76 |
|
77 |
# Gradio Blocks UI
|
|
|
1 |
import os
|
2 |
import tempfile
|
3 |
import shutil
|
4 |
+
import numpy as np
|
5 |
+
import requests
|
6 |
import google.generativeai as genai
|
7 |
import gradio as gr
|
|
|
|
|
8 |
import subprocess
|
9 |
import matplotlib.pyplot as plt
|
10 |
from matplotlib.animation import FuncAnimation
|
|
|
18 |
API_KEY = os.getenv('PLAY_API_KEY')
|
19 |
USER_ID = os.getenv('PLAY_USER_ID')
|
20 |
|
21 |
+
# Function to upload image to Gemini and get roasted text
|
22 |
def upload_to_gemini(path, mime_type="image/jpeg"):
    """Upload the file at *path* through ``genai.upload_file``.

    Args:
        path: Location of the file to upload.
        mime_type: MIME type forwarded to the upload call
            (defaults to ``"image/jpeg"``).

    Returns:
        Whatever ``genai.upload_file`` returns for the uploaded file.
    """
    return genai.upload_file(path, mime_type=mime_type)
|
|
|
63 |
else:
|
64 |
raise ValueError(f"Error: {response.status_code} - {response.text}")
|
65 |
|
66 |
+
# Generate waveform
|
67 |
+
def make_waveform(
    audio,
    bg_color="#f3f4f6",
    bg_image=None,
    fg_alpha=0.75,
    bars_color=("#fbbf24", "#ea580c"),
    bar_count=50,
    bar_width=0.6,
    animate=False
):
    """Render a static bar waveform for *audio* and mux it with the audio into an MP4.

    Args:
        audio: Either a path to an audio file, or a ``(sample_rate, samples)``
            pair. A pair is first written to a temporary WAV file so that
            ffmpeg always receives a real file path.
        bg_color: Figure face colour, used only when ``bg_image`` is ``None``.
        bg_image: When not ``None`` the waveform image is saved with a
            transparent background instead of ``bg_color``. The image itself
            is not composited by this function.
        fg_alpha: Opacity of the bars.
        bars_color: A single colour string, or a ``(start, end)`` pair of
            ``#rrggbb`` strings that is blended into one colour per bar.
        bar_count: Number of bars to draw; must be at least 1.
        bar_width: Width of each bar in matplotlib bar units.
        animate: Accepted for interface compatibility; this implementation
            always produces a still image looped for the audio's length.

    Returns:
        Path of the generated ``.mp4`` file (a temp file that is not deleted).

    Raises:
        ValueError: If ``bar_count`` is smaller than 1.
        RuntimeError: If no ``ffmpeg`` executable is found on ``PATH``.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    import os
    import shutil
    import tempfile

    import matplotlib.pyplot as plt
    import numpy as np
    from gradio import processing_utils

    if bar_count < 1:
        raise ValueError("bar_count must be at least 1")

    # Fail before doing any rendering work if the encoder is unavailable.
    ffmpeg = shutil.which("ffmpeg")
    if ffmpeg is None:
        raise RuntimeError("ffmpeg not found.")

    def new_temp_path(suffix):
        # Reserve a temp file name and release the handle right away so other
        # writers (matplotlib, ffmpeg) can open the path themselves.
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as handle:
            return handle.name

    def hex_to_rgb(hex_str):
        return [int(hex_str[i : i + 2], 16) for i in range(1, 6, 2)]

    def get_color_gradient(c1, c2, n):
        c1_rgb = np.array(hex_to_rgb(c1)) / 255
        c2_rgb = np.array(hex_to_rgb(c2)) / 255
        # linspace yields [0.0] for n == 1, avoiding the x / (n - 1) division.
        rgb_colors = [
            (1 - mix) * c1_rgb + mix * c2_rgb for mix in np.linspace(0, 1, n)
        ]
        return [
            "#" + "".join(f"{int(round(val * 255)):02x}" for val in item)
            for item in rgb_colors
        ]

    # Keep the file path for ffmpeg separate from the decoded samples; the
    # decoded value must never be handed to ffmpeg as an input.
    scratch_files = []
    if isinstance(audio, str):
        audio_file = audio
        audio = processing_utils.audio_from_file(audio)
    else:
        audio_file = new_temp_path(".wav")
        scratch_files.append(audio_file)
        processing_utils.audio_to_file(audio[0], audio[1], audio_file, format="wav")

    samples = np.asarray(audio[1])
    if samples.ndim > 1:
        # Collapse multi-channel audio to mono.
        samples = np.mean(samples, 1)
    # Always pads at least one sample, which also keeps the reshape valid for
    # empty input.
    bins_to_pad = bar_count - (len(samples) % bar_count)
    samples = np.pad(samples, [(0, bins_to_pad)])
    # One peak amplitude per bar.
    samples = np.max(np.abs(np.reshape(samples, (bar_count, -1))), 1)

    if isinstance(bars_color, str):
        color = bars_color
    else:
        color = get_color_gradient(bars_color[0], bars_color[1], bar_count)

    tmp_img_path = new_temp_path(".png")
    scratch_files.append(tmp_img_path)
    output_video_path = new_temp_path(".mp4")

    try:
        fig, ax = plt.subplots(figsize=(5, 1), dpi=200, frameon=False)
        try:
            fig.subplots_adjust(left=0, bottom=0, right=1, top=1)
            ax.axis("off")
            ax.margins(x=0)
            # Bars are mirrored around zero: height 2*s starting at -s.
            ax.bar(
                np.arange(0, bar_count),
                samples * 2,
                bottom=(-1 * samples),
                width=bar_width,
                color=color,
                alpha=fg_alpha,
            )
            savefig_kwargs = (
                {"facecolor": bg_color} if bg_image is None else {"transparent": True}
            )
            fig.savefig(tmp_img_path, **savefig_kwargs)
        finally:
            # Figures stay registered with pyplot until closed; release them
            # so repeated calls do not accumulate memory.
            plt.close(fig)

        # Loop the still image for as long as the audio lasts.
        ffmpeg_cmd = [
            ffmpeg,
            "-loop", "1",
            "-i", tmp_img_path,
            "-i", audio_file,
            "-c:v", "libx264",
            "-c:a", "aac",
            "-shortest",
            "-y",
            output_video_path,
        ]
        subprocess.run(ffmpeg_cmd, check=True)
    finally:
        # Intermediate files are no longer needed once ffmpeg has finished.
        for scratch in scratch_files:
            try:
                os.remove(scratch)
            except OSError:
                pass

    return output_video_path
|
151 |
|
152 |
+
# Full Gradio Interface Function
|
153 |
def process_image(image):
    """Run the full pipeline for one image: roast text, speech, waveform video.

    Args:
        image: The input passed to ``generate_roast`` and also forwarded to
            ``make_waveform`` as ``bg_image``.

    Returns:
        A ``(roast_text, video_path)`` pair: the result of ``generate_roast``
        and the value returned by ``make_waveform``.
    """
    roast = generate_roast(image)
    speech_path = text_to_speech(roast)
    video_path = make_waveform(speech_path, bg_image=image, animate=True)
    return roast, video_path
|
158 |
|
159 |
# Gradio Blocks UI
|