Spaces:
PlayHT
/
Running on CPU Upgrade

1littlecoder committed on
Commit
0b47c5d
·
verified ·
1 Parent(s): 4b075c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -7
app.py CHANGED
@@ -1,10 +1,10 @@
1
  import os
2
  import tempfile
3
  import shutil
 
 
4
  import google.generativeai as genai
5
  import gradio as gr
6
- import requests
7
- import numpy as np
8
  import subprocess
9
  import matplotlib.pyplot as plt
10
  from matplotlib.animation import FuncAnimation
@@ -18,6 +18,7 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
18
  API_KEY = os.getenv('PLAY_API_KEY')
19
  USER_ID = os.getenv('PLAY_USER_ID')
20
 
 
21
  def upload_to_gemini(path, mime_type="image/jpeg"):
22
  file = genai.upload_file(path, mime_type=mime_type)
23
  return file
@@ -62,16 +63,97 @@ def text_to_speech(text):
62
  else:
63
  raise ValueError(f"Error: {response.status_code} - {response.text}")
64
 
65
- # Generate waveform and overlay with image
66
- def make_waveform_overlay(audio_path, image_path):
67
- output_video_path = make_waveform(audio_path, bg_image=image_path, animate=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  return output_video_path
69
 
70
- # Full Gradio Functionality
71
  def process_image(image):
72
  roast_text = generate_roast(image)
73
  audio_path = text_to_speech(roast_text)
74
- final_video_path = make_waveform_overlay(audio_path, image)
75
  return roast_text, final_video_path
76
 
77
  # Gradio Blocks UI
 
1
  import os
2
  import tempfile
3
  import shutil
4
+ import numpy as np
5
+ import requests
6
  import google.generativeai as genai
7
  import gradio as gr
 
 
8
  import subprocess
9
  import matplotlib.pyplot as plt
10
  from matplotlib.animation import FuncAnimation
 
18
  API_KEY = os.getenv('PLAY_API_KEY')
19
  USER_ID = os.getenv('PLAY_USER_ID')
20
 
21
# Upload a local file to Gemini
def upload_to_gemini(path, mime_type="image/jpeg"):
    """Upload the file at *path* through ``genai.upload_file``.

    Args:
        path: Location of the file to upload.
        mime_type: MIME type forwarded to the upload call; defaults to
            ``"image/jpeg"``.

    Returns:
        Whatever ``genai.upload_file`` returns for the uploaded file.
    """
    return genai.upload_file(path, mime_type=mime_type)
 
63
  else:
64
  raise ValueError(f"Error: {response.status_code} - {response.text}")
65
 
66
# Generate waveform
def make_waveform(
    audio,
    bg_color="#f3f4f6",
    bg_image=None,
    fg_alpha=0.75,
    bars_color=("#fbbf24", "#ea580c"),
    bar_count=50,
    bar_width=0.6,
    animate=False,
):
    """Render a static bar waveform for *audio* and mux it with the audio into an MP4.

    The audio is reduced to ``bar_count`` peak values, drawn as a mirrored bar
    chart with matplotlib, saved as a PNG, and handed to ffmpeg together with
    the audio track.

    Args:
        audio: Path to an audio file (``str`` or ``os.PathLike``), or a
            ``(sample_rate, samples)`` pair. A pair is written to a temporary
            WAV file so ffmpeg can read it.
        bg_color: Figure face colour used when ``bg_image`` is ``None``.
        bg_image: Only tested against ``None``. When it is set, the PNG is
            saved with a transparent background instead of ``bg_color``.
            NOTE(review): the image itself is never composited into the
            video - confirm whether an overlay was intended.
        fg_alpha: Opacity of the bars.
        bars_color: A single colour string, or a ``(start, end)`` pair of
            ``#rrggbb`` strings interpolated across the bars.
        bar_count: Number of bars to draw.
        bar_width: Width of each bar in axis units.
        animate: Accepted for call compatibility; currently unused.

    Returns:
        Path of the generated ``.mp4`` file. The caller owns the file.

    Raises:
        RuntimeError: If no ``ffmpeg`` executable is found on ``PATH``.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    import contextlib
    import os
    import shutil
    import subprocess
    import tempfile

    import matplotlib.pyplot as plt
    import numpy as np
    from gradio import processing_utils

    def new_temp_path(suffix):
        # Reserve a named file and release our handle immediately so that
        # matplotlib / ffmpeg can open it by name.
        handle = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
        handle.close()
        return handle.name

    def discard(path):
        # Best-effort cleanup; a missing scratch file is not an error.
        with contextlib.suppress(OSError):
            os.remove(path)

    def hex_to_rgb(hex_str):
        return [int(hex_str[i : i + 2], 16) for i in range(1, 6, 2)]

    def get_color_gradient(c1, c2, n):
        c1_rgb = np.array(hex_to_rgb(c1)) / 255
        c2_rgb = np.array(hex_to_rgb(c2)) / 255
        # Guard the divisor so a single bar (n == 1) gets the start colour
        # instead of raising ZeroDivisionError.
        steps = max(n - 1, 1)
        mix_pcts = [x / steps for x in range(n)]
        rgb_colors = [((1 - mix) * c1_rgb + (mix * c2_rgb)) for mix in mix_pcts]
        return [
            "#" + "".join(f"{int(round(val * 255)):02x}" for val in item)
            for item in rgb_colors
        ]

    # Fail fast with a clear message rather than passing None to subprocess.
    ffmpeg_path = shutil.which("ffmpeg")
    if ffmpeg_path is None:
        raise RuntimeError("ffmpeg not found on PATH; it is required to build the waveform video.")

    scratch_files = []
    try:
        # Keep the on-disk path separate from the decoded data: ffmpeg needs
        # the path, the plot needs the samples.
        if isinstance(audio, (str, os.PathLike)):
            audio_file = os.fspath(audio)
            audio = processing_utils.audio_from_file(audio_file)
        else:
            audio_file = new_temp_path(".wav")
            scratch_files.append(audio_file)
            processing_utils.audio_to_file(audio[0], audio[1], audio_file, format="wav")

        # Work in float so abs() and the doubling below cannot wrap around
        # when the decoded samples use a narrow integer dtype.
        samples = np.asarray(audio[1], dtype=np.float64)
        if samples.ndim > 1:
            samples = np.mean(samples, 1)
        # Zero-pad so the samples split evenly into bar_count chunks, then
        # take each chunk's peak magnitude as the bar's half-height.
        bins_to_pad = bar_count - (len(samples) % bar_count)
        samples = np.pad(samples, [(0, bins_to_pad)])
        samples = np.reshape(samples, (bar_count, -1))
        samples = np.max(np.abs(samples), 1)

        color = (
            bars_color
            if isinstance(bars_color, str)
            else get_color_gradient(bars_color[0], bars_color[1], bar_count)
        )

        tmp_img_path = new_temp_path(".png")
        scratch_files.append(tmp_img_path)

        fig, ax = plt.subplots(figsize=(5, 1), dpi=200, frameon=False)
        try:
            fig.subplots_adjust(left=0, bottom=0, right=1, top=1)
            ax.axis("off")
            ax.margins(x=0)
            # Bars are mirrored around zero: height 2*peak starting at -peak.
            ax.bar(
                np.arange(0, bar_count),
                samples * 2,
                bottom=(-1 * samples),
                width=bar_width,
                color=color,
                alpha=fg_alpha,
            )
            savefig_kwargs = (
                {"facecolor": bg_color} if bg_image is None else {"transparent": True}
            )
            fig.savefig(tmp_img_path, **savefig_kwargs)
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)

        # Use ffmpeg to loop the still image for the length of the audio.
        output_video_path = new_temp_path(".mp4")
        ffmpeg_cmd = [
            ffmpeg_path,
            "-loop", "1",
            "-i", tmp_img_path,
            "-i", audio_file,
            "-c:v", "libx264",
            "-c:a", "aac",
            "-shortest",
            "-y",
            output_video_path,
        ]
        try:
            subprocess.run(ffmpeg_cmd, check=True)
        except Exception:
            # Do not leave an empty or partial video behind on failure.
            discard(output_video_path)
            raise
        return output_video_path
    finally:
        for scratch in scratch_files:
            discard(scratch)
151
 
152
# Full Gradio Interface Function
def process_image(image):
    """Run the roast pipeline for one image.

    Passes *image* to ``generate_roast``, passes the resulting text to
    ``text_to_speech``, then passes that result to ``make_waveform`` along
    with the image as ``bg_image``.

    Args:
        image: The image value handed to ``generate_roast`` and forwarded to
            ``make_waveform`` as ``bg_image``.

    Returns:
        A ``(roast_text, video_path)`` tuple.
    """
    roast = generate_roast(image)
    speech = text_to_speech(roast)
    video = make_waveform(speech, bg_image=image, animate=True)
    return roast, video
158
 
159
  # Gradio Blocks UI