get_rizzed

Running on CPU Upgrade

App Files Files Community

1littlecoder committed on Oct 31, 2024

Commit

0b47c5d

verified ·

1 Parent(s): 4b075c8

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -7

app.py CHANGED Viewed

@@ -1,10 +1,10 @@
 import os
 import tempfile
 import shutil
 import google.generativeai as genai
 import gradio as gr
-import requests
-import numpy as np
 import subprocess
 import matplotlib.pyplot as plt
 from matplotlib.animation import FuncAnimation
@@ -18,6 +18,7 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 API_KEY = os.getenv('PLAY_API_KEY')
 USER_ID = os.getenv('PLAY_USER_ID')
 def upload_to_gemini(path, mime_type="image/jpeg"):
     file = genai.upload_file(path, mime_type=mime_type)
     return file
@@ -62,16 +63,97 @@ def text_to_speech(text):
     else:
         raise ValueError(f"Error: {response.status_code} - {response.text}")
-# Generate waveform and overlay with image
-def make_waveform_overlay(audio_path, image_path):
-    output_video_path = make_waveform(audio_path, bg_image=image_path, animate=True)
     return output_video_path
-# Full Gradio Functionality
 def process_image(image):
     roast_text = generate_roast(image)
     audio_path = text_to_speech(roast_text)
-    final_video_path = make_waveform_overlay(audio_path, image)
     return roast_text, final_video_path
 # Gradio Blocks UI

 import os
 import tempfile
 import shutil
+import numpy as np
+import requests
 import google.generativeai as genai
 import gradio as gr
 import subprocess
 import matplotlib.pyplot as plt
 from matplotlib.animation import FuncAnimation
 API_KEY = os.getenv('PLAY_API_KEY')
 USER_ID = os.getenv('PLAY_USER_ID')
+# Upload a local file to Gemini (thin wrapper around genai.upload_file)
 def upload_to_gemini(path, mime_type="image/jpeg"):
+    """Upload the file at ``path`` to Gemini and return the uploaded-file handle.
+
+    Args:
+        path: Location of the file to upload; forwarded to ``genai.upload_file``.
+        mime_type: MIME type sent with the upload (defaults to ``"image/jpeg"``).
+
+    Returns:
+        Whatever ``genai.upload_file`` returns for the uploaded file.
+    """
     file = genai.upload_file(path, mime_type=mime_type)
     return file
     else:
         raise ValueError(f"Error: {response.status_code} - {response.text}")
+# Generate waveform
+def make_waveform(
+    audio,
+    bg_color="#f3f4f6",
+    bg_image=None,
+    fg_alpha=0.75,
+    bars_color=("#fbbf24", "#ea580c"),
+    bar_count=50,
+    bar_width=0.6,
+    animate=False
+):
+    """Render a still waveform frame for ``audio`` and mux it with the audio into an MP4.
+
+    Args:
+        audio: Path to an audio file, or a ``(sample_rate, samples)`` tuple where
+            ``samples`` is a NumPy array (1-D mono or 2-D multi-channel).
+        bg_color: Figure background colour, used only when ``bg_image`` is None.
+        bg_image: Optional background picture (file path, PIL image, or array).
+            When given, the waveform is drawn with a transparent background and
+            pasted along the bottom edge of the picture.
+        fg_alpha: Opacity of the waveform bars.
+        bars_color: A single colour string, or a ``(start, end)`` pair of
+            ``#rrggbb`` strings that is interpolated across the bars.
+        bar_count: Number of bars the audio is summarised into.
+        bar_width: Width of each bar (bars are spaced one unit apart).
+        animate: Accepted for call compatibility; this implementation always
+            renders a single still frame, so the flag has no effect.
+
+    Returns:
+        Path of the generated ``.mp4`` file (a temporary file that is not deleted).
+
+    Raises:
+        RuntimeError: If ``ffmpeg`` cannot be found on ``PATH``.
+        subprocess.CalledProcessError: If ``ffmpeg`` exits with a non-zero status.
+    """
+    # Neither name is imported at module level, so pull them in here.
+    import PIL.Image
+    from gradio import processing_utils
+
+    ffmpeg = shutil.which("ffmpeg")
+    if ffmpeg is None:
+        raise RuntimeError(
+            "ffmpeg was not found on PATH; it is required to build the waveform video."
+        )
+
+    scratch_files = []  # intermediate files removed once the video is written
+    try:
+        # ffmpeg needs a *file*; remember the path before `audio` is replaced by
+        # the decoded (sample_rate, samples) tuple.
+        if isinstance(audio, str):
+            audio_file = audio
+            audio = processing_utils.audio_from_file(audio)
+        else:
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
+                audio_file = tmp_wav.name
+            scratch_files.append(audio_file)
+            processing_utils.audio_to_file(audio[0], audio[1], audio_file, format="wav")
+
+        # Work in float so that integer PCM cannot overflow in abs() or `* 2`.
+        samples = np.asarray(audio[1], dtype=np.float64)
+        if samples.ndim > 1:
+            samples = np.mean(samples, 1)
+        # Pad so the samples split evenly into `bar_count` bins (always pads at
+        # least one sample, which also keeps empty audio reshapeable).
+        bins_to_pad = bar_count - (len(samples) % bar_count)
+        samples = np.pad(samples, [(0, bins_to_pad)])
+        samples = np.reshape(samples, (bar_count, -1))
+        # Peak absolute amplitude per bin.
+        samples = np.max(np.abs(samples), 1)
+
+        # Color gradient for bars
+        def hex_to_rgb(hex_str):
+            return [int(hex_str[i : i + 2], 16) for i in range(1, 6, 2)]
+
+        def get_color_gradient(c1, c2, n):
+            c1_rgb = np.array(hex_to_rgb(c1)) / 255
+            c2_rgb = np.array(hex_to_rgb(c2)) / 255
+            steps = max(n - 1, 1)  # avoid dividing by zero for a single bar
+            mix_pcts = [x / steps for x in range(n)]
+            rgb_colors = [((1 - mix) * c1_rgb + (mix * c2_rgb)) for mix in mix_pcts]
+            return [
+                "#" + "".join(f"{int(round(val * 255)):02x}" for val in item)
+                for item in rgb_colors
+            ]
+
+        color = (
+            bars_color
+            if isinstance(bars_color, str)
+            else get_color_gradient(bars_color[0], bars_color[1], bar_count)
+        )
+
+        # Temporary frame image (closed immediately; only the path is needed)
+        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_img:
+            frame_path = tmp_img.name
+        scratch_files.append(frame_path)
+
+        fig, ax = plt.subplots(figsize=(5, 1), dpi=200, frameon=False)
+        try:
+            fig.subplots_adjust(left=0, bottom=0, right=1, top=1)
+            ax.axis("off")
+            ax.margins(x=0)
+            # Bars are mirrored around y=0: height 2*s starting at -s.
+            ax.bar(
+                np.arange(0, bar_count),
+                samples * 2,
+                bottom=(-1 * samples),
+                width=bar_width,
+                color=color,
+                alpha=fg_alpha,
+            )
+            savefig_kwargs = (
+                {"facecolor": bg_color} if bg_image is None else {"transparent": True}
+            )
+            fig.savefig(frame_path, **savefig_kwargs)
+        finally:
+            # Release the figure; pyplot otherwise keeps it alive across calls.
+            plt.close(fig)
+
+        if bg_image is not None:
+            with PIL.Image.open(frame_path) as raw_frame:
+                waveform_img = raw_frame.convert("RGBA")
+            if isinstance(bg_image, PIL.Image.Image):
+                bg_img = bg_image
+            elif isinstance(bg_image, (str, os.PathLike)):
+                bg_img = PIL.Image.open(bg_image)
+            else:
+                bg_img = PIL.Image.fromarray(np.asarray(bg_image))
+            bg_img = bg_img.convert("RGB")
+
+            wave_w, wave_h = waveform_img.size
+            bg_w, bg_h = bg_img.size
+            if bg_w != wave_w:
+                # Scale to the waveform width; keep the height even because
+                # libx264 with yuv420p rejects odd frame dimensions.
+                scaled_h = max(2, 2 * int(bg_h * wave_w / bg_w / 2))
+                bg_img = bg_img.resize((wave_w, scaled_h))
+            elif bg_h % 2:
+                bg_img = bg_img.resize((bg_w, bg_h + 1))
+            bg_w, bg_h = bg_img.size
+
+            frame_h = max(bg_h, wave_h)
+            composite = PIL.Image.new("RGB", (wave_w, frame_h), "#FFFFFF")
+            composite.paste(bg_img, (0, frame_h - bg_h))
+            # The waveform's own alpha channel is the paste mask, so only the
+            # bars cover the picture.
+            composite.paste(waveform_img, (0, frame_h - wave_h), waveform_img)
+            composite.save(frame_path)
+
+        # Use ffmpeg to create video
+        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_video:
+            output_video_path = tmp_video.name
+        ffmpeg_cmd = [
+            ffmpeg,
+            "-loop", "1",
+            "-i", frame_path,
+            "-i", audio_file,
+            "-c:v", "libx264",
+            # Without this, PNG input is encoded as yuv444p, which many
+            # browsers and players cannot decode.
+            "-pix_fmt", "yuv420p",
+            "-c:a", "aac",
+            "-shortest",
+            "-y",
+            output_video_path,
+        ]
+        subprocess.run(ffmpeg_cmd, check=True)
+    finally:
+        for scratch_path in scratch_files:
+            try:
+                os.remove(scratch_path)
+            except OSError:
+                pass  # best-effort cleanup; never mask the real error
     return output_video_path
+# Full Gradio Interface Function
 def process_image(image):
+    """Run the full pipeline for one image: roast text, speech audio, waveform video.
+
+    Calls ``generate_roast(image)``, passes the resulting text to
+    ``text_to_speech``, then passes the resulting audio path to
+    ``make_waveform`` with ``image`` as the background.
+
+    Args:
+        image: The input image; handed unchanged to ``generate_roast`` and to
+            ``make_waveform`` as ``bg_image``.
+
+    Returns:
+        A ``(roast_text, final_video_path)`` tuple.
+    """
     roast_text = generate_roast(image)
     audio_path = text_to_speech(roast_text)
+    final_video_path = make_waveform(audio_path, bg_image=image, animate=True)
     return roast_text, final_video_path
 # Gradio Blocks UI