Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 6,812 Bytes
69617e8 1101b16 0ed5bd6 d4daed1 eeaae00 1101b16 69617e8 b326bed 1101b16 346474a 44b7d9c 0ed5bd6 0b47c5d 69617e8 44b7d9c 95c1e26 11ed7b7 95c1e26 44b7d9c b7ee106 44b7d9c b7ee106 69617e8 0ed5bd6 99f3aa9 44b7d9c 53296c8 44b7d9c 53296c8 c8555c6 adf778f 53296c8 710b0e6 44b7d9c 422b172 44b7d9c 0b47c5d d4daed1 44b7d9c d4daed1 44b7d9c d4daed1 b6a6b23 3d2d856 d4daed1 eeaae00 d4daed1 eeaae00 d4daed1 44b7d9c d652179 404f122 d4daed1 404f122 0ed5bd6 d652179 b7ee106 404f122 69617e8 44b7d9c d92281e 404f122 44b7d9c 404f122 b7ee106 404f122 1101b16 1f67cec cfe99f8 1f67cec df76205 69617e8 d652179 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
import os
import google.generativeai as genai
import gradio as gr
import requests
from moviepy.editor import AudioFileClip, ImageClip, CompositeVideoClip
from PIL import Image
# --- Service credentials -------------------------------------------------
# The Gemini client is configured once at import time from the environment.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

# Play.ht / Play.ai credentials, sent as request headers by text_to_speech().
API_KEY = os.environ.get("PLAY_API_KEY")
USER_ID = os.environ.get("PLAY_USER_ID")

# --- Pillow compatibility shim -------------------------------------------
# Newer Pillow releases no longer expose Image.ANTIALIAS; alias it to LANCZOS
# so code that still looks the old name up keeps working.
# NOTE(review): presumably needed by moviepy's resize() used in create_video.
if not hasattr(Image, "ANTIALIAS"):
    Image.ANTIALIAS = Image.LANCZOS

# --- UI theme --------------------------------------------------------------
theme = gr.themes.Base(primary_hue="emerald")
# Upload a local file to Gemini so it can be referenced in a chat request
def upload_to_gemini(path, mime_type="image/jpeg"):
    """Upload the file at ``path`` via ``genai.upload_file``.

    Args:
        path: Location of the file to upload.
        mime_type: MIME type declared for the upload (defaults to JPEG).

    Returns:
        Whatever ``genai.upload_file`` returns for the uploaded file.
    """
    return genai.upload_file(path, mime_type=mime_type)
def generate_roast(image_path):
    """Ask Gemini for a short two-host dialogue about the given image.

    The image is uploaded first, then passed as the opening user turn of a
    chat session whose system instruction requests a 'Host 1:' / 'Host 2:'
    conversation suitable for text-to-speech.

    Args:
        image_path: Path to the image file on disk.

    Returns:
        The generated dialogue text, or a string starting with
        "Error generating rizz:" if anything fails (this function never
        raises).
    """
    import mimetypes  # local import keeps this block self-contained

    if not image_path:
        # Nothing was uploaded; fail fast with the same error prefix used below.
        return "Error generating rizz: no image provided"

    try:
        # Declare the real image type (PNG, WebP, ...) instead of always
        # claiming JPEG; fall back to JPEG when the extension is unknown.
        guessed_type, _ = mimetypes.guess_type(image_path)
        if not guessed_type or not guessed_type.startswith("image/"):
            guessed_type = "image/jpeg"
        uploaded_file = upload_to_gemini(image_path, mime_type=guessed_type)

        generation_config = {
            "temperature": 1,
            "top_p": 0.95,
            "top_k": 40,
            "max_output_tokens": 8192,
            "response_mime_type": "text/plain",
        }
        # The prompt text is kept flush-left so its content is not altered
        # by source indentation.
        model = genai.GenerativeModel(
            model_name="gemini-1.5-flash-002",
            generation_config=generation_config,
            system_instruction="""
You are an AI assistant tasked with creating a flirtatious and humorist conversation between two female hosts reviewing the uploaded picture.
The conversation should feature the two hosts discussing the topic in a natural, conversational manner, with frequent backchanneling and interruptions to make it sound authentic.
Keep the conversation between 100 to 150 words. Please abide by these guidelines:
1. Begin conversation turns with the prefix 'Host 1:' and 'Host 2:'
For example, Host 1: Aren't they cute? Host 2: I'm feeling faint—someone call a doctor! Host 1: Me too, maybe a whole ambulance!
2. Use humor, irony, and sarcasm to compliment and entertain the person depicted in the image based on their appearance.
3. Your output should be a well-written text suitable for reading aloud. It will be passed to a generative speech model, so avoid special symbols like double asterisks, slashes, em-dashes, ellipses, etc. Also, avoid output that isn't dialogue.
4. Conversation turns should be concise and on-topic.
5. Ensure a natural flow of conversation, with hosts engaging with each other's ideas and bringing their own perspectives.
6. Include speech disfluencies and interruptions to make it sound authentic.
7. Incorporate frequent backchanneling throughout the conversation. For example:
Preach!
You can say that again!
What a snack!
I need some air!
""",
        )
        # The uploaded image is supplied as the first user turn of the chat.
        chat_session = model.start_chat(
            history=[{"role": "user", "parts": [uploaded_file]}]
        )
        response = chat_session.send_message("Rizz this image!")
        return response.text
    except Exception as e:
        # Top-level boundary for the UI: report the failure as text.
        return f"Error generating rizz: {e}"
# Function to convert text to speech with Play.ht
def text_to_speech(text):
    """Convert dialogue text to an MP3 file using the Play.ai TTS endpoint.

    The request uses the "PlayDialog" model with two voices and the turn
    prefixes 'Host 1:' / 'Host 2:'. Credentials come from the module-level
    API_KEY and USER_ID.

    Args:
        text: The dialogue to synthesise.

    Returns:
        Path to the written MP3 file on success, otherwise a string starting
        with "Error generating audio:" (this function never raises).
    """
    import tempfile
    import uuid

    # (connect, read) timeouts in seconds so a stalled API call cannot hang
    # the worker forever; without this requests waits indefinitely.
    request_timeout = (10, 120)

    try:
        url = "https://api.play.ai/api/v1/tts/stream"
        payload = {
            "model": "PlayDialog",
            "voice": "s3://voice-cloning-zero-shot/adb83b67-8d75-48ff-ad4d-a0840d231ef1/original/manifest.json",
            "voice2": "s3://voice-cloning-zero-shot/831bd330-85c6-4333-b2b4-10c476ea3491/original/manifest.json",
            "turnPrefix": "Host 1:",
            "turnPrefix2": "Host 2:",
            "prompt": None,
            "prompt2": None,
            "output_format": "mp3",
            "text": text,
        }
        headers = {
            "content-type": "application/json",
            "Authorization": API_KEY,
            "X-User-ID": USER_ID,
        }
        response = requests.post(
            url, json=payload, headers=headers, timeout=request_timeout
        )
        if response.status_code != 200:
            return f"Error generating audio: {response.status_code} - {response.text}"

        # Use a unique file per call so simultaneous requests do not
        # overwrite each other's audio.
        audio_path = os.path.join(
            tempfile.gettempdir(), f"output_audio_{uuid.uuid4().hex}.mp3"
        )
        with open(audio_path, "wb") as audio_file:
            audio_file.write(response.content)
        return audio_path
    except Exception as e:
        # Top-level boundary for the UI: report the failure as text.
        return f"Error generating audio: {e}"
# Function to create video from image, audio, and add logo overlay
def create_video(image, audio):
    """Render an MP4 showing ``image`` for the length of ``audio`` with a logo.

    The logo file "PlayAI-Logo-RIZZ-URL.png" is resized to 75 px high and
    placed bottom-centre with a 10 px transparent bottom margin.

    Args:
        image: Path to the still image used as the video frame.
        audio: Path to the audio file used as the soundtrack.

    Returns:
        Path to the written MP4 on success, otherwise a string starting with
        "Error generating video:" (this function never raises).
    """
    import contextlib
    import tempfile
    import uuid

    opened_clips = []  # everything that must be closed, in creation order
    try:
        # Load the audio file; its duration drives every other clip.
        audio_clip = AudioFileClip(audio)
        opened_clips.append(audio_clip)

        # Main image, shown for the full length of the audio.
        image_clip = ImageClip(image).set_duration(audio_clip.duration)
        opened_clips.append(image_clip)

        # Logo overlay: resized, padded at the bottom, centred horizontally.
        logo = ImageClip("PlayAI-Logo-RIZZ-URL.png").resize(height=75)  # Adjust the height as needed
        logo = (
            logo.margin(bottom=10, opacity=0)
            .set_position(("center", "bottom"))
            .set_duration(audio_clip.duration)
        )
        opened_clips.append(logo)

        # Composite the image and logo, then attach the soundtrack.
        video_clip = CompositeVideoClip([image_clip, logo]).set_audio(audio_clip)
        opened_clips.append(video_clip)

        # Unique output name per call so concurrent requests cannot clobber
        # each other's video.
        output_path = os.path.join(
            tempfile.gettempdir(),
            f"output_video_with_logo_{uuid.uuid4().hex}.mp4",
        )
        video_clip.write_videofile(
            output_path,
            fps=30,
            codec="libx264",
            audio_codec="aac",
            preset="slow",
            ffmpeg_params=["-b:v", "2000k"],  # Adjust bitrate if needed
        )
        return output_path
    except Exception as e:
        # Top-level boundary for the UI: report the failure as text.
        return f"Error generating video: {e}"
    finally:
        # Release readers held by the clips, on success and failure alike.
        # Cleanup is best-effort: a failing close() must not mask the result.
        for clip in reversed(opened_clips):
            with contextlib.suppress(Exception):
                clip.close()
# Function to process all steps at once
def process_roast(image_path):
    """Run the full pipeline: image -> dialogue text -> audio -> video.

    Each stage reports failure by returning an "Error generating ..." string
    instead of raising. This function stops at the first failed stage so an
    error message is never synthesised to speech or handed to the audio/video
    components as if it were a file path.

    Args:
        image_path: Path to the uploaded image.

    Returns:
        A ``(text, audio_path, video_path)`` tuple. ``audio_path`` and/or
        ``video_path`` are ``None`` when that stage did not produce a file;
        in that case the stage's error message is included in ``text``.
    """
    roast_text = generate_roast(image_path)
    if not roast_text or roast_text.startswith("Error generating"):
        # No usable dialogue: show the message, skip audio and video.
        return roast_text, None, None

    audio_path = text_to_speech(roast_text)
    if not (isinstance(audio_path, str) and os.path.isfile(audio_path)):
        # text_to_speech returned an error string rather than a file path.
        return f"{roast_text}\n\n{audio_path}", None, None

    video_path = create_video(image_path, audio_path)
    if not (isinstance(video_path, str) and os.path.isfile(video_path)):
        # Audio succeeded but rendering failed: still return the audio.
        return f"{roast_text}\n\n{video_path}", audio_path, None

    return roast_text, audio_path, video_path
# Gradio Interface
# NOTE(review): the Row/Column nesting below was reconstructed from a
# flattened paste -- confirm it matches the intended layout.
with gr.Blocks(theme=theme) as demo:
    gr.Markdown("# Get Rizzed, Ready?")
    gr.Markdown("Upload an image, click 'Rizz Image', and the AI will roast it")

    # Input image alongside a column holding the three outputs.
    with gr.Row():
        image_input = gr.Image(type="filepath", label="Upload Image")
        with gr.Column():
            output_text = gr.Textbox(label="Roast Text")
            audio_output = gr.Audio(label="Roast Audio")
            video_output = gr.Video(label="Roast Video")

    # One button runs the whole pipeline and fills all three outputs.
    roast_button = gr.Button("Rizz Image")
    roast_button.click(
        fn=process_roast,
        inputs=image_input,
        outputs=[output_text, audio_output, video_output],
    )

    # Bundled sample images that can be loaded into the image input.
    gr.Examples(
        examples=[
            ["TSwift.jpg"],
            ["GRamsay.jpg"],
            ["cinemacon-2024---walt-disney-studios-presentation.jpg"],
        ],
        inputs=image_input,
    )

# Launch the app
demo.launch(debug=True)
|