"""Streamlit front end for a speech-to-text transcription service.

The page asks for an access code and an audio source (a built-in example
video, a URL, or an uploaded file), posts the request to the transcription
HTTP endpoint and offers the result as plain text and as an OTR JSON download.
"""

import base64
import json
import os

import modal
import requests
import streamlit as st
from loguru import logger
from pydub import AudioSegment

TRANSCRIBE_ENDPOINT = "https://aseifert--ffpub-transcription-fastapi-app.modal.run/transcribe"
EXAMPLE_URL = "https://www.youtube.com/watch?v=6UONiGMmbS4"
URL_INPUT_LABEL = "URL (e.g. YouTube video, Dropbox file, etc.)"

# Labels of the input-source radio button; compared against the selection.
SOURCE_EXAMPLE = "Beispiel (Kabarett)"
SOURCE_URL = "URL (YouTube, …)"
SOURCE_UPLOAD = "Datei-Upload"

# Length limit (in seconds) applied unless a root access code is entered.
DEFAULT_CUTOFF_SECONDS = 60


def _parse_passwords(raw):
    """Split a comma-separated list of access codes, dropping blank entries.

    Blank entries (from an empty variable or a stray comma) are discarded so
    that an empty access code can never match.
    """
    return [entry for entry in raw.split(",") if entry]


PASSWORDS = _parse_passwords(os.environ["PASSWORD"])
ROOT_PASSWORDS = _parse_passwords(os.environ["ROOT_PASSWORD"])

# Not referenced anywhere else in the visible code (requests go through the
# HTTP endpoint instead); kept because the lookup runs at import time.
run_transcription = modal.Function.lookup(
    "ffpub-transcription", "run_transcription", environment_name="main"
)

st.set_page_config(page_title="Speech to Text Transcription App")


@st.cache_data(show_spinner=False)
def transcribe(url, audio_b64, cutoff):
    """Post a transcription request and return the decoded JSON response.

    Args:
        url: Media URL to transcribe, or None when audio is uploaded.
        audio_b64: Base64-encoded audio, or None when a URL is used.
        cutoff: Length limit in seconds, or None for no limit.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
            Raising here also keeps failed responses out of the cache, since
            only returned values are memoized.
    """
    payload = {"url": url, "audio_b64": audio_b64, "cutoff": cutoff}
    response = requests.post(TRANSCRIBE_ENDPOINT, json=payload)
    response.raise_for_status()
    return response.json()


def password_is_correct(password):
    """Return True if ``password`` is a regular or a root access code."""
    return password in PASSWORDS or password in ROOT_PASSWORDS


def input_is_ready(password, audio_file, url):
    """Return a truthy value when the access code is valid and a source is set."""
    return password_is_correct(password) and (audio_file or url)


def _encode_audio(audio_file, cutoff):
    """Decode ``audio_file``, trim it to ``cutoff`` seconds and base64-encode it.

    The clip is re-exported with pydub's default export settings. A falsy
    ``cutoff`` keeps the full length.
    """
    clip = AudioSegment.from_file(audio_file)
    if cutoff:
        # The seconds value is scaled by 1000 for the slice bound.
        clip = clip[: cutoff * 1000]
    # export() hands back an open file object; close it once it has been read.
    with clip.export() as exported:
        return base64.b64encode(exported.read()).decode("ascii")


def _normalize_url(url):
    """Rewrite short YouTube links and Dropbox share links into fetchable URLs."""
    if "youtu" in url:
        return url.replace("youtu.be/", "youtube.com/watch?v=")
    if "dropbox" in url:
        return url.replace("dl=0", "raw=1")
    return url


def _render_downloads(transcription):
    """Show the transcript and the TXT / OTR download buttons."""
    st.text_area("Transkript", transcription["text"], height=300)
    with st.expander("⬇️ Transkript herunterladen"):
        st.download_button(
            label="⬇️ Txt-Datei herunterladen",
            data=transcription["text"],
            file_name="transkript.txt",
            mime="text/plain",
        )
        st.download_button(
            label="⬇️ OTR-Datei herunterladen",
            data=json.dumps(transcription["otr"], indent=2, ensure_ascii=False),
            file_name="transkript.otr",
            mime="application/json",
        )


def run():
    """Render the page and, on submit, run the transcription and show results."""
    st.markdown(
        "",
        unsafe_allow_html=True,
    )

    # Initialised for the session; nothing in the visible code reads it back.
    if "is_expanded" not in st.session_state:
        st.session_state["is_expanded"] = True

    password = st.text_input("Zugriffscode (siehe oben)")
    url = audio_file = None

    col1, col2 = st.columns([1, 3])
    input_kind = col1.radio(
        "Input",
        [SOURCE_EXAMPLE, SOURCE_URL, SOURCE_UPLOAD],
    )
    if input_kind == SOURCE_EXAMPLE:
        url = col2.text_input(
            URL_INPUT_LABEL,
            value=EXAMPLE_URL,
            disabled=True,
        )
    elif input_kind == SOURCE_URL:
        url = _normalize_url(
            col2.text_input(
                URL_INPUT_LABEL,
                value="",
            )
        )
    else:
        audio_file = col2.file_uploader(
            "Datei auswählen", type=[".wav", ".mp3", ".flac", ".m4a", ".ogg"]
        )

    access_granted = password_is_correct(password)
    submit_button = col2.button(
        label="⚡ Transkribieren" + ("" if access_granted else " (Zugriffscode inkorrekt)"),
        disabled=(not access_granted or (not audio_file and not url)),
    )

    # Root access codes lift the length limit.
    cutoff = None if password in ROOT_PASSWORDS else DEFAULT_CUTOFF_SECONDS
    audio_b64 = None

    if audio_file or url:
        if audio_file:
            st.audio(audio_file)
            audio_b64 = _encode_audio(audio_file, cutoff)
        if url:
            # The built-in example is never truncated.
            if url == EXAMPLE_URL:
                cutoff = None
            st.video(url)

    if input_is_ready(password, audio_file, url) and submit_button:
        with st.spinner("Transkription läuft..."):
            transcription = transcribe(url, audio_b64, cutoff)
        _render_downloads(transcription)


try:
    run()
except Exception as e:
    # Top-level boundary: record the traceback, show a friendly message.
    logger.exception(e)
    st.error(
        "Leider ist ein unerwarter Fehler aufgetreten. Ich kann mir das Problem sofort ansehen, Sie erreichen mich unter alexander@ff.pub"
    )