File size: 4,388 Bytes
8fc7f5c
e414a67
b602cc6
8fc7f5c
a9bf4b2
b67a63e
a9bf4b2
8fc7f5c
56cc953
8fc7f5c
e6989d8
 
 
11f0950
 
 
a9bf4b2
 
 
 
b67a63e
8930d8d
b67a63e
 
 
 
 
a9bf4b2
 
e414a67
e6989d8
e414a67
 
 
 
 
 
a9bf4b2
c122f2d
cb19e55
c122f2d
 
 
d0fed63
e414a67
 
 
 
 
 
a9bf4b2
e414a67
 
 
999c934
acf3663
8ae6044
999c934
222d696
 
 
 
 
 
8ae6044
e414a67
 
 
 
b67a63e
 
 
 
e414a67
 
 
 
d8a8864
 
e414a67
 
11f0950
a9bf4b2
 
8930d8d
b67a63e
e414a67
5ac50f7
 
 
b67a63e
5ac50f7
 
b67a63e
 
5ac50f7
e414a67
 
 
 
 
 
 
d0fed63
e414a67
8930d8d
d0fed63
a9bf4b2
999c934
a190643
d0fed63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9bf4b2
5d9101b
 
 
 
 
11f0950
5d9101b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import base64
import json
import os

import modal
import requests
import streamlit as st
from loguru import logger
from pydub import AudioSegment

# Access codes accepted by the app, read as a comma-separated list from the
# PASSWORD environment variable. Raises KeyError at import time if it is unset.
PASSWORDS = os.environ["PASSWORD"].split(",")
# Privileged access codes (comma-separated, from ROOT_PASSWORD). In run(), a
# code from this list lifts the 60-second cutoff applied to other users.
ROOT_PASSWORDS = os.environ["ROOT_PASSWORD"].split(",")

# Handle to the deployed Modal function "run_transcription" of the
# "ffpub-transcription" app in the "main" environment.
# NOTE(review): not referenced anywhere in the visible part of this file;
# transcribe() talks to the HTTP endpoint instead - confirm it is still needed.
run_transcription = modal.Function.lookup(
    "ffpub-transcription", "run_transcription", environment_name="main"
)

# Browser tab title for the Streamlit page.
st.set_page_config(page_title="Speech to Text Transcription App")


@st.cache_data(show_spinner=False)
def transcribe(url, audio_b64, cutoff):
    """Request a transcription from the hosted endpoint and return its JSON reply.

    Results are memoised by ``st.cache_data`` on the argument values, so
    repeating a request with identical input does not hit the service again.

    Args:
        url: Media URL to transcribe, or ``None`` when audio is uploaded.
        audio_b64: Base64-encoded audio data (ASCII ``str``), or ``None``
            when a URL is given.
        cutoff: Maximum number of seconds to transcribe, or ``None`` for no
            limit.

    Returns:
        The decoded JSON body of the response. The caller reads the keys
        ``"text"`` and ``"otr"`` from it.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.RequestException: On connection-level failures.
    """
    payload = {"url": url, "audio_b64": audio_b64, "cutoff": cutoff}
    response = requests.post(
        "https://aseifert--ffpub-transcription-fastapi-app.modal.run/transcribe", json=payload
    )
    # Fail loudly on HTTP errors. Without this, an error response with a JSON
    # body would be returned and cached, so retrying the same input would keep
    # replaying the failure instead of contacting the service again.
    response.raise_for_status()
    return response.json()


def password_is_correct(password):
    """Return True if *password* is one of the regular or root access codes."""
    for accepted_codes in (PASSWORDS, ROOT_PASSWORDS):
        if password in accepted_codes:
            return True
    return False


def input_is_ready(password, audio_file, url):
    """Tell whether a transcription may start.

    The result is falsy when the access code is wrong or when neither an
    audio input nor a URL is present. Otherwise the first truthy value of
    *audio_file* / *url* is returned, so use the result for truthiness only.
    """
    if not password_is_correct(password):
        return False
    return audio_file or url


def run():
    """Render the transcription page and handle a single Streamlit script run.

    The page asks for an access code and an input source (a fixed example
    video, an arbitrary URL, or an uploaded audio file), previews the chosen
    media, and - once the submit button is pressed with a valid access code -
    calls ``transcribe`` and shows the transcript together with download
    buttons for a plain-text and an OTR (JSON) version.

    Users with a regular access code are limited to the first 60 seconds of
    their input; root access codes and the built-in example are not limited.
    """
    example_url = "https://www.youtube.com/watch?v=6UONiGMmbS4"
    url_label = "URL (e.g. YouTube video, Dropbox file, etc.)"
    example_option = "Beispiel (Kabarett)"
    url_option = "URL (YouTube, …)"
    upload_option = "Datei-Upload"

    # Strip the default vertical padding of Streamlit's main container.
    st.markdown(
        "<style>section.main > div:first-child { padding-top: 0; padding-bottom: 0; }</style>",
        unsafe_allow_html=True,
    )

    if "is_expanded" not in st.session_state:
        st.session_state["is_expanded"] = True

    password = st.text_input("Zugriffscode (siehe oben)")
    access_granted = password_is_correct(password)
    url = audio_file = None

    col1, col2 = st.columns([1, 3])
    input_kind = col1.radio("Input", [example_option, url_option, upload_option])
    if input_kind == example_option:
        url = col2.text_input(url_label, value=example_url, disabled=True)
    elif input_kind == url_option:
        url = col2.text_input(url_label, value="")
        # Rewrite share links into a form the backend/player can consume.
        if "youtu" in url:
            url = url.replace("youtu.be/", "youtube.com/watch?v=")
        elif "dropbox" in url:
            url = url.replace("dl=0", "raw=1")
    else:
        audio_file = col2.file_uploader(
            "Datei auswählen", type=[".wav", ".mp3", ".flac", ".m4a", ".ogg"]
        )

    submit_button = col2.button(
        label="⚡ Transkribieren" + ("" if access_granted else " (Zugriffscode inkorrekt)"),
        disabled=not access_granted or not (audio_file or url),
    )

    # Regular users only get the first minute transcribed.
    cutoff = None if password in ROOT_PASSWORDS else 60
    audio_b64 = None
    audio_segment = None
    if audio_file:
        st.audio(audio_file)
        limit_ms = cutoff * 1000 if cutoff else None
        audio_segment = AudioSegment.from_file(audio_file)[:limit_ms]
        # export() without arguments writes to a temporary file and returns
        # the open handle; close it deterministically instead of leaking it.
        with audio_segment.export() as exported:
            audio_b64 = base64.b64encode(exported.read()).decode("ascii")
    if url:
        if url == example_url:
            # The bundled example is always transcribed in full.
            cutoff = None
        st.video(url)

    if input_is_ready(password, audio_segment, url) and submit_button:
        with st.spinner("Transkription läuft..."):
            transcription = transcribe(url, audio_b64, cutoff)

        st.text_area("Transkript", transcription["text"], height=300)

        with st.expander("⬇️ Transkript herunterladen"):
            st.download_button(
                label="⬇️ Txt-Datei herunterladen",
                data=transcription["text"],
                file_name="transkript.txt",
                mime="text/plain",
            )

            st.download_button(
                label="⬇️ OTR-Datei herunterladen",
                data=json.dumps(transcription["otr"], indent=2, ensure_ascii=False),
                file_name="transkript.otr",
                mime="application/json",
            )


# Top-level boundary: any unexpected failure while rendering the page is
# logged and replaced by a generic error message for the user.
try:
    run()
except Exception as e:
    # logger.exception records the full traceback, not just the message,
    # so the failure can actually be diagnosed from the logs.
    logger.exception(e)
    st.error(
        "Leider ist ein unerwarter Fehler aufgetreten. Ich kann mir das Problem sofort ansehen, Sie erreichen mich unter [email protected]"
    )