# Spaces status at capture time: Runtime error
# (page-header residue from the hosting site, not part of the program)
from io import BytesIO

import requests
import streamlit as st
import torch
import torchaudio
# Streamlit app: transcribe speech from an uploaded audio file or from a URL.

# Name of the torchaudio ASR pipeline bundle to load (an attribute of
# ``torchaudio.pipelines``).
# NOTE(review): the previous value, "facebook/wav2vec2-large-xlsr-53", is a
# Hugging Face Hub model id. It is not an entry point of the pytorch/fairseq
# torch.hub repository, so ``torch.hub.load`` could not resolve it. That
# checkpoint is also, per its model card, pre-trained only and needs
# fine-tuning before it can emit text. A fine-tuned English bundle is used
# here instead; swap in another ``torchaudio.pipelines`` ASR bundle name
# (e.g. a VoxPopuli one) if a different language is required -- confirm the
# intended language with the app owner.
model_name = "WAV2VEC2_ASR_BASE_960H"
bundle = getattr(torchaudio.pipelines, model_name)
# NOTE(review): Streamlit re-executes this script on every interaction, so the
# model is rebuilt on each rerun; consider caching it if the installed
# Streamlit version offers a resource cache.
model = bundle.get_model().eval()
# Output vocabulary of the acoustic model; index 0 is the CTC blank token and
# "|" marks a word boundary.
labels = bundle.get_labels()


def _transcribe_waveform(waveform, sample_rate):
    """Return the greedy CTC transcription of an in-memory waveform.

    Args:
        waveform: Audio tensor as returned by ``torchaudio.load``
            (channels first: ``(channels, frames)``).
        sample_rate: Sampling rate of ``waveform`` in Hz.

    Returns:
        The decoded text, or an empty string when the waveform has no samples.
    """
    if waveform.numel() == 0:
        return ""

    # The acoustic model expects a single channel: average multi-channel audio.
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # The model was trained at a fixed sampling rate; resample anything else.
    if sample_rate != bundle.sample_rate:
        waveform = torchaudio.functional.resample(
            waveform, sample_rate, bundle.sample_rate
        )

    with torch.no_grad():
        emission, _ = model(waveform)

    # Greedy CTC decoding: best label per frame, collapse repeats, drop blanks.
    best_path = torch.argmax(emission[0], dim=-1)
    collapsed = torch.unique_consecutive(best_path)
    characters = [labels[index] for index in collapsed.tolist() if index != 0]
    return "".join(characters).replace("|", " ").strip()


def transcribe_audio(url):
    """Download the audio file at ``url`` and return its transcription.

    Args:
        url: HTTP(S) address of an audio file that torchaudio can decode.

    Returns:
        The transcribed text.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
        RuntimeError: If the downloaded bytes cannot be decoded or processed
            (the error type torchaudio/torch typically raise in that case).
    """
    # A timeout keeps a stalled server from hanging the app indefinitely.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of feeding an error page to the decoder.
    response.raise_for_status()
    waveform, sample_rate = torchaudio.load(BytesIO(response.content))
    return _transcribe_waveform(waveform, sample_rate)


# Define the Streamlit app
st.title("Speech Recognition with Hugging Face")

# Option 1: transcribe an uploaded audio file.
audio_file = st.file_uploader("Upload an audio file", type=["mp3", "wav"])
if audio_file is not None:
    try:
        waveform, sample_rate = torchaudio.load(audio_file)
        transcription = _transcribe_waveform(waveform, sample_rate)
    except RuntimeError as err:
        st.error(f"Could not process the uploaded audio: {err}")
    else:
        st.write("Transcription:")
        st.write(transcription)

# Option 2: transcribe an audio file fetched from a user-supplied URL.
url = st.text_input("Enter the URL of an audio file")
if url:
    try:
        transcription = transcribe_audio(url)
    except requests.RequestException as err:
        st.error(f"Could not download the audio file: {err}")
    except RuntimeError as err:
        st.error(f"Could not process the downloaded audio: {err}")
    else:
        st.write("Transcription:")
        st.write(transcription)