awacke1 committed on
Commit
8079286
·
1 Parent(s): 96893ed

Create app.py

Files changed (1)
  1. app.py +52 -0
app.py ADDED
@@ -0,0 +1,52 @@
+import streamlit as st
+import torch
+import torchaudio
+import requests
+from io import BytesIO
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+
+# Load a Hugging Face wav2vec2 model for speech recognition
+# (the base facebook/wav2vec2-large-xlsr-53 checkpoint has no CTC head or tokenizer, so a fine-tuned XLSR variant is used)
+model_name = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
+processor = Wav2Vec2Processor.from_pretrained(model_name)
+model = Wav2Vec2ForCTC.from_pretrained(model_name)
+
+# Transcribe a waveform tensor with the model
+def transcribe_waveform(waveform, sample_rate):
+    # Convert to mono and resample to the 16 kHz rate the model expects
+    waveform = waveform.mean(dim=0)
+    if sample_rate != 16000:
+        waveform = torchaudio.functional.resample(waveform, sample_rate, 16000)
+    inputs = processor(waveform.numpy(), sampling_rate=16000, return_tensors="pt")
+    with torch.no_grad():
+        logits = model(inputs.input_values).logits
+    predicted_ids = torch.argmax(logits, dim=-1)
+    return processor.batch_decode(predicted_ids)[0]
+
+# Download an audio file from a URL and transcribe it with the model
+def transcribe_audio(url):
+    response = requests.get(url)
+    audio_bytes = BytesIO(response.content)
+    waveform, sample_rate = torchaudio.load(audio_bytes)
+    return transcribe_waveform(waveform, sample_rate)
+
+# Define the Streamlit app
+st.title("Speech Recognition with Hugging Face")
+
+# Add a file uploader to allow the user to upload an audio file
+audio_file = st.file_uploader("Upload an audio file", type=["mp3", "wav"])
+
+if audio_file is not None:
+    # Load the uploaded audio file with Torchaudio and transcribe it
+    waveform, sample_rate = torchaudio.load(audio_file)
+    transcription = transcribe_waveform(waveform, sample_rate)
+    # Display the transcription
+    st.write("Transcription:")
+    st.write(transcription)
+
+# Add a text input to allow the user to enter a URL of an audio file
+url = st.text_input("Enter the URL of an audio file")
+if url:
+    # Transcribe the audio from the URL and display the result
+    st.write("Transcription:")
+    st.write(transcribe_audio(url))