Spaces:

azoodle
/

genre_classify

Sleeping

App Files Files Community

azeus committed on Dec 8, 2024

Commit

1e2e376

1 Parent(s): c00ec95

adding fb model

Browse files

Files changed (2) hide show

app.py +116 -26
requirements.txt +3 -0

app.py CHANGED Viewed

@@ -1,11 +1,78 @@
 import streamlit as st
 import numpy as np
 # Page setup
 st.title("🎵 Music Genre Classifier")
-st.write("Upload an audio file to analyze its genre")
-# Create two columns for better layout
 col1, col2 = st.columns(2)
 with col1:
@@ -17,38 +84,61 @@ with col1:
         st.audio(audio_file)
         st.success("File uploaded successfully!")
-        # Add a classify button
         if st.button("Classify Genre"):
-            with st.spinner("Analyzing..."):
-                # Simulate genre classification (we'll replace this with real model later)
-                genres = ["Rock", "Pop", "Hip Hop", "Classical", "Jazz"]
-                confidences = np.random.dirichlet(np.ones(5))  # Random probabilities that sum to 1
-                # Show results
-                st.write("### Genre Analysis Results:")
-                for genre, confidence in zip(genres, confidences):
-                    st.write(f"{genre}: {confidence:.2%}")
-                # Show top prediction
-                top_genre = genres[np.argmax(confidences)]
-                st.write(f"**Predicted Genre:** {top_genre}")
 with col2:
-    # Display some tips and information
     st.write("### Tips for best results:")
-    st.write("- Upload files in MP3 or WAV format")
-    st.write("- Ensure good audio quality")
-    st.write("- Try to upload songs without too much background noise")
     st.write("- Ideal length: 10-30 seconds")
-    # Add a sample counter
-    if 'analyzed_count' not in st.session_state:
-        st.session_state.analyzed_count = 0
-    if audio_file is not None:
-        st.session_state.analyzed_count += 1
-        st.write(f"Songs analyzed this session: {st.session_state.analyzed_count}")
 # Footer
 st.markdown("---")
-st.write("Made with ❤️ using Streamlit")

 import streamlit as st
 import numpy as np
+import torch
+from transformers import Wav2Vec2Processor, Wav2Vec2Model
+import torchaudio
+import io
+# Initialize model and processor
+@st.cache_resource
+def load_model():
+    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base")
+    model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base")
+    return processor, model
+# Audio processing function
+def process_audio(audio_file, processor, model):
+    # Read audio file
+    audio_bytes = audio_file.read()
+    waveform, sample_rate = torchaudio.load(io.BytesIO(audio_bytes))
+    # Resample if needed
+    if sample_rate != 16000:
+        resampler = torchaudio.transforms.Resample(sample_rate, 16000)
+        waveform = resampler(waveform)
+    # Convert to mono if stereo
+    if waveform.shape[0] > 1:
+        waveform = torch.mean(waveform, dim=0, keepdim=True)
+    # Process through Wav2Vec2
+    inputs = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt", padding=True)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    # Get features from last hidden states
+    features = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
+    return features
+# Simple genre classifier (we'll use a basic classifier for demonstration)
+class SimpleGenreClassifier:
+    def __init__(self):
+        self.genres = ["Rock", "Pop", "Hip Hop", "Classical", "Jazz"]
+        # Simulated learned weights (in real application, these would be trained)
+        self.weights = np.random.randn(768, len(self.genres))
+    def predict(self, features):
+        # Simple linear classification
+        logits = np.dot(features, self.weights)
+        probabilities = self.softmax(logits)
+        return probabilities
+    @staticmethod
+    def softmax(x):
+        exp_x = np.exp(x - np.max(x))
+        return exp_x / exp_x.sum()
 # Page setup
 st.title("🎵 Music Genre Classifier")
+st.write("Upload an audio file to analyze its genre using Wav2Vec2")
+# Load models
+try:
+    with st.spinner("Loading models..."):
+        processor, wav2vec_model = load_model()
+        classifier = SimpleGenreClassifier()
+    st.success("Models loaded successfully!")
+except Exception as e:
+    st.error(f"Error loading models: {str(e)}")
+    st.stop()
+# Create two columns for layout
 col1, col2 = st.columns(2)
 with col1:
         st.audio(audio_file)
         st.success("File uploaded successfully!")
+        # Add classify button
         if st.button("Classify Genre"):
+            try:
+                with st.spinner("Analyzing audio..."):
+                    # Extract features using Wav2Vec2
+                    features = process_audio(audio_file, processor, wav2vec_model)
+                    # Get genre predictions
+                    probabilities = classifier.predict(features)
+                    # Show results
+                    st.write("### Genre Analysis Results:")
+                    for genre, prob in zip(classifier.genres, probabilities):
+                        # Create a progress bar for each genre
+                        st.write(f"{genre}:")
+                        st.progress(float(prob))
+                        st.write(f"{prob:.2%}")
+                    # Show top prediction
+                    top_genre = classifier.genres[np.argmax(probabilities)]
+                    st.write(f"**Predicted Genre:** {top_genre}")
+            except Exception as e:
+                st.error(f"Error during analysis: {str(e)}")
 with col2:
+    # Display information about the model
+    st.write("### About the Model:")
+    st.write("""
+    This classifier uses:
+    - Facebook's Wav2Vec2 for audio feature extraction
+    - Custom genre classification layer
+    - Pre-trained on speech recognition
+    """)
+    st.write("### Supported Genres:")
+    for genre in classifier.genres:
+        st.write(f"- {genre}")
+    # Add usage tips
     st.write("### Tips for best results:")
+    st.write("- Upload clear, high-quality audio")
     st.write("- Ideal length: 10-30 seconds")
+    st.write("- Avoid audio with multiple overlapping genres")
+    st.write("- Ensure minimal background noise")
+# Optionally display the pinned dependency versions (mirroring requirements.txt) in the sidebar
+if st.sidebar.checkbox("Show requirements.txt contents"):
+    st.sidebar.code("""
+    streamlit==1.31.0
+    torch==2.0.1
+    torchaudio==2.0.1
+    transformers==4.30.2
+    numpy==1.24.3
+    """)
 # Footer
 st.markdown("---")
+st.write("Made with ❤️ using Streamlit and Hugging Face Transformers")

requirements.txt CHANGED Viewed

@@ -1,2 +1,5 @@
 streamlit==1.31.0
 numpy==1.24.3

 streamlit==1.31.0
+torch==2.0.1
+torchaudio==2.0.1
+transformers==4.30.2
 numpy==1.24.3