Spaces:
Runtime error
Runtime error
import os | |
import numpy as np | |
from matplotlib import pyplot as plt | |
import librosa | |
import libfmp.b | |
import libfmp.c3 | |
import libfmp.c4 | |
import sys | |
def compute_chromagram_from_filename(fn_wav, Fs=22050, N=4096, H=2048, gamma=None, version='STFT', norm='2'):
    """Compute chromagram for WAV file specified by filename

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        fn_wav (str): Filename of WAV
        Fs (scalar): Sampling rate requested when loading the file (Default value = 22050)
        N (int): Window size (Default value = 4096)
        H (int): Hop size (Default value = 2048)
        gamma (float): Constant for logarithmic compression; only applied for the
            'STFT' and 'IIR' versions (Default value = None)
        version (str): Technique used for front-end decomposition ('STFT', 'IIR', 'CQT')
            (Default value = 'STFT')
        norm (str): If not None, chroma vectors are normalized by norm as specified ('1', '2', 'max')
            (Default value = '2')

    Returns:
        X (np.ndarray): Chromagram
        Fs_X (scalar): Feature rate of chromagram
        x (np.ndarray): Audio signal
        Fs (scalar): Sampling rate of audio signal
        x_dur (float): Duration (seconds) of audio signal

    Raises:
        ValueError: If version is not one of 'STFT', 'CQT', 'IIR'
    """
    # Reject an unsupported front end before touching the file; otherwise X
    # would never be assigned and the function would die with an
    # UnboundLocalError only after the audio had been loaded.
    if version not in ('STFT', 'CQT', 'IIR'):
        raise ValueError("Unknown version %r; expected 'STFT', 'CQT' or 'IIR'" % (version,))

    # librosa.load returns the sampling rate actually used, so Fs is rebound.
    x, Fs = librosa.load(fn_wav, sr=Fs)
    x_dur = x.shape[0] / Fs

    if version == 'STFT':
        # Compute chroma features with STFT (power spectrogram, optionally log-compressed)
        X = librosa.stft(x, n_fft=N, hop_length=H, pad_mode='constant', center=True)
        if gamma is not None:
            X = np.log(1 + gamma * np.abs(X) ** 2)
        else:
            X = np.abs(X) ** 2
        X = librosa.feature.chroma_stft(S=X, sr=Fs, tuning=0, norm=None, hop_length=H, n_fft=N)
    elif version == 'CQT':
        # Compute chroma features with CQT decomposition
        X = librosa.feature.chroma_cqt(y=x, sr=Fs, hop_length=H, norm=None)
    else:
        # version == 'IIR'
        # Compute chroma features with filter bank (using IIR elliptic filter)
        X = librosa.iirt(y=x, sr=Fs, win_length=N, hop_length=H, center=True, tuning=0.0)
        if gamma is not None:
            X = np.log(1.0 + gamma * X)
        X = librosa.feature.chroma_cqt(C=X, bins_per_octave=12, n_octaves=7,
                                       fmin=librosa.midi_to_hz(24), norm=None)

    if norm is not None:
        X = libfmp.c3.normalize_feature_sequence(X, norm=norm)
    Fs_X = Fs / H
    return X, Fs_X, x, Fs, x_dur
def compute_chromagram(y, sr, Fs=22050, N=4096, H=2048, gamma=None, version='STFT', norm='2'):
    """Compute chromagram for an audio signal given as an array

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        y (np.ndarray): Audio signal
        sr (scalar): Sampling rate of y
        Fs (scalar): Sampling rate the signal is resampled to before analysis (Default value = 22050)
        N (int): Window size (Default value = 4096)
        H (int): Hop size (Default value = 2048)
        gamma (float): Constant for logarithmic compression; only applied for the
            'STFT' and 'IIR' versions (Default value = None)
        version (str): Technique used for front-end decomposition ('STFT', 'IIR', 'CQT')
            (Default value = 'STFT')
        norm (str): If not None, chroma vectors are normalized by norm as specified ('1', '2', 'max')
            (Default value = '2')

    Returns:
        X (np.ndarray): Chromagram
        Fs_X (scalar): Feature rate of chromagram
        x (np.ndarray): Audio signal resampled to Fs
        Fs (scalar): Sampling rate of the resampled audio signal
        x_dur (float): Duration (seconds) of audio signal

    Raises:
        ValueError: If version is not one of 'STFT', 'CQT', 'IIR'
    """
    # Reject an unsupported front end up front; otherwise X would never be
    # assigned and the function would fail with an UnboundLocalError.
    if version not in ('STFT', 'CQT', 'IIR'):
        raise ValueError("Unknown version %r; expected 'STFT', 'CQT' or 'IIR'" % (version,))

    # Pass the rates by keyword: recent librosa releases no longer accept
    # them positionally, and the keyword names also work on older releases.
    x = librosa.resample(y, orig_sr=sr, target_sr=Fs)
    x_dur = x.shape[0] / Fs

    if version == 'STFT':
        # Compute chroma features with STFT (power spectrogram, optionally log-compressed)
        X = librosa.stft(x, n_fft=N, hop_length=H, pad_mode='constant', center=True)
        if gamma is not None:
            X = np.log(1 + gamma * np.abs(X) ** 2)
        else:
            X = np.abs(X) ** 2
        X = librosa.feature.chroma_stft(S=X, sr=Fs, tuning=0, norm=None, hop_length=H, n_fft=N)
    elif version == 'CQT':
        # Compute chroma features with CQT decomposition
        X = librosa.feature.chroma_cqt(y=x, sr=Fs, hop_length=H, norm=None)
    else:
        # version == 'IIR'
        # Compute chroma features with filter bank (using IIR elliptic filter)
        X = librosa.iirt(y=x, sr=Fs, win_length=N, hop_length=H, center=True, tuning=0.0)
        if gamma is not None:
            X = np.log(1.0 + gamma * X)
        X = librosa.feature.chroma_cqt(C=X, bins_per_octave=12, n_octaves=7,
                                       fmin=librosa.midi_to_hz(24), norm=None)

    if norm is not None:
        X = libfmp.c3.normalize_feature_sequence(X, norm=norm)
    Fs_X = Fs / H
    return X, Fs_X, x, Fs, x_dur
def get_chord_labels(ext_minor='m', nonchord=False):
    """Build the label list for the 12 major and 12 minor triads

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        ext_minor (str): Suffix appended to a root name to form the minor chord label
            (Default value = 'm')
        nonchord (bool): If exactly True, the nonchord label 'N' is appended
            (Default value = False)

    Returns:
        chord_labels (list): Major labels (C..B), then minor labels, then optionally 'N'
    """
    roots = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
    labels = list(roots)
    labels.extend(root + ext_minor for root in roots)
    # Identity check on purpose: only the literal True enables the extra label.
    if nonchord is True:
        labels.append('N')
    return labels
def generate_chord_templates(nonchord=False):
    """Build binary chroma templates for the 12 major and 12 minor triads

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        nonchord (bool): If truthy, an all-ones nonchord template is appended as the
            last column (Default value = False)

    Returns:
        chord_templates (np.ndarray): Float matrix of shape (12, 24) or (12, 25) whose
            columns are the templates: majors C..B, minors C..B, then optionally nonchord
    """
    c_major = np.array([1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0])
    c_minor = np.array([1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0])

    # Transposing a triad by k semitones is a cyclic shift of its chroma pattern.
    columns = [np.roll(triad, semitones)
               for triad in (c_major, c_minor)
               for semitones in range(12)]
    if nonchord:
        columns.append(np.ones(12))
    return np.column_stack(columns).astype(np.float64)
def chord_recognition_template(X, norm_sim='1', nonchord=False):
    """Template-based chord recognition with major and minor triads (and possibly nonchord)

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        X (np.ndarray): Chromagram
        norm_sim (str): Norm used for normalizing the chord similarity matrix; None skips
            that normalization (Default value = '1')
        nonchord (bool): If "True" then add nonchord template (Default value = False)

    Returns:
        chord_sim (np.ndarray): Chord similarity matrix
        chord_max (np.ndarray): int32 matrix of the same shape holding a single 1 per
            column, at the row of the maximizing chord
    """
    # Both the templates and the features are normalized with norm '2' before
    # their inner products are taken.
    templates = libfmp.c3.normalize_feature_sequence(
        generate_chord_templates(nonchord=nonchord), norm='2')
    features = libfmp.c3.normalize_feature_sequence(X, norm='2')

    chord_sim = templates.T @ features
    if norm_sim is not None:
        chord_sim = libfmp.c3.normalize_feature_sequence(chord_sim, norm=norm_sim)

    # argmax picks the first maximum, so exactly one chord is marked per frame.
    best_chord = np.argmax(chord_sim, axis=0)
    frames = np.arange(chord_sim.shape[1])
    chord_max = np.zeros(chord_sim.shape, dtype=np.int32)
    chord_max[best_chord, frames] = 1
    return chord_sim, chord_max
def plot_chord_recognition(y, sr):
    """Plot the chromagram of a signal above its template-based chord recognition result

    Deprecated: emits a DeprecationWarning on every call.

    Args:
        y (np.ndarray): Audio signal
        sr (scalar): Sampling rate of y

    Returns:
        fig (matplotlib.figure.Figure): Figure containing the 2x2 axes grid
        ax (np.ndarray): Axes array; column 0 holds the plots, column 1 is passed to
            the libfmp plotting helpers alongside them
        chord_max (np.ndarray): Binarized chord similarity matrix shown in the lower plot
    """
    import warnings
    # stacklevel=2 attributes the warning to the caller rather than to this line.
    warnings.warn("This function is deprecated and will be removed in future versions.",
                  DeprecationWarning, stacklevel=2)

    # Only the chromagram and its feature rate are needed for plotting.
    X, Fs_X, _, _, _ = compute_chromagram(y, sr)
    _, chord_max = chord_recognition_template(X, norm_sim='max')
    chord_labels = get_chord_labels(nonchord=False)

    fig, ax = plt.subplots(2, 2, gridspec_kw={'width_ratios': [1, 0.03],
                                              'height_ratios': [1.5, 3]}, figsize=(8, 10))
    libfmp.b.plot_chromagram(X, ax=[ax[0, 0], ax[0, 1]], Fs=Fs_X, clim=[0, 1], xlabel='',
                             title='STFT-based chromagram (feature rate = %0.1f Hz)' % (Fs_X))
    libfmp.b.plot_matrix(chord_max, ax=[ax[1, 0], ax[1, 1]], Fs=Fs_X,
                         title='Time–chord representation of chord recognition result',
                         ylabel='Chord', xlabel='')
    # One tick per chord row, labelled with the chord name.
    ax[1, 0].set_yticks(np.arange(len(chord_labels)))
    ax[1, 0].set_yticklabels(chord_labels)
    ax[1, 0].grid()
    plt.tight_layout()
    return fig, ax, chord_max
def plot_binary_template_chord_recognition(y, sr):
    """Plot the chromagram of a signal above the binary templates of the recognized chords

    Deprecated: emits a DeprecationWarning on every call.

    Args:
        y (np.ndarray): Audio signal
        sr (scalar): Sampling rate of y

    Returns:
        fig (matplotlib.figure.Figure): Figure containing the 2x2 axes grid
        ax (np.ndarray): Axes array; column 0 holds the plots, column 1 is passed to
            the libfmp plotting helpers alongside them
    """
    import warnings
    # stacklevel=2 attributes the warning to the caller rather than to this line.
    warnings.warn("This function is deprecated and will be removed in future versions.",
                  DeprecationWarning, stacklevel=2)

    # Only the chromagram and its feature rate are needed for plotting.
    X, Fs_X, _, _, _ = compute_chromagram(y, sr)
    _, chord_max = chord_recognition_template(X, norm_sim='max')

    # chord_max has a single 1 per frame, so the product selects, for every
    # frame, the template column of the recognized chord.
    chord_templates = generate_chord_templates()
    X_chord = np.matmul(chord_templates, chord_max)

    fig, ax = plt.subplots(2, 2, gridspec_kw={'width_ratios': [1, 0.03],
                                              'height_ratios': [1, 1]}, figsize=(8, 5))
    libfmp.b.plot_chromagram(X, ax=[ax[0, 0], ax[0, 1]], Fs=Fs_X, clim=[0, 1], xlabel='',
                             title='STFT-based chromagram (feature rate = %0.1f Hz)' % (Fs_X))
    libfmp.b.plot_chromagram(X_chord, ax=[ax[1, 0], ax[1, 1]], Fs=Fs_X, clim=[0, 1], xlabel='',
                             title='Binary templates of the chord recognition result')
    plt.tight_layout()
    return fig, ax
def chord_table(chord_max):
    """Translate a chord activation matrix into one chord label per frame

    Args:
        chord_max (np.ndarray): Matrix with one row per chord and one column per frame.
            Rows 0-11 are the major triads C..B, rows 12-23 the minor triads C..B, and
            an optional row 24 is the nonchord class.

    Returns:
        chord_results (list): For each column, the label of the row holding the column
            maximum (the first such row on ties)
    """
    roots = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    # 'N' sits at index 24 so that a 25-row matrix (nonchord enabled) can be
    # labelled too; 24-row input never reaches that index.
    chord_labels = roots + [root + 'm' for root in roots] + ['N']
    # Row index of the maximum of every column (i.e. along the first axis)
    chord_max_index = np.argmax(chord_max, axis=0)
    # Look up the label belonging to each row index
    return [chord_labels[i] for i in chord_max_index]
def plot_chord(chroma, title="", figsize=(12, 6), cmap="coolwarm", include_minor=False):
    """Draw a matrix as a seaborn heatmap with chord names on the y-axis

    Args:
        chroma (np.ndarray): Matrix to display, handed to seaborn.heatmap unchanged
        title (str): Axes title (Default value = "")
        figsize (tuple): Figure size passed to plt.subplots (Default value = (12, 6))
        cmap (str): Colormap passed to seaborn.heatmap (Default value = "coolwarm")
        include_minor (bool): If truthy, the 12 minor labels follow the 12 major labels
            on the y-axis (Default value = False)

    Returns:
        fig (matplotlib.figure.Figure): Created figure
        ax (matplotlib.axes.Axes): Axes holding the heatmap
    """
    import seaborn as sns

    major_labels = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    minor_labels = ['Cm', 'C#m', 'Dm', 'D#m', 'Em', 'Fm', 'F#m', 'Gm', 'G#m', 'Am', 'A#m', 'Bm']
    tick_labels = major_labels + minor_labels if include_minor else major_labels

    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(chroma, ax=ax, cmap=cmap, linewidths=0.01, linecolor=(1, 1, 1, 0.1))
    ax.invert_yaxis()

    # Ticks are offset by 0.5 -- presumably to centre each label on its heatmap row.
    tick_positions = np.arange(len(tick_labels)) + 0.5
    ax.set_yticks(tick_positions, tick_labels, rotation=0)

    ax.set_title(title)
    ax.set_xlabel('Time (frame)')
    ax.set_ylabel("Chord")
    return fig, ax
def plot_user_chord(df):
    """Draw user-supplied chord annotations as a one-hot chord/frame heatmap

    Note: df is modified in place -- a "Chord_index" column is added to it.

    Args:
        df (pd.DataFrame): Table with a "Chord" column; each row is one frame and every
            value must be one of the 24 major/minor chord labels

    Returns:
        fig (matplotlib.figure.Figure): Created figure
        ax (matplotlib.axes.Axes): Axes holding the heatmap

    Raises:
        AssertionError: If df["Chord"] contains a value outside the 24 chord labels
    """
    import seaborn as sns

    major_labels = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    minor_labels = ['Cm', 'C#m', 'Dm', 'D#m', 'Em', 'Fm', 'F#m', 'Gm', 'G#m', 'Am', 'A#m', 'Bm']
    chroma_labels = major_labels + minor_labels

    # Check that df["Chord"] holds no value outside chroma_labels
    assert df["Chord"].isin(chroma_labels).all(), "Chord must be in chroma_labels"

    # Convert every chord name into its position within chroma_labels
    df["Chord_index"] = df["Chord"].apply(lambda name: chroma_labels.index(name))

    # Start from an all-zero 24 x len(df) matrix ...
    n_frames = len(df)
    chroma = np.zeros((24, n_frames))
    # ... and set the entry of the annotated chord to 1 in every frame
    chroma[df["Chord_index"], np.arange(n_frames)] = 1

    # Plot
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.heatmap(chroma, ax=ax, cmap='crest', linewidths=0.01, linecolor=(1, 1, 1, 0.1))
    ax.invert_yaxis()

    # Ticks are offset by 0.5 -- presumably to centre each label on its heatmap row.
    tick_positions = np.arange(len(chroma_labels)) + 0.5
    ax.set_yticks(tick_positions, chroma_labels, rotation=0)

    ax.set_title('User Chord Recognition Result')
    ax.set_xlabel('Time (frame)')
    ax.set_ylabel("Chord")
    return fig, ax