Spaces:

o-w-o
/

music-analysis

Runtime error

music-analysis / src /chord_recognition.py

Keycatowo

init trans commit

bb5feba almost 2 years ago

11.8 kB

	import os
	import numpy as np
	from matplotlib import pyplot as plt

	import librosa
	import libfmp.b
	import libfmp.c3
	import libfmp.c4

	import sys

	def compute_chromagram_from_filename(fn_wav, Fs=22050, N=4096, H=2048, gamma=None, version='STFT', norm='2'):
	    """Compute chromagram for WAV file specified by filename

	    Notebook: C5/C5S2_ChordRec_Templates.ipynb

	    Args:
	        fn_wav (str): Filename of WAV
	        Fs (scalar): Sampling rate passed to librosa.load (Default value = 22050)
	        N (int): Window size (Default value = 4096)
	        H (int): Hop size (Default value = 2048)
	        gamma (float): Constant for logarithmic compression (Default value = None)
	        version (str): Technique used for front-end decomposition ('STFT', 'IIR', 'CQT')
	            (Default value = 'STFT')
	        norm (str): If not None, chroma vectors are normalized by norm as specified ('1', '2', 'max')
	            (Default value = '2')

	    Returns:
	        X (np.ndarray): Chromagram
	        Fs_X (scalar): Feature rate of chromagram
	        x (np.ndarray): Audio signal
	        Fs (scalar): Sampling rate of audio signal
	        x_dur (float): Duration (seconds) of audio signal

	    Raises:
	        ValueError: If version is not one of 'STFT', 'CQT', 'IIR'
	    """
	    # Fail early with a clear message; otherwise an unknown version would only
	    # surface later as an UnboundLocalError on X, after the file was loaded.
	    if version not in ('STFT', 'CQT', 'IIR'):
	        raise ValueError("version must be 'STFT', 'CQT' or 'IIR', got %r" % (version,))

	    x, Fs = librosa.load(fn_wav, sr=Fs)
	    x_dur = x.shape[0] / Fs
	    if version == 'STFT':
	        # Compute chroma features with STFT
	        X = librosa.stft(x, n_fft=N, hop_length=H, pad_mode='constant', center=True)
	        if gamma is not None:
	            X = np.log(1 + gamma * np.abs(X) ** 2)
	        else:
	            X = np.abs(X) ** 2
	        X = librosa.feature.chroma_stft(S=X, sr=Fs, tuning=0, norm=None, hop_length=H, n_fft=N)
	    elif version == 'CQT':
	        # Compute chroma features with CQT decomposition
	        X = librosa.feature.chroma_cqt(y=x, sr=Fs, hop_length=H, norm=None)
	    else:
	        # version == 'IIR'
	        # Compute chroma features with filter bank (using IIR elliptic filter)
	        X = librosa.iirt(y=x, sr=Fs, win_length=N, hop_length=H, center=True, tuning=0.0)
	        if gamma is not None:
	            X = np.log(1.0 + gamma * X)
	        X = librosa.feature.chroma_cqt(C=X, bins_per_octave=12, n_octaves=7,
	                                       fmin=librosa.midi_to_hz(24), norm=None)
	    if norm is not None:
	        X = libfmp.c3.normalize_feature_sequence(X, norm=norm)
	    Fs_X = Fs / H
	    return X, Fs_X, x, Fs, x_dur

	def compute_chromagram(y, sr, Fs=22050, N=4096, H=2048, gamma=None, version='STFT', norm='2'):
	    """Compute chromagram for an audio signal that is already loaded in memory

	    Notebook: C5/C5S2_ChordRec_Templates.ipynb

	    Args:
	        y (np.ndarray): Audio signal
	        sr (scalar): Sampling rate of y
	        Fs (scalar): Sampling rate the signal is resampled to before analysis (Default value = 22050)
	        N (int): Window size (Default value = 4096)
	        H (int): Hop size (Default value = 2048)
	        gamma (float): Constant for logarithmic compression (Default value = None)
	        version (str): Technique used for front-end decomposition ('STFT', 'IIR', 'CQT')
	            (Default value = 'STFT')
	        norm (str): If not None, chroma vectors are normalized by norm as specified ('1', '2', 'max')
	            (Default value = '2')

	    Returns:
	        X (np.ndarray): Chromagram
	        Fs_X (scalar): Feature rate of chromagram
	        x (np.ndarray): Resampled audio signal
	        Fs (scalar): Sampling rate of the resampled audio signal
	        x_dur (float): Duration (seconds) of audio signal

	    Raises:
	        ValueError: If version is not one of 'STFT', 'CQT', 'IIR'
	    """
	    # Fail early with a clear message; otherwise an unknown version would only
	    # surface later as an UnboundLocalError on X, after resampling.
	    if version not in ('STFT', 'CQT', 'IIR'):
	        raise ValueError("version must be 'STFT', 'CQT' or 'IIR', got %r" % (version,))

	    # The sampling rates must be passed by keyword: recent librosa releases
	    # reject them as positional arguments.
	    x = librosa.resample(y, orig_sr=sr, target_sr=Fs)
	    x_dur = x.shape[0] / Fs
	    if version == 'STFT':
	        # Compute chroma features with STFT
	        X = librosa.stft(x, n_fft=N, hop_length=H, pad_mode='constant', center=True)
	        if gamma is not None:
	            X = np.log(1 + gamma * np.abs(X) ** 2)
	        else:
	            X = np.abs(X) ** 2
	        X = librosa.feature.chroma_stft(S=X, sr=Fs, tuning=0, norm=None, hop_length=H, n_fft=N)
	    elif version == 'CQT':
	        # Compute chroma features with CQT decomposition
	        X = librosa.feature.chroma_cqt(y=x, sr=Fs, hop_length=H, norm=None)
	    else:
	        # version == 'IIR'
	        # Compute chroma features with filter bank (using IIR elliptic filter)
	        X = librosa.iirt(y=x, sr=Fs, win_length=N, hop_length=H, center=True, tuning=0.0)
	        if gamma is not None:
	            X = np.log(1.0 + gamma * X)
	        X = librosa.feature.chroma_cqt(C=X, bins_per_octave=12, n_octaves=7,
	                                       fmin=librosa.midi_to_hz(24), norm=None)
	    if norm is not None:
	        X = libfmp.c3.normalize_feature_sequence(X, norm=norm)
	    Fs_X = Fs / H
	    return X, Fs_X, x, Fs, x_dur

	def get_chord_labels(ext_minor='m', nonchord=False):
	    """Generate chord labels for major and minor triads (and possibly nonchord label)

	    Notebook: C5/C5S2_ChordRec_Templates.ipynb

	    Args:
	        ext_minor (str): Extension for minor chords (Default value = 'm')
	        nonchord (bool): If truthy then add nonchord label 'N' (Default value = False)

	    Returns:
	        chord_labels (list): List of chord labels (12 major, 12 minor, optionally 'N')
	    """
	    chroma_labels = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
	    chord_labels = chroma_labels + [s + ext_minor for s in chroma_labels]
	    # Truthiness (not "is True") so that flags such as 1 or np.True_ produce the
	    # same number of labels as generate_chord_templates produces templates.
	    if nonchord:
	        chord_labels = chord_labels + ['N']
	    return chord_labels

	def generate_chord_templates(nonchord=False):
	    """Build binary chroma templates for the 12 major and 12 minor triads

	    Notebook: C5/C5S2_ChordRec_Templates.ipynb

	    Args:
	        nonchord (bool): If truthy, a 25th all-ones column is kept as the
	            nonchord template (Default value = False)

	    Returns:
	        chord_templates (np.ndarray): Matrix with 12 rows containing the chord
	            templates as columns (major triads first, then minor triads)
	    """
	    c_major = np.array([1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0])
	    c_minor = np.array([1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0])

	    n_templates = 25 if nonchord else 24
	    # Start from ones: the optional last column is never overwritten below and
	    # therefore stays all-ones.
	    templates = np.ones((12, n_templates))
	    for root in range(12):
	        # Transposing a triad by `root` semitones is a cyclic shift of its chroma pattern.
	        templates[:, root] = np.roll(c_major, root)
	        templates[:, 12 + root] = np.roll(c_minor, root)
	    return templates

	def chord_recognition_template(X, norm_sim='1', nonchord=False):
	    """Template-based chord recognition with major and minor triads
	    (and possibly a nonchord template)

	    Notebook: C5/C5S2_ChordRec_Templates.ipynb

	    Args:
	        X (np.ndarray): Chromagram
	        norm_sim (str): Norm used for normalizing the chord similarity matrix;
	            None skips this normalization (Default value = '1')
	        nonchord (bool): If truthy then add nonchord template (Default value = False)

	    Returns:
	        chord_sim (np.ndarray): Chord similarity matrix
	        chord_max (np.ndarray): Binarized chord similarity matrix only containing maximizing chord
	    """
	    templates = libfmp.c3.normalize_feature_sequence(
	        generate_chord_templates(nonchord=nonchord), norm='2')
	    features = libfmp.c3.normalize_feature_sequence(X, norm='2')

	    # Inner product between every normalized template and every normalized frame.
	    chord_sim = np.matmul(templates.T, features)
	    if norm_sim is not None:
	        chord_sim = libfmp.c3.normalize_feature_sequence(chord_sim, norm=norm_sim)

	    # argmax selects exactly one chord per frame, so each column of chord_max
	    # holds a single 1 even when several chords share the maximum similarity.
	    winners = np.argmax(chord_sim, axis=0)
	    chord_max = np.zeros(chord_sim.shape, dtype=np.int32)
	    chord_max[winners, np.arange(chord_sim.shape[1])] = 1

	    return chord_sim, chord_max

	def plot_chord_recognition(y, sr):
	    """Plot an STFT-based chromagram and the template-based chord recognition result

	    Deprecated: emits a DeprecationWarning on every call.

	    Args:
	        y (np.ndarray): Audio signal
	        sr (scalar): Sampling rate of y

	    Returns:
	        fig (matplotlib.figure.Figure): Figure holding both plots
	        ax (np.ndarray): 2x2 array of axes (plot and colorbar axis per row)
	        chord_max (np.ndarray): Binarized chord similarity matrix that was plotted
	    """
	    import warnings
	    # stacklevel=2 attributes the warning to the calling code rather than to this
	    # module, so the caller can see where the deprecated call is made.
	    warnings.warn("This function is deprecated and will be removed in future versions.",
	                  DeprecationWarning, stacklevel=2)

	    X, Fs_X, _, _, _ = compute_chromagram(y, sr)

	    _, chord_max = chord_recognition_template(X, norm_sim='max')
	    chord_labels = get_chord_labels(nonchord=False)

	    fig, ax = plt.subplots(2, 2, gridspec_kw={'width_ratios': [1, 0.03],
	                                              'height_ratios': [1.5, 3]}, figsize=(8, 10))

	    libfmp.b.plot_chromagram(X, ax=[ax[0, 0], ax[0, 1]], Fs=Fs_X, clim=[0, 1], xlabel='',
	                             title='STFT-based chromagram (feature rate = %0.1f Hz)' % (Fs_X))
	    libfmp.b.plot_matrix(chord_max, ax=[ax[1, 0], ax[1, 1]], Fs=Fs_X,
	                         title='Time–chord representation of chord recognition result',
	                         ylabel='Chord', xlabel='')
	    # One tick per chord row, labelled with the chord name.
	    ax[1, 0].set_yticks(np.arange(len(chord_labels)))
	    ax[1, 0].set_yticklabels(chord_labels)
	    ax[1, 0].grid()
	    plt.tight_layout()
	    return fig, ax, chord_max

	def plot_binary_template_chord_recognition(y, sr):
	    """Plot an STFT-based chromagram next to the binary templates of the recognized chords

	    Deprecated: emits a DeprecationWarning on every call.

	    Args:
	        y (np.ndarray): Audio signal
	        sr (scalar): Sampling rate of y

	    Returns:
	        fig (matplotlib.figure.Figure): Figure holding both plots
	        ax (np.ndarray): 2x2 array of axes (plot and colorbar axis per row)
	    """
	    import warnings
	    # stacklevel=2 attributes the warning to the calling code rather than to this
	    # module, so the caller can see where the deprecated call is made.
	    warnings.warn("This function is deprecated and will be removed in future versions.",
	                  DeprecationWarning, stacklevel=2)

	    X, Fs_X, _, _, _ = compute_chromagram(y, sr)
	    _, chord_max = chord_recognition_template(X, norm_sim='max')

	    # Multiplying the templates by the one-hot result selects, for every frame,
	    # the binary chroma template of the recognized chord.
	    chord_templates = generate_chord_templates()
	    X_chord = np.matmul(chord_templates, chord_max)

	    fig, ax = plt.subplots(2, 2, gridspec_kw={'width_ratios': [1, 0.03],
	                                              'height_ratios': [1, 1]}, figsize=(8, 5))

	    libfmp.b.plot_chromagram(X, ax=[ax[0, 0], ax[0, 1]], Fs=Fs_X, clim=[0, 1], xlabel='',
	                             title='STFT-based chromagram (feature rate = %0.1f Hz)' % (Fs_X))
	    libfmp.b.plot_chromagram(X_chord, ax=[ax[1, 0], ax[1, 1]], Fs=Fs_X, clim=[0, 1], xlabel='',
	                             title='Binary templates of the chord recognition result')
	    plt.tight_layout()
	    return fig, ax


	def chord_table(chord_max):
	    """Translate a binarized chord matrix into one chord label per frame

	    Args:
	        chord_max (np.ndarray): Matrix with one row per chord and one column per
	            frame. Rows 0-11 are read as major triads, rows 12-23 as minor triads
	            and an optional row 24 as the nonchord label 'N'.

	    Returns:
	        chord_results (list): Chord label (str) of the maximizing row for every frame
	    """
	    chord_labels = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'] + ['Cm', 'C#m', 'Dm', 'D#m', 'Em', 'Fm', 'F#m', 'Gm', 'G#m', 'Am', 'A#m', 'Bm']
	    # A matrix produced with a nonchord template has a 25th row; give it a label
	    # so that a winning nonchord row does not raise an IndexError.
	    chord_labels = chord_labels + ['N']

	    # Index of the maximum along the first axis (one index per frame)
	    chord_max_index = np.argmax(chord_max, axis=0)
	    # Look up the chord label that belongs to each index
	    chord_results = [chord_labels[i] for i in chord_max_index]

	    return chord_results


	def plot_chord(chroma, title="", figsize=(12, 6), cmap="coolwarm", include_minor=False):
	    """Draw a chroma / chord matrix as a seaborn heatmap with note names on the y-axis

	    Args:
	        chroma (np.ndarray): Matrix to plot (rows are labelled with the chord names)
	        title (str): Title of the plot (Default value = "")
	        figsize (tuple): Figure size passed to plt.subplots (Default value = (12, 6))
	        cmap (str): Colormap passed to seaborn.heatmap (Default value = "coolwarm")
	        include_minor (bool): If truthy, the 12 minor chord names are appended to
	            the 12 major ones as tick labels (Default value = False)

	    Returns:
	        fig (matplotlib.figure.Figure): Created figure
	        ax (matplotlib.axes.Axes): Axis containing the heatmap
	    """
	    import seaborn as sns

	    major_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
	    minor_names = ['Cm', 'C#m', 'Dm', 'D#m', 'Em', 'Fm', 'F#m', 'Gm', 'G#m', 'Am', 'A#m', 'Bm']
	    tick_names = major_names + minor_names if include_minor else major_names

	    fig, ax = plt.subplots(figsize=figsize)
	    sns.heatmap(chroma, ax=ax, cmap=cmap, linewidths=0.01, linecolor=(1, 1, 1, 0.1))

	    # Flip the y-axis so the first row (C) is drawn at the bottom.
	    ax.invert_yaxis()
	    # The +0.5 offset puts each tick at the vertical centre of its heatmap cell.
	    tick_positions = np.arange(len(tick_names)) + 0.5
	    ax.set_yticks(tick_positions, tick_names, rotation=0)

	    ax.set_title(title)
	    ax.set_xlabel('Time (frame)')
	    ax.set_ylabel("Chord")

	    return fig, ax

	def plot_user_chord(df):
	    """Plot user-provided chord annotations as a one-hot chord/frame heatmap

	    The input DataFrame is not modified.

	    Args:
	        df (pd.DataFrame): Table with a "Chord" column; each row is one frame and
	            must hold one of the 24 major/minor chord labels

	    Returns:
	        fig (matplotlib.figure.Figure): Created figure
	        ax (matplotlib.axes.Axes): Axis containing the heatmap

	    Raises:
	        AssertionError: If df["Chord"] contains a value that is not a known chord label
	    """
	    chroma_labels = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'] + ['Cm', 'C#m', 'Dm', 'D#m', 'Em', 'Fm', 'F#m', 'Gm', 'G#m', 'Am', 'A#m', 'Bm']

	    # Check that df["Chord"] holds no value outside chroma_labels. Raised
	    # explicitly (instead of an assert statement) so the check survives "python -O".
	    chords = df["Chord"]
	    if not chords.isin(chroma_labels).all():
	        raise AssertionError("Chord must be in chroma_labels")

	    # Convert every chord label to its row index. The indices are kept in a local
	    # array so that no helper column is written into the caller's DataFrame.
	    label_to_row = {label: row for row, label in enumerate(chroma_labels)}
	    chord_rows = np.array([label_to_row[chord] for chord in chords], dtype=int)

	    # Build a 24 x len(df) zero matrix and set the annotated chord of each frame to 1
	    chroma = np.zeros((len(chroma_labels), len(df)))
	    chroma[chord_rows, np.arange(len(df))] = 1

	    # Plot (seaborn is imported only once the input has been validated)
	    import seaborn as sns
	    fig, ax = plt.subplots(figsize=(12, 6))
	    sns.heatmap(chroma, ax=ax, cmap='crest', linewidths=0.01, linecolor=(1, 1, 1, 0.1))
	    # Flip the y-axis so the first row (C) is drawn at the bottom.
	    ax.invert_yaxis()
	    # The +0.5 offset puts each tick at the vertical centre of its heatmap cell.
	    ax.set_yticks(
	        np.arange(len(chroma_labels)) + 0.5,
	        chroma_labels,
	        rotation=0,
	    )
	    ax.set_ylabel("Chord")
	    ax.set_xlabel('Time (frame)')
	    ax.set_title('User Chord Recognition Result')

	    return fig, ax