%load_ext autoreload
%autoreload 2
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import pandas as pd
from scipy import signal
from spectrogramtools import *
from collections import OrderedDict
import warnings
warnings.filterwarnings("ignore")
def make_html_audio(ys, sr, width=100):
    """Create a small HTML audio player for each clip in ys, so they can be embedded in a table"""
    clips = []
    for y in ys:
        audio = ipd.Audio(y, rate=sr)
        audio_html = audio._repr_html_().replace('\n', '').strip()
        audio_html = audio_html.replace('<audio ', '<audio style="width: {}px; "'.format(width))
        clips.append(audio_html)
    return clips
# Load the audio and compute its STFT S; V = |S| is the magnitude spectrogram we will factorize
y, sr = librosa.load("doves.wav", sr=22050)
win_length = 2048*4
hop_length = 512
S = STFT(y, win_length, hop_length)
V = np.abs(S)
plt.title("V")
librosa.display.specshow(librosa.amplitude_to_db(V, ref=np.max), x_axis='time')
ipd.Audio(y, rate=sr)
If $V$ is an $M \times N$ matrix, and we choose a number $K$ which is an estimate of how many instruments we have, then we seek an $M \times K$ matrix $W$ and a $K \times N$ matrix $H$, both with nonnegative entries, such that $V \approx WH$. Each column of $W$ is a spectral template, and each row of $H$ says how active that template is over time.

First, we randomly initialize $W$ and $H$ with nonnegative numbers.
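Then we repeatedly apply multiplicative update rules to pull $WH$ closer to $V$. For reference, here is a sketch of the standard Lee-Seung updates for the Frobenius norm objective, which is what the loop later in this notebook implements:

$$H \leftarrow H \odot \frac{W^T V}{W^T W H}, \qquad W \leftarrow W \odot \frac{V H^T}{W H H^T}$$

where $\odot$ and the fraction bars denote element-wise multiplication and division, and $W^T$ is the transpose of $W$.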
# Quick numpy refresher: the transpose A.T, which shows up in the update rules
A = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
print("A")
print(A)
print("A^T")
print(A.T)
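The update rules also rely on numpy's element-wise multiplication and division, which are different from the matrix product computed with dot. Here is a small made-up example contrasting the two (the numbers are arbitrary):
B = np.array([[1.0, 2.0], [3.0, 4.0]])
C = np.array([[2.0, 4.0], [8.0, 16.0]])
print(B*C)       # Element-wise product
print(C/B)       # Element-wise quotient
print(B.dot(C))  # Matrix product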
M = S.shape[0]
N = S.shape[1]
K = 3
# Randomly initialize my W and my H matrices
W = np.random.rand(M, K)
H = np.random.rand(K, N)
n_iters = 100
# Apply the multiplicative update rules n_iters times in a loop
for it in range(n_iters):
    # Update H, then W (Lee-Seung multiplicative updates for the Frobenius norm)
    H = H * (W.T.dot(V)) / (W.T.dot(W.dot(H)))
    W = W * (V.dot(H.T)) / (W.dot(H.dot(H.T)))
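As an optional sanity check (not part of the loop above, just a hypothetical diagnostic), we can measure how well $WH$ approximates $V$ with the Frobenius norm of the residual, which the updates should have driven down:
# Hypothetical diagnostic: Frobenius norm of the residual V - WH after the updates
err = np.linalg.norm(V - W.dot(H))
print("Frobenius norm of V - WH:", err)
print("Relative error:", err/np.linalg.norm(V))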
Below we look at two things for each of the $K$ components: the learned spectral template $W_i$ sonified on its own with the Griffin-Lim algorithm (the "Components" column), and the original audio filtered so that only that component's contribution remains (the "Filtered" column).
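Concretely, for component $i$ we form $H_i$ by zeroing out every row of $H$ except row $i$, and we filter the original complex STFT $S$ with a soft mask (this is what the loop below computes):

$$S_i = S \odot \frac{W H_i}{W H}$$

With this choice, the filtered spectrograms of all $K$ components sum back to $S$.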
ws = []  # Sonified spectral templates
ys = []  # Original audio filtered down to each component
WH = W.dot(H)
K = H.shape[0]
for i in range(K):
    # Keep only row i of H to get component i's contribution W*Hi to the spectrogram
    Hi = np.zeros_like(H)
    Hi[i, :] = H[i, :]
    WHi = np.dot(W, Hi)
    # Sonify the spectral template W[:, i] by repeating it over a window's worth of hops
    Wi = W[:, i]
    Wi = np.reshape(Wi, (Wi.size, 1))
    Wi = Wi*np.ones((1, int(win_length/hop_length)))
    wi = griffinLimInverse(Wi, win_length, hop_length)
    ws.append(wi)
    # Soft mask: scale the original complex STFT by component i's share of WH
    Si = S*WHi/WH
    yi = iSTFT(Si, win_length, hop_length)
    ys.append(yi)
ys = make_html_audio(ys, sr, width=200)
ws = make_html_audio(ws, sr, width=100)
pd.set_option('display.max_colwidth', None)
df = pd.DataFrame(OrderedDict([("Components", ws), ("Filtered", ys)]))
ipd.HTML(df.to_html(escape=False, float_format='%.2f'))
We also listen to a Griffin-Lim inverse of $WH$ to see how well it approximates the original spectrogram. We shouldn't expect this to be perfect, since we're only using 3 components.
y = griffinLimInverse(WH, win_length, hop_length)
ipd.Audio(y, rate=sr)