%load_ext autoreload
%autoreload 2
import librosa
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import pandas as pd
from scipy import signal
from spectrogramtools import *
from collections import OrderedDict
import warnings
warnings.filterwarnings("ignore")
# Render a list of audio clips as inline HTML <audio> widgets of a given pixel width
def make_html_audio(ys, sr, width=100):
    clips = []
    for y in ys:
        audio = ipd.Audio(y, rate=sr)
        audio_html = audio._repr_html_().replace('\n', '').strip()
        audio_html = audio_html.replace('<audio ', '<audio style="width: {}px; "'.format(width))
        clips.append(audio_html)
    return clips
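The spectrogramtools module imported above isn't shown here; it is assumed to provide STFT, iSTFT, and griffinLimInverse with the signatures used below. A minimal librosa-based stand-in under that assumption (a sketch, not the module's actual code) could look like:

# Hypothetical stand-in for spectrogramtools, assuming it wraps librosa's STFT routines
def STFT(x, win_length, hop_length):
    # Assumed behavior: complex STFT with FFT size equal to the window length
    return librosa.stft(x, n_fft=win_length, win_length=win_length, hop_length=hop_length)

def iSTFT(S, win_length, hop_length):
    return librosa.istft(S, win_length=win_length, hop_length=hop_length)

def griffinLimInverse(SMag, win_length, hop_length, n_iters=32):
    # Griffin-Lim estimates phases for a magnitude-only spectrogram
    return librosa.griffinlim(SMag, n_iter=n_iters, n_fft=win_length,
                              win_length=win_length, hop_length=hop_length)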
y, sr = librosa.load("doves.wav", sr=22050)
win_length = 2048*4
hop_length = 512
S = STFT(y, win_length, hop_length)
V = np.abs(S)
Let $V = |S|$ be the $M \times N$ magnitude spectrogram. We seek an $M \times K$ matrix $W$ (spectral templates) and a $K \times N$ matrix $H$ (activations), both with nonnegative entries, so that we minimize the generalized Kullback-Leibler divergence between $V$ and $WH$:

$$ D(V \| WH) = \sum_{i, j} \left( V_{ij} \log \frac{V_{ij}}{(WH)_{ij}} - V_{ij} + (WH)_{ij} \right) $$
Update Rules (multiplicative, KL divergence version, applied alternately):

$$ H_{kj} \leftarrow H_{kj} \frac{\sum_{i} W_{ik} \, V_{ij}/(WH)_{ij}}{\sum_{i} W_{ik}} $$

$$ W_{ik} \leftarrow W_{ik} \frac{\sum_{j} \left( V_{ij}/(WH)_{ij} \right) H_{kj}}{\sum_{j} H_{kj}} $$
M = S.shape[0] # Number of frequency bins
N = S.shape[1] # Number of time frames
K = 3 # Number of NMF components
W = np.random.rand(M, K) # Random nonnegative initialization of the spectral templates
H = np.random.rand(K, N) # Random nonnegative initialization of the activations
n_iters = 100
# Apply the multiplicative update rules n_iters times (KL divergence version)
for i in range(n_iters):
    VL = V/(W.dot(H))
    H *= (W.T).dot(VL)/np.sum(W, 0)[:, None]
    VL = V/(W.dot(H)) # Recompute the ratio with the updated H before updating W
    W *= (VL.dot(H.T))/np.sum(H, 1)[None, :]
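To verify that the updates are actually decreasing the objective stated above, we can evaluate the generalized KL divergence between $V$ and $WH$. A small sketch (the epsilon guard against zero bins is an added safeguard, not part of the original loop):

# Generalized KL divergence D(V || WH), matching the objective above
def kl_divergence(V, WH, eps=1e-10):
    return np.sum(V*np.log((V + eps)/(WH + eps)) - V + WH)

print("D(V || WH) =", kl_divergence(V, W.dot(H)))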
Below we look at two things for each of the $K$ components:
1. What the component itself sounds like: we take its spectral template $W_i$ (the $i^{\text{th}}$ column of $W$), repeat it over a window's worth of frames, and invert it with Griffin-Lim.
2. What the original audio sounds like filtered by that component: we soft-mask the STFT as $S_i = S \odot \frac{W_i H_i}{WH}$ (elementwise), which keeps the proportion of each time-frequency bin that component $i$ explains, and invert with iSTFT using the original phases.
ws = []
ys = []
WH = W.dot(H)
K = H.shape[0]
for i in range(K):
    # Spectrogram of component i alone
    Hi = np.zeros_like(H)
    Hi[i, :] = H[i, :]
    WHi = np.dot(W, Hi)
    # Listen to the i^th spectral template by repeating it over a window's worth of frames
    Wi = W[:, i]
    Wi = np.reshape(Wi, (Wi.size, 1))
    Wi = Wi*np.ones((1, int(win_length/hop_length)))
    wi = griffinLimInverse(Wi, win_length, hop_length)
    ws.append(wi)
    # Filter the original STFT by the proportion that component i explains, then invert
    Si = S*WHi/WH
    yi = iSTFT(Si, win_length, hop_length)
    ys.append(yi)
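Because the soft masks $\frac{W_i H_i}{WH}$ sum to one over the $K$ components, the filtered spectrograms $S_i$ add back up to $S$ (up to round-off), so the filtered clips are a true decomposition of the original audio. A quick check of that claim:

# The component-wise soft masks partition S, so the masked spectrograms sum back to S
Ssum = sum(S*np.dot(W[:, [i]], H[[i], :])/WH for i in range(K))
print(np.max(np.abs(Ssum - S)))  # ~0 up to numerical round-off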
ys = make_html_audio(ys, sr, width=200)
ws = make_html_audio(ws, sr, width=100)
pd.set_option('display.max_colwidth', None)
df = pd.DataFrame(OrderedDict([("Components", ws), ("Filtered", ys)]))
ipd.HTML(df.to_html(escape=False, float_format='%.2f'))
We also listen to a Griffin-Lim inverse of $WH$ to see how well it approximates the original spectrogram. We shouldn't expect this to be perfect, since we're only using 3 components.
y = griffinLimInverse(WH, win_length, hop_length)
ipd.Audio(y, rate=sr)
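Another way to judge the rank-3 approximation, complementary to listening, is to plot the original magnitude spectrogram next to $WH$, using the matplotlib import from the top of the notebook:

# Compare the original magnitude spectrogram V to its K-component approximation WH
plt.figure(figsize=(10, 6))
plt.subplot(211)
plt.imshow(np.log10(V + 1e-10), aspect='auto', origin='lower', cmap='magma')
plt.title("Original $|S|$ (log magnitude)")
plt.subplot(212)
plt.imshow(np.log10(WH + 1e-10), aspect='auto', origin='lower', cmap='magma')
plt.title("$WH$ with K = 3 components (log magnitude)")
plt.tight_layout()
plt.show()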