import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import librosa
import librosa.display
# Load the recording; librosa.load hands back the samples and their sample rate.
x, sr = librosa.load("beatles.wav")

# STFT framing, in samples: window length and the hop between windows.
win = 4096
hop_length = 512

# Magnitude spectrogram (rows are frequency indices, columns are time
# windows), then expressed in decibels with the largest magnitude as reference.
S = np.abs(librosa.stft(x, hop_length=hop_length, n_fft=win))
Sdb = librosa.amplitude_to_db(S, ref=np.max)

# Show the spectrogram with the horizontal axis in seconds (one hop per
# column) and the vertical axis running up to sr/2.
plt.figure(figsize=(10, 5))
plt.imshow(
    Sdb,
    aspect='auto',
    extent=(0, hop_length*S.shape[1]/sr, sr/2, 0),
)
# The extent gives the vertical range as (sr/2, 0); inverting the y-axis
# afterwards places 0 at the bottom of the plot.
plt.gca().invert_yaxis()
plt.ylabel("Frequency (hz)")
plt.xlabel("Time (Sec)")

# Audio player widget for the loaded samples (last expression of the cell).
ipd.Audio(x, rate=sr)
# Version 1: Pure Python Version with Nested Loops
# Spectrogram dimensions: M rows (frequency indices), N columns (time windows).
M, N = Sdb.shape

# One novelty value per pair of adjacent columns, and the time in seconds
# assigned to each of those values.
novfn = np.zeros(N-1)
times = hop_length*np.arange(N-1)/sr

# Walk every adjacent pair of columns and, element by element, add up how
# much each frequency row changed between the two.
for col in range(N-1):
    total = 0
    for row in range(M):
        change = Sdb[row, col+1] - Sdb[row, col]
        total += np.abs(change)
    novfn[col] = total

# Plot the novelty function against time.
plt.figure(figsize=(8, 4))
plt.plot(times, novfn)
plt.xlabel("Time (Seconds)")
# Version 2: Using a column slice to eliminate the inner loop
# Spectrogram dimensions: M rows (frequency indices), N columns (time windows).
M, N = Sdb.shape

# One novelty value per pair of adjacent columns, and the time in seconds
# assigned to each of those values.
novfn = np.zeros(N-1)
times = hop_length*np.arange(N-1)/sr

# A whole-column slice replaces the inner loop over rows: subtract adjacent
# columns in one step, then sum the absolute changes.
for col in range(N-1):
    col_change = Sdb[:, col+1] - Sdb[:, col]
    novfn[col] = np.sum(np.abs(col_change))

# Plot the novelty function against time.
plt.figure(figsize=(8, 4))
plt.plot(times, novfn)
plt.xlabel("Time (Seconds)")
# Version 3: Eliminate all loops with 2D slices and sums
M = Sdb.shape[0]  # How many rows I have (frequency indices)
N = Sdb.shape[1]  # How many columns I have (time window indices)
times = np.arange(N-1)*hop_length/sr

# Two shifted 2D slices line every column up with its successor, so one
# subtraction yields all N-1 column-to-column changes at once.
diff = np.abs(Sdb[:, 1:] - Sdb[:, :-1])
# Summing down the rows collapses each column of changes into a single
# novelty value; no pre-allocation is needed because np.sum builds the array.
novfn = np.sum(diff, axis=0)

plt.figure(figsize=(10, 15))

# Top panel: the dB spectrogram itself.
plt.subplot(311)
plt.imshow(Sdb, aspect='auto', extent=(0, hop_length*N/sr, sr/2, 0))
plt.colorbar()
plt.gca().invert_yaxis()
plt.xlabel("Time (Sec)")
plt.ylabel("Frequency (hz)")

# Middle panel: absolute change between adjacent columns.  diff has one
# column fewer than Sdb, so its time extent is computed from its own width;
# that keeps each column at the same position as the matching entry of times.
plt.subplot(312)
plt.imshow(diff, aspect='auto', extent=(0, hop_length*diff.shape[1]/sr, sr/2, 0))
plt.colorbar()
plt.gca().invert_yaxis()
plt.xlabel("Time (Sec)")
plt.ylabel("Frequency (hz)")

# Bottom panel: the novelty function (column sums of diff) against time.
plt.subplot(313)
plt.plot(times, novfn)
plt.xlabel("Time (Seconds)")