% E4896 NMF Source Separation practical diary
% 2013-04-29 Dan Ellis [email protected]

% Load piano example
[d,sr] = wavread('gould-wtc1.wav');

% Can we separate first two notes from rest of first arpeggio?
specgram(d,512,sr)
axis([0 10 0 2000])
% gap between 2nd and 3rd fundamentals is about 370 Hz
% Try an elliptical filter:
[bl,al] = ellip(10, 3, 80, 370/(sr/2));
soundsc(filter(bl,al,d(1:10*sr)),sr);
% Just lowest fundamentals, but pretty muffled
% How does the remaining part sound?  Try complementary HPF
[bh,ah] = ellip(10, 3, 80, 370/(sr/2), 'high');
soundsc(filter(bh,ah,d(1:10*sr)),sr);
% Just sounds high-pass, can still hear all notes

% NMF separation of spectrogram
% form complex (invertible) STFT
fftlen = 512;
D = stft(d, fftlen);
% Use Virtanen's KL-based objective to factor STFT magnitude
% Fit 40 terms
r = 40;
[W,H] = nmf_kl_sparse_v(abs(D), r);
% Compare to original
tt = [0:size(D,2)]*(fftlen/2)/sr;
ff = [0:fftlen/2]/fftlen*sr;
subplot(311)
imagesc(tt, ff, 20*log10(abs(D))); axis xy
caxis([-40 20])
axis([0 10 0 2000])
subplot(312)
imagesc(tt, ff, 20*log10(abs(W*H))); axis xy
caxis([-40 20])
axis([0 10 0 2000])
% Full reconstruction looks pretty good.  Let's check components
% Since component ordering is arbitrary, attempt to sort by pitch
I = order_dict(W);
% Now plot dictionary terms
subplot(313)
imagesc(1:r, ff, 20*log10(W(:,I))); axis xy
caxis([-40 20])
axis([0 40 0 2000])
% Components mostly have clear correspondance to fundamentals
% Now look at activations.  Use power law compression of dynamics
imagesc(tt,1:r, H(I,:).^.3); axis xy
axis([0 10 0 40])
% You can sort-of see different notes, but quite noisy

% Try partial reconstruction with just lowest components
X = I(1:15);
dr = resynth_nmf(D,W,H,X);
soundsc(dr(1:10*sr), sr);
% Not bad at separating lowest notes, doesn't sound muffled
% Have a look at resulting spectrogram (mask)
subplot(312)
imagesc(tt, ff, 20*log10(abs(W(:,X)*H(X,:)))); axis xy
caxis([-40 20])
axis([0 10 0 2000])

% What about remainder?
X = I(16:r);
dr = resynth_nmf(D,W,H,X);
soundsc(dr(1:10*sr), sr);
imagesc(tt, ff, 20*log10(abs(W(:,X)*H(X,:)))); axis xy
caxis([-40 20])
axis([0 10 0 2000])
% Some removal of lower notes, but mostly still there

% Now try NMF with sparsity penalty enabled
[W1,H1] = nmf_kl_sparse_v(abs(D),r,'alpha', 1);
I1 = order_dict(W1);
subplot(313)
imagesc(1:r, ff, 20*log10(W1(:,I1))); axis xy
caxis([-40 20])
axis([0 40 0 2000])
% Dictionary pretty similar
imagesc(tt,1:r, H1(I1,:).^.3); axis xy
axis([0 10 0 40])
% Activations are much more sparse
X1 = I1(1:15);
dr1 = resynth_nmf(D,W1,H1,X1);
soundsc(dr1(1:10*sr),sr)
% Good separation of low notes
X1 = I1(16:r);
dr1 = resynth_nmf(D,W1,H1,X1);
soundsc(dr1(1:10*sr),sr)
% And now high notes have low notes mostly removed!

% Things to try:
% - reducing rank
% - increasing alpha
% - other instruments (voice?)
% - can you get better separation by manually selecting components?  Remove clicks?
% - look at individual components: what to they encode?  Minimum to get separation?