% CASSE book Chapter 8, Approaches to Complex Sound Scene Analysis
% Figure 8.6 - NMF decomposition of an artificial sound scene
% Prerequisites:
% NMFLib toolbox: http://www.ee.columbia.edu/~grindlay/code.html
% Emmanouil Benetos 2016
% Read alarm and doorslam audio recordings, construct artificial sequence
[x1,fs] = audioread('alarm44.wav');
[x2,fs2] = audioread('doorslam006.wav');
x3 = zeros(200000,1);
x3(10000:10000+154800-1) = mean(x1(1:154800,:)');
x3(170000:170000+length(x2)-1) = 4*x2;
% Compute ERB spectrogram, initialise dictionary templates and run NMF
[X] = computeERB(x3,fs);
W1 = X(:,29);
W2 = mean(X(:,168:176)');
W = [W1 W2'];
W0=W;
[w1,h,errs,vout] = nmf_beta(X,2,'W0',W,'W',W,'niter', 10, 'verb', 1,'beta',0.6);
% Plot spectrogram and NMF outputs
figure;
subplot(2,2,[1 2]);
imagesc(X); axis xy
title('(a)');
ylabel('frequency bin');
xlabel('time (sec)');
xticklabels({'0.4','0.8','1.2','1.6','2.0','2.4','2.8','3.2','3,6'});
subplot(2,2,3);
multiplot(W');
title('(b)')
ylabel('k');
xlabel('frequency bin');
yticks([0.5 2.5]);
yticklabels({'1','2'})
subplot(2,2,4);
multiplot(h);
title('(c)');
xlabel('time (sec)');
ylabel('k');
xticks(20:20:180);
xticklabels({'0.4','0.8','1.2','1.6','2.0','2.4','2.8','3.2','3,6'});
yticks([0.5 2.5]);
yticklabels({'1','2'})
%%%
function [X] = computeERB(x,fs)
% Code for computing ERB T/F representation
% Adapted from code by Emmanuel Vincent:
% http://homepages.loria.fr/evincent/software/multipitch_tracking.m
% Initialize
nbfreq=250;
% Reading WAV file
%[x,fs]=wavread(wavfile);
x=resample(x,22050,fs).';
fs = 22050;
[I,T]=size(x);
wlen=2^nextpow2(.02*fs); %20 ms window length
N=ceil(T/wlen);
% Computing ERBT coefficients and frequency scale
X=zeros(nbfreq,N,I);
for i=1:I,
[X(:,:,i),f]=erbtm(x(i,:),fs,nbfreq,wlen);
end
X=(sum(X.^2,3)+1e-18).^.5;
fmin=f(1);
fmax=f(nbfreq);
%emin=9.26*log(.00437*fmin+1); emax=9.26*log(.00437*fmax+1);
%e=(0:nbfreq-1)*(emax-emin)/(nbfreq-1)+emin;
%a=.5*(nbfreq-1)/(emax-emin)*9.26*.00437*fs*exp(-e/9.26)-.5;
%alen=2*round(a)+1;
%f=f/fs;
end
function [X,f]=erbtm(x,fs,F,wlen)
% ERBTM Magnitude ERB Transform using a Hann window.
%
% [X,f]=erbtp(x,fs,F,wlen)
%
% Inputs:
% x: 1 x T vector containing a single-channel signal
% fs: sampling frequency in Hz
% F: number of frequency bins (the ratio between the bandwidth of each bin
% and the frequency difference between successive bins is constant)
% wlen: number of samples per frame (must be a multiple of the largest
% downsampling factor, typically a large power of 2)
%
% Output:
% X: F x N matrix containing the time-frequency magnitude (amplitude) coefficients
% f: F x 1 vector containing the center frequency of each frequency bin
%%% Errors and warnings %%%
if nargin<4, error('Not enough input arguments.'); end
[I,T]=size(x);
if I>1, error('The input signal must be a row vector.'); end
N=ceil(T/wlen);
%%% Computing ERBT coefficients %%%
x=hilbert(x);
X=zeros(F,N);
% Determining minimum and maximum frequency
fmax=.5*fs; fmin=0;
for j=1:100,
emin=9.26*log(.00437*fmin+1);
emax=9.26*log(.00437*fmax+1);
fmin=1.5*(emax-emin)/(F-1)/9.26/.00437*exp(emin/9.26);
fmax=.5*fs-1.5*(emax-emin)/(F-1)/9.26/.00437*exp(emax/9.26);
if (fmax < 0) || (fmin > .5*fs), error('The number of frequency bins is too small.'); end
end
% Determining frequency and window length scales
emin=9.26*log(.00437*fmin+1);
emax=9.26*log(.00437*fmax+1);
e=(0:F-1)*(emax-emin)/(F-1)+emin;
f=(exp(e/9.26)-1)/.00437;
a=.5*(F-1)/(emax-emin)*9.26*.00437*fs*exp(-e/9.26)-.5;
% Determining dyadic downsampling bins (for fast computation)
fup=f+1.5*fs./(2*a+1);
subs=-log(2*fup/fs)/log(2);
subs=2.^max(0,floor(min(log2(wlen),subs)));
if (wlen/subs(1) ~= floor(wlen/subs(1))), error(['The number of samples per frame must be a multiple of ' int2str(subs(1)) '.']); end
down=(subs~=[subs(2:end),1]);
for bin=F:-1:1,
% Dyadic downsampling
if down(bin),
x=resample(x,1,2,50);
end
% Convolution with a modulated sine window
hwlen=round(a(bin)/subs(bin));
win=hanning(2*hwlen+1).'.*exp(2i*pi*f(bin)/fs*subs(bin)*(-hwlen:hwlen));
band=[fftfilt(win,[x,zeros(1,2*hwlen)]) zeros(1,wlen/subs(bin))];
% Square-root energy on short time frames
band=band(hwlen+1:hwlen+N*wlen/subs(bin));
X(bin,:)=sqrt(sum(reshape(abs(band).^2,wlen/subs(bin),N),1)/(hwlen+1)^2*subs(bin));
end
return;
end
%%%
function multiplot( x, sz)
% Plot multiple vectors into one compact plot
% Paris Smaragdis, 2007, paris@media.mit.edu
% Cellify
if ~iscell( x)
for i = 1:size( x, 1)
X{i} = x(i,:);
end
x = X;
end
n = length( x);
% Default arrangement
if ~exist( 'sz')
if n > 15
sz = ceil(sqrt( length( x))) * [1 1];
else
sz = [n 1];
end
end
% Find proper scaling
mx = 0;
for i = 1:n
mx = max( mx, max( abs( x{i})));
end
% Plot them
cla reset
l = length( x{1});
for i = 1:n
r = mod( i-1, sz(1));
c = floor( (i-1)/sz(1));
line( c*1.1*l + (1:l), 2*r + x{i}/max( abs(x{i})))
end
axis tight
end