% 2001-02-27-lpc.diary (REVISED 2001-06-01)
% Examples of using LPC functions
[d,r]=wavread('mpgr1_sx419.wav');
% Downsample to 8 kHz
d = resample(d,1,2);
r = r/2;
soundsc(d,r);
% 8th-order LPC modeling
[a,g,e] = lpcfit(d,8);
% Noise-excited resynthesis
rd = lpcsynth(a,g);
% Listen
soundsc(rd)
% Compare spectrograms
subplot(211)
specgram(d,256,r);
subplot(212)
specgram(rd,256,r);
% Clearly the same broad shape, but without pitch structure.

% Residual-excited resynthesis
rde = lpcsynth(a,g,e);
soundsc(rde)
% Perceptually identical to original
% What is the excitation signal like?
% It should contain everything *not* in the noise-excited reconstruction
soundsc(e)
% Weird, flat power in time and frequency, still intelligible!
specgram(e,256,r);
% Interesting that formants still visible, resulting from LPC 
% modeling mismatch (slight)

% Compare original waveform with residual
plot(d)
% Choose a portion straddling fricative-vowel in "football"
subplot(311)
plot(d(2000+[1:1000]))
subplot(312)
plot(rde(2000+[1:1000]))
% residual-excited resynth is not *quite* perfect
% (due to overlap/framing effects)
subplot(313)
plot(e(2000+[1:1000]))
% Residual is basically noise during fricative & silence,
% clearly shows pitch pulses during voicing

% Let's just check the model during this vowel
% Frames occur every 128 samples, so middle of vowel (sample 2500)
% is around frame 20...  Look at the gain (energy) terms to 
% find peak
>> g(17:23)
ans =
    0.0025
    0.0012
    0.0076
    0.0115
    0.0069
    0.0007
    0.0000
% g(20) is the peak.  Let's see the spec: reconstruct the polynomial
a20 = a(20,:);
g20 = g(20);
% see the filter response
freqz(g20, a20);
% Compare to spectral slice at that point
% Include pre-emphasis in spectrogram, to match actual modeled signal
sg=specgram(filter([1 -0.9], 1, d),256,r);
subplot(212)
plot([0:128]/128, 20*log10(abs(sg(:,20))))
% Similar, but not quite the same
size(a)
ans =
   191     9
size(sg)
ans =
   129   190
% They are using slightly different framing (191 vs 190 frames), 
% so spectral slices won't quite line up.
axis([0 1 -40 0])
% You can see it's more or less the same overall shape.
% Well, that's what the model gives us.


% (Crude) sinewave modeling

help extractrax
  [T,M] = extractrax(S,H,VERB)  Extract tracks from a 2-d t-f style array
        S is a matrix of values, which is searched from left to right 
        looking for local maxima in each column, then tracking their 
        evolution.  T is returned with one row for each track that 
        is found, with nonzero values indicating the continuously-
        valued  row (i.e. frequency) of the track in the original 
        matrix.  H is a threshold; only peaks > H*maxx(S) at startup 
        are tracked. H defaults to 0.01.
        Rows of T are sorted by column, then by freq, of their first 
        value. VERB=1 for progress printouts.
 	M returns the interpolated peak magnitude for each track point.
  1998may02 dpwe@icsi.berkeley.edu $Header: $ how many times?

specgram(filter([1 -0.9], 1, d),256,r);
sg=specgram(filter([1 -0.9], 1, d),256,r);
[F,M] = extractrax(abs(sg));
% trax extended to 100 rows
% col 18 (l=8) tk 1: large step of 18.028889 at 23.859004, breaking track
% ...
% trax extended to 1050 rows
help plottrax
  plottrax(T,S,colr,tag,tkcolr,tt,ff)   Plot spectrogram-derived tracks
     T is an array of freq-bin-indexes defining tracks (from
     extractrax); S is the underlying (linear) spectrogram;
     Display the tracks over the spectrogram.
     If S is empty, assume sgram already there.
     If clr is specified, it's a color spec for the outline.
     If tag is specified, label the group with it.
     If tkcolr is given, label the tracks with it.
     tt and ff are arguments for the time and frequency axes of the plot, 
     default to simple enumeration.
  1998may17 dpwe@icsi.berkeley.edu
subplot(111)
plottrax(F,abs(sg))
gcolor
help synthtrax
  X = synthtrax(F, M, SR, SUBF, DUR)      Reconstruct a sound from track rep'n.
 	Each row of F and M contains a series of frequency and magnitude 
 	samples for a particular track.  These will be remodulated and 
 	overlaid into the output sound X which will run at sample rate SR, 
 	although the columns in F and M are subsampled from that rate by 
 	a factor SUBF.  If DUR is nonzero, X will be padded or truncated 
  	to correspond to just this much time.
  dpwe@icsi.berkeley.edu 1994aug20, 1996aug22

% F from extractrax is in bin numbers, 1-129; have to convert to Hz
% for synthtrax...
rsw = synthtrax((F-1)/128*4000,M, 8000, 128);
specgram(rsw,256,r)
soundsc(rsw)
soundsc(d)
% Ugly!  But does something
% Check the extracted frequencies during the first vowel to see 
% how close to harmonic they are
F35 = (F(:,35)-1)*4000/128;
% Filter out the 'NaN' values, for the rows describing tracks not 
% active at this time step
% (find(FSok) returns indices of nonzero vals, so selects non-NaN vals)
FSok = 1-isnan(F35);
F35 = sort(F35(find(FSok)));
>> size(F35)
ans =
    10     1
>> F35'
ans =
   1.0e+03 *
  Columns 1 through 7 
    0.1162    0.2340    0.3508    0.4677    0.5843    0.8221    0.9349
  Columns 8 through 10 
    1.1688    2.8042    3.2727
% Not bad.  5 * 116.2 is 581; extracted peak is 584.3 or 0.6% error.
% Still, that's noticable.  Worse problem is ignored peaks.

