Main Page   Packages   Class Hierarchy   Compound List   File List   Compound Members  

DpweBeatOnsetDetector.java

00001 /*
00002  *  Copyright 2006-2007 Columbia University.
00003  *
00004  *  This file is part of MEAPsoft.
00005  *
00006  *  MEAPsoft is free software; you can redistribute it and/or modify
00007  *  it under the terms of the GNU General Public License version 2 as
00008  *  published by the Free Software Foundation.
00009  *
00010  *  MEAPsoft is distributed in the hope that it will be useful, but
00011  *  WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  *  General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU General Public License
00016  *  along with MEAPsoft; if not, write to the Free Software
00017  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
00018  *  02110-1301 USA
00019  *
00020  *  See the file "COPYING" for the text of the license.
00021  */
00022 
00023 package com.meapsoft;
00024 
00025 import java.util.Arrays;
00026 import java.util.Vector;
00027 
00028 import com.meapsoft.featextractors.*;
00029 
00030 /*
00031  * Beat detector based on Dan Ellis' beattrack.m.
00032  *
00033  * 3 stages:
00034  *
00035  * 1. Get an envelope that indicates the onsets
00036  *    Here, we take a dB mel spectrogram, sum it across frequency, 
00037  *    then take first order difference (and maybe smooth the result).
00038  *
00039  * 2. Estimate the global tempo
00040  *    We autocorrelate the entire onset envelope and choose the best 
00041  *    peak in the acceptable range (around 100-250 bpm).
00042  * 
00043  * 3. Go through the onset envelope choosing the best set of times 
00044  *    that both lie near a lot of maxima in the envelope and are 
00045  *    spaced apart by the global tempo period.  We do this with 
00046  *    dynamic programming, choosing a best predecessor for every beat,
00047  *    then finally tracing back the beat that has the best total 
00048  *    score at the end.
00049  *
00050  * @author Dan Ellis (dpwe@ee.columbia.edu)
00051  * @author Ron Weiss (ronw@ee.columbia.edu)
00052  */
00053 public class DpweBeatOnsetDetector extends DpweOnsetDetector
00054 {
00055     private static final int acmax = 128;
00056 
00057     // stabilize beat over this many periods each way 
00058     // - more = smoother, should be integer
00059     private static final int stabwin = 2; 
00060 
00061     // submultiple of the detected tactus = 1/mult
00062     private double divisor = 1;
00063 
00064     // DP search constants
00065     private double tightness = 6.0;     // strictness to tempo 
00066     private double alpha = 0.9;         // how much history is weighted vs. local events
00067 
00068     // no thresholds here
00069     public DpweBeatOnsetDetector(STFT stft, long numFrames)
00070     {
00071         super(stft, numFrames, 0); 
00072     }
00073 
00074     public DpweBeatOnsetDetector(STFT stft, long numFrames, double mult)
00075     {
00076         this(stft, numFrames);
00077 
00078         divisor = 1/mult;
00079     }
00080 
00081     protected void checkOnsets()
00082     {
00083         double[] mm = onsetFunction;
00084         
00085         //DSP.imagesc(DSP.slice(mm, 0, 10000), "mm");
00086 
00087         // remove DC
00088         double[] b = {1, -1};
00089         double[] a = {1, -0.99};
00090         double[] fmm = DSP.filter(b, a, mm); 
00091 
00092         //DSP.imagesc(DSP.slice(fmm, 0, 10000), "fmm");
00093 
00094         double[] xfmm = DSP.xcorr(fmm, fmm, acmax);
00095 
00096         // find local max in the global ac
00097         xfmm = DSP.slice(xfmm, acmax, 2*acmax);
00098         byte[] xpks = localmax(xfmm);
00099         
00100 
00101         // will not include 'edge peak' at first index, but make sure
00102         xpks[0] = 0;
00103 
00104         // delete all peaks that occur before first point below zero)
00105 //         for(int x = 0; x < xfmm.length; x++)   
00106 //             if(xfmm[x] < 0) 
00107 //                 break;
00108 //             else 
00109 //                 xpks[x] = 0;
00110         
00111         // largest local max after first neg pts
00112         double maxpk = DSP.max(DSP.subsref(xfmm, xpks));
00113 
00114         //DSP.imagesc(xfmm, "xfmm");
00115         //DSP.imagesc(DSP.times(xfmm, DSP.todouble(xpks)), "xpks");
00116 
00117         // then period is shortest period with a peak that approaches
00118         // the max 
00119         int startpd = 0;
00120         int pd = 0;
00121         for(int x = 0; x < xfmm.length; x++)
00122         {
00123             if(xpks[x] == 1)
00124             {
00125                 if(xfmm[x] > 0.5*maxpk)
00126                 {
00127                     startpd = x;
00128                     break;
00129                 }
00130             }
00131         }
00132 
00133         // apply divisor
00134         startpd = (int)(divisor*startpd);
00135         // should be pretty stable
00136         pd = startpd;
00137 
00138 
00139         //% Smooth beat events
00140         //templt = exp(-0.5*(([-pd:pd]/(pd/32)).^2));
00141         double[] templt = DSP.exp(DSP.times(DSP.power(DSP.times(DSP.range(-pd,pd),32.0/pd),2.0),-0.5));
00142         //localscore = conv(templt,onsetenv);
00143         //localscore = localscore(round(length(templt)/2)+[1:length(onsetenv)]);
00144         double[] localscore = DSP.conv(templt,fmm);
00145         localscore = DSP.slice(localscore, templt.length/2, templt.length/2+fmm.length-1);
00146         //DSP.imagesc(localscore, "localscore");
00147 
00148         //% DP version:
00149         //% backlink(time) is index of best preceding time for this point
00150         //% cumscore(time) is total cumulated score to this point
00151 
00152         //backlink = zeros(1,length(localscore));
00153         //cumscore = zeros(1,length(localscore));
00154 
00155         int[] backlink = new int[localscore.length];
00156         double[] cumscore = new double[localscore.length];
00157 
00158         //% search range for previous beat
00159         //prange = round(-2*pd):-round(pd/2);
00160         int prangemin = -2*pd;
00161         int prangemax = -pd/2;
00162 
00163         //% Skewed window
00164         //txwt = exp(-0.5*((tightness*log(prange/-pd)).^2));
00165         double[] txwt = DSP.exp(DSP.times(DSP.power(DSP.times(DSP.log(DSP.times(DSP.range(prangemin,prangemax),-1.0/pd)),tightness),2.0), -0.5));
00166 
00167         //starting = 1;
00168         int starting = 1;
00169         double maxlocalscore = DSP.max(localscore);
00170 
00171         //for i = 1:length(localscore)
00172         for(int i = 0; i < localscore.length; ++i) {
00173   
00174             //timerange = i + prange;
00175             double[] scorecands = new double[txwt.length];
00176             double[] valvals;
00177 
00178             //% Are we reaching back before time zero?
00179             //zpad = max(0, min(1-timerange(1),length(prange)));
00180             if(i+prangemin < 0) {
00181                 int valpts = 0;
00182                 for(int j = 0; j < scorecands.length; ++j) scorecands[j]=0;
00183                 if(i+prangemax >=0) {
00184                     valpts = i + prangemax + 1;
00185                     valvals = DSP.times(DSP.slice(txwt,txwt.length-valpts,txwt.length-1),DSP.slice(cumscore,i+prangemax-valpts+1,i+prangemax));
00186                     for(int j = 0; j < valpts; ++j) scorecands[scorecands.length-valpts+j] = valvals[j];
00187                 }
00188             } else {
00189             //% Search over all possible predecessors and apply transition 
00190             //% weighting
00191             //scorecands = txwt .* [zeros(1,zpad),cumscore(timerange(zpad+1:end))];
00192                 scorecands = DSP.times(txwt,DSP.slice(cumscore,i+prangemin,i+prangemax));
00193             }
00194 
00195             //% Find best predecessor beat
00196             //[vv,xx] = max(scorecands);
00197             double vv = DSP.max(scorecands);
00198             int xx = DSP.argmax(scorecands);
00199             //% Add on local score
00200             //cumscore(i) = alpha*vv + (1-alpha)*localscore(i);
00201             cumscore[i] = alpha*vv + (1-alpha)*localscore[i];
00202 
00203             // % special case to catch first onset
00204             //if starting == 1 & localscore(i) < 0.01*max(localscore);
00205             if ( starting == 1 && localscore[i] < 0.01*maxlocalscore) {
00206                 //backlink(i) = -1;
00207                 backlink[i] = -1;
00208             } else {
00209                 //backlink(i) = timerange(xx);
00210                 backlink[i] = i+prangemin+xx;
00211                 //% prevent it from resetting, even through a stretch of silence
00212                 starting = 0;
00213             }
00214         }
00215 
00216         //%%%% Backtrace
00217 
00218         //% Cumulated score is stabilized to lie in constant range, 
00219         //% so just look for one near the end that has a reasonable score
00220         //medscore = median(cumscore(localmax(cumscore)));
00221         double medscore = DSP.median(DSP.subsref(cumscore, localmax(cumscore)));
00222         //bestendx = max(find(cumscore .* localmax(cumscore) > 0.5*medscore));
00223         int bestendx = 0;
00224         int jj = cumscore.length-2;
00225         while(jj > 0 && bestendx == 0) {
00226             if (cumscore[jj] > cumscore[jj-1] && cumscore[jj] >= cumscore[jj+1] && cumscore[jj] > 0.5*medscore) {
00227                 bestendx = jj;
00228             }
00229             --jj;
00230         }
00231 
00232         //b = bestendx;
00233 
00234         int nbeats = 0;
00235         int[] tmplinks = new int[cumscore.length];
00236         tmplinks[0] = bestendx;
00237         //while backlink(b(end)) > 0
00238         int bb;
00239         while ((bb = backlink[tmplinks[nbeats]]) > 0) {
00240             //b = [b,backlink(b(end))];
00241             ++nbeats;
00242             tmplinks[nbeats] = bb;
00243         }
00244 
00245         //b = fliplr(b);
00246         ++nbeats;
00247 
00248         //% return beat times in secs
00249         //b = b / sgsrate;
00250 
00251         for (int j = 0; j < nbeats; ++j) {
00252             int thisbeat = tmplinks[nbeats-1-j];
00253             //System.out.println("thisbeat = "+thisbeat+" nextbeat = "+nextbeat+" pd = "+pd);
00254             //if(cb < fmmxs.length)
00255             //    fmmxs[cb] = fmmx;
00256             //if(cb < xcrs.length)
00257             //    xcrs[cb++] = xcr;
00258 
00259             notifyListeners(thisbeat, 0);
00260         }
00261 
00262         //DSP.imagesc(DSP.transpose(fmmxs), "fmmxs");
00263         //DSP.imagesc(templt), "templt");
00264         //DSP.imagesc(DSP.transpose(xcrs), "xcrs");
00265     }
00266 
00267     // Find local maxima in function a.  Returns a binary array with
00268     // 1's where a has a local maximum.
00269     protected byte[] localmax(double[] a)
00270     {   
00271         byte[] v = new byte[a.length];
00272         Arrays.fill(v, (byte)0);
00273 
00274         for(int fr = 1; fr < a.length-1; fr++)
00275             if(a[fr] > a[fr-1] && a[fr] > a[fr+1]) 
00276                 v[fr] = 1;
00277 
00278         return v;
00279     }
00280 }

Generated on Tue Feb 6 19:02:25 2007 for MEAPsoft by doxygen1.2.18