Main Page   Packages   Class Hierarchy   Compound List   File List   Compound Members  

DpweOnsetDetector.java

00001 /*
00002  *  Copyright 2006-2007 Columbia University.
00003  *
00004  *  This file is part of MEAPsoft.
00005  *
00006  *  MEAPsoft is free software; you can redistribute it and/or modify
00007  *  it under the terms of the GNU General Public License version 2 as
00008  *  published by the Free Software Foundation.
00009  *
00010  *  MEAPsoft is distributed in the hope that it will be useful, but
00011  *  WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  *  General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU General Public License
00016  *  along with MEAPsoft; if not, write to the Free Software
00017  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
00018  *  02110-1301 USA
00019  *
00020  *  See the file "COPYING" for the text of the license.
00021  */
00022 
00023 package com.meapsoft;
00024 
00025 import java.util.Arrays;
00026 
00027 import com.meapsoft.featextractors.*;
00028 
00029 /*
00030  * Onset detector based on Dan Ellis' beattrack.m.
00031  *
00032  * Get an envelope that indicates the onsets. Here, we take a dB mel
00033  * spectrogram, sum it across frequency, then take first order
00034  * difference (and maybe smooth the result).
00035  *
00036  * @author Dan Ellis (dpwe@ee.columbia.edu)
00037  * @author Ron Weiss (ronw@ee.columbia.edu)
00038  */
00039 public class DpweOnsetDetector extends OnsetDetector
00040 {
00041     // Onset detection function computed from the STFT
00042     protected double[] onsetFunction;
00043     // number of frames in onsetFunction
00044     private long numFrames;
00045     // silence threshold in dB
00046     protected double silenceThresh = -40; 
00047     // gain applied to the median filter threshold in checkOnsets
00048     private double threshMult = 1;
00049 
00050     private AvgMelSpec melSpec;
00051     private double[] lastMelFrame = null;
00052 
00053 
00054     private double smtime = 0.10;
00055     static final int sr = 32000;
00056     static final int swin = 1024;
00057     static final int nmel = 40;
00058     static final double sgsrate = sr/(swin/2);
00059 
00060     // dpwe debug 2006-12-11
00061     //static final int nDebugFrames = 1500;
00062     //private double[][] melFrames = new double[nDebugFrames][nmel];
00063     //private int nMelFrames = 0;
00064 
00065     //private double[] boost;
00066     
00067     //private double[][] specgram;
00068 
00069     public DpweOnsetDetector(STFT stft, long numFrames, double thresh)
00070     {
00071         super(stft, 0, 0);
00072 
00073         threshMult = thresh;
00074 
00075         onsetFunction = new double[(int)numFrames];
00076         this.numFrames = numFrames;
00077         melSpec = new AvgMelSpec(swin/2+1, sr, nmel);
00078 
00079         // low frequency boost for bass drum
00080         //double[] freqs = DSP.times(DSP.range(0,swin/2), sr/swin);
00081         //        double[] lfboost = DSP.times(DSP.log10(DSP.max(DSP.plus(DSP.rdivide(DSP.minus(200, freqs), 200.0), 1), 1)), 10.0);
00082         // // high frequency boost for noisy drums
00083         // //double[] hfboost = DSP.times(DSP.log10(DSP.max(DSP.plus(DSP.rdivide(DSP.minus(freqs, 6000), 6000), 1), 1)), 10.0);
00084         // double[] hfboost = new double[lfboost.length];
00085         //for(int x = 0; x < hfboost.length; x++)
00086         //{
00087         //    if(freqs[x] < 6000)
00088         //        hfboost[x] = 0;
00089         //    else
00090         //        hfboost[x] = 3;
00091         //}
00092         //boost = DSP.plus(lfboost, hfboost);
00093 
00094         //DSP.imagesc(boost);
00095 
00096         //specgram = new double[(int)numFrames][stft.getColumns()]; 
00097     }
00098 
00099     public DpweOnsetDetector(STFT stft, long numFrames, double thresh, double smt)
00100     {
00101         this(stft, numFrames, thresh);
00102 
00103         smtime = smt;
00104     }    
00105 
00111     public void newFrame(STFT stft, long newestFrame) 
00112     {
00113         if(newestFrame <= numFrames && newestFrame != -1)
00114         {
00115             // apply frequency weights
00116             //double[] D = stft.getFrame(newestFrame);
00117             //stft.setFrame(newestFrame, DSP.plus(D, boost)); 
00118 
00119             //if(newestFrame < specgram.length)
00120             //    specgram[(int)newestFrame] = stft.getFrame(newestFrame);
00121 
00122             double[] melFrame = melSpec.features(stft, newestFrame, 1);
00123 
00124             // threshold:
00125             melFrame = DSP.max(melFrame, silenceThresh);
00126 
00127             // dpwe debug 2006-12-11
00128 //          if (nMelFrames < nDebugFrames) {
00129 //              for (int j = 0; j < nmel; ++j) {
00130 //                  melFrames[nMelFrames][j] = melFrame[j];
00131 //              }
00132 //              ++nMelFrames;
00133 //              if (nMelFrames == nDebugFrames) {
00134 //                  DSP.imagesc(melFrames, "melFrames");
00135 //              }
00136 //          }
00137             
00138             // is this the first frame we've seen?
00139             if(lastMelFrame == null)
00140             {
00141                 lastMelFrame = melFrame;
00142                 return;
00143             }
00144             
00145             long currFrame = newestFrame-1;
00146             onsetFunction[(int)currFrame] = 
00147                 DSP.mean(DSP.max(DSP.minus(melFrame, lastMelFrame), 0));
00148             
00149             lastMelFrame = melFrame;
00150         }
00151         else
00152         {
00153             // if this is the last frame in stft, do some smoothing and
00154             // find local maxes.
00155             checkOnsets();
00156         }
00157     }
00158 
00159     protected void checkOnsets()
00160     {
00161         //DSP.imagesc(specgram, "specgram");
00162         //DSP.imagesc(onsetFunction, "onset function");
00163         
00164         //DSP.wavwrite(DSP.rdivide(onsetFunction, DSP.max(onsetFunction)), (int)sgsrate, "onsetFunction.wav");
00165 
00166         // smooth like crazy
00167         int winLen = (int)(smtime*sgsrate);
00168         // make it odd
00169         winLen = (int)Math.round((winLen-1)/2)*2 + 1;
00170         double[] smwin = DSP.hanning(winLen);
00171         smwin = DSP.times(smwin, 1/DSP.sum(smwin));
00172         onsetFunction = DSP.conv(smwin, onsetFunction);
00173         onsetFunction = DSP.slice(onsetFunction, (int)(winLen-1)/2, (int)(winLen-1)/2+(int)numFrames-1);
00174         
00175         // remove DC
00176         double[] b = {1, -1};
00177         double[] a = {1, -0.99};
00178         onsetFunction = DSP.filter(b, a, onsetFunction); 
00179         
00180         onsetFunction = DSP.max(onsetFunction, 0);
00181         
00182         // normalize
00183         //onsetFunction = DSP.times(onsetFunction, 1/DSP.max(onsetFunction));
00184         
00185         // find local maxima in onsetFunction
00186         //double[] threshFunc = new double[onsetFunction.length];
00187         double dcThresh = 0.005; 
00188         int nwin = 50;
00189         for(int fr = 1; fr < onsetFunction.length-1; fr++)
00190         {
00191             // threshold using a median filter over 50 point window:
00192             double thresh = dcThresh + threshMult
00193                 * DSP.median(DSP.slice(onsetFunction, fr > nwin/2 ? fr-nwin/2 : 0, 
00194                        fr > onsetFunction.length - nwin/2 ? onsetFunction.length : fr+nwin/2-1));
00195             
00196             //threshFunc[fr] = thresh;
00197             
00198             if(onsetFunction[fr] > thresh
00199                && onsetFunction[fr] > onsetFunction[fr-1]
00200                && onsetFunction[fr] > onsetFunction[fr+1]) 
00201                 notifyListeners(fr, 0);
00202         }
00203         
00204         //System.out.println(threshMult);
00205         
00206         //DSP.imagesc(onsetFunction, "smoothed onsetFunc");
00207         //DSP.imagesc(threshFunc, "thresh");
00208         //int len = (int)Math.min(20000, onsetFunction.length);
00209         //double[][] d = new double[2][len];
00210         //d[1] = DSP.slice(onsetFunction, 0, len);
00211         //d[0] = DSP.slice(threshFunc, 0, len);
00212         //DSP.imagesc(DSP.transpose(d), "onset function and threshold");
00213 
00214         //DSP.wavwrite(onsetFunction, (int)sgsrate, "smoothedOnsetFunction.wav");
00215         //DSP.wavwrite(threshFunc, (int)sgsrate, "threshFunction.wav");
00216     }
00217 }

Generated on Tue Feb 6 19:02:25 2007 for MEAPsoft by doxygen1.2.18