00001 /* 00002 * Copyright 2006-2007 Columbia University. 00003 * 00004 * This file is part of MEAPsoft. 00005 * 00006 * MEAPsoft is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License version 2 as 00008 * published by the Free Software Foundation. 00009 * 00010 * MEAPsoft is distributed in the hope that it will be useful, but 00011 * WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00013 * General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with MEAPsoft; if not, write to the Free Software 00017 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 00018 * 02110-1301 USA 00019 * 00020 * See the file "COPYING" for the text of the license. 00021 */ 00022 00023 package com.meapsoft; 00024 00025 import java.util.Arrays; 00026 00027 import com.meapsoft.featextractors.*; 00028 00029 /* 00030 * Onset detector based on Dan Ellis' beattrack.m. 00031 * 00032 * Get an envelope that indicates the onsets. Here, we take a dB mel 00033 * spectrogram, sum it across frequency, then take first order 00034 * difference (and maybe smooth the result). 00035 * 00036 * @author Dan Ellis ([email protected]) 00037 * @author Ron Weiss ([email protected]) 00038 */ 00039 public class DpweOnsetDetector extends OnsetDetector 00040 { 00041 // Onset detection function computed from the STFT 00042 protected double[] onsetFunction; 00043 // number of frames in onsetFunction 00044 private long numFrames; 00045 // silence threshold in dB 00046 protected double silenceThresh = -40; 00047 // gain applied to the median filter threshold in checkOnsets 00048 private double threshMult = 1; 00049 00050 private AvgMelSpec melSpec; 00051 private double[] lastMelFrame = null; 00052 00053 00054 private double smtime = 0.10; 00055 static final int sr = 32000; 00056 static final int swin = 1024; 00057 static final int nmel = 40; 00058 static final double sgsrate = sr/(swin/2); 00059 00060 // dpwe debug 2006-12-11 00061 //static final int nDebugFrames = 1500; 00062 //private double[][] melFrames = new double[nDebugFrames][nmel]; 00063 //private int nMelFrames = 0; 00064 00065 //private double[] boost; 00066 00067 //private double[][] specgram; 00068 00069 public DpweOnsetDetector(STFT stft, long numFrames, double thresh) 00070 { 00071 super(stft, 0, 0); 00072 00073 threshMult = thresh; 00074 00075 onsetFunction = new double[(int)numFrames]; 00076 this.numFrames = numFrames; 00077 melSpec = new AvgMelSpec(swin/2+1, sr, nmel); 00078 00079 // low frequency boost for bass drum 00080 //double[] freqs = DSP.times(DSP.range(0,swin/2), sr/swin); 00081 // double[] lfboost = DSP.times(DSP.log10(DSP.max(DSP.plus(DSP.rdivide(DSP.minus(200, freqs), 200.0), 1), 1)), 10.0); 00082 // // high frequency boost for noisy drums 00083 // //double[] hfboost = DSP.times(DSP.log10(DSP.max(DSP.plus(DSP.rdivide(DSP.minus(freqs, 6000), 6000), 1), 1)), 10.0); 00084 // double[] hfboost = new double[lfboost.length]; 00085 //for(int x = 0; x < hfboost.length; x++) 00086 //{ 00087 // if(freqs[x] < 6000) 00088 // hfboost[x] = 0; 00089 // else 00090 // hfboost[x] = 3; 00091 //} 00092 //boost = DSP.plus(lfboost, hfboost); 00093 00094 //DSP.imagesc(boost); 00095 00096 //specgram = new double[(int)numFrames][stft.getColumns()]; 00097 } 00098 00099 public DpweOnsetDetector(STFT stft, long numFrames, double thresh, double smt) 00100 { 00101 this(stft, numFrames, thresh); 00102 00103 smtime = smt; 00104 } 00105 00111 public void newFrame(STFT stft, long newestFrame) 00112 { 00113 if(newestFrame <= numFrames && newestFrame != -1) 00114 { 00115 // apply frequency weights 00116 //double[] D = stft.getFrame(newestFrame); 00117 //stft.setFrame(newestFrame, DSP.plus(D, boost)); 00118 00119 //if(newestFrame < specgram.length) 00120 // specgram[(int)newestFrame] = stft.getFrame(newestFrame); 00121 00122 double[] melFrame = melSpec.features(stft, newestFrame, 1); 00123 00124 // threshold: 00125 melFrame = DSP.max(melFrame, silenceThresh); 00126 00127 // dpwe debug 2006-12-11 00128 // if (nMelFrames < nDebugFrames) { 00129 // for (int j = 0; j < nmel; ++j) { 00130 // melFrames[nMelFrames][j] = melFrame[j]; 00131 // } 00132 // ++nMelFrames; 00133 // if (nMelFrames == nDebugFrames) { 00134 // DSP.imagesc(melFrames, "melFrames"); 00135 // } 00136 // } 00137 00138 // is this the first frame we've seen? 00139 if(lastMelFrame == null) 00140 { 00141 lastMelFrame = melFrame; 00142 return; 00143 } 00144 00145 long currFrame = newestFrame-1; 00146 onsetFunction[(int)currFrame] = 00147 DSP.mean(DSP.max(DSP.minus(melFrame, lastMelFrame), 0)); 00148 00149 lastMelFrame = melFrame; 00150 } 00151 else 00152 { 00153 // if this is the last frame in stft, do some smoothing and 00154 // find local maxes. 00155 checkOnsets(); 00156 } 00157 } 00158 00159 protected void checkOnsets() 00160 { 00161 //DSP.imagesc(specgram, "specgram"); 00162 //DSP.imagesc(onsetFunction, "onset function"); 00163 00164 //DSP.wavwrite(DSP.rdivide(onsetFunction, DSP.max(onsetFunction)), (int)sgsrate, "onsetFunction.wav"); 00165 00166 // smooth like crazy 00167 int winLen = (int)(smtime*sgsrate); 00168 // make it odd 00169 winLen = (int)Math.round((winLen-1)/2)*2 + 1; 00170 double[] smwin = DSP.hanning(winLen); 00171 smwin = DSP.times(smwin, 1/DSP.sum(smwin)); 00172 onsetFunction = DSP.conv(smwin, onsetFunction); 00173 onsetFunction = DSP.slice(onsetFunction, (int)(winLen-1)/2, (int)(winLen-1)/2+(int)numFrames-1); 00174 00175 // remove DC 00176 double[] b = {1, -1}; 00177 double[] a = {1, -0.99}; 00178 onsetFunction = DSP.filter(b, a, onsetFunction); 00179 00180 onsetFunction = DSP.max(onsetFunction, 0); 00181 00182 // normalize 00183 //onsetFunction = DSP.times(onsetFunction, 1/DSP.max(onsetFunction)); 00184 00185 // find local maxima in onsetFunction 00186 //double[] threshFunc = new double[onsetFunction.length]; 00187 double dcThresh = 0.005; 00188 int nwin = 50; 00189 for(int fr = 1; fr < onsetFunction.length-1; fr++) 00190 { 00191 // threshold using a median filter over 50 point window: 00192 double thresh = dcThresh + threshMult 00193 * DSP.median(DSP.slice(onsetFunction, fr > nwin/2 ? fr-nwin/2 : 0, 00194 fr > onsetFunction.length - nwin/2 ? onsetFunction.length : fr+nwin/2-1)); 00195 00196 //threshFunc[fr] = thresh; 00197 00198 if(onsetFunction[fr] > thresh 00199 && onsetFunction[fr] > onsetFunction[fr-1] 00200 && onsetFunction[fr] > onsetFunction[fr+1]) 00201 notifyListeners(fr, 0); 00202 } 00203 00204 //System.out.println(threshMult); 00205 00206 //DSP.imagesc(onsetFunction, "smoothed onsetFunc"); 00207 //DSP.imagesc(threshFunc, "thresh"); 00208 //int len = (int)Math.min(20000, onsetFunction.length); 00209 //double[][] d = new double[2][len]; 00210 //d[1] = DSP.slice(onsetFunction, 0, len); 00211 //d[0] = DSP.slice(threshFunc, 0, len); 00212 //DSP.imagesc(DSP.transpose(d), "onset function and threshold"); 00213 00214 //DSP.wavwrite(onsetFunction, (int)sgsrate, "smoothedOnsetFunction.wav"); 00215 //DSP.wavwrite(threshFunc, (int)sgsrate, "threshFunction.wav"); 00216 } 00217 }