Main Page   Packages   Class Hierarchy   Compound List   File List   Compound Members  

AvgFreqSimple.java

00001 /*
00002  *  Copyright 2006-2007 Columbia University.
00003  *
00004  *  This file is part of MEAPsoft.
00005  *
00006  *  MEAPsoft is free software; you can redistribute it and/or modify
00007  *  it under the terms of the GNU General Public License version 2 as
00008  *  published by the Free Software Foundation.
00009  *
00010  *  MEAPsoft is distributed in the hope that it will be useful, but
00011  *  WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  *  General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU General Public License
00016  *  along with MEAPsoft; if not, write to the Free Software
00017  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
00018  *  02110-1301 USA
00019  *
00020  *  See the file "COPYING" for the text of the license.
00021  */
00022 
00023 package com.meapsoft.featextractors;
00024 
00025 import java.util.Arrays;
00026 
00027 import com.meapsoft.MEAPUtil;
00028 import com.meapsoft.FeatExtractor;
00029 import com.meapsoft.RingMatrix;
00030 import com.meapsoft.STFT;
00031 
00041 public class AvgFreqSimple extends FeatureExtractor {
00042   // earliest FFT bin to use
00043   static final int FIRSTBAND = 3;
00044 
00045   //double[] pitchWt;
00046   protected double[] linSpec;
00047 
00048   protected double bin2hz;
00049   protected int N;
00050 
00051   // Default constructor 
00052   public AvgFreqSimple() {
00053       this(FeatExtractor.nfft/2+1, FeatExtractor.feSamplingRate);
00054   }
00055 
00056   public AvgFreqSimple(int N, double sampleRate) {
00057       this.N = N;
00058       linSpec = new double[N];
00059 
00060       // Create the weighting profile for choosing the pitch
00061       // Gaussian in log-F space centered on 110Hz with SD of 2 octaves
00062 
00063       bin2hz = sampleRate / (2*(N-1));
00064   }
00065 
00066   public double[] features(STFT stft, long startFrame, int length) {
00067     double[] freq = new double[1];
00068     double[] curFrame;
00069 
00070     boolean recalculateSTFT = stft.getRows() != N;
00071     RingMatrix newstft = null;
00072     if(recalculateSTFT) {
00073         // keep the same number of frames as in stft
00074         newstft = STFT.getSTFT(stft.getSamples(startFrame, startFrame+length), (N-1)*2, stft.nhop);
00075         length = newstft.getColumns();
00076     }    
00077     
00078         //if there are no features we just bail.
00079         if (length == 0)  
00080                 return null;
00081 
00082     double wtdsum = 0;
00083     double sumwts = 0;
00084 
00085     // collect average linear spectrum
00086     Arrays.fill(linSpec, 0);
00087     for(int frame=0; frame<length; frame++) {
00088 
00089         if(!recalculateSTFT)
00090             curFrame = stft.getFrame(startFrame+frame);
00091         else
00092             curFrame = newstft.getColumn(frame);
00093 
00094         if(curFrame != null) {
00095             for(int band=0; band<linSpec.length; band++)
00096                 linSpec[band] += Math.pow(10,curFrame[band]/10) / length;
00097         }
00098     }
00099 
00100     // now pick best peak from linspec
00101 
00102     double pmax = -1;
00103     int maxbin = 0;
00104     for(int band=FIRSTBAND; band<N; band++) {
00105         //double pwr = pitchWt[band]*linSpec[band];
00106         double pwr = linSpec[band];
00107                 if (pwr > pmax) {
00108                     pmax = pwr;
00109                     maxbin = band;
00110                 }
00111     }
00112 
00113     // cubic interpolation
00114     double yz = linSpec[maxbin];
00115     double ym = linSpec[maxbin-1];
00116     double yp = linSpec[maxbin+1];
00117     //treating Y as k(x-x0)^2 + c, we have samples at y(0), y(-1), y(1)
00118     // yz = k x0^2 + c
00119     // yp = k (1-x0)^2 + c  = k + k x0 ^ 2 - 2 k x0 + c
00120     // ym = k (1+x0)^2 + c  = k + k x0 ^ 2 + 2 k x0 + c
00121     double k  = (yp+ym)/2-yz;
00122     double x0 = (ym-yp)/(4*k);
00123     double c  = yz - k*Math.pow(x0,2);
00124     // y = kx^2 +kx0^2 -2kx0.x +c
00125     // dy/dx = 2kx - 2kx0 = 0 when x = x0
00126 
00127         //System.out.println("bin2hz: " + bin2hz + " maxbin: " + maxbin + " x0: " + x0);
00128     //pitch[0] = hz2octs(bin2hz * (maxbin + x0));
00129         freq[0] = bin2hz * (maxbin + x0);
00130     return freq;
00131   }
00132 
00133   public String description()
00134   {
00135           return "Provides a frequency estimation for each segment of sound.";
00136   }
00137 }

Generated on Tue Feb 6 19:02:24 2007 for MEAPsoft by doxygen1.2.18