Main Page   Class Hierarchy   Compound List   File List   Compound Members  

AvgPitchSimple.java

00001 /*
00002  *  Copyright 2006 Columbia University.
00003  *
00004  *  This file is part of MEAPsoft.
00005  *
00006  *  MEAPsoft is free software; you can redistribute it and/or modify
00007  *  it under the terms of the GNU General Public License version 2 as
00008  *  published by the Free Software Foundation.
00009  *
00010  *  MEAPsoft is distributed in the hope that it will be useful, but
00011  *  WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  *  General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU General Public License
00016  *  along with MEAPsoft; if not, write to the Free Software
00017  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
00018  *  02110-1301 USA
00019  *
00020  *  See the file "COPYING" for the text of the license.
00021  */
00022 
00023 package com.meapsoft.featextractors;
00024 
00025 import java.util.Arrays;
00026 
00027 import com.meapsoft.MEAPUtil;
00028 import com.meapsoft.RingMatrix;
00029 import com.meapsoft.STFT;
00030 
00041 public class AvgPitchSimple extends FeatureExtractor {
00042     
00043     // earliest FFT bin to use
00044 static final int FIRSTBAND = 3;
00045 
00046     double[] pitchWt;
00047     double[] linSpec;
00048 
00049     double bin2hz;
00050     int N;
00051 
00052 // #define CHROMA_LOG2 (0.69314718055995)
00053 
00054   public double hz2octs(double fq) {
00055       return Math.log(fq / 440.0)/0.69314718055995;
00056   }
00057 
00058   // Default constructor 
00059   public AvgPitchSimple() {
00060       //this(129, 44100.0);
00061       //this(MEAPUtil.frameSize/2+1, MEAPUtil.samplingRate);
00062       this(1025, MEAPUtil.samplingRate);
00063   }
00064 
00065   public AvgPitchSimple(int N, double sampleRate) {
00066       this.N = N;
00067       linSpec = new double[N];
00068       pitchWt = new double[N];
00069 
00070       // Create the weighting profile for choosing the pitch
00071       // Gaussian in log-F space centered on 110Hz with SD of 2 octaves
00072 
00073       bin2hz = sampleRate / (2*(N-1));
00074 
00075       for (int i = FIRSTBAND; i < N; ++i) {
00076           pitchWt[i] = Math.exp(-0.5 * Math.pow((hz2octs(bin2hz*i)+2)/2, 2));
00077       }
00078   }
00079 
00080   public double[] features(STFT stft, long startFrame, int length) {
00081     double[] pitch = new double[1];
00082     double[] curFrame;
00083 
00084     boolean recalculateSTFT = stft.getRows() != N;
00085     RingMatrix newstft = null;
00086     if(recalculateSTFT) {
00087         // keep the same number of frames as in stft
00088         //use STFT -- it's a static method!
00089         newstft = STFT.getSTFT(stft.getSamples(startFrame, startFrame+length), (N-1)*2, MEAPUtil.frameSize);
00090         length = newstft.getColumns();
00091     }    
00092 
00093     double wtdsum = 0;
00094     double sumwts = 0;
00095 
00096     // collect average linear spectrum
00097     Arrays.fill(linSpec, 0);
00098     for(int frame=0; frame<length; frame++) {
00099 
00100         if(!recalculateSTFT)
00101             curFrame = stft.getFrame(startFrame+frame);
00102         else
00103             curFrame = newstft.getColumn(frame);
00104 
00105         if(curFrame != null) {
00106             for(int band=0; band<linSpec.length; band++)
00107                 linSpec[band] += Math.pow(10,curFrame[band]/10) / length;
00108         }
00109     }
00110 
00111     // now pick best peak from linspec
00112 
00113     double pmax = 0;
00114     int maxbin = 0;
00115     for(int band=FIRSTBAND; band<N; band++) {
00116         double pwr = pitchWt[band]*linSpec[band];
00117         if (pwr > pmax) {
00118             pmax = pwr;
00119             maxbin = band;
00120         }
00121     }
00122 
00123     // cubic interpolation
00124     double yz = linSpec[maxbin];
00125     double ym = linSpec[maxbin-1];
00126     double yp = linSpec[maxbin+1];
00127     //treating Y as k(x-x0)^2 + c, we have samples at y(0), y(-1), y(1)
00128     // yz = k x0^2 + c
00129     // yp = k (1-x0)^2 + c  = k + k x0 ^ 2 - 2 k x0 + c
00130     // ym = k (1+x0)^2 + c  = k + k x0 ^ 2 + 2 k x0 + c
00131     double k  = (yp+ym)/2-yz;
00132     double x0 = (ym-yp)/(4*k);
00133     double c  = yz - k*Math.pow(x0,2);
00134     // y = kx^2 +kx0^2 -2kx0.x +c
00135     // dy/dx = 2kx - 2kx0 = 0 when x = x0
00136 
00137     pitch[0] = hz2octs(bin2hz * (maxbin + x0));
00138 
00139     return pitch;
00140   }
00141 
00142   public String description()
00143   {
00144           return "Provides a pitch estimation for each segment of sound.";
00145   }
00146 }

Generated on Thu May 11 15:04:10 2006 for MEAPsoft by doxygen 1.2.18