Main Page   Packages   Class Hierarchy   Compound List   File List   Compound Members  

AvgPitchSimple.java

00001 /*
00002  *  Copyright 2006-2007 Columbia University.
00003  *
00004  *  This file is part of MEAPsoft.
00005  *
00006  *  MEAPsoft is free software; you can redistribute it and/or modify
00007  *  it under the terms of the GNU General Public License version 2 as
00008  *  published by the Free Software Foundation.
00009  *
00010  *  MEAPsoft is distributed in the hope that it will be useful, but
00011  *  WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  *  General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU General Public License
00016  *  along with MEAPsoft; if not, write to the Free Software
00017  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
00018  *  02110-1301 USA
00019  *
00020  *  See the file "COPYING" for the text of the license.
00021  */
00022 
00023 package com.meapsoft.featextractors;
00024 
00025 import java.util.Arrays;
00026 
00027 import com.meapsoft.MEAPUtil;
00028 import com.meapsoft.FeatExtractor;
00029 import com.meapsoft.RingMatrix;
00030 import com.meapsoft.STFT;
00031 
00042 public class AvgPitchSimple extends FeatureExtractor {
00043     
00044   // earliest FFT bin to use
00045   protected static final int FIRSTBAND = 3;
00046 
00047   protected double[] pitchWt;
00048   protected double[] linSpec;
00049 
00050   protected double bin2hz;
00051   protected int N;
00052 
00053 // #define CHROMA_LOG2 (0.69314718055995)
00054 
00055         //27.5 is A0, our lowest pitch
00056         //notes above A0 will be positive, below will be negative. A0 will be 0
00057   public double hz2octs(double fq) {
00058       if (fq <= 0) 
00059           return 0;
00060 
00061       return Math.log(fq / 27.5)/0.69314718055995;
00062   }
00063 
00064   // Default constructor 
00065   public AvgPitchSimple() {
00066        this(FeatExtractor.nfft/2+1, FeatExtractor.feSamplingRate);
00067   }
00068 
00069   public AvgPitchSimple(int N, double sampleRate) {
00070       this.N = N;
00071       linSpec = new double[N];
00072       pitchWt = new double[N];
00073 
00074       // Create the weighting profile for choosing the pitch
00075       // Gaussian in log-F space centered on 110Hz with SD of 2 octaves
00076                 //we have no idea what's going on here
00077                 //dan says "put minus four in there"
00078                 //-4 makes us center on A440
00079       bin2hz = sampleRate / (2*(N-1));
00080 
00081       for (int i = FIRSTBAND; i < N; ++i) {
00082           pitchWt[i] = Math.exp(-0.5 * Math.pow((hz2octs(bin2hz*i)-4)/2, 2));
00083       }
00084   }
00085 
00086   public double[] features(STFT stft, long startFrame, int length) {
00087     double[] pitch = new double[1];
00088     double[] curFrame;
00089 
00090     boolean recalculateSTFT = stft.getRows() != N;
00091     RingMatrix newstft = null;
00092     if(recalculateSTFT) {
00093         // keep the same number of frames as in stft
00094         newstft = STFT.getSTFT(stft.getSamples(startFrame, startFrame+length), (N-1)*2, stft.nhop);
00095         length = newstft.getColumns();
00096     }    
00097 
00098         //if there are no features we just bail.
00099         if (length == 0)  
00100                 return null;
00101         
00102     double wtdsum = 0;
00103     double sumwts = 0;
00104 
00105     // collect average linear spectrum
00106     Arrays.fill(linSpec, 0);
00107     for(int frame=0; frame<length; frame++) {
00108 
00109         if(!recalculateSTFT)
00110             curFrame = stft.getFrame(startFrame+frame);
00111         else
00112             curFrame = newstft.getColumn(frame);
00113 
00114         if(curFrame != null) {
00115             for(int band=0; band<linSpec.length; band++)
00116                 linSpec[band] += Math.pow(10,curFrame[band]/10) / length;
00117         }
00118     }
00119 
00120     // now pick best peak from linspec
00121 
00122     double pmax = -1;
00123     int maxbin = 0;
00124     for(int band=FIRSTBAND; band<N; band++) {
00125         double pwr = pitchWt[band]*linSpec[band];
00126                 if (pwr > pmax) {
00127                     pmax = pwr;
00128                     maxbin = band;
00129                 }
00130     }
00131 
00132     // cubic interpolation
00133     double yz = linSpec[maxbin];
00134     double ym = linSpec[maxbin-1];
00135     double yp = linSpec[maxbin+1];
00136     //treating Y as k(x-x0)^2 + c, we have samples at y(0), y(-1), y(1)
00137     // yz = k x0^2 + c
00138     // yp = k (1-x0)^2 + c  = k + k x0 ^ 2 - 2 k x0 + c
00139     // ym = k (1+x0)^2 + c  = k + k x0 ^ 2 + 2 k x0 + c
00140     double k  = (yp+ym)/2-yz;
00141     //this "fix" seems to affect too many segments!
00142         //if (k < 0.001) 
00143                 //k = 0.001;
00144 
00145     double x0 = (ym-yp)/(4*k);
00146     double c  = yz - k*Math.pow(x0,2);
00147     // y = kx^2 +kx0^2 -2kx0.x +c
00148     // dy/dx = 2kx - 2kx0 = 0 when x = x0
00149 
00150         //System.out.println("bin2hz: " + bin2hz + " maxbin: " + maxbin + " x0: " + x0);
00151     pitch[0] = hz2octs(bin2hz * (maxbin + x0));
00152 
00153     return pitch;
00154   }
00155 
00156   public String description()
00157   {
00158           return "Provides a pitch estimation for each segment of sound.";
00159   }
00160 }

Generated on Tue Feb 6 19:02:24 2007 for MEAPsoft by doxygen1.2.18