00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 package com.meapsoft.featextractors;
00024
00025 import java.util.Arrays;
00026
00027 import com.meapsoft.MEAPUtil;
00028 import com.meapsoft.RingMatrix;
00029 import com.meapsoft.STFT;
00030
00041 public class AvgPitchSimple extends FeatureExtractor {
00042
00043
00044 static final int FIRSTBAND = 3;
00045
00046 double[] pitchWt;
00047 double[] linSpec;
00048
00049 double bin2hz;
00050 int N;
00051
00052
00053
00054 public double hz2octs(double fq) {
00055 return Math.log(fq / 440.0)/0.69314718055995;
00056 }
00057
00058
00059 public AvgPitchSimple() {
00060
00061
00062 this(1025, MEAPUtil.samplingRate);
00063 }
00064
00065 public AvgPitchSimple(int N, double sampleRate) {
00066 this.N = N;
00067 linSpec = new double[N];
00068 pitchWt = new double[N];
00069
00070
00071
00072
00073 bin2hz = sampleRate / (2*(N-1));
00074
00075 for (int i = FIRSTBAND; i < N; ++i) {
00076 pitchWt[i] = Math.exp(-0.5 * Math.pow((hz2octs(bin2hz*i)+2)/2, 2));
00077 }
00078 }
00079
00080 public double[] features(STFT stft, long startFrame, int length) {
00081 double[] pitch = new double[1];
00082 double[] curFrame;
00083
00084 boolean recalculateSTFT = stft.getRows() != N;
00085 RingMatrix newstft = null;
00086 if(recalculateSTFT) {
00087
00088
00089 newstft = STFT.getSTFT(stft.getSamples(startFrame, startFrame+length), (N-1)*2, MEAPUtil.frameSize);
00090 length = newstft.getColumns();
00091 }
00092
00093 double wtdsum = 0;
00094 double sumwts = 0;
00095
00096
00097 Arrays.fill(linSpec, 0);
00098 for(int frame=0; frame<length; frame++) {
00099
00100 if(!recalculateSTFT)
00101 curFrame = stft.getFrame(startFrame+frame);
00102 else
00103 curFrame = newstft.getColumn(frame);
00104
00105 if(curFrame != null) {
00106 for(int band=0; band<linSpec.length; band++)
00107 linSpec[band] += Math.pow(10,curFrame[band]/10) / length;
00108 }
00109 }
00110
00111
00112
00113 double pmax = 0;
00114 int maxbin = 0;
00115 for(int band=FIRSTBAND; band<N; band++) {
00116 double pwr = pitchWt[band]*linSpec[band];
00117 if (pwr > pmax) {
00118 pmax = pwr;
00119 maxbin = band;
00120 }
00121 }
00122
00123
00124 double yz = linSpec[maxbin];
00125 double ym = linSpec[maxbin-1];
00126 double yp = linSpec[maxbin+1];
00127
00128
00129
00130
00131 double k = (yp+ym)/2-yz;
00132 double x0 = (ym-yp)/(4*k);
00133 double c = yz - k*Math.pow(x0,2);
00134
00135
00136
00137 pitch[0] = hz2octs(bin2hz * (maxbin + x0));
00138
00139 return pitch;
00140 }
00141
00142 public String description()
00143 {
00144 return "Provides a pitch estimation for each segment of sound.";
00145 }
00146 }