00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 package com.meapsoft.featextractors;
00024
00025 import java.util.Arrays;
00026
00027 import com.meapsoft.MEAPUtil;
00028 import com.meapsoft.FeatExtractor;
00029 import com.meapsoft.RingMatrix;
00030 import com.meapsoft.STFT;
00031
00042 public class AvgPitchSimple extends FeatureExtractor {
00043
00044
00045 protected static final int FIRSTBAND = 3;
00046
00047 protected double[] pitchWt;
00048 protected double[] linSpec;
00049
00050 protected double bin2hz;
00051 protected int N;
00052
00053
00054
00055
00056
00057 public double hz2octs(double fq) {
00058 if (fq <= 0)
00059 return 0;
00060
00061 return Math.log(fq / 27.5)/0.69314718055995;
00062 }
00063
00064
00065 public AvgPitchSimple() {
00066 this(FeatExtractor.nfft/2+1, FeatExtractor.feSamplingRate);
00067 }
00068
00069 public AvgPitchSimple(int N, double sampleRate) {
00070 this.N = N;
00071 linSpec = new double[N];
00072 pitchWt = new double[N];
00073
00074
00075
00076
00077
00078
00079 bin2hz = sampleRate / (2*(N-1));
00080
00081 for (int i = FIRSTBAND; i < N; ++i) {
00082 pitchWt[i] = Math.exp(-0.5 * Math.pow((hz2octs(bin2hz*i)-4)/2, 2));
00083 }
00084 }
00085
00086 public double[] features(STFT stft, long startFrame, int length) {
00087 double[] pitch = new double[1];
00088 double[] curFrame;
00089
00090 boolean recalculateSTFT = stft.getRows() != N;
00091 RingMatrix newstft = null;
00092 if(recalculateSTFT) {
00093
00094 newstft = STFT.getSTFT(stft.getSamples(startFrame, startFrame+length), (N-1)*2, stft.nhop);
00095 length = newstft.getColumns();
00096 }
00097
00098
00099 if (length == 0)
00100 return null;
00101
00102 double wtdsum = 0;
00103 double sumwts = 0;
00104
00105
00106 Arrays.fill(linSpec, 0);
00107 for(int frame=0; frame<length; frame++) {
00108
00109 if(!recalculateSTFT)
00110 curFrame = stft.getFrame(startFrame+frame);
00111 else
00112 curFrame = newstft.getColumn(frame);
00113
00114 if(curFrame != null) {
00115 for(int band=0; band<linSpec.length; band++)
00116 linSpec[band] += Math.pow(10,curFrame[band]/10) / length;
00117 }
00118 }
00119
00120
00121
00122 double pmax = -1;
00123 int maxbin = 0;
00124 for(int band=FIRSTBAND; band<N; band++) {
00125 double pwr = pitchWt[band]*linSpec[band];
00126 if (pwr > pmax) {
00127 pmax = pwr;
00128 maxbin = band;
00129 }
00130 }
00131
00132
00133 double yz = linSpec[maxbin];
00134 double ym = linSpec[maxbin-1];
00135 double yp = linSpec[maxbin+1];
00136
00137
00138
00139
00140 double k = (yp+ym)/2-yz;
00141
00142
00143
00144
00145 double x0 = (ym-yp)/(4*k);
00146 double c = yz - k*Math.pow(x0,2);
00147
00148
00149
00150
00151 pitch[0] = hz2octs(bin2hz * (maxbin + x0));
00152
00153 return pitch;
00154 }
00155
00156 public String description()
00157 {
00158 return "Provides a pitch estimation for each segment of sound.";
00159 }
00160 }