Main Page   Packages   Class Hierarchy   Compound List   File List   Compound Members  

AvgMelSpec.java

00001 
00002 /*
00003  *  Copyright 2006-2007 Columbia University.
00004  *
00005  *  This file is part of MEAPsoft.
00006  *
00007  *  MEAPsoft is free software; you can redistribute it and/or modify
00008  *  it under the terms of the GNU General Public License version 2 as
00009  *  published by the Free Software Foundation.
00010  *
00011  *  MEAPsoft is distributed in the hope that it will be useful, but
00012  *  WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  *  General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU General Public License
00017  *  along with MEAPsoft; if not, write to the Free Software
00018  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
00019  *  02110-1301 USA
00020  *
00021  *  See the file "COPYING" for the text of the license.
00022  */
00023 
00024 package com.meapsoft.featextractors;
00025 
00026 import java.util.Arrays;
00027 
00028 import com.meapsoft.FeatExtractor;
00029 import com.meapsoft.STFT;
00030 import com.meapsoft.RingMatrix;
00031 
00038 public class AvgMelSpec extends FeatureExtractor {
00039 
00040     // for each mel bin...
00041     protected double[] melCenter;   // actual targe mel value at center of this bin
00042     protected double[] melWidth;    // mel width divisor for this bin (constant, except broadens in low bins)
00043     // for each fft bin
00044     protected double[] melOfLin;
00045     protected double[] linSpec;
00046 
00047     protected int N;
00048     protected int outDim;
00049 
00050   public double lin2mel(double fq) {
00051     return 1127.0 * Math.log(1.0 + fq / 700.0);
00052   }
00053 
00054   public double mel2lin(double mel) {
00055     return 700.0 * (Math.exp(mel / 1127.0) - 1.0);
00056   }
00057 
00058   // Default constructor - Use 40 mel spaced bins 
00059   public AvgMelSpec() {
00060       this(FeatExtractor.nfft/2+1, FeatExtractor.feSamplingRate, 40);
00061   }
00062 
00063   public AvgMelSpec(int N, float sampleRate, int outDim) {
00064     this.N = N;
00065     this.outDim = outDim;
00066     linSpec = new double[N];
00067 
00068     // Calculate the locations of the bin centers on the mel scale and
00069     // as indices into the input vector
00070     melCenter = new double[outDim+2];
00071     melWidth = new double[outDim+2];
00072 
00073     double melMin = lin2mel(0);
00074     //double melMax = lin2mel(sampleRate/2);
00075     double melMax = lin2mel((8000.0 < sampleRate/2)? 8000.0 : sampleRate/2); // dpwe 2006-12-11 - hard maximum
00076     double hzPerBin = sampleRate/2/N;
00077     for(int i=0; i<outDim+2; i++) {
00078       melCenter[i] = melMin + i * (melMax - melMin) / (outDim + 1);
00079       // System.out.println("centersMel["+i+"]="+centersMel[i]+" centersInd[]="+centersInd[i]);
00080     }
00081     for(int i=0; i<outDim+1; i++) {
00082         melWidth[i] = melCenter[i+1]-melCenter[i];
00083         double linbinwidth = (mel2lin(melCenter[i+1])-mel2lin(melCenter[i]))/hzPerBin;
00084         if (linbinwidth < 1) {
00085             melWidth[i] = lin2mel(mel2lin(melCenter[i])+hzPerBin) - melCenter[i];
00086         }
00087         //System.out.println("melBin="+i+" melCenter="+melCenter[i]+" melWidth="+melWidth[i]+"("+mel2lin(melCenter[i]-melWidth[i])/hzPerBin+".."+mel2lin(melCenter[i])/hzPerBin+".."+mel2lin(melCenter[i]+melWidth[i])/hzPerBin);
00088     }
00089     // precalculate mel translations of fft bin frequencies
00090     melOfLin = new double[N];
00091     for(int i=0; i<N; i++) {
00092       melOfLin[i] = lin2mel(i * sampleRate / (2*N));
00093       //      System.out.println("linbin2Mel["+i+"]="+linbin2mel[i]);
00094     }
00095   }
00096 
00097 
00098   public double[] features(STFT stft, long startFrame, int length) {
00099     double[] melSpec = new double[outDim];
00100     double[] curFrame;
00101     double sum = 0;
00102 
00103     // we're expecting a certain frequency resolution...
00104     boolean recalculateSTFT = stft.getRows() != N;
00105     RingMatrix newstft = null;
00106     if(recalculateSTFT) {
00107         // keep the same number of frames as in stft
00108         newstft = STFT.getSTFT(stft.getSamples(startFrame, startFrame+length), (N-1)*2, stft.nhop);
00109         length = newstft.getColumns();
00110     }    
00111 
00112     // intialize average to 0
00113     Arrays.fill(linSpec, 0);
00114 
00115     // collect average linear spectrum
00116     for(int frame=0; frame<length; frame++) {
00117         if(!recalculateSTFT)
00118             curFrame = stft.getFrame(startFrame+frame);
00119         else
00120             curFrame = newstft.getColumn(frame);
00121 
00122         for(int band=0; band<linSpec.length; band++)
00123             linSpec[band] += curFrame[band] / length;
00124     }
00125     
00126     // convert log magnitude to linear magnitude for binning
00127     for(int band=0; band<linSpec.length; band++)
00128         //linSpec[band] = Math.exp(linSpec[band]);
00129         linSpec[band] = Math.pow(10,linSpec[band]/10);
00130     
00131     // convert to mel scale
00132     for(int bin=0; bin<outDim; bin++) {
00133         // initialize
00134         melSpec[bin] = 0;
00135         
00136         for(int i = 0; i < linSpec.length; ++i) {
00137             double weight = 1.0 - (Math.abs(melOfLin[i] - melCenter[bin])/melWidth[bin]);
00138             if (weight > 0) {
00139                 melSpec[bin] += weight * linSpec[i];
00140             }
00141         }
00142 
00143         // Take log
00144         melSpec[bin] = 10*Math.log(melSpec[bin]) / Math.log(10);
00145         
00146         sum += melSpec[bin];
00147     }
00148     
00149     // Audio scrubber takes care of normalization, level is a good cue
00150     //     for(int bin=0; bin<outDim; bin++)
00151     //       melSpec[bin] = melSpec[bin] / sum;
00152     
00153     return melSpec;
00154   }
00155 
00156   public String description()
00157   {
00158           return "Computes the mean spectrum of a chunk and converts it to the perceptually weighted Mel frequency scale.";
00159   }
00160 }

Generated on Tue Feb 6 19:02:24 2007 for MEAPsoft by doxygen 1.2.18