Main Page   Class Hierarchy   Compound List   File List   Compound Members  

AvgMelSpec.java

00001 /*
00002  *  Copyright 2006 Columbia University.
00003  *
00004  *  This file is part of MEAPsoft.
00005  *
00006  *  MEAPsoft is free software; you can redistribute it and/or modify
00007  *  it under the terms of the GNU General Public License version 2 as
00008  *  published by the Free Software Foundation.
00009  *
00010  *  MEAPsoft is distributed in the hope that it will be useful, but
00011  *  WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  *  General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU General Public License
00016  *  along with MEAPsoft; if not, write to the Free Software
00017  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
00018  *  02110-1301 USA
00019  *
00020  *  See the file "COPYING" for the text of the license.
00021  */
00022 
00023 package com.meapsoft.featextractors;
00024 
00025 import java.util.Arrays;
00026 
00027 import com.meapsoft.MEAPUtil;
00028 import com.meapsoft.STFT;
00029 
00036 public class AvgMelSpec extends FeatureExtractor {
00037         
00038   double[] centersMel;
00039   int[] centersInd;
00040   double[] freq2mel;
00041   double[] linSpec;
00042   int N, outDim;
00043 
00044   public double lin2mel(double fq) {
00045     return 1127.0 * Math.log(1.0 + fq / 700.0);
00046   }
00047 
00048   public double mel2lin(double mel) {
00049     return 700.0 * (Math.exp(mel / 1127.0) - 1.0);
00050   }
00051 
00052   // Default constructor - Use 40 mel spaced bins - get sampling rate
00053   // and frameSize from MEAPUtil
00054   public AvgMelSpec() {
00055       this(MEAPUtil.frameSize/2+1, 40, MEAPUtil.samplingRate);
00056   }
00057 
00058   public AvgMelSpec(int N, int outDim, double sampleRate) {
00059     this.N = N;
00060     this.outDim = outDim;
00061     linSpec = new double[N];
00062 
00063     // Calculate the locations of the bin centers on the mel scale and
00064     // as indices into the input vector
00065     centersMel = new double[outDim+2];
00066     centersInd = new int[outDim+2];
00067     double melMin = lin2mel(0);
00068     double melMax = lin2mel(1 / (2*sampleRate));
00069     for(int i=0; i<outDim+2; i++) {
00070       centersMel[i] = melMin + i * (melMax - melMin) / (outDim + 1);
00071       
00072       // Indices in inBuf at the smallest integer greater than the mel
00073       // triangle peaks
00074       centersInd[i] = (int) (mel2lin(centersMel[i])*N*sampleRate*2 + 1);
00075     }
00076 
00077     // precalculate mel translations of fft bin frequencies
00078     freq2mel = new double[N];
00079     for(int i=0; i<N; i++)
00080       freq2mel[i] = lin2mel(i / (2*N * sampleRate));
00081   }
00082 
00083 
00084   public double[] features(STFT stft, long startFrame, int length) {
00085     double[] melSpec = new double[outDim];
00086     double[] curFrame;
00087     double sum = 0;
00088 
00089     // intialize average to 0
00090     Arrays.fill(linSpec, 0);
00091 
00092     // collect average linear spectrum
00093     for(int frame=0; frame<length; frame++) {
00094       curFrame = stft.getFrame(startFrame+frame);
00095       for(int band=0; band<linSpec.length; band++)
00096         linSpec[band] += curFrame[band] / length;
00097     }
00098 
00099     // convert log magnitude to linear magnitude for binning
00100     for(int band=0; band<linSpec.length; band++)
00101       //linSpec[band] = Math.exp(linSpec[band]);
00102       linSpec[band] = Math.pow(10,linSpec[band]/10);
00103 
00104     // convert to mel scale
00105     for(int bin=0; bin<outDim; bin++) {
00106       // initialize
00107       melSpec[bin] = 0;
00108       
00109       // ramp up
00110       for(int i=centersInd[bin]; i<centersInd[bin+1]; i++) 
00111         melSpec[bin] += linSpec[i] * (freq2mel[i] - centersMel[bin]) 
00112           / (centersMel[bin+1] - centersMel[bin]);
00113       
00114       // ramp down
00115       for(int i = centersInd[bin+1]; i < centersInd[bin+2]; i++)
00116         melSpec[bin] += linSpec[i] * (centersMel[bin+2] - freq2mel[i]) 
00117           / (centersMel[bin+2] - centersMel[bin+1]);
00118 
00119       // Take log
00120       melSpec[bin] = 10*Math.log(melSpec[bin]) / Math.log(10);
00121 
00122       sum += melSpec[bin];
00123     }
00124 
00125     // Audio scrubber takes care of normalization, level is a good cue
00126 //     for(int bin=0; bin<outDim; bin++)
00127 //       melSpec[bin] = melSpec[bin] / sum;
00128     
00129     return melSpec;
00130   }
00131 
00132   public String description()
00133   {
00134           return "Computes the mean spectrum of a chunk and converts it to the perceptually weighted Mel frequency scale";
00135   }
00136 
00137 }

Generated on Thu May 11 15:04:10 2006 for MEAPsoft by doxygen 1.2.18