Main Page   Packages   Class Hierarchy   Compound List   File List   Compound Members  

STFT.java

00001 /*
00002  *  Copyright 2006-2007 Columbia University.
00003  *
00004  *  This file is part of MEAPsoft.
00005  *
00006  *  MEAPsoft is free software; you can redistribute it and/or modify
00007  *  it under the terms of the GNU General Public License version 2 as
00008  *  published by the Free Software Foundation.
00009  *
00010  *  MEAPsoft is distributed in the hope that it will be useful, but
00011  *  WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  *  General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU General Public License
00016  *  along with MEAPsoft; if not, write to the Free Software
00017  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
00018  *  02110-1301 USA
00019  *
00020  *  See the file "COPYING" for the text of the license.
00021  */
00022 
00023 package com.meapsoft;
00024 
00025 import java.io.IOException;
00026 import java.util.ArrayList;
00027 import java.util.Arrays;
00028 
00029 import javax.sound.sampled.AudioFormat;
00030 import javax.sound.sampled.AudioInputStream;
00031 
00039 public class STFT {
00040   AudioInputStream input;
00041   AudioFormat format;
00042   int bytesPerWavFrame, frameLen;
00043   ArrayList listeners = new ArrayList();
00044   double[] re, im, window;
00045   static double log10 = Math.log(10);
00046   static double epsilon = 1e-9;       // avoid log of zero
00047   RingMatrix freq, time;
00048   FFT fft;
00049 
00050   static double rmsTarget = 0.08;
00051   static double rmsAlpha = 0.001;
00052   double rms = 1;
00053 
00054   public float samplingRate;
00055   public int nhop;
00056 
00057   // The line should be open, but not started yet.
00058   public STFT(AudioInputStream input, int frameLen, int hopsize, int history) {
00059     this(input, frameLen, history);
00060 
00061     nhop = hopsize;
00062   }
00063 
00064   // The line should be open, but not started yet.
00065   public STFT(AudioInputStream input, int frameLen, int history) {
00066     freq = new RingMatrix(frameLen/2+1, history);
00067     time = new RingMatrix(frameLen, history);
00068     this.frameLen = frameLen;
00069 
00070     this.input = input;
00071     format = input.getFormat();
00072     bytesPerWavFrame = format.getFrameSize();
00073 
00074     samplingRate = format.getSampleRate();
00075 
00076     fft = new FFT(frameLen);
00077 
00078     this.re = new double[frameLen];
00079     this.im = new double[frameLen];
00080     this.window = fft.getWindow();
00081     for(int i=0; i<im.length; i++)
00082       im[i] = 0;
00083 
00084     nhop = frameLen;
00085 
00086     samplingRate = input.getFormat().getSampleRate();
00087   }
00088 
00089   // returns the total number of bytes read
00090   public long start() {
00091       byte[] b = new byte[bytesPerWavFrame * frameLen];
00092       Arrays.fill(b, (byte)0);
00093 
00094       int noverlapBytes = (frameLen-nhop)*bytesPerWavFrame;
00095       int nhopBytes = nhop*bytesPerWavFrame;
00096 
00097       int totalBytesRead = 0;
00098       int bytesRead = 22;
00099       while(bytesRead > 0) {
00100           if(nhop > 0)
00101           {
00102               // shift b so our overlap works out
00103               for(int x = 0; x < noverlapBytes; x++)
00104                   b[x] = b[x+nhopBytes];
00105           }
00106 
00107           try {
00108               bytesRead = input.read(b, noverlapBytes, nhopBytes);
00109               totalBytesRead += bytesRead;
00110           } catch(IOException ioe) {
00111               ioe.printStackTrace();
00112               return totalBytesRead;
00113           }
00114           
00115           // store the unwindowed waveform for getSamples function
00116           double[] wav = time.checkOutColumn();
00117           bytes2doubles(b, wav);
00118           
00119           // Normalize rms using a moving average estimate of it
00120           // Calculate current rms
00121           double rmsCur = 0;
00122           for(int i=0; i<wav.length; i++)
00123               rmsCur += wav[i]*wav[i];
00124           rmsCur = Math.sqrt(rmsCur / wav.length);
00125           
00126           // update moving average
00127           rms = rmsAlpha*rmsCur + (1-rmsAlpha)*rms;
00128           
00129           // normalize by rms
00130           for(int i=0; i<wav.length; i++)
00131               wav[i] = wav[i] * rmsTarget / rms;
00132           
00133           time.checkInColumn(wav);
00134           
00135           // window waveform
00136           for(int i=0; i<wav.length; i++)
00137               re[i] = window[i] * wav[i];
00138           
00139           // take fft
00140           fft.fft(re, im);
00141           
00142           // Calculate magnitude
00143           double[] mag = freq.checkOutColumn();
00144           for(int i=0; i<mag.length; i++)
00145               mag[i] = 10*Math.log(re[i]*re[i] + im[i]*im[i] + epsilon) / log10;
00146           
00147           // clear im[]
00148           Arrays.fill(im, 0);
00149 
00150           // Tell everyone concerned that we've added another frame
00151           long frAddr = freq.checkInColumn(mag);
00152           notifyListeners(frAddr);
00153       }
00154 
00155       // let the frame listeners know that we're done reading:
00156       notifyListeners(-1);
00157       
00158       return totalBytesRead;
00159   }
00160     
00161 
00162   // Get the waveform samples from frames frStart to frEnd-1
00163   public double[] getSamples(long frStart, long frEnd) {
00164     long sampStart = fr2Samp(frStart);
00165     long sampEnd = fr2Samp(frEnd);
00166 
00167     double[] x = new double[(int)(sampEnd - sampStart)];
00168 
00169     for(int fr=0; fr < frEnd-frStart; fr++) {
00170         double[] frame = time.getColumn(frStart+fr);
00171         if(frame == null) continue;
00172         // only the first nhop samples of frame are valid
00173         for(int i=0; i<nhop; i++)
00174             x[(int)(fr2Samp(fr+frStart)-sampStart + i)] = frame[i];
00175     }
00176 
00177     return x;
00178   }
00179   
00180   // Convert an address in frames into an address in samples
00181   public long fr2Samp(long frAddr) {
00182     return nhop * frAddr;
00183   }
00184 
00185   // Convert an address in samples into an address in frames
00186   public long samp2fr(long sampAddr) {
00187     return sampAddr/nhop;
00188   }
00189 
00190   public double[] getFrame(long frAddr) { return freq.getColumn(frAddr); }
00191   public void setFrame(long frAddr, double[] dat) { freq.setColumn(frAddr, dat); }
00192   public int getColumns() { return freq.getColumns(); }
00193   public int getRows() { return freq.getRows(); }
00194 
00195   // Dealing with FrameListeners
00196   public void addFrameListener(FrameListener fl) {
00197     listeners.add(fl);
00198   }
00199   public void removeFrameListener(FrameListener fl) {
00200     listeners.remove(fl);
00201   }
00202   public void notifyListeners(long frAddr) {
00203     for(int i=0; i<listeners.size(); i++) {
00204       FrameListener list = (FrameListener) listeners.get(i);
00205       list.newFrame(this, frAddr);
00206     }
00207   }
00208 
00209 
00210   // Convert a byte stream into a stream of doubles.  If it's stereo,
00211   // the channels will be interleaved with each other in the double
00212   // stream, as in the byte stream.
00213   public void bytes2doubles(byte[] audioBytes, double[] audioData) {
00214     if (format.getSampleSizeInBits() == 16) {
00215       if (format.isBigEndian()) {
00216         for (int i = 0; i < audioData.length; i++) {
00217           /* First byte is MSB (high order) */
00218           int MSB = (int) audioBytes[2*i];
00219           /* Second byte is LSB (low order) */
00220           int LSB = (int) audioBytes[2*i+1];
00221           audioData[i] = ((double)(MSB << 8 | (255 & LSB))) 
00222             / 32768.0;
00223         }
00224       } else {
00225         for (int i = 0; i < audioData.length; i++) {
00226           /* First byte is LSB (low order) */
00227           int LSB = (int) audioBytes[2*i];
00228           /* Second byte is MSB (high order) */
00229           int MSB = (int) audioBytes[2*i+1];
00230           audioData[i] = ((double)(MSB << 8 | (255 & LSB))) 
00231             / 32768.0;
00232         }
00233       }
00234     } else if (format.getSampleSizeInBits() == 8) {
00235       int nlengthInSamples = audioBytes.length;
00236       if (format.getEncoding().toString().startsWith("PCM_SIGN")) {
00237         for (int i = 0; i < audioBytes.length; i++) {
00238           audioData[i] = audioBytes[i] / 128.0;
00239         }
00240       } else {
00241         for (int i = 0; i < audioBytes.length; i++) {
00242           audioData[i] = (audioBytes[i] - 128) / 128.0;
00243         }
00244       }
00245     }
00246   }
00247 
00248   // Convert an address in frames to an address in seconds
00249   public double fr2Seconds(long frAddr)
00250   {
00251       return(fr2Samp(frAddr)/samplingRate);
00252   }  
00253 
00254   // Convert an address in seconds to an address in frames
00255   public long seconds2fr(double sec)
00256   {
00257       return(samp2fr((long)(sec*samplingRate)));
00258   }  
00259 
00260 
00264   public static RingMatrix getSTFT(double[] samples, int nfft)
00265   {
00266       return STFT.getSTFT(samples, nfft, nfft);
00267   }
00268 
00272   public static RingMatrix getSTFT(double[] samples, int nfft, int nhop)
00273   {
00274       RingMatrix freq = new RingMatrix(nfft/2+1, samples.length/nhop);
00275 
00276       FFT fft = new FFT(nfft);
00277       double[] window = fft.getWindow();
00278 
00279       double[] wav = new double[nfft];
00280       double rms = 1;
00281       for(int currFrame = 0; currFrame < samples.length/nhop; currFrame++)
00282       {
00283           // zero pad if we run out of samples:
00284           int zeroPadLen = currFrame*nhop + wav.length - samples.length;
00285           if(zeroPadLen < 0)
00286               zeroPadLen = 0;
00287           int wavLen = wav.length - zeroPadLen;
00288           
00289           //for(int i = 0; i<wav.length; i++)
00290           //    wav[i] = samples[currFrame*nhop + i];
00291           for(int i = 0; i < wavLen; i++)
00292               wav[i] = samples[currFrame*nhop + i];
00293           for(int i = wavLen; i < wav.length; i++)
00294               wav[i] = 0;
00295 
00296           // Normalize rms using a moving average estimate of it
00297           // Calculate current rms
00298           double rmsCur = 0;
00299           for(int i=0; i<wav.length; i++)
00300               rmsCur += wav[i]*wav[i];
00301           rmsCur = Math.sqrt(rmsCur / wav.length);
00302       
00303           // update moving average
00304           rms = rmsAlpha*rmsCur + (1-rmsAlpha)*rms;
00305           
00306           // normalize by rms
00307           for(int i=0; i<wav.length; i++)
00308               wav[i] = wav[i] * rmsTarget / rms;
00309       
00310           // window waveform
00311           double[] re = new double[wav.length];
00312           double[] im = new double[wav.length];
00313           for(int i=0; i<wav.length; i++)
00314           {
00315               re[i] = window[i] * wav[i];
00316               im[i] = 0;
00317           }
00318 
00319           // take fft
00320           fft.fft(re, im);
00321           
00322           // Calculate magnitude
00323           double[] mag = freq.checkOutColumn();
00324           for(int i=0; i<mag.length; i++)
00325               mag[i] = 10*Math.log(re[i]*re[i] + im[i]*im[i] + epsilon) / log10;
00326 
00327           freq.checkInColumn(mag);
00328       }  
00329 
00330       return freq;
00331   }
00332 
00333 
00342   public int readFrames(long nframes) 
00343   {
00344       byte[] b = new byte[bytesPerWavFrame * frameLen];
00345       Arrays.fill(b, (byte)0);
00346 
00347       int noverlapBytes = (frameLen-nhop)*bytesPerWavFrame;
00348       int nhopBytes = nhop*bytesPerWavFrame;
00349 
00350       int bytesRead = 22;
00351       int nFramesRead = 0;
00352       while(nFramesRead <= nframes)
00353       {
00354           if(nhop > 0)
00355           {
00356               // shift b so our overlap works out
00357               for(int x = 0; x < noverlapBytes; x++)
00358                   b[x] = b[x+nhopBytes];
00359           }
00360 
00361           try 
00362           { 
00363               input.read(b, noverlapBytes, nhopBytes); 
00364               nFramesRead++;
00365           }
00366           catch(IOException ioe) 
00367           { 
00368               ioe.printStackTrace(); 
00369               return nFramesRead;
00370           }
00371           
00372           // store the unwindowed waveform for getSamples function
00373           double[] wav = time.checkOutColumn();
00374           bytes2doubles(b, wav);
00375           
00376           // Normalize rms using a moving average estimate of it
00377           // Calculate current rms
00378           double rmsCur = 0;
00379           for(int i=0; i<wav.length; i++)
00380               rmsCur += wav[i]*wav[i];
00381           rmsCur = Math.sqrt(rmsCur / wav.length);
00382           
00383           // update moving average
00384           rms = rmsAlpha*rmsCur + (1-rmsAlpha)*rms;
00385           
00386           // normalize by rms
00387           for(int i=0; i<wav.length; i++)
00388               wav[i] = wav[i] * rmsTarget / rms;
00389           
00390           time.checkInColumn(wav);
00391           
00392           // window waveform
00393           for(int i=0; i<wav.length; i++)
00394               re[i] = window[i] * wav[i];
00395           
00396           // take fft
00397           fft.fft(re, im);
00398           
00399           // Calculate magnitude
00400           double[] mag = freq.checkOutColumn();
00401           for(int i=0; i<mag.length; i++)
00402               mag[i] = 10*Math.log(re[i]*re[i] + im[i]*im[i] + epsilon) / log10;
00403           
00404           // clear im[]
00405           Arrays.fill(im, 0);
00406 
00407           // Tell everyone concerned that we've added another frame
00408           long frAddr = freq.checkInColumn(mag);
00409           notifyListeners(frAddr);
00410       }
00411 
00412       return nFramesRead;
00413   }    
00414 
00418   public long getLastFrameAddress()
00419   {
00420       return freq.nextFrAddr-1;
00421   } 
00422 
00423   public void stop() throws IOException
00424   {
00425       input.close();
00426   }
00427 }

Generated on Tue Feb 6 19:02:27 2007 for MEAPsoft by doxygen1.2.18