Main Page   Class Hierarchy   Compound List   File List   Compound Members  

STFT.java

00001 /*
00002  *  Copyright 2006 Columbia University.
00003  *
00004  *  This file is part of MEAPsoft.
00005  *
00006  *  MEAPsoft is free software; you can redistribute it and/or modify
00007  *  it under the terms of the GNU General Public License version 2 as
00008  *  published by the Free Software Foundation.
00009  *
00010  *  MEAPsoft is distributed in the hope that it will be useful, but
00011  *  WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  *  General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU General Public License
00016  *  along with MEAPsoft; if not, write to the Free Software
00017  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
00018  *  02110-1301 USA
00019  *
00020  *  See the file "COPYING" for the text of the license.
00021  */
00022 
00023 package com.meapsoft;
00024 
00025 import java.io.IOException;
00026 import java.util.ArrayList;
00027 
00028 import javax.sound.sampled.AudioFormat;
00029 import javax.sound.sampled.AudioInputStream;
00030 
00038 public class STFT {
00039   AudioInputStream input;
00040   AudioFormat format;
00041   int bytesPerWavFrame, frameLen;
00042   ArrayList listeners = new ArrayList();
00043   double[] re, im, window;
00044   static double log10 = Math.log(10);
00045   static double epsilon = 1e-9;       // avoid log of zero
00046   RingMatrix freq, time;
00047   FFT fft;
00048 
00049   static double rmsTarget = 0.08;
00050   static double rmsAlpha = 0.001;
00051   double rms = 1;
00052 
00053   public float samplingRate = 44100;  
00054 
00055   // The line should be open, but not started yet.
00056   public STFT(AudioInputStream input, int frameLen, int history) {
00057     freq = new RingMatrix(frameLen/2+1, history);
00058     time = new RingMatrix(frameLen, history);
00059     this.frameLen = frameLen;
00060 
00061     this.input = input;
00062     format = input.getFormat();
00063     bytesPerWavFrame = format.getFrameSize();
00064 
00065     samplingRate = format.getSampleRate();
00066 
00067     fft = new FFT(frameLen);
00068 
00069     this.re = new double[frameLen];
00070     this.im = new double[frameLen];
00071     this.window = fft.getWindow();
00072     for(int i=0; i<im.length; i++)
00073       im[i] = 0;
00074   }
00075 
00076   public void start() {
00077       byte[] b = new byte[bytesPerWavFrame * frameLen];
00078       
00079       int bytesRead = 22;
00080       while(bytesRead > 0) {
00081           try {
00082               bytesRead = input.read(b);
00083           } catch(IOException ioe) {
00084               ioe.printStackTrace();
00085               return;
00086           }
00087           //System.out.println(bytesRead + " bytes read");
00088           //System.out.print(".");
00089           
00090           // store the unwindowed waveform for getSamples function
00091           double[] wav = time.checkOutColumn();
00092           bytes2doubles(b, wav);
00093           
00094           // Normalize rms using a moving average estimate of it
00095           // Calculate current rms
00096           double rmsCur = 0;
00097           for(int i=0; i<wav.length; i++)
00098               rmsCur += wav[i]*wav[i];
00099           rmsCur = Math.sqrt(rmsCur / wav.length);
00100           
00101           // update moving average
00102           rms = rmsAlpha*rmsCur + (1-rmsAlpha)*rms;
00103           
00104           // normalize by rms
00105           for(int i=0; i<wav.length; i++)
00106               wav[i] = wav[i] * rmsTarget / rms;
00107           
00108           time.checkInColumn(wav);
00109           
00110           // window waveform
00111           for(int i=0; i<wav.length; i++)
00112               re[i] = window[i] * wav[i];
00113           
00114           // take fft
00115           fft.fft(re, im);
00116           
00117           // Calculate magnitude
00118           double[] mag = freq.checkOutColumn();
00119           for(int i=0; i<mag.length; i++)
00120               //mag[i] = re[i]*re[i] + im[i]*im[i];
00121               mag[i] = 10*Math.log(re[i]*re[i] + im[i]*im[i] + epsilon) / log10;
00122           
00123           // clear im[]
00124           for(int i=0; i<im.length; i++)
00125               im[i] = 0;
00126           
00127           // Tell everyone concerned that we've added another frame
00128           long frAddr = freq.checkInColumn(mag);
00129           notifyListeners(frAddr);
00130       }
00131   }
00132     
00133     
00134   // Get the waveform samples from frames frStart to frEnd-1
00135   public double[] getSamples(long frStart, long frEnd) {
00136     long sampStart = fr2Samp(frStart);
00137     long sampEnd = fr2Samp(frEnd);
00138 
00139 //     System.out.println("sampStart=" + sampStart + ", sampEnd=" + sampEnd);
00140 //     System.out.println("frStart=" + frStart + ", frEnd=" + frEnd);
00141 
00142     double[] x = new double[(int)(sampEnd - sampStart)];
00143 
00144     for(int fr=0; fr < frEnd-frStart; fr++) {
00145       double[] frame = time.getColumn(frStart+fr);
00146       if(frame == null) continue;
00147       for(int i=0; i<frameLen; i++)
00148         x[(int)(fr2Samp(fr+frStart)-fr2Samp(frStart) + i)] = frame[i];
00149     }
00150 
00151     return x;
00152   }
00153   
00154   // Convert an address in frames into an address in samples
00155   public long fr2Samp(long frAddr) {
00156     // frames do not overlap, so calculation is easy
00157     return frameLen * frAddr;
00158   }
00159 
00160   // Convert an address in samples into an address in frames
00161   public long samp2fr(long sampAddr) {
00162     return sampAddr/frameLen;
00163   }
00164 
00165   public double[] getFrame(long frAddr) { return freq.getColumn(frAddr); }
00166   public int getColumns() { return freq.getColumns(); }
00167   public int getRows() { return freq.getRows(); }
00168 
00169   // Dealing with FrameListeners
00170   public void addFrameListener(FrameListener fl) {
00171     listeners.add(fl);
00172   }
00173   public void removeFrameListener(FrameListener fl) {
00174     listeners.remove(fl);
00175   }
00176   public void notifyListeners(long frAddr) {
00177     for(int i=0; i<listeners.size(); i++) {
00178       FrameListener list = (FrameListener) listeners.get(i);
00179       list.newFrame(this, frAddr);
00180     }
00181   }
00182 
00183 
00184   // Convert a byte stream into a stream of doubles.  If it's stereo,
00185   // the channels will be interleaved with each other in the double
00186   // stream, as in the byte stream.
00187   public void bytes2doubles(byte[] audioBytes, double[] audioData) {
00188     if (format.getSampleSizeInBits() == 16) {
00189       if (format.isBigEndian()) {
00190         for (int i = 0; i < audioData.length; i++) {
00191           /* First byte is MSB (high order) */
00192           int MSB = (int) audioBytes[2*i];
00193           /* Second byte is LSB (low order) */
00194           int LSB = (int) audioBytes[2*i+1];
00195           audioData[i] = ((double)(MSB << 8 | (255 & LSB))) 
00196             / 32768.0;
00197         }
00198       } else {
00199         for (int i = 0; i < audioData.length; i++) {
00200           /* First byte is LSB (low order) */
00201           int LSB = (int) audioBytes[2*i];
00202           /* Second byte is MSB (high order) */
00203           int MSB = (int) audioBytes[2*i+1];
00204           audioData[i] = ((double)(MSB << 8 | (255 & LSB))) 
00205             / 32768.0;
00206         }
00207       }
00208     } else if (format.getSampleSizeInBits() == 8) {
00209       int nlengthInSamples = audioBytes.length;
00210       if (format.getEncoding().toString().startsWith("PCM_SIGN")) {
00211         for (int i = 0; i < audioBytes.length; i++) {
00212           audioData[i] = audioBytes[i] / 128.0;
00213         }
00214       } else {
00215         for (int i = 0; i < audioBytes.length; i++) {
00216           audioData[i] = (audioBytes[i] - 128) / 128.0;
00217         }
00218       }
00219     }
00220   }
00221 
00222   // Convert an address in frames to an address in seconds
00223   public double fr2Seconds(long frAddr)
00224   {
00225       return(fr2Samp(frAddr)/samplingRate);
00226   }  
00227 
00228   // Convert an address in seconds to an address in frames
00229   public long seconds2fr(double sec)
00230   {
00231       return(samp2fr((long)(sec*samplingRate)));
00232   }  
00233 
00234 
00238   public static RingMatrix getSTFT(double[] samples, int nfft)
00239   {
00240       return STFT.getSTFT(samples, nfft, nfft);
00241   }
00242 
00246   public static RingMatrix getSTFT(double[] samples, int nfft, int nhop)
00247   {
00248       RingMatrix freq = new RingMatrix(nfft/2+1, samples.length/nhop);
00249 
00250       FFT fft = new FFT(nfft);
00251       double[] window = fft.getWindow();
00252 
00253       double[] wav = new double[nfft];
00254       double rms = 1;
00255       for(int currFrame = 0; currFrame < samples.length/nhop; currFrame++)
00256       {
00257           // zero pad if we run out of samples:
00258           int zeroPadLen = currFrame*nhop + wav.length - samples.length;
00259           if(zeroPadLen < 0)
00260               zeroPadLen = 0;
00261           int wavLen = wav.length - zeroPadLen;
00262           
00263           //for(int i = 0; i<wav.length; i++)
00264           //    wav[i] = samples[currFrame*nhop + i];
00265           for(int i = 0; i < wavLen; i++)
00266               wav[i] = samples[currFrame*nhop + i];
00267           for(int i = wavLen; i < wav.length; i++)
00268               wav[i] = 0;
00269 
00270           // Normalize rms using a moving average estimate of it
00271           // Calculate current rms
00272           double rmsCur = 0;
00273           for(int i=0; i<wav.length; i++)
00274               rmsCur += wav[i]*wav[i];
00275           rmsCur = Math.sqrt(rmsCur / wav.length);
00276       
00277           // update moving average
00278           rms = rmsAlpha*rmsCur + (1-rmsAlpha)*rms;
00279           
00280           // normalize by rms
00281           for(int i=0; i<wav.length; i++)
00282               wav[i] = wav[i] * rmsTarget / rms;
00283       
00284           // window waveform
00285           double[] re = new double[wav.length];
00286           double[] im = new double[wav.length];
00287           for(int i=0; i<wav.length; i++)
00288           {
00289               re[i] = window[i] * wav[i];
00290               im[i] = 0;
00291           }
00292 
00293           // take fft
00294           fft.fft(re, im);
00295           
00296           // Calculate magnitude
00297           double[] mag = freq.checkOutColumn();
00298           for(int i=0; i<mag.length; i++)
00299               mag[i] = 10*Math.log(re[i]*re[i] + im[i]*im[i] + epsilon) / log10;
00300 
00301           freq.checkInColumn(mag);
00302       }  
00303 
00304       return freq;
00305   }
00306 }

Generated on Thu May 11 15:04:11 2006 for MEAPsoft by doxygen 1.2.18