00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 package com.meapsoft;
00024
00025 import java.io.IOException;
00026 import java.util.ArrayList;
00027 import java.util.Arrays;
00028
00029 import javax.sound.sampled.AudioFormat;
00030 import javax.sound.sampled.AudioInputStream;
00031
00039 public class STFT {
00040 AudioInputStream input;
00041 AudioFormat format;
00042 int bytesPerWavFrame, frameLen;
00043 ArrayList listeners = new ArrayList();
00044 double[] re, im, window;
00045 static double log10 = Math.log(10);
00046 static double epsilon = 1e-9;
00047 RingMatrix freq, time;
00048 FFT fft;
00049
00050 static double rmsTarget = 0.08;
00051 static double rmsAlpha = 0.001;
00052 double rms = 1;
00053
00054 public float samplingRate;
00055 public int nhop;
00056
00057
00058 public STFT(AudioInputStream input, int frameLen, int hopsize, int history) {
00059 this(input, frameLen, history);
00060
00061 nhop = hopsize;
00062 }
00063
00064
00065 public STFT(AudioInputStream input, int frameLen, int history) {
00066 freq = new RingMatrix(frameLen/2+1, history);
00067 time = new RingMatrix(frameLen, history);
00068 this.frameLen = frameLen;
00069
00070 this.input = input;
00071 format = input.getFormat();
00072 bytesPerWavFrame = format.getFrameSize();
00073
00074 samplingRate = format.getSampleRate();
00075
00076 fft = new FFT(frameLen);
00077
00078 this.re = new double[frameLen];
00079 this.im = new double[frameLen];
00080 this.window = fft.getWindow();
00081 for(int i=0; i<im.length; i++)
00082 im[i] = 0;
00083
00084 nhop = frameLen;
00085
00086 samplingRate = input.getFormat().getSampleRate();
00087 }
00088
00089
00090 public long start() {
00091 byte[] b = new byte[bytesPerWavFrame * frameLen];
00092 Arrays.fill(b, (byte)0);
00093
00094 int noverlapBytes = (frameLen-nhop)*bytesPerWavFrame;
00095 int nhopBytes = nhop*bytesPerWavFrame;
00096
00097 int totalBytesRead = 0;
00098 int bytesRead = 22;
00099 while(bytesRead > 0) {
00100 if(nhop > 0)
00101 {
00102
00103 for(int x = 0; x < noverlapBytes; x++)
00104 b[x] = b[x+nhopBytes];
00105 }
00106
00107 try {
00108 bytesRead = input.read(b, noverlapBytes, nhopBytes);
00109 totalBytesRead += bytesRead;
00110 } catch(IOException ioe) {
00111 ioe.printStackTrace();
00112 return totalBytesRead;
00113 }
00114
00115
00116 double[] wav = time.checkOutColumn();
00117 bytes2doubles(b, wav);
00118
00119
00120
00121 double rmsCur = 0;
00122 for(int i=0; i<wav.length; i++)
00123 rmsCur += wav[i]*wav[i];
00124 rmsCur = Math.sqrt(rmsCur / wav.length);
00125
00126
00127 rms = rmsAlpha*rmsCur + (1-rmsAlpha)*rms;
00128
00129
00130 for(int i=0; i<wav.length; i++)
00131 wav[i] = wav[i] * rmsTarget / rms;
00132
00133 time.checkInColumn(wav);
00134
00135
00136 for(int i=0; i<wav.length; i++)
00137 re[i] = window[i] * wav[i];
00138
00139
00140 fft.fft(re, im);
00141
00142
00143 double[] mag = freq.checkOutColumn();
00144 for(int i=0; i<mag.length; i++)
00145 mag[i] = 10*Math.log(re[i]*re[i] + im[i]*im[i] + epsilon) / log10;
00146
00147
00148 Arrays.fill(im, 0);
00149
00150
00151 long frAddr = freq.checkInColumn(mag);
00152 notifyListeners(frAddr);
00153 }
00154
00155
00156 notifyListeners(-1);
00157
00158 return totalBytesRead;
00159 }
00160
00161
00162
00163 public double[] getSamples(long frStart, long frEnd) {
00164 long sampStart = fr2Samp(frStart);
00165 long sampEnd = fr2Samp(frEnd);
00166
00167 double[] x = new double[(int)(sampEnd - sampStart)];
00168
00169 for(int fr=0; fr < frEnd-frStart; fr++) {
00170 double[] frame = time.getColumn(frStart+fr);
00171 if(frame == null) continue;
00172
00173 for(int i=0; i<nhop; i++)
00174 x[(int)(fr2Samp(fr+frStart)-sampStart + i)] = frame[i];
00175 }
00176
00177 return x;
00178 }
00179
00180
00181 public long fr2Samp(long frAddr) {
00182 return nhop * frAddr;
00183 }
00184
00185
00186 public long samp2fr(long sampAddr) {
00187 return sampAddr/nhop;
00188 }
00189
00190 public double[] getFrame(long frAddr) { return freq.getColumn(frAddr); }
00191 public void setFrame(long frAddr, double[] dat) { freq.setColumn(frAddr, dat); }
00192 public int getColumns() { return freq.getColumns(); }
00193 public int getRows() { return freq.getRows(); }
00194
00195
00196 public void addFrameListener(FrameListener fl) {
00197 listeners.add(fl);
00198 }
00199 public void removeFrameListener(FrameListener fl) {
00200 listeners.remove(fl);
00201 }
00202 public void notifyListeners(long frAddr) {
00203 for(int i=0; i<listeners.size(); i++) {
00204 FrameListener list = (FrameListener) listeners.get(i);
00205 list.newFrame(this, frAddr);
00206 }
00207 }
00208
00209
00210
00211
00212
00213 public void bytes2doubles(byte[] audioBytes, double[] audioData) {
00214 if (format.getSampleSizeInBits() == 16) {
00215 if (format.isBigEndian()) {
00216 for (int i = 0; i < audioData.length; i++) {
00217
00218 int MSB = (int) audioBytes[2*i];
00219
00220 int LSB = (int) audioBytes[2*i+1];
00221 audioData[i] = ((double)(MSB << 8 | (255 & LSB)))
00222 / 32768.0;
00223 }
00224 } else {
00225 for (int i = 0; i < audioData.length; i++) {
00226
00227 int LSB = (int) audioBytes[2*i];
00228
00229 int MSB = (int) audioBytes[2*i+1];
00230 audioData[i] = ((double)(MSB << 8 | (255 & LSB)))
00231 / 32768.0;
00232 }
00233 }
00234 } else if (format.getSampleSizeInBits() == 8) {
00235 int nlengthInSamples = audioBytes.length;
00236 if (format.getEncoding().toString().startsWith("PCM_SIGN")) {
00237 for (int i = 0; i < audioBytes.length; i++) {
00238 audioData[i] = audioBytes[i] / 128.0;
00239 }
00240 } else {
00241 for (int i = 0; i < audioBytes.length; i++) {
00242 audioData[i] = (audioBytes[i] - 128) / 128.0;
00243 }
00244 }
00245 }
00246 }
00247
00248
00249 public double fr2Seconds(long frAddr)
00250 {
00251 return(fr2Samp(frAddr)/samplingRate);
00252 }
00253
00254
00255 public long seconds2fr(double sec)
00256 {
00257 return(samp2fr((long)(sec*samplingRate)));
00258 }
00259
00260
00264 public static RingMatrix getSTFT(double[] samples, int nfft)
00265 {
00266 return STFT.getSTFT(samples, nfft, nfft);
00267 }
00268
00272 public static RingMatrix getSTFT(double[] samples, int nfft, int nhop)
00273 {
00274 RingMatrix freq = new RingMatrix(nfft/2+1, samples.length/nhop);
00275
00276 FFT fft = new FFT(nfft);
00277 double[] window = fft.getWindow();
00278
00279 double[] wav = new double[nfft];
00280 double rms = 1;
00281 for(int currFrame = 0; currFrame < samples.length/nhop; currFrame++)
00282 {
00283
00284 int zeroPadLen = currFrame*nhop + wav.length - samples.length;
00285 if(zeroPadLen < 0)
00286 zeroPadLen = 0;
00287 int wavLen = wav.length - zeroPadLen;
00288
00289
00290
00291 for(int i = 0; i < wavLen; i++)
00292 wav[i] = samples[currFrame*nhop + i];
00293 for(int i = wavLen; i < wav.length; i++)
00294 wav[i] = 0;
00295
00296
00297
00298 double rmsCur = 0;
00299 for(int i=0; i<wav.length; i++)
00300 rmsCur += wav[i]*wav[i];
00301 rmsCur = Math.sqrt(rmsCur / wav.length);
00302
00303
00304 rms = rmsAlpha*rmsCur + (1-rmsAlpha)*rms;
00305
00306
00307 for(int i=0; i<wav.length; i++)
00308 wav[i] = wav[i] * rmsTarget / rms;
00309
00310
00311 double[] re = new double[wav.length];
00312 double[] im = new double[wav.length];
00313 for(int i=0; i<wav.length; i++)
00314 {
00315 re[i] = window[i] * wav[i];
00316 im[i] = 0;
00317 }
00318
00319
00320 fft.fft(re, im);
00321
00322
00323 double[] mag = freq.checkOutColumn();
00324 for(int i=0; i<mag.length; i++)
00325 mag[i] = 10*Math.log(re[i]*re[i] + im[i]*im[i] + epsilon) / log10;
00326
00327 freq.checkInColumn(mag);
00328 }
00329
00330 return freq;
00331 }
00332
00333
00342 public int readFrames(long nframes)
00343 {
00344 byte[] b = new byte[bytesPerWavFrame * frameLen];
00345 Arrays.fill(b, (byte)0);
00346
00347 int noverlapBytes = (frameLen-nhop)*bytesPerWavFrame;
00348 int nhopBytes = nhop*bytesPerWavFrame;
00349
00350 int bytesRead = 22;
00351 int nFramesRead = 0;
00352 while(nFramesRead <= nframes)
00353 {
00354 if(nhop > 0)
00355 {
00356
00357 for(int x = 0; x < noverlapBytes; x++)
00358 b[x] = b[x+nhopBytes];
00359 }
00360
00361 try
00362 {
00363 input.read(b, noverlapBytes, nhopBytes);
00364 nFramesRead++;
00365 }
00366 catch(IOException ioe)
00367 {
00368 ioe.printStackTrace();
00369 return nFramesRead;
00370 }
00371
00372
00373 double[] wav = time.checkOutColumn();
00374 bytes2doubles(b, wav);
00375
00376
00377
00378 double rmsCur = 0;
00379 for(int i=0; i<wav.length; i++)
00380 rmsCur += wav[i]*wav[i];
00381 rmsCur = Math.sqrt(rmsCur / wav.length);
00382
00383
00384 rms = rmsAlpha*rmsCur + (1-rmsAlpha)*rms;
00385
00386
00387 for(int i=0; i<wav.length; i++)
00388 wav[i] = wav[i] * rmsTarget / rms;
00389
00390 time.checkInColumn(wav);
00391
00392
00393 for(int i=0; i<wav.length; i++)
00394 re[i] = window[i] * wav[i];
00395
00396
00397 fft.fft(re, im);
00398
00399
00400 double[] mag = freq.checkOutColumn();
00401 for(int i=0; i<mag.length; i++)
00402 mag[i] = 10*Math.log(re[i]*re[i] + im[i]*im[i] + epsilon) / log10;
00403
00404
00405 Arrays.fill(im, 0);
00406
00407
00408 long frAddr = freq.checkInColumn(mag);
00409 notifyListeners(frAddr);
00410 }
00411
00412 return nFramesRead;
00413 }
00414
00418 public long getLastFrameAddress()
00419 {
00420 return freq.nextFrAddr-1;
00421 }
00422
00423 public void stop() throws IOException
00424 {
00425 input.close();
00426 }
00427 }