00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 package com.meapsoft;
00024
00025 import java.io.IOException;
00026 import java.util.ArrayList;
00027
00028 import javax.sound.sampled.AudioFormat;
00029 import javax.sound.sampled.AudioInputStream;
00030
00038 public class STFT {
00039 AudioInputStream input;
00040 AudioFormat format;
00041 int bytesPerWavFrame, frameLen;
00042 ArrayList listeners = new ArrayList();
00043 double[] re, im, window;
00044 static double log10 = Math.log(10);
00045 static double epsilon = 1e-9;
00046 RingMatrix freq, time;
00047 FFT fft;
00048
00049 static double rmsTarget = 0.08;
00050 static double rmsAlpha = 0.001;
00051 double rms = 1;
00052
00053 public float samplingRate = 44100;
00054
00055
00056 public STFT(AudioInputStream input, int frameLen, int history) {
00057 freq = new RingMatrix(frameLen/2+1, history);
00058 time = new RingMatrix(frameLen, history);
00059 this.frameLen = frameLen;
00060
00061 this.input = input;
00062 format = input.getFormat();
00063 bytesPerWavFrame = format.getFrameSize();
00064
00065 samplingRate = format.getSampleRate();
00066
00067 fft = new FFT(frameLen);
00068
00069 this.re = new double[frameLen];
00070 this.im = new double[frameLen];
00071 this.window = fft.getWindow();
00072 for(int i=0; i<im.length; i++)
00073 im[i] = 0;
00074 }
00075
00076 public void start() {
00077 byte[] b = new byte[bytesPerWavFrame * frameLen];
00078
00079 int bytesRead = 22;
00080 while(bytesRead > 0) {
00081 try {
00082 bytesRead = input.read(b);
00083 } catch(IOException ioe) {
00084 ioe.printStackTrace();
00085 return;
00086 }
00087
00088
00089
00090
00091 double[] wav = time.checkOutColumn();
00092 bytes2doubles(b, wav);
00093
00094
00095
00096 double rmsCur = 0;
00097 for(int i=0; i<wav.length; i++)
00098 rmsCur += wav[i]*wav[i];
00099 rmsCur = Math.sqrt(rmsCur / wav.length);
00100
00101
00102 rms = rmsAlpha*rmsCur + (1-rmsAlpha)*rms;
00103
00104
00105 for(int i=0; i<wav.length; i++)
00106 wav[i] = wav[i] * rmsTarget / rms;
00107
00108 time.checkInColumn(wav);
00109
00110
00111 for(int i=0; i<wav.length; i++)
00112 re[i] = window[i] * wav[i];
00113
00114
00115 fft.fft(re, im);
00116
00117
00118 double[] mag = freq.checkOutColumn();
00119 for(int i=0; i<mag.length; i++)
00120
00121 mag[i] = 10*Math.log(re[i]*re[i] + im[i]*im[i] + epsilon) / log10;
00122
00123
00124 for(int i=0; i<im.length; i++)
00125 im[i] = 0;
00126
00127
00128 long frAddr = freq.checkInColumn(mag);
00129 notifyListeners(frAddr);
00130 }
00131 }
00132
00133
00134
00135 public double[] getSamples(long frStart, long frEnd) {
00136 long sampStart = fr2Samp(frStart);
00137 long sampEnd = fr2Samp(frEnd);
00138
00139
00140
00141
00142 double[] x = new double[(int)(sampEnd - sampStart)];
00143
00144 for(int fr=0; fr < frEnd-frStart; fr++) {
00145 double[] frame = time.getColumn(frStart+fr);
00146 if(frame == null) continue;
00147 for(int i=0; i<frameLen; i++)
00148 x[(int)(fr2Samp(fr+frStart)-fr2Samp(frStart) + i)] = frame[i];
00149 }
00150
00151 return x;
00152 }
00153
00154
00155 public long fr2Samp(long frAddr) {
00156
00157 return frameLen * frAddr;
00158 }
00159
00160
00161 public long samp2fr(long sampAddr) {
00162 return sampAddr/frameLen;
00163 }
00164
00165 public double[] getFrame(long frAddr) { return freq.getColumn(frAddr); }
00166 public int getColumns() { return freq.getColumns(); }
00167 public int getRows() { return freq.getRows(); }
00168
00169
00170 public void addFrameListener(FrameListener fl) {
00171 listeners.add(fl);
00172 }
00173 public void removeFrameListener(FrameListener fl) {
00174 listeners.remove(fl);
00175 }
00176 public void notifyListeners(long frAddr) {
00177 for(int i=0; i<listeners.size(); i++) {
00178 FrameListener list = (FrameListener) listeners.get(i);
00179 list.newFrame(this, frAddr);
00180 }
00181 }
00182
00183
00184
00185
00186
00187 public void bytes2doubles(byte[] audioBytes, double[] audioData) {
00188 if (format.getSampleSizeInBits() == 16) {
00189 if (format.isBigEndian()) {
00190 for (int i = 0; i < audioData.length; i++) {
00191
00192 int MSB = (int) audioBytes[2*i];
00193
00194 int LSB = (int) audioBytes[2*i+1];
00195 audioData[i] = ((double)(MSB << 8 | (255 & LSB)))
00196 / 32768.0;
00197 }
00198 } else {
00199 for (int i = 0; i < audioData.length; i++) {
00200
00201 int LSB = (int) audioBytes[2*i];
00202
00203 int MSB = (int) audioBytes[2*i+1];
00204 audioData[i] = ((double)(MSB << 8 | (255 & LSB)))
00205 / 32768.0;
00206 }
00207 }
00208 } else if (format.getSampleSizeInBits() == 8) {
00209 int nlengthInSamples = audioBytes.length;
00210 if (format.getEncoding().toString().startsWith("PCM_SIGN")) {
00211 for (int i = 0; i < audioBytes.length; i++) {
00212 audioData[i] = audioBytes[i] / 128.0;
00213 }
00214 } else {
00215 for (int i = 0; i < audioBytes.length; i++) {
00216 audioData[i] = (audioBytes[i] - 128) / 128.0;
00217 }
00218 }
00219 }
00220 }
00221
00222
00223 public double fr2Seconds(long frAddr)
00224 {
00225 return(fr2Samp(frAddr)/samplingRate);
00226 }
00227
00228
00229 public long seconds2fr(double sec)
00230 {
00231 return(samp2fr((long)(sec*samplingRate)));
00232 }
00233
00234
00238 public static RingMatrix getSTFT(double[] samples, int nfft)
00239 {
00240 return STFT.getSTFT(samples, nfft, nfft);
00241 }
00242
00246 public static RingMatrix getSTFT(double[] samples, int nfft, int nhop)
00247 {
00248 RingMatrix freq = new RingMatrix(nfft/2+1, samples.length/nhop);
00249
00250 FFT fft = new FFT(nfft);
00251 double[] window = fft.getWindow();
00252
00253 double[] wav = new double[nfft];
00254 double rms = 1;
00255 for(int currFrame = 0; currFrame < samples.length/nhop; currFrame++)
00256 {
00257
00258 int zeroPadLen = currFrame*nhop + wav.length - samples.length;
00259 if(zeroPadLen < 0)
00260 zeroPadLen = 0;
00261 int wavLen = wav.length - zeroPadLen;
00262
00263
00264
00265 for(int i = 0; i < wavLen; i++)
00266 wav[i] = samples[currFrame*nhop + i];
00267 for(int i = wavLen; i < wav.length; i++)
00268 wav[i] = 0;
00269
00270
00271
00272 double rmsCur = 0;
00273 for(int i=0; i<wav.length; i++)
00274 rmsCur += wav[i]*wav[i];
00275 rmsCur = Math.sqrt(rmsCur / wav.length);
00276
00277
00278 rms = rmsAlpha*rmsCur + (1-rmsAlpha)*rms;
00279
00280
00281 for(int i=0; i<wav.length; i++)
00282 wav[i] = wav[i] * rmsTarget / rms;
00283
00284
00285 double[] re = new double[wav.length];
00286 double[] im = new double[wav.length];
00287 for(int i=0; i<wav.length; i++)
00288 {
00289 re[i] = window[i] * wav[i];
00290 im[i] = 0;
00291 }
00292
00293
00294 fft.fft(re, im);
00295
00296
00297 double[] mag = freq.checkOutColumn();
00298 for(int i=0; i<mag.length; i++)
00299 mag[i] = 10*Math.log(re[i]*re[i] + im[i]*im[i] + epsilon) / log10;
00300
00301 freq.checkInColumn(mag);
00302 }
00303
00304 return freq;
00305 }
00306 }