00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 package com.meapsoft;
00024
00025 import java.io.BufferedReader;
00026 import java.io.FileReader;
00027 import java.io.IOException;
00028 import java.io.Writer;
00029 import java.util.Arrays;
00030 import java.util.Iterator;
00031 import java.util.Vector;
00032 import java.util.regex.Matcher;
00033 import java.util.regex.Pattern;
00034
00040 public class FeatFile extends MEAPFile implements Cloneable
00041 {
00042
00043
00044 public Vector featureDescriptions = new Vector();
00045
00046
00047 public Vector chunks;
00048
00049
00050 protected static final Pattern commentPattern = Pattern.compile(
00051 "#\\.*");
00052 protected static final Pattern linePattern = Pattern.compile(
00053 "\\s*([^#\\s]+)\\s*");
00054
00055 protected static final Pattern featDescPattern = Pattern.compile(
00056 "^#\\s*Features:\\s*");
00057
00058 public FeatFile(String fn)
00059 {
00060 filename = fn;
00061 chunks = new Vector(100, 0);
00062 }
00063
00064
00065
00066 protected FeatFile()
00067 {
00068 this("BUG");
00069 }
00070
00077 public double[][] getFeatures()
00078 {
00079 return getFeatures(null);
00080 }
00081
00088 public double[][] getFeatures(int[] featdim)
00089 {
00090
00091 int maxdim = 0;
00092 if(featdim != null)
00093 maxdim = featdim.length;
00094 else
00095 maxdim = ((FeatChunk)chunks.get(0)).numFeatures();
00096
00097 double[][] mat = new double[chunks.size()][maxdim];
00098
00099 for(int x = 0; x < chunks.size(); x++)
00100 {
00101 FeatChunk c = (FeatChunk)chunks.get(x);
00102
00103 double[] currFeat = c.getFeatures(featdim);
00104 for(int y = 0; y < currFeat.length; y++)
00105 mat[x][y] = currFeat[y];
00106 }
00107
00108 return mat;
00109 }
00110
00119 public int[] getFeatureLengths()
00120 {
00121 int[] lengths = new int[featureDescriptions.size()];
00122
00123 for(int i = 0; i < featureDescriptions.size(); i++)
00124 {
00125 int numDim = Integer.parseInt(
00126 ((String)featureDescriptions.get(i)).split("[()]" )[1]);
00127 lengths[i] = numDim;
00128 }
00129 return lengths;
00130 }
00131
00142 public void normalizeFeatures()
00143 {
00144
00145 int startDim = 0;
00146 for(int featType = 0; featType < featureDescriptions.size(); featType++)
00147 {
00148 int numDim = Integer.parseInt(
00149 ((String)featureDescriptions.get(featType)).split("[()]" )[1]);
00150
00151 int[] featDim = new int[numDim];
00152 for(int x = 0; x < numDim; x++)
00153 featDim[x] = startDim + x;
00154
00155
00156 double[][] feat = getFeatures(featDim);
00157 double minFeat = DSP.min(DSP.min(feat));
00158 double maxFeat = DSP.max(DSP.max(feat));
00159
00160 for(int x = 0; x < chunks.size(); x++)
00161 {
00162 FeatChunk c = (FeatChunk)chunks.get(x);
00163
00164 double[] currFeat = c.getFeatures(featDim);
00165
00166 for(int d = 0; d < featDim.length; d++)
00167 c.setFeature(featDim[d],
00168 (currFeat[d]-minFeat)/(maxFeat-minFeat));
00169 }
00170
00171 startDim += numDim;
00172 }
00173 }
00174
00179 public void applyFeatureWeights()
00180 {
00181
00182 int startDim = 0;
00183 for(int featType = 0; featType < featureDescriptions.size(); featType++)
00184 {
00185 int numDim = Integer.parseInt(
00186 ((String)featureDescriptions.get(featType)).split("[()]")[1]);
00187
00188 double weight = 1.0;
00189 try
00190 {
00191 weight = Double.parseDouble(
00192 ((String)featureDescriptions.get(featType)).split("[*]")[0]);
00193 }
00194 catch(NumberFormatException e)
00195 {
00196
00197 continue;
00198 }
00199
00200 int[] featDim = new int[numDim];
00201 for(int x = 0; x < numDim; x++)
00202 featDim[x] = startDim + x;
00203
00204 for(int x = 0; x < chunks.size(); x++)
00205 {
00206 FeatChunk c = (FeatChunk)chunks.get(x);
00207
00208 double[] currFeat = c.getFeatures(featDim);
00209
00210 for(int d = 0; d < featDim.length; d++)
00211 c.setFeature(featDim[d], weight*currFeat[d]);
00212 }
00213
00214 startDim += numDim;
00215 }
00216 }
00217
00221 public void readFile() throws IOException, ParserException
00222 {
00223 BufferedReader in = new BufferedReader(new FileReader(filename));
00224
00225 String audioFile;
00226 double chunkStartTime;
00227 double chunkLength;
00228
00229
00230
00231
00232
00233 boolean haveWrittenHeader = false;
00234 long lineno = 0;
00235 String line;
00236 while((line = in.readLine()) != null)
00237 {
00238 lineno++;
00239
00240
00241 String comment = "";
00242 Matcher c = commentPattern.matcher(line+"\n");
00243 if(c.find())
00244 {
00245
00246 comment = c.group() + line.substring(c.end()) + "\n";
00247 line = line.substring(0, c.start());
00248
00249
00250 Matcher fd = featDescPattern.matcher(comment);
00251 if(fd.find())
00252 {
00253 String featString = comment.substring(fd.end()).trim();
00254
00255 featureDescriptions.addAll(
00256 new Vector(Arrays.asList(featString.split("\\s+"))));
00257 }
00258 }
00259
00260 Matcher p = linePattern.matcher(line);
00261
00262 if(!p.find())
00263 continue;
00264 audioFile = p.group(1);
00265
00266 audioFile = audioFile.replaceAll("%20", " ");
00267
00268
00269 if(!p.find())
00270 throw new ParserException(filename, lineno,
00271 "Could not find chunk start time.");
00272 try { chunkStartTime = Double.parseDouble(p.group(1)); }
00273 catch(NumberFormatException nfe) {
00274 throw new ParserException(filename, lineno,
00275 "Could not parse chunk start time \""
00276 + p.group(1) + "\"."); }
00277 if(!p.find())
00278 throw new ParserException(filename, lineno,
00279 "Could not find chunk length.");
00280 try { chunkLength = Double.parseDouble(p.group(1)); }
00281 catch(NumberFormatException nfe) {
00282 throw new ParserException(filename, lineno,
00283 "Could not parse chunk length \""
00284 + p.group(1) + "\"."); }
00285
00286 FeatChunk ch = new FeatChunk(audioFile, chunkStartTime, chunkLength);
00287 ch.comment = comment;
00288
00289
00290 while(p.find())
00291 {
00292
00293 try
00294 {
00295 ch.addFeature(Double.parseDouble(p.group(1)));
00296 }
00297 catch (NumberFormatException e)
00298 {
00299 ch.addFeature(p.group(1));
00300 }
00301 }
00302
00303 chunks.add(ch);
00304 }
00305
00306 in.close();
00307 haveReadFile = true;
00308 }
00309
00313 public void clearChunks()
00314 {
00315 chunks.clear();
00316 }
00317
00321 public void clearFeatures()
00322 {
00323 Iterator i = chunks.iterator();
00324 while(i.hasNext())
00325 ((FeatChunk)i.next()).clearFeatures();
00326 }
00327
00331 protected void write(Writer w) throws IOException
00332 {
00333
00334 w.write("# filename onset_time chunk_length [features]\n# Features: ");
00335
00336 for (int i = 0; i < featureDescriptions.size(); i++)
00337 {
00338 w.write((String)featureDescriptions.elementAt(i));
00339 }
00340 w.write("\n");
00341
00342 Iterator i = chunks.iterator();
00343 while(i.hasNext())
00344 w.write(i.next().toString());
00345 }
00346
00350 public Object clone()
00351 {
00352 FeatFile o = new FeatFile(this.filename);
00353
00354
00355 o.haveReadFile = this.haveReadFile;
00356 o.haveWrittenFile = this.haveWrittenFile;
00357
00358
00359 o.featureDescriptions = new Vector(this.featureDescriptions);
00360
00361 o.chunks = new Vector(100);
00362 Iterator i = this.chunks.iterator();
00363 while(i.hasNext())
00364 o.chunks.add(((FeatChunk)i.next()).clone());
00365
00366 return o;
00367 }
00368
00374 public boolean isCompatibleWith(FeatFile f)
00375 {
00376
00377
00378
00379
00380
00381 if(this.featureDescriptions.size() != f.featureDescriptions.size())
00382 return false;
00383
00384 for(int x = 0; x < this.featureDescriptions.size(); x++)
00385 {
00386 String featOne = ((String)this.featureDescriptions.elementAt(x)).trim();
00387 String featTwo = ((String)f.featureDescriptions.elementAt(x)).trim();
00388
00389
00390
00391 if(!featOne.equalsIgnoreCase(featTwo))
00392 return false;
00393 }
00394
00395 return true;
00396 }
00397 }