Main Page   Packages   Class Hierarchy   Compound List   File List   Compound Members  

FeatFile.java

00001 /*
00002  *  Copyright 2006-2007 Columbia University.
00003  *
00004  *  This file is part of MEAPsoft.
00005  *
00006  *  MEAPsoft is free software; you can redistribute it and/or modify
00007  *  it under the terms of the GNU General Public License version 2 as
00008  *  published by the Free Software Foundation.
00009  *
00010  *  MEAPsoft is distributed in the hope that it will be useful, but
00011  *  WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  *  General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU General Public License
00016  *  along with MEAPsoft; if not, write to the Free Software
00017  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
00018  *  02110-1301 USA
00019  *
00020  *  See the file "COPYING" for the text of the license.
00021  */
00022 
00023 package com.meapsoft;
00024 
00025 import java.io.BufferedReader;
00026 import java.io.FileReader;
00027 import java.io.IOException;
00028 import java.io.Writer;
00029 import java.util.Arrays;
00030 import java.util.Iterator;
00031 import java.util.Vector;
00032 import java.util.regex.Matcher;
00033 import java.util.regex.Pattern;
00034 
00040 public class FeatFile extends MEAPFile implements Cloneable
00041 {
00042     // Keep track of the types (and dimensions) of features contained
00043     // in the chunks in this file
00044     public Vector featureDescriptions = new Vector();
00045     
00046     // The FeatChunks contained in this file
00047     public Vector chunks;  
00048     
00049     // regular expressions for parsing FeatFiles
00050     protected static final Pattern commentPattern = Pattern.compile(
00051         "#\\.*");
00052     protected static final Pattern linePattern = Pattern.compile(
00053         "\\s*([^#\\s]+)\\s*");
00054     // TODO: add feature weights and dimensions parsing into this pattern
00055     protected static final Pattern featDescPattern = Pattern.compile(
00056         "^#\\s*Features:\\s*");
00057 
00058     public FeatFile(String fn)
00059     {
00060         filename = fn;
00061         chunks = new Vector(100, 0);
00062     }
00063 
00064     // Java bitches if this is not present for some reason.  Don't use
00065     // it - its bad news.
00066     protected FeatFile() 
00067     { 
00068         this("BUG");
00069     }
00070 
00077     public double[][] getFeatures()
00078     {
00079         return getFeatures(null);
00080     }
00081 
00088     public double[][] getFeatures(int[] featdim)
00089     {
00090         // how many feature dimensions are we using?
00091         int maxdim = 0;
00092         if(featdim != null)
00093             maxdim = featdim.length;
00094         else
00095             maxdim = ((FeatChunk)chunks.get(0)).numFeatures();
00096 
00097         double[][] mat = new double[chunks.size()][maxdim];
00098 
00099         for(int x = 0; x < chunks.size(); x++)
00100         {
00101             FeatChunk c = (FeatChunk)chunks.get(x);
00102 
00103             double[] currFeat = c.getFeatures(featdim);
00104             for(int y = 0; y < currFeat.length; y++)
00105                 mat[x][y] = currFeat[y];
00106         }
00107 
00108         return mat;
00109     }
00110 
00119         public int[] getFeatureLengths()
00120         {
00121                 int[] lengths = new int[featureDescriptions.size()];
00122                 
00123                 for(int i = 0; i < featureDescriptions.size(); i++)
00124                 {
00125                         int numDim = Integer.parseInt(
00126                                 ((String)featureDescriptions.get(i)).split("[()]" )[1]);
00127                         lengths[i] = numDim;
00128                 }
00129                 return lengths;
00130         }
00131         
00142     public void normalizeFeatures()
00143     {
00144         // operate on each feature type separately
00145         int startDim = 0;
00146         for(int featType = 0; featType < featureDescriptions.size(); featType++)
00147         {
00148             int numDim = Integer.parseInt(
00149                 ((String)featureDescriptions.get(featType)).split("[()]" )[1]);
00150 
00151             int[] featDim = new int[numDim];
00152             for(int x = 0; x < numDim; x++)
00153                 featDim[x] = startDim + x;
00154 
00155             // find the max and min values in these dimensions
00156             double[][] feat = getFeatures(featDim);
00157             double minFeat = DSP.min(DSP.min(feat));
00158             double maxFeat = DSP.max(DSP.max(feat));
00159 
00160             for(int x = 0; x < chunks.size(); x++)
00161             {
00162                 FeatChunk c = (FeatChunk)chunks.get(x);
00163 
00164                 double[] currFeat = c.getFeatures(featDim);
00165 
00166                 for(int d = 0; d < featDim.length; d++)
00167                     c.setFeature(featDim[d], 
00168                                  (currFeat[d]-minFeat)/(maxFeat-minFeat));
00169             }
00170 
00171             startDim += numDim;
00172         }
00173     }
00174 
00179     public void applyFeatureWeights()
00180     {
00181         // operate on each feature type separately
00182         int startDim = 0;
00183         for(int featType = 0; featType < featureDescriptions.size(); featType++)
00184         {
00185             int numDim = Integer.parseInt(
00186                 ((String)featureDescriptions.get(featType)).split("[()]")[1]);
00187 
00188             double weight = 1.0;
00189             try
00190             {
00191                 weight = Double.parseDouble(
00192                     ((String)featureDescriptions.get(featType)).split("[*]")[0]);
00193             }
00194             catch(NumberFormatException e)
00195             {
00196                 // the featureDescription does not contain a weight
00197                 continue;
00198             }
00199 
00200             int[] featDim = new int[numDim];
00201             for(int x = 0; x < numDim; x++)
00202                 featDim[x] = startDim + x;
00203 
00204             for(int x = 0; x < chunks.size(); x++)
00205             {
00206                 FeatChunk c = (FeatChunk)chunks.get(x);
00207 
00208                 double[] currFeat = c.getFeatures(featDim);
00209 
00210                 for(int d = 0; d < featDim.length; d++)
00211                     c.setFeature(featDim[d], weight*currFeat[d]);
00212             }
00213 
00214             startDim += numDim;
00215         }
00216     }
00217 
00221     public void readFile() throws IOException, ParserException
00222     {
00223         BufferedReader in  = new BufferedReader(new FileReader(filename));
00224 
00225         String audioFile;
00226         double chunkStartTime;
00227         double chunkLength;
00228 
00229         // each line (excluding comments) should look like: 
00230         // audioFile chunkStartTime chunkLength feature1 feature2 ...
00231 
00232         // Parse each line of the input file            
00233         boolean haveWrittenHeader = false;
00234         long lineno = 0;
00235         String line;
00236         while((line = in.readLine()) != null) 
00237         { 
00238             lineno++;
00239             
00240             // extract any comments from the current line
00241             String comment = "";
00242             Matcher c = commentPattern.matcher(line+"\n");
00243             if(c.find())
00244             {
00245                 // comments go all the way to the end of the line
00246                 comment = c.group() + line.substring(c.end()) + "\n";
00247                 line = line.substring(0, c.start());
00248 
00249                 // extract featureDescription from comment
00250                 Matcher fd = featDescPattern.matcher(comment);
00251                 if(fd.find())
00252                 {
00253                         String featString = comment.substring(fd.end()).trim();
00254 
00255                     featureDescriptions.addAll(
00256                         new Vector(Arrays.asList(featString.split("\\s+"))));
00257                 }
00258             }
00259 
00260             Matcher p = linePattern.matcher(line);
00261             // is there anything else?
00262             if(!p.find())
00263                 continue;
00264             audioFile = p.group(1);
00265             // decode spaces in the file name
00266             audioFile = audioFile.replaceAll("%20", " ");
00267             //System.out.println(audioFile);
00268 
00269             if(!p.find())
00270                 throw new ParserException(filename, lineno, 
00271                                           "Could not find chunk start time.");
00272             try { chunkStartTime = Double.parseDouble(p.group(1)); }
00273             catch(NumberFormatException nfe) { 
00274                 throw new ParserException(filename, lineno, 
00275                                           "Could not parse chunk start time \"" 
00276                                           + p.group(1)  + "\".");  }
00277             if(!p.find())
00278                 throw new ParserException(filename, lineno, 
00279                                           "Could not find chunk length.");
00280             try { chunkLength = Double.parseDouble(p.group(1)); }
00281             catch(NumberFormatException nfe) { 
00282                 throw new ParserException(filename, lineno, 
00283                                           "Could not parse chunk length \"" 
00284                                           + p.group(1)  + "\".");  }
00285             
00286             FeatChunk ch = new FeatChunk(audioFile, chunkStartTime, chunkLength);
00287             ch.comment = comment;
00288 
00289             // save the remaining features on the line
00290             while(p.find())
00291             {
00292                 // what kind of feature is this?  If its not a double then its a string;
00293                 try 
00294                 {
00295                     ch.addFeature(Double.parseDouble(p.group(1)));
00296                 }
00297                 catch (NumberFormatException e)
00298                 {
00299                     ch.addFeature(p.group(1));
00300                 }
00301             }
00302 
00303             chunks.add(ch);
00304         }
00305 
00306         in.close();
00307         haveReadFile = true;
00308     }
00309     
00313     public void clearChunks()
00314     {
00315         chunks.clear();
00316     }
00317 
00321     public void clearFeatures()
00322     {
00323         Iterator i = chunks.iterator();
00324         while(i.hasNext())
00325             ((FeatChunk)i.next()).clearFeatures();
00326     }
00327     
00331     protected void write(Writer w) throws IOException
00332     {
00333         // write the header
00334         w.write("# filename onset_time chunk_length [features]\n# Features: ");
00335         
00336         for (int i = 0; i < featureDescriptions.size(); i++)
00337         {
00338                 w.write((String)featureDescriptions.elementAt(i));
00339         }
00340         w.write("\n");
00341         
00342         Iterator i = chunks.iterator();
00343         while(i.hasNext())
00344             w.write(i.next().toString());
00345     }
00346 
00350     public Object clone()
00351     {
00352         FeatFile o = new FeatFile(this.filename);
00353        
00354         // superclass (MEAPFile) fields
00355         o.haveReadFile = this.haveReadFile;
00356         o.haveWrittenFile = this.haveWrittenFile;
00357 
00358         // local fields
00359         o.featureDescriptions = new Vector(this.featureDescriptions);
00360 
00361         o.chunks = new Vector(100);
00362         Iterator i = this.chunks.iterator();
00363         while(i.hasNext())
00364             o.chunks.add(((FeatChunk)i.next()).clone());
00365 
00366         return o;
00367     }
00368 
00374     public boolean isCompatibleWith(FeatFile f)
00375     {
00376         //TODO:{Should really check if feature names match excluding
00377         //whitespace and other meaningless gunk.}
00378 
00379         //System.out.println(this.featureDescriptions.size()+" "+ f.featureDescriptions.size());
00380 
00381         if(this.featureDescriptions.size() != f.featureDescriptions.size())
00382             return false;
00383 
00384         for(int x = 0; x < this.featureDescriptions.size(); x++)
00385         {
00386             String featOne = ((String)this.featureDescriptions.elementAt(x)).trim();
00387             String featTwo = ((String)f.featureDescriptions.elementAt(x)).trim();
00388 
00389             //System.out.println(x+": "+featOne+" == "+ featTwo + " : " + featOne.equalsIgnoreCase(featTwo));
00390 
00391             if(!featOne.equalsIgnoreCase(featTwo))
00392                 return false;
00393         }
00394 
00395         return true;
00396     }
00397 }

Generated on Tue Feb 6 19:02:26 2007 for MEAPsoft by doxygen1.2.18