VQComposer.java

/*
 *  Copyright 2006-2007 Columbia University.
 *
 *  This file is part of MEAPsoft.
 *
 *  MEAPsoft is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  MEAPsoft is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with MEAPsoft; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 *  02110-1301 USA
 *
 *  See the file "COPYING" for the text of the license.
 */

package com.meapsoft.composers;

import gnu.getopt.Getopt;

import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Vector;

import com.meapsoft.ChunkDist;
import com.meapsoft.DSP;
import com.meapsoft.EDLChunk;
import com.meapsoft.EDLFile;
import com.meapsoft.EuclideanDist;
import com.meapsoft.FeatChunk;
import com.meapsoft.FeatFile;
import com.meapsoft.ParserException;

public class VQComposer extends Composer
{
    public static String description = "VQComposer trains a vector quantizer on the chunks in the input file.  It then uses it to quantize the chunks in another file.  For best results use the beat segmenter so each chunk has roughly the same length.";

    protected String outFileName = "vq.edl";

    // file to train the VQ
    protected FeatFile trainFile;

    // file to quantize.  If this is null, the composer will output an
    // EDL containing the cluster template chunks.
    protected FeatFile featsToQuantize = null;

    // number of codewords to use
    protected int cbSize = 32;
    protected Vector templateChunks = new Vector(cbSize);

    // how many beats should each codeword (state) contain?
    protected int beatsPerCodeword = 4;

    // only supports Euclidean distance for now
    protected ChunkDist dist = new EuclideanDist();

    protected int[] featdim = null;
    protected boolean debug = false;

    public VQComposer(FeatFile trainFN, EDLFile outFN)
    {
        // both a training feature file and an output EDL file are required
        if (trainFN == null || outFN == null)
            return;

        trainFile = trainFN;
        outFile = outFN;
    }

    public void printUsageAndExit()
    {
        System.out.println("Usage: VQComposer [-options] features.feat \n\n" +
            "  where options include:\n" +
            "    -o output_file   the file to write the output to (defaults to "+outFileName+")\n" +
            "    -g               debug mode\n" +
            "    -f file.feat     feature file to quantize (uses training features file by default)\n" +
            "    -b nbeats        number of beats each codeword should contain (defaults to "+beatsPerCodeword+")\n" +
            "    -q codebook_size number of templates to use in the VQ codebook (defaults to "+cbSize+").");
        printCommandLineOptions('i');
        printCommandLineOptions('d');
        printCommandLineOptions('c');
        System.out.println();
        System.exit(0);
    }

    public VQComposer(String[] args)
    {
        if(args.length == 0)
            printUsageAndExit();

        Vector features = new Vector();

        // Parse arguments
        String argString = "o:c:q:i:gd:f:b:";
        featdim = parseFeatDim(args, argString);
        dist = parseChunkDist(args, argString, featdim);
        parseCommands(args, argString);

        Getopt opt = new Getopt("VQComposer", args, argString);
        opt.setOpterr(false);

        int c = -1;
        while((c = opt.getopt()) != -1)
        {
            switch(c)
            {
            case 'o':
                outFileName = opt.getOptarg();
                break;
            case 'g':
                debug = true;
                break;
            case 'q':
                cbSize = Integer.parseInt(opt.getOptarg());
                break;
            case 'b':
                beatsPerCodeword = Integer.parseInt(opt.getOptarg());
                break;
            case 'f':
                featsToQuantize = new FeatFile(opt.getOptarg());
                break;
            case 'c':  // already handled above
                break;
            case 'd':  // already handled above
                break;
            case 'i':  // already handled above
                break;
            case '?':
                printUsageAndExit();
                break;
            default:
                System.out.print("getopt() returned " + c + "\n");
            }
        }

        // make sure a feature file was given as a non-option argument
        int ind = opt.getOptind();
        if(ind >= args.length)
            printUsageAndExit();

        trainFile = new FeatFile(args[args.length-1]);
        if(featsToQuantize == null)
            featsToQuantize = trainFile;
        outFile = new EDLFile(outFileName);

        System.out.println("Composing " + outFileName +
                           " from " + args[args.length-1] + ".");
    }
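
    // Example command line for the constructor above (file names are
    // hypothetical), training and quantizing beats.feat with a 16-codeword
    // codebook and writing the result to out.edl:
    //
    //   VQComposer -q 16 -b 4 -o out.edl beats.feat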

    public void setup() throws IOException, ParserException
    {
        super.setup();

        if(!trainFile.haveReadFile)
            trainFile.readFile();

        if(trainFile.chunks.size() == 0)
            throw new ParserException(trainFile.filename, "No chunks found");

        trainFile = (FeatFile)trainFile.clone();
        trainFile.normalizeFeatures();
        trainFile.applyFeatureWeights();

        // To change the number of beats per state all we have to do is
        // modify the chunks in trainFile by joining every beatsPerCodeword
        // chunks into one superchunk.
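        // For example, with beatsPerCodeword = 4, chunks 0-3 form the first
        // superchunk, chunks 4-7 the second, and any trailing chunks that do
        // not fill a complete group are dropped.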
        Vector newChunks = new Vector();
        for(int x = 0; x < trainFile.chunks.size()-beatsPerCodeword+1;
            x += beatsPerCodeword)
        {
            FeatChunk newChunk =
                (FeatChunk)((FeatChunk)trainFile.chunks.get(x)).clone();

            for(int y = 1; y < beatsPerCodeword; y++)
            {
                FeatChunk f = (FeatChunk)trainFile.chunks.get(x+y);

                newChunk.addFeature(f.getFeatures());
                newChunk.length += f.length;
            }

            newChunks.add(newChunk);
        }

        trainFile.chunks = newChunks;

        progress.setMaximum(trainFile.chunks.size());

        if(featsToQuantize != null)
        {
            if(!featsToQuantize.haveReadFile)
                featsToQuantize.readFile();

            // make sure the features match those in the training file
            if(!featsToQuantize.isCompatibleWith(trainFile))
                throw new ParserException(trainFile.filename,
                                          "Features do not match those in "
                                          + featsToQuantize.filename);

            featsToQuantize = (FeatFile)featsToQuantize.clone();
            featsToQuantize.normalizeFeatures();
            featsToQuantize.applyFeatureWeights();

            // Join every beatsPerCodeword chunks of featsToQuantize into one
            // superchunk, exactly as was done for trainFile above.
            newChunks = new Vector();
            for(int x = 0;
                x < featsToQuantize.chunks.size()-beatsPerCodeword+1;
                x += beatsPerCodeword)
            {
                FeatChunk newChunk =
                    (FeatChunk)((FeatChunk)featsToQuantize.chunks.get(x)).clone();

                for(int y = 1; y < beatsPerCodeword; y++)
                {
                    FeatChunk f = (FeatChunk)featsToQuantize.chunks.get(x+y);

                    newChunk.addFeature(f.getFeatures());
                    newChunk.length += f.length;
                }

                newChunks.add(newChunk);
            }

            featsToQuantize.chunks = newChunks;

            progress.setMaximum(featsToQuantize.chunks.size());
        }
    }

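    // Build the codebook with a binary-splitting (LBG-style) procedure:
    // start from a single codeword at the mean of all training features,
    // repeatedly split each codeword into two copies nudged apart by
    // +/- delta, and refine the enlarged codebook with k-means iterations
    // (assign every chunk to its nearest codeword, then move each codeword
    // to the mean of the chunks assigned to it) until the total distortion
    // stops changing.  Finally, each codeword is replaced by the training
    // chunk closest to it, so the templates are real chunks.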
    protected void learnCodebook(FeatFile trainFile)
    {
        double[][] features = trainFile.getFeatures();
        int ndat = features.length;
        int ndim = features[0].length;

        progress.setMaximum(progress.getMaximum()
                            + (int)(Math.log(cbSize)/Math.log(2)));

        // initial codebook:
        templateChunks = new Vector(cbSize);

        // create a placeholder template chunk
        FeatChunk template0 = new FeatChunk("templateChunk0", 0, 0);
        template0.setFeatures(DSP.mean(DSP.transpose(features)));
        templateChunks.add(template0);

        // distortion between each codeword and each chunk
        double[][] distortion = new double[cbSize][ndat];
        for(int x = 0; x < distortion.length; x++)
            Arrays.fill(distortion[x], Double.MAX_VALUE);
        // index into templateChunks for each chunk
        int[] idx = new int[ndat];

        // how much the means should be nudged when splitting
        double delta = 1e-3;

        // start from one codeword and split from there
        for(int nValidCW = 2; nValidCW <= cbSize;
            nValidCW = Math.min(2*nValidCW, cbSize))
        {
            if(debug)
                System.out.println("Splitting into "+nValidCW+" codewords.");

            // split codewords
            for(int c = 0; c < nValidCW; c += 2)
            {
                FeatChunk ch = (FeatChunk)templateChunks.get(c);
                ch.setFeatures(DSP.minus(ch.getFeatures(), delta));
                templateChunks.set(c, ch);

                FeatChunk newch = new FeatChunk("templateChunk"+c, 0, 0);
                newch.setFeatures(DSP.plus(ch.getFeatures(), delta));
                templateChunks.add(c+1, newch);
            }

            double currTotalDist = 0;
            double prevTotalDist = Double.MAX_VALUE;
            do
            {
                prevTotalDist = currTotalDist;
                currTotalDist = 0;
                for(int c = 0; c < nValidCW; c++)
                {
                    FeatChunk cw = (FeatChunk)templateChunks.get(c);
                    for(int n = 0; n < ndat; n++)
                    {
                        FeatChunk ch = (FeatChunk)trainFile.chunks.get(n);
                        distortion[c][n] = dist.distance(cw, ch);
                        currTotalDist += distortion[c][n];
                    }
                }

                // quantize
                for(int n = 0; n < ndat; n++)
                    idx[n] = DSP.argmin(DSP.getColumn(distortion, n));

                // update means
                double[] newCW = new double[ndim];
                for(int c = 0; c < nValidCW; c++)
                {
                    FeatChunk ch = (FeatChunk)templateChunks.get(c);
                    Arrays.fill(newCW, 0);
                    int nmatch = 0;
                    for(int n = 0; n < ndat; n++)
                    {
                        if(idx[n] == c)
                        {
                            nmatch++;

                            for(int i = 0; i < ndim; i++)
                                newCW[i] += features[n][i];
                        }
                    }

                    if(nmatch != 0)
                        ch.setFeatures(DSP.rdivide(newCW, nmatch));
                }
                if(debug)
                    System.out.println("  distortion = "
                                       + Math.abs(currTotalDist-prevTotalDist));
            } while(Math.abs(currTotalDist - prevTotalDist) > 0.0);

            progress.setValue(progress.getValue()+1);

            // make sure we exit the loop once we're done splitting
            if(nValidCW == cbSize)
                break;
        }

        // use the chunk closest to each codeword's mean as the template
        // for that codeword
        templateChunks = new Vector(cbSize);
        for(int c = 0; c < cbSize; c++)
        {
            int n = DSP.argmin(distortion[c]);
            templateChunks.add(c, trainFile.chunks.get(n));
        }
    }

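    // Return the index of the codebook template that is closest to f
    // under the current distance measure.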
    protected int quantizeChunk(FeatChunk f)
    {
        double minDist = Double.MAX_VALUE;
        int match = -1;
        for(int c = 0; c < templateChunks.size(); c++)
        {
            FeatChunk chunk = (FeatChunk)templateChunks.get(c);

            double d = dist.distance(chunk, f);

            if(d < minDist)
            {
                minDist = d;
                match = c;
            }
        }

        return match;
    }

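    // Build the output EDL.  If no separate file was given to quantize,
    // lay the codebook templates out back to back; otherwise replace each
    // superchunk of featsToQuantize with its nearest codebook template,
    // keeping the original start times.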
    public EDLFile compose()
    {
        // learn VQ codebook from trainFile
        learnCodebook(trainFile);

        if(featsToQuantize == null)
        {
            double currTime = 0.0;
            Iterator i = templateChunks.iterator();
            while(i.hasNext())
            {
                FeatChunk currChunk = (FeatChunk)i.next();

                EDLChunk nc = new EDLChunk(
                    (FeatChunk)templateChunks.get(quantizeChunk(currChunk)),
                    currTime);
                outFile.chunks.add(nc);

                currTime += nc.length;
                progress.setValue(progress.getValue()+1);
            }
        }
        else  // quantize featsToQuantize
        {
            Iterator i = featsToQuantize.chunks.iterator();
            while(i.hasNext())
            {
                FeatChunk currChunk = (FeatChunk)i.next();

                EDLChunk nc = new EDLChunk(
                    (FeatChunk)templateChunks.get(quantizeChunk(currChunk)),
                    currChunk.startTime);

                outFile.chunks.add(nc);
                progress.setValue(progress.getValue()+1);
            }
        }

        return outFile;
    }

    public void setCodebookSize(int cb)
    {
        cbSize = cb;

        if(cbSize == 0)
            cbSize = 1;
    }

    public void setBeatsPerCodeword(int nbeats)
    {
        if(nbeats > 0)
            beatsPerCodeword = nbeats;
    }

    public void setFeatsToQuantize(FeatFile featFile)
    {
        featsToQuantize = featFile;
    }

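    // Programmatic use is also possible.  A minimal sketch (file names are
    // hypothetical, and Composer.run() is assumed to drive setup/compose
    // and write the EDL, as it does when invoked from main() below):
    //
    //   FeatFile feats = new FeatFile("beats.feat");
    //   VQComposer vq = new VQComposer(feats, new EDLFile("vq.edl"));
    //   vq.setCodebookSize(16);
    //   vq.setBeatsPerCodeword(4);
    //   vq.run();
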
    public static void main(String[] args)
    {
        VQComposer m = new VQComposer(args);
        long startTime = System.currentTimeMillis();
        m.run();
        System.out.println("Done. Took " +
                           ((System.currentTimeMillis() - startTime)/1000.0)
                           + "s");
        System.exit(0);
    }
}
