asr_lib: lab2_train.H Source File

00001 
00002 //  $Id: lab2_train.H,v 1.2 2009/10/02 00:31:58 stanchen Exp $
00003 
00004 
00005 /** * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * **
00006 *   @file lab2_train.H
00007 *   @brief Main loop for Lab 2 Viterbi trainer.
00008 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
00009 
00010 #ifndef _LAB2_TRAIN_H
00011 #define _LAB2_TRAIN_H
00012 
00013 
00014 #include "util.H"
00015 #include "front_end.H"
00016 
00017 
00018 /** * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * **
00019 *   Encapsulation of main loop for Viterbi training.
00020 *
00021 *   Holds global variables and has routines for initializing variables
00022 *   and updating them for each utterance.
00023 *   We do this so that we can call this code from Java as well.
00024 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
00025 class Lab2TrainMain
00026     {
00027     public:
00028         /** Initialize all data from the given name/value parameter map. **/
00029         Lab2TrainMain(const map<string, string>& params);
00030 
00031         /** Called at the beginning of processing each iteration.
00032         *   Returns whether at EOF. NOTE(review): "EOF" presumably means no iterations remain (cf. m_iterCnt) -- confirm in the implementation.
00033         **/
00034         bool init_iter();
00035 
00036         /** Called at the end of processing each iteration; takes no arguments and returns nothing. **/
00037         void finish_iter();
00038 
00039         /** Called at the beginning of processing each utterance.
00040         *   Returns whether at EOF. NOTE(review): presumably end of the audio/alignment input streams -- confirm in the implementation.
00041         **/
00042         bool init_utt();
00043 
00044         /** Called at the end of processing each utterance; @p logProb is supplied by the caller (presumably the utterance's log prob, cf. m_totLogProb -- confirm). **/
00045         void finish_utt(double logProb);
00046 
00047         /** Called at end of program; takes no arguments and returns nothing. **/
00048         void finish();
00049 
00050 
00051         /** Returns the acoustic model (m_gmmSet) by non-const reference, so the caller can modify it in place. **/
00052         GmmSet& get_gmm_set() { return m_gmmSet; }
00053 
00054         /** Returns a read-only reference to the list of GMM counts (m_gmmCountList). **/
00055         const vector<GmmCount>& get_gmm_counts() const
00056             { return m_gmmCountList; }
00057 
00058         /** Returns a read-only reference to the feature vectors (m_feats). **/
00059         const matrix<double>& get_feats() const { return m_feats; }
00060 
00061     private:
00062         /** Program parameters, as a name-to-value string map. **/
00063         map<string, string> m_params;
00064 
00065         /** Front end object (type FrontEnd; see front_end.H). **/
00066         FrontEnd m_frontEnd;
00067 
00068         /** Acoustic model; exposed to callers through get_gmm_set(). **/
00069         GmmSet m_gmmSet;
00070 
00071         /** Place to output model, held as a string (presumably a file name -- confirm). **/
00072         string m_outGmmFile;
00073 
00074         /** Input audio for current utterance. **/
00075         matrix<double> m_inAudio;
00076 
00077         /** Feature vectors for current utterance; exposed through get_feats(). **/
00078         matrix<double> m_feats;
00079 
00080         /** Alignment for current utterance, as a list of ints (presumably GMM indices -- confirm). **/
00081         vector<int> m_gmmList;
00082 
00083         /** GMM counts for current utterance; exposed through get_gmm_counts(). **/
00084         vector<GmmCount> m_gmmCountList;
00085 
00086         /** Total number of iterations. **/
00087         int m_iterCnt;
00088 
00089         /** Current iteration. NOTE(review): whether this is 0- or 1-based is not visible here. **/
00090         int m_iterIdx;
00091 
00092         /** Total frames processed so far. **/
00093         int m_totFrmCnt;
00094 
00095         /** Total log prob of utterances processed so far. **/
00096         double m_totLogProb;
00097 
00098         /** Input file stream for reading audio data. **/
00099         ifstream m_audioStrm;
00100 
00101         /** Input file stream for reading alignments. **/
00102         ifstream m_alignStrm;
00103 
00104         /** ID string for current utterance. **/
00105         string m_idStr;
00106     };
00107 
00108 
00109 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
00110 *
00111 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
00112 
00113 #endif
00114 
00115