00001 00002 // $Id: lab2_fb.H,v 1.3 2009/10/03 05:26:15 stanchen Exp $ 00003 00004 00005 /** * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * ** 00006 * @file lab2_fb.H 00007 * @brief Main loop for Lab 2 Forward-Backward trainer. 00008 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 00009 00010 #ifndef _LAB2_FB_H 00011 #define _LAB2_FB_H 00012 00013 00014 #include "util.H" 00015 #include "front_end.H" 00016 00017 00018 /** * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * ** 00019 * Cell in dynamic programming chart for Forward-Backward algorithm. 00020 * 00021 * Holds forward and backward log probs. 00022 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 00023 class FbCell 00024 { 00025 public: 00026 /** Ctor; inits F+B log probs to g_zeroLogProb. **/ 00027 FbCell() : m_forwLogProb(g_zeroLogProb), m_backLogProb(g_zeroLogProb) 00028 { } 00029 00030 #ifndef SWIG 00031 #ifndef DOXYGEN 00032 // Hack; for bug in matrix<> class in boost 1.32. 00033 explicit FbCell(int) : m_forwLogProb(g_zeroLogProb), 00034 m_backLogProb(g_zeroLogProb) { } 00035 #endif 00036 #endif 00037 00038 /** Sets forward log prob of cell. **/ 00039 void set_forw_log_prob(double logProb) { m_forwLogProb = logProb; } 00040 00041 /** Sets backward log prob of cell. **/ 00042 void set_back_log_prob(double logProb) { m_backLogProb = logProb; } 00043 00044 /** Returns forward log prob of cell. **/ 00045 double get_forw_log_prob() const { return m_forwLogProb; } 00046 00047 /** Returns backward log prob of cell. **/ 00048 double get_back_log_prob() const { return m_backLogProb; } 00049 00050 private: 00051 /** Forward logprob. **/ 00052 double m_forwLogProb; 00053 00054 /** Backward logprob. **/ 00055 double m_backLogProb; 00056 }; 00057 00058 00059 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 00060 * 00061 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 00062 00063 /** Initializes chart for backward pass. **/ 00064 double init_backward_pass(const Graph& graph, matrix<FbCell>& chart); 00065 00066 00067 /** * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * ** 00068 * Encapsulation of main loop for Forward-Backward decoding. 00069 * 00070 * Holds global variables and has routines for initializing variables 00071 * and updating them for each utterance. 00072 * We do this so that we can call this code from Java as well. 00073 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 00074 class Lab2FbMain 00075 { 00076 public: 00077 /** Initialize all data given parameters. **/ 00078 Lab2FbMain(const map<string, string>& params); 00079 00080 /** Called at the beginning of processing each iteration. 00081 * Returns whether at EOF. 00082 **/ 00083 bool init_iter(); 00084 00085 /** Called at the end of processing each iteration. **/ 00086 void finish_iter(); 00087 00088 /** Called at the beginning of processing each utterance. 00089 * Returns whether at EOF. 00090 **/ 00091 bool init_utt(); 00092 00093 /** Called at the end of processing each utterance. **/ 00094 void finish_utt(double logProb); 00095 00096 /** Called at end of program. **/ 00097 void finish(); 00098 00099 00100 /** Returns decoding graph/HMM. **/ 00101 const Graph& get_graph() const { return m_graph; } 00102 00103 /** Returns matrix of GMM log probs for each frame. **/ 00104 const matrix<double>& get_gmm_probs() const { return m_gmmProbs; } 00105 00106 /** Returns DP chart. **/ 00107 matrix<FbCell>& get_chart() { return m_chart; } 00108 00109 /** Returns acoustic model. **/ 00110 GmmSet& get_gmm_set() { return m_gmmSet; } 00111 00112 /** Returns object for storing GMM counts. **/ 00113 vector<GmmCount>& get_gmm_counts() { return m_gmmCountList; } 00114 00115 /** Returns object for storing transition counts. **/ 00116 map<int, double>& get_trans_counts() { return m_transCounts; } 00117 00118 /** Returns feature vectors. **/ 00119 const matrix<double>& get_feats() const { return m_feats; } 00120 00121 00122 private: 00123 /** Program parameters. **/ 00124 map<string, string> m_params; 00125 00126 /** Front end. **/ 00127 FrontEnd m_frontEnd; 00128 00129 /** Acoustic model. **/ 00130 GmmSet m_gmmSet; 00131 00132 /** Place to output model. **/ 00133 string m_outGmmFile; 00134 00135 /** Stream for reading audio data. **/ 00136 ifstream m_audioStrm; 00137 00138 /** Graph/HMM. **/ 00139 Graph m_graph; 00140 00141 /** Stream for reading graphs, if doing alignment. **/ 00142 ifstream m_graphStrm; 00143 00144 /** ID string for current utterance. **/ 00145 string m_idStr; 00146 00147 /** Input audio for current utterance. **/ 00148 matrix<double> m_inAudio; 00149 00150 /** Feature vectors for current utterance. **/ 00151 matrix<double> m_feats; 00152 00153 /** GMM probs for current utterance. **/ 00154 matrix<double> m_gmmProbs; 00155 00156 /** For storing GMM counts. **/ 00157 vector<GmmCount> m_gmmCountList; 00158 00159 /** Temporary buffer, For thresholding GMM counts. **/ 00160 vector<GmmCount> m_gmmCountListThresh; 00161 00162 /** File to output transition counts to, if desired. **/ 00163 string m_transCountsFile; 00164 00165 /** Transition counts, for training arc probs. **/ 00166 map<int, double> m_transCounts; 00167 00168 /** DP chart for current utterance. **/ 00169 matrix<FbCell> m_chart; 00170 00171 /** Total number of iterations. **/ 00172 int m_iterCnt; 00173 00174 /** Current iteration. **/ 00175 int m_iterIdx; 00176 00177 /** Total frames processed so far. **/ 00178 int m_totFrmCnt; 00179 00180 /** Total log prob of utterances processed so far. **/ 00181 double m_totLogProb; 00182 }; 00183 00184 00185 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 00186 * 00187 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 00188 00189 #endif 00190 00191