00001 00002 // $Id: lab2_vit.H,v 1.4 2009/10/03 05:26:15 stanchen Exp $ 00003 00004 00005 /** * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * ** 00006 * @file lab2_vit.H 00007 * @brief Main loop for Lab 2 Viterbi decoder. 00008 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 00009 00010 #ifndef _LAB2_VIT_H 00011 #define _LAB2_VIT_H 00012 00013 00014 #include "util.H" 00015 #include "front_end.H" 00016 00017 00018 /** * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * ** 00019 * Cell in dynamic programming chart for Viterbi algorithm. 00020 * 00021 * Holds Viterbi log prob; and arc ID of best incoming arc for backtrace. 00022 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 00023 class VitCell 00024 { 00025 public: 00026 /** Ctor; inits log prob to g_zeroLogProb and arc ID to -1. **/ 00027 VitCell() : m_logProb(g_zeroLogProb), m_arcId(-1) { } 00028 00029 #ifndef SWIG 00030 #ifndef DOXYGEN 00031 // Hack; for bug in matrix<> class in boost 1.32. 00032 explicit VitCell(int) : m_logProb(g_zeroLogProb), m_arcId(-1) { } 00033 #endif 00034 #endif 00035 00036 /** Sets associated log prob and arc ID. **/ 00037 void assign(double logProb, int arcId) 00038 { m_logProb = logProb; m_arcId = arcId; } 00039 00040 /** Returns log prob of cell. **/ 00041 double get_log_prob() const { return m_logProb; } 00042 00043 /** Returns arc ID of cell. **/ 00044 int get_arc_id() const { return m_arcId; } 00045 00046 private: 00047 /** Forward Viterbi logprob. **/ 00048 double m_logProb; 00049 00050 /** ID of best incoming arc, for traceback. **/ 00051 int m_arcId; 00052 }; 00053 00054 00055 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 00056 * 00057 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 00058 00059 /** Routine for Viterbi backtrace. **/ 00060 double viterbi_backtrace(const Graph& graph, matrix<VitCell>& chart, 00061 vector<int>& outLabelList, bool doAlign); 00062 00063 00064 /** * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * ** 00065 * Encapsulation of main loop for Viterbi decoding. 00066 * 00067 * Holds global variables and has routines for initializing variables 00068 * and updating them for each utterance. 00069 * We do this so that we can call this code from Java as well. 00070 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 00071 class Lab2VitMain 00072 { 00073 public: 00074 /** Initialize all data given parameters. **/ 00075 Lab2VitMain(const map<string, string>& params); 00076 00077 /** Called at the beginning of processing each utterance. 00078 * Returns whether at EOF. 00079 **/ 00080 bool init_utt(); 00081 00082 /** Called at the end of processing each utterance. **/ 00083 void finish_utt(double logProb); 00084 00085 /** Called at end of program. **/ 00086 void finish(); 00087 00088 00089 /** Returns decoding graph/HMM. **/ 00090 const Graph& get_graph() const { return m_graph; } 00091 00092 /** Returns matrix of GMM log probs for each frame. **/ 00093 const matrix<double>& get_gmm_probs() const { return m_gmmProbs; } 00094 00095 /** Returns DP chart. **/ 00096 matrix<VitCell>& get_chart() { return m_chart; } 00097 00098 /** Returns vector to place decoded labels in. **/ 00099 vector<int>& get_label_list() { return m_labelList; } 00100 00101 /** Returns acoustic weight. **/ 00102 double get_acous_wgt() const { return m_acousWgt; } 00103 00104 /** Returns whether doing forced alignment or decoding. **/ 00105 bool do_align() const { return m_doAlign; } 00106 00107 00108 private: 00109 /** Program parameters. **/ 00110 map<string, string> m_params; 00111 00112 /** Front end. **/ 00113 FrontEnd m_frontEnd; 00114 00115 /** Acoustic model. **/ 00116 GmmSet m_gmmSet; 00117 00118 /** Stream for reading audio data. **/ 00119 ifstream m_audioStrm; 00120 00121 /** Whether to do forced alignment or decoding. **/ 00122 bool m_doAlign; 00123 00124 /** Graph/HMM. **/ 00125 Graph m_graph; 00126 00127 /** Stream for reading graphs, if doing alignment. **/ 00128 ifstream m_graphStrm; 00129 00130 /** Stream for writing decoding output or alignments. **/ 00131 ofstream m_outStrm; 00132 00133 /** Acoustic weight. **/ 00134 double m_acousWgt; 00135 00136 /** ID string for current utterance. **/ 00137 string m_idStr; 00138 00139 /** Input audio for current utterance. **/ 00140 matrix<double> m_inAudio; 00141 00142 /** Feature vectors for current utterance. **/ 00143 matrix<double> m_feats; 00144 00145 /** GMM probs for current utterance. **/ 00146 matrix<double> m_gmmProbs; 00147 00148 /** DP chart for current utterance. **/ 00149 matrix<VitCell> m_chart; 00150 00151 /** Decoded output. **/ 00152 vector<int> m_labelList; 00153 00154 /** Total frames processed so far. **/ 00155 int m_totFrmCnt; 00156 00157 /** Total log prob of utterances processed so far. **/ 00158 double m_totLogProb; 00159 }; 00160 00161 00162 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 00163 * 00164 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 00165 00166 #endif 00167 00168