#include <cassert>
#include <cfloat>
#include <cmath>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost/format.hpp>
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/shared_ptr.hpp>
Go to the source code of this file.
Classes | |
class | GmmScorer |
Abstract base class, interface for object computing GMM probs. More... | |
class | GmmSet |
Class holding set of diagonal covariance GMM's. More... | |
class | GmmCount |
GMM count class. More... | |
class | SymbolTable |
Class holding symbol table for a graph/FSM. More... | |
class | Arc |
Arc class. More... | |
class | Graph |
Graph/FSM class. More... | |
class | NGramCounter |
Class for storing counts for a set of n-grams. More... | |
Math stuff. | |
const double | g_zeroLogProb = -FLT_MAX / 2.0 |
This value can be used to represent the logprob of a "zero" prob. | |
double | add_log_probs (const vector< double > &logProbList) |
Adds the log probs held in logProbList , returning answer as log prob. | |
void | real_fft (vector< double > &vals) |
Does in-place real FFT. | |
void | copy_matrix_row_to_vector (const matrix< double > &mat, unsigned rowIdx, vector< double > &vec) |
Sets vec to be equal to the rowIdx-th row of mat . | |
void | copy_vector_to_matrix_row (const vector< double > &vec, matrix< double > &mat, unsigned rowIdx) |
Sets the rowIdx-th row of mat to vec ; sizes must match. | |
Command-line parsing and parameter lookup routines. | |
typedef map< string, string > | ParamsType |
Type of object used for holding program parameters. | |
void | process_cmd_line (const char **argv, map< string, string > &params) |
Given cmd line arguments argv , parses flags of the form --<flag> <val> and places in flag-to-value map params . | |
void | process_cmd_line (const vector< string > &argList, map< string, string > &params) |
Like process_cmd_line(), but expects arguments as string vector. | |
void | process_cmd_line (const string &argStr, map< string, string > &params) |
Like process_cmd_line(), but expects space-separated arguments in single string. | |
bool | get_bool_param (const map< string, string > &params, const string &name, bool defaultVal=false) |
Returns value of boolean parameter name from parameter map params . | |
int | get_int_param (const map< string, string > &params, const string &name, int defaultVal=0) |
Like get_bool_param(), but for integer parameters. | |
double | get_float_param (const map< string, string > &params, const string &name, double defaultVal=0.0) |
Like get_bool_param(), but for floating-point parameters. | |
string | get_string_param (const map< string, string > &params, const string &name, const string &defaultVal=string()) |
Like get_bool_param(), but for string parameters. | |
string | get_required_string_param (const map< string, string > &params, const string &name) |
Like get_string_param(), but throws exception if parameter absent. | |
Functions | |
bool | operator< (const GmmCount &cnt1, const GmmCount &cnt2) |
Orders GmmCount objects first by frame, then GMM index, then by decreasing count. | |
void | convert_words_to_indices (const vector< string > &wordList, vector< int > &wordIdxList, const SymbolTable &symTable, int n, int bosIdx, int eosIdx, int unkIdx) |
For converting a vector of strings wordList to a vector of ints wordIdxList using a SymbolTable, for n-gram model processing. | |
Vector/matrix I/O routines. | |
void | split_string (const string &inStr, vector< string > &outList) |
Splits inStr into space-separated tokens; places in outList . | |
void | read_string_list (const string &fileName, vector< string > &strList) |
Reads a list of strings, one to a line, from file fileName and places in strList . | |
string | read_float_matrix (istream &inStrm, matrix< double > &mat, const string &name=string()) |
Reads matrix of floating-point numbers from stream inStrm in Matlab text format and places in mat . | |
string | read_float_vector (istream &inStrm, vector< double > &vec, const string &name=string()) |
Like read_float_matrix(), but for float vectors. | |
string | read_int_matrix (istream &inStrm, matrix< int > &mat, const string &name=string()) |
Like read_float_matrix(), but for integer matrices. | |
string | read_int_vector (istream &inStrm, vector< int > &vec, const string &name=string()) |
Like read_float_matrix(), but for integer vectors. | |
void | read_float_matrix (const string &fileName, matrix< double > &mat) |
Like read_float_matrix(), but reads from file fileName instead of stream. | |
void | read_float_vector (const string &fileName, vector< double > &vec) |
Like read_float_vector(), but reads from file fileName instead of stream. | |
void | read_int_matrix (const string &fileName, matrix< int > &mat) |
Like read_int_matrix(), but reads from file fileName instead of stream. | |
void | read_int_vector (const string &fileName, vector< int > &vec) |
Like read_int_vector(), but reads from file fileName instead of stream. | |
void | write_float_matrix (ostream &outStrm, const matrix< double > &mat, const string &name=string()) |
Writes floating-point matrix mat to stream outStrm in Matlab text format. | |
void | write_float_vector (ostream &outStrm, const vector< double > &vec, const string &name=string()) |
Like write_float_matrix(), but for float vectors. | |
void | write_int_matrix (ostream &outStrm, const matrix< int > &mat, const string &name=string()) |
Like write_float_matrix(), but for integer matrices. | |
void | write_int_vector (ostream &outStrm, const vector< int > &vec, const string &name=string()) |
Like write_float_matrix(), but for integer vectors. | |
void | write_float_matrix (const string &fileName, const matrix< double > &mat) |
Like write_float_matrix(), but writes to file fileName instead of stream. | |
void | write_float_vector (const string &fileName, const vector< double > &vec) |
Like write_float_vector(), but writes to file fileName instead of stream. | |
void | write_int_matrix (const string &fileName, const matrix< int > &mat) |
Like write_int_matrix(), but writes to file fileName instead of stream. | |
void | write_int_vector (const string &fileName, const vector< int > &vec) |
Like write_int_vector(), but writes to file fileName instead of stream. |
typedef map<string, string> ParamsType |
Type of object used for holding program parameters.
Declaration needed for hack to get default arguments to work.
double add_log_probs | ( | const vector< double > & | logProbList | ) |
Adds the log probs held in logProbList
, returning answer as log prob.
That is, let's say we have a list of probability values, the logs of which are stored in logProbList
. Then, this routine returns the log of the sum of those probability values. Logarithms are base e.
void real_fft | ( | vector< double > & | vals | ) |
Does in-place real FFT.
For inputs vals[i]
, i = 0, ..., N-1 with sample period T, on return the real and imaginary parts of the FFT value for frequency i/(NT) are held in the outputs vals[2*i]
and vals[2*i+1]
.
void copy_matrix_row_to_vector | ( | const matrix< double > & | mat, | |
unsigned | rowIdx, | |||
vector< double > & | vec | |||
) |
Sets vec
to be equal to the rowIdx-th
row of mat
.
Rows are numbered starting from 0.
void copy_vector_to_matrix_row | ( | const vector< double > & | vec, | |
matrix< double > & | mat, | |||
unsigned | rowIdx | |||
) |
Sets the rowIdx-th
row of mat
to vec
; sizes must match.
Rows are numbered starting from 0.
void process_cmd_line | ( | const char ** | argv, | |
map< string, string > & | params | |||
) |
Given cmd line arguments argv
, parses flags of the form --<flag> <val>
and places in flag-to-value map params
.
Expects same argv
value as passed to main()
. Existing values in params
are not erased (unless overridden in argv
).
void process_cmd_line | ( | const vector< string > & | argList, | |
map< string, string > & | params | |||
) |
Like process_cmd_line(), but expects arguments as string vector.
bool get_bool_param | ( | const map< string, string > & | params, | |
const string & | name, | |||
bool | defaultVal = false | |||
) |
Returns value of boolean parameter name
from parameter map params
.
If not present, returns defaultVal
.
int get_int_param | ( | const map< string, string > & | params, | |
const string & | name, | |||
int | defaultVal = 0 | |||
) |
Like get_bool_param(), but for integer parameters.
double get_float_param | ( | const map< string, string > & | params, | |
const string & | name, | |||
double | defaultVal = 0.0 | |||
) |
Like get_bool_param(), but for floating-point parameters.
string get_string_param | ( | const map< string, string > & | params, | |
const string & | name, | |||
const string & | defaultVal = string() | |||
) |
Like get_bool_param(), but for string parameters.
string get_required_string_param | ( | const map< string, string > & | params, | |
const string & | name | |||
) |
Like get_string_param(), but throws exception if parameter absent.
void split_string | ( | const string & | inStr, | |
vector< string > & | outList | |||
) |
Splits inStr
into space-separated tokens; places in outList
.
string read_float_matrix | ( | istream & | inStrm, | |
matrix< double > & | mat, | |||
const string & | name = string() | |||
) |
Reads matrix of floating-point numbers from stream inStrm
in Matlab text format and places in mat
.
Expects optional matrix header, and then one row per line. If argument name
is provided, checks that the name associated with the matrix matches it and throws an exception if it does not. Returns the name given in the matrix header, or an empty string if none is provided.
string read_float_vector | ( | istream & | inStrm, | |
vector< double > & | vec, | |||
const string & | name = string() | |||
) |
Like read_float_matrix(), but for float vectors.
string read_int_matrix | ( | istream & | inStrm, | |
matrix< int > & | mat, | |||
const string & | name = string() | |||
) |
Like read_float_matrix(), but for integer matrices.
string read_int_vector | ( | istream & | inStrm, | |
vector< int > & | vec, | |||
const string & | name = string() | |||
) |
Like read_float_matrix(), but for integer vectors.
void write_float_matrix | ( | ostream & | outStrm, | |
const matrix< double > & | mat, | |||
const string & | name = string() | |||
) |
void write_float_vector | ( | ostream & | outStrm, | |
const vector< double > & | vec, | |||
const string & | name = string() | |||
) |
Like write_float_matrix(), but for float vectors.
void write_int_matrix | ( | ostream & | outStrm, | |
const matrix< int > & | mat, | |||
const string & | name = string() | |||
) |
Like write_float_matrix(), but for integer matrices.
void write_int_vector | ( | ostream & | outStrm, | |
const vector< int > & | vec, | |||
const string & | name = string() | |||
) |
Like write_float_matrix(), but for integer vectors.
void convert_words_to_indices | ( | const vector< string > & | wordList, | |
vector< int > & | wordIdxList, | |||
const SymbolTable & | symTable, | |||
int | n, | |||
int | bosIdx, | |||
int | eosIdx, | |||
int | unkIdx | |||
) |
For converting a vector of strings wordList
to a vector of ints wordIdxList
using a SymbolTable, for n-gram model processing.
Words not in the symbol table are converted to the value unkIdx
. The beginning of the output sequence is padded with n
- 1 bosIdx
values, and a single eosIdx
value is added to the end.
const double g_zeroLogProb = -FLT_MAX / 2.0 |
This value can be used to represent the logprob of a "zero" prob.
Theoretically, log 0 is negative infinity which we can't store, so we can use this very large negative value instead.