util.H File Reference

I/O routines, GmmSet, and Graph classes. More...

#include <cassert>
#include <cfloat>
#include <cmath>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost/format.hpp>
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/shared_ptr.hpp>

Go to the source code of this file.

Classes

class GmmScorer

Abstract base class, interface for object computing GMM probs. More...

class GmmSet

Class holding set of diagonal covariance GMMs. More...

class GmmCount

GMM count class. More...

class SymbolTable

Class holding symbol table for a graph/FSM. More...

class Arc

Arc class. More...

class Graph

Graph/FSM class. More...

class NGramCounter

Class for storing counts for a set of n-grams. More...

Math stuff.

const double g_zeroLogProb = -FLT_MAX / 2.0

This value can be used to represent the logprob of a "zero" prob.

double add_log_probs (const vector< double > &logProbList)

Adds the log probs held in logProbList, returning answer as log prob.

void real_fft (vector< double > &vals)

Does in-place real FFT.

void copy_matrix_row_to_vector (const matrix< double > &mat, unsigned rowIdx, vector< double > &vec)

Sets vec to be equal to the rowIdx-th row of mat.

void copy_vector_to_matrix_row (const vector< double > &vec, matrix< double > &mat, unsigned rowIdx)

Sets the rowIdx-th row of mat to vec; sizes must match.

Command-line parsing and parameter lookup routines.

typedef map< string, string > ParamsType

Type of object used for holding program parameters.

void process_cmd_line (const char **argv, map< string, string > &params)

Given cmd line arguments argv, parses flags of the form --<flag> <val> and places in flag-to-value map params.

void process_cmd_line (const vector< string > &argList, map< string, string > &params)

Like process_cmd_line(), but expects arguments as string vector.

void process_cmd_line (const string &argStr, map< string, string > &params)

Like process_cmd_line(), but expects space-separated arguments in single string.

bool get_bool_param (const map< string, string > &params, const string &name, bool defaultVal=false)

Returns value of boolean parameter name from parameter map params.

int get_int_param (const map< string, string > &params, const string &name, int defaultVal=0)

Like get_bool_param(), but for integer parameters.

double get_float_param (const map< string, string > &params, const string &name, double defaultVal=0.0)

Like get_bool_param(), but for floating-point parameters.

string get_string_param (const map< string, string > &params, const string &name, const string &defaultVal=string())

Like get_bool_param(), but for string parameters.

string get_required_string_param (const map< string, string > &params, const string &name)

Like get_string_param(), but throws exception if parameter absent.

Functions

bool operator< (const GmmCount &cnt1, const GmmCount &cnt2)

Orders GmmCount objects first by frame, then GMM index, then by decreasing count.

void convert_words_to_indices (const vector< string > &wordList, vector< int > &wordIdxList, const SymbolTable &symTable, int n, int bosIdx, int eosIdx, int unkIdx)

For converting a vector of strings wordList to a vector of ints wordIdxList using a SymbolTable, for n-gram model processing.

Vector/matrix I/O routines.

void split_string (const string &inStr, vector< string > &outList)

Splits inStr into space-separated tokens; places in outList.

void read_string_list (const string &fileName, vector< string > &strList)

Reads a list of strings, one to a line, from file fileName and places in strList.

string read_float_matrix (istream &inStrm, matrix< double > &mat, const string &name=string())

Reads matrix of floating-point numbers from stream inStrm in Matlab text format and places in mat.

string read_float_vector (istream &inStrm, vector< double > &vec, const string &name=string())

Like read_float_matrix(), but for float vectors.

string read_int_matrix (istream &inStrm, matrix< int > &mat, const string &name=string())

Like read_float_matrix(), but for integer matrices.

string read_int_vector (istream &inStrm, vector< int > &vec, const string &name=string())

Like read_float_matrix(), but for integer vectors.

void read_float_matrix (const string &fileName, matrix< double > &mat)

Like read_float_matrix(), but reads from file fileName instead of stream.

void read_float_vector (const string &fileName, vector< double > &vec)

Like read_float_vector(), but reads from file fileName instead of stream.

void read_int_matrix (const string &fileName, matrix< int > &mat)

Like read_int_matrix(), but reads from file fileName instead of stream.

void read_int_vector (const string &fileName, vector< int > &vec)

Like read_int_vector(), but reads from file fileName instead of stream.

void write_float_matrix (ostream &outStrm, const matrix< double > &mat, const string &name=string())

Writes floating-point matrix mat to stream outStrm in Matlab text format.

void write_float_vector (ostream &outStrm, const vector< double > &vec, const string &name=string())

Like write_float_matrix(), but for float vectors.

void write_int_matrix (ostream &outStrm, const matrix< int > &mat, const string &name=string())

Like write_float_matrix(), but for integer matrices.

void write_int_vector (ostream &outStrm, const vector< int > &vec, const string &name=string())

Like write_float_matrix(), but for integer vectors.

void write_float_matrix (const string &fileName, const matrix< double > &mat)

Like write_float_matrix(), but writes to file fileName instead of stream.

void write_float_vector (const string &fileName, const vector< double > &vec)

Like write_float_vector(), but writes to file fileName instead of stream.

void write_int_matrix (const string &fileName, const matrix< int > &mat)

Like write_int_matrix(), but writes to file fileName instead of stream.

void write_int_vector (const string &fileName, const vector< int > &vec)

Like write_int_vector(), but writes to file fileName instead of stream.

Detailed Description

I/O routines, GmmSet, and Graph classes.

Typedef Documentation

typedef map<string, string> ParamsType

Type of object used for holding program parameters.

Declaration needed for hack to get default arguments to work.

Function Documentation

double add_log_probs ( const vector< double > & logProbList )

Adds the log probs held in logProbList, returning answer as log prob.

That is, let's say we have a list of probability values, the logs of which are stored in logProbList. Then, this routine returns the log of the sum of those probability values. Logarithms are base e.

void real_fft ( vector< double > & vals )

Does in-place real FFT.

For inputs vals[i], i = 0, ..., N-1, sampled with period T, on return the real and imaginary parts of the FFT value for frequency i/(N·T) are held in the outputs vals[2*i] and vals[2*i+1], respectively.

void copy_matrix_row_to_vector	(	const matrix< double > &	mat,
		unsigned	rowIdx,
		vector< double > &	vec
	)

Sets vec to be equal to the rowIdx-th row of mat.

Rows are numbered starting from 0.

void copy_vector_to_matrix_row	(	const vector< double > &	vec,
		matrix< double > &	mat,
		unsigned	rowIdx
	)

Sets the rowIdx-th row of mat to vec; sizes must match.

Rows are numbered starting from 0.

void process_cmd_line	(	const char **	argv,
		map< string, string > &	params
	)

Given cmd line arguments argv, parses flags of the form --<flag> <val> and places in flag-to-value map params.

Expects the same argv value as passed to main(). Existing values in params are not erased (unless overridden in argv).

void process_cmd_line	(	const vector< string > &	argList,
		map< string, string > &	params
	)

Like process_cmd_line(), but expects arguments as string vector.

bool get_bool_param	(	const map< string, string > &	params,
		const string &	name,
		bool	defaultVal = `false`
	)

Returns value of boolean parameter name from parameter map params.

If not present, returns defaultVal.

int get_int_param	(	const map< string, string > &	params,
		const string &	name,
		int	defaultVal = `0`
	)

Like get_bool_param(), but for integer parameters.

double get_float_param	(	const map< string, string > &	params,
		const string &	name,
		double	defaultVal = `0.0`
	)

Like get_bool_param(), but for floating-point parameters.

string get_string_param	(	const map< string, string > &	params,
		const string &	name,
		const string &	defaultVal = `string()`
	)

Like get_bool_param(), but for string parameters.

string get_required_string_param	(	const map< string, string > &	params,
		const string &	name
	)

Like get_string_param(), but throws exception if parameter absent.

void split_string	(	const string &	inStr,
		vector< string > &	outList
	)

Splits inStr into space-separated tokens; places in outList.

string read_float_matrix	(	istream &	inStrm,
		matrix< double > &	mat,
		const string &	name = `string()`
	)

Reads matrix of floating-point numbers from stream inStrm in Matlab text format and places in mat.

Expects an optional matrix header, and then one row per line. If the argument name is provided, checks that the name associated with the matrix matches it and throws an exception if it doesn't. Returns the name given in the matrix header, or the empty string if none is provided.

string read_float_vector	(	istream &	inStrm,
		vector< double > &	vec,
		const string &	name = `string()`
	)

Like read_float_matrix(), but for float vectors.

string read_int_matrix	(	istream &	inStrm,
		matrix< int > &	mat,
		const string &	name = `string()`
	)

Like read_float_matrix(), but for integer matrices.

string read_int_vector	(	istream &	inStrm,
		vector< int > &	vec,
		const string &	name = `string()`
	)

Like read_float_matrix(), but for integer vectors.

void write_float_matrix	(	ostream &	outStrm,
		const matrix< double > &	mat,
		const string &	name = `string()`
	)

Writes floating-point matrix mat to stream outStrm in Matlab text format.

If the argument name is provided, this name will be written in the matrix header (and is the name that will be assigned to the matrix if loaded in octave).

void write_float_vector	(	ostream &	outStrm,
		const vector< double > &	vec,
		const string &	name = `string()`
	)

Like write_float_matrix(), but for float vectors.

void write_int_matrix	(	ostream &	outStrm,
		const matrix< int > &	mat,
		const string &	name = `string()`
	)

Like write_float_matrix(), but for integer matrices.

void write_int_vector	(	ostream &	outStrm,
		const vector< int > &	vec,
		const string &	name = `string()`
	)

Like write_float_matrix(), but for integer vectors.

void convert_words_to_indices	(	const vector< string > &	wordList,
		vector< int > &	wordIdxList,
		const SymbolTable &	symTable,
		int	n,
		int	bosIdx,
		int	eosIdx,
		int	unkIdx
	)

For converting a vector of strings wordList to a vector of ints wordIdxList using a SymbolTable, for n-gram model processing.

Words not in the symbol table are converted to the value unkIdx. The beginning of the output sequence is padded with n - 1 bosIdx values, and a single eosIdx value is added to the end.

Variable Documentation

const double g_zeroLogProb = -FLT_MAX / 2.0

This value can be used to represent the logprob of a "zero" prob.

Theoretically, log 0 is negative infinity, which we can't store, so we can use this very large negative value instead.


Classes
class	GmmScorer
	Abstract base class, interface for object computing GMM probs. More...
class	GmmSet
	Class holding set of diagonal covariance GMMs. More...
class	GmmCount
	GMM count class. More...
class	SymbolTable
	Class holding symbol table for a graph/FSM. More...
class	Arc
	Arc class. More...
class	Graph
	Graph/FSM class. More...
class	NGramCounter
	Class for storing counts for a set of n-grams. More...
Math stuff.
const double	g_zeroLogProb = -FLT_MAX / 2.0
	This value can be used to represent the logprob of a "zero" prob.
double	add_log_probs (const vector< double > &logProbList)
	Adds the log probs held in `logProbList`, returning answer as log prob.
void	real_fft (vector< double > &vals)
	Does in-place real FFT.
void	copy_matrix_row_to_vector (const matrix< double > &mat, unsigned rowIdx, vector< double > &vec)
	Sets `vec` to be equal to the `rowIdx`-th row of `mat`.
void	copy_vector_to_matrix_row (const vector< double > &vec, matrix< double > &mat, unsigned rowIdx)
	Sets the `rowIdx`-th row of `mat` to `vec`; sizes must match.
Command-line parsing and parameter lookup routines.
typedef map< string, string >	ParamsType
	Type of object used for holding program parameters.
void	process_cmd_line (const char **argv, map< string, string > &params)
	Given cmd line arguments `argv`, parses flags of the form `--<flag> <val>` and places in flag-to-value map `params`.
void	process_cmd_line (const vector< string > &argList, map< string, string > &params)
	Like process_cmd_line(), but expects arguments as string vector.
void	process_cmd_line (const string &argStr, map< string, string > &params)
	Like process_cmd_line(), but expects space-separated arguments in single string.
bool	get_bool_param (const map< string, string > &params, const string &name, bool defaultVal=false)
	Returns value of boolean parameter `name` from parameter map `params`.
int	get_int_param (const map< string, string > &params, const string &name, int defaultVal=0)
	Like get_bool_param(), but for integer parameters.
double	get_float_param (const map< string, string > &params, const string &name, double defaultVal=0.0)
	Like get_bool_param(), but for floating-point parameters.
string	get_string_param (const map< string, string > &params, const string &name, const string &defaultVal=string())
	Like get_bool_param(), but for string parameters.
string	get_required_string_param (const map< string, string > &params, const string &name)
	Like get_string_param(), but throws exception if parameter absent.
Functions
bool	operator< (const GmmCount &cnt1, const GmmCount &cnt2)
	Orders GmmCount objects first by frame, then GMM index, then by decreasing count.
void	convert_words_to_indices (const vector< string > &wordList, vector< int > &wordIdxList, const SymbolTable &symTable, int n, int bosIdx, int eosIdx, int unkIdx)
	For converting a vector of strings `wordList` to a vector of ints `wordIdxList` using a SymbolTable, for n-gram model processing.
Vector/matrix I/O routines.
void	split_string (const string &inStr, vector< string > &outList)
	Splits `inStr` into space-separated tokens; places in `outList`.
void	read_string_list (const string &fileName, vector< string > &strList)
	Reads a list of strings, one to a line, from file `fileName` and places in `strList`.
string	read_float_matrix (istream &inStrm, matrix< double > &mat, const string &name=string())
	Reads matrix of floating-point numbers from stream `inStrm` in Matlab text format and places in `mat`.
string	read_float_vector (istream &inStrm, vector< double > &vec, const string &name=string())
	Like read_float_matrix(), but for float vectors.
string	read_int_matrix (istream &inStrm, matrix< int > &mat, const string &name=string())
	Like read_float_matrix(), but for integer matrices.
string	read_int_vector (istream &inStrm, vector< int > &vec, const string &name=string())
	Like read_float_matrix(), but for integer vectors.
void	read_float_matrix (const string &fileName, matrix< double > &mat)
	Like read_float_matrix(), but reads from file `fileName` instead of stream.
void	read_float_vector (const string &fileName, vector< double > &vec)
	Like read_float_vector(), but reads from file `fileName` instead of stream.
void	read_int_matrix (const string &fileName, matrix< int > &mat)
	Like read_int_matrix(), but reads from file `fileName` instead of stream.
void	read_int_vector (const string &fileName, vector< int > &vec)
	Like read_int_vector(), but reads from file `fileName` instead of stream.
void	write_float_matrix (ostream &outStrm, const matrix< double > &mat, const string &name=string())
	Writes floating-point matrix `mat` to stream `outStrm` in Matlab text format.
void	write_float_vector (ostream &outStrm, const vector< double > &vec, const string &name=string())
	Like write_float_matrix(), but for float vectors.
void	write_int_matrix (ostream &outStrm, const matrix< int > &mat, const string &name=string())
	Like write_float_matrix(), but for integer matrices.
void	write_int_vector (ostream &outStrm, const vector< int > &vec, const string &name=string())
	Like write_float_matrix(), but for integer vectors.
void	write_float_matrix (const string &fileName, const matrix< double > &mat)
	Like write_float_matrix(), but writes to file `fileName` instead of stream.
void	write_float_vector (const string &fileName, const vector< double > &vec)
	Like write_float_vector(), but writes to file `fileName` instead of stream.
void	write_int_matrix (const string &fileName, const matrix< int > &mat)
	Like write_int_matrix(), but writes to file `fileName` instead of stream.
void	write_int_vector (const string &fileName, const vector< int > &vec)
	Like write_int_vector(), but writes to file `fileName` instead of stream.