#!/bin/bash
#
# Uses an already trained speech recognizer to train a new one on
# different training data using HTK.
# The final HMM can be found in $TMPDIR/hmm_final/hmmdefs
#
# usage: <this script> TRAINFILE GRAMFILE WDLIST DICTFILE TRAINTRANSFILE \
#                      PROTO_FILE NMIX TMPDIR [SOPTS]
#
# NOTE: the script changes into TMPDIR before touching any of the files
# named by the other arguments, so relative paths are resolved against
# TMPDIR, not against the directory the script was started from.

# list of training files (handed to the HTK tools via -S)
TRAINFILE=$1 #./speakers11_34_wavs.scp
# NOTE(review): TRAINFEATFILE is not referenced in the visible script
TRAINFEATFILE=$TRAINFILE
 #./trainall-featextract.scp
GRAMFILE=$2 #./grammar.bnf
WDLIST=$3 #./wdlist
DICTFILE=$4 #./beep
#word level transcripts
TRAINTRANSFILE=$5 #./words.mlf 
#TRAINEVAL=./train_eval.mlf

# model definitions copied to hmm0/hmmdefs as the starting point
PROTO_FILE=$6
# how many mixtures per gaussian?
# NOTE(review): NMIX is not referenced in the visible script
NMIX=$7
# working directory that receives all intermediate and output files
TMPDIR=$8

# Every output is written relative to the working directory, so refuse to
# continue without one: a bare `cd` with an empty argument would silently
# switch to $HOME, and a failed `cd` would leave us in the caller's directory.
if [ -z "$TMPDIR" ]; then
    printf '%s: missing working directory (argument 8)\n' "$0" >&2
    exit 1
fi
cd "$TMPDIR" || exit 1

# given a bunch of training wav files listed in $TRAINFILE, a grammar in
# $GRAMFILE, a word list file in $WDLIST, a pronunciation dictionary
# $DICTFILE, and a file of word level transcriptions for all of the
# training data $TRAINTRANSFILE, this script (and its associated files)
# will bootstrap and train an HTK recognizer...

# files it depends on:
#   sil.hed 

# outputs:
#   wdnet       - HTK grammar
#   dlog        - dictionary log (check for errors)
#   monophones0 - list of phones found in the dictionary (written by HDMan), plus sil


# Standard command line options handed to the HTK tools (to get
# reasonable error reports). Typical values:
#SOPTS="-A -D -T 1"
#SOPTS="-A -D "
# The value is taken from the ninth script argument and stays empty when
# that argument is omitted.
SOPTS="$9"

# Run the given command line (used for the HTK tools); if it returns a
# non-zero status, terminate the whole script with that same status.
runcmd ()
{
    "$@" || exit "$?"
}

# we do this a lot...
# Runs consecutive HERest re-estimation passes: each pass reads
# hmm<N>/macros and hmm<N>/hmmdefs and writes its result to hmm<N+1>.
# arg 1 = starting HMM (number of the directory holding the input model)
# arg 2 = ending HMM (number of last HMM to be trained)
# arg 3 = name of file containing transcripts to use
# arg 4 = name of file containing list of symbols (e.g. phones) in transcripts
# globals read:    SOPTS, TRAINFILE
# globals written: HMM - number of the last HMM trained (arg 1 if no
#                  pass was run)
# A failing HERest run terminates the script via runcmd.
trainhmms ()
{
  local cur=$1
  local last=$(($2))
  local next

  while [ "$cur" -lt "$last" ]; do
    next=$((cur + 1))
    # -p: do not complain about directories left over from an earlier run
    mkdir -p "hmm$next"
    # only update means (-u m).
    # $SOPTS is deliberately unquoted: it carries several options in one
    # string. Everything else is quoted so paths with spaces survive.
    runcmd HERest $SOPTS -u m -I "$3" -t 250.0 250.0 3000.0 -v 1.0 -S "$TRAINFILE" -H "hmm$cur/macros" -H "hmm$cur/hmmdefs" -M "hmm$next" "$4"
    cur=$next
  done

  HMM=$cur
}

# #global config:
# export HCONFIG="
# SOURCEFORMAT = HTK
# SOURCEKIND = USER
# TARGETFORMAT = HTK
# TARGETKIND = USER"


# 1. make a grammar that HTK understands
#    The first word of the grammar file (cut at the first space or "=")
#    decides what happens: "VERSION" means the grammar is already an SLF
#    file and is copied as is; anything else is compiled with HParse.
#    The substitution is quoted so an empty first field cannot break the
#    test expression.
if [ "$(head -n 1 "$GRAMFILE" | cut -d " " -f 1 | cut -d "=" -f 1)" = "VERSION" ]; then
    # the grammar already an SLF file?
    runcmd cp "$GRAMFILE" wdnet
else
    runcmd HParse "$GRAMFILE" wdnet
fi

# 2. compile an HTK formatted dictionary
#    Writes the dictionary to dict, the phone list to monophones0 and a
#    log to dlog. $SOPTS is deliberately unquoted (several options in
#    one string); the file arguments are quoted.
runcmd HDMan $SOPTS -m -w "$WDLIST" -n monophones0 -e . -l dlog dict "$DICTFILE"

#     add sil to monophones0, keeping the list sorted and free of
#     duplicates; sort rewrites the file in place, so no scratch file
#     with a fixed name is needed in the working directory
echo sil >> monophones0
sort -u -o monophones0 monophones0

# 3. convert word level transcripts to phone level transcripts
#    mkphones0.led is the HLEd edit script:
#      EX         - expand words into phones using the dictionary
#      IS sil sil - insert sil at the start and end of each utterance
#      DE sp      - delete sp labels
cat > mkphones0.led <<'EOF'
EX
IS sil sil
DE sp
EOF

# result: phones0.mlf ($SOPTS is deliberately left unquoted)
runcmd HLEd $SOPTS -l '*' -d dict -i phones0.mlf mkphones0.led "$TRAINTRANSFILE"

# set up initial hmm: hmm0 holds a copy of $PROTO_FILE as hmmdefs plus a
# macros file (created empty if it does not exist yet)
HMM=0
mkdir -p "hmm$HMM"
# abort if the seed model cannot be copied; otherwise a stale
# hmm0/hmmdefs from an earlier run would silently be trained instead
runcmd cp "$PROTO_FILE" "hmm$HMM/hmmdefs"
touch "hmm$HMM/macros"

# 5. HMM training:
#    c. do some training rounds (monophones0 should contain sil but not sp)
#       trainhmms leaves the number of the last trained model in $HMM
trainhmms "$HMM" "$((HMM+5))" phones0.mlf monophones0

#5   e. realign the training data, forcing the beginning and end of the
#       utterance to fall into the silence state
#       ("silence" is added to the dictionary with pronunciation sil and
#       used as the utterance boundary word via -b)
echo "silence sil" >> dict
# earlier variant, kept for reference:
#HVite $SOPTS -l '*' -o SWT -b silence -a -H hmm$HMM/macros -H hmm$HMM/hmmdefs -i aligned.mlf -m -t 250.0 -y lab -I $TRAINTRANSFILE -S $TRAINFILE dict monophones1 
# -a aligns against the transcripts given with -I; the result goes to
# aligned.mlf. $SOPTS is deliberately unquoted; paths are quoted.
runcmd HVite $SOPTS -l '*' -o SWT -b silence -a -H "hmm$HMM/macros" -H "hmm$HMM/hmmdefs" -i aligned.mlf -m -y lab -I "$TRAINTRANSFILE" -S "$TRAINFILE" dict monophones0

# five more training rounds, now on the realigned transcripts
trainhmms "$HMM" "$((HMM+5))" aligned.mlf monophones0

# Done!
# Remove any hmm_final left by an earlier run first: mv would otherwise
# place the new model *inside* the old directory and leave the old
# hmm_final/hmmdefs in place.
rm -rf hmm_final
mv "hmm$HMM" hmm_final


