#!/usr/bin/tclsh
#
# build-faq-index
#
# Convert a directory of FAQ answers in XML, and an ordering hint file,
# into a directory of FAQ answers in HTML along with an index file.
#
# The first step to an extensible ICSI Speech FAQ
# 2000-05-26 dpwe@icsi.berkeley.edu
# $Header $
# Note to the reader:
# This is a resource that I hope will continue after I am gone. Why then
# am I writing it in Tcl rather than in Perl, given that I'm the only one
# who prefers Tcl to Perl? Well, I'm in a hurry and it would be just too
# difficult for me to deal with my stumbling Perl illiteracy. But I do
# feel bad, if it's any compensation. Assuming that the functionality
# is pretty clear, it's probably best for you to rewrite it from scratch
# in Perl. I'm sure you can do a nicer job.
# 2004-08-30: Updated to be ee.columbia.edu MS faq
# Settings
set hintsfile "MSEE_FAQ"
# Don't assume we've got any kind of sane TCLLIBPATH
if {[array names env SPEECH_DIR] != ""} {
set SPEECH_DIR $env(SPEECH_DIR)
} else {
set SPEECH_DIR /homes/drspeech
}
lappend auto_path [file join $SPEECH_DIR share lib]
package require Dpwe_Utilfns
# Standard substitutions for the body of the FAQ answers
set body_bindings [list \
[list "@MANCGI@" "http://www.ee.columbia.edu/local-cgi-bin/man.cgi"] \
[list "@SPEECHDOCS@" "http://www.ee.columbia.edu/Speech/docs"] \
[list "@PROCEEDS@" "http://www.ee.columbia.edu/proceeds"] \
[list "@PROGMAIL@" "http://www.ee.columbia.edu/local-cgi-bin/progmail/messages/w/speech-weekly.index" ] \
]
proc ScanOneAnswer {filename} {
# Scan the specified file and return the standard fields to
# include in the index. Returns a list:
# {question author date body}
# which can then be linked up with that question in the main index.
set f [open $filename "r"]
set data [read $f]
close $f
if {[regexp {
(.*)} $data all TITLE] == 0} {
puts stderr "Cannot extract TITLE from FAQfile $filename"
set TITLE ""
}
if {[regexp {(.*)} $data all AUTHOR] == 0} {
puts stderr "Cannot extract AUTHOR from FAQfile $filename"
set AUTHOR ""
}
if {[regexp {(.*)} $data all DATE] == 0} {
puts stderr "Cannot extract DATE from FAQfile $filename"
set DATE ""
}
if {[regexp {(.*)} $data all BODY] == 0} {
puts stderr "Cannot extract BODY from FAQfile $filename"
set BODY ""
}
return [list $TITLE $AUTHOR $DATE $BODY]
}
proc ScanAnswerFiles {filelist} {
# Go through a list of files and scan them for the basic information
# so we can link them into the master FAQ index
set titles {}
set tags {}
foreach file $filelist {
lassign [ScanOneAnswer $file] title author date body
lappend titles $title
lappend tags "$author - $date"
# Discard body for now
}
return [list $titles $tags]
}
proc SubstituteBindings {tplt bindings} {
# Take a template definition, then return a substituted
# version using the mappings in $bindings, which is a list
# like {{%n {the name}} {%u userid} ...}
foreach pair $bindings {
lassign $pair from to
# This is a hack, as $from patterns in earlier $to fields
# may get subsequently substituted...
# Must escape any ampersands in the "to" (e.g. <)
regsub -all {&} $to {\\\&} to
# Now the main subs
regsub -all $from $tplt $to tplt
}
return $tplt
}
proc SubstituteTemplate {tpltfilename outfilename bindings} {
# Read a template definition, then write out a substituted
# version using the mappings in $bindings, which is a list
# like {{%n {the name}} {%u userid} ...}
set f [open $tpltfilename "r"]
set tplt [read $f]
close $f
set data [SubstituteBindings $tplt $bindings]
# Now write out the modified version
set f [open $outfilename "w"]
puts -nonewline $f $data
close $f
return $outfilename
}
set _prev_data {}
set _nfaqaswrit 0
proc WriteAnswerHTML {srcfile {number ""} {dstfile ""}} {
# Cover for writing the actual HTML of an answer
# Delays writes by one step, to permit previous and next links
# Must 'flush' with an 'empty' call at finish.
# Returns HTML file to link to
global _prev_data _nfaqaswrit
if {$srcfile != ""} {
# Read the XML file
lassign [ScanOneAnswer $srcfile] question author date body
# What will this link be called?
if {$dstfile == ""} {
set srcfiletail [file tail $srcfile]
if {[regsub {\.xml} $srcfiletail {.html} dstfile] == 0} {
set dstfile "$srcfiletail.html"
}
}
}
# Maybe actually build the deferred file
set next ""
set prev ""
if {$_prev_data != ""} {
lassign $_prev_data data prv_number prv_dst prv_prev
if {$srcfile != ""} {
set next [list $dstfile "$number $question"]
}
WriteAnswerHTMLsub $data $prv_number $prv_dst $prv_prev $next
set prev [list $prv_dst "$prv_number [lindex $data 0]"]
set _prev_data ""
incr _nfaqaswrit
}
# And defer the current file
if {$srcfile != ""} {
set _prev_data [list [list $question $author $date $body] \
$number $dstfile $prev]
return $dstfile
} else {
return $_nfaqaswrit
}
}
proc WriteAnswerHTMLsub {data {number ""} {dstfile ""} {prv ""} {nxt ""}} {
# Convert an XML answer file into an HTML format
# by substituting into the template
# $prv and $nxt are optional previous and next pages to link to;
# formatted as [list $pagehtmlfilename $pagedesctext]
global body_bindings
lassign $data question author date body
# Substitute any patterns in the body
set body [SubstituteBindings $body $body_bindings]
# Read in the template
set tpltfile "tplts/tplt-FAQA.html"
# Maybe build links to prev and next pages
if {$prv != ""} {
lassign $prv page name
set prvhtml "Previous: $name"
} else {
set prvhtml ""
}
if {$nxt != ""} {
lassign $nxt page name
set nxthtml "Next: $name"
} else {
set nxthtml ""
}
# Build the translation list
set bindings [list [list "@NUM@" $number] \
[list "@QUESTION@" $question] \
[list "@AUTHOR@" $author] \
[list "@DATE@" $date] \
[list "@MAKEDATE@" [clock format [clock seconds]]] \
[list "@PREVHTML@" $prvhtml] \
[list "@NEXTHTML@" $nxthtml] \
[list "@BODY@" $body] ]
# Make sure any previous HTML is out of the way
exec rm -f $dstfile
# Rewrite
SubstituteTemplate $tpltfile $dstfile $bindings
# Discourage editing of target HTML
exec chmod a-w $dstfile
puts stderr "Converted $number $question ($dstfile)"
return 1
}
# Line pushback buffer, for backing-up line-by-line parse
set pushbackbuf {}
proc my_gets {file} {
# Like gets, but may return 'pushed back' line
global pushbackbuf
if {[llength $pushbackbuf] > 0} {
set line [lindex $pushbackbuf e]
set pushbackbuf [lreplace $pushbackbuf e e]
} else {
set line [gets $file]
}
return $line
}
proc my_pushback {file line} {
# Push a line back, so that it will be returned on the next my_gets
global pushbackbuf
lappend pushbackbuf $line
}
proc my_eof {file} {
# Cover for eof, won't see eof until pushback empty
global pushbackbuf
if {[llength $pushbackbuf] > 0} {
set rc 0
} else {
set rc [eof $file]
}
return $rc
}
proc ReadHintsCategory {file {indentlevel 0}} {
# Subsidiary function for the hints file reading.
# Reads a set of hints lines from $file and returns them as a list.
# If the indent level goes up, those parts are returned as a sublist
# List is always {name sublist} pairs, with sublist as {} for a terminal
set rslt {}
set lastquest ""
while {![my_eof $file]} {
set line [set rawline [my_gets $file]]
# Strip comments
regsub "\#.*" $line "" line
# Skip blank
if {[string trim $line] != ""} {
# Separate off indent - prefix of spaces
regexp {^( *)(.*[^ ]*) *$} $line all indent quest
set newindentlevel [string length $indent]
if {$newindentlevel > $indentlevel} {
# Went down an indent level
my_pushback $file $rawline
lappend rslt [list $lastquest [ReadHintsCategory $file $newindentlevel]]
set lastquest ""
} else {
if {$lastquest != ""} {
lappend rslt [list $lastquest {}]
set lastquest ""
}
if {$newindentlevel < $indentlevel} {
# End of indent level we were called with
my_pushback $file $rawline
return $rslt
} else {
# just another entry, defer
set lastquest $quest
}
}
}
}
# Reached EOF
# Leftover deferred question?
if {$lastquest != ""} {
lappend rslt [list $lastquest {}]
set lastquest ""
}
return $rslt
}
proc ReadHintsFile {filename} {
# The hints file contains a list of questions
# organized into categories, which is the desired structure
# for the FAQ. It is not necessary for every question in the
# hints file to have an existing answer, nor for every answer
# to match a question in the hints file, but hopefully this
# will be the usual case. Unexpected answers are put in a
# final miscellaneous category.
# Returns a nested list like
# {catname1 {{q1 {}} {q2 {}}}} {catname2 ...}
# This version may be multiply nested, e.g.
# {catname1 {{q1 {}} {cat1.2 {{q1.2.1 {}} {q1.2.2 {}}}} {q3 {}}}} ...
set f [open $filename "r"]
set rslt [ReadHintsCategory $f]
close $f
return $rslt
}
proc MakeQueryHTML {query num faqdata nonterm useddatavar} {
# Subroutine to build the HTML links to the answers for a query
# .. also converts the XML answer files to HTML as a side effect
# $query is the query text $num is the FAQ number to substitute
# $faqdata is the info on all FAQA files found
# $nonterm is 0 if this q is a terminal (meaning a warning if empty)
# $useddatavar is a reference to the list accumulating indices of
# all used FAQ answers
upvar $useddatavar useddata
lassign $faqdata faqfiles faqquestions faqtags
set faqhtml ""
# Search for matching questions in scanned files
set ix 0
set anstags {}
while {$ix != -1} {
set ix2 [lsearch [lrange $faqquestions $ix e] $query]
if {$ix2 > -1} {
# found one
set ix [expr $ix + $ix2]
set file [lindex $faqfiles $ix]
set tag [lindex $faqtags $ix]
# Convert this question to HTML, while we're here...
# (can't do it before because we want to know the $num)
lassign [WriteAnswerHTML $file $num] htmlfile linkname
# .. and build a link to it
set qt {"}
lappend anstags "\[$tag\]"
# Make a note that we used this faqa
lappend useddata $ix
# Redo the search from the next point
incr ix
} else {
set ix -1
}
}
if {[llength $anstags] == 0} {
# Only warn if a terminal
if {$nonterm == 0} {
# set faqhtml "
\nNo answers found\n"
set faqhtml "\nNo answers found\n"
}
} else {
# set faqhtml "
\n[join $anstags " "]\n"
set faqhtml "\n[join $anstags " "]\n"
}
return $faqhtml
}
proc BuildFaqCategory {list data {prefix ""} useddatavar} {
# Build one category
# list is always a list of {name sublist} pairs.
# but don't descend if sublist is empty
upvar $useddatavar useddata
set html ""
if {$list != {}} {
set num 1
foreach el $list {
lassign $el name sublist
append html ""
append html "$prefix$num $name\n"
append html "
\n"
set subll [llength $sublist]
# Add answers
append html [MakeQueryHTML $name $prefix$num $data $subll useddata]
# Add sublist (if any)
append html [BuildFaqCategorySub $sublist $data $prefix$num. useddata]
incr num
}
}
return $html
}
proc BuildFaqCategorySub {list data {prefix ""} useddatavar} {
# Build one category
# list is always a list of {name sublist} pairs.
# but don't descend if sublist is empty
upvar $useddatavar useddata
set html ""
if {$list != {}} {
set num 1
append html "\n"
foreach el $list {
lassign $el name sublist
append html "- $prefix$num $name\n"
set subll [llength $sublist]
# Add answers
append html [MakeQueryHTML $name $prefix$num $data $subll useddata]
# Add sublist (if any)
append html [BuildFaqCategorySub $sublist $data $prefix$num. useddata]
incr num
append html "
\n"
}
append html "
\n"
}
return $html
}
proc qcount {hints} {
# Count the total number of questions in a hints structure
if {$hints == {} } {return 0}
set count 0
foreach el $hints {
lassign $el name sublist
set subll [llength $sublist]
if {$subll != 0} {
incr count [qcount $sublist]
} else {
incr count
}
}
return $count
}
proc BuildFaqIndexHTML {hints faqdata} {
# Build the HTML for faqs following the $hints structure,
# drawing answers from the $faqdata
# $useddata is passed by reference and becomes a list of the
# indices of all FAQs that have been used. This lets us
# collect the uncategorized answers at the end.
set useddata {}
# Handle most of the work in this recursive function
set faqhtml [BuildFaqCategory $hints $faqdata "" useddata]
# Build the uncategorized category
set xtraqs {}
lassign $faqdata faqfiles faqquestions faqtags
for {set ix 0} {$ix < [llength $faqquestions]} {incr ix} {
if {[lsearch -exact $useddata $ix] == -1} {
# Fake this query
set faqq [lindex $faqquestions $ix]
if {[lsearch -exact $xtraqs $faqq] == -1} {
lappend xtraqs $faqq
}
}
}
if {[llength $xtraqs] > 0} {
append faqhtml "Uncategorized FAQ answers
\n"
set xhintqs {}
foreach q $xtraqs {
lappend xhintqs [list $q {}]
}
append faqhtml [BuildFaqCategory $xhintqs $faqdata "X." useddata]
}
# Flush the deferred HTML conversion
set nans [WriteAnswerHTML ""]
puts "$nans of [qcount $hints] FAQAs written"
return $faqhtml
}
proc CreateIndex {} {
# Create the master index of the FAQ files
global hintsfile
#set hintsfile "ICSI_SPEECH_FAQ"
set anspat "anssrc/*.xml"
set indextplt "tplts/tplt-index.php3"
set indexfile "faq.php3"
# First, find the XML files
set ansfiles [glob $anspat]
# Scan them
lassign [ScanAnswerFiles $ansfiles] ansqs anstags
# Build the triple-list faqdata
set faqdata [list $ansfiles $ansqs $anstags]
# Read the hints file
set hints [ReadHintsFile $hintsfile]
# Munge it all together, which also converts the answer files to HTML
set faqhtml [BuildFaqIndexHTML $hints $faqdata]
# Write the index file from a template
set date [clock format [clock seconds]]
set prog [info script]
set bindings [list [list "@DATE@" $date] \
[list "@FAQ@" $faqhtml] \
[list "@PROG@" $prog] ]
return [SubstituteTemplate $indextplt $indexfile $bindings]
}
CreateIndex