#!/usr/bin/tclsh # # build-faq-index # # Convert a directory of FAQ answers in XML, and an ordering hint file, # into a directory of FAQ answers in HTML along with an index file. # # The first step to an extensible ICSI Speech FAQ # 2000-05-26 dpwe@icsi.berkeley.edu # $Header $ # Note to the reader: # This is a resource that I hope will continue after I am gone. Why then # am I writing it in Tcl rather than in Perl, given that I'm the only one # who prefers Tcl to Perl? Well, I'm in a hurry and it would be just too # difficult for me to deal with my stumbling Perl illiteracy. But I do # feel bad, if it's any compensation. Assuming that the functionality # is pretty clear, it's probably best for you to rewrite it from scratch # in Perl. I'm sure you can do a nicer job. # 2004-08-30: Updated to be ee.columbia.edu MS faq # Settings set hintsfile "MSEE_FAQ" # Don't assume we've got any kind of sane TCLLIBPATH if {[array names env SPEECH_DIR] != ""} { set SPEECH_DIR $env(SPEECH_DIR) } else { set SPEECH_DIR /homes/drspeech } lappend auto_path [file join $SPEECH_DIR share lib] package require Dpwe_Utilfns # Standard substitutions for the body of the FAQ answers set body_bindings [list \ [list "@MANCGI@" "http://www.ee.columbia.edu/local-cgi-bin/man.cgi"] \ [list "@SPEECHDOCS@" "http://www.ee.columbia.edu/Speech/docs"] \ [list "@PROCEEDS@" "http://www.ee.columbia.edu/proceeds"] \ [list "@PROGMAIL@" "http://www.ee.columbia.edu/local-cgi-bin/progmail/messages/w/speech-weekly.index" ] \ ] proc ScanOneAnswer {filename} { # Scan the specified file and return the standard fields to # include in the index. Returns a list: # {question author date body} # which can then be linked up with that question in the main index. set f [open $filename "r"] set data [read $f] close $f if {[regexp {(.*)} $data all TITLE] == 0} { puts stderr "Cannot extract TITLE from FAQfile $filename" set TITLE "" } if {[regexp {(.*)} $data all AUTHOR] == 0} { puts stderr "Cannot extract AUTHOR from FAQfile $filename" set AUTHOR "" } if {[regexp {(.*)} $data all DATE] == 0} { puts stderr "Cannot extract DATE from FAQfile $filename" set DATE "" } if {[regexp {(.*)} $data all BODY] == 0} { puts stderr "Cannot extract BODY from FAQfile $filename" set BODY "" } return [list $TITLE $AUTHOR $DATE $BODY] } proc ScanAnswerFiles {filelist} { # Go through a list of files and scan them for the basic information # so we can link them into the master FAQ index set titles {} set tags {} foreach file $filelist { lassign [ScanOneAnswer $file] title author date body lappend titles $title lappend tags "$author - $date" # Discard body for now } return [list $titles $tags] } proc SubstituteBindings {tplt bindings} { # Take a template definition, then return a substituted # version using the mappings in $bindings, which is a list # like {{%n {the name}} {%u userid} ...} foreach pair $bindings { lassign $pair from to # This is a hack, as $from patterns in earlier $to fields # may get subsequently substituted... # Must escape any ampersands in the "to" (e.g. <) regsub -all {&} $to {\\\&} to # Now the main subs regsub -all $from $tplt $to tplt } return $tplt } proc SubstituteTemplate {tpltfilename outfilename bindings} { # Read a template definition, then write out a substituted # version using the mappings in $bindings, which is a list # like {{%n {the name}} {%u userid} ...} set f [open $tpltfilename "r"] set tplt [read $f] close $f set data [SubstituteBindings $tplt $bindings] # Now write out the modified version set f [open $outfilename "w"] puts -nonewline $f $data close $f return $outfilename } set _prev_data {} set _nfaqaswrit 0 proc WriteAnswerHTML {srcfile {number ""} {dstfile ""}} { # Cover for writing the actual HTML of an answer # Delays writes by one step, to permit previous and next links # Must 'flush' with an 'empty' call at finish. # Returns HTML file to link to global _prev_data _nfaqaswrit if {$srcfile != ""} { # Read the XML file lassign [ScanOneAnswer $srcfile] question author date body # What will this link be called? if {$dstfile == ""} { set srcfiletail [file tail $srcfile] if {[regsub {\.xml} $srcfiletail {.html} dstfile] == 0} { set dstfile "$srcfiletail.html" } } } # Maybe actually build the deferred file set next "" set prev "" if {$_prev_data != ""} { lassign $_prev_data data prv_number prv_dst prv_prev if {$srcfile != ""} { set next [list $dstfile "$number $question"] } WriteAnswerHTMLsub $data $prv_number $prv_dst $prv_prev $next set prev [list $prv_dst "$prv_number [lindex $data 0]"] set _prev_data "" incr _nfaqaswrit } # And defer the current file if {$srcfile != ""} { set _prev_data [list [list $question $author $date $body] \ $number $dstfile $prev] return $dstfile } else { return $_nfaqaswrit } } proc WriteAnswerHTMLsub {data {number ""} {dstfile ""} {prv ""} {nxt ""}} { # Convert an XML answer file into an HTML format # by substituting into the template # $prv and $nxt are optional previous and next pages to link to; # formatted as [list $pagehtmlfilename $pagedesctext] global body_bindings lassign $data question author date body # Substitute any patterns in the body set body [SubstituteBindings $body $body_bindings] # Read in the template set tpltfile "tplts/tplt-FAQA.html" # Maybe build links to prev and next pages if {$prv != ""} { lassign $prv page name set prvhtml "Previous: $name" } else { set prvhtml "" } if {$nxt != ""} { lassign $nxt page name set nxthtml "Next: $name" } else { set nxthtml "" } # Build the translation list set bindings [list [list "@NUM@" $number] \ [list "@QUESTION@" $question] \ [list "@AUTHOR@" $author] \ [list "@DATE@" $date] \ [list "@MAKEDATE@" [clock format [clock seconds]]] \ [list "@PREVHTML@" $prvhtml] \ [list "@NEXTHTML@" $nxthtml] \ [list "@BODY@" $body] ] # Make sure any previous HTML is out of the way exec rm -f $dstfile # Rewrite SubstituteTemplate $tpltfile $dstfile $bindings # Discourage editing of target HTML exec chmod a-w $dstfile puts stderr "Converted $number $question ($dstfile)" return 1 } # Line pushback buffer, for backing-up line-by-line parse set pushbackbuf {} proc my_gets {file} { # Like gets, but may return 'pushed back' line global pushbackbuf if {[llength $pushbackbuf] > 0} { set line [lindex $pushbackbuf e] set pushbackbuf [lreplace $pushbackbuf e e] } else { set line [gets $file] } return $line } proc my_pushback {file line} { # Push a line back, so that it will be returned on the next my_gets global pushbackbuf lappend pushbackbuf $line } proc my_eof {file} { # Cover for eof, won't see eof until pushback empty global pushbackbuf if {[llength $pushbackbuf] > 0} { set rc 0 } else { set rc [eof $file] } return $rc } proc ReadHintsCategory {file {indentlevel 0}} { # Subsidiary function for the hints file reading. # Reads a set of hints lines from $file and returns them as a list. # If the indent level goes up, those parts are returned as a sublist # List is always {name sublist} pairs, with sublist as {} for a terminal set rslt {} set lastquest "" while {![my_eof $file]} { set line [set rawline [my_gets $file]] # Strip comments regsub "\#.*" $line "" line # Skip blank if {[string trim $line] != ""} { # Separate off indent - prefix of spaces regexp {^( *)(.*[^ ]*) *$} $line all indent quest set newindentlevel [string length $indent] if {$newindentlevel > $indentlevel} { # Went down an indent level my_pushback $file $rawline lappend rslt [list $lastquest [ReadHintsCategory $file $newindentlevel]] set lastquest "" } else { if {$lastquest != ""} { lappend rslt [list $lastquest {}] set lastquest "" } if {$newindentlevel < $indentlevel} { # End of indent level we were called with my_pushback $file $rawline return $rslt } else { # just another entry, defer set lastquest $quest } } } } # Reached EOF # Leftover deferred question? if {$lastquest != ""} { lappend rslt [list $lastquest {}] set lastquest "" } return $rslt } proc ReadHintsFile {filename} { # The hints file contains a list of questions # organized into categories, which is the desired structure # for the FAQ. It is not necessary for every question in the # hints file to have an existing answer, nor for every answer # to match a question in the hints file, but hopefully this # will be the usual case. Unexpected answers are put in a # final miscellaneous category. # Returns a nested list like # {catname1 {{q1 {}} {q2 {}}}} {catname2 ...} # This version may be multiply nested, e.g. # {catname1 {{q1 {}} {cat1.2 {{q1.2.1 {}} {q1.2.2 {}}}} {q3 {}}}} ... set f [open $filename "r"] set rslt [ReadHintsCategory $f] close $f return $rslt } proc MakeQueryHTML {query num faqdata nonterm useddatavar} { # Subroutine to build the HTML links to the answers for a query # .. also converts the XML answer files to HTML as a side effect # $query is the query text $num is the FAQ number to substitute # $faqdata is the info on all FAQA files found # $nonterm is 0 if this q is a terminal (meaning a warning if empty) # $useddatavar is a reference to the list accumulating indices of # all used FAQ answers upvar $useddatavar useddata lassign $faqdata faqfiles faqquestions faqtags set faqhtml "" # Search for matching questions in scanned files set ix 0 set anstags {} while {$ix != -1} { set ix2 [lsearch [lrange $faqquestions $ix e] $query] if {$ix2 > -1} { # found one set ix [expr $ix + $ix2] set file [lindex $faqfiles $ix] set tag [lindex $faqtags $ix] # Convert this question to HTML, while we're here... # (can't do it before because we want to know the $num) lassign [WriteAnswerHTML $file $num] htmlfile linkname # .. and build a link to it set qt {"} lappend anstags "\[$tag\]" # Make a note that we used this faqa lappend useddata $ix # Redo the search from the next point incr ix } else { set ix -1 } } if {[llength $anstags] == 0} { # Only warn if a terminal if {$nonterm == 0} { # set faqhtml "
\nNo answers found\n" set faqhtml "\nNo answers found\n" } } else { # set faqhtml "
\n[join $anstags " "]\n" set faqhtml "\n[join $anstags " "]\n" } return $faqhtml } proc BuildFaqCategory {list data {prefix ""} useddatavar} { # Build one category # list is always a list of {name sublist} pairs. # but don't descend if sublist is empty upvar $useddatavar useddata set html "" if {$list != {}} { set num 1 foreach el $list { lassign $el name sublist append html "

" append html "$prefix$num $name\n" append html "

\n" set subll [llength $sublist] # Add answers append html [MakeQueryHTML $name $prefix$num $data $subll useddata] # Add sublist (if any) append html [BuildFaqCategorySub $sublist $data $prefix$num. useddata] incr num } } return $html } proc BuildFaqCategorySub {list data {prefix ""} useddatavar} { # Build one category # list is always a list of {name sublist} pairs. # but don't descend if sublist is empty upvar $useddatavar useddata set html "" if {$list != {}} { set num 1 append html "\n" } return $html } proc qcount {hints} { # Count the total number of questions in a hints structure if {$hints == {} } {return 0} set count 0 foreach el $hints { lassign $el name sublist set subll [llength $sublist] if {$subll != 0} { incr count [qcount $sublist] } else { incr count } } return $count } proc BuildFaqIndexHTML {hints faqdata} { # Build the HTML for faqs following the $hints structure, # drawing answers from the $faqdata # $useddata is passed by reference and becomes a list of the # indices of all FAQs that have been used. This lets us # collect the uncategorized answers at the end. set useddata {} # Handle most of the work in this recursive function set faqhtml [BuildFaqCategory $hints $faqdata "" useddata] # Build the uncategorized category set xtraqs {} lassign $faqdata faqfiles faqquestions faqtags for {set ix 0} {$ix < [llength $faqquestions]} {incr ix} { if {[lsearch -exact $useddata $ix] == -1} { # Fake this query set faqq [lindex $faqquestions $ix] if {[lsearch -exact $xtraqs $faqq] == -1} { lappend xtraqs $faqq } } } if {[llength $xtraqs] > 0} { append faqhtml "

Uncategorized FAQ answers

\n" set xhintqs {} foreach q $xtraqs { lappend xhintqs [list $q {}] } append faqhtml [BuildFaqCategory $xhintqs $faqdata "X." useddata] } # Flush the deferred HTML conversion set nans [WriteAnswerHTML ""] puts "$nans of [qcount $hints] FAQAs written" return $faqhtml } proc CreateIndex {} { # Create the master index of the FAQ files global hintsfile #set hintsfile "ICSI_SPEECH_FAQ" set anspat "anssrc/*.xml" set indextplt "tplts/tplt-index.php3" set indexfile "faq.php3" # First, find the XML files set ansfiles [glob $anspat] # Scan them lassign [ScanAnswerFiles $ansfiles] ansqs anstags # Build the triple-list faqdata set faqdata [list $ansfiles $ansqs $anstags] # Read the hints file set hints [ReadHintsFile $hintsfile] # Munge it all together, which also converts the answer files to HTML set faqhtml [BuildFaqIndexHTML $hints $faqdata] # Write the index file from a template set date [clock format [clock seconds]] set prog [info script] set bindings [list [list "@DATE@" $date] \ [list "@FAQ@" $faqhtml] \ [list "@PROG@" $prog] ] return [SubstituteTemplate $indextplt $indexfile $bindings] } CreateIndex