#! /local/bin/tclsh7.5
#
# format RFCs by Frank Pilhofer <fp@informatik.uni-frankfurt.de>
#
# ----------------------------------------------------------------------
# Configuration section. Adjust them to your site
#
# rfcbase
#           The base directory where your rfc files are stored. The
#           rfcs themselves are expected to be named like
#            $rfcbase/RFC1000-1099/rfc1023*
#           If this is not true, adjust the RFCdir, RFCtxt and RFCps
#           functions below.
#
# rfcindex
#           The RFC Index file.
#
# proccgi
#           Location of the proccgi.tcl script
#
# zcat
#           The program to decompress and print compressed and gzipped
#           files
#
# ----------------------------------------------------------------------
# Configuration for full-text search using glimpse.
#
# glimpse
#           The glimpse program
#
# fullimit
#           A regular expression. Only requests from IP addresses
#           matching this expression are allowed fulltext search.
#
# ----------------------------------------------------------------------
#
set rfcbase  /usr/users2/ftp/pub/networking/docs/rfc
set rfcindex $rfcbase/rfc-index.txt
set proccgi  /usr/users2/hiwis/frank/www/cgi-bin/ProcCGIInput.tcl
set zcat     /local/bin/zcat
set glimpse  "/local/bin/glimpse -H /usr/users2/hiwis/frank/.rfc-index"
set fullimit {^141\.2}
#
# ----------------------------------------------------------------------
# The RFCtxt and RFCps functions are supposed to locate the text file
# and, if available, the PostScript file for the given RFC number.
# Change them if your directory tree is organized differently. Returning
# the name of a compressed file is fine as long as $zcat is defined.
# ----------------------------------------------------------------------
#
proc RFCdir { num } {
    set hundreds [expr int($num / 100)]
    if { $hundreds == 0 } {
	return "RFC00-99"
    }
    return [join [concat "RFC" $hundreds "00-" $hundreds "99"] ""]
}

proc RFCtxt { num } {
    global rfcbase
    set dirname $rfcbase/[RFCdir $num]

    foreach filename [glob -nocomplain $dirname/rfc*$num*] {
	if { [string first .txt $filename] != -1 } {
	    return $filename
	}
    }
    return ""
}

proc RFCps { num } {
    global rfcbase
    set dirname $rfcbase/[RFCdir $num]

    foreach filename [glob -nocomplain $dirname/rfc*$num*] {
	if { [string first .ps $filename] != -1 } {
	    return $filename
	}
    }
    return ""
}

#
# ----------------------------------------------------------------------
#
# ProcCGIInput.tcl by Frank Pilhofer <fp@informatik.uni-frankfurt.de>
#
# Process CGI Input data in Tcl. Handle all means of possible input to
# a CGI script (POST, GET, URL-encoding) and output everything into the
# form() array.
#
# Source this file!
#

proc ProcCGIInput { {list {}} } {
    global env form argv

    if { [catch {set env(REQUEST_METHOD)}] } {set env(REQUEST_METHOD) ""}
    if { [catch {set env(QUERY_STRING)}] }   {set env(QUERY_STRING) ""}
    if { [catch {set env(PATH_INFO)}] }      {set env(PATH_INFO) ""}

    if { "$env(REQUEST_METHOD)" == "POST" } {
	set _F_QueryString "[read stdin $env(CONTENT_LENGTH)]&"
    } else {
	set _F_QueryString "$env(QUERY_STRING)&"
    }
    foreach _F_par $argv {
	append _F_QueryString "$_F_par&"
    }
    foreach _F_par [split $env(PATH_INFO) "/"] {
	append _F_QueryString "$_F_par&"
    }
    #
    # process data
    #
    foreach _F_par [split $_F_QueryString "&"] {
	set _F_data [split $_F_par "="]
	set _F_var  [lindex $_F_data 0]
	set _F_val  [join [lrange $_F_data 1 end] "="]
	
	if { $_F_var == "" } {
	    continue
	}
	#
	# replace + by spaces
	#
	regsub -all {\+} $_F_val " " _F_val
	#
	# replace %XX by ascii character
	#
	while {[regexp -nocase {%([0-9a-f][0-9a-f])} $_F_val matsch hex]} {
	    set oct [eval format "%03o" 0x$hex]
	    eval regsub $matsch $_F_val "\\$oct" _F_val
	}
	#
	# okay
	#
	set form($_F_var) $_F_val
    }
    #
    # for each variable in list, set $form(var) to "", so that
    # this variable does at least exist and $form(var) does not
    # fail
    #
    foreach _F_var $list {
	if { [catch {set form($_F_var)}] } {
	    set form($_F_var) ""
	}
    }
}
# ----------------------------------------------------------------------
#

#
# How to link to a formatted RFC file (pointing back to this script)
#

proc RFClink { num } {
    global env
    return [join [concat $env(SCRIPT_NAME) "/rfc=" $num] "" ]
}

#
# Print HTML title and footer and an oops
#

proc HTMLtitle { title } {
    puts "<html><head>
<title>$title</title>
</head>
<body>
"
}

proc HTMLfooter {} {
    puts "<p>
<hr>
Date: [clock format [clock seconds]]<br>
Script by <a href=\"http://www.uni-frankfurt.de/~fp/\">Frank Pilhofer</a><br>
</body></html>
"
}

proc oops { {title "Mismatched Query"} } {
    puts -nonewline "Content-Type: text/html\r\n\r\n"
    HTMLtitle "$title"
    puts "<h1>$title</h1>"
    HTMLfooter
    exit 0
}

#
# Search the RFC database using the glimpse index and return a list
# of matching RFC numbers.
#

proc RFCFullList { search } {
    global glimpse

    if { [catch {eval exec -- $glimpse -wilyz -e \"$search\"} thetext] } {
	oops "$thetext"
	exit 0
    }
    set list ""
    foreach file $thetext {
	if { [regexp {rfc([0-9]+)} $file matsch rfcno] } {
	    lappend list $rfcno
	}
    }
    return $list
}

proc RFCindex { { search "" } { isfull 0 } { first -1 } } {
    global rfcindex env

    puts -nonewline "Content-Type: text/html\r\n\r\n"

    HTMLtitle "RFC Index"

    if { [ regexp {(.Z)|(.gz)$} $rfcindex ] } {
	set res [catch { open "|$zcat $rfcindex" r } fileno]
    } else {
	set res [catch { open $rfcindex r } fileno]
    }
    if { $res } {
	puts "<h1>Oops!</h1>"
	HTMLfooter
	exit 0
    }

    if { $isfull } {
	set ischecked "checked"
	if { $search != "" } {
	    set matchlist [RFCFullList $search]
	} else {
	    set matchlist ""
	}
    } else {
	set ischecked ""
	set matchlist ""
    }

    foreach var {def001 def005 def010 def042 defall} {
	if { ! [regexp {[-0-9]+} $var num ] } {
	    set num -1
	}
	regsub {^0+} $num "" num
	if { $num == $first } {
	    set $var "selected"
	} else {
	    set $var ""
	}
    }

    puts "<h1>RFC Index</h1>
    <form action=\"$env(SCRIPT_NAME)/index\" method=\"get\">
    <input name=\"search\" size=\"40\" value=\"$search\">
    <input type=\"submit\" value=\"Search\"><br>
    Perform Fulltext search
    <input type=\"checkbox\" name=\"full\" $ischecked value=\"on\">
    and return
    <select name=\"first\">
    <option $def001>the first</option>
    <option $def005>the first 5</option>
    <option $def010>the first 10</option>
    <option $def042>the first 42</option>
    <option $defall>all</option>
    </select>
    match(es).
    </form>
    "
    puts "<p><pre>"
    set found 0

    while { ! [ eof $fileno ] } {
	if { [ gets $fileno entry ] < 0 } {
	    break
	}
	#
	# read entry
	#
	if { [regexp {^[0-9]+ } $entry rfcno] } {
	    set input(0) $entry
	    set inlin 1

	    while { ! [ eof $fileno ] } {
		if { [ gets $fileno input($inlin) ] < 0 } {
		    break
		}
		#
		# stop at empty line
		#
		if { [regexp {^ *$} $input($inlin)] } {
		    break
		}
		#
		# Concatenate all lines of the entry
		#
		set entry [concat $entry $input($inlin)]
		incr inlin
	    }
	    #
	    # preprocess the entry so that regexps will work
	    #
	    regsub " \t" $entry " " entry
	    #
	    # check if entry matches
	    #
	    if { $search == "" } {
		set match 1
	    } elseif { $isfull == 1 } {
		regexp {^0*[1-9][0-9]*} $rfcno rfcnew
		if { [lsearch -glob $matchlist "*$rfcnew*"] != -1 } {
		    set match 1
		} else {
		    set match 0
		}
	    } else {
		catch { set match [regexp -nocase -- $search $entry] }
	    }
	    #
	    # if it matches, print the entry with links
	    #
	    if { $match } {
		regsub {^[0-9]+} $input(0) \
			"<a href=\"[RFClink $rfcno]\">&</a>" input(0)
		for {set out 0} {$out < $inlin} {incr out} {
		    regsub {\.txt} $input($out) \
			    "<a href=\"[RFClink $rfcno]/format=plain\">&</a>" \
			    input($out)
		    regsub {\.ps} $input($out) \
			    "<a href=\"[RFClink $rfcno]/format=ps\">&</a>" \
			    input($out)
		    if { $search != "" } {
			regsub -nocase $search $input($out) \
				"<b>&</b>" \
				input($out)
		    }
		    puts $input($out)
		}

		puts ""
		incr found

		if { $found >= $first && $first != -1 } {
		    break
		}
	    }
	} else {
	    #
	    # plain text
	    #
	    puts $entry
	}
    }
    puts "</pre>"

    if { $search != "" } {
	puts "The search for $search found $found documents.<p>"
	if { $found == 0 } {
	    puts "
	    Duh. Search <a href=\"http://altavista.digital.com/cgi-bin/query?pg=q&what=web&fmt=.&q=$search\">AltaVista</a> instead.<p>"
	}
    }

    HTMLfooter
    close $fileno
    exit 0
}

proc RFCformat { num } {
    global rfcbase zcat env
    set file [RFCtxt $num]

    puts -nonewline "Content-Type: text/html\r\n\r\n"
    HTMLtitle "RFC $num"
    puts "<a href=\"$env(SCRIPT_NAME)/index\">Back to the RFC Index</a>"

    if { $file == "" } {
	puts "<h1>Oops, cannot find RFC file</h1>"
	HTMLfooter
	exit 0
    }
    if { [ regexp {(.Z)|(.gz)$} $file ] } {
	set res [catch { open "|$zcat $file" r } fileno]
    } else {
	set res [catch { open $file r } fileno]
    }
    if { $res } {
	puts "<h1>Oops, cannot open RFC file</h1>"
	HTMLfooter
	exit 0
    }
    puts "<pre>"
    #
    # okay, dump it
    #
    while { ! [eof $fileno] } {
	if { [ gets $fileno TheLine ] < 0 } {
	    break
	}
	#
	# substitute meta characters
	#
	if { [ regexp {(<|>|&)} $TheLine ] } {
	    regsub -all "&" $TheLine {\&amp;} TheLine
	    regsub -all "<" $TheLine {\&lt;}  TheLine
	    regsub -all ">" $TheLine {\&gt;}  TheLine
	}
	#
	# add cross-references
	#
	if { [ regexp -nocase {RFC *([0-9]+)} $TheLine rfcText rfcNo ] } {
	    if { [expr $rfcNo] != [expr $num] } {
		regsub -all -nocase {RFC *([0-9]+)} $TheLine \
			"<a href=\"[RFClink $rfcNo]/format=html\">&</a>" \
			TheLine
	    }
	}
	puts $TheLine
    }
    puts "</pre>"
    puts "<a href=\"/fp-cgi/rfc/index\">Back to the RFC Index</a>"
    HTMLfooter
    close $fileno
    exit 0
}

proc RFCplain { num } {
    global rfcbase zcat
    set file [RFCtxt $num]

    puts -nonewline "Content-Type: text/plain\r\n\r\n"

    if { $file == "" } {
	puts "Oops, cannot find RFC file"
	exit 0
    }
    if { [ regexp {(.Z)|(.gz)$} $file ] } {
	exec $zcat $file >@stdout
    } else {
	exec cat $file >@stdout
    }
    exit 0
}

proc RFCpostscript { num } {
    global rfcbase zcat
    set file [RFCps $num]

    if { $file == "" } {
	puts -nonewline "Content-Type: text/plain\r\n\r\n"
	puts "Oops, cannot find RFC $num as Postscript"
	exit 0
    }

    puts -nonewline "Content-Type: application/postscript\r\n\r\n"

    if { [ regexp {(.Z)|(.gz)$} $file ] } {
	exec $zcat $file >@stdout
    } else {
	exec cat $file >@stdout
    }
    exit 0
}

proc NoFull {} {
    puts -nonewline "Content-Type: text/html\r\n\r\n"
    HTMLtitle Sorry
    puts "
    <h1>Sorry</h1>
    The fulltext search is only available to local users because of
    the extraordinary load it puts on our server.
    "
    HTMLfooter
    exit 0
}

set rfcno 0
set type ""

#
# process form data
#

ProcCGIInput

#
# Handle Index
# Fulltext search is only available if REMOTE_ADDR matches the regexp fullimit
#

if { ! [catch {set form(index)}] } {
    if { [catch {set querystring $form(search)}] } {
	set querystring ""
    }
    if { [regexp {[0-9]+} $querystring] } {
	regsub {^0+} $querystring "" querystring
	RFCformat $querystring
	exit 0
    }
    if { [catch {set form(full)}] } {
	set fullsearch 0
    } elseif { $form(full) != "on" } {
	set fullsearch 0
    } else {
	if { [catch {set addr $env(REMOTE_ADDR)}] } {
	    set addr ""
	}
	if { ! [regexp $fullimit $addr] } {
	    NoFull
	    exit 0
	}
	set fullsearch 1
    }
    if { [catch {set thefirst $form(first)}] } {
	set thefirst "all"
    }
    if { ! [regexp {[0-9]+} $thefirst count] } {
	switch $thefirst {
	    "the first" { set count  1 }
	    "all"       { set count -1 }
	    default     { set count -1 }
	}
    }
    RFCindex $querystring $fullsearch $count
    exit 0
}

#
# /script/RFCno/type
#

if { [catch {set form(rfc)}] } {
    oops
    exit 0
}

#
# strip leading zeroes, or tcl will interpret the number as octal
#

regsub {^0+} $form(rfc) "" rfcno

if { $rfcno <= 0 } {
    oops
    exit 0
}

if { [catch {set format $form(format)}] } {
    set format ""
}

switch $format {
    plain   { RFCplain      $rfcno }
    ps      { RFCpostscript $rfcno }
    default { RFCformat     $rfcno }
}
exit 0

