#!/usr/local/bin/python
#----------------------------------------------------------------
# index.cgi:  CGI script to search for an index term.
#   $Revision: 1.4 $  $Date: 2003/08/22 23:05:03 $
#----------------------------------------------------------------
# Works with the index structures generated by PyStyler's
# `stylindex' utility.  This program was translated from an
# Icon script that had been working since 1996-11-12 until
# the Great RedHat Upgrade of 2003-08-02.
#----------------------------------------------------------------

#================================================================
# Overall intended function:
#   [ (this script is executed with the CGI protocol, receiving
#     its arguments through the HTML form protocol) and
#     (KEYWORD_FILE names a readable file) ->
#       if there is a TARGET_NAME argument ->
#         if TARGET_NAME argument is empty or its first nonblank
#         character is not a letter ->
#           sys.stdout  +:=  a "Location:" header redirecting
#                            the server to (DEFAULT_URL)
#         else ->
#           sys.stdout  +:=  a "Location:" header redirecting
#               the server to the URL given by
#               effective-target(the TARGET_NAME argument,
#               contents of KEYWORD_FILE)
#           log-file  +:=  miss-entry(the TARGET_NAME argument)
#       else ->
#         sys.stdout  +:=  an HTML error page ]
#================================================================

# CONTENTS
#----------------------------------------------------------------
#   Overall intended function
#   Imports
#   Manifest constants
#   Specification functions
#   Functions and classes
#     errorPage():  Display an HTML error page and stop.
#     class Args:  Digests HTML form arguments.
#     pointServerAt():  Sends "Location:" to redirect to a URL
#     searchKeywords():  Redirect according to the search target
#     readKeywordFile():  Read the file of existing keywords
#     matchKeyword():  Find the words adjacent to the target
#     bracketAnchor():  Find the appropriate target
#     jumpFile():  Jump to the beginning of an index page
#     logMiss():  Accumulate a file of missed searches
#     jumpToAnchor():  Jump to a specific keyword
#   Main

#================================================================
# IMPORTS
#----------------------------------------------------------------
# The __future__ statement has to come before every other
# statement in the file, so the constants (including
# INDEX_CGI_REVISION) are defined after the imports.

from __future__ import generators   # Allow generators

import sys          # Standard system module
import re           # Regular expressions
try:
    import cgi      # Common Gateway Interface module
except ImportError:
    # Recent Python releases no longer ship the cgi module.  Keep
    # the name bound so the rest of the file can test for it and
    # report the problem as an error page instead of a traceback.
    cgi = None
import os           # Operating system functions
import pwd          # Password file functions

#================================================================
# MANIFEST CONSTANTS
#----------------------------------------------------------------

INDEX_CGI_REVISION = "0.0"      # External revision level

INDEX_BASE = "index"            # Base name of all index files
KEYWORD_FILE = "keywords"       # Name of keyword file
TARGET_NAME = "target"          # Name of the form field holding
                                # the search term
HTML_SUFFIX = ".html"           # Web page suffix
DEFAULT_URL = INDEX_BASE + HTML_SUFFIX      # Starting page
LOG_FILE = "misses.log"         # Accumulates failed search targets

#================================================================
# BLANKET PRECONDITIONS
#----------------------------------------------------------------
#   * KEYWORD_FILE is readable and contains the keywords of the
#     index pages in ascending order (case-insensitive).
#----------------------------------------------------------------

#================================================================
# SPECIFICATION FUNCTIONS
#----------------------------------------------------------------
# above-anchor(target, keyFile) ==
#     if target > (all words in keyFile) ->
#         None
#     else ->
#         the first word >= target in keyFile, lowercased
#----------------------------------------------------------------
# below-anchor(target, keyFile) ==
#     if no word in keyFile precedes above-anchor(target, keyFile) ->
#         None
#     else ->
#         the word just before above-anchor(target, keyFile), or
#         the last word of keyFile if above-anchor is None,
#         lowercased
#----------------------------------------------------------------
# bracketed-target ( target, prev, next ) ==
#     if ( ( prev is None ) or                    # Rule 1
#          ( target[0] != prev[0] ) ) ->
#         letter-file ( target[0] )
#     else if ( ( next is not None ) and
#               ( next.startswith(target) ) ) ->  # Rule 2
#         letter-anchor ( next )
#     else ->
#         letter-anchor ( prev )                  # Rule 3
#--
#   This defines the behavior of searches for words that aren't
#   in the keyword list.  We need to know the word prev (or
#   None if there are no words prev) and the word next (or None).
#   There are three principal cases:
#     1. If prev is None, or prev starts with a different letter
#        than target, go to the beginning of the file for
#        the initial letter of target.
#
#     2. If next is not None, and target matches next (or is
#        a prefix of it), go to keyword (next).  This is
#        considered a hit; the other cases are misses, and
#        the target will be written to the log-file.
#
#     3. Go to keyword (prev) so the user can see context both
#        before and after the point where the target would be.
#
#   Here is a set of examples for all possible cases, where "..."
#   means "don't care":
#     prev    target   next      Go to      Rule
#     ----    ------   ----      -----      ----
#     None    "bear"   None      _b         1
#     None    "bear"   "box"     _b         1
#     None    "bear"   "cat"     _b         1
#     "ace"   "bear"   ...       _b         1
#     "ace"   "ant"    None      _a#ace     3
#     "ace"   "ant"    "axe"     _a#ace     3
#     "ace"   "ant"    "art"     _a#ace     3
#     "ace"   "ant"    "box"     _a#ace     3
#     "ace"   "ant"    "ant"     _a#ant     2
#     "ace"   "ant"    "antic"   _a#antic   2
#----------------------------------------------------------------
# effective-target(target, keyFile) ==
#     bracketed-target(target, below-anchor(target, keyFile),
#                      above-anchor(target, keyFile))
#--
#   This function defines the mapping of search targets to
#   URLs with anchors.  The destination depends on the word
#   before the target (if any) and the word after (if any)
#----------------------------------------------------------------
# letter-anchor(target) ==
#     letter-file(target) + "#" + target
#--
#   The URL of a keyword is the URL of the page for that letter
#   of the alphabet, with the keyword appended as an anchor.
#----------------------------------------------------------------
# letter-file(target) ==
#     the file whose name is (INDEX_BASE + "_" + target[0] +
#     HTML_SUFFIX)
#----------------------------------------------------------------
# log-file == a file named LOG_FILE
#----------------------------------------------------------------
# miss-entry(s) ==
#     if bracketed-target for s is decided by Rule 2 (a hit) ->
#         nothing
#     else ->
#         a line containing s
#----------------------------------------------------------------


# - - -   _ e s c a p e H t m l   - - -

def _escapeHtml ( text ):
    """Return text with the HTML metacharacters &, < and > escaped.

      [ text is a string ->
          return text with "&", "<" and ">" replaced by the
          entities "&amp;", "&lt;" and "&gt;" ]

    Written out locally because cgi.escape(), which performs the
    same three replacements, does not exist in current Python
    releases.  "&" must be replaced first so that the ampersands
    introduced by the other two entities are not escaped again.
    """
    return ( text.replace ( "&", "&amp;" )
                 .replace ( "<", "&lt;" )
                 .replace ( ">", "&gt;" ) )


# - - -   e r r o r P a g e   - - -

def errorPage ( *L ):
    """Generate an HTML error page and stop.

      [ L is a list of strings ->
          sys.stdout  +:=  an HTML error page displaying the
                           concatenation of the elements of L
          stop execution ]

    Never returns: always raises SystemExit with status 1.
    """
    #-- 1 --
    # [ sys.stdout  +:=  headers indicating an HTML page ]
    # The blank line terminates the CGI header section.
    sys.stdout.write ( "Content-type: text/html\n\n" )

    #-- 2 --
    # [ sys.stdout  +:=  a complete HTML page with a generic error
    #       message and the elements of L, escaped for HTML ]
    # NOTE(review): the element names below were reconstructed;
    # only the text content of the original page survived.
    sys.stdout.write (
        "<html>\n"
        "<head>\n"
        "  <title>Configuration error</title>\n"
        "</head>\n"
        "<body>\n"
        "<h1>This CGI script has been mis-configured.</h1>\n"
        "<p>Error: %s</p>\n"
        "</body>\n"
        "</html>\n" % _escapeHtml ( "".join ( L ) ) )

    #-- 3 --
    # [ stop execution ]
    sys.exit(1)


# - - - - -   c l a s s   A r g s   - - - - -

class Args:
    """Object to represent arguments from the HTML form element.

      Exports:
        Args():
          [ if (we are executing under the HTML protocol) and
            (the arguments are valid) ->
              return a new Args object representing the arguments
            else ->
              sys.stdout  +:=  an HTML error page
              stop execution ]
        .target:    [ the TARGET_NAME argument, as a string;
                      may be empty ]
    """

    # - - -   A r g s . _ _ i n i t _ _   - - -

    def __init__ ( self ):
        "Constructor for the Args object."
        #-- 1 --
        # [ if the cgi module is available ->
        #     form  :=  a cgi.FieldStorage() object representing the
        #               form arguments, blank values included
        #   else ->
        #     sys.stdout  +:=  an HTML error page
        #     stop execution ]
        if cgi is None:
            errorPage ( "The Python `cgi' module is not available." )
        # Blank values are kept so that an empty search box counts
        # as "argument present but empty" (redirect to DEFAULT_URL)
        # rather than "argument missing" (error page).
        form = cgi.FieldStorage ( keep_blank_values=True )

        #-- 2 --
        # [ if form has a TARGET_NAME argument ->
        #     self.target  :=  that argument as a string
        #   else ->
        #     sys.stdout  +:=  an HTML error page
        #     stop execution ]
        # getfirst() copes with the field being submitted more than
        # once, in which case form[TARGET_NAME] would be a list with
        # no .value attribute.
        value = form.getfirst ( TARGET_NAME )
        if value is None:
            errorPage ( "The `%s' argument is required." % TARGET_NAME )
        self.target = value


# - - -   p o i n t S e r v e r A t   - - -

def pointServerAt ( url ):
    """Redirect the web server to the given URL.

      [ url is a string ->
          sys.stdout  +:=  a "Location:" header redirecting to url
          stop execution ]

    Never returns: always raises SystemExit with status 0, because
    a redirect is the successful outcome of this script.
    """
    #-- 1 --
    # [ sys.stdout  +:=  a "Location:" header redirecting to url ]
    sys.stdout.write ( "Location: %s\n\n" % url )

    #-- 2 --
    # [ stop execution ]
    sys.exit(0)


# - - -   s e a r c h K e y w o r d s   - - -

def searchKeywords ( target ):
    """Find the target's position in the sequence of existing keywords.

      [ target is a nonempty lowercased string ->
          sys.stdout  +:=  a "Location:" header redirecting the
              server to effective-target(target, KEYWORD_FILE)
          log-file  +:=  miss-entry(target)
          stop execution ]
    """
    #-- 1 --
    # [ if KEYWORD_FILE can be opened for reading ->
    #     keyList  :=  nonblank lines from that file, without
    #                  newlines, in order
    #   else ->
    #     sys.stdout  +:=  error page
    #     stop execution ]
    keyList = readKeywordFile()

    #-- 2 --
    # [ if keyList is in ascending order, case-insensitive ->
    #     below  :=  below-anchor ( target, keyList )
    #     above  :=  above-anchor ( target, keyList ) ]
    below, above = matchKeyword ( target, keyList )

    #-- 3 --
    # [ sys.stdout  +:=  a "Location:" header redirecting the server
    #       to bracketed-target(target, below, above)
    #   log-file  +:=  miss-entry ( target )
    #   stop execution ]
    bracketAnchor ( target, below, above )


# - - -   r e a d K e y w o r d F i l e   - - -

def readKeywordFile ( ):
    """Get the list of keywords used in the index.

      [ if KEYWORD_FILE can be opened for reading ->
          return a list containing the nonblank lines from that
          file, without trailing whitespace, in order
        else ->
          sys.stdout  +:=  error page
          stop execution ]
    """
    #-- 1 --
    # [ if KEYWORD_FILE can be opened for reading ->
    #     keyFile  :=  that file, so opened
    #   else ->
    #     sys.stdout  +:=  error page
    #     stop execution ]
    try:
        keyFile = open ( KEYWORD_FILE )
    except IOError:
        errorPage ( "Couldn't open the keyword file." )

    #-- 2 --
    # [ return the nonblank lines of keyFile, right-stripped ]
    # Blank lines are dropped: an empty keyword has no first letter,
    # which bracketAnchor() needs in order to compare initials.
    try:
        return [ line.rstrip() for line in keyFile if line.strip() ]
    finally:
        keyFile.close()


# - - -   m a t c h K e y w o r d   - - -

def matchKeyword ( keyword, keyList ):
    """Find the keywords that bracket keyword in keyList.

      [ (keyword is a string) and
        (keyList is a list of strings in ascending order,
        case-insensitive) ->
          return (below-anchor(keyword, keyList),
                  above-anchor(keyword, keyList)) ]

    Returns a (prev, next) tuple.  next is the first entry that is
    >= keyword, or None if every entry is smaller.  prev is the
    entry just before that position, or None if there is none.
    Both are lowercased, and all comparisons are case-insensitive.
    An empty keyList gives (None, None).
    """
    #-- 1 --
    target = keyword.lower()
    below = None

    #-- 2 --
    # [ if any entry in keyList is >= target, case-insensitive ->
    #     return (the entry before the first such entry or None,
    #             the first such entry), lowercased
    #   else ->
    #     below  :=  the last entry of keyList, lowercased, or
    #                None if keyList is empty ]
    for entry in keyList:
        word = entry.lower()
        if word >= target:
            return ( below, word )
        below = word

    #-- 3 --
    # Ran off the end: every keyword sorts before the target.
    return ( below, None )


# - - -   b r a c k e t A n c h o r   - - -

def bracketAnchor ( target, prev, next ):
    """Figure out where in the index to send a search for the target.

      [ (target is a nonempty lowercased string) and
        (prev is None or a lowercased keyword < target) and
        (next is None or a lowercased keyword >= target) ->
          sys.stdout  +:=  a "Location:" header redirecting the
              server to bracketed-target(target, prev, next)
          log-file  +:=  miss-entry ( target )
          stop execution ]

    Never returns; each branch ends in a call that exits.

    NOTE(review): Rule 1 is tested first, so an exact match on the
    first keyword of a letter goes to the top of that letter's page
    and is logged as a miss.  That is what the specification table
    requires; confirm it is still the desired behavior.
    """
    if ( ( prev is None ) or ( prev[0] != target[0] ) ):
        #-- Rule 1 --
        # [ sys.stdout  +:=  a "Location:" header redirecting the
        #       server to letter-file ( target[0] )
        #   log-file  +:=  target + "\n"
        #   stop execution ]
        jumpFile ( target[0], target )
    elif ( ( next is not None ) and ( next.startswith ( target ) ) ):
        #-- Rule 2 --
        # [ sys.stdout  +:=  a "Location:" header redirecting the
        #       server to letter-anchor ( next )
        #   stop execution ]
        # This is a hit, so nothing is logged.
        jumpToAnchor ( next )
    else:
        #-- Rule 3 --
        # [ log-file  +:=  target + "\n"
        #   sys.stdout  +:=  a "Location:" header redirecting the
        #       server to letter-anchor ( prev )
        #   stop execution ]
        logMiss ( target )
        jumpToAnchor ( prev )


# - - -   j u m p F i l e   - - -

def jumpFile ( letter, target ):
    """Jump to the beginning of the index page for a given letter.

      [ (letter is a single lowercase letter) and
        (target is a string) ->
          sys.stdout  +:=  a "Location:" header redirecting the
                           server to letter-file(letter)
          log-file  +:=  target + "\n"
          stop execution ]
    """
    #-- 1 --
    # [ log-file  +:=  target + "\n" ]
    logMiss ( target )

    #-- 2 --
    # [ url  :=  the URL of the index page for (letter) ]
    url = "%s_%s%s" % ( INDEX_BASE, letter, HTML_SUFFIX )

    #-- 3 --
    # [ sys.stdout  +:=  a "Location:" header redirecting the server
    #       to url
    #   stop execution ]
    pointServerAt ( url )


# - - -   l o g M i s s   - - -

def logMiss ( target ):
    """Add the target to the cumulative file of missed searches.

      [ (target is a string) and
        (LOG_FILE can be opened for append access) ->
          log-file  +:=  target + "\n" ]

    Raises IOError if LOG_FILE cannot be opened or written.
    """
    #-- 1 --
    # [ logFile  :=  LOG_FILE, opened for append access ]
    logFile = open ( LOG_FILE, "a" )

    #-- 2 --
    # [ logFile  +:=  target + "\n" ]
    try:
        logFile.write ( "%s\n" % target )
    finally:
        logFile.close()


# - - -   j u m p T o A n c h o r   - - -

def jumpToAnchor ( keyword ):
    """Redirect the browser to the anchor for keyword

      [ keyword is a nonempty string in the KEYWORD_FILE,
        lowercased ->
          sys.stdout  +:=  a "Location:" header redirecting the
              server to the URL+location of keyword
          stop execution ]
    """
    #-- 1 --
    # [ url  :=  (the URL of the page for the first letter of
    #            keyword) + "#" + keyword ]
    url = ( "%s_%s%s#%s" %
            ( INDEX_BASE, keyword[0], HTML_SUFFIX, keyword ) )

    #-- 2 --
    # [ sys.stdout  +:=  a "Location:" header redirecting the
    #       server to url
    #   stop execution ]
    pointServerAt ( url )


# - - - - -   m a i n   - - - - -

def main ( ):
    """Run the search: see "Overall intended function" above.

    Never returns normally; every path ends in SystemExit.
    """
    #-- 1 --
    # [ if (we are executing under the html protocol) and
    #   (the arguments are valid) ->
    #     args  :=  a new Args object representing those arguments
    #   else ->
    #     sys.stdout  +:=  an HTML error page
    #     stop execution ]
    args = Args()

    #-- 2 --
    # [ if the first nonblank characters of args.target are
    #   letters ->
    #     m  :=  a match whose group 1 is that run of letters
    #   else ->
    #     sys.stdout  +:=  a "Location:" header redirecting to
    #                      DEFAULT_URL
    #     stop execution ]
    m = re.match ( r'\s*([a-zA-Z]+)', args.target )
    if m is None:
        pointServerAt ( DEFAULT_URL )
    else:
        #-- 3 --
        # [ sys.stdout  +:=  a "Location:" header redirecting the
        #       server to effective-target(lowercased keyword,
        #       KEYWORD_FILE)
        #   log-file  +:=  miss-entry(lowercased keyword) ]
        searchKeywords ( m.group(1).lower() )


# Run only when executed as a script, so that importing this file
# does not try to read form arguments or emit headers.
if __name__ == "__main__":
    main()