#!/usr/local/bin/python
#----------------------------------------------------------------
# index.cgi: CGI script to search for an index term.
# $Revision: 1.4 $ $Date: 2003/08/22 23:05:03 $
#----------------------------------------------------------------
# Works with the index structures generated by PyStyler's
# `stylindex' utility. This program was translated from an
# Icon script that had been working since 1996-11-12 until
# the Great RedHat Upgrade of 2003-08-02.
#----------------------------------------------------------------
# External revision level of this script.
INDEX_CGI_REVISION = '0.0'
#================================================================
# Overall intended function:
# [ (this script is executed with the CGI protocol, receiving
# its arguments through the HTML form protocol) and
# (KEYWORD_FILE names a readable file) ->
# if there is a TARGET_NAME argument ->
# if TARGET_NAME argument is empty or its first nonblank
# character is not a letter ->
# sys.stdout +:= a "Location:" header redirecting
# the server to (DEFAULT_URL)
# else ->
# sys.stdout +:= a "Location:" header redirecting
# the server to the URL given by
# effective-target(the TARGET_NAME argument,
# contents of KEYWORD_FILE)
# log-file +:= miss-entry(the TARGET_NAME argument) ]
#================================================================
# CONTENTS
#----------------------------------------------------------------
# Overall intended function
# Imports
# Manifest constants
# Specification functions
# Functions and classes
# errorPage(): Display an HTML error page and stop.
# class Args: Digests HTML form arguments.
# pointServerAt(): Sends "Location:" to redirect to a URL
# searchKeywords(): Redirect according to the search target
# readKeywordFile(): Read the file of existing keywords
# matchKeyword(): Find the words adjacent to the target
# bracketAnchor(): Find the appropriate target
# jumpFile(): Jump to the beginning of an index page
# logMiss(): Accumulate a file of missed searches
# jumpToAnchor(): Jump to a specific keyword
# Main
#================================================================
# IMPORTS
#----------------------------------------------------------------
from __future__ import generators # Allow generators
import sys # Standard system module
import re # Regular expressions
import cgi # Common Gateway Interface module
import os # Operating system functions
import pwd # Password file functions
#================================================================
# MANIFEST CONSTANTS
#----------------------------------------------------------------
# Base name shared by all index files.
INDEX_BASE = "index"
# Suffix of a web page file.
HTML_SUFFIX = ".html"
# Starting page: where the server is redirected when the search
# target is empty or does not start with a letter.
DEFAULT_URL = "%s%s" % (INDEX_BASE, HTML_SUFFIX)
# Name of the file holding the keywords of the index pages.
KEYWORD_FILE = "keywords"
# Name of the HTML form argument that carries the search term.
TARGET_NAME = "target"
# Name of the file that accumulates failed search targets.
LOG_FILE = "misses.log"
#================================================================
# BLANKET PRECONDITIONS
#----------------------------------------------------------------
# * KEYWORD_FILE is readable and contains the keywords of the
# index pages in ascending order (case-insensitive).
#----------------------------------------------------------------
#================================================================
# SPECIFICATION FUNCTIONS
#----------------------------------------------------------------
# above-anchor(target, keyFile) ==
# if target > (all words in keyFile) ->
# None
# else ->
# the first word >= target in keyFile, lowercased
#----------------------------------------------------------------
# below-anchor(target, keyFile) ==
# if target < (all words in keyFile) ->
# None
# else ->
# the word just before target in keyFile, lowercased
#----------------------------------------------------------------
# bracketed-target ( target, prev, next ) ==
# if ( ( prev is None ) or # Rule 1
# ( target[0] != prev[0] ) ) ->
# letter-file ( target[0] )
# else if ( ( next is not None ) and
# ( next.startswith(target) ) ) -> # Rule 2
# letter-anchor ( next )
# else ->
# letter-anchor ( prev ) # Rule 3
#--
# This defines the behavior of searches for words that aren't
# in the keyword list. We need to know the word prev (or
# None if there are no words prev) and the word next (or None).
# There are three principal cases:
# 1. If prev is None, or prev starts with a different letter
# than target, go to the beginning of the file for
# the initial letter of target.
#
# 2. If next is not None, and target matches next (or is
# a prefix of it), go to keyword (next). This is
# considered a hit; the other cases are misses, and
# the target will be written to the log-file.
#
# 3. Go to keyword (prev) so the user can see context both
# before and after the point where the target would be.
#
# Here is a set of examples for all possible cases, where "..."
# means "don't care":
# prev target next Go to Rule
# ---- ------ ---- ----- ---
# None "bear" None _b 1
# None "bear" "box" _b 1
# None "bear" "cat" _b 1
# "ace" "bear" ... _b 1
# "ace" "ant" None _a#ace 3
# "ace" "ant" "axe" _a#ace 3
# "ace" "ant" "art" _a#ace 3
# "ace" "ant" "box" _a#ace 3
# "ace" "ant" "ant" _a#ant 2
# "ace" "ant" "antic" _a#antic 2
#----------------------------------------------------------------
# effective-target(target, keyFile) ==
# bracketed-target(target, below-anchor(target, keyFile),
# above-anchor(target, keyFile))
#--
# This function defines the mapping of search targets to
# URLs with anchors. The destination depends on the word
# before the target (if any) and the word after (if any)
#----------------------------------------------------------------
# letter-anchor(target) ==
# letter-file(target) + "#" + target
#--
# The URL of a keyword is the URL of the page for that letter
# of the alphabet, with the keyword appended as an anchor.
#----------------------------------------------------------------
# letter-file(target) ==
# the file whose name is (INDEX_BASE + "_" + target[0] +
# HTML_SUFFIX)
#----------------------------------------------------------------
# log-file == a file named LOG_FILE
#----------------------------------------------------------------
# miss-entry(s) ==
# if s is a miss (Rule 2 of bracketed-target does not apply) ->
# a line containing s
# else -> nothing
#----------------------------------------------------------------
# - - - e r r o r P a g e - - -
def errorPage ( *L ):
    """Generate an HTML error page and stop.

      [ L is a list of strings ->
          sys.stdout  +:=  an HTML error page displaying the
                           concatenation of the elements of L
          stop execution ]

      Never returns normally: always raises SystemExit with
      exit status 1.
    """
    #-- 1 --
    # [ sys.stdout  +:=  headers indicating an HTML page ]
    # The empty line terminates the CGI header section.
    sys.stdout.write ( "Content-type: text/html\n\n" )

    #-- 2 --
    # [ escape  :=  a function that escapes "&", "<" and ">"
    #               in a string for inclusion in HTML ]
    # cgi.escape() is absent from current Python releases, so
    # prefer html.escape() and fall back to cgi.escape() on
    # interpreters that have no `html' module.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    #-- 3 --
    # [ message  :=  the concatenation of the elements of L,
    #                escaped for HTML ]
    # The second argument (quote) is false because the text is
    # placed in element content, not in an attribute value, so
    # quote characters may pass through unchanged.
    message = escape ( "".join(L), False )

    #-- 4 --
    # [ sys.stdout  +:=  a complete HTML page with a generic error
    #       message and message ]
    # Adjacent literals are joined before "%" is applied, so the
    # single %s below is filled from message.
    sys.stdout.write (
        "<html>\n"
        "<head>\n"
        "  <title>Configuration error</title>\n"
        "</head>\n"
        "<body>\n"
        "  <h1>This CGI script has been mis-configured.</h1>\n"
        "  <p>Error: %s</p>\n"
        "</body>\n"
        "</html>\n" % message )

    #-- 5 --
    # [ stop execution ]
    sys.exit(1)
# - - - - - c l a s s A r g s - - - - -
class Args:
"""Object to represent arguments from the