        Dim Version$/"TYPEINDEX V1.0"/
!    TYPEINDEX.BAS
!       This program is helpful when building an index for a .TYP document
!       the first time.
!       The program has three modes:
!           1) Extraction of an initial word list, which simply
!              produces a list of all the words in the file
!           2) Insertion of .IX <word> commands into the .TYP file,
!              using a list of words to be indexed.
!           3) Construction of raw index from /I output of TYPE
!
!       Extract mode does the following:
!              Reads specified source file
!              Rejects all lines that start with "."
!              Breaks rest of lines up into WORDS, i.e., sequences
!              separated by blanks or commas
!              Drops trailing ',' or '.' and ')'.  Elides leading '('.
!              Words are stored in a database so duplicates may be
!              removed.  After the entire source has been processed,
!              the database is dumped to an alphabetically sorted
!              output file.
!              Should drop numbers and all duplicates.
!              's and " should also be removed.
!
!       Index mode does the following:
!              Read in file of index terms
!              Scan .TYP file for words which are index terms, or
!              whose prefix is an index term.
!              For each index term found, output all .TYP file text
!              from last index term to this index term, then
!              output <CR>.IX indexterm<CR>
!              For words which have a prefix equal to an index term,
!              ask if index reference should be output
!
!       Raw index construction does the following:
!              Reads the .IDX file produced by TYPE using the /I option
!              For each index term, it constructs a list of page references
!              After .IDX file is exhausted, outputs list of
!              index terms in alphabetical order; each term is followed
!              by a comma-separated list of page references, with
!              duplicate page references deleted.
!              Some postprocessing of the index will be necessary.
!
!    Working strings.  Line$ holds the current source line (and is reused
!    for replies to prompts), LineToShred$ is the working copy of the line
!    that ReadNextWord consumes, Word$ is the word or index term being
!    processed, and Temp$ is scratch space.
     Dim Line$(250), LineToShred$(250), Word$(100), Temp$[100]
!    ListOfReferences$ collects the page references of one index term
!    while the raw index is written; WordPrefix$ is used by the INDEX code.
     Dim ListOfReferences$[100](100),WordPrefix$(100)
!    File numbers used in the #... I/O statements of this program.
     Dim SourceFile/1/,IndexFile/2/,OutputFile/3/
!    Characters that ReadNextWord strips from the front of a word ...
     Dim DeleteablePrefixes$/" ","(","[","'",'"'/
!    ... and from the end of a word.
     Dim DeleteableSuffixes$/".",",",")","]",":",";","'",'"'/
!    KEY.BAS is not visible here; presumably it supplies the keyed-file
!    routines called below (KEYINIT, KEY, KEYINSERT, KEYREPLACE, KEYNEXT).
     INCLUDE "KEY.BAS"
!    Announce the program and describe the three modes.
     Print Version$
     Print "This program performs three functions:"
     Print "EXTRACT: Construct a list of words contained in a .TYP file."
     Print "INDEX: Given list of index words, insert .IX commands for"
     Print "       those words into a .TYP file."
     Print "FORMAT: Given the /I output of TYPE, construct the actual"
     Print "        index for a document in .TYP format."
!    Ask which mode to run.  The reply is upper-cased, so it may be typed
!    in either case.  INDEX and FORMAT branch to their own sections; any
!    other reply (not only EXTRACT) falls through both tests.
!    NOTE(review): fall-through presumably reaches ExtractWordsFromFile,
!    the first main-line code after the subroutine definitions -- confirm.
     Input "EXTRACT words, INDEX a file, or FORMAT an index? " Line$
     Let Line$=UpperCase$(Line$)
     If Line$="INDEX" then IndexTheFile
     If Line$="FORMAT" then ConstructRawIndex
!^L
Subroutine OutputSourceLine
    ! Copy the current source line (Line$) to the output file, but only
    ! when the OutputSource flag is set; otherwise this does nothing.
    If OutputSource
    Then
        Print #OutputFile,Line$
    Fi
    Return Subroutine
End

Subroutine ReadNextWord
    ! Returns with WORD$ set to next word from file
    ! Outputs all stuff from file up to WORD$ to output file
    ! Sets up WORD$ to be sent to output file
    !
    ! Works on LineToShred$, a copy of the current source line in which
    ! tabs and commas have been replaced by blanks.  When that copy is
    ! used up, the next source line is read.  Lines beginning with "."
    ! (TYPE commands) are never split into words.
    ! At end of file the subroutine returns without assigning Word$;
    ! the caller is expected to test Eof on the source file itself.
PickWordFromFrontOfLine: ! pick off the first word on the line
300  If LineToShred$="" Then GetNextLineToGetNextWord
     ! Strip one leading deletable character, then start over
     If Find(DeleteablePrefixes$,LineToShred$(1,1))
     Then
         LineToShred$=Right$(LineToShred$,2)
         Goto PickWordFromFrontOfLine
     Fi
     ! The word runs up to the first blank, or to the end of the line
     ! when no blank is left.  Right$(s,n) is used in this subroutine
     ! as "the text of s from position n onward".
     i=Find(LineToShred$," ")
     If i=0
     Then Word$=LineToShred$\LineToShred$=""
     Else Word$=LineToShred$(1,i-1)\LineToShred$=Right$(LineToShred$,i+1)
TruncateWord:
     ! A word reduced to nothing is discarded; go pick another one
     If Word$="" Then PickWordFromFrontOfLine
     ! Drop trailing deletable characters one at a time
400  If Find(DeleteableSuffixes$,Word$(Len(Word$),1))
     Then Len(Word$)=Len(Word$)-1 \ Goto TruncateWord

     ! Check for first word of sentence
     ! The test is purely on letter case: any word of two or more
     ! characters that starts with a capital followed by a lower-case
     ! letter gets its first letter lower-cased, wherever it occurs.
     If Len(Word$)>=2 ...
&       and Find("ABCDEFGHIJKLMNOPQRSTUVWXYZ",Word$(1,1)) ...
&       and Find("abcdefghijklmnopqrstuvwxyz",Word$(2,1))
     Then Word$(1,1)=LowerCase$(Word$(1,1))
     Return Subroutine

GetNextLineToGetNextWord:
     ! Hand the line just finished to OutputSourceLine, then read the
     ! next source line.
     ! NOTE(review): the empty-line and TYPE-command cases below call
     ! OutputSourceLine themselves and then come back through this
     ! label, which calls it again on the same Line$.  That looks like
     ! it would write those lines twice when OutputSource is set --
     ! confirm before enabling source output.
     ! NOTE(review): Line$(1,1) is evaluated even when Line$ is empty;
     ! confirm that a substring of an empty string is harmless here.
100  Call OutputSourceLine
     Input #SourceFile,Line$
     If Eof(SourceFile) Then Return Subroutine
     If Line$=""
     Then Call OutputSourceLine \ ! Empty line
     If Line$(1,1)="."
     Then
         ! Must be TYPE command
         Call OutputSourceLine
         ! NOTE(review): the lower-case test requires a blank after
         ! ".im" but the upper-case test does not, so any command that
         ! merely begins with ".IM" is treated as image mode -- confirm.
         If find(line$,".im ")=1 or find(line$,".IM" )=1
         Then
             ! Handle Image mode by skipping it
             ! The text from position 4 onward is taken as the number
             ! of image-mode lines; if it does not convert, none are
             ! skipped.
             If error when i=val(right$(line$,4)) then i=0 \! ignore conv err
             For linecount=1 to i
                 Input #SourceFile,Line$ \ ! Eat image mode text
                 Call OutputSourceLine
             Next LineCount
         Fi
         Goto GetNextLineToGetNextWord \ ! Ignore TYPE command
     Fi
     LineToShred$=Line$ \ ! So we can output Line$ intact later
     ! Turn every tab (:9) and every comma in the working copy into a
     ! blank (:20) so that only blanks separate words.
ReplaceTabByBlanks: ...
&    Repeat
200      i=Find(LineToShred$,chr$(:9))
         If i=0 then Exit ReplaceTabByBlanks
         LineToShred$(i)=:20 \ ! Replace tab by blank
     End
ReplaceCommaByBlanks: ...
&    Repeat
220      i=Find(LineToShred$,",")
         If i=0 then Exit ReplaceCommaByBlanks
         LineToShred$(i)=:20 \ ! Replace comma by blank
     End
     Goto PickWordFromFrontOfLine
End
!^L
ExtractWordsFromFile: ! Break file into constituent words
     ! EXTRACT mode.  Prompts for the source file and the output file,
     ! then reads the source word by word (via ReadNextWord), keeping
     ! an occurrence count per distinct word in a keyed scratch file,
     ! "typeindex.tmp".  When the source is exhausted the words are
     ! written to the output file in key order, one per line, each
     ! followed by its count divided by the number of distinct words.
     Input "File to break into words: " Line$
     Open #SourceFile,Line$
     Input "File to place resulting words into: " Line$
     Create #OutputFile,Line$

     Create #IndexFile,"typeindex.tmp"
     ! KEYINIT comes from KEY.BAS; the meaning of its numeric
     ! arguments is not visible in this file.
     CALL KEYINIT(IndexFile, 1, 50, 32)
     WordCount=0 \ WordSavedCount=0
     Line$="" \ ! To start off correctly
     LineToShred$="" \ ! To start off correctly
     OutputSource=FALSE \ ! don't output source lines
ExtractWordsLoop: ...
&    Repeat
         ReadNextWord
         ! ReadNextWord returns without a new word at end of file, so
         ! test the source file before using Word$.
         If Eof(SourceFile) then ExtractWordsDone
         WordCount=WordCount+1
         IF ERROR WHEN
             WordOccurrenceCount=KEY(IndexFile,1,Word$)
         THEN
             ! Error 1075 is handled as "word not yet in the keyed
             ! file"; any other error is passed on.
             If Err=1075
             Then
                 ! New word, record it with reference count of 1
                 CALL KEYINSERT(IndexFile,1,Word$,1)
                 WordSavedCount=WordSavedCount+1
                 Print Word$ \ ! Show we just stored a word
             Else ERROR Fi
         Else
             Print "!"; \ ! Key already exists
             ! Bump the stored count; the value KeyReplace returns is
             ! not needed.
             IgnoreMe=KeyReplace(IndexFile,1,Word$,WordOccurrenceCount+1)
         Fi
     End

ExtractWordsDone: ! Now we have list of words
     ! Print them all out
     ! Starting from the empty string, each KEYNEXT call moves WORD$ on
     ! to the following key.  Any error from KEYNEXT is taken to mean
     ! there are no more keys: the summary is printed and the program
     ! exits.
     LET WORD$=""
ExtractWordsOutputLoop:
     IF ERROR WHEN
        WordOccurrenceCount=KEYNEXT(IndexFile,1,WORD$)
     THEN
         Print "****";WordSavedCount;" Stored Words out of"; WordCount; "****"
         EXIT
     FI
     ! NOTE(review): the ratio is taken against WordSavedCount (distinct
     ! words), not WordCount (all words read); with the ".###" format it
     ! looks as if a fraction of all words may have been intended --
     ! confirm before relying on these figures.
     PRINT #OutputFile,WORD$,Numf$(".###",WordOccurrenceCount/WordSavedCount)
     GOTO ExtractWordsOutputLoop
!^L
IndexTheFile: ! install .IX commands in .TYP file according to list of words
     ! INDEX mode.  This section is unfinished: it announces that the
     ! feature is not operative and then carries on into the loop below.
     ! NOTE(review): nothing in this section opens the source, index or
     ! output file or prompts for their names, yet ReadNextWord reads
     ! from the source file -- confirm whether execution should stop
     ! after the message.
     Print "This feature currently not operative."
     Repeat
         ReadNextWord
         If Eof(1) then Exit

         ! Now we must find the longest prefix of WORD$ that is an index word
         ! As written, the loop only assigns each successively shorter
         ! prefix to WordPrefix$; nothing in this section looks the
         ! prefix up or uses it afterwards.
         For prefixlength=len(Word$) to 1 step -1
             Let WordPrefix$=Word$[1,prefixlength]
         Next prefixlength

         Let i=len(Word$) \ ! Find the longest prefix

         ! Lower the character code of the last character of Word$ by
         ! one before the KEYNEXT call below.
         ! NOTE(review): presumably so that KEYNEXT, which appears to
         ! return the key following its argument, can land on Word$
         ! itself -- confirm against KEY.BAS.
         Word$(i)=Word$(i)-1 \ ! This works for all printable characters

         ! Error 1076 from KEYNEXT just prints "!"; any other error is
         ! passed on.  On success Word$ is counted and echoed.
         IF ERROR WHEN IgnoreMe=KEYNEXT(IndexFile,1,Word$,0)
         THEN IF ERR=1076 Then Print "!"; Else ERROR Fi
         Else
             WordSavedCount=WordSavedCount+1
             Print Word$ \ ! Show we just stored a word
         Fi
     End
!^L
ConstructRawIndex:
!              Reads the .IDX file produced by TYPE using the /I option
!              For each index term, it constructs a list of page references,
!              deleting duplicate references as it goes.
!              After .IDX file is exhausted, outputs list of
!              index terms in alphabetical order; each term is followed
!              by a comma-separated list of page references.
!              The index term is used as a key, and references a chain
!              of page "numbers".  The format of each chained reference
!              is: Pointer to next ref, len(thisref), "thisref" (as a string)
!
!              This first part prompts for the file names and the column
!              width, then builds the keyed scratch file "indexing.tmp"
!              from the /I file.  Widths under 10 are replaced by 10.
     Input "Name of /I mode TYPE output file? " Line$
     Open #SourceFile,Line$
     Input "Name of place to put Raw Index? " Line$
     Input "Width of index column in characters? " IndexWidth
     If IndexWidth<10
     Then
         Print "Index width too small, using width of 10 instead."
         IndexWidth=10
     Fi
     Create #OutputFile,Line$

!    Debugging aid, disabled by the constant 0: reuse an existing
!    "indexing.tmp" and go straight to the output phase.
if 0
 then
     Open #IndexFile,"indexing.tmp" \ ! skip processing time to get to bug
     goto InitializeOutputFile
fi
     Create #IndexFile,"indexing.tmp"\CALL KEYINIT(IndexFile, 1, 50, 32)
!^L
!    The /I file is read as pairs of lines: an index term, then the page
!    on which it was found.  A term with no page line after it ends the
!    loop with a warning.
ReadIndexTerms: ...
&    Repeat
           Input #SourceFile,Word$ \ ! Index term name
           If Eof(SourceFile) Then Exit ReadIndexTerms
           Input #SourceFile,Line$ \ ! Page on which index term was found
           If Eof(SourceFile)
           Then
               Print "?? /I mode file malformed at end!"
               Exit ReadIndexTerms
           Fi
           ! Look the term up; on success ChainHead is the value stored
           ! under the key, used below as the file position of the first
           ! chain element.  Error 1075 is handled as "term not present
           ! yet"; any other error is passed on.
           If Error when ChainHead=Key(IndexFile,1,Word$)
           Then
               If Err=1075
               Then
                   Print "New Index term: "; Word$ ;" @ "; Line$
                   ! Make space for new element and save in file
                   ! A next-pointer of 0 marks the end of the chain.
                   ! NOTE(review): the 6+6 presumably allows for the
                   ! stored sizes of the pointer and the length -- confirm.
                   NewElement=GetSpace(IndexFile,6+6+len(Line$))
                   Write #IndexFile@NewElement,0,len(Line$),Line$
                   ! Record index term
                   KeyInsert(IndexFile,1,Word$,NewElement)
               Else Error
           Else
               ! Index term already present.
               ! Determine if this reference is a duplicate
               ! Only the element at the head of the chain (the most
               ! recently added reference) is compared, so a repeat of
               ! an older reference is not detected.
               Read #IndexFile@ChainHead,...
&                   NextElement,Len(Temp$),Temp$[1,Len(Temp$)]
               If Temp$<>Line$
               Then
                   ! New page reference. Record it
                   ! Make space for new element and save in file
                   ! The new element points at the old head and becomes
                   ! the value stored under the key, so the chain runs
                   ! from newest to oldest reference.
                   NewElement=GetSpace(IndexFile,6+6+len(Line$))
                   Write #IndexFile@NewElement,ChainHead,len(Line$),Line$
                   IgnoreMe=KeyReplace(IndexFile,1,Word$,NewElement)
               Else
                   Print "Duplicate page reference ignored."
               Fi
          Fi
     End
!^L
InitializeOutputFile:
     ! Output phase of FORMAT mode: write the raw index to the output
     ! file, one index term at a time in key order.  Each term is
     ! wrapped to IndexWidth columns, with continuation lines indented
     ! three blanks, and is followed by its page references, which are
     ! right-justified against IndexWidth.
     Print #OutputFile,".NF" \ ! Output TYPE commands to space index nicely
     Print #OutputFile,".NJ"
     Let LinesOnThisPage=0 \ ! Number of lines printed on this page

     Let Word$="" \ ! Set up to scan thru index terms alphabetically
OutputRawIndex: ...
&    Repeat
       ! Output next index term
       ! KeyNext is used here as "advance Word$ to the next key and
       ! return the value stored under it" (the head of the reference
       ! chain).  Any error is taken to mean the index is exhausted.
       IF ERROR WHEN NextElement=KeyNext(IndexFile,1,Word$)
       THEN Print "Construction complete"\Exit
       Print #OutputFile,".im 1"
       ! Shorten Word$ so that it ends at its last non-null character.
       For i=len(Word$) to 1 step -1 until Word$(i)<>0 do ! chop nulls
       len(Word$)=i
       Temp$=Word$ \ ! Break up index term into chunks shorter than IndexWidth
       ! Temp$ holds the remainder of Word$ after breaking off leading chunks
!D!    Print len(Word$);">";Word$;"<"
!D!    For i=1 to len(Word$) do print hex$(Word$(i)); end\ Print
       Let Line$="" \ ! First line of index term is not indented
       ! While the indent plus the rest of the term is wider than the
       ! column, break at the last blank that fits on the line, or at
       ! the column edge when the line holds no blank.
       While len(Temp$)+len(Line$)>IndexWidth Do
            BlankPosition=0 \ ! Assume no blank can be found
            Let RemainingSpaceOnLine=IndexWidth-Len(Line$)
            For i=1 to RemainingSpaceOnLine
                If i>len(Temp$) then exit i \ ! With BlankPosition=0
5000            If Temp$[i,1]=" " then BlankPosition=i
            Next i
            If BlankPosition=0
            Then
                Print Word$
                Print "*** index term too wide, broken arbitrarily"
!D!             Print ">>";Line$;"<<";Indexwidth\Print ">>";Temp$;"<<"
1000            Print #OutputFile,Line$;Temp$[1,RemainingSpaceOnLine]
2000            Let Temp$=Right$(Temp$,RemainingSpaceOnLine+1)
            Else
3000            Print #OutputFile,Line$;Temp$[1,BlankPosition]
4000            Let Temp$=Right$(Temp$,BlankPosition+1)
            Fi
            Let LinesOnThisPage=LinesOnThisPage+1
            Line$="   " \ ! Make all but 1st line of index term indented
            ! Note: IndexWidth must be at least 1+maxlen(line$), 10 is reasonable
       End
       ! Last (or only) piece of the term; the trailing ";" leaves the
       ! output line open so the references can follow on it.
6000   PRINT #OutputFile,Line$;Temp$;" ";

       ReferenceCount=0 \ ! # References to index term
       ! Walk the chain, copying each reference text into
       ! ListOfReferences$ until an element with a next-pointer of 0 has
       ! been read.
       ! NOTE(review): ReferenceCount is not checked against the
       ! declared size of ListOfReferences$ (100) -- confirm that no
       ! term can collect more references than that.
       Repeat
           ! Collect list of references
           ReferenceCount=ReferenceCount+1 \ ! bump # references to index term
           Read #IndexFile@NextElement,...
&               NextElement,Len(Temp$),Temp$[1,Len(Temp$)]
           Let ListOfReferences$[ReferenceCount]=Temp$
       Unless NextElement=0 End
       ! Now output references in forward order
       ! The list is emitted from its last entry back to its first.
       ! References are gathered in Line$, separated by commas; when the
       ! next one would not fit, what has been gathered is printed
       ! right-justified and a new line, indented three blanks, is begun.
       ! NOTE(review): the fit test does not add one for the comma that
       ! is inserted between references -- confirm, given how Col()
       ! counts columns, that a line cannot run one past IndexWidth.
       Let Line$="" \ ! references collect so far
       While ReferenceCount>0 Do
          If Col(OutputFile)+Len(Line$)+...
&            len(ListOfReferences$[ReferenceCount]) > IndexWidth
          Then
              Print #OutputFile,tab(IndexWidth-len(Line$)+1);Line$
              Let LinesOnThisPage=LinesOnThisPage+1
              Print #OutputFile,"   ";
              Line$=ListOfReferences$[ReferenceCount]
          Else
              If Line$=""
              Then Line$=ListOfReferences$[ReferenceCount]
              Else Line$=Line$ cat "," cat ListOfReferences$[ReferenceCount]
          Fi
          ReferenceCount=ReferenceCount-1
       End
       ! Flush whatever references are still waiting in Line$.
       If Len(Line$)>0
       Then
           Print #OutputFile,tab(IndexWidth-len(Line$)+1);Line$
           Let LinesOnThisPage=LinesOnThisPage+1
       Fi
       ! Start a new page (".pa") once more than 50 lines have been
       ! written; this is only tested between index terms.
       If LinesOnThisPage>50
       Then
           Print #OutputFile,".pa"
           Let LinesOnThisPage=0
       Fi
    End
End
