         INCLUDE    SCANNER.680C

         ORG   $3000
; PARSER -- scaffold test driver for the scanner.
; Initializes the scanner, opens the source-input (SI) file, creates the
; listing-output (LO) file, then calls SCANTOKEN repeatedly until it returns
; EOFTOKEN in (A).  It then closes the SI file and hangs in a branch-to-self.
; Each file routine is entered with (B) = file name length and (X) = address
; of the file name text; presumably that is the calling convention of
; SCOPENSI / SCCREATELO -- TODO confirm against SCANNER.680C.
; NOTE(review): EOFTOKEN is not defined in the visible source (tkn:eof is);
; presumably it comes from SCANNER.680C -- confirm the two agree.
PARSER  JSR     SCANINIT        INITIALIZE SCANNER
        LDAB    #SIFILENAMELEN
        LDX     #SIFILENAME
        JSR     SCOPENSI        OPEN SI FILE
        LDAB    #LOFILENAMELEN
        LDX     #LOFILENAME
        JSR     SCCREATELO      CREATE THE LISTING FILE
; Token loop: every token other than EOFTOKEN is simply discarded.
LOOP    JSR     SCANTOKEN
        CMPA    #EOFTOKEN
        BNE     LOOP
        JSR     SCCLOSESI
; NOTE(review): no matching close call for the listing file is visible here.
; '*' is the current location, so this branch loops on itself forever.
        BRA     *
*
; File name text; each ...LEN symbol is the byte count of the string before it.
SIFILENAME      FCC     /SCANNER.TST/
SIFILENAMELEN   EQU     *-SIFILENAME
*
LOFILENAME      FCC     /CONSOLE:/
LOFILENAMELEN   EQU     *-LOFILENAME
*
; NOTE(review): FIN normally closes a conditional-assembly section, but no
; opening directive for SCAFFOLD is visible in this part of the file.
        FIN     SCAFFOLD
*        "C" TOKEN SCANNER
*        Works with Software Dynamics Token Scanner
*
*        "C" lexeme syntax, derived from "C Reference Manual" Dennis M. Ritchie
*        Also, "The C Programming Language", 1978 by BW Kernighan & DM Ritchie
*        7/16/82 I.B.

*        A C comment has the form:  /* ...text... */
*        A C comment can cross newline boundaries.
*        Comments can appear anywhere that blanks, tabs or newlines can appear
*        and are handled completely by lexical analysis

*        Compiler control "Lines" (i.e., lines starting with '#' in column 1)
*        are also handled by the lexical analyzer
*        and consist of the following:
*              #DEFINE identifier token-string
*              #DEFINE identifier ( identifier , ... , identifier ) token-string
*              #UNDEF identifier
*              #INCLUDE "filename"
*              #INCLUDE <filename>
*              #IF constant-expression
*              #IFDEF identifier
*              #IFNDEF identifier
*              #ELSE
*              #ENDIF
*              #LINE constant identifier

*        TOKEN DEFINITIONS:
; Token codes returned in (A) by the scanner's semantic routines.
; Values :00-:10 are reserved for these classes; single-character tokens and
; keywords use the codes listed in the comment tables that follow.
tkn:eof                  equ   $00     ; end-of-file (right anchor)
tkn:error                equ   $01     ; error token
tkn:integerconstant      equ   $02     ; integer constant
tkn:longconstant         equ   $03     ; long constant
tkn:characterconstant    equ   $04     ; character constant
tkn:floatingconstant     equ   $05     ; floating constant
tkn:string               equ   $06     ; quoted string
tkn:labelidentifier      equ   $07     ; identifier classes follow...
tkn:functionidentifier   equ   $08
tkn:objectidentifier     equ   $09
tkn:arrayidentifier      equ   $0A
tkn:identifier           equ   $0B     ; undefined identifier
tkn:structureidentifier  equ   $0C
tkn:unionidentifier      equ   $0D
tkn:scalaridentifier     equ   $0E
tkn:fieldidentifier      equ   $0F
tkn:typeidentifier       equ   $10

*    ';' = :3B ;
*    '(' = :28 ;
*    ')' = :29 ;
*    ',' = :2C ;
*    '{' = :7B ;
*    '}' = :7D ;
*    '[' = :5B ;
*    ']' = :5D ;
*    '*' = :2A ;
*    ':' = :3A ;
*    '=' = :3D ;
*    '+' = :2B ;
*    '-' = :2D ;
*    '/' = :2F ;
*    '%' = :25 ;
*    '>' = :3E ;
*    '<' = :3C ;
*    '&' = :26 ;
*    '^' = :5E ;
*    '|' = :7C ;
*    '?' = :3F ;
*    '!' = :21 ;
*    '~' = :7E ;
*    '.' = :2E ;

* Keywords

*    'asm' = :41 ;
*    'auto' = :42 ;
*    'break' = :43 ;
*    'byte' = :44 ;
*    'char' = :45 ;
*    'case' = :46 ;
*    'continue' = :47 ;
*    'default' = :48 ;
*    'do' = :49 ;
*    'double' = :4A ;
*    'else' = :4B ;
*    'entry' = :4C ;
*    'extern' = :4D ;
*    'float' = :4E ;
*    'for' = :4F ;
*    'goto' = :50 ;
*    'if' = :51 ;
*    'int' = :52 ;
*    'long' = :53 ;
*    'register' = :54 ;
*    'return' = :55 ;
*    'short' = :56 ;
*    'sizeof' = :57 ;
*    'static' = :58 ;
*    'struct' = :59 ;
*    'switch' = :5A ;
*    'typedef' = :61 ;
*    'union' = :62 ;
*    'unsigned' = :63 ;
*    'while' = :64 ;
*    '()' = :65 ;
         page
N:Way ; N-Way branch table for deblanking
; One 2-byte entry per 7-bit ASCII code (:00-:7F, 128 entries in all).
; An entry of 0 marks the character as a blank to be skipped; any other
; entry is the address of the state-table entry that starts the token
; beginning with that character.  How the scanner indexes this table is not
; visible here -- presumably (character code * 2) + N:Way; TODO confirm.
;
; '_' (:5F) starts a symbol: in C the underscore counts as a letter, so an
; identifier may begin with it.
; NOTE(review): ^J (line feed) is treated as illegal while ^M is a blank;
; confirm that source files on the target system never contain line feeds.
         fdb   LexIllegalToken         ; :00 Null
         fdb   LexIllegalToken         ; :01 ^A
         fdb   LexIllegalToken         ; :02 ^B
         fdb   LexIllegalToken         ; :03 ^C
         fdb   LexIllegalToken         ; :04 ^D
         fdb   LexIllegalToken         ; :05 ^E
         fdb   LexIllegalToken         ; :06 ^F
         fdb   LexIllegalToken         ; :07 ^G
         fdb   LexIllegalToken         ; :08 ^H
         fdb   0                       ; :09 ^I   Tab is a blank
         fdb   LexIllegalToken         ; :0A ^J
         fdb   LexIllegalToken         ; :0B ^K
         fdb   0                       ; :0C ^L   Form-feed is a blank
         fdb   0                       ; :0D ^M   Carriage-return is a blank
         fdb   LexIllegalToken         ; :0E ^N
         fdb   LexIllegalToken         ; :0F ^O
         fdb   LexIllegalToken         ; :10 ^P
         fdb   LexIllegalToken         ; :11 ^Q
         fdb   LexIllegalToken         ; :12 ^R
         fdb   LexIllegalToken         ; :13 ^S
         fdb   LexIllegalToken         ; :14 ^T
         fdb   LexIllegalToken         ; :15 ^U
         fdb   LexIllegalToken         ; :16 ^V
         fdb   LexIllegalToken         ; :17 ^W
         fdb   LexIllegalToken         ; :18 ^X
         fdb   LexIllegalToken         ; :19 ^Y
         fdb   LexIllegalToken         ; :1A ^Z
         fdb   LexIllegalToken         ; :1B ^[
         fdb   LexIllegalToken         ; :1C ^\
         fdb   LexIllegalToken         ; :1D ^]
         fdb   LexIllegalToken         ; :1E ^^
         fdb   LexIllegalToken         ; :1F ^_

         fdb   0                       ; :20 space A space is definitely blank
         fdb   LexSelfToken            ; :21 '!'
         fdb   LexQuotedString         ; :22 '"'
         fdb   LexCompilerControl      ; :23 '#'
         fdb   LexIllegalToken         ; :24 '$'
         fdb   LexSelfToken            ; :25 '%'
         fdb   LexSelfToken            ; :26 '&'
         fdb   LexSquote               ; :27 ''''
         fdb   LexSelfToken            ; :28 '('
         fdb   LexSelfToken            ; :29 ')'
         fdb   LexSelfToken            ; :2A '*'
         fdb   LexSelfToken            ; :2B '+'
         fdb   LexSelfToken            ; :2C ','
         fdb   LexSelfToken            ; :2D '-'
         fdb   LexSelfToken            ; :2E '.'
         fdb   LexSelfToken            ; :2F '/'

         fdb   LexDigit                ; :30 '0'
         fdb   LexDigit                ; :31 '1'
         fdb   LexDigit                ; :32 '2'
         fdb   LexDigit                ; :33 '3'
         fdb   LexDigit                ; :34 '4'
         fdb   LexDigit                ; :35 '5'
         fdb   LexDigit                ; :36 '6'
         fdb   LexDigit                ; :37 '7'
         fdb   LexDigit                ; :38 '8'
         fdb   LexDigit                ; :39 '9'

         fdb   LexSelfToken            ; :3A ':'
         fdb   LexSelfToken            ; :3B ';'
         fdb   LexSelfToken            ; :3C '<'
         fdb   LexSelfToken            ; :3D '='
         fdb   LexSelfToken            ; :3E '>'
         fdb   LexSelfToken            ; :3F '?'
         fdb   LexIllegalToken         ; :40 '@'

         fdb   LexSymbolToken          ; :41 'A'
         fdb   LexSymbolToken          ; :42 'B'
         fdb   LexSymbolToken          ; :43 'C'
         fdb   LexSymbolToken          ; :44 'D'
         fdb   LexSymbolToken          ; :45 'E'
         fdb   LexSymbolToken          ; :46 'F'
         fdb   LexSymbolToken          ; :47 'G'
         fdb   LexSymbolToken          ; :48 'H'
         fdb   LexSymbolToken          ; :49 'I'
         fdb   LexSymbolToken          ; :4A 'J'
         fdb   LexSymbolToken          ; :4B 'K'
         fdb   LexSymbolToken          ; :4C 'L'
         fdb   LexSymbolToken          ; :4D 'M'
         fdb   LexSymbolToken          ; :4E 'N'
         fdb   LexSymbolToken          ; :4F 'O'
         fdb   LexSymbolToken          ; :50 'P'
         fdb   LexSymbolToken          ; :51 'Q'
         fdb   LexSymbolToken          ; :52 'R'
         fdb   LexSymbolToken          ; :53 'S'
         fdb   LexSymbolToken          ; :54 'T'
         fdb   LexSymbolToken          ; :55 'U'
         fdb   LexSymbolToken          ; :56 'V'
         fdb   LexSymbolToken          ; :57 'W'
         fdb   LexSymbolToken          ; :58 'X'
         fdb   LexSymbolToken          ; :59 'Y'
         fdb   LexSymbolToken          ; :5A 'Z'

         fdb   LexSelfToken            ; :5B '['
         fdb   LexIllegalToken         ; :5C '\'
         fdb   LexSelfToken            ; :5D ']'
         fdb   LexSelfToken            ; :5E '^'
         fdb   LexSymbolToken          ; :5F '_'  underscore counts as a letter
         fdb   LexIllegalToken         ; :60 '`'

         fdb   LexSymbolToken          ; :61 'a'
         fdb   LexSymbolToken          ; :62 'b'
         fdb   LexSymbolToken          ; :63 'c'
         fdb   LexSymbolToken          ; :64 'd'
         fdb   LexSymbolToken          ; :65 'e'
         fdb   LexSymbolToken          ; :66 'f'
         fdb   LexSymbolToken          ; :67 'g'
         fdb   LexSymbolToken          ; :68 'h'
         fdb   LexSymbolToken          ; :69 'i'
         fdb   LexSymbolToken          ; :6A 'j'
         fdb   LexSymbolToken          ; :6B 'k'
         fdb   LexSymbolToken          ; :6C 'l'
         fdb   LexSymbolToken          ; :6D 'm'
         fdb   LexSymbolToken          ; :6E 'n'
         fdb   LexSymbolToken          ; :6F 'o'
         fdb   LexSymbolToken          ; :70 'p'
         fdb   LexSymbolToken          ; :71 'q'
         fdb   LexSymbolToken          ; :72 'r'
         fdb   LexSymbolToken          ; :73 's'
         fdb   LexSymbolToken          ; :74 't'
         fdb   LexSymbolToken          ; :75 'u'
         fdb   LexSymbolToken          ; :76 'v'
         fdb   LexSymbolToken          ; :77 'w'
         fdb   LexSymbolToken          ; :78 'x'
         fdb   LexSymbolToken          ; :79 'y'
         fdb   LexSymbolToken          ; :7A 'z'

         fdb   LexSelfToken            ; :7B '{'
         fdb   LexSelfToken            ; :7C '|'
         fdb   LexSelfToken            ; :7D '}'
         fdb   LexSelfToken            ; :7E '~'
         fdb   LexIllegalToken         ; :7F rubout
         page
; State tables referenced from the N:Way dispatch table.
;
; Layout as far as this source shows it: a full state entry (see
; LexQuotedStringBody, LexCollectIntegerState) starts with a 2-byte test
; field, followed by the address of a semantic routine and the address of the
; next state; a trailing pair gives the action used when the test fails.
; The first-character states are declared with "equ *-2", which places the
; label two bytes BEFORE the following fdb, so the semantic-routine address
; sits at label+2 and the test field overlaps whatever data precedes it.
; Presumably the table interpreter (not visible here) skips the test field
; for these states because N:Way has already dispatched on the character --
; TODO confirm.  Because of the overlap, the order of these tables matters.
; Lines commented out with '*' are table words the author left unassembled.
LexIllegalToken equ *-2 ; Illegal token encountered
         fdb   LexIllegalTokenSemantic
*        fdb   NoNextState

LexSelfToken equ *-2 ; handles all single character tokens
         fdb   LexSelfTokenSemantic
*        fdb   NoNextState

LexCompilerControl equ *-2 ; :23 '#' Start of Compiler Control line
         fdb   LexCompilerControlSemantic
*        fdb   NoNextState

LexQuotedString equ *-2 ; :22 '"' Start of quoted text string
         fdb   DummySemantic
         fdb   LexQuotedStringBody

; Test field here is the byte pair 0, '"'.  On a match the string is ended
; by LexQuotedStringEndSemantic; otherwise Scn:GetChar is named as the action
; and the state loops back to itself.
LexQuotedStringBody ; handle characters of quoted string
         fcb   0                       --> character test
         fcc   '"'
         fdb   LexQuotedStringEndSemantic
         fdb   NoNextState             (end of token)
* This needs more work. Must handle ctl chars, rubout, null, \ddd, "", etc.
         fdb   Scn:GetChar             just collect another character
         fdb   LexQuotedStringBody

LexSquote equ *-2 ; :27 '''' Single quoted character --> funny C integer
         fdb   LexSquotedCharacterSemantic
         fdb   LexSquotedCharacterBody

; Only the test-field word is assembled for this state; the rest of the
; entry is commented out below.
LexSquotedCharacterBody
         fdb   LexSquotedCharacterCheck
*        fdb   DummySemantic
*        fdb   NoNextState
*        fdb   LexIllegalTokenSemantic
*        fdb   NoNextState

LexDigit equ *-2 ; :30-:39 Beginning of number
         fdb   LexFirstDigitSemantic
         fdb   LexCollectIntegerState

; Test field is the address LexCheckDigit (not defined in the visible
; source).  While the test succeeds, LexAnotherDigitSemantic accumulates the
; digit and the state repeats; otherwise LexEndIntegerSemantic ends the token.
LexCollectIntegerState ; collect digits
         fdb   LexCheckDigit
         fdb   LexAnotherDigitSemantic
         fdb   LexCollectIntegerState
         fdb   LexEndIntegerSemantic
*        fdb   NoNextState

LexSymbolToken equ *-2 ; symbol-start characters ('A'-'Z' :41-:5A, 'a'-'z' :61-:7A) Begin Symbol
         fdb   LexFirstSymbolCharacterSemantic
         fdb   LexCollectSymbolState

         page
; LexIllegalTokenSemantic -- report an illegal or malformed token.
; Discards the two-byte return address on top of the stack, so the final RTS
; returns one level further up than the immediate caller, with (A) holding
; tkn:error.  Execution falls through into DummySemantic for that RTS.
LexIllegalTokenSemantic ; this token starts incorrectly or is malformed
         ins                           ; pop return address
         ins
         ldaa  #tkn:error              ; pick up illegal token code
; DummySemantic -- semantic routine that does nothing except return.
DummySemantic ; do-nothing semantic routine
         rts

; LexSelfTokenSemantic -- finish a single-character token.
; (A) is not modified here; presumably it still holds the character just
; scanned, which doubles as the token code (e.g. ';' = :3B) -- TODO confirm.
; The two INS discard the immediate return address, so RTS returns one level
; further up the call chain.
LexSelfTokenSemantic ; this character represents a token
         ins                           ; pop return address
         ins
         rts                           ; return with token code in (A)

; LexQuotedStringEndSemantic -- closing '"' of a string has been seen.
; Discards the immediate return address, calls scn:storestring, then returns
; one level further up with (A) = tkn:string.
; NOTE(review): scn:storestring is defined outside the visible source; which
; registers it uses or changes is unknown from here.
LexQuotedStringEndSemantic ; found end of double-quoted string
         ins                           ; pop return address
         ins
         jsr   scn:storestring         ; save string body in dictionary
         ldaa  #tkn:string
         rts

; LexSquotedCharacterCheck -- validate the character of a single-quoted
; character constant and finish the token.
; In:  (A) = character to validate (presumably the one following the opening
;      quote -- TODO confirm against the table interpreter).
; Out: control characters (below :20) and rubout (:7F) are rejected through
;      LexIllegalTokenSemantic.  Otherwise the immediate return address is
;      discarded and the routine returns one level up with
;      (A) = tkn:integerconstant, since a C character constant is an integer.
; Space (:20) is accepted: ' ' is a legal C character constant.
; NOTE(review): the character's value is not saved yet (see the commented-out
; store below) and the closing quote is not checked here.
LexSquotedCharacterCheck ; check for legal squoted token
         cmpa  #$20                    ; a control character ?
         bcs   LexIllegalTokenSemantic ; b/ yes (below space), not legal
         cmpa  #$7F                    ; (rubout isn't legal, either!)
         beq   LexIllegalTokenSemantic
         ins                           ; pop return address
         ins
*        staa  integervalue
         ldaa  #tkn:integerconstant    ; pick up token code
         rts

; LexFirstDigitSemantic -- start accumulating a decimal integer.
; In:  (A) = ASCII code of the first digit.
; Out: Integer (high byte) = 0, Integer+1 (low byte) = binary digit value;
;      (A) is left holding the binary digit value.
; The low byte needs no separate clear because the store overwrites it.
LexFirstDigitSemantic ; handle 1st digit
         clr   Integer                  ; zero the integer value
;        clr   Integer+1
         suba  #'0                     ; convert digit to binary
         staa  Integer+1
         rts

; LexAnotherDigitSemantic -- fold one more decimal digit into Integer.
; In:  (A) = ASCII code of the digit.
;      Integer / Integer+1 = 16-bit value so far (high byte, low byte).
; Out: Integer = Integer * 10 + digit, treated as an unsigned 16-bit value.
;      (A):(B) also hold the new value on return.
; If any step carries out of 16 bits, the saved digit is dropped and control
; goes to LexIllegalTokenSemantic, which returns the error token.
; The multiply by 10 is done as ((n * 4) + n) * 2 with carry checks at each
; step.  The result is stored back so the next digit builds on it.
LexAnotherDigitSemantic ; handle another digit
         suba  #'0                     ; convert digit to binary
         psha                          ; save digit
         ldaa  Integer                 ; multiply Integer by 10
         ldab  Integer+1
         aslb
         rola                          ; * 2
         bcs   LexAnotherDigitOverflow
         aslb
         rola                          ; * 4
         bcs   LexAnotherDigitOverflow
         addb  Integer+1
         adca  Integer                 ; * 5
         bcs   LexAnotherDigitOverflow
         aslb
         rola                          ; * 10
         bcs   LexAnotherDigitOverflow
         tsx                           ; add in new digit
         addb  0,x
         adca  #0
         bcs   LexAnotherDigitOverflow
         staa  Integer                 ; save the updated value
         stab  Integer+1
         ins                           ; pop digit
         rts

LexAnotherDigitOverflow ; too-large-integer encountered
         ins                           ; pop digit
         bra   LexIllegalTokenSemantic ; report as an illegal token

; LexEndIntegerSemantic -- a non-digit has ended the integer being collected.
; Discards the immediate return address, calls scn:backup1 so the terminating
; character is not consumed, then returns one level further up with
; (A) = tkn:integerconstant.
; NOTE(review): scn:backup1 is defined outside the visible source; the
; description of what it does is taken from the original comment.
LexEndIntegerSemantic ; encountered non-digit
         ins                           ; pop return address
         ins
         jsr   scn:backup1             ; reject this character
         ldaa  #tkn:integerconstant    ; get token code
         rts

         END   PARSER
