!   SRCCOM -- Source Compare Program
!   Compares two source files, prints a list of differences
!   Version 1.0 11/26/82 IDB
!
!   The program works by computing a one-byte "signature" for each
!   line of the two files, and then compares the signatures instead
!   of comparing the lines of the files themselves.  This has the major
!   advantage of requiring only one byte per line, instead of LineLength
!   bytes per line, at the price of a 1/2% probability of two lines
!   "matching" whose content is not identical.  By ensuring that some
!   LineCountMatchThreshold > 1 number of lines must match before
!   SRCCOM claims a block of lines has been moved, the probability of
!   a false "move" claim is .005^LineCountMatchThreshold; this probability
!   is very small.

    ! Banner printed on the console at startup.
    Dim Version$/"Source Compare V1.0 (C) 1982 Software Dynamics"/
    ! Signature tables: one byte per line of each file (filled in by
    ! ComputeSignatures).  ComputeSignatures stops adding lines once a
    ! table reaches its MaxLen, so the declared size caps how many lines
    ! can be compared.  A zero byte later marks a line as "accounted-for".
    Dim RefFileLineSignatures$(4000),FileLineSignatures$(4000)
    ! File names as typed by the operator or parsed from the command line.
    Dim RefFileName$(50),FileName$(50),OutputFile$(50)
    ! I/O channel numbers.  Output is reset to 0 when results go to the console.
    Dim RefFile/1/,File/2/,Output/3/
    ! Minimum run of consecutive matching signatures accepted as a match.
    Dim LineCountMatchThreshold/3/
    ! Shared text-line buffer used by every Input from either file.
    Dim Line$(255)
    ! Kind of the most recent difference report.  None means nothing has been
    ! reported; the final summary uses that to declare the files identical.
    Dim LastMessagePrinted,None/0/,NewlyInserted/1/,Deleted/2/,Moved/3/

    ! Obtain the two file names and the results destination, either by
    ! prompting the operator or by parsing the command line, and open the
    ! corresponding channels.
    ! NOTE(review): Col(0)=1 presumably means the console is at column 1,
    ! i.e. nothing followed the program name on the command line -- confirm
    ! against the SD BASIC documentation.
    If Col(0)=1
    Then
        ! Operator needs prompting.
        Print Version$
        Input "Name of reference file:                   " RefFileName$
        Open #RefFile,RefFileName$
        Input "Name of file to be compared:              " FileName$
        Open #File,FileName$
        Input "Place results where (default = console:)? " OutputFile$
        If Len(OutputFile$)=0
        Then Output=0 \ ! Use the console
        Else
            ! Create the results file, then reuse OutputFile$ to hold the
            ! " to <name>" suffix shown in the command-line hint below.
            Create #Output,OutputFile$
            OutputFile$=" to " cat OutputFile$
        Fi
        ! Show the equivalent one-line command for next time.
        Print "Next time, you can type:"
        Print "    .srccom ";RefFileName$;" with ";FileName$;OutputFile$
    Else
        ! Sophisticated user
        ! Command line syntax:
        !     SRCCOM RefFile WITH File [ TO OutputFile ]
        ! OutputFile$ doubles as the scratch buffer holding the part of the
        ! command text that has not been parsed yet.
        Input "" OutputFile$
        Print Version$
        ! Upper-case everything so the WITH / TO keywords match in any case
        ! (this also upper-cases the file names themselves).
        Let OutputFile$=Uppercase$(OutputFile$)
        ! The WITH keyword is mandatory; without it error 102 is raised.
        Let i=Find(OutputFile$," WITH ")
        If i=0 Then Error 102
        Let RefFileName$=OutputFile$[1,i-1]
        ! NOTE(review): Right$(s,n) is assumed to return s from character n
        ! onward, so i+6 skips the 6-character " WITH " delimiter -- confirm.
        Let OutputFile$=Right$(OutputFile$,i+6)
        Open #RefFile,RefFileName$
        Let i=Find(OutputFile$," TO ")
        If i=0
        Then
            ! No TO file specified. Use console:
            Output=0
            Let FileName$=OutputFile$
            Open #File,FileName$
        Else
            ! TO file specified.
            ! Text before " TO " is the compared-to file name; text after the
            ! 4-character delimiter is the results file name.
            Let FileName$=OutputFile$[1,i-1]
            Let OutputFile$=Right$(OutputFile$,i+4)
            Open #File,FileName$
            Create #Output,OutputFile$
        Fi
    Fi
    ! First line of the report identifies the two files being compared.
    Print #Output,"Comparing ";FileName$;" against reference ";RefFileName$
!^L
!   Compute signatures on files
!
Subroutine ComputeSignatures(ComputeSignaturesChannel,...
&                            ComputeSignaturesBytes$)
    ! Compute a one-byte signature for every line of a file.
    !   ComputeSignaturesChannel - channel of an already-open file; it is
    !                              rewound here before being read.
    !   ComputeSignaturesBytes$  - receives one signature byte per line; its
    !                              length on return is the number of lines
    !                              processed.
    ! Every stored signature is in the range 1..255: the value 0 is reserved
    ! by the comparison code as the "accounted-for" mark.
    ! Uses the global Line$ buffer; reports truncation on #Output when the
    ! file has more lines than ComputeSignaturesBytes$ can hold.
    ! HistogramData is referenced only by the disabled histogram code below.
    Dim HistogramData(255)
    Position #ComputeSignaturesChannel,0 \ ! Rewind the file
    Let Len(ComputeSignaturesBytes$)=0 \ ! Reset line count to zero
    Repeat
        Input #ComputeSignaturesChannel,Line$
        ! What about input buffer overflow ??? I'm cheap, ignore for now
        If Eof(ComputeSignaturesChannel) Then ComputeHistogram
        If Len(ComputeSignaturesBytes$)=MaxLen(ComputeSignaturesBytes$)
        Then
            Print #Output,"Capacity exceeded; comparision truncated at";...
&                 Len(ComputeSignaturesBytes$);"lines."
            Goto ComputeHistogram
        Fi
        Let Signature=Len(Line$) \ ! Reset line signature
        ! Compute signature for line. Practically any hashing function will do.
        For i=1 to len(line$)
            Signature=((Signature**1) + (Signature>32767)) xor Line$(i)
!            Signature=Signature xor Line$(i)
        Next i
        ! The hash is wider than one byte, so reduce it to the byte that is
        ! actually stored BEFORE testing for zero.  Otherwise a non-zero hash
        ! whose low byte is zero would be stored as 0, and the line would look
        ! "accounted-for" from the start and never be reported.
        Let Signature=Signature&:FF
        If Signature=0 then Signature=1 \ ! 0 reserved as "accounted-for" mark
        Let Len(ComputeSignaturesBytes$)=Len(ComputeSignaturesBytes$)+1
        Let ComputeSignaturesBytes$[Len(ComputeSignaturesBytes$)]=Signature
    End
ComputeHistogram: ! Construct histogram of hash function distribution
!   (Histogram code is disabled; it is kept for tuning the hash function.)
!    For i=0 to len(HistogramData) do HistogramData[i]=0
!    For i=1 to len(ComputeSignaturesBytes$)
!        let i1=int( (ComputeSignaturesBytes$[i]/256)*(len(HistogramData)+1) )
!        Let HistogramData[i1]=HistogramData[i1]+1
!    Next i
!    MaxHistogramData=1 \ ! Pick small value that prevents division by zero
!    For i=0 to len(HistogramData)
!        If MaxHistogramData<HistogramData[i]
!        Then MaxHistogramData=HistogramData[i]
!    Next i
!    For i=0 to len(HistogramData)
!        Print Using "#### !",i*256/(len(HistogramData)+1);
!        For i0=1 to 70*HistogramData[i]/MaxHistogramData do print "*";
!        Print
!    Next i
    Exit Subroutine
End

    ! Build the signature table for each file.  The progress messages use
    ! plain Print rather than Print #Output.
    Print "Processing Reference File..."
    ComputeSignatures(RefFile,RefFileLineSignatures$)
    Print "Processing File to be compared..."
    ComputeSignatures(File,FileLineSignatures$)
!^L
!   Now compare the signature lists.
!   We want to know:
!      1) Where blocks in the file to be compared came from in the
!         reference file
!      2) What parts of the reference file are no longer in the compared-to
!         file (i.e., what has been deleted)
!      3) What new stuff has been added to the compared-to file.
!
!   We accomplish this by scanning the compared-to signature list,
!       looking for sequences of at least LineCountMatchThreshold lines
!       that match sequences in reference file.
!   If we find such a sequence, we know the reference file position.
!       If the reference file position is the logical "next" place not
!       consumed in the reference file, then nothing has been moved.
!       If the reference file position found is past the logical next place,
!       then reference file text has been deleted.  If before the logical
!       next place, then reference file text has been moved.
!   If we cannot find such a sequence, then the point where the unfindable
!       sequence begins is new stuff added to the compared-to file.
!
    ! Start both line cursors at line 1 and rewind both files, so that each
    ! file's read position agrees with its cursor.
    ReferenceFileLine=1 \ ! Next logical place in Reference File
    Position #RefFile,0 \ ! Rewind to display deleted text, if any
    ComparedToFileLine=1 \ ! Next logical place in Compared-to file
    Position #File,0 \ ! Rewind to display newly inserted text, if any
    LastMessagePrinted=None \ ! ...about differences between texts

Subroutine FindComparedToText
    ! Sets ReferenceFileLocation to chunk of text ...
    ! matching that at ComparedToFileLine
    ! If found, set RefSegmentSize<>0, else set to zero
    !
    ! Inputs  (globals): ComparedToFileLine, both signature tables,
    !                    LineCountMatchThreshold.
    ! Outputs (globals): ComparedToFileLine (advanced past claimed lines),
    !                    RefSegmentSize, and ReferenceFileLocation when
    !                    RefSegmentSize<>0.
    ! Side effect: reads one line from #File per claimed line skipped, so the
    !              file position stays in step with ComparedToFileLine.
    !D! Print "FindComparedToText"
    For ComparedToFileLine=ComparedToFileLine to Len(FileLineSignatures$)
        If FileLineSignatures$(ComparedToFileLine)=0
        Then
            ! This line of text has already been claimed.
            ! It would be redundant to say anything.
            Input #File,Line$ \ ! skip over claimed line
        Else Exit ComparedToFileLine
    Next ComparedToFileLine
    ! This block of text not accounted-for yet
    ! Report "not found" when fewer than LineCountMatchThreshold lines remain,
    ! or when the next LineCountMatchThreshold signatures do not occur
    ! anywhere in the reference table.
    If ComparedToFileLine>...
&      Len(FileLineSignatures$)-LineCountMatchThreshold+1 ...
&   Or Find(RefFileLineSignatures$,...
&           FileLineSignatures$[ComparedToFileLine,...
&                               LineCountMatchThreshold])=0
    Then RefSegmentSize=0
    Else
        ! Found start of segment from file under test in ref file
        ! Do binary search to determine segment location and size
        ! Bisection is valid because if a run of k signatures is found, every
        ! shorter run starting at the same line is found as well.
        LowerBoundSegmentSize=LineCountMatchThreshold
        UpperBoundSegmentSize=...
&           Len(FileLineSignatures$)-ComparedToFileLine+1
        While LowerBoundSegmentSize<UpperBoundSegmentSize Do
            ! The midpoint is rounded up so it always exceeds the lower bound;
            ! either branch below therefore narrows the interval.
            SegmentSize=...
&               Int((LowerBoundSegmentSize+UpperBoundSegmentSize+1)/2)
            If Find(RefFileLineSignatures$,...
&                   FileLineSignatures$[ComparedToFileLine,SegmentSize])
            Then
                ! Segment is at least as big as SegmentSize
                LowerBoundSegmentSize=SegmentSize
            Else
                ! Segment is not as big as SegmentSize
                UpperBoundSegmentSize=SegmentSize-1
            Fi
        End
        RefSegmentSize=LowerBoundSegmentSize \ ! = UpperBoundSegmentSize
        ! Search once more at the final size to get the position of the run
        ! in the reference table.
        ! NOTE(review): Find presumably returns the first matching position.
        ReferenceFileLocation=...
&           Find(RefFileLineSignatures$,...
&                FileLineSignatures$[ComparedToFileLine,RefSegmentSize])
    Fi
    !D! Print "Found ComparedTo @";ReferenceFileLocation;Refsegmentsize
    Return Subroutine
End

Subroutine PrintInsertedText
    ! Can't find segment; must be newly inserted text.
    ! Prints a heading followed by the inserted compared-to lines, marking
    ! each one "accounted-for" and advancing ComparedToFileLine and the
    ! #File read position together.  Sets LastMessagePrinted=NewlyInserted.
    Print #Output
    Print #Output,"Inserted following text before reference line";...
&                 ReferenceFileLine
    Let LastMessagePrinted=NewlyInserted
    ! Keep going while there is another line, that line is still unclaimed,
    ! and it does not start a run of LineCountMatchThreshold lines that can
    ! be found in the reference table.  Lines too close to the end of the
    ! file to start such a run count as unfindable.
    ! NOTE(review): the When test is written after the body, so the body
    ! presumably always runs at least once -- confirm.
    Repeat
        ! Print out the inserted lines
        ! Assert: FileLineSignatures$[ComparedToFileLine,...
        !     LineCountMatchThreshold] are all non-zero
        Let FileLineSignatures$[ComparedToFileLine]=0 \ ! mark "accounted"
        Input #File,Line$\Print #Output,"    | ";Line$
        ComparedToFileLine=ComparedToFileLine+1
    When ComparedToFileLine<=Len(FileLineSignatures$) ...
&        and FileLineSignatures$[ComparedToFileLine]>0 ...
&        and ( ComparedToFileLine>Len(FileLineSignatures$)...
&                                 -LineCountMatchThreshold+1 ...
&             or 0=Find(RefFileLineSignatures$,...
&                       FileLineSignatures$[ComparedToFileLine,...
&                                           LineCountMatchThreshold] )...
&            ) End
    Return Subroutine
End

Subroutine FindReferenceText
    ! Locates chunk of reference text starting at ReferenceFileLine..
    ! by looking in Compared-To file
    ! returns ComparedToFileLocation pointing to position.
    ! Sets ComparedToSegmentSize to length of segment found, =0 if not found.
    !
    ! Side effects: advances ReferenceFileLine past already-claimed lines,
    ! reading one line from #RefFile for each so the file position stays in
    ! step with ReferenceFileLine.
    !D! Print "FindReferenceText"
    For ReferenceFileLine=ReferenceFileLine to Len(RefFileLineSignatures$)
        If RefFileLineSignatures$(ReferenceFileLine)=0
        Then
            ! This line of text has already been claimed.
            ! It would be redundant to say anything.
            Input #RefFile,Line$ \ ! skip over claimed line
        Else Exit ReferenceFileLine
    Next ReferenceFileLine
    ! This block of text not accounted-for yet
    ! Report "not found" when fewer than LineCountMatchThreshold lines remain,
    ! or when the next LineCountMatchThreshold signatures do not occur
    ! anywhere in the compared-to table.
    If ReferenceFileLine>...
&      Len(RefFileLineSignatures$)-LineCountMatchThreshold+1 ...
&   Or Find(FileLineSignatures$,...
&           RefFileLineSignatures$[ReferenceFileLine,...
&                               LineCountMatchThreshold])=0
    Then ComparedToSegmentSize=0
    Else
        ! Found start of segment in Compared-to file
        ! Do binary search to determine size
        ! Bisection is valid because if a run of k signatures is found, every
        ! shorter run starting at the same line is found as well.
        LowerBoundSegmentSize=LineCountMatchThreshold
        UpperBoundSegmentSize=Len(RefFileLineSignatures$)-ReferenceFileLine+1
        While LowerBoundSegmentSize<UpperBoundSegmentSize Do
            ! The midpoint is rounded up so it always exceeds the lower bound;
            ! either branch below therefore narrows the interval.
            SegmentSize=...
&               Int((LowerBoundSegmentSize+UpperBoundSegmentSize+1)/2)
            If Find(FileLineSignatures$,...
&                   RefFileLineSignatures$[ReferenceFileLine,SegmentSize])
            Then
                ! Segment is at least as big as SegmentSize
                LowerBoundSegmentSize=SegmentSize
            Else
                ! Segment is not as big as SegmentSize
                UpperBoundSegmentSize=SegmentSize-1
            Fi
        End
        ComparedToSegmentSize=LowerBoundSegmentSize
        ! Search once more at the final size to get the position of the run
        ! in the compared-to table.
        ! NOTE(review): Find presumably returns the first matching position.
        ComparedToFileLocation=...
&           Find(FileLineSignatures$,...
&                RefFileLineSignatures$[ReferenceFileLine,...
&                                       ComparedToSegmentSize])
    Fi
    !D! Print "FindRefText @";ComparedToFileLocation;ComparedToSegmentSize
    Return Subroutine
End

Subroutine PrintDeletedLines
    ! Print out lines of Reference File that cannot be found in Compared-to
    ! Each printed line is marked "accounted-for"; ReferenceFileLine and the
    ! #RefFile read position advance together.  Sets LastMessagePrinted=Deleted.
    Let LastMessagePrinted=Deleted
    Print #Output
    Print #Output,"Deleted lines starting at reference line";ReferenceFileLine
    ! Keep going while there is another reference line, that line is still
    ! unclaimed, and it does not start a run of LineCountMatchThreshold lines
    ! that can be found in the compared-to table.  Lines too close to the end
    ! of the file to start such a run count as unfindable.
    ! The loop therefore stops at the first already-claimed line, even if
    ! unclaimed reference lines follow it.
    ! NOTE(review): the When test is written after the body, so the body
    ! presumably always runs at least once -- confirm.
    Repeat
        ! Assert: RefFileLineSignatures$[ReferenceFileLine,...
        !     LineCountMatchThreshold] are all non-zero
        Let RefFileLineSignatures$[ReferenceFileLine]=0 \ ! mark "accounted"
        Input #RefFile,Line$
        ! Each deleted line is prefixed with its reference line number.
        Print #Output, Using "####| ",ReferenceFileLine;
        Print #Output, Line$
        Let ReferenceFileLine=ReferenceFileLine+1
    When ReferenceFileLine<=Len(RefFileLineSignatures$)...
&        and RefFileLineSignatures$[ReferenceFileLine]>0 ...
&        and ( ReferenceFileLine>Len(RefFileLineSignatures$)...
&                                -LineCountMatchThreshold+1 ...
&              or 0=Find(FileLineSignatures$,...
&                        RefFileLineSignatures$[ReferenceFileLine,...
&                                               LineCountMatchThreshold] )...
&            ) End
    Return Subroutine
End

Subroutine HandleMatchingText
    ! Reference file and Compared-to file are in sync right here
    ! We need make no comment
    ! Consumes RefSegmentSize lines from both files: the text is read and
    ! discarded, and the corresponding signature bytes are zeroed.
    ! ReferenceFileLine and ComparedToFileLine are the loop variables, so
    ! they are advanced by the loops themselves.
    ! NOTE(review): presumably each ends one past the block, with the loop
    ! limit evaluated once on entry -- confirm the dialect's For semantics.
    For ReferenceFileLine=ReferenceFileLine...
&                         to ReferenceFileLine+RefSegmentSize-1
        Input #RefFile,Line$ \ ! skip lines
        RefFileLineSignatures$[ReferenceFileLine]=0 \ ! mark "accounted-for"
        ! The 0 prevents this line from being found again...
        ! (Note that Hash algorithm cannot produce zero hash value)
    Next ReferenceFileLine
    For ComparedToFileLine=ComparedToFileLine...
&                          to ComparedToFileLine+RefSegmentSize-1
        Input #File,Line$ \ ! keep in sync
        FileLineSignatures$[ComparedToFileLine]=0 \ ! mark "accounted-for"
    Next ComparedToFileLine
    Return Subroutine
End

Subroutine PrintReferenceTextMoved
    ! Print out text that has been moved, ...
    ! mark as "accounted-for", and skip past it in reference file
    !
    ! Reports the block of ComparedToSegmentSize reference lines starting at
    ! ReferenceFileLine as moved to ComparedToFileLocation in the compared-to
    ! file.  Uses the values left by FindReferenceText.
    LastMessagePrinted=Moved
    Print #Output
    Print #Output,"Reference lines";ReferenceFileLine;"thru";...
&                         ReferenceFileLine+ComparedToSegmentSize-1;...
&                 "have been moved to target lines";...
&                         ComparedToFileLocation;"thru";...
&                         ComparedToFileLocation+ComparedToSegmentSize-1
    ! Read, print and claim the reference lines; the loop variable is
    ! ReferenceFileLine itself, so the reference cursor advances with it.
    For ReferenceFileLine=ReferenceFileLine...
&                          to ReferenceFileLine+ComparedToSegmentSize-1
        Input #RefFile,Line$
        Let RefFileLineSignatures$[ReferenceFileLine]=0 \ ! "accounted-for"
        Print #Output, Using "####| ",ReferenceFileLine;
        Print #Output, Line$
    Next ReferenceFileLine
    ! Claim the destination lines without reading #File here;
    ! FindComparedToText reads past claimed lines when it reaches them.
    For i=ComparedToFileLocation ...
&         to ComparedToFileLocation+ComparedToSegmentSize-1
        Let FileLineSignatures$[i]=0 \ ! mark "accounted-for"
    Next i
    Return Subroutine
End

Subroutine PrintComparedTextMoved
    ! Print out text that has been moved, and mark as "accounted-for"
    !
    ! Reports the block of RefSegmentSize lines at ComparedToFileLine as
    ! having come from ReferenceFileLocation in the reference file.  Uses the
    ! values left by FindComparedToText.
    LastMessagePrinted=Moved
    Print #Output
    Print #Output,"Reference lines";ReferenceFileLocation;"thru";...
&                         ReferenceFileLocation+RefSegmentSize-1;...
&                 "have been moved to target lines";...
&                         ComparedToFileLine;"thru";...
&                         ComparedToFileLine+RefSegmentSize-1
    ! The text printed is read from #File (the compared-to file) but is
    ! labelled with the reference line number i.  Reading #File here keeps
    ! its position in step with ComparedToFileLine, which the second loop
    ! advances by the same number of lines.
    ! #RefFile is not read here; FindReferenceText reads past the claimed
    ! reference lines when it reaches them.
    For i=ReferenceFileLocation ...
&         to ReferenceFileLocation+RefSegmentSize-1
        Let RefFileLineSignatures$[i]=0 \ ! mark "accounted-for"
        Input #File,Line$
        Print #Output, Using "####| ",i;
        Print #Output, Line$
    Next i
    For ComparedToFileLine=ComparedToFileLine...
&                          To ComparedToFileLine+RefSegmentSize-1
        Let FileLineSignatures$[ComparedToFileLine]=0 \ ! mark "accounted-for"
    Next ComparedToFileLine
    Return Subroutine
End

State0: ! Know nothing about text at ReferenceFileLine
        ! Know nothing about text at ComparedToFileLine
        ! Determine some information
        ! FindComparedToText skips claimed compared-to lines and then sets
        ! RefSegmentSize (0 = next block not found in the reference file).
    FindComparedToText
    If RefSegmentSize=0
    Then
        If ComparedToFileLine<=Len(FileLineSignatures$)
        Then Call PrintInsertedText\Goto State0
        Else
            ! No more Compared-To file left.
            FindReferenceText
            ! Finish off Reference file.  PrintDeletedLines stops at the
            ! first already-claimed reference line (for example a block
            ! reported earlier as moved), so a single call can leave later
            ! unclaimed lines unreported.  Loop back through State0 instead:
            ! the compared-to file is still exhausted, so control returns
            ! here, FindReferenceText skips the claimed lines, and the next
            ! group of deleted lines is printed.  Every pass advances
            ! ReferenceFileLine, so the loop ends when the reference file is
            ! consumed.
            If ReferenceFileLine<=Len(RefFileLineSignatures$)
            Then Call PrintDeletedLines\Goto State0
            ! Clean up and exit.
            Print #Output
            If LastMessagePrinted=None
            Then Print #Output,"*** Files are identical."
            Else Print #Output,"*** Source Comparision completed."
            Exit \ ! Let SDOS close all the files.
        Fi
    Fi

State1: ! We know that a chunk of text in Compared-to file...
        ! can be found in reference file.
        ! We don't know if chunk of text in Reference file can be found

    FindReferenceText \ ! Advance ReferenceFileLine past claimed lines
    If ReferenceFileLocation=ReferenceFileLine
    Then Call HandleMatchingText\Goto State0
    Else
        If ComparedToSegmentSize=0
        Then
            ! The reference text here occurs nowhere in the compared-to
            ! file: it has been deleted.
            If ReferenceFileLine<=Len(RefFileLineSignatures$)
            Then Call PrintDeletedLines\Goto State0
            Else
                ! No more reference file left.
                Print "State1: Program Bug!"\Exit
            Fi
        Else
            ! Some block of text has been moved. Decide which.
            If ReferenceFileLocation>ReferenceFileLine
            Then
                ! Both blocks were found in the other file; report the
                ! shorter of the two as the one that moved.
                If RefSegmentSize<=ComparedToSegmentSize
                Then
                    PrintComparedTextMoved
                    Goto State0 \ ! Because some of Ref text might be accounted-for, now
                Else
                    PrintReferenceTextMoved
                    Goto State0 \ ! Because some of Compared-to text might now be accounted
                Fi
            Else
                ! A match located before ReferenceFileLine should be
                ! impossible, because every earlier reference line has been
                ! claimed (zeroed).  Dump the state for diagnosis.
                Print "State1: Program bug..."
                Print "ReferenceFileLine,ReferenceFileLocation,RefSegmentSize"
                Print ReferenceFileLine;ReferenceFileLocation;RefSegmentSize
                Print ...
&           "ComparedToFileLine,ComparedToFileLocation,ComparedToSegmentSize"
                Print ComparedToFileLine;ComparedToFileLocation;...
&                     ComparedToSegmentSize
                Exit
            Fi
        Fi
    Fi

END
