C File Name: Driver.for
C Include compute.lib as an additional library while linking

************************************************************************
*                                                                      *
* This is the driver program to do the Matrix Multiplication. The      *
* input matrices are initialized to random values here. The maximum    *
* number of threads to be spawned and the type of subsystem are also   *
* identified here.                                                     *
*                                                                      *
************************************************************************
      include 'flib.fi'
      Program Driver
      include 'flib.fd'
      integer*4 CONSOLE$, WIN32$, WIN16$
      parameter ( CONSOLE$ = 0 )    ! Console subsystem
      parameter ( WIN32$ = 1 )      ! Win32 subsystem
      parameter ( WIN16$ = 2 )      ! Win16 subsystem
      real*4 ranval
      integer*4 i, j, k, inThreadCount
      integer*4 A_Rows, A_Columns, B_Columns
      real*4 A[Allocatable](:,:), B[Allocatable](:,:), 
     +  C[Allocatable](:,:)

      A_Rows = 50          ! size of A array
      A_Columns = 100      ! size of B array
      B_Columns = 100      ! size of C array
      inThreadCount = 8    ! number of threads to be spawned

      Allocate (A(A_Rows, A_Columns), B(A_Columns, B_Columns),
     +    C(A_Rows, B_Columns) )
      Do  i = 1, A_Columns
          Do j = 1, A_Rows
             Call Random (ranval)
             A (j, i) = ranval
          End Do
          Do k = 1, B_Columns
             Call Random (ranval)
             B(i, k) = ranval
          End Do
      End Do
      
      Call Compute (A, B, C, A_Rows, A_Columns, B_Columns, 
     +       inThreadCount, CONSOLE$)
               
      End
######################################################################
C File Name: Compute.for
C Built as a DLL by compiling with the /LD switch

      include 'mt.fi'
      include 'flib.fi'
      include 'console.fi'

C Include contents of Subroutine Initiate from Listing 2 here.

************************************************************************
*                                                                      *
* MatMult is where the actual calculation of a row times a column is   *
* performed.  This is the thread procedure.                            *
*                                                                      *
************************************************************************
      Subroutine MatMult (CurrentThread)
      include 'common.inc'
      integer*4 CurrentThread
      automatic
      integer*2 wAttribute
      integer*4 cCharCells, lpcWritten
      record /COORD/ coordAttr
      integer*4 i, j, k

C Row and Column staggered as described in Listing 3

      Do i = CurrentThread, A_Rows,  MaxThreadCount
         Do j = (CurrentThread-1)*MaxThreadCount, 
     +           B_Columns + (CurrentThread-1)*MaxThreadCount - 1
            jj =  1 + mod(j, B_Columns)
            Do k = 1, A_Columns
               C(i, jj)  =  C(i, jj) + A(i, k) * B(k, jj)

            End Do
            ! Critical section begins
            If ((Do_Console.eq.CONSOLE$).or.(Do_Console.eq.WIN32$)) then
              Call EnterCriticalSection( loc(GlobalCriticalSection) )
                coordAttr.y = i + 1
                coordAttr.x = jj + 1
                wAttribute = (CurrentThread+1)*16
                cCharCells = 1
                If ( .not.FillConsoleOutputAttribute(hConsoleOut,
     +                                               wAttribute,
     +                                               cCharCells,
     +                                               coordAttr,
     +                                               lpcWritten) ) 
     +               Stop 'FillConsoleOutputAttribute failed'
              Call LeaveCriticalSection( loc(GlobalCriticalSection) )
            End If
            ! Critical section ends
         End Do
      End Do
      End ! MatMult

************************************************************************
*                                                                      *
* Compute does the actual computation by spawning threads. It calls    *
* the routines Initiate, SizeConsole, DrawFrame to set up the console  *
* window and then calls TerminateConsole to clean up and reset console *
* handles.                                                             *
*                                                                      *
************************************************************************     
      Subroutine Compute [dllexport]
     +                  (In_A, In_B, In_C, In_A_Rows, In_A_Columns, 
     +                   In_B_Columns, In_Thread_count, In_Do_Console)
      real In_A(In_A_Rows, In_A_Columns) 
      real In_B(In_A_Columns, In_B_Columns)
      real In_C(In_A_Rows, In_B_Columns)
      integer In_A_Rows, In_A_Columns, In_B_Columns
      integer In_Thread_count, In_Do_Console
      include 'common.inc'
      external MatMult
      integer*4 ThreadHandle [Allocatable](:), threadId
      integer*4 CurrentThread[Allocatable](:), count
      integer*4 waitResult
      integer*4 i, j
  
      Do_Console = In_Do_Console               
      Call Initiate (In_A, In_B, In_A_Rows, In_A_Columns, 
     +      In_B_Columns, In_Thread_count)
      
      If ((In_Do_Console.eq.CONSOLE$).or.(In_Do_Console.eq.WIN32$)) then
        Call InitConsole 
        Call SizeConsole
        Call DrawFrame
      End If

      Allocate (ThreadHandle(MaxThreadCount), 
     +      CurrentThread(MaxThreadCount) )
      
      Do count = 1, MaxThreadCount
        CurrentThread(count) = count
        ThreadHandle(count) = CreateThread( 0, 0, MatMult, 
     +           CurrentThread(count), 0, threadId) 
      End Do
C Can't wait on more than 64 threads 
      waitResult = WaitForMultipleObjects(MaxThreadCount, 
     +      ThreadHandle, .TRUE.,  WAIT_INFINITE)
c Transfer result from common back into return argument.
      Do i = 1, A_Rows
        Do j = 1, B_Columns
            In_C(i,j) = C(i,j)
            C(i, j) = 0.0
        End Do
      End Do

      If ((Do_Console.eq.CONSOLE$).or.(Do_Console.eq.WIN32$)) 
     +     Call TerminateConsole
      Deallocate ( ThreadHandle, CurrentThread )
      End ! Compute

************************************************************************
*                                                                      *
* InitConsole only gets called if the calling application was either   *
* a console application or a Win32 application.  It first checks to    *
* see it it was called from a console application.                     *
*                                                                      *
* If it was then there was already a console window and it creates     *
* a new output screen buffer so the original console window can be     *
* restored at termination.                                             *
*                                                                      *
* If it wasn't then it was called by a Windows application so it       *
* allocates a new console window and gets the its output handle.       *
*                                                                      *
* Then the input and output console operating system handles are       *
* converted to C runtime file handles using open_osfhandle.  Dup2 is   *
* then used to force association of the C stdin, stout, and sterr with *
* the handles to the console.  At this point standard runtime screen   *
* I/O will function correctly in the console window regardless of how  *
* the DLL was called.                                                  *             
*                                                                      *
* It also initializes the critical section used in the threads for     *
* synchronization in using the console handles.                        *
*                                                                      *
************************************************************************

      Subroutine InitConsole
      include 'common.inc'
      record /RTL_CRITICAL_SECTION_DEBUG/ AuxCriticalSection
      integer*4 cfin, cfout
      integer*4 iaccess
      
      GlobalCriticalSection.Address = loc(AuxCriticalSection)
      AuxCriticalSection.Address    = loc(GlobalCriticalSection)
      Call InitializeCriticalSection(loc(GlobalCriticalSection))
      iaccess = GENERIC_READ.or.GENERIC_WRITE
      If ( Do_Console.eq.CONSOLE$ ) then ! Already have a console
c Get original console output handle.
        hConsoleOld = CreateFile('CONOUT$'c,iaccess,3,0,3,0,0)
c Get new screen buffer so old contents can be preserved.                
        hConsoleOut = CreateConsoleScreenBuffer(
     +                          GENERIC_READ.or.GENERIC_WRITE,
     +                          FILE_SHARE_READ.or.FILE_SHARE_WRITE,
     +                          0, 
     +                          1, 
     +                          0)
c Set new screen buffer to be active buffer.
        If (.not.SetConsoleActiveScreenBuffer(hConsoleOut) ) 
     +    Stop 'SetConsoleActiveScreenBuffer failed'
      else  ! Just created console.  Need output handle.
        If (.not.AllocConsole() ) Stop 'AllocConsole failed'
        hConsoleOut = CreateFile('CONOUT$'c,iaccess,3,0,3,0,0)
        hConsoleOld = hConsoleOut
      End If
      hConsoleIn = CreateFile('CONIN$'c ,iaccess,3,0,3,0,0)
      cfin   = open_osfhandle(hConsoleIn,  #08)
      cfout  = open_osfhandle(hConsoleOut, #08)
      If (dup2(cfin, 0).eq.-1) Stop 'Dup2 on cfin failed'
      If (dup2(cfout,1).eq.-1) Stop 'Dup2 on cfout failed'
      If (dup2(cfout,2).eq.-1) Stop 'Dup2 on cfout failed'
      End ! InitConsole

************************************************************************
*                                                                      *
* TerminateConsole restores the runtime handles to the original console*
* handles if the application is a console exe. If the application is a *
* Windows exe, then it simply frees up the allocated console.          *
*                                                                      *
************************************************************************

      Subroutine TerminateConsole
      include 'common.inc'
      integer*4 lpcWritten, cfout
      character buf*40
      record /COORD/ coordCursor
c Write prompt to hit enter to continue.
      buf = 'Press Enter to Continue'
      coordCursor.y = 0
      coordCursor.x = max(0,(B_Columns - len_trim(buf))/2)
      If (.not.SetConsoleCursorPosition(hConsoleOut,
     +                                    coordCursor) ) 
     +    Stop 'SetConsoleCursorPosition failed'
        
      If (.not.WriteConsole(hConsoleOut, 
     +                        loc(buf),
     +                        len_trim(buf),
     +                        lpcWritten,
     +                        0) ) 
     +    Stop 'WriteConsole failed'
      read*  ! Wait until Enter is pressed then continue.
      If (Do_Console.eq.CONSOLE$) then ! Called from a console app.
c Reset runtime handles back.
        If (.not.SetConsoleActiveScreenBuffer(hConsoleOld) ) 
     +    Stop 'SetConsoleActiveScreenBuffer failed'
        cfout  = open_osfhandle(hConsoleOld, #08)
        If (dup2(cfout,1).eq.-1) Stop 'Dup2 on cfout failed'
        If (.not.CloseHandle(hConsoleOut) )
     +    Stop 'CloseHandle failed'
      else ! Called from a Windows application
        If (.not.FreeConsole() ) ! Free the console window
     +    Stop 'FreeConsole failed' 
      End If
      End ! TerminateConsole

************************************************************************
*                                                                      *
* SizeConsole calculates the size required to display the final        *
* matrix. It sets the console screen buffer to a large size and then   *
* sizes the window to the correct dimensions.  It then sets the screen *
* buffer back down to the required size.                               *
*                                                                      *
************************************************************************
      Subroutine SizeConsole
      include 'common.inc'
      record /COORD/ coordConsole
      record /CONSOLE_SCREEN_BUFFER_INFO/ csbi
      record /SMALL_RECT/ psrct
      integer*4 wsize_x, wsize_y

c Calculate window frame dimensions.
      wsize_x = B_Columns + 3
      wsize_y = A_Rows + 3
c Set screen buffer to a large value to get possible maximum dimensions.      
      coordConsole.x = 500
      coordConsole.y = 500
      If (.not.SetConsoleScreenBufferSize(hConsoleOut,
     +                                    coordConsole) ) 
     +    Stop 'SetConsoleScreenBufferSize failed'
c Get screen buffer information. If dimensions too big to display, fail.
      If (.not.GetConsoleScreenBufferInfo(hConsoleOut, csbi) ) 
     +    Stop 'GetConsoleScreenBufferInfo failed'
      If (B_Columns.gt.csbi.dwMaximumWindowSize.x - 5) then
          print*, 'Too many columns to display'
          print*, 'Maximum is ', csbi.dwMaximumWindowSize.x - 5
          Stop
      End If
      If (A_Rows.gt.csbi.dwMaximumWindowSize.y - 5) then
          print*, 'Too many rows to display'
          print*, 'Maximum is ', csbi.dwMaximumWindowSize.y - 5
          Stop
      End If
c Size screen buffer to maximum size, window to required size, then
c buffer back to desired size.
      coordConsole.x = csbi.dwMaximumWindowSize.x
      coordConsole.y = csbi.dwMaximumWindowSize.y
      If (.not.SetConsoleScreenBufferSize(hConsoleOut,
     +                                    coordConsole) ) 
     +    Stop 'SetConsoleScreenBufferSize failed'
c Set buffer and window back down to required size
      psrct.Top    = 0
      psrct.Left   = 0
      psrct.Right  = wsize_x
      psrct.Bottom = wsize_y
      coordConsole.x = wsize_x + 1
      coordConsole.y = wsize_y + 1
      If (.not.SetConsoleWindowInfo(  hConsoleOut,
     +                                .TRUE.,
     +                                psrct) ) 
     +    Stop 'SetConsoleWindowInfo failed'

      If (.not.SetConsoleScreenBufferSize(hConsoleOut,
     +                                    coordConsole) ) 
     +    Stop 'SetConsoleScreenBufferSize failed'
      End ! SizeConsole

************************************************************************
*                                                                      *
* DrawFrame simply calculates and draws the frame that will contain    *
* the matrix.                                                          *
*                                                                      *
************************************************************************
      Subroutine DrawFrame
      include 'common.inc'
      record /COORD/ coordStart
      integer*1 chFillChar
      integer*4 lpcWritten, cCharCells
      integer*4 frame_x, frame_y

c Draw horizontal display frame.
      chFillChar = #CD
      frame_x = B_Columns + 2
      frame_y = A_Rows + 2
      cCharCells = frame_x - 1
      coordStart.x = 1
      coordStart.y = 1
      If (.not.FillConsoleOutputCharacter(hConsoleOut, 
     +                                    chFillChar, 
     +                                    cCharCells, 
     +                                    coordStart, 
     +                                    lpcWritten) )
     +    Stop 'FillConsoleOutputCharacter failed'
      coordStart.x = 1
      coordStart.y = frame_y
      If (.not.FillConsoleOutputCharacter(hConsoleOut, 
     +                                    chFillChar, 
     +                                    cCharCells, 
     +                                    coordStart, 
     +                                    lpcWritten) )
     +    Stop 'FillConsoleOutputCharacter failed'
c Draw vertical display frame with corners.
      cCharCells = 1
      chFillChar = #C9
      Do i = 1, frame_y
        coordStart.x = 1
        coordStart.y = i
        If (i.eq.frame_y) chFillChar = #C8
        If (.not.FillConsoleOutputCharacter(hConsoleOut, 
     +                                      chFillChar, 
     +                                      cCharCells, 
     +                                      coordStart, 
     +                                      lpcWritten) )
     +    Stop 'FillConsoleOutputCharacter failed'
        coordStart.x = frame_x
        If (i.eq.1) then
          chFillChar = #BB
        else
          If (i.eq.frame_y) chFillChar = #BC
        End If
        If (.not.FillConsoleOutputCharacter(hConsoleOut, 
     +                                      chFillChar, 
     +                                      cCharCells, 
     +                                      coordStart, 
     +                                      lpcWritten) )
     +    Stop 'FillConsoleOutputCharacter failed'
        chFillChar = #BA
      End Do
      End ! DrawFrame
######################################################################
C File Name: Common.inc
C Contents of common block and corresponding declarations

      include 'mt.fd'      ! Data declarations for Multithreading API
      include 'flib.fd'    ! Data declarations for runtime library
      include 'console.fd' ! Data declarations for Console API
      integer*4 CONSOLE$, WIN32$, WIN16$
      parameter ( CONSOLE$ = 0 )    ! Console subsystem
      parameter ( WIN32$ = 1 )      ! Win32 subsystem
      parameter ( WIN16$ = 2 )      ! Win16 subsystem
      real*4 A, B, C    ! Input Matrices A & B and Output Matrix C
      integer*4 A_Rows, A_Columns, B_Columns  ! Matrix Dimensions
      integer*4 MaxThreadCount  ! Maximum numner of Threads
      integer*4 Do_Console   ! To identify the subsystem
      integer*4 hConsoleOut,hConsoleIn, hConsoleOld ! Console IO handles
      record /RTL_CRITICAL_SECTION/ GlobalCriticalSection ! CS object
      common  MaxThreadCount,        ! common block
     +        Do_Console,
     +        hConsoleOut,
     +        hConsoleIn,
     +        hConsoleOld,
     +        GlobalCriticalSection, 
     +        A_Rows,                ! Rows in A = Rows in C
     +        A_Columns,             ! Columns in A  = Rows in B
     +        B_Columns,             ! Columns in B = Columns in C
     +        A(1000, 1000),
     +        B(1000, 1000),
     +        C(1000, 1000)          ! Maximum Array size is 1000 X 1000
