*===============================================================================
*
*	TEXAS INSTRUMENTS, INC.		
*
*	MAX
*
*	Revision Date:  04/08/97
*	
*	USAGE
*	
*		This routine is C Callable and can be called as:
*		
*		short max(short a[], int n)
*
*		a[] --- vector array 
*		n   --- number of elements of vector
*
*		If the routine is not used as a C callable function, you
*		must initialize all of the values that would otherwise be
*		passed in, as these are assumed to be in the registers
*		defined by the calling convention of the compiler (refer
*		to the C compiler reference guide).
*
*	C CODE
*		This is the C equivalent of the assembly code.  Note that
*		the assembly code is hand optimized and restrictions may
*		apply.
*
*		short max(short a[], int n)
*		{
*			int             i, max;
*			max = -32768;
*			for (i = 0; i < n; i++)
*				if (a[i] > max)
*					max = a[i];
*			return max;
*		}
*
*
*	DESCRIPTION
*
*		This routine finds the max value of a vector and returns
*		the value.  After finding a new max value, it uses the
*		multiply units (MPY by 1) to move the value between registers.
*		
*	TECHNIQUES
*
*		1.  The loop is unrolled six times
*	
*	ASSUMPTIONS
*
*         	1.  n is a multiple of 6
*		2.  Vector a[] should be aligned on a word boundary
*		
*	MEMORY NOTE
*
*		No memory bank hits under any conditions.
*
*	CYCLES
*
*		n/2 + 13
*
*===============================================================================

	.global _max
	.text

; ----------------------------------------------------------------------
; _max -- return the largest signed halfword in a[0..n-1].
;
; Registers as this code uses them:
;   A4  in   address of a[]; later reused as max[4] and, at the end,
;            holds the overall maximum (the value left for the caller)
;   B4  in   n; overwritten in the first packet by the odd-lane pointer
;   B3  in   return address; saved to A0, then reused as a load target
;   A7 / B4               load pointers starting at a[0] / a[1], each
;                         advancing two halfwords per load
;   A8,A9,A3 / B8,B9,B3   loaded elements x[0],x[2],x[4] / x[1],x[3],x[5]
;   A5,A6,A4 / B5,B6,B7   running maxima max[0],max[2],max[4] /
;                         max[1],max[3],max[5], each seeded with -32768
;   A1,A2 / B1,B2         compare results, used as predicates
;   B0                    loop counter: n-6, minus 6 per loop branch
;   A0                    saved return address
;
; Each of the six lanes tracks the maximum of the elements whose index
; is congruent to that lane number modulo 6; the epilogue merges the six
; partial maxima.  The schedule is cycle exact and relies on the C6000
; latencies (LDH: 4 delay slots, MPY: 1 delay slot, B: 5 delay slots),
; so instructions must not be reordered or moved between packets.
;
; NOTE(review): the loop is left only when B0 is exactly 0.  With n = 0
; B0 starts at -6, and with n not a multiple of 6 it steps past 0, so in
; both cases the loop does not stop after n elements.  Callers must pass
; a non-zero multiple of 6.
; NOTE(review): six load pairs are issued before LOOP and three more per
; pass, i.e. n+12 halfwords are fetched in total.  The last 12 lie past
; a[n-1]; their values never reach a max register, but the memory behind
; the array must be readable.
; ----------------------------------------------------------------------
_max:
; -- cycle 1: set up both pointers, the counter and max[0], max[1]
		ADD	.L2X	2, A4, B4	; B4 = &a[1] (odd lanes); replaces n
||		MVK	.S1	-32768, A5	; max[0] = -32768
||		MVK	.S2	-32768, B5	; max[1] = -32768
||		MV	.L1X	B3, A0		; A0 = return address (B3 is reused below)
||		SUB	.D2	B4, 6, B0	; B0 = n - 6 (uses the incoming B4, i.e. n)
||              MV      .D1     A4, A7          ; A7 = &a[0] (even lanes)

; -- cycles 2-4: first wave of loads, elements 0..5
		LDH	.D1	*A7++[2], A8	; load x[0] = a[0]
||		LDH	.D2	*B4++[2], B8	; load x[1] = a[1]

		MVK	.S2	-32768, B6	; max[3] = -32768
||		MVK	.S1	-32768, A6	; max[2] = -32768
||		LDH	.D1	*A7++[2], A9	; load x[2] = a[2]
||		LDH	.D2	*B4++[2], B9	; load x[3] = a[3]

		LDH	.D1	*A7++[2], A3	; load x[4] = a[4]
||		LDH	.D2	*B4++[2], B3	; load x[5] = a[5]

; -- cycle 5: first loop branch; it takes effect after the first pass
;    through LOOP (five packets later).  Second wave of loads starts.
	[B0]	B	.S1	LOOP		; if (B0 != 0) run LOOP again
||	[B0]	SUB		B0, 6, B0	; six more elements accounted for
||		LDH	.D1	*A7++[2], A8	; load next x[0]
||		LDH	.D2	*B4++[2], B8	; load next x[1]

; -- cycle 6: A4 is free now (a was copied in cycle 1), seed max[4], max[5]
		LDH	.D1	*A7++[2], A9	; load next x[2]
||		LDH	.D2	*B4++[2], B9	; load next x[3]
||		MVK	.S1	-32768, A4	; max[4] = -32768
||		MVK	.S2	-32768, B7	; max[5] = -32768

; -- cycle 7: first x[0]/x[1] have arrived; compare ahead of the loop
		CMPLT	.L1	A5, A8,	A1	; t[0] = max[0] < x[0]
||		CMPLT	.L2	B5, B8,	B1	; t[1] = max[1] < x[1]
||		LDH	.D1	*A7++[2], A3	; load next x[4]
||		LDH	.D2	*B4++[2], B3	; load next x[5]

; -- steady state: three packets per six elements.  Each packet compares
;    one lane pair, commits the previous pair's new maximum through the
;    multiply unit, and issues the loads for a later pass.
LOOP:
	[B0]	B	.S1	LOOP		; if (B0 != 0) one more pass, 5 packets on
||	[B0]	SUB		B0, 6, B0	; six more elements accounted for
||		CMPLT	.L1	A6, A9,	A2	; t[2] = max[2] < x[2]
||		CMPLT	.L2	B6, B9,	B2	; t[3] = max[3] < x[3]
||	[A1]	MPY	.M1	1, A8, A5	; if (t[0]) max[0] = x[0]  (1 * x)
||	[B1]	MPY	.M2	1, B8, B5	; if (t[1]) max[1] = x[1]  (1 * x)
||		LDH	.D1	*A7++[2], A8	; load x[0] for a later pass
||		LDH	.D2	*B4++[2], B8	; load x[1] for a later pass

		CMPLT	.L1	A4, A3,	A2	; t[4] = max[4] < x[4]  (A2 reused)
||		CMPLT	.L2	B7, B3,	B2	; t[5] = max[5] < x[5]  (B2 reused)
||	[A2]	MPY	.M1	 1, A9, A6	; if (t[2]) max[2] = x[2], old A2
||	[B2]	MPY	.M2	 1, B9, B6	; if (t[3]) max[3] = x[3], old B2
||		LDH	.D1	*A7++[2], A9	; load x[2] for a later pass
||		LDH	.D2	*B4++[2], B9	; load x[3] for a later pass

	[A2]	MPY	.M1	 1, A3,	A4	; if (t[4]) max[4] = x[4]
||	[B2]	MPY	.M2	 1, B3, B7	; if (t[5]) max[5] = x[5]
||		CMPLT	.L1	A5, A8,	A1	; t[0] = max[0] < x[0], for the next pass
||		CMPLT	.L2	B5, B8,	B1	; t[1] = max[1] < x[1], for the next pass
||		LDH	.D1	*A7++[2], A3	; load x[4] for a later pass
||		LDH	.D2	*B4++[2], B3	; load x[5] for a later pass

; -- epilogue: merge the six partial maxima into A4.  The return branch
;    is issued first; the five packets after it are its delay slots.
LOOP_END:
		CMPLT	.L1	A5, A6,	A1	; t[0] = max[0] < max[2]
||		CMPLT	.L2	B5, B6,	B1	; t[1] = max[1] < max[3]
||              B       .S2X    A0		; return via the saved address

		CMPLT	.L1X	A4, B7,	A2	; t[4] = max[4] < max[5]
||	[A1]	MV	.S1	A6, A5		; A5 = larger of max[0], max[2]
||	[B1]	MV	.L2	B6, B5		; B5 = larger of max[1], max[3]

	[A2]	MV	.L1X	B7, A4		; A4 = larger of max[4], max[5]
||		CMPLT	.L2X	B5, A5,	B1	; t[1] = B5 < A5

	[B1]	MV	.L2X	A5, B5		; B5 = largest of max[0..3]

		CMPLT	.L1X	A4, B5,	A2	; t[4] = A4 < B5

	[A2]	MV	.L1X	B5, A4		; A4 = overall maximum

B_END:
*** END Benchmark Timing ***
