*================================================================================
*
*	TEXAS INSTRUMENTS, INC.		
*
*	MAX INDEX
*
*	Revision Date:  04/18/97
*	
*	USAGE	This routine is C Callable and can be called as:
*		
*		int maxindex(short a[], int n)
*
*		a[] --- vector array 
*		n   --- number of elements in a[]
*
*		If the routine is not called from C, the caller must load
*		every argument into the register in which the compiler's
*		calling convention would have passed it, because the code
*		assumes the arguments are already there (refer to the
*		C compiler reference guide).
*
*
*	C CODE
*		This is the C equivalent of the assembly code.  Note that
*		the assembly code is hand optimized and restrictions may
*		apply.
*
*		int maxindex(short a[], int n)
*		{
*		
*			int             max, index, i;
*			max = -32768;
*			for (i = 0; i < n; i++)
*				if (a[i] > max) {
*					max = a[i];
*					index = i;
*				}
*			return index;
*		}
*
*	DESCRIPTION
*
*		This routine finds the maximum value of a vector and returns
*		the index of that value.  Whenever a new maximum is found, the
*		multiply units are used (multiplying by 1) to copy the current
*		index between registers.
*		
*	TECHNIQUES
*
*		1.  The loop is unrolled three times
*	
*	ASSUMPTIONS
*
*         	1.  n >= 3
*		2.  n is a multiple of 3
*               3.  vector a[] should be aligned on half word boundaries
*		
*	MEMORY NOTE
*
*		No memory bank hits under any conditions.
*
*	CYCLES
*
*		(2/3)n + 12
*
*===============================================================================

	.global _maxindex
	.text

_maxindex:
		ADD	.L2X	2,	A4,	B6	; copy a

		LDH	.D1	*A4++[2],	A3	; x[j] = a[i + j]
||		LDH	.D2	*B6++[3],	B2	; x[j] = a[i + j]
||		SUB	.L2	B4,	3,	B0	; n - 3

		LDH	.D1	*A4++[1],	A0	; x[j] = a[i + j]
||	[B0]	B	.S2	LOOP			; for
||	[B0]	SUB	.L2	B0,	3,	B0	; i += 3 

		LDH	.D1	*A4++[2],	A3	;* x[j] = a[i + j]
||		LDH	.D2	*B6++[3],	B2	;* x[j] = a[i + j]

		LDH	.D1	*A4++[1],	A0	;* x[j] = a[i + j]
||		MVK	.S1	-32768, A5		; max1 = -32768
||	[B0]	B	.S2	LOOP			;* for
||	[B0]	SUB	.L2	B0,	3,	B0	; i += 3 

		LDH	.D1	*A4++[2],	A3	;** x[j] = a[i + j]
||		LDH	.D2	*B6++[3],	B2	;** x[j] = a[i + j]
||		MVK	.S1	-32768, A6		; max3 = -32768
||		MVK	.S2	-32768, B5		; max2 = -32768
||		ZERO	.L2	B4			; i = 0

LOOP:
	[B0]	B	.S2	LOOP			; for
||	[A2]	MV	.S1	A0,	A6		; if (t[j]) max3[j]=x[j]
||	[A2]	MPY	.M1X	1,	B4,	A8	; if (t[j]) index3 = i
||		CMPLT	.L1	A5,	A3,	A1	;* t[j] = max1 < x[j]
||		CMPLT	.L2	B5,	B2,	B1	;* t[j] = max2 < x[j]
||		LDH	.D1	*A4++[1],	A0	;*** x[j] = a[i + j]
||		ADD	.D2	3,	B4,	B4	; i += 3

	[B0]	SUB	.S2	B0,	3,	B0	; i += 3 
||		CMPLT	.L1	A6,	A0,	A2	; t[j] = max3 < x[j]
||	[A1]	MV	.S1	A3,	A5		; if (t[j]) max1=x[j]
||	[B1]	MV	.L2	B2,	B5		; if (t[j]) max2=x[j]
||	[A1]	MPY	.M1X	1,	B4,	A7	; if (t[j]) index1 = i
||	[B1]	MPY	.M2	1,	B4,	B7	; if (t[j]) index2 = i
||		LDH	.D2	*B6++[3],	B2	;*** x[j] = a[i + j]
||		LDH	.D1	*A4++[2],	A3	;*** x[j] = a[i + j]

LOOP_END:

		CMPLT	.L2X	A5,	B5,	B1	; t = max1< max2
||	[A2]	MV	.D1	A0,	A6		; if (t[j]) max3= x[j]

		CMPEQ	.L2X	B5,	A5,	B0	; te = max1 == max2
||	[A2]	MV	.L1X	B4,	A8		; if (t) index3 = i

	[B1]	MV	.L1X	B5,	A5		; if (t) max1 = max2
||	[B0]	CMPLTU	.L2X	B7,	A7,	B1	;if(te) ti=index1<index2
||		B	.S2	B3			; Return from call

	[!B1]	SUB	.D1	A7,	2,	A4	; if (!ti)||(!te||t) index = index1 w/ offset
||	[B1]	SUB	.S1X	B7,	1,	A4	; if (t) index= index1 w/ offset
||		CMPLT	.L1	A5,	A6,	A1	; t = max1 < max3

		CMPEQ	.L1	A5,	A6,	A2	; te = max1 == max3

	[A2]	CMPLTU	.L1	A8,	A4,	A1	; if(te) ti=index3<index

	[A1]	SUB	.L1	A8,	1,	A4	; if (t)||(ti) index = index3 w/ offset
||	[!A1]	SUB	.D1	A4,	1,	A4	; if !(t||ti) index = index w/ offset

B_END:
***  END Benchmark Timing *** 

    
