*===============================================================================
*
*	TEXAS INSTRUMENTS, INC.		
*
*	MINIMUM ERROR SEARCH LOOP
*
*	Revision Date:  04/23/97
*	
*	USAGE	This routine is C Callable and can be called as:
*	
*		int minerror(short *errCoefs, short *GSP0_TABLE,
*			     int *savePtr_ret)
*
*		*GSP0_TABLE  -- GSP0 terms array
*               *errCoefs    -- array of error coefficients 
*		*savePtr_ret -- receives the byte offset, from GSP0_TABLE, of the
*		                end of the vector giving the max dotprod
*
*		If routine is not to be used as a C callable function then
*		you need to initialize values for all of the values passed
*		as these are assumed to be in registers as defined by the 
*		calling convention of the compiler, (refer to the C compiler
*		reference guide).
*
*	C CODE
*		This is the C equivalent of the assembly code.  Note that
*		the assembly code is hand optimized and restrictions may
*		apply.
*
*		int minerror(short *errCoefs, short *GSP0_TABLE,
*			     int *savePtr_ret)
*		{	
*		int val, maxVal;
*		int i, j;
*		short *tmpPtr;
*		short *tmpPtr2;
*		short *endPtr;
*		short *endPtr2;
*		short *savePtr;
*	
*		#define GSP0_TERMS 9
*		#define GSP0_NUM 256
*	
*		maxVal = -50;
*		tmpPtr = GSP0_TABLE;
*		for (endPtr = tmpPtr + GSP0_TERMS*GSP0_NUM; tmpPtr < endPtr; ){
*		     val = 0;
*		     tmpPtr2 = errCoefs;
*	  	     for(endPtr2=tmpPtr2+GSP0_TERMS;tmpPtr2<endPtr2;tmpPtr2++){
*		          val += *tmpPtr * *tmpPtr2;
*			  tmpPtr++;
*			  }
*		     if (val > maxVal) {
*			  maxVal = val;
*			  savePtr = tmpPtr;
*			  }
*		     }
*		*savePtr_ret = (savePtr - GSP0_TABLE)*2;
*		return (maxVal);
*		}
*
*
*
*	DESCRIPTION
*	
*		This is the minimum energy error search which is a large
*		part of the VSELP vocoder codebook search.  It performs
*		a dot product on 256 pairs of 9 element vectors and 
*		searches for the pair of vectors which produces the 
*		maximum dot product result.
*
*		
*	TECHNIQUES
*
*		1.  The inner loop is unrolled 2 times
*	
*	ASSUMPTIONS
*
*         	1.  Number of error coefficients is 9
*		2.  Number of GSP0 vectors is 256 (9 terms each)
*
*	MEMORY NOTE
*
*		No memory bank hits given errCoefs & GSP0_TABLE are both on
*		even or both on odd word boundaries (4 hits if not)
*
*	CYCLES
*
*		(256/2)*9 + 14 or 1166 cycles
*
*===============================================================================
	.global _minerror
	.text

_minerror:		
; ---------------------------------------------------------------------------
; _minerror -- largest 9-term dot product over a table of halfword vectors.
;
; Register interface as used by the code below (the file header states that
; the arguments arrive in the compiler's calling-convention registers):
;   A4  in  : pointer to coefficients g(0)..g(8).  Read once with word/halfword
;             loads into A7 (g1:g0), A8 (g3:g2), A15 (g5:g4, copied to B5),
;             A5 (g7:g6) and A0 (g8).
;   B4  in  : pointer to the table of x() vectors.  Streamed with
;             post-incrementing word loads; its entry value is kept in B13.
;   A6  in  : address that receives the result offset (STW B12,*A6).
;   B3  in  : return address (B .S2 B3).
;   A4  out : largest dot product found, or -50 when none is greater than -50.
;   *A6 out : B12 - B13, a byte offset relative to the table start.  It
;             corresponds to (savePtr - GSP0_TABLE)*2 in the C reference in
;             the file header, where savePtr points just past the winner.
;
; NOTE(review): B12 is only written under the [B1] predicate.  If no dot
; product ever exceeds -50, the value stored to *A6 is derived from the
; caller's B12 -- the same situation as the uninitialised savePtr in the C
; reference.
;
; NOTE(review): both arrays are read with LDW, so presumably both must be
; word aligned -- confirm against callers.
;
; NOTE(review): B4 is loaded 5 times before the loop and 9 times in each of
; the 128 passes (1157 words), while 256 vectors of 9 halfwords occupy 1152
; words, so the last pass reads 5 words beyond the table.  Those values are
; never used in a result, but the memory must be readable.
;
; Saved registers and stack layout (SP0 = B15 on entry):
;   SP0+0 : B12     SP0-4 : B11     SP0-8 : B13     SP0-12 : A15
; B15 stays at SP0-12 for the duration of the loop and is back at SP0 after
; the third pop.
;
; Scheduling: the code is software pipelined.  It relies on C6x result
; latencies (LDW: 4 delay slots, MPY: 1 delay slot, B: 5 delay slots), so
; the order of the execute packets must not be changed.  Trailing comments
; that start with ';*' appear to mark work that belongs to the vector
; started earlier in the pipeline rather than the one being multiplied.
; ---------------------------------------------------------------------------


	STW	.D2	B12,*B15	; save B12 at SP0 (B15 not modified)

	STW	.D2	B11,*--B15	; save B11 at SP0-4

        STW     .D2     A15,*--B15[2]   ; save A15 at SP0-12 (B15 moves down 2 words)
	
*** BEGIN Benchmark Timing ***
B_START:

; Prologue: load the nine coefficients and start the x() load stream so that
; the first x word is available when OUTLOOP issues its first multiplies.
	LDW	.D1	*A4,A7		; A7 = g(1) & g(0)
||      STW     .D2     B13,*+B15[1]    ; save B13 at SP0-8 (slot skipped above)

	MVK	.S2	-42,B6		; B6 = -42: byte correction added to B4 when recording savePtr
||	LDW	.D1	*+A4[2],A15	; A15 = g(5) & g(4)

	LDW	.D2	*B4++,B0	; B0 = x(1) & x(0)
||	LDW	.D1	*+A4[1],A8	; A8 = g(3) & g(2)
||	MV	.L2	B4,B13		; B13 = table base, subtracted from savePtr at exit
||	MVK	.S1	1,A1		; A1 = 1: priming flag, blocks the first [!A1] compare
||	MVK	.S2	-50,B11		; maxVal = -50 (initial maximum)

	LDW	.D2	*B4++,B0	; B0 = x(3) & x(2)
||	MVK	.S1	127,A2		; loop counter: 127 -> 128 passes, 2 vectors per pass				
	LDW	.D2	*B4++,B0	; B0 = x(5) & x(4)
||	LDW	.D1	*+A4[3],A5	; A5 = g(7) & g(6)

	LDW	.D2	*B4++,B0	; B0 = x(7) & x(6)
||	LDH	.D1	*+A4[8],A0	; A0 = g(8) 
||	MVK	.S2	0,B1		; B1 = 0: no new maximum pending

	MVK	.S2	0,B2		; initialize val
||	LDW	.D2	*B4++,B0	; B0 = x(0) & x(8)
||	MV	.L2X	A15,B5		; copy g(5) & g(4) to other reg file

; Each pass of OUTLOOP is 9 execute packets and handles two vectors.  A vector
; is 9 halfwords (18 bytes), so the first vector of a pass starts on a word
; boundary of the load stream and uses MPY/MPYH, while the second starts on
; the upper halfword and uses MPYHL/MPYLH.  A9 (A side) and B7 (B side) hold
; the two partial sums; B2 = B7 + A9 is the finished dot product, B1 is the
; "new maximum" flag from CMPGT, B11 is maxVal and B12 the recorded pointer.
OUTLOOP:				; OUTER LOOP BEGINS HERE
	MPY	.M1X	B0,A7,A3	; p0 = x(0) * g(0)
||	MPYH	.M2X	B0,A7,B8	; p1 = x(1) * g(1)
||	ADD	.L1	A3,A9,A9	;* val0 += p0,
||	ADD	.L2	B8,B7,B7	;* val1 += p1,
||	LDW	.D2	*B4++,B0	; B0 = x(2) & x(1)
|| [B1] MV	.S2	B2,B11		;* maxVal = val when the last compare set B1

	MPY	.M1X	B0,A8,A3	; p0 = x(2) * g(2)
||	MPYH	.M2X	B0,A8,B8	; p1 = x(3) * g(3)
||	LDW	.D2	*B4++,B0	; B0 = x(4) & x(3)
||	ADD	.L1	A9,A3,A9	;* val0 += p0,
||	ADD	.L2	B7,B8,B7	;* val1 += p1,			
|| [B1] ADD	.S2	B6,B4,B12	;* savePtr = B4 - 42 when B1 set

	MPY	.M1X	B0,A15,A3	; p0 = x(4) * g(4)
||	MPYH	.M2	B0,B5,B8	; p1 = x(5) * g(5)
||	ADD	.S1	0,A3,A9		; val0 = p0 (restarts the A-side sum)			
||	ADD	.S2	0,B8,B7		; val1 = p1 (restarts the B-side sum)
||	LDW	.D2	*B4++,B0	; B0 = x(6) & x(5)
||	ADD	.L2X	B7,A9,B2	;* val = val0 + val1,	

	MPY	.M1X	B0,A5,A3	; p0 = x(6) * g(6)
||	MPYH	.M2X	B0,A5,B8	; p1 = x(7) * g(7)
||	ADD	.L1	A3,A9,A9	; val0 += p0,			
||	ADD	.S2	B8,B7,B7	; val1 += p1,
||	LDW	.D2	*B4++,B0	; B0 = x(8) & x(7)
||[!A1]	CMPGT	.L2	B2,B11,B1	;* B1 = (val > maxVal); skipped while A1 != 0
|| [A2] B	.S1	OUTLOOP		; loop while A2 != 0; the next 5 packets are its delay slots

	MPY	.M1X	B0,A0,A3	; p0 = x(8) * g(8)
||	MPYHL	.M2X	B0,A7,B8	; p1 = x(0) * g(0), second vector of this pass
||	ADD	.L1	A3,A9,A9	; val0 += p0,			
||	ADD	.L2	B8,B7,B7	; val1 += p1,
||	LDW	.D2	*B4++,B0	; B0 = x(1) & x(0)
|| [B1] MV	.S2	B2,B11		;* make maxval = val

	MPYLH	.M2X	B0,A7,B8	; p1 = x(1) * g(1)
||	MPYHL	.M1X	B0,A8,A3	; p0 = x(2) * g(2)
||	LDW	.D2	*B4++,B0	; B0 = x(3) & x(2)
||	ADD	.L1	A3,A9,A9	; val0 += p0,
||	ADD	.L2	B8,B7,B7	; val1 += p1,
|| [B1] ADD	.S2	B6,B4,B12	;* savePtr = B4 - 42 when B1 set

	MPYLH	.M2X	B0,A8,B8	; p1 = x(3) * g(3)
||	MPYHL	.M1X	B0,A15,A3	; p0 = x(4) * g(4)
||	LDW	.D2	*B4++,B0	; B0 = x(5) & x(4)
||	ADD	.L1	A9,A3,A9	;  val0 += p0,
||	ADD	.L2	0,B8,B9		;* B9 = p1 (held so B7 stays intact for the val sum)			
||	ADD	.S1	-1,A2,A2	; A2-- dec loop counter
|| [B1] ADD	.S2	2,B12,B12	;* savePtr += 2 bytes (applied on top of B4 - 42)

	MPYLH	.M2	B0,B5,B8	; p1 = x(5) * g(5)
||	MPYHL	.M1X	B0,A5,A3	; p0 = x(6) * g(6)
||	ADD	.D1	0,A3,A9		; val0 = p0,			
||	ADD	.S2	B9,B8,B7	; val1 = B9 + p1 (restarts the B-side sum)
||	LDW	.D2	*B4++,B0	; B0 = x(7) & x(6)
|| [A1] ADD	.S1	-1,A1,A1	; A1-- dec priming counter
||	ADD	.L2X	B7,A9,B2	;* val = val0 + val1,	

	MPYLH	.M2X	B0,A5,B8	; p1 = x(7) * g(7)
||	MPYHL	.M1X	B0,A0,A3	; p0 = x(8) * g(8)
||	ADD	.L1	A9,A3,A9	; val0 += p0,			
||	ADD	.S2	B7,B8,B7	; val1 += p1,
||	LDW	.D2	*B4++,B0	; B0 = x(0) & x(8)
||	CMPGT	.L2	B2,B11,B1	;* compare val with maxval

* OUTLOOP ENDS HERE
				
; Epilogue: drain the pipeline for the last two vectors while popping the
; saved registers.  Each popped register is still read with its old value by
; later packets because the LDW result has not arrived yet, so the order of
; the packets below is significant.  No LDW *B4++ is issued here, so the
; savePtr corrections differ from those used inside the loop.
	ADD	.L1	A9,A3,A9	; val0 += p0,			
||	ADD	.L2	B7,B8,B7	; val1 += p1,
||      LDW     .D2     *B15++,A15      ; pop A15 off stack
|| [B1] MV	.S2	B2,B11		;* maxVal = val when the last compare set B1

	ADD	.L1	A9,A3,A9	; val0 += p0,
||	ADD	.L2	B7,B8,B7	; val1 += p1,
||      LDW     .D2     *B15++,B13      ; pop B13 off stack
|| [B1] ADD	.S2	B6,B4,B12	;* savePtr = B4 - 42 when B1 set

	ADD	.L2X	B7,A9,B2	; val = val0 + val1 (last vector)
||      LDW     .D2     *B15++,B11      ; pop B11 off stack
||	B	.S2	B3		; return; the next 5 packets are its delay slots
||	MV	.S1X	B11,A4		; A4 returns maxVal

	CMPGT	.L2	B2,B11,B1	; compare val with maxval
||      LDW     .D2     *B15,B12	; pop B12 off stack
|| [B1]	ADD	.S2	B12,4,B12	; savePtr += 4, predicated on the previous compare's B1

   [B1]	MV	.L1X	B2,A4		; return val instead when the last vector is the new maximum
|| [B1] ADD	.D2	-20,B4,B12	; savePtr = B4 - 20 for the last vector
	
	SUB	.S2	B12,B13,B12     ; B12 = savePtr - table base (byte offset)

	STW	.D1	B12,*A6		; store the byte offset through the pointer in A6

B_END:
*** END Benchmark Timing ***

	NOP
