*===============================================================================
*
*	TEXAS INSTRUMENTS, INC.		
*
*	LATTICE FILTER - INVERSE - ANALYSIS
*
*	Revision Date:  05/12/97
*	
*	USAGE	This routine is C Callable and can be called as:
*	
*		int latanal(short b[], int n, short k[], int f)
*
*		b[] --- array of coefficients
*               n   --- number of coefficients
*		k[] --- array of filter gains
*		f   --- initial accumulator value; the updated value is returned
*
*		If routine is not to be used as a C callable function then
*		you need to initialize values for all of the values passed
*		as these are assumed to be in registers as defined by the 
*		calling convention of the compiler, (refer to the C compiler
*		reference guide).
*
*	C CODE
*		This is the C equivalent of the assembly code.  Note that
*		the assembly code is hand optimized and restrictions may
*		apply.
*
*		int latanal(short b[], int n, short k[], int f)
*		{
*			int             i;
*			short           c, a;
*			c = f >> 16; 
*			for (i = 0; i < n; i++) {
*				a = b[i] + ((k[i] * (f >> 16)) >> 16);
*				f += b[i] * k[i];
*				b[i] = c;
*				c = a;
*		 	}
*			return f;
*		}
*
*	DESCRIPTION
*	
*		This routine implements an inverse analysis lattice filter
*		and returns the result f.  The filter consists of n stages.
*		The value of f is calculated by doing a multiply accumulate
*		on the coefficients and filter gains.  The coefficient array
*		b[] is also overwritten in place with newly calculated values.
*
*		
*	TECHNIQUES
*
*		The algorithm requires d (presumably the loaded b[i] value;
*		the name does not appear in the C code above) to be live too
*		long.  Thus, it must be copied and moved.  The loop is unrolled
*		once, thus n has to be a multiple of 2.
*	
*	ASSUMPTIONS
*
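*         	1.  n is a multiple of 2
*		2.  n >= 4 (the loop epilogue always stores four elements of
*		    b[]; smaller n appears to write before b[0] -- confirm)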
*
*	MEMORY NOTE:
*
*		There is a fifty percent chance of a memory hit when pushing
*		B10 onto the stack.  Arrays b[] and k[] should be aligned on
*		different half-word boundaries to avoid memory hits.  This
*		could be accomplished by aligning b[] and k[] on opposite word
*		boundaries.
*
*	CYCLES
*   
*		1.5*n + 10
*
*===============================================================================

	.global _latanal
	.text

*** BEGIN Benchmark Timing ***
_latanal:
; Inverse-analysis lattice filter, C callable (see the C equivalent in the
; file header).  Register usage as read from the code below:
;   A4 = b (in), A4 = f (out)   B4 = n   A6 = k   B6 = f   B3 = return address
;   A4 / B10  pointers to the even / odd elements of b[] (each steps 2 halfwords)
;   A6 / B1   pointers to the even / odd elements of k[] (each steps 2 halfwords)
;   A0 / B4   most recently loaded even / odd b value
;   A5 / B2   most recently loaded even / odd k value
;   A2        loop counter, initialised to n/2 - 1
;   B0        priming count (4): the in-loop stores are predicated on !B0
;   A1        countdown (2): the in-loop f accumulation is predicated on !A1
;   B6        running value of f
;   B8 / A8   c values waiting to be stored back into b[]
; The kernel at LOOP is three execute packets long and handles two elements
; of b[]/k[] per pass.  Results are consumed several packets after the
; instruction that produces them because of the C6000 delay slots (branch 5,
; load 4, multiply 1), so exact packet order matters here.
;
; NOTE(review): the epilogue at LOOP_END unconditionally stores four elements
; of b[] relative to the final pointer values.  Tracing the code, the kernel
; runs n/2 + 1 times, so n = 2 appears to store to b[-2] and b[-1], and n = 0
; makes the loop counter -1.  Looks like even n >= 4 is required -- confirm.
; NOTE(review): the loads run ahead of the stores; elements past the end of
; b[] and k[] (up to about index n+3) appear to be read but not used -- confirm
; that this over-read is acceptable for the caller's memory layout.
	B	.S2	LOOP			; branch to LOOP; following packets execute in its delay slots
||	ADD	.L2X	A4,	2,	B10	; B10 = b + 2 bytes = &b[1] (odd-element pointer)
||	MVK	.S1	2,	A1		; A1 = 2, countdown gating the f accumulation
||	LDH	.D1	*A4++[2],	A0	;**** load b[0], advance A4 by 2 halfwords
||	STW	.D2	B10,	*B15--		; push caller's B10 on stack (restored at LOOP_END)

; B4 (n) is read by the SHR and cleared by ZERO in the same packet; B4 is
; reused below as the load destination for the odd b values.
	SHR	.S1X	B4,	1,	A2	; A2 = n / 2
||	MVK	.S2	4,	B0		; B0 = 4, priming count for the in-loop stores
||	ADD	.L2X	2,	A6,	B1	; B1 = k + 2 bytes = &k[1] (odd-element pointer)
||	LDH	.D1	*A6++[2],	A5	;**** load k[0], advance A6 by 2 halfwords
||	ZERO		B4

	SHR	.S2	B6,	16,	B8	; c0 = f >> 16 (first value stored into b[])
||	SUB	.L1	A2,	1,	A2	; A2 = n / 2 - 1 (loop counter)
||	LDH	.D2	*B1++[2],	B2	;**** load k[1], advance B1 by 2 halfwords


LOOP:
   [A2]	B	.S2	LOOP			;* loop back while the counter A2 is non-zero
|| [A2]	SUB	.L1	A2,	1,	A2	; loop counter-- (stops at 0)
|| [B0]	SUB	.L2	B0,	1,	B0	; priming count-- (stops at 0)
||	MPYLH	.M2	B2,	B6,	B9	;* k(odd) * (f >> 16): low half of B2 times high half of B6
||	MPY	.M1	A5,	A0,	A9	;** b(even) * k(even)
||	LDH	.D2	*B10++[2],	B4	;*** load next odd b element
||	LDH	.D1	*A4++[2],	A0	;**** load next even b element


   [A1]	SUB	.L1	A1,	1,	A1	;** acc_f counter-- (stops at 0)
||[!B0]	ADD	.L2	B9,	B7,	B8	; c = b(odd) + ((k(odd)*(f>>16)) >> 16)
||[!B0]	STH	.D2	B8,	*-B10[9]	; store previous B8 into b[], 9 halfwords behind B10
||	SHR	.S1	A9,	16,	A7	;* (k(even)*(f >> 16)) >> 16
||[!A1]	ADD	.S2	B5,	B6,	B6	;* f += b(odd) * k(odd)
||	MPY	.M2	1,	B4,	B9	;* copy odd b value (multiply by 1)
||	MPY	.M1	1,	A0,	A3	;** copy even b value (multiply by 1)
||	LDH	.D1	*A6++[2],	A5	;**** load next even k element


  [!B0]	STH	.D1	A8,	*-A4[9]		; store previous A8 into b[], 9 halfwords behind A4
||	SHR	.S2	B9,	16,	B7	;* (k(odd) * (f >> 16)) >> 16
||	ADD	.L1	A3,	A7,	A8	;* c = b(even) + ((k(even)*(f>>16)) >> 16)
||[!A1]	ADD	.L2X	A9,	B6,	B6	;** f += b(even) * k(even)
||	MPYLH	.M1X	A5,	B6,	A9	;** k(even) * (f >> 16): low half of A5 times high half of B6
||	MPY	.M2	B2,	B4,	B5	;** b(odd) * k(odd)
||	LDH	.D2	*B1++[2],	B2	;**** load next odd k element


LOOP_END:
; Epilogue: drain the results still in flight and return through B3.  The
; reload of B10 completes only after its load delay slots, so the stores in
; the next three packets still use B10 as the b[] pointer.  The pointers are
; no longer advanced here, hence the store offsets shrinking from 7 to 5.
	LDW	.D2	*++B15,	B10		; pop B10 off stack
||	B	.S2	B3

	SHR	.S1	A9,	16,	A7	;* (k * (f>>16)) >> 16
||	ADD	.S2	B5,	B6,	B6	;* f += b * k (last pending product)
||	STH	.D2	B8,	*-B10[7]	; store B8 into b[], 7 halfwords behind B10
||	ADD	.L2	B9,	B7,	B8	; c = b + ((k * (f>>16)) >> 16)

	ADD	.L1	A3,	A7,	A8	;* c = b + ((k * (f>>16)) >> 16)
||	STH     .D1     A8,	*-A4[7]		; store A8 into b[], 7 halfwords behind A4

	STH	.D2	B8,	*-B10[5]	; store B8 into b[], 5 halfwords behind B10
||	STH	.D1	A8,	*-A4[5]		; store A8 into b[], 5 halfwords behind A4
||	MV	.S1X	B6,	A4		; return f in A4

B_END:
*** END Benchmark Timing ***

; Fills the two remaining delay slots of the return branch issued at LOOP_END.
	NOP	2

