*===============================================================================
*
*	TEXAS INSTRUMENTS, INC.		
*
*	BLOCK MOVE
*
*	Revision Date:  04/24/97
*	
*	USAGE	This routine is C Callable and can be called as:
*		
*		void blk_move(short *bl, short *zl, int n)
*
*		bl --- block of data to be moved
*               zl --- destination of block of data
*		n  --- number of elements in block
*
*		If routine is not to be used as a C callable function then
*		you need to initialize values for all of the values passed
*		as these are assumed to be in registers as defined by the 
*		calling convention of the compiler, (refer to the C compiler
*		reference guide).
*
*	C CODE
*		This is the C equivalent of the assembly code.  Note that
*		the assembly code is hand optimized and restrictions may
*		apply.
*
*		void blk_move(short *bl,short *zl, int n)
*		{
*		short *tmpPtr;
*		short *tmpPtr2;
*
*		tmpPtr = bl;			
*		tmpPtr2 = zl - 1;
*		for (tmpPtr = bl; tmpPtr < bl + n; tmpPtr++)
*			*++tmpPtr2 = *tmpPtr;
*		}
*
*
*	DESCRIPTION
*
*		Move N 16-bit elements from one memory location 
*		to another		
*
*		
*	TECHNIQUES
*
*		1.  Two LDW (load word) instructions are issued in parallel, so
*		    four 16-bit values are loaded at a time
*		2.  The loop is unrolled four times (four 16-bit elements are
*		    moved per loop iteration)
*	
*	ASSUMPTIONS
*
*		1.  N >= 6
*		2.  N is a multiple of 2
*
*		
*	MEMORY NOTE
*
*		No memory bank hits under any conditions.
*
*	CYCLES
*
*		5 + N/2, for N multiple of 4
*		6 + N/2, other N's
*
*===============================================================================
	.global _blk_move
	.text

_blk_move:
; Software-pipelined copy of n 16-bit elements from bl to zl, moved as
; 32-bit words, two words (four elements) per store packet.
;
; Register roles as used below:
;   A4 = bl (source), B4 = zl (destination), A6 = n
;   A4 / B5 = source pointers for the even / odd 32-bit words
;   B4 / A5 = destination pointers for the even / odd 32-bit words
;   A1      = number of loop branches still to be issued
;   A2      = n & 2 (nonzero when n is even but not a multiple of 4)
;   A3 / B6 = data words in flight between an LDW and the matching STW
;
; Scheduling: an LDW result is available after 4 delay slots and a taken
; B takes effect after 5 delay slots, so three load packets and two
; branches are issued before the first store executes.
;
; NOTE(review): the loads run two packets ahead of the stores and the last
; two load packets are never stored, so source words beyond bl + n are
; read (never written).  Confirm that this read-ahead is acceptable for
; every caller's buffer placement.
; NOTE(review): all accesses are LDW/STW, so bl and zl presumably must be
; word (4-byte) aligned -- confirm against the callers.

*** BEGIN Benchmark Timing ***
B_START:

	ADD	.L2X	4,A4,B5 	; B5 = bl + 4 bytes: B-side source pointer (odd words)
||	SHR 	.S1	A6,2,A1		; A1 = n >> 2 (four elements per store packet)
|| 	AND 	.L1	A6,2,A2		; A2 = n & 2: nonzero when n is not a multiple of 4

; Store packets executed in total = A1 + 2 (one fall-through pass of LOOP,
; A1 taken branches back to LOOP, and the final packet after the loop).
; Multiples of 4 need n/4 packets; other even n need n/4 + 1 packets with
; the last one storing a single word.
	LDW	.D1	*A4++[2],A3	; issue load of source word 0; A4 += 2 words
||	LDW	.D2	*B5++[2],B6	; issue load of source word 1; B5 += 2 words
||[!A2]	SUB	.S1	A1,2,A1		; n % 4 == 0: A1 = n/4 - 2
||[A2]	SUB	.L1     A1,1,A1		; n % 4 == 2: A1 = n/4 - 1

   [A1]	B	.S1	LOOP		; 1st pre-issued loop branch (taken only if A1 != 0)

   	LDW	.D1	*A4++[2],A3	; issue load of next even source word
|| 	LDW	.D2	*B5++[2],B6	; issue load of next odd source word
|| [A1]	SUB	.S1	A1,1,A1		; count the branch just issued; A1 stops at 0
||	ADD	.L1X	4,B4,A5		; A5 = zl + 4 bytes: A-side destination pointer (odd words)

   [A1]	B	.S1	LOOP		; 2nd pre-issued loop branch (taken only if A1 != 0)

   	LDW	.D1	*A4++[2],A3	; issue load of next even source word
|| 	LDW	.D2	*B5++[2],B6	; issue load of next odd source word
|| [A1]	SUB	.S1	A1,1,A1		; count the branch just issued; A1 stops at 0

LOOP:					; two-cycle kernel: one store packet, one load packet
	STW	.D2	A3,*B4++[2]	; store the arrived even word to zl; B4 += 2 words
||	STW	.D1	B6,*A5++[2]	; store the arrived odd word to zl; A5 += 2 words
|| [A1] B	.S1	LOOP		; issue another pass while branches remain

   	LDW	.D1	*A4++[2],A3	; issue load of next even source word
|| 	LDW	.D2	*B5++[2],B6	; issue load of next odd source word
|| [A1] SUB	.S1	A1,1,A1	   	; count the branch just issued; A1 stops at 0
; end of kernel: execution falls through here once no taken branch is pending


	STW	.D2	A3,*B4++[2]	; final even word is always stored
||[!A2]	STW	.D1	B6,*A5++[2]	; final odd word only when n % 4 == 0; otherwise it lies past the block

B_END:
*** END Benchmark Timing ***

	B	.S2	B3		; return to the address held in B3
  	NOP	5

