:TITLE[BITBLT];	*24 May 1983 by Fiala; change ALU>=0 to Carry at bbIntOn+1
		*and bbIntOn+2 and ALU<0 to Carry' at bbBitBlt+11 to enlarge
		*maxitems from 16k-1 to 32k-1.

%BBTable format

WORD	  NAME		
  0	Function		bit 0 Long; 14:17 function; for AltoXMMode 
				bit 12=alt bank src, 13=alt bank dest
  1	unused
  2	DBCA	Dest BCA	Base Core Address of dest bit map
  3	DBMR	Dest BMR	Bit Map Raster width in words (>=0)
  4	DLX	Dest LX		Block's Left X offset from 1st bit of scan-line (>= 0)
  5	DTY	Dest TY		Block's Top Y offset from 1st scan-line (>=0)
  6	DW	Dest W		Width in bits of block (>=0)
  7	DH	Dest H		Height in scan-lines of block (>=0)
 10	SBCA	Src BCA
 11	SBMR	Src BMR		(>=0??)
 12	SLX	Src LX		(>=0)
 13	STY	Src TY		(>=0)
 14	Gray0			These four words are the Gray Block
 15	Gray1			Gray0 is used on last item transmitted,
 16	Gray2			Gray1 on next-to-last, etc., Gray0 on
 17	Gray3			4th from last, etc.
 20	LongSrcLo		This pair instead of MDShi/SBCA if long
 21	LongSrcHi
 22	LongDestLo		This pair instead of MDShi/DBCA if long
 23	LongDestHi

BitBlt functions:
[X=0 uses BBFB (Dest & Mask'), X=1 uses BBFBX (Dest unmasked)]
[MA=0 causes Src & Mask, MA=1 causes Src or Mask']

CODE	MA  X	SALUFOP	Action			Computation

  0	0   0	R or T	S			(D & M') or (S & M)
  1	0   1	R or T	D or S			D or (S & M)
  2	0   1	R # T	D # S			D # (S & M)
  3	0   1	R & T'	D & S'			D & (S & M)'
  4	1   0	R or T'	S'			(D & M') or (S or M')'
  5	1   1	R or T'	D or S'			D or (S or M')'
  6	1   1	R # T'	D # S'			D # (S or M')'
  7	1   1	R & T	D & S			D & (S or M')
 10	0   1	R or T	(D & S') or (S & G)	(D & (S & M)') or (S & M & G)
 11	0   1	R or T	D or (S & G)		D or (S & M & G)
 12	0   1	R # T	D # (S & G)		D # (S & M & G)
 13	0   1	R & T'	D & (S & G)'		D & (S & M & G)'
 14	0   0	R or T	G			(D & M') or (G & M)
 15	0   1	R or T	D or G			D or (G & M)
 16	0   1	R # T	D # G			D # (G & M)
 17	0   1	R & T'	D & G'			D & (G & M)'
	
bbFunction is in the following format:
   00	mesa long pointer during early initialization;
	L-to-R = 0 / R-to-L = 1 after early init.
01-03	unused
04-07	which-innerloop index
10-11	unused
   12	For AltoXMMode alt bank src, else unused
   13	For AltoXMMode alt bank dest, else unused
14-17	Bitblt function code
%

*BBFA dispatch values
*These are the values used below in At[bbIE,...] and At[bbIF,...] to place
*the refill-table entries of the no-source inner loops; the other inner loops
*lay out the same five cases in this order with DispTable[5,17,3].
Set[bbItem,3];	*item refill
Set[bbSDRef,4];	*source and destination refill
Set[bbSRef,5];	*source refill
Set[bbDRef,6];	*destination refill
Set[bbNoRef,7];	*no refill

*Inner-loop type indices (octal), used in At[bbI,bbILtypeN] on the inner-loop
*entry points.  bbFnSetup leaves n*1000C in bbFunction for type n (and leaves
*that field zero for type 0), so the field selected by
*Dispatch[bbFunction,4,4] holds 2n, which is the value defined here.
Set[bbILtype0,00];	*functions 1-3 and 5-7
Set[bbILtype1,02];	*functions 0 and 4
Set[bbILtype2,04];	*function 10
Set[bbILtype3,06];	*functions 11-13
Set[bbILtype4,10];	*function 14
Set[bbILtype5,12];	*functions 15-17

*bbShortXM: short-pointer setup when emulating an XM Alto.  Called from the
*bbShrt path with T = MDShi rsh 10b, which becomes bbSrcQHi (the caller has
*already put the same value in bbDestQHi).  Location 177740 is fetched into
*bbGray and its low two bits are added to bbSrcQHi and/or bbDestQHi as
*selected by the two alternate-bank bits bbFunction[12:13].
:IF[AltoXMMode]; *****************************
OnPage[bbXMPage];
bbShortXM:
	bbSrcQHi _ T;
	T _ (RZero) - (40C);	*177740 addresses bank
	PFetch1[MDS,bbGray];
	Dispatch[bbFunction,12,2];	*Alternate bank bits
	T _ LdF[bbGray,16,2], Disp[.+1];
	Return, DispTable[4];	*neither src nor dest in alternate bank
	bbDestQHi _ (bbDestQHi) + T, Return;	*dest only
	bbSrcQHi _ (bbSrcQHi) + T, Return;	*src only
	bbSrcQHi _ (bbSrcQHi) + T, Goto[.-2];	*src, then share the dest entry
:ENDIF; **************************************

OnPage[bbPage];

*Alto entry here with StkP pointing at AC1 (2*scan-lines completed),
*(Cycle&PCXF) and not (100000C) in T, and pointer to BitBlt table in AC2
*(a base reg.); the BitBlt table is known to start at an even address.
*Mesa entry with (Cycle&PCXF) or (100000C) in T, Stack holding 2*scan-lines
*completed and Stack0 in AC2 (a pointer to BitBlt table).
*T is saved in AC0, so the sign of AC0 distinguishes Mesa (negative) from
*Alto; every early exit below restores PCF from AC0 and picks
*bbMDone/bbNDone on that sign.
*During initialization bbGray2 holds dbmr (fetched below) and bbGray holds
*sbmr (fetched in bbFnSetup); the gray block itself is not fetched until
*bbAddDF1.
bbBitBlt:
	PFetch2[AC2,bbItemWid,6], Task;	*fetch dw and dh
	bbGray1 _ (bbGray1) or not (0C);	*-1=don't touch pages; 0=do
	AC0 _ T, Skip[BPCChk'];		*Save PCF and Mesa/Alto flag in AC0
	  PCB _ (PCB) + (4C);		*Advance PC on imminent refill
	PFetch1[AC2,bbFunction,0];	*Even
	bbNegBitsLeft _ Zero;		*0 L-to-R, variable R-to-L

*Setup bbSrcQLo/Hi and bbDestQLo/Hi double-precision; in short mode, sbca/dbca
*are fetched into bbSrcQLo/bbDestQLo and MDShi rsh 8 is copied into
*bbSrcQHi/bbDestQHi; long pointers are fetched directly into bbSrcQLo/Hi and
*bbDestQLo/Hi.  Smalltalk simulates an XM Alto in which bbFunction[12:13]
*specify that the emulator alternate bank (bits 14-15 of 177740) should be
*added into bbSrcQHi/bbDestQHi respectively.
	PFetch1[AC2,bbGray2,3];		*fetch dbmr
*The way bbItemsLeft and stack count is determined by gray mode requirements.
*Alto gray mode uses gray n mod 4 with n scanlines left to do, and the
*vertical direction of transfer must be the same here for compatibility.
**Aligning the gray pattern with the absolute or relative address of the
**destination bit map is superior to this algorithm but Alto incompatible.
	T _ (Stack) + (2C);
*bbItemsLeft _ (scanlines left to do - 1) * 2
*(dh is evidently delivered into bbItemsLeft by the PFetch2 at bbBitBlt,
*since it is shifted here without any other load -- confirm register layout.)
	bbItemsLeft _ (Lsh[bbItemsLeft,1]) - T;	*Exit if no items
	PFetch2[AC2,bbSlx,12], Goto[bbExit,Carry'];	*Fetch slx and sty
**Note incompatibility: on Alto, neg words/scanline reverses transfer
	T _ bbItemWid, Skip[R>=0];
	  PCF _ AC0, DblGoto[bbMDone,bbNDone,R<0];	*Exit if width<0
	bbNegItemWid _ (Zero) - T, Skip[ALU#0];
	  PCF _ AC0, DblGoto[bbMDone,bbNDone,R<0];	*Exit if width=0
	PFetch2[AC2,bbDlx,4];		*fetch dlx and dty
*Keep only the low byte of bbFunction (function code and XM bits); the old
*sign bit (Long) selects between the long path below and bbShrt.
	bbFunction _ (bbFunction) and not (177400C), Goto[bbShrt,R>=0];
*Long: fetch the LongDest pair (table word 22) and LongSrc pair (word 20).
	  T _ 22C, Task;
	  PFetch2[AC2,bbDestQLo];
	  T _ 20C;
	  PFetch2[AC2,bbSrcQLo], Goto[bbDir];
bbShrt:	PFetch1[AC2,bbDestQLo,2];	*bbDestQLo _ dbca
	T _ Rsh[MDShi,10];
:IF[AltoXMMode]; *****************************
	bbDestQHi _ T, LoadPage[bbXMPage];
	PFetch1[AC2,bbSrcQLo,10], Call[bbShortXM];	*bbSrcQLo _ sbca
	bbFunction _ (bbFunction) and not (60C);	*clear the alt-bank bits
:ELSE; ***************************************
	bbDestQHi _ T;
	PFetch1[AC2,bbSrcQLo,10], Task;	*bbSrcQLo _ sbca
	bbSrcQHi _ T;
:ENDIF; **************************************

%Determination of BitBlt directions (T=Top, B=Bottom, L=Left, R=Right):
T-to-B is slightly faster than B-to-T, so prefer T-to-B when possible, but
Alto compatibility on gray mode requires B-to-T on dty = sty.
L-to-R is much faster than R-to-L, which uses multiple item setups/scanline,
so avoid R-to-L when possible.  The heuristic here assumes src and dest bit
maps are disjoint except when sbca .eq. dbca (short mode) and sbmr .eq. dbmr.
Direction of transfer is don't care otherwise.  Page-touching prior to
inner loops is necessary only when src and dest bit maps are not disjoint or
when the BitBlt function is not restartable (xor/xnor) on a page fault.
In other situations it is safe to restart BitBlt at the beginning of the
scanline after a fault.
  T-to-B, L-to-R  dty < sty
  B-to-T, R-to-L  (dty = sty) & (sbmr = dbmr) & (dlx - slx >= 100b) &
		  (item >= 100b wide) & (slx+scanlinewidth > dlx)
  B-to-T, L-to-R  (dty > sty) or (dty = sty when not doing R-to-L)
Timing from bbBitBlt to here: 47 short, 48 long (cycles).
%
*bbDir: choose the transfer directions.  bbFnSetup (dispatched on the
*function code) returns the SALUF value in T and starts the sbmr fetch into
*bbGray; bbGray2 already holds dbmr.  Branch conditions in this section
*appear to refer to the result of the preceding instruction (e.g. the ALU=0
*at bbTtoB is the frozen result of sty - dty, and each "L-to-R if ..."
*comment describes the test made one instruction earlier).
*All paths leave bbDBMR (and bbSBMR when a source is used) signed by the
*vertical direction and enter bbGenlInit with a scan-line count in T.
bbDir:	Dispatch[bbFunction,14,4], Call[bbFnSetup];	*Odd
	SALUF _ T, T _ bbDty;
	LU _ (bbSty) - T;
	T _ bbGray2, FreezeResult, Goto[bbTtoB,ALU>=0];
*dty > sty: B-to-T, so the raster widths are negated.
	bbDBMR _ (Zero) - T, Goto[.+3,MB];
	  T _ bbGray;
	  bbSBMR _ (Zero) - T;
	T _ Rsh[bbItemsLeft,1], Goto[bbGenlInit];

*Sty .ge. Dty: will go T-to-B if .gt., B-to-T if .eq. for Alto compatibility
bbTtoB:	bbDBMR _ T, Goto[bbMaybeBTRL,ALU=0];
	  T _ bbGray, Skip[MB];
	    bbSBMR _ T;
	  T _ Rsh[Stack,1], Goto[bbGenlInit];
*Sty = Dty
bbMaybeBTRL:
	bbDBMR _ (Zero) - T, Skip[MB'];
	  T _ Rsh[bbItemsLeft,1], Goto[bbGenlInit];	*no source: B-to-T, L-to-R
	T _ bbGray;
	bbSBMR _ (Zero) - T;
	LU _ (bbDBMR) + T;			*sbmr - dbmr
	T _ bbSlx, Skip[ALU=0];
bbTBLR:	  T _ Rsh[bbItemsLeft,1], Goto[bbGenlInit];	*L-to-R dbmr .ne. sbmr
*bbGray1 _ 0 forces page touching because of possible S-D overlap.
	T _ (bbDlx) - T;			*T _ dlx - slx
	LU _ (Lsh[AllOnes,6]) and T, Skip[ALU>=0];
	  bbGray1 _ Zero, Goto[bbTBLR];		*L-to-R if DestX < SrcX
*DestX >= SrcX; bbNegSDNonOverlap _ - (DestX - SrcX)
	LU _ LdF[bbItemWid,0,12], Skip[ALU#0];	*Even
	  bbGray1 _ Zero, Goto[bbTBLR];	*L-to-R if (DestX-SrcX) < 100b
	LU _ (bbItemWid) - T, Skip[ALU#0];
	  bbGray1 _ Zero, Goto[bbTBLR];	*L-to-R if item < 100b long
	bbGray1 _ Zero, Skip[Carry];	*or width < non-overlap
	  T _ Rsh[bbItemsLeft,1], Goto[bbGenlInit];
	bbNegSDNonOverlap _ (Zero) - T;
*Bit 0 of bbFunction is the R-to-L flag from here on; it is tested after
*page touching (Goto[bbItemSetup,R>=0]).
	bbFunction _ (bbFunction) or (100000C), Goto[bbTBLR]; *R-to-L

*General init: T has items completed if T-to-B, items remaining if B-to-T.
*Time: (71 T-to-B), (71 to 88 B-to-T, L-to-R), (93 to 95 R-to-L) + 1 if long
*bbGenlInit/bbDestInit: compute the starting quadword addresses.
*For the source (skipped when MB = no source) and then the destination, the
*starting scan-line number is multiplied by the raster width with a
*shift-and-add loop: the raster width (bbGray = sbmr, bbGray2 = dbmr) is
*shifted right one bit per pass, and the scan-line number, kept
*double-length in bbYHi,,bbSty (or bbYHi,,bbDty), is added into QLo/QHi when
*the bit shifted out was a one.  The loop head is the return point of
*Call[bbYHi0]; bbLshyHi0/bbLshyHi1 shift bbYHi and Return to it.  The loop
*ends when the shifted raster width becomes zero.
bbGenlInit:
	bbDty _ (bbDty) + T, Skip[MB'];	*Even
	  LU _ bbGray2, Goto[bbDestInit];
	LU _ bbGray;
	bbSrcWLo _ Zero, Skip[ALU#0];	*sbmr=0 not impossible
	  T _ RHMask[bbSrcQHi], Goto[bbAddSF];
	T _ bbSty _ (bbSty) + T, Call[bbYHi0];
*bbSrcQLo/QHi + (sty*sbmr) + (slx rsh 4); product may be > 16 bits
	  bbGray _ Rsh[bbGray,1], Goto[bbNoAddS,R Even];
*Multiply timing: 6*nzeroes right of the left-most one + 14*nones in sbmr.
	    bbSrcQLo _ (bbSrcQLo) + T, Goto[.+3,ALU#0];
	      T _ (bbYHi) + 1, UseCOutAsCIn;
	      T _ bbSrcQHi _ (bbSrcQHi) + T, Goto[bbAddSF];
	    T _ (bbYHi) + 1, UseCOutAsCIn;
	    bbSrcQHi _ (bbSrcQHi) + T;
bbNoAddS:   T _ bbSty _ Lsh[bbSty,1], DblGoto[bbLshyHi1,bbLshyHi0,R<0];
*Base register format as built here is (hi lsh 10b) + hi + 1; this is why
*later carries/borrows into QHi add or subtract (400C) + 1.
bbAddSF:bbSrcQHi _ (Lsh[bbSrcQHi,10]) + T + 1;	*bbSrcQHi in base reg. format
	T _ LdF[bbSlx,14,4], Call[bbNegIWSub];
*bbSlast _ (Slx & 17) + ItemWid - 1 = displacement to last bit of scan-line
*Add (slx rsh 4) and copy WLo into QLo; point bbSlx at bit in 1st quadword.
	bbSlast _ (Zero) - T - 1, Call[bbSBWQ];
*Time to here: (1 if long) + {(71 to 75 if no source) else multiply time +
* [(100 to 101 T-to-B), (100 to 118 B-to-T, L-to-R), (122 to 125 R-to-L)]}
	LU _ bbGray2;
bbDestInit:
	T _ bbDestWLo _ Zero, Goto[bbAddDF,ALU=0];	*dbmr=0: no multiply
	T _ bbDty, Call[bbYHi0];
*bbDestQLo/QHi + (dty*dbmr) + (dlx rsh 4); product may be > 16 bits
	  bbGray2 _ Rsh[bbGray2,1], Goto[bbNoAddD,R Even];
	    bbDestQLo _ (bbDestQLo) + T, Goto[.+3,ALU#0];
	      T _ (bbYHi) + 1, UseCOutAsCIn;
bbAddDF:      T _ bbDestQHi _ (bbDestQHi) + T, Goto[bbAddDF1];	*Even
	    T _ (bbYHi) + 1, UseCOutAsCIn;
	    bbDestQHi _ (bbDestQHi) + T;
bbNoAddD:   T _ bbDty _ Lsh[bbDty,1], DblGoto[bbLshyHi1,bbLshyHi0,R<0];
bbAddDF1:
	bbDestQHi _ (Lsh[bbDestQHi,10]) + T + 1;
*The multiplies are finished, so bbGray2 and bbGray can now receive the gray
*block words (table words 16-17 and 14-15).
	PFetch2[AC2,bbGray2,16], Call[bbDBWQ];
	T _ LdF[bbDlx,14,4], Call[bbNegIWSub];	*offset to last scan-line bit
	PFetch2[AC2,bbGray,14], Skip[MB];
	  T _ Rsh[bbSlast,4], DblGoto[bbTS,bbNTS,R Odd];
	SB _ bbDlx, Goto[bbTchD];
%Time to bbTchD/S: (1 if long) + [(106 to 111 if no source) else
  (137 to 139 T-to-B), (137 to 156 B-to-T, L-to-R), (159 to 163 R-to-L)]
To this add entry/exit overhead less time between bbItemRefill and bbTchD/S:
  Alto--16 + 19; Alto Mesa--10 + 29
  (-15 T-to-B), (-19 B-to-T), (-12 no source).
Total time: [148 to 150 T-to-B, 148 to 166 B-to-T L-to-R, 168 to 171 R-to-L,
  124 to 127 no source] + [0 Pilot, 8 Alto, or 10 Alto Mesa] + [1 if long].
To this add about 60 cycles/multiply (1 multiply if no source else 2).
%

*bbYHi0: entry to the multiply loops in bbGenlInit/bbDestInit.  Abandons the
*BitBlt if the scan-line number just loaded by the caller is negative,
*otherwise clears the high half of the multiplicand and returns.
bbyHi0:		Skip[ALU>=0];	*Nop exit if dty/sty < 0
		  PCF _ AC0, DblGoto[bbMDone,bbNDone,R<0];
		bbYHi _ 0C, Return;

*Shift the high half of the multiplicand left, bringing in the bit shifted
*out of the low half (selected by the caller's DblGoto on R<0).
bbLshyHi0:	bbYHi _ Lsh[bbYHi,1], Return;
bbLshyHi1:	bbYHi _ (Lsh[bbYHi,1]) + 1, Return;

*bbNegIWSub: called with T = (starting X) & 17.  Computes
*-(ItemWid + T) and from it the "last bit" displacement, stored in bbDlast
*(the source caller copies the same value from T into bbSlast itself).
*bbGray1 is -1 for "don't touch pages" and 0 for "touch": or-ing in -1
*makes the result 0, otherwise the low bit is cleared before negation so the
*result is odd.  Also loads MNBR with bbNegItemWid.
bbNegIWSub:	T _ (MNBR _ bbNegItemWid) - T;
		T _ (bbGray1) or T, Skip[R Odd];
		  T _ (Lsh[AllOnes,1]) and T;	*Make bbLast be odd
		bbDlast _ (Zero) - T - 1, Return;	*Make bbLast be 0 (no-touch case)

*T[08] _ MA', T[09] _ MB, T[10:15] _ ALU op
*The MB branch condition is used to indicate "no source."
*bbFnSetup: entered through the 20b-way dispatch on the BitBlt function code
*set up by the caller (Dispatch[bbFunction,14,4] at bbDir).  Returns in T the
*value the caller loads into SALUF (layout given in the comment above this
*routine), and for inner-loop types 1-5 replaces bbFunction with n*1000C so
*that Dispatch[bbFunction,4,4] later selects inner loop type n.  Functions
*1-3 and 5-7 leave bbFunction alone (type 0).  Non-restartable functions
*(xor/xnor) go through bbForceTch, which zeroes bbGray1 to force page
*touching.  The trailing comment on each entry is "MA, ALU op".
bbFnSetup:
	PFetch1[AC2,bbGray,11], Disp[.+1];	*fetch sbmr

*Functions 0-3
	bbFunction _ 1000C, DispTable[20];	*0,R or T
bbOr:	T _ 204C, Return;		*0,R or T
bbX:	T _ 263C, Goto[bbForceTch];	*0,R xor T
bbAN:	T _ 227C, Return;		*0,R & T'

*Functions 4-7 (function 4 falls into function 5's entry after setting type 1)
	bbFunction _ 1000C;		*1,R or T'
	T _ 074C, Return;		*1,R or T'
	T _ 054C, Goto[bbForceTch];	*1,R xnor T
	T _ 056C, Return;		*1,R & T

*Functions 10-13 (source and gray): type 2 or 3, sharing the T values above
	bbFunction _ 2000C, Goto[bbOr];	*0,R or T
	bbFunction _ 3000C, Goto[bbOr];	*0,R or T
	bbFunction _ 3000C, Goto[bbX];	*0,R xor T
	bbFunction _ 3000C, Goto[bbAN];	*0,R & T'

*Functions 14-17 (gray only): T values have the MB "no source" bit set
	T _ 304C, Goto[bbTR4];		*0,R or T
	T _ 304C, Goto[bbTR5];		*0,R or T
	T _ 363C, Goto[bbTR5Tch];	*0,R xor T
	T _ 327C, Goto[bbTR5];		*0,R & T'

bbTR5:	bbFunction _ 5000C, Return;	*type 5; no source
bbTR4:	bbFunction _ 4000C, Return;	*type 4; no source
bbTR5Tch:	bbFunction _ 5000C;	*type 5; touch pages
bbForceTch:	bbGray1 _ Zero, Return;


%Approx. item refill times starting at bbItemRefill are below; the inner-loop
dependent constant given in the comments before the inner loops must be
added to these and the time is 1 (Alto) or 9 (Mesa) cycles greater if
interrupts are disabled:
  L-to-R, src used:	59 (T-to-B) or 62 (B-to-T) cycles
	[+ 12 + 11*(NDestPages+NSrcPages-2) if xor/xnor functions or sty=dty]
  L-to-R, src unused:	42 [+ 5 + 11*(NDestPages-1) if xor function]
  R-to-L:		11*(NDestPages+NSrcPages) + 94 cycles
  R-to-L, continuing item:	63 cycles, 48 or 64 on last continuation

When both src and dest are used the no-refill case will occur at
most 6 times followed by a src-refill, dest-refill, or item-refill.
If src and dest are word-aligned, at most 3 no-refill loops will occur
followed by a src-dest-refill or item refill.

When only the dest is used, at most 3 no-refill loops occur followed
by a src-dest-refill or item refill.
%

*Refill subroutines reached from the inner-loop refill tables.  The table
*entry has already loaded T (bbDestWLo, or the advanced bbSrcWLo for a
*source-only refill) before jumping here; each path ends in a Return, which
*presumably resumes the inner loop -- confirm against the link set by
*Call[bbFixG]/Call[bbDestFetch].
*bbSrcFetch:   fetch the next source quadword.
*bbSrcDestRfl: store dest, advance bbSrcWLo by 4 and fetch source, then
*              (skipping the second store) advance bbDestWLo and fetch dest.
*bbDestRfl:    store dest, advance bbDestWLo by 4, fetch dest.
*bbDestFetch:  fetch a dest quadword only.
bbSrcFetch:	PFetch4[bbSrcQLo,bbSrc], Return;

*Note: Doing PStore4 first allows both PFetch4's to be launched before
*transport for either occurs.  If the PFetch4 for the src were done first,
*the PFetch4 for the dest could not be launched before transport for both
*preceding references had finished.
bbSrcDestRfl:	PStore4[bbDestQLo,bbDest];
		T _ bbSrcWLo _ (bbSrcWLo) + (4C);
		PFetch4[bbSrcQLo,bbSrc], Skip;
bbDestRfl:	PStore4[bbDestQLo,bbDest];
		T _ bbDestWLo _ (bbDestWLo) + (4C);
bbDestFetch:	PFetch4[bbDestQLo,bbDest], Return;

*The bbIA, bbIB, bbIE, and bbIF dispatch tables could be united by revising
*the SrcDestRefill subroutine to check MB; this would save 12b mi but slow
*inner loops by 2 to 3 cycles.

*Inner loops, one per inner-loop type.  Each entry point is placed with
*At[bbI,bbILtypeN] and is reached from bbItemSetup by Disp[bbInnerLoops]
*after Dispatch[bbFunction,4,4].  Every loop begins by calling bbFixG and
*ends in a five-way dispatch whose entries are, in order: item refill,
*source+dest refill, source refill, dest refill, no refill (the values
*bbItem..bbNoRef).  The no-source loops (types 4 and 5) place only the item,
*source+dest and no-refill entries.
bbInnerLoops:
*functions 1-3 and 5-7; refill times: i=4+I, sd=32, s=20, d=26, n=4
bbIA1:	bbDlx _ (DB _ bbDlx) + T, Call[bbFixG], At[bbI,bbILtype0];
	T _ BBFA[SB[bbSrc]] or T;
bbIA2:	DB[bbDest] _ BBFBX[DB[bbDest]] SALUFOP T, Disp[.+1];
*This table's entries are labelled because the type 1-3 loops jump to them.
bbItemRefill:
	T _ bbDestWLo, Goto[bbItemRfl], DispTable[5,17,3];
bbSrcDestRefill:
	T _ bbDestWLo, Goto[bbSrcDestRfl];
bbSrcRefill:
	T _ bbSrcWLo _ (bbSrcWLo) + (4C), Goto[bbSrcFetch];
bbDestRefill:
	T _ bbDestWLo, Goto[bbDestRfl];
	T _ BBFA[SB[bbSrc]] or T, Goto[bbIA2];

*functions 0 and 4; refill times: i=4+I, sd=32, s=20, d=26, n=4
**Can improve by not fetching bbDest when MNBR < -77 on dest refill.
*Same as the loop above except that the store uses BBFB instead of BBFBX.
bbIB1:	bbDlx _ (DB _ bbDlx) + T, Call[bbFixG], At[bbI,bbILtype1];
	T _ BBFA[SB[bbSrc]] or T;
bbIB2:	DB[bbDest] _ BBFB[DB[bbDest]] SALUFOP T, Disp[.+1];
	T _ bbDestWLo, Goto[bbItemRefill], DispTable[5,17,3];
	T _ bbDestWLo, Goto[bbSrcDestRfl];
	T _ bbSrcWLo _ (bbSrcWLo) + (4C), Goto[bbSrcFetch];
	T _ bbDestWLo, Goto[bbDestRfl];
	T _ BBFA[SB[bbSrc]] or T, Goto[bbIB2];

*function 10; refill times: i=8+I, sd=36, s=22, d=30, n=8
*Two steps per word: first clear the source bits in the destination, then
*and the source with the gray word and finish in the shared bbICx tail below.
bbIC1:	bbDlx _ (DB _ bbDlx) + T, Call[bbFixG], At[bbI,bbILtype2];
	T _ BBFA[SB[bbSrc]] or T;
	DB[bbDest] _ (DB[bbDest]) and not T, Disp[.+1];
	T _ PCF[bbGray] and T, Goto[bbICi], DispTable[5,17,3];
	T _ PCF[bbGray] and T, Goto[bbICsd];
	T _ PCF[bbGray] and T, Goto[bbICs];
	T _ PCF[bbGray] and T, Goto[bbICd];
	T _ PCF[bbGray] and T, Goto[bbICr];

*functions 11-13; refill times: i=6+I, sd=34, s=20, d=28, n=6
*The dispatch table here does the destination store and then jumps to the
*labelled refill entries of the type 0 loop; the no-refill entry Returns.
bbID1:	bbDlx _ (DB _ bbDlx) + T, Call[bbFixG], At[bbI,bbILtype3];
	T _ BBFA[SB[bbSrc]] or T;
	T _ PCF[bbGray] and T, Disp[.+1];
bbICi:	DB[bbDest] _ BBFBX[DB[bbDest]] SALUFOP T, Goto[bbItemRefill], DispTable[5,17,3];
bbICsd:	DB[bbDest] _ BBFBX[DB[bbDest]] SALUFOP T, Goto[bbSrcDestRefill];
bbICs:	DB[bbDest] _ BBFBX[DB[bbDest]] SALUFOP T, Goto[bbSrcRefill];
bbICd:	DB[bbDest] _ BBFBX[DB[bbDest]] SALUFOP T, Goto[bbDestRefill];
bbICr:	DB[bbDest] _ BBFBX[DB[bbDest]] SALUFOP T, Return;

*function 14; refill times: i=4+I, sd=26, s=never, d=never, n=4
**Can improve by not refilling dest and BLKSing bbGray when MNBR<-77.
*No source: the gray word takes the place of the source, and the
*"source+dest refill" case refills only the destination.
bbIE1:	bbDlx _ (DB _ bbDlx) + T, Call[bbFixG], At[bbI,bbILtype4];
	T _ BBFA[PCF[bbGray]];
bbIE2:	DB[bbDest] _ BBFB[DB[bbDest]] SALUFOP T, Disp[.+1];
	T _ bbDestWLo, Goto[bbItemRflNS], At[bbIE,bbItem];
	T _ bbDestWLo, Goto[bbDestRfl], At[bbIE,bbSDRef];
	T _ BBFA[PCF[bbGray]], Goto[bbIE2], At[bbIE,bbNoRef];

*functions 15-17; refill times: i=4+I, sd=26, s=never, d=never, n=4
*As function 14 but storing with BBFBX.
bbIF1:	bbDlx _ (DB _ bbDlx) + T, Call[bbFixG], At[bbI,bbILtype5];
	T _ BBFA[PCF[bbGray]];
bbIF2:	DB[bbDest] _ BBFBX[DB[bbDest]] SALUFOP T, Disp[.+1];
	T _ bbDestWLo, Goto[bbItemRflNS], At[bbIF,bbItem];
	T _ bbDestWLo, Goto[bbDestRfl], At[bbIF,bbSDRef];
	T _ BBFA[PCF[bbGray]], Goto[bbIF2], At[bbIF,bbNoRef];

*bbFixG: common first step of every inner loop.  Loads PCF from bbItemsLeft,
*so that PCF[bbGray] presumably selects the gray word by scan-lines left (see
*the gray-mode note at bbBitBlt), and issues BBFBX.
bbFixG:	PCF _ bbItemsLeft, BBFBX, Return;

*Item refill: reached at the end of each item with T = bbDestWLo.  Stores
*the last destination quadword, adds 2 to the scan-line count on the Stack,
*decrements bbItemsLeft by 2, and then does one of:
*  more items, no interrupt to take  -> bbAdvD (T = bbDBMR)
*  no more items                     -> bbExit1 or bbMDone/bbNDone
*  more items, interrupt to take     -> bbMesaInt/bbNovaInt
*NWW is tested twice: NWW<0 takes the bbIntOff path, which never looks for
*an interrupt; otherwise NWW#0 is treated as an interrupt request.
*bbItemRflNS is the no-source entry and goes straight to bbCntI.
bbItemRflNS:
	PStore4[bbDestQLo,bbDest], Call[bbCntI];
	LU _ NWW, DblGoto[bbIntOff,bbIntOn,R<0];

*Worst case time to return from bbIR1 is 42 (src used).
bbItemRfl:
	PStore4[bbDestQLo,bbDest], Call[bbIR1];
*Test for interrupts and done:
*Item refill time: 12.
	LU _ NWW, Skip[R>=0];
bbIntOff:  bbItemsLeft _ (bbItemsLeft) - (2C), Skip;
bbIntOn:bbItemsLeft _ (bbItemsLeft) - (2C), Skip[ALU#0];
	  T _ bbDBMR, DblGoto[bbAdvD,bbExit1,Carry];	*Carry = items remain
	LU _ PCF _ AC0, Skip[Carry];
	  DblGoto[bbMDone,bbNDone,ALU<0];	*finished; interrupt taken later
	LU _ xfWDC, DblGoto[bbMesaInt,bbNovaInt,ALU<0];

*bbIR1: if bbNegBitsLeft is negative an R-to-L item is still in progress, so
*continue it at bbRtoLCont instead of counting a scan-line.  Otherwise fall
*into bbCntI, which counts the scan-line and returns.
bbIR1:	T _ MNBR _ bbNegBitsLeft, Goto[bbRtoLCont,R<0];
bbCntI:	Stack _ (Stack) + (2C), Return;

*At bbMesaInt and bbNovaInt, we have committed to taking an interrupt;
*control can only get back by restarting the opcode.

***The code at bbMesaInt and bbMDone is in AltoX.Mc

*Since the Alto only checks for interrupts during jumps, we simulate JMP .
**Could call intEnt here and handle the (rare) return, if WW, ACTIVE, and
**DMA (used by intEnt) did not clobber BitBlt registers other than
**bbSrc and bbDest.
*Alto (Nova) exits.  bbNovaInt leaves with T = PCB and bbNDone with
*T = PCB + 1; both add PCF.word and leave through brJmpPz on nePage.
bbNovaInt:
	T _ PCB, LoadPage[nePage], Goto[bbNExit];
*	LoadPage[xoPage];
*	Call[intEnt];
*	T _ bbDBMR, Goto[bbAdvD];
bbNDone:	*Time to next opcode = 20 cycles
	T _ (PCB) + 1, LoadPage[nePage];
bbNExit:	T _ (PCF.word) + T, GotoP[brJmpPz];

*Common exits: restore PCF from AC0 and finish as Mesa or Alto according to
*the sign of AC0.  bbExit and bbExit1 are identical instructions under two
*labels (possibly for placement reasons -- not evident from this file).
bbExit:	PCF _ AC0, DblGoto[bbMDone,bbNDone,R<0];
bbExit1: PCF _ AC0, DblGoto[bbMDone,bbNDone,R<0];

*Item refill time: 16 (no src), 18 (src used)
*bbDestW _ bbDestQ _ bbDestW + bbDBMR
*bbAdvD: advance to the next scan-line.  Entered from item refill with
*T = bbDBMR.  A non-negative bbDBMR (T-to-B) continues at bbAD1; the code
*here handles the negative (B-to-T) case, borrowing from bbDestQHi when the
*add does not carry.  MNBR is reloaded with bbNegItemWid for the new item.
*With no source (MB) go straight to destination page touching; otherwise
*advance the source by bbSBMR the same way via bbASn, which returns to the
*next instruction in line (bbTchS).
bbAdvD:	T _ bbDestQLo _ (bbDestQLo) + T, Goto[bbAD1,ALU>=0];	*Even
	MNBR _ bbNegItemWid, Skip[Carry];
	  bbDestQHi _ (bbDestQHi) - (400C) - 1;
	bbDestWLo _ 0C, Skip[MB'];
	  SB _ bbDlx, Goto[bbTchD];
*bbSrcW _ bbSrcQ _ bbSrcW + bbSBMR
	T _ bbSBMR;
	bbSrcQLo _ (bbSrcQLo) + T, Call[bbASn];
%Avoid page touching and simply refill bbSrc/bbDest except when either a
non-restartable function is being executed (xor, xnor) or dty=sty, in which
case there is possible src-dest overlap.  When touching, begin with the last
page of the scan-line and finish with a PFetch4 of the 1st quadword.
Initial displacement is [(ItemWidth - 1 + (startbit & 17)) rsh 4] + non-page
bits of start word, where the first term is computed during initialization.
The choice to touch/not-touch is indicated in bbSLast/bbDLast which contain
0 when not touching or some odd value when touching.
%
*bbTchS: fetch (and optionally page-touch) the source scan-line.
*bbSlast even (zero) means no touching: bbNTS just fetches the first
*quadword and continues at bbNTS1.  Otherwise bbTS forms the word
*displacement of the last bit plus the low byte of bbSrcQLo, rounds it down
*to a 400b-word boundary, and loops: the Call[.+2] makes the "subtract 400b"
*instruction the return point, so each Return steps back one boundary and
*repeats the PFetch4 until the displacement reaches zero.  The final fetch,
*at displacement 0, loads the first quadword.
bbTchS:	T _ Rsh[bbSlast,4], Skip[R Odd];
bbNTS:	  PFetch4[bbSrcQLo,bbSrc], Goto[bbNTS1];
bbTS:	T _ (RHMask[bbSrcQLo]) + T;
	T _ (Lsh[AllOnes,10]) and T, Call[.+2];
	  T _ (Lsh[AllOnes,10]) + T;
	PFetch4[bbSrcQLo,bbSrc], Skip[ALU=0];	*Even
	  Return;
	bbSrcWLo _ 0C;
	SB _ bbSlx, Goto[bbTchD];

*bbAD1: T-to-B continuation of bbAdvD (bbDBMR >= 0): propagate a carry from
*bbDestQLo into bbDestQHi, then advance the source by bbSBMR via bbASp
*unless there is no source (MB), and choose touch/no-touch on bbSlast.
bbAD1:	MNBR _ bbNegItemWid, Skip[Carry'];	*Even
	  bbDestQHi _ (bbDestQHi) + (400C) + 1;
bbNS1:	bbDestWLo _ 0C, Goto[bbNoTchS,MB];
	  T _ bbSBMR;
	  T _ bbSrcQLo _ (bbSrcQLo) + T, Call[bbASp];
	  T _ Rsh[bbSlast,4], DblGoto[bbNTS,bbTS,R Even];
bbNoTchS:
	SB _ bbDlx;				*Odd
*bbTchD: fetch (and optionally page-touch) the destination scan-line, using
*the same loop shape as bbTchS: bbDlast even means fetch only; otherwise
*Return re-enters at the "subtract 400b" instruction until T reaches zero.
bbTchD:	T _ Rsh[bbDlast,4], Goto[bbNTD,R Even];	*Odd
	T _ (RHMask[bbDestQLo]) + T;
	T _ (Lsh[AllOnes,10]) and T, Call[.+2];
	  T _ (Lsh[AllOnes,10]) + T;
bbNTD:	PFetch4[bbDestQLo,bbDest], Skip[ALU=0];	*Even
bbRet:	  Return;
*Item refill time to here: 33 + [5 + 11*(NDPages-1) if xor function] (no src)
*50 (T-to-B), 53 (B-to-T) +
*[12 + 11*(NDPgs+NSPgs-2) if xor/xnor functions or sty=dty & sbmr=dbmr]
*bbFunction bit 0 clear = L-to-R: go set up the inner loop directly.
	Dispatch[bbFunction,4,4], Goto[bbItemSetup,R>=0];
*New R-to-L item
	T _ bbNegItemWid;		*Odd
	T _ (MNBR _ bbNegSDNonOverlap) - T, Call[bbSlxFix];	*T _ bits left
	bbNegBitsLeft _ (bbNegBitsLeft) - T, Goto[bbRtoLNew];

bbRtoLCont:	*Here when continuing R-to-L item.
	LU _ (bbNegSDNonOverlap) - T, Call[bbRtoLC1];	*Odd
*Initially T will contain ItemWidth-SDNonOverlap; subsequent iterations
*T will contain -SDNonOverlap until the last iteration for the scan-line, when
*T will contain -BitsLeft.  bbNegBitsLeft is 0 at onset of each scan-line.
bbRtoLNew:
	bbDlx _ (LdF[bbDlx,14,4]) + T, Call[bbSBWQ];
	PFetch4[bbSrcQLo,bbSrc,0], Call[bbDBWQ];
bbNTS1:	SB _ bbSlx;
	T _ bbSrcWLo _ 0C, Call[bbDestFetch];
	Dispatch[bbFunction,4,4];
*Enter the inner loop selected by the type field of bbFunction.
bbItemSetup:	T _ Lsh[bbDBMR,4], Disp[bbInnerLoops];	*Even

**Worst case time to return from bbSlxFix is 46 cycles.
*bbRtoLC1: called from bbRtoLCont with T = bbNegBitsLeft and the carry from
*(bbNegSDNonOverlap - T) pending.  On carry, T and MNBR are replaced by
*bbNegSDNonOverlap; otherwise T keeps the remaining-bits value (the
*three cases are described in the comment at bbRtoLNew).  T is then
*subtracted from bbNegBitsLeft and control finishes through bbSlxFix, whose
*Return goes back to bbRtoLNew.
bbRtoLC1:
	bbDestWLo _ 0C, Skip[Carry'];
	  T _ MNBR _ bbNegSDNonOverlap;
	bbNegBitsLeft _ (bbNegBitsLeft) - T, Goto[bbSlxFix];

%bbSBWQ and bbDBWQ are used both by initialization and in the R-to-L case.
bbASn/bbASp are called by item refill to advance QLo/QHi by a signed
word displacement, bbSBMR.  The equivalent code in bbDBWQ is open-coded
for item refill.
%
*bbSBWQ: add the signed word displacement (bbSlx rsh 4, sign-extended by
*or-ing in the top four bits when bbSlx is negative) to bbSrcQLo, fix up
*bbSrcQHi for carry or borrow, and leave bbSlx = (bbSrcQLo lsh 4) +
*(bbSlx & 17).
*bbASn/bbASp are entered directly by item refill after the caller has added
*a negative/positive bbSBMR to bbSrcQLo; they do only the QHi fix-up and the
*bbSlx recomputation.  QHi changes by (400C) + 1 because of the base
*register format built at bbAddSF.
bbSBWQ:	T _ Rsh[bbSlx,4], Skip[R>=0];
	  T _ (Lsh[AllOnes,14]) or T;
	T _ bbSrcQLo _ (bbSrcQLo) + T, Goto[bbASp,ALU>=0];
bbASn:	  T _ Lsh[bbSrcQLo,4], Skip[Carry];	*negative add: no carry = borrow
	    bbSrcQHi _ (bbSrcQHi) - (400C) - 1;
	  bbSlx _ (LdF[bbSlx,14,4]) + T, Return;
bbASp:	T _ Lsh[bbSrcQLo,4], Skip[Carry'];	*positive add: carry into QHi
	  bbSrcQHi _ (bbSrcQHi) + (400C) + 1;
bbSlxFix:	bbSlx _ (LdF[bbSlx,14,4]) + T, Return;

*bbDBWQ: the same operation for the destination (bbDlx, bbDestQLo/QHi).
bbDBWQ:	T _ Rsh[bbDlx,4], Skip[R>=0];
	  T _ (Lsh[AllOnes,14]) or T;
	T _ bbDestQLo _ (bbDestQLo) + T, Goto[bbADp,ALU>=0];	*Even
	  T _ Lsh[bbDestQLo,4], Skip[Carry];
	    bbDestQHi _ (bbDestQHi) - (400C) - 1;
	  bbDlx _ (LdF[bbDlx,14,4]) + T, Return;
bbADp:	T _ Lsh[bbDestQLo,4], Skip[Carry'];
	  bbDestQHi _ (bbDestQHi) + (400C) + 1;
bbDlxFix:	bbDlx _ (LdF[bbDlx,14,4]) + T, Return;

:END[BitBlt];e6(1795)
