;=======================================================================
; SFFT.ASM
; Keith Larson
; TMS320 DSP Applications
; (C) Copyright 1996,1997,1998
; Texas Instruments Incorporated
;
; This is unsupported freeware with no implied warranties or
; liabilities.  See the C3x DSK disclaimer document for details
;====================================================================;
;====================================================================;
; float SFFT_A(float diff);                                          ;
;                                                                    ;
; The forward and reverse SFFT are calculated within this one loop   ;
; The loop itself is unrolled to achieve an inner loop cycle count   ;
; of 7 cycles per bin calculation.  The inner loop contains both the ;
; REAL and IMAG filter summations, so if the output is for spectrum  ;
; analysis or only one filter sum is required, one or both summations;
; can be removed giving an inner loop speed of 6 cycles/bin          ;
;                                                                    ;
; Cycles:  7*N + 46                                                  ;
;                                                                    ;
;====================================================================;
; Registers used    F0,  F1,  F2,  F3,  F4,  F5,  F7
;                  AR0, AR1, AR2, IR0
;                   ST,  RC,  RE,  RS
;
; Registers saved   R4,  R5,  F7
;====================================================================;
          .global  _SFFT_A            ; Entry point defined in this file
          .global  _Tbase             ; Word holding the twiddle table address
          .global  _Bbase             ; Word holding the SFFT bin array address
          .global  _SFFTBINS          ; Number of bins processed per call
          .global  _REAL_VEC          ; Multiplier applied to the REAL sum
          .global  _IMAG_VEC          ; Multiplier applied to the IMAG sum
          .global  _SFFT_RL           ; Receives the REAL filter sum
          .global  _SFFT_IM           ; Receives the IMAG filter sum
          .global  _Scale             ; Final scale applied to the return value
          .global  _diff              ; Declared only; not referenced by _SFFT_A
;-------------------------------------
; Word offsets inside one REAL/IMAG pair (each pair is 2 words long)
TR        .set     0                  ; Twiddle pair : REAL part
TI        .set     1                  ; Twiddle pair : IMAG part
DR        .set     0                  ; 'cmplx' data : REAL part
DI        .set     1                  ; 'cmplx' data : IMAG part
;-------------------------------------
;//_SFFT_RL  .float   1.0                ; Complex output
;//_SFFT_IM  .float   1.0                ;
_SFFT_A
          ;------------------------------------------------------------
          ; In:   one float on the stack, in the word directly below
          ;       the return address (the 'diff' term, added to the
          ;       REAL part of every bin).
          ; Out:  R0 = (REAL_sum*_REAL_VEC + IMAG_sum*_IMAG_VEC)*_Scale
          ;       _SFFT_RL, _SFFT_IM = REAL_sum, IMAG_sum before rotation
          ;       The bin array at _Bbase is updated in place.
          ; Uses: R0-R3, AR0-AR2, IR0, RC/RS/RE, ST  (not restored)
          ;       R4, R5, R7                         (saved/restored)
          ; NOTE(review): _SFFTBINS is not range checked.  A value of 0
          ;       loads RC with -1 before RPTB; callers presumably
          ;       guarantee _SFFTBINS >= 1 - confirm.
          ;------------------------------------------------------------
       ;//sti      R0,@080AFFFh
          ldi      SP,AR0             ; AR0 = SP at entry, to reach the argument
          push     R4                 ; Preserve registers used by compiler
          push     R5                 ;
          pushf    R7                 ;
          ;- - - - - - - - - - - - - -
          ldf      *-AR0(1),R7        ; R7 = diff = New - K2*Old (from stack)
          ldi      @_Tbase,AR0        ; AR0 -> twiddle table, {TR,TI} pairs
          ldi      @_Bbase,AR1        ; AR1 -> SFFT bin array, {DR,DI} pairs (read)
          ldi      @_Bbase,AR2        ; AR2 -> SFFT output (same array: in place)
          ldi      @_SFFTBINS,RC      ; Number of bins to calculate
          subi     1,RC               ;  less 1 for RPTB (not the pass band width)
          ldi      2,IR0              ; Size of R/I pair in array
          ;----------------------------
          ldf      0,R4               ; R4 = running REAL filter sum, starts at 0
          ldf      0,R5               ; R5 = running IMAG filter sum, starts at 0
          ;--------------------------------
          ; Software pipeline: the TR*DR product of a bin is formed one
          ; pass early (here for the first bin, at the bottom of the loop
          ; for all others), so every pass starts with R0 = TR*DR.
          ; In each parallel pair the ADDF3/SUBF3 reads the R0 written by
          ; an earlier MPYF3, not the product being formed beside it.
          ; Do not reorder the loop body.
          ;--------------------------------
          mpyf3 *+AR0(TR),*+AR1(DR)    ,R0 ; R0 = TR*DR of the first bin
          rptb  EndSFFT                    ; Repeat Loop..EndSFFT once per bin
          ;--------------------------------
Loop      mpyf3 *+AR0(TR)  ,*+ AR1( DI),R1 ; R1 = TR*DI
          mpyf3 *+AR0(TI)  ,*+ AR1( DI),R0 ; R0 = TI*DI
       || addf3 R7,R0                  ,R3 ; R3 = TR*DR + DIFF
          mpyf3 *+AR0(TI)  ,*+ AR1( DR),R0 ; R0 = TI*DR
       || subf3 R0,R3                  ,R3 ; R3 = TR*DR - TI*DI + DIFF (new REAL)
          mpyf3 *++AR0(IR0),*++AR1(IR0),R0 ; Step both ptrs; R0 = next bin TR*DR
       || addf3 R1,R0                  ,R2 ; R2 = TR*DI + TI*DR        (new IMAG)
          stf   R2,*+AR2(DI)               ; Save the new Fbin values and
       || stf   R3,*AR2++(IR0)             ;  step the output ptr to the next pair
          ; The stepped MPYF3 above also runs on the final pass, so it
          ; reads (but never uses) one pair past the last twiddle/bin.
          ;- - - - - - - - - - - - - - - -
          subf3    R4,R3,R4       ;REAL sum; sum'=R-sum  alternates sign of
EndSFFT   subf3    R5,R2,R5       ;IMAG sum; raised cosine window coefficients
          ;-----------------------------------------------------------
          ; For raised cosine window filters the endpoint bin values
          ; are scaled to 1/2 relative to the pass bins.
          ; Here R3/R2 still hold the last bin's new REAL/IMAG values,
          ; which entered the sums with a '+' sign.
          ;-----------------------------------------------------------
          addf     R4,R4              ; Double inner +/-1 sum loop
          addf     R5,R5              ;
          subf     R3,R4              ; Take the last bin back to single weight,
          subf     R2,R5              ;  i.e. 50% of the doubled inner bins
          ldi      @_Bbase,AR1        ; ptr to start of R/I SFFT array
          ldf      *+AR1(DI),R2       ; R2 = first bin IMAG (as just updated)
       || ldf      *+AR1(DR),R3       ; R3 = first bin REAL (as just updated)

          ldi      @_SFFTBINS,R0      ; If the loop count was odd, the first bin
          tstb     1,R0               ;  entered the +,-,+,- sums with a '+' sign
          bz       SFFT_SignOK        ; Even count: signs are already as wanted
          mpyf     -1,R4              ; Odd count: negate both sums so the first
          mpyf     -1,R5              ;  bin is negative, as in the even case

SFFT_SignOK                           ; Named target (was a fixed '$+3' offset)
          addf     R3,R4              ; Add the first bin once: cancels half of
          addf     R2,R5              ;  its doubled weight, leaving it at 50%
          ;-----------------------------------------------------------
          ; When the SFFT is finished, the REAL/IMAG sums are scaled
          ; accordingly for the desired output phase angle.  A 'growth'
          ; scale factor is also applied since the summation occurs
          ; over N data points.
          ;-----------------------------------------------------------
          stf      R4,@_SFFT_RL       ; Store the sums before they are rotated
          stf      R5,@_SFFT_IM

ExitSFFT  mpyf     @_REAL_VEC,R4      ; Rotate to desired output phase
          mpyf     @_IMAG_VEC,R5      ;
          addf3    R4,R5,R0           ; Sum the R/I into a REAL output
          mpyf     @_Scale,R0         ; inverse of N/2 growth; R0 is the return
          ; - - - - - - - - - - - - -
          popf     R7                 ; Restore in reverse order of the pushes
          pop      R5                 ;
          pop      R4                 ;
       ;//sti      R0,@080A000h
          rets                        ;
;--------------------------------------