;---------------------------------------------------------------
; SFFT.ASM
; Keith Larson
; TMS320 DSP Applications
; (C) Copyright 1996,1997,1998
; Texas Instruments Incorporated
;
; This is unsupported freeware with no implied warranties or
; liabilities.  See the C3x DSK disclaimer document for details
;
; SFFT.TXT CONTAINS A DETAILED DISCUSSION OF THE SFFT AND THIS APPLICATION
;
; This and all other DSK applications can be downloaded from
; the Texas Instruments FTP site.
;
; ftp://ftp.ti.com/mirrors/tms320bbs              Main FTP site
; ftp://ftp.ti.com/mirrors/tms320bbs/c3xdskfiles  DSK file subdirectory
;----------------------------------------------------------------
; OVERVIEW
; --------
; SFFT.ASM uses a technique known as a Sliding FFT (SFFT) to calculate the
; spectrum of a signal in a continuous fashion.  Fundamentally the SFFT is
; equivalent to a DFT, but is calculated on a sample by sample basis,
; reusing the past DFT data for high efficiency.  The SFFT can be expressed
; in very simple terms that do not require first hand knowledge of
; the DFT or FFT, but can be used to derive them.
;=======================================================================
; This section initializes various constants that are used in
; calculations or for turning on and off various SFFT features
;
; The setup rules are as follows
;
;    Start bin       >= 0
;    Length          >  0
;    Start+Length    <  N/2
;    REAL_EN         0, 1
;    IMAG_EN         0, 1
;
; The easiest way to change sampling rates is to adjust the timer
; However, fine tuning the AIC divisors can result in better performance.
;
; Be careful not to set the sampling rate too high while also trying to
; perform a long SFFT.  Since the SFFT calculates all output bins in
; the time span of one sample, it can get bogged down for large SFFTs
; or high sampling rates.
;
; If the spectrum analyzer is turned off, the bandpass filter will
; have a sharp cutoff at 2 kHz and 8 kHz.  If the FFT size is changed
; to 256, the cutoffs move to 1 and 4 kHz respectively.
;
; The default setup works unmodified on the C31 DSK
;=================================================================
SFFTSIZE  .set     128           ; SFFT length; non 2^N sizes are OK if CIRC_EN=0
SPECT_EN  .set     1             ; 1 = Output routine sends the spectrum display
SPECT_EN_ ; (see Output: when 0 the REAL/IMAG sums are sent to the DAC instead)
REAL_EN   .set     1             ; Enables sum of all REAL output
IMAG_EN   .set     0             ; Enables sum of all IMAG output
RATE      .set     4             ; Spectrum display advances 1/RATE bin per sample
CIRC_EN   .set     0             ; Enables use of circular buffering hardware
BPF_EN    .set     1             ; Enable the bandpass filter (BPFilter)
START_BIN .set	   10            ; Filter start bin (can be 0)
BPF_LENG  .set     31            ; Number of full-weight bins summed by BPFilter
TIM0_prd  .set     1  ;          ; AIC reference clock is TIM0
TA        .set     8  ;Fc =9.77  ; AIC register values (packed into A_REG/B_REG)
TB        .set     32 ;          ; NOTE: For the spectrum analyzer output
RA        .set     16 ;Fs =24.4  ;       the DAC switched cap filter rate
RB        .set     16 ;          ;       softens the edges of impulse signals
;-----------------------------------
; Derived values and field offsets - not intended to be edited
;-----------------------------------
N         .set     SFFTSIZE      ; Use 'N' as shorthand for SFFTSIZE
GIE       .set     0x2000        ; The value of the GIE bit in ST register
TR        .set     0             ; Real twiddle offset in each cell
TI        .set     1             ; Imag twiddle offset in each cell
DR        .set     0             ; Real data offset in each cell
DI        .set     1             ; Imag data offset in each cell
RIBINSIZE .set     2             ; Size of R/I element pair (words)
pi        .set     3.14159265    ; Useful in making apple pie
w         .set     2.0*pi/N      ; Angle step per bin: 2*pi/N radians
;======================================================================
; The SFFT twiddles, data and input buffer arrays are allocated
; to be placed into RAM0 to avoid bus conflicts with program fetching
;======================================================================
          .include "C3XMMRS.ASM"    ; Memory mapped register names (S0_*, T0_*)
          .start   "DATA",0x809800  ; Data arrays are placed at start of RAM0
          .sect    "DATA"           ;
          ;------------------------------------------------------------
          ; Layout (each array is N words, back to back):
          ;   TWIDCOEF  N/2 pairs  K1*cos(n*w), K1*sin(n*w)  n=0..N/2-1
          ;   SFFTDATA  N/2 pairs  REAL, IMAG  (current bin values)
          ;   BUF       N words    input sample delay line
          ; K1 is the damping constant defined with the CODE constants.
          ;------------------------------------------------------------
TWIDCOEF   ;----------------------- ;
n         .set     0.0              ; n = bin number, starts at bin 0
          .loop    N/2              ; This loop creates the R/I phase
          .float   K1*cos(n*w)      ; rotation rate value (real part)
          .float   K1*sin(n*w)      ; (imag part)
n         .sdef    n+1.0            ; redefine value of 'n' for the next bin
          .endloop                  ;
SFFTDATA  ;-------------------------;
          .loop    N/2              ; This loop reserves space for the
          .float   0,0              ; current R/I frequency bin pair
          .endloop                  ;
BUF       ;-------------------------;
          .loop    N/2              ; This loop reserves N words (2 per pass)
          .float   0,0              ; for the ADC input data delay buffer
          .endloop                  ;
;=======================================================================
; The application code begins here, beginning with constants that are
; used in various routines.
;=======================================================================
          .start   "CODE",0x809E00  ; Start in last 512 words of RAM0
          .sect    "CODE"           ; allowing up to 512 pt SFFTs (see note)
          ; NOTE(review): 0x809E00 is 0x600 (3*512) words above the DATA
          ; start at 0x809800, which is where the 512 point limit comes
          ; from.  On the C31 this address appears to fall in RAM block 1
          ; rather than RAM0 - confirm against the device memory map.
Tbase     .word    TWIDCOEF         ; Lookup locations for data arrays
Bbase     .word    SFFTDATA         ; Base of the R/I bin array
CircAddr  .word    BUF              ; Current position in the delay buffer
BUFSTART  .word    BUF              ; First word of the delay buffer
BUFEND    .word    BUF+N            ; One past the last word of the buffer
OutBin    .float   0                ; Fractional display bin (see Output)
MAX       .float   32000.0          ; Clip values
MIN       .float   -32000.0         ;
          ;------------------------
A_REG     .word    (TA<<9)+(RA<<2)+0; AIC register values
B_REG     .word    (TB<<9)+(RB<<2)+2;
C_REG     .word    00000011b        ;

;0gctrl   .word    0x0E970300       ; Ser port setup for AIC
S0gctrl   .word    0x0E973300       ; Ser port setup for AIC invert clkx/clkr

S0xctrl   .word    0x00000111       ; Written to S0_xctrl by ST_STUB
S0rctrl   .word    0x00000111       ; Written to S0_rctrl by ST_STUB
NewMnsOld .word    0                ; Input delta, written by GetADC (float)
K1        .set     0.99995          ; Use a value less than 1.0
K2        .float   pow(K1,N)        ; Nth oldest sample scale factor
REALSUM   .float   0.0              ; Scaled REAL result written by SFFT
IMAGSUM   .float   0.0              ; Scaled IMAG result written by SFFT
Scale     .float   4.0/N            ; SFFT growth scale factor
REAL_VEC  .float   1.0              ; REAL sum scale factor
IMAG_VEC  .float   1.0              ; IMAG sum scale factor
FLOG2SC   .float   pow(2.0,-24.0)   ; Scale factor for log2 calculations
;==========================================================
; The main loop consists of waiting for a new ADC sample.
;==========================================================
; main - background loop.  All signal processing is done in the RINT
; interrupt handler; this loop only keeps the interrupts enabled.
; NOTE(review): 0xE4 does not contain the 0x10 bit that AIC_INIT uses to
; enable XINT, so XINT looks disabled here despite the comment.  The XINT
; handler is a bare RETI, so this appears harmless - confirm.
main      or       GIE,ST           ; Set the global interrupt enable bit
          ldi      0xE4,IE          ; Enable XINT/RINT/INT2
          b        main             ; Do it again!
;======================================================================
; When an ADC interrupt is received it is first loaded into the data
; buffer, then the SFFT is calculated, and then the output of the SFFT
; is sent to the DAC, all before the next sample occurs.
;======================================================================
; RINT - serial port receive interrupt: one pass per ADC sample.
; No registers are saved; the background loop (main) holds no state in them.
; The two dummy loads exist only to put an address on the external bus so
; the SFFT execution time can be observed externally.
RINT      call     GetADC           ; Get ADC sample and put in delay buffer
          ldi      @0x80A000,R0     ; <-- Toggle Addr bus LSBs for ext signal
          call     SFFT             ; Calculate SFFT
          ldi      @0x80AFFF,R0     ; <-- Toggle Addr bus LSBs for ext signal
          call     Output           ; Output the result
          reti                      ;
; XINT - serial port transmit interrupt: nothing to do, just return.
XINT      reti                      ;
;=======================================================================
; The ADC data is read and buffered here
;=======================================================================
; GetADC - read one ADC sample and update the delay buffer.
;   Out:  @NewMnsOld = X[0] - K2*X[-N]   (new sample minus scaled oldest)
;         @CircAddr  = next buffer position
;   Uses: R0, R7, AR0 (and BK when CIRC_EN=1)
GetADC    ldi      @S0_rdata,R0     ; get ADC data
          lsh      16,R0            ; Sign extend the low 16 bits of ADC data
          ash      -16,R0           ;
          float    R0,R0            ; Convert the ADC data to float
          ldi      @CircAddr,AR0    ; Load present circ buf address
          ldf      *AR0,R7          ; R7 = oldest sample X[-N], then apply
          mpyf     @K2,R7           ;  K2 compensation for bin stability
          .if      CIRC_EN          ;
          ldi      N,BK             ; Hardware circular buffer of N words
          stf      R0,*AR0++(1)%    ; Save X[0] in present position
          .else                     ;
          stf      R0,*AR0++        ; Use a circular buffer of any size
          cmpi     @BUFEND,AR0      ; and place in any location: wrap the
          ldige    @BUFSTART,AR0    ; pointer by hand when it reaches BUFEND
          .endif                    ;
          subrf    R0,R7            ; R7 = R0 - R7 = X[0] - K2*X[-N]
          sti      AR0,@CircAddr    ; save new 'circular' modified ptr
          stf      R7,@NewMnsOld    ; save the delta for SFFT
          rets                      ;
;======================================================================
; The SFFT is calculated here
;======================================================================
;----------------------------------------------------------------------
; SFFT - update every frequency bin for the newest input sample
;
;   In:   @NewMnsOld  input delta stored by GetADC
;         @Tbase      address of the twiddle table (R/I pairs)
;         @Bbase      address of the bin array (R/I pairs), updated in place
;   Out:  @REALSUM, @IMAGSUM  scaled bin sums, or the band pass filter
;                             outputs when BPF_EN=1
;   Uses: AR0, AR1, AR2, IR0, RC, R0-R5, R7 (plus the BPFilter registers)
;
;   Per bin:  REAL' = TR*DR - TI*DI + DELTA
;             IMAG' = TR*DI + TI*DR
;----------------------------------------------------------------------
SFFT      ldi      @Tbase,AR0              ; R/I twiddle ptr
          ldi      @Bbase,AR1              ; R/I SFFT array ptr
          ldi      @Bbase,AR2              ; SFFT output (usually in place)
          ldi      RIBINSIZE,IR0           ; Size of R/I pair in array
          ldf      @NewMnsOld,R7           ; R7 = DELTA for this sample
          ldi      (N/2)-1,RC              ; N/2 passes, e.g. 128/2-1 = 63
          ;--------------------------------
          ldf      *+AR1(DR),R4            ; Scale REAL[0] back by 50%
          mpyf     -0.5,R4                 ; (see text)
          ldf      0.0,R5                  ;
          ;--------------------------------
          mpyf3 *+AR0(TR),*+AR1(DR)    ,R0 ; TR*DR <- unrolled from main loop
          rptb  EndSFFT                    ;
          ;--------------------------------
Loop      mpyf3 *+AR0(TR)  ,*+AR1(DI)  ,R1 ; TR*DI
          mpyf3 *+AR0(TI)  ,*+AR1(DI)  ,R0 ; TI*DI
       || addf3 R7,R0                  ,R3 ; (TR*DR + DELTA)
          mpyf3 *+AR0(TI)  ,*+AR1(DR)  ,R0 ; TI*DR
       || subf3 R0,R3                  ,R3 ; TR*DR - TI*DI + DELTA
          mpyf3 *++AR0(IR0),*++AR1(IR0),R0 ; TR*DR (for the next bin)
       || addf3 R1,R0                  ,R2 ; TR*DI + TI*DR
          stf   R2,*+AR2(DI)               ; Save the new Fbin values
       || stf   R3,*AR2++(IR0)             ; NOTE: Use only registers in next
          ;- - - - - - - - - - - - - - - -         to avoid data conflict
          ; EndSFFT must label the LAST instruction of the repeat block.
          ; When both sums are enabled the IMAG accumulate is emitted
          ; first so that it stays inside the block; previously it fell
          ; after EndSFFT and was executed only once per sample.
          .if   REAL_EN                    ;
          .if   IMAG_EN                    ;
          addf  R2,R5                      ; Sum all IMAG values (both enabled)
          .endif                           ;
          .endif                           ;
EndSFFT   .if   REAL_EN                    ;
          addf  R3,R4                      ; Sum all REAL values
          .endif                           ;
          .if   REAL_EN=0                  ;
          .if   IMAG_EN                    ;
          addf  R2,R5                      ; Sum all IMAG values (IMAG only)
          .endif                           ;
          .endif                           ;
          ;-----------------------------------------------
          ; If the band pass filter is enabled, call the
          ; filter function for either REAL or IMAG data.
          ; The filter result replaces the plain bin sum.
          ;-----------------------------------------------
          .if      BPF_EN                  ;
          .if      REAL_EN                 ;
          ldi      0,R0                    ; R0 = 0 selects the REAL bins
          call     BPFilter                ;
          ldf      R0,R4                   ;
          .endif                           ;
          .if      IMAG_EN                 ;
          ldi      1,R0                    ; R0 = 1 selects the IMAG bins
          call     BPFilter                ;
          ldf      R0,R5                   ;
          .endif                           ;
          .endif                           ;
          ;-----------------------------------------------
          ; When the SFFT is finished, scale the results
          ; such that the output magnitude is the same
          ; as the input magnitude
          ;-----------------------------------------------
ExitSFFT  mpyf     @Scale,R4               ; undo the SFFT growth
          mpyf     @Scale,R5               ;
          mpyf     @REAL_VEC,R4            ; apply the per-output gains
          mpyf     @IMAG_VEC,R5            ;
          stf      R4,@REALSUM             ;
          stf      R5,@IMAGSUM             ;
          rets                             ;
;========================================================================
; BPFilter calculates the sum of either the REAL or IMAG frequency bins
; using selected bandpass filter coefficients.  IMAG summation is selected
; if the value passed to the function in R0 is 1, otherwise set R0 = 0
; Also note the use of RPTS in the MPY/ACCU summation loop.  Since the
; device fetches the following opcode only one time, the current code block
; can be used for coefficients or data without bus conflict.
;========================================================================
; BPFilter
;   In:   R0 = 0 to sum the REAL bins, 1 to sum the IMAG bins
;   Out:  R0 = filter output (float)
;   Uses: IR0, BK, AR0, AR1, R2, R3
;   The bins are weighted by the alternating +1/-1 table at CM1v.  The
;   first and last bins get half weight, and BPF_LENG bins in between
;   get full weight.
BPFilter  ldi      RIBINSIZE,IR0           ; Step size of R/I pairs
          ldi      2,BK                    ; Size of coefficient table
          ldi      @CM1,AR0                ; Base address of coefficient
          ldi      @Bbase,AR1              ; Base address of SFFT data
          addi     START_BIN*2,AR1         ; Point to start bin
          addi     R0,AR1                  ; add IMAG offset of 1 if selected
          ;-   -   -   -   -   -   -   -   ;
          ldf      0,R0                    ; Clear the pending product
          ldf      0.5,R3                  ; In all cases, the start and end
          mpyf3    *AR1++(IR0),R3,R2       ; bins are scaled back by 50%
          mpyf     *AR0++(1)%,R2           ; R2 = 0.5 * bin[start] * coef
          ;-   -   -   -   -   -   -   -   ;
          rpts     BPF_LENG-1              ; RPTS allows no conflict single
          mpyf3   *AR0++(1)%,*AR1++(IR0),R0; cycle access to coefficients
       || addf3    R0,R2,R2                ; stored in the same RAM block as
          addf     R0,R2                   ; the code.  Add the last product
          ;-   -   -   -   -   -   -   -   ;
          mpyf     *AR0,R3                 ; Scale back end value by 50%
          mpyf     *AR1,R3                 ; R3 = 0.5 * coef * bin[end]
          addf3    R3,R2,R0                ; R0 = total
          rets                             ;
          ;-------------------------------------------------------------
          ; The following are the coefficients that are used in the BPF.
          ; Since the coefficients are accessed using circular
          ; addressing, the starting address is set using the .brstart
          ; directive to ensure the correct address boundary.
          ;-------------------------------------------------------------
          .brstart "COEF",4                ; Circular addressed data needs to
CM1v      .float    1                      ; start on a 2^N boundary that is
          .float   -1                      ; at least the buffer length
CM1       .word    CM1v                    ; Pointer loaded by BPFilter

;========================================================================
; The output section is written for both Spectrum analyzer output
; as well as REAL/IMAG filter sum outputs
;========================================================================
; Output - send one value to the DAC.
;   SPECT_EN=0: the value is REALSUM and/or IMAGSUM (per REAL_EN/IMAG_EN).
;   SPECT_EN=1: the value is the log2 magnitude of one windowed bin; the
;               bin position (OutBin) advances by 1/RATE each sample and
;               a MAX level sync pulse is sent while the position is bin 0.
;   Uses: R0, R2, AR0 (plus FLOG2)
Output:   ; REAL/IMAG sum output
          ;-------------------------
          .if      SPECT_EN=0       ; If SPECT_EN=0 (disabled) output the
          ldf      0,R0             ; enabled REAL and/or IMAG sums
          .if      REAL_EN          ;
          addf     @REALSUM,R0      ; Output REAL/IMAG bin sum
          .endif                    ;
          .if      IMAG_EN          ;
          addf     @IMAGSUM,R0      ;
          .endif                    ;
          b        Out              ; Skip the spectrum analyzer code
          .endif                    ;
          ;----------------------------------------------------
          ; The Spectrum analyzer output section is bypassed
          ; if the spectrum analyzer is not enabled
          ;----------------------------------------------------
          ldf      @OutBin,R0       ; Point to next output bin
          addf     1.0/RATE,R0      ; increment, wrap around if at end
          cmpf     N/2,R0           ; >> Note:
          ldfge    0,R0             ; >> A floating point add followed by a
          stf      R0,@OutBin       ; >> FIX operation provides an easy way
          fix      R0,R0            ; >> to interpolate data arrays using a
          bzd      Out              ; >> fractional sub-step.
          mpyi     RIBINSIZE,R0     ; >> DSK_SG.EXE also uses this trick
          ldfz     @MAX,R0          ; If at base Fbin 0 Hz, output a synch
          ldi      @Bbase,AR0       ;
          ; BZD is a delayed branch: the three instructions after it are
          ; executed whether or not the branch to Out is taken.
          subi     2,AR0            ; point to previous bin
          addi     R0,AR0           ; AR0 -> bin k-1 (R0 = 2*k words)
          ;- - - - - - - - - - - - -
          ; Window: bin[k] - 0.5*(bin[k-1] + bin[k+1]) on REAL and IMAG.
          ; NOTE(review): for the last displayed bin (k = N/2-1) the +4/+5
          ; offsets address the two words just past SFFTDATA, which are
          ; the first words of BUF - confirm this is acceptable.
          ldf      *+AR0(DI+0),R0   ; Perform convolutional window filter
       || ldf      *+AR0(DR+0),R2   ; on the R/I pairs for this output
          addf     *+AR0(DI+4),R0   ;
          addf     *+AR0(DR+4),R2   ;
          mpyf     -0.5,R0          ; Scaling coefficient for -1,+1 bins
          mpyf     -0.5,R2          ;
          addf     *+AR0(DI+2),R0   ;
          addf     *+AR0(DR+2),R2   ;
          ;- - - - - - - - - - - - -
          mpyf     R0,R0            ; Calculate REAL^2 + IMAG^2 magnitude
          mpyf     R2,R2            ;
          addf     R2,R0            ;
          call     FLOG2            ; Convert to log2(), then scale
          mpyf     32,R0            ; and shift for best display
          mpyf     32,R0            ;
          subf     @MAX,R0          ;
          ;- - - - - - - - - - - - -
Out       cmpf     @MAX,R0          ; Clip value if out of DAC range
          ldfgt    @MAX,R0          ;
          cmpf     @MIN,R0          ;
          ldflt    @MIN,R0          ;
          fix      R0,R0            ; Convert to integer DAC output
          andn     3,R0             ; Clear accidental request for 2 ndy xmit
          sti      R0,@S0_xdata     ; Output DAC value to serial port
          rets                      ;
;====================================================;
; FLOG2() Ultra Fast LOG2 FUNCTION                   ;
; computes log2(R0) and returns float value in R0    ;
;====================================================;
; FLOG2
;   In:   R0 = float value
;   Out:  R0 = approximate log2(R0) as a float, or -1 when R0 <= 0
;   The float's bit pattern (exponent with mantissa appended) is
;   reinterpreted as an integer and rescaled by FLOG2SC (2^-24).
FLOG2:    cmpf     0.0,R0           ; Exit if value is <= Zero
          ldfle    -1,R0            ; if x<=0 return -1 (error)
          retsle                    ; return if X<=0
          lsh      1,R0             ; Concatenate mantissa to exponent
          pushf    R0               ; Convert 'fast log' to int, then float
          pop      R0               ; At this point result is 2^24 too large
          float    R0,R0            ;
          mpyf     @FLOG2SC,R0      ; Mpy by scale factor
          rets                      ;
;=====================================================;
; The startup stub is used during initialization only ;
; and can be safely overwritten by the stack or data  ;
; The stack can also be moved here but can cause      ;
; trouble while debugging                             ;
;=====================================================;
          .entry   ST_STUB          ; Debugger starts here
          ; ST_STUB: one time setup of the stack, timer 0 and serial
          ; port 0, then falls through into AIC_INIT.
ST_STUB   ldp      T0_ctrl          ; Set the data page used by all @ accesses
          ldi      @stack,SP        ; SP = address held in 'stack'
          ldi      0,R0             ; Halt timer 0
          sti      R0,@T0_ctrl      ;
          sti      R0,@T0_count     ; Set count to 0
          ldi      TIM0_prd,R0      ; Set period
          sti      R0,@T0_prd       ;
          ldi      0x2C1,R0         ; Restart timer 0
          sti      R0,@T0_ctrl      ;
          ;---------------------
          ldi      @S0xctrl,R0      ;
          sti      R0,@S0_xctrl     ; transmit control
          ldi      @S0rctrl,R0      ;
          sti      R0,@S0_rctrl     ; receive control
          ldi      0,R0             ;
          sti      R0,@S0_xdata     ; DXR data value
          ldi      @S0gctrl,R0      ; Setup serial port
          sti      R0,@S0_gctrl     ; global control
;======================================================;
; This section of code initializes the AIC             ;
;======================================================;
AIC_INIT  LDI      0x10,IE          ; Enable only XINT interrupt
          andn     0x34,IF          ; Clear the pending flags in mask 0x34
          ldi      0,R0             ;
          sti      R0,@S0_xdata     ;
          RPTS     0x040            ; Hold the reset level for a while
          LDI      2,IOF            ; XF0=0 resets AIC
          rpts     0x40             ; Hold the run level for a while
          LDI      6,IOF            ; XF0=1 runs AIC
          ;---------------------
          ldi      @C_REG,R0        ; Setup control register
          call     prog_AIC         ;
          ldi      0xfffc  ,R0      ; Program the AIC to be real slow
          call     prog_AIC         ;
          ldi      0xfffc|2,R0      ;
          call     prog_AIC         ;
          ldi      @B_REG,R0        ; Bump up the Fs to final rate
          call     prog_AIC         ; (smallest divisor should be last)
          ldi      @A_REG,R0        ;
          call     prog_AIC         ;
          b        main             ; Setup complete, go to the main loop
;======================================================;
; prog_AIC is used to write new timing configurations  ;
; to the AIC.  If you single step this routine, the AIC;
; will not be programmed properly and will likely crash;
;                                                      ;
; STEP OVER THIS ROUTINE USING THE F10 FUNCTION STEP   ;
;======================================================;
; prog_AIC
;   In:   R0 = AIC register word to send
;   Uses: R0, R1
;   Each IDLE waits for the next enabled interrupt (XINT during init)
;   so the three writes go out on successive transmit slots.
prog_AIC  ldi      @S0_xdata,R1     ; Use original DXR data during 2 ndy
          sti      R1,@S0_xdata     ;
          idle                      ; Wait for the transmit interrupt
          ldi      @S0_xdata,R1     ; Use original DXR data during 2 ndy
          or       3,R1             ; Request 2 ndy XMIT
          sti      R1,@S0_xdata     ;
          idle                      ;
          sti      R0,@S0_xdata     ; Send register value
          idle                      ;
          andn     3,R1             ; Clear the 2 ndy request bits again
          sti      R1,@S0_xdata     ; Leave with original safe value in DXR
          ;---------------------
          ldi      @S0_rdata,R0     ; Fix the receiver underrun by reading
          rets                      ; the DRR before returning
;========================================================;
; By placing the stack at the end of the users runtime   ;
; code, maximum space is made available for applications.;
; This line can be moved to the front of the init stub   ;
; but is placed here to keep the init stub intact for    ;
; debug purposes                                         ;
;========================================================;
stack     .word    stack            ; Holds its own address; ST_STUB loads SP from it
         ; DO NOT PUT 'stack' IN THE VECTOR SPACE!
;========================================================;
; Install the XINT/RINT ISR branch vectors               ;
;========================================================;
          .start   "SP0VECTS",0x809FC5
          .sect    "SP0VECTS"
          ; Two consecutive branch vectors starting at 0x809FC5
          B        XINT             ; XINT0 - serial port 0 transmit
          B        RINT             ; RINT0 - serial port 0 receive
