;*********************************************************************
;
; Filename:  VITERBI.ASM
;
;
; Author:  Chris Buehler
; Date: 11/21/95
; Last revision:  12/21/95
;
;*********************************************************************
;
; Description:  Contains the functions to implement the full viterbi
;		decoder for the 16-state code in V.34.  The algorithm
;		is split into 2 parts, 8 states each, to spread the
;		computations more evenly over the execution of the
;		program.  However, the algorithm can execute easily
;		within one sampling period at 7200 Hz.
;		important functions:
;		1 quantize4            - quantizes points to 4 subsets
;		2 calculate_errors     - calculates 2D errors
;		3 calc_branch_errors   - calculates 4D errors
;		4 update_path_metrics  - updates trellis path metrics
;		5 trace_back           - traces trellis path back
;		6 upm_finish           - cleans up after execution
;
;
; Algorithm:  The algorithm seeks to choose a trellis path with the smallest
;	      possible error.  At each iteration it updates the current
;	      path errors (update_path_metrics) and then selects the one
;	      with the smallest metric.  It then follows the path to its
;	      oldest state (trace_back) and outputs the appropriate point.
;	      A more complete description of the algorithm can be found in
;	      our project submission paper or in standard references on
;	      the viterbi maximum-likelihood algorithm.
;
;*********************************************************************
;
; Usage:  Variables must be set properly before calling update_path_metrics.
;	  before calling the first time (1st 8 states):
;	    all errors should be calculated with quantize4, calculate_errors
;		and calc_branch_errors
;	    cmm must be set to 0x3FFFFFFF (maximum)
;	    stateptr must be initialized to the start of the states table
;	    vt_cpm_curr must point to the current path metrics array
;	    pptr must point to the current path pointer array
;
;	  before calling the second time (2nd 8 states):
;	    cmm must be left unchanged
;	    stateptr must be initialized to the middle of the states table
;	    vt_cpm_curr must be set to the middle of the path metrics array
;	    pptr must point to the middle of the path pointer array
;
;	  After calling update_path_metrics twice, upm_finish must be
;	  called to set up pointers for the next iteration.
;	  Then, trace_back can be called to walk through the path pointer
;	  matrix to find the most likely 4D point.
;
;
;
; Inputs:  quantize4
;	      ytx,yty, address of x and y arrays
;	   calculate_errors
;	      x1,x2,y1,y2,ytx,yty
;	   calc_branch_errors
;	      error1, error2
;	   update_path_metrics
;	      vt_cpm_prev,b_err,pptr
;	   trace_back
;	      vt_ppp
;	   upm_finish
;	      vt_cpm_curr,pptr
;
;
; Outputs: quantize4
;	     x1,x2,y1,y2
;	   calculate_errors
;	     error1,error2
;	   calc_branch_errors
;	     b_err
;	   update_path_metrics
;	     vt_cpm_curr
;	   trace_back
;	     yx,yy
;	   upm_finish
;	     vt_cpm_prev
;
;*********************************************************************
  .mmregs
  .include macros.inc

  ; functions
  .global quantize4,calculate_errors,calc_branch_errors
  .global update_path_metrics, viterbi_fill, trace_back
  .global get_new_point, upm_finish

  ; variables
  .global recvdata1
  .global parm0,parm1,parm2,parm3
  .global saveAR0,saveAR1,saveAR2,saveAR4,saveAR6
  .global ytx, yty, x1, y1, x2, y2, yx, yy
  .global error1, error2, bptr, b_err
  .global states, vt_cpm_curr, vt_cpm_prev
  .global vt_ppp0, vt_ppp15
  .global minstate, cmm
  .global m,oldpptr,pptr,pend,bpts,bend,vfcount,stateptr

  .text

; quantize4
;   Quantizes the received point (ytx,yty) onto the four 2D subsets and
;   writes the four candidate coordinates to the caller's x and y arrays.
;
;   inputs:   parm0    = address of the 4-word x output array
;             parm1    = address of the 4-word y output array
;             ytx, yty = received point
;   outputs:  x[0] = x[1] = tx        x[2] = x[3] = tx +/- 256
;             y[0] = y[3] = ty        y[1] = y[2] = ty +/- 256
;             (tx,ty) is the quantized subset-0 point; the sign of each
;             256 offset follows the sign of (ytx-tx) and (yty-ty)
;   scratch:  ACC, temp0-temp3, INDX (written with 4), DP (recvdata1 page)
;   ARs:      AR0 and AR1 are saved and restored; ARP is AR1 on exit
;
;   AR0 is selected explicitly below, so the routine does not depend on
;   the caller having made AR0 the current auxiliary register.

quantize4:
;//  int tx,ty, diffx,diffy;
;//  tx = temp0, ty = temp1, diffx = temp2, diffy = temp3

  ldp	#0
  splk	#4,INDX
  ldpk	recvdata1
  sar	AR0,saveAR0
  sar	AR1,saveAR1
  lar	AR0,parm0			; AR0 -> x output array
  lar	AR1,parm1			; AR1 -> y output array
  mar	*,AR0				; x stores go through AR0: select it here

;//  subtract 1 for subset 0
;//  tx = ytx-128;
;//  round to nearest multiple of 4 , still 9:7
  lacc	ytx
  sub	#128

;//  tx = (labs(tx) + 256) & 0xFFFFFE00;   magnitude is rounded,
;//  if (original tx <= 0) tx = -tx;       then the sign is put back
  bcnd	q4xpos,GT
  abs
  add	#256
  and	#0FE00h
  neg
  b     q4do_ycoord
q4xpos:
  add	#256
  and	#0FE00h
q4do_ycoord:
;//  tx += 128;
  add	#128    			; ACC = tx
  sacl	temp0
;//  diffx = ytx-tx;
  neg
  add	ytx        			; ACC = diffx
  sacl	temp2

;//  ty = yty-128;
  lacc	yty
  sub	#128

;//  ty = (labs(ty) + 256) & 0xFFFFFE00;   same rounding as for x
;//  if (original ty <= 0) ty = -ty;
  bcnd	q4ypos,GT
  abs
  add	#256
  and	#0FE00h
  neg
  b     q4do_rest
q4ypos:
  add	#256
  and	#0FE00h
q4do_rest:

;//  ty += 128;
  add	#128  				; ACC = ty
  sacl	temp1
;//  diffy = yty-ty;
  neg
  add	yty				; ACC = diffy
  sacl	temp3

;  // calculating 0 subset done, figure out rest as offset from 0
;//  AR0[0] = tx;
;//  AR0[1] = tx;
  lacc	temp0
  sacl  *+
  sacl	*+

;//  AR0[2] = AR0[3] = (diffx < 0) ? tx-256 : tx+256;
  lacc	temp2
  bcnd	q4diffx_pos,GEQ
  lacc	temp0
  sub	#256
  sacl	*+
  sacl	*,AR1				; last x store, switch to the y array
  b	q4calc_ys
q4diffx_pos:
  lacc	temp0
  add	#256
  sacl	*+
  sacl	*,AR1				; last x store, switch to the y array
q4calc_ys:
;//  AR1[0] = ty;
  lacc	temp1
  sacl	*+

;//  AR1[1] = AR1[2] = (diffy < 0) ? ty-256 : ty+256;
  lacc	temp3
  bcnd	q4diffy_pos,GEQ
  lacc	temp1
  sub	#256
  sacl	*+
  sacl	*+
  b	q4done
q4diffy_pos:
  lacc	temp1
  add	#256
  sacl	*+
  sacl	*+
q4done:

;//  AR1[3] = ty;
  lacc	temp1
  sacl	*

  lar	AR0,saveAR0			; give the caller its ARs back
  lar	AR1,saveAR1
  ret

; calculate_errors
;   Computes the squared 2D distance between the received point (ytx,yty)
;   and each of four candidate points, scaled down by 4 bits.
;
;   inputs:   parm0 = address of 4 candidate x values
;             parm1 = address of 4 candidate y values
;             parm2 = address of the 4-word error output array
;             ytx, yty = received point
;   outputs:  err[i] = ((x[i]-ytx)^2 + (y[i]-yty)^2) >> 4,  i = 0..3
;             (error is stored 6:10)
;   scratch:  ACC, PREG, TREG0, temp0, temp1, BRCR, DP (recvdata1 page)
;   ARs:      AR0, AR1 and AR2 are saved on entry and restored before
;             returning; ARP is AR0 on exit
;
;//void calculate_errors(void)
;//{
;//  int i;
;//  long temp;
;//  for(i = 0; i < 4; i++)
;//  {
;//    temp = long(AR0[i]-ytx)*long(AR0[i]-ytx)
;//	   + long(AR1[i]-yty)*long(AR1[i]-yty);
;//    AR2[i] = (temp>>4);
;//  }
;//}
calculate_errors:
  ldpk	recvdata1
  sar	AR0,saveAR0
  sar	AR1,saveAR1
  sar	AR2,saveAR2
  lar	AR0,parm0			; AR0 -> candidate x values
  lar	AR1,parm1			; AR1 -> candidate y values
  lar	AR2,parm2			; AR2 -> error output

  ldp	#0
  splk	#3,BRCR				; 4 passes through the block
  ldpk	recvdata1
  mar	*,AR0
  rptb	calc_err_loop-1
    lacc  *+,AR1			; dx = x[i] - ytx
    sub	  ytx
    sacl  temp0
    lacc  *+,AR2			; dy = y[i] - yty
    sub	  yty
    sacl  temp1
    zap					; ACC = 0, PREG = 0
    sqra  temp0				; PREG = dx*dx
    sqra  temp1				; ACC += dx*dx, PREG = dy*dy
    apac				; ACC += dy*dy
    bsar  4				; scale the sum down by 4 bits
    sacl  *+,AR0			; err[i] = low word, back to x array
calc_err_loop:
  ; the registers saved on entry were previously left clobbered
  lar	AR0,saveAR0
  lar	AR1,saveAR1
  lar	AR2,saveAR2
  ret

; calc_branch_errors
;   For each of the eight 4D subsets, compares the two candidate pairs of
;   2D points that make up the subset, keeps the pair with the smaller
;   summed error and records both its error and its coordinates.
;
;   inputs:   error1[0..3], error2[0..3]  2D errors of the 1st / 2nd symbol
;             x1,y1,x2,y2 [0..3]          candidate 2D points
;             bptr                        address of the branch point array
;                                         for the current 4D symbol
;   outputs:  b_err[s]                    winning error for subset s = 0..7
;             bptr[4*s .. 4*s+3]          x1,y1,x2,y2 of the winning pair
;   scratch:  ACC, DP (recvdata1 page)
;   ARs:      AR0 is saved on entry and restored before returning;
;             ARP is AR0 on exit
;
;   On a tie (difference == 0) the second pair of each subset is chosen.
calc_branch_errors:
  ldpk	recvdata1
  sar	AR0,saveAR0
  lar	AR0,bptr			; AR0 walks the 32-word output array
  mar	*,AR0

;  // subset 0  (0,0) vs. (2,2)
;//  if(error1[0]+error2[0]-(error1[2]+error2[2]) < 0)
;//  {
;//    b_err[0] = error1[0]+error2[0];
;//    bx1[vcurrent][0] = x1[0];
;//    by1[vcurrent][0] = y1[0];
;//    bx2[vcurrent][0] = x2[0];
;//    by2[vcurrent][0] = y2[0];
;//  }
;//  else
;//  {
;//    b_err[0] = error1[2]+error2[2];
;//    bx1[vcurrent][0] = x1[2];
;//    by1[vcurrent][0] = y1[2];
;//    bx2[vcurrent][0] = x2[2];
;//    by2[vcurrent][0] = y2[2];
;//  }
  lacc	error1+0
  add	error2+0
  sub	error1+2
  sub	error2+2
  bcnd	subset0, GEQ
  lacc	error1+0			; pair (0,0) wins
  add	error2+0
  sacl	b_err+0
  lacc	x1+0
  sacl  *+
  lacc	y1+0
  sacl	*+
  lacc	x2+0
  sacl	*+
  lacc	y2+0
  sacl	*+
  b     endsubset0
subset0:
  lacc	error1+2			; pair (2,2) wins
  add	error2+2
  sacl	b_err+0
  lacc	x1+2
  sacl  *+
  lacc	y1+2
  sacl	*+
  lacc	x2+2
  sacl	*+
  lacc	y2+2
  sacl	*+
endsubset0:

;  // subset 1  (0,1) vs. (2,3)
  lacc	error1+0
  add	error2+1
  sub	error1+2
  sub	error2+3
  bcnd	subset1, GEQ
  lacc	error1+0			; pair (0,1) wins
  add	error2+1
  sacl	b_err+1
  lacc	x1+0
  sacl  *+
  lacc	y1+0
  sacl	*+
  lacc	x2+1
  sacl	*+
  lacc	y2+1
  sacl	*+
  b     endsubset1
subset1:
  lacc	error1+2			; pair (2,3) wins
  add	error2+3
  sacl	b_err+1
  lacc	x1+2
  sacl  *+
  lacc	y1+2
  sacl	*+
  lacc	x2+3
  sacl	*+
  lacc	y2+3
  sacl	*+
endsubset1:

;  // subset 2  (0,2) vs. (2,0)
  lacc	error1+0
  add	error2+2
  sub	error1+2
  sub	error2+0
  bcnd	subset2, GEQ
  lacc	error1+0			; pair (0,2) wins
  add	error2+2
  sacl	b_err+2
  lacc	x1+0
  sacl  *+
  lacc	y1+0
  sacl	*+
  lacc	x2+2
  sacl	*+
  lacc	y2+2
  sacl	*+
  b     endsubset2
subset2:
  lacc	error1+2			; pair (2,0) wins
  add	error2+0
  sacl	b_err+2
  lacc	x1+2
  sacl  *+
  lacc	y1+2
  sacl	*+
  lacc	x2+0
  sacl	*+
  lacc	y2+0
  sacl	*+
endsubset2:

;  // subset 3  (0,3) vs. (2,1)
  lacc	error1+0
  add	error2+3
  sub	error1+2
  sub	error2+1
  bcnd	subset3, GEQ
  lacc	error1+0			; pair (0,3) wins
  add	error2+3
  sacl	b_err+3
  lacc	x1+0
  sacl  *+
  lacc	y1+0
  sacl	*+
  lacc	x2+3
  sacl	*+
  lacc	y2+3
  sacl	*+
  b     endsubset3
subset3:
  lacc	error1+2			; pair (2,1) wins
  add	error2+1
  sacl	b_err+3
  lacc	x1+2
  sacl  *+
  lacc	y1+2
  sacl	*+
  lacc	x2+1
  sacl	*+
  lacc	y2+1
  sacl	*+
endsubset3:

;  // subset 4  (1,1) vs. (3,3)
  lacc	error1+1
  add	error2+1
  sub	error1+3
  sub	error2+3
  bcnd	subset4, GEQ
  lacc	error1+1			; pair (1,1) wins
  add	error2+1
  sacl	b_err+4
  lacc	x1+1
  sacl  *+
  lacc	y1+1
  sacl	*+
  lacc	x2+1
  sacl	*+
  lacc	y2+1
  sacl	*+
  b     endsubset4
subset4:
  lacc	error1+3			; pair (3,3) wins
  add	error2+3
  sacl	b_err+4
  lacc	x1+3
  sacl  *+
  lacc	y1+3
  sacl	*+
  lacc	x2+3
  sacl	*+
  lacc	y2+3
  sacl	*+
endsubset4:

;  // subset 5  (1,2) vs. (3,0)
  lacc	error1+1
  add	error2+2
  sub	error1+3
  sub	error2+0
  bcnd	subset5, GEQ
  lacc	error1+1			; pair (1,2) wins
  add	error2+2
  sacl	b_err+5
  lacc	x1+1
  sacl  *+
  lacc	y1+1
  sacl	*+
  lacc	x2+2
  sacl	*+
  lacc	y2+2
  sacl	*+
  b     endsubset5
subset5:
  lacc	error1+3			; pair (3,0) wins
  add	error2+0
  sacl	b_err+5
  lacc	x1+3
  sacl  *+
  lacc	y1+3
  sacl	*+
  lacc	x2+0
  sacl	*+
  lacc	y2+0
  sacl	*+
endsubset5:

;  // subset 6  (1,3) vs. (3,1)
  lacc	error1+1
  add	error2+3
  sub	error1+3
  sub	error2+1
  bcnd	subset6, GEQ
  lacc	error1+1			; pair (1,3) wins
  add	error2+3
  sacl	b_err+6
  lacc	x1+1
  sacl  *+
  lacc	y1+1
  sacl	*+
  lacc	x2+3
  sacl	*+
  lacc	y2+3
  sacl	*+
  b     endsubset6
subset6:
  lacc	error1+3			; pair (3,1) wins
  add	error2+1
  sacl	b_err+6
  lacc	x1+3
  sacl  *+
  lacc	y1+3
  sacl	*+
  lacc	x2+1
  sacl	*+
  lacc	y2+1
  sacl	*+
endsubset6:

;  // subset 7  (1,0) vs. (3,2)
  lacc	error1+1
  add	error2+0
  sub	error1+3
  sub	error2+2
  bcnd	subset7, GEQ
  lacc	error1+1			; pair (1,0) wins
  add	error2+0
  sacl	b_err+7
  lacc	x1+1
  sacl  *+
  lacc	y1+1
  sacl	*+
  lacc	x2+0
  sacl	*+
  lacc	y2+0
  sacl	*+
  b     endsubset7
subset7:
  lacc	error1+3			; pair (3,2) wins
  add	error2+2
  sacl	b_err+7
  lacc	x1+3
  sacl  *+
  lacc	y1+3
  sacl	*+
  lacc	x2+2
  sacl	*+
  lacc	y2+2
  sacl	*+
endsubset7:
  ; AR0 was saved on entry but previously never handed back
  lar	AR0,saveAR0
  ret


;  update_path_metrics
;    Runs the add-compare-select step for 8 trellis states (half of the
;    16-state trellis).  For each state it examines the 4 entries of the
;    states table, keeps the smallest (previous metric + branch error),
;    stores that metric and a 2-word path pointer entry, and tracks the
;    overall smallest metric in cmm / minstate.
;
;  parameters (all in memory):
;    stateptr    = address of the first states-table entry to process
;    vt_cpm_curr = address where the new path metrics are written
;    vt_cpm_prev = address of the previous path metrics
;    pptr        = address where the new path pointer entries are written
;    oldpptr     = base address used to build the previous-state link
;    bptr        = base address used to build the branch-points link
;    b_err       = branch error table
;    cmm         = running smallest metric (low word, then high word)
;
;  working storage:
;    ACCB  = minmetric for the state being processed
;    temp2 = states-table word (minst|minb) of the best candidate so far
;    temp4 = outer loop counter (8 states per call)
;
;    AR0 = states tables
;    AR1 = current cpms
;    AR2 = current ppps
;    AR4 = scratch register (address of b_err[branch])
;    AR6 = scratch register (address of previous metric, high word)
;
;  AR0, AR1, AR2, AR4 and AR6 are saved on entry and restored on exit.
;  SXM is cleared while the routine runs and is left SET on return.

update_path_metrics:
  ldpk	recvdata1
  sar	AR0,saveAR0
  sar	AR1,saveAR1
  sar	AR2,saveAR2
  sar	AR4,saveAR4
  sar	AR6,saveAR6

  lar	AR0,stateptr
  lar	AR1,vt_cpm_curr
  lar	AR2,pptr
  splk 	#8,temp4 			; 8 states per call (1/2 viterbi)
  clrc	SXM				; no sign extension on the 16-bit loads below
;//  for(i=0; i < 8; i++)            (8 of the 16 states per call)
;//  {
;//    minmetric = 0x3FFFFFFFL;
upm_outer:
  lacc	#07FFFh,15			; ACC = 0x3FFF8000
  or	#0FFFFh				; ACC = 0x3FFFFFFF
  sacb					; save minmetric in ACCB

;//    for(j=0; j < 4; j++)
;//    {
  ldp	#0
  splk	#3,BRCR				; 4 passes through the block
  ldpk	recvdata1
  mar	*,AR0
  rptb	upm_inner_end-1

;//      prevstate = (states[i][j]>>3)&0xF;
    lacc  *                             ; get prevstate|branch (4:3)
    bsar  3				; shift the branch bits out: ACC = prevstate
    sfl					; multiply by 2 (metrics are 2 words each)
    add	  #1 				;  add 1 for address of high word
    adds  vt_cpm_prev			; build address
    samm  AR6				;   to store in AR6

;//      branch = states[i][j]&0x7;
    lacc  *,AR6				; get prevstate|branch (4:3)
    and	  #7                            ; keep the 3 branch bits
    add   #b_err			; build address
    samm  AR4				;   to store in AR4

;//      metric = vt_cpm[(vcurrent-1)&0xF][prevstate];
;//      metric = metric + b_err[branch];
    lacc  *-,16	        		; load previous state metric (hi)
    or    *,AR4				;   (lo)
    add   *,AR0				; add branch error

;//      if(metric < minmetric)
;//      {
;//        minmetric = metric;
;//        minst = prevstate;
;//        minb = branch;
;//      }
    crlt				; running minimum is kept in ACCB
    bcnd  upm_no_update,NC		; C == 0: this candidate did not win
    lacc  *				; get prevstate|branch (4:3)
    sacl  temp2				; remember the winning table word
upm_no_update:
    mar	  *+				; next states-table entry
;//   }  [end of inner loop]
upm_inner_end:

;//    vt_cpm[vcurrent][i] = minmetric;
  mar  *,AR1
  lacb					; ACC = minmetric
  sacl *+				; store low word first,
  sach *+,AR2				;   then high word; switch to ppps


  ; path pointer entry, word 0: oldpptr + 2*minst
  lacc  temp2				; get minst|minb
  bsar  3				; ACC = minst
  sfl					; 2 words per path pointer entry
  add	oldpptr			; address of minst's entry under oldpptr
upm_ppp_done:
  sacl	*+      			; store min state address
  ; path pointer entry, word 1: bptr + 4*minb
  lacc	temp2,2				; (minst|minb) << 2
  and	#28				; keep minb*4 (4 words of points per branch)
  add	bptr
  sacl	*+				; store min branch pts address

;//    if(minmetric < curr_min_metric)  -or- (minmetric-curr_min_metric <0)
;//    {
;//      curr_min_metric = minmetric;
;//      minstate = i;
;//    }
  lacb
  sub	cmm+1,16			; subtract cmm high word
  sub	cmm				;   and low word
  bcnd	upm_outer_end,GEQ
  lacb
  sacl	cmm				; new smallest metric, low word
  sach	cmm+1				;   and high word
  lamm  AR2				; AR2 is already past this state's entry
  sub	#2				;   so step back 2 words
  sacl  minstate			; minstate = address of this state's ppp entry
;//  }
upm_outer_end:
  lacc	temp4
  sub	#1
  sacl	temp4
  bcnd  upm_outer,NEQ			; loop until all 8 states are done

  lar	AR0,saveAR0
  lar	AR1,saveAR1
  lar	AR2,saveAR2
  lar	AR4,saveAR4
  lar	AR6,saveAR6
  setc	SXM				; sign extension back on for the caller
;//}
  ret

; upm_finish
;   Housekeeping after both halves of update_path_metrics have run:
;     1. rewinds vt_cpm_curr by 16 words (to the start of the array) and
;        subtracts cmm from each of the 16 two-word path metrics,
;     2. swaps vt_cpm_curr and vt_cpm_prev,
;     3. advances bptr by 32 words, wrapping to bpts at bend,
;     4. sets oldpptr = pptr-16 and pptr = pptr+16, wrapping pptr to
;        vt_ppp0 at pend.
;
;   inputs:   vt_cpm_curr, vt_cpm_prev, cmm, bptr, pptr
;   outputs:  vt_cpm_curr, vt_cpm_prev, bptr, oldpptr, pptr,
;             normalized metrics in the (old) current metric array
;   scratch:  ACC, ACCB, BRCR, DP (recvdata1 page)
;   ARs:      AR1 and AR4 are saved and restored; ARP is AR1 on exit
;
;   The metric arithmetic uses lacl / subs for the low words so that the
;   32-bit values are rebuilt correctly whatever the state of SXM
;   (update_path_metrics returns with SXM set).
upm_finish:
  ldp	#0
  splk	#15,BRCR			; 16 metrics to normalize
  ldpk	recvdata1
  sar	AR1,saveAR1
  sar	AR4,saveAR4
  mar	*,AR1
  lacl	vt_cpm_curr			; sub 16 for 1/2 viterbi
  sub	#16
  sacl  vt_cpm_curr
  lar	AR1,vt_cpm_curr			; AR1 reads each metric
  lar	AR4,vt_cpm_curr			; AR4 writes it back in place
  rptb	upm_sub_cmms-1
    lacl  *+				; low word, zero-extended
    sacb
    lacc  *+,16,AR4			; high word into the upper half
    orb					; ACC = full 32-bit metric
    sub	  cmm+1,16			; subtract cmm high word
    subs  cmm				;   and low word (no sign extension)
    sacl  *+				; write back low word,
    sach  *+,AR1			;   then high word
upm_sub_cmms:

  lacl	vt_cpm_curr			; swaps the previous and
  sacb					;   current path metric
  lacl	vt_cpm_prev			;   arrays
  sacl	vt_cpm_curr
  lacb
  sacl	vt_cpm_prev

  ; update pointer to branch points array
  lacl	bptr
  add	#32				; 8 subsets * 4 words per 4D symbol
  sacl	bptr
  sub	#bend
  bcnd  no_reset_bptr,LT
  splk	#bpts,bptr			; ran off the end: wrap to the start
no_reset_bptr:

  ; update pointer to previous paths array
  lacl	pptr
  sub	#16				; sub 16 for 1/2 viterbi
  sacl	oldpptr				; row just written becomes the old row
  add	#32
  sacl	pptr				; next row
  sub   #pend
  bcnd	no_reset_pptr,LT
  splk	#vt_ppp0,pptr			; ran off the end: wrap to the start
no_reset_pptr:

  lar	AR1,saveAR1
  lar	AR4,saveAR4
  ret


; trace_back
;   Starting from the path pointer entry whose address is in minstate,
;   follows the stored previous-state links 15 times, then uses the second
;   word of the entry reached to fetch the four coordinates of the decoded
;   4D point.
;
;   inputs:   minstate = address of the path pointer entry of the best state
;             (word 0 of an entry = address of the previous entry,
;              word 1 = address of that branch's 4 point words, as stored
;              by update_path_metrics)
;   outputs:  yx, yy, yx+1, yy+1 = the four words read, in that order
;   scratch:  ACC, DP (recvdata1 page)
;   ARs:      AR0 is saved and restored; ARP is AR0 on exit
trace_back:
;//  int i, nextstate, branch,lastbaud;
  ldpk	recvdata1
  sar	AR0,saveAR0

;//  nextstate = minstate;
  lar	AR0,minstate			; load address of current min state
  mar	*,AR0

    ; each load replaces AR0 with the link stored at the address it holds;
    ; the 15 loads are unrolled on purpose
    lar	AR0 ,*
    lar	AR0 ,*
    lar	AR0 ,*
    lar	AR0 ,*
    lar	AR0 ,*
    lar	AR0 ,*
    lar	AR0 ,*
    lar	AR0 ,*
    lar	AR0 ,*
    lar	AR0 ,*
    lar	AR0 ,*
    lar	AR0 ,*
    lar	AR0 ,*
    lar	AR0 ,*
    lar	AR0 ,*
;//  branch = vt_pts[(vcurrent-15)&0xF][nextstate];
;//  lastbaud = (vcurrent-15)&0xF;
;//  yx[0] = bx1[lastbaud][branch];
;//  yy[0] = by1[lastbaud][branch];
;//  yx[1] = bx2[lastbaud][branch];
;//  yy[1] = by2[lastbaud][branch];
  mar	*+				; step to word 1 of the oldest entry
  lar	AR0,*				; AR0 -> the 4 stored branch point words
  lacc	*+
  sacl	yx

  lacc	*+
  sacl	yy

  lacc	*+
  sacl	yx+1

  lacc	*+
  sacl	yy+1
  lar	AR0,saveAR0
  ret





