;;; -*- Base: 10; Syntax: Common-Lisp; Package: cmi; Patch-File: Yes; -*- 

(in-package 'cmi)

;;
;;; Plug for io-shuffle
;;;
(defun twuffle-internal-io-transpose-always
    (address length transposition-type column-major-p)
  (assert (or (= transposition-type 2) ;cmfs:io-read-from-serial-data
	      (= transposition-type 3) ;cmfs:io-write-to-serial-data
	      ))
  (assert (= 1 (logcount length)))
  (let* ((geometry-id          (cm:vp-set-geometry cm:*current-vp-set*))
	 (geometry-serial-number (cm:geometry-serial-number geometry-id))
	 (cache-index (cached-io-twuffles
			geometry-serial-number length transposition-type column-major-p))
	 (geometry-rank (geometry-id-rank geometry-id)))
    ;;
    ;; Here's the patch, this assertion is bogus.
    ;;(assert (<= 1 geometry-rank 2) nil "Geometry must be rank 1 or 2, not ~d" geometry-rank)
    ;;
    (let ((twuffle-program
	   (if (>= cache-index 0)
	       (aref *io-twuffle-cache* cache-index)
	       (let* (
		      (geometry-descriptors (geometry-id-descriptors geometry-id))
		      (data-bits (log2 length))
		      (vp-bits   (log2 (geometry-id-vp-ratio geometry-id)))
		      (permutation-length (+ cm:*physical-processors-length* data-bits vp-bits))
		      (transpose-permutation
		       (let* ((permutation
			       (if (= 1 geometry-rank)
				   ;; 1d-geometry
				   (make-1d-io-shuffle-permutation
				     cm:*physical-processors-length*
				     data-bits
				     (log2 (axis-descriptor-length (aref geometry-descriptors 0))))
				   (make-nd-io-shuffle-permutation-from-geometry
				     data-bits
				     geometry-id
				     column-major-p))
				))
			 (cond ((eq (axis-descriptor-ordering
				      (aref geometry-descriptors 0)) :framebuffer-order)
				;; if one of the axes is framebuffer order,
				;; then both must be framebuffer order.
				(assert
				  (eq (axis-descriptor-ordering
					(aref geometry-descriptors 1)) :framebuffer-order))
				(let* (
				       (x-off-chip-bits
					(axis-descriptor-off-chip-bits (aref geometry-descriptors 0)))
				       (y-off-chip-bits
					(axis-descriptor-off-chip-bits (aref geometry-descriptors 1)))
				       (x-on-chip-bits
					(axis-descriptor-on-chip-bits (aref geometry-descriptors 0)))
				       (y-on-chip-bits
					(axis-descriptor-on-chip-bits (aref geometry-descriptors 1)))
				       (x-vp-bits
					(log2 (axis-descriptor-vp-ratio (aref geometry-descriptors 0))))
				       (y-vp-bits
					(log2 (axis-descriptor-vp-ratio (aref geometry-descriptors 1))))
				       (fb-output-permutation
					(cmi::make-frame-buffer-shuffle-part-permutation
					  data-bits
					  x-off-chip-bits
					  y-off-chip-bits
					  x-on-chip-bits
					  y-on-chip-bits
					  x-vp-bits
					  y-vp-bits
					  ))
				       )
				  (if (= transposition-type 2) ;input
				      (compose-permutations
					permutation-length
					fb-output-permutation
					permutation
					)
				      (inverse-permutation ;output
					permutation-length
					(compose-permutations
					  permutation-length
					  fb-output-permutation
					  permutation
					  )))))
			       ((eq transposition-type 2) ;cmfs:io-read-from-serial-data
				permutation)
			       (t
				 (inverse-permutation permutation-length permutation))
			       )
			 ))
		      (axis-size-array
		       (let* ((array (make-array geometry-rank :element-type 'fixnum)))
			 (loop for i from 0 below geometry-rank
			       do (setf (aref array i)
					(axis-descriptor-off-chip-bits
					 (aref geometry-descriptors i))))
			 array))
		      (axis-start-array
		       (make-start-array geometry-rank axis-size-array))
		      (ungray-code-at-beginning-array
		       (let* ((array (make-array geometry-rank :element-type 'fixnum)))
			 (loop for i from 0 below geometry-rank
			       do (setf (aref array i)
					(if (and (= transposition-type 3)
						 (eq (axis-descriptor-ordering
						       (aref geometry-descriptors i)) :news-order)
						 )
					    1
					    0
					    )))
			 array))
		      (gray-code-at-end-array
		       (let* ((array (make-array geometry-rank :element-type 'fixnum)))
			 (loop for i from 0 below geometry-rank
			       do (setf (aref array i)
					(if (and (= transposition-type 2)
						 (eq (axis-descriptor-ordering
						       (aref geometry-descriptors i)) :news-order)
						 )
					    1
					    0
					    )))
			 array))
		      )
		 (cache-insert-io-twuffle
		   geometry-serial-number length transposition-type column-major-p 
		   (compile-twuffle
		     permutation-length
		     transpose-permutation
		     cm:*physical-processors-length*
		     geometry-rank
		     axis-size-array
		     axis-start-array
		     ungray-code-at-beginning-array
		     gray-code-at-end-array
		     (and (= geometry-rank 2)
			  ;; there is an optimization in place for column
			  ;; major only.  TYhis can be extended to row major
			  column-major-p
			  (if (= transposition-type 3)
			      (= 1 (aref ungray-code-at-beginning-array 0)
				 (aref ungray-code-at-beginning-array 1))
			      (= 1 (aref gray-code-at-end-array 0)
				 (aref gray-code-at-end-array 1))
			      )
			  ;; must be square
			  (= (axis-descriptor-length (aref geometry-descriptors 0))
			     (axis-descriptor-length (aref geometry-descriptors 1))
			     )
			  ;; must be balanced (default)
			  (>= 1
			      (- (axis-descriptor-off-chip-bits (aref geometry-descriptors 1))
				 (axis-descriptor-off-chip-bits (aref geometry-descriptors 0)))
			      0)
			  )
		     ))
		 ))))
      #+lc(declare (type (simple-array twuffle-instruction (*)) twuffle-program))
      (execute-twuffle-program address address length twuffle-program)
      )))

(cmi::increment-patch-level 1)
