mu/target/MSP430/kernel-multdiv.mu4

| This file is part of muforth: https://muforth.dev/
|
| Copyright 2002-2025 David Frech. (Read the LICENSE for details.)

__meta
hex
ram

loading MSP430 Kernel (multiply and divide: a work-in-progress)


| Unsigned multiply of two 16-bit values.
|
|   w  is loop counter
|   x  holds high half of result
|   y  holds low half of result, and also 16-bit multipli-er - m-er
| top  holds multipli-cand - m-cand

code um*  ( u1 u2 - udprod)
   y pop ( m-er)
label do-um*  ( needed by m* when m-er and m-cand have same sign)
   x clr ( prodhi)  #16 # w mov
   begin  1 # y bit  0!= if  top x add  then  x ror  y ror
          1 # w sub  0= until
   y push  ( prodlo)  x top mov  next  ;c


| Unsigned 32-bit by 16-bit divide.
|
|   w  is loop counter
|   x  holds bit 16 of shifted-left d-end
|   y  hold high part of d-end
|   z  holds low part of d-end
| top  holds d-sor

code um/mod  ( ud u - urem uquot)
   y pop  ( dendhi)  z pop  ( dendlo)  #16 # w mov
   begin  setc  z z addc  y y addc  x clr  0 # x addc
          top y sub  0 # x subc  ( carry = not borrow)
          u< if  top y add  1 # z bic  ( shift in 0, not 1)  then
          1 # w sub  0= until
   y push  z top mov  next  ;c


| Unsigned 31-bit by 16-bit divide. fig-FORTH style.
|
|   w  is loop counter
|   y  hold high part of d-end
|   z  holds low part of d-end
| top  holds d-sor

code um/mod-fig  ( ud u - urem uquot)
   y pop  ( dendhi)  z pop  ( dendlo)  #16 # w mov
   begin  setc  z z addc  y y addc  top y sub
          u< if  top y add  1 # z bic  ( shift in 0, not 1)  then
          1 # w sub  0= until
   y push  z top mov  next  ;c


| Signed multiply of two 16-bit values.
|
| This is trickier. We have to be careful. There are several cases:
|
| * Both m-er and m-cand are 0<. Negate both, and proceed as with um*.
| * Both m-er and m-cand are >=0. Proceed as with um*.
| * m-er and m-cand have different signs. We want the m-er to be <0. If
|   m-cand is <0, swap m-er and m-cand and proceed with "mixed" multiply:
|   15 iterations of "unsigned" add and shift, followed by one "signed" add
|   and shift. This is one instance when the MSP430's "natural borrow"
|   causes us trouble. When we go to shift the product to the right that one
|   last time, shifting in the carry on the left, because we did a subtract
|   rather than an add, the carry is inverted. We fix this at the end by
|   XORing top with 8000 hex.
|
| The following code is correct but is clumsy in several respects. One is
| that we had to implement the 0< tests awkwkardly because of a limitation of
| the instruction set: there is no "jump if not negative" instruction, only a
| "jump if negative", so we do 0< tests by jumping conditionally over a
| following unconditional jump. It works, but it's slower and longer than
| necessary. Sadly, there are several of these 0< tests in this code.
|
| The tests for the different cases could be structured differently.
|
| Swapping the m-cand and m-er is clumsy.
|
| And I'm not happy about the kludge with the last carry in. Will it ever
| be 0? Should I simply shift in a 1? We know the product is negative: the
| m-er and m-cand had different signs!
|
| One last note: This code pushes a "flag" value to indicate whether it
| took the "mixed" multiply route (-1) or the unsigned multiply route (0).
| Obviously, in the production code these flags will go away, but they helped
| track down a stupid bug: I was using "bit" (AND) rather than XOR to test
| whether the m-cand and m-er had the same sign. Go figure.
|
| Register use is like that of um*:
|
|   w  is loop counter
|   x  holds high half of result
|   y  holds low half of result, and also 16-bit multipli-er - m-er
| top  holds multipli-cand - m-cand

code m*  ( n1 n2 - dprod)
   y pop ( m-er)  top x mov  y x xor  0< if  ( different signs)  | -1 # push
      top tst  0< if  top w mov  y top mov  w y mov  ( exch)  then

      x clr ( prodhi)  #15 # w mov
      begin  1 # y bit  0!= if  top x add  then  x ror  y ror
             1 # w sub  0= until
      top x sub  x ror  y ror  y push  ( prodlo)  x top mov
      8000 # top xor ( complement last carry)  next
   then

   ( same sign)
   | 0 # push
   top tst  0< if  top neg  y neg  then  ( now both >=0)  do-um* j  ;c


| Another quick take on this. We want m-cand >= 0. So, let's approach it
| this way: if m-cand < 0, negate *both* m-er and m-cand. Now there are two
| cases:
|
| * m-er *was* < 0 and is now >= 0. Jump to um*.
| * m-er *was* >= 0 and is now < 0. Great. Do the "mixed" multiply. And
|   shift in a 1 at the end. ;-)

| Except that this fails. The problem is that we are assuming that if we
| negate a value that is < 0 we will always get a value that is >= 0. This
| fails because 2's complement has a singularity: the most negative number -
| 8000 hex in a 16-bit world - negates to itself. It *stays* negative.
|
| So our assumptions in the cases above that m-er has switched signs are
| wrong, and lead to incorrect results. Eg,
|
|   -7fff  8000 m*2 ==> 3fff_8000  | correct
|    8000 -7fff m*2 ==> c000_8000  | WRONG: should be 3fff_8000
|    8000  8000 m*2 ==> c000_0000  | WRONG: should be 4000_0000
|
| We need to check same/different signs, as in the first m* above, to get
| the correct answer. Which is unfortunate, since the approach in m*2 is
| simpler and faster. And cleverer. And wrong. ;-)

code m*2  ( n1 n2 - dprod)
   y pop ( m-er)  top tst  0< if  top neg  y neg  then
   y tst  do-um* 0< until  ( both >= 0: do unsigned multiply)

   x clr ( prodhi)  #15 # w mov
   begin  1 # y bit  0!= if  top x add  then  x ror  y ror
          1 # w sub  0= until
   top x sub  setc  x ror  y ror
   y push  ( prodlo)  x top mov  ( prodhi)  next  ;c


| Take three? Let's combine the two and see if we can't get something
| reasonable.

code m*3  ( n1 n2 - dprod)
   y pop ( m-er)  top x mov  y x xor  0< if  ( different signs)  | -1 # push
      top tst  0< if  top w mov  y top mov  w y mov  ( exch)  then

      x clr ( prodhi)  #15 # w mov
      begin  1 # y bit  0!= if  top x add  then  x ror  y ror
             1 # w sub  0= until
      top x sub  setc  x ror  y ror
      y push  ( prodlo)  x top mov  ( prodhi)  next
   then

   ( same sign)
   | 0 # push
   top tst  0< if  top neg  y neg  then  ( now both >=0 ... maybe ;-)
   do-um* j  ;c

| Take four? What if we just didn't care? m-cand m-er and both be signed.
| Let's see what happens.
|
| Register use is like that of um*:
|
|   w  is loop counter
|   x  holds high half of result
|   y  holds low half of result, and also 16-bit multipli-er - m-er
| top  holds multipli-cand - m-cand

code m*4  ( n1 n2 - dprod)
   y pop ( m-er)  x clr ( prodhi)  #15 # w mov
   begin  1 # y bit  0!= if  top x add  then  x ror  y ror
          1 # w sub  0= until
   1 # y bit  0!= if  top x sub  then  x ror  y ror
   y push  ( prodlo)  x top mov  ( prodhi)
   8000 # top xor ( complement last carry)  next  ;c

( Tests.)
meta: table  create  ;code  ( index - value)
                     top top add  w top add  top ) top mov  next  ;c

table mcand   0 , 1 , ffff , 8000 , 8001 , 7fff ,  ( six "interesting" multiplicands)

: d=  ( alo ahi blo bhi - f)  ( f is true if alo=blo and ahi=bhi)
   rot = >r  = r> and ;

( For fetching and storing a series of cells.)
code @+   ( a - w a+)  top )+ w mov  w push  next  ;c
code !+   ( w a - a+)  sp )+ top ) mov  2 # top add  next  ;c

| : @+   ( a - n a+)   dup @  swap cell+ ;
| : !+   ( n a - a+)  tuck !       cell+ ;

: 2!  ( lo hi addr)    !+ ! ;
: 2@  ( addr - lo hi)  @+ @ swap ;

code 2drop   2 # sp add  top pop  next  ;c

meta: 2variable    4 buffer ;  ( A 2variable is just a 4-byte buffer!)

2variable bench
variable testee

: test  ( 'code)  testee !
   6 0 do  i mcand
      6 0 do  dup  i mcand  mpy32.um* bench 2!
              dup  i mcand  testee @execute  bench 2@ d=
              0= if  i mcand  bug  drop  then
      loop  drop  loop ;

{
: clamp16   ffff and ;
: clamp32   ffff_ffff and ;
: >lohi  ( w32 - lo16 hi16)  dup clamp16  swap #16 >>  clamp16 ;
: lohi>  ( lo16 hi16 - w32)  #16 <<  + ;
: sext16  dup 8000 and 2* - ;

: table  create  does>  ( index body - value)  swap cells + @ ;

( Table of unsigned multiplicands.)
table mcand   0 , 1 , ffff , 8000 , 8001 , 7fff ,  ( six "interesting" multiplicands)

variable testee
2variable bench
meta
: um*-test  ( 'code)  testee !
   6 0 do
      6 0 do
         cr  j mcand  dup u.  i mcand dup u.  ." : "
         2dup  *  clamp32  dup u.  bench !
         testee @ remote  lohi>  dup u.
         bench @  = if ." OK" else ." FAIL" then
      loop
   loop ;

: m*-test  ( 'code)  testee !
   6 0 do
      6 0 do
         cr  j mcand  dup u. sext16  i mcand dup u. sext16  ." : "
         2dup  *  clamp32  dup .  bench !
         testee @ remote  lohi>  dup .
         bench @  = if ." OK" else ." FAIL" then
      loop
   loop ;

forth
: d=  ( alo ahi blo bhi - f)  ( f is true if alo=blo and ahi=bhi)
   rot = >r  = r> and ;

: uorder  ( x y - lesser greater)  2dup u< if ^ then  swap ;
: order   ( x y - lesser greater)  2dup  < if ^ then  swap ;

: .h16  <#  4#  #>  type space ;
: .h32  <#  4#  char _ hold  4#  #>  type space ;

: host-u*/   ( offset mcand1 mcand2 - q r)  push  r@ * +  pop u/mod ;

2variable mcands
meta
: u/test  ( 'code)  testee !
   radix preserve  hex
   6 1 do
      6 1 do
         i mcand  j mcand  uorder  2dup mcands 2!
         cr  over .h16 ." * "  .h16 ." + "  1- .h16 ." => "
         mcands 2@  over 1- -rot  host-u*/         over .h16  dup .h16  bench 2!  ." , "
         mcands 2@  over 1- -rot  testee @ remote  over .h16  dup .h16  bench 2@
         d= if ." OK" else ." FAIL" then
      loop
   loop ;
forth

now nope is .regs
}

code d+  ( d1 d2 - dsum)
   x pop ( d2lo)  y pop ( d1hi)  sp )+ x add  ( d1lo + d2lo)  x push
   y top addc ( d1hi + d2hi + carry)  next  ;c

: u*/  ( offset mcand1 mcand2)  >r  r@ um*  rot 0 d+  r> um/mod ;