_32-BIT FLOATING-POINT MATH_
by Al Williams



[LISTING ONE]

;##########################################################
;# File: FPM.ASM                                          #
;# 386 Floating point package by Al Williams              #
;##########################################################
.MODEL         SMALL,C

; enable 386 instructions
.386
.DATA

; Number format
FPM_NUM        STRUC
SIGN           DB     ?            ; 0 for + FF for -
SCALE          DB     ?            ; position of decimal point
NUM            DD     ?            ; unsigned magnitude
FPM_NUM        ENDS

.CODE
; C  routines
PUBLIC         faddxy,fsubxy,fmulxy,fdivxy,bitscan

;************************************************
; Helper for C routines bitscan(dir,lword)
; if dir==0 do BSF on num
; if dir!=0 do BSR on num
; Returns index 1-32 or 0 if word was zero
bitscan        PROC
               ARG    DIR:WORD, LWORD:DWORD
               MOV    AX,DIR
               OR     AX,AX
               MOV    EAX,LWORD
               JZ     SHORT SCANFWD
               BSR    EAX,EAX
               JMP    SHORT BITSC0
SCANFWD:       BSF    EAX,EAX
BITSC0:        JNZ    SHORT BITSC1
               XOR    AX,AX
               RET
BITSC1:        INC    AX
               RET
bitscan        ENDP

;************************************************
; Adjust temp register @BX so that rightmost bit is 1
RADJ           PROC
; Find last set bit
               BSF    ECX,[BX].NUM
; If all zeros, jump to ZOUT
               JZ     SHORT ZOUT
; Shift right and adjust scale
               SHR    [BX].NUM,CL
               SUB    [BX].SCALE,CL
               RET
ZOUT:
               MOV    [BX].SCALE,0
               RET
RADJ           ENDP

;************************************************
; Adjust temp register @BX so leftmost bit is 1
LADJ           PROC
; Find first bit set
               BSR    ECX,[BX].NUM
; If all zero's goto zout (above in proc RADJ)
               JZ     ZOUT
; 31-Bit_index -> # places to shift
               MOV    CH,31
               SUB    CH,CL
               MOV    CL,CH
; Shift number and adjust scale
               SHL    [BX].NUM,CL
               ADD    [BX].SCALE,CL
               RET
LADJ           ENDP

;************************************************
; Add x+y->x
faddxy         PROC   USES SI
               ARG    ANSOFF:WORD,ANSSEG:WORD,XARG:FPM_NUM,\
                      YARG:FPM_NUM
; Check for zero addition (x=0)
               MOV    EAX,XARG.NUM
               OR     EAX,EAX
               JNZ    SHORT YZA
               MOV    EAX,YARG.NUM
               MOV    CH,YARG.SIGN
               MOV    CL,YARG.SCALE
               JMP    DOADD3
; Check for y=0
YZA:           MOV    EAX,YARG.NUM
               OR     EAX,EAX
               JNZ    SHORT NZA
               MOV    EAX,XARG.NUM
               MOV    CH,XARG.SIGN
               MOV    CL,XARG.SCALE
               JMP    DOADD3
NZA:
               LEA    BX,YARG
               CALL   RADJ
               LEA    BX,XARG
               CALL   RADJ
; if eq then ok
               MOV    CL,XARG.SCALE
               MOV    CH,YARG.SCALE
               CMP    CL,CH
               JE     SHORT DOADD
; if xscale>yscale ...
               LEA    SI,YARG
               JG     SHORT PLUSADJY
               XCHG   CH,CL
               XCHG   BX,SI
PLUSADJY:
;    amt=xscale-yscale
               SUB    CL,CH
;    find top bit in y
               BSR    EAX,[SI].NUM
               MOV    CH,31
               SUB    CH,AL
               CMP    CH,CL
;    if 31-topbit>=amt shift y left by amt; yscale+=amt
               JL     SHORT PLUSELSE
               SHL    [SI].NUM,CL
               ADD    [SI].SCALE,CL
               JMP    SHORT DOADD
PLUSELSE:
;    else ...
;       shift y left by 31-topbit; yscale+=31-topbit
               XCHG   CH,CL
               SHL    [SI].NUM,CL
               ADD    [SI].SCALE,CL
               XCHG   CH,CL
               SUB    CL,CH
;       shift x right by amt-(31-topbit); xscale-amt-(31-topbit)
               SHR    [BX].NUM,CL
               SUB    [BX].SCALE,CL
DOADD:
; Make certain bit 31 of each number is off and keep scales equal
               MOV    EAX,XARG.NUM
               MOV    EBX,YARG.NUM
               MOV    CL,XARG.SCALE

               MOV    EDX,EAX
               OR     EDX,EBX
               JNS    SHORT SETSIGN
               SHR    EAX,1
               SHR    EBX,1
               DEC    CL
SETSIGN:
               XOR    DH,DH        ; Set positive flag
               MOV    DL,XARG.SIGN
               OR     DL,DL
               JZ     SHORT DOADD1
               NEG    EAX
               INC    DH
DOADD1:        MOV    DL,YARG.SIGN
               OR     DL,DL
               JZ     SHORT DOADD2
               NEG    EBX
               INC    DH
DOADD2:
                                   ; Assume positive sign in CH
               XOR    CH,CH
               ADD    EAX,EBX
               JNS    SHORT DOADD3
               OR     DH,DH
               JZ     SHORT DOADD3 ; Large positive number
               NEG    EAX
               NOT    CH
DOADD3:
               PUSH   DS
               LDS    BX,DWORD PTR ANSOFF
; Store sign, scale, and value
               MOV    [BX].SIGN,CH
               MOV    [BX].SCALE,CL
               MOV    [BX].NUM,EAX
               POP    DX
               RET
faddxy         ENDP

;************************************************
fsubxy         PROC
               PUSH   BP
               MOV    BP,SP
; If X-0 then compute X+0
               MOV    EAX,[BP+14].NUM
               OR     EAX,EAX
               JZ     SHORT SUBBY0
; else compute X+-1*Y
               NOT    [BP+14].SIGN
SUBBY0:
               POP    BP
; Let + do the work
               JMP    faddxy
fsubxy         ENDP


;************************************************
fmulxy         PROC
               ARG    ANSOFF:WORD,ANSSEG:WORD,XARG:FPM_NUM,\
                      YARG:FPM_NUM
; Right adjust X
               LEA    BX,XARG
               CALL   RADJ
; Right adjust Y
               LEA    BX,YARG
               CALL   RADJ
; Check for X*0 or 0*Y
               MOV    EAX,XARG.NUM
               OR     EAX,EAX
               JZ     SHORT RETZM
               MOV    EBX,YARG.NUM
               OR     EBX,EBX
               JZ     SHORT RETZM
; Do it
               MUL    EBX
TSTZLP:
; Find bits in EDX (overflow of 32 bits)
               BSR    ECX,EDX
; If none, goto DXZ
               JZ     SHORT DXZ
; Shift bits to EAX and adjust scale
               INC    CL
               SHRD   EAX,EDX,CL
               SUB    XARG.SCALE,CL
DXZ:

               OR     EAX,EAX
               JNS    SHORT NOOF
; keep answer positive
               SHR    EAX,1
               DEC    XARG.SCALE
NOOF:
; store answer
               PUSH   ES
               LES    BX,DWORD PTR ANSOFF
               MOV    ES:[BX].NUM,EAX
; compute new scale
               MOV    AL,YARG.SCALE
               ADD    AL,XARG.SCALE
               MOV    ES:[BX].SCALE,AL
; compute new sign
               MOV    AL,YARG.SIGN
               XOR    AL,XARG.SIGN
               MOV    ES:[BX].SIGN,AL
               POP    ES
               RET
; Jump here to return a zero
; shared by MUL & DIV
RETZM:         PUSH   ES
               LES    BX,DWORD PTR ANSOFF
               XOR    EAX,EAX
               MOV    ES:[BX].NUM,EAX
               MOV    ES:[BX].SCALE,AL
               MOV    ES:[BX].SIGN,AL
               POP    ES
               RET
fmulxy         ENDP

;************************************************
fdivxy         PROC
               ARG    ANSOFF:WORD,ANSSEG:WORD,XARG:FPM_NUM,\
                      YARG:FPM_NUM
; LEFT ADJUST X
               LEA    BX,XARG
               CALL   LADJ
; RIGHT ADJUST Y
               LEA    BX,YARG
               CALL   RADJ
; CHECK FOR DIVIDE BY ZERO
               MOV    EAX,XARG.NUM
               OR     EAX,EAX
               JZ     RETZM
; CHECK FOR DIVIDE BY 1
               MOV    EBX,YARG.NUM
               CMP    EBX,1
; DON'T DIVIDE BY 1 -- BESIDES IF WE ROTATE EDX:EAX LEFT,
; / BY 1 WOULD RESULT IN OVERFLOW
               JZ     SHORT DODIV
; SHIFT X BITS INTO EDX BASED ON SIZE OF Y
               XOR    EDX,EDX
               BSR    ECX,EBX
               SHLD   EDX,EAX,CL
               SHL    EAX,CL
; ADJUST SCALE
               ADD    XARG.SCALE,CL
               DIV    EBX
DODIV:
               PUSH   ES
               LES    BX,DWORD PTR ANSOFF
               MOV    ES:[BX].NUM,EAX
; COMPUTE ANSWER'S SCALE
               MOV    AL,XARG.SCALE
               SUB    AL,YARG.SCALE
               MOV    ES:[BX].SCALE,AL
; FIND ANSWER'S SIGN
               MOV    AL,YARG.SIGN
               XOR    AL,XARG.SIGN
               MOV    ES:[BX].SIGN,AL
               POP    ES
               RET
fdivxy         ENDP

               END


[LISTING TWO]

/***************************************************
 * FPM.H - C language header for FPM               *
 ***************************************************/ 
#ifndef FPMHEADER
/* fixed point type */
typedef struct
 {
 char sign;
 char scale;
 unsigned long num;
 } FIXED;

/* Scale factor
  use - 1000.0 for 3 places, 100.0 for 2 places etc. */
#define SFACTOR (10000.0)

#define CVTTYPE float

#ifdef __cplusplus
extern "C"
{
#endif

/* Assembly core functions (from FPM.ASM) */
extern FIXED faddxy(FIXED,FIXED),fsubxy(FIXED,FIXED),
       fmulxy(FIXED,FIXED),fdivxy(FIXED,FIXED);
extern int bitscan(int dir,unsigned long lword);

/* C helpers (from FPMC.C) */
FIXED floattofix(CVTTYPE f);
CVTTYPE fixtofloat(FIXED f);
int fixtoint(FIXED f);

#ifdef __BORLANDC__
/* this is the same */
extern FIXED faddxy (FIXED,FIXED),
       FIXED fsubxy (FIXED,FIXED),
       FIXED fmulxy (FIXED,FIXED),
       FIXED fdivxy (FIXED,FIXED);
#else
/* Microsoft definitions */
FIXED fpmmsc_rv;
extern void faddxy (FIXED _far *, FIXED,FIXED),
            fsubxy (FIXED _far *, FIXED,FIXED),
            fmulxy (FIXED _far *, FIXED,FIXED),
            fdivxy (FIXED _far *, FIXED,FIXED);
#define faddxy(a,b) (faddxy(&fpmmsc_rv,a,b),fpmmsc_rv)
#define fsubxy(a,b) (fsubxy(&fpmmsc_rv,a,b),fpmmsc_rv)
#define fmulxy(a,b) (fmulxy(&fpmmsc_rv,a,b),fpmmsc_rv)
#define fdivxy(a,b) (fdivxy(&fpmmsc_rv,a,b),fpmmsc_rv)
#endif

#ifdef __cplusplus
}
#endif

#endif


[LISTING THREE]

/***************************************************
 * FPMC.C - C language routines for FPM            *
 ***************************************************/ 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "fpm.h"

/* convert fpm to float */
FIXED floattofix(CVTTYPE f)
  {
  FIXED ans;
  unsigned long i,whole,frac=0L;
  CVTTYPE fp=.5;
  ans.sign=0;
/* special case */
  if (f==0.0)
    {
    ans.scale=0;
    ans.num=0L;
    return ans;
    }
  if (f<0.0)
    {
    ans.sign=0xff;
    f=-f;
    }
  whole=(unsigned long)f;
  f-=whole;
/* scale is number bits in whole */
  ans.scale=32-bitscan(1,(unsigned long)whole);
  ans.num=(unsigned long)whole<<ans.scale;
  if (ans.scale)
/* compute fractional part */
    for (i=1L<<(ans.scale-1);i!=0&&f!=0.0;fp/=2)
      {
      if (f>=fp)
        {
        f-=fp;
        frac|=i;
        }
      if (!frac&&!whole)
        ans.scale++;
      else
        i>>=1;
      }
  ans.num|=frac;
  return ans;
  }
/* convert FPM number to float */
/* Uses SFACTOR for rounding (see FPM.H) */ 
CVTTYPE fixtofloat(FIXED f)
  {
  unsigned long i;
  CVTTYPE fp=0.5,res,sg=1.0;
  if (f.sign)
    {
    sg=-1.0;
    }
  if (f.scale<0)
    res=(CVTTYPE)(f.num<<-f.scale);
  else
    {
    res=(CVTTYPE)(f.num>>f.scale);
    while (f.scale>>5)
      {
      f.scale--;
      fp/=2.0;
      }
    for (i=1L<<f.scale-1;i!=0;i>>=1,fp/=2)
      if (f.num&i) res+=fp;

    }
  return floor(sg*res*SFACTOR+.5)/SFACTOR;
  }
/* convert FPM number to int (truncate fraction) */
int fixtoint(FIXED f)
  {
  int r;
  if (f.scale>=0) r=f.num>>f.scale; else r=0;
  if (f.sign) r=-r;
  return r;
  }



[LISTING FOUR]

/***************************************************
 * FPMGRF.C - Graphical demo of FPM with benchmark * 
 ***************************************************/ 
#include <stdio.h>
#include <stdlib.h>
#include <dos.h>
#include <conio.h>
#include <time.h>
#include "fpm.h"

/* Macro to call BIOS */
#define INT10(r) int86(0x10,&r,&r)
/* Parameters for line (y=Mx+B) */
#define M .291    /* Slope */
#define B .08     /* y-intercept */

main(int argc,char *argv[])
  {
  clock_t s,e;
  long fpmtime,floattime;
/* Set CGA mode */
  setmode(4);
/* mark time */
  s=clock();
/* draw line with math library */
  floatline();
  e=clock();
/* compute time */
  floattime=e-s;
/* wait for a key */
  printat(1,10,"Press any key to continue");
  if (!getch()) getch();
  printat(1,10,"                         ");
/* mark time again */
  s=clock();
/* draw line with FPM */
  fpmline();
  e=clock();
/* compute time */
  fpmtime=e-s;
/* wait for a key */
  printat(1,10,"Press any key to continue\n");
  if (!getch()) getch();
  setmode(3);
/* print results */
  printf("Float time=%ld ticks\nFPM time=%ld ticks\n",floattime,fpmtime);
  }
/* Set video mode */
setmode(int n)
  {
  union REGS r;
  r.x.ax=n;
  INT10(r);
  return 0;
  }
/* plot a floating point pixel */
floatplot(float x,float y)
  {
  int x1,y1;
  x1=x*320;
  y1=y*200;
  plot(x1,y1,2);
  return 0;
  }
/* ploat a point with FPM numbers */
fpmplot(FIXED x,FIXED y)
 {
 static FIXED xscale;
 static FIXED yscale;
 static int first=0;
 if (!first)
   {
   xscale=floattofix(320.0);
   yscale=floattofix(200.0);
   first=1;
   }
 plot(fixtoint(fmulxy(x,xscale)),fixtoint(fmulxy(y,yscale)),3+128);
 }
/* plot a hardware pixel */
plot(int x,int y,int color)
  {
  union REGS r;
  r.h.ah=0x0c;
  r.h.al=color;
  r.h.bh=0;
  r.x.cx=x;
  r.x.dx=y;
  INT10(r);
  return 0;
  }
/* Floating point library line drawing routine */
floatline()
  {
  float x,y;
  for (x=0.0;x<1.0;x+=.001)
    {
    y=M*x+B;
    floatplot(x,y);
    }
  }
/* FPM line drawing routine */
fpmline()
  {
  FIXED x,y,m,b,incr;
  m=floattofix(M);
  b=floattofix(B);
  x=floattofix(0.0);
  incr=floattofix(.001);
  while (fixtoint(x)==0)
    {
    y=faddxy(fmulxy(m,x),b);
    fpmplot(x,y);
    x=faddxy(x,incr);
    }
  }
/* print string via BIOS at x,y */
printat(int x,int y,char *s)
  {
  union REGS r,r1;
  r.h.ah=2;
  r.h.bh=0;
  r.h.dh=y;
  r.h.dl=x;
  INT10(r);
  r.h.ah=0xe;
  r.h.bl=7;
  while (r.h.al=*s++)
    int86(0x10,&r,&r1);
  }


Example 1: (a) conventional DOS program that right adjusts EAX; 

(a)

SLOOP:  TEST EAX,1
        JZ   DONE
        SHR  EAX,1
        JMP  SLOOP
DONE:

(b)

SHR  EAX,ECX


Example 2: Using the 386 SHLD instruction

MOV EAX,10H
MOV EBX,0FA000000H
SHLD EAX,EBX,8


Figure 1: FPM number representation
        
struct fpmnum
/------------------------------------------------\
|                                                |
| val: contains the number's magnitude           |                                                                                      
|                                                |

|------------------------------------------------/
| scale:    |
| number's  |
| exponent  |
|-----------|
| sign:     |
| 0 if +    |
| FF if -   |
\-----------/

Examples:

If:
val=10101 (binary)
scale=1
sign=0
Then:
number=1010.1 (binary) = 10.5 (decimal)

If:
val=11 (binary)
scale=-1
sign=0
Then:
number=110 (binary) = 6 (decimal)
        
        

Figure 2: (a) pseudeocode for FPM addition; (b) pseuedocode for 
FPM subtraction; (c) pseudeocode for FPM multiply; (d) pseudocode for 
FPM divide 

(a)


if (x.value==0) return y

if (y.value==0) return x
right_adjust(y)
right_adjust(x)
if x.scale!=y.scale then
     A=number with larger scale
     B=number with smaller scale
     sdiff=A.scale-B.scale
     idx=left_bit_index(B)  /* find index of leftmost '1' bit */
     if 31-idx>=sdiff then
          B.value=B.value<<sdiff
          B.scale+=sdiff
     else
          B.value=B.value<<31-idx
          B.scale+=31-idx
          A.value=A.value>>sdiff-(31-idx)
          A.scale-=sdiff-(31-idx)
     endif
endif
if bit 31 of A.value or B.value is set then
     A.value=A.value>>1
     B.value=B.value>>1
     A.scale--
     B.scale--
endif
if A.sign==0xff A.value=-A.value
if B.sign==0xff B.value=-B.value
result.value=A.value+B.value
if result.value<0 then
     result.value=-result.value
     result.sign=0xff
endif
result.scale=A.scale
        

(b)

if y.value!=0 y=-y
jump to add routine 


(c)

right_adjust(x)
right_adjust(y)
if x.value==0||y.value==0 return 0
product=x.value*y.value /* product is 64 bit */
shift product left until top 32 bits are all zero
x.scale=x.scale-amount_shifted
if bit 31 of product set then
     product=product>>1

     x.scale--
endif
result.value=product
result.scale=x.scale+y.scale
result.sign=x.sign^y.sign  /* exclusive-or */

(d)

left_adjust(x)
right_adjust(y)
if x.value==0 return 0
if y.value!=1 then
     idx=left_bit_index(y)  /* find index of leftmost '1' bit */
     divx=x.value           /* divx is 64 bit */
     divx=divx<<idx
     x.scale+=idx
     result.value=divx/y.value
else
     result.value=x
endif
result.scale=x.scale-y.scale
result.sign=x.sign^y.sign   /* exclusive-or */
        




