_32-BIT FLOATING-POINT MATH_ by Al Williams [LISTING ONE] ;########################################################## ;# File: FPM.ASM # ;# 386 Floating point package by Al Williams # ;########################################################## .MODEL SMALL,C ; enable 386 instructions .386 .DATA ; Number format FPM_NUM STRUC SIGN DB ? ; 0 for + FF for - SCALE DB ? ; position of decimal point NUM DD ? ; unsigned magnitude FPM_NUM ENDS .CODE ; C routines PUBLIC faddxy,fsubxy,fmulxy,fdivxy,bitscan ;************************************************ ; Helper for C routines bitscan(dir,lword) ; if dir==0 do BSF on num ; if dir!=0 do BSR on num ; Returns index 1-32 or 0 if word was zero bitscan PROC ARG DIR:WORD, LWORD:DWORD MOV AX,DIR OR AX,AX MOV EAX,LWORD JZ SHORT SCANFWD BSR EAX,EAX JMP SHORT BITSC0 SCANFWD: BSF EAX,EAX BITSC0: JNZ SHORT BITSC1 XOR AX,AX RET BITSC1: INC AX RET bitscan ENDP ;************************************************ ; Adjust temp register @BX so that rightmost bit is 1 RADJ PROC ; Find last set bit BSF ECX,[BX].NUM ; If all zeros, jump to ZOUT JZ SHORT ZOUT ; Shift right and adjust scale SHR [BX].NUM,CL SUB [BX].SCALE,CL RET ZOUT: MOV [BX].SCALE,0 RET RADJ ENDP ;************************************************ ; Adjust temp register @BX so leftmost bit is 1 LADJ PROC ; Find first bit set BSR ECX,[BX].NUM ; If all zero's goto zout (above in proc RADJ) JZ ZOUT ; 31-Bit_index -> # places to shift MOV CH,31 SUB CH,CL MOV CL,CH ; Shift number and adjust scale SHL [BX].NUM,CL ADD [BX].SCALE,CL RET LADJ ENDP ;************************************************ ; Add x+y->x faddxy PROC USES SI ARG ANSOFF:WORD,ANSSEG:WORD,XARG:FPM_NUM,\ YARG:FPM_NUM ; Check for zero addition (x=0) MOV EAX,XARG.NUM OR EAX,EAX JNZ SHORT YZA MOV EAX,YARG.NUM MOV CH,YARG.SIGN MOV CL,YARG.SCALE JMP DOADD3 ; Check for y=0 YZA: MOV EAX,YARG.NUM OR EAX,EAX JNZ SHORT NZA MOV EAX,XARG.NUM MOV CH,XARG.SIGN MOV CL,XARG.SCALE JMP DOADD3 NZA: LEA BX,YARG CALL RADJ LEA BX,XARG CALL RADJ ; if eq then ok MOV CL,XARG.SCALE MOV CH,YARG.SCALE CMP CL,CH JE SHORT DOADD ; if xscale>yscale ... LEA SI,YARG JG SHORT PLUSADJY XCHG CH,CL XCHG BX,SI PLUSADJY: ; amt=xscale-yscale SUB CL,CH ; find top bit in y BSR EAX,[SI].NUM MOV CH,31 SUB CH,AL CMP CH,CL ; if 31-topbit>=amt shift y left by amt; yscale+=amt JL SHORT PLUSELSE SHL [SI].NUM,CL ADD [SI].SCALE,CL JMP SHORT DOADD PLUSELSE: ; else ... ; shift y left by 31-topbit; yscale+=31-topbit XCHG CH,CL SHL [SI].NUM,CL ADD [SI].SCALE,CL XCHG CH,CL SUB CL,CH ; shift x right by amt-(31-topbit); xscale-amt-(31-topbit) SHR [BX].NUM,CL SUB [BX].SCALE,CL DOADD: ; Make certain bit 31 of each number is off and keep scales equal MOV EAX,XARG.NUM MOV EBX,YARG.NUM MOV CL,XARG.SCALE MOV EDX,EAX OR EDX,EBX JNS SHORT SETSIGN SHR EAX,1 SHR EBX,1 DEC CL SETSIGN: XOR DH,DH ; Set positive flag MOV DL,XARG.SIGN OR DL,DL JZ SHORT DOADD1 NEG EAX INC DH DOADD1: MOV DL,YARG.SIGN OR DL,DL JZ SHORT DOADD2 NEG EBX INC DH DOADD2: ; Assume positive sign in CH XOR CH,CH ADD EAX,EBX JNS SHORT DOADD3 OR DH,DH JZ SHORT DOADD3 ; Large positive number NEG EAX NOT CH DOADD3: PUSH DS LDS BX,DWORD PTR ANSOFF ; Store sign, scale, and value MOV [BX].SIGN,CH MOV [BX].SCALE,CL MOV [BX].NUM,EAX POP DX RET faddxy ENDP ;************************************************ fsubxy PROC PUSH BP MOV BP,SP ; If X-0 then compute X+0 MOV EAX,[BP+14].NUM OR EAX,EAX JZ SHORT SUBBY0 ; else compute X+-1*Y NOT [BP+14].SIGN SUBBY0: POP BP ; Let + do the work JMP faddxy fsubxy ENDP ;************************************************ fmulxy PROC ARG ANSOFF:WORD,ANSSEG:WORD,XARG:FPM_NUM,\ YARG:FPM_NUM ; Right adjust X LEA BX,XARG CALL RADJ ; Right adjust Y LEA BX,YARG CALL RADJ ; Check for X*0 or 0*Y MOV EAX,XARG.NUM OR EAX,EAX JZ SHORT RETZM MOV EBX,YARG.NUM OR EBX,EBX JZ SHORT RETZM ; Do it MUL EBX TSTZLP: ; Find bits in EDX (overflow of 32 bits) BSR ECX,EDX ; If none, goto DXZ JZ SHORT DXZ ; Shift bits to EAX and adjust scale INC CL SHRD EAX,EDX,CL SUB XARG.SCALE,CL DXZ: OR EAX,EAX JNS SHORT NOOF ; keep answer positive SHR EAX,1 DEC XARG.SCALE NOOF: ; store answer PUSH ES LES BX,DWORD PTR ANSOFF MOV ES:[BX].NUM,EAX ; compute new scale MOV AL,YARG.SCALE ADD AL,XARG.SCALE MOV ES:[BX].SCALE,AL ; compute new sign MOV AL,YARG.SIGN XOR AL,XARG.SIGN MOV ES:[BX].SIGN,AL POP ES RET ; Jump here to return a zero ; shared by MUL & DIV RETZM: PUSH ES LES BX,DWORD PTR ANSOFF XOR EAX,EAX MOV ES:[BX].NUM,EAX MOV ES:[BX].SCALE,AL MOV ES:[BX].SIGN,AL POP ES RET fmulxy ENDP ;************************************************ fdivxy PROC ARG ANSOFF:WORD,ANSSEG:WORD,XARG:FPM_NUM,\ YARG:FPM_NUM ; LEFT ADJUST X LEA BX,XARG CALL LADJ ; RIGHT ADJUST Y LEA BX,YARG CALL RADJ ; CHECK FOR DIVIDE BY ZERO MOV EAX,XARG.NUM OR EAX,EAX JZ RETZM ; CHECK FOR DIVIDE BY 1 MOV EBX,YARG.NUM CMP EBX,1 ; DON'T DIVIDE BY 1 -- BESIDES IF WE ROTATE EDX:EAX LEFT, ; / BY 1 WOULD RESULT IN OVERFLOW JZ SHORT DODIV ; SHIFT X BITS INTO EDX BASED ON SIZE OF Y XOR EDX,EDX BSR ECX,EBX SHLD EDX,EAX,CL SHL EAX,CL ; ADJUST SCALE ADD XARG.SCALE,CL DIV EBX DODIV: PUSH ES LES BX,DWORD PTR ANSOFF MOV ES:[BX].NUM,EAX ; COMPUTE ANSWER'S SCALE MOV AL,XARG.SCALE SUB AL,YARG.SCALE MOV ES:[BX].SCALE,AL ; FIND ANSWER'S SIGN MOV AL,YARG.SIGN XOR AL,XARG.SIGN MOV ES:[BX].SIGN,AL POP ES RET fdivxy ENDP END [LISTING TWO] /*************************************************** * FPM.H - C language header for FPM * ***************************************************/ #ifndef FPMHEADER /* fixed point type */ typedef struct { char sign; char scale; unsigned long num; } FIXED; /* Scale factor use - 1000.0 for 3 places, 100.0 for 2 places etc. */ #define SFACTOR (10000.0) #define CVTTYPE float #ifdef __cplusplus extern "C" { #endif /* Assembly core functions (from FPM.ASM) */ extern FIXED faddxy(FIXED,FIXED),fsubxy(FIXED,FIXED), fmulxy(FIXED,FIXED),fdivxy(FIXED,FIXED); extern int bitscan(int dir,unsigned long lword); /* C helpers (from FPMC.C) */ FIXED floattofix(CVTTYPE f); CVTTYPE fixtofloat(FIXED f); int fixtoint(FIXED f); #ifdef __BORLANDC__ /* this is the same */ extern FIXED faddxy (FIXED,FIXED), FIXED fsubxy (FIXED,FIXED), FIXED fmulxy (FIXED,FIXED), FIXED fdivxy (FIXED,FIXED); #else /* Microsoft definitions */ FIXED fpmmsc_rv; extern void faddxy (FIXED _far *, FIXED,FIXED), fsubxy (FIXED _far *, FIXED,FIXED), fmulxy (FIXED _far *, FIXED,FIXED), fdivxy (FIXED _far *, FIXED,FIXED); #define faddxy(a,b) (faddxy(&fpmmsc_rv,a,b),fpmmsc_rv) #define fsubxy(a,b) (fsubxy(&fpmmsc_rv,a,b),fpmmsc_rv) #define fmulxy(a,b) (fmulxy(&fpmmsc_rv,a,b),fpmmsc_rv) #define fdivxy(a,b) (fdivxy(&fpmmsc_rv,a,b),fpmmsc_rv) #endif #ifdef __cplusplus } #endif #endif [LISTING THREE] /*************************************************** * FPMC.C - C language routines for FPM * ***************************************************/ #include #include #include #include #include "fpm.h" /* convert fpm to float */ FIXED floattofix(CVTTYPE f) { FIXED ans; unsigned long i,whole,frac=0L; CVTTYPE fp=.5; ans.sign=0; /* special case */ if (f==0.0) { ans.scale=0; ans.num=0L; return ans; } if (f<0.0) { ans.sign=0xff; f=-f; } whole=(unsigned long)f; f-=whole; /* scale is number bits in whole */ ans.scale=32-bitscan(1,(unsigned long)whole); ans.num=(unsigned long)whole<=fp) { f-=fp; frac|=i; } if (!frac&&!whole) ans.scale++; else i>>=1; } ans.num|=frac; return ans; } /* convert FPM number to float */ /* Uses SFACTOR for rounding (see FPM.H) */ CVTTYPE fixtofloat(FIXED f) { unsigned long i; CVTTYPE fp=0.5,res,sg=1.0; if (f.sign) { sg=-1.0; } if (f.scale<0) res=(CVTTYPE)(f.num<<-f.scale); else { res=(CVTTYPE)(f.num>>f.scale); while (f.scale>>5) { f.scale--; fp/=2.0; } for (i=1L<>=1,fp/=2) if (f.num&i) res+=fp; } return floor(sg*res*SFACTOR+.5)/SFACTOR; } /* convert FPM number to int (truncate fraction) */ int fixtoint(FIXED f) { int r; if (f.scale>=0) r=f.num>>f.scale; else r=0; if (f.sign) r=-r; return r; } [LISTING FOUR] /*************************************************** * FPMGRF.C - Graphical demo of FPM with benchmark * ***************************************************/ #include #include #include #include #include #include "fpm.h" /* Macro to call BIOS */ #define INT10(r) int86(0x10,&r,&r) /* Parameters for line (y=Mx+B) */ #define M .291 /* Slope */ #define B .08 /* y-intercept */ main(int argc,char *argv[]) { clock_t s,e; long fpmtime,floattime; /* Set CGA mode */ setmode(4); /* mark time */ s=clock(); /* draw line with math library */ floatline(); e=clock(); /* compute time */ floattime=e-s; /* wait for a key */ printat(1,10,"Press any key to continue"); if (!getch()) getch(); printat(1,10," "); /* mark time again */ s=clock(); /* draw line with FPM */ fpmline(); e=clock(); /* compute time */ fpmtime=e-s; /* wait for a key */ printat(1,10,"Press any key to continue\n"); if (!getch()) getch(); setmode(3); /* print results */ printf("Float time=%ld ticks\nFPM time=%ld ticks\n",floattime,fpmtime); } /* Set video mode */ setmode(int n) { union REGS r; r.x.ax=n; INT10(r); return 0; } /* plot a floating point pixel */ floatplot(float x,float y) { int x1,y1; x1=x*320; y1=y*200; plot(x1,y1,2); return 0; } /* ploat a point with FPM numbers */ fpmplot(FIXED x,FIXED y) { static FIXED xscale; static FIXED yscale; static int first=0; if (!first) { xscale=floattofix(320.0); yscale=floattofix(200.0); first=1; } plot(fixtoint(fmulxy(x,xscale)),fixtoint(fmulxy(y,yscale)),3+128); } /* plot a hardware pixel */ plot(int x,int y,int color) { union REGS r; r.h.ah=0x0c; r.h.al=color; r.h.bh=0; r.x.cx=x; r.x.dx=y; INT10(r); return 0; } /* Floating point library line drawing routine */ floatline() { float x,y; for (x=0.0;x<1.0;x+=.001) { y=M*x+B; floatplot(x,y); } } /* FPM line drawing routine */ fpmline() { FIXED x,y,m,b,incr; m=floattofix(M); b=floattofix(B); x=floattofix(0.0); incr=floattofix(.001); while (fixtoint(x)==0) { y=faddxy(fmulxy(m,x),b); fpmplot(x,y); x=faddxy(x,incr); } } /* print string via BIOS at x,y */ printat(int x,int y,char *s) { union REGS r,r1; r.h.ah=2; r.h.bh=0; r.h.dh=y; r.h.dl=x; INT10(r); r.h.ah=0xe; r.h.bl=7; while (r.h.al=*s++) int86(0x10,&r,&r1); } Example 1: (a) conventional DOS program that right adjusts EAX; (a) SLOOP: TEST EAX,1 JZ DONE SHR EAX,1 JMP SLOOP DONE: (b) SHR EAX,ECX Example 2: Using the 386 SHLD instruction MOV EAX,10H MOV EBX,0FA000000H SHLD EAX,EBX,8 Figure 1: FPM number representation struct fpmnum /------------------------------------------------\ | | | val: contains the number's magnitude | | | |------------------------------------------------/ | scale: | | number's | | exponent | |-----------| | sign: | | 0 if + | | FF if - | \-----------/ Examples: If: val=10101 (binary) scale=1 sign=0 Then: number=1010.1 (binary) = 10.5 (decimal) If: val=11 (binary) scale=-1 sign=0 Then: number=110 (binary) = 6 (decimal) Figure 2: (a) pseudeocode for FPM addition; (b) pseuedocode for FPM subtraction; (c) pseudeocode for FPM multiply; (d) pseudocode for FPM divide (a) if (x.value==0) return y if (y.value==0) return x right_adjust(y) right_adjust(x) if x.scale!=y.scale then A=number with larger scale B=number with smaller scale sdiff=A.scale-B.scale idx=left_bit_index(B) /* find index of leftmost '1' bit */ if 31-idx>=sdiff then B.value=B.value<>sdiff-(31-idx) A.scale-=sdiff-(31-idx) endif endif if bit 31 of A.value or B.value is set then A.value=A.value>>1 B.value=B.value>>1 A.scale-- B.scale-- endif if A.sign==0xff A.value=-A.value if B.sign==0xff B.value=-B.value result.value=A.value+B.value if result.value<0 then result.value=-result.value result.sign=0xff endif result.scale=A.scale (b) if y.value!=0 y=-y jump to add routine (c) right_adjust(x) right_adjust(y) if x.value==0||y.value==0 return 0 product=x.value*y.value /* product is 64 bit */ shift product left until top 32 bits are all zero x.scale=x.scale-amount_shifted if bit 31 of product set then product=product>>1 x.scale-- endif result.value=product result.scale=x.scale+y.scale result.sign=x.sign^y.sign /* exclusive-or */ (d) left_adjust(x) right_adjust(y) if x.value==0 return 0 if y.value!=1 then idx=left_bit_index(y) /* find index of leftmost '1' bit */ divx=x.value /* divx is 64 bit */ divx=divx<