_ALGORITHM ALLEY_ by Peter Heinrich Listing One MACHINE MC68020 EXPORT sqroot ;; unsigned long sqroot( unsigned long N ). This routine assumes standard ;; standard Macintosh C calling conventions, so it expects argument N to be ;; passed on the stack. Macintosh C register conventions specify that ;; d0-d1/a0-a1 are scratch. sqroot PROC ; If N < 2, return N; otherwise, save non-scratch registers. move.l 4(sp),d0 ; just past the return address cmpi.l #2,d0 bcs.b done movem.l d2-d3,-(sp) ; Compute the position of the highest bit set in the root. ; Using a loop instead of BFFFO will make this code run ; on any 680x0 processor. movea.l d0,a0 ; preserve N for later bfffo d0{0:0},d3 neg.l d3 addi.l #31,d3 lsr.l #1,d3 ; Determine the initial values of u, u^2, and v. moveq.l #1,d0 lsl.l d3,d0 ; u move.l d0,d1 ; v starts equal to u movea.l d0,a1 lsl.l d3,d1 ; u^2 exg.l d1,a1 ; Process bits until there are no more. checkBit dbf.w d3,nextBit movem.l (sp)+,d2-d3 done rts ; Solve the equation u^2 + 2uv + v^2. nextBit lsr.l #1,d1 ; v = next lower bit move.l d1,d2 add.l d0,d2 add.l d0,d2 ; n = 2u + v lsl.l d3,d2 add.l a1,d2 ; n = u^2 + v(2u + v) ; = u^2 + 2uv + v^2 ; If n <= N, the bit v is set. cmpa.l d2,a0 bcs.b checkBit add.l d1,d0 ; u += v movea.l d2,a1 ; u^2 = n bra.b checkBit END Listing Two NAME sqroot PUBLIC _sqroot ;; unsigned long sqroot( unsigned long N ). This routine assumes the argument ;; N is passed on the stack, and eax-edx are scratch registers. TEXT SEGMENT PUBLIC 'CODE' ASSUME CS:TEXT P386 _sqroot PROC FAR ; If 2 > N, return N; otherwise, save the non-scratch registers. mov eax,[esp+4] ; just past the return address cmp eax,2 jb short done push edi push esi ; Compute position of the highest set bit in the root. It's just ; half the position of the highest bit set in N. mov esi,eax ; preserve N for later bsr ecx,eax shr ecx,1 ; Determine the initial values of u, u^2, and v. mov eax,1 shl eax,cl ; u mov ebx,eax ; v starts equal to u mov edx,eax shl edx,cl ; u^2 ; Process bits until there are no more. checkBit dec ecx js short restore ; Solve the equation u^2 + 2uv + v^2. shr ebx,1 ; v = next lower bit mov edi,eax add edi,eax add edi,ebx ; n = 2u + v shl edi,cl add edi,edx ; n = u^2 + v(2u + v) ; = u^2 + 2uv + v^2 ; If n <= N, the bit v is set. cmp edi,esi ja short checkBit add eax,ebx ; u += v mov edx,edi ; u^2 = n jmp short checkBit Listing Three ; sqroot.asm restore pop esi pop edi done ; Return to caller. mov edx,eax shr edx,16 ; necessary, but seems silly... retf _sqroot ENDP TEXT ENDS END Listing Four AREA object,CODE EXPORT sqroot ;; unsigned long sqroot( unsigned long N ). This routine observes the ARM ;; Procedure Call Standard (APCS), so it expects the argument N to appear in ;; r0 (referred to as a1 by the APCS). Likewise, the first four registers, ;; r0-r3 (a1-a4 in the APCS), are treated as scratch. sqroot ROUT ; If N < 2, return N; otherwise, save non-scratch registers. cmp a1,#2 movcc pc,lr stmfd sp!,{v1,v2,lr} ; Compute position of the highest bit set in root. It's just ; half the position of the highest bit set in N. mov a2,a1 ; preserve N for later mov a3,a1 mov v1,#0 findlog2 movs a3,a3,LSR #2 addne v1,v1,#1 bne findlog2 ; Determine the initial values of u, u^2, and v. mov a1,#1 mov a1,a1,LSL v1 ; u mov a3,a1 ; v starts equal to u mov a4,a1,LSL v1 ; u^2 ; Process bits until there are no more. checkBit cmp v1,#0 ldmeqfd sp!,{v1,v2,pc} sub v1,v1,#1 ; Solve the equation u^2 + 2uv + v^2. mov a3,a3,LSR #1 ; v = next lower bit add v2,a3,a1,LSL #1 ; n = 2u + v add v2,a4,v2,LSL v1 ; n = u^2 + v(2u + v) ; = u^2 + 2uv + v^2 ; If n <= N, the bit v is set. cmp v2,a2 addls a1,a1,a3 ; u += v ldmeqfd sp!,{v1,v2,pc} ; exit early if n == N movls a4,v2 ; u^2 = n b checkBit END Example 1 (a) // Newton's Method -- O( log2 N ) unsigned long sqroot( unsigned long N ) { unsigned long n, p, low, high; if( 2 > N ) return( N ); low = 0; high = N; while( high > low + 1 ) { n = (high + low) / 2; p = n * n; if( N < p ) high = n; else if( N > p ) low = n; else break; } return( N == p ? n : low ); } (b) // Summing terms -- O( sqrt N ) unsigned long sqroot( unsigned long N ) { unsigned long n, u, v; if( 2 > N ) return( N ); u = 4; v = 5; for( n = 1; u <= N; n++ ) { u += v; v += 2; } return( n ); } Example 2 (a) // Binomial Theorem -- O( 1/2 log2 N ) unsigned long sqroot( unsigned long N ) { unsigned long l2, u, v, u2, v2, uv2, n; if( 2 > N ) return( N ); u = N; l2 = 0; while( u >>= 1 ) l2++; l2 >>= 1; u = 1L << l2; u2 = u << l2; while( l2-- ) { v = 1L << l2; v2 = v << l2; uv2 = u << (l2 + 1); n = u2 + uv2 + v2; if( n <= N ) { u += v; u2 = n; } } return( u ); } (b) // Optimized Binomial Theorem unsigned long sqroot( unsigned long N ) { unsigned long l2, u, v, u2, n; if( 2 > N ) return( N ); u = N; l2 = 0; while( u >>= 2 ) l2++; u = 1L << l2; v = u; u2 = u << l2; while( l2-- ) { v >>= 1; n = (u + u + v) << l2; n += u2; if( n <= N ) { u += v; u2 = n; } } return( u ); }