// APPROXPI     Monte Carlo approximation of pi

// Incorporates an in-line adaptation of RANDOM to get a
// randomly generated 64-bit integer, which is then made
// into a floating-point value between 0 and 1 (exclusive).
// The random integer is generated by the multiplier/adder
// method. The seed is initialized using the system time.
        MULT    = 3141592653589793221 // Multiplier
        TEN6    = 1000000         // 1,000,000 (one million)
        TOPS    = 1048576         // Large number of shots
        .global dbl_div_min_lat, gettimeofday, printf
        .data                     // Declare storage
        .align  8                 // Desirable alignment
ARGS:   .skip   24                // Space for num,den,quo
HALF:   real8   0.5               // Floating-point 0.5
FOUR:   real8   4.0               // Floating-point 4.0
SEED:   data8   0                 // Seed for algorithm
TIME:                             // For gettimeofday
SECS:   data8   0                 //  Seconds
USEC:   data8   0                 //  Microseconds
VOID:   data4   0                 // (info not needed)
        data4   0                 // (info not needed)
PRNT:   stringz "%18.18f\n"       // Format for printf
        .text                     // Section for code
        .align  32                // Desirable alignment
        .global main              // These three lines
        .proc   main              //  mark the mandatory
main:                             //   'main' program entry
        .prologue 12,r32          // Mask for rp, ar.pfs only
        alloc   loc0 = ar.pfs,0,4,3,0  // ins, locals, outs
        .save   rp,loc1           // Must save return address
        mov     loc1 = b0         //  to our caller
        .body                     // Now we really begin...
first:  add     out0 = @gprel(TIME),gp  // out0 -> TIME
        add     out1 = @gprel(VOID),gp  // out1 -> VOID
        mov     loc2 = gp;;       // Save gp
        br.call.sptk.many b0 = gettimeofday  // System time
        mov     gp = loc2;;       // Restore gp
        add     r2 = @gprel(SECS),gp;;  // r2 -> SECS
        ldf8    f8 = [r2]         // SECS is multiplicand
        movl    r3 = TEN6;;       // TEN6 is multiplier
        setf.sig f9 = r3          // TEN6 in fp register
        add     r2 = @gprel(USEC),gp;;  // r2 -> USEC
        ldf8    f10 = [r2];;      // USEC is to be added
        xma.lu  f8 = f8,f9,f10;;  // Seed = SECS*TEN6 + USEC
init:   add     r2 = @gprel(HALF),gp;;  // r2 -> HALF
        ldfd    f6 = [r2]         // Constant 0.5
        mov     f14 = f0          // Counter for hits
        mov     f15 = f0          // Counter for shots
        movl    r16 = TOPS;;      // Number of shots to do
        movl    r3 = MULT;;       // Multiplier
        setf.sig f7 = r3;;        // MULT in fp register
        fcvt.fx f9 = f1;;         // Integer format 1
loop:   xma.lu  f8 = f8,f7,f9;;   // Next = Seed*MULT + 1
        fcvt.xuf f10 = f8;;       // Now register fp format
        fmerge.se f10 = f6,f10;;  // Now scaled 0.5 to 1.0
        fsub.d  f10 = f10,f6;;    // Now scaled 0.0 to 0.5
        fadd.d  f10 = f10,f10;;   // Now scaled 0.0 to 1.0
        fmpy.d  f10 = f10,f10;;   // X * X
        xma.lu  f8 = f8,f7,f9;;   // Next = Seed*MULT + 1
        fcvt.xuf f11 = f8;;       // Now register fp format
        fmerge.se f11 = f6,f11;;  // Now scaled 0.5 to 1.0
        fsub.d  f11 = f11,f6;;    // Now scaled 0.0 to 0.5
        fadd.d  f11 = f11,f11;;   // Now scaled 0.0 to 1.0
        fmpy.d  f11 = f11,f11;;   // Y * Y
        fadd    f10 = f10,f11;;   // X * X + Y * Y
        fcmp.le p6,p0 = f10,f1;;  // If it's in the circle
   (p6) fadd    f14 = f14,f1      //  then count as a hit
        fadd    f15 = f15,f1      // Count this shot
        add     r16 = -1,r16;;    // Decrement loop counter
        cmp.gt p6,p0 = r16,r0     // We're counting down...
   (p6) br.cond.sptk.many loop    //  ...until done
        add     loc3 = @gprel(ARGS),gp;;  // loc3 -> args
        mov     out0 = loc3       // Point to hits
        stfd    [loc3] = f14,8;;  //  (numerator)
        mov     out1 = loc3       // Point to shots
        stfd    [loc3] = f15,8;;  //  (denominator)
        mov     out2 = loc3       // Point to quotient
        br.call.sptk.many b0 = dbl_div_min_lat  // Division
        mov     gp = loc2;;       // Restore gp
        ldfd    f8 = [loc3];;     // Get ratio: hits/shots
        add     r2 = @gprel(FOUR),gp;;  // r2 -> 4.0
        ldfd    f9 = [r2];;       // Get factor 4.0
        fmpy    f8 = f8,f9;;      // Compute pi
        add     out0 = @gprel(PRNT),r1;;  // out0 -> format
        getf.d  out1 = f8         // out1 = estimate of pi
        br.call.sptk.many b0 = printf  // C print function
        mov     gp = loc2         // Restore gp
done:   mov     ret0 = 0          // Signal all is normal
        mov     b0 = loc1         // Restore return address
        mov     ar.pfs = loc0     // Restore caller's ar.pfs
        br.ret.sptk.many b0;;     // Back to caller
        .endp   main              // Mark end of procedure
        .include "dbl_div_min_lat.s"