// DOTCTOP2          Scalar Product of N-vectors

// This program will compute the scalar product
// of two multielement vectors V and W.
        // Assembly-time constant and data storage.
        // N drives the loop count (ar.lc = N-1), so it must equal
        // the number of elements listed in both V and W, and it
        // must be at least 1.
        // With the values below the expected result is
        //   (-1)(-2) + (3)(-4) + (5)(6) = 20.
        N       = 3               // N = dimensionality (element count)
        .data                     // Declare storage
        .align  8                 // 8-byte alignment for the st8 to P
P:      .skip   8                 // 8 bytes reserved for the product
V:      data2   -1,+3,+5          // V1, V2, V3: signed 16-bit elements
W:      data2   -2,-4,+6          // W1, W2, W3: signed 16-bit elements
        // main: compute P = V1*W1 + V2*W2 + ... + VN*WN using a
        // counted, software-pipelined loop (br.ctop) over rotating
        // registers, store the 64-bit sum at P, and return 0.
        //
        // Register use:
        //   r9  = saved ar.lc          r10 = saved ar.pfs
        //   r14 = pointer into V       r15 = pointer into W
        //   r16 = address of P         r20 = running sum (static)
        //   r32-r47 = rotating registers carrying loop values
        .text                     // Section for code
        .align  32                // Desired alignment
        .global main              // These three lines
        .proc   main              //  mark the mandatory
main:                             //   'main' program entry
        .prologue                 // Leaf procedure can save
        .save   ar.lc, r9         //  the caller's ar.lc
        mov     r9 = ar.lc;;      //   in a scratch register
        .body                     // Now we really begin...
        // The stop (;;) above makes alloc the first instruction of
        // its group. Frame: 0 inputs, 16 locals, 0 outputs, with all
        // 16 locals (r32-r47) rotating.
        // NOTE(review): the ar.pfs save into r10 happens after .body
        // and has no .save directive; confirm that is acceptable for
        // unwinding in this environment.
first:  alloc   r10 = ar.pfs,0,16,0,16 // 16 rots
        movl    r14 = V           // Pointer for V
        movl    r15 = W           // Pointer for W
        movl    r16 = P           // Pointer for P
        mov     r20 = 0           // r20 = running sum
        mov     ar.lc = N-1       // Traversals minus one
        mov     ar.ec = 7         // 7 pipeline stages: p16..p22
        mov     pr.rot = 0x10000;; // p16 = 1, p17..p63 = 0
        // Pipelined loop body (one instruction group per pass).
        // Each br.ctop renames the rotating registers up by one, so
        // a value written to rK is read as rK+1 on the next pass:
        //   stage 1 (p16): load Vi -> r32 and Wi -> r39
        //   stage 3 (p18): r34 (was r32) * r41 (was r39) -> r34
        //   stage 6 (p21): sign-extend r37 (was r34) in place
        //   stage 7 (p22): r20 += r38 (was r37)
        // Stages 2, 4 and 5 hold no instructions; presumably they
        // allow for load and multiply latency -- TODO confirm.
        // Once ar.lc reaches zero, ar.ec supplies the extra passes
        // that drain the elements still in flight.
top:
  (p16) ld2     r32 = [r14],2     // Get Vi; bump pointer by 2 bytes
  (p16) ld2     r39 = [r15],2     // Get Wi; bump pointer by 2 bytes
  (p18) pmpy2.r r34 = r34,r41     // Compute Vi times Wi (32-bit result)
  (p22) add     r20 = r20,r38     // Add last pass's extended product
  (p21) sxt4    r37 = r37         // Extend 32-bit product to 64 bits
                                  //  (r37 != r38, so order vs. the
                                  //   add does not matter)
        br.ctop.sptk.few top;;    // More to do?
        st8     [r16] = r20       // No, store the product
done:   mov     ret0 = 0          // Signal all is normal
        mov     ar.lc = r9        // Restore caller's ar.lc
        mov     ar.pfs = r10      // Restore caller's ar.pfs
        br.ret.sptk.many b0;;     // Back to command line
        .endp   main              // Mark end of procedure