// DOTCTOP2     Scalar Product of N-vectors
//
// Computes the scalar (dot) product of two N-element vectors of signed
// 16-bit integers, V and W, and stores the 64-bit result at P.
// With the sample data below the result is
//      (-1)(-2) + (3)(-4) + (5)(6) = 20.
//
// Target:  IA-64 (Itanium); one statement per line, '//' comments.
// Entry:   main -- leaf procedure, takes no arguments.
// Exit:    ret0 = 0; the product has been stored at P.
//
// Register roles
//      r9       saved caller's ar.lc
//      r10      saved caller's ar.pfs
//      r11      saved caller's predicates (pr)
//      r14,r15  pointers walking V and W (post-incremented by 2)
//      r16      pointer to P
//      r20      running sum
//      r32-r47  rotating registers (renamed by one on every br.ctop)
//
// The loop is software-pipelined with br.ctop.  Because the registers
// rotate by one per traversal, a value written to r32 is called r34 two
// traversals later, and so on.  Seven stages, gated by p16..p22:
//      p16  load Vi -> r32 and Wi -> r39
//      p18  multiply: r34 (Vi) * r41 (Wi) -> r34
//      p21  sign-extend the 32-bit product, now named r37
//      p22  add the extended product, now named r38, into r20
// (p17, p19 and p20 gate no instructions; those stages only let time pass.)

        N        = 3            // N = dimensionality

        .data                   // Declare storage
        .align   8              // Desired alignment
P:      .skip    8              // Space for product
V:      data2    -1,+3,+5       // V1, V2, V3, etc.
W:      data2    -2,-4,+6       // W1, W2, W3, etc.

        .text                   // Section for code
        .align   32             // Desired alignment
        .global  main           // These three lines
        .proc    main           //  mark the mandatory
main:                           //   'main' program entry
        .prologue               // Leaf procedure can save state
        .save    ar.lc, r9      //  in scratch registers:
        mov      r9 = ar.lc     //   the caller's ar.lc, and
        .save    pr, r11        //   the caller's predicates, because
        mov      r11 = pr;;     //   the loop overwrites p16-p63, which
                                //   the calling convention preserves
        .body                   // Now we really begin...
first:  alloc    r10 = ar.pfs,0,16,0,16   // 16 locals, all 16 rotating
        movl     r14 = V        // Pointer for V
        movl     r15 = W        // Pointer for W
        movl     r16 = P        // Pointer for P
        mov      r20 = 0        // r20 = running sum
        mov      ar.lc = N-1    // Traversals minus one
        mov      ar.ec = 7      // Pipeline stages (p16..p22)
        mov      pr.rot = 0x10000;;  // p16 = 1, p17-p63 = 0
top:
  (p16) ld2      r32 = [r14],2  // Get Vi; bump pointer
  (p16) ld2      r39 = [r15],2  // Get Wi; bump pointer
  (p18) pmpy2.r  r34 = r34,r41  // Compute Vi times Wi
  (p22) add      r20 = r20,r38  // Update sum, after
  (p21) sxt4     r37 = r37      //  extension to 64 bits
        br.ctop.sptk.few top;;  // More to do (or pipeline to drain)?
        st8      [r16] = r20    // No, store the product
done:   mov      ret0 = 0       // Signal all is normal
        mov      pr = r11,-1    // Restore caller's predicates
        mov      ar.lc = r9     // Restore caller's ar.lc
        mov      ar.pfs = r10   // Restore caller's ar.pfs
        br.ret.sptk.many b0;;   // Back to command line
        .endp    main           // Mark end of procedure