| 1 | // SSE Instruction Set |
| 2 | // SSE2: Added with Pentium 4 |
| 3 | // Floating-point Instructions: |
| 4 | // ADDPD, ADDSD, ANDNPD, ANDPD, CMPPD, CMPSD*, COMISD, CVTDQ2PD, CVTDQ2PS, CVTPD2DQ, CVTPD2PI, |
| 5 | // CVTPD2PS, CVTPI2PD, CVTPS2DQ, CVTPS2PD, CVTSD2SI, CVTSD2SS, CVTSI2SD, CVTSS2SD, CVTTPD2DQ, |
| 6 | // CVTTPD2PI, CVTTPS2DQ, CVTTSD2SI, DIVPD, DIVSD, MAXPD, MAXSD, MINPD, MINSD, MOVAPD, MOVHPD, |
| 7 | // MOVLPD, MOVMSKPD, MOVSD*, MOVUPD, MULPD, MULSD, ORPD, SHUFPD, SQRTPD, SQRTSD, SUBPD, SUBSD, |
| 8 | // UCOMISD, UNPCKHPD, UNPCKLPD, XORPD |
| 9 | // * CMPSD and MOVSD have the same name as the string instruction mnemonics CMPSD (CMPS) and |
| 10 | // MOVSD (MOVS); however, the former refer to scalar double-precision floating-points whereas |
| 11 | // the latter refer to doubleword strings. |
| 12 | // Integer Instructions: |
| 13 | // MOVDQ2Q, MOVDQA, MOVDQU, MOVQ2DQ, PADDQ, PSUBQ, PMULUDQ, PSHUFHW, PSHUFLW, PSHUFD, PSLLDQ, PSRLDQ, PUNPCKHQDQ, PUNPCKLQDQ |
| 14 | // The MULPD instruction multiplies two vectors of doubles using SSE2 instructions. |
| 15 | |
// multiply_vectors_sse2 multiplies two pairs of f64 values element-wise with the
// SSE2 MULPD instruction.
//
// `a` and `b` must each point to two consecutive readable f64 values, and
// `result` must point to two consecutive writable f64 values. After the call,
// result[0] == a[0] * b[0] and result[1] == a[1] * b[1].
// No 16-byte alignment is required, because every load and store uses MOVUPD.
//
// The `@[if ...]` attribute restricts this function to amd64 builds that use
// neither tcc nor MSVC.
@[if amd64 && !tinyc && !msvc]
fn multiply_vectors_sse2(a &f64, b &f64, result &f64) {
	unsafe {
		// Only the pointer values are passed as register inputs, so the C compiler
		// cannot see that the asm reads `*a`/`*b` and writes `*result`. The `memory`
		// clobber tells it that memory is accessed here, so it must not cache or
		// reorder loads/stores of those doubles across this statement.
		asm volatile amd64 {
			movupd xmm0, [a] // Load 2 doubles from array a into SSE2 register xmm0
			movupd xmm1, [b] // Load 2 doubles from array b into SSE2 register xmm1
			mulpd xmm0, xmm1 // xmm0 = xmm0 * xmm1, lane by lane
			movupd [result], xmm0 // Store both products back to memory
			; ; r (a)
			  r (b)
			  r (result)
			; xmm0
			  xmm1
			  memory
		}
	}
}
| 32 | |
// main runs the SSE2 helper on two 2-element f64 vectors, prints the products
// and checks them against the expected values.
fn main() {
	lhs := [f64(1.5), 2.5]
	rhs := [f64(3.5), 4.5]
	// Destination buffer with room for the two products.
	product := []f64{len: 2}
	multiply_vectors_sse2(&lhs[0], &rhs[0], &product[0])
	println(product)
	// Lane 0: 1.5 * 3.5 = 5.25
	// Lane 1: 2.5 * 4.5 = 11.25
	expected := [f64(5.25), 11.25]
	assert product == expected
}
| 43 | |