v / examples / cpu_features / SSE_and_MMX_Extensions / sse2.v
42 lines · 40 sloc · 1.63 KB · 05377f3c0378ce0285bc8584106bf78865c551a6
Raw
// SSE Instruction Set
// SSE2: Added with Pentium 4
// Floating-point Instructions:
// ADDPD, ADDSD, ANDNPD, ANDPD, CMPPD, CMPSD*, COMISD, CVTDQ2PD, CVTDQ2PS, CVTPD2DQ, CVTPD2PI,
// CVTPD2PS, CVTPI2PD, CVTPS2DQ, CVTPS2PD, CVTSD2SI, CVTSD2SS, CVTSI2SD, CVTSS2SD, CVTTPD2DQ,
// CVTTPD2PI, CVTTPS2DQ, CVTTSD2SI, DIVPD, DIVSD, MAXPD, MAXSD, MINPD, MINSD, MOVAPD, MOVHPD,
// MOVLPD, MOVMSKPD, MOVSD*, MOVUPD, MULPD, MULSD, ORPD, SHUFPD, SQRTPD, SQRTSD, SUBPD, SUBSD,
// UCOMISD, UNPCKHPD, UNPCKLPD, XORPD
// * CMPSD and MOVSD have the same name as the string instruction mnemonics CMPSD (CMPS) and
// MOVSD (MOVS); however, the former refer to scalar double-precision floating-point values
// whereas the latter refer to doubleword strings.
// Integer Instructions:
// MOVDQ2Q, MOVDQA, MOVDQU, MOVQ2DQ, PADDQ, PSUBQ, PMULUDQ, PSHUFHW, PSHUFLW, PSHUFD, PSLLDQ, PSRLDQ, PUNPCKHQDQ, PUNPCKLQDQ
// The MULPD instruction multiplies two vectors of doubles using SSE2 instructions.
15
// multiply_vectors_sse2 multiplies two packed pairs of f64 values element-wise
// with the SSE2 MULPD instruction: result[i] = a[i] * b[i] for i in 0 .. 2.
//
// `a` and `b` must each point to two readable f64 values, and `result` to two
// writable f64 values. The unaligned MOVUPD form is used for every load and
// store, so the pointers do not need to be 16-byte aligned.
//
// Clobbers: xmm0, xmm1 and `memory`. The `memory` clobber is required because
// the pointers are passed as plain register inputs: without it the C compiler
// does not know that the asm reads through `a`/`b` and writes through `result`,
// and may reorder or cache surrounding memory accesses across the asm block.
//
// The attribute below restricts the function to amd64 builds that are not
// compiled with tinyc or msvc.
@[if amd64 && !tinyc && !msvc]
fn multiply_vectors_sse2(a &f64, b &f64, result &f64) {
	unsafe {
		asm volatile amd64 {
			movupd xmm0, [a] // xmm0 = { a[0], a[1] }
			movupd xmm1, [b] // xmm1 = { b[0], b[1] }
			mulpd xmm0, xmm1 // xmm0 = { a[0] * b[0], a[1] * b[1] }
			movupd [result], xmm0 // result[0], result[1] = xmm0
			; ; r (a)
			  r (b)
			  r (result)
			; xmm0
			  xmm1
			  memory
		}
	}
}
32
// main multiplies two 2-element f64 vectors with multiply_vectors_sse2, prints
// the product, and asserts that it matches the expected element-wise result.
fn main() {
	lhs := [f64(1.5), 2.5]
	rhs := [f64(3.5), 4.5]
	// Destination buffer: two f64 slots that the SSE2 routine stores into.
	result := []f64{len: 2}
	multiply_vectors_sse2(&lhs[0], &rhs[0], &result[0])
	println(result)
	// Expected element-wise products:
	//   lhs[0] * rhs[0] = 1.5 * 3.5 = 5.25
	//   lhs[1] * rhs[1] = 2.5 * 4.5 = 11.25
	assert result == [f64(5.25), 11.25]
}
43