Gitly


1 // SSE Instruction Set
2 // SSE2: Added with Pentium 4
3 // Floating-point Instructions:
4 // ADDPD, ADDSD, ANDNPD, ANDPD, CMPPD, CMPSD*, COMISD, CVTDQ2PD, CVTDQ2PS, CVTPD2DQ, CVTPD2PI,
5 // CVTPD2PS, CVTPI2PD, CVTPS2DQ, CVTPS2PD, CVTSD2SI, CVTSD2SS, CVTSI2SD, CVTSS2SD, CVTTPD2DQ,
6 // CVTTPD2PI, CVTTPS2DQ, CVTTSD2SI, DIVPD, DIVSD, MAXPD, MAXSD, MINPD, MINSD, MOVAPD, MOVHPD,
7 // MOVLPD, MOVMSKPD, MOVSD*, MOVUPD, MULPD, MULSD, ORPD, SHUFPD, SQRTPD, SQRTSD, SUBPD, SUBSD,
8 // UCOMISD, UNPCKHPD, UNPCKLPD, XORPD
9 // * CMPSD and MOVSD have the same name as the string instruction mnemonics CMPSD (CMPS) and
10 // MOVSD (MOVS); however, the former refer to scalar double-precision floating-points whereas
11 // the latter refer to doubleword strings.
12 // Integer Instructions:
13 // MOVDQ2Q, MOVDQA, MOVDQU, MOVQ2DQ, PADDQ, PSUBQ, PMULUDQ, PSHUFHW, PSHUFLW, PSHUFD, PSLLDQ, PSRLDQ, PUNPCKHQDQ, PUNPCKLQDQ
14 // The MULPD instruction multiplies two vectors of doubles using SSE2 instructions.
15 
// multiply_vectors_sse2 multiplies two pairs of f64 values element-wise with
// the SSE2 `mulpd` instruction and stores the two products through `result`.
//
// `a` and `b` must each point to at least 2 consecutive readable f64 values and
// `result` to at least 2 consecutive writable ones: 16 bytes are loaded from
// each input and 16 bytes are stored to `result`. `movupd` is the unaligned
// packed move, so the pointers need no particular alignment.
//
// The `@[if ...]` attribute restricts the function to amd64 builds that use
// neither tinyc nor msvc.
@[if amd64 && !tinyc && !msvc]
fn multiply_vectors_sse2(a &f64, b &f64, result &f64) {
	unsafe {
		asm volatile amd64 {
			movupd xmm0, [a] // xmm0 = a[0], a[1]
			movupd xmm1, [b] // xmm1 = b[0], b[1]
			mulpd xmm0, xmm1 // xmm0 = a[0] * b[0], a[1] * b[1]
			movupd [result], xmm0 // result[0], result[1] = xmm0
			; ; r (a) // no outputs; the three pointers are register inputs
			  r (b)
			  r (result)
			; xmm0 // clobbers: both scratch registers ...
			  xmm1
			  memory // ... and memory, since the block loads and stores through the pointers
		}
	}
}
32 
// main multiplies [1.5, 2.5] by [3.5, 4.5] element-wise via multiply_vectors_sse2,
// prints the product vector and asserts that it equals [5.25, 11.25].
fn main() {
	lhs := [f64(1.5), 2.5]
	rhs := [f64(3.5), 4.5]
	product := []f64{len: 2}
	// Pass the address of each array's first element; the callee works on 2 doubles.
	// NOTE(review): the callee carries an `@[if amd64 && !tinyc && !msvc]` attribute -
	// confirm what happens to this call (and the assert below) on other targets.
	multiply_vectors_sse2(&lhs[0], &rhs[0], &product[0])
	println(product)
	// 1.5 * 3.5 = 5.25
	// 2.5 * 4.5 = 11.25
	expected := [f64(5.25), 11.25]
	assert product == expected
}
43

1	// SSE Instruction Set
2	// SSE2: Added with Pentium 4
3	// Floating-point Instructions:
4	// ADDPD, ADDSD, ANDNPD, ANDPD, CMPPD, CMPSD*, COMISD, CVTDQ2PD, CVTDQ2PS, CVTPD2DQ, CVTPD2PI,
5	// CVTPD2PS, CVTPI2PD, CVTPS2DQ, CVTPS2PD, CVTSD2SI, CVTSD2SS, CVTSI2SD, CVTSS2SD, CVTTPD2DQ,
6	// CVTTPD2PI, CVTTPS2DQ, CVTTSD2SI, DIVPD, DIVSD, MAXPD, MAXSD, MINPD, MINSD, MOVAPD, MOVHPD,
7	// MOVLPD, MOVMSKPD, MOVSD*, MOVUPD, MULPD, MULSD, ORPD, SHUFPD, SQRTPD, SQRTSD, SUBPD, SUBSD,
8	// UCOMISD, UNPCKHPD, UNPCKLPD, XORPD
9	// * CMPSD and MOVSD have the same name as the string instruction mnemonics CMPSD (CMPS) and
10	// MOVSD (MOVS); however, the former refer to scalar double-precision floating-points whereas
11	// the latter refer to doubleword strings.
12	// Integer Instructions:
13	// MOVDQ2Q, MOVDQA, MOVDQU, MOVQ2DQ, PADDQ, PSUBQ, PMULUDQ, PSHUFHW, PSHUFLW, PSHUFD, PSLLDQ, PSRLDQ, PUNPCKHQDQ, PUNPCKLQDQ
14	// The MULPD instruction multiplies two vectors of doubles using SSE2 instructions.
15
// multiply_vectors_sse2 multiplies two pairs of f64 values element-wise with
// the SSE2 `mulpd` instruction and stores the two products through `result`.
//
// `a` and `b` must each point to at least 2 consecutive readable f64 values and
// `result` to at least 2 consecutive writable ones: 16 bytes are loaded from
// each input and 16 bytes are stored to `result`. `movupd` is the unaligned
// packed move, so the pointers need no particular alignment.
//
// The `@[if ...]` attribute restricts the function to amd64 builds that use
// neither tinyc nor msvc.
@[if amd64 && !tinyc && !msvc]
fn multiply_vectors_sse2(a &f64, b &f64, result &f64) {
	unsafe {
		asm volatile amd64 {
			movupd xmm0, [a] // xmm0 = a[0], a[1]
			movupd xmm1, [b] // xmm1 = b[0], b[1]
			mulpd xmm0, xmm1 // xmm0 = a[0] * b[0], a[1] * b[1]
			movupd [result], xmm0 // result[0], result[1] = xmm0
			; ; r (a) // no outputs; the three pointers are register inputs
			  r (b)
			  r (result)
			; xmm0 // clobbers: both scratch registers ...
			  xmm1
			  memory // ... and memory, since the block loads and stores through the pointers
		}
	}
}
32
// main multiplies [1.5, 2.5] by [3.5, 4.5] element-wise via multiply_vectors_sse2,
// prints the product vector and asserts that it equals [5.25, 11.25].
fn main() {
	lhs := [f64(1.5), 2.5]
	rhs := [f64(3.5), 4.5]
	product := []f64{len: 2}
	// Pass the address of each array's first element; the callee works on 2 doubles.
	// NOTE(review): the callee carries an `@[if amd64 && !tinyc && !msvc]` attribute -
	// confirm what happens to this call (and the assert below) on other targets.
	multiply_vectors_sse2(&lhs[0], &rhs[0], &product[0])
	println(product)
	// 1.5 * 3.5 = 5.25
	// 2.5 * 4.5 = 11.25
	expected := [f64(5.25), 11.25]
	assert product == expected
}
43