Gitly


1 // SSE Instruction Set
2 // SSE: Added with Pentium III
3 // Floating-point Instructions:
4 // ADDPS, ADDSS, CMPPS, CMPSS, COMISS, CVTPI2PS, CVTPS2PI, CVTSI2SS, CVTSS2SI, CVTTPS2PI, CVTTSS2SI,
5 // DIVPS, DIVSS, LDMXCSR, MAXPS, MAXSS, MINPS, MINSS, MOVAPS, MOVHLPS, MOVHPS, MOVLHPS, MOVLPS,
6 // MOVMSKPS, MOVNTPS, MOVSS, MOVUPS, MULPS, MULSS, RCPPS, RCPSS, RSQRTPS, RSQRTSS, SHUFPS, SQRTPS,
7 // SQRTSS, STMXCSR, SUBPS, SUBSS, UCOMISS, UNPCKHPS, UNPCKLPS
8 //
9 // Integer Instructions:
10 // ANDNPS, ANDPS, ORPS, PAVGB, PAVGW, PEXTRW, PINSRW, PMAXSW, PMAXUB, PMINSW, PMINUB, PMOVMSKB, PMULHUW, PSADBW, PSHUFW, XORPS
11 // The ADDPS instruction adds two vectors of floats using SSE instructions.
12 
// add_vectors_sse adds the four f32 values starting at `a` to the four f32
// values starting at `b`, lane by lane, and stores the four sums starting at
// `result`, using the SSE ADDPS instruction.
// Each pointer must reference at least four consecutive f32 values (16 bytes).
// MOVUPS is the unaligned move, so no 16-byte alignment is required.
// The attribute limits this function to amd64 builds that use neither tinyc nor msvc.
@[if amd64 && !tinyc && !msvc]
fn add_vectors_sse(a &f32, b &f32, result &f32) {
    unsafe {
        // Operand sections below: no outputs; `a`, `b` and `result` are passed
        // as register inputs holding addresses. The clobber list names the two
        // scratch registers plus `memory`, because the final MOVUPS writes
        // through `result` and the compiler cannot see that store otherwise.
        asm volatile amd64 {
            movups xmm0, [a] // Load 4 floats from array a into SSE register xmm0
            movups xmm1, [b] // Load 4 floats from array b into SSE register xmm1
            addps xmm0, xmm1 // xmm0 = xmm0 + xmm1, four lanes at once
            movups [result], xmm0 // Store the 4 sums back to memory
            ; ; r (a)
              r (b)
              r (result)
            ; xmm0
              xmm1
              memory
        }
    }
}
29 
// main runs the SSE vector addition on two fixed 4-element f32 arrays,
// prints the resulting array and asserts that every lane equals 5.0.
fn main() {
    lhs := [f32(1.0), 2.0, 3.0, 4.0]
    rhs := [f32(4.0), 3.0, 2.0, 1.0]
    // Destination buffer: four zero-initialised f32 slots filled in by the asm routine.
    sums := []f32{len: 4}
    expected := [f32(5.0), 5.0, 5.0, 5.0]
    // Pass the address of the first element of each array.
    add_vectors_sse(&lhs[0], &rhs[0], &sums[0])
    println(sums)
    assert sums == expected
}
38

1	// SSE Instruction Set
2	// SSE: Added with Pentium III
3	// Floating-point Instructions:
4	// ADDPS, ADDSS, CMPPS, CMPSS, COMISS, CVTPI2PS, CVTPS2PI, CVTSI2SS, CVTSS2SI, CVTTPS2PI, CVTTSS2SI,
5	// DIVPS, DIVSS, LDMXCSR, MAXPS, MAXSS, MINPS, MINSS, MOVAPS, MOVHLPS, MOVHPS, MOVLHPS, MOVLPS,
6	// MOVMSKPS, MOVNTPS, MOVSS, MOVUPS, MULPS, MULSS, RCPPS, RCPSS, RSQRTPS, RSQRTSS, SHUFPS, SQRTPS,
7	// SQRTSS, STMXCSR, SUBPS, SUBSS, UCOMISS, UNPCKHPS, UNPCKLPS
8	//
9	// Integer Instructions:
10	// ANDNPS, ANDPS, ORPS, PAVGB, PAVGW, PEXTRW, PINSRW, PMAXSW, PMAXUB, PMINSW, PMINUB, PMOVMSKB, PMULHUW, PSADBW, PSHUFW, XORPS
11	// The ADDPS instruction adds two vectors of floats using SSE instructions.
12
// add_vectors_sse performs a lane-wise sum of two groups of four f32 values:
// it reads four floats at `a` and four at `b`, adds them with the SSE ADDPS
// instruction and writes the four results at `result`.
// Each pointer must reference at least four consecutive f32 values (16 bytes).
// The loads and the store use MOVUPS, the unaligned move, so the pointers do
// not have to be 16-byte aligned.
// The attribute limits this function to amd64 builds that use neither tinyc nor msvc.
@[if amd64 && !tinyc && !msvc]
fn add_vectors_sse(a &f32, b &f32, result &f32) {
	unsafe {
		// The asm block has no output operands. The three pointers are register
		// inputs. `memory` is listed as a clobber next to xmm0/xmm1 because the
		// block stores through `result`, which the compiler has no other way
		// to know about.
		asm volatile amd64 {
			movups xmm0, [a] // Load 4 floats from array a into SSE register xmm0
			movups xmm1, [b] // Load 4 floats from array b into SSE register xmm1
			addps xmm0, xmm1 // xmm0 = xmm0 + xmm1, four lanes at once
			movups [result], xmm0 // Store the 4 sums back to memory
			; ; r (a)
			  r (b)
			  r (result)
			; xmm0
			  xmm1
			  memory
		}
	}
}
29
// main is the demo entry point: it adds two constant 4-element f32 arrays with
// add_vectors_sse, prints the output array and asserts each element is 5.0.
fn main() {
	first := [f32(1.0), 2.0, 3.0, 4.0]
	second := [f32(4.0), 3.0, 2.0, 1.0]
	want := [f32(5.0), 5.0, 5.0, 5.0]
	// Output storage for the four sums, zero-initialised.
	got := []f32{len: 4}
	// The routine takes raw element pointers, so hand it the first slot of each array.
	add_vectors_sse(&first[0], &second[0], &got[0])
	println(got)
	assert got == want
}
38