v2 / examples / cpu_features / SSE_and_MMX_Extensions / sse.v
37 lines · 35 sloc · 1.31 KB · 05377f3c0378ce0285bc8584106bf78865c551a6
Raw
1// SSE Instruction Set
2// SSE: Added with Pentium III
3// Floating-point Instructions:
4// ADDPS, ADDSS, CMPPS, CMPSS, COMISS, CVTPI2PS, CVTPS2PI, CVTSI2SS, CVTSS2SI, CVTTPS2PI, CVTTSS2SI,
5// DIVPS, DIVSS, LDMXCSR, MAXPS, MAXSS, MINPS, MINSS, MOVAPS, MOVHLPS, MOVHPS, MOVLHPS, MOVLPS,
6// MOVMSKPS, MOVNTPS, MOVSS, MOVUPS, MULPS, MULSS, RCPPS, RCPSS, RSQRTPS, RSQRTSS, SHUFPS, SQRTPS,
7// SQRTSS, STMXCSR, SUBPS, SUBSS, UCOMISS, UNPCKHPS, UNPCKLPS
8//
9// Integer Instructions:
10// ANDNPS, ANDPS, ORPS, PAVGB, PAVGW, PEXTRW, PINSRW, PMAXSW, PMAXUB, PMINSW, PMINUB, PMOVMSKB, PMULHUW, PSADBW, PSHUFW, XORPS
11// The ADDPS instruction adds two vectors of floats using SSE instructions.
12
// add_vectors_sse adds two groups of four packed `f32` values with the SSE
// `addps` instruction and stores the four sums through `result`.
//
// `a`, `b` and `result` must each point to at least four consecutive `f32`
// values: every `movups` below transfers a full 128-bit XMM register.
// `movups` is the unaligned move, so the pointers need no 16-byte alignment.
//
// The `@[if ...]` attribute restricts the function to amd64 builds that use
// neither tcc nor MSVC; per the V documentation for `@[if]`, calls are left
// out of the program on other configurations.
@[if amd64 && !tinyc && !msvc]
fn add_vectors_sse(a &f32, b &f32, result &f32) {
	unsafe {
		// Operand sections are separated by `;` in the order
		// outputs ; inputs ; clobbers. There are no register outputs because
		// the sums are written to memory through the `result` pointer.
		asm volatile amd64 {
			movups xmm0, [a] // xmm0 = four floats read from `a`
			movups xmm1, [b] // xmm1 = four floats read from `b`
			addps xmm0, xmm1 // xmm0 = xmm0 + xmm1, lane by lane
			movups [result], xmm0 // write the four sums to `result`
			; ; r (a)
			  r (b)
			  r (result)
			; xmm0
			  xmm1
			  memory // the store through `result` is invisible to the compiler otherwise
		}
	}
}
29
// main adds two 4-element `f32` vectors with `add_vectors_sse`, prints the
// result, and checks it where the SSE routine is actually compiled in.
fn main() {
	a := [f32(1.0), 2.0, 3.0, 4.0]
	b := [f32(4.0), 3.0, 2.0, 1.0]
	// The buffer starts zeroed and is filled in through the pointer passed below.
	result := []f32{len: 4}
	add_vectors_sse(&a[0], &b[0], &result[0])
	println(result)
	// `add_vectors_sse` is declared with `@[if amd64 && !tinyc && !msvc]`, so
	// on other configurations the call above is compiled out and `result`
	// keeps its zero values. Only verify the sums when the call really ran.
	$if amd64 && !tinyc && !msvc {
		assert result == [f32(5.0), 5.0, 5.0, 5.0]
	} $else {
		println('add_vectors_sse is not compiled for this target/compiler; result left unchanged')
	}
}
38