| 1 | // SSE Instruction Set |
| 2 | // SSE: Added with Pentium III |
| 3 | // Floating-point Instructions: |
| 4 | // ADDPS, ADDSS, CMPPS, CMPSS, COMISS, CVTPI2PS, CVTPS2PI, CVTSI2SS, CVTSS2SI, CVTTPS2PI, CVTTSS2SI, |
| 5 | // DIVPS, DIVSS, LDMXCSR, MAXPS, MAXSS, MINPS, MINSS, MOVAPS, MOVHLPS, MOVHPS, MOVLHPS, MOVLPS, |
| 6 | // MOVMSKPS, MOVNTPS, MOVSS, MOVUPS, MULPS, MULSS, RCPPS, RCPSS, RSQRTPS, RSQRTSS, SHUFPS, SQRTPS, |
| 7 | // SQRTSS, STMXCSR, SUBPS, SUBSS, UCOMISS, UNPCKHPS, UNPCKLPS |
| 8 | // |
| 9 | // Integer Instructions: |
| 10 | // ANDNPS, ANDPS, ORPS, PAVGB, PAVGW, PEXTRW, PINSRW, PMAXSW, PMAXUB, PMINSW, PMINUB, PMOVMSKB, PMULHUW, PSADBW, PSHUFW, XORPS |
| 11 | // The ADDPS instruction adds two vectors of floats using SSE instructions. |
| 12 | |
// add_vectors_sse adds four packed f32 values read from `a` to four packed
// f32 values read from `b` and writes the four sums through `result`,
// using the SSE `addps` instruction.
//
// Contract for callers:
// - `a` and `b` must each point to at least 4 readable f32 values (16 bytes).
// - `result` must point to at least 4 writable f32 values (16 bytes).
// - No alignment is required: `movups` is the unaligned 128-bit move.
//
// The function is guarded by the `@[if ...]` attribute below, so it is only
// built for amd64 targets when the C backend compiler is neither tcc nor msvc.
// NOTE(review): presumably calls are compiled out when the condition is
// false, leaving `result` untouched - confirm against the V attribute docs.
//
// Clobbers: `xmm0` and `xmm1` are used as scratch registers. `memory` is
// declared as well because the last instruction stores through `result`
// while the operand list has no memory output; without it the C compiler is
// allowed to assume the asm statement does not modify memory and may cache or
// reorder accesses to the destination around it.
@[if amd64 && !tinyc && !msvc]
fn add_vectors_sse(a &f32, b &f32, result &f32) {
	unsafe {
		asm volatile amd64 {
			movups xmm0, [a] // xmm0 = 4 floats loaded from a
			movups xmm1, [b] // xmm1 = 4 floats loaded from b
			addps xmm0, xmm1 // xmm0 = xmm0 + xmm1, lane by lane
			movups [result], xmm0 // store the 4 sums through result
			; ; r (a)
			  r (b)
			  r (result)
			; xmm0
			  xmm1
			  memory
		}
	}
}
| 29 | |
// main demonstrates add_vectors_sse: it sums two 4-element f32 arrays into a
// zero-initialised destination array, prints the destination and asserts that
// every lane equals 5.0.
// NOTE(review): add_vectors_sse carries an `@[if amd64 && !tinyc && !msvc]`
// attribute; presumably the call is dropped when that condition is false,
// which would leave the destination all zeros and trip the assert - confirm.
fn main() {
	lhs := [f32(1.0), 2.0, 3.0, 4.0]
	rhs := [f32(4.0), 3.0, 2.0, 1.0]
	expected := [f32(5.0), 5.0, 5.0, 5.0]
	// Destination buffer with room for the four sums.
	sum := []f32{len: 4}
	// Pass the address of each array's first element; the callee reads and
	// writes four consecutive floats starting there.
	add_vectors_sse(&lhs[0], &rhs[0], &sum[0])
	println(sum)
	assert sum == expected
}
| 38 | |