| 1 | // SSE Instruction Set |
| 2 | // SSE4.1: Added with later Core 2 |
| 3 | // MPSADBW, PHMINPOSUW, PMULLD, PMULDQ, DPPS, DPPD, BLENDPS, BLENDPD, BLENDVPS, BLENDVPD, |
| 4 | // PBLENDVB, PBLENDW, PMINSB, PMAXSB, PMINUW, PMAXUW, PMINUD, PMAXUD, PMINSD, PMAXSD, ROUNDPS, |
| 5 | // ROUNDSS, ROUNDPD, ROUNDSD, INSERTPS, PINSRB, PINSRD, PINSRQ, EXTRACTPS, PEXTRB, PEXTRW, |
| 6 | // PEXTRD, PEXTRQ, PMOVSXBW, PMOVZXBW, PMOVSXBD, PMOVZXBD, PMOVSXBQ, PMOVZXBQ, PMOVSXWD, |
| 7 | // PMOVZXWD, PMOVSXWQ, PMOVZXWQ, PMOVSXDQ, PMOVZXDQ, PTEST, PCMPEQQ, PACKUSDW, MOVNTDQA |
| 8 | |
// round_floats_sse4_1 rounds the four consecutive f32 values starting at `a` with the
// SSE4.1 ROUNDPS instruction and stores the four rounded values starting at `result`.
// Both pointers must reference at least four f32 values (16 bytes are read from `a`
// and 16 bytes are written to `result`). MOVUPS is used for both transfers, so the
// buffers do not have to be 16-byte aligned.
// Because of the `@[if ...]` attribute, the function and the calls to it are only
// compiled for amd64 targets built with a C compiler other than tcc or MSVC.
@[if amd64 && !tinyc && !msvc]
fn round_floats_sse4_1(a &f32, result &f32) {
	unsafe {
		asm volatile amd64 {
			movups xmm0, [a] // load 4 unaligned floats from `a` into xmm0
			roundps xmm0, xmm0, 0 // imm8 = 0: round to nearest integer, ties to even
			movups [result], xmm0 // store the 4 rounded floats at `result`
			; ; r (a)
			  r (result)
			; xmm0
			  memory // the asm reads `a` and writes `result` through raw pointers
		}
	}
}
| 22 | |
// main rounds a sample vector of four floats with round_floats_sse4_1, prints the
// output buffer and, where the SSE4.1 routine is actually compiled in, verifies it.
fn main() {
	a := [f32(1.2), 2.5, 3.8, 4.4]
	result := []f32{len: 4}
	// Rounding mode 0 rounds to the nearest integer; exact halves go to the even
	// neighbour, which is why 2.5 becomes 2.0 rather than 3.0.
	round_floats_sse4_1(&a[0], &result[0])
	println(result)
	// round_floats_sse4_1 is tagged `@[if amd64 && !tinyc && !msvc]`: on any other
	// target/compiler the call above is compiled out and `result` stays zeroed, so the
	// check is only meaningful (and only performed) under the same condition.
	$if amd64 && !tinyc && !msvc {
		// The expected rounded result is [1.0, 2.0, 4.0, 4.0]
		assert result == [f32(1.0), 2.0, 4.0, 4.0]
	}
}
| 32 | |