v2 / examples / cpu_features / SSE_and_MMX_Extensions / sse4_1.v
31 lines · 29 sloc · 1.12 KB · 05377f3c0378ce0285bc8584106bf78865c551a6
Raw
// SSE Instruction Set
// SSE4.1: Added with later Core 2
// MPSADBW, PHMINPOSUW, PMULLD, PMULDQ, DPPS, DPPD, BLENDPS, BLENDPD, BLENDVPS, BLENDVPD,
// PBLENDVB, PBLENDW, PMINSB, PMAXSB, PMINUW, PMAXUW, PMINUD, PMAXUD, PMINSD, PMAXSD, ROUNDPS,
// ROUNDSS, ROUNDPD, ROUNDSD, INSERTPS, PINSRB, PINSRD, PINSRQ, EXTRACTPS, PEXTRB, PEXTRW,
// PEXTRD, PEXTRQ, PMOVSXBW, PMOVZXBW, PMOVSXBD, PMOVZXBD, PMOVSXBQ, PMOVZXBQ, PMOVSXWD,
// PMOVZXWD, PMOVSXWQ, PMOVZXWQ, PMOVSXDQ, PMOVZXDQ, PTEST, PCMPEQQ, PACKUSDW, MOVNTDQA
8
// round_floats_sse4_1 rounds the 4 packed f32 values starting at `a` with the
// SSE4.1 ROUNDPS instruction and writes the 4 rounded values starting at `result`.
//
// Both pointers must reference at least 4 consecutive f32 values (16 bytes);
// the unaligned MOVUPS forms are used, so no particular alignment is required.
// The ROUNDPS immediate 0 selects round-to-nearest with ties going to the even
// integer (so 2.5 becomes 2.0).
//
// The function only exists on amd64 with a compiler that understands GCC-style
// inline assembly; on other targets the attribute below compiles it, and every
// call to it, out of the program.
@[if amd64 && !tinyc && !msvc]
fn round_floats_sse4_1(a &f32, result &f32) {
	unsafe {
		asm volatile amd64 {
			movups xmm0, [a] // xmm0 = the 4 floats at a
			roundps xmm0, xmm0, 0 // imm8 = 0: round to nearest, ties to even
			movups [result], xmm0 // store the 4 rounded floats at result
			; ; r (a)
			  r (result)
			; xmm0
			  memory // the block stores through `result`, so the compiler must not cache or reorder memory accesses around it
		}
	}
}
22
// main rounds a fixed 4-element f32 array with round_floats_sse4_1 and checks
// the outcome.
fn main() {
	a := [f32(1.2), 2.5, 3.8, 4.4]
	result := []f32{len: 4}
	// Rounding mode 0 corresponds to rounding to the nearest integer
	round_floats_sse4_1(&a[0], &result[0])
	// round_floats_sse4_1 is declared with `@[if amd64 && !tinyc && !msvc]`, so the
	// call above is compiled out on other targets and `result` stays zeroed.
	// Only verify the outcome where the call actually happened.
	$if amd64 && !tinyc && !msvc {
		println(result)
		// The expected rounded result should be [1.0, 2.0, 4.0, 4.0]
		assert result == [f32(1.0), 2.0, 4.0, 4.0]
	} $else {
		println('sse4_1 example skipped: requires amd64 and a compiler other than tcc/msvc')
	}
}
32