Gitly


1 // SSE Instruction Set
2 // SSE4.1: Added with later Core 2
3 // MPSADBW, PHMINPOSUW, PMULLD, PMULDQ, DPPS, DPPD, BLENDPS, BLENDPD, BLENDVPS, BLENDVPD,
4 // PBLENDVB, PBLENDW, PMINSB, PMAXSB, PMINUW, PMAXUW, PMINUD, PMAXUD, PMINSD, PMAXSD, ROUNDPS,
5 // ROUNDSS, ROUNDPD, ROUNDSD, INSERTPS, PINSRB, PINSRD, PINSRQ, EXTRACTPS, PEXTRB, PEXTRW,
6 // PEXTRD, PEXTRQ, PMOVSXBW, PMOVZXBW, PMOVSXBD, PMOVZXBD, PMOVSXBQ, PMOVZXBQ, PMOVSXWD,
7 // PMOVZXWD, PMOVSXWQ, PMOVZXWQ, PMOVSXDQ, PMOVZXDQ, PTEST, PCMPEQQ, PACKUSDW, MOVNTDQA
8 
// round_floats_sse4_1 rounds the 4 consecutive f32 values starting at `a` with the
// SSE4.1 ROUNDPS instruction and stores the 4 results starting at `result`.
// The immediate 0 selects round-to-nearest with ties going to the even value,
// so 2.5 becomes 2.0 and 3.5 becomes 4.0.
// Both pointers must reference at least 4 f32 values (16 bytes); the addresses do
// not need to be 16-byte aligned because the unaligned MOVUPS form is used.
// The function is only compiled for amd64 when the C backend is neither tcc nor msvc.
@[if amd64 && !tinyc && !msvc]
fn round_floats_sse4_1(a &f32, result &f32) {
    unsafe {
        asm volatile amd64 {
            movups xmm0, [a] // load 4 packed floats from `a` into xmm0
            roundps xmm0, xmm0, 0 // imm8 = 0: round to nearest, ties to even
            movups [result], xmm0 // write the 4 rounded floats to `result`
            ; ; r (a)
              r (result)
            ; xmm0
              memory // the store through `result` must be visible to the compiler
        }
    }
}
22 
// main rounds a fixed 4-element f32 sample with round_floats_sse4_1, prints the
// rounded values and asserts that they match the expected output.
fn main() {
    input := [f32(1.2), 2.5, 3.8, 4.4]
    rounded := []f32{len: 4}
    // The helper uses rounding mode 0, i.e. rounding to the nearest integer
    round_floats_sse4_1(&input[0], &rounded[0])
    println(rounded)
    // Expected rounded values for the sample above: [1.0, 2.0, 4.0, 4.0]
    expected := [f32(1.0), 2.0, 4.0, 4.0]
    assert rounded == expected
}
32

1	// SSE Instruction Set
2	// SSE4.1: Added with later Core 2
3	// MPSADBW, PHMINPOSUW, PMULLD, PMULDQ, DPPS, DPPD, BLENDPS, BLENDPD, BLENDVPS, BLENDVPD,
4	// PBLENDVB, PBLENDW, PMINSB, PMAXSB, PMINUW, PMAXUW, PMINUD, PMAXUD, PMINSD, PMAXSD, ROUNDPS,
5	// ROUNDSS, ROUNDPD, ROUNDSD, INSERTPS, PINSRB, PINSRD, PINSRQ, EXTRACTPS, PEXTRB, PEXTRW,
6	// PEXTRD, PEXTRQ, PMOVSXBW, PMOVZXBW, PMOVSXBD, PMOVZXBD, PMOVSXBQ, PMOVZXBQ, PMOVSXWD,
7	// PMOVZXWD, PMOVSXWQ, PMOVZXWQ, PMOVSXDQ, PMOVZXDQ, PTEST, PCMPEQQ, PACKUSDW, MOVNTDQA
8
// round_floats_sse4_1 rounds the 4 consecutive f32 values starting at `a` with the
// SSE4.1 ROUNDPS instruction and stores the 4 results starting at `result`.
// The immediate 0 selects round-to-nearest with ties going to the even value,
// so 2.5 becomes 2.0 and 3.5 becomes 4.0.
// Both pointers must reference at least 4 f32 values (16 bytes); the addresses do
// not need to be 16-byte aligned because the unaligned MOVUPS form is used.
// The function is only compiled for amd64 when the C backend is neither tcc nor msvc.
@[if amd64 && !tinyc && !msvc]
fn round_floats_sse4_1(a &f32, result &f32) {
    unsafe {
        asm volatile amd64 {
            movups xmm0, [a] // load 4 packed floats from `a` into xmm0
            roundps xmm0, xmm0, 0 // imm8 = 0: round to nearest, ties to even
            movups [result], xmm0 // write the 4 rounded floats to `result`
            ; ; r (a)
              r (result)
            ; xmm0
              memory // the store through `result` must be visible to the compiler
        }
    }
}
22
// main rounds a fixed 4-element f32 sample with round_floats_sse4_1, prints the
// rounded values and asserts that they match the expected output.
fn main() {
    input := [f32(1.2), 2.5, 3.8, 4.4]
    rounded := []f32{len: 4}
    // The helper uses rounding mode 0, i.e. rounding to the nearest integer
    round_floats_sse4_1(&input[0], &rounded[0])
    println(rounded)
    // Expected rounded values for the sample above: [1.0, 2.0, 4.0, 4.0]
    expected := [f32(1.0), 2.0, 4.0, 4.0]
    assert rounded == expected
}
32