32 lines
1.1 KiB
V

// SSE Instruction Set
// SSE4.1: Added with later Core 2
// MPSADBW, PHMINPOSUW, PMULLD, PMULDQ, DPPS, DPPD, BLENDPS, BLENDPD, BLENDVPS, BLENDVPD,
// PBLENDVB, PBLENDW, PMINSB, PMAXSB, PMINUW, PMAXUW, PMINUD, PMAXUD, PMINSD, PMAXSD, ROUNDPS,
// ROUNDSS, ROUNDPD, ROUNDSD, INSERTPS, PINSRB, PINSRD, PINSRQ, EXTRACTPS, PEXTRB, PEXTRW,
// PEXTRD, PEXTRQ, PMOVSXBW, PMOVZXBW, PMOVSXBD, PMOVZXBD, PMOVSXBQ, PMOVZXBQ, PMOVSXWD,
// PMOVZXWD, PMOVSXWQ, PMOVZXWQ, PMOVSXDQ, PMOVZXDQ, PTEST, PCMPEQQ, PACKUSDW, MOVNTDQA
// round_floats_sse4_1 rounds the 4 consecutive f32 values starting at `a`
// with the SSE4.1 ROUNDPS instruction and stores the 4 rounded values
// starting at `result`.
//
// Both pointers must reference at least 4 readable/writable f32 values;
// no particular alignment is needed because MOVUPS is the unaligned move.
// The immediate 0 selects round-to-nearest with ties going to the even
// integer, so 2.5 becomes 2.0 and 3.5 becomes 4.0.
//
// The function is only compiled for amd64 with a compiler that accepts this
// inline assembly (not tinyc, not msvc).
@[if amd64 && !tinyc && !msvc]
fn round_floats_sse4_1(a &f32, result &f32) {
	unsafe {
		// The asm touches memory only through the two pointer operands, which
		// the C compiler cannot see from the operand list alone. The `memory`
		// clobber tells it that memory is read and written here, so it must not
		// cache or reorder accesses to the arrays around the asm statement.
		asm volatile amd64 {
			movups xmm0, [a] // Load 4 floats from array a into xmm0
			roundps xmm0, xmm0, 0 // Round to nearest integer (ties to even)
			movups [result], xmm0 // Store the 4 rounded floats into the result array
			; ; r (a)
			  r (result)
			; xmm0
			  memory
		}
	}
}
// main rounds four sample floats with round_floats_sse4_1, prints the result
// and asserts that it matches the expected values.
//
// The demo is wrapped in the same compile-time condition as the helper.
// When that condition is false, V removes the call to the conditionally
// compiled function, `result` would stay zero-filled, and the assertion
// would fail even though nothing is actually wrong.
fn main() {
	$if amd64 && !tinyc && !msvc {
		a := [f32(1.2), 2.5, 3.8, 4.4]
		result := []f32{len: 4}
		// Rounding mode 0 corresponds to rounding to the nearest integer
		// (ties go to the even integer)
		round_floats_sse4_1(&a[0], &result[0])
		println(result)
		// The expected rounded result should be [1.0, 2.0, 4.0, 4.0]
		// (2.5 rounds down to 2.0 because of ties-to-even)
		assert result == [f32(1.0), 2.0, 4.0, 4.0]
	}
}