mirror of
https://github.com/vlang/v.git
synced 2025-09-07 22:30:57 -04:00
43 lines
1.6 KiB
V
43 lines
1.6 KiB
V
// SSE Instruction Set
|
|
// SSE2: Added with Pentium 4
|
|
// Floating-point Instructions:
|
|
// ADDPD, ADDSD, ANDNPD, ANDPD, CMPPD, CMPSD*, COMISD, CVTDQ2PD, CVTDQ2PS, CVTPD2DQ, CVTPD2PI,
|
|
// CVTPD2PS, CVTPI2PD, CVTPS2DQ, CVTPS2PD, CVTSD2SI, CVTSD2SS, CVTSI2SD, CVTSS2SD, CVTTPD2DQ,
|
|
// CVTTPD2PI, CVTTPS2DQ, CVTTSD2SI, DIVPD, DIVSD, MAXPD, MAXSD, MINPD, MINSD, MOVAPD, MOVHPD,
|
|
// MOVLPD, MOVMSKPD, MOVSD*, MOVUPD, MULPD, MULSD, ORPD, SHUFPD, SQRTPD, SQRTSD, SUBPD, SUBSD,
|
|
// UCOMISD, UNPCKHPD, UNPCKLPD, XORPD
|
|
// * CMPSD and MOVSD have the same name as the string instruction mnemonics CMPSD (CMPS) and
|
|
// MOVSD (MOVS); however, the former refer to scalar double-precision floating-point values, whereas
|
|
// the latter refer to doubleword strings.
|
|
// Integer Instructions:
|
|
// MOVDQ2Q, MOVDQA, MOVDQU, MOVQ2DQ, PADDQ, PSUBQ, PMULUDQ, PSHUFHW, PSHUFLW, PSHUFD, PSLLDQ, PSRLDQ, PUNPCKHQDQ, PUNPCKLQDQ
|
|
// The MULPD instruction multiplies two vectors of doubles using SSE2 instructions.
|
|
|
|
// multiply_vectors_sse2 multiplies two packed pairs of f64 values element-wise:
//   result[0] = a[0] * b[0]
//   result[1] = a[1] * b[1]
// `a` and `b` must each point to at least 2 readable f64 values, and `result`
// to at least 2 writable f64 values. The unaligned MOVUPD form is used for every
// memory access, so the pointers do not need to be 16-byte aligned.
// The attribute below restricts this function to amd64 builds that use neither
// tinyc nor msvc.
@[if amd64 && !tinyc && !msvc]
fn multiply_vectors_sse2(a &f64, b &f64, result &f64) {
	unsafe {
		asm volatile amd64 {
			movupd xmm0, [a] // xmm0 = {a[0], a[1]}
			movupd xmm1, [b] // xmm1 = {b[0], b[1]}
			mulpd xmm0, xmm1 // xmm0 = {a[0] * b[0], a[1] * b[1]}
			movupd [result], xmm0 // result[0], result[1] = xmm0
			; ; r (a)
			  r (b)
			  r (result)
			; xmm0
			  xmm1
			  memory // the asm reads *a, *b and writes *result behind the compiler's back
		}
	}
}
|
|
|
|
fn main() {
	// Each operand holds exactly one packed pair of doubles.
	lhs := [f64(1.5), 2.5]
	rhs := [f64(3.5), 4.5]
	// Destination buffer with room for both products.
	products := []f64{len: 2}
	multiply_vectors_sse2(&lhs[0], &rhs[0], &products[0])
	println(products)
	// Expected element-wise products:
	//   1.5 * 3.5 = 5.25
	//   2.5 * 4.5 = 11.25
	expected := [f64(5.25), 11.25]
	assert products == expected
}
|