From 118980e02e59ff31871df59dce257075394f3533 Mon Sep 17 00:00:00 2001
From: Aaditya Dhruv
Date: Sun, 25 Jan 2026 15:10:37 -0600
Subject: wip

---
 include/cglm/simd/sse2/quat.h | 54 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 include/cglm/simd/sse2/quat.h

(limited to 'include/cglm/simd/sse2/quat.h')

diff --git a/include/cglm/simd/sse2/quat.h b/include/cglm/simd/sse2/quat.h
new file mode 100644
index 0000000..def0fe2
--- /dev/null
+++ b/include/cglm/simd/sse2/quat.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c), Recep Aslantas.
+ *
+ * MIT License (MIT), http://opensource.org/licenses/MIT
+ * Full license can be found in the LICENSE file
+ */
+
+#ifndef cglm_quat_simd_h
+#define cglm_quat_simd_h
+#if defined( __SSE__ ) || defined( __SSE2__ )
+
+#include "../../common.h"
+#include "../intrin.h"
+
+/*
+ * SSE quaternion multiply: stores into dest the product expanded as
+ *   + (a1 b2 + b1 a2 + c1 d2 − d1 c2)i
+ *   + (a1 c2 − b1 d2 + c1 a2 + d1 b2)j
+ *   + (a1 d2 + b1 c2 − c1 b2 + d1 a2)k
+ *     a1 a2 − b1 b2 − c1 c2 − d1 d2
+ * (subscript 1 presumably denotes p and 2 denotes q -- confirm)
+ */
+CGLM_INLINE
+void
+glm_quat_mul_sse2(versor p, versor q, versor dest) {
+  __m128 vp, vq, acc, px, py, pz;
+  __m128 sgn_x, sgn_y, sgn_z;
+
+  vp = glmm_load(p); /* 3 2 1 0 */
+  vq = glmm_load(q);
+
+  /* the splatted w of p times q seeds the accumulator */
+  acc = _mm_mul_ps(glmm_splat_w(vp), vq);
+
+  /* one mask constant; the other two are lane rearrangements of it */
+  sgn_x = glmm_float32x4_SIGNMASK_NPNP; /* TODO: _mm_set1_ss() + shuff ? */
+  sgn_y = _mm_unpackhi_ps(sgn_x, sgn_x);
+  sgn_z = glmm_shuff1(sgn_x, 3, 2, 0, 1);
+
+  /* broadcast x, y, z of p and XOR each with its own mask */
+  px = _mm_xor_ps(glmm_splat_x(vp), sgn_x);
+  py = _mm_xor_ps(glmm_splat_y(vp), sgn_y);
+  pz = _mm_xor_ps(glmm_splat_z(vp), sgn_z);
+
+  /* fold the x, y, z terms in, each against a shuffled copy of q */
+  acc = glmm_fmadd(px, glmm_shuff1(vq, 0, 1, 2, 3), acc);
+  acc = glmm_fmadd(py, glmm_shuff1(vq, 1, 0, 3, 2), acc);
+  acc = glmm_fmadd(pz, glmm_shuff1(vq, 2, 3, 0, 1), acc);
+
+  glmm_store(dest, acc);
+}
+
+#endif
+#endif /* cglm_quat_simd_h */
-- cgit