rootHEAD master

author: Nikolas <nikolas@boutalas.com> 2024-10-27 12:52:55 +0200
committer: Nikolas <nikolas@boutalas.com> 2024-10-27 12:52:55 +0200
commit: 43394c8a8908442982e3a7e25975c31b3c952923 (patch)
tree: 2facd563e29f48fe3b0653ac5c113998940b4d5e /3rdparty/include/glm/gtx/simd_vec4.inl
1 files changed, 727 insertions, 0 deletions
diff --git a/3rdparty/include/glm/gtx/simd_vec4.inl b/3rdparty/include/glm/gtx/simd_vec4.inl
new file mode 100644
index 0000000..ba5387c
--- /dev/null
+++ b/3rdparty/include/glm/gtx/simd_vec4.inl
@@ -0,0 +1,727 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// OpenGL Mathematics Copyright (c) 2005 - 2014 G-Truc Creation (www.g-truc.net)
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Created : 2009-05-07
+// Updated : 2009-05-07
+// Licence : This source is under MIT License
+// File    : glm/gtx/simd_vec4.inl
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace glm{
+namespace detail{
+
+template <int Value>
+struct shuffle_mask
+{
+	enum{value = Value};
+};
+
+//////////////////////////////////////
+// Implicit basic constructors
+
+GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD()
+#	ifdef GLM_FORCE_NO_CTOR_INIT
+		: Data(_mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f))
+#	endif
+{}
+
+GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(__m128 const & Data) :
+	Data(Data)
+{}
+
+GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(fvec4SIMD const & v) :
+	Data(v.Data)
+{}
+
+GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec4 const & v) :
+	Data(_mm_set_ps(v.w, v.z, v.y, v.x))
+{}
+
+//////////////////////////////////////
+// Explicit basic constructors
+
+GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s) :
+	Data(_mm_set1_ps(s))
+{}
+
+GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & x, float const & y, float const & z, float const & w) :
+//		Data(_mm_setr_ps(x, y, z, w))
+	Data(_mm_set_ps(w, z, y, x))
+{}
+/*
+GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const v[4]) :
+	Data(_mm_load_ps(v))
+{}
+*/
+//////////////////////////////////////
+// Swizzle constructors
+
+//fvec4SIMD(ref4<float> const & r);
+
+//////////////////////////////////////
+// Conversion vector constructors
+
+GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v, float const & s1, float const & s2) :
+	Data(_mm_set_ps(s2, s1, v.y, v.x))
+{}
+
+GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, vec2 const & v, float const & s2) :
+	Data(_mm_set_ps(s2, v.y, v.x, s1))
+{}
+
+GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, float const & s2, vec2 const & v) :
+	Data(_mm_set_ps(v.y, v.x, s2, s1))
+{}
+
+GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec3 const & v, float const & s) :
+	Data(_mm_set_ps(s, v.z, v.y, v.x))
+{}
+
+GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s, vec3 const & v) :
+	Data(_mm_set_ps(v.z, v.y, v.x, s))
+{}
+
+GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v1, vec2 const & v2) :
+	Data(_mm_set_ps(v2.y, v2.x, v1.y, v1.x))
+{}
+
+//GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(ivec4SIMD const & v) :
+//	Data(_mm_cvtepi32_ps(v.Data))
+//{}
+
+//////////////////////////////////////
+// Unary arithmetic operators
+
+GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator=(fvec4SIMD const & v)
+{
+	this->Data = v.Data;
+	return *this;
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(float const & s)
+{
+	this->Data = _mm_add_ps(Data, _mm_set_ps1(s));
+	return *this;
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(fvec4SIMD const & v)
+{
+	this->Data = _mm_add_ps(this->Data , v.Data);
+	return *this;
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(float const & s)
+{
+	this->Data = _mm_sub_ps(Data, _mm_set_ps1(s));
+	return *this;
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(fvec4SIMD const & v)
+{
+	this->Data = _mm_sub_ps(this->Data , v.Data);
+	return *this;
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(float const & s)
+{
+	this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s));
+	return *this;
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(fvec4SIMD const & v)
+{
+	this->Data = _mm_mul_ps(this->Data , v.Data);
+	return *this;
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(float const & s)
+{
+	this->Data = _mm_div_ps(Data, _mm_set1_ps(s));
+	return *this;
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(fvec4SIMD const & v)
+{
+	this->Data = _mm_div_ps(this->Data , v.Data);
+	return *this;
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator++()
+{
+	this->Data = _mm_add_ps(this->Data , glm::detail::one);
+	return *this;
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator--()
+{
+	this->Data = _mm_sub_ps(this->Data, glm::detail::one);
+	return *this;
+}
+
+//////////////////////////////////////
+// Swizzle operators
+
+template <comp X, comp Y, comp Z, comp W>
+GLM_FUNC_QUALIFIER fvec4SIMD fvec4SIMD::swizzle() const
+{
+	__m128 Data = _mm_shuffle_ps(
+		this->Data, this->Data, 
+		shuffle_mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value);
+	return fvec4SIMD(Data);
+}
+
+template <comp X, comp Y, comp Z, comp W>
+GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::swizzle()
+{
+	this->Data = _mm_shuffle_ps(
+		this->Data, this->Data, 
+		shuffle_mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value);
+	return *this;
+}
+
+// operator+
+GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v, float s)
+{
+	return fvec4SIMD(_mm_add_ps(v.Data, _mm_set1_ps(s)));
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD operator+ (float s, fvec4SIMD const & v)
+{
+	return fvec4SIMD(_mm_add_ps(_mm_set1_ps(s), v.Data));
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v1, fvec4SIMD const & v2)
+{
+	return fvec4SIMD(_mm_add_ps(v1.Data, v2.Data));
+}
+
+//operator-
+GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v, float s)
+{
+	return fvec4SIMD(_mm_sub_ps(v.Data, _mm_set1_ps(s)));
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD operator- (float s, fvec4SIMD const & v)
+{
+	return fvec4SIMD(_mm_sub_ps(_mm_set1_ps(s), v.Data));
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v1, fvec4SIMD const & v2)
+{
+	return fvec4SIMD(_mm_sub_ps(v1.Data, v2.Data));
+}
+
+//operator*
+GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, float s)
+{
+	__m128 par0 = v.Data;
+	__m128 par1 = _mm_set1_ps(s);
+	return fvec4SIMD(_mm_mul_ps(par0, par1));
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD operator* (float s, fvec4SIMD const & v)
+{
+	__m128 par0 = _mm_set1_ps(s);
+	__m128 par1 = v.Data;
+	return fvec4SIMD(_mm_mul_ps(par0, par1));
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v1, fvec4SIMD const & v2)
+{
+	return fvec4SIMD(_mm_mul_ps(v1.Data, v2.Data));
+}
+
+//operator/
+GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v, float s)
+{
+	__m128 par0 = v.Data;
+	__m128 par1 = _mm_set1_ps(s);
+	return fvec4SIMD(_mm_div_ps(par0, par1));
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD operator/ (float s, fvec4SIMD const & v)
+{
+	__m128 par0 = _mm_set1_ps(s);
+	__m128 par1 = v.Data;
+	return fvec4SIMD(_mm_div_ps(par0, par1));
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v1, fvec4SIMD const & v2)
+{
+	return fvec4SIMD(_mm_div_ps(v1.Data, v2.Data));
+}
+
+// Unary constant operators
+GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v)
+{
+	return fvec4SIMD(_mm_sub_ps(_mm_setzero_ps(), v.Data));
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD operator++ (fvec4SIMD const & v, int)
+{
+	return fvec4SIMD(_mm_add_ps(v.Data, glm::detail::one));
+}
+
+GLM_FUNC_QUALIFIER fvec4SIMD operator-- (fvec4SIMD const & v, int)
+{
+	return fvec4SIMD(_mm_sub_ps(v.Data, glm::detail::one));
+}
+
+}//namespace detail
+
+GLM_FUNC_QUALIFIER vec4 vec4_cast
+(
+	detail::fvec4SIMD const & x
+)
+{
+	GLM_ALIGN(16) vec4 Result;
+	_mm_store_ps(&Result[0], x.Data);
+	return Result;
+}
+
+// Other possible implementation
+//float abs(float a)
+//{
+//  return max(-a, a);
+//}
+GLM_FUNC_QUALIFIER detail::fvec4SIMD abs
+(
+	detail::fvec4SIMD const & x
+)
+{
+	return detail::sse_abs_ps(x.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD sign
+(
+	detail::fvec4SIMD const & x
+)
+{
+	return detail::sse_sgn_ps(x.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD floor
+(
+	detail::fvec4SIMD const & x
+)
+{
+	return detail::sse_flr_ps(x.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD trunc
+(
+	detail::fvec4SIMD const & x
+)
+{
+    //return x < 0 ? -floor(-x) : floor(x);
+
+	__m128 Flr0 = detail::sse_flr_ps(_mm_sub_ps(_mm_setzero_ps(), x.Data));
+	__m128 Sub0 = _mm_sub_ps(Flr0, x.Data);
+	__m128 Flr1 = detail::sse_flr_ps(x.Data);
+
+	__m128 Cmp0 = _mm_cmplt_ps(x.Data, glm::detail::zero);
+	__m128 Cmp1 = _mm_cmpnlt_ps(x.Data, glm::detail::zero);
+
+	__m128 And0 = _mm_and_ps(Sub0, Cmp0);
+	__m128 And1 = _mm_and_ps(Flr1, Cmp1);
+
+	return _mm_or_ps(And0, And1);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD round
+(
+	detail::fvec4SIMD const & x
+)
+{
+	return detail::sse_rnd_ps(x.Data);
+}
+
+//GLM_FUNC_QUALIFIER detail::fvec4SIMD roundEven
+//(
+//	detail::fvec4SIMD const & x
+//)
+//{
+
+//}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD ceil
+(
+	detail::fvec4SIMD const & x
+)
+{
+	return detail::sse_ceil_ps(x.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD fract
+(
+	detail::fvec4SIMD const & x
+)
+{
+	return detail::sse_frc_ps(x.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD mod
+(
+	detail::fvec4SIMD const & x, 
+	detail::fvec4SIMD const & y
+)
+{
+	return detail::sse_mod_ps(x.Data, y.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD mod
+(
+	detail::fvec4SIMD const & x, 
+	float const & y
+)
+{
+	return detail::sse_mod_ps(x.Data, _mm_set1_ps(y));
+}
+
+//GLM_FUNC_QUALIFIER detail::fvec4SIMD modf
+//(
+//	detail::fvec4SIMD const & x, 
+//	detail::fvec4SIMD & i
+//)
+//{
+
+//}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD min
+(
+	detail::fvec4SIMD const & x, 
+	detail::fvec4SIMD const & y
+)
+{
+	return _mm_min_ps(x.Data, y.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD min
+(
+	detail::fvec4SIMD const & x, 
+	float const & y
+)
+{
+	return _mm_min_ps(x.Data, _mm_set1_ps(y));
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD max
+(
+	detail::fvec4SIMD const & x, 
+	detail::fvec4SIMD const & y
+)
+{
+	return _mm_max_ps(x.Data, y.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD max
+(
+	detail::fvec4SIMD const & x, 
+	float const & y
+)
+{
+	return _mm_max_ps(x.Data, _mm_set1_ps(y));
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp
+(
+	detail::fvec4SIMD const & x, 
+	detail::fvec4SIMD const & minVal, 
+	detail::fvec4SIMD const & maxVal
+)
+{
+	return detail::sse_clp_ps(x.Data, minVal.Data, maxVal.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp
+(
+	detail::fvec4SIMD const & x, 
+	float const & minVal, 
+	float const & maxVal
+) 
+{
+	return detail::sse_clp_ps(x.Data, _mm_set1_ps(minVal), _mm_set1_ps(maxVal));
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD mix
+(
+	detail::fvec4SIMD const & x, 
+	detail::fvec4SIMD const & y, 
+	detail::fvec4SIMD const & a
+)
+{
+	__m128 Sub0 = _mm_sub_ps(y.Data, x.Data);
+	__m128 Mul0 = _mm_mul_ps(a.Data, Sub0);
+	return _mm_add_ps(x.Data, Mul0);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD step
+(
+	detail::fvec4SIMD const & edge, 
+	detail::fvec4SIMD const & x
+)
+{
+	__m128 cmp0 = _mm_cmpngt_ps(x.Data, edge.Data);
+	return _mm_max_ps(_mm_min_ps(cmp0, _mm_setzero_ps()), detail::one);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD step
+(
+	float const & edge, 
+	detail::fvec4SIMD const & x
+)
+{
+	__m128 cmp0 = _mm_cmpngt_ps(x.Data, _mm_set1_ps(edge));
+	return _mm_max_ps(_mm_min_ps(cmp0, _mm_setzero_ps()), detail::one);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep
+(
+	detail::fvec4SIMD const & edge0, 
+	detail::fvec4SIMD const & edge1, 
+	detail::fvec4SIMD const & x
+)
+{
+	return detail::sse_ssp_ps(edge0.Data, edge1.Data, x.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep
+(
+	float const & edge0, 
+	float const & edge1, 
+	detail::fvec4SIMD const & x
+)
+{
+	return detail::sse_ssp_ps(_mm_set1_ps(edge0), _mm_set1_ps(edge1), x.Data);
+}
+
+//GLM_FUNC_QUALIFIER bvec4 isnan(detail::fvec4SIMD const & x)
+//{
+
+//}
+
+//GLM_FUNC_QUALIFIER bvec4 isinf(detail::fvec4SIMD const & x)
+//{
+
+//}
+
+//GLM_FUNC_QUALIFIER detail::ivec4SIMD floatBitsToInt
+//(
+//	detail::fvec4SIMD const & value
+//)
+//{
+
+//}
+
+//GLM_FUNC_QUALIFIER detail::fvec4SIMD intBitsToFloat
+//(
+//	detail::ivec4SIMD const & value
+//)
+//{
+
+//}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD fma
+(
+	detail::fvec4SIMD const & a, 
+	detail::fvec4SIMD const & b, 
+	detail::fvec4SIMD const & c
+)
+{
+	return _mm_add_ps(_mm_mul_ps(a.Data, b.Data), c.Data);
+}
+
+GLM_FUNC_QUALIFIER float length
+(
+	detail::fvec4SIMD const & x
+)
+{
+	detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
+	detail::fvec4SIMD sqt0 = sqrt(dot0);
+	float Result = 0;
+	_mm_store_ss(&Result, sqt0.Data);
+	return Result;
+}
+
+GLM_FUNC_QUALIFIER float fastLength
+(
+	detail::fvec4SIMD const & x
+)
+{
+	detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
+	detail::fvec4SIMD sqt0 = fastSqrt(dot0);
+	float Result = 0;
+	_mm_store_ss(&Result, sqt0.Data);
+	return Result;
+}
+
+GLM_FUNC_QUALIFIER float niceLength
+(
+	detail::fvec4SIMD const & x
+)
+{
+	detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
+	detail::fvec4SIMD sqt0 = niceSqrt(dot0);
+	float Result = 0;
+	_mm_store_ss(&Result, sqt0.Data);
+	return Result;
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD length4
+(
+	detail::fvec4SIMD const & x
+)
+{
+	return sqrt(dot4(x, x));
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD fastLength4
+(
+	detail::fvec4SIMD const & x
+)
+{
+	return fastSqrt(dot4(x, x));
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD niceLength4
+(
+	detail::fvec4SIMD const & x
+)
+{
+	return niceSqrt(dot4(x, x));
+}
+
+GLM_FUNC_QUALIFIER float distance
+(
+	detail::fvec4SIMD const & p0,
+	detail::fvec4SIMD const & p1
+)
+{
+	float Result = 0;
+	_mm_store_ss(&Result, detail::sse_dst_ps(p0.Data, p1.Data));
+	return Result;
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD distance4
+(
+	detail::fvec4SIMD const & p0,
+	detail::fvec4SIMD const & p1
+)
+{
+	return detail::sse_dst_ps(p0.Data, p1.Data);
+}
+
+GLM_FUNC_QUALIFIER float dot
+(
+	detail::fvec4SIMD const & x,
+	detail::fvec4SIMD const & y
+)
+{
+	float Result = 0;
+	_mm_store_ss(&Result, detail::sse_dot_ss(x.Data, y.Data));
+	return Result;
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD dot4
+(
+	detail::fvec4SIMD const & x,
+	detail::fvec4SIMD const & y
+)
+{
+	return detail::sse_dot_ps(x.Data, y.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD cross
+(
+	detail::fvec4SIMD const & x,
+	detail::fvec4SIMD const & y
+)
+{
+	return detail::sse_xpd_ps(x.Data, y.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD normalize
+(
+	detail::fvec4SIMD const & x
+)
+{
+	__m128 dot0 = detail::sse_dot_ps(x.Data, x.Data);
+	__m128 isr0 = inversesqrt(detail::fvec4SIMD(dot0)).Data;
+	__m128 mul0 = _mm_mul_ps(x.Data, isr0);
+	return mul0;
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD fastNormalize
+(
+	detail::fvec4SIMD const & x
+)
+{
+	__m128 dot0 = detail::sse_dot_ps(x.Data, x.Data);
+	__m128 isr0 = fastInversesqrt(dot0).Data;
+	__m128 mul0 = _mm_mul_ps(x.Data, isr0);
+	return mul0;
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD faceforward
+(
+	detail::fvec4SIMD const & N,
+	detail::fvec4SIMD const & I,
+	detail::fvec4SIMD const & Nref
+)
+{
+	return detail::sse_ffd_ps(N.Data, I.Data, Nref.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD reflect
+(
+	detail::fvec4SIMD const & I,
+	detail::fvec4SIMD const & N
+)
+{
+	return detail::sse_rfe_ps(I.Data, N.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD refract
+(
+	detail::fvec4SIMD const & I,
+	detail::fvec4SIMD const & N,
+	float const & eta
+)
+{
+	return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta));
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD sqrt(detail::fvec4SIMD const & x)
+{
+	return _mm_mul_ps(inversesqrt(x).Data, x.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD niceSqrt(detail::fvec4SIMD const & x)
+{
+	return _mm_sqrt_ps(x.Data);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD fastSqrt(detail::fvec4SIMD const & x)
+{
+	return _mm_mul_ps(fastInversesqrt(x.Data).Data, x.Data);
+}
+
+// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration
+// By Elan Ruskin, http://assemblyrequired.crashworks.org/
+GLM_FUNC_QUALIFIER detail::fvec4SIMD inversesqrt(detail::fvec4SIMD const & x)
+{
+	GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load
+	GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5};
+
+	__m128 recip = _mm_rsqrt_ps(x.Data);  // "estimate" opcode
+	__m128 halfrecip = _mm_mul_ps(half, recip);
+	__m128 threeminus_xrr = _mm_sub_ps(three, _mm_mul_ps(x.Data, _mm_mul_ps(recip, recip)));
+	return _mm_mul_ps(halfrecip, threeminus_xrr);
+}
+
+GLM_FUNC_QUALIFIER detail::fvec4SIMD fastInversesqrt(detail::fvec4SIMD const & x)
+{
+	return _mm_rsqrt_ps(x.Data);
+}
+
+}//namespace glm
author	Nikolas <nikolas@boutalas.com>	2024-10-27 12:52:55 +0200
committer	Nikolas <nikolas@boutalas.com>	2024-10-27 12:52:55 +0200
commit	43394c8a8908442982e3a7e25975c31b3c952923 (patch)
tree	2facd563e29f48fe3b0653ac5c113998940b4d5e /3rdparty/include/glm/gtx/simd_vec4.inl