The Windows 8.0 SDK includes DirectXMath version 3.03 for use with Windows Store apps and Win32 desktop applications on Windows 8, Windows RT, Windows 7, and Windows Vista. DirectXMath 3.03 is also part of the Windows Phone 8.0 SDK for use on Windows Phone 8. Customers have reported a number of minor bugs in the library since it was released; these will be addressed in future SDK releases. In the meantime, since the code is all inline in the headers, you can apply the fixes directly to a local copy as needed or work around the issues in your own code.
XMVector3Cross
The ARM-NEON implementation leaves the .w component undefined instead of setting it to zero as the other versions do. The fix is to change DirectXMathVector.inl at line 7678:
// Original code
return veorq_u32( vResult, g_XMFlipY );
// Corrected code
vResult = veorq_u32( vResult, g_XMFlipY );
return vandq_u32( vResult, g_XMMask3 );
XMVectorFloor and XMVectorCeiling
These functions use a naïve implementation that fails when given an odd whole number: for example, an input of 105.0 produces 104.0 because of round-to-nearest (even) behavior. The solution is to replace these functions with a different implementation in DirectXMathVector.inl, starting on line 2426:
// Returns the floor of each of the four components of V, i.e. the largest
// whole number that is not greater than the component.
//
// One of three implementations is selected at compile time:
//   _XM_NO_INTRINSICS_       - calls floorf on each component.
//   _XM_ARM_NEON_INTRINSICS_ - truncate-and-correct using NEON intrinsics.
//   _XM_SSE_INTRINSICS_      - truncate-and-correct using SSE intrinsics.
//
// Both SIMD paths convert each lane to a 32-bit integer with truncation,
// convert it back to float, and then add -1.0f to every lane whose truncated
// value is greater than the input (truncation moves negative fractional
// inputs up toward zero, so those lanes need to be stepped down by one).
// Lanes whose magnitude is not below g_XMNoFraction keep their ORIGINAL value.
// g_XMNoFraction and g_XMAbsMask are defined outside this snippet; per the
// comments below the threshold is 8388608 -- TODO confirm against the header.
inline XMVECTOR XMVectorFloor
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
// Scalar reference path: apply floorf to each component independently.
XMVECTOR vResult = {
floorf(V.vector4_f32[0]),
floorf(V.vector4_f32[1]),
floorf(V.vector4_f32[2]),
floorf(V.vector4_f32[3])
};
return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
// vTest becomes a per-lane mask: set where |V| < g_XMNoFraction.
float32x4_t vTest = vabsq_f32( V );
vTest = vcltq_f32( vTest, g_XMNoFraction );
// Truncate
int32x4_t vInt = vcvtq_s32_f32( V );
XMVECTOR vResult = vcvtq_f32_s32( vInt );
// Per-lane mask: set where the truncated value is greater than the input.
XMVECTOR vLarger = vcgtq_f32( vResult, V );
// The mask is converted as an integer to float:
// 0 -> 0, 0xffffffff -> -1.0f
vLarger = vcvtq_f32_s32( vLarger );
// Adding the 0 / -1.0f correction steps the flagged lanes down by one.
vResult = vaddq_f32( vResult, vLarger );
// All numbers less than 8388608 will use the round to int
// All others, use the ORIGINAL value
return vbslq_f32( vTest, vResult, V );
#elif defined(_XM_SSE_INTRINSICS_)
// To handle NAN, INF and numbers greater than 8388608, use masking
// The bits of V are ANDed with g_XMAbsMask (presumably clearing the sign bit)
// and then compared against g_XMNoFraction as 32-bit integers.
__m128i vTest = _mm_and_si128(_mm_castps_si128(V),g_XMAbsMask);
vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
// Truncate
__m128i vInt = _mm_cvttps_epi32(V);
XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
// Per-lane mask: set where the truncated value is greater than the input.
__m128 vLarger = _mm_cmpgt_ps( vResult, V );
// The mask bits are reinterpreted as integers and converted to float:
// 0 -> 0, 0xffffffff -> -1.0f
vLarger = _mm_cvtepi32_ps( _mm_castps_si128( vLarger ) );
// Adding the 0 / -1.0f correction steps the flagged lanes down by one.
vResult = _mm_add_ps( vResult, vLarger );
// All numbers less than 8388608 will use the round to int
vResult = _mm_and_ps(vResult,_mm_castsi128_ps(vTest));
// All others, use the ORIGINAL value
vTest = _mm_andnot_si128(vTest,_mm_castps_si128(V));
// Merge the two disjoint lane sets into the final result.
vResult = _mm_or_ps(vResult,_mm_castsi128_ps(vTest));
return vResult;
// NOTE(review): the fallback branch below is empty, so the function has no
// return statement when none of the three macros above is defined.
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
and in DirectXMathVector.inl, starting on line 2467:
// Returns the ceiling of each of the four components of V, i.e. the smallest
// whole number that is not less than the component.
//
// One of three implementations is selected at compile time:
//   _XM_NO_INTRINSICS_       - calls ceilf on each component.
//   _XM_ARM_NEON_INTRINSICS_ - truncate-and-correct using NEON intrinsics.
//   _XM_SSE_INTRINSICS_      - truncate-and-correct using SSE intrinsics.
//
// Both SIMD paths convert each lane to a 32-bit integer with truncation,
// convert it back to float, and then subtract -1.0f (i.e. add one) on every
// lane whose truncated value is smaller than the input (truncation moves
// positive fractional inputs down toward zero, so those lanes need to be
// stepped up by one). Lanes whose magnitude is not below g_XMNoFraction keep
// their ORIGINAL value. g_XMNoFraction and g_XMAbsMask are defined outside
// this snippet; per the comments below the threshold is 8388608 -- TODO
// confirm against the header.
inline XMVECTOR XMVectorCeiling
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
// Scalar reference path: apply ceilf to each component independently.
XMVECTOR vResult = {
ceilf(V.vector4_f32[0]),
ceilf(V.vector4_f32[1]),
ceilf(V.vector4_f32[2]),
ceilf(V.vector4_f32[3])
};
return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
// vTest becomes a per-lane mask: set where |V| < g_XMNoFraction.
float32x4_t vTest = vabsq_f32( V );
vTest = vcltq_f32( vTest, g_XMNoFraction );
// Truncate
int32x4_t vInt = vcvtq_s32_f32( V );
XMVECTOR vResult = vcvtq_f32_s32( vInt );
// Per-lane mask: set where the truncated value is smaller than the input.
XMVECTOR vSmaller = vcltq_f32( vResult, V );
// The mask is converted as an integer to float:
// 0 -> 0, 0xffffffff -> -1.0f
vSmaller = vcvtq_f32_s32( vSmaller );
// Subtracting the 0 / -1.0f correction steps the flagged lanes up by one.
vResult = vsubq_f32( vResult, vSmaller );
// All numbers less than 8388608 will use the round to int
// All others, use the ORIGINAL value
return vbslq_f32( vTest, vResult, V );
#elif defined(_XM_SSE_INTRINSICS_)
// To handle NAN, INF and numbers greater than 8388608, use masking
// The bits of V are ANDed with g_XMAbsMask (presumably clearing the sign bit)
// and then compared against g_XMNoFraction as 32-bit integers.
__m128i vTest = _mm_and_si128(_mm_castps_si128(V),g_XMAbsMask);
vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
// Truncate
__m128i vInt = _mm_cvttps_epi32(V);
XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
// Per-lane mask: set where the truncated value is smaller than the input.
__m128 vSmaller = _mm_cmplt_ps( vResult, V );
// The mask bits are reinterpreted as integers and converted to float:
// 0 -> 0, 0xffffffff -> -1.0f
vSmaller = _mm_cvtepi32_ps( _mm_castps_si128( vSmaller ) );
// Subtracting the 0 / -1.0f correction steps the flagged lanes up by one.
vResult = _mm_sub_ps( vResult, vSmaller );
// All numbers less than 8388608 will use the round to int
vResult = _mm_and_ps(vResult,_mm_castsi128_ps(vTest));
// All others, use the ORIGINAL value
vTest = _mm_andnot_si128(vTest,_mm_castps_si128(V));
// Merge the two disjoint lane sets into the final result.
vResult = _mm_or_ps(vResult,_mm_castsi128_ps(vTest));
return vResult;
// NOTE(review): the fallback branch below is empty, so the function has no
// return statement when none of the three macros above is defined.
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
This problem does not apply to the SSE 4.1 versions of these functions.
XMConvertHalfToFloat and XMConvertFloatToHalf
These functions convert to the Xbox 360 variant of float16 rather than the IEEE 754 standard version of float16. This means values with a magnitude greater than 65504.0 map to QNAN rather than +/- INF as would be expected. The implementation makes sense for XNAMath (aka xboxmath 2.x), but doesn't make sense in DirectXMath since it does not support the Xbox 360 platform. The solution is to change DirectXPackedVector.inl, starting on line 34:
// Original code
uint32_t Exponent;
if ((Value & 0x7C00) != 0) // The value is normalized
{
Exponent = (uint32_t)((Value >> 10) & 0x1F);
}
// Corrected code
uint32_t Exponent = (Value & 0x7C00);
if ( Exponent == 0x7C00 ) // INF/NAN
{
Exponent = (uint32_t)143;
}
else if (Exponent != 0) // The value is normalized
{
Exponent = (uint32_t)((Value >> 10) & 0x1F);
}
and in DirectXPackedVector.inl
starting on line 111.
// Original code
if (IValue > 0x47FFEFFFU)
{
// The number is too large to be represented as a half. Saturate to infinity.
Result = 0x7FFFU;
}
else
// Corrected code
if (IValue > 0x477FE000U)
{
// The number is too large to be represented as a half. Saturate to infinity.
if (((IValue & 0x7F800000) == 0x7F800000) && ((IValue & 0x7FFFFF ) != 0))
{
Result = 0x7FFF; // NAN
}
else
{
Result = 0x7C00U; // INF
}
}
else
This problem does not apply to the F16C / CVT16 versions of these functions.
BoundingOrientedBox::Transform and BoundingFrustum::Transform
The matrix forms of these functions do not properly handle scaling transformations. The same change is applied in DirectXCollision.inl on line 1952 and again on line 2824:
//Original code
XMVECTOR Rotation = XMQuaternionRotationMatrix( M );
// Corrected code
XMMATRIX nM;
nM.r[0] = XMVector3Normalize( M.r[0] );
nM.r[1] = XMVector3Normalize( M.r[1] );
nM.r[2] = XMVector3Normalize( M.r[2] );
nM.r[3] = g_XMIdentityR3;
XMVECTOR Rotation = XMQuaternionRotationMatrix( nM );
XMStoreFloat3PK and XMStoreFloat3SE
These functions have some minor typos in the shift operators used for specials generation (`>` appears where `>>` was intended). This doesn't really impact the functionality in any obvious way, but it's also an easy fix. In DirectXPackedVector.inl on line 1709:
// Original code
Result[j] = 0x7c0 | (((I>>17)|(I>11)|(I>>6)|(I))&0x3f);
// Corrected code
Result[j] = 0x7c0 | (((I>>17)|(I>>11)|(I>>6)|(I))&0x3f);
and DirectXPackedVector.inl
line 1756.
// Original code
Result[2] = 0x3e0 | (((I>>18)|(I>13)|(I>>3)|(I))&0x1f);
// Corrected code
Result[2] = 0x3e0 | (((I>>18)|(I>>13)|(I>>3)|(I))&0x1f);
and DirectXPackedVector.inl
line 1826.
// Original code
Frac[j] = ((I>>14)|(I>5)|(I))&0x1ff;
// Corrected code
Frac[j] = ((I>>14)|(I>>5)|(I))&0x1ff;
Note: Attached are the relevant files with these fixes applied. They require that you use the rest of the library in the Windows 8.0 SDK or the Windows Phone 8.0 SDK, and the code is subject to the respective SDK's license agreement. The attachment was refreshed on March 7, 2013.