genesis-3d_engine/Engine/foundation/math/OgreMath/asm_math.h

380 lines
7.7 KiB
C
Raw Normal View History

#ifndef __asm_math_H__
#define __asm_math_H__
#include "OgrePrerequisites.h"
// NOTE(review): overrides OGRE_COMPILER_MSVC with -1 for the rest of this header.
// This looks intended to make every `OGRE_COMPILER == OGRE_COMPILER_MSVC` test
// below evaluate to false, so that only the portable (non-assembly) code paths
// are compiled -- confirm. If OgrePrerequisites.h already defines this macro with
// a different value, this line is a macro redefinition and will be diagnosed.
#define OGRE_COMPILER_MSVC -1
#if OGRE_COMPILER == OGRE_COMPILER_MSVC
// Saved warning state is restored by the matching `pragma warning (pop)` at the
// end of this header.
# pragma warning (push)
// disable "instruction may be inaccurate on some Pentiums"
# pragma warning (disable : 4725)
#endif
namespace Ogre
{
/*=============================================================================
ASM math routines posted by davepermen et al on flipcode forums
=============================================================================*/
// pi and pi/2 as single-precision constants.
// Written as literals rather than `4.0f * atan( 1.0f )` so that they are
// initialised statically instead of through a run-time library call; a run-time
// initialiser can still be unset (zero) when code in another translation unit
// runs during start-up. The literals round to the same float values as the
// former expressions (3.14159274f and 1.57079637f).
const float pi = 3.14159265358979323846f;
const float half_pi = 1.57079632679489661923f;
/*=============================================================================
NO EXPLICIT RETURN REQUIRED FROM THESE METHODS!!
=============================================================================*/
#if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
# pragma warning( push )
# pragma warning( disable: 4035 )
#endif
// Arc cosine of r.
//
// 32-bit MSVC builds would evaluate
//     half_pi + atan( r / -sqrt( 1.f - r * r ) )
// on the x87 stack and leave the result in st0 (no explicit return statement);
// that branch currently begins with a bare #error, so selecting it stops the
// build. Every other build calls acos().
//
// static inline: this definition lives in a header, so it needs internal
// linkage to stay link-safe when the header is included from more than one
// translation unit.
// defined(_MSC_VER): undefined macros evaluate to 0 inside #if, which would make
// both OGRE_* comparisons true; the extra test keeps the MSVC-only __asm syntax
// away from other compilers in that case.
static inline float asm_arccos( float r ) {
#if defined(_MSC_VER) && OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
// NOTE(review): unconditional #error -- presumably a deliberate tripwire marking
// this assembly path as unsupported; confirm before removing.
#error
    float asm_one = 1.f;
    float asm_half_pi = half_pi;
    __asm {
        fld r               // st0 = r
        fld r               // st1 = r, st0 = r
        fmul r              // st0 = r * r
        fsubr asm_one       // st0 = 1.f - st0
        fsqrt               // st0 = sqrt( st0 )
        fchs                // st0 = -st0
        fdiv                // st0 = st1 / st0 (pops the stack)
        fld1                // {{ st0 = atan( st0 / 1.f )
        fpatan              // }}
        fadd asm_half_pi    // st0 = st0 + pi / 2
    }                       // result is returned in st0
#else
    return (float)acos( r );
#endif
}
// Arc sine of r.
//
// 32-bit MSVC builds evaluate atan( r / sqrt( 1.f - r * r ) ) on the x87 stack
// and leave the result in st0 (no explicit return statement). Every other build
// calls asin().
//
// static inline: this definition lives in a header, so it needs internal
// linkage to stay link-safe when the header is included from more than one
// translation unit.
// defined(_MSC_VER): undefined macros evaluate to 0 inside #if, which would make
// both OGRE_* comparisons true; the extra test keeps the MSVC-only __asm syntax
// away from other compilers in that case.
static inline float asm_arcsin( float r ) {
#if defined(_MSC_VER) && OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
    const float asm_one = 1.f;
    __asm {
        fld r           // st0 = r
        fld r           // st1 = r, st0 = r
        fmul r          // st0 = r * r
        fsubr asm_one   // st0 = 1.f - st0
        fsqrt           // st0 = sqrt( st0 )
        fdiv            // st0 = st1 / st0 (pops the stack)
        fld1            // {{ st0 = atan( st0 / 1.f )
        fpatan          // }}
    }                   // result is returned in st0
#else
    return (float)asin( r );
#endif
}
// Arc tangent of r.
//
// 32-bit MSVC builds push r and 1.f onto the x87 stack and execute fpatan,
// leaving the result in st0 (no explicit return statement). Every other build
// calls atan().
//
// static inline: this definition lives in a header, so it needs internal
// linkage to stay link-safe when the header is included from more than one
// translation unit.
// defined(_MSC_VER): undefined macros evaluate to 0 inside #if, which would make
// both OGRE_* comparisons true; the extra test keeps the MSVC-only __asm syntax
// away from other compilers in that case.
static inline float asm_arctan( float r ) {
#if defined(_MSC_VER) && OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
    __asm {
        fld r       // st0 = r
        fld1        // {{ st0 = atan( st0 / 1.f )
        fpatan      // }}
    }               // result is returned in st0
#else
    return (float)atan( r );
#endif
}
// Sine of r.
//
// 32-bit MSVC builds load r onto the x87 stack and execute fsin, leaving the
// result in st0 (no explicit return statement). Every other build calls sin().
//
// static inline: this definition lives in a header, so it needs internal
// linkage to stay link-safe when the header is included from more than one
// translation unit.
// defined(_MSC_VER): undefined macros evaluate to 0 inside #if, which would make
// both OGRE_* comparisons true; the extra test keeps the MSVC-only __asm syntax
// away from other compilers in that case.
static inline float asm_sin( float r ) {
#if defined(_MSC_VER) && OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
    __asm {
        fld r   // st0 = r
        fsin    // st0 = sin( st0 )
    }           // result is returned in st0
#else
    return sin( r );
#endif
}
// Cosine of r.
//
// 32-bit MSVC builds load r onto the x87 stack and execute fcos, leaving the
// result in st0 (no explicit return statement). Every other build calls cos().
//
// static inline: this definition lives in a header, so it needs internal
// linkage to stay link-safe when the header is included from more than one
// translation unit.
// defined(_MSC_VER): undefined macros evaluate to 0 inside #if, which would make
// both OGRE_* comparisons true; the extra test keeps the MSVC-only __asm syntax
// away from other compilers in that case.
static inline float asm_cos( float r ) {
#if defined(_MSC_VER) && OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
    __asm {
        fld r   // st0 = r
        fcos    // st0 = cos( st0 )
    }           // result is returned in st0
#else
    return cos( r );
#endif
}
// Tangent of r.
//
// 32-bit MSVC builds compute sin( r ) / cos( r ) on the x87 stack and leave the
// result in st0 (no explicit return statement). Every other build calls tan().
//
// static inline: this definition lives in a header, so it needs internal
// linkage to stay link-safe when the header is included from more than one
// translation unit.
// defined(_MSC_VER): undefined macros evaluate to 0 inside #if, which would make
// both OGRE_* comparisons true; the extra test keeps the MSVC-only __asm syntax
// away from other compilers in that case.
static inline float asm_tan( float r ) {
#if defined(_MSC_VER) && OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
    __asm {
        fld r   // st0 = r
        fsin    // st0 = sin( st0 )
        fld r   // st1 = sin( r ), st0 = r
        fcos    // st0 = cos( st0 )
        fdiv    // st0 = st1 / st0 (pops the stack)
    }           // result is returned in st0
#else
    return tan( r );
#endif
}
// Square root: returns a such that a * a = r.
//
// 32-bit MSVC builds load r onto the x87 stack and execute fsqrt, leaving the
// result in st0 (no explicit return statement). Every other build calls sqrt().
//
// static inline: this definition lives in a header, so it needs internal
// linkage to stay link-safe when the header is included from more than one
// translation unit.
// defined(_MSC_VER): undefined macros evaluate to 0 inside #if, which would make
// both OGRE_* comparisons true; the extra test keeps the MSVC-only __asm syntax
// away from other compilers in that case.
static inline float asm_sqrt( float r )
{
#if defined(_MSC_VER) && OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
    __asm {
        fld r   // st0 = r
        fsqrt   // st0 = sqrt( st0 )
    }           // result is returned in st0
#else
    return sqrt( r );
#endif
}
// Reciprocal square root: returns 1 / a for a * a = r.
// -- Use this for Vector normalisation!!!
//
// 32-bit MSVC builds compute 1.f / sqrt( r ) on the x87 stack and leave the
// result in st0 (no explicit return statement). Every other build evaluates
// 1.0f / sqrt( r ) directly.
//
// static inline: this definition lives in a header, so it needs internal
// linkage to stay link-safe when the header is included from more than one
// translation unit.
// defined(_MSC_VER): undefined macros evaluate to 0 inside #if, which would make
// both OGRE_* comparisons true; the extra test keeps the MSVC-only __asm syntax
// away from other compilers in that case.
static inline float asm_rsq( float r )
{
#if defined(_MSC_VER) && OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
    __asm {
        fld1    // st0 = 1.f
        fld r   // st1 = 1.f, st0 = r
        fsqrt   // st0 = sqrt( st0 )
        fdiv    // st0 = st1 / st0 (pops the stack)
    }           // result is returned in st0
#else
    return 1.0f / sqrt( r );
#endif
}
// Reciprocal square root (alternative version): returns 1 / a for a * a = r.
//
// 32-bit MSVC builds take the raw bits of r, form an initial estimate y as
// 0x5F400000 - ( bits >> 1 ), and then evaluate y * ( 1.5f - 0.5f * r * y * y )
// on the x87 stack, leaving the result in st0 (no explicit return statement).
// The parameter r is overwritten with the estimate along the way.
// Every other build evaluates 1.0f / sqrt( r ) directly.
//
// static inline: this definition lives in a header, so it needs internal
// linkage to stay link-safe when the header is included from more than one
// translation unit.
// defined(_MSC_VER): undefined macros evaluate to 0 inside #if, which would make
// both OGRE_* comparisons true; the extra test keeps the MSVC-only __asm syntax
// away from other compilers in that case.
static inline float apx_rsq( float r ) {
#if defined(_MSC_VER) && OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
    const float asm_dot5 = 0.5f;
    const float asm_1dot5 = 1.5f;
    __asm {
        fld r                   // st0 = r
        fmul asm_dot5           // st0 = r * .5f
        mov eax, r              // eax = raw bits of r
        shr eax, 0x1            // eax = eax >> 1
        neg eax                 // eax = -eax
        add eax, 0x5F400000     // eax = eax + magic constant
        mov r, eax              // r = initial estimate y
        fmul r                  // st0 = st0 * y
        fmul r                  // st0 = st0 * y
        fsubr asm_1dot5         // st0 = 1.5f - st0
        fmul r                  // st0 = st0 * y
    }                           // result is returned in st0
#else
    return 1.0f / sqrt( r );
#endif
}
/* Very MS-specific: compiled only when the #if guard below holds (it is
conditionally compiled, not commented out).
Finally the best InvSqrt implementation?
Use for vector normalisation instead of 1/length() * x,y,z

The function is declared naked, so the __asm block is the entire body and ends
with its own `ret 4`. It reads its input from [esp + 4] (modifying that slot in
place) and uses [esp - 12] and [esp - 8], below the stack pointer, as scratch
storage. The value left on the x87 stack by the final fmulp is the result.
NOTE(review): the x87 sequence looks like an integer-derived first estimate
followed by iterative refinement -- confirm against the original derivation
before changing any instruction or its order.
*/
#if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
__declspec(naked) float __fastcall InvSqrt(float fValue)
{
__asm
{
mov eax, 0be6eb508h
mov dword ptr[esp-12],03fc00000h // scratch slot = bit pattern of 1.5f
sub eax, dword ptr[esp + 4] // eax = constant - raw bits of the input
sub dword ptr[esp+4], 800000h // lower the input's exponent field by one
shr eax, 1
mov dword ptr[esp - 8], eax // store the integer result as the first estimate
fld dword ptr[esp - 8]
fmul st, st
fld dword ptr[esp - 8]
fxch st(1)
fmul dword ptr[esp + 4]
fld dword ptr[esp - 12]
fld st(0)
fsub st,st(2)
fld st(1)
fxch st(1)
fmul st(3),st
fmul st(3),st
fmulp st(4),st
fsub st,st(2)
fmul st(2),st
fmul st(3),st
fmulp st(2),st
fxch st(1)
fsubp st(1),st
fmulp st(1), st
// return and pop the 4-byte stack argument
ret 4
}
}
#endif
// Returns a random number: the next value of the C library rand() generator,
// converted to float.
//
// The former MMX variant sat under a permanent `#if 0`, which left every
// reachable preprocessor branch with the same `rand()` body, so the function
// now has a single unconditional implementation.
FORCEINLINE float asm_rand()
{
    return (float)rand();
}
// Returns the maximum random number asm_rand() can produce: RAND_MAX converted
// to float.
//
// The former 64-bit variant sat under a permanent `#if 0` (and contained an
// unreachable second return), which left every reachable preprocessor branch
// with the same RAND_MAX body, so the function now has a single unconditional
// implementation.
FORCEINLINE float asm_rand_max()
{
    return (float)RAND_MAX;
}
// Natural logarithm: returns log2( r ) / log2( e ).
//
// 32-bit MSVC builds compute an approximation from the raw bits of r:
//   1. log_2 = exponent field of r minus 0x80;
//   2. r is rewritten with its exponent field replaced by 0x3F800000 (the
//      exponent of 1.0f), keeping the sign and mantissa bits;
//   3. the quadratic ( -1/3 * r + 2 ) * r - 2/3 is evaluated, log_2 is added,
//      and the sum is scaled by 1 / log2( e ).
// The result is left in st0 (no explicit return statement).
// Every other build calls log().
//
// static inline: this definition lives in a header, so it needs internal
// linkage to stay link-safe when the header is included from more than one
// translation unit.
// defined(_MSC_VER): undefined macros evaluate to 0 inside #if, which would make
// both OGRE_* comparisons true; the extra test keeps the MSVC-only __asm syntax
// away from other compilers in that case.
static inline float asm_ln( float r ) {
#if defined(_MSC_VER) && OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
    const float asm_1_div_log2_e = .693147180559f;
    const float asm_neg1_div_3 = -.33333333333333333333333333333f;
    const float asm_neg2_div_3 = -.66666666666666666666666666667f;
    const float asm_2 = 2.f;
    int log_2 = 0;
    __asm {
        // log_2 = ( ( r >> 0x17 ) & 0xFF ) - 0x80;
        mov eax, r
        sar eax, 0x17
        and eax, 0xFF
        sub eax, 0x80
        mov log_2, eax
        // r = ( r & 0x807fffff ) + 0x3f800000;
        mov ebx, r
        and ebx, 0x807FFFFF
        add ebx, 0x3F800000
        mov r, ebx
        // st0 = ( asm_neg1_div_3 * r + asm_2 ) * r + asm_neg2_div_3;
        fld r
        fmul asm_neg1_div_3
        fadd asm_2
        fmul r
        fadd asm_neg2_div_3
        // st0 = ( st0 + log_2 ) * asm_1_div_log2_e
        fild log_2
        fadd
        fmul asm_1_div_log2_e
    }   // result is returned in st0
#else
    return log( r );
#endif
}
#if OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_ARCH_TYPE == OGRE_ARCHITECTURE_32
# pragma warning( pop )
#endif
} // namespace
#if OGRE_COMPILER == OGRE_COMPILER_MSVC
# pragma warning (pop)
#endif
#endif