//////////////////////////////////////////////////////////////////////
//
//	Crytek Common Source code
//	
//	File:Cry_Quat_s.h
//	Description: Common quaternion class implemented with SPU SIMD
//
//	History
//	-June 25,2007: Created by MichaelG, based on Ivo code
//
//////////////////////////////////////////////////////////////////////

#ifndef _CRYQUAT_S_H
#define _CRYQUAT_S_H

//----------------------------------------------------------------------
// Quaternion
//----------------------------------------------------------------------
#if defined(PS3) && defined(__SPU__)

#include <platform.h>
#include <math.h>
#include <spu_intrinsics.h>
#include <vecidx_aos.h>

// Byte-selector words used to build spu_shuffle control vectors.
// Each word holds the four byte indices of one 32-bit lane:
// 0x00-0x0f pick a lane of the first shuffle operand (X, Y, Z, W),
// 0x10-0x1f pick a lane of the second shuffle operand (A, B, C, D).
#define SHUF_X 0x00010203
#define SHUF_Y 0x04050607
#define SHUF_Z 0x08090a0b
#define SHUF_W 0x0c0d0e0f
#define SHUF_A 0x10111213
#define SHUF_B 0x14151617
#define SHUF_C 0x18191a1b
#define SHUF_D 0x1c1d1e1f

// Lane rotations of the x/y/z part with w left in place (used by the quaternion product).
// NOTE: YZXW is the only pattern here without the SHUF_ prefix.
#define YZXW (vec_uchar16)(vec_uint4){ SHUF_Y, SHUF_Z, SHUF_X, SHUF_W}
#define SHUF_ZXYW (vec_uchar16)(vec_uint4){ SHUF_Z, SHUF_X, SHUF_Y, SHUF_W}

// Two-operand patterns used by Quat_s::SetRotationXYZ, where the first operand
// is the cosine vector and the second the sine vector of the half angles.
// The pattern name lists the source lane for result lanes 0..3.
#define SHUF_ZZCX (vec_uchar16)(vec_uint4){ SHUF_Z, SHUF_Z, SHUF_C, SHUF_X}
#define SHUF_YBYY (vec_uchar16)(vec_uint4){ SHUF_Y, SHUF_B, SHUF_Y, SHUF_Y}
#define SHUF_AXXZ (vec_uchar16)(vec_uint4){ SHUF_A, SHUF_X, SHUF_X, SHUF_Z}
#define SHUF_CCZA (vec_uchar16)(vec_uint4){ SHUF_C, SHUF_C, SHUF_Z, SHUF_A}
#define SHUF_BYBB (vec_uchar16)(vec_uint4){ SHUF_B, SHUF_Y, SHUF_B, SHUF_B}
#define SHUF_XAAC (vec_uchar16)(vec_uint4){ SHUF_X, SHUF_A, SHUF_A, SHUF_C}

// 4-component dot product of vec0 and vec1.
// The scalar result is replicated into all four lanes of the returned vector.
ILINE vec_float4 dot4(vec_float4 vec0, vec_float4 vec1)
{
	// lane i = vec0[i] * vec1[i]
	const vec_float4 laneProducts = spu_mul( vec0, vec1 );

	// rotate both inputs left by one lane and accumulate:
	// lane i = vec0[i]*vec1[i] + vec0[i+1]*vec1[i+1]
	const vec_float4 lhsRotated = spu_rlqwbyte( vec0, 4 );
	const vec_float4 rhsRotated = spu_rlqwbyte( vec1, 4 );
	const vec_float4 pairSums = spu_madd( lhsRotated, rhsRotated, laneProducts );

	// adding the pair sums rotated by two lanes completes the sum in every lane
	const vec_float4 pairSumsSwapped = spu_rlqwbyte( pairSums, 8 );
	return spu_add( pairSumsSwapped, pairSums );
}

// Identity quaternion in (x, y, z, w) lane order.
#define QUAT_S_IDENTITY (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
// Default tolerance for Quat_s::IsUnit / Quat_s::IsValid; only defined here
// when no earlier header has already provided VEC_EPSILON.
#ifndef VEC_EPSILON
	#define	VEC_EPSILON	(0.05f)
#endif

//do not use a union since this results in a heavy performance drop
// Quaternion held in one SPU quadword.
// Lane order is (x, y, z, w): lanes 0..2 are the vector part, lane 3 the scalar part.
struct Quat_s
{
	vec_float4 vec128;	// (x, y, z, w)

	// Default construction leaves vec128 uninitialized.
	ILINE Quat_s() {}

	// Wraps an existing quadword; lanes are taken as (x, y, z, w).
	ILINE Quat_s(vec_float4 vf4)
	{
		vec128 = vf4;
	}

	// Loads the 16 bytes of floatQuat as one quadword.
	// NOTE(review): requires floatQuat to be 16-byte aligned and laid out as
	// x, y, z, w - the alignment assert below is disabled, confirm at call sites.
	ILINE Quat_s(const Quat_tpl<float>& floatQuat)
	{
//		assert(((uint32)&floatQuat & 15) == 0);
		vec128 = *(const vec_float4*)&floatQuat;
	}

	// Same load as the converting constructor above (same alignment/layout caveat).
	ILINE Quat_s& operator=(const Quat_tpl<float>& floatQuat)
	{
//		assert(((uint32)&floatQuat & 15) == 0);
		vec128 = *(const vec_float4*)&floatQuat;
		return *this;	// was missing: flowing off the end of a Quat_s&-returning function is undefined
	}

	// Reinterprets the quadword as a Quat and returns a copy of it.
	// const-qualified so that const objects and the const temporaries returned
	// by the arithmetic operators can be converted as well.
	ILINE operator Quat() const
	{
		return *(const Quat*)this;
	}

	// Raw quadword access.
	ILINE vec_float4 get128() const
	{
		return vec128;
	}

	// Component-wise construction; note the argument order (W first).
	ILINE Quat_s(float W, float X, float Y, float Z) 
	{ 
		vec128 = (vec_float4){ X, Y, Z, W};
	}
	
	// Stores axis in x/y/z and angle in w exactly as given.
	// No trigonometry is applied here; use SetRotationAA for an axis-angle rotation.
	ILINE Quat_s( float angle, const Vec3_tpl<float> &axis) 
	{
		vec128 = (vec_float4){axis.x, axis.y, axis.z, angle};
	}

	// Identity construction (0, 0, 0, 1).
	ILINE Quat_s(type_identity) { vec128 = QUAT_S_IDENTITY; }

	ILINE Quat_s& operator =(Quat_s quat)
	{
		vec128 = quat.vec128;
		return *this;
	}

	// Scales all four components by op.
	ILINE const Quat_s operator *(float op) const
	{
		return Quat_s(spu_mul(vec128, spu_splats(op)));
	}

	//flip quaternion. don't confuse this with quaternion-inversion.
	// Toggles the sign bit of all four lanes.
	ILINE Quat_s operator-() const 
	{ 
		return Quat_s((vec_float4)spu_xor((vec_uint4)vec128, spu_splats(0x80000000))); 
	}

	// Component-wise sum.
	ILINE const Quat_s operator +(Quat_s quat) const
	{
		return Quat_s(spu_add(vec128, quat.vec128));
	}

	// Component-wise difference.
	ILINE const Quat_s operator -(Quat_s quat) const
	{
		return Quat_s(spu_sub(vec128, quat.vec128));
	}

	ILINE Quat_s& operator +=(Quat_s quat)
	{
		*this = *this + quat;
		return *this;
	}

	ILINE Quat_s& operator -=(Quat_s quat)
	{
		*this = *this - quat;
		return *this;
	}

	// Quaternion product, *this on the left-hand side.
	ILINE Quat_s& operator *=(Quat_s quat)
	{
		*this = *this * quat;
		return *this;
	}

	// Quaternion product l*r with l = *this and r = quat:
	//   xyz = l.w*r.xyz + r.w*l.xyz + cross(l.xyz, r.xyz)
	//   w   = l.w*r.w - dot(l.xyz, r.xyz)
	ILINE const Quat_s operator *(Quat_s quat) const
	{
		const vec_float4 ldata = vec128;
		const vec_float4 rdata = quat.vec128;
		const vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);

		// rotated copies for the cross product
		const vec_float4 tmp0 = spu_shuffle(ldata, ldata, YZXW);
		const vec_float4 tmp1 = spu_shuffle(rdata, rdata, SHUF_ZXYW);
		const vec_float4 tmp2 = spu_shuffle(ldata, ldata, SHUF_ZXYW);
		const vec_float4 tmp3 = spu_shuffle(rdata, rdata, YZXW);

		// vector part in lanes 0..2 (lane 3 is discarded by the final select)
		vec_float4 qv = spu_mul(spu_shuffle(ldata, ldata, shuffle_wwww), rdata);
		qv = spu_madd(spu_shuffle(rdata, rdata, shuffle_wwww), ldata, qv);
		qv = spu_madd(tmp0, tmp1, qv);
		qv = spu_nmsub(tmp2, tmp3, qv);

		// scalar part, valid in lane 3 only:
		// qw[3] = (lw*rw - lz*rz) - (lx*rx + ly*ry)
		const vec_float4 product = spu_mul(ldata, rdata);
		const vec_float4 l_wxyz = spu_rlqwbyte(ldata, 12);
		const vec_float4 r_wxyz = spu_rlqwbyte(rdata, 12);
		vec_float4 qw = spu_nmsub(l_wxyz, r_wxyz, product);
		const vec_float4 xy = spu_madd(l_wxyz, r_wxyz, product);
		qw = spu_sub( qw, spu_rlqwbyte( xy, 8));

		// mask 0x000f selects bytes 12..15, i.e. lane 3 comes from qw
		return Quat_s(spu_sel(qv, qw, (vec_uint4)spu_maskb(0x000f)));
	}

	// 4-component dot product.
	// const-qualified so it can be used from const members (IsUnit) and on the
	// const temporaries returned by the arithmetic operators.
	ILINE const float operator |(Quat_s quat1) const
	{
		return spu_extract(dot4(vec128, quat1.get128()), 0);
	}

	// Defined after the struct (needs the free function select()).
	void operator %=(Quat_s p);

	ILINE Quat_s& SetX(const float _x)
	{
		vec128 = spu_insert(_x, vec128, 0);
		return *this;
	}

	ILINE float GetX() const
	{
		return spu_extract(vec128, 0);
	}

	ILINE Quat_s& SetY(const float _y)
	{
		vec128 = spu_insert(_y, vec128, 1);
		return *this;
	}

	ILINE float GetY() const
	{
		return spu_extract(vec128, 1);
	}

	ILINE Quat_s& SetZ(const float _z)
	{
		vec128 = spu_insert(_z, vec128, 2);
		return *this;
	}

	ILINE float GetZ() const
	{
		return spu_extract(vec128, 2);
	}

	ILINE Quat_s& SetW(const float _w)
	{
		vec128 = spu_insert(_w, vec128, 3);
		return *this;
	}

	ILINE float GetW() const
	{
		return spu_extract(vec128, 3);
	}

	// Tolerant comparison against a scalar quaternion (see the Quat_s overload).
	ILINE bool operator==(const Quat q) const
	{
		return *this == Quat_s(q);
	}

	// Tolerant comparison: true when no component differs by more than 0.00001f
	// in absolute value.
	ILINE bool operator==(const Quat_s q) const
	{
		const vec_float4 cEps4	= spu_splats(0.00001f);
		const vec_float4 cDiff4	= spu_sub(q.vec128, vec128);
		// gather packs one bit per lane; any set bit means a lane exceeded the tolerance
		return spu_extract(spu_gather(spu_cmpabsgt(cDiff4, cEps4)), 0) == 0;
	}

	ILINE bool operator!=(const Quat q) const
	{
		return !(*this == q);
	}

	ILINE bool operator!=(const Quat_s q) const
	{
		return !(*this == q);
	}

	//multiplication by a scalar
	ILINE Quat_s& operator *=(const float op) 
	{	
		vec128 = spu_mul(vec128, spu_splats(op));
		return *this;
	}

	// Conjugate: negates x, y, z and keeps w.
	// For a unit quaternion this is the inverse rotation.
	ILINE Quat_s operator !() const 
	{ 
		vec_float4 invVec(spu_splats(-1.f));
		invVec = spu_insert(1.f, invVec, 3);
		return Quat_s(spu_mul(vec128, invVec));
	}

	// Exact (bitwise-equal value) comparison of all four lanes with (0, 0, 0, 1).
	ILINE bool IsIdentity() const 
	{ 
		// gather packs one bit per lane; 15 means all four lanes compared equal
		return spu_extract(spu_gather(spu_cmpeq(QUAT_S_IDENTITY, vec128)), 0) == 15;
	}

	// Returns an identity quaternion; the object it is called on is not used.
	ILINE Quat_s CreateIdentity()	const
	{ 
		return Quat_s(QUAT_S_IDENTITY); 
	}

	ILINE void SetIdentity()
	{ 
		vec128 = QUAT_S_IDENTITY;
	}

	// True when the squared length differs from 1 by less than cEps.
	ILINE bool IsUnit(const float cEps = VEC_EPSILON) const
	{
		return fabs(1 - ((*this) | (*this)) ) < cEps;
	}

	// Converts to the scalar quaternion type and delegates to its IsValid.
	ILINE bool IsValid(const float cEps = VEC_EPSILON) const
	{
		const Quat_tpl<float> cQuat = (Quat_tpl<float>)*this;
		return cQuat.IsValid(cEps);
	}

	// Builds the rotation from the three angles in a (radians, see the Ang3 constructor).
	// With s*/c* the sine/cosine of the half angles the result is
	//   x = cz*cy*sx - sz*sy*cx
	//   y = cz*sy*cx + sz*cy*sx
	//   z = sz*cy*cx - cz*sy*sx
	//   w = cx*cy*cz + sx*sy*sz
	ILINE void SetRotationXYZ(const Ang3 &a)	
	{ 
		vec_float4 s, c;
		vec_float4 angles = {a.x, a.y, a.z};	// lane 3 is zero and never read below
		angles = spu_mul(angles, spu_splats(0.5f));
		sincosf4(angles, &s, &c);
		// first product term of each component (c is shuffle operand 1, s operand 2)
		const vec_float4 c0 = spu_shuffle(c, s, SHUF_ZZCX);
		const vec_float4 c1 = spu_shuffle(c, s, SHUF_YBYY);
		const vec_float4 c2 = spu_shuffle(c, s, SHUF_AXXZ);
		// second product term of each component, signed per lane
		const vec_float4 c3 = spu_shuffle(c, s, SHUF_CCZA);
		const vec_float4 c4 = spu_shuffle(c, s, SHUF_BYBB);
		const vec_float4 c5 = spu_shuffle(c, s, SHUF_XAAC);
		// literal is parenthesized (as in SetRotationAA) so its commas cannot be
		// taken for argument separators should spu_mul be a function-like macro
		const vec_float4 c345 = spu_mul(spu_mul(spu_mul(c3, c4), c5), ((vec_float4){-1.f, 1.f, -1.f, 1.f}));
		vec128 = spu_madd(spu_mul(c0, c1), c2, c345);
	}

	// Construction from three angles, see SetRotationXYZ.
	explicit ILINE Quat_s(const Ang3& rad)
	{
		SetRotationXYZ(rad);
	}

	static Quat_s CreateRotationXYZ(const Ang3 &a)
	{
		Quat_s q;	q.SetRotationXYZ(a); 	return q;	
	}

	// Rotation of rad radians about axis: (axis * sin(rad/2), cos(rad/2)).
	// axis is used as given; it is not normalized here.
	ILINE void SetRotationAA(const float rad, const Vec3 &axis)
	{
		vec_float4 s, c;
		vec_float4 angle = spu_mul(spu_splats(rad), spu_splats(0.5f));
		sincosf4(angle, &s, &c);
		// lanes 0..2 = axis * sin, lane 3 = cos (mask 0x000f selects lane 3 from c)
		vec128 = spu_sel(spu_mul(((vec_float4){axis.x, axis.y, axis.z}), s), c, (vec_uint4)spu_maskb(0x000f));
	}

	static Quat_s CreateRotationAA(const float rad, const Vec3 &axis) 
	{
		Quat_s q;	q.SetRotationAA(rad,axis); 	return q;	
	}
};

// Branch-free choice between two quaternions:
// returns quat1 when select1 is true, quat0 otherwise.
ILINE const Quat_s select(Quat_s quat0, Quat_s quat1, bool select1)
{
	// 0xffffffff when select1 is set, 0 otherwise (unsigned wrap-around of 0 - 1)
	const uint32 laneBits = (uint32)0 - (uint32)select1;
	const vec_uint4 laneMask = spu_splats(laneBits);
	return Quat_s(spu_sel(quat0.get128(), quat1.get128(), laneMask));
}

// Adds p to *this, using -p instead when p and *this have a negative dot product.
ILINE void Quat_s::operator %=(Quat_s p)
{ 
	const bool bFlip = (p | *this) < 0;
	const Quat_s addend = select(p, -p, bFlip);
	vec128 = spu_add(vec128, addend.get128());
}

// Scalar * quaternion: scales all four components of quat by op.
ILINE const Quat_s operator *(float op, Quat_s quat)
{
	const vec_float4 scale = spu_splats(op);
	return Quat_s(spu_mul(quat.get128(), scale));
}

// Returns q + tp, using -tp instead when tp and q have a negative dot product.
ILINE Quat_s operator % (const Quat_s &q, const Quat_s &tp) 
{
	Quat_s other(tp);	// local copy of tp
	const bool bFlip = (other | q) < 0;
	const Quat_s addend = select(other, -other, bFlip);
	return q + addend;
}


// Alternative names for the SPU quaternion type.
typedef Quat_s CryQuat_s;
typedef Quat_s quaternionf_s;

#endif//PS3
#endif//_CRYQUAT_S_H

