/* 
	utilities for SPU
*/
#ifndef __SPU_UTILITIES_H
#define __SPU_UTILITIES_H
#pragma once

#if defined(PS3)

#if defined(__SPU__)

//#define USE_SEL
#include <spu_intrinsics.h>

#if defined(USE_SEL)
//vector type selector
//primary template is intentionally empty; the members live in the
//per-type specializations
template <typename T>
struct SVectorTypeSel
{
};

//selector for unsigned char scalars, carried in a vec_uchar16
template <>
struct SVectorTypeSel<unsigned char>
{
	typedef vec_uchar16 TCompResType;	//type a comparison mask is stored as
	typedef vec_uchar16 TVecType;		//vector type holding the scalar

	//scalar -> vector (via spu_splats)
	static const TVecType FromScalar(const unsigned char cValue)
	{
		return (TVecType)spu_splats(cValue);
	}

	//vector -> scalar
	static const unsigned char ToScalar(const TVecType cVector)
	{
		return __builtin_si_to_uchar(*(qword*)&cVector);
	}

	//second compare operand is passed through as a plain scalar
	static const unsigned char Get2ndCmpArg(const unsigned char cRhs)
	{
		return cRhs;
	}
};

//selector for signed char scalars, carried in a vec_char16
template <>
struct SVectorTypeSel<signed char>
{
	typedef vec_uchar16 TCompResType;	//type a comparison mask is stored as
	typedef vec_char16 TVecType;		//vector type holding the scalar

	//scalar -> vector (via spu_splats)
	static const TVecType FromScalar(const signed char cValue)
	{
		return spu_splats(cValue);
	}

	//vector -> scalar; the vector is handed to the builtin directly
	static const signed char ToScalar(const TVecType cVector)
	{
		return __builtin_si_to_char(cVector);
	}

	//second compare operand is passed through as a plain scalar
	static const signed char Get2ndCmpArg(const signed char cRhs)
	{
		return cRhs;
	}
};

//selector for unsigned short scalars, carried in a vec_ushort8
template <>
struct SVectorTypeSel<unsigned short>
{
	typedef vec_ushort8 TCompResType;	//type a comparison mask is stored as
	typedef vec_ushort8 TVecType;		//vector type holding the scalar

	//scalar -> vector (via spu_splats)
	static const TVecType FromScalar(const unsigned short cValue)
	{
		return (TVecType)spu_splats(cValue);
	}

	//vector -> scalar
	static const unsigned short ToScalar(const TVecType cVector)
	{
		return __builtin_si_to_ushort(*(qword*)&cVector);
	}

	//second compare operand is passed through as a plain scalar
	static const unsigned short Get2ndCmpArg(const unsigned short cRhs)
	{
		return cRhs;
	}
};

//selector for signed short scalars, carried in a vec_short8
template <>
struct SVectorTypeSel<signed short>
{
	typedef vec_ushort8 TCompResType;	//type a comparison mask is stored as
	typedef vec_short8 TVecType;		//vector type holding the scalar

	//scalar -> vector (via spu_splats)
	static const TVecType FromScalar(const signed short cValue)
	{
		return (TVecType)spu_splats(cValue);
	}

	//vector -> scalar
	static const signed short ToScalar(const TVecType cVector)
	{
		return __builtin_si_to_short(*(qword*)&cVector);
	}

	//second compare operand is passed through as a plain scalar
	static const signed short Get2ndCmpArg(const signed short cRhs)
	{
		return cRhs;
	}
};

//selector for unsigned int scalars, carried in a vec_uint4
template <>
struct SVectorTypeSel<unsigned int>
{
	typedef vec_uint4 TCompResType;	//type a comparison mask is stored as
	typedef vec_uint4 TVecType;		//vector type holding the scalar

	//scalar -> vector (via __builtin_si_from_uint)
	static const TVecType FromScalar(const unsigned int cValue)
	{
		return (TVecType)__builtin_si_from_uint(cValue);
	}

	//vector -> scalar
	static const unsigned int ToScalar(const TVecType cVector)
	{
		return __builtin_si_to_uint(*(qword*)&cVector);
	}

	//second compare operand is passed through as a plain scalar
	static const unsigned int Get2ndCmpArg(const unsigned int cRhs)
	{
		return cRhs;
	}
};

//selector for bool scalars; they are widened to unsigned int and carried
//in a vec_uint4
template <>
struct SVectorTypeSel<bool>
{
	typedef vec_uint4 TCompResType;	//type a comparison mask is stored as
	typedef vec_uint4 TVecType;		//vector type holding the scalar

	//scalar -> vector (via __builtin_si_from_uint on the widened value)
	static const TVecType FromScalar(const bool cValue)
	{
		const unsigned int cWidened = (unsigned int)cValue;
		return (TVecType)__builtin_si_from_uint(cWidened);
	}

	//vector -> scalar; the unsigned int result is converted to bool on return
	static const bool ToScalar(const TVecType cVector)
	{
		return __builtin_si_to_uint(*(qword*)&cVector);
	}

	//second compare operand
	//NOTE(review): the value is widened to unsigned int but the return type
	//converts it straight back to bool - confirm this is what the compare expects
	static const bool Get2ndCmpArg(const bool cRhs)
	{
		return (unsigned int)cRhs;
	}
};

//selector for pointers; the pointer value is converted to unsigned int and
//carried in a vec_uint4
template <class T>
struct SVectorTypeSel<T*>
{
	typedef vec_uint4 TCompResType;	//type a comparison mask is stored as
	typedef vec_uint4 TVecType;		//vector type holding the pointer value

	//pointer -> vector (via __builtin_si_from_uint on the converted pointer)
	static const TVecType FromScalar(const T* cPtr)
	{
		const unsigned int cAddress = (unsigned int)cPtr;
		return (TVecType)__builtin_si_from_uint(cAddress);
	}

	//vector -> unsigned int; note the result is NOT converted back to T*
	static const unsigned int ToScalar(const TVecType cVector)
	{
		return __builtin_si_to_uint(*(qword*)&cVector);
	}

	//second compare operand, as the unsigned int value of the pointer
	static const unsigned int Get2ndCmpArg(const T* cRhs)
	{
		return (unsigned int)cRhs;
	}
};

//selector for signed int scalars, carried in a vec_int4
template <>
struct SVectorTypeSel<signed int>
{
	typedef vec_uint4 TCompResType;	//type a comparison mask is stored as
	typedef vec_int4 TVecType;		//vector type holding the scalar

	//scalar -> vector (via __builtin_si_from_int)
	static const TVecType FromScalar(const signed int cValue)
	{
		return (TVecType)__builtin_si_from_int(cValue);
	}

	//vector -> scalar
	static const signed int ToScalar(const TVecType cVector)
	{
		return __builtin_si_to_int(*(qword*)&cVector);
	}

	//second compare operand is passed through as a plain scalar
	static const signed int Get2ndCmpArg(const signed int cRhs)
	{
		return cRhs;
	}
};

//selector for float scalars, carried in a vec_float4
template <>
struct SVectorTypeSel<float>
{
	typedef vec_uint4 TCompResType;	//type a comparison mask is stored as
	typedef vec_float4 TVecType;	//vector type holding the scalar

	//scalar -> vector (via __builtin_si_from_float)
	static const TVecType FromScalar(const float cValue)
	{
		return (TVecType)__builtin_si_from_float(cValue);
	}

	//vector -> scalar
	static const float ToScalar(const TVecType cVector)
	{
		return __builtin_si_to_float(*(qword*)&cVector);
	}

	//unlike the integer selectors, the second compare operand is handed over
	//in vector form
	static const TVecType Get2ndCmpArg(const float cRhs)
	{
		return FromScalar(cRhs);
	}
};
#endif //USE_SEL
//selects one of two values depending on a greater-than comparison of two
//separate condition operands (intended to avoid a branch)
//
//implements:
//
//	C cond0, cond1;
//	T val0, val1;
//	T res = (cond0 > cond1)?val0 : val1;
//
//	cCondArg0, cCondArg1	- operands that are compared
//	cA						- returned when cCondArg0 > cCondArg1
//	cB						- returned otherwise (also when both are equal)
//
template <class C, class T>
__attribute__((always_inline))
inline const T CondSelGT(const C cCondArg0, const C cCondArg1, const T cA, const T cB)
{
#if defined(USE_SEL)
	const typename SVectorTypeSel<C>::TVecType cVecCond0 = SVectorTypeSel<C>::FromScalar(cCondArg0);
	const typename SVectorTypeSel<T>::TVecType cVecA = SVectorTypeSel<T>::FromScalar(cA);
	const typename SVectorTypeSel<T>::TVecType cVecB = SVectorTypeSel<T>::FromScalar(cB);
	const typename SVectorTypeSel<C>::TCompResType cSelectMask = (const typename SVectorTypeSel<C>::TCompResType)spu_cmpgt(cVecCond0, SVectorTypeSel<C>::Get2ndCmpArg(cCondArg1));
	//the (T) cast matches CondSelEq/CondSelNEZ: the pointer selector's ToScalar
	//returns unsigned int, which does not convert to T implicitly
	return (T)SVectorTypeSel<T>::ToScalar(spu_sel(cVecB, cVecA, (typename SVectorTypeSel<T>::TCompResType)cSelectMask));
#else
	return (cCondArg0 > cCondArg1)?cA : cB;
#endif
}

//returns the larger of two values (intended to avoid a branch)
//
//implements:
//
//	T val0, val1;
//	T res = (val0 > val1)?val0 : val1;
//
//when both compare equal, cB is returned
//
template <class T>
__attribute__((always_inline))
inline const T CondSelMax(const T cA, const T cB)
{
#if defined(USE_SEL)
	const typename SVectorTypeSel<T>::TVecType cVecA = SVectorTypeSel<T>::FromScalar(cA);
	const typename SVectorTypeSel<T>::TVecType cVecB = SVectorTypeSel<T>::FromScalar(cB);
	const typename SVectorTypeSel<T>::TCompResType cSelectMask = spu_cmpgt(cVecA, cVecB);
	//the (T) cast matches CondSelEq/CondSelNEZ: the pointer selector's ToScalar
	//returns unsigned int, which does not convert to T implicitly
	return (T)SVectorTypeSel<T>::ToScalar(spu_sel(cVecB, cVecA, cSelectMask));
#else
	return (cA > cB)?cA : cB;
#endif
}

//returns the smaller of two values (intended to avoid a branch)
//
//implements:
//
//	T val0, val1;
//	T res = (val0 < val1)?val0 : val1;
//
//when both compare equal, cB is returned
//
template <class T>
__attribute__((always_inline))
inline const T CondSelMin(const T cA, const T cB)
{
#if defined(USE_SEL)
	const typename SVectorTypeSel<T>::TVecType cVecA = SVectorTypeSel<T>::FromScalar(cA);
	const typename SVectorTypeSel<T>::TVecType cVecB = SVectorTypeSel<T>::FromScalar(cB);
	//mask marks cA > cB, in which case cB is the one to keep
	const typename SVectorTypeSel<T>::TCompResType cSelectMask = spu_cmpgt(cVecA, cVecB);
	//the (T) cast matches CondSelEq/CondSelNEZ: the pointer selector's ToScalar
	//returns unsigned int, which does not convert to T implicitly
	return (T)SVectorTypeSel<T>::ToScalar(spu_sel(cVecA, cVecB, cSelectMask));
#else
	return (cA < cB)?cA : cB;
#endif
}

//returns the first value when both condition operands compare equal and
//the second value otherwise (intended to avoid a branch)
//
//implements:
//
//	C cond0, cond1;
//	T val0, val1;
//	T res = (cond0 == cond1)?val0 : val1;
//
template <class C, class T>
__attribute__((always_inline))
inline const T CondSelEq(const C cCondArg0, const C cCondArg1, const T cA, const T cB)
{
#if !defined(USE_SEL)
	//plain scalar fallback
	if (cCondArg0 == cCondArg1)
		return cA;
	return cB;
#else
	typedef SVectorTypeSel<C> TCondSel;
	typedef SVectorTypeSel<T> TValSel;
	typedef typename TCondSel::TCompResType TCondMask;
	typedef typename TValSel::TCompResType TValMask;

	const typename TValSel::TVecType cVecOnEqual = TValSel::FromScalar(cA);
	const typename TValSel::TVecType cVecOnDiffer = TValSel::FromScalar(cB);
	const typename TCondSel::TVecType cVecLhs = TCondSel::FromScalar(cCondArg0);
	//compare in the condition's domain, then reinterpret the mask for the value type
	const TCondMask cEqualMask = (TCondMask)spu_cmpeq(cVecLhs, TCondSel::Get2ndCmpArg(cCondArg1));
	return (T)TValSel::ToScalar(spu_sel(cVecOnDiffer, cVecOnEqual, (TValMask)cEqualMask));
#endif
}

//returns the first value when the condition is non-zero and the second
//value otherwise (intended to avoid a branch)
//
//implements:
//
//	C cond0;
//	T val0, val1;
//	T res = (cond0)?val0 : val1;
//
template <class C, class T>
__attribute__((always_inline))
inline const T CondSelNEZ(const C cCondArg0, const T cA, const T cB)
{
#if !defined(USE_SEL)
	//plain scalar fallback
	if (cCondArg0)
		return cA;
	return cB;
#else
	typedef SVectorTypeSel<C> TCondSel;
	typedef SVectorTypeSel<T> TValSel;
	typedef typename TCondSel::TCompResType TCondMask;
	typedef typename TValSel::TCompResType TValMask;

	const typename TValSel::TVecType cVecOnNonZero = TValSel::FromScalar(cA);
	const typename TValSel::TVecType cVecOnZero = TValSel::FromScalar(cB);
	const typename TCondSel::TVecType cVecCond = TCondSel::FromScalar(cCondArg0);
	//the mask is built from an equal-to-zero compare, so the select operands
	//are in the opposite order to CondSelEq
	const TCondMask cIsZeroMask = (TCondMask)spu_cmpeq(cVecCond, TCondSel::Get2ndCmpArg(0));
	return (T)TValSel::ToScalar(spu_sel(cVecOnNonZero, cVecOnZero, (TValMask)cIsZeroMask));
#endif
}

//bool flavour of CondSel: returns cA when cCondArg0 is true, cB otherwise
//the flag is widened to unsigned int and forwarded to CondSelNEZ
template <class T>
__attribute__((always_inline))
inline const T CondSel(const bool cCondArg0, const T cA, const T cB)
{
	const unsigned int cCondAsUInt = (unsigned int)cCondArg0;
	return CondSelNEZ(cCondAsUInt, cA, cB);
}

//int flavour of CondSel: returns cA when cCondArg0 is non-zero, cB otherwise
//simply forwards to CondSelNEZ
template <class T>
__attribute__((always_inline))
inline const T CondSel(const int cCondArg0, const T cA, const T cB)
{
	const T cSelected = CondSelNEZ(cCondArg0, cA, cB);
	return cSelected;
}

//impl. for vec types
//
//returns the whole vector cA when cCondArg0 is non-zero and the whole
//vector cB otherwise
__attribute__((always_inline))
inline const vec_uint4 CondSel(const int cCondArg0, const vec_uint4 cA, const vec_uint4 cB)
{
#if defined(USE_SEL)
	//a scalar cannot be cast to a vector type, so the condition is replicated
	//into every element; that way the mask covers the complete vector
	const vec_uint4 cVecCond0 = spu_splats((unsigned int)cCondArg0);
	//mask is set when the condition equals zero, which selects cB
	const vec_uint4 cSelectMask = spu_cmpeq(cVecCond0, spu_splats((unsigned int)0));
	return spu_sel(cA, cB, cSelectMask);
#else
	return cCondArg0?cA : cB;
#endif
}

//vec_ushort8 flavour of CondSel: returns the whole vector cA when
//cCondArg0 is non-zero and the whole vector cB otherwise
__attribute__((always_inline))
inline const vec_ushort8 CondSel(const int cCondArg0, const vec_ushort8 cA, const vec_ushort8 cB)
{
#if defined(USE_SEL)
	//the condition is tested at full int width: narrowing it to 16 bit would
	//treat values such as 0x10000 as zero, unlike the scalar path below
	//(a scalar cannot be cast to a vector type either, hence spu_splats)
	const vec_uint4 cVecCond0 = spu_splats((unsigned int)cCondArg0);
	//mask is set when the condition equals zero, which selects cB; it is
	//uniform across the vector, so it can be reinterpreted for 16 bit elements
	const vec_ushort8 cSelectMask = (vec_ushort8)spu_cmpeq(cVecCond0, spu_splats((unsigned int)0));
	return spu_sel(cA, cB, cSelectMask);
#else
	return cCondArg0?cA : cB;
#endif
}

//vec_uchar16 flavour of CondSel: returns the whole vector cA when
//cCondArg0 is non-zero and the whole vector cB otherwise
__attribute__((always_inline))
inline const vec_uchar16 CondSel(const int cCondArg0, const vec_uchar16 cA, const vec_uchar16 cB)
{
#if defined(USE_SEL)
	//the condition is tested at full int width: narrowing it to 8 bit would
	//treat values such as 0x100 as zero, unlike the scalar path below
	//(a scalar cannot be cast to a vector type either, hence spu_splats)
	const vec_uint4 cVecCond0 = spu_splats((unsigned int)cCondArg0);
	//mask is set when the condition equals zero, which selects cB; it is
	//uniform across the vector, so it can be reinterpreted for 8 bit elements
	const vec_uchar16 cSelectMask = (vec_uchar16)spu_cmpeq(cVecCond0, spu_splats((unsigned int)0));
	return spu_sel(cA, cB, cSelectMask);
#else
	return cCondArg0?cA : cB;
#endif
}

//returns the first value when the condition equals zero and the second
//value otherwise; implemented as CondSelEq against (C)0
//
//	C cond0;
//	T val0, val1;
//	T res = (cond0 == 0)?val0 : val1;
//
template <class C, class T>
__attribute__((always_inline))
inline const T CondSelEZ(const C cCondArg0, const T cA, const T cB)
{
	const C cZero = (C)0;
	return (T)CondSelEq(cCondArg0, cZero, cA, cB);
}

#endif //__SPU__
#endif //PS3
#endif //__SPU_UTILITIES_H
