////////////////////////////////////////////////////////////////////////////
//
//  Crytek Engine Source File.
//  Copyright (C), Crytek Studios, 2002.
// -------------------------------------------------------------------------
//  File name:   CZBufferCuller.cpp
//  Version:     v1.00
//  Created:     13/8/2006 by Michael Kopietz
//  Compilers:   Visual Studio.NET
//  Description: Occlusion buffer
// -------------------------------------------------------------------------
//  History:
//
////////////////////////////////////////////////////////////////////////////

#include "StdAfx.h"
#include "CZBufferCuller.h"

// Optional debug switch. The define itself is commented out, so ENABLE_DEBUG_CLEAR
// is not enabled by this file; the guard only shows it was meant for non-SPU builds.
#ifndef __SPU__
//#define ENABLE_DEBUG_CLEAR
#endif

// GLOBALEXTERN expands to 'extern' on PS3 and to nothing elsewhere. The HWZBuffer
// line below is therefore only a declaration on PS3 (the definition is expected to
// live in another translation unit) and the actual definition on other platforms.
#if defined(PS3)
#define GLOBALEXTERN extern
#else
#define GLOBALEXTERN 
#endif

// Descriptor of the hardware z-buffer. This file reads pHardwareZBuffer from it and
// writes ZBufferSizeX/ZBufferSizeY into it (PS3 code paths only).
GLOBALEXTERN	SHWOccZBuffer HWZBuffer;

#if defined(XENON)
// Byte-index tables (16 indices per entry, 8 entries each), only built for XENON.
//
// Layout of entry n in s_startPermuteMask: the first 2*n bytes hold the index
// 0x10+position, the remaining bytes hold their own position (0x00..0x0F).
// In other words the first n 16-bit lanes carry indices >= 0x10 and the rest
// carry indices < 0x10.
// NOTE(review): in a two-source byte permute, indices >= 0x10 conventionally select
// from the second source vector; the code consuming these tables is not in this
// file, so confirm the operand order there.
const XMVECTORU8 CZBufferCuller::s_startPermuteMask[8] = {
	{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F},	//0
	{0x10, 0x11, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F},	//1
	{0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F},	//2
	{0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F},	//3
	{0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F},	//4
	{0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F},	//5
	{0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F},	//6
	{0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x0E, 0x0F}	//7
};

// Entries 1..7 are identical to the corresponding s_startPermuteMask entries.
// Entry 0 differs: here every byte index is >= 0x10 (0x10..0x1F), whereas the start
// table's entry 0 holds only indices < 0x10.
const XMVECTORU8 CZBufferCuller::s_endPermuteMask[8] = {
	{0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F},	//0
	{0x10, 0x11, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F},	//1
	{0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F},	//2
	{0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F},	//3
	{0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F},	//4
	{0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F},	//5
	{0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F},	//6
	{0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x0E, 0x0F}	//7
};
#endif


// 'shi' is the value 16 in both configurations: a file-local volatile variable when
// ENABLE_DEBUG_CLEAR is defined, a plain macro otherwise.
// No code in this file reads it; the macro form is removed again by the
// '#undef shi' at the end of the file.
#ifdef ENABLE_DEBUG_CLEAR
	static volatile int shi=16;
#else
	#define shi 16
#endif
// Prepares the culler for a new frame.
//
// Latches the coverage-buffer cvars into members, stores the camera and its
// position, clamps the (square) buffer resolution to [1,1024] and, on non-SPU
// builds, reallocates m_ZBuffer when that resolution changed. The platform
// sections then fetch the occlusion buffer together with its view-projection
// matrix (XENON) or publish the buffer size through HWZBuffer (PS3).
//
// On non-SPU builds the function returns early while e_CoverageBufferDebugFreeze
// or e_CameraFreeze is set, so the state of the previous frame stays in place.
//
// rCam - camera of this frame; copied into m_Camera.
void CZBufferCuller::BeginFrame(SPU_DOMAIN_LOCAL const CCamera& rCam)
{
	m_AccurateTest	=	GetCVars()->e_CoverageBufferAccurateOBBTest;
#if defined(PS3) && !defined(__SPU__)
	//avoid obb test if build is used without spu code
	//setting spu_enable=0 enables it
	if(gPS3Env->spuEnabled == -1)
		m_AccurateTest=0;
#endif
	m_Treshold			=	GetCVars()->e_CoverageBufferTolerance;

	// Latch the freeze flag BEFORE the early-out below. It used to be assigned at the
	// very end of the function, which on non-SPU builds is only reached when
	// e_CoverageBufferDebugFreeze is 0 - so the flag could never become true there and
	// the freeze check in ReloadBuffer() never triggered.
	m_DebugFreez		=	GetCVars()->e_CoverageBufferDebugFreeze!=0;

#if !defined(__SPU__)
	FUNCTION_PROFILER_3DENGINE;
	if(GetCVars()->e_CoverageBufferDebugFreeze || GetCVars()->e_CameraFreeze)
		return;
#if !defined(ENABLE_DEBUG_CLEAR) && defined(PS3)
	// warm the cache for the hardware buffer (first 256 bytes) and our own buffer
	CryPrefetch((void*)(HWZBuffer.pHardwareZBuffer));
	CryPrefetch((void*)((uint8*)HWZBuffer.pHardwareZBuffer+128));
	CryPrefetch((void*)m_ZBuffer);
#endif
#endif//__SPU__

	// reset the per-frame statistics
	m_ObjectsTested	=
	m_ObjectsTestedAndRejected =	0;

	m_Camera		=	rCam;
	m_Position	=	rCam.GetPosition();

	// remember the previous width so a resolution change can be detected below
	const uint32 oldSizeX = m_SizeX;
	const uint32 sizeX = min(max(1, GetCVars()->e_CoverageBufferResolution),1024);
	const uint32 sizeY = sizeX;	// the buffer is always square
	m_SizeX					=	
	m_SizeY					= sizeX;
	m_fSizeX				=	static_cast<f32>(sizeX);
	m_fSizeY				=	static_cast<f32>(sizeX);
	m_fSizeZ				=	static_cast<f32>(TZB_MAXDEPTH);

#ifndef __SPU__
	// resolution changed: replace the buffer with one of the new size (128 byte aligned)
	if(oldSizeX != sizeX)
	{
		CryModuleMemalignFree(m_ZBuffer);
		m_ZBuffer = (TZBZexel*)CryModuleMemalign(sizeof(TZBZexel)*sizeX*sizeY,128);
	}
#endif

#if defined(XENON)
	//add camera which is associated with current depth buffer fetching
	// fall back to an identity matrix when the renderer reports failure
	if (!m_pRenderer->GetOcclusionBuffer(m_ZBuffer, sizeX, sizeY, &m_MatViewProj))
	{
		m_MatViewProj.SetIdentity();
	}
	assert(m_MatViewProj.IsValid());
#endif
#if defined(PS3)
	HWZBuffer.ZBufferSizeX	=	sizeX;
	HWZBuffer.ZBufferSizeY	=	sizeY;
#endif
//	CCamera tmpCam	= m_pRenderer->GetCamera();
//	m_pRenderer->SetCamera(m_Camera);
	//m_pRenderer->GetModelViewMatrix(reinterpret_cast<f32*>(&m_MatView));
	//m_pRenderer->GetProjectionMatrix(reinterpret_cast<f32*>(&m_MatProj));
	//m_MatViewProj				=		m_MatView*m_MatProj;

#if defined(XENON) && XENON_CULLER_VECTORIZED
	m_VMin	=	XMVectorSet(FLT_EPSILON,FLT_EPSILON,FLT_EPSILON,FLT_EPSILON);
	m_VMax	=	XMVectorSet(static_cast<float>(sizeX)-FLT_EPSILON,
		static_cast<float>(sizeY)-FLT_EPSILON,0.f,0.f);

	// x and y are scaled and offset by half the buffer resolution (integer division)
	const float SCALE		=	static_cast<float>(sizeX / 2);
	Matrix44A ScreenMat(	SCALE,0.f,0.f,SCALE,
		0.f,SCALE,0.f,SCALE,
		0.f,0.f,1.f,0.f,
		0.f,0.f,0.f,1.f);

	ScreenMat.Transpose();
	m_MatViewProjT	=	m_MatViewProj*ScreenMat;
	m_MatViewProj		=	m_MatViewProjT;
#endif
	m_MatViewProj.Transpose();
//	m_pRenderer->SetCamera(tmpCam);

	m_RotationSafe	=	GetCVars()->e_CoverageBufferRotationSafeCheck;
}

// Refills m_ZBuffer from hardware z-buffer number BufferID and rebuilds the
// matrices/bounds derived from it.
//
// Does nothing while m_DebugFreez is set. The whole body is PS3 only; on other
// platforms the function is empty apart from the freeze check.
//
// BufferID - index of the hardware buffer; buffers are addressed as consecutive
//            SizeX*SizeY blocks behind HWZBuffer.pHardwareZBuffer.
void CZBufferCuller::ReloadBuffer(const uint32 BufferID)
{
	if(m_DebugFreez)
		return;
#if defined(PS3)
	// the cvar bias is only applied to buffer 0, all other buffers use no bias
	m_Bias				=	BufferID==0?static_cast<int32>(GetCVars()->e_CoverageBufferBias):0;
	int32_t SizeX	=	m_SizeX;
	int32_t SizeY	=	m_SizeY;

#ifdef __SPU__
	//transfer zbuffer here
	// stack buffer with 128 bytes of slack so the start can be rounded up to a
	// 128 byte boundary; the transfer is waited for before the data is used
	uint8* HWZLocalBuf = (uint8*)alloca(SizeX * SizeY * sizeof(uint32) + 128);
	SPU_DOMAIN_LOCAL uint32 *pHWZBufferLocal = SPU_LOCAL_PTR((uint32*)(((uint32)HWZLocalBuf+127)&~127));
	memtransfer_from_main(pHWZBufferLocal,HWZBuffer.pHardwareZBuffer+BufferID*SizeY*SizeX,SizeX  * SizeY * sizeof(uint32),3/*id*/);
	memtransfer_sync(3);
	// HWZBuf = start of the selected buffer (local copy on SPU)
	#define HWZBuf pHWZBufferLocal
#else
	// HWZBuf = start of the selected buffer (read in place on non-SPU builds)
	#define HWZBuf (HWZBuffer.pHardwareZBuffer+BufferID*SizeY*SizeX)
#endif


	uint16	Bias			=	static_cast<uint16>(m_Bias);
	// bias replicated into all eight 16 bit lanes
	const qword qBias	=	(qword)(vec_ushort8){Bias,Bias,Bias,Bias,Bias,Bias,Bias,Bias};
	// shuffle pattern: takes byte 0 and 1 of each of the eight 32 bit elements of the
	// two shufb inputs, i.e. packs 2x4 32 bit values into 8x16 bit values
	const qword pat		= (qword){0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29};
	const qword qFFFF	=	(qword)(vec_int4){~0,~0,~0,~0};
	// 0xffff-Bias per lane: values above this would overflow when the bias is added
	const qword qZMax	=	__si_sfh(qBias,qFFFF);

	qword* __restrict pZBufferVec	=	(qword*)&m_ZBuffer[0];
	// NOTE(review): y/x are uint32 while SizeX/SizeY are int32_t (mixed sign compare)
	for(uint32 y=0;y<SizeY;y++)
	{
		// source rows are read bottom-up (row SizeY-1-y), so the copy is flipped vertically
		const qword* __restrict pVMemZ	=	(qword*)&HWZBuf[(SizeY-1-y)*SizeX];
		// each pass converts 64 source values (16 qwords in, 8 qwords out)
		// NOTE(review): assumes SizeX is a multiple of 64, otherwise the pass reads and
		// writes past the end of the row - TODO confirm the resolution cvar guarantees this
		for(uint32 x=0;x<SizeX;x+=64)
		{
#if !defined(__SPU__)
			CryPrefetch((void*)(pVMemZ+16));
			CryPrefetch((void*)(pVMemZ+24));
			CryPrefetch((void*)(pZBufferVec+8));
#endif
			const qword zui0 = pVMemZ[0];
			const qword zui1 = pVMemZ[1];
			const qword zui2 = pVMemZ[2];
			const qword zui3 = pVMemZ[3];
			const qword zui4 = pVMemZ[4];
			const qword zui5 = pVMemZ[5];
			const qword zui6 = pVMemZ[6];
			const qword zui7 = pVMemZ[7];
			const qword zui8 = pVMemZ[8];
			const qword zui9 = pVMemZ[9];
			const qword zui10 = pVMemZ[10];
			const qword zui11 = pVMemZ[11];
			const qword zui12 = pVMemZ[12];
			const qword zui13 = pVMemZ[13];
			const qword zui14 = pVMemZ[14];
			const qword zui15 = pVMemZ[15];
			// pack two source qwords into one qword of 16 bit values
			qword	Z0	 = __si_shufb(zui0, zui1, pat);
			qword	Z1	 = __si_shufb(zui2, zui3, pat);
			qword	Z2	 = __si_shufb(zui4, zui5, pat);
			qword	Z3	 = __si_shufb(zui6, zui7, pat);
			qword	Z4	 = __si_shufb(zui8, zui9, pat);
			qword	Z5	 = __si_shufb(zui10,zui11, pat);
			qword	Z6	 = __si_shufb(zui12,zui13, pat);
			qword	Z7	 = __si_shufb(zui14,zui15, pat);
			Z0	=	__si_or(__si_ah(Z0,qBias),__si_clgth(Z0,qZMax));//add saturate   ((z+Bias)| (z>ffff-Bias))
			Z1	=	__si_or(__si_ah(Z1,qBias),__si_clgth(Z1,qZMax));
			Z2	=	__si_or(__si_ah(Z2,qBias),__si_clgth(Z2,qZMax));
			Z3	=	__si_or(__si_ah(Z3,qBias),__si_clgth(Z3,qZMax));
			Z4	=	__si_or(__si_ah(Z4,qBias),__si_clgth(Z4,qZMax));
			Z5	=	__si_or(__si_ah(Z5,qBias),__si_clgth(Z5,qZMax));
			Z6	=	__si_or(__si_ah(Z6,qBias),__si_clgth(Z6,qZMax));
			Z7	=	__si_or(__si_ah(Z7,qBias),__si_clgth(Z7,qZMax));
			pZBufferVec[0] = Z0;
			pZBufferVec[1] = Z1;
			pZBufferVec[2] = Z2;
			pZBufferVec[3] = Z3;
			pZBufferVec[4] = Z4;
			pZBufferVec[5] = Z5;
			pZBufferVec[6] = Z6;
			pZBufferVec[7] = Z7;
			pZBufferVec	+= 8;
			pVMemZ			+= 16;
		}
	}

	//TODO NICK here you get the cam matrix, move those two lines up to support it for ps3 & x360
	// 16 floats are copied out of the buffer data itself, starting at element
	// BufferID*SizeX of the selected buffer.
	// NOTE(review): HWZBuf already points at buffer BufferID, so this offset selects
	// row BufferID inside that buffer - confirm this is where the matrix is stored.
	memcpy(&m_MatViewProj,&HWZBuf[BufferID*SizeX],16*sizeof(float));
	//*******************************************************************************************

	m_VMin	=	(qword)(vec_float4){FLT_EPSILON,FLT_EPSILON,FLT_EPSILON,FLT_EPSILON};
	m_VMax	=	(qword)(vec_float4){static_cast<float>(SizeX)-FLT_EPSILON,
																static_cast<float>(SizeY)-FLT_EPSILON,0.f,0.f};


	// x and y are scaled and offset by half the buffer width (integer division)
	const float SCALE		=	SizeX/2;
	Matrix44A ScreenMat(	SCALE,0.f,0.f,SCALE,
												0.f,SCALE,0.f,SCALE,
												0.f,0.f,1.f,0.f,
												0.f,0.f,0.f,1.f);

	ScreenMat.Transpose();
	m_MatViewProjT	=	m_MatViewProj*ScreenMat;
	m_MatViewProj		=	m_MatViewProjT;
#endif
// HWZBuf is only defined inside the PS3 section; undefining an unknown macro is harmless
#undef HWZBuf
}

#ifndef __SPU__
// Creates the culler with an already allocated z-buffer and cleared statistics.
CZBufferCuller::CZBufferCuller()
	: m_OutdoorVisible(1)
{
	// The z-buffer is created right here, as the allocation might otherwise never
	// get executed when the culler runs via SPUs.
	const uint32 resolution = min(max(1, GetCVars()->e_CoverageBufferResolution),1024);
	m_SizeY = resolution;
	m_SizeX = resolution;
	m_ZBuffer = (TZBZexel*)CryModuleMemalign(sizeof(TZBZexel)*m_SizeX*m_SizeY,128);

	// nothing measured or tested yet
	m_ObjectsTestedAndRejected = 0;
	m_ObjectsTested = 0;
	m_FrameTime = 0.f;
}

bool CZBufferCuller::IsBoxVisible(const AABB& objBox, uint32* const __restrict pResDest)
{
	FUNCTION_PROFILER_3DENGINE;
	m_ObjectsTested++;
	Vec4 Verts[8] = 
	{
		m_MatViewProj*Vec4(objBox.min.x,objBox.min.y,objBox.min.z,1.f),//0
		m_MatViewProj*Vec4(objBox.min.x,objBox.max.y,objBox.min.z,1.f),//1
		m_MatViewProj*Vec4(objBox.max.x,objBox.min.y,objBox.min.z,1.f),//2
		m_MatViewProj*Vec4(objBox.max.x,objBox.max.y,objBox.min.z,1.f),//3
		m_MatViewProj*Vec4(objBox.min.x,objBox.min.y,objBox.max.z,1.f),//4
		m_MatViewProj*Vec4(objBox.min.x,objBox.max.y,objBox.max.z,1.f),//5
		m_MatViewProj*Vec4(objBox.max.x,objBox.min.y,objBox.max.z,1.f),//6
		m_MatViewProj*Vec4(objBox.max.x,objBox.max.y,objBox.max.z,1.f)//7
	};
	bool CutNearPlane=Verts[0].w<=0.f;
	CutNearPlane	|=	Verts[1].w<=0.f;
	CutNearPlane	|=	Verts[2].w<=0.f;
	CutNearPlane	|=	Verts[3].w<=0.f;
	CutNearPlane	|=	Verts[4].w<=0.f;
	CutNearPlane	|=	Verts[5].w<=0.f;
	CutNearPlane	|=	Verts[6].w<=0.f;
	CutNearPlane	|=	Verts[7].w<=0.f;
	if(CutNearPlane)
		return true;

	if(m_RotationSafe==1)
		return Rasterize<1>(Verts,8);
	if(m_RotationSafe==2)
		return Rasterize<2>(Verts,8);
	return Rasterize<0>(Verts,8);

	++m_ObjectsTestedAndRejected;

	return false;
}


// Debug shift amount; no active code reads it (its only use below is commented out).
static int sh	=	8;

// Draws the content of the coverage buffer as boxes in 2D mode, for debugging.
//
// nStep - taken modulo 32; values that end up <= 0 draw nothing. Otherwise every
//         nStep-th texel in x and y is drawn. On PS3 the value instead selects the
//         hardware buffer to reload ((nStep-1)%4) and every texel is drawn.
//
// Each stored value is mapped to w = Value/65535 and then to z = b/(w-a) with
// a = far/(far-near) and b = far*near/(near-far), i.e. the inverse of w = a + b/z.
// The grey level of a box is min(255, z*5). Texels holding 0xffff are not drawn.
void CZBufferCuller::DrawDebug(int32 nStep)
{ // project buffer to the screen
	nStep	%=	32;
	// The remainder keeps the sign of nStep (implementation-defined before C++11), so a
	// negative request must be rejected as well: it would otherwise turn into a huge
	// unsigned buffer id for ReloadBuffer() and wrap the unsigned loop counters below.
	if(nStep<=0)
		return;

#if defined(PS3)
	if(!HWZBuffer.pHardwareZBuffer)
		return;

	ReloadBuffer((nStep-1)%4);
	nStep	=	1;
#endif

	const CCamera& rCam = GetCamera();
	const float farPlane = rCam.GetFarPlane();
	const float nearPlane = rCam.GetNearPlane();

	// coefficients of the depth mapping w = a + b/z
	const float a = farPlane / (farPlane - nearPlane);
	const float b = farPlane * nearPlane / (nearPlane - farPlane);

	// brightness factor applied to the reconstructed depth
	const float scale = 5.0f;

	m_pRenderer->Set2DMode(true,m_SizeX,m_SizeY);
	SAuxGeomRenderFlags	Flags	=	e_Def3DPublicRenderflags;
	Flags.SetDepthWriteFlag(e_DepthWriteOff);
	Flags.SetAlphaBlendMode(e_AlphaBlended);
	m_pRenderer->GetIRenderAuxGeom()->SetRenderFlags(Flags);

	// half extent of a box; slightly larger when every texel is drawn
	Vec3 vSize(.4f,.4f,.4f);
	if(nStep==1)
		vSize = Vec3(.5f,.5f,.5f);

	for(uint32 y=0; y<m_SizeY; y+=nStep)
	for(uint32 x=0; x<m_SizeX; x+=nStep)
	{
		const uint32 Value	=	m_ZBuffer[x+y*m_SizeX];
		//Value>>=sh;

		// texels holding 0xffff are skipped before any colour/position work is done
		if(Value==0xffff)
			continue;

		// buffer row 0 is drawn at the highest y coordinate
		Vec3 vPos((float)x,(float)(m_SizeY-y-1),0);
		vPos += Vec3(0.5f,-0.5f,0);

		const float w = Value / (65535.0f);
		const float z = b / (w - a);

		uint32 ValueC =  min(255u,(uint32)(z*scale));
		ColorB col(ValueC,ValueC,ValueC, 200);
		//ColorB col((Value&31)<<3,((Value>>5)&31)<<3,((Value>>10)&63)<<2,200);
		GetRenderer()->GetIRenderAuxGeom()->DrawAABB(AABB(vPos-vSize, vPos+vSize), nStep<=2, col, eBBD_Faceted);
	}
	//m_pRenderer->GetIRenderAuxGeom()->Flush();
	m_pRenderer->Set2DMode(false,m_SizeX,m_SizeY);
}

// Reports the memory held by the z-buffer to the sizer, under the
// "CoverageBuffer" component.
void CZBufferCuller::GetMemoryUsage(ICrySizer * pSizer) const
{
	SIZER_COMPONENT_NAME(pSizer, "CoverageBuffer");

	// one TZBZexel per texel of the m_SizeX x m_SizeY buffer
	const size_t zbufferBytes = sizeof(TZBZexel)*m_SizeX*m_SizeY;
	pSizer->AddObject(m_ZBuffer, zbufferBytes);
}
#endif//__SPU__
#undef shi
