////////////////////////////////////////////////////////////////////////////
//
//  Crytek Engine Source File.
//  Copyright (C), Crytek Studios, 2002.
// -------------------------------------------------------------------------
//  File name:   objmancullqueue.cpp
//  Version:     v1.00
//  Created:     2/12/2009 by Michael Glueck
//  Compilers:   Visual Studio.NET
//  Description: Implementation for asynchronous obj-culling queue
// -------------------------------------------------------------------------
//  History:
//
////////////////////////////////////////////////////////////////////////////

#include "StdAfx.h"
#include "ObjManCullQueue.h"
#include "CZBufferCuller.h"

// SPU job plumbing; only compiled when the cull queue is enabled on PS3 and this
// translation unit is neither built for the SPU itself nor for the CryCG tool.
#if defined(USE_CULL_QUEUE) && defined(PS3) && !defined(__SPU__) && !defined(__CRYCG__)
	// Declares the job type TBoxOccludedJob for the "IsBoxOccluded" entry on SCullQueue
	// (instantiated in SCullQueue::Process).
	DECLARE_SPU_CLASS_JOB("IsBoxOccluded", TBoxOccludedJob, NCullQueue::SCullQueue)
	// Job state registered with each launched job in Process() and waited on in Wait().
	volatile NSPU::NDriver::SExtJobState g_OcclJobState;
	// Enables the SPU code paths in Process() and Wait() below.
	#define USE_SPU
#endif

#ifdef XENON
	static const char *sOcclThreadName = "IsBoxOccluded";

	// Body of the culling task registered by SCullQueue::Process().
	// Repeatedly sleeps on m_ProcessCondition until the queue reports a non-zero
	// Size() or m_Quit is set, then processes the queue. Once m_Quit is seen the
	// task unregisters itself from the thread task manager and deletes itself.
	void NCullQueue::CCullTask::OnUpdate()
	{
#ifdef USE_CULL_QUEUE
		for(;;)
		{
			// Block (under m_Lock) until there is work or a quit request.
			m_Lock.Lock();
			while (!(m_pQueue->Size() || m_Quit))
			{
				m_ProcessCondition.Wait(m_Lock);
			}
			m_Lock.Unlock();

			if(m_Quit)
			{
				break;
			}

			// m_IsProcessing is what SCullQueue::Wait() polls via IsProcessing().
			m_IsProcessing = true;
			m_pQueue->ProcessInternal(m_MainFrameID, m_pCullBuffer, m_pCam);
			m_IsProcessing = false;
		}

		// The task owns itself: remove it from the manager, then free it.
		GetISystem()->GetIThreadTaskManager()->UnregisterTask(this);
		delete this;
#endif
	}
#endif

// Constructs an empty queue (curIndex == 0).
NCullQueue::SCullQueue::SCullQueue()
	: curIndex(0)
{
#if defined(USE_CULL_QUEUE)
	#if defined(XENON)
		// No worker task yet; Process() creates it on first use.
		m_pCullTask = NULL;
	#endif

	#if !defined(__SPU__)
		// Zero the whole item storage (skipped when compiled for the SPU).
		memset(cullItemBuf, 0, sizeof(cullItemBuf));
	#endif
#endif
}

// Kicks off processing of the queued cull items for the given frame.
//   mainFrameID - frame id stamped into the items' occlusion test variables
//   pCullBuffer - cull buffer to test against (used as a CZBufferCuller)
//   pCam        - camera handed through to ProcessInternal()
// With USE_SPU the work is started as an SPU job when InvokeJobOnSPU() allows it,
// otherwise ProcessInternal() is called directly. On XENON the work is handed to
// a CCullTask, which is created and registered on the first call.
void NCullQueue::SCullQueue::Process(uint32 mainFrameID, CCullBuffer *const pCullBuffer,const CCamera* pCam)
{
#ifdef USE_CULL_QUEUE
#ifdef USE_SPU
	const bool runAsSpuJob = InvokeJobOnSPU("IsBoxOccluded");
	if(!runAsSpuJob)
	{
		// Job execution not requested: run synchronously on the calling thread.
		ProcessInternal(mainFrameID, (CZBufferCuller*)pCullBuffer, pCam);
	}
	else
	{
		TBoxOccludedJob occlJob(mainFrameID, (CZBufferCuller*)pCullBuffer, pCam);
		occlJob.SetClassInstance(*this);
		// Wait() blocks on this job state.
		occlJob.RegisterJobState(&g_OcclJobState);
		occlJob.SetCacheMode(NPPU::eCM_4);
		occlJob.Run();
	}
#endif
#ifdef XENON
	if(m_pCullTask == NULL)
	{
		// First use: create the worker task and register it with the task manager.
		SThreadTaskParams taskParams;
		taskParams.name = sOcclThreadName;
		taskParams.nFlags = THREAD_TASK_BLOCKING;

		// Preferred thread comes from the e_CullerThread cvar; 0 when the cvar is missing.
		ICVar *pCullerThreadVar = gEnv->pConsole->GetCVar("e_CullerThread");
		if(pCullerThreadVar)
			taskParams.nPreferedThread = pCullerThreadVar->GetIVal();
		else
			taskParams.nPreferedThread = 0;

		m_pCullTask = new NCullQueue::CCullTask(this);
		GetISystem()->GetIThreadTaskManager()->RegisterTask(m_pCullTask, taskParams);
	}

	m_pCullTask->AddCullTask(mainFrameID, (CZBufferCuller*)pCullBuffer, pCam);
#endif
#endif
}

// On XENON, if a worker task was ever created, waits for outstanding work and
// then asks the task to stop. Nothing to do on other configurations.
NCullQueue::SCullQueue::~SCullQueue()
{
#if defined(USE_CULL_QUEUE) && defined(XENON)
	if(m_pCullTask != NULL)
	{
		Wait();
		m_pCullTask->Stop();
	}
#endif
}

// Waits for the asynchronous processing started by Process() to finish.
// - USE_SPU: waits on g_OcclJobState when InvokeJobOnSPU() reports the job path is
//   in use (the 300 passed to WaitSPUJob is presumably a timeout - TODO confirm units).
// - XENON: polls with 1ms sleeps while items are queued and the worker task reports
//   that it is processing, then resets curIndex to 0. Safe to call before the first
//   Process() call, i.e. while no worker task exists yet.
void NCullQueue::SCullQueue::Wait()
{
#ifdef USE_CULL_QUEUE
#ifdef USE_SPU
	if(InvokeJobOnSPU("IsBoxOccluded"))
		GetIJobManSPU()->WaitSPUJob(g_OcclJobState, 300);
#endif
#ifdef XENON
	//m_pCullTask stays NULL until the first Process() call; without this guard a Wait()
	//with queued items but no task would dereference a null pointer
	if(m_pCullTask)
	{
		//use polling using sleep, any condition stuff would be performance waste as it wont stall
		while(*(volatile int*)&curIndex != 0 && m_pCullTask->IsProcessing()) CrySleep(1);
	}
	//assert(curIndex == 0);//if this fires, we have missed updates
	curIndex = 0;
#endif
#endif
}

// Runs the occlusion tests for all queued cull items and stamps mainFrameID into
// the per-object occlusion test variables (pOcclTestVars) according to the result.
//   mainFrameID - frame id written into the nLast*MainFrameID fields
//   pCullBuffer - culler to test against; copied into a local culler on SPU,
//                 used in place otherwise
//   pCam        - camera passed to cullBuffer.BeginFrame()
// Resets curIndex to 0 when done. On SPU builds the queue, camera and z-buffer
// are worked on through local copies, and one extra pass is made for items that
// were appended to the main-memory queue while the first pass was running.
#if !defined(CRYCG_CM)
SPU_ENTRY(IsBoxOccluded)
#endif
void NCullQueue::SCullQueue::ProcessInternal(uint32 mainFrameID, CZBufferCuller *const pCullBuffer, const CCamera* const pCam)
{
#ifdef USE_CULL_QUEUE
#ifdef __SPU__
	//BeginFrame of occlusion culler needs to be called
	CZBufferCuller cullBuffer(pCullBuffer);
	//pull a copy of this queue from main memory (transfer id 1, synced below before first use)
	SCullQueue localQueue;
	memtransfer_from_main(&localQueue, this, sizeof(*this), 1);
	//local z-buffer storage: SelRes()*SelRes() zexels plus 128 bytes of slack so the
	//start address can be rounded up to a 128 byte boundary
	uint8* pLocalZBufferStorage = SPU_LOCAL_PTR((uint8*)alloca(sizeof(TZBZexel)*cullBuffer.SelRes()*cullBuffer.SelRes()+128));
	SPU_DOMAIN_LOCAL TZBZexel *const pZBufferLocal = SPU_LOCAL_PTR((TZBZexel*)(((uint32)pLocalZBufferStorage+127)&~127));
	cullBuffer.ZBuffer(pZBufferLocal);
	CCamera localCam;	
	memcpy(&localCam, pCam, sizeof(CCamera));
	//wait for the queue transfer started above
	memtransfer_sync(1);
	//clamp localQueue.curIndex as this can be larger than the SPU limits
	localQueue.curIndex = (localQueue.curIndex>=MAX_CULL_QUEUE_ITEM_COUNT)?(MAX_CULL_QUEUE_ITEM_COUNT-1):localQueue.curIndex;
#else
	//non-SPU: operate directly on the passed culler, this queue and the passed camera;
	//the two macros let the shared code below use the same names as the SPU path
	CZBufferCuller& cullBuffer = *(CZBufferCuller*)pCullBuffer;
	#define localQueue (*this)
	#define localCam (*pCam)
#endif			
	cullBuffer.BeginFrame(localCam);
	bool feedbackExec = false;//becomes true after one possible feedback loop
#ifdef __SPU__
	//re-entry point for the single feedback pass at the end of this function
	FeedbackLoop:
#endif
	{
		//one past the last queued item
		SPU_DOMAIN_LOCAL const SCullItem*const cEnd = &localQueue.cullItemBuf[localQueue.curIndex];
		//a is the bit mask (1,2,4,8) matched against SCullItem::BufferID, b the index handed to ReloadBuffer
		for(uint16 a=1,b=0;b<4;a<<=1,b++)//traverse through all 4 occlusion buffers
		{
			cullBuffer.ReloadBuffer(b);
			for(SCullItem* it = localQueue.cullItemBuf; it != cEnd; ++it)
			{
				SCullItem& rItem = *it;
				//skip items that did not request the buffer of this pass
				IF(!(rItem.BufferID&a),1)
					continue;

				//NOTE(review): this tests bit 0 of the item, not of the current mask a, so an item
				//carrying bit 0 together with another bit would take this branch on every matching pass - confirm BufferID values are exclusive
				IF((rItem.BufferID&1),0)	//zbuffer
				{
					//record the frame in which the object was last found occluded or visible
					if(!cullBuffer.IsObjectVisible(rItem.objBox, eoot_OBJECT, 0.f, &rItem.pOcclTestVars->nLastOccludedMainFrameID))
						rItem.pOcclTestVars->nLastOccludedMainFrameID = mainFrameID;
					else
						rItem.pOcclTestVars->nLastVisibleMainFrameID = mainFrameID;
				}
				else	//shadow buffer
				//only tested when the item was not already marked as "no shadow cast" for this frame
				if(rItem.pOcclTestVars->nLastNoShadowCastMainFrameID != mainFrameID)
				{
					if(cullBuffer.IsObjectVisible(rItem.objBox, eoot_OBJECT, 0.f, &rItem.pOcclTestVars->nLastOccludedMainFrameID))
						rItem.pOcclTestVars->nLastShadowCastMainFrameID = mainFrameID;
				}
			}
		}

		//if not visible, set occluded
		//NOTE(review): (BufferID&6) can only have bits 1/2 set while the comparison yields 0 or 1,
		//so the bitwise & below looks like it is always zero and the assignment never runs.
		//The comment above also does not match the body, which writes nLastShadowCastMainFrameID.
		//Intent needs confirming before changing this to a logical test.
		for(SCullItem* it = localQueue.cullItemBuf; it != cEnd; ++it)
		{
			SCullItem& rItem = *it;
			IF((rItem.BufferID&6) & (rItem.pOcclTestVars->nLastNoShadowCastMainFrameID != mainFrameID),1)
					rItem.pOcclTestVars->nLastShadowCastMainFrameID = mainFrameID;
		}

	}
#ifdef __SPU__
	if(!feedbackExec)
	{
		//re-read the item count of the queue in main memory
		uint32 newIndex = curIndex;
		//check if somebody has added another item meanwhile or we need to process more items than fit to spu
		if(newIndex != localQueue.curIndex)
		{
			//do 1 feedback loop, some items have been added in the meantime, latency high enough to assume all has been stored and not just the curIndex
			//the not yet processed items are fetched to the start of the local buffer
			memtransfer_from_main(localQueue.cullItemBuf,	&this->cullItemBuf[localQueue.curIndex],(newIndex-localQueue.curIndex)*sizeof(SCullItem), 0);
			localQueue.curIndex = (newIndex-localQueue.curIndex);//to process remaining ones
			memtransfer_sync(0);
			feedbackExec = true;
			goto FeedbackLoop;
		}
	}
#endif
	//mark the queue as empty again
	curIndex = 0;//goes through cache
#endif
	//drop the aliases defined for the non-SPU path
	#undef localQueue
	#undef localCam
}
