#include "StdAfx.h"
#include "../CCryDXPS.hpp"
#include "../../CCryTypes.hpp"
#include "CCryDXPSGCM_CMBMan.hpp"
#include "../../Device/Resource/Buffer/CCryDXPSBuffer.hpp"


// Aligned allocator defined outside this file. Init() calls it as
// (1024*1024, RSXMAPPED_S_SUM, eCryM_Render) -- presumably (alignment, size, owning module); TODO confirm.
extern "C" void *mwprivate2_memalign(size_t, size_t, ECryModule);


// Base address of the memory block allocated in CCryDXPSGCMCMBMan::Init().
// Not static, so other translation units can reference it.
uint8*											g_pRSXMappedMemory;
// Start of the GCM command buffer; Init() sets it to the same address as g_pRSXMappedMemory.
static 	void*								g_pCMDBuffer;
// GCM control register fetched with cellGcmGetControlRegister() in Init(); Flush() writes its put field.
static	CellGcmControl*			g_pControll;

namespace
{
	// Moves the put field of the GCM control register up to the write cursor of
	// the current GCM context. The store is bracketed by __sync() on both sides.
	ILINE void Flush()
	{
		__sync();
		// Hand-rolled replacement for
		//   cellGcmAddressToOffset(gCellGcmCurrentContext->current, &Offset);
		// offset of the command buffer start plus the cursor's byte distance from that start.
		const uint32_t bufferStartOffset	=	tdLayer0::CMB().CmdBufferOffset();
		const uint32_t cursorDistance		=	(uint32)gCellGcmCurrentContext->current - (uint32)g_pCMDBuffer;
		g_pControll->put = bufferStartOffset + cursorDistance;
		__sync();
	}
}

#if defined(CCRY_DXPS_UNSAFE)
// GCM context callback (installed on the current context by CCryDXPSGCMCMBMan::Init()).
// Emits a jump to GCM_CMD_START_OFFSET, rewinds the passed context's write cursor
// to that offset inside the command buffer and flushes.
// pContext: context whose cursor is rewound. Id: unused.
// Always returns CELL_OK.
int32_t DebugCallback(CellGcmContextData* pContext, uint32_t Id)
{
	using namespace CRY_DXPS_GCMNAMESPACE;

	cellGcmSetJumpCommand(GCM_CMD_START_OFFSET);

	uint8* const pBufferBytes	=	reinterpret_cast<uint8*>(g_pCMDBuffer);
	pContext->current					=	(uint32_t*)(pBufferBytes+GCM_CMD_START_OFFSET);

	Flush();
	return CELL_OK;
}
#endif

void CCryDXPSGCMCMBMan::Init()
{
	using namespace CRY_DXPS_GCMNAMESPACE;
	m_Initialized				=	1;
	g_pRSXMappedMemory	= reinterpret_cast<uint8*>(mwprivate2_memalign(1024*1024, RSXMAPPED_S_SUM, eCryM_Render));//64kb min size of default cmd buffer
	g_pCMDBuffer				=	g_pRSXMappedMemory;
	if(CELL_OK!=cellGcmInit(RSXMAPPED_S_CMDBuffer-16*1024,RSXMAPPED_S_CMDBuffer,g_pCMDBuffer))
	{
		CRY_DEBUGOUT(__FUNC__);
		CRY_DEBUGOUT_ALWAYS(" Failed to initialize GCM (cellGcmInit())\n");
		return;
	}
	else
	{
		CRY_DEBUGOUT(" successfully initialized GCM\n");
	}
	m_pTransferBuffer	=	g_pRSXMappedMemory+RSXMAPPED_O_TempBuffer;

//	cellGcmAddressToOffset(&reinterpret_cast<uint8*>(g_pCmdBuffer)[gCellGcmCurrentContext->current],&m_CmdBufferOffset);
	//regarding the doc, the CMDBuffer starts always at 0
	m_CmdBufferOffset=~0;
	if(CELL_OK!=cellGcmAddressToOffset(g_pCMDBuffer,&m_CmdBufferOffset))
	{
		CRY_DEBUGOUT_ALWAYS("Could not translate commandbuffer-address to offset\n");
		return;
	}

	g_pControll	=	cellGcmGetControlRegister();

#if defined(CCRY_DXPS_UNSAFE)
	gCellGcmCurrentContext->begin			=	(uint32*)&reinterpret_cast<uint8*>(g_pCMDBuffer)[CELL_GCM_INIT_STATE_OFFSET];
	gCellGcmCurrentContext->end				=	(uint32*)&reinterpret_cast<uint8*>(g_pCMDBuffer)[CELL_GCM_INIT_STATE_OFFSET+GCM_CMD_SIZE-GCM_CMD_SEGMENTSIZE];
	gCellGcmCurrentContext->callback	=	DebugCallback;
#endif

	for(uint32 a=0;a<GCM_CMD_SEGMENTCOUNT;a++)
		m_SegmentHandle[a]=TDRES_CREATE(0);

	cellGcmSetJumpCommand(GCM_CMD_START_OFFSET);
	gCellGcmCurrentContext->current	=	reinterpret_cast<uint32*>(reinterpret_cast<uint8*>(g_pCMDBuffer)+GCM_CMD_START_OFFSET);
	Flush();

	//waiting just in case the rsx needs some time to process some older commands
	while(g_pControll->get!=g_pControll->put)
	{
		sys_timer_usleep(10);
	}

#if defined(CRY_DXPS_DOWNLOADABLE_VMEM)
	//init ring buffer
	for(uint32 i=0;i<GCM_CMD_INJECTION_CMD_CNT;++i)
	{
		gCellGcmCurrentContext->current	=	reinterpret_cast<uint32*>(reinterpret_cast<uint8*>(g_pCMDBuffer)+GCM_CMD_INJECTION_OFFSET+i*GCM_CMD_INJECTION_CMD_SIZE);
		cellGcmSetReturnCommand();
	}
	gCellGcmCurrentContext->current	=	reinterpret_cast<uint32*>(reinterpret_cast<uint8*>(g_pCMDBuffer)+GCM_CMD_START_OFFSET);
#endif

	m_TransferBufferOff = ~0;
	m_LastUsedSegment	=	Segment(GCM_CMD_START_OFFSET);
}

//void* CCryDXPSGCMCMBMan::SyncMemory()
//{
//	return reinterpret_cast<uint8*>(g_pCMDBuffer)+GCM_CMD_SIZE+TransferBufferSize;
//}

//Submits the commands recorded so far in the current GCM context.
//Without CCRY_DXPS_UNSAFE this is a plain cellGcmFlush().
//With CCRY_DXPS_UNSAFE the put pointer is advanced manually through Flush() and
//the command buffer is handled in segments: when the write cursor is found in a
//different segment than m_LastUsedSegment, the handle stored for the upcoming
//segment is passed to SyncToRSX (presumably blocking until the RSX is done with
//that segment -- TODO confirm), the handle of the segment just left is refreshed,
//and the cursor wraps to GCM_CMD_START_OFFSET via a jump command once
//Current+GCM_CMD_SEGMENTSIZE reaches GCM_CMD_SIZE.
void CCryDXPSGCMCMBMan::FlushCMDs()
{
	using namespace CRY_DXPS_GCMNAMESPACE;
//	PROFILE_FRAME(Flush);
#if !defined(CCRY_DXPS_UNSAFE)
	cellGcmFlush();
#else
#if defined(CRY_DXPS_DOWNLOADABLE_VMEM)
	//append a call into the injection buffer slot selected by RSXPushOff()
	cellGcmSetCallCommand(GCM_CMD_INJECTION_OFFSET+tdLayer0::Device()->RSXPushOff());
#endif

	Flush();

	//if here is something changed, please adapt in Tools/PS3JobManager/SPU/libDriverDMA/LibGCM_spu.cpp
	//byte offset of the write cursor relative to the start of the command buffer
	const uint32 Current =	(uint32)gCellGcmCurrentContext->current-(uint32)g_pCMDBuffer;
	const uint32 CurrentSeg	=	Segment(Current);
	//current crosses segments?
	if(CurrentSeg!=m_LastUsedSegment)
	{
		//then make sure next segment is not in use anymore
		//(the segment after the last one is the one containing GCM_CMD_START_OFFSET)
		const uint32 Next				=	Current+GCM_CMD_SEGMENTSIZE>=GCM_CMD_SIZE?GCM_CMD_START_OFFSET:Current+GCM_CMD_SEGMENTSIZE;
		const uint32 NextSeg		=	Segment(Next);
		//output argument of SyncToRSX; its value is not used here
		uint32 zWriteCount;
		tdLayer0::Sync().SyncToRSX<true,false>(m_SegmentHandle[NextSeg], zWriteCount);

		//set most recent handle for last segment
		m_SegmentHandle[m_LastUsedSegment]	=	tdLayer0::Sync().HandleDeviceThread();
//		printf("0x%x 0x%x %d %d\n",Current,cellGcmGetControlRegister()->get,m_LastUsedSegment,CurrentSeg);

		m_LastUsedSegment	=	CurrentSeg;

		//loop? (less than one full segment left before GCM_CMD_SIZE)
		if(Current+GCM_CMD_SEGMENTSIZE>=GCM_CMD_SIZE)
		{
			//jump back and continue recording at GCM_CMD_START_OFFSET
			cellGcmSetJumpCommand(GCM_CMD_START_OFFSET);
			gCellGcmCurrentContext->current	=	(uint32*)(g_pRSXMappedMemory+GCM_CMD_START_OFFSET);
	//		sys_timer_usleep(100000);
	//		printf("LOOOOP\n");
		}
		//publish the commands written inside this branch (the jump, if any)
		Flush();
	}

#if defined(CRY_DXPS_SINGLEFLUSHVALIDATE)
	//validation mode: poll until get has caught up with put after every flush
	while(cellGcmGetControlRegister()->get!=cellGcmGetControlRegister()->put)
	{
		sys_timer_usleep(10);
		//unused local, presumably a breakpoint anchor -- TODO confirm
		int a=0;
	}
#endif
#endif
}

//Reports the memory of this manager to the given sizer: the block at
//g_pRSXMappedMemory is added with RSXMAPPED_O_END bytes under the component
//"DXPS RSX-Commandbuffer", nested inside "DXPS Commandbuffer manager".
void CCryDXPSGCMCMBMan::Size(ICrySizer* Sizer)
{
	SIZER_COMPONENT_NAME(Sizer,"DXPS Commandbuffer manager");
	{
		SIZER_COMPONENT_NAME(Sizer,"DXPS RSX-Commandbuffer");
		Sizer->AddObject(g_pRSXMappedMemory,RSXMAPPED_O_END);
	}
}

//Entry point for renderer code outside of this layer: passes all arguments on
//to RSXmemcpy unchanged and returns its result.
bool CryMemcpyRSX(void* pDst,const void* pSrc,size_t Size, CallbackFunc Callback, bool sleep)
{
	const bool copyResult	=	RSXmemcpy(pDst,pSrc,Size,Callback,sleep);
	return copyResult;
}

//Copies Size bytes from pSrc to pDst while holding the RSX injection lock.
//
//Without CRY_DXPS_DOWNLOADABLE_VMEM the copy is a plain CPU memcpy done in 2 KB
//blocks with a short sleep after each block.
//With CRY_DXPS_DOWNLOADABLE_VMEM, copies of up to 2 KB are done directly by the
//CPU; larger copies are split into chunks of at most RSXMAPPED_S_TempBuffer
//bytes. For each chunk a small command sequence is written into the current
//injection slot which transfers the chunk into m_pTransferBuffer and then
//writes a magic number back into the slot; the CPU polls the slot for that
//magic number and finally copies the chunk from m_pTransferBuffer to pDst.
//If the RSX does not answer within the timeout the chunk is copied by the CPU
//instead and NVirtualMem::FlagVMMissFallback() is called.
//
//pDst/pSrc : destination / source of the copy (pSrc must be translatable by
//            cellGcmAddressToOffset on the injection path)
//Size      : number of bytes to copy
//Callback  : optional; called with 0 (whilst the RSX is busy) and with 1
//            (right before the data is copied to pDst). Not guaranteed to be
//            executed in case of failure.
//sleep     : true -> always sleep between polls, false -> busy-poll for the
//            first ~0.1 ms before falling back to sleeping
//returns   : true on success, false if an address could not be translated.
//
//NOTE(review): the injection path references DebugCallback, which is only
//defined under CCRY_DXPS_UNSAFE -- both defines appear to be required together.
bool CCryDXPSGCMCMBMan::InjectCopy(void* pDst,const void* pSrc,size_t Size, CallbackFunc Callback, bool sleep)
{
	RSXINJECTION_LOCK;

	const size_t BlockSize	=	2048;//2k recommended by sony's paper
#if !defined(CRY_DXPS_DOWNLOADABLE_VMEM)
	if(Callback)
	{
		Callback(0);
		Callback(1);
	}
	for(size_t a=0;a<Size;a+=BlockSize)
	{
		memcpy(reinterpret_cast<uint8*>(pDst)+a,reinterpret_cast<const uint8*>(pSrc)+a,a+BlockSize<Size?BlockSize:Size-a);
		sys_timer_usleep(50);	
	}
	//this path used to fall off the end of a bool function (undefined behaviour)
	return true;
#else
	if(Size <= BlockSize)
	{
		if(Callback)
		{
			Callback(0);
			Callback(1);
		}
		memcpy(pDst, pSrc, Size);//copy directly from video mem if < 2 KB (faster)
		return true;
	}

	uint32 *const pTmpCtxBegin = (uint32*)&reinterpret_cast<uint8*>(g_pCMDBuffer)[GCM_CMD_INJECTION_OFFSET];
	//lazily resolve the offsets of the transfer buffer and of the injection buffer
	if(m_TransferBufferOff == ~0)
	{
		if(CELL_OK!=cellGcmAddressToOffset(m_pTransferBuffer,&m_TransferBufferOff))
		{
			m_TransferBufferOff = ~0;//keep the "not mapped" marker whatever the failed call wrote
			CRY_DEBUGOUT_ALWAYS("RSXMemcpy: Failed to map destination\n");
			return false;
		}
		if(CELL_OK!=cellGcmAddressToOffset(pTmpCtxBegin,&m_OffsetTransferCmdBuffer))
		{
			//both offsets are only valid as a pair: force a retry on the next call
			//instead of continuing with an unmapped injection buffer offset
			m_TransferBufferOff = ~0;
			CRY_DEBUGOUT_ALWAYS("RSXMemcpy: Failed to map injection command-buffer\n");
			return false;
		}
	}
	//reset to current offset
	CCryDXPSGCMSyncMan& __restrict rSyncMan = tdLayer0::Sync();
	uint32 curRSXOff	= tdLayer0::Device()->RSXPushOff();
	//temporary context covering the current injection slot; the first word of the
	//slot is skipped while recording and patched last (see below)
	CellGcmContextData TmpCtx;
	TmpCtx.begin			=	(uint32*)((uint32)pTmpCtxBegin+curRSXOff);
	TmpCtx.end				=	(uint32*)((uint32)pTmpCtxBegin+GCM_CMD_INJECTBUF_SIZE);
	TmpCtx.current		=	TmpCtx.begin+1;
	TmpCtx.callback		=	DebugCallback;

	uint8* pDataDst					=	reinterpret_cast<uint8*>(pDst);
	const uint8* pDataSrc		=	reinterpret_cast<const uint8*>(pSrc);//CPU view of the current chunk, used by the fallback
	uint32 OffsetSrc;
	if(CELL_OK!=cellGcmAddressToOffset(pSrc,&OffsetSrc)) 
	{
		CRY_DEBUGOUT_ALWAYS("RSXMemcpy: Failed to map source\n");
		return false;
	}
	uint32 TransSize;
	size_t s = Size;//bytes still to copy

	static uint32 magicNumber = 2;//value we write into backtransfer to poll on
	do
	{
		//size of this chunk
		TransSize	=	RSXMAPPED_S_TempBuffer<s?RSXMAPPED_S_TempBuffer:s;
		CRY_DXPS_GCMNAMESPACE::cellGcmInlineTransfer(&TmpCtx,m_OffsetTransferCmdBuffer+curRSXOff,TmpCtx.begin,1,CELL_GCM_LOCATION_MAIN);//begin should point to an Return command
		//make sure the magic number differs from what currently sits in the polled word
		if(magicNumber == *(TmpCtx.begin+1))++magicNumber;
		*(uint32*)((uint32)TmpCtx.begin+GCM_CMD_INJECTION_CMD_SIZE-4) = magicNumber;
		rSyncMan.SyncRSXToInjectionNOP(TmpCtx);
		CRY_DXPS_GCMNAMESPACE::cellGcmSetReport(&TmpCtx,CELL_GCM_ZPASS_PIXEL_CNT,ECDXPSSLR_INJECTREPORT);
		CRY_DXPS_GCMNAMESPACE::cellGcmSetTransferData(&TmpCtx,(uint8)CELL_GCM_TRANSFER_LOCAL_TO_MAIN,
														m_TransferBufferOff,TransSize,
														OffsetSrc,TransSize,TransSize,1);
		CRY_DXPS_GCMNAMESPACE::cellGcmSetWaitForIdle(&TmpCtx);
		CRY_DXPS_GCMNAMESPACE::cellGcmInlineTransfer(&TmpCtx,m_OffsetTransferCmdBuffer+curRSXOff+4,&magicNumber,1,CELL_GCM_LOCATION_MAIN);//begin should point to an Return command
		CRY_DXPS_GCMNAMESPACE::cellGcmSetReturnCommand(&TmpCtx);

		assert((uint32)TmpCtx.current-(uint32)TmpCtx.begin <= GCM_CMD_INJECTION_CMD_SIZE);
		
		//the rest of the slot is complete: now overwrite the first word of the slot with a NOP
		__sync();
		TmpCtx.current		=	TmpCtx.begin;
		CRY_DXPS_GCMNAMESPACE::cellGcmSetNopCommand(&TmpCtx,1);
		__sync();
		
		tdLayer0::Device()->ToggleRSXPush();

		volatile uint32 *const pRetCmd = (volatile uint32*)TmpCtx.begin;
		if(Callback)
			Callback(0);//do something whilst waiting for rsx
		uint64 curTicks = rdtsc();
		int iterCount = 0;
		//set timeout as 60 ms per 64 KB (value is in ticks, not in ms)
		const uint32 timeOutTicks = 60/*60 ms*/ * 80 * 1000/*frequency 80K/ms*/;
		const uint32 db16PollThreashold = 80*1000/10;//0.1 ms
		do
		{
			if(sleep)
				sys_timer_usleep(300);
			else
			{
				if(iterCount > 0 && (rdtsc()-curTicks > db16PollThreashold))
					sys_timer_usleep(300);
				else
					sys_timer_subusleep_inldb16cyc(256);//~10us
			}
			if(iterCount > 0 && rdtsc()-curTicks > timeOutTicks)
			{
				printf("Fallback in Injection copy issued\n");
				//direct access, slow but what can we do?
				//must read the current chunk, not the start of the source
				memcpy(m_pTransferBuffer,pDataSrc,TransSize);
				//fake the completion signature so the poll loop terminates
				*pRetCmd			= 131072;
				*(pRetCmd+1)	= magicNumber;
				NVirtualMem::FlagVMMissFallback();
			}
			++iterCount;
		}while(*pRetCmd != 131072 || *(pRetCmd+1) != magicNumber);//"Return" command and magic number
		
		++magicNumber;

		tdLayer0::Device()->ResetRSXPush();
		if(Callback)
			Callback(1);//do something whilst waiting for rsx
		memcpy(pDataDst,m_pTransferBuffer,TransSize);
		
		//advance to the next chunk and the next injection slot
		pDataDst			 +=	TransSize;
		pDataSrc			 +=	TransSize;
		OffsetSrc			 +=	TransSize;
		s							 -=	TransSize;
		curRSXOff				= tdLayer0::Device()->NextRSXPushOff();
		TmpCtx.begin		=	(uint32*)((uint32)pTmpCtxBegin+curRSXOff);
		TmpCtx.current	=	TmpCtx.begin+1;
	}while(s);
	return true;
#endif
}
