/* 
	definition and implementation for spu job base
*/

#ifndef __SPU_JOBBASE_H
#define __SPU_JOBBASE_H
#pragma once

#if defined(PS3)

#define DEVIRTUALIZE_IJOBMAN

#if !defined(__SPU__)
	#if defined(DEVIRTUALIZE_IJOBMAN) || defined(JOB_LIB_COMP)
		#include "JobManSPU.h"
	#endif
#endif

#if !defined(__SPU__) || defined(_SPU_JOB)
	#if !defined(__SPU__)
		#include <IJobManSPU.h>
		#include "SPU/Elf.h"
		#include <ppu_intrinsics_gcc.h>
	#else
		#include <IJobManSPU.h>
		#include "SPU/SPU.h"
		#include "SPU/JobStructs.h"
	#endif
#endif

//Default minimum stack size of a job; CSPUJobBase uses it as the initial value of its
//minimum stack size (which that class exposes in KB).
//The value depends on whether the job was compiled with O0 or optimized:
//22 when PS3_SPUOPT is defined, 36 otherwise.
//NOTE(review): presumably PS3_SPUOPT marks the optimized build - confirm against the build setup
#if defined(PS3_SPUOPT)
	#define CACHE_MIN_STACK_SIZE 22
#else
	#define CACHE_MIN_STACK_SIZE 36
#endif

#if defined(USE_JOB_QUEUE_VERIFICATION)
//Prints a human readable description of a failed job submission via printf.
//	cError    - result code returned by the job manager; nothing is printed for eAJR_Success
//	cpJobName - name of the job, does not need to be null terminated (a null pointer is treated as an empty name)
//	cStrLen   - number of characters of cpJobName to use; names longer than the local buffer are truncated
static void OutputJobError(const NPPU::EAddJobRes cError, const char*const cpJobName, const uint32 cStrLen)
{
	//compare the passed result code, testing the enumerator itself does not depend on cError at all
	if(cError == NPPU::eAJR_Success)
		return;
	char jobBuf[256];
	//clamp the copy length so that an overlong name cannot overflow jobBuf (one byte is kept for the terminator)
	const uint32 cMaxLen = (uint32)(sizeof(jobBuf) - 1);
	const uint32 cCopyLen = cpJobName ? ((cStrLen < cMaxLen) ? cStrLen : cMaxLen) : 0;
	if(cCopyLen > 0)
		memcpy(jobBuf, cpJobName, cCopyLen);
	jobBuf[cCopyLen] = '\0';
	switch(cError)
	{
	case NPPU::eAJR_NoElf:
		printf("spu job: \"%s\" is no elf file\n",jobBuf);
		break;
	case NPPU::eAJR_NoSPUElf:
		printf("spu job: \"%s\" is no spu elf file\n",jobBuf);
		break;
	case NPPU::eAJR_ElfTooBig:
		printf("spu job: \"%s\" elf is too big\n",jobBuf);
		break;
	case NPPU::eAJR_NoQWAddress:
		printf("spu job: \"%s\" image start is not on a quadword address\n",jobBuf);
		break;
	case NPPU::eAJR_EnqueueTimeOut:
		printf("spu job: \"%s\" was not added due to timeout (SPU were occupied for too long)\n",jobBuf);
		break;
	case NPPU::eAJR_EnqueueTimeOutPushJob:
		printf("spu job: \"%s\" was not added due to timeout of a push job slot (SPU was occupied by one particular job for too long)\n",jobBuf);
		break;
	case NPPU::eAJR_SPUNotInitialized:
		printf("spu were not initialized\n");
		break;
	case NPPU::eAJR_JobSetupViolation:
		printf("spu job: \"%s\" has some invalid setup for packets/depending jobs\n",jobBuf);
		break;
	case NPPU::eAJR_JobTooLarge:
		printf("spu job: \"%s\" cannot fit into a single SPU local store\n",jobBuf);
		break;
	case NPPU::eAJR_InvalidJobHandle:
		printf("spu job: \"%s\" was invoked with an invalid job handle (job has not been found in spu repository)\n",jobBuf);
		break;
	case NPPU::eAJR_UnknownError:
		printf("spu job: \"%s\" unknown error\n",jobBuf);
		break;
	}
}
#endif

namespace NPPU
{
	class CSPUJobBase;//forward declaration
	class CSPUPacketBase;
#if !defined(__SPU__)
	class CJobManSPU;//forward declaration for friend 
#endif

	//delegation class for each job
	class CSPUJobDel : public CCommonDMABase
	{
	public:
		typedef volatile CSPUJobBase* TDepJob;

		__attribute__((always_inline))
		inline CSPUJobDel() : m_PacketCount(0), m_SPUPacketCount(0), m_ParamDataSize(0)
		{
#if !defined(__SPU__)
			m_pJobPerfData = NULL;
			m_pQueue = NULL;
			m_DependentJobCount = 0;
			m_pJobState = NULL;
	#if defined(SUPP_SPU_FRAME_STATS)
			m_pFrameProfileData = NULL;
	#endif
#endif
		}

#if !defined(__SPU__)
		__attribute__((always_inline))
		inline const EAddJobRes RunJob
		(
			const unsigned int cOpMode,
			const unsigned char cMinStackSizeKB,
			const TJobHandle cJobHandle,
			const unsigned int cIsDependentJob = 0
		)
		{
		#if defined(DEVIRTUALIZE_IJOBMAN) || defined(JOB_LIB_COMP)
			CJobManSPU *const __restrict pJobMan = CJobManSPU::Instance();
		#else
			NPPU::IJobManSPU *const __restrict pJobMan = GetIJobManSPU();
		#endif
	#if defined(USE_JOB_QUEUE_VERIFICATION)
			const EAddJobRes cRes = 
	#else
			return 
	#endif
			pJobMan->AddJob((CSPUJobDel& __restrict)*this, cOpMode, cMinStackSizeKB, cJobHandle, cIsDependentJob);
	#if defined(USE_JOB_QUEUE_VERIFICATION)
			OutputJobError(cRes, cJobHandle->cpString, cJobHandle->strLen);
			return cRes;
	#endif
		}

		__attribute__((always_inline))
		inline void RegisterQueue(const void* const cpQueue)
		{
			assert(!m_pQueue);
			m_pQueue = cpQueue;
		}

		__attribute__((always_inline))
		inline const void* GetQueue() const
		{
			return m_pQueue;
		}

		__attribute__((always_inline))
		inline void RegisterCallback(void (*pCallbackFnct)(void*), void *pArg)
		{
			m_Callbackdata.pCallbackFnct = pCallbackFnct;
			m_Callbackdata.pArg = pArg;
		}

		__attribute__((always_inline))
		inline void RegisterJobState(volatile NSPU::NDriver::SExtJobState * __restrict pJobState)
		{
			m_pJobState = pJobState;
		}

		__attribute__((always_inline))
		inline const SCallback& __restrict GetCallbackdata() const
		{
			return m_Callbackdata;
		}
#endif//__SPU__

#if !defined(__SPU__)
		//return a copy since it will get overwritten
		__attribute__((always_inline))
		inline void SetJobPerfStats(volatile NSPU::NDriver::SJobPerfStats* pPerfStats)
		{
			m_pJobPerfData = pPerfStats;
		}
#endif

		__attribute__((always_inline))
		inline void SetParamDataSize(const unsigned int cParamDataSize)
		{
			m_ParamDataSize = cParamDataSize;
		}

		__attribute__((always_inline))
		inline const unsigned int GetParamDataSize() const
		{
			return m_ParamDataSize;
		}
#if !defined(__SPU__)
		//dependent job API
		__attribute__((always_inline))
		inline const unsigned int GetDependentJobs(const TDepJob* __restrict & rppJobs) const
		{
			rppJobs = m_pDependentJobs;
			return m_DependentJobCount;
		}

		__attribute__((always_inline))
		inline void AddDependentJob(volatile CSPUJobBase* pJob)
		{
			assert(m_DependentJobCount < scMaxDepJobs-1);
			m_pDependentJobs[m_DependentJobCount++] = pJob;
		}

	#if defined(SUPP_SPU_FRAME_STATS)
		__attribute__((always_inline))
		inline SFrameProfileData* GetFrameProfData() const
		{
			return m_pFrameProfileData;
		}

		__attribute__((always_inline))
		inline void SetFrameProfData(SFrameProfileData* pFrameProfData)
		{
			m_pFrameProfileData = pFrameProfData;
		}
	#endif
#endif
		//packet API
		//adds a packet to the job (executed on the same SPU)
		__attribute__((always_inline))
		inline void AddPacket(CCommonDMABase *pPacket)
		{
			assert(m_PacketCount < scMaxPackets);
			m_pPackets[m_PacketCount++]				= pPacket;
		}

		//adds a packet to the job executed on another SPU
		__attribute__((always_inline))
		inline void ConnectSPU(volatile CSPUPacketBase* pPacket)
		{
			assert(m_SPUPacketCount < scMaxPackets);
			m_pSPUPackets[m_SPUPacketCount++] = pPacket;
		}

		//pay attention that jobs with extra packets do not have dependent jobs
		__attribute__((always_inline))
		inline void GetAllPackets(unsigned int& rPacketCount, unsigned int& rSPUPacketCount, volatile const CCommonDMABase** __restrict & rppPackets, volatile const CSPUPacketBase** __restrict & rppSPUPackets) const
		{
			rppPackets				= (volatile const CCommonDMABase**)&m_pPackets[0];
			rppSPUPackets			= (volatile const CSPUPacketBase**)&m_pSPUPackets[0];
			rPacketCount			= m_PacketCount;
			rSPUPacketCount		= m_SPUPacketCount;
		}

		//pay attention that jobs with extra packets do not have dependent jobs
		__attribute__((always_inline))
		inline void GetPackets(unsigned int& rPacketCount, volatile const CCommonDMABase** __restrict & rppPackets) const
		{
			rppPackets				= (volatile const CCommonDMABase**)&m_pPackets[0];
			rPacketCount			= m_PacketCount;
		}

		static const unsigned int scMaxPackets = 8;
		static const unsigned int scMaxDepJobs = 4;

	protected:
		volatile NSPU::NDriver::SJobPerfStats* m_pJobPerfData;	//job performance data pointer
#if !defined(__SPU__)
		volatile NSPU::NDriver::SExtJobState *m_pJobState;			//extern job state
		SCallback	m_Callbackdata;										//callback data
		const void*	m_pQueue;												//consumer/producer queue
	#if defined(SUPP_SPU_FRAME_STATS)
		SFrameProfileData *m_pFrameProfileData;//one per job instance type
	#endif
#endif
		unsigned int		m_ParamDataSize;						//sizeof parameter struct 
#if !defined(__SPU__)
		unsigned int		m_DependentJobCount;				//number of dependent jobs (m_DependentJobCount <= scMaxDepJobs)
#endif
		unsigned int		m_PacketCount;							//number of added packets
		unsigned int		m_SPUPacketCount;						//number of added SPU packets (which run on an extra SPU)
		volatile CCommonDMABase* m_pPackets[scMaxPackets];//array of volatile packet pointers
		volatile CSPUPacketBase* m_pSPUPackets[scMaxSPU];	//array of volatile SPU packet pointers
#if !defined(__SPU__)
		TDepJob m_pDependentJobs[scMaxDepJobs];			//array of dependent jobs
		friend class CJobManSPU;//to be accessed from AddJob
#endif
	};

	//base class for spu packets job
	class CSPUPacketBase
	{
	public:
		__attribute__((always_inline))
		inline CSPUPacketBase()
		{}

		__attribute__((always_inline))
		inline void AddPacket(CCommonDMABase *pPacket) volatile
		{
			((CSPUPacketBase*)this)->m_JobDelegator.AddPacket(pPacket);
		}

		__attribute__((always_inline))
		inline void GetPackets(unsigned int& rPacketCount, volatile const CCommonDMABase** __restrict & rppPackets) const
		{
			((CSPUPacketBase*)this)->m_JobDelegator.GetPackets(rPacketCount, rppPackets);
		}
#if !defined(__SPU__)
	protected:
#endif
    NPPU::CSPUJobDel	m_JobDelegator;				//delegation implementation, all calls to job manager are going through it
#if !defined(__SPU__)
		friend class CJobManSPU;//to be accessed from AddJob
#endif
	};

#if !defined(__SPU__) || defined(_SPU_JOB)
	//Base class for jobs.
	//Keeps the operation mode and the minimum stack size of a job and forwards
	//packet, dependency, registration and launch requests to its CSPUJobDel.
	class CSPUJobBase
	{
	public:
		//Starts with the default bubble/cache mode and CACHE_MIN_STACK_SIZE;
		//on non-SPU builds the job program handle starts as 0.
		__attribute__((always_inline))
		inline CSPUJobBase()
			: m_OpMode(BUBBLE_MODE_DEFAULT | CACHE_MODE_DEFAULT),
			  m_MinStackSize(CACHE_MIN_STACK_SIZE)
		{
#if !defined(__SPU__)
			m_pJobProgramData = 0;
#endif
		}

		//Adds pPacket to the delegator (volatile qualifier is removed before forwarding).
		__attribute__((always_inline))
		inline void AddPacket(CCommonDMABase *pPacket) volatile
		{
			CSPUJobDel& rDelegator = const_cast<CSPUJobBase*>(this)->m_JobDelegator;
			rDelegator.AddPacket(pPacket);
		}

		//Retrieves the packet array and the packet count from the delegator.
		__attribute__((always_inline))
		inline void GetPackets(unsigned int& rPacketCount, volatile const CCommonDMABase** __restrict & rppPackets) const
		{
			//the delegator's GetPackets is const itself, so it can be called directly
			m_JobDelegator.GetPackets(rPacketCount, rppPackets);
		}

#if	!defined(__SPU__)
		//Forwards the callback function and its argument to the delegator.
		__attribute__((always_inline))
		inline void RegisterCallback(void (*pCallbackFnct)(void*), void *pArg) volatile
		{
			CSPUJobDel& rDelegator = const_cast<CSPUJobBase*>(this)->m_JobDelegator;
			rDelegator.RegisterCallback(pCallbackFnct, pArg);
		}

		//Forwards the external job state pointer to the delegator.
		__attribute__((always_inline))
		inline void RegisterJobState(volatile NSPU::NDriver::SExtJobState * __restrict pJobState) volatile
		{
			CSPUJobDel& rDelegator = const_cast<CSPUJobBase*>(this)->m_JobDelegator;
			rDelegator.RegisterJobState(pJobState);
		}

		//Forwards the queue pointer to the delegator.
		__attribute__((always_inline))
		inline void RegisterQueue(const void* const cpQueue) volatile
		{
			CSPUJobDel& rDelegator = const_cast<CSPUJobBase*>(this)->m_JobDelegator;
			rDelegator.RegisterQueue(cpQueue);
		}

		//Forwards the dependent job to the delegator.
		__attribute__((always_inline))
		inline void AddDependentJob(volatile CSPUJobBase* pJob) volatile
		{
			CSPUJobDel& rDelegator = const_cast<CSPUJobBase*>(this)->m_JobDelegator;
			rDelegator.AddDependentJob(pJob);
		}

		//Runs the job through the delegator using the stored operation mode,
		//minimum stack size and job program handle; returns the delegator's result.
		__attribute__((always_inline))
		inline const NPPU::EAddJobRes Run() volatile
		{
			CSPUJobDel& rDelegator = const_cast<CSPUJobBase*>(this)->m_JobDelegator;
			return rDelegator.RunJob(m_OpMode, m_MinStackSize, m_pJobProgramData);
		}
#else
		//Runs the job via RunSPUJob using the stored operation mode and minimum stack size;
		//cJobAddress, cEnableAtParentExit and cJobId are passed through unchanged.
		__attribute__((always_inline))
		inline const NPPU::EAddJobRes Run
		(
			const uint32 cJobAddress,
			const bool cEnableAtParentExit = true,
			const unsigned short cJobId = 0
		) volatile
		{
			return RunSPUJob(const_cast<CSPUJobBase*>(this)->m_JobDelegator, m_OpMode, m_MinStackSize, cJobAddress, cEnableAtParentExit, cJobId);
		}
#endif

		//Forwards a packet that is executed on another SPU to the delegator.
		__attribute__((always_inline))
		inline void ConnectSPU(volatile CSPUPacketBase* pPacket) volatile
		{
			CSPUJobDel& rDelegator = const_cast<CSPUJobBase*>(this)->m_JobDelegator;
			rDelegator.ConnectSPU(pPacket);
		}

#if !defined(__SPU__)
		//Returns the handle to the program data to run.
		__attribute__((always_inline))
		inline const TJobHandle GetJobProgramData()
		{
			return m_pJobProgramData;
		}
#endif

		//Returns the combined bubble and cache mode bits.
		__attribute__((always_inline))
		inline const unsigned int GetOpMode() const
		{
			return m_OpMode;
		}

		//Returns the minimal stack size of the job.
		__attribute__((always_inline))
		inline const unsigned char GetMinStackSizeKB() const
		{
			return m_MinStackSize;
		}

		//Sets the minimal stack size of the job (overload for volatile instances).
		__attribute__((always_inline))
		inline void SetMinStackSizeKB(const unsigned char cMinStackSizeKB) volatile
		{
			m_MinStackSize = cMinStackSizeKB;
		}

		//Sets the minimal stack size of the job.
		__attribute__((always_inline))
		inline void SetMinStackSizeKB(const unsigned char cMinStackSizeKB)
		{
			m_MinStackSize = cMinStackSizeKB;
		}

		//Replaces the bits selected by BUBBLE_MODE_MASK with cBubbleMode (overload for volatile instances).
		__attribute__((always_inline))
		inline void SetBubbleMode(const EBubbleMode cBubbleMode) volatile
		{
			const unsigned int cKeptBits = m_OpMode & static_cast<unsigned int>(~BUBBLE_MODE_MASK);
			m_OpMode = cKeptBits | static_cast<unsigned char>(cBubbleMode);
		}

		//Replaces the bits selected by BUBBLE_MODE_MASK with cBubbleMode.
		__attribute__((always_inline))
		inline void SetBubbleMode(const EBubbleMode cBubbleMode)
		{
			const unsigned int cKeptBits = m_OpMode & static_cast<unsigned int>(~BUBBLE_MODE_MASK);
			m_OpMode = cKeptBits | static_cast<unsigned char>(cBubbleMode);
		}

		//Replaces the bits selected by CACHE_MODE_MASK with cCacheMode (overload for volatile instances).
		__attribute__((always_inline))
		inline void SetCacheMode(const ECacheMode cCacheMode) volatile
		{
			const unsigned int cKeptBits = m_OpMode & static_cast<unsigned int>(~CACHE_MODE_MASK);
			m_OpMode = cKeptBits | static_cast<unsigned char>(cCacheMode);
		}

		//Replaces the bits selected by CACHE_MODE_MASK with cCacheMode.
		__attribute__((always_inline))
		inline void SetCacheMode(const ECacheMode cCacheMode)
		{
			const unsigned int cKeptBits = m_OpMode & static_cast<unsigned int>(~CACHE_MODE_MASK);
			m_OpMode = cKeptBits | static_cast<unsigned char>(cCacheMode);
		}

#if !defined(__SPU__)
		//Forwards the job performance data pointer to the delegator.
		__attribute__((always_inline))
		inline void SetJobPerfStats(volatile NSPU::NDriver::SJobPerfStats* pPerfStats) volatile
		{
			CSPUJobDel& rDelegator = const_cast<CSPUJobBase*>(this)->m_JobDelegator;
			rDelegator.SetJobPerfStats(pPerfStats);
		}
#endif

	protected:
		//delegation implementation, all calls to job manager are going through it
		NPPU::CSPUJobDel	m_JobDelegator;
#if !defined(__SPU__)
		//handle to program data to run
		TJobHandle				m_pJobProgramData;
#endif
		//cache and code paging mode for the job
		unsigned int			m_OpMode;
		//minimal stack size the job needs (required for cache size settings)
		unsigned char			m_MinStackSize;

		//Sets the job program data together with bubble mode, cache mode and minimal stack size.
		//The handle is asserted to be non-zero; on SPU builds it is not stored.
		__attribute__((always_inline))
		inline void SetJobProgramData(const TJobHandle pJobProgramData, const EBubbleMode cBubbleMode = BUBBLE_MODE_DEFAULT, const ECacheMode cCacheMode = CACHE_MODE_DEFAULT, const unsigned char cMinStackSize = CACHE_MIN_STACK_SIZE)
		{
			assert(pJobProgramData != 0);
#if !defined(__SPU__)
			m_pJobProgramData = pJobProgramData;
#else
			(void)pJobProgramData;
#endif
			m_OpMode = cBubbleMode | cCacheMode;
			m_MinStackSize = cMinStackSize;
		}
#if !defined(__SPU__)
		//to be accessed from AddJob
		friend class CJobManSPU;
#endif
	};
#endif //!defined(__SPU__) || defined(_SPU_JOB)

}//NPPU

#if defined(__SPU__) && !defined(_LIB_DRIVER) && defined(_SPU_JOB)
	#include "SPU/GenericSPUJob.h"
#endif


#endif //PS3
#endif //__SPU_JOBBASE_H
