/* 
	interface definition for spu job manager
*/

#ifndef __IJOBMAN_SPU_H
#define __IJOBMAN_SPU_H
#pragma once

struct ILog;

//_ALIGN(num): attribute requesting num-byte alignment for the annotated type or variable
//only defined here if no _ALIGN macro exists yet
//when __CRYCG__ is defined, the same alignment is additionally expressed through a crycg_attr attribute
#ifndef _ALIGN
	#if defined __CRYCG__
		#define _ALIGN(num) __attribute__ ((aligned(num))) __attribute__ ((crycg_attr (aligned, num)))
	#else
		#define _ALIGN(num) __attribute__ ((aligned(num)))
	#endif
#endif

//#define PROVIDE_DEPENDENTJOB_API

//state of spu
//NOTE(review): only the two trailing comments below say who writes these values; the exact meaning of the
//wait/poll patterns is not visible in this header - confirm against the SPU driver
#define SPUWaitState	  0xFCFCFCFC
#define SPUPollState	  0xBEBEBEBE
#define SPUThreadState	0xFEFEFEFE		//threading status written by each SPU
#define SPURunState			0							//running status written by each SPU

//fallback: if no assert macro is visible at this point, define it as a no-op so that code using
//assert() still compiles (the condition is then not evaluated at all)
#if !defined(assert)
#define assert(condition) ((void)0)
#endif

namespace NPPU
{
	//page mode, see the per-value comments for the number of pages
	//the enumerators are consecutive and start at 0 (0, 1, 2)
	enum EPageMode
	{
		ePM_Single = 0,		//single page mode, job occupies as much as it takes
		ePM_Dual	 = 1,		//2 pages
		ePM_Quad	 = 2,		//4 pages
	};
}

#if defined(PS3)

//max num of supported cache lookup id's
//NOTE(review): the comment above presumably describes MAX_PROF_ID, with PROF_ID_RESERVED being the number of
//reserved ids - confirm against the profiling code
#define PROF_ID_RESERVED 8
#define MAX_PROF_ID (1024)
//NOTE(review): presumably the size in bytes of the buffer used for SPU printf output - confirm
#define SPU_PRINTF_BUF_SIZE 512

//forward declaration, only used through a pointer parameter of IJobManSPU::GcmInit
struct CellGcmContextData;

//memory callbacks handed to IJobManSPU::InitSPUs
//NOTE(review): the two unsigned int parameters of TSPUMallocFunc presumably are size and alignment
//(cf. IJobManSPU::Allocate) - confirm
typedef void (*TSPUFreeFunc)(void*);
typedef void* (*TSPUMallocFunc)(unsigned int, unsigned int);
 
namespace NPPU
{
	//mask selecting the two low bits
	//NOTE(review): presumably used to extract an EPageMode value from a combined mode byte - confirm
	#define PAGE_MODE_MASK (unsigned char)3

	//maximum cache size
	//every non-zero enumerator equals twice its size in KB (e.g. eCM_4 == 8), so the values only use bits 3..7
	enum ECacheMode
	{
		eCM_None = 0,			//no cache, bypassing
		eCM_4		 = 8,			//max cache size 4 KB
		eCM_8		 = 16,		//max cache size 8 KB
		eCM_16	 = 32,		//max cache size 16 KB
		eCM_32	 = 64,		//max cache size 32 KB
		eCM_64	 = 128		//max cache size 64 KB, default
	};
	//mask clearing the three low bits (0xF8 as unsigned char), i.e. keeping the bits used by the ECacheMode values
	#define CACHE_MODE_MASK (unsigned char)(~7)
	//NOTE(review): presumably (cache mode value >> CACHE_MODE_SIZE_SHIFT) yields the size in KB (8 >> 1 == 4) - confirm
	#define CACHE_MODE_SIZE_SHIFT 1
}
//defaults: largest cache mode (64 KB) and single page mode
#define CACHE_MODE_DEFAULT NPPU::eCM_64
#define PAGE_MODE_DEFAULT NPPU::ePM_Single

//min stack size depends if job was compiled with O0 or optimized
//the smaller value is used when PS3_SPUOPT is defined
//NOTE(review): the unit is presumably KB (cf. cMinStackSizeKB of IJobManSPU::AddJob) - confirm
#if defined(PS3_SPUOPT)
	#define CACHE_MIN_STACK_SIZE 22
#else
	#define CACHE_MIN_STACK_SIZE 36
#endif

#include <PPU/PPU.h>
struct CellSpurs;

//enable to obtain stats of spu usage each frame
//defined unconditionally for PS3 builds in this header
#define SUPP_SPU_FRAME_STATS

namespace NPPU
{
	//forward declaration, the full definition follows further below for non-SPU builds
	struct SJobStringHandle;
	typedef SJobStringHandle* TJobHandle;	//handle retrieved by name for job invocation
	//value of SJobStringHandle::jobHandle marking a handle as invalid (checked by IsValidJobHandle)
	#define INVALID_JOB_HANDLE ((unsigned int)-1)

	//return results for AddJob (see IJobManSPU::AddJob)
	//eAJR_Success is the first enumerator and therefore 0, every other value names a failure reason
	enum EAddJobRes
	{
		eAJR_Success,						//success of adding job
		eAJR_NoElf,							//spu job is no elf file
		eAJR_NoSPUElf,					//spu job is no spu elf file
		eAJR_ElfTooBig,					//spu job elf is too big
		eAJR_NoQWAddress,				//spu job image start is not on a quadword address
		eAJR_EnqueueTimeOut,		//spu job was not added due to timeout (SPU were occupied for too long)
		eAJR_EnqueueTimeOutPushJob,		//spu job was not added due to timeout of a push job slot (SPU was occupied by one particular job for too long)
		eAJR_SPUNotInitialized,	//SPU were not initialized
		eAJR_JobTooLarge,				//spu job cannot fit into a single SPU local store
		eAJR_JobSetupViolation,	//spu job has some invalid setup for packets/depending jobs
		eAJR_InvalidJobHandle,	//spu job was invoked with an invalid job handle (job has not been found in spu repository)
		eAJR_UnknownError,			//unknown error
	};
}

//optimization options, disable all for final usage
//make sure the job queue never contains the maximum count, otherwise entries will be overwritten and it will crash
//switch off for performance
//#define USE_JOB_QUEUE_VERIFICATION	//performs lots of verification during job adding, save cycles by disabling it 
//USE_JOB_DMA_VERIFICATION is enabled automatically whenever USE_JOB_QUEUE_VERIFICATION is defined
#if defined(USE_JOB_QUEUE_VERIFICATION)
	#define USE_JOB_DMA_VERIFICATION		//performs checks on input and output DMA mapping
#endif

//forward declarations of SPU side types which are only referenced by pointer or reference in this header
namespace NSPU
{
	namespace NDriver
	{
		struct SExtJobState;		//defined further below in this header
		struct SJobPerfStats;		//not defined in this header, used by IJobManSPU::PrintPerfStats
	}
	namespace NElf
	{
		struct SElfInfo;				//not defined in this header
	}
}

//-------------------------------------------------begin spu profiling support-----------------------------------------------

namespace NPPU
{
	//single statistic for an SPU, aligned to 128 bytes for DMA
	//NOTE(review): the previous comment said "16 byte and 16 byte aligned", but the struct carries _ALIGN(128)
	//NOTE(review): lockPad is an array of unsigned int, so 128-8-(6<<2) == 96 elements occupy 384 bytes, not 96 bytes;
	//if the intent was a struct of exactly 128 bytes the pad is four times too large - confirm before changing,
	//the layout is DMA relevant
	struct SSingleSPUStat
	{
		unsigned int lock;									//rw lock (write only PPU, read SPU)
		unsigned int count[6];							//running stats for SPUs, one SPU busy decrementer count for each SPU
		unsigned int dummy;									//was: curSPUPivot;		current pivot ID (to have the same SPU ID workflow per frame)
		unsigned int lockPad[128-8-(6<<2)];	//keep cacheline clean
		SSingleSPUStat() : lock(0){};				//only lock is initialized, all other members are left untouched
	} _ALIGN(128);//DMA relevant

	//forward declarations
	class CSPUJobDel;						//not defined in this header, passed by reference to IJobManSPU::AddJob
	struct SFrameProfileData;		//defined further below for non-SPU (or CRYCG) builds

#if (!defined(__SPU__) || defined(__CRYCG__))
	//per frame SPU usage statistics, holds NPPU::scMaxSPU percentage values
	struct SSPUFrameStats
	{
		float spuStatsPerc[NPPU::scMaxSPU];	//0.f..100.f

		//starts with all percentages cleared
		SSPUFrameStats()
		{
			Reset();
		}

		//sets every percentage entry back to 0.f
		void Reset()
		{
			int spuIdx = 0;
			while(spuIdx < NPPU::scMaxSPU)
			{
				spuStatsPerc[spuIdx] = 0.f;
				++spuIdx;
			}
		}
	};

	//per frame rsx data emitted by DXPS thread
	//eight unsigned int members, aligned to 16 bytes
	//NOTE(review): the unit of the time values is not visible in this header - confirm
	struct SFrameProfileRSXData
	{
		unsigned int frameTime;
		unsigned int rsxWaitTime;
		unsigned int psTime;
		unsigned int vsTime;
		unsigned int flushTime;
		unsigned int inputLayoutTime;
		unsigned int pad[2];						//pads the six values above to eight
	} _ALIGN(16);

	//profiling record kept per frame and per job, aligned to 16 bytes
	struct SFrameProfileData
	{
		unsigned int usec;				//SPU time of the last frame in microseconds (written and accumulated by SPU)
		unsigned int count;				//number of calls this frame (written by PPU and SPU)
	#if defined(__SPU__)
		unsigned int dummy;				//placeholder, the name string is not available on SPU
	#else
		const char* cpName;				//job name (written by PPU)
	#endif
		unsigned int usecLast;		//SPU time of the last but one frame in microseconds (written and accumulated by SPU)
	//constructors and helpers below cannot exist on SPU
	#if !defined(__SPU__)
		//unnamed record, all counters cleared
		SFrameProfileData() : usec(0), count(0), cpName("Uninitialized"), usecLast(0){}
		//record for the job called cpJobName, all counters cleared
		SFrameProfileData(const char* cpJobName) : usec(0), count(0), cpName(cpJobName), usecLast(0){}

		//copies the whole record with one vector sized load/store
		//NOTE(review): relies on the record being exactly one vec_uint4 in size - confirm for the target ABI
		void operator=(const SFrameProfileData& crFrom)
		{
			*(vec_uint4*)this = *(vec_uint4*)&crFrom;
		}

		//moves the current time into usecLast and clears the counters of the running frame
		void Reset()
		{
			usecLast = usec;
			count = 0;
			usec = 0;
		}

		//orders records descending by usec, so sorting puts the most expensive job first
		const bool operator<(const NPPU::SFrameProfileData& crOther) const
		{
			return crOther.usec < usec;
		}
	#endif//__SPU__
	} _ALIGN(16);
#endif
}

//-------------------------------------------------end spu profiling support-----------------------------------------------

namespace NSPU
{
	namespace NDriver
	{
#if !defined(_SPU_JOB)
		struct SInfoBlock;
#endif
		//condition variable like flag which is polled to find out whether a job has finished
		//it is written by the SPU via DMA and therefore has to sit on a 16 byte boundary
		//(the alignment is ensured by padding in SJobData)
		struct SJobState
		{
			volatile unsigned int running;	//1 if running, 0 otherwise
#if !defined(_SPU_JOB)
			friend struct SInfoBlock;
#endif
		public:
#if !defined(__SPU__)
			//starts in the not running state (constructor is not available on SPU)
			__attribute__((always_inline))
				inline SJobState() : running(0u) {}
#endif
			//true as long as the running flag is non-zero
			__attribute__((always_inline))
				inline volatile const bool IsRunning() const volatile
			{
				const unsigned int cCurState = running;
				return cCurState != 0;
			}
		};

		//same struct but aligned at 16 bytes
		//adds three unsigned int of padding behind the inherited running flag
		struct SExtJobState : public SJobState
		{
			volatile unsigned int pad[3];	//padding only, not initialized by the constructor
#if !defined(_SPU_JOB)
			friend struct SInfoBlock;
#endif
		public:
			//explicitly initializes the SJobState base, pad is left untouched
			__attribute__((always_inline))
				inline SExtJobState() : SJobState()	{}
		} _ALIGN(16);
	}
}

#if !defined(__SPU__)
namespace NPPU
{
	//handle describing a job by name, compared and ordered by its name string
	struct SJobStringHandle
	{
		const char *cpString;			//points into repository, must be first element
		unsigned int strLen;			//string length
		uint32 jobHandle;					//job address (corresponding to NBinPage::SJob)
		int jobId;								//index (also acts as id) of job

		//two handles are equal if the names have the same length and the first strLen characters match
		//jobHandle and jobId do not take part in the comparison
		const bool operator==(const SJobStringHandle& crOther) const
		{
			if(strLen != crOther.strLen)
				return false;
			for(unsigned int charIdx = 0; charIdx < strLen; ++charIdx)
			{
				if(cpString[charIdx] != crOther.cpString[charIdx])
					return false;
			}
			return true;
		}

		//strict weak ordering: shorter names sort first, names of equal length are ordered by the first
		//differing character, equal names (including empty ones) are not less than each other
		//the decision is taken on the mismatching character itself and never reads beyond strLen characters
		const bool operator<(const SJobStringHandle& crOther) const
		{
			if(strLen != crOther.strLen)
				return strLen < crOther.strLen;
			for(unsigned int charIdx = 0; charIdx < strLen; ++charIdx)
			{
				const char cSrc = cpString[charIdx];
				const char cDst = crOther.cpString[charIdx];
				if(cSrc != cDst)
					return cSrc < cDst;
			}
			return false;
		}
	};

	//returns true if the handle refers to a job, i.e. its jobHandle differs from INVALID_JOB_HANDLE
	//cJobHandle is dereferenced unconditionally
	inline bool IsValidJobHandle(const TJobHandle cJobHandle)
	{
		const bool cIsInvalid = (cJobHandle->jobHandle == INVALID_JOB_HANDLE);
		return !cIsInvalid;
	}

	// singleton managing the job queues and/for the SPUs
	// pure interface, instances are obtained through CreateJobManSPUInterface / GetIJobManSPU
	// NOTE(review): no virtual destructor is declared, so an implementation must not be deleted through an
	// IJobManSPU pointer; ShutDown() is the only teardown entry point declared here
	struct IJobManSPU
	{
		//returns number of SPUs allowed for job scheduling
		virtual const unsigned int GetSPUsAllowed() const = 0;
		//sets number of SPUs allowed for job scheduling (must be called before spu initialization)
		virtual void SetSPUsAllowed(const unsigned int cNum) = 0;
		//returns spu driver size, all data must be placed behind it
		virtual const unsigned int GetDriverSize() const  = 0;
		//initializes all allowed SPUs
		//FreeFunc/MallocFunc are the memory callbacks to be used
		//NOTE(review): meaning of cSPUThreadCnt, cSPURSCnt and bEnablePrintf is only implied by their names - confirm
		virtual const bool InitSPUs(TSPUFreeFunc FreeFunc, TSPUMallocFunc MallocFunc, const int cSPUThreadCnt = 0, const int cSPURSCnt = 1, bool bEnablePrintf = true) = 0;
		//polls for a spu job (do not use if a callback has been registered)
		//returns false if a time out has occurred
		//NOTE(review): the default cTimeOutMS of -1 presumably means no time out - confirm
		virtual const bool WaitSPUJob(volatile NSPU::NDriver::SExtJobState& rJobState, const int cTimeOutMS=-1) const = 0;
		//print performance stats
		virtual void PrintPerfStats(const volatile NSPU::NDriver::SJobPerfStats* pPerfStats, const char* cpJobName) const = 0;
		//sets the external log
		virtual void SetLog(ILog *pLog) = 0;
		//returns true if SPU jobs are still active
		virtual const bool SPUJobsActive() const = 0;
		//shuts down spu job manager
		virtual void ShutDown() = 0;
		//tests all acquired SPUs if they are running
		virtual void TestSPUs() = 0;
		//tests all acquired SPUs if they are running, reports -1 if something went wrong
		virtual int VerifySPUs(const bool cIgnoreDebugState = false) const = 0;
		//clean released memory from SPUs and refill buckets
		virtual void UpdateSPUMemMan() = 0;
		//enables spu driver debugging
		virtual void EnableSPUDriverDebugging(const bool) = 0;
		//enables spu debugging for a particular job
		virtual void EnableSPUJobDebugging(void*) = 0;
		//registers a variable to check if the profiling data should be transferred back
		virtual void RegisterProfileStatVar(int *pVar) = 0;
		//enables/disables spu profiling
		virtual void EnableSPUProfiling(const uint8=0) = 0;
		//obtains and resets the SPU stats of the last frame
		virtual void GetAndResetSPUFrameStats(SSPUFrameStats& rStats, const bool=true) = 0;
		//same, additionally hands out a pointer to the per job frame profile data and its element count
		virtual void GetAndResetSPUFrameStats(SSPUFrameStats& rStats, const SFrameProfileData*& rpCurFrameProfVec, uint32& rCount) = 0;
		//obtains and resets the SPU function profiling stats of the last frame
		//NOTE(review): cThresholdUSecs presumably filters out entries below that many microseconds - confirm
		virtual void GetAndResetSPUFuncProfStats(const SFrameProfileData*& rpCurFuncProfStatVec, uint32& rCount, const uint32 cThresholdUSecs=100) = 0;

		//memory allocation through the job manager, alignment defaults to 8
		//NOTE(review): presumably memory obtained from Allocate has to be returned through Free - confirm
		virtual void* Allocate(uint32 size, uint32 alignment = 8) = 0;
		virtual void Free(void*) = 0;
		//retrieve initialized spurs memory
		virtual CellSpurs* GetSPURS() = 0;
		//adds a job
		//cJobHandle identifies the job (see GetJobHandle), the result is one of EAddJobRes
		//the dependent job parameter only exists if PROVIDE_DEPENDENTJOB_API is defined
		virtual const EAddJobRes AddJob
		(
			CSPUJobDel& __restrict crJob,
			const unsigned int cOpMode,
			const unsigned char cMinStackSizeKB,
			const TJobHandle cJobHandle
#if defined(PROVIDE_DEPENDENTJOB_API)			
			,	const unsigned int cIsDependentJob
#endif
		) = 0;

		//obtain job handle from name
		//the first overload takes the length of the name explicitly
		virtual const TJobHandle GetJobHandle(const char* cpJobName, const unsigned int cStrLen) const = 0;
		virtual const TJobHandle GetJobHandle(const char* cpJobName) const = 0;
		//NOTE(review): presumably reflects EnableSPUDriverDebugging/EnableSPUJobDebugging - confirm
		virtual const bool IsDebuggingActive() const = 0;
		//NOTE(review): presumably the amount of memory handed out through Allocate, unit not visible here - confirm
		virtual const uint32 GetAllocatedMemory() const = 0;
		//NOTE(review): presumably pause and resume SPU job processing - confirm
		virtual void StopSPUs() = 0;
		virtual void ContinueSPUs() = 0;
		//initialize values required for spu-libgcm usage
		//NOTE(review): the parameters are unnamed, their meaning is not visible in this header
		virtual void GcmInit
		(
			const uint32,
			void *const __restrict,
			const uint32, 
			CellGcmContextData *const __restrict, 
			const uint32,
			const uint32,
			const uint32
		) = 0;
		//returns true if SPU is processing a job
		virtual bool IsSPUProcessing(const unsigned int cSPUIIndex) const = 0;
	};
}//NPPU


// IJobManSPU dynamic library Export
// function pointer type matching the signature of CreateJobManSPUInterface
typedef NPPU::IJobManSPU* (*PFNCREATEJOBMANINTERFACE)();

// interface of the DLL
// declared with C linkage, so the symbol name is not C++ mangled
extern "C" 
{
	NPPU::IJobManSPU* CreateJobManSPUInterface();
}

// accessor for the job manager interface, defined elsewhere
extern NPPU::IJobManSPU* GetIJobManSPU();

//headers located in Tools/PS3JobManager
//platform.h must not be included since it would make all memory allocations be listed under the launcher
#include <SPU/JobStructs.h>

#endif //__SPU__

#else	//PS3
/*
//provide an implementation which can be called on any non ps3 platform evaluating to nothing
//avoids having nasty preprocessor defs

struct IJobManSPU
{
	const unsigned int GetSPUsAllowed() const{return 0;}
	void SetSPUsAllowed(const unsigned int)const{}
	const unsigned int GetDriverSize() const{return 0;}
	const bool InitSPUs(const char*)const{return true;}
	const bool SPUJobsActive() const{return false;}
	void ShutDown()const{}
	void TestSPUs() const{}
	void UpdateSPUMemMan()const{}
	void SetLog(ILog*)const{}
};

inline IJobManSPU *GetIJobManSPU()
{
	static IJobManSPU sJobManDummy;
	return &sJobManDummy;
}
*/
#endif //PS3
#endif //__IJOBMAN_SPU_H
	
