/* 
	definitions for spu job manager
	singleton implementation

	- it manages the job queue hierarchy, one node queue per individual SPU
	- to each SPU the initial loader is uploaded and the spu driver with the PPU push and pull addresses
	- each job has an ID
	- each job has an automatically generated Execute - function taking the parameter address as input
			this function is automatically generated and takes care of calling the actual job entry function
	- memory areas must be specified to be mapped to SPU
		- it can be issued in any order, on the first run it will get sorted, will be saved into template to be reused for faster processing each time
		- allocations are tracked as well, so a delete and (re)alloc can be performed on SPU and will get mirrored on PPU too
		- static class variables can be handled too
		- null can be specified as size too, this means the allocation has to be performed on PPU first, SPU has to wait until it is issued
*/

#ifndef __JOBMAN_SPU_H
#define __JOBMAN_SPU_H
#pragma once

#if defined(PS3)

#include <platform.h>
#include <sys/raw_spu.h>
#include <IJobManSPU.h>
#include "../SPU/CodePage/SPUBubbles.h"
#include "SPU/SPULoaderDump.h"
#include "SPU/SPULoaderDefs.h"
#include "SPUMemAreaMan.h"
#include "PPU.h"
#include "SPUJobBase.h"
#include "../SPU/Cache/CacheDefs_spu.h"
#if defined(SUPP_SN)
	#include "../SPU/LibSN_Module.h"
#endif

//forward declaration of the driver elf info type, it is only referenced by pointer in this header
namespace NSPU { namespace NElf { struct SElfInfo; } }

//forward declarations of the NBubBin types which are referenced by pointer below
namespace NBubBin
{
	struct SJobStringHandle;
	struct SHeader;
}

namespace NPPU
{
	//singleton implementation of IJobManSPU managing the SPU job queue and the SPUs used for job scheduling
	//	- the instance is obtained through Instance(), constructors and assignment are private
	//	- several data members are read/written by the SPU side (see the member comments below),
	//		their order and alignment must not be changed
	class CJobManSPU : public IJobManSPU
	{
	public:
		//singleton access
		static CJobManSPU *Instance();

		//destructor (empty)
		virtual ~CJobManSPU()
		{}

		//returns number of SPUs allowed for job scheduling
		virtual const unsigned int GetSPUsAllowed() const{return m_NumSPUAllowed;}

		//sets number of SPUs allowed for job scheduling (must be called before spu initialization)
		//the call is silently ignored once m_Initialized is set
		//NOTE(review): the range is only checked by the assert (no check if asserts are compiled out) and it
		//	rejects cNum == scMaxSPU whereas m_NumSPUAllowed is documented as 1..scMaxSPU - confirm the intended bound
		virtual void SetSPUsAllowed(const unsigned int cNum)
		{
			if(!m_Initialized)
			{
				assert(cNum > 0 && cNum < scMaxSPU); 
				m_NumSPUAllowed = cNum;
			}
		}

		//returns spu driver size, all data must be placed behind it
		virtual const unsigned int GetDriverSize() const {return m_DriverSize;}

		//initializes all allowed SPUs, returns a success flag
		virtual const bool InitSPUs(const char* cpSPURepository);

		//polls for a spu job (do not use if a callback has been registered)
		virtual const bool WaitSPUJob(volatile NSPU::NDriver::SExtJobState& rJobState) const;

		//print performance stats
		virtual void PrintPerfStats(const volatile NSPU::NDriver::SJobPerfStats* pPerfStats, const char* cpJobName) const;

		//sets the external log
		virtual void SetLog(ILog *pLog);

		//returns true if SPU jobs are still active
		virtual const bool SPUJobsActive() const;

		//shuts the job manager down
		virtual void ShutDown();

		//tests all acquired SPUs if they are running
		virtual void TestSPUs();

		//clean released memory from SPUs and refill buckets
		virtual void UpdateSPUMemMan();

		//enables spu driver debugging
		virtual void EnableSPUDriverDebugging(const bool cEnable);

		//enables spu debugging for a particular job
		virtual void EnableSPUJobDebugging(void* cJobHandle);

		//registers a variable to check if the profiling data should be transferred back
		//does nothing unless DO_SPU_PROFILING is defined
		virtual void RegisterProfileStatVar(int* pVar)
		{
#if defined(DO_SPU_PROFILING)
			m_pProfStatControl = pVar;
#endif
		}

		//retrieves the name of a job
		//	cId - id of the job
		const char* GetJobName(const uint32 cId);

		//obtains and resets the SPU stats of the last frame
		//the second overload additionally takes a vector of frame profile data (presumably filled by the call - confirm in the implementation)
		virtual void GetAndResetSPUFrameStats(SSPUFrameStats& rStats);
		virtual void GetAndResetSPUFrameStats(SSPUFrameStats& rStats, std::vector<SFrameProfileData>& rCurFrameProfVec);

		//adds a job, the outcome is reported through the returned EAddJobRes value
		virtual const EAddJobRes AddJob
		(
			CSPUJobDel& __restrict crJob,
			const uint32 cOpMode,
			const uint8 cMinStackSizeKB,
			const TJobHandle cJobHandle,
			const uint32 cIsDependentJob
		);

		//obtain job handle from name
		//	cpJobName - name of the job
		//	cStrLen - length of cpJobName
		virtual const TJobHandle GetJobHandle(const char* cpJobName, const uint32 cStrLen) const;

		//returns the log set by SetLog
		ILog* GetLog() const
		{
			return m_pLog;
		}

		//returns true if debugging is currently active, always false if built without SUPP_SN
		const bool IsDebuggingActive() const
		{
#if !defined(SUPP_SN)
			return false;
#else
			return m_DebuggingActive;
#endif
		}

	private:
		//this symbol also acts as the relative base of global/static variables, mangled name _ZN4NPPU10CJobManSPU17scInitalSPULoaderE
		//(the identifier spelling is part of that mangled name, do not rename it)
		static uint32 scInitalSPULoader[NSPU::scLoaderTextSizeBytes >> 2] _ALIGN(128);	//initial SPU loader
		static NSPU::SLoaderParams scSPULoaderParam;	//loader parameters, cannot be on the stack because it is the source of a DMA transfer

		SQueueNodeSPU m_SPUJobQueue _ALIGN(128);	//SPU job queue node where jobs are pushed into and from

		//following 16 bytes are written by SPU, do not change order or alignment, the full cache line is used by atomics
		volatile int m_SpinLock _ALIGN(128);			//spin lock memory, we just need non reentrant locking
		//must reside here, is accessed by SPU
		uint8	 m_CurSpuPacketSyncIndex;						//current used spu packet sync index, 1..127 (byte 0 used for locking)
		uint8	 pad[3];
		uint32 m_SpuIDs[scMaxSPU];								//raw SPU IDs, one for each logical SPU (8 bytes are written by SPU as well)
		sys_interrupt_tag_t m_SpuIntTags[scMaxSPU]; //raw SPU interrupt tag
		sys_interrupt_thread_handle_t m_SpuIntHandle[scMaxSPU]; //raw SPU interrupt handle
		uint32 m_RealCurPushAddress;							//current push address to be used by PPU, SPU might see temporarily an older one (if so, it is != ~0)
		NSPU::NDriver::SInfoBlock *m_pLastAddedInfoBlock;	//pointer to last recently added info block
#if defined(SUPP_SN)
		spu_mod_hdr* m_pElfInfo;									//elf info of driver
#else
		NSPU::NElf::SElfInfo*	m_pElfInfo;					//elf info of driver
#endif
		//align here again to 128 bytes to safely and fast use the locking above
		CSPUMemAreaMan m_MemAreaMan _ALIGN(128);	//memory allocation/deallocation request manager for SPU
		NSPU::SBubbleDirInfo m_BubbleInfo _ALIGN(16);//bubble info for SPUDriver's
		NBubBin::SHeader *m_pSPURep;							//pointer to SPU repository, mem is 128 byte aligned
		NSPU::SBubbleInfo *m_pBubbleDir;					//bubble dir for SPU, each bubble gets the EA
		//NOTE(review): SJobStringHandle is unqualified here while the next member uses NBubBin::SJobStringHandle - confirm which type is meant
		std::vector<SJobStringHandle> m_JobStringTable;	//sorted job string table to retrieve a handle from a string
		std::vector<NBubBin::SJobStringHandle*> m_JobStringOffsets;		//pointer to job strings for each job id
#if defined(SUPP_SPU_FRAME_STATS)
		uint32 m_CurFrameProfDataIndex;						//current max used index of frame profiling data
#endif
		ILog *m_pLog;															//pointer to log file
		uint32 m_NumSPUAllowed;										//number of SPUs allowed to be used for job scheduling, 1..scMaxSPU
		uint32 m_DriverSize;											//driver size, SPU jobs must place data after it
#if defined(SUPP_SN)
		void*	 m_SPUJobDebugHandle;								//current job handle for debugging is to be enabled, 0 if non default
		bool	 m_SPUDriverDebuggingEnabled;				//true if spu driver debugging is to be enabled
		bool	 m_DebuggingActive;									//true if debugging is currently active
#endif
		bool	 m_Initialized;											//true if SPUs have been initialized
#if defined(DO_SPU_PROFILING)
		int*	 m_pProfStatControl;								//variable to check if the profiling data should be transferred back
#endif

		//singleton stuff: construction is private, copy constructor and assignment are declared private to forbid copying
		CJobManSPU(void* pDriver);
		CJobManSPU(const CJobManSPU&);
		CJobManSPU& operator= (const CJobManSPU&);

		//loaded on an interrupt PPU thread, handles SPUs printf request
		static void HandleSpuInterrupt(uint64_t spuId);//use uint64_t since otherwise we earn a warning

		//registers an interrupt handler for SPU/PPU synchronization and SPU printf's for a SPU with id spuId
		//returns true if it has successfully been registered
		const bool CreateRawSpuIntrHandler(const sys_raw_spu_t cSPUId, const uint32 cIndex);

		//loads the SPU loader and the SPU driver into SPU local store
		//	cRealSPUId			:  ID for the SPU
		//	cSPUIndex				:  index of the SPU (presumably the logical index into m_SpuIDs - confirm in the implementation)
		//	cIsRecreate			:  defaults to false
		void LoadSPULoaderDriver
		(
			const unsigned int cRealSPUId, 
			const unsigned int cSPUIndex,
			const bool cIsRecreate = false
		) const;

		//gets job slot for next job (to get storage index for SJobdata), waits until a job slot becomes available again since data get overwritten
		const EAddJobRes GetSPUJobSlot(uint32& __restrict rJobSlot, uint32& __restrict rNextPush);

		//creates the DMA list for a job
		void CreateDMAList
		(
			const uint32 cJobParamSize,
			NSPU::NDriver::SInfoBlock& __restrict rInfoBlock,
			const CSPUJobDel& crJob,
			volatile const CCommonDMABase* __restrict * __restrict ppPackets,
			const uint32 cPacketCount
		);
		//simple version having just one packet
		void CreateDMAListSingle(const uint32 cJobParamSize, uint8* const __restrict pParamAddr,	const CSPUJobDel& crJob);

		//retrieves the amount of available space in SPU
		//computed as 256 KB less the reserved stack area (16 KB + 752 bytes) and the driver size
		__attribute__((always_inline))
		const unsigned int GetAvailableSPUSize() const
		{
			static const unsigned int scReservedStackSize = 16 * 1024 + 752/*initial stack address*/;//reserve 16 KB
			return 256 * 1024 - scReservedStackSize - m_DriverSize;
		}

		//retrieves spu memory control
		__attribute__((always_inline))
		CSPUMemAreaMan& GetMemAreaMan()
		{
			return m_MemAreaMan;
		}

		//retrieves the SPU job queue node
		__attribute__((always_inline))
		SQueueNodeSPU& GetJobQueue()//for callback access
		{
			return m_SPUJobQueue;
		}

		//sends a MFC DMA command to a SPU
		//	cSPUId - id of spu
		//	cLS - destination local store address
		//	cEA - effective address for DMA transfer
		//	cSize - size of data to transfer
		//	cDMATag - DMA tag of the transfer
		//	cCommand - mfc command to be executed
		void SendMFCCmd
		(
			const unsigned int cSPUId, 
			const unsigned int cLS, 
			const uint32 cEA, 
			const unsigned int cSize, 
			const unsigned int cDMATag, 
			const unsigned int cCommand
		) const;

		//syncs a dma command
		//	cSPUId - id of spu
		//	cDMATag - DMA tag of the transfer
		void SyncMFCCmd
		(
			const unsigned int cSPUId, 
			const unsigned int cDMATag
		) const;

		//initializes the bucket headers for each SPU
		void InitBucketMemory();

		//creates the job handle <-> string table
		void CreateBubbleJobStringTable();

		//creates the bubble directory for SPU, each bubble index gets the PPU EA
		void CreateSPUBubbleDir();

		//locks the job manager (non reentrant)
		//presumably spins until m_SpinLock could be changed from 0 to 1 - confirm against CrySpinLock
		ILINE void Lock()
		{
			CrySpinLock(&m_SpinLock, 0, 1);
		}

		//unlocks the job manager by resetting the spin lock memory to 0
		ILINE void UnLock()
		{
			m_SpinLock = 0;//only 1 thread should be allowed to unlock it
		}

#if defined(SUPP_SN)
		//sets the flag returned by IsDebuggingActive
		ILINE void SetDebuggingActive(const bool cActive)
		{
			m_DebuggingActive = cActive;
		}
#endif

		//NOTE(review): as written this befriends a namespace scope function HandleSpuInterrupt,
		//	not the static member function of the same name declared above
		friend void HandleSpuInterrupt(uint64_t spuId);
	};
}//NPPU

__attribute__((always_inline))
inline void NPPU::CJobManSPU::CreateDMAListSingle
(
	const uint32 cJobParamSize,
	uint8* const __restrict pParamAddr,
	const CSPUJobDel& crJob
)
{
	assert((cJobParamSize & 0xF) == 0);
	assert(cJobParamSize < NSPU::NDriver::SInfoBlock::scAvailParamSize - 4/*space for no packet ptr for simplicity*/);
	uint8 *__restrict pCurParamDataDest = (uint8*)pParamAddr;
	const CCommonDMABase* __restrict pCurPacketData = (const CCommonDMABase*)&crJob;
	const void* const __restrict cpPacketSrc = pCurPacketData->GetJobParamData();
	memcpy((void* __restrict)pCurParamDataDest, cpPacketSrc, cJobParamSize);
	pCurParamDataDest += cJobParamSize;
	*(uint32*)pCurParamDataDest = NSPU::NDriver::SInfoBlock::scNoPacketVal;//signal no more packets
}

#endif //PS3
#endif //__JOBMAN_SPU_H
