/* 
	memcpy function wrappers
*/

#ifndef __MEMORY_H
#define __MEMORY_H
#pragma once

#if defined(PS3)

#if defined(__SPU__)

#include <cell/dma.h>
#include "SPU.h"
#include <IJobManSPU.h>
#if !defined(_SPU_JOB)
	#include "JobStructs.h"
#endif//_SPU_JOB

// Declarations inside the NSPU namespace that this header relies on.
namespace NSPU
{
#if !defined(_SPU_JOB)
	// Only declared when _SPU_JOB is not defined.
	namespace NDriver
	{
		// Info block object; it is only declared here and must be defined in another translation unit.
		// NOTE(review): SInfoBlock presumably comes from JobStructs.h, which is included under the same condition - confirm.
		extern SInfoBlock g_sInfoBlock;
	}
#endif
}

//resets the tag status update request (required before any DMA sync):
//writes 0 to MFC_WrTagUpdate, spins until that channel reports a non-zero count and then reads MFC_RdTagStat
//expands to a plain brace block, so it does not need a trailing semicolon
#define MFC_CLEAR_TAG_UPDATE \
{\
	spu_writech(MFC_WrTagUpdate, 0);\
	while(spu_readchcnt(MFC_WrTagUpdate) == 0)\
	{\
	}\
	spu_readch(MFC_RdTagStat);\
}

	//definitions for the "lock line reservation lost" event
	//NOTE(review): bit 10 is presumably the lost-reservation bit of the SPU event mask - confirm against the hardware documentation
#define MFC_LLAR_LOST_EVENT (1<<10)
	//NOTE(review): 0 and 2 look like the channel numbers used to read the event status and to write the event acknowledge - confirm
#define MFC_RD_EVENT_STATUS 0
#define MFC_WR_EVENT_ACK    2
	//FAST_UNSAFE_LL_ENABLE (currently disabled): fast enabling/disabling of LL events, phantom events can occur (should not matter)
	//keep in sync with SPUMultiThread.h
//	#define FAST_UNSAFE_LL_ENABLE

//synchronizes DMA transfers with a certain tag id
//	cTagID: tag group to wait for; it is used as a shift count, so it must be in the range 0..31
//default variant: selects the tag through MFC_WrTagMask, requests MFC_TAG_UPDATE_ALL and
//	evaluates to the value read from MFC_RdTagStat
//MFC_SYNC_BY_POLLING variant: requests MFC_TAG_UPDATE_IMMEDIATE in a loop until the status read
//	from MFC_RdTagStat equals the tag mask; once the loop has run more than 400000 times every
//	further iteration prints a timeout message and executes SPU_DEBUG_HALT (this variant yields no value)
//the argument is parenthesized and shifted as an unsigned value, so compound argument expressions
//	and tag 31 produce the intended mask; the polling variant evaluates the argument exactly once
#if !defined(MFC_SYNC_BY_POLLING)
#define SyncMemory(cTagID)({\
	spu_writech(MFC_WrTagMask, (1u<<(cTagID)));\
	spu_writech(MFC_WrTagUpdate,MFC_TAG_UPDATE_ALL);\
	spu_readch(MFC_RdTagStat);})
#else
	#define SyncMemory(cTagID)({\
		const unsigned int cSyncTagID = (unsigned int)(cTagID);\
		const unsigned int cSyncTagMask = (1u<<cSyncTagID);\
		spu_writech(MFC_WrTagMask, cSyncTagMask);\
		unsigned long long syncPollCount = 0;\
		do\
		{\
			syncPollCount = syncPollCount + 1;\
			if(syncPollCount > 400000)\
			{\
				printf("DMA-Timeout(tag=%d)\n", (int)cSyncTagID);\
				SPU_DEBUG_HALT;\
			}\
			si_wrch(MFC_WrTagUpdate,si_from_uint(MFC_TAG_UPDATE_IMMEDIATE));\
		}\
		while(__builtin_expect(si_to_uint(si_rdch(MFC_RdTagStat)) != cSyncTagMask, false));})
#endif //MFC_SYNC_BY_POLLING

//copy any amount of data from MAIN memory into LS
//	dest:    LS destination address, must be 16 byte aligned (asserted)
//	cSource: main memory source address, asserted to be above 256 KB
//	cSize:   number of bytes; the copy is issued as consecutive transfers of at most 16 KB each
//	cTagID:  tag written to MFC_TagID for all transfers; this macro does not wait for completion itself
//	cFenced: non-zero selects MFC_GETF_CMD, zero selects MFC_GET_CMD
//every argument is evaluated exactly once into a prefixed local, so argument expressions are not
//	re-evaluated by the asserts and cannot be captured by the temporaries of this macro
//the loop body runs at least once, so one transfer command is issued even for cSize == 0
//NOTE(review): WHILE(cond, hint) is defined elsewhere; presumably a loop condition with a branch hint - confirm
#define MemcpyLargeLS(dest, cSource, cSize, cTagID, cFenced)({\
	unsigned int largeLSCurDest = (unsigned int)(dest);\
	unsigned int largeLSCurSource = (unsigned int)(cSource);\
	int largeLSSizeLeft = (int)(cSize);\
	const unsigned int cLargeLSTagID = (unsigned int)(cTagID);\
	const unsigned int cLargeLSCmd = (cFenced)? MFC_GETF_CMD : MFC_GET_CMD;\
	assert(largeLSCurSource > 256*1024);\
	assert((largeLSCurDest & ~0xF) == largeLSCurDest);\
	si_wrch(MFC_TagID,si_from_uint(cLargeLSTagID));\
	do\
	{\
		si_wrch(MFC_LSA,si_from_uint(largeLSCurDest));\
		si_wrch(MFC_EAL,si_from_uint(largeLSCurSource));\
		si_wrch(MFC_Size,si_from_uint((largeLSSizeLeft>16*1024)?16*1024 : largeLSSizeLeft));\
		si_wrch(MFC_Cmd,si_from_uint(cLargeLSCmd));\
		largeLSSizeLeft		-= 16*1024;\
		largeLSCurSource	+= 16*1024;\
		largeLSCurDest		+= 16*1024;\
	}\
	WHILE(largeLSSizeLeft > 0,0);\
})

//copy any amount of data from LS into MAIN memory
//	dest:    main memory destination address, asserted to be 16 byte aligned and above 256 KB
//	cSource: LS source address, asserted to be below 256 KB
//	cSize:   number of bytes; the copy is issued as consecutive transfers of at most 16 KB each
//	cTagID:  tag written to MFC_TagID for all transfers; this macro does not wait for completion itself
//	cFenced: non-zero selects MFC_PUTF_CMD, zero selects MFC_PUT_CMD
//every argument is evaluated exactly once into a prefixed local, so argument expressions are not
//	re-evaluated by the asserts and cannot be captured by the temporaries of this macro;
//	dest is converted to unsigned int explicitly, like in the other copy macros
//the loop body runs at least once, so one transfer command is issued even for cSize == 0
//NOTE(review): WHILE(cond, hint) is defined elsewhere; presumably a loop condition with a branch hint - confirm
#define MemcpyLargeMain(dest, cSource, cSize, cTagID, cFenced)({\
	unsigned int largeMainCurDest = (unsigned int)(dest);\
	unsigned int largeMainCurSource = (unsigned int)(cSource);\
	int largeMainSizeLeft = (int)(cSize);\
	const unsigned int cLargeMainTagID = (unsigned int)(cTagID);\
	const unsigned int cLargeMainCmd = (cFenced)? MFC_PUTF_CMD : MFC_PUT_CMD;\
	assert((largeMainCurDest & ~0xF) == largeMainCurDest);\
	assert(largeMainCurDest > 256*1024);\
	assert(largeMainCurSource < 256*1024);\
	si_wrch(MFC_TagID,si_from_uint(cLargeMainTagID));\
	do\
	{\
		si_wrch(MFC_LSA,si_from_uint(largeMainCurSource));\
		si_wrch(MFC_EAL,si_from_uint(largeMainCurDest));\
		si_wrch(MFC_Size,si_from_uint((largeMainSizeLeft>16*1024)?16*1024 : largeMainSizeLeft));\
		si_wrch(MFC_Cmd,si_from_uint(cLargeMainCmd));\
		largeMainSizeLeft	-= 16*1024;\
		largeMainCurSource	+= 16*1024;\
		largeMainCurDest	+= 16*1024;\
	}\
	WHILE(largeMainSizeLeft > 0,0);\
})

//copy data from MAIN memory into LS with a single MFC_GET_CMD transfer
//	dest:    LS destination address
//	cSource: main memory source address, asserted to be above 256 KB
//	cSize:   number of bytes; addresses that are not 16 byte aligned are only accepted for sizes below 16
//	cTagID:  tag written to MFC_TagID; this macro does not wait for completion itself
//the arguments are evaluated more than once when asserts are enabled, so they must be free of side effects
//cSize is parenthesized inside the asserts so compound size expressions compare as a whole
#define MemcpyLS(dest, cSource, cSize, cTagID)({\
	assert((unsigned int)(cSource) > 256*1024);\
	assert((((unsigned int)(cSource) & 0xF) == 0 || (cSize) < 16));\
	assert((((unsigned int)(dest) & 0xF) == 0 || (cSize) < 16));\
	si_wrch(MFC_LSA,si_from_ptr((void*)(dest)));\
	si_wrch(MFC_EAL,si_from_uint((unsigned int)(cSource)));\
	si_wrch(MFC_Size,si_from_uint(cSize));\
	si_wrch(MFC_TagID,si_from_uint(cTagID));\
	si_wrch(MFC_Cmd,si_from_uint(MFC_GET_CMD));})

//copy data from MAIN memory into LS with a single MFC_GETF_CMD (fenced) transfer
//	dest:    LS destination address, must be 16 byte aligned (asserted)
//	cSource: main memory source address, asserted to be 16 byte aligned and above 256 KB
//	cSize:   number of bytes
//	cTagID:  tag written to MFC_TagID; this macro does not wait for completion itself
//the arguments are evaluated more than once when asserts are enabled, so they must be free of side effects
//the channel writes convert dest/cSource the same way MemcpyLS does, so both macros accept the same argument types
#define MemcpyLSFenced(dest, cSource, cSize, cTagID)({\
	assert(((unsigned int)(cSource) & 0xF) == 0);\
	assert((unsigned int)(cSource) > 256*1024);\
	assert(((unsigned int)(dest) & 0xF) == 0);\
	si_wrch(MFC_LSA,si_from_ptr((void*)(dest)));\
	si_wrch(MFC_EAL,si_from_uint((unsigned int)(cSource)));\
	si_wrch(MFC_Size,si_from_uint(cSize));\
	si_wrch(MFC_TagID,si_from_uint(cTagID));\
	si_wrch(MFC_Cmd,si_from_uint(MFC_GETF_CMD));})

// wrapper for copying data from LS into MAIN memory with a single MFC_PUT_CMD transfer
//	cDest:   main memory destination address, asserted to be above 256 KB
//	cSource: LS source address, asserted to be below 256 KB
//	cSize:   number of bytes; addresses that are not 16 byte aligned are only accepted for sizes below 16
//	cTagID:  tag written to MFC_TagID; this macro does not wait for completion itself
// the arguments are evaluated more than once when asserts are enabled, so they must be free of side effects
// cSize is parenthesized inside the asserts so compound size expressions compare as a whole, and the
// channel writes convert cSource/cDest explicitly like the asserts do
#define	MemcpyMain(cDest, cSource, cSize, cTagID)({\
	assert((((unsigned int)(cSource) & 0xF) == 0 || (cSize) < 16));\
	assert((((unsigned int)(cDest) & 0xF) == 0 || (cSize) < 16));\
	assert((unsigned int)(cDest) > 256*1024);\
	assert((unsigned int)(cSource) < 256*1024);\
	si_wrch(MFC_LSA,si_from_ptr((void*)(cSource)));\
	si_wrch(MFC_EAL,si_from_uint((unsigned int)(cDest)));\
	si_wrch(MFC_Size,si_from_uint(cSize));\
	si_wrch(MFC_TagID,si_from_uint(cTagID));\
	si_wrch(MFC_Cmd,si_from_uint(MFC_PUT_CMD));})

// wrapper for copying data from LS into MAIN memory, fenced version (single MFC_PUTF_CMD transfer)
//	cDest:   main memory destination address, asserted to be above 256 KB
//	cSource: LS source address, asserted to be below 256 KB
//	cSize:   number of bytes
//	cTagID:  tag written to MFC_TagID; this macro does not wait for completion itself
// source and destination must have identical lower 4 address bits (asserted)
// the arguments are evaluated more than once when asserts are enabled, so they must be free of side effects
// cDest is parenthesized and converted inside the assert so compound address expressions are masked as a whole
#define MemcpyMainFenced(cDest, cSource, cSize, cTagID)({\
	assert(((unsigned int)(cSource) & 0xF) == ((unsigned int)(cDest) & 0xF));\
	assert((unsigned int)(cDest) > 256*1024);\
	assert((unsigned int)(cSource) < 256*1024);\
	si_wrch(MFC_LSA,si_from_ptr((void*)(cSource)));\
	si_wrch(MFC_EAL,si_from_uint((unsigned int)(cDest)));\
	si_wrch(MFC_Size,si_from_uint(cSize));\
	si_wrch(MFC_TagID,si_from_uint(cTagID));\
	si_wrch(MFC_Cmd,si_from_uint(MFC_PUTF_CMD));})

// wrapper for copying data from LS into MAIN memory, barrier version (single MFC_PUTB_CMD transfer)
//	cDest:   main memory destination address, asserted to be above 256 KB
//	cSource: LS source address, asserted to be below 256 KB
//	cSize:   number of bytes
//	cTagID:  tag written to MFC_TagID; this macro does not wait for completion itself
// source and destination must have identical lower 4 address bits (asserted)
// the arguments are evaluated more than once when asserts are enabled, so they must be free of side effects
// all asserts run before the first channel write, and cDest is parenthesized and converted inside the
// assert so compound address expressions are masked as a whole
#define MemcpyMainBarrier(cDest, cSource, cSize, cTagID)({\
	assert(((unsigned int)(cSource) & 0xF) == ((unsigned int)(cDest) & 0xF));\
	assert((unsigned int)(cDest) > 256*1024);\
	assert((unsigned int)(cSource) < 256*1024);\
	si_wrch(MFC_LSA,si_from_ptr((void*)(cSource)));\
	si_wrch(MFC_EAL,si_from_uint((unsigned int)(cDest)));\
	si_wrch(MFC_Size,si_from_uint(cSize));\
	si_wrch(MFC_TagID,si_from_uint(cTagID));\
	si_wrch(MFC_Cmd,si_from_uint(MFC_PUTB_CMD));})

/*
__attribute__((always_inline))
// wrapper for copy small amount of data (<16 byte) from LS into MAIN memory
inline void MemcpySmallMainBarrier(const unsigned int cDest, const TAddrLS cSource, const unsigned int cSize, const unsigned int cTagID)
{
	DEBUG_PRINTF("MemcpySmallMainBarrier dest: 0x%x, source: 0x%x, size: %d, tagID: %d\n", cDest, cSource, cSize, cTagID);
	assert(((unsigned int)cDest & 0xF) == ((unsigned int)cSource & 0xF));
	assert(cSize == 1 || cSize == 2 || cSize == 4 || cSize == 8);
	//implements: mfc_putb((void*)(uintptr_t)cSource, cDest, cSize, cTagID, 0, 0) (without $ch17)
	si_wrch(MFC_LSA,si_from_ptr(cSource));
	si_wrch(MFC_EAL,si_from_uint(cDest));
	si_wrch(MFC_Size,si_from_uint(cSize));
	si_wrch(MFC_TagID,si_from_uint(cTagID));
	si_wrch(MFC_Cmd,si_from_uint(MFC_PUTB_CMD));//start asynchronous transfer back
}

// wrapper for copy small amount of data (<16 byte) from LS into MAIN memory
static void MemcpySmallMain(const unsigned int cDest, const TAddrLS cSource, const unsigned int cSize, const unsigned int cTagID)
{
	DEBUG_PRINTF("MemcpySmallMain dest: 0x%x, source: 0x%x, size: %d, tagID: %d\n", cDest, cSource, cSize, cTagID);
	assert(((unsigned int)cDest & 0xF) == ((unsigned int)cSource & 0xF));
	switch(cSize)
	{
	case 0:
	case 1:
	case 2:
	case 4:
		mfc_put((void*)(uintptr_t)cSource, cDest, cSize, cTagID, 0, 0);
		break;
	case 6:
		assert(((unsigned int)cDest & 0x1) == 0);
		mfc_put((void*)(uintptr_t)cSource, cDest, 2, cTagID, 0, 0);
		mfc_put((void*)(uintptr_t)((unsigned int)cSource+2), cDest+2, 2, cTagID, 0, 0);
		mfc_put((void*)(uintptr_t)((unsigned int)cSource+4), cDest+4, 2, cTagID, 0, 0);
		break;
	case 8:
		assert(((unsigned int)cDest & 0x3) == 0);
		if((cDest & 0x7) != 0)
		{
			mfc_put((void*)(uintptr_t)cSource, cDest, 4, cTagID, 0, 0);
			mfc_put((void*)(uintptr_t)((unsigned int)cSource+4), cDest+4, 4, cTagID, 0, 0);
		}
		else
			mfc_put((void*)(uintptr_t)cSource, cDest, 8, cTagID, 0, 0);
		break;
	case 12:
		assert(((unsigned int)cDest & 0x3) == 0);
		mfc_put((void*)(uintptr_t)cSource, cDest, 4, cTagID, 0, 0);
		mfc_put((void*)(uintptr_t)((unsigned int)cSource+4), cDest+4, 4, cTagID, 0, 0);
		mfc_put((void*)(uintptr_t)((unsigned int)cSource+8), cDest+8, 4, cTagID, 0, 0);
		break;
	default:
		assert(cSize == 0 || cSize == 1 || cSize == 2 || cSize == 4 || cSize == 6 || cSize == 8 || cSize == 12);
	}
}
*/

#endif //__SPU__
#endif //PS3
#endif //__MEMORY_H
