/*
	relocatable and on demand linkable version of FlushCacheRange
*/ 

#if defined(PS3)
#if defined(__SPU__)

#if !defined(eCryModule)
	#define eCryModule eCryM_Launcher
#endif
#include <CryModuleDefs.h>
#include <platform.h>
#include "../Memory.h"
#include "../SPUUtilities.h"
#include "../Cache/Cache_spu.h"

//override any ILINE definition coming from the headers above:
//within this file ILINE requests inlining via the always_inline attribute
#undef ILINE
//#define ILINE static
#define ILINE __attribute__((always_inline)) inline 

//yields a vec_uint4 pointer to the shadow copy of a cache line
//the base address of the shadow buffer is read as uint32 from G_SPU_CACHE_SHADOW_CACHE,
//CACHE_ENTRY is the index of the line in 16 byte (quadword) units and gets converted to a byte offset
//the argument and the whole expansion are parenthesized so that expression arguments (e.g. a | b)
//and postfix uses (e.g. subscripting the result) bind as expected
#define GET_SHADOW_BUFFER_LINE(CACHE_ENTRY)\
	((vec_uint4*)(void*)(*(uint32*)G_SPU_CACHE_SHADOW_CACHE + ((CACHE_ENTRY)<<4)))

//returns true if a cache line is not dirty and does not need to be merged with main memory atomically
//	cCacheEntry:	index of the first quadword of the cache line within g_pSPUCache (and within the shadow buffer)
//	cEA4:					effective address of the cache line, only element 0 contributes to the result
//side effect: g_scWriteBackMask[0..7] receives the xor of the current line and its shadow copy
//	(for each bit 1 if altered, 0 otherwise), the caller reuses this mask for the atomic write back
//with NO_WRITE_BACK defined, true is returned unconditionally
//forced inline because used only once
//keep in sync with SPUMemManager_spu - impl.
ILINE const bool IsCacheLineUnchanged(const uint32 cCacheEntry, const vec_uint4 cEA4)
{
#if defined(NO_WRITE_BACK)
	return true;
#endif
	//generate write back mask and check if contents has really been altered (compare with shadowed area)
	const vec_uint4* const __restrict cpCurLine		 = &g_pSPUCache[cCacheEntry];
	const vec_uint4* const __restrict cpShadowLine = GET_SHADOW_BUFFER_LINE(cCacheEntry);
	vec_uint4* const __restrict pWriteBackMask		 = g_scWriteBackMask;
	//since the backend currently does not reorder loads/stores, we need to explicitly store everything into a register first
	const vec_uint4 cCurLine0 = cpCurLine[0];
	const vec_uint4 cCurLine1 = cpCurLine[1];
	const vec_uint4 cCurLine2 = cpCurLine[2];
	const vec_uint4 cCurLine3 = cpCurLine[3];
	const vec_uint4 cCurLine4 = cpCurLine[4];
	const vec_uint4 cCurLine5 = cpCurLine[5];
	const vec_uint4 cCurLine6 = cpCurLine[6];
	const vec_uint4 cCurLine7 = cpCurLine[7];

	const vec_uint4 cShadowLine0 = cpShadowLine[0];
	const vec_uint4 cShadowLine1 = cpShadowLine[1];
	const vec_uint4 cShadowLine2 = cpShadowLine[2];
	const vec_uint4 cShadowLine3 = cpShadowLine[3];
	const vec_uint4 cShadowLine4 = cpShadowLine[4];
	const vec_uint4 cShadowLine5 = cpShadowLine[5];
	const vec_uint4 cShadowLine6 = cpShadowLine[6];
	const vec_uint4 cShadowLine7 = cpShadowLine[7];

	//per quadword difference, kept in registers so the reduction below does not have to reload the stored mask
	const vec_uint4 cMask0 = spu_xor(cCurLine0, cShadowLine0);
	const vec_uint4 cMask1 = spu_xor(cCurLine1, cShadowLine1);
	const vec_uint4 cMask2 = spu_xor(cCurLine2, cShadowLine2);
	const vec_uint4 cMask3 = spu_xor(cCurLine3, cShadowLine3);
	const vec_uint4 cMask4 = spu_xor(cCurLine4, cShadowLine4);
	const vec_uint4 cMask5 = spu_xor(cCurLine5, cShadowLine5);
	const vec_uint4 cMask6 = spu_xor(cCurLine6, cShadowLine6);
	const vec_uint4 cMask7 = spu_xor(cCurLine7, cShadowLine7);

	//publish the write back mask
	pWriteBackMask[0] = cMask0;
	pWriteBackMask[1] = cMask1;
	pWriteBackMask[2] = cMask2;
	pWriteBackMask[3] = cMask3;
	pWriteBackMask[4] = cMask4;
	pWriteBackMask[5] = cMask5;
	pWriteBackMask[6] = cMask6;
	pWriteBackMask[7] = cMask7;

	//or all 8 quadwords together (each exactly once), any bit set means the line differs from its shadow copy
	const vec_uint4 cDiff01 = spu_or(cMask0, cMask1);
	const vec_uint4 cDiff23 = spu_or(cMask2, cMask3);
	const vec_uint4 cDiff45 = spu_or(cMask4, cMask5);
	const vec_uint4 cDiff67 = spu_or(cMask6, cMask7);
	const vec_uint4 diffVec = spu_or(spu_or(cDiff01, cDiff23), spu_or(cDiff45, cDiff67));

	//a write back is only required if the ea is not 0 and the contents has been changed:
	//spu_orx folds all elements of diffVec into element 0, spu_andc clears it if the ea (element 0) equals 0
	return (spu_extract(spu_andc(spu_orx(diffVec), spu_cmpeq(cEA4, (vec_uint4){0})), 0) == 0);
}

//tests whether an address lies within one of the async write back ranges
//	cEASplat: address to test, compared element wise against the range vectors
//						(the caller passes the address splatted into all elements)
//the quadwords at G_SPU_CACHE_ASYNC_RANGES_DIR_FROM / G_SPU_CACHE_ASYNC_RANGES_DIR_TO hold
//	the lower (inclusive) and upper (exclusive) bound of one range per element
//returns true if for any element: from <= ea && ea < to
//keep in sync with SPUMemManager_spu - impl.
ILINE const bool CheckWriteBackAsync(const vec_uint4 cEASplat)
{
	//fetch both bound vectors
	const vec_uint4 cRangeFrom = *((vec_uint4*)G_SPU_CACHE_ASYNC_RANGES_DIR_FROM);
	const vec_uint4 cRangeTo	 = *((vec_uint4*)G_SPU_CACHE_ASYNC_RANGES_DIR_TO);
	//all bits set where ea < from (element is out of range)
	const vec_uint4 cBelowFrom = spu_cmpgt(cRangeFrom, cEASplat);
	//all bits set where ea < to (element is a candidate)
	const vec_uint4 cBelowTo	 = spu_cmpgt(cRangeTo, cEASplat);
	//candidate and not below the lower bound: ea is inside that range
	const vec_uint4 cInRange	 = spu_andc(cBelowTo, cBelowFrom);
	//fold all elements into element 0, non zero means at least one range matched
	const uint32 cAnyRangeHit	 = spu_extract(spu_orx(cInRange), 0);
	return (cAnyRangeHit != 0);
}

//flushes all cache lines affected by the range cEAFrom..cEAFrom+cSize, even if only partially occupied,
//	and removes them from the cache (directory ea and lru entry are reset)
//	cEAFrom:			start address of the range, checked to be a multiple of 4
//	cSize:				size of the range, checked to be a multiple of 4 and at least 4
//	cDoWriteBack:	true:  altered cache lines are written back to main memory
//								false: basically means invalidate cache range,
//								this applies just to entire cachelines, partial ones (first/last) are written back
//altered lines inside an async write back range are written via a plain put, all others via the atomic write back
//the prefetch directories are cleared and pending write backs are synced before returning
void FlushCacheRange(const uint32 cEAFrom, const uint32 cSize, const bool cDoWriteBack)
{
	//expect at least one cache line to be affected
	//NOTE(review): spu_CheckCacheHazard presumably acts as an assertion on the passed condition - confirm
	spu_CheckCacheHazard((cEAFrom & 0x3) == 0);
	spu_CheckCacheHazard((cSize & 0x3) == 0);
	spu_CheckCacheHazard(cSize >= 4);
	//reset and flush each cache line affected by cEAFrom..cEAFrom+cSize
	bool syncAsyncTransfer					= false;//set once a plain (non atomic) put has been issued
	//iteration starts at the cache line containing cEAFrom (rounded down) and
	//	ends at the first cache line boundary at or behind cEAFrom+cSize (rounded up)
	const uint32 cEAFromAligned		= cEAFrom & ~scSPUCacheLineSizeMask;
	const uint32 cEAAlignedToNext	= (cEAFrom + cSize + scSPUCacheLineSizeMask) & ~scSPUCacheLineSizeMask;
	vec_uint4 splatEA							= spu_splats(cEAFromAligned);//ea of the currently processed cache line in all elements
	const uint32 cEAToEnd					= cEAFrom + cSize;//exclusive end of the range

	uint32 *const __restrict g_CurAtomicEA = (uint32*)G_SPU_CACHE_CUR_ATOMIC_EA;
	const uint32 g_SPUNumSets = *(uint32* __restrict)G_SPU_NUM_SETS;
	//transfer size and tag are written once here and not again for the puts issued inside the loop
	si_wrch(MFC_Size,si_from_uint(scSPUCacheLineSize));//Issue Cache Line size only once
	si_wrch(MFC_TagID,si_from_uint(g_scMemCpyTag));	
	bool writtenBack = false;//set once any altered cache line has been written back (async or atomic)
	WHILE(spu_extract(splatEA, 0) < cEAAlignedToNext, true)
	{
		const int cSet = GetCacheSetIndex(spu_extract(splatEA, 0));					//get set index
		//look the ea up within its set, only a non negative result is treated as cache hit
		int indexInSet = GetCacheIndexNum(SetCache4WayLookup(cSet, splatEA));
		IF(indexInSet >= 0, false)
		{
			//NOTE(review): presumably converts the lookup result into the way index used by the directory - confirm
			indexInSet = scSPUCacheSetNumWaysMask - indexInSet;
			bool doWriteBack = cDoWriteBack;
			IF(!cDoWriteBack, 0)
			{
				//if write back is false, still write back the first cache line if the range starts in the middle of it
				//	(cEAFrom is not cache line aligned and lies within the current line)
				doWriteBack |= (cEAFrom & scSPUCacheLineSizeMask && cEAFrom < spu_extract(splatEA, 0) + scSPUCacheLineSize);
				//if write back is false, still write back the last cache line if the range ends in the middle of it
				//	(cEAToEnd is not cache line aligned and the current line is the last one of the iteration)
				doWriteBack |= (cEAToEnd & scSPUCacheLineSizeMask && (spu_extract(splatEA, 0) + scSPUCacheLineSize == cEAAlignedToNext));
			}
			if(doWriteBack)
			{
				//index of the first quadword of the cache line (line index scaled to 16 byte units)
				const uint32 cCacheEntry	= ((cSet << scSPUCacheSetNumWaysShift) + indexInSet) << (scSPUCacheLineSizeShift-4);
				//also fills g_scWriteBackMask, which is copied for the atomic write back below
				const bool cIsCacheLineUnChanged = IsCacheLineUnchanged(cCacheEntry, splatEA);
				const bool cTransferAsync = CheckWriteBackAsync(splatEA);
				if(!cIsCacheLineUnChanged)
				{
					writtenBack = true;
					//cache line is altered
					if(cTransferAsync)
					{
						//start async write back
						//inlined: mfc_put(&g_pSPUCache[cCacheEntry], spu_extract(splatEA, 0), scSPUCacheLineSize, g_scDMAOutputTag, 0, 0);
						//NOTE(review): the tag channel was written with g_scMemCpyTag above (and that tag is synced at the end),
						//	not with g_scDMAOutputTag as the inlined call suggests - confirm which one is intended
						si_wrch(MFC_LSA,si_from_ptr(&g_pSPUCache[cCacheEntry]));
						si_wrch(MFC_EAL,si_from_uint(spu_extract(splatEA, 0)));
						si_wrch(MFC_Cmd,si_from_uint(MFC_PUT_CMD));//issue the put command

						syncAsyncTransfer = true;
					}
					else
					{
						//atomic write back: finish a possibly pending one first, then start the one for this line
						SPUSyncAtomicDCache();//2nd step of atomic write back
						//copy current main memory contents here again, necessary Calls for mfc_getllar(g_scWriteBackArea, spu_extract(splatEA, 0), 0, 0);
						si_wrch(MFC_LSA,si_from_ptr(g_scWriteBackArea));
						si_wrch(MFC_EAL,si_from_uint(mfc_ea2l(spu_extract(splatEA, 0))));
						mfc_getllar_again();
						NSPU::NCache::CopyCacheLine((vec_uint4*)g_scWriteBackMaskAtomic, (vec_uint4*)g_scWriteBackMask);//copy write back mask
						*g_CurAtomicEA = spu_extract(splatEA, 0);//remember the ea of the atomic write back in flight
						//save the line contents, the cache line itself is released right below
						NSPU::NCache::CopyCacheLine(g_scWriteBackSavedArea, &g_pSPUCache[cCacheEntry]);
						SPUStartAtomicWrite();
					}
				}
			}
			//drop the line from the cache regardless of whether it has been written back
			SetCacheLineEA(g_pSPUCacheDir, cSet, indexInSet, 0);//reset ea
			ResetLRUEntry(g_pSPUCacheLRUCtrl, cSet, indexInSet);//reset lru
		}
		//advance to the next cache line
		splatEA = spu_add(splatEA, scSPUCacheLineSize);
	}

	//reset prefetch (its entries might have become invalid)
	vec_uint4 *const __restrict pPrefetchLRUDir = (vec_uint4*)G_SPU_CACHE_PREF_LRU_DIR_ADDR;
	vec_uint4 *const __restrict pPrefetchDir		= (vec_uint4*)G_SPU_CACHE_PREF_DIR_ADDR;
	*pPrefetchLRUDir = *pPrefetchDir = spu_splats((uint32)0);
//	#if !defined(SPU_CACHE_MISS_USE_ASM)
//		g_CurWrittenEA = (vec_uint4)0;
//	#endif
	if(writtenBack)
	{
		//sync transfer
		SPUSyncAtomicDCache();//2nd step of atomic write back
		//wait for the plain puts only if at least one has been issued
		IF(syncAsyncTransfer, false)
		{
			SyncMemory(g_scMemCpyTag);
		}
	}
}
#endif //__SPU__
#endif //PS3
