////////////////////////////////////////////////////////////////////////////
//
//  Crytek Engine Source File.
//  Copyright (C), Crytek Studios, 2002.
// -------------------------------------------------------------------------
//  File name:   ParticleUtils.h
//  Version:     v1.00
//  Created:     11/03/2010 by Corey (split out from other files).
//  Compilers:   Visual Studio.NET
//  Description: Splitting out some of the particle specific containers to here.
//							 Will be moved to a proper home eventually.
// -------------------------------------------------------------------------
//  History:
//
////////////////////////////////////////////////////////////////////////////

#ifndef __particleutils_h__
#define __particleutils_h__
#pragma once

#include "CryArray.h"
#include "ParticleFixedSizeElementPool.h"
#include "ParticleMemory.h"
#include "Allocator.h"

#undef PlaySound

// Thin wrapper around a volatile int that starts at 0 and converts
// implicitly to a reference to that int, so an instance can be handed to
// code that expects a 'volatile int&'.
struct CSpinLock
{
	// The wrapped value is initialised to 0.
	CSpinLock()
		: m_Lock(0)
	{
	}

	// Exposes the wrapped value by reference (not by copy).
	operator volatile int&()
	{
		volatile int& rLock = m_Lock;
		return rLock;
	}

protected:
	volatile int	m_Lock;
};

//////////////////////////////////////////////////////////////////////////
// A variable-sized list of items with a pre-allocated maximum.
// Allows efficient element deletion during iteration.
// Note: TestPtrValidity() handles a null-pointer warning from static analysis
// Iteration helpers over a container exposing a nested 'traverser' type.
//   for_all_ptrs_pc     - walks from the head towards the tail (++p).
//   for_rev_all_ptrs_pc - walks from the tail towards the head (--p).
// A reverse traverser is constructed with bForward == false, which positions
// it on the tail; it must therefore be advanced with operator-- (operator++
// would step straight onto the end sentinel after the first element).
#ifndef __SPU__
// Non-SPU build: additionally routes the current element pointer through
// ParticleListNS::TestPtrValidity() (see the static-analysis note above).
#define for_all_ptrs_pc(Type, p, cont) \
	for (Type::traverser p(cont); p && ParticleListNS::TestPtrValidity((void*)(p)); ++p)

#define for_rev_all_ptrs_pc(Type, p, cont) \
	for (Type::traverser p(cont, false); p && ParticleListNS::TestPtrValidity((void*)(p)); --p)
#else // __SPU__
// SPU build: the traverser only needs to report whether it is still valid.
#define for_all_ptrs_pc(Type, p, cont) \
	for (Type::traverser p(cont); p; ++p)

#define for_rev_all_ptrs_pc(Type, p, cont) \
	for (Type::traverser p(cont, false); p; --p)
#endif // !__SPU__

// This class uses a global pool for CParticle memory (shared by all containers) to allocate
// particles from. Internally it uses a linked-list to be able to traverse the elements
template<class T, class TAlloc = StdAllocator>
class ParticleCollection
{
	// States used by the SPU traverser to track the DMA transfer associated
	// with each of its local buffers (declared here because CryCG generated
	// wrong code from an enum nested in a nested class of a template class).
	// The declaration order matters: the SPU traverser compares states with
	// '<' and '>=' against TransferingFromMain to detect a pending transfer.
	enum BufferStates 
	{	
		Unused,
		Active,
		TransferingFromMain,
		TransferingToMain
	};

public:
	typedef int size_type;

	// Creates an empty collection that contains only the list sentinel.
	ParticleCollection()
	{
		reset();
	}

	// Number of elements currently linked into the list.
	size_type size() const
	{
		return m_Count;
	}

	// Grows the capacity towards a_desiredReserveCount and pre-allocates the
	// element memory for every newly available slot from lockingAllocator.
	// Does nothing when the current capacity is already large enough.
	//   a_desiredReserveCount - requested number of element slots.
	//   lockingAllocator      - must provide Lock(), Unlock(),
	//                           UnsynchronisedGetNumFreeBlocks() and
	//                           UnsynchronisedAllocateElement().
	//   a_pDebugInfo          - only used when
	//                           TRACK_FIXED_SIZE_ELEMENT_POOL_IN_USE_PTRS is set;
	//                           written into the first 4 bytes of each block.
	// NOTE(review): the request is clamped to the allocator's free-block count
	// without taking the slots this collection already owns into account -
	// confirm that this is the intended limit.
	template <class LockingAllocator>
	void reserve(const size_type a_desiredReserveCount, LockingAllocator* lockingAllocator, void* a_pDebugInfo)
	{
		FUNCTION_PROFILER_SYS(PARTICLE);

#if !TRACK_FIXED_SIZE_ELEMENT_POOL_IN_USE_PTRS
		(void)a_pDebugInfo; // silence warning
#endif // !TRACK_FIXED_SIZE_ELEMENT_POOL_IN_USE_PTRS

		if(a_desiredReserveCount > capacity())
		{
			const int startIndex = capacity();
			// allocate the particle memory in advance so the SPUs won't need to do any allocating, 
			// and the locking on the allocator is minimised
			lockingAllocator->Lock();
			const size_type actualReserveCount = min(a_desiredReserveCount, lockingAllocator->UnsynchronisedGetNumFreeBlocks());
			// +1 for the sentinel stored in slot 0
			m_ElemList.reserve(actualReserveCount+1);
			for(int i=startIndex; i < actualReserveCount; ++i)
			{
				// We initialise only the data mem pointers in the element list - no items have yet
				// been inserted into the list to use this memory, so we need to use begin() directly
				// to access the underlying data structure in m_ElemList
				const int elemIndex = i+1;
				ListElem& currentListElem = m_ElemList.begin()[elemIndex];
				currentListElem.m_data = (T*)lockingAllocator->UnsynchronisedAllocateElement();
#if TRACK_FIXED_SIZE_ELEMENT_POOL_IN_USE_PTRS
				((uint32*)currentListElem.m_data)[0] = (uint32)a_pDebugInfo;
#endif // TRACK_FIXED_SIZE_ELEMENT_POOL_IN_USE_PTRS
				assert(currentListElem.m_data != NULL);
			}
			// the sentinel never owns element memory
			(m_ElemList.begin()[0]).m_data = NULL;

			lockingAllocator->Unlock();
		}
	}

	// Number of element slots available, i.e. the capacity of the underlying
	// list minus the sentinel slot.
	size_type capacity() const 
	{
		return m_ElemList.capacity()-1;
	}

	// True when no element is linked into the list.
	bool empty() const
	{
		return size() == 0;
	}

	// Destroys all live elements, returns the memory of every reserved slot to
	// lockingAllocator and resets the collection to its initial empty state.
	//   lockingAllocator - must provide Lock(), Unlock() and
	//                      UnsynchronisedDeallocateElement().
	template <class LockingAllocator>
	void clear(LockingAllocator* lockingAllocator)
	{
		FUNCTION_PROFILER_SYS(PARTICLE);

		if(capacity() > 0)
		{
			// run the destructors of the live elements, tail to head
			for (traverser it(*this, false); it; --it)
			{
				it->~T();
			}
			lockingAllocator->Lock();
			const uint32 numElements = capacity();
			for(uint32 i = 0; i < numElements; ++i)
			{
				// We need to iterate through all the elements of m_ElemList here because,
				// regardless of the number of particles that were actually used, all the
				// memory that was allocated during the reserve call must be freed here.
				ListElem& currentListElem = m_ElemList.begin()[i+1];
				lockingAllocator->UnsynchronisedDeallocateElement(currentListElem.m_data);
			}
			lockingAllocator->Unlock();
		}
		reset();
	}

	// Default-constructs a new element at the tail and returns it.
	T* push_back()
	{
		return insert(end_index());
	}

	// Default-constructs a new element in front of the current head and returns it.
	T* push_front()
	{
		return insert(head_index());
	}

	// First element of the list; the collection must not be empty.
	T& front()
	{
		assert(!empty());
		return *get_elem(head_index());
	}

	// Reports the list allocation plus capacity()*sizeof(T) for the element memory.
	void GetMemoryUsage(ICrySizer* pSizer) const
	{
		pSizer->AddObject(m_ElemList.begin(), m_ElemList.get_alloc_size() + (capacity()*sizeof(T)));
	}

	// Cursor over the elements of a ParticleCollection that supports erasing
	// the current element while iterating.
	class traverser
	{
		/* Usage: 
			// Forward
			for (FixedArray::traverser t(array); t; ++t)
				t->Process();
			// Reverse
			for (FixedArray::traverser t(array, false); t; --t)
				t->Process();
		*/

public:

#if !defined(__SPU__)

		// Positions the traverser on the head (bForward) or the tail (!bForward).
		traverser(ParticleCollection& array, bool bForward = true)
		: m_pList(&array)
		, m_nIndex(bForward ? array.head_index() : array.tail_index())
		{
		}

		// Step to the following element (towards the tail).
		ILINE void operator++()
		{
			m_nIndex = m_pList->next_index(m_nIndex);
		}
		
		// Step to the preceding element (towards the head).
		ILINE void operator--()
		{
			m_nIndex = m_pList->prev_index(m_nIndex);
		}

		// False once the traverser has reached the end sentinel.
		ILINE operator bool() const
		{
			return m_pList->valid_index(m_nIndex);
		}

		ILINE T* operator->() const
		{
			return m_pList->get_elem(m_nIndex);
		}

		ILINE operator T*() const
		{
			return m_pList->get_elem(m_nIndex);
		}

		ILINE T& operator*() const
		{
			return *m_pList->get_elem(m_nIndex);
		}

		// Destroys and unlinks the current element; the traverser then refers
		// to the following (bForward) or preceding (!bForward) element.
		SPU_NO_INLINE void erase(bool bForward = true)
		{
			m_nIndex = m_pList->erase(m_nIndex, bForward);
		}

#else // __SPU__

		// special version for SPUs, this one prefetches the elements and transfers em back asynchronously
		ILINE traverser(ParticleCollection& array, bool bForward = true)
		: m_pList(&array)
		, m_nIndex(bForward ? array.head_index() : array.tail_index())
		, m_curBuffer(0)
		{
			InitMembers();
			StartPrefetching(bForward);
		}

		ILINE ~traverser()
		{
			// start backtransfers if needed
			IF( m_states[0] == Active, true ) TransferToMain( 0 );
			IF( m_states[1] == Active, true ) TransferToMain( 1 );
			IF( m_states[2] == Active, true ) TransferToMain( 2 );

			// sync all transfers before the traverser is destroyed to prevent pending dma transfers
			IF( m_states[0] >= TransferingFromMain, true ) SyncTransfer( 0 );
			IF( m_states[1] >= TransferingFromMain, true ) SyncTransfer( 1 );
			IF( m_states[2] >= TransferingFromMain, true ) SyncTransfer( 2 );

		}

		// Step to the following element and prefetch the one after it.
		ILINE void operator++()
		{
			// increase to next dma buffer
			m_curBuffer = NextDMABuffer();

			int nPrevBuffer = PrevDMABuffer();
			int nNextBuffer = NextDMABuffer();

			// transfer previous buffer back if needed
			IF( m_states[nPrevBuffer] == Active,true ) TransferToMain( nPrevBuffer );

			// go to next element
			m_nIndex = m_pList->next_index( m_nIndex );

			// sync and start downtransfer into following buffer
			IF( valid_index(m_nIndex) && valid_index(m_pList->next_index(m_nIndex)), true )
			{
				if( m_states[nNextBuffer] == TransferingToMain ) SyncTransfer( nNextBuffer );

				TransferFromMain( nNextBuffer, m_pList->get_elem( m_pList->next_index(m_nIndex) ) );
			}
		}

		// Step to the preceding element and prefetch the one before it.
		ILINE void operator--()
		{
			// increase to next dma buffer (the buffers always rotate forward,
			// independent of the traversal direction)
			m_curBuffer = NextDMABuffer();

			int nPrevBuffer = PrevDMABuffer();
			int nNextBuffer = NextDMABuffer();

			// transfer previous buffer back if needed
			IF( m_states[nPrevBuffer] == Active,true ) TransferToMain( nPrevBuffer );

			// go to prev element
			m_nIndex = m_pList->prev_index( m_nIndex );

			// sync and start downtransfer into following buffer
			IF( valid_index(m_nIndex) && valid_index(m_pList->prev_index(m_nIndex)), true )
			{
				if( m_states[nNextBuffer] == TransferingToMain ) SyncTransfer( nNextBuffer );

				// In reverse direction the element visited after the current
				// one is its predecessor, so that is the one to prefetch.
				TransferFromMain( nNextBuffer, m_pList->get_elem( m_pList->prev_index(m_nIndex) ) );
			}
		}

		operator bool() const
		{
			return m_pList->valid_index(m_nIndex);
		}

		// The accessors below wait for the current buffer's transfer and then
		// return the local copy of the element, not the main-memory object.
		T* operator->()
		{
			SyncTransfer( m_curBuffer );
			return (T*)&m_buffer[m_curBuffer][0];
		}

		// NOTE(review): the const accessors call the non-const SyncTransfer();
		// they look like they would not compile if ever instantiated - confirm.
		const T* operator->() const
		{
			SyncTransfer( m_curBuffer );
			return (T*)&m_buffer[m_curBuffer][0];
		}

		T& operator*()
		{
			SyncTransfer( m_curBuffer );
			return *( (T*)&m_buffer[m_curBuffer][0] );
		}

		const T& operator*() const
		{
			SyncTransfer( m_curBuffer );
			return *( (T*)&m_buffer[m_curBuffer][0] );
		}

		// Destroys the local copy of the current element, unlinks it from the
		// list and moves on to the following (bForward) or preceding
		// (!bForward) element, prefetching the element after that one.
		ILINE void erase( bool bForward = true )
		{
			// mark buffer as erased to prevent unnecessary backtransfer
			m_states[m_curBuffer] = Unused;

			// call destructor
			T* obj = (T*)&m_buffer[m_curBuffer][0];
			obj->~T();
			memset( obj, 0xFF, sizeof(T) );
			m_nIndex = m_pList->erase_raw( m_nIndex, bForward );

			// increase to next dma buffer
			m_curBuffer = NextDMABuffer();

			// prefetch next element if valid
			IF( valid_index(m_nIndex), true )
			{
				int nNextElement = bForward ?  m_pList->next_index(m_nIndex) :  m_pList->prev_index(m_nIndex);
				int nNextBuffer = NextDMABuffer();

				// sync and start downtransfer into following buffer
				IF( valid_index(nNextElement), true )
				{
					if( m_states[nNextBuffer] == TransferingToMain ) SyncTransfer( nNextBuffer );
					// prefetch in the traversal direction (nNextElement honours bForward)
					TransferFromMain( nNextBuffer, m_pList->get_elem( nNextElement ) );
				}
			}		
		}

	private:

		bool valid_index(int index) const
		{
			return m_pList->valid_index(index);
		}

		// Index of the buffer that follows the current one in the 3-buffer ring.
		int NextDMABuffer()
		{
			return (m_curBuffer + 1)%3;
		}

		// Index of the buffer that precedes the current one in the 3-buffer ring.
		int PrevDMABuffer()
		{
			return (m_curBuffer + 2)%3;
		}

		// Clears the main-memory addresses and marks all three buffers unused.
		ILINE void InitMembers()
		{
			m_mainAddr[0] = NULL;
			m_mainAddr[1] = NULL;
			m_mainAddr[2] = NULL;

			m_states[0] = Unused;
			m_states[1] = Unused;
			m_states[2] = Unused;
		}

		// Starts the transfers for the current element and the (up to) two
		// elements that follow it in the traversal direction.
		ILINE void StartPrefetching( bool bForward )
		{
			int nIndex1 = m_pList->end_index();
			int nIndex2 = m_pList->end_index();
			int nIndex3 = m_pList->end_index();

			// determine the following three indices
			IF( bForward, true )
			{
				nIndex1 = m_nIndex;
				if( valid_index(nIndex1) ) nIndex2 = m_pList->next_index( nIndex1 );
				if( valid_index(nIndex2) ) nIndex3 = m_pList->next_index( nIndex2 );
			}
			else
			{
				nIndex1 = m_nIndex;
				if( valid_index(nIndex1) ) nIndex2 = m_pList->prev_index( nIndex1 );
				if( valid_index(nIndex2) ) nIndex3 = m_pList->prev_index( nIndex2 );
			}

			// start prefetch for each valid following index
			IF( valid_index(nIndex1),true ) TransferFromMain( 0, m_pList->get_elem( nIndex1 ) );
			IF( valid_index(nIndex2),true ) TransferFromMain( 1, m_pList->get_elem( nIndex2 ) );
			IF( valid_index(nIndex3),true ) TransferFromMain( 2, m_pList->get_elem( nIndex3 ) );
		}

		// Starts writing buffer 'index' back to the main address it was loaded from.
		void TransferToMain( int index )
		{
			memtransfer_to_main( SPU_MAIN_PTR(m_mainAddr[index]), &m_buffer[index][0], sizeof(T), DMA_ID_BASE + index );
			m_states[index] = TransferingToMain;
		}

		// Starts loading *pMain into buffer 'index' and remembers its main address.
		void TransferFromMain( int index, T *pMain ) 
		{
			memtransfer_from_main( &m_buffer[index][0], pMain, sizeof(T), DMA_ID_BASE + index );
			m_mainAddr[index] = pMain;
			m_states[index] = TransferingFromMain;
		}

		// Waits for a pending transfer on buffer 'index' (if any) and updates its state.
		void SyncTransfer( int index )
		{
			// Unused / Active: nothing in flight
			if( m_states[index] < TransferingFromMain ) return;

			memtransfer_sync( DMA_ID_BASE + index );

			// adjust state
			if( m_states[index] == TransferingFromMain ) m_states[index] = Active;
			if( m_states[index] == TransferingToMain ) m_states[index] = Unused;
		}

		char				m_buffer[3][sizeof(T)] _ALIGN(128);	// local copies of up to three elements
		T*					m_mainAddr[3];		// main-memory address each buffer was loaded from

		BufferStates	m_states[3];
		int						m_curBuffer;	// buffer holding the current element

		// transfers for buffer i use the id DMA_ID_BASE + i
		static const int DMA_ID_BASE = 3;

#endif // !__SPU__

	private:
		ParticleCollection*	m_pList;
		int m_nIndex;	// index of the current element in m_pList->m_ElemList
	};

private:

	friend class traverser;

	// Node of the index-linked list; slot 0 is the sentinel whose next/prev
	// hold the head/tail indices.
	struct ListElem
	{
		T* m_data;
		int	next;
		int	prev;
	};

	// Element memory belonging to list slot 'index'.
	T* get_elem(int index)
	{
		return m_ElemList[index].m_data;
	}

	int head_index() const
	{
		return m_ElemList.front().next;
	}

	int tail_index() const
	{
		return m_ElemList.front().prev;
	}

	// Index of the sentinel; also used as the "no element" marker.
	static int end_index()
	{
		return 0;
	}

	// True for every index that refers to a real element (not the sentinel).
	bool valid_index(int index) const
	{
		assert(index >= 0 && index < m_ElemList.size());
		return index > 0;
	}

	int next_index(int index) const
	{		
		return m_ElemList[index].next;
	}

	// Shrinks the list to just the sentinel and empties the free list.
	// Does not release any element memory (see clear()).
	void reset()
	{
		m_ElemList.resize_raw(1, 1);
		m_ElemList.front().next = m_ElemList.front().prev = end_index();

		m_nFirstFree = end_index();
		m_Count = 0;
		validate();
	}

	int prev_index(int index) const
	{		
		return m_ElemList[index].prev;
	}

	// Returns the index of a slot for a new element, taken from the free list
	// when possible and otherwise appended to m_ElemList.
	SPU_NO_INLINE int alloc_new_index()
	{
		m_Count++;

		// Is there an element in the free list?
		IF(valid_index(m_nFirstFree), true)
		{
			int index = m_nFirstFree;
			m_nFirstFree = next_index(m_nFirstFree);
			// index was copied from m_nFirstFree, which has just been validated
			IF(!valid_index(index), false)
			{
				snPause();
			}
			return index;
		}
		else
		{
			// Increase number of active elements.
			m_ElemList.push_back();
			return (m_ElemList.size() - 1);
		}
	}

	// Links a new slot in front of 'next_index' (pass end_index() to append)
	// and default-constructs a T in its element memory.
	SPU_NO_INLINE T* insert(int next_index)
	{
		int index = alloc_new_index();
		assert(index <= capacity());
		ListElem& rCurElem = m_ElemList[index];

		// Update links.
		ListElem& rNextElem = m_ElemList[next_index];

		rCurElem.next = next_index;
		rCurElem.prev = rNextElem.prev;

		m_ElemList[rCurElem.prev].next = index;
		m_ElemList[rCurElem.next].prev = index;

		validate();

		T* obj = get_elem(index);
		// initialise object
		new(obj) T();
		return obj;
	}

	// Pushes slot 'index' onto the front of the free list.
	SPU_NO_INLINE void add_to_freelist(const int index)
	{
		m_ElemList[index].next = m_nFirstFree;
		m_nFirstFree = index;
	}

	// Destroys the element in slot 'index' and unlinks it; returns the index
	// of the following (bForward) or preceding (!bForward) element.
	SPU_NO_INLINE int erase(int index, bool bForward = true)
	{
		T* obj = get_elem(index);
		obj->~T();		
		return erase_raw(index, bForward);
	}

	// Unlinks slot 'index' without running the element's destructor and puts
	// the slot on the free list; returns the neighbour index as erase() does.
	SPU_NO_INLINE int erase_raw(int index, bool bForward = true)
	{
		ListElem& elem = m_ElemList[index];		
		int ret = bForward ? elem.next : elem.prev;

		// Update links.
		m_ElemList[elem.prev].next = elem.next;
		m_ElemList[elem.next].prev = elem.prev;

		// Update FreeList.
		m_ElemList[index].next = m_nFirstFree;
		m_nFirstFree = index;

		m_Count--;

		validate();
		return ret;
	}

	// Debug-only consistency check of the used list and the free list.
	void validate()
	{
#ifdef _DEBUG
		assert(head_index() < m_ElemList.size());
		assert(tail_index() < m_ElemList.size());
		assert(prev_index(head_index()) == end_index());
		assert(next_index(tail_index()) == end_index());
		assert(m_nFirstFree < m_ElemList.size());

		int prev = end_index();
		int nUsed = 0;
		for (int index = head_index(); valid_index(index); index = next_index(index), nUsed++)
		{
			assert(prev_index(index) == prev);
			prev = index;
		}
		// Walking the free list range-checks every free index via the assert
		// in valid_index(); the resulting count itself is not checked.
		int nFree = 0;
		for (int index = m_nFirstFree; valid_index(index); index = next_index(index), nFree++)
		{
		}
		assert(nUsed == size());
#endif // _DEBUG
	}

	int m_Count;
	FastDynArray<ListElem> m_ElemList; // Bi-directional list to manage the order of elements.
																		 // Has m_Count+1 entries, first entry contains head/tail indices.
	int	m_nFirstFree; // First free element.
};

//////////////////////////////////////////////////////////////////////////
// Reference-counting without automatic freeing, or virtual functions.
// Can be used as _smart_ptr<> target.

class DumbRefCount
{
public:
	// A new object has no references.
	DumbRefCount()
		: m_nRefs(0)
	{
	}

	// Nothing is freed here; the assert only checks that every reference
	// was released before destruction.
	~DumbRefCount()
	{
		assert(m_nRefs == 0);
	}

	// Registers one more reference.
	void AddRef()
	{
		m_nRefs += 1;
	}

	// Drops one reference; the object is NOT deleted when the count hits 0.
	void Release()
	{
		assert(m_nRefs > 0);
		m_nRefs -= 1;
	}

	// Current number of references.
	int GetRefCount() const
	{
		return m_nRefs;
	}

protected:
	int m_nRefs;
};

//////////////////////////////////////////////////////////////////////////
// Utility class for SPU: remembers the objects on which Release still needs
// to be called. Uses one global variable.
class SpuDeferredReleaseObjects
{
public:

	// these are ifdefed for spu, since they use GetCurrentThreadId
#if defined(__SPU__) 
	// Queues a non-null stat object on the stack belonging to the calling SPU
	// (slot __spu_get_current_id() - 1). Null pointers are ignored.
	void AddDeferredStatObjForRelease( IStatObj *obj )
	{
		IF( obj != NULL, true )
		{
			DeferredUpdateStacks& rStacks = m_deferredReleaseCalls[__spu_get_current_id() - 1];
			rStacks.m_arrStatObj.push( obj );
		}
	}

	// Queues a non-null sound on the stack belonging to the calling SPU
	// (slot __spu_get_current_id() - 1). Null pointers are ignored.
	void AddDeferredSoundForRelease( ISound *obj )
	{
		IF( obj != NULL, true )
		{
			DeferredUpdateStacks& rStacks = m_deferredReleaseCalls[__spu_get_current_id() - 1];
			rStacks.m_arrSound.push( obj );
		}
	}
#endif
	// Defined outside this header.
	void ReleaseAll();

private:
	static const int NUM_SPUS = 4;

	// group per spu together and align, so that each spu has a cacheline for itself
	struct DeferredUpdateStacks
	{
		SpuDataStack<IStatObj*>							m_arrStatObj;
		SpuDataStack<ISound*>								m_arrSound;		
	} _ALIGN(128);

	// one set of stacks per SPU
	DeferredUpdateStacks							m_deferredReleaseCalls[NUM_SPUS];
};

// Declaration of the global SpuDeferredReleaseObjects instance; its
// definition is not part of this header.
extern SpuDeferredReleaseObjects gSPUDeferredReleaseObjects;

//////////////////////////////////////////////////////////////////////////
// class to collect information if a ParticleContainer is updateable on SPU
class SpuUsage
{
public:
	// Records a name together with a particle count; the cause starts out empty.
	SpuUsage( const string &sName, int nNumParticle )
		: m_sName(sName)
		, m_nNumParticle(nNumParticle)
	{
	}

	// Replaces the stored cause text.
	void SetCause( const string &sCause )
	{
		m_sCause = sCause;
	}

	// The returned pointers refer to the strings stored in this object.
	const char* Name() const
	{
		return m_sName.c_str();
	}

	const char* Cause() const
	{
		return m_sCause.c_str();
	}

	int NumParticle() const
	{
		return m_nNumParticle;
	}

	// Orders entries by descending particle count: an entry with MORE
	// particles compares as "less", so it comes first in an ascending sort.
	bool operator < ( const SpuUsage &other ) const
	{
		return other.m_nNumParticle < m_nNumParticle;
	}

private:
	string m_sName;
	string m_sCause;
	int m_nNumParticle;
};

// List of SpuUsage records.
typedef std::vector<SpuUsage> VecSpuUsageT;

#endif // __particleutils_h__
