#ifndef CRENDERER_H
#define CRENDERER_H

#include "stdafx.h"
#include "..\Common\IJobServer.h"

// Short aliases for the scalar types used throughout the renderer.
// NOTE(review): the bit widths implied by the names assume a data model where
// char/short/int are 8/16/32 bits (true for MSVC, which __int64 already ties this file to) — confirm for any other target.
typedef signed char         int8;
typedef signed short        int16;
typedef signed int					int32;
typedef signed __int64			int64;
typedef unsigned char				uint8;
typedef unsigned short			uint16;
typedef unsigned int				uint32;
typedef unsigned __int64		uint64;
typedef float               f32;
typedef double              f64;
typedef double              real;  // widest floating-point alias used by this code (a double)

typedef unsigned long       DWORD;  // unsigned long alias; NOTE(review): presumably mirrors the Win32 DWORD typedef — confirm it matches <windows.h>

// Delete the object (SAFE_DELETE) or array (SAFE_DELETE_ARRAY) that the
// pointer lvalue `p` refers to and reset `p` to NULL.
// The expansion is a self-contained compound statement, so an `else` written
// after the macro can never attach to the macro's internal `if`, and the
// argument is parenthesised. As before, the macro may be used with or
// without a trailing semicolon.
#define SAFE_DELETE(p)				{ if(p){ delete (p); (p) = NULL; } }
#define SAFE_DELETE_ARRAY(p)	{ if(p){ delete [] (p); (p) = NULL; } }

// Scales the 3-component vector at pVector to unit length, in place.
//
// pVector : pointer to three consecutive floats (x, y, z); must not be NULL.
//
// A vector whose squared length is not strictly positive (all zeros, or
// containing NaN) is left untouched instead of being turned into NaN/Inf by a
// division by zero.
inline void Normalize( float* pVector )
{
	const float fLengthSq = pVector[0]*pVector[0]+pVector[1]*pVector[1]+pVector[2]*pVector[2];

	// Written as !(x > 0) so that NaN is rejected as well.
	if( !(fLengthSq > 0.f) )
		return;

	const float fInvLength = 1.f / sqrtf( fLengthSq );
	pVector[0] *= fInvLength;
	pVector[1] *= fInvLength;
	pVector[2] *= fInvLength;
}

// Small 3-component vector helpers. Every argument is anything indexable with
// [0..2] (array or pointer) and is parenthesised, so pointer expressions such
// as `p+3` work. The statement macros expand to a single braced block, so all
// three assignments stay together under an unbraced if/for/while.

// dest = v1 x v2 (cross product). dest must not alias v1 or v2: the
// components of dest are written while v1/v2 are still being read.
#define CROSS(dest,v1,v2) \
	{ \
		(dest)[0]=(v1)[1]*(v2)[2]-(v1)[2]*(v2)[1]; \
		(dest)[1]=(v1)[2]*(v2)[0]-(v1)[0]*(v2)[2]; \
		(dest)[2]=(v1)[0]*(v2)[1]-(v1)[1]*(v2)[0]; \
	}

// Dot product of v1 and v2; an expression, usable inside larger expressions.
#define DOT(v1,v2) ((v1)[0]*(v2)[0]+(v1)[1]*(v2)[1]+(v1)[2]*(v2)[2])

// dest = v1 - v2, component-wise (dest may alias v1 or v2).
#define SUB(dest,v1,v2) \
	{ \
		(dest)[0]=(v1)[0]-(v2)[0]; \
		(dest)[1]=(v1)[1]-(v2)[1]; \
		(dest)[2]=(v1)[2]-(v2)[2]; \
	}

// dest = v1 + v2, component-wise (dest may alias v1 or v2).
#define ADD(dest,v1,v2) \
	{ \
		(dest)[0]=(v1)[0]+(v2)[0]; \
		(dest)[1]=(v1)[1]+(v2)[1]; \
		(dest)[2]=(v1)[2]+(v2)[2]; \
	}


// Growth step, in entries, of the octree traversal stack: cOctree::AddCell
// enlarges its cell-pointer stack by this many slots whenever it is full.
#define MAX_TREE_NODES	64

// States of the renderer, stored in cRenderer::m_RS.
// NOTE(review): the per-state descriptions are inferred from the enumerator
// names — confirm against the implementation of cRenderer::Tick().
enum RendererStates
{
	RS_FINISHED,			// value m_RS is constructed with; presumably "no work (left) to do"
	RS_LOAD_NEXT_CHUNK,		// presumably: the next chunk has to be loaded
	RS_PROCESS_CHUNK,		// presumably: the current chunk is being processed
	RS_POSTPROCESS,			// presumably: post-processing of the result
};


// A bounding box structure.
// The same six floats are exposed through three overlapping views:
//   box[0..5]             - flat array; COctreeCell::Intersect indexes it so
//                           that box[0..2] is the min corner and box[3..5]
//                           the max corner
//   min[0..2] / max[0..2] - one array per corner
//   minX .. maxZ          - individually named components
// NOTE(review): anonymous structs inside a union are a compiler extension in
// C++, and the views only line up if the structs contain no padding — confirm
// for every compiler this header is built with.
typedef union
{
	struct
	{
		f32 box[6];
	};
	struct
	{
		f32 min[3];
		f32 max[3];
	};
	struct
	{
		f32 minX, minY, minZ;
		f32 maxX, maxY, maxZ;
	};
} OctBoundingBox;


// A ray as consumed by the octree traversal: origin, direction and the
// per-axis values derived from the direction that the box test needs.
class CRay
{
public:
	// Reads six packed floats: pData[0..2] become the ray origin (vFrom) and
	// pData[3..5] the original normal (vOrigNormal). No other member is
	// touched; in particular vDir, fT and fTmin must be set by the caller.
	inline void Setup( const float* pData )
	{
		int nAxis;
		for( nAxis = 0; nAxis < 3; ++nAxis )
			vFrom[nAxis] = pData[nAxis];
		for( nAxis = 0; nAxis < 3; ++nAxis )
			vOrigNormal[nAxis] = pData[3 + nAxis];
	}

	// Derives nSign and vInvDir from the current vDir. Must be called again
	// whenever vDir changes. A zero direction component is divided by as-is
	// (no special handling).
	inline void CalcInvDir()
	{
		for( int nAxis = 0; nAxis < 3; ++nAxis )
		{
			const float fComponent = vDir[nAxis];

			if( fComponent < 0.f )
				nSign[nAxis] = 3;
			else
				nSign[nAxis] = 0;

			vInvDir[nAxis] = 1.f / fComponent;
		}
	}

	int32				nSign[3];			// per axis: 3 when the direction component is negative, otherwise 0 (used as an offset into OctBoundingBox::box)
	float				vFrom[3];			// ray origin
	float				vInvDir[3];			// 1 / vDir, per component
	float				fT;					// far limit of the ray parameter
	float				fTmin;				// near limit of the ray parameter
	float				vDir[3];			// ray direction
	float				vOrigNormal[3];		// normal loaded by Setup() from pData[3..5]
};

// A cell within the Octree.
class COctreeCell
{
public:

	//===================================================================================
	// Method				:	Intersect
	// Description	:	Ray intersection with the octree.
	//===================================================================================
	inline bool Intersect(const CRay* pRay )
	{
		//X plane
		f32 fMinX = (m_boundingBox.box[ 0 + pRay->nSign[0] ] - pRay->vFrom[0]) * pRay->vInvDir[0];
		f32 fMaxX = (m_boundingBox.box[ 3 - pRay->nSign[0] ] - pRay->vFrom[0]) * pRay->vInvDir[0];

		//Y plane
		f32 fMinY = (m_boundingBox.box[ 1 + pRay->nSign[1] ] - pRay->vFrom[1]) * pRay->vInvDir[1];
		f32 fMaxY = (m_boundingBox.box[ 4 - pRay->nSign[1] ] - pRay->vFrom[1]) * pRay->vInvDir[1];

		if( fMinX > fMaxY || fMinY >  fMaxX )
			return false;

		f32 fNearDistance = fMinX > fMinY ? fMinX : fMinY;
		f32 fFarDistance = fMaxX < fMaxY ? fMaxX : fMaxY;

		//Z plane
		f32 fMinZ = (m_boundingBox.box[ 2 + pRay->nSign[2] ] - pRay->vFrom[2]) * pRay->vInvDir[2];
		f32 fMaxZ = (m_boundingBox.box[ 5 - pRay->nSign[2] ] - pRay->vFrom[2]) * pRay->vInvDir[2];

		if( fNearDistance > fMaxZ || fMinZ >  fFarDistance )
			return false;

		fNearDistance = fNearDistance > fMinZ ? fNearDistance : fMinZ;
		fFarDistance = fFarDistance < fMaxZ ? fFarDistance : fMaxZ;

		//test the planes
		return (fFarDistance > pRay->fTmin && fNearDistance < pRay->fT );
	}


public:
	// Number of stuff in this cell
	int32		m_numItems;
	int32  m_ChildrenNumber;

	// Bounding box for the cell.
	OctBoundingBox m_boundingBox;

	// The eight children of this cell
	COctreeCell* m_children[8];

	// List of stuff in this cell
	int32* m_item;

	// the last intersection's datas
//	f32 fNearDistance, fFarDistance;
};

class CIntersectionCacheCluster
{
public:
	CIntersectionCacheCluster():m_pCluster(NULL)
	{};
	~CIntersectionCacheCluster()
	{
		SAFE_DELETE_ARRAY( m_pCluster );
	}

	void					Init( const int32 nSideSize );															/// One size of a cluster

	inline void		SetCell( const int32 nCacheIndex, COctreeCell* pCell )	/// Set the last intersection cell based on the 1D offset
	{
		m_pCluster[ nCacheIndex ] = pCell;
	}

	inline int32	GetIndex( const float* pDir ) const
	{
		static int32 nProjIndex[6] =
		{
			1,2,
			2,0,
			0,1
		};
		int32 nID = 0;

		//search the biggest axis
		float fX = fabs(pDir[0]);
		float fY = fabs(pDir[1]);
		float fZ = fabs(pDir[2]);
		nID = fY > fX ? 1 : nID;
		nID = fZ > fY ? 2 : nID;
		bool bNegative = pDir[nID] < 0.f;
		float fMax = 1.f / pDir[nID];

		nID <<= 1;
		//project based on the biggest axis
		fX = pDir[0] * fMax;
		fY = pDir[1] * fMax;
		fZ = pDir[2] * fMax;


		//get the 2D projection to that axis
		float fSideX = pDir[ nProjIndex[nID] ];
		float fSideY = pDir[ nProjIndex[nID+1] ];

		//decide the face
		nID += bNegative ? 0 : 1;

		//calculate the offsets
		int32 nOffsetX = (int)floor( fSideX*m_fHalfSideSize+m_fHalfSideSize );
		int32 nOffsetY = (int)floor( fSideY*m_fHalfSideSize+m_fHalfSideSize );

		//prevent overflow
		nOffsetX -= (nOffsetX == m_nSideSize) ? 1 : 0;
		nOffsetY -= (nOffsetY == m_nSideSize) ? 1 : 0;

		//calculate
		return nID*m_nFaceSize+nOffsetY*m_nSideSize+nOffsetX;
	}

	inline COctreeCell*	GetCell( const int32 nCacheIndex ) const
	{
		return m_pCluster[ nCacheIndex ];
	}																																					/// Get the last intersection cell based on the 1D offset
protected:
	COctreeCell**	m_pCluster;																									/// The cluster itself
	int32					m_nFaceSize;																								/// The size of a face
	int32					m_nSideSize;																								/// The size of a side
	f32						m_fHalfSideSize;																						/// The half of a side of face
};


class cOctree
{
public:
	cOctree() : m_pTriangles(NULL), m_pCellArray(NULL), m_pCellItemArray(NULL),m_ppStackCell(NULL),m_nMaxCells(0)
	{
		InitializeCriticalSection( &m_csData );
	}

	~cOctree()
	{
		SAFE_DELETE_ARRAY( m_pTriangles );
		SAFE_DELETE_ARRAY( m_pCellArray );
		SAFE_DELETE_ARRAY( m_pCellItemArray );
		SAFE_DELETE_ARRAY( m_ppStackCell );
		DeleteCriticalSection( &m_csData );
	}

	void Log( const char* szFormat,...)
	{
		va_list arglist;
		va_start(arglist, szFormat);
		char szBuf[1024];
		vsprintf_s( szBuf, 1024, szFormat, arglist);

		::EnterCriticalSection( &m_csData );
		printf( szBuf );
		::LeaveCriticalSection( &m_csData );
	}

	JobServerResult	Load( const char* szFileName );

	inline bool FirstIntersect(CRay* pRay, CIntersectionCacheCluster* pCacheCluster, COctreeCell* pRootCell = NULL)
	{
//		++m_nSessionID;
		pRay->CalcInvDir();
		float fOriginalDistance = pRay->fT;
		pRay->fTmin = 0;
		//last node check... -> cacheing
		int32 nCacheIndex = pCacheCluster->GetIndex(pRay->vDir);
		COctreeCell* pLastCell = pCacheCluster->GetCell(nCacheIndex);

		//automatic max distance setup :)
		if( NULL != pLastCell )
			IntersectWithACell_Fast( pLastCell, pRay );

		// Walk the octree looking for intersections.
		int numCells = 0;
		if( pRootCell )
			AddCell (pRootCell, &numCells);
		else
			AddCell (m_pRoot, &numCells);


		while (numCells > 0)
		{
			// Take the cell from the list.
			numCells--;
			COctreeCell* currCell = m_ppStackCell[numCells];
			//the m_numItems exacly show it is a leaf or not.
			if( currCell->m_numItems )
			{
				//needed because the ray max distance is dynamic...
				if ( currCell->Intersect(pRay) )
				{
					f32 fT = pRay->fT;
					IntersectWithACell_Fast( currCell, pRay );

					if( pRay->fT < fT )
						pCacheCluster->SetCell(nCacheIndex, currCell);
				}
			} // end if leaf
			else
			{  // Non-leaf, add the children to the list if their bounding
				// box intersects the ray.
				//roll out the code

				COctreeCell** pChildrens = currCell->m_children;

				switch( currCell->m_ChildrenNumber )
				{
				case 8:
					if ((*pChildrens) != NULL && (*pChildrens)->Intersect(pRay))
						AddCell(*pChildrens, &numCells);
					pChildrens++;
				case 7:
					if ((*pChildrens) != NULL && (*pChildrens)->Intersect(pRay))
						AddCell(*pChildrens, &numCells);
					pChildrens++;
				case 6:
					if ((*pChildrens) != NULL && (*pChildrens)->Intersect(pRay))
						AddCell(*pChildrens, &numCells);
					pChildrens++;
				case 5:
					if ((*pChildrens) != NULL && (*pChildrens)->Intersect(pRay))
						AddCell(*pChildrens, &numCells);
					pChildrens++;
				case 4:
					if ((*pChildrens) != NULL && (*pChildrens)->Intersect(pRay))
						AddCell(*pChildrens, &numCells);
					pChildrens++;
				case 3:
					if ((*pChildrens) != NULL && (*pChildrens)->Intersect(pRay))
						AddCell(*pChildrens, &numCells);
					pChildrens++;
				case 2:
					if ((*pChildrens) != NULL && (*pChildrens)->Intersect(pRay))
						AddCell(*pChildrens, &numCells);
					pChildrens++;
				case 1:
					if ((*pChildrens) != NULL && (*pChildrens)->Intersect(pRay))
						AddCell(*pChildrens, &numCells);
					pChildrens++;
				}//switch
			} // end else non-leaf node.
		} // end while cells

		if( pRay->fT < fOriginalDistance )
			return true;

		pCacheCluster->SetCell(nCacheIndex,NULL);
		return false;
	}



private:
	void					GeneratePointersForCells( COctreeCell *pCell );

	//===================================================================================
	// Method					:	AddCell
	// Description		:	Add a cell into list.
	inline bool AddCell (COctreeCell* cell, int32* numCells)
	{
		// See if we have enough space first.
		if ((*numCells) >= m_nMaxCells)
		{
			m_nMaxCells += MAX_TREE_NODES;
			COctreeCell** tmp = new COctreeCell* [m_nMaxCells];
			if (tmp == NULL)
			{
				return false;
			}
			memset (tmp, 0, sizeof (COctreeCell*) * m_nMaxCells);
			memcpy (tmp, m_ppStackCell, sizeof (COctreeCell*) * (*numCells));
			delete [] m_ppStackCell;
			m_ppStackCell = tmp;
			tmp = NULL;
		}

		// Place the pointer in the list
		m_ppStackCell[(*numCells)] = cell;
		(*numCells)++;

		return true;
	}

	///////////////////////////////////////////////////////////////////////////////////
	//Run over all of the triangles give back the CLOSEST DISTANCE!
	///////////////////////////////////////////////////////////////////////////////////
	__inline void IntersectWithACell_Fast( const COctreeCell *cell, CRay *pRay )
	{
		//Hand optimized version
		int nLoopNum = (cell->m_numItems+15) / 16;
		int32* pItem = cell->m_item;
		switch( cell->m_numItems % 16 )
		{
			do
			{
		case 0:
			TriIntersect_Fast(pRay, *pItem++ );
		case 15:
			TriIntersect_Fast(pRay, *pItem++ );
		case 14:
			TriIntersect_Fast(pRay, *pItem++ );
		case 13:
			TriIntersect_Fast(pRay, *pItem++ );
		case 12:
			TriIntersect_Fast(pRay, *pItem++ );
		case 11:
			TriIntersect_Fast(pRay, *pItem++ );
		case 10:
			TriIntersect_Fast(pRay, *pItem++ );
		case 9:
			TriIntersect_Fast(pRay, *pItem++ );
		case 8:
			TriIntersect_Fast(pRay, *pItem++ );
		case 7:
			TriIntersect_Fast(pRay, *pItem++ );
		case 6:
			TriIntersect_Fast(pRay, *pItem++ );
		case 5:
			TriIntersect_Fast(pRay, *pItem++ );
		case 4:
			TriIntersect_Fast(pRay, *pItem++ );
		case 3:
			TriIntersect_Fast(pRay, *pItem++ );
		case 2:
			TriIntersect_Fast(pRay, *pItem++ );
		case 1:
			TriIntersect_Fast(pRay, *pItem++ );
			} while( --nLoopNum > 0 );
		}
	}

	//===================================================================================
	// Method					:	TriIntersect_Fast
	// Description		:	Main Triangle intersect routine (fast version - no intersection point )
	//===================================================================================
	inline bool TriIntersect_Fast(CRay* pRay, const int32 nTriID ) const
	{
		float *orig = pRay->vFrom;
		float *dir = pRay->vDir;
		float *vOrigNormal = pRay->vOrigNormal;
		float* pData = &m_pTriangles[ nTriID*18 ];

		f32 facenormal[3];
		f32 edge1[3],edge2[3];

		edge1[0] = pData[3] - pData[0];
		edge1[1] = pData[4] - pData[1];
		edge1[2] = pData[5] - pData[2];

		edge2[0] = pData[6] - pData[0];
		edge2[1] = pData[7] - pData[1];
		edge2[2] = pData[8] - pData[2];

		CROSS(facenormal, edge1, edge2);

		f32 fR1 = DOT( dir, facenormal );
		f32 fMinLen = DOT( facenormal, facenormal );
		f32 fAbsR1 = fabsf( fR1 ) * ( 1.f / 10e-5f );

		int nWrong = ( fMinLen < 10e-5f ) ? 1 : 0;
		nWrong += ( fAbsR1 < fMinLen ) ? 1 : 0;

		fR1 = ( fAbsR1 < fMinLen ) ? 1 : fR1;		//Div by zero

		f32 fR0 = DOT( orig, facenormal ) - DOT( pData, facenormal );
		f32 t = -fR0 / fR1;
		f32 fNormalDiffDist = fabs(t*DOT(dir, vOrigNormal ));

		//prevent shadow leaking - non valid tests..
		nWrong += t < 0.001f + pRay->fTmin ? 1 : 0;
		nWrong += t > pRay->fT ? 1 : 0;
		nWrong += fNormalDiffDist < 10e-5f ? 1 : 0;

		if( nWrong )
			return false;

		f32 v0[3],v1[3],v2[3],temp[3];

		SUB( v0, pData, orig );
		ADD( v1, v0, edge1 );
		ADD( v2, v0, edge2 );

		CROSS( temp, v0,v1 );
		f32 fDot0 = DOT( temp, dir );
		CROSS( temp, v1,v2 );
		f32 fDot1 = DOT( temp, dir );
		CROSS( temp, v2,v0 );
		f32 fDot2 = DOT( temp, dir );

		int nSign0 = fDot0 < 0.f ? -1 : (fDot0 > 0.f ? 1 : 0);
		int nSign1 = fDot1 < 0.f ? -1 : (fDot1 > 0.f ? 1 : 0);
		int nSign2 = fDot2 < 0.f ? -1 : (fDot2 > 0.f ? 1 : 0);

		int nGood = ( (!nSign0 && ( !nSign1 || !nSign2 ) ) || (!nSign1 && !nSign2) ) ? 1 : 0;
		nGood += (!nSign0 && nSign1 == nSign2 ) ? 1 : 0;
		nGood += (!nSign1 && nSign0 == nSign2 ) ? 1 : 0;
		nGood += (!nSign2 && nSign1 == nSign0 ) ? 1 : 0;
		nGood += (nSign0 == nSign1 && nSign0 == nSign2 ) ? 1 : 0;
		pRay->fT = nGood ? t : pRay->fT;
		return nGood ? true : false;
	}


	COctreeCell** m_ppStackCell;
	int m_nMaxCells;

	uint32				m_nTriangleNumber;
	float*				m_pTriangles;
	COctreeCell*	m_pCellArray;
	int32*				m_pCellItemArray;

	COctreeCell*	m_pRoot;

	CRITICAL_SECTION	m_csData;
};


// Post-processing job. The class holds no state; its only operation is
// implemented outside this header.
class cPostProcess
{
public:
	cPostProcess()
	{}

	// NOTE(review): judging by the names, converts the result identified by
	// nID inside szDirectory to a TGA file, with nSSNumber as a supersampling
	// count — confirm against the implementation.
	JobServerResult ConvertToTGA( const char* szDirectory, const int32 nID, const int nSSNumber );

private:

};


// Kinds of job.
// NOTE(review): presumably each value selects one of the operations declared
// in this header (rendering, blurring, post-processing) — confirm at the call site.
enum eJobType
{
	JOB_RENDERER,
	JOB_BLUR,
	JOB_POSTPROCESS
};


// Renderer job, driven through Init(), Tick() and Done(), which are
// implemented outside this header.
// NOTE(review): the destructor releases only the critical section; m_pData,
// m_pPicture, m_pOctree and the file handle f are presumably released by
// Done() — confirm, otherwise they leak.
class cRenderer
{
public:
	// Puts every member into a defined state (initialisers follow the
	// declaration order), so that accessors such as GetPercent() never read
	// indeterminate values before Init() has run.
	cRenderer()
		: m_RS( RS_FINISHED )
		, f(NULL)
		, m_nHeight(0)
		, m_nWidth(0)
		, m_nSampleNumber(0)
		, m_nBlockSize(0)
		, m_pPicture(NULL)
		, m_nFileID(0)
		, m_fSSDivider(1.f)
		, m_nPercent(0)
		, m_nPixelNumber(0)
		, m_nActualPixelNumber(0)
		, m_nChunkComponentOffset(0)
		, m_nChunkWidth(0)
		, m_nChunkHeight(0)
		, m_nChunkOctreeID(0)
		, m_nChunkSpanDirection(0)
		, m_pData(NULL)
		, m_nPixelID(0)
		, m_iM1(1)
		, m_iM2(1)
		, m_iSSNumber(0)
		, m_nSSID(0)
		, m_nActualOctreeID(-1)		// NOTE(review): -1 chosen as "no octree yet" — confirm that IDs are never negative
		, m_pOctree(NULL)
		, m_nStatChunkNumber(0)
		, m_nStatOctreeChanges(0)
		, m_nUsedMemory(0)
	{
		m_szDirectory[0] = '\0';
		InitializeCriticalSection( &m_csData );
	}

	~cRenderer()
	{
		DeleteCriticalSection( &m_csData );
	}

	// printf-style logging. The message is formatted into a 1024-byte buffer
	// and appended to "DJS_RAMJobServer.log" while holding m_csData. If the
	// log file cannot be opened the message is dropped.
	void Log( const char* szFormat,...)
	{
		char szBuf[1024];

		va_list arglist;
		va_start(arglist, szFormat);
		vsprintf_s( szBuf, sizeof(szBuf), szFormat, arglist);
		va_end( arglist );

		::EnterCriticalSection( &m_csData );
		FILE* pLogFile = NULL;
		if( 0 == fopen_s( &pLogFile, "DJS_RAMJobServer.log", "at" ) && NULL != pLogFile )
		{
			// The text is already formatted: write it verbatim so that a '%'
			// in the message is not interpreted as a conversion again.
			fprintf_s( pLogFile, "%s", szBuf );
			fclose( pLogFile );
		}
		::LeaveCriticalSection( &m_csData );
	}

	JobServerResult	Init( const char* szDirectory, const int32 nID, const int32 nM1, const int32 nM2, const int32 nSSNumber, const int32 nSSID, const int32 nWorkID );
	JobServerResult	Tick();
	void	Done();

	JobServerResult	BlurIt( const char* szDirectory, const int32 nID );

	// Current value of m_nPercent.
	int			GetPercent() const
	{
		return static_cast<int>( m_nPercent );
	}

	// Current value of m_nUsedMemory.
	int32		GetUsedMemoryMB() const { return m_nUsedMemory;}

private:
	uint8*	Load8BitTGA( const char* szFileName );
	bool		Save8BitDDS( const char* szFileName, float* pPicture, const int32 nWidth, const int32 nHeight );
	bool		Save8BitTGA( const char* szFileName, float* pPicture, const int32 nWidth, const int32 nHeight );
	bool		SaveFloatMap( const char* szFileName, float* pPicture, const int32 nWidth, const int32 nHeight );

	// Maps the chunk-local coordinates (nX, nY) to a linear index into a
	// surface that is nWidth entries wide, for the given span direction:
	//   0 (and any unknown value) : nY*nWidth + nX
	//   1                         : X mirrored within nChunkWidth
	//   2                         : X and Y swapped
	//   3                         : X and Y swapped, then mirrored within nChunkHeight
	_inline int32	GenerateIndexFromCoordiatesToSurface( const uint8 Direction, const int32 nX, const int32 nY, const int32 nWidth, const int32 nChunkWidth, const int32 nChunkHeight ) const
	{
		switch( Direction )
		{
		case 1: //SD_XFLIP:
			return (nY)*nWidth+nChunkWidth-(nX+1);
		case 2: //SD_XYFLIP:
			return nX*nWidth+nY;
		case 3: //SD_YFLIP:
			return (nX)*nWidth+nChunkHeight-(nY+1);
		case 0: //SD_NORMAL:
		default:
			return nY*nWidth+nX;
		}
	}


	RendererStates m_RS;

	//Picture management
	FILE*		f;
	int32		m_nHeight;
	int32		m_nWidth;
	uint32	m_nSampleNumber;
	uint32	m_nBlockSize;
	float*	m_pPicture;
	int32		m_nFileID;
	float		m_fSSDivider;
	uint32	m_nPercent;
	uint32	m_nPixelNumber;
	uint32	m_nActualPixelNumber;

	//Chunk management
	int32		m_nChunkComponentOffset;
	int32		m_nChunkWidth;
	int32		m_nChunkHeight;
	int32		m_nChunkOctreeID;
	uint8		m_nChunkSpanDirection;
	uint8*	m_pData;
	int32		m_nPixelID;
	int32		m_iM1,m_iM2;
	int32		m_iSSNumber;
	int32		m_nSSID;

	//Octree management
	int32				m_nActualOctreeID;
	cOctree*		m_pOctree;
	CRay				m_Ray;
	CIntersectionCacheCluster m_CacheCluster;
	char				m_szDirectory[MAX_PATH];

	//Statistics
	int32		m_nStatChunkNumber;
	int32		m_nStatOctreeChanges;

	int32		m_nUsedMemory;
	CRITICAL_SECTION	m_csData;		// serialises the file access of Log() within this instance
};


#endif//CRENDERER_H