

//////////////////////////////////////////////////////////////////////
//
//	Crytek SuperFramework Source code
//	
//	File:PolyBump.cpp
//  Description: Creates a poly bump with 2 models (low and high resolution) as input 
//
//	History: rmal
//	-March 15,2001:Created by Marco Corbetta
//  -July 13,2001: Rewritten from scratch by Marco Corbetta
//  -2/21/2002 modified to work in 3DStudio max as a thread (MM)
//
//////////////////////////////////////////////////////////////////////


#include "stdafx.h"

#include <vector>																			// std::vector
#include <list>																				// std::list
#include <map>																				// std::map
#include "TGA.h"																			// PIX_LoadTGA32
#include "list.h"																			// list2
#include "SimpleIndexedMesh.h"												// CSimpleIndexedMesh	
#include "PolyBump.h"																	// 
#include "PolyBumpWorkerThread.h"											// CPolyBumpWorkerThread
#include "CopyProtection.h"														// ChangeSerialYesNo(),GetSerialFromRegistry(),HowToGetSerialDialog()
#include "KeyGen.h"																		// SimpleCheck() SerialNo
#include "MAC_Address.h"															// NetbiosHelper_CheckCRC
#include "Crytek_Logo.h"															// Crytek Logo
#include "Cry_Math.h"																	//

#include "TriangleInputProxy.h"												// CTriangleInputProxy

#define NORMAL_MATCH 0.0f


typedef unsigned char UBYTE;



#ifdef USE_ADAPTIVEMONTECARLOSAMPLER
	#include "MonteCarloSampling\SamplingSystem.h"				// CSamplingSystem

	// user provided class (typedef StoredSampleType,GetSampleAt(),ModifyPriority())
	class CSampleCalculator
	{
	public:

		Vec3			m_vPos;										//!< surface position
		Vec3			m_vBaseX;									//!< normalized base vector 1
		Vec3			m_vBaseY;									//!< normalized base vector 2
		Vec3			m_vNormal;								//!< normalized normal vector
		float			m_fRayMaxLength;					//!< length of the ray (this could be optimized)
		CPbMesh *	m_pMesh;									//!< pointer to the high detail mesh
		float			m_fCurrentPriority;				//!< for early stopping the progress

		typedef bool StoredSampleType;			// true=sky hidden, false=sky visible 


		StoredSampleType GetSampleAt( float infX, float infY )
		{
			infX*=0.99f;infY*=0.99f;		// to prevent hits with the nearest surface itself
			
			float fZ=(float)sqrt(1.0-sqr(infX)-sqr(infY));

			Vec3 vDir=m_vBaseX*infX + m_vBaseY*infY + m_vNormal*fZ;

			// true=sky hidden, false=sky visible, 
			return( m_pMesh->CalcIntersectionFromTo(m_vPos,m_vPos+vDir*m_fRayMaxLength) );
		}


		//! this method is allowed to scale the value inpTriangle->m_fPriority down to
		//! adjust the adaptive sampling or return(false) to do no further sampling in this triangle
		//! \return true=do further spliting of the triangle, false=no further spliting
		bool ModifyPriority( CSamplingTriangle *inpTriangle )
		{
			assert(inpTriangle);

			m_fCurrentPriority=inpTriangle->m_fPriority;

		//		if(m_fCurrentPriority<0.1f)							// don't reject too early
			{
				bool in=false,out=false;

				CSamplingPoint<StoredSampleType> *ip[3];
				
				ip[0]=(CSamplingPoint<StoredSampleType> *)inpTriangle->GetVertex(0);
				ip[1]=(CSamplingPoint<StoredSampleType> *)inpTriangle->GetVertex(1);
				ip[2]=(CSamplingPoint<StoredSampleType> *)inpTriangle->GetVertex(2);

				if(ip[0]->m_UserData)in=true; else out=true;
				if(ip[1]->m_UserData)in=true; else out=true;
				if(ip[2]->m_UserData)in=true; else out=true;

				if(!(in && out))
					return(false);												// sample only between true and false
			}

			return(true);
		}
	};

	// CSamplingSystem<TriangleCount,EdgeCount,PointCount,SamplePointUserData,SampleCalculator>
	CSamplingSystem<2000,2000,2000,CSampleCalculator> g_sample;
#endif	// USE_ADAPTIVEMONTECARLOSAMPLER

//////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////
// definitions of the CPbMesh static members (all start out empty/zero)
int CPbMesh::m_nBumpImageSizeX=0;							// image width, passed to PIX_SaveTGA32 together with m_pnTriPointer in DrawTrisPointers
int CPbMesh::m_nBumpImageSizeY=0;							// image height, passed to PIX_SaveTGA32 together with m_pnTriPointer in DrawTrisPointers
int *CPbMesh::m_pnTriPointer=NULL;						// buffer saved as a 32 bit TGA by DrawTrisPointers; presumably one triangle index per texel - TODO confirm
unsigned char *CPbMesh::m_pcNormalMap=NULL;		// NOTE(review): not referenced in this part of the file; presumably the output normal map - confirm




//! Quantizes the three components of a vector from the range -1..1 to bytes 0..255.
//! Components outside of -1..1 are clamped.
//! \param invVec vector to encode (taken by value, the caller's vector is not modified)
//! \param outPtr receives three bytes in the order x,y,z
//! \param inbJitter true=add a small random offset to every component before quantizing
static void SetVectorAsRGB( Vec3 invVec, unsigned char *outPtr, const bool inbJitter )
{
	if(inbJitter)				// normal jitter for better quality
	{
		// every offset is in the range -1..1 * (1/255)
		const float fDivisor=(float)(512*255);
		const float fOffsetX=(float)((rand()%1024)-512)/fDivisor;
		const float fOffsetY=(float)((rand()%1024)-512)/fDivisor;
		const float fOffsetZ=(float)((rand()%1024)-512)/fDivisor;

		// shrink to -254/255..254/255 so the offset has some room before clamping
		invVec*=254/255.0f;

		invVec.x+=fOffsetX;
		invVec.y+=fOffsetY;
		invVec.z+=fOffsetZ;
	}

	const float fComponent[3]={ invVec.x, invVec.y, invVec.z };

	for(int iChannel=0;iChannel<3;iChannel++)
	{
		// map -1..1 to 0..255 (truncating) and clamp
		int iValue=(int)((fComponent[iChannel]+1.0f)*(255.0f/2.0f));

		if(iValue>255)
			iValue=255;
		else if(iValue<0)
			iValue=0;

		outPtr[iChannel]=(unsigned char)iValue;
	}
}

// constructor - creates an empty mesh that accepts every material
CPbMesh::CPbMesh()
{
	m_iMaterialID=-1;		// -1 = every material

	m_nNumTris=0;
	m_pMesh=NULL;

#ifndef USE_RASTERCUBE
	m_pRoot=NULL;
#endif
}


// destructor - releases the triangle array and (if compiled in) the m_pRoot object
CPbMesh::~CPbMesh()
{
	// deleting a null pointer is a no-op, so no explicit check is needed
	delete [] m_pMesh;
	m_pMesh=NULL;

#ifndef USE_RASTERCUBE
	delete m_pRoot;
	m_pRoot=NULL;
#endif
}


//! Calculates the axis aligned bounding box of all triangles and a bounding
//! sphere around the centre of that box.
//! \param outMin receives the minimum corner of the bounding box
//! \param outMax receives the maximum corner of the bounding box
//! \return square root of the value accumulated by CPbTri::ExtendSphere (presumably the sphere radius - TODO confirm)
float CPbMesh::CalcBoundingVolume( Vec3 &outMin, Vec3 &outMax )
{
	int iTri;

	// start with an inverted box so the first triangle defines it
	Vec3 vBoxMin(FLT_MAX,FLT_MAX,FLT_MAX);
	Vec3 vBoxMax(-FLT_MAX,-FLT_MAX,-FLT_MAX);

	for(iTri=0;iTri!=m_nNumTris;++iTri)
		m_pMesh[iTri].ExtendMinMax(vBoxMin,vBoxMax);

	// the sphere is centred in the middle of the box
	Vec3 vCentre=(vBoxMin+vBoxMax)*0.5f;
	float fRadiusSquared=0.0;

	for(iTri=0;iTri!=m_nNumTris;++iTri)
		m_pMesh[iTri].ExtendSphere(vCentre,fRadiusSquared);

	outMin=vBoxMin;
	outMax=vBoxMax;

	return((float)sqrt(fRadiusSquared));
}







//! Picks, from an already gathered hit list, the front facing hit that lies
//! nearest to the middle of the ray.
//! call ClearHitList();m_pRoot->GatherRayHits(invStart,invEnd);		before
//! \param inIntersections hits to choose from
//! \param invStart start of the ray
//! \param invEnd end of the ray
//! \param refnormal only triangles whose plane normal has a positive dot product with this are accepted
//! \param respoint receives the hit position (only written when a triangle is returned)
//! \return chosen triangle or 0 if no hit was acceptable
//////////////////////////////////////////////////////////////////////
CPbTri *CPbMesh::ChooseNearestAcceptableIntersection( CIntInfoList &inIntersections, const Vec3 &invStart, const Vec3 &invEnd, 
																											const Vec3 &refnormal, Vec3 &respoint )
{
#ifndef USE_RASTERCUBE
	assert(m_pRoot);
	if(!m_pRoot)
		return(0);
#endif

	// hits are rated by their distance to the middle of the ray
	const float fHalfRayLength=(invStart-invEnd).len()*0.5f;

	CPbTri *pChosen=NULL;
	float fSmallestOffset=3.402823466e+38F;								// maximum possible value

	for(CIntInfoIt itHit=inIntersections.begin();itHit!=inIntersections.end();++itHit)
	{
		CIntersInfo &rHit=(*itHit);

		const float fOffset=(float)(fabs(rHit.m_fDist-fHalfRayLength));

		if(!(fOffset<fSmallestOffset))
			continue;																					// not nearer than the current choice

		const float fFacing=rHit.m_pTri->m_Plane.n*refnormal;

		if(!(fFacing>0))
			continue;																					// faces away from the reference normal

		pChosen=rHit.m_pTri;
		fSmallestOffset=fOffset;
		respoint=rHit.m_Point;
	}

	return(pChosen);
}



	

//! Picks, from an already gathered hit list, the hit nearest to the middle of
//! the ray whose triangle normal matches the reference normal (see NORMAL_MATCH).
//! call ::ClearHitList();m_pRoot->GatherRayHits(invStart,invEnd);		before
//! \param inIntersections hits to choose from
//! \param invStart start of the ray
//! \param invEnd end of the ray
//! \param refnormal reference normal the triangle plane normal is compared with
//! \param respoint receives the hit position (only written when a triangle is returned)
//! \return chosen triangle or 0 if no hit matched
//////////////////////////////////////////////////////////////////////
CPbTri *CPbMesh::ChooseNearestIntersection( CIntInfoList &inIntersections, const Vec3 &invStart, const Vec3 &invEnd, 
																						const Vec3 &refnormal, Vec3 &respoint )
{
	CPbTri *pResult=NULL;
	float fBestOffset=3.402823466e+38F;											// maximum possible value

	// distance from the ray start to the middle of the ray
	const float fRayMid=(invStart-invEnd).len()*0.5f;

	CIntInfoIt itCurrent=inIntersections.begin();

	while(itCurrent!=inIntersections.end())
	{
		CIntersInfo &rInfo=(*itCurrent);
		const float fOffset=(float)(fabs(rInfo.m_fDist-fRayMid));

		if(fOffset<fBestOffset)
		{
			const float fDot=rInfo.m_pTri->m_Plane.n*refnormal;

			// accept only triangles with a good normal
			if(fDot>NORMAL_MATCH)
			{
				fBestOffset=fOffset;
				pResult=rInfo.m_pTri;
				respoint=rInfo.m_Point;
			}
		}

		itCurrent++;
	}

	return(pResult);
}



//! Picks, from an already gathered hit list, the hit with the biggest distance
//! whose triangle normal matches the reference normal (see NORMAL_MATCH).
//! call ClearHitList();m_pRoot->GatherRayHits(p1,p2);		before
//! \param inIntersections hits to choose from
//! \param invStart not used by this method
//! \param invEnd not used by this method
//! \param refnormal has to be normalized
//! \param respoint receives the hit position (only written when a triangle is returned)
//! \return chosen triangle or 0 if no hit matched
CPbTri *CPbMesh::ChooseLatestIntersection( CIntInfoList &inIntersections, const Vec3 &invStart, const Vec3 &invEnd, 
																					 const Vec3 &refnormal, Vec3 &respoint )
{
	CPbTri *pFarthestTri=NULL;
	float fFarthestDist=-1.0f;							// below every valid distance, so the first matching hit wins

	for(CIntInfoIt itHit=inIntersections.begin();itHit!=inIntersections.end();++itHit)
	{
		CIntersInfo &rHit=(*itHit);

		assert(rHit.m_fDist>=0.0f);

		if(!(rHit.m_fDist>fFarthestDist))
			continue;														// not farther than the current choice

		const float fDot=rHit.m_pTri->m_Plane.n*refnormal;

		if(!(fDot>NORMAL_MATCH))
			continue;														// normal does not match

		pFarthestTri=rHit.m_pTri;
		fFarthestDist=rHit.m_fDist;
		respoint=rHit.m_Point;
	}

	return(pFarthestTri);
}










#define EPS  0.00001f

// from nvidia kitchen 
//! /param v0 input [0..2] position vertex 1
//! /param v1 input [0..2] position vertex 2
//! /param v2 input [0..2] position vertex 3
//! /param t0 input [0..1] texture coordinate vertex 1
//! /param t1 input [0..1] texture coordinate vertex 2
//! /param t2 input [0..1] texture coordinate vertex 3
//! /param tangent output vector 1
//! /param binormal output vector 2
bool compute_tangent( const float * v0, const float * v1, const float * v2, 
                      const float * t0, const float * t1, const float * t2, 
                      Vec3 & tangent, Vec3 & binormal )
{
  Vec3 bi, cp, e0, e1;
//  sign = 1.0f;

  tangent  = Vec3(0,0,1);
  binormal = Vec3(0,0,1);

  // x
  e0[0] = v1[0] - v0[0];
  e0[1] = t1[0] - t0[0];
  e0[2] = t1[1] - t0[1];

  e1[0] = v2[0] - v0[0];
  e1[1] = t2[0] - t0[0];
  e1[2] = t2[1] - t0[1];


  cp = e0.Cross(e1);

  if ( fabs(cp[0]) > EPS )
  {
    tangent[0] = -cp[1] / cp[0];
    binormal[0] = -cp[2] / cp[0];
  }

  // y
  e0[0] = v1[1] - v0[1];
  e0[1] = t1[0] - t0[0];
  e0[2] = t1[1] - t0[1];

  e1[0] = v2[1] - v0[1];
  e1[1] = t2[0] - t0[0];
  e1[2] = t2[1] - t0[1];

  cp = e0.Cross(e1);

  if ( fabs(cp[0]) > EPS )
  {
    tangent[1] = -cp[1] / cp[0];
    binormal[1] = -cp[2] / cp[0];
  }

  // z
  e0[0] = v1[2] - v0[2];
  e0[1] = t1[0] - t0[0];
  e0[2] = t1[1] - t0[1];

  e1[0] = v2[2] - v0[2];
  e1[1] = t2[0] - t0[0];
  e1[2] = t2[1] - t0[1];

  cp = e0.Cross(e1);

  if ( fabs(cp[0]) > EPS )
  {
    tangent[2] = -cp[1] / cp[0];
    binormal[2] = -cp[2] / cp[0];
  }

  tangent.Normalize();
  binormal.Normalize();

  return true;
}



//! Calculates an orthogonal tangent space base (tangent, binormal, normal) for a triangle.
//! The tangent from compute_tangent() is kept, the binormal is rebuilt perpendicular to it
//! and the normal is flipped if the texture coordinates are mirrored.
//! \param v0 input [0..2] position vertex 1
//! \param v1 input [0..2] position vertex 2
//! \param v2 input [0..2] position vertex 3
//! \param t0 input [0..1] texture coordinate vertex 1
//! \param t1 input [0..1] texture coordinate vertex 2
//! \param t2 input [0..1] texture coordinate vertex 3
//! \param tangent output vector 1
//! \param binormal output vector 2
//! \param tnormal output vector 3
void compute_tangent_base( const float *v0, const float *v1, const float *v2, 
                      const float *t0, const float *t1, const float *t2, 
                      Vec3 &tangent, Vec3 &binormal, Vec3 &tnormal )
{
	// edges of the triangle in texture space
	const float fEdge1U=t1[0]-t0[0];
	const float fEdge1V=t1[1]-t0[1];
	const float fEdge2U=t2[0]-t0[0];
	const float fEdge2V=t2[1]-t0[1];

	// 2D cross product, the sign tells the winding of the triangle in texture space
	const float fWinding=fEdge1U*fEdge2V-fEdge1V*fEdge2U;

	compute_tangent(v0,v1,v2,t0,t1,t2,tangent,binormal);

	// make sure they are orthogonal
	tnormal=tangent.Cross(binormal);
	tnormal.Normalize();

	binormal=tnormal.Cross(tangent);
	binormal.Normalize();

	if(fWinding<0)
		tnormal=-tnormal;
}






//! Key that identifies one corner of one triangle: position index, texture
//! coordinate index, material and the triangle the corner belongs to.
class CVertexOrderHelper
{
public:

	//! constructor
	CVertexOrderHelper( DWORD indwPosIndex, DWORD indwTexIndex, DWORD indwShaderIndex, DWORD indwNeighbourTriID )
		:m_PosIndex(indwPosIndex),
		 m_TexIndex(indwTexIndex),
		 m_ShaderIndex(indwShaderIndex),
		 m_NeighbourTriID(indwNeighbourTriID)
	{
	}

	DWORD					m_PosIndex;										//!< 0xffffffff means unassigned
	DWORD					m_TexIndex;										//!< 0xffffffff means unassigned
	DWORD					m_ShaderIndex;								//!< material (shader) index
	DWORD					m_NeighbourTriID;							//!< index of the triangle
};


//! tangent and binormal stored per CVertexOrderHelper key
struct CVertexTangentBase
{
	Vec3					m_vTangent;								//!< tangent vector
	Vec3					m_vBinormal;							//!< binormal vector
};


// helper to get order for CVertexLoadHelper
struct CVertexOrder: public std::binary_function< CVertexOrderHelper, CVertexOrderHelper, bool>
{
	bool operator() ( const CVertexOrderHelper &a, const CVertexOrderHelper &b ) const
	{
		// first sort by position
		if(a.m_PosIndex<b.m_PosIndex)return(true);								// from 0 to max sorted
		if(a.m_PosIndex>b.m_PosIndex)return(false);

		// then by texture
		if(a.m_TexIndex<b.m_TexIndex)return(true);								// from 0 to max sorted
		if(a.m_TexIndex>b.m_TexIndex)return(false);

		// then by material
		if(a.m_ShaderIndex<b.m_ShaderIndex)return(true);					// from 0 to max sorted
		if(a.m_ShaderIndex>b.m_ShaderIndex)return(false);

		// then by triangle id
		if(a.m_NeighbourTriID<b.m_NeighbourTriID)return(true);		// from 0 to max sorted
		if(a.m_NeighbourTriID>b.m_NeighbourTriID)return(false);

		return(false);
	}
};




// * Build index for tangent space (tangent vector, binormal vector) and build a new index value for every
//   - vertex index
//   - material id
//   - normal index (build from smoothing groups)
//   - texture index
//
// * Calc tangent and binormal per index, sum indices together
//
// * normalize result



//! Transforms the indexed mesh into the triangle array of this object: positions,
//! texture coordinates and vertex normals are copied per triangle corner, the
//! tangent and binormal are cleared and the internals of every triangle are refreshed.
//! Does nothing if the mesh has no vertices.
//! NOTE(review): a triangle array from a previous call is overwritten without being
//! released here - confirm this method is called only once per object.
//! \param mesh source mesh
//! \param iniMaterialID stored in m_iMaterialID
//////////////////////////////////////////////////////////////////////
void CPbMesh::GetNormalsAndClearBaseVectors( CSimpleIndexedMesh *mesh, int iniMaterialID )
{
	assert(mesh->m_VertCount!=0);
	if(mesh->m_VertCount==0)
		return;

	m_iMaterialID=iniMaterialID;

	m_pMesh=new CPbTri[mesh->m_FaceCount];
	m_nNumTris=mesh->m_FaceCount;

	// copy data and build triangle data structures
	for(int iFace=0;iFace<mesh->m_FaceCount;iFace++)
	{
		CObjFace &rFace=mesh->m_pFaces[iFace];
		CPbTri &rTri=m_pMesh[iFace];

		for(int iCorner=0;iCorner<3;iCorner++)
		{
			assert(rFace.v[iCorner]>=0);

			// position
			CObjVert *pSrcVert=&mesh->m_pVerts[rFace.v[iCorner]];
			memcpy(rTri.m_Verts[iCorner],pSrcVert,sizeof(Vec3));

			// texture coordinates (0,0 if no uv mapping is supplied)
			if(mesh->m_pCoors)
			{
				CObjCoor *pSrcCoor=&mesh->m_pCoors[rFace.t[iCorner]];

				rTri.m_fS[iCorner]=pSrcCoor->s;
				rTri.m_fT[iCorner]=pSrcCoor->t;
			}
			else
			{
				rTri.m_fS[iCorner]=0.0f;
				rTri.m_fT[iCorner]=0.0f;
			}

			// vertex normal (zero vector if no normals are supplied)
			if(mesh->m_pNorms)
			{
				CObjNorm *pSrcNorm=&mesh->m_pNorms[rFace.n[iCorner]];
				memcpy(rTri.m_VertsNormal[iCorner],pSrcNorm,sizeof(Vec3));
			}
			else
			{
				rTri.m_VertsNormal[iCorner]=Vec3(0,0,0);
			}

			// clear tangent base vectors
			rTri.m_vTangent[iCorner]=Vec3(0,0,0);
			rTri.m_vBinormal[iCorner]=Vec3(0,0,0);
		}

		rTri.RefreshInternals();										// this is necessary to get rayshooting working
	}
}








//! Builds the triangle array from the mesh and fills in the tangent space base
//! of every triangle corner using CTangentSpaceCalculation.
//! \param mesh source mesh, must not be 0
//! \param iniMaterialID passed on to GetNormalsAndClearBaseVectors()
void CPbMesh::BuildNormalsAndBaseVectors( CSimpleIndexedMesh *mesh, int iniMaterialID )
{
	assert(mesh);		// must not be 0 for CTriangleInputProxy()

	GetNormalsAndClearBaseVectors(mesh,iniMaterialID);

	CTangentSpaceCalculation<CTriangleInputProxy> tangents;
	CTriangleInputProxy Input(mesh);

	tangents.CalculateTangentSpace(Input);

	// store the calculated data, triangle by triangle
	for(DWORD dwTri=0;dwTri<(DWORD)mesh->m_FaceCount;dwTri++)
	{
		CPbTri &rTri=m_pMesh[dwTri];
		DWORD dwCornerBase[3];

		tangents.GetTriangleBaseIndices(dwTri,dwCornerBase);

		rTri.m_iTriMaterialID=mesh->m_pFaces[dwTri].shader_id;

		for(int iCorner=0;iCorner<3;iCorner++)
		{
			float fBaseU[3],fBaseV[3],fBaseN[3];

			tangents.GetBase(dwCornerBase[iCorner],fBaseU,fBaseV,fBaseN);

			// first vector goes to the binormal, second one to the tangent
			rTri.m_vBinormal[iCorner]=Vec3(fBaseU[0],fBaseU[1],fBaseU[2]);
			rTri.m_vTangent[iCorner]=Vec3(fBaseV[0],fBaseV[1],fBaseV[2]);
			rTri.m_vTNormal[iCorner]=Vec3(fBaseN[0],fBaseN[1],fBaseN[2]);
		}
	}
}







//! Builds the triangle array from the mesh, calculates a smoothed tangent space
//! base for every triangle corner and fills the clone map with the per triangle base.
//!
//! Works in three passes:
//!  1. per triangle: calculate tangent/binormal/normal, hand them to the clone map and
//!     store tangent and binormal, weighted by the corner angle, per triangle corner
//!  2. per triangle corner: sum the stored contributions of all triangles that share
//!     position, texture coordinate and material and are smooth to this triangle
//!  3. per triangle corner: orthonormalize the summed vectors
//!
//! \param mesh source mesh
//! \param inpCloneMap receives the unweighted base of every triangle (expected to be not 0)
//! \param iniMaterialID passed on to GetNormalsAndClearBaseVectors()
void CPbMesh::BuildNormalsAndBaseVectors( CSimpleIndexedMesh *mesh, CPBCloneMap *inpCloneMap, int iniMaterialID )
{
	assert(inpCloneMap);

	GetNormalsAndClearBaseVectors(mesh,iniMaterialID);

	typedef std::map<CVertexOrderHelper,CVertexTangentBase,CVertexOrder> CTangentBaseMap;

	CTangentBaseMap TangentBases;

	assert(m_pMesh);
	if(!m_pMesh)
		return;				// no triangle array has been built (mesh without vertices)

	// Pass 1: calculate the tangent vectors per triangle and store them per corner
	{
		CPbTri    *pTri=m_pMesh;
		CObjFace	*pTface=mesh->m_pFaces;

		for (int k=0;k<mesh->m_FaceCount;k++,pTface++,pTri++)
		{	
			Vec3 vTangent,vBinormal,vTNormal;

			float t0[2] = { pTri->m_fS[0] , pTri->m_fT[0] };		// uv coordinates
			float t1[2] = { pTri->m_fS[1] , pTri->m_fT[1] };
			float t2[2] = { pTri->m_fS[2] , pTri->m_fT[2] };

			compute_tangent_base( (float *)pTri->m_Verts[0],(float *)pTri->m_Verts[1],(float *)pTri->m_Verts[2], t0,t1,t2, vTangent,vBinormal,vTNormal);

			// fill in the .CLN map information (v is flipped)
			if(inpCloneMap)
			{
				float fU[3]={ t0[0],t1[0],t2[0] };
				float fV[3]={ 1.0f-t0[1],1.0f-t1[1],1.0f-t2[1] };
				Vec3 vBase[3];

				vBase[0]=vTangent;
				vBase[1]=vBinormal;
				vBase[2]=vTNormal;
				inpCloneMap->Set(fU,fV,vBase);
			}

			// for every vertex of the triangle
			for(int j=0;j<3;j++)
			{
				CVertexOrderHelper VertexIdx(pTface->v[j],pTface->t[j],pTface->shader_id,k );		
#ifdef _DEBUG
				CTangentBaseMap::iterator iFind = TangentBases.find(VertexIdx);
				assert(iFind==TangentBases.end());
#endif

				// calculate angle at this vertex between the triangles sides
				float angle;
				{
					Vec3 vVecA		= pTri->m_Verts[(j+2)%3];
					Vec3 vVecB		= pTri->m_Verts[(j+1)%3];
					Vec3 vVecMid = pTri->m_Verts[j];

					angle=CalcAngleBetween( vVecA-vVecMid,vVecB-vVecMid );
				}

				// weight by the angle of this corner only - the triangle vectors themselves
				// must stay untouched, otherwise the weights of the three corners would be
				// multiplied together (corner 1 would get angle0*angle1, corner 2 angle0*angle1*angle2)
				CVertexTangentBase Base;
	
				Base.m_vTangent=vTangent*angle;
				Base.m_vBinormal=vBinormal*angle;

				// store the data in the map
				TangentBases[VertexIdx]=Base;
			} //j
		} //k	
	}


	// Pass 2: for every vertex sum the contributions of the triangles together
	{
		CPbTri    *pTri=m_pMesh;
		CObjFace	*pTface=mesh->m_pFaces;

		for(DWORD k=0;k<(DWORD)mesh->m_FaceCount;k++,pTface++,pTri++)
		{	
			for(int j=0;j<3;j++)
			{
				// triangle id 0 sorts first, so lower_bound() returns the first entry with this
				// position/texture/material (at least the entry of triangle k itself exists)
				CVertexOrderHelper VertexIdx(pTface->v[j],pTface->t[j],pTface->shader_id,0 );	// 0 because I want all neighbours

				CTangentBaseMap::iterator iFind=TangentBases.lower_bound(VertexIdx);

				int iSumCount=0;

				// walk over all entries that share position, texture and material
				if(iFind!=TangentBases.end())
				for(;;)
				{
					if(mesh->IsSmoothBetweenTri(k,(*iFind).first.m_NeighbourTriID))
					{
						CVertexTangentBase &refBase=(*iFind).second;

						pTri->m_vTangent[j]+=refBase.m_vTangent;
						pTri->m_vBinormal[j]+=refBase.m_vBinormal;
						iSumCount++;
					}

					++iFind;
					if(iFind==TangentBases.end())break;
					if((*iFind).first.m_PosIndex!=VertexIdx.m_PosIndex)break;
					if((*iFind).first.m_TexIndex!=VertexIdx.m_TexIndex)break;
					if((*iFind).first.m_ShaderIndex!=VertexIdx.m_ShaderIndex)break;
				}

				// no triangles with smoothing found, so use the data from the triangle
				if(!iSumCount)
				{
					CVertexOrderHelper OwnIdx(pTface->v[j],pTface->t[j],pTface->shader_id,k);
					CVertexTangentBase &refBase=TangentBases[OwnIdx];

					pTri->m_vTangent[j]=refBase.m_vTangent;
					pTri->m_vBinormal[j]=refBase.m_vBinormal;
				}
			}
		}
	}


	// Pass 3: orthonormalize tangent-space base vectors
	{
		CPbTri    *pTri=m_pMesh;

		// for every triangle
		for(DWORD k=0;k<(DWORD)mesh->m_FaceCount;k++,pTri++)
		{	
			Vec3 face_normal=pTri->m_Plane.n;

			// for every vertex of the triangle
			for(int j=0;j<3;j++)
			{
				pTri->m_vTangent[j].Normalize();
				pTri->m_vBinormal[j].Normalize();

				Vec3 tnormal=pTri->m_vTangent[j].Cross(pTri->m_vBinormal[j]);
				tnormal.Normalize();

				// Gram-Schmidt orthogonalization process for B
				// compute the cross product B=NxT to obtain 
				// an orthogonal basis
				pTri->m_vBinormal[j] = tnormal.Cross(pTri->m_vTangent[j]);

				// the stored normal always points to the side of the face normal
				if(tnormal.Dot(face_normal) < 0)
					tnormal = -tnormal;

				pTri->m_vTNormal[j]=tnormal;
			}
		}
	}
}





//! Draws the triangle pointers of all triangles that match the material filter
//! (m_iMaterialID, -1 = every material) in two passes: DrawTriPointer() is called
//! first with true (conservative), then with false (non conservative).
//! \param inbDebug true=save the content of m_pnTriPointer as TGA after each pass
//////////////////////////////////////////////////////////////////////
void CPbMesh::DrawTrisPointers( const bool inbDebug )
{
	// declared once for both loops: the second loop used to reuse the variable declared in
	// the first for-statement, which is only accepted by the non-standard MSVC for-scope rules
	int k;

	// conservative
	for(k=0;k<m_nNumTris;k++)
		if(m_iMaterialID==-1 || m_pMesh[k].m_iTriMaterialID==m_iMaterialID)
			m_pMesh[k].DrawTriPointer(k,true);

	if(inbDebug)
		PIX_SaveTGA32("DrawTrisPointersCon.tga",(unsigned char *)CPbMesh::m_pnTriPointer,CPbMesh::m_nBumpImageSizeX,CPbMesh::m_nBumpImageSizeY,false,false);

	// non conservative
	for(k=0;k<m_nNumTris;k++)
		if(m_iMaterialID==-1 || m_pMesh[k].m_iTriMaterialID==m_iMaterialID)
			m_pMesh[k].DrawTriPointer(k,false);

	if(inbDebug)
		PIX_SaveTGA32("DrawTrisPointersBoth.tga",(unsigned char *)CPbMesh::m_pnTriPointer,CPbMesh::m_nBumpImageSizeX,CPbMesh::m_nBumpImageSizeY,false,false);
}



// optimizable
// works only for RGB 
void ExpandBitmap( DWORD indwWidth, DWORD indwHeight, DWORD *inpSrc, DWORD *inpDst, bool inbDiagonal, DWORD indwColorKey )
{
	DWORD x,y;
	// expand the color information
	for(y=0;y<indwHeight;y++)
	{
		for(x=0;x<indwWidth;x++)
		{
			DWORD s=*inpSrc;

			if(s==indwColorKey)																														// at this point
			{
				DWORD dwRed=0,dwGreen=0,dwBlue=0,nCount=0;

				if(x!=0)if((s=inpSrc[-1])!=indwColorKey)																		// left
				{ 
					dwRed+=2*((s>>16)&0xff);dwGreen+=2*((s>>8)&0xff);dwBlue+=2*(s&0xff);	nCount+=2; 

					if(inbDiagonal)
					{
						if(y!=0)if((s=inpSrc[-(int)indwWidth-1])!=indwColorKey)									// up left
						{ 
							dwRed+=(s>>16)&0xff;dwGreen+=(s>>8)&0xff;dwBlue+=s&0xff;	nCount++; 
						}

						if(y!=indwHeight-1)if((s=inpSrc[indwWidth-1])!=indwColorKey)						// down left
						{ 
							dwRed+=(s>>16)&0xff;dwGreen+=(s>>8)&0xff;dwBlue+=s&0xff;	nCount++;
						}
					}
				}

				if(x!=indwWidth-1)if((s=inpSrc[1])!=indwColorKey)														// right
				{ 
					dwRed+=2*((s>>16)&0xff);dwGreen+=2*((s>>8)&0xff);dwBlue+=2*(s&0xff);	nCount+=2; 

					if(inbDiagonal)
					{
						if(y!=0)if((s=inpSrc[-(int)indwWidth+1])!=indwColorKey)									// up right
						{ 
							dwRed+=(s>>16)&0xff;dwGreen+=(s>>8)&0xff;dwBlue+=s&0xff;	nCount++; 
						}

						if(y!=indwHeight-1)if((s=inpSrc[indwWidth+1])!=indwColorKey)						// down right
						{ 
							dwRed+=(s>>16)&0xff;dwGreen+=(s>>8)&0xff;dwBlue+=s&0xff;	nCount++;
						}
					}
				}

				if(y!=0)if((s=inpSrc[-(int)indwWidth])!=indwColorKey)												// up
				{ 
					dwRed+=2*((s>>16)&0xff);dwGreen+=2*((s>>8)&0xff);dwBlue+=2*(s&0xff);	nCount+=2; 
				}

				if(y!=indwHeight-1)if((s=inpSrc[indwWidth])!=indwColorKey)									// down
				{ 
					dwRed+=2*((s>>16)&0xff);dwGreen+=2*((s>>8)&0xff);dwBlue+=2*(s&0xff);	nCount+=2; 
				}

				if(nCount)																																// there was a neighbour
				{
					dwRed/=nCount;dwGreen/=nCount;dwBlue/=nCount;
					s=(dwRed<<16) | (dwGreen<<8) | (dwBlue);
				}
			}

			*inpDst++=s;
			inpSrc++;
		}
	}
}



//! Estimates how much of the hemisphere above a surface point is not occluded by the
//! high resolution mesh.
//! \param invPos surface position
//! \param invNormal surface normal (expected to be normalized), the rays start slightly above the surface along it
//! \param inpHighMesh mesh the rays are tested against
//! \param infRayLength length of the rays
//! \param iniQuality 0=worst..10=very good
//! \param outvAverageNormal receives the average unoccluded direction scaled by the visibility
//!        (stays (0,0,0) if USE_ADAPTIVEMONTECARLOSAMPLER is defined)
//! \param inpIgnoreObject triangle passed to CalcIntersection() to be ignored (not used if USE_ADAPTIVEMONTECARLOSAMPLER is defined)
//! \return visible part of the hemisphere
float CalcAccessability( const Vec3 invPos, const Vec3 invNormal, CPbMesh *inpHighMesh, 
												 float infRayLength, int iniQuality, Vec3 &outvAverageNormal, const CPbTri *inpIgnoreObject )
{
	Vec3	vFrom=invPos+invNormal*infRayLength*0.002f,vBaseA,vBaseB;
	GetOtherBaseVec(invNormal,vBaseA,vBaseB);															// base vectors of the surface

	outvAverageNormal=Vec3(0,0,0);																				// not calculated

#ifdef USE_ADAPTIVEMONTECARLOSAMPLER

	CSampleCalculator &prop=g_sample.ProblemConnection();

	float w=(2*PI)*(rand()%1024)*(1.0f/1024.0f);													// random rotation in circle
	float s=(float)sin(w),c=(float)cos(w);

	prop.m_vBaseX=vBaseA*s+vBaseB*c;
	prop.m_vBaseY=vBaseA*c-vBaseB*s;
	prop.m_vNormal=invNormal;							// was "highresnorm", which does not exist in this function
	prop.m_fRayMaxLength=infRayLength;		// was "infRayMaxLength", which does not exist in this function
	prop.m_vPos=vFrom;
	prop.m_pMesh=inpHighMesh;

	g_sample.ResetDisc(12,true);		// 12 circlesteps, hemisphere sampling

	{
		const float compare=0.005f;

		// refine until the sampler has nothing to split or the priority gets too small
		for(int i=0;i<5*100;i++)
		{
			if(!g_sample.SplitHightestPriorityTriangle())
				break;

			if(prop.m_fCurrentPriority<compare)
				break;
		}
	}

	int iCount;
	float fSum=0.0f,fMax;

	CSamplingPoint<CSampleCalculator::StoredSampleType> *points=g_sample.CalculatePointWeights(iCount,fMax);
	assert(fMax>0.0f);

	// sum the weights of all samples that see the sky
	{
		for(int i=0;i<iCount;i++)
			if(!points[i].m_UserData)fSum+=points[i].m_fWeight;
	}

	if(fMax==0.0f)fMax=1.0f;	// to prevent divide by zero

	return(fSum/fMax);

#else	// USE_ADAPTIVEMONTECARLOSAMPLER

	// hemisphere lighting implemented with optimized algorithm (fast, good approximation)
	//
	// algorithm works like this:
	//
	//  To sample the whole hemisphere by dividing it into wedges (nAngleMaxCount).
	//  For every wedge a shadow horizon is searched (binary search, nTiltMaxCount times => resolution:2^nTiltMaxCount) 
	//  for good results (2^nTiltMaxCount) should be nearly the same as (nAngleMaxCount*3.14)
	float fSkyArea=0.0f;		
	float walker=gf_PI_MUL_2*(rand()%1024)*(1.0f/1024.0f);												// random rotation in circle
	int nTiltMaxCount=iniQuality;
	int nAngleMaxCount=12;
	const float walkerstep=gf_PI_MUL_2/(float)nAngleMaxCount;

	for(int w=0;w<nAngleMaxCount;w++,walker+=walkerstep)
	{
		float s=sinf(walker),c=cosf(walker);
		float tiltanglepart;

		// binary search for the shadow horizon of this wedge
		{
			float part=0.5f;					// 0..1
			float deltapart=0.25f;		//

			for(int t=0;t<nTiltMaxCount;t++)
			{
				// unit length direction tilted by part*90 degrees away from the normal:
				// sine for the part in the surface plane, cosine for the part along the normal
				// (the former sqrt(1-tilt) did not result in a unit length vector)
				float tiltangle=part*gf_PI_DIV_2;
				float tilt=sinf(tiltangle);
				float rndx,rndy,rndz;

				rndx=s*tilt; rndy=c*tilt; rndz=cosf(tiltangle);

				Vec3 vDir=vBaseA*rndx + vBaseB*rndy + invNormal*rndz;

				if(!inpHighMesh->CalcIntersection(vFrom,vFrom+vDir*infRayLength,inpIgnoreObject))
					part+=deltapart;			// sky visible, tilt further
				else
					part-=deltapart;			// occluded, tilt back

				deltapart*=0.5f;
			}

			tiltanglepart=part;
		}
			
		// calculate the average normal - simple algorithm but this way it was easy to implement
		float fWedgeArea=(1.0f/(float)nAngleMaxCount)*(tiltanglepart*tiltanglepart);
		{
			float tiltdeltaanglepart=0.5f/(float)(1<<iniQuality);

			for(float fPart=0;fPart<tiltanglepart;fPart+=tiltdeltaanglepart)
			{
				assert(fPart<=1.1f);		// fPart 0..1
				float tiltangle=fPart*gf_PI_DIV_2;
				float tilt=sinf(tiltangle);
				float rndx,rndy,rndz;

				rndx=s*tilt; rndy=c*tilt; rndz=cosf(tiltangle);			// unit length direction, see above

				Vec3 vDir=vBaseA*rndx + vBaseB*rndy + invNormal*rndz;

				outvAverageNormal+=vDir * fWedgeArea;		// stronger average normal (more realistic)
			}
		}

		fSkyArea+=fWedgeArea;
	}

	// correct the average normal (no surface hit -> unit length normal)
	outvAverageNormal.Normalize();
	outvAverageNormal*=max(invNormal*outvAverageNormal,0);					// the more the normal is rotated - the more is the length reduced - less specular
	outvAverageNormal*=1.0f-(1.0f-fSkyArea)*(1.0f-fSkyArea);				// better looking - not physical correct 

	return(fSkyArea);

#endif	// USE_ADAPTIVEMONTECARLOSAMPLER

/*
	// hemisphere lighting implemented with simple Monte-Carlo sampling (slow but correct)
	int nCount=0;
	const int nMaxCount=64*6;
	for(int i=0;i<nMaxCount;i++)		// nMaxCount samples
	{
		Vec3 vTo;

		// get ray in a sphere
		do
		{
			vTo=Vec3((rand()%1024)*(1.0f/512.0f)-1.0f,
								(rand()%1024)*(1.0f/512.0f)-1.0f,
								(rand()%1024)*(1.0f/512.0f)-1.0f);

		} while(vTo.Length2()>1.0f);

		if(vTo*highresnorm<0.0f)vTo*=-1.0f;			// wrong side of hermisphere, flip it

		// get ray on a sphere
		vTo.Normalize();

		vTo=vFrom+vTo*infRayScale;

		if(!inpHighMesh->CalcIntersection(vFrom,vTo))
			nCount++;
	}
	*outpAccessability=(float)nCount/(float)nMaxCount;
*/
}



// /param outpAccessability 0 or pointer to a float, will be in the range [0..1]
// /param outOccDirection 0 if is outpAccessability 0 as well or pointer to the Vec3
bool GetSamplePoint( CPbTri *inpTriLow, float infRayLength, float infRayMaxLength, CPbMesh *inpLowMesh,
										 CPbMesh *inpHighMesh, float infS, float infT, CPolyBumpWorkerThread *inpData,
										 Vec3 &outNormal, float *outpAccessability, Vec3 *outOccDirection, float *outfDisplacement, bool inbDoCLipping )
{
	CPbTri *cb=NULL;
	Vec3 lowresnorm,mid,end,highresnorm,respoint;
	Vec3 vLowResTangent,vLowResBinormal,vLowResTNormal;
	Vec3 vOcclusionDirection;																					// only used if outOccDirection!=0

	DWORD dwWidth=inpData->m_Properties.GetOutputWidth();
	DWORD dwHeight=inpData->m_Properties.GetOutputHeight();

	float deltamapx=1.0f/dwWidth;
	float deltamapy=1.0f/dwHeight;

	//get interpolated point and normal using barycentric coords from texel's uv
	inpTriLow->CalcBarycentricCoordsFromUV( infS,infT,respoint,lowresnorm,
																					vLowResTangent,vLowResBinormal,vLowResTNormal,
																					inbDoCLipping);

	float fNormalScaleFactor=1.0;

	{
		float fLen=lowresnorm.Length();
	
		if(fLen!=0.0f) fNormalScaleFactor = 1.0f/fLen;

		lowresnorm*=fNormalScaleFactor;		// is neccessary because result from CalcBarycentricCoordsFromUV is unnormalized
	}


	mid=respoint;

	// m.m. optimization (I shoot only one ray - because both rays have the same direction,
	// the orientation doesnt matter )
	end=mid+lowresnorm*infRayLength;
	Vec3 begin=mid-lowresnorm*infRayLength;

	if(inpData->m_Properties.m_nHitMode==3)		// best hit2
	{
#ifndef USE_RASTERCUBE
		assert(inpLowMesh->m_pRoot);
#endif

		float fMinLength=infRayLength;

		CIntInfoList hitlist;				hitlist.reserve(64);
		
		inpLowMesh->GatherRayHits(mid,end,hitlist,0);																							// get unsorted list
	    
		for(CIntInfoIt i=hitlist.begin();i!=hitlist.end();i++)
		{
			CIntersInfo &ci=*(hitlist.begin());

			if(ci.m_pTri==inpTriLow)continue;																											// I hit myself

			float factor=ci.m_pTri->m_Plane.n*lowresnorm;

			if(factor<=NORMAL_MATCH)continue;																											// only normals that points against me

//			Vec3 vLowHitNormal;

//			ci.m_pTri->CalcBarycentricCoordsFromPoint(ci.m_Point,vLowHitNormal);
//			float flLength=(float)sqrt(ci.m_fDist2);
			float flLength=ci.m_fDist;

			if(flLength<fMinLength)
			{
				fMinLength=flLength;
				// hit also lowres, if there is a hit, 60% to this hit is used to cast a ray for the highres
				end=mid+lowresnorm*fMinLength*0.8f;
			}
		}
	}

	CIntInfoList hitList;			hitList.reserve(64);

	// used by CalcIntersectionAndChoose...	
  inpHighMesh->GatherRayHits(begin,end,hitList,0);																// fill list

	switch(inpData->m_Properties.m_nHitMode)
	{	
		// nearest hit
		case 0: cb=inpHighMesh->ChooseNearestIntersection(hitList,begin,end,lowresnorm,respoint);break;
		// latest hit
		case 1: cb=inpHighMesh->ChooseLatestIntersection(hitList,begin,end,lowresnorm,respoint);break;
		// besthit2
		case 3: cb=inpHighMesh->ChooseLatestIntersection(hitList,begin,end,lowresnorm,respoint);break;
	}

	if(!cb) cb=inpHighMesh->ChooseNearestAcceptableIntersection(hitList,begin,end,lowresnorm,respoint);

	if(cb) cb->CalcBarycentricCoordsFromPoint(respoint,highresnorm,false);
		else return(false);																														//no tri at this texel

	// bump mapping
	{
		float dfX,dfY;

		if(inpData->GetBumpMapDerivate(infS,1.0f-infT,dfX,dfY))	// is bumpmap supplied?
		{
			Vec3 point12,point21;
			Vec3 vUnused;		// unused

			float stepx=deltamapx*0.1f;
			float stepy=deltamapy*0.1f;

			inpTriLow->CalcBarycentricCoordsFromUV(infS+stepx,infT,point21,vUnused,vUnused,vUnused,vUnused,inbDoCLipping);
			inpTriLow->CalcBarycentricCoordsFromUV(infS,infT+stepy,point12,vUnused,vUnused,vUnused,vUnused,inbDoCLipping);

			Vec3 dir12=point12-mid;
			Vec3 dir21=point21-mid;

			if(CPbTri::CalcArea(inpTriLow->m_fS,inpTriLow->m_fT)<0.0f)
				{ dir12=-dir12;dir21=-dir21; }

			if((dir12|dir12)>0.00001f && (dir21|dir21)>0.00001f)					// 
			{
				dir12.Normalize();
				dir21.Normalize();

				Matrix33 mat12;		mat12.SetRotationAA33(dfX*6.283f * gf_DEGTORAD,dir12);		// GetRotation should be static
				Matrix33 mat21;		mat21.SetRotationAA33(dfY*6.283f * gf_DEGTORAD,dir21);		// GetRotation should be static

				highresnorm = mat12*highresnorm + mat21*highresnorm;						// trick to make the rotation invariant to its position
				highresnorm.Normalize();
			}
		}
	}

	// then get the interpolated normal on the high poly mesh using barycentric coords
	// calculate one accessibility pixel
	if(outpAccessability)																								// != means accessability should be calculated
	{
		*outpAccessability = CalcAccessability(respoint,highresnorm,inpHighMesh,infRayMaxLength,5,vOcclusionDirection,cb);
	}
	else
	{
		assert(outOccDirection==0);
	}

	if(outfDisplacement)
	{
		if(inpData->m_Properties.m_bDisplaceNPatch)												// N-Patch displacement
		{
			Vec3 vPNPos,vPNNorm;

			inpTriLow->CalcPNTriangleData(infS,infT,true,vPNPos,vPNNorm,inpData->m_Properties.m_bDisplaceQuadratic);

			*outfDisplacement = vPNNorm*(respoint-vPNPos);
		}
		else																															// non N-Patch displacement
		{
			*outfDisplacement = lowresnorm*(respoint-mid);
		}
	}


	// convert to tangent space
	if(inpData->m_Properties.m_bObjectSpace)
	{
		// switch red and blue channel because objectspace and clonespace color scheme has changed V2.50
		outNormal=Vec3(highresnorm.z,highresnorm.y,highresnorm.x);	
		if(outOccDirection)
			(*outOccDirection) = Vec3(vOcclusionDirection.z,vOcclusionDirection.y,vOcclusionDirection.x);
	}
	else
	{
		// convert from objectspace bumpmapping to tangentspace bumpmapping

		// common blue tangentspace normalmap

		// build transform matrix
		Matrix33 mTangentSpace;
		
		mTangentSpace.SetMatFromVectors33(vLowResTNormal,-vLowResTangent,vLowResBinormal);	// blue(bright means to viewer), green(bright means down,dark means up), red(bright means right, dark means left)
		Matrix33 mInvTangentSpace=mTangentSpace;
		mInvTangentSpace.Invert33();

		// transform highresnorm
		{
			float fLength=highresnorm.Length();		if(fLength>1.0f)fLength=1.0f;

			outNormal=mInvTangentSpace*highresnorm;
			outNormal.Normalize();
			outNormal*=fLength;
		}

		// transform vOcclusionDirection
		if(outOccDirection)
		{
			float fLength=vOcclusionDirection.Length();		if(fLength>1.0f)fLength=1.0f;

			(*outOccDirection)=mInvTangentSpace*vOcclusionDirection;
			(*outOccDirection).Normalize();
			(*outOccDirection)*=fLength;
		}

//		outNormal=vLowResTangent;		// view Tangent vector in normalmap
//		outNormal=vLowResBinormal;	// view Binormal vector in normalmap
	}

	return(true);
}



static bool Difference( const Vec3 inA, const Vec3 inB, const float infTreshold )
{
		float dist2=(inA-inB).len2();

		return(dist2>=infTreshold);
}



static bool DoHereAntialias( unsigned char *pc, DWORD x, DWORD y, DWORD dwWidth, DWORD dwHeight, int *ppn, CPbMesh *pLowMesh, const float fAntialiasTreshold )
{
	Vec3 normal;

	normal=Vec3( (pc[0]*2.0f/255.0f)-1.0f,															// reference
								(pc[1]*2.0f/255.0f)-1.0f,
								(pc[2]*2.0f/255.0f)-1.0f );

	if(x!=0)																														// left
	{
		CPbTri *cb_left=&pLowMesh->m_pMesh[*(ppn-1)];

		Vec3 left=Vec3( (pc[-3+0]*2.0f/255.0f)-1.0f,
											(pc[-3+1]*2.0f/255.0f)-1.0f,
											(pc[-3+2]*2.0f/255.0f)-1.0f );

		if(Difference(normal,left,fAntialiasTreshold))return(true);
	}

	if(y!=0)																														// top
	{
		CPbTri *cb_top=&pLowMesh->m_pMesh[*(ppn-dwWidth)];

		Vec3 top=Vec3( (pc[-(int)dwWidth*3+0]*2.0f/255.0f)-1.0f,	
										 (pc[-(int)dwWidth*3+1]*2.0f/255.0f)-1.0f,
										 (pc[-(int)dwWidth*3+2]*2.0f/255.0f)-1.0f );

		if(Difference(normal,top,fAntialiasTreshold))return(true);
	}

	if(x!=dwWidth-1)																										// right
	{
		CPbTri *cb_right=&pLowMesh->m_pMesh[*(ppn+1)];

		Vec3 right=Vec3( (pc[3+0]*2.0f/255.0f)-1.0f,
											 (pc[3+1]*2.0f/255.0f)-1.0f,
											 (pc[3+2]*2.0f/255.0f)-1.0f );

		if(Difference(normal,right,fAntialiasTreshold))return(true);
	}

	if(y!=dwHeight-1)																										// down
	{
		CPbTri *cb_down=&pLowMesh->m_pMesh[*(ppn+dwWidth)];

		Vec3 down=Vec3( (pc[dwWidth*3+0]*2.0f/255.0f)-1.0f,	
											(pc[dwWidth*3+1]*2.0f/255.0f)-1.0f,
											(pc[dwWidth*3+2]*2.0f/255.0f)-1.0f );

		if(Difference(normal,down,fAntialiasTreshold))return(true);
	}

	return(false);
}



// Creates a polybump with 2 meshes (low and high resolution) as input.
//
// Steps performed, in order:
//   1. read the output size, run the serial number check (unregistered use is
//      limited to TRIALVERSION_MAXTEXTURESIZE / TRIALVERSION_MAXHIGHPOLYCOUNT)
//   2. allocate the normal map, the triangle pointer map and - depending on the
//      properties - the accessibility, displacement and occlusion direction maps
//   3. build both CPbMesh objects and their ray accelerators
//   4. pass 1 (optional, DoNonAAStep): one sample per covered texel
//   5. pass 2 (optional, m_iAntiAliasingMode): supersample texels chosen by DoHereAntialias()
//   6. expand the maps by dwExpandTextureInPixels and save them through inpData->m_BitMapOutput
//      (normal map, "_AM" accessibility, "_OD" occlusion direction, "_DM" displacement, ".cln" clone map)
//   7. free all buffers
//
// inpData - worker thread object providing the meshes, the properties, progress polling and the output bitmap
//
// Returns false if a mesh is missing or the trial limits are exceeded by an
// unregistered version; true otherwise (also when the user has stopped the
// calculation - check inpData->m_bUserHasStopped for that).
//////////////////////////////////////////////////////////////////////
bool CreateBumpMap( CPolyBumpWorkerThread *inpData )
{
	DWORD dwExpandTextureInPixels=(1 << inpData->m_Properties.m_iExpandTextureMode)/2;

	DWORD	dwTexelCount=0,dwCurrentTexelNo=0;		// to calculate the progress
	bool bEvaluationVersion=false;

	inpData->m_bUserHasStopped=false;

	CSimpleIndexedMesh *m1=inpData->GetLowMesh();	assert(m1);			if(!m1)return(false);
	CSimpleIndexedMesh *m2=inpData->GetHighMesh();	assert(m2);		if(!m2)return(false);

	DWORD dwWidth=inpData->m_Properties.GetOutputWidth();
	DWORD dwHeight=inpData->m_Properties.GetOutputHeight();

	{
		char str[256];

		sprintf(str,"Width: %d, Height: %d\n",dwWidth,dwHeight);
		inpData->m_Properties.m_bSummariesString+=str;
	}

	// init clonespace map
	inpData->m_CloneMap.Create(dwWidth,dwHeight);

	{
		DWORD m,d,y;
		UBYTE HardwareID[4];

		if(!CCrytekKeyGen::FurtherCheck(g_szSerialNo,"PB",m,d,y,HardwareID)		// Check serial number PB=PolyBump
		|| !NetbiosHelper_CheckCRC(HardwareID))
		{
			char str2[2048];

			sprintf(str2,"Unregistered Version (HardwareID dependant SerialNo is missing or wrong)\n\n"\
					"This trial version is is limited to %dx%d texture size and max. %d triangles\n\n"\
					"Check the AboutBox for information about getting the registered version.",TRIALVERSION_MAXTEXTURESIZE,TRIALVERSION_MAXTEXTURESIZE,TRIALVERSION_MAXHIGHPOLYCOUNT);
			MessageBox(0,str2,"PolyBumpPlugin",MB_OK);

			// nothing but the clone map has been set up so far, so leaving here does not leak the buffers below
			if(dwWidth>TRIALVERSION_MAXTEXTURESIZE || dwHeight>TRIALVERSION_MAXTEXTURESIZE || m2->m_FaceCount>TRIALVERSION_MAXHIGHPOLYCOUNT)
				return(false);

			bEvaluationVersion=true;
		}
	}

	// allocate space for the accessibility map ---------------------------------------
	unsigned char *pAccessBitmap=0;

	if(inpData->m_Properties.m_bOutputAccessability)
	{
		pAccessBitmap=new unsigned char [dwWidth*dwHeight];
		memset(pAccessBitmap,0,dwWidth*dwHeight);
	}

	// allocate space for the displacement map ---------------------------------------
	float *pDisplaceBitmap=0;

	if(inpData->m_Properties.m_bOutputDisplace)
	{
		pDisplaceBitmap=new float[dwWidth*dwHeight];

		// clear displacement map (FLT_MAX marks "no value", it becomes the color key when saving)
		float *pPtr=pDisplaceBitmap;

		for(DWORD i=0;i<dwWidth*dwHeight;i++)
			*pPtr++=FLT_MAX;
	}


	// allocate space for the occlusion direction map ---------------------------------------
	unsigned char *pOcclusionDirection=0;

#ifndef USE_REDUCEDFORPUBLIC
	if(inpData->m_Properties.m_bOutputAccessability)
	if(inpData->m_Properties.m_bOutputOccDirection)
	{
		pOcclusionDirection=new unsigned char [dwWidth*dwHeight*3];
		memset(pOcclusionDirection,0,dwWidth*dwHeight*3);
	}
#endif

	// allocate space for the normal map ---------------------------------------

	CPbMesh::m_pcNormalMap=new unsigned char [dwWidth*dwHeight*3];
	memset(CPbMesh::m_pcNormalMap,0,dwWidth*dwHeight*3);

	CPbMesh::m_pnTriPointer=new int[dwWidth*dwHeight];


	// -------------------------------------------------------------------------------

	CPbMesh::m_nBumpImageSizeX=dwWidth;
	CPbMesh::m_nBumpImageSizeY=dwHeight;
	float deltamapx=1.0f/dwWidth;
	float deltamapy=1.0f/dwHeight;

	CPbMesh *pLowMesh=new CPbMesh;

	// build low poly mesh and its ray accelerator
	{
		if(inpData->m_Properties.m_bOutputClonemap)
			pLowMesh->BuildNormalsAndBaseVectors(m1,&inpData->m_CloneMap,inpData->m_Properties.m_iMaterialID);
		 else
			pLowMesh->BuildNormalsAndBaseVectors(m1,inpData->m_Properties.m_iMaterialID);

		{
			char str[256];

			sprintf(str,"LowRes triangle count: %d\n",pLowMesh->m_nNumTris);
			inpData->m_Properties.m_bSummariesString+=str;
		}

		pLowMesh->BuildRayAccelerator(inpData->m_Properties.m_bOutputDebugInfo);
	}

	Vec3 vMin,vMax;
	float fLowPolyObjectSize=pLowMesh->CalcBoundingVolume(vMin,vMax);
	float rayscale=inpData->m_Properties.m_fRayLength*fLowPolyObjectSize*2.0f;	// *2.0f because diameter not radius should be used

	CPbMesh *pHighMesh=NULL;

	// build high poly mesh and its ray accelerator
	{
		pHighMesh=new CPbMesh;
		pHighMesh->GetNormalsAndClearBaseVectors(m2,inpData->m_Properties.m_iMaterialID);

		{
			char str[256];

			sprintf(str,"HighRes triangle count: %d\n",pHighMesh->m_nNumTris);
			inpData->m_Properties.m_bSummariesString+=str;
		}

		inpData->m_Properties.m_bSummariesString+=pHighMesh->BuildRayAccelerator(inpData->m_Properties.m_bOutputDebugInfo);
	}


	// reset pointers (-1 means: no low poly triangle covers this texel)
	for (DWORD k=0;k<dwWidth*dwHeight;k++)
		CPbMesh::m_pnTriPointer[k]=-1;

	if(!inpData->ThreadPollMessages(0,"Init")) inpData->m_bUserHasStopped=true;

	// draw tris pointer to keep track of the low mesh triangles
	if(!inpData->m_bUserHasStopped)
		pLowMesh->DrawTrisPointers(inpData->m_Properties.m_bOutputDebugInfo);

	// second serial number check (same limits as above), this time it only cancels the calculation
	{
		DWORD m,d,y;
		UBYTE HardwareID[4];

		if(!CCrytekKeyGen::FurtherCheck(g_szSerialNo,"PB",m,d,y,HardwareID)		// Check serial number PB=PolyBump
		|| !NetbiosHelper_CheckCRC(HardwareID))
		{
			if(dwWidth>TRIALVERSION_MAXTEXTURESIZE || dwHeight>TRIALVERSION_MAXTEXTURESIZE || m2->m_FaceCount>TRIALVERSION_MAXHIGHPOLYCOUNT)
				inpData->m_bUserHasStopped=true;
		}
	}



	// calculate texel count
	{
		for(DWORD y=0;y<dwHeight;y++)
			for(DWORD x=0;x<dwWidth;x++)
				if(CPbMesh::m_pnTriPointer[x+y*dwWidth]!=-1)dwTexelCount++;

		dwTexelCount++;		// to prevent divide by zero
	}

	if(!inpData->m_bUserHasStopped)
		if(!inpData->ThreadPollMessages(1,"Build")) inpData->m_bUserHasStopped=true;


	// pass 1: one sample per covered texel ------------------------------------------
	if(!inpData->m_bUserHasStopped)
	if(inpData->m_Properties.DoNonAAStep())
	{
		int *ppn=CPbMesh::m_pnTriPointer;
		unsigned char *pc=CPbMesh::m_pcNormalMap;
		float mapt=deltamapy*0.5f;		// correct half pixel
		for(DWORD y=0;y<dwHeight;y++,mapt+=deltamapy)
		{
			const char *szInfoText=	inpData->m_Properties.m_iAntiAliasingMode==0 ? "Pass 1/1" : "Pass 1/2";

			if(!inpData->ThreadPollMessages((dwCurrentTexelNo*100.0f)/(float)dwTexelCount,szInfoText)){ inpData->m_bUserHasStopped=true;break; }

			float maps=deltamapx*0.5f;		// correct half pixel
			for(DWORD x=0;x<dwWidth;x++,ppn++,pc+=3,maps+=deltamapx)
			{
				if(*ppn<0)
					continue; //no tri at this texel

				// is this texel on a border? (one of the 4-neighbours is not covered by a triangle)
				bool bBorder=false;
				{
					if(x!=0)if(ppn[-1]<0)bBorder=true;
					if(y!=0)if(ppn[-(int)dwWidth]<0)bBorder=true;
					if(x!=dwWidth-1)if(ppn[1]<0)bBorder=true;
					if(y!=dwHeight-1)if(ppn[dwWidth]<0)bBorder=true;
				}

				dwCurrentTexelNo++;		// next texel (for progress)

				CPbTri *cb_low=&pLowMesh->m_pMesh[*ppn];
				Vec3 normal;
				float displacement;
				bool bDoClipping=bBorder;

				if(pAccessBitmap)
				{
					Vec3 occdirection;
					float accessfloat;

					if(!GetSamplePoint(cb_low,rayscale,fLowPolyObjectSize,pLowMesh,pHighMesh,maps,mapt,inpData,normal,&accessfloat,&occdirection,&displacement,bDoClipping))
						continue;

					pAccessBitmap[x+y*dwWidth]=(UBYTE)(accessfloat*255.0f);

					if(pOcclusionDirection)
					{
						float fLength=occdirection.Length(); // make the vector less small =>brighter result (interreflections might brighten up the object)

						float fAddLength= (1-fLength)*inpData->m_Properties.m_fOccBrighter;

						assert(fAddLength>=0.0f && fAddLength<=1.0f);

						occdirection=occdirection + fAddLength * normal;		// result can not be longer than 1

						SetVectorAsRGB(occdirection,&pOcclusionDirection[(x+y*dwWidth)*3],inpData->m_Properties.m_bNormalJitter);
					}
				}
				else
				{
					if(!GetSamplePoint(cb_low,rayscale,fLowPolyObjectSize,pLowMesh,pHighMesh,maps,mapt,inpData,normal,0,0,&displacement,bDoClipping))
						continue;
				}

				if(pDisplaceBitmap)
					pDisplaceBitmap[x+y*dwWidth]=displacement;

				/*
				assert(normal.x>=-1.0f);
				assert(normal.x<=1.0f);
				assert(normal.y>=-1.0f);
				assert(normal.y<=1.0f);
				assert(normal.z>=-1.0f);
				assert(normal.z<=1.0f);*/

				//finally map the normal to rgb space
				SetVectorAsRGB(normal,pc,inpData->m_Properties.m_bNormalJitter);


#ifdef PLUGIN_FOR_MAYA
				// x and z is flipped, because of different color coding of max and maya
				{
					int swap=pc[0];	pc[0]=pc[2];pc[2]=swap;
				}
#endif // PLUGIN_FOR_MAYA

			} //x
		} //y
	}

	// pass 2: adaptive antialiasing ---------------------------------------------------
	if(!inpData->m_bUserHasStopped)
	if(inpData->m_Properties.m_iAntiAliasingMode)
	{
		float fAntiAliasStep,fAntialiasTreshold;

		inpData->m_Properties.GetAntialiasingParam(fAntiAliasStep,fAntialiasTreshold);

		dwCurrentTexelNo=0;		// start progress again at 0%
		dwTexelCount=0;

		// count the texels for antialiasing
		int *ppn=CPbMesh::m_pnTriPointer;
		unsigned char *pc=CPbMesh::m_pcNormalMap;
		for(DWORD y=0;y<dwHeight;y++)
		for(DWORD x=0;x<dwWidth;x++,ppn++,pc+=3)
		{
			if (*ppn<0)continue;		// no tri at this texel

			if(DoHereAntialias(pc,x,y,dwWidth,dwHeight,ppn,pLowMesh,fAntialiasTreshold))
				dwTexelCount++;
		}

		dwTexelCount++;		// to prevent divide by zero in the progress below (no texel may need antialiasing)

		ppn=CPbMesh::m_pnTriPointer;
		pc=CPbMesh::m_pcNormalMap;

		// adaptive Antialiasing
		float mapt=deltamapy*0.5f;		// correct half pixel
		for(DWORD y=0;y<dwHeight;y++,mapt+=deltamapy)
		{
			const char *szInfoText=inpData->m_Properties.DoNonAAStep() ? "Pass 2/2" : "Pass 1/1";

			if(!inpData->ThreadPollMessages((dwCurrentTexelNo*100.0f)/(float)dwTexelCount,szInfoText)){ inpData->m_bUserHasStopped=true;break; }

			float maps=deltamapx*0.5f;		// correct half pixel
			for(DWORD x=0;x<dwWidth;x++,ppn++,pc+=3,maps+=deltamapx)
			{
				if(*ppn<0)continue;		// no tri at this texel

				if(!DoHereAntialias(pc,x,y,dwWidth,dwHeight,ppn,pLowMesh,fAntialiasTreshold))continue;

				Vec3 normal=Vec3((pc[0]*2.0f/255.0f)-1.0f,(pc[1]*2.0f/255.0f)-1.0f,(pc[2]*2.0f/255.0f)-1.0f);

				CPbTri *cb_low=&pLowMesh->m_pMesh[*ppn];

				dwCurrentTexelNo++;		// next texel (for progress)

				float Sumaccessfloat=0.0f,Sumdisplacefloat=0.0f;
				float SumSamples=0.0f;
				Vec3 SumNormal(0,0,0);
				Vec3 SumOccDirection(0,0,0);
				float stepherex=fAntiAliasStep*deltamapx;
				float stepherey=fAntiAliasStep*deltamapy;

				bool bBorder=false;

				// is this texel on a border?
				{
					if(x!=0)if(ppn[-1]<0)bBorder=true;
					if(y!=0)if(ppn[-(int)dwWidth]<0)bBorder=true;
					if(x!=dwWidth-1)if(ppn[1]<0)bBorder=true;
					if(y!=dwHeight-1)if(ppn[dwWidth]<0)bBorder=true;
				}

				bool bDoClipping=bBorder;

				// sub samples inside the texel, only successful samples are accumulated
				for(float suby=(fAntiAliasStep*0.5f-0.5f)*deltamapy;suby<=0.5f*deltamapy;suby+=stepherey)
				for(float subx=(fAntiAliasStep*0.5f-0.5f)*deltamapx;subx<=0.5f*deltamapx;subx+=stepherex)
				{
					float jitterx=0.0f,jittery=0.0f,displacement;

					if(inpData->m_Properties.DoAAJittering())
					{
						jitterx=((rand()%1024)*(1.0f/1024.0f)-0.5f)*stepherex;
						jittery=((rand()%1024)*(1.0f/1024.0f)-0.5f)*stepherey;
					}

					if(pAccessBitmap)
					{
						Vec3 occdirection;
						float accessfloat;

						if(!GetSamplePoint(cb_low,rayscale,fLowPolyObjectSize,pLowMesh,pHighMesh,maps+subx+jitterx,mapt+suby+jittery,inpData,normal,&accessfloat,&occdirection,&displacement,bDoClipping))
							continue;

						Sumaccessfloat+=accessfloat;

						if(pOcclusionDirection)
						{
							float fLength=occdirection.Length(); // make the vector less small =>brighter result (interreflections might brighten up the object)

							float fAddLength= (1-fLength)*inpData->m_Properties.m_fOccBrighter;

							assert(fAddLength>=0.0f && fAddLength<=1.0f);

							SumOccDirection+=occdirection + fAddLength * normal;
						}
					}
					else
					{
						if(!GetSamplePoint(cb_low,rayscale,fLowPolyObjectSize,pLowMesh,pHighMesh,maps+subx+jitterx,mapt+suby+jittery,inpData,normal,0,0,&displacement,bDoClipping))
							continue;
					}

					Sumdisplacefloat+=displacement;
					SumNormal+=normal;SumSamples++;
				}

				// without any successful sample the result of pass 1 stays untouched
				if(SumSamples!=0.0f)
				{
					if(pAccessBitmap)
						pAccessBitmap[x+y*dwWidth]=(UBYTE)(Sumaccessfloat/SumSamples*255.0f);

					if(pOcclusionDirection)
						SetVectorAsRGB(SumOccDirection/SumSamples,&pOcclusionDirection[(x+y*dwWidth)*3],inpData->m_Properties.m_bNormalJitter);

					if(pDisplaceBitmap)
						pDisplaceBitmap[x+y*dwWidth]=Sumdisplacefloat/SumSamples;

					normal=SumNormal/SumSamples;

					// debug aid (disabled): paints the antialiased edges in grey
//					normal=Vec3(0,0,0);

					//finally map the normal to rgb space
					SetVectorAsRGB(normal,pc,inpData->m_Properties.m_bNormalJitter);

#ifdef PLUGIN_FOR_MAYA
				// x and z is flipped, because of different color coding of max and maya
				{
					int swap=pc[0];	pc[0]=pc[2];pc[2]=swap;
				}
#endif // PLUGIN_FOR_MAYA

				}
			} //x
		} //y
	}


	// expand and save the results -----------------------------------------------------
	if(!inpData->m_bUserHasStopped)
	{
		// two 32 bit buffers, ExpandBitmap() ping-pongs between them
		DWORD *pMemA=(DWORD *)malloc(dwWidth*dwHeight*4);	// allocate buffer
		DWORD *pMemB=(DWORD *)malloc(dwWidth*dwHeight*4);	// allocate buffer

		// fill in the first buffer (rows are copied bottom-up, so the image is flipped vertically)
		{
			unsigned char *_mem=(unsigned char *)pMemA;

			for(DWORD y=0;y<dwHeight;y++)
			{
				unsigned char *src=(CPbMesh::m_pcNormalMap);

				src+=dwWidth*3*(dwHeight-y-1);

				for(DWORD x=0;x<dwWidth;x++)
					{	*_mem++=*src++;*_mem++=*src++;*_mem++=*src++;*_mem++=0; }
			}
		}

		// expand the normal information
		{
			for(DWORD f=0;f<dwExpandTextureInPixels;f++)
			{
				if((f%2)==0) ExpandBitmap(dwWidth,dwHeight,pMemA,pMemB,true,0);		// with diagonal
					else ExpandBitmap(dwWidth,dwHeight,pMemB,pMemA,false,0);				// without diagonal
			}
		}

		DWORD pitch;

		// save normalmap with MAX help
		if(inpData->m_Properties.m_bOutputHighpolyNormals)
		if(UBYTE *writeMap=inpData->m_BitMapOutput.GetMemoryAndPitch(pitch))
		{
			assert(pitch==dwWidth*3);

			for(DWORD y=0;y<dwHeight;y++)
			for(DWORD x=0;x<dwWidth;x++)
			{
				DWORD s=pMemA[x+y*dwWidth];

				writeMap[3*(x+y*dwWidth)]=		(UBYTE)((s>>16)&0xff);
				writeMap[3*(x+y*dwWidth)+1]=	(UBYTE)((s>>8)&0xff);
				writeMap[3*(x+y*dwWidth)+2]=	(UBYTE)(s&0xff);
			}

			// burn in Crytek logo (centered, the marked texels are pulled half way towards 127)
			if(bEvaluationVersion)
			if(dwWidth>=256)
			if(dwHeight>=256)
			{
				int left=dwWidth/2-g_RawImageWidth/2,top=dwHeight/2-g_RawImageHeight/2;

				for(int sy=0;sy<g_RawImageHeight;sy++)
				for(int sx=0;sx<g_RawImageWidth;sx++)
				{
					DWORD col=g_RawImageData[sy*g_RawImageWidth+sx];

					int x=(sx+left)%dwWidth;
					int y=(sy+top)%dwHeight;

					if(col)
					{
						writeMap[3*(x+y*dwWidth)]=(UBYTE)(((int)writeMap[3*(x+y*dwWidth)]-127)/2+127);
						writeMap[3*(x+y*dwWidth)+1]=(UBYTE)(((int)writeMap[3*(x+y*dwWidth)+1]-127)/2+127);
						writeMap[3*(x+y*dwWidth)+2]=(UBYTE)(((int)writeMap[3*(x+y*dwWidth)+2]-127)/2+127);
					}
				}
			}

			inpData->m_BitMapOutput.SaveAndClear(inpData->ExtendFilename(inpData->m_Properties.m_szOutputFilename+inpData->m_Properties.m_szOutputExtension).c_str());

			// save clonespace map
			if(inpData->m_Properties.m_bOutputClonemap)
			{
				bool bOk=inpData->m_CloneMap.SaveCloneMap((inpData->ExtendFilename(inpData->m_Properties.m_szOutputFilename)+".cln").c_str());

				if(!bOk)
					MessageBox(0,"output .CLN file open failed","Error",MB_OK);
			}
		}
		else MessageBox(0,"output NORMAL file open failed","Error",MB_OK);		// this else belongs to the GetMemoryAndPitch test above

		// save accessibilitymap with MAX help
		if(inpData->m_Properties.m_bOutputAccessability)
		if(UBYTE *writeMap=inpData->m_BitMapOutput.GetMemoryAndPitch(pitch))
		{
			assert(pitch==dwWidth*3);
			// fill in the first buffer
			{
				unsigned char *_mem=(unsigned char *)pMemA;

				for(DWORD y=0;y<dwHeight;y++)
				{
					unsigned char *normal=(CPbMesh::m_pcNormalMap)+dwWidth*3*(dwHeight-y-1);
					unsigned char *src=pAccessBitmap+dwWidth*(dwHeight-y-1);

					for(DWORD x=0;x<dwWidth;x++)
					{
						unsigned char g=*src++;

						// a black texel in the normal map is treated as "nothing was written here"
						if(normal[0]==0 && normal[1]==0 && normal[2]==0)
						{
							*_mem++=0xff;*_mem++=0xff;*_mem++=0xff;*_mem++=0xff;				// color key for expand bitmap
						}
						else
						{
							*_mem++=g;*_mem++=g;*_mem++=g;*_mem++=0;
						}

						normal+=3;
					}
				}
			}

			// expand the accessibility information
			{
				for(DWORD f=0;f<dwExpandTextureInPixels;f++)
				{
					if((f%2)==0) ExpandBitmap(dwWidth,dwHeight,pMemA,pMemB,true,0xffffffff);		// with diagonal
						else ExpandBitmap(dwWidth,dwHeight,pMemB,pMemA,false,0xffffffff);					// without diagonal
				}
			}

			for(DWORD y=0;y<dwHeight;y++)
			for(DWORD x=0;x<dwWidth;x++)
			{
				DWORD s=pMemA[x+y*dwWidth];

				if(s==0xffffffff)s=0;		// remove colorkey for expand bitmap

				writeMap[3*(x+y*dwWidth)]=		(UBYTE)((s>>16)&0xff);
				writeMap[3*(x+y*dwWidth)+1]=	(UBYTE)((s>>8)&0xff);
				writeMap[3*(x+y*dwWidth)+2]=	(UBYTE)(s&0xff);
			}

			inpData->m_BitMapOutput.SaveAndClear(inpData->ExtendFilename(inpData->m_Properties.m_szOutputFilename+"_AM"+inpData->m_Properties.m_szOutputExtension).c_str());
		}	// end Accessibility

		// **************************
#ifndef USE_REDUCEDFORPUBLIC
		// save occlusiondirection with MAX help
		if(pOcclusionDirection)
		if(UBYTE *writeMap=inpData->m_BitMapOutput.GetMemoryAndPitch(pitch))
		{
			assert(pOcclusionDirection);
			assert(pitch==dwWidth*3);

			// fill in the first buffer
			{
				unsigned char *_mem=(unsigned char *)pMemA;

				for(DWORD y=0;y<dwHeight;y++)
				{
					unsigned char *normal=(CPbMesh::m_pcNormalMap)+dwWidth*3*(dwHeight-y-1);
					unsigned char *src=pOcclusionDirection+dwWidth*3*(dwHeight-y-1);

					for(DWORD x=0;x<dwWidth;x++)
					{
						unsigned char r=*src++;
						unsigned char g=*src++;
						unsigned char b=*src++;

						if(normal[0]==0 && normal[1]==0 && normal[2]==0)
						{
							*_mem++=0xff;*_mem++=0xff;*_mem++=0xff;*_mem++=0xff;				// color key for expand bitmap
						}
						else
						{
							*_mem++=r;*_mem++=g;*_mem++=b;*_mem++=0;
						}

						normal+=3;
					}
				}
			}

			// expand the occlusiondirection information
			{
				for(DWORD f=0;f<dwExpandTextureInPixels;f++)
				{
					if((f%2)==0) ExpandBitmap(dwWidth,dwHeight,pMemA,pMemB,true,0xffffffff);		// with diagonal
						else ExpandBitmap(dwWidth,dwHeight,pMemB,pMemA,false,0xffffffff);					// without diagonal
				}
			}

			for(DWORD y=0;y<dwHeight;y++)
			for(DWORD x=0;x<dwWidth;x++)
			{
				DWORD s=pMemA[x+y*dwWidth];

				if(s==0xffffffff)s=0;		// remove colorkey for expand bitmap

				writeMap[3*(x+y*dwWidth)]=		(UBYTE)((s>>16)&0xff);
				writeMap[3*(x+y*dwWidth)+1]=	(UBYTE)((s>>8)&0xff);
				writeMap[3*(x+y*dwWidth)+2]=	(UBYTE)(s&0xff);
			}

			{
				char szParams[80]="";

				if(inpData->m_Properties.m_bExtendFilename)
					sprintf(szParams,"_I%.1f",inpData->m_Properties.m_fOccBrighter*100.0f);		// I for interreflections

				inpData->m_BitMapOutput.SaveAndClear(inpData->ExtendFilename(inpData->m_Properties.m_szOutputFilename+"_OD"+szParams+inpData->m_Properties.m_szOutputExtension).c_str());
			}
		}	// end occlusiondirection
#endif // USE_REDUCEDFORPUBLIC

		// **************************

		// save displacementmap with MAX help
		if(inpData->m_Properties.m_bOutputDisplace)
		if(UBYTE *writeMap=inpData->m_BitMapOutput.GetMemoryAndPitch(pitch))
		{
			assert(pitch==dwWidth*3);

			// fill in the first buffer
			{
				// calculate the absolute bound of the displacement (FLT_MAX entries are unset and ignored)
				float fAbsBound=0.0f;
				{
					for(DWORD y=0;y<dwHeight;y++)
					{
						float *src=pDisplaceBitmap+dwWidth*(dwHeight-y-1);

						for(DWORD x=0;x<dwWidth;x++)
						{
							float fVal=*src++;

							// NOTE(review): negative values are clipped only when m_bDisplaceSigned is set,
							// although the signed path below is the one that can encode them - confirm that
							// this condition is not inverted
							if(fVal<0.0f)
								if(inpData->m_Properties.m_bDisplaceSigned)fVal=0.0f;			// clip negative values

							if(fVal!=FLT_MAX)
							{
								if(fVal>fAbsBound)fAbsBound=fVal;
								if(fVal<-fAbsBound)fAbsBound=-fVal;
							}
						}
					}
				}

				// scale the result to the output range

				float fDispScale=0.0f;

				inpData->m_Properties.m_fReturnDisplaceMag=fAbsBound;

				if(fAbsBound!=0.0f)fDispScale=1.0f/fAbsBound;

				unsigned char *_mem=(unsigned char *)pMemA;

				// defaults are for signed output: [-1..1] -> [0..255]
				float fDispTranslate=1.0f;
				float fDispOutputScale=127.5f;

				if(!inpData->m_Properties.m_bDisplaceSigned)		// unsigned
				{
					fDispTranslate=0.0;fDispOutputScale=255.0f;
				}
				else																						// signed
				{
					inpData->m_Properties.m_fReturnDisplaceMag*=2.0f;
				}

				for(DWORD y=0;y<dwHeight;y++)
				{
					float *src=pDisplaceBitmap+dwWidth*(dwHeight-y-1);

					for(DWORD x=0;x<dwWidth;x++)
					{
						int g;
						float fValue=*src++;

						if(fValue==FLT_MAX)
						{
							*_mem++=0xff;*_mem++=0xff;*_mem++=0xff;*_mem++=0xff;		// colorkey for expand bitmap
						}
						else
						{
							g=(int)( (fValue*fDispScale+fDispTranslate)*fDispOutputScale );

							if(g<0)g=0;if(g>255)g=255;		// bound in range [0..255]

							*_mem++=g;*_mem++=g;*_mem++=g;*_mem++=0;
						}
					}
				}
			}

			// expand the displacement information
			{
				for(DWORD f=0;f<dwExpandTextureInPixels;f++)
				{
					if((f%2)==0) ExpandBitmap(dwWidth,dwHeight,pMemA,pMemB,true,0xffffffff);			// with diagonal
						else ExpandBitmap(dwWidth,dwHeight,pMemB,pMemA,false,0xffffffff);						// without diagonal
				}
			}

			for(DWORD y=0;y<dwHeight;y++)
			for(DWORD x=0;x<dwWidth;x++)
			{
				DWORD s=pMemA[x+y*dwWidth];

				if(s==0xffffffff)s=0;		// remove colorkey for expand bitmap

				writeMap[3*(x+y*dwWidth)]=		(UBYTE)((s>>16)&0xff);
				writeMap[3*(x+y*dwWidth)+1]=	(UBYTE)((s>>8)&0xff);
				writeMap[3*(x+y*dwWidth)+2]=	(UBYTE)(s&0xff);
			}

			inpData->m_BitMapOutput.SaveAndClear(inpData->ExtendFilename(inpData->m_Properties.m_szOutputFilename+"_DM"+inpData->m_Properties.m_szOutputExtension).c_str());
		}	// end Displace


		free(pMemA);free(pMemB);
	}

	// all maps were allocated with new[] and therefore have to be released with delete []
	delete [] CPbMesh::m_pcNormalMap;CPbMesh::m_pcNormalMap=0;
	delete [] pOcclusionDirection;pOcclusionDirection=0;
	delete [] pAccessBitmap;pAccessBitmap=0;
	delete [] pDisplaceBitmap;pDisplaceBitmap=0;

	// clean up
	delete pLowMesh;
	delete pHighMesh;

	delete [] CPbMesh::m_pnTriPointer; CPbMesh::m_pnTriPointer=NULL;

	return(true);
}












// Raytracing: gathers the hits of the segment invStart..invEnd, unsorted, into outIntersections.
// inpIgnoreObject is handed to the triangle sink (RasterCube build only).
// Note carried over from the original: call ClearListBefore and GetHitList afterwards
// (NOTE(review): neither name is visible in this part of the file - confirm it is still valid).
void CPbMesh::GatherRayHits( const Vec3 invStart, const Vec3 invEnd, CIntInfoList &outIntersections, const CPbTri *inpIgnoreObject )
{
#ifdef USE_RASTERCUBE
	Vec3 vRayDir=invEnd-invStart;
	float fRayLength=vRayDir.Length();			// has to be taken before the direction is normalized

	vRayDir.Normalize();										// marked as open question in the original: is normalizing needed?

	// sink that receives the triangles found by the raster traversal (original remark: a static one may be faster)
	CEveryObjectOnlyOnceSTLList sTriangleSink(invStart,vRayDir,fRayLength,outIntersections,inpIgnoreObject);

	m_Raster.GatherRayHitsTo(invStart,invEnd,sTriangleSink);
#else // USE_RASTERCUBE
	// NOTE(review): sTriangleSink is not declared in this branch, so it does not compile as it stands
	if(m_pRoot)
		m_pRoot->GatherRayHitsTo(invStart,invEnd,sTriangleSink);
#endif // USE_RASTERCUBE
}


// Raytracing: tests whether the segment invStart..invEnd hits anything at all.
// Returns the result of the triangle sink (RasterCube build) or of the m_pRoot
// hierarchy; without a hierarchy the result is false.
// inpIgnoreObject is only used in the RasterCube build.
bool CPbMesh::CalcIntersection( const Vec3 invStart, const Vec3 invEnd, const CPbTri *inpIgnoreObject )
{
#ifdef USE_RASTERCUBE
	Vec3 vRayDir=invEnd-invStart;
	float fRayLength=vRayDir.Length();			// has to be taken before the direction is normalized

	vRayDir.Normalize();										// marked as open question in the original: is normalizing needed?

	CEveryObjectOnlyOnceBool sTriangleSink(invStart,vRayDir,fRayLength,inpIgnoreObject);

	m_Raster.GatherRayHitsTo(invStart,invEnd,sTriangleSink);

	return(sTriangleSink.GetResult());
#else // USE_RASTERCUBE
	if(!m_pRoot)
		return(false);

	return(m_pRoot->CalcIntersection(invStart,invEnd));
#endif // USE_RASTERCUBE
}


#ifdef USE_RASTERCUBE
//! used only for debugging
std::string CPbMesh::TestRasterCube( void )
{
	DWORD dwBucketsOk=0;
	DWORD dwBucketsFailed=0;

	// for every triangle
	for(int i=0;i<m_nNumTris;i++)
	{
		// for every edge
		for(int e1=0;e1<3;e1++)
		{
			int e2 = (e1==2)?0:e1+1;

			CEveryObjectDebug dbg(&m_pMesh[i]);	

			m_Raster.GatherRayHitsTo(m_pMesh[i].m_Verts[e1],m_pMesh[i].m_Verts[e2],dbg);

			dwBucketsOk+=dbg.m_dwBucketsOk;
			dwBucketsFailed+=dbg.m_dwBucketsFailed;
		}
	}

	char str[1024];
	sprintf(str,"TestRasterCube good:%d failed:%d\n",dwBucketsOk,dwBucketsFailed);

	return(str);


/*	Vec3 vMin(-1,-2,-3),vMax(4,5,6);

	for(int f=0;f<10000;f++)
	{
		CRasterCube ras;
		ras.Init(vMin,vMax,1);

		CPbTri Element;
		Vec3 vTri[3],vFrom,vTo;

		for(int g=0;g<3;g++)
			Element.m_Verts[g]=Vec3(vMin[0]+(vMax[0]-vMin[0])*(rand()%100)*0.01f,
															 vMin[1]+(vMax[1]-vMin[1])*(rand()%100)*0.01f,
															 vMin[2]+(vMax[2]-vMin[2])*(rand()%100)*0.01f);

		Element.RefreshInternals();										// this is neccessray to get rayshooting working

		vFrom=Vec3(vMin[0]+(vMax[0]-vMin[0])*(rand()%100)*0.01f,
			vMin[1]+(vMax[1]-vMin[1])*(rand()%100)*0.01f,
			vMin[2]+(vMax[2]-vMin[2])*(rand()%100)*0.01f);

		vTo=Vec3(vMin[0]-(vMax[0]-vMin[0])+3*(vMax[0]-vMin[0])*(rand()%100)*0.01f,
			vMin[1]-(vMax[1]-vMin[1])+3*(vMax[1]-vMin[1])*(rand()%100)*0.01f,
			vMin[2]-(vMax[2]-vMin[2])+3*(vMax[2]-vMin[2])*(rand()%100)*0.01f);

		// if there is a hit
		if(Element.CalcIntersection(vTo,vFrom))
		{
			vTri[0]=Element.m_Verts[0];
			vTri[1]=Element.m_Verts[1];
			vTri[2]=Element.m_Verts[2];

			ras.PutInTriangle(vTri,&Element);
		
			ras.PreProcess(true);

			ras.PutInTriangle(vTri,&Element);

			bool bCorrect;

			{
				CIntInfoList gather;

				ras.GatherRayHits(vFrom,vTo,gather);

				bCorrect= (gather.size()==1);
			}

			if(!bCorrect)
			{
				CIntInfoList gather;

				char str[256];

				sprintf(str,"%d. TestRasterCube: %s\n",f,bCorrect?"ok":"failed");
				OutputDebugString(str);

				sprintf(str,"    Tri: (%.2f,%.2f,%.2f) (%.2f,%.2f,%.2f) (%.2f,%.2f,%.2f)\n",
					vTri[0].x,vTri[0].y,vTri[0].z,
					vTri[1].x,vTri[1].y,vTri[1].z,
					vTri[2].x,vTri[2].y,vTri[2].z);
				OutputDebugString(str);

				sprintf(str,"    Line: (%.2f,%.2f,%.2f)-(%.2f,%.2f,%.2f)\n\n",
					vFrom.x,vFrom.y,vFrom.z,
					vTo.x,vTo.y,vTo.z);
				OutputDebugString(str);

				Element.CalcIntersection(vTo,vFrom);

				ras.GatherRayHits(vFrom,vTo,gather);
			}
		}
	}
*/
}
#endif // USE_RASTERCUBE



// Builds the ray acceleration structure for the triangles of this mesh.
//
// RasterCube build: the raster is initialized with the bounding volume, all
// triangles are put in once before and once after PreProcess(), and a summary
// line (raster resolution and memory consumption) is returned. With inbDebug
// the output of TestRasterCube() is appended.
//
// Other builds: nothing is built here and an empty string is returned.
//
// inbDebug - forwarded to m_Raster.PreProcess(), also enables the self test
// Returns a text for the summary (may be empty).
std::string CPbMesh::BuildRayAccelerator( const bool inbDebug )
{
#ifdef USE_RASTERCUBE

	Vec3 vMin,vMax;

	CalcBoundingVolume(vMin,vMax);

	m_Raster.Init(vMin,vMax,m_nNumTris,USE_RASTERCUBE_ACCELERATOR_MAGNIFIER);

	// first pass over all triangles, before PreProcess()
	for(int i=0;i<m_nNumTris;i++)
		m_Raster.PutInTriangle(m_pMesh[i].m_Verts,&m_pMesh[i]);

	m_Raster.PreProcess(inbDebug);

	// second pass over all triangles, after PreProcess()
	for(int i=0;i<m_nNumTris;i++)
		m_Raster.PutInTriangle(m_pMesh[i].m_Verts,&m_pMesh[i]);

//	m_Raster.Compress();  deactivated because it seems there is still a bug


	char str[256];
	DWORD iSize[3];
	DWORD byteSize=m_Raster.CalcMemoryConsumption(iSize);

	sprintf(str,"Acceleration scheme: RasterCube %dx%dx%d ~%d bytes\n",iSize[0],iSize[1],iSize[2],byteSize);

	std::string sRet=str;

	if(inbDebug)sRet+=TestRasterCube();

	return(sRet);
#else // USE_RASTERCUBE
	// Without this branch the function would end without returning a value (undefined behavior).
	// NOTE(review): the m_pRoot hierarchy used by the non RasterCube ray functions is not built here - confirm where that happens.
	(void)inbDebug;

	return(std::string());
#endif // USE_RASTERCUBE
}

