#include "StdAfx.h"
#include "GLUtils.h"
#include "BigMatrix.h"
#include "BigVector.h"
#include "bsplineapproximator.h"
#include "B3PApproximation.h"
#include "drand.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#endif

#define LOG_KNOTS 0

// Euclidean length of v; every product and the sum are evaluated in double precision.
inline double DLength (const Vec3d& v)
{
	const double dSqrX = double(v.x)*v.x;
	const double dSqrY = double(v.y)*v.y;
	const double dSqrZ = double(v.z)*v.z;
	return sqrt (dSqrX + dSqrY + dSqrZ);
}

////////////////////////////////////////////////////////////////////////////
// Builds an approximator that holds numKeys samples (keys) and is prepared to
// produce splines of degree nDegree (order = degree+1).
// isOpen is stored and handed to every spline created by newEmptySpline().
BSplineApproximator::BSplineApproximator (unsigned numKeys, int nDegree, bool isOpen)
	: m_arrKeys (numKeys)
	, m_nDegree (-1)        // "not set yet": lets setDegree() below see a change
	, m_fStepNRmu (1)
	, m_fSmoothness (0.1)
	, m_bLoop (false)
	, m_isOpen (isOpen)
{
	// m_nHalfFilterSize and the derivative filter are computed by setDegree()
	setDegree (nDegree);

	// testDerivativeEstimator() can be called here in debug builds to self-check the filter
}

// Sets the spline degree. When the degree really changes, the knot storage is
// cleared, the Newton-Raphson step factor is reset to 1 and the derivative
// estimation filter (m_mxFilter) is rebuilt. Calling it with the current degree
// is a no-op.
void BSplineApproximator::setDegree (int nDegree)
{
	if (m_nDegree == nDegree)
		return;

	m_arrKnots.clear();
	m_nDegree = nDegree;
	m_nHalfFilterSize = nDegree>0?(nDegree+3)/2:1;
	m_fStepNRmu = 1;

	// First index: filter tap (2*m_nHalfFilterSize taps located at +-0.5, +-1.5, ...).
	// Second index: Taylor term x^k/k!, stored in reverse so that the highest power
	// ends up at index 0.
	CBigMatrix mxAntiFilter (2*m_nHalfFilterSize, m_nDegree+2);
	const int nWidth = mxAntiFilter.GetWidth();

	// nFactorial tracks nBasis! and is updated in the loop increment
	for (int nBasis = 0, nFactorial = 1; nBasis < nWidth; nFactorial *= ++nBasis)
	{
		const int nColumn = nWidth-1 - nBasis;
		// sample maps so that the right part starts with 0.5.
		for (int nDblSample = 0; nDblSample < m_nHalfFilterSize; nDblSample ++)
		{
			mxAntiFilter[m_nHalfFilterSize+nDblSample  ][nColumn] = pow( 0.5+nDblSample, nBasis) / nFactorial;
			mxAntiFilter[m_nHalfFilterSize-nDblSample-1][nColumn] = pow(-0.5-nDblSample, nBasis) / nFactorial;
		}
	}

	// The filter is the pseudo-inverse of the table above; the value returned by
	// ComposePseudoInverse is not needed here.
	mxAntiFilter.ComposePseudoInverse (m_mxFilter, 1e-4);
	/*
#ifdef _DEBUG
	CBigMatrix mxTest;
	mxTest.SetMultiply(mxAntiFilter, m_mxFilter);
	double fError1 = mxTest.GetIdentityError();
	mxTest.SetMultiply(m_mxFilter, mxAntiFilter);
	double fError2 = mxTest.GetIdentityError();

	CBigVector bvTest(2*m_nHalfFilterSize);
	for (int i = 0; i < 2*m_nHalfFilterSize; ++i)
	{
		bvTest[i] = mxAntiFilter[i][0] + mxAntiFilter[i][m_nDegree+1];
	}

	CBigVector bvResult = m_mxFilter * bvTest;
	//bvResult.DebugDump();
#endif
	*/
}


//////////////////////////////////////////////////////////////////////////
// Self-test of the deep-derivative estimator: overwrites the x coordinate of
// the first 2*m_nHalfFilterSize keys with i^(m_nDegree+1)/(m_nDegree+1)! + 1 + i
// (i = key index) and asserts that estimateDeepDerivative(0).x is within 1e-3
// of 1. NOTE: destroys the x values of those keys.
void BSplineApproximator::testDerivativeEstimator()
{
	const double fExpected = 1;

	// leading coefficient: fExpected divided by 2*3*...*(m_nDegree+1)
	double fLeading = fExpected;
	int nDivisor = 2;
	while (nDivisor <= m_nDegree+1)
		fLeading /= nDivisor++;

	const int numTaps = 2*m_nHalfFilterSize;
	for (int i = 0; i < numTaps; ++i)
		m_arrKeys[i].pt.x = float(pow (i, m_nDegree+1)*fLeading) + 1 + i;

	double fEstimated = estimateDeepDerivative(0).x;
	assert (tabs(fEstimated - fExpected) < 1e-3);
}


// Nothing to release explicitly; the members clean up after themselves.
BSplineApproximator::~BSplineApproximator()
{}

//////////////////////////////////////////////////////////////////////////
// Writes nNumKnots ascending knot times into pKnots[0..nNumKnots-1].
//  - nNumKnots <= 0: nothing is written.
//  - nNumKnots == 1: one random value between fStart and fEnd.
//  - otherwise, if [fStart,fEnd] is too short to keep fMinDist between all
//    neighbours (or leaves no slack at all), the knots are spread evenly,
//    the first exactly at fStart and the last exactly at fEnd.
//  - otherwise the knots are random, with neighbours at least fMinDist apart.
// Does NOT add any knots outside [fStart,fEnd] (the caller owns the spline ends).
// Assumes drand() returns a uniform value in [0,1) - TODO confirm against drand.h.
//
// The previous implementation had the even/random condition inverted and its
// random branch never produced a knot (it only inserted relative to knots that
// were already in an initially empty set), leaving pKnots uninitialised.
void initKnotsRandom (int nNumKnots, float fStart, float fEnd, float fMinDist, float* pKnots)
{
	if (nNumKnots <= 0)
		return;

	const float fRange = fEnd-fStart;

	if (nNumKnots == 1)
	{
		pKnots[0] = (float)(fStart + drand() * fRange);
		return;
	}

	if (fMinDist < 0)
		fMinDist = 0; // a negative gap would break the ordering below

	// room that is left once the mandatory gaps between neighbours are reserved
	const float fSlack = fRange - (nNumKnots-1) * fMinDist;

	if (!(fSlack > 0))
	{
		// not enough place to keep fMinDist and still move anything: distribute evenly
		pKnots[0] = fStart;
		for (int nKnot = 1; nKnot < nNumKnots-1; ++nKnot)
		{
			pKnots[nKnot] = fStart + nKnot * fRange / (nNumKnots-1);
		}
		pKnots[nNumKnots-1] = fEnd;
		return;
	}

	// Draw one offset per knot inside the slack and sort them; adding nKnot*fMinDist
	// to the nKnot-th smallest offset keeps the order and guarantees the minimal gap.
	for (int nKnot = 0; nKnot < nNumKnots; ++nKnot)
		pKnots[nKnot] = float(drand() * fSlack);

	std::sort (pKnots, pKnots + nNumKnots);

	for (int nKnot = 0; nKnot < nNumKnots; ++nKnot)
	{
		const float fKnot = fStart + pKnots[nKnot] + nKnot * fMinDist;
		// float rounding may push the last knots marginally past the end
		pKnots[nKnot] = fKnot < fEnd ? fKnot : fEnd;
		assert (nKnot == 0 || pKnots[nKnot] >= pKnots[nKnot-1]);
	}
}

//////////////////////////////////////////////////////////////////////////
// Initializes the array of knots to random sorted values
// The first knot is set to the first sample time, the last one to the last
// A minimal distance between knots is maintained.
void BSplineApproximator::initKnotsRandom (int nNumKnots)
{
	float fTimeBegin  = m_arrKeys[0].t;
	float fTimeEnd    = m_arrKeys[numKeys()-1].t;

	m_arrKnots.resize (nNumKnots);
	m_arrKnots[0] = fTimeBegin;
	m_arrKnots[nNumKnots-1] = fTimeEnd;

	float fMinDist = tmin(m_nDegree * (fTimeEnd - fTimeBegin) / numKeys(), (fTimeEnd - fTimeBegin) / (nNumKnots-1));

	::initKnotsRandom(nNumKnots-2, fTimeBegin+fMinDist, fTimeEnd - fMinDist, fMinDist, &m_arrKnots[1]);

#if LOG_KNOTS
	LogToDbg("Knot array(%d):", nNumKnots);
	for (FloatArray::iterator it = m_arrKnots.begin(); it != m_arrKnots.end(); ++it)
		LogToDbg(" %.2f", *it);
	LogToDbg("\n");
#endif
}

//////////////////////////////////////////////////////////////////////////
// Computes the jacobian of the spline values with respect to the control
// points into mxJ: cell [sample][CP] receives the basis function of that CP
// evaluated at the sample time.
//   mxJ        - output; resized here and zeroed before filling
//   pSpline    - spline providing knots, influence intervals and basis values
//   numSamples - only the first numSamples keys should be taken into account
// When m_fSmoothness > 0, numCPs-1 extra rows are appended, each penalising the
// difference of two neighbouring control points.
// When m_bLoop is set, the last CP column is folded into the first one and
// then dropped, so the matrix ends up one column narrower.
// NOTE(review): the fill loop, the debug check and the loop-closing pass all
// index rows by key index up to numKeys()-1 rather than numSamples-1; when
// numSamples < numKeys() they touch the smoothing rows (or rows past the
// matrix) - confirm whether this is intended.
void BSplineApproximator::initJacobian (CBigMatrix& mxJ, BSplineVec3d* pSpline, int numSamples)
{
	assert (numSamples <= numKeys());
	int numCPs = pSpline->numCPs();

	// first argument counts the rows (samples + optional smoothing terms), second the CPs
	mxJ.Resize (numSamples + (m_fSmoothness>0?numCPs-1:0), numCPs);
	
	mxJ.SetZero();	

	int nCP;
	float t;

	for (nCP = 0; nCP < numCPs; ++nCP)
	{
		// knot indices and the corresponding times between which this CP has influence
		int nKnotSupport[2];
		pSpline->getInfluenceInterval (nCP, nKnotSupport);
		float fSupport[2] = {pSpline->getKnotTime(nKnotSupport[0]), pSpline->getKnotTime(nKnotSupport[1])};

		// find the first sample at or after the start of knot support
		// and the first sample after the end of knot support
		// NOTE: the support interval is INCLUSIVE
		int nSampleSupport[2] = {
			std::lower_bound (m_arrKeys.begin(), m_arrKeys.end(), fSupport[0], KeyTimeSort()) - m_arrKeys.begin(),
			std::upper_bound (m_arrKeys.begin(), m_arrKeys.end(), fSupport[1], KeyTimeSort()) - m_arrKeys.begin()
		};
		// fixup for possible out-of-ranges (upper_bound may return end())
		if (nSampleSupport[1] >= (int)m_arrKeys.size())
			nSampleSupport[1] = m_arrKeys.size() - 1;

		// fixup for possible cyclic influence base: the interval wraps around,
		// so extend the end by one full period; indices are wrapped with % below
		if (nKnotSupport[0] >= nKnotSupport[1])
			nSampleSupport[1] += numKeys();

		// cycle through the support base and calculate the Jacobian matrix cells
		for (int nSample = nSampleSupport[0]; nSample <= nSampleSupport[1]; ++nSample)
		{
			t = m_arrKeys[nSample%numKeys()].t;
			mxJ[nSample%numKeys()][nCP] = pSpline->getBasis (nCP, t);
		}
#ifdef _DEBUG
		// verify the whole column: cells outside the support must match a (zero) basis too
		for (int nSample = 0; nSample < numKeys(); ++nSample)
		{
			t = m_arrKeys[nSample].t;
			double fBasis = pSpline->getBasis (nCP, t);
			assert (tabs(mxJ[nSample][nCP] - fBasis) < 1e-5);
		}
#endif
	}

	// add the smoothing terms: row i encodes m_fSmoothness*(CP[i] - CP[i+1])
  if (m_fSmoothness > 0)
	{
		for (int i = 0; i < numCPs-1; ++i)
		{
			mxJ[numSamples+i][i] = m_fSmoothness;
			mxJ[numSamples+i][i+1] = -m_fSmoothness;
		}
	}

	// close the spline: the last CP is tied to the first one, so merge their columns
	if (m_bLoop)
	{
		for (int nSample = 0; nSample < numKeys(); ++nSample)
		{
			mxJ[nSample][0] += mxJ[nSample][numCPs-1];
		}
		// It's important to preserve the matrix contents on this stage
		mxJ.Resize(mxJ.GetHeight(),mxJ.GetWidth()-1);
	}
}

//////////////////////////////////////////////////////////////
// Allocates a spline of the given degree with one knot per entry of
// m_arrKnots, copies the knot times over and finalizes them.
// The control points are left uninitialized.
// The caller takes ownership of the returned object.
BSplineVec3d* BSplineApproximator::newEmptySpline(int nDegree)
{
	BSplineVec3d* pResult = new BSplineVec3d (m_arrKnots.size(), nDegree, m_isOpen);

	unsigned nKnot = 0;
	for (FloatArray::const_iterator itKnot = m_arrKnots.begin(); itKnot != m_arrKnots.end(); ++itKnot, ++nKnot)
		pResult->setKnotTime (nKnot, *itKnot);

	pResult->finalizeKnotTimes();
	return pResult;
}

////////////////////////////////////////////////////////////
// Convenience overload: computes the control points of pSpline from the
// given inverse jacobian using all the keys.
void BSplineApproximator::initCPs (BSplineVec3d* pSpline, CBigMatrix& mxInvJ)
{
	const int numAllKeys = numKeys();
	initCPs (pSpline, mxInvJ, numAllKeys);
}

////////////////////////////////////////////////////////////////////////////
// Computes the control points and stores them in the given spline:
// CP[n] = sum over the first numSamples keys of mxInvJ[n][sample] * key point.
// One CP is produced per row of mxInvJ. For loop splines (m_bLoop) the
// matrix has one row less than the spline has CPs, and the extra last CP is
// set to a copy of the first one.
void BSplineApproximator::initCPs (BSplineVec3d* pSpline, CBigMatrix& mxInvJ, int numSamples)
{
	const int numRows = mxInvJ.GetHeight();
	assert (numRows == pSpline->numCPs() + (m_bLoop?-1:0));

	for (int nCP = 0; nCP < numRows; ++nCP)
	{
		Vec3d& rCP = (*pSpline)[nCP];
		rCP = Vec3d(0,0,0);

		// the three coordinates are accumulated independently, sample by sample
		for (int nKey = 0; nKey < numSamples; ++nKey)
		{
			for (int nAxis = 0; nAxis < 3; ++nAxis)
			{
				((&rCP.x)[nAxis]) += float(mxInvJ[nCP][nKey] * ((&(m_arrKeys[nKey].pt.x))[nAxis]));
			}
		}
	}

	if (!m_bLoop)
		return;

	// closed spline: the last CP coincides with the first
	(*pSpline)[numRows] = (*pSpline)[0];
}

//////////////////////////////////////////////////////////////////////////
// Fits a new spline to all the keys using the current knot array.
// ASSUMES: m_arrKnots is initialized.
// Returns whatever the two-argument overload returns.
BSplineVec3d* BSplineApproximator::newSpline (Quality* pQuality)
{
	const int numAllKeys = numKeys();
	return newSpline (pQuality, numAllKeys);
}

// Creates a new spline from the current knot array, fitting only the first
// nKeys samples: builds the jacobian, pseudo-inverts it and derives the
// control points from the inverse.
//   pQuality - optional; receives the fit statistics (see initQuality)
//   nKeys    - number of leading keys to fit, must not exceed numKeys()
// Returns a spline owned by the caller, or NULL when the value returned by
// ComposePseudoInverse is below 1e-4 (the spline is deleted in that case and
// *pQuality is left untouched).
BSplineVec3d* BSplineApproximator::newSpline (Quality* pQuality, int nKeys)
{
	assert (nKeys <= numKeys());
	BSplineVec3d* pSpline = newEmptySpline (m_nDegree);

	// compute the jacobian - only for one degree of freedom
	CBigMatrix mxJ;
	initJacobian(mxJ, pSpline, nKeys);

	CBigMatrix mxInv;
	double fStability = mxJ.ComposePseudoInverse(mxInv, 1e-3);

	if (fStability < 1e-4)
	{
		delete pSpline;
		return NULL;
	}

	// Columns of the inverse: one per sample, plus numCPs-1 when smoothing rows exist.
	// Rows of the inverse: one per CP, minus the CP column that initJacobian drops
	// for loop splines (same expectation as the assert in initCPs).
	assert ((mxInv.GetWidth() == nKeys || mxInv.GetWidth() == nKeys + pSpline->numCPs()-1)
		&& (mxInv.GetHeight() == pSpline->numCPs() + (m_bLoop?-1:0)));

	initCPs (pSpline, mxInv, nKeys);

	initQuality (pQuality, pSpline, nKeys);
	return pSpline;
}


// Initializes the knot array with nNumKnots knots evenly distributed between
// the time of the first key and the time of the last key (both ends are hit exactly).
void BSplineApproximator::initKnotsEven (int nNumKnots)
{
	const float fFirst = m_arrKeys[0].t;
	const float fLast = m_arrKeys[numKeys()-1].t;
	const int nLastKnot = nNumKnots-1;

	m_arrKnots.resize(nNumKnots);

	m_arrKnots[0] = fFirst;
	for (int nKnot = nLastKnot-1; nKnot >= 1; --nKnot)
		m_arrKnots[nKnot] = fFirst + nKnot*(fLast-fFirst)/nLastKnot;
	m_arrKnots[nLastKnot] = fLast;
}

//////////////////////////////////////////////////////////////////////////
// Re-initializes the knot array with nNumKnots random knots and fits a
// spline to all the keys with them. The result is passed through from
// newSpline (caller owns it).
BSplineVec3d* BSplineApproximator::newSplineRandomKnots (int nNumKnots, Quality* pQuality)
{
	initKnotsRandom(nNumKnots);

	BSplineVec3d* pFitted = newSpline(pQuality);
	return pFitted;
}

//////////////////////////////////////////////////////////////////////////
// Fills the given quality structure from the spline, measured over all keys.
void BSplineApproximator::initQuality (Quality* pQuality, BSplineVec3d* pSpline)
{
	const int numAllKeys = numKeys();
	initQuality(pQuality, pSpline, numAllKeys);
}

// Fills *pQuality with the fit statistics of pSpline over the first numSamples keys:
//   fRtotal             - sum over samples of 1.5*max(dx^2,dy^2,dz^2) + 0.5*(dx^2+dy^2+dz^2)
//   fKnotPenalty        - pSpline->getKnotProductPenalty()
//   fKnotPenaltyBlended - fKnotPenalty*0.1 + 1
//   fTargetF            - fKnotPenaltyBlended * fRtotal
// Does nothing when pQuality is null.
void BSplineApproximator::initQuality (Quality* pQuality, BSplineVec3d* pSpline, int numSamples)
{
	if (!pQuality)
		return;

	// neutral starting values
	pQuality->fKnotPenalty = 1;
	pQuality->fKnotPenaltyBlended = 1;
	pQuality->fRtotal = 0;
	pQuality->fTargetF = 0;

	for (int nKey = 0; nKey < numSamples; ++nKey)
	{
		// deviation of the spline from the key, squared per axis
		Vec3d vResidual = pSpline->getValue(m_arrKeys[nKey].t) - m_arrKeys[nKey].pt;
		float sq[3] = {tsqr(vResidual.x),tsqr(vResidual.y),tsqr(vResidual.z)};
		// blend of the max*3 and the plain sum metrics, to soften the weak spots of each
		pQuality->fRtotal += 1.5*tmax(sq[0],sq[1],sq[2]) + 0.5*(sq[0]+sq[1]+sq[2]);
	}

	pQuality->fKnotPenalty = pSpline->getKnotProductPenalty();
	pQuality->fKnotPenaltyBlended = pQuality->fKnotPenalty*0.1+1;
	pQuality->fTargetF = pQuality->fKnotPenaltyBlended * pQuality->fRtotal;
}

// Makes an initial knot estimate, given the tolerance (average error per sample).
// The tolerance-driven placement was never implemented: the original loop had
// an empty body and spun forever as soon as the first key time was below the
// last one. Until the real estimator exists, this produces the minimal valid
// knot set - one knot at the first key and, if the keys span a non-empty
// interval, one at the last key.
// TODO: place intermediate knots according to fAveErr.
void BSplineApproximator::estimateKnots(float fAveErr)
{
	(void)fAveErr; // not used yet, see the TODO above

	m_arrKnots.clear();

	// the first knot is always at the first sample
	m_arrKnots.push_back(m_arrKeys[0].t);

	// close the sample interval with the final knot
	const float fLastTime = m_arrKeys[numKeys()-1].t;
	if (m_arrKnots.back() < fLastTime)
		m_arrKnots.push_back(fLastTime);
}

// fills in the array of knots with dense values (dense enough for the approximation to become almost an interpolation)
void BSplineApproximator::initDenseKnots ()
{
	m_arrKnots.clear();

	for (int i = 0; i < numKeys(); i += 2)
	{
		m_arrKnots.push_back (m_arrKeys[i].t);
	}

	if (m_arrKnots.back() < m_arrKeys[numKeys()-1].t)
		m_arrKnots.push_back(m_arrKeys[numKeys()-1].t);
}


// Fits a spline to all the keys over the dense knot set built by initDenseKnots().
// The result is passed through from newSpline (caller owns it).
BSplineVec3d* BSplineApproximator::newSplineDense (Quality*pQuality)
{
	initDenseKnots();

	BSplineVec3d* pFitted = newSpline (pQuality);
	return pFitted;
}

// Sums, over every key that has a full filter window around it, the length of
// the estimated deep derivative raised to the power 1/(2*(m_nDegree+1-nMinusDegree)).
// Serves as an estimate of the integral of the deep derivative over the whole
// approximation interval.
float BSplineApproximator::estimateKnotDistributionIntegral (int nMinusDegree)
{
	const double fExponent = 1.0/(2*(m_nDegree+1-nMinusDegree));
	const int nEnd = numKeys() - m_nHalfFilterSize;

	double fTotal = 0;
	for (int nKey = m_nHalfFilterSize; nKey < nEnd; ++nKey)
	{
		const double fMagnitude = DLength(estimateDeepDerivative (nKey, nMinusDegree));
		fTotal += pow(fMagnitude, fExponent);
	}
	return (float)fTotal;
}

// Estimates a deep derivative around the sample interval at key i by applying
// row nMinusDegree of the precomputed filter (m_mxFilter) to the
// 2*m_nHalfFilterSize keys surrounding that position, per coordinate.
// i is clamped to [m_nHalfFilterSize, numKeys()-m_nHalfFilterSize] so that the
// whole window stays inside the key array.
Vec3d BSplineApproximator::estimateDeepDerivative (int i, int nMinusDegree)
{
	tforceRangeIncl(i, m_nHalfFilterSize, numKeys() - m_nHalfFilterSize);

	// per-coordinate accumulators, kept in double
	double fAccum[3] = {0,0,0};

	for (int nAxis = 0; nAxis < 3; nAxis++)
	{
		for (int nTap = 0; nTap < m_nHalfFilterSize; ++nTap)
		{
			// tap to the right of the centre, then its mirror to the left
			fAccum[nAxis] += m_arrKeys[i+nTap].pt[nAxis] * m_mxFilter[nMinusDegree][m_nHalfFilterSize+nTap];
			fAccum[nAxis] += m_arrKeys[i-nTap-1].pt[nAxis] * m_mxFilter[nMinusDegree][m_nHalfFilterSize-nTap-1];
		}
	}

	return Vec3d((float)fAccum[0], (float)fAccum[1], (float)fAccum[2]);
}

// Fills mxBase with a random basis for the NR (Newton-Raphson) calculations.
// Each column is one direction; each row corresponds to one coordinate.
// Two strategies are used:
//  1. When the matrix has exactly one row per interior knot (m_arrKnots.size()-2)
//     and drand() < 0.75: "structured" directions that move a run of
//     neighbouring knots towards one of their neighbours.
//  2. Otherwise: random columns with components in [-1,1), orthogonalized
//     against the previous columns and normalized (Gram-Schmidt style);
//     a column that degenerates (length <= 1e-3) is regenerated.
// (assumes drand() returns a uniform value in [0,1) - TODO confirm)
void BSplineApproximator::initRandomBase (CBigMatrix& mxBase)
{
	if (mxBase.GetHeight() == m_arrKnots.size()-2 && drand() < 0.75)
	{
		// pick the first interior knot of the run; one column per knot of the run
		unsigned nKnotBegin = 1+int(drand() * (1+mxBase.GetHeight()-mxBase.GetWidth()));
		// NOTE(review): if height == width the clamp range is [1,0] - confirm this cannot happen
		tforceRangeIncl<unsigned>(nKnotBegin, 1, mxBase.GetHeight()-mxBase.GetWidth());
		for (unsigned nKnot = nKnotBegin; nKnot < nKnotBegin + mxBase.GetWidth(); nKnot++)
		{
			if (drand()< 0.5)
			{
				// shift the knots
				// all interior knots move towards the same side (previous or next knot)
				int nNextIdx = (drand() < 0.5) ? -1 : 1;
				// make negative or positive shift of the times of the knots
				for (unsigned nDeltaKnot = 1; nDeltaKnot < m_arrKnots.size()-1; ++nDeltaKnot)
				{
					double dDelta;
					if (nDeltaKnot <= nKnot)
					{
						// weight grows linearly from the first knot up to 1/2 at nKnot
						dDelta = double (nDeltaKnot * (m_arrKnots[int(nDeltaKnot)+nNextIdx]-m_arrKnots[nDeltaKnot])) / (2*nKnot);
					}
					else
					{
						// weight falls linearly from nKnot down to 0 at the last knot
						dDelta = (m_arrKnots[int(nDeltaKnot)+nNextIdx]-m_arrKnots[nDeltaKnot]) * double (m_arrKnots.size() - 1 - nDeltaKnot) / (2*(m_arrKnots.size()-1-nKnot));
					}
					// row index is the interior knot index (knot 0 is not movable)
					mxBase[nDeltaKnot-1][nKnot-nKnotBegin] = dDelta;
				}
			}
			else
			{
				// pull towards or push from a knot
				// knots before nKnot move towards one side, knots after it towards the
				// opposite side; nKnot itself stays put (delta 0)
				int nNextIdx = (drand() < 0.5) ? -1 : 1;
				for (unsigned nDeltaKnot = 1; nDeltaKnot < m_arrKnots.size()-1; ++nDeltaKnot)
				{
					double dDelta = 0;
					if (nDeltaKnot < nKnot)
					{
						dDelta = double (nDeltaKnot * (m_arrKnots[int(nDeltaKnot)+nNextIdx]-m_arrKnots[nDeltaKnot])) / (2*nKnot);
					}
					else
					if (nDeltaKnot > nKnot)
					{
						dDelta = (m_arrKnots[int(nDeltaKnot)-nNextIdx]-m_arrKnots[nDeltaKnot]) * double (m_arrKnots.size() - 1 - nDeltaKnot) / (2*(m_arrKnots.size()-1-nKnot));
					}
					mxBase[nDeltaKnot-1][nKnot-nKnotBegin] = dDelta;
				}
			}
		}
	}
	else
	// nBasis is advanced only when a column has been accepted (see below)
	for (int nBasis = 0; nBasis < mxBase.GetWidth(); )
	{
		int nCoord;
		// init the next base column with random components in [-1,1)
		for (nCoord = 0; nCoord < mxBase.GetHeight(); ++nCoord)
			mxBase[nCoord][nBasis] = drand() * 2 - 1;

		// for each previous column, subtract it from this one so that it's orthogonal
		// (the previous columns are already unit length, so the projection is a plain dot product)
		for (int nPrev = 0; nPrev < nBasis; ++nPrev)
		{
			double fProj = 0;
			for (nCoord = 0; nCoord < mxBase.GetHeight(); ++nCoord)
				fProj += mxBase[nCoord][nPrev]*mxBase[nCoord][nBasis];
			for (nCoord = 0; nCoord < mxBase.GetHeight(); ++nCoord)
				mxBase[nCoord][nBasis] -= fProj * mxBase[nCoord][nPrev];
		}

		// normalize
		double fLength = 0;
		for (nCoord = 0; nCoord < mxBase.GetHeight(); ++nCoord)
			fLength += tsqr (mxBase[nCoord][nBasis]);
		fLength = sqrt(fLength);

		if (fLength > 1e-3)
		{
			// if we didn't lose all the randomness during orthogonalization, normalize and go on
			for (nCoord = 0; nCoord < mxBase.GetHeight(); ++nCoord)
				mxBase[nCoord][nBasis] /= fLength;
			++nBasis;
		}
		else
		{
		// otherwise, try this again (nBasis is not advanced, the column is redrawn)
			LogToDbg("Retrying generation of basis ort, length=%g.", fLength);
		}
	}
}

// Initializes the knots for the 3-step optimization: clamps the requested
// number of knots to [2, numKeys()-m_nDegree+1], distributes that many knots
// with initKnotsDerivDistributed, removes the extra ones and reports the
// number of knots actually left through numKnots (in/out).
// ctx is currently not consulted.
void BSplineApproximator::initKnotsAuto3StepDD (OptCtx3Step& ctx, int& numKnots)
{
	int nUpperLimit = numKeys() - m_nDegree + 1;
	tforceRangeIncl(numKnots, 2, nUpperLimit);

	initKnotsDerivDistributed(numKnots, 0);
	removeKnotsExtra ();

	// the pruning above may have changed the count
	numKnots = m_arrKnots.size();
}


// Produces the spline for the current knots as requested by the optimization
// context: without NR optimization a plain fit replaces pSpline; with it,
// pSpline is handed to the Newton-Raphson routine with the configured
// iteration limit.
void BSplineApproximator::newSplineLD (OptCtxLD& ctx, BSplineVec3d_AutoPtr& pSpline, Quality* pQuality)
{
	if (!ctx.bOptimizeNR)
	{
		pSpline = newSpline (pQuality);
		return;
	}

	newSplineNewtonRaphson(ctx.nMaxIterationsNR, pSpline, pQuality);
}

// Automatic progressive combinatory spline: starting with a single knot at
// the first key, adds knots one by one through optimizeProgressiveStep until
// the last knot reaches the time of the last key, then builds the final
// spline (with Newton-Raphson refinement when ctx.bOptimizeNR is set).
//   ctx      - options; a non-positive ctx.fStep is replaced in place by the
//              key time span divided by max(1, numKeys()-m_nDegree+1)
//   pSpline  - receives the resulting spline
//   pQuality - receives the quality of the resulting spline
void BSplineApproximator::newSplineAutoProgressive (OptCtxProg& ctx, BSplineVec3d_AutoPtr& pSpline, Quality* pQuality)
{
	if (ctx.fStep <= 0)
		ctx.fStep = (m_arrKeys[numKeys()-1].t - m_arrKeys[0].t) / tmax<int>(1,numKeys()-m_nDegree+1);

	//ctx.fStep /= 3;

	m_arrKnots.clear();
	m_arrKnots.push_back(m_arrKeys[0].t);

	// each step appends (at least) knot number nKnot
	for (int nKnot = 1; m_arrKnots.back() < m_arrKeys[numKeys()-1].t; ++nKnot)
	{
		float fFinish = optimizeProgressiveStep (ctx, nKnot, pSpline, pQuality);
		LogToDbg("Progressive: knot %.3f finish %.3f\n", m_arrKnots[nKnot], fFinish);
	}

	if (ctx.bOptimizeNR)
		newSplineNewtonRaphson(ctx.nMaxIterationsNR, pSpline, pQuality);
	else
		pSpline = newSpline (pQuality);
}

// Optimizes the position of knot number nKnot, trying to reach the farthest
// distance with the next knots.
//   ctx      - options (forward/backward search speeds, step, error budget, spans)
//   nKnot    - index of the knot to place; knots [0, nKnot) are taken as fixed.
//              Must be >= 1 (knot nKnot-1 is read).
//   nDepth   - look-ahead depth: with nDepth > 0 the position is judged by how far
//              a recursive call for knot nKnot+1 gets; with nDepth == 0 it is judged
//              by fitting a spline to the keys up to the candidate knot.
//   pSpline  - receives the best spline found
//   pQuality - receives the quality of that spline
// Returns the time reached: the best finish of the recursion (nDepth > 0) or
// the last acceptable knot time (nDepth == 0; 0 when none was acceptable).
float BSplineApproximator::optimizeProgressiveStep (OptCtxProg& ctx, int nKnot, int nDepth, BSplineVec3d_AutoPtr& pSpline, Quality* pQuality)
{
	// make room for the knot being placed
	if ((int)m_arrKnots.size() <= nKnot)
	{
		assert (m_arrKnots.size() == nKnot);
		m_arrKnots.resize (nKnot+1);
	}

#ifdef _DEBUG
	{
		LogToDbg("OPStep %d: knot %d; knots:[", nDepth, nKnot);
		for (int i = 0; i < nKnot; i++)
			LogToDbg(" %.2f", m_arrKnots[i]);
		LogToDbg("]\n");
	}
#endif

	// first key strictly after the previous knot (findSample uses upper_bound)
	int nPrevSample = findSample (m_arrKnots[nKnot-1]);
	LogToDbg ("Prev Knot(%.2f) has sample %d (%.2f)\n", m_arrKnots[nKnot-1], nPrevSample, m_arrKeys[nPrevSample].t);
	assert (nPrevSample < numKeys());
	// make at least one key between the knots
	float fTimeStart = m_arrKeys[nPrevSample].t;
	float fTimeEnd = m_arrKeys[numKeys()-1].t;
	// first candidate: a fraction fFwdSpeed into the scan range [fTimeStart, fTimeEnd]
	m_arrKnots[nKnot] = fTimeStart + (fTimeEnd-fTimeStart) * ctx.fFwdSpeed;

	Quality qCur;
	BSplineVec3d_AutoPtr pCurSpline;

	if (nDepth)
	{
		float fBestKnot = m_arrKnots[nKnot-1];
		float fBestFinish = 0;

		// narrow the scan range until the end is reached or the range is below ctx.fStep
		do
		{
			LogToDbg("Time %.2f Scan Range: [%.2f...%.2f]\n", m_arrKnots[nKnot], fTimeStart, fTimeEnd);
			if (m_arrKnots[nKnot] > m_arrKeys[numKeys()-2].t)
			{
				// the candidate is past the last-but-one key: try to finish the spline right here
				LogToDbg("Knot behind pre-last key, ");
				m_arrKnots[nKnot] = m_arrKeys[numKeys()-1].t;
				m_arrKnots.resize (nKnot+1);
				// NOTE(review): newSpline may return NULL; the result is used without a check
				pSpline = newSpline (pQuality);

				int nPS1 = findSample(m_arrKnots[nKnot-1]);
				float fPartErr = getPartialError(pSpline, nPS1, numKeys());

				// accept when the error of the tail stays within its share of the budget
				if (fPartErr <= ctx.fErr*(numKeys()-nPS1)/(numKeys()-1))
				{
					fBestKnot = m_arrKeys[numKeys()-1].t;
					fBestFinish = m_arrKeys[numKeys()-1].t;
					LogToDbg("best fit found.\n");
					break;
				}
				
				if (fTimeStart < m_arrKeys[numKeys()-2].t)
				{
					// the message is printed before fTimeEnd is updated, so it shows the old range
					LogToDbg("spline doesn't fit, trunkating time range to [%.2f...%.2f]\n", fTimeStart, fTimeEnd);
          fTimeEnd = m_arrKeys[numKeys()-2].t;
					m_arrKnots[nKnot] = fTimeStart + (fTimeEnd-fTimeStart) * ctx.fFwdSpeed;
				}
				else
				{
					// no room left to search: put a knot at fTimeStart and one at the last key
					LogToDbg("finishing spline as is\n");
					m_arrKnots.resize(nKnot+2);
					m_arrKnots[nKnot] = fTimeStart;
					m_arrKnots[nKnot+1] = m_arrKeys[numKeys()-1].t;
					
					return m_arrKnots[nKnot+1];
				}
			}

			// look ahead: how far does the next knot get with this candidate?
			float fFinish = optimizeProgressiveStep(ctx, nKnot+1, nDepth-1, pCurSpline, &qCur);
			if (fFinish > fBestFinish)
			{
				// improvement: remember it and continue the search further forward
				fBestKnot = m_arrKnots[nKnot];
				fBestFinish = fFinish;
				
				pSpline = pCurSpline;
				*pQuality = qCur;

				fTimeStart = m_arrKnots[nKnot];
				m_arrKnots[nKnot] = fTimeStart + (fTimeEnd-fTimeStart) * ctx.fFwdSpeed;
			}
			else
			{
				// no improvement: the candidate becomes the new upper bound, step back
				fTimeEnd = m_arrKnots[nKnot];
				m_arrKnots[nKnot] = fTimeEnd - (fTimeEnd-fTimeStart) * ctx.fBwdSpeed;
			}
		}
		while (fBestFinish < m_arrKeys[numKeys()-1].t && fTimeEnd - fTimeStart > ctx.fStep);

		m_arrKnots [nKnot] = fBestKnot;
		return fBestFinish;
	}
	else
	{
		float fLastGoodKnot = 0;
		assert (m_arrKnots.size() == nKnot+1);

		do
		{
			LogToDbg("Time %.2f Scan Range: [%.2f...%.2f]\n", m_arrKnots[nKnot], fTimeStart, fTimeEnd);

			// fit only the keys up to (and including) the candidate knot time
			int nSamples = findSample(m_arrKnots[nKnot]);
			// NOTE(review): newSpline may return NULL, in which case qCur is not filled in
			pCurSpline = newSpline (&qCur, nSamples);

			// NOTE(review): these knot indices can become negative for small nKnot,
			// depending on ctx.nSpans - confirm the constraints on nSpans
			int nPS1 = findSample(m_arrKnots[nKnot - ctx.nSpans-1]),
				nPS2 = findSample(m_arrKnots[nKnot- ctx.nSpans+1]);
			// NOTE(review): fErr is computed from pSpline (not pCurSpline) and is never
			// used; the decision below relies on qCur.fRtotal instead
			float fErr = getPartialError(pSpline, nPS1, nPS2);

			if (qCur.fRtotal > (ctx.fErr * (nPS2-nPS1)) / (numKeys()-1))
			{
				// too much error: the candidate becomes the new upper bound
				fTimeEnd = m_arrKnots[nKnot];
				// NOTE(review): the new candidate is computed from fTimeStart (minus a
				// positive amount), so the check right below fires and leaves the loop;
				// looks like fTimeEnd was intended - confirm
				m_arrKnots[nKnot] = fTimeStart - tmax(ctx.fStep, (fTimeEnd-fTimeStart) * ctx.fBwdSpeed);
				if (m_arrKnots[nKnot] < fTimeStart)
					break;
				if (m_arrKnots[nKnot] > m_arrKeys[numKeys()-1].t)
					m_arrKnots[nKnot] = m_arrKeys[numKeys()-1].t;
			}
			else
			{
				// acceptable: remember this fit and try a later knot time
				pSpline = pCurSpline;
				*pQuality = qCur;
				fLastGoodKnot = m_arrKnots[nKnot];
			
				fTimeStart = m_arrKnots[nKnot];
				m_arrKnots[nKnot] = fTimeEnd - tmax(ctx.fStep, (fTimeEnd - fTimeStart) * ctx.fFwdSpeed);

				// past the last-but-one key: snap to the last key
				if (m_arrKnots[nKnot] > m_arrKeys[numKeys()-2].t)
					m_arrKnots[nKnot] = m_arrKeys[numKeys()-1].t;
			}
		}
		while (fLastGoodKnot < m_arrKeys[numKeys()-1].t && fTimeEnd - fTimeStart > ctx.fStep);
		LogToDbg("0-depth routine found best finish %.2f\n", fLastGoodKnot);
		m_arrKnots[nKnot] = fLastGoodKnot;
		return fLastGoodKnot;
	}
}

// Sum of the distances between the spline and the keys with indices in
// [nSampleStart, nSampleEnd). Accumulated in double, returned as float.
float BSplineApproximator::getPartialError (BSplineVec3d* pSpline, int nSampleStart, int nSampleEnd)
{
	double fTotal = 0;
	int nKey = nSampleStart;
	while (nKey < nSampleEnd)
	{
		fTotal += DLength(pSpline->getValue(m_arrKeys[nKey].t)-m_arrKeys[nKey].pt);
		++nKey;
	}
	return (float)fTotal;
}



// Combined refinement-NR-KD algorithm, run as three stages:
//  1. knot-insertion refinement (newSplineAutoRefine);
//  2. Newton-Raphson optimization, only when enabled in ctx and the number of
//     keys per knot is at least ctx.fMinSPKForNR;
//  3. knot removal (optimizeKD).
// The error figures are logged after each stage.
void BSplineApproximator::newSplineComboRefine (OptCtxComboRefine& ctx, BSplineVec3d_AutoPtr& pSpline, Quality* pQuality)
{
	// stage 1
	newSplineAutoRefine (ctx, pSpline, pQuality);
	LogToDbg(" RSTEP: err=%g,%g; %d knots\n", pQuality->fRtotal, pQuality->fTargetF, pSpline->numKnots());

	// stage 2
	const bool bRunNR = ctx.bOptimizeNR && (float(numKeys())/pSpline->numKnots()) >= ctx.fMinSPKForNR;
	if (bRunNR)
	{
		newSplineNewtonRaphson(ctx.nMaxNRIterations2, pSpline, pQuality);
		LogToDbg (" NR STEP: err=%g,%g\n", pQuality->fRtotal, pQuality->fTargetF);
	}

	// stage 3
	optimizeKD(ctx, pSpline, pQuality);
	LogToDbg(" KD STEP: err=%g,%g; %d knots\n", pQuality->fRtotal, pQuality->fTargetF, pSpline->numKnots());
}


// Builds a spline by progressive knot insertion: starts with two knots (at the
// first and the last key), fits a spline, and while the total error is not
// below numKeys()*ctx.fErrSingle inserts new knots at the times of the keys
// with the largest deviation from the current spline.
//   ctx      - options: per-sample error target (fErrSingle) and the maximal
//              number of knots inserted per fit (nMaxKIN)
//   pSpline  - receives the last spline that was fitted
//   pQuality - receives the quality of that spline
// NOTE(review): when the loop ends because no unused keys are left, the knots
// inserted in the last round are not reflected in pSpline - confirm.
// NOTE(review): assumes numKeys() >= 2 (numKeys()-2 is used as a size).
void BSplineApproximator::newSplineAutoRefine (OptCtxPR& ctx, BSplineVec3d_AutoPtr& pSpline, Quality* pQuality)
{
	float fTotalError = numKeys()*ctx.fErrSingle;
	// initial knots
	m_arrKnots.resize (2);
	m_arrKnots[0] = m_arrKeys[0].t;
	m_arrKnots[1] = m_arrKeys[numKeys()-1].t;

	// unused keys for knots: indices of the interior keys (1..numKeys()-2)
	// whose times have not been turned into knots yet
	std::vector<int> arrKUU;
	arrKUU.resize (numKeys()-2);
	int i,k;
	for (i = 0; i < numKeys()-2; i++)
		arrKUU[i] = (i+1);

	// (error, number of knots) of the previous fits, most recent first
	HistoryAR history;	

	// at most N-2 steps can be done, because there won't be unused knots left
	while (!arrKUU.empty())
	{
#ifdef _DEBUG
		{
			// NOTE(review): on the first pass pQuality->fRtotal is whatever the caller left there
			LogToDbg("% 2d knots, error %g, knots:[", m_arrKnots.size(), pQuality->fRtotal);
			for (int j = 0; j < (int)m_arrKnots.size(); ++j)
				LogToDbg(" %.2f", m_arrKnots[j]);
			LogToDbg(" ]\n");
		}
#endif

		// NOTE(review): newSpline may return NULL; pSpline is dereferenced right below
		pSpline = newSpline (pQuality);
		history.push_front (HistoryARElement(pQuality->fRtotal, pSpline->numKnots()));

		// good enough - stop refining
		if (pQuality->fRtotal < fTotalError)
			break;

		// Knot Insertion Repeat count: how many knots to insert before the next fit
		int numKIRepeat = 1;
		if (history.size() >= 2)
		{
			// history[0] is the last, 1 is the one before
			float fDeltaError = float(history[1].fError - history[0].fError);
			// nDeltaKN is currently not used (see the commented-out factor below)
			int nDeltaKN = history[0].numKnots - history[1].numKnots;
			if (fDeltaError > fTotalError*arrKUU.size())
			{
				// the last round reduced the error a lot: extrapolate how many more
				// rounds of that size are needed to reach the target
				// actually , we have to take into account the delta-steps, but..
				numKIRepeat = (int)/* nDeltaKN */ ((history[0].fError - fTotalError) / fDeltaError);
			}
			else
			{
				// slow progress: insert half of the remaining candidates at once
				numKIRepeat = arrKUU.size()/2;
			}
		}

		tforceRangeIncl<int> (numKIRepeat, 1, arrKUU.size());
		// NOTE(review): a non-positive ctx.nMaxKIN would make the countdown below misbehave
		if (numKIRepeat > ctx.nMaxKIN)
			numKIRepeat = ctx.nMaxKIN;

		// todo: optimize this cycle
		while (numKIRepeat--)
		{
			// find the unused key with the largest deviation from the current spline
			std::vector<int>::iterator itP = arrKUU.end();
			double fE = -1;

			for (std::vector<int>::iterator it = arrKUU.begin(); it != arrKUU.end(); ++it)
			{
				k = *it;
				double e = DLength(pSpline->getValue(m_arrKeys[k].t) - m_arrKeys[k].pt);
		
				// an unused key must not coincide with an existing knot
				assert (*std::lower_bound(m_arrKnots.begin(), m_arrKnots.end(), m_arrKeys[k].t) > m_arrKeys[k].t);

				if (itP == arrKUU.end() || fE < e) 
				{
					fE = e;
					itP = it;
				}
			}

			// found the error, now add the knot (keeping the knot array sorted)
			k = *itP;
			arrKUU.erase(itP);
			FloatArray::iterator itK = std::lower_bound(m_arrKnots.begin(), m_arrKnots.end(), m_arrKeys[k].t);
			m_arrKnots.insert (itK, m_arrKeys[k].t);
		}
	}
}


// finds the first sample that's >= the given time
int BSplineApproximator::findSample (float t)
{
	KeyArray::iterator pKey = std::upper_bound (m_arrKeys.begin(), m_arrKeys.end(), t, KeyTimeSort());
  return pKey - m_arrKeys.begin();
}


// initializes knots so that they form a full equation that always can fit the data
/*
void BSplineApproximator::initKnotsFull ()
{
	m_arrKnots.resize (numKeys());
	for (int i = 0; i < numKeys(); ++i)
		m_arrKnots[i] = m_arrKeys[i].t;
}
*/
// Automatic reduction from a full-power spline: initializes the knots with
// initKnotsFull(), fits a spline to all the keys and then lets optimizeKD
// (knot removal) reduce it according to ctx.
//   pSpline  - receives the resulting spline
//   pQuality - receives its quality
void BSplineApproximator::newSplineAutoKDFull (OptCtxKD& ctx, BSplineVec3d_AutoPtr& pSpline, Quality* pQuality)
{
	LogToDbg("Constructing full spline from %d samples.", numKeys());
	// NOTE(review): the definition of initKnotsFull() right above is commented out;
	// presumably another one exists elsewhere - confirm
	initKnotsFull ();
	pSpline = newSpline(pQuality);
	LogToDbg("Done; Optimizing.");
	optimizeKD(ctx, pSpline, pQuality);
}


///////////////////////////////////////////////////////////////////////////
// Creates a spline using the lower-degree piecewise approximant strategy:
// builds a piecewise approximation of the keys with the error ctx.fErrLD,
// derives the knots from its pieces (initKnotsFromLDS) and fits the spline
// (newSplineLD). Then, while the total error stays below
// ctx.fErrSingleTarget*numKeys(), retries up to 4 times with a doubled
// ctx.fErrLD. Finally it may run the knot removal (ctx.bOptimizeKD).
// ctx.fErrLD is modified in place.
// NOTE(review): the "last known good" spline/quality are captured before the
// retry loop and never updated inside it, so the loop results are discarded
// when they are restored afterwards - confirm that this is intended.
void BSplineApproximator::newSplineAutoLD(OptCtxLD& ctx, BSplineVec3d_AutoPtr& pSpline, Quality* pQuality)
{
	// piecewise approximation
	CB3PApproximation appr;
	constructB3PApproximation(appr, m_nDegree, ctx.fErrLD);
	initKnotsFromLDS (appr);

	newSplineLD(ctx, pSpline, pQuality);

	// refinement cycles
	float fStep = 2;
	//for (int i = 0; i < 4; ++i)
	{
		int j;
		/*
		for(j = 0; j < 4 && pQuality->fTargetF > ctx.fErrTo; ++j)
		{

			// try to decrease the error and retry
			ctx.fErrLD /= fStep;
			ctx.fErrLD = constructB3PApproximation(appr, m_nDegree, ctx.fErrLD);
			initKnotsFromLDS (appr);
			newSplineLD (ctx, pSpline, pQuality);
		}
		LogToDbg("LD:dec err=(%g,%g), lderr=%g\n", pQuality->fTargetF, pQuality->fRtotal, ctx.fErrLD);
		fStep = (float)sqrt(fStep);
		*/

		// remember the last known good (LKG) result
		// NOTE(review): depending on the copy semantics of BSplineVec3d_AutoPtr this
		// may leave pSpline empty until it is restored below - confirm
		BSplineVec3d_AutoPtr pLKGSpline = pSpline;
		Quality qLKG = *pQuality;
		// relax the lower-degree error while the result is still better than required
		for(j = 0; j < 4 && pQuality->fRtotal < ctx.fErrSingleTarget*numKeys(); ++j)
		{
			ctx.fErrLD *= fStep;
			// the error actually returned by the approximation replaces the requested one
			ctx.fErrLD = constructB3PApproximation(appr, m_nDegree, ctx.fErrLD);
			initKnotsFromLDS(appr);
			newSplineLD(ctx, pSpline, pQuality);
		}
		// restore the last known good result
		pSpline = pLKGSpline;
		*pQuality = qLKG;

		// leftover of the disabled outer cycle: fStep is not read after this point
		fStep = float(sqrt(fStep));
		LogToDbg("LD:inc err=(%g,%g), lderr=%g\n", pQuality->fRtotal,pQuality->fTargetF, ctx.fErrLD);
	}

	if (ctx.bOptimizeKD)
		optimizeKD(ctx, pSpline, pQuality);
}

// initializes knots from the given lower-degree spline
void BSplineApproximator::initKnotsFromLDS (class CB3PApproximation& appr)
{
	int numPieces = appr.numPieces();
	
	// number of pieces the knots subdivide each piece of the approximant
	int numSubdiv = m_nDegree - 1;

	m_arrKnots.clear();
	m_arrKnots.reserve (numSubdiv * appr.numPieces()+1);
	// index of the sample, on each cycle it denotes the end of the previous piece
	int nSample = 0;
	for (int nPiece = 0; nPiece < numPieces; ++nPiece)
	{
		const CBezierVec3dApproximation& piece = appr.getPiece(nPiece);
		int numSubsamples = appr.getPieceSize (nPiece);

		if (numSubsamples <= 2)
			break;
		
		if (!nPiece)
			m_arrKnots.push_back(m_arrKeys[0].t);
		else
		{
			m_arrKnots.push_back((m_arrKeys[nSample].t+m_arrKeys[nSample+1].t)/2);
			++nSample;
		}

		float fTimeStart = m_arrKeys[nSample].t;
		float fTimeEnd = m_arrKeys[nSample + numSubsamples-1].t;
		
		int numCurSubdiv = tmin (numSubdiv, numSubsamples);
		for (int nKnot = 1; nKnot < numCurSubdiv; ++nKnot)
		{
			m_arrKnots.push_back(fTimeStart + nKnot * (fTimeEnd-fTimeStart) / numCurSubdiv);
		}
		nSample += numSubsamples-1;
	}

	m_arrKnots.push_back(m_arrKeys[numKeys()-1].t);
	assert (m_arrKnots.back() != m_arrKnots[m_arrKnots.size()-2]);
}


///////////////////////////////////////////////////////////////////////////
// Creates a spline using the 3-step refinement strategy:
// initial distribution, addition of knots, deletion of knots.
// Automatically estimates the number of knots needed.
//   ctx      - options: NR/KD switches, NR iteration limit, target error fErrFrom
//   pSpline  - in/out: when non-null on entry its knots are used as the starting
//              point; receives the resulting spline
//   pQuality - receives the quality of the resulting spline
void BSplineApproximator::newSplineAuto3Step (OptCtx3Step& ctx, BSplineVec3d_AutoPtr& pSpline, Quality* pQuality)
{
	int numSamples = numKeys();
	int numMaxKnots = (numSamples-m_nDegree+1);

	// TODO: knot initial estimation algorithm
	int nKnots;
	if (pSpline)
	{
		// continue from the knots of the spline passed in
		initKnotsFromSpline (pSpline);
		nKnots = m_arrKnots.size();
	}
	else
	{
		// heuristic initial count derived from the deep-derivative integral, at least m_nDegree+1
		nKnots = tmax<int>(m_nDegree+1,tabs((int)((numSamples/60.0)*estimateKnotDistributionIntegral())));
		if(nKnots > numMaxKnots/m_nDegree) // don't start with higher values
			nKnots = numMaxKnots/m_nDegree;

		initKnotsAuto3StepDD(ctx, nKnots);
	}

	// how many knots are added/removed per round; grows after each round
	int nKnotDelta = tmax(1, nKnots/8);
	
	LogToDbg("Starting with %d knots over %d samples. ", nKnots, numKeys());
	
	bool bWasRefining = false; // did refining happened?

	if (ctx.bOptimizeNR)
	while (true)
	{
		newSplineNewtonRaphson (ctx.nMaxIterationsNR, pSpline, pQuality);

		if (pQuality->fTargetF < ctx.fErrFrom)
		{
			// better than required: try with fewer knots, unless knots have already
			// been added (then this is the answer) or nothing is left to remove
			if (bWasRefining || pSpline->numKnots() <= 2)
				break; // already optimal
			nKnots -= nKnotDelta;
			initKnotsAuto3StepDD (ctx, nKnots);

			// drop the current spline; the next NR call receives a null pSpline
			pSpline = NULL;
			++nKnotDelta;
		}
		else
		if (pQuality->fTargetF > ctx.fErrFrom)
		{
			// worse than required: add knots; the first refinement restarts with a step of 1
			if (!bWasRefining)
				nKnotDelta = 1;
			bWasRefining = true;

			if (pSpline->numCPs() >= numMaxKnots)
				break; // already optimal

			LogToDbg("Turning knots: %d->%d ", nKnots, nKnots+nKnotDelta);

			int nOldKnots = nKnots;
			nKnots += nKnotDelta;

			// nKnots is in/out: the routine reports the count it actually produced
			initKnotsAuto3StepDD(ctx, nKnots);
			// no growth possible any more - stop
			if (nOldKnots >= nKnots)
				break;
			++nKnotDelta;
		}
		else
			break; // exactly on target
	}
	else
	{
		// no NR optimization: a single plain fit with the initial knots
		pSpline = newSpline (pQuality);
	}

	// final knot-removal pass
	if (ctx.bOptimizeKD)
		optimizeKD (ctx, pSpline, pQuality);
}

// Knot-deletion optimization: keeps removing the least contributing knot and
// rebuilding the spline while the total residual stays below
// ctx.fErrSingleTarget*numKeys(). On return pSpline/pQuality hold the last
// spline that still satisfied that limit (or the input if none did).
void BSplineApproximator::optimizeKD(OptCtxKD& ctx, BSplineVec3d_AutoPtr& pSpline, Quality* pQuality)
{
	// last known good spline and its quality
	BSplineVec3d_AutoPtr pBestSpline = pSpline;
	BSplineApproximator::Quality qBest = *pQuality;

	for (;;)
	{
		// only the two boundary knots are left - nothing to remove
		if (pSpline->numKnots() <= 2)
			break;

		// the current spline is already over the error limit
		if (!(pQuality->fRtotal < ctx.fErrSingleTarget*numKeys()))
			break;

		removeLeastContributingKnot(pSpline);

		// Newton-Raphson is only used when there are enough samples per knot
		const bool bRefineWithNR =
			ctx.bOptimizeNR && (float(numKeys())/pSpline->numKnots()) >= ctx.fMinSPKForNR;

		if (bRefineWithNR)
			newSplineNewtonRaphson (ctx.nMaxIterationsNR, pSpline, pQuality);
		else
			pSpline = newSpline (pQuality);

		if (pQuality->fRtotal < ctx.fErrSingleTarget*numKeys())
		{
			qBest = *pQuality;
			pBestSpline = pSpline;
		}
	}

	*pQuality = qBest;
	pSpline = pBestSpline;
}

///////////////////////////////////////////////////////////////////////////
// Creates a spline using n-th derivative uniform initial knot distribution
// and Newton-Raphson algorithm with ... modification to limit jumps for
// optimization of knot positions
//
// The knots must already be present in m_arrKnots when this is called.
// nMaxIterations - upper limit on the number of optimization iterations
// pSpline        - IN: optional starting spline (rebuilt if it is null or its knot
//                  count differs from m_arrKnots); OUT: the best spline found
// pQuality       - IN/OUT: quality of pSpline; only overwritten by a better one
void BSplineApproximator::newSplineNewtonRaphson (int nMaxIterations, BSplineVec3d_AutoPtr& pSpline, Quality* pQuality)
{
	//initKnotsDerivDistributed(nNumKnots);
	int nNumKnots = m_arrKnots.size();
	// the knot distribution function must have already been called

	// make sure the starting spline matches the current knot array
	if (!pSpline || pSpline->numKnots() != m_arrKnots.size())
		pSpline = newSpline (pQuality);

	if (nNumKnots <= 2)
	{
		// there are no inner knots, nothing to optimize
		return;
	}

	// working copy: the iteration advances pCurSpline/qCur, while
	// pSpline/pQuality keep the best result seen so far
	Quality qCur = *pQuality;
	BSplineVec3d_AutoPtr pCurSpline = pSpline;

	// one row per inner knot, one column per ort (at most 25 orts)
	CBigMatrix mxBase (m_arrKnots.size() - 2, tmin<unsigned>(25,m_arrKnots.size() - 2));
	// Hessian ?
	CBigMatrix mxH; //(3*numKeys(), mxBase.GetWidth());
	//mxBase.SetIdentity();
	initRandomBase(mxBase);

	// max times the basis will be changed before resorting to another set of knots
	int nMaxBasisChange = 2;
	// max number of consecutive knot re-randomizations before giving up
	int nMaxKnotChange = tmax<unsigned>(1, (numKeys()/(m_arrKnots.size()*(m_nDegree+1))));

	int nBasisChanged = 0; // number of times basis has changed
	int nKnotsChanged = 0;

	LogToDbg("%d knots. ", nNumKnots);
	for (int nIter = 0; nKnotsChanged < nMaxKnotChange && nIter < nMaxIterations; nIter++)
	{
		// every iteration starts from a fresh random basis
		initRandomBase(mxBase);

		LogToDbg ("% 3d.", nIter);

		// for each free knot, find the derivative of each data point on this knot
		Vec3dArray arrSplineSamples;
		splineNR_Hessian (mxBase, mxH, pCurSpline, arrSplineSamples, &qCur);

		// pseudoinverse of the Hessian, used to compute the ort coefficients 
		// out of REQUIRED DELTAS of y (samples)
		CBigMatrix mxHInv;
		double fStability = mxH.ComposePseudoInverse(mxHInv, 1e-5);
		
		// otherwise the matrix is unstable, generate another base
		if (fStability > 5e-7 && mxHInv.GetWidth() == numKeys()*3)
		{
			if (!splineNR_MoveKnots (mxBase, mxHInv, arrSplineSamples, pCurSpline, &qCur))
			{
				// no step along this basis improved the spline
				++nBasisChanged;
				LogToDbg ("New Random Base(%d). ", nBasisChanged);
				initRandomBase (mxBase);

				if (nBasisChanged > nMaxBasisChange)
				{
					// too many failed bases in a row: restart from random knots
					// NOTE(review): nBasisChanged is not reset here, so every further
					// failure re-randomizes the knots again - confirm this is intended
					LogToDbg ("New Random Knots. ");
					initKnotsRandom(nNumKnots);
					++nKnotsChanged;
#ifdef _DEBUG
					// nCounter is only incremented, never read in this function
					static int nCounter = 0;
					++nCounter;
					for (int i = 1; i < nNumKnots; ++i)
						assert (m_arrKnots[i] > m_arrKnots[i-1]);
#endif

					pCurSpline = newSpline (&qCur);
				}
			}
			else
			{
				// progress was made: reset both failure counters
				nKnotsChanged = 0;
				nBasisChanged = 0;
			}

			// remember the best spline seen so far
			if (qCur.fTargetF < pQuality->fTargetF)
			{
				*pQuality = qCur;
				pSpline = pCurSpline;
			}
		}
		else
			LogToDbg("Unstable matrix (stab. %g width %d). ", fStability, mxHInv.GetWidth());

		LogToDbg("Quality %.5f*(%.5fb+1)=%.5f, mu %.4f, cur %.5f\n", pQuality->fRtotal, pQuality->fKnotPenalty, pQuality->fTargetF, m_fStepNRmu, qCur.fTargetF);
	}
}

////////////////////////////////////////////////////////////////////////////
// creates a spline using n-th derivative uniform initial knot distribution
// and Newton-Raphson algorithm with ... modification to limit jumps for
// optimization of knot positions
/*
bool BSplineApproximator::refineSplineNewtonRaphson (
	BSplineVec3d_AutoPtr& pSpline,
	Quality* pQuality)
{
	initKnotsFromSpline (pSpline);
	int nNumKnots = m_arrKnots.size();

	// Hessian ?
	CBigMatrix mxBase (m_arrKnots.size() - 2,m_arrKnots.size() - 2);
	CBigMatrix mxH; //(3*numKeys(), mxBase.GetWidth());
	initRandomBase(mxBase);

	// for each free knot, find the derivative of each data point on this knot
	Vec3dArray arrSplineSamples;
	splineNR_Hessian(mxBase, mxH, pSpline, arrSplineSamples, pQuality);

	// pseudoinverse of the Hessian, used to compute the ort coefficients 
	// out of REQUIRED DELTAS of y (samples)
	CBigMatrix mxHInv;
	double fStability = mxH.ComposePseudoInverse(mxHInv, 1e-5);
	
	// otherwise the matrix is unstable, generate another base
	if (fStability > 5e-7 && mxHInv.GetWidth() == numKeys()*3)
	{
		if (!splineNR_MoveKnots (mxBase, mxHInv, arrSplineSamples, pSpline, pQuality))
			return false;
	}
	return true;
}
*/

//////////////////////////////////////////////////////////////
// Replaces the knot array with the knot times of the given spline:
// m_arrKnots gets exactly one entry per spline knot, in the spline's order.
void BSplineApproximator::initKnotsFromSpline (BSplineVec3d* pSpline)
{
	m_arrKnots.resize (pSpline->numKnots());

	int nKnot = 0;
	while (nKnot < pSpline->numKnots())
	{
		m_arrKnots[nKnot] = pSpline->getKnotTime(nKnot);
		++nKnot;
	}
}


// Fits the parabola F(x) = a*x^2 + b*x + c through three points
//   F(0)  = e0
//   F(x1) = e1
//   F(x2) = e2
// and returns the X that minimizes F, i.e. X = -b/(2a).
//
// Returns 0 when the parabola has no minimum (a <= 0) or when the three
// abscissae 0, x1, x2 are not pairwise distinct (the system is singular).
double ParabolicMuApproximation (double e0, double e1, double e2, double x1, double x2)
{
	// c == e0, which leaves the 2x2 system
	//   a*x1^2 + b*x1 = d1
	//   a*x2^2 + b*x2 = d2
	const double d1 = e1 - e0;
	const double d2 = e2 - e0;

	// determinant of the system: x1^2*x2 - x1*x2^2
	const double det = x1*x2*(x1 - x2);
	if (det == 0)
		return 0;

	// Cramer's rule
	const double a = (d1*x2 - d2*x1) / det;
	if (!(a > 0))
	{
		// the parabola has no minimum (or the input was not a number)
		return 0;
	}

	const double b = (d2*x1*x1 - d1*x2*x2) / det;

	// the parabola has a minimum
	return -b/(2*a);
}

// One entry of the step-search history: the step multiplier that was tried
// and the error value recorded for it.
struct MuError
{
	float fMu;      // step multiplier
	double fError;  // error recorded for that multiplier

	// zero-initializes both fields so that a default-constructed entry
	// never carries indeterminate values
	MuError ():
		fMu(0), fError(0)
	{}
	MuError(float mu, double error):
		fMu(mu), fError(error)
	{}
};

///////////////////////////////////////////////////////////////
// moves the knots of the current spline, according to the
// anti-hessian computed in the previous step
// out of the given basis; also, the array of samples from
// the previous spline must be in place
//
// mxBase           - basis: one row per inner knot, one column per ort
// mxHInv           - pseudoinverse of the matrix produced by splineNR_Hessian
// arrSplineSamples - samples of pSpline at the key times; cleared on success
// pSpline/pQuality - IN: current spline and its quality; OUT: replaced by the
//                    improved spline when one is found
// Returns true if a step was found that lowers pQuality->fTargetF; in that case
// m_arrKnots holds the moved knots and m_fStepNRmu the accepted step multiplier.
// Returns false otherwise, with m_arrKnots restored to their values on entry.
bool BSplineApproximator::splineNR_MoveKnots (
	CBigMatrix& mxBase,
	const CBigMatrix& mxHInv,
	Vec3dArray& arrSplineSamples,
	BSplineVec3d_AutoPtr& pSpline,
	Quality* pQuality)
{
	assert (mxBase.GetHeight() == m_arrKnots.size() - 2);
	//assert (mxBase.GetWidth() <= mxBase.GetHeight());
	assert (mxHInv.GetHeight() == mxBase.GetWidth());
	assert (mxHInv.GetWidth() == numKeys()*3);

	DoubleArray arrOrts;
	// predicted by the algorithm optimal values to be added to the knots..
	arrOrts.resize (mxBase.GetWidth(), 0);

	// feed the required deltas to the anti-hessian
	// and get the N-R prediction for the best orts to get the best fit to the data
	for (int nSample = 0; nSample < numKeys(); ++nSample)
		for (int nCoord = 0; nCoord < 3; ++nCoord)
		{
			// how far the spline is from the key in this coordinate
			float fDelta = m_arrKeys[nSample].pt[nCoord] - arrSplineSamples[nSample][nCoord];
			for (int nOrt = 0; nOrt < mxBase.GetWidth(); ++nOrt)
			{
				arrOrts[nOrt] += mxHInv[nOrt][nSample*3+nCoord] * fDelta;
			}
		}

	// scale the prediction down so that the knots keep their order
	clampKnotDelta (mxBase, arrOrts);

	// now we have the orts to add to receive the best fit.
	FloatArray arrOriginalKnots = m_arrKnots;

	// error history of all the steps
	std::vector<MuError> arrMuErrors;

	// first, try to add full (or almost full) orts
	const float fMinStep = 1/32.0f;
	// start at twice the last accepted multiplier, limited to [fMinStep, 1];
	// if that already equals fMinStep the loop below does not run at all
	float fStep = tmax(fMinStep,tmin(1.0f,2*m_fStepNRmu));
	while (fStep > fMinStep)
	{
		// move towards the orts
		for (int nOrt = 0; nOrt < mxBase.GetWidth(); ++nOrt)
			moveInnerKnots (mxBase, nOrt, fStep*arrOrts[nOrt]);

		// calculate the spline
		Quality qNew;
		BSplineVec3d_AutoPtr pNewSpline = newSpline (&qNew);

		if (qNew.fTargetF < pQuality->fTargetF)
		{
			pSpline = pNewSpline;
			*pQuality = qNew;
			// the samples belong to the replaced spline and are no longer valid
			arrSplineSamples.clear();
			m_fStepNRmu = fStep;
			return true; // we managed to move forward
		}

		// no improvement: undo the move and remember the error for this step
		m_arrKnots = arrOriginalKnots;

		arrMuErrors.push_back (MuError(fStep, qNew.fTargetF));
		
		if (arrMuErrors.size() < 2)
			fStep *= 0.5;
		else
		{
			// find a step multiplier between 1/16 and 1/2 that minimizes the approximation of error decrease function
			// (the parabola goes through the current error at step 0 and the two most recent failed steps)
			float fStepAppr = (float)ParabolicMuApproximation(pQuality->fTargetF, arrMuErrors.back().fError, arrMuErrors[arrMuErrors.size()-2].fError, arrMuErrors.back().fMu, arrMuErrors[arrMuErrors.size()-2].fMu);
			tforceRangeIncl(fStepAppr, fStep/16, fStep/2);
			fStep = fStepAppr;
		}
	}

	return false;
}

// Given the predicted ort increments and the basis, scales the ort increment
// vector down uniformly so that no inner knot is asked to move further than
// a fixed fraction of the distance to its neighbour in the direction of the
// move; this keeps the knot sequence rules from being broken.
//
// mxBase  - basis: one row per inner knot, one column per ort
// arrOrts - IN/OUT: ort increments; multiplied by a common factor in (0,1] 
void BSplineApproximator::clampKnotDelta (const CBigMatrix& mxBase, DoubleArray& arrOrts)
{
	int nIKnot, nOrt;
	// get the influence on the knot vector, vector s
	int numIKnots = mxBase.GetHeight();
	int numOrts  = mxBase.GetWidth();
	assert (numIKnots == m_arrKnots.size()-2);
	assert (numOrts == arrOrts.size());
	DoubleArray arrS;
	arrS.resize (numIKnots, 0);

	// using the principle of linear effect, we apply the boundary rule to the effected vector of knot deltas
	// rather than directly to the ort array
	for (nIKnot = 0; nIKnot < numIKnots; ++nIKnot)
		for (nOrt = 0; nOrt < numOrts; ++nOrt)
			arrS[nIKnot] += arrOrts[nOrt] * mxBase[nIKnot][nOrt];

	// the coefficient on which the ort vector will be multiplied
	double k = 1;

	// threshold - should be well in between 0+epsilon and 0.5-epsilon,
	// 0.5 is the most aggressive, 0 is the slowest
	const double fMThresh = 1.0/3.0;

	// knot deltas smaller than this (in absolute value) are not limited at all
	const double fDeadZone = 1e-3;

	for (nIKnot = 0; nIKnot < numIKnots; ++nIKnot)
	{
		// inner knot nIKnot is m_arrKnots[1 + nIKnot]
		if (arrS[nIKnot] > fDeadZone)
		{
			// moving right: limit by the distance to the right neighbour
			double fMaxS = fMThresh * (m_arrKnots[1 + nIKnot + 1] - m_arrKnots[1 + nIKnot]);
			if (arrS[nIKnot] > fMaxS)
			{
				k = tmin(k, fMaxS / arrS[nIKnot]);
			}
		}
		else
		if (arrS[nIKnot] < -fDeadZone)
		{
			// moving left: limit by the distance to the left neighbour.
			// The limit is a negative delta, so that the ratio below is a positive
			// scale factor and the direction of the step is never flipped
			double fMinS = -fMThresh * (m_arrKnots[1+nIKnot] - m_arrKnots[nIKnot]);
			if (arrS[nIKnot] < fMinS)
			{
				k = tmin(k, fMinS / arrS[nIKnot]);
			}
		}
	}

	if (k < 1)
	{
		for (nOrt = 0; nOrt < numOrts; ++nOrt)
			arrOrts[nOrt] *= k;
	}
}


///////////////////////////////////////////////////////////////
// Adds ort nOrt (a column of mxBase) multiplied by k to the array of inner
// knots. Every inner knot is kept at least fEpsilon away from both of its
// neighbours; the left neighbour is taken at its already updated position.
// Does nothing when there are no inner knots.
//
// mxBase - basis: one row per inner knot, one column per ort
// nOrt   - column of mxBase to apply
// k      - multiplier for that column
void BSplineApproximator::moveInnerKnots (const CBigMatrix& mxBase, int nOrt, double k)
{
	// without this guard the unsigned "size() - 2" below wraps around
	// when the knot array holds fewer than two knots
	if (m_arrKnots.size() < 3)
		return;

	assert (mxBase.GetHeight() == m_arrKnots.size() - 2);

	// min distance to keep between the knots
	const float fEpsilon = 1;

	const unsigned numInnerKnots = (unsigned)(m_arrKnots.size() - 2);
	for (unsigned i = 0; i < numInnerKnots; ++i)
	{
		// inner knot i is m_arrKnots[i+1]
		float fMin = m_arrKnots[i] + fEpsilon;
		float fOldValue = m_arrKnots[i+1];
		float fMax = m_arrKnots[i+2] - fEpsilon;

		float fNewValue = fOldValue + float(mxBase[i][nOrt] * k);
		tforceRangeIncl(fNewValue, fMin, fMax);

		m_arrKnots[i+1] = fNewValue;
	}
}

////////////////////////////////////////////////////////////////
// Evaluates the spline at the time of every key and stores the values in
// arrSplineSamples, which is resized to hold exactly one value per key.
void BSplineApproximator::computeSplineSamples (
	BSplineVec3d* pSpline,
	Vec3dArray& arrSplineSamples)
{
	arrSplineSamples.resize (numKeys());

	int nKey = 0;
	while (nKey < numKeys())
	{
		arrSplineSamples[nKey] = pSpline->getValue(m_arrKeys[nKey].t);
		++nKey;
	}
}

//////////////////////////////////////////////////////////////////////////
// finds Hessian for the given starting spline, possibly modifying the spline
// also computes the final spline samples
void BSplineApproximator::splineNR_Hessian(const CBigMatrix& mxBase,CBigMatrix& mxH, BSplineVec3d_AutoPtr& pSpline, Vec3dArray& arrSplineSamples, Quality* pQuality)
{
	// each ort affects the delta of each sample, as well as
	// overall knot closeness penalty, actually...
	mxH.Resize(numKeys()*3, mxBase.GetWidth());

	double fStep = 0.1 * tmax(0.01f,m_fStepNRmu);
	FloatArray arrKnotsOrig  = m_arrKnots;

	// these are the samples of the current spline
	computeSplineSamples (pSpline, arrSplineSamples);
	// each new spline's samples
	Vec3dArray arrNewSplineSamples;

	// for each free knot, find the derivative of each data point on this knot
	for (int nOrt = 0; nOrt < mxBase.GetWidth(); ++nOrt)
	{
		// move the knot and possibly change the spline
		moveInnerKnots (mxBase, nOrt, fStep);
		
		Quality qNew;
		BSplineVec3d_AutoPtr pNewSpline = newSpline (&qNew);
		computeSplineSamples(pNewSpline, arrNewSplineSamples);

		// NOTE
		// to account for the knot closeness penalty, we exaggerate the affect of the ort on the deltas

		for (int nSample = 0; nSample < numKeys(); ++nSample)
			for (int nCoord = 0; nCoord < 3; ++nCoord)
				mxH[nSample*3+nCoord][nOrt] = (qNew.fKnotPenaltyBlended/fStep)*(arrNewSplineSamples[nSample][nCoord] - arrSplineSamples[nSample][nCoord]);

		if (qNew.fTargetF/pQuality->fTargetF < 1- 2e-3)
		{
			*pQuality = qNew;
			pSpline = pNewSpline;
			arrSplineSamples.swap(arrNewSplineSamples);
		}
		else
		{
			m_arrKnots = arrKnotsOrig;
		}
	}
}


////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Creates a new spline with knots that are attempted to be distributed according to the changing of the
// original data, then refines the knot positions for nMaxIterations rounds: each round first probes every
// inner knot individually and then repeatedly applies the combined delta while it keeps improving the
// target function. The probe step shrinks by 20% after every round.
//
// nNumKnots      - requested number of knots
// nMinusDegree   - passed through to initKnotsDerivDistributed
// nMaxIterations - number of refinement rounds
// pSpline        - OUT: the resulting spline
// pQuality       - OUT: quality of the resulting spline; may be NULL
////////////////////////////////////////////////////////////////////////////////////////////////////////////
void BSplineApproximator::newSplineDistributed (int nNumKnots, int nMinusDegree, int nMaxIterations, BSplineVec3d_AutoPtr& pSpline, Quality* pQuality)
{
	initKnotsDerivDistributed(nNumKnots, nMinusDegree);

	// the caller may not be interested in the quality
	Quality q;
	if (!pQuality)
		pQuality = &q;

	pSpline = NULL;

	float fStep = 1;

	pSpline = newSpline (pQuality);

	// from here on work with the number of knots that actually exist: indexing
	// m_arrKnots with a larger requested count would run past its end
	nNumKnots = (int)m_arrKnots.size();

	for (int nIter = 0; nIter < nMaxIterations; nIter++)
	{
		// now find the derivative of the residual sum on knot positions
		FloatArray arrDelta; // the derivative approximant
		arrDelta.resize (nNumKnots, 0);

		// try to move k-2 inner knots and record yi's derivatives
		for (int nDir = 1; nDir < nNumKnots - 1; ++nDir)
		{
			Quality qNew;
			m_arrKnots[nDir] += fStep;
			BSplineVec3d_AutoPtr pNewSpline = newSpline (&qNew);
			float fImprovement = arrDelta[nDir] = float(pQuality->fTargetF - qNew.fTargetF);
			if (fImprovement > 0)
			{
				// we improved the spline - leave it as is
				pSpline = pNewSpline;
				*pQuality = qNew;
			}
			else
			{
				// we made the spline worse - record the negative derivative
				// and step back a bit (there might be some improvement there
				m_arrKnots[nDir] -= 1.05f*fStep;
			}
		}

		// NOTE(review): this multiplies the delta by its own length rather than
		// normalizing it - confirm that this scaling is intended
		arrDelta *= Length(arrDelta)*(fStep/5);
		while (1)
		{
			// keep stepping along the combined delta for as long as it helps
			Quality qNew;
			m_arrKnots += arrDelta;
			BSplineVec3d_AutoPtr pNewSpline = newSpline (&qNew);
			float fImprovement = float (pQuality->fTargetF - qNew.fTargetF);
			if (fImprovement > 0)
			{
				pSpline = pNewSpline;
				*pQuality = qNew;
			}
			else
			{
				// undo the last, unsuccessful step
				m_arrKnots -= arrDelta;
				break;
			}
		}
		fStep *= 0.8f;
	}

}

// Moves the knot k to the time t and adds the distance it travelled,
// |k - t|, to the running total fSum.
inline void setKnotTime (float& k, float t, double&fSum)
{
	const float fTravelled = tabs(k - t);
	k = t;
	fSum += fTravelled;
}

//////////////////////////////////////////////////////////////////////////
// Moves the knots so that the accumulated error is split evenly between
// the knot intervals.
// arrSumError must hold the running error sum at the end of every key
// interval (numKeys()-1 entries in total).
// Returns the sum of the absolute time changes of all knots.
// NOTE:
//  doesn't do anything (and returns 0) if the error sum is close to 0
float BSplineApproximator::distributeKnotsEvenError (FloatArray& arrSumError)
{
	assert (arrSumError.size() == numKeys()-1);

	if (arrSumError.back() - arrSumError.front() < 1e-30)
		return 0;

	const int numKnots = m_arrKnots.size();

	// total distance the knots have been moved by
	double fMoved = 0;

	// the first knot always sits on the first key
	setKnotTime (m_arrKnots[0], m_arrKeys[0].t, fMoved);

	// share of the total error that falls on each knot interval
	float fErrorPerInterval = float(arrSumError.back()/(numKnots-1));

	for (int nKnot = 1; nKnot < numKnots-1; ++nKnot)
	{
		// accumulated error this knot has to be placed at
		float fWanted = fErrorPerInterval * nKnot;

		// first key interval (the last one is the fallback) whose running sum reaches it
		FloatArray::iterator itInterval = std::lower_bound (arrSumError.begin(), arrSumError.end()-1, fWanted);
		int nInterval = itInterval - arrSumError.begin();

		// running sums at the start and at the end of that interval
		float fSumStart = nInterval == 0 ? 0 : arrSumError[nInterval-1];
		float fSumEnd = arrSumError[nInterval];
		assert (fSumStart <= fWanted && fWanted <= fSumEnd);

		// linear interpolation between the two keys bounding the interval
		float fBlend = (fWanted - fSumStart)/(fSumEnd-fSumStart);
		float fKnotTime = (1-fBlend)*m_arrKeys[nInterval].t + fBlend*m_arrKeys[nInterval+1].t;

		setKnotTime(m_arrKnots[nKnot],fKnotTime,fMoved);
	}

	// the last knot always sits on the last key
	setKnotTime (m_arrKnots.back(), m_arrKeys[numKeys()-1].t, fMoved);

#ifdef _DEBUG
	for (unsigned i = 1; i < m_arrKnots.size(); ++i)
		assert (m_arrKnots[i] > m_arrKnots[i-1]);
#endif

	return (float)fMoved;
}

//////////////////////////////////////////////////////////////////////////
// Fills arrSumError with the running sum of the spline's errors: element i
// receives the sum of the distances between the spline and the keys 0..i,
// each measured at the key's own time.
// arrSumError must already have one element per key.
void BSplineApproximator::calculateSumError (BSplineVec3d* pSpline, FloatArray& arrSumError)
{
	assert (arrSumError.size () == numKeys());

	double fRunningError = 0;
	int nKey = 0;
	while (nKey < numKeys())
	{
		fRunningError += DLength(pSpline->getValue (m_arrKeys[nKey].t)-m_arrKeys[nKey].pt);
		arrSumError[nKey] = (float)fRunningError;
		++nKey;
	}
}


////////////////////////////////////////////////////////////////////////////////////////////////////////////
// sets the knots distributed evently in the space of d+1-alpha-th derivative of the approximated function.
// alpha == nMinusDegree
// the approximated function is reconstructed using 2*m_nHalfFilterSize knots around the interval,
// as a polynom of d+1 degree. The value of the derivative in the middle of the interval
// is summed and projected back into the time space to produce knot spacing that is sensitive to the changing
// d+1-alpha-th derivative of the original data
////////////////////////////////////////////////////////////////////////////////////////////////////////////
void BSplineApproximator::initKnotsDerivDistributed (int nNumKnots, int nMinusDegree)
{
	if (nNumKnots < 2)
		nNumKnots = 2;

	// keeps the function F = Integral (pow (derivative, 1/n)), n being d+1
	FloatArray arrF;
	// the value is remembered at the end of the interval
	arrF.resize(numKeys()-1);

	double fLast = 0;
	int i;
	for (i = 0; i < numKeys()-1; ++i)
	{
		Vec3d ptDerivative = estimateDeepDerivative(i);
		fLast += pow(DLength(ptDerivative), 1.0/ (2* (m_nDegree+1)));
		arrF[i] = (float)fLast;
	}

	initKnotsEven (nNumKnots);
	distributeKnotsEvenError(arrF);
}

// Copies the knots of the given spline into m_arrKnots and erases the inner
// knot whose delta (pSpline->getDelta) has the smallest length; on ties the
// earliest such knot is erased.
// Returns the length of the erased knot's delta, or -1 if the spline has no
// inner knots (the knots are still copied in that case).
float BSplineApproximator::removeLeastContributingKnot (BSplineVec3d* pSpline)
{
	initKnotsFromSpline(pSpline);

	int nWeakest = -1;    // index of the best candidate so far, -1 if none yet
	float fWeakest = 0;   // length of that candidate's delta

	for (int nCandidate = 1; nCandidate < pSpline->numKnots()-1; ++nCandidate)
	{
		const float fLength = pSpline->getDelta(nCandidate).Length();

		// keep the current candidate unless this one contributes strictly less
		if (nWeakest >= 0 && !(fWeakest > fLength))
			continue;

		nWeakest = nCandidate;
		fWeakest = fLength;
	}

	if (nWeakest <= 0)
		return -1;

	m_arrKnots.erase (m_arrKnots.begin() + nWeakest);
	return fWeakest;
}

// Initializes the knot array with the full set of knots for the current keys:
// numKeys() - m_nDegree + 1 knots, distributed by initKnotsEven().
void BSplineApproximator::initKnotsFull()
{
	const int numFullKnots = numKeys() - m_nDegree + 1;
	initKnotsEven (numFullKnots);
}


// removes duplicate knots within one sampling interval
//
// Expects the first and the last knot to coincide with the times of the first
// and the last key. The keys are walked in order; whenever the knot under
// consideration does not lie beyond the current key, it is kept and all the
// knots following it that lie strictly before that key are erased. The last
// knot is never erased.
void BSplineApproximator::removeKnotsExtra()
{
	assert (m_arrKeys[0].t == m_arrKnots[0]);
	assert (m_arrKeys[numKeys()-1].t == m_arrKnots.back());

	// the next knot must be >= m_arrKeys[nNextAvail].t
	int nNextKnot = 0;
	for (int nNextAvail = 1; nNextAvail < numKeys()-1 && nNextKnot < (int)m_arrKnots.size()-1; ++nNextAvail)
	{
		// NOTE(review): tStart is assigned but never read below
		float tStart = m_arrKeys[nNextAvail-1].t;
		float tEnd = m_arrKeys[nNextAvail].t;
		if (m_arrKnots[nNextKnot] <= tEnd)
		{
			// keep this knot and drop the followers that fall before tEnd
			++nNextKnot;
	    while (nNextKnot < (int)m_arrKnots.size()-1 && m_arrKnots[nNextKnot] < tEnd)
				m_arrKnots.erase (m_arrKnots.begin() + nNextKnot);
		}
	}

	// kill the last interval knots
	// (everything from nNextKnot up to, but not including, the last knot)
	if (nNextKnot < (int)m_arrKnots.size()-1)
		m_arrKnots.erase (m_arrKnots.begin() + nNextKnot, m_arrKnots.end() - 1);

}

// Constructs the approximation of the samples: copies the points of all keys
// into a contiguous temporary array and hands it to approximation.init()
// together with the degree and the error value.
// Returns whatever approximation.init() returns.
float BSplineApproximator::constructB3PApproximation (CB3PApproximation& approximation, int nDegree, float fError)
{
	// the vector owns the temporary copy, so it is released on every exit path
	std::vector<Vec3d> arrData (numKeys());
	for (int i = 0; i < numKeys(); ++i)
		arrData[i] = m_arrKeys[i].pt;

	// taking the address of element 0 is only valid for a non-empty vector
	return approximation.init (arrData.empty() ? NULL : &arrData[0], numKeys(), nDegree, fError);
}

// sets the smoothness factor - how smooth the spline should be. 0 is no smoothing at all
void BSplineApproximator::setSmoothness (float fSmoothness)
{
	m_fSmoothness = fSmoothness;
}
