#include "StdAfx.h"
#include "DriverD3D.h"


#if defined(XENON)

//namespace
//{
//  CryCriticalSection g_cCBufferSwitchLock;
//}

// Stores the requested vertex-shader GPR count as the pending ("commit") state.
// Nothing is sent to the device here; the value is consumed later by
// XE_CommitGPRState_Int().
void CD3D9Renderer::XE_SetGPRState(uint32 VertexShaderCount)
{
  m_RP.m_CurGPRAllocStateCommit = VertexShaderCount;
}

void CD3D9Renderer::XE_CommitGPRState_Int()
{
	//this check is done in the XE_CommitGPRState
	assert(m_RP.m_CurGPRAllocState != m_RP.m_CurGPRAllocStateCommit);

	if (m_RP.m_CurGPRAllocStateCommit == 0)
	{
		//reset
		m_pd3dDevice->SetShaderGPRAllocation(0, 0, 0);
	}
	else
	{
		m_RP.m_CurGPRAllocStateCommit = max((uint32)16, m_RP.m_CurGPRAllocStateCommit);
		int vtxRegCnt = m_RP.m_CurGPRAllocStateCommit;
		assert ( vtxRegCnt<=(GPU_GPRS-16) );
		m_pd3dDevice->SetShaderGPRAllocation(0, vtxRegCnt, GPU_GPRS-vtxRegCnt);
	}
	m_RP.m_CurGPRAllocState = m_RP.m_CurGPRAllocStateCommit;
}

//native zpass - can be used if FX_ProcessShadows() does not use stencil
/*
#ifdef XENON
    if (CV_d3d9_predicatedtiling && bLighting) //skip shadowgen
    {
      XE_ZScene(true);
    }
    FX_ProcessShadows(EFSLIST_GENERAL, 0);
    FX_ProcessShadows(EFSLIST_GENERAL, 1);
#endif
#if defined(XENON)
    if (CV_d3d9_predicatedtiling && bLighting) //skip shadowgen
    {
      //m_pd3dDevice->BeginZPass(0);
      //enable rendering during z pass
      m_pd3dDevice->SetPredication( 0 );
    }
#endif

    FX_ProcessRenderList(EFSLIST_GENERAL, 1, RenderFunc, bLighting);         // Sorted list without preprocess

#ifdef XENON
    if (CV_d3d9_predicatedtiling && bLighting) //skip shadowgen
    {
      //disable rendering during zpass
      m_pd3dDevice->SetPredication( D3DPRED_ALL_RENDER );
    }
#endif


#if defined(XENON)
    if (CV_d3d9_predicatedtiling && bLighting) //skip shadowgen
    {
      XE_ZScene(false);
    }
#endif
*/

void CD3D9Renderer::XE_GenerateZMaps()
{
  int nZBuffWidth = gcpRendD3D->GetWidth(); //m_d3dsdBackBuffer.Width;
  int nZBuffHeight = gcpRendD3D->GetHeight(); //m_d3dsdBackBuffer.Height;

  ETEX_Format eTFZ = eTF_DEPTH24; //CTexture::s_eTFZ;
  uint32 nFlags =  FT_DONT_STREAM | FT_USAGE_RENDERTARGET | FT_DONT_RELEASE | FT_USAGE_PREDICATED_TILING;
  //uint32 nFlags =  FT_DONT_STREAM | FT_USAGE_RENDERTARGET | FT_DONT_RELEASE | FT_USAGE_PREDICATED_TILING;
  if (CRenderer::CV_r_fsaa)
    nFlags |= FT_USAGE_FSAA;

  if (!CTexture::s_ptexZTarget)
    CTexture::s_ptexZTarget = CTexture::CreateTextureObject("$ZTarget", nZBuffWidth, nZBuffHeight, 1, eTT_2D, nFlags, eTFZ);


  //invalidate texture
  CTexture::s_ptexZTarget->m_nFlags = nFlags;
  CTexture::s_ptexZTarget->Invalidate(nZBuffWidth, nZBuffHeight, eTFZ);

  if (!(CTexture::s_ptexZTarget->GetDevTexture()))
  {
    byte *pData[6];
    for (int i=0; i<6; i++)
    {
      pData[i] = 0;
    }
    CTexture::s_ptexZTarget->CreateDeviceTexture(pData);
    CTexture::s_ptexZTarget->PostCreate();
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

bool CD3D9Renderer::XE_ZPrepassScene(bool bEnable, bool bClearZBuffer)
{
	bool bUseNativeDepth = USE_NATIVE_DEPTH || CV_r_predicatedtiling;

	if (bEnable)
	{
		if (m_LogFile)
			Logv(SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID], " +++ Start Z-prepass scene +++ \n");
		int nWidth = gcpRendD3D->GetWidth(); //m_d3dsdBackBuffer.Width;
		int nHeight = gcpRendD3D->GetHeight(); //m_d3dsdBackBuffer.Height;

		if (!bUseNativeDepth)
		{
			if (!CTexture::s_ptexZTarget
				|| CTexture::s_ptexZTarget->IsFSAAChanged()
				|| CTexture::s_ptexZTarget->GetDstFormat() != CTexture::s_eTFZ 
				|| CTexture::s_ptexZTarget->GetWidth() != nWidth
				|| CTexture::s_ptexZTarget->GetHeight() != nHeight)
				CTexture::GenerateZMaps();
		}
		else
			XE_GenerateZMaps();

		EF_SetState(GS_DEPTHWRITE);
		//EF_ClearBuffers(FRT_CLEAR_COLOR, NULL);

		RT_SetViewport(0, 0, GetWidth(), GetHeight());

		if (bClearZBuffer)
		{
			const float fDefaultPowFactor = 16.0f / 255.0f;
			const ColorF cDepthClear(0.0f, (m_RP.m_FSAAData.Type? 1.0f :  0.0f), 0.0f, (!CRenderer::CV_r_deferredshading)? 0.0f : fDefaultPowFactor);
			EF_ClearBuffers(FRT_CLEAR_DEPTH|FRT_CLEAR_STENCIL, &cDepthClear);
		}

		m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags |= RBPF_ZPASS;
		m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags2 |= RBPF2_DISABLECOLORWRITES|RBPF2_ZPREPASS;
		if (CTexture::s_eTFZ == eTF_R32F || CTexture::s_eTFZ == eTF_G16R16F || CTexture::s_eTFZ == eTF_A16B16G16R16F)
			m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags2 |= RBPF2_NOALPHABLEND|RBPF2_NOALPHATEST;
	}
	else
	{
		if (m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags & RBPF_ZPASS)
		{
			if (m_LogFile)
				Logv(SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID], " +++ End Z-prepass scene +++ \n");

			RT_SetViewport(0, 0, GetWidth(), GetHeight());
			m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags &= ~RBPF_ZPASS;
			m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags2 &= ~(RBPF2_DISABLECOLORWRITES|RBPF2_ZPREPASS);
			if (CTexture::s_eTFZ == eTF_G16R16F || CTexture::s_eTFZ == eTF_R32F || CTexture::s_eTFZ == eTF_A16B16G16R16F)
				m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags2 &= ~(RBPF2_NOALPHABLEND | RBPF2_NOALPHATEST);
		}
		else
		{
			//to fix destroy ZMaps if predicated tiling is enabled
			if (!CV_r_usezpass && !bUseNativeDepth)
				CTexture::DestroyZMaps();
		}
	}

	return false;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

//#define USE_TILE_Z_PREDICTION
// Begins (bEnable == true) or ends (bEnable == false) the Z pass.
//
// bEnable       - true to set up the Z pass, false to tear it down.
// bClearZBuffer - on begin, clear the buffers (depth/stencil are only included
//                 in the clear when CV_r_ZPrePass is off).
//
// "Native depth" mode is active when USE_NATIVE_DEPTH is set or
// CV_r_predicatedtiling is on. Depending on that mode and on
// CV_r_deferredshading, different render targets are pushed on begin and
// popped on end; the push/pop branches below mirror each other.
//
// When ending with deferred shading + native depth, the Z target is resolved
// and, if the previous occlusion fence is no longer pending, the two
// s_ptexZOcclusion textures are (re)created and filled via FX_ShadowBlur.
//
// Returns true only from the final "native zpass" section (native depth without
// deferred shading); every other path returns false.
bool CD3D9Renderer::XE_ZScene(bool bEnable, bool bClearZBuffer)
{
  bool bUseNativeDepth = USE_NATIVE_DEPTH || CV_r_predicatedtiling;

  if (bEnable)
  {
    if (m_LogFile)
      Logv(SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID], " +++ Start Z scene +++ \n");
    int nWidth = gcpRendD3D->GetWidth(); //m_d3dsdBackBuffer.Width;
    int nHeight = gcpRendD3D->GetHeight(); //m_d3dsdBackBuffer.Height;

    // Make sure a Z target exists: the generic path regenerates the Z maps when
    // they are missing or stale, the native path always calls XE_GenerateZMaps().
    if (!bUseNativeDepth)
    {
      if (!CTexture::s_ptexZTarget
        || CTexture::s_ptexZTarget->IsFSAAChanged()
        || CTexture::s_ptexZTarget->GetDstFormat() != CTexture::s_eTFZ 
        || CTexture::s_ptexZTarget->GetWidth() != nWidth
        || CTexture::s_ptexZTarget->GetHeight() != nHeight)
        CTexture::GenerateZMaps();
    }
    else
      XE_GenerateZMaps();

    EF_SetState(GS_DEPTHWRITE);
    EF_ClearBuffers(FRT_CLEAR_COLOR, NULL);

//////////////////////////////////////////////////////////////////////////
    /*if (m_LogFile)
      Logv(SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID], " Curr frame ID:%i, Text Frame ID:%i \n", GetFrameID(false), CTexture::s_ptexZTarget->m_nUpdateFrameID);

    assert( m_RP.m_TI[m_RP.m_nProcessThreadID].m_nFrameUpdateID == (CTexture::s_ptexZTarget->m_nUpdateFrameID+1) );*/
//////////////////////////////////////////////////////////////////////////

    // Set float render target for Z frame buffer
    // (non-native: Z target in slot 0, plus normals map in slot 1 when deferred;
    //  native: only the normals map in slot 0 when deferred)
    if (!bUseNativeDepth)
    {
      FX_PushRenderTarget(0, CTexture::s_ptexZTarget, &m_DepthBufferOrigFSAA);

      if( CRenderer::CV_r_deferredshading )      
      {
        // Note: Current initial version will not work with FSAA      
        FX_PushRenderTarget(1, CTexture::s_ptexSceneNormalsMap, NULL);
				m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[ HWSR_DEFERRED_SHADING ];
      }
    }
    else
    if( CRenderer::CV_r_deferredshading )      
    {
      // Note: Current initial version will not work with FSAA      
      FX_PushRenderTarget(0, CTexture::s_ptexSceneNormalsMap, &m_DepthBufferOrigFSAA);
			m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[ HWSR_DEFERRED_SHADING ];
    }

    RT_SetViewport(0, 0, GetWidth(), GetHeight());

    if (bClearZBuffer)
    {
      const float fDefaultPowFactor = 16.0f / 255.0f;
      const ColorF cDepthClear(0.0f, (m_RP.m_FSAAData.Type? 1.0f :  0.0f), 0.0f, (!CRenderer::CV_r_deferredshading)? 0.0f : fDefaultPowFactor);
			uint32 nClearFlags = FRT_CLEAR_COLOR|FRT_CLEAR_IMMEDIATE;
			// depth/stencil are left untouched when a Z prepass is enabled
			if( !CV_r_ZPrePass )
				nClearFlags |= FRT_CLEAR_DEPTH|FRT_CLEAR_STENCIL;
      EF_ClearBuffers(nClearFlags, &cDepthClear);
    }

    m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags |= RBPF_ZPASS;
    if (CTexture::s_eTFZ == eTF_R32F || CTexture::s_eTFZ == eTF_G16R16F || CTexture::s_eTFZ == eTF_A16B16G16R16F)
		{
      m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags2 |= RBPF2_NOALPHABLEND;
      // NOTE(review): this file is compiled under XENON only, so the #else branch
      // (clearing RBPF2_NOALPHATEST) is the one that applies here.
#if !defined(XENON) && !defined(PS3)
			m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags2 |= RBPF2_NOALPHATEST;
#else
			m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags2 &= ~RBPF2_NOALPHATEST;
#endif
		}

    //if (bUseNativeDepth)
    //  m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags2 &= ~RBPF2_NOALPHATEST;
  }
  else
  {
    if (m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags & RBPF_ZPASS)
    {
      if (m_LogFile)
        Logv(SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID], " +++ End Z scene +++ \n");
      //////////////////////////////////////////////////////////////////////////
      //assert(CTexture::s_ptexZTarget->m_bResolved == false);
      //////////////////////////////////////////////////////////////////////////
      // Pop exactly the render targets that were pushed on begin.
      if (!bUseNativeDepth)
      {
        FX_PopRenderTarget(0);
        if( CRenderer::CV_r_deferredshading )
        {
          FX_PopRenderTarget(1);
        }
      }
      else
      if( CRenderer::CV_r_deferredshading )
      {
        FX_PopRenderTarget(0);
      }

      //Logv(SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID], " Curr frame ID:%i, Text Frame ID:%i \n", GetFrameID(false), CTexture::s_ptexZTarget->m_nUpdateFrameID);

      //if (!bUseNativeDepth)
      if( CRenderer::CV_r_deferredshading && bUseNativeDepth)
      {
        const uint32 nCBufferFlags = FT_USAGE_RENDERTARGET | FT_DONT_STREAM |  FT_DONT_RELEASE | FT_USAGE_READBACK;
        const int nZResolve = 32;
        CTexture::s_ptexZTarget->SetResolved( false );
        CTexture::s_ptexZTarget->Resolve( nZResolve );

        //prepare occlusion buffer
        // (skipped while the fence inserted by the previous update is still pending)
        if ( (m_FenceOcclusionReady==0L || !(m_pd3dDevice->IsFencePending(m_FenceOcclusionReady))) )
        {
          //AUTO_LOCK(g_cCBufferSwitchLock);
          //swap buffer
          //int nCurProcessZOcclusion = m_RP.m_nZOcclusionProcess;
          //m_RP.m_nZOcclusionProcess = m_RP.m_nZOcclusionReady;
          //m_RP.m_nZOcclusionReady = nCurProcessZOcclusion;
          // buffer indices are fixed (no swapping): ready = 1, process = 0
          m_RP.m_nZOcclusionReady = 1;
          m_RP.m_nZOcclusionProcess = 0;

					if (m_RP.m_newOcclusionCamera.IsValid())
						m_RP.m_oldOcclusionCamera = m_RP.m_newOcclusionCamera;

          Matrix44 mCurView,mCurProj;
          GetModelViewMatrix(reinterpret_cast<f32*>(&mCurView));
          GetProjectionMatrix(reinterpret_cast<f32*>(&mCurProj));

          m_RP.m_newOcclusionCamera = mCurView*mCurProj;


          ETEX_Format etfCBuffer = eTF_DEPTH24;

          int nZBuffX = CTexture::s_ptexZTarget->GetWidth();
          int nZBuffY = CTexture::s_ptexZTarget->GetHeight();
          assert(nZBuffX>0 && nZBuffY>0);
          //if (nZBuffX<=0 || nZBuffY<=0)
          //  return false;

          //////////////////////////////////////////////////////////////////////////
          // "ready" occlusion buffer: half the Z target size in each dimension
          if (CTexture::s_ptexZOcclusion[m_RP.m_nZOcclusionReady] == NULL)
            CTexture::s_ptexZOcclusion[m_RP.m_nZOcclusionReady] = CTexture::CreateTextureObject("$ZOcclusionBufferReady", nZBuffX>>1, nZBuffY>>1, 1, eTT_2D, nCBufferFlags, etfCBuffer);

          CTexture* pCurZOcclusion = CTexture::s_ptexZOcclusion[m_RP.m_nZOcclusionReady];

          //invalidate texture
          pCurZOcclusion->m_nFlags = nCBufferFlags; 
          pCurZOcclusion->Invalidate((nZBuffX>>1), (nZBuffY>>1), etfCBuffer);

          if (!CTexture::IsTextureExist(pCurZOcclusion))
          {
            byte *pData[6];
            for (int i=0; i<6; i++)
            {
              pData[i] = 0;
            }
            //TD make separate func for creating render targets
            // NOTE(review): eTF is never read afterwards; the call is kept in case
            // ClosestFormatSupported() has side effects - confirm.
            ETEX_Format eTF = pCurZOcclusion->ClosestFormatSupported(etfCBuffer);

            pCurZOcclusion->RT_CreateDeviceTexture(pData);
            pCurZOcclusion->PostCreate();
            //return false;
          }
          //////////////////////////////////////////////////////////////////////////

          // "process" occlusion buffer: same size/format as the "ready" one
          if (CTexture::s_ptexZOcclusion[m_RP.m_nZOcclusionProcess] == NULL)
            CTexture::s_ptexZOcclusion[m_RP.m_nZOcclusionProcess] = CTexture::CreateTextureObject("$ZOcclusionBufferProcess", nZBuffX>>1, nZBuffY>>1, 1, eTT_2D, nCBufferFlags, etfCBuffer);

          pCurZOcclusion = CTexture::s_ptexZOcclusion[m_RP.m_nZOcclusionProcess];

          // NOTE(review): unlike the "ready" buffer above, this one is invalidated
          // only when its device texture does not exist yet - confirm this is intended.
          if (!CTexture::IsTextureExist(pCurZOcclusion))
          {
            //invalidate texture
            pCurZOcclusion->m_nFlags = nCBufferFlags; 
            pCurZOcclusion->Invalidate((nZBuffX>>1), (nZBuffY>>1), etfCBuffer);

            byte *pData[6];
            for (int i=0; i<6; i++)
            {
              pData[i] = 0;
            }
            //TD make separate func for creating render targets
            ETEX_Format eTF = pCurZOcclusion->ClosestFormatSupported(etfCBuffer);

            pCurZOcclusion->RT_CreateDeviceTexture(pData);
            pCurZOcclusion->PostCreate();
            //return false;
          }

          if (CTexture::IsTextureExist(pCurZOcclusion))
          {
						PROFILE_LABEL_PUSH( "COVERAGE_BUFFER" );

            //pCurZOcclusion->SetResolved( false );
            //pCurZOcclusion->Resolve( nZResolve );
            //create max chain
            FX_ShadowBlur(0.0f, NULL, CTexture::s_ptexZOcclusion[0], 9, true, CTexture::s_ptexZOcclusion[1]); //pCurZOcclusion
            // fence checked at the top of this section on the next call
            m_FenceOcclusionReady = m_pd3dDevice->InsertFence();

						PROFILE_LABEL_POP( "COVERAGE_BUFFER" );
          }
        }

      }
      else
        if( !bUseNativeDepth )
        CTexture::s_ptexZTarget->Resolve();
        
      RT_SetViewport(0, 0, GetWidth(), GetHeight());
      m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags &= ~RBPF_ZPASS;
      if (CTexture::s_eTFZ == eTF_G16R16F || CTexture::s_eTFZ == eTF_R32F || CTexture::s_eTFZ == eTF_A16B16G16R16F)
        m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags2 &= ~(RBPF2_NOALPHABLEND | RBPF2_NOALPHATEST);
    }
    else
    {
      //to fix destroy ZMaps if predicated tiling is enabled
      if (!CV_r_usezpass && !bUseNativeDepth)
        CTexture::DestroyZMaps();
    }
  }

  //////////////////////////////////////////////////////////////////////////
  // native zpass with predicated tiling
  //////////////////////////////////////////////////////////////////////////
  // Runs in addition to the begin/end handling above, for both values of bEnable.
  if (bUseNativeDepth && !CV_r_deferredshading)
  {
    if (bEnable)
    {
      FX_Commit(); //force begin predicated tiling

#ifdef USE_TILE_Z_PREDICTION
      //////////////////////////////////////////////////////////////////////////
      //disable stencil state
      EF_SetStencilState(
        STENC_FUNC(FSS_STENCFUNC_ALWAYS) |
        STENCOP_FAIL(FSS_STENCOP_KEEP) |
        STENCOP_ZFAIL(FSS_STENCOP_KEEP) |
        STENCOP_PASS(FSS_STENCOP_KEEP),
        0, 0xFFFFFFFF, 0xFFFFFFFF
        );
      int newState = m_RP.m_CurState;
      newState &= ~GS_STENCIL;
      EF_SetState(newState);
      m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags2 |=	RBPF2_LOCKSTENCIL;
      //////////////////////////////////////////////////////////////////////////

      m_pd3dDevice->BeginZPass(0);
      m_pd3dDevice->SetPredication( D3DPRED_ALL_Z );
#else
      //disable color writes
      EF_SetState(GS_COLMASK_NONE);
      m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags2 |= RBPF2_DISABLECOLORWRITES;
#endif

      return true;
    }
    else
    {
      // make sure the Z target exists before resolving depth into it
      XE_GenerateZMaps();
      CDeviceTexture* pCurZtarget = CTexture::s_ptexZTarget->GetDevTexture();

#ifdef USE_TILE_Z_PREDICTION
      if (m_pCurTarget!=NULL)
        XE_ResolveDepthStencilSurface(pCurZtarget->Get2DTexture(), true);

      m_pd3dDevice->EndZPass();
      //////////////////////////////////////////////////////////////////////////
      //reset stencil lock
      int newState = m_RP.m_CurState;
      newState &= ~GS_STENCIL;
      EF_SetState(newState);
      m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags2 &= ~RBPF2_LOCKSTENCIL;
      m_pd3dDevice->SetRenderState(D3DRS_STENCILENABLE, FALSE);
      //////////////////////////////////////////////////////////////////////////
      //enable further rendering
      m_pd3dDevice->SetPredication( 0 );
#else
      m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags2 &= ~RBPF2_DISABLECOLORWRITES;
      XE_ResolveDepthStencilSurface(pCurZtarget);
#endif
      return true;
    }

  }


  return false;
}



bool CD3D9Renderer::XE_PatchTiledVPOS()
{
  // Request ownership of pixel shader constants 4-7
  const DWORD dwVPosOffsetPixelConstantIndex = 32;
  m_pd3dDevice->GpuOwnPixelShaderConstantF( dwVPosOffsetPixelConstantIndex, 4 );
  // Predicated on each tile, set pixel shader constants 4-7 from a temporary buffer allocated from the command buffer.
  CONST TILING_SCENARIO& CurrentScenario = m_pTilingScenarios[m_dwTilingScenarioIndex];
  for( UINT i = 0; i < (UINT)CurrentScenario.dwTileCount; ++i )
  {
    m_pd3dDevice->SetPredication( D3DPRED_TILE_RENDER( i ) );
    // Request an allocation of pixel shader constants from the command buffer.
    D3DVECTOR4* pConstantData = NULL;
    HRESULT hr = m_pd3dDevice->GpuBeginPixelShaderConstantF4( dwVPosOffsetPixelConstantIndex, &pConstantData, 4 );
    if( SUCCEEDED(hr) )
    {
      ZeroMemory( pConstantData, 4 * sizeof(D3DVECTOR4) );

      // Fill in the first constant with the tiling offset for this tile.
      D3DPOINT* pDestPoint = (D3DPOINT*)&CurrentScenario.TilingRects[i];
      pConstantData[0].x = float(pDestPoint->x);
      pConstantData[0].y = float(pDestPoint->y);
      pConstantData[0].z = 0;
      pConstantData[0].w = 0;

      m_pd3dDevice->GpuEndPixelShaderConstantF4();
    }
  } 
  // Restore automatic predication.
  m_pd3dDevice->SetPredication( D3DPRED_ALL_RENDER );

  return true;
}

// Resolves the depth/stencil surface (fragment 0) into pTex, one tile of the
// current tiling scenario at a time.
//
// pTex       - destination texture; its 2D texture is temporarily switched to
//              a resolvable format and restored afterwards when needed.
// bTileZPred - when true, each tile's resolve is predicated with
//              D3DPRED_TILE_Z(i) and predication is set to D3DPRED_ALL_Z at
//              the end; when false, predication is left untouched.
//
// Always returns true.
bool CD3D9Renderer::XE_ResolveDepthStencilSurface(CDeviceTexture *pTex, bool bTileZPred)
{
  D3DVECTOR4 vClear = { 0, 0, 0, 0 };

  CONST TILING_SCENARIO& rScenario = m_pTilingScenarios[m_dwTilingScenarioIndex];

  // The texture header may need a different format for the duration of the resolve.
  GPUTEXTURE_FETCH_CONSTANT savedFmt;
  const bool bFormatPatched = CTexture::ConvertToResolvableFormat(pTex->Get2DTexture(), &savedFmt);

  const DWORD dwResolveFlags = D3DRESOLVE_DEPTHSTENCIL | D3DRESOLVE_FRAGMENT0;

  for( UINT nTile = 0; nTile < (UINT)rScenario.dwTileCount; ++nTile )
  {
    if( bTileZPred )
      m_pd3dDevice->SetPredication( D3DPRED_TILE_Z( nTile ) );

    // The tile lands in the texture at the same position it has on screen:
    // the upper left corner of its tiling rect.
    D3DPOINT* pDstCorner = (D3DPOINT*)&rScenario.TilingRects[nTile];

    m_pd3dDevice->Resolve( dwResolveFlags,
      &rScenario.TilingRects[nTile],
      pTex->Get2DTexture(),
      pDstCorner,
      0, 0,
      &vClear,
      1.0f, 0L, NULL );
  }

  if( bFormatPatched )
    CTexture::RestoreFormat(pTex->Get2DTexture(), savedFmt);

  // Leave Z predication in its default state.
  if( bTileZPred )
    m_pd3dDevice->SetPredication( D3DPRED_ALL_Z );

  return true;
}

//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
// Draws a screen-covering rect list with HiZ writes enabled, a NULL pixel
// shader and ZFUNC = NEVER, then flushes HiZ/stencil asynchronously.
// NOTE(review): presumably this rebuilds the hierarchical Z data from the
// current depth buffer - confirm against the XDK documentation.
//
// Returns false without drawing when the render target stack for target 0 is
// deeper than 2 or the ShadowBlur shader is not available; true otherwise.
bool CD3D9Renderer::XE_HiZUpdate()
{
  int nTarget = 0;
  if (m_nRTStackLevel[nTarget]>2)
    return false;

	gRenDev->m_cEF.mfRefreshSystemShader("ShadowBlur", CShaderMan::m_ShaderShadowBlur);

  //////////////////////////////////////////////////////////////////////////
  // remember the render state so it can be re-applied at the end
  int prevState = m_RP.m_CurState;
  m_RP.m_pPrevObject = NULL;
  m_RP.m_FrameObject++;
  EF_Scissor(false, 0, 0, 0, 0);
  D3DSetCull(eCULL_None);

  Set2DMode(true, 1, 1);

  // setup screen aligned quad
  // (three corners only: it is drawn as R_PRIMV_RECTLIST below)
  float fVertDepth = 0.f;
  SVF_P3F_C4B_T2F pScreenQuad[] =  
  {
    { Vec3(0, 0, fVertDepth), {{~0}}, Vec2(0, 0) },
    { Vec3(0, 1, fVertDepth), {{~0}}, Vec2(0, 1) },
    { Vec3(1, 0, fVertDepth), {{~0}}, Vec2(1, 0) },
  };     

  CTexture::BindNULLFrom(1);

  CShader *pSH = m_cEF.m_ShaderShadowBlur;
  if (!pSH)
  {
    // undo the 2D mode switch before bailing out
    Set2DMode(false, 1, 1);
    return false;
  }

  uint32 nPasses = 0;
  // NOTE(review): same technique name as in XE_MSAAResample(); the pixel shader
  // is replaced by NULL below.
  static CCryNameTSCRC TechName("MSAAResample");
  pSH->FXSetTechnique(TechName);
  pSH->FXBegin(&nPasses, FEF_DONTSETTEXTURES | FEF_DONTSETSTATES);

  // NOTE(review): sPointFilterState is not used anywhere in this function.
  STexState sPointFilterState = STexState(FILTER_POINT, true);

  EF_SetState(0); //GS_NODEPTHTEST

  //TD
  //m_pd3dDevice->SetRenderTarget( 0, NULL );
  //m_pd3dDevice->SetDepthStencilSurface( m_pDepthRestore4x );

  // Depth test on but never passing, HiZ test off, HiZ writes on.
  m_pd3dDevice->SetRenderState( D3DRS_ZENABLE, TRUE );
  m_pd3dDevice->SetRenderState( D3DRS_HIZENABLE, FALSE );
  m_pd3dDevice->SetRenderState( D3DRS_HIZWRITEENABLE, TRUE );
  m_pd3dDevice->SetRenderState( D3DRS_ZFUNC, D3DCMP_NEVER );


  pSH->FXBeginPass(0);
  CHWShader_D3D::mfBindPSNULL();
  //m_pd3dDevice->SetPixelShader( NULL );

  CVertexBuffer pVertexBuffer(pScreenQuad,eVF_P3F_C4B_T2F);
  DrawPrimitives(&pVertexBuffer, 3, R_PRIMV_RECTLIST);
  pSH->FXEndPass();

  //pTarget->SetResolved( false );

  // Restore the depth states
  // NOTE(review): D3DRS_ZENABLE is not written back here; only
  // EF_SetState(prevState, ...) below follows - confirm that is sufficient.
  m_pd3dDevice->SetRenderState( D3DRS_ZFUNC, D3DCMP_LESSEQUAL );
  m_pd3dDevice->SetRenderState( D3DRS_HIZENABLE, TRUE );

  m_pd3dDevice->FlushHiZStencil( D3DFHZS_ASYNCHRONOUS );

//////////////////////////////////////////////////////////////////////////

  m_pd3dDevice->SetRenderState( D3DRS_HIZWRITEENABLE, FALSE );

  //restore all states
  EF_SetState(prevState, -1, -1);
  
  pSH->FXEnd();

  Set2DMode(false, 1, 1);
  //FX_Commit();


  return 
    true;
}

//restore 
// Resolves the current render target (asserted to be the back buffer) and draws
// it back as a full-screen quad with the "MSAAResample" technique of the
// ShadowBlur shader, using point filtering and no depth test.
//
// bNonSRGBResolve - when true, the X/Y/Z sign fields of the target's texture
//                   format are switched to GPUSIGN_UNSIGNED for the draw and set
//                   to GPUSIGN_GAMMA afterwards.
//
// Returns false when the render target stack for target 0 is deeper than 1,
// when there is no current target texture, or when the ShadowBlur shader is
// missing; true otherwise.
bool CD3D9Renderer::XE_MSAAResample(bool bNonSRGBResolve)
{
  int nTarget = 0;
  if (m_nRTStackLevel[nTarget]>1)
    return false;

	gRenDev->m_cEF.mfRefreshSystemShader("ShadowBlur", CShaderMan::m_ShaderShadowBlur);

  // top entry of the render target stack
  SRTStack *pCur = &m_RTStack[nTarget][m_nRTStackLevel[nTarget]];

  //back buffer
  CTexture *pTarget = pCur->m_pTex;

  if (pTarget == NULL)
    return false;

  assert(pCur->m_bWasSetRT == true);
  assert(pCur->m_bWasSetD == true);
  assert (pTarget == CTexture::s_pBackBuffer);
  //////////////////////////////////////////////////////////////////////////
  //resolve current backbuffer
  pTarget->Resolve();

  //////////////////////////////////////////////////////////////////////////
  //MSAA Resample 
  m_RP.m_pPrevObject = NULL;
  m_RP.m_FrameObject++;
  EF_Scissor(false, 0, 0, 0, 0);
  D3DSetCull(eCULL_None);
  // NOTE(review): nSizeX / nSizeY are not used below.
  int nSizeX = pTarget->GetWidth();
  int nSizeY = pTarget->GetHeight();


  Set2DMode(true, 1, 1);

  // setup screen aligned quad
  float fVertDepth = 0.f;
  SVF_P3F_C4B_T2F pScreenQuad[] =  
  {
    { Vec3(0, 0, fVertDepth), {{~0}}, Vec2(0, 0) },
    { Vec3(0, 1, fVertDepth), {{~0}}, Vec2(0, 1) },
    { Vec3(1, 0, fVertDepth), {{~0}}, Vec2(1, 0) },
    { Vec3(1, 1, fVertDepth), {{~0}}, Vec2(1, 1) },
  };     

  CTexture::BindNULLFrom(1);

  CShader *pSH = m_cEF.m_ShaderShadowBlur;
  if (!pSH)
  {
    // undo the 2D mode switch before bailing out
    // (the target has already been resolved at this point)
    Set2DMode(false, 1, 1);
    return false;
  }
  
  //DrawFullScreenQuad();

  uint32 nPasses = 0;
  static CCryNameTSCRC TechName("MSAAResample");
  pSH->FXSetTechnique(TechName);
  pSH->FXBegin(&nPasses, FEF_DONTSETTEXTURES | FEF_DONTSETSTATES);

  STexState sPointFilterState = STexState(FILTER_POINT, true);

  if(bNonSRGBResolve)
  {
    // Patch the sign fields of the parent texture's format so the draw below
    // samples it as unsigned.
    // NOTE(review): pRTSurf is dereferenced without a NULL check.
    D3DSurface *pRTSurf = pTarget->GetSurface(0, 0);
    GPUTEXTURE_FETCH_CONSTANT& c = pRTSurf->Parent->Format;
    c.SignX = GPUSIGN_UNSIGNED;
    c.SignY = GPUSIGN_UNSIGNED;
    c.SignZ = GPUSIGN_UNSIGNED;
    SAFE_RELEASE(pRTSurf);
  }

  // bind the resolved target to sampler 0 with point filtering
  pTarget->Apply(0, CTexture::GetTexState(sPointFilterState)); 
  EF_SetState(GS_NODEPTHTEST); //|GS_NODEPTHTEST

  pSH->FXBeginPass(0);
  CVertexBuffer pVertexBuffer(pScreenQuad,eVF_P3F_C4B_T2F);
  DrawPrimitives(&pVertexBuffer, 4);  //EF_Commit() is called inside here
  pSH->FXEndPass();

  // the target has been drawn into again, so mark it as not resolved
  pTarget->SetResolved( false );

  //SetTexture(0);

  pSH->FXEnd();

  Set2DMode(false, 1, 1);
  //FX_Commit();

  if(bNonSRGBResolve)
  {
    // NOTE(review): the sign fields are set to GPUSIGN_GAMMA unconditionally,
    // not to whatever value they had before the patch above - confirm.
    D3DSurface *pRTSurf = pTarget->GetSurface(0, 0);
    GPUTEXTURE_FETCH_CONSTANT& c = pRTSurf->Parent->Format;
    c.SignX = GPUSIGN_GAMMA;
    c.SignY = GPUSIGN_GAMMA;
    c.SignZ = GPUSIGN_GAMMA;
    SAFE_RELEASE(pRTSurf);
  }

  return true;
}


bool CD3D9Renderer::XE_HalfScaleMode(bool bEnable, int nMode, bool bManualResolve)
{
  D3DSurface *pRTSurface, *pDSSurface;

  int nTarget = 0;
  //if (m_nRTStackLevel[nTarget]>1)
  //  return false;

  SRTStack *pCur = &m_RTStack[nTarget][m_nRTStackLevel[nTarget]];

  //back buffer
  CTexture *pTarget = pCur->m_pTex;

  if (pTarget == NULL)
    return false;

  //assert(pCur->m_bWasSetRT == true);
  //assert(pCur->m_bWasSetD == true);
  //assert (pTarget == CTexture::s_pBackBuffer);

  D3DMULTISAMPLE_TYPE d3dMS;
  int nWidth, nHeight;
  if (bEnable)
  {
    //m_pd3dDevice->SetRenderState(D3DRS_MULTISAMPLEANTIALIAS, FALSE);
    //m_pd3dDevice->SetRenderState(D3DRS_HALFPIXELOFFSET, TRUE);
    d3dMS = (nMode==4)?D3DMULTISAMPLE_4_SAMPLES:D3DMULTISAMPLE_2_SAMPLES;
    nWidth = (nMode==4)?pTarget->GetWidth()/2:pTarget->GetWidth();
    nHeight = pTarget->GetHeight()/2;
  }
  else
  {
    //m_pd3dDevice->SetRenderState(D3DRS_MULTISAMPLEANTIALIAS, FALSE);
    //m_pd3dDevice->SetRenderState(D3DRS_HALFPIXELOFFSET, FALSE);
    d3dMS = D3DMULTISAMPLE_NONE;
    nWidth = pTarget->GetWidth();
    nHeight = pTarget->GetHeight();
  }

//////////////////////////////////////////////////////////////////////////
  D3DSURFACE_PARAMETERS Parms;
  Parms.Base = 0;
#if _XDK_VER >= 6995
  Parms.HiZFunc = D3DHIZFUNC_DEFAULT;
#endif
  Parms.HierarchicalZBase = 0;
  Parms.ColorExpBias = 0;
  //FX_GetRTDimensions((pTarget->GetFlags() & FT_USAGE_PREDICATED_TILING) != 0, dwTileWidth, dwTileHeight);
  D3DFORMAT d3dFmt = (D3DFORMAT)pTarget->GetPixelFormat()->DeviceFormat;
  //if (bEnable)
  //{
  //  //reset SRGB
  //  d3dFmt = (D3DFORMAT) (d3dFmt & ~(D3DFORMAT_SIGNX_MASK | D3DFORMAT_SIGNY_MASK | D3DFORMAT_SIGNZ_MASK));
  //}

	d3dFmt = GetXenonRenderTargetFormat(d3dFmt, false);

  HRESULT hr = m_pd3dDevice->CreateRenderTarget(nWidth, nHeight, d3dFmt, d3dMS, 0L, FALSE, &pRTSurface, &Parms);
  assert(SUCCEEDED(hr));

//////////////////////////////////////////////////////////////////////////
  if (bEnable)
  {
    D3DSURFACE_PARAMETERS ZParams;

#if XENON_FORCE_720P
		ZParams.Base = (5*1024*1024) / GPU_EDRAM_TILE_SIZE;
#else
		ZParams.Base = (2 * 4 * 1200 * 704 ) / GPU_EDRAM_TILE_SIZE;
#endif

#if _XDK_VER >= 6995
#if defined(INVERT_DEPTH_RANGE)
		ZParams.HiZFunc = D3DHIZFUNC_GREATER_EQUAL;
#else
		ZParams.HiZFunc = D3DHIZFUNC_LESS_EQUAL;
#endif
#endif
    ZParams.HierarchicalZBase = 0;
    ZParams.ColorExpBias = 0;
    //FX_GetRTDimensions(true, nWidth, nHeight);
    hr = m_pd3dDevice->CreateDepthStencilSurface(nWidth, nHeight, m_ZFormat, d3dMS, 0, FALSE, &pDSSurface, &ZParams);
    assert(SUCCEEDED(hr));
  }

  //////////////////////////////////////////////////////////////////////////
  //color target surface
  hr = m_pd3dDevice->SetRenderTarget(0, pRTSurface);
  assert(SUCCEEDED(hr));

  m_pd3dDevice->SetRenderState(D3DRS_MULTISAMPLEANTIALIAS, FALSE);

  SAFE_RELEASE(pCur->m_pTarget);
  pCur->m_pTarget = pRTSurface;

  //depth target surface
  if (!bEnable)
  {
    hr = m_pd3dDevice->SetDepthStencilSurface((D3DSurface*)m_DepthBufferOrig.pSurf);
    assert(SUCCEEDED(hr));
    //release tmp depth buffer only
    SAFE_RELEASE(pCur->m_pDepth);
    pCur->m_pDepth = (D3DSurface*)m_DepthBufferOrig.pSurf;
  }
  else
  {
    hr = m_pd3dDevice->SetDepthStencilSurface(pDSSurface);
    assert(SUCCEEDED(hr));

    pCur->m_pDepth = pDSSurface;
  }

  //disable HiZ
  if (bEnable)
  {
    int st = m_RP.m_CurHiZState;
    st &= ~GS_HIZENABLE;
    EF_SetHiZState(st, m_RP.m_CurState, m_RP.m_CurStencilState);
  }
  else
  {
    int st = m_RP.m_CurHiZState;
    st |= GS_HIZENABLE;
    EF_SetHiZState(st, m_RP.m_CurState, m_RP.m_CurStencilState);
  }

  RT_SetViewport(0, 0, nWidth, nHeight);

	if( !bManualResolve )
		pTarget->SetResolved(false);

  return true;
}


// Maps a texture format to the format used for the corresponding render
// target surface.
//
// srcFmt           - format of the texture.
// bHigherPrecision - for Q8W8V8U8 and the 16-bit-per-channel float/uint
//                    variants, selects D3DFMT_A16B16G16R16_EDRAM instead of the
//                    more compact alternative.
//
// Formats that are not listed are returned unchanged.
D3DFormat CD3D9Renderer::GetXenonRenderTargetFormat(D3DFormat srcFmt, const bool bHigherPrecision)
{
	D3DFormat rtFmt = srcFmt;

	switch (srcFmt)
	{
	// low bit depth color -> 8 bits per channel
	case D3DFMT_A4R4G4B4:
	case D3DFMT_R5G6B5:
		rtFmt = D3DFMT_A8R8G8B8;
		break;

	// AntonK: don't support signed blending for D3DFMT_Q8W8V8U8 EDRAM surface!
	case D3DFMT_Q8W8V8U8:
		if (bHigherPrecision)
			rtFmt = D3DFMT_A16B16G16R16_EDRAM;
		else
			rtFmt = D3DFMT_A8R8G8B8;
		break;

	// 10/11 bit formats all go to the 10-bit float EDRAM format
	case D3DFMT_R11G11B10_CUSTOM:
	case D3DFMT_R11G11B10:
	case D3DFMT_A2B10G10R10:
	case D3DFMT_A2R10G10B10:
		rtFmt = D3DFMT_A2B10G10R10F_EDRAM;
		break;

	case D3DFMT_R16F:
		rtFmt = D3DFMT_R32F;
		break;

	case D3DFMT_A16B16G16R16_UINT_CUSTOM:
	case D3DFMT_A16B16G16R16F_EXPAND:
	case D3DFMT_A16B16G16R16F:
		if (bHigherPrecision)
			rtFmt = D3DFMT_A16B16G16R16_EDRAM;
		else
			rtFmt = D3DFMT_A2B10G10R10F_EDRAM;
		break;

	case D3DFMT_A16B16G16R16:
		rtFmt = D3DFMT_A16B16G16R16_EDRAM;
		break;

	// G16R16_EDRAM has range [-32;32], whereas the G16R16 texture format has range [0;1],
	// so G32R32F is used instead of D3DFMT_G16R16_EDRAM.
	// Beware of the double Resolve() overhead for G32R32F despite the same destination format!
	case D3DFMT_G16R16:
		rtFmt = D3DFMT_G32R32F;
		break;

	default:
		break;
	}

	return rtFmt;
}


// Replaces the color surface of the current render target (asserted to be the
// back buffer) with a new non-multisampled surface whose format either has the
// sRGB variant applied (enable == true, via MAKESRGBFMT) or has the X/Y/Z sign
// bits cleared (enable == false).
//
// Returns false when the render target stack for target 0 is deeper than 1,
// when there is no current target texture, or when the new surface could not
// be created (in which case the current target is left untouched); true
// otherwise.
bool CD3D9Renderer::XE_SRGBWriteEnable(bool enable)
{
	int nTarget = 0;
	if (m_nRTStackLevel[nTarget]>1)
		return false;

	SRTStack *pCur = &m_RTStack[nTarget][m_nRTStackLevel[nTarget]];

	CTexture *pTarget = pCur->m_pTex;
	if (pTarget == NULL)
		return false;

	//assert(pCur->m_bWasSetRT == true);
	//assert(pCur->m_bWasSetD == true);
	assert (pTarget == CTexture::s_pBackBuffer);

	D3DFORMAT fmt = (D3DFORMAT) pTarget->GetPixelFormat()->DeviceFormat;
	fmt = (D3DFORMAT) (enable ? MAKESRGBFMT(fmt) : ((fmt) & ~(D3DFORMAT_SIGNX_MASK | D3DFORMAT_SIGNY_MASK | D3DFORMAT_SIGNZ_MASK)));

	D3DMULTISAMPLE_TYPE ms = D3DMULTISAMPLE_NONE;
	int width = pTarget->GetWidth();
	int height = pTarget->GetHeight();

	// color surface placed at the start of EDRAM (Base = 0)
	D3DSURFACE_PARAMETERS params;
	params.Base = 0;
#if _XDK_VER >= 6995
	params.HiZFunc = D3DHIZFUNC_DEFAULT;
#endif
	params.HierarchicalZBase = 0;
	params.ColorExpBias = 0;

	D3DSurface* pRTSurface(0);
	HRESULT hr = m_pd3dDevice->CreateRenderTarget(width, height, fmt, ms, 0, FALSE, &pRTSurface, &params);
	assert(SUCCEEDED(hr));
	if (!SUCCEEDED(hr) || pRTSurface == NULL)
	{
		// Do not release the current surface or store a NULL one in the RT stack
		// when the replacement could not be created.
		return false;
	}

	hr = m_pd3dDevice->SetRenderTarget(0, pRTSurface);
	assert(SUCCEEDED(hr));

	// the RT stack entry takes over the new surface
	SAFE_RELEASE(pCur->m_pTarget);
	pCur->m_pTarget = pRTSurface;

	RT_SetViewport(0, 0, width, height);
	pTarget->SetResolved(false);

	return true;
}



// Resolves render target 0 into pTex, one tile of the current tiling scenario
// at a time, clearing the render target and the depth/stencil buffer as part
// of each resolve.
//
// Must be called while predicated tiling is active (asserted). The destination
// texture is temporarily switched to a resolvable format and restored
// afterwards when needed. Predication is reset to D3DPRED_ALL_RENDER at the end.
//
// Always returns true.
bool CD3D9Renderer::XE_ResolveRenderTarget(CDeviceTexture *pTex)
{
  assert(m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags2 & RBPF2_IN_PREDICATED_TILING);

  D3DVECTOR4 vClear = { 0, 0, 0, 0 };

  CONST TILING_SCENARIO& rScenario = m_pTilingScenarios[m_dwTilingScenarioIndex];

  // The texture header may need a different format for the duration of the resolve.
  GPUTEXTURE_FETCH_CONSTANT savedFmt;
  const bool bFormatPatched = CTexture::ConvertToResolvableFormat(pTex->Get2DTexture(), &savedFmt);

  const DWORD dwResolveFlags = D3DRESOLVE_RENDERTARGET0 | D3DRESOLVE_CLEARRENDERTARGET | D3DRESOLVE_CLEARDEPTHSTENCIL;

  for( UINT nTile = 0; nTile < (UINT)rScenario.dwTileCount; ++nTile )
  {
    // Only executed by the GPU while rendering this tile.
    m_pd3dDevice->SetPredication( D3DPRED_TILE_RENDER(nTile) );

    // The tile lands in the texture at the upper left corner of its tiling rect.
    D3DPOINT* pDstCorner = (D3DPOINT*)&rScenario.TilingRects[nTile];

    m_pd3dDevice->Resolve( dwResolveFlags,
      &rScenario.TilingRects[nTile],
      pTex->Get2DTexture(),
      pDstCorner,
      0, 0,
      &vClear,
      1.0f, 0L, NULL );
  }

  if( bFormatPatched )
    CTexture::RestoreFormat(pTex->Get2DTexture(), savedFmt);

  // Back to the default predication for rendering.
  m_pd3dDevice->SetPredication( D3DPRED_ALL_RENDER );

  return true;
}

// Switches hierarchical-stencil writes on or off.
//   bEnable     - true:  enable hi-stencil writes and program reference value and compare function.
//                 false: disable hi-stencil writes and issue an asynchronous hi-Z/stencil flush.
//   bEqualValue - value written to D3DRS_HISTENCILREF (only used when enabling).
//   depthFunc   - GS_DEPTHFUNC_EQUAL or GS_DEPTHFUNC_NOTEQUAL; any other value asserts and
//                 leaves D3DRS_HISTENCILFUNC untouched (only used when enabling).
void CD3D9Renderer::XE_HiStencilState(bool bEnable, int bEqualValue, int depthFunc/*, int curHiZState, int curState, int curStencilState**/)
{
  //to fix - cache states
  if (!bEnable)
  {
    m_pd3dDevice->SetRenderState( D3DRS_HISTENCILWRITEENABLE, FALSE );
    m_pd3dDevice->FlushHiZStencil(D3DFHZS_ASYNCHRONOUS);
    return;
  }

  m_pd3dDevice->SetRenderState( D3DRS_HISTENCILWRITEENABLE, TRUE );
  m_pd3dDevice->SetRenderState( D3DRS_HISTENCILREF, bEqualValue );

  const bool bCompareEqual    = (depthFunc == GS_DEPTHFUNC_EQUAL);
  const bool bCompareNotEqual = (depthFunc == GS_DEPTHFUNC_NOTEQUAL);
  if (!bCompareEqual && !bCompareNotEqual)
  {
    // Unsupported compare function.
    assert(0);
    return;
  }

  m_pd3dDevice->SetRenderState( D3DRS_HISTENCILFUNC, bCompareEqual ? D3DHSCMP_EQUAL : D3DHSCMP_NOTEQUAL );
}


// Point-samples a tiled 32-bit depth surface into a linear 16-bit buffer.
//   pDepthTex      - base of the tiled source surface, addressed in 32-bit texels.
//   nInSizeX/Y     - size in texels of the source region that is sampled.
//   nActualImageX  - width handed to XGAddress2DTiledOffset for the tiled address computation.
//   pOutTex        - destination, written row by row as nOutSizeY rows of nOutSizeX uint16 values.
//   nOutSizeX/Y    - destination size; nOutSizeX is asserted to be <= 1024.
// Source rows are walked from the last row upwards, so the output is vertically flipped
// relative to the source. Only the upper 16 bits of every 32-bit source value are kept.
// Does nothing when either buffer is null or the destination is empty.
void CD3D9Renderer::XE_FetchDepthTexture( uint32 *pDepthTex, int nInSizeX, int nInSizeY, int nActualImageX, uint16 *pOutTex,  
							          int nOutSizeX, int nOutSizeY ) 
{
  assert(nOutSizeX <= 1024 );

  // Nothing to write; also keeps the step computation below from dividing by zero.
  if (!pDepthTex || !pOutTex || nOutSizeX <= 0 || nOutSizeY <= 0)
    return;

  // 16.16 fixed-point distance in the source between two neighbouring destination texels.
  const uint32 nStepX = (nInSizeX<<16)/nOutSizeX;
  const uint32 nStepY = (nInSizeY<<16)/nOutSizeY;

  // Start a quarter step (plus one fixed-point unit) above the last source row.
  uint32 inY = ((nInSizeY-1)<<16) - (nStepY>>2) - 1;

  for (int i=0 ; i<nOutSizeY ; i++, pOutTex += nOutSizeX, inY -= nStepY)
  {
    uint32 nSrcY = (inY>>16);

    //temp clamp: inY is unsigned, so stepping past row 0 wraps to a huge value
    if (nSrcY >= (uint32)nInSizeY)
    {
      nSrcY = 0;
    }

    // Every row starts a quarter step into the source.
    uint32 inX = nStepX>>2;
    for (int j=0 ; j<nOutSizeX ; j++, inX += nStepX)
    {
      const uint32 nSrcX = (inX>>16);

      // Texel index inside the tiled surface (last argument: presumably bytes per texel - TODO confirm).
      const DWORD dwOffset = XGAddress2DTiledOffset( nSrcX, nSrcY, nActualImageX, 4 );
      pOutTex[j] = static_cast<uint16>( pDepthTex[dwOffset]>>16 );
    }
  }
}


// TODO: add double buffering.
// Copies a downsampled view of the Z occlusion texture into pOutOcclBuffer.
//   pOutOcclBuffer - destination for nSizeX * nSizeY 16-bit depth values.
//   nSizeX, nSizeY - requested output size; both must be positive.
//   pmViewProj     - receives m_oldOcclusionCamera while the occlusion fence is still pending,
//                    otherwise m_newOcclusionCamera. It is written even if a later check fails.
// Returns false when no occlusion fence has been set yet, when an argument is invalid, or when
// the occlusion texture is not available; true once the buffer has been filled.
// Side effects: sets m_RP.m_nZOcclusionReady and m_numOcclusionDownsampleStages.
bool CD3D9Renderer::GetOcclusionBuffer(uint16* pOutOcclBuffer, int32 nSizeX, int32 nSizeY, Matrix44* pmViewProj)
{
  //AUTO_LOCK(g_cCBufferSwitchLock);
  if (m_FenceOcclusionReady==0L)
    return false;

  // The sizes are used as divisors below and both pointers are written through.
  if (pOutOcclBuffer==NULL || pmViewProj==NULL || nSizeX<=0 || nSizeY<=0)
    return false;

  if (m_pd3dDevice->IsFencePending(m_FenceOcclusionReady))
  {
    *pmViewProj = m_RP.m_oldOcclusionCamera;
  }
  else
  {
    *pmViewProj = m_RP.m_newOcclusionCamera;
  }

  m_RP.m_nZOcclusionReady = 1;
  CTexture* pCurZOcclusion = CTexture::s_ptexZOcclusion[m_RP.m_nZOcclusionReady];

  if (pCurZOcclusion==NULL)
    return false;

  //////////////////////////////////////////////////////////////////////////
  //calc number of downsample stages
  int nZBuffX = CTexture::s_ptexZTarget->GetWidth();
  int nZBuffY = CTexture::s_ptexZTarget->GetHeight();
  int iRatio = min((nZBuffX/nSizeX), (nZBuffY/nSizeY));

  // Integer floor(log2(iRatio)). Unlike a float log()/log() quotient this cannot be rounded
  // off by one, and it stays well defined (0 stages) when the requested buffer is larger
  // than the Z target, i.e. when iRatio < 1.
  int nStages = 0;
  for (int nRemaining = iRatio; nRemaining > 1; nRemaining >>= 1)
  {
    ++nStages;
  }
  m_numOcclusionDownsampleStages = nStages;

  //////////////////////////////////////////////////////////////////////////


  int nZOccBuffActualX = pCurZOcclusion->GetWidth(); //m_d3dsdBackBuffer.Width;
  int nZOccBuffY = pCurZOcclusion->GetHeight(); //m_d3dsdBackBuffer.Height;

  // Only the region covered by the final downsample stage is sampled; the full
  // texture width is still needed for addressing.
  int nZOccBuffX = nZOccBuffActualX>>m_numOcclusionDownsampleStages;
  nZOccBuffY >>= m_numOcclusionDownsampleStages;

  CDeviceTexture* pDepthTexture = NULL;
  if (CTexture::IsTextureExist(pCurZOcclusion)) //CTexture::s_ptexZTarget
    pDepthTexture =  pCurZOcclusion->GetDevTexture(); //CTexture::s_ptexZTarget->GetDevTexture();
  else
    return false;

  // Address of the texel data, reconstructed from the texture's fetch constant.
  DWORD dwBaseAddress = pDepthTexture->Get2DTexture()->Format.BaseAddress << GPU_TEXTURE_ADDRESS_SHIFT;

  XE_FetchDepthTexture( (unsigned int*)dwBaseAddress, nZOccBuffX, nZOccBuffY, nZOccBuffActualX, pOutOcclBuffer, nSizeX, nSizeY);
  //XE_FetchDepthTexture( (unsigned int*)dwBaseAddress, nZBuffX, nZBuffY, nZBuffX, pOutOcclBuffer, nSizeX, nSizeY);

  return true;
}

bool CD3D9Renderer::XE_SetAliasRGB10A2RT(int nTarget)
{
	SRTStack *pCur = &m_RTStack[nTarget][m_nRTStackLevel[nTarget]];

	CTexture *pTarget = pCur->m_pTex;
	if (pTarget == NULL)
		return false;

	D3DFORMAT fmt = D3DFMT_A2B10G10R10;

	D3DMULTISAMPLE_TYPE ms = D3DMULTISAMPLE_NONE;
	D3DSURFACE_PARAMETERS Parms;
	Parms.Base = 0;
	if( nTarget )
	{
		// Accumulate bases, this is needed for render targets not to overlap EDRAM at same offset
		for( int t = 0; t < nTarget; ++t )
		{
			int nTargetPrev = max(t - 1, 0);
			SRTStack *pPrev = &m_RTStack[nTargetPrev][m_nRTStackLevel[nTargetPrev]];
			if( pPrev && pPrev->m_pTarget && pPrev->m_pTex)
			{
				D3DFORMAT prevFmt = D3DFMT_A2B10G10R10;
				Parms.Base += XGSurfaceSize(pPrev->m_Width, pPrev->m_Height, prevFmt, (D3DMULTISAMPLE_TYPE)pPrev->m_pTex->m_pRenderTargetData->m_nFSAAQuality);
			}
		}
		// Check if we not ran out of XBox360 EDRAM (10MB)
		if(Parms.Base >= GPU_EDRAM_TILES)
		{
			assert(0);
			return false;
		}
	}
	
#if _XDK_VER >= 6995
	Parms.HiZFunc = D3DHIZFUNC_DEFAULT;
#endif
	Parms.HierarchicalZBase = 0;
	Parms.ColorExpBias = (LONG) pTarget->GetExpAdjustRT();

	D3DSurface* pRTSurface(0);
	HRESULT hr = gcpRendD3D->m_pd3dDevice->CreateRenderTarget(pTarget->GetWidth(), pTarget->GetHeight(), fmt, ms, 0, FALSE, &pRTSurface, &Parms);
	hr = gcpRendD3D->m_pd3dDevice->SetRenderTarget(nTarget, pRTSurface);

	SAFE_RELEASE(pCur->m_pTarget);
	pCur->m_pTarget = pRTSurface;

	pTarget->SetResolved(false);	

	return true;
}

////////////////////////////////////////////////////////////////////////////////////////
// XENON gamma conversion utilities (from March XDK sample)
////////////////////////////////////////////////////////////////////////////////////////

//--------------------------------------------------------------------------------------
// Helper functions.  These perform all the necessary transforms between 32-bit linear
// values and various gamma spaces of various bit depths.
//--------------------------------------------------------------------------------------

// Returns a * a.
template<typename t_type> 
t_type Squared( t_type a ) { return a * a; }

// Returns the smaller of a and b (b when they compare equal or are unordered).
template<typename t_type>
t_type Min( t_type a, t_type b ) { return a < b ? a : b; }

// Returns the larger of a and b (b when they compare equal or are unordered).
template<typename t_type>
t_type Max( t_type a, t_type b ) { return a > b ? a : b; }

// Clamps a to the range [0, 1]. The bounds are built in t_type, so the helper
// instantiates for any arithmetic type and not just for float.
template<typename t_type>
t_type Saturate( t_type a ) { return Min<t_type>( t_type( 1 ), Max<t_type>( t_type( 0 ), a ) ); }

// Quantizes f, clamped to [0, 1], to an unsigned t_N-bit code with round-to-nearest.
template<typename t_type, UINT t_N>
t_type ConvertFloatToNBits( FLOAT f )
{
	const int nMaxCode = ( 1 << t_N ) - 1;
	return (t_type) ( nMaxCode * Saturate( f ) + 0.5f );
}

// Maps a t_N-bit code back to a float, with the largest code mapping to 1.0.
template<typename t_type, UINT t_N>
FLOAT ConvertNBitsToFloat( t_type i )
{
	const int nMaxCode = ( 1 << t_N ) - 1;
	return ( (FLOAT) i ) / nMaxCode;
}

// Signature of the gamma-space -> linear transfer functions (DegammaFunc*) defined below.
typedef FLOAT DegammaFunc( FLOAT );
// Signature of the linear -> gamma-space transfer functions (GammaFunc*) defined below.
typedef FLOAT GammaFunc( FLOAT );

// Conversions from gamma spaces to linear space
// Identity: the value is treated as linear already.
FLOAT DegammaFuncNull( FLOAT f )
{
	return f;
}
// Gamma 2.0 decode: squares the encoded value.
FLOAT DegammaFunc_2_0( FLOAT f )
{
	return f * f;
}
// Gamma 2.2 decode.
FLOAT DegammaFunc_2_2( FLOAT f )
{
	return powf( f, 2.2f );
}
// Piecewise-linear (PWL) decode: four straight segments with breakpoints at
// 1/4, 3/8 and 3/4 of the encoded range and slopes 1/4, 1/2, 1 and 2.
FLOAT DegammaFuncPWL( FLOAT f ) 
{ 
	if( f > 0.75f )
		return 0.5f + ( f - 0.75f ) * 2.0f;

	if( f > 0.375f )
		return 0.125f + ( f - 0.375f );

	if( f > 0.25f )
		return 0.0625f + ( f - 0.25f ) * 0.5f;

	return 0.0f + f * 0.25f;
}
// sRGB decode: linear segment up to and including 0.04045, 2.4 power curve above it.
FLOAT DegammaFuncsRGB( FLOAT f )
{
	const bool bLinearSegment = ( f <= 0.04045f );
	return bLinearSegment ? ( f / 12.92f )
	                      : powf( ( f + 0.055f ) / 1.055f, 2.4f );
}
// TV-gamma decode: linear segment up to and including 0.0812, power curve (1 / 0.45) above it.
FLOAT DegammaFuncTV( FLOAT f )
{
	const bool bLinearSegment = ( f <= 0.0812f );
	return bLinearSegment ? ( f / 4.5f )
	                      : powf( ( f + 0.099f ) / 1.099f, 1.0f / 0.45f );
}

// Conversions from linear space to gamma spaces
// Identity: the value stays linear.
FLOAT GammaFuncNull( FLOAT f )
{
	return f;
}
// Gamma 2.0 encode: square root of the linear value.
FLOAT GammaFunc_2_0( FLOAT f )
{
	return sqrtf( f );
}
// Gamma 2.2 encode.
FLOAT GammaFunc_2_2( FLOAT f )
{
	return powf( f, 1.0f / 2.2f );
}
// Piecewise-linear (PWL) encode: four straight segments with breakpoints at
// 1/16, 1/8 and 1/2 of the linear range and slopes 4, 2, 1 and 1/2.
FLOAT GammaFuncPWL( FLOAT f ) 
{ 
	if( f > 0.5f )
		return 0.75f + ( f - 0.5f ) * 0.5f;

	if( f > 0.125f )
		return 0.375f + ( f - 0.125f );

	if( f > 0.0625f )
		return 0.25f + ( f - 0.0625f ) * 2.0f;

	return 0.0f + f * 4.0f;
}
// sRGB encode: linear segment up to and including 0.0031308, 1/2.4 power curve above it.
FLOAT GammaFuncsRGB( FLOAT f )
{
	const bool bLinearSegment = ( f <= 0.0031308f );
	return bLinearSegment ? ( f * 12.92f )
	                      : ( powf( f, 1.0f / 2.4f ) * 1.055f - 0.055f );
}
// TV-gamma encode: linear segment up to and including 0.018, 0.45 power curve above it.
FLOAT GammaFuncTV( FLOAT f )
{
	const bool bLinearSegment = ( f <= 0.018f );
	return bLinearSegment ? ( f * 4.5f )
	                      : ( powf( f, 0.45f ) * 1.099f - 0.099f );
}

// Expands a t_N-bit code to a float and runs it through the Degamma transfer function.
template<typename t_type, UINT t_N, DegammaFunc Degamma> 
FLOAT ConvertNBitGammaToFloat( t_type i )
{
	const FLOAT fEncoded = ConvertNBitsToFloat<t_type, t_N>( i );
	return Degamma( fEncoded );
}

// Runs f through the Gamma transfer function and quantizes the result to a t_N-bit code.
template<typename t_type, UINT t_N, GammaFunc Gamma> 
t_type ConvertFloatToNBitGamma( FLOAT f )
{
	const FLOAT fEncoded = Gamma( f );
	return ConvertFloatToNBits<t_type, t_N>( fEncoded );
}

// Gamma conversions from FLOAT to integer
// --- no transfer function (linear) ---
BYTE ConvertFloatTo8BitLinear( FLOAT f )
{
	return ConvertFloatToNBitGamma<BYTE, 8, GammaFuncNull>( f );
}
BYTE ConvertFloatTo2BitLinear( FLOAT f )
{
	return ConvertFloatToNBitGamma<BYTE, 2, GammaFuncNull>( f );
}
WORD ConvertFloatTo10BitLinear( FLOAT f )
{
	return ConvertFloatToNBitGamma<WORD, 10, GammaFuncNull>( f );
}
WORD ConvertFloatTo16BitLinear( FLOAT f )
{
	return ConvertFloatToNBitGamma<WORD, 16, GammaFuncNull>( f );
}

// --- piecewise-linear gamma ---
BYTE ConvertFloatTo8BitGammaPWL( FLOAT f )
{
	return ConvertFloatToNBitGamma<BYTE, 8, GammaFuncPWL>( f );
}
WORD ConvertFloatTo10BitGammaPWL( FLOAT f )
{
	return ConvertFloatToNBitGamma<WORD, 10, GammaFuncPWL>( f );
}
WORD ConvertFloatTo16BitGammaPWL( FLOAT f )
{
	return ConvertFloatToNBitGamma<WORD, 16, GammaFuncPWL>( f );
}

// --- gamma 2.0 ---
BYTE ConvertFloatTo8BitGamma_2_0( FLOAT f )
{
	return ConvertFloatToNBitGamma<BYTE, 8, GammaFunc_2_0>( f );
}
WORD ConvertFloatTo10BitGamma_2_0( FLOAT f )
{
	return ConvertFloatToNBitGamma<WORD, 10, GammaFunc_2_0>( f );
}

// --- gamma 2.2 ---
BYTE ConvertFloatTo8BitGamma_2_2( FLOAT f )
{
	return ConvertFloatToNBitGamma<BYTE, 8, GammaFunc_2_2>( f );
}
WORD ConvertFloatTo10BitGamma_2_2( FLOAT f )
{
	return ConvertFloatToNBitGamma<WORD, 10, GammaFunc_2_2>( f );
}

// --- sRGB ---
WORD ConvertFloatTo10BitGammasRGB( FLOAT f )
{
	return ConvertFloatToNBitGamma<WORD, 10, GammaFuncsRGB>( f );
}
WORD ConvertFloatTo16BitGammasRGB( FLOAT f )
{
	return ConvertFloatToNBitGamma<WORD, 16, GammaFuncsRGB>( f );
}

// --- sRGB with override: the value is sRGB-encoded, decoded again with the TV curve,
// --- and only then quantized as sRGB.
WORD ConvertFloatTo10BitGammasRGBOverride( FLOAT f )
{
	const FLOAT fSRGBEncoded = GammaFuncsRGB( f );
	const FLOAT fTVDecoded = DegammaFuncTV( fSRGBEncoded );
	return ConvertFloatTo10BitGammasRGB( fTVDecoded );
}
WORD ConvertFloatTo16BitGammasRGBOverride( FLOAT f )
{
	const FLOAT fSRGBEncoded = GammaFuncsRGB( f );
	const FLOAT fTVDecoded = DegammaFuncTV( fSRGBEncoded );
	return ConvertFloatTo16BitGammasRGB( fTVDecoded );
}

// Degamma conversions from integer to FLOAT
// --- 8-bit sources ---
FLOAT Convert8BitGammaPWLToFloat( BYTE i )
{
	return ConvertNBitGammaToFloat<BYTE, 8, DegammaFuncPWL>( i );
}
FLOAT Convert8BitLinearToFloat( BYTE i )
{
	return ConvertNBitGammaToFloat<BYTE, 8, DegammaFuncNull>( i );
}
FLOAT Convert8BitGamma_2_0ToFloat( BYTE i )
{
	return ConvertNBitGammaToFloat<BYTE, 8, DegammaFunc_2_0>( i );
}
FLOAT Convert8BitGamma_2_2ToFloat( BYTE i )
{
	return ConvertNBitGammaToFloat<BYTE, 8, DegammaFunc_2_2>( i );
}
FLOAT Convert8BitGammasRGBToFloat( BYTE i )
{
	return ConvertNBitGammaToFloat<BYTE, 8, DegammaFuncsRGB>( i );
}
FLOAT Convert8BitGammaTVToFloat( BYTE i )
{
	return ConvertNBitGammaToFloat<BYTE, 8, DegammaFuncTV>( i );
}

// --- 10-bit sources ---
FLOAT Convert10BitGammaPWLToFloat( WORD i )
{
	return ConvertNBitGammaToFloat<WORD, 10, DegammaFuncPWL>( i );
}
FLOAT Convert10BitLinearToFloat( WORD i )
{
	return ConvertNBitGammaToFloat<WORD, 10, DegammaFuncNull>( i );
}
FLOAT Convert10BitGamma_2_0ToFloat( WORD i )
{
	return ConvertNBitGammaToFloat<WORD, 10, DegammaFunc_2_0>( i );
}
FLOAT Convert10BitGamma_2_2ToFloat( WORD i )
{
	return ConvertNBitGammaToFloat<WORD, 10, DegammaFunc_2_2>( i );
}
FLOAT Convert10BitGammasRGBToFloat( WORD i )
{
	return ConvertNBitGammaToFloat<WORD, 10, DegammaFuncsRGB>( i );
}
FLOAT Convert10BitGammaTVToFloat( WORD i )
{
	return ConvertNBitGammaToFloat<WORD, 10, DegammaFuncTV>( i );
}

// Conversions from one gamma space to another
// --- 8-bit source -> 10-bit sRGB; each function decodes to linear float first ---
WORD Convert8BitGammaPWLTo10BitGammasRGB( BYTE i )
{
	const FLOAT fLinear = Convert8BitGammaPWLToFloat( i );
	return ConvertFloatTo10BitGammasRGB( fLinear );
}
WORD Convert8BitLinearTo10BitGammasRGB( BYTE i )
{
	const FLOAT fLinear = Convert8BitLinearToFloat( i );
	return ConvertFloatTo10BitGammasRGB( fLinear );
}
WORD Convert8BitGamma_2_0To10BitGammasRGB( BYTE i )
{
	const FLOAT fLinear = Convert8BitGamma_2_0ToFloat( i );
	return ConvertFloatTo10BitGammasRGB( fLinear );
}
WORD Convert8BitGamma_2_2To10BitGammasRGB( BYTE i )
{
	const FLOAT fLinear = Convert8BitGamma_2_2ToFloat( i );
	return ConvertFloatTo10BitGammasRGB( fLinear );
}
WORD Convert8BitGammaTVTo10BitGammasRGB( BYTE i )
{
	const FLOAT fLinear = Convert8BitGammaTVToFloat( i );
	return ConvertFloatTo10BitGammasRGB( fLinear );
}

// --- 10-bit source -> 16-bit sRGB ---
WORD Convert10BitGammaPWLTo16BitGammasRGB( WORD i )
{
	const FLOAT fLinear = Convert10BitGammaPWLToFloat( i );
	return ConvertFloatTo16BitGammasRGB( fLinear );
}
WORD Convert10BitLinearTo16BitGammasRGB( WORD i )
{
	const FLOAT fLinear = Convert10BitLinearToFloat( i );
	return ConvertFloatTo16BitGammasRGB( fLinear );
}
WORD Convert10BitGamma_2_0To16BitGammasRGB( WORD i )
{
	const FLOAT fLinear = Convert10BitGamma_2_0ToFloat( i );
	return ConvertFloatTo16BitGammasRGB( fLinear );
}
WORD Convert10BitGamma_2_2To16BitGammasRGB( WORD i )
{
	const FLOAT fLinear = Convert10BitGamma_2_2ToFloat( i );
	return ConvertFloatTo16BitGammasRGB( fLinear );
}
WORD Convert10BitGammaTVTo16BitGammasRGB( WORD i )
{
	const FLOAT fLinear = Convert10BitGammaTVToFloat( i );
	return ConvertFloatTo16BitGammasRGB( fLinear );
}

// Conversions from one gamma space to another, with a second conversion from TV-gamma
// to sRGB appended.  The purpose of the second conversion is to undo the implicit 
// system conversion from sRGB to TV-gamma, and ensure that the final output signal is 
// in sRGB space.  This might be done to match the output of other platforms, in case those
// platforms cannot be changed.
// --- 8-bit source -> 10-bit sRGB with override; each function decodes to linear float first ---
WORD Convert8BitGammaPWLTo10BitGammasRGBOverride( BYTE i )
{
	const FLOAT fLinear = Convert8BitGammaPWLToFloat( i );
	return ConvertFloatTo10BitGammasRGBOverride( fLinear );
}
WORD Convert8BitLinearTo10BitGammasRGBOverride( BYTE i )
{
	const FLOAT fLinear = Convert8BitLinearToFloat( i );
	return ConvertFloatTo10BitGammasRGBOverride( fLinear );
}
WORD Convert8BitGamma_2_0To10BitGammasRGBOverride( BYTE i )
{
	const FLOAT fLinear = Convert8BitGamma_2_0ToFloat( i );
	return ConvertFloatTo10BitGammasRGBOverride( fLinear );
}
WORD Convert8BitGamma_2_2To10BitGammasRGBOverride( BYTE i )
{
	const FLOAT fLinear = Convert8BitGamma_2_2ToFloat( i );
	return ConvertFloatTo10BitGammasRGBOverride( fLinear );
}
WORD Convert8BitGammasRGBTo10BitGammasRGBOverride( BYTE i )
{
	const FLOAT fLinear = Convert8BitGammasRGBToFloat( i );
	return ConvertFloatTo10BitGammasRGBOverride( fLinear );
}

// --- 10-bit source -> 16-bit sRGB with override ---
WORD Convert10BitGammaPWLTo16BitGammasRGBOverride( WORD i )
{
	const FLOAT fLinear = Convert10BitGammaPWLToFloat( i );
	return ConvertFloatTo16BitGammasRGBOverride( fLinear );
}
WORD Convert10BitLinearTo16BitGammasRGBOverride( WORD i )
{
	const FLOAT fLinear = Convert10BitLinearToFloat( i );
	return ConvertFloatTo16BitGammasRGBOverride( fLinear );
}
WORD Convert10BitGamma_2_0To16BitGammasRGBOverride( WORD i )
{
	const FLOAT fLinear = Convert10BitGamma_2_0ToFloat( i );
	return ConvertFloatTo16BitGammasRGBOverride( fLinear );
}
WORD Convert10BitGamma_2_2To16BitGammasRGBOverride( WORD i )
{
	const FLOAT fLinear = Convert10BitGamma_2_2ToFloat( i );
	return ConvertFloatTo16BitGammasRGBOverride( fLinear );
}
WORD Convert10BitGammasRGBTo16BitGammasRGBOverride( WORD i )
{
	const FLOAT fLinear = Convert10BitGammasRGBToFloat( i );
	return ConvertFloatTo16BitGammasRGBOverride( fLinear );
}

// Fills all three channels of pGammaRamp with the same PWL -> sRGB table.
// Entry j is the 10-bit sRGB code for the 8-bit PWL code j, shifted left by 6
// so that it occupies the upper bits of the 16-bit ramp value.
void CD3D9Renderer::XE_GammaPWLtoSRGB( uint16 pGammaRamp[3][256] )
{
	for( UINT nEntry = 0; nEntry < 256; ++nEntry )
	{
		const uint16 nRampValue = static_cast<uint16>( Convert8BitGammaPWLTo10BitGammasRGB( (BYTE) nEntry ) << 6 );

		pGammaRamp[2][nEntry] = nRampValue;
		pGammaRamp[1][nEntry] = nRampValue;
		pGammaRamp[0][nEntry] = nRampValue;
	}
}

#endif
