/******************************************************************

  SOE SPU PBWater

  Module: main.cpp

  Author: Paul Balon

  Description:

    Simulates water on SPU.

  Copyright 2006 Sony Online Entertainment.  All rights reserved.

*******************************************************************/

#include <sys/spu_thread.h>
#include <sys/spu_event.h>
#include <cell/dma.h>
#include <spu_printf.h>
#include <math.h>
#include <PS3/SyWaterSharedPS3.h>
#include "../../../framework/ToolShare/SyWaterTileDimension.h"
//#include <spu_intrinsics.h>
#include <PSGL/spu_psgl.h>


/* Shorthand for the compiler's no-alias pointer qualifier; applied to the
   read-only grid/enable row pointers inside main(). */
#define RESTRICTED_POINTER __restrict

/* Local copy of the PPU's work description. main() DMAs it in from main memory
   once per wake-up and reads simulationPointers[], simProperties[] and
   vertexBuffer from it. */
ProcessMessage processMessageObject;

/* Four 128-byte-aligned local buffers that main() cycles through (0,1,2,3,0,...).
   The array index doubles as the DMA tag for that buffer's get/put transfers. */
SimBuffer theSimBuffer[4] __attribute__((aligned(128)));

/*
  cross

  Writes the cross product v0 x v1 into v2 (three floats each).
  Components are produced one at a time, x then y then z, and each one is
  stored before the next is computed.
*/
inline void cross(const volatile float * v0, const volatile float * v1, volatile float * v2)
{
  const float outX =  v0[1] * v1[2] - v0[2] * v1[1];
  v2[0] = outX;

  const float outY = -v0[0] * v1[2] + v0[2] * v1[0];
  v2[1] = outY;

  const float outZ =  v0[0] * v1[1] - v0[1] * v1[0];
  v2[2] = outZ;
}

/* 128-byte-aligned array of eight 64-bit words.
   NOTE(review): nothing in the visible part of this file references it —
   confirm whether it is still needed before removing. */
unsigned long long cmd[8] __attribute__((aligned(128)));

/*
  main

  Entry point of the water SPU thread. After initialising PSGL it loops forever:

    1. block until an event arrives on WATER_SPU_QUEUE_NUMBER; exit the thread
       if the receive fails or the event is not
       WATERSPU_EVENT_PPUTOSPU_READYTOPROCESS,
    2. DMA the ProcessMessage at readFromMainMemoryLocation into
       processMessageObject,
    3. walk the zero-terminated simulationPointers[] list; for each entry
       DMA the SimBuffer in, run one simulation step, rebuild its render
       vertices, optionally write the vertices to vertexBuffer, and DMA the
       SimBuffer back out,
    4. send WATERSPU_EVENT_SPUTOPPU_READYTORENDER on WATERSPU_THREAD_PORT.

  Parameters:
    readFromMainMemoryLocation  main-memory address of the ProcessMessage
    arg2                        unused
    psglInitData                passed straight to psglSPUInit()
    arg4                        unused

  Returns 0, but only nominally: the loop has no break, so the function leaves
  only through sys_spu_thread_exit().
*/
int main(uint64_t readFromMainMemoryLocation, uint64_t arg2, uint64_t psglInitData, uint64_t arg4)
{
  int ret;
  (void)arg2;             /* unused */
  (void)arg4;             /* unused */

  psglSPUInit( psglInitData );
  
  while( 1 )
  {
    /* 
      Wait for WATERSPU_EVENT_PPUTOSPU_READYTOPROCESS.
      The event's two data words are received but not used here; the
      ProcessMessage is always fetched from readFromMainMemoryLocation.
      processMessageObject carries the list of simulation pointers, their
      per-simulation properties and the optional vertex buffer address.
    */
    uint32_t eventName, data2, data3;

    ret = sys_spu_thread_receive_event(WATER_SPU_QUEUE_NUMBER, &eventName, &data2, &data3);
    if (ret != CELL_OK )
    {
      // sys_spu_thread_receive_event failed!
      sys_spu_thread_exit(0);
    }
    // event received

    if (eventName != WATERSPU_EVENT_PPUTOSPU_READYTOPROCESS) 
    {
      sys_spu_thread_exit(0);
    }
    // ready to process!

    /* retrieve the list of objects to process ( in processMessageObject ) */

#define TID		0 /* transfer class id - "Cell Evaluation System Programming Note" indicates to use 0 */
#define RID		0 /* replacement class id - "Cell Evaluation System Programming Note" indicates to use 0 */
    cellDmaLargeGet( &processMessageObject, readFromMainMemoryLocation, sizeof(ProcessMessage), 0, TID, RID );


    /* wait for processMessageObject to finish being loaded (tag 0) */
    cellDmaWaitTagStatusAll( 1<<0 );

    // waterspu.elf received processMessageObject



    /* process simulation list in the processMessageObject */

    /* nSimulation / nSimulationNext index theSimBuffer[] and are also used as DMA tags */
    int nSimulation = 0;
    int nSimulationNext = 1;
    volatile uint64_t * pSimulation = &processMessageObject.simulationPointers[0];
    volatile ProcessMessage::Props * pSimProps = &processMessageObject.simProperties[0];
    volatile uint64_t vertexBuffer = processMessageObject.vertexBuffer;

//#define PBDISPLAYSIMCOUNTFROMSPU
#ifdef PBDISPLAYSIMCOUNTFROMSPU
    int nCount=0;
    /* test the bound first so the array is never indexed at kMaxSimTiles */
    while( nCount < kMaxSimTiles && processMessageObject.simulationPointers[ nCount ] )
    {
      nCount++;
    }
    spu_printf( "Received %d sims!\n", nCount );
#endif

    
    /* if there is a simulation then DMA it IN! */

    if ( *pSimulation )
    {
      cellDmaLargeGet( &theSimBuffer[ nSimulation ], *pSimulation, sizeof(SimBuffer), nSimulation, TID, RID );
    }

    /* iterate to the next simulation */
    volatile uint64_t * pSimulationNext = pSimulation+1;
    volatile ProcessMessage::Props * pSimPropsNext = pSimProps+1;
    int nDebugCount=0;

    /* buffer index of the previously processed simulation; -1 until one exists */
    int nSimulationPrev = -1;
    while( *pSimulation )
    {
      /* if there is another simulation then DMA IN! (prefetch while this one is processed) */
      if ( *pSimulationNext )
      {
        cellDmaLargeGet( &theSimBuffer[ nSimulationNext ], *pSimulationNext, sizeof(SimBuffer), nSimulationNext, TID, RID );
      }
      /* wait for theSimBuffer[ nSimulation ] to finish DMA */
      cellDmaWaitTagStatusAll( 1<<nSimulation );

      /* theSimBuffer[ nSimulation ] is ready */

      /* update simulation */
      {
        volatile SimBuffer * pSim = &theSimBuffer[nSimulation];
        
        int GridDimension = SimBuffer::kDimensionOutput;
        /* mGrid holds two height fields of kGridSize floats: the half selected by
           !mParity is read, the half selected by mParity is overwritten (its old
           contents are read as the previous height of each cell first) */
        const volatile float * RESTRICTED_POINTER pInputGrid = &( pSim->mGrid[ (!pSim->mParity) * SimBuffer::kGridSize] );
        int i;
        int Line;
        float Dampening = pSimProps->Dampening;
        float Viscosity = pSimProps->Viscosity;

        /* change to integration for better simulation behavior */
        /* rows and columns run from -1 to GridDimension inclusive */
        for( Line = -1; Line < GridDimension+1; Line++ )
        {
          /* rows above / at / below the current cell; the cell itself is element [1] of row 1 */
          const volatile float * RESTRICTED_POINTER pEnableGrid0 = pSim->mEnableGrid  +  SimBuffer::kDimensionInput * (Line+1);
          const volatile float * RESTRICTED_POINTER pEnableGrid1 = pSim->mEnableGrid  +  SimBuffer::kDimensionInput * (Line+2);
          const volatile float * RESTRICTED_POINTER pEnableGrid2 = pSim->mEnableGrid  +  SimBuffer::kDimensionInput * (Line+3);
          const volatile float * RESTRICTED_POINTER pInputLine0 = pInputGrid  +  SimBuffer::kDimensionInput * (Line+1);
          const volatile float * RESTRICTED_POINTER pInputLine1 = pInputGrid  +  SimBuffer::kDimensionInput * (Line+2);
          const volatile float * RESTRICTED_POINTER pInputLine2 = pInputGrid  +  SimBuffer::kDimensionInput * (Line+3);

#define OUTPUTGRIDSTARTOFLINE(LineIndex)  &( pSim->mGrid[ pSim->mParity * SimBuffer::kGridSize + ((LineIndex+SimBuffer::kEdgeSize)*SimBuffer::kDimensionInput) + SimBuffer::kEdgeSize] )

          /* "- 1" because the column loop starts at i = -1 */
          volatile float * pOutputGrid = OUTPUTGRIDSTARTOFLINE( Line ) - 1;

          for( i = -1; i < GridDimension+1; i++,pOutputGrid++,pInputLine0++,pInputLine1++,pInputLine2++,pEnableGrid0++,pEnableGrid1++,pEnableGrid2++ )
          {
            /* new height = enabled(cell) * ( current
                 + k * (current - previous)      with k = 1.0 when 0 < |current - previous| < 0.025, else Dampening
                 + Viscosity * (average of the four enabled neighbours - current) ) */
            *pOutputGrid =  ((float)( 0.0f < pEnableGrid1[1] )) *      // anything more than 0.0f treat as 1.0f
              ( pInputLine1[1] +
                ((( pInputLine1[1] - *pOutputGrid ) < 0 && ( pInputLine1[1] - *pOutputGrid ) > -0.025f )||(( pInputLine1[1] - *pOutputGrid ) > 0 && ( pInputLine1[1] - *pOutputGrid ) < 0.025f )) * 1.0f * (pInputLine1[1] - *pOutputGrid) +       //  <----------------------------- energy absorption
                ((( pInputLine1[1] - *pOutputGrid ) <= -0.025f )||(( pInputLine1[1] - *pOutputGrid ) >= 0.025f )) * Dampening * (pInputLine1[1] - *pOutputGrid) +
                Viscosity * (((/*----------------------------------------------------*/    ((float)( 0.0f < pEnableGrid0[1] )) * pInputLine0[1]     + /*---------------------------------------------------*/   // <---- energy transfer
                                ((float)( 0.0f < pEnableGrid1[0] )) * pInputLine1[0]    /*----------------------------------------------------*/   +   ((float)( 0.0f < pEnableGrid1[2] )) * pInputLine1[2]   +
                              /*----------------------------------------------------*/    ((float)( 0.0f < pEnableGrid2[1] )) * pInputLine2[1]      /*----------------------------------------------------*/ ) * 0.25f ) - pInputLine1[1]));

            /* clamp: hard floor at -0.75; above min(2 * enable, 0.75) only 20% of the excess is kept */
            if ( *pOutputGrid < -0.75f ) *pOutputGrid = -0.75f;
            float maxHeight = 2.0f * pEnableGrid1[1];
            if (maxHeight > 0.75f ) maxHeight = 0.75f;

            if ( *pOutputGrid > maxHeight )
            {
              *pOutputGrid =  maxHeight  +  0.2f * ( *pOutputGrid - maxHeight ) ;
            }
          }
        }
      }

      /* update render data */
      {
        volatile SimBuffer * pSim = &theSimBuffer[nSimulation];

        /* copy the colours out of the volatile properties once */
        float fromColor[4];
        float toColor[4];

        fromColor[0] = pSimProps->fromColor[0];
        fromColor[1] = pSimProps->fromColor[1];
        fromColor[2] = pSimProps->fromColor[2];
        fromColor[3] = pSimProps->fromColor[3];
        toColor[0] = pSimProps->toColor[0];
        toColor[1] = pSimProps->toColor[1];
        toColor[2] = pSimProps->toColor[2];
        toColor[3] = pSimProps->toColor[3];

        const int GridDimension = SimBuffer::kDimensionOutput;
        const int GridDimensionMinusOne = SimBuffer::kDimensionOutput - 1;

        /* 1 when the corresponding NEIGHBOR_* bit is NOT set in neighborInfo */
        const int NoNorth = !( pSimProps->neighborInfo & NEIGHBOR_N );
        const int NoSouth = !( pSimProps->neighborInfo & NEIGHBOR_S );
        const int NoEast  = !( pSimProps->neighborInfo & NEIGHBOR_E );
        const int NoWest  = !( pSimProps->neighborInfo & NEIGHBOR_W );

        const int NoNorthEast = !( pSimProps->neighborInfo & NEIGHBOR_NE );
        const int NoSouthEast = !( pSimProps->neighborInfo & NEIGHBOR_SE );
        const int NoNorthWest = !( pSimProps->neighborInfo & NEIGHBOR_NW );
        const int NoSouthWest = !( pSimProps->neighborInfo & NEIGHBOR_SW );

        /* base texture offset: position wrapped to 16 units, quantised to quarters */
        float ux = 0.25f * floor( ( fmodf( pSimProps->Position[0], 16.0f ) / 4.0f ) );
        float uz = 0.25f * floor( ( fmodf( pSimProps->Position[2], 16.0f ) / 4.0f ) );

        volatile SimBuffer::RenderVert * RenderVerts = &pSim->Vert[0];

        /* same half of mGrid that the simulation step above read from */
        const volatile float * RESTRICTED_POINTER pInputGrid = &( pSim->mGrid[ (!pSim->mParity) * SimBuffer::kGridSize] );

        /* flip! (pInputGrid was taken before the flip and is not recomputed) */

        pSim->mParity = !pSim->mParity;

        /* (GridDimension+1) x (GridDimension+1) vertices, written consecutively */
        for( int Line = 0; Line < GridDimension + 1; Line++ )
        {
          const volatile float * pEnableGrid1 = pSim->mEnableGrid  +  SimBuffer::kDimensionInput *  (Line+1);
          const volatile float * RESTRICTED_POINTER pInputLine0 = pInputGrid  +  SimBuffer::kDimensionInput *  Line;
          const volatile float * RESTRICTED_POINTER pInputLine1 = pInputGrid  +  SimBuffer::kDimensionInput * (Line+1);
          const volatile float * RESTRICTED_POINTER pInputLine2 = pInputGrid  +  SimBuffer::kDimensionInput * (Line+2);

          float v = (float)Line / (float)( GridDimension );
          int i;
          for( i = 0; i < GridDimension + 1; i++, RenderVerts++, pInputLine0++, pInputLine1++, pInputLine2++, pEnableGrid1++  )
          {
            RenderVerts->V[0] = pSimProps->Position[0] - WATERTILEDIMENSIONHALF + ( WATERTILEDIMENSION * i ) / GridDimension;
            RenderVerts->V[2] = pSimProps->Position[2] - WATERTILEDIMENSIONHALF + ( WATERTILEDIMENSION * Line ) / GridDimension;
            /* height is forced to 0 on a border row/column whose neighbour is missing.
               NOTE(review): the far border is tested at GridDimensionMinusOne although
               the loops run up to GridDimension inclusive — confirm this is intended. */
            RenderVerts->V[1] = pInputLine1[1] * 
              ( 1.0f - (   (Line == GridDimensionMinusOne && NoSouth) 
                        || (i == GridDimensionMinusOne && NoEast) 
                        || (i == 0 && NoWest) 
                        || (Line == 0 && NoNorth)
                        || (Line == GridDimensionMinusOne && i == 0 && NoSouthWest )
                        || (Line == GridDimensionMinusOne && i == GridDimensionMinusOne && NoSouthEast )
                        || (Line == 0 && i == 0 && NoNorthWest )
                        || (Line == 0 && i == GridDimensionMinusOne && NoNorthEast ) ) );

            /* blend from deep to shallow color/alpha */
            { 
              /* only cells whose enable value is strictly between 0 and 1 are limited and blended */
              if ( pEnableGrid1[1] > 0.0f && pEnableGrid1[1] < 1.0f ) 
              {
                /* do not let the surface drop below -0.5 * enable */
                if ( RenderVerts->V[1] < -0.5f * ( pEnableGrid1[1] ) )
                {
                  RenderVerts->V[1] = -0.5f * ( pEnableGrid1[1] );
                }
                if ( RenderVerts->V[1] < 0.0f )
                {
                  /* t in [0,1]: 0 at height 0, 1 at height -0.375 or lower */
                  float t = ( -RenderVerts->V[1] / ( 0.75f * 0.5f ));
                  if (t < 0.0f) t = 0.0f;
                  else if (t > 1.0f ) t = 1.0f;
                  RenderVerts->C0[0] = (uint8_t) ( fromColor[0] * ( 1.0f - t ) + toColor[0] * t );
                  RenderVerts->C0[1] = (uint8_t) ( fromColor[1] * ( 1.0f - t ) + toColor[1] * t );
                  RenderVerts->C0[2] = (uint8_t) ( fromColor[2] * ( 1.0f - t ) + toColor[2] * t );
                  RenderVerts->C0[3] = (uint8_t) ( fromColor[3] * ( 1.0f - t ) + toColor[3] * t );
                }
                else
                {
                  RenderVerts->C0[0] = (uint8_t)fromColor[0];
                  RenderVerts->C0[1] = (uint8_t)fromColor[1];
                  RenderVerts->C0[2] = (uint8_t)fromColor[2];
                  RenderVerts->C0[3] = (uint8_t)fromColor[3];
                }
              }
              else
              {
                RenderVerts->C0[0] = (uint8_t)fromColor[0];
                RenderVerts->C0[1] = (uint8_t)fromColor[1];
                RenderVerts->C0[2] = (uint8_t)fromColor[2];
                RenderVerts->C0[3] = (uint8_t)fromColor[3];
              }
            }
            /* the colour logic above worked on the tile-relative height; now make it absolute */
            RenderVerts->V[1] += pSimProps->Position[1];

            /* blend east to west: normalised tangent from the averaged height difference along the row */
            
            float y = ( ( pInputLine1[2] - pInputLine1[1] ) + ( pInputLine1[1] - pInputLine1[0] ) ) * 0.5f;
            float x = WATERTILEDIMENSION / (SimBuffer::kDimensionOutput);
            float d = 1.0f / sqrtf( y * y + x * x );
            RenderVerts->T[0] = x * d;
            RenderVerts->T[1] = y * d;
            RenderVerts->T[2] = 0.0f;

            /* blend north to south: normalised bitangent from the averaged height difference along the column */
            float yb = ( ( pInputLine0[1] - pInputLine1[1] ) + ( pInputLine1[1] - pInputLine2[1] ) ) * 0.5f;
            float zb = -WATERTILEDIMENSION / (SimBuffer::kDimensionOutput);
            float db = 1.0f / sqrtf( yb * yb + zb * zb );
            RenderVerts->B[0] = 0.0f;
            RenderVerts->B[1] = yb * db;
            RenderVerts->B[2] = zb * db;

            /* normal = tangent x bitangent */
            cross( RenderVerts->T, RenderVerts->B, RenderVerts->N );

            RenderVerts->UV0[0] = ux + 0.25f * (float)(i) / (float)( GridDimension );
            RenderVerts->UV0[1] = uz + 0.25f * v;
          }
        }
      }

      

      /* end update render data */

      if (vertexBuffer)
      {
        // dma to mapped memory...
        psglSPUWriteMappedBuffer( vertexBuffer, &theSimBuffer[ nSimulation ].Vert[0], ( sizeof(SimBuffer::RenderVert) * ( (SimBuffer::kDimensionOutput+1)*(SimBuffer::kDimensionOutput+1) ) ) );  
        cellDmaWaitTagStatusAll( 1<<PSGL_WRITE_TAG );// first just use synchronous dma for testing...
      }
      /* write the updated SimBuffer back to where it came from (tag = buffer index) */
      cellDmaLargePut( &theSimBuffer[ nSimulation ], *pSimulation, sizeof(SimBuffer), nSimulation, TID, RID );
      nDebugCount++;

      /* wait for the previous simulation's write-back. On the first pass there is
         no previous buffer (nSimulationPrev == -1) and shifting by a negative
         count is undefined, so the wait is skipped. */
      if ( nSimulationPrev != -1 )
      {
        cellDmaWaitTagStatusAll( 1<<nSimulationPrev );
      }

      /* point to the next simulation pointer */
      pSimulation = pSimulationNext;
      pSimProps = pSimPropsNext;
      pSimulationNext++;
      pSimPropsNext++;

      if (vertexBuffer)
      {
        /* advance by exactly the byte count passed to psglSPUWriteMappedBuffer above */
        vertexBuffer += ( sizeof(SimBuffer::RenderVert) * ( (SimBuffer::kDimensionOutput+1)*(SimBuffer::kDimensionOutput+1) ) ); /********************************  MUST MATCH  ***************************************/
      }

      /* get the next simulation buffer index (wraps after 3, matching theSimBuffer[4]) */
      nSimulationPrev = nSimulation;
      nSimulation = nSimulationNext;
      nSimulationNext++;
      if (nSimulationNext == 4) nSimulationNext = 0;
    }
    /* wait for theSimBuffer[ nSimulationPrev ] to finish DMA'ing back to main */
    if ( nSimulationPrev != -1 )
    {
      cellDmaWaitTagStatusAll( 1<<nSimulationPrev );
    }
    //spu_printf("processed %d sims\n", nDebugCount );
    sys_spu_thread_send_event( WATERSPU_THREAD_PORT, WATERSPU_EVENT_SPUTOPPU_READYTORENDER, 0 );
  }
  // end!
  return 0;
}

