/* SCE CONFIDENTIAL
 * PLAYSTATION(R)3 Programmer Tool Runtime Library 084.006
 * Copyright (C) 2006 Sony Computer Entertainment Inc.
 * All Rights Reserved.
 */

/* glue.h
 */

#ifndef __CELL_DAISY_GLUE_H__
#define __CELL_DAISY_GLUE_H__

#include <cell/daisy/v_memcpy.h>
#include <cell/daisy/daisy_defs.h>

/* Debug trace hook for this header.
 * Any previous definition is dropped and CELL_DAISY_DEBUG_PRINTF becomes a
 * no-op. When CELL_DAISY_DEBUG_GLUE is defined, daisy_debug.h is included
 * afterwards (presumably it redefines the macro to emit output -- confirm
 * in that header).
 */
#undef CELL_DAISY_DEBUG_PRINTF
#define CELL_DAISY_DEBUG_PRINTF(...)
#ifdef CELL_DAISY_DEBUG_GLUE
#include <cell/daisy/daisy_debug.h>
#endif

/* Bookmark hooks for this header.
 * Both bookmark macros are reset to no-ops. When CELL_DAISY_BOOKMARK_GLUE is
 * defined, daisy_bookmark.h is included afterwards (presumably it supplies
 * the real definitions -- confirm in that header).
 */
#undef CELL_DAISY_BOOKMARK
#undef CELL_DAISY_BOOKMARK_CLASS
#define CELL_DAISY_BOOKMARK(x)
#define CELL_DAISY_BOOKMARK_CLASS(x,y)
#ifdef CELL_DAISY_BOOKMARK_GLUE
#include <cell/daisy/daisy_bookmark.h>
#endif

namespace cell {
	namespace Daisy {

/* True when Glue keeps its own buffers for data popped from the out ports:
 * the out port is in COPY mode and either the Glue is TWO_PORT or the in
 * port is in COPY mode as well.
 * Refers to tOutPort / tInPort / tMode, so it can only be expanded where
 * those template parameters are in scope (i.e. inside Glue).
 */
#define CELL_DAISY_GLUE_USE_OUT_PORT_BUFFERS \
		((tOutPort::sBufferMode == COPY) && \
		 ( (tMode == TWO_PORT) | (tInPort::sBufferMode == COPY) ))
/* True when Glue keeps its own buffers for data to be pushed to the in
 * ports: the in port is in COPY mode and the Glue is TWO_PORT.
 * Same scope requirement as the macro above.
 */
#define CELL_DAISY_GLUE_USE_IN_PORT_BUFFERS \
		((tInPort::sBufferMode == COPY) && tMode == TWO_PORT)

		/* Upper bound on the number of out ports, and separately of in ports,
		 * that one Glue accepts; it sizes the per-port arrays in Glue and is
		 * checked by addOutPort() / addInPort(). */
		static const SizeType MAX_PORTS = 6;

		/* Reset bit number bitPos of mask (bit 0 is the least significant bit). */
		CELL_DAISY_INLINE
		static void clearBit(BitmapType &mask, unsigned int bitPos)
		{
			const unsigned int keepBits = ~(1u << bitPos);
			mask &= keepBits;
		}

		/* Raise bit number bitPos of mask (bit 0 is the least significant bit). */
		CELL_DAISY_INLINE
		static void setBit(BitmapType &mask, unsigned int bitPos)
		{
			const unsigned int selectedBit = 1u << bitPos;
			mask |= selectedBit;
		}

		/* Non-zero when mask equals the value whose lowest `size` bits are all
		 * set (and nothing else). For size == 0 that value is 0, so an empty
		 * mask counts as full. */
		CELL_DAISY_INLINE
		static unsigned int isFull(BitmapType mask, unsigned int size)
		{
			const unsigned int allLowBits = (1u << size) - 1u;
			return (mask == allLowBits);
		}

		/* Return the number of leading zero bits of the inverted use mask,
		 * i.e. the index (counted from the most significant bit) of the first
		 * bit that is clear in useMask. Callers compare the result against
		 * their buffer count to detect "nothing free". */
		CELL_DAISY_INLINE
			static PointerType getAvailableBuffer(BitmapType useMask)
		{
			const BitmapType freeMask = ~useMask;
			return static_cast<PointerType>(spu_extract(spu_cntlz(spu_promote(freeMask, 0)), 0));
		}

		/*
		 * Glue
		 *
		 * Moves data from a set of "out ports" (which Glue pops from) to a set
		 * of "in ports" (which Glue pushes to), one step() at a time.
		 *
		 *   tOutPort / tInPort    port classes; each supplies GlueDataType,
		 *                         sBufferMode and sQueueControlType
		 *   tMode                 TWO_PORT: a callback converts popped data
		 *                         into a separate push buffer;
		 *                         ONE_PORT: an optional callback edits the
		 *                         popped data in place before it is pushed
		 *   tOutPortBufferCount   number of Glue-owned pop buffers
		 *   tInPortBufferCount    number of Glue-owned push buffers
		 */
		template<class tOutPort, class tInPort,
				 GlueMode tMode = TWO_PORT,
				 SizeType tOutPortBufferCount = 1,
				 SizeType tInPortBufferCount = 1>
		class Glue {

		public:

			/* element types exchanged with the ports */
			typedef typename tOutPort::GlueDataType OutPortDataType;
			typedef typename tInPort::GlueDataType  InPortDataType;

		protected:

			/* Default TWO_PORT callback: byte-copies one out-port element
			 * (sizeof(OutPortDataType) bytes) from src to dst. */
			static void default_two_port_calculate(
				typename tInPort::GlueDataType *dst,
				const typename tOutPort::GlueDataType *src)
			{
				void       *to   = (void *)(uintptr_t)dst;
				const void *from = (const void *)(uintptr_t)src;
				_cellDaisyMemcpy(to, from, sizeof(*src));
			}

			/* user (or default) callbacks; which one is used depends on tMode */
			void (*mTwoPortCalculate)(typename tInPort::GlueDataType *dst, const typename tOutPort::GlueDataType *src);
			void (*mOnePortCalculate)(typename tOutPort::GlueDataType *srcDst);

			/* Bytes reserved in mBuffer: the out-port buffers (if used) plus
			 * the in-port buffers (if used). One extra byte is added when
			 * neither is used, except for the ONE_PORT / REFERENCE / REFERENCE
			 * combination, which the constructors assert against. */
			static const SizeType sBufferSize =
			(CELL_DAISY_GLUE_USE_OUT_PORT_BUFFERS ? sizeof(OutPortDataType)*tOutPortBufferCount : 0) +
			(CELL_DAISY_GLUE_USE_IN_PORT_BUFFERS ? sizeof(InPortDataType)*tInPortBufferCount : 0) +
			((CELL_DAISY_GLUE_USE_OUT_PORT_BUFFERS | CELL_DAISY_GLUE_USE_IN_PORT_BUFFERS |
			  (tMode == ONE_PORT &&
			   tOutPort::sBufferMode == REFERENCE &&
			   tInPort::sBufferMode == REFERENCE)) ? 0 : 1);


			/* ---- out port state ---- */
			SizeType         mOutPortCount                       __attribute__((aligned(16)));
			tOutPort        *mOutPort[MAX_PORTS]                 __attribute__((aligned(16)));
			PointerType      mOutPortBeginPointer                __attribute__((aligned(16)));
			PointerType      mOutPortEndPointer                  __attribute__((aligned(16)));
			Fifo<PointerType> mOutPortBufferPointerQueue[MAX_PORTS] __attribute__((aligned(16)));
			Fifo<OutPortDataType *> mOutPortBufferQueue[MAX_PORTS] __attribute__((aligned(16)));
			BitmapType       mOutPortTerminatedMask              __attribute__((aligned(16)));
			BitmapType       mPendingPopMask                     __attribute__((aligned(16)));

			/* ---- out port buffer state ---- */
			OutPortDataType *mOutPortBuffers                     __attribute__((aligned(128)));
			BitmapType      *mOutPortBufferUseMask               __attribute__((aligned(16)));
			qword            mOutPortDataReady;
			PointerType      mOutPortDataReadyPointer            __attribute__((aligned(16)));
			uint32_t         mOutPortDataReadyPort               __attribute__((aligned(16)));
			OutPortDataType *mOutPortData                        __attribute__((aligned(16)));
			qword            mOutPortBufferAvail                 __attribute__((aligned(16)));

			/* ---- in port state ---- */
			SizeType         mInPortCount                        __attribute__((aligned(16)));
			tInPort         *mInPort[MAX_PORTS]                  __attribute__((aligned(16)));
			PointerType      mInPortBeginPointer                 __attribute__((aligned(16)));
			PointerType      mInPortEndPointer                   __attribute__((aligned(16)));
			Fifo<PointerType> mInPortBufferPointerQueue[MAX_PORTS] __attribute__((aligned(16)));
			Fifo<InPortDataType *> mInPortBufferQueue[MAX_PORTS] __attribute__((aligned(16)));
			BitmapType       mInPortTerminatedMask               __attribute__((aligned(16)));
			BitmapType       mPendingPushMask                    __attribute__((aligned(16)));

			/* ---- in port buffer state ---- */
			InPortDataType  *mInPortBuffers                      __attribute__((aligned(128)));
			BitmapType      *mInPortBufferUseMask                __attribute__((aligned(16)));
			qword            mInPortDataReady;
			PointerType      mInPortDataReadyPointer             __attribute__((aligned(16)));
			uint32_t         mInPortDataReadyPort                __attribute__((aligned(16)));
			InPortDataType  *mInPortData                         __attribute__((aligned(16)));

			/* results of calculate that still have to be pushed (COPY in port) */
			Fifo<PointerType> mPostCalculatedPointerQueue __attribute__((aligned(16)));
			Fifo<InPortDataType *> mPostCalculatedQueue  __attribute__((aligned(16)));

			/* backing storage for mOutPortBuffers / mInPortBuffers */
			uint8_t          mBuffer[sBufferSize]                __attribute__((aligned(128)));

			/* use masks the m*BufferUseMask pointers refer to */
			BitmapType       mBufferUseMask[2]                   __attribute__((aligned(128)));

			int              mBookmarkId                         __attribute__((aligned(16)));

			/* dummy for padding */

			int              mDummy                              __attribute__((aligned(16)));

		private:

			/* reset all bookkeeping; shared by every constructor */
			void initialize(int bookmarkId);

			/* Non-zero when the calculate stage may run: TWO_PORT needs both
			 * out-port data and an in-port buffer to be ready, any other mode
			 * only needs out-port data. */
			unsigned int canCalculate() {
				if (tMode != TWO_PORT) {
					return (__CELL_DAISY_SPU_GET_VAL__(mOutPortDataReady));
				}
				return (__CELL_DAISY_SPU_GET_VAL__(mOutPortDataReady) &&
						__CELL_DAISY_SPU_GET_VAL__(mInPortDataReady));
			}

		public:

			/* constructor definition */

			explicit Glue(int bookmarkId=0);
			explicit Glue(
				void (*callback)(typename tInPort::GlueDataType *, const typename tOutPort::GlueDataType *),
				int bookmarkId=0);
			explicit Glue(
				void (*callback)(typename tOutPort::GlueDataType *),
				int bookmarkId=0);

			/* virtual destructor definition */
			virtual ~Glue(){}

			/*
			 * Attach an out port (a port Glue pops from).
			 * Returns CELL_OK, or CELL_DAISY_ERROR_INVALID_PORT_ATTACH when
			 *  - MAX_PORTS out ports are already attached,
			 *  - an out port exists already and more than one in port is attached,
			 *  - the Glue is ONE_PORT with a REFERENCE out port and a COPY in
			 *    port, and one out port is already attached.
			 */
			int addOutPort(tOutPort& outPort) {
				const bool noFreeSlot   = (mOutPortCount >= MAX_PORTS);
				const bool fanConflict  = (mOutPortCount>=1 && mInPortCount>1);
				const bool oneToOneOnly = (tMode==ONE_PORT &&
										   tOutPort::sBufferMode == REFERENCE &&
										   tInPort::sBufferMode == COPY &&
										   mOutPortCount==1);
				if (noFreeSlot || fanConflict || oneToOneOnly) {
					CELL_ERROR_CHECK_ERROR(CELL_DAISY_ERROR_INVALID_PORT_ATTACH);
					return CELL_DAISY_ERROR_INVALID_PORT_ATTACH;
				}

				const SizeType slot = mOutPortCount;
				mOutPort[slot] = &outPort;
				mOutPortBufferPointerQueue[slot].initialize();
				mOutPortBufferQueue[slot].initialize();
				mOutPortCount = slot + 1;

				return CELL_OK;
			}

			/*
			 * Attach an in port (a port Glue pushes to).
			 * Returns CELL_OK, or CELL_DAISY_ERROR_INVALID_PORT_ATTACH when
			 *  - MAX_PORTS in ports are already attached,
			 *  - an in port exists already and more than one out port is attached,
			 *  - the Glue is ONE_PORT with a REFERENCE out port and a COPY in
			 *    port, and one in port is already attached.
			 */
			int addInPort(tInPort& inPort) {
				const bool noFreeSlot   = (mInPortCount >= MAX_PORTS);
				const bool fanConflict  = (mInPortCount>=1 && mOutPortCount>1);
				const bool oneToOneOnly = (tMode==ONE_PORT &&
										   tOutPort::sBufferMode == REFERENCE &&
										   tInPort::sBufferMode == COPY &&
										   mInPortCount==1);
				if (noFreeSlot || fanConflict || oneToOneOnly) {
					CELL_ERROR_CHECK_ERROR(CELL_DAISY_ERROR_INVALID_PORT_ATTACH);
					return CELL_DAISY_ERROR_INVALID_PORT_ATTACH;
				}

				const SizeType slot = mInPortCount;
				mInPort[slot] = &inPort;
				mInPortBufferPointerQueue[slot].initialize();
				mInPortBufferQueue[slot].initialize();
				mInPortCount = slot + 1;

				return CELL_OK;
			}

			/* execute 1-step of Glue dataflow */
			int step(BlockMode mode = NOT_STALL);

		protected:

			/* advance a round-robin port index; virtual so that a derived
			 * class can substitute its own selection policy */
			virtual void proceed(PointerType& pointer, const SizeType size, const InPortDataType *data);

			/* move the matching port index to the next port that is neither
			 * in busyMask nor terminated; false when no such port exists */
			bool proceedOutPortBegin(BitmapType busyMask);
			bool proceedOutPortEnd(BitmapType busyMask);
			bool proceedInPortBegin(BitmapType busyMask, const InPortDataType *data);
			bool proceedInPortEnd(BitmapType busyMask);
		};

		/* constructor description(s) */
		
		/*
		 * Reset the Glue to its "no ports attached, nothing in flight" state.
		 * Called by every constructor before the calculate callback is set.
		 *
		 *   bookmarkId   value stored in mBookmarkId
		 *
		 * Besides clearing counters, masks and ready flags, this carves the
		 * Glue-owned copy buffers out of mBuffer (out-port buffers first, then
		 * in-port buffers) and selects which use mask each side works on.
		 */
		template<class tOutPort, class tInPort, GlueMode tMode,
				 SizeType tOutPortBufferCount, SizeType tInPortBufferCount>
		void Glue<tOutPort, tInPort, tMode, tOutPortBufferCount, tInPortBufferCount>::initialize(int bookmarkId) {
			mOutPortCount            = 0   ;
			mOutPortBeginPointer     = 0   ;
			mOutPortEndPointer       = -1  ;
			mOutPortTerminatedMask   = 0   ;
			mOutPortDataReady        = si_from_uint(false);
			mOutPortDataReadyPointer = -1  ;
			mOutPortDataReadyPort    = 0   ;
			mOutPortBufferAvail      = si_from_uint(true);
			mPendingPopMask          = 0   ;
			mInPortCount             = 0   ;
			mInPortBeginPointer      = 0   ;
			mInPortEndPointer        = -1  ;
			mInPortTerminatedMask    = 0   ;
			mInPortDataReady         = si_from_uint(false);
			mInPortDataReadyPointer  = -1  ;
			mInPortDataReadyPort     = 0   ;
			mPendingPushMask         = 0   ;
			mBookmarkId              = bookmarkId;
			mBufferUseMask[0] = mBufferUseMask[1] = 0u;

			/* These pointers are only assigned for some template
			 * configurations, or later on by step() / the constructors.
			 * Give them a defined value so that no configuration leaves
			 * them indeterminate. */
			mOutPortBuffers          = NULL;
			mInPortBuffers           = NULL;
			mOutPortData             = NULL;
			mInPortData              = NULL;
			mTwoPortCalculate        = NULL;
			mOnePortCalculate        = NULL;

			/* lay out the copy buffers back to back inside mBuffer */
			SizeType offset = 0;
			if (CELL_DAISY_GLUE_USE_OUT_PORT_BUFFERS) {
				mOutPortBuffers =  (OutPortDataType *)(uintptr_t)&mBuffer[offset];
				offset += sizeof(OutPortDataType)*tOutPortBufferCount;
			}
			if (CELL_DAISY_GLUE_USE_IN_PORT_BUFFERS) {
				mInPortBuffers =  (InPortDataType *)(uintptr_t)&mBuffer[offset];
			}

			/* ONE_PORT with a COPY in port pushes the very buffers that were
			 * popped into, so both sides share use mask 0; otherwise the in
			 * side gets its own mask. */
			mOutPortBufferUseMask = &mBufferUseMask[0];
			if ((tMode == ONE_PORT) && (tInPort::sBufferMode == COPY)) {
				mInPortBufferUseMask = &mBufferUseMask[0];
			} else {
				mInPortBufferUseMask = &mBufferUseMask[1];
			}

			mPostCalculatedPointerQueue.initialize();
			mPostCalculatedQueue.initialize();
		}

		/*
		 * Construct a Glue without a user callback.
		 *
		 *   bookmarkId   forwarded to initialize()
		 *
		 * Outside ONE_PORT mode the default two-port callback (a plain copy)
		 * is installed. In ONE_PORT mode no callback is installed, and the
		 * configuration is asserted to be valid: both element types must have
		 * the same size and at least one side must be in COPY mode.
		 */
		template<class tOutPort, class tInPort, GlueMode tMode,
				 SizeType tOutPortBufferCount, SizeType tInPortBufferCount>
		Glue<tOutPort, tInPort, tMode, tOutPortBufferCount, tInPortBufferCount>::Glue(int bookmarkId)
		{
			initialize(bookmarkId);

			if (tMode != ONE_PORT) {
				this->mTwoPortCalculate = default_two_port_calculate;
			} else {
				cellDaisyAssert(sizeof(OutPortDataType) == sizeof(InPortDataType));
				cellDaisyAssert((tOutPort::sBufferMode == COPY) |
								(tInPort::sBufferMode == COPY));
				this->mOnePortCalculate = NULL;
			}
		}

		/*
		 * Construct a TWO_PORT Glue with a user supplied calculate callback.
		 *
		 *   callback     called as callback(dst, src) with the in-port buffer
		 *                as dst and the popped out-port data as src
		 *   bookmarkId   forwarded to initialize()
		 *
		 * Asserts that the Glue was instantiated with tMode == TWO_PORT.
		 */
		template<class tOutPort, class tInPort, GlueMode tMode,
				 SizeType tOutPortBufferCount, SizeType tInPortBufferCount>
		Glue<tOutPort, tInPort, tMode, tOutPortBufferCount, tInPortBufferCount>::
		Glue(void (*callback)(typename tInPort::GlueDataType *, const typename tOutPort::GlueDataType *), int bookmarkId)
		{
			cellDaisyAssert(tMode == TWO_PORT);

			initialize(bookmarkId);
			this->mTwoPortCalculate = callback;
		}

		/*
		 * Construct a ONE_PORT Glue with a user supplied in-place callback.
		 *
		 *   callback     called with the popped out-port data, which it may
		 *                modify before the data is pushed
		 *   bookmarkId   forwarded to initialize()
		 *
		 * Asserts that tMode == ONE_PORT, that both element types have the
		 * same size, and that at least one side is in COPY mode.
		 */
		template<class tOutPort, class tInPort, GlueMode tMode,
				 SizeType tOutPortBufferCount, SizeType tInPortBufferCount>
		Glue<tOutPort, tInPort, tMode, tOutPortBufferCount, tInPortBufferCount>::
		Glue(void (*callback)(typename tOutPort::GlueDataType *), int bookmarkId)
		{
			cellDaisyAssert(tMode == ONE_PORT);
			cellDaisyAssert(sizeof(OutPortDataType) == sizeof(InPortDataType));
			cellDaisyAssert((tOutPort::sBufferMode == COPY) |
							(tInPort::sBufferMode == COPY));

			initialize(bookmarkId);
			this->mOnePortCalculate = callback;
		}

		/* member function description(s) */

		/*
		 * Execute one step of the Glue dataflow.
		 *
		 *   mode   NOT_STALL: make one pass and return.
		 *          STALL:     repeat the pass until something progressed; on a
		 *                     pass that follows an idle one, a failed try*
		 *                     call on a non-local queue is retried once with
		 *                     the blocking beginPop()/beginPush().
		 *
		 * One pass consists of, in this order:
		 *   1. begin pops on the out ports (round robin),
		 *   2. complete a pop (COPY out ports) to obtain input data,
		 *   3. allocate an in-port buffer (TWO_PORT with COPY in ports),
		 *   4. run the calculate callback when its operands are ready,
		 *   5. either terminate the in ports (all out ports terminated and
		 *      nothing left to push) or begin pushes on the in ports,
		 *   6. complete pushes (COPY in ports) and release their buffers.
		 *
		 * Returns QUEUE_IS_BUSY when the pass made no progress, TERMINATED
		 * when progress was made and every in port is marked terminated,
		 * CELL_OK otherwise.
		 */
		template<class tOutPort, class tInPort, GlueMode tMode,
				 SizeType tOutPortBufferCount, SizeType tInPortBufferCount>
//		CELL_DAISY_INLINE
			int Glue<tOutPort, tInPort, tMode, tOutPortBufferCount, tInPortBufferCount>::step(BlockMode mode)
		{

			int isActive = false;	/* did this pass make any progress? */
			int isRetry = false;	/* previous pass was idle: blocking calls allowed */
			do {
				// begin out port loop
				if (__builtin_expect(tOutPort::sBufferMode == COPY |
									 !__CELL_DAISY_SPU_GET_VAL__(mOutPortDataReady), 1)) {
					BitmapType busyMask = 0;
					for(bool arbStat = true; arbStat; arbStat = proceedOutPortBegin(busyMask)) {
						int stat;
						if (tOutPort::sBufferMode == COPY) {
							// COPY
							PointerType      bufferPointer = 0; /* pointer to copy buffer */
							OutPortDataType *buffer = NULL; /* buffer to copy popped data */
							if (CELL_DAISY_GLUE_USE_OUT_PORT_BUFFERS) {
								bufferPointer = getAvailableBuffer(*mOutPortBufferUseMask);
								if (__builtin_expect(bufferPointer < (PointerType)tOutPortBufferCount, 1)) {
									CELL_DAISY_DEBUG_PRINTF("glue: mOutPortBuffers[%d] allocated\n",
															bufferPointer);
									buffer = &mOutPortBuffers[bufferPointer];
								}
							} else {
								/* no Glue-owned buffer: pop straight into the in-port reference */
								if (__builtin_expect(__CELL_DAISY_SPU_GET_VAL__(mInPortDataReady), 1)) {
									buffer = (OutPortDataType *)mInPortData;
								}
							}
							if (__builtin_expect(buffer == NULL, 0)) {
								/* no buffer */
								__CELL_DAISY_SPU_SET_VAL__(mOutPortBufferAvail, false);
								break;
							}
							__CELL_DAISY_SPU_SET_VAL__(mOutPortBufferAvail, true);
							stat = mOutPort[mOutPortBeginPointer]->tryBeginPop(buffer);
						resumePop1:
							if (__builtin_expect(stat == CELL_OK, 1)) {
								clearBit(mPendingPopMask, mOutPortBeginPointer);
								CELL_DAISY_BOOKMARK_CLASS(CELL_DAISY_BOOKMARK_PREFIX_GLUE_OUT, mOutPortBeginPointer);
								CELL_DAISY_DEBUG_PRINTF("glue: mOutPort[%d]->tryBeginPop(&mOutPortBuffers[%d])\n",
														mOutPortBeginPointer, bufferPointer);
								if (CELL_DAISY_GLUE_USE_OUT_PORT_BUFFERS) {
									/* buffer N is tracked by bit (31-N); unsigned literal
									 * so that N == 0 does not shift into a sign bit */
									*mOutPortBufferUseMask |= 1u<<(31-bufferPointer);
								} else {
									__CELL_DAISY_SPU_SET_VAL__(mInPortDataReady, false);
								}
								mOutPortBufferPointerQueue[mOutPortBeginPointer].pushPtr(bufferPointer);
								mOutPortBufferQueue[mOutPortBeginPointer].pushPtr(buffer);
								mOutPortDataReadyPort = mOutPortBeginPointer;
								isActive = true;
								continue;
							} else if ((mode == STALL) && (tOutPort::sQueueControlType != QCTL_TYPE_LOCAL) && isRetry) {
								isRetry = false;
								// blocking
								CELL_DAISY_DEBUG_PRINTF("glue: >> STALL << mOutPort[%d]->beginPop(0x%p)\n",
														mOutPortBeginPointer, buffer);
								stat = mOutPort[mOutPortBeginPointer]->beginPop(buffer);
								if (__builtin_expect(stat == CELL_OK, 1)) {
									CELL_DAISY_DEBUG_PRINTF("glue: >> RESUME <<\n");
									goto resumePop1;
								} else {
									CELL_DAISY_DEBUG_PRINTF("glue:  STALL failed:0x%x\n", stat);
								}
							}
						} else {
							// REFERENCE
							stat = mOutPort[mOutPortBeginPointer]->tryBeginPop();
						resumePop2:
							if (__builtin_expect(stat == CELL_OK, 1)) {
								clearBit(mPendingPopMask, mOutPortBeginPointer);
								CELL_DAISY_BOOKMARK_CLASS(CELL_DAISY_BOOKMARK_PREFIX_GLUE_OUT, mOutPortBeginPointer);
								CELL_DAISY_DEBUG_PRINTF("glue: mOutPort[%d]->tryBeginPop()\n", mOutPortBeginPointer);
								__CELL_DAISY_SPU_SET_VAL__(mOutPortDataReady, true);
								mOutPortData = (OutPortDataType *)(uintptr_t)mOutPort[mOutPortBeginPointer]
									->getCurrentReference();

								mOutPortDataReadyPort = mOutPortBeginPointer;
								/* advance the round-robin index before leaving the loop */
								proceedOutPortBegin(busyMask);
								isActive = true;
								break;
							} else if ((mode == STALL) && (tOutPort::sQueueControlType != QCTL_TYPE_LOCAL) && isRetry) {
								isRetry = false;
								// blocking
								CELL_DAISY_DEBUG_PRINTF("glue: >> STALL << mOutPort[%d]->beginPop()\n",
														mOutPortBeginPointer);
								stat = mOutPort[mOutPortBeginPointer]->beginPop();
								if (__builtin_expect(stat == CELL_OK, 1)) {
									CELL_DAISY_DEBUG_PRINTF("glue: >> RESUME <<\n");
									goto resumePop2;
								} else {
									CELL_DAISY_DEBUG_PRINTF("glue:  STALL failed:0x%x\n", stat);
								}
							}
						}
						/* the begin-pop did not succeed on this port */
						if (!mOutPort[mOutPortBeginPointer]->hasPendingEntry()) {
							setBit(mPendingPopMask, mOutPortBeginPointer);
						}

						if (__builtin_expect((stat == TERMINATED) &&
											 !mOutPort[mOutPortBeginPointer]->hasPendingEntry() &&
											 !__CELL_DAISY_SPU_GET_VAL__(mOutPortDataReady), 0))
						{
							// mark OutPort[mOutPortBeginPointer] TERMINATED
							CELL_DAISY_DEBUG_PRINTF("glue: OutPort[%d] TERMINATED\n", mOutPortBeginPointer);
							setBit(mOutPortTerminatedMask, mOutPortBeginPointer);
						} else {
							// mark OutPort[mOutPortBeginPointer] BUSY
							setBit(busyMask, mOutPortBeginPointer);
						}
					}
				}

				if (__builtin_expect(!__CELL_DAISY_SPU_GET_VAL__(mOutPortDataReady), 1)) {
					// prepare OutPort buffer
					if (tOutPort::sBufferMode == COPY) {
						// COPY
						// end out port loop
						for(BitmapType busyMask=0; proceedOutPortEnd(busyMask); setBit(busyMask, mOutPortEndPointer)) {
							int stat = mOutPort[mOutPortEndPointer]->tryEndPop();
							if (__builtin_expect(stat == CELL_OK, 1)) {
								CELL_DAISY_BOOKMARK_CLASS(CELL_DAISY_BOOKMARK_PREFIX_GLUE_OUT, MAX_PORTS+mOutPortEndPointer);
								__CELL_DAISY_SPU_SET_VAL__(mOutPortDataReady, true);
								mOutPortDataReadyPointer = mOutPortBufferPointerQueue[mOutPortEndPointer].popPtr();
								mOutPortData = mOutPortBufferQueue[mOutPortEndPointer].popPtr();
								CELL_DAISY_DEBUG_PRINTF("glue: mOutPort[%d]->tryEndPop() pointer=%d\n",
														mOutPortEndPointer , mOutPortDataReadyPointer);
								isActive = true;
								break;
							}
						}
					}
				}

				if (__builtin_expect(!__CELL_DAISY_SPU_GET_VAL__(mInPortDataReady) && tMode == TWO_PORT, 1)) {
					// prepare InPort buffer
					PointerType bufferPointer;
					if (CELL_DAISY_GLUE_USE_IN_PORT_BUFFERS) {
						bufferPointer = getAvailableBuffer(*mInPortBufferUseMask);
						if (__builtin_expect(bufferPointer < (PointerType)tInPortBufferCount, 1)) {
							*mInPortBufferUseMask |= 1u<<(31-bufferPointer);

							__CELL_DAISY_SPU_SET_VAL__(mInPortDataReady, true);
							mInPortDataReadyPointer = bufferPointer;
							mInPortData = &mInPortBuffers[mInPortDataReadyPointer];
							CELL_DAISY_DEBUG_PRINTF("glue: mInPortBuffers[%d] allocated\n", bufferPointer);
							isActive = true;
						}
					}
				}

				// calculate
				if (__builtin_expect(canCalculate(), 1)) {
					if (tMode == TWO_PORT) {
						CELL_DAISY_DEBUG_PRINTF("glue: calculate(0x%p,0x%p)\n",	mInPortData, mOutPortData);
						(*mTwoPortCalculate)((typename tInPort::GlueDataType *)(uintptr_t)mInPortData,
											 (typename tOutPort::GlueDataType *)(uintptr_t)mOutPortData);
						CELL_DAISY_BOOKMARK_CLASS(CELL_DAISY_BOOKMARK_PREFIX_GLUE_OUT, MAX_PORTS*2);
						if (tInPort::sBufferMode == COPY) {
							/* result still has to be pushed by the in port loop */
							mPostCalculatedPointerQueue.pushPtr(mInPortDataReadyPointer);
							mPostCalculatedQueue.pushPtr(mInPortData);
						}

						__CELL_DAISY_SPU_SET_VAL__(mOutPortDataReady, false);
						__CELL_DAISY_SPU_SET_VAL__(mInPortDataReady, false);

						if (tOutPort::sBufferMode == COPY) {
							*mOutPortBufferUseMask &= ~(1u<<(31-mOutPortDataReadyPointer)); // reset buffer use mask
						} else {
							mOutPort[mOutPortDataReadyPort]->endPop();
							CELL_DAISY_BOOKMARK_CLASS(CELL_DAISY_BOOKMARK_PREFIX_GLUE_OUT, MAX_PORTS+mOutPortDataReadyPort);
							CELL_DAISY_DEBUG_PRINTF("glue: mOutPort[%d]->endPop()\n", mOutPortDataReadyPort);
						}
						if (tInPort::sBufferMode == REFERENCE) {
							mInPort[mInPortDataReadyPort]->endPush();
							CELL_DAISY_BOOKMARK_CLASS(CELL_DAISY_BOOKMARK_PREFIX_GLUE_IN, MAX_PORTS+mInPortDataReadyPort);
							CELL_DAISY_DEBUG_PRINTF("glue: mInPort[%d]->endPush()\n", mInPortDataReadyPort);
						}
					} else {
						if (mOnePortCalculate != NULL) {
							/* trace the pointer that is actually handed to the callback */
							CELL_DAISY_DEBUG_PRINTF("glue: calculate(0x%p)\n",	mOutPortData);
							(*mOnePortCalculate)((typename tOutPort::GlueDataType *)(uintptr_t)mOutPortData);
							CELL_DAISY_BOOKMARK_CLASS(CELL_DAISY_BOOKMARK_PREFIX_GLUE_OUT, MAX_PORTS*2);
						}
						__CELL_DAISY_SPU_SET_VAL__(mOutPortDataReady, false);

						if (tInPort::sBufferMode == REFERENCE) {
							mInPort[mInPortDataReadyPort]->endPush();
							CELL_DAISY_BOOKMARK_CLASS(CELL_DAISY_BOOKMARK_PREFIX_GLUE_IN, MAX_PORTS+mInPortDataReadyPort);
							CELL_DAISY_DEBUG_PRINTF("glue: mInPort[%d]->endPush()\n", mInPortDataReadyPort);
							__CELL_DAISY_SPU_SET_VAL__(mInPortDataReady, false);
						} else {
							/* the popped buffer itself is what gets pushed */
							mPostCalculatedPointerQueue.pushPtr(mOutPortDataReadyPointer);
							mPostCalculatedQueue.pushPtr((InPortDataType *)mOutPortData);
						}
					}
					isActive = true;
				}

				if (__builtin_expect(isFull(mOutPortTerminatedMask, mOutPortCount) &&
									 mPostCalculatedQueue.isEmpty(), 0)) {
					/* every out port is terminated and nothing is left to push:
					 * propagate termination to the in ports */
					for(uint32_t i=0; i<mInPortCount; i++) {
						if (tInPort::sBufferMode == REFERENCE &&
							mInPort[i]->hasPendingEntry())
						{
							mInPort[i]->cancelPush();
							CELL_DAISY_DEBUG_PRINTF("glue: mInPort[%d]->cancelPush()\n", i);
						}
						if (!mInPort[i]->hasPendingEntry()) {
							CELL_DAISY_DEBUG_PRINTF("glue: terminate mInPort[%d]\n", i);

							mInPort[i]->terminate();
							setBit(mInPortTerminatedMask, i);
						}
					}
					isActive = true;
				} else if (__builtin_expect(tInPort::sBufferMode == COPY | !__CELL_DAISY_SPU_GET_VAL__(mInPortDataReady), 1)) {
					// begin in port loop
					BitmapType busyMask = 0;
					for(bool arbStat=true; arbStat; arbStat = proceedInPortBegin(busyMask,
																				 (tInPort::sBufferMode == COPY) ?
																				 mPostCalculatedQueue.getFront() : 0)) {
						if (tInPort::sBufferMode == COPY) {
							// COPY
							if (__builtin_expect(!mPostCalculatedPointerQueue.isEmpty(), 1)) {
								InPortDataType *buffer = mPostCalculatedQueue.getFront(); /* buffer to be pushed */
								int stat = mInPort[mInPortBeginPointer]->tryBeginPush(buffer);
							resumePush1:
								if (__builtin_expect(stat == CELL_OK, 1)) {
									clearBit(mPendingPushMask, mInPortBeginPointer);
									CELL_DAISY_BOOKMARK_CLASS(CELL_DAISY_BOOKMARK_PREFIX_GLUE_IN, mInPortBeginPointer);
									CELL_DAISY_DEBUG_PRINTF("glue: mInPort[%d]->tryBeginPush(0x%p)\n",
															mInPortBeginPointer, buffer);
									mInPortBufferPointerQueue[mInPortBeginPointer].pushPtr(
										mPostCalculatedPointerQueue.popPtr()
										);
									mInPortBufferQueue[mInPortBeginPointer].pushPtr(
										mPostCalculatedQueue.popPtr()
										);
									mInPortDataReadyPort = mInPortBeginPointer;
									isActive = true;
									continue;
								} else if ((mode == STALL) && (tInPort::sQueueControlType != QCTL_TYPE_LOCAL) && isRetry) {
									isRetry = false;
									// blocking
									CELL_DAISY_DEBUG_PRINTF("glue: >> STALL << mInPort[%d]->beginPush(0x%p)\n",
															mInPortBeginPointer, buffer);
									/* NOTE(review): the result of beginPush() is not
									 * inspected; success is assumed -- confirm */
									mInPort[mInPortBeginPointer]->beginPush(buffer);
									CELL_DAISY_DEBUG_PRINTF("glue: >> RESUME <<\n");
									stat = CELL_OK;
									goto resumePush1;
								}
							} else {
								/* no calculated data */
								break;
							}
						} else {
							// REFERENCE
							int stat = mInPort[mInPortBeginPointer]->tryBeginPush();
						resumePush2:
							if (__builtin_expect(stat == CELL_OK, 1)) {
								clearBit(mPendingPushMask, mInPortBeginPointer);
								CELL_DAISY_BOOKMARK_CLASS(CELL_DAISY_BOOKMARK_PREFIX_GLUE_IN, mInPortBeginPointer);
								CELL_DAISY_DEBUG_PRINTF("glue: mInPort[%d]->tryBeginPush()\n", mInPortBeginPointer);
								__CELL_DAISY_SPU_SET_VAL__(mInPortDataReady, true);
								mInPortData = (InPortDataType *)(uintptr_t)mInPort[mInPortBeginPointer]->getCurrentReference();

								mInPortDataReadyPort = mInPortBeginPointer;
								/* advance the round-robin index before leaving the loop */
								proceedInPortBegin(busyMask, 0);
								isActive = true;
								break;
							} else if ((mode == STALL) && (tInPort::sQueueControlType != QCTL_TYPE_LOCAL) && isRetry) {
								isRetry = false;
								// blocking
								CELL_DAISY_DEBUG_PRINTF("glue: >> STALL << mInPort[%d]->beginPush()\n",
														mInPortBeginPointer);
								/* NOTE(review): the result of beginPush() is not
								 * inspected; success is assumed -- confirm */
								mInPort[mInPortBeginPointer]->beginPush();
								CELL_DAISY_DEBUG_PRINTF("glue: >> RESUME <<\n");
								stat = CELL_OK;
								goto resumePush2;
							}
						}
						/* the begin-push did not succeed on this port */
						setBit(busyMask, mInPortBeginPointer);
						if (!mInPort[mInPortBeginPointer]->hasPendingEntry()) {
							setBit(mPendingPushMask, mInPortBeginPointer);
						}
					}
				}

				if (tInPort::sBufferMode == COPY) {
					// end in port loop
					for(uint32_t i=0; i<mInPortCount; i++) {
						while(mInPort[i]->tryEndPush() == CELL_OK) {
							CELL_DAISY_BOOKMARK_CLASS(CELL_DAISY_BOOKMARK_PREFIX_GLUE_IN, MAX_PORTS+i);
							PointerType bufferPointer = mInPortBufferPointerQueue[i].popPtr();
							CELL_DAISY_DEBUG_PRINTF("glue: mInPort[%d]->tryEndPush() pointer = %d\n",
													i, bufferPointer);
							if (tMode == ONE_PORT && tOutPort::sBufferMode == REFERENCE) {
								// only 1-to-1 connection is allowed
								mOutPort[0]->endPop();
								CELL_DAISY_BOOKMARK_CLASS(CELL_DAISY_BOOKMARK_PREFIX_GLUE_OUT, MAX_PORTS+0);
								CELL_DAISY_DEBUG_PRINTF("glue: mOutPort[0]->endPop()\n");
								/* No use-mask bit is ever set in this configuration,
								 * and bufferPointer is the initial -1 here, so the
								 * mask reset below must not run (it would shift by 32). */
							} else {
								*mInPortBufferUseMask &= ~(1u<<(31-bufferPointer)); // reset buffer use mask
							}
							isActive = true;
						}
					}
				}
				isRetry = !isActive;
				if (isRetry && (mode == STALL)) {
					CELL_DAISY_DEBUG_PRINTF("glue: retry\n");
				}
			} while(!isActive && (mode == STALL));

//			cellDaisyAssert(this->mInPortCount > 0);
			return (!isActive) ? QUEUE_IS_BUSY :
				(isFull(mInPortTerminatedMask, this->mInPortCount) ? TERMINATED : CELL_OK);
		}

		/*
		 * Advance mOutPortBeginPointer (via proceed()) to the next out port
		 * that is neither flagged in busyMask nor marked terminated.
		 * At most mOutPortCount candidates are visited.
		 * Returns true when such a port was found, false otherwise.
		 */
		template<class tOutPort, class tInPort, GlueMode tMode,
				 SizeType tOutPortBufferCount, SizeType tInPortBufferCount>
		CELL_DAISY_INLINE
			bool Glue<tOutPort, tInPort, tMode,
					  tOutPortBufferCount, tInPortBufferCount>::proceedOutPortBegin(BitmapType busyMask)
		{
			for (uint32_t visited = 1; visited <= mOutPortCount; ++visited) {
				proceed(mOutPortBeginPointer, mOutPortCount, 0);
				const bool isBusy       = (busyMask & (1<<mOutPortBeginPointer)) != 0;
				const bool isTerminated = (mOutPortTerminatedMask & (1<<mOutPortBeginPointer)) != 0;
				if (__builtin_expect(!(isBusy | isTerminated), 1)) {
					return true;
				}
			}
			return false;
		}

		/*
		 * Advance mOutPortEndPointer (via proceed()) to the next out port
		 * that is neither flagged in busyMask nor marked terminated.
		 * At most mOutPortCount candidates are visited.
		 * Returns true when such a port was found, false otherwise.
		 */
		template<class tOutPort, class tInPort, GlueMode tMode,
				 SizeType tOutPortBufferCount, SizeType tInPortBufferCount>
		CELL_DAISY_INLINE
			bool Glue<tOutPort, tInPort, tMode,
					  tOutPortBufferCount, tInPortBufferCount>::proceedOutPortEnd(BitmapType busyMask)
		{
			for (uint32_t visited = 1; visited <= mOutPortCount; ++visited) {
				proceed(mOutPortEndPointer, mOutPortCount, 0);
				const bool isBusy       = (busyMask & (1<<mOutPortEndPointer)) != 0;
				const bool isTerminated = (mOutPortTerminatedMask & (1<<mOutPortEndPointer)) != 0;
				if (__builtin_expect(!(isBusy | isTerminated), 1)) {
					return true;
				}
			}
			return false;
		}

		/*
		 * Advance mInPortBeginPointer (via proceed()) to the next in port
		 * that is neither flagged in busyMask nor marked terminated.
		 * `data` is handed through to proceed() unchanged.
		 * At most mInPortCount candidates are visited.
		 * Returns true when such a port was found, false otherwise.
		 */
		template<class tOutPort, class tInPort, GlueMode tMode,
				 SizeType tOutPortBufferCount, SizeType tInPortBufferCount>
		CELL_DAISY_INLINE
			bool Glue<tOutPort, tInPort, tMode,
					  tOutPortBufferCount, tInPortBufferCount>::proceedInPortBegin(BitmapType busyMask,
																				   const InPortDataType *data)
		{
			for (uint32_t visited = 1; visited <= mInPortCount; ++visited) {
				proceed(mInPortBeginPointer, mInPortCount, data);
				const bool isBusy       = (busyMask & (1<<mInPortBeginPointer)) != 0;
				const bool isTerminated = (mInPortTerminatedMask & (1<<mInPortBeginPointer)) != 0;
				if (__builtin_expect(!(isBusy | isTerminated), 1)) {
					return true;
				}
			}
			return false;
		}

		/*
		 * Advance mInPortEndPointer (via proceed()) to the next in port
		 * that is neither flagged in busyMask nor marked terminated.
		 * At most mInPortCount candidates are visited.
		 * Returns true when such a port was found, false otherwise.
		 */
		template<class tOutPort, class tInPort, GlueMode tMode,
				 SizeType tOutPortBufferCount, SizeType tInPortBufferCount>
		CELL_DAISY_INLINE
			bool Glue<tOutPort, tInPort, tMode,
					  tOutPortBufferCount, tInPortBufferCount>::proceedInPortEnd(BitmapType busyMask)
		{
			for (uint32_t visited = 1; visited <= mInPortCount; ++visited) {
				proceed(mInPortEndPointer, mInPortCount, 0);
				const bool isBusy       = (busyMask & (1<<mInPortEndPointer)) != 0;
				const bool isTerminated = (mInPortTerminatedMask & (1<<mInPortEndPointer)) != 0;
				if (__builtin_expect(!(isBusy | isTerminated), 1)) {
					return true;
				}
			}
			return false;
		}

		/*
		 * Default port selection policy: plain round robin.
		 *
		 *   pointer   port index to advance; incremented and wrapped to 0
		 *             when it reaches `size`
		 *   size      number of ports to cycle through
		 *   data      ignored by this implementation
		 */
		template<class tOutPort, class tInPort, GlueMode tMode,
				 SizeType tOutPortBufferCount, SizeType tInPortBufferCount>
		CELL_DAISY_INLINE
			void Glue<tOutPort, tInPort, tMode,
					  tOutPortBufferCount, tInPortBufferCount>::proceed(PointerType& pointer,
																		const SizeType size, const InPortDataType *data)
		{
			(void)data;
			++pointer;
			const bool wrapped = (pointer == (PointerType)size);
			if (__builtin_expect(wrapped, 0)) {
				pointer = 0;
			}
		}

	} /* namespace Daisy */
} /* namespace cell */

#endif /* __CELL_DAISY_GLUE_H__ */

/*
 * Local Variables:
 * mode:C++
 * tab-width:4
 * End:
 * vim:ts=4:sw=4:
 */
