/*   SCE CONFIDENTIAL                                       */
/*   PLAYSTATION(R)3 Programmer Tool Runtime Library 154.001 */
/*   Copyright (C) 2006 Sony Computer Entertainment Inc.    */
/*   All Rights Reserved.                                   */

/*
 * SetReferenceCommand
 *
 * Reserves 2 command words and hands CELL_GCM_CURRENT plus the caller's
 * value to CELL_GCM_SET_REFERENCE.
 *
 * ref - value forwarded unchanged to CELL_GCM_SET_REFERENCE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetReferenceCommand)(CELL_GCM_ARGS(uint32_t ref))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_REFERENCE(CELL_GCM_CURRENT, ref);
	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetJumpCommand
 *
 * Writes a single command word built by CELL_GCM_JUMP(offset) and advances
 * CELL_GCM_CURRENT by one.
 *
 * offset - jump target; asserted to have its two low bits clear
 *          (multiple of 4).
 *
 * When CELL_GCM_MEASURE is non-zero nothing is written; only the
 * reservation is performed and `offset` is marked unused.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetJumpCommand)(CELL_GCM_ARGS(uint32_t offset))
{
	CELL_GCM_ASSERT((offset & 3) == 0);

	CELL_GCM_RESERVE(1);
#if CELL_GCM_MEASURE
	CELL_GCM_UNUSED(offset);
#else
	CELL_GCM_CURRENT[0] = CELL_GCM_JUMP(offset);
	CELL_GCM_CURRENT += 1;
#endif
	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetCallCommand
 *
 * Writes a single command word built by CELL_GCM_CALL(offset) and advances
 * CELL_GCM_CURRENT by one.
 *
 * offset - call target; asserted to have its two low bits clear
 *          (multiple of 4).
 *
 * When CELL_GCM_MEASURE is non-zero nothing is written; only the
 * reservation is performed and `offset` is marked unused.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetCallCommand)(CELL_GCM_ARGS(uint32_t offset))
{
	CELL_GCM_ASSERT((offset&3) == 0);

	CELL_GCM_RESERVE(1);
#if CELL_GCM_MEASURE
	CELL_GCM_UNUSED(offset);
#else
	CELL_GCM_CURRENT[0] = CELL_GCM_CALL(offset);
	CELL_GCM_CURRENT += 1;
#endif
	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetReturnCommand
 *
 * Reserves one command word and, unless CELL_GCM_MEASURE is non-zero,
 * stores CELL_GCM_RETURN() there and advances CELL_GCM_CURRENT by one.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetReturnCommand)(CELL_GCM_NO_ARGS())
{
	CELL_GCM_RESERVE(1);
#if (CELL_GCM_MEASURE==0)
	CELL_GCM_CURRENT[0] = CELL_GCM_RETURN();
	CELL_GCM_CURRENT += 1;
#endif
	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetNopCommand
 *
 * Reserves `count` command words and fills them with zero, then advances
 * CELL_GCM_CURRENT past them.
 *
 * count - number of zero words to write; asserted to be at most 1024.
 *
 * When CELL_GCM_MEASURE is non-zero only the reservation is performed.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetNopCommand)(CELL_GCM_ARGS(uint32_t count))
{
	CELL_GCM_ASSERT(count <= 1024);
	CELL_GCM_RESERVE(count);
#if (CELL_GCM_MEASURE==0)
	uint32_t word = 0;
	while (word < count)
	{
		CELL_GCM_CURRENT[word] = 0;
		++word;
	}
	CELL_GCM_CURRENT += count;
#endif
	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetSurface
 *
 * Emits the full render-surface setup from a CellGcmSurface description:
 * context DMA handles, surface format/pitches/offsets, colour target,
 * window offset, surface clip rectangle and shader window.
 *
 * surface - source description. Fields read here: colorLocation[0..3],
 *           depthLocation, colorOffset[0..3], depthOffset,
 *           colorPitch[0..3], depthPitch, colorFormat, depthFormat,
 *           antialias, type, width, height, colorTarget, x, y.
 *
 * Asserted preconditions: every colour/depth offset and pitch is a multiple
 * of 64, and every pitch lies in [64, 128*1024).
 *
 * NOTE(review): s_cell_gcm_handles has two entries and is indexed directly
 * by the location fields with no range check; a location other than 0 or 1
 * would read outside the table - confirm callers guarantee this.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetSurface)(CELL_GCM_ARGS(const CellGcmSurface *surface))
{
	// handle values selected by the surface's location fields (index 0 or 1)
	static const uint32_t s_cell_gcm_handles[2] = {0xfeed0000, 0xfeed0001};

	CELL_GCM_RESERVE(32);

	// alignment restriction, SET_SURFACE_COLOR_XOFFSET, SET_SURFACE_ZETA_OFFSET
	// need to be 64Byte alignment
	CELL_GCM_ASSERT((surface->colorOffset[0] & 63) == 0);
	CELL_GCM_ASSERT((surface->colorOffset[1] & 63) == 0);
	CELL_GCM_ASSERT((surface->colorOffset[2] & 63) == 0);
	CELL_GCM_ASSERT((surface->colorOffset[3] & 63) == 0);
	CELL_GCM_ASSERT((surface->depthOffset & 63) == 0);

	// alignment restriction, SET_SURFACE_PITCH_X needs to be 64byte alignment
	CELL_GCM_ASSERT((surface->colorPitch[0] & 63) == 0);
	CELL_GCM_ASSERT((surface->colorPitch[1] & 63) == 0);
	CELL_GCM_ASSERT((surface->colorPitch[2] & 63) == 0);
	CELL_GCM_ASSERT((surface->colorPitch[3] & 63) == 0);
	CELL_GCM_ASSERT((surface->depthPitch & 63) == 0);

	// pitch >= 64 && pitch < 128K
	CELL_GCM_ASSERT((surface->colorPitch[0] >= 64) && (surface->colorPitch[0] < 128*1024));
	CELL_GCM_ASSERT((surface->colorPitch[1] >= 64) && (surface->colorPitch[1] < 128*1024));
	CELL_GCM_ASSERT((surface->colorPitch[2] >= 64) && (surface->colorPitch[2] < 128*1024));
	CELL_GCM_ASSERT((surface->colorPitch[3] >= 64) && (surface->colorPitch[3] < 128*1024));
	CELL_GCM_ASSERT((surface->depthPitch >= 64) && (surface->depthPitch < 128*1024));


	// Set Context Dma
	CELL_GCM_SET_CONTEXT_DMA_COLOR_A(CELL_GCM_CURRENT, 
		s_cell_gcm_handles[surface->colorLocation[0]]);
	CELL_GCM_SET_CONTEXT_DMA_COLOR_B(CELL_GCM_CURRENT, 
		s_cell_gcm_handles[surface->colorLocation[1]]);
	CELL_GCM_SET_CONTEXT_DMA_COLOR_C_AND_D(CELL_GCM_CURRENT, 
		s_cell_gcm_handles[surface->colorLocation[2]], 
		s_cell_gcm_handles[surface->colorLocation[3]]);
	CELL_GCM_SET_CONTEXT_DMA_ZETA(CELL_GCM_CURRENT,
		s_cell_gcm_handles[surface->depthLocation]);


	// buffer format, color and z buffers
	// log2Width/log2Height = 31 - (count of leading zero bits), i.e. the
	// index of the highest set bit of width/height. The instruction differs
	// per target: "clz" when __SPU__ is defined, "cntlzw" otherwise.
	// NOTE(review): a width or height of 0 would give 31 - 32 and wrap as
	// unsigned - presumably callers never pass 0; confirm.
#ifdef __SPU__
	uint32_t log2Width = 31 - ({__asm__("clz %0,%1" : "=r" (log2Width) : "r" (surface->width)); log2Width;});
	uint32_t log2Height = 31 - ({__asm__("clz %0,%1" : "=r" (log2Height) : "r" (surface->height)); log2Height;});
#else
	uint32_t log2Width = 31 - ({__asm__("cntlzw %0,%1" : "=r" (log2Width) : "r" (surface->width)); log2Width;});
	uint32_t log2Height = 31 - ({__asm__("cntlzw %0,%1" : "=r" (log2Height) : "r" (surface->height)); log2Height;});
#endif
	CELL_GCM_SET_SURFACE_FORMAT_AND_PITCH_A_AND_COLOR_AOFFSET_AND_ZETA_OFFSET_AND_COLOR_BOFFSET_AND_PITCH_B(CELL_GCM_CURRENT, 
		surface->colorFormat, surface->depthFormat, surface->antialias, surface->type, log2Width, log2Height,
		surface->colorPitch[0],
		surface->colorOffset[0],
		surface->depthOffset,
		surface->colorOffset[1],
		surface->colorPitch[1]);
	CELL_GCM_SET_SURFACE_PITCH_Z(CELL_GCM_CURRENT, 
		surface->depthPitch);
	CELL_GCM_SET_SURFACE_PITCH_C_AND_PITCH_D_AND_COLOR_COFFSET_AND_COLOR_DOFFSET(CELL_GCM_CURRENT, 
		surface->colorPitch[2],
		surface->colorPitch[3],
		surface->colorOffset[2],
		surface->colorOffset[3]);
	CELL_GCM_SET_SURFACE_COLOR_TARGET(CELL_GCM_CURRENT, 
		surface->colorTarget);

	// Set the window origin
	CELL_GCM_SET_WINDOW_OFFSET(CELL_GCM_CURRENT, 
		surface->x, surface->y);

	// surface clips - hw expects origin/size values
	CELL_GCM_SET_SURFACE_CLIP_HORIZONTAL_AND_VERTICAL(CELL_GCM_CURRENT, 
		surface->x, surface->width,
		surface->y, surface->height);

	// allow fp's to know x/y position (yInverted)
	CELL_GCM_SET_SHADER_WINDOW(CELL_GCM_CURRENT, 
		surface->height, CELL_GCM_SHADER_WINDOW_ORIGIN_AT_BOTTOM, CELL_GCM_SHADER_WINDOW_PIXEL_CENTERS_AT_HALF_INTEGERS);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetAntiAliasingControl
 *
 * Reserves 2 command words and forwards all four arguments, in order, to
 * CELL_GCM_SET_ANTI_ALIASING_CONTROL.
 *
 * enable, alphaToCoverage, alphaToOne, sampleMask - passed through
 * unchanged; their encoding is defined by the macro, not here.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetAntiAliasingControl)(CELL_GCM_ARGS(
	uint32_t enable, uint32_t alphaToCoverage, uint32_t alphaToOne, uint32_t sampleMask))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_ANTI_ALIASING_CONTROL(CELL_GCM_CURRENT, 
		enable, alphaToCoverage, alphaToOne, sampleMask);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetWaitLabel
 *
 * Reserves 4 command words, selects the label at byte offset 0x10 * index
 * via CELL_GCM_SEMAPHORE_OFFSET, then emits CELL_GCM_SEMAPHORE_ACQUIRE
 * with `value`.
 *
 * index - label index; each label is addressed at a 0x10 stride.
 * value - value handed to the acquire macro.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetWaitLabel)(CELL_GCM_ARGS(uint8_t index, uint32_t value))
{
	CELL_GCM_RESERVE(4);

	const uint32_t labelOffset = 0x10 * index;

	CELL_GCM_SEMAPHORE_OFFSET(CELL_GCM_CURRENT, labelOffset);
	CELL_GCM_SEMAPHORE_ACQUIRE(CELL_GCM_CURRENT, value);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetWriteCommandLabel
 *
 * Reserves 4 command words, selects the label at byte offset 0x10 * index
 * via CELL_GCM_SEMAPHORE_OFFSET, then emits CELL_GCM_SEMAPHORE_RELEASE
 * with `value`.
 *
 * index - label index; each label is addressed at a 0x10 stride.
 * value - value handed to the release macro.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetWriteCommandLabel)(CELL_GCM_ARGS(
	uint8_t index, uint32_t value))
{
	CELL_GCM_RESERVE(4);

	const uint32_t labelOffset = 0x10 * index;

	CELL_GCM_SEMAPHORE_OFFSET(CELL_GCM_CURRENT, labelOffset);
	CELL_GCM_SEMAPHORE_RELEASE(CELL_GCM_CURRENT, value);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetWriteBackEndLabel
 *
 * Reserves 4 command words, selects the label at byte offset 0x10 * index
 * via CELL_GCM_SET_SEMAPHORE_OFFSET, then emits
 * CELL_GCM_BACK_END_WRITE_SEMAPHORE_RELEASE with `value` after exchanging
 * its byte 0 and byte 2 (bytes 1 and 3 are left in place).
 *
 * index - label index; each label is addressed at a 0x10 stride.
 * value - value to release; byte-swizzled as described before emission.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetWriteBackEndLabel)(CELL_GCM_ARGS(uint8_t index, uint32_t value))
{
	CELL_GCM_RESERVE(4);

	// exchange byte 0 with byte 2; bytes 1 and 3 keep their position
	const uint32_t keptBytes = value & 0xff00ff00;
	const uint32_t byte0 = value & 0xff;
	const uint32_t byte2 = (value >> 16) & 0xff;
	const uint32_t swizzled = keptBytes | byte2 | (byte0 << 16);

	const uint32_t labelOffset = 0x10 * index;

	CELL_GCM_SET_SEMAPHORE_OFFSET(CELL_GCM_CURRENT, labelOffset);
	CELL_GCM_BACK_END_WRITE_SEMAPHORE_RELEASE(CELL_GCM_CURRENT, swizzled);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetWriteTextureLabel
 *
 * Reserves 4 command words, selects the label at byte offset 0x10 * index
 * via CELL_GCM_SET_SEMAPHORE_OFFSET, then emits
 * CELL_GCM_TEXTURE_READ_SEMAPHORE_RELEASE with `value` (no byte swizzle).
 *
 * index - label index; each label is addressed at a 0x10 stride.
 * value - value handed to the release macro.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetWriteTextureLabel)(CELL_GCM_ARGS(uint8_t index, uint32_t value))
{
	CELL_GCM_RESERVE(4);

	const uint32_t labelOffset = 0x10 * index;

	CELL_GCM_SET_SEMAPHORE_OFFSET(CELL_GCM_CURRENT, labelOffset);
	CELL_GCM_TEXTURE_READ_SEMAPHORE_RELEASE(CELL_GCM_CURRENT, value);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetTimeStamp
 *
 * Reserves 2 command words and emits CELL_GCM_GET_REPORT with a fixed
 * first argument of 1 and a byte offset of 0x10 * index.
 *
 * index - report slot; asserted to be below CELL_GCM_GET_REPORT_PERF_COUNT.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetTimeStamp)(CELL_GCM_ARGS(uint32_t index))
{
	CELL_GCM_ASSERT(index < CELL_GCM_GET_REPORT_PERF_COUNT);

	const uint32_t reportOffset = index * 0x10;

	CELL_GCM_RESERVE(2);
	CELL_GCM_GET_REPORT(CELL_GCM_CURRENT, 1, reportOffset);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetInvalidateZcull
 *
 * Reserves 2 command words and emits CELL_GCM_INVALIDATE_ZCULL.
 *
 * Ends with CELL_GCM_DEBUG_FINISH like every other emitter in this file;
 * the call was previously missing from this function only.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetInvalidateZcull)(CELL_GCM_NO_ARGS())
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_INVALIDATE_ZCULL(CELL_GCM_CURRENT);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetAlphaFunc
 *
 * Reserves 3 command words and forwards both arguments to
 * CELL_GCM_SET_ALPHA_FUNC_AND_REF.
 *
 * af  - alpha function selector, passed through unchanged.
 * ref - reference value, passed through unchanged.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetAlphaFunc)(CELL_GCM_ARGS(uint32_t af, uint32_t ref))
{
	CELL_GCM_RESERVE(3);
	CELL_GCM_SET_ALPHA_FUNC_AND_REF(CELL_GCM_CURRENT, af, ref);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetBlendColor
 *
 * Reserves 4 command words and emits two settings: CELL_GCM_SET_BLEND_COLOR
 * with `color`, then CELL_GCM_SET_BLEND_COLOR_2 with `color2`.
 *
 * color, color2 - passed through unchanged to their respective macros.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetBlendColor)(CELL_GCM_ARGS(
	uint32_t color, uint32_t color2))
{
	CELL_GCM_RESERVE(4);
	CELL_GCM_SET_BLEND_COLOR(CELL_GCM_CURRENT, color);
	CELL_GCM_SET_BLEND_COLOR_2(CELL_GCM_CURRENT, color2);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetBlendEquation
 *
 * Reserves 2 command words and forwards both arguments to
 * CELL_GCM_SET_BLEND_EQUATION.
 *
 * color - equation selector for the colour channels, passed through.
 * alpha - equation selector for the alpha channel, passed through.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetBlendEquation)(CELL_GCM_ARGS(
	uint16_t color, uint16_t alpha))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_BLEND_EQUATION(CELL_GCM_CURRENT, color, alpha);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetBlendFunc
 *
 * Reserves 3 command words and emits CELL_GCM_SET_BLEND_FUNC_SRC_AND_DST.
 *
 * sfcolor, dfcolor - source / destination factors for colour.
 * sfalpha, dfalpha - source / destination factors for alpha.
 *
 * Note the argument order changes on the way to the macro: the function
 * takes (sfcolor, dfcolor, sfalpha, dfalpha) but the macro receives
 * (sfcolor, sfalpha, dfcolor, dfalpha), i.e. both source factors first.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetBlendFunc)(CELL_GCM_ARGS(uint16_t sfcolor, 
	uint16_t dfcolor, uint16_t sfalpha, uint16_t dfalpha))
{
	CELL_GCM_RESERVE(3);
	CELL_GCM_SET_BLEND_FUNC_SRC_AND_DST(CELL_GCM_CURRENT, 
		sfcolor, sfalpha, dfcolor, dfalpha);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetClearSurface
 *
 * Reserves 4 command words, emits CELL_GCM_SET_CLEAR_SURFACE with `mask`
 * and follows it with CELL_GCM_NOP.
 *
 * mask - passed through unchanged to CELL_GCM_SET_CLEAR_SURFACE.
 *
 * NOTE(review): the reason for the trailing NOP is not visible in this
 * file - presumably a hardware requirement; confirm before removing.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetClearSurface)(CELL_GCM_ARGS(uint32_t mask))
{
	CELL_GCM_RESERVE(4);
	CELL_GCM_SET_CLEAR_SURFACE(CELL_GCM_CURRENT, mask);
	CELL_GCM_NOP(CELL_GCM_CURRENT);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetClearColor
 *
 * Reserves 2 command words and forwards `color` unchanged to
 * CELL_GCM_SET_CLEAR_COLOR.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetClearColor)(CELL_GCM_ARGS(uint32_t color))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_CLEAR_COLOR(CELL_GCM_CURRENT, color);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetClearDepthStencil
 *
 * Reserves 2 command words and forwards `value` unchanged to
 * CELL_GCM_SET_CLEAR_DEPTH_STENCIL.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetClearDepthStencil)(CELL_GCM_ARGS(uint32_t value))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_CLEAR_DEPTH_STENCIL(CELL_GCM_CURRENT, value);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetColorMask
 *
 * Reserves 2 command words and forwards `mask` unchanged to
 * CELL_GCM_SET_COLOR_MASK.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetColorMask)(CELL_GCM_ARGS(uint32_t mask))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_COLOR_MASK(CELL_GCM_CURRENT, mask);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetColorMaskMrt
 *
 * Reserves 2 command words and forwards `mask` unchanged to
 * CELL_GCM_SET_COLOR_MASK_MRT.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetColorMaskMrt)(CELL_GCM_ARGS(uint32_t mask))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_COLOR_MASK_MRT(CELL_GCM_CURRENT, mask);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetCullFace
 *
 * Reserves 2 command words and forwards `cfm` unchanged to
 * CELL_GCM_SET_CULL_FACE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetCullFace)(CELL_GCM_ARGS(uint32_t cfm))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_CULL_FACE(CELL_GCM_CURRENT, cfm);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetDepthBounds
 *
 * Reserves 3 command words and emits
 * CELL_GCM_SET_DEPTH_BOUNDS_MIN_AND_MAX with the raw 32-bit patterns of
 * the two floats (obtained by writing the .f member of a CellGcmCast and
 * reading its .u member).
 *
 * zmin - lower bound, sent as its bit pattern.
 * zmax - upper bound, sent as its bit pattern.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetDepthBounds)(CELL_GCM_ARGS(float zmin, float zmax))
{
	CellGcmCast lowerBits;
	CellGcmCast upperBits;

	lowerBits.f = zmin;
	upperBits.f = zmax;

	CELL_GCM_RESERVE(3);
	CELL_GCM_SET_DEPTH_BOUNDS_MIN_AND_MAX(CELL_GCM_CURRENT, lowerBits.u, upperBits.u);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetDepthFunc
 *
 * Reserves 2 command words and forwards `zf` unchanged to
 * CELL_GCM_SET_DEPTH_FUNC.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetDepthFunc)(CELL_GCM_ARGS(uint32_t zf))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_DEPTH_FUNC(CELL_GCM_CURRENT, zf);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetDepthMask
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_DEPTH_MASK.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetDepthMask)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_DEPTH_MASK(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetFrontFace
 *
 * Reserves 2 command words and forwards `dir` unchanged to
 * CELL_GCM_SET_FRONT_FACE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetFrontFace)(CELL_GCM_ARGS(uint32_t dir))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_FRONT_FACE(CELL_GCM_CURRENT, dir);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetLineWidth
 *
 * Reserves 2 command words and emits CELL_GCM_SET_LINE_WIDTH.
 *
 * width - line width value. When CELL_GCM_BITFIELD is defined only the low
 *         9 bits (0x1ff) are forwarded; otherwise the value is passed
 *         through unmodified.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetLineWidth)(CELL_GCM_ARGS(uint32_t width))
{
	CELL_GCM_RESERVE(2);

#ifdef CELL_GCM_BITFIELD
	// bitfield build: truncate explicitly to the 9 bits being sent
	CELL_GCM_SET_LINE_WIDTH(CELL_GCM_CURRENT, width & 0x1ff);
#else
	CELL_GCM_SET_LINE_WIDTH(CELL_GCM_CURRENT, width);
#endif

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetLineSmoothEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_LINE_SMOOTH_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetLineSmoothEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_LINE_SMOOTH_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetLineStippleEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_LINE_STIPPLE_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetLineStippleEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_LINE_STIPPLE_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetLineStipplePattern
 *
 * Reserves 2 command words and emits CELL_GCM_SET_LINE_STIPPLE_PATTERN.
 *
 * pattern - pointer to the 16-bit pattern; only the single value it points
 *           at is read. Dereferenced unconditionally, so it must be valid.
 * factor  - passed through unchanged as the macro's last argument.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetLineStipplePattern)(CELL_GCM_ARGS(
	const uint16_t* pattern, uint8_t factor))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_LINE_STIPPLE_PATTERN(CELL_GCM_CURRENT, *pattern, factor);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetLogicOp
 *
 * Reserves 2 command words and forwards `op` unchanged to
 * CELL_GCM_SET_LOGIC_OP.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetLogicOp)(CELL_GCM_ARGS(uint32_t op))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_LOGIC_OP(CELL_GCM_CURRENT, op);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetPointSize
 *
 * Reserves 2 command words and emits CELL_GCM_SET_POINT_SIZE with the raw
 * 32-bit pattern of `size` (written through CellGcmCast.f, read back
 * through CellGcmCast.u).
 *
 * size - point size as a float; sent as its bit pattern, not converted.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetPointSize)(CELL_GCM_ARGS(float size))
{
	CellGcmCast sizeBits;

	sizeBits.f = size;

	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_POINT_SIZE(CELL_GCM_CURRENT, sizeBits.u);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetPolygonOffset
 *
 * Reserves 3 command words and emits
 * CELL_GCM_SET_POLYGON_OFFSET_SCALE_FACTOR_AND_BIAS with the raw 32-bit
 * patterns of the two floats (via CellGcmCast .f -> .u).
 *
 * factor - sent first, as its bit pattern.
 * units  - sent second, as its bit pattern.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetPolygonOffset)(CELL_GCM_ARGS(float factor, float units))
{
	CellGcmCast factorBits;
	CellGcmCast unitsBits;

	factorBits.f = factor;
	unitsBits.f = units;

	CELL_GCM_RESERVE(3);
	CELL_GCM_SET_POLYGON_OFFSET_SCALE_FACTOR_AND_BIAS(CELL_GCM_CURRENT,
		factorBits.u, unitsBits.u);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetPolySmoothEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_POLY_SMOOTH_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetPolySmoothEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_POLY_SMOOTH_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetPolygonStippleEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_POLYGON_STIPPLE_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetPolygonStippleEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_POLYGON_STIPPLE_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetPolygonStipplePattern
 *
 * Reserves 33 command words: one method header for
 * CELL_GCM_NV4097_SET_POLYGON_STIPPLE_PATTERN with a count of 32, followed
 * by 32 words copied verbatim from `pattern`.
 *
 * pattern - must point at at least 32 readable uint32_t values.
 *
 * NOTE(review): the data is copied with CELL_GCM_MEMCPY and is not passed
 * through CELL_GCM_ENDIAN_SWAP, unlike word-by-word writers elsewhere in
 * this file - presumably the caller supplies data already in the expected
 * byte order; confirm.
 * NOTE(review): unlike SetJumpCommand/SetNopCommand, the direct writes to
 * CELL_GCM_CURRENT are not guarded by a CELL_GCM_MEASURE check - confirm
 * this is intended for measure builds.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetPolygonStipplePattern)(CELL_GCM_ARGS(const uint32_t* pattern))
{
	CELL_GCM_RESERVE(33);
	CELL_GCM_CURRENT[0] = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_POLYGON_STIPPLE_PATTERN, 32);
	CELL_GCM_MEMCPY(&CELL_GCM_CURRENT[1], pattern, sizeof(uint32_t)*32);
	CELL_GCM_CURRENT += 33;

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetFrontPolygonMode
 *
 * Reserves 2 command words and forwards `mode` unchanged to
 * CELL_GCM_SET_FRONT_POLYGON_MODE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetFrontPolygonMode)(CELL_GCM_ARGS(uint32_t mode))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_FRONT_POLYGON_MODE(CELL_GCM_CURRENT, mode);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetBackPolygonMode
 *
 * Reserves 2 command words and forwards `mode` unchanged to
 * CELL_GCM_SET_BACK_POLYGON_MODE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetBackPolygonMode)(CELL_GCM_ARGS(uint32_t mode))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_BACK_POLYGON_MODE(CELL_GCM_CURRENT, mode);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetScissor
 *
 * Reserves 3 command words and emits
 * CELL_GCM_SET_SCISSOR_LEFT_WIDTH_AND_TOP_HEIGHT.
 *
 * x, y - origin of the scissor rectangle.
 * w, h - size of the scissor rectangle.
 *
 * The macro receives the values grouped per axis: (x, w, y, h).
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetScissor)(CELL_GCM_ARGS(uint16_t x, uint16_t y, uint16_t w, uint16_t h))
{
	CELL_GCM_RESERVE(3);
	CELL_GCM_SET_SCISSOR_LEFT_WIDTH_AND_TOP_HEIGHT(CELL_GCM_CURRENT, 
		x, w, y, h);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetShadeMode
 *
 * Reserves 2 command words and forwards `sm` unchanged to
 * CELL_GCM_SET_SHADE_MODE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetShadeMode)(CELL_GCM_ARGS(uint32_t sm))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_SHADE_MODE(CELL_GCM_CURRENT, sm);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetTwoSideLightEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_TWO_SIDE_LIGHT_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetTwoSideLightEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_TWO_SIDE_LIGHT_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetStencilFunc
 *
 * Reserves 4 command words and forwards the three arguments, in order, to
 * CELL_GCM_SET_STENCIL_FUNC_AND_REF_AND_MASK.
 *
 * func - stencil function selector.
 * ref  - reference value (signed in this interface).
 * mask - mask value.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetStencilFunc)(CELL_GCM_ARGS(uint32_t func, 
	int32_t ref, uint32_t mask))
{
	CELL_GCM_RESERVE(4);
	CELL_GCM_SET_STENCIL_FUNC_AND_REF_AND_MASK(CELL_GCM_CURRENT, 
		func, ref, mask);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetBackStencilFunc
 *
 * Reserves 4 command words and forwards the three arguments, in order, to
 * CELL_GCM_SET_BACK_STENCIL_FUNC_AND_REF_AND_MASK.
 *
 * func - stencil function selector.
 * ref  - reference value (signed in this interface).
 * mask - mask value.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetBackStencilFunc)(CELL_GCM_ARGS(uint32_t func, 
	int32_t ref, uint32_t mask))
{
	CELL_GCM_RESERVE(4);
	CELL_GCM_SET_BACK_STENCIL_FUNC_AND_REF_AND_MASK(CELL_GCM_CURRENT, 
		func, ref, mask);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetStencilMask
 *
 * Reserves 2 command words and forwards `sm` unchanged to
 * CELL_GCM_SET_STENCIL_MASK.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetStencilMask)(CELL_GCM_ARGS(uint32_t sm))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_STENCIL_MASK(CELL_GCM_CURRENT, sm);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetBackStencilMask
 *
 * Reserves 2 command words and forwards `sm` unchanged to
 * CELL_GCM_SET_BACK_STENCIL_MASK.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetBackStencilMask)(CELL_GCM_ARGS(uint32_t sm))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_BACK_STENCIL_MASK(CELL_GCM_CURRENT, sm);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetStencilOp
 *
 * Reserves 4 command words and forwards the three operation selectors, in
 * order, to CELL_GCM_SET_STENCIL_OP_FAIL_AND_DEPTHFAIL_AND_DEPTHPASS.
 *
 * fail      - first selector passed to the macro.
 * depthFail - second selector passed to the macro.
 * depthPass - third selector passed to the macro.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetStencilOp)(CELL_GCM_ARGS(uint32_t fail, 
	uint32_t depthFail, uint32_t depthPass))
{
	CELL_GCM_RESERVE(4);
	CELL_GCM_SET_STENCIL_OP_FAIL_AND_DEPTHFAIL_AND_DEPTHPASS(CELL_GCM_CURRENT, 
		fail, depthFail, depthPass);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetBackStencilOp
 *
 * Reserves 4 command words and forwards the three operation selectors, in
 * order, to CELL_GCM_SET_BACK_STENCIL_OP_FAIL_AND_DEPTHFAIL_AND_DEPTHPASS.
 *
 * fail      - first selector passed to the macro.
 * depthFail - second selector passed to the macro.
 * depthPass - third selector passed to the macro.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetBackStencilOp)(CELL_GCM_ARGS(uint32_t fail, 
	uint32_t depthFail, uint32_t depthPass))
{
	CELL_GCM_RESERVE(4);
	CELL_GCM_SET_BACK_STENCIL_OP_FAIL_AND_DEPTHFAIL_AND_DEPTHPASS(CELL_GCM_CURRENT, 
		fail, depthFail, depthPass);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetZMinMaxControl
 *
 * Reserves 2 command words and forwards the three arguments, in order, to
 * CELL_GCM_SET_ZMIN_MAX_CONTROL.
 *
 * cullNearFarEnable, zclampEnable, cullIgnoreW - passed through unchanged;
 * their encoding is defined by the macro, not here.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetZMinMaxControl)(CELL_GCM_ARGS(
	const uint32_t cullNearFarEnable, const uint32_t zclampEnable, const uint32_t cullIgnoreW))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_ZMIN_MAX_CONTROL(CELL_GCM_CURRENT, cullNearFarEnable, zclampEnable, cullIgnoreW);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetViewport
 *
 * Reserves 24 command words and emits, in this order:
 *   1. CELL_GCM_SET_VIEWPORT_LEFT_WIDTH_AND_TOP_HEIGHT (x, w, y, h)
 *   2. CELL_GCM_SET_CLIP_MIN_AND_MAX (bit patterns of min, max)
 *   3. CELL_GCM_SET_VIEWPORT_OFFSET_AND_SCALE (offset[0..3], scale[0..3])
 *   4. the same CELL_GCM_SET_VIEWPORT_OFFSET_AND_SCALE a second time
 *
 * x, y, w, h - viewport rectangle.
 * min, max   - clip range; sent as raw float bit patterns.
 * scale      - 4 floats; sent as raw bit patterns.
 * offset     - 4 floats; sent as raw bit patterns.
 *
 * NOTE(review): the offset/scale method is deliberately kept twice - the
 * reservation of 24 words accounts for both copies. The reason is not
 * visible in this file (presumably a hardware workaround); confirm before
 * removing either emission.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetViewport)(CELL_GCM_ARGS(uint16_t x, 
	uint16_t y, uint16_t w, uint16_t h, float min, float max, const float scale[4], 
	const float offset[4]))
{
	CellGcmCast clipMinBits;
	CellGcmCast clipMaxBits;
	CellGcmCast offBits[4];
	CellGcmCast sclBits[4];
	uint32_t lane;

	clipMinBits.f = min;
	clipMaxBits.f = max;

	// reinterpret every float as its 32-bit pattern
	for (lane = 0; lane < 4; ++lane)
	{
		offBits[lane].f = offset[lane];
		sclBits[lane].f = scale[lane];
	}

	CELL_GCM_RESERVE(24);
	CELL_GCM_SET_VIEWPORT_LEFT_WIDTH_AND_TOP_HEIGHT(CELL_GCM_CURRENT,
		x, w, y, h);
	CELL_GCM_SET_CLIP_MIN_AND_MAX(CELL_GCM_CURRENT,
		clipMinBits.u, clipMaxBits.u);

	// emitted twice on purpose, see header comment
	CELL_GCM_SET_VIEWPORT_OFFSET_AND_SCALE(CELL_GCM_CURRENT,
		offBits[0].u, offBits[1].u, offBits[2].u, offBits[3].u,
		sclBits[0].u, sclBits[1].u, sclBits[2].u, sclBits[3].u);
	CELL_GCM_SET_VIEWPORT_OFFSET_AND_SCALE(CELL_GCM_CURRENT,
		offBits[0].u, offBits[1].u, offBits[2].u, offBits[3].u,
		sclBits[0].u, sclBits[1].u, sclBits[2].u, sclBits[3].u);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetAlphaTestEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_ALPHA_TEST_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetAlphaTestEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_ALPHA_TEST_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetBlendEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_BLEND_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetBlendEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_BLEND_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetBlendEnableMrt
 *
 * Reserves 2 command words and forwards the three flags, in order, to
 * CELL_GCM_SET_BLEND_ENABLE_MRT.
 *
 * mrt1, mrt2, mrt3 - passed through unchanged.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetBlendEnableMrt)(CELL_GCM_ARGS(uint32_t mrt1, 
	uint32_t mrt2, uint32_t mrt3))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_BLEND_ENABLE_MRT(CELL_GCM_CURRENT, 
		mrt1, mrt2, mrt3);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetLogicOpEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_LOGIC_OP_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetLogicOpEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_LOGIC_OP_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetCullFaceEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_CULL_FACE_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetCullFaceEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_CULL_FACE_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetDepthBoundsTestEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_DEPTH_BOUNDS_TEST_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetDepthBoundsTestEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_DEPTH_BOUNDS_TEST_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetDepthTestEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_DEPTH_TEST_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetDepthTestEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_DEPTH_TEST_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetDitherEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_DITHER_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetDitherEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_DITHER_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetStencilTestEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_STENCIL_TEST_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetStencilTestEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_STENCIL_TEST_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetTwoSidedStencilTestEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_TWO_SIDED_STENCIL_TEST_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetTwoSidedStencilTestEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_TWO_SIDED_STENCIL_TEST_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetPolygonOffsetFillEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_POLYGON_OFFSET_FILL_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetPolygonOffsetFillEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_POLYGON_OFFSET_FILL_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetRestartIndexEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_RESTART_INDEX_ENABLE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetRestartIndexEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_RESTART_INDEX_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetPointSpriteControl
 *
 * Reserves 4 command words and emits two settings:
 *   1. CELL_GCM_SET_POINT_PARAMS_ENABLE with `enable`
 *   2. CELL_GCM_SET_POINT_SPRITE_CONTROL with (enable, rmode, texcoord)
 *
 * enable   - used for both settings.
 * rmode    - passed through unchanged to the sprite control macro.
 * texcoord - passed through unchanged to the sprite control macro.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetPointSpriteControl)(CELL_GCM_ARGS(
	uint32_t enable, uint32_t rmode, uint32_t texcoord))
{
	CELL_GCM_RESERVE(4);
	CELL_GCM_SET_POINT_PARAMS_ENABLE(CELL_GCM_CURRENT, 
		enable);
	CELL_GCM_SET_POINT_SPRITE_CONTROL(CELL_GCM_CURRENT, 
		enable, rmode, texcoord);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetInvalidateTextureCache
 *
 * Reserves 2 command words and forwards `value` unchanged to
 * CELL_GCM_SET_INVALIDATE_TEXTURE_CACHE.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetInvalidateTextureCache)(CELL_GCM_ARGS(uint32_t value))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_INVALIDATE_TEXTURE_CACHE(CELL_GCM_CURRENT, value);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetTextureBorderColor
 *
 * Reserves 2 command words and forwards both arguments to
 * CELL_GCM_SET_TEXTURE_BORDER_COLOR.
 *
 * index - texture unit selector, passed through unchanged.
 * color - border colour value, passed through unchanged.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetTextureBorderColor)(CELL_GCM_ARGS(
	uint8_t index, uint32_t color))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_TEXTURE_BORDER_COLOR(CELL_GCM_CURRENT, index, color);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetTextureFilter
 *
 * Reserves 2 command words and emits CELL_GCM_SET_TEXTURE_FILTER.
 *
 * index - texture unit selector.
 * bias  - bias value. When CELL_GCM_BITFIELD is defined only the low
 *         13 bits (0x1fff) are forwarded; otherwise it is passed as is.
 * min, mag, conv - passed through unchanged.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetTextureFilter)(CELL_GCM_ARGS(uint8_t index, 
	uint16_t bias, uint8_t min, uint8_t mag, uint8_t conv))
{
	CELL_GCM_RESERVE(2);

#ifdef	CELL_GCM_BITFIELD
	// bitfield build: truncate bias explicitly to the 13 bits being sent
	CELL_GCM_SET_TEXTURE_FILTER(CELL_GCM_CURRENT, index, bias & 0x1fff, min, mag, conv);
#else
	CELL_GCM_SET_TEXTURE_FILTER(CELL_GCM_CURRENT, index, bias, min, mag, conv);
#endif

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetTextureAddress
 *
 * Reserves 2 command words and forwards all seven arguments, in order, to
 * CELL_GCM_SET_TEXTURE_ADDRESS.
 *
 * index - texture unit selector.
 * wraps, wrapt, wrapr, unsignedRemap, zfunc, gamma - passed through
 * unchanged; their encoding is defined by the macro, not here.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetTextureAddress)(CELL_GCM_ARGS(uint8_t index, 
	uint8_t wraps, uint8_t wrapt, uint8_t wrapr, uint8_t unsignedRemap, 
	uint8_t zfunc, uint8_t gamma))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_TEXTURE_ADDRESS(CELL_GCM_CURRENT, 
		index, wraps, wrapt, wrapr, unsignedRemap, zfunc, gamma);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetTextureControl
 *
 * Reserves 2 command words and emits CELL_GCM_SET_TEXTURE_CONTROL.
 *
 * index    - texture unit selector.
 * enable   - passed through unchanged.
 * minlod, maxlod - LOD limits. When CELL_GCM_BITFIELD is defined only the
 *            low 12 bits (0xfff) of each are forwarded; otherwise they are
 *            passed as is.
 * maxaniso - passed through unchanged.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetTextureControl)(CELL_GCM_ARGS(uint8_t index, 
	uint32_t enable, uint16_t minlod, uint16_t maxlod, uint8_t maxaniso))
{
	CELL_GCM_RESERVE(2);

#ifdef CELL_GCM_BITFIELD
	// bitfield build: truncate both LOD values explicitly to 12 bits
	CELL_GCM_SET_TEXTURE_CONTROL(CELL_GCM_CURRENT, index, enable, minlod & 0xfff, maxlod & 0xfff, maxaniso);
#else
	CELL_GCM_SET_TEXTURE_CONTROL(CELL_GCM_CURRENT, index, enable, minlod, maxlod, maxaniso);
#endif

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetTextureOptimization
 *
 * Reserves 2 command words and emits CELL_GCM_SET_TEXTURE_CONTROL_3 for
 * texture unit `index` with a word packed as:
 *   bits 0..   : slope (OR-ed in unshifted)
 *   bit  6     : iso   (shifted left by 6)
 *   bit  7     : aniso (shifted left by 7)
 *   bits 8..   : constant 0x2d
 *
 * No masking is applied, so out-of-range inputs spill into neighbouring
 * fields.
 * NOTE(review): the meaning of the 0x2d constant is not visible in this
 * file.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetTextureOptimization)(CELL_GCM_ARGS(uint8_t index, const uint8_t slope, const uint8_t iso, const uint8_t aniso))
{
	CELL_GCM_RESERVE(2);

	uint32_t packed = (0x2d<<8);
	packed |= (aniso<<7);
	packed |= (iso<<6);
	packed |= slope;

	CELL_GCM_SET_TEXTURE_CONTROL_3(CELL_GCM_CURRENT, index, packed);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetCylindricalWrap
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_CYLINDRICAL_WRAP.
 *
 * enable   - passed through unchanged.
 * reserved - ignored; present in the signature only and marked unused.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetCylindricalWrap)(CELL_GCM_ARGS(uint32_t enable, uint32_t reserved __attribute__((unused))))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_CYLINDRICAL_WRAP(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetTexture
 *
 * Reserves 9 command words and programs texture unit `index` from a
 * CellGcmTexture description, emitting in order: offset+format, size1,
 * size2, swizzle.
 *
 * index   - texture unit selector, handed to every macro.
 * texture - source description. Fields read: offset, location, cubemap,
 *           dimension, format, mipmap, height, width, remap, pitch, depth.
 *
 * Word packing performed here (no masking is applied to any field):
 *   format word : 0x8 | (location + 1) | cubemap << 2 | dimension << 4
 *                 | format << 8 | mipmap << 16
 *   size1 word  : height | width << 16
 *   size2 word  : pitch | depth << 20
 *   swizzle word: remap, unchanged
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetTexture)(CELL_GCM_ARGS(uint8_t index, 
	const CellGcmTexture *texture))
{
	CELL_GCM_RESERVE(9);

	/* pack the format word field by field */
	uint32_t formatWord = 0x00000008;
	formatWord |= (texture->location + 1);
	formatWord |= (texture->cubemap << 2);
	formatWord |= (texture->dimension << 4);
	formatWord |= (texture->format << 8);
	formatWord |= (texture->mipmap << 16);

	const uint32_t baseOffset = texture->offset;
	const uint32_t sizeWord1 = texture->height | (texture->width << 16);
	const uint32_t sizeWord2 = texture->pitch | (texture->depth << 20);
	const uint32_t swizzleWord = texture->remap;

	CELL_GCM_SET_TEXTURE_OFFSET_AND_FORMAT(CELL_GCM_CURRENT, index,
		baseOffset,
		formatWord);
	CELL_GCM_SET_TEXTURE_SIZE1(CELL_GCM_CURRENT, index,
		sizeWord1);
	CELL_GCM_SET_TEXTURE_SIZE2(CELL_GCM_CURRENT, index,
		sizeWord2);
	CELL_GCM_SET_TEXTURE_SWIZZLE(CELL_GCM_CURRENT, index,
		swizzleWord);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetDrawArrays
 *
 * Emits a non-indexed draw of `count` vertices starting at `first`, split
 * into batches of at most 256 vertices.
 *
 * mode  - value handed to CELL_GCM_SET_DRAW_MODE before the draw; the draw
 *         is closed by a second CELL_GCM_SET_DRAW_MODE with 0.
 * first - index of the first vertex; asserted to be below 0xfffff.
 * count - number of vertices; asserted to be greater than 0.
 *
 * Batch layout: after decrementing count, its low 8 bits give the size
 * (minus one) of the first batch, and the remaining high bits give the
 * number of full 256-vertex batches that follow. Those full batches are
 * written as raw words (start index | 255 << 24) grouped under method
 * headers of at most CELL_GCM_MAX_METHOD_COUNT words each.
 *
 * NOTE(review): the raw writes to CELL_GCM_CURRENT below are not guarded
 * by a CELL_GCM_MEASURE check, unlike SetDrawIndexArray - confirm this is
 * intended for measure builds.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetDrawArrays)(CELL_GCM_ARGS(uint8_t mode, 
	uint32_t first, uint32_t count))
{
	// parameter check
	CELL_GCM_ASSERT(count > 0);
	CELL_GCM_ASSERT(first < 0xfffff);

	uint32_t lcount;

	// work with count-1 so that the low byte is directly the
	// "vertices minus one" value of the first batch
	--count;
	lcount = count & 0xff;
	count >>= 8;	// from here on: number of full 256-vertex batches

	// hw limit 0x7ff loop batches, if count > 256 * 0x7ff
	uint32_t loop, rest;
	loop = count / CELL_GCM_MAX_METHOD_COUNT;
	rest = count % CELL_GCM_MAX_METHOD_COUNT;

	// reserve buffer size
	// 10 fixed words + one header per group + one word per 256-batch
	CELL_GCM_RESERVE(10 + loop*(1+CELL_GCM_MAX_METHOD_COUNT) + (rest!=0 ? 1+rest : 0));

	// hw bug workaround
	CELL_GCM_INVALIDATE_VERTEX_FILE_3(CELL_GCM_CURRENT);

	// Draw first batch of 1-256...
	CELL_GCM_SET_DRAW_MODE(CELL_GCM_CURRENT, mode);
	CELL_GCM_SET_DRAW_ARRAYS(CELL_GCM_CURRENT, first, lcount);
	first += lcount + 1;

	// ...then complete batches of 256's
	uint32_t i,j;

	// full groups of CELL_GCM_MAX_METHOD_COUNT batches
	for(i=0;i<loop;i++){
		CELL_GCM_CURRENT[0] = CELL_GCM_METHOD_NI(CELL_GCM_NV4097_DRAW_ARRAYS, CELL_GCM_MAX_METHOD_COUNT);
		CELL_GCM_CURRENT++;

		for(j=0;j<CELL_GCM_MAX_METHOD_COUNT;j++){
			// start index in the low bits, 255 (= 256 vertices - 1) in the top byte
			CELL_GCM_CURRENT[0] = CELL_GCM_ENDIAN_SWAP((first) | ((255U)<<24));
			CELL_GCM_CURRENT++;
			first += 256;
		}
	}

	// final, partially filled group
	if(rest){
		CELL_GCM_CURRENT[0] = CELL_GCM_METHOD_NI(CELL_GCM_NV4097_DRAW_ARRAYS, rest);
		CELL_GCM_CURRENT++;

		for(j=0;j<rest;j++){
			CELL_GCM_CURRENT[0] = CELL_GCM_ENDIAN_SWAP((first) | ((255U)<<24));
			CELL_GCM_CURRENT++;
			first += 256;
		}
	}

	// close the draw
	CELL_GCM_SET_DRAW_MODE(CELL_GCM_CURRENT, 0);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetDrawIndexArray
 *
 * Emits an indexed draw of `count` indices read from an index array at
 * offset `indicies`, split into batches of at most 256 indices.
 *
 * mode     - value handed to CELL_GCM_SET_DRAW_MODE before the draw; the
 *            draw is closed by a second CELL_GCM_SET_DRAW_MODE with 0.
 * count    - number of indices; asserted (in non-measure builds, after the
 *            optional leading batch is subtracted) to be in 1..0xfffff.
 * type     - index format; CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32 selects
 *            4-byte indices, any other value is treated as 2-byte.
 * location - forwarded to CELL_GCM_SET_INDEX_ARRAY_OFFSET_AND_FORMAT.
 * indicies - offset of the first index; asserted to be 2-byte aligned.
 *
 * If the start offset is not 128-byte aligned, a short leading batch
 * covering the indices up to the next 128-byte boundary is emitted first,
 * so that the remaining batches start aligned. This leading batch is
 * skipped when it would cover the whole draw.
 *
 * When CELL_GCM_MEASURE is non-zero the raw batch words are not written.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetDrawIndexArray)(CELL_GCM_ARGS(uint8_t mode, 
	uint32_t count, uint8_t type, uint8_t location, uint32_t indicies))
{
	uint32_t startOffset;
	uint32_t startIndex;
	uint32_t misalignedIndexCount;

	startOffset = indicies;
	/* alignment restriction, SET_INDEX_ARRAY_ADDRESS needs to be 2Byte alignment */
	CELL_GCM_ASSERT((startOffset & 1) == 0);

	// need to compute the number of indexes from starting
	// address to next 128-byte alignment

	// type == 32
	// bytes up to the next 128-byte boundary, divided by 4 bytes per index
	if(type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32)
		misalignedIndexCount = (((startOffset + 127) & ~127) - startOffset) >> 2;
	// type == 16
	// same distance, divided by 2 bytes per index
	else
		misalignedIndexCount = (((startOffset + 127) & ~127) - startOffset) >> 1;

	// 16 fixed words plus one word per 256 indices
	CELL_GCM_RESERVE(16 + (count >> 8));

	// begin
	CELL_GCM_SET_INDEX_ARRAY_OFFSET_AND_FORMAT(CELL_GCM_CURRENT, 
		location, startOffset, type);
	CELL_GCM_SET_DRAW_MODE(CELL_GCM_CURRENT, 
		mode);

	startIndex = 0;
	// starting address of first index is not 128 byte aligned
	// send the mis-aligned indices thus aligning the rest to 128 byte boundary
	if (misalignedIndexCount && (misalignedIndexCount < count))
	{
		// the macro takes "count minus one"
		uint32_t tmp = misalignedIndexCount-1;
		CELL_GCM_SET_DRAW_INDEX_ARRAY(CELL_GCM_CURRENT, startIndex,tmp);
		count -= misalignedIndexCount;
		startIndex += misalignedIndexCount;
	}

#if (CELL_GCM_MEASURE==0)
	// avoid writing more then 0x7ff words per inc method (hw limit)
	CELL_GCM_ASSERT(count && (count <= 0xfffff)); // hw limit
	// each pass emits one full header of 0x7ff words = 0x7ff * 256 indices
	while(count > 0x7FF00)
	{
		count -= 0x7ff00;
		CELL_GCM_CURRENT[0] = CELL_GCM_METHOD_NI(CELL_GCM_NV4097_DRAW_INDEX_ARRAY, 0x7FF);
		CELL_GCM_CURRENT += 1;
		for (uint32_t lcount = 0x7ff; lcount; --lcount)
		{
			// 0xFF in the top byte = 256 indices - 1
			CELL_GCM_CURRENT[0] = CELL_GCM_ENDIAN_SWAP(0xFF000000 | startIndex);
			CELL_GCM_CURRENT += 1;
			startIndex += 0x100;
		}
	}

	// 256 indices per DRAW_INDEX_ARRAY
	// header word count = remaining indices rounded up to whole batches
	CELL_GCM_CURRENT[0] = CELL_GCM_METHOD_NI(CELL_GCM_NV4097_DRAW_INDEX_ARRAY, (count + 0xff)>>8);
	CELL_GCM_CURRENT += 1;
	while(count > 0x100)
	{
		count -= 0x100;
		CELL_GCM_CURRENT[0] = CELL_GCM_ENDIAN_SWAP(0xFF000000 | startIndex);
		CELL_GCM_CURRENT += 1;
		startIndex += 0x100;
	}

	// remainder indices
	// 1..256 indices are left here; encode as "count minus one" in the top byte
	if(count)
	{
		--count;
		CELL_GCM_CURRENT[0] = CELL_GCM_ENDIAN_SWAP((count << 24) | startIndex);
		CELL_GCM_CURRENT += 1;
	}
#endif

	// close the draw
	CELL_GCM_SET_DRAW_MODE(CELL_GCM_CURRENT, 0);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetInvalidateVertexCache
 *
 * Reserves 8 command words and emits
 * CELL_GCM_INVALIDATE_VERTEX_CACHE_FILE once, followed by
 * CELL_GCM_INVALIDATE_VERTEX_FILE three times.
 *
 * NOTE(review): the reason for the three repeated invalidates is not
 * stated here - presumably the same hardware workaround mentioned in
 * SetDrawArrays; confirm before collapsing them.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetInvalidateVertexCache)(CELL_GCM_NO_ARGS())
{
	CELL_GCM_RESERVE(8);
	CELL_GCM_INVALIDATE_VERTEX_CACHE_FILE(CELL_GCM_CURRENT);
	CELL_GCM_INVALIDATE_VERTEX_FILE(CELL_GCM_CURRENT); 
	CELL_GCM_INVALIDATE_VERTEX_FILE(CELL_GCM_CURRENT);
	CELL_GCM_INVALIDATE_VERTEX_FILE(CELL_GCM_CURRENT);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetRestartIndex
 *
 * Reserves 2 command words and forwards `index` unchanged to
 * CELL_GCM_SET_RESTART_INDEX.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetRestartIndex)(CELL_GCM_ARGS(uint32_t index))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_RESTART_INDEX(CELL_GCM_CURRENT, index);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetVertexData4f
 *
 * Reserves 5 command words and emits CELL_GCM_SET_VERTEX_DATA_4F_U for
 * attribute `index` with the raw 32-bit patterns of the four floats
 * (via CellGcmCast .f -> .u).
 *
 * index - attribute selector, passed through unchanged.
 * v     - four floats; all four elements are read.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetVertexData4f)(CELL_GCM_ARGS(uint8_t index, const float v[4]))
{
	CellGcmCast bits[4];
	uint32_t lane;

	for (lane = 0; lane < 4; ++lane)
	{
		bits[lane].f = v[lane];
	}

	CELL_GCM_RESERVE(5);
	CELL_GCM_SET_VERTEX_DATA_4F_U(CELL_GCM_CURRENT, index,
		bits[0].u, bits[1].u, bits[2].u, bits[3].u);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetVertexDataArray
 *
 * Reserves 4 command words and emits two settings for attribute `index`:
 *   1. CELL_GCM_SET_VERTEX_DATA_ARRAY_FORMAT (frequency, stride, size, type)
 *   2. CELL_GCM_SET_VERTEX_DATA_ARRAY_OFFSET (location, offset)
 *
 * All arguments are passed through unchanged; their encoding is defined by
 * the macros, not here.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetVertexDataArray)(CELL_GCM_ARGS(uint8_t index, uint16_t frequency, uint8_t stride, uint8_t size, uint8_t type, uint8_t location, uint32_t offset))
{
	CELL_GCM_RESERVE(4);
	CELL_GCM_SET_VERTEX_DATA_ARRAY_FORMAT(CELL_GCM_CURRENT, 
		index, frequency, stride, size, type);
	CELL_GCM_SET_VERTEX_DATA_ARRAY_OFFSET(CELL_GCM_CURRENT, 
		index, location, offset);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetFrequencyDividerOperation
 *
 * Reserves 2 command words and forwards `operation` unchanged to
 * CELL_GCM_SET_FREQUENCY_DIVIDER_OPERATION.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetFrequencyDividerOperation)(CELL_GCM_ARGS(uint16_t operation))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_FREQUENCY_DIVIDER_OPERATION(CELL_GCM_CURRENT, operation);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetTransformBranchBits
 *
 * Reserves 2 command words and forwards `branchBits` unchanged to
 * CELL_GCM_SET_TRANSFORM_BRANCH_BITS.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetTransformBranchBits)(CELL_GCM_ARGS(uint32_t branchBits))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_TRANSFORM_BRANCH_BITS(CELL_GCM_CURRENT, branchBits);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetVertexAttribInputMask
 *
 * Reserves 2 command words and forwards `mask` unchanged to
 * CELL_GCM_SET_VERTEX_ATTRIB_INPUT_MASK.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetVertexAttribInputMask)(CELL_GCM_ARGS(uint16_t mask))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_VERTEX_ATTRIB_INPUT_MASK(CELL_GCM_CURRENT, mask);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}


/*
 * SetVertexProgramParameterBlock
 *
 * Uploads constCount four-float constants, starting at constant slot
 * baseConst, as a series of CELL_GCM_NV4097_SET_TRANSFORM_CONSTANT_LOAD
 * methods.
 *
 * baseConst  - first constant slot to load.
 * constCount - number of four-float constants (constCount*4 floats read).
 * value      - source floats; advanced as blocks are copied.
 *
 * The floats are sent in blocks of 32 (= 8 constants): each full block is
 * 34 words (header, load slot, 32 floats). A final partial block carries
 * the remaining floats. The float data is copied with CELL_GCM_MEMCPY,
 * without a byte swap.
 *
 * The reservation always includes 2 words for a partial-block header even
 * when there is no partial block.
 *
 * NOTE(review): only the partial block is guarded by the CELL_GCM_MEASURE
 * check; the full-block loop writes unconditionally - confirm this is
 * intended for measure builds.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetVertexProgramParameterBlock)(CELL_GCM_ARGS(uint32_t baseConst, uint32_t constCount, const float * __restrict value))
{
	const uint32_t floatCount = constCount*4;
	const uint32_t fullBlocks = floatCount / 32;	// whole 32-float blocks
	const uint32_t tailFloats = floatCount % 32;	// floats left over
	uint32_t block = 0;

	CELL_GCM_RESERVE(2+fullBlocks*34+tailFloats);

	while (block < fullBlocks)
	{
		// each block of 32 floats covers 8 constant slots
		CELL_GCM_CURRENT[0] = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_TRANSFORM_CONSTANT_LOAD, 33);
		CELL_GCM_CURRENT[1] = CELL_GCM_ENDIAN_SWAP(baseConst + block*8);
		CELL_GCM_MEMCPY(&CELL_GCM_CURRENT[2], value, sizeof(float)*32);
		CELL_GCM_CURRENT += 34;
		value += 32;
		++block;
	}

#if (CELL_GCM_MEASURE==0)
	if (tailFloats != 0)
	{
		// method count = load slot word + remaining floats
		CELL_GCM_CURRENT[0] = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_TRANSFORM_CONSTANT_LOAD, tailFloats+1);
		CELL_GCM_CURRENT[1] = CELL_GCM_ENDIAN_SWAP(baseConst + fullBlocks*8);
		CELL_GCM_CURRENT += 2;

		CELL_GCM_MEMCPY(CELL_GCM_CURRENT, value, sizeof(float)*tailFloats);
		CELL_GCM_CURRENT += tailFloats;
	}
#endif

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetUpdateFragmentProgramParameter
 *
 * Reserves 2 command words and emits CELL_GCM_SET_SHADER_PROGRAM with the
 * fixed selector CELL_GCM_SET_SHADER_PROGRAM_CONTEXT_DMA_A and the given
 * offset.
 *
 * offset - program offset; asserted to be a multiple of 64.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetUpdateFragmentProgramParameter)(CELL_GCM_ARGS(uint32_t offset))
{
	/* alignment restriction, SET_SHADER_PROGRAM needs to be 64byte alignment*/
	CELL_GCM_ASSERT((offset & 63) == 0);

	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_SHADER_PROGRAM(CELL_GCM_CURRENT, 
		CELL_GCM_SET_SHADER_PROGRAM_CONTEXT_DMA_A, offset);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetFragmentProgramGammaEnable
 *
 * Reserves 2 command words and forwards `enable` unchanged to
 * CELL_GCM_SET_SHADER_PACKER (note the macro name differs from the
 * function name).
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetFragmentProgramGammaEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_SHADER_PACKER(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetUserClipPlaneControl
 *
 * Reserves 2 command words and forwards the six per-plane values, in
 * order, to CELL_GCM_SET_USER_CLIP_PLANE_CONTROL.
 *
 * plane0..plane5 - passed through unchanged; their encoding is defined by
 * the macro, not here.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetUserClipPlaneControl)(CELL_GCM_ARGS(
	uint32_t plane0, uint32_t plane1, uint32_t plane2, uint32_t plane3, 
	uint32_t plane4, uint32_t plane5))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_USER_CLIP_PLANE_CONTROL(CELL_GCM_CURRENT, 
		plane0, plane1, plane2, plane3, plane4, plane5);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

/*
 * SetVertexProgramConstants
 *
 * Uploads `count` floats, starting at constant slot `first`, as a series
 * of CELL_GCM_NV4097_SET_TRANSFORM_CONSTANT_LOAD methods.
 *
 * first - first constant slot to load.
 * count - number of floats to send (the code consumes the data in units
 *         of single floats: 32 per full block, `count % 32` in the tail).
 * data  - source floats; at least `count` must be readable.
 *
 * Each full block is 34 words (header, load slot, 32 floats) and advances
 * the load slot by 8. A final partial block carries the remaining floats.
 * The float data is copied with CELL_GCM_MEMCPY, without a byte swap.
 *
 * NOTE(review): only the partial block is guarded by the CELL_GCM_MEASURE
 * check; the full-block loop writes unconditionally - confirm this is
 * intended for measure builds.
 */
CELL_GCM_DECL void CELL_GCM_FUNC(SetVertexProgramConstants)(CELL_GCM_ARGS(
	uint32_t first, uint32_t count, const float *data))
{
	const float * __restrict cursor = data;
	const uint32_t fullBlocks = count / 32;	// whole 32-float blocks
	const uint32_t tailFloats = count % 32;	// floats left over
	uint32_t block = 0;

	CELL_GCM_RESERVE(fullBlocks*34+(tailFloats!=0 ? 2+tailFloats : 0));

	while (block < fullBlocks)
	{
		// each block of 32 floats covers 8 constant slots
		CELL_GCM_CURRENT[0] = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_TRANSFORM_CONSTANT_LOAD, 33);
		CELL_GCM_CURRENT[1] = CELL_GCM_ENDIAN_SWAP(first + block * 8);
		CELL_GCM_MEMCPY(&CELL_GCM_CURRENT[2], cursor, sizeof(float)*32);
		CELL_GCM_CURRENT += 34;
		cursor += 32;
		++block;
	}

#if (CELL_GCM_MEASURE==0)
	if (tailFloats != 0)
	{
		// method count = load slot word + remaining floats
		CELL_GCM_CURRENT[0] = CELL_GCM_METHOD(CELL_GCM_NV4097_SET_TRANSFORM_CONSTANT_LOAD, tailFloats+1);
		CELL_GCM_CURRENT[1] = CELL_GCM_ENDIAN_SWAP(first + fullBlocks * 8);
		CELL_GCM_CURRENT += 2;

		CELL_GCM_MEMCPY(CELL_GCM_CURRENT, cursor, sizeof(float)*tailFloats);
		CELL_GCM_CURRENT += tailFloats;
	}
#endif

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Copies `sizeInWords` 32-bit words from CPU memory at `srcAdr` into the
// command stream as an inline one-row Y32 image aimed at `dstOffset`.
//
//   dstOffset   - destination offset; asserted to be 4-byte aligned.
//   srcAdr      - source words; only read, never written.
//   sizeInWords - number of 32-bit words to copy; asserted < 512.
//   location    - added to CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER to pick
//                 the destination DMA context.
//
// The destination surface offset is rounded down to 64 bytes and the
// remainder is expressed as a starting pixel (word) position. The inline
// payload is padded with one zero word when `sizeInWords` is odd.
// In CELL_GCM_MEASURE builds the payload is not written.
CELL_GCM_DECL void CELL_GCM_FUNC(InlineTransfer)(CELL_GCM_ARGS(const uint32_t dstOffset, const void *srcAdr, const uint32_t sizeInWords, const uint8_t location))
{
	// read-only cursors over the caller's data
	const uint32_t *src;
	const uint32_t *srcEnd;
	uint32_t paddedSizeInWords;
	uint32_t alignedVideoOffset;
	uint32_t pixelShift;

	// sanity checks
	CELL_GCM_ASSERT((dstOffset & 3) == 0);  // destination must be aligned
	CELL_GCM_ASSERT(sizeInWords < 2*896); // hw/class limit
	CELL_GCM_ASSERT(sizeInWords < 512);   // our artificial limit

	// handle 64 byte alignment restriction: aligned base + word position
	alignedVideoOffset = dstOffset & ~63;
	pixelShift = (dstOffset & 63) >> 2;

	// the inline payload must have an even number of words
	paddedSizeInWords = (sizeInWords + 1) & ~1;

	CELL_GCM_RESERVE(12+paddedSizeInWords);

	CELL_GCM_SURFACE_2D_DMA_IMAGE_DESTIN(CELL_GCM_CURRENT, CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER + location);

	CELL_GCM_SURFACE_2D_SET_OFFSET_DESTIN(CELL_GCM_CURRENT, 
		alignedVideoOffset);
	CELL_GCM_SURFACE_2D_SET_COLOR_FORMAT_AND_PITCHES(CELL_GCM_CURRENT, 
		CELL_GCM_SURFACE_2D_FORMAT_Y32, 
		0x1000, 0x1000);
	CELL_GCM_BLIT_POINT_AND_SIZE_OUT_AND_SIZE_IN(CELL_GCM_CURRENT, 
		pixelShift, 0, 
		sizeInWords, 1, 
		sizeInWords, 1);

#if CELL_GCM_MEASURE
	CELL_GCM_UNUSED(src);
	CELL_GCM_UNUSED(srcAdr);
	CELL_GCM_UNUSED(srcEnd);
	CELL_GCM_UNUSED(paddedSizeInWords);
#else
	CELL_GCM_CURRENT[0] = CELL_GCM_METHOD(CELL_GCM_NV308A_COLOR, paddedSizeInWords);
	CELL_GCM_CURRENT += 1;

	// copy data into the command fifo
	src = (const uint32_t *)srcAdr;
	srcEnd = src + sizeInWords;
	for(; src < srcEnd; ++src)
	{
		CELL_GCM_CURRENT[0] = CELL_GCM_ENDIAN_SWAP(src[0]);
		CELL_GCM_CURRENT += 1;
	}

	// pad an odd-length payload with a single zero word
	if (paddedSizeInWords != sizeInWords)
	{
		CELL_GCM_CURRENT[0] = 0;
		CELL_GCM_CURRENT += 1;
	}
#endif

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Copies a width x height pixel rectangle from a source surface to a
// destination surface using the 2D-surface and stretch-blit methods.
// Both ds/dx and dt/dy are passed as 1<<20 (presumably 1.0 in fixed
// point, i.e. no scaling -- TODO confirm).
//
//   mode          - CELL_GCM_TRANSFER_* direction; selects the source and
//                   destination DMA context handles.
//   dstOffset     - destination surface offset (asserted 64-byte aligned).
//   dstPitch      - destination pitch (asserted 64-byte aligned, < 0xffff).
//   dstX, dstY    - destination rectangle origin, in pixels.
//   srcOffset     - source surface offset.
//   srcPitch      - source pitch (asserted < 0xffff).
//   srcX, srcY    - source rectangle origin, in pixels.
//   width, height - rectangle size, in pixels.
//   bytesPerPixel - 2 (R5G6B5) or 4 (A8R8G8B8); any other value asserts
//                   and leaves both formats at 0.
//
// Returns CELL_OK, or CELL_GCM_ERROR_FAILURE for an unrecognised mode.
// CELL_GCM_RESERVE_RET is also given CELL_GCM_ERROR_FAILURE, presumably
// to return it when space cannot be reserved -- TODO confirm.
CELL_GCM_DECL uint32_t CELL_GCM_FUNC(SetTransferImage)(CELL_GCM_ARGS(
	uint8_t mode, uint32_t dstOffset, uint32_t dstPitch, uint32_t dstX, 
	uint32_t dstY, uint32_t srcOffset, uint32_t srcPitch, uint32_t srcX, 
	uint32_t srcY, uint32_t width, uint32_t height, uint32_t bytesPerPixel))
{
	uint32_t srcHandle,dstHandle;

	// pick the DMA contexts for the requested direction
	switch(mode)
	{
	case CELL_GCM_TRANSFER_LOCAL_TO_LOCAL:
		srcHandle = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER;
		dstHandle = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER;
		break;
	case CELL_GCM_TRANSFER_MAIN_TO_LOCAL:
		srcHandle = CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER;
		dstHandle = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER;
		break;
	case CELL_GCM_TRANSFER_LOCAL_TO_MAIN:
		srcHandle = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER;
		dstHandle = CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER;
		break;
	default:
		return CELL_GCM_ERROR_FAILURE;
		break;
	}

	// one-time setup: destination/source DMA contexts and the 2D surface
	CELL_GCM_RESERVE_RET(6, CELL_GCM_ERROR_FAILURE);
	CELL_GCM_SURFACE_2D_DMA_IMAGE_DESTIN(CELL_GCM_CURRENT, 
		dstHandle);
	CELL_GCM_STRETCHBLIT_SET_CONTEXT_DMA_IMAGE(CELL_GCM_CURRENT, 
		srcHandle);
	CELL_GCM_STRETCHBLIT_SET_CONTEXT_SURFACE(CELL_GCM_CURRENT, 
		CELL_GCM_CONTEXT_2D_SURFACE);

	// blits are split into destination-aligned tiles of BLOCKSIZE (1024) pixels
	uint32_t NV_SURFACE2D_MAX_DIM = 10;
	uint32_t BLOCKSIZE = 1 << NV_SURFACE2D_MAX_DIM;
	uint32_t srcFormat;
	uint32_t dstFormat;
	uint32_t x;
	uint32_t y;
	uint32_t finalDstX;
	uint32_t finalDstY;

	// sanity test
	CELL_GCM_ASSERT((dstOffset & 63) == 0); // 64byte alignment
	CELL_GCM_ASSERT((dstPitch & 63) == 0); // 64byte alignment
	CELL_GCM_ASSERT(srcPitch < 0xffff);
	CELL_GCM_ASSERT(dstPitch < 0xffff);

	// determine color format
	switch(bytesPerPixel)
	{
	case 2:
		srcFormat = CELL_GCM_STRETCHBLIT_FORMAT_R5G6B5;
		dstFormat = CELL_GCM_SURFACE_2D_FORMAT_R5G6B5;
		break;
	case 4:
		srcFormat = CELL_GCM_STRETCHBLIT_FORMAT_A8R8G8B8;
		dstFormat = CELL_GCM_SURFACE_2D_FORMAT_A8R8G8B8;
		break;
	case 1: // LE_Y8 is not supported on curie
	default:
		srcFormat = 0;
		dstFormat = 0;
		CELL_GCM_ASSERT(0);
		break;
	}

	// split large blits
	finalDstX = dstX + width;
	finalDstY = dstY + height;
	for(y = dstY; y < finalDstY;)
	{
		// determine this blit's height: up to the next tile boundary or the
		// end of the rectangle, whichever comes first
		uint32_t dstTop = y & ~(BLOCKSIZE - 1);
		uint32_t dstBot = dstTop + BLOCKSIZE;
		uint32_t dstBltHeight = ((dstBot<finalDstY) ? dstBot : finalDstY) - y;
		for(x = dstX; x < finalDstX;)
		{
			// determine this blit's width (same rule as the height)
			uint32_t dstLeft = x & ~(BLOCKSIZE - 1);
			uint32_t dstRight = dstLeft + BLOCKSIZE;
			uint32_t dstBltWidth = ((dstRight<finalDstX) ? dstRight : finalDstX) - x;

			// align the surface/destination surface properly:
			// the destination surface starts at the tile origin, the source
			// offset points directly at the first source pixel of this tile
			uint32_t dstBlockOffset = bytesPerPixel * (dstLeft & ~(BLOCKSIZE - 1)) + dstPitch * dstTop;
			uint32_t srcBlockOffset = bytesPerPixel * (srcX + x-dstX) + srcPitch * (srcY + y-dstY);

			// handle bizarre class behavior: the input width handed to the
			// blitter is at least 16 and always even
			uint32_t safeDstBltWidth = (dstBltWidth < 16) ? 16 : (dstBltWidth + 1) & ~1;
			uint32_t destinOffset = dstOffset + dstBlockOffset;

			CELL_GCM_RESERVE_RET(20, CELL_GCM_ERROR_FAILURE);

			// set src + first blit
			CELL_GCM_SURFACE_2D_SET_OFFSET_DESTIN(CELL_GCM_CURRENT, 
				destinOffset);
			CELL_GCM_SURFACE_2D_SET_COLOR_FORMAT_AND_PITCHES(CELL_GCM_CURRENT, 
				dstFormat, 
				dstPitch, dstPitch);

			// clip rectangle and output rectangle are both the tile-relative
			// position and size of this piece
			CELL_GCM_STRETCHBLIT_SET_CONVERSION_AND_FORMAT_AND_OPERATION_AND_POINT_AND_SIZE_AND_OUT_POINT_AND_OUT_SIZE_AND_DS_DX_AND_DT_DY(CELL_GCM_CURRENT, 
				CELL_GCM_STRETCHBLIT_CONVERSION_TRUNCATE, 
				srcFormat,
				CELL_GCM_STRETCHBLIT_OPERATION_SRCCOPY,
				x - dstLeft, y - dstTop,
				dstBltWidth, dstBltHeight,
				x - dstLeft, y - dstTop,
				dstBltWidth, dstBltHeight,
				1<<20,
				1<<20);

			CELL_GCM_STRETCHBLIT_IMAGE_IN_SIZE_AND_FORMAT_AND_OFFSET_AND_POINT(CELL_GCM_CURRENT, 
				safeDstBltWidth,dstBltHeight,
				srcPitch, CELL_GCM_STRETCHBLIT_ORIGIN_CORNER, CELL_GCM_STRETCHBLIT_INTERPOLATOR_ZOH,
				srcOffset + srcBlockOffset,
				0, 0);
			x += dstBltWidth;
		}
		y += dstBltHeight;
	}

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);

	return CELL_OK;
}

// Emits HOST2VID copy commands that move `rowCount` rows of `bytesPerRow`
// bytes from `srcOffset` to `dstOffset`. The DMA contexts are not set
// here; the callers in this file (SetTransferData, SetConvertSwizzleFormat)
// emit CELL_GCM_HOST2VID_SET_CONTEXT_DMA_BUFFER_IN_AND_OUT before calling.
//
//   dstOffset / srcOffset - start offsets of destination and source.
//   dstPitch / srcPitch   - signed distance between consecutive rows.
//   bytesPerRow           - bytes copied per row (asserted >= 0).
//   rowCount              - number of rows (asserted >= 0).
//
// Strategy:
//   * if both pitches equal bytesPerRow the copy is contiguous and is
//     folded into a single row of bytesPerRow*rowCount bytes;
//   * if a pitch lies outside [-32768, 32767] each row is issued as its
//     own command with pitch 0 and the offsets are advanced here;
//   * otherwise rows are issued in batches of at most 0x7ff.
// In every case a row longer than 0x3fffff bytes is split into several
// commands. A final HOST2VID_OFFSET_OUT of 0 is always emitted.
CELL_GCM_DECL void CELL_GCM_FUNC(TransferData)(CELL_GCM_ARGS(uint32_t dstOffset, int32_t dstPitch, uint32_t srcOffset, int32_t srcPitch, int32_t bytesPerRow, int32_t rowCount))
{
	// limits applied below to pitch, rows per command and bytes per line
	int32_t CL0039_MIN_PITCH = -32768;
	int32_t CL0039_MAX_PITCH = 32767;
	int32_t CL0039_MAX_ROWS = 0x7ff;
	uint32_t CL0039_MAX_LINES = 0x3fffff;
	uint32_t colCount;
	uint32_t rows;
	uint32_t cols;

	// argument check
	CELL_GCM_ASSERT(bytesPerRow >= 0);
	CELL_GCM_ASSERT(rowCount >= 0);

	// can we turn this into a contiguous blit ?
	if ((srcPitch == bytesPerRow) && (dstPitch == bytesPerRow))
	{
		bytesPerRow *= rowCount;
		rowCount = 1;
		srcPitch = 0;
		dstPitch = 0;
	}

	// unusual pitch values
	if ((srcPitch < CL0039_MIN_PITCH) || (srcPitch > CL0039_MAX_PITCH) ||
		(dstPitch < CL0039_MIN_PITCH) || (dstPitch > CL0039_MAX_PITCH))
	{
		// fallback: blit per line (could improve this case)
		// Blit one line at a time
		while(--rowCount >= 0)
		{
			// colCount is the number of bytes of this row still to be copied
			for(colCount = bytesPerRow; colCount>0; colCount -= cols)
			{
				// clamp to limit
				cols = (colCount > CL0039_MAX_LINES) ? CL0039_MAX_LINES : colCount;

				// do the blit: one line, pitches unused (0)
				CELL_GCM_RESERVE(9);
				CELL_GCM_HOST2VID_OFFSET_IN_AND_OUT_AND_PITCH_IN_AND_OUT_AND_LINE_LENGTH_AND_LINE_COUNT_AND_FORMAT_AND_NOTIFY(CELL_GCM_CURRENT, 
					srcOffset + (bytesPerRow - colCount),
					dstOffset + (bytesPerRow - colCount),
					0,
					0,
					cols,
					1,
					1, 1,
					CELL_GCM_HOST2VID_BUFFER_NOTIFY_WRITE_ONLY);
			}

			// step to the next row by hand, since the pitch is out of range
			dstOffset += dstPitch;
			srcOffset += srcPitch;
		}
	}
	else
	{
		// for each batch of rows
		for(;rowCount>0; rowCount -= rows)
		{
			// clamp to limit ?
			rows = (rowCount > CL0039_MAX_ROWS) ? CL0039_MAX_ROWS : rowCount;

			// for each batch of cols
			for(colCount = bytesPerRow; colCount>0; colCount -= cols)
			{
				// clamp to limit
				cols = (colCount > CL0039_MAX_LINES) ? CL0039_MAX_LINES : colCount;

				// do the blit: `rows` lines of `cols` bytes, stepping by the pitches
				CELL_GCM_RESERVE(9);
				CELL_GCM_HOST2VID_OFFSET_IN_AND_OUT_AND_PITCH_IN_AND_OUT_AND_LINE_LENGTH_AND_LINE_COUNT_AND_FORMAT_AND_NOTIFY(CELL_GCM_CURRENT, 
					srcOffset + (bytesPerRow - colCount),
					dstOffset + (bytesPerRow - colCount),
					srcPitch,
					dstPitch,
					cols,
					rows,
					1, 1,
					CELL_GCM_HOST2VID_BUFFER_NOTIFY_WRITE_ONLY);
			}

			// Advance to next set of rows
			srcOffset += rows * srcPitch;
			dstOffset += rows * dstPitch;
		}
	}

	// always finish by writing 0 to the output offset
	CELL_GCM_RESERVE(2);
	CELL_GCM_HOST2VID_OFFSET_OUT(CELL_GCM_CURRENT, 0);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Selects the source/destination DMA contexts for `mode` and then hands
// the actual row copy to TransferData.
//
//   mode        - one of CELL_GCM_TRANSFER_MAIN_TO_LOCAL,
//                 CELL_GCM_TRANSFER_LOCAL_TO_MAIN or
//                 CELL_GCM_TRANSFER_LOCAL_TO_LOCAL.
//   dstOffset, dstPitch, srcOffset, srcPitch, bytesPerRow, rowCount
//               - forwarded unchanged to TransferData.
//
// Returns CELL_OK, or CELL_GCM_ERROR_FAILURE when `mode` is none of the
// three values above. CELL_GCM_RESERVE_RET is given the same failure code,
// presumably for the case where space cannot be reserved -- TODO confirm.
CELL_GCM_DECL uint32_t CELL_GCM_FUNC(SetTransferData)(CELL_GCM_ARGS(
	uint8_t mode, uint32_t dstOffset, uint32_t dstPitch, uint32_t srcOffset, 
	uint32_t srcPitch, uint32_t bytesPerRow, uint32_t rowCount))
{
	// main memory is the source only for MAIN_TO_LOCAL and the destination
	// only for LOCAL_TO_MAIN; every other end point is local memory
	const int srcIsMain = (mode == CELL_GCM_TRANSFER_MAIN_TO_LOCAL);
	const int dstIsMain = (mode == CELL_GCM_TRANSFER_LOCAL_TO_MAIN);
	uint32_t srcDma;
	uint32_t dstDma;

	// reject anything that is not one of the three known directions
	if (!srcIsMain && !dstIsMain && (mode != CELL_GCM_TRANSFER_LOCAL_TO_LOCAL))
	{
		return CELL_GCM_ERROR_FAILURE;
	}

	srcDma = srcIsMain ? CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER
	                   : CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER;
	dstDma = dstIsMain ? CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER
	                   : CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER;

	// bind the DMA contexts, then emit the copy itself
	CELL_GCM_RESERVE_RET(3, CELL_GCM_ERROR_FAILURE);
	CELL_GCM_HOST2VID_SET_CONTEXT_DMA_BUFFER_IN_AND_OUT(CELL_GCM_CURRENT, srcDma, dstDma);
	CELL_GCM_FUNC(TransferData)(CELL_GCM_ARGS_FUNC(dstOffset, dstPitch, srcOffset, srcPitch, bytesPerRow, rowCount));

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);

	return CELL_OK;
}

// Emits a SET_RENDER_ENABLE command.
//
//   mode  - CELL_GCM_CONDITIONAL emits a NOP followed by
//           SET_RENDER_ENABLE(2, index * 0x10); every other mode emits
//           SET_RENDER_ENABLE(1, 0).
//   index - query index, asserted < CELL_GCM_MAX_USER_QUERIES regardless
//           of mode; only used in the conditional case.
CELL_GCM_DECL void CELL_GCM_FUNC(SetRenderEnable)(CELL_GCM_ARGS(uint8_t mode, uint32_t index))
{
	CELL_GCM_ASSERT(index < CELL_GCM_MAX_USER_QUERIES);

	if(mode != CELL_GCM_CONDITIONAL)
	{
		// mode == CELL_GCM_TRUE, CELL_GCM_FALSE
		CELL_GCM_RESERVE(2);
		CELL_GCM_SET_RENDER_ENABLE(CELL_GCM_CURRENT, 1, 0);
	}
	else
	{
		// the query index is scaled by 0x10 to form the offset argument
		const uint32_t queryOffset = 0x10 * index;

		CELL_GCM_RESERVE(4);
		CELL_GCM_NOP(CELL_GCM_CURRENT);
		CELL_GCM_SET_RENDER_ENABLE(CELL_GCM_CURRENT, 2, queryOffset);
	}

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the SET_ZPASS_PIXEL_COUNT_ENABLE method with `enable` as its
// argument (2 command words are requested).
CELL_GCM_DECL void CELL_GCM_FUNC(SetZpassPixelCountEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_ZPASS_PIXEL_COUNT_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the CLEAR_REPORT_VALUE method for report `type`
// (2 command words are requested). `type` is passed through unchanged.
CELL_GCM_DECL void CELL_GCM_FUNC(SetClearReport)(CELL_GCM_ARGS(uint32_t type))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_CLEAR_REPORT_VALUE(CELL_GCM_CURRENT, type);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the GET_REPORT method for report `type`, targeting query `index`.
//   type  - report type, passed through unchanged.
//   index - query index; asserted < CELL_GCM_MAX_USER_QUERIES.
CELL_GCM_DECL void CELL_GCM_FUNC(SetReport)(CELL_GCM_ARGS(uint32_t type, uint32_t index))
{
	CELL_GCM_ASSERT(index < CELL_GCM_MAX_USER_QUERIES);

	// the query index is scaled by 0x10 to form the offset argument
	uint32_t hwOffset = 0x10 * index;

	CELL_GCM_RESERVE(2);
	CELL_GCM_GET_REPORT(CELL_GCM_CURRENT, type, hwOffset);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the SET_ZCULL_STATS_ENABLE method with `enable` as its argument
// (2 command words are requested).
CELL_GCM_DECL void CELL_GCM_FUNC(SetZcullStatsEnable)(CELL_GCM_ARGS(uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_ZCULL_STATS_ENABLE(CELL_GCM_CURRENT, enable);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the SET_ZCULL_CONTROL0 method built from `zCullDir` and
// `zCullFormat` (2 command words are requested). Both values are passed
// to the packing macro unchanged; their encodings are not visible here.
CELL_GCM_DECL void CELL_GCM_FUNC(SetZcullControl)(CELL_GCM_ARGS(const uint8_t zCullDir, const uint8_t zCullFormat))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_ZCULL_CONTROL0(CELL_GCM_CURRENT, zCullDir, zCullFormat);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the SET_ZCULL_CONTROL1 method built from `moveForwardLimit` and
// `pushBackLimit` (2 command words are requested). Both values are passed
// to the packing macro unchanged.
CELL_GCM_DECL void CELL_GCM_FUNC(SetZcullLimit)(CELL_GCM_ARGS(uint16_t moveForwardLimit, uint16_t pushBackLimit))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_ZCULL_CONTROL1(CELL_GCM_CURRENT, moveForwardLimit, pushBackLimit);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the SET_SCULL_CONTROL method built from `sFunc`, `sRef` and
// `sMask` (2 command words are requested). Judging by the names these are
// a stencil function, reference value and mask -- TODO confirm.
CELL_GCM_DECL void CELL_GCM_FUNC(SetScullControl)(CELL_GCM_ARGS(const uint8_t sFunc, const uint8_t sRef, const uint8_t sMask))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_SCULL_CONTROL(CELL_GCM_CURRENT, sFunc, sRef, sMask);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Copies a width x height texel rectangle from a pitch-addressed source
// into a swizzled destination surface of dstWidth x dstHeight texels.
//
//   dstOffset           - offset of the swizzled destination surface.
//   dstWidth, dstHeight - destination surface size in texels; asserted to
//                         be powers of two.
//   dstX, dstY          - destination rectangle origin, in texels.
//   srcOffset, srcPitch - source offset and pitch (pitch asserted < 0xffff).
//   srcX, srcY          - source rectangle origin, in texels.
//   width, height       - rectangle size; asserted non-zero, < 4096 and
//                         inside the destination surface.
//   bytesPerPixel       - 2, 4, 8 or 16; 8 and 16 are handled as 2 or 4
//                         horizontally adjacent 4-byte texels.
//   mode                - CELL_GCM_TRANSFER_* direction. Any unrecognised
//                         value is treated as CELL_GCM_TRANSFER_MAIN_TO_LOCAL.
//
// Two paths exist:
//   * destinations whose (adjusted) width is 1 or 2 texels, or whose
//     height is 1, are copied row by row through TransferData;
//   * everything else goes through the swizzled-surface + stretch-blit
//     methods, split into destination-aligned blocks of at most
//     1024 x 1024 texels.
CELL_GCM_DECL void CELL_GCM_FUNC(SetConvertSwizzleFormat)(CELL_GCM_ARGS(
	uint32_t dstOffset, uint32_t dstWidth, uint32_t dstHeight, uint32_t dstX, uint32_t dstY,
	uint32_t srcOffset, uint32_t srcPitch, uint32_t srcX, uint32_t srcY, uint32_t width, uint32_t height, uint32_t bytesPerPixel,
	uint8_t mode))
{
	uint32_t srcHandle,dstHandle;

	// pick the DMA contexts; note that `default` shares the MAIN_TO_LOCAL case
	switch(mode)
	{
	  case CELL_GCM_TRANSFER_MAIN_TO_LOCAL:
	  default:
		  srcHandle = CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER;
		  dstHandle = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER;
		  break;
	  case CELL_GCM_TRANSFER_LOCAL_TO_MAIN:
		  srcHandle = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER;
		  dstHandle = CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER;
		  break;
	  case CELL_GCM_TRANSFER_LOCAL_TO_LOCAL:
		  srcHandle = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER;
		  dstHandle = CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER;
		  break;
	}

    uint32_t NV_MEM2MEM_MAX_HEIGHT_VALUE = 2047;
    uint32_t NV_SURFACE_SWIZZLED_MAX_DIM = 10;

	// log2 of the destination dimensions, computed as 31 minus the result
	// of a count-leading-zeros instruction (exact for the powers of two
	// asserted below)
#ifdef __SPU__
	uint32_t dstwlog2 = 31 - ({__asm__("clz %0,%1" : "=r" (dstwlog2) : "r" (dstWidth)); dstwlog2;});
	uint32_t dsthlog2 = 31 - ({__asm__("clz %0,%1" : "=r" (dsthlog2) : "r" (dstHeight)); dsthlog2;});
#else
	uint32_t dstwlog2 = 31 - ({__asm__("cntlzw %0,%1" : "=r" (dstwlog2) : "r" (dstWidth)); dstwlog2;});
	uint32_t dsthlog2 = 31 - ({__asm__("cntlzw %0,%1" : "=r" (dsthlog2) : "r" (dstHeight)); dsthlog2;});
#endif

    // a few sanity checks
    CELL_GCM_ASSERT(height && width);
    CELL_GCM_ASSERT((width<4096) && (height<4096));
    CELL_GCM_ASSERT((height <= dstHeight) && (width <= dstWidth));
    CELL_GCM_ASSERT(((dstY + height) <= dstHeight) && ((dstX + width) <= dstWidth));
    CELL_GCM_ASSERT(((dstWidth & (dstWidth - 1)) == 0) && (((dstHeight & (dstHeight - 1)) == 0)));
    CELL_GCM_ASSERT(srcPitch < 0xffff);

    // 8- and 16-byte texels are re-expressed as 2 or 4 texels of 4 bytes:
    // all horizontal quantities are scaled up and bytesPerPixel becomes 4
    switch (bytesPerPixel)
    {
        case 2:
        case 4:
            break;
        case 8:
            dstWidth <<= 1;
            dstX <<= 1;
            srcX <<= 1;
            width <<= 1;
            bytesPerPixel >>= 1;
            dstwlog2 += 1;
            break;
        case 16:
            dstWidth <<= 2;
            dstX <<= 2;
            srcX <<= 2;
            width <<= 2;
            bytesPerPixel >>= 2;
            dstwlog2 += 2;
            break;
        default:
            CELL_GCM_ASSERT(0);
            break;
    }

    // destination is a 1xN or Nx1 ? => swizzled result is a Nx1 linear texture
    // destination is a 2xN          => col 0 and col 1 are interleaved
    // (NVXX_CONTEXT_SURFACE_SWIZZLED does not support an 1xN or Nx1 destination)
    if ((dstwlog2 <= 1) || (dsthlog2 == 0))
    {
		// set src/dst location
		CELL_GCM_RESERVE(3);
		CELL_GCM_HOST2VID_SET_CONTEXT_DMA_BUFFER_IN_AND_OUT(CELL_GCM_CURRENT, srcHandle, dstHandle);

        uint32_t dstPitch;
        uint32_t linesLeft;

        // Nx1 mapping is (x_n, ..., x_0)
        // 1xN mapping is (y_n, ..., y_0)
        // 2xN mapping is (y_n, ..., y_0, x_0)

        // get rid of src/dst position
		dstPitch = bytesPerPixel << dstwlog2;
		srcOffset = srcOffset + srcX * bytesPerPixel + srcY * srcPitch;
		dstOffset = dstOffset + dstX * bytesPerPixel + dstY * dstPitch;

        // MEM2MEM maximum height is 2047..
        for(linesLeft = height; linesLeft;)
        {
            // actualHeight = min(NV_MEM2MEM_MAX_HEIGHT_VALUE, linesLeft);
            uint32_t actualHeight = (linesLeft > NV_MEM2MEM_MAX_HEIGHT_VALUE)
                               ?  NV_MEM2MEM_MAX_HEIGHT_VALUE
                               :  linesLeft;

			// todo: this is incorrect for the vid->vid case
			CELL_GCM_FUNC(TransferData)(CELL_GCM_ARGS_FUNC(dstOffset, dstPitch, srcOffset, srcPitch, width*bytesPerPixel, actualHeight));

            srcOffset = srcOffset + actualHeight * srcPitch;
            dstOffset = dstOffset + actualHeight * dstPitch;
            linesLeft -= actualHeight;
        }
        // NOTE(review): this return skips the CELL_GCM_DEBUG_FINISH at the
        // bottom of the function; TransferData calls it itself, so this only
        // differs if the loop above never runs -- confirm this is intended.
        return;
    }
    else
    {
		// set src/dst location
		CELL_GCM_RESERVE(6);
		CELL_GCM_SURFACE_SWIZ_SET_CONTEXT_DMA_IMAGE(CELL_GCM_CURRENT, dstHandle);
		CELL_GCM_STRETCHBLIT_SET_CONTEXT_DMA_IMAGE(CELL_GCM_CURRENT, srcHandle);
		CELL_GCM_STRETCHBLIT_SET_CONTEXT_SURFACE(CELL_GCM_CURRENT, CELL_GCM_CONTEXT_SWIZ_SURFACE);

        uint32_t origSrcOffset;
        uint32_t srcFormat;
        uint32_t dstFormat;
        uint32_t logWidthLimit;
        uint32_t logHeightLimit;
        uint32_t yTop;
        uint32_t xEnd;
        uint32_t yEnd;
        uint32_t x;
        uint32_t y;

        // note:
        //  NVXX_CONTEXT_SURFACE_SWIZZLED expects the destination to be 64byte aligned,
        //  and only lods with <= 16 texels (i.e. 4x4, 8x2, 2x8, 16x1, 1x16@16bit) can
        //  cause the lower lods to be unaligned (32texels@16 bit is a multiple of 64).
        //  [note that 2x4 is not handled with mem2mem but 2x4 is supposed to be well
        //   aligned, because the earlier lod 4x8 had 32 texels!]
        //  iow: unaligned lods have either a width or height of 2 or 1.
        //  -- an assert guards this logic!)
        CELL_GCM_ASSERT((dstWidth >= 4) && (dstHeight >= 2));

        // determine color format
		switch(bytesPerPixel)
		{
		case 2:
			srcFormat = CELL_GCM_STRETCHBLIT_FORMAT_R5G6B5;
			dstFormat = CELL_GCM_SURFACE_2D_FORMAT_R5G6B5;
			break;
		case 4:
			srcFormat = CELL_GCM_STRETCHBLIT_FORMAT_A8R8G8B8;
			dstFormat = CELL_GCM_SURFACE_2D_FORMAT_A8R8G8B8;
			break;
		case 1: // LE_Y8 is not supported on curie
		default:
			srcFormat = 0;
			dstFormat = 0;
			CELL_GCM_ASSERT(0);
			break;
		}

        // The HW cannot handle arbitrarily large blts, so the blit is split into
        // multiple blocks. The regions are aligned to the dst. These are the begin/end
        // of a given block.
        logWidthLimit  = (dstwlog2 > NV_SURFACE_SWIZZLED_MAX_DIM ) ? NV_SURFACE_SWIZZLED_MAX_DIM : dstwlog2;
        logHeightLimit = (dsthlog2 > NV_SURFACE_SWIZZLED_MAX_DIM ) ? NV_SURFACE_SWIZZLED_MAX_DIM : dsthlog2;

        // align the Src Blt to the Dst, that way we can forget about srcX and srcY.
        origSrcOffset = srcOffset;
        srcOffset += (srcX - dstX) * bytesPerPixel + (srcY - dstY) * srcPitch;

        // blit limits
        xEnd = dstX + width;
        yEnd = dstY + height;

        // For the top row of blocks, yTop != y
        yTop = dstY & ~((1 << NV_SURFACE_SWIZZLED_MAX_DIM) - 1);
        for(y = dstY; y < yEnd;)
        {
            uint32_t xLeft;
            uint32_t yBottom;
            uint32_t bltHeight;

            // determine actual copy height for this iteration
            yBottom = yTop + (1 << NV_SURFACE_SWIZZLED_MAX_DIM);
            if(yBottom > (1ul << dsthlog2))
            {
                yBottom = (1 << dsthlog2);
            }
            bltHeight = (yBottom > yEnd) ? yEnd - y : yBottom - y;

            // for the left column of blocks, xLeft != x
            xLeft = dstX & ~((1 << NV_SURFACE_SWIZZLED_MAX_DIM) - 1);
            for(x = dstX; x < xEnd;)
            {
                uint32_t xRight;
                uint32_t bltWidth;
                uint32_t blockSrcOffset;
                uint32_t blockDstOffset;
                uint32_t blockX;
                uint32_t blockY;
                uint32_t srcWidth;

                // determine actual copy width for this iteration
                xRight = xLeft + (1 << NV_SURFACE_SWIZZLED_MAX_DIM);
                bltWidth = (xRight > xEnd ) ? xEnd - x : xRight - x;

                // NVXX_CONTEXT_SURFACE_SWIZZLED ignores the lower bits of the 
                // destination offset.
                // NOTE(review): this else-branch is only entered when
                // dstwlog2 >= 2 and dsthlog2 >= 1, so the two special cases
                // below cannot be taken as written -- confirm before removing.
				if (!dstwlog2)
				{
					blockDstOffset = dstOffset + yTop * bytesPerPixel;
				}
				else if (!dsthlog2)
				{
					blockDstOffset = dstOffset + xLeft * bytesPerPixel;
				}
				else
				{
					// #'common' bits
					uint32_t log = (dstwlog2 < dsthlog2) ? dstwlog2 : dsthlog2;  
					// # of bits to interleave
					uint32_t doubleLog = log << 1;                     
					// bits to preserve
					uint32_t upperMask = ~((1 << doubleLog) - 1);      
					// bits to interleave
					uint32_t lowerMask = ~upperMask;                   

					// calc offset: the low `log` bits of x and y are interleaved
					// (x in even bit positions, y in odd ones); the remaining
					// high bits are shifted up by `log` and kept as they are
					uint32_t upperU = (xLeft << log) & upperMask;
					uint32_t upperV = (yTop << log) & upperMask;
					uint32_t lower  = ((xLeft & 0x001) <<  0) | ((yTop & 0x001) <<  1)
									| ((xLeft & 0x002) <<  1) | ((yTop & 0x002) <<  2)
									| ((xLeft & 0x004) <<  2) | ((yTop & 0x004) <<  3)
									| ((xLeft & 0x008) <<  3) | ((yTop & 0x008) <<  4)
									| ((xLeft & 0x010) <<  4) | ((yTop & 0x010) <<  5)
									| ((xLeft & 0x020) <<  5) | ((yTop & 0x020) <<  6)
									| ((xLeft & 0x040) <<  6) | ((yTop & 0x040) <<  7)
									| ((xLeft & 0x080) <<  7) | ((yTop & 0x080) <<  8)
									| ((xLeft & 0x100) <<  8) | ((yTop & 0x100) <<  9)
									| ((xLeft & 0x200) <<  9) | ((yTop & 0x200) << 10)
									| ((xLeft & 0x400) << 10) | ((yTop & 0x400) << 11)
									| ((xLeft & 0x800) << 11) | ((yTop & 0x800) << 12);
					CELL_GCM_ASSERT((xLeft < 4096) && (yTop < 4096));
					blockDstOffset = dstOffset + ((lower & lowerMask) | upperU | upperV) * bytesPerPixel;
				}
				
				CELL_GCM_ASSERT((blockDstOffset & 0x3f) == 0); // ** SERIOUS (RENDERING) ERROR **

                // clip - blockX and blockY are the X and Y offsets within this block
                blockX = x & ((1 << NV_SURFACE_SWIZZLED_MAX_DIM) - 1);
                blockY = y & ((1 << NV_SURFACE_SWIZZLED_MAX_DIM) - 1);

                // compute blt location in src
                blockSrcOffset = srcOffset + x * bytesPerPixel + y * srcPitch;
                CELL_GCM_ASSERT(blockSrcOffset >= origSrcOffset);

                // handle bizarre class behavior: input width is at least 16 and even
                srcWidth = (bltWidth < 16) ? 16 : (bltWidth + 1) & ~1;

                // set dst format/offset
				CELL_GCM_RESERVE(18);
				CELL_GCM_SURFACE_SWIZ_SET_FORMAT_AND_OFFSET(CELL_GCM_CURRENT,
					dstFormat, logWidthLimit, logHeightLimit,
					blockDstOffset);

                // set src + first blit
				CELL_GCM_STRETCHBLIT_SET_CONVERSION_AND_FORMAT_AND_OPERATION_AND_POINT_AND_SIZE_AND_OUT_POINT_AND_OUT_SIZE_AND_DS_DX_AND_DT_DY(CELL_GCM_CURRENT, 
					CELL_GCM_STRETCHBLIT_CONVERSION_TRUNCATE, 
					srcFormat,
					CELL_GCM_STRETCHBLIT_OPERATION_SRCCOPY,
					blockX, blockY,
					bltWidth, bltHeight,
					blockX, blockY,
					bltWidth, bltHeight,
					1 << 20,
					1 << 20);

				CELL_GCM_STRETCHBLIT_IMAGE_IN_SIZE_AND_FORMAT_AND_OFFSET_AND_POINT(CELL_GCM_CURRENT, 
					srcWidth, bltHeight,
					srcPitch, CELL_GCM_STRETCHBLIT_ORIGIN_CORNER, CELL_GCM_STRETCHBLIT_INTERPOLATOR_ZOH,
					blockSrcOffset,
					0, 0);

                // increment in X
                x = xLeft = xRight;
            }
        
            // increment in Y
            y = yTop = yBottom;
        }
    }

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Binds `texture` to vertex texture unit `index` by emitting the offset,
// format, control3 (pitch) and image-rect methods for that unit.
//
//   index   - vertex texture unit; asserted < CELL_GCM_MAX_VERTEX_TEXTURE.
//   texture - texture description. Asserted:
//               * format is exactly LN|NR|W32_Z32_Y32_X32_FLOAT or
//                 LN|NR|X32_FLOAT,
//               * offset is 128-byte aligned,
//               * mipmap is in [1, 13].
//
// The format word is packed as
//   (location + 1) | (dimension << 4) | (format << 8) | (mipmap << 16)
// and the image rect as height | (width << 16).
CELL_GCM_DECL void CELL_GCM_FUNC(SetVertexTexture)(CELL_GCM_ARGS(const uint8_t index, const CellGcmTexture *texture))
{
	uint32_t offset, format, control3, imagerect;

	// parameter check
	CELL_GCM_ASSERT(index < CELL_GCM_MAX_VERTEX_TEXTURE);
	// The combined format constants must be parenthesised: `==` binds tighter
	// than `|`, so without the parentheses the comparison is made against the
	// first flag only and the remaining flags are OR-ed into the result.
	CELL_GCM_ASSERTS((texture->format == (CELL_GCM_TEXTURE_LN|CELL_GCM_TEXTURE_NR|CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT)) 
	           || (texture->format == (CELL_GCM_TEXTURE_LN|CELL_GCM_TEXTURE_NR|CELL_GCM_TEXTURE_X32_FLOAT)), "Vertex Texture only support LN_NR_W32_Z32_Y32_X32_FLOAT or LN_NR_X32_FLOAT format" ); 
	CELL_GCM_ASSERTS( (texture->offset & 127)==0, "Texture offset must be 128 byte aligned" );
	CELL_GCM_ASSERTS((texture->mipmap > 0) && (texture->mipmap <=13), "Legal range for mipmap [1, 13]" );

	CELL_GCM_RESERVE(7);

	// pack the register values
	offset = texture->offset;
	format = (texture->location + 1) | (texture->dimension << 4) 
		| (texture->format << 8) | (texture->mipmap << 16);
	imagerect = texture->height | (texture->width << 16);
	control3 = texture->pitch;

	CELL_GCM_SET_VERTEX_TEXTURE_OFFSET_AND_FORMAT(CELL_GCM_CURRENT, index,
		offset,
		format);
	CELL_GCM_SET_VERTEX_TEXTURE_CONTROL3(CELL_GCM_CURRENT, index,
		control3);
	CELL_GCM_SET_VERTEX_TEXTURE_IMAGE_RECT(CELL_GCM_CURRENT, index,
		imagerect);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the SET_VERTEX_TEXTURE_ADDRESS method for vertex texture unit
// `index` with the wrap settings `wraps` and `wrapt`.
//   index - vertex texture unit; asserted < CELL_GCM_MAX_VERTEX_TEXTURE.
CELL_GCM_DECL void CELL_GCM_FUNC(SetVertexTextureAddress)(CELL_GCM_ARGS(const uint8_t index, const uint8_t wraps, const uint8_t wrapt))
{
	CELL_GCM_ASSERT(index < CELL_GCM_MAX_VERTEX_TEXTURE);

	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_VERTEX_TEXTURE_ADDRESS(CELL_GCM_CURRENT, index, wraps, wrapt);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the SET_VERTEX_TEXTURE_FILTER method for vertex texture unit
// `index` with the given `bias`.
//   index - vertex texture unit; asserted < CELL_GCM_MAX_VERTEX_TEXTURE.
//   bias  - bias value; when CELL_GCM_BITFIELD is defined only its low
//           13 bits (0x1fff) are passed on.
CELL_GCM_DECL void CELL_GCM_FUNC(SetVertexTextureFilter)(CELL_GCM_ARGS(const uint8_t index, const uint16_t bias))
{
	CELL_GCM_ASSERT(index < CELL_GCM_MAX_VERTEX_TEXTURE);

	CELL_GCM_RESERVE(2);

#ifdef CELL_GCM_BITFIELD
	// bitfield variant: the value is masked here before being packed
	CELL_GCM_SET_VERTEX_TEXTURE_FILTER(CELL_GCM_CURRENT, index, bias & 0x1fff);
#else
	CELL_GCM_SET_VERTEX_TEXTURE_FILTER(CELL_GCM_CURRENT, index, bias);
#endif

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the SET_VERTEX_TEXTURE_CONTROL0 method for vertex texture unit
// `index`, carrying the enable flag and the LOD range.
//   index          - vertex texture unit; asserted < CELL_GCM_MAX_VERTEX_TEXTURE.
//   enable         - passed through unchanged.
//   minLod, maxLod - LOD limits; when CELL_GCM_BITFIELD is defined only
//                    their low 12 bits (0xfff) are passed on.
CELL_GCM_DECL void CELL_GCM_FUNC(SetVertexTextureControl)(CELL_GCM_ARGS(const uint8_t index, const uint32_t enable, const uint16_t minLod, const uint16_t maxLod))
{
	CELL_GCM_ASSERT(index < CELL_GCM_MAX_VERTEX_TEXTURE);

	CELL_GCM_RESERVE(2);

#ifdef CELL_GCM_BITFIELD
	// bitfield variant: the LOD values are masked here before being packed
	CELL_GCM_SET_VERTEX_TEXTURE_CONTROL0(CELL_GCM_CURRENT, index, enable, minLod & 0xfff, maxLod & 0xfff);
#else
	CELL_GCM_SET_VERTEX_TEXTURE_CONTROL0(CELL_GCM_CURRENT, index, enable, minLod, maxLod);
#endif

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the SET_VERTEX_TEXTURE_BORDER_COLOR method for vertex texture
// unit `index` with `color` as its argument.
//   index - vertex texture unit; asserted < CELL_GCM_MAX_VERTEX_TEXTURE.
//   color - passed through unchanged; its channel layout is not visible here.
CELL_GCM_DECL void CELL_GCM_FUNC(SetVertexTextureBorderColor)(CELL_GCM_ARGS(const uint8_t index, const uint32_t color))
{
	CELL_GCM_ASSERT(index < CELL_GCM_MAX_VERTEX_TEXTURE);

	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_VERTEX_TEXTURE_BORDER_COLOR(CELL_GCM_CURRENT, index, color);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits a WAIT_FOR_IDLE followed by a PM_TRIGGER command
// (4 command words are requested for the pair).
CELL_GCM_DECL void CELL_GCM_FUNC(SetPerfMonTrigger)(CELL_GCM_NO_ARGS())
{
	CELL_GCM_RESERVE(4);
	// the idle wait is emitted first, then the trigger
	CELL_GCM_WAIT_FOR_IDLE(CELL_GCM_CURRENT);
	CELL_GCM_PM_TRIGGER(CELL_GCM_CURRENT);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Draws primitives of type `mode` from vertex data embedded directly in
// the command stream.
//
//   mode  - draw mode passed to SET_DRAW_MODE to open the draw.
//   count - number of 32-bit words of vertex data.
//   data  - source words; only read, never written.
//
// The words are emitted as INLINE_ARRAY packets of at most
// CELL_GCM_MAX_METHOD_COUNT words each, bracketed by SET_DRAW_MODE(mode)
// and SET_DRAW_MODE(0), and preceded by a vertex-file invalidate.
// NOTE(review): unlike SetNopCommand, the raw writes through
// CELL_GCM_CURRENT here are not wrapped in a CELL_GCM_MEASURE guard --
// confirm whether measure builds are expected to use this function.
CELL_GCM_DECL void CELL_GCM_FUNC(SetDrawInlineArray)(CELL_GCM_ARGS(const uint8_t mode, const uint32_t count, const void *data))
{
	uint32_t loop, rest, i,j;
	// read-only cursor over the caller's data (const is preserved)
	const uint32_t *value = (const uint32_t *)data;

	// number of full packets and size of the final short packet
	loop = count / CELL_GCM_MAX_METHOD_COUNT;
	rest = count % CELL_GCM_MAX_METHOD_COUNT;

	CELL_GCM_RESERVE(8+loop*(1+CELL_GCM_MAX_METHOD_COUNT)+(rest!=0 ? 1+rest : 0));

	// hw bug workaround
	CELL_GCM_INVALIDATE_VERTEX_FILE_3(CELL_GCM_CURRENT);

	// start draw mode
	CELL_GCM_SET_DRAW_MODE(CELL_GCM_CURRENT, mode);

	// full packets
	for(i=0;i<loop;i++){
		CELL_GCM_CURRENT[0] = CELL_GCM_METHOD_NI(CELL_GCM_NV4097_INLINE_ARRAY, CELL_GCM_MAX_METHOD_COUNT);
		CELL_GCM_CURRENT++;

		for(j=0;j<CELL_GCM_MAX_METHOD_COUNT;j++){
			CELL_GCM_CURRENT[0] = CELL_GCM_ENDIAN_SWAP(*value);
			CELL_GCM_CURRENT++;
			value++;
		}
	}

	// final short packet
	if(rest){
		CELL_GCM_CURRENT[0] = CELL_GCM_METHOD_NI(CELL_GCM_NV4097_INLINE_ARRAY, rest);
		CELL_GCM_CURRENT++;

		for(j=0;j<rest;j++){
			CELL_GCM_CURRENT[0] = CELL_GCM_ENDIAN_SWAP(*value);
			CELL_GCM_CURRENT++;
			value++;
		}
	}

	// end draw mode
	CELL_GCM_SET_DRAW_MODE(CELL_GCM_CURRENT, 0);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Draws primitives of type `mode` from 32-bit indices embedded directly
// in the command stream.
//
//   mode  - draw mode passed to SET_DRAW_MODE to open the draw.
//   start - index of the first element of `data` to use.
//   count - number of indices to emit.
//   data  - index array; elements [start, start + count) are read.
//
// Indices are emitted as ARRAY_ELEMENT32 packets of at most
// CELL_GCM_MAX_METHOD_COUNT words, bracketed by SET_DRAW_MODE(mode) and
// SET_DRAW_MODE(0), and preceded by a vertex-file invalidate.
CELL_GCM_DECL void CELL_GCM_FUNC(SetDrawInlineIndexArray32)(CELL_GCM_ARGS(const uint8_t mode, const uint32_t start, const uint32_t count, const uint32_t *data))
{
	const uint32_t *idx = data + start;
	const uint32_t fullPackets = count / CELL_GCM_MAX_METHOD_COUNT;
	const uint32_t tailWords = count % CELL_GCM_MAX_METHOD_COUNT;
	uint32_t packet, word;

	// 8 words of fixed overhead, one header per packet, one word per index
	CELL_GCM_RESERVE(8+fullPackets*(1+CELL_GCM_MAX_METHOD_COUNT)+(tailWords!=0 ? 1+tailWords : 0));

	// hw bug workaround
	CELL_GCM_INVALIDATE_VERTEX_FILE_3(CELL_GCM_CURRENT);

	// open the draw
	CELL_GCM_SET_DRAW_MODE(CELL_GCM_CURRENT, mode);

	// full-size packets: header in slot 0, indices in slots 1..MAX
	for(packet = 0; packet < fullPackets; ++packet)
	{
		CELL_GCM_CURRENT[0] = CELL_GCM_METHOD_NI(CELL_GCM_NV4097_ARRAY_ELEMENT32, CELL_GCM_MAX_METHOD_COUNT);
		for(word = 0; word < CELL_GCM_MAX_METHOD_COUNT; ++word)
		{
			CELL_GCM_CURRENT[1 + word] = CELL_GCM_ENDIAN_SWAP(idx[word]);
		}
		CELL_GCM_CURRENT += 1 + CELL_GCM_MAX_METHOD_COUNT;
		idx += CELL_GCM_MAX_METHOD_COUNT;
	}

	// trailing short packet, if the count is not a multiple of the maximum
	if(tailWords != 0)
	{
		CELL_GCM_CURRENT[0] = CELL_GCM_METHOD_NI(CELL_GCM_NV4097_ARRAY_ELEMENT32, tailWords);
		for(word = 0; word < tailWords; ++word)
		{
			CELL_GCM_CURRENT[1 + word] = CELL_GCM_ENDIAN_SWAP(idx[word]);
		}
		CELL_GCM_CURRENT += 1 + tailWords;
		idx += tailWords;
	}

	// close the draw
	CELL_GCM_SET_DRAW_MODE(CELL_GCM_CURRENT, 0);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Draws primitives of type `mode` from 16-bit indices embedded directly
// in the command stream.
//
//   mode  - draw mode passed to SET_DRAW_MODE to open the draw.
//   start - index of the first element of `data` to use.
//   count - number of indices to emit.
//   data  - index array; elements [start, start + count) are read.
//
// Indices are packed two per word into ARRAY_ELEMENT16 packets of at most
// CELL_GCM_MAX_METHOD_COUNT words. When `count` is odd, the first index is
// sent on its own through a one-word ARRAY_ELEMENT32 packet so that the
// remainder is an even number of indices.
CELL_GCM_DECL void CELL_GCM_FUNC(SetDrawInlineIndexArray16)(CELL_GCM_ARGS(const uint8_t mode, const uint32_t start, const uint32_t count, const uint16_t *data))
{
	const uint16_t *idx = data + start;
	// 1 when a lone leading index must be emitted, 0 otherwise
	const uint32_t odd = count & 1;
	// number of index pairs left once the lone index is removed
	const uint32_t pairs = (count - odd) >> 1;
	const uint32_t fullPackets = pairs / CELL_GCM_MAX_METHOD_COUNT;
	const uint32_t tailWords = pairs % CELL_GCM_MAX_METHOD_COUNT;
	uint32_t packet, word;

	// 8 words of fixed overhead, 2 for the lone index, one header per
	// packet and one word per index pair
	CELL_GCM_RESERVE(8 + odd*2 + fullPackets*(1+CELL_GCM_MAX_METHOD_COUNT)+(tailWords!=0 ? 1+tailWords : 0));

	// hw bug workaround
	CELL_GCM_INVALIDATE_VERTEX_FILE_3(CELL_GCM_CURRENT);

	// open the draw
	CELL_GCM_SET_DRAW_MODE(CELL_GCM_CURRENT, mode);

	// lone leading index for odd counts
	if(odd != 0)
	{
		CELL_GCM_CURRENT[0] = CELL_GCM_METHOD_NI(CELL_GCM_NV4097_ARRAY_ELEMENT32, 1);
		CELL_GCM_CURRENT[1] = CELL_GCM_ENDIAN_SWAP_SHORT(idx[0], 0);
		CELL_GCM_CURRENT += 2;
		idx += 1;
	}

	// full-size packets: header in slot 0, one index pair per following slot
	for(packet = 0; packet < fullPackets; ++packet)
	{
		CELL_GCM_CURRENT[0] = CELL_GCM_METHOD_NI(CELL_GCM_NV4097_ARRAY_ELEMENT16, CELL_GCM_MAX_METHOD_COUNT);
		for(word = 0; word < CELL_GCM_MAX_METHOD_COUNT; ++word)
		{
			CELL_GCM_CURRENT[1 + word] = CELL_GCM_ENDIAN_SWAP_SHORT(idx[2*word], idx[2*word + 1]);
		}
		CELL_GCM_CURRENT += 1 + CELL_GCM_MAX_METHOD_COUNT;
		idx += 2 * CELL_GCM_MAX_METHOD_COUNT;
	}

	// trailing short packet
	if(tailWords != 0)
	{
		CELL_GCM_CURRENT[0] = CELL_GCM_METHOD_NI(CELL_GCM_NV4097_ARRAY_ELEMENT16, tailWords);
		for(word = 0; word < tailWords; ++word)
		{
			CELL_GCM_CURRENT[1 + word] = CELL_GCM_ENDIAN_SWAP_SHORT(idx[2*word], idx[2*word + 1]);
		}
		CELL_GCM_CURRENT += 1 + tailWords;
		idx += 2 * tailWords;
	}

	// close the draw
	CELL_GCM_SET_DRAW_MODE(CELL_GCM_CURRENT, 0);

	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the SET_FOG_MODE method with `mode` as its argument
// (2 command words are requested). `mode` is passed through unchanged.
CELL_GCM_DECL void CELL_GCM_FUNC(SetFogMode)(CELL_GCM_ARGS(const uint32_t mode))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_FOG_MODE(CELL_GCM_CURRENT, mode);
	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the SET_FOG_PARAMS method with the two float parameters `p0` and
// `p1` (3 command words are requested: header + two arguments).
CELL_GCM_DECL void CELL_GCM_FUNC(SetFogParams)(CELL_GCM_ARGS(const float p0, const float p1))
{
	// Store each float through CellGcmCast's .f member and read it back
	// through .u, so the value handed to the macro comes from the .u view
	// (presumably the raw 32-bit pattern of the float -- TODO confirm
	// against the CellGcmCast definition).
	CellGcmCast d0,d1;
	d0.f = p0;
	d1.f = p1;

	CELL_GCM_RESERVE(3);
	CELL_GCM_SET_FOG_PARAMS(CELL_GCM_CURRENT, d0.u, d1.u);
	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the SURFACE_2D_DMA_IMAGE_DESTIN method, selecting the destination
// DMA context as CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER + `location`.
CELL_GCM_DECL void CELL_GCM_FUNC(SetTransferLocation)(CELL_GCM_ARGS(const uint32_t location))
{
	CELL_GCM_RESERVE(2);
	// `location` is used as an offset from the frame-buffer context handle
	CELL_GCM_SURFACE_2D_DMA_IMAGE_DESTIN(CELL_GCM_CURRENT, CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER + location);
	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the SET_CONTROL0 method with a value derived from `format`.
// Only bit 0 of `format` is used.
CELL_GCM_DECL void CELL_GCM_FUNC(SetDepthFormat)(CELL_GCM_ARGS(const uint32_t format))
{
	CELL_GCM_RESERVE(2);
	// bit 0 of `format` is placed at bit 12 and OR-ed with the constant
	// 0x00100000 (the meaning of that constant is not visible here)
	CELL_GCM_SET_CONTROL0(CELL_GCM_CURRENT, ((format&1)<<12) | 0x00100000);
	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}

// Emits the SET_REDUCE_DST_COLOR method with `enable` as its argument
// (2 command words are requested). `enable` is passed through unchanged.
CELL_GCM_DECL void CELL_GCM_FUNC(SetBlendOptimization)(CELL_GCM_ARGS(const uint32_t enable))
{
	CELL_GCM_RESERVE(2);
	CELL_GCM_SET_REDUCE_DST_COLOR(CELL_GCM_CURRENT, enable);
	CELL_GCM_DEBUG_FINISH(CELL_GCM_THIS);
}
