	########################################################################
	#
	# Global register values.  The values in the following registers should not be altered
	# without fully understanding their use throughout the kernel.
	#
	#
	#	r80	= 0
	#	r81 = word extraction mask
	#	r100 = dma source address high
	#	r101 = dma source address low
	#
	#
	#
	#
	#
	#
	#
	########################################################################



.text
	
	.align 3


kernel_service:

	########################################################################
	# This is the entry point for kernel services provided to the pages.
	# This is a fixed entry point for page ELFs.  The label can not move without breaking the pages.
	#
	# The service index in r3 is multiplied by 16 (one quadword per table
	# entry) and used to fetch a handler address from service_cmd_jump_table.
	# The handler is called with bisl inside a 32 byte stack frame so that the
	# caller's $lr survives the call.
	#
	# Params:
	#	r3 kernel service index
	#	r4 to r127 are service defined params and must not be altered within this call
	# Returns:
	#	Whatever the selected service left in its registers.  After the service
	#	returns, this routine only touches $sp and $lr.
	# Clobber:
	#	r2
	#	r3
	#
	# NOTE(review): the index is not range checked.  An index past the last
	# service reads whatever quadword follows in the table and branches to it.
	########################################################################

	stqd	$lr, 16($sp)				#save the return address at 16(caller $sp)
	stqd	$sp, -32($sp)				#write the back chain word of the new frame
	ai		$sp, $sp, -32				#push the 32 byte frame
	
								#find the command handler for id in $3
								#TODO:  Check to make sure $3 is within the jump table limits
	shli	$3, $3, 4					#index * 16 = byte offset of the table entry

	lqd		$2, service_cmd_jump_table($3)

	bisl	$lr, $2						#call the service; it returns to the cleanup code below

.kernel_service_cleanup:

	ai		$sp, $sp, 32				#pop the frame
	lqd		$lr, 16($sp)				#reload the return address saved on entry
	bi 		$lr

	.size 	kernel_service, .-kernel_service
	.type	kernel_service, @function
	.global	kernel_service




	
service_get_data_ptr:
	
	########################################################################
	#	This service should still be considered temporary, but it is closer to
	#	the final implementation than the last version.  The function will return
	#	to its caller the lowest address that is available to pages for working
	#	LS space.  The buffer will be 0x4000 byte aligned.
	#	(The alignment is not enforced here; it depends on where the linker
	#	places __page_heap_start.)
	#
	#	Returns:
	#		r3					- address of __page_heap_start, passed through the
	#							  r80/r81 shuffle (per the register contract below
	#							  this keeps word 0 and zeroes the other words).
	#
	#	Clobber:
	#		r3
	#
	#	Expects:
	#		r80 				- [0, 0, 0, 0]
	#		r81					- word extraction mask.
	#		__page_heap_start 	- to be provided by the linker.
	########################################################################

	ila		$3, __page_heap_start
	shufb	$3, $3, $80, $81				#base address

	bi 		$lr								#leaf routine: no stack frame, return directly

	.type	service_get_data_ptr, @function
	.size 	service_get_data_ptr, .-service_get_data_ptr










	#END kernel services
	########################################################################
	########################################################################
	########################################################################
	########################################################################











handle_load_page:

	########################################################################
	# Load an ELF image into a given page index.
	#
	# The image is fetched with an MFC get into
	# exec_page_space + (page index * 0x4000).  That LS address is recorded in
	# page_base_table[page index] before the transfer is queued.  The handler
	# then waits for the transfer and issues a sync before returning.
	#
	# A page index above 7 is rejected and the command is ignored.
	# page_base_table only has eight entries, so a larger index would store
	# over the tables that follow it in .data and would aim the DMA outside
	# the eight supported pages.
	#
	#	Command format
	#		[0:7]		command ID
	#		[8:15]		resv
	#		[16:31]		unused
	#		[32:63]		image source eah (effective address, high word)
	#		[64:95]		image source eal (effective address, low word)
	#		[96:111]	image size
	#		[112:119]	unused
	#		[120:127]	target page index (0 to 7)
	#
	#	Expects:
	#		r80 	[0, 0, 0, 0]
	#		r81		word extraction mask
	#
	# 	Params:
	# 		r3 is the queue command
	#
	# 	Clobber:
	#		r2
	#		r3
	#		r4
	#		r5
	#		r6
	#		r7
	#		r8
	#		plus anything clobbered by spuutil_QueueMFCTransfer and
	#		spuutil_WaitForDMATagGroups (not visible in this file)
	########################################################################

	stqd	$lr, 16($sp)
	stqd	$sp, -32($sp)
	ai		$sp, $sp, -32

	hbra	.handle_load_page_queue_mfc, spuutil_QueueMFCTransfer

	lqa		$4, load_page_extraction_masks + 0x30
	shufb	$4, $3, $80, $4						#r4 = target page index

	clgti	$5, $4, 7							#unsigned compare: only indices 0 to 7 have a page_base_table entry
	brnz	$5, .handle_load_page_cleanup		#out of range: ignore the command instead of corrupting .data

	ila 	$5, exec_page_space

	shli	$6, $4, 0x0e						#compute the target offset.  Eight 0x4000 byte pages are currently supported
	a		$6, $5, $6
	shufb	$6, $6, $80, $81					#r6 = LS address of the page, remaining words zeroed

	ila		$2, page_base_table
	shli	$5, $4, 0x4							#index * 16 = byte offset of the table entry
	stqx	$6, $2, $5							#store the page entry in the page base table

												#Build the transfer arguments.  Presumably r3 = MFC command,
												#r4 = eah, r5 = eal, r6 = LS address, r7 = size and r8 = tag;
												#confirm against spuutil_QueueMFCTransfer.
	ori		$8, $80, 0							#r8 = 0
	
	lqa		$7, load_page_extraction_masks + 0x20
	shufb	$7, $3, $80, $7						#r7 = image size
	
	lqa		$5, load_page_extraction_masks + 0x10
	shufb	$5, $3, $80, $5						#r5 = image source eal

	lqa		$4, load_page_extraction_masks + 0x00
	shufb	$4, $3, $80, $4						#r4 = image source eah

	ila 	$3, 0x40							#get
	shufb	$3, $3, $80, $81

.handle_load_page_queue_mfc:
	brsl	$lr, spuutil_QueueMFCTransfer

	il		$3, 0x01							#r3 = 1, r4 = 2: the same arguments main passes when it waits
	a			$4, $3, $3						#for its tag 0 queue transfer
	brsl	$lr, spuutil_WaitForDMATagGroups

	sync										#the DMA wrote code into LS; synchronize before it can be executed

	
.handle_load_page_cleanup:
	
	ai		$sp, $sp, 32
	lqd		$lr, 16($sp)

	bi 		$lr

	.type	handle_load_page, @function
	.size	handle_load_page, .-handle_load_page



	

	
handle_execute_page:
	
	########################################################################
	# Execute the page at a given index.
	#
	# The page base address is read from page_base_table[page index].  The
	# entry point is that base plus the value stored in the first word of
	# the page.  The page is called with r3 = page param0 and r4 = page param1.
	#
	# The command is ignored when
	#	- the page index is above 7 (page_base_table only has eight entries,
	#	  so a larger index would branch through unrelated data), or
	#	- the table entry is still 0, i.e. handle_load_page has not stored a
	#	  base address for that page yet.
	#
	#	Command format
	#		[0:7]		command ID
	#		[8:15]		resv
	#		[16:31]		unused
	#		[32:63]		page param0
	#		[64:95]		page param1
	#		[96:119]	unused
	#		[120:127]	target page index (0 to 7)
	#
	#	Expects:
	#		r80 	[0, 0, 0, 0]
	#		r81		word extraction mask
	#
	# 	Params:
	# 		r3 is the queue command
	#
	#	Clobber:
	#		r2
	#		r3
	#		r4
	#		r7
	#		plus anything the executed page clobbers
	########################################################################

	stqd	$lr, 16($sp)
	stqd	$sp, -32($sp)
	ai		$sp, $sp, -32

	lqa		$2, execute_page_extraction_masks + 0x20
	shufb	$2, $3, $80, $2			#r2 = target page index

	clgti	$4, $2, 7				#unsigned compare: only indices 0 to 7 have a page_base_table entry
	brnz	$4, .handle_execute_page_cleanup

	shli	$2, $2, 0x4				#index * 16 = byte offset of the table entry

	ila		$4, page_base_table
	lqx		$2, $2, $4				#r2 = page base address
	brz		$2, .handle_execute_page_cleanup	#base still 0: nothing has been loaded into this page

	lqd		$4, 0($2)				#entry
	a		$2, $2, $4				#r2 = page base + entry offset

	lqa		$4, execute_page_extraction_masks + 0x10
	shufb	$4, $3, $80, $4			#r4 = page param1

	lqa		$7, execute_page_extraction_masks + 0x00
	shufb	$3, $3, $80, $7			#r3 = page param0 (done last, it overwrites the command)

	bisl	$lr, $2					#call the page; it returns to the cleanup code below

.handle_execute_page_cleanup:

	ai		$sp, $sp, 32
	lqd		$lr, 16($sp)
	
	bi 		$lr

	.type	handle_execute_page, @function
	.size	handle_execute_page, .-handle_execute_page

	



handle_set_cmd_queue_source_addr:

	########################################################################
	# Set the command queue source address.
	#
	# Replaces the global registers r100 (high) and r101 (low) with the
	# effective address carried in the command.  main uses these two registers
	# as the DMA source of every later command queue fetch.
	#
	#	Command format
	#		[0:7]		commandID
	#		[8:15]		resv
	#		[16:31]		unused
	#		[32:63]		queue ea high
	#		[64:95]		queue ea low
	#		[96:127]	unused
	#
	#	Expects:
	#		r80 	[0, 0, 0, 0]
	#		r81		word extraction mask
	#		r100 	command queue high
	#		r101 	command queue low
	#
	# 	Params:
	# 		r3 is the queue command
	#
	#	Writes:
	#		r100	new command queue high
	#		r101	new command queue low
	#
	#	Clobber:
	#		r4
	########################################################################

	lqa		$4, set_cmd_queue_source_address_masks + 0x00
	shufb	$100, $3, $80, $4				#r100 = command bytes 4 to 7 (queue ea high)
	
	lqa		$4, set_cmd_queue_source_address_masks + 0x10
	shufb	$101, $3, $80, $4				#r101 = command bytes 8 to 11 (queue ea low)

	
	bi 		$lr								#leaf routine: no stack frame, return directly

	.type	handle_set_cmd_queue_source_addr, @function
	.size	handle_set_cmd_queue_source_addr, .-handle_set_cmd_queue_source_addr



handle_send_event:

	########################################################################
	# Send a PU INT MB "event".
	#
	# Extracts the event value from the command and passes it in r3 to
	# spuutil_SendPUEvent.
	#
	#	Command format
	#		[0:7]		commandID
	#		[8:15]		resv
	#		[16:31]		unused
	#		[32:63]		event value
	#		[64:95]		unused (this handler only extracts [32:63])
	#		[96:127]	unused
	#
	#	Expects:
	#		r80 	[0, 0, 0, 0]
	#		r81		word extraction mask
	#
	# 	Params:
	# 		r3 is the queue command
	#
	#	Clobber:
	#		r3
	#		r4
	#		plus anything clobbered by spuutil_SendPUEvent (not visible in this file)
	########################################################################

	hbra	.handle_send_event_send_signal, spuutil_SendPUEvent

	stqd	$lr, 16($sp)
	stqd	$sp, -32($sp)
	ai		$sp, $sp, -32

	lqa		$4, send_event_extraction_masks + 0x00
	shufb	$3, $3, $80, $4					#r3 = event value

.handle_send_event_send_signal:

	brsl	$lr, spuutil_SendPUEvent

.handle_send_event_cleanup:

	lqd		$sp, 0($sp)						#pop the frame by reloading the back chain stored on entry
	lqd		$lr, 16($sp)					#(the other handlers pop with ai $sp, $sp, 32 instead)

.handle_send_event_return:
	bi 		$lr

	.type	handle_send_event, @function
	.size	handle_send_event, .-handle_send_event




	#END command handlers
	########################################################################
	########################################################################
	########################################################################
	########################################################################








	#END private helpers
	########################################################################
	########################################################################
	########################################################################
	########################################################################

















main:
	########################################################################
	#	This is the kernel entry point.
	#
	#	Start up:
	#		- builds the global registers r80 (zero) and r81 (word extraction mask)
	#		- sets up the stack at __stack
	#		- sends PU event 0, then blocks on the inbound mailbox for the two
	#		  words of the command queue address (r100, then r101)
	#		- sends PU event 1
	#
	#	Main loop:
	#		- blocks on the signal channel for the number of queued commands
	#		- the value 0xffff shuts the kernel down (stop)
	#		- the value 0 means nothing is queued and is ignored
	#		- otherwise the queue (value * 16 bytes) is fetched into cmd_queue
	#		  with an MFC get on tag 0, and each 16 byte command is dispatched
	#		  through queue_cmd_jump_table using the command id in byte 0
	#
	#	Registers that are live across the handler calls and must be preserved
	#	by every handler: r80, r81, r82, r90, r91, r100, r101.
	#
	#	Params:
	#		r3:	initial dma source address for kernel commands.
	#			NOTE: the value passed in r3 is currently discarded; the address
	#			is read from the inbound mailbox instead.
	#		r4	unused.
	########################################################################

	hbra	.main_send_pu_event_0, spuutil_SendPUEvent

	andc	$80, $80, $80							#r80 = [0, 0, 0, 0]
	cwd		$81, 0($sp)								#r81 = word extraction mask, built from the incoming $sp
													#(assumes that $sp is 16 byte aligned - TODO confirm against the loader)

	ila		$sp, __stack							#Set the starting stack address

	stqd	$80, 0($sp)								#This should not be needed, but I don't think it hurts to be safe.
	stqd	$80, 16($sp)

	stqd	$sp, -32($sp)

	ai		$sp, $sp, -32

													#Sync with the PU.  Interrupt the PU to let it know we are loaded and running.
													#Then, block on the SPU MB for the initial queue address
.main_send_pu_event_0:
	andc	$3, $3, $3								#event value 0
	brsl	$lr, spuutil_SendPUEvent

	rdch	$100, $mfc_rd_mailbox					#command queue ea high
	rdch	$101, $mfc_rd_mailbox					#command queue ea low

.main_send_pu_event_1:
	il		$3, 0x1	
	brsl	$lr, spuutil_SendPUEvent

	hbra	.main_bhint_signal_loop, .main_signal1_value_loop

#######################################################################################
# MAIN LOOP

.main_signal1_value_loop:
	rdch 	$3, $mfc_rd_signal_2					#blocks until the PU signals the number of queued commands

	ila		$4, 0xffff
	ceq		$4, $3, $4								#check for shutdown signal
	brnz	$4, .main_cleanup

	shli	$90, $3, 0x4							#Compute the queue byte size

	brz		$90, .main_signal1_value_loop			#Empty queue: the processing loop below always runs at least once,
													#so without this check it would dispatch stale cmd_queue contents
													#and its offset would start at 0x10 and only reach 0 after wrapping.

													#Load the command queue
	ila		$91, cmd_queue
	shufb	$91, $91, $80, $81
	wrch	$mfc_ls_addr, $91
	wrch	$mfc_ea_hi, $100
	wrch	$mfc_ea_low, $101
	wrch	$mfc_dma_size, $90
	wrch	$mfc_tag_id, $80						#tag 0
	ila		$2, 0x40								#get
	shufb	$2, $2, $80, $81
	wrch	$mfc_cmd_queue, $2

	il		$3, 0x01								#r3 = 1, r4 = 2: presumably tag mask (tag 0) and wait mode;
	a		$4, $3, $3								#confirm against spuutil_WaitForDMATagGroups
	brsl	$lr, spuutil_WaitForDMATagGroups

	dsync


														#r90 contains the byte size of the command queue
														#r91 still contains the 'cmd_queue' offset

														#load the command table and jump to the handler.

	ila		$92, queue_cmd_jump_table					#NOTE: r92 is not read again in main; the dispatch below addresses the table by symbol
	shufb	$92, $92, $80, $81

	a		$91, $91, $90								#the processing loop uses a reverse pointer index scheme.
	nor		$90, $90, $90								#r91 will point to the end of the command queue.
	ai		$90, $90, 1									#while r90 contains a negative offset to the current command.  Each iteration increases r90 by the size of one command.
														#This addressing system is most useful on x86 (low register count) but it also saves us some work here.

	lqa		$82, cmd_extraction_masks					#r82 = command id mask
	lqa		$83, cmd_extraction_masks + 0x10			#r83 = command attribute mask (not used in main)

#######################################################################################

.main_cmd_processing_loop_top:

	lqx		$3, $91, $90								#load the command qw

														#TODO: check for valid command values before jumping (debug mode only)
	shufb	$4, $3, $80, $82							#extract the command id. Byte 0 of the 16 byte queue entry

	shli	$4, $4, 0x04								#id * 16 = byte offset of the jump table entry
	lqd		$2, queue_cmd_jump_table($4)
	bisl	$lr, $2										#call the handler with the command in r3

	ai		$90, $90, 0x10								#step to the next command; the offset reaches 0 after the last one
	brnz	$90, .main_cmd_processing_loop_top

.main_bhint_signal_loop:
	br		.main_signal1_value_loop			#wait for more work

# MAIN LOOP
#######################################################################################

.main_cleanup:
	ai		$sp, $sp, 32

#TODO, make sure the stack is still valid
#	$sp = __stack
#	($sp) = 0
#
.main_stop:
	stop

	.type	main, @function
	.size	main, .-main
	.global	main







	#END .text section
	###################################################################################
	###################################################################################
	###################################################################################
	###################################################################################
	###################################################################################





.section .data	

	# Every table below is read or written with quadword loads and stores
	# (lqa, lqd, lqx, stqx), which ignore the low four bits of the address,
	# and cmd_queue is the LS target of an MFC transfer.  The section must
	# therefore start on a 16 byte boundary.  Every object in it is a multiple
	# of 16 bytes long, so aligning the start keeps all of them aligned.
	.balign 16

	.type _kernel_data, @object
	_kernel_data:


	queue_address:		.long	0x00, 0x00, 0x00, 0x00
	.size queue_address,.-queue_address


	# One quadword per page.  handle_load_page overwrites an entry with
	# [page LS base address, 0, 0, 0]; a first word of 0 therefore means that
	# nothing has been loaded into that page.  The last word initially holds
	# the entry index.
	page_base_table:	
						.long	0x00, 0x00, 0x00, 0x00
						.long	0x00, 0x00, 0x00, 0x01
						.long	0x00, 0x00, 0x00, 0x02
						.long	0x00, 0x00, 0x00, 0x03
						.long	0x00, 0x00, 0x00, 0x04
						.long	0x00, 0x00, 0x00, 0x05
						.long	0x00, 0x00, 0x00, 0x06
						.long	0x00, 0x00, 0x00, 0x07
	
	.size page_base_table, .-page_base_table




	# Handlers for kernel_service, indexed by the service index passed in r3.
	# The handler address is in the first word of each quadword.  The final
	# entry holds the number of handlers in front of it.
	service_cmd_jump_table:
						.long 	service_get_data_ptr, 0x00, 0x00, 0x00 

						.long 	(.-(service_cmd_jump_table))/16, 0x00, 0x00, 0x00 #TODO, this should be an immediate number




	# Handlers for the queue commands, indexed by the command id found in
	# byte 0 of a command.  Same layout as service_cmd_jump_table.
	queue_cmd_jump_table:
						.long	handle_load_page, 0x00, 0x00, 0x00
						.long	handle_execute_page, 0x00, 0x00, 0x00
						.long	handle_set_cmd_queue_source_addr, 0x00, 0x00, 0x00
						.long	handle_send_event, 0x00, 0x00, 0x00
						.long 	(.-(queue_cmd_jump_table))/16, 0x00, 0x00, 0x00 #TODO, this should be an immediate number


	# shufb control words.  The handlers use them as
	#	shufb	rt, <command>, r80, <mask>
	# so a mask byte 0x00 to 0x0f selects that byte of the command and a mask
	# byte 0x10 selects byte 0 of r80, which is zero.
	cmd_extraction_masks:
						.long	0x10101000, 0x10101010, 0x10101010, 0x10101010		#extract the queue command id
						.long	0x10101001, 0x10101010, 0x10101010, 0x10101010		#extract the queue command attribute

	load_page_extraction_masks:
						.long	0x04050607, 0x10101010, 0x10101010, 0x10101010		#Param0 eah
						.long	0x08090a0b, 0x10101010, 0x10101010, 0x10101010		#Param1 eal
						.long	0x10100c0d, 0x10101010, 0x10101010, 0x10101010		#Param2 image size
						.long	0x1010100f, 0x10101010, 0x10101010, 0x10101010		#Param3 target page index

	execute_page_extraction_masks:
						.long	0x04050607, 0x10101010, 0x10101010, 0x10101010		#Param0 page param0
						.long	0x08090a0b, 0x10101010, 0x10101010, 0x10101010		#Param1 page param1
						.long	0x1010100f, 0x10101010, 0x10101010, 0x10101010		#Param2 target page index

	set_cmd_queue_source_address_masks:
						.long	0x04050607, 0x10101010, 0x10101010, 0x10101010		#param0 eah
						.long	0x08090a0b, 0x10101010, 0x10101010, 0x10101010		#param1 eal
	
	send_event_extraction_masks:
						.long	0x04050607, 0x10101010, 0x10101010, 0x10101010		#param0 event value


	
	# LS buffer that main fills from the PU side command queue.
	# 4 + 0x1fc = 0x200 bytes, i.e. room for 32 commands of 16 bytes.
	cmd_queue:
						.long	0x00000000
		.space 0x1fc

	.size _kernel_data, .-_kernel_data


.section .page_text			#The ld script will place this section at the correct offset.
							#LS space from this point on belongs to the running pages (and the stack)

	.type exec_page_space, @function
exec_page_space:						#base of page 0; handle_load_page places page N at exec_page_space + N * 0x4000


	.size exec_page_space, .-exec_page_space		#nothing is emitted here, so the recorded size is 0


