
#include <spu_intrinsics.h>
#include <spu_mfcio.h>
#include <stdint.h>
#include <stdio.h>
#include "common.h"
#include <ctype.h>

// Parameter context for this SPE; main() fills it with an mfc_get from the
// effective address passed in 'argp'. 128-byte aligned.
static parm_context ctx __attribute__ ((aligned (128)));

// Local-store data buffer: target of the 'getl' list command and source of
// the 'putl' list command issued by main().
volatile data_elem          lsa_data[BUFF_SIZE] __attribute__ ((aligned (128)));
// DMA list shared by the 'getl' and 'putl' commands; the stall-and-notify
// update code zeroes trailing entries when a stop command is seen.
volatile mfc_list_element_t dma_list[MAX_LIST_SIZE] __attribute__ ((aligned (128)));
// Completion word: exit_handler() sets it to STATUS_DONE and DMAs it to the
// address held in ctx.status.
volatile uint32_t           status __attribute__ ((aligned(128)));

// Transfer geometry, all derived in main() from ctx:
//   elem_per_dma  - data elements moved by one list element
//   tot_num_elem  - total number of data elements requested
//   byte_per_dma  - elem_per_dma * sizeof(data_elem)
//   byte_tota     - tot_num_elem * sizeof(data_elem) (not read by the visible code)
//   dma_list_len  - number of list elements actually used
int elem_per_dma, tot_num_elem, byte_per_dma, byte_tota, dma_list_len;
// event_num: 1-based index of the stall-and-notify event being handled.
// continue_dma: cleared once a CMD_STOP element has been seen.
int event_num=1, continue_dma=1;
// Distance, in list elements, between two elements carrying the notify flag.
int notify_incr=NOTIFY_INCR;


//============================================================================
// Turn on the MFC list stall-and-notify event in the SPU event mask while
// leaving every other bit of the mask as it was.
//============================================================================
static inline void notify_event_enable(  )
{
	// read-modify-write of the event mask in a single expression
	spu_write_event_mask(spu_read_event_mask() | MFC_LIST_STALL_NOTIFY_EVENT);
}

//============================================================================
// Update the DMA list according to data that was already transferred to LS.
//
// Scans the data elements covered by the current event, i.e. indices
//   (event_num-1)*notify_incr*elem_per_dma .. event_num*notify_incr*elem_per_dma-1
// of lsa_data. If one of them carries CMD_STOP, every list element from
// index event_num*notify_incr up to dma_list_len-1 gets size 0 and notify 0,
// and continue_dma is cleared so the caller stops waiting for further events.
//
// Reads the globals event_num, notify_incr, elem_per_dma and dma_list_len;
// writes dma_list[] and continue_dma.
//============================================================================
static inline void notify_event_update_list( )
{
	int i, j, start, end;

	start = (event_num-1)*notify_incr*elem_per_dma;
	end   = event_num*notify_incr*elem_per_dma-1;

	for (i=start; i<=end; i++){

		if ( lsa_data[i].cmd != CMD_STOP){
			continue;
		}

		// PPE wants us to stop DMAs - zero the remaining list elements.
		// The loop alone covers every remaining entry; no store is made at
		// or beyond index dma_list_len.
		for (j=event_num*notify_incr; j<dma_list_len; j++){
			dma_list[j].size = 0;
			dma_list[j].notify = 0; // no more notifications are needed
		}

		continue_dma = 0;
		break;
	}
}

//============================================================================
// Handle one stall-and-notify event, including the acknowledge to the MFC.
//
// Sequence:
//   1. enable the stall-and-notify event in the event mask
//   2. wait until the event status reports the stall-and-notify event
//   3. mask the event, acknowledge it, and read the list stall status
//   4. repeat 2-3 until the stall status contains the bit of 'tag_id'
//   5. let notify_event_update_list() inspect the data / patch the DMA list
//   6. acknowledge the stalled list for 'tag_id' and re-enable the event
//
// tag_id: DMA tag group the list command was issued on.
//
// NOTE(review): step 3 masks the event before the tag check, so a second
// pass through the outer loop would read the event status with the event
// disabled. Whether that read can ever complete should be verified against
// the SPU channel documentation; with a single list command in flight the
// loop is not expected to repeat.
//============================================================================
static inline void notify_event_handler( uint32_t tag_id )
{
	uint32_t eve_mask, tag_mask;
	// unsigned operand: shifting a signed 1 by 31 would be undefined
	const uint32_t tag_bit = (uint32_t)1 << tag_id;

	eve_mask = spu_read_event_mask();
	spu_write_event_mask(eve_mask | MFC_LIST_STALL_NOTIFY_EVENT);

	// loop until the event is reported for the requested tag_id
	do{
		// loop until a stall-and-notify event occurred
		do{
			eve_mask = spu_read_event_status();

		}while ( !(eve_mask&(uint32_t)MFC_LIST_STALL_NOTIFY_EVENT)  );

		// disable the stall-and-notify event
		eve_mask = spu_read_event_mask();
		spu_write_event_mask(eve_mask & (~MFC_LIST_STALL_NOTIFY_EVENT));

		// acknowledge the stall-and-notify event
		spu_write_event_ack(MFC_LIST_STALL_NOTIFY_EVENT);

		// read which tag groups are stalled
		tag_mask = mfc_read_list_stall_status();

	}while ( !(tag_mask & tag_bit)  );

	// update DMA list according to data that was already transferred to LS
	notify_event_update_list( );

	// acknowledge the MFC so the stalled list continues
	mfc_write_list_stall_ack(tag_id);

	// re-enable the event
	eve_mask = spu_read_event_mask();
	spu_write_event_mask(eve_mask  | MFC_LIST_STALL_NOTIFY_EVENT);
}

void exit_handler( uint32_t tag_id ){

	// update the status so PPE knows that all data is in place
	status = STATUS_DONE;
	
	// put barrier to ensure all data i written to memory before writing status
    mfc_putb((void*)&status, (uint32_t)(ctx.status), sizeof(uint32_t), tag_id,0,0);
	mfc_write_tag_mask(1<<tag_id);
    mfc_read_tag_status_all();  
	
	mfc_tag_release(tag_id);

	printf("<SPE: done\n");
}

//============================================================================
// SPE entry point.
//
// Flow:
//   1. enable the stall-and-notify event and reserve a DMA tag
//   2. fetch the parameter context from the address in 'argp'
//   3. validate the transfer geometry against the MFC limits and the local
//      buffers BEFORE touching lsa_data or dma_list
//   4. build the DMA list, flag every notify_incr-th element for
//      stall-and-notify and issue the 'getl' command
//   5. handle the notify events until all are served or the PPE asked to stop
//   6. transform the data, retarget the list to ctx.ea_out, issue 'putl'
//   7. report completion through exit_handler()
//
// speid: unused.
// argp:  effective address of the parm_context for this SPE.
// Returns 0 on success, 1 on any setup or validation error.
//============================================================================
int main(int speid , uint64_t argp)
{
	int i, j, num_notify_events;
	uint32_t addr, tag_id, tag_mask;

	(void)speid;

	printf("<SPE: start\n");

	// enable the stall-and-notify
	//============================================================================
	notify_event_enable( );

	// reserve DMA tag ID
	//============================================================================
	tag_id = mfc_tag_reserve();

	if(tag_id==MFC_TAG_INVALID){
		printf("SPE: ERROR - can't reserve a tag ID\n");
		// Without a tag no DMA can be issued and ctx has not been loaded,
		// so the completion status cannot be reported; just leave.
		printf("<SPE: done\n");
		return 1;
	}

	// unsigned operand: shifting a signed 1 by 31 would be undefined
	tag_mask = (uint32_t)1 << tag_id;

	// get context information from system memory.
	//============================================================================
	mfc_get((void*) &ctx, argp, sizeof(ctx), tag_id, 0, 0);
	mfc_write_tag_mask(tag_mask);
	mfc_read_tag_status_all();   // Wait for DMA to complete

	// initialize DMA transfer attributes
	//============================================================================
	tot_num_elem = ctx.tot_num_elem;
	elem_per_dma = ctx.elem_per_dma;

	// must be checked before it is used as a divisor below
	if (elem_per_dma<=0){
		printf("SPE: ERROR - illegal number of elements per DMA (=%d)\n", elem_per_dma);
		exit_handler( tag_id ); return 1;
	}

	dma_list_len = MAX( 1, tot_num_elem/elem_per_dma );
	byte_tota = tot_num_elem*sizeof(data_elem);
	byte_per_dma = elem_per_dma*sizeof(data_elem);

	// use mfc_io.h definitions to check if DMA transfer attributes are legal
	//============================================================================
	if (byte_per_dma<MFC_MIN_DMA_SIZE || byte_per_dma>MFC_MAX_DMA_SIZE){
		printf("SPE: ERROR - illegal DMA transfer's size (=%d)\n", byte_per_dma);
		exit_handler( tag_id ); return 1;
	}
	if (dma_list_len<MFC_MIN_DMA_LIST_SIZE || dma_list_len>MFC_MAX_DMA_LIST_SIZE){
		printf("SPE: ERROR - illegal DMA list size (=%d)\n",dma_list_len);
		exit_handler( tag_id ); return 1;
	}
	if (dma_list_len>=MAX_LIST_SIZE){
		printf("SPE: ERROR - DMA list size bigger then local list (=%d)\n",dma_list_len);
		exit_handler( tag_id ); return 1;
	}

	// The list moves dma_list_len*elem_per_dma elements, which exceeds
	// tot_num_elem when tot_num_elem < elem_per_dma; both must fit in LS.
	if(tot_num_elem>BUFF_SIZE || dma_list_len*elem_per_dma>BUFF_SIZE){
		printf("SPE: ERROR - dma length bigger then local data buffer\n");
		exit_handler( tag_id ); return 1;
	}

	// initialize data buffer (only after the size was validated)
	//============================================================================
	for (i=0; i<tot_num_elem; ++i){
		lsa_data[i].cmd = CMD_EMPTY;
	}

	// create the DMA list for the 'getl' command
	//============================================================================
	addr = mfc_ea2l(ctx.ea_in);

	for (i=0; i<dma_list_len; i++) {
		dma_list[i].size = byte_per_dma;
		dma_list[i].eal = addr;
		dma_list[i].notify = 0;
		addr += byte_per_dma;
	}

	// set the stall-and-notify bit on every notify_incr-th element,
	// never on the last element of the list
	//============================================================================
	for (i=notify_incr-1, num_notify_events=0; i<(dma_list_len-1); i+=notify_incr) {
		num_notify_events++;
		dma_list[i].notify = 1;
	}

	// issue the DMA list 'getl' command
	//============================================================================
	mfc_getl((void*)lsa_data, ctx.ea_in, (void*)dma_list, sizeof(mfc_list_element_t)*dma_list_len,tag_id,0,0);

	// handle stall-and-notify events
	//============================================================================
	for (event_num=1; event_num<=num_notify_events; event_num++) {
		notify_event_handler( tag_id );

		if( !continue_dma ){ // PPE marked that we're done, no more events will come
			printf("SPE: no need for more DMAs\n" );
			break;
		}
	}

	// wait for completion of the 'getl' command
	//============================================================================
	mfc_write_tag_mask(tag_mask);
	mfc_read_tag_status_all();   // wait for DMA to complete

	// calculate the output data
	//============================================================================
	for (i=0; i<tot_num_elem; ++i){
		lsa_data[i].cmd = CMD_DONE;
		for (j=0; j<DATA_LEN; j++){
			lsa_data[i].data[j] = calc_out_d( lsa_data[i].data[j] );
		}
	}

	// + update the existing DMA list for the 'putl' command
	// + update only the address; the sizes are kept as they are, including
	//   any entries zeroed while handling a stop request
	// + no notification is needed for the 'putl' command
	//============================================================================
	addr = mfc_ea2l(ctx.ea_out);

	for (i=0; i<dma_list_len; i++) {
		dma_list[i].eal = addr;
		dma_list[i].notify = 0;
		addr += byte_per_dma;
	}

	// issue the DMA list 'putl' command
	//============================================================================
	mfc_putl((void*)lsa_data,ctx.ea_out,(void*)dma_list, sizeof(mfc_list_element_t)*dma_list_len,tag_id,0,0);

	// wait for completion of the 'putl' command
	//============================================================================
	mfc_write_tag_mask(tag_mask);
	mfc_read_tag_status_all();

	exit_handler(tag_id);

	return 0;
}
