
// -------------------------------------------------------------- 
// (C)Copyright 2007,                                         
// International Business Machines Corporation, 
// All Rights Reserved.
// Author: Eitan Peri, eitanp@il.ibm.com
// -------------------------------------------------------------- 


#include <spu_intrinsics.h>
#include <spu_mfcio.h>
#include <stdint.h>
#include <stdio.h>
#include "common.h"
#include <ctype.h>

// Wait for the DMA tag group `tag_id`: select it with mfc_write_tag_mask()
// and then call mfc_read_tag_status_all().
// Wrapped in do { } while (0) so the macro is a single statement (safe inside
// an unbraced if/else), with the argument parenthesised and an unsigned shift
// so that tag 31 does not shift into the sign bit.
#define waittag(tag_id) \
	do { \
		mfc_write_tag_mask(1u << (tag_id)); \
		mfc_read_tag_status_all(); \
	} while (0)

// Context block for this SPE; main() fills it with a DMA get from the
// effective address it receives in argp.
static parm_context ctx __attribute__ ((aligned (128)));

// Data structures for the get that the other SPE will do: main() sends the
// local-store address of this array to the PPE through the outbound mailbox,
// and process() writes its output into data_other[next_itter].
volatile uint32_t data_other[2][BUFF_SIZE] __attribute__ ((aligned(128)));

// Data structures for the get that this SPE will do: main() DMA-gets into
// data_my[next_itter] while process() reads data_my[itter].
// ea_offset_my[0] is read from the inbound mailbox and ea_offset_my[1] is one
// buffer (BUFF_SIZE words) further; ea_addr_my[] is ctx.ea_base plus those offsets.
volatile uint32_t data_my[2][BUFF_SIZE] __attribute__ ((aligned(128)));
uint32_t ea_offset_my[2];
uint64_t ea_addr_my[2];

// Index of the buffer currently being processed; main() toggles it between
// 0 and 1. exit_handler() also overwrites it with STATUS_DONE and uses it as
// the source of the status DMA.
uint32_t itter __attribute__ ((aligned(128)));

// Fill out_data with in_data incremented by one, element by element, for the
// first BUFF_SIZE entries of both arrays.
void process(uint32_t in_data[], uint32_t out_data[]){
	int idx = 0;

	while (idx < BUFF_SIZE) {
		out_data[idx] = in_data[idx] + 1;
		idx++;
	}
}

void exit_handler( char *str, uint32_t tag_id ){

	printf("<SPE: %s\n",str);

	// update the status so PPE knows that all data is in place
	itter = STATUS_DONE;
	
	// put barrier to ensure all data is written to memory before writing status
    mfc_putb((void*)&itter, ctx.ea_status, sizeof(uint32_t), tag_id,0,0);
	waittag(tag_id);

	mfc_tag_release(tag_id); // release tag ID before exiting

	printf("<SPE: done\n");
}


// SPE entry point.
//
// speid - not used here.
// argp  - effective address of the parm_context block that is fetched into ctx.
//
// Flow: reserve a DMA tag, fetch ctx, exchange buffer locations with the PPE
// through the mailboxes, clear the local buffers, then loop: report the
// iteration count to ctx.ea_status, wait for a mailbox word from the PPE, and
// unless it is DATA_DONE start a get into one data_my buffer while process()
// works on the other.
//
// Returns 0 after DATA_DONE was received, 1 if no DMA tag could be reserved.
int main(int speid , uint64_t argp)
{
	uint32_t i, data, tag_id, next_itter;

	// Iteration counter, also used directly as the source of the 4-byte status
	// DMA below. Quadword-aligned explicitly, in the same spirit as the aligned
	// global `itter` that exit_handler() writes to the same ctx.ea_status.
	// NOTE(review): MFC transfers smaller than 16 bytes need matching low
	// address bits on both sides - confirm against the PPE-side layout.
	uint32_t num_itter __attribute__ ((aligned (16))) = 0;

	// reserve DMA tag ID
	if((tag_id=mfc_tag_reserve())==MFC_TAG_INVALID){
		// exit_handler receives the invalid tag here
		exit_handler( "ERROR - can't reserve a tag ID\n", tag_id ); return 1;
	}

	// get context information from system memory.
	mfc_get((void*) &ctx, argp, sizeof(ctx), tag_id, 0, 0);
	waittag(tag_id);

	// send to PPE the location of the data buffer that the other SPE will access
	spu_write_out_mbox((uint32_t)(data_other)); // stalls if the mailbox is full

	// read from the PPE the offset of the remote buffer; the second buffer
	// follows the first one directly
	ea_offset_my[0] = spu_read_in_mbox(); // stall if empty
	ea_offset_my[1] = ea_offset_my[0] + BUFF_SIZE*sizeof(uint32_t);
	ea_addr_my[0] = ctx.ea_base  + ea_offset_my[0];
	ea_addr_my[1] = ctx.ea_base  + ea_offset_my[1];

	// init local buffers before first iteration
	itter = 0;
	for(i=0;i<BUFF_SIZE;i++){
		data_my[0][i]=0;          data_my[1][i]=0;
		data_other[0][i]=0; data_other[1][i]=0;
	}

	num_itter=1;
	int k;

	do{
		next_itter = itter^1;

		// publish the current iteration count in the status word
		mfc_put((void*)&num_itter, ctx.ea_status, sizeof(uint32_t), tag_id,0,0);
		waittag(tag_id);

		// wait for PPE to tell us that we can start
		data  = spu_read_in_mbox(); // stall if empty

		if (data==DATA_DONE) break;

		//printf("<SPE %d: bring from EA %d to LS %d process %d\n",ctx.num, itter,next_itter,itter);

		// start fetching the remote buffer at ea_addr_my[itter] into
		// data_my[next_itter]; the get is only waited for after process()
		mfc_get((void*)(&data_my[next_itter][0]), ea_addr_my[itter], BUFF_SIZE*sizeof(uint32_t), tag_id, 0, 0);

		// work on the buffer that is not the target of the get in flight
		process( &(data_my[itter][0]), &(data_other[next_itter][0]) );
		// empty counting loop; presumably a stand-in for extra work - an
		// optimizing compiler may drop it
		for (k=0;k<10000;k++);

		// the fetched buffer must be complete before it becomes current
		waittag(tag_id);

		//printf("<SPE %d: process %d: [%d,%d]->[%d,%d]\n",ctx.num, num_itter,data_my[itter][0],data_my[itter][BUFF_SIZE-1],data_other[next_itter][0],data_other[next_itter][BUFF_SIZE-1]);

		itter = next_itter;
		num_itter++;
	}while(1);

	// num_itter is unsigned, so print it with %u
	printf("<SPE %d: done after %u iterations\n",ctx.num, (unsigned)num_itter);

	mfc_tag_release(tag_id); // release tag ID before exiting

	return 0;
}
