// ==================================================================
// ppu_main.c file
// ==================================================================

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <libspe2.h>
#include <cbe_mfc.h>
#include <pthread.h>

#include "common.h"

#define NUM_SPES 2

// input and output data buffers; their addresses are published to the
// SPEs through ctx[].ea_in / ctx[].ea_out (see STEP 1 in main)
volatile char in_data[BUFF_SIZE]  __attribute__ ((aligned(128)));
volatile char out_data[BUFF_SIZE] __attribute__ ((aligned(128)));

// Data structures to work with the SPE
//   ctx[i]     - parameter block (buffer addresses) filled in for SPE i
//   program[i] - handle of the SPE image opened for SPE i
// Both arrays hold one entry per SPE, so they are sized by NUM_SPES.
volatile parm_context ctx[NUM_SPES] __attribute__ ((aligned(16)));
spe_program_handle_t *program[NUM_SPES];

// data structure for running SPE thread ==============================
// One instance per SPE; a pointer to it is the argument handed to
// spu_pthread() when the thread is created.
typedef struct spu_data {
  spe_context_ptr_t spe_ctx;   // SPE context returned by spe_context_create()
  pthread_t pthread;           // thread id filled in by pthread_create(), used for pthread_join()
  void *argp;                  // forwarded as the argp argument of spe_context_run()
} spu_data_t;

// per-SPE bookkeeping, indexed 0..NUM_SPES-1
spu_data_t data[NUM_SPES];

// pthread entry point: run one SPE context ===========================
// arg points to the spu_data_t that holds the SPE context to run and
// the argp value forwarded to the SPE program.
// Prints a start/finish trace; if spe_context_run() reports an error
// the whole process is terminated with exit status 1.
// Does not return normally: the thread ends through pthread_exit(NULL).
void *spu_pthread(void *arg) {

	spu_data_t *spu = arg;
	uint32_t entry_point = SPE_DEFAULT_ENTRY;
	int rc;

	printf(")PPE: spe thread start run\n" );

	// no run flags, no "envp" pointer and no stop-info requested
	rc = spe_context_run(spu->spe_ctx, &entry_point, 0,
	                     spu->argp, NULL, NULL);
	if (rc < 0) {
		perror ("Failed running context");
		exit (1);
	}

	printf(")PPE: spe thread finish run\n");
	pthread_exit(NULL);
}

// main ===============================================================
// Splits in_data/out_data evenly between NUM_SPES SPEs, opens and loads
// a separate SPE program for each one, runs every SPE context in its own
// pthread, then joins the threads and releases the SPE resources.
// Returns 0 on success; any failure prints a message and exits with 1.
int main(void)
{
	// SPE executable image for each SPE, indexed like ctx[] and data[].
	// Sized by NUM_SPES so the loops below can never index past the end;
	// entries without an initializer are NULL and fail in spe_image_open.
	static const char *const spe_names[NUM_SPES] = {
		"spu1/spu_main1",
		"spu2/spu_main2"
	};
	// number of buffer bytes assigned to each SPE
	const uint64_t slice = BUFF_SIZE / NUM_SPES;
	int num;
	int rc;

	// STEP 1: initiate SPEs control structures - each SPE gets its own
	//         parameter block pointing at its own slice of the buffers
	for (num = 0; num < NUM_SPES; num++) {
		// Convert through uintptr_t: a direct pointer -> uint64_t cast
		// is sign-extended by GCC when pointers are 32 bits wide, which
		// would corrupt the 64-bit effective address.
		ctx[num].ea_in  = (uint64_t)(uintptr_t)in_data
		                + (uint64_t)num * slice;
		ctx[num].ea_out = (uint64_t)(uintptr_t)out_data
		                + (uint64_t)num * slice;

		// Hand SPE 'num' the address of ITS parameter block (the cast
		// only drops the volatile qualifier for the void * field).
		// NOTE(review): assumes sizeof(parm_context) keeps ctx[num]
		// aligned as the SPE side requires - confirm in common.h.
		data[num].argp = (void *)&ctx[num];
	}

	// Loop on all SPEs and for each perform three steps:
	// STEP 2: create SPE context
	// STEP 3: open images of SPE programs into main storage
	//         spe_names variable store the executable name
	// STEP 4: Load SPEs objects into SPE context local store
	for (num = 0; num < NUM_SPES; num++) {
		data[num].spe_ctx = spe_context_create(0, NULL);
		if (data[num].spe_ctx == NULL) {
			perror("Failed creating context");
			exit(1);
		}

		program[num] = spe_image_open(spe_names[num]);
		if (program[num] == NULL) {
			perror("Fail opening image");
			exit(1);
		}

		if (spe_program_load(data[num].spe_ctx, program[num])) {
			perror("Failed loading program");
			exit(1);
		}
	}

	// STEP 5: create SPE pthreads
	for (num = 0; num < NUM_SPES; num++) {
		// pthread functions return the error number instead of setting
		// errno, so report it with strerror() rather than perror().
		rc = pthread_create(&data[num].pthread, NULL, &spu_pthread,
		                    &data[num]);
		if (rc != 0) {
			fprintf(stderr, "Failed creating thread: %s\n",
			        strerror(rc));
			exit(1);
		}
	}

	// Loop on all SPEs and for each perform three steps:
	//   STEP 6: wait for the SPE pthread to complete
	//   STEP 7: destroy the SPE context
	//   STEP 8: close the SPE program image opened in STEP 3
	for (num = 0; num < NUM_SPES; num++) {
		rc = pthread_join(data[num].pthread, NULL);
		if (rc != 0) {
			fprintf(stderr, "Failed joining thread: %s\n",
			        strerror(rc));
			exit(1);
		}

		if (spe_context_destroy(data[num].spe_ctx)) {
			perror("Failed spe_context_destroy");
			exit(1);
		}

		if (spe_image_close(program[num])) {
			perror("Failed closing image");
			exit(1);
		}
	}

	printf(")PPE:) Complete running all super-fast SPEs\n");
	return 0;
}
