// -------------------------------------------------------------- 
// (C)Copyright 2007                                         
// International Business Machines Corporation, 
// All Rights Reserved.
// -------------------------------------------------------------- 

#include <stdio.h>
#include <stdint.h>

#include <stdlib.h>
#include <string.h>
#include <libspe2.h>
#include <pthread.h>

// Number of SPE contexts/threads created by main(). It is also used to index
// the per-SPE arrays of SharedData_s, which hold 8 entries, so it must not
// exceed 8.
#define SPU_NUM	8

// Per-thread record describing one SPE context and the PPE thread that runs it
typedef struct spu_data {
  // SPE context that spu_pthread() runs for this record
  spe_context_ptr_t spe_ctx;
  // Handle of the PPE thread created for this record (joined in main)
  pthread_t pthread;
  // Passed as the 4th argument of spe_context_run; main() sets it to &SharedData
  void *argp;
  // Passed as the 5th argument of spe_context_run; main() stores the SPE index
  // in it as a pointer-sized integer, not as a real address
  void *spu_id;
} spu_data_t;

// One record per SPE thread; main() indexes it with the SPE number 0..SPU_NUM-1
spu_data_t data[SPU_NUM];

// Thread entry point: run the SPE context described by one spu_data_t ======
// arg    - pointer to the spu_data_t record of the SPE to run
// return - does not return normally: the thread terminates through
//          pthread_exit(NULL), or the whole process exits with status 1 when
//          spe_context_run reports a failure
void *spu_pthread(void *arg) {

	uint32_t start_addr = SPE_DEFAULT_ENTRY;
	spu_data_t *self = (spu_data_t *)arg;
	int rc;

	// argp and spu_id are forwarded unchanged as the 4th and 5th arguments
	rc = spe_context_run(self->spe_ctx, &start_addr, 0,
	                     self->argp, self->spu_id, NULL);
	if (rc < 0) {
		perror ("Failed running context");
		exit (1);
	}

	pthread_exit(NULL);
}

// Control and statistics block whose address main() passes to every SPE
// thread through argp. With 4-byte int the members add up to
// 2*4 + 8*8 + 8*4 + 24 = 128 bytes; the member order and the padding must be
// kept in sync if a field is added or resized.
typedef struct {
	int			processingStep; // Variable to contain the overall workload processing step
	int 		exitSignal; // Shared variable to signal end of processing step
	
	uint64_t 	accumulatedTime[8]; // Structure to contain workload dynamic execution statistics (max. 8 SPE)
	int			accumulatedSteps[8]; // Per-SPE step count; main() divides accumulatedTime[i] by it to print an average
	
	char		_dummyAlignment[24]; // 24 bytes to set the structure size equal to 128 bytes (size of a cache line)
} SharedData_s;

// Main memory version of the shared structure, aligned on a 128-byte boundary.
// The structure is padded to 128 bytes so that it occupies a single cache line.
// main() zeroes it, hands its address to every SPE thread through argp, and
// reads the statistics back after all threads have been joined.
static volatile SharedData_s SharedData __attribute__ ((aligned(128)));

// PPE entry point: starts SPU_NUM SPE threads running the "spu/spu" image,
// waits for all of them, then prints the average processing time per SPE.
// argc, argv - unused
// return     - 0 on success, -1 if the SPE image cannot be opened; every other
//              failure prints a message with perror and exits with status 1
int main(int argc, char *argv[])
{
	int i;
	spe_program_handle_t *program;

	// Command-line arguments are intentionally ignored
	(void)argc;
	(void)argv;

	printf("PPE Start:\n");

	// Initialize the shared data structure
	SharedData.exitSignal = 0;
	SharedData.processingStep = 0;

	for( i = 0 ; i < SPU_NUM ; ++i ) {
		SharedData.accumulatedTime[i] = 0;
		SharedData.accumulatedSteps[i] = 0;
		data[i].argp = (void*)&SharedData;
		// The SPE index travels as a pointer-sized integer; converting through
		// uintptr_t avoids an int-to-pointer cast of a different size
		data[i].spu_id = (void*)(uintptr_t)i;
	}

	// Create the SPE contexts
	for( i = 0 ; i < SPU_NUM ; ++i ) {
		if ((data[i].spe_ctx = spe_context_create (0, NULL)) == NULL) {
			perror ("Failed creating context"); exit (1);
		}
	}

	// Load SPE program into memory
	// NOTE(review): the handle returned here is never released in this
	// function - confirm whether it should be closed once the threads are done
	if (!(program = spe_image_open("spu/spu"))) {
		perror("Fail opening image"); return -1;
	}

	// Load the SPU executable into all the SPE involved in the workload
	for( i = 0 ; i < SPU_NUM ; ++i ) {
		if (spe_program_load(data[i].spe_ctx, program)) {
			perror ("Failed loading program");	exit (1);
		}
	}

	// Launch the computing threads
	for( i = 0; i < SPU_NUM; ++i) {
		if(pthread_create(&data[i].pthread,NULL,&spu_pthread,
			&data[i])){
			perror("Failed creating thread");  exit(1);
		}
	}

	// Wait for the SPE threads to complete
	for( i = 0; i < SPU_NUM ; ++i) {
		if (pthread_join (data[i].pthread, NULL)) {
			perror("Failed joining thread"); exit (1);
		}

		if (spe_context_destroy( data[i].spe_ctx   )) {
			perror("Failed spe_context_destroy"); exit(1);
		}
	}

	// Output the statistics
	for( i = 0; i < SPU_NUM ; ++i) {
		uint64_t total = SharedData.accumulatedTime[i];
		int steps = SharedData.accumulatedSteps[i];
		// An SPE that recorded no steps is reported as 0 instead of dividing by zero
		uint64_t average = (steps > 0) ? total / (uint64_t)steps : 0;

		// The value is unsigned 64-bit: print it with %llu and a matching cast
		printf("SPE %d - Avg. processing time (decrementer steps): %llu\n", i, (unsigned long long)average);
	}

	return (0);
}

