/* --------------------------------------------------------------- */
/* (C) Copyright 2001,2006,                                        */
/* International Business Machines Corporation,                    */
/*                                                                 */
/* All Rights Reserved.                                            */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx                                              */

#include <stdio.h>
#include <string.h>
#include <dacs.h>
#include "dacs_hello.h"

/* -------------------- Embedded SPE image handle -------------------------
 * Define an SPE DaCS program handle.  This program handle contains data
 * pertaining to an embedded SPE image.  The handle data is filled in during
 * linking and *MUST* be named the same as the SPE executable.
 */
extern dacs_program_handle_t dacs_hello_spu;

/* -------------------- External SPE file handle  ---------------------------
 * An alternative to embedding the SPE executable in the PPE image is to
 * declare it externally.  This allows a bit more flexibility than the
 * embedded mechanism, but you have to make sure your SPE executable is
 * available at the path indicated.
 */
char *dacs_hello_spu_filename = "./spu/dacs_hello_spu";

/*
 * Error handler that main() registers with DaCS through
 * dacs_errhandler_reg().  The definition follows main() in this file.
 */
uint32_t hello_error_handler(dacs_error_t);

/* ==========================================================================
 *
 * This is a convoluted version of the "Hello World" program using most of the
 * DaCS on cell APIs.  Obviously, there are simpler means for doing the same
 * thing, but this is intended to demonstrate the use of the various DaCS
 * APIs.
 *
 * This is the main parent (PPE) function.  The steps the parent will perform
 * are as follows:
 *
 *  1) Initialize DaCS on the parent (PPE).
 *  2) Create and initialize the shared resources needed for this application:
 *      a) group (hello_group) used to synchronize the parent and children
 *      b) shared memory (hello_shared_mem) contains the shared data
 *         containing the places the children will say hello to.
 *      c) reserve a wait id (hello_wid) for waiting on asynchronous
 *         operations such as DMAs and messages.
 *      d) mutex (hello_mutex) is used to synchronize access to the shared
 *         data (hello_shared_mem).
 *  3) Determine the number of SPE children available then reserve them.
 *  4) Loop on each reserved SPE child
 *      a) Start execution on the SPE child.
 *      b) Add the child to our process synchronization group
 *      c) Share the places data with the child
 *      d) Share the mutex with the child
 *      e) Send the child its index via mailbox
 *  5) Add the parent (PPE) to the process synchronization group
 *  6) Close the group so it can be used.
 *  7) Wait for the children to complete initialization.  Some of the children
 *     may be lagging behind on initialization, so wait until they have caught
 *     up.
 *  8) Wait for the children to update their pieces of the shared data.  This
 *     is where the children indicate where they will say hello to.
 *  9) Send each SPE child a message indicating what message to display.  The
 *     message is dependent on the place the child specified it would say
 *     hello to.
 * 10) Wait for all the sent messages to complete.
 * 11) Wait for all the children to receive their messages.
 * 12) Wait for all the children to display their message and exit.
 * 13) Release all the shared resources created at the beginning of the
 *     application.
 * 14) Exit
 *
 *
 * Operation Matching:
 * -------------------
 * Many of the DaCS operations require multiple participants and handshakes.
 * Corresponding operations are denoted with matching titles for easy
 * matching between the PPE and SPE code.  For example, if you want to find
 * the barrier that corresponds to "BARRIER #2" in the SPE code, simply search
 * for "BARRIER #2".
 *
 * =========================================================================*/
int
main(int argc __attribute__((unused)), char **args __attribute__ ((unused)))
{
    DACS_ERR_T rc;
    int32_t status;
    uint32_t i;
    uint32_t num_avail_child, num_rsvd_child;
    de_id_t rsvd_child_des[MAX_NCHILD];
    dacs_process_id_t rsvd_child_pids[MAX_NCHILD];
    dacs_group_t hello_group;
    dacs_remote_mem_t hello_remote_mem;
    dacs_wid_t hello_wid;
    dacs_mutex_t hello_mutex;

    /* ======================================================================
     *
     * Initialize DaCS and the resources the application will need
     *
     * ====================================================================*/

    /*
     * First things first.  All DaCS applications must begin with a call to
     * initialize DaCS.  This sets up all the DaCS queues and data structures,
     * so all the DaCS APIs can be used.
     */
    rc = dacs_runtime_init(NULL, NULL);
    ERRCHK("PPE: dacs_runtime_init", rc);

    /*
     * Just in case the application experiences an error, a handler should be
     * registered.
     */
    rc = dacs_errhandler_reg(hello_error_handler, 0);
    ERRCHK("PPE: dacs_errhandler_reg", rc);

    /*
     * There will be points during our application that the child processes
     * will need to be synchronized.  This is done through the use of a
     * barrier.  The set of participants in a synchronizing barrier is
     * defined by using a group.  Groups are used to define a common set of
     * participants in a collective operation such as a barrier.
     *
     * Note: All members of a group must participate in group operations for
     *       the operation to behave properly.
     *
     * We will create and initialize our group now.
     */
    rc = dacs_group_init(&hello_group, 0);
    ERRCHK("PPE: dacs_group_init", rc);

    /*
     * We are going to need a memory region, so the children can tell us where
     * they are from.  One way to do this is to have each child DMA their
     * whereabouts into a shared buffer that everyone can see.  Let's create
     * that shared memory region.
     *
     * This call does not actually allocate the shared memory, it simply sets
     * up an already allocated memory region to be shared.
     */
    rc = dacs_remote_mem_create(&hello_shared_mem, sizeof(hello_shared_mem),
                                DACS_READ_WRITE, &hello_remote_mem);
    ERRCHK("PPE: dacs_remote_mem_create", rc);

    /*
     * In order to do DMA or send/receive operations, we need a wait ID (wid).
     * This gives us a means to identify which asynchronous operations to wait
     * for completion on.
     */
    rc = dacs_wid_reserve(&hello_wid);
    ERRCHK("PPE: dacs_wid_reserve", rc);

    /*
     * All the children will be accessing the shared memory simultaneously, so
     * in order to guarantee its coherence, we need to synchronize access to
     * it.  We can do this through a mutex, so let's create and initialize a
     * mutex now.
     */
    rc = dacs_mutex_init(&hello_mutex);
    ERRCHK("PPE: dacs_mutex_init", rc);

    /*
     * Now we need some children to do the work.  Let's determine how many
     * accelerator children of type SPE are available for use.  The count of
     * available children, at the time of this call, will be returned in
     * num_avail_child.
     */
    rc = dacs_get_num_avail_children(DACS_DE_SPE, &num_avail_child);
    ERRCHK("PPE: dacs_get_num_avail_children", rc);

    /*
     * We now know roughly how many SPE children are available.  Before we can
     * use any of the available children, they must first be reserved.  We
     * will attempt to reserve the number of SPE children discovered above.
     *
     * Note: This number could have changed since the previous call.  The
     * actual number of children reserved will be returned in the in/out
     * num_rsvd_child variable passed in.  This count will indicate the
     * number of children DEs returned in the supplied DE output array,
     * rsvd_child_des.
     *
     * The DE and PID arrays only have room for MAX_NCHILD entries, so never
     * ask for more children than that, no matter how many are available.
     */
    num_rsvd_child = num_avail_child;
    if (num_rsvd_child > MAX_NCHILD) {
        num_rsvd_child = MAX_NCHILD;
    }
    rc = dacs_reserve_children(DACS_DE_SPE, &num_rsvd_child, rsvd_child_des);
    ERRCHK("PPE: dacs_reserve_children", rc);

    PRINTF("PPE: Hello from my %u SPE children.\n",
           (unsigned int)num_rsvd_child);

    /*
     * Now that we have reserved our children, we are safe to start them up.
     * We will start each of the children using either the global program
     * handle (dacs_hello_spu) or the program path (dacs_hello_spu_filename)
     * declared above.  The program handle can either be an embedded SPE
     * program handle or a string containing the SPE program path.
     *
     * Starting a child will produce the second piece of information needed
     * for identifying the reserved child resource, a process ID.  We will
     * start half of our children using an embedded handle and the other half
     * using the actual file path name.
     */
    for (i = 0; i < num_rsvd_child; i++) {
        /*
         * For demonstration purposes, half of our children will be started
         * using an embedded program handle and the other half will be
         * started using an external program.
         */
        if ((i % 2) == 0) {
            /*
             * This half of the children are started using an embedded
             * program handle.  This means our SPE program is embedded within
             * the main PPE executable.
             */
            rc = dacs_de_start(rsvd_child_des[i], &dacs_hello_spu,
                               NULL, NULL, DACS_PROC_EMBEDDED,
                               &rsvd_child_pids[i]);
            ERRCHK("PPE: dacs_de_start", rc);
        }
        else {
            /*
             * This half of the SPEs are started by using the program
             * filename path. This means an SPE program, external to the main
             * PPE executable, will be used.
             *
             * Note: Make sure the separate SPE executable is available to the
             * program at the supplied path, otherwise an error will occur.
             */
            rc = dacs_de_start(rsvd_child_des[i], dacs_hello_spu_filename,
                               NULL, NULL, DACS_PROC_LOCAL_FILE,
                               &rsvd_child_pids[i]);
            ERRCHK("PPE: dacs_de_start", rc);
        }

        if (i == 0) {
            /* Explicit casts keep the arguments matched to the conversions. */
            PRINTF("PPE: Eventually, SPE child 0 will fail.\n"
                   "     SPE 0's DE = 0x%x and PID = 0x%llx\n",
                   (unsigned int)rsvd_child_des[i],
                   (unsigned long long)rsvd_child_pids[i]);
        }

        /*
         * We have started the SPE child at this point and it is executing.
         * The resources that we created and initialized up above can now be
         * shared with the child.
         *
         * The child must accept the resources in the same order which the
         * parent shares them, otherwise a deadlock will occur.
         */

        /* ---------------- SHARE 1 - Group membership -------------------
         *
         * This child will be part of our process synchronization, so we must
         * add it to the group, so it can participate in barriers.
         *
         * --------------------------------------------------------------- */
        rc = dacs_group_add_member(rsvd_child_des[i], rsvd_child_pids[i],
                                   hello_group);
        ERRCHK("PPE: dacs_group_add_member", rc);


        /* ---------------- SHARE 2 - Shared memory use  -----------------
         *
         * This child will need to be able to write to our shared memory,
         * so we need to share the memory with the child.
         *
         * --------------------------------------------------------------- */
        rc = dacs_remote_mem_share(rsvd_child_des[i], rsvd_child_pids[i],
                                   hello_remote_mem);
        ERRCHK("PPE: dacs_remote_mem_share", rc);


        /* ---------------- SHARE 3 - Mutex use        -------------------
         *
         * Access to the shared memory above is synchronized, so this child
         * must also have access to the protecting mutex.  Share the mutex
         * with the child.
         *
         * --------------------------------------------------------------- */
        rc = dacs_mutex_share(rsvd_child_des[i], rsvd_child_pids[i],
                              hello_mutex);
        ERRCHK("PPE: dacs_mutex_share", rc);


        /* --------------------- MAILBOX #1 - Write ---------------------
         *
         * We should tell each child what index they are so we know who is
         * saying hello.
         *
         * This is a good opportunity to use a mailbox.  Mailboxes are good
         * for sending short messages an int at a time.  Mailbox communication
         * is synchronous, so the mail recipient (reader) will block until the
         * sender (writer) has posted a message.
         *
         * Note: Just like the above resource sharing, mailbox reads and
         * writes must also be matched up between the participants.  If one
         * is missing it could result in an application hang or data
         * corruption.
         *
         * -------------------------------------------------------------- */
        rc = dacs_mailbox_write(&i, rsvd_child_des[i], rsvd_child_pids[i]);
        ERRCHK("PPE: dacs_mailbox_write", rc);
    }

    /*
     * The parent PPE is going to participate in the process synchronization
     * as well.  Membership is not automatic, so the parent must explicitly
     * add itself to the group.  Acceptance is implied when adding itself, so
     * no accept call is needed.
     */
    rc = dacs_group_add_member(DACS_DE_SELF, DACS_PID_SELF, hello_group);
    ERRCHK("PPE: dacs_group_add_member", rc);

    /*
     * We have added all the group members, but we can't use the group until
     * it has been closed.  Collective group operations, such as barriers,
     * will block until the group is closed.
     */
    rc = dacs_group_close(hello_group);
    ERRCHK("PPE: dacs_group_close", rc);


    /* ---------------------------- BARRIER #1 ---------------------------
     *
     * The group is closed and we are ready to roll.  Let's wait until all
     * of the children get to this point where they are fully initialized.
     *
     * A barrier wait is used as a synchronizing point.  Progress can't be
     * made past the barrier until all group participants have reached a
     * corresponding barrier.
     *
     * ------------------------------------------------------------------- */
    rc = dacs_barrier_wait(hello_group);
    ERRCHK("PPE: dacs_barrier_wait", rc);


    /* ---------------------------- BARRIER #2 ---------------------------
     *
     * Wait for all the children to update the shared memory data with the
     * name of the place to say hello to.
     *
     * ------------------------------------------------------------------- */
    rc = dacs_barrier_wait(hello_group);
    ERRCHK("PPE: dacs_barrier_wait", rc);


    /* ------------------------ MESSAGE #1 - Send ------------------------
     *
     * All the children have updated the shared memory with their place to say
     * hello to.  Now, loop through and send each child a message of what
     * message to display, using the shared data they supplied.
     *
     * Note: Sends and receives are matched according to their stream ID and
     * DE/PID pair.  Use of unique IDs between message transactions allows
     * sends and receives to be taken out of order.  Message transactions
     * using the same stream ID, must be ordered.
     *
     * ------------------------------------------------------------------- */
    for (i = 0; i < num_rsvd_child; i++) {
        /*
         * NOTE(review): this sprintf is unbounded.  hello_message is declared
         * outside this file, so its capacity is not visible here; confirm it
         * can hold "Hello " plus the longest place name a child may write.
         */
        sprintf(hello_message, "Hello %s", hello_shared_mem.place_array[i]);
        rc = dacs_send(hello_message, strlen(hello_message)+1,
                       rsvd_child_des[i], rsvd_child_pids[i], HELLO_STREAM_ID,
                       hello_wid, DACS_BYTE_SWAP_DISABLE);
        ERRCHK("PPE: dacs_send", rc);
    }

    /*
     * As usual, the send is asynchronous, so we need to wait.  In this case,
     * all the sends are done under the same wid, so we can do just a single
     * wait for all the sends to complete.
     */
    rc = dacs_wait(hello_wid);
    ERRCHK("PPE: dacs_wait", rc);


    /* ---------------------------- BARRIER #3 ---------------------------
     *
     * Wait for all the children to receive their hello messages.
     *
     * ------------------------------------------------------------------- */
    rc = dacs_barrier_wait(hello_group);
    ERRCHK("PPE: dacs_barrier_wait", rc);


    /*
     * The children are printing their messages.  Let's wait for them to
     * finish, so we can clean up and exit.
     *
     * There are two ways to determine whether a process has completed:
     *      1) A call to dacs_de_wait can be used to block until the specified
     *         DE/PID has terminated.  This call will return with status when
     *         the child has either failed or completed successfully.
     *      2) A call to dacs_de_test can be used to periodically check the
     *         current state of a DE/PID.  This call returns the same status
     *         as wait with the exception that it could also return that the
     *         process is still running.
     *
     * Note: After a non-running status is returned from test or wait
     *       completes, the DE/PID is considered to be gone.  Calls thereafter
     *       will return invalid PID.  So, save your status.
     *
     * Note: Unlike most of the other DaCS API, the dacs_de_wait/test calls
     *       return positive status indicating the state of the tested/waited
     *       DE/PID.  These status are both for failures and successes.
     *
     * Failures here are only reported, not treated as fatal, so that the
     * remaining children are still waited on and the clean-up below runs.
     */
    for (i = 0; i < num_rsvd_child; i++) {
        /*
         * For demonstration purposes, half of the children will be waited on
         * using dacs_de_wait, the other half with dacs_de_test.  This is not a
         * normal practice.
         */
        if ((i % 2) == 0) {
            /*
             * This half of the children we will wait on by using the blocking
             * wait call.  We are stuck until the child completes.
             */
            rc = dacs_de_wait(rsvd_child_des[i], rsvd_child_pids[i], &status);
            if (rc < 0) {
                PRINTF("dacs_de_wait failed - %s\n", dacs_strerror(rc));
            }
        }
        else {
            /*
             * This half of the children we will wait on by polling the
             * child's state using dacs_de_test.  In between status checks we
             * could be doing real work.
             */
            while ((rc = dacs_de_test(rsvd_child_des[i],
                                      rsvd_child_pids[i], &status)) ==
                                                    DACS_STS_PROC_RUNNING) {
                /*
                 * Do real work here
                 */
            }
            if (rc < 0) {
                PRINTF("dacs_de_test failed - %s\n", dacs_strerror(rc));
            }
        }
    }


    /*
     * All the children are done and we are heading out, but before we do so
     * we need to release all the shared resources we created in the
     * beginning.
     *
     * Note: Resources do not need to be released in any particular order.
     *
     * Note: Releasing the various resources causes the handles to be
     *       invalidated, so don't attempt to use them after the release.
     */
    rc = dacs_group_destroy(&hello_group);
    ERRCHK("PPE: dacs_group_destroy", rc);

    rc = dacs_remote_mem_destroy(&hello_remote_mem);
    ERRCHK("PPE: dacs_remote_mem_destroy", rc);

    rc = dacs_mutex_destroy(&hello_mutex);
    ERRCHK("PPE: dacs_mutex_destroy", rc);

    rc = dacs_wid_release(&hello_wid);
    ERRCHK("PPE: dacs_wid_release", rc);

    /*
     * So, everything is cleaned up and all the children have completed.  We
     * are safe to close down DaCS.  This will cause DaCS to tear down all
     * internal structures and data.  DaCS API calls (other than
     * dacs_runtime_init) will fail after this has been called.
     *
     * A failure at this point is reported but does not change the exit
     * status, since all of the application's work has already completed.
     */
    rc = dacs_runtime_exit();
    if (rc < 0) {
        PRINTF("PPE: dacs_runtime_exit failed - %s\n", dacs_strerror(rc));
    }

    return 0;

}

/*
 * hello_error_handler - DaCS error callback registered by main() through
 * dacs_errhandler_reg().
 *
 * Queries the supplied DaCS error object for its error code, its descriptive
 * string and the DE/PID pair it refers to, then reports them with PRINTF.
 *
 * error: the DaCS error object to describe.
 *
 * Always returns 0.
 */
uint32_t
hello_error_handler(dacs_error_t error)
{
    /*
     * Give every output a defined value up front.  The return codes of the
     * accessors below are not checked, so without these defaults a failing
     * accessor would leave us printing indeterminate data.
     */
    de_id_t de = 0;
    dacs_process_id_t pid = 0;
    const char *str = NULL;
    uint32_t code = 0;

    /*
     * Use the dacs error interfaces to determine the details about the
     * error.
     */
    dacs_error_code(error, &code);
    dacs_error_str(error, &str);
    dacs_error_de(error, &de);
    dacs_error_pid(error, &pid);

    /* Never hand a NULL pointer to the %s conversion. */
    if (str == NULL) {
        str = "unknown";
    }

    /*
     * The code is printed as a signed value, as before; the casts simply
     * make each argument match its conversion specifier exactly.
     */
    PRINTF("PPE: Uh-oh ... we took a %s (%d) error on DE/PID 0x%x/0x%llx\n",
            str, (int)code, (unsigned int)de, (unsigned long long)pid);

    return 0;
}

