/* --------------------------------------------------------------- */
/* (C) Copyright 2001,2006,                                        */
/* International Business Machines Corporation,                    */
/*                                                                 */
/* All Rights Reserved.                                            */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx                                              */
#include <stdio.h>
#include <string.h>
#include <dacs.h>
#include "dacs_hello.h"

/*
 * Table of candidate places a child can say hello to.  Each entry is a
 * NUL-terminated string stored in a fixed 16-byte slot, and the table as a
 * whole is placed on a 128-byte boundary.  main() selects an entry using a
 * rolling index in the range 0 .. NUM_HELLO_PLACES-1.
 */
#define NUM_HELLO_PLACES 8
char hello_places[NUM_HELLO_PLACES][16] __attribute__ ((aligned(128))) = {
    [0] = "World",
    [1] = "USA",
    [2] = "Earth",
    [3] = "Austin",
    [4] = "Texas",
    [5] = "North America",
    [6] = "IBM",
    [7] = "Cell"
};

/* ==========================================================================
 *
 * This is a convoluted version of the "Hello World" program using most of the
 * DaCS on cell APIs.  Obviously, there are simpler means for doing the same
 * thing, but this is intended to demonstrate the use of the various DaCS
 * APIs.
 *
 * This is the main child (SPE) function executed by all children.  The steps
 * the children will perform are as follows:
 *
 *  1) Initialize DaCS on the child (SPE).
 *  2) Accept the shared resources, shared by the parent, needed for this 
 *     application:
 *      a) group (hello_group) used to synchronize this child with the parent 
 *         and the other children.
 *      b) shared memory (hello_shared_mem) contains the shared data
 *         containing the places the child will say hello to.
 *      c) reserve a wait id (hello_wid) for waiting on asynchronous operations
 *         such as DMAs and messages.
 *      d) mutex (hello_mutex) is used to synchronize access to the shared
 *         data (hello_shared_mem).
 *      e) read our mailbox, waiting until the parent sends us our ID. 
 *  3) Wait until the parent and other children have initialized.
 *  4) Update the shared data with our place:
 *      a) Lock the shared data, using the mutex, to get exclusive access
 *      b) DMA the remote shared data into local memory
 *      c) Wait until the incoming DMA completes.
 *      d) Update the local shared data places index.  This index indicates
 *         the last index used, so the next child knows which index to use.
 *      e) Fill in the local shared data place for this SPE.
 *      f) DMA the local data out to the remote memory.
 *      g) Wait until the outgoing DMA completes.
 *      h) Unlock the shared data
 *  5) Wait until all the other children have updated the shared memory.
 *  6) Receive the hello message to display, sent by the parent (PPE).
 *  7) Wait until the message has been received.
 *  8) Wait until all children have received their message to display.
 *  9) Print our hello message
 * 10) Release all the resources accepted or reserved in the beginning.
 * 11) If we are index 0, we intentionally induce an error by releasing our
 *     wid too many times. 
 * 12) Exit
 *
 *
 * Operation Matching:
 * -------------------
 * Many of the DaCS operations require multiple participants and handshakes.
 * Corresponding operations are denoted with matching titles for easy
 * matching between the PPE and SPE code.  For example, if you want to find
 * the barrier that corresponds to "BARRIER #2" in the PPE code, simply search
 * for "BARRIER #2".
 *
 * =========================================================================*/
/*
 * Child (SPE) entry point.
 *
 * Initializes DaCS, accepts the resources shared by the parent, records this
 * child's hello place in the shared memory under the shared mutex, receives
 * and prints the hello message, then releases everything and shuts DaCS
 * down.
 *
 * Returns 0 when the end of the function is reached.  Every DaCS return code
 * is passed to ERRCHK; what happens on failure depends on that macro, which
 * is defined outside this file.
 */
int
main()
{
    DACS_ERR_T rc;
    dacs_group_t hello_group;
    dacs_remote_mem_t hello_remote_mem;
    dacs_mutex_t hello_mutex;
    dacs_wid_t hello_wid;
    unsigned int idx, next_place;
    hello_shared_data_t hello_shared_mem;
    uint64_t hello_shared_mem_size;

    /*
     * Remember that all DaCS programs must begin with initializing DaCS.
     * This includes the children (SPE) programs.  If we fail to initialize,
     * most of the APIs will return an error indicating so.
     */
    rc = dacs_runtime_init(NULL, NULL);
    ERRCHK("SPE: dacs_runtime_init", rc);


    /*
     * The parent is going to share resources and information needed within
     * the application.  
     *
     * Note: Share/accept operations require a handshake between the two
     * participants involved, so it is critical that the ordering be the same
     * on both ends.  Otherwise, performing shares and accepts out of order 
     * will result in a deadlock.
     */

    /* ---------------- ACCEPT 1 - Group membership -------------------
     *
     * In this application, we know that the parent is going to first create
     * and add children to a group.  So, we must wait to be added.
     *
     * Accepting membership to a group is blocking until the child has been
     * added.  Being added to a group does not imply that the group has been
     * closed, only that the group membership handshake has been completed.
     *
     * --------------------------------------------------------------- */
    rc = dacs_group_accept(DACS_DE_PARENT, DACS_PID_PARENT, &hello_group);
    ERRCHK("SPE: dacs_group_accept", rc);


    /* ---------------- ACCEPT 2 - Shared memory use -----------------
     *
     * Next, our parent is going to share a memory region with us.  This area
     * will be used by the parent and all other children.
     *
     * --------------------------------------------------------------- */
    rc = dacs_remote_mem_accept(DACS_DE_PARENT, DACS_PID_PARENT, 
                                &hello_remote_mem);
    ERRCHK("SPE: dacs_remote_mem_accept", rc);


    /*
     * Before we go and use the remote shared memory, we should determine how
     * big it is.  There are 3 attributes that can be queried: size, address
     * and permissions.  In this case we only need the size.
     *
     * The return code must be checked here: the size is later used as the
     * DMA transfer length, and it is left uninitialized if the query fails.
     */
    rc = dacs_remote_mem_query(hello_remote_mem, DACS_REMOTE_MEM_SIZE,
                               &hello_shared_mem_size);
    ERRCHK("SPE: dacs_remote_mem_query", rc);

    /*
     * In order to do DMA or send/receive operations, we need a wait ID (wid).
     * This gives us a means to identify which asynchronous operations to wait
     * for completion on.
     *
     * This is not an "accept" operation, but it shows that things can be done
     * between accepts.  Keep in mind that the parent is blocked waiting for
     * the accept, and it is not nice to keep others waiting.
     */
    rc = dacs_wid_reserve(&hello_wid);
    ERRCHK("SPE: dacs_wid_reserve", rc);


    /* ---------------- ACCEPT 3 - mutex use         -----------------
     *
     * Last, our parent is going to share a mutex with us.  This mutex will be
     * used for synchronizing data access to the shared memory region that we 
     * accepted above.
     *
     * --------------------------------------------------------------- */
    rc = dacs_mutex_accept(DACS_DE_PARENT, DACS_PID_PARENT, &hello_mutex);
    ERRCHK("SPE: dacs_mutex_accept", rc);


    /* --------------------- MAILBOX #1 - Read --------------------------
     *
     * Next, our parent is going to tell us what our ID is, so check our
     * mailbox.  Mailboxes are synchronous, so we'll block until we have mail.
     * Our ID is used as an index into the shared data places array, for
     * indicating where we would like to say hello to.
     * 
     * Note: that we must indicate which DE, in this case our parent, is
     * sending us mail.
     *
     * Note: Just like the above resource sharing, mailbox reads and 
     * writes must also be matched up between the participants.  If one 
     * is missing it could result in an application hang or data 
     * corruption.
     *
     * ------------------------------------------------------------------ */
    rc = dacs_mailbox_read(&idx, DACS_DE_PARENT, DACS_PID_PARENT);
    ERRCHK("SPE: dacs_mailbox_read", rc);


    /* ---------------------------- BARRIER #1 --------------------------- 
     *
     * Wait for all the other children and the parent to get to this point
     * where we know they are fully initialized.  
     *
     * By waiting on the barrier we know two things have happened: 
     *      1) The group has been closed and 
     *      2) All group members have reached their corresponding barrier.
     *
     * ------------------------------------------------------------------- */
    rc = dacs_barrier_wait(hello_group);
    ERRCHK("SPE: dacs_barrier_wait", rc);
    

    /*
     * Now, the child needs to tell the parent who it wants to say hello to.
     * We will DMA this "who" into the shared DMA region, so the parent can
     * see it.  This is based on a counter in the shared memory, which we also
     * need to update.
     *
     * First we must get exclusive access to the remote shared memory, so that
     * we don't lose any data and we can increment the counter.  To do this we
     * will use the mutex that was shared with us up above.
     */
    rc = dacs_mutex_lock(hello_mutex);
    ERRCHK("SPE: dacs_mutex_lock", rc);

    /*
     * Now, the child is guaranteed exclusive access, so DMA the remote 
     * shared memory into our local buffer hello_shared_mem.  The data being
     * fetched is a structure that contains a place index (index into the list
     * of places to say hello to) and an array containing each SPE child's
     * place to say hello to.
     *
     * We are required to update and write to both a shared location in the
     * shared data region as well as to a private location.  For this reason,
     * we fetch the entire shared region as a whole and write it out as a
     * whole.  Since we access the data in this way, we use an offset of 0 to
     * the DaCS DMA commands. 
     *
     * If all children were only accessing only their private area, we could
     * simply adjust the DMA offset for our own data, and read/write a smaller
     * portion of the data.
     *
     * NOTE(review): the transfer length is the size reported by the parent,
     * not sizeof(hello_shared_mem).  This assumes the parent shared exactly
     * one hello_shared_data_t; a larger region would overrun the local
     * buffer.  Confirm against the PPE code.
     */
    rc = dacs_get(&hello_shared_mem, hello_remote_mem, 0, 
                  hello_shared_mem_size, hello_wid, 
                  DACS_ORDER_ATTR_NONE, DACS_BYTE_SWAP_DISABLE);
    ERRCHK("SPE: dacs_get", rc);

    /*
     * Every DMA or message passing call must be followed by a wait/test
     * call.  This is how we know that our data has arrived and is safe to
     * consume.  Here we are using the blocking wait call that returns once
     * the DMAs associated with hello_wid have completed.
     */
    rc = dacs_wait(hello_wid);
    ERRCHK("SPE: dacs_wait", rc);

    /*
     * Update local copy of the place index to the next index 
     * (rolling count 0 -> NUM_HELLO_PLACES-1).  Then, lookup the place
     * corresponding to the index and copy it to the child's index within the
     * local place array.
     *
     * NOTE(review): idx comes straight from the parent's mailbox and is not
     * range-checked here; presumably the parent only sends valid indices
     * into place_array -- verify against the PPE code.
     */
    next_place = (hello_shared_mem.last_place + 1) % NUM_HELLO_PLACES;
    hello_shared_mem.last_place = next_place;
    strcpy(hello_shared_mem.place_array[idx], hello_places[next_place]);

    /*
     * The local copy of the shared remote memory data has been updated to 
     * include this child's hello place and the last place index has been
     * incremented.  Now, we can write the updated copy back out to the remote
     * location.
     *
     * We are required to update and write to both a shared location in the
     * shared data region as well as to a private location.  For this reason,
     * we fetch the entire shared region as a whole and write it out as a
     * whole.  Since we access the data in this way, we use an offset of 0 to
     * the DaCS DMA commands. 
     *
     * If all children were only accessing only their private area, we could
     * simply adjust the DMA offset for our own data, and read/write a smaller
     * portion of the data.
     */
    rc = dacs_put(hello_remote_mem, 0, &hello_shared_mem,
                  hello_shared_mem_size, hello_wid,
                  DACS_ORDER_ATTR_NONE, DACS_BYTE_SWAP_DISABLE);
    ERRCHK("SPE: dacs_put", rc);

    /*
     * Again, we need to follow every DMA with a wait/test call.  In this case 
     * we are going to call test in a loop, waiting for a non-busy return.
     * This is equivalent to doing a wait, except it allows us to perform
     * other work while we are waiting.
     */
    while ((rc = dacs_test(hello_wid)) == DACS_WID_BUSY) {
        /* 
         * Do some real work here
         */
    }
    ERRCHK("SPE: dacs_test", rc);

    /*
     * Ok, the shared remote data has been updated for all to see.  We can now
     * release the exclusive use of the memory by unlocking it.
     */
    rc = dacs_mutex_unlock(hello_mutex);
    ERRCHK("SPE: dacs_mutex_unlock", rc);


    /* ---------------------------- BARRIER #2 --------------------------- 
     *
     * Wait for all the children to update the shared data.
     *
     * ------------------------------------------------------------------- */
    rc = dacs_barrier_wait(hello_group);
    ERRCHK("SPE: dacs_barrier_wait", rc);


    /* ------------------------ MESSAGE #1 - Receive ------------------------
     *
     * The parent is now going to send the child the hello message to display.
     * Calling receive tells DaCS where to receive the incoming asynchronous
     * message.
     *
     * Note: Sends and receives are matched according to their stream ID and
     * DE/PID pair.  Use of unique IDs between message transactions allows
     * sends and receives to be taken out of order.  Message transactions
     * using the same stream ID, must be ordered.
     *
     * ------------------------------------------------------------------- */
    rc = dacs_recv(hello_message, sizeof(hello_message), DACS_DE_PARENT, 
                   DACS_PID_PARENT, HELLO_STREAM_ID, hello_wid, 
                   DACS_BYTE_SWAP_DISABLE);
    ERRCHK("SPE: dacs_recv", rc);


    /*
     * Like put/get, messages are asynchronous, so we must call wait/test to
     * determine whether the message has been successfully received.  We will
     * block here until our message (or an error) is received.
     */
    rc = dacs_wait(hello_wid);
    ERRCHK("SPE: dacs_wait", rc);


    /* ---------------------------- BARRIER #3 --------------------------- 
     *
     * Wait for all the children to receive their message to display.
     *
     * ------------------------------------------------------------------- */
    rc = dacs_barrier_wait(hello_group);
    ERRCHK("SPE: dacs_barrier_wait", rc);

    /*
     * Woo-hoo!!  We can print our hello message.  We also print our index.
     */
    PRINTF("SPE %d: %s\n", idx, hello_message);

    /*
     * Well, we are done doing our work and we are on our way out.  Before we
     * leave, however, we must first release all the shared resources we
     * accepted in the beginning of the application. 
     *
     * Note: Resources do not need to be released in any particular order.
     * 
     * Note: Releasing the various resources causes the handles to be
     *       invalidated, so don't attempt to use them after the release.
     */
    rc = dacs_remote_mem_release(&hello_remote_mem);
    ERRCHK("SPE: dacs_remote_mem_release", rc);

    rc = dacs_group_leave(&hello_group);
    ERRCHK("SPE: dacs_group_leave", rc);

    rc = dacs_mutex_release(&hello_mutex);
    ERRCHK("SPE: dacs_mutex_release", rc);

    rc = dacs_wid_release(&hello_wid);
    ERRCHK("SPE: dacs_wid_release", rc);

    /*
     * Test the error handler that the parent registered.  This will
     * demonstrate how and what the error services do.
     *
     * By attempting to release the wid a second time we should induce a
     * DACS_ERR_INVALID_WID failure on the child.
     *
     * NOTE(review): sleep() is not declared by any header included directly
     * in this file; presumably dacs_hello.h pulls in its declaration --
     * confirm.
     */
    if (idx == 0) {
        sleep(1);
        PRINTF("SPE %d: **** Inducing a DACS_ERR_INVALID_WID "
               "failure... ****\n", idx);
        rc = dacs_wid_release(&hello_wid);
        ERRCHK("SPE: dacs_wid_release", rc);
    }

    /*
     * Well, it has been fun, but it is time to leave.  Make sure that we
     * close down DaCS before returning.  This makes sure that certain
     * resources have been released and prevents leaks within DaCS.
     *
     * Of course, by leaving, you won't be allowed to use any of the DaCS API
     * services as DaCS has essentially gone away.
     */
    dacs_runtime_exit();


    return 0;
}

