/*
 * dataflow.c
 *
 * stuff that controls the dataflow of operators between lfBegin/lfEnd
 */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include "lightfield.h"

/*
 * Extra position slots private to this file.  They are stored alongside the
 * regular LF_*_POS entries (assumes those occupy indices 0..4 -- TODO confirm
 * against lightfield.h).
 */
#define	VV_POS		5	/* op_pos[]: V index at which the current block started */
#define	UV_POS		6	/* chain_pos[]: count of training blocks taken from the current slab */

/*
 * Per-invocation update hook for a stage that emits one sample (point) at a
 * time: step the (s, t) position to the next sample, wrapping t into s, and
 * move the output cursor past the sample just produced.
 */
void
lfSerialUpdatePoint(LFSlab **slabs, int nslab, struct LFInternOp *op)
{
    int *pos = op->op_pos;
    LFShared *shared = slabs[pos[LF_SLAB_POS]]->shared;

    /* advance along t; a full row of t rolls over into the next s */
    pos[LF_T_POS]++;
    if (pos[LF_T_POS] == shared->nt) {
	pos[LF_T_POS] = 0;
	pos[LF_S_POS]++;
    }

    op->op_output += op->op_output_size;
}

/*
 * Per-invocation update hook for a stage that emits one sample at a time
 * while a whole block of slices is being produced: step t, then s, then v
 * (restarting v at the block's first column and stepping u once a tile
 * column is finished), and move the output cursor past the sample.
 */
void
lfSerialUpdateBlockPoint(LFSlab **slabs, int nslab, struct LFInternOp *op)
{
    int *pos = op->op_pos;
    LFShared *shared = slabs[pos[LF_SLAB_POS]]->shared;

    /* the output cursor moves regardless of how the position wraps */
    op->op_output += op->op_output_size;

    pos[LF_T_POS]++;
    if (pos[LF_T_POS] != shared->nt)
	return;
    pos[LF_T_POS] = 0;

    pos[LF_S_POS]++;
    if (pos[LF_S_POS] != shared->ns)
	return;
    pos[LF_S_POS] = 0;

    /* a slice is complete: go to the next v inside the tile */
    pos[LF_V_POS]++;
    if ((pos[LF_V_POS] & ((1 << shared->vq->vbits) - 1)) == 0) {
	/* ran off the tile in v: rewind to the block's first v, step u */
	pos[LF_V_POS] = pos[VV_POS];
	pos[LF_U_POS]++;
    }
}


/* Update hook for stages that need no bookkeeping between invocations. */
void
lfUpdateNoop(LFSlab **slabs, int nslab, struct LFInternOp *op)
{
    /* intentionally empty; the casts only mark the parameters as unused */
    (void)slabs;
    (void)nslab;
    (void)op;
}

/*
 * Per-invocation update hook for a stage that emits one whole slice at a
 * time while a block of slices is being produced: step v inside the tile
 * (restarting at the block's first column and stepping u when the tile
 * column is finished), and move the output cursor past the slice.
 */
void
lfSerialUpdateBlockSlice(LFSlab **slabs, int nslab, struct LFInternOp *op)
{
    int *pos = op->op_pos;
    LFVQCodebook *codebook = slabs[pos[LF_SLAB_POS]]->shared->vq;
    int v_in_tile_mask = (1 << codebook->vbits) - 1;

    pos[LF_V_POS]++;
    if ((pos[LF_V_POS] & v_in_tile_mask) == 0) {
	pos[LF_V_POS] = pos[VV_POS];
	pos[LF_U_POS]++;
    }

    op->op_output += op->op_output_size;
}

/*
 * Number of slices of a slab that go into the VQ training set: the
 * train_pct share of the nu*nv slices, at least one, rounded up to a
 * multiple of the block size 1 << (ubits + vbits).
 */
static int
slab_train_slices(LFSlab *slab)
{
    LFShared *shared = slab->shared;
    LFVQCodebook *codebook = shared->vq;
    int block_mask = (1 << (codebook->ubits + codebook->vbits)) - 1;
    int wanted = shared->nu * shared->nv * codebook->train_pct;

    if (wanted == 0)
	wanted = 1;

    /* round up to a whole number of blocks */
    return (wanted + block_mask) & ~block_mask;
}

/*
 * Total number of bytes needed to hold the training slices of every slab
 * in the list.
 */
static int
slabs_train_size(LFSlab **slabs, int nslab)
{
    int bytes = 0;
    int k = 0;

    while (k < nslab) {
	LFShared *shared = slabs[k]->shared;

	bytes += shared->ns * shared->nt * shared->sample_size *
		 slab_train_slices(slabs[k]);
	k++;
    }

    return bytes;
}

/*
 * Configure every stage of the current operator chain for the slab at
 * ops->chain_pos[LF_SLAB_POS], then allocate the chain's destination buffer.
 *
 * For each LFInternOp this releases any previously allocated input buffer
 * and sets op_input_size, op_output_size and op_loop (the number of times
 * dataflow() invokes the stage per pass over the chain).  Stages that read
 * what the previous stage wrote also get a fresh op_input_alloced buffer.
 *
 * slabs, nslab - slab list being processed
 * ops          - chain state; chain_ops[] and chain_mem are updated
 *
 * There is no need to call this function multiple times if all slabs share
 * the same attributes (except the data).
 *
 * NOTE(review): none of the malloc/calloc results below are checked, and
 * op_method values other than the ones listed leave op_output_size/op_loop
 * untouched.
 */
static void
determine_slab_dataflow(LFSlab **slabs, int nslab, LFOps *ops)
{
    LFSlab *slab = slabs[ops->chain_pos[LF_SLAB_POS]];
    LFShared *sh = slab->shared;
    LFVQCodebook *vq = sh->vq;
    LFInternOp *op;
    /*
     * block mode: the chain gathers a VQ training set or produces a VQ code
     * array, so source stages run once per slice of a block of
     * 1 << (ubits + vbits) slices.  vq is only dereferenced by source stages
     * when this is set.
     */
    bool_t do_block = ops->chain_msk&(LF_GEN_VQ_TRAINSET|LF_GEN_VQ_CODEARRAY);
    int i;

    /* define input storage, input/output size, loop count of pipe stages */
    for ( i=0,op=ops->chain_ops ; i<ops->chain_cnt ; i++,op++ ) {
	/* drop the buffer left over from the previous slab/chain */
	if  (op->op_input_alloced) {
	    free(op->op_input_alloced);
	    op->op_input_alloced = NULL;
	}

	switch(op->op_id) {
	case LF_OP_CREATE:
	    op->op_input_size = -1;		/* DON'T CARE */
	    switch(op->op_method) {
	    case LF_POINT:
		/* one sample per call, ns*nt calls per slice */
		op->op_output_size = sh->sample_size;
		op->op_loop = sh->ns * sh->nt *
			      (do_block ? (1 << (vq->ubits + vq->vbits)) : 1);
		break;

	    case LF_SLICE_ST:
		/* one whole (s,t) slice per call */
		op->op_output_size = sh->ns * sh->nt * sh->sample_size;
		op->op_loop = do_block ? (1 << (vq->ubits + vq->vbits)) : 1;
		break;
	    }
	    break;

	case LF_OP_MEM_INPUT:
	    /* copies one slice out of the slab's in-memory lightfield */
	    op->op_input_size = -1;		/* DON'T CARE */
	    op->op_output_size = sh->ns * sh->nt * sh->sample_size;
	    op->op_loop = do_block ? (1 << (vq->ubits + vq->vbits)) : 1;
	    break;
	    
	case LF_OP_SHUFFLE:
	    /* consumes and produces a full block of slices in one call */
	    op->op_input_size = (sh->ns * sh->nt * sh->sample_size)
				<< (vq->ubits + vq->vbits);
	    op->op_input_alloced = calloc(op->op_input_size, 1);
	    op->op_output_size = op->op_input_size;
	    op->op_loop = 1;
	    break;

	case LF_OP_COMPRESS:
	    /* a block of slices in, ns*nt >> (sbits+tbits) short codes out */
	    op->op_input_size = (sh->ns * sh->nt * sh->sample_size)
				<< (vq->ubits + vq->vbits);
	    op->op_input_alloced = calloc(op->op_input_size, 1);
	    op->op_output_size = (sh->ns * sh->nt * sizeof(short))
				>> (vq->sbits + vq->tbits);
	    op->op_loop = 1;
	    break;

	case LF_OP_WRITE:
	    /* raw slice when the slab has no codebook, code indices otherwise */
	    op->op_input_size = ! vq ? (sh->ns * sh->nt * sh->sample_size) :
		((sizeof(short)*sh->ns*sh->nt) >> (vq->sbits+vq->tbits));
	    /* a staging buffer is only needed when another stage feeds us */
	    if  (ops->chain_cnt > 1)
		op->op_input_alloced = malloc(op->op_input_size);
	    op->op_output_size = -1;		/* DON'T CARE */
	    op->op_loop = 1;
	    break;

	case LF_OP_READ:
	    op->op_input_size = -1;		/* DON'T CARE */
	    switch(op->op_method) {
	    case LF_LIGHTFIELD:
		/* raw slice or code indices, depending on the codebook */
		op->op_output_size = ! vq ?
		    (sh->ns * sh->nt * sh->sample_size):
		    ((sh->ns * sh->nt * sizeof(short)) >> (vq->sbits+vq->tbits));
		op->op_loop = do_block ? (1 << (vq->ubits+vq->vbits)) : 1;
		break;

	    case LF_SLICE_ST:
		op->op_output_size = sh->ns * sh->nt * sh->sample_size;
		op->op_loop = do_block ? (1 << (vq->ubits + vq->vbits)) : 1;
		break;
	    }
	    break;

	case LF_OP_COMPRESS_TRAIN:
	    /* input is the training set gathered from all slabs */
	    op->op_input_size = slabs_train_size(slabs, nslab);
	    op->op_output_size = (vq->size * sh->sample_size) <<
				(vq->ubits + vq->vbits + vq->sbits + vq->tbits);
	    op->op_loop = 1;
	    break;
	}
    }

    /* allocate destination */
    switch(ops->chain_dst) {
    case LF_DST_SLABMEM:
	/* one buffer per slab: raw samples, or short codes when compressed */
	if  (ops->chain_mem)
	    free(ops->chain_mem);
	ops->chain_mem = malloc(! vq ?
		(sh->nu * sh->nv * sh->ns * sh->nt * sh->sample_size) :
	    	((sh->nu * sh->nv * sh->ns * sh->nt * sizeof(short)) >>
		 (vq->ubits + vq->vbits + vq->sbits + vq->tbits)));
	break;

    case LF_DST_DFMEM:
	/* a single training buffer shared by all slabs: allocate it once */
	if  (ops->chain_pos[LF_SLAB_POS])
	    break;
	if  (ops->chain_mem)
	    free(ops->chain_mem);
	ops->chain_mem = malloc(slabs_train_size(slabs, nslab));
	break;
    }
}

/*
 * Prepare an operator chain for execution: prune slabs that cannot feed the
 * chain, let every stage install its callbacks / auxiliary state, and reset
 * the chain position to the start of the first slab.
 */
static void
init_dataflow(LFOps *ops)
{
    int k;

    /*
     * A chain whose first stage creates, reads or trains does not need slab
     * data to exist yet.  Any other chain consumes slab memory, so slabs
     * without a lightfield are squeezed out of the list.
     */
    switch (ops->chain_ops[0].op_id) {
    case LF_OP_CREATE:
    case LF_OP_READ:
    case LF_OP_COMPRESS_TRAIN:
	break;

    default: {
	int kept = 0;

	for (k = 0; k < ops->op_slab_cnt; k++) {
	    if (ops->op_slabs[k]->lightfield)
		ops->op_slabs[kept++] = ops->op_slabs[k];
	}
	ops->op_slab_cnt = kept;
	break;
    }
    }

    /* set slab attributes */
    for (k = 0; k < ops->chain_cnt; k++) {
	LFInternOp *stage = &ops->chain_ops[k];

	switch (stage->op_id) {
	case LF_OP_CREATE:
	    stage->op_aux = lfBeginCreate(ops, stage);
	    break;

	case LF_OP_READ:
	    if (stage->op_method == LF_LIGHTFIELD)
		stage->op_aux = lfBeginReadLif(ops, stage);
	    else if (stage->op_method == LF_SLICE_ST)
		stage->op_aux = lfBeginReadSliceFile(ops, stage);
	    break;

	case LF_OP_MEM_INPUT:
	    stage->op_func = lfMemInputSlice;
	    stage->op_end = lfEndDefault;
	    /* block-wise chains must walk the slices of a block themselves */
	    if (ops->chain_msk & (LF_GEN_VQ_TRAINSET|LF_GEN_VQ_CODEARRAY))
		stage->op_update = lfSerialUpdateBlockSlice;
	    else
		stage->op_update = lfUpdateNoop;
	    break;

	case LF_OP_SHUFFLE:
	    stage->op_aux = lfBeginMemShuffle(ops, stage);
	    break;

	case LF_OP_COMPRESS_TRAIN:
	    stage->op_func = lfVQGenCodebook;
	    stage->op_end = lfEndDefault;
	    stage->op_update = lfUpdateNoop;
	    break;

	case LF_OP_COMPRESS:
	    stage->op_func = lfVQCompress;
	    stage->op_end = lfEndVQCompress;
	    stage->op_update = lfUpdateNoop;
	    break;

	case LF_OP_DRAW:
	    stage->op_aux = lfBeginDrawFB(ops, stage);
	    break;

	case LF_OP_WRITE:
	    stage->op_aux = lfBeginWriteLif(ops, stage);
	    break;
	}
    }

    /* initialize position */
    ops->chain_pos[UV_POS] = 0;
    ops->chain_pos[LF_T_POS] = 0;
    ops->chain_pos[LF_S_POS] = 0;
    ops->chain_pos[LF_V_POS] = 0;
    ops->chain_pos[LF_U_POS] = 0;
    ops->chain_pos[LF_SLAB_POS] = 0;
}

/* Release the per-stage input buffers that are still allocated. */
static void
cleanup_dataflow(LFOps *ops)
{
    LFInternOp *stage = ops->chain_ops;
    int remaining = ops->chain_cnt;

    while (remaining-- > 0) {
	if (stage->op_input_alloced != NULL) {
	    free(stage->op_input_alloced);
	    stage->op_input_alloced = NULL;
	}
	stage++;
    }
}

/*
 * Source stage that copies one (s,t) slice out of a slab's in-memory
 * lightfield into the output buffer.  The slice is selected by the u and v
 * entries of pos; output_size must equal the size of one slice.
 */
void
lfMemInputSlice(LFSlab **slabs, int nslab, const int *pos,
	const float *descr, const void *input, int input_size,
	void *output, int output_size, void *aux_ptr)
{
    LFSlab *src_slab = slabs[pos[LF_SLAB_POS]];
    LFShared *shared = src_slab->shared;
    int bytes_per_slice = shared->ns * shared->nt * shared->sample_size;
    const char *base = (const char *)src_slab->lightfield;

    assert(output_size == bytes_per_slice);

    /* slices are laid out u-major, v-minor */
    memcpy(output,
	   base + bytes_per_slice *
		  (pos[LF_U_POS] * shared->nv + pos[LF_V_POS]),
	   bytes_per_slice);
}

/*
 * Advance the dataflow by one step.  In order, this:
 *   1. copies the chain position into every stage's op_pos,
 *   2. when a new slab starts (LF_NEW_SLAB), hands the finished slab's
 *      buffer over / checks for termination / reconfigures the stages,
 *   3. wires each stage's input and output pointers,
 *   4. moves ops->chain_pos to the next slab, block or slice and raises
 *      LF_NEW_SLAB again when a slab has been completed.
 *
 * Returns TRUE when the chain should be run (again), FALSE when there is
 * nothing left to process.
 *
 * NOTE(review): the block-stepping comparisons below use == against nu/nv,
 * so they presumably rely on nu and nv being multiples of the block size --
 * confirm.
 */
static bool_t
update_dataflow(LFSlab **slabs, int nslab, LFOps *ops)
{
    int pos = ops->chain_pos[LF_SLAB_POS];
    LFSlab *slab;
    LFShared *sh;
    LFVQCodebook *vq;
    LFInternOp *op_src = &ops->chain_ops[0];
    LFInternOp *op_dst = &ops->chain_ops[ops->chain_cnt-1];
    LFInternOp *op;
    int i;
    /*
     * snapshot of the chain's VQ phase.
     * NOTE(review): if bool_t is narrower than int these masked values could
     * be truncated -- confirm its definition.
     */
    bool_t gen_vq_trainset = ops->chain_msk & LF_GEN_VQ_TRAINSET;
    bool_t gen_vq_codebook = ops->chain_msk & LF_GEN_VQ_CODEBOOK;
    bool_t gen_vq_codearray = ops->chain_msk & LF_GEN_VQ_CODEARRAY;

    /* populate position */
    for ( i=0,op=ops->chain_ops ; i<ops->chain_cnt ; i++,op++ ) {
	op->op_pos[LF_SLAB_POS] = ops->chain_pos[LF_SLAB_POS];
	op->op_pos[LF_U_POS]    = ops->chain_pos[LF_U_POS];
	/* VV_POS remembers the block's first v for the block update hooks */
	op->op_pos[LF_V_POS]    = op->op_pos[VV_POS] = ops->chain_pos[LF_V_POS];
	op->op_pos[LF_S_POS]    = op->op_pos[LF_T_POS] = 0;
    }

    if  (ops->chain_msk & LF_NEW_SLAB)  {
	if  (! pos)
	    /* very first slab of the chain: just configure the stages */
	    determine_slab_dataflow(slabs, nslab, ops);
	else {
	    switch(ops->chain_dst) {
	    case LF_DST_SLABMEM:
		/* save lightfield data if needed */
		slabs[pos-1]->lightfield = ops->chain_mem;
		/* ownership moved to the slab; do not free it later */
		ops->chain_mem = NULL;
		if  (pos >= nslab)	/* termination criterion */
		    return FALSE;
		determine_slab_dataflow(slabs, nslab, ops);
		break;

	    case LF_DST_SLABVQ:
		/* the training set in chain_mem has been consumed; stop */
		assert(ops->chain_mem);
		free(ops->chain_mem);
		ops->chain_mem = 0;
		return FALSE;

	    default:
		if  (pos >= nslab)	/* termination criterion */
		    return FALSE;
		/* slabs flagged LF_SHARED keep the existing configuration */
		if  (! (ops->chain_msk & LF_SHARED))
		    determine_slab_dataflow(slabs, nslab, ops);
		break;
	    }
	}
    }

    slab = slabs[pos];
    sh = slab->shared;
    vq = sh->vq;

    /* trigger source */
    switch(ops->chain_src) {
    case LF_SRC_SLABMEM:
	/* do nothing here */
	break;

    case LF_SRC_DFMEM:
	/* first stage reads the buffer filled by a previous chain */
	op_src->op_input = ops->chain_mem;
	break;

    case LF_SRC_FILE:
    case LF_SRC_CREATE:
	op_src->op_input = ops->chain_input;
	break;
    }

    /* populate output stages (using input of next operators) */
    for ( i=0,op=ops->chain_ops ; i<ops->chain_cnt-1 ; i++,op++ ) {
	op->op_output = (op+1)->op_input_alloced;
    }
    /* populate input stages */
    for ( i=1,op=ops->chain_ops+i ; i<ops->chain_cnt ; i++,op++ ) {
	op->op_input = op->op_input_alloced;
    }

    /* collect data */
    switch(ops->chain_dst) {
    case LF_DST_SLABMEM:
	/* restart at the slab's buffer, otherwise append after last output */
	if  (ops->chain_msk & LF_NEW_SLAB)
	    op_dst->op_output = ops->chain_mem;
	else 
	    op_dst->op_output += op_dst->op_output_size;
	break;

    case LF_DST_DFMEM:
	/* one buffer for all slabs: only rewind at the very first step */
	if  (! pos && (ops->chain_msk & LF_NEW_SLAB))
	    op_dst->op_output = ops->chain_mem;
	else
	    op_dst->op_output += op_dst->op_output_size;
	break;

    case LF_DST_FILE:
    case LF_DST_FRAMEBUF:
	op_dst->op_output = ops->chain_output;
	break;

    case LF_DST_SLABVQ:
	/* do nothing here */
	break;
    }

    /* update position */
    ops->chain_msk &= ~LF_NEW_SLAB;
    if  (gen_vq_codebook)  {
	/* training runs once over the gathered set */
	lfOutput("train VQ...\n");
	ops->chain_msk |= LF_NEW_SLAB;
	ops->chain_pos[LF_SLAB_POS]++;
    }
    else if  (ops->chain_flow == LF_DF_SLAB) {
	/* whole slab per step */
	lfOutput("dataflow 0x%x: slab %d...\n", ops->chain_msk,
		slabs[ops->chain_pos[LF_SLAB_POS]]->id);
	ops->chain_msk |= LF_NEW_SLAB;
	ops->chain_pos[LF_SLAB_POS]++;
    }
    else if  (gen_vq_trainset) {
	/* pick the next training block at random, cnt blocks per slab */
	int cnt = slab_train_slices(slab) >> (vq->ubits + vq->vbits);
	int mod_uv = (sh->nu * sh->nv) >> (vq->ubits + vq->vbits);
	int div_v = sh->nv >> vq->vbits;
	/* NOTE(review): mod_uv and div_v are not checked for zero */
	int rand_val = random() % mod_uv;

	lfOutput("dataflow 0x%x: block [%d, %d, %d]...\n", ops->chain_msk,
		slabs[ops->chain_pos[LF_SLAB_POS]]->id,
		ops->chain_pos[LF_U_POS], ops->chain_pos[LF_V_POS]);
	/* turn the block number into the (u, v) of the block's corner */
	ops->chain_pos[LF_U_POS] = (rand_val / div_v) << vq->ubits;
	ops->chain_pos[LF_V_POS] = (rand_val % div_v) << vq->vbits;
	if  (++ops->chain_pos[UV_POS] == cnt)  {
	    ops->chain_pos[UV_POS] = 0;
	    ops->chain_pos[LF_SLAB_POS]++;
	    ops->chain_msk |= LF_NEW_SLAB;
	}
    }
    else if  (gen_vq_codearray || vq) {
	/* walk the slab block by block, v fastest */
	lfOutput("dataflow 0x%x: block [%d, %d, %d]...\n", ops->chain_msk,
		slabs[ops->chain_pos[LF_SLAB_POS]]->id,
		ops->chain_pos[LF_U_POS], ops->chain_pos[LF_V_POS]);
	if  ((ops->chain_pos[LF_V_POS]+=(1<<vq->vbits)) == sh->nv) {
	    ops->chain_pos[LF_V_POS] = 0;
	    if  ((ops->chain_pos[LF_U_POS]+=(1<<vq->ubits)) == sh->nu) {
		ops->chain_pos[LF_U_POS] = 0;
		ops->chain_pos[LF_SLAB_POS]++;
		ops->chain_msk |= LF_NEW_SLAB;
	    }
	}
    }
    else {
	/* walk the slab slice by slice, v fastest */
	lfOutput("dataflow %d: slice [%d, %d, %d]...\n", ops->chain_msk,
		slabs[ops->chain_pos[LF_SLAB_POS]]->id,
		ops->chain_pos[LF_U_POS], ops->chain_pos[LF_V_POS]);
	if  (++ops->chain_pos[LF_V_POS] == sh->nv) {
	    ops->chain_pos[LF_V_POS] = 0;
	    if  (++ops->chain_pos[LF_U_POS] == sh->nu) {
		ops->chain_pos[LF_U_POS] = 0;
		ops->chain_pos[LF_SLAB_POS]++;
		ops->chain_msk |= LF_NEW_SLAB;
	    }
	}
    }

    return TRUE;
}

/*
 * Execute the operator chain currently described by ops: initialize the
 * stages, repeatedly run every stage (op_loop invocations each, with the
 * stage's update hook after every invocation) until update_dataflow()
 * reports that no work is left, then call every stage's end hook and free
 * the intermediate buffers.
 *
 * When no slab is left to process (none were supplied, or init_dataflow()
 * filtered all of them out because they hold no lightfield) the pipeline is
 * not pumped at all: update_dataflow() would otherwise dereference slabs[0]
 * and process one bogus slab before noticing the list is empty.  The end
 * hooks and the cleanup still run so that whatever the stages set up during
 * initialization is torn down.
 */
static void
dataflow(LFOps *ops)
{
    LFInternOp *op;
    int i, k;

    /* initialize data flow */
    init_dataflow(ops);

    if  (ops->op_slab_cnt > 0)  {
	/* trigger source to spit out data for the first time */
	update_dataflow(ops->op_slabs, ops->op_slab_cnt, ops);

	for ( k=0,op=ops->chain_ops ; ; ) {
	    /* loop through an operation */
	    for ( i=0 ; i<op->op_loop ; i++ ) {
		(*op->op_func)(ops->op_slabs, ops->op_slab_cnt, op->op_pos,
			    op->op_descr, op->op_input, op->op_input_size,
			    op->op_output, op->op_output_size, op->op_aux);
		(*op->op_update)(ops->op_slabs, ops->op_slab_cnt, op);
	    }

	    ++k, ++op;
	    if  (k == ops->chain_cnt) {
		k = 0, op = ops->chain_ops;

		/*
		 * keep pumping source data into the data flow stream until
		 * it's time to quit.
		 */
		if  (! update_dataflow(ops->op_slabs, ops->op_slab_cnt, ops))
		    break;
	    }
	}
    }

    /* terminate all ops */
    for ( k=0,op=ops->chain_ops ; k<ops->chain_cnt ; k++,op++ )
	(*op->op_end)(ops, op, op->op_aux);

    /* clean up leftover of dataflow */
    cleanup_dataflow(ops);
}

/* End hook for stages that have nothing to tear down. */
void
lfEndDefault(LFOps *ops, struct LFInternOp *op, void *aux_ptr)
{
    /* intentionally empty; the casts only mark the parameters as unused */
    (void)ops;
    (void)op;
    (void)aux_ptr;
}

/*
 * Describe the internal operator chain that dataflow() will run next.
 *
 * The first cnt entries of ops->chain_ops must already be filled in.  The
 * VQ phase flags in chain_msk are derived from the operator ids found
 * there, then the source, destination, flow granularity, stage count and
 * the input/output handles are recorded.  LF_NEW_SLAB is always raised so
 * that the first update step configures the stages.
 */
static void
set_op_chain(LFOps *ops, int src, int dst, int dataflow,
	bool_t set_slab, int cnt, void *input, void *output)
{
    int vq_phases = 0;	/* number of distinct VQ phases requested */
    int k;

    ops->chain_msk &= ~(LF_GEN_VQ_TRAINSET |
			LF_GEN_VQ_CODEBOOK |
			LF_GEN_VQ_CODEARRAY |
			LF_SET_SLAB);

    for (k = 0; k < cnt; k++) {
	switch (ops->chain_ops[k].op_id) {
	case LF_OP_SHUFFLE:
	    ops->chain_msk |= LF_GEN_VQ_TRAINSET;
	    vq_phases++;
	    break;

	case LF_OP_COMPRESS_TRAIN:
	    ops->chain_msk |= LF_GEN_VQ_CODEBOOK;
	    vq_phases++;
	    break;

	case LF_OP_COMPRESS:
	    ops->chain_msk |= LF_GEN_VQ_CODEARRAY;
	    /* a shuffle that feeds a compress is not a training-set pass */
	    if (ops->chain_msk & LF_GEN_VQ_TRAINSET) {
		ops->chain_msk &= ~LF_GEN_VQ_TRAINSET;
		vq_phases--;
	    }
	    vq_phases++;
	    break;

	default:
	    break;
	}
    }

    /*
     * cannot do more than one of LF_GEN_VQ_TRAINSET, LF_GEN_VQ_CODEBOOK,
     * LF_GEN_VQ_CODEARRAY in a single stream.
     */
    assert(vq_phases <= 1);

    ops->chain_msk |= set_slab | LF_NEW_SLAB;

    ops->chain_src = src;
    ops->chain_dst = dst;
    ops->chain_flow = dataflow;
    ops->chain_cnt = cnt;
    ops->chain_input = input;
    ops->chain_output = output;
}

/* One run of consecutive user operators that is executed as a unit. */
typedef struct {
    int loc;	/* index into ops->ops of the segment's first operator */
    int mode;	/* LF_DF_* value selecting how the segment is executed */
} PathSegment;

/*
 * Fill in the id, method, descriptor and extra fields of the operator
 * pointed to by op.  Expands to a single comma expression, so it can be
 * used as an expression statement.  Every argument is parenthesized so that
 * any expression can be passed safely; each argument is evaluated once.
 */
#define FILLOP(id, method, descr, extra, op) (	\
    (op)->op_id	  = (id),			\
    (op)->op_method = (method),			\
    (op)->op_descr  = (descr),			\
    (op)->op_extra  = (extra))

/*
 * Analyze the operators issued between an lfBegin/lfEnd pair, group
 * adjacent operators that can be fused into one pipeline (create/read ->
 * compress -> write), and execute the resulting pipelines in order.
 *
 * Steps:
 *   1. give up on fusing if the same operator appears twice,
 *   2. detect fusable pairs and triples and record them in `mode`,
 *   3. split the operator list into segments according to `mode`,
 *   4. for each segment, build the internal operator chain(s) and run them
 *      with dataflow().  Segments that involve compression run three
 *      chains: gather a training set, train the codebook, then compress.
 *
 * ops - the recorded operators (ops->ops, ops->op_cnt) and the chain
 *       scratch state that set_op_chain()/dataflow() work on.
 *
 * Exits the process via exit(1) after reporting through lfError() when an
 * unknown operator id or dataflow mode is encountered.
 */
void
lfDataflowAnalysis(LFOps *ops)
{
    const LFOp *op;
    LFInternOp *iop;
    PathSegment seg[LF_OP_MAX+1];
    int seg_cnt;
    /* operator names; assumed to be ordered by LF_OP_* id -- TODO confirm */
    const char *op_strs[LF_OP_MAX+1] = {
	"draw", "create", "compress", "read", "write",
    };
    int mode=0;
    int i;

    if  (ops->op_cnt < 2)
	goto done_analysis;

    /*
     * check give-up (no optimization) conditions.
     * as it is, we do not do anything smart if there are two
     * identical operators in between Begin/End.
     */
    for ( i=0,op=ops->ops,mode=0 ; i<ops->op_cnt ; i++,op++ ) {
	if  (mode & (1 << op->op_id))  {
	    int id = (int)op->op_id;
	    const char *name = NULL;

	    /* name the duplicated operator by its id, not by its position */
	    if  (id >= 0 && id <= LF_OP_MAX)
		name = op_strs[id];
	    lfOutput("lfDataflowAnalysis: multiple %s operators, resort to slow data flow\n", name ? name : "unknown");

	    /*
	     * up to here `mode` holds operator-id bits; clear it so they are
	     * not mistaken for LF_DF_* fusion flags below.
	     */
	    mode = 0;
	    goto done_analysis;
	}
	mode |= 1 << op->op_id;
    }

    /* look at one operation at a time */
    for ( i=0,op=ops->ops ; i<ops->op_cnt ; i++,op++ )  {
	switch(op->op_id) {
	case LF_OP_COMPRESS:
	    /*
	     * XXX: making sure that none of the slabs is in compressed
	     * mode now.
	     */
	    break;
	}
    }

    /* look at two operations at a time */
    for ( i=mode=0,op=ops->ops ; i<ops->op_cnt-1 ; i++,op++ )  {
	switch(op->op_id) {
	case LF_OP_CREATE:
	    switch((op+1)->op_id)  {
	    case LF_OP_COMPRESS:
		mode |= LF_DF_CREATE_COMPRESS;
		break;

	    case LF_OP_WRITE:
		mode |= LF_DF_CREATE_WRITE;
		break;
	    }
	    break;

	case LF_OP_COMPRESS:
	    switch((op+1)->op_id)  {
	    case LF_OP_WRITE:
		mode |= LF_DF_COMPRESS_WRITE;
		break;
	    }
	    break;

	case LF_OP_READ:
	    switch((op+1)->op_id)  {
	    case LF_OP_COMPRESS:
		mode |= LF_DF_READ_COMPRESS;
		break;
	    }
	    break;
	}
    }

    /* look at three operations at a time */
    if  (! (~mode & (LF_DF_CREATE_COMPRESS|LF_DF_COMPRESS_WRITE)))  {
	/* create-compress-write */
	mode &= ~(LF_DF_CREATE_COMPRESS|LF_DF_COMPRESS_WRITE);
	mode |= LF_DF_CREATE_COMPRESS_WRITE;
    }
    else if  (! (~mode & (LF_DF_READ_COMPRESS|LF_DF_COMPRESS_WRITE))) {
	/* read-compress-write */
	mode &= ~(LF_DF_READ_COMPRESS|LF_DF_COMPRESS_WRITE);
	mode |= LF_DF_READ_COMPRESS_WRITE;
    }

done_analysis:
    /* generate a list of operator chains that can be optimized */
    for ( i=seg_cnt=0,op=ops->ops ; i<ops->op_cnt ; seg_cnt++ )  {
	/* never write past the end of seg[] */
	if  (seg_cnt >= (int)(sizeof(seg) / sizeof(seg[0])))  {
	    lfError("lfDataflowAnalysis: too many operators (%d)\n", (int)ops->op_cnt);
	    exit(1);
	}

	seg[seg_cnt].loc = i;
	switch(op->op_id) {
	case LF_OP_DRAW:
	    seg[seg_cnt].mode = LF_DF_DRAW;
	    i++, op++;
	    break;

	case LF_OP_CREATE:
	    if  (mode & LF_DF_CREATE_COMPRESS_WRITE) {
		seg[seg_cnt].mode = LF_DF_CREATE_COMPRESS_WRITE;
		i += 3, op += 3;
	    }
	    else if (mode & LF_DF_CREATE_COMPRESS) {
		seg[seg_cnt].mode = LF_DF_CREATE_COMPRESS;
		i += 2, op += 2;
	    }
	    else if (mode & LF_DF_CREATE_WRITE) {
		seg[seg_cnt].mode = LF_DF_CREATE_WRITE;
		i += 2, op += 2;
	    }
	    else {
		seg[seg_cnt].mode = LF_DF_CREATE;
		i++, op++;
	    }
	    break;

	case LF_OP_COMPRESS:
	    if  (mode & LF_DF_COMPRESS_WRITE) {
		seg[seg_cnt].mode = LF_DF_COMPRESS_WRITE;
		i += 2, op += 2;
	    }
	    else {
		seg[seg_cnt].mode = LF_DF_COMPRESS;
		i++, op++;
	    }
	    break;

	case LF_OP_WRITE:
	    seg[seg_cnt].mode = LF_DF_WRITE;
	    i++, op++;
	    break;

	case LF_OP_READ:
	    if  (mode & LF_DF_READ_COMPRESS_WRITE) {
		seg[seg_cnt].mode = LF_DF_READ_COMPRESS_WRITE;
		i += 3, op += 3;
	    }
	    else if (mode & LF_DF_READ_COMPRESS) {
		seg[seg_cnt].mode = LF_DF_READ_COMPRESS;
		i += 2, op += 2;
	    }
	    else {
		seg[seg_cnt].mode = LF_DF_READ;
		i++, op++;
	    }
	    break;

	default:
	    /* without this the loop would never advance past the operator */
	    lfError("lfDataflowAnalysis: Unknown operator id %d\n", (int)op->op_id);
	    exit(1);
	}
    }

    /* construct a list of operator chains */
    for ( i=0 ; i<seg_cnt ; i++ ) {
	op = &ops->ops[seg[i].loc];
	iop = ops->chain_ops;

	switch(seg[i].mode) {
	case LF_DF_CREATE_COMPRESS:
	    /* gather training set from creation */
	    FILLOP(LF_OP_CREATE,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[0]);
	    FILLOP(LF_OP_SHUFFLE,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[1]);
	    set_op_chain(ops, LF_SRC_CREATE, LF_DST_DFMEM,
		LF_DF_SLICE_ST, LF_SET_SLAB, 2, op[0].op_extra, NULL);
	    dataflow(ops);

	    /* do training */
	    FILLOP(LF_OP_COMPRESS_TRAIN,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[0]);
	    set_op_chain(ops, LF_SRC_DFMEM, LF_DST_SLABVQ,
		LF_DF_SLAB, 0, 1, NULL, NULL);
	    dataflow(ops);

	    /* do creation/compression */
	    FILLOP(op[0].op_id,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[0]);
	    FILLOP(LF_OP_SHUFFLE,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[1]);
	    FILLOP(op[1].op_id,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[2]);
	    set_op_chain(ops, LF_SRC_CREATE, LF_DST_SLABMEM,
		LF_DF_SLICE_ST, 0, 3, op[0].op_extra, NULL);
	    break;

	case LF_DF_CREATE_WRITE:
	    FILLOP(op[0].op_id,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[0]);
	    FILLOP(op[1].op_id,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[1]);
	    set_op_chain(ops, LF_SRC_CREATE, LF_DST_FILE,
		LF_DF_SLICE_ST, LF_SET_SLAB, 2, op[0].op_extra, op[1].op_extra);
	    break;

	case LF_DF_COMPRESS_WRITE:
	    /* gather training set from slab lightfield */
	    FILLOP(LF_OP_MEM_INPUT, NULL, NULL, NULL, &iop[0]);
	    FILLOP(LF_OP_SHUFFLE,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[1]);
	    set_op_chain(ops, LF_SRC_SLABMEM, LF_DST_DFMEM,
		LF_DF_SLICE_ST, LF_SET_SLAB, 2, NULL, NULL);
	    dataflow(ops);

	    /* do training */
	    FILLOP(LF_OP_COMPRESS_TRAIN,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[0]);
	    set_op_chain(ops, LF_SRC_DFMEM, LF_DST_SLABVQ,
		LF_DF_SLAB, 0, 1, NULL, NULL);
	    dataflow(ops);

	    /* do compression/write */
	    FILLOP(LF_OP_MEM_INPUT, NULL, NULL, NULL, &iop[0]);
	    FILLOP(LF_OP_SHUFFLE,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[1]);
	    FILLOP(op[0].op_id,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[2]);
	    FILLOP(op[1].op_id,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[3]);
	    set_op_chain(ops, LF_SRC_SLABMEM, LF_DST_FILE,
		LF_DF_SLICE_ST, 0, 4, NULL, op[1].op_extra);
	    break;

	case LF_DF_READ_COMPRESS:
	    /* gather training set from file */
	    FILLOP(LF_OP_READ,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[0]);
	    FILLOP(LF_OP_SHUFFLE,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[1]);
	    set_op_chain(ops, LF_SRC_FILE, LF_DST_DFMEM,
		LF_DF_SLICE_ST, LF_SET_SLAB, 2, op[0].op_extra, NULL);
	    dataflow(ops);

	    /* do training */
	    FILLOP(LF_OP_COMPRESS_TRAIN,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[0]);
	    set_op_chain(ops, LF_SRC_DFMEM, LF_DST_SLABVQ,
		LF_DF_SLAB, 0, 1, NULL, NULL);
	    dataflow(ops);

	    /* do read/compression */
	    FILLOP(op[0].op_id,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[0]);
	    FILLOP(LF_OP_SHUFFLE,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[1]);
	    FILLOP(op[1].op_id,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[2]);
	    set_op_chain(ops, LF_SRC_FILE, LF_DST_SLABMEM,
		LF_DF_SLICE_ST, 0, 3, op[0].op_extra, NULL);
	    break;

	case LF_DF_CREATE_COMPRESS_WRITE:
	    /* gather training set from creation */
	    FILLOP(LF_OP_CREATE,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[0]);
	    FILLOP(LF_OP_SHUFFLE,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[1]);
	    set_op_chain(ops, LF_SRC_CREATE, LF_DST_DFMEM,
		LF_DF_SLICE_ST, LF_SET_SLAB, 2, op[0].op_extra, NULL);
	    dataflow(ops);

	    /* do training */
	    FILLOP(LF_OP_COMPRESS_TRAIN,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[0]);
	    set_op_chain(ops, LF_SRC_DFMEM, LF_DST_SLABVQ,
		LF_DF_SLAB, 0, 1, NULL, NULL);
	    dataflow(ops);

	    /* do creation/compression/write */
	    FILLOP(op[0].op_id,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[0]);
	    FILLOP(LF_OP_SHUFFLE,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[1]);
	    FILLOP(op[1].op_id,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[2]);
	    FILLOP(op[2].op_id,
		op[2].op_method, op[2].op_descr, op[2].op_extra, &iop[3]);
	    set_op_chain(ops, LF_SRC_CREATE, LF_DST_FILE,
		LF_DF_SLICE_ST, 0, 4, op[0].op_extra, op[2].op_extra);
	    break;

	case LF_DF_READ_COMPRESS_WRITE:
	    /* gather training set from file */
	    FILLOP(LF_OP_READ,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[0]);
	    FILLOP(LF_OP_SHUFFLE,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[1]);
	    set_op_chain(ops, LF_SRC_FILE, LF_DST_DFMEM,
		LF_DF_SLICE_ST, LF_SET_SLAB, 2, op[0].op_extra, NULL);
	    dataflow(ops);

	    /* do training */
	    FILLOP(LF_OP_COMPRESS_TRAIN,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[0]);
	    set_op_chain(ops, LF_SRC_DFMEM, LF_DST_SLABVQ,
		LF_DF_SLAB, 0, 1, NULL, NULL);
	    dataflow(ops);

	    /* do read/compression/write */
	    FILLOP(op[0].op_id,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[0]);
	    FILLOP(LF_OP_SHUFFLE,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[1]);
	    FILLOP(op[1].op_id,
		op[1].op_method, op[1].op_descr, op[1].op_extra, &iop[2]);
	    FILLOP(op[2].op_id,
		op[2].op_method, op[2].op_descr, op[2].op_extra, &iop[3]);
	    set_op_chain(ops, LF_SRC_FILE, LF_DST_FILE,
		LF_DF_SLICE_ST, 0, 4, op[0].op_extra, op[2].op_extra);
	    break;

	case LF_DF_DRAW:
	    FILLOP(op[0].op_id,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[0]);
	    set_op_chain(ops, LF_SRC_SLABMEM, LF_DST_FRAMEBUF,
		LF_DF_SLAB, 0, 1, NULL, NULL);
	    break;

	case LF_DF_CREATE:
	    FILLOP(op[0].op_id,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[0]);
	    set_op_chain(ops, LF_SRC_CREATE, LF_DST_SLABMEM,
		LF_DF_SLICE_ST, LF_SET_SLAB, 1, op[0].op_extra, NULL);
	    break;

	case LF_DF_COMPRESS:
	    /* gather training set from slab lightfield */
	    FILLOP(LF_OP_MEM_INPUT, NULL, NULL, NULL, &iop[0]);
	    FILLOP(LF_OP_SHUFFLE,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[1]);
	    set_op_chain(ops, LF_SRC_SLABMEM, LF_DST_DFMEM,
		LF_DF_SLICE_ST, 0, 2, NULL, NULL);
	    dataflow(ops);

	    /* do training */
	    FILLOP(LF_OP_COMPRESS_TRAIN,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[0]);
	    set_op_chain(ops, LF_SRC_DFMEM, LF_DST_SLABVQ,
		LF_DF_SLAB, 0, 1, NULL, NULL);
	    dataflow(ops);

	    /* do compression */
	    FILLOP(LF_OP_MEM_INPUT, NULL, NULL, NULL, &iop[0]);
	    FILLOP(LF_OP_SHUFFLE,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[1]);
	    FILLOP(op[0].op_id,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[2]);
	    set_op_chain(ops, LF_SRC_SLABMEM, LF_DST_SLABMEM,
		LF_DF_SLICE_ST, 0, 3, NULL, NULL);
	    break;

	case LF_DF_WRITE:
	    FILLOP(LF_OP_MEM_INPUT, NULL, NULL, NULL, &iop[0]);
	    FILLOP(op[0].op_id,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[1]);
	    set_op_chain(ops, LF_SRC_SLABMEM, LF_DST_FILE,
		LF_DF_SLICE_ST, 0, 2, NULL, op[0].op_extra);
	    break;

	case LF_DF_READ:
	    FILLOP(op[0].op_id,
		op[0].op_method, op[0].op_descr, op[0].op_extra, &iop[0]);
	    set_op_chain(ops, LF_SRC_FILE, LF_DST_SLABMEM,
		LF_DF_SLICE_ST, LF_SET_SLAB, 1, op[0].op_extra, NULL);
	    break;

	default:
	    lfError("lfDataflowAnalysis: Unknown dataflow mode 0x%x\n", seg[i].mode);
	    exit(1);
	}

	/* do dataflow */
	dataflow(ops);
    }
}
