/* main.c */

#include <stdio.h>
#include <stdlib.h>
#ifndef WINDOWS
#include <unistd.h>
#endif
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <assert.h>
#include <errno.h>

#ifdef WINDOWS
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif

#include <mpi.h>

#include <GL/gl.h>
#include <wiregl/include/wiregl_papi.h>

#include "misc.h"
#include "globals.h"
#include "draw.h"
#include "volume.h"
#include "gui.h"
#include "timer.h"

/*
 * DATA_ROOT is the compiled-in default directory for volume data.  It is
 * shown in the usage text and is the initial value of `root` in
 * parseArgs(), which replaces it with "<WIREGL_DATA_ROOT>/march" when that
 * environment variable is set.
 *
 * NOTE(review): there is no #else branch, so DATA_ROOT stays undefined
 * unless one of WINDOWS, LINUX, IRIX or IRIX64 is defined by the build.
 */
#if defined(WINDOWS)
#define DATA_ROOT "//radiance/flashg/data/march"
#elif defined(LINUX)
//#define DATA_ROOT "/usr/local/share/wiregl/data/march"
#define DATA_ROOT "/cr/data/march"
#elif defined(IRIX) || defined(IRIX64)
#define DATA_ROOT "/n/radiance/flashg/data/march"
#endif

/* Program-wide settings and state, filled in by main() and parseArgs(). */
Globals globals;

u32
ReadU32( const char *str )
{
    u32   x;
    char *end;

    x = (u32) strtoul( str, &end, 0 );
    if ( end == str || *end != '\0' ) 
	{
        xfatal( "\"%s\" doesn't look like an integer", str );
    }

	return x;
}

/*
 * Parse `str` as a floating point number using strtod().
 *
 * The whole string must be consumed; otherwise xfatal() is called.
 * Returns the parsed value narrowed to float.
 */
float
ReadFloat( const char *str )
{
    char   *stop  = NULL;
    double  value = strtod( str, &stop );
    int     whole_string_used = ( stop != str && *stop == '\0' );

    if ( !whole_string_used )
    {
        xfatal( "\"%s\" doesn't look like a float", str );
    }

    return (float) value;
}

/*
 * Parse four consecutive strings argv[0..3] with ReadFloat() and store
 * the results in q[0..3].  The caller must make sure all four strings
 * exist.
 */
void
ReadQuat( char *argv[], float *q )
{
    int component = 0;

    while ( component < 4 )
    {
        q[component] = ReadFloat( argv[component] );
        component++;
    }
}

/*
 * Print the option summary to stderr and exit with status 1.
 *
 * prog: program name shown on the "usage:" line.
 *
 * The list is kept in step with the options parseArgs() recognises and
 * with the defaults it sets (for example -bbox defaults to 1).
 */
void
usage( char *prog )
{
    fprintf( stderr,
             "usage: %s [options]\n"
             "options:\n"
             "  -sphere <dim>           : test volume\n"
             "  -cube <dim>             : test volume\n"
             "  -point <dim>            : test volume\n"
             "  -raw <file> <w> <h> <d> : read raw byte volume\n"
             "  -den <file>             : read density volume\n"
             "  -scene <name>           : predefined volume, camera and threshold\n"
             "  -sub <integer>          : subsample\n"
             "  -chunk <N>              : chunk volume NxNxN\n"
             "  -chunk <I> <J> <K>      : chunk volume IxJxK\n"
             "  -sequence <method>      : none|total|partial|barrier [partial]\n"
             "  -rainbow <bool>         : rainbow colors [0]\n"
             "  -depth <bool>           : use depth buffering [0]\n"
             "  -thresh <threshold>     : from 0 to 255\n"
             "  -opacity <opacity>      : from 0 to 1.0\n"
             "  -pad                    : pad volume\n"
             "  -nopad                  : don't pad volume\n"
             "  -camera <float[4]>      : camera quaternion\n"
             "  -radius <float>         : camera radius\n"
             "  -geometry <W>x<H>       : window size [512x512]\n"
             "  -single                 : single buffered\n"
             "  -double                 : double buffered   [default]\n"
             "  -spin <bool>            : spin the model when benchmarking [0]\n"
             "  -bbox <bool>            : submit bounding boxes [1]\n"
             "  -wireframe <bool>       : draw wireframe [0]\n"
             "  -gui                    : provide gui on thread 0 host\n"
             "  -nogui                  : no gui [default]\n"
             "  -print <n>              : print performance <n> times and exit\n"
             "  -measure <seconds>      : time between performance printouts [1.0]\n"
             "  -passes <n>             : number of passes [1]\n"
             "  -verbose                : verbose output\n"
             "  -quiet                  : no verbose output [default]\n"
             "  -root <dir>             : root of data [" DATA_ROOT "]\n"
             "                            (also from WIREGL_DATA_ROOT + \"/march\")\n"
             "  -help                   : this message\n",
             prog );
    exit( 1 );
}

/* Where the volume comes from; VOL_NONE means no volume option was given. */
enum {
    VOL_NONE,
    VOL_SPHERE,
    VOL_CUBE,
    VOL_POINT,
    VOL_RAW,
    VOL_DEN
};

/*
 * Presets selectable with "-scene <name>".  Each entry bundles a volume
 * source with the camera, opacity and threshold that parseArgs() copies
 * into the globals.
 */
static struct {
    const char *name;        /* compared against the -scene argument */
    int         kind;        /* one of the VOL_* constants */
    const char *file;        /* volume file for VOL_DEN entries, else NULL */
    int         dim;         /* w = h = d for VOL_POINT entries, else 0 */
    float       camera[4];   /* camera quaternion */
    float       radius;      /* camera radius */
    float       opacity;
    u32         threshold;
} scenes[] = {
    {
        "orangutan2x",
        VOL_DEN,
        "orangutan2x.den.gz",
        0,
        { -0.537641f, 0.123249f, -0.196274f, 0.810696f },
        0.90f,
        1.00f,
        21
    },
    {
        "orangutan",
        VOL_DEN,
        "orangutan.original.den.gz",
        0,
        { -0.537641f, 0.123249f, -0.196274f, 0.810696f },
        0.90f,
        1.00f,
        21
    },
    {
        "skulltiny",
        VOL_DEN,
        "skulltiny.den.gz",
        0,
        { -0.672686f, 0.117906f, -0.103418f, 0.723116f },
        1.12f,
        1.00f,
        127
    },
    {
        "point128",
        VOL_POINT,
        NULL,
        128,
        { 0.394343f, 0.374184f, 0.107333f, 0.832442f },
        1.00f,
        1.00f,
        127
    }
};


int
parseArgs( int argc, char *argv[] )
{
	int         i;
    u32         sub, ok, pad;
    u32         w, h, d;
    u32         chunk_w, chunk_h, chunk_d;
    u32         kind;
    char       *file;
	char       *root;
	char       *temp;

	ok = w = h = d = 0;

    globals.win.w            = 512;
    globals.win.h            = 512;

	globals.please_exit      = 0;
    globals.single_buffer    = 0;
    globals.idle_rotate      = 0;
    globals.sequence_chunks  = OrderPartial;
    globals.rainbow          = 0;
	globals.depth_buffer     = 0;
    globals.threshold        = 127;
    globals.opacity          = 1.0;
	globals.wireframe        = 0;
    globals.cull_enable      = 0;

    globals.camera.quat.q[0] = 0;
    globals.camera.quat.q[1] = 0;
    globals.camera.quat.q[2] = 0;
    globals.camera.quat.q[3] = 1;

    globals.camera.radius    = -100.0f;

    globals.camera.up.i      =  0;
    globals.camera.up.j      =  1;
    globals.camera.up.k      =  0;

    globals.camera.look.i    =  0;
    globals.camera.look.j    =  0;
    globals.camera.look.k    = -1;

    globals.verbose          =  0;
	globals.use_gui          =  0;
	globals.print            =  0;
	globals.passes           =  1;
	globals.measure_time     =  1.0;
	globals.spin             =  0;
	globals.bbox_hint        =  1;

	file    = NULL;
    kind    = VOL_NONE;
    sub     = 0;
    chunk_w = chunk_h = chunk_d = 0;
    pad     = 1;

	root = DATA_ROOT;
	temp = getenv( "WIREGL_DATA_ROOT" );
	if ( temp )
	{
		root = (char *) xmalloc( strlen(temp) + sizeof("/march") + 1 );
		strcpy( root, temp );
		strcat( root, "/march" );
	}

    for ( i = 1; i < argc; i++ ) {

        if ( !strcmp( argv[i], "-display" ) ) {

            if ( i + 1 >= argc ) 
			{
                xfatal( "%s : not enough arguments", argv[i] );
            }

            i++;

        } else if ( !strcmp( argv[i], "-geometry" ) ) {

            int width, height;

            if ( i + 1 >= argc ) 
			{
                xfatal( "%s : not enough arguments", argv[i] );
            }

            width = height = 0;
            if ( sscanf( argv[i+1], "%dx%d", &width, &height ) == 2 &&
                 width > 0 && height > 0 ) {
                globals.win.w = width;
                globals.win.h = height;
            } else {
				xfatal( "%s: bogus -geometry string: %s", argv[0], argv[i+1] );
            }

            i++;

        } else if ( !strcmp( argv[i], "-direct" ) ||
                    !strcmp( argv[i], "-indirect" ) ||
                    !strcmp( argv[i], "-iconic" ) ||
                    !strcmp( argv[i], "-gldebug" ) ||
                    !strcmp( argv[i], "-sync" ) ) {

            /* random GLUT option */

        } else if ( !strcmp( argv[i], "-sphere" ) ) {

            if ( i + 1 >= argc ) 
			{
                xfatal( "%s: not enough arguments", argv[i] );
            }

            kind = VOL_SPHERE;
            w = h = d = ReadU32( argv[i+1] );
            i++;

        } else if ( !strcmp( argv[i], "-cube" ) ) {

            if ( i + 1 >= argc ) 
			{
                xfatal( "%s: not enough arguments", argv[i] );
            }

            kind = VOL_CUBE;
            w = h = d = ReadU32( argv[i+1] );
            i++;

        } else if ( !strcmp( argv[i], "-point" ) ) {

            if ( i + 1 >= argc )
			{
                xfatal( "%s: not enough arguments", argv[i] );
            }

            kind = VOL_POINT;
            w = h = d = ReadU32( argv[i+1] );
            i++;

        } else if ( !strcmp( argv[i], "-raw" ) ) {

            if ( i + 4 >= argc ) 
			{
                xfatal( "%s: not enough arguments", argv[i] );
            }

            kind = VOL_RAW;

            file = argv[i+1];
            w = ReadU32( argv[i+2] );
            h = ReadU32( argv[i+3] );
            d = ReadU32( argv[i+4] );
            i += 4;

        } else if ( !strcmp( argv[i], "-den" ) ) {

            if ( i + 1 >= argc )
			{
                xfatal( "%s: not enough arguments", argv[i] );
            }

            kind = VOL_DEN;
            file = argv[i+1];

            i += 1;

        } else if ( !strcmp( argv[i], "-sub" ) ) {

            if ( i + 1 >= argc ) 
			{
                xfatal( "%s: not enough arguments", argv[i] );
            }

            sub = ReadU32( argv[i+1] );

            i += 1;

        } else if ( !strcmp( argv[i], "-chunk" ) ) {

            if ( i + 1 >= argc ) 
			{
                xfatal( "%s: not enough arguments", argv[i] );
            }

            if ( i + 3 >= argc || !isdigit( argv[i+2][0] ) ) {

                /* okay, they've given us just one chunk dimension */
                chunk_w = chunk_h = chunk_d = ReadU32( argv[i+1] );
                i += 1;

            } else {

                /* full 3 arguments */

                chunk_w = ReadU32( argv[i+1] );
                chunk_h = ReadU32( argv[i+2] );
                chunk_d = ReadU32( argv[i+3] );
                i += 3;
            }

        } else if ( !strcmp( argv[i], "-thresh" ) ||
                    !strcmp( argv[i], "-threshold" ) ) {

            u32 temp;

            if ( i + 1 >= argc ) 
			{
				xfatal( "%s: not enough arguments", argv[i] );
            }

            temp = ReadU32( argv[i+1] );

            if ( temp > 255 )
			{
				xfatal( "-thresh: threshold should be between 0 and 255" );
            }

            globals.threshold = (u8) temp;

            i += 1;

        } else if ( !strcmp( argv[i], "-opacity" ) ) {

            if ( i + 1 >= argc ) 
			{
				xfatal( "%s: not enough arguments", argv[i] );
            }

            globals.opacity = ReadFloat( argv[i+1] );
            i += 1;

        } else if ( !strcmp( argv[i], "-measure" ) ) {

            if ( i + 1 >= argc ) 
			{
				xfatal( "%s: not enough arguments", argv[i] );
            }

            globals.measure_time = ReadFloat( argv[i+1] );
            i += 1;

        } else if ( !strcmp( argv[i], "-sequence" ) ) {

            if ( i + 1 >= argc ) 
			{
                xfatal( "%s: not enough arguments", argv[i] );
            }

            i += 1;

            if ( !strcmp( argv[i], "none" ) )
                globals.sequence_chunks = OrderNone;
            else if ( !strcmp( argv[i], "partial" ) )
                globals.sequence_chunks = OrderPartial;
            else if ( !strcmp( argv[i], "total" ) )
                globals.sequence_chunks = OrderTotal;
            else if ( !strcmp( argv[i], "barrier" ) )
                globals.sequence_chunks = OrderBarrier;
            else {
				xfatal( "%s: arg should be {none|partial|total"
						"|barrier}, not \"%s\"\n", argv[i-1], argv[i] );
            }

        } else if ( !strcmp( argv[i], "-rainbow" ) ) {

            if ( i + 1 >= argc ) 
			{
                xfatal( "%s: not enough arguments", argv[i] );
            }

            globals.rainbow = ReadU32( argv[i+1] );
            i += 1;

        } else if ( !strcmp( argv[i], "-depth" ) ) {

            if ( i + 1 >= argc ) 
			{
                xfatal( "%s: not enough arguments", argv[i] );
            }
			
            globals.depth_buffer = ReadU32( argv[i+1] );
            i += 1;

        } else if ( !strcmp( argv[i], "-print" ) ) {

            if ( i + 1 >= argc ) 
			{
                xfatal( "%s: not enough arguments", argv[i] );
            }

            globals.print = ReadU32( argv[i+1] );
            i += 1;

        } else if ( !strcmp( argv[i], "-spin" ) ) {

            if ( i + 1 >= argc ) 
			{
                xfatal( "%s: not enough arguments", argv[i] );
            }

            globals.spin = ReadU32( argv[i+1] );
            i += 1;

        } else if ( !strcmp( argv[i], "-bbox" ) ) {

            if ( i + 1 >= argc ) 
			{
                xfatal( "%s: not enough arguments", argv[i] );
            }

            globals.bbox_hint = ReadU32( argv[i+1] );
            i += 1;

        } else if ( !strcmp( argv[i], "-wireframe" ) ) {

            if ( i + 1 >= argc ) 
			{
                xfatal( "%s: not enough arguments", argv[i] );
            }

            globals.wireframe = ReadU32( argv[i+1] );
            i += 1;

        } else if ( !strcmp( argv[i], "-single" ) ) {

            globals.single_buffer = 1;

        } else if ( !strcmp( argv[i], "-double" ) ) {

            globals.single_buffer = 0;

        } else if ( !strcmp( argv[i], "-pad" ) ) {

            pad = 1;

        } else if ( !strcmp( argv[i], "-nopad" ) ) {

            pad = 0;

        } else if ( !strcmp( argv[i], "-camera" ) ) {

            if ( i + 4 > argc ) {
                xfatal( "%s: requires 4 floats", argv[i] );
            }

            ReadQuat( &argv[i+1], globals.camera.quat.q );
            i += 4;

        } else if ( !strcmp( argv[i], "-radius" ) ) {

            if ( i >= argc ) 
			{
                xfatal( "%s: requres an arg", argv[i] );
            }

            globals.camera.radius = ReadFloat( argv[i+1] );
            i++;

        } else if ( !strcmp( argv[i], "-passes" ) ) {

            if ( i + 1 >= argc ) 
			{
                xfatal( "%s: not enough arguments", argv[i] );
            }

            globals.passes = ReadU32( argv[i+1] );
            i++;

        } else if ( !strcmp( argv[i], "-help" ) ) {

            usage( argv[0] );

        } else if ( !strcmp( argv[i], "-verbose" ) ) {

            globals.verbose = 1;

        } else if ( !strcmp( argv[i], "-quiet" ) ) {

            globals.verbose = 0;

		} else if ( !strcmp( argv[i], "-gui" ) ) {

			globals.use_gui = 1;

		} else if ( !strcmp( argv[i], "-nogui" ) ) {

			globals.use_gui = 0;

        } else if ( !strcmp( argv[i], "-scene" ) ) {

            int j;

            if ( i + 1 >= argc ) 
			{
                xfatal( "%s: requires an arg", argv[i] );
            }

            i++;

            for ( j = 0; j < NELEMS(scenes); j++ ) {

                if ( !strcmp( argv[i], scenes[j].name ) )
                    break;
            }

            if ( j == NELEMS(scenes) ) {
                fprintf( stderr, "unknown scene: %s\n", argv[i] );
                fputs( "try one of:", stderr );
                for ( j = 0; j < NELEMS(scenes); j++ ) {
                    fprintf( stderr, " %s", scenes[j].name );
                }
                fputs( "\n", stderr );
                exit( 1 );
            }

            kind = scenes[j].kind;
            switch ( kind ) {
              case VOL_DEN:
                file = (char *) scenes[j].file;
                break;
              case VOL_POINT:
                file = NULL;
                w = h = d = scenes[j].dim;
                break;
              default:
                abort( );
            }

            globals.camera.quat.q[0] = scenes[j].camera[0];
            globals.camera.quat.q[1] = scenes[j].camera[1];
            globals.camera.quat.q[2] = scenes[j].camera[2];
            globals.camera.quat.q[3] = scenes[j].camera[3];
            globals.camera.radius    = scenes[j].radius;
            globals.opacity          = scenes[j].opacity;
            globals.threshold        = scenes[j].threshold;
            globals.rainbow          = 1;

        } else {
            xfatal( "unknown option: \"%s\" (-help for help)\n", argv[i] );
        }
    }

	if ( globals.thread_id != 0 ) 
		globals.verbose = 0;

	if ( globals.thread_id == 0 )
	{
		fputs(  "March " __DATE__ " " __TIME__ "\n", stdout );
		printf( "threads = %d\n", globals.num_threads );
		printf( "measure = %f\n", globals.measure_time );
		printf( "print   = %d\n", globals.print );
		printf( "passes  = %d\n", globals.passes );
		printf( "depth   = %d\n", globals.depth_buffer );
		printf( "rainbow = %d\n", globals.rainbow );
		printf( "single  = %d\n", globals.single_buffer );
		printf( "spin    = %d\n", globals.spin );
		printf( "bbox    = %d\n", globals.bbox_hint );
		fputs(  "sequence= ", stdout );
		switch ( globals.sequence_chunks ) {

		  case OrderNone:
			fputs( "none\n", stdout );
			break;

		  case OrderPartial:
			fputs( "partial\n", stdout );
			break;

		  case OrderTotal:
			fputs( "total\n", stdout );
			break;

		  case OrderBarrier:
			fputs( "barrier\n", stdout );
			break;

		  default:
			abort( );
		}
	}

    if ( sub > 16 ) 
	{
        xfatal( "sub of %u?  Are you sure?", sub );
    }

    if ( chunk_w || chunk_h || chunk_d  ) {

        if ( chunk_w < 2 || chunk_h < 2 || chunk_d < 2 ) 
		{
            xfatal( "chunk size is %dx%dx%d, must be at least 2x2x2",
					chunk_w, chunk_h, chunk_d );
        }

        if ( ( chunk_w & 0x1 ) || ( chunk_h & 0x1 ) || ( chunk_d & 0x1 ) ) 
		{
            xfatal( "chunk size is %dx%dx%d, all dimensions must be even",
					chunk_w, chunk_h, chunk_d );
        }
    }

	if ( kind == VOL_RAW || kind == VOL_DEN )
	{
		FILE *f;

		f = fopen( file, "rb" );
		if ( f )
		{
			fclose( f );
		}
		else if ( !strchr( file, '/' ) && !strchr( file, '\\' ) )
		{
			temp = (char *) xmalloc( strlen(root) + strlen(file) + 2 );
			strcpy( temp, root );
			strcat( temp, "/" );
			strcat( temp, file );
			f = fopen( temp, "rb" );
			if ( f )
			{
				fclose( f );
				file = temp;
			}
			else
			{
				xfatal( "couldn't open \"%s\" or \"%s\"", file, temp );
			}
		}
		else
		{
			xfatal( "couldn't open \"%s\"", file );
		}
	}

    switch ( kind ) {

      case VOL_NONE:
        xfatal( "no volume?" );
        break;

      case VOL_SPHERE:
		if ( globals.thread_id == 0 )
		{
			printf( "test volume: sphere (%dx%dx%d)\n", w, h, d );
		}
        ok = Volume_MakeSphere( w, &globals.volume );
        break;

      case VOL_CUBE:
		if ( globals.thread_id == 0 )
		{
			printf( "test volume: cube (%dx%dx%d)\n", w, h, d );
		}
        ok = Volume_MakeCube( w, &globals.volume );
        break;

      case VOL_POINT:
		if ( globals.thread_id == 0 )
		{
			printf( "test volume: point (%dx%dx%d)\n", w, h, d );
		}
        ok = Volume_MakePointField( w, &globals.volume );
        break;

      case VOL_RAW:
		if ( globals.thread_id == 0 )
		{
			printf( "raw volume: %s (%dx%dx%d)\n", file, w, h, d );
		}
        ok = Volume_ReadRaw( file, w, h, d, &globals.volume );
        break;

      case VOL_DEN:
		if ( globals.thread_id == 0 )
		{
			printf( "den volume: %s ", file );
			fflush( stdout );
		}
        ok =  Volume_ReadDen( file, &globals.volume );
		if ( globals.thread_id == 0 )
		{
			printf( "(%dx%dx%d)\n", globals.volume.width,
					globals.volume.height,
					globals.volume.depth );
		}
        break;

      default:
        abort( );
        break;
    }

    if ( !ok )
        return 1;

    if ( sub > 0 ) {
		if ( globals.thread_id == 0 )
		{
			printf( "subsampling volume by %u\n", sub );
		}
        Volume_SubSample( &globals.volume, sub, &globals.volume );
    }

    /*
      Volume_Save( &globals.volume, "volume%03d.pgm" );
    */

    if ( globals.camera.radius < 0.0f ) {

        globals.camera.radius = (float) MAX( globals.volume.width,
											 MAX( globals.volume.height,
												  globals.volume.depth ) );
    }

    if ( !chunk_w && !chunk_h && !chunk_d ) {
        /* pick a good chunking */

        chunk_w = MAX( 2, ( globals.volume.width  / 5 ) & ~1 );
        chunk_h = MAX( 2, ( globals.volume.height / 5 ) & ~1 );
        chunk_d = MAX( 2, ( globals.volume.depth  / 5 ) & ~1 );
    }

    if ( pad ) {
        int w, h, d;
		if ( globals.thread_id == 0 )
		{
			printf( "padding volume\n" );
		}
        w = globals.volume.width  + 2 + ( globals.volume.width  & 0x1 );
        h = globals.volume.height + 2 + ( globals.volume.height & 0x1 );
        d = globals.volume.depth  + 2 + ( globals.volume.depth  & 0x1 );
        Volume_Pad( &globals.volume, w, h, d );
    }

    Volume_Chunk( &globals.volume, chunk_w, chunk_h, chunk_d );

    return 0;
}

/* Number of frames print_performance() has counted since its last printout. */
static int   frame_count = 0;
/* Measurement timer: initialised in main(), restarted by create_context()
 * and after every printout in print_performance(). */
static Timer timer;

/*
 * Count one more frame and, once at least globals.measure_time has
 * elapsed on `timer`, print the triangle rate for the interval and start
 * a new one.
 *
 * triangles: triangle count of the current frame; the rate is computed
 *            from this value and the average frame time.
 *
 * Each printout decrements globals.print; when it reaches 0,
 * globals.please_exit is set.
 */
void
print_performance( int triangles )
{
	double seconds_per_frame;
	double triangles_per_second;
	double elapsed = TimerTime( &timer );

	frame_count += 1;

	/* interval still running (written negatively so the comparison
	 * matches "return while elapsed < measure_time" exactly) */
	if ( !( elapsed < globals.measure_time ) )
	{
		TimerReset( &timer );
		TimerStart( &timer );

		seconds_per_frame    = elapsed / (double) frame_count;
		triangles_per_second = (double) triangles / seconds_per_frame;

		printf( "Mtris/sec: %.3f  (%d frames, %.3fms/frame, "
				"%d triangles/frame)\n",
				1e-6 * triangles_per_second,
				frame_count,
				1e3 * seconds_per_frame,
				triangles );

		frame_count = 0;

		if ( --globals.print == 0 )
		{
			globals.please_exit = 1;
		}
	}
}

/*
 * Print one line with this thread's id and the WIREGL_BYTES_PIPE<i>
 * counter of every pipe, then flush all output streams.
 *
 * The line is assembled in a fixed buffer and written with a single
 * printf().  If the pipe count is too large for the buffer, the list is
 * cut short and ends in " ..." instead of writing past the end.
 */
void
report_statistics( void )
{
	/* ENTRY_MAX: generous bound on the characters one " %d" can produce.
	 * MARKER_SIZE: bytes needed for " ..." including its terminator. */
	enum { ENTRY_MAX = 24, MARKER_SIZE = 5 };

	GLint i;
	GLint n = 0;	/* stays 0 if the query does not store a value */
	int offset;
	char text[1024];

	offset = sprintf( text, "stats thread=%2d bytes:", globals.thread_id );

	wireGLGetIntegerv( WIREGL_NUM_PIPES, &n );
	for ( i = 0; i < n; i++ )
	{
		GLint bytes = 0;

		/* keep room for this entry and for the truncation marker */
		if ( offset + ENTRY_MAX + MARKER_SIZE > (int) sizeof(text) )
		{
			offset += sprintf( text + offset, " ..." );
			break;
		}

		wireGLGetIntegerv( WIREGL_BYTES_PIPE0 + i, &bytes );
		offset += sprintf( text + offset, " %d", bytes );
	}

	printf( "%s\n", text );

	fflush( NULL );
}

/*
 * Begin a frame: synchronise all ranks, then broadcast rank 0's frame
 * parameters so every rank holds the same settings.
 *
 * If the broadcast exit flag is set, the rank finishes GL, prints its
 * statistics, finalises MPI and exits the process with status 0;
 * start_frame() does not return in that case.
 */
void
start_frame( void )
{
	FrameParam frame;

	MPI_Barrier( MPI_COMM_WORLD );

	/* pack the local settings; only rank 0's copy survives the broadcast */
	frame.exit             = globals.please_exit;
	frame.camera           = globals.camera;
	frame.wireframe        = globals.wireframe;
	frame.cull_enable      = globals.cull_enable;
	frame.depth_buffer     = globals.depth_buffer;
	frame.rainbow          = globals.rainbow;
	frame.sequence_chunks  = globals.sequence_chunks;
	frame.opacity          = globals.opacity;
	frame.threshold        = globals.threshold;

	/* the struct is sent as raw bytes from rank 0 */
	MPI_Bcast( &frame, sizeof(frame), MPI_BYTE, 0, MPI_COMM_WORLD );

	if ( !frame.exit )
	{
		/* normal case: adopt the broadcast settings and carry on */
		globals.camera           = frame.camera;
		globals.wireframe        = frame.wireframe;
		globals.cull_enable      = frame.cull_enable;
		globals.depth_buffer     = frame.depth_buffer;
		globals.rainbow          = frame.rainbow;
		globals.sequence_chunks  = frame.sequence_chunks;
		globals.opacity          = frame.opacity;
		globals.threshold        = frame.threshold;
		return;
	}

	/* shutdown: the barriers bracket the statistics output of all ranks */
	glFinish( );
	fflush( NULL );
	MPI_Barrier( MPI_COMM_WORLD );
	report_statistics( );
	MPI_Barrier( MPI_COMM_WORLD );
	MPI_Finalize( );
	exit( 0 );
}

/*
 * End a frame: sum the per-rank triangle counts on rank 0, optionally
 * rotate the camera a little, and let rank 0 report performance.
 *
 * my_triangles: number of triangles this rank drew in the frame.
 */
void
finish_frame( int my_triangles )
{
	int total = 0;

	/* only rank 0 (the root) receives the sum */
	MPI_Reduce( &my_triangles, &total, 1, MPI_INT, MPI_SUM,
				0 /* root */, MPI_COMM_WORLD );

	/* without a gui nobody moves the camera, so spin it here if asked */
	if ( globals.spin && !globals.use_gui )
	{
		Quat step = trackball( 0.5, 0.5, 0.49, 0.5, 1.0 );
		globals.camera.quat = addQuats( step, globals.camera.quat );
	}

	if ( globals.thread_id != 0 )
	{
		return;
	}

	print_performance( total );
}

/*
 * Run the frame bookkeeping without drawing anything: start_frame()
 * followed by finish_frame() with a triangle count of zero.
 */
void
distribute_work( void )
{
	const int triangles_drawn = 0;

	start_frame( );
	finish_frame( triangles_drawn );
}

/*
 * Set up the rendering context for this rank and the synchronisation
 * objects used while drawing, then wait for all ranks and restart the
 * measurement timer.
 */
void
create_context( void )
{
	/* the rank that runs the gui already gets its context from glut;
	 * every other case creates one here */
	if ( !( globals.thread_id == 0 && globals.use_gui ) )
	{
		wireGLCreateContext( );
		wireGLMakeCurrent( );
	}

	if ( globals.thread_id == 0 )
	{
		wireGLSyncWithL2( );
	}

	ContextInit( );

	/* rank 0 creates one semaphore per chunk, numbered from 1, when the
	 * chunks are sequenced with the partial or total ordering */
	if ( globals.thread_id == 0 &&
		 ( globals.sequence_chunks == OrderPartial ||
		   globals.sequence_chunks == OrderTotal ) )
	{
		int id = 1;

		printf( "march: creating %d semaphores, hee hee\n",
				globals.volume.num_chunks );
		while ( id <= globals.volume.num_chunks )
		{
			glSemaphoreCreate( id, 0 );
			id++;
		}
	}

	/* HACK -- every rank issues the same barrier creation; only the
	 * first one matters, the others are ignored */
	glBarrierCreate( 1, globals.num_threads );

	/* global sync point before measuring anything */
	MPI_Barrier( MPI_COMM_WORLD );

	/* start-up is over, so begin timing from here */
	TimerReset( &timer );
	TimerStart( &timer );
}

static GLUTCALLBACK void
draw_frame( void )
{
	int triangles;

	start_frame( );

	if ( globals.thread_id == 0 ) 
	{
		if ( globals.depth_buffer )
			glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
		else
			glClear( GL_COLOR_BUFFER_BIT );
	}

	glBarrierExec( 1 );

	triangles = DrawFrame( );

	glBarrierExec( 1 );

	if ( globals.thread_id == 0 && !globals.single_buffer )
	{
		wireGLSwapBuffers( );
	}

	finish_frame( triangles );
}

/*
 * Program entry point: initialise MPI, parse the options and load the
 * volume, then render frames either from the gui on rank 0 or in an
 * endless loop.
 *
 * The render loop itself never ends; the process leaves through
 * start_frame(), which finalises MPI and exits once the exit flag has
 * been broadcast.
 *
 * Returns 1 if parseArgs() fails, otherwise 0 (reached only if GuiGo()
 * returns).
 */
int
main( int argc, char **argv )
{
	globals.malloc_bytes = 0;

	MPI_Init( &argc, &argv );

	/* parseArgs() needs the rank and size, so query them first */
	MPI_Comm_rank( MPI_COMM_WORLD, &globals.thread_id );
	MPI_Comm_size( MPI_COMM_WORLD, &globals.num_threads );

	/* disabled debugging aid: print the pid and park rank 0 in a loop */
#if 0
	printf( "thread=%d pid=%d\n", globals.thread_id, (int) getpid( ) );

	if ( globals.thread_id == 0 ) {
		volatile int foo = 1;
		while ( foo )
			sleep( 1 );
	}
#endif

	if ( parseArgs( argc, argv ) )
	{
		/* MPI is already initialised: abort the whole job instead of
		 * leaving without MPI_Finalize() while other ranks may be
		 * waiting in a collective call */
		MPI_Abort( MPI_COMM_WORLD, 1 );
		return 1;
	}

	TimerInit( &timer );
	TimerStart( &timer );

	if ( globals.thread_id == 0 && globals.use_gui )
	{
		GuiInit( &argc, argv );

		create_context( );

		GuiGo( draw_frame );
	}
	else
	{
		create_context( );

		/* leaves only through exit() inside start_frame() */
		for ( ; ; )
		{
			draw_frame( );
		}
	}

	MPI_Finalize( );

	return 0;
}
