From 14fcc1883e02b99e0cc45072d97f4ff25408054d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C3=ABl=20Lemaire?=
Date: Tue, 18 Dec 2012 16:20:38 +0000
Subject: [PATCH] paysages : Parallel processing helpers.

git-svn-id: https://subversion.assembla.com/svn/thunderk/paysages@483 b1fd45b6-86a6-48da-8261-f70d1f35bdcc
---
Review notes (everything between the "---" line and the first "diff --git"
line is commentary and is not part of the commit).

What the patch does
 * Adds tools/parallel.c and tools/parallel.h: a job made of N units is run by
   a bounded number of threads (at most PARALLEL_MAX_THREADS = 20), one unit
   per thread at a time.
 * bruneton.c: the four "for (layer = 0; layer < RES_R; ++layer)" loops of
   brunetonInit() become parallel jobs of RES_R units. The file-scope
   _r / _dhdH / _layer variables are replaced by locals filled in by
   _getLayerParams(), so that workers do not share that state. The per-layer
   progress printf() calls are dropped.
 * auto.c: removes the 'terrain' local of autoGenRealisticLandscape().
 * cache.c: removes one #include.

Differences from the patch as originally posted (new files only)
 * parallel.c: <assert.h> and <stdlib.h> are included explicitly (the file
   uses assert(), malloc() and free()).
 * parallel.c: the worker count is forced to at least 1. With zero worker
   slots _runNextWorker() never finds a slot and polls forever.
 * parallel.c: a finished slot is joined before its result is read and its
   fields are reused.
 * parallel.h: documentation comments on the public API.
 * The hunk headers and the diffstat are updated accordingly; the blob ids on
   the "index" lines of the two new files still refer to the original contents.

Restoration caveats
 * This patch was recovered from a copy in which line breaks, indentation and
   any text between '<' and '>' had been lost, and in which "&params" had been
   turned into a pilcrow followed by "ms". Line structure was rebuilt from the
   hunk line counts; indentation is a reconstruction (4 spaces), so apply with
   whitespace differences ignored.
 * The header names of the bare "#include" context/removed lines in
   bruneton.c and cache.c, and the author's e-mail address, could not be
   recovered and must be restored from the base revision.

Open points, not addressed here
 * NOTE(review): ParallelWorker.status is written by the worker thread and
   polled by the caller without any visible synchronization; confirm that the
   primitives in system.h make this safe, or protect it with a mutex/atomic.
 * NOTE(review): parallelWorkCreate() does not check the result of malloc(),
   and none of the callers added here checks for a NULL job.
 * NOTE(review): _workerThreadCallback is cast to ThreadFunction; presumably
   ThreadFunction takes a void* argument - a wrapper with the exact signature
   would avoid calling through an incompatible function pointer type.
 * NOTE(review): in _copyInscatterNWorker, "layer + 0.5 / (double)(RES_R)"
   looks like it was meant to be "(layer + 0.5) / (double)(RES_R)", given that
   the other two coordinates are normalized. The expression is unchanged from
   the old code, so it is left alone in this refactoring; confirm against
   texture3DGetLinear().

 lib_paysages/atmosphere/bruneton.c | 150 ++++++++++++++++++-----------
 lib_paysages/auto.c                |   1 -
 lib_paysages/tools/cache.c         |   2 -
 lib_paysages/tools/parallel.c      | 156 +++++++++++++++++++++++++++
 lib_paysages/tools/parallel.h      |  49 ++++++
 5 files changed, 300 insertions(+), 58 deletions(-)
 create mode 100644 lib_paysages/tools/parallel.c
 create mode 100644 lib_paysages/tools/parallel.h

diff --git a/lib_paysages/atmosphere/bruneton.c b/lib_paysages/atmosphere/bruneton.c
index d3687c4..694003a 100644
--- a/lib_paysages/atmosphere/bruneton.c
+++ b/lib_paysages/atmosphere/bruneton.c
@@ -9,8 +9,10 @@
 #include
 #include
 #include "../system.h"
+#include "../tools.h"
 #include "../tools/cache.h"
 #include "../tools/texture.h"
+#include "../tools/parallel.h"
 
 /*********************** Constants ***********************/
 
@@ -78,12 +80,6 @@ static const vec3 betaMSca = vec3(3e-3);
 static const vec3 betaMEx = betaMSca / 0.9;
 static const float mieG = 0.65;*/
 
-/*********************** Layer variables ***********************/
-
-static double _r;
-static Color _dhdH;
-static int _layer;
-
 /*********************** Shader helpers ***********************/
 
 #define step(_a_,_b_) ((_a_) < (_b_) ? 0 : 1)
@@ -441,7 +437,7 @@ static void _precomputeIrrDeltaETexture()
     }
 }
 
-static void _setLayer(int layer)
+static void _getLayerParams(int layer, double* _r, Color* _dhdH)
 {
     double r = layer / (RES_R - 1.0);
     r = r * r;
@@ -451,12 +447,11 @@ static void _setLayer(int layer)
     double dminp = r - Rg;
     double dmaxp = sqrt(r * r - Rg * Rg);
 
-    _r = r;
-    _dhdH.r = dmin;
-    _dhdH.g = dmax;
-    _dhdH.b = dminp;
-    _dhdH.a = dmaxp;
-    _layer = layer;
+    *_r = r;
+    _dhdH->r = dmin;
+    _dhdH->g = dmax;
+    _dhdH->b = dminp;
+    _dhdH->a = dmaxp;
 }
 
 /*********************** inscatter1.glsl ***********************/
@@ -519,8 +514,21 @@ static void _inscatter1(double r, double mu, double muS, double nu, Color* ray,
     mie->b *= betaMSca.z;
 }
 
-static void _inscatter1Prog(Texture3D* tex_rayleigh, Texture3D* tex_mie)
+typedef struct
 {
+    Texture3D* ray;
+    Texture3D* mie;
+} Inscatter1Params;
+
+static int _inscatter1Worker(ParallelWork* work, int layer, void* data)
+{
+    Inscatter1Params* params = (Inscatter1Params*)data;
+    UNUSED(work);
+
+    double r;
+    Color dhdH;
+    _getLayerParams(layer, &r, &dhdH);
+
     int x, y;
     for (x = 0; x < RES_MU_S * RES_NU; x++)
     {
@@ -532,14 +540,15 @@ static void _inscatter1Prog(Texture3D* tex_rayleigh, Texture3D* tex_mie)
             Color ray = COLOR_BLACK;
             Color mie = COLOR_BLACK;
             double mu, muS, nu;
-            _getMuMuSNu((double)x, (double)y, _r, _dhdH, &mu, &muS, &nu);
-            _inscatter1(_r, mu, muS, nu, &ray, &mie);
+            _getMuMuSNu((double)x, (double)y, r, dhdH, &mu, &muS, &nu);
+            _inscatter1(r, mu, muS, nu, &ray, &mie);
             /* store separately Rayleigh and Mie contributions, WITHOUT the phase function factor
              * (cf "Angular precision") */
-            texture3DSetPixel(tex_rayleigh, x, y, _layer, ray);
-            texture3DSetPixel(tex_mie, x, y, _layer, mie);
+            texture3DSetPixel(params->ray, x, y, layer, ray);
+            texture3DSetPixel(params->mie, x, y, layer, mie);
         }
     }
+    return 1;
 }
 
 /*********************** inscatterS.glsl ***********************/
@@ -642,8 +651,21 @@ static Color _inscatterS(double r, double mu, double muS, double nu, int first)
     return raymie;
 }
 
-static void _jProg(Texture3D* result, int first)
+typedef struct
 {
+    Texture3D* result;
+    int first;
+} jParams;
+
+static int _jWorker(ParallelWork* work, int layer, void* data)
+{
+    jParams* params = (jParams*)data;
+    UNUSED(work);
+
+    double r;
+    Color dhdH;
+    _getLayerParams(layer, &r, &dhdH);
+
     int x, y;
     for (x = 0; x < RES_MU_S * RES_NU; x++)
     {
@@ -651,11 +673,12 @@ static void _jProg(Texture3D* result, int first)
         {
             Color raymie;
             double mu, muS, nu;
-            _getMuMuSNu((double)x, (double)y, _r, _dhdH, &mu, &muS, &nu);
-            raymie = _inscatterS(_r, mu, muS, nu, first);
-            texture3DSetPixel(result, x, y, _layer, raymie);
+            _getMuMuSNu((double)x, (double)y, r, dhdH, &mu, &muS, &nu);
+            raymie = _inscatterS(r, mu, muS, nu, params->first);
+            texture3DSetPixel(params->result, x, y, layer, raymie);
         }
     }
+    return 1;
 }
 
 /*********************** irradianceN.glsl ***********************/
@@ -747,40 +770,61 @@ static Color _inscatterN(double r, double mu, double muS, double nu)
     return raymie;
 }
 
-static void _inscatterNProg(Texture3D* result)
+static int _inscatterNWorker(ParallelWork* work, int layer, void* data)
 {
+    UNUSED(work);
+
+    double r;
+    Color dhdH;
+    _getLayerParams(layer, &r, &dhdH);
+
     int x, y;
     for (x = 0; x < RES_MU_S * RES_NU; x++)
     {
         for (y = 0; y < RES_MU; y++)
         {
             double mu, muS, nu;
-            _getMuMuSNu((double)x, (double)y, _r, _dhdH, &mu, &muS, &nu);
-            texture3DSetPixel(result, x, y, _layer, _inscatterN(_r, mu, muS, nu));
+            _getMuMuSNu((double)x, (double)y, r, dhdH, &mu, &muS, &nu);
+            texture3DSetPixel((Texture3D*)data, x, y, layer, _inscatterN(r, mu, muS, nu));
         }
     }
+    return 1;
 }
 
 /*********************** copyInscatterN.glsl ***********************/
 
-static void _copyInscatterNProg(Texture3D* source, Texture3D* destination)
+typedef struct
 {
+    Texture3D* source;
+    Texture3D* destination;
+} CopyInscatterNParams;
+
+static int _copyInscatterNWorker(ParallelWork* work, int layer, void* data)
+{
+    CopyInscatterNParams* params = (CopyInscatterNParams*)data;
+    UNUSED(work);
+
+    double r;
+    Color dhdH;
+    _getLayerParams(layer, &r, &dhdH);
+
     int x, y;
     for (x = 0; x < RES_MU_S * RES_NU; x++)
     {
         for (y = 0; y < RES_MU; y++)
         {
             double mu, muS, nu;
-            _getMuMuSNu((double)x, (double)y, _r, _dhdH, &mu, &muS, &nu);
-            Color col1 = texture3DGetLinear(source, x / (double)(RES_MU_S * RES_NU), y / (double)(RES_MU), _layer + 0.5 / (double)(RES_R));
-            Color col2 = texture3DGetPixel(destination, x, y, _layer);
+            _getMuMuSNu((double)x, (double)y, r, dhdH, &mu, &muS, &nu);
+            Color col1 = texture3DGetLinear(params->source, x / (double)(RES_MU_S * RES_NU), y / (double)(RES_MU), layer + 0.5 / (double)(RES_R));
+            Color col2 = texture3DGetPixel(params->destination, x, y, layer);
             col2.r += col1.r * 0.1 / _phaseFunctionR(nu);
             col2.g += col1.g * 0.1 / _phaseFunctionR(nu);
             col2.b += col1.b * 0.1 / _phaseFunctionR(nu);
             col2.a = 1.0;
-            texture3DSetPixel(destination, x, y, _layer, col2);
+            texture3DSetPixel(params->destination, x, y, layer, col2);
         }
     }
+    return 1;
 }
 
 /*********************** Final getters ***********************/
@@ -1019,10 +1063,10 @@ static void _saveCache3D(Texture3D* tex, const char* tag, int order)
 }
 
 /*********************** Public methods ***********************/
-
 void brunetonInit()
 {
-    int layer, x, y, z, order;
+    int x, y, z, order;
+    ParallelWork* work;
 
     /* TODO Deletes */
 
@@ -1048,12 +1092,11 @@ void brunetonInit()
     _deltaSMTexture = texture3DCreate(RES_MU_S * RES_NU, RES_MU, RES_R);
     if (!_tryLoadCache3D(_deltaSRTexture, "deltaSR", 0) || !_tryLoadCache3D(_deltaSMTexture, "deltaSM", 0))
     {
-        for (layer = 0; layer < RES_R; ++layer)
-        {
-            printf("deltaS %d\n", layer);
-            _setLayer(layer);
-            _inscatter1Prog(_deltaSRTexture, _deltaSMTexture);
-        }
+        Inscatter1Params params = {_deltaSRTexture, _deltaSMTexture};
+        work = parallelWorkCreate(_inscatter1Worker, RES_R, &params);
+        parallelWorkPerform(work, -1);
+        parallelWorkDelete(work);
+
         _saveCache3D(_deltaSRTexture, "deltaSR", 0);
         _saveCache3D(_deltaSMTexture, "deltaSM", 0);
     }
@@ -1090,12 +1133,11 @@ void brunetonInit()
         _deltaJTexture = texture3DCreate(RES_MU_S * RES_NU, RES_MU, RES_R);
         if (!_tryLoadCache3D(_deltaJTexture, "deltaJ", order))
         {
-            for (layer = 0; layer < RES_R; ++layer)
-            {
-                printf("deltaJ %d %d\n", order, layer);
-                _setLayer(layer);
-                _jProg(_deltaJTexture, order == 2);
-            }
+            jParams params = {_deltaJTexture, order == 2};
+            work = parallelWorkCreate(_jWorker, RES_R, &params);
+            parallelWorkPerform(work, -1);
+            parallelWorkDelete(work);
+
             _saveCache3D(_deltaJTexture, "deltaJ", order);
         }
 
@@ -1110,12 +1152,10 @@ void brunetonInit()
         /* computes deltaS (line 9 in algorithm 4.1) */
         if (!_tryLoadCache3D(_deltaSRTexture, "deltaSR", order))
         {
-            for (layer = 0; layer < RES_R; ++layer)
-            {
-                printf("deltaS %d %d\n", order, layer);
-                _setLayer(layer);
-                _inscatterNProg(_deltaSRTexture);
-            }
+            work = parallelWorkCreate(_inscatterNWorker, RES_R, _deltaSRTexture);
+            parallelWorkPerform(work, -1);
+            parallelWorkDelete(work);
+
             _saveCache3D(_deltaSRTexture, "deltaSR", order);
         }
 
@@ -1129,11 +1169,11 @@ void brunetonInit()
         /* adds deltaS into inscatter texture S (line 11 in algorithm 4.1) */
         if (!_tryLoadCache3D(_inscatterTexture, "inscatter", order))
         {
-            for (layer = 0; layer < RES_R; ++layer)
-            {
-                _setLayer(layer);
-                _copyInscatterNProg(_deltaSRTexture, _inscatterTexture);
-            }
+            CopyInscatterNParams params = {_deltaSRTexture, _inscatterTexture};
+            work = parallelWorkCreate(_copyInscatterNWorker, RES_R, &params);
+            parallelWorkPerform(work, -1);
+            parallelWorkDelete(work);
+
             _saveCache3D(_inscatterTexture, "inscatter", order);
         }
     }
diff --git a/lib_paysages/auto.c b/lib_paysages/auto.c
index af18a2d..489617f 100644
--- a/lib_paysages/auto.c
+++ b/lib_paysages/auto.c
@@ -40,7 +40,6 @@ void autoSetDaytimeFraction(double daytime)
 
 void autoGenRealisticLandscape(int seed)
 {
-    TerrainDefinition terrain;
     WaterDefinition water;
     CloudsDefinition clouds;
     TexturesDefinition textures;
diff --git a/lib_paysages/tools/cache.c b/lib_paysages/tools/cache.c
index e59d8fc..28b4c7f 100644
--- a/lib_paysages/tools/cache.c
+++ b/lib_paysages/tools/cache.c
@@ -1,5 +1,3 @@
-#include
-
 #include "cache.h"
 
 #include
diff --git a/lib_paysages/tools/parallel.c b/lib_paysages/tools/parallel.c
new file mode 100644
index 0000000..73fddc1
--- /dev/null
+++ b/lib_paysages/tools/parallel.c
@@ -0,0 +1,156 @@
+#include "parallel.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include "../system.h"
+
+#define PARALLEL_MAX_THREADS 20
+
+typedef enum
+{
+    PARALLEL_WORKER_STATUS_VOID,
+    PARALLEL_WORKER_STATUS_RUNNING,
+    PARALLEL_WORKER_STATUS_DONE
+} ParallelWorkerStatus;
+
+typedef struct
+{
+    Thread* thread;
+    ParallelWork* work;
+    ParallelWorkerStatus status;
+    int unit;
+    int result;
+} ParallelWorker;
+
+struct ParallelWork
+{
+    int units;
+    int running;
+    ParallelUnitFunction unit_function;
+    ParallelWorker workers[PARALLEL_MAX_THREADS];
+    void* data;
+};
+
+ParallelWork* parallelWorkCreate(ParallelUnitFunction func, int units, void* data)
+{
+    ParallelWork* result;
+
+    result = (ParallelWork*)malloc(sizeof(ParallelWork));
+    result->units = units;
+    result->running = 0;
+    result->unit_function = func;
+    result->data = data;
+
+    return result;
+}
+
+void parallelWorkDelete(ParallelWork* work)
+{
+    assert(!work->running);
+    free(work);
+}
+
+/* Thread entry point: run the unit assigned to this worker slot, then flag the slot as done. */
+static void* _workerThreadCallback(ParallelWorker* worker)
+{
+    worker->result = worker->work->unit_function(worker->work, worker->unit, worker->work->data);
+    worker->status = PARALLEL_WORKER_STATUS_DONE;
+    return NULL;
+}
+
+/*
+ * Start 'unit' on the first slot that is unused or finished, polling every 50 ms until there is one.
+ * Returns the result of the unit that previously ran in that slot (0 if the slot was unused).
+ */
+static int _runNextWorker(ParallelWorker workers[], int worker_count, int unit)
+{
+    int i;
+
+    while (1)
+    {
+        for (i = 0; i < worker_count; i++)
+        {
+            ParallelWorker* worker = workers + i;
+            if (worker->status == PARALLEL_WORKER_STATUS_VOID)
+            {
+                worker->status = PARALLEL_WORKER_STATUS_RUNNING;
+                worker->result = 0;
+                worker->unit = unit;
+                worker->thread = threadCreate((ThreadFunction)_workerThreadCallback, worker);
+
+                return 0;
+            }
+            else if (worker->status == PARALLEL_WORKER_STATUS_DONE)
+            {
+                int result;
+
+                /* The previous thread must be completely finished before its slot is read and reused */
+                threadJoin(worker->thread);
+                result = worker->result;
+
+                worker->status = PARALLEL_WORKER_STATUS_RUNNING;
+                worker->result = 0;
+                worker->unit = unit;
+                worker->thread = threadCreate((ThreadFunction)_workerThreadCallback, worker);
+
+                return result;
+            }
+        }
+        timeSleepMs(50);
+    }
+}
+
+int parallelWorkPerform(ParallelWork* work, int workers)
+{
+    int i, done, result;
+    assert(!work->running);
+
+    result = 0;
+
+    if (workers <= 0)
+    {
+        workers = systemGetCoreCount();
+    }
+    if (workers < 1)
+    {
+        /* With no worker slot at all, _runNextWorker() would never find one and poll forever */
+        workers = 1;
+    }
+    if (workers > PARALLEL_MAX_THREADS)
+    {
+        workers = PARALLEL_MAX_THREADS;
+    }
+    work->running = 1;
+
+    /* Init workers */
+    for (i = 0; i < workers; i++)
+    {
+        work->workers[i].status = PARALLEL_WORKER_STATUS_VOID;
+        work->workers[i].work = work;
+    }
+
+    /* Perform run */
+    for (done = 0; done < work->units; done++)
+    {
+        if (_runNextWorker(work->workers, workers, done))
+        {
+            result++;
+        }
+    }
+
+    /* Wait and clean up workers */
+    for (i = 0; i < workers; i++)
+    {
+        if (work->workers[i].status != PARALLEL_WORKER_STATUS_VOID)
+        {
+            threadJoin(work->workers[i].thread);
+            if (work->workers[i].result)
+            {
+                result++;
+            }
+        }
+    }
+
+    work->running = 0;
+    return result;
+}
diff --git a/lib_paysages/tools/parallel.h b/lib_paysages/tools/parallel.h
new file mode 100644
index 0000000..6c5fca1
--- /dev/null
+++ b/lib_paysages/tools/parallel.h
@@ -0,0 +1,49 @@
+#ifndef _PAYSAGES_TOOLS_PARALLEL_H_
+#define _PAYSAGES_TOOLS_PARALLEL_H_
+
+/*
+ * Parallel processing helpers.
+ *
+ * Several units of work can be accomplished by a given number of parallel workers.
+ * Workers are implemented by threads so thread-safety must be ensured while accessing
+ * shared data from unit functions.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct ParallelWork ParallelWork;
+
+/*
+ * Function performing one unit of work.
+ * 'unit' is the index of the unit, 'data' is the pointer that was given to parallelWorkCreate().
+ * parallelWorkPerform() returns the number of units for which this function returned non-zero.
+ */
+typedef int (*ParallelUnitFunction)(ParallelWork* work, int unit, void* data);
+
+/*void parallelInit();
+void parallelQuit();*/
+
+/*
+ * Describe a job made of 'units' calls to func, with unit indices going from 0 to units - 1.
+ * Nothing is run until parallelWorkPerform() is called.
+ */
+ParallelWork* parallelWorkCreate(ParallelUnitFunction func, int units, void* data);
+
+/* Free a job. Must not be called while the job is being performed. */
+void parallelWorkDelete(ParallelWork* work);
+
+/*
+ * Run all units of the job and wait for them to finish.
+ * 'workers' is the maximum number of threads running at the same time; if it is 0 or less,
+ * the value returned by systemGetCoreCount() is used. It is capped by an internal limit.
+ * Returns the number of units whose function returned non-zero.
+ */
+int parallelWorkPerform(ParallelWork* work, int workers);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif