Merged raster_threading into master

This commit is contained in:
Michaël Lemaire 2013-07-07 00:52:00 +02:00
commit 359c6b5902
6 changed files with 462 additions and 77 deletions

2
TODO
View file

@ -47,11 +47,9 @@ Technology Preview 4 :
- Use bicubic interpolation for antialiasing.
- Allow for larger renders/antialias (will need several two-pass chunks).
- Add a progress indicator on previews.
- Multi threaded first pass.
- Mark modified tabs and ask for losing modifications (idem for layers).
- Fix potential holes in land rendering.
- Progressive final render (increasing resolution, for second pass only).
- Water and terrain LOD moves with the camera, fix it like in the wanderer.
- Improve 3d explorer
=> Restore LOD and intelligent poly count (and raise max tessellation)
=> Better handling of high altitude

View file

@ -51,12 +51,8 @@ struct RenderArea
int pixel_count;
int pixel_done;
RenderFragment* pixels;
ScanPoint* scanline_up;
ScanPoint* scanline_down;
int fragment_callbacks_count;
FragmentCallback fragment_callbacks[64];
int scanline_left;
int scanline_right;
Color background_color;
volatile int dirty_left;
volatile int dirty_right;
@ -69,7 +65,16 @@ struct RenderArea
RenderCallbackUpdate callback_update;
};
typedef struct {
/*
 * Per-triangle scanline buffers.
 *
 * renderPushTriangle allocates one of these for each triangle, with "up" and
 * "down" sized to the antialiased render width, so that edge rasterization
 * does not touch shared state in the RenderArea.
 */
typedef struct
{
ScanPoint* up; /* For each x column, the pushed point with the highest y */
ScanPoint* down; /* For each x column, the pushed point with the lowest y */
int left; /* Lowest x column touched so far (starts at the width, meaning "none") */
int right; /* Highest x column touched so far (starts at -1, meaning "none") */
} RenderScanlines;
typedef struct
{
int startx;
int endx;
int starty;
@ -109,10 +114,6 @@ RenderArea* renderCreateArea(Renderer* renderer)
result->pixel_count = 1;
result->pixels = malloc(sizeof(RenderFragment));
result->fragment_callbacks_count = 0;
result->scanline_up = malloc(sizeof(ScanPoint));
result->scanline_down = malloc(sizeof(ScanPoint));
result->scanline_left = 0;
result->scanline_right = 0;
result->background_color = COLOR_TRANSPARENT;
result->dirty_left = 1;
result->dirty_right = -1;
@ -132,8 +133,6 @@ void renderDeleteArea(RenderArea* area)
colorProfileDelete(area->hdr_mapping);
mutexDestroy(area->lock);
free(area->pixels);
free(area->scanline_up);
free(area->scanline_down);
free(area);
}
@ -156,11 +155,6 @@ void renderSetParams(RenderArea* area, RenderParams params)
area->pixels = realloc(area->pixels, sizeof(RenderFragment) * width * height);
area->pixel_count = width * height;
area->scanline_left = 0;
area->scanline_right = width - 1;
area->scanline_up = realloc(area->scanline_up, sizeof(ScanPoint) * width);
area->scanline_down = realloc(area->scanline_down, sizeof(ScanPoint) * width);
area->dirty_left = width;
area->dirty_right = -1;
area->dirty_down = height;
@ -182,18 +176,6 @@ void renderSetBackgroundColor(RenderArea* area, Color* col)
area->background_color = *col;
}
static void _clearScanLines(RenderArea* area)
{
int x;
for (x = area->scanline_left; x <= area->scanline_right; x++)
{
area->scanline_up[x].y = -1;
area->scanline_down[x].y = area->params.height * area->params.antialias;
}
area->scanline_left = area->params.width * area->params.antialias;
area->scanline_right = -1;
}
void renderClear(RenderArea* area)
{
RenderFragment* pixel;
@ -218,10 +200,6 @@ void renderClear(RenderArea* area)
}
}
area->scanline_left = 0;
area->scanline_right = area->params.width * area->params.antialias - 1;
_clearScanLines(area);
area->callback_start(area->params.width, area->params.height, area->background_color);
area->dirty_left = area->params.width * area->params.antialias;
@ -396,7 +374,7 @@ static void _scanInterpolate(CameraDefinition* camera, ScanPoint* v1, ScanPoint*
result->callback = v1->callback;
}
static void _pushScanPoint(RenderArea* area, ScanPoint* point)
static void _pushScanPoint(RenderArea* area, RenderScanlines* scanlines, ScanPoint* point)
{
point->x = (int)floor(point->pixel.x);
point->y = (int)floor(point->pixel.y);
@ -406,36 +384,36 @@ static void _pushScanPoint(RenderArea* area, ScanPoint* point)
return;
}
if (point->x > area->scanline_right)
if (point->x > scanlines->right)
{
area->scanline_right = point->x;
area->scanline_up[area->scanline_right] = *point;
area->scanline_down[area->scanline_right] = *point;
if (point->x < area->scanline_left)
scanlines->right = point->x;
scanlines->up[scanlines->right] = *point;
scanlines->down[scanlines->right] = *point;
if (point->x < scanlines->left)
{
area->scanline_left = point->x;
scanlines->left = point->x;
}
}
else if (point->x < area->scanline_left)
else if (point->x < scanlines->left)
{
area->scanline_left = point->x;
area->scanline_up[area->scanline_left] = *point;
area->scanline_down[area->scanline_left] = *point;
scanlines->left = point->x;
scanlines->up[scanlines->left] = *point;
scanlines->down[scanlines->left] = *point;
}
else
{
if (point->y > area->scanline_up[point->x].y)
if (point->y > scanlines->up[point->x].y)
{
area->scanline_up[point->x] = *point;
scanlines->up[point->x] = *point;
}
if (point->y < area->scanline_down[point->x].y)
if (point->y < scanlines->down[point->x].y)
{
area->scanline_down[point->x] = *point;
scanlines->down[point->x] = *point;
}
}
}
static void _pushScanLineEdge(RenderArea* area, ScanPoint* point1, ScanPoint* point2)
static void _pushScanLineEdge(RenderArea* area, RenderScanlines* scanlines, ScanPoint* point1, ScanPoint* point2)
{
double dx, fx;
ScanPoint diff, point;
@ -445,7 +423,7 @@ static void _pushScanLineEdge(RenderArea* area, ScanPoint* point1, ScanPoint* po
if (endx < startx)
{
_pushScanLineEdge(area, point2, point1);
_pushScanLineEdge(area, scanlines, point2, point1);
}
else if (endx < 0 || startx >= area->params.width * area->params.antialias)
{
@ -453,8 +431,8 @@ static void _pushScanLineEdge(RenderArea* area, ScanPoint* point1, ScanPoint* po
}
else if (startx == endx)
{
_pushScanPoint(area, point1);
_pushScanPoint(area, point2);
_pushScanPoint(area, scanlines, point1);
_pushScanPoint(area, scanlines, point2);
}
else
{
@ -485,24 +463,24 @@ static void _pushScanLineEdge(RenderArea* area, ScanPoint* point1, ScanPoint* po
/*point.pixel.x = (double)curx;*/
_pushScanPoint(area, &point);
_pushScanPoint(area, scanlines, &point);
}
}
}
static void _renderScanLines(RenderArea* area)
static void _renderScanLines(RenderArea* area, RenderScanlines* scanlines)
{
int x, starty, endy, cury;
ScanPoint diff;
double dy, fy;
ScanPoint up, down, current;
if (area->scanline_right > 0)
if (scanlines->right > 0)
{
for (x = area->scanline_left; x <= area->scanline_right; x++)
for (x = scanlines->left; x <= scanlines->right; x++)
{
up = area->scanline_up[x];
down = area->scanline_down[x];
up = scanlines->up[x];
down = scanlines->down[x];
starty = down.y;
endy = up.y;
@ -560,27 +538,51 @@ void renderPushTriangle(RenderArea* area, Vector3 pixel1, Vector3 pixel2, Vector
return;
}
/* Prepare fragment callback */
mutexAcquire(area->lock);
point1.callback = _pushCallback(area, fragment_callback);
mutexRelease(area->lock);
/* Prepare vertices */
point1.pixel = pixel1;
point1.location = location1;
point1.callback = _pushCallback(area, fragment_callback);
point2.pixel = pixel2;
point2.location = location2;
point2.callback = _pushCallback(area, fragment_callback);
point2.callback = point1.callback;
point3.pixel = pixel3;
point3.location = location3;
point3.callback = _pushCallback(area, fragment_callback);
point3.callback = point1.callback;
_clearScanLines(area);
/* Prepare scanlines */
RenderScanlines scanlines;
int x;
int width = area->params.width * area->params.antialias;
scanlines.left = width;
scanlines.right = -1;
scanlines.up = malloc(sizeof(ScanPoint) * width);
scanlines.down = malloc(sizeof(ScanPoint) * width);
for (x = 0; x < width; x++)
{
/* TODO Do not initialize whole width each time, init only when needed on point push */
scanlines.up[x].y = -1;
scanlines.down[x].y = area->params.height * area->params.antialias;
}
_pushScanLineEdge(area, &point1, &point2);
_pushScanLineEdge(area, &point2, &point3);
_pushScanLineEdge(area, &point3, &point1);
/* Render edges in scanlines */
_pushScanLineEdge(area, &scanlines, &point1, &point2);
_pushScanLineEdge(area, &scanlines, &point2, &point3);
_pushScanLineEdge(area, &scanlines, &point3, &point1);
/* Commit scanlines to area */
mutexAcquire(area->lock);
_renderScanLines(area);
_renderScanLines(area, &scanlines);
mutexRelease(area->lock);
/* Free scanlines */
free(scanlines.up);
free(scanlines.down);
}
Color renderGetPixel(RenderArea* area, int x, int y)

View file

@ -4,7 +4,9 @@
#include <stdlib.h>
#include <math.h>
#include "../tools.h"
#include "../tools/boundingbox.h"
#include "../tools/parallel.h"
#include "../renderer.h"
/*
@ -13,6 +15,8 @@
static inline Vector3 _getPoint(TerrainDefinition* definition, Renderer* renderer, double x, double z)
{
UNUSED(definition);
Vector3 result;
result.x = x;
@ -201,16 +205,55 @@ void terrainGetTessellationInfo(Renderer* renderer, FuncTerrainTessellationCallb
}
}
/*
 * Payload of one terrain tessellation job pushed to the parallel queue.
 * It is heap-allocated by the producer and freed by the job callback.
 */
typedef struct
{
Renderer* renderer; /* Renderer handed to terrainTessellateChunk */
TerrainChunkInfo chunk; /* By-value copy of the chunk to tessellate */
} ParallelRasterInfo;
/*
 * Queue job: tessellate the chunk carried by the payload, then release it.
 *
 * The payload is always freed, even when the job is cancelled (stopping != 0),
 * in which case no tessellation is done.
 */
static int _parallelJobCallback(ParallelQueue* queue, int job_id, void* data, int stopping)
{
    ParallelRasterInfo* job = data;

    UNUSED(queue);
    UNUSED(job_id);

    if (stopping == 0)
    {
        TerrainChunkInfo* chunk = &job->chunk;
        terrainTessellateChunk(job->renderer, chunk, chunk->detail_hint);
    }

    free(job);
    return 0;
}
static int _standardTessellationCallback(Renderer* renderer, TerrainChunkInfo* chunk, double progress)
{
terrainTessellateChunk(renderer, chunk, chunk->detail_hint);
ParallelRasterInfo* info = malloc(sizeof(ParallelRasterInfo));
info->renderer = renderer;
info->chunk = *chunk;
if (!parallelQueueAddJob(renderer->customData[0], _parallelJobCallback, info))
{
free(info);
}
renderer->render_progress = 0.05 * progress;
return !renderer->render_interrupt;
}
/*
 * Rasterize the terrain surface.
 *
 * Chunks reported by terrainGetTessellationInfo are queued as parallel jobs;
 * this function returns once every queued job has been processed.
 */
void terrainRenderSurface(Renderer* renderer)
{
    ParallelQueue* raster_queue = parallelQueueCreate(0);

    /* TODO Do not use custom data, it could already be used by another module */
    renderer->customData[0] = raster_queue;

    /* Feeding the queue accounts for the first 5% of the progress */
    renderer->render_progress = 0.0;
    terrainGetTessellationInfo(renderer, _standardTessellationCallback, 0);
    renderer->render_progress = 0.05;

    /* Let the workers drain the queue before tearing it down */
    parallelQueueWait(raster_queue);
    parallelQueueDelete(raster_queue);
}

View file

@ -140,3 +140,215 @@ int parallelWorkPerform(ParallelWork* work, int workers)
work->running = 0;
return result;
}
#define QUEUE_SIZE 1000
/* Lifecycle of one slot in the queue's job array. */
typedef enum
{
JOB_STATE_FREE, /* Slot is empty and can receive a new job */
JOB_STATE_PENDING, /* Job was added and waits for a worker */
JOB_STATE_PROCESSING, /* A worker has taken the job and is running its callback */
JOB_STATE_TOCOLLECT /* Job is finished and kept for collection (collect mode only) */
} EnumJobState;
/* One slot of the job array. */
typedef struct
{
EnumJobState state; /* Current lifecycle state of the slot */
int id; /* Job identifier returned by parallelQueueAddJob */
FuncParallelJob process; /* Callback run by a worker thread */
void* data; /* Opaque user pointer forwarded to the callback */
} ParallelJob;
/*
 * Job queue served by a pool of worker threads.
 *
 * Jobs live in a fixed array of QUEUE_SIZE slots used as a ring: the producer
 * fills the slot at jobs_index_free, workers take the slot at jobs_index_pending.
 */
struct ParallelQueue
{
int collect; /* Non-zero to keep finished jobs as JOB_STATE_TOCOLLECT instead of freeing the slot */
volatile int stopping; /* Set by parallelQueueInterrupt; workers exit and new jobs are refused */
Mutex* lock; /* Taken by workers and the producer around slot/counter updates */
int workers_count; /* Number of worker threads (one per core reported by systemGetCoreCount) */
Thread** workers; /* Worker thread handles, workers_count entries */
ParallelJob* jobs; /* Ring of QUEUE_SIZE job slots */
int jobs_count; /** Number of jobs in queue (all status except JOB_STATE_FREE) */
int jobs_index_free; /** Index of next free position */
int jobs_index_collect; /** Index of first job to collect */
int jobs_index_pending; /** Index of first pending job to process */
int jobs_next_id; /* Identifier given to the next added job (starts at 1) */
};
/*
 * Worker thread main loop.
 *
 * Until the queue is flagged as stopping, repeatedly claims the job at the
 * pending cursor, runs its callback outside the lock, then either frees the
 * slot or marks it for collection. Sleeps 50ms when no job is pending.
 */
static void* _queueThreadCallback(ParallelQueue* queue)
{
    while (!queue->stopping)
    {
        ParallelJob* taken = NULL;

        /* Claim the job under the pending cursor, if it is ready */
        mutexAcquire(queue->lock);
        {
            ParallelJob* candidate = &queue->jobs[queue->jobs_index_pending];

            if (candidate->state == JOB_STATE_PENDING)
            {
                /* Advance the cursor, wrapping around the ring */
                queue->jobs_index_pending =
                    (queue->jobs_index_pending >= QUEUE_SIZE - 1) ? 0 : queue->jobs_index_pending + 1;
                candidate->state = JOB_STATE_PROCESSING;
                taken = candidate;
            }
        }
        mutexRelease(queue->lock);

        if (!taken)
        {
            /* Nothing to do for now, poll again later */
            timeSleepMs(50);
            continue;
        }

        /* Run the job without holding the lock */
        taken->process(queue, taken->id, taken->data, 0);

        /* Publish the result state */
        mutexAcquire(queue->lock);
        if (queue->collect)
        {
            taken->state = JOB_STATE_TOCOLLECT;
            /* TODO jobs_index_collect ? */
        }
        else
        {
            taken->state = JOB_STATE_FREE;
            queue->jobs_count--;
        }
        mutexRelease(queue->lock);
    }

    return NULL;
}
/*
 * Allocate a queue with QUEUE_SIZE free slots and start one worker thread
 * per core reported by systemGetCoreCount().
 *
 * Collect mode is not implemented yet, so "collect" must be 0 (asserted).
 */
ParallelQueue* parallelQueueCreate(int collect)
{
    ParallelQueue* queue;
    int slot;
    int worker;

    assert(!collect); /* Not fully implemented yet ! */

    queue = malloc(sizeof(ParallelQueue));
    queue->collect = collect;
    queue->stopping = 0;
    queue->lock = mutexCreate();

    /* All slots start free, all cursors at the beginning of the ring */
    queue->jobs = malloc(sizeof(ParallelJob) * QUEUE_SIZE);
    for (slot = 0; slot < QUEUE_SIZE; slot++)
    {
        queue->jobs[slot].state = JOB_STATE_FREE;
    }
    queue->jobs_count = 0;
    queue->jobs_index_free = 0;
    queue->jobs_index_collect = 0;
    queue->jobs_index_pending = 0;
    queue->jobs_next_id = 1;

    /* Start workers */
    queue->workers_count = systemGetCoreCount();
    queue->workers = malloc(sizeof(Thread*) * queue->workers_count);
    for (worker = 0; worker < queue->workers_count; worker++)
    {
        queue->workers[worker] = threadCreate((ThreadFunction)_queueThreadCallback, queue);
    }

    return queue;
}
/*
 * Interrupt the queue, then release everything it owns.
 *
 * Asserts that no job is left to collect and that the queue is empty.
 */
void parallelQueueDelete(ParallelQueue* queue)
{
    /* Stop the workers before releasing anything */
    parallelQueueInterrupt(queue);

    /* In collect mode, finished jobs must have been collected by the caller */
    assert(!(queue->collect && queue->jobs[queue->jobs_index_collect].state == JOB_STATE_TOCOLLECT));
    assert(queue->jobs_count == 0);

    free(queue->workers);
    free(queue->jobs);
    mutexDestroy(queue->lock);
    free(queue);
}
/*
 * Interrupt the queue processing.
 *
 * Flags the queue as stopping (so that new jobs are refused), waits for the
 * worker threads to finish their running job and exit, then cancels every job
 * that is still pending by calling its callback with stopping=1. This gives
 * each job a chance to release its data and brings jobs_count back to a
 * consistent value, as the public contract of this function promises.
 *
 * Only the first call does the work; later calls return immediately.
 *
 * @param queue The queue to interrupt.
 */
void parallelQueueInterrupt(ParallelQueue* queue)
{
    int i;
    int already_stopping;
    ParallelJob* job;

    /* Raise the flag under the lock, so that a producer holding the lock
       either inserted its job before this point, or will see the flag */
    mutexAcquire(queue->lock);
    already_stopping = queue->stopping;
    queue->stopping = 1;
    mutexRelease(queue->lock);

    if (already_stopping)
    {
        return;
    }

    /* Wait for running jobs to end */
    for (i = 0; i < queue->workers_count; i++)
    {
        threadJoin(queue->workers[i]);
    }

    /* Cancel the jobs that no worker picked up */
    for (;;)
    {
        mutexAcquire(queue->lock);
        job = queue->jobs + queue->jobs_index_pending;
        if (job->state != JOB_STATE_PENDING)
        {
            mutexRelease(queue->lock);
            break;
        }
        if (queue->jobs_index_pending >= QUEUE_SIZE - 1)
        {
            queue->jobs_index_pending = 0;
        }
        else
        {
            queue->jobs_index_pending++;
        }
        job->state = JOB_STATE_PROCESSING;
        mutexRelease(queue->lock);

        /* Call the callback outside the lock, like the workers do */
        job->process(queue, job->id, job->data, 1);

        mutexAcquire(queue->lock);
        if (queue->collect)
        {
            job->state = JOB_STATE_TOCOLLECT;
        }
        else
        {
            job->state = JOB_STATE_FREE;
            queue->jobs_count--;
        }
        mutexRelease(queue->lock);
    }
}
/*
 * Block until the queue holds no job anymore (neither pending nor running).
 *
 * The job counter is modified by the worker threads under the queue lock, so
 * it is sampled under the same lock here instead of being read unprotected.
 * Polls every 100ms.
 *
 * @param queue The queue to wait for.
 */
void parallelQueueWait(ParallelQueue* queue)
{
    int remaining;

    for (;;)
    {
        mutexAcquire(queue->lock);
        remaining = queue->jobs_count;
        mutexRelease(queue->lock);

        if (remaining <= 0)
        {
            break;
        }
        timeSleepMs(100);
    }
}
/*
 * Add a job to the queue, waiting for a free slot if the ring is full.
 *
 * The slot state and the stopping flag are checked under the queue lock,
 * because worker threads update slot states under that lock. The job id is
 * only consumed once the job is actually accepted.
 *
 * Must not be called concurrently from several threads.
 *
 * @param queue The queue.
 * @param func_process Callback run by a worker to process the job.
 * @param data Opaque pointer forwarded to the callback.
 * @return The job ID, or 0 if the queue is stopping and refuses the job
 *         (in which case the caller keeps ownership of data).
 */
int parallelQueueAddJob(ParallelQueue* queue, FuncParallelJob func_process, void* data)
{
    ParallelJob* slot;
    int job_id;

    /* Wait for a free slot; the loop exits with the lock held */
    for (;;)
    {
        mutexAcquire(queue->lock);
        if (queue->stopping)
        {
            mutexRelease(queue->lock);
            return 0;
        }
        slot = queue->jobs + queue->jobs_index_free;
        if (slot->state == JOB_STATE_FREE)
        {
            break;
        }
        mutexRelease(queue->lock);
        timeSleepMs(50);
    }

    /* Fill the slot */
    job_id = queue->jobs_next_id++;
    slot->id = job_id;
    slot->process = func_process;
    slot->data = data;
    slot->state = JOB_STATE_PENDING;

    /* Advance the free cursor, wrapping around the ring */
    if (queue->jobs_index_free >= QUEUE_SIZE - 1)
    {
        queue->jobs_index_free = 0;
    }
    else
    {
        queue->jobs_index_free++;
    }
    queue->jobs_count++;
    assert(queue->jobs_count <= QUEUE_SIZE);
    mutexRelease(queue->lock);

    return job_id;
}
/*
 * Collect finished jobs.
 *
 * Not implemented yet: func_collect is never called and 0 is always returned.
 */
int parallelQueueCollectJobs(FuncParallelJob func_collect)
{
/* TODO */
return 0;
}

View file

@ -16,13 +16,94 @@ extern "C" {
typedef struct ParallelWork ParallelWork;
typedef int (*ParallelUnitFunction)(ParallelWork* work, int unit, void* data);
/*void parallelInit();
void parallelQuit();*/
/**
* Create a parallel work handler.
*
* This will spawn an optimal number of threads to process a given number of work units.
* @param func The callback that will be called from threads to process one unit.
* @param units Number of units to handle.
* @param data Custom data that will be passed to the callback.
* @return The newly allocated handler.
*/
ParallelWork* parallelWorkCreate(ParallelUnitFunction func, int units, void* data);
/**
* Delete a parallel work handler.
*
* The work must be terminated or fully interrupted before calling this.
* @param work The handler to free.
*/
void parallelWorkDelete(ParallelWork* work);
/**
* Start working on the units.
*
* @param work The handler.
* @param workers Number of threads to spawn, -1 for an optimal number.
*/
int parallelWorkPerform(ParallelWork* work, int workers);
typedef struct ParallelQueue ParallelQueue;
typedef int (*FuncParallelJob)(ParallelQueue* queue, int job_id, void* data, int stopping);
/**
* Create a parallel processing queue.
*
* This queue will use parallel workers to process jobs added to it.
* @param collect True to collect finished jobs and wait for a call to parallelQueueCollectJobs, False to discard finished jobs.
* @return The newly allocated queue.
*/
ParallelQueue* parallelQueueCreate(int collect);
/**
* Delete a parallel queue.
*
* This will interrupt the queue.
* If the queue is in collect mode, you should call parallelQueueInterrupt, then parallelQueueCollectJobs, before calling this.
* @param queue The queue to free.
*/
void parallelQueueDelete(ParallelQueue* queue);
/**
* Interrupt the queue processing.
*
* This will wait for running jobs to end, cancel pending jobs (still calling their callbacks with stopping=1) and
* refuse future jobs.
* @param queue The queue to interrupt.
*/
void parallelQueueInterrupt(ParallelQueue* queue);
/**
* Wait for all jobs to finish.
*
* This function will return as soon as there are no pending or running jobs left. It is recommended to stop feeding the queue, or this
* function may never return.
*/
void parallelQueueWait(ParallelQueue* queue);
/**
* Add a job to the queue.
*
* Don't call this method concurrently from several threads.
* @param queue The queue.
* @param func_process The function that will be called for the job processing.
* @param data The data that will be passed to the callback.
* @return The job ID, 0 if the queue doesn't accept jobs.
*/
int parallelQueueAddJob(ParallelQueue* queue, FuncParallelJob func_process, void* data);
/**
* Collect finished jobs.
*
* The callback func_collect will be called sequentially for each finished job, from the caller thread (not parallel threads).
* Don't call this method concurrently from several threads.
* @param func_collect The callback for collect.
* @return The number of collected jobs.
*/
int parallelQueueCollectJobs(FuncParallelJob func_collect);
#ifdef __cplusplus
}
#endif

View file

@ -2,8 +2,10 @@
#include "private.h"
#include <stdlib.h>
#include <math.h>
#include "../renderer.h"
#include "../tools.h"
#include "../tools/parallel.h"
static Color _postProcessFragment(Renderer* renderer, Vector3 location, void* data)
{
@ -34,12 +36,43 @@ static void _renderQuad(Renderer* renderer, double x, double z, double size)
renderer->pushQuad(renderer, v1, v2, v3, v4, _postProcessFragment, NULL);
}
/*
 * Payload of one water rasterization job: the parameters needed to render
 * the four quads of index "i" on the current ring around (cx, cz).
 * It is heap-allocated by waterRenderSurface and freed by the job callback.
 */
typedef struct
{
Renderer* renderer; /* Renderer handed to _renderQuad */
int i; /* Index of the quad along each side of the ring */
double cx; /* X coordinate of the ring center */
double cz; /* Z coordinate of the ring center */
double radius_int; /* Inner radius of the ring */
double chunk_size; /* Side length of each quad */
double radius_ext; /* Outer radius of the ring */
} ParallelRasterInfo;
/*
 * Queue job: render the four quads (one per side of the ring) described by
 * the payload, then release it.
 *
 * The payload is always freed, even when the job is cancelled (stopping != 0),
 * in which case nothing is rendered.
 */
static int _parallelJobCallback(ParallelQueue* queue, int job_id, void* data, int stopping)
{
    ParallelRasterInfo* job = data;

    UNUSED(queue);
    UNUSED(job_id);

    if (stopping == 0)
    {
        Renderer* renderer = job->renderer;

        _renderQuad(renderer,
                    job->cx - job->radius_ext + job->chunk_size * job->i,
                    job->cz - job->radius_ext,
                    job->chunk_size);
        _renderQuad(renderer,
                    job->cx + job->radius_int,
                    job->cz - job->radius_ext + job->chunk_size * job->i,
                    job->chunk_size);
        _renderQuad(renderer,
                    job->cx + job->radius_int - job->chunk_size * job->i,
                    job->cz + job->radius_int,
                    job->chunk_size);
        _renderQuad(renderer,
                    job->cx - job->radius_ext,
                    job->cz + job->radius_int - job->chunk_size * job->i,
                    job->chunk_size);
    }

    free(job);
    return 0;
}
void waterRenderSurface(Renderer* renderer)
{
ParallelRasterInfo* info;
ParallelQueue* queue;
queue = parallelQueueCreate(0);
int chunk_factor, chunk_count, i;
Vector3 cam = renderer->getCameraLocation(renderer, VECTOR_ZERO);
double cx = cam.x;
double cz = cam.z;
double radius_int, radius_ext, base_chunk_size, chunk_size;
base_chunk_size = 2.0 / (double)renderer->render_quality;
@ -54,6 +87,9 @@ void waterRenderSurface(Renderer* renderer)
radius_ext = base_chunk_size;
chunk_size = base_chunk_size;
double cx = cam.x - fmod(cam.x, base_chunk_size);
double cz = cam.z - fmod(cam.x, base_chunk_size);
while (radius_int < 20000.0)
{
if (!renderer->addRenderProgress(renderer, 0.0))
@ -63,10 +99,20 @@ void waterRenderSurface(Renderer* renderer)
for (i = 0; i < chunk_count - 1; i++)
{
_renderQuad(renderer, cx - radius_ext + chunk_size * i, cz - radius_ext, chunk_size);
_renderQuad(renderer, cx + radius_int, cz - radius_ext + chunk_size * i, chunk_size);
_renderQuad(renderer, cx + radius_int - chunk_size * i, cz + radius_int, chunk_size);
_renderQuad(renderer, cx - radius_ext, cz + radius_int - chunk_size * i, chunk_size);
info = malloc(sizeof(ParallelRasterInfo));
info->renderer = renderer;
info->cx = cx;
info->cz = cz;
info->i = i;
info->radius_int = radius_int;
info->radius_ext = radius_ext;
info->chunk_size = chunk_size;
if (!parallelQueueAddJob(queue, _parallelJobCallback, info))
{
free(info);
}
}
if (radius_int > 20.0 && chunk_count % 64 == 0 && (double)chunk_factor < radius_int / 20.0)
@ -79,4 +125,7 @@ void waterRenderSurface(Renderer* renderer)
radius_int = radius_ext;
radius_ext += chunk_size;
}
parallelQueueWait(queue);
parallelQueueDelete(queue);
}