paysages : Added structure for opencl usage.
git-svn-id: https://subversion.assembla.com/svn/thunderk/paysages@469 b1fd45b6-86a6-48da-8261-f70d1f35bdcc
This commit is contained in:
parent
15fe1cb0e1
commit
49c6c17118
10 changed files with 461 additions and 19 deletions
4
TODO
4
TODO
|
@ -10,6 +10,10 @@ Technology Preview 2 :
|
||||||
=> Apply model to atmosphere (aerial perspective)
|
=> Apply model to atmosphere (aerial perspective)
|
||||||
=> Find a proper model for night sky (maybe Shirley)
|
=> Find a proper model for night sky (maybe Shirley)
|
||||||
- Clouds should keep distance to ground.
|
- Clouds should keep distance to ground.
|
||||||
|
- Restore aerial perspective.
|
||||||
|
- Implement Bruneton's scattering model.
|
||||||
|
- Add clouds to explorer with 3d textures.
|
||||||
|
- Start using OpenCL to optimize rendering.
|
||||||
- Rethink the quality settings and detail smoothing in the distance.
|
- Rethink the quality settings and detail smoothing in the distance.
|
||||||
=> When quality setting is set to 10, add boost options
|
=> When quality setting is set to 10, add boost options
|
||||||
=> Add detail boost (adds granularity)
|
=> Add detail boost (adds granularity)
|
||||||
|
|
|
@ -5,19 +5,28 @@ SOURCES = $(wildcard *.c atmosphere/*.c)
|
||||||
OBJECTS = ${SOURCES:%.c=${OBJPATH}/%.o}
|
OBJECTS = ${SOURCES:%.c=${OBJPATH}/%.o}
|
||||||
HEADERS = $(wildcard shared/*.h atmosphere/*.h *.h)
|
HEADERS = $(wildcard shared/*.h atmosphere/*.h *.h)
|
||||||
RESULT = ${BUILDPATH}/libpaysages.so
|
RESULT = ${BUILDPATH}/libpaysages.so
|
||||||
CC_FLAGS = -Wall -fPIC $(shell pkg-config --cflags glib-2.0 gthread-2.0) -DHAVE_GLIB=1
|
LIBS = glib-2.0 gthread-2.0 IL ILU
|
||||||
CC_LDFLAGS = $(shell pkg-config --libs glib-2.0 gthread-2.0) -lIL -lILU
|
CC_FLAGS = -Wall -fPIC -DHAVE_GLIB=1
|
||||||
|
|
||||||
ifeq ($(BUILDMODE),debug)
|
CHECK_OPENCL = $(shell pkg-config --modversion --silence-errors OpenCL)
|
||||||
|
ifneq (,${CHECK_OPENCL})
|
||||||
|
LIBS += OpenCL
|
||||||
|
CC_FLAGS += -DHAVE_OPENCL=1
|
||||||
|
endif
|
||||||
|
|
||||||
|
CC_FLAGS += $(shell pkg-config --cflags ${LIBS})
|
||||||
|
CC_LDFLAGS = $(shell pkg-config --libs ${LIBS})
|
||||||
|
|
||||||
|
ifeq (${BUILDMODE},debug)
|
||||||
CC_FLAGS += -g -pg
|
CC_FLAGS += -g -pg
|
||||||
CC_LDFLAGS += -g -pg
|
CC_LDFLAGS += -g -pg
|
||||||
endif
|
endif
|
||||||
ifeq ($(BUILDMODE),release)
|
ifeq (${BUILDMODE},release)
|
||||||
CC_FLAGS += -O3 -DNDEBUG -Wno-unused-variable -Wno-unused-but-set-variable
|
CC_FLAGS += -O3 -DNDEBUG -Wno-unused-variable -Wno-unused-but-set-variable
|
||||||
endif
|
endif
|
||||||
|
|
||||||
all:prepare ${RESULT}
|
all:prepare ${RESULT}
|
||||||
|
|
||||||
prepare:
|
prepare:
|
||||||
mkdir -p ${OBJPATH}
|
mkdir -p ${OBJPATH}
|
||||||
mkdir -p ${BUILDPATH}
|
mkdir -p ${BUILDPATH}
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#include "scenery.h"
|
#include "scenery.h"
|
||||||
#include "render.h"
|
#include "render.h"
|
||||||
#include "main.h"
|
#include "main.h"
|
||||||
|
#include "opencl.h"
|
||||||
|
|
||||||
#define APP_HEADER 198632.125
|
#define APP_HEADER 198632.125
|
||||||
|
|
||||||
|
@ -18,6 +19,7 @@ void paysagesInit()
|
||||||
CameraDefinition camera;
|
CameraDefinition camera;
|
||||||
|
|
||||||
systemInit();
|
systemInit();
|
||||||
|
openclInit();
|
||||||
|
|
||||||
sceneryInit();
|
sceneryInit();
|
||||||
renderInit();
|
renderInit();
|
||||||
|
@ -50,6 +52,8 @@ void paysagesQuit()
|
||||||
{
|
{
|
||||||
sceneryQuit();
|
sceneryQuit();
|
||||||
renderQuit();
|
renderQuit();
|
||||||
|
|
||||||
|
openclQuit();
|
||||||
}
|
}
|
||||||
|
|
||||||
FileOperationResult paysagesSave(char* filepath)
|
FileOperationResult paysagesSave(char* filepath)
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include "noisesimplex.h"
|
#include "noisesimplex.h"
|
||||||
#include "noisenaive.h"
|
#include "noisenaive.h"
|
||||||
#include "noiseperlin.h"
|
#include "noiseperlin.h"
|
||||||
|
#include "opencl.h"
|
||||||
|
|
||||||
#define MAX_LEVEL_COUNT 30
|
#define MAX_LEVEL_COUNT 30
|
||||||
|
|
||||||
|
@ -19,7 +20,7 @@ struct NoiseGenerator
|
||||||
double height_offset;
|
double height_offset;
|
||||||
int level_count;
|
int level_count;
|
||||||
struct NoiseLevel levels[MAX_LEVEL_COUNT];
|
struct NoiseLevel levels[MAX_LEVEL_COUNT];
|
||||||
|
|
||||||
double _max_height;
|
double _max_height;
|
||||||
double (*_func_noise_1d)(double x);
|
double (*_func_noise_1d)(double x);
|
||||||
double (*_func_noise_2d)(double x, double y);
|
double (*_func_noise_2d)(double x, double y);
|
||||||
|
@ -31,7 +32,7 @@ void noiseInit()
|
||||||
noiseSimplexInit();
|
noiseSimplexInit();
|
||||||
noisePerlinInit();
|
noisePerlinInit();
|
||||||
noiseNaiveInit();
|
noiseNaiveInit();
|
||||||
|
|
||||||
/* Noise stats */
|
/* Noise stats */
|
||||||
/*NoiseGenerator* noise;
|
/*NoiseGenerator* noise;
|
||||||
int x;
|
int x;
|
||||||
|
@ -96,14 +97,14 @@ void noiseSaveGenerator(PackStream* stream, NoiseGenerator* generator)
|
||||||
x = (int)generator->function.algorithm;
|
x = (int)generator->function.algorithm;
|
||||||
packWriteInt(stream, &x);
|
packWriteInt(stream, &x);
|
||||||
packWriteDouble(stream, &generator->function.ridge_factor);
|
packWriteDouble(stream, &generator->function.ridge_factor);
|
||||||
|
|
||||||
packWriteDouble(stream, &generator->height_offset);
|
packWriteDouble(stream, &generator->height_offset);
|
||||||
packWriteInt(stream, &generator->level_count);
|
packWriteInt(stream, &generator->level_count);
|
||||||
|
|
||||||
for (x = 0; x < generator->level_count; x++)
|
for (x = 0; x < generator->level_count; x++)
|
||||||
{
|
{
|
||||||
NoiseLevel* level = generator->levels + x;
|
NoiseLevel* level = generator->levels + x;
|
||||||
|
|
||||||
packWriteDouble(stream, &level->scaling);
|
packWriteDouble(stream, &level->scaling);
|
||||||
packWriteDouble(stream, &level->height);
|
packWriteDouble(stream, &level->height);
|
||||||
packWriteDouble(stream, &level->xoffset);
|
packWriteDouble(stream, &level->xoffset);
|
||||||
|
@ -126,14 +127,14 @@ void noiseLoadGenerator(PackStream* stream, NoiseGenerator* generator)
|
||||||
for (x = 0; x < generator->level_count; x++)
|
for (x = 0; x < generator->level_count; x++)
|
||||||
{
|
{
|
||||||
NoiseLevel* level = generator->levels + x;
|
NoiseLevel* level = generator->levels + x;
|
||||||
|
|
||||||
packReadDouble(stream, &level->scaling);
|
packReadDouble(stream, &level->scaling);
|
||||||
packReadDouble(stream, &level->height);
|
packReadDouble(stream, &level->height);
|
||||||
packReadDouble(stream, &level->xoffset);
|
packReadDouble(stream, &level->xoffset);
|
||||||
packReadDouble(stream, &level->yoffset);
|
packReadDouble(stream, &level->yoffset);
|
||||||
packReadDouble(stream, &level->zoffset);
|
packReadDouble(stream, &level->zoffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
noiseValidate(generator);
|
noiseValidate(generator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -144,7 +145,7 @@ void noiseCopy(NoiseGenerator* source, NoiseGenerator* destination)
|
||||||
destination->level_count = source->level_count;
|
destination->level_count = source->level_count;
|
||||||
|
|
||||||
memcpy(destination->levels, source->levels, sizeof(NoiseLevel) * destination->level_count);
|
memcpy(destination->levels, source->levels, sizeof(NoiseLevel) * destination->level_count);
|
||||||
|
|
||||||
noiseValidate(destination);
|
noiseValidate(destination);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -152,7 +153,7 @@ void noiseValidate(NoiseGenerator* generator)
|
||||||
{
|
{
|
||||||
int x;
|
int x;
|
||||||
double max_height = generator->height_offset;
|
double max_height = generator->height_offset;
|
||||||
|
|
||||||
if (generator->function.algorithm < 0 || generator->function.algorithm > NOISE_FUNCTION_NAIVE)
|
if (generator->function.algorithm < 0 || generator->function.algorithm > NOISE_FUNCTION_NAIVE)
|
||||||
{
|
{
|
||||||
generator->function.algorithm = NOISE_FUNCTION_SIMPLEX;
|
generator->function.algorithm = NOISE_FUNCTION_SIMPLEX;
|
||||||
|
@ -175,7 +176,7 @@ void noiseValidate(NoiseGenerator* generator)
|
||||||
generator->_func_noise_3d = noiseNaiveGet3DValue;
|
generator->_func_noise_3d = noiseNaiveGet3DValue;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (generator->function.ridge_factor > 0.5)
|
if (generator->function.ridge_factor > 0.5)
|
||||||
{
|
{
|
||||||
generator->function.ridge_factor = 0.5;
|
generator->function.ridge_factor = 0.5;
|
||||||
|
|
363
lib_paysages/opencl.c
Normal file
363
lib_paysages/opencl.c
Normal file
|
@ -0,0 +1,363 @@
|
||||||
|
#include "opencl.h"
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <CL/opencl.h>
|
||||||
|
#include "system.h"
|
||||||
|
|
||||||
|
static cl_platform_id _platform;
|
||||||
|
static cl_device_id _device;
|
||||||
|
static cl_context _context = NULL;
|
||||||
|
/* TODO One queue per calling thread ? */
|
||||||
|
static cl_command_queue _queue = NULL;
|
||||||
|
|
||||||
|
static cl_program _noise_program = NULL;
|
||||||
|
static cl_kernel _noise_kernel_simplex2d = NULL;
|
||||||
|
static cl_kernel _noise_kernel_simplex3d = NULL;
|
||||||
|
|
||||||
|
/*
 * Translate an OpenCL status code into a short human-readable message.
 *
 * err: status code as returned by an OpenCL call.
 *
 * Returns a string literal (never NULL, must not be freed). Codes that are
 * not listed in the table below yield "Unknown".
 */
static const char* _getErrorMessage(cl_int err)
{
    static const struct
    {
        cl_int code;
        const char* message;
    } known_errors[] = {
        {CL_SUCCESS, "Success!"},
        {CL_DEVICE_NOT_FOUND, "Device not found."},
        {CL_DEVICE_NOT_AVAILABLE, "Device not available"},
        {CL_COMPILER_NOT_AVAILABLE, "Compiler not available"},
        {CL_MEM_OBJECT_ALLOCATION_FAILURE, "Memory object allocation failure"},
        {CL_OUT_OF_RESOURCES, "Out of resources"},
        {CL_OUT_OF_HOST_MEMORY, "Out of host memory"},
        {CL_PROFILING_INFO_NOT_AVAILABLE, "Profiling information not available"},
        {CL_MEM_COPY_OVERLAP, "Memory copy overlap"},
        {CL_IMAGE_FORMAT_MISMATCH, "Image format mismatch"},
        {CL_IMAGE_FORMAT_NOT_SUPPORTED, "Image format not supported"},
        {CL_BUILD_PROGRAM_FAILURE, "Program build failure"},
        {CL_MAP_FAILURE, "Map failure"},
        {CL_INVALID_VALUE, "Invalid value"},
        {CL_INVALID_DEVICE_TYPE, "Invalid device type"},
        {CL_INVALID_PLATFORM, "Invalid platform"},
        {CL_INVALID_DEVICE, "Invalid device"},
        {CL_INVALID_CONTEXT, "Invalid context"},
        {CL_INVALID_QUEUE_PROPERTIES, "Invalid queue properties"},
        {CL_INVALID_COMMAND_QUEUE, "Invalid command queue"},
        {CL_INVALID_HOST_PTR, "Invalid host pointer"},
        {CL_INVALID_MEM_OBJECT, "Invalid memory object"},
        {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "Invalid image format descriptor"},
        {CL_INVALID_IMAGE_SIZE, "Invalid image size"},
        {CL_INVALID_SAMPLER, "Invalid sampler"},
        {CL_INVALID_BINARY, "Invalid binary"},
        {CL_INVALID_BUILD_OPTIONS, "Invalid build options"},
        {CL_INVALID_PROGRAM, "Invalid program"},
        {CL_INVALID_PROGRAM_EXECUTABLE, "Invalid program executable"},
        {CL_INVALID_KERNEL_NAME, "Invalid kernel name"},
        {CL_INVALID_KERNEL_DEFINITION, "Invalid kernel definition"},
        {CL_INVALID_KERNEL, "Invalid kernel"},
        {CL_INVALID_ARG_INDEX, "Invalid argument index"},
        {CL_INVALID_ARG_VALUE, "Invalid argument value"},
        {CL_INVALID_ARG_SIZE, "Invalid argument size"},
        {CL_INVALID_KERNEL_ARGS, "Invalid kernel arguments"},
        {CL_INVALID_WORK_DIMENSION, "Invalid work dimension"},
        {CL_INVALID_WORK_GROUP_SIZE, "Invalid work group size"},
        {CL_INVALID_WORK_ITEM_SIZE, "Invalid work item size"},
        {CL_INVALID_GLOBAL_OFFSET, "Invalid global offset"},
        {CL_INVALID_EVENT_WAIT_LIST, "Invalid event wait list"},
        {CL_INVALID_EVENT, "Invalid event"},
        {CL_INVALID_OPERATION, "Invalid operation"},
        {CL_INVALID_GL_OBJECT, "Invalid OpenGL object"},
        {CL_INVALID_BUFFER_SIZE, "Invalid buffer size"},
        {CL_INVALID_MIP_LEVEL, "Invalid mip-map level"}
    };
    const size_t known_count = sizeof (known_errors) / sizeof (known_errors[0]);
    size_t i;

    for (i = 0; i < known_count; i++)
    {
        if (known_errors[i].code == err)
        {
            return known_errors[i].message;
        }
    }

    return "Unknown";
}
|
||||||
|
|
||||||
|
static cl_program _loadProgram(const char* path)
|
||||||
|
{
|
||||||
|
cl_int error = 1;
|
||||||
|
size_t src_size = systemGetFileSize(path);
|
||||||
|
FILE* f = fopen(path, "rb");
|
||||||
|
if (!f)
|
||||||
|
{
|
||||||
|
printf("[OpenCL] Program %s not found !\n", path);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cl_program program = NULL;
|
||||||
|
char* source = malloc(sizeof (char)* src_size);
|
||||||
|
if (fread(source, src_size, 1, f) != 1)
|
||||||
|
{
|
||||||
|
printf("[OpenCL] Error reading program %s\n", path);
|
||||||
|
src_size = 0;
|
||||||
|
}
|
||||||
|
fclose(f);
|
||||||
|
|
||||||
|
if (src_size > 0)
|
||||||
|
{
|
||||||
|
program = clCreateProgramWithSource(_context, 1, (const char**)&source, &src_size, &error);
|
||||||
|
if (error)
|
||||||
|
{
|
||||||
|
printf("[OpenCL] Error loading program %s : %s\n", path, _getErrorMessage(error));
|
||||||
|
if (program)
|
||||||
|
{
|
||||||
|
clReleaseProgram(program);
|
||||||
|
program = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(source);
|
||||||
|
|
||||||
|
if (program)
|
||||||
|
{
|
||||||
|
error = clBuildProgram(program, 1, &_device, NULL, NULL, NULL);
|
||||||
|
if (error)
|
||||||
|
{
|
||||||
|
char* build_log;
|
||||||
|
size_t log_size;
|
||||||
|
|
||||||
|
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
|
||||||
|
build_log = malloc(sizeof (char)* (log_size + 1));
|
||||||
|
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
|
||||||
|
build_log[log_size] = '\0';
|
||||||
|
printf("[OpenCL] Build error for %s :\n%s\n", path, build_log);
|
||||||
|
free(build_log);
|
||||||
|
|
||||||
|
clReleaseProgram(program);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return program;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Fetch a kernel by name from an already built program.
 *
 * program: program that contains the kernel.
 * name: kernel function name.
 *
 * Returns the kernel, or NULL after printing the OpenCL error when the
 * kernel could not be created.
 */
static cl_kernel _getkernel(cl_program program, const char* name)
{
    cl_int status;
    cl_kernel kernel = clCreateKernel(program, name, &status);

    if (status)
    {
        printf("[OpenCL] Error retrieving kernel %s : %s\n", name, _getErrorMessage(status));
        if (kernel)
        {
            clReleaseKernel(kernel);
        }
        return NULL;
    }

    return kernel;
}
|
||||||
|
|
||||||
|
/*
 * Round global_size up to the next multiple of group_size.
 *
 * A value that is already a multiple is returned unchanged.
 */
static size_t _roundUp(int group_size, int global_size)
{
    const int remainder = global_size % group_size;

    if (remainder != 0)
    {
        /* Add what is missing to reach the next full group. */
        return global_size + group_size - remainder;
    }

    return global_size;
}
|
||||||
|
|
||||||
|
|
||||||
|
void openclInit()
|
||||||
|
{
|
||||||
|
cl_int error;
|
||||||
|
cl_uint platform_count;
|
||||||
|
|
||||||
|
/* Get platform */
|
||||||
|
error = clGetPlatformIDs(1, &_platform, &platform_count);
|
||||||
|
if (error != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
printf("[OpenCL] Error getting platform id: %s\n", _getErrorMessage(error));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else if (platform_count != 1)
|
||||||
|
{
|
||||||
|
printf("[OpenCL] No platform available\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
/* Get available devices */
|
||||||
|
/* TODO Handle several devices */
|
||||||
|
error = clGetDeviceIDs(_platform, CL_DEVICE_TYPE_GPU, 1, &_device, NULL);
|
||||||
|
if (error != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
printf("[OpenCL] Error getting devices: %s\n", _getErrorMessage(error));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
/* Create a context on better device */
|
||||||
|
_context = clCreateContext(0, 1, &_device, NULL, NULL, &error);
|
||||||
|
if (error != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
printf("[OpenCL] Error creating context: %s\n", _getErrorMessage(error));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Preload programs */
|
||||||
|
_noise_program = _loadProgram("opencl/noise.cl");
|
||||||
|
if (_noise_program)
|
||||||
|
{
|
||||||
|
_noise_kernel_simplex2d = _getkernel(_noise_program, "simplex_2d");
|
||||||
|
_noise_kernel_simplex3d = _getkernel(_noise_program, "simplex_3d");
|
||||||
|
if (!_noise_kernel_simplex2d || !_noise_kernel_simplex3d)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create a command queue in this context */
|
||||||
|
_queue = clCreateCommandQueue(_context, _device, 0, &error);
|
||||||
|
if (error != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
printf("[OpenCL] Error creating command queue: %s\n", _getErrorMessage(error));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("OpenCL support is enabled.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
int openclAvailable()
|
||||||
|
{
|
||||||
|
return _queue != NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void openclQuit()
|
||||||
|
{
|
||||||
|
if (_noise_kernel_simplex2d)
|
||||||
|
{
|
||||||
|
clReleaseKernel(_noise_kernel_simplex2d);
|
||||||
|
}
|
||||||
|
if (_noise_kernel_simplex3d)
|
||||||
|
{
|
||||||
|
clReleaseKernel(_noise_kernel_simplex3d);
|
||||||
|
}
|
||||||
|
if (_noise_program)
|
||||||
|
{
|
||||||
|
clReleaseProgram(_noise_program);
|
||||||
|
}
|
||||||
|
if (_queue)
|
||||||
|
{
|
||||||
|
clReleaseCommandQueue(_queue);
|
||||||
|
}
|
||||||
|
if (_context)
|
||||||
|
{
|
||||||
|
clReleaseContext(_context);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void openclTest()
|
||||||
|
{
|
||||||
|
cl_int error;
|
||||||
|
const int size = 100;
|
||||||
|
float src_a_h[size];
|
||||||
|
float src_b_h[size];
|
||||||
|
int i;
|
||||||
|
|
||||||
|
// Initialize both vectors
|
||||||
|
for (i = 0; i < size; i++)
|
||||||
|
{
|
||||||
|
src_a_h[i] = src_b_h[i] = (float)i;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int mem_size = sizeof (float)*size;
|
||||||
|
// Allocates a buffer of size mem_size and copies mem_size bytes from src_a_h
|
||||||
|
cl_mem src_a_d = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, src_a_h, &error);
|
||||||
|
cl_mem src_b_d = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, src_b_h, &error);
|
||||||
|
cl_mem res_d = clCreateBuffer(_context, CL_MEM_WRITE_ONLY, mem_size, NULL, &error);
|
||||||
|
|
||||||
|
// Creates the program
|
||||||
|
cl_program program = _loadProgram("test.cl");
|
||||||
|
if (program)
|
||||||
|
{
|
||||||
|
// Builds the program
|
||||||
|
error = clBuildProgram(program, 1, &_device, NULL, NULL, NULL);
|
||||||
|
if (error)
|
||||||
|
{
|
||||||
|
// Shows the log
|
||||||
|
char* build_log;
|
||||||
|
size_t log_size;
|
||||||
|
// First call to know the proper size
|
||||||
|
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
|
||||||
|
build_log = malloc(sizeof (char)* (log_size + 1));
|
||||||
|
// Second call to get the log
|
||||||
|
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
|
||||||
|
build_log[log_size] = '\0';
|
||||||
|
printf("[OPENCL] Build log :\n%s\n", build_log);
|
||||||
|
free(build_log);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Extracting the kernel
|
||||||
|
cl_kernel vector_add_k = clCreateKernel(program, "vector_add_gpu", &error);
|
||||||
|
if (!error)
|
||||||
|
{
|
||||||
|
// Enqueuing parameters
|
||||||
|
// Note that we inform the size of the cl_mem object, not the size of the memory pointed by it
|
||||||
|
error = clSetKernelArg(vector_add_k, 0, sizeof (cl_mem), &src_a_d);
|
||||||
|
error |= clSetKernelArg(vector_add_k, 1, sizeof (cl_mem), &src_b_d);
|
||||||
|
error |= clSetKernelArg(vector_add_k, 2, sizeof (cl_mem), &res_d);
|
||||||
|
error |= clSetKernelArg(vector_add_k, 3, sizeof (size_t), &size);
|
||||||
|
assert(error == CL_SUCCESS);
|
||||||
|
|
||||||
|
// Launching kernel
|
||||||
|
// TODO Get max number of items in device
|
||||||
|
// TODO Lock between the call and the result as we only have one queue
|
||||||
|
const size_t local_ws = 64; // Number of work-items per work-group
|
||||||
|
const size_t global_ws = _roundUp(local_ws, size); // Total number of work-items
|
||||||
|
error = clEnqueueNDRangeKernel(_queue, vector_add_k, 1, NULL, &global_ws, &local_ws, 0, NULL, NULL);
|
||||||
|
if (error)
|
||||||
|
{
|
||||||
|
printf("[OPENCL] Execution error : %s\n", _getErrorMessage(error));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Reading back
|
||||||
|
float check[size];
|
||||||
|
clEnqueueReadBuffer(_queue, res_d, CL_TRUE, 0, mem_size, check, 0, NULL, NULL);
|
||||||
|
|
||||||
|
for (i = 0; i < size; i++)
|
||||||
|
{
|
||||||
|
printf("%f %f %f\n", src_a_h[i], src_b_h[i], check[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
clReleaseKernel(vector_add_k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
clReleaseProgram(program);
|
||||||
|
}
|
||||||
|
|
||||||
|
clReleaseMemObject(src_a_d);
|
||||||
|
clReleaseMemObject(src_b_d);
|
||||||
|
clReleaseMemObject(res_d);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
/* Stub used when the library is built without OpenCL: nothing to set up. */
void openclInit()
{
}

/* Stub used when the library is built without OpenCL: always reports 0. */
int openclAvailable()
{
    const int available = 0;

    return available;
}

/* Stub used when the library is built without OpenCL: nothing to release. */
void openclQuit()
{
}
|
||||||
|
#endif
|
20
lib_paysages/opencl.h
Normal file
20
lib_paysages/opencl.h
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
#ifndef _PAYSAGES_OPENCL_H_
|
||||||
|
#define _PAYSAGES_OPENCL_H_
|
||||||
|
|
||||||
|
/* OpenCL usage */
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "noise.h"
|
||||||
|
|
||||||
|
void openclInit();
|
||||||
|
int openclAvailable();
|
||||||
|
void openclQuit();
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
|
||||||
#include "IL/il.h"
|
#include "IL/il.h"
|
||||||
#include "IL/ilu.h"
|
#include "IL/ilu.h"
|
||||||
|
@ -111,16 +112,16 @@ int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callb
|
||||||
|
|
||||||
ilGenImages(1, &image_id);
|
ilGenImages(1, &image_id);
|
||||||
ilBindImage(image_id);
|
ilBindImage(image_id);
|
||||||
|
|
||||||
if (ilLoadImage(filepath))
|
if (ilLoadImage(filepath))
|
||||||
{
|
{
|
||||||
width = ilGetInteger(IL_IMAGE_WIDTH);
|
width = ilGetInteger(IL_IMAGE_WIDTH);
|
||||||
height = ilGetInteger(IL_IMAGE_HEIGHT);
|
height = ilGetInteger(IL_IMAGE_HEIGHT);
|
||||||
callback_start(data, width, height);
|
callback_start(data, width, height);
|
||||||
|
|
||||||
pixels = malloc(sizeof(ILuint) * width * height);
|
pixels = malloc(sizeof(ILuint) * width * height);
|
||||||
ilCopyPixels(0, 0, 0, width, height, 1, IL_RGBA, IL_UNSIGNED_BYTE, pixels);
|
ilCopyPixels(0, 0, 0, width, height, 1, IL_RGBA, IL_UNSIGNED_BYTE, pixels);
|
||||||
|
|
||||||
for (y = 0; y < height; y++)
|
for (y = 0; y < height; y++)
|
||||||
{
|
{
|
||||||
for (x = 0; x < width; x++)
|
for (x = 0; x < width; x++)
|
||||||
|
@ -128,10 +129,10 @@ int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callb
|
||||||
callback_pixel(data, x, y, colorFrom32BitRGBA(pixels[y * width + x]));
|
callback_pixel(data, x, y, colorFrom32BitRGBA(pixels[y * width + x]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
free(pixels);
|
free(pixels);
|
||||||
}
|
}
|
||||||
|
|
||||||
error_count = 0;
|
error_count = 0;
|
||||||
while ((error=ilGetError()) != IL_NO_ERROR)
|
while ((error=ilGetError()) != IL_NO_ERROR)
|
||||||
{
|
{
|
||||||
|
@ -140,3 +141,10 @@ int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callb
|
||||||
}
|
}
|
||||||
return !error_count;
|
return !error_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Get the size of a file, in bytes.
 *
 * path: location of the file.
 *
 * Returns the size reported by stat(), or 0 when path is NULL, when the file
 * cannot be examined, or when its size does not fit in an int.
 */
int systemGetFileSize(const char* path)
{
    struct stat st;
    int size;

    /* Without this check a failed stat() would leave 'st' uninitialized and
       an arbitrary size would be returned. */
    if (path == NULL || stat(path, &st) != 0)
    {
        return 0;
    }

    /* st_size may be wider than int: refuse sizes that do not survive the
       conversion instead of returning a truncated value. */
    size = (int)st.st_size;
    if (size < 0 || size != st.st_size)
    {
        return 0;
    }

    return size;
}
|
||||||
|
|
|
@ -21,6 +21,8 @@ int systemGetCoreCount();
|
||||||
int systemSavePictureFile(const char* filepath, PictureCallbackSavePixel callback_pixel, void* data, int width, int height);
|
int systemSavePictureFile(const char* filepath, PictureCallbackSavePixel callback_pixel, void* data, int width, int height);
|
||||||
int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callback_start, PictureCallbackLoadPixel callback_pixel, void* data);
|
int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callback_start, PictureCallbackLoadPixel callback_pixel, void* data);
|
||||||
|
|
||||||
|
int systemGetFileSize(const char* path);
|
||||||
|
|
||||||
#ifdef HAVE_GLIB
|
#ifdef HAVE_GLIB
|
||||||
#include <glib.h>
|
#include <glib.h>
|
||||||
|
|
||||||
|
|
15
opencl/noise.cl
Normal file
15
opencl/noise.cl
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
/* The kernels below use double precision, which OpenCL 1.0/1.1 compilers
   only accept once the fp64 extension has been enabled explicitly. */
#if defined(cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#elif defined(cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#endif

/* Placeholder for 2D simplex noise: ignores its inputs and stores 0.0 in res[0]. */
__kernel void simplex_2d(__global const int* level_count,
                         __global const double* levels,
                         __global const double* location,
                         __global double* res)
{
    res[0] = 0.0;
}

/* Placeholder for 3D simplex noise: ignores its inputs and stores 0.0 in res[0]. */
__kernel void simplex_3d(__global const int* level_count,
                         __global const double* levels,
                         __global const double* location,
                         __global double* res)
{
    res[0] = 0.0;
}
|
16
test.cl
Normal file
16
test.cl
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
/*
 * Element-wise sum of two float vectors.
 *
 * For every index below num, stores src_a[index] + src_b[index] in res[index].
 */
__kernel void vector_add_gpu (__global const float* src_a,
                              __global const float* src_b,
                              __global float* res,
                              const int num)
{
    /* Each work-item handles the element selected by its own global id. */
    const int item = get_global_id(0);

    /* Work-items whose id falls outside the vector have nothing to do. */
    if (item >= num)
    {
        return;
    }

    res[item] = src_a[item] + src_b[item];
}
|
||||||
|
|
Loading…
Reference in a new issue