diff --git a/TODO b/TODO index 1af76b4..c089896 100644 --- a/TODO +++ b/TODO @@ -10,6 +10,10 @@ Technology Preview 2 : => Apply model to atmosphere (aerial perspective) => Find a proper model for night sky (maybe Shirley) - Clouds should keep distance to ground. +- Restore aerial perspective. +- Implement Bruneton's scattering model. +- Add clouds to explorer with 3d textures. +- Start using OpenCL to optimize rendering. - Rethink the quality settings and detail smoothing in the distance. => When quality setting is set to 10, add boost options => Add detail boost (adds granularity) diff --git a/lib_paysages/Makefile b/lib_paysages/Makefile index 854b8da..c6cde54 100644 --- a/lib_paysages/Makefile +++ b/lib_paysages/Makefile @@ -5,19 +5,28 @@ SOURCES = $(wildcard *.c atmosphere/*.c) OBJECTS = ${SOURCES:%.c=${OBJPATH}/%.o} HEADERS = $(wildcard shared/*.h atmosphere/*.h *.h) RESULT = ${BUILDPATH}/libpaysages.so -CC_FLAGS = -Wall -fPIC $(shell pkg-config --cflags glib-2.0 gthread-2.0) -DHAVE_GLIB=1 -CC_LDFLAGS = $(shell pkg-config --libs glib-2.0 gthread-2.0) -lIL -lILU +LIBS = glib-2.0 gthread-2.0 IL ILU +CC_FLAGS = -Wall -fPIC -DHAVE_GLIB=1 -ifeq ($(BUILDMODE),debug) +CHECK_OPENCL = $(shell pkg-config --modversion --silence-errors OpenCL) +ifneq (,${CHECK_OPENCL}) + LIBS += OpenCL + CC_FLAGS += -DHAVE_OPENCL=1 +endif + +CC_FLAGS += $(shell pkg-config --cflags ${LIBS}) +CC_LDFLAGS = $(shell pkg-config --libs ${LIBS}) + +ifeq (${BUILDMODE},debug) CC_FLAGS += -g -pg CC_LDFLAGS += -g -pg endif -ifeq ($(BUILDMODE),release) +ifeq (${BUILDMODE},release) CC_FLAGS += -O3 -DNDEBUG -Wno-unused-variable -Wno-unused-but-set-variable endif all:prepare ${RESULT} - + prepare: mkdir -p ${OBJPATH} mkdir -p ${BUILDPATH} diff --git a/lib_paysages/main.c b/lib_paysages/main.c index f866962..a3440ec 100644 --- a/lib_paysages/main.c +++ b/lib_paysages/main.c @@ -10,6 +10,7 @@ #include "scenery.h" #include "render.h" #include "main.h" +#include "opencl.h" #define APP_HEADER 198632.125 @@ -18,6 +19,7 @@ void paysagesInit() CameraDefinition camera; systemInit(); + openclInit(); sceneryInit(); renderInit(); @@ -50,6 +52,8 @@ void paysagesQuit() { sceneryQuit(); renderQuit(); + + openclQuit(); } FileOperationResult paysagesSave(char* filepath) diff --git a/lib_paysages/noise.c b/lib_paysages/noise.c index 9ab950a..db47b40 100644 --- a/lib_paysages/noise.c +++ b/lib_paysages/noise.c @@ -8,6 +8,7 @@ #include "noisesimplex.h" #include "noisenaive.h" #include "noiseperlin.h" +#include "opencl.h" #define MAX_LEVEL_COUNT 30 @@ -19,7 +20,7 @@ struct NoiseGenerator double height_offset; int level_count; struct NoiseLevel levels[MAX_LEVEL_COUNT]; - + double _max_height; double (*_func_noise_1d)(double x); double (*_func_noise_2d)(double x, double y); @@ -31,7 +32,7 @@ void noiseInit() noiseSimplexInit(); noisePerlinInit(); noiseNaiveInit(); - + /* Noise stats */ /*NoiseGenerator* noise; int x; @@ -96,14 +97,14 @@ void noiseSaveGenerator(PackStream* stream, NoiseGenerator* generator) x = (int)generator->function.algorithm; packWriteInt(stream, &x); packWriteDouble(stream, &generator->function.ridge_factor); - + packWriteDouble(stream, &generator->height_offset); packWriteInt(stream, &generator->level_count); for (x = 0; x < generator->level_count; x++) { NoiseLevel* level = generator->levels + x; - + packWriteDouble(stream, &level->scaling); packWriteDouble(stream, &level->height); packWriteDouble(stream, &level->xoffset); @@ -126,14 +127,14 @@ void noiseLoadGenerator(PackStream* stream, NoiseGenerator* generator) for (x = 0; x < generator->level_count; x++) { NoiseLevel* level = generator->levels + x; - + packReadDouble(stream, &level->scaling); packReadDouble(stream, &level->height); packReadDouble(stream, &level->xoffset); packReadDouble(stream, &level->yoffset); packReadDouble(stream, &level->zoffset); } - + noiseValidate(generator); } @@ -144,7 +145,7 @@ void noiseCopy(NoiseGenerator* source, NoiseGenerator* destination) destination->level_count = source->level_count; memcpy(destination->levels, source->levels, sizeof(NoiseLevel) * destination->level_count); - + noiseValidate(destination); } @@ -152,7 +153,7 @@ void noiseValidate(NoiseGenerator* generator) { int x; double max_height = generator->height_offset; - + if (generator->function.algorithm < 0 || generator->function.algorithm > NOISE_FUNCTION_NAIVE) { generator->function.algorithm = NOISE_FUNCTION_SIMPLEX; @@ -175,7 +176,7 @@ void noiseValidate(NoiseGenerator* generator) generator->_func_noise_3d = noiseNaiveGet3DValue; break; } - + if (generator->function.ridge_factor > 0.5) { generator->function.ridge_factor = 0.5; diff --git a/lib_paysages/opencl.c b/lib_paysages/opencl.c new file mode 100644 index 0000000..79dae2f --- /dev/null +++ b/lib_paysages/opencl.c @@ -0,0 +1,363 @@ +#include "opencl.h" + +#ifdef HAVE_OPENCL + +#include +#include +#include +#include +#include "system.h" + +static cl_platform_id _platform; +static cl_device_id _device; +static cl_context _context = NULL; +/* TODO One queue per calling thread ? */ +static cl_command_queue _queue = NULL; + +static cl_program _noise_program = NULL; +static cl_kernel _noise_kernel_simplex2d = NULL; +static cl_kernel _noise_kernel_simplex3d = NULL; + +static const char* _getErrorMessage(cl_int err) +{ + switch (err) + { + case CL_SUCCESS: return "Success!"; + case CL_DEVICE_NOT_FOUND: return "Device not found."; + case CL_DEVICE_NOT_AVAILABLE: return "Device not available"; + case CL_COMPILER_NOT_AVAILABLE: return "Compiler not available"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory object allocation failure"; + case CL_OUT_OF_RESOURCES: return "Out of resources"; + case CL_OUT_OF_HOST_MEMORY: return "Out of host memory"; + case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling information not available"; + case CL_MEM_COPY_OVERLAP: return "Memory copy overlap"; + case CL_IMAGE_FORMAT_MISMATCH: return "Image format mismatch"; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image format not supported"; + case CL_BUILD_PROGRAM_FAILURE: return "Program build failure"; + case CL_MAP_FAILURE: return "Map failure"; + case CL_INVALID_VALUE: return "Invalid value"; + case CL_INVALID_DEVICE_TYPE: return "Invalid device type"; + case CL_INVALID_PLATFORM: return "Invalid platform"; + case CL_INVALID_DEVICE: return "Invalid device"; + case CL_INVALID_CONTEXT: return "Invalid context"; + case CL_INVALID_QUEUE_PROPERTIES: return "Invalid queue properties"; + case CL_INVALID_COMMAND_QUEUE: return "Invalid command queue"; + case CL_INVALID_HOST_PTR: return "Invalid host pointer"; + case CL_INVALID_MEM_OBJECT: return "Invalid memory object"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "Invalid image format descriptor"; + case CL_INVALID_IMAGE_SIZE: return "Invalid image size"; + case CL_INVALID_SAMPLER: return "Invalid sampler"; + case CL_INVALID_BINARY: return "Invalid binary"; + case CL_INVALID_BUILD_OPTIONS: return "Invalid build options"; + case CL_INVALID_PROGRAM: return "Invalid program"; + case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid program executable"; + case CL_INVALID_KERNEL_NAME: return "Invalid kernel name"; + case CL_INVALID_KERNEL_DEFINITION: return "Invalid kernel definition"; + case CL_INVALID_KERNEL: return "Invalid kernel"; + case CL_INVALID_ARG_INDEX: return "Invalid argument index"; + case CL_INVALID_ARG_VALUE: return "Invalid argument value"; + case CL_INVALID_ARG_SIZE: return "Invalid argument size"; + case CL_INVALID_KERNEL_ARGS: return "Invalid kernel arguments"; + case CL_INVALID_WORK_DIMENSION: return "Invalid work dimension"; + case CL_INVALID_WORK_GROUP_SIZE: return "Invalid work group size"; + case CL_INVALID_WORK_ITEM_SIZE: return "Invalid work item size"; + case CL_INVALID_GLOBAL_OFFSET: return "Invalid global offset"; + case CL_INVALID_EVENT_WAIT_LIST: return "Invalid event wait list"; + case CL_INVALID_EVENT: return "Invalid event"; + case CL_INVALID_OPERATION: return "Invalid operation"; + case CL_INVALID_GL_OBJECT: return "Invalid OpenGL object"; + case CL_INVALID_BUFFER_SIZE: return "Invalid buffer size"; + case CL_INVALID_MIP_LEVEL: return "Invalid mip-map level"; + default: return "Unknown"; + } +} + +static cl_program _loadProgram(const char* path) +{ + cl_int error = 1; + size_t src_size = systemGetFileSize(path); + FILE* f = fopen(path, "rb"); + if (!f) + { + printf("[OpenCL] Program %s not found !\n", path); + return NULL; + } + else + { + cl_program program = NULL; + char* source = malloc(sizeof (char)* src_size); + if (fread(source, src_size, 1, f) != 1) + { + printf("[OpenCL] Error reading program %s\n", path); + src_size = 0; + } + fclose(f); + + if (src_size > 0) + { + program = clCreateProgramWithSource(_context, 1, (const char**)&source, &src_size, &error); + if (error) + { + printf("[OpenCL] Error loading program %s : %s\n", path, _getErrorMessage(error)); + if (program) + { + clReleaseProgram(program); + program = NULL; + } + } + } + free(source); + + if (program) + { + error = clBuildProgram(program, 1, &_device, NULL, NULL, NULL); + if (error) + { + char* build_log; + size_t log_size; + + clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); + build_log = malloc(sizeof (char)* (log_size + 1)); + clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL); + build_log[log_size] = '\0'; + printf("[OpenCL] Build error for %s :\n%s\n", path, build_log); + free(build_log); + + clReleaseProgram(program); + return NULL; + } + else + { + return program; + } + } + else + { + return NULL; + } + } +} + +static cl_kernel _getkernel(cl_program program, const char* name) +{ + cl_int error; + cl_kernel result; + + result = clCreateKernel(program, name, &error); + if (!error) + { + return result; + } + else + { + printf("[OpenCL] Error retrieving kernel %s : %s\n", name, _getErrorMessage(error)); + if (result) + { + clReleaseKernel(result); + } + return NULL; + } +} + +static size_t _roundUp(int group_size, int global_size) +{ + int r = global_size % group_size; + if (r == 0) + { + return global_size; + } + else + { + return global_size + group_size - r; + } +} + + +void openclInit() +{ + cl_int error; + cl_uint platform_count; + + /* Get platform */ + error = clGetPlatformIDs(1, &_platform, &platform_count); + if (error != CL_SUCCESS) + { + printf("[OpenCL] Error getting platform id: %s\n", _getErrorMessage(error)); + return; + } + else if (platform_count != 1) + { + printf("[OpenCL] No platform available\n"); + return; + } + /* Get available devices */ + /* TODO Handle several devices */ + error = clGetDeviceIDs(_platform, CL_DEVICE_TYPE_GPU, 1, &_device, NULL); + if (error != CL_SUCCESS) + { + printf("[OpenCL] Error getting devices: %s\n", _getErrorMessage(error)); + return; + } + /* Create a context on better device */ + _context = clCreateContext(0, 1, &_device, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + printf("[OpenCL] Error creating context: %s\n", _getErrorMessage(error)); + return; + } + + /* Preload programs */ + _noise_program = _loadProgram("opencl/noise.cl"); + if (_noise_program) + { + _noise_kernel_simplex2d = _getkernel(_noise_program, "simplex_2d"); + _noise_kernel_simplex3d = _getkernel(_noise_program, "simplex_3d"); + if (!_noise_kernel_simplex2d || !_noise_kernel_simplex3d) + { + return; + } + } + else + { + return; + } + + /* Create a command queue in this context */ + _queue = clCreateCommandQueue(_context, _device, 0, &error); + if (error != CL_SUCCESS) + { + printf("[OpenCL] Error creating command queue: %s\n", _getErrorMessage(error)); + return; + } + + printf("OpenCL support is enabled.\n"); +} + +int openclAvailable() +{ + return _queue != NULL; +} + +void openclQuit() +{ + if (_noise_kernel_simplex2d) + { + clReleaseKernel(_noise_kernel_simplex2d); + } + if (_noise_kernel_simplex3d) + { + clReleaseKernel(_noise_kernel_simplex3d); + } + if (_noise_program) + { + clReleaseProgram(_noise_program); + } + if (_queue) + { + clReleaseCommandQueue(_queue); + } + if (_context) + { + clReleaseContext(_context); + } +} + +void openclTest() +{ + cl_int error; + const int size = 100; + float src_a_h[size]; + float src_b_h[size]; + int i; + + // Initialize both vectors + for (i = 0; i < size; i++) + { + src_a_h[i] = src_b_h[i] = (float)i; + } + + const int mem_size = sizeof (float)*size; + // Allocates a buffer of size mem_size and copies mem_size bytes from src_a_h + cl_mem src_a_d = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, src_a_h, &error); + cl_mem src_b_d = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, src_b_h, &error); + cl_mem res_d = clCreateBuffer(_context, CL_MEM_WRITE_ONLY, mem_size, NULL, &error); + + // Creates the program + cl_program program = _loadProgram("test.cl"); + if (program) + { + // Builds the program + error = clBuildProgram(program, 1, &_device, NULL, NULL, NULL); + if (error) + { + // Shows the log + char* build_log; + size_t log_size; + // First call to know the proper size + clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); + build_log = malloc(sizeof (char)* (log_size + 1)); + // Second call to get the log + clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL); + build_log[log_size] = '\0'; + printf("[OPENCL] Build log :\n%s\n", build_log); + free(build_log); + } + else + { + // Extracting the kernel + cl_kernel vector_add_k = clCreateKernel(program, "vector_add_gpu", &error); + if (!error) + { + // Enqueuing parameters + // Note that we inform the size of the cl_mem object, not the size of the memory pointed by it + error = clSetKernelArg(vector_add_k, 0, sizeof (cl_mem), &src_a_d); + error |= clSetKernelArg(vector_add_k, 1, sizeof (cl_mem), &src_b_d); + error |= clSetKernelArg(vector_add_k, 2, sizeof (cl_mem), &res_d); + error |= clSetKernelArg(vector_add_k, 3, sizeof (size_t), &size); + assert(error == CL_SUCCESS); + + // Launching kernel + // TODO Get max number of items in device + // TODO Lock between the call and the result as we only have one queue + const size_t local_ws = 64; // Number of work-items per work-group + const size_t global_ws = _roundUp(local_ws, size); // Total number of work-items + error = clEnqueueNDRangeKernel(_queue, vector_add_k, 1, NULL, &global_ws, &local_ws, 0, NULL, NULL); + if (error) + { + printf("[OPENCL] Execution error : %s\n", _getErrorMessage(error)); + } + else + { + // Reading back + float check[size]; + clEnqueueReadBuffer(_queue, res_d, CL_TRUE, 0, mem_size, check, 0, NULL, NULL); + + for (i = 0; i < size; i++) + { + printf("%f %f %f\n", src_a_h[i], src_b_h[i], check[i]); + } + } + + clReleaseKernel(vector_add_k); + } + } + + clReleaseProgram(program); + } + + clReleaseMemObject(src_a_d); + clReleaseMemObject(src_b_d); + clReleaseMemObject(res_d); +} +#else +void openclInit() +{ +} +int openclAvailable() +{ + return 0; +} +void openclQuit() +{ +} +#endif diff --git a/lib_paysages/opencl.h b/lib_paysages/opencl.h new file mode 100644 index 0000000..1bd97f4 --- /dev/null +++ b/lib_paysages/opencl.h @@ -0,0 +1,20 @@ +#ifndef _PAYSAGES_OPENCL_H_ +#define _PAYSAGES_OPENCL_H_ + +/* OpenCL usage */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "noise.h" + +void openclInit(); +int openclAvailable(); +void openclQuit(); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib_paysages/system.c b/lib_paysages/system.c index 83755c2..03b8314 100644 --- a/lib_paysages/system.c +++ b/lib_paysages/system.c @@ -2,6 +2,7 @@ #include #include +#include #include "IL/il.h" #include "IL/ilu.h" @@ -111,16 +112,16 @@ int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callb ilGenImages(1, &image_id); ilBindImage(image_id); - + if (ilLoadImage(filepath)) { width = ilGetInteger(IL_IMAGE_WIDTH); height = ilGetInteger(IL_IMAGE_HEIGHT); callback_start(data, width, height); - + pixels = malloc(sizeof(ILuint) * width * height); ilCopyPixels(0, 0, 0, width, height, 1, IL_RGBA, IL_UNSIGNED_BYTE, pixels); - + for (y = 0; y < height; y++) { for (x = 0; x < width; x++) @@ -128,10 +129,10 @@ int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callb callback_pixel(data, x, y, colorFrom32BitRGBA(pixels[y * width + x])); } } - + free(pixels); } - + error_count = 0; while ((error=ilGetError()) != IL_NO_ERROR) { @@ -140,3 +141,10 @@ int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callb } return !error_count; } + +int systemGetFileSize(const char* path) +{ + struct stat st; + stat(path, &st); + return st.st_size; +} diff --git a/lib_paysages/system.h b/lib_paysages/system.h index c87b528..31cb90c 100644 --- a/lib_paysages/system.h +++ b/lib_paysages/system.h @@ -21,6 +21,8 @@ int systemGetCoreCount(); int systemSavePictureFile(const char* filepath, PictureCallbackSavePixel callback_pixel, void* data, int width, int height); int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callback_start, PictureCallbackLoadPixel callback_pixel, void* data); +int systemGetFileSize(const char* path); + #ifdef HAVE_GLIB #include diff --git a/opencl/noise.cl b/opencl/noise.cl new file mode 100644 index 0000000..f7c4087 --- /dev/null +++ b/opencl/noise.cl @@ -0,0 +1,15 @@ +__kernel void simplex_2d(__global const int* level_count, + __global const double* levels, + __global const double* location, + __global double* res) +{ + res[0] = 0.0; +} + +__kernel void simplex_3d(__global const int* level_count, + __global const double* levels, + __global const double* location, + __global double* res) +{ + res[0] = 0.0; +} diff --git a/test.cl b/test.cl new file mode 100644 index 0000000..c0ead57 --- /dev/null +++ b/test.cl @@ -0,0 +1,16 @@ +__kernel void vector_add_gpu (__global const float* src_a, + __global const float* src_b, + __global float* res, + const int num) +{ + /* get_global_id(0) returns the ID of the thread in execution. + As many threads are launched at the same time, executing the same kernel, + each one will receive a different ID, and consequently perform a different computation.*/ + const int idx = get_global_id(0); + + /* Now each work-item asks itself: "is my ID inside the vector's range?" + If the answer is YES, the work-item performs the corresponding computation*/ + if (idx < num) + res[idx] = src_a[idx] + src_b[idx]; +} +