Browse Source

paysages : Added structure for opencl usage.

git-svn-id: https://subversion.assembla.com/svn/thunderk/paysages@469 b1fd45b6-86a6-48da-8261-f70d1f35bdcc
Michaël Lemaire 7 years ago
parent
commit
49c6c17118
10 changed files with 461 additions and 19 deletions
  1. 4 0
      TODO
  2. 14 5
      lib_paysages/Makefile
  3. 4 0
      lib_paysages/main.c
  4. 10 9
      lib_paysages/noise.c
  5. 363 0
      lib_paysages/opencl.c
  6. 20 0
      lib_paysages/opencl.h
  7. 13 5
      lib_paysages/system.c
  8. 2 0
      lib_paysages/system.h
  9. 15 0
      opencl/noise.cl
  10. 16 0
      test.cl

+ 4 - 0
TODO

@@ -10,6 +10,10 @@ Technology Preview 2 :
    => Apply model to atmosphere (aerial perspective)
    => Find a proper model for night sky (maybe Shirley)
 - Clouds should keep distance to ground.
+- Restore aerial perspective.
+- Implement Bruneton's scattering model.
+- Add clouds to explorer with 3d textures.
+- Start using OpenCL to optimize rendering.
 - Rethink the quality settings and detail smoothing in the distance.
    => When quality setting is set to 10, add boost options
    => Add detail boost (adds granularity)

+ 14 - 5
lib_paysages/Makefile

@@ -5,19 +5,28 @@ SOURCES = $(wildcard *.c atmosphere/*.c)
 OBJECTS = ${SOURCES:%.c=${OBJPATH}/%.o}
 HEADERS = $(wildcard shared/*.h atmosphere/*.h *.h)
 RESULT = ${BUILDPATH}/libpaysages.so
-CC_FLAGS = -Wall -fPIC $(shell pkg-config --cflags glib-2.0 gthread-2.0) -DHAVE_GLIB=1
-CC_LDFLAGS = $(shell pkg-config --libs glib-2.0 gthread-2.0) -lIL -lILU
+LIBS = glib-2.0 gthread-2.0 IL ILU
+CC_FLAGS = -Wall -fPIC -DHAVE_GLIB=1
 
-ifeq ($(BUILDMODE),debug)
+CHECK_OPENCL = $(shell pkg-config --modversion --silence-errors OpenCL)
+ifneq (,${CHECK_OPENCL})
+	LIBS += OpenCL
+	CC_FLAGS += -DHAVE_OPENCL=1
+endif
+
+CC_FLAGS += $(shell pkg-config --cflags ${LIBS})
+CC_LDFLAGS = $(shell pkg-config --libs ${LIBS})
+
+ifeq (${BUILDMODE},debug)
 	CC_FLAGS += -g -pg
 	CC_LDFLAGS += -g -pg
 endif
-ifeq ($(BUILDMODE),release)
+ifeq (${BUILDMODE},release)
 	CC_FLAGS += -O3 -DNDEBUG -Wno-unused-variable -Wno-unused-but-set-variable
 endif
 
 all:prepare ${RESULT}
-	
+
 prepare:
 	mkdir -p ${OBJPATH}
 	mkdir -p ${BUILDPATH}

+ 4 - 0
lib_paysages/main.c

@@ -10,6 +10,7 @@
 #include "scenery.h"
 #include "render.h"
 #include "main.h"
+#include "opencl.h"
 
 #define APP_HEADER 198632.125
 
@@ -18,6 +19,7 @@ void paysagesInit()
     CameraDefinition camera;
 
     systemInit();
+    openclInit();
 
     sceneryInit();
     renderInit();
@@ -50,6 +52,8 @@ void paysagesQuit()
 {
     sceneryQuit();
     renderQuit();
+    
+    openclQuit();
 }
 
 FileOperationResult paysagesSave(char* filepath)

+ 10 - 9
lib_paysages/noise.c

@@ -8,6 +8,7 @@
 #include "noisesimplex.h"
 #include "noisenaive.h"
 #include "noiseperlin.h"
+#include "opencl.h"
 
 #define MAX_LEVEL_COUNT 30
 
@@ -19,7 +20,7 @@ struct NoiseGenerator
     double height_offset;
     int level_count;
     struct NoiseLevel levels[MAX_LEVEL_COUNT];
-    
+
     double _max_height;
     double (*_func_noise_1d)(double x);
     double (*_func_noise_2d)(double x, double y);
@@ -31,7 +32,7 @@ void noiseInit()
     noiseSimplexInit();
     noisePerlinInit();
     noiseNaiveInit();
-    
+
     /* Noise stats */
     /*NoiseGenerator* noise;
     int x;
@@ -96,14 +97,14 @@ void noiseSaveGenerator(PackStream* stream, NoiseGenerator* generator)
     x = (int)generator->function.algorithm;
     packWriteInt(stream, &x);
     packWriteDouble(stream, &generator->function.ridge_factor);
-    
+
     packWriteDouble(stream, &generator->height_offset);
     packWriteInt(stream, &generator->level_count);
 
     for (x = 0; x < generator->level_count; x++)
     {
         NoiseLevel* level = generator->levels + x;
-        
+
         packWriteDouble(stream, &level->scaling);
         packWriteDouble(stream, &level->height);
         packWriteDouble(stream, &level->xoffset);
@@ -126,14 +127,14 @@ void noiseLoadGenerator(PackStream* stream, NoiseGenerator* generator)
     for (x = 0; x < generator->level_count; x++)
     {
         NoiseLevel* level = generator->levels + x;
-        
+
         packReadDouble(stream, &level->scaling);
         packReadDouble(stream, &level->height);
         packReadDouble(stream, &level->xoffset);
         packReadDouble(stream, &level->yoffset);
         packReadDouble(stream, &level->zoffset);
     }
-    
+
     noiseValidate(generator);
 }
 
@@ -144,7 +145,7 @@ void noiseCopy(NoiseGenerator* source, NoiseGenerator* destination)
     destination->level_count = source->level_count;
 
     memcpy(destination->levels, source->levels, sizeof(NoiseLevel) * destination->level_count);
-    
+
     noiseValidate(destination);
 }
 
@@ -152,7 +153,7 @@ void noiseValidate(NoiseGenerator* generator)
 {
     int x;
     double max_height = generator->height_offset;
-    
+
     if (generator->function.algorithm < 0 || generator->function.algorithm > NOISE_FUNCTION_NAIVE)
     {
         generator->function.algorithm = NOISE_FUNCTION_SIMPLEX;
@@ -175,7 +176,7 @@ void noiseValidate(NoiseGenerator* generator)
         generator->_func_noise_3d = noiseNaiveGet3DValue;
         break;
     }
-    
+
     if (generator->function.ridge_factor > 0.5)
     {
         generator->function.ridge_factor = 0.5;

+ 363 - 0
lib_paysages/opencl.c

@@ -0,0 +1,363 @@
+#include "opencl.h"
+
+#ifdef HAVE_OPENCL
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <CL/opencl.h>
+#include "system.h"
+
+static cl_platform_id _platform;
+static cl_device_id _device;
+static cl_context _context = NULL;
+/* TODO One queue per calling thread ? */
+static cl_command_queue _queue = NULL;
+
+static cl_program _noise_program = NULL;
+static cl_kernel _noise_kernel_simplex2d = NULL;
+static cl_kernel _noise_kernel_simplex3d = NULL;
+
+static const char* _getErrorMessage(cl_int err)
+{
+    switch (err)
+    {
+        case CL_SUCCESS: return "Success!";
+        case CL_DEVICE_NOT_FOUND: return "Device not found.";
+        case CL_DEVICE_NOT_AVAILABLE: return "Device not available";
+        case CL_COMPILER_NOT_AVAILABLE: return "Compiler not available";
+        case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory object allocation failure";
+        case CL_OUT_OF_RESOURCES: return "Out of resources";
+        case CL_OUT_OF_HOST_MEMORY: return "Out of host memory";
+        case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling information not available";
+        case CL_MEM_COPY_OVERLAP: return "Memory copy overlap";
+        case CL_IMAGE_FORMAT_MISMATCH: return "Image format mismatch";
+        case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image format not supported";
+        case CL_BUILD_PROGRAM_FAILURE: return "Program build failure";
+        case CL_MAP_FAILURE: return "Map failure";
+        case CL_INVALID_VALUE: return "Invalid value";
+        case CL_INVALID_DEVICE_TYPE: return "Invalid device type";
+        case CL_INVALID_PLATFORM: return "Invalid platform";
+        case CL_INVALID_DEVICE: return "Invalid device";
+        case CL_INVALID_CONTEXT: return "Invalid context";
+        case CL_INVALID_QUEUE_PROPERTIES: return "Invalid queue properties";
+        case CL_INVALID_COMMAND_QUEUE: return "Invalid command queue";
+        case CL_INVALID_HOST_PTR: return "Invalid host pointer";
+        case CL_INVALID_MEM_OBJECT: return "Invalid memory object";
+        case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "Invalid image format descriptor";
+        case CL_INVALID_IMAGE_SIZE: return "Invalid image size";
+        case CL_INVALID_SAMPLER: return "Invalid sampler";
+        case CL_INVALID_BINARY: return "Invalid binary";
+        case CL_INVALID_BUILD_OPTIONS: return "Invalid build options";
+        case CL_INVALID_PROGRAM: return "Invalid program";
+        case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid program executable";
+        case CL_INVALID_KERNEL_NAME: return "Invalid kernel name";
+        case CL_INVALID_KERNEL_DEFINITION: return "Invalid kernel definition";
+        case CL_INVALID_KERNEL: return "Invalid kernel";
+        case CL_INVALID_ARG_INDEX: return "Invalid argument index";
+        case CL_INVALID_ARG_VALUE: return "Invalid argument value";
+        case CL_INVALID_ARG_SIZE: return "Invalid argument size";
+        case CL_INVALID_KERNEL_ARGS: return "Invalid kernel arguments";
+        case CL_INVALID_WORK_DIMENSION: return "Invalid work dimension";
+        case CL_INVALID_WORK_GROUP_SIZE: return "Invalid work group size";
+        case CL_INVALID_WORK_ITEM_SIZE: return "Invalid work item size";
+        case CL_INVALID_GLOBAL_OFFSET: return "Invalid global offset";
+        case CL_INVALID_EVENT_WAIT_LIST: return "Invalid event wait list";
+        case CL_INVALID_EVENT: return "Invalid event";
+        case CL_INVALID_OPERATION: return "Invalid operation";
+        case CL_INVALID_GL_OBJECT: return "Invalid OpenGL object";
+        case CL_INVALID_BUFFER_SIZE: return "Invalid buffer size";
+        case CL_INVALID_MIP_LEVEL: return "Invalid mip-map level";
+        default: return "Unknown";
+    }
+}
+
+static cl_program _loadProgram(const char* path)
+{
+    cl_int error = 1;
+    size_t src_size = systemGetFileSize(path);
+    FILE* f = fopen(path, "rb");
+    if (!f)
+    {
+        printf("[OpenCL] Program %s not found !\n", path);
+        return NULL;
+    }
+    else
+    {
+        cl_program program = NULL;
+        char* source = malloc(sizeof (char)* src_size);
+        if (fread(source, src_size, 1, f) != 1)
+        {
+            printf("[OpenCL] Error reading program %s\n", path);
+            src_size = 0;
+        }
+        fclose(f);
+
+        if (src_size > 0)
+        {
+            program = clCreateProgramWithSource(_context, 1, (const char**)&source, &src_size, &error);
+            if (error)
+            {
+                printf("[OpenCL] Error loading program %s : %s\n", path, _getErrorMessage(error));
+                if (program)
+                {
+                    clReleaseProgram(program);
+                    program = NULL;
+                }
+            }
+        }
+        free(source);
+
+        if (program)
+        {
+            error = clBuildProgram(program, 1, &_device, NULL, NULL, NULL);
+            if (error)
+            {
+                char* build_log;
+                size_t log_size;
+
+                clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
+                build_log = malloc(sizeof (char)* (log_size + 1));
+                clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
+                build_log[log_size] = '\0';
+                printf("[OpenCL] Build error for %s :\n%s\n", path, build_log);
+                free(build_log);
+
+                clReleaseProgram(program);
+                return NULL;
+            }
+            else
+            {
+                return program;
+            }
+        }
+        else
+        {
+            return NULL;
+        }
+    }
+}
+
+static cl_kernel _getkernel(cl_program program, const char* name)
+{
+    cl_int error;
+    cl_kernel result;
+
+    result = clCreateKernel(program, name, &error);
+    if (!error)
+    {
+        return result;
+    }
+    else
+    {
+        printf("[OpenCL] Error retrieving kernel %s : %s\n", name, _getErrorMessage(error));
+        if (result)
+        {
+            clReleaseKernel(result);
+        }
+        return NULL;
+    }
+}
+
+static size_t _roundUp(int group_size, int global_size)
+{
+    int r = global_size % group_size;
+    if (r == 0)
+    {
+        return global_size;
+    }
+    else
+    {
+        return global_size + group_size - r;
+    }
+}
+
+
+void openclInit()
+{
+    cl_int error;
+    cl_uint platform_count;
+
+    /* Get platform */
+    error = clGetPlatformIDs(1, &_platform, &platform_count);
+    if (error != CL_SUCCESS)
+    {
+        printf("[OpenCL] Error getting platform id: %s\n", _getErrorMessage(error));
+        return;
+    }
+    else if (platform_count != 1)
+    {
+        printf("[OpenCL] No platform available\n");
+        return;
+    }
+    /* Get available devices */
+    /* TODO Handle several devices */
+    error = clGetDeviceIDs(_platform, CL_DEVICE_TYPE_GPU, 1, &_device, NULL);
+    if (error != CL_SUCCESS)
+    {
+        printf("[OpenCL] Error getting devices: %s\n", _getErrorMessage(error));
+        return;
+    }
+    /* Create a context on better device */
+    _context = clCreateContext(0, 1, &_device, NULL, NULL, &error);
+    if (error != CL_SUCCESS)
+    {
+        printf("[OpenCL] Error creating context: %s\n", _getErrorMessage(error));
+        return;
+    }
+
+    /* Preload programs */
+    _noise_program = _loadProgram("opencl/noise.cl");
+    if (_noise_program)
+    {
+        _noise_kernel_simplex2d = _getkernel(_noise_program, "simplex_2d");
+        _noise_kernel_simplex3d = _getkernel(_noise_program, "simplex_3d");
+        if (!_noise_kernel_simplex2d || !_noise_kernel_simplex3d)
+        {
+            return;
+        }
+    }
+    else
+    {
+        return;
+    }
+
+    /* Create a command queue in this context */
+    _queue = clCreateCommandQueue(_context, _device, 0, &error);
+    if (error != CL_SUCCESS)
+    {
+        printf("[OpenCL] Error creating command queue: %s\n", _getErrorMessage(error));
+        return;
+    }
+
+    printf("OpenCL support is enabled.\n");
+}
+
+int openclAvailable()
+{
+    return _queue != NULL;
+}
+
+void openclQuit()
+{
+    if (_noise_kernel_simplex2d)
+    {
+        clReleaseKernel(_noise_kernel_simplex2d);
+    }
+    if (_noise_kernel_simplex3d)
+    {
+        clReleaseKernel(_noise_kernel_simplex3d);
+    }
+    if (_noise_program)
+    {
+        clReleaseProgram(_noise_program);
+    }
+    if (_queue)
+    {
+        clReleaseCommandQueue(_queue);
+    }
+    if (_context)
+    {
+        clReleaseContext(_context);
+    }
+}
+
+void openclTest()
+{
+    cl_int error;
+    const int size = 100;
+    float src_a_h[size];
+    float src_b_h[size];
+    int i;
+
+    // Initialize both vectors
+    for (i = 0; i < size; i++)
+    {
+        src_a_h[i] = src_b_h[i] = (float)i;
+    }
+
+    const int mem_size = sizeof (float)*size;
+    // Allocates a buffer of size mem_size and copies mem_size bytes from src_a_h
+    cl_mem src_a_d = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, src_a_h, &error);
+    cl_mem src_b_d = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, src_b_h, &error);
+    cl_mem res_d = clCreateBuffer(_context, CL_MEM_WRITE_ONLY, mem_size, NULL, &error);
+
+    // Creates the program
+    cl_program program = _loadProgram("test.cl");
+    if (program)
+    {
+        // Builds the program
+        error = clBuildProgram(program, 1, &_device, NULL, NULL, NULL);
+        if (error)
+        {
+            // Shows the log
+            char* build_log;
+            size_t log_size;
+            // First call to know the proper size
+            clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
+            build_log = malloc(sizeof (char)* (log_size + 1));
+            // Second call to get the log
+            clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
+            build_log[log_size] = '\0';
+            printf("[OPENCL] Build log :\n%s\n", build_log);
+            free(build_log);
+        }
+        else
+        {
+            // Extracting the kernel
+            cl_kernel vector_add_k = clCreateKernel(program, "vector_add_gpu", &error);
+            if (!error)
+            {
+                // Enqueuing parameters
+                // Note that we inform the size of the cl_mem object, not the size of the memory pointed by it
+                error = clSetKernelArg(vector_add_k, 0, sizeof (cl_mem), &src_a_d);
+                error |= clSetKernelArg(vector_add_k, 1, sizeof (cl_mem), &src_b_d);
+                error |= clSetKernelArg(vector_add_k, 2, sizeof (cl_mem), &res_d);
+                error |= clSetKernelArg(vector_add_k, 3, sizeof (size_t), &size);
+                assert(error == CL_SUCCESS);
+
+                // Launching kernel
+                // TODO Get max number of items in device
+                // TODO Lock between the call and the result as we only have one queue
+                const size_t local_ws = 64; // Number of work-items per work-group
+                const size_t global_ws = _roundUp(local_ws, size); // Total number of work-items
+                error = clEnqueueNDRangeKernel(_queue, vector_add_k, 1, NULL, &global_ws, &local_ws, 0, NULL, NULL);
+                if (error)
+                {
+                    printf("[OPENCL] Execution error : %s\n", _getErrorMessage(error));
+                }
+                else
+                {
+                    // Reading back
+                    float check[size];
+                    clEnqueueReadBuffer(_queue, res_d, CL_TRUE, 0, mem_size, check, 0, NULL, NULL);
+
+                    for (i = 0; i < size; i++)
+                    {
+                        printf("%f %f %f\n", src_a_h[i], src_b_h[i], check[i]);
+                    }
+                }
+
+                clReleaseKernel(vector_add_k);
+            }
+        }
+
+        clReleaseProgram(program);
+    }
+
+    clReleaseMemObject(src_a_d);
+    clReleaseMemObject(src_b_d);
+    clReleaseMemObject(res_d);
+}
+#else
+void openclInit()
+{
+}
+int openclAvailable()
+{
+    return 0;
+}
+void openclQuit()
+{
+}
+#endif

+ 20 - 0
lib_paysages/opencl.h

@@ -0,0 +1,20 @@
+#ifndef _PAYSAGES_OPENCL_H_
+#define _PAYSAGES_OPENCL_H_
+
+/* OpenCL usage */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "noise.h"
+
+void openclInit();
+int openclAvailable();
+void openclQuit();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

+ 13 - 5
lib_paysages/system.c

@@ -2,6 +2,7 @@
 
 #include <unistd.h>
 #include <stdlib.h>
+#include <sys/stat.h>
 
 #include "IL/il.h"
 #include "IL/ilu.h"
@@ -111,16 +112,16 @@ int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callb
 
     ilGenImages(1, &image_id);
     ilBindImage(image_id);
-    
+
     if (ilLoadImage(filepath))
     {
         width = ilGetInteger(IL_IMAGE_WIDTH);
         height = ilGetInteger(IL_IMAGE_HEIGHT);
         callback_start(data, width, height);
-        
+
         pixels = malloc(sizeof(ILuint) * width * height);
         ilCopyPixels(0, 0, 0, width, height, 1, IL_RGBA, IL_UNSIGNED_BYTE, pixels);
-        
+
         for (y = 0; y < height; y++)
         {
             for (x = 0; x < width; x++)
@@ -128,10 +129,10 @@ int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callb
                 callback_pixel(data, x, y, colorFrom32BitRGBA(pixels[y * width + x]));
             }
         }
-        
+
         free(pixels);
     }
-    
+
     error_count = 0;
     while ((error=ilGetError()) != IL_NO_ERROR)
     {
@@ -140,3 +141,10 @@ int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callb
     }
     return !error_count;
 }
+
+int systemGetFileSize(const char* path)
+{
+    struct stat st;
+    stat(path, &st);
+    return st.st_size;
+}

+ 2 - 0
lib_paysages/system.h

@@ -21,6 +21,8 @@ int systemGetCoreCount();
 int systemSavePictureFile(const char* filepath, PictureCallbackSavePixel callback_pixel, void* data, int width, int height);
 int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callback_start, PictureCallbackLoadPixel callback_pixel, void* data);
 
+int systemGetFileSize(const char* path);
+
 #ifdef HAVE_GLIB
 #include <glib.h>
 

+ 15 - 0
opencl/noise.cl

@@ -0,0 +1,15 @@
+__kernel void simplex_2d(__global const int* level_count,
+                    __global const double* levels,
+                    __global const double* location,
+                    __global double* res)
+{
+    res[0] = 0.0;
+}
+
+__kernel void simplex_3d(__global const int* level_count,
+                    __global const double* levels,
+                    __global const double* location,
+                    __global double* res)
+{
+    res[0] = 0.0;
+}

+ 16 - 0
test.cl

@@ -0,0 +1,16 @@
+__kernel void vector_add_gpu (__global const float* src_a,
+                     __global const float* src_b,
+                     __global float* res,
+           const int num)
+{
+   /* get_global_id(0) returns the ID of the thread in execution.
+   As many threads are launched at the same time, executing the same kernel,
+   each one will receive a different ID, and consequently perform a different computation.*/
+   const int idx = get_global_id(0);
+
+   /* Now each work-item asks itself: "is my ID inside the vector's range?"
+   If the answer is YES, the work-item performs the corresponding computation*/
+   if (idx < num)
+      res[idx] = src_a[idx] + src_b[idx];
+}
+