paysages : Added structure for opencl usage.
git-svn-id: https://subversion.assembla.com/svn/thunderk/paysages@469 b1fd45b6-86a6-48da-8261-f70d1f35bdcc
This commit is contained in:
parent
15fe1cb0e1
commit
49c6c17118
10 changed files with 461 additions and 19 deletions
4
TODO
4
TODO
|
@ -10,6 +10,10 @@ Technology Preview 2 :
|
|||
=> Apply model to atmosphere (aerial perspective)
|
||||
=> Find a proper model for night sky (maybe Shirley)
|
||||
- Clouds should keep distance to ground.
|
||||
- Restore aerial perspective.
|
||||
- Implement Bruneton's scattering model.
|
||||
- Add clouds to explorer with 3d textures.
|
||||
- Start using OpenCL to optimize rendering.
|
||||
- Rethink the quality settings and detail smoothing in the distance.
|
||||
=> When quality setting is set to 10, add boost options
|
||||
=> Add detail boost (adds granularity)
|
||||
|
|
|
@ -5,14 +5,23 @@ SOURCES = $(wildcard *.c atmosphere/*.c)
|
|||
OBJECTS = ${SOURCES:%.c=${OBJPATH}/%.o}
|
||||
HEADERS = $(wildcard shared/*.h atmosphere/*.h *.h)
|
||||
RESULT = ${BUILDPATH}/libpaysages.so
|
||||
CC_FLAGS = -Wall -fPIC $(shell pkg-config --cflags glib-2.0 gthread-2.0) -DHAVE_GLIB=1
|
||||
CC_LDFLAGS = $(shell pkg-config --libs glib-2.0 gthread-2.0) -lIL -lILU
|
||||
LIBS = glib-2.0 gthread-2.0 IL ILU
|
||||
CC_FLAGS = -Wall -fPIC -DHAVE_GLIB=1
|
||||
|
||||
ifeq ($(BUILDMODE),debug)
|
||||
CHECK_OPENCL = $(shell pkg-config --modversion --silence-errors OpenCL)
|
||||
ifneq (,${CHECK_OPENCL})
|
||||
LIBS += OpenCL
|
||||
CC_FLAGS += -DHAVE_OPENCL=1
|
||||
endif
|
||||
|
||||
CC_FLAGS += $(shell pkg-config --cflags ${LIBS})
|
||||
CC_LDFLAGS = $(shell pkg-config --libs ${LIBS})
|
||||
|
||||
ifeq (${BUILDMODE},debug)
|
||||
CC_FLAGS += -g -pg
|
||||
CC_LDFLAGS += -g -pg
|
||||
endif
|
||||
ifeq ($(BUILDMODE),release)
|
||||
ifeq (${BUILDMODE},release)
|
||||
CC_FLAGS += -O3 -DNDEBUG -Wno-unused-variable -Wno-unused-but-set-variable
|
||||
endif
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "scenery.h"
|
||||
#include "render.h"
|
||||
#include "main.h"
|
||||
#include "opencl.h"
|
||||
|
||||
#define APP_HEADER 198632.125
|
||||
|
||||
|
@ -18,6 +19,7 @@ void paysagesInit()
|
|||
CameraDefinition camera;
|
||||
|
||||
systemInit();
|
||||
openclInit();
|
||||
|
||||
sceneryInit();
|
||||
renderInit();
|
||||
|
@ -50,6 +52,8 @@ void paysagesQuit()
|
|||
{
|
||||
sceneryQuit();
|
||||
renderQuit();
|
||||
|
||||
openclQuit();
|
||||
}
|
||||
|
||||
FileOperationResult paysagesSave(char* filepath)
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "noisesimplex.h"
|
||||
#include "noisenaive.h"
|
||||
#include "noiseperlin.h"
|
||||
#include "opencl.h"
|
||||
|
||||
#define MAX_LEVEL_COUNT 30
|
||||
|
||||
|
|
363
lib_paysages/opencl.c
Normal file
363
lib_paysages/opencl.c
Normal file
|
@ -0,0 +1,363 @@
|
|||
#include "opencl.h"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <CL/opencl.h>
|
||||
#include "system.h"
|
||||
|
||||
static cl_platform_id _platform;
|
||||
static cl_device_id _device;
|
||||
static cl_context _context = NULL;
|
||||
/* TODO One queue per calling thread ? */
|
||||
static cl_command_queue _queue = NULL;
|
||||
|
||||
static cl_program _noise_program = NULL;
|
||||
static cl_kernel _noise_kernel_simplex2d = NULL;
|
||||
static cl_kernel _noise_kernel_simplex3d = NULL;
|
||||
|
||||
static const char* _getErrorMessage(cl_int err)
|
||||
{
|
||||
switch (err)
|
||||
{
|
||||
case CL_SUCCESS: return "Success!";
|
||||
case CL_DEVICE_NOT_FOUND: return "Device not found.";
|
||||
case CL_DEVICE_NOT_AVAILABLE: return "Device not available";
|
||||
case CL_COMPILER_NOT_AVAILABLE: return "Compiler not available";
|
||||
case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory object allocation failure";
|
||||
case CL_OUT_OF_RESOURCES: return "Out of resources";
|
||||
case CL_OUT_OF_HOST_MEMORY: return "Out of host memory";
|
||||
case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling information not available";
|
||||
case CL_MEM_COPY_OVERLAP: return "Memory copy overlap";
|
||||
case CL_IMAGE_FORMAT_MISMATCH: return "Image format mismatch";
|
||||
case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image format not supported";
|
||||
case CL_BUILD_PROGRAM_FAILURE: return "Program build failure";
|
||||
case CL_MAP_FAILURE: return "Map failure";
|
||||
case CL_INVALID_VALUE: return "Invalid value";
|
||||
case CL_INVALID_DEVICE_TYPE: return "Invalid device type";
|
||||
case CL_INVALID_PLATFORM: return "Invalid platform";
|
||||
case CL_INVALID_DEVICE: return "Invalid device";
|
||||
case CL_INVALID_CONTEXT: return "Invalid context";
|
||||
case CL_INVALID_QUEUE_PROPERTIES: return "Invalid queue properties";
|
||||
case CL_INVALID_COMMAND_QUEUE: return "Invalid command queue";
|
||||
case CL_INVALID_HOST_PTR: return "Invalid host pointer";
|
||||
case CL_INVALID_MEM_OBJECT: return "Invalid memory object";
|
||||
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "Invalid image format descriptor";
|
||||
case CL_INVALID_IMAGE_SIZE: return "Invalid image size";
|
||||
case CL_INVALID_SAMPLER: return "Invalid sampler";
|
||||
case CL_INVALID_BINARY: return "Invalid binary";
|
||||
case CL_INVALID_BUILD_OPTIONS: return "Invalid build options";
|
||||
case CL_INVALID_PROGRAM: return "Invalid program";
|
||||
case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid program executable";
|
||||
case CL_INVALID_KERNEL_NAME: return "Invalid kernel name";
|
||||
case CL_INVALID_KERNEL_DEFINITION: return "Invalid kernel definition";
|
||||
case CL_INVALID_KERNEL: return "Invalid kernel";
|
||||
case CL_INVALID_ARG_INDEX: return "Invalid argument index";
|
||||
case CL_INVALID_ARG_VALUE: return "Invalid argument value";
|
||||
case CL_INVALID_ARG_SIZE: return "Invalid argument size";
|
||||
case CL_INVALID_KERNEL_ARGS: return "Invalid kernel arguments";
|
||||
case CL_INVALID_WORK_DIMENSION: return "Invalid work dimension";
|
||||
case CL_INVALID_WORK_GROUP_SIZE: return "Invalid work group size";
|
||||
case CL_INVALID_WORK_ITEM_SIZE: return "Invalid work item size";
|
||||
case CL_INVALID_GLOBAL_OFFSET: return "Invalid global offset";
|
||||
case CL_INVALID_EVENT_WAIT_LIST: return "Invalid event wait list";
|
||||
case CL_INVALID_EVENT: return "Invalid event";
|
||||
case CL_INVALID_OPERATION: return "Invalid operation";
|
||||
case CL_INVALID_GL_OBJECT: return "Invalid OpenGL object";
|
||||
case CL_INVALID_BUFFER_SIZE: return "Invalid buffer size";
|
||||
case CL_INVALID_MIP_LEVEL: return "Invalid mip-map level";
|
||||
default: return "Unknown";
|
||||
}
|
||||
}
|
||||
|
||||
static cl_program _loadProgram(const char* path)
|
||||
{
|
||||
cl_int error = 1;
|
||||
size_t src_size = systemGetFileSize(path);
|
||||
FILE* f = fopen(path, "rb");
|
||||
if (!f)
|
||||
{
|
||||
printf("[OpenCL] Program %s not found !\n", path);
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
cl_program program = NULL;
|
||||
char* source = malloc(sizeof (char)* src_size);
|
||||
if (fread(source, src_size, 1, f) != 1)
|
||||
{
|
||||
printf("[OpenCL] Error reading program %s\n", path);
|
||||
src_size = 0;
|
||||
}
|
||||
fclose(f);
|
||||
|
||||
if (src_size > 0)
|
||||
{
|
||||
program = clCreateProgramWithSource(_context, 1, (const char**)&source, &src_size, &error);
|
||||
if (error)
|
||||
{
|
||||
printf("[OpenCL] Error loading program %s : %s\n", path, _getErrorMessage(error));
|
||||
if (program)
|
||||
{
|
||||
clReleaseProgram(program);
|
||||
program = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
free(source);
|
||||
|
||||
if (program)
|
||||
{
|
||||
error = clBuildProgram(program, 1, &_device, NULL, NULL, NULL);
|
||||
if (error)
|
||||
{
|
||||
char* build_log;
|
||||
size_t log_size;
|
||||
|
||||
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
|
||||
build_log = malloc(sizeof (char)* (log_size + 1));
|
||||
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
|
||||
build_log[log_size] = '\0';
|
||||
printf("[OpenCL] Build error for %s :\n%s\n", path, build_log);
|
||||
free(build_log);
|
||||
|
||||
clReleaseProgram(program);
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
return program;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static cl_kernel _getkernel(cl_program program, const char* name)
|
||||
{
|
||||
cl_int error;
|
||||
cl_kernel result;
|
||||
|
||||
result = clCreateKernel(program, name, &error);
|
||||
if (!error)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("[OpenCL] Error retrieving kernel %s : %s\n", name, _getErrorMessage(error));
|
||||
if (result)
|
||||
{
|
||||
clReleaseKernel(result);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static size_t _roundUp(int group_size, int global_size)
|
||||
{
|
||||
int r = global_size % group_size;
|
||||
if (r == 0)
|
||||
{
|
||||
return global_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
return global_size + group_size - r;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void openclInit()
|
||||
{
|
||||
cl_int error;
|
||||
cl_uint platform_count;
|
||||
|
||||
/* Get platform */
|
||||
error = clGetPlatformIDs(1, &_platform, &platform_count);
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
printf("[OpenCL] Error getting platform id: %s\n", _getErrorMessage(error));
|
||||
return;
|
||||
}
|
||||
else if (platform_count != 1)
|
||||
{
|
||||
printf("[OpenCL] No platform available\n");
|
||||
return;
|
||||
}
|
||||
/* Get available devices */
|
||||
/* TODO Handle several devices */
|
||||
error = clGetDeviceIDs(_platform, CL_DEVICE_TYPE_GPU, 1, &_device, NULL);
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
printf("[OpenCL] Error getting devices: %s\n", _getErrorMessage(error));
|
||||
return;
|
||||
}
|
||||
/* Create a context on better device */
|
||||
_context = clCreateContext(0, 1, &_device, NULL, NULL, &error);
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
printf("[OpenCL] Error creating context: %s\n", _getErrorMessage(error));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Preload programs */
|
||||
_noise_program = _loadProgram("opencl/noise.cl");
|
||||
if (_noise_program)
|
||||
{
|
||||
_noise_kernel_simplex2d = _getkernel(_noise_program, "simplex_2d");
|
||||
_noise_kernel_simplex3d = _getkernel(_noise_program, "simplex_3d");
|
||||
if (!_noise_kernel_simplex2d || !_noise_kernel_simplex3d)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
/* Create a command queue in this context */
|
||||
_queue = clCreateCommandQueue(_context, _device, 0, &error);
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
printf("[OpenCL] Error creating command queue: %s\n", _getErrorMessage(error));
|
||||
return;
|
||||
}
|
||||
|
||||
printf("OpenCL support is enabled.\n");
|
||||
}
|
||||
|
||||
int openclAvailable()
|
||||
{
|
||||
return _queue != NULL;
|
||||
}
|
||||
|
||||
void openclQuit()
|
||||
{
|
||||
if (_noise_kernel_simplex2d)
|
||||
{
|
||||
clReleaseKernel(_noise_kernel_simplex2d);
|
||||
}
|
||||
if (_noise_kernel_simplex3d)
|
||||
{
|
||||
clReleaseKernel(_noise_kernel_simplex3d);
|
||||
}
|
||||
if (_noise_program)
|
||||
{
|
||||
clReleaseProgram(_noise_program);
|
||||
}
|
||||
if (_queue)
|
||||
{
|
||||
clReleaseCommandQueue(_queue);
|
||||
}
|
||||
if (_context)
|
||||
{
|
||||
clReleaseContext(_context);
|
||||
}
|
||||
}
|
||||
|
||||
void openclTest()
|
||||
{
|
||||
cl_int error;
|
||||
const int size = 100;
|
||||
float src_a_h[size];
|
||||
float src_b_h[size];
|
||||
int i;
|
||||
|
||||
// Initialize both vectors
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
src_a_h[i] = src_b_h[i] = (float)i;
|
||||
}
|
||||
|
||||
const int mem_size = sizeof (float)*size;
|
||||
// Allocates a buffer of size mem_size and copies mem_size bytes from src_a_h
|
||||
cl_mem src_a_d = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, src_a_h, &error);
|
||||
cl_mem src_b_d = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, src_b_h, &error);
|
||||
cl_mem res_d = clCreateBuffer(_context, CL_MEM_WRITE_ONLY, mem_size, NULL, &error);
|
||||
|
||||
// Creates the program
|
||||
cl_program program = _loadProgram("test.cl");
|
||||
if (program)
|
||||
{
|
||||
// Builds the program
|
||||
error = clBuildProgram(program, 1, &_device, NULL, NULL, NULL);
|
||||
if (error)
|
||||
{
|
||||
// Shows the log
|
||||
char* build_log;
|
||||
size_t log_size;
|
||||
// First call to know the proper size
|
||||
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
|
||||
build_log = malloc(sizeof (char)* (log_size + 1));
|
||||
// Second call to get the log
|
||||
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
|
||||
build_log[log_size] = '\0';
|
||||
printf("[OPENCL] Build log :\n%s\n", build_log);
|
||||
free(build_log);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Extracting the kernel
|
||||
cl_kernel vector_add_k = clCreateKernel(program, "vector_add_gpu", &error);
|
||||
if (!error)
|
||||
{
|
||||
// Enqueuing parameters
|
||||
// Note that we inform the size of the cl_mem object, not the size of the memory pointed by it
|
||||
error = clSetKernelArg(vector_add_k, 0, sizeof (cl_mem), &src_a_d);
|
||||
error |= clSetKernelArg(vector_add_k, 1, sizeof (cl_mem), &src_b_d);
|
||||
error |= clSetKernelArg(vector_add_k, 2, sizeof (cl_mem), &res_d);
|
||||
error |= clSetKernelArg(vector_add_k, 3, sizeof (size_t), &size);
|
||||
assert(error == CL_SUCCESS);
|
||||
|
||||
// Launching kernel
|
||||
// TODO Get max number of items in device
|
||||
// TODO Lock between the call and the result as we only have one queue
|
||||
const size_t local_ws = 64; // Number of work-items per work-group
|
||||
const size_t global_ws = _roundUp(local_ws, size); // Total number of work-items
|
||||
error = clEnqueueNDRangeKernel(_queue, vector_add_k, 1, NULL, &global_ws, &local_ws, 0, NULL, NULL);
|
||||
if (error)
|
||||
{
|
||||
printf("[OPENCL] Execution error : %s\n", _getErrorMessage(error));
|
||||
}
|
||||
else
|
||||
{
|
||||
// Reading back
|
||||
float check[size];
|
||||
clEnqueueReadBuffer(_queue, res_d, CL_TRUE, 0, mem_size, check, 0, NULL, NULL);
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
printf("%f %f %f\n", src_a_h[i], src_b_h[i], check[i]);
|
||||
}
|
||||
}
|
||||
|
||||
clReleaseKernel(vector_add_k);
|
||||
}
|
||||
}
|
||||
|
||||
clReleaseProgram(program);
|
||||
}
|
||||
|
||||
clReleaseMemObject(src_a_d);
|
||||
clReleaseMemObject(src_b_d);
|
||||
clReleaseMemObject(res_d);
|
||||
}
|
||||
#else
|
||||
void openclInit()
|
||||
{
|
||||
}
|
||||
int openclAvailable()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
void openclQuit()
|
||||
{
|
||||
}
|
||||
#endif
|
20
lib_paysages/opencl.h
Normal file
20
lib_paysages/opencl.h
Normal file
|
@ -0,0 +1,20 @@
|
|||
#ifndef _PAYSAGES_OPENCL_H_
|
||||
#define _PAYSAGES_OPENCL_H_
|
||||
|
||||
/* OpenCL usage */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "noise.h"
|
||||
|
||||
void openclInit();
|
||||
int openclAvailable();
|
||||
void openclQuit();
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "IL/il.h"
|
||||
#include "IL/ilu.h"
|
||||
|
@ -140,3 +141,10 @@ int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callb
|
|||
}
|
||||
return !error_count;
|
||||
}
|
||||
|
||||
int systemGetFileSize(const char* path)
|
||||
{
|
||||
struct stat st;
|
||||
stat(path, &st);
|
||||
return st.st_size;
|
||||
}
|
||||
|
|
|
@ -21,6 +21,8 @@ int systemGetCoreCount();
|
|||
int systemSavePictureFile(const char* filepath, PictureCallbackSavePixel callback_pixel, void* data, int width, int height);
|
||||
int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callback_start, PictureCallbackLoadPixel callback_pixel, void* data);
|
||||
|
||||
int systemGetFileSize(const char* path);
|
||||
|
||||
#ifdef HAVE_GLIB
|
||||
#include <glib.h>
|
||||
|
||||
|
|
15
opencl/noise.cl
Normal file
15
opencl/noise.cl
Normal file
|
@ -0,0 +1,15 @@
|
|||
__kernel void simplex_2d(__global const int* level_count,
|
||||
__global const double* levels,
|
||||
__global const double* location,
|
||||
__global double* res)
|
||||
{
|
||||
res[0] = 0.0;
|
||||
}
|
||||
|
||||
__kernel void simplex_3d(__global const int* level_count,
|
||||
__global const double* levels,
|
||||
__global const double* location,
|
||||
__global double* res)
|
||||
{
|
||||
res[0] = 0.0;
|
||||
}
|
16
test.cl
Normal file
16
test.cl
Normal file
|
@ -0,0 +1,16 @@
|
|||
__kernel void vector_add_gpu (__global const float* src_a,
|
||||
__global const float* src_b,
|
||||
__global float* res,
|
||||
const int num)
|
||||
{
|
||||
/* get_global_id(0) returns the ID of the thread in execution.
|
||||
As many threads are launched at the same time, executing the same kernel,
|
||||
each one will receive a different ID, and consequently perform a different computation.*/
|
||||
const int idx = get_global_id(0);
|
||||
|
||||
/* Now each work-item asks itself: "is my ID inside the vector's range?"
|
||||
If the answer is YES, the work-item performs the corresponding computation*/
|
||||
if (idx < num)
|
||||
res[idx] = src_a[idx] + src_b[idx];
|
||||
}
|
||||
|
Loading…
Reference in a new issue