1
0
Fork 0

paysages : Added structure for opencl usage.

git-svn-id: https://subversion.assembla.com/svn/thunderk/paysages@469 b1fd45b6-86a6-48da-8261-f70d1f35bdcc
This commit is contained in:
Michaël Lemaire 2012-12-01 16:39:47 +00:00 committed by ThunderK
parent 15fe1cb0e1
commit 49c6c17118
10 changed files with 461 additions and 19 deletions

4
TODO
View File

@ -10,6 +10,10 @@ Technology Preview 2 :
=> Apply model to atmosphere (aerial perspective)
=> Find a proper model for night sky (maybe Shirley)
- Clouds should keep distance to ground.
- Restore aerial perspective.
- Implement Bruneton's scattering model.
- Add clouds to explorer with 3d textures.
- Start using OpenCL to optimize rendering.
- Rethink the quality settings and detail smoothing in the distance.
=> When quality setting is set to 10, add boost options
=> Add detail boost (adds granularity)

View File

@ -5,19 +5,28 @@ SOURCES = $(wildcard *.c atmosphere/*.c)
OBJECTS = ${SOURCES:%.c=${OBJPATH}/%.o}
HEADERS = $(wildcard shared/*.h atmosphere/*.h *.h)
RESULT = ${BUILDPATH}/libpaysages.so
CC_FLAGS = -Wall -fPIC $(shell pkg-config --cflags glib-2.0 gthread-2.0) -DHAVE_GLIB=1
CC_LDFLAGS = $(shell pkg-config --libs glib-2.0 gthread-2.0) -lIL -lILU
LIBS = glib-2.0 gthread-2.0 IL ILU
CC_FLAGS = -Wall -fPIC -DHAVE_GLIB=1
ifeq ($(BUILDMODE),debug)
CHECK_OPENCL = $(shell pkg-config --modversion --silence-errors OpenCL)
ifneq (,${CHECK_OPENCL})
LIBS += OpenCL
CC_FLAGS += -DHAVE_OPENCL=1
endif
CC_FLAGS += $(shell pkg-config --cflags ${LIBS})
CC_LDFLAGS = $(shell pkg-config --libs ${LIBS})
ifeq (${BUILDMODE},debug)
CC_FLAGS += -g -pg
CC_LDFLAGS += -g -pg
endif
ifeq ($(BUILDMODE),release)
ifeq (${BUILDMODE},release)
CC_FLAGS += -O3 -DNDEBUG -Wno-unused-variable -Wno-unused-but-set-variable
endif
all:prepare ${RESULT}
prepare:
mkdir -p ${OBJPATH}
mkdir -p ${BUILDPATH}

View File

@ -10,6 +10,7 @@
#include "scenery.h"
#include "render.h"
#include "main.h"
#include "opencl.h"
#define APP_HEADER 198632.125
@ -18,6 +19,7 @@ void paysagesInit()
CameraDefinition camera;
systemInit();
openclInit();
sceneryInit();
renderInit();
@ -50,6 +52,8 @@ void paysagesQuit()
{
sceneryQuit();
renderQuit();
openclQuit();
}
FileOperationResult paysagesSave(char* filepath)

View File

@ -8,6 +8,7 @@
#include "noisesimplex.h"
#include "noisenaive.h"
#include "noiseperlin.h"
#include "opencl.h"
#define MAX_LEVEL_COUNT 30
@ -19,7 +20,7 @@ struct NoiseGenerator
double height_offset;
int level_count;
struct NoiseLevel levels[MAX_LEVEL_COUNT];
double _max_height;
double (*_func_noise_1d)(double x);
double (*_func_noise_2d)(double x, double y);
@ -31,7 +32,7 @@ void noiseInit()
noiseSimplexInit();
noisePerlinInit();
noiseNaiveInit();
/* Noise stats */
/*NoiseGenerator* noise;
int x;
@ -96,14 +97,14 @@ void noiseSaveGenerator(PackStream* stream, NoiseGenerator* generator)
x = (int)generator->function.algorithm;
packWriteInt(stream, &x);
packWriteDouble(stream, &generator->function.ridge_factor);
packWriteDouble(stream, &generator->height_offset);
packWriteInt(stream, &generator->level_count);
for (x = 0; x < generator->level_count; x++)
{
NoiseLevel* level = generator->levels + x;
packWriteDouble(stream, &level->scaling);
packWriteDouble(stream, &level->height);
packWriteDouble(stream, &level->xoffset);
@ -126,14 +127,14 @@ void noiseLoadGenerator(PackStream* stream, NoiseGenerator* generator)
for (x = 0; x < generator->level_count; x++)
{
NoiseLevel* level = generator->levels + x;
packReadDouble(stream, &level->scaling);
packReadDouble(stream, &level->height);
packReadDouble(stream, &level->xoffset);
packReadDouble(stream, &level->yoffset);
packReadDouble(stream, &level->zoffset);
}
noiseValidate(generator);
}
@ -144,7 +145,7 @@ void noiseCopy(NoiseGenerator* source, NoiseGenerator* destination)
destination->level_count = source->level_count;
memcpy(destination->levels, source->levels, sizeof(NoiseLevel) * destination->level_count);
noiseValidate(destination);
}
@ -152,7 +153,7 @@ void noiseValidate(NoiseGenerator* generator)
{
int x;
double max_height = generator->height_offset;
if (generator->function.algorithm < 0 || generator->function.algorithm > NOISE_FUNCTION_NAIVE)
{
generator->function.algorithm = NOISE_FUNCTION_SIMPLEX;
@ -175,7 +176,7 @@ void noiseValidate(NoiseGenerator* generator)
generator->_func_noise_3d = noiseNaiveGet3DValue;
break;
}
if (generator->function.ridge_factor > 0.5)
{
generator->function.ridge_factor = 0.5;

363
lib_paysages/opencl.c Normal file
View File

@ -0,0 +1,363 @@
#include "opencl.h"
#ifdef HAVE_OPENCL
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <CL/opencl.h>
#include "system.h"
static cl_platform_id _platform;
static cl_device_id _device;
static cl_context _context = NULL;
/* TODO One queue per calling thread ? */
static cl_command_queue _queue = NULL;
static cl_program _noise_program = NULL;
static cl_kernel _noise_kernel_simplex2d = NULL;
static cl_kernel _noise_kernel_simplex3d = NULL;
static const char* _getErrorMessage(cl_int err)
{
switch (err)
{
case CL_SUCCESS: return "Success!";
case CL_DEVICE_NOT_FOUND: return "Device not found.";
case CL_DEVICE_NOT_AVAILABLE: return "Device not available";
case CL_COMPILER_NOT_AVAILABLE: return "Compiler not available";
case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory object allocation failure";
case CL_OUT_OF_RESOURCES: return "Out of resources";
case CL_OUT_OF_HOST_MEMORY: return "Out of host memory";
case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling information not available";
case CL_MEM_COPY_OVERLAP: return "Memory copy overlap";
case CL_IMAGE_FORMAT_MISMATCH: return "Image format mismatch";
case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image format not supported";
case CL_BUILD_PROGRAM_FAILURE: return "Program build failure";
case CL_MAP_FAILURE: return "Map failure";
case CL_INVALID_VALUE: return "Invalid value";
case CL_INVALID_DEVICE_TYPE: return "Invalid device type";
case CL_INVALID_PLATFORM: return "Invalid platform";
case CL_INVALID_DEVICE: return "Invalid device";
case CL_INVALID_CONTEXT: return "Invalid context";
case CL_INVALID_QUEUE_PROPERTIES: return "Invalid queue properties";
case CL_INVALID_COMMAND_QUEUE: return "Invalid command queue";
case CL_INVALID_HOST_PTR: return "Invalid host pointer";
case CL_INVALID_MEM_OBJECT: return "Invalid memory object";
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "Invalid image format descriptor";
case CL_INVALID_IMAGE_SIZE: return "Invalid image size";
case CL_INVALID_SAMPLER: return "Invalid sampler";
case CL_INVALID_BINARY: return "Invalid binary";
case CL_INVALID_BUILD_OPTIONS: return "Invalid build options";
case CL_INVALID_PROGRAM: return "Invalid program";
case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid program executable";
case CL_INVALID_KERNEL_NAME: return "Invalid kernel name";
case CL_INVALID_KERNEL_DEFINITION: return "Invalid kernel definition";
case CL_INVALID_KERNEL: return "Invalid kernel";
case CL_INVALID_ARG_INDEX: return "Invalid argument index";
case CL_INVALID_ARG_VALUE: return "Invalid argument value";
case CL_INVALID_ARG_SIZE: return "Invalid argument size";
case CL_INVALID_KERNEL_ARGS: return "Invalid kernel arguments";
case CL_INVALID_WORK_DIMENSION: return "Invalid work dimension";
case CL_INVALID_WORK_GROUP_SIZE: return "Invalid work group size";
case CL_INVALID_WORK_ITEM_SIZE: return "Invalid work item size";
case CL_INVALID_GLOBAL_OFFSET: return "Invalid global offset";
case CL_INVALID_EVENT_WAIT_LIST: return "Invalid event wait list";
case CL_INVALID_EVENT: return "Invalid event";
case CL_INVALID_OPERATION: return "Invalid operation";
case CL_INVALID_GL_OBJECT: return "Invalid OpenGL object";
case CL_INVALID_BUFFER_SIZE: return "Invalid buffer size";
case CL_INVALID_MIP_LEVEL: return "Invalid mip-map level";
default: return "Unknown";
}
}
static cl_program _loadProgram(const char* path)
{
cl_int error = 1;
size_t src_size = systemGetFileSize(path);
FILE* f = fopen(path, "rb");
if (!f)
{
printf("[OpenCL] Program %s not found !\n", path);
return NULL;
}
else
{
cl_program program = NULL;
char* source = malloc(sizeof (char)* src_size);
if (fread(source, src_size, 1, f) != 1)
{
printf("[OpenCL] Error reading program %s\n", path);
src_size = 0;
}
fclose(f);
if (src_size > 0)
{
program = clCreateProgramWithSource(_context, 1, (const char**)&source, &src_size, &error);
if (error)
{
printf("[OpenCL] Error loading program %s : %s\n", path, _getErrorMessage(error));
if (program)
{
clReleaseProgram(program);
program = NULL;
}
}
}
free(source);
if (program)
{
error = clBuildProgram(program, 1, &_device, NULL, NULL, NULL);
if (error)
{
char* build_log;
size_t log_size;
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
build_log = malloc(sizeof (char)* (log_size + 1));
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
build_log[log_size] = '\0';
printf("[OpenCL] Build error for %s :\n%s\n", path, build_log);
free(build_log);
clReleaseProgram(program);
return NULL;
}
else
{
return program;
}
}
else
{
return NULL;
}
}
}
static cl_kernel _getkernel(cl_program program, const char* name)
{
cl_int error;
cl_kernel result;
result = clCreateKernel(program, name, &error);
if (!error)
{
return result;
}
else
{
printf("[OpenCL] Error retrieving kernel %s : %s\n", name, _getErrorMessage(error));
if (result)
{
clReleaseKernel(result);
}
return NULL;
}
}
static size_t _roundUp(int group_size, int global_size)
{
int r = global_size % group_size;
if (r == 0)
{
return global_size;
}
else
{
return global_size + group_size - r;
}
}
void openclInit()
{
cl_int error;
cl_uint platform_count;
/* Get platform */
error = clGetPlatformIDs(1, &_platform, &platform_count);
if (error != CL_SUCCESS)
{
printf("[OpenCL] Error getting platform id: %s\n", _getErrorMessage(error));
return;
}
else if (platform_count != 1)
{
printf("[OpenCL] No platform available\n");
return;
}
/* Get available devices */
/* TODO Handle several devices */
error = clGetDeviceIDs(_platform, CL_DEVICE_TYPE_GPU, 1, &_device, NULL);
if (error != CL_SUCCESS)
{
printf("[OpenCL] Error getting devices: %s\n", _getErrorMessage(error));
return;
}
/* Create a context on better device */
_context = clCreateContext(0, 1, &_device, NULL, NULL, &error);
if (error != CL_SUCCESS)
{
printf("[OpenCL] Error creating context: %s\n", _getErrorMessage(error));
return;
}
/* Preload programs */
_noise_program = _loadProgram("opencl/noise.cl");
if (_noise_program)
{
_noise_kernel_simplex2d = _getkernel(_noise_program, "simplex_2d");
_noise_kernel_simplex3d = _getkernel(_noise_program, "simplex_3d");
if (!_noise_kernel_simplex2d || !_noise_kernel_simplex3d)
{
return;
}
}
else
{
return;
}
/* Create a command queue in this context */
_queue = clCreateCommandQueue(_context, _device, 0, &error);
if (error != CL_SUCCESS)
{
printf("[OpenCL] Error creating command queue: %s\n", _getErrorMessage(error));
return;
}
printf("OpenCL support is enabled.\n");
}
int openclAvailable()
{
return _queue != NULL;
}
void openclQuit()
{
if (_noise_kernel_simplex2d)
{
clReleaseKernel(_noise_kernel_simplex2d);
}
if (_noise_kernel_simplex3d)
{
clReleaseKernel(_noise_kernel_simplex3d);
}
if (_noise_program)
{
clReleaseProgram(_noise_program);
}
if (_queue)
{
clReleaseCommandQueue(_queue);
}
if (_context)
{
clReleaseContext(_context);
}
}
void openclTest()
{
cl_int error;
const int size = 100;
float src_a_h[size];
float src_b_h[size];
int i;
// Initialize both vectors
for (i = 0; i < size; i++)
{
src_a_h[i] = src_b_h[i] = (float)i;
}
const int mem_size = sizeof (float)*size;
// Allocates a buffer of size mem_size and copies mem_size bytes from src_a_h
cl_mem src_a_d = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, src_a_h, &error);
cl_mem src_b_d = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, src_b_h, &error);
cl_mem res_d = clCreateBuffer(_context, CL_MEM_WRITE_ONLY, mem_size, NULL, &error);
// Creates the program
cl_program program = _loadProgram("test.cl");
if (program)
{
// Builds the program
error = clBuildProgram(program, 1, &_device, NULL, NULL, NULL);
if (error)
{
// Shows the log
char* build_log;
size_t log_size;
// First call to know the proper size
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
build_log = malloc(sizeof (char)* (log_size + 1));
// Second call to get the log
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
build_log[log_size] = '\0';
printf("[OPENCL] Build log :\n%s\n", build_log);
free(build_log);
}
else
{
// Extracting the kernel
cl_kernel vector_add_k = clCreateKernel(program, "vector_add_gpu", &error);
if (!error)
{
// Enqueuing parameters
// Note that we inform the size of the cl_mem object, not the size of the memory pointed by it
error = clSetKernelArg(vector_add_k, 0, sizeof (cl_mem), &src_a_d);
error |= clSetKernelArg(vector_add_k, 1, sizeof (cl_mem), &src_b_d);
error |= clSetKernelArg(vector_add_k, 2, sizeof (cl_mem), &res_d);
error |= clSetKernelArg(vector_add_k, 3, sizeof (size_t), &size);
assert(error == CL_SUCCESS);
// Launching kernel
// TODO Get max number of items in device
// TODO Lock between the call and the result as we only have one queue
const size_t local_ws = 64; // Number of work-items per work-group
const size_t global_ws = _roundUp(local_ws, size); // Total number of work-items
error = clEnqueueNDRangeKernel(_queue, vector_add_k, 1, NULL, &global_ws, &local_ws, 0, NULL, NULL);
if (error)
{
printf("[OPENCL] Execution error : %s\n", _getErrorMessage(error));
}
else
{
// Reading back
float check[size];
clEnqueueReadBuffer(_queue, res_d, CL_TRUE, 0, mem_size, check, 0, NULL, NULL);
for (i = 0; i < size; i++)
{
printf("%f %f %f\n", src_a_h[i], src_b_h[i], check[i]);
}
}
clReleaseKernel(vector_add_k);
}
}
clReleaseProgram(program);
}
clReleaseMemObject(src_a_d);
clReleaseMemObject(src_b_d);
clReleaseMemObject(res_d);
}
#else
void openclInit()
{
}
int openclAvailable()
{
return 0;
}
void openclQuit()
{
}
#endif

20
lib_paysages/opencl.h Normal file
View File

@ -0,0 +1,20 @@
#ifndef _PAYSAGES_OPENCL_H_
#define _PAYSAGES_OPENCL_H_
/* OpenCL usage */
#ifdef __cplusplus
extern "C" {
#endif
#include "noise.h"
void openclInit();
int openclAvailable();
void openclQuit();
#ifdef __cplusplus
}
#endif
#endif

View File

@ -2,6 +2,7 @@
#include <unistd.h>
#include <stdlib.h>
#include <sys/stat.h>
#include "IL/il.h"
#include "IL/ilu.h"
@ -111,16 +112,16 @@ int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callb
ilGenImages(1, &image_id);
ilBindImage(image_id);
if (ilLoadImage(filepath))
{
width = ilGetInteger(IL_IMAGE_WIDTH);
height = ilGetInteger(IL_IMAGE_HEIGHT);
callback_start(data, width, height);
pixels = malloc(sizeof(ILuint) * width * height);
ilCopyPixels(0, 0, 0, width, height, 1, IL_RGBA, IL_UNSIGNED_BYTE, pixels);
for (y = 0; y < height; y++)
{
for (x = 0; x < width; x++)
@ -128,10 +129,10 @@ int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callb
callback_pixel(data, x, y, colorFrom32BitRGBA(pixels[y * width + x]));
}
}
free(pixels);
}
error_count = 0;
while ((error=ilGetError()) != IL_NO_ERROR)
{
@ -140,3 +141,10 @@ int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callb
}
return !error_count;
}
int systemGetFileSize(const char* path)
{
struct stat st;
stat(path, &st);
return st.st_size;
}

View File

@ -21,6 +21,8 @@ int systemGetCoreCount();
int systemSavePictureFile(const char* filepath, PictureCallbackSavePixel callback_pixel, void* data, int width, int height);
int systemLoadPictureFile(const char* filepath, PictureCallbackLoadStarted callback_start, PictureCallbackLoadPixel callback_pixel, void* data);
int systemGetFileSize(const char* path);
#ifdef HAVE_GLIB
#include <glib.h>

15
opencl/noise.cl Normal file
View File

@ -0,0 +1,15 @@
__kernel void simplex_2d(__global const int* level_count,
__global const double* levels,
__global const double* location,
__global double* res)
{
res[0] = 0.0;
}
__kernel void simplex_3d(__global const int* level_count,
__global const double* levels,
__global const double* location,
__global double* res)
{
res[0] = 0.0;
}

16
test.cl Normal file
View File

@ -0,0 +1,16 @@
__kernel void vector_add_gpu (__global const float* src_a,
__global const float* src_b,
__global float* res,
const int num)
{
/* get_global_id(0) returns the ID of the thread in execution.
As many threads are launched at the same time, executing the same kernel,
each one will receive a different ID, and consequently perform a different computation.*/
const int idx = get_global_id(0);
/* Now each work-item asks itself: "is my ID inside the vector's range?"
If the answer is YES, the work-item performs the corresponding computation*/
if (idx < num)
res[idx] = src_a[idx] + src_b[idx];
}