364 lines
12 KiB
C
364 lines
12 KiB
C
|
#include "opencl.h"
|
||
|
|
||
|
#ifdef HAVE_OPENCL
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <assert.h>
|
||
|
#include <CL/opencl.h>
|
||
|
#include "system.h"
|
||
|
|
||
|
static cl_platform_id _platform;
|
||
|
static cl_device_id _device;
|
||
|
static cl_context _context = NULL;
|
||
|
/* TODO One queue per calling thread ? */
|
||
|
static cl_command_queue _queue = NULL;
|
||
|
|
||
|
static cl_program _noise_program = NULL;
|
||
|
static cl_kernel _noise_kernel_simplex2d = NULL;
|
||
|
static cl_kernel _noise_kernel_simplex3d = NULL;
|
||
|
|
||
|
/*
 * Translate an OpenCL status code into a short human-readable message.
 *
 * err: status code returned by an OpenCL call.
 *
 * Returns a string literal (never NULL, never to be freed). Codes that are
 * not listed in the table map to "Unknown".
 */
static const char* _getErrorMessage(cl_int err)
{
    static const struct
    {
        cl_int code;
        const char* text;
    } messages[] = {
        {CL_SUCCESS, "Success!"},
        {CL_DEVICE_NOT_FOUND, "Device not found."},
        {CL_DEVICE_NOT_AVAILABLE, "Device not available"},
        {CL_COMPILER_NOT_AVAILABLE, "Compiler not available"},
        {CL_MEM_OBJECT_ALLOCATION_FAILURE, "Memory object allocation failure"},
        {CL_OUT_OF_RESOURCES, "Out of resources"},
        {CL_OUT_OF_HOST_MEMORY, "Out of host memory"},
        {CL_PROFILING_INFO_NOT_AVAILABLE, "Profiling information not available"},
        {CL_MEM_COPY_OVERLAP, "Memory copy overlap"},
        {CL_IMAGE_FORMAT_MISMATCH, "Image format mismatch"},
        {CL_IMAGE_FORMAT_NOT_SUPPORTED, "Image format not supported"},
        {CL_BUILD_PROGRAM_FAILURE, "Program build failure"},
        {CL_MAP_FAILURE, "Map failure"},
        {CL_INVALID_VALUE, "Invalid value"},
        {CL_INVALID_DEVICE_TYPE, "Invalid device type"},
        {CL_INVALID_PLATFORM, "Invalid platform"},
        {CL_INVALID_DEVICE, "Invalid device"},
        {CL_INVALID_CONTEXT, "Invalid context"},
        {CL_INVALID_QUEUE_PROPERTIES, "Invalid queue properties"},
        {CL_INVALID_COMMAND_QUEUE, "Invalid command queue"},
        {CL_INVALID_HOST_PTR, "Invalid host pointer"},
        {CL_INVALID_MEM_OBJECT, "Invalid memory object"},
        {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "Invalid image format descriptor"},
        {CL_INVALID_IMAGE_SIZE, "Invalid image size"},
        {CL_INVALID_SAMPLER, "Invalid sampler"},
        {CL_INVALID_BINARY, "Invalid binary"},
        {CL_INVALID_BUILD_OPTIONS, "Invalid build options"},
        {CL_INVALID_PROGRAM, "Invalid program"},
        {CL_INVALID_PROGRAM_EXECUTABLE, "Invalid program executable"},
        {CL_INVALID_KERNEL_NAME, "Invalid kernel name"},
        {CL_INVALID_KERNEL_DEFINITION, "Invalid kernel definition"},
        {CL_INVALID_KERNEL, "Invalid kernel"},
        {CL_INVALID_ARG_INDEX, "Invalid argument index"},
        {CL_INVALID_ARG_VALUE, "Invalid argument value"},
        {CL_INVALID_ARG_SIZE, "Invalid argument size"},
        {CL_INVALID_KERNEL_ARGS, "Invalid kernel arguments"},
        {CL_INVALID_WORK_DIMENSION, "Invalid work dimension"},
        {CL_INVALID_WORK_GROUP_SIZE, "Invalid work group size"},
        {CL_INVALID_WORK_ITEM_SIZE, "Invalid work item size"},
        {CL_INVALID_GLOBAL_OFFSET, "Invalid global offset"},
        {CL_INVALID_EVENT_WAIT_LIST, "Invalid event wait list"},
        {CL_INVALID_EVENT, "Invalid event"},
        {CL_INVALID_OPERATION, "Invalid operation"},
        {CL_INVALID_GL_OBJECT, "Invalid OpenGL object"},
        {CL_INVALID_BUFFER_SIZE, "Invalid buffer size"},
        {CL_INVALID_MIP_LEVEL, "Invalid mip-map level"}
    };
    size_t idx;

    /* Linear scan: the table is small and this only runs on error paths. */
    for (idx = 0; idx < sizeof messages / sizeof messages[0]; idx++)
    {
        if (messages[idx].code == err)
        {
            return messages[idx].text;
        }
    }
    return "Unknown";
}
|
||
|
|
||
|
static cl_program _loadProgram(const char* path)
|
||
|
{
|
||
|
cl_int error = 1;
|
||
|
size_t src_size = systemGetFileSize(path);
|
||
|
FILE* f = fopen(path, "rb");
|
||
|
if (!f)
|
||
|
{
|
||
|
printf("[OpenCL] Program %s not found !\n", path);
|
||
|
return NULL;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
cl_program program = NULL;
|
||
|
char* source = malloc(sizeof (char)* src_size);
|
||
|
if (fread(source, src_size, 1, f) != 1)
|
||
|
{
|
||
|
printf("[OpenCL] Error reading program %s\n", path);
|
||
|
src_size = 0;
|
||
|
}
|
||
|
fclose(f);
|
||
|
|
||
|
if (src_size > 0)
|
||
|
{
|
||
|
program = clCreateProgramWithSource(_context, 1, (const char**)&source, &src_size, &error);
|
||
|
if (error)
|
||
|
{
|
||
|
printf("[OpenCL] Error loading program %s : %s\n", path, _getErrorMessage(error));
|
||
|
if (program)
|
||
|
{
|
||
|
clReleaseProgram(program);
|
||
|
program = NULL;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
free(source);
|
||
|
|
||
|
if (program)
|
||
|
{
|
||
|
error = clBuildProgram(program, 1, &_device, NULL, NULL, NULL);
|
||
|
if (error)
|
||
|
{
|
||
|
char* build_log;
|
||
|
size_t log_size;
|
||
|
|
||
|
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
|
||
|
build_log = malloc(sizeof (char)* (log_size + 1));
|
||
|
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
|
||
|
build_log[log_size] = '\0';
|
||
|
printf("[OpenCL] Build error for %s :\n%s\n", path, build_log);
|
||
|
free(build_log);
|
||
|
|
||
|
clReleaseProgram(program);
|
||
|
return NULL;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
return program;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
return NULL;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
 * Create the kernel called `name` from an already built program.
 *
 * Returns the kernel handle, or NULL after printing a diagnostic when
 * clCreateKernel() reports an error.
 */
static cl_kernel _getkernel(cl_program program, const char* name)
{
    cl_int status;
    cl_kernel kernel = clCreateKernel(program, name, &status);

    if (status)
    {
        printf("[OpenCL] Error retrieving kernel %s : %s\n", name, _getErrorMessage(status));
        if (kernel)
        {
            clReleaseKernel(kernel);
        }
        return NULL;
    }

    return kernel;
}
|
||
|
|
||
|
/*
 * Round global_size up to the nearest multiple of group_size.
 *
 * group_size:  work-group size; when it is not positive no rounding is
 *              possible and global_size is returned unchanged.
 * global_size: number of work items requested; a non-positive value
 *              yields 0.
 *
 * The arithmetic is done in size_t so that the sum cannot overflow int.
 */
static size_t _roundUp(int group_size, int global_size)
{
    size_t group;
    size_t total;
    size_t remainder;

    if (global_size <= 0)
    {
        return 0;
    }
    if (group_size <= 0)
    {
        /* Avoids the division by zero the modulo would otherwise trigger. */
        return (size_t)global_size;
    }

    group = (size_t)group_size;
    total = (size_t)global_size;
    remainder = total % group;
    if (remainder == 0)
    {
        return total;
    }
    return total + (group - remainder);
}
|
||
|
|
||
|
|
||
|
void openclInit()
|
||
|
{
|
||
|
cl_int error;
|
||
|
cl_uint platform_count;
|
||
|
|
||
|
/* Get platform */
|
||
|
error = clGetPlatformIDs(1, &_platform, &platform_count);
|
||
|
if (error != CL_SUCCESS)
|
||
|
{
|
||
|
printf("[OpenCL] Error getting platform id: %s\n", _getErrorMessage(error));
|
||
|
return;
|
||
|
}
|
||
|
else if (platform_count != 1)
|
||
|
{
|
||
|
printf("[OpenCL] No platform available\n");
|
||
|
return;
|
||
|
}
|
||
|
/* Get available devices */
|
||
|
/* TODO Handle several devices */
|
||
|
error = clGetDeviceIDs(_platform, CL_DEVICE_TYPE_GPU, 1, &_device, NULL);
|
||
|
if (error != CL_SUCCESS)
|
||
|
{
|
||
|
printf("[OpenCL] Error getting devices: %s\n", _getErrorMessage(error));
|
||
|
return;
|
||
|
}
|
||
|
/* Create a context on better device */
|
||
|
_context = clCreateContext(0, 1, &_device, NULL, NULL, &error);
|
||
|
if (error != CL_SUCCESS)
|
||
|
{
|
||
|
printf("[OpenCL] Error creating context: %s\n", _getErrorMessage(error));
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/* Preload programs */
|
||
|
_noise_program = _loadProgram("opencl/noise.cl");
|
||
|
if (_noise_program)
|
||
|
{
|
||
|
_noise_kernel_simplex2d = _getkernel(_noise_program, "simplex_2d");
|
||
|
_noise_kernel_simplex3d = _getkernel(_noise_program, "simplex_3d");
|
||
|
if (!_noise_kernel_simplex2d || !_noise_kernel_simplex3d)
|
||
|
{
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/* Create a command queue in this context */
|
||
|
_queue = clCreateCommandQueue(_context, _device, 0, &error);
|
||
|
if (error != CL_SUCCESS)
|
||
|
{
|
||
|
printf("[OpenCL] Error creating command queue: %s\n", _getErrorMessage(error));
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
printf("OpenCL support is enabled.\n");
|
||
|
}
|
||
|
|
||
|
int openclAvailable()
|
||
|
{
|
||
|
return _queue != NULL;
|
||
|
}
|
||
|
|
||
|
void openclQuit()
|
||
|
{
|
||
|
if (_noise_kernel_simplex2d)
|
||
|
{
|
||
|
clReleaseKernel(_noise_kernel_simplex2d);
|
||
|
}
|
||
|
if (_noise_kernel_simplex3d)
|
||
|
{
|
||
|
clReleaseKernel(_noise_kernel_simplex3d);
|
||
|
}
|
||
|
if (_noise_program)
|
||
|
{
|
||
|
clReleaseProgram(_noise_program);
|
||
|
}
|
||
|
if (_queue)
|
||
|
{
|
||
|
clReleaseCommandQueue(_queue);
|
||
|
}
|
||
|
if (_context)
|
||
|
{
|
||
|
clReleaseContext(_context);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void openclTest()
|
||
|
{
|
||
|
cl_int error;
|
||
|
const int size = 100;
|
||
|
float src_a_h[size];
|
||
|
float src_b_h[size];
|
||
|
int i;
|
||
|
|
||
|
// Initialize both vectors
|
||
|
for (i = 0; i < size; i++)
|
||
|
{
|
||
|
src_a_h[i] = src_b_h[i] = (float)i;
|
||
|
}
|
||
|
|
||
|
const int mem_size = sizeof (float)*size;
|
||
|
// Allocates a buffer of size mem_size and copies mem_size bytes from src_a_h
|
||
|
cl_mem src_a_d = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, src_a_h, &error);
|
||
|
cl_mem src_b_d = clCreateBuffer(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, src_b_h, &error);
|
||
|
cl_mem res_d = clCreateBuffer(_context, CL_MEM_WRITE_ONLY, mem_size, NULL, &error);
|
||
|
|
||
|
// Creates the program
|
||
|
cl_program program = _loadProgram("test.cl");
|
||
|
if (program)
|
||
|
{
|
||
|
// Builds the program
|
||
|
error = clBuildProgram(program, 1, &_device, NULL, NULL, NULL);
|
||
|
if (error)
|
||
|
{
|
||
|
// Shows the log
|
||
|
char* build_log;
|
||
|
size_t log_size;
|
||
|
// First call to know the proper size
|
||
|
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
|
||
|
build_log = malloc(sizeof (char)* (log_size + 1));
|
||
|
// Second call to get the log
|
||
|
clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
|
||
|
build_log[log_size] = '\0';
|
||
|
printf("[OPENCL] Build log :\n%s\n", build_log);
|
||
|
free(build_log);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// Extracting the kernel
|
||
|
cl_kernel vector_add_k = clCreateKernel(program, "vector_add_gpu", &error);
|
||
|
if (!error)
|
||
|
{
|
||
|
// Enqueuing parameters
|
||
|
// Note that we inform the size of the cl_mem object, not the size of the memory pointed by it
|
||
|
error = clSetKernelArg(vector_add_k, 0, sizeof (cl_mem), &src_a_d);
|
||
|
error |= clSetKernelArg(vector_add_k, 1, sizeof (cl_mem), &src_b_d);
|
||
|
error |= clSetKernelArg(vector_add_k, 2, sizeof (cl_mem), &res_d);
|
||
|
error |= clSetKernelArg(vector_add_k, 3, sizeof (size_t), &size);
|
||
|
assert(error == CL_SUCCESS);
|
||
|
|
||
|
// Launching kernel
|
||
|
// TODO Get max number of items in device
|
||
|
// TODO Lock between the call and the result as we only have one queue
|
||
|
const size_t local_ws = 64; // Number of work-items per work-group
|
||
|
const size_t global_ws = _roundUp(local_ws, size); // Total number of work-items
|
||
|
error = clEnqueueNDRangeKernel(_queue, vector_add_k, 1, NULL, &global_ws, &local_ws, 0, NULL, NULL);
|
||
|
if (error)
|
||
|
{
|
||
|
printf("[OPENCL] Execution error : %s\n", _getErrorMessage(error));
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// Reading back
|
||
|
float check[size];
|
||
|
clEnqueueReadBuffer(_queue, res_d, CL_TRUE, 0, mem_size, check, 0, NULL, NULL);
|
||
|
|
||
|
for (i = 0; i < size; i++)
|
||
|
{
|
||
|
printf("%f %f %f\n", src_a_h[i], src_b_h[i], check[i]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
clReleaseKernel(vector_add_k);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
clReleaseProgram(program);
|
||
|
}
|
||
|
|
||
|
clReleaseMemObject(src_a_d);
|
||
|
clReleaseMemObject(src_b_d);
|
||
|
clReleaseMemObject(res_d);
|
||
|
}
|
||
|
#else
|
||
|
/* Stub used when HAVE_OPENCL is not defined: nothing to initialise. */
void openclInit()
{
    /* intentionally empty */
}
|
||
|
/* Stub used when HAVE_OPENCL is not defined: always reports 0 (unavailable). */
int openclAvailable()
{
    const int available = 0;

    return available;
}
|
||
|
/* Stub used when HAVE_OPENCL is not defined: nothing to release. */
void openclQuit()
{
    /* intentionally empty */
}
|
||
|
#endif
|