So I'm writing an openCL program that runs on both CPU + GPU and am currently trying to save/cache the binaries after creating my program with clCreateProgramWithSource(). I create my clContext and clProgram with CL_DEVICE_TYPE_ALL and build the source with those specifications.
I then take the binaries and store them to disk (with one binary file per device) so that on subsequent starts my program automatically calls clBuildProgramWithBinary.
The problem is that if I save the binaries to disk that were created with the setting CL_DEVICE_TYPE_ALL, the binary for the CPU gets corrupted and clBuildProgramWithBinary throws an error.
In order to get all the binary files saved to disk properly, I've had to edit my code to first run using CL_DEVICE_TYPE_CPU and save the CPU binary on its own, then edit my code again to run using CL_DEVICE_TYPE_GPU, save the gpu binaries and then finally switch it back to CL_DEVICE_TYPE_ALL. If I do this, clBuildProgramWithBinary is able to accurately build the binary for each device type and execute my program.
So is this just a quirk of openCL that I can't build binaries for GPUs and CPUs together? Or am I just doing this incorrectly?
I'm basing my code on the implementation of binary saving found here: https://code.google.com/p/opencl-book-samples/source/browse/trunk/src/Chapter_6/HelloBinaryWorld/HelloBinaryWorld.cpp?r=42 with modifications in place to handle multiple devices.
Here are some portions of my code below:
/*----Initial setup of platform, context and devices---*/
cl_int err, deviceCount;
cl_device_id *devices;
cl_platform_id platform;
cl_context context;
cl_program program;
err = clGetPlatformIDs(1, &platform, NULL);
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount);
devices = new cl_device_id[deviceCount];
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL);
context = clCreateContext(NULL, deviceCount, devices, NULL, NULL, &err);
/*---Build Program---*/
int numFiles = 2;
const char *sourceFiles[] =
{
"File1.cl",
"File2.cl",
};
char *sourceStrings[numFiles];
for(int i = 0; i < numFiles; i++)
{
sourceStrings[i] = ReadFile(sourceFiles[i]);
}
/*---Create the compute program from the source buffer---*/
program = clCreateProgramWithSource(context, numFiles, (const char **)sourceStrings, NULL, &err);
/*---Build the program executable---*/
err = clBuildProgram(program, deviceCount, devices, NULL, NULL, NULL);
/*----Save binary to disk---*/
//Determine the size of each program binary
size_t *programBinarySizes = new size_t[deviceCount];
err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * deviceCount, programBinarySizes, NULL);
if(err != CL_SUCCESS)
{
delete [] devices;
delete [] programBinarySizes;
return false;
}
unsigned char **programBinaries = new unsigned char*[deviceCount];
for(cl_uint i = 0; i < deviceCount; i++)
{
programBinaries[i] = new unsigned char[programBinarySizes[i]];
}
//Get all of the program binaries
err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char *) * deviceCount, programBinaries, NULL);
if (err != CL_SUCCESS)
{
delete [] devices;
delete [] programBinarySizes;
for (cl_uint i = 0; i < deviceCount; i++)
{
delete [] programBinaries[i];
}
delete [] programBinaries;
}
//Store the binaries
for(cl_uint i = 0; i < deviceCount; i++)
{
// Store the binary for all devices
std::string currFile = binaryFile + to_string(i) + ".txt";
FILE *fp = fopen(currFile.c_str(), "wb");
fwrite(programBinaries[i], 1, programBinarySizes[i], fp);
fclose(fp);
}
// Cleanup
delete [] programBinarySizes;
for (cl_uint i = 0; i < deviceCount; i++)
{
delete [] programBinaries[i];
}
delete [] programBinaries;
And then on the next go around my code with call this function to create the program from the binaries:
unsigned char **programBinaries = new unsigned char *[deviceCount];
size_t sizes[deviceCount];
for(int i = 0; i < deviceCount; i++)
{
string currFile = binaryFile + to_string(i) + ".txt";
FILE *fp = fopen(currFile.c_str(), "rb");
if(!fp) return NULL;
size_t binarySize;
fseek(fp, 0, SEEK_END);
binarySize = ftell(fp);
sizes[i] = binarySize;
rewind(fp);
programBinaries[i] = new unsigned char[binarySize];
fread(programBinaries[i], 1, binarySize, fp);
fclose(fp);
}
cl_int errNum = 0;
cl_program program;
cl_int binaryStatus;
program = clCreateProgramWithBinary(context,
deviceCount,
devices,
sizes,
(const unsigned char **)programBinaries,
&binaryStatus,
&errNum);
delete [] programBinaries;
errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
I have a rmbp which has three devices on the only one apple platform. I run your code on it and encountered the same problem. Actually I do not know the solution, but I can give you some hints for debugging.
I modified your snippet as follows:
#include <sys/stat.h>
unsigned char **programBinaries = new unsigned char *[deviceCount];
size_t sizes[deviceCount];
int fd;
struct stat st;
for(cl_uint i = 0; i < deviceCount; i++)
{
string currFile = binaryFile + to_string(i) + ".txt";
fd = open(currFile.c_str(), O_RDONLY);
if (fd == -1) {
return -1;
}
if ((fstat(fd, &st) != 0) || (!S_ISREG(st.st_mode))) {
return -2;
}
size_t binarySize;
FILE *fp = fdopen(fd, "rb");
if (fseeko(fp, 0 , SEEK_END) != 0) {
return -3;
}
binarySize = ftello(fp);
cout << "device " << i << ": " << binarySize << endl;
sizes[i] = binarySize;
rewind(fp);
programBinaries[i] = new unsigned char[binarySize];
fread(programBinaries[i], 1, binarySize, fp);
fclose(fp);
close(fd);
}
on my system, however, I got the same result as your original code.
cl_program clCreateProgramWithBinary ( cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, cl_int *errcode_ret)
binary_status: Returns whether the program binary for each device specified in device_list was loaded successfully or not. It is an array of num_devices entries and returns CL_SUCCESS in binary_status[i] if binary was successfully loaded for device specified by device_list[i]; otherwise returns CL_INVALID_VALUE if lengths[i] is zero or if binaries[i] is a NULL value or CL_INVALID_BINARY in binary_status[i] if program binary is not a valid binary for the specified device. If binary_status is NULL, it is ignored.
if you modify your code like this:
cl_int binaryStatus[deviceCount];
program = clCreateProgramWithBinary(context,
deviceCount,
devices,
sizes,
(const unsigned char **)programBinaries,
binaryStatus,
&errNum);
for (cl_uint i = 0; i < deviceCount; ++i)
{
cout << "device: " << i << ": " << binaryStatus[i] << endl;
}
normally, you will get the following results:
device: 0: 0
device: 1: -42
the first line means that the first binary program (for CPU) was successfully loaded. -42 in the second line corresponds CL_INVALID_BINARY ,which means it is failed to load the binary program.
I also try to retrieve the build options from the program, but got nothing.
//set device_id to 0,1,3...
cl_uint device_id = 0;
cl_build_status status;
// Determine the reason for the error
char buildOptions[16384];
char buildLog[16384];
clGetProgramBuildInfo(program, devices[device_id], CL_PROGRAM_BUILD_STATUS,
sizeof(cl_build_status), &status, NULL);
std::cout << "status: " << status << endl;
clGetProgramBuildInfo(program, devices[device_id], CL_PROGRAM_BUILD_OPTIONS,
sizeof(buildOptions), buildOptions, NULL);
std::cout << "build options: " << endl;
std::cout << buildOptions;
clGetProgramBuildInfo(program, devices[device_id], CL_PROGRAM_BUILD_LOG,
sizeof(buildLog), buildLog, NULL);
std::cout << "build log: " << endl;
std::cout << buildLog;
I guess it is a bug of opencl driver. hope the above stuff is helpful for you.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With