When your OpenCL host code gets confusing, split it into these steps:
Allocate and initialize host memory
// Usually using malloc
Get platform and device id + setup
// Example
cl_platform_id platform_id;
cl_device_id device_id = NULL;
clGetPlatformIDs(1, &platform_id, NULL);
clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL);
Create context
// Example (assumes `cl_int _err;` has been declared to receive the error code)
cl_context context = NULL;
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err);
Create command queue
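// Example (keep the returned queue for later enqueue calls; on OpenCL 2.0+ prefer clCreateCommandQueueWithProperties)
cl_command_queue command_queue = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &_err);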
Create memory buffers
//Use
clCreateBuffer()
Copy host variables to device
//Use
clEnqueueWriteBuffer()
Create program from kernel
//Use
clCreateProgramWithBinary() or clCreateProgramWithSource()
Build program and create OpenCL kernel
//Use
clBuildProgram(), then clCreateKernel()
Set global and local work sizes
a. Set args kernel
//Use
clSetKernelArg()
b. Invoke kernel
//Use
clEnqueueNDRangeKernel()
Copy the termination variable or results back to the host
//Use
clEnqueueReadBuffer()