c++ - OpenCL 2-D array multiply -


i've begun experiment opencl. i'm trying make kernel multiply 2 2-d arrays. i've done vectors, in 2-d results first row. i've tried implementing of solutions found every single of them keeps messing first row. image execution: http://i.imgur.com/ljqsurv.png

here host file:

#include "stdafx.h" #include <cl/cl.hpp>  #include <vector> #include <iostream>  #include "util.hpp" // utility library     #define __cl_enable_exceptions #define rows (5) #define columns (5)  #include "metrics.h"  /*start main()*/  int main(void) {     int = 4;     /*define vectors operands , result*/      float** h_x = new float*[rows];     float** h_y = new float*[rows];     float** h_s = new float*[rows];      (int = 0; < rows; ++i){         h_x[i] = new float[columns];     }      (int = 0; < rows; ++i){         h_y[i] = new float[columns];     }      (int = 0; < rows; ++i){         h_s[i] = new float[columns];     }      // fill vectors , b random float values      (int = 0; < rows; i++)     {         (int j = 0; j < columns; j++){             h_x[i][j] = rand() / (float)rand_max;             h_y[i][j] = rand() / (float)rand_max;             h_s[i][j] = 0.0;         }        }      /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/      // platforms (drivers)      std::vector<cl::platform> all_platforms;     cl::platform::get(&all_platforms);       if (all_platforms.size() == 0){ // check issues         std::cout << " no platforms found. check opencl installation!\n";         exit(1);     }      cl::platform default_platform = all_platforms[0];     std::cout << "using platform: " << default_platform.getinfo<cl_platform_name>() << "\n";      // default device of default platform      std::vector<cl::device> all_devices;     default_platform.getdevices(cl_device_type_all, &all_devices);      if (all_devices.size() == 0){ // check issues         std::cout << " no devices found. check opencl installation!\n";         exit(1);     }      cl::device default_device = all_devices[0];     std::cout << "using device: " << default_device.getinfo<cl_device_name>() << "\n";      // create opencl context      cl::context context({ default_device });      cl::program program(context, util::loadprogram("saxy_kernel.cl"), true);      if (program.build({ default_device }) != cl_success){         std::cout << " error building: " << program.getbuildinfo<cl_program_build_log>(default_device) << "\n";         getchar();         exit(1);     }      // create buffers on device     cl::buffer buffer_x(context, cl_mem_read_write, sizeof(float)* rows*columns);     cl::buffer buffer_y(context, cl_mem_read_write, sizeof(float)* rows*columns);     cl::buffer buffer_s(context, cl_mem_read_write, sizeof(float)* rows*columns);     cl::buffer buffer_a(context, cl_mem_read_write, sizeof(int));      //create queue push commands device.     cl::commandqueue queue(context, default_device);       startcounter();     //write arrays , b device     queue.enqueuewritebuffer(buffer_x, cl_true, 0, sizeof(float)* rows*columns, &h_x[0][0]);     queue.enqueuewritebuffer(buffer_y, cl_true, 0, sizeof(float)* rows*columns, &h_y[0][0]);     queue.enqueuewritebuffer(buffer_a, cl_true, 0, sizeof(int), &a);      //run kernel     cl::kernel kernel_add = cl::kernel(program, "simple_add");     kernel_add.setarg(0, buffer_x);     kernel_add.setarg(1, buffer_y);     kernel_add.setarg(2, buffer_s);     kernel_add.setarg(3, buffer_a);      queue.enqueuendrangekernel(kernel_add, cl::nullrange, cl::ndrange(5,5), cl::nullrange);     queue.finish();      //read result c device array c     queue.enqueuereadbuffer(buffer_s, cl_true, 0, sizeof(float)* rows * columns, &h_s[0][0]);      std::cout << "kernel execution time: " << getcounter() << "ms \n";      /*print vectors*/     std::cout << "\nmatrix #1: \n";     (int = 0; i<rows; i++){         std::cout << "\n";         (int j = 0; j<columns; j++){             std::cout << "" << h_x[i][j] << "\t ";         }     }      std::cout << "\n\nmatrix #2: \n";     (int = 0; i<rows; i++){         std::cout << "\n";         (int j = 0; j<columns; j++){             std::cout << "" << h_y[i][j] << "\t ";         }     }      std::cout << "\n\nresult: \n";     (int = 0; i<rows; i++){         std::cout << "\n";         (int j = 0; j<columns; j++){             std::cout << "" << h_s[i][j] << "\t ";         }     }     getchar();     return 0; } 

and here kernel:

__kernel void kernel simple_add(    __global float* x,     __global float* y,     __global float* s,     __global int *a){     s[get_global_id(0)] = x[get_global_id(0)] * y[get_global_id(0)];  /* var defs    int k;    int = get_global_id(0);    int j = get_global_id(1);    float tmp;     if ( (i < 5) && (j < 5))    {        tmp = 0.0;        for(k=0;k<5;k++)            tmp += x[i*5+k] * y[k*5+j];        s[i*5+j] = tmp;    }*/ } 

i'm sure i'm doing really wrong, can't find out it. appreciated.

your kernel code fine, way creating opencl buffers , launching kernel. issue in way data represented on host, , how copying device.

your opencl buffers 1d arrays, necessary. host arrays 2d however, mean adjacent rows not contiguous (a 2d array array of pointers).

the (simplest) fix linearise storage on host, match data-layout of device:

float* h_x = new float[rows*columns]; (int = 0; < rows; ++i){     (int j = 0; j < columns; ++j){       h_x[j + i*columns] = rand() / (float)rand_max;;     } } 

Comments

Popular posts from this blog

Android : Making Listview full screen -

javascript - Parse JSON from the body of the POST -

javascript - Chrome Extension: Interacting with iframe embedded within popup -