#define __CL_ENABLE_EXCEPTIONS
#if defined(__APPLE__) || defined(__MACOSX)
#include <OpenCL/cl.hpp>
#else
#include <CL/cl.hpp>
#endif
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

std::string OpenCLSource ;

// Some interesting data for the vectors
float InitialData1[32] = {37.,50.,54.,50.,56.,0.,43.,43.,74.,71.,32.,36.,16.,43.,56.,100.,50.,25.,15.,17.,74.,71.,32.,36.,16.,43.,56.,100.,50.,25.,15.,17.};
float InitialData2[32] = {35.,51.,54.,58.,55.,32.,36.,69.,27.,39.,35.,40.,16.,44.,55.,14.,58.,75.,18.,15.,74.,71.,32.,36.,16.,43.,56.,100.,50.,25.,15.,17.};

// Number of elements in the vectors to be added
#define SIZE_TEST 40960

int main(void)
{
  std::ifstream SourceFile("../VectorAdd.cl");
  if (SourceFile.good())
  while (!SourceFile.eof())
  {
    char s[2048];
    SourceFile.getline(s,2048);
    OpenCLSource.append(s);
    OpenCLSource.append("\n");
  }
  else
    return -1;
  SourceFile.close();
  std::cout << OpenCLSource << std::endl;
// Two integer source vectors in Host memory
  float HostVector1[SIZE_TEST], HostVector2[SIZE_TEST];
  float HostOutputVector[SIZE_TEST];
// Initialize with some interesting repeating data
  for (int c = 0; c < SIZE_TEST; c++)
  {
    HostVector1[c] = InitialData1[c%32]+c/32;
    HostVector2[c] = InitialData2[c%32];
  }
  cl_int err = CL_SUCCESS;
  try
  {
    std::vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);
    if (platforms.size() == 0)
    {
      std::cout << "Platform size 0\n";
      return -1;
    }
    std::cout << platforms.size() << " platforms" << std::endl;
    cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0};
    
    cl::Context context(CL_DEVICE_TYPE_ALL, properties);
    std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
    std::cout << devices.size() << " devices" << std::endl;
    cl_uint num;
    devices[0].getInfo(CL_DEVICE_MAX_COMPUTE_UNITS  ,&num);
    std::cout << num << " max_compute units" << std::endl;
    size_t wid;
    devices[0].getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS  ,&num);
    std::cout << num << " max_dimensions" << std::endl;
    devices[0].getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE ,&wid);
    std::cout << wid << " max_workgroup size" << std::endl;
    
    cl::Program::Sources source(1,std::make_pair(OpenCLSource.c_str(),OpenCLSource.size()));
    cl::Program program_ = cl::Program(context, source);
    program_.build(devices);




    cl::CommandQueue queue(context, devices[0], 0, &err);


    cl::Buffer GPUOutputVector(context, CL_MEM_WRITE_ONLY, sizeof(float) * SIZE_TEST, NULL, &err);
    for (int i=0;i<10000;++i)
    {
/*      for (int c = 0; c < SIZE_TEST; c++)
      {
      HostVector1[c] = InitialData1[(c+i)%32];
      HostVector2[c] = InitialData2[c%32];
      }*/
      float toto=1;
      cl::Buffer GPUVector1(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * SIZE_TEST, HostVector1, &err);
      cl::Buffer GPUVector2(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * SIZE_TEST, HostVector2, &err);
      cl::Buffer GPUConst1(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) , &toto, &err);
      cl::Event event;
      cl::Kernel kernel(program_, "VectorAdd", &err);
      kernel.setArg( 0, GPUOutputVector);
      kernel.setArg( 1, GPUVector1);
      kernel.setArg( 2, GPUVector2);
      kernel.setArg( 3, GPUConst1);
      queue.enqueueNDRangeKernel(kernel,cl::NullRange,cl::NDRange(SIZE_TEST),cl::NullRange,NULL,&event);
      event.wait();
    }
    cl::Event event;
    queue.enqueueReadBuffer(GPUOutputVector, CL_TRUE, 0, SIZE_TEST * sizeof(float), HostOutputVector, NULL, &event);
    event.wait();

// Print out the results
    for (int Rows = 0; Rows < 32 /*SIZE_TEST/32*/; Rows++)
    {
      for (int c = 0; c <32; c++)
        std::cout << HostOutputVector[Rows * 32 + c] << " " ;
      std::cout << std::endl;
    }
    std::cout <<"The End"  << std::endl;

  }
  catch (cl::Error err)
  {
    std::cerr
      << "ERROR: "
      << err.what()
      << "("
      << err.err()
      << ")"
      << std::endl;
  }
  return EXIT_SUCCESS;
}


