diff --git a/src/device.hpp b/src/device.hpp deleted file mode 100644 index 451d38a..0000000 --- a/src/device.hpp +++ /dev/null @@ -1,131 +0,0 @@ -#ifndef DEVICE_H -#define DEVICE_H - -#include - -#include -#include -#include -#include - -#include "opencl.hpp" - -class CalcEngine { -private: - cl_platform_id platform; - cl_device_id device; - cl_context context; - std::string device_name; - - void initializeOpenCL() { - OpenCL::checkError(clGetPlatformIDs(1, &platform, nullptr), - "clGetPlatformIDs"); - OpenCL::checkError( - clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, nullptr), - "clGetDeviceIDs"); - - char name[128]; - clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(name), name, nullptr); - device_name = name; - - context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, nullptr); - if (!context) { - throw OpenCLException(-1, "clCreateContext"); - } - - std::cout << "OpenCL initialized successfully" << std::endl; - } - - void cleanup() { - if (context) - clReleaseContext(context); - } - -public: - CalcEngine() { initializeOpenCL(); } - - ~CalcEngine() { cleanup(); } - - const cl_platform_id getPlatform() const { return platform; }; - const cl_device_id getDevice() const { return device; }; - const cl_context getContext() const { return context; }; - const std::string getDeviceName() const { return device_name; }; - - void printDeviceInfo() const { - std::cout << "Using OpenCL device: " << device_name << std::endl; - } - - cl_mem createBuffer(cl_mem_flags flags, size_t size, void *host_ptr) { - cl_int ret; - cl_mem buffer = clCreateBuffer(context, flags, size, host_ptr, &ret); - OpenCL::checkError(ret, "clCreateBuffer"); - return buffer; - } - - cl_kernel loadKernel(const std::string &filename) { - std::string kernelSource = OpenCL::readFile(filename); - - const char *source_str = kernelSource.c_str(); - cl_program program = - clCreateProgramWithSource(context, 1, &source_str, nullptr, nullptr); - if (!program) { - throw OpenCLException(-1, "clCreateProgramWithSource"); - } - - cl_int ret = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr); - if (ret != CL_SUCCESS) { - size_t log_size; - clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, - &log_size); - std::vector log(log_size); - clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, - log.data(), nullptr); - - std::cerr << "Build log:\n" << log.data() << std::endl; - throw OpenCLException(ret, "clBuildProgram"); - } - - cl_kernel kernel = clCreateKernel(program, "matrix_mult", nullptr); - if (!kernel) { - throw OpenCLException(-1, "clCreateKernel"); - } - - std::cout << "Kernel loaded and compiled successfully" << std::endl; - - return kernel; - } - - void runKernel(cl_command_queue queue, cl_kernel kernel, int M, int N) { - size_t globalSize[2] = {static_cast(M), static_cast(N)}; - OpenCL::checkError(clEnqueueNDRangeKernel(queue, kernel, 2, nullptr, - globalSize, nullptr, 0, nullptr, - nullptr), - "clEnqueueNDRangeKernel"); - } - - void readResult(cl_command_queue queue, cl_mem buf, - std::vector &result) { - OpenCL::checkError(clEnqueueReadBuffer(queue, buf, CL_TRUE, 0, - result.size() * sizeof(float), - result.data(), 0, nullptr, nullptr), - "clEnqueueReadBuffer"); - } - - void setKernelArgs(cl_kernel kernel, cl_mem bufA, cl_mem bufB, cl_mem bufC, - int M, int N, int K) { - OpenCL::checkError(clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufA), - "clSetKernelArg for A"); - OpenCL::checkError(clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufB), - "clSetKernelArg for B"); - OpenCL::checkError(clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufC), - "clSetKernelArg for C"); - OpenCL::checkError(clSetKernelArg(kernel, 3, sizeof(int), &M), - "clSetKernelArg for M"); - OpenCL::checkError(clSetKernelArg(kernel, 4, sizeof(int), &N), - "clSetKernelArg for N"); - OpenCL::checkError(clSetKernelArg(kernel, 5, sizeof(int), &K), - "clSetKernelArg for K"); - } -}; - -#endif diff --git a/src/kernels/matrix.cl b/src/kernels/matrix.cl new file mode 100644 index 0000000..327a83f --- /dev/null +++ b/src/kernels/matrix.cl @@ -0,0 +1,72 @@ +__kernel void mult(__global float* A, __global float* B, __global float* C, + int M, int N, int K) { + const int tile_size = 16; + + int local_i = get_local_id(0); + int local_j = get_local_id(1); + int local_size_i = get_local_size(0); + int local_size_j = get_local_size(1); + + int global_i = get_group_id(0) * local_size_i + local_i; + int global_j = get_group_id(1) * local_size_j + local_j; + + __local float tile_A[16][16]; + __local float tile_B[16][16]; + + float sum = 0.0f; + + int num_tiles = (K + tile_size - 1) / tile_size; + + for (int tile = 0; tile < num_tiles; tile++) { + int tile_offset = tile * tile_size; + + int load_i_A = tile_offset + local_i; + int load_j_A = tile_offset + local_j; + + if (global_i < M && load_j_A < K) { + tile_A[local_j][local_i] = A[global_i * K + load_j_A]; + } else { + tile_A[local_j][local_i] = 0.0f; + } + + int load_i_B = tile_offset + local_i; + int load_j_B = tile_offset + local_j; + + if (load_i_B < K && global_j < N) { + tile_B[local_j][local_i] = B[load_i_B * N + global_j]; + } else { + tile_B[local_j][local_i] = 0.0f; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + #pragma unroll + for (int k = 0; k < tile_size; k++) { + sum += tile_A[k][local_i] * tile_B[local_j][k]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + } + + if (global_i < M && global_j < N) { + C[global_i * N + global_j] = sum; + } +} + +__kernel void mult_sc(__global float* A, __global float* B, float scalar, int M, int N) { + int i = get_global_id(0); + int j = get_global_id(1); + B[i * N + j] = A[i * N + j] * scalar; +} + +__kernel void add(__global float* A, __global float* B, __global float* C, float a, float b, int M, int N) { + int i = get_global_id(0); + int j = get_global_id(1); + C[i * N + j] = (A[i * N + j] * a) + (B[i * N + j] * b); +} + +__kernel void add_sc(__global float* A, __global float* B, float scalar, int M, int N) { + int i = get_global_id(0); + int j = get_global_id(1); + B[i * N + j] = A[i * N + j] + scalar; +} diff --git a/src/main.cpp b/src/main.cpp index 78859bc..414de44 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,75 +1,136 @@ -#include - +#include +#include +#include #include #include -#include "device.hpp" -#include "matrix.hpp" +#include "./math/math.hpp" -class MutableMatrix : public Matrix { -private: - CalcEngine *calcEngine; - cl_command_queue queue; - cl_kernel kernel; +typedef Matrices::CPU Matrix; +typedef MutableMatrices::CPU MutableMatrix; -public: - MutableMatrix(CalcEngine &calcEngine, size_t rows, size_t cols, float *matrix) - : Matrix(calcEngine, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, rows, cols, - matrix) { - this->calcEngine = &calcEngine; - kernel = calcEngine.loadKernel("matrix_mult.cl"); - queue = clCreateCommandQueue(calcEngine.getContext(), - calcEngine.getDevice(), 0, nullptr); - if (!queue) { - throw OpenCLException(-1, "clCreateCommandQueue"); - } +OpenCL openCL; + +std::vector generateRandomMatrix(int rows, int cols) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(-1.0f, 1.0f); + + std::vector matrix(rows * cols); + for (int i = 0; i < rows * cols; ++i) { + matrix[i] = dis(gen); } - - ~MutableMatrix() { - if (queue) - clReleaseCommandQueue(queue); + return matrix; +} +std::vector generateIdentityMatrix(int size) { + std::vector matrix(size * size, 0.0f); + for (int i = 0; i < size; ++i) { + matrix[i * size + i] = 1.0f; } - - void mult_by(Matrix &m) { - if (cols != m.getRows()) { - throw std::invalid_argument("Invalid matrix dimensions"); - } - - cl_mem b = - calcEngine->createBuffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, - rows * m.getCols() * sizeof(float), nullptr); - - calcEngine->setKernelArgs(kernel, buf, m.getBuf(), b, rows, m.getCols(), - cols); - calcEngine->runKernel(queue, kernel, rows, m.getCols()); - - clReleaseMemObject(buf); - buf = b; - } - - std::vector exportMatrix() { - std::vector C(rows, cols); - calcEngine->readResult(queue, buf, C); - return C; - } -}; + return matrix; +} int main() { - CalcEngine calcEngine; - calcEngine.printDeviceInfo(); + const int SIZE = 1024; - float matrixA[2 * 3] = {1, 2, 3, 4, 5, 6}; - MutableMatrix a(calcEngine, 2, 3, matrixA); + std::cout << "Testing with " << SIZE << "x" << SIZE << " matrices..." + << std::endl; - float matrixB[3 * 2] = {1, 2, 3, 4, 5, 6}; - Matrix b(calcEngine, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 3, 2, matrixB); + std::vector matrixA = generateRandomMatrix(SIZE, SIZE); + std::vector matrixB = generateRandomMatrix(SIZE, SIZE); + std::vector matrixC = generateRandomMatrix(SIZE, SIZE); - a.mult_by(b); + // std::vector matrixA = generateIdentityMatrix(SIZE); + // std::vector matrixB = generateIdentityMatrix(SIZE); + // std::vector matrixC = generateIdentityMatrix(SIZE); - std::vector v = a.exportMatrix(); - for (const auto &element : v) { - std::cout << element << " "; + // Тестирование на CPU + { + std::cout << "\n=== CPU Version ===" << std::endl; + + auto start = std::chrono::high_resolution_clock::now(); + + MutableMatrices::CPU a(SIZE, SIZE, matrixA); + Matrices::CPU b(SIZE, SIZE, matrixB); + Matrices::CPU c(SIZE, SIZE, matrixC); + + auto gen_end = std::chrono::high_resolution_clock::now(); + + auto op_start = std::chrono::high_resolution_clock::now(); + + for (int i = 0; i < 10; i++) { + a.mult(b); + } + + auto op_end = std::chrono::high_resolution_clock::now(); + + std::vector v = a.toVector(); + + auto total_end = std::chrono::high_resolution_clock::now(); + + auto gen_duration = + std::chrono::duration_cast(gen_end - start); + auto op_duration = std::chrono::duration_cast( + op_end - op_start); + auto total_duration = std::chrono::duration_cast( + total_end - start); + + std::cout << "Matrix generation time: " << gen_duration.count() << " ms" + << std::endl; + std::cout << "Operations time: " << op_duration.count() << " ms" + << std::endl; + std::cout << "Total time: " << total_duration.count() << " ms" << std::endl; + + std::cout << "First few elements: "; + for (int i = 0; i < 5 && i < v.size(); ++i) { + std::cout << v[i] << " "; + } + std::cout << std::endl; + } + + // Тестирование на GPU + { + std::cout << "\n=== GPU Version ===" << std::endl; + + auto start = std::chrono::high_resolution_clock::now(); + + MutableMatrices::GPU a(SIZE, SIZE, matrixA); + Matrices::GPU b(SIZE, SIZE, matrixB); + Matrices::GPU c(SIZE, SIZE, matrixC); + + auto gen_end = std::chrono::high_resolution_clock::now(); + + auto op_start = std::chrono::high_resolution_clock::now(); + + for (int i = 0; i < 10; i++) { + a.mult(b); + } + + auto op_end = std::chrono::high_resolution_clock::now(); + + std::vector v = a.toVector(); + + auto total_end = std::chrono::high_resolution_clock::now(); + + auto gen_duration = + std::chrono::duration_cast(gen_end - start); + auto op_duration = std::chrono::duration_cast( + op_end - op_start); + auto total_duration = std::chrono::duration_cast( + total_end - start); + + std::cout << "Matrix generation time: " << gen_duration.count() << " ms" + << std::endl; + std::cout << "Operations time: " << op_duration.count() << " ms" + << std::endl; + std::cout << "Total time: " << total_duration.count() << " ms" << std::endl; + + std::cout << "First few elements: "; + for (int i = 0; i < 5 && i < v.size(); ++i) { + std::cout << v[i] << " "; + } + std::cout << std::endl; } return 0; -} +} \ No newline at end of file diff --git a/src/math/math.hpp b/src/math/math.hpp new file mode 100644 index 0000000..7b0c4f4 --- /dev/null +++ b/src/math/math.hpp @@ -0,0 +1,11 @@ +#ifndef MATH_H +#define MATH_H + +#define __CL_ENABLE_EXCEPTIONS +#include + +#include "matrix.hpp" +#include "mutable_matrix.hpp" +#include "opencl/opencl.hpp" + +#endif \ No newline at end of file diff --git a/src/math/matrix.hpp b/src/math/matrix.hpp new file mode 100644 index 0000000..8959009 --- /dev/null +++ b/src/math/matrix.hpp @@ -0,0 +1,126 @@ +#ifndef MATRIX_H +#define MATRIX_H + +#include "./opencl/opencl.hpp" +#include +#include +#include +#include + +class IMatrix { +protected: + int rows; + int cols; + + void validateDimensions(int rows, int cols) { + if (rows <= 0 || cols <= 0) { + throw std::invalid_argument("Matrix dimensions must be positive"); + } + } + + void checkIndices(int row, int col) const { + if (row < 0 || row >= rows || col < 0 || col >= cols) { + throw std::out_of_range("Matrix indices out of range"); + } + } + +public: + IMatrix(int rows, int cols) : rows(rows), cols(cols) {} + virtual ~IMatrix() = default; + virtual int getRows() const = 0; + virtual int getCols() const = 0; + virtual const std::vector toVector() const = 0; +}; + +namespace Matrices { +class CPU; + +class GPU : public IMatrix { +protected: + cl::Buffer *buffer; + cl::CommandQueue queue; + +public: + GPU(int rows, int cols, const std::vector &matrix) + : IMatrix(rows, cols), queue(openCL.getContext(), openCL.getDevice()) { + validateDimensions(rows, cols); + if (matrix.size() != static_cast(rows * cols)) { + throw std::invalid_argument("Matrix data size doesn't match dimensions"); + } + + buffer = new cl::Buffer( + openCL.getContext(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + rows * cols * sizeof(float), const_cast(matrix.data())); + } + ~GPU() { delete buffer; } + + GPU(const GPU &) = delete; + GPU &operator=(const GPU &) = delete; + GPU(GPU &&other) = default; + GPU &operator=(GPU &&other) = default; + + int getRows() const override { return rows; } + int getCols() const override { return cols; } + size_t getSize() const { return rows * cols; } + + const cl::Buffer *getBuffer() const { return buffer; } + + const std::vector toVector() const { + std::vector result(rows * cols); + queue.enqueueReadBuffer(*buffer, CL_TRUE, 0, rows * cols * sizeof(float), + result.data()); + queue.finish(); + return result; + } + + CPU toCPU() const; +}; + +class CPU : public IMatrix { +protected: + std::vector data; + +public: + CPU(int rows, int cols, float value = 0.0f) + : IMatrix(rows, cols), data(rows * cols, value) { + validateDimensions(rows, cols); + } + + CPU(int rows, int cols, const std::vector &matrix) + : IMatrix(rows, cols), data(matrix) { + validateDimensions(rows, cols); + if (matrix.size() != static_cast(rows * cols)) { + throw std::invalid_argument("Data size doesn't match matrix dimensions"); + } + } + + CPU(const CPU &) = default; + CPU &operator=(const CPU &) = default; + CPU(CPU &&) = default; + CPU &operator=(CPU &&) = default; + ~CPU() override = default; + + float &operator()(int row, int col) { + checkIndices(row, col); + return data[row * cols + col]; + } + + const float &operator()(int row, int col) const { + checkIndices(row, col); + return data[row * cols + col]; + } + + const std::vector toVector() const { return data; } + + int getRows() const override { return rows; } + int getCols() const override { return cols; } + size_t getSize() const { return data.size(); } + + GPU toGPU(OpenCL &openCL) const { return GPU(rows, cols, data); } +}; + +CPU GPU::toCPU() const { return CPU(rows, cols, toVector()); } + +} // namespace Matrices + +#endif \ No newline at end of file diff --git a/src/math/mutable_matrix.hpp b/src/math/mutable_matrix.hpp new file mode 100644 index 0000000..812eade --- /dev/null +++ b/src/math/mutable_matrix.hpp @@ -0,0 +1,194 @@ +#ifndef MUTABLE_MATRIX_H +#define MUTABLE_MATRIX_H + +#include "./opencl/opencl.hpp" + +#include "matrix.hpp" + +template class IMutableMatrix { + static_assert(std::is_base_of::value, + "T must be derived from IMatrix"); + +public: + virtual void mult(T &m) = 0; + virtual void mult(float s) = 0; + virtual void add(T &m, float a, float b) = 0; + virtual void add(float a) = 0; + + void validateMultDimensions(T &a, T &b) { + if (a.getRows() != b.getCols()) { + throw std::invalid_argument( + "Invalid matrix dimensions for multiplication"); + } + } + void validateSameDimensions(T &a, T &b) { + if (a.getRows() != b.getRows() || a.getCols() != b.getCols()) { + throw std::invalid_argument("Invalid matrix dimensions for addition"); + } + } +}; + +namespace MutableMatrices { +class GPU : public Matrices::GPU, public IMutableMatrix { +private: + enum class Method { MULT, SCALAR_MULT, ADD, SCALAR_ADD }; + std::unordered_map kernels; + std::unordered_map kernelsNames = { + {Method::MULT, "mult"}, + {Method::SCALAR_MULT, "mult_sc"}, + {Method::ADD, "add"}, + {Method::SCALAR_ADD, "add_sc"}}; + + static void CL_CALLBACK releaseBuffer(cl_event event, cl_int status, + void *buf) { + if (status == CL_COMPLETE) { + // std::cout << "Kernel complete!" << std::endl; + delete buf; + } + } + +public: + GPU(int rows, int cols, const std::vector &matrix) + : Matrices::GPU(rows, cols, matrix) { + for (const auto &[method, kernelName] : kernelsNames) { + kernels[method] = + cl::Kernel(openCL.getProgram(OpenCL::Program::MATRIX), kernelName); + } + } + + void mult(Matrices::GPU &m) { + validateMultDimensions(*this, m); + + cl::Buffer *b = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE, + rows * m.getCols() * sizeof(float)); + + const int tile_size = 16; + cl::NDRange local_size(tile_size, tile_size); + cl::NDRange global_size(((rows + tile_size - 1) / tile_size) * tile_size, + ((m.getCols() + tile_size - 1) / tile_size) * + tile_size); + + kernels[Method::MULT].setArg(0, *buffer); + kernels[Method::MULT].setArg(1, *m.getBuffer()); + kernels[Method::MULT].setArg(2, *b); + kernels[Method::MULT].setArg(3, rows); + kernels[Method::MULT].setArg(4, m.getCols()); + kernels[Method::MULT].setArg(5, cols); + cl::Event event; + queue.enqueueNDRangeKernel(kernels[Method::MULT], cl::NullRange, + global_size, local_size, nullptr, &event); + + event.setCallback(CL_COMPLETE, releaseBuffer, buffer); + buffer = b; + cols = m.getCols(); + } + + void mult(float scalar) { + cl::Buffer *b = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE, + rows * cols * sizeof(float)); + kernels[Method::SCALAR_MULT].setArg(0, *buffer); + kernels[Method::SCALAR_MULT].setArg(1, *b); + kernels[Method::SCALAR_MULT].setArg(2, scalar); + kernels[Method::SCALAR_MULT].setArg(3, rows); + kernels[Method::SCALAR_MULT].setArg(4, cols); + cl::Event event; + queue.enqueueNDRangeKernel(kernels[Method::SCALAR_MULT], cl::NullRange, + cl::NDRange(rows, cols), cl::NullRange, nullptr, + &event); + + event.setCallback(CL_COMPLETE, releaseBuffer, buffer); + buffer = b; + } + + void add(Matrices::GPU &m, float a = 1.0f, float b = 1.0f) { + validateSameDimensions(*this, m); + + cl::Buffer *buf = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE, + rows * cols * sizeof(float)); + kernels[Method::ADD].setArg(0, *buffer); + kernels[Method::ADD].setArg(1, *m.getBuffer()); + kernels[Method::ADD].setArg(2, *buf); + kernels[Method::ADD].setArg(3, a); + kernels[Method::ADD].setArg(4, b); + kernels[Method::ADD].setArg(5, rows); + kernels[Method::ADD].setArg(6, cols); + cl::Event event; + queue.enqueueNDRangeKernel(kernels[Method::ADD], cl::NullRange, + cl::NDRange(rows, cols), cl::NullRange, nullptr, + &event); + + event.setCallback(CL_COMPLETE, releaseBuffer, buffer); + buffer = buf; + } + + void add(float scalar) { + cl::Buffer *b = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE, + rows * cols * sizeof(float)); + kernels[Method::SCALAR_ADD].setArg(0, *buffer); + kernels[Method::SCALAR_ADD].setArg(1, *b); + kernels[Method::SCALAR_ADD].setArg(2, scalar); + kernels[Method::SCALAR_ADD].setArg(3, rows); + kernels[Method::SCALAR_ADD].setArg(4, cols); + cl::Event event; + queue.enqueueNDRangeKernel(kernels[Method::SCALAR_ADD], cl::NullRange, + cl::NDRange(rows, cols), cl::NullRange, nullptr, + &event); + + event.setCallback(CL_COMPLETE, releaseBuffer, buffer); + buffer = b; + } +}; +class CPU : public Matrices::CPU, public IMutableMatrix { + +public: + CPU(int rows, int cols, const std::vector &matrix) + : Matrices::CPU(rows, cols, matrix) {} + + void mult(Matrices::CPU &m) { + validateMultDimensions(*this, m); + + std::vector result(rows * m.getCols(), 0.0f); + for (int i = 0; i < rows; i++) { + for (int j = 0; j < m.getCols(); j++) { + float sum = 0.0f; + for (int k = 0; k < cols; k++) { + sum += (*this)(i, k) * m(k, j); + } + result[i * m.getCols() + j] = sum; + } + } + data = std::move(result); + cols = m.getCols(); + } + + void mult(float scalar) { + for (int i = 0; i < rows; i++) { + for (int j = 0; j < cols; j++) { + data[i * cols + j] *= scalar; + } + } + } + + void add(Matrices::CPU &m, float a = 1.0f, float b = 1.0f) { + validateSameDimensions(*this, m); + + std::vector result(rows * cols, 0.0f); + for (int i = 0; i < rows; i++) { + for (int j = 0; j < cols; j++) { + result[i * cols + j] = ((*this)(i, j) * a) + (m(i, j) * b); + } + } + data = std::move(result); + } + + void add(float scalar) { + for (int i = 0; i < rows; i++) { + for (int j = 0; j < cols; j++) { + data[i * cols + j] += scalar; + } + } + } +}; +}; // namespace MutableMatrices + +#endif \ No newline at end of file diff --git a/src/math/opencl/opencl.hpp b/src/math/opencl/opencl.hpp new file mode 100644 index 0000000..7a314c2 --- /dev/null +++ b/src/math/opencl/opencl.hpp @@ -0,0 +1,171 @@ +#ifndef OPENCL_H +#define OPENCL_H + +#include +#include +#include +#include +#include +#include +#include + +class OpenCL { +public: + enum class Program { MATRIX, MATH, IMAGE_PROCESSING }; + +private: + cl::Device device; + cl::Context context; + cl::CommandQueue defaultQueue; + + std::unordered_map programs; + std::unordered_map programPaths = { + {Program::MATRIX, "./kernels/matrix.cl"}}; + + std::string readProgram(const std::string &filePath) { + std::ifstream file(filePath, std::ios::binary); + if (!file.is_open()) { + throw std::runtime_error("Cannot open file: " + filePath); + } + + std::stringstream buffer; + buffer << file.rdbuf(); + return buffer.str(); + } + + cl::Program compileProgram(const std::string &file) { + std::string source = readProgram(file); + cl::Program program(context, source); + try { + program.build({device}); + } catch (cl::Error &e) { + std::string build_log = + program.getBuildInfo(device); + std::cerr << "Build log:\n" << build_log << std::endl; + throw; + } + return program; + } + + void loadPrograms() { + for (const auto &[programType, filePath] : programPaths) { + try { + programs[programType] = compileProgram(filePath); + std::cout << "Loaded program: " << filePath << std::endl; + } catch (const std::exception &e) { + std::cerr << "Failed to load program " << filePath << ": " << e.what() + << std::endl; + } + } + } + + void initializeDevice() { + std::vector platforms; + cl::Platform::get(&platforms); + + if (platforms.empty()) { + throw std::runtime_error("No OpenCL platforms found"); + } + + std::vector devices; + bool deviceFound = false; + + for (const auto &platform : platforms) { + try { + platform.getDevices(CL_DEVICE_TYPE_GPU, &devices); + if (!devices.empty()) { + deviceFound = true; + break; + } + } catch (const cl::Error &) { + continue; + } + } + + if (!deviceFound) { + for (const auto &platform : platforms) { + try { + platform.getDevices(CL_DEVICE_TYPE_CPU, &devices); + if (!devices.empty()) { + deviceFound = true; + break; + } + } catch (const cl::Error &) { + continue; + } + } + } + + if (!deviceFound) { + throw std::runtime_error("No suitable OpenCL devices found"); + } + + device = devices[0]; + context = cl::Context(device); + defaultQueue = cl::CommandQueue(context, device); + + std::cout << "Using device: " << device.getInfo() + << "\nPlatform: " << platforms[0].getInfo() + << "\nCompute units: " + << device.getInfo() + << "\nGlobal memory: " + << device.getInfo() / (1024 * 1024) + << " MB" << std::endl; + } + +public: + OpenCL() { + try { + initializeDevice(); + loadPrograms(); + } catch (const cl::Error &e) { + std::cerr << "OpenCL error: " << e.what() << " (" << e.err() << ")" + << std::endl; + throw; + } + } + + OpenCL(const OpenCL &) = delete; + OpenCL &operator=(const OpenCL &) = delete; + OpenCL(OpenCL &&) = delete; + OpenCL &operator=(OpenCL &&) = delete; + + cl::Device &getDevice() { return device; } + cl::Context &getContext() { return context; } + cl::CommandQueue &getDefaultQueue() { return defaultQueue; } + + cl::Program &getProgram(Program program) { + auto it = programs.find(program); + if (it == programs.end()) { + throw std::invalid_argument("Program not loaded: " + + std::to_string(static_cast(program))); + } + return it->second; + } + + void printDeviceInfo() const { + std::cout << "=== OpenCL Device Info ===" << std::endl; + std::cout << "Name: " << device.getInfo() << std::endl; + std::cout << "Vendor: " << device.getInfo() << std::endl; + std::cout << "Version: " << device.getInfo() + << std::endl; + std::cout << "Compute Units: " + << device.getInfo() << std::endl; + std::cout << "Global Memory: " + << device.getInfo() / (1024 * 1024) + << " MB" << std::endl; + std::cout << "Local Memory: " + << device.getInfo() / 1024 << " KB" + << std::endl; + std::cout << "Max Work Group Size: " + << device.getInfo() << std::endl; + } + + bool hasProgram(Program program) const { + return programs.find(program) != programs.end(); + } +}; + +extern OpenCL openCL; + +#endif \ No newline at end of file diff --git a/src/matrix.hpp b/src/matrix.hpp deleted file mode 100644 index 840b8f0..0000000 --- a/src/matrix.hpp +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef MATRIX_H -#define MATRIX_H - -#include - -#include "device.hpp" - -class Matrix { -protected: - cl_mem buf; - size_t rows; - size_t cols; - -public: - Matrix(CalcEngine &calcEngine, cl_mem_flags flags, size_t rows, size_t cols, - float *matrix) - : rows(rows), cols(cols) { - if (rows == 0 || cols == 0) { - throw std::invalid_argument("Размеры матрицы должны быть больше 0"); - } - buf = calcEngine.createBuffer(flags, rows * cols * sizeof(float), matrix); - } - - ~Matrix() { clReleaseMemObject(buf); } - - size_t getRows() const { return rows; } - size_t getCols() const { return cols; } - - const cl_mem getBuf() const { return buf; } -}; - -#endif diff --git a/src/matrix_mult.cl b/src/matrix_mult.cl deleted file mode 100644 index 5cee616..0000000 --- a/src/matrix_mult.cl +++ /dev/null @@ -1,9 +0,0 @@ -__kernel void matrix_mult(__global float* A, __global float* B, __global float* C, int M, int N, int K) { - int i = get_global_id(0); - int j = get_global_id(1); - float sum = 0.0f; - for (int k = 0; k < K; k++) { - sum += A[i * K + k] * B[k * N + j]; - } - C[i * N + j] = sum; -} \ No newline at end of file diff --git a/src/opencl.hpp b/src/opencl.hpp deleted file mode 100644 index a7cd705..0000000 --- a/src/opencl.hpp +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef OPENCL_H -#define OPENCL_H - -#include -#include -#include - -class OpenCLException : public std::runtime_error { -private: - cl_int error_code; - -public: - OpenCLException(cl_int error, const std::string &operation) - : std::runtime_error("Error during " + operation + ": " + - std::to_string(error)), - error_code(error) {} - - cl_int getErrorCode() const { return error_code; } -}; - -class OpenCL { -public: - static void checkError(cl_int error, const std::string &operation) { - if (error != CL_SUCCESS) { - throw OpenCLException(error, operation); - } - } - - static std::string readFile(const std::string &filename) { - std::ifstream file(filename); - if (!file.is_open()) { - throw std::runtime_error("Failed to open kernel file: " + filename); - } - - return std::string((std::istreambuf_iterator(file)), - std::istreambuf_iterator()); - } -}; - -#endif