Tensor math OpenCL lib

This commit is contained in:
2025-11-19 18:05:11 +04:00
parent c1874212ae
commit bd8b26c35a
12 changed files with 361 additions and 355 deletions

View File

@@ -27,9 +27,9 @@ cl::Program OpenCL::compileProgram(const std::string &file) {
}
return program;
}
void OpenCL::loadPrograms() {
void OpenCL::loadPrograms(std::string &programsBasePath) {
for (const auto &entry : programPaths) {
programs[entry.first] = compileProgram(entry.second);
programs[entry.first] = compileProgram(programsBasePath + entry.second);
std::cout << "Loaded program: " << entry.second << std::endl;
}
}
@@ -89,10 +89,12 @@ void OpenCL::initializeDevice() {
<< " MB" << std::endl;
}
OpenCL::OpenCL() {
OpenCL::OpenCL() {}
void OpenCL::init(std::string programsBasePath) {
try {
initializeDevice();
loadPrograms();
loadPrograms(programsBasePath);
} catch (const cl::Error &e) {
std::cerr << "OpenCL error: " << e.what() << " (" << e.err() << ")"
<< std::endl;

View File

@@ -26,10 +26,10 @@ private:
std::unordered_map<Program, cl::Program> programs;
std::unordered_map<Program, std::string> programPaths = {
{Program::ATOMIC, "./opencl/kernels/atomic.cl"},
{Program::SCALAR, "./opencl/kernels/scalar.cl"},
{Program::TENSOR, "./opencl/kernels/tensor.cl"},
{Program::FUSION, "./opencl/kernels/fusion.cl"}};
{Program::ATOMIC, "opencl/kernels/atomic.cl"},
{Program::SCALAR, "opencl/kernels/scalar.cl"},
{Program::TENSOR, "opencl/kernels/tensor.cl"},
{Program::FUSION, "opencl/kernels/fusion.cl"}};
std::unordered_map<Method, Program> methodPrograms = {
{Method::POSITIVE, Program::ATOMIC},
{Method::NEGATIVE, Program::ATOMIC},
@@ -48,13 +48,15 @@ private:
std::string readProgram(const std::string &filePath);
cl::Program compileProgram(const std::string &file);
void loadPrograms();
void loadPrograms(std::string &programsBasePath);
void initializeDevice();
public:
OpenCL();
void init(std::string programsBasePath);
OpenCL(const OpenCL &) = delete;
OpenCL &operator=(const OpenCL &) = delete;
OpenCL(OpenCL &&) = delete;

View File

@@ -4,13 +4,12 @@
#include "../tensor.hpp"
#include <iostream>
#include <random>
#include <sstream>
template <typename T, int Dim> class Tensor : public ITensor<T, Dim> {
private:
cl::Buffer *data_ = nullptr;
cl::Event event_ = cl::Event();
mutable cl::Event event_ = cl::Event();
class AutoEventList {
private:
@@ -114,16 +113,10 @@ public:
const cl::Buffer *getData() const { return data_; }
const cl::Event &getEvent() const { return event_; }
// T &operator[](size_t i);
// const T &operator[](size_t i) const;
// template <typename... Indices> T &operator()(Indices... indices);
// template <typename... Indices> const T &operator()(Indices... indices)
// const;
using ITensor::operator+;
using ITensor::operator-;
Tensor operator+() override {
Tensor operator+() const override {
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::POSITIVE);
kernel.setArg(0, *data_);
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
@@ -132,7 +125,7 @@ public:
return *this;
}
Tensor operator-() override {
Tensor operator-() const override {
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::NEGATIVE);
kernel.setArg(0, *data_);
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
@@ -191,17 +184,17 @@ public:
if (shape_[axes_[1]] != other.shape_[other.axes_[0]])
throw std::invalid_argument(
"Matrix dimensions must match for multiplication");
int m = (int)shape_[axes_[0]];
int k = (int)shape_[axes_[1]];
int n = (int)other.shape_[other.axes_[1]];
size_t m = shape_[axes_[0]];
size_t k = shape_[axes_[1]];
size_t n = other.shape_[other.axes_[1]];
Tensor<T, 2> result({m, n});
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::T_MULT);
kernel.setArg(0, *data_);
kernel.setArg(1, *other.getData());
kernel.setArg(2, *result.getData());
kernel.setArg(3, m);
kernel.setArg(4, n);
kernel.setArg(5, k);
kernel.setArg(3, (int)m);
kernel.setArg(4, (int)n);
kernel.setArg(5, (int)k);
cl::NDRange global_size(((m + TILE_SIZE - 1) / TILE_SIZE) * TILE_SIZE,
((n + TILE_SIZE - 1) / TILE_SIZE) * TILE_SIZE);
cl::NDRange local_size(TILE_SIZE, TILE_SIZE);
@@ -214,50 +207,11 @@ public:
std::string toString() const override {
std::vector<float> result(getSize());
openCL.getQueue().enqueueReadBuffer(
*data_, CL_TRUE, 0, getSize() * sizeof(T), result.data(), all(event_));
std::ostringstream oss;
if constexpr (Dim == 0) {
oss << "Scalar<" << typeid(T).name() << ">: " << result[0];
} else if constexpr (Dim == 1) {
oss << "Vector<" << typeid(T).name() << ">(" << shape_[0] << "): [";
for (size_t i = 0; i < getSize(); ++i) {
oss << result[i];
if (i < getSize() - 1)
oss << ", ";
}
oss << "]";
} else if constexpr (Dim == 2) {
oss << "Matrix<" << typeid(T).name() << ">(" << shape_[axes_[0]] << "x"
<< shape_[axes_[1]] << "):";
for (size_t i = 0; i < shape_[axes_[0]]; ++i) {
oss << "\n [";
for (size_t j = 0; j < shape_[axes_[1]]; ++j) {
oss << result[i * shape_[axes_[0]] + j];
if (j < shape_[axes_[1]] - 1)
oss << ", ";
}
oss << "]";
}
} else {
oss << "Tensor" << Dim << "D<" << typeid(T).name() << ">" << "[";
for (size_t i = 0; i < Dim; ++i) {
oss << shape_[axes_[i]];
if (i < Dim - 1)
oss << "x";
}
oss << "]: [";
size_t show = std::min(getSize(), size_t(10));
for (size_t i = 0; i < show; ++i) {
oss << result[i];
if (i < show - 1)
oss << ", ";
}
if (getSize() > 10)
oss << ", ...";
oss << "]";
}
return oss.str();
openCL.getQueue().enqueueReadBuffer(*data_, CL_FALSE, 0,
getSize() * sizeof(T), result.data(),
all(event_), &event_);
event_.wait();
return ITensor::format(result);
}
};