// Mirror of https://github.com/StepanovPlaton/NeuralNetwork.git
// (synced 2026-04-03 20:30:39 +04:00) - 234 lines, 7.7 KiB, C++.
#pragma once
|
|
|
|
#include "opencl.hpp"
|
|
|
|
#include "kernels.hpp"
|
|
|
|
#include "../tensor.hpp"
|
|
|
|
#include <random>
|
|
|
|
/**
 * Tensor whose elements live in an OpenCL device buffer.
 *
 * Every operation is enqueued on the queue returned by `openCL.getQueue()`.
 * `event_` always refers to the most recent command enqueued against this
 * tensor's buffer (or is a null event if nothing has been enqueued yet), and
 * each new command waits on it, so operations on one tensor are ordered
 * without blocking the host.
 *
 * The class owns `data_` (a heap-allocated `cl::Buffer`) and releases it in
 * the destructor and before any re-assignment.
 *
 * @tparam T   element type stored in the buffer
 * @tparam Dim number of dimensions, forwarded to `ITensor<T, Dim>`
 */
template <typename T, int Dim> class Tensor : public ITensor<T, Dim> {
private:
  /// Owned device buffer; nullptr only for a moved-from tensor.
  cl::Buffer *data_ = nullptr;
  /// Last command enqueued on `data_`; mutable so const readers can chain on it.
  mutable cl::Event event_ = cl::Event();

  /**
   * Temporary wait list that converts to the `const std::vector<cl::Event> *`
   * expected by the OpenCL enqueue functions.
   *
   * Default-constructed (null) events are dropped: a tensor that has never had
   * a command enqueued carries a null `event_`, and a wait list containing a
   * null handle is rejected by OpenCL.
   */
  class AutoEventList {
  private:
    std::vector<cl::Event> events_;

  public:
    AutoEventList(std::initializer_list<cl::Event> events) {
      events_.reserve(events.size());
      for (const cl::Event &event : events)
        if (event() != nullptr)
          events_.push_back(event);
    }
    /// Yields nullptr (meaning "no dependencies") when the list is empty.
    operator const std::vector<cl::Event> *() const {
      return events_.empty() ? nullptr : &events_;
    }
  };

  /// Builds a wait list from any number of events; null events are skipped.
  template <typename... Events> AutoEventList all(Events &&...events) const {
    return AutoEventList{std::forward<Events>(events)...};
  }

  /**
   * Allocates a read/write device buffer for `size` elements.
   * @throws std::runtime_error if this tensor already owns a buffer.
   */
  void createBuf(size_t size) {
    if (data_ != nullptr)
      throw std::runtime_error("Tensor buffer already exists");
    data_ = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE,
                           size * sizeof(T));
  }

  /// Releases the owned buffer (if any) so that a new one can be created.
  void releaseBuf() noexcept {
    delete data_;
    data_ = nullptr;
  }

  /**
   * Allocates the buffer and uploads `data` into it.
   *
   * The write is blocking: callers pass short-lived host vectors, and a
   * non-blocking write would let the device read them after destruction.
   */
  void fillBuf(const std::vector<T> &data) {
    createBuf(data.size());
    openCL.getQueue().enqueueWriteBuffer(*data_, CL_TRUE, 0,
                                         data.size() * sizeof(T), data.data(),
                                         nullptr, &event_);
  }

  /// Allocates the buffer and enqueues a device-side copy of `other`'s data.
  void fillBuf(const Tensor &other) {
    createBuf(other.getSize());
    openCL.getQueue().enqueueCopyBuffer(*other.getData(), *data_, 0, 0,
                                        other.getSize() * sizeof(T),
                                        all(other.getEvent()), &event_);
  }

  /// Creates a kernel for `method` from a lazily built, shared kernel set.
  static cl::Kernel createKernel(typename Kernels<T>::Method method) {
    static Kernels<T> kernels(Kernels<T>::Vector::type4);
    return kernels.create(method);
  }

  /// Element count as the `int` passed to the kernels as a length argument.
  int sizeArg() const { return static_cast<int>(this->getSize()); }

  /**
   * Enqueues `kernel` over one work-item per element, waiting on `deps`, and
   * records the resulting event as this tensor's latest command.
   */
  void enqueueElementwise(cl::Kernel &kernel, const AutoEventList &deps) {
    openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
                                           cl::NDRange(this->getSize()),
                                           cl::NullRange, deps, &event_);
  }

  /**
   * Returns a copy of this tensor with the in-place unary kernel `method`
   * applied to the copy, leaving this tensor's buffer untouched.
   */
  Tensor applyUnary(typename Kernels<T>::Method method) const {
    Tensor result(*this);
    cl::Kernel kernel = createKernel(method);
    kernel.setArg(0, *result.data_);
    kernel.setArg(1, result.sizeArg());
    result.enqueueElementwise(kernel, all(result.event_));
    return result;
  }

public:
  typedef class ITensor<T, Dim> ITensor;

  using ITensor::axes_;
  using ITensor::checkAxisInDim;
  using ITensor::checkItHasSameShape;
  using ITensor::computeIndex;
  using ITensor::getSize;
  using ITensor::shape_;

  Tensor() = delete;

  /// Allocates an uninitialised buffer for a tensor of the given shape.
  Tensor(const std::array<size_t, Dim> &shape) : ITensor(shape) {
    createBuf(getSize());
  }

  /// Creates a tensor of the given shape with every element set to `value`.
  Tensor(const std::array<size_t, Dim> &shape, T value) : ITensor(shape) {
    const std::vector<T> data(getSize(), value);
    fillBuf(data);
  }

  /**
   * Creates a tensor of the given shape initialised from host `data`.
   * @throws std::invalid_argument if `data.size()` differs from the element
   *         count implied by `shape`.
   */
  Tensor(const std::array<size_t, Dim> &shape, const std::vector<T> &data)
      : ITensor(shape) {
    if (data.size() != getSize())
      throw std::invalid_argument("Tensor data size does not match shape");
    fillBuf(data);
  }

  /**
   * Creates a tensor filled with uniformly distributed random values drawn
   * with bounds `min` and `max` (integral or floating-point `T` only).
   * @throws std::invalid_argument for any other element type.
   */
  Tensor(const std::array<size_t, Dim> &shape, T min, T max) : ITensor(shape) {
    static std::random_device rd;
    static std::mt19937 gen(rd());
    std::vector<T> data(getSize());
    if constexpr (std::is_integral_v<T>) {
      std::uniform_int_distribution<T> dis(min, max);
      for (T &e : data)
        e = dis(gen);
    } else if constexpr (std::is_floating_point_v<T>) {
      std::uniform_real_distribution<T> dis(min, max);
      for (T &e : data)
        e = dis(gen);
    } else
      throw std::invalid_argument("Invalid randomized type");
    fillBuf(data);
  }

  /// Deep copy: allocates a new buffer and enqueues a device-side copy.
  Tensor(const Tensor &other) : ITensor(other) { fillBuf(other); }

  /**
   * Deep copy assignment. The previously owned buffer is released first;
   * without that `createBuf` would reject the assignment.
   */
  Tensor &operator=(const Tensor &other) {
    if (this == &other)
      return *this;
    ITensor::operator=(other);
    releaseBuf();
    fillBuf(other);
    return *this;
  }

  /// Takes over `other`'s buffer and event; `other` is left without a buffer.
  Tensor(Tensor &&other) noexcept : ITensor(std::move(other)) {
    data_ = other.data_;
    event_ = other.event_;
    other.data_ = nullptr;
  }

  /**
   * Move assignment: releases the current buffer, then takes over `other`'s.
   * Self-assignment is a no-op.
   */
  Tensor &operator=(Tensor &&other) noexcept {
    if (this == &other)
      return *this;
    ITensor::operator=(std::move(other));
    releaseBuf();
    data_ = other.data_;
    event_ = other.event_;
    other.data_ = nullptr;
    return *this;
  }

  ~Tensor() { releaseBuf(); }

  /// Device buffer holding the elements (nullptr for a moved-from tensor).
  const cl::Buffer *getData() const { return data_; }
  /// Event of the last command enqueued on this tensor's buffer.
  const cl::Event &getEvent() const { return event_; }

  using ITensor::operator+;
  using ITensor::operator-;

  /// Returns a new tensor: a copy of this one with the POSITIVE kernel applied.
  Tensor operator+() const override {
    return applyUnary(Kernels<T>::Method::POSITIVE);
  }

  /// Returns a new tensor: a copy of this one with the NEGATIVE kernel applied.
  Tensor operator-() const override {
    return applyUnary(Kernels<T>::Method::NEGATIVE);
  }

  /// Enqueues the S_ADD kernel with `scalar` on this tensor, in place.
  Tensor &operator+=(const T scalar) override {
    cl::Kernel kernel = createKernel(Kernels<T>::Method::S_ADD);
    kernel.setArg(0, *data_);
    kernel.setArg(1, scalar);
    kernel.setArg(2, sizeArg());
    enqueueElementwise(kernel, all(event_));
    return *this;
  }

  /// Enqueues the S_MULT kernel with `scalar` on this tensor, in place.
  Tensor &operator*=(const T scalar) override {
    cl::Kernel kernel = createKernel(Kernels<T>::Method::S_MULT);
    kernel.setArg(0, *data_);
    kernel.setArg(1, scalar);
    kernel.setArg(2, sizeArg());
    enqueueElementwise(kernel, all(event_));
    return *this;
  }

  /// Enqueues the T_ADD kernel with `other` as second operand, in place.
  /// NOTE(review): shapes are not compared here - confirm the base class or
  /// the callers guarantee that `other` has the same number of elements.
  Tensor &operator+=(const Tensor &other) override {
    cl::Kernel kernel = createKernel(Kernels<T>::Method::T_ADD);
    kernel.setArg(0, *data_);
    kernel.setArg(1, *other.getData());
    kernel.setArg(2, sizeArg());
    enqueueElementwise(kernel, all(event_, other.event_));
    return *this;
  }

  /// Enqueues the T_HADAMARD kernel with `other` as second operand, in place.
  /// The length is passed as `int`, like for every other element-wise kernel.
  Tensor &operator*=(const Tensor &other) override {
    cl::Kernel kernel = createKernel(Kernels<T>::Method::T_HADAMARD);
    kernel.setArg(0, *data_);
    kernel.setArg(1, *other.getData());
    kernel.setArg(2, sizeArg());
    enqueueElementwise(kernel, all(event_, other.event_));
    return *this;
  }

#define TILE_SIZE 16
#define VEC_SIZE 4
  /**
   * Matrix product of two 2-D tensors via the T_MULT kernel.
   *
   * @throws std::invalid_argument if the inner dimensions differ.
   *
   * NOTE(review): the launch uses a global range of (m / VEC_SIZE, n) with a
   * local range of (TILE_SIZE / VEC_SIZE, TILE_SIZE); this presumably requires
   * m and n to be multiples of TILE_SIZE - confirm against the kernel source.
   */
  Tensor<T, Dim == 1 ? 0 : 2> operator%(const Tensor &other) const {
    static_assert(Dim == 1 || Dim == 2,
                  "Inner product is only defined for vectors and matrices");
    if constexpr (Dim == 1) {
      // Dependent condition: fires only when this branch is instantiated.
      static_assert(Dim != 1, "TODO vector scalar multiplication");
    } else if constexpr (Dim == 2) {
      if (shape_[axes_[1]] != other.shape_[other.axes_[0]])
        throw std::invalid_argument(
            "Matrix dimensions must match for multiplication");
      size_t m = shape_[axes_[0]];
      size_t k = shape_[axes_[1]];
      size_t n = other.shape_[other.axes_[1]];
      Tensor<T, 2> result({m, n});
      cl::Kernel kernel = createKernel(Kernels<T>::Method::T_MULT);
      kernel.setArg(0, *data_);
      kernel.setArg(1, *other.getData());
      kernel.setArg(2, *result.getData());
      kernel.setArg(3, static_cast<int>(m));
      kernel.setArg(4, static_cast<int>(n));
      kernel.setArg(5, static_cast<int>(k));
      cl::NDRange global_size(m / VEC_SIZE, n);
      cl::NDRange local_size(TILE_SIZE / VEC_SIZE, TILE_SIZE);
      openCL.getQueue().enqueueNDRangeKernel(
          kernel, cl::NullRange, global_size, local_size,
          all(event_, other.event_), &result.event_);
      return result;
    }
  }

  /**
   * Reads the buffer back to the host, waits for the read to finish and
   * formats the elements with `ITensor::format`.
   *
   * The host staging vector uses element type `T` so that it is exactly
   * `getSize() * sizeof(T)` bytes, matching the size of the read.
   */
  std::string toString() const override {
    std::vector<T> result(getSize());
    openCL.getQueue().enqueueReadBuffer(*data_, CL_FALSE, 0,
                                        getSize() * sizeof(T), result.data(),
                                        all(event_), &event_);
    event_.wait();
    return ITensor::format(result);
  }
};
|
|
|
|
#include "tensor.tpp"
|
|
|
|
#include "../fabric.hpp"
|