mirror of
https://github.com/StepanovPlaton/NeuralNetwork.git
synced 2026-04-04 04:40:40 +04:00
Work
This commit is contained in:
@@ -4,13 +4,23 @@
|
||||
|
||||
#include "../tensor.hpp"
|
||||
|
||||
#include <random>
|
||||
|
||||
template <typename T, int Dim> class Tensor : public ITensor<T, Dim> {
|
||||
private:
|
||||
cl::Buffer *data_ = nullptr;
|
||||
cl::Event event_ = cl::Event();
|
||||
cl::Event *event_ = new cl::Event();
|
||||
|
||||
template <typename... Events> std::vector<cl::Event> all(Events &&...events) {
|
||||
return {std::forward<Events>(events)...};
|
||||
class AutoEventList {
|
||||
private:
|
||||
std::vector<cl::Event> events_;
|
||||
|
||||
public:
|
||||
AutoEventList(std::initializer_list<cl::Event> events) : events_(events) {}
|
||||
operator const std::vector<cl::Event> *() const { return &events_; }
|
||||
};
|
||||
// Packs any number of events into an AutoEventList, forwarding each
// argument into the list's initializer.
template <typename... Events>
auto all(Events &&...events) -> AutoEventList {
  return {std::forward<Events>(events)...};
}
|
||||
|
||||
void createBuf(size_t size) {
|
||||
@@ -22,15 +32,16 @@ private:
|
||||
|
||||
void fillBuf(const std::vector<T> &data) {
|
||||
createBuf(data.size());
|
||||
// event_ = event?!
|
||||
openCL.getQueue().enqueueWriteBuffer(*data_, CL_FALSE, 0,
|
||||
data.size() * sizeof(T), data.data(),
|
||||
all(event_), &event_);
|
||||
all(*event_), event_);
|
||||
}
|
||||
void fillBuf(size_t size, cl::Buffer *data) {
|
||||
createBuf(size);
|
||||
openCL.getQueue().enqueueWriteBuffer(*data_, CL_FALSE, 0,
|
||||
data.size() * sizeof(T), other..data(),
|
||||
all(event_), &event_);
|
||||
void fillBuf(const Tensor &other) {
|
||||
createBuf(other.getSize());
|
||||
openCL.getQueue().enqueueCopyBuffer(
|
||||
*other.getData(), *data_, 0, 0, other.getSize() * sizeof(T),
|
||||
all(*event_, *other.getEvent()), event_);
|
||||
}
|
||||
|
||||
public:
|
||||
@@ -56,57 +67,154 @@ public:
|
||||
: ITensor(shape) {
|
||||
fillBuf(data);
|
||||
}
|
||||
Tensor(const std::array<size_t, Dim> &shape, T min, T max) {
|
||||
Tensor(const std::array<size_t, Dim> &shape, T min, T max) : ITensor(shape) {
|
||||
static std::random_device rd;
|
||||
static std::mt19937 gen(rd());
|
||||
std::vector<T> data(getSize());
|
||||
if constexpr (std::is_integral_v<T>) {
|
||||
std::uniform_int_distribution<T> dis(min, max);
|
||||
for (T &e : data_)
|
||||
for (T &e : data)
|
||||
e = dis(gen);
|
||||
} else if constexpr (std::is_floating_point_v<T>) {
|
||||
std::uniform_real_distribution<T> dis(min, max);
|
||||
for (T &e : data_)
|
||||
for (T &e : data)
|
||||
e = dis(gen);
|
||||
} else
|
||||
throw std::invalid_argument("Invalid randomized type");
|
||||
fillBuf(data);
|
||||
}
|
||||
|
||||
Tensor(const Tensor &other) : ITensor(other.shape) {
|
||||
createBuf(other.getSize());
|
||||
q.enqueueCopyBuffer(*other.buffer, *buffer, 0, 0,
|
||||
other.getSize() * sizeof(float));
|
||||
// Copy constructor: copies the base-class state, adopts other's event
// pointer and fills this tensor's buffer from `other` via fillBuf.
//
// event_ is set in the member initializer list rather than in the body so
// the default `new cl::Event()` is never allocated and then dropped.
// NOTE(review): both tensors point at the same cl::Event object afterwards,
// exactly as before; confirm that sharing is intended.
Tensor(const Tensor &other) : ITensor(other), event_(other.event_) {
  fillBuf(other);
}
|
||||
Tensor &operator=(const Tensor &other);
|
||||
Tensor(Tensor &&other) noexcept;
|
||||
Tensor &operator=(Tensor &&other) noexcept;
|
||||
~Tensor() = default;
|
||||
// Copy assignment: copies the base-class state, adopts other's event
// pointer and refills this tensor's buffer from `other` via fillBuf.
// Returns *this.
Tensor &operator=(const Tensor &other) {
  // Self-assignment would make fillBuf call createBuf on the very buffer it
  // is about to copy from, so it is treated as a no-op.
  if (this == &other)
    return *this;
  ITensor::operator=(other);
  // NOTE(review): the event object previously pointed to by event_ is not
  // released here; the visible code never frees event objects - confirm
  // who owns them.
  event_ = other.event_;
  fillBuf(other);
  return *this;
}
|
||||
// Move constructor: takes over other's device buffer pointer and event
// pointer, then clears other's buffer pointer so its destructor does not
// delete the buffer now held by this tensor.
//
// The members are set in the initializer list so the default
// `new cl::Event()` is not allocated and then overwritten.
Tensor(Tensor &&other) noexcept
    : ITensor(std::move(other)), data_(other.data_), event_(other.event_) {
  // The member is named data_ (there is no `data` member in this class).
  other.data_ = nullptr;
}
|
||||
// Move assignment: releases the buffer currently held, takes over other's
// buffer pointer and event pointer, and clears other's buffer pointer.
// Returns *this.
Tensor &operator=(Tensor &&other) noexcept {
  // Moving onto itself would otherwise delete or null the only buffer.
  if (this == &other)
    return *this;
  ITensor::operator=(std::move(other));
  // Free the buffer being replaced; deleting a null pointer is a no-op.
  delete data_;
  data_ = other.data_;
  // NOTE(review): the event object previously pointed to by event_ is not
  // released here; the visible code never frees event objects - confirm
  // who owns them.
  event_ = other.event_;
  // The member is named data_ (there is no `data` member in this class).
  other.data_ = nullptr;
  return *this;
}
|
||||
// Destructor: deletes the device buffer object held in data_.
// Deleting a null pointer is a no-op, so no explicit null check is needed.
// event_ is not touched here.
~Tensor() { delete data_; }
|
||||
|
||||
T &operator[](size_t i);
|
||||
const T &operator[](size_t i) const;
|
||||
template <typename... Indices> T &operator()(Indices... indices);
|
||||
template <typename... Indices> const T &operator()(Indices... indices) const;
|
||||
// Read-only access to the device buffer pointer (may be nullptr).
const cl::Buffer *getData() const {
  return data_;
}
|
||||
// Read-only access to the event pointer that this tensor's enqueued
// commands wait on and write their completion event to.
const cl::Event *getEvent() const {
  return event_;
}
|
||||
|
||||
// T &operator[](size_t i);
|
||||
// const T &operator[](size_t i) const;
|
||||
// template <typename... Indices> T &operator()(Indices... indices);
|
||||
// template <typename... Indices> const T &operator()(Indices... indices)
|
||||
// const;
|
||||
|
||||
using ITensor::operator+;
|
||||
using ITensor::operator-;
|
||||
|
||||
Tensor operator+() const override;
|
||||
Tensor operator-() const override;
|
||||
// Unary plus: enqueues the POSITIVE kernel over this tensor's buffer with
// one work-item per element (getSize() items), chained on *event_, and
// returns a copy of *this.
Tensor operator+() override {
  cl::Kernel positive = openCL.createKernel(OpenCL::Method::POSITIVE);
  positive.setArg(0, *data_);
  const cl::NDRange globalRange(getSize());
  openCL.getQueue().enqueueNDRangeKernel(positive, cl::NullRange, globalRange,
                                         cl::NullRange, all(*event_), event_);
  return *this;
}
|
||||
|
||||
Tensor &operator+=(const T &scalar) override;
|
||||
// Unary minus: enqueues the NEGATIVE kernel over this tensor's buffer with
// one work-item per element (getSize() items), chained on *event_, and
// returns a copy of *this.
Tensor operator-() override {
  cl::Kernel negative = openCL.createKernel(OpenCL::Method::NEGATIVE);
  negative.setArg(0, *data_);
  const cl::NDRange globalRange(getSize());
  openCL.getQueue().enqueueNDRangeKernel(negative, cl::NullRange, globalRange,
                                         cl::NullRange, all(*event_), event_);
  return *this;
}
|
||||
|
||||
Tensor &operator*=(const T &scalar) override;
|
||||
// Scalar add-assign: enqueues the S_ADD kernel with this tensor's buffer as
// argument 0 and `scalar` as argument 1, over getSize() work-items. The
// command is chained on *event_. Returns *this.
Tensor &operator+=(const T scalar) override {
  cl::Kernel addScalar = openCL.createKernel(OpenCL::Method::S_ADD);
  addScalar.setArg(0, *data_);
  addScalar.setArg(1, scalar);
  const cl::NDRange globalRange(getSize());
  openCL.getQueue().enqueueNDRangeKernel(addScalar, cl::NullRange, globalRange,
                                         cl::NullRange, all(*event_), event_);
  return *this;
}
|
||||
|
||||
Tensor &operator+=(const Tensor &other) override;
|
||||
// Scalar multiply-assign: enqueues the S_MULT kernel with this tensor's
// buffer as argument 0 and `scalar` as argument 1, over getSize()
// work-items. The command is chained on *event_. Returns *this.
Tensor &operator*=(const T scalar) override {
  cl::Kernel mulScalar = openCL.createKernel(OpenCL::Method::S_MULT);
  mulScalar.setArg(0, *data_);
  mulScalar.setArg(1, scalar);
  const cl::NDRange globalRange(getSize());
  openCL.getQueue().enqueueNDRangeKernel(mulScalar, cl::NullRange, globalRange,
                                         cl::NullRange, all(*event_), event_);
  return *this;
}
|
||||
|
||||
Tensor &operator*=(const Tensor &other) override;
|
||||
// Tensor add-assign: enqueues the T_ADD kernel with this tensor's buffer as
// argument 0 and other's buffer as argument 1, over getSize() work-items.
// The command waits on both tensors' events and records its completion in
// *event_. Returns *this.
Tensor &operator+=(const Tensor &other) override {
  cl::Kernel addTensor = openCL.createKernel(OpenCL::Method::T_ADD);
  addTensor.setArg(0, *data_);
  addTensor.setArg(1, *other.getData());
  const cl::NDRange globalRange(getSize());
  openCL.getQueue().enqueueNDRangeKernel(addTensor, cl::NullRange, globalRange,
                                         cl::NullRange,
                                         all(*event_, *other.event_), event_);
  return *this;
}
|
||||
|
||||
Tensor<T, Dim == 1 ? 0 : 2> operator%(const Tensor &other) const;
|
||||
// Tensor multiply-assign: enqueues the T_HADAMARD kernel with this tensor's
// buffer as argument 0 and other's buffer as argument 1, over getSize()
// work-items. The command waits on both tensors' events and records its
// completion in *event_. Returns *this.
Tensor &operator*=(const Tensor &other) override {
  cl::Kernel hadamard = openCL.createKernel(OpenCL::Method::T_HADAMARD);
  hadamard.setArg(0, *data_);
  hadamard.setArg(1, *other.getData());
  const cl::NDRange globalRange(getSize());
  openCL.getQueue().enqueueNDRangeKernel(hadamard, cl::NullRange, globalRange,
                                         cl::NullRange,
                                         all(*event_, *other.event_), event_);
  return *this;
}
|
||||
|
||||
#define TILE_SIZE 16
|
||||
// Inner product of this tensor with `other`; only Dim == 1 and Dim == 2
// compile.
//
// Dim == 2: checks that this tensor's second axis extent equals other's
// first axis extent, creates an m x n result tensor and enqueues the T_MULT
// kernel with arguments (this buffer, other buffer, result buffer, m, n, k).
// The global range is m and n each rounded up to a multiple of TILE_SIZE,
// with a TILE_SIZE x TILE_SIZE local range. The command waits on both
// operands' events and records its completion in the result's event.
//
// Dim == 1: not implemented; instantiating it is a compile-time error.
//
// Throws std::invalid_argument when the inner dimensions differ.
Tensor<T, Dim == 1 ? 0 : 2> operator%(const Tensor &other) const {
  static_assert(Dim == 1 || Dim == 2,
                "Inner product is only defined for vectors and matrices");
  if constexpr (Dim == 1) {
    // The condition must depend on a template parameter: a literal
    // static_assert(false) is rejected before C++23 even inside a discarded
    // branch. This form fires only when the Dim == 1 branch is instantiated.
    static_assert(Dim != 1, "TODO vector scalar multiplication");
  } else if constexpr (Dim == 2) {
    if (shape_[axes_[1]] != other.shape_[other.axes_[0]])
      throw std::invalid_argument(
          "Matrix dimensions must match for multiplication");
    size_t m = shape_[axes_[0]];
    size_t k = shape_[axes_[1]];
    size_t n = other.shape_[other.axes_[1]];
    Tensor<T, 2> result({m, n});
    cl::Kernel kernel = openCL.createKernel(OpenCL::Method::T_MULT);
    kernel.setArg(0, *data_);
    kernel.setArg(1, *other.getData());
    kernel.setArg(2, *result.getData());
    // NOTE(review): m, n and k are passed as size_t; confirm the kernel
    // declares matching argument types.
    kernel.setArg(3, m);
    kernel.setArg(4, n);
    kernel.setArg(5, k);
    // Round each global dimension up to a whole number of tiles.
    cl::NDRange global_size(((m + TILE_SIZE - 1) / TILE_SIZE) * TILE_SIZE,
                            ((n + TILE_SIZE - 1) / TILE_SIZE) * TILE_SIZE);
    cl::NDRange local_size(TILE_SIZE, TILE_SIZE);
    openCL.getQueue().enqueueNDRangeKernel(
        kernel, cl::NullRange, global_size, local_size,
        all(*event_, *other.event_), result.event_);
    return result;
  }
}
|
||||
|
||||
std::string toString() const override;
|
||||
};
|
||||
|
||||
#include "tensor.tpp"
|
||||
|
||||
#include "../fabric.hpp"
|
||||
#include "../fabric.hpp"
|
||||
|
||||
Reference in New Issue
Block a user