mirror of
https://github.com/StepanovPlaton/NeuralNetwork.git
synced 2026-04-03 20:30:39 +04:00
Work
This commit is contained in:
@@ -19,7 +19,7 @@ else
|
||||
endif
|
||||
|
||||
BUILD_DIR = build
|
||||
COMMON_SRC =
|
||||
COMMON_SRC = opencl/opencl.cpp
|
||||
|
||||
PYTHON_PATH = $(shell python -c "from sysconfig import get_paths; print(get_paths()['data'])")
|
||||
PYTHON_INCLUDE = $(shell python -c "import sysconfig; print(sysconfig.get_config_var('CONFINCLUDEPY'))")
|
||||
|
||||
@@ -38,12 +38,12 @@ public:
|
||||
using ITensor::operator+;
|
||||
using ITensor::operator-;
|
||||
|
||||
Tensor operator+() const override;
|
||||
Tensor operator-() const override;
|
||||
Tensor operator+() override;
|
||||
Tensor operator-() override;
|
||||
|
||||
Tensor &operator+=(const T &scalar) override;
|
||||
Tensor &operator+=(const T scalar) override;
|
||||
|
||||
Tensor &operator*=(const T &scalar) override;
|
||||
Tensor &operator*=(const T scalar) override;
|
||||
|
||||
Tensor &operator+=(const Tensor &other) override;
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ template <typename T, int Dim>
|
||||
Tensor<T, Dim>::Tensor(const std::array<size_t, Dim> &shape,
|
||||
const std::vector<T> &data)
|
||||
: Tensor(shape) {
|
||||
if (data.size() != data_.size())
|
||||
if (data.size() != getSize())
|
||||
throw std::invalid_argument("Invalid fill data size");
|
||||
data_ = data;
|
||||
}
|
||||
@@ -79,15 +79,13 @@ const T &Tensor<T, Dim>::operator()(Indices... indices) const {
|
||||
}
|
||||
|
||||
// ===== OPERATORS =====
|
||||
template <typename T, int Dim>
|
||||
Tensor<T, Dim> Tensor<T, Dim>::operator+() const {
|
||||
template <typename T, int Dim> Tensor<T, Dim> Tensor<T, Dim>::operator+() {
|
||||
Tensor result = *this;
|
||||
for (T &e : result.data_)
|
||||
e = +e;
|
||||
return result;
|
||||
}
|
||||
template <typename T, int Dim>
|
||||
Tensor<T, Dim> Tensor<T, Dim>::operator-() const {
|
||||
template <typename T, int Dim> Tensor<T, Dim> Tensor<T, Dim>::operator-() {
|
||||
Tensor result = *this;
|
||||
for (T &e : result.data_)
|
||||
e = -e;
|
||||
@@ -95,14 +93,14 @@ Tensor<T, Dim> Tensor<T, Dim>::operator-() const {
|
||||
}
|
||||
|
||||
template <typename T, int Dim>
|
||||
Tensor<T, Dim> &Tensor<T, Dim>::operator+=(const T &scalar) {
|
||||
Tensor<T, Dim> &Tensor<T, Dim>::operator+=(const T scalar) {
|
||||
for (T &e : data_)
|
||||
e += scalar;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename T, int Dim>
|
||||
Tensor<T, Dim> &Tensor<T, Dim>::operator*=(const T &scalar) {
|
||||
Tensor<T, Dim> &Tensor<T, Dim>::operator*=(const T scalar) {
|
||||
for (T &e : data_)
|
||||
e *= scalar;
|
||||
return *this;
|
||||
@@ -111,7 +109,7 @@ Tensor<T, Dim> &Tensor<T, Dim>::operator*=(const T &scalar) {
|
||||
template <typename T, int Dim>
|
||||
Tensor<T, Dim> &Tensor<T, Dim>::operator+=(const Tensor &other) {
|
||||
checkItHasSameShape(other);
|
||||
for (size_t i = 0; i < data_.size(); ++i)
|
||||
for (size_t i = 0; i < getSize(); ++i)
|
||||
data_[i] += other.data_[i];
|
||||
return *this;
|
||||
}
|
||||
@@ -119,7 +117,7 @@ Tensor<T, Dim> &Tensor<T, Dim>::operator+=(const Tensor &other) {
|
||||
template <typename T, int Dim>
|
||||
Tensor<T, Dim> &Tensor<T, Dim>::operator*=(const Tensor &other) {
|
||||
checkItHasSameShape(other);
|
||||
for (size_t i = 0; i < data_.size(); ++i)
|
||||
for (size_t i = 0; i < getSize(); ++i)
|
||||
data_[i] *= other.data_[i];
|
||||
return *this;
|
||||
}
|
||||
@@ -130,10 +128,10 @@ Tensor<T, Dim>::operator%(const Tensor &other) const {
|
||||
static_assert(Dim == 1 || Dim == 2,
|
||||
"Inner product is only defined for vectors and matrices");
|
||||
if constexpr (Dim == 1) {
|
||||
if (data_.size() != other.data_.size())
|
||||
if (getSize() != other.getSize())
|
||||
throw std::invalid_argument("Vector sizes must match for inner product");
|
||||
T result_val = T(0);
|
||||
for (size_t i = 0; i < data_.size(); ++i)
|
||||
for (size_t i = 0; i < getSize(); ++i)
|
||||
result_val += data_[i] * other.data_[i];
|
||||
return Tensor<T, 0>({}, {result_val});
|
||||
} else if constexpr (Dim == 2) {
|
||||
@@ -163,9 +161,9 @@ template <typename T, int Dim> std::string Tensor<T, Dim>::toString() const {
|
||||
oss << "Scalar<" << typeid(T).name() << ">: " << data_[0];
|
||||
} else if constexpr (Dim == 1) {
|
||||
oss << "Vector<" << typeid(T).name() << ">(" << shape_[0] << "): [";
|
||||
for (size_t i = 0; i < data_.size(); ++i) {
|
||||
for (size_t i = 0; i < getSize(); ++i) {
|
||||
oss << data_[i];
|
||||
if (i < data_.size() - 1)
|
||||
if (i < getSize() - 1)
|
||||
oss << ", ";
|
||||
}
|
||||
oss << "]";
|
||||
@@ -189,13 +187,13 @@ template <typename T, int Dim> std::string Tensor<T, Dim>::toString() const {
|
||||
oss << "x";
|
||||
}
|
||||
oss << "]: [";
|
||||
size_t show = std::min(data_.size(), size_t(10));
|
||||
size_t show = std::min(getSize(), size_t(10));
|
||||
for (size_t i = 0; i < show; ++i) {
|
||||
oss << data_[i];
|
||||
if (i < show - 1)
|
||||
oss << ", ";
|
||||
}
|
||||
if (data_.size() > 10)
|
||||
if (getSize() > 10)
|
||||
oss << ", ...";
|
||||
oss << "]";
|
||||
}
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
#include "cpu/tensor.hpp"
|
||||
// #include "cpu/tensor.hpp"
|
||||
#include "opencl/tensor.hpp"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
// TODO: GENERIC KERNELS
|
||||
// TODO: Scalar mult
|
||||
|
||||
int main() {
|
||||
Tensor<float, 2> a = Tensor<float, 2>({2, 4});
|
||||
std::cout << a.toString();
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
__kernel void positive(__global float *A, __global float *B) {
|
||||
__kernel void positive(__global float *A) {
|
||||
int i = get_global_id(0);
|
||||
B[i] = +A[i];
|
||||
A[i] = +A[i];
|
||||
}
|
||||
|
||||
__kernel void negative(__global float *A, __global float *B) {
|
||||
__kernel void negative(__global float *A) {
|
||||
int i = get_global_id(0);
|
||||
B[i] = -A[i];
|
||||
A[i] = -A[i];
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
__kernel void add(__global float *A, __global float *B, float scalar) {
|
||||
__kernel void add(__global float *A, float scalar) {
|
||||
int i = get_global_id(0);
|
||||
B[i] = A[i] + scalar;
|
||||
A[i] += scalar;
|
||||
}
|
||||
|
||||
__kernel void mult(__global float *A, __global float *B, float scalar) {
|
||||
__kernel void mult(__global float *A, float scalar) {
|
||||
int i = get_global_id(0);
|
||||
B[i] = A[i] * scalar;
|
||||
}
|
||||
|
||||
@@ -1,134 +1,54 @@
|
||||
__kernel void add(__global float *A, __global float *B, __global float *C,
|
||||
float x) {
|
||||
__kernel void add(__global float *A, __global float *B) {
|
||||
int i = get_global_id(0);
|
||||
C[i] = A[i] + (B[i] * x);
|
||||
A[i] += B[i];
|
||||
}
|
||||
__kernel void hadamard_mult(__global float *A, __global float *B) {
|
||||
int i = get_global_id(0);
|
||||
A[i] *= B[i];
|
||||
}
|
||||
|
||||
#define TILE_SIZE 16
|
||||
__kernel void mult(__global float *A, __global float *B, __global float *C,
|
||||
float x) {
|
||||
int i = get_global_id(0);
|
||||
C[i] = A[i] * (B[i] * x);
|
||||
}
|
||||
|
||||
float activate(float x, const int activation_type, const float alpha) {
|
||||
switch (activation_type) {
|
||||
case 0: // LINEAR
|
||||
return x;
|
||||
case 1: // SIGMOID
|
||||
return 1.0f / (1.0f + exp(-x));
|
||||
case 2: // TANH
|
||||
return tanh(x);
|
||||
case 3: // RELU
|
||||
return fmax(0.0f, x);
|
||||
case 4: // LEAKY_RELU
|
||||
return (x > 0.0f) ? x : alpha * x;
|
||||
case 5: // ELU
|
||||
return (x > 0.0f) ? x : alpha * (exp(x) - 1.0f);
|
||||
default:
|
||||
return x;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void mult_small(__global float *A, __global float *B,
|
||||
__global float *C, __global float *bias,
|
||||
const int activation_type, const float alpha,
|
||||
const int M, const int N, const int K,
|
||||
const int transpose_B) {
|
||||
const int row = get_global_id(0);
|
||||
const int col = get_global_id(1);
|
||||
|
||||
if (row < M && col < N) {
|
||||
const int M, const int N, const int K) {
|
||||
|
||||
const int row = get_global_id(0);
|
||||
const int col = get_global_id(1);
|
||||
const int local_row = get_local_id(0);
|
||||
const int local_col = get_local_id(1);
|
||||
|
||||
__local float tile_A[TILE_SIZE][TILE_SIZE];
|
||||
__local float tile_B[TILE_SIZE][TILE_SIZE];
|
||||
|
||||
float sum = 0.0f;
|
||||
for (int k = 0; k < K; k++) {
|
||||
float a_val = A[row * K + k];
|
||||
|
||||
float b_val;
|
||||
if (transpose_B) {
|
||||
b_val = B[col * K + k];
|
||||
} else {
|
||||
b_val = B[k * N + col];
|
||||
}
|
||||
|
||||
sum += a_val * b_val;
|
||||
|
||||
for (int t = 0; t < (K - 1) / TILE_SIZE + 1; t++) {
|
||||
|
||||
int a_col = t * TILE_SIZE + local_col;
|
||||
if (row < M && a_col < K) {
|
||||
tile_A[local_row][local_col] = A[row * K + a_col];
|
||||
} else {
|
||||
tile_A[local_row][local_col] = 0.0f;
|
||||
}
|
||||
|
||||
int b_row = t * TILE_SIZE + local_row;
|
||||
if (b_row < K && col < N) {
|
||||
tile_B[local_row][local_col] = B[b_row * N + col];
|
||||
} else {
|
||||
tile_B[local_row][local_col] = 0.0f;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
int k_max = min(TILE_SIZE, K - t * TILE_SIZE);
|
||||
for (int k = 0; k < k_max; k++) {
|
||||
sum += tile_A[local_row][k] * tile_B[k][local_col];
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
float result = sum + bias[col];
|
||||
if (activation_type != 0) {
|
||||
result = activate(result, activation_type, alpha);
|
||||
|
||||
if (row < M && col < N) {
|
||||
C[row * N + col] = sum;
|
||||
}
|
||||
C[row * N + col] = result;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void mult(__global float *A, __global float *B, __global float *C,
|
||||
__global float *bias, const int activation_type,
|
||||
const float alpha, const int M, const int N, const int K,
|
||||
const int transpose_B) {
|
||||
const int tile_size = 16;
|
||||
|
||||
int local_i = get_local_id(0);
|
||||
int local_j = get_local_id(1);
|
||||
int local_size_i = get_local_size(0);
|
||||
int local_size_j = get_local_size(1);
|
||||
|
||||
int global_i = get_group_id(0) * local_size_i + local_i;
|
||||
int global_j = get_group_id(1) * local_size_j + local_j;
|
||||
|
||||
__local float tile_A[16][16];
|
||||
__local float tile_B[16][16];
|
||||
|
||||
float sum = 0.0f;
|
||||
|
||||
int num_tiles = (K + tile_size - 1) / tile_size;
|
||||
|
||||
for (int tile = 0; tile < num_tiles; tile++) {
|
||||
int tile_offset = tile * tile_size;
|
||||
|
||||
// Загрузка tile_A (без изменений)
|
||||
int load_i_A = tile_offset + local_i;
|
||||
int load_j_A = tile_offset + local_j;
|
||||
|
||||
if (global_i < M && load_j_A < K) {
|
||||
tile_A[local_j][local_i] = A[global_i * K + load_j_A];
|
||||
} else {
|
||||
tile_A[local_j][local_i] = 0.0f;
|
||||
}
|
||||
|
||||
// Загрузка tile_B с учетом транспонирования
|
||||
int load_i_B = tile_offset + local_i;
|
||||
int load_j_B = tile_offset + local_j;
|
||||
|
||||
if (transpose_B) {
|
||||
// B транспонирована: обращаем индексы
|
||||
if (load_i_B < N && global_j < K) {
|
||||
tile_B[local_j][local_i] = B[global_j * N + load_i_B];
|
||||
} else {
|
||||
tile_B[local_j][local_i] = 0.0f;
|
||||
}
|
||||
} else {
|
||||
// B не транспонирована (оригинальная логика)
|
||||
if (load_i_B < K && global_j < N) {
|
||||
tile_B[local_j][local_i] = B[load_i_B * N + global_j];
|
||||
} else {
|
||||
tile_B[local_j][local_i] = 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#pragma unroll
|
||||
for (int k = 0; k < tile_size; ++k) {
|
||||
sum += tile_A[k][local_i] * tile_B[local_j][k];
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
if (global_i < M && global_j < N) {
|
||||
float result = sum + bias[global_j];
|
||||
if (activation_type != 0) {
|
||||
result = activate(result, activation_type, alpha);
|
||||
}
|
||||
C[global_i * N + global_j] = result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -102,13 +102,24 @@ OpenCL::OpenCL() {
|
||||
|
||||
cl::Program &OpenCL::getProgram(Program program) {
|
||||
auto it = programs.find(program);
|
||||
if (it == programs.end()) {
|
||||
if (it == programs.end())
|
||||
throw std::invalid_argument("Program not loaded: " +
|
||||
std::to_string(static_cast<int>(program)));
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
|
||||
cl::Kernel OpenCL::createKernel(Method method) {
|
||||
auto methodProgram = methodPrograms.find(method);
|
||||
if (methodProgram == methodPrograms.end())
|
||||
throw std::invalid_argument("Not found program for method: " +
|
||||
std::to_string(static_cast<int>(method)));
|
||||
auto methodName = methodNames.find(method);
|
||||
if (methodName == methodNames.end())
|
||||
throw std::invalid_argument("Not found name for method: " +
|
||||
std::to_string(static_cast<int>(method)));
|
||||
return cl::Kernel(getProgram(methodProgram->second), methodName->second);
|
||||
}
|
||||
|
||||
void OpenCL::printDeviceInfo() const {
|
||||
std::cout << "=== OpenCL Device Info ===" << std::endl;
|
||||
std::cout << "Name: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
||||
|
||||
@@ -8,6 +8,15 @@
|
||||
|
||||
class OpenCL {
|
||||
public:
|
||||
enum class Method {
|
||||
POSITIVE,
|
||||
NEGATIVE,
|
||||
S_ADD,
|
||||
S_MULT,
|
||||
T_ADD,
|
||||
T_HADAMARD,
|
||||
T_MULT,
|
||||
};
|
||||
enum class Program { ATOMIC, SCALAR, TENSOR, FUSION };
|
||||
|
||||
private:
|
||||
@@ -21,6 +30,21 @@ private:
|
||||
{Program::SCALAR, "./opencl/kernels/scalar.cl"},
|
||||
{Program::TENSOR, "./opencl/kernels/tensor.cl"},
|
||||
{Program::FUSION, "./opencl/kernels/fusion.cl"}};
|
||||
std::unordered_map<Method, Program> methodPrograms = {
|
||||
{Method::POSITIVE, Program::ATOMIC},
|
||||
{Method::NEGATIVE, Program::ATOMIC},
|
||||
{Method::S_ADD, Program::SCALAR},
|
||||
{Method::S_MULT, Program::SCALAR},
|
||||
{Method::T_ADD, Program::TENSOR},
|
||||
{Method::T_HADAMARD, Program::TENSOR},
|
||||
{Method::T_MULT, Program::TENSOR},
|
||||
};
|
||||
std::unordered_map<Method, std::string> methodNames = {
|
||||
{Method::POSITIVE, "positive"}, {Method::NEGATIVE, "negative"},
|
||||
{Method::S_ADD, "add"}, {Method::S_MULT, "mult"},
|
||||
{Method::T_ADD, "add"}, {Method::T_HADAMARD, "hadamard_mult"},
|
||||
{Method::T_MULT, "mult"},
|
||||
};
|
||||
|
||||
std::string readProgram(const std::string &filePath);
|
||||
cl::Program compileProgram(const std::string &file);
|
||||
@@ -41,5 +65,9 @@ public:
|
||||
const cl::CommandQueue &getQueue() { return queue; }
|
||||
|
||||
cl::Program &getProgram(Program program);
|
||||
cl::Kernel createKernel(Method method);
|
||||
|
||||
void printDeviceInfo() const;
|
||||
};
|
||||
|
||||
OpenCL openCL;
|
||||
|
||||
@@ -4,13 +4,23 @@
|
||||
|
||||
#include "../tensor.hpp"
|
||||
|
||||
#include <random>
|
||||
|
||||
template <typename T, int Dim> class Tensor : public ITensor<T, Dim> {
|
||||
private:
|
||||
cl::Buffer *data_ = nullptr;
|
||||
cl::Event event_ = cl::Event();
|
||||
cl::Event *event_ = new cl::Event();
|
||||
|
||||
template <typename... Events> std::vector<cl::Event> all(Events &&...events) {
|
||||
return {std::forward<Events>(events)...};
|
||||
class AutoEventList {
|
||||
private:
|
||||
std::vector<cl::Event> events_;
|
||||
|
||||
public:
|
||||
AutoEventList(std::initializer_list<cl::Event> events) : events_(events) {}
|
||||
operator const std::vector<cl::Event> *() const { return &events_; }
|
||||
};
|
||||
template <typename... Events> AutoEventList all(Events &&...events) {
|
||||
return AutoEventList{std::forward<Events>(events)...};
|
||||
}
|
||||
|
||||
void createBuf(size_t size) {
|
||||
@@ -22,15 +32,16 @@ private:
|
||||
|
||||
void fillBuf(const std::vector<T> &data) {
|
||||
createBuf(data.size());
|
||||
// event_ = event?!
|
||||
openCL.getQueue().enqueueWriteBuffer(*data_, CL_FALSE, 0,
|
||||
data.size() * sizeof(T), data.data(),
|
||||
all(event_), &event_);
|
||||
all(*event_), event_);
|
||||
}
|
||||
void fillBuf(size_t size, cl::Buffer *data) {
|
||||
createBuf(size);
|
||||
openCL.getQueue().enqueueWriteBuffer(*data_, CL_FALSE, 0,
|
||||
data.size() * sizeof(T), other..data(),
|
||||
all(event_), &event_);
|
||||
void fillBuf(const Tensor &other) {
|
||||
createBuf(other.getSize());
|
||||
openCL.getQueue().enqueueCopyBuffer(
|
||||
*other.getData(), *data_, 0, 0, other.getSize() * sizeof(T),
|
||||
all(*event_, *other.getEvent()), event_);
|
||||
}
|
||||
|
||||
public:
|
||||
@@ -56,57 +67,154 @@ public:
|
||||
: ITensor(shape) {
|
||||
fillBuf(data);
|
||||
}
|
||||
Tensor(const std::array<size_t, Dim> &shape, T min, T max) {
|
||||
Tensor(const std::array<size_t, Dim> &shape, T min, T max) : ITensor(shape) {
|
||||
static std::random_device rd;
|
||||
static std::mt19937 gen(rd());
|
||||
std::vector<T> data(getSize());
|
||||
if constexpr (std::is_integral_v<T>) {
|
||||
std::uniform_int_distribution<T> dis(min, max);
|
||||
for (T &e : data_)
|
||||
for (T &e : data)
|
||||
e = dis(gen);
|
||||
} else if constexpr (std::is_floating_point_v<T>) {
|
||||
std::uniform_real_distribution<T> dis(min, max);
|
||||
for (T &e : data_)
|
||||
for (T &e : data)
|
||||
e = dis(gen);
|
||||
} else
|
||||
throw std::invalid_argument("Invalid randomized type");
|
||||
fillBuf(data);
|
||||
}
|
||||
|
||||
Tensor(const Tensor &other) : ITensor(other.shape) {
|
||||
createBuf(other.getSize());
|
||||
q.enqueueCopyBuffer(*other.buffer, *buffer, 0, 0,
|
||||
other.getSize() * sizeof(float));
|
||||
Tensor(const Tensor &other) : ITensor(other) {
|
||||
event_ = other.event_;
|
||||
fillBuf(other);
|
||||
}
|
||||
Tensor &operator=(const Tensor &other);
|
||||
Tensor(Tensor &&other) noexcept;
|
||||
Tensor &operator=(Tensor &&other) noexcept;
|
||||
~Tensor() = default;
|
||||
Tensor &operator=(const Tensor &other) {
|
||||
ITensor::operator=(other);
|
||||
event_ = other.event_;
|
||||
fillBuf(other);
|
||||
return *this;
|
||||
}
|
||||
Tensor(Tensor &&other) noexcept : ITensor(std::move(other)) {
|
||||
data_ = other.data_;
|
||||
event_ = other.event_;
|
||||
other.data = nullptr;
|
||||
}
|
||||
Tensor &operator=(Tensor &&other) noexcept {
|
||||
ITensor::operator=(std::move(other));
|
||||
data_ = other.data_;
|
||||
event_ = other.event_;
|
||||
other.data = nullptr;
|
||||
return *this;
|
||||
}
|
||||
~Tensor() {
|
||||
if (data_ != nullptr)
|
||||
delete data_;
|
||||
};
|
||||
|
||||
T &operator[](size_t i);
|
||||
const T &operator[](size_t i) const;
|
||||
template <typename... Indices> T &operator()(Indices... indices);
|
||||
template <typename... Indices> const T &operator()(Indices... indices) const;
|
||||
const cl::Buffer *getData() const { return data_; }
|
||||
const cl::Event *getEvent() const { return event_; }
|
||||
|
||||
// T &operator[](size_t i);
|
||||
// const T &operator[](size_t i) const;
|
||||
// template <typename... Indices> T &operator()(Indices... indices);
|
||||
// template <typename... Indices> const T &operator()(Indices... indices)
|
||||
// const;
|
||||
|
||||
using ITensor::operator+;
|
||||
using ITensor::operator-;
|
||||
|
||||
Tensor operator+() const override;
|
||||
Tensor operator-() const override;
|
||||
Tensor operator+() override {
|
||||
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::POSITIVE);
|
||||
kernel.setArg(0, *data_);
|
||||
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
||||
cl::NDRange(getSize()),
|
||||
cl::NullRange, all(*event_), event_);
|
||||
return *this;
|
||||
}
|
||||
|
||||
Tensor &operator+=(const T &scalar) override;
|
||||
Tensor operator-() override {
|
||||
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::NEGATIVE);
|
||||
kernel.setArg(0, *data_);
|
||||
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
||||
cl::NDRange(getSize()),
|
||||
cl::NullRange, all(*event_), event_);
|
||||
return *this;
|
||||
}
|
||||
|
||||
Tensor &operator*=(const T &scalar) override;
|
||||
Tensor &operator+=(const T scalar) override {
|
||||
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::S_ADD);
|
||||
kernel.setArg(0, *data_);
|
||||
kernel.setArg(1, scalar);
|
||||
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
||||
cl::NDRange(getSize()),
|
||||
cl::NullRange, all(*event_), event_);
|
||||
return *this;
|
||||
}
|
||||
|
||||
Tensor &operator+=(const Tensor &other) override;
|
||||
Tensor &operator*=(const T scalar) override {
|
||||
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::S_MULT);
|
||||
kernel.setArg(0, *data_);
|
||||
kernel.setArg(1, scalar);
|
||||
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
||||
cl::NDRange(getSize()),
|
||||
cl::NullRange, all(*event_), event_);
|
||||
return *this;
|
||||
}
|
||||
|
||||
Tensor &operator*=(const Tensor &other) override;
|
||||
Tensor &operator+=(const Tensor &other) override {
|
||||
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::T_ADD);
|
||||
kernel.setArg(0, *data_);
|
||||
kernel.setArg(1, *other.getData());
|
||||
openCL.getQueue().enqueueNDRangeKernel(
|
||||
kernel, cl::NullRange, cl::NDRange(getSize()), cl::NullRange,
|
||||
all(*event_, *other.event_), event_);
|
||||
return *this;
|
||||
}
|
||||
|
||||
Tensor<T, Dim == 1 ? 0 : 2> operator%(const Tensor &other) const;
|
||||
Tensor &operator*=(const Tensor &other) override {
|
||||
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::T_HADAMARD);
|
||||
kernel.setArg(0, *data_);
|
||||
kernel.setArg(1, *other.getData());
|
||||
openCL.getQueue().enqueueNDRangeKernel(
|
||||
kernel, cl::NullRange, cl::NDRange(getSize()), cl::NullRange,
|
||||
all(*event_, *other.event_), event_);
|
||||
return *this;
|
||||
}
|
||||
|
||||
#define TILE_SIZE 16
|
||||
Tensor<T, Dim == 1 ? 0 : 2> operator%(const Tensor &other) const {
|
||||
static_assert(Dim == 1 || Dim == 2,
|
||||
"Inner product is only defined for vectors and matrices");
|
||||
if constexpr (Dim == 1) {
|
||||
static_assert(false, "TODO vector scalar multiplication");
|
||||
} else if constexpr (Dim == 2) {
|
||||
if (shape_[axes_[1]] != other.shape_[other.axes_[0]])
|
||||
throw std::invalid_argument(
|
||||
"Matrix dimensions must match for multiplication");
|
||||
size_t m = shape_[axes_[0]];
|
||||
size_t k = shape_[axes_[1]];
|
||||
size_t n = other.shape_[other.axes_[1]];
|
||||
Tensor<T, 2> result({m, n});
|
||||
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::T_MULT);
|
||||
kernel.setArg(0, *data_);
|
||||
kernel.setArg(1, *other.getData());
|
||||
kernel.setArg(2, *result.getData());
|
||||
kernel.setArg(3, m);
|
||||
kernel.setArg(4, n);
|
||||
kernel.setArg(5, k);
|
||||
cl::NDRange global_size(((m + TILE_SIZE - 1) / TILE_SIZE) * TILE_SIZE,
|
||||
((n + TILE_SIZE - 1) / TILE_SIZE) * TILE_SIZE);
|
||||
cl::NDRange local_size(TILE_SIZE, TILE_SIZE);
|
||||
openCL.getQueue().enqueueNDRangeKernel(
|
||||
kernel, cl::NullRange, global_size, local_size,
|
||||
all(*event_, *other.event_), result.event_);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
std::string toString() const override;
|
||||
};
|
||||
|
||||
#include "tensor.tpp"
|
||||
|
||||
#include "../fabric.hpp"
|
||||
#include "../fabric.hpp"
|
||||
|
||||
@@ -35,34 +35,33 @@ public:
|
||||
Tensor &transpose(int axis_a, int axis_b);
|
||||
Tensor &t();
|
||||
|
||||
// === Operators ===
|
||||
virtual Tensor operator+() const = 0;
|
||||
virtual Tensor operator-() const = 0;
|
||||
virtual Tensor operator+() = 0;
|
||||
virtual Tensor operator-() = 0;
|
||||
|
||||
virtual Tensor &operator+=(const T &scalar) = 0;
|
||||
virtual Tensor &operator*=(const T &scalar) = 0;
|
||||
virtual Tensor &operator+=(const T scalar) = 0;
|
||||
virtual Tensor &operator*=(const T scalar) = 0;
|
||||
|
||||
virtual Tensor &operator+=(const Tensor &other) = 0;
|
||||
virtual Tensor &operator*=(const Tensor &other) = 0;
|
||||
|
||||
Tensor operator+(const T &scalar) const;
|
||||
friend Tensor operator+(const T &scalar, const Tensor &tensor) {
|
||||
Tensor operator+(const T scalar) const;
|
||||
friend Tensor operator+(const T scalar, const Tensor &tensor) {
|
||||
return tensor + scalar;
|
||||
}
|
||||
|
||||
Tensor &operator-=(const T &scalar);
|
||||
Tensor operator-(const T &scalar) const;
|
||||
friend Tensor operator-(const T &scalar, const Tensor &tensor) {
|
||||
Tensor &operator-=(const T scalar);
|
||||
Tensor operator-(const T scalar) const;
|
||||
friend Tensor operator-(const T scalar, const Tensor &tensor) {
|
||||
return tensor + (-scalar);
|
||||
}
|
||||
|
||||
Tensor operator*(const T &scalar) const;
|
||||
friend Tensor operator*(const T &scalar, const Tensor &tensor) {
|
||||
Tensor operator*(const T scalar) const;
|
||||
friend Tensor operator*(const T scalar, const Tensor &tensor) {
|
||||
return tensor * scalar;
|
||||
}
|
||||
|
||||
Tensor &operator/=(const T &scalar);
|
||||
Tensor operator/(const T &scalar) const;
|
||||
Tensor &operator/=(const T scalar);
|
||||
Tensor operator/(const T scalar) const;
|
||||
|
||||
Tensor operator+(const Tensor &other) const;
|
||||
|
||||
|
||||
@@ -115,39 +115,39 @@ template <typename T, int Dim> ITensor<T, Dim>::Tensor &ITensor<T, Dim>::t() {
|
||||
|
||||
// ===== OPERATORS ======
|
||||
template <typename T, int Dim>
|
||||
ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator+(const T &scalar) const {
|
||||
ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator+(const T scalar) const {
|
||||
Tensor result = static_cast<const Tensor &>(*this);
|
||||
result += scalar;
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T, int Dim>
|
||||
ITensor<T, Dim>::Tensor &ITensor<T, Dim>::operator-=(const T &scalar) {
|
||||
ITensor<T, Dim>::Tensor &ITensor<T, Dim>::operator-=(const T scalar) {
|
||||
*this += -scalar;
|
||||
return static_cast<Tensor &>(*this);
|
||||
}
|
||||
|
||||
template <typename T, int Dim>
|
||||
ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator-(const T &scalar) const {
|
||||
ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator-(const T scalar) const {
|
||||
Tensor result = static_cast<const Tensor &>(*this);
|
||||
result -= scalar;
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T, int Dim>
|
||||
ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator*(const T &scalar) const {
|
||||
ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator*(const T scalar) const {
|
||||
Tensor result = static_cast<const Tensor &>(*this);
|
||||
result *= scalar;
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T, int Dim>
|
||||
ITensor<T, Dim>::Tensor &ITensor<T, Dim>::operator/=(const T &scalar) {
|
||||
ITensor<T, Dim>::Tensor &ITensor<T, Dim>::operator/=(const T scalar) {
|
||||
*this *= T(1) / scalar;
|
||||
return static_cast<Tensor &>(*this);
|
||||
}
|
||||
template <typename T, int Dim>
|
||||
ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator/(const T &scalar) const {
|
||||
ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator/(const T scalar) const {
|
||||
Tensor result = static_cast<const Tensor &>(*this);
|
||||
result /= scalar;
|
||||
return result;
|
||||
|
||||
Reference in New Issue
Block a user