From 982ddcb5e0ad05c0ae13f614e987e10bad6db4a2 Mon Sep 17 00:00:00 2001
From: StepanovPlaton
Date: Mon, 17 Nov 2025 23:07:33 +0400
Subject: [PATCH] Work

---
 src/tensor/Makefile                 |   2 +-
 src/tensor/cpu/tensor.hpp           |   8 +-
 src/tensor/cpu/tensor.tpp           |  28 +++--
 src/tensor/main.cpp                 |   6 +-
 src/tensor/opencl/kernels/atomic.cl |   8 +-
 src/tensor/opencl/kernels/scalar.cl |   6 +-
 src/tensor/opencl/kernels/tensor.cl | 172 ++++++++--------------------
 src/tensor/opencl/opencl.cpp        |  15 ++-
 src/tensor/opencl/opencl.hpp        |  28 +++++
 src/tensor/opencl/tensor.hpp        | 172 ++++++++++++++++++++++------
 src/tensor/tensor.hpp               |  27 +++--
 src/tensor/tensor.tpp               |  12 +-
 12 files changed, 276 insertions(+), 208 deletions(-)

diff --git a/src/tensor/Makefile b/src/tensor/Makefile
index 37c138e..1133f6c 100644
--- a/src/tensor/Makefile
+++ b/src/tensor/Makefile
@@ -19,7 +19,7 @@ else
 endif
 
 BUILD_DIR = build
-COMMON_SRC =
+COMMON_SRC = opencl/opencl.cpp
 
 PYTHON_PATH = $(shell python -c "from sysconfig import get_paths; print(get_paths()['data'])")
 PYTHON_INCLUDE = $(shell python -c "import sysconfig; print(sysconfig.get_config_var('CONFINCLUDEPY'))")
diff --git a/src/tensor/cpu/tensor.hpp b/src/tensor/cpu/tensor.hpp
index 9b3e98d..f6e2b40 100644
--- a/src/tensor/cpu/tensor.hpp
+++ b/src/tensor/cpu/tensor.hpp
@@ -38,12 +38,12 @@ public:
   using ITensor<T, Dim>::operator+;
   using ITensor<T, Dim>::operator-;
 
-  Tensor operator+() const override;
-  Tensor operator-() const override;
+  Tensor operator+() override;
+  Tensor operator-() override;
 
-  Tensor &operator+=(const T &scalar) override;
+  Tensor &operator+=(const T scalar) override;
 
-  Tensor &operator*=(const T &scalar) override;
+  Tensor &operator*=(const T scalar) override;
 
   Tensor &operator+=(const Tensor &other) override;
 
diff --git a/src/tensor/cpu/tensor.tpp b/src/tensor/cpu/tensor.tpp
index 0f3fea7..b418555 100644
--- a/src/tensor/cpu/tensor.tpp
+++ b/src/tensor/cpu/tensor.tpp
@@ -19,7 +19,7 @@ template <typename T, size_t Dim>
 Tensor<T, Dim>::Tensor(const std::array<size_t, Dim> &shape,
                        const std::vector<T> &data)
     : Tensor(shape) {
-  if (data.size() != data_.size())
+  if (data.size() != getSize())
     throw std::invalid_argument("Invalid fill data size");
   data_ = data;
 }
@@ -79,15 +79,13 @@ const T &Tensor<T, Dim>::operator()(Indices... indices) const {
 }
 
 // ===== OPERATORS =====
-template <typename T, size_t Dim>
-Tensor<T, Dim> Tensor<T, Dim>::operator+() const {
+template <typename T, size_t Dim> Tensor<T, Dim> Tensor<T, Dim>::operator+() {
   Tensor result = *this;
   for (T &e : result.data_)
     e = +e;
   return result;
 }
-template <typename T, size_t Dim>
-Tensor<T, Dim> Tensor<T, Dim>::operator-() const {
+template <typename T, size_t Dim> Tensor<T, Dim> Tensor<T, Dim>::operator-() {
   Tensor result = *this;
   for (T &e : result.data_)
     e = -e;
@@ -95,14 +93,14 @@ Tensor<T, Dim> Tensor<T, Dim>::operator-() const {
 }
 template <typename T, size_t Dim>
-Tensor<T, Dim> &Tensor<T, Dim>::operator+=(const T &scalar) {
+Tensor<T, Dim> &Tensor<T, Dim>::operator+=(const T scalar) {
   for (T &e : data_)
     e += scalar;
   return *this;
 }
 template <typename T, size_t Dim>
-Tensor<T, Dim> &Tensor<T, Dim>::operator*=(const T &scalar) {
+Tensor<T, Dim> &Tensor<T, Dim>::operator*=(const T scalar) {
   for (T &e : data_)
     e *= scalar;
   return *this;
@@ -111,7 +109,7 @@ Tensor<T, Dim> &Tensor<T, Dim>::operator*=(const T &scalar) {
 template <typename T, size_t Dim>
 Tensor<T, Dim> &Tensor<T, Dim>::operator+=(const Tensor &other) {
   checkItHasSameShape(other);
-  for (size_t i = 0; i < data_.size(); ++i)
+  for (size_t i = 0; i < getSize(); ++i)
     data_[i] += other.data_[i];
   return *this;
 }
@@ -119,7 +117,7 @@ Tensor<T, Dim> &Tensor<T, Dim>::operator+=(const Tensor &other) {
 template <typename T, size_t Dim>
 Tensor<T, Dim> &Tensor<T, Dim>::operator*=(const Tensor &other) {
   checkItHasSameShape(other);
-  for (size_t i = 0; i < data_.size(); ++i)
+  for (size_t i = 0; i < getSize(); ++i)
     data_[i] *= other.data_[i];
   return *this;
 }
@@ -130,10 +128,10 @@ Tensor<T, Dim>::operator%(const Tensor &other) const {
   static_assert(Dim == 1 || Dim == 2,
                 "Inner product is only defined for vectors and matrices");
   if constexpr (Dim == 1) {
-    if (data_.size() != other.data_.size())
+    if (getSize() != other.getSize())
       throw std::invalid_argument("Vector sizes must match for inner product");
     T result_val = T(0);
-    for (size_t i = 0; i < data_.size(); ++i)
+    for (size_t i = 0; i < getSize(); ++i)
       result_val += data_[i] * other.data_[i];
     return Tensor({}, {result_val});
   } else if constexpr (Dim == 2) {
@@ -163,9 +161,9 @@ template <typename T, size_t Dim> std::string Tensor<T, Dim>::toString() const {
     oss << "Scalar<" << typeid(T).name() << ">: " << data_[0];
   } else if constexpr (Dim == 1) {
     oss << "Vector<" << typeid(T).name() << ">(" << shape_[0] << "): [";
-    for (size_t i = 0; i < data_.size(); ++i) {
+    for (size_t i = 0; i < getSize(); ++i) {
       oss << data_[i];
-      if (i < data_.size() - 1)
+      if (i < getSize() - 1)
         oss << ", ";
     }
     oss << "]";
@@ -189,13 +187,13 @@ template <typename T, size_t Dim> std::string Tensor<T, Dim>::toString() const {
         oss << "x";
     }
     oss << "]: [";
-    size_t show = std::min(data_.size(), size_t(10));
+    size_t show = std::min(getSize(), size_t(10));
     for (size_t i = 0; i < show; ++i) {
       oss << data_[i];
       if (i < show - 1)
         oss << ", ";
     }
-    if (data_.size() > 10)
+    if (getSize() > 10)
       oss << ", ...";
     oss << "]";
   }
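Note on the CPU-side hunks above: the scalar operator parameters change from `const T &` to `const T` and size queries go through getSize(). A minimal usage sketch of this API, assuming the template parameters are `<typename T, size_t Dim>` as reconstructed above (the values and the `main` wrapper are illustrative, not part of the patch):

    #include "cpu/tensor.hpp"
    #include <iostream>

    int main() {
      Tensor<float, 2> a({2, 2}, {1.f, 2.f, 3.f, 4.f});
      Tensor<float, 2> b({2, 2}, {1.f, 0.f, 0.f, 1.f});
      a += 1.0f;                    // scalar add, scalar now passed by value
      a *= b;                       // element-wise (Hadamard) product
      Tensor<float, 2> c = a % b;   // matrix product via operator%
      std::cout << c.toString() << std::endl;
      return 0;
    }

Passing the scalar by value is the natural choice here, since the OpenCL backend introduced below needs a concrete value it can hand to cl::Kernel::setArg rather than a reference into host memory.
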
diff --git a/src/tensor/main.cpp b/src/tensor/main.cpp
index c49967a..4e03024 100644
--- a/src/tensor/main.cpp
+++ b/src/tensor/main.cpp
@@ -1,7 +1,11 @@
-#include "cpu/tensor.hpp"
+// #include "cpu/tensor.hpp"
+#include "opencl/tensor.hpp"
 
 #include <iostream>
 
+// TODO: GENERIC KERNELS
+// TODO: Scalar mult
+
 int main() {
   Tensor<float, 2> a = Tensor<float, 2>({2, 4});
   std::cout << a.toString();
diff --git a/src/tensor/opencl/kernels/atomic.cl b/src/tensor/opencl/kernels/atomic.cl
index ce12a31..26d5d68 100644
--- a/src/tensor/opencl/kernels/atomic.cl
+++ b/src/tensor/opencl/kernels/atomic.cl
@@ -1,11 +1,11 @@
-__kernel void positive(__global float *A, __global float *B) {
+__kernel void positive(__global float *A) {
   int i = get_global_id(0);
-  B[i] = +A[i];
+  A[i] = +A[i];
 }
 
-__kernel void negative(__global float *A, __global float *B) {
+__kernel void negative(__global float *A) {
   int i = get_global_id(0);
-  B[i] = -A[i];
+  A[i] = -A[i];
 }
diff --git a/src/tensor/opencl/kernels/scalar.cl b/src/tensor/opencl/kernels/scalar.cl
index 11f01d7..2b201b0 100644
--- a/src/tensor/opencl/kernels/scalar.cl
+++ b/src/tensor/opencl/kernels/scalar.cl
@@ -1,9 +1,9 @@
-__kernel void add(__global float *A, __global float *B, float scalar) {
+__kernel void add(__global float *A, float scalar) {
   int i = get_global_id(0);
-  B[i] = A[i] + scalar;
+  A[i] += scalar;
 }
 
-__kernel void mult(__global float *A, __global float *B, float scalar) {
+__kernel void mult(__global float *A, float scalar) {
   int i = get_global_id(0);
   B[i] = A[i] * scalar;
 }
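Note on the kernel changes above: `positive`, `negative` and the scalar `add` now update A in place, so the separate output buffer argument is gone. The scalar `mult` kernel, however, still writes through the removed `B` parameter (`B[i] = A[i] * scalar;`) and will not compile as patched; it presumably needs to become `A[i] *= scalar;`, which matches the `// TODO: Scalar mult` note left in main.cpp.
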
diff --git a/src/tensor/opencl/kernels/tensor.cl b/src/tensor/opencl/kernels/tensor.cl
index 97daf45..59dc4ea 100644
--- a/src/tensor/opencl/kernels/tensor.cl
+++ b/src/tensor/opencl/kernels/tensor.cl
@@ -1,134 +1,54 @@
-__kernel void add(__global float *A, __global float *B, __global float *C,
-                  float x) {
+__kernel void add(__global float *A, __global float *B) {
   int i = get_global_id(0);
-  C[i] = A[i] + (B[i] * x);
+  A[i] += B[i];
 }
 
+__kernel void hadamard_mult(__global float *A, __global float *B) {
+  int i = get_global_id(0);
+  A[i] *= B[i];
+}
+
+#define TILE_SIZE 16
 __kernel void mult(__global float *A, __global float *B, __global float *C,
-                   float x) {
-  int i = get_global_id(0);
-  C[i] = A[i] * (B[i] * x);
-}
-
-float activate(float x, const int activation_type, const float alpha) {
-  switch (activation_type) {
-  case 0: // LINEAR
-    return x;
-  case 1: // SIGMOID
-    return 1.0f / (1.0f + exp(-x));
-  case 2: // TANH
-    return tanh(x);
-  case 3: // RELU
-    return fmax(0.0f, x);
-  case 4: // LEAKY_RELU
-    return (x > 0.0f) ? x : alpha * x;
-  case 5: // ELU
-    return (x > 0.0f) ? x : alpha * (exp(x) - 1.0f);
-  default:
-    return x;
-  }
-}
-
-__kernel void mult_small(__global float *A, __global float *B,
-                         __global float *C, __global float *bias,
-                         const int activation_type, const float alpha,
-                         const int M, const int N, const int K,
-                         const int transpose_B) {
-  const int row = get_global_id(0);
-  const int col = get_global_id(1);
-
-  if (row < M && col < N) {
+                   const int M, const int N, const int K) {
+
+  const int row = get_global_id(0);
+  const int col = get_global_id(1);
+  const int local_row = get_local_id(0);
+  const int local_col = get_local_id(1);
+
+  __local float tile_A[TILE_SIZE][TILE_SIZE];
+  __local float tile_B[TILE_SIZE][TILE_SIZE];
+
   float sum = 0.0f;
-    for (int k = 0; k < K; k++) {
-      float a_val = A[row * K + k];
-
-      float b_val;
-      if (transpose_B) {
-        b_val = B[col * K + k];
-      } else {
-        b_val = B[k * N + col];
-      }
-
-      sum += a_val * b_val;
+
+  for (int t = 0; t < (K - 1) / TILE_SIZE + 1; t++) {
+
+    int a_col = t * TILE_SIZE + local_col;
+    if (row < M && a_col < K) {
+      tile_A[local_row][local_col] = A[row * K + a_col];
+    } else {
+      tile_A[local_row][local_col] = 0.0f;
+    }
+
+    int b_row = t * TILE_SIZE + local_row;
+    if (b_row < K && col < N) {
+      tile_B[local_row][local_col] = B[b_row * N + col];
+    } else {
+      tile_B[local_row][local_col] = 0.0f;
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    int k_max = min(TILE_SIZE, K - t * TILE_SIZE);
+    for (int k = 0; k < k_max; k++) {
+      sum += tile_A[local_row][k] * tile_B[k][local_col];
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
   }
-
-    float result = sum + bias[col];
-    if (activation_type != 0) {
-      result = activate(result, activation_type, alpha);
+
+  if (row < M && col < N) {
+    C[row * N + col] = sum;
   }
-    C[row * N + col] = result;
-  }
-}
-
-__kernel void mult(__global float *A, __global float *B, __global float *C,
-                   __global float *bias, const int activation_type,
-                   const float alpha, const int M, const int N, const int K,
-                   const int transpose_B) {
-  const int tile_size = 16;
-
-  int local_i = get_local_id(0);
-  int local_j = get_local_id(1);
-  int local_size_i = get_local_size(0);
-  int local_size_j = get_local_size(1);
-
-  int global_i = get_group_id(0) * local_size_i + local_i;
-  int global_j = get_group_id(1) * local_size_j + local_j;
-
-  __local float tile_A[16][16];
-  __local float tile_B[16][16];
-
-  float sum = 0.0f;
-
-  int num_tiles = (K + tile_size - 1) / tile_size;
-
-  for (int tile = 0; tile < num_tiles; tile++) {
-    int tile_offset = tile * tile_size;
-
-    // Load tile_A (unchanged)
-    int load_i_A = tile_offset + local_i;
-    int load_j_A = tile_offset + local_j;
-
-    if (global_i < M && load_j_A < K) {
-      tile_A[local_j][local_i] = A[global_i * K + load_j_A];
-    } else {
-      tile_A[local_j][local_i] = 0.0f;
-    }
-
-    // Load tile_B, taking transposition into account
-    int load_i_B = tile_offset + local_i;
-    int load_j_B = tile_offset + local_j;
-
-    if (transpose_B) {
-      // B is transposed: swap the indices
-      if (load_i_B < N && global_j < K) {
-        tile_B[local_j][local_i] = B[global_j * N + load_i_B];
-      } else {
-        tile_B[local_j][local_i] = 0.0f;
-      }
-    } else {
-      // B is not transposed (original logic)
-      if (load_i_B < K && global_j < N) {
-        tile_B[local_j][local_i] = B[load_i_B * N + global_j];
-      } else {
-        tile_B[local_j][local_i] = 0.0f;
-      }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-#pragma unroll
-    for (int k = 0; k < tile_size; ++k) {
-      sum += tile_A[k][local_i] * tile_B[local_j][k];
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-  }
-
-  if (global_i < M && global_j < N) {
-    float result = sum + bias[global_j];
-    if (activation_type != 0) {
-      result = activate(result, activation_type, alpha);
-    }
-    C[global_i * N + global_j] = result;
-  }
 }
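The new `mult` kernel computes C = A x B with one work-item per output element and a 16x16 __local tile per work-group; out-of-range work-items only help load tiles and skip the final store. The host therefore has to launch it with a 16x16 local size, a global size rounded up to a multiple of TILE_SIZE in both dimensions, and M, N, K passed as 32-bit ints. A host-side sketch (names are illustrative, not from the patch):

    // Assumed to exist already: cl::Program program; cl::CommandQueue queue;
    // cl::Buffer bufA, bufB, bufC; size_t M, N, K;
    // C (M x N) = A (M x K) * B (K x N), row-major float buffers.
    cl::Kernel mult(program, "mult");
    mult.setArg(0, bufA);
    mult.setArg(1, bufB);
    mult.setArg(2, bufC);
    mult.setArg(3, static_cast<cl_int>(M)); // kernel declares M, N, K as int,
    mult.setArg(4, static_cast<cl_int>(N)); // so the host must pass cl_int,
    mult.setArg(5, static_cast<cl_int>(K)); // not size_t
    const size_t TS = 16;                   // must match TILE_SIZE in tensor.cl
    cl::NDRange global(((M + TS - 1) / TS) * TS,  // dim 0 -> rows
                       ((N + TS - 1) / TS) * TS); // dim 1 -> cols
    cl::NDRange local(TS, TS);
    queue.enqueueNDRangeKernel(mult, cl::NullRange, global, local);

operator% in opencl/tensor.hpp (further down) launches the kernel the same way but passes size_t values straight to setArg for M, N and K; with an 8-byte size_t that setArg call will likely fail with CL_INVALID_ARG_SIZE, so a cast to cl_int is probably needed there as well.
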
diff --git a/src/tensor/opencl/opencl.cpp b/src/tensor/opencl/opencl.cpp
index d79b4b9..bf16456 100644
--- a/src/tensor/opencl/opencl.cpp
+++ b/src/tensor/opencl/opencl.cpp
@@ -102,13 +102,24 @@ OpenCL::OpenCL() {
 
 cl::Program &OpenCL::getProgram(Program program) {
   auto it = programs.find(program);
-  if (it == programs.end()) {
+  if (it == programs.end())
     throw std::invalid_argument("Program not loaded: " +
                                 std::to_string(static_cast<int>(program)));
-  }
   return it->second;
 }
 
+cl::Kernel OpenCL::createKernel(Method method) {
+  auto methodProgram = methodPrograms.find(method);
+  if (methodProgram == methodPrograms.end())
+    throw std::invalid_argument("No program found for method: " +
+                                std::to_string(static_cast<int>(method)));
+  auto methodName = methodNames.find(method);
+  if (methodName == methodNames.end())
+    throw std::invalid_argument("No kernel name found for method: " +
+                                std::to_string(static_cast<int>(method)));
+  return cl::Kernel(getProgram(methodProgram->second), methodName->second);
+}
+
 void OpenCL::printDeviceInfo() const {
   std::cout << "=== OpenCL Device Info ===" << std::endl;
   std::cout << "Name: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
diff --git a/src/tensor/opencl/opencl.hpp b/src/tensor/opencl/opencl.hpp
index d597d2f..d0bf975 100644
--- a/src/tensor/opencl/opencl.hpp
+++ b/src/tensor/opencl/opencl.hpp
@@ -8,6 +8,15 @@
 
 class OpenCL {
 public:
+  enum class Method {
+    POSITIVE,
+    NEGATIVE,
+    S_ADD,
+    S_MULT,
+    T_ADD,
+    T_HADAMARD,
+    T_MULT,
+  };
   enum class Program { ATOMIC, SCALAR, TENSOR, FUSION };
 
 private:
@@ -21,6 +30,21 @@ private:
       {Program::SCALAR, "./opencl/kernels/scalar.cl"},
       {Program::TENSOR, "./opencl/kernels/tensor.cl"},
       {Program::FUSION, "./opencl/kernels/fusion.cl"}};
+  std::unordered_map<Method, Program> methodPrograms = {
+      {Method::POSITIVE, Program::ATOMIC},
+      {Method::NEGATIVE, Program::ATOMIC},
+      {Method::S_ADD, Program::SCALAR},
+      {Method::S_MULT, Program::SCALAR},
+      {Method::T_ADD, Program::TENSOR},
+      {Method::T_HADAMARD, Program::TENSOR},
+      {Method::T_MULT, Program::TENSOR},
+  };
+  std::unordered_map<Method, std::string> methodNames = {
+      {Method::POSITIVE, "positive"}, {Method::NEGATIVE, "negative"},
+      {Method::S_ADD, "add"},         {Method::S_MULT, "mult"},
+      {Method::T_ADD, "add"},         {Method::T_HADAMARD, "hadamard_mult"},
+      {Method::T_MULT, "mult"},
+  };
 
   std::string readProgram(const std::string &filePath);
   cl::Program compileProgram(const std::string &file);
@@ -41,5 +65,9 @@ public:
   const cl::CommandQueue &getQueue() { return queue; }
 
   cl::Program &getProgram(Program program);
+  cl::Kernel createKernel(Method method);
+
   void printDeviceInfo() const;
 };
+
+OpenCL openCL;
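createKernel maps a Method to the Program that contains it (methodPrograms) and to the kernel entry-point name inside that program (methodNames), so callers no longer hard-code file/kernel pairs. A usage sketch, assuming a float buffer `buf` of `n` elements already exists (names are illustrative, not from the patch):

    cl::Kernel k = openCL.createKernel(OpenCL::Method::S_ADD); // "add" from scalar.cl
    k.setArg(0, buf);   // __global float *A
    k.setArg(1, 2.5f);  // float scalar
    openCL.getQueue().enqueueNDRangeKernel(k, cl::NullRange, cl::NDRange(n),
                                           cl::NullRange);

One caveat: `OpenCL openCL;` is a non-inline definition at namespace scope in a header, so including opencl.hpp from more than one translation unit will produce multiple-definition link errors; `inline OpenCL openCL;` (C++17) or an extern declaration with the definition in opencl.cpp avoids that.
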
diff --git a/src/tensor/opencl/tensor.hpp b/src/tensor/opencl/tensor.hpp
index 4b51fd0..bad21be 100644
--- a/src/tensor/opencl/tensor.hpp
+++ b/src/tensor/opencl/tensor.hpp
@@ -4,13 +4,23 @@
 
 #include "../tensor.hpp"
 
+#include
+
 template <typename T, size_t Dim> class Tensor : public ITensor<T, Dim> {
 private:
   cl::Buffer *data_ = nullptr;
-  cl::Event event_ = cl::Event();
+  cl::Event *event_ = new cl::Event();
 
-  template <typename... Events> std::vector<cl::Event> all(Events &&...events) {
-    return {std::forward<Events>(events)...};
+  class AutoEventList {
+  private:
+    std::vector<cl::Event> events_;
+
+  public:
+    AutoEventList(std::initializer_list<cl::Event> events) : events_(events) {}
+    operator const std::vector<cl::Event> *() const { return &events_; }
+  };
+  template <typename... Events> AutoEventList all(Events &&...events) {
+    return AutoEventList{std::forward<Events>(events)...};
   }
 
   void createBuf(size_t size) {
@@ -22,15 +32,16 @@ private:
   void fillBuf(const std::vector<T> &data) {
     createBuf(data.size());
+    // event_ = event?!
     openCL.getQueue().enqueueWriteBuffer(*data_, CL_FALSE, 0,
                                          data.size() * sizeof(T), data.data(),
-                                         all(event_), &event_);
+                                         all(*event_), event_);
   }
 
-  void fillBuf(size_t size, cl::Buffer *data) {
-    createBuf(size);
-    openCL.getQueue().enqueueWriteBuffer(*data_, CL_FALSE, 0,
-                                         data.size() * sizeof(T), other..data(),
-                                         all(event_), &event_);
+  void fillBuf(const Tensor &other) {
+    createBuf(other.getSize());
+    openCL.getQueue().enqueueCopyBuffer(
+        *other.getData(), *data_, 0, 0, other.getSize() * sizeof(T),
+        all(*event_, *other.getEvent()), event_);
   }
 
 public:
@@ -56,57 +67,154 @@ public:
       : ITensor<T, Dim>(shape) {
     fillBuf(data);
   }
-  Tensor(const std::array<size_t, Dim> &shape, T min, T max) {
+  Tensor(const std::array<size_t, Dim> &shape, T min, T max) : ITensor<T, Dim>(shape) {
     static std::random_device rd;
     static std::mt19937 gen(rd());
     std::vector<T> data(getSize());
     if constexpr (std::is_integral_v<T>) {
       std::uniform_int_distribution<T> dis(min, max);
-      for (T &e : data_)
+      for (T &e : data)
         e = dis(gen);
     } else if constexpr (std::is_floating_point_v<T>) {
       std::uniform_real_distribution<T> dis(min, max);
-      for (T &e : data_)
+      for (T &e : data)
         e = dis(gen);
     } else
       throw std::invalid_argument("Invalid randomized type");
     fillBuf(data);
   }
-  Tensor(const Tensor &other) : ITensor<T, Dim>(other.shape) {
-    createBuf(other.getSize());
-    q.enqueueCopyBuffer(*other.buffer, *buffer, 0, 0,
-                        other.getSize() * sizeof(float));
+  Tensor(const Tensor &other) : ITensor<T, Dim>(other) {
+    event_ = other.event_;
+    fillBuf(other);
   }
-  Tensor &operator=(const Tensor &other);
-  Tensor(Tensor &&other) noexcept;
-  Tensor &operator=(Tensor &&other) noexcept;
-  ~Tensor() = default;
+  Tensor &operator=(const Tensor &other) {
+    ITensor<T, Dim>::operator=(other);
+    event_ = other.event_;
+    fillBuf(other);
+    return *this;
+  }
+  Tensor(Tensor &&other) noexcept : ITensor<T, Dim>(std::move(other)) {
+    data_ = other.data_;
+    event_ = other.event_;
+    other.data_ = nullptr;
+  }
+  Tensor &operator=(Tensor &&other) noexcept {
+    ITensor<T, Dim>::operator=(std::move(other));
+    data_ = other.data_;
+    event_ = other.event_;
+    other.data_ = nullptr;
+    return *this;
+  }
+  ~Tensor() {
+    if (data_ != nullptr)
+      delete data_;
+  };
 
-  T &operator[](size_t i);
-  const T &operator[](size_t i) const;
-  template <typename... Indices> T &operator()(Indices... indices);
-  template <typename... Indices> const T &operator()(Indices... indices) const;
+  const cl::Buffer *getData() const { return data_; }
+  const cl::Event *getEvent() const { return event_; }
+
+  // T &operator[](size_t i);
+  // const T &operator[](size_t i) const;
+  // template <typename... Indices> T &operator()(Indices... indices);
+  // template <typename... Indices> const T &operator()(Indices... indices)
+  // const;
 
   using ITensor<T, Dim>::operator+;
   using ITensor<T, Dim>::operator-;
 
-  Tensor operator+() const override;
-  Tensor operator-() const override;
+  Tensor operator+() override {
+    cl::Kernel kernel = openCL.createKernel(OpenCL::Method::POSITIVE);
+    kernel.setArg(0, *data_);
+    openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
+                                           cl::NDRange(getSize()),
+                                           cl::NullRange, all(*event_), event_);
+    return *this;
+  }
 
-  Tensor &operator+=(const T &scalar) override;
+  Tensor operator-() override {
+    cl::Kernel kernel = openCL.createKernel(OpenCL::Method::NEGATIVE);
+    kernel.setArg(0, *data_);
+    openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
+                                           cl::NDRange(getSize()),
+                                           cl::NullRange, all(*event_), event_);
+    return *this;
+  }
 
-  Tensor &operator*=(const T &scalar) override;
+  Tensor &operator+=(const T scalar) override {
+    cl::Kernel kernel = openCL.createKernel(OpenCL::Method::S_ADD);
+    kernel.setArg(0, *data_);
+    kernel.setArg(1, scalar);
+    openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
+                                           cl::NDRange(getSize()),
+                                           cl::NullRange, all(*event_), event_);
+    return *this;
+  }
 
-  Tensor &operator+=(const Tensor &other) override;
+  Tensor &operator*=(const T scalar) override {
+    cl::Kernel kernel = openCL.createKernel(OpenCL::Method::S_MULT);
+    kernel.setArg(0, *data_);
+    kernel.setArg(1, scalar);
+    openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
+                                           cl::NDRange(getSize()),
+                                           cl::NullRange, all(*event_), event_);
+    return *this;
+  }
 
-  Tensor &operator*=(const Tensor &other) override;
+  Tensor &operator+=(const Tensor &other) override {
+    cl::Kernel kernel = openCL.createKernel(OpenCL::Method::T_ADD);
+    kernel.setArg(0, *data_);
+    kernel.setArg(1, *other.getData());
+    openCL.getQueue().enqueueNDRangeKernel(
+        kernel, cl::NullRange, cl::NDRange(getSize()), cl::NullRange,
+        all(*event_, *other.event_), event_);
+    return *this;
+  }
 
-  Tensor operator%(const Tensor &other) const;
+  Tensor &operator*=(const Tensor &other) override {
+    cl::Kernel kernel = openCL.createKernel(OpenCL::Method::T_HADAMARD);
+    kernel.setArg(0, *data_);
+    kernel.setArg(1, *other.getData());
+    openCL.getQueue().enqueueNDRangeKernel(
+        kernel, cl::NullRange, cl::NDRange(getSize()), cl::NullRange,
+        all(*event_, *other.event_), event_);
+    return *this;
+  }
+
+#define TILE_SIZE 16
+  Tensor operator%(const Tensor &other) const {
+    static_assert(Dim == 1 || Dim == 2,
+                  "Inner product is only defined for vectors and matrices");
+    if constexpr (Dim == 1) {
+      static_assert(false, "TODO vector scalar multiplication");
+    } else if constexpr (Dim == 2) {
+      if (shape_[axes_[1]] != other.shape_[other.axes_[0]])
+        throw std::invalid_argument(
+            "Matrix dimensions must match for multiplication");
+      size_t m = shape_[axes_[0]];
+      size_t k = shape_[axes_[1]];
+      size_t n = other.shape_[other.axes_[1]];
+      Tensor result({m, n});
+      cl::Kernel kernel = openCL.createKernel(OpenCL::Method::T_MULT);
+      kernel.setArg(0, *data_);
+      kernel.setArg(1, *other.getData());
+      kernel.setArg(2, *result.getData());
+      kernel.setArg(3, m);
+      kernel.setArg(4, n);
+      kernel.setArg(5, k);
+      cl::NDRange global_size(((m + TILE_SIZE - 1) / TILE_SIZE) * TILE_SIZE,
+                              ((n + TILE_SIZE - 1) / TILE_SIZE) * TILE_SIZE);
+      cl::NDRange local_size(TILE_SIZE, TILE_SIZE);
+      openCL.getQueue().enqueueNDRangeKernel(
+          kernel, cl::NullRange, global_size, local_size,
+          all(*event_, *other.event_), result.event_);
+      return result;
+    }
+  }
 
   std::string toString() const override;
 };
 
 #include "tensor.tpp"
-#include "../fabric.hpp"
\ No newline at end of file
+#include "../fabric.hpp"
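Every operation above enqueues its kernel with `all(*event_, ...)` as the wait list and stores the completion event back into event_, so work on a tensor forms an in-order dependency chain without blocking the host. Reading results back is not shown in the patch; a sketch of a blocking read-back, assuming getSize() is publicly reachable through ITensor (names and values are illustrative):

    Tensor<float, 2> t({2, 4}, 0.0f, 1.0f);  // random-filled device tensor
    t *= 2.0f;                               // enqueued, possibly not finished yet
    std::vector<float> host(t.getSize());
    std::vector<cl::Event> wait{*t.getEvent()};
    openCL.getQueue().enqueueReadBuffer(*t.getData(), CL_TRUE, 0,
                                        host.size() * sizeof(float),
                                        host.data(), &wait);

Two things worth flagging while this class is being reworked: the copy constructor and copy assignment share the event_ pointer with their source, and the move assignment appears to overwrite data_ without releasing the buffer it previously owned, so ownership of the buffer and the event may deserve another pass (e.g. holding them by value or via std::unique_ptr).
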
diff --git a/src/tensor/tensor.hpp b/src/tensor/tensor.hpp
index 987800f..458f1b0 100644
--- a/src/tensor/tensor.hpp
+++ b/src/tensor/tensor.hpp
@@ -35,34 +35,33 @@ public:
   Tensor &transpose(int axis_a, int axis_b);
   Tensor &t();
 
-  // === Operators ===
-  virtual Tensor operator+() const = 0;
-  virtual Tensor operator-() const = 0;
+  virtual Tensor operator+() = 0;
+  virtual Tensor operator-() = 0;
 
-  virtual Tensor &operator+=(const T &scalar) = 0;
-  virtual Tensor &operator*=(const T &scalar) = 0;
+  virtual Tensor &operator+=(const T scalar) = 0;
+  virtual Tensor &operator*=(const T scalar) = 0;
 
   virtual Tensor &operator+=(const Tensor &other) = 0;
   virtual Tensor &operator*=(const Tensor &other) = 0;
 
-  Tensor operator+(const T &scalar) const;
-  friend Tensor operator+(const T &scalar, const Tensor &tensor) {
+  Tensor operator+(const T scalar) const;
+  friend Tensor operator+(const T scalar, const Tensor &tensor) {
     return tensor + scalar;
   }
-  Tensor &operator-=(const T &scalar);
-  Tensor operator-(const T &scalar) const;
-  friend Tensor operator-(const T &scalar, const Tensor &tensor) {
+  Tensor &operator-=(const T scalar);
+  Tensor operator-(const T scalar) const;
+  friend Tensor operator-(const T scalar, const Tensor &tensor) {
     return tensor + (-scalar);
   }
-  Tensor operator*(const T &scalar) const;
-  friend Tensor operator*(const T &scalar, const Tensor &tensor) {
+  Tensor operator*(const T scalar) const;
+  friend Tensor operator*(const T scalar, const Tensor &tensor) {
     return tensor * scalar;
   }
-  Tensor &operator/=(const T &scalar);
-  Tensor operator/(const T &scalar) const;
+  Tensor &operator/=(const T scalar);
+  Tensor operator/(const T scalar) const;
 
   Tensor operator+(const Tensor &other) const;
 
diff --git a/src/tensor/tensor.tpp b/src/tensor/tensor.tpp
index 4868f37..a9547fe 100644
--- a/src/tensor/tensor.tpp
+++ b/src/tensor/tensor.tpp
@@ -115,39 +115,39 @@ template <typename T, size_t Dim> ITensor<T, Dim>::Tensor &ITensor<T, Dim>::t() {
 
 // ===== OPERATORS ======
 template <typename T, size_t Dim>
-ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator+(const T &scalar) const {
+ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator+(const T scalar) const {
   Tensor result = static_cast<const Tensor &>(*this);
   result += scalar;
   return result;
 }
 
 template <typename T, size_t Dim>
-ITensor<T, Dim>::Tensor &ITensor<T, Dim>::operator-=(const T &scalar) {
+ITensor<T, Dim>::Tensor &ITensor<T, Dim>::operator-=(const T scalar) {
   *this += -scalar;
   return static_cast<Tensor &>(*this);
 }
 
 template <typename T, size_t Dim>
-ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator-(const T &scalar) const {
+ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator-(const T scalar) const {
   Tensor result = static_cast<const Tensor &>(*this);
   result -= scalar;
   return result;
 }
 
 template <typename T, size_t Dim>
-ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator*(const T &scalar) const {
+ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator*(const T scalar) const {
   Tensor result = static_cast<const Tensor &>(*this);
   result *= scalar;
   return result;
 }
 
 template <typename T, size_t Dim>
-ITensor<T, Dim>::Tensor &ITensor<T, Dim>::operator/=(const T &scalar) {
+ITensor<T, Dim>::Tensor &ITensor<T, Dim>::operator/=(const T scalar) {
   *this *= T(1) / scalar;
   return static_cast<Tensor &>(*this);
 }
 
 template <typename T, size_t Dim>
-ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator/(const T &scalar) const {
+ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator/(const T scalar) const {
   Tensor result = static_cast<const Tensor &>(*this);
   result /= scalar;
   return result;
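The ITensor helpers above derive the remaining scalar operators from the two virtual ones: subtraction is addition of the negated scalar, and division is multiplication by `T(1) / scalar`. That reciprocal trick only behaves for floating-point T; a small illustration (not from the patch):

    // What Tensor<int, N> /= 4 would effectively do:
    int   recip_int   = int(1) / 4;   // 0    -> every element multiplied by 0
    float recip_float = 1.0f / 4.0f;  // 0.25 -> elements scaled as expected

Separately, the friend `operator-(const T scalar, const Tensor &tensor)` returns `tensor + (-scalar)`, i.e. tensor - scalar, where scalar - tensor would normally be expected; that predates this patch but may be worth a look while the operator signatures are being touched.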