Split headers and logic

2026-04-03 20:30:39 +04:00 · 2025-11-17 16:03:32 +04:00
parent bbd9c67c96
commit d7d93999a4
18 changed files with 589 additions and 394 deletions
--- a/src/tensor/.clangd
+++ b/src/tensor/.clangd
@@ -4,6 +4,7 @@ CompileFlags:
    - -Wall
    - -Wextra
    - -Wpedantic
    - -xc++
  Remove: []
 Diagnostics:
  UnusedIncludes: Strict
--- a/src/tensor/Makefile
+++ b/src/tensor/Makefile
@@ -19,7 +19,7 @@ else
 endif
 BUILD_DIR = build
-COMMON_SRC = tensor.cpp
+COMMON_SRC = 
 PYTHON_PATH = $(shell python -c "from sysconfig import get_paths; print(get_paths()['data'])")
 PYTHON_INCLUDE = $(shell python -c "import sysconfig; print(sysconfig.get_config_var('CONFINCLUDEPY'))")
--- a/src/tensor/cpu/tensor.hpp
+++ b/src/tensor/cpu/tensor.hpp
@@ -0,0 +1,59 @@
 #pragma once
 #include "../tensor.hpp"
 #include <vector>
 // CPU tensor: an ITensor<T, Dim> whose elements are held in a std::vector<T>.
 // The member functions declared here are defined in tensor.tpp, which is
 // included right after this class.
 template <typename T, int Dim> class Tensor : public ITensor<T, Dim> {
 private:
  // Element storage, addressed by flat index.
  std::vector<T> data_;
 public:
  // Local name for the base class, used by the using-declarations below.
  typedef class ITensor<T, Dim> ITensor;
  using ITensor::axes_;
  using ITensor::checkAxisInDim;
  using ITensor::checkItHasSameShape;
  using ITensor::computeIndex;
  using ITensor::getSize;
  using ITensor::shape_;
  Tensor() = delete;
  // Storage is sized to the product of the extents in shape.
  Tensor(const std::array<size_t, Dim> &shape);
  // Same as above, then every element is set to value.
  Tensor(const std::array<size_t, Dim> &shape, T value);
  // Copies data into the tensor; throws std::invalid_argument when
  // data.size() differs from the element count implied by shape.
  Tensor(const std::array<size_t, Dim> &shape, const std::vector<T> &data);
  // Fills the tensor with uniformly distributed random values built from
  // (min, max); integral and floating-point T are handled, anything else throws.
  Tensor(const std::array<size_t, Dim> &shape, T min, T max);
  Tensor(const Tensor &other);
  Tensor &operator=(const Tensor &other);
  Tensor(Tensor &&other) noexcept;
  Tensor &operator=(Tensor &&other) noexcept;
  ~Tensor() = default;
  // Flat access into the storage; the index is not range-checked.
  T &operator[](size_t i);
  const T &operator[](size_t i) const;
  // Multi-index access: the indices are turned into a flat index by
  // computeIndex.
  template <typename... Indices> T &operator()(Indices... indices);
  template <typename... Indices> const T &operator()(Indices... indices) const;
  // Keep the base-class operator+ / operator- overloads visible next to the
  // unary overrides declared below.
  using ITensor::operator+;
  using ITensor::operator-;
  // Element-wise unary plus / negation, returned as a new tensor.
  Tensor operator+() const override;
  Tensor operator-() const override;
  // In-place element-wise arithmetic with a scalar.
  Tensor &operator+=(const T &scalar) override;
  Tensor &operator*=(const T &scalar) override;
  // In-place element-wise arithmetic with another tensor; the shapes are
  // checked with checkItHasSameShape first.
  Tensor &operator+=(const Tensor &other) override;
  Tensor &operator*=(const Tensor &other) override;
  // Dim == 1: inner product, returned as a Tensor<T, 0>.
  // Dim == 2: matrix product, returned as a Tensor<T, 2>.
  // Any other Dim is rejected by a static_assert in the definition.
  Tensor<T, Dim == 1 ? 0 : 2> operator%(const Tensor &other) const;
  // Human-readable dump of the tensor.
  std::string toString() const override;
 };
 #include "tensor.tpp"
 #include "../fabric.hpp"
--- a/src/tensor/cpu/tensor.tpp
+++ b/src/tensor/cpu/tensor.tpp
@@ -0,0 +1,206 @@
 #pragma once
 #include "tensor.hpp"
 #include <random>
 #include <sstream>
 // ===== CONSTRUCTORS =====
 /// Builds a tensor of the given shape. The base class receives the shape;
 /// the storage is then resized to the product of all extents.
 template <typename T, int Dim>
 Tensor<T, Dim>::Tensor(const std::array<size_t, Dim> &shape) : ITensor(shape) {
  size_t elementCount = 1;
  for (auto extent = shape.begin(); extent != shape.end(); ++extent)
    elementCount *= *extent;
  data_.resize(elementCount);
 }
 /// Builds a tensor of the given shape with every element equal to value.
 template <typename T, int Dim>
 Tensor<T, Dim>::Tensor(const std::array<size_t, Dim> &shape, T value)
    : Tensor(shape) {
  // The delegated constructor has already sized the storage; overwrite it.
  data_.assign(data_.size(), value);
 }
 /// Builds a tensor of the given shape whose elements are copied from data.
 /// Throws std::invalid_argument when data does not hold exactly as many
 /// elements as the shape implies.
 template <typename T, int Dim>
 Tensor<T, Dim>::Tensor(const std::array<size_t, Dim> &shape,
                        const std::vector<T> &data)
    : Tensor(shape) {
  const bool sizeMatches = (data_.size() == data.size());
  if (!sizeMatches)
    throw std::invalid_argument("Invalid fill data size");
  data_ = data;
 }
 /// Builds a tensor of the given shape filled with uniformly distributed
 /// random values: the closed range [min, max] for integral T, the half-open
 /// range [min, max) for floating-point T.
 ///
 /// Throws std::invalid_argument when min <= max does not hold (this also
 /// rejects NaN bounds) or when T is neither integral nor floating-point.
 ///
 /// The generator is a function-local static seeded once from
 /// std::random_device; no locking is performed here.
 template <typename T, int Dim>
 Tensor<T, Dim>::Tensor(const std::array<size_t, Dim> &shape, T min, T max)
    : Tensor(shape) {
  static std::random_device rd;
  static std::mt19937 gen(rd());
  if constexpr (std::is_integral_v<T>) {
    if (!(min <= max))
      throw std::invalid_argument("Invalid random range");
    // std::uniform_int_distribution is only specified for short/int/long/
    // long long and their unsigned forms, so char-sized T is drawn through a
    // 64-bit type of matching signedness and narrowed back afterwards.
    using Wide =
        std::conditional_t<std::is_signed_v<T>, long long, unsigned long long>;
    std::uniform_int_distribution<Wide> dis(min, max);
    for (T &e : data_)
      e = static_cast<T>(dis(gen));
  } else if constexpr (std::is_floating_point_v<T>) {
    if (!(min <= max))
      throw std::invalid_argument("Invalid random range");
    std::uniform_real_distribution<T> dis(min, max);
    for (T &e : data_)
      e = dis(gen);
  } else
    throw std::invalid_argument("Invalid randomized type");
 }
 // Copy constructor: copy-constructs the ITensor base from other, then
 // copies the element storage.
 template <typename T, int Dim>
 Tensor<T, Dim>::Tensor(const Tensor &other)
    : ITensor(other), data_(other.data_) {}
 // Copy assignment: assigns the ITensor base part first, then the element
 // storage. Returns *this.
 template <typename T, int Dim>
 Tensor<T, Dim> &Tensor<T, Dim>::operator=(const Tensor &other) {
  ITensor::operator=(other);
  data_ = other.data_;
  return *this;
 }
 // Move constructor: other is handed to the ITensor base as an rvalue, then
 // other.data_ is moved into this tensor's storage.
 template <typename T, int Dim>
 Tensor<T, Dim>::Tensor(Tensor &&other) noexcept
    : ITensor(std::move(other)), data_(std::move(other.data_)) {}
 // Move assignment: move-assigns the ITensor base part, then takes over
 // other's element storage. Returns *this.
 template <typename T, int Dim>
 Tensor<T, Dim> &Tensor<T, Dim>::operator=(Tensor &&other) noexcept {
  ITensor::operator=(std::move(other));
  data_ = std::move(other.data_);
  return *this;
 }
 // ===== GET/SET =====
 /// Mutable access to the element at flat index i (not range-checked).
 template <typename T, int Dim> T &Tensor<T, Dim>::operator[](size_t i) {
  T &element = data_[i];
  return element;
 }
 /// Read-only access to the element at flat index i (not range-checked).
 template <typename T, int Dim>
 const T &Tensor<T, Dim>::operator[](size_t i) const {
  const T &element = data_[i];
  return element;
 }
 /// Mutable access by one index per dimension; computeIndex turns the
 /// indices into a flat position in the storage.
 template <typename T, int Dim>
 template <typename... Indices>
 T &Tensor<T, Dim>::operator()(Indices... indices) {
  const size_t flat = computeIndex(indices...);
  return data_[flat];
 }
 /// Read-only access by one index per dimension; computeIndex turns the
 /// indices into a flat position in the storage.
 template <typename T, int Dim>
 template <typename... Indices>
 const T &Tensor<T, Dim>::operator()(Indices... indices) const {
  const size_t flat = computeIndex(indices...);
  return data_[flat];
 }
 // ===== OPERATORS =====
 /// Unary plus: returns a copy with unary + applied to every element.
 template <typename T, int Dim>
 Tensor<T, Dim> Tensor<T, Dim>::operator+() const {
  Tensor copy = *this;
  for (size_t idx = 0; idx < copy.data_.size(); ++idx)
    copy.data_[idx] = +copy.data_[idx];
  return copy;
 }
 /// Unary minus: returns a copy with every element negated.
 template <typename T, int Dim>
 Tensor<T, Dim> Tensor<T, Dim>::operator-() const {
  Tensor copy = *this;
  for (size_t idx = 0; idx < copy.data_.size(); ++idx)
    copy.data_[idx] = -copy.data_[idx];
  return copy;
 }
 /// Adds scalar to every element in place and returns *this.
 template <typename T, int Dim>
 Tensor<T, Dim> &Tensor<T, Dim>::operator+=(const T &scalar) {
  for (auto it = data_.begin(); it != data_.end(); ++it)
    *it += scalar;
  return *this;
 }
 /// Multiplies every element by scalar in place and returns *this.
 template <typename T, int Dim>
 Tensor<T, Dim> &Tensor<T, Dim>::operator*=(const T &scalar) {
  for (auto it = data_.begin(); it != data_.end(); ++it)
    *it *= scalar;
  return *this;
 }
 /// Adds other to this tensor position by position in the flat storage.
 /// checkItHasSameShape runs first and throws on a shape mismatch.
 template <typename T, int Dim>
 Tensor<T, Dim> &Tensor<T, Dim>::operator+=(const Tensor &other) {
  checkItHasSameShape(other);
  const size_t count = data_.size();
  size_t pos = 0;
  while (pos < count) {
    data_[pos] += other.data_[pos];
    ++pos;
  }
  return *this;
 }
 /// Multiplies this tensor by other position by position in the flat
 /// storage. checkItHasSameShape runs first and throws on a shape mismatch.
 template <typename T, int Dim>
 Tensor<T, Dim> &Tensor<T, Dim>::operator*=(const Tensor &other) {
  checkItHasSameShape(other);
  const size_t count = data_.size();
  size_t pos = 0;
  while (pos < count) {
    data_[pos] *= other.data_[pos];
    ++pos;
  }
  return *this;
 }
 /// Inner product (Dim == 1, result is a Tensor<T, 0>) or matrix product
 /// (Dim == 2, result is a Tensor<T, 2>). Other ranks fail to compile.
 /// Throws std::invalid_argument when the operand sizes are incompatible.
 template <typename T, int Dim>
 Tensor<T, Dim == 1 ? 0 : 2>
 Tensor<T, Dim>::operator%(const Tensor &other) const {
  static_assert(Dim == 1 || Dim == 2,
                "Inner product is only defined for vectors and matrices");
  if constexpr (Dim == 1) {
    const size_t length = data_.size();
    if (length != other.data_.size())
      throw std::invalid_argument("Vector sizes must match for inner product");
    T dot = T(0);
    for (size_t idx = 0; idx < length; ++idx)
      dot += data_[idx] * other.data_[idx];
    return Tensor<T, 0>({}, {dot});
  } else if constexpr (Dim == 2) {
    // Extents are read through axes_ so a transposed operand is honoured.
    const size_t rows = shape_[axes_[0]];
    const size_t inner = shape_[axes_[1]];
    if (inner != other.shape_[other.axes_[0]])
      throw std::invalid_argument(
          "Matrix dimensions must match for multiplication");
    const size_t cols = other.shape_[other.axes_[1]];
    Tensor<T, 2> product({rows, cols}, T(0));
    for (size_t r = 0; r < rows; ++r) {
      for (size_t c = 0; c < cols; ++c) {
        T acc = T(0);
        for (size_t k = 0; k < inner; ++k)
          acc += (*this)(r, k) * other(k, c);
        product(r, c) = acc;
      }
    }
    return product;
  }
 }
 // ===== UTILS =====
 /// Renders the tensor as text. Scalars, vectors and matrices are printed in
 /// full; higher ranks print the shape followed by at most the first ten
 /// elements of the flat storage.
 template <typename T, int Dim> std::string Tensor<T, Dim>::toString() const {
  std::ostringstream out;
  const char *typeName = typeid(T).name();
  if constexpr (Dim == 0) {
    out << "Scalar<" << typeName << ">: " << data_[0];
  } else if constexpr (Dim == 1) {
    out << "Vector<" << typeName << ">(" << shape_[0] << "): [";
    for (size_t idx = 0; idx < data_.size(); ++idx) {
      // Separator goes before every element except the first.
      if (idx != 0)
        out << ", ";
      out << data_[idx];
    }
    out << "]";
  } else if constexpr (Dim == 2) {
    const size_t rows = shape_[axes_[0]];
    const size_t cols = shape_[axes_[1]];
    out << "Matrix<" << typeName << ">(" << rows << "x" << cols << "):";
    for (size_t r = 0; r < rows; ++r) {
      out << "\n  [";
      for (size_t c = 0; c < cols; ++c) {
        if (c != 0)
          out << ", ";
        out << (*this)(r, c);
      }
      out << "]";
    }
  } else {
    out << "Tensor" << Dim << "D<" << typeName << ">" << "[";
    for (size_t axis = 0; axis < Dim; ++axis) {
      if (axis != 0)
        out << "x";
      out << shape_[axes_[axis]];
    }
    out << "]: [";
    const size_t shown = std::min(data_.size(), size_t(10));
    for (size_t idx = 0; idx < shown; ++idx) {
      if (idx != 0)
        out << ", ";
      out << data_[idx];
    }
    if (data_.size() > 10)
      out << ", ...";
    out << "]";
  }
  return out.str();
 }
--- a/src/tensor/fabric.hpp
+++ b/src/tensor/fabric.hpp
@@ -0,0 +1,21 @@
 #include <cstddef>
 template <typename T, int Dim> class Tensor;
 // Static-only factory for Tensor objects (construction is deleted). The
 // rank of the result is the number of extents passed to each function, and
 // every extent is converted to size_t.
 class Tensors {
  Tensors() = delete;
 public:
  // Tensor built from the shape alone.
  template <typename T, typename... Args> static auto empty(Args... args) {
    return Tensor<T, sizeof...(Args)>({static_cast<size_t>(args)...});
  }
  // Tensor with every element set to T(0).
  template <typename T, typename... Args> static auto zero(Args... args) {
    return Tensor<T, sizeof...(Args)>({static_cast<size_t>(args)...}, T(0));
  }
  // Tensor built through the (shape, min, max) constructor with the bounds
  // T(0) and T(1).
  template <typename T, typename... Args> static auto rand(Args... args) {
    return Tensor<T, sizeof...(Args)>({static_cast<size_t>(args)...}, T(0),
                                      T(1));
  }
 };
--- a/src/tensor/main
+++ b/src/tensor/main
--- a/src/tensor/main.cpp
+++ b/src/tensor/main.cpp
@@ -1,8 +1,9 @@
-#include "tensor.hpp"
+#include "cpu/tensor.hpp"
 #include <iostream>
 int main() {
-  Tensor<float, 2> a = Tensors::rand<float>(1, 3);
+  Tensor<float, 2> a = Tensor<float, 2>({2, 4});
  std::cout << a.toString();
  return 0;
 }
--- a/src/tensor/opencl/kernels/atomic.cl
+++ b/src/tensor/opencl/kernels/atomic.cl
@@ -0,0 +1,34 @@
 // Element-wise unary plus: each work-item writes +A[id] to B[id], where id
 // is its global id in dimension 0.
 __kernel void positive(__global float *A, __global float *B) {
  const int gid = get_global_id(0);
  const float value = A[gid];
  B[gid] = +value;
 }
 // Element-wise negation: each work-item writes -A[id] to B[id], where id is
 // its global id in dimension 0.
 __kernel void negative(__global float *A, __global float *B) {
  const int gid = get_global_id(0);
  const float value = A[gid];
  B[gid] = -value;
 }
 // Applies the activation selected by activation_type to x:
 //   0 LINEAR, 1 SIGMOID, 2 TANH, 3 RELU, 4 LEAKY_RELU, 5 ELU.
 // alpha is only read by LEAKY_RELU and ELU. Any other selector returns x
 // unchanged, the same as LINEAR.
 float activate_x(float x, const int activation_type, const float alpha) {
  if (activation_type == 1) // SIGMOID
    return 1.0f / (1.0f + exp(-x));
  if (activation_type == 2) // TANH
    return tanh(x);
  if (activation_type == 3) // RELU
    return fmax(0.0f, x);
  if (activation_type == 4) // LEAKY_RELU
    return (x > 0.0f) ? x : alpha * x;
  if (activation_type == 5) // ELU
    return (x > 0.0f) ? x : alpha * (exp(x) - 1.0f);
  // LINEAR (0) and every unrecognised selector.
  return x;
 }
 // Element-wise activation: each work-item passes input[id] through
 // activate_x and stores the result in output[id].
 __kernel void activate(__global float *input, __global float *output,
                        const int activation_type, const float alpha) {
  const int gid = get_global_id(0);
  const float value = input[gid];
  output[gid] = activate_x(value, activation_type, alpha);
 }
--- a/src/tensor/opencl/kernels/fusion.cl
+++ b/src/tensor/opencl/kernels/fusion.cl
--- a/src/tensor/opencl/kernels/scalar.cl
+++ b/src/tensor/opencl/kernels/scalar.cl
@@ -0,0 +1,9 @@
 // Scalar addition: each work-item writes A[id] + scalar to B[id].
 __kernel void add(__global float *A, __global float *B, float scalar) {
  const int gid = get_global_id(0);
  const float value = A[gid];
  B[gid] = value + scalar;
 }
 // Scalar multiplication: each work-item writes A[id] * scalar to B[id].
 __kernel void mult(__global float *A, __global float *B, float scalar) {
  const int gid = get_global_id(0);
  const float value = A[gid];
  B[gid] = value * scalar;
 }
--- a/src/tensor/opencl/kernels/tensor.cl
+++ b/src/tensor/opencl/kernels/tensor.cl
@@ -1,4 +1,15 @@
-float activate_x(float x, const int activation_type, const float alpha) {
+__kernel void add(__global float *A, __global float *B, __global float *C,
                  float x) {
  int i = get_global_id(0);
  C[i] = A[i] + (B[i] * x);
 }
 __kernel void mult(__global float *A, __global float *B, __global float *C,
                  float x) {
  int i = get_global_id(0);
  C[i] = A[i] * (B[i] * x);
 }
 float activate(float x, const int activation_type, const float alpha) {
  switch (activation_type) {
  case 0: // LINEAR
    return x;
@@ -17,12 +28,6 @@ float activate_x(float x, const int activation_type, const float alpha) {
  }
 }
 __kernel void activate(__global float *input, __global float *output,
                       const int activation_type, const float alpha) {
  int i = get_global_id(0);
  output[i] = activate_x(input[i], activation_type, alpha);
 }
 __kernel void mult_small(__global float *A, __global float *B,
                         __global float *C, __global float *bias,
                         const int activation_type, const float alpha,
@@ -48,7 +53,7 @@ __kernel void mult_small(__global float *A, __global float *B,
    float result = sum + bias[col];
    if (activation_type != 0) {
-      result = activate_x(result, activation_type, alpha);
+      result = activate(result, activation_type, alpha);
    }
    C[row * N + col] = result;
  }
@@ -121,24 +126,9 @@ __kernel void mult(__global float *A, __global float *B, __global float *C,
  if (global_i < M && global_j < N) {
    float result = sum + bias[global_j];
    if (activation_type != 0) {
-      result = activate_x(result, activation_type, alpha);
+      result = activate(result, activation_type, alpha);
    }
    C[global_i * N + global_j] = result;
  }
 }
 __kernel void mult_sc(__global float *A, __global float *B, float scalar) {
  int i = get_global_id(0);
  B[i] = A[i] * scalar;
 }
 __kernel void add(__global float *A, __global float *B, __global float *C,
                  float x) {
  int i = get_global_id(0);
  C[i] = A[i] + (B[i] * x);
 }
 __kernel void add_sc(__global float *A, __global float *B, float scalar) {
  int i = get_global_id(0);
  B[i] = A[i] + scalar;
 }
--- a/src/tensor/opencl/opencl.cpp
+++ b/src/tensor/opencl/opencl.cpp
@@ -1,5 +1,10 @@
 #include "opencl.hpp"
 #include <fstream>
 #include <iostream>
 #include <sstream>
 #include <stdexcept>
 std::string OpenCL::readProgram(const std::string &filePath) {
  std::ifstream file(filePath, std::ios::binary);
  if (!file.is_open()) {
--- a/src/tensor/opencl/opencl.hpp
+++ b/src/tensor/opencl/opencl.hpp
@@ -4,16 +4,11 @@
 #define CL_HPP_TARGET_OPENCL_VERSION 300
 #include <CL/opencl.hpp>
 #include <fstream>
 #include <iostream>
 #include <memory>
 #include <sstream>
 #include <stdexcept>
 #include <unordered_map>
 class OpenCL {
 public:
-  enum class Program { TENSOR };
+  enum class Program { ATOMIC, SCALAR, TENSOR, FUSION };
 private:
  cl::Device device;
@@ -22,7 +17,10 @@ private:
  std::unordered_map<Program, cl::Program> programs;
  std::unordered_map<Program, std::string> programPaths = {
-      {Program::TENSOR, "./opencl/kernels/tensor.cl"}};
+      {Program::ATOMIC, "./opencl/kernels/atomic.cl"},
      {Program::SCALAR, "./opencl/kernels/scalar.cl"},
      {Program::TENSOR, "./opencl/kernels/tensor.cl"},
      {Program::FUSION, "./opencl/kernels/fusion.cl"}};
  std::string readProgram(const std::string &filePath);
  cl::Program compileProgram(const std::string &file);
--- a/src/tensor/opencl/tensor.hpp
+++ b/src/tensor/opencl/tensor.hpp
@@ -0,0 +1,5 @@
 #pragma once
 #include "opencl.hpp"
 #include "../tensor.hpp"
--- a/src/tensor/opencl/tensor.tpp
+++ b/src/tensor/opencl/tensor.tpp
--- a/src/tensor/tensor.cpp
+++ b/src/tensor/tensor.cpp
@@ -1 +0,0 @@
 #include "tensor.hpp"
--- a/src/tensor/tensor.hpp
+++ b/src/tensor/tensor.hpp
@@ -1,392 +1,78 @@
 #pragma once
 #include <array>
-#include <random>
+#include <cstddef>
-#include <sstream>
+#include <string>
 #include <stdexcept>
 #include <vector>
 template <typename T, int Dim> class Tensor;
-template <typename T, int Dim> class TensorInfo {
+template <typename T, int Dim> class ITensor {
 protected:
  std::array<size_t, Dim> shape_;
  std::array<int, Dim> axes_;
-  template <typename... Indices> size_t computeIndex(Indices... indices) const {
+  template <typename... Indices> size_t computeIndex(Indices... indices) const;
    static_assert(sizeof...(Indices) == Dim, "Invalid number of indices");
    std::array<size_t, Dim> indicesArray = {static_cast<size_t>(indices)...};
    std::array<size_t, Dim> axesIndices;
    for (int i = 0; i < Dim; ++i)
      axesIndices[axes_[i]] = indicesArray[i];
    size_t index = 0;
    size_t stride = 1;
    for (int i = Dim - 1; i >= 0; --i) {
      index += axesIndices[i] * stride;
      stride *= shape_[i];
    }
    return index;
  }
-  void checkItHasSameShape(const TensorInfo &other) {
+  void checkItHasSameShape(const ITensor &other) const;
-    if (getShape() != other.getShape())
+  void checkAxisInDim(int axis) const;
      throw std::invalid_argument("Tensor shapes must match");
  }
  void checkAxisInDim(int axis) {
    if (axis < 0 || axis >= Dim)
      throw std::invalid_argument("Invalid axis index");
  }
 public:
-  typedef class Tensor<T, Dim> Ten;
+  typedef class Tensor<T, Dim> Tensor;
-  TensorInfo() = delete;
+  ITensor() = delete;
  ITensor(const std::array<size_t, Dim> &shape);
  ITensor(const ITensor &other);
  ITensor &operator=(const ITensor &other);
  ITensor(ITensor &&other) noexcept;
  ITensor &operator=(ITensor &&other) noexcept;
  ~ITensor() = default;
-  TensorInfo(const std::array<size_t, Dim> &shape) {
+  const std::array<int, Dim> &getAxes() const;
-    for (size_t d : shape)
+  const std::array<size_t, Dim> getShape() const;
-      if (d == 0)
+  size_t getSize() const;
        throw std::invalid_argument("Invalid shape");
    shape_ = shape;
    for (int i = 0; i < Dim; ++i)
      axes_[i] = i;
  }
-  TensorInfo(const TensorInfo &other)
+  Tensor &transpose(const std::array<int, Dim> &new_axes);
-      : shape_(other.shape_), axes_(other.axes_) {}
+  Tensor &transpose(int axis_a, int axis_b);
-  TensorInfo &operator=(const TensorInfo &other) {
+  Tensor &t();
    shape_ = other.shape_;
    axes_ = other.axes_;
    return *this;
  }
  TensorInfo(TensorInfo &&other) noexcept
      : shape_(std::move(other.shape_)), axes_(std::move(other.axes_)) {}
  TensorInfo &operator=(TensorInfo &&other) noexcept {
    shape_ = std::move(other.shape_);
    axes_ = std::move(other.axes_);
    return *this;
  }
  ~TensorInfo() = default;
-  const std::array<int, Dim> &getAxes() const { return axes_; }
+  // === Operators ===
-  const std::array<size_t, Dim> getShape() const {
+  virtual Tensor operator+() const = 0;
-    std::array<size_t, Dim> result;
+  virtual Tensor operator-() const = 0;
    for (int i = 0; i < Dim; ++i)
      result[i] = shape_[axes_[i]];
    return result;
  }
  size_t getSize() const {
    size_t size = 1;
    for (size_t i = 0; i < shape_.size(); ++i)
      size *= shape_[i];
    return size;
  };
-  Ten &transpose(const std::array<int, Dim> &new_axes) {
+  virtual Tensor &operator+=(const T &scalar) = 0;
-    std::array<bool, Dim> used{};
+  virtual Tensor &operator*=(const T &scalar) = 0;
    for (int axis : new_axes) {
      checkAxisInDim(axis);
      if (used[axis])
        throw std::invalid_argument("Duplicate axis index");
      used[axis] = true;
    }
    axes_ = new_axes;
    return static_cast<Ten &>(*this);
  }
  Ten &transpose(int axis_a, int axis_b) {
    checkAxisInDim(axis_a);
    checkAxisInDim(axis_b);
    if (axis_a == axis_b)
      throw std::invalid_argument("Duplicate axis index");
    std::swap(axes_[axis_a], axes_[axis_b]);
    return static_cast<Ten &>(*this);
  }
  Ten &t() {
    static_assert(Dim >= 2, "Can't change the only axis");
    std::swap(axes_[Dim - 1], axes_[Dim - 2]);
    return static_cast<Ten &>(*this);
  }
-  virtual Ten operator+() const = 0;
+  virtual Tensor &operator+=(const Tensor &other) = 0;
-  virtual Ten operator-() const = 0;
+  virtual Tensor &operator*=(const Tensor &other) = 0;
-  virtual Ten &operator+=(const T &scalar) = 0;
+  Tensor operator+(const T &scalar) const;
-  virtual Ten &operator*=(const T &scalar) = 0;
+  friend Tensor operator+(const T &scalar, const Tensor &tensor) {
  Ten operator+(const T &scalar) const {
    Ten result = static_cast<const Ten &>(*this);
    result += scalar;
    return result;
  }
  friend Ten operator+(const T &scalar, const Ten &tensor) {
    return tensor + scalar;
  }
-  Ten &operator-=(const T &scalar) {
+  Tensor &operator-=(const T &scalar);
-    *this += -scalar;
+  Tensor operator-(const T &scalar) const;
-    return static_cast<Ten &>(*this);
+  friend Tensor operator-(const T &scalar, const Tensor &tensor) {
  }
  Ten operator-(const T &scalar) const {
    Ten result = static_cast<const Ten &>(*this);
    result -= scalar;
    return result;
  }
  friend Ten operator-(const T &scalar, const Ten &tensor) {
    return tensor + (-scalar);
  }
-  Ten operator*(const T &scalar) const {
+  Tensor operator*(const T &scalar) const;
-    Ten result = static_cast<const Ten &>(*this);
+  friend Tensor operator*(const T &scalar, const Tensor &tensor) {
    result *= scalar;
    return result;
  }
  friend Ten operator*(const T &scalar, const Ten &tensor) {
    return tensor * scalar;
  }
-  Ten &operator/=(const T &scalar) {
+  Tensor &operator/=(const T &scalar);
-    *this *= T(1) / scalar;
+  Tensor operator/(const T &scalar) const;
    return static_cast<Ten &>(*this);
  }
  Ten operator/(const T &scalar) const {
    Ten result = static_cast<const Ten &>(*this);
    result /= scalar;
    return result;
  }
-  virtual Ten &operator+=(const Ten &other) = 0;
+  Tensor operator+(const Tensor &other) const;
  virtual Ten &operator*=(const Ten &other) = 0;
-  Ten operator+(const Ten &other) const {
+  Tensor &operator-=(const Tensor &other);
-    Ten result = static_cast<const Ten &>(*this);
+  Tensor operator-(const Tensor &other) const;
    result += other;
    return result;
  }
-  Ten &operator-=(const Ten &other) {
+  Tensor operator*(const Tensor &other) const;
    checkItHasSameShape(other);
    *this += -other;
    return static_cast<Ten &>(*this);
  }
  Ten operator-(const Ten &other) const {
    Ten result = static_cast<const Ten &>(*this);
    result -= other;
    return result;
  }
  Ten operator*(const Ten &other) const {
    Ten result = static_cast<const Ten &>(*this);
    result *= other;
    return result;
  }
  // === Utils ===
  virtual std::string toString() const = 0;
 };
-template <typename T, int Dim> class Tensor : public TensorInfo<T, Dim> {
+#include "tensor.tpp"
 private:
  std::vector<T> data_;
 public:
  typedef class TensorInfo<T, Dim> TensorInfo;
  using TensorInfo::axes_;
  using TensorInfo::checkAxisInDim;
  using TensorInfo::checkItHasSameShape;
  using TensorInfo::computeIndex;
  using TensorInfo::getSize;
  using TensorInfo::shape_;
  Tensor() = delete;
  Tensor(const std::array<size_t, Dim> &shape) : TensorInfo(shape) {
    size_t size = 1;
    for (size_t dim : shape)
      size *= dim;
    data_.resize(size);
  }
  Tensor(const std::array<size_t, Dim> &shape, T value) : Tensor(shape) {
    std::fill(data_.begin(), data_.end(), value);
  }
  Tensor(const std::array<size_t, Dim> &shape, const std::vector<T> &data)
      : Tensor(shape) {
    if (data.size() != data_.size())
      throw std::invalid_argument("Invalid fill data size");
    data_ = data;
  }
  Tensor(const std::array<size_t, Dim> &shape, T min, T max) : Tensor(shape) {
    static std::random_device rd;
    static std::mt19937 gen(rd());
    if constexpr (std::is_integral_v<T>) {
      std::uniform_int_distribution<T> dis(min, max);
      for (T &e : data_)
        e = dis(gen);
    } else if constexpr (std::is_floating_point_v<T>) {
      std::uniform_real_distribution<T> dis(min, max);
      for (T &e : data_)
        e = dis(gen);
    } else
      throw std::invalid_argument("Invalid randomized type");
  }
  Tensor(const Tensor &other) : TensorInfo(other), data_(other.data_) {}
  Tensor &operator=(const Tensor &other) {
    TensorInfo::operator=(other);
    data_ = other.data_;
    return *this;
  }
  Tensor(Tensor &&other) noexcept
      : TensorInfo(std::move(other)), data_(std::move(other.data_)) {}
  Tensor &operator=(Tensor &&other) noexcept {
    TensorInfo::operator=(std::move(other));
    data_ = std::move(other.data_);
    return *this;
  }
  ~Tensor() = default;
  T &operator[](size_t i) { return data_[i]; }
  const T &operator[](size_t i) const { return data_[i]; }
  template <typename... Indices> T &operator()(Indices... indices) {
    return data_[computeIndex(indices...)];
  }
  template <typename... Indices> const T &operator()(Indices... indices) const {
    return data_[computeIndex(indices...)];
  }
  using TensorInfo::operator+;
  using TensorInfo::operator-;
  Tensor operator+() const override {
    Tensor result = *this;
    for (T &e : result.data_)
      e = +e;
    return result;
  }
  Tensor operator-() const override {
    Tensor result = *this;
    for (T &e : result.data_)
      e = -e;
    return result;
  }
  Tensor &operator+=(const T &scalar) override {
    for (T &e : data_)
      e += scalar;
    return *this;
  }
  Tensor &operator*=(const T &scalar) override {
    for (T &e : data_)
      e *= scalar;
    return *this;
  }
  Tensor &operator+=(const Tensor &other) override {
    checkItHasSameShape(other);
    for (size_t i = 0; i < data_.size(); ++i)
      data_[i] += other.data_[i];
    return *this;
  }
  Tensor &operator*=(const Tensor &other) override {
    checkItHasSameShape(other);
    for (size_t i = 0; i < data_.size(); ++i)
      data_[i] *= other.data_[i];
    return *this;
  }
  Tensor<T, Dim == 1 ? 0 : 2> operator%(const Tensor &other) const {
    static_assert(Dim == 1 || Dim == 2,
                  "Inner product is only defined for vectors and matrices");
    if constexpr (Dim == 1) {
      if (data_.size() != other.data_.size())
        throw std::invalid_argument(
            "Vector sizes must match for inner product");
      T result_val = T(0);
      for (size_t i = 0; i < data_.size(); ++i)
        result_val += data_[i] * other.data_[i];
      return Tensor<T, 0>({}, {result_val});
    } else if constexpr (Dim == 2) {
      if (shape_[axes_[1]] != other.shape_[other.axes_[0]])
        throw std::invalid_argument(
            "Matrix dimensions must match for multiplication");
      size_t m = shape_[axes_[0]];
      size_t n = shape_[axes_[1]];
      size_t p = other.shape_[other.axes_[1]];
      Tensor<T, 2> result({m, p}, T(0));
      for (size_t i = 0; i < m; ++i) {
        for (size_t j = 0; j < p; ++j) {
          T sum = T(0);
          for (size_t k = 0; k < n; ++k)
            sum += (*this)(i, k) * other(k, j);
          result(i, j) = sum;
        }
      }
      return result;
    }
  }
  std::string toString() const override {
    std::ostringstream oss;
    if constexpr (Dim == 0) {
      oss << "Scalar<" << typeid(T).name() << ">: " << data_[0];
    } else if constexpr (Dim == 1) {
      oss << "Vector<" << typeid(T).name() << ">(" << shape_[0] << "): [";
      for (size_t i = 0; i < data_.size(); ++i) {
        oss << data_[i];
        if (i < data_.size() - 1)
          oss << ", ";
      }
      oss << "]";
    } else if constexpr (Dim == 2) {
      oss << "Matrix<" << typeid(T).name() << ">(" << shape_[axes_[0]] << "x"
          << shape_[axes_[1]] << "):";
      for (size_t i = 0; i < shape_[axes_[0]]; ++i) {
        oss << "\n  [";
        for (size_t j = 0; j < shape_[axes_[1]]; ++j) {
          oss << (*this)(i, j);
          if (j < shape_[axes_[1]] - 1)
            oss << ", ";
        }
        oss << "]";
      }
    } else {
      oss << "Tensor" << Dim << "D<" << typeid(T).name() << ">" << "[";
      for (size_t i = 0; i < Dim; ++i) {
        oss << shape_[axes_[i]];
        if (i < Dim - 1)
          oss << "x";
      }
      oss << "]: [";
      size_t show = std::min(data_.size(), size_t(10));
      for (size_t i = 0; i < show; ++i) {
        oss << data_[i];
        if (i < show - 1)
          oss << ", ";
      }
      if (data_.size() > 10)
        oss << ", ...";
      oss << "]";
    }
    return oss.str();
  }
 };
 template <typename T> using Scalar = Tensor<T, 0>;
 template <typename T> using Vector = Tensor<T, 1>;
 template <typename T> using Matrix = Tensor<T, 2>;
 class Tensors {
  Tensors() = delete;
 public:
  template <typename T, typename... Args> static auto empty(Args... args) {
    return Tensor<T, sizeof...(Args)>({static_cast<size_t>(args)...});
  }
  template <typename T, typename... Args> static auto zero(Args... args) {
    return Tensor<T, sizeof...(Args)>({static_cast<size_t>(args)...}, T(0));
  }
  template <typename T, typename... Args> static auto rand(Args... args) {
    return Tensor<T, sizeof...(Args)>({static_cast<size_t>(args)...}, T(0),
                                      T(1));
  }
 };
--- a/src/tensor/tensor.tpp
+++ b/src/tensor/tensor.tpp
@@ -0,0 +1,181 @@
 #pragma once
 #include "tensor.hpp"
 #include <stdexcept>
 // ===== UTILS =====
 /// Maps one index per logical axis to a flat offset. The indices are first
 /// routed through axes_ into storage order, then folded with strides taken
 /// from shape_, the last storage dimension varying fastest.
 template <typename T, int Dim>
 template <typename... Indices>
 size_t ITensor<T, Dim>::computeIndex(Indices... indices) const {
  static_assert(sizeof...(Indices) == Dim, "Invalid number of indices");
  std::array<size_t, Dim> logical = {static_cast<size_t>(indices)...};
  std::array<size_t, Dim> physical;
  for (int axis = 0; axis < Dim; ++axis)
    physical[axes_[axis]] = logical[axis];
  size_t flat = 0;
  size_t step = 1;
  // Walk the storage dimensions from last to first.
  for (int dim = Dim; dim-- > 0;) {
    flat += physical[dim] * step;
    step *= shape_[dim];
  }
  return flat;
 }
 /// Throws std::invalid_argument unless both tensors report the same shape
 /// through getShape().
 template <typename T, int Dim>
 void ITensor<T, Dim>::checkItHasSameShape(const ITensor<T, Dim> &other) const {
  const std::array<size_t, Dim> mine = getShape();
  const std::array<size_t, Dim> theirs = other.getShape();
  if (mine != theirs)
    throw std::invalid_argument("Tensor shapes must match");
 }
 /// Throws std::invalid_argument unless axis lies in [0, Dim).
 template <typename T, int Dim>
 void ITensor<T, Dim>::checkAxisInDim(int axis) const {
  const bool inRange = (axis >= 0) && (axis < Dim);
  if (!inRange)
    throw std::invalid_argument("Invalid axis index");
 }
 // ====== CONSTRUCT =====
 /// Stores shape and sets the axis permutation to the identity (0, 1, ...).
 /// Throws std::invalid_argument when any extent is zero.
 template <typename T, int Dim>
 ITensor<T, Dim>::ITensor(const std::array<size_t, Dim> &shape) {
  for (size_t pos = 0; pos < shape.size(); ++pos) {
    if (shape[pos] == 0)
      throw std::invalid_argument("Invalid shape");
  }
  shape_ = shape;
  int next = 0;
  for (int &axis : axes_)
    axis = next++;
 }
 // Copy constructor: copies the stored shape and the axis permutation.
 template <typename T, int Dim>
 ITensor<T, Dim>::ITensor(const ITensor &other)
    : shape_(other.shape_), axes_(other.axes_) {}
 // Copy assignment: copies the stored shape and the axis permutation.
 // Returns *this.
 template <typename T, int Dim>
 ITensor<T, Dim> &ITensor<T, Dim>::operator=(const ITensor &other) {
  shape_ = other.shape_;
  axes_ = other.axes_;
  return *this;
 }
 // Move constructor: move-constructs the shape and the axis permutation from
 // other's members.
 template <typename T, int Dim>
 ITensor<T, Dim>::ITensor(ITensor &&other) noexcept
    : shape_(std::move(other.shape_)), axes_(std::move(other.axes_)) {}
 // Move assignment: move-assigns the shape and the axis permutation from
 // other's members. Returns *this.
 template <typename T, int Dim>
 ITensor<T, Dim> &ITensor<T, Dim>::operator=(ITensor &&other) noexcept {
  shape_ = std::move(other.shape_);
  axes_ = std::move(other.axes_);
  return *this;
 }
 // ===== GET/SET =====
 /// Returns the current axis permutation by const reference.
 template <typename T, int Dim>
 const std::array<int, Dim> &ITensor<T, Dim>::getAxes() const {
  return this->axes_;
 }
 /// Returns the shape as seen through the axis permutation: entry i is
 /// shape_[axes_[i]].
 template <typename T, int Dim>
 const std::array<size_t, Dim> ITensor<T, Dim>::getShape() const {
  std::array<size_t, Dim> logicalShape;
  size_t slot = 0;
  for (int axis : axes_)
    logicalShape[slot++] = shape_[axis];
  return logicalShape;
 }
 /// Returns the product of all stored extents (1 when Dim is 0).
 template <typename T, int Dim> size_t ITensor<T, Dim>::getSize() const {
  size_t total = 1;
  for (size_t extent : shape_)
    total *= extent;
  return total;
 }
 // ===== TRANSPOSE =====
 template <typename T, int Dim>
 ITensor<T, Dim>::Tensor &
 ITensor<T, Dim>::transpose(const std::array<int, Dim> &new_axes) {
  std::array<bool, Dim> used{};
  for (int axis : new_axes) {
    checkAxisInDim(axis);
    if (used[axis])
      throw std::invalid_argument("Duplicate axis index");
    used[axis] = true;
  }
  axes_ = new_axes;
  return static_cast<Tensor &>(*this);
 }
 template <typename T, int Dim>
 ITensor<T, Dim>::Tensor &ITensor<T, Dim>::transpose(int axis_a, int axis_b) {
  checkAxisInDim(axis_a);
  checkAxisInDim(axis_b);
  if (axis_a == axis_b)
    throw std::invalid_argument("Duplicate axis index");
  std::swap(axes_[axis_a], axes_[axis_b]);
  return static_cast<Tensor &>(*this);
 }
 template <typename T, int Dim> ITensor<T, Dim>::Tensor &ITensor<T, Dim>::t() {
  static_assert(Dim >= 2, "Can't change the only axis");
  std::swap(axes_[Dim - 1], axes_[Dim - 2]);
  return static_cast<Tensor &>(*this);
 }
 // ===== OPERATORS ======
 template <typename T, int Dim>
 ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator+(const T &scalar) const {
  Tensor result = static_cast<const Tensor &>(*this);
  result += scalar;
  return result;
 }
 template <typename T, int Dim>
 ITensor<T, Dim>::Tensor &ITensor<T, Dim>::operator-=(const T &scalar) {
  *this += -scalar;
  return static_cast<Tensor &>(*this);
 }
 template <typename T, int Dim>
 ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator-(const T &scalar) const {
  Tensor result = static_cast<const Tensor &>(*this);
  result -= scalar;
  return result;
 }
 template <typename T, int Dim>
 ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator*(const T &scalar) const {
  Tensor result = static_cast<const Tensor &>(*this);
  result *= scalar;
  return result;
 }
 template <typename T, int Dim>
 ITensor<T, Dim>::Tensor &ITensor<T, Dim>::operator/=(const T &scalar) {
  *this *= T(1) / scalar;
  return static_cast<Tensor &>(*this);
 }
 template <typename T, int Dim>
 ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator/(const T &scalar) const {
  Tensor result = static_cast<const Tensor &>(*this);
  result /= scalar;
  return result;
 }
 template <typename T, int Dim>
 ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator+(const Tensor &other) const {
  Tensor result = static_cast<const Tensor &>(*this);
  result += other;
  return result;
 }
 template <typename T, int Dim>
 ITensor<T, Dim>::Tensor &ITensor<T, Dim>::operator-=(const Tensor &other) {
  checkItHasSameShape(other);
  *this += -other;
  return static_cast<Tensor &>(*this);
 }
 template <typename T, int Dim>
 ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator-(const Tensor &other) const {
  Tensor result = static_cast<const Tensor &>(*this);
  result -= other;
  return result;
 }
 template <typename T, int Dim>
 ITensor<T, Dim>::Tensor ITensor<T, Dim>::operator*(const Tensor &other) const {
  Tensor result = static_cast<const Tensor &>(*this);
  result *= other;
  return result;
 }