mirror of
https://github.com/StepanovPlaton/NeuralNetwork.git
synced 2026-04-04 12:50:39 +04:00
New tensor lib
This commit is contained in:
9
src/tensor/.clangd
Normal file
9
src/tensor/.clangd
Normal file
@@ -0,0 +1,9 @@
|
||||
CompileFlags:
|
||||
Add:
|
||||
- -std=c++23
|
||||
- -Wall
|
||||
- -Wextra
|
||||
- -Wpedantic
|
||||
Remove: []
|
||||
Diagnostics:
|
||||
UnusedIncludes: Strict
|
||||
39
src/tensor/Makefile
Normal file
39
src/tensor/Makefile
Normal file
@@ -0,0 +1,39 @@
|
||||
# Build configuration for the tensor library (native binary + Python module).
CXX = g++
CXXFLAGS = -Wall -Wextra -O1 -g -std=c++23

# Detect the host OS; the OS environment variable is only set on Windows.
ifeq ($(OS),Windows_NT)
DETECTED_OS := Windows
else
DETECTED_OS := $(shell uname -s)
endif

ifeq ($(DETECTED_OS),Windows)
TARGET = main.exe
MKDIR = powershell -Command "mkdir"
SHARED_LIB_EXT = pyd
else
TARGET = main
MKDIR = mkdir -p
SHARED_LIB_EXT = so
endif

BUILD_DIR = build
COMMON_SRC = tensor.cpp

# Python / pybind11 locations used by the `module` target.
PYTHON_PATH = $(shell python -c "from sysconfig import get_paths; print(get_paths()['data'])")
PYTHON_INCLUDE = $(PYTHON_PATH)\include
PYTHON_LIBS = $(PYTHON_PATH)\libs
PYBIND_INCLUDE = $(shell python -c "import pybind11; print(pybind11.get_include())")

# BUG FIX: the Python library was hardcoded as -lpython313, which breaks for
# every other interpreter version. Derive it from the interpreter instead
# (Windows import libs drop the dot: python313; ELF ones keep it: python3.13).
PYTHON_VER = $(shell python -c "import sysconfig; print(sysconfig.get_config_var('VERSION'))")
ifeq ($(DETECTED_OS),Windows)
PYTHON_LIB = python$(subst .,,$(PYTHON_VER))
else
PYTHON_LIB = python$(PYTHON_VER)
endif

.DEFAULT_GOAL := $(TARGET)
# `module` and `clean` produce no file named after themselves.
.PHONY: module clean

$(BUILD_DIR):
	$(MKDIR) $(BUILD_DIR)

$(TARGET): $(COMMON_SRC) main.cpp | $(BUILD_DIR)
	$(CXX) $(CXXFLAGS) -o $@ $^

module: $(COMMON_SRC) pybind.cpp | $(BUILD_DIR)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o tensor.$(SHARED_LIB_EXT) $^ -I"$(PYTHON_INCLUDE)" -L"$(PYTHON_LIBS)" -l$(PYTHON_LIB) -I"$(PYBIND_INCLUDE)"

# NOTE(review): `rm -rf` assumes a POSIX shell (Git Bash/MSYS on Windows) —
# confirm that matches the Windows workflow before relying on `clean` there.
clean:
	rm -rf $(BUILD_DIR) $(TARGET) *.$(SHARED_LIB_EXT)
|
||||
2
src/tensor/main.cpp
Normal file
2
src/tensor/main.cpp
Normal file
@@ -0,0 +1,2 @@
|
||||
|
||||
// Placeholder entry point for the native build target; the library's real
// functionality lives in tensor.hpp and the Python module.
int main() { return 0; }
|
||||
144
src/tensor/opencl/kernels/tensor.cl
Normal file
144
src/tensor/opencl/kernels/tensor.cl
Normal file
@@ -0,0 +1,144 @@
|
||||
/* Apply the activation selected by `activation_type` to one value.
 * Codes: 0 = linear, 1 = sigmoid, 2 = tanh, 3 = ReLU, 4 = leaky ReLU,
 * 5 = ELU. `alpha` is the slope/scale parameter for leaky ReLU and ELU.
 * Unknown codes behave like linear (identity). */
float activate_x(float x, const int activation_type, const float alpha) {
  if (activation_type == 1)
    return 1.0f / (1.0f + exp(-x)); // sigmoid
  if (activation_type == 2)
    return tanh(x);
  if (activation_type == 3)
    return fmax(0.0f, x); // ReLU
  if (activation_type == 4)
    return (x > 0.0f) ? x : alpha * x; // leaky ReLU
  if (activation_type == 5)
    return (x > 0.0f) ? x : alpha * (exp(x) - 1.0f); // ELU
  return x; // 0 and any unrecognized code: identity
}
|
||||
|
||||
/* Element-wise activation: output[i] = f(input[i]).
 * One work-item per element; launch with global size == element count. */
__kernel void activate(__global float *input, __global float *output,
                       const int activation_type, const float alpha) {
  const int gid = get_global_id(0);
  output[gid] = activate_x(input[gid], activation_type, alpha);
}
|
||||
|
||||
/* Naive GEMM for small matrices: C = activate(A * B + bias).
 * A is MxK, C is MxN, and B is KxN — or NxK when `transpose_B` is set,
 * in which case element (col, k) is read from B[col * K + k].
 * `bias` holds one value per output column; activation_type 0 skips the
 * activation entirely. One work-item per output element. */
__kernel void mult_small(__global float *A, __global float *B,
                         __global float *C, __global float *bias,
                         const int activation_type, const float alpha,
                         const int M, const int N, const int K,
                         const int transpose_B) {
  const int row = get_global_id(0);
  const int col = get_global_id(1);
  if (row >= M || col >= N)
    return; // padding work-items outside the output matrix

  float acc = 0.0f;
  for (int k = 0; k < K; k++) {
    const float b_val = transpose_B ? B[col * K + k] : B[k * N + col];
    acc += A[row * K + k] * b_val;
  }

  float out = acc + bias[col];
  if (activation_type != 0)
    out = activate_x(out, activation_type, alpha);
  C[row * N + col] = out;
}
|
||||
|
||||
/* Tiled GEMM with 16x16 local-memory tiles: C = activate(A * B + bias).
 * A is MxK, C is MxN, and B is KxN — or NxK when `transpose_B` is set.
 * `bias` holds one value per output column.
 * NOTE(review): assumes the host enqueues this kernel with a 16x16 local
 * work size so get_local_size() matches tile_size — confirm at call sites. */
__kernel void mult(__global float *A, __global float *B, __global float *C,
                   __global float *bias, const int activation_type,
                   const float alpha, const int M, const int N, const int K,
                   const int transpose_B) {
  const int tile_size = 16;

  int local_i = get_local_id(0);
  int local_j = get_local_id(1);
  int local_size_i = get_local_size(0);
  int local_size_j = get_local_size(1);

  int global_i = get_group_id(0) * local_size_i + local_i; // row in C
  int global_j = get_group_id(1) * local_size_j + local_j; // column in C

  __local float tile_A[16][16];
  __local float tile_B[16][16];

  float sum = 0.0f;

  // Walk the K dimension one tile at a time (last tile may be partial).
  int num_tiles = (K + tile_size - 1) / tile_size;

  for (int tile = 0; tile < num_tiles; tile++) {
    int tile_offset = tile * tile_size;

    // Load the A tile; out-of-range slots are zero-padded so the unrolled
    // inner product below never reads garbage.
    int load_j_A = tile_offset + local_j;

    if (global_i < M && load_j_A < K) {
      tile_A[local_j][local_i] = A[global_i * K + load_j_A];
    } else {
      tile_A[local_j][local_i] = 0.0f;
    }

    // Load the B tile, honouring the transpose flag.
    int load_i_B = tile_offset + local_i; // depth (k) index for this thread

    if (transpose_B) {
      // B is stored N x K: element (col, k) lives at B[col * K + k], the
      // same layout mult_small uses.
      // BUG FIX: the original read B[global_j * N + load_i_B] with the
      // bounds checks swapped (load_i_B < N && global_j < K), which is only
      // correct when N == K.
      if (load_i_B < K && global_j < N) {
        tile_B[local_j][local_i] = B[global_j * K + load_i_B];
      } else {
        tile_B[local_j][local_i] = 0.0f;
      }
    } else {
      // B is stored K x N (row-major).
      if (load_i_B < K && global_j < N) {
        tile_B[local_j][local_i] = B[load_i_B * N + global_j];
      } else {
        tile_B[local_j][local_i] = 0.0f;
      }
    }

    // All threads must finish writing the tiles before anyone reads them.
    barrier(CLK_LOCAL_MEM_FENCE);

#pragma unroll
    for (int k = 0; k < tile_size; ++k) {
      sum += tile_A[k][local_i] * tile_B[local_j][k];
    }

    // ...and finish reading before the next iteration overwrites them.
    barrier(CLK_LOCAL_MEM_FENCE);
  }

  if (global_i < M && global_j < N) {
    float result = sum + bias[global_j];
    if (activation_type != 0) {
      result = activate_x(result, activation_type, alpha);
    }
    C[global_i * N + global_j] = result;
  }
}
|
||||
|
||||
/* Scalar multiply: B[i] = A[i] * scalar. One work-item per element. */
__kernel void mult_sc(__global float *A, __global float *B, float scalar) {
  const int gid = get_global_id(0);
  B[gid] = A[gid] * scalar;
}
|
||||
|
||||
/* Scaled element-wise add: C[i] = A[i] + B[i] * x.
 * x = 1 gives addition, x = -1 gives subtraction. */
__kernel void add(__global float *A, __global float *B, __global float *C,
                  float x) {
  const int gid = get_global_id(0);
  C[gid] = A[gid] + (B[gid] * x);
}
|
||||
|
||||
/* Scalar add: B[i] = A[i] + scalar. One work-item per element. */
__kernel void add_sc(__global float *A, __global float *B, float scalar) {
  const int gid = get_global_id(0);
  B[gid] = A[gid] + scalar;
}
|
||||
121
src/tensor/opencl/opencl.cpp
Normal file
121
src/tensor/opencl/opencl.cpp
Normal file
@@ -0,0 +1,121 @@
|
||||
#include "opencl.hpp"
|
||||
|
||||
// Read an OpenCL kernel source file into a string.
// Throws std::runtime_error when the file cannot be opened.
std::string OpenCL::readProgram(const std::string &filePath) {
  std::ifstream source{filePath, std::ios::binary};
  if (!source.is_open())
    throw std::runtime_error("Cannot open file: " + filePath);

  std::stringstream contents;
  contents << source.rdbuf();
  return contents.str();
}
|
||||
// Compile one .cl source file for the selected device.
// On build failure the device build log is written to stderr and the
// cl::Error is rethrown to the caller.
cl::Program OpenCL::compileProgram(const std::string &file) {
  const std::string source = readProgram(file);
  cl::Program program{context, source};
  try {
    program.build({device});
  } catch (cl::Error &e) {
    const std::string build_log =
        program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
    std::cerr << "Build log:\n" << build_log << std::endl;
    throw;
  }
  return program;
}
|
||||
void OpenCL::loadPrograms() {
|
||||
for (const auto &entry : programPaths) {
|
||||
programs[entry.first] = compileProgram(entry.second);
|
||||
std::cout << "Loaded program: " << entry.second << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void OpenCL::initializeDevice() {
|
||||
std::vector<cl::Platform> platforms;
|
||||
cl::Platform::get(&platforms);
|
||||
|
||||
if (platforms.empty()) {
|
||||
throw std::runtime_error("No OpenCL platforms found");
|
||||
}
|
||||
|
||||
std::vector<cl::Device> devices;
|
||||
bool deviceFound = false;
|
||||
|
||||
for (const auto &platform : platforms) {
|
||||
try {
|
||||
platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
|
||||
if (!devices.empty()) {
|
||||
deviceFound = true;
|
||||
break;
|
||||
}
|
||||
} catch (const cl::Error &) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (!deviceFound) {
|
||||
for (const auto &platform : platforms) {
|
||||
try {
|
||||
platform.getDevices(CL_DEVICE_TYPE_CPU, &devices);
|
||||
if (!devices.empty()) {
|
||||
deviceFound = true;
|
||||
break;
|
||||
}
|
||||
} catch (const cl::Error &) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!deviceFound) {
|
||||
throw std::runtime_error("No suitable OpenCL devices found");
|
||||
}
|
||||
|
||||
device = devices[0];
|
||||
context = cl::Context(device);
|
||||
queue = cl::CommandQueue(context, device);
|
||||
|
||||
std::cout << "Using device: " << device.getInfo<CL_DEVICE_NAME>()
|
||||
<< "\nPlatform: " << platforms[0].getInfo<CL_PLATFORM_NAME>()
|
||||
<< "\nCompute units: "
|
||||
<< device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>()
|
||||
<< "\nGlobal memory: "
|
||||
<< device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>() / (1024 * 1024)
|
||||
<< " MB" << std::endl;
|
||||
}
|
||||
|
||||
// Bring up the OpenCL stack: pick a device, create context/queue, and
// compile every registered kernel program. Any cl::Error is reported to
// stderr (with its numeric code) and rethrown so construction fails loudly.
OpenCL::OpenCL() {
  try {
    initializeDevice();
    loadPrograms();
  } catch (const cl::Error &e) {
    std::cerr << "OpenCL error: " << e.what() << " (" << e.err() << ")"
              << std::endl;
    throw;
  }
}
|
||||
|
||||
// Look up a previously compiled program by id.
// Throws std::invalid_argument when the id was never loaded.
cl::Program &OpenCL::getProgram(Program program) {
  const auto found = programs.find(program);
  if (found == programs.end()) {
    throw std::invalid_argument("Program not loaded: " +
                                std::to_string(static_cast<int>(program)));
  }
  return found->second;
}
|
||||
|
||||
// Dump a human-readable capability summary of the selected device to stdout.
void OpenCL::printDeviceInfo() const {
  std::cout << "=== OpenCL Device Info ===" << std::endl
            << "Name: " << device.getInfo<CL_DEVICE_NAME>() << std::endl
            << "Vendor: " << device.getInfo<CL_DEVICE_VENDOR>() << std::endl
            << "Version: " << device.getInfo<CL_DEVICE_VERSION>() << std::endl
            << "Compute Units: "
            << device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>() << std::endl
            << "Global Memory: "
            << device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>() / (1024 * 1024)
            << " MB" << std::endl
            << "Local Memory: "
            << device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>() / 1024 << " KB"
            << std::endl
            << "Max Work Group Size: "
            << device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>() << std::endl;
}
|
||||
47
src/tensor/opencl/opencl.hpp
Normal file
47
src/tensor/opencl/opencl.hpp
Normal file
@@ -0,0 +1,47 @@
|
||||
#pragma once
|
||||
|
||||
#define CL_HPP_ENABLE_EXCEPTIONS
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION 300
|
||||
#include <CL/opencl.hpp>
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <unordered_map>
|
||||
|
||||
// Owns the OpenCL device, context, and command queue plus a cache of
// compiled kernel programs. Non-copyable and non-movable: one instance
// manages the compute state for its lifetime.
class OpenCL {
public:
  // Identifiers for the kernel source files listed in `programPaths`.
  enum class Program { TENSOR };

private:
  cl::Device device;      // selected compute device (GPU preferred)
  cl::Context context;    // context bound to `device`
  cl::CommandQueue queue; // command queue for kernel dispatch

  // Compiled programs, keyed by Program id; filled by loadPrograms().
  std::unordered_map<Program, cl::Program> programs;
  // Kernel source locations, relative to the working directory — NOTE:
  // the process must be started from src/tensor for these to resolve.
  std::unordered_map<Program, std::string> programPaths = {
      {Program::TENSOR, "./opencl/kernels/tensor.cl"}};

  // Read a kernel source file into a string; throws on a missing file.
  std::string readProgram(const std::string &filePath);
  // Build one source file for `device`; prints the build log on failure.
  cl::Program compileProgram(const std::string &file);
  // Compile and cache every entry of `programPaths`.
  void loadPrograms();

  // Pick a device (GPU first, CPU fallback) and create context/queue.
  void initializeDevice();

public:
  // Initializes the device and compiles all kernels; rethrows cl::Error.
  OpenCL();

  OpenCL(const OpenCL &) = delete;
  OpenCL &operator=(const OpenCL &) = delete;
  OpenCL(OpenCL &&) = delete;
  OpenCL &operator=(OpenCL &&) = delete;

  cl::Device &getDevice() { return device; }
  cl::Context &getContext() { return context; }
  const cl::CommandQueue &getQueue() { return queue; }

  // Fetch a compiled program; throws std::invalid_argument if not loaded.
  cl::Program &getProgram(Program program);
  // Print a device capability summary to stdout.
  void printDeviceInfo() const;
};
|
||||
102
src/tensor/pybind.cpp
Normal file
102
src/tensor/pybind.cpp
Normal file
@@ -0,0 +1,102 @@
|
||||
#include <pybind11/operators.h>
|
||||
#include <pybind11/pybind11.h>
|
||||
#include <pybind11/stl.h>
|
||||
|
||||
#include "tensor.hpp"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
// Register Tensor<T, Dim> with pybind11 as a Python class called `name`.
// Exposes the four constructors, flat element access, element-wise and
// scalar arithmetic, and — where the rank allows — matrix multiplication
// (as __matmul__) and axis transposition.
template <typename T, int Dim>
void register_tensor(py::module &m, const std::string &name) {
  auto tensor = py::class_<Tensor<T, Dim>>(m, name.c_str())
                    // shape / shape+fill / shape+data / shape+random(min,max)
                    .def(py::init<const std::array<size_t, Dim> &>())
                    .def(py::init<const std::array<size_t, Dim> &, T>())
                    .def(py::init<const std::array<size_t, Dim> &,
                                  const std::vector<T> &>())
                    .def(py::init<const std::array<size_t, Dim> &, T, T>())

                    .def("get_shape", &Tensor<T, Dim>::getShape)
                    .def("get_data", &Tensor<T, Dim>::getData)
                    .def("get_size", &Tensor<T, Dim>::getSize)
                    .def("get_axes", &Tensor<T, Dim>::getAxes)

                    // Flat (storage-order) indexing with bounds checking;
                    // out-of-range raises IndexError on the Python side.
                    .def("__getitem__",
                         [](const Tensor<T, Dim> &t, size_t i) -> T {
                           if (i >= t.getSize())
                             throw py::index_error();
                           return t[i];
                         })
                    .def("__setitem__",
                         [](Tensor<T, Dim> &t, size_t i, T value) {
                           if (i >= t.getSize())
                             throw py::index_error();
                           t[i] = value;
                         })

                    // TODO: multi-index access via __call__ is not yet
                    // implemented.
                    // .def("__call__",
                    //      [](Tensor<T, Dim> &t, py::args args) -> T & {
                    //
                    //      })

                    // element-wise tensor-tensor arithmetic
                    .def(py::self + py::self)
                    .def(py::self - py::self)
                    .def(py::self * py::self)
                    .def(py::self += py::self)
                    .def(py::self -= py::self)
                    .def(py::self *= py::self)

                    // tensor-scalar arithmetic (both operand orders)
                    .def(py::self + T())
                    .def(py::self - T())
                    .def(py::self * T())
                    .def(py::self / T())
                    .def(T() + py::self)
                    .def(T() - py::self)
                    .def(T() * py::self)

                    .def(py::self += T())
                    .def(py::self -= T())
                    .def(py::self *= T())
                    .def(py::self /= T())

                    .def("__pos__", [](const Tensor<T, Dim> &t) { return +t; })
                    .def("__neg__", [](const Tensor<T, Dim> &t) { return -t; })

                    .def("print", &Tensor<T, Dim>::print);

  // operator% implements dot/matrix product for rank 1 and 2 only.
  if constexpr (Dim == 1 || Dim == 2)
    tensor.def("__matmul__", &Tensor<T, Dim>::operator%);

  // Transposition needs at least two axes.
  if constexpr (Dim >= 2) {
    tensor
        .def("transpose", py::overload_cast<const std::array<int, Dim> &>(
                              &Tensor<T, Dim>::transpose))
        .def("transpose",
             py::overload_cast<int, int>(&Tensor<T, Dim>::transpose))
        .def("t", &Tensor<T, Dim>::t);
  }
}
|
||||
|
||||
// Python module entry point: registers float, double, and int tensors for
// ranks 0 through 5 under NumPy-style names (prefix d = double, i = int).
PYBIND11_MODULE(tensor, m) {
  m.doc() = "Tensor math library";

  register_tensor<float, 0>(m, "Scalar");
  register_tensor<float, 1>(m, "Vector");
  register_tensor<float, 2>(m, "Matrix");
  register_tensor<float, 3>(m, "Tensor3");
  register_tensor<float, 4>(m, "Tensor4");
  register_tensor<float, 5>(m, "Tensor5");

  register_tensor<double, 0>(m, "dScalar");
  register_tensor<double, 1>(m, "dVector");
  register_tensor<double, 2>(m, "dMatrix");
  register_tensor<double, 3>(m, "dTensor3");
  register_tensor<double, 4>(m, "dTensor4");
  register_tensor<double, 5>(m, "dTensor5");

  register_tensor<int, 0>(m, "iScalar");
  register_tensor<int, 1>(m, "iVector");
  register_tensor<int, 2>(m, "iMatrix");
  register_tensor<int, 3>(m, "iTensor3");
  register_tensor<int, 4>(m, "iTensor4");
  register_tensor<int, 5>(m, "iTensor5");
}
|
||||
1
src/tensor/tensor.cpp
Normal file
1
src/tensor/tensor.cpp
Normal file
@@ -0,0 +1 @@
|
||||
#include "tensor.hpp"
|
||||
338
src/tensor/tensor.hpp
Normal file
338
src/tensor/tensor.hpp
Normal file
@@ -0,0 +1,338 @@
|
||||
#pragma once

#include <algorithm>
#include <array>
#include <cstddef>
#include <iostream>
#include <random>
#include <stdexcept>
#include <type_traits>
#include <typeinfo>
#include <utility>
#include <vector>
||||
|
||||
/// Dense tensor with fixed rank `Dim` known at compile time.
///
/// Elements live contiguously in `data_` in allocation (row-major) order;
/// `shape_` keeps the extents in storage order, while `axes_` is a
/// permutation applied on every multi-index access. Transposes are lazy:
/// they only permute `axes_` and never move data.
template <typename T, int Dim> class Tensor {
private:
  std::array<size_t, Dim> shape_; // extents in storage (allocation) order
  std::array<int, Dim> axes_;     // logical axis i -> storage axis axes_[i]
  std::vector<T> data_;           // contiguous element storage

  /// Translate logical indices (through the axes permutation) into the
  /// flat offset inside `data_`.
  template <typename... Indices> size_t computeIndex(Indices... indices) const {
    static_assert(sizeof...(Indices) == Dim, "Invalid number of indices");
    std::array<size_t, Dim> indicesArray = {static_cast<size_t>(indices)...};
    std::array<size_t, Dim> axesIndices;
    for (int i = 0; i < Dim; ++i)
      axesIndices[axes_[i]] = indicesArray[i];
    size_t index = 0;
    size_t stride = 1;
    for (int i = Dim - 1; i >= 0; --i) {
      index += axesIndices[i] * stride;
      stride *= shape_[i];
    }
    return index;
  }

  /// Throws std::invalid_argument unless both tensors have the same
  /// logical shape. (Made const — it mutates nothing.)
  void checkItHasSameShape(const Tensor &other) const {
    if (getShape() != other.getShape())
      throw std::invalid_argument("Tensor shapes must match");
  }
  /// Throws std::invalid_argument unless 0 <= axis < Dim.
  void checkAxisInDim(int axis) const {
    if (axis < 0 || axis >= Dim)
      throw std::invalid_argument("Invalid axis index");
  }

public:
  Tensor() = delete;

  /// Allocate an uninitialized tensor; every extent must be non-zero.
  Tensor(const std::array<size_t, Dim> &shape) {
    for (size_t d : shape)
      if (d == 0)
        throw std::invalid_argument("Invalid shape");
    shape_ = shape;
    for (int i = 0; i < Dim; ++i)
      axes_[i] = i; // identity permutation
    size_t total_size = 1;
    for (size_t dim : shape)
      total_size *= dim;
    data_.resize(total_size); // rank 0 => total_size == 1 (a scalar)
  }
  /// Allocate and set every element to `fill`.
  Tensor(const std::array<size_t, Dim> &shape, T fill) : Tensor(shape) {
    std::fill(data_.begin(), data_.end(), fill);
  }
  /// Allocate and copy `data`; its length must equal the element count.
  Tensor(const std::array<size_t, Dim> &shape, const std::vector<T> &data)
      : Tensor(shape) {
    if (data.size() != data_.size())
      throw std::invalid_argument("Invalid data size");
    data_ = data;
  }
  /// Allocate and fill with uniform random values: [min, max] for
  /// integral T, [min, max) for floating-point T. Throws for other T.
  Tensor(const std::array<size_t, Dim> &shape, T min, T max) : Tensor(shape) {
    // One shared generator: seeding once keeps construction cheap.
    static std::random_device rd;
    static std::mt19937 gen(rd());
    if constexpr (std::is_integral_v<T>) {
      std::uniform_int_distribution<T> dis(min, max);
      for (auto &element : data_)
        element = dis(gen);
    } else if constexpr (std::is_floating_point_v<T>) {
      std::uniform_real_distribution<T> dis(min, max);
      for (auto &element : data_)
        element = dis(gen);
    } else
      throw std::invalid_argument("Invalid randomized type");
  }

  // Rule of Zero: all members are value types, so the defaults do exactly
  // what the original hand-written member-by-member versions did.
  Tensor(const Tensor &other) = default;
  Tensor &operator=(const Tensor &other) = default;
  Tensor(Tensor &&other) noexcept = default;
  Tensor &operator=(Tensor &&other) noexcept = default;
  ~Tensor() = default;

  /// Current axis permutation.
  const std::array<int, Dim> &getAxes() const { return axes_; }
  /// Raw storage in allocation order (ignores any transpose).
  const std::vector<T> &getData() const { return data_; }
  /// Total number of elements.
  size_t getSize() const { return data_.size(); }
  /// Logical shape: storage extents viewed through the axes permutation.
  /// FIX: the original returned `const std::array<...>` by value, which
  /// disables move on the returned object for no benefit.
  std::array<size_t, Dim> getShape() const {
    std::array<size_t, Dim> result;
    for (int i = 0; i < Dim; ++i)
      result[i] = shape_[axes_[i]];
    return result;
  }

  /// Flat element access in storage order (ignores any transpose).
  T &operator[](size_t i) { return data_[i]; }
  const T &operator[](size_t i) const { return data_[i]; }

  /// Multi-index access; honours the current axes permutation.
  template <typename... Indices> T &operator()(Indices... indices) {
    return data_[computeIndex(indices...)];
  }
  template <typename... Indices> const T &operator()(Indices... indices) const {
    return data_[computeIndex(indices...)];
  }

  /// Install an explicit axis permutation (must use each axis once).
  Tensor &transpose(const std::array<int, Dim> &new_axes) {
    std::array<bool, Dim> used{};
    for (int axis : new_axes) {
      checkAxisInDim(axis);
      if (used[axis])
        throw std::invalid_argument("Duplicate axis index");
      used[axis] = true;
    }
    axes_ = new_axes;
    return *this;
  }
  /// Swap two distinct axes (throws when axis_a == axis_b, as before).
  Tensor &transpose(int axis_a, int axis_b) {
    checkAxisInDim(axis_a);
    checkAxisInDim(axis_b);
    if (axis_a == axis_b)
      throw std::invalid_argument("Duplicate axis index");
    std::swap(axes_[axis_a], axes_[axis_b]);
    return *this;
  }
  /// Swap the two trailing axes — the classic matrix transpose for Dim==2.
  Tensor &t() {
    static_assert(Dim >= 2, "Can't change the only axis");
    std::swap(axes_[Dim - 1], axes_[Dim - 2]);
    return *this;
  }

  /// Unary plus: returns a copy unchanged.
  Tensor operator+() const { return *this; }
  /// Unary minus: element-wise negation.
  Tensor operator-() const {
    Tensor result = *this;
    for (T &e : result.data_)
      e = -e;
    return result;
  }

  // --- scalar arithmetic -------------------------------------------------
  Tensor &operator+=(const T &scalar) {
    for (T &e : data_)
      e += scalar;
    return *this;
  }
  Tensor operator+(const T &scalar) const {
    Tensor result = *this;
    result += scalar;
    return result;
  }
  friend Tensor operator+(const T &scalar, const Tensor &tensor) {
    return tensor + scalar;
  }

  Tensor &operator-=(const T &scalar) {
    for (T &e : data_)
      e -= scalar;
    return *this;
  }
  Tensor operator-(const T &scalar) const {
    Tensor result = *this;
    result -= scalar;
    return result;
  }
  /// scalar - tensor: element-wise `scalar - e` (not commutative).
  friend Tensor operator-(const T &scalar, const Tensor &tensor) {
    Tensor result = tensor;
    for (T &e : result.data_)
      e = scalar - e;
    return result;
  }

  Tensor &operator*=(const T &scalar) {
    for (T &e : data_)
      e *= scalar;
    return *this;
  }
  Tensor operator*(const T &scalar) const {
    Tensor result = *this;
    result *= scalar;
    return result;
  }
  friend Tensor operator*(const T &scalar, const Tensor &tensor) {
    return tensor * scalar;
  }

  /// Scalar division; throws on a zero divisor.
  Tensor &operator/=(const T &scalar) {
    if (scalar == T(0))
      throw std::invalid_argument("Division by zero");
    for (T &e : data_)
      e /= scalar;
    return *this;
  }
  Tensor operator/(const T &scalar) const {
    Tensor result = *this;
    result /= scalar;
    return result;
  }

  // --- element-wise tensor arithmetic (shapes must match) ----------------
  Tensor &operator+=(const Tensor &other) {
    checkItHasSameShape(other);
    for (size_t i = 0; i < data_.size(); ++i)
      data_[i] += other.data_[i];
    return *this;
  }
  Tensor operator+(const Tensor &other) const {
    Tensor result = *this;
    result += other;
    return result;
  }

  Tensor &operator-=(const Tensor &other) {
    checkItHasSameShape(other);
    for (size_t i = 0; i < data_.size(); ++i)
      data_[i] -= other.data_[i];
    return *this;
  }
  Tensor operator-(const Tensor &other) const {
    Tensor result = *this;
    result -= other;
    return result;
  }

  /// Hadamard (element-wise) product — NOT matrix multiplication.
  Tensor &operator*=(const Tensor &other) {
    checkItHasSameShape(other);
    for (size_t i = 0; i < data_.size(); ++i)
      data_[i] *= other.data_[i];
    return *this;
  }
  Tensor operator*(const Tensor &other) const {
    Tensor result = *this;
    result *= other;
    return result;
  }

  /// Inner product: dot product for vectors (rank-0 result), matrix
  /// product for matrices (rank-2 result). Respects lazy transposes on
  /// both operands. Throws on mismatched inner dimensions.
  Tensor<T, Dim == 1 ? 0 : 2> operator%(const Tensor &other) const {
    static_assert(Dim == 1 || Dim == 2,
                  "Inner product is only defined for vectors and matrices");
    if constexpr (Dim == 1) {
      if (data_.size() != other.data_.size())
        throw std::invalid_argument(
            "Vector sizes must match for inner product");
      T result_val = T(0);
      for (size_t i = 0; i < data_.size(); ++i)
        result_val += data_[i] * other.data_[i];
      return Tensor<T, 0>({}, {result_val});
    } else if constexpr (Dim == 2) {
      if (shape_[axes_[1]] != other.shape_[other.axes_[0]])
        throw std::invalid_argument(
            "Matrix dimensions must match for multiplication");
      size_t m = shape_[axes_[0]];
      size_t n = shape_[axes_[1]];
      size_t p = other.shape_[other.axes_[1]];
      Tensor<T, 2> result({m, p}, T(0));
      for (size_t i = 0; i < m; ++i) {
        for (size_t j = 0; j < p; ++j) {
          T sum = T(0);
          for (size_t k = 0; k < n; ++k)
            sum += (*this)(i, k) * other(k, j);
          result(i, j) = sum;
        }
      }
      return result;
    }
  }

  /// Pretty-print to stdout. Scalars and vectors print fully; matrices
  /// print row by row; higher ranks show the shape and at most the first
  /// ten elements in storage order.
  void print() const {
    if constexpr (Dim == 0) {
      std::cout << "Scalar<" << typeid(T).name() << ">: " << data_[0]
                << std::endl;
    } else if constexpr (Dim == 1) {
      std::cout << "Vector<" << typeid(T).name() << ">(" << shape_[0] << "): [";
      for (size_t i = 0; i < data_.size(); ++i) {
        std::cout << data_[i];
        if (i < data_.size() - 1)
          std::cout << ", ";
      }
      std::cout << "]" << std::endl;
    } else if constexpr (Dim == 2) {
      std::cout << "Matrix<" << typeid(T).name() << ">(" << shape_[axes_[0]]
                << "x" << shape_[axes_[1]] << "):" << std::endl;
      for (size_t i = 0; i < shape_[axes_[0]]; ++i) {
        std::cout << "  [";
        for (size_t j = 0; j < shape_[axes_[1]]; ++j) {
          std::cout << (*this)(i, j);
          if (j < shape_[axes_[1]] - 1)
            std::cout << ", ";
        }
        std::cout << "]" << std::endl;
      }
    } else {
      std::cout << "Tensor" << Dim << "D<" << typeid(T).name() << ">" << "[";
      for (size_t i = 0; i < Dim; ++i) {
        std::cout << shape_[axes_[i]];
        if (i < Dim - 1)
          std::cout << "x";
      }
      std::cout << "]: [";
      size_t show = std::min(data_.size(), size_t(10));
      for (size_t i = 0; i < show; ++i) {
        std::cout << data_[i];
        if (i < show - 1)
          std::cout << ", ";
      }
      if (data_.size() > 10)
        std::cout << ", ...";
      std::cout << "]" << std::endl;
    }
  }
};
|
||||
|
||||
// Rank-specific convenience aliases.
template <typename T> using Scalar = Tensor<T, 0>;
template <typename T> using Vector = Tensor<T, 1>;
template <typename T> using Matrix = Tensor<T, 2>;
|
||||
|
||||
class Tensors {
|
||||
Tensors() = delete;
|
||||
|
||||
public:
|
||||
template <typename T, typename... Args> static auto empty(Args... args) {
|
||||
return Tensor<T, sizeof...(Args)>({static_cast<size_t>(args)...});
|
||||
}
|
||||
|
||||
template <typename T, typename... Args> static auto zero(Args... args) {
|
||||
return Tensor<T, sizeof...(Args)>({static_cast<size_t>(args)...}, T(0));
|
||||
}
|
||||
|
||||
template <typename T, typename... Args> static auto rand(Args... args) {
|
||||
return Tensor<T, sizeof...(Args)>({static_cast<size_t>(args)...}, T(0),
|
||||
T(1));
|
||||
}
|
||||
};
|
||||
Reference in New Issue
Block a user