diff --git a/.gitignore b/.gitignore
index 8a27642..5c34dee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,5 +3,6 @@
 *.exe
 *.so
 *.pyd
+*.pyi
 src/tensor/build
\ No newline at end of file
diff --git a/src/run.py b/src/run.py
index 638da5d..a63f67f 100644
--- a/src/run.py
+++ b/src/run.py
@@ -1,54 +1,81 @@
-from tensor.tensor import *
-import numpy as np
-import time
+import tensor.tensor as T
 
-if (MODE == PLATFORM.OPENCL):
-    init("./tensor/")
-
-a = Matrix([4096*4, 4096*4], 1)
-b = Matrix([4096*4, 4096*4], 1)
+if (T.MODE == T.PLATFORM.OPENCL):
+    T.init()
 
-def benchmark_tensor():
-    c = a + b
-    return c
+class Layer:
+    inputFeatures: int
+    outputFeatures: int
+    weights: T.Matrix
+    bias: T.Matrix  # T.Vector
+    activation: T.FUNCTION
+
+    internal: T.Matrix  # T.Vector
+    outputs: T.Matrix  # T.Vector
+
+    def __init__(self, inputFeatures: int, outputFeatures: int, activation: T.FUNCTION):
+        self.inputFeatures = inputFeatures
+        self.outputFeatures = outputFeatures
+        self.weights = T.Matrix([outputFeatures, inputFeatures], 0, 1)*0.1
+        self.bias = T.Matrix([outputFeatures, 1], 0)
+        self.activation = activation
+
+        self.internal = T.Matrix([outputFeatures, 1], 0)
+        self.outputs = T.Matrix([outputFeatures, 1], 0)
 
-a_np = np.ones([4096*4, 4096*4], dtype=np.float32)
-b_np = np.ones([4096*4, 4096*4], dtype=np.float32)
+class NN:
+    layers: list[Layer]
+
+    def __init__(self, layers: list[Layer]):
+        self.layers = layers
+
+    def forward(self, inputs: T.Matrix) -> T.Matrix:
+        for i, layer in enumerate(self.layers):
+            layer.internal = (
+                layer.weights @
+                (inputs if i == 0 else self.layers[i-1].outputs)
+            ) + layer.bias
+            layer.outputs = layer.internal(layer.activation)
+        return self.layers[-1].outputs
+
+    def learn(self, inputs: T.Matrix, target: T.Matrix):
+        self.forward(inputs)
+
+        lossVector = self.layers[-1].outputs - target
+        # print("loss", lossVector(T.FUNCTION.MSE))
+        dAnl = lossVector(T.FUNCTION.MSE, True)
+        for i in range(len(self.layers)-1, -1, -1):
+            dZl = dAnl * \
+                self.layers[i].internal(self.layers[i].activation, True)
+            dWl = dZl @ (inputs if i == 0 else self.layers[i-1].outputs).t()
+            dbl = dZl
+            # dbl = dZl.sum(axis=1).reshape(dZl.shape[0], 1)
+            dAnl = self.layers[i].weights.t() @ dZl
+            self.layers[i].weights += (dWl * -0.3)
+            self.layers[i].bias += (dbl * -0.3)
 
-def benchmark_numpy():
-    c = a_np + b_np
-    return c
+nn = NN([Layer(2, 3, T.FUNCTION.SIGMOID), Layer(3, 1, T.FUNCTION.LINEAR)])
+print("Training...")
+for epoch in range(1000):
+    for i in range(0, 2):
+        for j in range(0, 2):
+            sample = T.Matrix([2, 1], [i, j])
+            target = T.Matrix([1, 1], [i ^ j])
+            nn.learn(sample, target)
 
-# Run it several times for a more accurate measurement
-iterations = 2
+    if epoch % 100 == 0:
+        print(f"Epoch {epoch}")
+        for i in range(0, 2):
+            for j in range(0, 2):
+                sample = T.Matrix([2, 1], [i, j])
+                predicted = nn.forward(sample)
+                print(f"{i} XOR {j} = {i ^ j}, NN: ", predicted)
+        print()
 
-print("Tensor benchmark:")
-tensor_times = []
-for i in range(iterations):
-    start = time.time()
-    result_tensor = benchmark_tensor()
-    print(result_tensor)
-    tensor_times.append(time.time() - start)
-
-print("NumPy benchmark:")
-numpy_times = []
-for i in range(iterations):
-    start = time.time()
-    result_numpy = benchmark_numpy()
-    print(result_numpy)
-    numpy_times.append(time.time() - start)
-
-print(
-    f"\nAverage Tensor time: {np.mean(tensor_times):.4f} ± {np.std(tensor_times):.4f} s")
-print(
-    f"Average NumPy time: {np.mean(numpy_times):.4f} ± {np.std(numpy_times):.4f} s")
-
-ratio = np.mean(numpy_times) / np.mean(tensor_times)
-if ratio > 1:
-    print(f"Tensor is {ratio:.2f}x faster")
-else:
-    print(f"NumPy is {1/ratio:.2f}x faster")
+print("Final results:")
+for i in range(0, 2):
+    for j in range(0, 2):
+        sample = T.Matrix([2, 1], [i, j])
+        predicted = nn.forward(sample)
+        print(f"{i} XOR {j} = {i ^ j}, NN: ", predicted)
+print()
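The learn() method above is plain per-sample gradient descent over dense layers: dZ = dA * f'(z), dW = dZ @ a_prev.T, db = dZ, and the signal passed back to the previous layer is W.T @ dZ. A minimal NumPy reference of the same single-layer update, for comparison (hypothetical helper names; column-vector activations and the 0.3 learning rate used above):

    # Reference backprop for one dense sigmoid layer, mirroring NN.learn().
    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    def layer_backward(w, b, a_prev, z, dA, lr=0.3):
        dZ = dA * sigmoid(z) * (1.0 - sigmoid(z))  # dA elementwise-times sigmoid'(z)
        dW = dZ @ a_prev.T                         # weight gradient
        db = dZ                                    # bias gradient (batch size 1)
        dA_prev = w.T @ dZ                         # signal for the previous layer
        w -= lr * dW
        b -= lr * db
        return dA_prev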
diff --git a/src/tensor/Makefile b/src/tensor/Makefile
index 455ef42..7832d4d 100644
--- a/src/tensor/Makefile
+++ b/src/tensor/Makefile
@@ -54,4 +54,4 @@ opencl_module: $(COMMON_SRC) $(OPENCL_SRC) pybind.cpp | $(BUILD_DIR)
 	PYTHONPATH=. pybind11-stubgen tensor -o .
 
 clean:
-	rm -rf $(BUILD_DIR) $(TARGET) *.$(SHARED_LIB_EXT)
+	rm -rf $(BUILD_DIR) $(TARGET) *.$(SHARED_LIB_EXT) *.pyi
diff --git a/src/tensor/cpu/tensor.hpp b/src/tensor/cpu/tensor.hpp
index 170555e..af16a15 100644
--- a/src/tensor/cpu/tensor.hpp
+++ b/src/tensor/cpu/tensor.hpp
@@ -51,6 +51,8 @@ public:
 
   Tensor operator%(const Tensor &other) const;
 
+  Tensor apply(Function f, bool derivative = false) const override;
+
   std::string toString() const override;
 };
 
diff --git a/src/tensor/cpu/tensor.tpp b/src/tensor/cpu/tensor.tpp
index ae48fcf..14b2cbe 100644
--- a/src/tensor/cpu/tensor.tpp
+++ b/src/tensor/cpu/tensor.tpp
@@ -149,14 +149,50 @@ Tensor<T, Dim>::operator%(const Tensor &other) const {
      for (size_t j = 0; j < p; ++j) {
        T sum = T(0);
        for (size_t k = 0; k < n; ++k)
-          sum += (*this)(i, k) * other(k, j);
-        result(i, j) = sum;
+          sum += (*this)[i * n + k] * other[k * p + j];
+        result[i * p + j] = sum;
      }
    }
    return result;
  }
}
 
+// Element-wise application of an activation/loss function or its derivative.
+template <typename T, size_t Dim>
+Tensor<T, Dim> Tensor<T, Dim>::apply(Function f, bool derivative) const {
+  Tensor result = *this;
+  auto func = [f, derivative](T x) -> T {
+    switch (f) {
+    case Function::SIGMOID:
+      if (!derivative)
+        return T(1) / (T(1) + std::exp(-x));
+      else {
+        T sigmoid = T(1) / (T(1) + std::exp(-x));
+        return sigmoid * (T(1) - sigmoid);
+      }
+    case Function::RELU:
+      if (!derivative)
+        return std::max(T(0), x);
+      else
+        return (x > T(0)) ? T(1) : T(0);
+    case Function::MSE:
+      if (!derivative)
+        return x * x;
+      else
+        return T(2) * x;
+    case Function::LINEAR:
+    default:
+      if (!derivative)
+        return x;
+      else
+        return T(1);
+    }
+  };
+  for (size_t i = 0; i < getSize(); ++i)
+    result[i] = func((*this)[i]);
+  return result;
+}
+
 // ===== UTILS =====
 template <typename T, size_t Dim>
 std::string Tensor<T, Dim>::toString() const {
   return ITensor<T, Dim>::format(data_);
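The closed-form derivatives baked into apply() are easy to sanity-check numerically. A self-contained sketch in pure Python, with no dependency on the extension module (RELU is skipped, since a central difference is meaningless at its kink):

    import math

    funcs = {
        "SIGMOID": (lambda x: 1 / (1 + math.exp(-x)),
                    lambda x: (1 / (1 + math.exp(-x))) * (1 - 1 / (1 + math.exp(-x)))),
        "MSE":     (lambda x: x * x, lambda x: 2 * x),  # square function, as in apply()
        "LINEAR":  (lambda x: x,     lambda x: 1.0),
    }

    h = 1e-5
    for name, (f, df) in funcs.items():
        for x in (-2.0, -0.5, 0.3, 1.7):
            numeric = (f(x + h) - f(x - h)) / (2 * h)  # central difference
            assert abs(numeric - df(x)) < 1e-6, (name, x)
    print("derivatives match")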
diff --git a/src/tensor/main.cpp b/src/tensor/main.cpp
index 180a37c..de8b8d8 100644
--- a/src/tensor/main.cpp
+++ b/src/tensor/main.cpp
@@ -20,20 +20,22 @@ public:
     auto end = std::chrono::high_resolution_clock::now();
     auto duration =
         std::chrono::duration_cast<std::chrono::nanoseconds>(end - start);
-    std::cout << operation << ": " << duration.count() << " ns\n";
+    std::cout << operation << ": " << duration.count() / 1000000.0f << " ms\n";
   }
 };
 
 int main() {
 #ifdef USE_OPENCL
-  openCL.printDeviceInfo();
+  openCL.init();
 #endif
 
-  Tensor<float, 2> a = Tensor<float, 2>({4096 * 2, 4096 * 2}, 1);
-  Tensor<float, 2> b = Tensor<float, 2>({4096 * 2, 4096 * 2}, 1);
-  Profiler::measure("Matrix multiplication", [&]() {
-    auto result = a % b;
-    std::cout << result.toString();
+  Tensor<float, 2> a = Tensor<float, 2>({2, 3}, 0, 1);
+  std::cout << a.toString() << std::endl;
+  Tensor<float, 2> b = Tensor<float, 2>({2, 3}, 0, 1);
+  std::cout << b.toString() << std::endl;
+  Profiler::measure("Time", [&]() {
+    auto result = a * b;
+    std::cout << result.toString() << std::endl;
   });
 
   return 0;
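run.py no longer carries the NumPy-vs-Tensor benchmark. If that comparison is still wanted, a standalone sketch along these lines would do (assumes the built tensor package is importable; time.perf_counter is the right clock for intervals, and printing the result forces the OpenCL read-back before the clock stops):

    import time
    import tensor.tensor as T

    if T.MODE == T.PLATFORM.OPENCL:
        T.init()

    a = T.Matrix([1024, 1024], 1)
    b = T.Matrix([1024, 1024], 1)

    start = time.perf_counter()
    c = a @ b                 # matrix product via __matmul__
    print(c)                  # read-back / sync point
    print(f"elapsed: {time.perf_counter() - start:.4f} s")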
diff --git a/src/tensor/opencl/kernels.hpp b/src/tensor/opencl/kernels.hpp
index faf9a6f..946c4f1 100644
--- a/src/tensor/opencl/kernels.hpp
+++ b/src/tensor/opencl/kernels.hpp
@@ -11,6 +11,7 @@ template <typename T> class Kernels {
 public:
   enum class Vector {
+    type1 = 1,
     type2 = 2,
     type4 = 4,
     type8 = 8,
@@ -24,6 +25,7 @@ public:
     T_ADD,
     T_HADAMARD,
     T_MULT,
+    FUNC
   };
 
 private:
@@ -42,6 +44,7 @@ private:
         pos += value.length();
       }
     }
+    // std::cout << result << std::endl;
     return result;
   }
 
@@ -50,6 +53,7 @@ private:
         R"(
         __kernel void {method}(__global type* A, int len) {
           int gid = get_global_id(0);
+          #if WIDTH != 1
           int base = gid * WIDTH;
           if (base + WIDTH <= len) {
             typeX data = vloadX(gid, A);
@@ -60,6 +64,9 @@ private:
               if (idx < len) A[idx] = {operation}A[idx];
             }
           }
+          #else
+          A[gid] = {operation}A[gid];
+          #endif
         })",
         {{"method", name}, {"operation", operation}});
   }
 
@@ -69,6 +76,7 @@ private:
         R"(
         __kernel void {method}(__global type* A, int len, type scalar) {
           int gid = get_global_id(0);
+          #if WIDTH != 1
           int base = gid * WIDTH;
           if (base + WIDTH <= len) {
             typeX data = vloadX(gid, A);
@@ -80,6 +88,9 @@ private:
               if (idx < len) A[idx] = A[idx] {operation} scalar;
             }
           }
+          #else
+          A[gid] = A[gid] {operation} scalar;
+          #endif
         })",
         {{"method", name}, {"operation", operation}});
   }
 
@@ -89,6 +100,7 @@ private:
         R"(
         __kernel void {method}(__global type* A, __global type* B, int len) {
           int gid = get_global_id(0);
+          #if WIDTH != 1
           int base = gid * WIDTH;
           if (base + WIDTH <= len) {
             typeX dataA = vloadX(gid, A);
@@ -100,48 +112,65 @@ private:
               if (idx < len) A[idx] = A[idx] {operation} B[idx];
             }
           }
+          #else
+          A[gid] = A[gid] {operation} B[gid];
+          #endif
         })",
         {{"method", name}, {"operation", operation}});
   }
 
-  std::string matrixMult(std::string name) {
-    return format(
-        R"(
-      #define TILE_SIZE WIDTH*4
-      __kernel void mult(const __global typeX* A,
-                         const __global typeX* B,
-                         __global typeX* C, const int M, const int N, const int K) {
-        const int row = get_local_id(0);
-        const int col = get_local_id(1);
-        const int globalRow = (TILE_SIZE/WIDTH)*get_group_id(0) + row;
-        const int globalCol = TILE_SIZE*get_group_id(1) + col;
-        __local typeX Asub[TILE_SIZE][TILE_SIZE/WIDTH];
-        __local typeX Bsub[TILE_SIZE][TILE_SIZE/WIDTH];
-        typeX acc = 0;
-        const int numTiles = K/TILE_SIZE;
-        for (int tile = 0; tile < numTiles; tile++) {
-          const int tiledRow = (TILE_SIZE/WIDTH)*tile + row;
-          const int tiledCol = TILE_SIZE*tile + col;
-          Asub[col][row] = A[tiledCol*(M/WIDTH) + globalRow];
-          Bsub[col][row] = B[globalCol*(K/WIDTH) + tiledRow];
-          barrier(CLK_LOCAL_MEM_FENCE);
-          typeX vecA, vecB;
-          type valB;
-          for (int k = 0; k < TILE_SIZE/WIDTH; k++) {
-            vecB = Bsub[col][k];
-            for (int w = 0; w < WIDTH; w++) {
-              vecA = Asub[WIDTH*k + w][row];
-              valB = vecB[w];
-              for (int i = 0; i < WIDTH; i++)
-                acc[i] += vecA[i] * valB;
-            }
-          }
-          barrier(CLK_LOCAL_MEM_FENCE);
-        }
-        C[globalCol*(M/WIDTH) + globalRow] = acc;
-      }
-    )",
-        {{"method", name}});
+  std::string matrixMult() {
+    return R"(
+      __kernel void mult(const __global type* A,
+                         const __global type* B,
+                         __global type* C,
+                         const int M, const int N, const int K) {
+        const int row = get_global_id(0);
+        const int col = get_global_id(1);
+        if (row < M && col < N) {
+          type sum = 0.0f;
+          for (int k = 0; k < K; k++)
+            sum += A[row * K + k] * B[k * N + col];
+          C[row * N + col] = sum;
+        }
+      })";
+  }
+
+  std::string func() {
+    return R"(
+      __kernel void func(__global type* A, const int f, const int derivative) {
+        int gid = get_global_id(0);
+        type x = A[gid];
+        switch (f) {
+        case 0: // SIGMOID
+          if (!derivative)
+            A[gid] = (type)1 / ((type)1 + exp(-x));
+          else {
+            type sigmoid = (type)1 / ((type)1 + exp(-x));
+            A[gid] = sigmoid * ((type)1 - sigmoid);
+          }
+          break;
+        case 1: // RELU
+          if (!derivative)
+            A[gid] = fmax((type)0, x);
+          else
+            A[gid] = (x > (type)0) ? (type)1 : (type)0;
+          break;
+        case 2: // MSE (really just the square function here)
+          if (!derivative)
+            A[gid] = x * x;
+          else
+            A[gid] = (type)2 * x;
+          break;
+        case 3: // LINEAR
+        default:
+          if (!derivative)
+            A[gid] = x;
+          else
+            A[gid] = (type)1.0f;
+          break;
+        }
+      })";
   }
 
   std::unordered_map<Method, std::pair<std::string, std::string>> programs = {
@@ -155,13 +184,18 @@ private:
       {Method::T_HADAMARD,
        {binaryOperation("hadamard_mult", "*"), "hadamard_mult"}},
 
-      {Method::T_MULT, {matrixMult("mult"), "mult"}},
+      {Method::T_MULT, {matrixMult(), "mult"}},
+
+      {Method::FUNC, {func(), "func"}},
   };
 
   std::unordered_map<Method, cl::Program> compiledPrograms;
 
 public:
-  Kernels(Vector vec = Vector::type4) : vector(vec) {
+  Kernels(Vector vec) : vector(vec) {
+    std::cout << "Compiling " << getTypeName()
+              << " kernels with vector size = " << std::to_string((int)vector)
+              << " ";
     std::string extensions = openCL.getDevice().getInfo<CL_DEVICE_EXTENSIONS>();
     if (extensions.find("cl_khr_fp16") != std::string::npos)
       configuration = R"(
@@ -183,10 +217,12 @@ public:
     configuration += format(
         R"(
         typedef {type} type;
-        typedef {type}{vector} typeX;
         #define WIDTH {vector}
-        #define vloadX vload{vector}
-        #define vstoreX vstore{vector}
+        #if WIDTH != 1
+        typedef {type}{vector} typeX;
+        #define vloadX vload{vector}
+        #define vstoreX vstore{vector}
+        #endif
         )",
         {{"type", getTypeName()}, {"vector", std::to_string((int)vector)}});
@@ -209,6 +245,7 @@ public:
         }
       }
     }
+    std::cout << "completed" << std::endl;
   }
 
   cl::Kernel create(Method method) {
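The rewritten mult kernel is the textbook row-by-column product over flat row-major buffers, with a bounds check instead of the old requirement that every dimension divide TILE_SIZE. A plain-Python rendering of the same indexing, handy as a test oracle (hypothetical helper, not part of the module):

    # C[row * N + col] = sum over k of A[row * K + k] * B[k * N + col]
    def matmul_flat(A, B, M, N, K):
        C = [0.0] * (M * N)
        for row in range(M):
            for col in range(N):
                s = 0.0
                for k in range(K):
                    s += A[row * K + k] * B[k * N + col]
                C[row * N + col] = s
        return C

    A = [1, 2, 3,
         4, 5, 6]           # 2x3
    B = [7, 8,
         9, 10,
         11, 12]            # 3x2
    assert matmul_flat(A, B, 2, 2, 3) == [58.0, 64.0, 139.0, 154.0]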
diff --git a/src/tensor/opencl/opencl.cpp b/src/tensor/opencl/opencl.cpp
index 6967b1f..f686065 100644
--- a/src/tensor/opencl/opencl.cpp
+++ b/src/tensor/opencl/opencl.cpp
@@ -3,18 +3,16 @@
 #include <iostream>
 #include <stdexcept>
 
-OpenCL::OpenCL() {
+OpenCL::OpenCL() {}
+
+void OpenCL::init() {
   try {
     std::vector<cl::Platform> platforms;
     cl::Platform::get(&platforms);
-
-    if (platforms.empty()) {
+    if (platforms.empty())
       throw std::runtime_error("No OpenCL platforms found");
-    }
-
     std::vector<cl::Device> devices;
     bool deviceFound = false;
-
     for (const auto &platform : platforms) {
       try {
         platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
@@ -26,7 +24,6 @@ OpenCL::OpenCL() {
         continue;
       }
     }
-
     if (!deviceFound) {
       for (const auto &platform : platforms) {
         try {
@@ -40,12 +37,10 @@ OpenCL::OpenCL() {
         }
       }
     }
-
-    if (!deviceFound) {
+    if (!deviceFound)
       throw std::runtime_error("No suitable OpenCL devices found");
-    }
-
     device = devices[0];
+    printDeviceInfo();
     context = cl::Context(device);
     queue = cl::CommandQueue(context, device,
                              CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
diff --git a/src/tensor/opencl/opencl.hpp b/src/tensor/opencl/opencl.hpp
index 8841af7..42026e4 100644
--- a/src/tensor/opencl/opencl.hpp
+++ b/src/tensor/opencl/opencl.hpp
@@ -13,6 +13,8 @@ private:
 public:
   OpenCL();
 
+  void init();
+
   OpenCL(const OpenCL &) = delete;
   OpenCL &operator=(const OpenCL &) = delete;
   OpenCL(OpenCL &&) = delete;
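init() keeps the old selection policy: scan every platform for a GPU first, then fall back to any device type. The same logic, sketched with the pyopencl bindings for comparison (assumes pyopencl is installed; it is not used by this project):

    import pyopencl as cl

    def pick_device():
        platforms = cl.get_platforms()
        if not platforms:
            raise RuntimeError("No OpenCL platforms found")
        # GPU first, then anything, mirroring OpenCL::init()
        for dev_type in (cl.device_type.GPU, cl.device_type.ALL):
            for platform in platforms:
                try:
                    devices = platform.get_devices(device_type=dev_type)
                except cl.Error:
                    continue
                if devices:
                    return devices[0]
        raise RuntimeError("No suitable OpenCL devices found")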
diff --git a/src/tensor/opencl/tensor.hpp b/src/tensor/opencl/tensor.hpp
index 2c03f0f..6ddd45f 100644
--- a/src/tensor/opencl/tensor.hpp
+++ b/src/tensor/opencl/tensor.hpp
@@ -45,8 +45,12 @@ private:
                all(other.getEvent()), &event_);
   }
 
+  constexpr const static typename Kernels<T>::Vector vector = Kernels<T>::Vector::type1;
+  constexpr const static int vectorSize = (int)vector;
+  constexpr const static int tileSize = vectorSize * 4;
+
   static cl::Kernel createKernel(typename Kernels<T>::Method method) {
-    static Kernels<T> kernels(Kernels<T>::Vector::type4);
+    static Kernels<T> kernels(vector);
     return kernels.create(method);
   }
 
@@ -56,7 +60,7 @@ public:
   using ITensor<T, Dim>::axes_;
   using ITensor<T, Dim>::checkAxisInDim;
   using ITensor<T, Dim>::checkItHasSameShape;
-  using ITensor<T, Dim>::computeIndex;
+  // using ITensor<T, Dim>::computeIndex;
   using ITensor<T, Dim>::getSize;
   using ITensor<T, Dim>::shape_;
 
@@ -124,30 +128,32 @@ public:
   using ITensor<T, Dim>::operator-;
 
   Tensor operator+() const override {
+    Tensor result = *this;
     cl::Kernel kernel = createKernel(Kernels<T>::Method::POSITIVE);
-    kernel.setArg(0, *data_);
-    kernel.setArg(1, (int)getSize());
-    openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
-                                           cl::NDRange(getSize()),
-                                           cl::NullRange, all(event_), &event_);
-    return *this;
+    kernel.setArg(0, *result.getData());
+    kernel.setArg(1, (int)result.getSize());
+    openCL.getQueue().enqueueNDRangeKernel(
+        kernel, cl::NullRange, cl::NDRange(result.getSize()), cl::NullRange,
+        all(result.event_), &result.event_);
+    return result;
   }
 
   Tensor operator-() const override {
+    Tensor result = *this;
     cl::Kernel kernel = createKernel(Kernels<T>::Method::NEGATIVE);
-    kernel.setArg(0, *data_);
-    kernel.setArg(1, (int)getSize());
-    openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
-                                           cl::NDRange(getSize()),
-                                           cl::NullRange, all(event_), &event_);
-    return *this;
+    kernel.setArg(0, *result.getData());
+    kernel.setArg(1, (int)result.getSize());
+    openCL.getQueue().enqueueNDRangeKernel(
+        kernel, cl::NullRange, cl::NDRange(result.getSize()), cl::NullRange,
+        all(result.event_), &result.event_);
+    return result;
   }
 
   Tensor &operator+=(const T scalar) override {
     cl::Kernel kernel = createKernel(Kernels<T>::Method::S_ADD);
     kernel.setArg(0, *data_);
-    kernel.setArg(1, scalar);
-    kernel.setArg(2, (int)getSize());
+    kernel.setArg(1, (int)getSize());
+    kernel.setArg(2, scalar);
     openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
                                            cl::NDRange(getSize()),
                                            cl::NullRange, all(event_), &event_);
@@ -157,8 +163,8 @@ public:
   Tensor &operator*=(const T scalar) override {
     cl::Kernel kernel = createKernel(Kernels<T>::Method::S_MULT);
     kernel.setArg(0, *data_);
-    kernel.setArg(1, scalar);
-    kernel.setArg(2, (int)getSize());
+    kernel.setArg(1, (int)getSize());
+    kernel.setArg(2, scalar);
     openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
                                            cl::NDRange(getSize()),
                                            cl::NullRange, all(event_), &event_);
@@ -166,6 +172,7 @@ public:
   }
 
   Tensor &operator+=(const Tensor &other) override {
+    checkItHasSameShape(other);
     cl::Kernel kernel = createKernel(Kernels<T>::Method::T_ADD);
     kernel.setArg(0, *data_);
     kernel.setArg(1, *other.getData());
@@ -177,18 +184,17 @@ public:
   }
 
   Tensor &operator*=(const Tensor &other) override {
+    checkItHasSameShape(other);
     cl::Kernel kernel = createKernel(Kernels<T>::Method::T_HADAMARD);
     kernel.setArg(0, *data_);
     kernel.setArg(1, *other.getData());
-    kernel.setArg(2, getSize());
+    kernel.setArg(2, (int)getSize());
     openCL.getQueue().enqueueNDRangeKernel(
         kernel, cl::NullRange, cl::NDRange(getSize()), cl::NullRange,
         all(event_, other.event_), &event_);
     return *this;
   }
 
-#define TILE_SIZE 16
-#define VEC_SIZE 4
   Tensor operator%(const Tensor &other) const {
     static_assert(Dim == 1 || Dim == 2,
                   "Inner product is only defined for vectors and matrices");
@@ -209,17 +215,28 @@ public:
       kernel.setArg(3, (int)m);
       kernel.setArg(4, (int)n);
       kernel.setArg(5, (int)k);
-      cl::NDRange global_size(m / VEC_SIZE, n);
-      cl::NDRange local_size(TILE_SIZE / VEC_SIZE, TILE_SIZE);
+      cl::NDRange globalSize(m, n);
       openCL.getQueue().enqueueNDRangeKernel(
-          kernel, cl::NullRange, global_size, local_size,
+          kernel, cl::NullRange, globalSize, cl::NullRange,
           all(event_, other.event_), &result.event_);
       return result;
     }
   }
 
+  Tensor apply(Function f, bool derivative = false) const override {
+    Tensor result = *this;
+    cl::Kernel kernel = createKernel(Kernels<T>::Method::FUNC);
+    kernel.setArg(0, *result.getData());
+    kernel.setArg(1, (int)f);
+    kernel.setArg(2, (int)derivative);
+    openCL.getQueue().enqueueNDRangeKernel(
+        kernel, cl::NullRange, cl::NDRange(result.getSize()), cl::NullRange,
+        all(result.event_), &result.event_);
+    return result;
+  }
+
   std::string toString() const override {
-    std::vector<float> result(getSize());
+    std::vector<T> result(getSize());
     openCL.getQueue().enqueueReadBuffer(*data_, CL_FALSE, 0,
                                         getSize() * sizeof(T), result.data(),
                                         all(event_), &event_);
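The change to operator+ and operator- fixes a real sharp edge: the old versions enqueued the kernel on *this and returned it, so negating a tensor silently mutated the original. The value semantics expected now, as a quick Python check (hypothetical session; on the OpenCL build T.init() must be called first):

    import tensor.tensor as T

    a = T.Matrix([2, 2], 1)   # all ones
    b = -a                    # __neg__ returns a negated copy
    print(a)                  # unchanged: still all ones
    print(b)                  # all -1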
diff --git a/src/tensor/pybind.cpp b/src/tensor/pybind.cpp
index 523a081..bae76ef 100644
--- a/src/tensor/pybind.cpp
+++ b/src/tensor/pybind.cpp
@@ -4,6 +4,7 @@
 
 #ifdef USE_OPENCL
 #include "opencl/tensor.hpp"
+#include <CL/cl_half.h>
 OpenCL openCL;
 #elif USE_CPU
 #include "cpu/tensor.hpp"
@@ -15,40 +16,48 @@ enum class TENSOR_PLATFORM { CPU, OPENCL };
 
 template <typename T, size_t Dim>
 void register_tensor(py::module &m, const std::string &name) {
-  auto tensor = py::class_<Tensor<T, Dim>>(m, name.c_str())
-                    .def(py::init<const std::vector<size_t> &>())
-                    .def(py::init<const std::vector<size_t> &, T>())
-                    .def(py::init<const std::vector<size_t> &,
-                                  const std::vector<T> &>())
-                    .def(py::init<const std::vector<size_t> &, T, T>())
+  auto tensor =
+      py::class_<Tensor<T, Dim>>(m, name.c_str())
+          .def(py::init<const std::vector<size_t> &>())
+          .def(py::init<const std::vector<size_t> &, T>())
+          .def(py::init<const std::vector<size_t> &,
+                        const std::vector<T> &>())
+          .def(py::init<const std::vector<size_t> &, T, T>())
 
-                    .def("get_shape", &Tensor<T, Dim>::getShape)
-                    .def("get_axes", &Tensor<T, Dim>::getAxes)
-                    .def("get_size", &Tensor<T, Dim>::getSize)
+          .def("get_shape", &Tensor<T, Dim>::getShape)
+          .def("get_axes", &Tensor<T, Dim>::getAxes)
+          .def("get_size", &Tensor<T, Dim>::getSize)
 
-                    .def(py::self + py::self)
-                    .def(py::self - py::self)
-                    .def(py::self * py::self)
-                    .def(py::self += py::self)
-                    .def(py::self -= py::self)
-                    .def(py::self *= py::self)
+          .def(py::self + py::self)
+          .def(py::self - py::self)
+          .def(py::self * py::self)
+          .def(py::self += py::self)
+          .def(py::self -= py::self)
+          .def(py::self *= py::self)
 
-                    .def(py::self + T())
-                    .def(py::self - T())
-                    .def(py::self * T())
-                    .def(py::self / T())
-                    .def(py::self += T())
-                    .def(py::self -= T())
-                    .def(py::self *= T())
-                    .def(py::self /= T())
-                    .def(T() + py::self)
-                    .def(T() - py::self)
-                    .def(T() * py::self)
+          .def(py::self + T())
+          .def(py::self - T())
+          .def(py::self * T())
+          .def(py::self / T())
+          .def(py::self += T())
+          .def(py::self -= T())
+          .def(py::self *= T())
+          .def(py::self /= T())
+          .def(T() + py::self)
+          .def(T() - py::self)
+          .def(T() * py::self)
 
-                    .def("__pos__", [](const Tensor<T, Dim> &t) { return +t; })
-                    .def("__neg__", [](const Tensor<T, Dim> &t) { return -t; })
+          .def("__pos__", [](const Tensor<T, Dim> &t) { return +t; })
+          .def("__neg__", [](const Tensor<T, Dim> &t) { return -t; })
 
-                    .def("__repr__", &Tensor<T, Dim>::toString);
+          .def("__call__", [](const Tensor<T, Dim> &self,
+                              Function f) { return self.apply(f); })
+          .def("__call__",
+               [](const Tensor<T, Dim> &self, Function f, bool derivative) {
+                 return self.apply(f, derivative);
+               })
+
+          .def("__repr__", &Tensor<T, Dim>::toString);
 
   if constexpr (Dim >= 2) {
     tensor
@@ -101,7 +110,6 @@ void register_tensor(py::module &m, const std::string &name) {
         });
 #endif
 
-  // if constexpr (Dim == 1 || Dim == 2)
   if constexpr (Dim == 2)
     tensor.def("__matmul__", &Tensor<T, Dim>::operator%);
 }
@@ -114,18 +122,28 @@ PYBIND11_MODULE(tensor, m) {
       .value("OPENCL", TENSOR_PLATFORM::OPENCL)
       .export_values();
 
+  py::enum_<Function>(m, "FUNCTION")
+      .value("SIGMOID", Function::SIGMOID)
+      .value("RELU", Function::RELU)
+      .value("MSE", Function::MSE)
+      .value("LINEAR", Function::LINEAR)
+      .export_values();
+
 #ifdef USE_OPENCL
   m.attr("MODE") = TENSOR_PLATFORM::OPENCL;
 #elif USE_CPU
   m.attr("MODE") = TENSOR_PLATFORM::CPU;
 #endif
 
+#ifdef USE_OPENCL
+  m.def("init", []() { openCL.init(); });
+#endif
+
   register_tensor<float, 0>(m, "Scalar");
   register_tensor<float, 1>(m, "Vector");
   register_tensor<float, 2>(m, "Matrix");
   register_tensor<float, 3>(m, "Tensor3");
 
-#ifndef USE_OPENCL
   register_tensor<double, 0>(m, "dScalar");
   register_tensor<double, 1>(m, "dVector");
   register_tensor<double, 2>(m, "dMatrix");
@@ -135,5 +153,11 @@ PYBIND11_MODULE(tensor, m) {
   register_tensor<int, 1>(m, "iVector");
   register_tensor<int, 2>(m, "iMatrix");
   register_tensor<int, 3>(m, "iTensor3");
+
+#ifdef USE_OPENCL
+  register_tensor<cl_half, 0>(m, "hScalar");
+  register_tensor<cl_half, 1>(m, "hVector");
+  register_tensor<cl_half, 2>(m, "hMatrix");
+  register_tensor<cl_half, 3>(m, "hTensor3");
 #endif
 }
diff --git a/src/tensor/tensor.hpp b/src/tensor/tensor.hpp
index 9842417..29b96d4 100644
--- a/src/tensor/tensor.hpp
+++ b/src/tensor/tensor.hpp
@@ -6,6 +6,7 @@
 #include <vector>
 
 template <typename T, size_t Dim> class Tensor;
+enum class Function { SIGMOID, RELU, MSE, LINEAR };
 
 template <typename T, size_t Dim> class ITensor {
 protected:
@@ -73,6 +74,8 @@ public:
 
   Tensor<T, Dim> operator*(const Tensor<T, Dim> &other) const;
 
+  virtual Tensor<T, Dim> apply(Function f, bool derivative = false) const = 0;
+
   // === Utils ===
   virtual std::string toString() const = 0;
 };
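With the FUNCTION enum and the __call__ overloads registered, activations and their derivatives become one-liners from Python; this is exactly what NN.forward and NN.learn in run.py rely on. Intended usage, sketched (assuming a CPU build, so no init() call is needed):

    import tensor.tensor as T

    m = T.Matrix([2, 2], [-1.0, 0.0, 1.0, 2.0])
    print(m(T.FUNCTION.SIGMOID))        # element-wise sigmoid
    print(m(T.FUNCTION.SIGMOID, True))  # its derivative at the same points
    print(m(T.FUNCTION.RELU))           # max(0, x) per element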