From c548c3089b0a33e16d22cae70d5c19daba86867f Mon Sep 17 00:00:00 2001 From: StepanovPlaton Date: Sat, 1 Nov 2025 14:22:09 +0400 Subject: [PATCH] Forward with new tensors math --- src/Makefile | 2 +- src/kernels/matrix.cl | 8 +-- src/main.cpp | 75 +++++++++++++++++++++---- src/math/tensor/cpu/math.hpp | 11 +++- src/math/tensor/gpu/math.hpp | 32 ++++++----- src/math/tensor/gpu/tensor.hpp | 100 ++++++++++++++++++++++++++------- src/math/tensor/math.hpp | 12 +++- src/math/tensor/tensor.hpp | 20 +++---- 8 files changed, 192 insertions(+), 68 deletions(-) diff --git a/src/Makefile b/src/Makefile index dd89464..c4a81b0 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,5 +1,5 @@ CXX = g++ -CXXFLAGS = -Wall -Wextra -O2 -std=c++23 +CXXFLAGS = -Wall -Wextra -O1 -g -std=c++23 -fno-omit-frame-pointer LIBS = -lOpenCL TARGET = main COMMON_SRC = ./math/opencl/opencl.cpp diff --git a/src/kernels/matrix.cl b/src/kernels/matrix.cl index 95810b9..d8ca0ad 100644 --- a/src/kernels/matrix.cl +++ b/src/kernels/matrix.cl @@ -27,7 +27,7 @@ __kernel void activate(__global float *input, __global float *output, } __kernel void mult_small(__global float *A, __global float *B, - __global float *C, const float bias, + __global float *C, __global float *bias, const int activation_type, const float alpha, const int M, const int N, const int K, const int transpose_B) { @@ -49,7 +49,7 @@ __kernel void mult_small(__global float *A, __global float *B, sum += a_val * b_val; } - float result = sum + bias; + float result = sum + bias[col]; if (activation_type != 0) { result = activate_x(result, activation_type, alpha); } @@ -58,7 +58,7 @@ __kernel void mult_small(__global float *A, __global float *B, } __kernel void mult(__global float *A, __global float *B, __global float *C, - const float bias, const int activation_type, + __global float *bias, const int activation_type, const float alpha, const int M, const int N, const int K, const int transpose_B) { const int tile_size = 16; @@ -122,7 +122,7 @@ 
__kernel void mult(__global float *A, __global float *B, __global float *C, } if (global_i < M && global_j < N) { - float result = sum + bias; + float result = sum + bias[global_j]; if (activation_type != 0) { result = activate_x(result, activation_type, alpha); } diff --git a/src/main.cpp b/src/main.cpp index f3fa5ec..4b9ddff 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -5,27 +5,78 @@ using namespace GPU; +class Layer { +protected: + int inputFeatures; + int outputFeatures; + Vector bias; + Activation activation; + float alpha; + Matrix weights; + +public: + Layer(int inputFeatures, int outputFeatures, Activation activation, + Vector bias, float alpha = 0.0f) + : inputFeatures(inputFeatures), outputFeatures(outputFeatures), + bias(bias), activation(activation), alpha(alpha), + weights(outputFeatures, inputFeatures) {} + + int getInputFeatures() const { return inputFeatures; } + int getOutputFeatures() const { return outputFeatures; } + Activation getActivation() const { return activation; } + float getAlpha() const { return alpha; } + + const Vector &getBias() const { return bias; } + const Matrix &getWeights() const { return weights; } +}; + +class NeuralNetwork { +private: + std::vector layers; + +public: + NeuralNetwork(std::vector l) : layers(l) {} + + Matrix predict(Matrix inputs) { + MatrixMath mm; + std::vector steps; + steps.push_back(inputs); + for (size_t i = 0; i < layers.size(); i++) + steps.push_back(mm.mult(steps[steps.size() - 1], layers[i].getWeights(), + true, &layers[i].getBias(), + layers[i].getActivation(), layers[i].getAlpha())); + mm.await(); + return steps[steps.size() - 1]; + } + + const Layer &getLayer(int i) const { return layers[i]; } +}; + OpenCL openCL; int main() { - MatrixMath mm; + NeuralNetwork nn( + {Layer(2, 1, Activation::SIGMOID, Vector(std::vector{1.0f}))}); - Matrix a(2, 2); - Matrix b(2, 2); + for (int i = 0; i < 10; i++) { + int v1 = (i / 2) % 2; + int v2 = i % 2; - CPU::Matrix a_(2, 2, a.toVector()); CPU::Matrix b_(2, 
2, b.toVector()); + Matrix input(1, 2, {static_cast(v1), static_cast(v2)}); - a_.print(); - b_.print(); + Matrix r = nn.predict(input); + std::vector rv = r.toVector(); - Matrix c = mm.add(a, b); + std::cout << "Network: "; + for (size_t j = 0; j < rv.size(); ++j) { + printf("%f\t", rv[j]); + } - CPU::Matrix c_(2, 2, c.toVector(&mm.getQueue())); + float expected = static_cast(v1 ^ v2); + std::cout << " | XOR(" << v1 << ", " << v2 << ") = " << expected; - mm.await(); - - c_.print(); + std::cout << std::endl; + } return 0; } \ No newline at end of file diff --git a/src/math/tensor/cpu/math.hpp b/src/math/tensor/cpu/math.hpp index d2c1b3f..745c24a 100644 --- a/src/math/tensor/cpu/math.hpp +++ b/src/math/tensor/cpu/math.hpp @@ -75,19 +75,24 @@ class Tensor0Math : public TensorMath, public ITensor0Math {}; class Tensor1Math : public TensorMath, public ITensor1Math {}; -class Tensor2Math : public TensorMath, public ITensor2Math { +class Tensor2Math : public TensorMath, + public ITensor2Math { public: Tensor2 mult(const Tensor2 &a, const Tensor2 &b, bool transpose = false, - float bias = 0.0f, Activation type = Activation::LINEAR, + const Vector *bias = nullptr, + Activation type = Activation::LINEAR, float alpha = 0.01f) override { validateMultDimensions(a, b, transpose); + if (bias != nullptr) + validateBiasDimensions(b, *bias, transpose); Tensor2 result(a.getRows(), b.getCols(), 0.0f); for (int i = 0; i < result.getRows(); ++i) { for (int j = 0; j < result.getCols(); ++j) { float sum = 0.0f; for (int k = 0; k < a.getCols(); ++k) sum += a(i, k) * (transpose ? b(j, k) : b(k, j)); - result(i, j) = activate_x(sum + bias, type, alpha); + result(i, j) = activate_x(sum + (bias == nullptr ? 
0.0f : (*bias)(j)), + type, alpha); } } return result; diff --git a/src/math/tensor/gpu/math.hpp b/src/math/tensor/gpu/math.hpp index d783d3a..4d06f37 100644 --- a/src/math/tensor/gpu/math.hpp +++ b/src/math/tensor/gpu/math.hpp @@ -93,12 +93,11 @@ class Tensor0Math : public TensorMath, public ITensor0Math {}; class Tensor1Math : public TensorMath, public ITensor1Math {}; -class Tensor2Math : public TensorMath, public ITensor2Math { +class Tensor2Math : public TensorMath, + public ITensor2Math { private: - Tensor2 mult_tiled(const Tensor2 &a, const Tensor2 &b, bool transpose = false, - float bias = 0.0f, Activation type = Activation::LINEAR, - float alpha = 0.01f) { - validateMultDimensions(a, b, transpose); + Tensor2 mult_tiled(const Tensor2 &a, const Tensor2 &b, bool transpose, + const Vector &bias, Activation type, float alpha) { Tensor2 result(a.getRows(), transpose ? b.getRows() : b.getCols(), false, &queue); @@ -111,7 +110,7 @@ private: kernels[Method::MULT].setArg(0, *a.getBuffer()); kernels[Method::MULT].setArg(1, *b.getBuffer()); kernels[Method::MULT].setArg(2, *result.getBuffer()); - kernels[Method::MULT].setArg(3, bias); + kernels[Method::MULT].setArg(3, *bias.getBuffer()); kernels[Method::MULT].setArg(4, static_cast(type)); kernels[Method::MULT].setArg(5, alpha); kernels[Method::MULT].setArg(6, result.getRows()); @@ -122,16 +121,14 @@ private: global_size, local_size); return result; } - Tensor2 mult_small(const Tensor2 &a, const Tensor2 &b, bool transpose = false, - float bias = 0.0f, Activation type = Activation::LINEAR, - float alpha = 0.01f) { - validateMultDimensions(a, b, transpose); + Tensor2 mult_small(const Tensor2 &a, const Tensor2 &b, bool transpose, + const Vector &bias, Activation type, float alpha) { Tensor2 result(a.getRows(), transpose ? 
b.getRows() : b.getCols(), false, &queue); kernels[Method::MULT_SMALL].setArg(0, *a.getBuffer()); kernels[Method::MULT_SMALL].setArg(1, *b.getBuffer()); kernels[Method::MULT_SMALL].setArg(2, *result.getBuffer()); - kernels[Method::MULT_SMALL].setArg(3, bias); + kernels[Method::MULT_SMALL].setArg(3, *bias.getBuffer()); kernels[Method::MULT_SMALL].setArg(4, static_cast(type)); kernels[Method::MULT_SMALL].setArg(5, alpha); kernels[Method::MULT_SMALL].setArg(6, result.getRows()); @@ -145,13 +142,20 @@ private: public: Tensor2 mult(const Tensor2 &a, const Tensor2 &b, bool transpose = false, - float bias = 0.0f, Activation type = Activation::LINEAR, + const Vector *bias = nullptr, + Activation type = Activation::LINEAR, float alpha = 0.01f) override { + validateMultDimensions(a, b, transpose); + const Vector defaultBias(transpose ? b.getRows() : b.getCols(), 0.0f, &queue); + if (bias != nullptr) + validateBiasDimensions(b, *bias, transpose); if (a.getRows() > 64 || a.getCols() > 64 || b.getRows() > 64 || b.getCols() > 64) - return mult_tiled(a, b, transpose, bias, type, alpha); + return mult_tiled(a, b, transpose, bias == nullptr ? defaultBias : *bias, + type, alpha); else - return mult_small(a, b, transpose, bias, type, alpha); + return mult_small(a, b, transpose, bias == nullptr ? defaultBias : *bias, + type, alpha); } }; diff --git a/src/math/tensor/gpu/tensor.hpp b/src/math/tensor/gpu/tensor.hpp index 58fe563..df628ee 100644 --- a/src/math/tensor/gpu/tensor.hpp +++ b/src/math/tensor/gpu/tensor.hpp @@ -69,12 +69,41 @@ public: if (fill) createBuf(getShapeSize(shape), 0.0f, queue); } - Tensor(const Tensor &) = delete; - Tensor &operator=(const Tensor &) = delete; - Tensor(Tensor &&other) : ITensor(other.shape), buffer(other.buffer) { + + Tensor(const Tensor &other, const cl::CommandQueue *queue = nullptr) + : ITensor(other) { cl::CommandQueue q = queue == nullptr ? 
openCL.getDefaultQueue() : *queue; + createBuf(other.getSize(), &q); + q.enqueueCopyBuffer(*other.buffer, *buffer, 0, 0, + other.getSize() * sizeof(float)); + }; + Tensor &operator=(const Tensor &other) { + if (buffer != nullptr) + delete buffer; + ITensor::operator=(other); + createBuf(other.getSize(), &openCL.getDefaultQueue()); + openCL.getDefaultQueue().enqueueCopyBuffer(*other.buffer, *buffer, 0, 0, + other.getSize() * sizeof(float)); + return *this; + }; + Tensor(Tensor &&other) : ITensor(other), buffer(other.buffer) { other.buffer = nullptr; }; - Tensor &operator=(Tensor &&other) = delete; + Tensor &operator=(Tensor &&other) { + if (this != &other) { + if (buffer != nullptr) + delete buffer; + ITensor::operator=(std::move(other)); + buffer = other.buffer; + other.buffer = nullptr; + } + return *this; + }; + + ~Tensor() { + if (buffer != nullptr) + delete buffer; + } std::vector toVector(const cl::CommandQueue *queue = nullptr) { size_t size = getShapeSize(shape); @@ -144,17 +173,25 @@ public: if (shape.size() != 0) throw std::invalid_argument("Tensor0 dimension must be 0"); } - Tensor0(const cl::CommandQueue *queue = nullptr) : Tensor({}, queue) { + Tensor0(const cl::CommandQueue *queue = nullptr) + : Tensor(std::vector{}, queue) { createBuf(1, queue); } Tensor0(float value, const cl::CommandQueue *queue = nullptr) - : Tensor({}, queue) { + : Tensor(std::vector{}, queue) { createBuf(1, value, queue); } - Tensor0(const Tensor0 &) = delete; - Tensor0 &operator=(const Tensor0 &) = delete; + Tensor0(const Tensor0 &other, const cl::CommandQueue *queue = nullptr) + : Tensor(other, queue) {}; + Tensor0 &operator=(const Tensor0 &other) { + Tensor::operator=(other); + return *this; + }; Tensor0(Tensor0 &&other) : Tensor(std::move(other)) {}; - Tensor0 &operator=(Tensor0 &&other) = delete; + Tensor0 &operator=(Tensor0 &&other) { + Tensor::operator=(std::move(other)); + return *this; + }; }; class Tensor1 : public Tensor, public ITensor1 { @@ -180,10 +217,17 @@ public: 
: Tensor({(int)values.size()}, false, queue) { fillBuf(values, queue); } - Tensor1(const Tensor1 &) = delete; - Tensor1 &operator=(const Tensor1 &) = delete; - Tensor1(Tensor1 &&other) : Tensor(std::move(other)) {} - Tensor1 &operator=(Tensor1 &&other) = delete; + Tensor1(const Tensor1 &other, const cl::CommandQueue *queue = nullptr) + : Tensor(other, queue) {}; + Tensor1 &operator=(const Tensor1 &other) { + Tensor::operator=(other); + return *this; + }; + Tensor1(Tensor1 &&other) : Tensor(std::move(other)) {}; + Tensor1 &operator=(Tensor1 &&other) { + Tensor::operator=(std::move(other)); + return *this; + }; int getSize() const override { return shape[0]; } }; @@ -223,10 +267,17 @@ public: fillBuf(v, queue); } - Tensor2(const Tensor2 &) = delete; - Tensor2 &operator=(const Tensor2 &) = delete; - Tensor2(Tensor2 &&other) : Tensor(std::move(other)) {} - Tensor2 &operator=(Tensor2 &&other) = delete; + Tensor2(const Tensor2 &other, const cl::CommandQueue *queue = nullptr) + : Tensor(other, queue) {}; + Tensor2 &operator=(const Tensor2 &other) { + Tensor::operator=(other); + return *this; + }; + Tensor2(Tensor2 &&other) : Tensor(std::move(other)) {}; + Tensor2 &operator=(Tensor2 &&other) { + Tensor::operator=(std::move(other)); + return *this; + }; int getRows() const override { return shape[0]; } int getCols() const override { return shape[1]; } @@ -269,10 +320,17 @@ public: } fillBuf(v, queue); } - Tensor3(const Tensor3 &) = delete; - Tensor3 &operator=(const Tensor3 &) = delete; - Tensor3(Tensor3 &&other) : Tensor(std::move(other)) {} - Tensor3 &operator=(Tensor3 &&other) = delete; + Tensor3(const Tensor3 &other, const cl::CommandQueue *queue = nullptr) + : Tensor(other, queue) {}; + Tensor3 &operator=(const Tensor3 &other) { + Tensor::operator=(other); + return *this; + }; + Tensor3(Tensor3 &&other) : Tensor(std::move(other)) {}; + Tensor3 &operator=(Tensor3 &&other) { + Tensor::operator=(std::move(other)); + return *this; + }; }; typedef Tensor0 Scalar; diff --git 
a/src/math/tensor/math.hpp b/src/math/tensor/math.hpp index 36794d9..40b768f 100644 --- a/src/math/tensor/math.hpp +++ b/src/math/tensor/math.hpp @@ -41,18 +41,24 @@ template class ITensor0Math {}; template class ITensor1Math {}; -template class ITensor2Math { +template class ITensor2Math { public: - virtual T mult(const T &a, const T &b, bool transpose, float bias, + virtual M mult(const M &a, const M &b, bool transpose, const V *bias, Activation type, float alpha) = 0; - void validateMultDimensions(const T &a, const T &b, bool transpose) const { + void validateMultDimensions(const M &a, const M &b, bool transpose) const { if ((!transpose && a.getCols() != b.getRows()) || (transpose && a.getCols() != b.getCols())) { throw std::invalid_argument( "Invalid matrix dimensions for multiplication"); } }; + void validateBiasDimensions(const M &a, const V &b, bool transpose) const { + if ((!transpose && a.getCols() != b.getSize()) || + (transpose && a.getRows() != b.getSize())) { + throw std::invalid_argument("Invalid matrix bias"); + } + }; }; template class ITensor3Math {}; \ No newline at end of file diff --git a/src/math/tensor/tensor.hpp b/src/math/tensor/tensor.hpp index 0ccc2dd..d200153 100644 --- a/src/math/tensor/tensor.hpp +++ b/src/math/tensor/tensor.hpp @@ -23,10 +23,16 @@ protected: public: ITensor(const std::vector &shape) : shape(shape) {} - ITensor(const ITensor &) = default; - ITensor &operator=(const ITensor &) = default; - ITensor(ITensor &&other) = default; - ITensor &operator=(ITensor &&other) = default; + ITensor(const ITensor &other) : shape(other.shape) {}; + ITensor &operator=(const ITensor &other) { + shape = other.shape; + return *this; + }; + ITensor(ITensor &&other) : shape(other.shape) {}; + ITensor &operator=(ITensor &&other) { + shape = other.shape; + return *this; + }; const std::vector &getShape() const { return shape; } int getDim() const { return static_cast(shape.size()); } @@ -50,12 +56,6 @@ public: class ITensor2 { public: - 
ITensor2() = default; - ITensor2(const ITensor2 &) = default; - ITensor2 &operator=(const ITensor2 &) = default; - ITensor2(ITensor2 &&other) = default; - ITensor2 &operator=(ITensor2 &&other) = default; - virtual int getRows() const = 0; virtual int getCols() const = 0; };