From f728261354fcf47be177dde12632bbd0601628cd Mon Sep 17 00:00:00 2001 From: StepanovPlaton Date: Thu, 30 Oct 2025 23:26:53 +0400 Subject: [PATCH] First NN forward --- src/Makefile | 14 +- src/benchmark.cpp | 136 +++++++++++++++++++ src/main.cpp | 179 ++++++++----------------- src/math/matrix/gpu/matrix.cpp | 28 +++- src/math/matrix/gpu/matrix.hpp | 9 +- src/math/matrix/gpu/mutable_matrix.cpp | 7 + src/math/matrix/gpu/mutable_matrix.hpp | 6 + src/math/matrix/matrix.hpp | 1 + src/math/matrix/mutable_matrix.hpp | 2 +- src/math/opencl/opencl.hpp | 2 +- 10 files changed, 254 insertions(+), 130 deletions(-) create mode 100644 src/benchmark.cpp diff --git a/src/Makefile b/src/Makefile index 3c13673..8d6b1b3 100644 --- a/src/Makefile +++ b/src/Makefile @@ -2,13 +2,18 @@ CXX = g++ CXXFLAGS = -Wall -Wextra -O2 -std=c++23 LIBS = -lOpenCL TARGET = main -SRC = main.cpp ./math/opencl/opencl.cpp ./math/matrix/cpu/matrix.cpp ./math/matrix/cpu/mutable_matrix.cpp ./math/matrix/gpu/matrix.cpp ./math/matrix/gpu/mutable_matrix.cpp +COMMON_SRC = ./math/opencl/opencl.cpp ./math/matrix/cpu/matrix.cpp ./math/matrix/cpu/mutable_matrix.cpp ./math/matrix/gpu/matrix.cpp ./math/matrix/gpu/mutable_matrix.cpp +MAIN_SRC = main.cpp $(COMMON_SRC) +BENCHMARK_SRC = benchmark.cpp $(COMMON_SRC) INCLUDES = -I"A:/Programs/OpenCL/include" LIB_PATH = -L"A:/Programs/OpenCL/lib" -$(TARGET): $(SRC) - $(CXX) $(CXXFLAGS) $(INCLUDES) $(LIB_PATH) -o $(TARGET) $(SRC) $(LIBS) +$(TARGET): $(MAIN_SRC) + $(CXX) $(CXXFLAGS) $(INCLUDES) $(LIB_PATH) -o $(TARGET) $(MAIN_SRC) $(LIBS) + +benchmark: $(BENCHMARK_SRC) + $(CXX) $(CXXFLAGS) $(INCLUDES) $(LIB_PATH) -o $(TARGET) $(BENCHMARK_SRC) $(LIBS) clean: rm -f $(TARGET) @@ -16,4 +21,7 @@ clean: run: $(TARGET) ./$(TARGET) +run_benchmark: benchmark + ./$(TARGET) + .PHONY: clean run \ No newline at end of file diff --git a/src/benchmark.cpp b/src/benchmark.cpp new file mode 100644 index 0000000..d3e1492 --- /dev/null +++ b/src/benchmark.cpp @@ -0,0 +1,136 @@ +#include +#include +#include +#include +#include + +#include "./math/math.hpp" + +typedef Matrices::CPU Matrix; +typedef MutableMatrices::CPU MutableMatrix; + +OpenCL openCL; + +std::vector generateRandomMatrix(int rows, int cols) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(-1.0f, 1.0f); + + std::vector matrix(rows * cols); + for (int i = 0; i < rows * cols; ++i) { + matrix[i] = dis(gen); + } + return matrix; +} +std::vector generateIdentityMatrix(int size) { + std::vector matrix(size * size, 0.0f); + for (int i = 0; i < size; ++i) { + matrix[i * size + i] = 1.0f; + } + return matrix; +} + +int main() { + const int SIZE = 1024; + + std::cout << "Testing with " << SIZE << "x" << SIZE << " matrices..." + << std::endl; + + std::vector matrixA = generateRandomMatrix(SIZE, SIZE); + std::vector matrixB = generateRandomMatrix(SIZE, SIZE); + std::vector matrixC = generateRandomMatrix(SIZE, SIZE); + + // std::vector matrixA = generateIdentityMatrix(SIZE); + // std::vector matrixB = generateIdentityMatrix(SIZE); + // std::vector matrixC = generateIdentityMatrix(SIZE); + + // Тестирование на CPU + { + std::cout << "\n=== CPU Version ===" << std::endl; + + auto start = std::chrono::high_resolution_clock::now(); + + MutableMatrices::CPU a(SIZE, SIZE, matrixA); + Matrices::CPU b(SIZE, SIZE, matrixB); + Matrices::CPU c(SIZE, SIZE, matrixC); + + auto gen_end = std::chrono::high_resolution_clock::now(); + + auto op_start = std::chrono::high_resolution_clock::now(); + + for (int i = 0; i < 10; i++) { + a.mult(b, 0.2f, MutableMatrices::CPU::Activate::SIGMOID); + } + + auto op_end = std::chrono::high_resolution_clock::now(); + + std::vector v = a.toVector(); + + auto total_end = std::chrono::high_resolution_clock::now(); + + auto gen_duration = + std::chrono::duration_cast(gen_end - start); + auto op_duration = std::chrono::duration_cast( + op_end - op_start); + auto total_duration = std::chrono::duration_cast( + total_end - start); + + std::cout << "Matrix generation time: " << gen_duration.count() << " ms" + << std::endl; + std::cout << "Operations time: " << op_duration.count() << " ms" + << std::endl; + std::cout << "Total time: " << total_duration.count() << " ms" << std::endl; + + std::cout << "First few elements: "; + for (size_t i = 0; i < 5 && i < v.size(); ++i) { + std::cout << v[i] << " "; + } + std::cout << std::endl; + } + + // Тестирование на GPU + { + std::cout << "\n=== GPU Version ===" << std::endl; + + auto start = std::chrono::high_resolution_clock::now(); + + MutableMatrices::GPU a(SIZE, SIZE, matrixA); + Matrices::GPU b(SIZE, SIZE, matrixB); + Matrices::GPU c(SIZE, SIZE, matrixC); + + auto gen_end = std::chrono::high_resolution_clock::now(); + + auto op_start = std::chrono::high_resolution_clock::now(); + + for (int i = 0; i < 10; i++) { + a.mult(b, 0.2f, MutableMatrices::GPU::Activate::SIGMOID, 0.0f); + } + + auto op_end = std::chrono::high_resolution_clock::now(); + + std::vector v = a.toVector(); + + auto total_end = std::chrono::high_resolution_clock::now(); + + auto gen_duration = + std::chrono::duration_cast(gen_end - start); + auto op_duration = std::chrono::duration_cast( + op_end - op_start); + auto total_duration = std::chrono::duration_cast( + total_end - start); + + std::cout << "Matrix generation time: " << gen_duration.count() << " ms" + << std::endl; + std::cout << "Operations time: " << op_duration.count() << " ms" + << std::endl; + std::cout << "Total time: " << total_duration.count() << " ms" << std::endl; + + std::cout << "First few elements: "; + for (size_t i = 0; i < 5 && i < v.size(); ++i) { + std::cout << v[i] << " "; + } + std::cout << std::endl; + } + + return 0; +} \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index d3e1492..cb9731c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,133 +1,72 @@ -#include -#include -#include -#include -#include - #include "./math/math.hpp" -typedef Matrices::CPU Matrix; -typedef MutableMatrices::CPU MutableMatrix; +#include +#include + +typedef Matrices::GPU M; +typedef MutableMatrices::GPU MM; + +class Layer { +protected: + int features; + float bias; + MM::Activate activate; + float alpha; + +public: + Layer(int features, MM::Activate activate = MM::Activate::LINEAR, + float bias = 0.0f, float alpha = 0.0f) + : features(features), activate(activate), bias(bias), alpha(alpha) {} + + int getFeatures() const { return features; } + float getBias() const { return bias; } + MM::Activate getActivate() const { return activate; } + float getAlpha() const { return alpha; } +}; + +class NeuralNetwork { +private: + std::vector layers; + std::vector weights; + +public: + NeuralNetwork(int n, std::initializer_list l) : layers(l) { + weights.emplace_back(n, layers[0].getFeatures()); + for (int i = 0; i < layers.size() - 1; i++) + weights.emplace_back(layers[i].getFeatures(), + layers[i + 1].getFeatures()); + } + + std::vector predict(std::vector i) { + if (i.size() != weights[0].getRows()) + std::invalid_argument("Invalid input size"); + MM input(1, (int)i.size(), i); + for (size_t i = 0; i < weights.size(); i++) + input.mult(weights[i], layers[i + 1].getBias(), + layers[i + 1].getActivate(), layers[i + 1].getAlpha()); + return input.toVector(); + } +}; OpenCL openCL; -std::vector generateRandomMatrix(int rows, int cols) { - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution dis(-1.0f, 1.0f); - - std::vector matrix(rows * cols); - for (int i = 0; i < rows * cols; ++i) { - matrix[i] = dis(gen); - } - return matrix; -} -std::vector generateIdentityMatrix(int size) { - std::vector matrix(size * size, 0.0f); - for (int i = 0; i < size; ++i) { - matrix[i * size + i] = 1.0f; - } - return matrix; -} - int main() { - const int SIZE = 1024; + NeuralNetwork nn( + 2, {Layer(3, MM::Activate::RELU), Layer(1, MM::Activate::RELU)}); - std::cout << "Testing with " << SIZE << "x" << SIZE << " matrices..." - << std::endl; + for (int i = 0; i < 10; i++) { + int v1 = (i / 2) % 2; + int v2 = i % 2; - std::vector matrixA = generateRandomMatrix(SIZE, SIZE); - std::vector matrixB = generateRandomMatrix(SIZE, SIZE); - std::vector matrixC = generateRandomMatrix(SIZE, SIZE); + std::vector v = {static_cast(v1), static_cast(v2)}; - // std::vector matrixA = generateIdentityMatrix(SIZE); - // std::vector matrixB = generateIdentityMatrix(SIZE); - // std::vector matrixC = generateIdentityMatrix(SIZE); + std::vector r = nn.predict(v); + float expected = static_cast(v1 ^ v2); - // Тестирование на CPU - { - std::cout << "\n=== CPU Version ===" << std::endl; - - auto start = std::chrono::high_resolution_clock::now(); - - MutableMatrices::CPU a(SIZE, SIZE, matrixA); - Matrices::CPU b(SIZE, SIZE, matrixB); - Matrices::CPU c(SIZE, SIZE, matrixC); - - auto gen_end = std::chrono::high_resolution_clock::now(); - - auto op_start = std::chrono::high_resolution_clock::now(); - - for (int i = 0; i < 10; i++) { - a.mult(b, 0.2f, MutableMatrices::CPU::Activate::SIGMOID); - } - - auto op_end = std::chrono::high_resolution_clock::now(); - - std::vector v = a.toVector(); - - auto total_end = std::chrono::high_resolution_clock::now(); - - auto gen_duration = - std::chrono::duration_cast(gen_end - start); - auto op_duration = std::chrono::duration_cast( - op_end - op_start); - auto total_duration = std::chrono::duration_cast( - total_end - start); - - std::cout << "Matrix generation time: " << gen_duration.count() << " ms" - << std::endl; - std::cout << "Operations time: " << op_duration.count() << " ms" - << std::endl; - std::cout << "Total time: " << total_duration.count() << " ms" << std::endl; - - std::cout << "First few elements: "; - for (size_t i = 0; i < 5 && i < v.size(); ++i) { - std::cout << v[i] << " "; - } - std::cout << std::endl; - } - - // Тестирование на GPU - { - std::cout << "\n=== GPU Version ===" << std::endl; - - auto start = std::chrono::high_resolution_clock::now(); - - MutableMatrices::GPU a(SIZE, SIZE, matrixA); - Matrices::GPU b(SIZE, SIZE, matrixB); - Matrices::GPU c(SIZE, SIZE, matrixC); - - auto gen_end = std::chrono::high_resolution_clock::now(); - - auto op_start = std::chrono::high_resolution_clock::now(); - - for (int i = 0; i < 10; i++) { - a.mult(b, 0.2f, MutableMatrices::GPU::Activate::SIGMOID, 0.0f); - } - - auto op_end = std::chrono::high_resolution_clock::now(); - - std::vector v = a.toVector(); - - auto total_end = std::chrono::high_resolution_clock::now(); - - auto gen_duration = - std::chrono::duration_cast(gen_end - start); - auto op_duration = std::chrono::duration_cast( - op_end - op_start); - auto total_duration = std::chrono::duration_cast( - total_end - start); - - std::cout << "Matrix generation time: " << gen_duration.count() << " ms" - << std::endl; - std::cout << "Operations time: " << op_duration.count() << " ms" - << std::endl; - std::cout << "Total time: " << total_duration.count() << " ms" << std::endl; - - std::cout << "First few elements: "; - for (size_t i = 0; i < 5 && i < v.size(); ++i) { - std::cout << v[i] << " "; + std::cout << "XOR(" << v1 << ", " << v2 << ") = " << expected; + std::cout << " | Network: "; + for (size_t j = 0; j < r.size(); ++j) { + std::cout << r[j] << " "; } std::cout << std::endl; } diff --git a/src/math/matrix/gpu/matrix.cpp b/src/math/matrix/gpu/matrix.cpp index 3b46ac3..116b94f 100644 --- a/src/math/matrix/gpu/matrix.cpp +++ b/src/math/matrix/gpu/matrix.cpp @@ -1,15 +1,35 @@ +#include + #include "matrix.hpp" +std::random_device rd; +std::mt19937 gen(rd()); + +Matrices::GPU::GPU(int rows, int cols) + : IMatrix(rows, cols), queue(openCL.getContext(), openCL.getDevice()) { + validateDimensions(rows, cols); + std::vector matrix; + matrix.reserve(rows * cols); + for (size_t i = 0; i < (size_t)rows * (size_t)cols; ++i) + matrix.push_back(std::generate_canonical(gen)); + buffer = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE, + rows * cols * sizeof(float)); + queue.enqueueWriteBuffer(*buffer, CL_TRUE, 0, rows * cols * sizeof(float), + matrix.data()); + queue.finish(); +} + Matrices::GPU::GPU(int rows, int cols, const std::vector &matrix) : IMatrix(rows, cols), queue(openCL.getContext(), openCL.getDevice()) { validateDimensions(rows, cols); if (matrix.size() != static_cast(rows * cols)) { throw std::invalid_argument("Matrix data size doesn't match dimensions"); } - - buffer = new cl::Buffer( - openCL.getContext(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - rows * cols * sizeof(float), const_cast(matrix.data())); + buffer = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE, + rows * cols * sizeof(float)); + queue.enqueueWriteBuffer(*buffer, CL_TRUE, 0, rows * cols * sizeof(float), + matrix.data()); + queue.finish(); } const std::vector Matrices::GPU::toVector() const { diff --git a/src/math/matrix/gpu/matrix.hpp b/src/math/matrix/gpu/matrix.hpp index c6b7fae..1c4b244 100644 --- a/src/math/matrix/gpu/matrix.hpp +++ b/src/math/matrix/gpu/matrix.hpp @@ -11,12 +11,19 @@ protected: cl::CommandQueue queue; public: + GPU(int rows, int cols); GPU(int rows, int cols, const std::vector &matrix); ~GPU() { delete buffer; } GPU(const GPU &) = delete; GPU &operator=(const GPU &) = delete; - GPU(GPU &&other) = default; + GPU(GPU &&other) + : IMatrix(other.rows, other.cols), buffer(other.buffer), + queue(std::move(other.queue)) { + other.buffer = nullptr; + other.rows = 0; + other.cols = 0; + } GPU &operator=(GPU &&other) = default; int getRows() const override { return rows; } diff --git a/src/math/matrix/gpu/mutable_matrix.cpp b/src/math/matrix/gpu/mutable_matrix.cpp index 1e09e0a..ebe2e03 100644 --- a/src/math/matrix/gpu/mutable_matrix.cpp +++ b/src/math/matrix/gpu/mutable_matrix.cpp @@ -1,5 +1,12 @@ #include "mutable_matrix.hpp" +MutableMatrices::GPU::GPU(int rows, int cols) : Matrices::GPU(rows, cols) { + for (const auto &entry : kernelsNames) { + kernels[entry.first] = + cl::Kernel(openCL.getProgram(OpenCL::Program::MATRIX), entry.second); + } +} + MutableMatrices::GPU::GPU(int rows, int cols, const std::vector &matrix) : Matrices::GPU(rows, cols, matrix) { for (const auto &entry : kernelsNames) { diff --git a/src/math/matrix/gpu/mutable_matrix.hpp b/src/math/matrix/gpu/mutable_matrix.hpp index 02df50f..8cd6f9d 100644 --- a/src/math/matrix/gpu/mutable_matrix.hpp +++ b/src/math/matrix/gpu/mutable_matrix.hpp @@ -27,8 +27,14 @@ private: } public: + GPU(int rows, int cols); GPU(int rows, int cols, const std::vector &matrix); + GPU(const GPU &) = delete; + GPU &operator=(const GPU &) = delete; + GPU(GPU &&other) = default; + GPU &operator=(GPU &&other) = default; + void mult(Matrices::GPU &m, float bias = 0.0f, Activate type = Activate::LINEAR, float alpha = 0.01f); void mult(float scalar); diff --git a/src/math/matrix/matrix.hpp b/src/math/matrix/matrix.hpp index fa8748b..339509a 100644 --- a/src/math/matrix/matrix.hpp +++ b/src/math/matrix/matrix.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include diff --git a/src/math/matrix/mutable_matrix.hpp b/src/math/matrix/mutable_matrix.hpp index 5f84236..72e3867 100644 --- a/src/math/matrix/mutable_matrix.hpp +++ b/src/math/matrix/mutable_matrix.hpp @@ -16,7 +16,7 @@ public: virtual void activate(Activate type, float alpha = 0.01f) = 0; void validateMultDimensions(T &a, T &b) const { - if (a.getRows() != b.getCols()) { + if (a.getCols() != b.getRows()) { throw std::invalid_argument( "Invalid matrix dimensions for multiplication"); } diff --git a/src/math/opencl/opencl.hpp b/src/math/opencl/opencl.hpp index 8b32101..5623655 100644 --- a/src/math/opencl/opencl.hpp +++ b/src/math/opencl/opencl.hpp @@ -13,7 +13,7 @@ class OpenCL { public: - enum class Program { MATRIX, MATH, IMAGE_PROCESSING }; + enum class Program { MATRIX }; private: cl::Device device;