From bbafbf557401be46ced94a3ad2aea16f3c169383 Mon Sep 17 00:00:00 2001 From: StepanovPlaton Date: Sat, 8 Nov 2025 20:04:09 +0400 Subject: [PATCH] First XOR NN --- src/main.cpp | 618 +++++++++++++++++++-------------- src/math/tensor/cpu/math.hpp | 4 +- src/math/tensor/cpu/tensor.hpp | 47 ++- src/utils/output.h | 61 ++++ 4 files changed, 448 insertions(+), 282 deletions(-) create mode 100644 src/utils/output.h diff --git a/src/main.cpp b/src/main.cpp index 3fed565..b1d6351 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,275 +1,383 @@ -#define NOGPU +#include +#include +#include +#include +#include +#include -#include "math/math.hpp" - -#ifdef NOGPU -using namespace CPU; -#else -using namespace GPU; -#endif - -class Layer { -protected: - int outputFeatures; - Vector bias; - Activation activation; - float alpha; - -public: - Layer(int outputFeatures, Activation activation, float alpha = 0.0f) - : outputFeatures(outputFeatures), bias(outputFeatures), - activation(activation), alpha(alpha) {} - - int getOuputFeatures() const { return outputFeatures; } - Activation getActivation() const { return activation; } - float getAlpha() const { return alpha; } - - const Vector &getBias() const { return bias; } - void setBias(const Vector &b) { bias = b; } -}; - -class ConnectedLayer : public Layer { -protected: - int inputFeatures; - Matrix weights; - -public: - ConnectedLayer(int inputFeatures, const Layer &layer) - : Layer(layer), inputFeatures(inputFeatures), - weights(layer.getOuputFeatures(), inputFeatures) {} - ConnectedLayer(const Layer &a, const Layer &b) - : ConnectedLayer(a.getOuputFeatures(), b) {} - - int getInputFeatures() const { return inputFeatures; } - const Matrix &getWeights() const { return weights; } - void setWeights(const Matrix &w) { weights = w; } -}; - -class LearnLayer : public ConnectedLayer { -protected: - Matrix internal; - Matrix outputs; - -public: - LearnLayer(int inputFeatures, const Layer &layer) - : ConnectedLayer(inputFeatures, layer), - 
internal(layer.getOuputFeatures(), inputFeatures, false), - outputs(layer.getOuputFeatures(), inputFeatures, false) {} - LearnLayer(const Layer &a, const Layer &b) - : LearnLayer(a.getOuputFeatures(), b) {} - - const Matrix &getInternal() const { return internal; } - const Matrix &getOutputs() const { return outputs; } - void setInternal(const Matrix &i) { internal = i; } - void setOutputs(const Matrix &o) { outputs = o; } -}; - -class NeuralNetwork { -private: - std::vector layers; - -public: - NeuralNetwork(int inputFeatures, std::vector l) { - // employ back - layers.push_back(ConnectedLayer(inputFeatures, l[0])); - for (size_t i = 1; i < l.size(); i++) - layers.push_back(ConnectedLayer(l[i - 1].getOuputFeatures(), l[i])); +// Чтение файла в строку +std::string readFile(const char *filename) { + std::ifstream file(filename); + if (!file.is_open()) { + throw std::runtime_error(std::string("Failed to open file: ") + filename); } + return std::string((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); +} - Matrix predict(Matrix inputs) { - MatrixMath mm; - std::vector steps; - steps.push_back(inputs); - for (size_t i = 0; i < layers.size(); i++) - steps.push_back(mm.dot(steps[steps.size() - 1], layers[i].getWeights(), - false, true, &layers[i].getBias(), - layers[i].getActivation(), layers[i].getAlpha())); - mm.await(); - return steps[steps.size() - 1]; +// Получение ошибки OpenCL в виде строки +const char *getErrorString(cl_int error) { + switch (error) { + case CL_SUCCESS: + return "CL_SUCCESS"; + case CL_DEVICE_NOT_FOUND: + return "CL_DEVICE_NOT_FOUND"; + case CL_DEVICE_NOT_AVAILABLE: + return "CL_DEVICE_NOT_AVAILABLE"; + case CL_COMPILER_NOT_AVAILABLE: + return "CL_COMPILER_NOT_AVAILABLE"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + case CL_OUT_OF_RESOURCES: + return "CL_OUT_OF_RESOURCES"; + case CL_OUT_OF_HOST_MEMORY: + return "CL_OUT_OF_HOST_MEMORY"; + case CL_PROFILING_INFO_NOT_AVAILABLE: + return 
"CL_PROFILING_INFO_NOT_AVAILABLE"; + case CL_MEM_COPY_OVERLAP: + return "CL_MEM_COPY_OVERLAP"; + case CL_IMAGE_FORMAT_MISMATCH: + return "CL_IMAGE_FORMAT_MISMATCH"; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: + return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; + case CL_BUILD_PROGRAM_FAILURE: + return "CL_BUILD_PROGRAM_FAILURE"; + case CL_MAP_FAILURE: + return "CL_MAP_FAILURE"; + case CL_INVALID_VALUE: + return "CL_INVALID_VALUE"; + case CL_INVALID_DEVICE_TYPE: + return "CL_INVALID_DEVICE_TYPE"; + case CL_INVALID_PLATFORM: + return "CL_INVALID_PLATFORM"; + case CL_INVALID_DEVICE: + return "CL_INVALID_DEVICE"; + case CL_INVALID_CONTEXT: + return "CL_INVALID_CONTEXT"; + case CL_INVALID_QUEUE_PROPERTIES: + return "CL_INVALID_QUEUE_PROPERTIES"; + case CL_INVALID_COMMAND_QUEUE: + return "CL_INVALID_COMMAND_QUEUE"; + case CL_INVALID_HOST_PTR: + return "CL_INVALID_HOST_PTR"; + case CL_INVALID_MEM_OBJECT: + return "CL_INVALID_MEM_OBJECT"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: + return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; + case CL_INVALID_IMAGE_SIZE: + return "CL_INVALID_IMAGE_SIZE"; + case CL_INVALID_SAMPLER: + return "CL_INVALID_SAMPLER"; + case CL_INVALID_BINARY: + return "CL_INVALID_BINARY"; + case CL_INVALID_BUILD_OPTIONS: + return "CL_INVALID_BUILD_OPTIONS"; + case CL_INVALID_PROGRAM: + return "CL_INVALID_PROGRAM"; + case CL_INVALID_PROGRAM_EXECUTABLE: + return "CL_INVALID_PROGRAM_EXECUTABLE"; + case CL_INVALID_KERNEL_NAME: + return "CL_INVALID_KERNEL_NAME"; + case CL_INVALID_KERNEL_DEFINITION: + return "CL_INVALID_KERNEL_DEFINITION"; + case CL_INVALID_KERNEL: + return "CL_INVALID_KERNEL"; + case CL_INVALID_ARG_INDEX: + return "CL_INVALID_ARG_INDEX"; + case CL_INVALID_ARG_VALUE: + return "CL_INVALID_ARG_VALUE"; + case CL_INVALID_ARG_SIZE: + return "CL_INVALID_ARG_SIZE"; + case CL_INVALID_KERNEL_ARGS: + return "CL_INVALID_KERNEL_ARGS"; + case CL_INVALID_WORK_DIMENSION: + return "CL_INVALID_WORK_DIMENSION"; + case CL_INVALID_WORK_GROUP_SIZE: + return 
"CL_INVALID_WORK_GROUP_SIZE"; + case CL_INVALID_WORK_ITEM_SIZE: + return "CL_INVALID_WORK_ITEM_SIZE"; + case CL_INVALID_GLOBAL_OFFSET: + return "CL_INVALID_GLOBAL_OFFSET"; + case CL_INVALID_EVENT_WAIT_LIST: + return "CL_INVALID_EVENT_WAIT_LIST"; + case CL_INVALID_EVENT: + return "CL_INVALID_EVENT"; + case CL_INVALID_OPERATION: + return "CL_INVALID_OPERATION"; + case CL_INVALID_GL_OBJECT: + return "CL_INVALID_GL_OBJECT"; + case CL_INVALID_BUFFER_SIZE: + return "CL_INVALID_BUFFER_SIZE"; + case CL_INVALID_MIP_LEVEL: + return "CL_INVALID_MIP_LEVEL"; + case CL_INVALID_GLOBAL_WORK_SIZE: + return "CL_INVALID_GLOBAL_WORK_SIZE"; + default: + return "Unknown OpenCL error"; } +} - const ConnectedLayer &getLayer(int i) const { return layers[i]; } -}; - -class LearnNerualNetrowk { -private: - std::vector layers; - -public: - LearnNerualNetrowk(int inputFeatures, std::vector l) { - // employ back - layers.push_back(LearnLayer(inputFeatures, l[0])); - for (size_t i = 1; i < l.size(); i++) - layers.push_back(LearnLayer(l[i - 1], l[i])); +// Проверка ошибок OpenCL +void checkError(cl_int err, const char *operation) { + if (err != CL_SUCCESS) { + std::cerr << "Error during " << operation << ": " << getErrorString(err) + << " (" << err << ")" << std::endl; + exit(1); } +} - Matrix learn(Matrix inputs, Matrix target, float speed = 1.0f) { - MatrixMath mm; - VectorMath vm; - for (size_t i = 0; i < layers.size(); i++) { - layers[i].setInternal(mm.dot(layers[i].getWeights(), - i == 0 ? 
inputs : layers[i - 1].getOutputs(), - false, false, &layers[i].getBias())); - layers[i].setOutputs(mm.activate(layers[i].getInternal(), - layers[i].getActivation(), - layers[i].getAlpha())); +// Код ядра для матричного умножения с тайлингом +const char *kernelSource = R"( +__kernel void matmul_tiled(__global const float* A, + __global const float* B, + __global float* C, + const int N, + const int TILE_SIZE) { + + int row = get_global_id(1); + int col = get_global_id(0); + + __local float tileA[16][16]; + __local float tileB[16][16]; + + float sum = 0.0f; + + int numTiles = (N + TILE_SIZE - 1) / TILE_SIZE; + + for (int t = 0; t < numTiles; t++) { + // Загрузка тайлов в локальную память + int tileRow = get_local_id(1); + int tileCol = get_local_id(0); + + int loadRow = row; + int loadCol = t * TILE_SIZE + tileCol; + if (loadRow < N && loadCol < N) { + tileA[tileRow][tileCol] = A[loadRow * N + loadCol]; + } else { + tileA[tileRow][tileCol] = 0.0f; + } + + loadRow = t * TILE_SIZE + tileRow; + loadCol = col; + if (loadRow < N && loadCol < N) { + tileB[tileRow][tileCol] = B[loadRow * N + loadCol]; + } else { + tileB[tileRow][tileCol] = 0.0f; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + // Вычисление частичной суммы + for (int k = 0; k < TILE_SIZE; k++) { + sum += tileA[tileRow][k] * tileB[k][tileCol]; + } + + barrier(CLK_LOCAL_MEM_FENCE); } - mm.await(); - - std::vector io = inputs.toVector(); - std::cout << "I: "; - for (size_t i = 0; i < io.size(); ++i) - printf("%4.2f ", io[i]); - - std::vector ni = layers[layers.size() - 1].getInternal().toVector(); - std::cout << "| NNI: "; - for (size_t i = 0; i < ni.size(); ++i) - printf("%4.2f ", ni[i]); - - std::vector no = layers[layers.size() - 1].getOutputs().toVector(); - std::cout << "| NNO: "; - for (size_t i = 0; i < no.size(); ++i) - printf("%4.2f ", no[i]); - - std::vector to = target.toVector(); - std::cout << "| T: "; - for (size_t i = 0; i < to.size(); ++i) - printf("%4.2f ", to[i]); - - Matrix mse = - 
mm.loss(layers[layers.size() - 1].getOutputs(), target, Loss::MSE); - - std::vector lo = mse.toVector(); - std::cout << "| L: "; - for (size_t i = 0; i < lo.size(); ++i) - printf("%5.3f ", lo[i]); - std::cout << std::endl; - - Matrix dAnl = - mm.d_loss(layers[layers.size() - 1].getOutputs(), target, Loss::MSE); - - for (int i = layers.size() - 1; i >= 0; --i) { - printf("=== Layer %d ===\n", i + 1); - printf("dAnl: "); - dAnl.print(); - - Matrix dZl = mm.mult(dAnl, mm.d_activate(layers[i].getInternal())); - printf("dZl: "); - dZl.print(); - - Matrix dWl = - mm.mult(mm.dot(dZl, i == 0 ? inputs : layers[i - 1].getOutputs(), - false, true), - 1.0f / (float)inputs.getRows()); - printf("dWl: "); - dWl.print(); - - Vector dbl = mm.axis_sum(mm.mult(dZl, 1.0f / (float)inputs.getRows())); - printf("dbl: "); - dbl.print(); - - dAnl = mm.dot(layers[i].getWeights(), dZl, true); // false true?! - - mm.await(); - - layers[i].setWeights(mm.add(layers[i].getWeights(), dWl, -speed)); - printf("Weights %d: ", i + 1); - layers[i].getWeights().print(); - - layers[i].setBias( - vm.add(layers[i].getBias(), dbl, -speed / (float)inputs.getRows())); - printf("Bias %d: ", i + 1); - layers[i].getBias().print(); + + if (row < N && col < N) { + C[row * N + col] = sum; } - - return mse; - } - - const LearnLayer &getLayer(int i) const { return layers[i]; } - - // delete - LearnLayer &getLayer(int i) { return layers[i]; } -}; - -#ifndef NOGPU -OpenCL openCL; -#endif +} +)"; int main() { - // LearnNerualNetrowk nn( - // 3, {Layer(3, Activation::SIGMOID), Layer(3, Activation::SIGMOID)}); - // - // Matrix weights1(3, 3, - // {0.88f, 0.39f, 0.9f, 0.37f, 0.14f, 0.41f, 0.96f, 0.5f, - // 0.6f}); - // Matrix weights2( - // 3, 3, {0.29f, 0.57f, 0.36f, 0.73f, 0.53f, 0.68f, 0.01f, 0.02f, 0.58f}); - // - // Vector bias1(std::vector{0.23f, 0.89f, 0.08f}); - // Vector bias2(std::vector{0.78f, 0.83f, 0.8f}); - // - // nn.getLayer(0).setWeights(weights1); - // nn.getLayer(0).setBias(bias1); - // - // 
nn.getLayer(1).setWeights(weights2); - // nn.getLayer(1).setBias(bias2); - // - // std::cout << std::endl; - // - // Matrix input(3, 1, {0.03f, 0.72f, 0.49f}); - // Matrix target(3, 1, {0.93f, 0.74f, 0.17f}); - // - // // for (int i = 0; i < 1000; i++) - // nn.learn(input, target, 0.01f); + cl_int err; - LearnNerualNetrowk nn( - 2, {Layer(3, Activation::SIGMOID), Layer(1, Activation::SIGMOID)}); + // Параметры матрицы + const int N = 1024; // Размер матрицы (уменьшено для демонстрации) + const int TILE_SIZE = 16; + const size_t matrixSize = N * N * sizeof(float); - Matrix input(2, 4); - Matrix target(1, 4); + std::cout << "Matrix size: " << N << "x" << N << " (" << N * N << " elements)" + << std::endl; + std::cout << "Total data: " << matrixSize / (1024 * 1024) << " MB per matrix" + << std::endl; - float min = 100.0f; - for (int batch = 0; batch < 4; batch++) { - for (int i = 0; i < 4; i++) { - int v1 = (i / 2) % 2; - int v2 = i % 2; + // Инициализация данных + std::vector A(N * N); + std::vector B(N * N); + std::vector C(N * N, 0.0f); - input(0, i) = static_cast(v1); - input(1, i) = static_cast(v2); - target(0, i) = static_cast(v1 ^ v2); - } + // Заполнение матриц тестовыми данными + for (int i = 0; i < N * N; i++) { + A[i] = static_cast(i % 100) * 0.1f; + B[i] = static_cast((i + 1) % 100) * 0.1f; } - for (int i = 0; i < 1000; i++) { - printf("%4d | ", i + 1); - Matrix mse = nn.learn(input, target, 0.0001f * std::pow(0.99f, i)); - std::vector lv = mse.toVector(); - float loss = 0.0f; - for (size_t i = 0; i < lv.size(); ++i) - loss += lv[i]; - if (loss < min) - min = loss; - } - std::cout << min << std::endl; + // 1. 
Получение платформы + cl_platform_id platform; + err = clGetPlatformIDs(1, &platform, NULL); + checkError(err, "clGetPlatformIDs"); - // LearnNerualNetrowk nn( - // 2, {Layer(3, Activation::SIGMOID), Layer(1, Activation::SIGMOID)}); - // float min = 100.0f; - // for (int i = 0; i < 4 * 10000; i++) { - // int v1 = (i / 2) % 2; - // int v2 = i % 2; - // - // Matrix input(2, 1, {static_cast(v1), static_cast(v2)}); - // Matrix target(1, 1, static_cast(v1 ^ v2)); - // - // printf("%5d | ", i + 1); - // Matrix mse = nn.learn(input, target, 0.0001f * std::pow(0.95f, i)); - // if (i % 4 == 3) - // std::cout << std::endl; - // if (mse[0] < min) - // min = mse[0]; - // } - // std::cout << min << std::endl; + // 2. Получение устройства (GPU) + cl_device_id device; + err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); + if (err != CL_SUCCESS) { + std::cout << "GPU not found, trying CPU..." << std::endl; + err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, NULL); + checkError(err, "clGetDeviceIDs"); + std::cout << "Using CPU" << std::endl; + } else { + std::cout << "Using GPU" << std::endl; + } + + // 3. Создание контекста + cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); + checkError(err, "clCreateContext"); + + // 4. Создание очереди команд + cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); + checkError(err, "clCreateCommandQueue"); + + // 5. Создание буферов + cl_mem bufferA = + clCreateBuffer(context, CL_MEM_READ_ONLY, matrixSize, NULL, &err); + checkError(err, "clCreateBuffer A"); + + cl_mem bufferB = + clCreateBuffer(context, CL_MEM_READ_ONLY, matrixSize, NULL, &err); + checkError(err, "clCreateBuffer B"); + + cl_mem bufferC = + clCreateBuffer(context, CL_MEM_WRITE_ONLY, matrixSize, NULL, &err); + checkError(err, "clCreateBuffer C"); + + // 6. 
Копирование данных на устройство + auto copy_start = std::chrono::high_resolution_clock::now(); + + err = clEnqueueWriteBuffer(queue, bufferA, CL_TRUE, 0, matrixSize, A.data(), + 0, NULL, NULL); + checkError(err, "clEnqueueWriteBuffer A"); + + err = clEnqueueWriteBuffer(queue, bufferB, CL_TRUE, 0, matrixSize, B.data(), + 0, NULL, NULL); + checkError(err, "clEnqueueWriteBuffer B"); + + auto copy_end = std::chrono::high_resolution_clock::now(); + auto copy_time = std::chrono::duration_cast( + copy_end - copy_start); + + // 7. Создание программы + auto program_start = std::chrono::high_resolution_clock::now(); + + cl_program program = + clCreateProgramWithSource(context, 1, &kernelSource, NULL, &err); + checkError(err, "clCreateProgramWithSource"); + + // Компиляция программы + err = clBuildProgram(program, 1, &device, NULL, NULL, NULL); + if (err != CL_SUCCESS) { + // Получение логов компиляции + size_t log_size; + clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, + &log_size); + std::vector log(log_size); + clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, + log.data(), NULL); + std::cerr << "Build failed:\n" << log.data() << std::endl; + checkError(err, "clBuildProgram"); + } + + auto program_end = std::chrono::high_resolution_clock::now(); + auto program_time = std::chrono::duration_cast( + program_end - program_start); + + // 8. Создание ядра + auto kernel_start = std::chrono::high_resolution_clock::now(); + + cl_kernel kernel = clCreateKernel(program, "matmul_tiled", &err); + checkError(err, "clCreateKernel"); + + auto kernel_end = std::chrono::high_resolution_clock::now(); + auto kernel_time = std::chrono::duration_cast( + kernel_end - kernel_start); + + // 9. 
Установка аргументов ядра + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA); + checkError(err, "clSetKernelArg 0"); + + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferB); + checkError(err, "clSetKernelArg 1"); + + err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufferC); + checkError(err, "clSetKernelArg 2"); + + err = clSetKernelArg(kernel, 3, sizeof(int), &N); + checkError(err, "clSetKernelArg 3"); + + err = clSetKernelArg(kernel, 4, sizeof(int), &TILE_SIZE); + checkError(err, "clSetKernelArg 4"); + + // 10. Запуск матричного умножения + size_t global[2] = {N, N}; + size_t local[2] = {TILE_SIZE, TILE_SIZE}; + + auto matmul_start = std::chrono::high_resolution_clock::now(); + + err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, local, 0, NULL, + NULL); + checkError(err, "clEnqueueNDRangeKernel"); + + clFinish(queue); + + auto matmul_end = std::chrono::high_resolution_clock::now(); + auto matmul_time = std::chrono::duration_cast( + matmul_end - matmul_start); + + // 11. 
Read the results back + auto read_start = std::chrono::high_resolution_clock::now(); + + err = clEnqueueReadBuffer(queue, bufferC, CL_TRUE, 0, matrixSize, C.data(), 0, + NULL, NULL); + checkError(err, "clEnqueueReadBuffer"); + + auto read_end = std::chrono::high_resolution_clock::now(); + auto read_time = std::chrono::duration_cast( + read_end - read_start); + + // Print the timing measurements + std::cout << "\n=== TIMING RESULTS ===" << std::endl; + std::cout << "Data copy to device: " << copy_time.count() << " ns" + << std::endl; + std::cout << "Program creation: " << program_time.count() << " ns" + << std::endl; + std::cout << "Kernel creation: " << kernel_time.count() << " ns" << std::endl; + std::cout << "Matrix multiplication: " << matmul_time.count() << " ms" + << std::endl; + std::cout << "Data read from device: " << read_time.count() << " ns" + << std::endl; + + // Ratio of kernel execution time to kernel creation time + if (kernel_time.count() > 0) { + long long ratio = (matmul_time.count() * 1000000LL) / + kernel_time.count(); // convert ms to ns (1 ms = 1,000,000 ns) so both operands share a unit + std::cout << "Kernel creation vs execution ratio: 1 : " << ratio + << std::endl; + } + + // Throughput estimate + long long total_flops = 2LL * N * N * N; // 2*N^3 FLOP + double gflops = (double)total_flops / (matmul_time.count() * 1e6); // GFLOP/s + std::cout << "Performance: " << gflops << " GFLOP/s" << std::endl; + + // Result check (simple validation via checksum) + float checksum = 0.0f; + for (int i = 0; i < N * N; i++) { + checksum += C[i]; + } + std::cout << "Result checksum: " << checksum << std::endl; + + // 12. Release resources + clReleaseMemObject(bufferA); + clReleaseMemObject(bufferB); + clReleaseMemObject(bufferC); + clReleaseKernel(kernel); + clReleaseProgram(program); + clReleaseCommandQueue(queue); + clReleaseContext(context); + + std::cout << "\nDone!" 
<< std::endl; return 0; -} +} \ No newline at end of file diff --git a/src/math/tensor/cpu/math.hpp b/src/math/tensor/cpu/math.hpp index ad4f586..1809b77 100644 --- a/src/math/tensor/cpu/math.hpp +++ b/src/math/tensor/cpu/math.hpp @@ -116,13 +116,13 @@ private: Tensor2 mse(const Tensor2 &a, const Tensor2 &b) { Tensor2 result(a.getShape(), false); for (size_t i = 0; i < result.getSize(); ++i) - result[i] = (a[i] - b[i]) * (a[i] - b[i]) / (float)a.getCols(); + result[i] = (a[i] - b[i]) * (a[i] - b[i]) / (float)a.getSize(); return result; } Tensor2 d_mse(const Tensor2 &a, const Tensor2 &b) { Tensor2 result(a.getShape(), false); for (size_t i = 0; i < result.getSize(); ++i) - result[i] = 2 * (a[i] - b[i]) / (float)a.getCols(); + result[i] = 2 * (a[i] - b[i]) / (float)a.getSize(); return result; } diff --git a/src/math/tensor/cpu/tensor.hpp b/src/math/tensor/cpu/tensor.hpp index 59ae653..0aed6cd 100644 --- a/src/math/tensor/cpu/tensor.hpp +++ b/src/math/tensor/cpu/tensor.hpp @@ -7,6 +7,8 @@ #include "../tensor.hpp" +#include "../../../utils/output.h" + extern std::mt19937 gen; namespace CPU { @@ -52,17 +54,17 @@ public: const float &operator[](int index) const { return data[index]; } virtual void print() const { - std::cout << "Tensor(" << getDim() << "): ["; + debugi("Tensor(%d): [", getDim()); for (size_t i = 0; i < data.size(); ++i) { - std::cout << data[i]; + debugi("%4.3f", data[i]); if (i > 15) { - std::cout << "... "; + debugi("... 
"); break; } if (i != data.size() - 1) - std::cout << ", "; + debugi(" "); } - std::cout << "]" << std::endl; + debug("]"); } std::vector toVector() const { return data; } @@ -132,9 +134,7 @@ public: Tensor0(Tensor0 &&other) = default; Tensor0 &operator=(Tensor0 &&other) = default; - void print() const override { - std::cout << "Scalar: " << data[0] << std::endl; - } + void print() const override { debug("Scalar: %4.3f", data[0]); } float &value() { return data[0]; } const float &value() const { return data[0]; } @@ -161,13 +161,13 @@ public: Tensor1 &operator=(Tensor1 &&other) = default; void print() const override { - std::cout << "Vector(" << shape[0] << "): ["; + debugi("Vector(%d): [", shape[0]); for (size_t i = 0; i < data.size(); ++i) { - std::cout << data[i]; + debugi("%4.3f", data[i]); if (i != data.size() - 1) - std::cout << ", "; + debugi(" "); } - std::cout << "]" << std::endl; + debug("]"); } float &operator()(int i) { return data[i]; } @@ -209,12 +209,11 @@ public: Tensor2 &operator=(Tensor2 &&other) = default; void print() const override { - std::cout << "Matrix(" << shape[0] << "x" << shape[1] << "):\n"; + debug("Matrix(%dx%d):", shape[0], shape[1]); for (int i = 0; i < shape[0]; ++i) { - for (int j = 0; j < shape[1]; ++j) { - std::cout << data[i * shape[1] + j] << " "; - } - std::cout << std::endl; + for (int j = 0; j < shape[1]; ++j) + debugi("%4.3f ", data[i * shape[1] + j]); + debugi("\n"); } } @@ -265,17 +264,15 @@ public: Tensor3 &operator=(Tensor3 &&other) = default; void print() const override { - std::cout << "Tensor3(" << shape[0] << "x" << shape[1] << "x" << shape[2] - << "):\n"; + debug("Tensor3(%dx%dx%d):", shape[0], shape[1], shape[2]); for (int i = 0; i < shape[0]; ++i) { - std::cout << "Slice " << i << ":\n"; + debug("Slice %d", i); for (int j = 0; j < shape[1]; ++j) { - for (int k = 0; k < shape[2]; ++k) { - std::cout << data[i * shape[1] * shape[2] + j * shape[2] + k] << " "; - } - std::cout << std::endl; + for (int k = 0; k < 
shape[2]; ++k) + debugi("%4.3f ", data[i * shape[1] * shape[2] + j * shape[2] + k]); + debugi("\n"); } - std::cout << std::endl; + debugi("\n"); } } diff --git a/src/utils/output.h b/src/utils/output.h new file mode 100644 index 0000000..8e3f9c3 --- /dev/null +++ b/src/utils/output.h @@ -0,0 +1,61 @@ +#pragma once + +#include + +// Color and style definitions (ANSI escape sequences) +#define RESET "\033[0m" +#define BOLD "\033[1m" +#define ITALIC "\033[3m" +#define UNDERLINE "\033[4m" + +// Text (foreground) colors +#define BLACK "\033[30m" +#define RED "\033[31m" +#define GREEN "\033[32m" +#define YELLOW "\033[33m" +#define BLUE "\033[34m" +#define MAGENTA "\033[35m" +#define CYAN "\033[36m" +#define WHITE "\033[37m" + +// Background colors +#define BG_BLACK "\033[40m" +#define BG_RED "\033[41m" +#define BG_GREEN "\033[42m" +#define BG_YELLOW "\033[43m" +#define BG_BLUE "\033[44m" +#define BG_MAGENTA "\033[45m" +#define BG_CYAN "\033[46m" +#define BG_WHITE "\033[47m" + +#define printff(format_codes, ...) \ + do { \ + printf("%s", format_codes); \ + printf(__VA_ARGS__); \ + printf("\033[0m\n"); \ + } while (0) + +#ifdef DEBUG_MODE +#define debug(fmt, ...) \ + do { \ + printf(fmt, ##__VA_ARGS__); \ + printf("\n"); \ + } while (0) +#define debugi(fmt, ...) \ + do { \ + printf(fmt, ##__VA_ARGS__); \ + } while (0) +#define debugf(format_codes, fmt, ...) \ + do { \ + printf("%s", format_codes); \ + printf("[%s:%d] ", __FILE__, __LINE__); \ + printf(fmt, ##__VA_ARGS__); \ + printf("\n"); \ + printf("\033[0m\n"); \ + } while (0) +#define loge(fmt, ...) debugf(RED UNDERLINE, fmt, ##__VA_ARGS__) +#else +#define debug(fmt, ...) +#define debugi(fmt, ...) +#define debugf(format_codes, fmt, ...) +#endif