diff --git a/.gitignore b/.gitignore index 0f662bc..8a27642 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,7 @@ .vscode + *.exe +*.so +*.pyd + +src/tensor/build \ No newline at end of file diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index c4a81b0..0000000 --- a/src/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -CXX = g++ -CXXFLAGS = -Wall -Wextra -O1 -g -std=c++23 -fno-omit-frame-pointer -LIBS = -lOpenCL -TARGET = main -COMMON_SRC = ./math/opencl/opencl.cpp -MAIN_SRC = main.cpp $(COMMON_SRC) -BENCHMARK_SRC = benchmark.cpp $(COMMON_SRC) - -INCLUDES = -I"A:/Programs/OpenCL/include" -LIB_PATH = -L"A:/Programs/OpenCL/lib" - -$(TARGET): $(MAIN_SRC) - $(CXX) $(CXXFLAGS) $(INCLUDES) $(LIB_PATH) -o $(TARGET) $(MAIN_SRC) $(LIBS) - -benchmark: $(BENCHMARK_SRC) - $(CXX) $(CXXFLAGS) $(INCLUDES) $(LIB_PATH) -o $(TARGET) $(BENCHMARK_SRC) $(LIBS) - -clean: - rm -f $(TARGET) - -run: $(TARGET) - ./$(TARGET) - -run_benchmark: benchmark - ./$(TARGET) - -.PHONY: clean run \ No newline at end of file diff --git a/src/benchmark.cpp b/src/benchmark.cpp deleted file mode 100644 index 58ff15a..0000000 --- a/src/benchmark.cpp +++ /dev/null @@ -1,130 +0,0 @@ -#include -#include -#include -#include -#include - -#include "./math/math.hpp" - -using namespace GPU; - -OpenCL openCL; - -std::vector generateRandomMatrix(int rows, int cols) { - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution dis(-1.0f, 1.0f); - - std::vector matrix(rows * cols); - for (int i = 0; i < rows * cols; ++i) { - matrix[i] = dis(gen); - } - return matrix; -} -std::vector generateIdentityMatrix(int size) { - std::vector matrix(size * size, 0.0f); - for (int i = 0; i < size; ++i) { - matrix[i * size + i] = 1.0f; - } - return matrix; -} - -int main() { - const int SIZE = 48; - - std::cout << "Testing with " << SIZE << "x" << SIZE << " matrices..." 
- << std::endl; - - // std::vector matrixA = generateRandomMatrix(SIZE, SIZE); - // std::vector matrixB = generateRandomMatrix(SIZE, SIZE); - // std::vector matrixC = generateRandomMatrix(SIZE, SIZE); - - std::vector matrixA = generateIdentityMatrix(SIZE); - std::vector matrixB = generateIdentityMatrix(SIZE); - std::vector matrixC = generateIdentityMatrix(SIZE); - - // Тестирование на GPU - { - std::cout << "\n=== GPU Version ===" << std::endl; - - auto start = std::chrono::high_resolution_clock::now(); - - MatrixMath mm; - Matrix a(SIZE, SIZE, matrixA); - Matrix b(SIZE, SIZE, matrixB); - - auto gen_end = std::chrono::high_resolution_clock::now(); - auto op_start = std::chrono::high_resolution_clock::now(); - for (int i = 0; i < 100; ++i) { - Matrix x = mm.mult(a, b); - } - auto op_end = std::chrono::high_resolution_clock::now(); - - std::vector v = a.toVector(&mm.getQueue()); - - auto total_end = std::chrono::high_resolution_clock::now(); - - auto gen_duration = - std::chrono::duration_cast(gen_end - start); - auto op_duration = std::chrono::duration_cast( - op_end - op_start); - auto total_duration = std::chrono::duration_cast( - total_end - start); - - std::cout << "Matrix generation time: " << gen_duration.count() << " ms" - << std::endl; - std::cout << "Operations time: " << op_duration.count() << " ms" - << std::endl; - std::cout << "Total time: " << total_duration.count() << " ms" << std::endl; - - std::cout << "First few elements: "; - for (size_t i = 0; i < 5 && i < v.size(); ++i) { - std::cout << v[i] << " "; - } - std::cout << std::endl; - } - - // Тестирование на CPU - { - std::cout << "\n=== CPU Version ===" << std::endl; - - auto start = std::chrono::high_resolution_clock::now(); - - CPU::MatrixMath mm; - CPU::Matrix a(SIZE, SIZE, matrixA); - CPU::Matrix b(SIZE, SIZE, matrixB); - - auto gen_end = std::chrono::high_resolution_clock::now(); - - auto op_start = std::chrono::high_resolution_clock::now(); - for (int i = 0; i < 100; ++i) { - CPU::Matrix x = 
mm.mult(a, b); - } - auto op_end = std::chrono::high_resolution_clock::now(); - - std::vector v = a.toVector(); - - auto total_end = std::chrono::high_resolution_clock::now(); - - auto gen_duration = - std::chrono::duration_cast(gen_end - start); - auto op_duration = std::chrono::duration_cast( - op_end - op_start); - auto total_duration = std::chrono::duration_cast( - total_end - start); - - std::cout << "Matrix generation time: " << gen_duration.count() << " ms" - << std::endl; - std::cout << "Operations time: " << op_duration.count() << " ms" - << std::endl; - std::cout << "Total time: " << total_duration.count() << " ms" << std::endl; - - std::cout << "First few elements: "; - for (size_t i = 0; i < 5 && i < v.size(); ++i) { - std::cout << v[i] << " "; - } - std::cout << std::endl; - } - - return 0; -} \ No newline at end of file diff --git a/src/main b/src/main deleted file mode 100755 index aa7ca69..0000000 Binary files a/src/main and /dev/null differ diff --git a/src/main.cpp b/src/main.cpp deleted file mode 100644 index b1d6351..0000000 --- a/src/main.cpp +++ /dev/null @@ -1,383 +0,0 @@ -#include -#include -#include -#include -#include -#include - -// Чтение файла в строку -std::string readFile(const char *filename) { - std::ifstream file(filename); - if (!file.is_open()) { - throw std::runtime_error(std::string("Failed to open file: ") + filename); - } - return std::string((std::istreambuf_iterator(file)), - std::istreambuf_iterator()); -} - -// Получение ошибки OpenCL в виде строки -const char *getErrorString(cl_int error) { - switch (error) { - case CL_SUCCESS: - return "CL_SUCCESS"; - case CL_DEVICE_NOT_FOUND: - return "CL_DEVICE_NOT_FOUND"; - case CL_DEVICE_NOT_AVAILABLE: - return "CL_DEVICE_NOT_AVAILABLE"; - case CL_COMPILER_NOT_AVAILABLE: - return "CL_COMPILER_NOT_AVAILABLE"; - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; - case CL_OUT_OF_RESOURCES: - return "CL_OUT_OF_RESOURCES"; - case CL_OUT_OF_HOST_MEMORY: - 
return "CL_OUT_OF_HOST_MEMORY"; - case CL_PROFILING_INFO_NOT_AVAILABLE: - return "CL_PROFILING_INFO_NOT_AVAILABLE"; - case CL_MEM_COPY_OVERLAP: - return "CL_MEM_COPY_OVERLAP"; - case CL_IMAGE_FORMAT_MISMATCH: - return "CL_IMAGE_FORMAT_MISMATCH"; - case CL_IMAGE_FORMAT_NOT_SUPPORTED: - return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; - case CL_BUILD_PROGRAM_FAILURE: - return "CL_BUILD_PROGRAM_FAILURE"; - case CL_MAP_FAILURE: - return "CL_MAP_FAILURE"; - case CL_INVALID_VALUE: - return "CL_INVALID_VALUE"; - case CL_INVALID_DEVICE_TYPE: - return "CL_INVALID_DEVICE_TYPE"; - case CL_INVALID_PLATFORM: - return "CL_INVALID_PLATFORM"; - case CL_INVALID_DEVICE: - return "CL_INVALID_DEVICE"; - case CL_INVALID_CONTEXT: - return "CL_INVALID_CONTEXT"; - case CL_INVALID_QUEUE_PROPERTIES: - return "CL_INVALID_QUEUE_PROPERTIES"; - case CL_INVALID_COMMAND_QUEUE: - return "CL_INVALID_COMMAND_QUEUE"; - case CL_INVALID_HOST_PTR: - return "CL_INVALID_HOST_PTR"; - case CL_INVALID_MEM_OBJECT: - return "CL_INVALID_MEM_OBJECT"; - case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: - return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; - case CL_INVALID_IMAGE_SIZE: - return "CL_INVALID_IMAGE_SIZE"; - case CL_INVALID_SAMPLER: - return "CL_INVALID_SAMPLER"; - case CL_INVALID_BINARY: - return "CL_INVALID_BINARY"; - case CL_INVALID_BUILD_OPTIONS: - return "CL_INVALID_BUILD_OPTIONS"; - case CL_INVALID_PROGRAM: - return "CL_INVALID_PROGRAM"; - case CL_INVALID_PROGRAM_EXECUTABLE: - return "CL_INVALID_PROGRAM_EXECUTABLE"; - case CL_INVALID_KERNEL_NAME: - return "CL_INVALID_KERNEL_NAME"; - case CL_INVALID_KERNEL_DEFINITION: - return "CL_INVALID_KERNEL_DEFINITION"; - case CL_INVALID_KERNEL: - return "CL_INVALID_KERNEL"; - case CL_INVALID_ARG_INDEX: - return "CL_INVALID_ARG_INDEX"; - case CL_INVALID_ARG_VALUE: - return "CL_INVALID_ARG_VALUE"; - case CL_INVALID_ARG_SIZE: - return "CL_INVALID_ARG_SIZE"; - case CL_INVALID_KERNEL_ARGS: - return "CL_INVALID_KERNEL_ARGS"; - case CL_INVALID_WORK_DIMENSION: - return 
"CL_INVALID_WORK_DIMENSION"; - case CL_INVALID_WORK_GROUP_SIZE: - return "CL_INVALID_WORK_GROUP_SIZE"; - case CL_INVALID_WORK_ITEM_SIZE: - return "CL_INVALID_WORK_ITEM_SIZE"; - case CL_INVALID_GLOBAL_OFFSET: - return "CL_INVALID_GLOBAL_OFFSET"; - case CL_INVALID_EVENT_WAIT_LIST: - return "CL_INVALID_EVENT_WAIT_LIST"; - case CL_INVALID_EVENT: - return "CL_INVALID_EVENT"; - case CL_INVALID_OPERATION: - return "CL_INVALID_OPERATION"; - case CL_INVALID_GL_OBJECT: - return "CL_INVALID_GL_OBJECT"; - case CL_INVALID_BUFFER_SIZE: - return "CL_INVALID_BUFFER_SIZE"; - case CL_INVALID_MIP_LEVEL: - return "CL_INVALID_MIP_LEVEL"; - case CL_INVALID_GLOBAL_WORK_SIZE: - return "CL_INVALID_GLOBAL_WORK_SIZE"; - default: - return "Unknown OpenCL error"; - } -} - -// Проверка ошибок OpenCL -void checkError(cl_int err, const char *operation) { - if (err != CL_SUCCESS) { - std::cerr << "Error during " << operation << ": " << getErrorString(err) - << " (" << err << ")" << std::endl; - exit(1); - } -} - -// Код ядра для матричного умножения с тайлингом -const char *kernelSource = R"( -__kernel void matmul_tiled(__global const float* A, - __global const float* B, - __global float* C, - const int N, - const int TILE_SIZE) { - - int row = get_global_id(1); - int col = get_global_id(0); - - __local float tileA[16][16]; - __local float tileB[16][16]; - - float sum = 0.0f; - - int numTiles = (N + TILE_SIZE - 1) / TILE_SIZE; - - for (int t = 0; t < numTiles; t++) { - // Загрузка тайлов в локальную память - int tileRow = get_local_id(1); - int tileCol = get_local_id(0); - - int loadRow = row; - int loadCol = t * TILE_SIZE + tileCol; - if (loadRow < N && loadCol < N) { - tileA[tileRow][tileCol] = A[loadRow * N + loadCol]; - } else { - tileA[tileRow][tileCol] = 0.0f; - } - - loadRow = t * TILE_SIZE + tileRow; - loadCol = col; - if (loadRow < N && loadCol < N) { - tileB[tileRow][tileCol] = B[loadRow * N + loadCol]; - } else { - tileB[tileRow][tileCol] = 0.0f; - } - - barrier(CLK_LOCAL_MEM_FENCE); - 
- // Вычисление частичной суммы - for (int k = 0; k < TILE_SIZE; k++) { - sum += tileA[tileRow][k] * tileB[k][tileCol]; - } - - barrier(CLK_LOCAL_MEM_FENCE); - } - - if (row < N && col < N) { - C[row * N + col] = sum; - } -} -)"; - -int main() { - cl_int err; - - // Параметры матрицы - const int N = 1024; // Размер матрицы (уменьшено для демонстрации) - const int TILE_SIZE = 16; - const size_t matrixSize = N * N * sizeof(float); - - std::cout << "Matrix size: " << N << "x" << N << " (" << N * N << " elements)" - << std::endl; - std::cout << "Total data: " << matrixSize / (1024 * 1024) << " MB per matrix" - << std::endl; - - // Инициализация данных - std::vector A(N * N); - std::vector B(N * N); - std::vector C(N * N, 0.0f); - - // Заполнение матриц тестовыми данными - for (int i = 0; i < N * N; i++) { - A[i] = static_cast(i % 100) * 0.1f; - B[i] = static_cast((i + 1) % 100) * 0.1f; - } - - // 1. Получение платформы - cl_platform_id platform; - err = clGetPlatformIDs(1, &platform, NULL); - checkError(err, "clGetPlatformIDs"); - - // 2. Получение устройства (GPU) - cl_device_id device; - err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); - if (err != CL_SUCCESS) { - std::cout << "GPU not found, trying CPU..." << std::endl; - err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, NULL); - checkError(err, "clGetDeviceIDs"); - std::cout << "Using CPU" << std::endl; - } else { - std::cout << "Using GPU" << std::endl; - } - - // 3. Создание контекста - cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); - checkError(err, "clCreateContext"); - - // 4. Создание очереди команд - cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); - checkError(err, "clCreateCommandQueue"); - - // 5. 
Создание буферов - cl_mem bufferA = - clCreateBuffer(context, CL_MEM_READ_ONLY, matrixSize, NULL, &err); - checkError(err, "clCreateBuffer A"); - - cl_mem bufferB = - clCreateBuffer(context, CL_MEM_READ_ONLY, matrixSize, NULL, &err); - checkError(err, "clCreateBuffer B"); - - cl_mem bufferC = - clCreateBuffer(context, CL_MEM_WRITE_ONLY, matrixSize, NULL, &err); - checkError(err, "clCreateBuffer C"); - - // 6. Копирование данных на устройство - auto copy_start = std::chrono::high_resolution_clock::now(); - - err = clEnqueueWriteBuffer(queue, bufferA, CL_TRUE, 0, matrixSize, A.data(), - 0, NULL, NULL); - checkError(err, "clEnqueueWriteBuffer A"); - - err = clEnqueueWriteBuffer(queue, bufferB, CL_TRUE, 0, matrixSize, B.data(), - 0, NULL, NULL); - checkError(err, "clEnqueueWriteBuffer B"); - - auto copy_end = std::chrono::high_resolution_clock::now(); - auto copy_time = std::chrono::duration_cast( - copy_end - copy_start); - - // 7. Создание программы - auto program_start = std::chrono::high_resolution_clock::now(); - - cl_program program = - clCreateProgramWithSource(context, 1, &kernelSource, NULL, &err); - checkError(err, "clCreateProgramWithSource"); - - // Компиляция программы - err = clBuildProgram(program, 1, &device, NULL, NULL, NULL); - if (err != CL_SUCCESS) { - // Получение логов компиляции - size_t log_size; - clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, - &log_size); - std::vector log(log_size); - clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, - log.data(), NULL); - std::cerr << "Build failed:\n" << log.data() << std::endl; - checkError(err, "clBuildProgram"); - } - - auto program_end = std::chrono::high_resolution_clock::now(); - auto program_time = std::chrono::duration_cast( - program_end - program_start); - - // 8. 
Создание ядра - auto kernel_start = std::chrono::high_resolution_clock::now(); - - cl_kernel kernel = clCreateKernel(program, "matmul_tiled", &err); - checkError(err, "clCreateKernel"); - - auto kernel_end = std::chrono::high_resolution_clock::now(); - auto kernel_time = std::chrono::duration_cast( - kernel_end - kernel_start); - - // 9. Установка аргументов ядра - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA); - checkError(err, "clSetKernelArg 0"); - - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferB); - checkError(err, "clSetKernelArg 1"); - - err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufferC); - checkError(err, "clSetKernelArg 2"); - - err = clSetKernelArg(kernel, 3, sizeof(int), &N); - checkError(err, "clSetKernelArg 3"); - - err = clSetKernelArg(kernel, 4, sizeof(int), &TILE_SIZE); - checkError(err, "clSetKernelArg 4"); - - // 10. Запуск матричного умножения - size_t global[2] = {N, N}; - size_t local[2] = {TILE_SIZE, TILE_SIZE}; - - auto matmul_start = std::chrono::high_resolution_clock::now(); - - err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, local, 0, NULL, - NULL); - checkError(err, "clEnqueueNDRangeKernel"); - - clFinish(queue); - - auto matmul_end = std::chrono::high_resolution_clock::now(); - auto matmul_time = std::chrono::duration_cast( - matmul_end - matmul_start); - - // 11. 
Чтение результатов - auto read_start = std::chrono::high_resolution_clock::now(); - - err = clEnqueueReadBuffer(queue, bufferC, CL_TRUE, 0, matrixSize, C.data(), 0, - NULL, NULL); - checkError(err, "clEnqueueReadBuffer"); - - auto read_end = std::chrono::high_resolution_clock::now(); - auto read_time = std::chrono::duration_cast( - read_end - read_start); - - // Вывод результатов измерений - std::cout << "\n=== TIMING RESULTS ===" << std::endl; - std::cout << "Data copy to device: " << copy_time.count() << " ns" - << std::endl; - std::cout << "Program creation: " << program_time.count() << " ns" - << std::endl; - std::cout << "Kernel creation: " << kernel_time.count() << " ns" << std::endl; - std::cout << "Matrix multiplication: " << matmul_time.count() << " ms" - << std::endl; - std::cout << "Data read from device: " << read_time.count() << " ns" - << std::endl; - - // Расчет отношения времени выполнения к времени создания ядра - if (kernel_time.count() > 0) { - long long ratio = (matmul_time.count() * 1000) / - kernel_time.count(); // переводим ms в ns для сравнения - std::cout << "Kernel creation vs execution ratio: 1 : " << ratio - << std::endl; - } - - // Расчет производительности - long long total_flops = 2LL * N * N * N; // 2*N^3 FLOP - double gflops = (double)total_flops / (matmul_time.count() * 1e6); // GFLOP/s - std::cout << "Performance: " << gflops << " GFLOP/s" << std::endl; - - // Проверка результата (простая валидация) - float checksum = 0.0f; - for (int i = 0; i < N * N; i++) { - checksum += C[i]; - } - std::cout << "Result checksum: " << checksum << std::endl; - - // 12. Освобождение ресурсов - clReleaseMemObject(bufferA); - clReleaseMemObject(bufferB); - clReleaseMemObject(bufferC); - clReleaseKernel(kernel); - clReleaseProgram(program); - clReleaseCommandQueue(queue); - clReleaseContext(context); - - std::cout << "\nDone!" 
<< std::endl; - - return 0; -} \ No newline at end of file diff --git a/src/math/math.hpp b/src/math/math.hpp deleted file mode 100644 index 23a1cae..0000000 --- a/src/math/math.hpp +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -#include "tensor/cpu/math.hpp" - -#ifndef NOGPU -#include "opencl/opencl.hpp" - -#include "tensor/gpu/math.hpp" -#endif diff --git a/src/math/tensor/cpu/math.cpp b/src/math/tensor/cpu/math.cpp deleted file mode 100644 index 07a104b..0000000 --- a/src/math/tensor/cpu/math.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "math.hpp" diff --git a/src/math/tensor/cpu/math.hpp b/src/math/tensor/cpu/math.hpp deleted file mode 100644 index 1809b77..0000000 --- a/src/math/tensor/cpu/math.hpp +++ /dev/null @@ -1,189 +0,0 @@ -#pragma once - -#include "tensor.hpp" - -#include "../math.hpp" - -#include - -#define M_PI 3.14159265358979323846 - -namespace CPU { -template class TensorMath; -class Tensor0Math; -class Tensor1Math; -class Tensor2Math; -class Tensor3Math; - -template class TensorMath : public ITensorMath { -protected: - float activateX(float x, Activation type, float alpha = 0.01f) { - switch (type) { - case Activation::LINEAR: - return x; - case Activation::SIGMOID: - return 1.0f / (1.0f + std::exp(-x)); - case Activation::TANH: - return std::tanh(x); - case Activation::RELU: - return std::max(0.0f, x); - case Activation::LEAKY_RELU: - return (x > 0.0f) ? x : alpha * x; - case Activation::ELU: - return (x > 0.0f) ? x : alpha * (std::exp(x) - 1.0f); - default: - throw std::invalid_argument("Unknown activation type"); - } - } - float d_activateX(float x, Activation type, float alpha = 0.01f) { - switch (type) { - case Activation::LINEAR: - return 1.0f; - case Activation::SIGMOID: { - float sigmoid = 1.0f / (1.0f + std::exp(-x)); - return sigmoid * (1.0f - sigmoid); - } - case Activation::TANH: { - float tanh_x = std::tanh(x); - return 1.0f - tanh_x * tanh_x; - } - case Activation::RELU: - return (x > 0.0f) ? 
1.0f : 0.0f; - case Activation::LEAKY_RELU: - return (x > 0.0f) ? 1.0f : alpha; - case Activation::ELU: - return (x > 0.0f) ? 1.0f : alpha * std::exp(x); - default: - throw std::invalid_argument("Unknown activation type"); - } - } - -public: - T activate(const T &t, Activation type = Activation::LINEAR, - float alpha = 0.0f) override { - T result(t.getShape(), false); - for (size_t i = 0; i < t.getSize(); ++i) { - result[i] = activateX(t[i], type, alpha); - } - return result; - } - T d_activate(const T &t, Activation type = Activation::LINEAR, - float alpha = 0.0f) override { - T result(t.getShape(), false); - for (size_t i = 0; i < t.getSize(); ++i) { - result[i] = d_activateX(t[i], type, alpha); - } - return result; - } - - T mult(const T &a, const T &b) override { - this->validateSameDimensions(a, b); - T result(a.getShape(), false); - for (size_t i = 0; i < a.getSize(); ++i) - result[i] = a[i] * b[i]; - return result; - } - T mult(const T &t, float x) override { - T result(t.getShape(), false); - for (size_t i = 0; i < t.getSize(); ++i) - result[i] = t[i] * x; - return result; - } - T add(const T &a, const T &b, float x = 1.0f) override { - this->validateSameDimensions(a, b); - T result(a.getShape(), false); - for (size_t i = 0; i < a.getSize(); ++i) - result[i] = a[i] + (b[i] * x); - return result; - } - T add(const T &t, float x) override { - T result(t.getShape(), false); - for (size_t i = 0; i < t.getSize(); ++i) - result[i] = t[i] + x; - return result; - } - - void await() const override {} -}; - -class Tensor0Math : public TensorMath, public ITensor0Math {}; - -class Tensor1Math : public TensorMath, public ITensor1Math {}; - -class Tensor2Math : public TensorMath, - public ITensor2Math { -private: - Tensor2 mse(const Tensor2 &a, const Tensor2 &b) { - Tensor2 result(a.getShape(), false); - for (size_t i = 0; i < result.getSize(); ++i) - result[i] = (a[i] - b[i]) * (a[i] - b[i]) / (float)a.getSize(); - return result; - } - Tensor2 d_mse(const Tensor2 &a, 
const Tensor2 &b) { - Tensor2 result(a.getShape(), false); - for (size_t i = 0; i < result.getSize(); ++i) - result[i] = 2 * (a[i] - b[i]) / (float)a.getSize(); - return result; - } - -public: - Tensor2 dot(const Tensor2 &a, const Tensor2 &b, bool transpose_a = false, - bool transpose_b = false, const Vector *bias = nullptr, - Activation type = Activation::LINEAR, - float alpha = 0.01f) override { - validateMultDimensions(a, b, transpose_a, transpose_b); - if (bias != nullptr) - validateBiasDimensions(a, *bias, transpose_a); - Tensor2 result(transpose_a ? a.getCols() : a.getRows(), - transpose_b ? b.getRows() : b.getCols(), 0.0f); - for (int i = 0; i < result.getRows(); ++i) { - for (int j = 0; j < result.getCols(); ++j) { - float sum = 0.0f; - for (int k = 0; k < a.getCols(); ++k) - sum += (transpose_a ? a(k, i) : a(i, k)) * - (transpose_b ? b(j, k) : b(k, j)); - result(i, j) = - activateX(sum + (bias == nullptr ? 0.0f : (*bias)(i)), type, alpha); - } - } - return result; - } - - Tensor2 loss(const Tensor2 &a, const Tensor2 &b, Loss type) override { - this->validateSameDimensions(a, b); - switch (type) { - case Loss::MSE: - return mse(a, b); - default: - throw std::invalid_argument("Unknown loss type"); - } - } - Tensor2 d_loss(const Tensor2 &a, const Tensor2 &b, Loss type) override { - this->validateSameDimensions(a, b); - switch (type) { - case Loss::MSE: - return d_mse(a, b); - default: - throw std::invalid_argument("Unknown loss type"); - } - } - - Tensor1 axis_sum(const Tensor2 &m) override { - Tensor1 result(m.getRows(), 0.0f); - for (int i = 0; i < m.getRows(); ++i) { - float sum = 0.0f; - for (int j = 0; j < m.getCols(); ++j) - sum += m(i, j); - result(i) = sum; - } - return result; - } -}; - -class Tensor3Math : public TensorMath, public ITensor3Math {}; - -typedef Tensor0Math ScalarMath; -typedef Tensor1Math VectorMath; -typedef Tensor2Math MatrixMath; - -} // namespace CPU diff --git a/src/math/tensor/cpu/tensor.cpp b/src/math/tensor/cpu/tensor.cpp 
deleted file mode 100644 index f7d3338..0000000 --- a/src/math/tensor/cpu/tensor.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "tensor.hpp" diff --git a/src/math/tensor/cpu/tensor.hpp b/src/math/tensor/cpu/tensor.hpp deleted file mode 100644 index 0aed6cd..0000000 --- a/src/math/tensor/cpu/tensor.hpp +++ /dev/null @@ -1,291 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "../tensor.hpp" - -#include "../../../utils/output.h" - -extern std::mt19937 gen; - -namespace CPU { -class Tensor; -class Tensor0; -class Tensor1; -class Tensor2; -class Tensor3; - -class Tensor : public ITensor { -protected: - std::vector data; - - void resize(size_t size) { data.resize(size); } - void resize(const std::vector &shape) { - size_t size = 1; - for (int dim : shape) - size *= dim; - resize(size); - } - -public: - Tensor(const std::vector &shape) : ITensor(shape) { - resize(shape); - std::generate(data.begin(), data.end(), - []() { return std::generate_canonical(gen); }); - } - Tensor(const std::vector &shape, float value) : ITensor(shape) { - resize(shape); - std::fill(data.begin(), data.end(), value); - } - Tensor(const std::vector &shape, bool fill) : ITensor(shape) { - resize(shape); - if (fill) - std::fill(data.begin(), data.end(), 0.0f); - } - Tensor(const Tensor &) = default; - Tensor &operator=(const Tensor &) = default; - Tensor(Tensor &&other) = default; - Tensor &operator=(Tensor &&other) = default; - - float &operator[](int index) { return data[index]; } - const float &operator[](int index) const { return data[index]; } - - virtual void print() const { - debugi("Tensor(%d): [", getDim()); - for (size_t i = 0; i < data.size(); ++i) { - debugi("%4.3f", data[i]); - if (i > 15) { - debugi("... "); - break; - } - if (i != data.size() - 1) - debugi(" "); - } - debug("]"); - } - - std::vector toVector() const { return data; } - - static Tensor0 *asScalar(Tensor *tensor) { - return tensor->getType() == Type::SCALAR - ? 
reinterpret_cast(tensor) - : nullptr; - } - static const Tensor0 *asScalar(const Tensor *tensor) { - return tensor->getType() == Type::SCALAR - ? reinterpret_cast(tensor) - : nullptr; - } - static Tensor1 *asVector(Tensor *tensor) { - return tensor->getType() == Type::VECTOR - ? reinterpret_cast(tensor) - : nullptr; - } - static const Tensor1 *asVector(const Tensor *tensor) { - return tensor->getType() == Type::VECTOR - ? reinterpret_cast(tensor) - : nullptr; - } - static Tensor2 *asMatrix(Tensor *tensor) { - return tensor->getType() == Type::MATRIX - ? reinterpret_cast(tensor) - : nullptr; - } - static const Tensor2 *asMatrix(const Tensor *tensor) { - return tensor->getType() == Type::MATRIX - ? reinterpret_cast(tensor) - : nullptr; - } - static Tensor3 *asTensor3(Tensor *tensor) { - return tensor->getType() == Type::TENSOR3 - ? reinterpret_cast(tensor) - : nullptr; - } - static const Tensor3 *asTensor3(const Tensor *tensor) { - return tensor->getType() == Type::TENSOR3 - ? reinterpret_cast(tensor) - : nullptr; - } -}; - -class Tensor0 : public Tensor, public ITensor0 { -public: - Tensor0(const std::vector &shape) : Tensor(shape) { - if (shape.size() != 0) - throw std::invalid_argument("Tensor0 dimension must be 0"); - } - Tensor0(const std::vector &shape, float value) : Tensor(shape, value) { - if (shape.size() != 0) - throw std::invalid_argument("Tensor0 dimension must be 0"); - } - Tensor0() : Tensor({}) { - resize(1); - data[0] = std::generate_canonical(gen); - } - Tensor0(float value) : Tensor({}) { - resize(1); - data[0] = value; - } - Tensor0(const Tensor0 &) = default; - Tensor0 &operator=(const Tensor0 &) = default; - Tensor0(Tensor0 &&other) = default; - Tensor0 &operator=(Tensor0 &&other) = default; - - void print() const override { debug("Scalar: %4.3f", data[0]); } - - float &value() { return data[0]; } - const float &value() const { return data[0]; } -}; - -class Tensor1 : public Tensor, public ITensor1 { -public: - Tensor1(const std::vector &shape) 
: Tensor(shape) { - if (shape.size() != 1) - throw std::invalid_argument("Tensor1 dimension must be 1"); - } - Tensor1(const std::vector &shape, float value) : Tensor(shape, value) { - if (shape.size() != 1) - throw std::invalid_argument("Tensor1 dimension must be 1"); - } - Tensor1(int size) : Tensor({size}) {} - Tensor1(int size, float value) : Tensor({size}, value) {} - Tensor1(const std::vector &values) : Tensor({(int)values.size()}) { - data = values; - } - Tensor1(const Tensor1 &) = default; - Tensor1 &operator=(const Tensor1 &) = default; - Tensor1(Tensor1 &&other) = default; - Tensor1 &operator=(Tensor1 &&other) = default; - - void print() const override { - debugi("Vector(%d): [", shape[0]); - for (size_t i = 0; i < data.size(); ++i) { - debugi("%4.3f", data[i]); - if (i != data.size() - 1) - debugi(" "); - } - debug("]"); - } - - float &operator()(int i) { return data[i]; } - const float &operator()(int i) const { return data[i]; } -}; - -class Tensor2 : public ITensor2, public Tensor { -public: - Tensor2(const std::vector &shape) : Tensor(shape) { - if (shape.size() != 2) - throw std::invalid_argument("Tensor2 dimension must be 2"); - } - Tensor2(const std::vector &shape, float value) : Tensor(shape, value) { - if (shape.size() != 2) - throw std::invalid_argument("Tensor2 dimension must be 2"); - } - Tensor2(int rows, int cols) : ITensor2(), Tensor({rows, cols}) {} - Tensor2(int rows, int cols, float value) - : ITensor2(), Tensor({rows, cols}, value) {} - Tensor2(int rows, int cols, const std::vector &values) - : Tensor({rows, cols}, false) { - for (int i = 0; i < shape[0]; ++i) { - for (int j = 0; j < shape[1]; ++j) { - data[i * shape[1] + j] = values[i * shape[1] + j]; - } - } - } - Tensor2(const std::vector> &values) - : Tensor({(int)values.size(), (int)values[0].size()}) { - for (int i = 0; i < shape[0]; ++i) { - for (int j = 0; j < shape[1]; ++j) { - data[i * shape[1] + j] = values[i][j]; - } - } - } - Tensor2(const Tensor2 &) = default; - Tensor2 
&operator=(const Tensor2 &) = default; - Tensor2(Tensor2 &&other) = default; - Tensor2 &operator=(Tensor2 &&other) = default; - - void print() const override { - debug("Matrix(%dx%d):", shape[0], shape[1]); - for (int i = 0; i < shape[0]; ++i) { - for (int j = 0; j < shape[1]; ++j) - debugi("%4.3f ", data[i * shape[1] + j]); - debugi("\n"); - } - } - - float &operator()(int i, int j) { return data[i * shape[1] + j]; } - const float &operator()(int i, int j) const { return data[i * shape[1] + j]; } - - int getRows() const override { return shape[0]; } - int getCols() const override { return shape[1]; } -}; - -class Tensor3 : public Tensor, public ITensor3 { -public: - Tensor3(const std::vector &shape) : Tensor(shape) { - if (shape.size() != 3) - throw std::invalid_argument("Tensor3 dimension must be 3"); - } - Tensor3(const std::vector &shape, float value) : Tensor(shape, value) { - if (shape.size() != 3) - throw std::invalid_argument("Tensor3 dimension must be 3"); - } - Tensor3(int d1, int d2, int d3) : Tensor({d1, d2, d3}) {} - Tensor3(int d1, int d2, int d3, float value) : Tensor({d1, d2, d3}, value) {} - Tensor3(int d1, int d2, int d3, const std::vector &values) - : Tensor({d1, d2, d3}, false) { - for (int i = 0; i < shape[0]; ++i) { - for (int j = 0; j < shape[1]; ++j) { - for (int k = 0; k < shape[2]; ++k) { - data[i * shape[1] * shape[2] + j * shape[2] + k] = - values[i * shape[1] * shape[2] + j * shape[2] + k]; - } - } - } - } - Tensor3(const std::vector>> &values) - : Tensor({(int)values.size(), (int)values[0].size(), - (int)values[0][0].size()}) { - for (int i = 0; i < shape[0]; ++i) { - for (int j = 0; j < shape[1]; ++j) { - for (int k = 0; k < shape[2]; ++k) { - data[i * shape[1] * shape[2] + j * shape[2] + k] = values[i][j][k]; - } - } - } - } - Tensor3(const Tensor3 &) = default; - Tensor3 &operator=(const Tensor3 &) = default; - Tensor3(Tensor3 &&other) = default; - Tensor3 &operator=(Tensor3 &&other) = default; - - void print() const override { - 
debugi("Tensor3(%dx%dx%d):", shape[0], shape[1], shape[2]); - for (int i = 0; i < shape[0]; ++i) { - debug("Slice %d", i); - for (int j = 0; j < shape[1]; ++j) { - for (int k = 0; k < shape[2]; ++k) - debugi("%4.3f ", data[i * shape[1] * shape[2] + j * shape[2] + k]); - debugi("\n"); - } - debugi("\n"); - } - } - - float &operator()(int i, int j, int k) { - return data[i * shape[1] * shape[2] + j * shape[2] + k]; - } - const float &operator()(int i, int j, int k) const { - return data[i * shape[1] * shape[2] + j * shape[2] + k]; - } -}; - -typedef Tensor0 Scalar; -typedef Tensor1 Vector; -typedef Tensor2 Matrix; - -} // namespace CPU diff --git a/src/math/tensor/gpu/math.cpp b/src/math/tensor/gpu/math.cpp deleted file mode 100644 index 07a104b..0000000 --- a/src/math/tensor/gpu/math.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "math.hpp" diff --git a/src/math/tensor/gpu/math.hpp b/src/math/tensor/gpu/math.hpp deleted file mode 100644 index 1cd89ac..0000000 --- a/src/math/tensor/gpu/math.hpp +++ /dev/null @@ -1,168 +0,0 @@ -#pragma once - -#include "../../opencl/opencl.hpp" - -#include "tensor.hpp" - -#include "../math.hpp" - -namespace GPU { -template class TensorMath; -class Tensor0Math; -class Tensor1Math; -class Tensor2Math; -class Tensor3Math; - -template class TensorMath : public ITensorMath { -protected: - enum class Method { - MULT, - MULT_SMALL, - SCALAR_MULT, - ADD, - SCALAR_ADD, - ACTIVATE - }; - std::unordered_map kernels; - std::unordered_map kernelsNames = { - {Method::MULT, "mult"}, {Method::MULT_SMALL, "mult_small"}, - {Method::SCALAR_MULT, "mult_sc"}, {Method::ADD, "add"}, - {Method::SCALAR_ADD, "add_sc"}, {Method::ACTIVATE, "activate"}}; - - cl::CommandQueue queue; - -public: - TensorMath() { - queue = cl::CommandQueue(openCL.getContext(), openCL.getDevice()); - for (const auto &entry : kernelsNames) { - kernels[entry.first] = - cl::Kernel(openCL.getProgram(OpenCL::Program::MATRIX), entry.second); - } - } - - const cl::CommandQueue &getQueue() const { 
return queue; } - - void await() const override { queue.finish(); } - - T activate(const T &t, Activation type = Activation::LINEAR, - float alpha = 0.0f) override { - T result(t.getShape(), false, &queue); - kernels[Method::ACTIVATE].setArg(0, *t.getBuffer()); - kernels[Method::ACTIVATE].setArg(1, *result.getBuffer()); - kernels[Method::ACTIVATE].setArg(2, static_cast(type)); - kernels[Method::ACTIVATE].setArg(3, alpha); - queue.enqueueNDRangeKernel(kernels[Method::ACTIVATE], cl::NullRange, - cl::NDRange(t.getSize())); - return result; - } - - T mult(const T &t, float x) override { - T result(t.getShape(), false, &queue); - kernels[Method::SCALAR_MULT].setArg(0, *t.getBuffer()); - kernels[Method::SCALAR_MULT].setArg(1, *result.getBuffer()); - kernels[Method::SCALAR_MULT].setArg(2, x); - queue.enqueueNDRangeKernel(kernels[Method::SCALAR_MULT], cl::NullRange, - cl::NDRange(t.getSize())); - return result; - } - - T add(const T &a, const T &b, float x = 1.0f) override { - this->validateSameDimensions(a, b); - T result(a.getShape(), false, &queue); - kernels[Method::ADD].setArg(0, *a.getBuffer()); - kernels[Method::ADD].setArg(1, *b.getBuffer()); - kernels[Method::ADD].setArg(2, *result.getBuffer()); - kernels[Method::ADD].setArg(3, x); - queue.enqueueNDRangeKernel(kernels[Method::ADD], cl::NullRange, - cl::NDRange(a.getSize())); - return result; - } - - T add(const T &t, float x) override { - T result(t.getShape(), false, &queue); - kernels[Method::SCALAR_ADD].setArg(0, *t.getBuffer()); - kernels[Method::SCALAR_ADD].setArg(1, *result.getBuffer()); - kernels[Method::SCALAR_ADD].setArg(2, x); - queue.enqueueNDRangeKernel(kernels[Method::SCALAR_ADD], cl::NullRange, - cl::NDRange(t.getSize())); - return result; - } -}; - -class Tensor0Math : public TensorMath, public ITensor0Math {}; - -class Tensor1Math : public TensorMath, public ITensor1Math {}; - -class Tensor2Math : public TensorMath, - public ITensor2Math { -private: - Tensor2 dot_tiled(const Tensor2 &a, const 
Tensor2 &b, bool transpose, - const Vector &bias, Activation type, float alpha) { - Tensor2 result(a.getRows(), transpose ? b.getRows() : b.getCols(), false, - &queue); - - const int tile_size = 16; - cl::NDRange local_size(tile_size, tile_size); - cl::NDRange global_size( - ((result.getRows() + tile_size - 1) / tile_size) * tile_size, - ((result.getCols() + tile_size - 1) / tile_size) * tile_size); - - kernels[Method::MULT].setArg(0, *a.getBuffer()); - kernels[Method::MULT].setArg(1, *b.getBuffer()); - kernels[Method::MULT].setArg(2, *result.getBuffer()); - kernels[Method::MULT].setArg(3, *bias.getBuffer()); - kernels[Method::MULT].setArg(4, static_cast(type)); - kernels[Method::MULT].setArg(5, alpha); - kernels[Method::MULT].setArg(6, result.getRows()); - kernels[Method::MULT].setArg(7, result.getCols()); - kernels[Method::MULT].setArg(8, a.getCols()); - kernels[Method::MULT].setArg(9, transpose ? 1 : 0); - queue.enqueueNDRangeKernel(kernels[Method::MULT], cl::NullRange, - global_size, local_size); - return result; - } - Tensor2 dot_small(const Tensor2 &a, const Tensor2 &b, bool transpose, - const Vector &bias, Activation type, float alpha) { - Tensor2 result(a.getRows(), transpose ? b.getRows() : b.getCols(), false, - &queue); - kernels[Method::MULT_SMALL].setArg(0, *a.getBuffer()); - kernels[Method::MULT_SMALL].setArg(1, *b.getBuffer()); - kernels[Method::MULT_SMALL].setArg(2, *result.getBuffer()); - kernels[Method::MULT_SMALL].setArg(3, *bias.getBuffer()); - kernels[Method::MULT_SMALL].setArg(4, static_cast(type)); - kernels[Method::MULT_SMALL].setArg(5, alpha); - kernels[Method::MULT_SMALL].setArg(6, result.getRows()); - kernels[Method::MULT_SMALL].setArg(7, result.getCols()); - kernels[Method::MULT_SMALL].setArg(8, a.getCols()); - kernels[Method::MULT_SMALL].setArg(9, transpose ? 
1 : 0); - queue.enqueueNDRangeKernel(kernels[Method::MULT_SMALL], cl::NullRange, - cl::NDRange(result.getRows(), result.getCols())); - return result; - } - -public: - Tensor2 dot(const Tensor2 &a, const Tensor2 &b, bool transpose = false, - const Vector *bias = nullptr, - Activation type = Activation::LINEAR, - float alpha = 0.01f) override { - validateMultDimensions(a, b, transpose); - const Vector defaultBias(a.getRows(), 0.0f, &queue); - if (bias != nullptr) - validateBiasDimensions(b, *bias, transpose); - if (a.getRows() > 64 || a.getCols() > 64 || b.getRows() > 64 || - b.getCols() > 64) - return dot_tiled(a, b, transpose, bias == nullptr ? defaultBias : *bias, - type, alpha); - else - return dot_small(a, b, transpose, bias == nullptr ? defaultBias : *bias, - type, alpha); - } -}; - -class Tensor3Math : public TensorMath, public ITensor3Math {}; - -typedef Tensor0Math ScalarMath; -typedef Tensor1Math VectorMath; -typedef Tensor2Math MatrixMath; - -} // namespace GPU diff --git a/src/math/tensor/gpu/tensor.cpp b/src/math/tensor/gpu/tensor.cpp deleted file mode 100644 index f7d3338..0000000 --- a/src/math/tensor/gpu/tensor.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "tensor.hpp" diff --git a/src/math/tensor/gpu/tensor.hpp b/src/math/tensor/gpu/tensor.hpp deleted file mode 100644 index 37758a9..0000000 --- a/src/math/tensor/gpu/tensor.hpp +++ /dev/null @@ -1,338 +0,0 @@ -#pragma once - -#include "../../opencl/opencl.hpp" - -#include -#include -#include - -#include "../tensor.hpp" - -extern std::mt19937 gen; - -namespace GPU { -class Tensor; -class Tensor0; -class Tensor1; -class Tensor2; -class Tensor3; - -class Tensor : public ITensor { -protected: - cl::Buffer *buffer = nullptr; - - size_t getShapeSize(const std::vector &shape) { - size_t size = 1; - for (int dim : shape) - size *= dim; - return size; - } - void fillBuf(const std::vector &v, - const cl::CommandQueue *queue = nullptr) { - if (buffer != nullptr) - throw std::runtime_error("Tensor buffer already 
exists"); - buffer = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE, - v.size() * sizeof(float)); - cl::CommandQueue q = queue == nullptr ? openCL.getDefaultQueue() : *queue; - q.enqueueWriteBuffer(*buffer, CL_TRUE, 0, v.size() * sizeof(float), - v.data()); - q.finish(); - } - void createBuf(size_t size, const cl::CommandQueue *queue = nullptr) { - std::vector v(size); - std::generate(v.begin(), v.end(), - []() { return std::generate_canonical(gen); }); - fillBuf(v, queue); - } - void createBuf(size_t size, float value, - const cl::CommandQueue *queue = nullptr) { - std::vector v(size); - std::fill(v.begin(), v.end(), value); - fillBuf(v, queue); - } - -public: - Tensor(const std::vector &shape, const cl::CommandQueue *queue = nullptr) - : ITensor(shape) { - createBuf(getShapeSize(shape), queue); - } - Tensor(const std::vector &shape, float value, - const cl::CommandQueue *queue = nullptr) - : ITensor(shape) { - createBuf(getShapeSize(shape), value, queue); - } - Tensor(const std::vector &shape, bool fill, - const cl::CommandQueue *queue = nullptr) - : ITensor(shape) { - if (fill) - createBuf(getShapeSize(shape), 0.0f, queue); - } - - Tensor(const Tensor &other, const cl::CommandQueue *queue = nullptr) - : ITensor(other) { - cl::CommandQueue q = queue == nullptr ? 
openCL.getDefaultQueue() : *queue; - createBuf(other.getSize(), &q); - q.enqueueCopyBuffer(*other.buffer, *buffer, 0, 0, - other.getSize() * sizeof(float)); - }; - Tensor &operator=(const Tensor &other) { - if (buffer != nullptr) - delete buffer; - ITensor::operator=(other); - createBuf(other.getSize(), &openCL.getDefaultQueue()); - openCL.getDefaultQueue().enqueueCopyBuffer(*other.buffer, *buffer, 0, 0, - other.getSize() * sizeof(float)); - return *this; - }; - Tensor(Tensor &&other) : ITensor(other), buffer(other.buffer) { - other.buffer = nullptr; - }; - Tensor &operator=(Tensor &&other) { - if (this != &other) { - if (buffer != nullptr) - delete buffer; - ITensor::operator=(std::move(other)); - buffer = other.buffer; - other.buffer = nullptr; - } - return *this; - }; - - ~Tensor() { - if (buffer != nullptr) - delete buffer; - } - - std::vector toVector(const cl::CommandQueue *queue = nullptr) { - size_t size = getShapeSize(shape); - std::vector result(size); - cl::CommandQueue q = queue == nullptr ? openCL.getDefaultQueue() : *queue; - q.enqueueReadBuffer(*buffer, CL_TRUE, 0, size * sizeof(float), - result.data()); - q.finish(); - return result; - } - - const cl::Buffer *getBuffer() const { return buffer; } - - static Tensor0 *asScalar(Tensor *tensor) { - return tensor->getType() == Type::SCALAR - ? reinterpret_cast(tensor) - : nullptr; - } - static const Tensor0 *asScalar(const Tensor *tensor) { - return tensor->getType() == Type::SCALAR - ? reinterpret_cast(tensor) - : nullptr; - } - static Tensor1 *asVector(Tensor *tensor) { - return tensor->getType() == Type::VECTOR - ? reinterpret_cast(tensor) - : nullptr; - } - static const Tensor1 *asVector(const Tensor *tensor) { - return tensor->getType() == Type::VECTOR - ? reinterpret_cast(tensor) - : nullptr; - } - static Tensor2 *asMatrix(Tensor *tensor) { - return tensor->getType() == Type::MATRIX - ? 
reinterpret_cast(tensor) - : nullptr; - } - static const Tensor2 *asMatrix(const Tensor *tensor) { - return tensor->getType() == Type::MATRIX - ? reinterpret_cast(tensor) - : nullptr; - } - static Tensor3 *asTensor3(Tensor *tensor) { - return tensor->getType() == Type::TENSOR3 - ? reinterpret_cast(tensor) - : nullptr; - } - static const Tensor3 *asTensor3(const Tensor *tensor) { - return tensor->getType() == Type::TENSOR3 - ? reinterpret_cast(tensor) - : nullptr; - } -}; - -class Tensor0 : public Tensor, public ITensor0 { -public: - Tensor0(const std::vector &shape, - const cl::CommandQueue *queue = nullptr) - : Tensor(shape, queue) { - if (shape.size() != 0) - throw std::invalid_argument("Tensor0 dimension must be 0"); - } - Tensor0(const std::vector &shape, float value, - const cl::CommandQueue *queue = nullptr) - : Tensor(shape, value, queue) { - if (shape.size() != 0) - throw std::invalid_argument("Tensor0 dimension must be 0"); - } - Tensor0(const cl::CommandQueue *queue = nullptr) - : Tensor(std::vector{}, queue) { - createBuf(1, queue); - } - Tensor0(float value, const cl::CommandQueue *queue = nullptr) - : Tensor(std::vector{}, queue) { - createBuf(1, value, queue); - } - Tensor0(const Tensor0 &other, const cl::CommandQueue *queue = nullptr) - : Tensor(other, queue) {}; - Tensor0 &operator=(const Tensor0 &other) { - Tensor::operator=(other); - return *this; - }; - Tensor0(Tensor0 &&other) : Tensor(std::move(other)) {}; - Tensor0 &operator=(Tensor0 &&other) { - Tensor::operator=(std::move(other)); - return *this; - }; -}; - -class Tensor1 : public Tensor, public ITensor1 { -public: - Tensor1(const std::vector &shape, - const cl::CommandQueue *queue = nullptr) - : Tensor(shape, queue) { - if (shape.size() != 1) - throw std::invalid_argument("Tensor1 dimension must be 1"); - } - Tensor1(const std::vector &shape, float value, - const cl::CommandQueue *queue = nullptr) - : Tensor(shape, value, queue) { - if (shape.size() != 1) - throw 
std::invalid_argument("Tensor1 dimension must be 1"); - } - Tensor1(int size, const cl::CommandQueue *queue = nullptr) - : Tensor({size}, queue) {} - Tensor1(int size, float value, const cl::CommandQueue *queue = nullptr) - : Tensor({size}, value, queue) {} - Tensor1(const std::vector &values, - const cl::CommandQueue *queue = nullptr) - : Tensor({(int)values.size()}, false, queue) { - fillBuf(values, queue); - } - Tensor1(const Tensor1 &other, const cl::CommandQueue *queue = nullptr) - : Tensor(other, queue) {}; - Tensor1 &operator=(const Tensor1 &other) { - Tensor::operator=(other); - return *this; - }; - Tensor1(Tensor1 &&other) : Tensor(std::move(other)) {}; - Tensor1 &operator=(Tensor1 &&other) { - Tensor::operator=(std::move(other)); - return *this; - }; - - int getSize() const override { return shape[0]; } -}; - -class Tensor2 : public ITensor2, public Tensor { -public: - Tensor2(const std::vector &shape, - const cl::CommandQueue *queue = nullptr) - : Tensor(shape, queue) { - if (shape.size() != 2) - throw std::invalid_argument("Tensor2 dimension must be 2"); - } - Tensor2(const std::vector &shape, float value, - const cl::CommandQueue *queue = nullptr) - : Tensor(shape, value, queue) { - if (shape.size() != 2) - throw std::invalid_argument("Tensor2 dimension must be 2"); - } - Tensor2(int rows, int cols, const cl::CommandQueue *queue = nullptr) - : ITensor2(), Tensor({rows, cols}, queue) {} - Tensor2(int rows, int cols, float value, - const cl::CommandQueue *queue = nullptr) - : ITensor2(), Tensor({rows, cols}, value, queue) {} - Tensor2(int rows, int cols, const std::vector &values, - const cl::CommandQueue *queue = nullptr) - : Tensor({rows, cols}, false, queue) { - fillBuf(values, queue); - } - Tensor2(const std::vector> &values, - const cl::CommandQueue *queue = nullptr) - : Tensor({(int)values.size(), (int)values[0].size()}, false) { - std::vector v(values.size() * values[0].size()); - for (size_t i = 0; i < values.size(); ++i) { - for (size_t j = 0; j 
< values[i].size(); ++j) - v[i * values[0].size() + j] = values[i][j]; - } - fillBuf(v, queue); - } - - Tensor2(const Tensor2 &other, const cl::CommandQueue *queue = nullptr) - : Tensor(other, queue) {}; - Tensor2 &operator=(const Tensor2 &other) { - Tensor::operator=(other); - return *this; - }; - Tensor2(Tensor2 &&other) : Tensor(std::move(other)) {}; - Tensor2 &operator=(Tensor2 &&other) { - Tensor::operator=(std::move(other)); - return *this; - }; - - int getRows() const override { return shape[0]; } - int getCols() const override { return shape[1]; } -}; - -class Tensor3 : public Tensor, public ITensor3 { -public: - Tensor3(const std::vector &shape, - const cl::CommandQueue *queue = nullptr) - : Tensor(shape, queue) { - if (shape.size() != 3) - throw std::invalid_argument("Tensor3 dimension must be 3"); - } - Tensor3(const std::vector &shape, float value, - const cl::CommandQueue *queue = nullptr) - : Tensor(shape, value, queue) { - if (shape.size() != 3) - throw std::invalid_argument("Tensor3 dimension must be 3"); - } - Tensor3(int d1, int d2, int d3, const cl::CommandQueue *queue = nullptr) - : Tensor({d1, d2, d3}, queue) {} - Tensor3(int d1, int d2, int d3, float value, - const cl::CommandQueue *queue = nullptr) - : Tensor({d1, d2, d3}, value, queue) {} - Tensor3(int d1, int d2, int d3, const std::vector &values, - const cl::CommandQueue *queue = nullptr) - : Tensor({d1, d2, d3}, false, queue) { - fillBuf(values, queue); - } - Tensor3(const std::vector>> &values, - const cl::CommandQueue *queue = nullptr) - : Tensor({(int)values.size(), (int)values[0].size(), - (int)values[0][0].size()}, - false, queue) { - std::vector v(shape[0] * shape[1] * shape[2]); - for (int i = 0; i < shape[0]; ++i) { - for (int j = 0; j < shape[1]; ++j) - for (int k = 0; k < shape[2]; ++k) - v[i * shape[1] * shape[2] + j * shape[1] + k] = values[i][j][k]; - } - fillBuf(v, queue); - } - Tensor3(const Tensor3 &other, const cl::CommandQueue *queue = nullptr) - : Tensor(other, queue) 
{}; - Tensor3 &operator=(const Tensor3 &other) { - Tensor::operator=(other); - return *this; - }; - Tensor3(Tensor3 &&other) : Tensor(std::move(other)) {}; - Tensor3 &operator=(Tensor3 &&other) { - Tensor::operator=(std::move(other)); - return *this; - }; -}; - -typedef Tensor0 Scalar; -typedef Tensor1 Vector; -typedef Tensor2 Matrix; - -} // namespace GPU diff --git a/src/math/tensor/math.hpp b/src/math/tensor/math.hpp deleted file mode 100644 index 1a20bca..0000000 --- a/src/math/tensor/math.hpp +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once - -#include "tensor.hpp" - -enum class Activation { LINEAR, SIGMOID, TANH, RELU, LEAKY_RELU, ELU }; -enum class Loss { MSE }; - -template -concept ITensorType = std::is_base_of_v; - -template -concept ITensor0Type = std::is_base_of_v; -template -concept ITensor1Type = std::is_base_of_v; -template -concept ITensor2Type = std::is_base_of_v; -template -concept ITensor3Type = std::is_base_of_v; - -template class ITensorMath { -protected: - void validateSameDimensions(const T &a, const T &b) const { - if (a.getDim() != b.getDim()) - throw std::invalid_argument("Tensors must have the same dimension"); - if (a.getSize() != b.getSize()) - throw std::invalid_argument("Tensors must have the same size"); - for (int i = 0; i < a.getDim(); ++i) { - if (a.getShape()[i] != b.getShape()[i]) - throw std::invalid_argument("Tensors must have the same shape"); - } - }; - -public: - virtual T activate(const T &m, Activation type, float alpha) = 0; - virtual T d_activate(const T &m, Activation type, float alpha) = 0; - - virtual T mult(const T &a, const T &b) = 0; - virtual T mult(const T &m, float x) = 0; - virtual T add(const T &a, const T &b, float x) = 0; - virtual T add(const T &m, float x) = 0; - - virtual void await() const = 0; -}; - -template class ITensor0Math {}; - -template class ITensor1Math {}; - -template class ITensor2Math { -public: - virtual M dot(const M &a, const M &b, bool transpose_a, bool transpose_b, - const V *bias, 
Activation type, float alpha) = 0; - - virtual M loss(const M &a, const M &b, Loss type) = 0; - virtual M d_loss(const M &a, const M &b, Loss type) = 0; - - virtual V axis_sum(const M &m) = 0; - - void validateMultDimensions(const M &a, const M &b, bool transpose_a, - bool transpose_b) const { - int a_cols = transpose_a ? a.getRows() : a.getCols(); - int b_rows = transpose_b ? b.getCols() : b.getRows(); - if (a_cols != b_rows) - throw std::invalid_argument( - "Invalid matrix dimensions for multiplication"); - }; - void validateBiasDimensions(const M &m, const V &v, bool transpose) const { - if ((transpose && (size_t)m.getCols() != v.getSize()) || - (!transpose && (size_t)m.getRows() != v.getSize())) - throw std::invalid_argument("Invalid matrix bias"); - }; -}; - -template class ITensor3Math {}; diff --git a/src/math/tensor/tensor.hpp b/src/math/tensor/tensor.hpp deleted file mode 100644 index 4b1ff81..0000000 --- a/src/math/tensor/tensor.hpp +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once - -#include -#include -#include - -std::random_device rd; -std::mt19937 gen(rd()); - -class ITensor { -protected: - std::vector shape; - - void validateDimensions(const std::vector &shape) const { - if (shape.empty()) - throw std::invalid_argument("Tensor shape cannot be empty"); - for (size_t i = 0; i < shape.size(); ++i) { - if (shape[i] <= 0) - throw std::invalid_argument( - "All tensor dimensions must be positive, but dimension " + - std::to_string(i) + " is " + std::to_string(shape[i])); - } - }; - -public: - ITensor(const std::vector &shape) : shape(shape) {} - ITensor(const ITensor &other) : shape(other.shape) {}; - ITensor &operator=(const ITensor &other) { - shape = other.shape; - return *this; - }; - ITensor(ITensor &&other) : shape(other.shape) {}; - ITensor &operator=(ITensor &&other) { - shape = other.shape; - return *this; - }; - - const std::vector &getShape() const { return shape; } - int getDim() const { return static_cast(shape.size()); } - size_t getSize() const { 
- size_t size = 1; - for (int dim : shape) - size *= dim; - return size; - }; - - enum class Type { SCALAR, VECTOR, MATRIX, TENSOR3 }; - Type getType() const { return static_cast(shape.size()); }; -}; - -class ITensor0 {}; - -class ITensor1 {}; - -class ITensor2 { -public: - virtual int getRows() const = 0; - virtual int getCols() const = 0; -}; - -class ITensor3 {}; - -typedef ITensor0 IScalar; -typedef ITensor1 IVector; -typedef ITensor2 IMatrix; diff --git a/src/run.py b/src/run.py new file mode 100644 index 0000000..aff44c5 --- /dev/null +++ b/src/run.py @@ -0,0 +1,5 @@ +from tensor.tensor import * + +a = Matrix([2, 3], 2) +b = Matrix([3, 2], 3) +(a @ b).print() diff --git a/src/.clangd b/src/tensor/.clangd similarity index 100% rename from src/.clangd rename to src/tensor/.clangd diff --git a/src/tensor/Makefile b/src/tensor/Makefile new file mode 100644 index 0000000..05fed62 --- /dev/null +++ b/src/tensor/Makefile @@ -0,0 +1,39 @@ +CXX = g++ +CXXFLAGS = -Wall -Wextra -O1 -g -std=c++23 + +ifeq ($(OS),Windows_NT) + DETECTED_OS := Windows +else + DETECTED_OS := $(shell uname -s) +endif +ifeq ($(DETECTED_OS),Windows) + TARGET = main.exe + MKDIR = powershell -Command "mkdir" + SHARED_LIB_EXT = pyd +else + TARGET = main + MKDIR = mkdir -p + SHARED_LIB_EXT = so +endif + +BUILD_DIR = build +COMMON_SRC = tensor.cpp + +PYTHON_PATH = $(shell python -c "from sysconfig import get_paths; print(get_paths()['data'])") +PYTHON_INCLUDE = $(PYTHON_PATH)\include +PYTHON_LIBS = $(PYTHON_PATH)\libs +PYBIND_INCLUDE = $(shell python -c "import pybind11; print(pybind11.get_include())") + +.DEFAULT_GOAL := $(TARGET) + +$(BUILD_DIR): + $(MKDIR) $(BUILD_DIR) + +$(TARGET): $(COMMON_SRC) main.cpp | $(BUILD_DIR) + $(CXX) $(CXXFLAGS) -o $@ $^ + +module: $(COMMON_SRC) pybind.cpp | $(BUILD_DIR) + $(CXX) $(CXXFLAGS) -shared -fPIC -o tensor.$(SHARED_LIB_EXT) $^ -I"$(PYTHON_INCLUDE)" -L"$(PYTHON_LIBS)" -lpython313 -I"$(PYBIND_INCLUDE)" + +clean: + rm -rf $(BUILD_DIR) $(TARGET) 
*.$(SHARED_LIB_EXT) diff --git a/src/tensor/main.cpp b/src/tensor/main.cpp new file mode 100644 index 0000000..c257ce1 --- /dev/null +++ b/src/tensor/main.cpp @@ -0,0 +1,2 @@ + +int main() { return 0; } \ No newline at end of file diff --git a/src/kernels/matrix.cl b/src/tensor/opencl/kernels/tensor.cl similarity index 100% rename from src/kernels/matrix.cl rename to src/tensor/opencl/kernels/tensor.cl diff --git a/src/math/opencl/opencl.cpp b/src/tensor/opencl/opencl.cpp similarity index 98% rename from src/math/opencl/opencl.cpp rename to src/tensor/opencl/opencl.cpp index 853029d..223e695 100644 --- a/src/math/opencl/opencl.cpp +++ b/src/tensor/opencl/opencl.cpp @@ -72,7 +72,7 @@ void OpenCL::initializeDevice() { device = devices[0]; context = cl::Context(device); - defaultQueue = cl::CommandQueue(context, device); + queue = cl::CommandQueue(context, device); std::cout << "Using device: " << device.getInfo() << "\nPlatform: " << platforms[0].getInfo() diff --git a/src/math/opencl/opencl.hpp b/src/tensor/opencl/opencl.hpp similarity index 81% rename from src/math/opencl/opencl.hpp rename to src/tensor/opencl/opencl.hpp index e509b9d..188d663 100644 --- a/src/math/opencl/opencl.hpp +++ b/src/tensor/opencl/opencl.hpp @@ -13,16 +13,16 @@ class OpenCL { public: - enum class Program { MATRIX }; + enum class Program { TENSOR }; private: cl::Device device; cl::Context context; - cl::CommandQueue defaultQueue; + cl::CommandQueue queue; std::unordered_map programs; std::unordered_map programPaths = { - {Program::MATRIX, "./kernels/matrix.cl"}}; + {Program::TENSOR, "./opencl/kernels/tensor.cl"}}; std::string readProgram(const std::string &filePath); cl::Program compileProgram(const std::string &file); @@ -40,10 +40,8 @@ public: cl::Device &getDevice() { return device; } cl::Context &getContext() { return context; } - const cl::CommandQueue &getDefaultQueue() { return defaultQueue; } + const cl::CommandQueue &getQueue() { return queue; } cl::Program &getProgram(Program 
program); void printDeviceInfo() const; }; - -extern OpenCL openCL; diff --git a/src/tensor/pybind.cpp b/src/tensor/pybind.cpp new file mode 100644 index 0000000..cfd8f05 --- /dev/null +++ b/src/tensor/pybind.cpp @@ -0,0 +1,102 @@ +#include +#include +#include + +#include "tensor.hpp" + +namespace py = pybind11; + +template +void register_tensor(py::module &m, const std::string &name) { + auto tensor = py::class_>(m, name.c_str()) + .def(py::init &>()) + .def(py::init &, T>()) + .def(py::init &, + const std::vector &>()) + .def(py::init &, T, T>()) + + .def("get_shape", &Tensor::getShape) + .def("get_data", &Tensor::getData) + .def("get_size", &Tensor::getSize) + .def("get_axes", &Tensor::getAxes) + + .def("__getitem__", + [](const Tensor &t, size_t i) -> T { + if (i >= t.getSize()) + throw py::index_error(); + return t[i]; + }) + .def("__setitem__", + [](Tensor &t, size_t i, T value) { + if (i >= t.getSize()) + throw py::index_error(); + t[i] = value; + }) + + // .def("__call__", + // [](Tensor &t, py::args args) -> T & { + // + // }) + + .def(py::self + py::self) + .def(py::self - py::self) + .def(py::self * py::self) + .def(py::self += py::self) + .def(py::self -= py::self) + .def(py::self *= py::self) + + .def(py::self + T()) + .def(py::self - T()) + .def(py::self * T()) + .def(py::self / T()) + .def(T() + py::self) + .def(T() - py::self) + .def(T() * py::self) + + .def(py::self += T()) + .def(py::self -= T()) + .def(py::self *= T()) + .def(py::self /= T()) + + .def("__pos__", [](const Tensor &t) { return +t; }) + .def("__neg__", [](const Tensor &t) { return -t; }) + + .def("print", &Tensor::print); + + if constexpr (Dim == 1 || Dim == 2) + tensor.def("__matmul__", &Tensor::operator%); + + if constexpr (Dim >= 2) { + tensor + .def("transpose", py::overload_cast &>( + &Tensor::transpose)) + .def("transpose", + py::overload_cast(&Tensor::transpose)) + .def("t", &Tensor::t); + } +} + +PYBIND11_MODULE(tensor, m) { + m.doc() = "Tensor math library"; + + 
register_tensor(m, "Scalar"); + register_tensor(m, "Vector"); + register_tensor(m, "Matrix"); + register_tensor(m, "Tensor3"); + register_tensor(m, "Tensor4"); + register_tensor(m, "Tensor5"); + + register_tensor(m, "dScalar"); + register_tensor(m, "dVector"); + register_tensor(m, "dMatrix"); + register_tensor(m, "dTensor3"); + register_tensor(m, "dTensor4"); + register_tensor(m, "dTensor5"); + + register_tensor(m, "iScalar"); + register_tensor(m, "iVector"); + register_tensor(m, "iMatrix"); + register_tensor(m, "iTensor3"); + register_tensor(m, "iTensor4"); + register_tensor(m, "iTensor5"); +} \ No newline at end of file diff --git a/src/tensor/tensor.cpp b/src/tensor/tensor.cpp new file mode 100644 index 0000000..806feb7 --- /dev/null +++ b/src/tensor/tensor.cpp @@ -0,0 +1 @@ +#include "tensor.hpp" \ No newline at end of file diff --git a/src/tensor/tensor.hpp b/src/tensor/tensor.hpp new file mode 100644 index 0000000..e50cc0f --- /dev/null +++ b/src/tensor/tensor.hpp @@ -0,0 +1,338 @@ +#include +#include +#include +#include +#include +#include + +template class Tensor { +private: + std::array shape_; + std::array axes_; + std::vector data_; + + template size_t computeIndex(Indices... 
indices) const { + static_assert(sizeof...(Indices) == Dim, "Invalid number of indices"); + std::array indicesArray = {static_cast(indices)...}; + std::array axesIndices; + for (int i = 0; i < Dim; ++i) + axesIndices[axes_[i]] = indicesArray[i]; + size_t index = 0; + size_t stride = 1; + for (int i = Dim - 1; i >= 0; --i) { + index += axesIndices[i] * stride; + stride *= shape_[i]; + } + return index; + } + + void checkItHasSameShape(const Tensor &other) { + if (getShape() != other.getShape()) + throw std::invalid_argument("Tensor shapes must match"); + } + void checkAxisInDim(int axis) { + if (axis < 0 || axis >= Dim) + throw std::invalid_argument("Invalid axis index"); + } + +public: + Tensor() = delete; + Tensor(const std::array &shape) { + for (size_t d : shape) + if (d == 0) + throw std::invalid_argument("Invalid shape"); + shape_ = shape; + for (int i = 0; i < Dim; ++i) + axes_[i] = i; + size_t total_size = 1; + for (size_t dim : shape) + total_size *= dim; + data_.resize(total_size); + } + Tensor(const std::array &shape, T fill) : Tensor(shape) { + std::fill(data_.begin(), data_.end(), fill); + } + Tensor(const std::array &shape, const std::vector &data) + : Tensor(shape) { + if (data.size() != data_.size()) + throw std::invalid_argument("Invalid data size"); + data_ = data; + } + Tensor(const std::array &shape, T min, T max) : Tensor(shape) { + static std::random_device rd; + static std::mt19937 gen(rd()); + if constexpr (std::is_integral_v) { + std::uniform_int_distribution dis(min, max); + for (auto &element : data_) + element = dis(gen); + } else if constexpr (std::is_floating_point_v) { + std::uniform_real_distribution dis(min, max); + for (auto &element : data_) + element = dis(gen); + } else + throw std::invalid_argument("Invalid randomized type"); + } + + Tensor(const Tensor &other) + : shape_(other.shape_), axes_(other.axes_), data_(other.data_) {} + Tensor &operator=(const Tensor &other) { + shape_ = other.shape_; + axes_ = other.axes_; + data_ = 
other.data_; + return *this; + } + Tensor(Tensor &&other) noexcept + : shape_(std::move(other.shape_)), axes_(std::move(other.axes_)), + data_(std::move(other.data_)) {} + Tensor &operator=(Tensor &&other) noexcept { + shape_ = std::move(other.shape_); + axes_ = std::move(other.axes_); + data_ = std::move(other.data_); + return *this; + } + ~Tensor() = default; + + const std::array &getAxes() const { return axes_; } + const std::vector &getData() const { return data_; } + size_t getSize() const { return data_.size(); } + const std::array getShape() const { + std::array result; + for (int i = 0; i < Dim; ++i) + result[i] = shape_[axes_[i]]; + return result; + } + + T &operator[](size_t i) { return data_[i]; } + const T &operator[](size_t i) const { return data_[i]; } + + template T &operator()(Indices... indices) { + return data_[computeIndex(indices...)]; + } + template const T &operator()(Indices... indices) const { + return data_[computeIndex(indices...)]; + } + + Tensor &transpose(const std::array &new_axes) { + std::array used{}; + for (int axis : new_axes) { + checkAxisInDim(axis); + if (used[axis]) + throw std::invalid_argument("Duplicate axis index"); + used[axis] = true; + } + axes_ = new_axes; + return *this; + } + Tensor &transpose(int axis_a, int axis_b) { + checkAxisInDim(axis_a); + checkAxisInDim(axis_b); + if (axis_a == axis_b) + throw std::invalid_argument("Duplicate axis index"); + std::swap(axes_[axis_a], axes_[axis_b]); + return *this; + } + Tensor &t() { + static_assert(Dim >= 2, "Can't change the only axis"); + std::swap(axes_[Dim - 1], axes_[Dim - 2]); + return *this; + } + + Tensor operator+() const { return *this; } + Tensor operator-() const { + Tensor result = *this; + for (T &e : result.data_) + e = -e; + return result; + } + + Tensor &operator+=(const T &scalar) { + for (T &e : data_) + e += scalar; + return *this; + } + Tensor operator+(const T &scalar) const { + Tensor result = *this; + result += scalar; + return result; + } + friend 
Tensor operator+(const T &scalar, const Tensor &tensor) {
+    return tensor + scalar;
+  }
+
+  Tensor &operator-=(const T &scalar) {
+    for (T &e : data_)
+      e -= scalar;
+    return *this;
+  }
+  Tensor operator-(const T &scalar) const {
+    Tensor result = *this;
+    result -= scalar;
+    return result;
+  }
+  friend Tensor operator-(const T &scalar, const Tensor &tensor) {
+    Tensor result = tensor;
+    for (T &e : result.data_)
+      e = scalar - e;
+    return result;
+  }
+
+  Tensor &operator*=(const T &scalar) {
+    for (T &e : data_)
+      e *= scalar;
+    return *this;
+  }
+  Tensor operator*(const T &scalar) const {
+    Tensor result = *this;
+    result *= scalar;
+    return result;
+  }
+  friend Tensor operator*(const T &scalar, const Tensor &tensor) {
+    return tensor * scalar;
+  }
+
+  Tensor &operator/=(const T &scalar) {
+    if (scalar == T(0))
+      throw std::invalid_argument("Division by zero");
+    for (T &e : data_)
+      e /= scalar;
+    return *this;
+  }
+  Tensor operator/(const T &scalar) const {
+    Tensor result = *this;
+    result /= scalar;
+    return result;
+  }
+
+  Tensor &operator+=(const Tensor &other) {
+    checkItHasSameShape(other);
+    for (size_t i = 0; i < data_.size(); ++i)
+      data_[i] += other.data_[i];
+    return *this;
+  }
+  Tensor operator+(const Tensor &other) const {
+    Tensor result = *this;
+    result += other;
+    return result;
+  }
+
+  Tensor &operator-=(const Tensor &other) {
+    checkItHasSameShape(other);
+    for (size_t i = 0; i < data_.size(); ++i)
+      data_[i] -= other.data_[i];
+    return *this;
+  }
+  Tensor operator-(const Tensor &other) const {
+    Tensor result = *this;
+    result -= other;
+    return result;
+  }
+
+  Tensor &operator*=(const Tensor &other) {
+    checkItHasSameShape(other);
+    for (size_t i = 0; i < data_.size(); ++i)
+      data_[i] *= other.data_[i];
+    return *this;
+  }
+  Tensor operator*(const Tensor &other) const {
+    Tensor result = *this;
+    result *= other;
+    return result;
+  }
+
+  Tensor operator%(const Tensor &other) const {
+    static_assert(Dim == 1 || Dim == 2,
+                  "Inner product is only defined for vectors and matrices");
+    if constexpr (Dim == 1) {
+      if (data_.size() != other.data_.size())
+        throw std::invalid_argument(
+            "Vector sizes must match for inner product");
+      T result_val = T(0);
+      for (size_t i = 0; i < data_.size(); ++i)
+        result_val += data_[i] * other.data_[i];
+      return Tensor({}, {result_val});
+    } else if constexpr (Dim == 2) {
+      if (shape_[axes_[1]] != other.shape_[other.axes_[0]])
+        throw std::invalid_argument(
+            "Matrix dimensions must match for multiplication");
+      size_t m = shape_[axes_[0]];
+      size_t n = shape_[axes_[1]];
+      size_t p = other.shape_[other.axes_[1]];
+      Tensor result({m, p}, T(0));
+      for (size_t i = 0; i < m; ++i) {
+        for (size_t j = 0; j < p; ++j) {
+          T sum = T(0);
+          for (size_t k = 0; k < n; ++k)
+            sum += (*this)(i, k) * other(k, j);
+          result(i, j) = sum;
+        }
+      }
+      return result;
+    }
+  }
+
+  void print() const {
+    if constexpr (Dim == 0) {
+      std::cout << "Scalar<" << typeid(T).name() << ">: " << data_[0]
+                << std::endl;
+    } else if constexpr (Dim == 1) {
+      std::cout << "Vector<" << typeid(T).name() << ">(" << shape_[0] << "): [";
+      for (size_t i = 0; i < data_.size(); ++i) {
+        std::cout << data_[i];
+        if (i < data_.size() - 1)
+          std::cout << ", ";
+      }
+      std::cout << "]" << std::endl;
+    } else if constexpr (Dim == 2) {
+      std::cout << "Matrix<" << typeid(T).name() << ">(" << shape_[axes_[0]]
+                << "x" << shape_[axes_[1]] << "):" << std::endl;
+      for (size_t i = 0; i < shape_[axes_[0]]; ++i) {
+        std::cout << " [";
+        for (size_t j = 0; j < shape_[axes_[1]]; ++j) {
+          std::cout << (*this)(i, j);
+          if (j < shape_[axes_[1]] - 1)
+            std::cout << ", ";
+        }
+        std::cout << "]" << std::endl;
+      }
+    } else {
+      std::cout << "Tensor" << Dim << "D<" << typeid(T).name() << ">" << "[";
+      for (size_t i = 0; i < Dim; ++i) {
+        std::cout << shape_[axes_[i]];
+        if (i < Dim - 1)
+          std::cout << "x";
+      }
+      std::cout << "]: [";
+      size_t show = std::min(data_.size(), size_t(10));
+      for (size_t i = 0; i < show; ++i) {
+        std::cout << 
data_[i];
+        if (i < show - 1)
+          std::cout << ", ";
+      }
+      if (data_.size() > 10)
+        std::cout << ", ...";
+      std::cout << "]" << std::endl;
+    }
+  }
+};
+
+template <typename T> using Scalar = Tensor<T, 0>;
+template <typename T> using Vector = Tensor<T, 1>;
+template <typename T> using Matrix = Tensor<T, 2>;
+
+class Tensors {
+  Tensors() = delete;
+
+public:
+  template <typename T, typename... Args> static auto empty(Args... args) {
+    return Tensor<T, sizeof...(Args)>({static_cast<size_t>(args)...});
+  }
+
+  template <typename T, typename... Args> static auto zero(Args... args) {
+    return Tensor<T, sizeof...(Args)>({static_cast<size_t>(args)...}, T(0));
+  }
+
+  template <typename T, typename... Args> static auto rand(Args... args) {
+    return Tensor<T, sizeof...(Args)>({static_cast<size_t>(args)...}, T(0),
+                                      T(1));
+  }
+};
\ No newline at end of file
diff --git a/src/utils/output.h b/src/utils/output.h
deleted file mode 100644
index 8e3f9c3..0000000
--- a/src/utils/output.h
+++ /dev/null
@@ -1,61 +0,0 @@
-#pragma once
-
-#include <cstdio>
-
-// Определения цветов и стилей
-#define RESET "\033[0m"
-#define BOLD "\033[1m"
-#define ITALIC "\033[3m"
-#define UNDERLINE "\033[4m"
-
-// Цвета текста
-#define BLACK "\033[30m"
-#define RED "\033[31m"
-#define GREEN "\033[32m"
-#define YELLOW "\033[33m"
-#define BLUE "\033[34m"
-#define MAGENTA "\033[35m"
-#define CYAN "\033[36m"
-#define WHITE "\033[37m"
-
-// Фоновые цвета
-#define BG_BLACK "\033[40m"
-#define BG_RED "\033[41m"
-#define BG_GREEN "\033[42m"
-#define BG_YELLOW "\033[43m"
-#define BG_BLUE "\033[44m"
-#define BG_MAGENTA "\033[45m"
-#define BG_CYAN "\033[46m"
-#define BG_WHITE "\033[47m"
-
-#define printff(format_codes, ...)                                             \
-  do {                                                                         \
-    printf("%s", format_codes);                                                \
-    printf(__VA_ARGS__);                                                       \
-    printf("\033[0m\n");                                                       \
-  } while (0)
-
-#ifdef DEBUG_MODE
-#define debug(fmt, ...)                                                        \
-  do {                                                                         \
-    printf(fmt, ##__VA_ARGS__);                                                \
-    printf("\n");                                                              \
-  } while (0)
-#define debugi(fmt, ...)                                                       \
-  do {                                                                         \
-    printf(fmt, ##__VA_ARGS__);                                                \
-  } while (0)
-#define debugf(format_codes, fmt, ...)                                         \
-  do {                                                                         \
-    printf("%s", format_codes);                                                \
-    printf("[%s:%d] ", __FILE__, __LINE__);                                    \
-    printf(fmt, ##__VA_ARGS__);                                                \
-    printf("\n");                                                              \
-    printf("\033[0m\n");                                                       \
-  } while (0)
-#define loge(fmt, ...) logff(RED UNDERLINE, fmt, ##__VA_ARGS__)
-#else
-#define debug(fmt, ...)
-#define debugi(fmt, ...)
-#define debugf(format_codes, fmt, ...)
-#endif