mirror of
https://github.com/StepanovPlaton/NeuralNetwork.git
synced 2026-04-03 20:30:39 +04:00
Complete tensors math
This commit is contained in:
@@ -2,7 +2,7 @@ CXX = g++
|
||||
CXXFLAGS = -Wall -Wextra -O2 -std=c++23
|
||||
LIBS = -lOpenCL
|
||||
TARGET = main
|
||||
COMMON_SRC = ./math/opencl/opencl.cpp ./math/matrix/cpu/matrix.cpp ./math/matrix/cpu/mutable_matrix.cpp ./math/matrix/gpu/matrix.cpp ./math/matrix/gpu/mutable_matrix.cpp
|
||||
COMMON_SRC = ./math/opencl/opencl.cpp
|
||||
MAIN_SRC = main.cpp $(COMMON_SRC)
|
||||
BENCHMARK_SRC = benchmark.cpp $(COMMON_SRC)
|
||||
|
||||
|
||||
@@ -6,8 +6,7 @@
|
||||
|
||||
#include "./math/math.hpp"
|
||||
|
||||
typedef Matrices::CPU Matrix;
|
||||
typedef MutableMatrices::CPU MutableMatrix;
|
||||
using namespace GPU;
|
||||
|
||||
OpenCL openCL;
|
||||
|
||||
@@ -31,40 +30,37 @@ std::vector<float> generateIdentityMatrix(int size) {
|
||||
}
|
||||
|
||||
int main() {
|
||||
const int SIZE = 1024;
|
||||
const int SIZE = 48;
|
||||
|
||||
std::cout << "Testing with " << SIZE << "x" << SIZE << " matrices..."
|
||||
<< std::endl;
|
||||
|
||||
std::vector<float> matrixA = generateRandomMatrix(SIZE, SIZE);
|
||||
std::vector<float> matrixB = generateRandomMatrix(SIZE, SIZE);
|
||||
std::vector<float> matrixC = generateRandomMatrix(SIZE, SIZE);
|
||||
// std::vector<float> matrixA = generateRandomMatrix(SIZE, SIZE);
|
||||
// std::vector<float> matrixB = generateRandomMatrix(SIZE, SIZE);
|
||||
// std::vector<float> matrixC = generateRandomMatrix(SIZE, SIZE);
|
||||
|
||||
// std::vector<float> matrixA = generateIdentityMatrix(SIZE);
|
||||
// std::vector<float> matrixB = generateIdentityMatrix(SIZE);
|
||||
// std::vector<float> matrixC = generateIdentityMatrix(SIZE);
|
||||
std::vector<float> matrixA = generateIdentityMatrix(SIZE);
|
||||
std::vector<float> matrixB = generateIdentityMatrix(SIZE);
|
||||
std::vector<float> matrixC = generateIdentityMatrix(SIZE);
|
||||
|
||||
// Тестирование на CPU
|
||||
// Тестирование на GPU
|
||||
{
|
||||
std::cout << "\n=== CPU Version ===" << std::endl;
|
||||
std::cout << "\n=== GPU Version ===" << std::endl;
|
||||
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
MutableMatrices::CPU a(SIZE, SIZE, matrixA);
|
||||
Matrices::CPU b(SIZE, SIZE, matrixB);
|
||||
Matrices::CPU c(SIZE, SIZE, matrixC);
|
||||
MatrixMath mm;
|
||||
Matrix a(SIZE, SIZE, matrixA);
|
||||
Matrix b(SIZE, SIZE, matrixB);
|
||||
|
||||
auto gen_end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
auto op_start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
for (int i = 0; i < 10; i++) {
|
||||
a.mult(b, 0.2f, MutableMatrices::CPU::Activate::SIGMOID);
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
Matrix x = mm.mult(a, b);
|
||||
}
|
||||
|
||||
auto op_end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
std::vector<float> v = a.toVector();
|
||||
std::vector<float> v = a.toVector(&mm.getQueue());
|
||||
|
||||
auto total_end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
@@ -88,24 +84,22 @@ int main() {
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
// Тестирование на GPU
|
||||
// Тестирование на CPU
|
||||
{
|
||||
std::cout << "\n=== GPU Version ===" << std::endl;
|
||||
std::cout << "\n=== CPU Version ===" << std::endl;
|
||||
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
MutableMatrices::GPU a(SIZE, SIZE, matrixA);
|
||||
Matrices::GPU b(SIZE, SIZE, matrixB);
|
||||
Matrices::GPU c(SIZE, SIZE, matrixC);
|
||||
CPU::MatrixMath mm;
|
||||
CPU::Matrix a(SIZE, SIZE, matrixA);
|
||||
CPU::Matrix b(SIZE, SIZE, matrixB);
|
||||
|
||||
auto gen_end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
auto op_start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
for (int i = 0; i < 10; i++) {
|
||||
a.mult(b, 0.2f, MutableMatrices::GPU::Activate::SIGMOID, 0.0f);
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
CPU::Matrix x = mm.mult(a, b);
|
||||
}
|
||||
|
||||
auto op_end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
std::vector<float> v = a.toVector();
|
||||
|
||||
@@ -13,40 +13,54 @@ float activate_x(float x, const int activation_type, const float alpha) {
|
||||
case 5: // ELU
|
||||
return (x > 0.0f) ? x : alpha * (exp(x) - 1.0f);
|
||||
case 6: // GELU
|
||||
return 0.5f * x * (1.0f + tanh(sqrt(2.0f / M_PI_F) * (x + 0.044715f * x * x * x)));
|
||||
return 0.5f * x *
|
||||
(1.0f + tanh(sqrt(2.0f / M_PI_F) * (x + 0.044715f * x * x * x)));
|
||||
default:
|
||||
return x;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void activate(
|
||||
__global float* input,
|
||||
__global float* output,
|
||||
const int activation_type,
|
||||
const float alpha,
|
||||
const int rows,
|
||||
const int cols)
|
||||
{
|
||||
int row = get_global_id(0);
|
||||
int col = get_global_id(1);
|
||||
__kernel void activate(__global float *input, __global float *output,
|
||||
const int activation_type, const float alpha) {
|
||||
int i = get_global_id(0);
|
||||
output[i] = activate_x(input[i], activation_type, alpha);
|
||||
}
|
||||
|
||||
if (row < rows && col < cols) {
|
||||
int idx = row * cols + col;
|
||||
output[idx] = activate_x(input[idx], activation_type, alpha);
|
||||
__kernel void mult_small(__global float *A, __global float *B,
|
||||
__global float *C, const float bias,
|
||||
const int activation_type, const float alpha,
|
||||
const int M, const int N, const int K,
|
||||
const int transpose_B) {
|
||||
const int row = get_global_id(0);
|
||||
const int col = get_global_id(1);
|
||||
|
||||
if (row < M && col < N) {
|
||||
float sum = 0.0f;
|
||||
for (int k = 0; k < K; k++) {
|
||||
float a_val = A[row * K + k];
|
||||
|
||||
float b_val;
|
||||
if (transpose_B) {
|
||||
b_val = B[col * K + k];
|
||||
} else {
|
||||
b_val = B[k * N + col];
|
||||
}
|
||||
|
||||
sum += a_val * b_val;
|
||||
}
|
||||
|
||||
float result = sum + bias;
|
||||
if (activation_type != 0) {
|
||||
result = activate_x(result, activation_type, alpha);
|
||||
}
|
||||
C[row * N + col] = result;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void mult(
|
||||
__global float* A,
|
||||
__global float* B,
|
||||
__global float* C,
|
||||
const float bias,
|
||||
const int activation_type,
|
||||
const float alpha,
|
||||
const int M,
|
||||
const int N,
|
||||
const int K)
|
||||
{
|
||||
__kernel void mult(__global float *A, __global float *B, __global float *C,
|
||||
const float bias, const int activation_type,
|
||||
const float alpha, const int M, const int N, const int K,
|
||||
const int transpose_B) {
|
||||
const int tile_size = 16;
|
||||
|
||||
int local_i = get_local_id(0);
|
||||
@@ -67,6 +81,7 @@ __kernel void mult(
|
||||
for (int tile = 0; tile < num_tiles; tile++) {
|
||||
int tile_offset = tile * tile_size;
|
||||
|
||||
// Загрузка tile_A (без изменений)
|
||||
int load_i_A = tile_offset + local_i;
|
||||
int load_j_A = tile_offset + local_j;
|
||||
|
||||
@@ -76,19 +91,30 @@ __kernel void mult(
|
||||
tile_A[local_j][local_i] = 0.0f;
|
||||
}
|
||||
|
||||
// Загрузка tile_B с учетом транспонирования
|
||||
int load_i_B = tile_offset + local_i;
|
||||
int load_j_B = tile_offset + local_j;
|
||||
|
||||
if (transpose_B) {
|
||||
// B транспонирована: обращаем индексы
|
||||
if (load_i_B < N && global_j < K) {
|
||||
tile_B[local_j][local_i] = B[global_j * N + load_i_B];
|
||||
} else {
|
||||
tile_B[local_j][local_i] = 0.0f;
|
||||
}
|
||||
} else {
|
||||
// B не транспонирована (оригинальная логика)
|
||||
if (load_i_B < K && global_j < N) {
|
||||
tile_B[local_j][local_i] = B[load_i_B * N + global_j];
|
||||
} else {
|
||||
tile_B[local_j][local_i] = 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#pragma unroll
|
||||
for (int k = 0; k < tile_size; k++) {
|
||||
for (int k = 0; k < tile_size; ++k) {
|
||||
sum += tile_A[k][local_i] * tile_B[local_j][k];
|
||||
}
|
||||
|
||||
@@ -104,21 +130,18 @@ __kernel void mult(
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void mult_sc(__global float* A, __global float* B, float scalar, int M, int N) {
|
||||
__kernel void mult_sc(__global float *A, __global float *B, float scalar) {
|
||||
int i = get_global_id(0);
|
||||
int j = get_global_id(1);
|
||||
B[i * N + j] = A[i * N + j] * scalar;
|
||||
B[i] = A[i] * scalar;
|
||||
}
|
||||
|
||||
__kernel void add(__global float* A, __global float* B, __global float* C, float a, float b, int M, int N) {
|
||||
__kernel void add(__global float *A, __global float *B, __global float *C,
|
||||
float x) {
|
||||
int i = get_global_id(0);
|
||||
int j = get_global_id(1);
|
||||
C[i * N + j] = (A[i * N + j] * a) + (B[i * N + j] * b);
|
||||
C[i] = A[i] + (B[i] * x);
|
||||
}
|
||||
|
||||
__kernel void add_sc(__global float* A, __global float* B, float scalar, int M, int N) {
|
||||
__kernel void add_sc(__global float *A, __global float *B, float scalar) {
|
||||
int i = get_global_id(0);
|
||||
int j = get_global_id(1);
|
||||
B[i * N + j] = A[i * N + j] + scalar;
|
||||
B[i] = A[i] + scalar;
|
||||
}
|
||||
|
||||
|
||||
74
src/main.cpp
74
src/main.cpp
@@ -3,73 +3,29 @@
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
typedef Matrices::GPU M;
|
||||
typedef MutableMatrices::GPU MM;
|
||||
|
||||
class Layer {
|
||||
protected:
|
||||
int features;
|
||||
float bias;
|
||||
MM::Activate activate;
|
||||
float alpha;
|
||||
|
||||
public:
|
||||
Layer(int features, MM::Activate activate = MM::Activate::LINEAR,
|
||||
float bias = 0.0f, float alpha = 0.0f)
|
||||
: features(features), activate(activate), bias(bias), alpha(alpha) {}
|
||||
|
||||
int getFeatures() const { return features; }
|
||||
float getBias() const { return bias; }
|
||||
MM::Activate getActivate() const { return activate; }
|
||||
float getAlpha() const { return alpha; }
|
||||
};
|
||||
|
||||
class NeuralNetwork {
|
||||
private:
|
||||
std::vector<Layer> layers;
|
||||
std::vector<MM> weights;
|
||||
|
||||
public:
|
||||
NeuralNetwork(int n, std::initializer_list<Layer> l) : layers(l) {
|
||||
weights.emplace_back(n, layers[0].getFeatures());
|
||||
for (int i = 0; i < layers.size() - 1; i++)
|
||||
weights.emplace_back(layers[i].getFeatures(),
|
||||
layers[i + 1].getFeatures());
|
||||
}
|
||||
|
||||
std::vector<float> predict(std::vector<float> i) {
|
||||
if (i.size() != weights[0].getRows())
|
||||
std::invalid_argument("Invalid input size");
|
||||
MM input(1, (int)i.size(), i);
|
||||
for (size_t i = 0; i < weights.size(); i++)
|
||||
input.mult(weights[i], layers[i + 1].getBias(),
|
||||
layers[i + 1].getActivate(), layers[i + 1].getAlpha());
|
||||
return input.toVector();
|
||||
}
|
||||
};
|
||||
using namespace GPU;
|
||||
|
||||
OpenCL openCL;
|
||||
|
||||
int main() {
|
||||
NeuralNetwork nn(
|
||||
2, {Layer(3, MM::Activate::RELU), Layer(1, MM::Activate::RELU)});
|
||||
MatrixMath mm;
|
||||
|
||||
for (int i = 0; i < 10; i++) {
|
||||
int v1 = (i / 2) % 2;
|
||||
int v2 = i % 2;
|
||||
Matrix a(2, 2);
|
||||
Matrix b(2, 2);
|
||||
|
||||
std::vector<float> v = {static_cast<float>(v1), static_cast<float>(v2)};
|
||||
CPU::Matrix a_(2, 2, a.toVector());
|
||||
CPU::Matrix b_(2, 2, b.toVector());
|
||||
|
||||
std::vector<float> r = nn.predict(v);
|
||||
float expected = static_cast<float>(v1 ^ v2);
|
||||
a_.print();
|
||||
b_.print();
|
||||
|
||||
std::cout << "XOR(" << v1 << ", " << v2 << ") = " << expected;
|
||||
std::cout << " | Network: ";
|
||||
for (size_t j = 0; j < r.size(); ++j) {
|
||||
std::cout << r[j] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
Matrix c = mm.add(a, b);
|
||||
|
||||
CPU::Matrix c_(2, 2, c.toVector(&mm.getQueue()));
|
||||
|
||||
mm.await();
|
||||
|
||||
c_.print();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2,8 +2,6 @@
|
||||
|
||||
#include "opencl/opencl.hpp"
|
||||
|
||||
#include "matrix/cpu/matrix.hpp"
|
||||
#include "matrix/cpu/mutable_matrix.hpp"
|
||||
#include "tensor/cpu/math.hpp"
|
||||
|
||||
#include "matrix/gpu/matrix.hpp"
|
||||
#include "matrix/gpu/mutable_matrix.hpp"
|
||||
#include "tensor/gpu/math.hpp"
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
#include "matrix.hpp"
|
||||
|
||||
Matrices::CPU::CPU(int rows, int cols, float value)
|
||||
: IMatrix(rows, cols), data(rows * cols, value) {
|
||||
validateDimensions(rows, cols);
|
||||
}
|
||||
|
||||
Matrices::CPU::CPU(int rows, int cols, const std::vector<float> &matrix)
|
||||
: IMatrix(rows, cols), data(matrix) {
|
||||
validateDimensions(rows, cols);
|
||||
if (matrix.size() != static_cast<size_t>(rows * cols)) {
|
||||
throw std::invalid_argument("Data size doesn't match matrix dimensions");
|
||||
}
|
||||
}
|
||||
|
||||
float &Matrices::CPU::operator()(int row, int col) {
|
||||
checkIndices(row, col);
|
||||
return data[row * cols + col];
|
||||
}
|
||||
|
||||
const float &Matrices::CPU::operator()(int row, int col) const {
|
||||
checkIndices(row, col);
|
||||
return data[row * cols + col];
|
||||
}
|
||||
@@ -1,38 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
|
||||
#include "../matrix.hpp"
|
||||
|
||||
namespace Matrices {
|
||||
|
||||
class CPU : public IMatrix {
|
||||
protected:
|
||||
std::vector<float> data;
|
||||
|
||||
public:
|
||||
CPU(int rows, int cols, float value = 0.0f);
|
||||
CPU(int rows, int cols, const std::vector<float> &matrix);
|
||||
|
||||
CPU(const CPU &) = default;
|
||||
CPU &operator=(const CPU &) = default;
|
||||
CPU(CPU &&) = default;
|
||||
CPU &operator=(CPU &&) = default;
|
||||
~CPU() override = default;
|
||||
|
||||
float &operator()(int row, int col);
|
||||
const float &operator()(int row, int col) const;
|
||||
|
||||
const std::vector<float> toVector() const { return data; }
|
||||
|
||||
int getRows() const override { return rows; }
|
||||
int getCols() const override { return cols; }
|
||||
size_t getSize() const { return data.size(); }
|
||||
|
||||
// GPU toGPU(OpenCL &openCL) const { return GPU(rows, cols, data); }
|
||||
};
|
||||
|
||||
} // namespace Matrices
|
||||
@@ -1,76 +0,0 @@
|
||||
#include "mutable_matrix.hpp"
|
||||
|
||||
float MutableMatrices::CPU::activate_x(float x, Activate type, float alpha) {
|
||||
switch (type) {
|
||||
case Activate::LINEAR:
|
||||
return x;
|
||||
case Activate::SIGMOID:
|
||||
return 1.0f / (1.0f + std::exp(-x));
|
||||
case Activate::TANH:
|
||||
return std::tanh(x);
|
||||
case Activate::RELU:
|
||||
return std::max(0.0f, x);
|
||||
case Activate::LEAKY_RELU:
|
||||
return (x > 0.0f) ? x : alpha * x;
|
||||
case Activate::ELU:
|
||||
return (x > 0.0f) ? x : alpha * (std::exp(x) - 1.0f);
|
||||
case Activate::GELU:
|
||||
return 0.5f * x *
|
||||
(1.0f +
|
||||
std::tanh(std::sqrt(2.0f / M_PI) * (x + 0.044715f * x * x * x)));
|
||||
default:
|
||||
throw std::invalid_argument("Unknown activation type");
|
||||
}
|
||||
}
|
||||
void MutableMatrices::CPU::mult(Matrices::CPU &m, float bias, Activate type,
|
||||
float alpha) {
|
||||
validateMultDimensions(*this, m);
|
||||
|
||||
std::vector<float> result(rows * m.getCols(), 0.0f);
|
||||
for (int i = 0; i < rows; i++) {
|
||||
for (int j = 0; j < m.getCols(); j++) {
|
||||
float sum = 0.0f;
|
||||
for (int k = 0; k < cols; k++) {
|
||||
sum += (*this)(i, k) * m(k, j);
|
||||
}
|
||||
result[i * m.getCols() + j] = activate_x(sum + bias, type, alpha);
|
||||
}
|
||||
}
|
||||
data = std::move(result);
|
||||
cols = m.getCols();
|
||||
}
|
||||
|
||||
void MutableMatrices::CPU::mult(float scalar) {
|
||||
for (int i = 0; i < rows; i++) {
|
||||
for (int j = 0; j < cols; j++) {
|
||||
data[i * cols + j] *= scalar;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MutableMatrices::CPU::add(Matrices::CPU &m, float a, float b) {
|
||||
validateSameDimensions(*this, m);
|
||||
|
||||
std::vector<float> result(rows * cols, 0.0f);
|
||||
for (int i = 0; i < rows; i++) {
|
||||
for (int j = 0; j < cols; j++) {
|
||||
result[i * cols + j] = ((*this)(i, j) * a) + (m(i, j) * b);
|
||||
}
|
||||
}
|
||||
data = std::move(result);
|
||||
}
|
||||
|
||||
void MutableMatrices::CPU::add(float scalar) {
|
||||
for (int i = 0; i < rows; i++) {
|
||||
for (int j = 0; j < cols; j++) {
|
||||
data[i * cols + j] += scalar;
|
||||
}
|
||||
}
|
||||
}
|
||||
void MutableMatrices::CPU::activate(Activate type, float alpha) {
|
||||
for (int i = 0; i < rows; i++) {
|
||||
for (int j = 0; j < cols; j++) {
|
||||
data[i * cols + j] = activate_x(data[i * cols + j], type, alpha);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "matrix.hpp"
|
||||
|
||||
#include "../mutable_matrix.hpp"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#define M_PI 3.14159265358979323846
|
||||
|
||||
namespace MutableMatrices {
|
||||
|
||||
class CPU : public Matrices::CPU, public IMutableMatrix<Matrices::CPU> {
|
||||
private:
|
||||
static float activate_x(float x, Activate type, float alpha = 0.01f);
|
||||
|
||||
public:
|
||||
CPU(int rows, int cols, const std::vector<float> &matrix)
|
||||
: Matrices::CPU(rows, cols, matrix) {}
|
||||
|
||||
void mult(Matrices::CPU &m, float bias = 0.0f,
|
||||
Activate type = Activate::LINEAR, float alpha = 0.01f);
|
||||
void mult(float scalar);
|
||||
void add(Matrices::CPU &m, float a = 1.0f, float b = 1.0f);
|
||||
void add(float scalar);
|
||||
void activate(Activate type, float alpha = 0.01f);
|
||||
};
|
||||
}; // namespace MutableMatrices
|
||||
@@ -1,41 +0,0 @@
|
||||
#include <random>
|
||||
|
||||
#include "matrix.hpp"
|
||||
|
||||
std::random_device rd;
|
||||
std::mt19937 gen(rd());
|
||||
|
||||
Matrices::GPU::GPU(int rows, int cols)
|
||||
: IMatrix(rows, cols), queue(openCL.getContext(), openCL.getDevice()) {
|
||||
validateDimensions(rows, cols);
|
||||
std::vector<float> matrix;
|
||||
matrix.reserve(rows * cols);
|
||||
for (size_t i = 0; i < (size_t)rows * (size_t)cols; ++i)
|
||||
matrix.push_back(std::generate_canonical<float, 32>(gen));
|
||||
buffer = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE,
|
||||
rows * cols * sizeof(float));
|
||||
queue.enqueueWriteBuffer(*buffer, CL_TRUE, 0, rows * cols * sizeof(float),
|
||||
matrix.data());
|
||||
queue.finish();
|
||||
}
|
||||
|
||||
Matrices::GPU::GPU(int rows, int cols, const std::vector<float> &matrix)
|
||||
: IMatrix(rows, cols), queue(openCL.getContext(), openCL.getDevice()) {
|
||||
validateDimensions(rows, cols);
|
||||
if (matrix.size() != static_cast<size_t>(rows * cols)) {
|
||||
throw std::invalid_argument("Matrix data size doesn't match dimensions");
|
||||
}
|
||||
buffer = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE,
|
||||
rows * cols * sizeof(float));
|
||||
queue.enqueueWriteBuffer(*buffer, CL_TRUE, 0, rows * cols * sizeof(float),
|
||||
matrix.data());
|
||||
queue.finish();
|
||||
}
|
||||
|
||||
const std::vector<float> Matrices::GPU::toVector() const {
|
||||
std::vector<float> result(rows * cols);
|
||||
queue.enqueueReadBuffer(*buffer, CL_TRUE, 0, rows * cols * sizeof(float),
|
||||
result.data());
|
||||
queue.finish();
|
||||
return result;
|
||||
}
|
||||
@@ -1,40 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../opencl/opencl.hpp"
|
||||
|
||||
#include "../matrix.hpp"
|
||||
|
||||
namespace Matrices {
|
||||
class GPU : public IMatrix {
|
||||
protected:
|
||||
cl::Buffer *buffer;
|
||||
cl::CommandQueue queue;
|
||||
|
||||
public:
|
||||
GPU(int rows, int cols);
|
||||
GPU(int rows, int cols, const std::vector<float> &matrix);
|
||||
~GPU() { delete buffer; }
|
||||
|
||||
GPU(const GPU &) = delete;
|
||||
GPU &operator=(const GPU &) = delete;
|
||||
GPU(GPU &&other)
|
||||
: IMatrix(other.rows, other.cols), buffer(other.buffer),
|
||||
queue(std::move(other.queue)) {
|
||||
other.buffer = nullptr;
|
||||
other.rows = 0;
|
||||
other.cols = 0;
|
||||
}
|
||||
GPU &operator=(GPU &&other) = default;
|
||||
|
||||
int getRows() const override { return rows; }
|
||||
int getCols() const override { return cols; }
|
||||
size_t getSize() const { return rows * cols; }
|
||||
|
||||
const cl::Buffer *getBuffer() const { return buffer; }
|
||||
|
||||
const std::vector<float> toVector() const;
|
||||
|
||||
// CPU toCPU() const { return CPU(rows, cols, toVector()); };
|
||||
};
|
||||
|
||||
} // namespace Matrices
|
||||
@@ -1,120 +0,0 @@
|
||||
#include "mutable_matrix.hpp"
|
||||
|
||||
MutableMatrices::GPU::GPU(int rows, int cols) : Matrices::GPU(rows, cols) {
|
||||
for (const auto &entry : kernelsNames) {
|
||||
kernels[entry.first] =
|
||||
cl::Kernel(openCL.getProgram(OpenCL::Program::MATRIX), entry.second);
|
||||
}
|
||||
}
|
||||
|
||||
MutableMatrices::GPU::GPU(int rows, int cols, const std::vector<float> &matrix)
|
||||
: Matrices::GPU(rows, cols, matrix) {
|
||||
for (const auto &entry : kernelsNames) {
|
||||
kernels[entry.first] =
|
||||
cl::Kernel(openCL.getProgram(OpenCL::Program::MATRIX), entry.second);
|
||||
}
|
||||
}
|
||||
|
||||
void MutableMatrices::GPU::mult(Matrices::GPU &m, float bias, Activate type,
|
||||
float alpha) {
|
||||
validateMultDimensions(*this, m);
|
||||
|
||||
cl::Buffer *b = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE,
|
||||
rows * m.getCols() * sizeof(float));
|
||||
|
||||
const int tile_size = 16;
|
||||
cl::NDRange local_size(tile_size, tile_size);
|
||||
cl::NDRange global_size(((rows + tile_size - 1) / tile_size) * tile_size,
|
||||
((m.getCols() + tile_size - 1) / tile_size) *
|
||||
tile_size);
|
||||
|
||||
kernels[Method::MULT].setArg(0, *buffer);
|
||||
kernels[Method::MULT].setArg(1, *m.getBuffer());
|
||||
kernels[Method::MULT].setArg(2, *b);
|
||||
kernels[Method::MULT].setArg(3, bias);
|
||||
kernels[Method::MULT].setArg(4, static_cast<int>(type));
|
||||
kernels[Method::MULT].setArg(5, alpha);
|
||||
kernels[Method::MULT].setArg(6, rows);
|
||||
kernels[Method::MULT].setArg(7, m.getCols());
|
||||
kernels[Method::MULT].setArg(8, cols);
|
||||
cl::Event event;
|
||||
queue.enqueueNDRangeKernel(kernels[Method::MULT], cl::NullRange, global_size,
|
||||
local_size, nullptr, &event);
|
||||
|
||||
event.setCallback(CL_COMPLETE, releaseBuffer, buffer);
|
||||
buffer = b;
|
||||
cols = m.getCols();
|
||||
}
|
||||
|
||||
void MutableMatrices::GPU::mult(float scalar) {
|
||||
cl::Buffer *b = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE,
|
||||
rows * cols * sizeof(float));
|
||||
kernels[Method::SCALAR_MULT].setArg(0, *buffer);
|
||||
kernels[Method::SCALAR_MULT].setArg(1, *b);
|
||||
kernels[Method::SCALAR_MULT].setArg(2, scalar);
|
||||
kernels[Method::SCALAR_MULT].setArg(3, rows);
|
||||
kernels[Method::SCALAR_MULT].setArg(4, cols);
|
||||
cl::Event event;
|
||||
queue.enqueueNDRangeKernel(kernels[Method::SCALAR_MULT], cl::NullRange,
|
||||
cl::NDRange(rows, cols), cl::NullRange, nullptr,
|
||||
&event);
|
||||
|
||||
event.setCallback(CL_COMPLETE, releaseBuffer, buffer);
|
||||
buffer = b;
|
||||
}
|
||||
|
||||
void MutableMatrices::GPU::add(Matrices::GPU &m, float a, float b) {
|
||||
validateSameDimensions(*this, m);
|
||||
|
||||
cl::Buffer *buf = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE,
|
||||
rows * cols * sizeof(float));
|
||||
kernels[Method::ADD].setArg(0, *buffer);
|
||||
kernels[Method::ADD].setArg(1, *m.getBuffer());
|
||||
kernels[Method::ADD].setArg(2, *buf);
|
||||
kernels[Method::ADD].setArg(3, a);
|
||||
kernels[Method::ADD].setArg(4, b);
|
||||
kernels[Method::ADD].setArg(5, rows);
|
||||
kernels[Method::ADD].setArg(6, cols);
|
||||
cl::Event event;
|
||||
queue.enqueueNDRangeKernel(kernels[Method::ADD], cl::NullRange,
|
||||
cl::NDRange(rows, cols), cl::NullRange, nullptr,
|
||||
&event);
|
||||
|
||||
event.setCallback(CL_COMPLETE, releaseBuffer, buffer);
|
||||
buffer = buf;
|
||||
}
|
||||
|
||||
void MutableMatrices::GPU::add(float scalar) {
|
||||
cl::Buffer *b = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE,
|
||||
rows * cols * sizeof(float));
|
||||
kernels[Method::SCALAR_ADD].setArg(0, *buffer);
|
||||
kernels[Method::SCALAR_ADD].setArg(1, *b);
|
||||
kernels[Method::SCALAR_ADD].setArg(2, scalar);
|
||||
kernels[Method::SCALAR_ADD].setArg(3, rows);
|
||||
kernels[Method::SCALAR_ADD].setArg(4, cols);
|
||||
cl::Event event;
|
||||
queue.enqueueNDRangeKernel(kernels[Method::SCALAR_ADD], cl::NullRange,
|
||||
cl::NDRange(rows, cols), cl::NullRange, nullptr,
|
||||
&event);
|
||||
|
||||
event.setCallback(CL_COMPLETE, releaseBuffer, buffer);
|
||||
buffer = b;
|
||||
}
|
||||
|
||||
void MutableMatrices::GPU::activate(Activate type, float alpha) {
|
||||
cl::Buffer *b = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE,
|
||||
rows * cols * sizeof(float));
|
||||
kernels[Method::ACTIVATE].setArg(0, *buffer);
|
||||
kernels[Method::ACTIVATE].setArg(1, *b);
|
||||
kernels[Method::ACTIVATE].setArg(2, static_cast<int>(type));
|
||||
kernels[Method::ACTIVATE].setArg(3, alpha);
|
||||
kernels[Method::ACTIVATE].setArg(4, rows);
|
||||
kernels[Method::ACTIVATE].setArg(5, cols);
|
||||
cl::Event event;
|
||||
queue.enqueueNDRangeKernel(kernels[Method::ACTIVATE], cl::NullRange,
|
||||
cl::NDRange(rows, cols), cl::NullRange, nullptr,
|
||||
&event);
|
||||
|
||||
event.setCallback(CL_COMPLETE, releaseBuffer, buffer);
|
||||
buffer = b;
|
||||
}
|
||||
@@ -1,46 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../opencl/opencl.hpp"
|
||||
|
||||
#include "matrix.hpp"
|
||||
|
||||
#include "../mutable_matrix.hpp"
|
||||
|
||||
namespace MutableMatrices {
|
||||
|
||||
class GPU : public Matrices::GPU, public IMutableMatrix<Matrices::GPU> {
|
||||
private:
|
||||
enum class Method { MULT, SCALAR_MULT, ADD, SCALAR_ADD, ACTIVATE };
|
||||
std::unordered_map<Method, cl::Kernel> kernels;
|
||||
std::unordered_map<Method, std::string> kernelsNames = {
|
||||
{Method::MULT, "mult"},
|
||||
{Method::SCALAR_MULT, "mult_sc"},
|
||||
{Method::ADD, "add"},
|
||||
{Method::SCALAR_ADD, "add_sc"},
|
||||
{Method::ACTIVATE, "activate"}};
|
||||
|
||||
static void CL_CALLBACK releaseBuffer(cl_event, cl_int status, void *buf) {
|
||||
if (status == CL_COMPLETE) {
|
||||
// std::cout << "Kernel complete!" << std::endl;
|
||||
delete (cl::Buffer *)buf;
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
GPU(int rows, int cols);
|
||||
GPU(int rows, int cols, const std::vector<float> &matrix);
|
||||
|
||||
GPU(const GPU &) = delete;
|
||||
GPU &operator=(const GPU &) = delete;
|
||||
GPU(GPU &&other) = default;
|
||||
GPU &operator=(GPU &&other) = default;
|
||||
|
||||
void mult(Matrices::GPU &m, float bias = 0.0f,
|
||||
Activate type = Activate::LINEAR, float alpha = 0.01f);
|
||||
void mult(float scalar);
|
||||
void add(Matrices::GPU &m, float a = 1.0f, float b = 1.0f);
|
||||
void add(float scalar);
|
||||
void activate(Activate type, float alpha = 0.01f);
|
||||
};
|
||||
|
||||
}; // namespace MutableMatrices
|
||||
@@ -1,29 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <random>
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
|
||||
class IMatrix {
|
||||
protected:
|
||||
int rows;
|
||||
int cols;
|
||||
|
||||
void validateDimensions(int rows, int cols) const {
|
||||
if (rows <= 0 || cols <= 0) {
|
||||
throw std::invalid_argument("Matrix dimensions must be positive");
|
||||
}
|
||||
};
|
||||
void checkIndices(int row, int col) const {
|
||||
if (row < 0 || row >= rows || col < 0 || col >= cols) {
|
||||
throw std::out_of_range("Matrix indices out of range");
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
IMatrix(int rows, int cols) : rows(rows), cols(cols) {}
|
||||
virtual ~IMatrix() = default;
|
||||
virtual int getRows() const = 0;
|
||||
virtual int getCols() const = 0;
|
||||
virtual const std::vector<float> toVector() const = 0;
|
||||
};
|
||||
@@ -1,29 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "matrix.hpp"
|
||||
|
||||
template <typename T> class IMutableMatrix {
|
||||
static_assert(std::is_base_of<IMatrix, T>::value,
|
||||
"T must be derived from IMatrix");
|
||||
|
||||
public:
|
||||
enum class Activate { LINEAR, SIGMOID, TANH, RELU, LEAKY_RELU, ELU, GELU };
|
||||
|
||||
virtual void mult(T &m, float bias, Activate type, float alpha) = 0;
|
||||
virtual void mult(float s) = 0;
|
||||
virtual void add(T &m, float a, float b) = 0;
|
||||
virtual void add(float a) = 0;
|
||||
virtual void activate(Activate type, float alpha = 0.01f) = 0;
|
||||
|
||||
void validateMultDimensions(T &a, T &b) const {
|
||||
if (a.getCols() != b.getRows()) {
|
||||
throw std::invalid_argument(
|
||||
"Invalid matrix dimensions for multiplication");
|
||||
}
|
||||
};
|
||||
void validateSameDimensions(T &a, T &b) const {
|
||||
if (a.getRows() != b.getRows() || a.getCols() != b.getCols()) {
|
||||
throw std::invalid_argument("Invalid matrix dimensions for addition");
|
||||
}
|
||||
};
|
||||
};
|
||||
@@ -40,7 +40,7 @@ public:
|
||||
|
||||
cl::Device &getDevice() { return device; }
|
||||
cl::Context &getContext() { return context; }
|
||||
cl::CommandQueue &getDefaultQueue() { return defaultQueue; }
|
||||
const cl::CommandQueue &getDefaultQueue() { return defaultQueue; }
|
||||
|
||||
cl::Program &getProgram(Program program);
|
||||
void printDeviceInfo() const;
|
||||
|
||||
1
src/math/tensor/cpu/math.cpp
Normal file
1
src/math/tensor/cpu/math.cpp
Normal file
@@ -0,0 +1 @@
|
||||
#include "math.hpp"
|
||||
103
src/math/tensor/cpu/math.hpp
Normal file
103
src/math/tensor/cpu/math.hpp
Normal file
@@ -0,0 +1,103 @@
|
||||
#pragma once
|
||||
|
||||
#include "tensor.hpp"
|
||||
|
||||
#include "../math.hpp"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#define M_PI 3.14159265358979323846
|
||||
|
||||
namespace CPU {
|
||||
template <ITensorType T> class TensorMath;
|
||||
class Tensor0Math;
|
||||
class Tensor1Math;
|
||||
class Tensor2Math;
|
||||
class Tensor3Math;
|
||||
|
||||
template <ITensorType T> class TensorMath : public ITensorMath<T> {
|
||||
protected:
|
||||
float activate_x(float x, Activation type, float alpha = 0.01f) {
|
||||
switch (type) {
|
||||
case Activation::LINEAR:
|
||||
return x;
|
||||
case Activation::SIGMOID:
|
||||
return 1.0f / (1.0f + std::exp(-x));
|
||||
case Activation::TANH:
|
||||
return std::tanh(x);
|
||||
case Activation::RELU:
|
||||
return std::max(0.0f, x);
|
||||
case Activation::LEAKY_RELU:
|
||||
return (x > 0.0f) ? x : alpha * x;
|
||||
case Activation::ELU:
|
||||
return (x > 0.0f) ? x : alpha * (std::exp(x) - 1.0f);
|
||||
case Activation::GELU:
|
||||
return 0.5f * x *
|
||||
(1.0f +
|
||||
std::tanh(std::sqrt(2.0f / M_PI) * (x + 0.044715f * x * x * x)));
|
||||
default:
|
||||
throw std::invalid_argument("Unknown activation type");
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
T activate(const T &t, Activation type = Activation::LINEAR,
|
||||
float alpha = 0.0f) override {
|
||||
T result(t.getShape(), false);
|
||||
for (size_t i = 0; i < t.getSize(); ++i) {
|
||||
result[i] = activate_x(t[i], type, alpha);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
T mult(const T &t, float x) override {
|
||||
T result(t.getShape(), false);
|
||||
for (size_t i = 0; i < t.getSize(); ++i)
|
||||
result[i] = t[i] * x;
|
||||
return result;
|
||||
}
|
||||
T add(const T &a, const T &b, float x = 1.0f) override {
|
||||
this->validateSameDimensions(a, b);
|
||||
T result(a.getShape(), false);
|
||||
for (size_t i = 0; i < a.getSize(); ++i)
|
||||
result[i] = a[i] + (b[i] * x);
|
||||
return result;
|
||||
}
|
||||
T add(const T &t, float x) override {
|
||||
T result(t.getShape(), false);
|
||||
for (size_t i = 0; i < t.getSize(); ++i)
|
||||
result[i] = t[i] + x;
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
class Tensor0Math : public TensorMath<Tensor0>, public ITensor0Math<Tensor0> {};
|
||||
|
||||
class Tensor1Math : public TensorMath<Tensor1>, public ITensor1Math<Tensor1> {};
|
||||
|
||||
// Matrix (rank-2) math on the CPU: adds fused matrix multiplication on top of
// the element-wise operations inherited from TensorMath.
class Tensor2Math : public TensorMath<Tensor2>, public ITensor2Math<Tensor2> {
public:
  // Computes activate(a * b + bias) (or a * b^T when `transpose` is set).
  // `type`/`alpha` select and parameterise the activation applied per element.
  Tensor2 mult(const Tensor2 &a, const Tensor2 &b, bool transpose = false,
               float bias = 0.0f, Activation type = Activation::LINEAR,
               float alpha = 0.01f) override {
    validateMultDimensions(a, b, transpose);
    // BUG FIX: when multiplying by b^T the result has b.getRows() columns,
    // not b.getCols(). The old code used b.getCols() unconditionally, which
    // read b(j, k) out of bounds for non-square b (the GPU implementation
    // already sizes the result this way).
    Tensor2 result(a.getRows(), transpose ? b.getRows() : b.getCols(), 0.0f);
    for (int i = 0; i < result.getRows(); ++i) {
      for (int j = 0; j < result.getCols(); ++j) {
        float sum = 0.0f;
        for (int k = 0; k < a.getCols(); ++k)
          sum += a(i, k) * (transpose ? b(j, k) : b(k, j));
        result(i, j) = activate_x(sum + bias, type, alpha);
      }
    }
    return result;
  }
};
|
||||
|
||||
// Rank-3 math: inherits all element-wise operations from TensorMath.
class Tensor3Math : public TensorMath<Tensor3>, public ITensor3Math<Tensor3> {};
|
||||
|
||||
typedef Tensor0Math ScalarMath;
|
||||
typedef Tensor1Math VectorMath;
|
||||
typedef Tensor2Math MatrixMath;
|
||||
|
||||
} // namespace CPU
|
||||
1
src/math/tensor/cpu/tensor.cpp
Normal file
1
src/math/tensor/cpu/tensor.cpp
Normal file
@@ -0,0 +1 @@
|
||||
#include "tensor.hpp"
|
||||
296
src/math/tensor/cpu/tensor.hpp
Normal file
296
src/math/tensor/cpu/tensor.hpp
Normal file
@@ -0,0 +1,296 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
#include "../tensor.hpp"
|
||||
|
||||
extern std::mt19937 gen;
|
||||
|
||||
namespace CPU {
|
||||
class Tensor;
|
||||
class Tensor0;
|
||||
class Tensor1;
|
||||
class Tensor2;
|
||||
class Tensor3;
|
||||
|
||||
// Dense CPU tensor: flat row-major float storage plus the shape metadata
// inherited from ITensor. Rank-specific subclasses (Tensor0..Tensor3) add
// shape checks and indexed accessors on top of this storage.
class Tensor : public ITensor {
protected:
  std::vector<float> data; // flat row-major element storage

  // Resize the flat storage to `size` elements (new slots value-initialise
  // to 0.0f, per std::vector::resize).
  void resize(size_t size) { data.resize(size); }
  // Resize to the element count implied by `shape` (product of dimensions;
  // an empty shape yields 1 element, i.e. a scalar).
  void resize(const std::vector<int> &shape) {
    size_t size = 1;
    for (int dim : shape)
      size *= dim;
    resize(size);
  }

public:
  // Random-initialised tensor: fills every element from the process-wide
  // RNG `gen` with generate_canonical (values in [0, 1)).
  Tensor(const std::vector<int> &shape) : ITensor(shape) {
    resize(shape);
    std::generate(data.begin(), data.end(),
                  []() { return std::generate_canonical<float, 10>(gen); });
  }
  // Constant-initialised tensor: every element set to `value`.
  Tensor(const std::vector<int> &shape, float value) : ITensor(shape) {
    resize(shape);
    std::fill(data.begin(), data.end(), value);
  }
  // `fill == true` zero-fills explicitly; `fill == false` skips the fill
  // (resize already value-initialises new elements to zero either way).
  Tensor(const std::vector<int> &shape, bool fill) : ITensor(shape) {
    resize(shape);
    if (fill)
      std::fill(data.begin(), data.end(), 0.0f);
  }
  Tensor(const Tensor &) = default;
  Tensor &operator=(const Tensor &) = default;
  Tensor(Tensor &&other) = default;
  Tensor &operator=(Tensor &&other) = default;

  // Unchecked flat (row-major) element access.
  float &operator[](int index) { return data[index]; }
  const float &operator[](int index) const { return data[index]; }

  // Prints the leading elements (up to index 16), then an ellipsis for
  // larger tensors.
  virtual void print() const {
    std::cout << "Tensor(" << getDim() << "): [";
    for (size_t i = 0; i < data.size(); ++i) {
      std::cout << data[i];
      if (i > 15) {
        std::cout << "... ";
        break;
      }
      if (i != data.size() - 1)
        std::cout << ", ";
    }
    std::cout << "]" << std::endl;
  }

  // Copies the flat storage out (row-major order).
  std::vector<float> toVector() const { return data; }

  // Checked downcasts keyed on the rank-derived Type tag; return nullptr on
  // rank mismatch. NOTE(review): these use reinterpret_cast, which is only
  // correct when the Tensor subobject sits at the very start of the derived
  // object — Tensor2 currently lists ITensor2 (a polymorphic base) first, so
  // asMatrix's result may be misadjusted; confirm base ordering.
  static Tensor0 *asScalar(Tensor *tensor) {
    return tensor->getType() == Type::SCALAR
               ? reinterpret_cast<Tensor0 *>(tensor)
               : nullptr;
  }
  static const Tensor0 *asScalar(const Tensor *tensor) {
    return tensor->getType() == Type::SCALAR
               ? reinterpret_cast<const Tensor0 *>(tensor)
               : nullptr;
  }
  static Tensor1 *asVector(Tensor *tensor) {
    return tensor->getType() == Type::VECTOR
               ? reinterpret_cast<Tensor1 *>(tensor)
               : nullptr;
  }
  static const Tensor1 *asVector(const Tensor *tensor) {
    return tensor->getType() == Type::VECTOR
               ? reinterpret_cast<const Tensor1 *>(tensor)
               : nullptr;
  }
  static Tensor2 *asMatrix(Tensor *tensor) {
    return tensor->getType() == Type::MATRIX
               ? reinterpret_cast<Tensor2 *>(tensor)
               : nullptr;
  }
  static const Tensor2 *asMatrix(const Tensor *tensor) {
    return tensor->getType() == Type::MATRIX
               ? reinterpret_cast<const Tensor2 *>(tensor)
               : nullptr;
  }
  static Tensor3 *asTensor3(Tensor *tensor) {
    return tensor->getType() == Type::TENSOR3
               ? reinterpret_cast<Tensor3 *>(tensor)
               : nullptr;
  }
  static const Tensor3 *asTensor3(const Tensor *tensor) {
    return tensor->getType() == Type::TENSOR3
               ? reinterpret_cast<const Tensor3 *>(tensor)
               : nullptr;
  }
};
|
||||
|
||||
// Scalar (rank-0) tensor on the CPU: one float, empty shape.
class Tensor0 : public Tensor, public ITensor0 {
public:
  // Shape-taking constructors validate that the shape is empty.
  Tensor0(const std::vector<int> &shape) : Tensor(shape) {
    if (shape.size() != 0)
      throw std::invalid_argument("Tensor0 dimension must be 0");
  }
  Tensor0(const std::vector<int> &shape, float value) : Tensor(shape, value) {
    if (shape.size() != 0)
      throw std::invalid_argument("Tensor0 dimension must be 0");
  }
  // Random scalar. NOTE(review): Tensor({}) already sizes the storage to one
  // element and draws one random value; the body redraws, so two values are
  // consumed from `gen` — harmless but wasteful.
  Tensor0() : Tensor({}) {
    resize(1);
    data[0] = std::generate_canonical<float, 10>(gen);
  }
  // Constant scalar. NOTE(review): the delegated Tensor({}) draw is
  // immediately overwritten by `value` (still advances `gen` once).
  Tensor0(float value) : Tensor({}) {
    resize(1);
    data[0] = value;
  }
  Tensor0(const Tensor0 &) = default;
  Tensor0 &operator=(const Tensor0 &) = default;
  Tensor0(Tensor0 &&other) = default;
  Tensor0 &operator=(Tensor0 &&other) = default;

  void print() const override {
    std::cout << "Scalar: " << data[0] << std::endl;
  }

  // Direct access to the single stored value.
  float &value() { return data[0]; }
  const float &value() const { return data[0]; }
};
|
||||
|
||||
// Vector (rank-1) tensor on the CPU.
class Tensor1 : public Tensor, public ITensor1 {
public:
  // Shape-taking constructors validate rank 1.
  Tensor1(const std::vector<int> &shape) : Tensor(shape) {
    if (shape.size() != 1)
      throw std::invalid_argument("Tensor1 dimension must be 1");
  }
  Tensor1(const std::vector<int> &shape, float value) : Tensor(shape, value) {
    if (shape.size() != 1)
      throw std::invalid_argument("Tensor1 dimension must be 1");
  }
  // Random / constant vectors of the given length.
  Tensor1(int size) : Tensor({size}) {}
  Tensor1(int size, float value) : Tensor({size}, value) {}
  // Copies `values` in. NOTE(review): the delegated Tensor ctor first fills
  // the storage with random draws that are immediately overwritten, so this
  // advances `gen` by values.size() steps.
  Tensor1(const std::vector<float> &values) : Tensor({(int)values.size()}) {
    data = values;
  }
  Tensor1(const Tensor1 &) = default;
  Tensor1 &operator=(const Tensor1 &) = default;
  Tensor1(Tensor1 &&other) = default;
  Tensor1 &operator=(Tensor1 &&other) = default;

  // Prints every element (no truncation, unlike Tensor::print).
  void print() const override {
    std::cout << "Vector(" << shape[0] << "): [";
    for (size_t i = 0; i < data.size(); ++i) {
      std::cout << data[i];
      if (i != data.size() - 1)
        std::cout << ", ";
    }
    std::cout << "]" << std::endl;
  }

  // Unchecked indexed element access.
  float &operator()(int i) { return data[i]; }
  const float &operator()(int i) const { return data[i]; }

  // Vector length (overrides ITensor1; hides ITensor::getSize).
  int getSize() const override { return shape[0]; }
};
|
||||
|
||||
class Tensor2 : public ITensor2, public Tensor {
|
||||
public:
|
||||
Tensor2(const std::vector<int> &shape) : Tensor(shape) {
|
||||
if (shape.size() != 2)
|
||||
throw std::invalid_argument("Tensor2 dimension must be 2");
|
||||
}
|
||||
Tensor2(const std::vector<int> &shape, float value) : Tensor(shape, value) {
|
||||
if (shape.size() != 2)
|
||||
throw std::invalid_argument("Tensor2 dimension must be 2");
|
||||
}
|
||||
Tensor2(int rows, int cols) : ITensor2(), Tensor({rows, cols}) {}
|
||||
Tensor2(int rows, int cols, float value)
|
||||
: ITensor2(), Tensor({rows, cols}, value) {}
|
||||
Tensor2(int rows, int cols, const std::vector<float> &values)
|
||||
: Tensor({rows, cols}, false) {
|
||||
for (int i = 0; i < shape[0]; ++i) {
|
||||
for (int j = 0; j < shape[1]; ++j) {
|
||||
data[i * shape[1] + j] = values[i * shape[1] + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
Tensor2(const std::vector<std::vector<float>> &values)
|
||||
: Tensor({(int)values.size(), (int)values[0].size()}) {
|
||||
for (int i = 0; i < shape[0]; ++i) {
|
||||
for (int j = 0; j < shape[1]; ++j) {
|
||||
data[i * shape[1] + j] = values[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
Tensor2(const Tensor2 &) = default;
|
||||
Tensor2 &operator=(const Tensor2 &) = default;
|
||||
Tensor2(Tensor2 &&other) = default;
|
||||
Tensor2 &operator=(Tensor2 &&other) = default;
|
||||
|
||||
void print() const override {
|
||||
std::cout << "Matrix(" << shape[0] << "x" << shape[1] << "):\n";
|
||||
for (int i = 0; i < shape[0]; ++i) {
|
||||
for (int j = 0; j < shape[1]; ++j) {
|
||||
std::cout << data[i * shape[1] + j] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
float &operator()(int i, int j) { return data[i * shape[1] + j]; }
|
||||
const float &operator()(int i, int j) const { return data[i * shape[1] + j]; }
|
||||
|
||||
int getRows() const override { return shape[0]; }
|
||||
int getCols() const override { return shape[1]; }
|
||||
};
|
||||
|
||||
// Rank-3 tensor on the CPU, stored flat with strides
// (shape[1]*shape[2], shape[2], 1) — i.e. slice-major, then row-major.
class Tensor3 : public Tensor, public ITensor3 {
public:
  // Shape-taking constructors validate rank 3.
  Tensor3(const std::vector<int> &shape) : Tensor(shape) {
    if (shape.size() != 3)
      throw std::invalid_argument("Tensor3 dimension must be 3");
  }
  Tensor3(const std::vector<int> &shape, float value) : Tensor(shape, value) {
    if (shape.size() != 3)
      throw std::invalid_argument("Tensor3 dimension must be 3");
  }
  // Random / constant tensors of the given extents.
  Tensor3(int d1, int d2, int d3) : Tensor({d1, d2, d3}) {}
  Tensor3(int d1, int d2, int d3, float value) : Tensor({d1, d2, d3}, value) {}
  // Copies a flat buffer in (same layout as the internal storage).
  Tensor3(int d1, int d2, int d3, const std::vector<float> &values)
      : Tensor({d1, d2, d3}, false) {
    for (int i = 0; i < shape[0]; ++i) {
      for (int j = 0; j < shape[1]; ++j) {
        for (int k = 0; k < shape[2]; ++k) {
          data[i * shape[1] * shape[2] + j * shape[2] + k] =
              values[i * shape[1] * shape[2] + j * shape[2] + k];
        }
      }
    }
  }
  // Copies nested vectors in; extents taken from `values`.
  Tensor3(const std::vector<std::vector<std::vector<float>>> &values)
      : Tensor({(int)values.size(), (int)values[0].size(),
                (int)values[0][0].size()}) {
    for (int i = 0; i < shape[0]; ++i) {
      for (int j = 0; j < shape[1]; ++j) {
        for (int k = 0; k < shape[2]; ++k) {
          data[i * shape[1] * shape[2] + j * shape[2] + k] = values[i][j][k];
        }
      }
    }
  }
  Tensor3(const Tensor3 &) = default;
  Tensor3 &operator=(const Tensor3 &) = default;
  Tensor3(Tensor3 &&other) = default;
  Tensor3 &operator=(Tensor3 &&other) = default;

  // Prints one matrix "slice" per outermost index.
  void print() const override {
    std::cout << "Tensor3(" << shape[0] << "x" << shape[1] << "x" << shape[2]
              << "):\n";
    for (int i = 0; i < shape[0]; ++i) {
      std::cout << "Slice " << i << ":\n";
      for (int j = 0; j < shape[1]; ++j) {
        for (int k = 0; k < shape[2]; ++k) {
          std::cout << data[i * shape[1] * shape[2] + j * shape[2] + k] << " ";
        }
        std::cout << std::endl;
      }
      std::cout << std::endl;
    }
  }

  // Unchecked (slice, row, col) element access.
  float &operator()(int i, int j, int k) {
    return data[i * shape[1] * shape[2] + j * shape[2] + k];
  }
  const float &operator()(int i, int j, int k) const {
    return data[i * shape[1] * shape[2] + j * shape[2] + k];
  }
};
|
||||
|
||||
typedef Tensor0 Scalar;
|
||||
typedef Tensor1 Vector;
|
||||
typedef Tensor2 Matrix;
|
||||
|
||||
} // namespace CPU
|
||||
1
src/math/tensor/gpu/math.cpp
Normal file
1
src/math/tensor/gpu/math.cpp
Normal file
@@ -0,0 +1 @@
|
||||
#include "math.hpp"
|
||||
164
src/math/tensor/gpu/math.hpp
Normal file
164
src/math/tensor/gpu/math.hpp
Normal file
@@ -0,0 +1,164 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../opencl/opencl.hpp"
|
||||
|
||||
#include "tensor.hpp"
|
||||
|
||||
#include "../math.hpp"
|
||||
|
||||
namespace GPU {
|
||||
template <ITensorType T> class TensorMath;
|
||||
class Tensor0Math;
|
||||
class Tensor1Math;
|
||||
class Tensor2Math;
|
||||
class Tensor3Math;
|
||||
|
||||
// Element-wise tensor math executed on the GPU through OpenCL kernels.
// Each instance owns its own command queue; operations are enqueued
// asynchronously — call await() (or read a result back) before relying on
// device contents.
template <ITensorType T> class TensorMath : public ITensorMath<T> {
protected:
  // Kernel roles managed by this class (MULT/MULT_SMALL are only used by the
  // Tensor2Math specialisation).
  enum class Method {
    MULT,
    MULT_SMALL,
    SCALAR_MULT,
    ADD,
    SCALAR_ADD,
    ACTIVATE
  };
  std::unordered_map<Method, cl::Kernel> kernels; // compiled kernel handles
  // Maps each role to its entry-point name inside the MATRIX program.
  std::unordered_map<Method, std::string> kernelsNames = {
      {Method::MULT, "mult"}, {Method::MULT_SMALL, "mult_small"},
      {Method::SCALAR_MULT, "mult_sc"}, {Method::ADD, "add"},
      {Method::SCALAR_ADD, "add_sc"}, {Method::ACTIVATE, "activate"}};

  cl::CommandQueue queue; // all work from this instance is enqueued here

public:
  // Creates the command queue and one kernel object per method.
  // NOTE(review): setArg/enqueue below mutate these shared kernel objects,
  // so a single TensorMath instance is not safe to use from multiple
  // threads concurrently — confirm single-threaded usage.
  TensorMath() {
    queue = cl::CommandQueue(openCL.getContext(), openCL.getDevice());
    for (const auto &entry : kernelsNames) {
      kernels[entry.first] =
          cl::Kernel(openCL.getProgram(OpenCL::Program::MATRIX), entry.second);
    }
  }

  const cl::CommandQueue &getQueue() const { return queue; }

  // Blocks until all previously enqueued work on this queue has finished.
  void await() const { queue.finish(); }

  // Element-wise activation on the device.
  // NOTE(review): results are built with T(shape, false, &queue); confirm
  // that constructor allocates the device buffer, otherwise getBuffer()
  // below dereferences null.
  T activate(const T &t, Activation type = Activation::LINEAR,
             float alpha = 0.0f) override {
    T result(t.getShape(), false, &queue);
    kernels[Method::ACTIVATE].setArg(0, *t.getBuffer());
    kernels[Method::ACTIVATE].setArg(1, *result.getBuffer());
    kernels[Method::ACTIVATE].setArg(2, static_cast<int>(type));
    kernels[Method::ACTIVATE].setArg(3, alpha);
    queue.enqueueNDRangeKernel(kernels[Method::ACTIVATE], cl::NullRange,
                               cl::NDRange(t.getSize()));
    return result;
  }

  // Element-wise scalar multiplication (t * x) on the device.
  T mult(const T &t, float x) override {
    T result(t.getShape(), false, &queue);
    kernels[Method::SCALAR_MULT].setArg(0, *t.getBuffer());
    kernels[Method::SCALAR_MULT].setArg(1, *result.getBuffer());
    kernels[Method::SCALAR_MULT].setArg(2, x);
    queue.enqueueNDRangeKernel(kernels[Method::SCALAR_MULT], cl::NullRange,
                               cl::NDRange(t.getSize()));
    return result;
  }

  // Element-wise a + x*b on the device; shapes must match.
  T add(const T &a, const T &b, float x = 1.0f) override {
    this->validateSameDimensions(a, b);
    T result(a.getShape(), false, &queue);
    kernels[Method::ADD].setArg(0, *a.getBuffer());
    kernels[Method::ADD].setArg(1, *b.getBuffer());
    kernels[Method::ADD].setArg(2, *result.getBuffer());
    kernels[Method::ADD].setArg(3, x);
    queue.enqueueNDRangeKernel(kernels[Method::ADD], cl::NullRange,
                               cl::NDRange(a.getSize()));
    return result;
  }

  // Element-wise scalar addition (t + x) on the device.
  T add(const T &t, float x) override {
    T result(t.getShape(), false, &queue);
    kernels[Method::SCALAR_ADD].setArg(0, *t.getBuffer());
    kernels[Method::SCALAR_ADD].setArg(1, *result.getBuffer());
    kernels[Method::SCALAR_ADD].setArg(2, x);
    queue.enqueueNDRangeKernel(kernels[Method::SCALAR_ADD], cl::NullRange,
                               cl::NDRange(t.getSize()));
    return result;
  }
};
|
||||
|
||||
// Scalar (rank-0) GPU math: inherits all element-wise operations.
class Tensor0Math : public TensorMath<Tensor0>, public ITensor0Math<Tensor0> {};
|
||||
|
||||
// Vector (rank-1) GPU math: inherits all element-wise operations.
class Tensor1Math : public TensorMath<Tensor1>, public ITensor1Math<Tensor1> {};
|
||||
|
||||
// Matrix (rank-2) GPU math: fused multiply + bias + activation, dispatching
// between a tiled kernel (large matrices) and a naive one (small matrices).
class Tensor2Math : public TensorMath<Tensor2>, public ITensor2Math<Tensor2> {
private:
  // Tiled multiply for large matrices: 16x16 work-groups, global size
  // rounded up to a multiple of the tile so every tile is fully populated.
  Tensor2 mult_tiled(const Tensor2 &a, const Tensor2 &b, bool transpose = false,
                     float bias = 0.0f, Activation type = Activation::LINEAR,
                     float alpha = 0.01f) {
    validateMultDimensions(a, b, transpose);
    Tensor2 result(a.getRows(), transpose ? b.getRows() : b.getCols(), false,
                   &queue);

    const int tile_size = 16;
    cl::NDRange local_size(tile_size, tile_size);
    cl::NDRange global_size(
        ((result.getRows() + tile_size - 1) / tile_size) * tile_size,
        ((result.getCols() + tile_size - 1) / tile_size) * tile_size);

    kernels[Method::MULT].setArg(0, *a.getBuffer());
    kernels[Method::MULT].setArg(1, *b.getBuffer());
    kernels[Method::MULT].setArg(2, *result.getBuffer());
    kernels[Method::MULT].setArg(3, bias);
    kernels[Method::MULT].setArg(4, static_cast<int>(type));
    kernels[Method::MULT].setArg(5, alpha);
    kernels[Method::MULT].setArg(6, result.getRows());
    kernels[Method::MULT].setArg(7, result.getCols());
    kernels[Method::MULT].setArg(8, a.getCols());
    kernels[Method::MULT].setArg(9, transpose ? 1 : 0);
    queue.enqueueNDRangeKernel(kernels[Method::MULT], cl::NullRange,
                               global_size, local_size);
    return result;
  }
  // Naive multiply for small matrices: one work-item per output element,
  // no local tiling (launch overhead beats tiling at this size).
  Tensor2 mult_small(const Tensor2 &a, const Tensor2 &b, bool transpose = false,
                     float bias = 0.0f, Activation type = Activation::LINEAR,
                     float alpha = 0.01f) {
    validateMultDimensions(a, b, transpose);
    Tensor2 result(a.getRows(), transpose ? b.getRows() : b.getCols(), false,
                   &queue);
    kernels[Method::MULT_SMALL].setArg(0, *a.getBuffer());
    kernels[Method::MULT_SMALL].setArg(1, *b.getBuffer());
    kernels[Method::MULT_SMALL].setArg(2, *result.getBuffer());
    kernels[Method::MULT_SMALL].setArg(3, bias);
    kernels[Method::MULT_SMALL].setArg(4, static_cast<int>(type));
    kernels[Method::MULT_SMALL].setArg(5, alpha);
    kernels[Method::MULT_SMALL].setArg(6, result.getRows());
    kernels[Method::MULT_SMALL].setArg(7, result.getCols());
    kernels[Method::MULT_SMALL].setArg(8, a.getCols());
    kernels[Method::MULT_SMALL].setArg(9, transpose ? 1 : 0);
    queue.enqueueNDRangeKernel(kernels[Method::MULT_SMALL], cl::NullRange,
                               cl::NDRange(result.getRows(), result.getCols()));
    return result;
  }

public:
  // Computes activate(a * b + bias) (or a * b^T when `transpose`), picking
  // the tiled kernel once any dimension exceeds 64.
  Tensor2 mult(const Tensor2 &a, const Tensor2 &b, bool transpose = false,
               float bias = 0.0f, Activation type = Activation::LINEAR,
               float alpha = 0.01f) override {
    if (a.getRows() > 64 || a.getCols() > 64 || b.getRows() > 64 ||
        b.getCols() > 64)
      return mult_tiled(a, b, transpose, bias, type, alpha);
    else
      return mult_small(a, b, transpose, bias, type, alpha);
  }
};
|
||||
|
||||
// Rank-3 GPU math: inherits all element-wise operations.
class Tensor3Math : public TensorMath<Tensor3>, public ITensor3Math<Tensor3> {};
|
||||
|
||||
typedef Tensor0Math ScalarMath;
|
||||
typedef Tensor1Math VectorMath;
|
||||
typedef Tensor2Math MatrixMath;
|
||||
|
||||
} // namespace GPU
|
||||
1
src/math/tensor/gpu/tensor.cpp
Normal file
1
src/math/tensor/gpu/tensor.cpp
Normal file
@@ -0,0 +1 @@
|
||||
#include "tensor.hpp"
|
||||
282
src/math/tensor/gpu/tensor.hpp
Normal file
282
src/math/tensor/gpu/tensor.hpp
Normal file
@@ -0,0 +1,282 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../opencl/opencl.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
#include "../tensor.hpp"
|
||||
#include "math.hpp"
|
||||
|
||||
extern std::mt19937 gen;
|
||||
|
||||
namespace GPU {
|
||||
class Tensor;
|
||||
class Tensor0;
|
||||
class Tensor1;
|
||||
class Tensor2;
|
||||
class Tensor3;
|
||||
|
||||
class Tensor : public ITensor {
|
||||
protected:
|
||||
cl::Buffer *buffer = nullptr;
|
||||
|
||||
size_t getShapeSize(const std::vector<int> &shape) {
|
||||
size_t size = 1;
|
||||
for (int dim : shape)
|
||||
size *= dim;
|
||||
return size;
|
||||
}
|
||||
void fillBuf(const std::vector<float> &v,
|
||||
const cl::CommandQueue *queue = nullptr) {
|
||||
if (buffer != nullptr)
|
||||
throw std::runtime_error("Tensor buffer already exists");
|
||||
buffer = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE,
|
||||
v.size() * sizeof(float));
|
||||
cl::CommandQueue q = queue == nullptr ? openCL.getDefaultQueue() : *queue;
|
||||
q.enqueueWriteBuffer(*buffer, CL_TRUE, 0, v.size() * sizeof(float),
|
||||
v.data());
|
||||
q.finish();
|
||||
}
|
||||
void createBuf(size_t size, const cl::CommandQueue *queue = nullptr) {
|
||||
std::vector<float> v(size);
|
||||
std::generate(v.begin(), v.end(),
|
||||
[]() { return std::generate_canonical<float, 10>(gen); });
|
||||
fillBuf(v, queue);
|
||||
}
|
||||
void createBuf(size_t size, float value,
|
||||
const cl::CommandQueue *queue = nullptr) {
|
||||
std::vector<float> v(size);
|
||||
std::fill(v.begin(), v.end(), value);
|
||||
fillBuf(v, queue);
|
||||
}
|
||||
|
||||
public:
|
||||
Tensor(const std::vector<int> &shape, const cl::CommandQueue *queue = nullptr)
|
||||
: ITensor(shape) {
|
||||
createBuf(getShapeSize(shape), queue);
|
||||
}
|
||||
Tensor(const std::vector<int> &shape, float value,
|
||||
const cl::CommandQueue *queue = nullptr)
|
||||
: ITensor(shape) {
|
||||
createBuf(getShapeSize(shape), value, queue);
|
||||
}
|
||||
Tensor(const std::vector<int> &shape, bool fill,
|
||||
const cl::CommandQueue *queue = nullptr)
|
||||
: ITensor(shape) {
|
||||
if (fill)
|
||||
createBuf(getShapeSize(shape), 0.0f, queue);
|
||||
}
|
||||
Tensor(const Tensor &) = delete;
|
||||
Tensor &operator=(const Tensor &) = delete;
|
||||
Tensor(Tensor &&other) : ITensor(other.shape), buffer(other.buffer) {
|
||||
other.buffer = nullptr;
|
||||
};
|
||||
Tensor &operator=(Tensor &&other) = delete;
|
||||
|
||||
std::vector<float> toVector(const cl::CommandQueue *queue = nullptr) {
|
||||
size_t size = getShapeSize(shape);
|
||||
std::vector<float> result(size);
|
||||
cl::CommandQueue q = queue == nullptr ? openCL.getDefaultQueue() : *queue;
|
||||
q.enqueueReadBuffer(*buffer, CL_TRUE, 0, size * sizeof(float),
|
||||
result.data());
|
||||
q.finish();
|
||||
return result;
|
||||
}
|
||||
|
||||
const cl::Buffer *getBuffer() const { return buffer; }
|
||||
|
||||
static Tensor0 *asScalar(Tensor *tensor) {
|
||||
return tensor->getType() == Type::SCALAR
|
||||
? reinterpret_cast<Tensor0 *>(tensor)
|
||||
: nullptr;
|
||||
}
|
||||
static const Tensor0 *asScalar(const Tensor *tensor) {
|
||||
return tensor->getType() == Type::SCALAR
|
||||
? reinterpret_cast<const Tensor0 *>(tensor)
|
||||
: nullptr;
|
||||
}
|
||||
static Tensor1 *asVector(Tensor *tensor) {
|
||||
return tensor->getType() == Type::VECTOR
|
||||
? reinterpret_cast<Tensor1 *>(tensor)
|
||||
: nullptr;
|
||||
}
|
||||
static const Tensor1 *asVector(const Tensor *tensor) {
|
||||
return tensor->getType() == Type::VECTOR
|
||||
? reinterpret_cast<const Tensor1 *>(tensor)
|
||||
: nullptr;
|
||||
}
|
||||
static Tensor2 *asMatrix(Tensor *tensor) {
|
||||
return tensor->getType() == Type::MATRIX
|
||||
? reinterpret_cast<Tensor2 *>(tensor)
|
||||
: nullptr;
|
||||
}
|
||||
static const Tensor2 *asMatrix(const Tensor *tensor) {
|
||||
return tensor->getType() == Type::MATRIX
|
||||
? reinterpret_cast<const Tensor2 *>(tensor)
|
||||
: nullptr;
|
||||
}
|
||||
static Tensor3 *asTensor3(Tensor *tensor) {
|
||||
return tensor->getType() == Type::TENSOR3
|
||||
? reinterpret_cast<Tensor3 *>(tensor)
|
||||
: nullptr;
|
||||
}
|
||||
static const Tensor3 *asTensor3(const Tensor *tensor) {
|
||||
return tensor->getType() == Type::TENSOR3
|
||||
? reinterpret_cast<const Tensor3 *>(tensor)
|
||||
: nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
class Tensor0 : public Tensor, public ITensor0 {
|
||||
public:
|
||||
Tensor0(const std::vector<int> &shape,
|
||||
const cl::CommandQueue *queue = nullptr)
|
||||
: Tensor(shape, queue) {
|
||||
if (shape.size() != 0)
|
||||
throw std::invalid_argument("Tensor0 dimension must be 0");
|
||||
}
|
||||
Tensor0(const std::vector<int> &shape, float value,
|
||||
const cl::CommandQueue *queue = nullptr)
|
||||
: Tensor(shape, value, queue) {
|
||||
if (shape.size() != 0)
|
||||
throw std::invalid_argument("Tensor0 dimension must be 0");
|
||||
}
|
||||
Tensor0(const cl::CommandQueue *queue = nullptr) : Tensor({}, queue) {
|
||||
createBuf(1, queue);
|
||||
}
|
||||
Tensor0(float value, const cl::CommandQueue *queue = nullptr)
|
||||
: Tensor({}, queue) {
|
||||
createBuf(1, value, queue);
|
||||
}
|
||||
Tensor0(const Tensor0 &) = delete;
|
||||
Tensor0 &operator=(const Tensor0 &) = delete;
|
||||
Tensor0(Tensor0 &&other) : Tensor(std::move(other)) {};
|
||||
Tensor0 &operator=(Tensor0 &&other) = delete;
|
||||
};
|
||||
|
||||
// Vector (rank-1) tensor on the GPU.
class Tensor1 : public Tensor, public ITensor1 {
public:
  // Shape-taking constructors validate rank 1.
  Tensor1(const std::vector<int> &shape,
          const cl::CommandQueue *queue = nullptr)
      : Tensor(shape, queue) {
    if (shape.size() != 1)
      throw std::invalid_argument("Tensor1 dimension must be 1");
  }
  Tensor1(const std::vector<int> &shape, float value,
          const cl::CommandQueue *queue = nullptr)
      : Tensor(shape, value, queue) {
    if (shape.size() != 1)
      throw std::invalid_argument("Tensor1 dimension must be 1");
  }
  // Random / constant vectors of the given length.
  Tensor1(int size, const cl::CommandQueue *queue = nullptr)
      : Tensor({size}, queue) {}
  Tensor1(int size, float value, const cl::CommandQueue *queue = nullptr)
      : Tensor({size}, value, queue) {}
  // Uploads `values`: the base ctor is called with fill == false so no data
  // is written twice; fillBuf then performs the single upload.
  Tensor1(const std::vector<float> &values,
          const cl::CommandQueue *queue = nullptr)
      : Tensor({(int)values.size()}, false, queue) {
    fillBuf(values, queue);
  }
  Tensor1(const Tensor1 &) = delete;
  Tensor1 &operator=(const Tensor1 &) = delete;
  Tensor1(Tensor1 &&other) : Tensor(std::move(other)) {}
  Tensor1 &operator=(Tensor1 &&other) = delete;

  // Vector length (overrides ITensor1; hides ITensor::getSize).
  int getSize() const override { return shape[0]; }
};
|
||||
|
||||
// Matrix (rank-2) tensor on the GPU with row-major device storage.
// NOTE(review): ITensor2 (a polymorphic class) is the first base here, so
// Tensor::asMatrix's reinterpret_cast from Tensor* may yield a misadjusted
// pointer — confirm base ordering before using asMatrix on GPU tensors.
class Tensor2 : public ITensor2, public Tensor {
public:
  // Shape-taking constructors validate rank 2.
  Tensor2(const std::vector<int> &shape,
          const cl::CommandQueue *queue = nullptr)
      : Tensor(shape, queue) {
    if (shape.size() != 2)
      throw std::invalid_argument("Tensor2 dimension must be 2");
  }
  Tensor2(const std::vector<int> &shape, float value,
          const cl::CommandQueue *queue = nullptr)
      : Tensor(shape, value, queue) {
    if (shape.size() != 2)
      throw std::invalid_argument("Tensor2 dimension must be 2");
  }
  // Random / constant matrices of the given extents.
  Tensor2(int rows, int cols, const cl::CommandQueue *queue = nullptr)
      : ITensor2(), Tensor({rows, cols}, queue) {}
  Tensor2(int rows, int cols, float value,
          const cl::CommandQueue *queue = nullptr)
      : ITensor2(), Tensor({rows, cols}, value, queue) {}
  // Uploads a flat row-major buffer in a single fillBuf call.
  Tensor2(int rows, int cols, const std::vector<float> &values,
          const cl::CommandQueue *queue = nullptr)
      : Tensor({rows, cols}, false, queue) {
    fillBuf(values, queue);
  }
  // Flattens nested vectors row-major and uploads once. NOTE(review): the
  // base ctor here is not given `queue` (defaults to nullptr); only fillBuf
  // below receives it — confirm that is intentional.
  Tensor2(const std::vector<std::vector<float>> &values,
          const cl::CommandQueue *queue = nullptr)
      : Tensor({(int)values.size(), (int)values[0].size()}, false) {
    std::vector<float> v(values.size() * values[0].size());
    for (size_t i = 0; i < values.size(); ++i) {
      for (size_t j = 0; j < values[i].size(); ++j)
        v[i * values[0].size() + j] = values[i][j];
    }
    fillBuf(v, queue);
  }

  Tensor2(const Tensor2 &) = delete;
  Tensor2 &operator=(const Tensor2 &) = delete;
  Tensor2(Tensor2 &&other) : Tensor(std::move(other)) {}
  Tensor2 &operator=(Tensor2 &&other) = delete;

  int getRows() const override { return shape[0]; }
  int getCols() const override { return shape[1]; }
};
|
||||
|
||||
class Tensor3 : public Tensor, public ITensor3 {
|
||||
public:
|
||||
Tensor3(const std::vector<int> &shape,
|
||||
const cl::CommandQueue *queue = nullptr)
|
||||
: Tensor(shape, queue) {
|
||||
if (shape.size() != 3)
|
||||
throw std::invalid_argument("Tensor3 dimension must be 3");
|
||||
}
|
||||
Tensor3(const std::vector<int> &shape, float value,
|
||||
const cl::CommandQueue *queue = nullptr)
|
||||
: Tensor(shape, value, queue) {
|
||||
if (shape.size() != 3)
|
||||
throw std::invalid_argument("Tensor3 dimension must be 3");
|
||||
}
|
||||
Tensor3(int d1, int d2, int d3, const cl::CommandQueue *queue = nullptr)
|
||||
: Tensor({d1, d2, d3}, queue) {}
|
||||
Tensor3(int d1, int d2, int d3, float value,
|
||||
const cl::CommandQueue *queue = nullptr)
|
||||
: Tensor({d1, d2, d3}, value, queue) {}
|
||||
Tensor3(int d1, int d2, int d3, const std::vector<float> &values,
|
||||
const cl::CommandQueue *queue = nullptr)
|
||||
: Tensor({d1, d2, d3}, false, queue) {
|
||||
fillBuf(values, queue);
|
||||
}
|
||||
Tensor3(const std::vector<std::vector<std::vector<float>>> &values,
|
||||
const cl::CommandQueue *queue = nullptr)
|
||||
: Tensor({(int)values.size(), (int)values[0].size(),
|
||||
(int)values[0][0].size()},
|
||||
false, queue) {
|
||||
std::vector<float> v(shape[0] * shape[1] * shape[2]);
|
||||
for (int i = 0; i < shape[0]; ++i) {
|
||||
for (int j = 0; j < shape[1]; ++j)
|
||||
for (int k = 0; k < shape[2]; ++k)
|
||||
v[i * shape[1] * shape[2] + j * shape[1] + k] = values[i][j][k];
|
||||
}
|
||||
fillBuf(v, queue);
|
||||
}
|
||||
Tensor3(const Tensor3 &) = delete;
|
||||
Tensor3 &operator=(const Tensor3 &) = delete;
|
||||
Tensor3(Tensor3 &&other) : Tensor(std::move(other)) {}
|
||||
Tensor3 &operator=(Tensor3 &&other) = delete;
|
||||
};
|
||||
|
||||
typedef Tensor0 Scalar;
|
||||
typedef Tensor1 Vector;
|
||||
typedef Tensor2 Matrix;
|
||||
|
||||
} // namespace GPU
|
||||
58
src/math/tensor/math.hpp
Normal file
58
src/math/tensor/math.hpp
Normal file
@@ -0,0 +1,58 @@
|
||||
#pragma once
|
||||
|
||||
#include "tensor.hpp"
|
||||
|
||||
// Activation functions a math backend can apply element-wise; LEAKY_RELU and
// ELU take an additional `alpha` parameter.
enum class Activation { LINEAR, SIGMOID, TANH, RELU, LEAKY_RELU, ELU, GELU };
|
||||
|
||||
// Constrains the math templates to types deriving from the tensor interfaces.
template <typename T>
concept ITensorType = std::is_base_of_v<ITensor, T>;

// Rank-specific refinements used by the rank-specific math interfaces.
template <typename T>
concept ITensor0Type = std::is_base_of_v<ITensor0, T>;
template <typename T>
concept ITensor1Type = std::is_base_of_v<ITensor1, T>;
template <typename T>
concept ITensor2Type = std::is_base_of_v<ITensor2, T>;
template <typename T>
concept ITensor3Type = std::is_base_of_v<ITensor3, T>;
|
||||
|
||||
template <ITensorType T> class ITensorMath {
|
||||
protected:
|
||||
void validateSameDimensions(const T &a, const T &b) const {
|
||||
if (a.getDim() != b.getDim())
|
||||
throw std::invalid_argument("Tensors must have the same dimension");
|
||||
if (a.getSize() != b.getSize())
|
||||
throw std::invalid_argument("Tensors must have the same size");
|
||||
for (int i = 0; i < a.getDim(); ++i) {
|
||||
if (a.getShape()[i] != b.getShape()[i])
|
||||
throw std::invalid_argument("Tensors must have the same shape");
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
virtual T activate(const T &m, Activation type, float alpha) = 0;
|
||||
|
||||
virtual T mult(const T &m, float x) = 0;
|
||||
virtual T add(const T &a, const T &b, float x) = 0;
|
||||
virtual T add(const T &m, float x) = 0;
|
||||
};
|
||||
|
||||
// Scalar-specific math interface (no extra operations yet).
template <ITensor0Type T> class ITensor0Math {};
|
||||
|
||||
// Vector-specific math interface (no extra operations yet).
template <ITensor1Type T> class ITensor1Math {};
|
||||
|
||||
template <ITensor2Type T> class ITensor2Math {
|
||||
public:
|
||||
virtual T mult(const T &a, const T &b, bool transpose, float bias,
|
||||
Activation type, float alpha) = 0;
|
||||
|
||||
void validateMultDimensions(const T &a, const T &b, bool transpose) const {
|
||||
if ((!transpose && a.getCols() != b.getRows()) ||
|
||||
(transpose && a.getCols() != b.getCols())) {
|
||||
throw std::invalid_argument(
|
||||
"Invalid matrix dimensions for multiplication");
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
// Rank-3-specific math interface (no extra operations yet).
template <ITensor3Type T> class ITensor3Math {};
|
||||
67
src/math/tensor/tensor.hpp
Normal file
67
src/math/tensor/tensor.hpp
Normal file
@@ -0,0 +1,67 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
#include <random>
#include <stdexcept>
#include <vector>
|
||||
|
||||
// Single process-wide RNG shared by all tensor initialisers.
// BUG FIX: these are defined in a header included from multiple translation
// units; without `inline` (C++17 inline variables) each TU gets its own
// definition and linking fails with duplicate symbols. The `extern
// std::mt19937 gen;` redeclarations in the backend headers remain valid.
inline std::random_device rd;
inline std::mt19937 gen(rd());
|
||||
|
||||
// Shape-only tensor base: stores the dimension list shared by the CPU and
// GPU element-storage implementations.
class ITensor {
protected:
  std::vector<int> shape; // extent of each dimension, outermost first

  // Rejects empty or non-positive shapes. NOTE(review): this helper is never
  // invoked by the constructor, and it would reject the empty shape used for
  // scalars — confirm intent before wiring it in.
  void validateDimensions(const std::vector<int> &shape) const {
    if (shape.empty())
      throw std::invalid_argument("Tensor shape cannot be empty");
    for (size_t i = 0; i < shape.size(); ++i) {
      if (shape[i] <= 0)
        throw std::invalid_argument(
            "All tensor dimensions must be positive, but dimension " +
            std::to_string(i) + " is " + std::to_string(shape[i]));
    }
  };

public:
  ITensor(const std::vector<int> &shape) : shape(shape) {}
  ITensor(const ITensor &) = default;
  ITensor &operator=(const ITensor &) = default;
  ITensor(ITensor &&other) = default;
  ITensor &operator=(ITensor &&other) = default;

  const std::vector<int> &getShape() const { return shape; }
  // Tensor rank (number of dimensions; 0 for a scalar).
  int getDim() const { return static_cast<int>(shape.size()); }
  // Total element count: product of all extents (empty product -> 1).
  size_t getSize() const {
    size_t size = 1;
    for (int dim : shape)
      size *= dim;
    return size;
  };

  // Rank encoded as a category tag; ordinal values match shape.size().
  enum class Type { SCALAR, VECTOR, MATRIX, TENSOR3 };
  // Maps rank directly to Type; assumes rank <= 3 (a higher rank would
  // produce an out-of-range enum value).
  Type getType() const { return static_cast<Type>(shape.size()); };
};
|
||||
|
||||
// Scalar (rank-0) tensor interface marker (no extra accessors).
class ITensor0 {};
|
||||
|
||||
// Vector (rank-1) tensor interface.
class ITensor1 {
public:
  // BUG FIX: polymorphic interface (pure virtual getSize) needs a virtual
  // destructor so deleting a derived vector through ITensor1* is
  // well-defined.
  virtual ~ITensor1() = default;

  // Number of elements in the vector.
  virtual int getSize() const = 0;
};
|
||||
|
||||
// Matrix (rank-2) tensor interface.
class ITensor2 {
public:
  ITensor2() = default;
  ITensor2(const ITensor2 &) = default;
  ITensor2 &operator=(const ITensor2 &) = default;
  ITensor2(ITensor2 &&other) = default;
  ITensor2 &operator=(ITensor2 &&other) = default;
  // BUG FIX: polymorphic interface (pure virtual getRows/getCols) needs a
  // virtual destructor so deletion through ITensor2* is well-defined. The
  // copy/move members above stay explicitly defaulted, so declaring the
  // destructor does not suppress them.
  virtual ~ITensor2() = default;

  // Matrix extents: number of rows and columns.
  virtual int getRows() const = 0;
  virtual int getCols() const = 0;
};
|
||||
|
||||
// Rank-3 tensor interface marker (no extra accessors).
class ITensor3 {};

// Domain-flavoured aliases for the rank-specific interfaces.
typedef ITensor0 IScalar;
typedef ITensor1 IVector;
typedef ITensor2 IMatrix;
|
||||
Reference in New Issue
Block a user