mirror of
https://github.com/StepanovPlaton/NeuralNetwork.git
synced 2026-04-04 04:40:40 +04:00
First NN forward
This commit is contained in:
14
src/Makefile
14
src/Makefile
@@ -2,13 +2,18 @@ CXX = g++
|
|||||||
CXXFLAGS = -Wall -Wextra -O2 -std=c++23
|
CXXFLAGS = -Wall -Wextra -O2 -std=c++23
|
||||||
LIBS = -lOpenCL
|
LIBS = -lOpenCL
|
||||||
TARGET = main
|
TARGET = main
|
||||||
SRC = main.cpp ./math/opencl/opencl.cpp ./math/matrix/cpu/matrix.cpp ./math/matrix/cpu/mutable_matrix.cpp ./math/matrix/gpu/matrix.cpp ./math/matrix/gpu/mutable_matrix.cpp
|
COMMON_SRC = ./math/opencl/opencl.cpp ./math/matrix/cpu/matrix.cpp ./math/matrix/cpu/mutable_matrix.cpp ./math/matrix/gpu/matrix.cpp ./math/matrix/gpu/mutable_matrix.cpp
|
||||||
|
MAIN_SRC = main.cpp $(COMMON_SRC)
|
||||||
|
BENCHMARK_SRC = benchmark.cpp $(COMMON_SRC)
|
||||||
|
|
||||||
INCLUDES = -I"A:/Programs/OpenCL/include"
|
INCLUDES = -I"A:/Programs/OpenCL/include"
|
||||||
LIB_PATH = -L"A:/Programs/OpenCL/lib"
|
LIB_PATH = -L"A:/Programs/OpenCL/lib"
|
||||||
|
|
||||||
$(TARGET): $(SRC)
|
$(TARGET): $(MAIN_SRC)
|
||||||
$(CXX) $(CXXFLAGS) $(INCLUDES) $(LIB_PATH) -o $(TARGET) $(SRC) $(LIBS)
|
$(CXX) $(CXXFLAGS) $(INCLUDES) $(LIB_PATH) -o $(TARGET) $(MAIN_SRC) $(LIBS)
|
||||||
|
|
||||||
|
benchmark: $(BENCHMARK_SRC)
|
||||||
|
$(CXX) $(CXXFLAGS) $(INCLUDES) $(LIB_PATH) -o $(TARGET) $(BENCHMARK_SRC) $(LIBS)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f $(TARGET)
|
rm -f $(TARGET)
|
||||||
@@ -16,4 +21,7 @@ clean:
|
|||||||
run: $(TARGET)
|
run: $(TARGET)
|
||||||
./$(TARGET)
|
./$(TARGET)
|
||||||
|
|
||||||
|
run_benchmark: benchmark
|
||||||
|
./$(TARGET)
|
||||||
|
|
||||||
.PHONY: clean run
|
.PHONY: clean run
|
||||||
136
src/benchmark.cpp
Normal file
136
src/benchmark.cpp
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
#include <chrono>
|
||||||
|
#include <iostream>
|
||||||
|
#include <random>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "./math/math.hpp"
|
||||||
|
|
||||||
|
typedef Matrices::CPU Matrix;
|
||||||
|
typedef MutableMatrices::CPU MutableMatrix;
|
||||||
|
|
||||||
|
OpenCL openCL;
|
||||||
|
|
||||||
|
/// Builds a flat rows*cols buffer filled with pseudo-random floats.
///
/// Values are drawn from std::uniform_real_distribution<float>(-1.0f, 1.0f)
/// using a std::mt19937 engine seeded from std::random_device on every call.
///
/// @param rows Number of matrix rows; must be non-negative.
/// @param cols Number of matrix columns; must be non-negative.
/// @return Vector holding rows * cols generated values.
/// @throws std::invalid_argument if either dimension is negative.
std::vector<float> generateRandomMatrix(int rows, int cols) {
  if (rows < 0 || cols < 0) {
    throw std::invalid_argument("Matrix dimensions must be non-negative");
  }

  // Multiply in size_t: the int product can overflow for large dimensions.
  const std::size_t count =
      static_cast<std::size_t>(rows) * static_cast<std::size_t>(cols);

  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dis(-1.0f, 1.0f);

  std::vector<float> matrix(count);
  for (float &value : matrix) {
    value = dis(gen);
  }
  return matrix;
}
|
||||||
|
/// Builds a flat, row-major size*size identity matrix.
///
/// @param size Number of rows and columns; must be non-negative.
/// @return Vector of size * size floats: 1.0f on the main diagonal,
///         0.0f everywhere else.
/// @throws std::invalid_argument if size is negative.
std::vector<float> generateIdentityMatrix(int size) {
  if (size < 0) {
    throw std::invalid_argument("Matrix size must be non-negative");
  }

  // Index arithmetic is done in size_t so size * size cannot overflow int.
  const std::size_t n = static_cast<std::size_t>(size);

  std::vector<float> matrix(n * n, 0.0f);
  for (std::size_t i = 0; i < n; ++i) {
    matrix[i * n + i] = 1.0f;
  }
  return matrix;
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
const int SIZE = 1024;
|
||||||
|
|
||||||
|
std::cout << "Testing with " << SIZE << "x" << SIZE << " matrices..."
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
|
std::vector<float> matrixA = generateRandomMatrix(SIZE, SIZE);
|
||||||
|
std::vector<float> matrixB = generateRandomMatrix(SIZE, SIZE);
|
||||||
|
std::vector<float> matrixC = generateRandomMatrix(SIZE, SIZE);
|
||||||
|
|
||||||
|
// std::vector<float> matrixA = generateIdentityMatrix(SIZE);
|
||||||
|
// std::vector<float> matrixB = generateIdentityMatrix(SIZE);
|
||||||
|
// std::vector<float> matrixC = generateIdentityMatrix(SIZE);
|
||||||
|
|
||||||
|
// Тестирование на CPU
|
||||||
|
{
|
||||||
|
std::cout << "\n=== CPU Version ===" << std::endl;
|
||||||
|
|
||||||
|
auto start = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
MutableMatrices::CPU a(SIZE, SIZE, matrixA);
|
||||||
|
Matrices::CPU b(SIZE, SIZE, matrixB);
|
||||||
|
Matrices::CPU c(SIZE, SIZE, matrixC);
|
||||||
|
|
||||||
|
auto gen_end = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
auto op_start = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
a.mult(b, 0.2f, MutableMatrices::CPU::Activate::SIGMOID);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto op_end = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
std::vector<float> v = a.toVector();
|
||||||
|
|
||||||
|
auto total_end = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
auto gen_duration =
|
||||||
|
std::chrono::duration_cast<std::chrono::milliseconds>(gen_end - start);
|
||||||
|
auto op_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||||
|
op_end - op_start);
|
||||||
|
auto total_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||||
|
total_end - start);
|
||||||
|
|
||||||
|
std::cout << "Matrix generation time: " << gen_duration.count() << " ms"
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << "Operations time: " << op_duration.count() << " ms"
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << "Total time: " << total_duration.count() << " ms" << std::endl;
|
||||||
|
|
||||||
|
std::cout << "First few elements: ";
|
||||||
|
for (size_t i = 0; i < 5 && i < v.size(); ++i) {
|
||||||
|
std::cout << v[i] << " ";
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Тестирование на GPU
|
||||||
|
{
|
||||||
|
std::cout << "\n=== GPU Version ===" << std::endl;
|
||||||
|
|
||||||
|
auto start = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
MutableMatrices::GPU a(SIZE, SIZE, matrixA);
|
||||||
|
Matrices::GPU b(SIZE, SIZE, matrixB);
|
||||||
|
Matrices::GPU c(SIZE, SIZE, matrixC);
|
||||||
|
|
||||||
|
auto gen_end = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
auto op_start = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
a.mult(b, 0.2f, MutableMatrices::GPU::Activate::SIGMOID, 0.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto op_end = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
std::vector<float> v = a.toVector();
|
||||||
|
|
||||||
|
auto total_end = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
auto gen_duration =
|
||||||
|
std::chrono::duration_cast<std::chrono::milliseconds>(gen_end - start);
|
||||||
|
auto op_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||||
|
op_end - op_start);
|
||||||
|
auto total_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||||
|
total_end - start);
|
||||||
|
|
||||||
|
std::cout << "Matrix generation time: " << gen_duration.count() << " ms"
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << "Operations time: " << op_duration.count() << " ms"
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << "Total time: " << total_duration.count() << " ms" << std::endl;
|
||||||
|
|
||||||
|
std::cout << "First few elements: ";
|
||||||
|
for (size_t i = 0; i < 5 && i < v.size(); ++i) {
|
||||||
|
std::cout << v[i] << " ";
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
179
src/main.cpp
179
src/main.cpp
@@ -1,133 +1,72 @@
|
|||||||
#include <chrono>
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
#include <stdexcept>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "./math/math.hpp"
|
#include "./math/math.hpp"
|
||||||
|
|
||||||
typedef Matrices::CPU Matrix;
|
#include <chrono>
|
||||||
typedef MutableMatrices::CPU MutableMatrix;
|
#include <thread>
|
||||||
|
|
||||||
|
typedef Matrices::GPU M;
|
||||||
|
typedef MutableMatrices::GPU MM;
|
||||||
|
|
||||||
|
class Layer {
|
||||||
|
protected:
|
||||||
|
int features;
|
||||||
|
float bias;
|
||||||
|
MM::Activate activate;
|
||||||
|
float alpha;
|
||||||
|
|
||||||
|
public:
|
||||||
|
Layer(int features, MM::Activate activate = MM::Activate::LINEAR,
|
||||||
|
float bias = 0.0f, float alpha = 0.0f)
|
||||||
|
: features(features), activate(activate), bias(bias), alpha(alpha) {}
|
||||||
|
|
||||||
|
int getFeatures() const { return features; }
|
||||||
|
float getBias() const { return bias; }
|
||||||
|
MM::Activate getActivate() const { return activate; }
|
||||||
|
float getAlpha() const { return alpha; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class NeuralNetwork {
|
||||||
|
private:
|
||||||
|
std::vector<Layer> layers;
|
||||||
|
std::vector<MM> weights;
|
||||||
|
|
||||||
|
public:
|
||||||
|
NeuralNetwork(int n, std::initializer_list<Layer> l) : layers(l) {
|
||||||
|
weights.emplace_back(n, layers[0].getFeatures());
|
||||||
|
for (int i = 0; i < layers.size() - 1; i++)
|
||||||
|
weights.emplace_back(layers[i].getFeatures(),
|
||||||
|
layers[i + 1].getFeatures());
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<float> predict(std::vector<float> i) {
|
||||||
|
if (i.size() != weights[0].getRows())
|
||||||
|
std::invalid_argument("Invalid input size");
|
||||||
|
MM input(1, (int)i.size(), i);
|
||||||
|
for (size_t i = 0; i < weights.size(); i++)
|
||||||
|
input.mult(weights[i], layers[i + 1].getBias(),
|
||||||
|
layers[i + 1].getActivate(), layers[i + 1].getAlpha());
|
||||||
|
return input.toVector();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
OpenCL openCL;
|
OpenCL openCL;
|
||||||
|
|
||||||
std::vector<float> generateRandomMatrix(int rows, int cols) {
|
|
||||||
std::random_device rd;
|
|
||||||
std::mt19937 gen(rd());
|
|
||||||
std::uniform_real_distribution<float> dis(-1.0f, 1.0f);
|
|
||||||
|
|
||||||
std::vector<float> matrix(rows * cols);
|
|
||||||
for (int i = 0; i < rows * cols; ++i) {
|
|
||||||
matrix[i] = dis(gen);
|
|
||||||
}
|
|
||||||
return matrix;
|
|
||||||
}
|
|
||||||
std::vector<float> generateIdentityMatrix(int size) {
|
|
||||||
std::vector<float> matrix(size * size, 0.0f);
|
|
||||||
for (int i = 0; i < size; ++i) {
|
|
||||||
matrix[i * size + i] = 1.0f;
|
|
||||||
}
|
|
||||||
return matrix;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
const int SIZE = 1024;
|
NeuralNetwork nn(
|
||||||
|
2, {Layer(3, MM::Activate::RELU), Layer(1, MM::Activate::RELU)});
|
||||||
|
|
||||||
std::cout << "Testing with " << SIZE << "x" << SIZE << " matrices..."
|
for (int i = 0; i < 10; i++) {
|
||||||
<< std::endl;
|
int v1 = (i / 2) % 2;
|
||||||
|
int v2 = i % 2;
|
||||||
|
|
||||||
std::vector<float> matrixA = generateRandomMatrix(SIZE, SIZE);
|
std::vector<float> v = {static_cast<float>(v1), static_cast<float>(v2)};
|
||||||
std::vector<float> matrixB = generateRandomMatrix(SIZE, SIZE);
|
|
||||||
std::vector<float> matrixC = generateRandomMatrix(SIZE, SIZE);
|
|
||||||
|
|
||||||
// std::vector<float> matrixA = generateIdentityMatrix(SIZE);
|
std::vector<float> r = nn.predict(v);
|
||||||
// std::vector<float> matrixB = generateIdentityMatrix(SIZE);
|
float expected = static_cast<float>(v1 ^ v2);
|
||||||
// std::vector<float> matrixC = generateIdentityMatrix(SIZE);
|
|
||||||
|
|
||||||
// Тестирование на CPU
|
std::cout << "XOR(" << v1 << ", " << v2 << ") = " << expected;
|
||||||
{
|
std::cout << " | Network: ";
|
||||||
std::cout << "\n=== CPU Version ===" << std::endl;
|
for (size_t j = 0; j < r.size(); ++j) {
|
||||||
|
std::cout << r[j] << " ";
|
||||||
auto start = std::chrono::high_resolution_clock::now();
|
|
||||||
|
|
||||||
MutableMatrices::CPU a(SIZE, SIZE, matrixA);
|
|
||||||
Matrices::CPU b(SIZE, SIZE, matrixB);
|
|
||||||
Matrices::CPU c(SIZE, SIZE, matrixC);
|
|
||||||
|
|
||||||
auto gen_end = std::chrono::high_resolution_clock::now();
|
|
||||||
|
|
||||||
auto op_start = std::chrono::high_resolution_clock::now();
|
|
||||||
|
|
||||||
for (int i = 0; i < 10; i++) {
|
|
||||||
a.mult(b, 0.2f, MutableMatrices::CPU::Activate::SIGMOID);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto op_end = std::chrono::high_resolution_clock::now();
|
|
||||||
|
|
||||||
std::vector<float> v = a.toVector();
|
|
||||||
|
|
||||||
auto total_end = std::chrono::high_resolution_clock::now();
|
|
||||||
|
|
||||||
auto gen_duration =
|
|
||||||
std::chrono::duration_cast<std::chrono::milliseconds>(gen_end - start);
|
|
||||||
auto op_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
|
|
||||||
op_end - op_start);
|
|
||||||
auto total_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
|
|
||||||
total_end - start);
|
|
||||||
|
|
||||||
std::cout << "Matrix generation time: " << gen_duration.count() << " ms"
|
|
||||||
<< std::endl;
|
|
||||||
std::cout << "Operations time: " << op_duration.count() << " ms"
|
|
||||||
<< std::endl;
|
|
||||||
std::cout << "Total time: " << total_duration.count() << " ms" << std::endl;
|
|
||||||
|
|
||||||
std::cout << "First few elements: ";
|
|
||||||
for (size_t i = 0; i < 5 && i < v.size(); ++i) {
|
|
||||||
std::cout << v[i] << " ";
|
|
||||||
}
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Тестирование на GPU
|
|
||||||
{
|
|
||||||
std::cout << "\n=== GPU Version ===" << std::endl;
|
|
||||||
|
|
||||||
auto start = std::chrono::high_resolution_clock::now();
|
|
||||||
|
|
||||||
MutableMatrices::GPU a(SIZE, SIZE, matrixA);
|
|
||||||
Matrices::GPU b(SIZE, SIZE, matrixB);
|
|
||||||
Matrices::GPU c(SIZE, SIZE, matrixC);
|
|
||||||
|
|
||||||
auto gen_end = std::chrono::high_resolution_clock::now();
|
|
||||||
|
|
||||||
auto op_start = std::chrono::high_resolution_clock::now();
|
|
||||||
|
|
||||||
for (int i = 0; i < 10; i++) {
|
|
||||||
a.mult(b, 0.2f, MutableMatrices::GPU::Activate::SIGMOID, 0.0f);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto op_end = std::chrono::high_resolution_clock::now();
|
|
||||||
|
|
||||||
std::vector<float> v = a.toVector();
|
|
||||||
|
|
||||||
auto total_end = std::chrono::high_resolution_clock::now();
|
|
||||||
|
|
||||||
auto gen_duration =
|
|
||||||
std::chrono::duration_cast<std::chrono::milliseconds>(gen_end - start);
|
|
||||||
auto op_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
|
|
||||||
op_end - op_start);
|
|
||||||
auto total_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
|
|
||||||
total_end - start);
|
|
||||||
|
|
||||||
std::cout << "Matrix generation time: " << gen_duration.count() << " ms"
|
|
||||||
<< std::endl;
|
|
||||||
std::cout << "Operations time: " << op_duration.count() << " ms"
|
|
||||||
<< std::endl;
|
|
||||||
std::cout << "Total time: " << total_duration.count() << " ms" << std::endl;
|
|
||||||
|
|
||||||
std::cout << "First few elements: ";
|
|
||||||
for (size_t i = 0; i < 5 && i < v.size(); ++i) {
|
|
||||||
std::cout << v[i] << " ";
|
|
||||||
}
|
}
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,15 +1,35 @@
|
|||||||
|
#include <random>
|
||||||
|
|
||||||
#include "matrix.hpp"
|
#include "matrix.hpp"
|
||||||
|
|
||||||
|
std::random_device rd;
|
||||||
|
std::mt19937 gen(rd());
|
||||||
|
|
||||||
|
/// Creates a rows x cols device matrix filled with pseudo-random values.
///
/// The host-side values come from std::generate_canonical<float, 32> using
/// the file-level `gen` engine and are uploaded with a blocking write.
///
/// @param rows Number of rows.
/// @param cols Number of columns.
/// NOTE(review): validateDimensions presumably rejects invalid sizes by
/// throwing - confirm against its definition.
Matrices::GPU::GPU(int rows, int cols)
    : IMatrix(rows, cols), queue(openCL.getContext(), openCL.getDevice()) {
  validateDimensions(rows, cols);

  // Compute sizes once in size_t; multiplying the ints first could overflow.
  const size_t count = static_cast<size_t>(rows) * static_cast<size_t>(cols);
  const size_t bytes = count * sizeof(float);

  std::vector<float> matrix;
  matrix.reserve(count);
  for (size_t i = 0; i < count; ++i)
    matrix.push_back(std::generate_canonical<float, 32>(gen));

  buffer = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE, bytes);
  // CL_TRUE makes the write blocking, so `matrix` may be destroyed on return.
  queue.enqueueWriteBuffer(*buffer, CL_TRUE, 0, bytes, matrix.data());
  queue.finish();
}
|
||||||
|
|
||||||
Matrices::GPU::GPU(int rows, int cols, const std::vector<float> &matrix)
|
Matrices::GPU::GPU(int rows, int cols, const std::vector<float> &matrix)
|
||||||
: IMatrix(rows, cols), queue(openCL.getContext(), openCL.getDevice()) {
|
: IMatrix(rows, cols), queue(openCL.getContext(), openCL.getDevice()) {
|
||||||
validateDimensions(rows, cols);
|
validateDimensions(rows, cols);
|
||||||
if (matrix.size() != static_cast<size_t>(rows * cols)) {
|
if (matrix.size() != static_cast<size_t>(rows * cols)) {
|
||||||
throw std::invalid_argument("Matrix data size doesn't match dimensions");
|
throw std::invalid_argument("Matrix data size doesn't match dimensions");
|
||||||
}
|
}
|
||||||
|
buffer = new cl::Buffer(openCL.getContext(), CL_MEM_READ_WRITE,
|
||||||
buffer = new cl::Buffer(
|
rows * cols * sizeof(float));
|
||||||
openCL.getContext(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
queue.enqueueWriteBuffer(*buffer, CL_TRUE, 0, rows * cols * sizeof(float),
|
||||||
rows * cols * sizeof(float), const_cast<float *>(matrix.data()));
|
matrix.data());
|
||||||
|
queue.finish();
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<float> Matrices::GPU::toVector() const {
|
const std::vector<float> Matrices::GPU::toVector() const {
|
||||||
|
|||||||
@@ -11,12 +11,19 @@ protected:
|
|||||||
cl::CommandQueue queue;
|
cl::CommandQueue queue;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
GPU(int rows, int cols);
|
||||||
GPU(int rows, int cols, const std::vector<float> &matrix);
|
GPU(int rows, int cols, const std::vector<float> &matrix);
|
||||||
~GPU() { delete buffer; }
|
~GPU() { delete buffer; }
|
||||||
|
|
||||||
GPU(const GPU &) = delete;
|
GPU(const GPU &) = delete;
|
||||||
GPU &operator=(const GPU &) = delete;
|
GPU &operator=(const GPU &) = delete;
|
||||||
GPU(GPU &&other) = default;
|
GPU(GPU &&other)
|
||||||
|
: IMatrix(other.rows, other.cols), buffer(other.buffer),
|
||||||
|
queue(std::move(other.queue)) {
|
||||||
|
other.buffer = nullptr;
|
||||||
|
other.rows = 0;
|
||||||
|
other.cols = 0;
|
||||||
|
}
|
||||||
GPU &operator=(GPU &&other) = default;
|
/// Move assignment: releases the buffer currently owned by this object and
/// takes over `other`'s buffer, queue and dimensions.
///
/// A defaulted move assignment would copy the raw `buffer` pointer, leaking
/// the old buffer and leaving two objects that both delete the same one.
GPU &operator=(GPU &&other) {
  if (this != &other) {
    delete buffer; // free what this object owned before the assignment
    buffer = other.buffer;
    queue = std::move(other.queue);
    rows = other.rows;
    cols = other.cols;
    // Leave the source empty, the same way the move constructor does.
    other.buffer = nullptr;
    other.rows = 0;
    other.cols = 0;
  }
  return *this;
}
|
||||||
|
|
||||||
int getRows() const override { return rows; }
|
int getRows() const override { return rows; }
|
||||||
|
|||||||
@@ -1,5 +1,12 @@
|
|||||||
#include "mutable_matrix.hpp"
|
#include "mutable_matrix.hpp"
|
||||||
|
|
||||||
|
/// Constructs a mutable rows x cols GPU matrix via the base-class
/// (rows, cols) constructor, then creates one cl::Kernel from the MATRIX
/// program for every entry of kernelsNames and stores it in `kernels`.
MutableMatrices::GPU::GPU(int rows, int cols) : Matrices::GPU(rows, cols) {
  for (const auto &[kernelId, kernelName] : kernelsNames) {
    cl::Kernel kernel(openCL.getProgram(OpenCL::Program::MATRIX), kernelName);
    kernels[kernelId] = kernel;
  }
}
|
||||||
|
|
||||||
MutableMatrices::GPU::GPU(int rows, int cols, const std::vector<float> &matrix)
|
MutableMatrices::GPU::GPU(int rows, int cols, const std::vector<float> &matrix)
|
||||||
: Matrices::GPU(rows, cols, matrix) {
|
: Matrices::GPU(rows, cols, matrix) {
|
||||||
for (const auto &entry : kernelsNames) {
|
for (const auto &entry : kernelsNames) {
|
||||||
|
|||||||
@@ -27,8 +27,14 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
GPU(int rows, int cols);
|
||||||
GPU(int rows, int cols, const std::vector<float> &matrix);
|
GPU(int rows, int cols, const std::vector<float> &matrix);
|
||||||
|
|
||||||
|
GPU(const GPU &) = delete;
|
||||||
|
GPU &operator=(const GPU &) = delete;
|
||||||
|
GPU(GPU &&other) = default;
|
||||||
|
GPU &operator=(GPU &&other) = default;
|
||||||
|
|
||||||
void mult(Matrices::GPU &m, float bias = 0.0f,
|
void mult(Matrices::GPU &m, float bias = 0.0f,
|
||||||
Activate type = Activate::LINEAR, float alpha = 0.01f);
|
Activate type = Activate::LINEAR, float alpha = 0.01f);
|
||||||
void mult(float scalar);
|
void mult(float scalar);
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <random>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ public:
|
|||||||
virtual void activate(Activate type, float alpha = 0.01f) = 0;
|
virtual void activate(Activate type, float alpha = 0.01f) = 0;
|
||||||
|
|
||||||
void validateMultDimensions(T &a, T &b) const {
|
void validateMultDimensions(T &a, T &b) const {
|
||||||
if (a.getRows() != b.getCols()) {
|
if (a.getCols() != b.getRows()) {
|
||||||
throw std::invalid_argument(
|
throw std::invalid_argument(
|
||||||
"Invalid matrix dimensions for multiplication");
|
"Invalid matrix dimensions for multiplication");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,7 +13,7 @@
|
|||||||
|
|
||||||
class OpenCL {
|
class OpenCL {
|
||||||
public:
|
public:
|
||||||
enum class Program { MATRIX, MATH, IMAGE_PROCESSING };
|
enum class Program { MATRIX };
|
||||||
|
|
||||||
private:
|
private:
|
||||||
cl::Device device;
|
cl::Device device;
|
||||||
|
|||||||
Reference in New Issue
Block a user