mirror of
https://github.com/StepanovPlaton/NeuralNetwork.git
synced 2026-04-04 04:40:40 +04:00
Complete first GPU tensors
This commit is contained in:
@@ -26,13 +26,17 @@ PYTHON_INCLUDE = $(shell python -c "import sysconfig; print(sysconfig.get_config
|
|||||||
PYTHON_LIBS = $(PYTHON_PATH)$(SP)libs
|
PYTHON_LIBS = $(PYTHON_PATH)$(SP)libs
|
||||||
PYBIND_INCLUDE = $(shell python -c "import pybind11; print(pybind11.get_include())")
|
PYBIND_INCLUDE = $(shell python -c "import pybind11; print(pybind11.get_include())")
|
||||||
|
|
||||||
|
OPENCL_INCLUDES = -I"A:/Programs/OpenCL/include"
|
||||||
|
OPENCL_LIB_PATH = -L"A:/Programs/OpenCL/lib" -lOpenCL
|
||||||
|
OPENCL_LIB = -L"A:/Programs/OpenCL/lib" -lOpenCL
|
||||||
|
|
||||||
.DEFAULT_GOAL := $(TARGET)
|
.DEFAULT_GOAL := $(TARGET)
|
||||||
|
|
||||||
$(BUILD_DIR):
|
$(BUILD_DIR):
|
||||||
$(MKDIR) $(BUILD_DIR)
|
$(MKDIR) $(BUILD_DIR)
|
||||||
|
|
||||||
$(TARGET): $(COMMON_SRC) main.cpp | $(BUILD_DIR)
|
$(TARGET): $(COMMON_SRC) main.cpp | $(BUILD_DIR)
|
||||||
$(CXX) $(CXXFLAGS) -o $@ $^
|
$(CXX) $(CXXFLAGS) $(OPENCL_INCLUDES) $(OPENCL_LIB_PATH) -o $@ $^ $(OPENCL_LIB)
|
||||||
|
|
||||||
module: $(COMMON_SRC) pybind.cpp | $(BUILD_DIR)
|
module: $(COMMON_SRC) pybind.cpp | $(BUILD_DIR)
|
||||||
$(CXX) $(CXXFLAGS) -shared -fPIC -o tensor.$(SHARED_LIB_EXT) $^ -I"$(PYTHON_INCLUDE)" -L"$(PYTHON_LIBS)" -lpython3.13 -I"$(PYBIND_INCLUDE)"
|
$(CXX) $(CXXFLAGS) -shared -fPIC -o tensor.$(SHARED_LIB_EXT) $^ -I"$(PYTHON_INCLUDE)" -L"$(PYTHON_LIBS)" -lpython3.13 -I"$(PYBIND_INCLUDE)"
|
||||||
|
|||||||
@@ -5,9 +5,16 @@
|
|||||||
|
|
||||||
// TODO: GENERIC KERNELS
|
// TODO: GENERIC KERNELS
|
||||||
// TODO: Scalar mult
|
// TODO: Scalar mult
|
||||||
|
// TODO: TMult >2
|
||||||
|
|
||||||
|
OpenCL openCL;
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
Tensor<float, 2> a = Tensor<float, 2>({2, 4});
|
Tensor<float, 2> a = Tensor<float, 2>({8192, 8192}, 1);
|
||||||
std::cout << a.toString();
|
Tensor<float, 2> b = Tensor<float, 2>({8192, 8192}, 1);
|
||||||
|
auto c = a % b;
|
||||||
|
Tensor<float, 2> d = Tensor<float, 2>(c);
|
||||||
|
d += 1;
|
||||||
|
std::cout << d.toString();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,5 +5,5 @@ __kernel void add(__global float *A, float scalar) {
|
|||||||
|
|
||||||
__kernel void mult(__global float *A, float scalar) {
|
__kernel void mult(__global float *A, float scalar) {
|
||||||
int i = get_global_id(0);
|
int i = get_global_id(0);
|
||||||
B[i] = A[i] * scalar;
|
A[i] *= scalar;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -70,4 +70,4 @@ public:
|
|||||||
void printDeviceInfo() const;
|
void printDeviceInfo() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
OpenCL openCL;
|
extern OpenCL openCL;
|
||||||
|
|||||||
@@ -4,12 +4,13 @@
|
|||||||
|
|
||||||
#include "../tensor.hpp"
|
#include "../tensor.hpp"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
#include <random>
|
#include <random>
|
||||||
|
#include <sstream>
|
||||||
template <typename T, int Dim> class Tensor : public ITensor<T, Dim> {
|
template <typename T, int Dim> class Tensor : public ITensor<T, Dim> {
|
||||||
private:
|
private:
|
||||||
cl::Buffer *data_ = nullptr;
|
cl::Buffer *data_ = nullptr;
|
||||||
cl::Event *event_ = new cl::Event();
|
cl::Event event_ = cl::Event();
|
||||||
|
|
||||||
class AutoEventList {
|
class AutoEventList {
|
||||||
private:
|
private:
|
||||||
@@ -19,7 +20,7 @@ private:
|
|||||||
AutoEventList(std::initializer_list<cl::Event> events) : events_(events) {}
|
AutoEventList(std::initializer_list<cl::Event> events) : events_(events) {}
|
||||||
operator const std::vector<cl::Event> *() const { return &events_; }
|
operator const std::vector<cl::Event> *() const { return &events_; }
|
||||||
};
|
};
|
||||||
template <typename... Events> AutoEventList all(Events &&...events) {
|
template <typename... Events> AutoEventList all(Events &&...events) const {
|
||||||
return AutoEventList{std::forward<Events>(events)...};
|
return AutoEventList{std::forward<Events>(events)...};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -32,16 +33,15 @@ private:
|
|||||||
|
|
||||||
void fillBuf(const std::vector<T> &data) {
|
void fillBuf(const std::vector<T> &data) {
|
||||||
createBuf(data.size());
|
createBuf(data.size());
|
||||||
// event_ = event?!
|
|
||||||
openCL.getQueue().enqueueWriteBuffer(*data_, CL_FALSE, 0,
|
openCL.getQueue().enqueueWriteBuffer(*data_, CL_FALSE, 0,
|
||||||
data.size() * sizeof(T), data.data(),
|
data.size() * sizeof(T), data.data(),
|
||||||
all(*event_), event_);
|
nullptr, &event_);
|
||||||
}
|
}
|
||||||
void fillBuf(const Tensor &other) {
|
void fillBuf(const Tensor &other) {
|
||||||
createBuf(other.getSize());
|
createBuf(other.getSize());
|
||||||
openCL.getQueue().enqueueCopyBuffer(
|
openCL.getQueue().enqueueCopyBuffer(*other.getData(), *data_, 0, 0,
|
||||||
*other.getData(), *data_, 0, 0, other.getSize() * sizeof(T),
|
other.getSize() * sizeof(T),
|
||||||
all(*event_, *other.getEvent()), event_);
|
all(other.getEvent()), &event_);
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@@ -97,7 +97,7 @@ public:
|
|||||||
Tensor(Tensor &&other) noexcept : ITensor(std::move(other)) {
|
Tensor(Tensor &&other) noexcept : ITensor(std::move(other)) {
|
||||||
data_ = other.data_;
|
data_ = other.data_;
|
||||||
event_ = other.event_;
|
event_ = other.event_;
|
||||||
other.data = nullptr;
|
other.data_ = nullptr;
|
||||||
}
|
}
|
||||||
Tensor &operator=(Tensor &&other) noexcept {
|
Tensor &operator=(Tensor &&other) noexcept {
|
||||||
ITensor::operator=(std::move(other));
|
ITensor::operator=(std::move(other));
|
||||||
@@ -112,7 +112,7 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
const cl::Buffer *getData() const { return data_; }
|
const cl::Buffer *getData() const { return data_; }
|
||||||
const cl::Event *getEvent() const { return event_; }
|
const cl::Event &getEvent() const { return event_; }
|
||||||
|
|
||||||
// T &operator[](size_t i);
|
// T &operator[](size_t i);
|
||||||
// const T &operator[](size_t i) const;
|
// const T &operator[](size_t i) const;
|
||||||
@@ -128,7 +128,7 @@ public:
|
|||||||
kernel.setArg(0, *data_);
|
kernel.setArg(0, *data_);
|
||||||
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
||||||
cl::NDRange(getSize()),
|
cl::NDRange(getSize()),
|
||||||
cl::NullRange, all(*event_), event_);
|
cl::NullRange, all(event_), &event_);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -137,7 +137,7 @@ public:
|
|||||||
kernel.setArg(0, *data_);
|
kernel.setArg(0, *data_);
|
||||||
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
||||||
cl::NDRange(getSize()),
|
cl::NDRange(getSize()),
|
||||||
cl::NullRange, all(*event_), event_);
|
cl::NullRange, all(event_), &event_);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -147,7 +147,7 @@ public:
|
|||||||
kernel.setArg(1, scalar);
|
kernel.setArg(1, scalar);
|
||||||
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
||||||
cl::NDRange(getSize()),
|
cl::NDRange(getSize()),
|
||||||
cl::NullRange, all(*event_), event_);
|
cl::NullRange, all(event_), &event_);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -157,7 +157,7 @@ public:
|
|||||||
kernel.setArg(1, scalar);
|
kernel.setArg(1, scalar);
|
||||||
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
||||||
cl::NDRange(getSize()),
|
cl::NDRange(getSize()),
|
||||||
cl::NullRange, all(*event_), event_);
|
cl::NullRange, all(event_), &event_);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -167,7 +167,7 @@ public:
|
|||||||
kernel.setArg(1, *other.getData());
|
kernel.setArg(1, *other.getData());
|
||||||
openCL.getQueue().enqueueNDRangeKernel(
|
openCL.getQueue().enqueueNDRangeKernel(
|
||||||
kernel, cl::NullRange, cl::NDRange(getSize()), cl::NullRange,
|
kernel, cl::NullRange, cl::NDRange(getSize()), cl::NullRange,
|
||||||
all(*event_, *other.event_), event_);
|
all(event_, other.event_), &event_);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -177,7 +177,7 @@ public:
|
|||||||
kernel.setArg(1, *other.getData());
|
kernel.setArg(1, *other.getData());
|
||||||
openCL.getQueue().enqueueNDRangeKernel(
|
openCL.getQueue().enqueueNDRangeKernel(
|
||||||
kernel, cl::NullRange, cl::NDRange(getSize()), cl::NullRange,
|
kernel, cl::NullRange, cl::NDRange(getSize()), cl::NullRange,
|
||||||
all(*event_, *other.event_), event_);
|
all(event_, other.event_), &event_);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -191,9 +191,9 @@ public:
|
|||||||
if (shape_[axes_[1]] != other.shape_[other.axes_[0]])
|
if (shape_[axes_[1]] != other.shape_[other.axes_[0]])
|
||||||
throw std::invalid_argument(
|
throw std::invalid_argument(
|
||||||
"Matrix dimensions must match for multiplication");
|
"Matrix dimensions must match for multiplication");
|
||||||
size_t m = shape_[axes_[0]];
|
int m = (int)shape_[axes_[0]];
|
||||||
size_t k = shape_[axes_[1]];
|
int k = (int)shape_[axes_[1]];
|
||||||
size_t n = other.shape_[other.axes_[1]];
|
int n = (int)other.shape_[other.axes_[1]];
|
||||||
Tensor<T, 2> result({m, n});
|
Tensor<T, 2> result({m, n});
|
||||||
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::T_MULT);
|
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::T_MULT);
|
||||||
kernel.setArg(0, *data_);
|
kernel.setArg(0, *data_);
|
||||||
@@ -207,12 +207,58 @@ public:
|
|||||||
cl::NDRange local_size(TILE_SIZE, TILE_SIZE);
|
cl::NDRange local_size(TILE_SIZE, TILE_SIZE);
|
||||||
openCL.getQueue().enqueueNDRangeKernel(
|
openCL.getQueue().enqueueNDRangeKernel(
|
||||||
kernel, cl::NullRange, global_size, local_size,
|
kernel, cl::NullRange, global_size, local_size,
|
||||||
all(*event_, *other.event_), result.event_);
|
all(event_, other.event_), &result.event_);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string toString() const override;
|
std::string toString() const override {
|
||||||
|
std::vector<float> result(getSize());
|
||||||
|
openCL.getQueue().enqueueReadBuffer(
|
||||||
|
*data_, CL_TRUE, 0, getSize() * sizeof(T), result.data(), all(event_));
|
||||||
|
std::ostringstream oss;
|
||||||
|
if constexpr (Dim == 0) {
|
||||||
|
oss << "Scalar<" << typeid(T).name() << ">: " << result[0];
|
||||||
|
} else if constexpr (Dim == 1) {
|
||||||
|
oss << "Vector<" << typeid(T).name() << ">(" << shape_[0] << "): [";
|
||||||
|
for (size_t i = 0; i < getSize(); ++i) {
|
||||||
|
oss << result[i];
|
||||||
|
if (i < getSize() - 1)
|
||||||
|
oss << ", ";
|
||||||
|
}
|
||||||
|
oss << "]";
|
||||||
|
} else if constexpr (Dim == 2) {
|
||||||
|
oss << "Matrix<" << typeid(T).name() << ">(" << shape_[axes_[0]] << "x"
|
||||||
|
<< shape_[axes_[1]] << "):";
|
||||||
|
for (size_t i = 0; i < shape_[axes_[0]]; ++i) {
|
||||||
|
oss << "\n [";
|
||||||
|
for (size_t j = 0; j < shape_[axes_[1]]; ++j) {
|
||||||
|
oss << result[i * shape_[axes_[0]] + j];
|
||||||
|
if (j < shape_[axes_[1]] - 1)
|
||||||
|
oss << ", ";
|
||||||
|
}
|
||||||
|
oss << "]";
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
oss << "Tensor" << Dim << "D<" << typeid(T).name() << ">" << "[";
|
||||||
|
for (size_t i = 0; i < Dim; ++i) {
|
||||||
|
oss << shape_[axes_[i]];
|
||||||
|
if (i < Dim - 1)
|
||||||
|
oss << "x";
|
||||||
|
}
|
||||||
|
oss << "]: [";
|
||||||
|
size_t show = std::min(getSize(), size_t(10));
|
||||||
|
for (size_t i = 0; i < show; ++i) {
|
||||||
|
oss << result[i];
|
||||||
|
if (i < show - 1)
|
||||||
|
oss << ", ";
|
||||||
|
}
|
||||||
|
if (getSize() > 10)
|
||||||
|
oss << ", ...";
|
||||||
|
oss << "]";
|
||||||
|
}
|
||||||
|
return oss.str();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#include "tensor.tpp"
|
#include "tensor.tpp"
|
||||||
|
|||||||
Reference in New Issue
Block a user