mirror of
https://github.com/StepanovPlaton/NeuralNetwork.git
synced 2026-04-03 20:30:39 +04:00
Work
This commit is contained in:
156
src/tensor/opencl/kernels.hpp
Normal file
156
src/tensor/opencl/kernels.hpp
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
#include "opencl.hpp"

#include <CL/opencl.hpp>

#include <format>
#include <mutex>
#include <stdexcept>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
|
||||||
|
/// Generates, compiles and caches OpenCL kernels for element-wise and
/// scalar tensor operations on element type T.
template <typename T, int Dim> class Kernels {

public:
  /// Vector width used by the generated kernels' vloadN/vstoreN calls.
  enum class Vector {
    type2 = 2,
    type4 = 4,
    type8 = 8,
    type16 = 16,
  };

  /// Operations a kernel can be created for.
  enum class Method {
    POSITIVE,
    NEGATIVE,
    S_ADD,
    S_MULT,
    T_ADD,
    T_HADAMARD,
    T_MULT,
  };

  /// OpenCL C scalar type name corresponding to T.
  /// (typeid(T).name() is implementation-defined and usually mangled —
  /// e.g. "f" for float on GCC — so it is not a valid OpenCL type name.)
  static constexpr const char *openclTypeName() {
    if constexpr (std::is_same_v<T, float>)
      return "float";
    else if constexpr (std::is_same_v<T, double>)
      return "double";
    else if constexpr (std::is_same_v<T, int>)
      return "int";
    else if constexpr (std::is_same_v<T, unsigned int>)
      return "uint";
    else if constexpr (std::is_same_v<T, long>)
      return "long";
    else if constexpr (std::is_same_v<T, short>)
      return "short";
    else if constexpr (std::is_same_v<T, char>)
      return "char";
    else
      static_assert(sizeof(T) == 0, "No OpenCL type name known for T");
  }

  // `constexpr static std::string` data members are ill-formed; an inline
  // static const is the correct way to expose this in a header.
  inline static const std::string type = openclTypeName();

  // TODO: query the device's preferred native vector width instead.
  inline static Vector vector = Vector::type8;

private:
  // NOTE: std::format does not support named placeholders ("{method}");
  // the generators below use positional arguments:
  //   {0} = kernel name, {1} = scalar type, {2} = vector width,
  //   {3} = operation token.
  // Doubled braces ({{ }}) are literal braces in the emitted OpenCL C.

  /// Source for an in-place unary element-wise kernel: A[i] = <op>A[i].
  static std::string unaryOperation(std::string name, std::string operation) {
    return std::format(
        R"(
    __kernel void {0}(__global {1}* A, int len) {{
        int gid = get_global_id(0);
        int base = gid * {2};
        if (base + ({2}-1) < len) {{
            {1}{2} data = vload{2}(gid, A);
            vstore{2}({3}data, gid, A);
        }} else {{
            for (int i = 0; i < {2}; i++) {{
                int idx = base + i;
                if (idx < len) A[idx] = {3}A[idx];
            }}
        }}
    }}
    )",
        name, type, static_cast<int>(vector), operation);
  }

  /// Source for an in-place scalar kernel: A[i] = A[i] <op> scalar.
  static std::string scalarOperation(std::string name, std::string operation) {
    return std::format(
        R"(
    __kernel void {0}(__global {1}* A, int len, {1} scalar) {{
        int gid = get_global_id(0);
        int base = gid * {2};
        if (base + ({2}-1) < len) {{
            {1}{2} data = vload{2}(gid, A);
            data = data {3} scalar;
            vstore{2}(data, gid, A);
        }} else {{
            for (int i = 0; i < {2}; i++) {{
                int idx = base + i;
                if (idx < len) A[idx] = A[idx] {3} scalar;
            }}
        }}
    }}
    )",
        name, type, static_cast<int>(vector), operation);
  }

  /// Source for an in-place element-wise binary kernel: A[i] = A[i] <op> B[i].
  static std::string binaryOperation(std::string name, std::string operation) {
    return std::format(
        R"(
    __kernel void {0}(__global {1}* A, __global {1}* B, int len) {{
        int gid = get_global_id(0);
        int base = gid * {2};
        if (base + ({2}-1) < len) {{
            {1}{2} dataA = vload{2}(gid, A);
            {1}{2} dataB = vload{2}(gid, B);
            vstore{2}(dataA {3} dataB, gid, A);
        }} else {{
            for (int i = 0; i < {2}; i++) {{
                int idx = base + i;
                if (idx < len) A[idx] = A[idx] {3} B[idx];
            }}
        }}
    }}
    )",
        name, type, static_cast<int>(vector), operation);
  }

  /// Method -> {kernel source, kernel entry-point name}.
  /// (Brace nesting fixed: several entries previously placed the name
  /// outside the inner tuple, which does not match the mapped type.)
  inline static std::unordered_map<Method, std::tuple<std::string, std::string>>
      programs = {
          {Method::POSITIVE, {unaryOperation("positive", "+"), "positive"}},
          {Method::NEGATIVE, {unaryOperation("negative", "-"), "negative"}},

          {Method::S_ADD, {scalarOperation("add", "+"), "add"}},
          {Method::S_MULT, {scalarOperation("mult", "*"), "mult"}},

          {Method::T_ADD, {binaryOperation("add", "+"), "add"}},
          {Method::T_HADAMARD,
           {binaryOperation("hadamard_mult", "*"), "hadamard_mult"}},
          // TODO: generate the tiled matrix-multiplication kernel source.
          {Method::T_MULT, {"", "mult"}},
      };

  inline static std::unordered_map<Method, cl::Program> compiledPrograms;
  inline static std::mutex compileMutex;

public:
  /// Returns a kernel for `method`, compiling and caching its program on
  /// first use. Thread-safe.
  /// @throws std::runtime_error for unknown/unimplemented methods and for
  ///         OpenCL errors (build failures include the device build log).
  static cl::Kernel create(Method method) {
    std::lock_guard<std::mutex> lock(compileMutex);

    auto cached = compiledPrograms.find(method);
    if (cached != compiledPrograms.end()) {
      const auto &cachedName = std::get<1>(programs[method]);
      return cl::Kernel(cached->second, cachedName.c_str());
    }

    auto entry = programs.find(method);
    if (entry == programs.end())
      throw std::runtime_error("Unknown method: " +
                               std::to_string(static_cast<int>(method)));
    const auto &[sourceCode, kernelName] = entry->second;
    // Guard against methods whose source is not generated yet (T_MULT):
    // sending an empty program to the compiler would fail obscurely.
    if (sourceCode.empty())
      throw std::runtime_error("No kernel source for method: " +
                               std::to_string(static_cast<int>(method)));

    cl::Program::Sources sources;
    sources.push_back({sourceCode.c_str(), sourceCode.length()});
    cl::Program clProgram(openCL.getContext(), sources);
    try {
      clProgram.build({openCL.getDevice()});
      compiledPrograms[method] = clProgram;
      return cl::Kernel(clProgram, kernelName.c_str());
    } catch (const cl::Error &e) {
      if (e.err() == CL_BUILD_PROGRAM_FAILURE) {
        // Query the log from the program we actually tried to build — the
        // original queried a freshly constructed (never built) program,
        // whose build log is always empty.
        auto buildLog =
            clProgram.getBuildInfo<CL_PROGRAM_BUILD_LOG>(openCL.getDevice());
        throw std::runtime_error("OpenCL compilation failed: " +
                                 std::string(e.what()) + "\nBuild log:\n" +
                                 buildLog);
      }
      throw std::runtime_error("OpenCL error: " + std::string(e.what()));
    }
  }
};
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
// Unary plus applied element-wise: each element of A is left unchanged.
__kernel void positive(__global float *A) {
  const int idx = get_global_id(0);
  A[idx] = +A[idx];
}
|
|
||||||
|
|
||||||
// Element-wise in-place negation: A[i] becomes -A[i].
__kernel void negative(__global float *A) {
  const int idx = get_global_id(0);
  A[idx] = -A[idx];
}
|
|
||||||
|
|
||||||
|
|
||||||
// Applies the activation selected by activation_type to x:
//   0 = linear, 1 = sigmoid, 2 = tanh, 3 = relu,
//   4 = leaky relu, 5 = elu.
// alpha is the negative-side slope/scale for leaky relu and elu.
// Unknown types fall back to the identity, matching the linear case.
float activate_x(float x, const int activation_type, const float alpha) {
  if (activation_type == 1)
    return 1.0f / (1.0f + exp(-x)); // sigmoid
  if (activation_type == 2)
    return tanh(x);
  if (activation_type == 3)
    return fmax(0.0f, x); // relu
  if (activation_type == 4)
    return (x > 0.0f) ? x : alpha * x; // leaky relu
  if (activation_type == 5)
    return (x > 0.0f) ? x : alpha * (exp(x) - 1.0f); // elu
  return x; // linear (0) or unrecognized type
}
|
|
||||||
// Applies the selected activation to input[i] and writes it to output[i],
// one element per work-item. See activate_x for the type/alpha encoding.
__kernel void activate(__global float *input, __global float *output,
                       const int activation_type, const float alpha) {
  const int idx = get_global_id(0);
  output[idx] = activate_x(input[idx], activation_type, alpha);
}
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
// In-place scalar addition: A[i] += scalar, one element per work-item.
__kernel void add(__global float *A, float scalar) {
  const int idx = get_global_id(0);
  A[idx] = A[idx] + scalar;
}
|
|
||||||
|
|
||||||
// In-place scalar multiplication: A[i] *= scalar, one element per work-item.
__kernel void mult(__global float *A, float scalar) {
  const int idx = get_global_id(0);
  A[idx] = A[idx] * scalar;
}
|
|
||||||
@@ -1,81 +0,0 @@
|
|||||||
// Element-wise in-place addition: A[i] += B[i], one element per work-item.
__kernel void add(__global float *A, __global float *B) {
  const int idx = get_global_id(0);
  A[idx] = A[idx] + B[idx];
}
|
|
||||||
// Element-wise (Hadamard) in-place product: A[i] *= B[i].
__kernel void hadamard_mult(__global float *A, __global float *B) {
  const int idx = get_global_id(0);
  A[idx] = A[idx] * B[idx];
}
|
|
||||||
|
|
||||||
#define TILE_SIZE 16
#define VEC_SIZE 4

// Tiled matrix multiplication: C = A (MxK) * B (KxN), row-major.
// Each work-item computes VEC_SIZE consecutive output rows of one column.
// NOTE(review): assumes a local work size of (TILE_SIZE / VEC_SIZE,
// TILE_SIZE) so that local_row * VEC_SIZE + v spans [0, TILE_SIZE) —
// confirm against the host-side enqueue.
__kernel void mult(__global float *A, __global float *B, __global float *C,
                   const int M, const int N, const int K) {

  const int row = get_global_id(0) * VEC_SIZE;
  const int col = get_global_id(1);
  const int local_row = get_local_id(0);
  const int local_col = get_local_id(1);

  // +1 column of padding to avoid local-memory bank conflicts.
  __local float tile_A[TILE_SIZE][TILE_SIZE + 1];
  __local float tile_B[TILE_SIZE][TILE_SIZE + 1];

  // One scalar accumulator per output row handled by this work-item.
  // (Previously these were float4: adding a scalar product broadcast it to
  // all four lanes, and summing the lanes at the store quadrupled every
  // result.)
  float acc[VEC_SIZE];
  for (int v = 0; v < VEC_SIZE; v++) {
    acc[v] = 0.0f;
  }

  const int numTiles = (K + TILE_SIZE - 1) / TILE_SIZE;

  for (int t = 0; t < numTiles; t++) {
    // Each work-item loads VEC_SIZE rows of both tiles so that all
    // TILE_SIZE rows get populated. (Previously tile_B was written only
    // for rows [0, local size), leaving the remaining rows uninitialized
    // while the compute loop read all of them.)
    const int a_col = t * TILE_SIZE + local_col;
#pragma unroll
    for (int v = 0; v < VEC_SIZE; v++) {
      const int tile_row = local_row * VEC_SIZE + v;

      const int a_row = row + v;
      tile_A[tile_row][local_col] =
          (a_row < M && a_col < K) ? A[a_row * K + a_col] : 0.0f;

      const int b_row = t * TILE_SIZE + tile_row;
      tile_B[tile_row][local_col] =
          (b_row < K && col < N) ? B[b_row * N + col] : 0.0f;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    // Accumulate the partial dot products for this tile.
#pragma unroll
    for (int k = 0; k < TILE_SIZE; k++) {
      const float b_val = tile_B[k][local_col];
#pragma unroll
      for (int v = 0; v < VEC_SIZE; v++) {
        acc[v] += tile_A[local_row * VEC_SIZE + v][k] * b_val;
      }
    }

    barrier(CLK_LOCAL_MEM_FENCE);
  }

  // Write back the VEC_SIZE output rows computed by this work-item.
#pragma unroll
  for (int v = 0; v < VEC_SIZE; v++) {
    const int out_row = row + v;
    if (out_row < M && col < N) {
      C[out_row * N + col] = acc[v];
    }
  }
}
|
|
||||||
@@ -1,66 +1,23 @@
|
|||||||
#include "opencl.hpp"
|
#include "opencl.hpp"
|
||||||
|
|
||||||
#include <fstream>
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <sstream>
|
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
std::string OpenCL::readProgram(const std::string &filePath) {
|
OpenCL::OpenCL() {
|
||||||
std::ifstream file(filePath, std::ios::binary);
|
|
||||||
if (!file.is_open()) {
|
|
||||||
throw std::runtime_error("Cannot open file: " + filePath);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::stringstream buffer;
|
|
||||||
buffer << file.rdbuf();
|
|
||||||
return buffer.str();
|
|
||||||
}
|
|
||||||
cl::Program OpenCL::compileProgram(const std::string &file) {
|
|
||||||
std::string source = readProgram(file);
|
|
||||||
cl::Program program(context, source);
|
|
||||||
try {
|
try {
|
||||||
program.build({device});
|
std::vector<cl::Platform> platforms;
|
||||||
} catch (cl::Error &e) {
|
cl::Platform::get(&platforms);
|
||||||
std::string build_log = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
|
|
||||||
std::cerr << "Build log:\n" << build_log << std::endl;
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
return program;
|
|
||||||
}
|
|
||||||
void OpenCL::loadPrograms(std::string &programsBasePath) {
|
|
||||||
for (const auto &entry : programPaths) {
|
|
||||||
programs[entry.first] = compileProgram(programsBasePath + entry.second);
|
|
||||||
std::cout << "Loaded program: " << entry.second << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void OpenCL::initializeDevice() {
|
if (platforms.empty()) {
|
||||||
std::vector<cl::Platform> platforms;
|
throw std::runtime_error("No OpenCL platforms found");
|
||||||
cl::Platform::get(&platforms);
|
|
||||||
|
|
||||||
if (platforms.empty()) {
|
|
||||||
throw std::runtime_error("No OpenCL platforms found");
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<cl::Device> devices;
|
|
||||||
bool deviceFound = false;
|
|
||||||
|
|
||||||
for (const auto &platform : platforms) {
|
|
||||||
try {
|
|
||||||
platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
|
|
||||||
if (!devices.empty()) {
|
|
||||||
deviceFound = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} catch (const cl::Error &) {
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (!deviceFound) {
|
std::vector<cl::Device> devices;
|
||||||
|
bool deviceFound = false;
|
||||||
|
|
||||||
for (const auto &platform : platforms) {
|
for (const auto &platform : platforms) {
|
||||||
try {
|
try {
|
||||||
platform.getDevices(CL_DEVICE_TYPE_CPU, &devices);
|
platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
|
||||||
if (!devices.empty()) {
|
if (!devices.empty()) {
|
||||||
deviceFound = true;
|
deviceFound = true;
|
||||||
break;
|
break;
|
||||||
@@ -69,32 +26,29 @@ void OpenCL::initializeDevice() {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (!deviceFound) {
|
if (!deviceFound) {
|
||||||
throw std::runtime_error("No suitable OpenCL devices found");
|
for (const auto &platform : platforms) {
|
||||||
}
|
try {
|
||||||
|
platform.getDevices(CL_DEVICE_TYPE_CPU, &devices);
|
||||||
|
if (!devices.empty()) {
|
||||||
|
deviceFound = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} catch (const cl::Error &) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
device = devices[0];
|
if (!deviceFound) {
|
||||||
context = cl::Context(device);
|
throw std::runtime_error("No suitable OpenCL devices found");
|
||||||
queue =
|
}
|
||||||
cl::CommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
|
|
||||||
|
|
||||||
std::cout << "Using device: " << device.getInfo<CL_DEVICE_NAME>()
|
device = devices[0];
|
||||||
<< "\nPlatform: " << platforms[0].getInfo<CL_PLATFORM_NAME>()
|
context = cl::Context(device);
|
||||||
<< "\nCompute units: "
|
queue = cl::CommandQueue(context, device,
|
||||||
<< device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>()
|
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
|
||||||
<< "\nGlobal memory: "
|
|
||||||
<< device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>() / (1024 * 1024)
|
|
||||||
<< " MB" << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
OpenCL::OpenCL() {}
|
|
||||||
|
|
||||||
void OpenCL::init(std::string programsBasePath) {
|
|
||||||
try {
|
|
||||||
initializeDevice();
|
|
||||||
loadPrograms(programsBasePath);
|
|
||||||
} catch (const cl::Error &e) {
|
} catch (const cl::Error &e) {
|
||||||
std::cerr << "OpenCL error: " << e.what() << " (" << e.err() << ")"
|
std::cerr << "OpenCL error: " << e.what() << " (" << e.err() << ")"
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
@@ -102,26 +56,6 @@ void OpenCL::init(std::string programsBasePath) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cl::Program &OpenCL::getProgram(Program program) {
|
|
||||||
auto it = programs.find(program);
|
|
||||||
if (it == programs.end())
|
|
||||||
throw std::invalid_argument("Program not loaded: " +
|
|
||||||
std::to_string(static_cast<int>(program)));
|
|
||||||
return it->second;
|
|
||||||
}
|
|
||||||
|
|
||||||
cl::Kernel OpenCL::createKernel(Method method) {
|
|
||||||
auto methodProgram = methodPrograms.find(method);
|
|
||||||
if (methodProgram == methodPrograms.end())
|
|
||||||
throw std::invalid_argument("Not found program for method: " +
|
|
||||||
std::to_string(static_cast<int>(method)));
|
|
||||||
auto methodName = methodNames.find(method);
|
|
||||||
if (methodName == methodNames.end())
|
|
||||||
throw std::invalid_argument("Not found name for method: " +
|
|
||||||
std::to_string(static_cast<int>(method)));
|
|
||||||
return cl::Kernel(getProgram(methodProgram->second), methodName->second);
|
|
||||||
}
|
|
||||||
|
|
||||||
void OpenCL::printDeviceInfo() const {
|
void OpenCL::printDeviceInfo() const {
|
||||||
std::cout << "=== OpenCL Device Info ===" << std::endl;
|
std::cout << "=== OpenCL Device Info ===" << std::endl;
|
||||||
std::cout << "Name: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
std::cout << "Name: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
||||||
|
|||||||
@@ -4,59 +4,15 @@
|
|||||||
#define CL_HPP_TARGET_OPENCL_VERSION 300
|
#define CL_HPP_TARGET_OPENCL_VERSION 300
|
||||||
#include <CL/opencl.hpp>
|
#include <CL/opencl.hpp>
|
||||||
|
|
||||||
#include <unordered_map>
|
|
||||||
|
|
||||||
class OpenCL {
|
class OpenCL {
|
||||||
public:
|
|
||||||
enum class Method {
|
|
||||||
POSITIVE,
|
|
||||||
NEGATIVE,
|
|
||||||
S_ADD,
|
|
||||||
S_MULT,
|
|
||||||
T_ADD,
|
|
||||||
T_HADAMARD,
|
|
||||||
T_MULT,
|
|
||||||
};
|
|
||||||
enum class Program { ATOMIC, SCALAR, TENSOR, FUSION };
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
cl::Device device;
|
cl::Device device;
|
||||||
cl::Context context;
|
cl::Context context;
|
||||||
cl::CommandQueue queue;
|
cl::CommandQueue queue;
|
||||||
|
|
||||||
std::unordered_map<Program, cl::Program> programs;
|
|
||||||
std::unordered_map<Program, std::string> programPaths = {
|
|
||||||
{Program::ATOMIC, "opencl/kernels/atomic.cl"},
|
|
||||||
{Program::SCALAR, "opencl/kernels/scalar.cl"},
|
|
||||||
{Program::TENSOR, "opencl/kernels/tensor.cl"},
|
|
||||||
{Program::FUSION, "opencl/kernels/fusion.cl"}};
|
|
||||||
std::unordered_map<Method, Program> methodPrograms = {
|
|
||||||
{Method::POSITIVE, Program::ATOMIC},
|
|
||||||
{Method::NEGATIVE, Program::ATOMIC},
|
|
||||||
{Method::S_ADD, Program::SCALAR},
|
|
||||||
{Method::S_MULT, Program::SCALAR},
|
|
||||||
{Method::T_ADD, Program::TENSOR},
|
|
||||||
{Method::T_HADAMARD, Program::TENSOR},
|
|
||||||
{Method::T_MULT, Program::TENSOR},
|
|
||||||
};
|
|
||||||
std::unordered_map<Method, std::string> methodNames = {
|
|
||||||
{Method::POSITIVE, "positive"}, {Method::NEGATIVE, "negative"},
|
|
||||||
{Method::S_ADD, "add"}, {Method::S_MULT, "mult"},
|
|
||||||
{Method::T_ADD, "add"}, {Method::T_HADAMARD, "hadamard_mult"},
|
|
||||||
{Method::T_MULT, "mult"},
|
|
||||||
};
|
|
||||||
|
|
||||||
std::string readProgram(const std::string &filePath);
|
|
||||||
cl::Program compileProgram(const std::string &file);
|
|
||||||
void loadPrograms(std::string &programsBasePath);
|
|
||||||
|
|
||||||
void initializeDevice();
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
OpenCL();
|
OpenCL();
|
||||||
|
|
||||||
void init(std::string programsBasePath);
|
|
||||||
|
|
||||||
OpenCL(const OpenCL &) = delete;
|
OpenCL(const OpenCL &) = delete;
|
||||||
OpenCL &operator=(const OpenCL &) = delete;
|
OpenCL &operator=(const OpenCL &) = delete;
|
||||||
OpenCL(OpenCL &&) = delete;
|
OpenCL(OpenCL &&) = delete;
|
||||||
@@ -66,9 +22,6 @@ public:
|
|||||||
cl::Context &getContext() { return context; }
|
cl::Context &getContext() { return context; }
|
||||||
const cl::CommandQueue &getQueue() { return queue; }
|
const cl::CommandQueue &getQueue() { return queue; }
|
||||||
|
|
||||||
cl::Program &getProgram(Program program);
|
|
||||||
cl::Kernel createKernel(Method method);
|
|
||||||
|
|
||||||
void printDeviceInfo() const;
|
void printDeviceInfo() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
#include "opencl.hpp"
|
#include "opencl.hpp"
|
||||||
|
|
||||||
|
#include "kernels.hpp"
|
||||||
|
|
||||||
#include "../tensor.hpp"
|
#include "../tensor.hpp"
|
||||||
|
|
||||||
#include <random>
|
#include <random>
|
||||||
@@ -45,6 +47,7 @@ private:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
typedef class ITensor<T, Dim> ITensor;
|
typedef class ITensor<T, Dim> ITensor;
|
||||||
|
typedef class Kernels<T, Dim> Kernels;
|
||||||
|
|
||||||
using ITensor::axes_;
|
using ITensor::axes_;
|
||||||
using ITensor::checkAxisInDim;
|
using ITensor::checkAxisInDim;
|
||||||
@@ -117,7 +120,7 @@ public:
|
|||||||
using ITensor::operator-;
|
using ITensor::operator-;
|
||||||
|
|
||||||
Tensor operator+() const override {
|
Tensor operator+() const override {
|
||||||
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::POSITIVE);
|
cl::Kernel kernel = Kernels::create(Kernels::Method::POSITIVE);
|
||||||
kernel.setArg(0, *data_);
|
kernel.setArg(0, *data_);
|
||||||
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
||||||
cl::NDRange(getSize()),
|
cl::NDRange(getSize()),
|
||||||
@@ -126,7 +129,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
Tensor operator-() const override {
|
Tensor operator-() const override {
|
||||||
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::NEGATIVE);
|
cl::Kernel kernel = Kernels::create(Kernels::Method::NEGATIVE);
|
||||||
kernel.setArg(0, *data_);
|
kernel.setArg(0, *data_);
|
||||||
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
||||||
cl::NDRange(getSize()),
|
cl::NDRange(getSize()),
|
||||||
@@ -135,7 +138,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
Tensor &operator+=(const T scalar) override {
|
Tensor &operator+=(const T scalar) override {
|
||||||
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::S_ADD);
|
cl::Kernel kernel = Kernels::create(Kernels::Method::S_ADD);
|
||||||
kernel.setArg(0, *data_);
|
kernel.setArg(0, *data_);
|
||||||
kernel.setArg(1, scalar);
|
kernel.setArg(1, scalar);
|
||||||
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
||||||
@@ -145,7 +148,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
Tensor &operator*=(const T scalar) override {
|
Tensor &operator*=(const T scalar) override {
|
||||||
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::S_MULT);
|
cl::Kernel kernel = Kernels::create(Kernels::Method::S_MULT);
|
||||||
kernel.setArg(0, *data_);
|
kernel.setArg(0, *data_);
|
||||||
kernel.setArg(1, scalar);
|
kernel.setArg(1, scalar);
|
||||||
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange,
|
||||||
@@ -155,7 +158,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
Tensor &operator+=(const Tensor &other) override {
|
Tensor &operator+=(const Tensor &other) override {
|
||||||
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::T_ADD);
|
cl::Kernel kernel = Kernels::create(Kernels::Method::T_ADD);
|
||||||
kernel.setArg(0, *data_);
|
kernel.setArg(0, *data_);
|
||||||
kernel.setArg(1, *other.getData());
|
kernel.setArg(1, *other.getData());
|
||||||
openCL.getQueue().enqueueNDRangeKernel(
|
openCL.getQueue().enqueueNDRangeKernel(
|
||||||
@@ -165,7 +168,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
Tensor &operator*=(const Tensor &other) override {
|
Tensor &operator*=(const Tensor &other) override {
|
||||||
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::T_HADAMARD);
|
cl::Kernel kernel = Kernels::create(Kernels::Method::T_HADAMARD);
|
||||||
kernel.setArg(0, *data_);
|
kernel.setArg(0, *data_);
|
||||||
kernel.setArg(1, *other.getData());
|
kernel.setArg(1, *other.getData());
|
||||||
openCL.getQueue().enqueueNDRangeKernel(
|
openCL.getQueue().enqueueNDRangeKernel(
|
||||||
@@ -189,7 +192,7 @@ public:
|
|||||||
size_t k = shape_[axes_[1]];
|
size_t k = shape_[axes_[1]];
|
||||||
size_t n = other.shape_[other.axes_[1]];
|
size_t n = other.shape_[other.axes_[1]];
|
||||||
Tensor<T, 2> result({m, n});
|
Tensor<T, 2> result({m, n});
|
||||||
cl::Kernel kernel = openCL.createKernel(OpenCL::Method::T_MULT);
|
cl::Kernel kernel = Kernels::create(Kernels::Method::T_MULT);
|
||||||
kernel.setArg(0, *data_);
|
kernel.setArg(0, *data_);
|
||||||
kernel.setArg(1, *other.getData());
|
kernel.setArg(1, *other.getData());
|
||||||
kernel.setArg(2, *result.getData());
|
kernel.setArg(2, *result.getData());
|
||||||
|
|||||||
Reference in New Issue
Block a user