// Mirror of https://github.com/StepanovPlaton/NeuralNetwork.git
// Synced 2026-04-04 04:40:40 +04:00 (272 lines, 8.4 KiB, C++)
#include <CL/opencl.hpp>

#include "opencl.hpp"

#include <format>
#include <iostream>
#include <ostream>
#include <stdexcept>
#include <string>
#include <tuple>
#include <unordered_map>

template <typename T> class Kernels {
|
|
public:
|
|
enum class Vector {
|
|
type1 = 1,
|
|
type2 = 2,
|
|
type4 = 4,
|
|
type8 = 8,
|
|
type16 = 16,
|
|
};
|
|
enum class Method {
|
|
POSITIVE,
|
|
NEGATIVE,
|
|
S_ADD,
|
|
S_MULT,
|
|
T_ADD,
|
|
T_HADAMARD,
|
|
T_MULT,
|
|
FUNC
|
|
};
|
|
|
|
private:
|
|
constexpr std::string getTypeName() { return "unknown"; }
|
|
Vector vector;
|
|
std::string configuration;
|
|
|
|
std::string format(std::string tmp,
|
|
std::unordered_map<std::string, std::string> args) {
|
|
std::string result(tmp);
|
|
for (const auto &[key, value] : args) {
|
|
std::string placeholder = "{" + key + "}";
|
|
size_t pos = 0;
|
|
while ((pos = result.find(placeholder, pos)) != std::string::npos) {
|
|
result.replace(pos, placeholder.length(), value);
|
|
pos += value.length();
|
|
}
|
|
}
|
|
// std::cout << result << std::endl;
|
|
return result;
|
|
}
|
|
|
|
std::string unaryOperation(std::string name, std::string operation) {
|
|
return format(
|
|
R"(
|
|
__kernel void {method}(__global type* A, int len) {
|
|
int gid = get_global_id(0);
|
|
#if WIDTH != 1
|
|
int base = gid * WIDTH;
|
|
if (base + WIDTH <= len) {
|
|
typeX data = vloadX(gid, A);
|
|
vstoreX({operation}data, gid, A);
|
|
} else {
|
|
for (int i = 0; i < WIDTH; i++) {
|
|
int idx = base + i;
|
|
if (idx < len) A[idx] = {operation}A[idx];
|
|
}
|
|
}
|
|
#else
|
|
A[gid] = {operation}A[gid];
|
|
#endif
|
|
})",
|
|
{{"method", name}, {"operation", operation}});
|
|
}
|
|
|
|
std::string scalarOperation(std::string name, std::string operation) {
|
|
return format(
|
|
R"(
|
|
__kernel void {method}(__global type* A, int len, type scalar) {
|
|
int gid = get_global_id(0);
|
|
#if WIDTH != 1
|
|
int base = gid * WIDTH;
|
|
if (base + WIDTH <= len) {
|
|
typeX data = vloadX(gid, A);
|
|
data = data {operation} scalar;
|
|
vstoreX(data, gid, A);
|
|
} else {
|
|
for (int i = 0; i < WIDTH; i++) {
|
|
int idx = base + i;
|
|
if (idx < len) A[idx] = A[idx] {operation} scalar;
|
|
}
|
|
}
|
|
#else
|
|
A[gid] = A[gid] {operation} scalar;
|
|
#endif
|
|
})",
|
|
{{"method", name}, {"operation", operation}});
|
|
}
|
|
|
|
std::string binaryOperation(std::string name, std::string operation) {
|
|
return format(
|
|
R"(
|
|
__kernel void {method}(__global type* A, __global type* B, int len) {
|
|
int gid = get_global_id(0);
|
|
#if WIDTH != 1
|
|
int base = gid * WIDTH;
|
|
if (base + WIDTH <= len) {
|
|
typeX dataA = vloadX(gid, A);
|
|
typeX dataB = vloadX(gid, B);
|
|
vstoreX(dataA {operation} dataB, gid, A);
|
|
} else {
|
|
for (int i = 0; i < WIDTH; i++) {
|
|
int idx = base + i;
|
|
if (idx < len) A[idx] = A[idx] {operation} B[idx];
|
|
}
|
|
}
|
|
#else
|
|
A[gid] = A[gid] {operation} B[gid];
|
|
#endif
|
|
})",
|
|
{{"method", name}, {"operation", operation}});
|
|
}
|
|
|
|
std::string matrixMult() {
|
|
return R"(
|
|
__kernel void mult(const __global type* A,
|
|
const __global type* B,
|
|
__global type* C,
|
|
const int M, const int N, const int K) {
|
|
const int row = get_global_id(0);
|
|
const int col = get_global_id(1);
|
|
if (row < M && col < N) {
|
|
type sum = 0.0f;
|
|
for (int k = 0; k < K; k++)
|
|
sum += A[row * K + k] * B[k * N + col];
|
|
C[row * N + col] = sum;
|
|
}
|
|
})";
|
|
}
|
|
|
|
std::string func() {
|
|
return R"(
|
|
__kernel void func(__global type* A, const int f, const int derivative) {
|
|
int gid = get_global_id(0);
|
|
type x = A[gid];
|
|
switch (f) {
|
|
case 0: // SIGMOID
|
|
if (!derivative)
|
|
A[gid] = (type)1 / ((type)1 + exp(-x));
|
|
else {
|
|
type sigmoid = (type)1 / ((type)1 + exp(-x));
|
|
A[gid] = sigmoid * ((type)1 - sigmoid);
|
|
}
|
|
break;
|
|
case 1: // RELU
|
|
if (!derivative)
|
|
A[gid] = fmax((type)0, x);
|
|
else
|
|
A[gid] = (x > (type)0) ? (type)1 : (type)0;
|
|
break;
|
|
case 2: // MSE (здесь это скорее квадратная функция)
|
|
if (!derivative)
|
|
A[gid] = x * x;
|
|
else
|
|
A[gid] = (type)2 * x;
|
|
break;
|
|
case 3: // LINEAR
|
|
default:
|
|
if (!derivative)
|
|
A[gid] = x;
|
|
else
|
|
A[gid] = (type)1.0f;
|
|
break;
|
|
}
|
|
})";
|
|
}
|
|
|
|
std::unordered_map<Method, std::tuple<std::string, std::string>> programs = {
|
|
{Method::POSITIVE, {unaryOperation("positive", "+"), "positive"}},
|
|
{Method::NEGATIVE, {unaryOperation("negative", "-"), "negative"}},
|
|
|
|
{Method::S_ADD, {scalarOperation("add", "+"), "add"}},
|
|
{Method::S_MULT, {scalarOperation("mult", "*"), "mult"}},
|
|
|
|
{Method::T_ADD, {binaryOperation("add", "+"), "add"}},
|
|
{Method::T_HADAMARD,
|
|
{binaryOperation("hadamard_mult", "*"), "hadamard_mult"}},
|
|
|
|
{Method::T_MULT, {matrixMult(), "mult"}},
|
|
|
|
{Method::FUNC, {func(), "func"}},
|
|
};
|
|
|
|
std::unordered_map<Method, cl::Program> compiledPrograms;
|
|
|
|
public:
|
|
Kernels(Vector vec) : vector(vec) {
|
|
std::cout << "Compile " << getTypeName()
|
|
<< " kernels with vector size = " << std::to_string((int)vector)
|
|
<< " ";
|
|
std::string extensions = openCL.getDevice().getInfo<CL_DEVICE_EXTENSIONS>();
|
|
if (extensions.find("cl_khr_fp16") != std::string::npos)
|
|
configuration = R"(
|
|
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
|
typedef half _half;
|
|
typedef half2 _half2;
|
|
typedef half4 _half4;
|
|
typedef half8 _half8;
|
|
typedef half16 _half16;
|
|
)";
|
|
else
|
|
configuration = R"(
|
|
typedef float _half;
|
|
typedef float2 _half2;
|
|
typedef float4 _half4;
|
|
typedef float8 _half8;
|
|
typedef float16 _half16;
|
|
)";
|
|
configuration += format(
|
|
R"(
|
|
typedef {type} type;
|
|
#define WIDTH {vector}
|
|
#if WIDTH != 1
|
|
typedef {type}{vector} typeX;
|
|
#define vloadX vload{vector}
|
|
#define vstoreX vstore{vector}
|
|
#endif
|
|
)",
|
|
{{"type", getTypeName()}, {"vector", std::to_string((int)vector)}});
|
|
|
|
for (const auto &[method, programInfo] : programs) {
|
|
const auto &[sourceCode, kernelName] = programInfo;
|
|
if (!sourceCode.empty()) {
|
|
cl::Program program(openCL.getContext(), configuration + sourceCode);
|
|
try {
|
|
program.build({openCL.getDevice()});
|
|
compiledPrograms[method] = program;
|
|
} catch (const cl::Error &e) {
|
|
std::cerr << "OpenCL compilation error for method "
|
|
<< static_cast<int>(method) << ": " << e.what()
|
|
<< std::endl;
|
|
std::string buildLog =
|
|
program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(openCL.getDevice());
|
|
std::cerr << "Build log for method " << static_cast<int>(method)
|
|
<< ":" << std::endl;
|
|
std::cerr << buildLog << std::endl;
|
|
}
|
|
}
|
|
}
|
|
std::cout << "completed" << std::endl;
|
|
}
|
|
|
|
cl::Kernel create(Method method) {
|
|
auto it = compiledPrograms.find(method);
|
|
if (it == compiledPrograms.end())
|
|
throw std::runtime_error("Program for method not found or not compiled");
|
|
const auto &kernelName = std::get<1>(programs[method]);
|
|
return cl::Kernel(it->second, kernelName.c_str());
|
|
}
|
|
};

// ---- Kernels<T>::getTypeName() specialisations per element type ----
#define SPECIALIZE_KERNELS_TYPE(type, name) \
|
|
template <> constexpr std::string Kernels<type>::getTypeName() { \
|
|
return name; \
|
|
}
|
|
SPECIALIZE_KERNELS_TYPE(char, "char")
|
|
SPECIALIZE_KERNELS_TYPE(short, "short")
|
|
SPECIALIZE_KERNELS_TYPE(int, "int")
|
|
SPECIALIZE_KERNELS_TYPE(long, "long")
|
|
SPECIALIZE_KERNELS_TYPE(float, "float")
|
|
SPECIALIZE_KERNELS_TYPE(double, "double")
|
|
|
|
typedef cl_half half;
|
|
SPECIALIZE_KERNELS_TYPE(half, "_half") |