Split headers and logic

This commit is contained in:
2025-11-17 16:03:32 +04:00
parent bbd9c67c96
commit d7d93999a4
18 changed files with 589 additions and 394 deletions

View File

@@ -0,0 +1,34 @@
/*
 * Element-wise unary plus.
 *
 * Each work-item takes its index from get_global_id(0) and stores +A[index]
 * into B[index]. The kernel performs no bounds check on that index.
 */
__kernel void positive(__global float *A, __global float *B) {
  const int gid = get_global_id(0);
  const float value = A[gid];
  B[gid] = +value;
}
/*
 * Element-wise negation.
 *
 * Each work-item takes its index from get_global_id(0) and stores -A[index]
 * into B[index]. The kernel performs no bounds check on that index.
 */
__kernel void negative(__global float *A, __global float *B) {
  const int gid = get_global_id(0);
  const float value = A[gid];
  B[gid] = -value;
}
/*
 * Applies the activation function selected by activation_type to one value.
 *
 *   0  LINEAR      x
 *   1  SIGMOID     1 / (1 + e^-x)
 *   2  TANH        tanh(x)
 *   3  RELU        max(0, x)
 *   4  LEAKY_RELU  x if x > 0, otherwise alpha * x
 *   5  ELU         x if x > 0, otherwise alpha * (e^x - 1)
 *
 * alpha is only read by LEAKY_RELU and ELU. Any activation_type outside 0..5
 * returns x unchanged.
 */
float activate_x(float x, const int activation_type, const float alpha) {
  switch (activation_type) {
  case 0: // LINEAR
    return x;
  case 1: // SIGMOID
    return 1.0f / (1.0f + exp(-x));
  case 2: // TANH
    return tanh(x);
  case 3: // RELU
    return fmax(0.0f, x);
  case 4: // LEAKY_RELU
    return (x > 0.0f) ? x : alpha * x;
  case 5: // ELU
    // expm1 keeps precision for x close to zero, where exp(x) - 1.0f
    // cancels and can collapse to exactly 0 in single precision.
    return (x > 0.0f) ? x : alpha * expm1(x);
  default:
    return x;
  }
}
/*
 * Element-wise activation kernel.
 *
 * Each work-item takes its index from get_global_id(0), passes input[index]
 * through activate_x with the given activation_type and alpha, and stores
 * the result in output[index]. No bounds check is performed on the index.
 */
__kernel void activate(__global float *input, __global float *output,
                       const int activation_type, const float alpha) {
  const int gid = get_global_id(0);
  const float value = input[gid];
  output[gid] = activate_x(value, activation_type, alpha);
}

View File

View File

@@ -0,0 +1,9 @@
/*
 * Adds a scalar to every element: B[index] = A[index] + scalar.
 *
 * The index is this work-item's get_global_id(0); it is not bounds-checked.
 */
__kernel void add(__global float *A, __global float *B, float scalar) {
  const int gid = get_global_id(0);
  const float value = A[gid];
  B[gid] = value + scalar;
}
/*
 * Multiplies every element by a scalar: B[index] = A[index] * scalar.
 *
 * The index is this work-item's get_global_id(0); it is not bounds-checked.
 */
__kernel void mult(__global float *A, __global float *B, float scalar) {
  const int gid = get_global_id(0);
  const float value = A[gid];
  B[gid] = value * scalar;
}

View File

@@ -1,4 +1,15 @@
float activate_x(float x, const int activation_type, const float alpha) {
/*
 * Element-wise scaled addition: C[index] = A[index] + (B[index] * x).
 *
 * The index is this work-item's get_global_id(0); it is not bounds-checked.
 */
__kernel void add(__global float *A, __global float *B, __global float *C,
                  float x) {
  const int gid = get_global_id(0);
  const float scaled = B[gid] * x;
  C[gid] = A[gid] + scaled;
}
/*
 * Element-wise scaled product: C[index] = A[index] * (B[index] * x).
 *
 * The index is this work-item's get_global_id(0); it is not bounds-checked.
 *
 * NOTE(review): a later hunk of this same file appears to sit inside another
 * kernel that is also named `mult` (taking A, B, C, bias, ...). OpenCL C does
 * not support function overloading, so confirm the two names do not collide
 * when the program is built.
 */
__kernel void mult(__global float *A, __global float *B, __global float *C,
                   float x) {
  const int gid = get_global_id(0);
  const float scaled = B[gid] * x;
  C[gid] = A[gid] * scaled;
}
float activate(float x, const int activation_type, const float alpha) {
switch (activation_type) {
case 0: // LINEAR
return x;
@@ -17,12 +28,6 @@ float activate_x(float x, const int activation_type, const float alpha) {
}
}
/*
 * Element-wise activation kernel.
 *
 * Each work-item takes its index from get_global_id(0), passes input[index]
 * through activate_x with the given activation_type and alpha, and stores
 * the result in output[index]. No bounds check is performed on the index.
 */
__kernel void activate(__global float *input, __global float *output,
                       const int activation_type, const float alpha) {
  const int gid = get_global_id(0);
  const float value = input[gid];
  output[gid] = activate_x(value, activation_type, alpha);
}
__kernel void mult_small(__global float *A, __global float *B,
__global float *C, __global float *bias,
const int activation_type, const float alpha,
@@ -48,7 +53,7 @@ __kernel void mult_small(__global float *A, __global float *B,
float result = sum + bias[col];
if (activation_type != 0) {
result = activate_x(result, activation_type, alpha);
result = activate(result, activation_type, alpha);
}
C[row * N + col] = result;
}
@@ -121,24 +126,9 @@ __kernel void mult(__global float *A, __global float *B, __global float *C,
if (global_i < M && global_j < N) {
float result = sum + bias[global_j];
if (activation_type != 0) {
result = activate_x(result, activation_type, alpha);
result = activate(result, activation_type, alpha);
}
C[global_i * N + global_j] = result;
}
}
/*
 * Multiplies every element by a scalar: B[index] = A[index] * scalar.
 *
 * The index is this work-item's get_global_id(0); it is not bounds-checked.
 */
__kernel void mult_sc(__global float *A, __global float *B, float scalar) {
  const int gid = get_global_id(0);
  const float value = A[gid];
  B[gid] = value * scalar;
}
/*
 * Element-wise scaled addition: C[index] = A[index] + (B[index] * x).
 *
 * The index is this work-item's get_global_id(0); it is not bounds-checked.
 */
__kernel void add(__global float *A, __global float *B, __global float *C,
                  float x) {
  const int gid = get_global_id(0);
  const float scaled = B[gid] * x;
  C[gid] = A[gid] + scaled;
}
/*
 * Adds a scalar to every element: B[index] = A[index] + scalar.
 *
 * The index is this work-item's get_global_id(0); it is not bounds-checked.
 */
__kernel void add_sc(__global float *A, __global float *B, float scalar) {
  const int gid = get_global_id(0);
  const float value = A[gid];
  B[gid] = value + scalar;
}

View File

@@ -1,5 +1,10 @@
#include "opencl.hpp"
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
std::string OpenCL::readProgram(const std::string &filePath) {
std::ifstream file(filePath, std::ios::binary);
if (!file.is_open()) {
@@ -118,4 +123,4 @@ void OpenCL::printDeviceInfo() const {
<< std::endl;
std::cout << "Max Work Group Size: "
<< device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>() << std::endl;
}
}

View File

@@ -4,16 +4,11 @@
#define CL_HPP_TARGET_OPENCL_VERSION 300
#include <CL/opencl.hpp>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <unordered_map>
class OpenCL {
public:
enum class Program { TENSOR };
enum class Program { ATOMIC, SCALAR, TENSOR, FUSION };
private:
cl::Device device;
@@ -22,7 +17,10 @@ private:
std::unordered_map<Program, cl::Program> programs;
std::unordered_map<Program, std::string> programPaths = {
{Program::TENSOR, "./opencl/kernels/tensor.cl"}};
{Program::ATOMIC, "./opencl/kernels/atomic.cl"},
{Program::SCALAR, "./opencl/kernels/scalar.cl"},
{Program::TENSOR, "./opencl/kernels/tensor.cl"},
{Program::FUSION, "./opencl/kernels/fusion.cl"}};
std::string readProgram(const std::string &filePath);
cl::Program compileProgram(const std::string &file);

View File

@@ -0,0 +1,5 @@
#pragma once
#include "opencl.hpp"
#include "../tensor.hpp"

View File