From 2db52adf0f52260d740003cd11c9454a35a710d0 Mon Sep 17 00:00:00 2001 From: StepanovPlaton Date: Tue, 25 Nov 2025 23:15:43 +0400 Subject: [PATCH] Check --- src/tensor/Makefile | 1 + src/tensor/main.cpp | 11 +- src/tensor/opencl/kernels.hpp | 285 +++++++++++++++++---------- src/tensor/opencl/opencl.cpp | 61 ++++++ src/tensor/opencl/tensor.hpp | 32 ++-- src/tensor/pybind.cpp | 6 - src/tensor/tensor.pyi | 351 ---------------------------------- 7 files changed, 270 insertions(+), 477 deletions(-) delete mode 100644 src/tensor/tensor.pyi diff --git a/src/tensor/Makefile b/src/tensor/Makefile index f5fe52c..455ef42 100644 --- a/src/tensor/Makefile +++ b/src/tensor/Makefile @@ -34,6 +34,7 @@ OPENCL_LIB_PATH = -L"A:/Programs/OpenCL/lib" OPENCL_LIB = -lOpenCL .DEFAULT_GOAL := cpu +.PHONY: cpu opencl cpu_module opencl_module clean $(BUILD_DIR): $(MKDIR) $(BUILD_DIR) diff --git a/src/tensor/main.cpp b/src/tensor/main.cpp index c7501a0..180a37c 100644 --- a/src/tensor/main.cpp +++ b/src/tensor/main.cpp @@ -1,7 +1,6 @@ #ifdef USE_OPENCL #include "opencl/tensor.hpp" OpenCL openCL; -// TODO: GENERIC KERNELS // TODO: Scalar mult #elif USE_CPU #include "cpu/tensor.hpp" @@ -21,19 +20,21 @@ public: auto end = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(end - start); - std::cout << operation << ": " << duration.count() << " μs\n"; + std::cout << operation << ": " << duration.count() << " ns\n"; } }; int main() { #ifdef USE_OPENCL - openCL.init("./"); + openCL.printDeviceInfo(); #endif Tensor a = Tensor({4096 * 2, 4096 * 2}, 1); Tensor b = Tensor({4096 * 2, 4096 * 2}, 1); + Profiler::measure("Matrix multiplication", [&]() { + auto result = a % b; + std::cout << result.toString(); + }); - Profiler::measure("Matrix multiplication", [&]() { auto result = a % b; }); - std::cout << a.toString(); return 0; } diff --git a/src/tensor/opencl/kernels.hpp b/src/tensor/opencl/kernels.hpp index 196a564..faf9a6f 100644 --- a/src/tensor/opencl/kernels.hpp 
+++ b/src/tensor/opencl/kernels.hpp @@ -1,10 +1,14 @@ -#include "opencl.hpp" #include + +#include "opencl.hpp" + #include +#include +#include #include #include -template class Kernels { +template class Kernels { public: enum class Vector { type2 = 2, @@ -21,136 +25,211 @@ public: T_HADAMARD, T_MULT, }; - constexpr const static std::string type = typeid(T).name(); - - // TODO: get native vector size - static Vector vector = Vector::type8; private: - static std::string unaryOperation(std::string name, std::string operation) { - return std::format( + constexpr std::string getTypeName() { return "unknown"; } + Vector vector; + std::string configuration; + + std::string format(std::string tmp, + std::unordered_map args) { + std::string result(tmp); + for (const auto &[key, value] : args) { + std::string placeholder = "{" + key + "}"; + size_t pos = 0; + while ((pos = result.find(placeholder, pos)) != std::string::npos) { + result.replace(pos, placeholder.length(), value); + pos += value.length(); + } + } + return result; + } + + std::string unaryOperation(std::string name, std::string operation) { + return format( R"( - __kernel void {method}(__global {type}* A, int len) {{ + __kernel void {method}(__global type* A, int len) { int gid = get_global_id(0); - int base = gid * {vector}; - if (base + ({vector}-1) < len) {{ - {type}{vector} data = vload{vector}(gid, A); - vstore{vector}({operation}data, gid, A); - }} else {{ - for (int i = 0; i < {vec_size}; i++) {{ + int base = gid * WIDTH; + if (base + WIDTH <= len) { + typeX data = vloadX(gid, A); + vstoreX({operation}data, gid, A); + } else { + for (int i = 0; i < WIDTH; i++) { int idx = base + i; if (idx < len) A[idx] = {operation}A[idx]; - }} - }} - }} - )", - std::make_format_args(std::make_pair("method", name), - std::make_pair("vector", vector), - std::make_pair("type", type), - std::make_pair("operation", operation))); + } + } + })", + {{"method", name}, {"operation", operation}}); } - static std::string 
scalarOperation(std::string name, std::string operation) { - return std::format( + std::string scalarOperation(std::string name, std::string operation) { + return format( R"( - __kernel void {method}(__global {type}* A, int len, {type} scalar) {{ + __kernel void {method}(__global type* A, type scalar, int len) { int gid = get_global_id(0); - int base = gid * {vector}; - if (base + ({vector}-1) < len) {{ - {type}{vector} data = vload{vector}(gid, A); + int base = gid * WIDTH; + if (base + WIDTH <= len) { + typeX data = vloadX(gid, A); data = data {operation} scalar; - vstore{vector}(data, gid, A); - }} else {{ - for (int i = 0; i < {vec_size}; i++) {{ + vstoreX(data, gid, A); + } else { + for (int i = 0; i < WIDTH; i++) { int idx = base + i; if (idx < len) A[idx] = A[idx] {operation} scalar; - }} - }} - }} - )", - std::make_format_args(std::make_pair("method", name), - std::make_pair("vector", vector), - std::make_pair("type", type), - std::make_pair("operation", operation))); + } + } + })", + {{"method", name}, {"operation", operation}}); } - static std::string binaryOperation(std::string name, std::string operation) { - return std::format( + std::string binaryOperation(std::string name, std::string operation) { + return format( R"( - __kernel void {method}(__global {type}* A, __global {type}* B, int len) {{ + __kernel void {method}(__global type* A, __global type* B, int len) { int gid = get_global_id(0); - int base = gid * {vector}; - if (base + ({vector}-1) < len) {{ - {type}{vector} dataA = vload{vector}(gid, A); - {type}{vector} dataB = vload{vector}(gid, B); - vstore{vector}(dataA {operation} dataB, gid, A); - }} else {{ - for (int i = 0; i < {vector}; i++) {{ + int base = gid * WIDTH; + if (base + WIDTH <= len) { + typeX dataA = vloadX(gid, A); + typeX dataB = vloadX(gid, B); + vstoreX(dataA {operation} dataB, gid, A); + } else { + for (int i = 0; i < WIDTH; i++) { int idx = base + i; if (idx < len) A[idx] = A[idx] {operation} B[idx]; - }} - }} - }} - )", -
std::make_format_args(std::make_pair("method", name), - std::make_pair("vector", vector), - std::make_pair("type", type), - std::make_pair("operation", operation))); + } + } + })", + {{"method", name}, {"operation", operation}}); } - static std::unordered_map> - programs = { - {Method::POSITIVE, {unaryOperation("positive", "+"), "positive"}}, - {Method::NEGATIVE, {unaryOperation("negative", "-")}, "negative"}, + std::string matrixMult(std::string name) { + return format( + R"( + #define TILE_SIZE (WIDTH*4) + __kernel void mult(const __global typeX* A, + const __global typeX* B, + __global typeX* C, const int M, const int N, const int K) { + const int row = get_local_id(0); + const int col = get_local_id(1); + const int globalRow = (TILE_SIZE/WIDTH)*get_group_id(0) + row; + const int globalCol = TILE_SIZE*get_group_id(1) + col; + __local typeX Asub[TILE_SIZE][TILE_SIZE/WIDTH]; + __local typeX Bsub[TILE_SIZE][TILE_SIZE/WIDTH]; + typeX acc = 0; + const int numTiles = K/TILE_SIZE; + for (int tile = 0; tile < numTiles; tile++) { + const int tiledRow = (TILE_SIZE/WIDTH)*tile + row; + const int tiledCol = TILE_SIZE*tile + col; + Asub[col][row] = A[tiledCol*(M/WIDTH) + globalRow]; + Bsub[col][row] = B[globalCol*(K/WIDTH) + tiledRow]; + barrier(CLK_LOCAL_MEM_FENCE); + typeX vecA, vecB; + type valB; + for (int k = 0; k < TILE_SIZE/WIDTH; k++) { + vecB = Bsub[col][k]; + for (int w = 0; w < WIDTH; w++) { + vecA = Asub[WIDTH*k + w][row]; + valB = vecB[w]; + for (int i = 0; i < WIDTH; i++) + acc[i] += vecA[i] * valB; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + C[globalCol*(M/WIDTH) + globalRow] = acc; + } + )", + {{"method", name}}); + } - {Method::S_ADD, {scalarOperation("add", "+")}, "add"}, - {Method::S_MULT, {scalarOperation("mult", "*")}, "mult"}, + std::unordered_map> programs = { + {Method::POSITIVE, {unaryOperation("positive", "+"), "positive"}}, + {Method::NEGATIVE, {unaryOperation("negative", "-"), "negative"}}, - {Method::T_ADD, {binaryOperation("add", "+")}, "add"},
- {Method::T_HADAMARD, - {binaryOperation("hadamard_mult", "*")}, - "hadamard_mult"}, - {Method::T_MULT, {"", "mult"}}, + {Method::S_ADD, {scalarOperation("add", "+"), "add"}}, + {Method::S_MULT, {scalarOperation("mult", "*"), "mult"}}, + + {Method::T_ADD, {binaryOperation("add", "+"), "add"}}, + {Method::T_HADAMARD, + {binaryOperation("hadamard_mult", "*"), "hadamard_mult"}}, + + {Method::T_MULT, {matrixMult("mult"), "mult"}}, }; - static inline std::unordered_map compiledPrograms; - static inline std::mutex compileMutex; + std::unordered_map compiledPrograms; public: - static cl::Kernel create(Method method) { - std::lock_guard lock(compileMutex); + Kernels(Vector vec = Vector::type4) : vector(vec) { + std::string extensions = openCL.getDevice().getInfo(); + if (extensions.find("cl_khr_fp16") != std::string::npos) + configuration = R"( + #pragma OPENCL EXTENSION cl_khr_fp16 : enable + typedef half _half; + typedef half2 _half2; + typedef half4 _half4; + typedef half8 _half8; + typedef half16 _half16; + )"; + else + configuration = R"( + typedef float _half; + typedef float2 _half2; + typedef float4 _half4; + typedef float8 _half8; + typedef float16 _half16; + )"; + configuration += format( + R"( + typedef {type} type; + typedef {type}{vector} typeX; + #define WIDTH {vector} + #define vloadX vload{vector} + #define vstoreX vstore{vector} + )", + {{"type", getTypeName()}, {"vector", std::to_string((int)vector)}}); - auto cache = compiledPrograms.find(method); - if (cache != compiledPrograms.end()) { - const auto &programName = std::get<1>(programs[method]); - return cl::Kernel(cache->second, programName.c_str()); - } - - auto program = programs.find(method); - if (program == programs.end()) - throw std::runtime_error("Unknown method: " + - std::to_string(static_cast(method))); - const auto &[sourceCode, kernelName] = program->second; - - try { - cl::Program::Sources sources; - sources.push_back({sourceCode.c_str(), sourceCode.length()}); - cl::Program 
program(openCL.getContext(), sources); - program.build({openCL.getDevice()}); - compiledPrograms[method] = program; - return cl::Kernel(program, kernelName.c_str()); - - } catch (const cl::Error &e) { - if (e.err() == CL_BUILD_PROGRAM_FAILURE) { - cl::Program program(openCL.getContext(), - {sourceCode.c_str(), sourceCode.length()}); - auto buildInfo = - program.getBuildInfo(openCL.getDevice()); - throw std::runtime_error( - "OpenCL compilation failed: " + std::string(e.what()) + - "\nBuild log:\n" + buildInfo); + for (const auto &[method, programInfo] : programs) { + const auto &[sourceCode, kernelName] = programInfo; + if (!sourceCode.empty()) { + cl::Program program(openCL.getContext(), configuration + sourceCode); + try { + program.build({openCL.getDevice()}); + compiledPrograms[method] = program; + } catch (const cl::Error &e) { + std::cerr << "OpenCL compilation error for method " + << static_cast(method) << ": " << e.what() + << std::endl; + std::string buildLog = + program.getBuildInfo(openCL.getDevice()); + std::cerr << "Build log for method " << static_cast(method) + << ":" << std::endl; + std::cerr << buildLog << std::endl; + } } - throw std::runtime_error("OpenCL error: " + std::string(e.what())); } } + + cl::Kernel create(Method method) { + auto it = compiledPrograms.find(method); + if (it == compiledPrograms.end()) + throw std::runtime_error("Program for method not found or not compiled"); + const auto &kernelName = std::get<1>(programs[method]); + return cl::Kernel(it->second, kernelName.c_str()); + } }; + +#define SPECIALIZE_KERNELS_TYPE(type, name) \ + template <> constexpr std::string Kernels::getTypeName() { \ + return name; \ + } +SPECIALIZE_KERNELS_TYPE(char, "char") +SPECIALIZE_KERNELS_TYPE(short, "short") +SPECIALIZE_KERNELS_TYPE(int, "int") +SPECIALIZE_KERNELS_TYPE(long, "long") +SPECIALIZE_KERNELS_TYPE(float, "float") +SPECIALIZE_KERNELS_TYPE(double, "double") + +typedef cl_half half; +SPECIALIZE_KERNELS_TYPE(half, "_half") \ No newline at 
end of file diff --git a/src/tensor/opencl/opencl.cpp b/src/tensor/opencl/opencl.cpp index c16b4f0..6967b1f 100644 --- a/src/tensor/opencl/opencl.cpp +++ b/src/tensor/opencl/opencl.cpp @@ -71,4 +71,65 @@ void OpenCL::printDeviceInfo() const { << std::endl; std::cout << "Max Work Group Size: " << device.getInfo() << std::endl; + std::string extensions = device.getInfo(); + + std::cout << "Optimal vector sizes:" << std::endl; + try { + cl_uint short_native = + device.getInfo(); + cl_uint short_preferred = + device.getInfo(); + std::cout << " short: native=" << short_native + << ", preferred=" << short_preferred << std::endl; + } catch (const cl::Error &e) { + std::cout << " short: N/A (error: " << e.what() << ")" << std::endl; + } + try { + cl_uint int_native = device.getInfo(); + cl_uint int_preferred = + device.getInfo(); + std::cout << " int: native=" << int_native + << ", preferred=" << int_preferred << std::endl; + } catch (const cl::Error &e) { + std::cout << " int: N/A (error: " << e.what() << ")" << std::endl; + } + try { + if (extensions.find("cl_khr_fp16") != std::string::npos) { + cl_uint half_native = + device.getInfo(); + cl_uint half_preferred = + device.getInfo(); + std::cout << " half: native=" << half_native + << ", preferred=" << half_preferred << std::endl; + } else { + std::cout << " half: not supported" << std::endl; + } + } catch (const cl::Error &e) { + std::cout << " half: N/A (error: " << e.what() << ")" << std::endl; + } + try { + cl_uint float_native = + device.getInfo(); + cl_uint float_preferred = + device.getInfo(); + std::cout << " float: native=" << float_native + << ", preferred=" << float_preferred << std::endl; + } catch (const cl::Error &e) { + std::cout << " float: N/A (error: " << e.what() << ")" << std::endl; + } + try { + if (extensions.find("cl_khr_fp64") != std::string::npos || + device.getInfo().find("1.0") == std::string::npos) { + cl_uint double_native = + device.getInfo(); + cl_uint double_preferred = + device.getInfo(); 
+ std::cout << " double: native=" << double_native + << ", preferred=" << double_preferred << std::endl; + } else { + std::cout << " double: not supported" << std::endl; + } + } catch (const cl::Error &e) { + std::cout << " double: N/A (error: " << e.what() << ")" << std::endl; + } } diff --git a/src/tensor/opencl/tensor.hpp b/src/tensor/opencl/tensor.hpp index 8c424bb..2c03f0f 100644 --- a/src/tensor/opencl/tensor.hpp +++ b/src/tensor/opencl/tensor.hpp @@ -45,9 +45,13 @@ private: all(other.getEvent()), &event_); } + static cl::Kernel createKernel(Kernels::Method method) { + static Kernels kernels(Kernels::Vector::type4); + return kernels.create(method); + } + public: typedef class ITensor ITensor; - typedef class Kernels Kernels; using ITensor::axes_; using ITensor::checkAxisInDim; @@ -105,7 +109,7 @@ public: ITensor::operator=(std::move(other)); data_ = other.data_; event_ = other.event_; - other.data = nullptr; + other.data_ = nullptr; return *this; } ~Tensor() { @@ -120,8 +124,9 @@ public: using ITensor::operator-; Tensor operator+() const override { - cl::Kernel kernel = Kernels::create(Kernels::Method::POSITIVE); + cl::Kernel kernel = createKernel(Kernels::Method::POSITIVE); kernel.setArg(0, *data_); + kernel.setArg(1, (int)getSize()); openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(getSize()), cl::NullRange, all(event_), &event_); @@ -129,8 +134,9 @@ public: } Tensor operator-() const override { - cl::Kernel kernel = Kernels::create(Kernels::Method::NEGATIVE); + cl::Kernel kernel = createKernel(Kernels::Method::NEGATIVE); kernel.setArg(0, *data_); + kernel.setArg(1, (int)getSize()); openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(getSize()), cl::NullRange, all(event_), &event_); @@ -138,9 +144,10 @@ public: } Tensor &operator+=(const T scalar) override { - cl::Kernel kernel = Kernels::create(Kernels::Method::S_ADD); + cl::Kernel kernel = createKernel(Kernels::Method::S_ADD); kernel.setArg(0, *data_); 
kernel.setArg(1, scalar); + kernel.setArg(2, (int)getSize()); openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(getSize()), cl::NullRange, all(event_), &event_); @@ -148,9 +155,10 @@ public: } Tensor &operator*=(const T scalar) override { - cl::Kernel kernel = Kernels::create(Kernels::Method::S_MULT); + cl::Kernel kernel = createKernel(Kernels::Method::S_MULT); kernel.setArg(0, *data_); kernel.setArg(1, scalar); + kernel.setArg(2, (int)getSize()); openCL.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(getSize()), cl::NullRange, all(event_), &event_); @@ -158,9 +166,10 @@ public: } Tensor &operator+=(const Tensor &other) override { - cl::Kernel kernel = Kernels::create(Kernels::Method::T_ADD); + cl::Kernel kernel = createKernel(Kernels::Method::T_ADD); kernel.setArg(0, *data_); kernel.setArg(1, *other.getData()); + kernel.setArg(2, (int)getSize()); openCL.getQueue().enqueueNDRangeKernel( kernel, cl::NullRange, cl::NDRange(getSize()), cl::NullRange, all(event_, other.event_), &event_); @@ -168,9 +177,10 @@ public: } Tensor &operator*=(const Tensor &other) override { - cl::Kernel kernel = Kernels::create(Kernels::Method::T_HADAMARD); + cl::Kernel kernel = createKernel(Kernels::Method::T_HADAMARD); kernel.setArg(0, *data_); kernel.setArg(1, *other.getData()); + kernel.setArg(2, (int)getSize()); openCL.getQueue().enqueueNDRangeKernel( kernel, cl::NullRange, cl::NDRange(getSize()), cl::NullRange, all(event_, other.event_), &event_); @@ -192,16 +202,14 @@ public: size_t k = shape_[axes_[1]]; size_t n = other.shape_[other.axes_[1]]; Tensor result({m, n}); - cl::Kernel kernel = Kernels::create(Kernels::Method::T_MULT); + cl::Kernel kernel = createKernel(Kernels::Method::T_MULT); kernel.setArg(0, *data_); kernel.setArg(1, *other.getData()); kernel.setArg(2, *result.getData()); kernel.setArg(3, (int)m); kernel.setArg(4, (int)n); kernel.setArg(5, (int)k); - cl::NDRange global_size( - ((m + TILE_SIZE * VEC_SIZE - 1) / (TILE_SIZE * 
VEC_SIZE)) * TILE_SIZE, - ((n + TILE_SIZE - 1) / TILE_SIZE) * TILE_SIZE); + cl::NDRange global_size(m / VEC_SIZE, n); cl::NDRange local_size(TILE_SIZE / VEC_SIZE, TILE_SIZE); openCL.getQueue().enqueueNDRangeKernel( kernel, cl::NullRange, global_size, local_size, diff --git a/src/tensor/pybind.cpp b/src/tensor/pybind.cpp index 54ed0e3..523a081 100644 --- a/src/tensor/pybind.cpp +++ b/src/tensor/pybind.cpp @@ -125,12 +125,6 @@ PYBIND11_MODULE(tensor, m) { register_tensor(m, "Matrix"); register_tensor(m, "Tensor3"); -#ifdef USE_OPENCL - m.def("init", [](const std::string &programsBasePath) { - openCL.init(programsBasePath); - }); -#endif - #ifndef USE_OPENCL register_tensor(m, "dScalar"); register_tensor(m, "dVector"); diff --git a/src/tensor/tensor.pyi b/src/tensor/tensor.pyi deleted file mode 100644 index d740736..0000000 --- a/src/tensor/tensor.pyi +++ /dev/null @@ -1,351 +0,0 @@ -""" -Tensor math library -""" -from __future__ import annotations -import collections.abc -import typing -__all__: list[str] = ['CPU', 'MODE', 'Matrix', 'OPENCL', 'PLATFORM', 'Scalar', 'Tensor3', 'Vector', 'init'] -class Matrix: - @typing.overload - def __add__(self, arg0: Matrix) -> Matrix: - ... - @typing.overload - def __add__(self, arg0: typing.SupportsFloat) -> Matrix: - ... - @typing.overload - def __iadd__(self, arg0: Matrix) -> Matrix: - ... - @typing.overload - def __iadd__(self, arg0: typing.SupportsFloat) -> Matrix: - ... - @typing.overload - def __imul__(self, arg0: Matrix) -> Matrix: - ... - @typing.overload - def __imul__(self, arg0: typing.SupportsFloat) -> Matrix: - ... - @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(2)"]) -> None: - ... - @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(2)"], arg1: typing.SupportsFloat) -> None: - ... 
- @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(2)"], arg1: collections.abc.Sequence[typing.SupportsFloat]) -> None: - ... - @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(2)"], arg1: typing.SupportsFloat, arg2: typing.SupportsFloat) -> None: - ... - @typing.overload - def __isub__(self, arg0: Matrix) -> Matrix: - ... - @typing.overload - def __isub__(self, arg0: typing.SupportsFloat) -> Matrix: - ... - def __itruediv__(self, arg0: typing.SupportsFloat) -> Matrix: - ... - def __matmul__(self, arg0: Matrix) -> Matrix: - ... - @typing.overload - def __mul__(self, arg0: Matrix) -> Matrix: - ... - @typing.overload - def __mul__(self, arg0: typing.SupportsFloat) -> Matrix: - ... - def __neg__(self) -> Matrix: - ... - def __pos__(self) -> Matrix: - ... - def __radd__(self, arg0: typing.SupportsFloat) -> Matrix: - ... - def __repr__(self) -> str: - ... - def __rmul__(self, arg0: typing.SupportsFloat) -> Matrix: - ... - def __rsub__(self, arg0: typing.SupportsFloat) -> Matrix: - ... - @typing.overload - def __sub__(self, arg0: Matrix) -> Matrix: - ... - @typing.overload - def __sub__(self, arg0: typing.SupportsFloat) -> Matrix: - ... - def __truediv__(self, arg0: typing.SupportsFloat) -> Matrix: - ... - def get_axes(self) -> typing.Annotated[list[int], "FixedSize(2)"]: - ... - def get_shape(self) -> typing.Annotated[list[int], "FixedSize(2)"]: - ... - def get_size(self) -> int: - ... - def t(self) -> Matrix: - ... - @typing.overload - def transpose(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(2)"]) -> Matrix: - ... - @typing.overload - def transpose(self, arg0: typing.SupportsInt, arg1: typing.SupportsInt) -> Matrix: - ... 
-class PLATFORM: - """ - Members: - - CPU - - OPENCL - """ - CPU: typing.ClassVar[PLATFORM] # value = - OPENCL: typing.ClassVar[PLATFORM] # value = - __members__: typing.ClassVar[dict[str, PLATFORM]] # value = {'CPU': , 'OPENCL': } - def __eq__(self, other: typing.Any) -> bool: - ... - def __getstate__(self) -> int: - ... - def __hash__(self) -> int: - ... - def __index__(self) -> int: - ... - def __init__(self, value: typing.SupportsInt) -> None: - ... - def __int__(self) -> int: - ... - def __ne__(self, other: typing.Any) -> bool: - ... - def __repr__(self) -> str: - ... - def __setstate__(self, state: typing.SupportsInt) -> None: - ... - def __str__(self) -> str: - ... - @property - def name(self) -> str: - ... - @property - def value(self) -> int: - ... -class Scalar: - @typing.overload - def __add__(self, arg0: Scalar) -> Scalar: - ... - @typing.overload - def __add__(self, arg0: typing.SupportsFloat) -> Scalar: - ... - @typing.overload - def __iadd__(self, arg0: Scalar) -> Scalar: - ... - @typing.overload - def __iadd__(self, arg0: typing.SupportsFloat) -> Scalar: - ... - @typing.overload - def __imul__(self, arg0: Scalar) -> Scalar: - ... - @typing.overload - def __imul__(self, arg0: typing.SupportsFloat) -> Scalar: - ... - @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(0)"]) -> None: - ... - @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(0)"], arg1: typing.SupportsFloat) -> None: - ... - @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(0)"], arg1: collections.abc.Sequence[typing.SupportsFloat]) -> None: - ... - @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(0)"], arg1: typing.SupportsFloat, arg2: typing.SupportsFloat) -> None: - ... 
- @typing.overload - def __isub__(self, arg0: Scalar) -> Scalar: - ... - @typing.overload - def __isub__(self, arg0: typing.SupportsFloat) -> Scalar: - ... - def __itruediv__(self, arg0: typing.SupportsFloat) -> Scalar: - ... - @typing.overload - def __mul__(self, arg0: Scalar) -> Scalar: - ... - @typing.overload - def __mul__(self, arg0: typing.SupportsFloat) -> Scalar: - ... - def __neg__(self) -> Scalar: - ... - def __pos__(self) -> Scalar: - ... - def __radd__(self, arg0: typing.SupportsFloat) -> Scalar: - ... - def __repr__(self) -> str: - ... - def __rmul__(self, arg0: typing.SupportsFloat) -> Scalar: - ... - def __rsub__(self, arg0: typing.SupportsFloat) -> Scalar: - ... - @typing.overload - def __sub__(self, arg0: Scalar) -> Scalar: - ... - @typing.overload - def __sub__(self, arg0: typing.SupportsFloat) -> Scalar: - ... - def __truediv__(self, arg0: typing.SupportsFloat) -> Scalar: - ... - def get_axes(self) -> typing.Annotated[list[int], "FixedSize(0)"]: - ... - def get_shape(self) -> typing.Annotated[list[int], "FixedSize(0)"]: - ... - def get_size(self) -> int: - ... -class Tensor3: - @typing.overload - def __add__(self, arg0: Tensor3) -> Tensor3: - ... - @typing.overload - def __add__(self, arg0: typing.SupportsFloat) -> Tensor3: - ... - @typing.overload - def __iadd__(self, arg0: Tensor3) -> Tensor3: - ... - @typing.overload - def __iadd__(self, arg0: typing.SupportsFloat) -> Tensor3: - ... - @typing.overload - def __imul__(self, arg0: Tensor3) -> Tensor3: - ... - @typing.overload - def __imul__(self, arg0: typing.SupportsFloat) -> Tensor3: - ... - @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(3)"]) -> None: - ... - @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(3)"], arg1: typing.SupportsFloat) -> None: - ... 
- @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(3)"], arg1: collections.abc.Sequence[typing.SupportsFloat]) -> None: - ... - @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(3)"], arg1: typing.SupportsFloat, arg2: typing.SupportsFloat) -> None: - ... - @typing.overload - def __isub__(self, arg0: Tensor3) -> Tensor3: - ... - @typing.overload - def __isub__(self, arg0: typing.SupportsFloat) -> Tensor3: - ... - def __itruediv__(self, arg0: typing.SupportsFloat) -> Tensor3: - ... - @typing.overload - def __mul__(self, arg0: Tensor3) -> Tensor3: - ... - @typing.overload - def __mul__(self, arg0: typing.SupportsFloat) -> Tensor3: - ... - def __neg__(self) -> Tensor3: - ... - def __pos__(self) -> Tensor3: - ... - def __radd__(self, arg0: typing.SupportsFloat) -> Tensor3: - ... - def __repr__(self) -> str: - ... - def __rmul__(self, arg0: typing.SupportsFloat) -> Tensor3: - ... - def __rsub__(self, arg0: typing.SupportsFloat) -> Tensor3: - ... - @typing.overload - def __sub__(self, arg0: Tensor3) -> Tensor3: - ... - @typing.overload - def __sub__(self, arg0: typing.SupportsFloat) -> Tensor3: - ... - def __truediv__(self, arg0: typing.SupportsFloat) -> Tensor3: - ... - def get_axes(self) -> typing.Annotated[list[int], "FixedSize(3)"]: - ... - def get_shape(self) -> typing.Annotated[list[int], "FixedSize(3)"]: - ... - def get_size(self) -> int: - ... - def t(self) -> Tensor3: - ... - @typing.overload - def transpose(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(3)"]) -> Tensor3: - ... - @typing.overload - def transpose(self, arg0: typing.SupportsInt, arg1: typing.SupportsInt) -> Tensor3: - ... -class Vector: - @typing.overload - def __add__(self, arg0: Vector) -> Vector: - ... - @typing.overload - def __add__(self, arg0: typing.SupportsFloat) -> Vector: - ... 
- @typing.overload - def __iadd__(self, arg0: Vector) -> Vector: - ... - @typing.overload - def __iadd__(self, arg0: typing.SupportsFloat) -> Vector: - ... - @typing.overload - def __imul__(self, arg0: Vector) -> Vector: - ... - @typing.overload - def __imul__(self, arg0: typing.SupportsFloat) -> Vector: - ... - @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(1)"]) -> None: - ... - @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(1)"], arg1: typing.SupportsFloat) -> None: - ... - @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(1)"], arg1: collections.abc.Sequence[typing.SupportsFloat]) -> None: - ... - @typing.overload - def __init__(self, arg0: typing.Annotated[collections.abc.Sequence[typing.SupportsInt], "FixedSize(1)"], arg1: typing.SupportsFloat, arg2: typing.SupportsFloat) -> None: - ... - @typing.overload - def __isub__(self, arg0: Vector) -> Vector: - ... - @typing.overload - def __isub__(self, arg0: typing.SupportsFloat) -> Vector: - ... - def __itruediv__(self, arg0: typing.SupportsFloat) -> Vector: - ... - @typing.overload - def __mul__(self, arg0: Vector) -> Vector: - ... - @typing.overload - def __mul__(self, arg0: typing.SupportsFloat) -> Vector: - ... - def __neg__(self) -> Vector: - ... - def __pos__(self) -> Vector: - ... - def __radd__(self, arg0: typing.SupportsFloat) -> Vector: - ... - def __repr__(self) -> str: - ... - def __rmul__(self, arg0: typing.SupportsFloat) -> Vector: - ... - def __rsub__(self, arg0: typing.SupportsFloat) -> Vector: - ... - @typing.overload - def __sub__(self, arg0: Vector) -> Vector: - ... - @typing.overload - def __sub__(self, arg0: typing.SupportsFloat) -> Vector: - ... - def __truediv__(self, arg0: typing.SupportsFloat) -> Vector: - ... - def get_axes(self) -> typing.Annotated[list[int], "FixedSize(1)"]: - ... 
- def get_shape(self) -> typing.Annotated[list[int], "FixedSize(1)"]: - ... - def get_size(self) -> int: - ... -def init(arg0: str) -> None: - ... -CPU: PLATFORM # value = -MODE: PLATFORM # value = -OPENCL: PLATFORM # value =