diff --git a/src/main b/src/main index cc9cc83..45b2c56 100755 Binary files a/src/main and b/src/main differ diff --git a/src/main.cpp b/src/main.cpp index 1bea81d..2f4187f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -25,6 +25,7 @@ public: float getAlpha() const { return alpha; } const Vector &getBias() const { return bias; } + void setBias(const Vector &b) { bias = b; } }; class ConnectedLayer : public Layer { @@ -41,6 +42,7 @@ public: int getInputFeatures() const { return inputFeatures; } const Matrix &getWeights() const { return weights; } + void setWeights(const Matrix &w) { weights = w; } }; class LearnLayer : public ConnectedLayer { @@ -81,7 +83,7 @@ public: steps.push_back(inputs); for (size_t i = 0; i < layers.size(); i++) steps.push_back(mm.dot(steps[steps.size() - 1], layers[i].getWeights(), - true, &layers[i].getBias(), + false, true, &layers[i].getBias(), layers[i].getActivation(), layers[i].getAlpha())); mm.await(); return steps[steps.size() - 1]; @@ -102,11 +104,12 @@ public: layers.push_back(LearnLayer(l[i - 1].getOuputFeatures(), l[i])); } - Matrix learn(Matrix inputs, Matrix target) { + Matrix learn(Matrix inputs, Matrix target, float speed = 1.0f) { MatrixMath mm; + VectorMath vm; for (size_t i = 0; i < layers.size(); i++) { layers[i].setInternal(mm.dot(i == 0 ? inputs : layers[i - 1].getOutputs(), - layers[i].getWeights(), true, + layers[i].getWeights(), false, true, &layers[i].getBias())); layers[i].setOutputs(mm.activate(layers[i].getInternal(), layers[i].getActivation(), @@ -134,10 +137,22 @@ public: printf("%5.3f ", lo[i]); std::cout << std::endl; - // Matrix dA2 = - // mm.d_loss(layers[layers.size() - 1].getOutputs(), target, Loss::MSE); - // Matrix = mm.dot(dA2, - // mm.d_activate(layers[layers.size()-1].getOutputs())); + Matrix dAnl = + mm.d_loss(layers[layers.size() - 1].getOutputs(), target, Loss::MSE); + for (int i = layers.size() - 1; i >= 0; --i) { + Matrix dZl = mm.mult(dAnl, mm.d_activate(layers[i].getInternal())); + Matrix dWl = mm.mult( + mm.dot(dZl, i == 0 ? inputs : layers[i - 1].getOutputs(), true), + 1.0f / (float)inputs.getRows()); + Vector dbl = mm.axis_sum(mm.mult(dZl, 1.0f / (float)inputs.getRows())); + dAnl = mm.dot(dZl, layers[i].getWeights(), false, false); // false true?! + + mm.await(); + + layers[i].setWeights(mm.add(layers[i].getWeights(), dWl, -speed)); + layers[i].setBias( + vm.add(layers[i].getBias(), dbl, -speed / (float)inputs.getRows())); + } return mse; } @@ -151,19 +166,39 @@ OpenCL openCL; int main() { LearnNerualNetrowk nn( - 2, {Layer(3, Activation::SIGMOID), Layer(3, Activation::SIGMOID)}); - std::cout << "NN created!" << std::endl; + 2, {Layer(2, Activation::TANH), Layer(1, Activation::SIGMOID)}); + std::cout << std::endl; - for (int i = 0; i < 4; i++) { + // Matrix input(4, 2); + // Matrix target(4, 1); + // + // for (int batch = 0; batch < 4; batch++) { + // for (int i = 0; i < 4; i++) { + // int v1 = (i / 2) % 2; + // int v2 = i % 2; + // + // input(i, 0) = static_cast(v1); + // input(i, 1) = static_cast(v2); + // target(i, 0) = static_cast(v1 ^ v2); + // } + // } + // + // for (int i = 0; i < 10; i++) { + // printf("%4d | ", i + 1); + // Matrix mse = nn.learn(input, target, 0.1f * std::pow(0.99, i)); + // } + + for (int i = 0; i < 4 * 1000; i++) { int v1 = (i / 2) % 2; int v2 = i % 2; Matrix input(1, 2, {static_cast(v1), static_cast(v2)}); - Matrix target(1, 3, - {static_cast(v1 ^ v2), static_cast(v1 & v2), - static_cast(v1 | v2)}); + Matrix target(1, 1, static_cast(v1 ^ v2)); - nn.learn(input, target); + printf("%5d | ", i + 1); + Matrix mse = nn.learn(input, target, 0.00003f); + if (i % 4 == 3) + std::cout << std::endl; } return 0; diff --git a/src/math/tensor/cpu/math.hpp b/src/math/tensor/cpu/math.hpp index efdef18..cb34ed6 100644 --- a/src/math/tensor/cpu/math.hpp +++ b/src/math/tensor/cpu/math.hpp @@ -35,20 +35,24 @@ protected: throw std::invalid_argument("Unknown activation type"); } } - float d_activateX(float f, Activation type, float alpha = 0.01f) { + float d_activateX(float x, Activation type, float alpha = 0.01f) { switch (type) { case Activation::LINEAR: return 1.0f; - case Activation::SIGMOID: - return f * (1.0f - f); - case Activation::TANH: - return 1.0f - f * f; + case Activation::SIGMOID: { + float sigmoid = 1.0f / (1.0f + std::exp(-x)); + return sigmoid * (1.0f - sigmoid); + } + case Activation::TANH: { + float tanh_x = std::tanh(x); + return 1.0f - tanh_x * tanh_x; + } case Activation::RELU: - return (f > 0.0f) ? 1.0f : 0.0f; + return (x > 0.0f) ? 1.0f : 0.0f; case Activation::LEAKY_RELU: - return (f > 0.0f) ? 1.0f : alpha; + return (x > 0.0f) ? 1.0f : alpha; case Activation::ELU: - return (f > 0.0f) ? 1.0f : f + alpha; + return (x > 0.0f) ? 1.0f : alpha * std::exp(x); default: throw std::invalid_argument("Unknown activation type"); } @@ -72,6 +76,13 @@ public: return result; } + T mult(const T &a, const T &b) override { + this->validateSameDimensions(a, b); + T result(a.getShape(), false); + for (size_t i = 0; i < a.getSize(); ++i) + result[i] = a[i] * b[i]; + return result; + } T mult(const T &t, float x) override { T result(t.getShape(), false); for (size_t i = 0; i < t.getSize(); ++i) @@ -116,19 +127,21 @@ private: } public: - Tensor2 dot(const Tensor2 &a, const Tensor2 &b, bool transpose = false, - const Vector *bias = nullptr, + Tensor2 dot(const Tensor2 &a, const Tensor2 &b, bool transpose_a = false, + bool transpose_b = false, const Vector *bias = nullptr, Activation type = Activation::LINEAR, float alpha = 0.01f) override { - validateMultDimensions(a, b, transpose); + validateMultDimensions(a, b, transpose_a, transpose_b); if (bias != nullptr) - validateBiasDimensions(b, *bias, transpose); - Tensor2 result(a.getRows(), transpose ? b.getRows() : b.getCols(), 0.0f); + validateBiasDimensions(b, *bias, transpose_b); + Tensor2 result(transpose_a ? a.getCols() : a.getRows(), + transpose_b ? b.getRows() : b.getCols(), 0.0f); for (int i = 0; i < result.getRows(); ++i) { for (int j = 0; j < result.getCols(); ++j) { float sum = 0.0f; for (int k = 0; k < a.getCols(); ++k) - sum += a(i, k) * (transpose ? b(j, k) : b(k, j)); + sum += (transpose_a ? a(k, i) : a(i, k)) * + (transpose_b ? b(j, k) : b(k, j)); result(i, j) = activateX(sum + (bias == nullptr ? 0.0f : (*bias)(j)), type, alpha); } @@ -154,6 +167,17 @@ public: throw std::invalid_argument("Unknown loss type"); } } + + Tensor1 axis_sum(const Tensor2 &m) override { + Tensor1 result(m.getCols(), 0.0f); + for (int i = 0; i < m.getCols(); ++i) { + float sum = 0.0f; + for (int j = 0; j < m.getRows(); ++j) + sum += m(j, i); + result(i) = sum; + } + return result; + } }; class Tensor3Math : public TensorMath, public ITensor3Math {}; diff --git a/src/math/tensor/cpu/tensor.hpp b/src/math/tensor/cpu/tensor.hpp index ab67455..59ae653 100644 --- a/src/math/tensor/cpu/tensor.hpp +++ b/src/math/tensor/cpu/tensor.hpp @@ -172,8 +172,6 @@ public: float &operator()(int i) { return data[i]; } const float &operator()(int i) const { return data[i]; } - - int getSize() const override { return shape[0]; } }; class Tensor2 : public ITensor2, public Tensor { diff --git a/src/math/tensor/math.hpp b/src/math/tensor/math.hpp index 12db29d..b0c435f 100644 --- a/src/math/tensor/math.hpp +++ b/src/math/tensor/math.hpp @@ -34,6 +34,7 @@ public: virtual T activate(const T &m, Activation type, float alpha) = 0; virtual T d_activate(const T &m, Activation type, float alpha) = 0; + virtual T mult(const T &a, const T &b) = 0; virtual T mult(const T &m, float x) = 0; virtual T add(const T &a, const T &b, float x) = 0; virtual T add(const T &m, float x) = 0; @@ -47,24 +48,26 @@ template class ITensor1Math {}; template class ITensor2Math { public: - virtual M dot(const M &a, const M &b, bool transpose, const V *bias, - Activation type, float alpha) = 0; + virtual M dot(const M &a, const M &b, bool transpose_a, bool transpose_b, + const V *bias, Activation type, float alpha) = 0; virtual M loss(const M &a, const M &b, Loss type) = 0; virtual M d_loss(const M &a, const M &b, Loss type) = 0; - void validateMultDimensions(const M &a, const M &b, bool transpose) const { - if ((!transpose && a.getCols() != b.getRows()) || - (transpose && a.getCols() != b.getCols())) { + virtual V axis_sum(const M &m) = 0; + + void validateMultDimensions(const M &a, const M &b, bool transpose_a, + bool transpose_b) const { + int a_cols = transpose_a ? a.getRows() : a.getCols(); + int b_rows = transpose_b ? b.getCols() : b.getRows(); + if (a_cols != b_rows) throw std::invalid_argument( "Invalid matrix dimensions for multiplication"); - } }; void validateBiasDimensions(const M &a, const V &b, bool transpose) const { if ((!transpose && a.getCols() != b.getSize()) || - (transpose && a.getRows() != b.getSize())) { + (transpose && a.getRows() != b.getSize())) throw std::invalid_argument("Invalid matrix bias"); - } }; }; diff --git a/src/math/tensor/tensor.hpp b/src/math/tensor/tensor.hpp index b590dcd..4b1ff81 100644 --- a/src/math/tensor/tensor.hpp +++ b/src/math/tensor/tensor.hpp @@ -50,10 +50,7 @@ public: class ITensor0 {}; -class ITensor1 { -public: - virtual int getSize() const = 0; -}; +class ITensor1 {}; class ITensor2 { public: