mirror of
https://github.com/StepanovPlaton/NeuralNetwork.git
synced 2026-04-03 20:30:39 +04:00
Back propagation. Not working yet :(
This commit is contained in:
63
src/main.cpp
63
src/main.cpp
@@ -25,6 +25,7 @@ public:
|
|||||||
float getAlpha() const { return alpha; }
|
float getAlpha() const { return alpha; }
|
||||||
|
|
||||||
const Vector &getBias() const { return bias; }
|
const Vector &getBias() const { return bias; }
|
||||||
|
void setBias(const Vector &b) { bias = b; }
|
||||||
};
|
};
|
||||||
|
|
||||||
class ConnectedLayer : public Layer {
|
class ConnectedLayer : public Layer {
|
||||||
@@ -41,6 +42,7 @@ public:
|
|||||||
|
|
||||||
int getInputFeatures() const { return inputFeatures; }
|
int getInputFeatures() const { return inputFeatures; }
|
||||||
const Matrix &getWeights() const { return weights; }
|
const Matrix &getWeights() const { return weights; }
|
||||||
|
void setWeights(const Matrix &w) { weights = w; }
|
||||||
};
|
};
|
||||||
|
|
||||||
class LearnLayer : public ConnectedLayer {
|
class LearnLayer : public ConnectedLayer {
|
||||||
@@ -81,7 +83,7 @@ public:
|
|||||||
steps.push_back(inputs);
|
steps.push_back(inputs);
|
||||||
for (size_t i = 0; i < layers.size(); i++)
|
for (size_t i = 0; i < layers.size(); i++)
|
||||||
steps.push_back(mm.dot(steps[steps.size() - 1], layers[i].getWeights(),
|
steps.push_back(mm.dot(steps[steps.size() - 1], layers[i].getWeights(),
|
||||||
true, &layers[i].getBias(),
|
false, true, &layers[i].getBias(),
|
||||||
layers[i].getActivation(), layers[i].getAlpha()));
|
layers[i].getActivation(), layers[i].getAlpha()));
|
||||||
mm.await();
|
mm.await();
|
||||||
return steps[steps.size() - 1];
|
return steps[steps.size() - 1];
|
||||||
@@ -102,11 +104,12 @@ public:
|
|||||||
layers.push_back(LearnLayer(l[i - 1].getOuputFeatures(), l[i]));
|
layers.push_back(LearnLayer(l[i - 1].getOuputFeatures(), l[i]));
|
||||||
}
|
}
|
||||||
|
|
||||||
Matrix learn(Matrix inputs, Matrix target) {
|
Matrix learn(Matrix inputs, Matrix target, float speed = 1.0f) {
|
||||||
MatrixMath mm;
|
MatrixMath mm;
|
||||||
|
VectorMath vm;
|
||||||
for (size_t i = 0; i < layers.size(); i++) {
|
for (size_t i = 0; i < layers.size(); i++) {
|
||||||
layers[i].setInternal(mm.dot(i == 0 ? inputs : layers[i - 1].getOutputs(),
|
layers[i].setInternal(mm.dot(i == 0 ? inputs : layers[i - 1].getOutputs(),
|
||||||
layers[i].getWeights(), true,
|
layers[i].getWeights(), false, true,
|
||||||
&layers[i].getBias()));
|
&layers[i].getBias()));
|
||||||
layers[i].setOutputs(mm.activate(layers[i].getInternal(),
|
layers[i].setOutputs(mm.activate(layers[i].getInternal(),
|
||||||
layers[i].getActivation(),
|
layers[i].getActivation(),
|
||||||
@@ -134,10 +137,22 @@ public:
|
|||||||
printf("%5.3f ", lo[i]);
|
printf("%5.3f ", lo[i]);
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
|
|
||||||
// Matrix dA2 =
|
Matrix dAnl =
|
||||||
// mm.d_loss(layers[layers.size() - 1].getOutputs(), target, Loss::MSE);
|
mm.d_loss(layers[layers.size() - 1].getOutputs(), target, Loss::MSE);
|
||||||
// Matrix = mm.dot(dA2,
|
for (int i = layers.size() - 1; i >= 0; --i) {
|
||||||
// mm.d_activate(layers[layers.size()-1].getOutputs()));
|
Matrix dZl = mm.mult(dAnl, mm.d_activate(layers[i].getInternal()));
|
||||||
|
Matrix dWl = mm.mult(
|
||||||
|
mm.dot(dZl, i == 0 ? inputs : layers[i - 1].getOutputs(), true),
|
||||||
|
1.0f / (float)inputs.getRows());
|
||||||
|
Vector dbl = mm.axis_sum(mm.mult(dZl, 1.0f / (float)inputs.getRows()));
|
||||||
|
dAnl = mm.dot(dZl, layers[i].getWeights(), false, false); // false true?!
|
||||||
|
|
||||||
|
mm.await();
|
||||||
|
|
||||||
|
layers[i].setWeights(mm.add(layers[i].getWeights(), dWl, -speed));
|
||||||
|
layers[i].setBias(
|
||||||
|
vm.add(layers[i].getBias(), dbl, -speed / (float)inputs.getRows()));
|
||||||
|
}
|
||||||
|
|
||||||
return mse;
|
return mse;
|
||||||
}
|
}
|
||||||
@@ -151,19 +166,39 @@ OpenCL openCL;
|
|||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
LearnNerualNetrowk nn(
|
LearnNerualNetrowk nn(
|
||||||
2, {Layer(3, Activation::SIGMOID), Layer(3, Activation::SIGMOID)});
|
2, {Layer(2, Activation::TANH), Layer(1, Activation::SIGMOID)});
|
||||||
std::cout << "NN created!" << std::endl;
|
std::cout << std::endl;
|
||||||
|
|
||||||
for (int i = 0; i < 4; i++) {
|
// Matrix input(4, 2);
|
||||||
|
// Matrix target(4, 1);
|
||||||
|
//
|
||||||
|
// for (int batch = 0; batch < 4; batch++) {
|
||||||
|
// for (int i = 0; i < 4; i++) {
|
||||||
|
// int v1 = (i / 2) % 2;
|
||||||
|
// int v2 = i % 2;
|
||||||
|
//
|
||||||
|
// input(i, 0) = static_cast<float>(v1);
|
||||||
|
// input(i, 1) = static_cast<float>(v2);
|
||||||
|
// target(i, 0) = static_cast<float>(v1 ^ v2);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// for (int i = 0; i < 10; i++) {
|
||||||
|
// printf("%4d | ", i + 1);
|
||||||
|
// Matrix mse = nn.learn(input, target, 0.1f * std::pow(0.99, i));
|
||||||
|
// }
|
||||||
|
|
||||||
|
for (int i = 0; i < 4 * 1000; i++) {
|
||||||
int v1 = (i / 2) % 2;
|
int v1 = (i / 2) % 2;
|
||||||
int v2 = i % 2;
|
int v2 = i % 2;
|
||||||
|
|
||||||
Matrix input(1, 2, {static_cast<float>(v1), static_cast<float>(v2)});
|
Matrix input(1, 2, {static_cast<float>(v1), static_cast<float>(v2)});
|
||||||
Matrix target(1, 3,
|
Matrix target(1, 1, static_cast<float>(v1 ^ v2));
|
||||||
{static_cast<float>(v1 ^ v2), static_cast<float>(v1 & v2),
|
|
||||||
static_cast<float>(v1 | v2)});
|
|
||||||
|
|
||||||
nn.learn(input, target);
|
printf("%5d | ", i + 1);
|
||||||
|
Matrix mse = nn.learn(input, target, 0.00003f);
|
||||||
|
if (i % 4 == 3)
|
||||||
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|||||||
@@ -35,20 +35,24 @@ protected:
|
|||||||
throw std::invalid_argument("Unknown activation type");
|
throw std::invalid_argument("Unknown activation type");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
float d_activateX(float f, Activation type, float alpha = 0.01f) {
|
float d_activateX(float x, Activation type, float alpha = 0.01f) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case Activation::LINEAR:
|
case Activation::LINEAR:
|
||||||
return 1.0f;
|
return 1.0f;
|
||||||
case Activation::SIGMOID:
|
case Activation::SIGMOID: {
|
||||||
return f * (1.0f - f);
|
float sigmoid = 1.0f / (1.0f + std::exp(-x));
|
||||||
case Activation::TANH:
|
return sigmoid * (1.0f - sigmoid);
|
||||||
return 1.0f - f * f;
|
}
|
||||||
|
case Activation::TANH: {
|
||||||
|
float tanh_x = std::tanh(x);
|
||||||
|
return 1.0f - tanh_x * tanh_x;
|
||||||
|
}
|
||||||
case Activation::RELU:
|
case Activation::RELU:
|
||||||
return (f > 0.0f) ? 1.0f : 0.0f;
|
return (x > 0.0f) ? 1.0f : 0.0f;
|
||||||
case Activation::LEAKY_RELU:
|
case Activation::LEAKY_RELU:
|
||||||
return (f > 0.0f) ? 1.0f : alpha;
|
return (x > 0.0f) ? 1.0f : alpha;
|
||||||
case Activation::ELU:
|
case Activation::ELU:
|
||||||
return (f > 0.0f) ? 1.0f : f + alpha;
|
return (x > 0.0f) ? 1.0f : alpha * std::exp(x);
|
||||||
default:
|
default:
|
||||||
throw std::invalid_argument("Unknown activation type");
|
throw std::invalid_argument("Unknown activation type");
|
||||||
}
|
}
|
||||||
@@ -72,6 +76,13 @@ public:
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
T mult(const T &a, const T &b) override {
|
||||||
|
this->validateSameDimensions(a, b);
|
||||||
|
T result(a.getShape(), false);
|
||||||
|
for (size_t i = 0; i < a.getSize(); ++i)
|
||||||
|
result[i] = a[i] * b[i];
|
||||||
|
return result;
|
||||||
|
}
|
||||||
T mult(const T &t, float x) override {
|
T mult(const T &t, float x) override {
|
||||||
T result(t.getShape(), false);
|
T result(t.getShape(), false);
|
||||||
for (size_t i = 0; i < t.getSize(); ++i)
|
for (size_t i = 0; i < t.getSize(); ++i)
|
||||||
@@ -116,19 +127,21 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Tensor2 dot(const Tensor2 &a, const Tensor2 &b, bool transpose = false,
|
Tensor2 dot(const Tensor2 &a, const Tensor2 &b, bool transpose_a = false,
|
||||||
const Vector *bias = nullptr,
|
bool transpose_b = false, const Vector *bias = nullptr,
|
||||||
Activation type = Activation::LINEAR,
|
Activation type = Activation::LINEAR,
|
||||||
float alpha = 0.01f) override {
|
float alpha = 0.01f) override {
|
||||||
validateMultDimensions(a, b, transpose);
|
validateMultDimensions(a, b, transpose_a, transpose_b);
|
||||||
if (bias != nullptr)
|
if (bias != nullptr)
|
||||||
validateBiasDimensions(b, *bias, transpose);
|
validateBiasDimensions(b, *bias, transpose_b);
|
||||||
Tensor2 result(a.getRows(), transpose ? b.getRows() : b.getCols(), 0.0f);
|
Tensor2 result(transpose_a ? a.getCols() : a.getRows(),
|
||||||
|
transpose_b ? b.getRows() : b.getCols(), 0.0f);
|
||||||
for (int i = 0; i < result.getRows(); ++i) {
|
for (int i = 0; i < result.getRows(); ++i) {
|
||||||
for (int j = 0; j < result.getCols(); ++j) {
|
for (int j = 0; j < result.getCols(); ++j) {
|
||||||
float sum = 0.0f;
|
float sum = 0.0f;
|
||||||
for (int k = 0; k < a.getCols(); ++k)
|
for (int k = 0; k < a.getCols(); ++k)
|
||||||
sum += a(i, k) * (transpose ? b(j, k) : b(k, j));
|
sum += (transpose_a ? a(k, i) : a(i, k)) *
|
||||||
|
(transpose_b ? b(j, k) : b(k, j));
|
||||||
result(i, j) =
|
result(i, j) =
|
||||||
activateX(sum + (bias == nullptr ? 0.0f : (*bias)(j)), type, alpha);
|
activateX(sum + (bias == nullptr ? 0.0f : (*bias)(j)), type, alpha);
|
||||||
}
|
}
|
||||||
@@ -154,6 +167,17 @@ public:
|
|||||||
throw std::invalid_argument("Unknown loss type");
|
throw std::invalid_argument("Unknown loss type");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Tensor1 axis_sum(const Tensor2 &m) override {
|
||||||
|
Tensor1 result(m.getCols(), 0.0f);
|
||||||
|
for (int i = 0; i < m.getCols(); ++i) {
|
||||||
|
float sum = 0.0f;
|
||||||
|
for (int j = 0; j < m.getRows(); ++j)
|
||||||
|
sum += m(j, i);
|
||||||
|
result(i) = sum;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class Tensor3Math : public TensorMath<Tensor3>, public ITensor3Math<Tensor3> {};
|
class Tensor3Math : public TensorMath<Tensor3>, public ITensor3Math<Tensor3> {};
|
||||||
|
|||||||
@@ -172,8 +172,6 @@ public:
|
|||||||
|
|
||||||
float &operator()(int i) { return data[i]; }
|
float &operator()(int i) { return data[i]; }
|
||||||
const float &operator()(int i) const { return data[i]; }
|
const float &operator()(int i) const { return data[i]; }
|
||||||
|
|
||||||
int getSize() const override { return shape[0]; }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class Tensor2 : public ITensor2, public Tensor {
|
class Tensor2 : public ITensor2, public Tensor {
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ public:
|
|||||||
virtual T activate(const T &m, Activation type, float alpha) = 0;
|
virtual T activate(const T &m, Activation type, float alpha) = 0;
|
||||||
virtual T d_activate(const T &m, Activation type, float alpha) = 0;
|
virtual T d_activate(const T &m, Activation type, float alpha) = 0;
|
||||||
|
|
||||||
|
virtual T mult(const T &a, const T &b) = 0;
|
||||||
virtual T mult(const T &m, float x) = 0;
|
virtual T mult(const T &m, float x) = 0;
|
||||||
virtual T add(const T &a, const T &b, float x) = 0;
|
virtual T add(const T &a, const T &b, float x) = 0;
|
||||||
virtual T add(const T &m, float x) = 0;
|
virtual T add(const T &m, float x) = 0;
|
||||||
@@ -47,24 +48,26 @@ template <ITensor1Type T> class ITensor1Math {};
|
|||||||
|
|
||||||
template <ITensor2Type M, ITensor1Type V> class ITensor2Math {
|
template <ITensor2Type M, ITensor1Type V> class ITensor2Math {
|
||||||
public:
|
public:
|
||||||
virtual M dot(const M &a, const M &b, bool transpose, const V *bias,
|
virtual M dot(const M &a, const M &b, bool transpose_a, bool transpose_b,
|
||||||
Activation type, float alpha) = 0;
|
const V *bias, Activation type, float alpha) = 0;
|
||||||
|
|
||||||
virtual M loss(const M &a, const M &b, Loss type) = 0;
|
virtual M loss(const M &a, const M &b, Loss type) = 0;
|
||||||
virtual M d_loss(const M &a, const M &b, Loss type) = 0;
|
virtual M d_loss(const M &a, const M &b, Loss type) = 0;
|
||||||
|
|
||||||
void validateMultDimensions(const M &a, const M &b, bool transpose) const {
|
virtual V axis_sum(const M &m) = 0;
|
||||||
if ((!transpose && a.getCols() != b.getRows()) ||
|
|
||||||
(transpose && a.getCols() != b.getCols())) {
|
void validateMultDimensions(const M &a, const M &b, bool transpose_a,
|
||||||
|
bool transpose_b) const {
|
||||||
|
int a_cols = transpose_a ? a.getRows() : a.getCols();
|
||||||
|
int b_rows = transpose_b ? b.getCols() : b.getRows();
|
||||||
|
if (a_cols != b_rows)
|
||||||
throw std::invalid_argument(
|
throw std::invalid_argument(
|
||||||
"Invalid matrix dimensions for multiplication");
|
"Invalid matrix dimensions for multiplication");
|
||||||
}
|
|
||||||
};
|
};
|
||||||
void validateBiasDimensions(const M &a, const V &b, bool transpose) const {
|
void validateBiasDimensions(const M &a, const V &b, bool transpose) const {
|
||||||
if ((!transpose && a.getCols() != b.getSize()) ||
|
if ((!transpose && a.getCols() != b.getSize()) ||
|
||||||
(transpose && a.getRows() != b.getSize())) {
|
(transpose && a.getRows() != b.getSize()))
|
||||||
throw std::invalid_argument("Invalid matrix bias");
|
throw std::invalid_argument("Invalid matrix bias");
|
||||||
}
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -50,10 +50,7 @@ public:
|
|||||||
|
|
||||||
class ITensor0 {};
|
class ITensor0 {};
|
||||||
|
|
||||||
class ITensor1 {
|
class ITensor1 {};
|
||||||
public:
|
|
||||||
virtual int getSize() const = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
class ITensor2 {
|
class ITensor2 {
|
||||||
public:
|
public:
|
||||||
|
|||||||
Reference in New Issue
Block a user