mirror of
https://github.com/StepanovPlaton/NeuralNetwork.git
synced 2026-04-03 20:30:39 +04:00
Fixes for backpropagation
This commit is contained in:
160
src/main.cpp
160
src/main.cpp
@@ -38,7 +38,7 @@ public:
|
|||||||
: Layer(layer), inputFeatures(inputFeatures),
|
: Layer(layer), inputFeatures(inputFeatures),
|
||||||
weights(layer.getOuputFeatures(), inputFeatures) {}
|
weights(layer.getOuputFeatures(), inputFeatures) {}
|
||||||
ConnectedLayer(const Layer &a, const Layer &b)
|
ConnectedLayer(const Layer &a, const Layer &b)
|
||||||
: ConnectedLayer(b.getOuputFeatures(), a) {}
|
: ConnectedLayer(a.getOuputFeatures(), b) {}
|
||||||
|
|
||||||
int getInputFeatures() const { return inputFeatures; }
|
int getInputFeatures() const { return inputFeatures; }
|
||||||
const Matrix &getWeights() const { return weights; }
|
const Matrix &getWeights() const { return weights; }
|
||||||
@@ -47,7 +47,6 @@ public:
|
|||||||
|
|
||||||
class LearnLayer : public ConnectedLayer {
|
class LearnLayer : public ConnectedLayer {
|
||||||
protected:
|
protected:
|
||||||
// Matrix gradients;
|
|
||||||
Matrix internal;
|
Matrix internal;
|
||||||
Matrix outputs;
|
Matrix outputs;
|
||||||
|
|
||||||
@@ -57,7 +56,7 @@ public:
|
|||||||
internal(layer.getOuputFeatures(), inputFeatures, false),
|
internal(layer.getOuputFeatures(), inputFeatures, false),
|
||||||
outputs(layer.getOuputFeatures(), inputFeatures, false) {}
|
outputs(layer.getOuputFeatures(), inputFeatures, false) {}
|
||||||
LearnLayer(const Layer &a, const Layer &b)
|
LearnLayer(const Layer &a, const Layer &b)
|
||||||
: LearnLayer(b.getOuputFeatures(), a) {}
|
: LearnLayer(a.getOuputFeatures(), b) {}
|
||||||
|
|
||||||
const Matrix &getInternal() const { return internal; }
|
const Matrix &getInternal() const { return internal; }
|
||||||
const Matrix &getOutputs() const { return outputs; }
|
const Matrix &getOutputs() const { return outputs; }
|
||||||
@@ -101,16 +100,16 @@ public:
|
|||||||
// employ back
|
// employ back
|
||||||
layers.push_back(LearnLayer(inputFeatures, l[0]));
|
layers.push_back(LearnLayer(inputFeatures, l[0]));
|
||||||
for (size_t i = 1; i < l.size(); i++)
|
for (size_t i = 1; i < l.size(); i++)
|
||||||
layers.push_back(LearnLayer(l[i - 1].getOuputFeatures(), l[i]));
|
layers.push_back(LearnLayer(l[i - 1], l[i]));
|
||||||
}
|
}
|
||||||
|
|
||||||
Matrix learn(Matrix inputs, Matrix target, float speed = 1.0f) {
|
Matrix learn(Matrix inputs, Matrix target, float speed = 1.0f) {
|
||||||
MatrixMath mm;
|
MatrixMath mm;
|
||||||
VectorMath vm;
|
VectorMath vm;
|
||||||
for (size_t i = 0; i < layers.size(); i++) {
|
for (size_t i = 0; i < layers.size(); i++) {
|
||||||
layers[i].setInternal(mm.dot(i == 0 ? inputs : layers[i - 1].getOutputs(),
|
layers[i].setInternal(mm.dot(layers[i].getWeights(),
|
||||||
layers[i].getWeights(), false, true,
|
i == 0 ? inputs : layers[i - 1].getOutputs(),
|
||||||
&layers[i].getBias()));
|
false, false, &layers[i].getBias()));
|
||||||
layers[i].setOutputs(mm.activate(layers[i].getInternal(),
|
layers[i].setOutputs(mm.activate(layers[i].getInternal(),
|
||||||
layers[i].getActivation(),
|
layers[i].getActivation(),
|
||||||
layers[i].getAlpha()));
|
layers[i].getAlpha()));
|
||||||
@@ -120,17 +119,26 @@ public:
|
|||||||
std::vector<float> io = inputs.toVector();
|
std::vector<float> io = inputs.toVector();
|
||||||
std::cout << "I: ";
|
std::cout << "I: ";
|
||||||
for (size_t i = 0; i < io.size(); ++i)
|
for (size_t i = 0; i < io.size(); ++i)
|
||||||
printf("%5.3f ", io[i]);
|
printf("%4.2f ", io[i]);
|
||||||
|
|
||||||
|
std::vector<float> ni = layers[layers.size() - 1].getInternal().toVector();
|
||||||
|
std::cout << "| NNI: ";
|
||||||
|
for (size_t i = 0; i < ni.size(); ++i)
|
||||||
|
printf("%4.2f ", ni[i]);
|
||||||
|
|
||||||
std::vector<float> no = layers[layers.size() - 1].getOutputs().toVector();
|
std::vector<float> no = layers[layers.size() - 1].getOutputs().toVector();
|
||||||
std::cout << "| NN: ";
|
std::cout << "| NNO: ";
|
||||||
for (size_t i = 0; i < no.size(); ++i)
|
for (size_t i = 0; i < no.size(); ++i)
|
||||||
printf("%5.3f ", no[i]);
|
printf("%4.2f ", no[i]);
|
||||||
|
|
||||||
std::vector<float> to = target.toVector();
|
std::vector<float> to = target.toVector();
|
||||||
std::cout << "| T: ";
|
std::cout << "| T: ";
|
||||||
for (size_t i = 0; i < to.size(); ++i)
|
for (size_t i = 0; i < to.size(); ++i)
|
||||||
printf("%5.3f ", to[i]);
|
printf("%4.2f ", to[i]);
|
||||||
|
|
||||||
Matrix mse =
|
Matrix mse =
|
||||||
mm.loss(layers[layers.size() - 1].getOutputs(), target, Loss::MSE);
|
mm.loss(layers[layers.size() - 1].getOutputs(), target, Loss::MSE);
|
||||||
|
|
||||||
std::vector<float> lo = mse.toVector();
|
std::vector<float> lo = mse.toVector();
|
||||||
std::cout << "| L: ";
|
std::cout << "| L: ";
|
||||||
for (size_t i = 0; i < lo.size(); ++i)
|
for (size_t i = 0; i < lo.size(); ++i)
|
||||||
@@ -139,25 +147,48 @@ public:
|
|||||||
|
|
||||||
Matrix dAnl =
|
Matrix dAnl =
|
||||||
mm.d_loss(layers[layers.size() - 1].getOutputs(), target, Loss::MSE);
|
mm.d_loss(layers[layers.size() - 1].getOutputs(), target, Loss::MSE);
|
||||||
|
|
||||||
for (int i = layers.size() - 1; i >= 0; --i) {
|
for (int i = layers.size() - 1; i >= 0; --i) {
|
||||||
|
printf("=== Layer %d ===\n", i + 1);
|
||||||
|
printf("dAnl: ");
|
||||||
|
dAnl.print();
|
||||||
|
|
||||||
Matrix dZl = mm.mult(dAnl, mm.d_activate(layers[i].getInternal()));
|
Matrix dZl = mm.mult(dAnl, mm.d_activate(layers[i].getInternal()));
|
||||||
Matrix dWl = mm.mult(
|
printf("dZl: ");
|
||||||
mm.dot(dZl, i == 0 ? inputs : layers[i - 1].getOutputs(), true),
|
dZl.print();
|
||||||
1.0f / (float)inputs.getRows());
|
|
||||||
|
Matrix dWl =
|
||||||
|
mm.mult(mm.dot(dZl, i == 0 ? inputs : layers[i - 1].getOutputs(),
|
||||||
|
false, true),
|
||||||
|
1.0f / (float)inputs.getRows());
|
||||||
|
printf("dWl: ");
|
||||||
|
dWl.print();
|
||||||
|
|
||||||
Vector dbl = mm.axis_sum(mm.mult(dZl, 1.0f / (float)inputs.getRows()));
|
Vector dbl = mm.axis_sum(mm.mult(dZl, 1.0f / (float)inputs.getRows()));
|
||||||
dAnl = mm.dot(dZl, layers[i].getWeights(), false, false); // false true?!
|
printf("dbl: ");
|
||||||
|
dbl.print();
|
||||||
|
|
||||||
|
dAnl = mm.dot(layers[i].getWeights(), dZl, true); // false true?!
|
||||||
|
|
||||||
mm.await();
|
mm.await();
|
||||||
|
|
||||||
layers[i].setWeights(mm.add(layers[i].getWeights(), dWl, -speed));
|
layers[i].setWeights(mm.add(layers[i].getWeights(), dWl, -speed));
|
||||||
|
printf("Weights %d: ", i + 1);
|
||||||
|
layers[i].getWeights().print();
|
||||||
|
|
||||||
layers[i].setBias(
|
layers[i].setBias(
|
||||||
vm.add(layers[i].getBias(), dbl, -speed / (float)inputs.getRows()));
|
vm.add(layers[i].getBias(), dbl, -speed / (float)inputs.getRows()));
|
||||||
|
printf("Bias %d: ", i + 1);
|
||||||
|
layers[i].getBias().print();
|
||||||
}
|
}
|
||||||
|
|
||||||
return mse;
|
return mse;
|
||||||
}
|
}
|
||||||
|
|
||||||
const LearnLayer &getLayer(int i) const { return layers[i]; }
|
const LearnLayer &getLayer(int i) const { return layers[i]; }
|
||||||
|
|
||||||
|
// delete
|
||||||
|
LearnLayer &getLayer(int i) { return layers[i]; }
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef NOGPU
|
#ifndef NOGPU
|
||||||
@@ -165,41 +196,80 @@ OpenCL openCL;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
|
// LearnNerualNetrowk nn(
|
||||||
|
// 3, {Layer(3, Activation::SIGMOID), Layer(3, Activation::SIGMOID)});
|
||||||
|
//
|
||||||
|
// Matrix weights1(3, 3,
|
||||||
|
// {0.88f, 0.39f, 0.9f, 0.37f, 0.14f, 0.41f, 0.96f, 0.5f,
|
||||||
|
// 0.6f});
|
||||||
|
// Matrix weights2(
|
||||||
|
// 3, 3, {0.29f, 0.57f, 0.36f, 0.73f, 0.53f, 0.68f, 0.01f, 0.02f, 0.58f});
|
||||||
|
//
|
||||||
|
// Vector bias1(std::vector<float>{0.23f, 0.89f, 0.08f});
|
||||||
|
// Vector bias2(std::vector<float>{0.78f, 0.83f, 0.8f});
|
||||||
|
//
|
||||||
|
// nn.getLayer(0).setWeights(weights1);
|
||||||
|
// nn.getLayer(0).setBias(bias1);
|
||||||
|
//
|
||||||
|
// nn.getLayer(1).setWeights(weights2);
|
||||||
|
// nn.getLayer(1).setBias(bias2);
|
||||||
|
//
|
||||||
|
// std::cout << std::endl;
|
||||||
|
//
|
||||||
|
// Matrix input(3, 1, {0.03f, 0.72f, 0.49f});
|
||||||
|
// Matrix target(3, 1, {0.93f, 0.74f, 0.17f});
|
||||||
|
//
|
||||||
|
// // for (int i = 0; i < 1000; i++)
|
||||||
|
// nn.learn(input, target, 0.01f);
|
||||||
|
|
||||||
LearnNerualNetrowk nn(
|
LearnNerualNetrowk nn(
|
||||||
2, {Layer(2, Activation::TANH), Layer(1, Activation::SIGMOID)});
|
2, {Layer(3, Activation::SIGMOID), Layer(1, Activation::SIGMOID)});
|
||||||
std::cout << std::endl;
|
|
||||||
|
|
||||||
// Matrix input(4, 2);
|
Matrix input(2, 4);
|
||||||
// Matrix target(4, 1);
|
Matrix target(1, 4);
|
||||||
//
|
|
||||||
// for (int batch = 0; batch < 4; batch++) {
|
|
||||||
// for (int i = 0; i < 4; i++) {
|
|
||||||
// int v1 = (i / 2) % 2;
|
|
||||||
// int v2 = i % 2;
|
|
||||||
//
|
|
||||||
// input(i, 0) = static_cast<float>(v1);
|
|
||||||
// input(i, 1) = static_cast<float>(v2);
|
|
||||||
// target(i, 0) = static_cast<float>(v1 ^ v2);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// for (int i = 0; i < 10; i++) {
|
|
||||||
// printf("%4d | ", i + 1);
|
|
||||||
// Matrix mse = nn.learn(input, target, 0.1f * std::pow(0.99, i));
|
|
||||||
// }
|
|
||||||
|
|
||||||
for (int i = 0; i < 4 * 1000; i++) {
|
float min = 100.0f;
|
||||||
int v1 = (i / 2) % 2;
|
for (int batch = 0; batch < 4; batch++) {
|
||||||
int v2 = i % 2;
|
for (int i = 0; i < 4; i++) {
|
||||||
|
int v1 = (i / 2) % 2;
|
||||||
|
int v2 = i % 2;
|
||||||
|
|
||||||
Matrix input(1, 2, {static_cast<float>(v1), static_cast<float>(v2)});
|
input(0, i) = static_cast<float>(v1);
|
||||||
Matrix target(1, 1, static_cast<float>(v1 ^ v2));
|
input(1, i) = static_cast<float>(v2);
|
||||||
|
target(0, i) = static_cast<float>(v1 ^ v2);
|
||||||
printf("%5d | ", i + 1);
|
}
|
||||||
Matrix mse = nn.learn(input, target, 0.00003f);
|
|
||||||
if (i % 4 == 3)
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 1000; i++) {
|
||||||
|
printf("%4d | ", i + 1);
|
||||||
|
Matrix mse = nn.learn(input, target, 0.0001f * std::pow(0.99f, i));
|
||||||
|
std::vector<float> lv = mse.toVector();
|
||||||
|
float loss = 0.0f;
|
||||||
|
for (size_t i = 0; i < lv.size(); ++i)
|
||||||
|
loss += lv[i];
|
||||||
|
if (loss < min)
|
||||||
|
min = loss;
|
||||||
|
}
|
||||||
|
std::cout << min << std::endl;
|
||||||
|
|
||||||
|
// LearnNerualNetrowk nn(
|
||||||
|
// 2, {Layer(3, Activation::SIGMOID), Layer(1, Activation::SIGMOID)});
|
||||||
|
// float min = 100.0f;
|
||||||
|
// for (int i = 0; i < 4 * 10000; i++) {
|
||||||
|
// int v1 = (i / 2) % 2;
|
||||||
|
// int v2 = i % 2;
|
||||||
|
//
|
||||||
|
// Matrix input(2, 1, {static_cast<float>(v1), static_cast<float>(v2)});
|
||||||
|
// Matrix target(1, 1, static_cast<float>(v1 ^ v2));
|
||||||
|
//
|
||||||
|
// printf("%5d | ", i + 1);
|
||||||
|
// Matrix mse = nn.learn(input, target, 0.0001f * std::pow(0.95f, i));
|
||||||
|
// if (i % 4 == 3)
|
||||||
|
// std::cout << std::endl;
|
||||||
|
// if (mse[0] < min)
|
||||||
|
// min = mse[0];
|
||||||
|
// }
|
||||||
|
// std::cout << min << std::endl;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -116,13 +116,13 @@ private:
|
|||||||
Tensor2 mse(const Tensor2 &a, const Tensor2 &b) {
|
Tensor2 mse(const Tensor2 &a, const Tensor2 &b) {
|
||||||
Tensor2 result(a.getShape(), false);
|
Tensor2 result(a.getShape(), false);
|
||||||
for (size_t i = 0; i < result.getSize(); ++i)
|
for (size_t i = 0; i < result.getSize(); ++i)
|
||||||
result[i] += (a[i] - b[i]) * (a[i] - b[i]) / (float)a.getCols();
|
result[i] = (a[i] - b[i]) * (a[i] - b[i]) / (float)a.getCols();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
Tensor2 dmse(const Tensor2 &a, const Tensor2 &b) {
|
Tensor2 d_mse(const Tensor2 &a, const Tensor2 &b) {
|
||||||
Tensor2 result(a.getShape(), false);
|
Tensor2 result(a.getShape(), false);
|
||||||
for (size_t i = 0; i < result.getSize(); ++i)
|
for (size_t i = 0; i < result.getSize(); ++i)
|
||||||
result[i] += 2 * (a[i] - b[i]) / (float)a.getCols();
|
result[i] = 2 * (a[i] - b[i]) / (float)a.getCols();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -133,7 +133,7 @@ public:
|
|||||||
float alpha = 0.01f) override {
|
float alpha = 0.01f) override {
|
||||||
validateMultDimensions(a, b, transpose_a, transpose_b);
|
validateMultDimensions(a, b, transpose_a, transpose_b);
|
||||||
if (bias != nullptr)
|
if (bias != nullptr)
|
||||||
validateBiasDimensions(b, *bias, transpose_b);
|
validateBiasDimensions(a, *bias, transpose_a);
|
||||||
Tensor2 result(transpose_a ? a.getCols() : a.getRows(),
|
Tensor2 result(transpose_a ? a.getCols() : a.getRows(),
|
||||||
transpose_b ? b.getRows() : b.getCols(), 0.0f);
|
transpose_b ? b.getRows() : b.getCols(), 0.0f);
|
||||||
for (int i = 0; i < result.getRows(); ++i) {
|
for (int i = 0; i < result.getRows(); ++i) {
|
||||||
@@ -143,7 +143,7 @@ public:
|
|||||||
sum += (transpose_a ? a(k, i) : a(i, k)) *
|
sum += (transpose_a ? a(k, i) : a(i, k)) *
|
||||||
(transpose_b ? b(j, k) : b(k, j));
|
(transpose_b ? b(j, k) : b(k, j));
|
||||||
result(i, j) =
|
result(i, j) =
|
||||||
activateX(sum + (bias == nullptr ? 0.0f : (*bias)(j)), type, alpha);
|
activateX(sum + (bias == nullptr ? 0.0f : (*bias)(i)), type, alpha);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
@@ -162,18 +162,18 @@ public:
|
|||||||
this->validateSameDimensions(a, b);
|
this->validateSameDimensions(a, b);
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case Loss::MSE:
|
case Loss::MSE:
|
||||||
return dmse(a, b);
|
return d_mse(a, b);
|
||||||
default:
|
default:
|
||||||
throw std::invalid_argument("Unknown loss type");
|
throw std::invalid_argument("Unknown loss type");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Tensor1 axis_sum(const Tensor2 &m) override {
|
Tensor1 axis_sum(const Tensor2 &m) override {
|
||||||
Tensor1 result(m.getCols(), 0.0f);
|
Tensor1 result(m.getRows(), 0.0f);
|
||||||
for (int i = 0; i < m.getCols(); ++i) {
|
for (int i = 0; i < m.getRows(); ++i) {
|
||||||
float sum = 0.0f;
|
float sum = 0.0f;
|
||||||
for (int j = 0; j < m.getRows(); ++j)
|
for (int j = 0; j < m.getCols(); ++j)
|
||||||
sum += m(j, i);
|
sum += m(i, j);
|
||||||
result(i) = sum;
|
result(i) = sum;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
|
|||||||
@@ -64,9 +64,9 @@ public:
|
|||||||
throw std::invalid_argument(
|
throw std::invalid_argument(
|
||||||
"Invalid matrix dimensions for multiplication");
|
"Invalid matrix dimensions for multiplication");
|
||||||
};
|
};
|
||||||
void validateBiasDimensions(const M &a, const V &b, bool transpose) const {
|
void validateBiasDimensions(const M &m, const V &v, bool transpose) const {
|
||||||
if ((!transpose && a.getCols() != b.getSize()) ||
|
if ((transpose && (size_t)m.getCols() != v.getSize()) ||
|
||||||
(transpose && a.getRows() != b.getSize()))
|
(!transpose && (size_t)m.getRows() != v.getSize()))
|
||||||
throw std::invalid_argument("Invalid matrix bias");
|
throw std::invalid_argument("Invalid matrix bias");
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user