diff --git a/README.md b/README.md index c3ae23a..0ac0300 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,34 @@ # Neural Network ++ -> Neural Network++ - это движок для создания нейронных сетей написанный на С++ + +> Neural Network++ - это движок для ~путешествия в Мордор~ создания нейронных сетей, написанный на С++ ## Стек: + - [C++ 23](https://ru.wikipedia.org/wiki/C%2B%2B23) - [OpenCL](https://ru.wikipedia.org/wiki/OpenCL) - **Всё!** :wink: ## О проекте: + - Движок для создания нейронных сетей -- Классы с классической математикой для расчётов на CPU -- Классы с ускорением на GPU с использованием OpenCL +- Поддержка вычислений [на CPU](./math/tensor/cpu) или [на GPU](./math/tensor/gpu) + - Полиморфные пространства имён CPU и GPU соответственно + - [Алгоритмы с массовым параллелизмом на GPU](./kernels) для ускорения + - Классические алгоритмы на CPU для проверки +- [Класс Tensor](./math/tensor/tensor.hpp) для работы с тензорами N-ой размерности и [классы Scalar, Vector, Matrix и Tensor3](./math/tensor/tensor.hpp) с размерно-специфичной логикой +- [Классы ScalarMath, VectorMath, MatrixMath, Tensor3Math](./math/tensor/math.hpp) с базовыми математическими функциями ## Запуск: + - **Windows:** ``` - make make run ``` +## Forward & Back propagation - это путешествие в Мордор и обратно! + +![back_propogation](./back_propogation.png) + +> Верная смерть. Никаких шансов на успех. Так чего же мы ждём?! + ### Над проектом работали [StepanovPlaton](https://github.com/StepanovPlaton) и [Fluorouacil](https://github.com/Fluorouacil)! 
diff --git a/back_propogation.png b/back_propogation.png new file mode 100644 index 0000000..e5bd019 Binary files /dev/null and b/back_propogation.png differ diff --git a/src/.clangd b/src/.clangd new file mode 100644 index 0000000..d7e5726 --- /dev/null +++ b/src/.clangd @@ -0,0 +1,9 @@ +CompileFlags: + Add: + - -std=c++23 + - -Wall + - -Wextra + - -Wpedantic + Remove: [] +Diagnostics: + UnusedIncludes: Strict diff --git a/src/kernels/matrix.cl b/src/kernels/matrix.cl index d8ca0ad..8d4b7a9 100644 --- a/src/kernels/matrix.cl +++ b/src/kernels/matrix.cl @@ -12,9 +12,6 @@ float activate_x(float x, const int activation_type, const float alpha) { return (x > 0.0f) ? x : alpha * x; case 5: // ELU return (x > 0.0f) ? x : alpha * (exp(x) - 1.0f); - case 6: // GELU - return 0.5f * x * - (1.0f + tanh(sqrt(2.0f / M_PI_F) * (x + 0.044715f * x * x * x))); default: return x; } diff --git a/src/main b/src/main new file mode 100755 index 0000000..cc9cc83 Binary files /dev/null and b/src/main differ diff --git a/src/main.cpp b/src/main.cpp index 0c3d361..1bea81d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,9 +1,12 @@ -#include "./math/math.hpp" +#define NOGPU -#include -#include +#include "math/math.hpp" +#ifdef NOGPU +using namespace CPU; +#else using namespace GPU; +#endif class Layer { protected: @@ -13,10 +16,9 @@ protected: float alpha; public: - Layer(int outputFeatures, Activation activation, Vector bias, - float alpha = 0.0f) - : outputFeatures(outputFeatures), bias(bias), activation(activation), - alpha(alpha) {} + Layer(int outputFeatures, Activation activation, float alpha = 0.0f) + : outputFeatures(outputFeatures), bias(outputFeatures), + activation(activation), alpha(alpha) {} int getOuputFeatures() const { return outputFeatures; } Activation getActivation() const { return activation; } @@ -30,16 +32,10 @@ protected: int inputFeatures; Matrix weights; - // Matrix gradients; - Matrix internal; - Matrix outputs; - public: ConnectedLayer(int inputFeatures, const 
Layer &layer) : Layer(layer), inputFeatures(inputFeatures), - weights(layer.getOuputFeatures(), inputFeatures), - internal(layer.getOuputFeatures(), inputFeatures, false), - outputs(layer.getOuputFeatures(), inputFeatures, false) {} + weights(layer.getOuputFeatures(), inputFeatures) {} ConnectedLayer(const Layer &a, const Layer &b) : ConnectedLayer(b.getOuputFeatures(), a) {} @@ -47,6 +43,26 @@ public: const Matrix &getWeights() const { return weights; } }; +class LearnLayer : public ConnectedLayer { +protected: + // Matrix gradients; + Matrix internal; + Matrix outputs; + +public: + LearnLayer(int inputFeatures, const Layer &layer) + : ConnectedLayer(inputFeatures, layer), + internal(layer.getOuputFeatures(), inputFeatures, false), + outputs(layer.getOuputFeatures(), inputFeatures, false) {} + LearnLayer(const Layer &a, const Layer &b) + : LearnLayer(b.getOuputFeatures(), a) {} + + const Matrix &getInternal() const { return internal; } + const Matrix &getOutputs() const { return outputs; } + void setInternal(const Matrix &i) { internal = i; } + void setOutputs(const Matrix &o) { outputs = o; } +}; + class NeuralNetwork { private: std::vector layers; @@ -60,62 +76,95 @@ public: } Matrix predict(Matrix inputs) { - MatrixMath mm; - std::vector steps; - steps.push_back(inputs); - for (size_t i = 0; i < layers.size(); i++) { - Matrix internal = mm.mult(steps[steps.size() - 1], layers[i].getWeights(), - true, &layers[i].getBias()); - Matrix output = mm.activate(internal, layers[i].getActivation(), - layers[i].getAlpha()); - steps.push_back(output); - } - mm.await(); - return steps[steps.size() - 1]; - } - - Matrix training(Matrix inputs) { MatrixMath mm; std::vector steps; steps.push_back(inputs); for (size_t i = 0; i < layers.size(); i++) - steps.push_back(mm.mult(steps[steps.size() - 1], layers[i].getWeights(), - true, &layers[i].getBias(), - layers[i].getActivation(), layers[i].getAlpha())); + steps.push_back(mm.dot(steps[steps.size() - 1], layers[i].getWeights(), + 
true, &layers[i].getBias(), + layers[i].getActivation(), layers[i].getAlpha())); mm.await(); return steps[steps.size() - 1]; } - const Layer &getLayer(int i) const { return layers[i]; } + const ConnectedLayer &getLayer(int i) const { return layers[i]; } }; +class LearnNerualNetrowk { +private: + std::vector layers; + +public: + LearnNerualNetrowk(int inputFeatures, std::vector l) { + // employ back + layers.push_back(LearnLayer(inputFeatures, l[0])); + for (size_t i = 1; i < l.size(); i++) + layers.push_back(LearnLayer(l[i - 1].getOuputFeatures(), l[i])); + } + + Matrix learn(Matrix inputs, Matrix target) { + MatrixMath mm; + for (size_t i = 0; i < layers.size(); i++) { + layers[i].setInternal(mm.dot(i == 0 ? inputs : layers[i - 1].getOutputs(), + layers[i].getWeights(), true, + &layers[i].getBias())); + layers[i].setOutputs(mm.activate(layers[i].getInternal(), + layers[i].getActivation(), + layers[i].getAlpha())); + } + mm.await(); + + std::vector io = inputs.toVector(); + std::cout << "I: "; + for (size_t i = 0; i < io.size(); ++i) + printf("%5.3f ", io[i]); + std::vector no = layers[layers.size() - 1].getOutputs().toVector(); + std::cout << "| NN: "; + for (size_t i = 0; i < no.size(); ++i) + printf("%5.3f ", no[i]); + std::vector to = target.toVector(); + std::cout << "| T: "; + for (size_t i = 0; i < to.size(); ++i) + printf("%5.3f ", to[i]); + Matrix mse = + mm.loss(layers[layers.size() - 1].getOutputs(), target, Loss::MSE); + std::vector lo = mse.toVector(); + std::cout << "| L: "; + for (size_t i = 0; i < lo.size(); ++i) + printf("%5.3f ", lo[i]); + std::cout << std::endl; + + // Matrix dA2 = + // mm.d_loss(layers[layers.size() - 1].getOutputs(), target, Loss::MSE); + // Matrix = mm.dot(dA2, + // mm.d_activate(layers[layers.size()-1].getOutputs())); + + return mse; + } + + const LearnLayer &getLayer(int i) const { return layers[i]; } +}; + +#ifndef NOGPU OpenCL openCL; +#endif int main() { - NeuralNetwork nn( - 2, {Layer(3, Activation::SIGMOID, - 
Vector(std::vector{0.0f, 0.0f, 0.0f})), - Layer(1, Activation::SIGMOID, Vector(std::vector{0.0f}))}); + LearnNerualNetrowk nn( + 2, {Layer(3, Activation::SIGMOID), Layer(3, Activation::SIGMOID)}); + std::cout << "NN created!" << std::endl; for (int i = 0; i < 4; i++) { int v1 = (i / 2) % 2; int v2 = i % 2; Matrix input(1, 2, {static_cast(v1), static_cast(v2)}); + Matrix target(1, 3, + {static_cast(v1 ^ v2), static_cast(v1 & v2), + static_cast(v1 | v2)}); - Matrix r = nn.predict(input); - std::vector rv = r.toVector(); - - std::cout << "Network: "; - for (size_t j = 0; j < rv.size(); ++j) { - printf("%f\t", rv[j]); - } - - float expected = static_cast(v1 ^ v2); - std::cout << " | XOR(" << v1 << ", " << v2 << ") = " << expected; - - std::cout << std::endl; + nn.learn(input, target); } return 0; -} \ No newline at end of file +} diff --git a/src/math/math.hpp b/src/math/math.hpp index 54ff3a1..23a1cae 100644 --- a/src/math/math.hpp +++ b/src/math/math.hpp @@ -1,7 +1,9 @@ #pragma once -#include "opencl/opencl.hpp" - #include "tensor/cpu/math.hpp" +#ifndef NOGPU +#include "opencl/opencl.hpp" + #include "tensor/gpu/math.hpp" +#endif diff --git a/src/math/tensor/cpu/math.hpp b/src/math/tensor/cpu/math.hpp index 1d13355..efdef18 100644 --- a/src/math/tensor/cpu/math.hpp +++ b/src/math/tensor/cpu/math.hpp @@ -17,7 +17,7 @@ class Tensor3Math; template class TensorMath : public ITensorMath { protected: - float activate_x(float x, Activation type, float alpha = 0.01f) { + float activateX(float x, Activation type, float alpha = 0.01f) { switch (type) { case Activation::LINEAR: return x; @@ -31,10 +31,24 @@ protected: return (x > 0.0f) ? x : alpha * x; case Activation::ELU: return (x > 0.0f) ? 
x : alpha * (std::exp(x) - 1.0f); - case Activation::GELU: - return 0.5f * x * - (1.0f + - std::tanh(std::sqrt(2.0f / M_PI) * (x + 0.044715f * x * x * x))); + default: + throw std::invalid_argument("Unknown activation type"); + } + } + float d_activateX(float f, Activation type, float alpha = 0.01f) { + switch (type) { + case Activation::LINEAR: + return 1.0f; + case Activation::SIGMOID: + return f * (1.0f - f); + case Activation::TANH: + return 1.0f - f * f; + case Activation::RELU: + return (f > 0.0f) ? 1.0f : 0.0f; + case Activation::LEAKY_RELU: + return (f > 0.0f) ? 1.0f : alpha; + case Activation::ELU: + return (f > 0.0f) ? 1.0f : f + alpha; default: throw std::invalid_argument("Unknown activation type"); } @@ -45,7 +59,15 @@ public: float alpha = 0.0f) override { T result(t.getShape(), false); for (size_t i = 0; i < t.getSize(); ++i) { - result[i] = activate_x(t[i], type, alpha); + result[i] = activateX(t[i], type, alpha); + } + return result; + } + T d_activate(const T &t, Activation type = Activation::LINEAR, + float alpha = 0.0f) override { + T result(t.getShape(), false); + for (size_t i = 0; i < t.getSize(); ++i) { + result[i] = d_activateX(t[i], type, alpha); } return result; } @@ -79,11 +101,25 @@ class Tensor1Math : public TensorMath, public ITensor1Math {}; class Tensor2Math : public TensorMath, public ITensor2Math { +private: + Tensor2 mse(const Tensor2 &a, const Tensor2 &b) { + Tensor2 result(a.getShape(), false); + for (size_t i = 0; i < result.getSize(); ++i) + result[i] += (a[i] - b[i]) * (a[i] - b[i]) / (float)a.getCols(); + return result; + } + Tensor2 dmse(const Tensor2 &a, const Tensor2 &b) { + Tensor2 result(a.getShape(), false); + for (size_t i = 0; i < result.getSize(); ++i) + result[i] += 2 * (a[i] - b[i]) / (float)a.getCols(); + return result; + } + public: - Tensor2 mult(const Tensor2 &a, const Tensor2 &b, bool transpose = false, - const Vector *bias = nullptr, - Activation type = Activation::LINEAR, - float alpha = 0.01f) override { + 
Tensor2 dot(const Tensor2 &a, const Tensor2 &b, bool transpose = false, + const Vector *bias = nullptr, + Activation type = Activation::LINEAR, + float alpha = 0.01f) override { validateMultDimensions(a, b, transpose); if (bias != nullptr) validateBiasDimensions(b, *bias, transpose); @@ -93,12 +129,31 @@ public: float sum = 0.0f; for (int k = 0; k < a.getCols(); ++k) sum += a(i, k) * (transpose ? b(j, k) : b(k, j)); - result(i, j) = activate_x(sum + (bias == nullptr ? 0.0f : (*bias)(j)), - type, alpha); + result(i, j) = + activateX(sum + (bias == nullptr ? 0.0f : (*bias)(j)), type, alpha); } } return result; } + + Tensor2 loss(const Tensor2 &a, const Tensor2 &b, Loss type) override { + this->validateSameDimensions(a, b); + switch (type) { + case Loss::MSE: + return mse(a, b); + default: + throw std::invalid_argument("Unknown loss type"); + } + } + Tensor2 d_loss(const Tensor2 &a, const Tensor2 &b, Loss type) override { + this->validateSameDimensions(a, b); + switch (type) { + case Loss::MSE: + return dmse(a, b); + default: + throw std::invalid_argument("Unknown loss type"); + } + } }; class Tensor3Math : public TensorMath, public ITensor3Math {}; diff --git a/src/math/tensor/gpu/math.hpp b/src/math/tensor/gpu/math.hpp index 4a68288..1cd89ac 100644 --- a/src/math/tensor/gpu/math.hpp +++ b/src/math/tensor/gpu/math.hpp @@ -96,8 +96,8 @@ class Tensor1Math : public TensorMath, public ITensor1Math {}; class Tensor2Math : public TensorMath, public ITensor2Math { private: - Tensor2 mult_tiled(const Tensor2 &a, const Tensor2 &b, bool transpose, - const Vector &bias, Activation type, float alpha) { + Tensor2 dot_tiled(const Tensor2 &a, const Tensor2 &b, bool transpose, + const Vector &bias, Activation type, float alpha) { Tensor2 result(a.getRows(), transpose ? 
b.getRows() : b.getCols(), false, &queue); @@ -121,8 +121,8 @@ private: global_size, local_size); return result; } - Tensor2 mult_small(const Tensor2 &a, const Tensor2 &b, bool transpose, - const Vector &bias, Activation type, float alpha) { + Tensor2 dot_small(const Tensor2 &a, const Tensor2 &b, bool transpose, + const Vector &bias, Activation type, float alpha) { Tensor2 result(a.getRows(), transpose ? b.getRows() : b.getCols(), false, &queue); kernels[Method::MULT_SMALL].setArg(0, *a.getBuffer()); @@ -141,21 +141,21 @@ private: } public: - Tensor2 mult(const Tensor2 &a, const Tensor2 &b, bool transpose = false, - const Vector *bias = nullptr, - Activation type = Activation::LINEAR, - float alpha = 0.01f) override { + Tensor2 dot(const Tensor2 &a, const Tensor2 &b, bool transpose = false, + const Vector *bias = nullptr, + Activation type = Activation::LINEAR, + float alpha = 0.01f) override { validateMultDimensions(a, b, transpose); const Vector defaultBias(a.getRows(), 0.0f, &queue); if (bias != nullptr) validateBiasDimensions(b, *bias, transpose); if (a.getRows() > 64 || a.getCols() > 64 || b.getRows() > 64 || b.getCols() > 64) - return mult_tiled(a, b, transpose, bias == nullptr ? defaultBias : *bias, - type, alpha); + return dot_tiled(a, b, transpose, bias == nullptr ? defaultBias : *bias, + type, alpha); else - return mult_small(a, b, transpose, bias == nullptr ? defaultBias : *bias, - type, alpha); + return dot_small(a, b, transpose, bias == nullptr ? 
defaultBias : *bias, + type, alpha); } }; diff --git a/src/math/tensor/gpu/tensor.hpp b/src/math/tensor/gpu/tensor.hpp index df628ee..37758a9 100644 --- a/src/math/tensor/gpu/tensor.hpp +++ b/src/math/tensor/gpu/tensor.hpp @@ -3,12 +3,10 @@ #include "../../opencl/opencl.hpp" #include -#include #include #include #include "../tensor.hpp" -#include "math.hpp" extern std::mt19937 gen; diff --git a/src/math/tensor/math.hpp b/src/math/tensor/math.hpp index b4cee1b..12db29d 100644 --- a/src/math/tensor/math.hpp +++ b/src/math/tensor/math.hpp @@ -2,7 +2,8 @@ #include "tensor.hpp" -enum class Activation { LINEAR, SIGMOID, TANH, RELU, LEAKY_RELU, ELU, GELU }; +enum class Activation { LINEAR, SIGMOID, TANH, RELU, LEAKY_RELU, ELU }; +enum class Loss { MSE }; template concept ITensorType = std::is_base_of_v; @@ -31,6 +32,7 @@ protected: public: virtual T activate(const T &m, Activation type, float alpha) = 0; + virtual T d_activate(const T &m, Activation type, float alpha) = 0; virtual T mult(const T &m, float x) = 0; virtual T add(const T &a, const T &b, float x) = 0; @@ -45,11 +47,13 @@ template class ITensor1Math {}; template class ITensor2Math { public: - virtual M mult(const M &a, const M &b, bool transpose, const V *bias, - Activation type, float alpha) = 0; + virtual M dot(const M &a, const M &b, bool transpose, const V *bias, + Activation type, float alpha) = 0; + + virtual M loss(const M &a, const M &b, Loss type) = 0; + virtual M d_loss(const M &a, const M &b, Loss type) = 0; void validateMultDimensions(const M &a, const M &b, bool transpose) const { - printf("%dx%d %dx%d\n", a.getRows(), a.getCols(), b.getRows(), b.getCols()); if ((!transpose && a.getCols() != b.getRows()) || (transpose && a.getCols() != b.getCols())) { throw std::invalid_argument( @@ -64,4 +68,4 @@ public: }; }; -template class ITensor3Math {}; \ No newline at end of file +template class ITensor3Math {}; diff --git a/src/math/tensor/tensor.hpp b/src/math/tensor/tensor.hpp index d200153..b590dcd 
100644 --- a/src/math/tensor/tensor.hpp +++ b/src/math/tensor/tensor.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -64,4 +65,4 @@ class ITensor3 {}; typedef ITensor0 IScalar; typedef ITensor1 IVector; -typedef ITensor2 IMatrix; \ No newline at end of file +typedef ITensor2 IMatrix;