From 644874f27fe03d20265a87f3a883b573d2f7a9ce Mon Sep 17 00:00:00 2001
From: daniel
Date: Sun, 13 Mar 2022 17:58:02 +0100
Subject: [PATCH] wip: ex_01

---
 Exercise_01/Makefile                 |   8 +-
 Exercise_01/Matrix.h                 |  21 +-
 Exercise_01/Solver.h                 |  72 ++++--
 Exercise_01/examples/MPI_RowSums.cpp |  11 +-
 Exercise_01/main.cpp                 | 349 ++++++++++++++++++++++++---
 5 files changed, 396 insertions(+), 65 deletions(-)

diff --git a/Exercise_01/Makefile b/Exercise_01/Makefile
index 19dbdbb..1f05476 100644
--- a/Exercise_01/Makefile
+++ b/Exercise_01/Makefile
@@ -12,8 +12,10 @@ mpi: main.cpp Matrix.h Solver.h

 all: seriel mpi

-test: seriel
-	./$(OUT)_seriel 120 10
+test: seriel mpi
+	./$(OUT)_seriel 1D 120 1000
+	mpirun -n 4 ./$(OUT)_mpi 2D 120 1000
+#	for script in *.R; do R --vanilla < $${script}; done

 clean:
-	rm -f *.o main $(OUT)_seriel $(OUT)_mpi
+	rm -f *.o *.R main $(OUT)_seriel $(OUT)_mpi
diff --git a/Exercise_01/Matrix.h b/Exercise_01/Matrix.h
index a8ecc93..67fada7 100644
--- a/Exercise_01/Matrix.h
+++ b/Exercise_01/Matrix.h
@@ -50,11 +50,17 @@ public:
     T* data() { return _data.data(); };
     const T* data() const { return _data.data(); };

-    T& operator()(int i) { return _data[index(i)]; };
-    const T& operator()(int i) const { return _data[index(i)]; };
+    T& operator()(int i) { return _data[i]; };
+    const T& operator()(int i) const { return _data[i]; };

-    T& operator()(int i, int j) { return _data[index(i, j)]; }
-    const T& operator()(int i, int j) const { return _data[index(i, j)]; }
+    T& operator()(int i, int j) { return _data[i + j * _nrow]; }
+    const T& operator()(int i, int j) const { return _data[i + j * _nrow]; }
+
+    T& get(int i) { return _data[index(i)]; };
+    const T& get(int i) const { return _data[index(i)]; };
+
+    T& get(int i, int j) { return _data[index(i, j)]; }
+    const T& get(int i, int j) const { return _data[index(i, j)]; }
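+
+    // operator() is now the raw, unchecked column-major access, while get()
+    // still routes through index(), which presumably also resolves negative
+    // indices from the end (the Solver relies on get(x, -1) addressing the
+    // last column).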

     template <Norm N>
     double norm() const {
@@ -98,13 +104,13 @@ public:
         if constexpr (N == Norm::Frob) {
             // Sum of Squared differences
             for (size_t i = 0; i < nelem; ++i) {
-                T diff = this(i) - A(i);
+                T diff = (*this)(i) - A(i);
                 accum += diff * diff;
             }
             return std::sqrt(static_cast<double>(accum));
         } else { // Maximum Norm
             for (size_t i = 0; i < nelem; ++i) {
-                T diff = std::abs(this(i) - A(i));
+                T diff = std::abs((*this)(i) - A(i));
                 if (accum < diff) {
                     accum = diff;
                 }
@@ -174,6 +180,9 @@ public:
     T& operator()(int i) { return _matrix(_index + i * _stride); };
     const T& operator()(int i) const { return _matrix(_index + i * _stride); };

+    T& get(int i) { return _matrix.get(_index + i * _stride); };
+    const T& get(int i) const { return _matrix.get(_index + i * _stride); };
+
 protected:
     Matrix<T>& _matrix;
     size_t _index;
diff --git a/Exercise_01/Solver.h b/Exercise_01/Solver.h
index 0b964d8..78f27ff 100644
--- a/Exercise_01/Solver.h
+++ b/Exercise_01/Solver.h
@@ -35,9 +35,9 @@ public:
           -1.0 / (h * h),  /* Left  */
           -1.0 / (h * h),  /* Up    */
           -1.0 / (h * h)}, /* Right */
-        _rhs(nx, ny),
-        _sol(nx, ny, 0.0),
-        _tmp(nx, ny)
+        _rhs(nx, ny, 0.),
+        _sol(nx, ny, 0.),
+        _tmp(nx, ny, 0.)
     {
         // Initialize Right Hand Side _rhs(x, y) = f(X(x), Y(y))
         // Note that the correspondence to the usual matrix indexing scheme as
@@ -53,9 +53,9 @@ public:

         // Set Dirichlet boundary conditions
         // North
-        for (size_t x = 0; x < nx; ++x) { _sol(x, -1) = _tmp(x, -1) = gN(X(x)); }
+        for (size_t x = 0; x < nx; ++x) { _sol.get(x, -1) = _tmp.get(x, -1) = gN(X(x)); }
         // East
-        for (size_t y = 0; y < ny; ++y) { _sol(-1, y) = _tmp(-1, y) = gE(Y(y)); }
+        for (size_t y = 0; y < ny; ++y) { _sol.get(-1, y) = _tmp.get(-1, y) = gE(Y(y)); }
         // South
         for (size_t x = 0; x < nx; ++x) { _sol(x, 0) = _tmp(x, 0) = gS(X(x)); }
         // West
@@ -77,7 +77,20 @@ public:
         N = North, E = East, S = South, W = West
     };

-    MatrixView<double> boundary(enum Dir dir) {
+    MatrixView<double> read_boundary(enum Dir dir) {
+        switch (dir) {
+            case Dir::North:
+                return Col(_sol, -2);
+            case Dir::East:
+                return Row(_sol, -2);
+            case Dir::South:
+                return Col(_sol, 1);
+            default: // Dir::West
+                return Row(_sol, 1);
+        }
+    };
+
+    MatrixView<double> write_boundary(enum Dir dir) {
         switch (dir) {
             case Dir::North:
                 return Col(_sol, -1);
@@ -91,13 +104,11 @@ public:
     };

     /** Solution getter */
-    Matrix<double>& solution() {
-        return _sol;
-    }
+    Matrix<double>& solution() { return _sol; }
+    /** Right Hand Side getter (grid evaluated f(x, y)) */
+    Matrix<double>& rhs() { return _rhs; }

-    /**
-     * Performs a single Jacobian Iteration
-     */
+    /** Performs a single Jacobi iteration */
     void iterate() {
         double s = 1.0 / _stencil.C;

@@ -114,6 +125,37 @@ public:
         _iter++;
     };

+    /** Computes the L2 (Frob) or maximum (Max) norm of the residual, selected by N */
+    template <Norm N>
+    double resid_norm() {
+        // Enforce valid Norm types
+        static_assert(N == Norm::Frob || N == Norm::Max);
+
+        // Norm accumulator
+        double accum = 0.0;
+
+        for (size_t y = 1; y < _ny - 1; ++y) {
+            for (size_t x = 1; x < _nx - 1; ++x) {
+                double resid = _rhs(x, y) - (_stencil.C * _sol(x, y)
+                    + _stencil.B * _sol(x, y - 1)
+                    + _stencil.L * _sol(x - 1, y)
+                    + _stencil.U * _sol(x, y + 1)
+                    + _stencil.R * _sol(x + 1, y));
+                if constexpr (N == Norm::Frob) {
+                    accum += resid * resid;
+                } else { // Maximum Norm
+                    accum = std::max(std::abs(resid), accum);
+                }
+            }
+        }
+
+        if constexpr (N == Norm::Frob) {
+            return std::sqrt(accum);
+        } else { // Maximum Norm
+            return accum;
+        }
+    }
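+
+    // Usage sketch: solver.resid_norm<Norm::Frob>() yields the L2 norm and
+    // solver.resid_norm<Norm::Max>() the maximum norm of the residual
+    // r = rhs - A * sol over all interior grid points.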
+
 private:
     size_t _iter;           /*< Iteration count */
     const size_t _nx;       /*< Number of X-axis grid points */
     const size_t _ny;       /*< Number of Y-axis grid points */
     const double _xmin;     /*< Domain X-min (west border pos)  */
     const double _xmax;     /*< Domain X-max (east border pos)  */
     const double _ymin;     /*< Domain Y-min (south border pos) */
     const double _ymax;     /*< Domain Y-max (north border pos) */
-    const Stencil _stencil; /*< Simple, + shaped stencil */
-    Matrix<double> _rhs;    /*< Grid evaluated RHS of the PDE = f(x, y) */
-    Matrix<double> _sol;    /*< Solution after _iter iterations */
+    const Stencil _stencil; /*< Simple '+' shaped stencil */
+    Matrix<double> _rhs;    /*< Grid evaluated RHS of the PDE, f(x, y) */
+    Matrix<double> _sol;    /*< Solution after `_iter` iterations */
     Matrix<double> _tmp;    /*< Temp. datablock, used in iterate() */
 };
diff --git a/Exercise_01/examples/MPI_RowSums.cpp b/Exercise_01/examples/MPI_RowSums.cpp
index 90e6315..6d8624a 100644
--- a/Exercise_01/examples/MPI_RowSums.cpp
+++ b/Exercise_01/examples/MPI_RowSums.cpp
@@ -1,8 +1,8 @@
 /**
  * Computes row sums of a matrix containing (0-indexed) consecutive numbers
- * in row major order (yes, useless but illustrativ)
+ * in column major order
  *
- * Example:
+ * Example (entries are array indices):
  * matrix (11 x 7):          row_sums (11):
  *    0 11 22 33 44 55 66     ->  231
  *    1 12 23 34 45 56 67     ->  238
@@ -22,7 +22,7 @@
  *   worker in a dense layout (MPI_DOUBLE)
  *
  * Send/Recv Example:
- *   Data send (entries are the array indices):
+ *   Data send (entries are array indices):
  *   - - - - - - -
  *   - - - - - - -
  *   - - - - - - -
@@ -68,13 +68,12 @@ int min(int a, int b) {
     return a < b ? a : b;
 }

 int main(int argn, char* argv[]) {
-    // Build a (simple and barebones, row major) matrix model, it's just a
+    // Build a (simple and barebones, column major) matrix model, it's just a
     // vector with external row/col count.
     int nrow = 11; // defaults
     int ncol = 7;  // defaults

-    // Parse arguments to set nrow, ncol (sloppy, but thats not the point of
-    // this example)
+    // Parse arguments to set nrow, ncol (sloppy, but not the point of the example)
     if (argn > 1) {
         nrow = atoi(argv[1]);
         if (nrow < 1 || nrow > 1024) {
diff --git a/Exercise_01/main.cpp b/Exercise_01/main.cpp
index 32e6208..b56025a 100644
--- a/Exercise_01/main.cpp
+++ b/Exercise_01/main.cpp
@@ -1,9 +1,11 @@
 /**
- * g++ main.cpp -std=c++17 -Wall -Wpedantic -pedantic -o main; ./main
+ *
  */
 #include <iostream>
 #include <cstdlib>
+#include <string>
+#include <sstream>
 #include <functional>
 #define _USE_MATH_DEFINES /* enables math constants from cmath */
 #include <cmath>
@@ -11,8 +13,14 @@
 #include <mpi.h>
 #endif
+
+#include <fstream>
+#include <iomanip>
+
+
 #include "Matrix.h"
 #include "Solver.h"
+#include "utils.h"

 int main(int argn, char* argv[]) {
@@ -23,63 +31,334 @@ int main(int argn, char* argv[]) {

     // Get MPI config
     int mpi_size; /*< MPI pool size (a.k.a. total number of processes) */
-    int mpi_rank; /*< MPI rank (a.k.a. process ID in the context of MPI) */
     MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
-    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+#else
+    int mpi_size = 1;
 #endif

     /**************************** Parse Arguments *****************************/
-    if (argn < 3) {
-        std::cerr << "usage: " << argv[0] << " <resolution> <iterations>" << std::endl;
-        return -1;
-    } else if (argn > 3) {
-        std::cerr << "warning: " << "ignoring all but the first two params" << std::endl;
-    }
-    // TODO: make this proper!!!
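+    // Expected invocation: <1D|2D> <resolution> <iterations>, as exercised
+    // by the Makefile test target; the 1D/2D switch only selects the MPI
+    // process grid layout and is ignored by the serial build.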
-    size_t resolution = atol(argv[1]);
-    size_t iterations = atol(argv[2]);
-    if (resolution < 1 || resolution > 65536
-        || iterations < 1 || iterations > 65536) {
-        std::cerr << "error: parsing arguments failed" << std::endl;
-        return -1;
-    }
+    size_t dim, resolution, iterations;
+    {
+        if (argn < 4) {
+            std::cerr << "usage: " << argv[0]
+                      << " <1D|2D> <resolution> <iterations>" << std::endl;
+            return -1;
+        } else if (argn > 4) {
+            std::cerr << "warning: ignoring all but the first three params" << std::endl;
+        }
+        if (std::string(argv[1]) == "1D") {
+            dim = 1;
+        } else if (std::string(argv[1]) == "2D") {
+            dim = 2;
+        } else {
+            std::cerr << "error: parsing arg 1 failed" << std::endl;
+            return -1;
+        }
+        if (!(std::istringstream(argv[2]) >> resolution)) {
+            std::cerr << "error: parsing arg 2 failed" << std::endl;
+            return -1;
+        }
+        if (!(std::istringstream(argv[3]) >> iterations)) {
+            std::cerr << "error: parsing arg 3 failed" << std::endl;
+            return -1;
+        }
+        if (resolution < 10 || resolution > 65536) {
+            std::cerr << "error: arg 2 " << resolution
+                      << " out of domain [10, 65536]" << std::endl;
+            return -1;
+        }
+        if (iterations > 65536) {
+            std::cerr << "error: arg 3 " << iterations
+                      << " out of domain [0, 65536]" << std::endl;
+            return -1;
+        }
+    }
+
+    // Report configuration
+    std::cout << std::setfill(' ')
+#ifdef USE_MPI
+              << "using MPI: true\n"
+#else
+              << "using MPI: false\n"
+#endif
+              << "nr. processes: " << std::setw(5) << mpi_size << '\n'
+              << "resolution: " << std::setw(5) << resolution << '\n'
+              << "iterations: " << std::setw(5) << iterations << std::endl;

-    /************************* Initialize PDE Solver **************************/
-    size_t nx = resolution;
-    size_t ny = resolution;
+    /**************** Setup (local) PDE + Boundary Conditions *****************/
     const double k = M_PI;
     const double h = 1.0 / static_cast<double>(resolution - 1);

+#ifdef USE_MPI
+    // Group processes into a cartesian communication topology. Set initial
+    // values for a 1D grid.
+    int mpi_dims[2] = {mpi_size, 1};
+    // In case of a 2D grid, partition both axes as equally as possible.
+    // Note that MPI_Dims_create does not guarantee this: for 9 processes it
+    // was observed to generate a 9 x 1 grid, where the following computes a
+    // 3 x 3 grid.
+    if (dim == 2) {
+        two_factors(mpi_size, mpi_dims);
+    }
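+    // (two_factors, partition and partition_sum below come from the new
+    //  utils.h, which is not part of this patch yet. Assumed semantics:
+    //  two_factors splits mpi_size into its two closest factors, e.g.
+    //  9 -> 3 x 3 and 12 -> 4 x 3; partition(n, p, i) is the size of the
+    //  i-th of p near-equal chunks of n grid points; partition_sum(n, p, i)
+    //  is the total number of points in chunks 0 through i.)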
+
+    // Report grid topology
+    std::cout << "topology: " << dim << "D";
+    if (dim == 2) {
+        std::cout << " (" << mpi_dims[0] << " x " << mpi_dims[1] << ")";
+    }
+    std::cout << std::endl;
+
+    // Setup a cartesian topology communicator (NON-cyclic)
+    const int mpi_periods[2] = {false, false};
+    MPI_Comm mpi_comm_grid;
+    MPI_Cart_create(
+        MPI_COMM_WORLD, // old communicator
+        2,              // number of dimensions
+        mpi_dims,       // grid dimensions
+        mpi_periods,    // grid periodicity
+        true,           // allow process reordering
+        &mpi_comm_grid
+    );
+
+    // Get MPI rank
+    int mpi_world_rank; /*< MPI world rank (a.k.a. global process ID) */
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_world_rank);
+
+    // Get MPI rank with respect to the grid communicator
+    int mpi_grid_rank; /*< MPI grid rank (a.k.a. grid process ID) */
+    MPI_Comm_rank(mpi_comm_grid, &mpi_grid_rank);
+
+    // Get coordinates with respect to the grid communicator
+    int mpi_coords[2];
+    MPI_Cart_coords(mpi_comm_grid, mpi_grid_rank, 2, mpi_coords);
+
+    // Get direct neighbours in the communication grid
+    struct { int north; int east; int south; int west; } mpi_neighbours;
+    // Get X-direction (dim 0) neighbours
+    MPI_Cart_shift(
+        mpi_comm_grid,          // grid communicator
+        0,                      // axis index (0 <-> X)
+        1,                      // offset
+        &(mpi_neighbours.west), // negated offset neighbour
+        &(mpi_neighbours.east)  // offset neighbour
+    );
+    // Get Y-direction (dim 1) neighbours
+    MPI_Cart_shift(
+        mpi_comm_grid,
+        1,                      // axis index (1 <-> Y)
+        1,
+        &(mpi_neighbours.south),
+        &(mpi_neighbours.north)
+    );
+
+    // Calc local (base) grid size (without ghost layers)
+    size_t nx = partition(resolution, mpi_dims[0], mpi_coords[0]);
+    size_t ny = partition(resolution, mpi_dims[1], mpi_coords[1]);
+    // Add ghost layers for each (existing) neighbour
+    ny += (mpi_neighbours.north != MPI_PROC_NULL);
+    nx += (mpi_neighbours.east  != MPI_PROC_NULL);
+    ny += (mpi_neighbours.south != MPI_PROC_NULL);
+    nx += (mpi_neighbours.west  != MPI_PROC_NULL);
+
+    // Compute local domain [xmin, xmax] x [ymin, ymax]
+    double xmin = (mpi_neighbours.west == MPI_PROC_NULL) ? 0.0
+        : h * partition_sum(resolution, mpi_dims[0], mpi_coords[0] - 1);
+    double xmax = (mpi_neighbours.east == MPI_PROC_NULL) ? 1.0
+        : h * (partition_sum(resolution, mpi_dims[0], mpi_coords[0]) + 1);
+    double ymin = (mpi_neighbours.south == MPI_PROC_NULL) ? 0.0
+        : h * partition_sum(resolution, mpi_dims[1], mpi_coords[1] - 1);
+    double ymax = (mpi_neighbours.north == MPI_PROC_NULL) ? 1.0
+        : h * (partition_sum(resolution, mpi_dims[1], mpi_coords[1]) + 1);
+
+    // /*************************__________DEBUG__________*************************/
+    // std::cout << " (" << mpi_coords[0] << ", " << mpi_coords[1] << ") ";
+
+    // std::cout << " -> "
+    //     << "N: " << ((mpi_neighbours.north != MPI_PROC_NULL) ? '1' : '.') << ", "
+    //     << "E: " << ((mpi_neighbours.east  != MPI_PROC_NULL) ? '1' : '.') << ", "
+    //     << "S: " << ((mpi_neighbours.south != MPI_PROC_NULL) ? '1' : '.') << ", "
+    //     << "W: " << ((mpi_neighbours.west  != MPI_PROC_NULL) ? '1' : '.');
+
+    // std::cout << " -> " << nx << " x " << ny;
+
+    // std::cout << " -> " << std::setprecision(2)
+    //     << "[" << std::setw(4) << std::left << xmin
+    //     << ", " << std::setw(4) << std::left << xmax << "]"
+    //     << " x "
+    //     << "[" << std::setw(4) << std::left << ymin
+    //     << ", " << std::setw(4) << std::left << ymax << "]";
+
+    // std::cout << std::endl;
+    // /*************************__________DEBUG__________*************************/
+
+    // Create MPI vector type (for boundary condition exchange). It allows
+    // direct exchange of matrix rows (the east/west bounds): in the column
+    // major matrix layout the elements of a row are "sparse" in the sense
+    // that, in contrast to the elements of a column, they are not adjacent
+    // in memory.
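+    // Example (nx = 3, ny = 4, entries are array indices): row x = 1 is the
+    // elements {1, 4, 7, 10}, i.e. ny = 4 blocks of one double, each one
+    // stride of nx = 3 apart, which is exactly
+    // MPI_Type_vector(ny, 1, nx, MPI_DOUBLE, ...).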
+    MPI_Datatype mpi_type_row;
+    MPI_Type_vector(ny, 1, nx, MPI_DOUBLE, &mpi_type_row);
+    MPI_Type_commit(&mpi_type_row);
+#else
+    // Discretization grid resolution
+    size_t nx = resolution, ny = resolution;
+    // PDE domain borders [xmin, xmax] x [ymin, ymax] = [0, 1] x [0, 1]
+    double xmin = 0.0, xmax = 1.0, ymin = 0.0, ymax = 1.0;
+#endif

+    // Declare right hand side function f(x, y) = k^2 sin(2 pi x) sinh(2 pi y)
     std::function<double(double, double)> fun = [k](double x, double y) {
-        return k * k * sin(M_2_PI * x) * sinh(M_2_PI * y);
+        return k * k * sin(2 * M_PI * x) * sinh(2 * M_PI * y);
     };

     // Boundary conditions
-    /** North boundary condition g(x) = k^2 sin(2 pi x) sinh(2 pi) */
-    std::function<double(double)> gN = [k](double x) {
-        return k * k * sin(M_2_PI * x) * sinh(M_2_PI);
-    };
-    /** East, South and West boundary conditions are simply = 0 */
+    /** North boundary condition gN(x) = sin(2 pi x) sinh(2 pi) */
+    std::function<double(double)> gN;
+#ifdef USE_MPI
+    // Check if the local north boundary is part of the global north boundary
+    if (mpi_neighbours.north == MPI_PROC_NULL) {
+#endif
+        // The local north boundary equals the global north boundary
+        gN = [k](double x) { return sin(2 * M_PI * x) * sinh(2 * M_PI); };
+#ifdef USE_MPI
+    } else {
+        // The local north boundary is zero, like all the rest
+        gN = [k](double) { return 0.0; };
+    }
+#endif
+    /** East, South and West boundary conditions are all 0 */
     std::function<double(double)> g0 = [k](double) { return 0.0; };

     /******************************* Solve PDE ********************************/
-    Matrix<double> solution;
-    {
-        // Instanciate solver (local instance)
-        Solver solver(nx, ny, 0., 1., 0., 1., h, k, fun, gN, g0, g0, g0);
+    // Instantiate solver (local instance)
+    Solver solver(nx, ny, xmin, xmax, ymin, ymax, h, k, fun, gN, g0, g0, g0);

-        // Run solver iterations
-        for (size_t iter = 0; iter < iterations; ++iter) {
-            solver.iterate();
+    // Run solver iterations
+    for (size_t iter = 0; iter < iterations; ++iter) {
+        // Perform a single stencil Jacobi iteration
+        solver.iterate();
+
+#ifdef USE_MPI
+        // Non-blocking send of boundary conditions to all neighbours
+        MPI_Request mpi_requests[4];
+        int mpi_request_count = 0;
+
+        if (mpi_neighbours.north != MPI_PROC_NULL) {
+            auto bound = solver.read_boundary(Solver::Dir::North);
+            MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
+                      mpi_neighbours.north, iter, mpi_comm_grid,
+                      &mpi_requests[mpi_request_count++]);
+        }
+        if (mpi_neighbours.east != MPI_PROC_NULL) {
+            auto bound = solver.read_boundary(Solver::Dir::East);
+            MPI_Isend(bound.data(), 1, mpi_type_row,
+                      mpi_neighbours.east, iter, mpi_comm_grid,
+                      &mpi_requests[mpi_request_count++]);
+        }
+        if (mpi_neighbours.south != MPI_PROC_NULL) {
+            auto bound = solver.read_boundary(Solver::Dir::South);
+            MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
+                      mpi_neighbours.south, iter, mpi_comm_grid,
+                      &mpi_requests[mpi_request_count++]);
+        }
+        if (mpi_neighbours.west != MPI_PROC_NULL) {
+            auto bound = solver.read_boundary(Solver::Dir::West);
+            MPI_Isend(bound.data(), 1, mpi_type_row,
+                      mpi_neighbours.west, iter, mpi_comm_grid,
+                      &mpi_requests[mpi_request_count++]);
         }

-        // extract solution
-        solution = std::move(solver.solution());
+        // Wait for all sends to complete before receiving new data
+        // (just to be safe)
+        MPI_Waitall(mpi_request_count, mpi_requests, MPI_STATUSES_IGNORE);
+
+        // Get new boundary conditions using a blocking receive
+        MPI_Status mpi_status;
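+        // NOTE: waiting on all sends before posting any receive relies on
+        // MPI buffering these boundary messages (eager protocol); for very
+        // large boundaries a rendezvous send could stall here, in which case
+        // posting the receives first (MPI_Irecv) would be the more robust
+        // pattern.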
+        if (mpi_neighbours.north != MPI_PROC_NULL) {
+            auto bound = solver.write_boundary(Solver::Dir::North);
+            MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
+                     mpi_neighbours.north, iter, mpi_comm_grid, &mpi_status);
+        }
+        if (mpi_neighbours.east != MPI_PROC_NULL) {
+            auto bound = solver.write_boundary(Solver::Dir::East);
+            MPI_Recv(bound.data(), 1, mpi_type_row,
+                     mpi_neighbours.east, iter, mpi_comm_grid, &mpi_status);
+        }
+        if (mpi_neighbours.south != MPI_PROC_NULL) {
+            auto bound = solver.write_boundary(Solver::Dir::South);
+            MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
+                     mpi_neighbours.south, iter, mpi_comm_grid, &mpi_status);
+        }
+        if (mpi_neighbours.west != MPI_PROC_NULL) {
+            auto bound = solver.write_boundary(Solver::Dir::West);
+            MPI_Recv(bound.data(), 1, mpi_type_row,
+                     mpi_neighbours.west, iter, mpi_comm_grid, &mpi_status);
+        }
+#endif
     }

-    /****************************** Tests/Report ******************************/
+    /***************************** Solution Stats *****************************/
+    std::cout
+        << "|| residuals ||_2: " << std::setw(15) << solver.resid_norm<Norm::Frob>()
+        << "\n|| residuals ||_inf: " << std::setw(15) << solver.resid_norm<Norm::Max>()
+        << std::endl;
+
+    // Get solution
+    Matrix<double>& solution = solver.solution();
+
+    // Compute analytic (true) solution
+    Matrix<double> analytic(solution.nrow(), solution.ncol());
+    double y = ymin;
+    for (size_t j = 0; j < analytic.ncol(); ++j, y += h) {
+        double x = xmin; // restart x at the west border for every column
+        for (size_t i = 0; i < analytic.nrow(); ++i, x += h) {
+            analytic(i, j) = sin(2 * M_PI * x) * sinh(2 * M_PI * y);
+        }
+    }
+
+    // Calculate error
+    std::cout
+        << "|| error ||_2: " << std::setw(15) << solution.dist<Norm::Frob>(analytic)
+        << "\n|| error ||_inf: " << std::setw(15) << solution.dist<Norm::Max>(analytic)
+        << std::endl;
+
+    /*************************__________DEBUG__________*************************/
+    // Create R scripts for plotting the solution
+    std::ostringstream file_name;
+#ifdef USE_MPI
+    file_name << "Rplot_" << mpi_coords[0] << "_" << mpi_coords[1] << ".R";
+#else
+    file_name << "Rplot_seriel.R";
+#endif
+    std::ofstream out(file_name.str());
+
+    out << "sol <- matrix(c(" << solution(0);
+    for (size_t i = 1; i < solution.size(); ++i) {
+        out << ',' << solution(i);
+    }
+    out << "), " << nx << ", " << ny << ")\n"
+#ifdef USE_MPI
+        << "png(filename = \"mpi_plot_" << mpi_coords[0]
+        << "_" << mpi_coords[1] << ".png\")\n"
+#else
+        << "png(filename = \"seriel_plot.png\")\n"
+#endif
+#ifdef USE_MPI
+        << "image(sol, zlim = c(-270, 270), axes = FALSE, main = \"Cart Coords: "
+        << mpi_coords[0] << ", " << mpi_coords[1] << "\")\n"
+#else
+        << "image(sol, zlim = c(-270, 270), axes = FALSE, main = \"Seriel\")\n"
+#endif
+        << "axis(1, at = c(0, 1), labels = c("
+        << "\"" << std::setprecision(2) << xmin << "\", "
+        << "\"" << std::setprecision(2) << xmax << "\"))\n"
+        << "axis(2, at = c(0, 1), labels = c("
+        << "\"" << std::setprecision(2) << ymin << "\", "
+        << "\"" << std::setprecision(2) << ymax << "\"))\n"
+        // << "axis(2, at = seq(100, 800, by = 100))"
+        << "dev.off()" << std::endl;
+    out.close();
+    /*************************__________DEBUG__________*************************/

-    std::cout << solution << std::endl;
     // MPI shutdown/cleanup
 #ifdef USE_MPI