6 changed files with 62 additions and 481 deletions
--- a/Exercise_01/Makefile
+++ b/Exercise_01/Makefile
@ -12,10 +12,8 @@ mpi: main.cpp Matrix.h Solver.h
 all: seriel mpi
-test: seriel mpi
+test: seriel
-	./$(OUT)_seriel 1D 120 1000
+	./$(OUT)_seriel 120 10
 	mpirun -n 4 ./$(OUT)_mpi 2D 120 1000
 # for script in *.R; do R --vanilla < $${script}; done
 clean:
-	rm -f *.o *.R main $(OUT)_seriel $(OUT)_mpi
+	rm -f *.o main $(OUT)_seriel $(OUT)_mpi
--- a/Exercise_01/Matrix.h
+++ b/Exercise_01/Matrix.h
@ -50,17 +50,11 @@ public:
          T* data()       { return _data.data(); };
    const T* data() const { return _data.data(); };
-          T& operator()(int i)       { return _data[i]; };
+          T& operator()(int i)       { return _data[index(i)]; };
-    const T& operator()(int i) const { return _data[i]; };
+    const T& operator()(int i) const { return _data[index(i)]; };
-          T& operator()(int i, int j)       { return _data[i + j * _nrow]; }
+          T& operator()(int i, int j)       { return _data[index(i, j)]; }
-    const T& operator()(int i, int j) const { return _data[i + j * _nrow]; }
+    const T& operator()(int i, int j) const { return _data[index(i, j)]; }
          T& get(int i)       { return _data[index(i)]; };
    const T& get(int i) const { return _data[index(i)]; };
          T& get(int i, int j)       { return _data[index(i, j)]; }
    const T& get(int i, int j) const { return _data[index(i, j)]; }
    template <enum Norm N = Norm::Frob>
    double norm() const {
@ -104,13 +98,13 @@ public:
        if constexpr (N == Norm::Frob) {
            // Sum of Squared differences
            for (size_t i = 0; i < nelem; ++i) {
-                T diff = (*this)(i) - A(i);
+                T diff = this(i) - A(i);
                accum += diff * diff;
            }
            return std::sqrt(static_cast<double>(accum));
        } else { // Maximum Norm
            for (size_t i = 0; i < nelem; ++i) {
-                T diff = std::abs((*this)(i) - A(i));
+                T diff = std::abs(this(i) - A(i));
                if (accum < diff) {
                    accum = diff;
                }
@ -180,9 +174,6 @@ public:
          T& operator()(int i)       { return _matrix(_index + i * _stride); };
    const T& operator()(int i) const { return _matrix(_index + i * _stride); };
          T& get(int i)       { return _matrix.get(_index + i * _stride); };
    const T& get(int i) const { return _matrix.get(_index + i * _stride); };
 protected:
    Matrix<T>& _matrix;
    size_t _index;
--- a/Exercise_01/Solver.h
+++ b/Exercise_01/Solver.h
@ -35,9 +35,9 @@ public:
                 -1.0 / (h * h),                /* Left */
                 -1.0 / (h * h),                /* Up */
                 -1.0 / (h * h)},               /* Right */
-        _rhs(nx, ny, 0.),
+        _rhs(nx, ny),
-        _sol(nx, ny, 0.),
+        _sol(nx, ny, 0.0),
-        _tmp(nx, ny, 0.)
+        _tmp(nx, ny)
    {
        // Initialize Right Hand Size _rhs(x, y) = f(X(x), Y(y))
        // Note that the correspondence usual matrix indexing sceeme as
@ -53,9 +53,9 @@ public:
        // Set Dirichlet boundary conditions
        // North
-        for (size_t x = 0; x < nx; ++x) { _sol.get(x, -1) = _tmp.get(x, -1) = gN(X(x)); }
+        for (size_t x = 0; x < nx; ++x) { _sol(x, -1) = _tmp(x, -1) = gN(X(x)); }
        // East
-        for (size_t y = 0; y < ny; ++y) { _sol.get(-1, y) = _tmp.get(-1, y) = gE(Y(y)); }
+        for (size_t y = 0; y < ny; ++y) { _sol(-1, y) = _tmp(-1, y) = gE(Y(y)); }
        // South
        for (size_t x = 0; x < nx; ++x) { _sol(x, 0) = _tmp(x, 0) = gS(X(x)); }
        // West
@ -77,20 +77,7 @@ public:
        N = North, E = East, S = South, W = West
    };
-    MatrixView<double> read_boundary(enum Dir dir) {
+    MatrixView<double> boundary(enum Dir dir) {
        switch (dir) {
            case Dir::North:
                return Col<double>(_sol, -2);
            case Dir::East:
                return Row<double>(_sol, -2);
            case Dir::South:
                return Col<double>(_sol, 1);
            default:    // Dir::West
                return Row<double>(_sol, 1);
        }
    };
    MatrixView<double> write_boundary(enum Dir dir) {
        switch (dir) {
            case Dir::North:
                return Col<double>(_sol, -1);
@ -104,11 +91,13 @@ public:
    };
    /** Solution getter */
-    Matrix<double>& solution() { return _sol; }
+    Matrix<double>& solution() {
-    /** Right Hand Side getter (grid evaluated f(x, y)) */
+        return _sol;
-    Matrix<double>& rhs() { return _rhs; }
+    }
-    /** Performs a single Jacobian Iteration */
+    /**
     * Performs a single Jacobian Iteration
     */
    void iterate() {
        double s = 1.0 / _stencil.C;
@ -125,37 +114,6 @@ public:
        _iter++;
    };
    /** Computes the L2 and Inf norm of the residuals */
    template <enum Norm N = Norm::Frob>
    double resid_norm() {
        // Enforce valid Norm types
        static_assert(N == Norm::Frob || N == Norm::Max);
        // Norm accumulator
        double accum = 0.0;
        for (size_t y = 1; y < _ny - 1; ++y) {
            for (size_t x = 1; x < _nx - 1; ++x) {
                double resid = _rhs(x, y) - (_stencil.C * _sol(x, y) +
                                             _stencil.B * _sol(x, y - 1) +
                                             _stencil.L * _sol(x - 1, y) +
                                             _stencil.U * _sol(x, y + 1) +
                                             _stencil.R * _sol(x + 1, y));
                if constexpr (N == Norm::Frob) {
                    accum += resid * resid;
                } else {    // Maximum Norm
                    accum = std::max(std::abs(resid), accum);
                }
            }
        }
        if constexpr (N == Norm::Frob) {
            return std::sqrt(accum);
        } else {    // Maximum Norm
            return accum;
        }
    }
 private:
    size_t _iter;           /*< Iteration count */
    const size_t _nx;       /*< Number of X-axis grid points */
@ -164,8 +122,8 @@ private:
    const double _xmax;     /*< Domain X-max (east border pos) */
    const double _ymin;     /*< Domain Y-min (south border pos) */
    const double _ymax;     /*< Domain Y-max (north border pos) */
-    const Stencil _stencil; /*< Simple '+' shaped stencil */
+    const Stencil _stencil; /*< Simple, + shaped stencil */
-    Matrix<double> _rhs;    /*< Grid evaluated RHS of the PDE, f(x, y) */
+    Matrix<double> _rhs;    /*< Grid evaluated RHS of the PDE = f(x, y) */
-    Matrix<double> _sol;    /*< Solution after `_iter` iterations */
+    Matrix<double> _sol;    /*< Solution after _iter iterations */
    Matrix<double> _tmp;    /*< Temp. datablock, used in iterate() */
 };
--- a/Exercise_01/examples/MPI_RowSums.cpp
+++ b/Exercise_01/examples/MPI_RowSums.cpp
@ -1,8 +1,8 @@
 /**
 * Computes row sums of a matrix containing (0-indexed) consecutive numbers
- * in column major order
+ * in row major order (yes, useless but illustrativ)
 *
- * Example (entries are array indices):
+ * Example:
 *     matrix (11 x 7):                          row_sums (11):
 *        0   11   22   33   44   55   66   ->      231
 *        1   12   23   34   45   56   67   ->      238
@ -22,7 +22,7 @@
 * worker in a dense layout (MPI_DOUBLE)
 *
 * Send/Recv Example:
- *     Data send (entries are array indices):
+ *     Data send (entries are the array indices):
 *        -    -    -    -    -    -    -
 *        -    -    -    -    -    -    -
 *        -    -    -    -    -    -    -
@ -68,12 +68,13 @@ int min(int a, int b) { return a < b ? a : b; }
 int main(int argn, char* argv[]) {
-    // Build a (simple and barebones, column major) matrix model, it's just a
+    // Build a (simple and barebones, row major) matrix model, it's just a
    // vector with external row/col count.
    int nrow = 11;  // defaults
    int ncol = 7;   // defaults
-    // Parse arguments to set nrow, ncol (sloppy, but not the point of the example)
+    // Parse arguments to set nrow, ncol (sloppy, but thats not the point of
    // this example)
    if (argn > 1) {
        nrow = atoi(argv[1]);
        if (nrow < 1 || nrow > 1024) {
--- a/Exercise_01/main.cpp
+++ b/Exercise_01/main.cpp
@ -1,13 +1,10 @@
 /**
- * 
+ * g++ main.cpp -std=c++17 -Wall -Wpedantic -pedantic -o main; ./main
 */
 #include <stddef.h>
 #include <iostream>
 #include <iomanip>
 #include <sstream>
 #include <functional>
 #include <chrono>
 #define _USE_MATH_DEFINES   /* enables math constants from cmath */
 #include <cmath>
 #ifdef USE_MPI
@ -16,7 +13,6 @@
 #include "Matrix.h"
 #include "Solver.h"
 #include "utils.h"
 int main(int argn, char* argv[]) {
@ -27,321 +23,63 @@ int main(int argn, char* argv[]) {
    // Get MPI config
    int mpi_size; /*< MPI pool size (a.k.a. total number of processes) */
    int mpi_rank; /*< MPI rank (a.k.a. process ID in the context of MPI) */
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
-
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
    // Get MPI rank
    int mpi_world_rank; /*< MPI world rank (a.k.a. grid process ID) */
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_world_rank);
 #else
    int mpi_size = 1;
    int mpi_world_rank = 0;
 #endif
    /**************************** Parse Arguments *****************************/
-    if (0 < argn && argn < 4) {
+    if (argn < 3) {
        std::cerr << "usage: " << argv[0] << " <resolution> <iterations>" << std::endl;
        return -1;
-    } else if (argn > 4) {
+    } else if (argn > 3) {
-        std::cerr << "warning: " << "ignoring all but the first three params" << std::endl;
+        std::cerr << "warning: " << "ignoring all but the first two params" << std::endl;
    }
-#ifdef USE_MPI
+    // TODO: make this proper!!!
-    size_t dim;
+    size_t resolution = atol(argv[1]);
-    if (std::string(argv[1]) == "1D") {
+    size_t iterations = atol(argv[2]);
-        dim = 1;
+    if (resolution < 1 || resolution > 65536
-    } else if (std::string(argv[1]) == "2D") {
+    ||  iterations < 1 || iterations > 65536) {
-        dim = 2;
+        std::cerr << "error: parsing arguments failed" << std::endl;
    } else {
        std::cerr << "error: Parsing arg 1 <dim> failed" << std::endl;
        return -1;
    }
 #endif
    size_t resolution;
    if (!(std::istringstream(argv[2]) >> resolution)) {
        std::cerr << "error: Parsing arg 2 <resolution> failed" << std::endl;
        return -1;
    }
    size_t iterations;
    if (!(std::istringstream(argv[3]) >> iterations)) {
        std::cerr << "error: Parsing arg 3 <iterations> failed" << std::endl;
        return -1;
    }
    if (resolution < 10 || resolution > 65536) {
        std::cerr << "error: arg 1 <resolution> " << resolution
            << " out of domain [10, 65536]" << std::endl;
        return -1;
    }
    if (iterations > 65536) {
        std::cerr << "error: arg 2 <iterations> " << iterations
            << "out of domain [0, 65536]" << std::endl;
        return -1;
    }
    // Report configuration
    if (mpi_world_rank == 0) {
        std::cout << std::setfill(' ')
 #ifdef USE_MPI
            << "use MPI:         YES\n"
 #else
            << "use MPI:          NO\n"
 #endif
            << "nr. processes: " << std::setw(5) << mpi_size   << '\n'
            << "resolution:    " << std::setw(5) << resolution << '\n'
            << "iterations:    " << std::setw(5) << iterations << std::endl;
    }
-    /************************* Start Time Measurement *************************/
+    /************************* Initialize PDE Solver **************************/
-    auto start = std::chrono::high_resolution_clock::now();
+    size_t nx = resolution;
-
+    size_t ny = resolution;
    /**************** Setup (local) PDE + Boundary Conditions *****************/
    const double k = M_PI;
    const double h = 1.0 / static_cast<double>(resolution - 1);
 #ifdef USE_MPI
    // Group processes into a cartesian communication topology. Set initial
    // values for a 1D grid.
    int mpi_dims[2] = {mpi_size, 1};
    // In case of a 2D grid, make equal partitions ob both axes (as equal as
    // possible. Note that `MPI_Dims_create` does not garantee "as equal as".
    // For example it was observed that for 9 processes the generated grid
    // was a 9 x 1, the following computes a 3 x 3).
    if (dim == 2) {
        two_factors(mpi_size, mpi_dims);
    }
    // Report grid topology
    std::cout << "topology:      " << dim << "D";
    if (dim == 2) {
        std::cout << " (" << mpi_dims[0] << " x " << mpi_dims[1] << ")";
    }
    std::cout << std::endl;
    // Setup a cartesian topology communicator (NON-cyclic)
    const int mpi_periods[2] = {false, false};
    MPI_Comm mpi_comm_grid;
    MPI_Cart_create(
        MPI_COMM_WORLD,     // Old Communicator
        2,                  // number of dimensions
        mpi_dims,           // grid dimensions
        mpi_periods,        // grid periodicity
        true,               // allow process reordering
        &mpi_comm_grid
    );
    // Get MPI rank with respect to the grid communicator
    int mpi_grid_rank; /*< MPI grid rank (a.k.a. grid process ID) */
    MPI_Comm_rank(mpi_comm_grid, &mpi_grid_rank);
    // Get coordinates with respect to the grid communicator
    int mpi_coords[2];
    MPI_Cart_coords(mpi_comm_grid, mpi_grid_rank, 2, mpi_coords);
    // Get direct neightbours in the communication grid
    struct { int north; int east; int south; int west; } mpi_neighbours;
    // Get X-direction (dim 0) neightbours
    MPI_Cart_shift(
        mpi_comm_grid,              // grid communicator
        0,                          // axis index (0 <-> X)
        1,                          // offset
        &(mpi_neighbours.west),     // negated offset neightbour
        &(mpi_neighbours.east)      // offset neightbour
    );
    // Get Y-direction (dim 1) neightbours
    MPI_Cart_shift(
        mpi_comm_grid,
        1,                          // axis index (1 <-> Y)
        1,
        &(mpi_neighbours.south),
        &(mpi_neighbours.north)
    );
    // Calc local (base) grid size (without ghost layers)
    size_t nx = partition(resolution, mpi_dims[0], mpi_coords[0]);
    size_t ny = partition(resolution, mpi_dims[1], mpi_coords[1]);
    // Add ghost layers for each (existing) neighbour
    ny += (mpi_neighbours.north != MPI_PROC_NULL);
    nx += (mpi_neighbours.east  != MPI_PROC_NULL);
    ny += (mpi_neighbours.south != MPI_PROC_NULL);
    nx += (mpi_neighbours.west  != MPI_PROC_NULL);
    // Compute local domain [xmin, xmax] x [ymin, ymax]
    double xmin = (mpi_neighbours.west  == MPI_PROC_NULL) ? 0.0
        : h * partition_sum(resolution, mpi_dims[0], mpi_coords[0] - 1);
    double xmax = (mpi_neighbours.east  == MPI_PROC_NULL) ? 1.0
        : h * (partition_sum(resolution, mpi_dims[0], mpi_coords[0]) + 1);
    double ymin = (mpi_neighbours.south == MPI_PROC_NULL) ? 0.0
        : h * partition_sum(resolution, mpi_dims[1], mpi_coords[1] - 1);
    double ymax = (mpi_neighbours.north == MPI_PROC_NULL) ? 1.0
        : h * (partition_sum(resolution, mpi_dims[1], mpi_coords[1]) + 1);
    // Create MPI vector Type (for boundary condition exchange)
    // Allows directly exchange of matrix rows (north/south bounds) since the
    // row elements are "sparce" in the sence that they are not directly aside
    // each other in memory (column major matrix layout) in constrast to columns.
    MPI_Datatype mpi_type_row;
    MPI_Type_vector(ny, 1, nx, MPI_DOUBLE, &mpi_type_row);
    MPI_Type_commit(&mpi_type_row);
 #else
    // Discretization grid resolution
    size_t nx = resolution, ny = resolution;
    // PDE domain borders [xmin, xmax] x [ymin, ymax] = [0, 1] x [0, 1]
    double xmin = 0.0, xmax = 1.0, ymin = 0.0, ymax = 1.0;
 #endif
    // Declare right hand side function f(x, y) = k^2 sin(2 pi x) sinh(2 pi y)
    std::function<double(double, double)> fun = [k](double x, double y) {
-        return k * k * sin(2 * M_PI * x) * sinh(2 * M_PI * y);
+        return k * k * sin(M_2_PI * x) * sinh(M_2_PI * y);
    };
    // Boundary conditions
-    /** North boundary condition gN(x) = k^2 sin(2 pi x) sinh(2 pi) */
+    /** North boundary condition g(x) = k^2 sin(2 pi x) sinh(2 pi) */
-    std::function<double(double)> gN;
+    std::function<double(double)> gN = [k](double x) {
-#ifdef USE_MPI
+        return k * k * sin(M_2_PI * x) * sinh(M_2_PI);
-    // Check if local north boundary is part of the global north boundary
+    };
-    if (mpi_neighbours.north == MPI_PROC_NULL) {
+    /** East, South and West boundary conditions are simply = 0 */
 #endif
        // The local north boundary is equals the global north boundary
        gN = [k](double x) { return sin(2 * M_PI * x) * sinh(2 * M_PI); };
 #ifdef USE_MPI
    } else {
        // local north boundary is zero like all the rest
        gN = [k](double) { return 0.0; };
    }
 #endif
    /** East, South and West boundary conditions are all 0 */
    std::function<double(double)> g0 = [k](double) { return 0.0; };
    /******************************* Solve PDE ********************************/
-    // Instanciate solver (local instance)
+    Matrix<double> solution;
-    Solver solver(nx, ny, xmin, xmax, ymin, ymax, h, k, fun, gN, g0, g0, g0);
+    {
        // Instanciate solver (local instance)
        Solver solver(nx, ny, 0., 1., 0., 1., h, k, fun, gN, g0, g0, g0);
-    // Run solver iterations
+        // Run solver iterations
-    for (size_t iter = 0; iter < iterations; ++iter) {
+        for (size_t iter = 0; iter < iterations; ++iter) {
-        // Perform a single stencil jacobi iteration
+            solver.iterate();
        solver.iterate();
 #ifdef USE_MPI
        // Non-blocking send boundary conditions to all neightbours
        MPI_Request mpi_requests[4];
        int mpi_request_count = 0;
        if (mpi_neighbours.north != MPI_PROC_NULL) {
            auto bound = solver.read_boundary(Solver::Dir::North);
            MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
                mpi_neighbours.north, iter, mpi_comm_grid,
                &mpi_requests[mpi_request_count++]);
        }
        if (mpi_neighbours.east != MPI_PROC_NULL) {
            auto bound = solver.read_boundary(Solver::Dir::East);
            MPI_Isend(bound.data(), 1, mpi_type_row,
                mpi_neighbours.east, iter, mpi_comm_grid,
                &mpi_requests[mpi_request_count++]);
        }
        if (mpi_neighbours.south != MPI_PROC_NULL) {
            auto bound = solver.read_boundary(Solver::Dir::South);
            MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
                mpi_neighbours.south, iter, mpi_comm_grid,
                &mpi_requests[mpi_request_count++]);
        }
        if (mpi_neighbours.west != MPI_PROC_NULL) {
            auto bound = solver.read_boundary(Solver::Dir::West);
            MPI_Isend(bound.data(), 1, mpi_type_row,
                mpi_neighbours.west, iter, mpi_comm_grid,
                &mpi_requests[mpi_request_count++]);
        }
-        // Wait for all send to complete before receiving new data
+        // extract solution
-        // (just to be save)
+        solution = std::move(solver.solution());
        MPI_Waitall(mpi_request_count, mpi_requests, MPI_STATUSES_IGNORE);
        // Get new boundary conditions using a blocking receive
        MPI_Status mpi_status;
        if (mpi_neighbours.north != MPI_PROC_NULL) {
            auto bound = solver.write_boundary(Solver::Dir::North);
            MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
                mpi_neighbours.north, iter, mpi_comm_grid, &mpi_status);
        }
        if (mpi_neighbours.east != MPI_PROC_NULL) {
            auto bound = solver.write_boundary(Solver::Dir::East);
            MPI_Recv(bound.data(), 1, mpi_type_row,
                mpi_neighbours.east, iter, mpi_comm_grid, &mpi_status);
        }
        if (mpi_neighbours.south != MPI_PROC_NULL) {
            auto bound = solver.write_boundary(Solver::Dir::South);
            MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
                mpi_neighbours.south, iter, mpi_comm_grid, &mpi_status);
        }
        if (mpi_neighbours.west != MPI_PROC_NULL) {
            auto bound = solver.write_boundary(Solver::Dir::West);
            MPI_Recv(bound.data(), 1, mpi_type_row,
                mpi_neighbours.west, iter, mpi_comm_grid, &mpi_status);
        }
 #endif
    }
-    /***************************** Solution Stats *****************************/
+    /****************************** Tests/Report ******************************/
    // Get solution
    Matrix<double>& solution = solver.solution();
-    // Compute analytic (true) solution
+    std::cout << solution << std::endl;
    Matrix<double> analytic(solution.nrow(), solution.ncol());
    double x = xmin;
    double y = ymin;
    for (size_t j = 0; j < analytic.ncol(); ++j, y += h) {
        for (size_t i = 0; i < analytic.nrow(); ++i, x += h) {
            analytic(i, j) = sin(2 * M_PI * x) * sinh(2 * M_PI * y);
        }
    }
 #ifdef USE_MPI
    // Frobenius Norm Accumulator
    double frob_norm_sendbuf[2] = {
        solver.resid_norm(), solution.dist(analytic)
    };
    // Square the norms (Accumulation of partial results is the square root of
    // the sum of the squared partial norms)
    frob_norm_sendbuf[0] = frob_norm_sendbuf[0] * frob_norm_sendbuf[0];
    frob_norm_sendbuf[1] = frob_norm_sendbuf[1] * frob_norm_sendbuf[1];
    // Maximum Norm Accumulator
    double max_norm_sendbuf[2] = {
        solver.resid_norm<Max>(), solution.dist<Max>(analytic)
    };
    // Global result reduction buffers
    double frob_norm_recvbuf[2], max_norm_recvbuf[2];
    // Accumulate global stats by sum/max reduction of local stats.
    MPI_Reduce(frob_norm_sendbuf, frob_norm_recvbuf, 2, MPI_DOUBLE, MPI_SUM,
        0, MPI_COMM_WORLD);
    MPI_Reduce(max_norm_sendbuf, max_norm_recvbuf, 2, MPI_DOUBLE, MPI_MAX,
        0, MPI_COMM_WORLD);
    // Finish by taking the square root of the accumulated global
    // squared frobenius norms
    frob_norm_recvbuf[0] = std::sqrt(frob_norm_recvbuf[0]);
    frob_norm_recvbuf[1] = std::sqrt(frob_norm_recvbuf[1]);
 #else
    double frob_norm_recvbuf[2] = {
        solver.resid_norm(), solution.dist(analytic)
    };
    double  max_norm_recvbuf[2] =  {
        solver.resid_norm<Max>(), solution.dist<Max>(analytic)
    };
 #endif
    // calculate runtime time
    auto stop = std::chrono::high_resolution_clock::now();
    auto time = std::chrono::duration_cast<std::chrono::duration<double>>(stop - start)
        .count();
    // Report global stats (from "root" only)
    if (mpi_world_rank == 0) {
        std::cout
            <<   "Time [sec]:          " << std::setw(15) << time
            << "\n|| residuals ||_2:   " << std::setw(15) << frob_norm_recvbuf[0]
            << "\n|| residuals ||_inf: " << std::setw(15) <<  max_norm_recvbuf[0]
            << "\n|| error ||_2:       " << std::setw(15) << frob_norm_recvbuf[1]
            << "\n|| error ||_inf:     " << std::setw(15) <<  max_norm_recvbuf[1]
            << std::endl;
    }
    // MPI shutdown/cleanup
 #ifdef USE_MPI
--- a/Exercise_01/utils.h
+++ b/Exercise_01/utils.h
@ -1,105 +0,0 @@
 /**
 * Partitions an integer `num` into `div` summands and returns the `i`th
 * of the partition.
 *
 * @example
 * num = 17
 * div = 5
 *
 * partition(17, 5, 0) -> 4
 * partition(17, 5, 1) -> 4
 * partition(17, 5, 2) -> 3
 * partition(17, 5, 3) -> 3
 * partition(17, 5, 5) -> 3
 *
 * 17 = num = div * num + num % div = 4 + 4 + 3 + 3 + 3
 *                                   1st 2nd 3rd 4th 5th
 *                                   i=0 i=1 i=2 i=3 i=4
 */
 int partition(int num, int div, int i) {
    return num / div + static_cast<int>(i < (num % div));
 }
 /**
 * Computes the partial sum of the first `i` integer `num` partitiones into
 * `div` parts.
 * 
 * @example
 * num = 17
 * div = 5
 *
 * partition_sum(17, 5, 0) -> 4
 * partition_sum(17, 5, 1) -> 8
 * partition_sum(17, 5, 2) -> 11
 * partition_sum(17, 5, 3) -> 14
 * partition_sum(17, 5, 5) -> 17
 */
 int partition_sum(int num, int div, int i) {
    int sum = 0;
    for (int j = 0; j <= i; ++j) {
        sum += partition(num, div, j);
    }
    return sum;
 }
 /**
 * Factorized a integer number `num` into two multiplicative factors.
 * These factors are as close together as possible. This means for example for
 * square numbers that the two factors are the square root of `num`.
 *
 * Assumes small numbers and therefore uses a simple linear search.
 *
 * The first few integers are factorized as follows:
 *
 *   0 = 1 * 0
 *   1 = 1 * 1
 *   2 = 1 * 2      (is prime)
 *   3 = 1 * 3      (is prime)
 *   4 = 2 * 2      (is square)
 *   5 = 1 * 5      (is prime)
 *   6 = 2 * 3
 *   7 = 1 * 7      (is prime)
 *   8 = 2 * 4
 *   9 = 3 * 3      (is square)
 *  10 = 2 * 5
 *  11 = 1 * 11     (is prime)
 *  12 = 3 * 4
 *  13 = 1 * 13     (is prime)
 *  14 = 2 * 7
 *  15 = 3 * 5
 *  16 = 4 * 4      (is square)
 *
 * @param num integer to be factorized
 * @param factor [out] output parameter of length 2 where the two factors are
 *  written into.
 *
 * @example
 * int factors[2];
 * two_factors(15, factors);    // -> factors = {3, 5}
 */
 void two_factors(int num, int* factors) {
    // In case of zero, set both to zero
    if (!num) {
        factors[0] = factors[1] = 0;
    }
    // Ensure `num` is positive
    if (num < 0) {
        num *= -1;
    }
    // Set initial factorization (this always works)
    factors[0] = 1;
    factors[1] = num;
    // Check all numbers `i` untill the integer square-root
    for (int i = 2; i * i <= num; ++i) {
        // Check if `i` is a divisor
        if (!(num % i)) {
            // Update factors as `i` divides `num`
            factors[0] = i;
            factors[1] = num / i;
        }
    }
 }