add: utils,

wip: ex_01 (almost done)
wip: ex_01
2022-03-13 19:07:39 +01:00 · 2022-03-13 17:58:02 +01:00
6 changed files with 481 additions and 62 deletions
--- a/Exercise_01/Makefile
+++ b/Exercise_01/Makefile
@ -12,8 +12,10 @@ mpi: main.cpp Matrix.h Solver.h

 all: seriel mpi

-test: seriel
-	./$(OUT)_seriel 120 10
+test: seriel mpi
+	./$(OUT)_seriel 1D 120 1000
+	mpirun -n 4 ./$(OUT)_mpi 2D 120 1000
+# for script in *.R; do R --vanilla < $${script}; done

 clean:
-	rm -f *.o main $(OUT)_seriel $(OUT)_mpi
+	rm -f *.o *.R main $(OUT)_seriel $(OUT)_mpi
--- a/Exercise_01/Matrix.h
+++ b/Exercise_01/Matrix.h
@ -50,11 +50,17 @@ public:
          T* data()       { return _data.data(); };
    const T* data() const { return _data.data(); };

-          T& operator()(int i)       { return _data[index(i)]; };
-    const T& operator()(int i) const { return _data[index(i)]; };
+          T& operator()(int i)       { return _data[i]; };
+    const T& operator()(int i) const { return _data[i]; };

-          T& operator()(int i, int j)       { return _data[index(i, j)]; }
-    const T& operator()(int i, int j) const { return _data[index(i, j)]; }
+          T& operator()(int i, int j)       { return _data[i + j * _nrow]; }
+    const T& operator()(int i, int j) const { return _data[i + j * _nrow]; }
+
+          T& get(int i)       { return _data[index(i)]; };
+    const T& get(int i) const { return _data[index(i)]; };
+
+          T& get(int i, int j)       { return _data[index(i, j)]; }
+    const T& get(int i, int j) const { return _data[index(i, j)]; }

    template <enum Norm N = Norm::Frob>
    double norm() const {
@ -98,13 +104,13 @@ public:
        if constexpr (N == Norm::Frob) {
            // Sum of Squared differences
            for (size_t i = 0; i < nelem; ++i) {
-                T diff = this(i) - A(i);
+                T diff = (*this)(i) - A(i);
                accum += diff * diff;
            }
            return std::sqrt(static_cast<double>(accum));
        } else { // Maximum Norm
            for (size_t i = 0; i < nelem; ++i) {
-                T diff = std::abs(this(i) - A(i));
+                T diff = std::abs((*this)(i) - A(i));
                if (accum < diff) {
                    accum = diff;
                }
@ -174,6 +180,9 @@ public:
          T& operator()(int i)       { return _matrix(_index + i * _stride); };
    const T& operator()(int i) const { return _matrix(_index + i * _stride); };

+          T& get(int i)       { return _matrix.get(_index + i * _stride); };
+    const T& get(int i) const { return _matrix.get(_index + i * _stride); };
+
 protected:
    Matrix<T>& _matrix;
    size_t _index;
--- a/Exercise_01/Solver.h
+++ b/Exercise_01/Solver.h
@ -35,9 +35,9 @@ public:
                 -1.0 / (h * h),                /* Left */
                 -1.0 / (h * h),                /* Up */
                 -1.0 / (h * h)},               /* Right */
-        _rhs(nx, ny),
-        _sol(nx, ny, 0.0),
-        _tmp(nx, ny)
+        _rhs(nx, ny, 0.),
+        _sol(nx, ny, 0.),
+        _tmp(nx, ny, 0.)
    {
        // Initialize Right Hand Size _rhs(x, y) = f(X(x), Y(y))
        // Note that the correspondence usual matrix indexing sceeme as
@ -53,9 +53,9 @@ public:

        // Set Dirichlet boundary conditions
        // North
-        for (size_t x = 0; x < nx; ++x) { _sol(x, -1) = _tmp(x, -1) = gN(X(x)); }
+        for (size_t x = 0; x < nx; ++x) { _sol.get(x, -1) = _tmp.get(x, -1) = gN(X(x)); }
        // East
-        for (size_t y = 0; y < ny; ++y) { _sol(-1, y) = _tmp(-1, y) = gE(Y(y)); }
+        for (size_t y = 0; y < ny; ++y) { _sol.get(-1, y) = _tmp.get(-1, y) = gE(Y(y)); }
        // South
        for (size_t x = 0; x < nx; ++x) { _sol(x, 0) = _tmp(x, 0) = gS(X(x)); }
        // West
@ -77,7 +77,20 @@ public:
        N = North, E = East, S = South, W = West
    };

-    MatrixView<double> boundary(enum Dir dir) {
+    MatrixView<double> read_boundary(enum Dir dir) {
+        switch (dir) {
+            case Dir::North:
+                return Col<double>(_sol, -2);
+            case Dir::East:
+                return Row<double>(_sol, -2);
+            case Dir::South:
+                return Col<double>(_sol, 1);
+            default:    // Dir::West
+                return Row<double>(_sol, 1);
+        }
+    };
+
+    MatrixView<double> write_boundary(enum Dir dir) {
        switch (dir) {
            case Dir::North:
                return Col<double>(_sol, -1);
@ -91,13 +104,11 @@ public:
    };

    /** Solution getter */
-    Matrix<double>& solution() {
-        return _sol;
-    }
+    Matrix<double>& solution() { return _sol; }
+    /** Right Hand Side getter (grid evaluated f(x, y)) */
+    Matrix<double>& rhs() { return _rhs; }

-    /**
-     * Performs a single Jacobian Iteration
-     */
+    /** Performs a single Jacobian Iteration */
    void iterate() {
        double s = 1.0 / _stencil.C;

@ -114,6 +125,37 @@ public:
        _iter++;
    };

+    /** Computes the L2 and Inf norm of the residuals */
+    template <enum Norm N = Norm::Frob>
+    double resid_norm() {
+        // Enforce valid Norm types
+        static_assert(N == Norm::Frob || N == Norm::Max);
+
+        // Norm accumulator
+        double accum = 0.0;
+
+        for (size_t y = 1; y < _ny - 1; ++y) {
+            for (size_t x = 1; x < _nx - 1; ++x) {
+                double resid = _rhs(x, y) - (_stencil.C * _sol(x, y) +
+                                             _stencil.B * _sol(x, y - 1) +
+                                             _stencil.L * _sol(x - 1, y) +
+                                             _stencil.U * _sol(x, y + 1) +
+                                             _stencil.R * _sol(x + 1, y));
+                if constexpr (N == Norm::Frob) {
+                    accum += resid * resid;
+                } else {    // Maximum Norm
+                    accum = std::max(std::abs(resid), accum);
+                }
+            }
+        }
+
+        if constexpr (N == Norm::Frob) {
+            return std::sqrt(accum);
+        } else {    // Maximum Norm
+            return accum;
+        }
+    }
+
 private:
    size_t _iter;           /*< Iteration count */
    const size_t _nx;       /*< Number of X-axis grid points */
@ -122,8 +164,8 @@ private:
    const double _xmax;     /*< Domain X-max (east border pos) */
    const double _ymin;     /*< Domain Y-min (south border pos) */
    const double _ymax;     /*< Domain Y-max (north border pos) */
-    const Stencil _stencil; /*< Simple, + shaped stencil */
-    Matrix<double> _rhs;    /*< Grid evaluated RHS of the PDE = f(x, y) */
-    Matrix<double> _sol;    /*< Solution after _iter iterations */
+    const Stencil _stencil; /*< Simple '+' shaped stencil */
+    Matrix<double> _rhs;    /*< Grid evaluated RHS of the PDE, f(x, y) */
+    Matrix<double> _sol;    /*< Solution after `_iter` iterations */
    Matrix<double> _tmp;    /*< Temp. datablock, used in iterate() */
 };
--- a/Exercise_01/examples/MPI_RowSums.cpp
+++ b/Exercise_01/examples/MPI_RowSums.cpp
@ -1,8 +1,8 @@
 /**
 * Computes row sums of a matrix containing (0-indexed) consecutive numbers
- * in row major order (yes, useless but illustrativ)
+ * in column major order
 *
- * Example:
+ * Example (entries are array indices):
 *     matrix (11 x 7):                          row_sums (11):
 *        0   11   22   33   44   55   66   ->      231
 *        1   12   23   34   45   56   67   ->      238
@ -22,7 +22,7 @@
 * worker in a dense layout (MPI_DOUBLE)
 *
 * Send/Recv Example:
- *     Data send (entries are the array indices):
+ *     Data send (entries are array indices):
 *        -    -    -    -    -    -    -
 *        -    -    -    -    -    -    -
 *        -    -    -    -    -    -    -
@ -68,13 +68,12 @@ int min(int a, int b) { return a < b ? a : b; }

 int main(int argn, char* argv[]) {

-    // Build a (simple and barebones, row major) matrix model, it's just a
+    // Build a (simple and barebones, column major) matrix model, it's just a
    // vector with external row/col count.
    int nrow = 11;  // defaults
    int ncol = 7;   // defaults

-    // Parse arguments to set nrow, ncol (sloppy, but thats not the point of
-    // this example)
+    // Parse arguments to set nrow, ncol (sloppy, but not the point of the example)
    if (argn > 1) {
        nrow = atoi(argv[1]);
        if (nrow < 1 || nrow > 1024) {
--- a/Exercise_01/main.cpp
+++ b/Exercise_01/main.cpp
@ -1,10 +1,13 @@
 /**
- * g++ main.cpp -std=c++17 -Wall -Wpedantic -pedantic -o main; ./main
+ * 
 */

 #include <stddef.h>
 #include <iostream>
+#include <iomanip>
+#include <sstream>
 #include <functional>
+#include <chrono>
 #define _USE_MATH_DEFINES   /* enables math constants from cmath */
 #include <cmath>
 #ifdef USE_MPI
@ -13,6 +16,7 @@

 #include "Matrix.h"
 #include "Solver.h"
+#include "utils.h"

 int main(int argn, char* argv[]) {

@ -23,63 +27,321 @@ int main(int argn, char* argv[]) {

    // Get MPI config
    int mpi_size; /*< MPI pool size (a.k.a. total number of processes) */
-    int mpi_rank; /*< MPI rank (a.k.a. process ID in the context of MPI) */
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
-    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+
+    // Get MPI rank
+    int mpi_world_rank; /*< MPI world rank (a.k.a. grid process ID) */
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_world_rank);
+#else
+    int mpi_size = 1;
+    int mpi_world_rank = 0;
 #endif

    /**************************** Parse Arguments *****************************/
-    if (argn < 3) {
+    if (0 < argn && argn < 4) {
        std::cerr << "usage: " << argv[0] << " <resolution> <iterations>" << std::endl;
        return -1;
-    } else if (argn > 3) {
-        std::cerr << "warning: " << "ignoring all but the first two params" << std::endl;
+    } else if (argn > 4) {
+        std::cerr << "warning: " << "ignoring all but the first three params" << std::endl;
    }
-    // TODO: make this proper!!!
-    size_t resolution = atol(argv[1]);
-    size_t iterations = atol(argv[2]);
-    if (resolution < 1 || resolution > 65536
-    ||  iterations < 1 || iterations > 65536) {
-        std::cerr << "error: parsing arguments failed" << std::endl;
+#ifdef USE_MPI
+    size_t dim;
+    if (std::string(argv[1]) == "1D") {
+        dim = 1;
+    } else if (std::string(argv[1]) == "2D") {
+        dim = 2;
+    } else {
+        std::cerr << "error: Parsing arg 1 <dim> failed" << std::endl;
+        return -1;
+    }
+#endif
+    size_t resolution;
+    if (!(std::istringstream(argv[2]) >> resolution)) {
+        std::cerr << "error: Parsing arg 2 <resolution> failed" << std::endl;
+        return -1;
+    }
+    size_t iterations;
+    if (!(std::istringstream(argv[3]) >> iterations)) {
+        std::cerr << "error: Parsing arg 3 <iterations> failed" << std::endl;
+        return -1;
+    }
+    if (resolution < 10 || resolution > 65536) {
+        std::cerr << "error: arg 1 <resolution> " << resolution
+            << " out of domain [10, 65536]" << std::endl;
+        return -1;
+    }
+    if (iterations > 65536) {
+        std::cerr << "error: arg 2 <iterations> " << iterations
+            << "out of domain [0, 65536]" << std::endl;
+        return -1;
    }

+    // Report configuration
+    if (mpi_world_rank == 0) {
+        std::cout << std::setfill(' ')
+#ifdef USE_MPI
+            << "use MPI:         YES\n"
+#else
+            << "use MPI:          NO\n"
+#endif
+            << "nr. processes: " << std::setw(5) << mpi_size   << '\n'
+            << "resolution:    " << std::setw(5) << resolution << '\n'
+            << "iterations:    " << std::setw(5) << iterations << std::endl;
+    }

-    /************************* Initialize PDE Solver **************************/
-    size_t nx = resolution;
-    size_t ny = resolution;
+    /************************* Start Time Measurement *************************/
+    auto start = std::chrono::high_resolution_clock::now();
+
+    /**************** Setup (local) PDE + Boundary Conditions *****************/
    const double k = M_PI;
    const double h = 1.0 / static_cast<double>(resolution - 1);

+#ifdef USE_MPI
+    // Group processes into a cartesian communication topology. Set initial
+    // values for a 1D grid.
+    int mpi_dims[2] = {mpi_size, 1};
+    // In case of a 2D grid, make equal partitions ob both axes (as equal as
+    // possible. Note that `MPI_Dims_create` does not garantee "as equal as".
+    // For example it was observed that for 9 processes the generated grid
+    // was a 9 x 1, the following computes a 3 x 3).
+    if (dim == 2) {
+        two_factors(mpi_size, mpi_dims);
+    }
+
+    // Report grid topology
+    std::cout << "topology:      " << dim << "D";
+    if (dim == 2) {
+        std::cout << " (" << mpi_dims[0] << " x " << mpi_dims[1] << ")";
+    }
+    std::cout << std::endl;
+
+    // Setup a cartesian topology communicator (NON-cyclic)
+    const int mpi_periods[2] = {false, false};
+    MPI_Comm mpi_comm_grid;
+    MPI_Cart_create(
+        MPI_COMM_WORLD,     // Old Communicator
+        2,                  // number of dimensions
+        mpi_dims,           // grid dimensions
+        mpi_periods,        // grid periodicity
+        true,               // allow process reordering
+        &mpi_comm_grid
+    );
+
+    // Get MPI rank with respect to the grid communicator
+    int mpi_grid_rank; /*< MPI grid rank (a.k.a. grid process ID) */
+    MPI_Comm_rank(mpi_comm_grid, &mpi_grid_rank);
+
+    // Get coordinates with respect to the grid communicator
+    int mpi_coords[2];
+    MPI_Cart_coords(mpi_comm_grid, mpi_grid_rank, 2, mpi_coords);
+
+    // Get direct neightbours in the communication grid
+    struct { int north; int east; int south; int west; } mpi_neighbours;
+    // Get X-direction (dim 0) neightbours
+    MPI_Cart_shift(
+        mpi_comm_grid,              // grid communicator
+        0,                          // axis index (0 <-> X)
+        1,                          // offset
+        &(mpi_neighbours.west),     // negated offset neightbour
+        &(mpi_neighbours.east)      // offset neightbour
+    );
+    // Get Y-direction (dim 1) neightbours
+    MPI_Cart_shift(
+        mpi_comm_grid,
+        1,                          // axis index (1 <-> Y)
+        1,
+        &(mpi_neighbours.south),
+        &(mpi_neighbours.north)
+    );
+
+    // Calc local (base) grid size (without ghost layers)
+    size_t nx = partition(resolution, mpi_dims[0], mpi_coords[0]);
+    size_t ny = partition(resolution, mpi_dims[1], mpi_coords[1]);
+    // Add ghost layers for each (existing) neighbour
+    ny += (mpi_neighbours.north != MPI_PROC_NULL);
+    nx += (mpi_neighbours.east  != MPI_PROC_NULL);
+    ny += (mpi_neighbours.south != MPI_PROC_NULL);
+    nx += (mpi_neighbours.west  != MPI_PROC_NULL);
+
+    // Compute local domain [xmin, xmax] x [ymin, ymax]
+    double xmin = (mpi_neighbours.west  == MPI_PROC_NULL) ? 0.0
+        : h * partition_sum(resolution, mpi_dims[0], mpi_coords[0] - 1);
+    double xmax = (mpi_neighbours.east  == MPI_PROC_NULL) ? 1.0
+        : h * (partition_sum(resolution, mpi_dims[0], mpi_coords[0]) + 1);
+    double ymin = (mpi_neighbours.south == MPI_PROC_NULL) ? 0.0
+        : h * partition_sum(resolution, mpi_dims[1], mpi_coords[1] - 1);
+    double ymax = (mpi_neighbours.north == MPI_PROC_NULL) ? 1.0
+        : h * (partition_sum(resolution, mpi_dims[1], mpi_coords[1]) + 1);
+
+    // Create MPI vector Type (for boundary condition exchange)
+    // Allows directly exchange of matrix rows (north/south bounds) since the
+    // row elements are "sparce" in the sence that they are not directly aside
+    // each other in memory (column major matrix layout) in constrast to columns.
+    MPI_Datatype mpi_type_row;
+    MPI_Type_vector(ny, 1, nx, MPI_DOUBLE, &mpi_type_row);
+    MPI_Type_commit(&mpi_type_row);
+#else
+    // Discretization grid resolution
+    size_t nx = resolution, ny = resolution;
+    // PDE domain borders [xmin, xmax] x [ymin, ymax] = [0, 1] x [0, 1]
+    double xmin = 0.0, xmax = 1.0, ymin = 0.0, ymax = 1.0;
+#endif
+
    // Declare right hand side function f(x, y) = k^2 sin(2 pi x) sinh(2 pi y)
    std::function<double(double, double)> fun = [k](double x, double y) {
-        return k * k * sin(M_2_PI * x) * sinh(M_2_PI * y);
+        return k * k * sin(2 * M_PI * x) * sinh(2 * M_PI * y);
    };
    // Boundary conditions
-    /** North boundary condition g(x) = k^2 sin(2 pi x) sinh(2 pi) */
-    std::function<double(double)> gN = [k](double x) {
-        return k * k * sin(M_2_PI * x) * sinh(M_2_PI);
-    };
-    /** East, South and West boundary conditions are simply = 0 */
+    /** North boundary condition gN(x) = k^2 sin(2 pi x) sinh(2 pi) */
+    std::function<double(double)> gN;
+#ifdef USE_MPI
+    // Check if local north boundary is part of the global north boundary
+    if (mpi_neighbours.north == MPI_PROC_NULL) {
+#endif
+        // The local north boundary is equals the global north boundary
+        gN = [k](double x) { return sin(2 * M_PI * x) * sinh(2 * M_PI); };
+#ifdef USE_MPI
+    } else {
+        // local north boundary is zero like all the rest
+        gN = [k](double) { return 0.0; };
+    }
+#endif
+    /** East, South and West boundary conditions are all 0 */
    std::function<double(double)> g0 = [k](double) { return 0.0; };

    /******************************* Solve PDE ********************************/
-    Matrix<double> solution;
-    {
-        // Instanciate solver (local instance)
-        Solver solver(nx, ny, 0., 1., 0., 1., h, k, fun, gN, g0, g0, g0);
+    // Instanciate solver (local instance)
+    Solver solver(nx, ny, xmin, xmax, ymin, ymax, h, k, fun, gN, g0, g0, g0);

-        // Run solver iterations
-        for (size_t iter = 0; iter < iterations; ++iter) {
-            solver.iterate();
+    // Run solver iterations
+    for (size_t iter = 0; iter < iterations; ++iter) {
+        // Perform a single stencil jacobi iteration
+        solver.iterate();
+
+#ifdef USE_MPI
+        // Non-blocking send boundary conditions to all neightbours
+        MPI_Request mpi_requests[4];
+        int mpi_request_count = 0;
+
+        if (mpi_neighbours.north != MPI_PROC_NULL) {
+            auto bound = solver.read_boundary(Solver::Dir::North);
+            MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
+                mpi_neighbours.north, iter, mpi_comm_grid,
+                &mpi_requests[mpi_request_count++]);
+        }
+        if (mpi_neighbours.east != MPI_PROC_NULL) {
+            auto bound = solver.read_boundary(Solver::Dir::East);
+            MPI_Isend(bound.data(), 1, mpi_type_row,
+                mpi_neighbours.east, iter, mpi_comm_grid,
+                &mpi_requests[mpi_request_count++]);
+        }
+        if (mpi_neighbours.south != MPI_PROC_NULL) {
+            auto bound = solver.read_boundary(Solver::Dir::South);
+            MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
+                mpi_neighbours.south, iter, mpi_comm_grid,
+                &mpi_requests[mpi_request_count++]);
+        }
+        if (mpi_neighbours.west != MPI_PROC_NULL) {
+            auto bound = solver.read_boundary(Solver::Dir::West);
+            MPI_Isend(bound.data(), 1, mpi_type_row,
+                mpi_neighbours.west, iter, mpi_comm_grid,
+                &mpi_requests[mpi_request_count++]);
        }

-        // extract solution
-        solution = std::move(solver.solution());
+        // Wait for all send to complete before receiving new data
+        // (just to be save)
+        MPI_Waitall(mpi_request_count, mpi_requests, MPI_STATUSES_IGNORE);
+
+        // Get new boundary conditions using a blocking receive
+        MPI_Status mpi_status;
+        if (mpi_neighbours.north != MPI_PROC_NULL) {
+            auto bound = solver.write_boundary(Solver::Dir::North);
+            MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
+                mpi_neighbours.north, iter, mpi_comm_grid, &mpi_status);
+        }
+        if (mpi_neighbours.east != MPI_PROC_NULL) {
+            auto bound = solver.write_boundary(Solver::Dir::East);
+            MPI_Recv(bound.data(), 1, mpi_type_row,
+                mpi_neighbours.east, iter, mpi_comm_grid, &mpi_status);
+        }
+        if (mpi_neighbours.south != MPI_PROC_NULL) {
+            auto bound = solver.write_boundary(Solver::Dir::South);
+            MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
+                mpi_neighbours.south, iter, mpi_comm_grid, &mpi_status);
+        }
+        if (mpi_neighbours.west != MPI_PROC_NULL) {
+            auto bound = solver.write_boundary(Solver::Dir::West);
+            MPI_Recv(bound.data(), 1, mpi_type_row,
+                mpi_neighbours.west, iter, mpi_comm_grid, &mpi_status);
+        }
+#endif
    }

-    /****************************** Tests/Report ******************************/
+    /***************************** Solution Stats *****************************/
+    // Get solution
+    Matrix<double>& solution = solver.solution();

-    std::cout << solution << std::endl;
+    // Compute analytic (true) solution
+    Matrix<double> analytic(solution.nrow(), solution.ncol());
+    double x = xmin;
+    double y = ymin;
+    for (size_t j = 0; j < analytic.ncol(); ++j, y += h) {
+        for (size_t i = 0; i < analytic.nrow(); ++i, x += h) {
+            analytic(i, j) = sin(2 * M_PI * x) * sinh(2 * M_PI * y);
+        }
+    }
+
+#ifdef USE_MPI
+    // Frobenius Norm Accumulator
+    double frob_norm_sendbuf[2] = {
+        solver.resid_norm(), solution.dist(analytic)
+    };
+    // Square the norms (Accumulation of partial results is the square root of
+    // the sum of the squared partial norms)
+    frob_norm_sendbuf[0] = frob_norm_sendbuf[0] * frob_norm_sendbuf[0];
+    frob_norm_sendbuf[1] = frob_norm_sendbuf[1] * frob_norm_sendbuf[1];
+    // Maximum Norm Accumulator
+    double max_norm_sendbuf[2] = {
+        solver.resid_norm<Max>(), solution.dist<Max>(analytic)
+    };
+
+    // Global result reduction buffers
+    double frob_norm_recvbuf[2], max_norm_recvbuf[2];
+
+    // Accumulate global stats by sum/max reduction of local stats.
+    MPI_Reduce(frob_norm_sendbuf, frob_norm_recvbuf, 2, MPI_DOUBLE, MPI_SUM,
+        0, MPI_COMM_WORLD);
+    MPI_Reduce(max_norm_sendbuf, max_norm_recvbuf, 2, MPI_DOUBLE, MPI_MAX,
+        0, MPI_COMM_WORLD);
+
+    // Finish by taking the square root of the accumulated global
+    // squared frobenius norms
+    frob_norm_recvbuf[0] = std::sqrt(frob_norm_recvbuf[0]);
+    frob_norm_recvbuf[1] = std::sqrt(frob_norm_recvbuf[1]);
+#else
+    double frob_norm_recvbuf[2] = {
+        solver.resid_norm(), solution.dist(analytic)
+    };
+    double  max_norm_recvbuf[2] =  {
+        solver.resid_norm<Max>(), solution.dist<Max>(analytic)
+    };
+#endif
+
+    // calculate runtime time
+    auto stop = std::chrono::high_resolution_clock::now();
+    auto time = std::chrono::duration_cast<std::chrono::duration<double>>(stop - start)
+        .count();
+
+    // Report global stats (from "root" only)
+    if (mpi_world_rank == 0) {
+        std::cout
+            <<   "Time [sec]:          " << std::setw(15) << time
+            << "\n|| residuals ||_2:   " << std::setw(15) << frob_norm_recvbuf[0]
+            << "\n|| residuals ||_inf: " << std::setw(15) <<  max_norm_recvbuf[0]
+            << "\n|| error ||_2:       " << std::setw(15) << frob_norm_recvbuf[1]
+            << "\n|| error ||_inf:     " << std::setw(15) <<  max_norm_recvbuf[1]
+            << std::endl;
+    }

    // MPI shutdown/cleanup
 #ifdef USE_MPI
--- a/Exercise_01/utils.h
+++ b/Exercise_01/utils.h
@ -0,0 +1,105 @@
+
+/**
+ * Partitions an integer `num` into `div` summands and returns the `i`th
+ * of the partition.
+ *
+ * @example
+ * num = 17
+ * div = 5
+ *
+ * partition(17, 5, 0) -> 4
+ * partition(17, 5, 1) -> 4
+ * partition(17, 5, 2) -> 3
+ * partition(17, 5, 3) -> 3
+ * partition(17, 5, 5) -> 3
+ *
+ * 17 = num = div * num + num % div = 4 + 4 + 3 + 3 + 3
+ *                                   1st 2nd 3rd 4th 5th
+ *                                   i=0 i=1 i=2 i=3 i=4
+ */
+int partition(int num, int div, int i) {
+    return num / div + static_cast<int>(i < (num % div));
+}
+
+/**
+ * Computes the partial sum of the first `i` integer `num` partitiones into
+ * `div` parts.
+ * 
+ * @example
+ * num = 17
+ * div = 5
+ *
+ * partition_sum(17, 5, 0) -> 4
+ * partition_sum(17, 5, 1) -> 8
+ * partition_sum(17, 5, 2) -> 11
+ * partition_sum(17, 5, 3) -> 14
+ * partition_sum(17, 5, 5) -> 17
+ */
+int partition_sum(int num, int div, int i) {
+    int sum = 0;
+    for (int j = 0; j <= i; ++j) {
+        sum += partition(num, div, j);
+    }
+    return sum;
+}
+
+/**
+ * Factorized a integer number `num` into two multiplicative factors.
+ * These factors are as close together as possible. This means for example for
+ * square numbers that the two factors are the square root of `num`.
+ *
+ * Assumes small numbers and therefore uses a simple linear search.
+ *
+ * The first few integers are factorized as follows:
+ *
+ *   0 = 1 * 0
+ *   1 = 1 * 1
+ *   2 = 1 * 2      (is prime)
+ *   3 = 1 * 3      (is prime)
+ *   4 = 2 * 2      (is square)
+ *   5 = 1 * 5      (is prime)
+ *   6 = 2 * 3
+ *   7 = 1 * 7      (is prime)
+ *   8 = 2 * 4
+ *   9 = 3 * 3      (is square)
+ *  10 = 2 * 5
+ *  11 = 1 * 11     (is prime)
+ *  12 = 3 * 4
+ *  13 = 1 * 13     (is prime)
+ *  14 = 2 * 7
+ *  15 = 3 * 5
+ *  16 = 4 * 4      (is square)
+ *
+ * @param num integer to be factorized
+ * @param factor [out] output parameter of length 2 where the two factors are
+ *  written into.
+ *
+ * @example
+ * int factors[2];
+ * two_factors(15, factors);    // -> factors = {3, 5}
+ */
+void two_factors(int num, int* factors) {
+    // In case of zero, set both to zero
+    if (!num) {
+        factors[0] = factors[1] = 0;
+    }
+
+    // Ensure `num` is positive
+    if (num < 0) {
+        num *= -1;
+    }
+
+    // Set initial factorization (this always works)
+    factors[0] = 1;
+    factors[1] = num;
+
+    // Check all numbers `i` untill the integer square-root
+    for (int i = 2; i * i <= num; ++i) {
+        // Check if `i` is a divisor
+        if (!(num % i)) {
+            // Update factors as `i` divides `num`
+            factors[0] = i;
+            factors[1] = num / i;
+        }
+    }
+}
Author	SHA1	Message	Date
daniel	e8e09b0637	add: utils, wip: ex_01 (almost done)	2022-03-13 19:07:39 +01:00
daniel	644874f27f	wip: ex_01	2022-03-13 17:58:02 +01:00