Compare commits
2 Commits
3b07e2cd55
...
e8e09b0637
Author | SHA1 | Date |
---|---|---|
Daniel Kapla | e8e09b0637 | |
Daniel Kapla | 644874f27f |
|
@ -12,8 +12,10 @@ mpi: main.cpp Matrix.h Solver.h
|
||||||
|
|
||||||
all: seriel mpi
|
all: seriel mpi
|
||||||
|
|
||||||
test: seriel
|
test: seriel mpi
|
||||||
./$(OUT)_seriel 120 10
|
./$(OUT)_seriel 1D 120 1000
|
||||||
|
mpirun -n 4 ./$(OUT)_mpi 2D 120 1000
|
||||||
|
# for script in *.R; do R --vanilla < $${script}; done
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f *.o main $(OUT)_seriel $(OUT)_mpi
|
rm -f *.o *.R main $(OUT)_seriel $(OUT)_mpi
|
||||||
|
|
|
@ -50,11 +50,17 @@ public:
|
||||||
T* data() { return _data.data(); };
|
T* data() { return _data.data(); };
|
||||||
const T* data() const { return _data.data(); };
|
const T* data() const { return _data.data(); };
|
||||||
|
|
||||||
T& operator()(int i) { return _data[index(i)]; };
|
T& operator()(int i) { return _data[i]; };
|
||||||
const T& operator()(int i) const { return _data[index(i)]; };
|
const T& operator()(int i) const { return _data[i]; };
|
||||||
|
|
||||||
T& operator()(int i, int j) { return _data[index(i, j)]; }
|
T& operator()(int i, int j) { return _data[i + j * _nrow]; }
|
||||||
const T& operator()(int i, int j) const { return _data[index(i, j)]; }
|
const T& operator()(int i, int j) const { return _data[i + j * _nrow]; }
|
||||||
|
|
||||||
|
T& get(int i) { return _data[index(i)]; };
|
||||||
|
const T& get(int i) const { return _data[index(i)]; };
|
||||||
|
|
||||||
|
T& get(int i, int j) { return _data[index(i, j)]; }
|
||||||
|
const T& get(int i, int j) const { return _data[index(i, j)]; }
|
||||||
|
|
||||||
template <enum Norm N = Norm::Frob>
|
template <enum Norm N = Norm::Frob>
|
||||||
double norm() const {
|
double norm() const {
|
||||||
|
@ -98,13 +104,13 @@ public:
|
||||||
if constexpr (N == Norm::Frob) {
|
if constexpr (N == Norm::Frob) {
|
||||||
// Sum of Squared differences
|
// Sum of Squared differences
|
||||||
for (size_t i = 0; i < nelem; ++i) {
|
for (size_t i = 0; i < nelem; ++i) {
|
||||||
T diff = this(i) - A(i);
|
T diff = (*this)(i) - A(i);
|
||||||
accum += diff * diff;
|
accum += diff * diff;
|
||||||
}
|
}
|
||||||
return std::sqrt(static_cast<double>(accum));
|
return std::sqrt(static_cast<double>(accum));
|
||||||
} else { // Maximum Norm
|
} else { // Maximum Norm
|
||||||
for (size_t i = 0; i < nelem; ++i) {
|
for (size_t i = 0; i < nelem; ++i) {
|
||||||
T diff = std::abs(this(i) - A(i));
|
T diff = std::abs((*this)(i) - A(i));
|
||||||
if (accum < diff) {
|
if (accum < diff) {
|
||||||
accum = diff;
|
accum = diff;
|
||||||
}
|
}
|
||||||
|
@ -174,6 +180,9 @@ public:
|
||||||
T& operator()(int i) { return _matrix(_index + i * _stride); };
|
T& operator()(int i) { return _matrix(_index + i * _stride); };
|
||||||
const T& operator()(int i) const { return _matrix(_index + i * _stride); };
|
const T& operator()(int i) const { return _matrix(_index + i * _stride); };
|
||||||
|
|
||||||
|
T& get(int i) { return _matrix.get(_index + i * _stride); };
|
||||||
|
const T& get(int i) const { return _matrix.get(_index + i * _stride); };
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Matrix<T>& _matrix;
|
Matrix<T>& _matrix;
|
||||||
size_t _index;
|
size_t _index;
|
||||||
|
|
|
@ -35,9 +35,9 @@ public:
|
||||||
-1.0 / (h * h), /* Left */
|
-1.0 / (h * h), /* Left */
|
||||||
-1.0 / (h * h), /* Up */
|
-1.0 / (h * h), /* Up */
|
||||||
-1.0 / (h * h)}, /* Right */
|
-1.0 / (h * h)}, /* Right */
|
||||||
_rhs(nx, ny),
|
_rhs(nx, ny, 0.),
|
||||||
_sol(nx, ny, 0.0),
|
_sol(nx, ny, 0.),
|
||||||
_tmp(nx, ny)
|
_tmp(nx, ny, 0.)
|
||||||
{
|
{
|
||||||
// Initialize Right Hand Size _rhs(x, y) = f(X(x), Y(y))
|
// Initialize Right Hand Size _rhs(x, y) = f(X(x), Y(y))
|
||||||
// Note that the correspondence usual matrix indexing sceeme as
|
// Note that the correspondence usual matrix indexing sceeme as
|
||||||
|
@ -53,9 +53,9 @@ public:
|
||||||
|
|
||||||
// Set Dirichlet boundary conditions
|
// Set Dirichlet boundary conditions
|
||||||
// North
|
// North
|
||||||
for (size_t x = 0; x < nx; ++x) { _sol(x, -1) = _tmp(x, -1) = gN(X(x)); }
|
for (size_t x = 0; x < nx; ++x) { _sol.get(x, -1) = _tmp.get(x, -1) = gN(X(x)); }
|
||||||
// East
|
// East
|
||||||
for (size_t y = 0; y < ny; ++y) { _sol(-1, y) = _tmp(-1, y) = gE(Y(y)); }
|
for (size_t y = 0; y < ny; ++y) { _sol.get(-1, y) = _tmp.get(-1, y) = gE(Y(y)); }
|
||||||
// South
|
// South
|
||||||
for (size_t x = 0; x < nx; ++x) { _sol(x, 0) = _tmp(x, 0) = gS(X(x)); }
|
for (size_t x = 0; x < nx; ++x) { _sol(x, 0) = _tmp(x, 0) = gS(X(x)); }
|
||||||
// West
|
// West
|
||||||
|
@ -77,7 +77,20 @@ public:
|
||||||
N = North, E = East, S = South, W = West
|
N = North, E = East, S = South, W = West
|
||||||
};
|
};
|
||||||
|
|
||||||
MatrixView<double> boundary(enum Dir dir) {
|
MatrixView<double> read_boundary(enum Dir dir) {
|
||||||
|
switch (dir) {
|
||||||
|
case Dir::North:
|
||||||
|
return Col<double>(_sol, -2);
|
||||||
|
case Dir::East:
|
||||||
|
return Row<double>(_sol, -2);
|
||||||
|
case Dir::South:
|
||||||
|
return Col<double>(_sol, 1);
|
||||||
|
default: // Dir::West
|
||||||
|
return Row<double>(_sol, 1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
MatrixView<double> write_boundary(enum Dir dir) {
|
||||||
switch (dir) {
|
switch (dir) {
|
||||||
case Dir::North:
|
case Dir::North:
|
||||||
return Col<double>(_sol, -1);
|
return Col<double>(_sol, -1);
|
||||||
|
@ -91,13 +104,11 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Solution getter */
|
/** Solution getter */
|
||||||
Matrix<double>& solution() {
|
Matrix<double>& solution() { return _sol; }
|
||||||
return _sol;
|
/** Right Hand Side getter (grid evaluated f(x, y)) */
|
||||||
}
|
Matrix<double>& rhs() { return _rhs; }
|
||||||
|
|
||||||
/**
|
/** Performs a single Jacobian Iteration */
|
||||||
* Performs a single Jacobian Iteration
|
|
||||||
*/
|
|
||||||
void iterate() {
|
void iterate() {
|
||||||
double s = 1.0 / _stencil.C;
|
double s = 1.0 / _stencil.C;
|
||||||
|
|
||||||
|
@ -114,6 +125,37 @@ public:
|
||||||
_iter++;
|
_iter++;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Computes the L2 and Inf norm of the residuals */
|
||||||
|
template <enum Norm N = Norm::Frob>
|
||||||
|
double resid_norm() {
|
||||||
|
// Enforce valid Norm types
|
||||||
|
static_assert(N == Norm::Frob || N == Norm::Max);
|
||||||
|
|
||||||
|
// Norm accumulator
|
||||||
|
double accum = 0.0;
|
||||||
|
|
||||||
|
for (size_t y = 1; y < _ny - 1; ++y) {
|
||||||
|
for (size_t x = 1; x < _nx - 1; ++x) {
|
||||||
|
double resid = _rhs(x, y) - (_stencil.C * _sol(x, y) +
|
||||||
|
_stencil.B * _sol(x, y - 1) +
|
||||||
|
_stencil.L * _sol(x - 1, y) +
|
||||||
|
_stencil.U * _sol(x, y + 1) +
|
||||||
|
_stencil.R * _sol(x + 1, y));
|
||||||
|
if constexpr (N == Norm::Frob) {
|
||||||
|
accum += resid * resid;
|
||||||
|
} else { // Maximum Norm
|
||||||
|
accum = std::max(std::abs(resid), accum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if constexpr (N == Norm::Frob) {
|
||||||
|
return std::sqrt(accum);
|
||||||
|
} else { // Maximum Norm
|
||||||
|
return accum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
size_t _iter; /*< Iteration count */
|
size_t _iter; /*< Iteration count */
|
||||||
const size_t _nx; /*< Number of X-axis grid points */
|
const size_t _nx; /*< Number of X-axis grid points */
|
||||||
|
@ -122,8 +164,8 @@ private:
|
||||||
const double _xmax; /*< Domain X-max (east border pos) */
|
const double _xmax; /*< Domain X-max (east border pos) */
|
||||||
const double _ymin; /*< Domain Y-min (south border pos) */
|
const double _ymin; /*< Domain Y-min (south border pos) */
|
||||||
const double _ymax; /*< Domain Y-max (north border pos) */
|
const double _ymax; /*< Domain Y-max (north border pos) */
|
||||||
const Stencil _stencil; /*< Simple, + shaped stencil */
|
const Stencil _stencil; /*< Simple '+' shaped stencil */
|
||||||
Matrix<double> _rhs; /*< Grid evaluated RHS of the PDE = f(x, y) */
|
Matrix<double> _rhs; /*< Grid evaluated RHS of the PDE, f(x, y) */
|
||||||
Matrix<double> _sol; /*< Solution after _iter iterations */
|
Matrix<double> _sol; /*< Solution after `_iter` iterations */
|
||||||
Matrix<double> _tmp; /*< Temp. datablock, used in iterate() */
|
Matrix<double> _tmp; /*< Temp. datablock, used in iterate() */
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
/**
|
/**
|
||||||
* Computes row sums of a matrix containing (0-indexed) consecutive numbers
|
* Computes row sums of a matrix containing (0-indexed) consecutive numbers
|
||||||
* in row major order (yes, useless but illustrativ)
|
* in column major order
|
||||||
*
|
*
|
||||||
* Example:
|
* Example (entries are array indices):
|
||||||
* matrix (11 x 7): row_sums (11):
|
* matrix (11 x 7): row_sums (11):
|
||||||
* 0 11 22 33 44 55 66 -> 231
|
* 0 11 22 33 44 55 66 -> 231
|
||||||
* 1 12 23 34 45 56 67 -> 238
|
* 1 12 23 34 45 56 67 -> 238
|
||||||
|
@ -22,7 +22,7 @@
|
||||||
* worker in a dense layout (MPI_DOUBLE)
|
* worker in a dense layout (MPI_DOUBLE)
|
||||||
*
|
*
|
||||||
* Send/Recv Example:
|
* Send/Recv Example:
|
||||||
* Data send (entries are the array indices):
|
* Data send (entries are array indices):
|
||||||
* - - - - - - -
|
* - - - - - - -
|
||||||
* - - - - - - -
|
* - - - - - - -
|
||||||
* - - - - - - -
|
* - - - - - - -
|
||||||
|
@ -68,13 +68,12 @@ int min(int a, int b) { return a < b ? a : b; }
|
||||||
|
|
||||||
int main(int argn, char* argv[]) {
|
int main(int argn, char* argv[]) {
|
||||||
|
|
||||||
// Build a (simple and barebones, row major) matrix model, it's just a
|
// Build a (simple and barebones, column major) matrix model, it's just a
|
||||||
// vector with external row/col count.
|
// vector with external row/col count.
|
||||||
int nrow = 11; // defaults
|
int nrow = 11; // defaults
|
||||||
int ncol = 7; // defaults
|
int ncol = 7; // defaults
|
||||||
|
|
||||||
// Parse arguments to set nrow, ncol (sloppy, but thats not the point of
|
// Parse arguments to set nrow, ncol (sloppy, but not the point of the example)
|
||||||
// this example)
|
|
||||||
if (argn > 1) {
|
if (argn > 1) {
|
||||||
nrow = atoi(argv[1]);
|
nrow = atoi(argv[1]);
|
||||||
if (nrow < 1 || nrow > 1024) {
|
if (nrow < 1 || nrow > 1024) {
|
||||||
|
|
|
@ -1,10 +1,13 @@
|
||||||
/**
|
/**
|
||||||
* g++ main.cpp -std=c++17 -Wall -Wpedantic -pedantic -o main; ./main
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <sstream>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <chrono>
|
||||||
#define _USE_MATH_DEFINES /* enables math constants from cmath */
|
#define _USE_MATH_DEFINES /* enables math constants from cmath */
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#ifdef USE_MPI
|
#ifdef USE_MPI
|
||||||
|
@ -13,6 +16,7 @@
|
||||||
|
|
||||||
#include "Matrix.h"
|
#include "Matrix.h"
|
||||||
#include "Solver.h"
|
#include "Solver.h"
|
||||||
|
#include "utils.h"
|
||||||
|
|
||||||
int main(int argn, char* argv[]) {
|
int main(int argn, char* argv[]) {
|
||||||
|
|
||||||
|
@ -23,63 +27,321 @@ int main(int argn, char* argv[]) {
|
||||||
|
|
||||||
// Get MPI config
|
// Get MPI config
|
||||||
int mpi_size; /*< MPI pool size (a.k.a. total number of processes) */
|
int mpi_size; /*< MPI pool size (a.k.a. total number of processes) */
|
||||||
int mpi_rank; /*< MPI rank (a.k.a. process ID in the context of MPI) */
|
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
|
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
|
|
||||||
|
// Get MPI rank
|
||||||
|
int mpi_world_rank; /*< MPI world rank (a.k.a. grid process ID) */
|
||||||
|
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_world_rank);
|
||||||
|
#else
|
||||||
|
int mpi_size = 1;
|
||||||
|
int mpi_world_rank = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**************************** Parse Arguments *****************************/
|
/**************************** Parse Arguments *****************************/
|
||||||
if (argn < 3) {
|
if (0 < argn && argn < 4) {
|
||||||
std::cerr << "usage: " << argv[0] << " <resolution> <iterations>" << std::endl;
|
std::cerr << "usage: " << argv[0] << " <resolution> <iterations>" << std::endl;
|
||||||
return -1;
|
return -1;
|
||||||
} else if (argn > 3) {
|
} else if (argn > 4) {
|
||||||
std::cerr << "warning: " << "ignoring all but the first two params" << std::endl;
|
std::cerr << "warning: " << "ignoring all but the first three params" << std::endl;
|
||||||
}
|
}
|
||||||
// TODO: make this proper!!!
|
#ifdef USE_MPI
|
||||||
size_t resolution = atol(argv[1]);
|
size_t dim;
|
||||||
size_t iterations = atol(argv[2]);
|
if (std::string(argv[1]) == "1D") {
|
||||||
if (resolution < 1 || resolution > 65536
|
dim = 1;
|
||||||
|| iterations < 1 || iterations > 65536) {
|
} else if (std::string(argv[1]) == "2D") {
|
||||||
std::cerr << "error: parsing arguments failed" << std::endl;
|
dim = 2;
|
||||||
|
} else {
|
||||||
|
std::cerr << "error: Parsing arg 1 <dim> failed" << std::endl;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
size_t resolution;
|
||||||
|
if (!(std::istringstream(argv[2]) >> resolution)) {
|
||||||
|
std::cerr << "error: Parsing arg 2 <resolution> failed" << std::endl;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
size_t iterations;
|
||||||
|
if (!(std::istringstream(argv[3]) >> iterations)) {
|
||||||
|
std::cerr << "error: Parsing arg 3 <iterations> failed" << std::endl;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (resolution < 10 || resolution > 65536) {
|
||||||
|
std::cerr << "error: arg 1 <resolution> " << resolution
|
||||||
|
<< " out of domain [10, 65536]" << std::endl;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (iterations > 65536) {
|
||||||
|
std::cerr << "error: arg 2 <iterations> " << iterations
|
||||||
|
<< "out of domain [0, 65536]" << std::endl;
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Report configuration
|
||||||
|
if (mpi_world_rank == 0) {
|
||||||
|
std::cout << std::setfill(' ')
|
||||||
|
#ifdef USE_MPI
|
||||||
|
<< "use MPI: YES\n"
|
||||||
|
#else
|
||||||
|
<< "use MPI: NO\n"
|
||||||
|
#endif
|
||||||
|
<< "nr. processes: " << std::setw(5) << mpi_size << '\n'
|
||||||
|
<< "resolution: " << std::setw(5) << resolution << '\n'
|
||||||
|
<< "iterations: " << std::setw(5) << iterations << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
/************************* Initialize PDE Solver **************************/
|
/************************* Start Time Measurement *************************/
|
||||||
size_t nx = resolution;
|
auto start = std::chrono::high_resolution_clock::now();
|
||||||
size_t ny = resolution;
|
|
||||||
|
/**************** Setup (local) PDE + Boundary Conditions *****************/
|
||||||
const double k = M_PI;
|
const double k = M_PI;
|
||||||
const double h = 1.0 / static_cast<double>(resolution - 1);
|
const double h = 1.0 / static_cast<double>(resolution - 1);
|
||||||
|
|
||||||
|
#ifdef USE_MPI
|
||||||
|
// Group processes into a cartesian communication topology. Set initial
|
||||||
|
// values for a 1D grid.
|
||||||
|
int mpi_dims[2] = {mpi_size, 1};
|
||||||
|
// In case of a 2D grid, make equal partitions ob both axes (as equal as
|
||||||
|
// possible. Note that `MPI_Dims_create` does not garantee "as equal as".
|
||||||
|
// For example it was observed that for 9 processes the generated grid
|
||||||
|
// was a 9 x 1, the following computes a 3 x 3).
|
||||||
|
if (dim == 2) {
|
||||||
|
two_factors(mpi_size, mpi_dims);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Report grid topology
|
||||||
|
std::cout << "topology: " << dim << "D";
|
||||||
|
if (dim == 2) {
|
||||||
|
std::cout << " (" << mpi_dims[0] << " x " << mpi_dims[1] << ")";
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
// Setup a cartesian topology communicator (NON-cyclic)
|
||||||
|
const int mpi_periods[2] = {false, false};
|
||||||
|
MPI_Comm mpi_comm_grid;
|
||||||
|
MPI_Cart_create(
|
||||||
|
MPI_COMM_WORLD, // Old Communicator
|
||||||
|
2, // number of dimensions
|
||||||
|
mpi_dims, // grid dimensions
|
||||||
|
mpi_periods, // grid periodicity
|
||||||
|
true, // allow process reordering
|
||||||
|
&mpi_comm_grid
|
||||||
|
);
|
||||||
|
|
||||||
|
// Get MPI rank with respect to the grid communicator
|
||||||
|
int mpi_grid_rank; /*< MPI grid rank (a.k.a. grid process ID) */
|
||||||
|
MPI_Comm_rank(mpi_comm_grid, &mpi_grid_rank);
|
||||||
|
|
||||||
|
// Get coordinates with respect to the grid communicator
|
||||||
|
int mpi_coords[2];
|
||||||
|
MPI_Cart_coords(mpi_comm_grid, mpi_grid_rank, 2, mpi_coords);
|
||||||
|
|
||||||
|
// Get direct neightbours in the communication grid
|
||||||
|
struct { int north; int east; int south; int west; } mpi_neighbours;
|
||||||
|
// Get X-direction (dim 0) neightbours
|
||||||
|
MPI_Cart_shift(
|
||||||
|
mpi_comm_grid, // grid communicator
|
||||||
|
0, // axis index (0 <-> X)
|
||||||
|
1, // offset
|
||||||
|
&(mpi_neighbours.west), // negated offset neightbour
|
||||||
|
&(mpi_neighbours.east) // offset neightbour
|
||||||
|
);
|
||||||
|
// Get Y-direction (dim 1) neightbours
|
||||||
|
MPI_Cart_shift(
|
||||||
|
mpi_comm_grid,
|
||||||
|
1, // axis index (1 <-> Y)
|
||||||
|
1,
|
||||||
|
&(mpi_neighbours.south),
|
||||||
|
&(mpi_neighbours.north)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Calc local (base) grid size (without ghost layers)
|
||||||
|
size_t nx = partition(resolution, mpi_dims[0], mpi_coords[0]);
|
||||||
|
size_t ny = partition(resolution, mpi_dims[1], mpi_coords[1]);
|
||||||
|
// Add ghost layers for each (existing) neighbour
|
||||||
|
ny += (mpi_neighbours.north != MPI_PROC_NULL);
|
||||||
|
nx += (mpi_neighbours.east != MPI_PROC_NULL);
|
||||||
|
ny += (mpi_neighbours.south != MPI_PROC_NULL);
|
||||||
|
nx += (mpi_neighbours.west != MPI_PROC_NULL);
|
||||||
|
|
||||||
|
// Compute local domain [xmin, xmax] x [ymin, ymax]
|
||||||
|
double xmin = (mpi_neighbours.west == MPI_PROC_NULL) ? 0.0
|
||||||
|
: h * partition_sum(resolution, mpi_dims[0], mpi_coords[0] - 1);
|
||||||
|
double xmax = (mpi_neighbours.east == MPI_PROC_NULL) ? 1.0
|
||||||
|
: h * (partition_sum(resolution, mpi_dims[0], mpi_coords[0]) + 1);
|
||||||
|
double ymin = (mpi_neighbours.south == MPI_PROC_NULL) ? 0.0
|
||||||
|
: h * partition_sum(resolution, mpi_dims[1], mpi_coords[1] - 1);
|
||||||
|
double ymax = (mpi_neighbours.north == MPI_PROC_NULL) ? 1.0
|
||||||
|
: h * (partition_sum(resolution, mpi_dims[1], mpi_coords[1]) + 1);
|
||||||
|
|
||||||
|
// Create MPI vector Type (for boundary condition exchange)
|
||||||
|
// Allows directly exchange of matrix rows (north/south bounds) since the
|
||||||
|
// row elements are "sparce" in the sence that they are not directly aside
|
||||||
|
// each other in memory (column major matrix layout) in constrast to columns.
|
||||||
|
MPI_Datatype mpi_type_row;
|
||||||
|
MPI_Type_vector(ny, 1, nx, MPI_DOUBLE, &mpi_type_row);
|
||||||
|
MPI_Type_commit(&mpi_type_row);
|
||||||
|
#else
|
||||||
|
// Discretization grid resolution
|
||||||
|
size_t nx = resolution, ny = resolution;
|
||||||
|
// PDE domain borders [xmin, xmax] x [ymin, ymax] = [0, 1] x [0, 1]
|
||||||
|
double xmin = 0.0, xmax = 1.0, ymin = 0.0, ymax = 1.0;
|
||||||
|
#endif
|
||||||
|
|
||||||
// Declare right hand side function f(x, y) = k^2 sin(2 pi x) sinh(2 pi y)
|
// Declare right hand side function f(x, y) = k^2 sin(2 pi x) sinh(2 pi y)
|
||||||
std::function<double(double, double)> fun = [k](double x, double y) {
|
std::function<double(double, double)> fun = [k](double x, double y) {
|
||||||
return k * k * sin(M_2_PI * x) * sinh(M_2_PI * y);
|
return k * k * sin(2 * M_PI * x) * sinh(2 * M_PI * y);
|
||||||
};
|
};
|
||||||
// Boundary conditions
|
// Boundary conditions
|
||||||
/** North boundary condition g(x) = k^2 sin(2 pi x) sinh(2 pi) */
|
/** North boundary condition gN(x) = k^2 sin(2 pi x) sinh(2 pi) */
|
||||||
std::function<double(double)> gN = [k](double x) {
|
std::function<double(double)> gN;
|
||||||
return k * k * sin(M_2_PI * x) * sinh(M_2_PI);
|
#ifdef USE_MPI
|
||||||
};
|
// Check if local north boundary is part of the global north boundary
|
||||||
/** East, South and West boundary conditions are simply = 0 */
|
if (mpi_neighbours.north == MPI_PROC_NULL) {
|
||||||
|
#endif
|
||||||
|
// The local north boundary is equals the global north boundary
|
||||||
|
gN = [k](double x) { return sin(2 * M_PI * x) * sinh(2 * M_PI); };
|
||||||
|
#ifdef USE_MPI
|
||||||
|
} else {
|
||||||
|
// local north boundary is zero like all the rest
|
||||||
|
gN = [k](double) { return 0.0; };
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
/** East, South and West boundary conditions are all 0 */
|
||||||
std::function<double(double)> g0 = [k](double) { return 0.0; };
|
std::function<double(double)> g0 = [k](double) { return 0.0; };
|
||||||
|
|
||||||
/******************************* Solve PDE ********************************/
|
/******************************* Solve PDE ********************************/
|
||||||
Matrix<double> solution;
|
// Instanciate solver (local instance)
|
||||||
{
|
Solver solver(nx, ny, xmin, xmax, ymin, ymax, h, k, fun, gN, g0, g0, g0);
|
||||||
// Instanciate solver (local instance)
|
|
||||||
Solver solver(nx, ny, 0., 1., 0., 1., h, k, fun, gN, g0, g0, g0);
|
|
||||||
|
|
||||||
// Run solver iterations
|
// Run solver iterations
|
||||||
for (size_t iter = 0; iter < iterations; ++iter) {
|
for (size_t iter = 0; iter < iterations; ++iter) {
|
||||||
solver.iterate();
|
// Perform a single stencil jacobi iteration
|
||||||
|
solver.iterate();
|
||||||
|
|
||||||
|
#ifdef USE_MPI
|
||||||
|
// Non-blocking send boundary conditions to all neightbours
|
||||||
|
MPI_Request mpi_requests[4];
|
||||||
|
int mpi_request_count = 0;
|
||||||
|
|
||||||
|
if (mpi_neighbours.north != MPI_PROC_NULL) {
|
||||||
|
auto bound = solver.read_boundary(Solver::Dir::North);
|
||||||
|
MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
|
||||||
|
mpi_neighbours.north, iter, mpi_comm_grid,
|
||||||
|
&mpi_requests[mpi_request_count++]);
|
||||||
|
}
|
||||||
|
if (mpi_neighbours.east != MPI_PROC_NULL) {
|
||||||
|
auto bound = solver.read_boundary(Solver::Dir::East);
|
||||||
|
MPI_Isend(bound.data(), 1, mpi_type_row,
|
||||||
|
mpi_neighbours.east, iter, mpi_comm_grid,
|
||||||
|
&mpi_requests[mpi_request_count++]);
|
||||||
|
}
|
||||||
|
if (mpi_neighbours.south != MPI_PROC_NULL) {
|
||||||
|
auto bound = solver.read_boundary(Solver::Dir::South);
|
||||||
|
MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
|
||||||
|
mpi_neighbours.south, iter, mpi_comm_grid,
|
||||||
|
&mpi_requests[mpi_request_count++]);
|
||||||
|
}
|
||||||
|
if (mpi_neighbours.west != MPI_PROC_NULL) {
|
||||||
|
auto bound = solver.read_boundary(Solver::Dir::West);
|
||||||
|
MPI_Isend(bound.data(), 1, mpi_type_row,
|
||||||
|
mpi_neighbours.west, iter, mpi_comm_grid,
|
||||||
|
&mpi_requests[mpi_request_count++]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// extract solution
|
// Wait for all send to complete before receiving new data
|
||||||
solution = std::move(solver.solution());
|
// (just to be save)
|
||||||
|
MPI_Waitall(mpi_request_count, mpi_requests, MPI_STATUSES_IGNORE);
|
||||||
|
|
||||||
|
// Get new boundary conditions using a blocking receive
|
||||||
|
MPI_Status mpi_status;
|
||||||
|
if (mpi_neighbours.north != MPI_PROC_NULL) {
|
||||||
|
auto bound = solver.write_boundary(Solver::Dir::North);
|
||||||
|
MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
|
||||||
|
mpi_neighbours.north, iter, mpi_comm_grid, &mpi_status);
|
||||||
|
}
|
||||||
|
if (mpi_neighbours.east != MPI_PROC_NULL) {
|
||||||
|
auto bound = solver.write_boundary(Solver::Dir::East);
|
||||||
|
MPI_Recv(bound.data(), 1, mpi_type_row,
|
||||||
|
mpi_neighbours.east, iter, mpi_comm_grid, &mpi_status);
|
||||||
|
}
|
||||||
|
if (mpi_neighbours.south != MPI_PROC_NULL) {
|
||||||
|
auto bound = solver.write_boundary(Solver::Dir::South);
|
||||||
|
MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
|
||||||
|
mpi_neighbours.south, iter, mpi_comm_grid, &mpi_status);
|
||||||
|
}
|
||||||
|
if (mpi_neighbours.west != MPI_PROC_NULL) {
|
||||||
|
auto bound = solver.write_boundary(Solver::Dir::West);
|
||||||
|
MPI_Recv(bound.data(), 1, mpi_type_row,
|
||||||
|
mpi_neighbours.west, iter, mpi_comm_grid, &mpi_status);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/****************************** Tests/Report ******************************/
|
/***************************** Solution Stats *****************************/
|
||||||
|
// Get solution
|
||||||
|
Matrix<double>& solution = solver.solution();
|
||||||
|
|
||||||
std::cout << solution << std::endl;
|
// Compute analytic (true) solution
|
||||||
|
Matrix<double> analytic(solution.nrow(), solution.ncol());
|
||||||
|
double x = xmin;
|
||||||
|
double y = ymin;
|
||||||
|
for (size_t j = 0; j < analytic.ncol(); ++j, y += h) {
|
||||||
|
for (size_t i = 0; i < analytic.nrow(); ++i, x += h) {
|
||||||
|
analytic(i, j) = sin(2 * M_PI * x) * sinh(2 * M_PI * y);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef USE_MPI
|
||||||
|
// Frobenius Norm Accumulator
|
||||||
|
double frob_norm_sendbuf[2] = {
|
||||||
|
solver.resid_norm(), solution.dist(analytic)
|
||||||
|
};
|
||||||
|
// Square the norms (Accumulation of partial results is the square root of
|
||||||
|
// the sum of the squared partial norms)
|
||||||
|
frob_norm_sendbuf[0] = frob_norm_sendbuf[0] * frob_norm_sendbuf[0];
|
||||||
|
frob_norm_sendbuf[1] = frob_norm_sendbuf[1] * frob_norm_sendbuf[1];
|
||||||
|
// Maximum Norm Accumulator
|
||||||
|
double max_norm_sendbuf[2] = {
|
||||||
|
solver.resid_norm<Max>(), solution.dist<Max>(analytic)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Global result reduction buffers
|
||||||
|
double frob_norm_recvbuf[2], max_norm_recvbuf[2];
|
||||||
|
|
||||||
|
// Accumulate global stats by sum/max reduction of local stats.
|
||||||
|
MPI_Reduce(frob_norm_sendbuf, frob_norm_recvbuf, 2, MPI_DOUBLE, MPI_SUM,
|
||||||
|
0, MPI_COMM_WORLD);
|
||||||
|
MPI_Reduce(max_norm_sendbuf, max_norm_recvbuf, 2, MPI_DOUBLE, MPI_MAX,
|
||||||
|
0, MPI_COMM_WORLD);
|
||||||
|
|
||||||
|
// Finish by taking the square root of the accumulated global
|
||||||
|
// squared frobenius norms
|
||||||
|
frob_norm_recvbuf[0] = std::sqrt(frob_norm_recvbuf[0]);
|
||||||
|
frob_norm_recvbuf[1] = std::sqrt(frob_norm_recvbuf[1]);
|
||||||
|
#else
|
||||||
|
double frob_norm_recvbuf[2] = {
|
||||||
|
solver.resid_norm(), solution.dist(analytic)
|
||||||
|
};
|
||||||
|
double max_norm_recvbuf[2] = {
|
||||||
|
solver.resid_norm<Max>(), solution.dist<Max>(analytic)
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// calculate runtime time
|
||||||
|
auto stop = std::chrono::high_resolution_clock::now();
|
||||||
|
auto time = std::chrono::duration_cast<std::chrono::duration<double>>(stop - start)
|
||||||
|
.count();
|
||||||
|
|
||||||
|
// Report global stats (from "root" only)
|
||||||
|
if (mpi_world_rank == 0) {
|
||||||
|
std::cout
|
||||||
|
<< "Time [sec]: " << std::setw(15) << time
|
||||||
|
<< "\n|| residuals ||_2: " << std::setw(15) << frob_norm_recvbuf[0]
|
||||||
|
<< "\n|| residuals ||_inf: " << std::setw(15) << max_norm_recvbuf[0]
|
||||||
|
<< "\n|| error ||_2: " << std::setw(15) << frob_norm_recvbuf[1]
|
||||||
|
<< "\n|| error ||_inf: " << std::setw(15) << max_norm_recvbuf[1]
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
// MPI shutdown/cleanup
|
// MPI shutdown/cleanup
|
||||||
#ifdef USE_MPI
|
#ifdef USE_MPI
|
||||||
|
|
|
@ -0,0 +1,105 @@
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Partitions an integer `num` into `div` summands and returns the `i`th
|
||||||
|
* of the partition.
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* num = 17
|
||||||
|
* div = 5
|
||||||
|
*
|
||||||
|
* partition(17, 5, 0) -> 4
|
||||||
|
* partition(17, 5, 1) -> 4
|
||||||
|
* partition(17, 5, 2) -> 3
|
||||||
|
* partition(17, 5, 3) -> 3
|
||||||
|
* partition(17, 5, 5) -> 3
|
||||||
|
*
|
||||||
|
* 17 = num = div * num + num % div = 4 + 4 + 3 + 3 + 3
|
||||||
|
* 1st 2nd 3rd 4th 5th
|
||||||
|
* i=0 i=1 i=2 i=3 i=4
|
||||||
|
*/
|
||||||
|
int partition(int num, int div, int i) {
|
||||||
|
return num / div + static_cast<int>(i < (num % div));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes the partial sum of the first `i` integer `num` partitiones into
|
||||||
|
* `div` parts.
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* num = 17
|
||||||
|
* div = 5
|
||||||
|
*
|
||||||
|
* partition_sum(17, 5, 0) -> 4
|
||||||
|
* partition_sum(17, 5, 1) -> 8
|
||||||
|
* partition_sum(17, 5, 2) -> 11
|
||||||
|
* partition_sum(17, 5, 3) -> 14
|
||||||
|
* partition_sum(17, 5, 5) -> 17
|
||||||
|
*/
|
||||||
|
int partition_sum(int num, int div, int i) {
|
||||||
|
int sum = 0;
|
||||||
|
for (int j = 0; j <= i; ++j) {
|
||||||
|
sum += partition(num, div, j);
|
||||||
|
}
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factorized a integer number `num` into two multiplicative factors.
|
||||||
|
* These factors are as close together as possible. This means for example for
|
||||||
|
* square numbers that the two factors are the square root of `num`.
|
||||||
|
*
|
||||||
|
* Assumes small numbers and therefore uses a simple linear search.
|
||||||
|
*
|
||||||
|
* The first few integers are factorized as follows:
|
||||||
|
*
|
||||||
|
* 0 = 1 * 0
|
||||||
|
* 1 = 1 * 1
|
||||||
|
* 2 = 1 * 2 (is prime)
|
||||||
|
* 3 = 1 * 3 (is prime)
|
||||||
|
* 4 = 2 * 2 (is square)
|
||||||
|
* 5 = 1 * 5 (is prime)
|
||||||
|
* 6 = 2 * 3
|
||||||
|
* 7 = 1 * 7 (is prime)
|
||||||
|
* 8 = 2 * 4
|
||||||
|
* 9 = 3 * 3 (is square)
|
||||||
|
* 10 = 2 * 5
|
||||||
|
* 11 = 1 * 11 (is prime)
|
||||||
|
* 12 = 3 * 4
|
||||||
|
* 13 = 1 * 13 (is prime)
|
||||||
|
* 14 = 2 * 7
|
||||||
|
* 15 = 3 * 5
|
||||||
|
* 16 = 4 * 4 (is square)
|
||||||
|
*
|
||||||
|
* @param num integer to be factorized
|
||||||
|
* @param factor [out] output parameter of length 2 where the two factors are
|
||||||
|
* written into.
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* int factors[2];
|
||||||
|
* two_factors(15, factors); // -> factors = {3, 5}
|
||||||
|
*/
|
||||||
|
void two_factors(int num, int* factors) {
|
||||||
|
// In case of zero, set both to zero
|
||||||
|
if (!num) {
|
||||||
|
factors[0] = factors[1] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure `num` is positive
|
||||||
|
if (num < 0) {
|
||||||
|
num *= -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set initial factorization (this always works)
|
||||||
|
factors[0] = 1;
|
||||||
|
factors[1] = num;
|
||||||
|
|
||||||
|
// Check all numbers `i` untill the integer square-root
|
||||||
|
for (int i = 2; i * i <= num; ++i) {
|
||||||
|
// Check if `i` is a divisor
|
||||||
|
if (!(num % i)) {
|
||||||
|
// Update factors as `i` divides `num`
|
||||||
|
factors[0] = i;
|
||||||
|
factors[1] = num / i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue