a bit of cleanup
This commit is contained in:
parent
ee5cbef1d6
commit
1992c38dc4
|
@ -35,11 +35,6 @@ public:
|
||||||
_nrow{A._nrow},
|
_nrow{A._nrow},
|
||||||
_ncol{A._ncol},
|
_ncol{A._ncol},
|
||||||
_data(A._data) { };
|
_data(A._data) { };
|
||||||
// // Move constructor // TODO:
|
|
||||||
// Matrix(Matrix<T>&& A) :
|
|
||||||
// _nrow{A._nrow},
|
|
||||||
// _ncol{A._ncol},
|
|
||||||
// _data(std::move(A._data)) { };
|
|
||||||
|
|
||||||
size_t nrow() const { return _nrow; };
|
size_t nrow() const { return _nrow; };
|
||||||
size_t ncol() const { return _ncol; };
|
size_t ncol() const { return _ncol; };
|
||||||
|
@ -87,12 +82,11 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Distance of this Matrix to given matrix A as || this - A ||_N
|
* Distance of this Matrix to given matrix A as || this - A ||_N
|
||||||
*
|
*
|
||||||
* Note: It there is a dimension missmatch, only the number of elements
|
* Note: If there is a dimension mismatch, only the number of elements
|
||||||
* of the smaller matrix are considured.
|
* of the smaller matrix are considered.
|
||||||
*
|
*
|
||||||
* @param A matrix to compute the "distance" to
|
* @param A matrix to compute the "distance" to
|
||||||
* @param mar_b bottom margins
|
* @param mar_b bottom margins
|
||||||
|
@ -110,7 +104,7 @@ public:
|
||||||
assert(this->_nrow == A._nrow);
|
assert(this->_nrow == A._nrow);
|
||||||
assert(this->_ncol == A._ncol);
|
assert(this->_ncol == A._ncol);
|
||||||
|
|
||||||
T accum = static_cast<T>(0); /*< result accomulator */
|
T accum = static_cast<T>(0); /*< result accumulator */
|
||||||
for (size_t j = mar_l; j + mar_r < _ncol; ++j) {
|
for (size_t j = mar_l; j + mar_r < _ncol; ++j) {
|
||||||
for (size_t i = mar_t; i + mar_b < _nrow; ++i) {
|
for (size_t i = mar_t; i + mar_b < _nrow; ++i) {
|
||||||
if constexpr (N == Norm::Frob) {
|
if constexpr (N == Norm::Frob) {
|
|
@ -40,11 +40,6 @@ public:
|
||||||
_tmp(nx, ny, 0.)
|
_tmp(nx, ny, 0.)
|
||||||
{
|
{
|
||||||
// Initialize Right Hand Side _rhs(x, y) = f(X(x), Y(y))
|
// Initialize Right Hand Side _rhs(x, y) = f(X(x), Y(y))
|
||||||
// Note that the correspondence usual matrix indexing sceeme as
|
|
||||||
// row/colums indices lead to an missplaces representation if the matrix
|
|
||||||
// is printed in the usual format (x <-> rows, y <-> negative columns).
|
|
||||||
// If this in entierly ignored and only considured in the case of
|
|
||||||
// printing the matrix, everything is fine.
|
|
||||||
for (size_t x = 0; x < nx; ++x) {
|
for (size_t x = 0; x < nx; ++x) {
|
||||||
for (size_t y = 0; y < ny; ++y) {
|
for (size_t y = 0; y < ny; ++y) {
|
||||||
_rhs(x, y) = fun(X(x), Y(y));
|
_rhs(x, y) = fun(X(x), Y(y));
|
||||||
|
@ -108,7 +103,7 @@ public:
|
||||||
/** Right Hand Side getter (grid evaluated f(x, y)) */
|
/** Right Hand Side getter (grid evaluated f(x, y)) */
|
||||||
Matrix<double>& rhs() { return _rhs; }
|
Matrix<double>& rhs() { return _rhs; }
|
||||||
|
|
||||||
/** Performs a single Jacobian Iteration */
|
/** Performs a single Jacobi iteration */
|
||||||
void iterate() {
|
void iterate() {
|
||||||
double s = 1.0 / _stencil.C;
|
double s = 1.0 / _stencil.C;
|
||||||
|
|
||||||
|
@ -160,12 +155,12 @@ private:
|
||||||
size_t _iter; /*< Iteration count */
|
size_t _iter; /*< Iteration count */
|
||||||
const size_t _nx; /*< Number of X-axis grid points */
|
const size_t _nx; /*< Number of X-axis grid points */
|
||||||
const size_t _ny; /*< Number of Y-axis grid points */
|
const size_t _ny; /*< Number of Y-axis grid points */
|
||||||
const double _xmin; /*< Domain X-min (west border pos) */
|
const double _xmin; /*< Domain X-min (west border position) */
|
||||||
const double _xmax; /*< Domain X-max (east border pos) */
|
const double _xmax; /*< Domain X-max (east border position) */
|
||||||
const double _ymin; /*< Domain Y-min (south border pos) */
|
const double _ymin; /*< Domain Y-min (south border position) */
|
||||||
const double _ymax; /*< Domain Y-max (north border pos) */
|
const double _ymax; /*< Domain Y-max (north border position) */
|
||||||
const Stencil _stencil; /*< Simple '+' shaped stencil */
|
const Stencil _stencil; /*< Simple '+' shaped stencil */
|
||||||
Matrix<double> _rhs; /*< Grid evaluated RHS of the PDE, f(x, y) */
|
Matrix<double> _rhs; /*< Grid evaluated RHS of the PDE, f(x, y) */
|
||||||
Matrix<double> _sol; /*< Solution after `_iter` iterations */
|
Matrix<double> _sol; /*< Solution after `_iter` iterations */
|
||||||
Matrix<double> _tmp; /*< Temp. datablock, used in iterate() */
|
Matrix<double> _tmp; /*< Temp. data block, used in iterate() */
|
||||||
};
|
};
|
|
@ -1,7 +1,21 @@
|
||||||
/**
|
/**
|
||||||
|
* Implementation of an MPI_Parallel Stencil-Based Jacobi Solver for the 2D PDE
|
||||||
*
|
*
|
||||||
|
* -Du(x, y) + k^2 u(x, y) = f(x, y), (x, y) in [0, 1] x [0, 1]
|
||||||
|
*
|
||||||
|
* with the scalar k = 2 pi. The right hand side is
|
||||||
|
*
|
||||||
|
* f(x, y) = k^2 sin(2 pi x) sinh(2 pi y)
|
||||||
|
*
|
||||||
|
* and the Dirichlet boundary conditions
|
||||||
|
*
|
||||||
|
* u(0, y) = u(1, y) = u(x, 0) = 0, x, y in [0, 1]
|
||||||
|
* u(x, 1) = sin(2 pi x) sinh(2 pi), x in [0, 1]
|
||||||
|
*
|
||||||
|
* The following program is implemented such that it can be compiled in serial
|
||||||
|
* or MPI-parallel form by declaring the `USE_MPI` macro (see `Makefile`).
|
||||||
*/
|
*/
|
||||||
#define _USE_MATH_DEFINES /* enables math constants from cmath */
|
#define _USE_MATH_DEFINES /* enables math constants from `cmath` */
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
@ -10,8 +24,9 @@
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
#ifdef USE_MPI
|
#ifdef USE_MPI
|
||||||
#include <mpi.h>
|
#include <mpi.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "Matrix.h"
|
#include "Matrix.h"
|
||||||
|
@ -25,7 +40,7 @@ int main(int argn, char* argv[]) {
|
||||||
// Initialize MPI
|
// Initialize MPI
|
||||||
MPI_Init(nullptr, nullptr);
|
MPI_Init(nullptr, nullptr);
|
||||||
|
|
||||||
// Get MPI config
|
// Get MPI configuration
|
||||||
int mpi_size; /*< MPI pool size (a.k.a. total number of processes) */
|
int mpi_size; /*< MPI pool size (a.k.a. total number of processes) */
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
|
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
|
||||||
|
|
||||||
|
@ -97,13 +112,13 @@ int main(int argn, char* argv[]) {
|
||||||
const double h = 1.0 / static_cast<double>(resolution - 1);
|
const double h = 1.0 / static_cast<double>(resolution - 1);
|
||||||
|
|
||||||
#ifdef USE_MPI
|
#ifdef USE_MPI
|
||||||
// Group processes into a cartesian communication topology. Set initial
|
// Group processes into a Cartesian communication topology. Set initial
|
||||||
// values for a 1D grid.
|
// values for a 1D grid.
|
||||||
int mpi_dims[2] = {mpi_size, 1};
|
int mpi_dims[2] = {mpi_size, 1};
|
||||||
// In case of a 2D grid, make equal partitions on both axes (as equal as
|
// In case of a 2D grid, make equal partitions on both axes (as equal as
|
||||||
// possible. Note that `MPI_Dims_create` does not garantee "as equal as".
|
// possible. Note that `MPI_Dims_create` does not guarantee "as equal as".
|
||||||
// For example it was observed that for 9 processes the generated grid
|
// For example it was observed that for 9 processes the generated grid
|
||||||
// was a 9 x 1, the following computes a 3 x 3).
|
// was a 9 x 1, the following computes a 3 x 3 decomposition).
|
||||||
if (dim == 2) {
|
if (dim == 2) {
|
||||||
two_factors(mpi_size, mpi_dims);
|
two_factors(mpi_size, mpi_dims);
|
||||||
}
|
}
|
||||||
|
@ -115,7 +130,7 @@ int main(int argn, char* argv[]) {
|
||||||
}
|
}
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
|
|
||||||
// Setup a cartesian topology communicator (NON-cyclic)
|
// Setup a Cartesian topology communicator (NON-cyclic)
|
||||||
const int mpi_periods[2] = {false, false};
|
const int mpi_periods[2] = {false, false};
|
||||||
MPI_Comm mpi_comm_grid;
|
MPI_Comm mpi_comm_grid;
|
||||||
MPI_Cart_create(
|
MPI_Cart_create(
|
||||||
|
@ -135,53 +150,53 @@ int main(int argn, char* argv[]) {
|
||||||
int mpi_coords[2];
|
int mpi_coords[2];
|
||||||
MPI_Cart_coords(mpi_comm_grid, mpi_grid_rank, 2, mpi_coords);
|
MPI_Cart_coords(mpi_comm_grid, mpi_grid_rank, 2, mpi_coords);
|
||||||
|
|
||||||
// Get direct neightbours in the communication grid
|
// Get direct neighbors in the communication grid
|
||||||
struct { int north; int east; int south; int west; } mpi_neighbours;
|
struct { int north; int east; int south; int west; } mpi_neighbors;
|
||||||
// Get X-direction (dim 0) neightbours
|
// Get X-direction (dim 0) neighbors
|
||||||
MPI_Cart_shift(
|
MPI_Cart_shift(
|
||||||
mpi_comm_grid, // grid communicator
|
mpi_comm_grid, // grid communicator
|
||||||
0, // axis index (0 <-> X)
|
0, // axis index (0 <-> X)
|
||||||
1, // offset
|
1, // offset
|
||||||
&(mpi_neighbours.west), // negated offset neightbour
|
&(mpi_neighbors.west), // negated offset neighbor
|
||||||
&(mpi_neighbours.east) // offset neightbour
|
&(mpi_neighbors.east) // offset neighbor
|
||||||
);
|
);
|
||||||
// Get Y-direction (dim 1) neightbours
|
// Get Y-direction (dim 1) neighbors
|
||||||
MPI_Cart_shift(
|
MPI_Cart_shift(
|
||||||
mpi_comm_grid,
|
mpi_comm_grid,
|
||||||
1, // axis index (1 <-> Y)
|
1, // axis index (1 <-> Y)
|
||||||
1,
|
1,
|
||||||
&(mpi_neighbours.south),
|
&(mpi_neighbors.south),
|
||||||
&(mpi_neighbours.north)
|
&(mpi_neighbors.north)
|
||||||
);
|
);
|
||||||
|
|
||||||
// Calc local (base) grid size (without ghost layers)
|
// Calculate local (base) grid size (without ghost layers)
|
||||||
size_t nx = partition(resolution, mpi_dims[0], mpi_coords[0]);
|
size_t nx = partition(resolution, mpi_dims[0], mpi_coords[0]);
|
||||||
size_t ny = partition(resolution, mpi_dims[1], mpi_coords[1]);
|
size_t ny = partition(resolution, mpi_dims[1], mpi_coords[1]);
|
||||||
// Add ghost layers for each (existing) neighbour
|
// Add ghost layers for each (existing) neighbor
|
||||||
ny += (mpi_neighbours.north != MPI_PROC_NULL);
|
ny += (mpi_neighbors.north != MPI_PROC_NULL);
|
||||||
nx += (mpi_neighbours.east != MPI_PROC_NULL);
|
nx += (mpi_neighbors.east != MPI_PROC_NULL);
|
||||||
ny += (mpi_neighbours.south != MPI_PROC_NULL);
|
ny += (mpi_neighbors.south != MPI_PROC_NULL);
|
||||||
nx += (mpi_neighbours.west != MPI_PROC_NULL);
|
nx += (mpi_neighbors.west != MPI_PROC_NULL);
|
||||||
|
|
||||||
// Compute local domain [xmin, xmax] x [ymin, ymax]
|
// Compute local domain [xmin, xmax] x [ymin, ymax]
|
||||||
double xmin = (mpi_neighbours.west == MPI_PROC_NULL) ? 0.0
|
double xmin = (mpi_neighbors.west == MPI_PROC_NULL) ? 0.0
|
||||||
: h * (partition_sum(resolution, mpi_dims[0], mpi_coords[0] - 1) - 1);
|
: h * (partition_sum(resolution, mpi_dims[0], mpi_coords[0] - 1) - 1);
|
||||||
double xmax = (mpi_neighbours.east == MPI_PROC_NULL) ? 1.0
|
double xmax = (mpi_neighbors.east == MPI_PROC_NULL) ? 1.0
|
||||||
: h * partition_sum(resolution, mpi_dims[0], mpi_coords[0]);
|
: h * partition_sum(resolution, mpi_dims[0], mpi_coords[0]);
|
||||||
double ymin = (mpi_neighbours.south == MPI_PROC_NULL) ? 0.0
|
double ymin = (mpi_neighbors.south == MPI_PROC_NULL) ? 0.0
|
||||||
: h * (partition_sum(resolution, mpi_dims[1], mpi_coords[1] - 1) - 1);
|
: h * (partition_sum(resolution, mpi_dims[1], mpi_coords[1] - 1) - 1);
|
||||||
double ymax = (mpi_neighbours.north == MPI_PROC_NULL) ? 1.0
|
double ymax = (mpi_neighbors.north == MPI_PROC_NULL) ? 1.0
|
||||||
: h * partition_sum(resolution, mpi_dims[1], mpi_coords[1]);
|
: h * partition_sum(resolution, mpi_dims[1], mpi_coords[1]);
|
||||||
|
|
||||||
// Create MPI vector Type (for boundary condition exchange)
|
// Create MPI vector Type (for boundary condition exchange)
|
||||||
// Allows direct exchange of matrix rows (north/south bounds) since the
|
// Allows direct exchange of matrix rows (north/south bounds) since the
|
||||||
// row elements are "sparce" in the sence that they are not directly aside
|
// row elements are "sparse" in the sense that they are not directly aside
|
||||||
// each other in memory (column major matrix layout) in constrast to columns.
|
// each other in memory (column major matrix layout) in contrast to columns.
|
||||||
MPI_Datatype mpi_type_row;
|
MPI_Datatype mpi_type_row;
|
||||||
MPI_Type_vector(ny, 1, nx, MPI_DOUBLE, &mpi_type_row);
|
MPI_Type_vector(ny, 1, nx, MPI_DOUBLE, &mpi_type_row);
|
||||||
MPI_Type_commit(&mpi_type_row);
|
MPI_Type_commit(&mpi_type_row);
|
||||||
#else
|
#else
|
||||||
// Discretization grid resolution
|
// discretization grid resolution
|
||||||
size_t nx = resolution, ny = resolution;
|
size_t nx = resolution, ny = resolution;
|
||||||
// PDE domain borders [xmin, xmax] x [ymin, ymax] = [0, 1] x [0, 1]
|
// PDE domain borders [xmin, xmax] x [ymin, ymax] = [0, 1] x [0, 1]
|
||||||
double xmin = 0.0, xmax = 1.0, ymin = 0.0, ymax = 1.0;
|
double xmin = 0.0, xmax = 1.0, ymin = 0.0, ymax = 1.0;
|
||||||
|
@ -196,7 +211,7 @@ int main(int argn, char* argv[]) {
|
||||||
std::function<double(double)> gN;
|
std::function<double(double)> gN;
|
||||||
#ifdef USE_MPI
|
#ifdef USE_MPI
|
||||||
// Check if local north boundary is part of the global north boundary
|
// Check if local north boundary is part of the global north boundary
|
||||||
if (mpi_neighbours.north == MPI_PROC_NULL) {
|
if (mpi_neighbors.north == MPI_PROC_NULL) {
|
||||||
#endif
|
#endif
|
||||||
// The local north boundary equals the global north boundary
|
// The local north boundary equals the global north boundary
|
||||||
gN = [k](double x) { return sin(2 * M_PI * x) * sinh(2 * M_PI); };
|
gN = [k](double x) { return sin(2 * M_PI * x) * sinh(2 * M_PI); };
|
||||||
|
@ -210,41 +225,41 @@ int main(int argn, char* argv[]) {
|
||||||
std::function<double(double)> g0 = [k](double) { return 0.0; };
|
std::function<double(double)> g0 = [k](double) { return 0.0; };
|
||||||
|
|
||||||
/******************************* Solve PDE ********************************/
|
/******************************* Solve PDE ********************************/
|
||||||
// Instanciate solver (local instance)
|
// Instantiate solver (local instance)
|
||||||
Solver solver(nx, ny, xmin, xmax, ymin, ymax, h, k, fun, gN, g0, g0, g0);
|
Solver solver(nx, ny, xmin, xmax, ymin, ymax, h, k, fun, gN, g0, g0, g0);
|
||||||
|
|
||||||
// Run solver iterations
|
// Run solver iterations
|
||||||
for (size_t iter = 0; iter < iterations; ++iter) {
|
for (size_t iter = 0; iter < iterations; ++iter) {
|
||||||
// Perform a single stencil jacobi iteration
|
// Perform a single stencil Jacobi iteration
|
||||||
solver.iterate();
|
solver.iterate();
|
||||||
|
|
||||||
#ifdef USE_MPI
|
#ifdef USE_MPI
|
||||||
// Non-blocking send boundary conditions to all neightbours
|
// Non-blocking send boundary conditions to all neighbors
|
||||||
MPI_Request mpi_requests[4];
|
MPI_Request mpi_requests[4];
|
||||||
int mpi_request_count = 0;
|
int mpi_request_count = 0;
|
||||||
|
|
||||||
if (mpi_neighbours.north != MPI_PROC_NULL) {
|
if (mpi_neighbors.north != MPI_PROC_NULL) {
|
||||||
auto bound = solver.read_boundary(Solver::Dir::North);
|
auto bound = solver.read_boundary(Solver::Dir::North);
|
||||||
MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
|
MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
|
||||||
mpi_neighbours.north, iter, mpi_comm_grid,
|
mpi_neighbors.north, iter, mpi_comm_grid,
|
||||||
&mpi_requests[mpi_request_count++]);
|
&mpi_requests[mpi_request_count++]);
|
||||||
}
|
}
|
||||||
if (mpi_neighbours.east != MPI_PROC_NULL) {
|
if (mpi_neighbors.east != MPI_PROC_NULL) {
|
||||||
auto bound = solver.read_boundary(Solver::Dir::East);
|
auto bound = solver.read_boundary(Solver::Dir::East);
|
||||||
MPI_Isend(bound.data(), 1, mpi_type_row,
|
MPI_Isend(bound.data(), 1, mpi_type_row,
|
||||||
mpi_neighbours.east, iter, mpi_comm_grid,
|
mpi_neighbors.east, iter, mpi_comm_grid,
|
||||||
&mpi_requests[mpi_request_count++]);
|
&mpi_requests[mpi_request_count++]);
|
||||||
}
|
}
|
||||||
if (mpi_neighbours.south != MPI_PROC_NULL) {
|
if (mpi_neighbors.south != MPI_PROC_NULL) {
|
||||||
auto bound = solver.read_boundary(Solver::Dir::South);
|
auto bound = solver.read_boundary(Solver::Dir::South);
|
||||||
MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
|
MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
|
||||||
mpi_neighbours.south, iter, mpi_comm_grid,
|
mpi_neighbors.south, iter, mpi_comm_grid,
|
||||||
&mpi_requests[mpi_request_count++]);
|
&mpi_requests[mpi_request_count++]);
|
||||||
}
|
}
|
||||||
if (mpi_neighbours.west != MPI_PROC_NULL) {
|
if (mpi_neighbors.west != MPI_PROC_NULL) {
|
||||||
auto bound = solver.read_boundary(Solver::Dir::West);
|
auto bound = solver.read_boundary(Solver::Dir::West);
|
||||||
MPI_Isend(bound.data(), 1, mpi_type_row,
|
MPI_Isend(bound.data(), 1, mpi_type_row,
|
||||||
mpi_neighbours.west, iter, mpi_comm_grid,
|
mpi_neighbors.west, iter, mpi_comm_grid,
|
||||||
&mpi_requests[mpi_request_count++]);
|
&mpi_requests[mpi_request_count++]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -254,25 +269,25 @@ int main(int argn, char* argv[]) {
|
||||||
|
|
||||||
// Get new boundary conditions using a blocking receive
|
// Get new boundary conditions using a blocking receive
|
||||||
MPI_Status mpi_status;
|
MPI_Status mpi_status;
|
||||||
if (mpi_neighbours.north != MPI_PROC_NULL) {
|
if (mpi_neighbors.north != MPI_PROC_NULL) {
|
||||||
auto bound = solver.write_boundary(Solver::Dir::North);
|
auto bound = solver.write_boundary(Solver::Dir::North);
|
||||||
MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
|
MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
|
||||||
mpi_neighbours.north, iter, mpi_comm_grid, &mpi_status);
|
mpi_neighbors.north, iter, mpi_comm_grid, &mpi_status);
|
||||||
}
|
}
|
||||||
if (mpi_neighbours.east != MPI_PROC_NULL) {
|
if (mpi_neighbors.east != MPI_PROC_NULL) {
|
||||||
auto bound = solver.write_boundary(Solver::Dir::East);
|
auto bound = solver.write_boundary(Solver::Dir::East);
|
||||||
MPI_Recv(bound.data(), 1, mpi_type_row,
|
MPI_Recv(bound.data(), 1, mpi_type_row,
|
||||||
mpi_neighbours.east, iter, mpi_comm_grid, &mpi_status);
|
mpi_neighbors.east, iter, mpi_comm_grid, &mpi_status);
|
||||||
}
|
}
|
||||||
if (mpi_neighbours.south != MPI_PROC_NULL) {
|
if (mpi_neighbors.south != MPI_PROC_NULL) {
|
||||||
auto bound = solver.write_boundary(Solver::Dir::South);
|
auto bound = solver.write_boundary(Solver::Dir::South);
|
||||||
MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
|
MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
|
||||||
mpi_neighbours.south, iter, mpi_comm_grid, &mpi_status);
|
mpi_neighbors.south, iter, mpi_comm_grid, &mpi_status);
|
||||||
}
|
}
|
||||||
if (mpi_neighbours.west != MPI_PROC_NULL) {
|
if (mpi_neighbors.west != MPI_PROC_NULL) {
|
||||||
auto bound = solver.write_boundary(Solver::Dir::West);
|
auto bound = solver.write_boundary(Solver::Dir::West);
|
||||||
MPI_Recv(bound.data(), 1, mpi_type_row,
|
MPI_Recv(bound.data(), 1, mpi_type_row,
|
||||||
mpi_neighbours.west, iter, mpi_comm_grid, &mpi_status);
|
mpi_neighbors.west, iter, mpi_comm_grid, &mpi_status);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -325,7 +340,7 @@ int main(int argn, char* argv[]) {
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// calculate runtime time
|
// calculate run-time
|
||||||
auto stop = std::chrono::high_resolution_clock::now();
|
auto stop = std::chrono::high_resolution_clock::now();
|
||||||
auto time = std::chrono::duration_cast<std::chrono::duration<double>>(stop - start)
|
auto time = std::chrono::duration_cast<std::chrono::duration<double>>(stop - start)
|
||||||
.count();
|
.count();
|
|
@ -3,6 +3,10 @@
|
||||||
* Partitions an integer `num` into `div` summands and returns the `i`th
|
* Partitions an integer `num` into `div` summands and returns the `i`th
|
||||||
* of the partition.
|
* of the partition.
|
||||||
*
|
*
|
||||||
|
* @param num Integer to be partitioned
|
||||||
|
* @param div Number of partitions
|
||||||
|
* @param i Index of partition
|
||||||
|
*
|
||||||
* @example
|
* @example
|
||||||
* num = 17
|
* num = 17
|
||||||
* div = 5
|
* div = 5
|
||||||
|
@ -22,8 +26,14 @@ int partition(int num, int div, int i) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the partial sum of the first `i` integer `num` partitiones into
|
* Computes the partial sum of the first `i` integer `num` partitioned into
|
||||||
* `div` parts.
|
* `div` parts using `partition()`.
|
||||||
|
*
|
||||||
|
* sum_{j = 0}^i partition(num, div, j).
|
||||||
|
*
|
||||||
|
* @param num Integer to be partitioned
|
||||||
|
* @param div Number of partitions
|
||||||
|
* @param i Index of partition
|
||||||
*
|
*
|
||||||
* @example
|
* @example
|
||||||
* num = 17
|
* num = 17
|
||||||
|
@ -93,7 +103,7 @@ void two_factors(int num, int* factors) {
|
||||||
factors[0] = 1;
|
factors[0] = 1;
|
||||||
factors[1] = num;
|
factors[1] = num;
|
||||||
|
|
||||||
// Check all numbers `i` untill the integer square-root
|
// Check all numbers `i` until the integer square-root
|
||||||
for (int i = 2; i * i <= num; ++i) {
|
for (int i = 2; i * i <= num; ++i) {
|
||||||
// Check if `i` is a divisor
|
// Check if `i` is a divisor
|
||||||
if (!(num % i)) {
|
if (!(num % i)) {
|
|
@ -30,7 +30,7 @@ benchmarking.
|
||||||
|
|
||||||
## MPI-Parallel Stencil-Based Jacobi Solver
|
## MPI-Parallel Stencil-Based Jacobi Solver
|
||||||
|
|
||||||
In this excercise, your task is to parallelize a stencil-based Jacobi solver for the 2D elliptic PDE
|
In this exercise, your task is to parallelize a stencil-based Jacobi solver for the 2D elliptic PDE
|
||||||
$$
|
$$
|
||||||
-\Delta u(x,y) + k^2 u(x,y) = f(x,y) \quad, \text{with} \ k=2\pi
|
-\Delta u(x,y) + k^2 u(x,y) = f(x,y) \quad, \text{with} \ k=2\pi
|
||||||
$$
|
$$
|
||||||
|
@ -47,7 +47,7 @@ $$
|
||||||
f(x,y) = k^2 u_p(x,y)
|
f(x,y) = k^2 u_p(x,y)
|
||||||
$$
|
$$
|
||||||
by implementing an MPI-based domain decomposition.
|
by implementing an MPI-based domain decomposition.
|
||||||
The PDE is dicretized on a regular finite-difference grid with fixed (Diriclet) boundary conditions:
|
The PDE is discretized on a regular finite-difference grid with fixed (Dirichlet) boundary conditions:
|
||||||
$$
|
$$
|
||||||
\begin{align}
|
\begin{align}
|
||||||
u(0,y) &= 0 \\
|
u(0,y) &= 0 \\
|
||||||
|
@ -65,7 +65,7 @@ Your task is to decompose the finite-difference grid into domain regions such th
|
||||||
The decoupling of the regions is achieved by introducing a *ghost layer* of grid points which surrounds each region.
|
The decoupling of the regions is achieved by introducing a *ghost layer* of grid points which surrounds each region.
|
||||||
The values in the ghost layer of a region are not updated during an iteration.
|
The values in the ghost layer of a region are not updated during an iteration.
|
||||||
|
|
||||||
Instead, after an iteration is finished the updated values for the ghost layer are received from the neighbouring regions, and the boundary layer is sent to the neighouring regions (see Figure below).
|
Instead, after an iteration is finished the updated values for the ghost layer are received from the neighboring regions, and the boundary layer is sent to the neighboring regions (see Figure below).
|
||||||
|
|
||||||
![Decomposition](images/unitsquare_decomposition_1D_2D.png)
|
![Decomposition](images/unitsquare_decomposition_1D_2D.png)
|
||||||
|
|
||||||
|
@ -92,14 +92,14 @@ mpirun -n 4 ./jacobiMPI 250 30
|
||||||
```
|
```
|
||||||
|
|
||||||
- `NUMMPIPROC`: number of MPI-processes to launch
|
- `NUMMPIPROC`: number of MPI-processes to launch
|
||||||
- `resolution`: number of grid points along each dimension of the unit square; the gridspacing is $`h = 1.0/(\text{resolution}-1)`$
|
- `resolution`: number of grid points along each dimension of the unit square; the grid spacing is $`h = 1.0/(\text{resolution}-1)`$
|
||||||
- `iterations`: number of Jacobi iterations to perform
|
- `iterations`: number of Jacobi iterations to perform
|
||||||
|
|
||||||
Further and more specifically, your program should
|
Further and more specifically, your program should
|
||||||
|
|
||||||
- use $`\bar{u}_h=\mathbf{0}`$ as initial approximation to $`u`$, and (after finishing all iterations)
|
- use $`\bar{u}_h=\mathbf{0}`$ as initial approximation to $`u`$, and (after finishing all iterations)
|
||||||
- print the Euclidean $`\parallel \cdot \parallel_2`$ and Maximum $`\parallel \cdot \parallel_{\infty}`$ norm of the residual $`\parallel A_h\bar{u}_h-b_h \parallel`$ and of the total error $`\parallel \bar{u}_h-u_p \parallel`$ to the console,
|
- print the Euclidean $`\parallel \cdot \parallel_2`$ and Maximum $`\parallel \cdot \parallel_{\infty}`$ norm of the residual $`\parallel A_h\bar{u}_h-b_h \parallel`$ and of the total error $`\parallel \bar{u}_h-u_p \parallel`$ to the console,
|
||||||
- print the average runtime per iteration to the console, and
|
- print the average run time per iteration to the console, and
|
||||||
- produce the same results as a serial run.
|
- produce the same results as a serial run.
|
||||||
|
|
||||||
Finally, benchmark the parallel performance of your program `jacobiMPI` using 2 nodes of the IUE-Cluster for 4 different `resolution`s=$`\{125,250,1000,4000\}`$ using between 1 and 80 MPI-processes (`NUMMPIPROC`).
|
Finally, benchmark the parallel performance of your program `jacobiMPI` using 2 nodes of the IUE-Cluster for 4 different `resolution`s=$`\{125,250,1000,4000\}`$ using between 1 and 80 MPI-processes (`NUMMPIPROC`).
|
||||||
|
@ -128,7 +128,7 @@ mpirun -n 4 ./jacobiMPI 2D 125 200
|
||||||
- the new parameter `DIM` has two valid values `1D` or `2D` and switches between one-dimensional and two-dimensional decomposition.
|
- the new parameter `DIM` has two valid values `1D` or `2D` and switches between one-dimensional and two-dimensional decomposition.
|
||||||
|
|
||||||
|
|
||||||
Ensure a correct implementation by comparing your results to a serial run. Benmarking on the cluster is **not**
|
Ensure a correct implementation by comparing your results to a serial run. Benchmarking on the cluster is **not**
|
||||||
required.
|
required.
|
||||||
|
|
||||||
**Notes:**
|
**Notes:**
|
||||||
|
@ -145,7 +145,7 @@ required.
|
||||||
|
|
||||||
- Your login credentials will be provided via email.
|
- Your login credentials will be provided via email.
|
||||||
- You need to enable a "TU Wien VPN" connection.
|
- You need to enable a "TU Wien VPN" connection.
|
||||||
- You can login to the cluser using `ssh` and your credentials.
|
- You can log in to the cluster using `ssh` and your credentials.
|
||||||
- You will be asked to change your initial password upon first login.
|
- You will be asked to change your initial password upon first login.
|
||||||
|
|
||||||
**File Transfer**
|
**File Transfer**
|
||||||
|
@ -159,8 +159,8 @@ required.
|
||||||
- The cluster has a *login node* (the one you `ssh` to, details will be announced in the email with the credentials)
|
- The cluster has a *login node* (the one you `ssh` to, details will be announced in the email with the credentials)
|
||||||
- This login node must only be used to compile your project and **never** to perform any benchmarks or MPI-runs (besides minimal lightweight tests of the MPI configuration)
|
- This login node must only be used to compile your project and **never** to perform any benchmarks or MPI-runs (besides minimal lightweight tests of the MPI configuration)
|
||||||
- All other nodes of the cluster are used to run the "jobs" you submit.
|
- All other nodes of the cluster are used to run the "jobs" you submit.
|
||||||
- To support cluster users, a set of *environement modules* (relevant for us is only the "MPI"-module) is made available. You can list all modules using `module avail`
|
- To support cluster users, a set of *environment modules* (relevant for us is only the "MPI"-module) is made available. You can list all modules using `module avail`
|
||||||
- Note that you also need to load the modules you require in your job subsmission scripts (see example provided in this repo).
|
- Note that you also need to load the modules you require in your job submission scripts (see example provided in this repo).
|
||||||
|
|
||||||
**Executing jobs on the cluster**
|
**Executing jobs on the cluster**
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue