/**
 * Implementation of an MPI-parallel stencil-based Jacobi solver for the 2D PDE
 *
 *     -Du(x, y) + k^2 u(x, y) = f(x, y),    (x, y) in [0, 1] x [0, 1]
 *
 * with the scalar k = 2 pi. The right-hand side is
 *
 *     f(x, y) = k^2 sin(2 pi x) sinh(2 pi y)
 *
 * and the Dirichlet boundary conditions are
 *
 *     u(0, y) = u(1, y) = u(x, 0) = 0,      x, y in [0, 1]
 *     u(x, 1) = sin(2 pi x) sinh(2 pi),     x in [0, 1]
 *
 * The following program is implemented such that it can be compiled in serial
 * or MPI-parallel form by defining the `USE_MPI` macro (see `Makefile`).
 */
#define _USE_MATH_DEFINES /* enables math constants from `cmath` */

#include <stddef.h>
#include <iostream>
#include <iomanip>
#include <sstream>
#include <functional>
#include <chrono>
#include <cmath>

#ifdef USE_MPI
#include <mpi.h>
#endif

#include "Matrix.h"
#include "Solver.h"
#include "utils.h"

int main(int argn, char* argv[]) {

  /******************************* MPI Setup ********************************/
#ifdef USE_MPI
  // Initialize MPI
  MPI_Init(nullptr, nullptr);

  // Get the MPI pool size
  int mpi_size;       /*< MPI pool size (a.k.a. total number of processes) */
  MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);

  // Get the MPI rank
  int mpi_world_rank; /*< MPI world rank (a.k.a. global process ID) */
  MPI_Comm_rank(MPI_COMM_WORLD, &mpi_world_rank);
#else
  int mpi_size = 1;
  int mpi_world_rank = 0;
#endif

  /**************************** Parse Arguments *****************************/
  if (argn < 4) {
    std::cerr << "usage: " << argv[0] << " <1D|2D> <resolution> <iterations>" << std::endl;
    return -1;
  } else if (argn > 4) {
    std::cerr << "warning: " << "ignoring all but the first three parameters" << std::endl;
  }
#ifdef USE_MPI
  size_t dim;
  if (std::string(argv[1]) == "1D") {
    dim = 1;
  } else if (std::string(argv[1]) == "2D") {
    dim = 2;
  } else {
    std::cerr << "error: Parsing arg 1 <dim> failed" << std::endl;
    return -1;
  }
#endif
  size_t resolution;
  if (!(std::istringstream(argv[2]) >> resolution)) {
    std::cerr << "error: Parsing arg 2 <resolution> failed" << std::endl;
    return -1;
  }
  size_t iterations;
  if (!(std::istringstream(argv[3]) >> iterations)) {
    std::cerr << "error: Parsing arg 3 <iterations> failed" << std::endl;
    return -1;
  }
  if (resolution < 10 || resolution > 65536) {
    std::cerr << "error: arg 2 <resolution> " << resolution
              << " out of domain [10, 65536]" << std::endl;
    return -1;
  }
  if (iterations > 65536) {
    std::cerr << "error: arg 3 <iterations> " << iterations
              << " out of domain [0, 65536]" << std::endl;
    return -1;
  }
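  // Example invocation (a sketch; the binary name and the mpirun launcher are
  // assumptions, see the Makefile for the actual target):
  //   mpirun -np 4 ./solver 2D 128 10000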

  // Report configuration
  if (mpi_world_rank == 0) {
    std::cout << std::setfill(' ')
#ifdef USE_MPI
              << "use MPI: YES\n"
#else
              << "use MPI: NO\n"
#endif
              << "nr. processes: " << std::setw(5) << mpi_size << '\n'
              << "resolution: " << std::setw(5) << resolution << '\n'
              << "iterations: " << std::setw(5) << iterations << std::endl;
  }

  /************************* Start Time Measurement *************************/
  auto start = std::chrono::high_resolution_clock::now();

  /**************** Setup (local) PDE + Boundary Conditions *****************/
  const double k = 2 * M_PI;
  const double h = 1.0 / static_cast<double>(resolution - 1);

#ifdef USE_MPI
  // Group processes into a Cartesian communication topology. Set initial
  // values for a 1D grid.
  int mpi_dims[2] = {mpi_size, 1};
  // In case of a 2D grid, partition both axes as equally as possible. Note
  // that `MPI_Dims_create` does not guarantee this: for 9 processes it was
  // observed to generate a 9 x 1 grid, whereas the following computes a
  // 3 x 3 decomposition.
  if (dim == 2) {
    two_factors(mpi_size, mpi_dims);
  }

  // Report grid topology
  if (mpi_world_rank == 0) {
    std::cout << "topology: " << dim << "D";
    if (dim == 2) {
      std::cout << " (" << mpi_dims[0] << " x " << mpi_dims[1] << ")";
    }
    std::cout << std::endl;
  }

  // Setup a Cartesian topology communicator (NON-cyclic)
  const int mpi_periods[2] = {false, false};
  MPI_Comm mpi_comm_grid;
  MPI_Cart_create(
      MPI_COMM_WORLD,   // old communicator
      2,                // number of dimensions
      mpi_dims,         // grid dimensions
      mpi_periods,      // grid periodicity
      true,             // allow process reordering
      &mpi_comm_grid
  );

  // Get MPI rank with respect to the grid communicator
  int mpi_grid_rank; /*< MPI grid rank (a.k.a. grid process ID) */
  MPI_Comm_rank(mpi_comm_grid, &mpi_grid_rank);

  // Get coordinates with respect to the grid communicator
  int mpi_coords[2];
  MPI_Cart_coords(mpi_comm_grid, mpi_grid_rank, 2, mpi_coords);

  // Get direct neighbors in the communication grid
  struct { int north; int east; int south; int west; } mpi_neighbors;
  // Get X-direction (dim 0) neighbors
  MPI_Cart_shift(
      mpi_comm_grid,          // grid communicator
      0,                      // axis index (0 <-> X)
      1,                      // offset
      &(mpi_neighbors.west),  // negative-offset neighbor
      &(mpi_neighbors.east)   // positive-offset neighbor
  );
  // Get Y-direction (dim 1) neighbors
  MPI_Cart_shift(
      mpi_comm_grid,
      1,                      // axis index (1 <-> Y)
      1,
      &(mpi_neighbors.south),
      &(mpi_neighbors.north)
  );
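  // Note: the grid is non-periodic, so a rank on the domain edge has no
  // neighbor in that direction and `MPI_Cart_shift` returns `MPI_PROC_NULL`
  // for it; the ghost-layer and boundary checks below test for exactly that.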

  // Calculate local (base) grid size (without ghost layers)
  size_t nx = partition(resolution, mpi_dims[0], mpi_coords[0]);
  size_t ny = partition(resolution, mpi_dims[1], mpi_coords[1]);
  // Add ghost layers for each (existing) neighbor
  ny += (mpi_neighbors.north != MPI_PROC_NULL);
  nx += (mpi_neighbors.east != MPI_PROC_NULL);
  ny += (mpi_neighbors.south != MPI_PROC_NULL);
  nx += (mpi_neighbors.west != MPI_PROC_NULL);
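  // Example (a sketch, assuming `partition` from utils.h splits the
  // `resolution` grid points of one axis as evenly as possible): with
  // resolution = 100 on a 2 x 2 process grid, every rank owns a 50 x 50 base
  // block and gains one ghost row/column per interior neighbor, e.g. a
  // 51 x 51 local grid for a rank with one east and one north neighbor.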

  // Compute local domain [xmin, xmax] x [ymin, ymax]
  double xmin = (mpi_neighbors.west == MPI_PROC_NULL) ? 0.0
      : h * (partition_sum(resolution, mpi_dims[0], mpi_coords[0] - 1) - 1);
  double xmax = (mpi_neighbors.east == MPI_PROC_NULL) ? 1.0
      : h * partition_sum(resolution, mpi_dims[0], mpi_coords[0]);
  double ymin = (mpi_neighbors.south == MPI_PROC_NULL) ? 0.0
      : h * (partition_sum(resolution, mpi_dims[1], mpi_coords[1] - 1) - 1);
  double ymax = (mpi_neighbors.north == MPI_PROC_NULL) ? 1.0
      : h * partition_sum(resolution, mpi_dims[1], mpi_coords[1]);
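  // Interpretation (assuming `partition_sum` returns the number of grid
  // points owned by ranks 0..coord of one axis): on an interior side the
  // local domain is extended by one grid line -- the "- 1" above -- so that
  // the first/last local line coincides with the neighbor's boundary line,
  // i.e. with the ghost layer received from that neighbor.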

  // Create an MPI vector type (for boundary-condition exchange).
  // It allows exchanging a matrix row directly: row elements are not adjacent
  // in memory (column-major matrix layout), in contrast to column elements,
  // and therefore need this strided type (used for the east/west boundaries
  // in the exchange below).
  MPI_Datatype mpi_type_row;
  MPI_Type_vector(ny, 1, nx, MPI_DOUBLE, &mpi_type_row);
  MPI_Type_commit(&mpi_type_row);
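  // Illustration: `MPI_Type_vector(ny, 1, nx, MPI_DOUBLE, ...)` describes ny
  // single doubles placed nx elements apart, i.e. one element out of each of
  // the ny columns of an nx x ny column-major buffer -- exactly one matrix row.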
#else
  // discretization grid resolution
  size_t nx = resolution, ny = resolution;
  // PDE domain borders [xmin, xmax] x [ymin, ymax] = [0, 1] x [0, 1]
  double xmin = 0.0, xmax = 1.0, ymin = 0.0, ymax = 1.0;
#endif

  // Declare the right-hand-side function f(x, y) = k^2 sin(2 pi x) sinh(2 pi y)
  std::function<double(double, double)> fun = [k](double x, double y) {
    return k * k * sin(2 * M_PI * x) * sinh(2 * M_PI * y);
  };
  // Boundary conditions
  /** North boundary condition gN(x) = sin(2 pi x) sinh(2 pi) */
  std::function<double(double)> gN;
#ifdef USE_MPI
  // Check if the local north boundary is part of the global north boundary
  if (mpi_neighbors.north == MPI_PROC_NULL) {
#endif
    // The local north boundary equals the global north boundary
    gN = [](double x) { return sin(2 * M_PI * x) * sinh(2 * M_PI); };
#ifdef USE_MPI
  } else {
    // The local north boundary is interior (a ghost layer) and is zero like
    // all the rest
    gN = [](double) { return 0.0; };
  }
#endif
  /** East, South and West boundary conditions are all 0 */
  std::function<double(double)> g0 = [](double) { return 0.0; };

  /******************************* Solve PDE ********************************/
  // Instantiate solver (local instance)
  Solver solver(nx, ny, xmin, xmax, ymin, ymax, h, k, fun, gN, g0, g0, g0);
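  // (Assumption about the Solver.h interface: the four trailing boundary
  //  arguments are taken to be the North, East, South and West conditions,
  //  in that order.)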

  // Run solver iterations
  for (size_t iter = 0; iter < iterations; ++iter) {
    // Perform a single stencil Jacobi iteration
    solver.iterate();

#ifdef USE_MPI
    // Non-blocking send of boundary conditions to all (existing) neighbors
    MPI_Request mpi_requests[4];
    int mpi_request_count = 0;

    if (mpi_neighbors.north != MPI_PROC_NULL) {
      auto bound = solver.read_boundary(Solver::Dir::North);
      MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
                mpi_neighbors.north, iter, mpi_comm_grid,
                &mpi_requests[mpi_request_count++]);
    }
    if (mpi_neighbors.east != MPI_PROC_NULL) {
      auto bound = solver.read_boundary(Solver::Dir::East);
      MPI_Isend(bound.data(), 1, mpi_type_row,
                mpi_neighbors.east, iter, mpi_comm_grid,
                &mpi_requests[mpi_request_count++]);
    }
    if (mpi_neighbors.south != MPI_PROC_NULL) {
      auto bound = solver.read_boundary(Solver::Dir::South);
      MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
                mpi_neighbors.south, iter, mpi_comm_grid,
                &mpi_requests[mpi_request_count++]);
    }
    if (mpi_neighbors.west != MPI_PROC_NULL) {
      auto bound = solver.read_boundary(Solver::Dir::West);
      MPI_Isend(bound.data(), 1, mpi_type_row,
                mpi_neighbors.west, iter, mpi_comm_grid,
                &mpi_requests[mpi_request_count++]);
    }

    // Get new boundary conditions using blocking receives
    MPI_Status mpi_status;
    if (mpi_neighbors.north != MPI_PROC_NULL) {
      auto bound = solver.write_boundary(Solver::Dir::North);
      MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
               mpi_neighbors.north, iter, mpi_comm_grid, &mpi_status);
    }
    if (mpi_neighbors.east != MPI_PROC_NULL) {
      auto bound = solver.write_boundary(Solver::Dir::East);
      MPI_Recv(bound.data(), 1, mpi_type_row,
               mpi_neighbors.east, iter, mpi_comm_grid, &mpi_status);
    }
    if (mpi_neighbors.south != MPI_PROC_NULL) {
      auto bound = solver.write_boundary(Solver::Dir::South);
      MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
               mpi_neighbors.south, iter, mpi_comm_grid, &mpi_status);
    }
    if (mpi_neighbors.west != MPI_PROC_NULL) {
      auto bound = solver.write_boundary(Solver::Dir::West);
      MPI_Recv(bound.data(), 1, mpi_type_row,
               mpi_neighbors.west, iter, mpi_comm_grid, &mpi_status);
    }

    // Complete the outstanding non-blocking sends before the next iteration
    // overwrites the boundary buffers (this also releases the requests).
    MPI_Waitall(mpi_request_count, mpi_requests, MPI_STATUSES_IGNORE);
#endif
  }

  /***************************** Solution Stats *****************************/
  // Get solution
  Matrix<double>& solution = solver.solution();

  // Compute analytic (true) solution
  Matrix<double> analytic(solution.nrow(), solution.ncol());
  for (size_t j = 0; j < analytic.ncol(); ++j) {
    for (size_t i = 0; i < analytic.nrow(); ++i) {
      analytic(i, j) = sin(2 * M_PI * solver.X(i)) * sinh(2 * M_PI * solver.Y(j));
    }
  }
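  // Why this is the exact solution: u(x, y) = sin(2 pi x) sinh(2 pi y) is
  // harmonic (u_xx = -4 pi^2 u and u_yy = +4 pi^2 u, so Du = 0), hence
  // -Du + k^2 u = k^2 sin(2 pi x) sinh(2 pi y) = f, and u also satisfies the
  // Dirichlet boundary conditions stated in the file header.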

#ifdef USE_MPI
  // Frobenius norm accumulators
  double frob_norm_sendbuf[2] = {
    solver.resid_norm(), solution.dist(analytic, 1, 1, 1, 1)
  };
  // Square the norms (the global norm is the square root of the sum of the
  // squared partial norms)
  frob_norm_sendbuf[0] = frob_norm_sendbuf[0] * frob_norm_sendbuf[0];
  frob_norm_sendbuf[1] = frob_norm_sendbuf[1] * frob_norm_sendbuf[1];
  // Maximum norm accumulators
  double max_norm_sendbuf[2] = {
    solver.resid_norm<Max>(), solution.dist<Max>(analytic, 1, 1, 1, 1)
  };

  // Global result reduction buffers
  double frob_norm_recvbuf[2], max_norm_recvbuf[2];

  // Accumulate global stats by sum/max reduction of the local stats
  MPI_Reduce(frob_norm_sendbuf, frob_norm_recvbuf, 2, MPI_DOUBLE, MPI_SUM,
             0, MPI_COMM_WORLD);
  MPI_Reduce(max_norm_sendbuf, max_norm_recvbuf, 2, MPI_DOUBLE, MPI_MAX,
             0, MPI_COMM_WORLD);

  // Finish by taking the square root of the accumulated global
  // squared Frobenius norms
  frob_norm_recvbuf[0] = std::sqrt(frob_norm_recvbuf[0]);
  frob_norm_recvbuf[1] = std::sqrt(frob_norm_recvbuf[1]);
#else
  double frob_norm_recvbuf[2] = {
    solver.resid_norm(), solution.dist(analytic, 1, 1, 1, 1)
  };
  double max_norm_recvbuf[2] = {
    solver.resid_norm<Max>(), solution.dist<Max>(analytic, 1, 1, 1, 1)
  };
#endif

  // Calculate run-time
  auto stop = std::chrono::high_resolution_clock::now();
  auto time = std::chrono::duration_cast<std::chrono::duration<double>>(stop - start)
      .count();

  // Report global stats (from "root" only)
  if (mpi_world_rank == 0) {
    std::cout
        << "Time [sec]:          " << std::setw(15) << time
        << "\n|| residuals ||_2:   " << std::setw(15) << frob_norm_recvbuf[0]
        << "\n|| residuals ||_inf: " << std::setw(15) << max_norm_recvbuf[0]
        << "\n|| error ||_2:       " << std::setw(15) << frob_norm_recvbuf[1]
        << "\n|| error ||_inf:     " << std::setw(15) << max_norm_recvbuf[1]
        << std::endl;
  }

  // MPI shutdown/cleanup
#ifdef USE_MPI
  MPI_Type_free(&mpi_type_row); // release the committed boundary-exchange type
  MPI_Finalize();
#endif

  return 0;
}