/**
 * Implementation of an MPI-Parallel Stencil-Based Jacobi Solver for the 2D PDE
 *
 *     -Du(x, y) + k^2 u(x, y) = f(x, y),    (x, y) in [0, 1] x [0, 1]
 *
 * with the scalar k = 2 pi. The right hand side is
 *
 *     f(x, y) = k^2 sin(2 pi x) sinh(2 pi y)
 *
 * and the Dirichlet boundary conditions are
 *
 *     u(0, y) = u(1, y) = u(x, 0) = 0,      x, y in [0, 1]
 *     u(x, 1) = sin(2 pi x) sinh(2 pi),     x in [0, 1]
 *
 * The following program is implemented such that it can be compiled in serial
 * or MPI-parallel form by declaring the `USE_MPI` macro (see `Makefile`).
 */
#define _USE_MATH_DEFINES /* enables math constants from `cmath` */

#include <cmath>
#include <chrono>
#include <functional>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#ifdef USE_MPI
#include <mpi.h>
#endif

#include "Matrix.h"
#include "Solver.h"
#include "utils.h"

int main(int argn, char* argv[]) {
  /******************************* MPI Setup ********************************/
#ifdef USE_MPI
  // Initialize MPI
  MPI_Init(nullptr, nullptr);

  // Get MPI configuration
  int mpi_size; /*< MPI pool size (a.k.a. total number of processes) */
  MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);

  // Get MPI rank
  int mpi_world_rank; /*< MPI world rank (a.k.a. global process ID) */
  MPI_Comm_rank(MPI_COMM_WORLD, &mpi_world_rank);
#else
  int mpi_size = 1;
  int mpi_world_rank = 0;
#endif

  /**************************** Parse Arguments *****************************/
  if (0 < argn && argn < 4) {
    std::cerr << "usage: " << argv[0] << " <1D|2D> <resolution> <iterations>"
              << std::endl;
    return -1;
  } else if (argn > 4) {
    std::cerr << "warning: "
              << "ignoring all but the first three params" << std::endl;
  }

#ifdef USE_MPI
  size_t dim;
  if (std::string(argv[1]) == "1D") {
    dim = 1;
  } else if (std::string(argv[1]) == "2D") {
    dim = 2;
  } else {
    std::cerr << "error: Parsing arg 1 failed" << std::endl;
    return -1;
  }
#endif

  size_t resolution;
  if (!(std::istringstream(argv[2]) >> resolution)) {
    std::cerr << "error: Parsing arg 2 failed" << std::endl;
    return -1;
  }

  size_t iterations;
  if (!(std::istringstream(argv[3]) >> iterations)) {
    std::cerr << "error: Parsing arg 3 failed" << std::endl;
    return -1;
  }

  if (resolution < 10 || resolution > 65536) {
    std::cerr << "error: arg 2 " << resolution
              << " out of domain [10, 65536]" << std::endl;
    return -1;
  }
  if (iterations > 65536) {
    std::cerr << "error: arg 3 " << iterations
              << " out of domain [0, 65536]" << std::endl;
    return -1;
  }

  // Report configuration
  if (mpi_world_rank == 0) {
    std::cout << std::setfill(' ')
#ifdef USE_MPI
              << "use MPI: YES\n"
#else
              << "use MPI: NO\n"
#endif
              << "nr. processes: " << std::setw(5) << mpi_size << '\n'
              << "resolution: " << std::setw(5) << resolution << '\n'
              << "iterations: " << std::setw(5) << iterations << std::endl;
  }

  /************************* Start Time Measurement *************************/
  auto start = std::chrono::high_resolution_clock::now();

  /**************** Setup (local) PDE + Boundary Conditions *****************/
  const double k = 2 * M_PI;
  const double h = 1.0 / static_cast<double>(resolution - 1);

#ifdef USE_MPI
  // Group processes into a Cartesian communication topology. Set initial
  // values for a 1D grid.
  int mpi_dims[2] = {mpi_size, 1};

  // In case of a 2D grid, partition both axes as equally as possible. Note
  // that `MPI_Dims_create` does not guarantee this: for 9 processes it was
  // observed to generate a 9 x 1 grid, while the following `two_factors`
  // call computes a 3 x 3 decomposition.
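  // For illustration only, a minimal sketch of what such a helper could look
  // like (an assumption -- the actual `two_factors` lives in `utils.h` and is
  // not shown here). It picks the divisor pair of `n` closest to a square,
  // e.g. 9 -> 3 x 3, 12 -> 4 x 3:
  //
  //   void two_factors_sketch(int n, int dims[2]) {
  //     dims[0] = n;
  //     dims[1] = 1;
  //     for (int p = 1; p * p <= n; ++p) {
  //       if (n % p == 0) { dims[0] = n / p; dims[1] = p; }
  //     }
  //   }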
  if (dim == 2) {
    two_factors(mpi_size, mpi_dims);
  }

  // Report grid topology
  if (mpi_world_rank == 0) {
    std::cout << "topology: " << dim << "D";
    if (dim == 2) {
      std::cout << " (" << mpi_dims[0] << " x " << mpi_dims[1] << ")";
    }
    std::cout << std::endl;
  }

  // Setup a Cartesian topology communicator (NON-cyclic)
  const int mpi_periods[2] = {false, false};
  MPI_Comm mpi_comm_grid;
  MPI_Cart_create(
      MPI_COMM_WORLD,  // old communicator
      2,               // number of dimensions
      mpi_dims,        // grid dimensions
      mpi_periods,     // grid periodicity
      true,            // allow process reordering
      &mpi_comm_grid
  );

  // Get MPI rank with respect to the grid communicator
  int mpi_grid_rank; /*< MPI grid rank (a.k.a. grid process ID) */
  MPI_Comm_rank(mpi_comm_grid, &mpi_grid_rank);

  // Get coordinates with respect to the grid communicator
  int mpi_coords[2];
  MPI_Cart_coords(mpi_comm_grid, mpi_grid_rank, 2, mpi_coords);

  // Get direct neighbors in the communication grid
  struct {
    int north;
    int east;
    int south;
    int west;
  } mpi_neighbors;

  // Get X-direction (dim 0) neighbors
  MPI_Cart_shift(
      mpi_comm_grid,          // grid communicator
      0,                      // axis index (0 <-> X)
      1,                      // offset
      &(mpi_neighbors.west),  // negated offset neighbor
      &(mpi_neighbors.east)   // offset neighbor
  );

  // Get Y-direction (dim 1) neighbors
  MPI_Cart_shift(
      mpi_comm_grid,
      1,                      // axis index (1 <-> Y)
      1,
      &(mpi_neighbors.south),
      &(mpi_neighbors.north)
  );

  // Calculate local (base) grid size (without ghost layers)
  size_t nx = partition(resolution, mpi_dims[0], mpi_coords[0]);
  size_t ny = partition(resolution, mpi_dims[1], mpi_coords[1]);

  // Add ghost layers for each (existing) neighbor
  ny += (mpi_neighbors.north != MPI_PROC_NULL);
  nx += (mpi_neighbors.east != MPI_PROC_NULL);
  ny += (mpi_neighbors.south != MPI_PROC_NULL);
  nx += (mpi_neighbors.west != MPI_PROC_NULL);

  // Compute local domain [xmin, xmax] x [ymin, ymax]
  double xmin = (mpi_neighbors.west == MPI_PROC_NULL)
      ? 0.0
      : h * (partition_sum(resolution, mpi_dims[0], mpi_coords[0] - 1) - 1);
  double xmax = (mpi_neighbors.east == MPI_PROC_NULL)
      ? 1.0
      : h * partition_sum(resolution, mpi_dims[0], mpi_coords[0]);
  double ymin = (mpi_neighbors.south == MPI_PROC_NULL)
      ? 0.0
      : h * (partition_sum(resolution, mpi_dims[1], mpi_coords[1] - 1) - 1);
  double ymax = (mpi_neighbors.north == MPI_PROC_NULL)
      ? 1.0
      : h * partition_sum(resolution, mpi_dims[1], mpi_coords[1]);

  // Create an MPI vector type for exchanging matrix rows. In the column-major
  // matrix layout a row is strided: its elements are not adjacent in memory,
  // in contrast to a column, which is contiguous. The East/West boundary
  // layers are therefore exchanged with this type, while the contiguous
  // North/South layers are sent as plain `MPI_DOUBLE` buffers.
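  // Illustration of the strided access pattern (assuming column-major storage
  // with nx rows, as noted above): element (i, j) sits at flat index
  // i + j * nx, so the fixed-i "row"
  //
  //   (i, 0), (i, 1), ..., (i, ny - 1)
  //
  // occupies indices i, i + nx, i + 2 * nx, ... -- that is, ny blocks of one
  // double with stride nx, which is exactly what
  // `MPI_Type_vector(ny, 1, nx, MPI_DOUBLE, ...)` below encodes.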
  MPI_Datatype mpi_type_row;
  MPI_Type_vector(ny, 1, nx, MPI_DOUBLE, &mpi_type_row);
  MPI_Type_commit(&mpi_type_row);
#else
  // discretization grid resolution
  size_t nx = resolution, ny = resolution;
  // PDE domain borders [xmin, xmax] x [ymin, ymax] = [0, 1] x [0, 1]
  double xmin = 0.0, xmax = 1.0, ymin = 0.0, ymax = 1.0;
#endif

  // Declare right hand side function f(x, y) = k^2 sin(2 pi x) sinh(2 pi y)
  std::function<double(double, double)> fun = [k](double x, double y) {
    return k * k * sin(2 * M_PI * x) * sinh(2 * M_PI * y);
  };

  // Boundary conditions
  /** North boundary condition gN(x) = sin(2 pi x) sinh(2 pi) */
  std::function<double(double)> gN;
#ifdef USE_MPI
  // Check if the local north boundary is part of the global north boundary
  if (mpi_neighbors.north == MPI_PROC_NULL) {
#endif
    // The local north boundary coincides with the global north boundary
    gN = [k](double x) { return sin(2 * M_PI * x) * sinh(2 * M_PI); };
#ifdef USE_MPI
  } else {
    // The local north boundary is an interior ghost layer, initialized to zero
    gN = [k](double) { return 0.0; };
  }
#endif

  /** East, South and West boundary conditions are all 0 */
  std::function<double(double)> g0 = [k](double) { return 0.0; };

  /******************************* Solve PDE ********************************/
  // Instantiate solver (local instance)
  Solver solver(nx, ny, xmin, xmax, ymin, ymax, h, k, fun, gN, g0, g0, g0);

  // Run solver iterations
  for (size_t iter = 0; iter < iterations; ++iter) {
    // Perform a single stencil Jacobi iteration
    solver.iterate();

#ifdef USE_MPI
    // Non-blocking send of boundary conditions to all neighbors
    MPI_Request mpi_requests[4];
    int mpi_request_count = 0;

    if (mpi_neighbors.north != MPI_PROC_NULL) {
      auto bound = solver.read_boundary(Solver::Dir::North);
      MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE, mpi_neighbors.north,
                iter, mpi_comm_grid, &mpi_requests[mpi_request_count++]);
    }
    if (mpi_neighbors.east != MPI_PROC_NULL) {
      auto bound = solver.read_boundary(Solver::Dir::East);
      MPI_Isend(bound.data(), 1, mpi_type_row, mpi_neighbors.east,
                iter, mpi_comm_grid, &mpi_requests[mpi_request_count++]);
    }
    if (mpi_neighbors.south != MPI_PROC_NULL) {
      auto bound = solver.read_boundary(Solver::Dir::South);
      MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE, mpi_neighbors.south,
                iter, mpi_comm_grid, &mpi_requests[mpi_request_count++]);
    }
    if (mpi_neighbors.west != MPI_PROC_NULL) {
      auto bound = solver.read_boundary(Solver::Dir::West);
      MPI_Isend(bound.data(), 1, mpi_type_row, mpi_neighbors.west,
                iter, mpi_comm_grid, &mpi_requests[mpi_request_count++]);
    }

    // Get new boundary conditions using a blocking receive
    MPI_Status mpi_status;
    if (mpi_neighbors.north != MPI_PROC_NULL) {
      auto bound = solver.write_boundary(Solver::Dir::North);
      MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE, mpi_neighbors.north,
               iter, mpi_comm_grid, &mpi_status);
    }
    if (mpi_neighbors.east != MPI_PROC_NULL) {
      auto bound = solver.write_boundary(Solver::Dir::East);
      MPI_Recv(bound.data(), 1, mpi_type_row, mpi_neighbors.east,
               iter, mpi_comm_grid, &mpi_status);
    }
    if (mpi_neighbors.south != MPI_PROC_NULL) {
      auto bound = solver.write_boundary(Solver::Dir::South);
      MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE, mpi_neighbors.south,
               iter, mpi_comm_grid, &mpi_status);
    }
    if (mpi_neighbors.west != MPI_PROC_NULL) {
      auto bound = solver.write_boundary(Solver::Dir::West);
      MPI_Recv(bound.data(), 1, mpi_type_row, mpi_neighbors.west,
               iter, mpi_comm_grid, &mpi_status);
    }

    // Complete the outstanding non-blocking sends before the next iteration
    // reuses the request array and the boundary buffers.
    MPI_Waitall(mpi_request_count, mpi_requests, MPI_STATUSES_IGNORE);
#endif
  }

  /***************************** Solution Stats *****************************/
  // Get solution
  Matrix& solution = solver.solution();
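  // The analytic reference computed below is u(x, y) = sin(2 pi x) sinh(2 pi y).
  // A short check that it solves the PDE from the file header:
  //
  //   u_xx = -(2 pi)^2 u,   u_yy = +(2 pi)^2 u   =>   Du = u_xx + u_yy = 0,
  //
  // hence, with k = 2 pi,
  //
  //   -Du + k^2 u = k^2 sin(2 pi x) sinh(2 pi y) = f(x, y),
  //
  // and the Dirichlet boundary values stated in the header are matched as well.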
  // Compute analytic (true) solution
  Matrix analytic(solution.nrow(), solution.ncol());
  for (size_t j = 0; j < analytic.ncol(); ++j) {
    for (size_t i = 0; i < analytic.nrow(); ++i) {
      analytic(i, j) = sin(2 * M_PI * solver.X(i)) * sinh(2 * M_PI * solver.Y(j));
    }
  }

#ifdef USE_MPI
  // Frobenius norm accumulator
  double frob_norm_sendbuf[2] = {
      solver.resid_norm(),
      solution.dist(analytic, 1, 1, 1, 1)
  };
  // Square the norms (the accumulated global result is the square root of the
  // sum of the squared partial norms)
  frob_norm_sendbuf[0] = frob_norm_sendbuf[0] * frob_norm_sendbuf[0];
  frob_norm_sendbuf[1] = frob_norm_sendbuf[1] * frob_norm_sendbuf[1];

  // Maximum norm accumulator
  double max_norm_sendbuf[2] = {
      solver.resid_norm(),
      solution.dist(analytic, 1, 1, 1, 1)
  };

  // Global result reduction buffers
  double frob_norm_recvbuf[2], max_norm_recvbuf[2];

  // Accumulate global stats by sum/max reduction of the local stats
  MPI_Reduce(frob_norm_sendbuf, frob_norm_recvbuf, 2, MPI_DOUBLE, MPI_SUM, 0,
             MPI_COMM_WORLD);
  MPI_Reduce(max_norm_sendbuf, max_norm_recvbuf, 2, MPI_DOUBLE, MPI_MAX, 0,
             MPI_COMM_WORLD);

  // Finish by taking the square root of the accumulated global squared
  // Frobenius norms
  frob_norm_recvbuf[0] = std::sqrt(frob_norm_recvbuf[0]);
  frob_norm_recvbuf[1] = std::sqrt(frob_norm_recvbuf[1]);
#else
  double frob_norm_recvbuf[2] = {
      solver.resid_norm(),
      solution.dist(analytic, 1, 1, 1, 1)
  };
  double max_norm_recvbuf[2] = {
      solver.resid_norm(),
      solution.dist(analytic, 1, 1, 1, 1)
  };
#endif

  // calculate run-time
  auto stop = std::chrono::high_resolution_clock::now();
  auto time =
      std::chrono::duration_cast<std::chrono::duration<double>>(stop - start)
          .count();

  // Report global stats (from "root" only)
  if (mpi_world_rank == 0) {
    std::cout << "Time [sec]: " << std::setw(15) << time
              << "\n|| residuals ||_2: " << std::setw(15) << frob_norm_recvbuf[0]
              << "\n|| residuals ||_inf: " << std::setw(15) << max_norm_recvbuf[0]
              << "\n|| error ||_2: " << std::setw(15) << frob_norm_recvbuf[1]
              << "\n|| error ||_inf: " << std::setw(15) << max_norm_recvbuf[1]
              << std::endl;
  }

  // MPI shutdown/cleanup
#ifdef USE_MPI
  MPI_Finalize();
#endif

  return 0;
}
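// Example invocation (illustrative; the actual build target name comes from
// the `Makefile` and may differ from `jacobi` used here):
//
//   mpirun -np 9 ./jacobi 2D 250 10000    (MPI build, 3 x 3 process grid)
//   ./jacobi 2D 250 10000                 (serial build; arg 1 is not parsed)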