From 3b07e2cd5522a550f75bdc5483fb9a60ab7cebb3 Mon Sep 17 00:00:00 2001
From: daniel
Date: Sat, 12 Mar 2022 17:09:57 +0100
Subject: [PATCH] add: ex_01 domain to solver, add: MPI examples

---
 Exercise_01/Makefile                     |   3 +-
 Exercise_01/Matrix.h                     |  31 ++-
 Exercise_01/Solver.h                     |  27 ++-
 Exercise_01/examples/MPI_Cart_coords.cpp |  91 +++++++
 Exercise_01/examples/MPI_RowSums.cpp     | 288 +++++++++++++++++++++++
 Exercise_01/main.cpp                     |  35 +--
 6 files changed, 437 insertions(+), 38 deletions(-)
 create mode 100644 Exercise_01/examples/MPI_Cart_coords.cpp
 create mode 100644 Exercise_01/examples/MPI_RowSums.cpp

diff --git a/Exercise_01/Makefile b/Exercise_01/Makefile
index d928f75..19dbdbb 100644
--- a/Exercise_01/Makefile
+++ b/Exercise_01/Makefile
@@ -12,9 +12,8 @@ mpi: main.cpp Matrix.h Solver.h
 
 all: seriel mpi
 
-test: seriel mpi
+test: seriel
 	./$(OUT)_seriel 120 10
-	mpirun -n 4 ./$(OUT)_mpi 120 10
 
 clean:
 	rm -f *.o main $(OUT)_seriel $(OUT)_mpi
diff --git a/Exercise_01/Matrix.h b/Exercise_01/Matrix.h
index f6d98a3..a8ecc93 100644
--- a/Exercise_01/Matrix.h
+++ b/Exercise_01/Matrix.h
@@ -2,6 +2,7 @@
 
 #include
 #include
+#include
 #include
 #define _USE_MATH_DEFINES /* enables math constants from cmath */
 #include <cmath>
@@ -17,14 +18,28 @@ enum Norm { Frob, Max };
 
 template <typename T> class Matrix {
 public:
-    Matrix(size_t nrow, size_t ncol) : _nrow{nrow}, _ncol{ncol} {
-        _data.reserve(nrow * ncol);
-    };
-    Matrix(size_t nrow, size_t ncol, T elem) : Matrix(nrow, ncol) {
-        for (size_t i = 0; i < nrow * ncol; ++i) {
-            _data[i] = elem;
-        }
-    };
+    // Default Constructor
+    Matrix() = default;
+    // Creation Constructor (0 Matrix)
+    Matrix(size_t nrow, size_t ncol) :
+        _nrow{nrow},
+        _ncol{ncol},
+        _data(nrow * ncol) { };
+    // Creation Constructor with default Element (for example all elements 1)
+    Matrix(size_t nrow, size_t ncol, const T& elem) :
+        _nrow{nrow},
+        _ncol{ncol},
+        _data(nrow * ncol, elem) { };
+    // Copy Constructor
+    Matrix(const Matrix& A) :
+        _nrow{A._nrow},
+        _ncol{A._ncol},
+        _data(A._data) { };
+    // // Move constructor // TODO:
+    // Matrix(Matrix&& A) :
+    //     _nrow{A._nrow},
+    //     _ncol{A._ncol},
+    //     _data(std::move(A._data)) { };
 
     size_t nrow() const { return _nrow; };
     size_t ncol() const { return _ncol; };
diff --git a/Exercise_01/Solver.h b/Exercise_01/Solver.h
index 62be119..0b964d8 100644
--- a/Exercise_01/Solver.h
+++ b/Exercise_01/Solver.h
@@ -16,7 +16,10 @@ struct Stencil {
 
 class Solver {
 public:
-    Solver(const size_t nx, const size_t ny, const double h, const double k,
+    Solver(const size_t nx, const size_t ny,
+           const double xmin, const double xmax,
+           const double ymin, const double ymax,
+           const double h, const double k,
            std::function<double(double, double)> fun,
            std::function<double(double)> gN,
            std::function<double(double)> gE,
@@ -24,8 +27,9 @@ public:
            std::function<double(double)> gW
     ) :
         _iter{0},
-        _nx{nx},
-        _ny{ny},
+        _nx{nx}, _ny{ny},
+        _xmin{xmin}, _xmax{xmax},
+        _ymin{ymin}, _ymax{ymax},
         _stencil{ 4.0 / (h * h) + 4.0 * k * k,  /* Center */
                  -1.0 / (h * h),                /* Bottom */
                  -1.0 / (h * h),                /* Left */
@@ -58,12 +62,14 @@ public:
         for (size_t y = 0; y < ny; ++y) { _sol(0, y) = _tmp(0, y) = gW(Y(y)); }
     };
 
-    /* Maps x/y index to x/y position in the [0, 1] x [0, 1] domain */
+    /* Maps x/y index to x/y position in the [xmin, xmax] x [ymin, ymax] domain */
     double X(size_t x) const {
-        return static_cast<double>(x) / static_cast<double>(_nx - 1);
+        double ratio = static_cast<double>(x) / static_cast<double>(_nx - 1);
+        return _xmin + ratio * (_xmax - _xmin);
     }
     double Y(size_t y) const {
-        return static_cast<double>(y) / static_cast<double>(_ny - 1);
+        double ratio = static_cast<double>(y) / static_cast<double>(_ny - 1);
+        return _ymin + ratio * (_ymax - _ymin);
     }
 
     enum class Dir {
@@ -84,6 +90,11 @@
         }
     };
 
+    /** Solution getter */
+    Matrix<double>& solution() {
+        return _sol;
+    }
+
     /**
     * Performs a single Jacobi iteration
     */
@@ -107,6 +118,10 @@ private:
     size_t _iter;            /*< Iteration count */
     const size_t _nx;        /*< Number of X-axis grid points */
     const size_t _ny;        /*< Number of Y-axis grid points */
+    const double _xmin;      /*< Domain X-min (west border pos) */
+    const double _xmax;      /*< Domain X-max (east border pos) */
+    const double _ymin;      /*< Domain Y-min (south border pos) */
+    const double _ymax;      /*< Domain Y-max (north border pos) */
     const Stencil _stencil;  /*< Simple, + shaped stencil */
     Matrix<double> _rhs;     /*< Grid evaluated RHS of the PDE = f(x, y) */
     Matrix<double> _sol;     /*< Solution after _iter iterations */
diff --git a/Exercise_01/examples/MPI_Cart_coords.cpp b/Exercise_01/examples/MPI_Cart_coords.cpp
new file mode 100644
index 0000000..a1cc195
--- /dev/null
+++ b/Exercise_01/examples/MPI_Cart_coords.cpp
@@ -0,0 +1,91 @@
+
+#include <mpi.h>
+#include <iostream>
+#include
+
+/* Quick and dirty primality test */
+bool is_prime(int num) {
+    if (num == 2 || num == 3) { return true; }
+    if ((num % 2) == 0 || (num % 3) == 0) { return false; }
+
+    for (int div = 5; div * div <= num; div += 6) {
+        if ((num % div) == 0 || ((num + 2) % div) == 0) { return false; }
+    }
+
+    return true;
+}
+
+int main(int argn, char* argv[]) {
+
+    MPI_Init(nullptr, nullptr);
+
+    int mpi_size;
+    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
+
+    // Check if a 2D grid is (more or less) possible
+    if ((mpi_size > 2) && is_prime(mpi_size)) {
+        // Print warning only once (by rank 0)
+        int mpi_rank;
+        MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+        if (mpi_rank == 0) {
+            std::cout << "Warning: 2D grid degenerates to 1D." << std::endl;
+        }
+    }
+
+    // Group processes into a cartesian communication topology
+    int mpi_dims[2] = {0, 0};
+    MPI_Dims_create(mpi_size, 2, mpi_dims);
+
+    // Setup a cartesian topology communicator (NON-cyclic)
+    const int mpi_periods[2] = {false, false};
+    MPI_Comm mpi_comm_grid;
+    MPI_Cart_create(
+        MPI_COMM_WORLD,  // Old Communicator
+        2,               // number of dimensions
+        mpi_dims,        // grid dimensions
+        mpi_periods,     // grid periodicity
+        true,            // allow process reordering
+        &mpi_comm_grid
+    );
+
+    // Get rank with respect to the grid communicator
+    int mpi_rank;
+    MPI_Comm_rank(mpi_comm_grid, &mpi_rank);
+
+    // Get coordinates with respect to the grid communicator
+    int mpi_coords[2];
+    MPI_Cart_coords(mpi_comm_grid, mpi_rank, 2, mpi_coords);
+
+    // Get direct neighbours in the communication grid
+    struct { int north; int east; int south; int west; } mpi_neighbours;
+    // Get X-direction (dim 0) neighbours
+    MPI_Cart_shift(
+        mpi_comm_grid,           // grid communicator
+        0,                       // axis index (0 <-> X)
+        1,                       // offset
+        &(mpi_neighbours.west),  // negated offset neighbour
+        &(mpi_neighbours.east)   // offset neighbour
+    );
+    // Get Y-direction (dim 1) neighbours
+    MPI_Cart_shift(
+        mpi_comm_grid,
+        1,                       // axis index (1 <-> Y)
+        1,
+        &(mpi_neighbours.south),
+        &(mpi_neighbours.north)
+    );
+
+    // Print grid communicator location and direct neighbours (offset +-1)
+    std::cout << "Rank " << mpi_rank << ": Coords ("
+        << mpi_coords[0] << ", "
+        << mpi_coords[1] << ") - Neighbours: ";
+    if (mpi_neighbours.west  != MPI_PROC_NULL) { std::cout << mpi_neighbours.west  << ' '; }
+    if (mpi_neighbours.east  != MPI_PROC_NULL) { std::cout << mpi_neighbours.east  << ' '; }
+    if (mpi_neighbours.south != MPI_PROC_NULL) { std::cout << mpi_neighbours.south << ' '; }
+    if (mpi_neighbours.north != MPI_PROC_NULL) { std::cout << mpi_neighbours.north << ' '; }
+    std::cout << std::endl;
+
+    MPI_Finalize();
+
+    return 0;
+}
diff --git a/Exercise_01/examples/MPI_RowSums.cpp b/Exercise_01/examples/MPI_RowSums.cpp
new file mode 100644
index 0000000..90e6315
--- /dev/null
+++ b/Exercise_01/examples/MPI_RowSums.cpp
@@ -0,0 +1,288 @@
+/**
+ * Computes row sums of a matrix containing (0-indexed) consecutive numbers
+ * in column major order (yes, useless but illustrative)
+ *
+ * Example:
+ *   matrix (11 x 7):         row_sums (11):
+ *      0 11 22 33 44 55 66   ->  231
+ *      1 12 23 34 45 56 67   ->  238
+ *      2 13 24 35 46 57 68   ->  245
+ *      3 14 25 36 47 58 69   ->  252
+ *      4 15 26 37 48 59 70   ->  259
+ *      5 16 27 38 49 60 71   ->  266
+ *      6 17 28 39 50 61 72   ->  273
+ *      7 18 29 40 51 62 73   ->  280
+ *      8 19 30 41 52 63 74   ->  287
+ *      9 20 31 42 53 64 75   ->  294
+ *     10 21 32 43 54 65 76   ->  301
+ *
+ * Each worker process computes one row sum at a time. Therefore, the scheduler
+ * process (rank 0) sends one row to the worker (rank > 0) using an
+ * MPI_Type_vector representing a sparse row layout, which is received by the
+ * worker in a dense layout (MPI_DOUBLE).
+ *
+ * Send/Recv Example:
+ *   Data sent (entries are the array indices):
+ *      -  -  -  -  -  -  -
+ *      -  -  -  -  -  -  -
+ *      -  -  -  -  -  -  -
+ *      -  -  -  -  -  -  -
+ *      -  -  -  -  -  -  -
+ *      5 16 27 38 49 60 71
+ *      -  -  -  -  -  -  -
+ *      -  -  -  -  -  -  -
+ *      -  -  -  -  -  -  -
+ *      -  -  -  -  -  -  -
+ *      -  -  -  -  -  -  -
+ *
+ *   Received (indices 0, 1, 2, 3, 4, 5, 6):
+ *      5 16 27 38 49 60 71
+ *
+ * Usage:
+ *   mpirun -n <nproc> MPI_RowSums [<nrow> [<ncol>]]
+ *
+ *   <nproc> must be at least 2
+ *   <nrow>  number of rows, between 1 and 1024, defaults to 11
+ *   <ncol>  number of cols, between 1 and 1024, defaults to 7
+ *
+ *   on parse error, <nrow> and <ncol> are set to their defaults.
+ *
+ * Compile:
+ *   mpic++ -Wall -Wpedantic -pedantic MPI_RowSums.cpp -o MPI_RowSums
+ *
+ * Interesting Parameters:
+ *   # Single Worker Process
+ *   mpirun -n 2 MPI_RowSums 1 10
+ *   mpirun -n 2 MPI_RowSums 20 10
+ *   # Fewer rows than workers (some processes don't get any work at all)
+ *   mpirun -n 4 MPI_RowSums 2 10
+ *   # Classic Example (bunch of work and some workers)
+ *   mpirun -n 4 MPI_RowSums 100 42
+ */
+
+#include <mpi.h>
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+
+int min(int a, int b) { return a < b ? a : b; }
+
+int main(int argn, char* argv[]) {
+
+    // Build a (simple and barebones, column major) matrix model, it's just a
+    // vector with external row/col count.
+    int nrow = 11; // defaults
+    int ncol = 7;  // defaults
+
+    // Parse arguments to set nrow, ncol (sloppy, but that's not the point of
+    // this example)
+    if (argn > 1) {
+        nrow = atoi(argv[1]);
+        if (nrow < 1 || nrow > 1024) {
+            nrow = 11;
+        }
+    }
+    if (argn > 2) {
+        ncol = atoi(argv[2]);
+        if (ncol < 1 || ncol > 1024) {
+            ncol = 7;
+        }
+    }
+
+    // Initialize MPI (always required)
+    MPI_Init(nullptr, nullptr);
+
+    // Allocate MPI Settings
+    int mpi_size; /*< Number of processes */
+    int mpi_rank; /*< This process rank (a.k.a. the MPI process ID) */
+    // Set/Get MPI Settings
+    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+
+    // Check if there is at least a single worker, otherwise abort right away
+    if (mpi_size < 2) {
+        std::cerr
+            << "Nr. processes must be at least 2! (No workers -> No work done)"
+            << std::endl;
+        MPI_Finalize();
+        return -1;
+    }
+
+    // Setup shutdown tag (tags must be non-negative)
+    int shutdown = nrow + 1; // unreachable row index
+
+    // Sparse row data type, elements are strided in the matrix
+    MPI_Datatype mpi_type_row;
+    MPI_Type_vector(ncol, 1, nrow, MPI_DOUBLE, &mpi_type_row);
+    MPI_Type_commit(&mpi_type_row);
+
+    // Distinguish between workers (rank > 0) and scheduler (rank = 0)
+    if (mpi_rank == 0) {
+        // Create row sums result array
+        std::vector<double> row_sums(nrow);
+
+        // Construct a nrow x ncol matrix (and enumerate elems)
+        std::vector<double> matrix(nrow * ncol);
+        for (size_t i = 0; i < matrix.size(); ++i) {
+            matrix[i] = static_cast<double>(i);
+        }
+
+        // tracks processed rows
+        int row_counter = 0;
+
+        // Start by sending some data to all workers
+        for (int rank = 1; rank < min(mpi_size, nrow + 1); ++rank) {
+            // Send rows
+            MPI_Send(
+                matrix.data() + row_counter, // Pos of first row elem
+                1,                           // Send one row
+                mpi_type_row,                // row datatype (sparse layout)
+                rank,                        // target worker process
+                row_counter,                 // tag = row index
+                MPI_COMM_WORLD
+            );
+
+            // Increment processed row count
+            row_counter++;
+        }
+
+        // In case of less work than workers (nrow < mpi_size) send remaining
+        // workers home (all ranks with mpi_rank > nrow get the shutdown signal)
+        for (int rank = min(mpi_size, nrow + 1); rank < mpi_size; ++rank) {
+            // Empty workload with shutdown tag
+            MPI_Send(
+                nullptr,   // no data
+                0,         // no data
+                MPI_CHAR,  // something
+                rank,      // ranks without work
+                shutdown,  // tag
+                MPI_COMM_WORLD
+            );
+        }
+
+        // Repeat till all rows are processed
+        while (row_counter < nrow) {
+            double row_sum;
+
+            // First listen for any worker process to respond (rank finished)
+            MPI_Status mpi_status;
+            MPI_Recv(
+                &row_sum,       // responding rank result
+                1,              // row sum is a scalar
+                MPI_DOUBLE,     // and has type double
+                MPI_ANY_SOURCE, // listen for everything
+                MPI_ANY_TAG,    // unknown who finishes first
+                MPI_COMM_WORLD,
+                &mpi_status
+            );
+
+            // Write result to row sums
+            row_sums[mpi_status.MPI_TAG] = row_sum;
+
+            // Send the next row to process
+            MPI_Send(
+                matrix.data() + row_counter,
+                1,
+                mpi_type_row,
+                mpi_status.MPI_SOURCE, // responding rank gets new work
+                row_counter,
+                MPI_COMM_WORLD
+            );
+
+            // Increment processed row count
+            row_counter++;
+        }
+
+        // Now collect remaining results and send a "shutdown" message
+        for (int rank = 1; rank < min(mpi_size, nrow + 1); ++rank) {
+            double row_sum;
+
+            // First listen for any rank to respond (a rank finished working)
+            MPI_Status mpi_status;
+            MPI_Recv(
+                &row_sum,
+                1,
+                MPI_DOUBLE,
+                MPI_ANY_SOURCE,
+                MPI_ANY_TAG,
+                MPI_COMM_WORLD,
+                &mpi_status
+            );
+
+            // Write result to row sums
+            row_sums[mpi_status.MPI_TAG] = row_sum;
+
+            // Send rank shutdown message (work done)
+            MPI_Send(
+                nullptr,               // no data
+                0,                     // no data
+                MPI_CHAR,              // something
+                mpi_status.MPI_SOURCE, // responding rank gets shutdown
+                shutdown,              // tag
+                MPI_COMM_WORLD
+            );
+        }
+
+        // Report final result (row sums)
+        std::cout << "Rank 0: Done.\n\033[1m";
+        for (double& val : row_sums) {
+            std::cout << val << ' ';
+        }
+        std::cout << "\033[0m\nCheck result with the following R code:\n"
+            << "    rowSums(matrix(seq(0, len = " << (nrow * ncol) << "), "
+            << nrow << ", " << ncol << "))" << std::endl;
+    } else {
+        // Dense row representation, NO stride
+        std::vector<double> row(ncol);
+        // Counts the number of processed rows (for analytic purposes)
+        int work_count = 0;
+
+        // Repeat till a shutdown signal is sent (shutdown tag)
+        while (true) {
+
+            // Receive new work
+            MPI_Status mpi_status;
+            MPI_Recv(
+                row.data(),  // Raw row vector data
+                ncol,        // nr of row elements
+                MPI_DOUBLE,  // simple double data type (dense layout)
+                0,           // Listen to main rank (rank 0)
+                MPI_ANY_TAG, // tag (at this point) unknown
+                MPI_COMM_WORLD,
+                &mpi_status
+            );
+
+            // check shutdown -> go home, all work done
+            if (mpi_status.MPI_TAG == shutdown) {
+                break;
+            }
+
+            // Process new work (compute row sum)
+            double sum = 0;
+            for (double& val : row) {
+                sum += val;
+            }
+
+            // Increment work count (track number of completed jobs)
+            work_count++;
+
+            // Send result back to scheduler
+            MPI_Send(
+                &sum,               // processing result
+                1,                  // single scalar
+                MPI_DOUBLE,
+                0,                  // target scheduler (rank 0)
+                mpi_status.MPI_TAG, // row index
+                MPI_COMM_WORLD
+            );
+        }
+
+        // Report shutdown (worker goes home)
+        std::cout << "Rank " << mpi_rank << ": processed "
+            << work_count << " rows -> shutdown" << std::endl;
+    }
+
+    // Shutdown MPI (always required)
+    MPI_Finalize();
+
+    return 0;
+}
diff --git a/Exercise_01/main.cpp b/Exercise_01/main.cpp
index 8fe53a3..32e6208 100644
--- a/Exercise_01/main.cpp
+++ b/Exercise_01/main.cpp
@@ -62,33 +62,24 @@ int main(int argn, char* argv[]) {
 
     /** East, South and West boundary conditions are simply = 0 */
     std::function<double(double)> g0 = [k](double) { return 0.0; };
 
-    // Instanciate solver
-    Solver solver(nx, ny, h, k, fun, gN, g0, g0, g0);
-
-    // Set Diriclet boundary conditions (East, South and West to 0)
-    for (auto dir : { Solver::Dir::E, Solver::Dir::S, Solver::Dir::W }) {
-        MatrixView boundary = solver.boundary(dir);
-        for (size_t i = 0; i < boundary.size(); ++i) {
-            boundary(i) = 0.0;
-        }
-    }
-    // The North boundary condition is g(x) = sin(2 pi x) sinh(2 pi)
-    {
-        MatrixView boundary = solver.boundary(Solver::Dir::North);
-        for (size_t i = 0; i < boundary.size(); ++i) {
-            double x = static_cast<double>(i) / static_cast<double>(nx - 1);
-            boundary(i) = sin(M_2_PI * x) * sinh(M_2_PI);
-        }
-    }
-
     /******************************* Solve PDE ********************************/
-    // Run solver iterations
-    for (size_t iter = 0; iter < iterations; ++iter) {
-        solver.iterate();
+    Matrix<double> solution;
+    {
+        // Instantiate solver (local instance)
+        Solver solver(nx, ny, 0., 1., 0., 1., h, k, fun, gN, g0, g0, g0);
+
+        // Run solver iterations
+        for (size_t iter = 0; iter < iterations; ++iter) {
+            solver.iterate();
+        }
+
+        // extract solution
+        solution = std::move(solver.solution());
     }
 
     /****************************** Tests/Report ******************************/
+    std::cout << solution << std::endl;
 
     // MPI shutdown/cleanup
 #ifdef USE_MPI