From 644874f27fe03d20265a87f3a883b573d2f7a9ce Mon Sep 17 00:00:00 2001
From: daniel
Date: Sun, 13 Mar 2022 17:58:02 +0100
Subject: [PATCH] wip: ex_01

---
 Exercise_01/Makefile                 |   8 +-
 Exercise_01/Matrix.h                 |  21 +-
 Exercise_01/Solver.h                 |  72 ++++--
 Exercise_01/examples/MPI_RowSums.cpp |  11 +-
 Exercise_01/main.cpp                 | 349 ++++++++++++++++++++++++---
 5 files changed, 396 insertions(+), 65 deletions(-)

diff --git a/Exercise_01/Makefile b/Exercise_01/Makefile
index 19dbdbb..1f05476 100644
--- a/Exercise_01/Makefile
+++ b/Exercise_01/Makefile
@@ -12,8 +12,10 @@ mpi: main.cpp Matrix.h Solver.h

 all: seriel mpi

-test: seriel
-	./$(OUT)_seriel 120 10
+test: seriel mpi
+	./$(OUT)_seriel 1D 120 1000
+	mpirun -n 4 ./$(OUT)_mpi 2D 120 1000
+#	for script in *.R; do R --vanilla < $${script}; done

 clean:
-	rm -f *.o main $(OUT)_seriel $(OUT)_mpi
+	rm -f *.o *.R main $(OUT)_seriel $(OUT)_mpi
diff --git a/Exercise_01/Matrix.h b/Exercise_01/Matrix.h
index a8ecc93..67fada7 100644
--- a/Exercise_01/Matrix.h
+++ b/Exercise_01/Matrix.h
@@ -50,11 +50,17 @@ public:
     T* data() { return _data.data(); };
     const T* data() const { return _data.data(); };

-    T& operator()(int i) { return _data[index(i)]; };
-    const T& operator()(int i) const { return _data[index(i)]; };
+    T& operator()(int i) { return _data[i]; };
+    const T& operator()(int i) const { return _data[i]; };

-    T& operator()(int i, int j) { return _data[index(i, j)]; }
-    const T& operator()(int i, int j) const { return _data[index(i, j)]; }
+    T& operator()(int i, int j) { return _data[i + j * _nrow]; }
+    const T& operator()(int i, int j) const { return _data[i + j * _nrow]; }
+
+    T& get(int i) { return _data[index(i)]; };
+    const T& get(int i) const { return _data[index(i)]; };
+
+    T& get(int i, int j) { return _data[index(i, j)]; }
+    const T& get(int i, int j) const { return _data[index(i, j)]; }
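+
+    // operator() is now the raw, unchecked column-major access, while get()
+    // still routes through index(), which presumably also resolves negative
+    // indices from the end (the Solver relies on get(x, -1) addressing the
+    // last column).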

     template <Norm N>
     double norm() const {
@@ -98,13 +104,13 @@ public:
         if constexpr (N == Norm::Frob) {
             // Sum of Squared differences
             for (size_t i = 0; i < nelem; ++i) {
-                T diff = this(i) - A(i);
+                T diff = (*this)(i) - A(i);
                 accum += diff * diff;
             }
             return std::sqrt(static_cast<double>(accum));
         } else { // Maximum Norm
             for (size_t i = 0; i < nelem; ++i) {
-                T diff = std::abs(this(i) - A(i));
+                T diff = std::abs((*this)(i) - A(i));
                 if (accum < diff) {
                     accum = diff;
                 }
@@ -174,6 +180,9 @@ public:
     T& operator()(int i) { return _matrix(_index + i * _stride); };
     const T& operator()(int i) const { return _matrix(_index + i * _stride); };

+    T& get(int i) { return _matrix.get(_index + i * _stride); };
+    const T& get(int i) const { return _matrix.get(_index + i * _stride); };
+
 protected:
     Matrix<T>& _matrix;
     size_t _index;
diff --git a/Exercise_01/Solver.h b/Exercise_01/Solver.h
index 0b964d8..78f27ff 100644
--- a/Exercise_01/Solver.h
+++ b/Exercise_01/Solver.h
@@ -35,9 +35,9 @@ public:
           -1.0 / (h * h),  /* Left  */
           -1.0 / (h * h),  /* Up    */
           -1.0 / (h * h)}, /* Right */
-        _rhs(nx, ny),
-        _sol(nx, ny, 0.0),
-        _tmp(nx, ny)
+        _rhs(nx, ny, 0.),
+        _sol(nx, ny, 0.),
+        _tmp(nx, ny, 0.)
     {
         // Initialize Right Hand Side _rhs(x, y) = f(X(x), Y(y))
         // Note that the correspondence to the usual matrix indexing scheme as
@@ -53,9 +53,9 @@ public:

         // Set Dirichlet boundary conditions
         // North
-        for (size_t x = 0; x < nx; ++x) { _sol(x, -1) = _tmp(x, -1) = gN(X(x)); }
+        for (size_t x = 0; x < nx; ++x) { _sol.get(x, -1) = _tmp.get(x, -1) = gN(X(x)); }
         // East
-        for (size_t y = 0; y < ny; ++y) { _sol(-1, y) = _tmp(-1, y) = gE(Y(y)); }
+        for (size_t y = 0; y < ny; ++y) { _sol.get(-1, y) = _tmp.get(-1, y) = gE(Y(y)); }
         // South
         for (size_t x = 0; x < nx; ++x) { _sol(x, 0) = _tmp(x, 0) = gS(X(x)); }
         // West
@@ -77,7 +77,20 @@ public:
         N = North, E = East, S = South, W = West
     };

-    MatrixView<double> boundary(enum Dir dir) {
+    MatrixView<double> read_boundary(enum Dir dir) {
+        switch (dir) {
+            case Dir::North:
+                return Col(_sol, -2);
+            case Dir::East:
+                return Row(_sol, -2);
+            case Dir::South:
+                return Col(_sol, 1);
+            default: // Dir::West
+                return Row(_sol, 1);
+        }
+    };
+
+    MatrixView<double> write_boundary(enum Dir dir) {
         switch (dir) {
             case Dir::North:
                 return Col(_sol, -1);
@@ -91,13 +104,11 @@ public:
     };

     /** Solution getter */
-    Matrix<double>& solution() {
-        return _sol;
-    }
+    Matrix<double>& solution() { return _sol; }
+    /** Right Hand Side getter (grid evaluated f(x, y)) */
+    Matrix<double>& rhs() { return _rhs; }

-    /**
-     * Performs a single Jacobian Iteration
-     */
+    /** Performs a single Jacobi iteration */
     void iterate() {
         double s = 1.0 / _stencil.C;

@@ -114,6 +125,37 @@ public:
         _iter++;
     };

+    /** Computes the L2 (Frob) or maximum (Max) norm of the residual, selected by N */
+    template <Norm N>
+    double resid_norm() {
+        // Enforce valid Norm types
+        static_assert(N == Norm::Frob || N == Norm::Max);
+
+        // Norm accumulator
+        double accum = 0.0;
+
+        for (size_t y = 1; y < _ny - 1; ++y) {
+            for (size_t x = 1; x < _nx - 1; ++x) {
+                double resid = _rhs(x, y) - (_stencil.C * _sol(x, y)
+                    + _stencil.B * _sol(x, y - 1)
+                    + _stencil.L * _sol(x - 1, y)
+                    + _stencil.U * _sol(x, y + 1)
+                    + _stencil.R * _sol(x + 1, y));
+                if constexpr (N == Norm::Frob) {
+                    accum += resid * resid;
+                } else { // Maximum Norm
+                    accum = std::max(std::abs(resid), accum);
+                }
+            }
+        }
+
+        if constexpr (N == Norm::Frob) {
+            return std::sqrt(accum);
+        } else { // Maximum Norm
+            return accum;
+        }
+    }
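+
+    // Usage sketch: solver.resid_norm<Norm::Frob>() yields the L2 norm and
+    // solver.resid_norm<Norm::Max>() the maximum norm of the residual
+    // r = rhs - A * sol over all interior grid points.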
+
 private:
     size_t _iter;           /*< Iteration count */
     const size_t _nx;       /*< Number of X-axis grid points */
     const size_t _ny;       /*< Number of Y-axis grid points */
     const double _xmin;     /*< Domain X-min (west border pos)  */
     const double _xmax;     /*< Domain X-max (east border pos)  */
     const double _ymin;     /*< Domain Y-min (south border pos) */
     const double _ymax;     /*< Domain Y-max (north border pos) */
-    const Stencil _stencil; /*< Simple, + shaped stencil */
-    Matrix<double> _rhs;    /*< Grid evaluated RHS of the PDE = f(x, y) */
-    Matrix<double> _sol;    /*< Solution after _iter iterations */
+    const Stencil _stencil; /*< Simple '+' shaped stencil */
+    Matrix<double> _rhs;    /*< Grid evaluated RHS of the PDE, f(x, y) */
+    Matrix<double> _sol;    /*< Solution after `_iter` iterations */
     Matrix<double> _tmp;    /*< Temp. datablock, used in iterate() */
 };
diff --git a/Exercise_01/examples/MPI_RowSums.cpp b/Exercise_01/examples/MPI_RowSums.cpp
index 90e6315..6d8624a 100644
--- a/Exercise_01/examples/MPI_RowSums.cpp
+++ b/Exercise_01/examples/MPI_RowSums.cpp
@@ -1,8 +1,8 @@
 /**
  * Computes row sums of a matrix containing (0-indexed) consecutive numbers
- * in row major order (yes, useless but illustrativ)
+ * in column major order
  *
- * Example:
+ * Example (entries are array indices):
  * matrix (11 x 7):          row_sums (11):
  *    0 11 22 33 44 55 66     ->  231
  *    1 12 23 34 45 56 67     ->  238
@@ -22,7 +22,7 @@
  *   worker in a dense layout (MPI_DOUBLE)
  *
  * Send/Recv Example:
- *   Data send (entries are the array indices):
+ *   Data send (entries are array indices):
  *   - - - - - - -
  *   - - - - - - -
  *   - - - - - - -
@@ -68,13 +68,12 @@ int min(int a, int b) {
     return a < b ? a : b;
 }

 int main(int argn, char* argv[]) {
-    // Build a (simple and barebones, row major) matrix model, it's just a
+    // Build a (simple and barebones, column major) matrix model, it's just a
     // vector with external row/col count.
     int nrow = 11; // defaults
     int ncol = 7;  // defaults

-    // Parse arguments to set nrow, ncol (sloppy, but thats not the point of
-    // this example)
+    // Parse arguments to set nrow, ncol (sloppy, but not the point of the example)
     if (argn > 1) {
         nrow = atoi(argv[1]);
         if (nrow < 1 || nrow > 1024) {
diff --git a/Exercise_01/main.cpp b/Exercise_01/main.cpp
index 32e6208..b56025a 100644
--- a/Exercise_01/main.cpp
+++ b/Exercise_01/main.cpp
@@ -1,9 +1,11 @@
 /**
- * g++ main.cpp -std=c++17 -Wall -Wpedantic -pedantic -o main; ./main
+ *
  */
 #include <iostream>
 #include <cstdlib>
+#include <string>
+#include <sstream>
 #include <functional>
 #define _USE_MATH_DEFINES /* enables math constants from cmath */
 #include <cmath>
@@ -11,8 +13,14 @@
 #include <mpi.h>
 #endif
+
+#include <fstream>
+#include <iomanip>
+
+
 #include "Matrix.h"
 #include "Solver.h"
+#include "utils.h"

 int main(int argn, char* argv[]) {
@@ -23,63 +31,334 @@ int main(int argn, char* argv[]) {

     // Get MPI config
     int mpi_size; /*< MPI pool size (a.k.a. total number of processes) */
-    int mpi_rank; /*< MPI rank (a.k.a. process ID in the context of MPI) */
     MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
-    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+#else
+    int mpi_size = 1;
 #endif

     /**************************** Parse Arguments *****************************/
-    if (argn < 3) {
-        std::cerr << "usage: " << argv[0] << " <resolution> <iterations>" << std::endl;
-        return -1;
-    } else if (argn > 3) {
-        std::cerr << "warning: " << "ignoring all but the first two params" << std::endl;
-    }
-    // TODO: make this proper!!!
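+    // Expected invocation: <1D|2D> <resolution> <iterations>, as exercised
+    // by the Makefile test target; the 1D/2D switch only selects the MPI
+    // process grid layout and is ignored by the serial build.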
-    size_t resolution = atol(argv[1]);
-    size_t iterations = atol(argv[2]);
-    if (resolution < 1 || resolution > 65536
-        || iterations < 1 || iterations > 65536) {
-        std::cerr << "error: parsing arguments failed" << std::endl;
-        return -1;
-    }
+    size_t dim, resolution, iterations;
+    {
+        if (argn < 4) {
+            std::cerr << "usage: " << argv[0]
+                      << " <1D|2D> <resolution> <iterations>" << std::endl;
+            return -1;
+        } else if (argn > 4) {
+            std::cerr << "warning: ignoring all but the first three params" << std::endl;
+        }
+        if (std::string(argv[1]) == "1D") {
+            dim = 1;
+        } else if (std::string(argv[1]) == "2D") {
+            dim = 2;
+        } else {
+            std::cerr << "error: parsing arg 1 failed" << std::endl;
+            return -1;
+        }
+        if (!(std::istringstream(argv[2]) >> resolution)) {
+            std::cerr << "error: parsing arg 2 failed" << std::endl;
+            return -1;
+        }
+        if (!(std::istringstream(argv[3]) >> iterations)) {
+            std::cerr << "error: parsing arg 3 failed" << std::endl;
+            return -1;
+        }
+        if (resolution < 10 || resolution > 65536) {
+            std::cerr << "error: arg 2 " << resolution
+                      << " out of domain [10, 65536]" << std::endl;
+            return -1;
+        }
+        if (iterations > 65536) {
+            std::cerr << "error: arg 3 " << iterations
+                      << " out of domain [0, 65536]" << std::endl;
+            return -1;
+        }
+    }
+
+    // Report configuration
+    std::cout << std::setfill(' ')
+#ifdef USE_MPI
+              << "using MPI: true\n"
+#else
+              << "using MPI: false\n"
+#endif
+              << "nr. processes: " << std::setw(5) << mpi_size << '\n'
+              << "resolution: " << std::setw(5) << resolution << '\n'
+              << "iterations: " << std::setw(5) << iterations << std::endl;

-    /************************* Initialize PDE Solver **************************/
-    size_t nx = resolution;
-    size_t ny = resolution;
+    /**************** Setup (local) PDE + Boundary Conditions *****************/
     const double k = M_PI;
     const double h = 1.0 / static_cast<double>(resolution - 1);

+#ifdef USE_MPI
+    // Group processes into a cartesian communication topology. Set initial
+    // values for a 1D grid.
+    int mpi_dims[2] = {mpi_size, 1};
+    // In case of a 2D grid, partition both axes as equally as possible.
+    // Note that MPI_Dims_create does not guarantee this: for 9 processes it
+    // was observed to generate a 9 x 1 grid, where the following computes a
+    // 3 x 3 grid.
+    if (dim == 2) {
+        two_factors(mpi_size, mpi_dims);
+    }
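+    // (two_factors, partition and partition_sum below come from the new
+    //  utils.h, which is not part of this patch yet. Assumed semantics:
+    //  two_factors splits mpi_size into its two closest factors, e.g.
+    //  9 -> 3 x 3 and 12 -> 4 x 3; partition(n, p, i) is the size of the
+    //  i-th of p near-equal chunks of n grid points; partition_sum(n, p, i)
+    //  is the total number of points in chunks 0 through i.)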
+
+    // Report grid topology
+    std::cout << "topology: " << dim << "D";
+    if (dim == 2) {
+        std::cout << " (" << mpi_dims[0] << " x " << mpi_dims[1] << ")";
+    }
+    std::cout << std::endl;
+
+    // Setup a cartesian topology communicator (NON-cyclic)
+    const int mpi_periods[2] = {false, false};
+    MPI_Comm mpi_comm_grid;
+    MPI_Cart_create(
+        MPI_COMM_WORLD, // old communicator
+        2,              // number of dimensions
+        mpi_dims,       // grid dimensions
+        mpi_periods,    // grid periodicity
+        true,           // allow process reordering
+        &mpi_comm_grid
+    );
+
+    // Get MPI rank
+    int mpi_world_rank; /*< MPI world rank (a.k.a. global process ID) */
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_world_rank);
+
+    // Get MPI rank with respect to the grid communicator
+    int mpi_grid_rank; /*< MPI grid rank (a.k.a. grid process ID) */
+    MPI_Comm_rank(mpi_comm_grid, &mpi_grid_rank);
+
+    // Get coordinates with respect to the grid communicator
+    int mpi_coords[2];
+    MPI_Cart_coords(mpi_comm_grid, mpi_grid_rank, 2, mpi_coords);
+
+    // Get direct neighbours in the communication grid
+    struct { int north; int east; int south; int west; } mpi_neighbours;
+    // Get X-direction (dim 0) neighbours
+    MPI_Cart_shift(
+        mpi_comm_grid,          // grid communicator
+        0,                      // axis index (0 <-> X)
+        1,                      // offset
+        &(mpi_neighbours.west), // negated offset neighbour
+        &(mpi_neighbours.east)  // offset neighbour
+    );
+    // Get Y-direction (dim 1) neighbours
+    MPI_Cart_shift(
+        mpi_comm_grid,
+        1,                      // axis index (1 <-> Y)
+        1,
+        &(mpi_neighbours.south),
+        &(mpi_neighbours.north)
+    );
+
+    // Calc local (base) grid size (without ghost layers)
+    size_t nx = partition(resolution, mpi_dims[0], mpi_coords[0]);
+    size_t ny = partition(resolution, mpi_dims[1], mpi_coords[1]);
+    // Add ghost layers for each (existing) neighbour
+    ny += (mpi_neighbours.north != MPI_PROC_NULL);
+    nx += (mpi_neighbours.east  != MPI_PROC_NULL);
+    ny += (mpi_neighbours.south != MPI_PROC_NULL);
+    nx += (mpi_neighbours.west  != MPI_PROC_NULL);
+
+    // Compute local domain [xmin, xmax] x [ymin, ymax]
+    double xmin = (mpi_neighbours.west == MPI_PROC_NULL) ? 0.0
+        : h * partition_sum(resolution, mpi_dims[0], mpi_coords[0] - 1);
+    double xmax = (mpi_neighbours.east == MPI_PROC_NULL) ? 1.0
+        : h * (partition_sum(resolution, mpi_dims[0], mpi_coords[0]) + 1);
+    double ymin = (mpi_neighbours.south == MPI_PROC_NULL) ? 0.0
+        : h * partition_sum(resolution, mpi_dims[1], mpi_coords[1] - 1);
+    double ymax = (mpi_neighbours.north == MPI_PROC_NULL) ? 1.0
+        : h * (partition_sum(resolution, mpi_dims[1], mpi_coords[1]) + 1);
+
+    // /*************************__________DEBUG__________*************************/
+    // std::cout << " (" << mpi_coords[0] << ", " << mpi_coords[1] << ") ";
+
+    // std::cout << " -> "
+    //     << "N: " << ((mpi_neighbours.north != MPI_PROC_NULL) ? '1' : '.') << ", "
+    //     << "E: " << ((mpi_neighbours.east  != MPI_PROC_NULL) ? '1' : '.') << ", "
+    //     << "S: " << ((mpi_neighbours.south != MPI_PROC_NULL) ? '1' : '.') << ", "
+    //     << "W: " << ((mpi_neighbours.west  != MPI_PROC_NULL) ? '1' : '.');
+
+    // std::cout << " -> " << nx << " x " << ny;
+
+    // std::cout << " -> " << std::setprecision(2)
+    //     << "[" << std::setw(4) << std::left << xmin
+    //     << ", " << std::setw(4) << std::left << xmax << "]"
+    //     << " x "
+    //     << "[" << std::setw(4) << std::left << ymin
+    //     << ", " << std::setw(4) << std::left << ymax << "]";
+
+    // std::cout << std::endl;
+    // /*************************__________DEBUG__________*************************/
+
+    // Create MPI vector type (for boundary condition exchange). It allows
+    // direct exchange of matrix rows (the east/west bounds): in the column
+    // major matrix layout the elements of a row are "sparse" in the sense
+    // that, in contrast to the elements of a column, they are not adjacent
+    // in memory.
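+    // Example (nx = 3, ny = 4, entries are array indices): row x = 1 is the
+    // elements {1, 4, 7, 10}, i.e. ny = 4 blocks of one double, each one
+    // stride of nx = 3 apart, which is exactly
+    // MPI_Type_vector(ny, 1, nx, MPI_DOUBLE, ...).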
+    MPI_Datatype mpi_type_row;
+    MPI_Type_vector(ny, 1, nx, MPI_DOUBLE, &mpi_type_row);
+    MPI_Type_commit(&mpi_type_row);
+#else
+    // Discretization grid resolution
+    size_t nx = resolution, ny = resolution;
+    // PDE domain borders [xmin, xmax] x [ymin, ymax] = [0, 1] x [0, 1]
+    double xmin = 0.0, xmax = 1.0, ymin = 0.0, ymax = 1.0;
+#endif

+    // Declare right hand side function f(x, y) = k^2 sin(2 pi x) sinh(2 pi y)
     std::function<double(double, double)> fun = [k](double x, double y) {
-        return k * k * sin(M_2_PI * x) * sinh(M_2_PI * y);
+        return k * k * sin(2 * M_PI * x) * sinh(2 * M_PI * y);
     };

     // Boundary conditions
-    /** North boundary condition g(x) = k^2 sin(2 pi x) sinh(2 pi) */
-    std::function<double(double)> gN = [k](double x) {
-        return k * k * sin(M_2_PI * x) * sinh(M_2_PI);
-    };
-    /** East, South and West boundary conditions are simply = 0 */
+    /** North boundary condition gN(x) = sin(2 pi x) sinh(2 pi) */
+    std::function<double(double)> gN;
+#ifdef USE_MPI
+    // Check if the local north boundary is part of the global north boundary
+    if (mpi_neighbours.north == MPI_PROC_NULL) {
+#endif
+        // The local north boundary equals the global north boundary
+        gN = [k](double x) { return sin(2 * M_PI * x) * sinh(2 * M_PI); };
+#ifdef USE_MPI
+    } else {
+        // The local north boundary is zero, like all the rest
+        gN = [k](double) { return 0.0; };
+    }
+#endif
+    /** East, South and West boundary conditions are all 0 */
     std::function<double(double)> g0 = [k](double) { return 0.0; };

     /******************************* Solve PDE ********************************/
-    Matrix<double> solution;
-    {
-        // Instanciate solver (local instance)
-        Solver solver(nx, ny, 0., 1., 0., 1., h, k, fun, gN, g0, g0, g0);
+    // Instantiate solver (local instance)
+    Solver solver(nx, ny, xmin, xmax, ymin, ymax, h, k, fun, gN, g0, g0, g0);

-        // Run solver iterations
-        for (size_t iter = 0; iter < iterations; ++iter) {
-            solver.iterate();
+    // Run solver iterations
+    for (size_t iter = 0; iter < iterations; ++iter) {
+        // Perform a single stencil Jacobi iteration
+        solver.iterate();
+
+#ifdef USE_MPI
+        // Non-blocking send of boundary conditions to all neighbours
+        MPI_Request mpi_requests[4];
+        int mpi_request_count = 0;
+
+        if (mpi_neighbours.north != MPI_PROC_NULL) {
+            auto bound = solver.read_boundary(Solver::Dir::North);
+            MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
+                      mpi_neighbours.north, iter, mpi_comm_grid,
+                      &mpi_requests[mpi_request_count++]);
+        }
+        if (mpi_neighbours.east != MPI_PROC_NULL) {
+            auto bound = solver.read_boundary(Solver::Dir::East);
+            MPI_Isend(bound.data(), 1, mpi_type_row,
+                      mpi_neighbours.east, iter, mpi_comm_grid,
+                      &mpi_requests[mpi_request_count++]);
+        }
+        if (mpi_neighbours.south != MPI_PROC_NULL) {
+            auto bound = solver.read_boundary(Solver::Dir::South);
+            MPI_Isend(bound.data(), bound.size(), MPI_DOUBLE,
+                      mpi_neighbours.south, iter, mpi_comm_grid,
+                      &mpi_requests[mpi_request_count++]);
+        }
+        if (mpi_neighbours.west != MPI_PROC_NULL) {
+            auto bound = solver.read_boundary(Solver::Dir::West);
+            MPI_Isend(bound.data(), 1, mpi_type_row,
+                      mpi_neighbours.west, iter, mpi_comm_grid,
+                      &mpi_requests[mpi_request_count++]);
         }

-        // extract solution
-        solution = std::move(solver.solution());
+        // Wait for all sends to complete before receiving new data
+        // (just to be safe)
+        MPI_Waitall(mpi_request_count, mpi_requests, MPI_STATUSES_IGNORE);
+
+        // Get new boundary conditions using a blocking receive
+        MPI_Status mpi_status;
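+        // NOTE: waiting on all sends before posting any receive relies on
+        // MPI buffering these boundary messages (eager protocol); for very
+        // large boundaries a rendezvous send could stall here, in which case
+        // posting the receives first (MPI_Irecv) would be the more robust
+        // pattern.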
+        if (mpi_neighbours.north != MPI_PROC_NULL) {
+            auto bound = solver.write_boundary(Solver::Dir::North);
+            MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
+                     mpi_neighbours.north, iter, mpi_comm_grid, &mpi_status);
+        }
+        if (mpi_neighbours.east != MPI_PROC_NULL) {
+            auto bound = solver.write_boundary(Solver::Dir::East);
+            MPI_Recv(bound.data(), 1, mpi_type_row,
+                     mpi_neighbours.east, iter, mpi_comm_grid, &mpi_status);
+        }
+        if (mpi_neighbours.south != MPI_PROC_NULL) {
+            auto bound = solver.write_boundary(Solver::Dir::South);
+            MPI_Recv(bound.data(), bound.size(), MPI_DOUBLE,
+                     mpi_neighbours.south, iter, mpi_comm_grid, &mpi_status);
+        }
+        if (mpi_neighbours.west != MPI_PROC_NULL) {
+            auto bound = solver.write_boundary(Solver::Dir::West);
+            MPI_Recv(bound.data(), 1, mpi_type_row,
+                     mpi_neighbours.west, iter, mpi_comm_grid, &mpi_status);
+        }
+#endif
     }

-    /****************************** Tests/Report ******************************/
+    /***************************** Solution Stats *****************************/
+    std::cout
+        << "|| residuals ||_2: " << std::setw(15) << solver.resid_norm<Norm::Frob>()
+        << "\n|| residuals ||_inf: " << std::setw(15) << solver.resid_norm<Norm::Max>()
+        << std::endl;
+
+    // Get solution
+    Matrix<double>& solution = solver.solution();
+
+    // Compute analytic (true) solution
+    Matrix<double> analytic(solution.nrow(), solution.ncol());
+    double y = ymin;
+    for (size_t j = 0; j < analytic.ncol(); ++j, y += h) {
+        double x = xmin; // restart x at the west border for every column
+        for (size_t i = 0; i < analytic.nrow(); ++i, x += h) {
+            analytic(i, j) = sin(2 * M_PI * x) * sinh(2 * M_PI * y);
+        }
+    }
+
+    // Calculate error
+    std::cout
+        << "|| error ||_2: " << std::setw(15) << solution.dist<Norm::Frob>(analytic)
+        << "\n|| error ||_inf: " << std::setw(15) << solution.dist<Norm::Max>(analytic)
+        << std::endl;
+
+    /*************************__________DEBUG__________*************************/
+    // Create R scripts for plotting the solution
+    std::ostringstream file_name;
+#ifdef USE_MPI
+    file_name << "Rplot_" << mpi_coords[0] << "_" << mpi_coords[1] << ".R";
+#else
+    file_name << "Rplot_seriel.R";
+#endif
+    std::ofstream out(file_name.str());
+
+    out << "sol <- matrix(c(" << solution(0);
+    for (size_t i = 1; i < solution.size(); ++i) {
+        out << ',' << solution(i);
+    }
+    out << "), " << nx << ", " << ny << ")\n"
+#ifdef USE_MPI
+        << "png(filename = \"mpi_plot_" << mpi_coords[0]
+        << "_" << mpi_coords[1] << ".png\")\n"
+#else
+        << "png(filename = \"seriel_plot.png\")\n"
+#endif
+#ifdef USE_MPI
+        << "image(sol, zlim = c(-270, 270), axes = FALSE, main = \"Cart Coords: "
+        << mpi_coords[0] << ", " << mpi_coords[1] << "\")\n"
+#else
+        << "image(sol, zlim = c(-270, 270), axes = FALSE, main = \"Seriel\")\n"
+#endif
+        << "axis(1, at = c(0, 1), labels = c("
+        << "\"" << std::setprecision(2) << xmin << "\", "
+        << "\"" << std::setprecision(2) << xmax << "\"))\n"
+        << "axis(2, at = c(0, 1), labels = c("
+        << "\"" << std::setprecision(2) << ymin << "\", "
+        << "\"" << std::setprecision(2) << ymax << "\"))\n"
+        // << "axis(2, at = seq(100, 800, by = 100))"
+        << "dev.off()" << std::endl;
+    out.close();
+    /*************************__________DEBUG__________*************************/

-    std::cout << solution << std::endl;
     // MPI shutdown/cleanup
 #ifdef USE_MPI