From aab53a949b5605077939d0096eaf67f1dad90952 Mon Sep 17 00:00:00 2001 From: Simone Bna <s.bn@cineca.it> Date: Wed, 18 Nov 2020 19:12:22 +0100 Subject: [PATCH] ENH: merged misc and solve into AmgXSolve, removed useless functions --- Make/files | 2 - src/AmgXSolver.H | 30 ------- src/AmgXSolver.cu | 203 ++++++++++++++++++++++++++++++++++++++++++++++ src/misc.cu | 62 -------------- src/solve.cu | 202 --------------------------------------------- 5 files changed, 203 insertions(+), 296 deletions(-) delete mode 100644 src/misc.cu delete mode 100644 src/solve.cu diff --git a/Make/files b/Make/files index de49e0a..1c1778a 100644 --- a/Make/files +++ b/Make/files @@ -2,8 +2,6 @@ src/FOAM2CSR.cu src/consolidate.cu src/AmgXSolver.cu src/init.cu -src/misc.cu src/setA.cu -src/solve.cu LIB = $(FOAM_MODULE_LIBBIN)/libfoam2csr diff --git a/src/AmgXSolver.H b/src/AmgXSolver.H index 29d3660..7070573 100644 --- a/src/AmgXSolver.H +++ b/src/AmgXSolver.H @@ -189,23 +189,6 @@ class AmgXSolver const PetscScalar* values); - /** \brief Solve the linear system. - * - * \p p vector will be used as an initial guess and will be updated to the - * solution by the end of solving. - * - * For cases that use more MPI processes than the number of GPUs, this - * function will do data gathering before solving and data scattering - * after the solving. - * - * \param p [in, out] A PETSc Vec object representing unknowns. - * \param b [in] A PETSc Vec representing right-hand-side. - * - * \return PetscErrorCode. - */ - PetscErrorCode solve(Vec &p, Vec &b); - - /** \brief Solve the linear system. * * \p p vector will be used as an initial guess and will be updated to the @@ -598,19 +581,6 @@ class AmgXSolver PetscErrorCode getPartData(const IS &devIS, const PetscInt &N, std::vector<PetscInt> &partData, PetscBool &usesOffsets); - - /** \brief Function that actually solves the system. - * - * This function won't check if the process is involved in AmgX solver. - * So if calling this function from processes not in the - * \ref AmgXSolver::gpuWorld "gpuWorld", something bad will happen. - * This function hence won't do data gathering/scattering, either. - * - * \param p [in, out] PETSc Vec for unknowns. - * \param b [in] PETSc Vec for RHS. - * \return PetscErrorCode. - */ - PetscErrorCode solve_real(Vec &p, Vec &b); }; #endif diff --git a/src/AmgXSolver.cu b/src/AmgXSolver.cu index c437c35..c628917 100644 --- a/src/AmgXSolver.cu +++ b/src/AmgXSolver.cu @@ -17,3 +17,206 @@ int AmgXSolver::count = 0; // initialize AmgXSolver::rsrc to nullptr; AMGX_resources_handle AmgXSolver::rsrc = nullptr; + + +/* \implements AmgXSolver::solve */ +PetscErrorCode AmgXSolver::solve(Vec& p, Vec &b, const int nRows) +{ + PetscFunctionBeginUser; + + int ierr; + + PetscScalar* pscalar; + PetscScalar* bscalar; + + // get pointers to the raw data of local vectors + ierr = VecGetArray(p, &pscalar); CHK; + ierr = VecGetArray(b, &bscalar); CHK; + + if (consolidationStatus == ConsolidationStatus::Device) + { + CHECK( + cudaMemcpy( + (void**)&pCons[rowDispls[myDevWorldRank]], + pscalar, + sizeof(PetscScalar) * nRows, + cudaMemcpyDefault + ) + ); + CHECK( + cudaMemcpy( + (void**)&rhsCons[rowDispls[myDevWorldRank]], + bscalar, + sizeof(PetscScalar) * nRows, + cudaMemcpyDefault + ) + ); + + // Must synchronize here as device to device copies are non-blocking w.r.t host + CHECK(cudaDeviceSynchronize()); + ierr = MPI_Barrier(devWorld); CHK; + } + else if (consolidationStatus == ConsolidationStatus::Host) + { + MPI_Request req[2]; + ierr = MPI_Igatherv( + pscalar, + nRows, + MPI_DOUBLE, + &pCons[rowDispls[myDevWorldRank]], + nRowsInDevWorld.data(), + rowDispls.data(), + MPI_DOUBLE, + 0, + devWorld, + &req[0] + ); CHK; + ierr = MPI_Igatherv( + bscalar, + nRows, + MPI_DOUBLE, + &rhsCons[rowDispls[myDevWorldRank]], + nRowsInDevWorld.data(), + rowDispls.data(), + MPI_DOUBLE, + 0, + devWorld, + &req[1] + ); CHK; + MPI_Waitall(2, req, MPI_STATUSES_IGNORE); + } + + if (gpuWorld != MPI_COMM_NULL) + { + // Upload potentially consolidated vectors to AmgX + if (consolidationStatus == ConsolidationStatus::None) + { + AMGX_vector_upload(AmgXP, nRows, 1, pscalar); + AMGX_vector_upload(AmgXRHS, nRows, 1, bscalar); + } + else + { + AMGX_vector_upload(AmgXP, nConsRows, 1, pCons); + AMGX_vector_upload(AmgXRHS, nConsRows, 1, rhsCons); + } + + ierr = MPI_Barrier(gpuWorld); CHK; + + // Solve + AMGX_solver_solve(solver, AmgXRHS, AmgXP); + + // Get the status of the solver + AMGX_SOLVE_STATUS status; + AMGX_solver_get_status(solver, &status); + + // Check whether the solver successfully solved the problem + if (status != AMGX_SOLVE_SUCCESS) + SETERRQ1(globalCpuWorld, + PETSC_ERR_CONV_FAILED, "AmgX solver failed to solve the system! " + "The error code is %d.\n", + status); + + // Download data from device + if (consolidationStatus == ConsolidationStatus::None) + { + AMGX_vector_download(AmgXP, pscalar); + } + else + { + AMGX_vector_download(AmgXP, pCons); + + // AMGX_vector_download invokes a device to device copy here, so it is essential that + // the root rank blocks the host before other ranks copy from the consolidated solution + CHECK(cudaDeviceSynchronize()); + } + } + + // If the matrix is consolidated, scatter the + if (consolidationStatus == ConsolidationStatus::Device) + { + // Must synchronise before each rank attempts to read from the consolidated solution + ierr = MPI_Barrier(devWorld); CHK; + + CHECK( + cudaMemcpy( + (void **)pscalar, + &pCons[rowDispls[myDevWorldRank]], + sizeof(PetscScalar) * nRows, + cudaMemcpyDefault + ) + ); + CHECK(cudaDeviceSynchronize()); + } + else if (consolidationStatus == ConsolidationStatus::Host) + { + // Must synchronise before each rank attempts to read from the consolidated solution + ierr = MPI_Barrier(devWorld); CHK; + + ierr = MPI_Scatterv( + &pCons[rowDispls[myDevWorldRank]], + nRowsInDevWorld.data(), + rowDispls.data(), + MPI_DOUBLE, + pscalar, + nRows, + MPI_DOUBLE, + 0, + devWorld + ); CHK; + } + + ierr = MPI_Barrier(globalCpuWorld); CHK; + + PetscFunctionReturn(0); +} + + +/* \implements AmgXSolver::setMode */ +PetscErrorCode AmgXSolver::setMode(const std::string &modeStr) +{ + PetscFunctionBeginUser; + + if (modeStr == "dDDI") + mode = AMGX_mode_dDDI; + else if (modeStr == "dDFI") + mode = AMGX_mode_dDFI; + else if (modeStr == "dFFI") + mode = AMGX_mode_dFFI; + else if (modeStr[0] == 'h') + SETERRQ1(MPI_COMM_WORLD, PETSC_ERR_ARG_WRONG, + "CPU mode, %s, is not supported in this wrapper!", + modeStr.c_str()); + else + SETERRQ1(MPI_COMM_WORLD, PETSC_ERR_ARG_WRONG, + "%s is not an available mode! Available modes are: " + "dDDI, dDFI, dFFI.\n", modeStr.c_str()); + + PetscFunctionReturn(0); +} + + +/* \implements AmgXSolver::getIters */ +PetscErrorCode AmgXSolver::getIters(int &iter) +{ + PetscFunctionBeginUser; + + // only processes using AmgX will try to get # of iterations + if (gpuProc == 0) + AMGX_solver_get_iterations_number(solver, &iter); + + PetscFunctionReturn(0); +} + + +/* \implements AmgXSolver::getResidual */ +PetscErrorCode AmgXSolver::getResidual(const int &iter, double &res) +{ + PetscFunctionBeginUser; + + // only processes using AmgX will try to get residual + if (gpuProc == 0) + AMGX_solver_get_iteration_residual(solver, iter, 0, &res); + + PetscFunctionReturn(0); +} + diff --git a/src/misc.cu b/src/misc.cu deleted file mode 100644 index 86a7fa1..0000000 --- a/src/misc.cu +++ /dev/null @@ -1,62 +0,0 @@ -/** - * \file misc.cpp - * \brief Definition of some member functions of the class AmgXSolver. - * \author Pi-Yueh Chuang (pychuang@gwu.edu) - * \date 2016-01-08 - * \copyright Copyright (c) 2015-2019 Pi-Yueh Chuang, Lorena A. Barba. - * This project is released under MIT License. - */ - - -// AmgXWrapper -# include "AmgXSolver.H" - - -/* \implements AmgXSolver::setMode */ -PetscErrorCode AmgXSolver::setMode(const std::string &modeStr) -{ - PetscFunctionBeginUser; - - if (modeStr == "dDDI") - mode = AMGX_mode_dDDI; - else if (modeStr == "dDFI") - mode = AMGX_mode_dDFI; - else if (modeStr == "dFFI") - mode = AMGX_mode_dFFI; - else if (modeStr[0] == 'h') - SETERRQ1(MPI_COMM_WORLD, PETSC_ERR_ARG_WRONG, - "CPU mode, %s, is not supported in this wrapper!", - modeStr.c_str()); - else - SETERRQ1(MPI_COMM_WORLD, PETSC_ERR_ARG_WRONG, - "%s is not an available mode! Available modes are: " - "dDDI, dDFI, dFFI.\n", modeStr.c_str()); - - PetscFunctionReturn(0); -} - - -/* \implements AmgXSolver::getIters */ -PetscErrorCode AmgXSolver::getIters(int &iter) -{ - PetscFunctionBeginUser; - - // only processes using AmgX will try to get # of iterations - if (gpuProc == 0) - AMGX_solver_get_iterations_number(solver, &iter); - - PetscFunctionReturn(0); -} - - -/* \implements AmgXSolver::getResidual */ -PetscErrorCode AmgXSolver::getResidual(const int &iter, double &res) -{ - PetscFunctionBeginUser; - - // only processes using AmgX will try to get residual - if (gpuProc == 0) - AMGX_solver_get_iteration_residual(solver, iter, 0, &res); - - PetscFunctionReturn(0); -} diff --git a/src/solve.cu b/src/solve.cu deleted file mode 100644 index f74e1a4..0000000 --- a/src/solve.cu +++ /dev/null @@ -1,202 +0,0 @@ -/** - * @file solve.cpp - * @brief Definition of member functions regarding solving in AmgXSolver. - * @author Pi-Yueh Chuang (pychuang@gwu.edu) - * @date 2016-01-08 - * \copyright Copyright (c) 2015-2019 Pi-Yueh Chuang, Lorena A. Barba. - * This project is released under MIT License. - */ - - -// AmgXWrapper -# include "AmgXSolver.H" - - -/* \implements AmgXSolver::solve */ -PetscErrorCode AmgXSolver::solve(Vec &p, Vec &b) -{ - PetscFunctionBeginUser; - - PetscErrorCode ierr; - - if (globalSize != gpuWorldSize) - { - ierr = VecScatterBegin(scatterRhs, - b, redistRhs, INSERT_VALUES, SCATTER_FORWARD); CHK; - ierr = VecScatterBegin(scatterLhs, - p, redistLhs, INSERT_VALUES, SCATTER_FORWARD); CHK; - - ierr = VecScatterEnd(scatterRhs, - b, redistRhs, INSERT_VALUES, SCATTER_FORWARD); CHK; - ierr = VecScatterEnd(scatterLhs, - p, redistLhs, INSERT_VALUES, SCATTER_FORWARD); CHK; - - if (gpuWorld != MPI_COMM_NULL) - { - ierr = solve_real(redistLhs, redistRhs); CHK; - } - ierr = MPI_Barrier(globalCpuWorld); CHK; - - ierr = VecScatterBegin(scatterLhs, - redistLhs, p, INSERT_VALUES, SCATTER_REVERSE); CHK; - ierr = VecScatterEnd(scatterLhs, - redistLhs, p, INSERT_VALUES, SCATTER_REVERSE); CHK; - } - else - { - if (gpuWorld != MPI_COMM_NULL) - { - ierr = solve_real(p, b); CHK; - } - ierr = MPI_Barrier(globalCpuWorld); CHK; - } - - PetscFunctionReturn(0); -} - - -/* \implements AmgXSolver::solve_real */ -PetscErrorCode AmgXSolver::solve_real(Vec &p, Vec &b) -{ - PetscFunctionBeginUser; - - PetscErrorCode ierr; - - double *unks, - *rhs; - - int size; - - AMGX_SOLVE_STATUS status; - - // get size of local vector (p and b should have the same local size) - ierr = VecGetLocalSize(p, &size); CHK; - - // get pointers to the raw data of local vectors - ierr = VecGetArray(p, &unks); CHK; - ierr = VecGetArray(b, &rhs); CHK; - - // upload vectors to AmgX - AMGX_vector_upload(AmgXP, size, 1, unks); - AMGX_vector_upload(AmgXRHS, size, 1, rhs); - - // solve - ierr = MPI_Barrier(gpuWorld); CHK; - AMGX_solver_solve(solver, AmgXRHS, AmgXP); - - // get the status of the solver - AMGX_solver_get_status(solver, &status); - - // check whether the solver successfully solve the problem - if (status != AMGX_SOLVE_SUCCESS) SETERRQ1(globalCpuWorld, - PETSC_ERR_CONV_FAILED, "AmgX solver failed to solve the system! " - "The error code is %d.\n", status); - - // download data from device - AMGX_vector_download(AmgXP, unks); - - // restore PETSc vectors - ierr = VecRestoreArray(p, &unks); CHK; - ierr = VecRestoreArray(b, &rhs); CHK; - - PetscFunctionReturn(0); -} - - -/* \implements AmgXSolver::solve */ -PetscErrorCode AmgXSolver::solve(Vec& p, Vec &b, const int nRows) -{ - PetscFunctionBeginUser; - - int ierr; - - PetscScalar* pscalar; - PetscScalar* bscalar; - - // get pointers to the raw data of local vectors - ierr = VecGetArray(p, &pscalar); CHK; - ierr = VecGetArray(b, &bscalar); CHK; - - if (consolidationStatus == ConsolidationStatus::Device) - { - CHECK(cudaMemcpy((void**)&pCons[rowDispls[myDevWorldRank]], pscalar, sizeof(PetscScalar) * nRows, cudaMemcpyDefault)); - CHECK(cudaMemcpy((void**)&rhsCons[rowDispls[myDevWorldRank]], bscalar, sizeof(PetscScalar) * nRows, cudaMemcpyDefault)); - - // Must synchronize here as device to device copies are non-blocking w.r.t host - CHECK(cudaDeviceSynchronize()); - ierr = MPI_Barrier(devWorld); CHK; - } - else if (consolidationStatus == ConsolidationStatus::Host) - { - MPI_Request req[2]; - ierr = MPI_Igatherv(pscalar, nRows, MPI_DOUBLE, &pCons[rowDispls[myDevWorldRank]], nRowsInDevWorld.data(), rowDispls.data(), MPI_DOUBLE, 0, devWorld, &req[0]); CHK; - ierr = MPI_Igatherv(bscalar, nRows, MPI_DOUBLE, &rhsCons[rowDispls[myDevWorldRank]], nRowsInDevWorld.data(), rowDispls.data(), MPI_DOUBLE, 0, devWorld, &req[1]); CHK; - MPI_Waitall(2, req, MPI_STATUSES_IGNORE); - } - - if (gpuWorld != MPI_COMM_NULL) - { - // Upload potentially consolidated vectors to AmgX - if (consolidationStatus == ConsolidationStatus::None) - { - AMGX_vector_upload(AmgXP, nRows, 1, pscalar); - AMGX_vector_upload(AmgXRHS, nRows, 1, bscalar); - } - else - { - AMGX_vector_upload(AmgXP, nConsRows, 1, pCons); - AMGX_vector_upload(AmgXRHS, nConsRows, 1, rhsCons); - } - - ierr = MPI_Barrier(gpuWorld); CHK; - - // Solve - AMGX_solver_solve(solver, AmgXRHS, AmgXP); - - // Get the status of the solver - AMGX_SOLVE_STATUS status; - AMGX_solver_get_status(solver, &status); - - // Check whether the solver successfully solved the problem - if (status != AMGX_SOLVE_SUCCESS) - SETERRQ1(globalCpuWorld, - PETSC_ERR_CONV_FAILED, "AmgX solver failed to solve the system! " - "The error code is %d.\n", - status); - - // Download data from device - if (consolidationStatus == ConsolidationStatus::None) - { - AMGX_vector_download(AmgXP, pscalar); - } - else - { - AMGX_vector_download(AmgXP, pCons); - - // AMGX_vector_download invokes a device to device copy here, so it is essential that - // the root rank blocks the host before other ranks copy from the consolidated solution - CHECK(cudaDeviceSynchronize()); - } - } - - // If the matrix is consolidated, scatter the - if (consolidationStatus == ConsolidationStatus::Device) - { - // Must synchronise before each rank attempts to read from the consolidated solution - ierr = MPI_Barrier(devWorld); CHK; - - CHECK(cudaMemcpy((void **)pscalar, &pCons[rowDispls[myDevWorldRank]], sizeof(PetscScalar) * nRows, cudaMemcpyDefault)); - CHECK(cudaDeviceSynchronize()); - } - else if (consolidationStatus == ConsolidationStatus::Host) - { - // Must synchronise before each rank attempts to read from the consolidated solution - ierr = MPI_Barrier(devWorld); CHK; - - ierr = MPI_Scatterv(&pCons[rowDispls[myDevWorldRank]], nRowsInDevWorld.data(), rowDispls.data(), MPI_DOUBLE, pscalar, nRows, MPI_DOUBLE, 0, devWorld); CHK; - } - - ierr = MPI_Barrier(globalCpuWorld); CHK; - - PetscFunctionReturn(0); -} -- GitLab