From aab53a949b5605077939d0096eaf67f1dad90952 Mon Sep 17 00:00:00 2001
From: Simone Bna <s.bn@cineca.it>
Date: Wed, 18 Nov 2020 19:12:22 +0100
Subject: [PATCH] ENH: merged misc and solve into AmgXSolver, removed
 superseded solve functions

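Move the setMode/getIters/getResidual helpers from misc.cu and the
consolidation-aware solve() from solve.cu into AmgXSolver.cu. The legacy
PETSc-scatter path, solve(Vec &p, Vec &b) and its solve_real() helper, is
removed; the consolidated solve(Vec &p, Vec &b, const int nRows) is now the
only solve entry point.

Minimal calling sketch (illustrative only, not part of this patch): it
assumes a solver that has already been initialised and had its matrix
uploaded, for example via the routines in init.cu and setA.cu; only
solve/getIters/getResidual are defined by the code moved here.

    #include "AmgXSolver.H"

    // Hypothetical helper showing how a caller drives the merged interface.
    void solveAndReport(AmgXSolver &amgx, Vec &p, Vec &b, const int nLocalRows)
    {
        // p holds the initial guess and receives the solution; b is the RHS.
        amgx.solve(p, b, nLocalRows);

        // Only ranks attached to an AmgX handle (gpuProc == 0) fill these.
        int iters = 0;
        amgx.getIters(iters);

        double res = 0.0;
        // Residual of the final iteration; a 0-based iteration index is assumed.
        amgx.getResidual(iters - 1, res);
    }
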
---
 Make/files        |   2 -
 src/AmgXSolver.H  |  30 -------
 src/AmgXSolver.cu | 203 ++++++++++++++++++++++++++++++++++++++++++++++
 src/misc.cu       |  62 --------------
 src/solve.cu      | 202 ---------------------------------------------
 5 files changed, 203 insertions(+), 296 deletions(-)
 delete mode 100644 src/misc.cu
 delete mode 100644 src/solve.cu

diff --git a/Make/files b/Make/files
index de49e0a..1c1778a 100644
--- a/Make/files
+++ b/Make/files
@@ -2,8 +2,6 @@ src/FOAM2CSR.cu
 src/consolidate.cu
 src/AmgXSolver.cu
 src/init.cu
-src/misc.cu
 src/setA.cu
-src/solve.cu
 
 LIB = $(FOAM_MODULE_LIBBIN)/libfoam2csr
diff --git a/src/AmgXSolver.H b/src/AmgXSolver.H
index 29d3660..7070573 100644
--- a/src/AmgXSolver.H
+++ b/src/AmgXSolver.H
@@ -189,23 +189,6 @@ class AmgXSolver
             const PetscScalar* values);
 
 
-        /** \brief Solve the linear system.
-         *
-         * \p p vector will be used as an initial guess and will be updated to the
-         * solution by the end of solving.
-         *
-         * For cases that use more MPI processes than the number of GPUs, this
-         * function will do data gathering before solving and data scattering
-         * after the solving.
-         *
-         * \param p [in, out] A PETSc Vec object representing unknowns.
-         * \param b [in] A PETSc Vec representing right-hand-side.
-         *
-         * \return PetscErrorCode.
-         */
-        PetscErrorCode solve(Vec &p, Vec &b);
-
-
         /** \brief Solve the linear system.
          *
          * \p p vector will be used as an initial guess and will be updated to the
@@ -598,19 +581,6 @@ class AmgXSolver
         PetscErrorCode getPartData(const IS &devIS,
                 const PetscInt &N, std::vector<PetscInt> &partData, PetscBool &usesOffsets);
 
-
-        /** \brief Function that actually solves the system.
-         *
-         * This function won't check if the process is involved in AmgX solver.
-         * So if calling this function from processes not in the
-         * \ref AmgXSolver::gpuWorld "gpuWorld", something bad will happen.
-         * This function hence won't do data gathering/scattering, either.
-         *
-         * \param p [in, out] PETSc Vec for unknowns.
-         * \param b [in] PETSc Vec for RHS.
-         * \return PetscErrorCode.
-         */
-        PetscErrorCode solve_real(Vec &p, Vec &b);
 };
 
 #endif
diff --git a/src/AmgXSolver.cu b/src/AmgXSolver.cu
index c437c35..c628917 100644
--- a/src/AmgXSolver.cu
+++ b/src/AmgXSolver.cu
@@ -17,3 +17,206 @@ int AmgXSolver::count = 0;
 
 // initialize AmgXSolver::rsrc to nullptr;
 AMGX_resources_handle AmgXSolver::rsrc = nullptr;
+
+
+/* \implements AmgXSolver::solve */
+PetscErrorCode AmgXSolver::solve(Vec& p, Vec &b, const int nRows)
+{
+    PetscFunctionBeginUser;
+
+    int ierr;
+
+    PetscScalar* pscalar;
+    PetscScalar* bscalar;
+
+    // get pointers to the raw data of local vectors
+    ierr = VecGetArray(p, &pscalar); CHK;
+    ierr = VecGetArray(b, &bscalar); CHK;
+
+    if (consolidationStatus == ConsolidationStatus::Device)
+    {
+        CHECK(
+            cudaMemcpy(
+                (void**)&pCons[rowDispls[myDevWorldRank]],
+                pscalar, 
+                sizeof(PetscScalar) * nRows, 
+                cudaMemcpyDefault
+            )
+        );
+        CHECK(
+            cudaMemcpy(
+                (void**)&rhsCons[rowDispls[myDevWorldRank]], 
+                bscalar, 
+                sizeof(PetscScalar) * nRows, 
+                cudaMemcpyDefault
+            )
+        );
+
+        // Must synchronize here as device to device copies are non-blocking w.r.t host
+        CHECK(cudaDeviceSynchronize());
+        ierr = MPI_Barrier(devWorld); CHK;
+    }
+    else if (consolidationStatus == ConsolidationStatus::Host)
+    {
+        MPI_Request req[2];
+        ierr = MPI_Igatherv(
+            pscalar, 
+            nRows, 
+            MPI_DOUBLE, 
+            &pCons[rowDispls[myDevWorldRank]], 
+            nRowsInDevWorld.data(), 
+            rowDispls.data(), 
+            MPI_DOUBLE, 
+            0, 
+            devWorld, 
+            &req[0]
+        ); CHK;
+        ierr = MPI_Igatherv(
+            bscalar, 
+            nRows, 
+            MPI_DOUBLE, 
+            &rhsCons[rowDispls[myDevWorldRank]], 
+            nRowsInDevWorld.data(), 
+            rowDispls.data(), 
+            MPI_DOUBLE, 
+            0, 
+            devWorld, 
+            &req[1]
+        ); CHK;
+        MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
+    }
+
+    if (gpuWorld != MPI_COMM_NULL)
+    {
+        // Upload potentially consolidated vectors to AmgX
+        if (consolidationStatus == ConsolidationStatus::None)
+        {
+            AMGX_vector_upload(AmgXP, nRows, 1, pscalar);
+            AMGX_vector_upload(AmgXRHS, nRows, 1, bscalar);
+        }
+        else
+        {
+            AMGX_vector_upload(AmgXP, nConsRows, 1, pCons);
+            AMGX_vector_upload(AmgXRHS, nConsRows, 1, rhsCons);
+        }
+
+        ierr = MPI_Barrier(gpuWorld); CHK;
+
+        // Solve
+        AMGX_solver_solve(solver, AmgXRHS, AmgXP);
+
+        // Get the status of the solver
+        AMGX_SOLVE_STATUS   status;
+        AMGX_solver_get_status(solver, &status);
+
+        // Check whether the solver successfully solved the problem
+        if (status != AMGX_SOLVE_SUCCESS)
+                SETERRQ1(globalCpuWorld,
+                        PETSC_ERR_CONV_FAILED, "AmgX solver failed to solve the system! "
+                                                "The error code is %d.\n",
+                        status);
+
+        // Download data from device
+        if (consolidationStatus == ConsolidationStatus::None)
+        {
+            AMGX_vector_download(AmgXP, pscalar);
+        }
+        else
+        {
+            AMGX_vector_download(AmgXP, pCons);
+
+            // AMGX_vector_download invokes a device to device copy here, so it is essential that
+            // the root rank blocks the host before other ranks copy from the consolidated solution
+            CHECK(cudaDeviceSynchronize());
+        }
+    }
+
+    // If the matrix is consolidated, scatter the solution back to each rank
+    if (consolidationStatus == ConsolidationStatus::Device)
+    {
+        // Must synchronise before each rank attempts to read from the consolidated solution
+        ierr = MPI_Barrier(devWorld); CHK;
+
+        CHECK(
+            cudaMemcpy(
+                (void **)pscalar, 
+                &pCons[rowDispls[myDevWorldRank]], 
+                sizeof(PetscScalar) * nRows, 
+                cudaMemcpyDefault
+            )
+        );
+        CHECK(cudaDeviceSynchronize());
+    }
+    else if (consolidationStatus == ConsolidationStatus::Host)
+    {
+        // Must synchronise before each rank attempts to read from the consolidated solution
+        ierr = MPI_Barrier(devWorld); CHK;
+
+        ierr = MPI_Scatterv(
+            &pCons[rowDispls[myDevWorldRank]], 
+            nRowsInDevWorld.data(), 
+            rowDispls.data(), 
+            MPI_DOUBLE, 
+            pscalar, 
+            nRows, 
+            MPI_DOUBLE, 
+            0, 
+            devWorld
+        ); CHK;
+    }
+
+    ierr = MPI_Barrier(globalCpuWorld); CHK;
+
+    PetscFunctionReturn(0);
+}
+
+
+/* \implements AmgXSolver::setMode */
+PetscErrorCode AmgXSolver::setMode(const std::string &modeStr)
+{
+    PetscFunctionBeginUser;
+
+    if (modeStr == "dDDI")
+        mode = AMGX_mode_dDDI;
+    else if (modeStr == "dDFI")
+        mode = AMGX_mode_dDFI;
+    else if (modeStr == "dFFI")
+        mode = AMGX_mode_dFFI;
+    else if (modeStr[0] == 'h')
+        SETERRQ1(MPI_COMM_WORLD, PETSC_ERR_ARG_WRONG,
+                "CPU mode, %s, is not supported in this wrapper!",
+                modeStr.c_str());
+    else
+        SETERRQ1(MPI_COMM_WORLD, PETSC_ERR_ARG_WRONG,
+                "%s is not an available mode! Available modes are: "
+                "dDDI, dDFI, dFFI.\n", modeStr.c_str());
+
+    PetscFunctionReturn(0);
+}
+
+
+/* \implements AmgXSolver::getIters */
+PetscErrorCode AmgXSolver::getIters(int &iter)
+{
+    PetscFunctionBeginUser;
+
+    // only processes using AmgX will try to get # of iterations
+    if (gpuProc == 0)
+        AMGX_solver_get_iterations_number(solver, &iter);
+
+    PetscFunctionReturn(0);
+}
+
+
+/* \implements AmgXSolver::getResidual */
+PetscErrorCode AmgXSolver::getResidual(const int &iter, double &res)
+{
+    PetscFunctionBeginUser;
+
+    // only processes using AmgX will try to get residual
+    if (gpuProc == 0)
+        AMGX_solver_get_iteration_residual(solver, iter, 0, &res);
+
+    PetscFunctionReturn(0);
+}
+
diff --git a/src/misc.cu b/src/misc.cu
deleted file mode 100644
index 86a7fa1..0000000
--- a/src/misc.cu
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * \file misc.cpp
- * \brief Definition of some member functions of the class AmgXSolver.
- * \author Pi-Yueh Chuang (pychuang@gwu.edu)
- * \date 2016-01-08
- * \copyright Copyright (c) 2015-2019 Pi-Yueh Chuang, Lorena A. Barba.
- *            This project is released under MIT License.
- */
-
-
-// AmgXWrapper
-# include "AmgXSolver.H"
-
-
-/* \implements AmgXSolver::setMode */
-PetscErrorCode AmgXSolver::setMode(const std::string &modeStr)
-{
-    PetscFunctionBeginUser;
-
-    if (modeStr == "dDDI")
-        mode = AMGX_mode_dDDI;
-    else if (modeStr == "dDFI")
-        mode = AMGX_mode_dDFI;
-    else if (modeStr == "dFFI")
-        mode = AMGX_mode_dFFI;
-    else if (modeStr[0] == 'h')
-        SETERRQ1(MPI_COMM_WORLD, PETSC_ERR_ARG_WRONG,
-                "CPU mode, %s, is not supported in this wrapper!",
-                modeStr.c_str());
-    else
-        SETERRQ1(MPI_COMM_WORLD, PETSC_ERR_ARG_WRONG,
-                "%s is not an available mode! Available modes are: "
-                "dDDI, dDFI, dFFI.\n", modeStr.c_str());
-
-    PetscFunctionReturn(0);
-}
-
-
-/* \implements AmgXSolver::getIters */
-PetscErrorCode AmgXSolver::getIters(int &iter)
-{
-    PetscFunctionBeginUser;
-
-    // only processes using AmgX will try to get # of iterations
-    if (gpuProc == 0)
-        AMGX_solver_get_iterations_number(solver, &iter);
-
-    PetscFunctionReturn(0);
-}
-
-
-/* \implements AmgXSolver::getResidual */
-PetscErrorCode AmgXSolver::getResidual(const int &iter, double &res)
-{
-    PetscFunctionBeginUser;
-
-    // only processes using AmgX will try to get residual
-    if (gpuProc == 0)
-        AMGX_solver_get_iteration_residual(solver, iter, 0, &res);
-
-    PetscFunctionReturn(0);
-}
diff --git a/src/solve.cu b/src/solve.cu
deleted file mode 100644
index f74e1a4..0000000
--- a/src/solve.cu
+++ /dev/null
@@ -1,202 +0,0 @@
-/**
- * @file solve.cpp
- * @brief Definition of member functions regarding solving in AmgXSolver.
- * @author Pi-Yueh Chuang (pychuang@gwu.edu)
- * @date 2016-01-08
- * \copyright Copyright (c) 2015-2019 Pi-Yueh Chuang, Lorena A. Barba.
- *            This project is released under MIT License.
- */
-
-
-// AmgXWrapper
-# include "AmgXSolver.H"
-
-
-/* \implements AmgXSolver::solve */
-PetscErrorCode AmgXSolver::solve(Vec &p, Vec &b)
-{
-    PetscFunctionBeginUser;
-
-    PetscErrorCode      ierr;
-
-    if (globalSize != gpuWorldSize)
-    {
-        ierr = VecScatterBegin(scatterRhs,
-                b, redistRhs, INSERT_VALUES, SCATTER_FORWARD); CHK;
-        ierr = VecScatterBegin(scatterLhs,
-                p, redistLhs, INSERT_VALUES, SCATTER_FORWARD); CHK;
-
-        ierr = VecScatterEnd(scatterRhs,
-                b, redistRhs, INSERT_VALUES, SCATTER_FORWARD); CHK;
-        ierr = VecScatterEnd(scatterLhs,
-                p, redistLhs, INSERT_VALUES, SCATTER_FORWARD); CHK;
-
-        if (gpuWorld != MPI_COMM_NULL)
-        {
-            ierr = solve_real(redistLhs, redistRhs); CHK;
-        }
-        ierr = MPI_Barrier(globalCpuWorld); CHK;
-
-        ierr = VecScatterBegin(scatterLhs,
-                redistLhs, p, INSERT_VALUES, SCATTER_REVERSE); CHK;
-        ierr = VecScatterEnd(scatterLhs,
-                redistLhs, p, INSERT_VALUES, SCATTER_REVERSE); CHK;
-    }
-    else
-    {
-        if (gpuWorld != MPI_COMM_NULL)
-        {
-            ierr = solve_real(p, b); CHK;
-        }
-        ierr = MPI_Barrier(globalCpuWorld); CHK;
-    }
-
-    PetscFunctionReturn(0);
-}
-
-
-/* \implements AmgXSolver::solve_real */
-PetscErrorCode AmgXSolver::solve_real(Vec &p, Vec &b)
-{
-    PetscFunctionBeginUser;
-
-    PetscErrorCode      ierr;
-
-    double              *unks,
-                        *rhs;
-
-    int                 size;
-
-    AMGX_SOLVE_STATUS   status;
-
-    // get size of local vector (p and b should have the same local size)
-    ierr = VecGetLocalSize(p, &size); CHK;
-
-    // get pointers to the raw data of local vectors
-    ierr = VecGetArray(p, &unks); CHK;
-    ierr = VecGetArray(b, &rhs); CHK;
-
-    // upload vectors to AmgX
-    AMGX_vector_upload(AmgXP, size, 1, unks);
-    AMGX_vector_upload(AmgXRHS, size, 1, rhs);
-
-    // solve
-    ierr = MPI_Barrier(gpuWorld); CHK;
-    AMGX_solver_solve(solver, AmgXRHS, AmgXP);
-
-    // get the status of the solver
-    AMGX_solver_get_status(solver, &status);
-
-    // check whether the solver successfully solve the problem
-    if (status != AMGX_SOLVE_SUCCESS) SETERRQ1(globalCpuWorld,
-            PETSC_ERR_CONV_FAILED, "AmgX solver failed to solve the system! "
-            "The error code is %d.\n", status);
-
-    // download data from device
-    AMGX_vector_download(AmgXP, unks);
-
-    // restore PETSc vectors
-    ierr = VecRestoreArray(p, &unks); CHK;
-    ierr = VecRestoreArray(b, &rhs); CHK;
-
-    PetscFunctionReturn(0);
-}
-
-
-/* \implements AmgXSolver::solve */
-PetscErrorCode AmgXSolver::solve(Vec& p, Vec &b, const int nRows)
-{
-    PetscFunctionBeginUser;
-
-    int ierr;
-
-    PetscScalar* pscalar;
-    PetscScalar* bscalar;
-
-    // get pointers to the raw data of local vectors
-    ierr = VecGetArray(p, &pscalar); CHK;
-    ierr = VecGetArray(b, &bscalar); CHK;
-
-    if (consolidationStatus == ConsolidationStatus::Device)
-    {
-        CHECK(cudaMemcpy((void**)&pCons[rowDispls[myDevWorldRank]], pscalar, sizeof(PetscScalar) * nRows, cudaMemcpyDefault));
-        CHECK(cudaMemcpy((void**)&rhsCons[rowDispls[myDevWorldRank]], bscalar, sizeof(PetscScalar) * nRows, cudaMemcpyDefault));
-
-        // Must synchronize here as device to device copies are non-blocking w.r.t host
-        CHECK(cudaDeviceSynchronize());
-        ierr = MPI_Barrier(devWorld); CHK;
-    }
-    else if (consolidationStatus == ConsolidationStatus::Host)
-    {
-        MPI_Request req[2];
-        ierr = MPI_Igatherv(pscalar, nRows, MPI_DOUBLE, &pCons[rowDispls[myDevWorldRank]], nRowsInDevWorld.data(), rowDispls.data(), MPI_DOUBLE, 0, devWorld, &req[0]); CHK;
-        ierr = MPI_Igatherv(bscalar, nRows, MPI_DOUBLE, &rhsCons[rowDispls[myDevWorldRank]], nRowsInDevWorld.data(), rowDispls.data(), MPI_DOUBLE, 0, devWorld, &req[1]); CHK;
-        MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
-    }
-
-    if (gpuWorld != MPI_COMM_NULL)
-    {
-        // Upload potentially consolidated vectors to AmgX
-        if (consolidationStatus == ConsolidationStatus::None)
-        {
-            AMGX_vector_upload(AmgXP, nRows, 1, pscalar);
-            AMGX_vector_upload(AmgXRHS, nRows, 1, bscalar);
-        }
-        else
-        {
-            AMGX_vector_upload(AmgXP, nConsRows, 1, pCons);
-            AMGX_vector_upload(AmgXRHS, nConsRows, 1, rhsCons);
-        }
-
-        ierr = MPI_Barrier(gpuWorld); CHK;
-
-        // Solve
-        AMGX_solver_solve(solver, AmgXRHS, AmgXP);
-
-        // Get the status of the solver
-        AMGX_SOLVE_STATUS   status;
-        AMGX_solver_get_status(solver, &status);
-
-        // Check whether the solver successfully solved the problem
-        if (status != AMGX_SOLVE_SUCCESS)
-                SETERRQ1(globalCpuWorld,
-                        PETSC_ERR_CONV_FAILED, "AmgX solver failed to solve the system! "
-                                                "The error code is %d.\n",
-                        status);
-
-        // Download data from device
-        if (consolidationStatus == ConsolidationStatus::None)
-        {
-            AMGX_vector_download(AmgXP, pscalar);
-        }
-        else
-        {
-            AMGX_vector_download(AmgXP, pCons);
-
-            // AMGX_vector_download invokes a device to device copy here, so it is essential that
-            // the root rank blocks the host before other ranks copy from the consolidated solution
-            CHECK(cudaDeviceSynchronize());
-        }
-    }
-
-    // If the matrix is consolidated, scatter the
-    if (consolidationStatus == ConsolidationStatus::Device)
-    {
-        // Must synchronise before each rank attempts to read from the consolidated solution
-        ierr = MPI_Barrier(devWorld); CHK;
-
-        CHECK(cudaMemcpy((void **)pscalar, &pCons[rowDispls[myDevWorldRank]], sizeof(PetscScalar) * nRows, cudaMemcpyDefault));
-        CHECK(cudaDeviceSynchronize());
-    }
-    else if (consolidationStatus == ConsolidationStatus::Host)
-    {
-        // Must synchronise before each rank attempts to read from the consolidated solution
-        ierr = MPI_Barrier(devWorld); CHK;
-
-        ierr = MPI_Scatterv(&pCons[rowDispls[myDevWorldRank]], nRowsInDevWorld.data(), rowDispls.data(), MPI_DOUBLE, pscalar, nRows, MPI_DOUBLE, 0, devWorld); CHK;
-    }
-
-    ierr = MPI_Barrier(globalCpuWorld); CHK;
-
-    PetscFunctionReturn(0);
-}
-- 
GitLab