From 2562d871403bb5e30ed844ae99340c32b8d82f74 Mon Sep 17 00:00:00 2001
From: Mirco Valentini <m.valentini@cineca.it>
Date: Wed, 18 Nov 2020 13:13:44 +0100
Subject: [PATCH] FIX re-enabled consolidate

---
 TODO         |  5 ++++
 src/setA.cu  | 79 +++++++++++++++-------------------------------------
 src/solve.cu |  4 +--
 3 files changed, 29 insertions(+), 59 deletions(-)
 create mode 100644 TODO

diff --git a/TODO b/TODO
new file mode 100644
index 0000000..7a1bf89
--- /dev/null
+++ b/TODO
@@ -0,0 +1,5 @@
+- return to the vanilla FOAM2CSR
+- add documentation to all functions
+- rename the library (or integrate it into the external solver)
+- remove the PETSc dependency
+- align with the OpenFOAM coding style
diff --git a/src/setA.cu b/src/setA.cu
index 39d514a..1007e11 100644
--- a/src/setA.cu
+++ b/src/setA.cu
@@ -412,7 +412,7 @@ PetscErrorCode AmgXSolver::setA(
     PetscFunctionBeginUser;
 
     // Merge the distributed matrix for MPI processes sharing a GPU
-    // consolidateMatrix(nLocalRows, nLocalNz, rowOffsets, colIndicesGlobal, values);
+    consolidateMatrix(nLocalRows, nLocalNz, rowOffsets, colIndicesGlobal, values);
 
     int ierr;
 
@@ -421,58 +421,23 @@ PetscErrorCode AmgXSolver::setA(
     {
         ierr = MPI_Barrier(gpuWorld); CHK;
 
-        //if (consolidationStatus == ConsolidationStatus::None)
-        //{
-        
-        
-        
-            std::cout << "Modified version :: " << ring  << std::endl;
+        if (consolidationStatus == ConsolidationStatus::None)
+        {
             AMGX_matrix_upload_all_global_32(
                 AmgXA, nGlobalRows, nLocalRows, nLocalNz,
                 1, 1, rowOffsets, colIndicesGlobal, values,
-                nullptr, 1, 1, nullptr );
-
-/*
-                int*    csr_rowOfs = (int*)    malloc( sizeof(int)*   (nrows+1) );
-    int*    csr_rowIdx = (int*)    malloc( sizeof(int)*   totalNnz );
-    int*    csr_colIdx = (int*)    malloc( sizeof(int)*   totalNnz );
-    double* csr_values = (double*) malloc( sizeof(double)*totalNnz );
-
-    CHECK(cudaMemcpy( csr_rowOfs, rowOffsets, (nrows+1) * sizeof(int),    cudaMemcpyDeviceToHost));
-    CHECK(cudaMemcpy( csr_rowIdx, rowIndices, totalNnz * sizeof(int),     cudaMemcpyDeviceToHost));
-    CHECK(cudaMemcpy( csr_colIdx, colIndices, totalNnz  * sizeof(int),    cudaMemcpyDeviceToHost));
-    CHECK(cudaMemcpy( csr_values, values,     totalNnz  * sizeof(double), cudaMemcpyDeviceToHost));
-
-    if ( diagIndexGlobal > 0 )
-    {
-      FILE* fid = fopen( "P1_CSR_rowOffs.txt", "w" );
-      for ( int i =0; i<nrows+1; i++ ) fprintf( fid, "%d\n", csr_rowOfs[i] );
-      fclose(fid);
-      fid = fopen( "P1_CSR_col.txt", "w" );
-      for ( int i =0; i<totalNnz; i++ ) fprintf( fid, "%d, %d, %g\n", csr_rowIdx[i], csr_colIdx[i], csr_values[i] );
-      fclose(fid);
-   }
-   else
-   {
-     FILE* fid = fopen( "P0_CSR_rowOffs.txt", "w" );
-     for ( int i =0; i<nrows+1; i++ ) fprintf( fid, "%d\n", csr_rowOfs[i] );
-     fclose(fid);
-     fid = fopen( "P0_CSR_col.txt", "w" );
-     for ( int i =0; i<totalNnz; i++ ) fprintf( fid, "%d, %d, %g\n", csr_rowIdx[i], csr_colIdx[i], csr_values[i] );
-     fclose(fid);
- */
+                nullptr, ring, ring, partData);
+        }
+        else
+        {
+            AMGX_matrix_upload_all_global_32(
+                AmgXA, nGlobalRows, nConsRows, nConsNz,
+                1, 1, rowOffsetsCons, colIndicesGlobalCons, valuesCons,
+                nullptr, ring, ring, partData);
 
-        //}
-        //else
-        //{
-        //    AMGX_matrix_upload_all_global_32(
-        //        AmgXA, nGlobalRows, nConsRows, nConsNz,
-        //        1, 1, rowOffsetsCons, colIndicesGlobalCons, valuesCons,
-        //        nullptr, ring, ring, partData);
-        //
-        //    // The rowOffsets and colIndices are no longer needed
-        //    freeConsStructure();
-        //}
+            // The rowOffsets and colIndices are no longer needed
+            freeConsStructure();
+        }
 
         // bind the matrix A to the solver
         ierr = MPI_Barrier(gpuWorld); CHK;
@@ -496,7 +461,7 @@ PetscErrorCode AmgXSolver::updateA(
     PetscFunctionBeginUser;
 
     // Merges the values from multiple MPI processes sharing a single GPU
-    // reconsolidateValues(nLocalNz, values);
+    reconsolidateValues(nLocalNz, values);
 
     int ierr;
     // Replace the coefficients for the CSR matrix A within AmgX
@@ -504,14 +469,14 @@ PetscErrorCode AmgXSolver::updateA(
     {
         ierr = MPI_Barrier(gpuWorld); CHK;
 
-        //if (consolidationStatus == ConsolidationStatus::None)
-        //{
+        if (consolidationStatus == ConsolidationStatus::None)
+        {
             AMGX_matrix_replace_coefficients(AmgXA, nLocalRows, nLocalNz, values, nullptr);
-        //}
-        //else
-        //{
-        //    AMGX_matrix_replace_coefficients(AmgXA, nConsRows, nConsNz, valuesCons, nullptr);
-        //}
+        }
+        else
+        {
+            AMGX_matrix_replace_coefficients(AmgXA, nConsRows, nConsNz, valuesCons, nullptr);
+        }
 
         ierr = MPI_Barrier(gpuWorld); CHK;
 
diff --git a/src/solve.cu b/src/solve.cu
index 2ef0bf7..55a33f8 100644
--- a/src/solve.cu
+++ b/src/solve.cu
@@ -109,7 +109,7 @@ PetscErrorCode AmgXSolver::solve(PetscScalar* p, const PetscScalar* b, const int
     PetscFunctionBeginUser;
 
     int ierr;
-/*
+
     if (consolidationStatus == ConsolidationStatus::Device)
     {
         CHECK(cudaMemcpy((void**)&pCons[rowDispls[myDevWorldRank]], p, sizeof(PetscScalar) * nRows, cudaMemcpyDefault));
@@ -126,7 +126,7 @@ PetscErrorCode AmgXSolver::solve(PetscScalar* p, const PetscScalar* b, const int
         ierr = MPI_Igatherv(b, nRows, MPI_DOUBLE, &rhsCons[rowDispls[myDevWorldRank]], nRowsInDevWorld.data(), rowDispls.data(), MPI_DOUBLE, 0, devWorld, &req[1]); CHK;
         MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
     }
-*/
+
     if (gpuWorld != MPI_COMM_NULL)
     {
         // Upload potentially consolidated vectors to AmgX
-- 
GitLab