From 2562d871403bb5e30ed844ae99340c32b8d82f74 Mon Sep 17 00:00:00 2001 From: Mirco Valentini <m.valentini@cineca.it> Date: Wed, 18 Nov 2020 13:13:44 +0100 Subject: [PATCH] FIX re-enabled consolidate --- TODO | 5 ++++ src/setA.cu | 79 +++++++++++++++------------------------------------- src/solve.cu | 4 +-- 3 files changed, 29 insertions(+), 59 deletions(-) create mode 100644 TODO diff --git a/TODO b/TODO new file mode 100644 index 0000000..7a1bf89 --- /dev/null +++ b/TODO @@ -0,0 +1,5 @@ +- return to FOAM2CSR vanilla +- add documentation to all functions +- change name of the library (or integrate into the external solver) +- remove PETSc dependency +- align to openFoam coding style diff --git a/src/setA.cu b/src/setA.cu index 39d514a..1007e11 100644 --- a/src/setA.cu +++ b/src/setA.cu @@ -412,7 +412,7 @@ PetscErrorCode AmgXSolver::setA( PetscFunctionBeginUser; // Merge the distributed matrix for MPI processes sharing a GPU - // consolidateMatrix(nLocalRows, nLocalNz, rowOffsets, colIndicesGlobal, values); + consolidateMatrix(nLocalRows, nLocalNz, rowOffsets, colIndicesGlobal, values); int ierr; @@ -421,58 +421,23 @@ PetscErrorCode AmgXSolver::setA( { ierr = MPI_Barrier(gpuWorld); CHK; - //if (consolidationStatus == ConsolidationStatus::None) - //{ - - - - std::cout << "Modified version :: " << ring << std::endl; + if (consolidationStatus == ConsolidationStatus::None) + { AMGX_matrix_upload_all_global_32( AmgXA, nGlobalRows, nLocalRows, nLocalNz, 1, 1, rowOffsets, colIndicesGlobal, values, - nullptr, 1, 1, nullptr ); - -/* - int* csr_rowOfs = (int*) malloc( sizeof(int)* (nrows+1) ); - int* csr_rowIdx = (int*) malloc( sizeof(int)* totalNnz ); - int* csr_colIdx = (int*) malloc( sizeof(int)* totalNnz ); - double* csr_values = (double*) malloc( sizeof(double)*totalNnz ); - - CHECK(cudaMemcpy( csr_rowOfs, rowOffsets, (nrows+1) * sizeof(int), cudaMemcpyDeviceToHost)); - CHECK(cudaMemcpy( csr_rowIdx, rowIndices, totalNnz * sizeof(int), cudaMemcpyDeviceToHost)); - CHECK(cudaMemcpy( csr_colIdx, colIndices, totalNnz * sizeof(int), cudaMemcpyDeviceToHost)); - CHECK(cudaMemcpy( csr_values, values, totalNnz * sizeof(double), cudaMemcpyDeviceToHost)); - - if ( diagIndexGlobal > 0 ) - { - FILE* fid = fopen( "P1_CSR_rowOffs.txt", "w" ); - for ( int i =0; i<nrows+1; i++ ) fprintf( fid, "%d\n", csr_rowOfs[i] ); - fclose(fid); - fid = fopen( "P1_CSR_col.txt", "w" ); - for ( int i =0; i<totalNnz; i++ ) fprintf( fid, "%d, %d, %g\n", csr_rowIdx[i], csr_colIdx[i], csr_values[i] ); - fclose(fid); - } - else - { - FILE* fid = fopen( "P0_CSR_rowOffs.txt", "w" ); - for ( int i =0; i<nrows+1; i++ ) fprintf( fid, "%d\n", csr_rowOfs[i] ); - fclose(fid); - fid = fopen( "P0_CSR_col.txt", "w" ); - for ( int i =0; i<totalNnz; i++ ) fprintf( fid, "%d, %d, %g\n", csr_rowIdx[i], csr_colIdx[i], csr_values[i] ); - fclose(fid); - */ + nullptr, ring, ring, partData); + } + else + { + AMGX_matrix_upload_all_global_32( + AmgXA, nGlobalRows, nConsRows, nConsNz, + 1, 1, rowOffsetsCons, colIndicesGlobalCons, valuesCons, + nullptr, ring, ring, partData); - //} - //else - //{ - // AMGX_matrix_upload_all_global_32( - // AmgXA, nGlobalRows, nConsRows, nConsNz, - // 1, 1, rowOffsetsCons, colIndicesGlobalCons, valuesCons, - // nullptr, ring, ring, partData); - // - // // The rowOffsets and colIndices are no longer needed - // freeConsStructure(); - //} + // The rowOffsets and colIndices are no longer needed + freeConsStructure(); + } // bind the matrix A to the solver ierr = MPI_Barrier(gpuWorld); CHK; @@ -496,7 +461,7 @@ PetscErrorCode AmgXSolver::updateA( PetscFunctionBeginUser; // Merges the values from multiple MPI processes sharing a single GPU - // reconsolidateValues(nLocalNz, values); + reconsolidateValues(nLocalNz, values); int ierr; // Replace the coefficients for the CSR matrix A within AmgX @@ -504,14 +469,14 @@ PetscErrorCode AmgXSolver::updateA( { ierr = MPI_Barrier(gpuWorld); CHK; - //if (consolidationStatus == ConsolidationStatus::None) - //{ + if (consolidationStatus == ConsolidationStatus::None) + { AMGX_matrix_replace_coefficients(AmgXA, nLocalRows, nLocalNz, values, nullptr); - //} - //else - //{ - // AMGX_matrix_replace_coefficients(AmgXA, nConsRows, nConsNz, valuesCons, nullptr); - //} + } + else + { + AMGX_matrix_replace_coefficients(AmgXA, nConsRows, nConsNz, valuesCons, nullptr); + } ierr = MPI_Barrier(gpuWorld); CHK; diff --git a/src/solve.cu b/src/solve.cu index 2ef0bf7..55a33f8 100644 --- a/src/solve.cu +++ b/src/solve.cu @@ -109,7 +109,7 @@ PetscErrorCode AmgXSolver::solve(PetscScalar* p, const PetscScalar* b, const int PetscFunctionBeginUser; int ierr; -/* + if (consolidationStatus == ConsolidationStatus::Device) { CHECK(cudaMemcpy((void**)&pCons[rowDispls[myDevWorldRank]], p, sizeof(PetscScalar) * nRows, cudaMemcpyDefault)); @@ -126,7 +126,7 @@ PetscErrorCode AmgXSolver::solve(PetscScalar* p, const PetscScalar* b, const int ierr = MPI_Igatherv(b, nRows, MPI_DOUBLE, &rhsCons[rowDispls[myDevWorldRank]], nRowsInDevWorld.data(), rowDispls.data(), MPI_DOUBLE, 0, devWorld, &req[1]); CHK; MPI_Waitall(2, req, MPI_STATUSES_IGNORE); } -*/ + if (gpuWorld != MPI_COMM_NULL) { // Upload potentially consolidated vectors to AmgX -- GitLab