From e426bb090a5b325723f0e64c64655c5a2885f2a6 Mon Sep 17 00:00:00 2001 From: Simone Bna <s.bn@cineca.it> Date: Mon, 23 Nov 2020 17:54:50 +0100 Subject: [PATCH] ENH: memcpy lowVals from device in case of symmetric matrices --- src/AmgXCSRMatrix.cu | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/src/AmgXCSRMatrix.cu b/src/AmgXCSRMatrix.cu index 066a1cb..bb05e9d 100644 --- a/src/AmgXCSRMatrix.cu +++ b/src/AmgXCSRMatrix.cu @@ -191,10 +191,27 @@ void AmgXCSRMatrix::setValuesLDU CHECK(cudaMalloc(&valuesTmp, totalNnz * sizeof(double))); CHECK(cudaMemcpy(valuesTmp, diagVals, nrows * sizeof(double), cudaMemcpyDefault)); CHECK(cudaMemcpy(valuesTmp + nrows, upperVals, nInternalFaces * sizeof(double), cudaMemcpyDefault)); - CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, lowerVals, nInternalFaces * sizeof(double), cudaMemcpyDefault)); + // symmetric matrices + if (lowerVals == upperVals) + { + CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, + valuesTmp + nrows, + nInternalFaces * sizeof(double), + cudaMemcpyDefault)); + } + else + { + CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, + lowerVals, + nInternalFaces * sizeof(double), + cudaMemcpyDefault)); + } if (extNnz > 0) { - CHECK(cudaMemcpy(valuesTmp + localNnz, extVals, extNnz * sizeof(double), cudaMemcpyDefault)); + CHECK(cudaMemcpy(valuesTmp + localNnz, + extVals, + extNnz * sizeof(double), + cudaMemcpyDefault)); } // Concat [0, ..., n-1], upperAddr, lowerAddr (note switched) into column indices @@ -245,10 +262,27 @@ void AmgXCSRMatrix::updateValues // Copy the values in [ diag, upper, lower, (external) ] CHECK(cudaMemcpy(valuesTmp, diagVal, sizeof(double) * nrows, cudaMemcpyDefault)); CHECK(cudaMemcpy(valuesTmp + nrows, uppVal, sizeof(double) * nInternalFaces, cudaMemcpyDefault)); - CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, lowVal, sizeof(double) * nInternalFaces, cudaMemcpyDefault)); + // symmetric matrices + if (lowVal == uppVal) + { + CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, + valuesTmp + nrows, + sizeof(double) * nInternalFaces, + cudaMemcpyDefault)); + } + else + { + CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, + lowVal, + sizeof(double) * nInternalFaces, + cudaMemcpyDefault)); + } if (extNnz > 0) { - CHECK(cudaMemcpy(valuesTmp + localNnz, extVal, sizeof(double) * extNnz, cudaMemcpyDefault)); + CHECK(cudaMemcpy(valuesTmp + localNnz, + extVal, + sizeof(double) * extNnz, + cudaMemcpyDefault)); } constexpr int nthreads = 128; -- GitLab