diff --git a/src/AmgXCSRMatrix.cu b/src/AmgXCSRMatrix.cu index 066a1cbf37c240906bc67e48682d24728039a2cd..bb05e9d4d77a4ebdc2947dccb05a0cbc47b3a4e3 100644 --- a/src/AmgXCSRMatrix.cu +++ b/src/AmgXCSRMatrix.cu @@ -191,10 +191,27 @@ void AmgXCSRMatrix::setValuesLDU CHECK(cudaMalloc(&valuesTmp, totalNnz * sizeof(double))); CHECK(cudaMemcpy(valuesTmp, diagVals, nrows * sizeof(double), cudaMemcpyDefault)); CHECK(cudaMemcpy(valuesTmp + nrows, upperVals, nInternalFaces * sizeof(double), cudaMemcpyDefault)); - CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, lowerVals, nInternalFaces * sizeof(double), cudaMemcpyDefault)); + // symmetric matrices + if (lowerVals == upperVals) + { + CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, + valuesTmp + nrows, + nInternalFaces * sizeof(double), + cudaMemcpyDefault)); + } + else + { + CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, + lowerVals, + nInternalFaces * sizeof(double), + cudaMemcpyDefault)); + } if (extNnz > 0) { - CHECK(cudaMemcpy(valuesTmp + localNnz, extVals, extNnz * sizeof(double), cudaMemcpyDefault)); + CHECK(cudaMemcpy(valuesTmp + localNnz, + extVals, + extNnz * sizeof(double), + cudaMemcpyDefault)); } // Concat [0, ..., n-1], upperAddr, lowerAddr (note switched) into column indices @@ -245,10 +262,27 @@ void AmgXCSRMatrix::updateValues // Copy the values in [ diag, upper, lower, (external) ] CHECK(cudaMemcpy(valuesTmp, diagVal, sizeof(double) * nrows, cudaMemcpyDefault)); CHECK(cudaMemcpy(valuesTmp + nrows, uppVal, sizeof(double) * nInternalFaces, cudaMemcpyDefault)); - CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, lowVal, sizeof(double) * nInternalFaces, cudaMemcpyDefault)); + // symmetric matrices + if (lowVal == uppVal) + { + CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, + valuesTmp + nrows, + sizeof(double) * nInternalFaces, + cudaMemcpyDefault)); + } + else + { + CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, + lowVal, + sizeof(double) * nInternalFaces, + cudaMemcpyDefault)); + } if (extNnz > 0) { - CHECK(cudaMemcpy(valuesTmp + localNnz, extVal, sizeof(double) * extNnz, cudaMemcpyDefault)); + CHECK(cudaMemcpy(valuesTmp + localNnz, + extVal, + sizeof(double) * extNnz, + cudaMemcpyDefault)); } constexpr int nthreads = 128;