From e426bb090a5b325723f0e64c64655c5a2885f2a6 Mon Sep 17 00:00:00 2001
From: Simone Bna <s.bn@cineca.it>
Date: Mon, 23 Nov 2020 17:54:50 +0100
Subject: [PATCH] ENH: memcpy lowVals from device in case of symmetric matrices

---
 src/AmgXCSRMatrix.cu | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/src/AmgXCSRMatrix.cu b/src/AmgXCSRMatrix.cu
index 066a1cb..bb05e9d 100644
--- a/src/AmgXCSRMatrix.cu
+++ b/src/AmgXCSRMatrix.cu
@@ -191,10 +191,27 @@ void AmgXCSRMatrix::setValuesLDU
     CHECK(cudaMalloc(&valuesTmp, totalNnz * sizeof(double)));
     CHECK(cudaMemcpy(valuesTmp, diagVals, nrows * sizeof(double), cudaMemcpyDefault));
     CHECK(cudaMemcpy(valuesTmp + nrows, upperVals, nInternalFaces * sizeof(double), cudaMemcpyDefault));
-    CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, lowerVals, nInternalFaces * sizeof(double), cudaMemcpyDefault));
+    // Symmetric matrix (lowerVals aliases upperVals): reuse the upper values already copied into valuesTmp
+    if (lowerVals == upperVals)
+    {
+        CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, 
+                         valuesTmp + nrows, 
+                         nInternalFaces * sizeof(double), 
+                         cudaMemcpyDefault));    
+    }
+    else
+    {
+        CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, 
+                         lowerVals, 
+                         nInternalFaces * sizeof(double), 
+                         cudaMemcpyDefault));
+    }
     if (extNnz > 0)
     {
-        CHECK(cudaMemcpy(valuesTmp + localNnz, extVals, extNnz * sizeof(double), cudaMemcpyDefault));
+        CHECK(cudaMemcpy(valuesTmp + localNnz, 
+                         extVals, 
+                         extNnz * sizeof(double), 
+                         cudaMemcpyDefault));
     }
 
     // Concat [0, ..., n-1], upperAddr, lowerAddr (note switched) into column indices
@@ -245,10 +262,27 @@ void AmgXCSRMatrix::updateValues
     // Copy the values in [ diag, upper, lower, (external) ]
     CHECK(cudaMemcpy(valuesTmp, diagVal, sizeof(double) * nrows, cudaMemcpyDefault));
     CHECK(cudaMemcpy(valuesTmp + nrows, uppVal, sizeof(double) * nInternalFaces, cudaMemcpyDefault));
-    CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, lowVal, sizeof(double) * nInternalFaces, cudaMemcpyDefault));
+    // Symmetric matrix (lowVal aliases uppVal): reuse the upper values already copied into valuesTmp
+    if (lowVal == uppVal)
+    {
+        CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, 
+                         valuesTmp + nrows, 
+                         sizeof(double) * nInternalFaces, 
+                         cudaMemcpyDefault));
+    }
+    else
+    {
+        CHECK(cudaMemcpy(valuesTmp + nrows + nInternalFaces, 
+                         lowVal, 
+                         sizeof(double) * nInternalFaces, 
+                         cudaMemcpyDefault));
+    }
     if (extNnz > 0)
     {
-        CHECK(cudaMemcpy(valuesTmp + localNnz, extVal, sizeof(double) * extNnz, cudaMemcpyDefault));
+        CHECK(cudaMemcpy(valuesTmp + localNnz, 
+                         extVal, 
+                         sizeof(double) * extNnz, 
+                         cudaMemcpyDefault));
     }
 
     constexpr int nthreads = 128;
-- 
GitLab