ginkgo-project
diff --git a/‎common/cuda_hip/solver/multigrid_kernels.hpp.inc‎
Lines changed: 109 additions & 0 deletions b/‎common/cuda_hip/solver/multigrid_kernels.hpp.inc‎
Lines changed: 109 additions & 0 deletions
diff --git a/‎core/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎core/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎core/device_hooks/common_kernels.inc.cpp‎
Lines changed: 25 additions & 0 deletions b/‎core/device_hooks/common_kernels.inc.cpp‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎core/multigrid/amgx_pgm.cpp‎
Lines changed: 8 additions & 8 deletions b/‎core/multigrid/amgx_pgm.cpp‎
Lines changed: 8 additions & 8 deletions
@@ -0,0 +1,109 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2021, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+
+// grid_nrows is the number of rows the whole grid processes in one pass.
+// Threads whose flat index is at least grid_nrows * nrhs are therefore unused.
+// Each thread stays on the same column (same scalar) for its entire loop.
+/**
+ * First K-cycle update. For every column the factor alpha / rho is formed;
+ * when that factor is finite, g -= factor * v and e *= factor are applied.
+ * Whether or not the factor is finite, d receives the resulting value of e.
+ *
+ * Threads whose flat index is at least grid_nrows * nrhs return immediately.
+ * Every other thread keeps a single column and advances grid_nrows rows per
+ * iteration until its offset reaches num_rows * stride.
+ *
+ * @param num_rows  number of rows to process
+ * @param nrhs  number of columns; alpha and rho are indexed by column
+ * @param stride  element distance between consecutive rows of v, g, d and e
+ * @param grid_nrows  number of rows the grid covers in one pass
+ * @param alpha  per-column numerator of the factor
+ * @param rho  per-column denominator of the factor
+ * @param v  values subtracted (scaled) from g
+ * @param g  updated in place when the factor is finite
+ * @param d  overwritten with the (possibly scaled) value of e
+ * @param e  scaled in place when the factor is finite
+ */
+template <typename ValueType>
+__global__ __launch_bounds__(default_block_size) void kcycle_step_1_kernel(
+    const size_type num_rows, const size_type nrhs, const size_type stride,
+    const size_type grid_nrows, const ValueType *__restrict__ alpha,
+    const ValueType *__restrict__ rho, const ValueType *__restrict__ v,
+    ValueType *__restrict__ g, ValueType *__restrict__ d,
+    ValueType *__restrict__ e)
+{
+    const auto thread_id = thread::get_thread_id_flat();
+    const auto rhs = thread_id % nrhs;
+    if (thread_id >= grid_nrows * nrhs) {
+        return;
+    }
+    const auto end = num_rows * stride;
+    const auto step = grid_nrows * stride;
+    const auto factor = alpha[rhs] / rho[rhs];
+    auto pos = thread_id / nrhs * stride + rhs;
+    if (is_finite(factor)) {
+        for (; pos < end; pos += step) {
+            auto scaled = e[pos];
+            g[pos] -= factor * v[pos];
+            scaled *= factor;
+            e[pos] = scaled;
+            d[pos] = scaled;
+        }
+    } else {
+        // Non-finite factor: g and e stay untouched, e is only copied to d.
+        for (; pos < end; pos += step) {
+            d[pos] = e[pos];
+        }
+    }
+}
+
+
+/**
+ * Second K-cycle update. Per column it forms
+ *   scalar_d = zeta / (beta - gamma * gamma / rho)
+ *   scalar_e = 1 - gamma / alpha * scalar_d
+ * and, only when both are finite, sets e = scalar_e * e + scalar_d * d.
+ * If either scalar is not finite, the column is left unchanged.
+ *
+ * Threads whose flat index is at least grid_nrows * nrhs return immediately.
+ * Every other thread keeps a single column and advances grid_nrows rows per
+ * iteration until its offset reaches num_rows * stride.
+ *
+ * @param num_rows  number of rows to process
+ * @param nrhs  number of columns; the scalar arrays are indexed by column
+ * @param stride  element distance between consecutive rows of d and e
+ * @param grid_nrows  number of rows the grid covers in one pass
+ * @param alpha  per-column scalar
+ * @param rho  per-column scalar
+ * @param gamma  per-column scalar
+ * @param beta  per-column scalar
+ * @param zeta  per-column scalar
+ * @param d  read-only values combined into e
+ * @param e  updated in place
+ */
+template <typename ValueType>
+__global__ __launch_bounds__(default_block_size) void kcycle_step_2_kernel(
+    const size_type num_rows, const size_type nrhs, const size_type stride,
+    const size_type grid_nrows, const ValueType *__restrict__ alpha,
+    const ValueType *__restrict__ rho, const ValueType *__restrict__ gamma,
+    const ValueType *__restrict__ beta, const ValueType *__restrict__ zeta,
+    const ValueType *__restrict__ d, ValueType *__restrict__ e)
+{
+    const auto thread_id = thread::get_thread_id_flat();
+    const auto rhs = thread_id % nrhs;
+    if (thread_id >= grid_nrows * nrhs) {
+        return;
+    }
+    const auto end = num_rows * stride;
+    const auto step = grid_nrows * stride;
+    const auto denominator = beta[rhs] - gamma[rhs] * gamma[rhs] / rho[rhs];
+    const auto coeff_d = zeta[rhs] / denominator;
+    const auto coeff_e = one<ValueType>() - gamma[rhs] / alpha[rhs] * coeff_d;
+    // Skip the whole column if either coefficient is not finite.
+    if (!(is_finite(coeff_d) && is_finite(coeff_e))) {
+        return;
+    }
+    auto pos = thread_id / nrhs * stride + rhs;
+    while (pos < end) {
+        e[pos] = coeff_e * e[pos] + coeff_d * d[pos];
+        pos += step;
+    }
+}
+
+
+/**
+ * Clears the stop flag if any of the first nrhs entries satisfies
+ * new_norm > rel_tol * old_norm. One thread inspects one entry.
+ *
+ * This kernel only ever writes `false`; it never sets the flag to true.
+ * NOTE(review): presumably the caller presets *is_stop to true before the
+ * launch - confirm at the launch site.
+ *
+ * @param nrhs  number of entries to compare
+ * @param old_norm  reference norms, one per entry
+ * @param new_norm  norms compared against rel_tol * old_norm
+ * @param rel_tol  relative tolerance applied to old_norm
+ * @param is_stop  single flag, set to false when any entry exceeds its bound
+ */
+template <typename ValueType>
+__global__ __launch_bounds__(default_block_size) void kcycle_check_stop_kernel(
+    const size_type nrhs, const ValueType *__restrict__ old_norm,
+    const ValueType *__restrict__ new_norm, const ValueType rel_tol,
+    bool *__restrict__ is_stop)
+{
+    const auto rhs = thread::get_thread_id_flat();
+    // Short-circuit keeps out-of-range threads from touching the norm arrays.
+    if (rhs < nrhs && new_norm[rhs] > rel_tol * old_norm[rhs]) {
+        *is_stop = false;
+    }
+}
@@ -45,6 +45,7 @@ target_sources(ginkgo
     solver/idr.cpp
     solver/ir.cpp
     solver/lower_trs.cpp
+    solver/multigrid.cpp
     solver/upper_trs.cpp
     stop/combined.cpp
     stop/criterion.cpp
 
@@ -68,6 +68,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "core/solver/idr_kernels.hpp"
 #include "core/solver/ir_kernels.hpp"
 #include "core/solver/lower_trs_kernels.hpp"
+#include "core/solver/multigrid_kernels.hpp"
 #include "core/solver/upper_trs_kernels.hpp"
 #include "core/stop/criterion_kernels.hpp"
 #include "core/stop/residual_norm_kernels.hpp"
@@ -100,6 +101,7 @@ template <typename IndexType>
 GKO_DECLARE_FILL_ARRAY_KERNEL(IndexType)
 GKO_NOT_COMPILED(GKO_HOOK_MODULE);
 GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL);
+// Explicit instantiation of the fill-array hook for bool, in addition to the
+// types covered by GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE above.
+// NOTE(review): presumably needed to fill bool flag arrays - confirm usage.
+template GKO_DECLARE_FILL_ARRAY_KERNEL(bool);
 
 template <typename IndexType>
 GKO_DECLARE_FILL_SEQ_ARRAY_KERNEL(IndexType)
@@ -645,6 +647,29 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE);
 }  // namespace ir
 
 
+// Device-hook declarations for the multigrid K-cycle kernels. Each kernel
+// declaration is paired with GKO_NOT_COMPILED(GKO_HOOK_MODULE) and then
+// instantiated through a GKO_INSTANTIATE_* macro.
+// NOTE(review): the macro expansions are not visible here; presumably the
+// body reports that the module was not compiled - confirm in the macro
+// definitions.
+namespace multigrid {
+
+
+// K-cycle step 1, instantiated for each value type.
+template <typename ValueType>
+GKO_DECLARE_MULTIGRID_KCYCLE_STEP_1_KERNEL(ValueType)
+GKO_NOT_COMPILED(GKO_HOOK_MODULE);
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MULTIGRID_KCYCLE_STEP_1_KERNEL);
+
+// K-cycle step 2, instantiated for each value type.
+template <typename ValueType>
+GKO_DECLARE_MULTIGRID_KCYCLE_STEP_2_KERNEL(ValueType)
+GKO_NOT_COMPILED(GKO_HOOK_MODULE);
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MULTIGRID_KCYCLE_STEP_2_KERNEL);
+
+// K-cycle stop check. Unlike the two steps above, this one is instantiated
+// through the NON_COMPLEX variant of the instantiation macro.
+template <typename ValueType>
+GKO_DECLARE_MULTIGRID_KCYCLE_CHECK_STOP_KERNEL(ValueType)
+GKO_NOT_COMPILED(GKO_HOOK_MODULE);
+GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE(
+    GKO_DECLARE_MULTIGRID_KCYCLE_CHECK_STOP_KERNEL);
+
+
+}  // namespace multigrid
+
+
 namespace sparsity_csr {
 
 
 
@@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <ginkgo/core/matrix/identity.hpp>
 
 
+#include "core/base/utils.hpp"
 #include "core/components/fill_array.hpp"
 #include "core/matrix/csr_builder.hpp"
 #include "core/multigrid/amgx_pgm_kernels.hpp"
@@ -81,14 +82,13 @@ void AmgxPgm<ValueType, IndexType>::generate()
     Array<IndexType> intermediate_agg(this->get_executor(),
                                       parameters_.deterministic * num_rows);
     // Only support csr matrix currently.
-    const matrix_type *amgxpgm_op = nullptr;
-    // Store the csr matrix if needed
-    auto amgxpgm_op_unique_ptr = matrix_type::create(exec);
-    amgxpgm_op = dynamic_cast<const matrix_type *>(system_matrix_.get());
-    if (!amgxpgm_op) {
-        // if original matrix is not csr, converting it to csr.
-        as<ConvertibleTo<matrix_type>>(this->system_matrix_.get())
-            ->convert_to(amgxpgm_op_unique_ptr.get());
+    const matrix_type *amgxpgm_op =
+        dynamic_cast<const matrix_type *>(system_matrix_.get());
+    std::shared_ptr<const matrix_type> amgxpgm_op_unique_ptr{};
+    // If system matrix is not csr or need sorting, generate the csr.
+    if (!parameters_.skip_sorting || !amgxpgm_op) {
+        amgxpgm_op_unique_ptr = convert_to_with_sorting<matrix_type>(
+            exec, system_matrix_, parameters_.skip_sorting);
         amgxpgm_op = amgxpgm_op_unique_ptr.get();
     }