Skip to content

Commit 4046ebc

Browse files
authored
Merge Add Multigrid solver (except for dpcpp), examples, build_smoother shortcut
This pr mainly adds the multigrid solver (except for dpcpp) with different cycles and build_smoother shortcut Summary: - add multigrid solver - add skip_ordering in amgx_pgm - add use multigrid as preconditioner example - add use mixed-precision multigrid level in multigrid example - add build_smoother shortcut - multigrid contains v,f,w,k(gcr/fcg) cycle - multigrid allows selection of multigrid level, smoother, or coarsest solver. Related PR: #542
2 parents 2bf7411 + cb08a25 commit 4046ebc

65 files changed

Lines changed: 6752 additions & 19 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
/*******************************<GINKGO LICENSE>******************************
2+
Copyright (c) 2017-2021, the Ginkgo authors
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions
7+
are met:
8+
9+
1. Redistributions of source code must retain the above copyright
10+
notice, this list of conditions and the following disclaimer.
11+
12+
2. Redistributions in binary form must reproduce the above copyright
13+
notice, this list of conditions and the following disclaimer in the
14+
documentation and/or other materials provided with the distribution.
15+
16+
3. Neither the name of the copyright holder nor the names of its
17+
contributors may be used to endorse or promote products derived from
18+
this software without specific prior written permission.
19+
20+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21+
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22+
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23+
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31+
******************************<GINKGO LICENSE>*******************************/
32+
33+
34+
// grid_nrows is the number of rows handled in the whole grid at the same time.
35+
// Thus, the threads whose index is larger than grid_nrows * nrhs are not used.
36+
// Let the thread handle the same col (has same scalar) in whole loop.
37+
template <typename ValueType>
38+
__global__ __launch_bounds__(default_block_size) void kcycle_step_1_kernel(
39+
const size_type num_rows, const size_type nrhs, const size_type stride,
40+
const size_type grid_nrows, const ValueType *__restrict__ alpha,
41+
const ValueType *__restrict__ rho, const ValueType *__restrict__ v,
42+
ValueType *__restrict__ g, ValueType *__restrict__ d,
43+
ValueType *__restrict__ e)
44+
{
45+
const auto tidx = thread::get_thread_id_flat();
46+
const auto col = tidx % nrhs;
47+
const auto num_elems = grid_nrows * nrhs;
48+
if (tidx >= num_elems) {
49+
return;
50+
}
51+
const auto total_elems = num_rows * stride;
52+
const auto grid_stride = grid_nrows * stride;
53+
const auto temp = alpha[col] / rho[col];
54+
const bool update = is_finite(temp);
55+
for (auto idx = tidx / nrhs * stride + col; idx < total_elems;
56+
idx += grid_stride) {
57+
auto store_e = e[idx];
58+
if (update) {
59+
g[idx] -= temp * v[idx];
60+
store_e *= temp;
61+
e[idx] = store_e;
62+
}
63+
d[idx] = store_e;
64+
}
65+
}
66+
67+
68+
template <typename ValueType>
69+
__global__ __launch_bounds__(default_block_size) void kcycle_step_2_kernel(
70+
const size_type num_rows, const size_type nrhs, const size_type stride,
71+
const size_type grid_nrows, const ValueType *__restrict__ alpha,
72+
const ValueType *__restrict__ rho, const ValueType *__restrict__ gamma,
73+
const ValueType *__restrict__ beta, const ValueType *__restrict__ zeta,
74+
const ValueType *__restrict__ d, ValueType *__restrict__ e)
75+
{
76+
const auto tidx = thread::get_thread_id_flat();
77+
const auto col = tidx % nrhs;
78+
const auto num_elems = grid_nrows * nrhs;
79+
if (tidx >= num_elems) {
80+
return;
81+
}
82+
const auto total_elems = num_rows * stride;
83+
const auto grid_stride = grid_nrows * stride;
84+
const auto scalar_d =
85+
zeta[col] / (beta[col] - gamma[col] * gamma[col] / rho[col]);
86+
const auto scalar_e = one<ValueType>() - gamma[col] / alpha[col] * scalar_d;
87+
if (is_finite(scalar_d) && is_finite(scalar_e)) {
88+
for (auto idx = tidx / nrhs * stride + col; idx < total_elems;
89+
idx += grid_stride) {
90+
e[idx] = scalar_e * e[idx] + scalar_d * d[idx];
91+
}
92+
}
93+
}
94+
95+
96+
template <typename ValueType>
97+
__global__ __launch_bounds__(default_block_size) void kcycle_check_stop_kernel(
98+
const size_type nrhs, const ValueType *__restrict__ old_norm,
99+
const ValueType *__restrict__ new_norm, const ValueType rel_tol,
100+
bool *__restrict__ is_stop)
101+
{
102+
auto tidx = thread::get_thread_id_flat();
103+
if (tidx >= nrhs) {
104+
return;
105+
}
106+
if (new_norm[tidx] > rel_tol * old_norm[tidx]) {
107+
*is_stop = false;
108+
}
109+
}

core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ target_sources(ginkgo
4545
solver/idr.cpp
4646
solver/ir.cpp
4747
solver/lower_trs.cpp
48+
solver/multigrid.cpp
4849
solver/upper_trs.cpp
4950
stop/combined.cpp
5051
stop/criterion.cpp

core/device_hooks/common_kernels.inc.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6868
#include "core/solver/idr_kernels.hpp"
6969
#include "core/solver/ir_kernels.hpp"
7070
#include "core/solver/lower_trs_kernels.hpp"
71+
#include "core/solver/multigrid_kernels.hpp"
7172
#include "core/solver/upper_trs_kernels.hpp"
7273
#include "core/stop/criterion_kernels.hpp"
7374
#include "core/stop/residual_norm_kernels.hpp"
@@ -100,6 +101,7 @@ template <typename IndexType>
100101
GKO_DECLARE_FILL_ARRAY_KERNEL(IndexType)
101102
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
102103
GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL);
104+
template GKO_DECLARE_FILL_ARRAY_KERNEL(bool);
103105

104106
template <typename IndexType>
105107
GKO_DECLARE_FILL_SEQ_ARRAY_KERNEL(IndexType)
@@ -645,6 +647,29 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE);
645647
} // namespace ir
646648

647649

650+
namespace multigrid {
651+
652+
653+
template <typename ValueType>
654+
GKO_DECLARE_MULTIGRID_KCYCLE_STEP_1_KERNEL(ValueType)
655+
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
656+
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MULTIGRID_KCYCLE_STEP_1_KERNEL);
657+
658+
template <typename ValueType>
659+
GKO_DECLARE_MULTIGRID_KCYCLE_STEP_2_KERNEL(ValueType)
660+
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
661+
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MULTIGRID_KCYCLE_STEP_2_KERNEL);
662+
663+
template <typename ValueType>
664+
GKO_DECLARE_MULTIGRID_KCYCLE_CHECK_STOP_KERNEL(ValueType)
665+
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
666+
GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE(
667+
GKO_DECLARE_MULTIGRID_KCYCLE_CHECK_STOP_KERNEL);
668+
669+
670+
} // namespace multigrid
671+
672+
648673
namespace sparsity_csr {
649674

650675

core/multigrid/amgx_pgm.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4444
#include <ginkgo/core/matrix/identity.hpp>
4545

4646

47+
#include "core/base/utils.hpp"
4748
#include "core/components/fill_array.hpp"
4849
#include "core/matrix/csr_builder.hpp"
4950
#include "core/multigrid/amgx_pgm_kernels.hpp"
@@ -81,14 +82,13 @@ void AmgxPgm<ValueType, IndexType>::generate()
8182
Array<IndexType> intermediate_agg(this->get_executor(),
8283
parameters_.deterministic * num_rows);
8384
// Only support csr matrix currently.
84-
const matrix_type *amgxpgm_op = nullptr;
85-
// Store the csr matrix if needed
86-
auto amgxpgm_op_unique_ptr = matrix_type::create(exec);
87-
amgxpgm_op = dynamic_cast<const matrix_type *>(system_matrix_.get());
88-
if (!amgxpgm_op) {
89-
// if original matrix is not csr, converting it to csr.
90-
as<ConvertibleTo<matrix_type>>(this->system_matrix_.get())
91-
->convert_to(amgxpgm_op_unique_ptr.get());
85+
const matrix_type *amgxpgm_op =
86+
dynamic_cast<const matrix_type *>(system_matrix_.get());
87+
std::shared_ptr<const matrix_type> amgxpgm_op_unique_ptr{};
88+
// If system matrix is not csr or need sorting, generate the csr.
89+
if (!parameters_.skip_sorting || !amgxpgm_op) {
90+
amgxpgm_op_unique_ptr = convert_to_with_sorting<matrix_type>(
91+
exec, system_matrix_, parameters_.skip_sorting);
9292
amgxpgm_op = amgxpgm_op_unique_ptr.get();
9393
}
9494

0 commit comments

Comments
 (0)