From f5bd86830c2be7e7a189fa11882d9c6d8d4b7de9 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Mon, 14 Oct 2019 12:04:04 +0200 Subject: [PATCH 01/11] move the spmv and add the solver name --- benchmark/solver/solver.cpp | 40 ++++--- benchmark/spmv/spmv.cpp | 54 +-------- benchmark/utils/common.hpp | 136 ++++++++++++++++++++++ benchmark/{spmv => utils}/cuda_linops.hpp | 6 +- benchmark/utils/general.hpp | 11 +- benchmark/utils/spmv_common.hpp | 40 +------ 6 files changed, 176 insertions(+), 111 deletions(-) create mode 100644 benchmark/utils/common.hpp rename benchmark/{spmv => utils}/cuda_linops.hpp (99%) diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index 591fb1a3423..de96ffb0ba5 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -58,7 +58,8 @@ DEFINE_double(rel_res_goal, 1e-6, "The relative residual goal of the solver"); DEFINE_string(solvers, "cg", "A comma-separated list of solvers to run." - "Supported values are: cg, bicgstab, cgs, fcg"); + "Supported values are: bicgstab, cg, cgs, fcg, gmres, ir, " + "lower_trs, upper_trs"); DEFINE_string(preconditioners, "none", "A comma-separated list of preconditioners to use." @@ -66,8 +67,7 @@ DEFINE_string(preconditioners, "none", // input validation -[[noreturn]] void print_config_error_and_exit() -{ +[[noreturn]] void print_config_error_and_exit() { std::cerr << "Input has to be a JSON array of matrix configurations:\n" << " [\n" << " { \"filename\": \"my_file.mtx\", \"optimal\": { " @@ -120,8 +120,8 @@ std::unique_ptr create_solver( const std::map( std::shared_ptr, std::shared_ptr)>> - solver_factory{{"cg", create_solver>}, - {"bicgstab", create_solver>}, + solver_factory{{"bicgstab", create_solver>}, + {"cg", create_solver>}, {"cgs", create_solver>}, {"fcg", create_solver>}, {"gmres", create_solver>}}; @@ -249,8 +249,8 @@ void solve_system(const std::string &solver_name, rapidjson::Value(rapidjson::kArrayType), allocator); add_or_set_member(solver_json, "true_residuals", rapidjson::Value(rapidjson::kArrayType), allocator); - auto rhs_norm = compute_norm(lend(b)); - add_or_set_member(solver_json, "rhs_norm", rhs_norm, allocator); + // auto rhs_norm = compute_norm(lend(b)); + // add_or_set_member(solver_json, "rhs_norm", rhs_norm, allocator); for (auto stage : {"generate", "apply"}) { add_or_set_member(solver_json, stage, rapidjson::Value(rapidjson::kObjectType), @@ -287,11 +287,11 @@ void solve_system(const std::string &solver_name, auto apply_logger = std::make_shared(exec); exec->add_logger(apply_logger); - auto res_logger = std::make_shared>( - exec, lend(system_matrix), b, - solver_json["recurrent_residuals"], - solver_json["true_residuals"], allocator); - solver->add_logger(res_logger); + // auto res_logger = std::make_shared>( + // exec, lend(system_matrix), b, + // solver_json["recurrent_residuals"], + // solver_json["true_residuals"], allocator); + // solver->add_logger(res_logger); solver->apply(lend(b), lend(x_clone)); @@ -332,10 +332,11 @@ void solve_system(const std::string &solver_name, add_or_set_member(solver_json["apply"], "time", apply_time.count(), allocator); - auto residual = compute_residual_norm(lend(system_matrix), lend(b), - lend(x_clone)); - add_or_set_member(solver_json, "residual_norm", residual, - allocator); + // auto residual = compute_residual_norm(lend(system_matrix), + // lend(b), + // lend(x_clone)); + // add_or_set_member(solver_json, "residual_norm", residual, + // allocator); } // compute and write benchmark data @@ -411,9 +412,10 @@ int main(int argc, char *argv[]) auto system_matrix = share(matrix_factory.at( test_case["optimal"]["spmv"].GetString())(exec, test_case)); - auto b = create_vector(exec, system_matrix->get_size()[0], - engine); - auto x = create_vector(exec, system_matrix->get_size()[0]); + auto b = create_matrix( + exec, gko::dim<2>{system_matrix->get_size()[0], 1}, engine); + auto x = create_matrix( + exec, gko::dim<2>{system_matrix->get_size()[0], 1}); std::clog << "Matrix is of size (" << system_matrix->get_size()[0] << ", " << system_matrix->get_size()[1] << ")" diff --git a/benchmark/spmv/spmv.cpp b/benchmark/spmv/spmv.cpp index 0339d5cc26e..529ce7f319d 100644 --- a/benchmark/spmv/spmv.cpp +++ b/benchmark/spmv/spmv.cpp @@ -43,16 +43,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "benchmark/utils/common.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/spmv_common.hpp" -#ifdef HAS_CUDA -#include "cuda_linops.hpp" -#endif // HAS_CUDA - - using etype = double; @@ -94,54 +90,6 @@ DEFINE_string( DEFINE_uint32(nrhs, 1, "The number of right hand sides"); -const std::map( - std::shared_ptr, - const gko::matrix_data<> &)>> - matrix_factory{ - {"csr", READ_MATRIX(csr, std::make_shared())}, - {"csri", READ_MATRIX(csr, std::make_shared())}, - {"csrm", READ_MATRIX(csr, std::make_shared())}, - {"csrc", READ_MATRIX(csr, std::make_shared())}, - {"coo", read_matrix_from_data>}, - {"ell", read_matrix_from_data>}, -#ifdef HAS_CUDA - {"cusp_csr", read_matrix_from_data}, - {"cusp_csrmp", read_matrix_from_data}, - {"cusp_csrex", read_matrix_from_data}, - {"cusp_csrmm", read_matrix_from_data}, - {"cusp_hybrid", read_matrix_from_data}, - {"cusp_coo", read_matrix_from_data}, - {"cusp_ell", read_matrix_from_data}, -#endif // HAS_CUDA - {"hybrid", read_matrix_from_data}, - {"hybrid0", - READ_MATRIX(hybrid, std::make_shared(0))}, - {"hybrid25", - READ_MATRIX(hybrid, std::make_shared(0.25))}, - {"hybrid33", - READ_MATRIX(hybrid, - std::make_shared(1.0 / 3.0))}, - {"hybrid40", - READ_MATRIX(hybrid, std::make_shared(0.4))}, - {"hybrid60", - READ_MATRIX(hybrid, std::make_shared(0.6))}, - {"hybrid80", - READ_MATRIX(hybrid, std::make_shared(0.8))}, - {"hybridlimit0", - READ_MATRIX(hybrid, - std::make_shared(0))}, - {"hybridlimit25", - READ_MATRIX(hybrid, - std::make_shared(0.25))}, - {"hybridlimit33", - READ_MATRIX(hybrid, std::make_shared( - 1.0 / 3.0))}, - {"hybridminstorage", - READ_MATRIX(hybrid, - std::make_shared())}, - {"sellp", read_matrix_from_data>}}; - - // This function supposes that management of `FLAGS_overwrite` is done before // calling it void apply_spmv(const char *format_name, std::shared_ptr exec, diff --git a/benchmark/utils/common.hpp b/benchmark/utils/common.hpp new file mode 100644 index 00000000000..8f221063fe0 --- /dev/null +++ b/benchmark/utils/common.hpp @@ -0,0 +1,136 @@ +/************************************************************* +Copyright (c) 2017-2019, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_BENCHMARK_UTILS_COMMON_HPP_ +#define GKO_BENCHMARK_UTILS_COMMON_HPP_ + + +#include + + +#include + + +#ifdef HAS_CUDA +#include "cuda_linops.hpp" +#endif // HAS_CUDA + + +// some shortcuts +using hybrid = gko::matrix::Hybrid<>; +using csr = gko::matrix::Csr<>; + + +/** + * Creates a Ginkgo matrix from the intermediate data representation format + * gko::matrix_data. + * + * @param exec the executor where the matrix will be put + * @param data the data represented in the intermediate representation format + * + * @tparam MatrixType the Ginkgo matrix type (such as `gko::matrix::Csr<>`) + * + * @return a `unique_pointer` to the created matrix + */ +template +std::unique_ptr read_matrix_from_data( + std::shared_ptr exec, const gko::matrix_data<> &data) +{ + auto mat = MatrixType::create(std::move(exec)); + mat->read(data); + return mat; +} + +/** + * Creates a Ginkgo matrix from the intermediate data representation format + * gko::matrix_data with support for variable arguments. + * + * @param MATRIX_TYPE the Ginkgo matrix type (such as `gko::matrix::Csr<>`) + */ +#define READ_MATRIX(MATRIX_TYPE, ...) \ + [](std::shared_ptr exec, \ + const gko::matrix_data<> &data) -> std::unique_ptr { \ + auto mat = MATRIX_TYPE::create(std::move(exec), __VA_ARGS__); \ + mat->read(data); \ + return mat; \ + } + + +const std::map( + std::shared_ptr, + const gko::matrix_data<> &)>> + matrix_factory{ + {"csr", READ_MATRIX(csr, std::make_shared())}, + {"csri", READ_MATRIX(csr, std::make_shared())}, + {"csrm", READ_MATRIX(csr, std::make_shared())}, + {"csrc", READ_MATRIX(csr, std::make_shared())}, + {"coo", read_matrix_from_data>}, + {"ell", read_matrix_from_data>}, +#ifdef HAS_CUDA + {"cusp_csr", read_matrix_from_data}, + {"cusp_csrmp", read_matrix_from_data}, + {"cusp_csrex", read_matrix_from_data}, + {"cusp_csrmm", read_matrix_from_data}, + {"cusp_hybrid", read_matrix_from_data}, + {"cusp_coo", read_matrix_from_data}, + {"cusp_ell", read_matrix_from_data}, +#endif // HAS_CUDA + {"hybrid", read_matrix_from_data}, + {"hybrid0", + READ_MATRIX(hybrid, std::make_shared(0))}, + {"hybrid25", + READ_MATRIX(hybrid, std::make_shared(0.25))}, + {"hybrid33", + READ_MATRIX(hybrid, + std::make_shared(1.0 / 3.0))}, + {"hybrid40", + READ_MATRIX(hybrid, std::make_shared(0.4))}, + {"hybrid60", + READ_MATRIX(hybrid, std::make_shared(0.6))}, + {"hybrid80", + READ_MATRIX(hybrid, std::make_shared(0.8))}, + {"hybridlimit0", + READ_MATRIX(hybrid, + std::make_shared(0))}, + {"hybridlimit25", + READ_MATRIX(hybrid, + std::make_shared(0.25))}, + {"hybridlimit33", + READ_MATRIX(hybrid, std::make_shared( + 1.0 / 3.0))}, + {"hybridminstorage", + READ_MATRIX(hybrid, + std::make_shared())}, + {"sellp", read_matrix_from_data>}}; + + +#endif // GKO_BENCHMARK_UTILS_COMMON_HPP_ \ No newline at end of file diff --git a/benchmark/spmv/cuda_linops.hpp b/benchmark/utils/cuda_linops.hpp similarity index 99% rename from benchmark/spmv/cuda_linops.hpp rename to benchmark/utils/cuda_linops.hpp index 087101aa95b..41dc93c1ce3 100644 --- a/benchmark/spmv/cuda_linops.hpp +++ b/benchmark/utils/cuda_linops.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_BENCHMARK_SPMV_CUDA_LINOPS_HPP_ -#define GKO_BENCHMARK_SPMV_CUDA_LINOPS_HPP_ +#ifndef GKO_BENCHMARK_UTILS_CUDA_LINOPS_HPP_ +#define GKO_BENCHMARK_UTILS_CUDA_LINOPS_HPP_ #include @@ -477,4 +477,4 @@ using cusp_ell = detail::CuspHybrid; using cusp_hybrid = detail::CuspHybrid<>; -#endif // GKO_BENCHMARK_SPMV_CUDA_LINOPS_HPP_ +#endif // GKO_BENCHMARK_UTILS_CUDA_LINOPS_HPP_ diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index ed2f3985192..267109f0dcc 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -280,6 +280,15 @@ std::unique_ptr> create_vector( return res; } +template +std::unique_ptr> create_matrix( + std::shared_ptr exec, gko::dim<2> size) +{ + auto res = vec::create(exec); + res->read(gko::matrix_data(size)); + return res; +} + // creates a random matrix template @@ -335,4 +344,4 @@ double compute_residual_norm(const gko::LinOp *system_matrix, } -#endif // GKO_BENCHMARK_UTILS_HPP_ +#endif // GKO_BENCHMARK_UTILS_GENERAL_HPP_ \ No newline at end of file diff --git a/benchmark/utils/spmv_common.hpp b/benchmark/utils/spmv_common.hpp index fe7ce520957..f027d52c0ce 100644 --- a/benchmark/utils/spmv_common.hpp +++ b/benchmark/utils/spmv_common.hpp @@ -30,6 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ +#ifndef GKO_BENCHMARK_UTILS_SPMV_COMMON_HPP_ +#define GKO_BENCHMARK_UTILS_SPMV_COMMON_HPP_ + #include @@ -47,8 +50,7 @@ using csr = gko::matrix::Csr<>; /** * Function which outputs the input format for benchmarks similar to the spmv. */ -[[noreturn]] void print_config_error_and_exit() -{ +[[noreturn]] void print_config_error_and_exit() { std::cerr << "Input has to be a JSON array of matrix configurations:\n" << " [\n" << " { \"filename\": \"my_file.mtx\" },\n" @@ -72,36 +74,4 @@ void validate_option_object(const rapidjson::Value &value) } -/** - * Creates a Ginkgo matrix from the intermediate data representation format - * gko::matrix_data. - * - * @param exec the executor where the matrix will be put - * @param data the data represented in the intermediate representation format - * - * @tparam MatrixType the Ginkgo matrix type (such as `gko::matrix::Csr<>`) - * - * @return a `unique_pointer` to the created matrix - */ -template -std::unique_ptr read_matrix_from_data( - std::shared_ptr exec, const gko::matrix_data<> &data) -{ - auto mat = MatrixType::create(std::move(exec)); - mat->read(data); - return mat; -} - -/** - * Creates a Ginkgo matrix from the intermediate data representation format - * gko::matrix_data with support for variable arguments. - * - * @param MATRIX_TYPE the Ginkgo matrix type (such as `gko::matrix::Csr<>`) - */ -#define READ_MATRIX(MATRIX_TYPE, ...) \ - [](std::shared_ptr exec, \ - const gko::matrix_data<> &data) -> std::unique_ptr { \ - auto mat = MATRIX_TYPE::create(std::move(exec), __VA_ARGS__); \ - mat->read(data); \ - return mat; \ - } +#endif // GKO_BENCHMARK_UTILS_SPMV_COMMON_HPP_ \ No newline at end of file From acc1f3236a287c0b6ff6cdbfd42f39cdf10d002d Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Mon, 14 Oct 2019 12:41:43 +0200 Subject: [PATCH 02/11] keep the same format as spmv's --- benchmark/solver/CMakeLists.txt | 9 +++++++++ benchmark/solver/solver.cpp | 14 ++------------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/benchmark/solver/CMakeLists.txt b/benchmark/solver/CMakeLists.txt index 112446e7cab..03cbabf2b9b 100644 --- a/benchmark/solver/CMakeLists.txt +++ b/benchmark/solver/CMakeLists.txt @@ -1,2 +1,11 @@ add_executable(solver solver.cpp) target_link_libraries(solver ginkgo gflags rapidjson) +if (GINKGO_BUILD_CUDA) + target_compile_definitions(solver PRIVATE HAS_CUDA=1) + target_link_libraries(solver ginkgo ${CUDA_RUNTIME_LIBS} + ${CUBLAS} ${CUSPARSE}) + target_include_directories(solver SYSTEM PRIVATE ${CUDA_INCLUDE_DIRS}) + if(CMAKE_CUDA_COMPILER_VERSION GREATER_EQUAL "9.2") + target_compile_definitions(solver PRIVATE ALLOWMP=1) + endif() +endif() \ No newline at end of file diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index de96ffb0ba5..f96aec2381a 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -42,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "benchmark/utils/common.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/loggers.hpp" @@ -58,8 +59,7 @@ DEFINE_double(rel_res_goal, 1e-6, "The relative residual goal of the solver"); DEFINE_string(solvers, "cg", "A comma-separated list of solvers to run." - "Supported values are: bicgstab, cg, cgs, fcg, gmres, ir, " - "lower_trs, upper_trs"); + "Supported values are: bicgstab, cg, cgs, fcg, gmres"); DEFINE_string(preconditioners, "none", "A comma-separated list of preconditioners to use." @@ -90,16 +90,6 @@ void validate_option_object(const rapidjson::Value &value) } -const std::map( - std::shared_ptr, - const rapidjson::Value &)>> - matrix_factory{{"csr", read_matrix>}, - {"coo", read_matrix>}, - {"ell", read_matrix>}, - {"hybrid", read_matrix>}, - {"sellp", read_matrix>}}; - - // solver mapping template std::unique_ptr create_solver( From 8d16aa7b4c784362c1b46700ad0d920ff373a416 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Mon, 14 Oct 2019 14:09:13 +0200 Subject: [PATCH 03/11] fix compile error --- benchmark/conversions/conversions.cpp | 12 +------ benchmark/solver/solver.cpp | 47 +++++++++++++++++---------- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/benchmark/conversions/conversions.cpp b/benchmark/conversions/conversions.cpp index a1f267e3640..2b8c8ea345f 100644 --- a/benchmark/conversions/conversions.cpp +++ b/benchmark/conversions/conversions.cpp @@ -43,6 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "benchmark/utils/common.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/spmv_common.hpp" @@ -64,17 +65,6 @@ DEFINE_string( "sellp: Sliced Ellpack format.\n"); -const std::map( - std::shared_ptr, - const gko::matrix_data<> &)>> - matrix_factory{ - {"csr", READ_MATRIX(csr, std::make_shared())}, - {"coo", read_matrix_from_data>}, - {"ell", read_matrix_from_data>}, - {"hybrid", read_matrix_from_data}, - {"sellp", read_matrix_from_data>}}; - - // This function supposes that management of `FLAGS_overwrite` is done before // calling it void convert_matrix(const gko::LinOp *matrix_from, const char *format_to, diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index f96aec2381a..4f1ae4feae0 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -65,6 +65,9 @@ DEFINE_string(preconditioners, "none", "A comma-separated list of preconditioners to use." "Supported values are: none, jacobi, adaptive-jacobi"); +DEFINE_uint32( + nrhs, 1, + "The number of right hand sides. Record the residual only when nrhs == 1."); // input validation [[noreturn]] void print_config_error_and_exit() { @@ -107,6 +110,7 @@ std::unique_ptr create_solver( .on(exec); } + const std::map( std::shared_ptr, std::shared_ptr)>> @@ -239,8 +243,10 @@ void solve_system(const std::string &solver_name, rapidjson::Value(rapidjson::kArrayType), allocator); add_or_set_member(solver_json, "true_residuals", rapidjson::Value(rapidjson::kArrayType), allocator); - // auto rhs_norm = compute_norm(lend(b)); - // add_or_set_member(solver_json, "rhs_norm", rhs_norm, allocator); + if (FLAGS_nrhs == 1) { + auto rhs_norm = compute_norm(lend(b)); + add_or_set_member(solver_json, "rhs_norm", rhs_norm, allocator); + } for (auto stage : {"generate", "apply"}) { add_or_set_member(solver_json, stage, rapidjson::Value(rapidjson::kObjectType), @@ -277,11 +283,13 @@ void solve_system(const std::string &solver_name, auto apply_logger = std::make_shared(exec); exec->add_logger(apply_logger); - // auto res_logger = std::make_shared>( - // exec, lend(system_matrix), b, - // solver_json["recurrent_residuals"], - // solver_json["true_residuals"], allocator); - // solver->add_logger(res_logger); + if (FLAGS_nrhs == 1) { + auto res_logger = std::make_shared>( + exec, lend(system_matrix), b, + solver_json["recurrent_residuals"], + solver_json["true_residuals"], allocator); + solver->add_logger(res_logger); + } solver->apply(lend(b), lend(x_clone)); @@ -321,12 +329,12 @@ void solve_system(const std::string &solver_name, a_tic); add_or_set_member(solver_json["apply"], "time", apply_time.count(), allocator); - - // auto residual = compute_residual_norm(lend(system_matrix), - // lend(b), - // lend(x_clone)); - // add_or_set_member(solver_json, "residual_norm", residual, - // allocator); + if (FLAGS_nrhs == 1) { + auto residual = compute_residual_norm(lend(system_matrix), + lend(b), lend(x_clone)); + add_or_set_member(solver_json, "residual_norm", residual, + allocator); + } } // compute and write benchmark data @@ -358,7 +366,9 @@ int main(int argc, char *argv[]) std::string extra_information = "Running " + FLAGS_solvers + " with " + std::to_string(FLAGS_max_iters) + " iterations and residual goal of " + - std::to_string(FLAGS_rel_res_goal) + "\n"; + std::to_string(FLAGS_rel_res_goal) + + "\nThe number of right hand sides is " + + std::to_string(FLAGS_nrhs) + "\n"; print_general_information(extra_information); auto exec = get_executor(); @@ -399,13 +409,16 @@ int main(int argc, char *argv[]) continue; } std::clog << "Running test case: " << test_case << std::endl; + std::ifstream mtx_fd(test_case["filename"].GetString()); + auto data = gko::read_raw(mtx_fd); auto system_matrix = share(matrix_factory.at( - test_case["optimal"]["spmv"].GetString())(exec, test_case)); + test_case["optimal"]["spmv"].GetString())(exec, data)); auto b = create_matrix( - exec, gko::dim<2>{system_matrix->get_size()[0], 1}, engine); + exec, gko::dim<2>{system_matrix->get_size()[0], FLAGS_nrhs}, + engine); auto x = create_matrix( - exec, gko::dim<2>{system_matrix->get_size()[0], 1}); + exec, gko::dim<2>{system_matrix->get_size()[0], FLAGS_nrhs}); std::clog << "Matrix is of size (" << system_matrix->get_size()[0] << ", " << system_matrix->get_size()[1] << ")" From 05b1d3cce5d54f4c78f034362b5bece6b6718c34 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Tue, 15 Oct 2019 11:46:56 +0200 Subject: [PATCH 04/11] split residual computation and timing component --- benchmark/solver/solver.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index 4f1ae4feae0..13c6db70b1a 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -257,7 +257,7 @@ void solve_system(const std::string &solver_name, } if (FLAGS_detailed) { - // slow run, gets the recurrent and true residuals of each iteration + // slow run, get the time of each functions auto x_clone = clone(x); auto gen_logger = std::make_shared(exec); @@ -283,19 +283,24 @@ void solve_system(const std::string &solver_name, auto apply_logger = std::make_shared(exec); exec->add_logger(apply_logger); + + solver->apply(lend(b), lend(x_clone)); + + exec->remove_logger(gko::lend(apply_logger)); + apply_logger->write_data(solver_json["apply"]["components"], + allocator, 1); + + // slow run, gets the recurrent and true residuals of each iteration if (FLAGS_nrhs == 1) { + x_clone = clone(x); auto res_logger = std::make_shared>( exec, lend(system_matrix), b, solver_json["recurrent_residuals"], solver_json["true_residuals"], allocator); solver->add_logger(res_logger); + solver->apply(lend(b), lend(x_clone)); } - - solver->apply(lend(b), lend(x_clone)); - - exec->remove_logger(gko::lend(apply_logger)); - apply_logger->write_data(solver_json["apply"]["components"], - allocator, 1); + exec->synchronize(); } // timed run From e8c53a4a17750e57942ed7d795a97c03a8a3826a Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Tue, 15 Oct 2019 16:07:42 +0200 Subject: [PATCH 05/11] keep same matrix format argument --- benchmark/conversions/conversions.cpp | 13 ------ benchmark/preconditioner/preconditioner.cpp | 28 ++++++------ benchmark/solver/solver.cpp | 2 + benchmark/spmv/spmv.cpp | 33 -------------- benchmark/utils/common.hpp | 49 +++++++++++++++++++++ 5 files changed, 66 insertions(+), 59 deletions(-) diff --git a/benchmark/conversions/conversions.cpp b/benchmark/conversions/conversions.cpp index 2b8c8ea345f..3d1ca678d98 100644 --- a/benchmark/conversions/conversions.cpp +++ b/benchmark/conversions/conversions.cpp @@ -51,19 +51,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. using etype = double; -// Command-line arguments -DEFINE_string( - formats, "coo", - "A comma-separated list of formats to benchmark. All conversions from the " - "formats given as argument to existing Ginkgo formats are benchmarked.\n" - "Supported values are: coo, csr, ell, hybrid, sellp" - "coo: Coordinate storage.\n" - "csr: Compressed Sparse Row storage.\n" - "ell: Ellpack format according to Bell and Garland: Efficient Sparse " - "Matrix-Vector Multiplication on CUDA.\n" - "hybrid: Hybrid uses ell and coo to represent the matrix.\n" - "sellp: Sliced Ellpack format.\n"); - // This function supposes that management of `FLAGS_overwrite` is done before // calling it diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index b2d03a7490f..c8c501a666d 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -42,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "benchmark/utils/common.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/spmv_common.hpp" @@ -51,8 +52,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. DEFINE_uint32(max_block_size, 32, "Maximal block size of the block-Jacobi preconditioner"); -DEFINE_string(matrix_format, "csr", "The format in which to read the matrix"); - DEFINE_string(preconditioners, "jacobi", "A comma-separated list of solvers to run." "Supported values are: jacobi"); @@ -67,6 +66,8 @@ DEFINE_double(accuracy, 1e-1, "This value is used as the accuracy flag of the adaptive Jacobi " "preconditioner."); +DECLARE_string(formats); + // some shortcuts using etype = double; @@ -87,16 +88,6 @@ gko::precision_reduction parse_storage_optimization(const std::string &flag) } -const std::map( - std::shared_ptr, - const rapidjson::Value &)>> - matrix_factory{{"csr", read_matrix>}, - {"coo", read_matrix>}, - {"ell", read_matrix>}, - {"hybrid", read_matrix>}, - {"sellp", read_matrix>}}; - - // preconditioner mapping const std::map( std::shared_ptr exec)>> @@ -244,6 +235,8 @@ void run_preconditioner(const char *precond_name, int main(int argc, char *argv[]) { + // Use csr as the default format + FLAGS_formats = "csr"; std::string header = "A benchmark for measuring preconditioner performance.\n"; std::string format = std::string() + " [\n" + @@ -260,6 +253,12 @@ int main(int argc, char *argv[]) auto preconditioners = split(FLAGS_preconditioners, ','); + auto formats = split(FLAGS_formats, ','); + if (formats.size() != 1) { + std::cerr << "Preconditioner only supports one format" << std::endl; + std::exit(1); + } + rapidjson::IStreamWrapper jcin(std::cin); rapidjson::Document test_cases; test_cases.ParseStream(jcin); @@ -288,8 +287,11 @@ int main(int argc, char *argv[]) } std::clog << "Running test case: " << test_case << std::endl; + std::ifstream mtx_fd(test_case["filename"].GetString()); + auto data = gko::read_raw(mtx_fd); + auto system_matrix = - share(matrix_factory.at(FLAGS_matrix_format)(exec, test_case)); + share(matrix_factory.at(FLAGS_formats)(exec, data)); auto b = create_vector(exec, system_matrix->get_size()[0], engine); auto x = create_vector(exec, system_matrix->get_size()[0]); diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index 13c6db70b1a..20e40c5f357 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -42,7 +42,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#define DISABLE_FORMATS_COMMAND #include "benchmark/utils/common.hpp" +#undef DISABLE_FORMATS_COMMAND #include "benchmark/utils/general.hpp" #include "benchmark/utils/loggers.hpp" diff --git a/benchmark/spmv/spmv.cpp b/benchmark/spmv/spmv.cpp index 529ce7f319d..0fc35d84fe2 100644 --- a/benchmark/spmv/spmv.cpp +++ b/benchmark/spmv/spmv.cpp @@ -53,39 +53,6 @@ using etype = double; // Command-line arguments -DEFINE_string( - formats, "coo", - "A comma-separated list of formats to run." - "Supported values are: coo, csr, ell, sellp, hybrid, hybrid0, " - "hybrid25, hybrid33, hybrid40, hybrid60, hybrid80, hybridlimit0, " - "hybridlimit25, hybridlimit33, hybridminstorage, cusp_csr, cusp_csrex, " - "cusp_csrmp, cusp_csrmm, cusp_coo, cusp_ell, cusp_hybrid.\n" - "coo: Coordinate storage. The CUDA kernel uses the load-balancing approach " - "suggested in Flegar et al.: Overcoming Load Imbalance for Irregular " - "Sparse Matrices.\n" - "csr: Compressed Sparse Row storage. Ginkgo implementation with automatic " - "strategy.\n" - "csrc: Ginkgo's CSR implementation with automatic stategy.\n" - "csri: Ginkgo's CSR implementation with inbalance strategy.\n" - "csrm: Ginkgo's CSR implementation with merge_path strategy.\n" - "ell: Ellpack format according to Bell and Garland: Efficient Sparse " - "Matrix-Vector Multiplication on CUDA.\n" - "sellp: Sliced Ellpack uses a default block size of 32.\n" - "hybrid: Hybrid uses ell and coo to represent the matrix.\n" - "hybrid0, hybrid25, hybrid33, hybrid40, hybrid60, hybrid80: Hybrid uses " - "the row distribution to decide the partition.\n" - "hybridlimit0, hybridlimit25, hybrid33: Add the upper bound on the ell " - "part of hybrid0, hybrid25, hybrid33.\n" - "hybridminstorage: Hybrid uses the minimal storage to store the matrix.\n" - "cusp_hybrid: benchmark CuSPARSE spmv with cusparseXhybmv and an automatic " - "partition.\n" - "cusp_coo: use cusparseXhybmv with a CUSPARSE_HYB_PARTITION_USER " - "partition.\n" - "cusp_ell: use cusparseXhybmv with CUSPARSE_HYB_PARTITION_MAX partition.\n" - "cusp_csr: benchmark CuSPARSE with the cusparseXcsrmv function.\n" - "cusp_csrex: benchmark CuSPARSE with the cusparseXcsrmvEx function.\n" - "cusp_csrmp: benchmark CuSPARSE with the cusparseXcsrmv_mp function.\n" - "cusp_csrmm: benchmark CuSPARSE with the cusparseXcsrmv_mm function.\n"); DEFINE_uint32(nrhs, 1, "The number of right hand sides"); diff --git a/benchmark/utils/common.hpp b/benchmark/utils/common.hpp index 8f221063fe0..2a7051d3760 100644 --- a/benchmark/utils/common.hpp +++ b/benchmark/utils/common.hpp @@ -40,6 +40,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + + #ifdef HAS_CUDA #include "cuda_linops.hpp" #endif // HAS_CUDA @@ -49,6 +52,52 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. using hybrid = gko::matrix::Hybrid<>; using csr = gko::matrix::Csr<>; +// the formats command-line argument +// If define DISABLE_FORMATS_COMMAND, do not define this argument. +#ifndef DISABLE_FORMATS_COMMAND +DEFINE_string( + formats, "coo", + "A comma-separated list of formats to run." + "Supported values are: coo, csr, ell, sellp, hybrid, hybrid0, " + "hybrid25, hybrid33, hybrid40, hybrid60, hybrid80, hybridlimit0, " + "hybridlimit25, hybridlimit33, hybridminstorage" +#ifdef HAS_CUDA + ", cusp_csr, cusp_csrex, cusp_csrmp, cusp_csrmm, cusp_coo, cusp_ell, " + "cusp_hybrid" +#endif // HAS_CUDA + ".\n" + "coo: Coordinate storage. The CUDA kernel uses the load-balancing approach " + "suggested in Flegar et al.: Overcoming Load Imbalance for Irregular " + "Sparse Matrices.\n" + "csr: Compressed Sparse Row storage. Ginkgo implementation with automatic " + "strategy.\n" + "csrc: Ginkgo's CSR implementation with automatic stategy.\n" + "csri: Ginkgo's CSR implementation with inbalance strategy.\n" + "csrm: Ginkgo's CSR implementation with merge_path strategy.\n" + "ell: Ellpack format according to Bell and Garland: Efficient Sparse " + "Matrix-Vector Multiplication on CUDA.\n" + "sellp: Sliced Ellpack uses a default block size of 32.\n" + "hybrid: Hybrid uses ell and coo to represent the matrix.\n" + "hybrid0, hybrid25, hybrid33, hybrid40, hybrid60, hybrid80: Hybrid uses " + "the row distribution to decide the partition.\n" + "hybridlimit0, hybridlimit25, hybrid33: Add the upper bound on the ell " + "part of hybrid0, hybrid25, hybrid33.\n" + "hybridminstorage: Hybrid uses the minimal storage to store the matrix." +#ifdef HAS_CUDA + "\n" + "cusp_hybrid: benchmark CuSPARSE spmv with cusparseXhybmv and an automatic " + "partition.\n" + "cusp_coo: use cusparseXhybmv with a CUSPARSE_HYB_PARTITION_USER " + "partition.\n" + "cusp_ell: use cusparseXhybmv with CUSPARSE_HYB_PARTITION_MAX partition.\n" + "cusp_csr: benchmark CuSPARSE with the cusparseXcsrmv function.\n" + "cusp_csrex: benchmark CuSPARSE with the cusparseXcsrmvEx function.\n" + "cusp_csrmp: benchmark CuSPARSE with the cusparseXcsrmv_mp function.\n" + "cusp_csrmm: benchmark CuSPARSE with the cusparseXcsrmv_mm function." +#endif // HAS_CUDA +); +#endif // DISABLE_FORMATS_COMMAND + /** * Creates a Ginkgo matrix from the intermediate data representation format From a03e81bf9385db48eeff4092e31f6f336f037ec2 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Tue, 15 Oct 2019 16:40:08 +0200 Subject: [PATCH 06/11] add warmup and repetitions in solver --- benchmark/solver/solver.cpp | 41 ++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index 20e40c5f357..dbf0939ac1a 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -71,6 +71,8 @@ DEFINE_uint32( nrhs, 1, "The number of right hand sides. Record the residual only when nrhs == 1."); +DECLARE_uint32(repetitions); + // input validation [[noreturn]] void print_config_error_and_exit() { std::cerr << "Input has to be a JSON array of matrix configurations:\n" @@ -258,6 +260,17 @@ void solve_system(const std::string &solver_name, allocator); } + // warm run + for (unsigned int i = 0; i < FLAGS_warmup; i++) { + auto x_clone = clone(x); + auto precond = precond_factory.at(precond_name)(exec); + auto solver = solver_factory.at(solver_name)(exec, give(precond)) + ->generate(system_matrix); + solver->apply(lend(b), lend(x_clone)); + exec->synchronize(); + } + + // detail run if (FLAGS_detailed) { // slow run, get the time of each functions auto x_clone = clone(x); @@ -306,7 +319,9 @@ void solve_system(const std::string &solver_name, } // timed run - { + std::chrono::nanoseconds apply_time(0); + std::chrono::nanoseconds generate_time(0); + for (unsigned int i = 0; i < FLAGS_repetitions; i++) { auto x_clone = clone(x); exec->synchronize(); @@ -318,11 +333,9 @@ void solve_system(const std::string &solver_name, exec->synchronize(); auto g_tac = std::chrono::steady_clock::now(); - auto generate_time = + generate_time += std::chrono::duration_cast(g_tac - g_tic); - add_or_set_member(solver_json["generate"], "time", - generate_time.count(), allocator); exec->synchronize(); auto a_tic = std::chrono::steady_clock::now(); @@ -331,18 +344,24 @@ void solve_system(const std::string &solver_name, exec->synchronize(); auto a_tac = std::chrono::steady_clock::now(); - auto apply_time = - std::chrono::duration_cast(a_tac - - a_tic); - add_or_set_member(solver_json["apply"], "time", apply_time.count(), - allocator); - if (FLAGS_nrhs == 1) { + apply_time += std::chrono::duration_cast( + a_tac - a_tic); + + if (FLAGS_nrhs == 1 && i == FLAGS_repetitions - 1) { auto residual = compute_residual_norm(lend(system_matrix), lend(b), lend(x_clone)); add_or_set_member(solver_json, "residual_norm", residual, allocator); } } + add_or_set_member( + solver_json["generate"], "time", + static_cast(generate_time.count()) / FLAGS_repetitions, + allocator); + add_or_set_member( + solver_json["apply"], "time", + static_cast(apply_time.count()) / FLAGS_repetitions, + allocator); // compute and write benchmark data add_or_set_member(solver_json, "completed", true, allocator); @@ -357,6 +376,8 @@ void solve_system(const std::string &solver_name, int main(int argc, char *argv[]) { + // Set the default repetitions = 1. + FLAGS_repetitions = 1; std::string header = "A benchmark for measuring performance of Ginkgo's solvers.\n"; std::string format = From 5ae78a27fa883d3625eb40fa492b5e28fe4c947a Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Wed, 16 Oct 2019 09:24:44 +0200 Subject: [PATCH 07/11] delete gflags declare --- benchmark/preconditioner/preconditioner.cpp | 2 -- benchmark/solver/solver.cpp | 1 - 2 files changed, 3 deletions(-) diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index c8c501a666d..0e78f77fa7e 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -66,8 +66,6 @@ DEFINE_double(accuracy, 1e-1, "This value is used as the accuracy flag of the adaptive Jacobi " "preconditioner."); -DECLARE_string(formats); - // some shortcuts using etype = double; diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index dbf0939ac1a..85c28d8dcf5 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -71,7 +71,6 @@ DEFINE_uint32( nrhs, 1, "The number of right hand sides. Record the residual only when nrhs == 1."); -DECLARE_uint32(repetitions); // input validation [[noreturn]] void print_config_error_and_exit() { From a72aeaeca2dbdf4c44aee2de7dd75a6f42f5703b Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Thu, 17 Oct 2019 10:49:20 +0200 Subject: [PATCH 08/11] fix warning --- benchmark/conversions/conversions.cpp | 11 +++-- benchmark/preconditioner/preconditioner.cpp | 4 +- benchmark/solver/solver.cpp | 4 +- benchmark/spmv/spmv.cpp | 5 +- benchmark/utils/{common.hpp => formats.hpp} | 53 ++++++++++++++------- 5 files changed, 49 insertions(+), 28 deletions(-) rename benchmark/utils/{common.hpp => formats.hpp} (91%) diff --git a/benchmark/conversions/conversions.cpp b/benchmark/conversions/conversions.cpp index 3d1ca678d98..7921c31fa81 100644 --- a/benchmark/conversions/conversions.cpp +++ b/benchmark/conversions/conversions.cpp @@ -43,7 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "benchmark/utils/common.hpp" +#include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/spmv_common.hpp" @@ -66,7 +66,8 @@ void convert_matrix(const gko::LinOp *matrix_from, const char *format_to, rapidjson::Value(rapidjson::kObjectType), allocator); gko::matrix_data<> data{gko::dim<2>{1, 1}, 1}; - auto matrix_to = share(matrix_factory.at(format_to)(exec, data)); + auto matrix_to = + share(formats::matrix_factory.at(format_to)(exec, data)); // warm run for (unsigned int i = 0; i < FLAGS_warmup; i++) { exec->synchronize(); @@ -153,8 +154,8 @@ int main(int argc, char *argv[]) for (const auto &format_from : formats) { try { auto matrix_from = - share(matrix_factory.at(format_from)(exec, data)); - for (const auto &format : matrix_factory) { + share(formats::matrix_factory.at(format_from)(exec, data)); + for (const auto &format : formats::matrix_factory) { const auto format_to = std::get<0>(format); if (format_from == format_to) { continue; @@ -175,7 +176,7 @@ int main(int argc, char *argv[]) } backup_results(test_cases); } catch (const gko::AllocationError &e) { - for (const auto &format : matrix_factory) { + for (const auto &format : formats::matrix_factory) { const auto format_to = std::get<0>(format); auto conversion_name = std::string(format_from) + "-" + format_to; diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index 0e78f77fa7e..0fc19054d85 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "benchmark/utils/common.hpp" +#include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/spmv_common.hpp" @@ -289,7 +289,7 @@ int main(int argc, char *argv[]) auto data = gko::read_raw(mtx_fd); auto system_matrix = - share(matrix_factory.at(FLAGS_formats)(exec, data)); + share(formats::matrix_factory.at(FLAGS_formats)(exec, data)); auto b = create_vector(exec, system_matrix->get_size()[0], engine); auto x = create_vector(exec, system_matrix->get_size()[0]); diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index 85c28d8dcf5..537bf65e1f4 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -43,7 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define DISABLE_FORMATS_COMMAND -#include "benchmark/utils/common.hpp" +#include "benchmark/utils/formats.hpp" #undef DISABLE_FORMATS_COMMAND #include "benchmark/utils/general.hpp" #include "benchmark/utils/loggers.hpp" @@ -439,7 +439,7 @@ int main(int argc, char *argv[]) std::ifstream mtx_fd(test_case["filename"].GetString()); auto data = gko::read_raw(mtx_fd); - auto system_matrix = share(matrix_factory.at( + auto system_matrix = share(formats::matrix_factory.at( test_case["optimal"]["spmv"].GetString())(exec, data)); auto b = create_matrix( exec, gko::dim<2>{system_matrix->get_size()[0], FLAGS_nrhs}, diff --git a/benchmark/spmv/spmv.cpp b/benchmark/spmv/spmv.cpp index 0fc35d84fe2..69a3a9e90e0 100644 --- a/benchmark/spmv/spmv.cpp +++ b/benchmark/spmv/spmv.cpp @@ -43,7 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "benchmark/utils/common.hpp" +#include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/spmv_common.hpp" @@ -71,7 +71,8 @@ void apply_spmv(const char *format_name, std::shared_ptr exec, auto storage_logger = std::make_shared(exec); exec->add_logger(storage_logger); - auto system_matrix = share(matrix_factory.at(format_name)(exec, data)); + auto system_matrix = + share(formats::matrix_factory.at(format_name)(exec, data)); exec->remove_logger(gko::lend(storage_logger)); storage_logger->write_data(spmv_case[format_name], allocator); // warm run diff --git a/benchmark/utils/common.hpp b/benchmark/utils/formats.hpp similarity index 91% rename from benchmark/utils/common.hpp rename to benchmark/utils/formats.hpp index 2a7051d3760..65bd93651c4 100644 --- a/benchmark/utils/common.hpp +++ b/benchmark/utils/formats.hpp @@ -30,14 +30,15 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_BENCHMARK_UTILS_COMMON_HPP_ -#define GKO_BENCHMARK_UTILS_COMMON_HPP_ +#ifndef GKO_BENCHMARK_UTILS_FORMATS_HPP_ +#define GKO_BENCHMARK_UTILS_FORMATS_HPP_ #include #include +#include #include @@ -48,24 +49,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif // HAS_CUDA -// some shortcuts -using hybrid = gko::matrix::Hybrid<>; -using csr = gko::matrix::Csr<>; +namespace { -// the formats command-line argument -// If define DISABLE_FORMATS_COMMAND, do not define this argument. -#ifndef DISABLE_FORMATS_COMMAND -DEFINE_string( - formats, "coo", - "A comma-separated list of formats to run." - "Supported values are: coo, csr, ell, sellp, hybrid, hybrid0, " - "hybrid25, hybrid33, hybrid40, hybrid60, hybrid80, hybridlimit0, " - "hybridlimit25, hybridlimit33, hybridminstorage" + +std::string available_format = + "coo, csr, ell, sellp, hybrid, hybrid0, hybrid25, hybrid33, hybrid40, " + "hybrid60, hybrid80, hybridlimit0, hybridlimit25, hybridlimit33, " + "hybridminstorage" #ifdef HAS_CUDA ", cusp_csr, cusp_csrex, cusp_csrmp, cusp_csrmm, cusp_coo, cusp_ell, " "cusp_hybrid" #endif // HAS_CUDA - ".\n" + ".\n"; + +std::string format_description = "coo: Coordinate storage. The CUDA kernel uses the load-balancing approach " "suggested in Flegar et al.: Overcoming Load Imbalance for Irregular " "Sparse Matrices.\n" @@ -95,10 +92,30 @@ DEFINE_string( "cusp_csrmp: benchmark CuSPARSE with the cusparseXcsrmv_mp function.\n" "cusp_csrmm: benchmark CuSPARSE with the cusparseXcsrmv_mm function." #endif // HAS_CUDA -); + ; + +std::string format_command = + "A comma-separated list of formats to run. Supported values are: " + + available_format + format_description; + + +} // namespace + + +// the formats command-line argument +// If define DISABLE_FORMATS_COMMAND, do not define this argument. +#ifndef DISABLE_FORMATS_COMMAND +DEFINE_string(formats, "coo", format_command.c_str()); #endif // DISABLE_FORMATS_COMMAND +namespace formats { + + +// some shortcuts +using hybrid = gko::matrix::Hybrid<>; +using csr = gko::matrix::Csr<>; + /** * Creates a Ginkgo matrix from the intermediate data representation format * gko::matrix_data. @@ -182,4 +199,6 @@ const std::map( {"sellp", read_matrix_from_data>}}; -#endif // GKO_BENCHMARK_UTILS_COMMON_HPP_ \ No newline at end of file +} // namespace formats + +#endif // GKO_BENCHMARK_UTILS_FORMATS_HPP_ \ No newline at end of file From 18df52366e0c0013bfa59ccde8ad1ff4b6d8ab10 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Thu, 17 Oct 2019 11:17:30 +0200 Subject: [PATCH 09/11] fix sonar issues --- benchmark/utils/cuda_linops.hpp | 35 ++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/benchmark/utils/cuda_linops.hpp b/benchmark/utils/cuda_linops.hpp index 41dc93c1ce3..9a1fc3e870c 100644 --- a/benchmark/utils/cuda_linops.hpp +++ b/benchmark/utils/cuda_linops.hpp @@ -74,11 +74,17 @@ class CuspBase : public gko::LinOp { this->initialize_descr(); } + ~CuspBase() = default; + + CuspBase(const CuspBase &other) = delete; + CuspBase &operator=(const CuspBase &other) { - gko::LinOp::operator=(other); - this->gpu_exec_ = other.gpu_exec_; - this->initialize_descr(); + if (this != &other) { + gko::LinOp::operator=(other); + this->gpu_exec_ = other.gpu_exec_; + this->initialize_descr(); + } return *this; } @@ -312,16 +318,14 @@ class CuspCsrEx const auto id = this->get_gpu_exec()->get_device_id(); gko::device_guard g{id}; if (set_buffer_) { - try { - GKO_ASSERT_NO_CUDA_ERRORS(cudaFree(buffer_)); - } catch (const std::exception &e) { - std::cerr - << "Error when unallocating CuspCsrEx temporary buffer: " - << e.what() << std::endl; - } + GKO_ASSERT_NO_CUDA_ERRORS(cudaFree(buffer_)); } } + CuspCsrEx(const CuspCsrEx &other) = delete; + + CuspCsrEx &operator=(const CuspCsrEx &other) = default; + protected: void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override { @@ -418,14 +422,13 @@ class CuspHybrid { const auto id = this->get_gpu_exec()->get_device_id(); gko::device_guard g{id}; - try { - GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyHybMat(hyb_)); - } catch (const std::exception &e) { - std::cerr << "Error when unallocating CuspHybrid hyb_ matrix: " - << e.what() << std::endl; - } + GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyHybMat(hyb_)); } + CuspHybrid(const CuspHybrid &other) = delete; + + CuspHybrid &operator=(const CuspHybrid &other) = default; + protected: void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override { From a28219c9e5e2224b00681ddc4427ee10b1f0aa91 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Thu, 17 Oct 2019 11:55:12 +0200 Subject: [PATCH 10/11] create func to handle duplication and rm disable --- benchmark/CMakeLists.txt | 10 ++++++++++ benchmark/solver/CMakeLists.txt | 8 +------- benchmark/solver/solver.cpp | 2 -- benchmark/spmv/CMakeLists.txt | 10 ++-------- benchmark/utils/formats.hpp | 3 --- 5 files changed, 13 insertions(+), 20 deletions(-) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index bcf88165b47..cbfe9975edc 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -4,6 +4,16 @@ if (NOT CMAKE_BUILD_TYPE STREQUAL "Release") "will be affected") endif() +function(ginkgo_benchmark_cusp_linops name) + target_compile_definitions("${name}" PRIVATE HAS_CUDA=1) + target_link_libraries("${name}" ginkgo ${CUDA_RUNTIME_LIBS} + ${CUBLAS} ${CUSPARSE}) + target_include_directories("${name}" SYSTEM PRIVATE ${CUDA_INCLUDE_DIRS}) + if(CMAKE_CUDA_COMPILER_VERSION GREATER_EQUAL "9.2") + target_compile_definitions("${name}" PRIVATE ALLOWMP=1) + endif() +endfunction() + add_subdirectory(conversions) add_subdirectory(matrix_generator) add_subdirectory(matrix_statistics) diff --git a/benchmark/solver/CMakeLists.txt b/benchmark/solver/CMakeLists.txt index 03cbabf2b9b..fc1d203ca05 100644 --- a/benchmark/solver/CMakeLists.txt +++ b/benchmark/solver/CMakeLists.txt @@ -1,11 +1,5 @@ add_executable(solver solver.cpp) target_link_libraries(solver ginkgo gflags rapidjson) if (GINKGO_BUILD_CUDA) - target_compile_definitions(solver PRIVATE HAS_CUDA=1) - target_link_libraries(solver ginkgo ${CUDA_RUNTIME_LIBS} - ${CUBLAS} ${CUSPARSE}) - target_include_directories(solver SYSTEM PRIVATE ${CUDA_INCLUDE_DIRS}) - if(CMAKE_CUDA_COMPILER_VERSION GREATER_EQUAL "9.2") - target_compile_definitions(solver PRIVATE ALLOWMP=1) - endif() + ginkgo_benchmark_cusp_linops(solver) endif() \ No newline at end of file diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index 537bf65e1f4..7885c30511c 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -42,9 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#define DISABLE_FORMATS_COMMAND #include "benchmark/utils/formats.hpp" -#undef DISABLE_FORMATS_COMMAND #include "benchmark/utils/general.hpp" #include "benchmark/utils/loggers.hpp" diff --git a/benchmark/spmv/CMakeLists.txt b/benchmark/spmv/CMakeLists.txt index 0653bb8b509..13e637097cf 100644 --- a/benchmark/spmv/CMakeLists.txt +++ b/benchmark/spmv/CMakeLists.txt @@ -1,11 +1,5 @@ add_executable(spmv spmv.cpp) target_link_libraries(spmv ginkgo gflags rapidjson) if (GINKGO_BUILD_CUDA) - target_compile_definitions(spmv PRIVATE HAS_CUDA=1) - target_link_libraries(spmv ginkgo ${CUDA_RUNTIME_LIBS} - ${CUBLAS} ${CUSPARSE}) - target_include_directories(spmv SYSTEM PRIVATE ${CUDA_INCLUDE_DIRS}) - if(CMAKE_CUDA_COMPILER_VERSION GREATER_EQUAL "9.2") - target_compile_definitions(spmv PRIVATE ALLOWMP=1) - endif() -endif() \ No newline at end of file + ginkgo_benchmark_cusp_linops(spmv) +endif() diff --git a/benchmark/utils/formats.hpp b/benchmark/utils/formats.hpp index 65bd93651c4..1a9fd70d481 100644 --- a/benchmark/utils/formats.hpp +++ b/benchmark/utils/formats.hpp @@ -103,10 +103,7 @@ std::string format_command = // the formats command-line argument -// If define DISABLE_FORMATS_COMMAND, do not define this argument. -#ifndef DISABLE_FORMATS_COMMAND DEFINE_string(formats, "coo", format_command.c_str()); -#endif // DISABLE_FORMATS_COMMAND namespace formats { From 76cb7c116c12c6db353855c406c865e25508cebe Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Thu, 17 Oct 2019 18:08:47 +0200 Subject: [PATCH 11/11] fix throw in destructor --- benchmark/utils/cuda_linops.hpp | 19 +++++++++++++++---- benchmark/utils/formats.hpp | 6 +++--- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/benchmark/utils/cuda_linops.hpp b/benchmark/utils/cuda_linops.hpp index 9a1fc3e870c..105e0a3f4d5 100644 --- a/benchmark/utils/cuda_linops.hpp +++ b/benchmark/utils/cuda_linops.hpp @@ -316,9 +316,15 @@ class CuspCsrEx ~CuspCsrEx() override { const auto id = this->get_gpu_exec()->get_device_id(); - gko::device_guard g{id}; if (set_buffer_) { - GKO_ASSERT_NO_CUDA_ERRORS(cudaFree(buffer_)); + try { + gko::device_guard g{id}; + GKO_ASSERT_NO_CUDA_ERRORS(cudaFree(buffer_)); + } catch (const std::exception &e) { + std::cerr + << "Error when unallocating CuspCsrEx temporary buffer: " + << e.what() << std::endl; + } } } @@ -421,8 +427,13 @@ class CuspHybrid ~CuspHybrid() override { const auto id = this->get_gpu_exec()->get_device_id(); - gko::device_guard g{id}; - GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyHybMat(hyb_)); + try { + gko::device_guard g{id}; + GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyHybMat(hyb_)); + } catch (const std::exception &e) { + std::cerr << "Error when unallocating CuspHybrid hyb_ matrix: " + << e.what() << std::endl; + } } CuspHybrid(const CuspHybrid &other) = delete; diff --git a/benchmark/utils/formats.hpp b/benchmark/utils/formats.hpp index 1a9fd70d481..aa757030017 100644 --- a/benchmark/utils/formats.hpp +++ b/benchmark/utils/formats.hpp @@ -49,7 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif // HAS_CUDA -namespace { +namespace formats { std::string available_format = @@ -99,11 +99,11 @@ std::string format_command = available_format + format_description; -} // namespace +} // namespace formats // the formats command-line argument -DEFINE_string(formats, "coo", format_command.c_str()); +DEFINE_string(formats, "coo", formats::format_command.c_str()); namespace formats {