ginkgo-project · upsj · May 4, 2020 · May 4, 2020 · May 4, 2020 · May 4, 2020
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -12,6 +12,7 @@ add_subdirectory(ilu-preconditioned-solver)
 add_subdirectory(inverse-iteration)
 add_subdirectory(iterative-refinement)
 add_subdirectory(minimal-cuda-solver)
+add_subdirectory(mixed-precision-ir)
 add_subdirectory(nine-pt-stencil-solver)
 add_subdirectory(papi-logging)
 add_subdirectory(performance-debugging)

diff --git a/examples/iterative-refinement/build.sh b/examples/iterative-refinement/build.sh
diff --git a/examples/mixed-precision-ir/CMakeLists.txt b/examples/mixed-precision-ir/CMakeLists.txt
@@ -0,0 +1,4 @@
+# Mixed-precision iterative refinement example: one executable built from a
+# single source file and linked against the in-tree `ginkgo` target.
+add_executable(mixed-precision-ir mixed-precision-ir.cpp)
+# Executables have no consumers, so the dependency is PRIVATE; the explicit
+# keyword also avoids the legacy keyword-less signature.
+target_link_libraries(mixed-precision-ir PRIVATE ginkgo)
+target_include_directories(mixed-precision-ir PRIVATE "${PROJECT_SOURCE_DIR}")
+# The example opens "data/A.mtx" relative to its working directory, so copy
+# the matrix next to the built executable in the binary tree.
+configure_file(
+    "${CMAKE_CURRENT_SOURCE_DIR}/data/A.mtx"
+    "${CMAKE_CURRENT_BINARY_DIR}/data/A.mtx"
+    COPYONLY)
diff --git a/examples/mixed-precision-ir/build.sh b/examples/mixed-precision-ir/build.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# set up script
+if [ $# -ne 1 ]; then
+    echo -e "Usage: $0 GINKGO_BUILD_DIRECTORY"
+    exit 1
+fi
+BUILD_DIR=$1
+THIS_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" &>/dev/null && pwd )
+
+# copy libraries
+LIBRARY_DIRS="core core/device_hooks reference omp cuda hip"
+LIBRARY_NAMES="ginkgo ginkgo_reference ginkgo_omp ginkgo_cuda ginkgo_hip"
+SUFFIXES=".so .dylib .dll d.so d.dylib d.dll"
+for prefix in ${LIBRARY_DIRS}; do
+    for name in ${LIBRARY_NAMES}; do
+        for suffix in ${SUFFIXES}; do
+            cp ${BUILD_DIR}/${prefix}/lib${name}${suffix} \
+                ${THIS_DIR}/lib${name}${suffix} 2>/dev/null
+        done
+    done
+done
+
+# figure out correct compiler flags
+if ls ${THIS_DIR} | grep -F "libginkgo." >/dev/null; then
+    LINK_FLAGS="-lginkgo -lginkgo_omp -lginkgo_cuda -lginkgo_reference -lginkgo_hip"
+else
+    LINK_FLAGS="-lginkgod -lginkgo_ompd -lginkgo_cudad -lginkgo_referenced -lginkgo_hipd"
+fi
+if [ -z "${CXX}" ]; then
+    CXX="c++"
+fi
+
+# build
+${CXX} -std=c++11 -o ${THIS_DIR}/mixed-precision-ir \
+    ${THIS_DIR}/mixed-precision-ir.cpp \
+    -I${THIS_DIR}/../../include -I${BUILD_DIR}/include \
+    -L${THIS_DIR} ${LINK_FLAGS}
diff --git a/examples/mixed-precision-ir/data/A.mtx b/examples/mixed-precision-ir/data/A.mtx
@@ -0,0 +1,114 @@
+%%MatrixMarket matrix coordinate integer symmetric
+%-------------------------------------------------------------------------------
+% UF Sparse Matrix Collection, Tim Davis
+% http://www.cise.ufl.edu/research/sparse/matrices/JGD_Trefethen/Trefethen_20b
+% name: JGD_Trefethen/Trefethen_20b
+% [Diagonal matrices with primes, Nick Trefethen, Oxford Univ.]
+% id: 2203
+% date: 2008
+% author: N. Trefethen
+% ed: J.-G. Dumas
+% fields: name title A id date author ed kind notes
+% kind: combinatorial problem
+%-------------------------------------------------------------------------------
+% notes:
+% Diagonal matrices with primes, Nick Trefethen, Oxford Univ.          
+% From Jean-Guillaume Dumas' Sparse Integer Matrix Collection,         
+% http://ljk.imag.fr/membres/Jean-Guillaume.Dumas/simc.html            
+%                                                                      
+% Problem 7 of the Hundred-dollar, Hundred-digit Challenge Problems,   
+% SIAM News, vol 35, no. 1.                                            
+%                                                                      
+% 7. Let A be the 20,000 x 20,000 matrix whose entries are zero        
+% everywhere except for the primes 2, 3, 5, 7, . . . , 224737 along the
+% main diagonal and the number 1 in all the positions A(i,j) with      
+% |i-j| = 1,2,4,8, . . . ,16384.  What is the (1,1) entry of inv(A)?   
+%                                                                      
+% http://www.siam.org/news/news.php?id=388                             
+%                                                                      
+% Filename in JGD collection: Trefethen/trefethen_20__19_minor.sms     
+%-------------------------------------------------------------------------------
+19 19 83
+1 1 3
+2 1 1
+3 1 1
+5 1 1
+9 1 1
+17 1 1
+2 2 5
+3 2 1
+4 2 1
+6 2 1
+10 2 1
+18 2 1
+3 3 7
+4 3 1
+5 3 1
+7 3 1
+11 3 1
+19 3 1
+4 4 11
+5 4 1
+6 4 1
+8 4 1
+12 4 1
+5 5 13
+6 5 1
+7 5 1
+9 5 1
+13 5 1
+6 6 17
+7 6 1
+8 6 1
+10 6 1
+14 6 1
+7 7 19
+8 7 1
+9 7 1
+11 7 1
+15 7 1
+8 8 23
+9 8 1
+10 8 1
+12 8 1
+16 8 1
+9 9 29
+10 9 1
+11 9 1
+13 9 1
+17 9 1
+10 10 31
+11 10 1
+12 10 1
+14 10 1
+18 10 1
+11 11 37
+12 11 1
+13 11 1
+15 11 1
+19 11 1
+12 12 41
+13 12 1
+14 12 1
+16 12 1
+13 13 43
+14 13 1
+15 13 1
+17 13 1
+14 14 47
+15 14 1
+16 14 1
+18 14 1
+15 15 53
+16 15 1
+17 15 1
+19 15 1
+16 16 59
+17 16 1
+18 16 1
+17 17 61
+18 17 1
+19 17 1
+18 18 67
+19 18 1
+19 19 71
diff --git a/examples/mixed-precision-ir/doc/builds-on b/examples/mixed-precision-ir/doc/builds-on
@@ -0,0 +1 @@
+iterative-refinement
diff --git a/examples/mixed-precision-ir/doc/intro.dox b/examples/mixed-precision-ir/doc/intro.dox
@@ -0,0 +1,8 @@
+<a name="Mixed Precision Iterative Refinement (MPIR)"></a>
+<h1>This example manually implements a Mixed Precision Iterative Refinement (MPIR) solver.</h1>
+
+<h3> In this example, we first read in a matrix from file, then generate a
+right-hand side and an initial guess. An inaccurate CG solver in single
+precision is used as the inner solver of an iterative refinement (IR) method
+in double precision, which solves the linear system.
+</h3>
diff --git a/examples/mixed-precision-ir/doc/kind b/examples/mixed-precision-ir/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/mixed-precision-ir/doc/results.dox b/examples/mixed-precision-ir/doc/results.dox
@@ -0,0 +1,19 @@
+<h1>Results</h1>
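+This is the expected output (the version information printed at program start is omitted; the exact residual norm, iteration count and execution time may vary with the executor and hardware):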
+
+@code{.cpp}
+
+Initial residual norm sqrt(r^T r): 
+%%MatrixMarket matrix array real general
+1 1
+194.679
+Final residual norm sqrt(r^T r): 
+%%MatrixMarket matrix array real general
+1 1
+1.22728e-10
+MPIR iteration count:     25
+MPIR execution time [ms]: 18.0933
+
+@endcode
+
+<h3> Comments about programming and debugging </h3>
diff --git a/examples/mixed-precision-ir/doc/short-intro b/examples/mixed-precision-ir/doc/short-intro
@@ -0,0 +1 @@
+The Mixed Precision Iterative Refinement (MPIR) solver example.
diff --git a/examples/mixed-precision-ir/doc/tooltip b/examples/mixed-precision-ir/doc/tooltip
@@ -0,0 +1 @@
+Manually implement a Mixed Precision Iterative Refinement (MPIR) method in Ginkgo. Solve a linear system.
diff --git a/examples/mixed-precision-ir/mixed-precision-ir.cpp b/examples/mixed-precision-ir/mixed-precision-ir.cpp
@@ -0,0 +1,177 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2020, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+
+#include <ginkgo/ginkgo.hpp>
+
+
+#include <chrono>
+#include <cstdlib>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <memory>
+#include <string>
+
+
+// Solves A * x = b with a manually implemented mixed-precision iterative
+// refinement (MPIR): the outer loop keeps the solution and residual in
+// ValueType (double), while each correction is computed by a CG solver
+// running in SolverType (float).
+//
+// The optional first command line argument selects the executor
+// ("reference", "omp", "cuda" or "hip"); without arguments the reference
+// executor is used. The matrix is read from "data/A.mtx" relative to the
+// working directory.
+int main(int argc, char *argv[])
+{
+    // Some shortcuts
+    using ValueType = double;
+    using SolverType = float;
+    using IndexType = int;
+    using vec = gko::matrix::Dense<ValueType>;
+    using solver_vec = gko::matrix::Dense<SolverType>;
+    using mtx = gko::matrix::Csr<ValueType, IndexType>;
+    using solver_mtx = gko::matrix::Csr<SolverType, IndexType>;
+    using cg = gko::solver::Cg<SolverType>;
+
+    // Iteration limits and relative residual reduction targets for the outer
+    // refinement loop and the inner CG solver.
+    gko::size_type max_outer_iters = 100u;
+    gko::size_type max_inner_iters = 100u;
+    gko::remove_complex<ValueType> outer_reduction_factor = 1e-12;
+    gko::remove_complex<SolverType> inner_reduction_factor = 1e-2;
+
+    // Print version information
+    std::cout << gko::version_info::get() << std::endl;
+
+    // Figure out where to run the code
+    std::shared_ptr<gko::Executor> exec;
+    if (argc == 1 || std::string(argv[1]) == "reference") {
+        exec = gko::ReferenceExecutor::create();
+    } else if (argc == 2 && std::string(argv[1]) == "omp") {
+        exec = gko::OmpExecutor::create();
+    } else if (argc == 2 && std::string(argv[1]) == "cuda" &&
+               gko::CudaExecutor::get_num_devices() > 0) {
+        exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create());
+    } else if (argc == 2 && std::string(argv[1]) == "hip" &&
+               gko::HipExecutor::get_num_devices() > 0) {
+        exec = gko::HipExecutor::create(0, gko::OmpExecutor::create());
+    } else {
+        std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;
+        std::exit(-1);
+    }
+
+    // Read data
+    auto A = share(gko::read<mtx>(std::ifstream("data/A.mtx"), exec));
+    // Create RHS and initial guess as 1
+    gko::size_type size = A->get_size()[0];
+    auto host_x = vec::create(exec->get_master(), gko::dim<2>(size, 1));
+    // The index uses the same unsigned type as `size` to avoid a
+    // signed/unsigned comparison.
+    for (gko::size_type i = 0; i < size; i++) {
+        host_x->at(i, 0) = 1.;
+    }
+    auto x = vec::create(exec);
+    auto b = vec::create(exec);
+    x->copy_from(host_x.get());
+    b->copy_from(host_x.get());
+
+    // Calculate the initial residual r = b - A * x.
+    // It must have the same sign as the residual recomputed at the end of
+    // every outer iteration: starting from A * x - b instead would make the
+    // first correction step move x away from the solution.
+    auto one = gko::initialize<vec>({1.0}, exec);
+    auto neg_one = gko::initialize<vec>({-1.0}, exec);
+    auto initres_vec = gko::initialize<vec>({0.0}, exec);
+    auto outer_residual = vec::create(exec);
+    outer_residual->copy_from(lend(b));
+    A->apply(lend(neg_one), lend(x), lend(one), lend(outer_residual));
+    outer_residual->compute_norm2(lend(initres_vec));
+
+    // Build lower-precision system matrix and residual
+    auto solver_A = solver_mtx::create(exec);
+    auto inner_residual = solver_vec::create(exec);
+    A->convert_to(lend(solver_A));
+
+    // Create inner solver
+    auto inner_solver =
+        cg::build()
+            .with_criteria(gko::stop::ResidualNormReduction<SolverType>::build()
+                               .with_reduction_factor(inner_reduction_factor)
+                               .on(exec),
+                           gko::stop::Iteration::build()
+                               .with_max_iters(max_inner_iters)
+                               .on(exec))
+            .on(exec)
+            ->generate(give(solver_A));
+
+    // Solve system
+    exec->synchronize();
+    std::chrono::nanoseconds time(0);
+    auto res_vec = gko::initialize<vec>({0.0}, exec);
+    auto initres = exec->copy_val_to_host(initres_vec->get_const_values());
+    auto inner_solution = solver_vec::create(exec);
+    auto outer_delta = vec::create(exec);
+    auto tic = std::chrono::steady_clock::now();
+    // Number of completed refinement steps (inner solves).
+    gko::size_type iter = 0;
+    while (true) {
+        // convert residual to inner precision
+        outer_residual->convert_to(lend(inner_residual));
+        outer_residual->compute_norm2(lend(res_vec));
+        auto res = exec->copy_val_to_host(res_vec->get_const_values());
+
+        // break if we reached the maximum number of iterations or have
+        // converged; `>=` ensures at most max_outer_iters inner solves
+        if (iter >= max_outer_iters ||
+            res / initres < outer_reduction_factor) {
+            break;
+        }
+
+        // Use the inner solver to solve
+        // A * inner_solution = inner_residual
+        // with residual as initial guess.
+        inner_solution->copy_from(lend(inner_residual));
+        inner_solver->apply(lend(inner_residual), lend(inner_solution));
+
+        // convert inner solution to outer precision
+        inner_solution->convert_to(lend(outer_delta));
+
+        // x = x + inner_solution
+        x->add_scaled(lend(one), lend(outer_delta));
+
+        // residual = b - A * x
+        outer_residual->copy_from(lend(b));
+        A->apply(lend(neg_one), lend(x), lend(one), lend(outer_residual));
+
+        ++iter;
+    }
+
+    auto toc = std::chrono::steady_clock::now();
+    time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
+
+    // Calculate the final residual by overwriting b with A * x - b; only its
+    // norm is used, so the sign does not matter here.
+    A->apply(lend(one), lend(x), lend(neg_one), lend(b));
+    b->compute_norm2(lend(res_vec));
+
+    std::cout << "Initial residual norm sqrt(r^T r): \n";
+    write(std::cout, lend(initres_vec));
+    std::cout << "Final residual norm sqrt(r^T r): \n";
+    write(std::cout, lend(res_vec));
+
+    // Print solver statistics
+    std::cout << "MPIR iteration count:     " << iter << std::endl;
+    std::cout << "MPIR execution time [ms]: "
+              << static_cast<double>(time.count()) / 1000000.0 << std::endl;
+}
diff --git a/examples/nine-pt-stencil-solver/build.sh b/examples/nine-pt-stencil-solver/build.sh
diff --git a/examples/twentyseven-pt-stencil-solver/build.sh b/examples/twentyseven-pt-stencil-solver/build.sh
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		The Mixed Precision Iterative Refinement (MPIR) solver example.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Manually implement a Mixed Precision Iterative Refinement (MPIR) method in Ginkgo. Solve a linear system.