@@ -46,6 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4646#include < ginkgo/core/matrix/sellp.hpp>
4747
4848
49+ #include " core/matrix/common_kernels.hpp"
4950#include " core/matrix/dense_kernels.hpp"
5051#include " core/synthesizer/implementation_selection.hpp"
5152#include " cuda/base/config.hpp"
@@ -55,7 +56,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5556#include " cuda/base/types.hpp"
5657#include " cuda/components/atomic.cuh"
5758#include " cuda/components/cooperative_groups.cuh"
58- #include " cuda/components/prefix_sum.cuh"
5959#include " cuda/components/reduction.cuh"
6060#include " cuda/components/segment_scan.cuh"
6161#include " cuda/components/uninitialized_array.hpp"
@@ -675,27 +675,14 @@ void convert_to_sellp(std::shared_ptr<const CudaExecutor> exec,
675675 as_cuda_type (nnz_per_row.get_const_data ()), as_cuda_type (slice_lengths),
676676 as_cuda_type (slice_sets));
677677
678- auto add_values =
679- Array<size_type>(exec, ceildiv (slice_num + 1 , default_block_size));
680- grid_dim = ceildiv (slice_num + 1 , default_block_size);
681-
682- start_prefix_sum<default_block_size><<<grid_dim, default_block_size>>> (
683- slice_num + 1 , as_cuda_type (slice_sets),
684- as_cuda_type (add_values.get_data ()));
685-
686- finalize_prefix_sum<default_block_size><<<grid_dim, default_block_size>>> (
687- slice_num + 1 , as_cuda_type (slice_sets),
688- as_cuda_type (add_values.get_const_data ()));
678+ prefix_sum (exec, slice_sets, slice_num + 1 );
689679
690680 grid_dim = ceildiv (num_rows, default_block_size);
691681 kernel::fill_in_sellp<<<grid_dim, default_block_size>>> (
692682 num_rows, slice_size, as_cuda_type (source_values),
693683 as_cuda_type (source_row_ptrs), as_cuda_type (source_col_idxs),
694684 as_cuda_type (slice_lengths), as_cuda_type (slice_sets),
695685 as_cuda_type (result_col_idxs), as_cuda_type (result_values));
696-
697- nnz_per_row.clear ();
698- add_values.clear ();
699686}
700687
701688GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE (
@@ -777,11 +764,6 @@ void calculate_total_cols(std::shared_ptr<const CudaExecutor> exec,
777764
778765 exec->get_master ()->copy_from (exec.get (), 1 , d_result.get_const_data (),
779766 result);
780-
781- block_results.clear ();
782- nnz_per_row.clear ();
783- max_nnz_per_slice.clear ();
784- d_result.clear ();
785767}
786768
787769GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE (
@@ -915,10 +897,6 @@ void calculate_max_nnz_per_row(std::shared_ptr<const CudaExecutor> exec,
915897
916898 exec->get_master ()->copy_from (exec.get (), 1 , d_result.get_const_data (),
917899 result);
918-
919- nnz_per_row.clear ();
920- block_results.clear ();
921- d_result.clear ();
922900}
923901
924902GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE (
@@ -950,15 +928,7 @@ void convert_to_hybrid(std::shared_ptr<const CudaExecutor> exec,
950928 num_rows, max_nnz_per_row, as_cuda_type (source->get_const_row_ptrs ()),
951929 as_cuda_type (coo_offset.get_data ()));
952930
953- auto add_values =
954- Array<size_type>(exec, ceildiv (num_rows, default_block_size));
955- grid_dim = ceildiv (num_rows, default_block_size);
956- start_prefix_sum<default_block_size><<<grid_dim, default_block_size>>> (
957- num_rows, as_cuda_type (coo_offset.get_data ()),
958- as_cuda_type (add_values.get_data ()));
959- finalize_prefix_sum<default_block_size><<<grid_dim, default_block_size>>> (
960- num_rows, as_cuda_type (coo_offset.get_data ()),
961- as_cuda_type (add_values.get_const_data ()));
931+ prefix_sum (exec, coo_offset.get_data (), num_rows + 1 );
962932
963933 grid_dim = ceildiv (num_rows * config::warp_size, default_block_size);
964934 kernel::fill_in_hybrid<<<grid_dim, default_block_size>>> (
0 commit comments