Skip to content

Commit 00226d0

Browse files
authored
Merge: Fix for unified reduction kernel
Small fix for the unified reduction kernel: change the lambdas to capture by value instead of by reference. The issue was first found on Summit with gcc-7.50.0 and cuda-10.1, 11.3, and 11.4. Related PR: #926
2 parents cf69abf + 2f2d416 commit 00226d0

3 files changed

Lines changed: 6 additions & 6 deletions

File tree

cuda/base/kernel_launch_reduction.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@ void run_kernel_row_reduction(std::shared_ptr<const CudaExecutor> exec,
458458
} else {
459459
select_run_generic_kernel_row_reduction(
460460
subwarp_sizes(),
461-
[&](int compiled_subwarp_size) {
461+
[cols](int compiled_subwarp_size) {
462462
return compiled_subwarp_size >= cols ||
463463
compiled_subwarp_size == config::warp_size;
464464
},
@@ -488,7 +488,7 @@ void run_kernel_col_reduction(std::shared_ptr<const CudaExecutor> exec,
488488
if (cols <= config::warp_size) {
489489
select_generic_col_reduction_small(
490490
subwarp_sizes(),
491-
[&](int compiled_subwarp_size) {
491+
[cols](int compiled_subwarp_size) {
492492
return compiled_subwarp_size >= cols ||
493493
compiled_subwarp_size == config::warp_size;
494494
},

dpcpp/base/kernel_launch_reduction.dp.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -576,7 +576,7 @@ void run_kernel_row_reduction_stage1(std::shared_ptr<const DpcppExecutor> exec,
576576
} else {
577577
select_generic_kernel_row_reduction_2d(
578578
subsubgroup_sizes(),
579-
[&](int compiled_ssg_size) {
579+
[cols](int compiled_ssg_size) {
580580
return compiled_ssg_size >= cols ||
581581
compiled_ssg_size == sg_size;
582582
},
@@ -612,7 +612,7 @@ void run_kernel_col_reduction_stage1(std::shared_ptr<const DpcppExecutor> exec,
612612
if (cols <= sg_size) {
613613
select_generic_col_reduction_small(
614614
subsubgroup_sizes(),
615-
[&](int compiled_ssg_size) {
615+
[cols](int compiled_ssg_size) {
616616
return compiled_ssg_size >= cols ||
617617
compiled_ssg_size == sg_size;
618618
},

hip/base/kernel_launch_reduction.hip.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ void run_kernel_row_reduction(std::shared_ptr<const HipExecutor> exec,
466466
} else {
467467
select_run_generic_kernel_row_reduction(
468468
subwarp_sizes(),
469-
[&](int compiled_subwarp_size) {
469+
[cols](int compiled_subwarp_size) {
470470
return compiled_subwarp_size >= cols ||
471471
compiled_subwarp_size == config::warp_size;
472472
},
@@ -496,7 +496,7 @@ void run_kernel_col_reduction(std::shared_ptr<const HipExecutor> exec,
496496
if (cols <= config::warp_size) {
497497
select_generic_col_reduction_small(
498498
subwarp_sizes(),
499-
[&](int compiled_subwarp_size) {
499+
[cols](int compiled_subwarp_size) {
500500
return compiled_subwarp_size >= cols ||
501501
compiled_subwarp_size == config::warp_size;
502502
},

0 commit comments

Comments
 (0)