Skip to content

Commit

Permalink
Limit the max size of the untuned kernel
Browse files: browse the repository at this point in the history
  • Loading branch information
alazzaro committed Jun 28, 2024
1 parent 390670e commit c4f8eea
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 6 deletions.
10 changes: 7 additions & 3 deletions src/acc/libsmm_acc/libsmm_acc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ kernel_map_iterator add_kernel_handle_to_jitted_kernels(
ACC_DRV(function) kern_func, ACC_DRV(stream) stream, Triplet h_mnk, int& threads, int& grouping, bool& generated_acc_untuned) {
kernel_map_iterator kernel_it = kernel_handles.end();

// Check if the kernel was already generated and failed
// Check if the kernel was already generated and failed or if it is too big
if (failed_acc_kernels.find(h_mnk) != failed_acc_kernels.end()) return kernel_it;

libsmm_acc_algo algo;
Expand All @@ -219,17 +219,21 @@ kernel_map_iterator add_kernel_handle_to_jitted_kernels(
minblocks = params[7];
generated_acc_untuned = false;
}
else { // Use a default untuned kernel
else if (h_mnk[0] < 50 && h_mnk[1] < 50 && h_mnk[2] < 50) { // Use a default untuned kernel
algo = medium;
tile_m = 2;
tile_n = 2;
w = 0;
v = 0;
threads = 256;
grouping = 30;
minblocks = 2;
minblocks = 1;
generated_acc_untuned = true;
}
else {
failed_acc_kernels.insert(h_mnk);
return kernel_it;
}

// JIT and validate the kernel
jit_kernel(kern_func, algo, tile_m, tile_n, w, v, threads, grouping, minblocks, h_mnk[0], h_mnk[1], h_mnk[2]);
Expand Down
2 changes: 1 addition & 1 deletion src/core/dbcsr_config.F
Original file line number Diff line number Diff line change
Expand Up @@ -701,7 +701,7 @@ SUBROUTINE reset_accdrv_active_device_id()
accdrv_active_device_id = default_accdrv_active_device_id
END SUBROUTINE reset_accdrv_active_device_id

FUNCTION use_acc()
PURE FUNCTION use_acc()
LOGICAL :: use_acc

IF (has_acc .AND. dbcsr_cfg%run_on_gpu%val) THEN
Expand Down
17 changes: 15 additions & 2 deletions src/mm/dbcsr_mm_sched.F
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,8 @@ SUBROUTINE stats_print_report(report, output_unit)
INTEGER(KIND=int_8), DIMENSION(3) :: flops_homo
INTEGER, ALLOCATABLE, DIMENSION(:) :: sort_idx
CHARACTER(LEN=4) :: generated_acc_untuned_label
LOGICAL :: has_acc_untuned_kernel
LOGICAL :: has_acc_untuned_kernel, &
use_cpu_kernels

IF (output_unit <= 0) RETURN

Expand All @@ -582,6 +583,8 @@ SUBROUTINE stats_print_report(report, output_unit)
total_flops_homo = 0
flops_homo(:) = 0
has_acc_untuned_kernel = .FALSE.
use_cpu_kernels = .FALSE.

DO i = 1, SIZE(sort_idx)
j = sort_idx(i) + 1
total = SUM(report%num_mnk_stacks(j, 4:6))
Expand All @@ -595,6 +598,10 @@ SUBROUTINE stats_print_report(report, output_unit)
has_acc_untuned_kernel = .TRUE.
END IF

IF (SUM(report%num_mnk_stacks(j, 4:5)) .GT. 0) THEN
use_cpu_kernels = .TRUE.
END IF

WRITE (output_unit, "(A,I5,' x ',I5,' x ',I5,T30,I20,5X,F5.1,'%',4X,F5.1,'%',4X,F5.1,'% ',A)") &
" flops ", report%num_mnk_stacks(j, 1:3), &
flops, &
Expand All @@ -603,7 +610,13 @@ SUBROUTINE stats_print_report(report, output_unit)
END DO

IF (has_acc_untuned_kernel) THEN
DBCSR_WARN(" (*) ACC Untuned kernels, consider to run the tuning procedure")
CALL dbcsr_warn(__LOCATION__, &
" (*) ACC Untuned kernels, consider to run the ACC tuning procedure for them")
END IF

IF (use_cpu_kernels .AND. use_acc()) THEN
CALL dbcsr_warn(__LOCATION__, &
" Some kernels are running on the CPU, consider to run the ACC tuning procedure for them")
END IF

total = report%cpu_flop + report%smm_flop + report%acc_flop
Expand Down

0 comments on commit c4f8eea

Please sign in to comment.