Skip to content

Commit

Permalink
Merge pull request #151 from jti-lanl/omp-vec
Browse files (browse the repository at this point in the history)
Allow gcc to vectorize gather_smallbuf() in openmp_kernels
  • Loading branch information
jyoung3131 authored Aug 18, 2023
2 parents 5fb07d2 + accaa80 commit 8f6384a
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions src/openmp/openmp_kernels.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -301,6 +301,7 @@ void gather_smallbuf(
size_t delta,
size_t n,
size_t target_len) {

#ifdef __GNUC__
#pragma omp parallel
#else
Expand All @@ -311,19 +312,21 @@ void gather_smallbuf(

#ifdef __CRAYC__
#pragma concurrent
#endif
#ifdef __INTEL_COMPILER
#elif defined __INTEL_COMPILER
#pragma ivdep
#endif

#pragma omp for
for (size_t i = 0; i < n; i++) {
sgData_t *sl = source + delta * i;
sgData_t *tl = target[t] + pat_len*(i%target_len);

#ifdef __CRAYC__
#pragma concurrent
#endif
#if defined __CRAYC__ || defined __INTEL_COMPILER
#elif defined __CRAYC__ || defined __INTEL_COMPILER
#pragma vector always,unaligned
#elif defined __GNUC__
#pragma omp simd // or: #pragma GCC ivdep
#endif
for (size_t j = 0; j < pat_len; j++) {
tl[j] = sl[pat[j]];
Expand Down

0 comments on commit 8f6384a

Please sign in to comment.