Skip to content

Commit

Permalink
[SYCL][Matrix][E2E] Organize matrix tests (#16563)
Browse files Browse the repository at this point in the history
Move the Matrix E2E test header files into the `Matrix/Inputs` folder. Also
add that folder to the include directories by extending the `%clangxx`
expansion in the `Matrix/lit.local.cfg` file, which simplifies the include
statements.
  • Loading branch information
ayylol authored Jan 23, 2025
1 parent 43ee651 commit 251eb90
Show file tree
Hide file tree
Showing 83 changed files with 111 additions and 103 deletions.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,17 @@ template <size_t TM, size_t TN, size_t TK> class MatMul;

template <
#if !defined(ARG_DIM) && !defined(RUNTIME_DIM)
size_t rowsA, size_t colsA, size_t rowsB, size_t colsB,
size_t rowsA, size_t colsA, size_t rowsB, size_t colsB,
#endif // ARG_DIM, RUNTIME_DIM
size_t vnniFactor, typename TOperand, typename TResult, size_t TM,
size_t TN, size_t TK, size_t MCache1, size_t NCache1, size_t KCache1,
size_t MCache2, size_t NCache2, size_t KCache2>
size_t vnniFactor, typename TOperand, typename TResult, size_t TM,
size_t TN, size_t TK, size_t MCache1, size_t NCache1, size_t KCache1,
size_t MCache2, size_t NCache2, size_t KCache2>
double joint_matmul(TOperand *A, TOperand *B, TResult *C, queue &q, int i
#if defined(ARG_DIM) || defined(RUNTIME_DIM)
, size_t rowsA, size_t colsA, size_t rowsB, size_t colsB
,
size_t rowsA, size_t colsA, size_t rowsB, size_t colsB
#endif // ARG_DIM, RUNTIME_DIM
) {
) {

size_t sgSize = get_sg_size<MatMul<TM, TN, TK>>(q);
range<2> global{rowsA / MCache1, (colsB / NCache1) * sgSize};
Expand Down Expand Up @@ -118,12 +119,12 @@ double joint_matmul(TOperand *A, TOperand *B, TResult *C, queue &q, int i
// pm1B and pn1B are used to identify the distribution of subgroups
// along the workgroup prefetch for B matrix. For A matrix, sgId is
// enough.
size_t pm1B = sgId / 16; // prefetch m1 (sgId/16)
size_t pn1B = sgId & 0xF; // prefetch n1 (sgId%16)
#else // VNNI
size_t pm1B = sgId / 16; // prefetch m1 (sgId/16)
size_t pn1B = sgId & 0xF; // prefetch n1 (sgId%16)
#else // VNNI
size_t pm1B = sgId / 8; // prefetch m1 (sgId/8)
size_t pn1B = sgId & 0x7; // prefetch n1 (sgId%8)
#endif // VNNI
#endif // VNNI
constexpr size_t prefDistance = 3;
for (int p = 0; p < prefDistance; p++)
joint_matrix_prefetch<prefRow, prefCol>(
Expand Down Expand Up @@ -306,8 +307,8 @@ double joint_matmul(TOperand *A, TOperand *B, TResult *C, queue &q, int i
pm1B * prefRow) *
(colsB)*vnniFactor +
(n2 * NCache2 * vnniFactor + pn1B * prefCol);
if ((prefetch_offsetB + (prefRow * colsB * vnniFactor) +
prefCol) < (rowsB * colsB))
if ((prefetch_offsetB + (prefRow * colsB * vnniFactor) + prefCol) <
(rowsB * colsB))
joint_matrix_prefetch<prefRow, prefCol>(
sg, B + prefetch_offsetB, colsB * vnniFactor,
layout::row_major,
Expand Down Expand Up @@ -395,18 +396,17 @@ void test(size_t matrix_size_input) {
// run testIterations time, aggregate and calculate average run time
double totalDuration = 0;
for (unsigned int i = 0; i < testIterations; i++) {
double duration =
joint_matmul<
double duration = joint_matmul<
#if !defined(ARG_DIM) && !defined(RUNTIME_DIM)
matrix_size, matrix_size, matrix_size, matrix_size,
matrix_size, matrix_size, matrix_size, matrix_size,
#endif // ARG_DIM, RUNTIME_DIM
vnniFactor, T, TResult, TM, TN, TK, MCache1, NCache1,
KCache1, MCache2, NCache2, KCache2>
(A, B, C, q, i
vnniFactor, T, TResult, TM, TN, TK, MCache1, NCache1, KCache1, MCache2,
NCache2, KCache2>(A, B, C, q, i
#if defined(ARG_DIM) || defined(RUNTIME_DIM)
, matrix_size, matrix_size, matrix_size, matrix_size
,
matrix_size, matrix_size, matrix_size, matrix_size
#endif // ARG_DIM, RUNTIME_DIM
);
);

if (i >= recordThresh) {
totalDuration += duration;
Expand All @@ -431,19 +431,19 @@ void test(size_t matrix_size_input) {

int main(
#ifdef RUNTIME_DIM
int argc, char *argv[]
#endif //RUNTIME_DIM
) {
int argc, char *argv[]
#endif // RUNTIME_DIM
) {

size_t matrix_size = -1;
size_t matrix_size = -1;
#ifdef RUNTIME_DIM
if (argc == 2) {
matrix_size = std::stoul(argv[1]);
} else {
std::cerr << "Usage: ./program matrix_size\n";
return 1; // Error if no argument
}
#endif //RUNTIME_DIM
#endif // RUNTIME_DIM

queue q;
std::vector<combination> combinations =
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/element_wise_abc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../element_wise_abc_impl.hpp"
#include "element_wise_abc_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/element_wise_all_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../element_wise_all_ops_impl.hpp"
#include "element_wise_all_ops_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/element_wise_all_ops_half.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../element_wise_all_ops_half_impl.hpp"
#include "element_wise_all_ops_half_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/element_wise_all_ops_int8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../element_wise_all_ops_int8_impl.hpp"
#include "element_wise_all_ops_int8_impl.hpp"
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@

// This test stores the matrix B that is VNNIed (packed).

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../element_wise_all_ops_int8_packed_impl.hpp"
#include "element_wise_all_ops_int8_packed_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/element_wise_all_ops_tf32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32
constexpr size_t TN = 16;

#include "../element_wise_all_ops_tf32_impl.hpp"
#include "element_wise_all_ops_tf32_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/element_wise_all_sizes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../element_wise_all_sizes_impl.hpp"
#include "element_wise_all_sizes_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/element_wise_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../element_wise_ops_impl.hpp"
#include "element_wise_ops_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/get_coordinate_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../get_coordinate_ops_impl.hpp"
#include "get_coordinate_ops_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/joint_matrix_all_sizes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../joint_matrix_all_sizes_impl.hpp"
#include "joint_matrix_all_sizes_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/joint_matrix_annotated_ptr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
// XFAIL: gpu
// XFAIL-TRACKER: GSD-4181

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32
constexpr size_t TN = 16;

#include "../joint_matrix_annotated_ptr_impl.hpp"
#include "joint_matrix_annotated_ptr_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/joint_matrix_apply_bf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../joint_matrix_apply_bf16_impl.hpp"
#include "joint_matrix_apply_bf16_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/joint_matrix_apply_two_matrices.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
// RUN: %{build} %fp-model-precise -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../joint_matrix_apply_two_matrices_impl.hpp"
#include "joint_matrix_apply_two_matrices_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@

// -ffp-model=precise is added to not depend on compiler defaults.

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../joint_matrix_bf16_fill_k_cache_impl.hpp"
#include "joint_matrix_bf16_fill_k_cache_impl.hpp"
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

// -ffp-model=precise is added to not depend on compiler defaults.

#include "../common.hpp"
#include "common.hpp"
#define SG_SZ 32

#include "../joint_matrix_bf16_fill_k_cache_impl.hpp"
#include "joint_matrix_bf16_fill_k_cache_impl.hpp"
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@

// -ffp-model=precise is added to not depend on compiler defaults.

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../joint_matrix_bf16_fill_k_cache_impl.hpp"
#include "joint_matrix_bf16_fill_k_cache_impl.hpp"
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
// since IGC doesn't support some variants of IR for Joint Matrix currently
// -ffp-model=precise is added to not depend on compiler defaults.

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../joint_matrix_bf16_fill_k_cache_impl.hpp"
#include "joint_matrix_bf16_fill_k_cache_impl.hpp"
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
// since IGC doesn't support some variants of IR for Joint Matrix currently
// -ffp-model=precise is added to not depend on compiler defaults.

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../joint_matrix_bf16_fill_k_cache_impl.hpp"
#include "joint_matrix_bf16_fill_k_cache_impl.hpp"
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../joint_matrix_bf16_rowmajorB_load_store_impl.hpp"
#include "joint_matrix_bf16_rowmajorB_load_store_impl.hpp"
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../joint_matrix_bf16_rowmajorB_pair_load_store_impl.hpp"
#include "joint_matrix_bf16_rowmajorB_pair_load_store_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../joint_matrix_bfloat16_impl.hpp"
#include "joint_matrix_bfloat16_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../joint_matrix_bfloat16_array_impl.hpp"
#include "joint_matrix_bfloat16_array_impl.hpp"
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@
// XFAIL: gpu
// XFAIL-TRACKER: GSD-5768

#include "../common.hpp"
#include "common.hpp"
#define SG_SZ 32
#include "../joint_matrix_bfloat16_colmajorA_colmajorB_impl.hpp"
#include "joint_matrix_bfloat16_colmajorA_colmajorB_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/joint_matrix_bfloat16_packedB.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
// XFAIL: gpu
// XFAIL-TRACKER: GSD-4181

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32
#include "../joint_matrix_bfloat16_packedB_impl.hpp"
#include "joint_matrix_bfloat16_packedB_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/joint_matrix_colA_rowB_colC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@
// XFAIL: gpu && run-mode
// XFAIL-TRACKER: GSD-5768

#include "../common.hpp"
#include "common.hpp"
#define SG_SZ 32
#include "../joint_matrix_colA_rowB_colC_impl.hpp"
#include "joint_matrix_colA_rowB_colC_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/joint_matrix_down_convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../joint_matrix_down_convert_impl.hpp"
#include "joint_matrix_down_convert_impl.hpp"
4 changes: 2 additions & 2 deletions sycl/test-e2e/Matrix/SG32/joint_matrix_half.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include "../common.hpp"
#include "common.hpp"

#define SG_SZ 32

#include "../joint_matrix_half_impl.hpp"
#include "joint_matrix_half_impl.hpp"
Loading

0 comments on commit 251eb90

Please sign in to comment.