[Clang][XTHeadVector] add pffft example

ruyisdk · Jun 27, 2024 · a07e454 · a07e454
1 parent 590cf6e
commit a07e454
Showing 1 changed file with 39 additions and 0 deletions.
diff --git a/clang/test/CodeGen/RISCV/rvv0p71-intrinsics-handcrafted/examples/rvv-pffft.c b/clang/test/CodeGen/RISCV/rvv0p71-intrinsics-handcrafted/examples/rvv-pffft.c
@@ -0,0 +1,39 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// RUN: %clang_cc1 -triple riscv64 -target-feature +xtheadvector \
+// RUN:   -disable-O0-optnone -emit-llvm %s -o - | \
+// RUN:   opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-IR %s
+
+#include <riscv_vector.h>
+
+// https://github.com/marton78/pffft/blob/master/pffft_priv_impl.h
+// CHECK-IR-LABEL: define dso_local void @pffft_real_preprocess_4x4
+// CHECK-IR-SAME: (ptr noundef [[FS:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-IR-NEXT:  entry:
+// CHECK-IR-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.th.vle.nxv8i32.i64(<vscale x 8 x i32> poison, ptr @pffft_real_preprocess_4x4.idx_, i64 16)
+// CHECK-IR-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.th.vle.nxv2f32.i64(<vscale x 2 x float> poison, ptr [[FS]], i64 4)
+// CHECK-IR-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv2f32(<vscale x 8 x float> poison, <vscale x 2 x float> [[TMP1]], i64 0)
+// CHECK-IR-NEXT:    [[TMP3:%.*]] = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv2f32(<vscale x 8 x float> [[TMP2]], <vscale x 2 x float> [[TMP1]], i64 2)
+// CHECK-IR-NEXT:    [[TMP4:%.*]] = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv2f32(<vscale x 8 x float> [[TMP3]], <vscale x 2 x float> [[TMP1]], i64 4)
+// CHECK-IR-NEXT:    [[TMP5:%.*]] = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv2f32(<vscale x 8 x float> [[TMP4]], <vscale x 2 x float> [[TMP1]], i64 6)
+// CHECK-IR-NEXT:    [[TMP6:%.*]] = call <vscale x 8 x float> @llvm.riscv.th.vrgather.vv.nxv8f32.i64(<vscale x 8 x float> poison, <vscale x 8 x float> [[TMP5]], <vscale x 8 x i32> [[TMP0]], i64 16)
+// CHECK-IR-NEXT:    [[TMP7:%.*]] = call <vscale x 2 x float> @llvm.vector.extract.nxv2f32.nxv8f32(<vscale x 8 x float> [[TMP6]], i64 0)
+// CHECK-IR-NEXT:    ret void
+//
+void pffft_real_preprocess_4x4(const float *fs) {  // codegen example: works on locals only; nothing is stored or returned
+  static const uint32_t idx_[16] = {  // gather index table; viewed as a row-major 4x4 grid it is the transpose permutation
+    0, 4, 8, 12,  // with vrgather.vv, result lanes 0..3 are taken from source lanes 0, 4, 8, 12
+    1, 5, 9, 13,
+    2, 6, 10, 14,
+    3, 7, 11, 15,
+  };
+  vuint32m4_t idx = __riscv_vle32_v_u32m4(idx_, 16);  // load all 16 indices (vl = 16) into an LMUL=4 group
+  vfloat32m1_t r0 = __riscv_vle32_v_f32m1(fs, 4);  // load 4 floats from fs (vl = 4) into an LMUL=1 value
+  vfloat32m4_t arr = __riscv_vundefined_f32m4();  // start from an undefined m4 value (shows up as poison in the checked IR)
+  arr = __riscv_vset_v_f32m1_f32m4(arr, 0, r0);  // the same r0 is written into each of the four m1 parts of arr (parts 0..3)
+  arr = __riscv_vset_v_f32m1_f32m4(arr, 1, r0);
+  arr = __riscv_vset_v_f32m1_f32m4(arr, 2, r0);
+  arr = __riscv_vset_v_f32m1_f32m4(arr, 3, r0);
+  arr = __riscv_vrgather_vv_f32m4(arr, idx, 16);  // permute arr by idx over vl = 16 lanes; NOTE(review): presumably assumes 16 e32 lanes fit in m4 (VLEN >= 128) - confirm
+  r0 = __riscv_vget_v_f32m4_f32m1(arr, 0);  // extract m1 part 0 of the permuted group; the value is not used afterwards
+}