Skip to content

Commit

Permalink
Implement the int part of the gather family vendor intrinsics
Browse files Browse the repository at this point in the history
  • Loading branch information
bjorn3 committed Nov 24, 2023
1 parent d5a7ae7 commit 3b49b9e
Showing 1 changed file with 16 additions and 6 deletions.
22 changes: 16 additions & 6 deletions src/intrinsics/llvm_x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,20 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
ret.write_cvalue(fx, val);
}

- "llvm.x86.avx2.gather.d.ps"
+ "llvm.x86.avx2.gather.d.d"
+ | "llvm.x86.avx2.gather.d.q"
+ | "llvm.x86.avx2.gather.d.ps"
  | "llvm.x86.avx2.gather.d.pd"
+ | "llvm.x86.avx2.gather.d.d.256"
+ | "llvm.x86.avx2.gather.d.q.256"
  | "llvm.x86.avx2.gather.d.ps.256"
  | "llvm.x86.avx2.gather.d.pd.256"
+ | "llvm.x86.avx2.gather.q.d"
+ | "llvm.x86.avx2.gather.q.q"
  | "llvm.x86.avx2.gather.q.ps"
  | "llvm.x86.avx2.gather.q.pd"
+ | "llvm.x86.avx2.gather.q.d.256"
+ | "llvm.x86.avx2.gather.q.q.256"
  | "llvm.x86.avx2.gather.q.ps.256"
  | "llvm.x86.avx2.gather.q.pd.256" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd&ig_expand=3818
Expand All @@ -94,10 +102,8 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
let (index_lane_count, index_lane_ty) = index.layout().ty.simd_size_and_type(fx.tcx);
let (mask_lane_count, mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
- assert!(src_lane_ty.is_floating_point());
+ assert_eq!(src_lane_ty, ret_lane_ty);
  assert!(index_lane_ty.is_integral());
- assert!(mask_lane_ty.is_floating_point());
- assert!(ret_lane_ty.is_floating_point());
assert_eq!(src_lane_count, mask_lane_count);
assert_eq!(src_lane_count, ret_lane_count);

Expand All @@ -122,8 +128,12 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);

let mask_lane = match mask_lane_clif_ty {
- types::F32 => fx.bcx.ins().band_imm(mask_lane, 0x8000_0000u64 as i64),
- types::F64 => fx.bcx.ins().band_imm(mask_lane, 0x8000_0000_0000_0000u64 as i64),
+ types::I32 | types::F32 => {
+ fx.bcx.ins().band_imm(mask_lane, 0x8000_0000u64 as i64)
+ }
+ types::I64 | types::F64 => {
+ fx.bcx.ins().band_imm(mask_lane, 0x8000_0000_0000_0000u64 as i64)
+ }
_ => unreachable!(),
};
fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]);
Expand Down

0 comments on commit 3b49b9e

Please sign in to comment.