diff --git a/README.md b/README.md index 71d3ec3e..9c6f3b30 100644 --- a/README.md +++ b/README.md @@ -26,10 +26,6 @@ Vector Set Equal/Less than or Equal/Less Than Vector Subtract -### vaddwev.h.b/w.h/d.w/q.d - -Vector Add with Width Extension - ### vsubwev.h.b/w.h/d.w/q.d Vector Subtract with Width Extension @@ -37,10 +33,6 @@ Vector Subtract with Width Extension ### vaddwod.h.b/w.h/d.w/q.d ### vsubwod.h.b/w.h/d.w/q.d -### vaddwev.h.bu/w.hu/d.wu/q.du - -Vector Add with Width Extension Unsigned - ### vsubwev.h.bu/w.hu/d.wu/q.du Vector Subtract with Width Extension Unsigned @@ -48,7 +40,6 @@ Vector Subtract with Width Extension Unsigned ### vaddwod.h.bu/w.hu/d.wu/q.du ### vsubwod.h.bu/w.hu/d.wu/q.du -### vaddwev.h.bu.b/w.hu.h/q.wu.w/q.du.d ### vaddwod.h.bu.b/w.hu.h/q.wu.w/q.du.d ### vsadd.b/h/w/d @@ -260,8 +251,6 @@ Vector Multiplication High ### vslti.b/h/w/d/bu/hu/wu/du -### vaddi.bu/hu/wu/du - ### vsubi.bu/hu/wu/du ### vbsll.v diff --git a/code/common.h b/code/common.h index d1d2078d..e6fd4728 100644 --- a/code/common.h +++ b/code/common.h @@ -13,6 +13,8 @@ typedef int32_t s32; typedef uint32_t u32; typedef int64_t s64; typedef uint64_t u64; +typedef __int128 s128; +typedef unsigned __int128 u128; #ifndef MACHINE_3C5000 #define MACHINE_3C5000 0 diff --git a/code/vaddwev_d_w.cpp b/code/vaddwev_d_w.cpp new file mode 100644 index 00000000..57e24c92 --- /dev/null +++ b/code/vaddwev_d_w.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vaddwev_d_w(v128 a, v128 b) { + v128 dst; +#include "vaddwev_d_w.h" + return dst; +} + +void test() { FUZZ2(vaddwev_d_w); } diff --git a/code/vaddwev_d_w.h b/code/vaddwev_d_w.h new file mode 100644 index 00000000..2d7ea9c7 --- /dev/null +++ b/code/vaddwev_d_w.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 2;i++) { + dst.dword[i] = (s64)(s32)a.word[2 * i] + (s64)(s32)b.word[2 * i]; +} diff --git a/code/vaddwev_d_wu.cpp b/code/vaddwev_d_wu.cpp new file mode 100644 index 00000000..8c19ed3b --- /dev/null +++ b/code/vaddwev_d_wu.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vaddwev_d_wu(v128 a, v128 b) { + v128 dst; +#include "vaddwev_d_wu.h" + return dst; +} + +void test() { FUZZ2(vaddwev_d_wu); } diff --git a/code/vaddwev_d_wu.h b/code/vaddwev_d_wu.h new file mode 100644 index 00000000..7957d286 --- /dev/null +++ b/code/vaddwev_d_wu.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 2;i++) { + dst.dword[i] = (u64)(u32)a.word[2 * i] + (u64)(u32)b.word[2 * i]; +} diff --git a/code/vaddwev_d_wu_w.cpp b/code/vaddwev_d_wu_w.cpp new file mode 100644 index 00000000..b2a67268 --- /dev/null +++ b/code/vaddwev_d_wu_w.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vaddwev_d_wu_w(v128 a, v128 b) { + v128 dst; +#include "vaddwev_d_wu_w.h" + return dst; +} + +void test() { FUZZ2(vaddwev_d_wu_w); } diff --git a/code/vaddwev_d_wu_w.h b/code/vaddwev_d_wu_w.h new file mode 100644 index 00000000..5359ea65 --- /dev/null +++ b/code/vaddwev_d_wu_w.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 2;i++) { + dst.dword[i] = (u64)(u32)a.word[2 * i] + (s64)(s32)b.word[2 * i]; +} diff --git a/code/vaddwev_q_d.cpp b/code/vaddwev_q_d.cpp new file mode 100644 index 00000000..475f7231 --- /dev/null +++ b/code/vaddwev_q_d.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vaddwev_q_d(v128 a, v128 b) { + v128 dst; +#include "vaddwev_q_d.h" + return dst; +} + +void test() { FUZZ2(vaddwev_q_d); } diff --git a/code/vaddwev_q_d.h b/code/vaddwev_q_d.h new file mode 100644 index 00000000..afdaf5c8 --- /dev/null +++ b/code/vaddwev_q_d.h @@ -0,0 +1 @@ +dst.qword[0] = (s128)(s64)a.dword[0] + (s128)(s64)b.dword[0]; diff --git a/code/vaddwev_q_du.cpp b/code/vaddwev_q_du.cpp new file mode 100644 index 00000000..fadb6719 --- /dev/null +++ b/code/vaddwev_q_du.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vaddwev_q_du(v128 a, v128 b) { + v128 dst; +#include "vaddwev_q_du.h" + return dst; +} + +void test() { FUZZ2(vaddwev_q_du); } diff --git a/code/vaddwev_q_du.h b/code/vaddwev_q_du.h new file mode 100644 index 00000000..0c7d6e4b --- /dev/null +++ b/code/vaddwev_q_du.h @@ -0,0 +1 @@ +dst.qword[0] = (u128)(u64)a.dword[0] + (u128)(u64)b.dword[0]; diff --git a/code/vaddwev_q_du_d.cpp b/code/vaddwev_q_du_d.cpp new file mode 100644 index 00000000..054970e3 --- /dev/null +++ b/code/vaddwev_q_du_d.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vaddwev_q_du_d(v128 a, v128 b) { + v128 dst; +#include "vaddwev_q_du_d.h" + return dst; +} + +void test() { FUZZ2(vaddwev_q_du_d); } diff --git a/code/vaddwev_q_du_d.h b/code/vaddwev_q_du_d.h new file mode 100644 index 00000000..00aa6ed4 --- /dev/null +++ b/code/vaddwev_q_du_d.h @@ -0,0 +1 @@ +dst.qword[0] = (u128)(u64)a.dword[0] + (s128)(s64)b.dword[0]; diff --git a/code/vaddwev_w_h.cpp b/code/vaddwev_w_h.cpp new file mode 100644 index 00000000..fc15bdc2 --- /dev/null +++ b/code/vaddwev_w_h.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vaddwev_w_h(v128 a, v128 b) { + v128 dst; +#include "vaddwev_w_h.h" + return dst; +} + +void test() { FUZZ2(vaddwev_w_h); } diff --git a/code/vaddwev_w_h.h b/code/vaddwev_w_h.h new file mode 100644 index 00000000..a7e259f3 --- /dev/null +++ b/code/vaddwev_w_h.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 4;i++) { + dst.word[i] = (s32)(s16)a.half[2 * i] + (s32)(s16)b.half[2 * i]; +} diff --git a/code/vaddwev_w_hu.cpp b/code/vaddwev_w_hu.cpp new file mode 100644 index 00000000..85247b68 --- /dev/null +++ b/code/vaddwev_w_hu.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vaddwev_w_hu(v128 a, v128 b) { + v128 dst; +#include "vaddwev_w_hu.h" + return dst; +} + +void test() { FUZZ2(vaddwev_w_hu); } diff --git a/code/vaddwev_w_hu.h b/code/vaddwev_w_hu.h new file mode 100644 index 00000000..5a13d045 --- /dev/null +++ b/code/vaddwev_w_hu.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 4;i++) { + dst.word[i] = (u32)(u16)a.half[2 * i] + (u32)(u16)b.half[2 * i]; +} diff --git a/code/vaddwev_w_hu_h.cpp b/code/vaddwev_w_hu_h.cpp new file mode 100644 index 00000000..e8f7f91b --- /dev/null +++ b/code/vaddwev_w_hu_h.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vaddwev_w_hu_h(v128 a, v128 b) { + v128 dst; +#include "vaddwev_w_hu_h.h" + return dst; +} + +void test() { FUZZ2(vaddwev_w_hu_h); } diff --git a/code/vaddwev_w_hu_h.h b/code/vaddwev_w_hu_h.h new file mode 100644 index 00000000..62b0421c --- /dev/null +++ b/code/vaddwev_w_hu_h.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 4;i++) { + dst.word[i] = (u32)(u16)a.half[2 * i] + (s32)(s16)b.half[2 * i]; +} diff --git a/docs/lsx_integer/vaddsub.md b/docs/lsx_integer/vaddsub.md index f91d42c7..fb9118e2 100644 --- a/docs/lsx_integer/vaddsub.md +++ b/docs/lsx_integer/vaddsub.md @@ -23,4 +23,17 @@ {{ vaddi('bu') }} {{ vaddi('hu') }} {{ vaddi('wu') }} -{{ vaddi('du') }} \ No newline at end of file +{{ vaddi('du') }} + +{{ vaddwev('h', 'b') }} +{{ vaddwev('h', 'bu') }} +{{ vaddwev('h', 'bu', 'b') }} +{{ vaddwev('w', 'h') }} +{{ vaddwev('w', 'hu') }} +{{ vaddwev('w', 'hu', 'h') }} +{{ vaddwev('d', 'w') }} +{{ vaddwev('d', 'wu') }} +{{ vaddwev('d', 'wu', 'w') }} +{{ vaddwev('q', 'd') }} +{{ vaddwev('q', 'du') }} +{{ vaddwev('q', 'du', 'd') }} \ No newline at end of file diff --git a/main.py b/main.py index b7e6338f..f296c337 100644 --- a/main.py +++ b/main.py @@ -98,6 +98,25 @@ def vaddi(name): desc=f"Add {width}-bit elements in `a` and `imm`, save the result in `dst`.", ) + @env.macro + def vaddwev(wide, narrow, narrow2=None): + wide_width = widths[wide] + if narrow2 is None: + narrow2 = narrow + inst_suffix = "" + intrinsic_suffix = "" + else: + inst_suffix = f".{narrow2}" + intrinsic_suffix = f"_{narrow2}" + narrow_width = widths[narrow] + signedness = signednesses[narrow] + signedness2 = signednesses[narrow2] + return instruction( + intrinsic=f"__m128i __lsx_vaddwev_{wide}_{narrow}{intrinsic_suffix} (__m128i a, __m128i b)", + instr=f"vaddwev.{wide}.{narrow}{inst_suffix} vr, vr, vr", + desc=f"Add even-positioned {signedness} {narrow_width}-bit elements in `a` and {signedness2} elements in `b`, save the {wide_width}-bit result in `dst`.", + ) + @env.macro def vshuf_hwd(name): width = widths[name]