From 8f819530dcbc4277fa58f58301a83f61827eb1c5 Mon Sep 17 00:00:00 2001 From: Jiajie Chen Date: Thu, 14 Dec 2023 02:43:17 +0800 Subject: [PATCH] Fix xvssran/xvssrln --- code/gen_impl.py | 71 ++++++++++++++++++++++++++++++++++----------- code/xvssran_b_h.h | 12 ++++++-- code/xvssran_bu_h.h | 12 ++++++-- code/xvssran_h_w.h | 12 ++++++-- code/xvssran_hu_w.h | 12 ++++++-- code/xvssran_w_d.h | 12 ++++++-- code/xvssran_wu_d.h | 12 ++++++-- code/xvssrln_b_h.h | 12 ++++++-- code/xvssrln_bu_h.h | 12 ++++++-- code/xvssrln_h_w.h | 12 ++++++-- code/xvssrln_hu_w.h | 12 ++++++-- code/xvssrln_w_d.h | 12 ++++++-- code/xvssrln_wu_d.h | 12 ++++++-- 13 files changed, 174 insertions(+), 41 deletions(-) diff --git a/code/gen_impl.py b/code/gen_impl.py index 385a41cb..eea6ae36 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -239,23 +239,60 @@ with open( f"{prefix}s{name}n_{width}_{double_width_signed}.h", "w" ) as f: - print(f"for (int i = 0;i < {vlen // w};i++) {{", file=f) - print(f"if (i < {vlen // 2 // w}) {{", file=f) - print( - f" {shift_sign}{double_w} temp = ({shift_sign}{double_w})a.{double_m}[i] >> (b.{double_m}[i] & {double_w-1});", - file=f, - ) - print( - f" dst.{m}[i] = clamp<{shift_sign}{double_w}>(temp, {min}, {max});", - file=f, - ) - print(f"}} else {{", file=f) - print( - f" dst.{m}[i] = 0;", - file=f, - ) - print(f"}}", file=f) - print(f"}}", file=f) + if prefix == "v": + print(f"for (int i = 0;i < {vlen // w};i++) {{", file=f) + print(f"if (i < {vlen // 2 // w}) {{", file=f) + print( + f" {shift_sign}{double_w} temp = ({shift_sign}{double_w})a.{double_m}[i] >> (b.{double_m}[i] & {double_w-1});", + file=f, + ) + print( + f" dst.{m}[i] = clamp<{shift_sign}{double_w}>(temp, {min}, {max});", + file=f, + ) + print(f"}} else {{", file=f) + print( + f" dst.{m}[i] = 0;", + file=f, + ) + print(f"}}", file=f) + print(f"}}", file=f) + else: + print(f"for (int i = 0;i < {vlen // 2 // w};i++) {{", file=f) + print(f"if (i < {vlen // 4 // w}) {{", file=f) + print( + f" 
{shift_sign}{double_w} temp = ({shift_sign}{double_w})a.{double_m}[i] >> (b.{double_m}[i] & {double_w-1});", + file=f, + ) + print( + f" dst.{m}[i] = clamp<{shift_sign}{double_w}>(temp, {min}, {max});", + file=f, + ) + print(f"}} else {{", file=f) + print( + f" dst.{m}[i] = 0;", + file=f, + ) + print(f"}}", file=f) + print(f"}}", file=f) + + print(f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{", file=f) + print(f"if (i < {3 * vlen // 4 // w}) {{", file=f) + print( + f" {shift_sign}{double_w} temp = ({shift_sign}{double_w})a.{double_m}[i - {vlen // 4 // w}] >> (b.{double_m}[i - {vlen // 4 // w}] & {double_w-1});", + file=f, + ) + print( + f" dst.{m}[i] = clamp<{shift_sign}{double_w}>(temp, {min}, {max});", + file=f, + ) + print(f"}} else {{", file=f) + print( + f" dst.{m}[i] = 0;", + file=f, + ) + print(f"}}", file=f) + print(f"}}", file=f) with open( f"{prefix}s{name}rn_{width}_{double_width_signed}.h", "w" ) as f: diff --git a/code/xvssran_b_h.h b/code/xvssran_b_h.h index fac704cc..75f383a4 100644 --- a/code/xvssran_b_h.h +++ b/code/xvssran_b_h.h @@ -1,8 +1,16 @@ -for (int i = 0; i < 32; i++) { - if (i < 16) { +for (int i = 0; i < 16; i++) { + if (i < 8) { s16 temp = (s16)a.half[i] >> (b.half[i] & 15); dst.byte[i] = clamp<s16>(temp, -128, 127); } else { dst.byte[i] = 0; } } +for (int i = 16; i < 32; i++) { + if (i < 24) { + s16 temp = (s16)a.half[i - 8] >> (b.half[i - 8] & 15); + dst.byte[i] = clamp<s16>(temp, -128, 127); + } else { + dst.byte[i] = 0; + } +} diff --git a/code/xvssran_bu_h.h b/code/xvssran_bu_h.h index 547a2ad5..8b1914c6 100644 --- a/code/xvssran_bu_h.h +++ b/code/xvssran_bu_h.h @@ -1,8 +1,16 @@ -for (int i = 0; i < 32; i++) { - if (i < 16) { +for (int i = 0; i < 16; i++) { + if (i < 8) { s16 temp = (s16)a.half[i] >> (b.half[i] & 15); dst.byte[i] = clamp<s16>(temp, 0, 255); } else { dst.byte[i] = 0; } } +for (int i = 16; i < 32; i++) { + if (i < 24) { + s16 temp = (s16)a.half[i - 8] >> (b.half[i - 8] & 15); + dst.byte[i] = clamp<s16>(temp, 0, 255); + } else { 
+ dst.byte[i] = 0; + } +} diff --git a/code/xvssran_h_w.h b/code/xvssran_h_w.h index fc4c47af..ed27287f 100644 --- a/code/xvssran_h_w.h +++ b/code/xvssran_h_w.h @@ -1,8 +1,16 @@ -for (int i = 0; i < 16; i++) { - if (i < 8) { +for (int i = 0; i < 8; i++) { + if (i < 4) { s32 temp = (s32)a.word[i] >> (b.word[i] & 31); dst.half[i] = clamp<s32>(temp, -32768, 32767); } else { dst.half[i] = 0; } } +for (int i = 8; i < 16; i++) { + if (i < 12) { + s32 temp = (s32)a.word[i - 4] >> (b.word[i - 4] & 31); + dst.half[i] = clamp<s32>(temp, -32768, 32767); + } else { + dst.half[i] = 0; + } +} diff --git a/code/xvssran_hu_w.h b/code/xvssran_hu_w.h index f99435fb..c3568676 100644 --- a/code/xvssran_hu_w.h +++ b/code/xvssran_hu_w.h @@ -1,8 +1,16 @@ -for (int i = 0; i < 16; i++) { - if (i < 8) { +for (int i = 0; i < 8; i++) { + if (i < 4) { s32 temp = (s32)a.word[i] >> (b.word[i] & 31); dst.half[i] = clamp<s32>(temp, 0, 65535); } else { dst.half[i] = 0; } } +for (int i = 8; i < 16; i++) { + if (i < 12) { + s32 temp = (s32)a.word[i - 4] >> (b.word[i - 4] & 31); + dst.half[i] = clamp<s32>(temp, 0, 65535); + } else { + dst.half[i] = 0; + } +} diff --git a/code/xvssran_w_d.h b/code/xvssran_w_d.h index 9108b1a4..92562f55 100644 --- a/code/xvssran_w_d.h +++ b/code/xvssran_w_d.h @@ -1,8 +1,16 @@ -for (int i = 0; i < 8; i++) { - if (i < 4) { +for (int i = 0; i < 4; i++) { + if (i < 2) { s64 temp = (s64)a.dword[i] >> (b.dword[i] & 63); dst.word[i] = clamp<s64>(temp, -2147483648, 2147483647); } else { dst.word[i] = 0; } } +for (int i = 4; i < 8; i++) { + if (i < 6) { + s64 temp = (s64)a.dword[i - 2] >> (b.dword[i - 2] & 63); + dst.word[i] = clamp<s64>(temp, -2147483648, 2147483647); + } else { + dst.word[i] = 0; + } +} diff --git a/code/xvssran_wu_d.h b/code/xvssran_wu_d.h index d939ff03..56802a92 100644 --- a/code/xvssran_wu_d.h +++ b/code/xvssran_wu_d.h @@ -1,8 +1,16 @@ -for (int i = 0; i < 8; i++) { - if (i < 4) { +for (int i = 0; i < 4; i++) { + if (i < 2) { s64 temp = (s64)a.dword[i] >> (b.dword[i] & 63); dst.word[i] 
= clamp<s64>(temp, 0, 4294967295); } else { dst.word[i] = 0; } } +for (int i = 4; i < 8; i++) { + if (i < 6) { + s64 temp = (s64)a.dword[i - 2] >> (b.dword[i - 2] & 63); + dst.word[i] = clamp<s64>(temp, 0, 4294967295); + } else { + dst.word[i] = 0; + } +} diff --git a/code/xvssrln_b_h.h b/code/xvssrln_b_h.h index 33172401..6919f864 100644 --- a/code/xvssrln_b_h.h +++ b/code/xvssrln_b_h.h @@ -1,8 +1,16 @@ -for (int i = 0; i < 32; i++) { - if (i < 16) { +for (int i = 0; i < 16; i++) { + if (i < 8) { u16 temp = (u16)a.half[i] >> (b.half[i] & 15); dst.byte[i] = clamp<u16>(temp, 0, 127); } else { dst.byte[i] = 0; } } +for (int i = 16; i < 32; i++) { + if (i < 24) { + u16 temp = (u16)a.half[i - 8] >> (b.half[i - 8] & 15); + dst.byte[i] = clamp<u16>(temp, 0, 127); + } else { + dst.byte[i] = 0; + } +} diff --git a/code/xvssrln_bu_h.h b/code/xvssrln_bu_h.h index 9be937d0..ef433475 100644 --- a/code/xvssrln_bu_h.h +++ b/code/xvssrln_bu_h.h @@ -1,8 +1,16 @@ -for (int i = 0; i < 32; i++) { - if (i < 16) { +for (int i = 0; i < 16; i++) { + if (i < 8) { u16 temp = (u16)a.half[i] >> (b.half[i] & 15); dst.byte[i] = clamp<u16>(temp, 0, 255); } else { dst.byte[i] = 0; } } +for (int i = 16; i < 32; i++) { + if (i < 24) { + u16 temp = (u16)a.half[i - 8] >> (b.half[i - 8] & 15); + dst.byte[i] = clamp<u16>(temp, 0, 255); + } else { + dst.byte[i] = 0; + } +} diff --git a/code/xvssrln_h_w.h b/code/xvssrln_h_w.h index 870d96ee..136e0d8c 100644 --- a/code/xvssrln_h_w.h +++ b/code/xvssrln_h_w.h @@ -1,8 +1,16 @@ -for (int i = 0; i < 16; i++) { - if (i < 8) { +for (int i = 0; i < 8; i++) { + if (i < 4) { u32 temp = (u32)a.word[i] >> (b.word[i] & 31); dst.half[i] = clamp<u32>(temp, 0, 32767); } else { dst.half[i] = 0; } } +for (int i = 8; i < 16; i++) { + if (i < 12) { + u32 temp = (u32)a.word[i - 4] >> (b.word[i - 4] & 31); + dst.half[i] = clamp<u32>(temp, 0, 32767); + } else { + dst.half[i] = 0; + } +} diff --git a/code/xvssrln_hu_w.h b/code/xvssrln_hu_w.h index 27a7bdcc..2afe20ce 100644 --- a/code/xvssrln_hu_w.h +++ 
b/code/xvssrln_hu_w.h @@ -1,8 +1,16 @@ -for (int i = 0; i < 16; i++) { - if (i < 8) { +for (int i = 0; i < 8; i++) { + if (i < 4) { u32 temp = (u32)a.word[i] >> (b.word[i] & 31); dst.half[i] = clamp<u32>(temp, 0, 65535); } else { dst.half[i] = 0; } } +for (int i = 8; i < 16; i++) { + if (i < 12) { + u32 temp = (u32)a.word[i - 4] >> (b.word[i - 4] & 31); + dst.half[i] = clamp<u32>(temp, 0, 65535); + } else { + dst.half[i] = 0; + } +} diff --git a/code/xvssrln_w_d.h b/code/xvssrln_w_d.h index 6cf2000a..6a74bd45 100644 --- a/code/xvssrln_w_d.h +++ b/code/xvssrln_w_d.h @@ -1,8 +1,16 @@ -for (int i = 0; i < 8; i++) { - if (i < 4) { +for (int i = 0; i < 4; i++) { + if (i < 2) { u64 temp = (u64)a.dword[i] >> (b.dword[i] & 63); dst.word[i] = clamp<u64>(temp, 0, 2147483647); } else { dst.word[i] = 0; } } +for (int i = 4; i < 8; i++) { + if (i < 6) { + u64 temp = (u64)a.dword[i - 2] >> (b.dword[i - 2] & 63); + dst.word[i] = clamp<u64>(temp, 0, 2147483647); + } else { + dst.word[i] = 0; + } +} diff --git a/code/xvssrln_wu_d.h b/code/xvssrln_wu_d.h index 81790d15..ab2146aa 100644 --- a/code/xvssrln_wu_d.h +++ b/code/xvssrln_wu_d.h @@ -1,8 +1,16 @@ -for (int i = 0; i < 8; i++) { - if (i < 4) { +for (int i = 0; i < 4; i++) { + if (i < 2) { u64 temp = (u64)a.dword[i] >> (b.dword[i] & 63); dst.word[i] = clamp<u64>(temp, 0, 4294967295); } else { dst.word[i] = 0; } } +for (int i = 4; i < 8; i++) { + if (i < 6) { + u64 temp = (u64)a.dword[i - 2] >> (b.dword[i - 2] & 63); + dst.word[i] = clamp<u64>(temp, 0, 4294967295); + } else { + dst.word[i] = 0; + } +}