Skip to content

Commit

Permalink
fix sanitizer error
Browse files: browse the repository at this point in the history
  • Loading branch information
xujuntwt95329 committed Nov 19, 2024
1 parent 0b62021 commit 916717b
Show file tree
Hide file tree
Showing 10 changed files with 6,036 additions and 5,375 deletions.
786 changes: 422 additions & 364 deletions src/qs8-packw/gen/qs8-packw-x16c4-gemm-gio-scalar.c

Large diffs are not rendered by default.

1,618 changes: 838 additions & 780 deletions src/qs8-packw/gen/qs8-packw-x16c8-gemm-gio-scalar.c

Large diffs are not rendered by default.

1,620 changes: 871 additions & 749 deletions src/qs8-packw/gen/qs8-packw-x32c4-gemm-gio-scalar.c

Large diffs are not rendered by default.

3,284 changes: 1,767 additions & 1,517 deletions src/qs8-packw/gen/qs8-packw-x64c4-gemm-gio-scalar.c

Large diffs are not rendered by default.

370 changes: 198 additions & 172 deletions src/qs8-packw/gen/qs8-packw-x8c4-gemm-gio-scalar.c

Large diffs are not rendered by default.

754 changes: 390 additions & 364 deletions src/qs8-packw/gen/qs8-packw-x8c8-gemm-gio-scalar.c

Large diffs are not rendered by default.

1,618 changes: 838 additions & 780 deletions src/qs8-qu8-packw/gen/qs8-qu8-packw-x16c8-gemm-gio-scalar.c

Large diffs are not rendered by default.

754 changes: 390 additions & 364 deletions src/qs8-qu8-packw/gen/qs8-qu8-packw-x8c8-gemm-gio-scalar.c

Large diffs are not rendered by default.

558 changes: 292 additions & 266 deletions src/x8-packw/gen/x8-packw-x8c8-gemm-gio-scalar.c

Large diffs are not rendered by default.

49 changes: 30 additions & 19 deletions src/x8-packw/kr-gio-scalar.c.in
Original file line number | Diff line number | Diff line change
Expand Up @@ -160,14 +160,23 @@ void xnn_${DATATYPE.lower()}${"_to_qu8" if IZP == 128 else ""}_packw_gemm_gio_uk
// KC main loop multiple of ${NR}x${KR}
size_t k = kc;
for (; k >= ${KR}; k -= ${KR}) {
$for N in range(NR-1):
$for K in range(KR):
const ${TYPE} v${K}x${N} = w${K}[${N}];
$for K in range(KR):
$if DATATYPE in ["QS8"]:
ksum${N} += (uint32_t) v${K}x${N};
$for K in range(KR):
out[${N*KR+K}] = v${K}x${N};
$for K in range(KR):
const ${TYPE} v${K}x0 = w${K}[0];
$for K in range(KR):
$if DATATYPE in ["QS8"]:
ksum0 += (uint32_t) v${K}x0;
$for K in range(KR):
out[${K}] = v${K}x0;
$for N in range(1, NR-1):
if (${N} < n) {
$for K in range(KR):
const ${TYPE} v${K}x${N} = w${K}[${N}];
$for K in range(KR):
$if DATATYPE in ["QS8"]:
ksum${N} += (uint32_t) v${K}x${N};
$for K in range(KR):
out[${N*KR+K}] = v${K}x${N};
}
$for K in range(KR):
w${K} += ${KR} * k_stride;
out += ${NR*KR};
Expand All @@ -177,17 +186,19 @@ void xnn_${DATATYPE.lower()}${"_to_qu8" if IZP == 128 else ""}_packw_gemm_gio_uk
if (k != 0) {
assert(k >= 1 && k <= ${KR-1});
$for N in range(NR-1):
const ${TYPE} v0x${N} = w0[${N}];
$if DATATYPE in ["QS8"]:
ksum${N} += (uint32_t) v0x${N};
out[${N*KR}] = v0x${N};
$for K in range(1, KR):
if (${K} < k) {
const ${TYPE} v${K}x${N} = w${K}[${N}];
$if DATATYPE in ["QS8"]:
ksum${N} += (uint32_t) v${K}x${N};
out[${N*KR+K}] = v${K}x${N};
}
if (${N} < n) {
const ${TYPE} v0x${N} = w0[${N}];
$if DATATYPE in ["QS8"]:
ksum${N} += (uint32_t) v0x${N};
out[${N*KR}] = v0x${N};
$for K in range(1, KR):
if (${K} < k) {
const ${TYPE} v${K}x${N} = w${K}[${N}];
$if DATATYPE in ["QS8"]:
ksum${N} += (uint32_t) v${K}x${N};
out[${N*KR+K}] = v${K}x${N};
}
}
$for K in range(KR):
w${K} += k * k_stride;
out += ${NR*KR};
Expand Down

0 comments on commit 916717b

Please sign in to comment.