Skip to content

Commit

Permalink
powerpc: Cleanup: use actual power8 assembly mnemonics
Browse files Browse the repository at this point in the history
Some implementations in sysdeps/powerpc/powerpc64/power8/*.S still used
hardcoded instruction-encoding macros, kept for compatibility with
pre-POWER8 binutils, and were not using .machine power8.

This patch should not introduce any semantic changes; in fact, the
generated code should be exactly the same.

Tested that generated stripped shared objects are identical when
using "strip --remove-section=.note.gnu.build-id".

Checked on:
- powerpc64le, power9, build-many-glibcs.py, gcc 6.4.1 20180104, binutils 2.26.2.20160726
- powerpc64le, power8, debian 9, gcc 6.3.0 20170516, binutils 2.28
- powerpc64le, power9, ubuntu 19.04, gcc 8.3.0, binutils 2.32
- powerpc64le, power9, opensuse tumbleweed, gcc 9.1.1 20190527, binutils 2.32
- powerpc64, power9, debian 10, gcc 8.3.0, binutils 2.31.1

Reviewed-by: Adhemerval Zanella <[email protected]>
Reviewed-by: Gabriel F. T. Gomes <[email protected]>
  • Loading branch information
Raoni Fassina Firmino authored and inconstante committed Aug 1, 2019
1 parent 3175dcc commit 066020c
Show file tree
Hide file tree
Showing 12 changed files with 102 additions and 187 deletions.
15 changes: 15 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
2019-08-01 Raoni Fassina Firmino <[email protected]>

* sysdeps/powerpc/powerpc64/power8/memchr.S: Update power8
mnemonics and set .machine power8.
* sysdeps/powerpc/powerpc64/power8/memcmp.S: Likewise.
* sysdeps/powerpc/powerpc64/power8/memrchr.S: Likewise.
* sysdeps/powerpc/powerpc64/power8/memset.S: Likewise.
* sysdeps/powerpc/powerpc64/power8/strchr.S: Likewise.
* sysdeps/powerpc/powerpc64/power8/strlen.S: Likewise.
* sysdeps/powerpc/powerpc64/power8/strncmp.S: Likewise.
* sysdeps/powerpc/powerpc64/power8/strncpy.S: Likewise.
* sysdeps/powerpc/powerpc64/power8/strnlen.S: Likewise.
* sysdeps/powerpc/powerpc64/power8/strrchr.S: Likewise.
* sysdeps/powerpc/powerpc64/power8/strspn.S: Likewise.

2019-08-01 Adhemerval Zanella <[email protected]>

* sysdeps/hppa/fpu/libm-test-ulps: Update.
Expand Down
31 changes: 10 additions & 21 deletions sysdeps/powerpc/powerpc64/power8/memchr.S
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,10 @@

/* void *[r3] memchr (const void *s [r3], int c [r4], size_t n [r5]) */

/* TODO: change these to the actual instructions when the minimum required
binutils allows it. */
#define MTVRD(v, r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
#define MFVRD(r, v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
#define VBPERMQ(t, a, b) .long (0x1000054c \
| ((t)<<(32-11)) \
| ((a)<<(32-16)) \
| ((b)<<(32-21)) )

#ifndef MEMCHR
# define MEMCHR __memchr
#endif
/* TODO: change this to .machine power8 when the minimum required binutils
allows it. */
.machine power7
.machine power8
ENTRY_TOCLESS (MEMCHR)
CALL_MCOUNT 3
dcbt 0, r3
Expand Down Expand Up @@ -97,7 +86,7 @@ L(align_qw):
li r0, 0
lvsl v11, r0, r0
vslb v10, v11, v10
MTVRD(v1, r4)
mtvrd v1, r4
vspltb v1, v1, 7
cmpldi r5, 64
ble L(tail64)
Expand Down Expand Up @@ -210,10 +199,10 @@ L(tail64):
.align 4
L(found):
/* Permute the first bit of each byte into bits 48-63. */
VBPERMQ(v6, v6, v10)
VBPERMQ(v7, v7, v10)
VBPERMQ(v8, v8, v10)
VBPERMQ(v9, v9, v10)
vbpermq v6, v6, v10
vbpermq v7, v7, v10
vbpermq v8, v8, v10
vbpermq v9, v9, v10
/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
vsldoi v7, v7, v7, 2
Expand All @@ -228,7 +217,7 @@ L(found):
vor v11, v6, v7
vor v4, v9, v8
vor v4, v11, v4
MFVRD(r5, v4)
mfvrd r5, v4
#ifdef __LITTLE_ENDIAN__
addi r6, r5, -1
andc r6, r6, r5
Expand All @@ -243,16 +232,16 @@ L(found):
.align 4
L(found_16B):
/* Permute the first bit of each byte into bits 48-63. */
VBPERMQ(v6, v6, v10)
vbpermq v6, v6, v10
/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
MFVRD(r7, v6)
mfvrd r7, v6
addi r6, r7, -1
andc r6, r6, r7
popcntd r6, r6
#else
vsldoi v6, v6, v6, 6
MFVRD(r7, v6)
mfvrd r7, v6
cntlzd r6, r7 /* Count leading zeros before the match. */
#endif
add r3, r8, r6 /* Compute final length. */
Expand Down
21 changes: 9 additions & 12 deletions sysdeps/powerpc/powerpc64/power8/memcmp.S
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,10 @@
const char *s2 [r4],
size_t size [r5]) */

/* TODO: change these to the actual instructions when the minimum required
binutils allows it. */
#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
#ifndef MEMCMP
# define MEMCMP memcmp
#endif
.machine power7
.machine power8
ENTRY_TOCLESS (MEMCMP, 4)
CALL_MCOUNT 3

Expand Down Expand Up @@ -231,15 +228,15 @@ L(different2):
vperm v4, v4, v0, v9
vperm v5, v5, v0, v9
#endif
MFVRD(r7, v4)
MFVRD(r9, v5)
mfvrd r7, v4
mfvrd r9, v5
cmpld cr6, r7, r9
bne cr6, L(ret_diff)
/* Difference in second DW. */
vsldoi v4, v4, v4, 8
vsldoi v5, v5, v5, 8
MFVRD(r7, v4)
MFVRD(r9, v5)
mfvrd r7, v4
mfvrd r9, v5
cmpld cr6, r7, r9
L(ret_diff):
li rRTN, 1
Expand All @@ -256,15 +253,15 @@ L(different3):
vperm v6, v6, v0, v9
vperm v8, v8, v0, v9
#endif
MFVRD(r7, v6)
MFVRD(r9, v8)
mfvrd r7, v6
mfvrd r9, v8
cmpld cr6, r7, r9
bne cr6, L(ret_diff)
/* Difference in second DW. */
vsldoi v6, v6, v6, 8
vsldoi v8, v8, v8, 8
MFVRD(r7, v6)
MFVRD(r9, v8)
mfvrd r7, v6
mfvrd r9, v8
cmpld cr6, r7, r9
li rRTN, 1
bgtlr cr6
Expand Down
30 changes: 11 additions & 19 deletions sysdeps/powerpc/powerpc64/power8/memrchr.S
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,10 @@

/* int [r3] memrchr (char *s [r3], int byte [r4], int size [r5]) */

/* TODO: change these to the actual instructions when the minimum required
binutils allows it. */
#define MTVRD(v, r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
#define MFVRD(r, v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
#define VBPERMQ(t, a, b) .long (0x1000054c \
| ((t)<<(32-11)) \
| ((a)<<(32-16)) \
| ((b)<<(32-21)) )
#ifndef MEMRCHR
# define MEMRCHR __memrchr
#endif
.machine power7
.machine power8
ENTRY_TOCLESS (MEMRCHR)
CALL_MCOUNT 3
add r7, r3, r5 /* Calculate the last acceptable address. */
Expand Down Expand Up @@ -92,7 +84,7 @@ L(align_qw):
li r0, 0
lvsl v11, r0, r0
vslb v10, v11, v10
MTVRD(v1, r4)
mtvrd v1, r4
vspltb v1, v1, 7
cmpldi r5, 64
ble L(tail64)
Expand Down Expand Up @@ -205,10 +197,10 @@ L(tail64):
.align 4
L(found):
/* Permute the first bit of each byte into bits 48-63. */
VBPERMQ(v6, v6, v10)
VBPERMQ(v7, v7, v10)
VBPERMQ(v8, v8, v10)
VBPERMQ(v9, v9, v10)
vbpermq v6, v6, v10
vbpermq v7, v7, v10
vbpermq v8, v8, v10
vbpermq v9, v9, v10
/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
vsldoi v7, v7, v7, 2
Expand All @@ -223,7 +215,7 @@ L(found):
vor v11, v6, v7
vor v4, v9, v8
vor v4, v11, v4
MFVRD(r5, v4)
mfvrd r5, v4
#ifdef __LITTLE_ENDIAN__
cntlzd r6, r5 /* Count leading zeros before the match. */
#else
Expand All @@ -245,7 +237,7 @@ L(found_16B):
bge L(last)
/* Now discard bytes before starting address. */
sub r9, r10, r8
MTVRD(v9, r9)
mtvrd v9, r9
vspltisb v8, 3
/* Mask unwanted bytes. */
#ifdef __LITTLE_ENDIAN__
Expand All @@ -263,14 +255,14 @@ L(found_16B):
#endif
L(last):
/* Permute the first bit of each byte into bits 48-63. */
VBPERMQ(v6, v6, v10)
vbpermq v6, v6, v10
/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
vsldoi v6, v6, v6, 6
MFVRD(r7, v6)
mfvrd r7, v6
cntlzd r6, r7 /* Count leading zeros before the match. */
#else
MFVRD(r7, v6)
mfvrd r7, v6
addi r6, r7, -1
andc r6, r6, r7
popcntd r6, r6
Expand Down
10 changes: 2 additions & 8 deletions sysdeps/powerpc/powerpc64/power8/memset.S
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,13 @@

#include <sysdep.h>

#define MTVSRD_V1_R4 .long 0x7c240166 /* mtvsrd v1,r4 */

/* void * [r3] memset (void *s [r3], int c [r4], size_t n [r5]);
Returns 's'. */

#ifndef MEMSET
# define MEMSET memset
#endif

/* No need to use .machine power8 since mtvsrd is already
handled by the define. It avoid breakage on binutils
that does not support this machine specifier. */
.machine power7
.machine power8
ENTRY_TOCLESS (MEMSET, 5)
CALL_MCOUNT 3

Expand Down Expand Up @@ -151,7 +145,7 @@ L(tail_bytes):
vector instruction to achieve best throughput. */
L(huge_vector):
/* Replicate set byte to quadword in VMX register. */
MTVSRD_V1_R4
mtvsrd v1,r4
xxpermdi 32,v0,v1,0
vspltb v2,v0,15

Expand Down
30 changes: 10 additions & 20 deletions sysdeps/powerpc/powerpc64/power8/strchr.S
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,7 @@
#endif /* !USE_AS_STRCHRNUL */

/* int [r3] strchr (char *s [r3], int c [r4]) */
/* TODO: change these to the actual instructions when the minimum required
binutils allows it. */
#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
#define VBPERMQ(t,a,b) .long (0x1000054c \
| ((t)<<(32-11)) \
| ((a)<<(32-16)) \
| ((b)<<(32-21)) )
/* TODO: change this to .machine power8 when the minimum required binutils
allows it. */
.machine power7
.machine power8
ENTRY_TOCLESS (FUNC_NAME)
CALL_MCOUNT 2
dcbt 0,r3
Expand Down Expand Up @@ -178,7 +168,7 @@ L(vector):
vspltisb v10, 3
lvsl v11, r0, r0
vslb v10, v11, v10
MTVRD(v1,r4)
mtvrd v1, r4
li r5, 16
vspltb v1, v1, 7
/* Compare 32 bytes in each loop. */
Expand All @@ -202,10 +192,10 @@ L(continue):
blt cr6, L(no_match)
#endif
/* Permute the first bit of each byte into bits 48-63. */
VBPERMQ(v2, v2, v10)
VBPERMQ(v3, v3, v10)
VBPERMQ(v6, v6, v10)
VBPERMQ(v7, v7, v10)
vbpermq v2, v2, v10
vbpermq v3, v3, v10
vbpermq v6, v6, v10
vbpermq v7, v7, v10
/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
vsldoi v3, v3, v3, 2
Expand All @@ -221,7 +211,7 @@ L(continue):
vor v1, v3, v2
vor v2, v6, v7
vor v4, v1, v2
MFVRD(r5, v4)
mfvrd r5, v4
#ifdef __LITTLE_ENDIAN__
addi r6, r5, -1
andc r6, r6, r5
Expand Down Expand Up @@ -347,8 +337,8 @@ L(continue1):
blt cr6, L(continue1)
addi r3, r3, -32
L(end1):
VBPERMQ(v2, v2, v10)
VBPERMQ(v3, v3, v10)
vbpermq v2, v2, v10
vbpermq v3, v3, v10
/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
vsldoi v3, v3, v3, 2
Expand All @@ -359,7 +349,7 @@ L(end1):

/* Merge the results and move to a GPR. */
vor v4, v3, v2
MFVRD(r5, v4)
mfvrd r5, v4
#ifdef __LITTLE_ENDIAN__
addi r6, r5, -1
andc r6, r6, r5
Expand Down
23 changes: 6 additions & 17 deletions sysdeps/powerpc/powerpc64/power8/strlen.S
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,12 @@

#include <sysdep.h>

/* TODO: change these to the actual instructions when the minimum required
binutils allows it. */
#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
#define VBPERMQ(t,a,b) .long (0x1000054c \
| ((t)<<(32-11)) \
| ((a)<<(32-16)) \
| ((b)<<(32-21)) )

/* int [r3] strlen (char *s [r3]) */

#ifndef STRLEN
# define STRLEN strlen
#endif

/* TODO: change this to .machine power8 when the minimum required binutils
allows it. */
.machine power7
.machine power8
ENTRY_TOCLESS (STRLEN, 4)
CALL_MCOUNT 1
dcbt 0,r3
Expand Down Expand Up @@ -250,10 +239,10 @@ L(vmx_zero):
vslb v10,v11,v10

/* Permute the first bit of each byte into bits 48-63. */
VBPERMQ(v1,v1,v10)
VBPERMQ(v2,v2,v10)
VBPERMQ(v3,v3,v10)
VBPERMQ(v4,v4,v10)
vbpermq v1,v1,v10
vbpermq v2,v2,v10
vbpermq v3,v3,v10
vbpermq v4,v4,v10

/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
Expand All @@ -270,7 +259,7 @@ L(vmx_zero):
vor v1,v2,v1
vor v2,v3,v4
vor v4,v1,v2
MFVRD(r10,v4)
mfvrd r10,v4

/* Adjust address to the beginning of the current 64-byte block. */
addi r4,r4,-64
Expand Down
2 changes: 1 addition & 1 deletion sysdeps/powerpc/powerpc64/power8/strncmp.S
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
64K as default, the page cross handling assumes minimum page size of
4k. */

.machine power7
.machine power8
ENTRY_TOCLESS (STRNCMP, 4)
/* Check if size is 0. */
mr. r10,r5
Expand Down
2 changes: 1 addition & 1 deletion sysdeps/powerpc/powerpc64/power8/strncpy.S
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
64K as default, the page cross handling assumes minimum page size of
4k. */

.machine power7
.machine power8
#ifdef MEMSET_is_local
ENTRY_TOCLESS (FUNC_NAME, 4)
#else
Expand Down
Loading

0 comments on commit 066020c

Please sign in to comment.