Remove optimized unaligned-access code paths.
With newer compilers that use vector instructions, these manual
optimizations can lead to build errors and provide no benefit over
compiler optimizations.

See also r1924100, which deprecated and disabled the corresponding macro.

* subversion/libsvn_diff/diff_file.c
  (contains_eol): Remove.
  (find_identical_prefix, find_identical_suffix): Remove code under
   #ifdef SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_fs_fs/tree.c
  (hash_func): Remove code under #ifdef SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_fs_x/dag_cache.c
  (cache_lookup): Remove code under #ifdef SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_fs_x/string_table.c
  (copy_masks): Remove.
  (table_copy_string): Remove code under #ifdef SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_subr/eol.c
  (svn_eol__find_eol_start): Remove code under #ifdef
   SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_subr/hash.c
  (hashfunc_compatible): Remove code under #ifdef SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_subr/string.c
  (svn_cstring__match_length): Remove code under #ifdef
   SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_subr/utf_validate.c
  (first_non_fsm_start_char): Remove code under #ifdef
   SVN_UNALIGNED_ACCESS_IS_OK.

Reported by: Sam James from Gentoo


git-svn-id: https://svn.apache.org/repos/asf/subversion/trunk@1924143 13f79535-47bb-0310-9956-ffa450edef68
stspdotname committed Mar 3, 2025
1 parent d6c36c0 commit 33a7886
Showing 8 changed files with 3 additions and 304 deletions.
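
As the log message notes, the hand-written word-at-a-time paths no longer beat
what compilers do on their own. For illustration, a minimal sketch of the kind
of plain byte scan this commit leaves in place (the function name is
hypothetical, modeled on the loop kept in eol.c below); recent GCC and Clang
releases can auto-vectorize even early-exit scans of this shape at higher
optimization levels:

    #include <stddef.h>

    /* The style of scalar loop that remains after this commit; the compiler,
     * not the source code, is now responsible for any vectorization. */
    static char *
    find_eol_start_naive(char *buf, size_t len)
    {
      for (; len > 0; ++buf, --len)
        if (*buf == '\n' || *buf == '\r')
          return buf;
      return NULL;  /* no CR or LF among the first LEN bytes */
    }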
127 changes: 0 additions & 127 deletions subversion/libsvn_diff/diff_file.c
@@ -355,23 +355,6 @@ is_one_at_eof(struct file_info file[], apr_size_t file_len)
return FALSE;
}

/* Quickly determine whether there is a eol char in CHUNK.
* (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
*/

#if SVN_UNALIGNED_ACCESS_IS_OK
static svn_boolean_t contains_eol(apr_uintptr_t chunk)
{
apr_uintptr_t r_test = chunk ^ SVN__R_MASK;
apr_uintptr_t n_test = chunk ^ SVN__N_MASK;

r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;

return (r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET;
}
#endif

/* Find the prefix which is identical between all elements of the FILE array.
* Return the number of prefix lines in PREFIX_LINES. REACHED_ONE_EOF will be
* set to TRUE if one of the FILEs reached its end while scanning prefix,
@@ -396,10 +379,6 @@ find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
is_match = is_match && *file[0].curp == *file[i].curp;
while (is_match)
{
#if SVN_UNALIGNED_ACCESS_IS_OK
apr_ssize_t max_delta, delta;
#endif /* SVN_UNALIGNED_ACCESS_IS_OK */

/* ### TODO: see if we can take advantage of
diff options like ignore_eol_style or ignore_space. */
/* check for eol, and count */
Expand All @@ -419,53 +398,6 @@ find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,

INCREMENT_POINTERS(file, file_len, pool);

#if SVN_UNALIGNED_ACCESS_IS_OK

/* Try to advance as far as possible with machine-word granularity.
* Determine how far we may advance with chunky ops without reaching
* endp for any of the files.
* Signedness is important here if curp gets close to endp.
*/
max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
for (i = 1; i < file_len; i++)
{
delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
if (delta < max_delta)
max_delta = delta;
}

is_match = TRUE;
for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
{
apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
if (contains_eol(chunk))
break;

for (i = 1; i < file_len; i++)
if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
{
is_match = FALSE;
break;
}

if (! is_match)
break;
}

if (delta /* > 0*/)
{
/* We either found a mismatch or an EOL at or shortly behind curp+delta
* or we cannot proceed with chunky ops without exceeding endp.
* In any way, everything up to curp + delta is equal and not an EOL.
*/
for (i = 0; i < file_len; i++)
file[i].curp += delta;

/* Skipped data without EOL markers, so last char was not a CR. */
had_cr = FALSE;
}
#endif

*reached_one_eof = is_one_at_eof(file, file_len);
if (*reached_one_eof)
break;
@@ -611,11 +543,6 @@ find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
while (is_match)
{
svn_boolean_t reached_prefix;
#if SVN_UNALIGNED_ACCESS_IS_OK
/* Initialize the minimum pointer positions. */
const char *min_curp[4];
svn_boolean_t can_read_word;
#endif /* SVN_UNALIGNED_ACCESS_IS_OK */

/* ### TODO: see if we can take advantage of
diff options like ignore_eol_style or ignore_space. */
@@ -636,60 +563,6 @@

DECREMENT_POINTERS(file_for_suffix, file_len, pool);

#if SVN_UNALIGNED_ACCESS_IS_OK
for (i = 0; i < file_len; i++)
min_curp[i] = file_for_suffix[i].buffer;

/* If we are in the same chunk that contains the last part of the common
prefix, use the min_curp[0] pointer to make sure we don't get a
suffix that overlaps the already determined common prefix. */
if (file_for_suffix[0].chunk == suffix_min_chunk0)
min_curp[0] += suffix_min_offset0;

/* Scan quickly by reading with machine-word granularity. */
for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++)
can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t))
> min_curp[i]);

while (can_read_word)
{
apr_uintptr_t chunk;

/* For each file curp is positioned at the current byte, but we
want to examine the current byte and the ones before the current
location as one machine word. */

chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
- sizeof(apr_uintptr_t));
if (contains_eol(chunk))
break;

for (i = 1, is_match = TRUE; is_match && i < file_len; i++)
is_match = (chunk
== *(const apr_uintptr_t *)
(file_for_suffix[i].curp + 1
- sizeof(apr_uintptr_t)));

if (! is_match)
break;

for (i = 0; i < file_len; i++)
{
file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
can_read_word = can_read_word
&& ( (file_for_suffix[i].curp + 1
- sizeof(apr_uintptr_t))
> min_curp[i]);
}

/* We skipped some bytes, so there are no closing EOLs */
had_nl = FALSE;
}

/* The > min_curp[i] check leaves at least one final byte for checking
in the non block optimized case below. */
#endif

reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
&& (file_for_suffix[0].curp - file_for_suffix[0].buffer)
== suffix_min_offset0;
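
For reference, the contains_eol() helper deleted above is the classic SWAR
zero-byte test (Mycroft's trick). A self-contained sketch with the 64-bit
masks written out — the original used the private SVN__R_MASK, SVN__N_MASK,
SVN__LOWER_7BITS_SET and SVN__BIT_7_SET constants — and with memcpy loads in
place of the alignment-unsafe pointer casts:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define R_MASK          0x0d0d0d0d0d0d0d0dULL  /* '\r' in every byte */
    #define N_MASK          0x0a0a0a0a0a0a0a0aULL  /* '\n' in every byte */
    #define LOWER_7BITS_SET 0x7f7f7f7f7f7f7f7fULL
    #define BIT_7_SET       0x8080808080808080ULL

    /* Nonzero iff any byte of CHUNK is '\r' or '\n'. XOR maps a matching
     * byte to 0; the |= step then leaves bit 7 of a result byte clear
     * only where that byte was 0. */
    static int
    contains_eol(uint64_t chunk)
    {
      uint64_t r_test = chunk ^ R_MASK;
      uint64_t n_test = chunk ^ N_MASK;

      r_test |= (r_test & LOWER_7BITS_SET) + LOWER_7BITS_SET;
      n_test |= (n_test & LOWER_7BITS_SET) + LOWER_7BITS_SET;

      return (r_test & n_test & BIT_7_SET) != BIT_7_SET;
    }

    int
    main(void)
    {
      uint64_t no_eol, has_eol;
      memcpy(&no_eol, "abcdefgh", sizeof(no_eol));
      memcpy(&has_eol, "abc\ndefg", sizeof(has_eol));
      printf("%d %d\n", contains_eol(no_eol), contains_eol(has_eol)); /* 0 1 */
      return 0;
    }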
34 changes: 1 addition & 33 deletions subversion/libsvn_fs_fs/tree.c
@@ -220,11 +220,6 @@ hash_func(svn_revnum_t revision,
apr_size_t i;
apr_uint32_t hash_value = (apr_uint32_t)revision;

#if SVN_UNALIGNED_ACCESS_IS_OK
/* "randomizing" / distributing factor used in our hash function */
const apr_uint32_t factor = 0xd1f3da69;
#endif

/* Calculate the hash value
(HASH_VALUE has been initialized to REVISION).
@@ -233,35 +228,8 @@
make as much of *PATH influence the result as possible to get an "even"
spread across the hash buckets (maximizes our cache retention rate and
thus the hit rates).
When chunked access is possible (independent of the PATH pointer's
value!), we read 4 bytes at once and multiply the hash value with a
FACTOR that mirror / pattern / shift all 4 input bytes to various bits
of the result. The final result will be taken from the MSBs.
When chunked access is not possible (not supported by CPU or odd bytes
at the end of *PATH), we use the simple traditional "* 33" hash
function that works very well with texts / paths and that e.g. APR uses.
Please note that the bytewise and the chunked calculation are *NOT*
interchangeable as they will yield different results for the same input.
For any given machine and *PATH, we must use a fixed combination of the
two functions.
*/
i = 0;
#if SVN_UNALIGNED_ACCESS_IS_OK
/* We relax the dependency chain between iterations by processing
two chunks from the input per hash_value self-multiplication.
The HASH_VALUE update latency is now 1 MUL latency + 1 ADD latency
per 2 chunks instead of 1 chunk.
*/
for (; i + 8 <= path_len; i += 8)
hash_value = hash_value * factor * factor
+ ( *(const apr_uint32_t*)(path + i) * factor
+ *(const apr_uint32_t*)(path + i + 4));
#endif

for (; i < path_len; ++i)
for (i = 0; i < path_len; ++i)
/* Help GCC to minimize the HASH_VALUE update latency by splitting the
MUL 33 of the naive implementation: h = h * 33 + path[i]. This
shortens the dependency chain from 1 shift + 2 ADDs to 1 shift + 1 ADD.
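
The loop hash_func now always uses keeps the comment about splitting the
MUL 33; the point, sketched here for a 32-bit hash state, is latency, not the
result:

    #include <stdint.h>

    /* One step of the "* 33" hash, written two ways:
     *   naive:  h = h * 33 + c;         lowered to (h << 5) + h, then + c:
     *                                   a chain of 1 shift + 2 adds on h
     *   split:  h = (h << 5) + (h + c); (h << 5) and (h + c) can issue in
     *                                   parallel: 1 shift + 1 dependent add
     * Both compute exactly the same value modulo 2^32. */
    static uint32_t
    hash_step(uint32_t h, unsigned char c)
    {
      return (h << 5) + (h + c);
    }

As the deleted comment stressed, the chunked variant (multiplying 4-byte words
by the 0xd1f3da69 factor) hashed to different values than this bytewise form,
so the two could never be mixed for the same cache.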
20 changes: 1 addition & 19 deletions subversion/libsvn_fs_x/dag_cache.c
@@ -312,11 +312,6 @@ cache_lookup(svn_fs_x__dag_cache_t *cache,
apr_size_t path_len = path->len;
apr_uint32_t hash_value = (apr_uint32_t)(apr_uint64_t)change_set;

#if SVN_UNALIGNED_ACCESS_IS_OK
/* "randomizing" / distributing factor used in our hash function */
const apr_uint32_t factor = 0xd1f3da69;
#endif

/* optimistic lookup: hit the same bucket again? */
cache_entry_t *result = &cache->buckets[cache->last_hit];
if ( (result->change_set == change_set)
@@ -332,20 +327,7 @@

/* need to do a full lookup. Calculate the hash value
(HASH_VALUE has been initialized to REVISION). */
i = 0;
#if SVN_UNALIGNED_ACCESS_IS_OK
/* We relax the dependency chain between iterations by processing
two chunks from the input per hash_value self-multiplication.
The HASH_VALUE update latency is now 1 MUL latency + 1 ADD latency
per 2 chunks instead of 1 chunk.
*/
for (; i + 8 <= path_len; i += 8)
hash_value = hash_value * factor * factor
+ ( *(const apr_uint32_t*)(path->data + i) * factor
+ *(const apr_uint32_t*)(path->data + i + 4));
#endif

for (; i < path_len; ++i)
for (i = 0; i < path_len; ++i)
/* Help GCC to minimize the HASH_VALUE update latency by splitting the
MUL 33 of the naive implementation: h = h * 33 + path[i]. This
shortens the dependency chain from 1 shift + 2 ADDs to 1 shift + 1 ADD.
45 changes: 0 additions & 45 deletions subversion/libsvn_fs_x/string_table.c
@@ -473,21 +473,6 @@ svn_fs_x__string_table_create(const string_table_builder_t *builder,
return result;
}

/* Masks used by table_copy_string. copy_mask[I] is used if the target
content to be preserved starts at byte I within the current chunk.
This is used to work around alignment issues.
*/
#if SVN_UNALIGNED_ACCESS_IS_OK
static const char *copy_masks[8] = { "\xff\xff\xff\xff\xff\xff\xff\xff",
"\x00\xff\xff\xff\xff\xff\xff\xff",
"\x00\x00\xff\xff\xff\xff\xff\xff",
"\x00\x00\x00\xff\xff\xff\xff\xff",
"\x00\x00\x00\x00\xff\xff\xff\xff",
"\x00\x00\x00\x00\x00\xff\xff\xff",
"\x00\x00\x00\x00\x00\x00\xff\xff",
"\x00\x00\x00\x00\x00\x00\x00\xff" };
#endif

static void
table_copy_string(char *buffer,
apr_size_t len,
@@ -499,40 +484,10 @@
{
assert(header->head_length <= len);
{
#if SVN_UNALIGNED_ACCESS_IS_OK
/* the sections that we copy tend to be short but we can copy
*all* of it chunky because we made sure that source and target
buffer have some extra padding to prevent segfaults. */
apr_uint64_t mask;
apr_size_t to_copy = len - header->head_length;
apr_size_t copied = 0;

const char *source = table->data + header->tail_start;
char *target = buffer + header->head_length;
len = header->head_length;

/* copy whole chunks */
while (to_copy >= copied + sizeof(apr_uint64_t))
{
*(apr_uint64_t *)(target + copied)
= *(const apr_uint64_t *)(source + copied);
copied += sizeof(apr_uint64_t);
}

/* copy the remainder assuming that we have up to 8 extra bytes
of addressable buffer on the source and target sides.
Now, we simply copy 8 bytes and use a mask to filter & merge
old with new data. */
mask = *(const apr_uint64_t *)copy_masks[to_copy - copied];
*(apr_uint64_t *)(target + copied)
= (*(apr_uint64_t *)(target + copied) & mask)
| (*(const apr_uint64_t *)(source + copied) & ~mask);
#else
memcpy(buffer + header->head_length,
table->data + header->tail_start,
len - header->head_length);
len = header->head_length;
#endif
}

header = &table->short_strings[header->head_string];
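
The block removed here dereferenced apr_uint64_t pointers at arbitrary byte
offsets, which is exactly the unaligned, type-punned access pattern this
commit eliminates. When such loads are genuinely needed, the portable idiom is
a memcpy-based helper, which compilers lower to a single move on targets that
permit unaligned access — a sketch (load_u64/store_u64 are hypothetical names,
not Subversion API):

    #include <stdint.h>
    #include <string.h>

    /* Alignment- and aliasing-safe 64-bit load and store. */
    static inline uint64_t
    load_u64(const void *p)
    {
      uint64_t v;
      memcpy(&v, p, sizeof v);  /* compiles to one move where the target */
      return v;                 /* allows unaligned access               */
    }

    static inline void
    store_u64(void *p, uint64_t v)
    {
      memcpy(p, &v, sizeof v);
    }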
28 changes: 0 additions & 28 deletions subversion/libsvn_subr/eol.c
@@ -33,34 +33,6 @@
char *
svn_eol__find_eol_start(char *buf, apr_size_t len)
{
#if SVN_UNALIGNED_ACCESS_IS_OK

/* Scan the input one machine word at a time. */
for (; len > sizeof(apr_uintptr_t)
; buf += sizeof(apr_uintptr_t), len -= sizeof(apr_uintptr_t))
{
/* This is a variant of the well-known strlen test: */
apr_uintptr_t chunk = *(const apr_uintptr_t *)buf;

/* A byte in SVN__R_TEST is \0, iff it was \r in *BUF.
* Similarly, SVN__N_TEST is an indicator for \n. */
apr_uintptr_t r_test = chunk ^ SVN__R_MASK;
apr_uintptr_t n_test = chunk ^ SVN__N_MASK;

/* A byte in SVN__R_TEST can only be < 0x80, iff it has been \0 before
* (i.e. \r in *BUF). Ditto for SVN__N_TEST. */
r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;

/* Check whether at least one of the words contains a byte <0x80
* (if one is detected, there was a \r or \n in CHUNK). */
if ((r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET)
break;
}

#endif

/* The remaining odd bytes will be examined the naive way: */
for (; len > 0; ++buf, --len)
{
if (*buf == '\n' || *buf == '\r')
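
Should a word-at-a-time EOL scan ever be wanted again, it could be written
without the undefined pointer casts. A sketch reusing the hypothetical
load_u64() and contains_eol() helpers from the earlier sketches:

    #include <stddef.h>
    #include <stdint.h>

    /* Alignment-safe variant of the removed fast path; 'len > sizeof', not
     * '>=', leaves at least one byte for the bytewise tail loop, matching
     * the bound the deleted code used. */
    static char *
    find_eol_start_words(char *buf, size_t len)
    {
      while (len > sizeof(uint64_t) && !contains_eol(load_u64(buf)))
        {
          buf += sizeof(uint64_t);
          len -= sizeof(uint64_t);
        }

      for (; len > 0; ++buf, --len)
        if (*buf == '\n' || *buf == '\r')
          return buf;

      return NULL;
    }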
12 changes: 1 addition & 11 deletions subversion/libsvn_subr/hash.c
@@ -636,16 +636,6 @@ hashfunc_compatible(const char *char_key, apr_ssize_t *klen)
if (*klen == APR_HASH_KEY_STRING)
*klen = strlen(char_key);

#if SVN_UNALIGNED_ACCESS_IS_OK
for (p = key, i = *klen; i >= 4; i-=4, p+=4)
{
apr_uint32_t chunk = *(const apr_uint32_t *)p;

/* the ">> 17" part gives upper bits in the chunk a chance to make
some impact as well */
hash = hash * 33 * 33 * 33 * 33 + chunk + (chunk >> 17);
}
#else
for (p = key, i = *klen; i >= 4; i-=4, p+=4)
{
hash = hash * 33 * 33 * 33 * 33
@@ -654,7 +644,7 @@ hashfunc_compatible(const char *char_key, apr_ssize_t *klen)
+ p[2] * 33
+ p[3];
}
#endif

for (; i; i--, p++)
hash = hash * 33 + *p;

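
The unrolled fallback that hashfunc_compatible now always uses is simply four
chained steps of h = h * 33 + *p folded into one update; expanding the
recurrence shows the equivalence:

    (((h*33 + p[0])*33 + p[1])*33 + p[2])*33 + p[3]
      = h*33*33*33*33 + p[0]*33*33*33 + p[1]*33*33 + p[2]*33 + p[3]

The deleted chunked branch additionally mixed in (chunk >> 17), so it produced
different hash values for the same key; each build used exactly one of the two
branches.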
[diffs for subversion/libsvn_subr/string.c and subversion/libsvn_subr/utf_validate.c not shown]
