Skip to content

Commit

Permalink
add a custom allocator for aligned memory
Browse files Browse the repository at this point in the history
With this allocator we can use STL containers, such as std::vector,
and be sure that we can use aligned load instructions when necessary.

Refs: scality#222
  • Loading branch information
slaperche-scality committed Oct 17, 2018
1 parent 65e34d8 commit e262ef0
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 0 deletions.
166 changes: 166 additions & 0 deletions src/simd/allocator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/*
* Copyright 2017-2018 Scality
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

/** @file allocator.h
*
* Provide a custom memory allocator for SIMD.
*
* This allocator always returns memory that is suitably aligned to be loaded
* efficiently by a Register object.
*/

#ifndef __QUAD_SIMD_SIMD_ALLOCATOR_H__
#define __QUAD_SIMD_SIMD_ALLOCATOR_H__

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <new>
#include <type_traits>

#include "simd/definitions.h"

namespace quadiron {
namespace simd {

/** Tell whether `addr` satisfies the alignment required by the SIMD code.
 *
 * When no SIMD instruction set is selected (`InstructionSet::NONE`) every
 * address is accepted. Otherwise the address is accepted when none of the
 * bits selected by the mask `ALIGNMENT - 1` are set.
 *
 * @note The mask test is only equivalent to "multiple of ALIGNMENT" when
 *       ALIGNMENT is a power of two (presumably guaranteed by
 *       simd/definitions.h - to be confirmed there).
 *
 * @param addr the address to check (it is never dereferenced)
 * @return true if `addr` is suitably aligned, false otherwise
 */
template <typename T>
inline bool addr_is_aligned(const T* addr)
{
    if (INSTRUCTION_SET != InstructionSet::NONE) {
        // Inspect the numeric value of the pointer: it is aligned when all
        // the bits below the alignment boundary are zero.
        const std::uintptr_t bits = reinterpret_cast<std::uintptr_t>(addr);
        const auto low_bits = bits & (ALIGNMENT - 1);
        return low_bits == 0;
    }
    // Without SIMD, any address will do.
    return true;
}

/** Custom allocator to take advantage of SIMD processing.
 *
 * This allocator always returns memory that is suitably aligned for the
 * current SIMD instruction set. Thanks to this property, you can safely use
 * the aligned load from the Register class in order to increase performance.
 *
 * When SIMD is enabled, each allocation is laid out as follows:
 *
 * @verbatim
 *   raw                        aligned (returned to the caller)
 *   |<-------- offset -------->|<------ count * sizeof(T) ------>|
 *                          [off]
 * @endverbatim
 *
 * `offset` is in the range [1, ALIGNMENT] and is stored in the single byte
 * located just before the aligned pointer, so that `deallocate` can recover
 * the pointer originally returned by `::operator new`. As a consequence,
 * ALIGNMENT must not exceed 255 (this is checked by an assertion).
 *
 * When no SIMD instruction set is selected (`InstructionSet::NONE`), the
 * allocator simply forwards to the global `operator new`/`operator delete`.
 *
 * The allocator is stateless: all instances are interchangeable.
 */
template <typename T>
class AlignedAllocator {
  public:
    using value_type = T;

    AlignedAllocator() noexcept {}

    /// Converting constructor: there is no state, hence nothing to copy.
    template <class U>
    AlignedAllocator(AlignedAllocator<U> const&) noexcept
    {
    }

    /** Allocate uninitialized storage for `count` objects of type `T`.
     *
     * @param count number of objects the storage must be able to hold
     * @return a pointer to the storage, aligned on ALIGNMENT bytes when a
     *         SIMD instruction set is enabled
     * @throw std::bad_alloc if `count` exceeds `max_size()` or if the
     *        underlying `::operator new` fails
     */
    value_type* allocate(std::size_t count)
    {
        // Guard against overflow of the size computations below.
        if (count > max_size()) {
            throw std::bad_alloc();
        }

        // No SIMD: default allocator is good enough!
        if (INSTRUCTION_SET == InstructionSet::NONE) {
            return static_cast<value_type*>(
                ::operator new(count * sizeof(value_type)));
        }

        // Overallocate just enough to have room for alignment adjustment.
        const std::size_t size = count * sizeof(value_type) + ALIGNMENT;
        unsigned char* ptr = static_cast<unsigned char*>(::operator new(size));

        // Align the allocated memory.
        //
        // Note that an already aligned `ptr` is shifted by a full ALIGNMENT:
        // `offset` is thus never 0 and we always have at least one byte
        // available in front of the aligned memory.
        const std::uintptr_t address = reinterpret_cast<std::uintptr_t>(ptr);
        const unsigned offset =
            static_cast<unsigned>(ALIGNMENT - (address % ALIGNMENT));
        assert(offset >= 1); // We need a byte to store the offset itself.
        unsigned char* aligned_ptr = ptr + offset;

        // Store the offset just before the aligned memory.
        assert(offset <= std::numeric_limits<unsigned char>::max());
        *(aligned_ptr - 1) = static_cast<unsigned char>(offset);

        // Return the aligned pointer.
        //
        // Clang analyzer thinks that we leak `ptr`, whereas we can re-compute
        // it from `aligned_ptr` and free it in `deallocate`.
        // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
        return reinterpret_cast<value_type*>(aligned_ptr);
    }

    /** Release storage previously obtained from `allocate`.
     *
     * @param ptr pointer returned by a call to `allocate` (on any instance
     *        of this allocator); a null pointer is accepted and ignored
     * @param count the value that was given to `allocate` to obtain `ptr`
     */
    void deallocate(value_type* ptr, std::size_t count) noexcept
    {
        // No SIMD: default allocator is good enough!
        if (INSTRUCTION_SET == InstructionSet::NONE) {
            release(ptr, count * sizeof(value_type));
            return;
        }

        // There is no offset byte to read in front of a null pointer.
        if (ptr == nullptr) {
            return;
        }
        const std::size_t size = count * sizeof(value_type) + ALIGNMENT;
        // Respect strict aliasing rules: read through a character type.
        unsigned char* raw = reinterpret_cast<unsigned char*>(ptr);
        // Get the alignment offset stored just before the aligned pointer.
        const unsigned offset = *(raw - 1);
        // Walk back to the pointer that `::operator new` gave us.
        release(raw - offset, size);
    }

    /// Return the largest `count` that `allocate` can accept: the byte size
    /// (alignment padding included) must be representable by `std::size_t`.
    std::size_t max_size() const noexcept
    {
        const std::size_t max_size = std::numeric_limits<std::size_t>::max();
        return (max_size - ALIGNMENT) / sizeof(value_type);
    }

    // Our allocator is stateless.
    using propagate_on_container_copy_assignment = std::true_type;
    using propagate_on_container_move_assignment = std::true_type;
    using propagate_on_container_swap = std::true_type;
    using is_always_equal = std::true_type;

  private:
    /** Give `size` bytes starting at `ptr` back to the global heap.
     *
     * The sized form of the global `operator delete` only exists since C++14
     * and some compilers do not enable it by default: use it when the
     * feature-test macro advertises it, otherwise fall back on the unsized
     * form (which is always available).
     */
    static void release(void* ptr, std::size_t size) noexcept
    {
#if defined(__cpp_sized_deallocation) && __cpp_sized_deallocation >= 201309L
        ::operator delete(ptr, size);
#else
        static_cast<void>(size);
        ::operator delete(ptr);
#endif
    }
};

/** Compare two allocators for equality.
 *
 * The allocator carries no state, so storage obtained from one instance can
 * be released through any other instance: all instances compare equal,
 * whatever their value types.
 *
 * @return always true
 */
template <class T, class U>
bool operator==(AlignedAllocator<T> const&, AlignedAllocator<U> const&) noexcept
{
    return true;
}

/** Compare two allocators for inequality.
 *
 * Defined as the negation of `operator==` so that both operators can never
 * disagree.
 *
 * @param x first allocator
 * @param y second allocator
 * @return true if `x` and `y` do not compare equal
 */
template <class T, class U>
bool operator!=(
    AlignedAllocator<T> const& x,
    AlignedAllocator<U> const& y) noexcept
{
    const bool interchangeable = (x == y);
    return !interchangeable;
}

} // namespace simd
} // namespace quadiron

#endif
1 change: 1 addition & 0 deletions src/simd/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#ifndef __QUAD_SIMD_SIMD_H__
#define __QUAD_SIMD_SIMD_H__

#include "simd/allocator.h"
#include "simd/definitions.h"

namespace quadiron {
Expand Down

0 comments on commit e262ef0

Please sign in to comment.