Skip to content

Commit

Permalink
[performance] Add custom vector class that avoids the initialization …
Browse files Browse the repository at this point in the history
…during resize
  • Loading branch information
mxmlnkn committed Feb 7, 2024
1 parent e209e3a commit e62cb4d
Show file tree
Hide file tree
Showing 4 changed files with 401 additions and 11 deletions.
316 changes: 311 additions & 5 deletions src/core/FasterVector.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
#pragma once

#include <algorithm>
#include <cassert>
#if !defined( WITH_RPMALLOC )
#include <cstdlib>
#endif
#include <iterator>
#include <new>
#include <optional>
#include <stdexcept>
#include <vector>

#include "common.hpp" // ceilDiv


#ifdef WITH_RPMALLOC
#include <rpmalloc.h>
Expand Down Expand Up @@ -46,9 +56,12 @@ class RpmallocThreadInit


/**
 * Allocates @p nBytes with rpmalloc. The function-local thread_local RpmallocThreadInit object is
 * constructed the first time a thread passes through this function, i.e., before rpmalloc is called.
 * NOTE(review): two signature lines follow because this text is a rendered diff; presumably only the
 * variant with the default argument belongs to the new file - confirm against the repository.
 *
 * @param nBytes Number of bytes to allocate. A value of 0 skips the allocation.
 * @return The pointer returned by rpmalloc or nullptr if @p nBytes is 0.
 */
void*
rpmalloc_ensuring_initialization( size_t nBytes )
rpmalloc_ensuring_initialization( size_t nBytes = 0 )
{
/* RpmallocThreadInit is defined outside of this view; presumably it sets up rpmalloc for this thread. */
static const thread_local RpmallocThreadInit rpmallocThreadInit{};
/* A call without a size only triggers the construction of the thread_local object above. */
if ( nBytes == 0 ) {
return nullptr;
}
return rpmalloc( nBytes );
}

Expand Down Expand Up @@ -99,13 +112,306 @@ class RpmallocAllocator

static_assert( std::is_empty_v<RpmallocAllocator<char> > );

#endif


/**
 * @return true if exactly one bit is set in @p value, i.e., if it is 1, 2, 4, 8, ...
 *         Zero is not considered a power of two.
 */
[[nodiscard]] constexpr bool
isPowerOf2( size_t value )
{
    if ( value == 0 ) {
        return false;
    }
    /* Subtracting one flips the lowest set bit and everything below it. The bitwise AND is
     * therefore only zero when there was no other bit set above the lowest one. */
    const size_t withoutLowestSetBit = value & ( value - 1U );
    return withoutLowestSetBit == 0U;
}


template<typename T>
using FasterVector = std::vector<T, RpmallocAllocator<T> >;
/**
 * SFINAE helper: RequireInputIterator<T> only names a type (void, the default of std::enable_if)
 * if std::iterator_traits<T>::iterator_category is convertible to std::input_iterator_tag.
 * It is used as a defaulted template argument to disable iterator overloads for non-iterator types.
 */
using RequireInputIterator = typename std::enable_if<
std::is_convertible<typename std::iterator_traits<T>::iterator_category,
std::input_iterator_tag>::value
>::type;

#else

template<typename T>
using FasterVector = std::vector<T>;
class FasterVector
{
public:
static_assert( isPowerOf2( sizeof( T ) ), "Size of element type must be a power of 2 for alignment purpose!" );
static constexpr size_t ALIGNMENT = std::max<size_t>( sizeof( T ), 512U / 8U );

#endif
using value_type = T;

public:
FasterVector() = default;

explicit
FasterVector( size_t size,
const std::optional<T>& initialValue = {} )
{
resize( size, initialValue );
}

template<typename InputIt,
typename = RequireInputIterator<InputIt> >
FasterVector( InputIt inputBegin,
InputIt inputEnd )
{
insert( end(), inputBegin, inputEnd );
}

/**
 * Move constructor. Takes over the buffer of @p other without copying any element and leaves
 * @p other as an empty vector without any allocation.
 * It only copies and resets plain members and therefore is marked noexcept, which allows
 * standard containers and algorithms to use it without falling back to weaker guarantees.
 */
FasterVector( FasterVector&& other ) noexcept :
    m_data( other.m_data ),
    m_capacity( other.m_capacity ),
    m_size( other.m_size )
{
    /* Reset the source so that its destructor does not free the buffer that is now owned by this. */
    other.m_data = nullptr;
    other.m_capacity = 0;
    other.m_size = 0;
}

/**
 * Move assignment. Releases the buffer currently owned by this vector, takes over the buffer of
 * @p other, and leaves @p other as an empty vector without any allocation.
 *
 * @return Reference to this vector.
 */
FasterVector&
operator=( FasterVector&& other ) noexcept
{
    /* Self-assignment must be a no-op. Otherwise, the buffer would be released and the members
     * would be reset to an empty state, losing all contents. */
    if ( this == &other ) {
        return *this;
    }

    /* Release the old buffer first. Simply overwriting m_data would leak it. */
    free();

    m_data = other.m_data;
    m_capacity = other.m_capacity;
    m_size = other.m_size;

    /* Reset the source so that its destructor does not free the buffer that is now owned by this. */
    other.m_data = nullptr;
    other.m_capacity = 0;
    other.m_size = 0;

    return *this;
}

/* Forbid copies because they are expensive and because they have unexpected behavior like not copying
* the capacity. */
FasterVector( const FasterVector& ) = delete;
FasterVector& operator=( const FasterVector& ) = delete;

/** Releases the owned buffer, if there is any. */
~FasterVector()
{
    this->free();
}

void
resize( size_t size,
const std::optional<T>& initialValue = {} )
{
if ( size > m_size ) {
reserve( size );
if ( initialValue ) {
std::fill( m_data + m_size, m_data + size, *initialValue );
}
}
m_size = size;
}

/**
 * Ensures that the capacity is at least @p newCapacity elements. The capacity is never reduced
 * by this method and the size is not changed.
 */
void
reserve( size_t newCapacity )
{
    const bool alreadyLargeEnough = newCapacity <= m_capacity;
    if ( alreadyLargeEnough ) {
        return;
    }
    reallocate( newCapacity );
}

/** Sets the size to zero. The buffer and therefore the capacity are kept. */
void
clear()
{
    m_size = 0;
}

/** Reallocates the buffer so that the capacity is equal to the current size. */
void
shrink_to_fit()
{
    reallocate( m_size );
}

[[nodiscard]] constexpr size_t capacity() const noexcept { return m_capacity; }
[[nodiscard]] constexpr size_t size() const noexcept { return m_size; }
[[nodiscard]] constexpr bool empty() const noexcept { return m_size == 0; }

[[nodiscard]] constexpr const T* data() const noexcept { return m_data; }
[[nodiscard]] constexpr T* data() noexcept { return m_data; }

[[nodiscard]] constexpr const T* cbegin() const noexcept { return m_data; }
[[nodiscard]] constexpr const T* begin() const noexcept { return m_data; }
[[nodiscard]] constexpr T* begin() noexcept { return m_data; }

[[nodiscard]] constexpr const T* cend() const noexcept { return m_data + m_size; }
[[nodiscard]] constexpr const T* end() const noexcept { return m_data + m_size; }
[[nodiscard]] constexpr T* end() noexcept { return m_data + m_size; }

[[nodiscard]] constexpr auto crbegin() const noexcept { return std::reverse_iterator<const T*>( m_data + m_size ); }
[[nodiscard]] constexpr auto rbegin() const noexcept { return std::reverse_iterator<const T*>( m_data + m_size ); }
[[nodiscard]] constexpr auto rbegin() noexcept { return std::reverse_iterator<T*>( m_data + m_size ); }

[[nodiscard]] constexpr auto crend() const noexcept { return std::reverse_iterator<const T*>( m_data ); }
[[nodiscard]] constexpr auto rend() const noexcept { return std::reverse_iterator<const T*>( m_data ); }
[[nodiscard]] constexpr auto rend() noexcept { return std::reverse_iterator<T*>( m_data ); }

/**
 * @param i Index of the requested element. It is only checked by an assert, i.e., not at all
 *          when compiled with NDEBUG.
 * @return Read-only reference to the i-th element.
 */
[[nodiscard]] const T&
operator[]( size_t i ) const
{
    assert( i < m_size );
    const T* const element = m_data + i;
    return *element;
}

/**
 * @param i Index of the requested element. It is only checked by an assert, i.e., not at all
 *          when compiled with NDEBUG.
 * @return Mutable reference to the i-th element.
 */
[[nodiscard]] T&
operator[]( size_t i )
{
    assert( i < m_size );
    T* const element = m_data + i;
    return *element;
}

/**
 * Inserts a copy of the range [inputBegin, inputEnd) in front of @p position.
 * The capacity is grown to the next power of two that can hold all elements.
 *
 * @param position Pointer into this vector in the range [begin(), end()]. It may be invalidated
 *                 by this call because the buffer might get reallocated.
 * @param inputBegin Begin of the range to copy. The distance is computed with operator-, so the
 *                   iterators must support it. The range must not point into this vector because
 *                   the reallocation and the shifting of elements would invalidate it.
 * @param inputEnd End of the range to copy. Nothing is done if it is not behind @p inputBegin.
 * @throws std::logic_error if @p position is not inside [begin(), end()].
 */
template<typename InputIt,
         typename = RequireInputIterator<InputIt> >
void
insert( const T* position,
        const InputIt& inputBegin,
        const InputIt& inputEnd )
{
    const auto inputDistance = inputEnd - inputBegin;
    if ( inputDistance <= 0 ) {
        return;
    }
    const auto inputSize = static_cast<size_t>( inputDistance );

    const auto positionDistance = position - m_data;
    if ( ( positionDistance < 0 ) || ( static_cast<size_t>( positionDistance ) > m_size ) ) {
        throw std::logic_error( "The insertion position must be inside the valid range of this vector or end()!" );
    }
    /* Remember the index because the reallocation below may invalidate "position"! */
    const auto positionIndex = static_cast<size_t>( positionDistance );

    /* Round the required size up to a power of two using integer arithmetic only. This avoids
     * floating-point rounding for very large sizes. Stop doubling before it could overflow. */
    const auto requiredSize = m_size + inputSize;
    size_t newCapacity = 1;
    while ( ( newCapacity < requiredSize ) && ( newCapacity <= static_cast<size_t>( -1 ) / 2U ) ) {
        newCapacity *= 2U;
    }
    reserve( std::max( newCapacity, requiredSize ) );

    if ( positionIndex < m_size ) {
        /* Make room by shifting the WHOLE tail [positionIndex, m_size) back by inputSize elements.
         * Copying backward is required because source and destination may overlap. */
        std::copy_backward( m_data + positionIndex, m_data + m_size, m_data + requiredSize );
    }
    std::copy( inputBegin, inputEnd, m_data + positionIndex );
    m_size = requiredSize;
}

/** @return true if both vectors have the same size and all elements compare equal pairwise. */
[[nodiscard]] bool
operator==( const FasterVector& other ) const
{
    if ( size() != other.size() ) {
        return false;
    }
    return std::equal( begin(), end(), other.begin() );
}

/** @return true if the sizes differ or if any pair of elements at the same index is unequal. */
[[nodiscard]] bool
operator!=( const FasterVector& other ) const
{
    const bool sameContents = std::equal( begin(), end(), other.begin(), other.end() );
    return !sameContents;
}

/**
 * @return Read-only reference to the first element.
 * @throws std::out_of_range if the vector is empty.
 */
[[nodiscard]] const T&
front() const
{
    requireNonEmpty();
    return *m_data;
}

/**
 * @return Mutable reference to the first element.
 * @throws std::out_of_range if the vector is empty.
 */
[[nodiscard]] T&
front()
{
    requireNonEmpty();
    return *m_data;
}

/**
 * @return Read-only reference to the last element.
 * @throws std::out_of_range if the vector is empty.
 */
[[nodiscard]] const T&
back() const
{
    requireNonEmpty();
    const size_t lastIndex = m_size - 1;
    return m_data[lastIndex];
}

/**
 * @return Mutable reference to the last element.
 * @throws std::out_of_range if the vector is empty.
 */
[[nodiscard]] T&
back()
{
    requireNonEmpty();
    const size_t lastIndex = m_size - 1;
    return m_data[lastIndex];
}

private:
void
requireNonEmpty()
{
if ( empty() ) {
throw std::out_of_range( "Cannot get last element of empty vector!" );
}
}

void
reallocate( const size_t newCapacity )
{
if ( newCapacity == m_capacity ) {
return;
}

if ( newCapacity == 0 ) {
free();
} else {
#ifdef WITH_RPMALLOC
#if 1
if ( m_data == nullptr ) {
rpmalloc_ensuring_initialization();
m_data = static_cast<T*>( rpaligned_alloc( ALIGNMENT, newCapacity * sizeof( T ) ) );
} else {
m_data = static_cast<T*>( rpaligned_realloc( m_data, ALIGNMENT, newCapacity * sizeof( T ),
m_capacity * sizeof( T ), /* flags */ 0 ) );
}
#else
if ( m_data == nullptr ) {
m_data = static_cast<T*>( rpmalloc_ensuring_initialization( newCapacity * sizeof( T ) ) );
} else {
m_data = static_cast<T*>( rprealloc( m_data, newCapacity * sizeof( T ) ) );
}
#endif
#else
/* > If ptr is a null pointer, the behavior is the same as calling std::malloc(new_size). */
m_data = static_cast<T*>( std::realloc( m_data, newCapacity * sizeof( T ) ) );
#endif
}

m_capacity = newCapacity;
}

void
free()
{
#ifdef WITH_RPMALLOC
rpfree( m_data );
#else
std::free( m_data );
#endif
m_data = nullptr;
}

private:
T* m_data{ nullptr };
size_t m_capacity{ 0 };
size_t m_size{ 0 };
};


/** @return true if the FasterVector and the std::vector have equal sizes and pairwise equal elements. */
template<class T, class Alloc>
[[nodiscard]] bool
operator==( const FasterVector<T>& lhs,
            const std::vector<T, Alloc>& rhs )
{
    if ( lhs.size() != rhs.size() ) {
        return false;
    }
    return std::equal( lhs.begin(), lhs.end(), rhs.begin() );
}


/** @return true if the std::vector and the FasterVector have equal sizes and pairwise equal elements. */
template<class T, class Alloc>
[[nodiscard]] bool
operator==( const std::vector<T, Alloc>& lhs,
            const FasterVector<T>& rhs )
{
    if ( lhs.size() != rhs.size() ) {
        return false;
    }
    return std::equal( lhs.begin(), lhs.end(), rhs.begin() );
}


/** @return true if the sizes differ or if any pair of elements at the same index is unequal. */
template<class T, class Alloc>
[[nodiscard]] bool
operator!=( const FasterVector<T>& lhs,
            const std::vector<T, Alloc>& rhs )
{
    const bool sameContents = std::equal( lhs.begin(), lhs.end(), rhs.begin(), rhs.end() );
    return !sameContents;
}


/** @return true if the sizes differ or if any pair of elements at the same index is unequal. */
template<class T, class Alloc>
[[nodiscard]] bool
operator!=( const std::vector<T, Alloc>& lhs,
            const FasterVector<T>& rhs )
{
    const bool sameContents = std::equal( lhs.begin(), lhs.end(), rhs.begin(), rhs.end() );
    return !sameContents;
}
15 changes: 9 additions & 6 deletions src/rapidgzip/IndexFileFormat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -490,23 +490,26 @@ readGzipIndex( UniqueFileReader indexFile,

index.windows = std::make_shared<WindowMap>();
for ( auto& [offset, windowSize, compressionRatio] : windowInfos ) {
FasterVector<uint8_t> window;
/* Package the non-copyable FasterVector into a copyable smart pointer because the lambda given into the
* ThreadPool gets inserted into a std::function living inside std::packaged_task, and std::function
* requires every capture to be copyable. While it may compile with Clang and GCC, it does not with MSVC. */
auto window = std::make_shared<FasterVector<uint8_t> >();
if ( windowSize > 0 ) {
window.resize( windowSize );
checkedRead( indexFile.get(), window.data(), window.size() );
window->resize( windowSize );
checkedRead( indexFile.get(), window->data(), window->size() );
}

/* Only bother with overhead-introducing compression for large chunk compression ratios. */
if ( compressionRatio > 2 ) {
futures.emplace_back( threadPool.submit( [toCompress = std::move( window ), offset = offset] () {
futures.emplace_back( threadPool.submit( [toCompress = std::move( window ), offset = offset] () mutable {
return std::make_pair(
offset, std::make_shared<WindowMap::Window>( std::move( toCompress ), CompressionType::GZIP ) );
offset, std::make_shared<WindowMap::Window>( std::move( *toCompress ), CompressionType::GZIP ) );
} ) );
if ( futures.size() >= 2 * backgroundThreadCount ) {
processFuture();
}
} else {
index.windows->emplace( offset, std::move( window ), CompressionType::NONE );
index.windows->emplace( offset, std::move( *window ), CompressionType::NONE );
}
}

Expand Down
1 change: 1 addition & 0 deletions src/tests/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ addCoreTest(testParallelBitStringFinder)
addCoreTest(testThreadPool)
addCoreTest(testPrefetcher)
addCoreTest(testStreamAdapter)
addCoreTest(testFasterVector)
Loading

0 comments on commit e62cb4d

Please sign in to comment.