Skip to content

Commit

Permalink
Initial NEON code, improved stats output, allowing maxrate setting as…
Browse files Browse the repository at this point in the history
… factor (#340)
  • Loading branch information
adamjw24 authored Dec 19, 2023
1 parent eea6fce commit ed1fa2d
Show file tree
Hide file tree
Showing 42 changed files with 1,615 additions and 78 deletions.
1 change: 1 addition & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
* Christian Stoffers, , Fraunhofer HHI
* Gabriel Hege, , Fraunhofer HHI
* Jens Güther, , Fraunhofer HHI
* Florian Eisenreich, , Fraunhofer HHI
19 changes: 17 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,18 @@ endif()
set( CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/modules" )
message( STATUS "CMAKE_MODULE_PATH: updating module path to: ${CMAKE_MODULE_PATH}" )

# check for arm architecture support:
# the ARM SIMD default is switched on when the target processor, the C++ compiler
# path or the macOS architecture list matches an ARM pattern, unless the macOS
# architecture list also names an x86 target.
set( VVENC_ARM_SIMD_DEFAULT FALSE )
if( NOT "${CMAKE_OSX_ARCHITECTURES}" MATCHES "x86|x64" )
  if( "${CMAKE_SYSTEM_PROCESSOR}" MATCHES "aarch64|arm"
      OR "${CMAKE_CXX_COMPILER}" MATCHES "aarch64|arm"
      OR "${CMAKE_OSX_ARCHITECTURES}" MATCHES "arm64|armv" )
    set( VVENC_ARM_SIMD_DEFAULT TRUE )
  endif()
endif()

# we enable x86 intrinsics for all target architectures, because they are implemented through simd-everywhere on non-x86
set( VVENC_ENABLE_X86_SIMD TRUE CACHE BOOL "enable x86 intrinsics" )
# the ARM intrinsics switch takes its initial value from VVENC_ARM_SIMD_DEFAULT; both switches are cache entries
set( VVENC_ENABLE_ARM_SIMD ${VVENC_ARM_SIMD_DEFAULT} CACHE BOOL "enable ARM intrinsics" )

include( vvencCompilerSupport )

Expand All @@ -39,8 +49,13 @@ if( VVENC_ENABLE_X86_SIMD )
check_missing_intrinsics()
endif()

set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DTARGET_SIMD_X86" )
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTARGET_SIMD_X86" )
message( STATUS "x86 SIMD intrinsics enabled (using SIMDE for non-x86 targets)" )
add_compile_definitions( TARGET_SIMD_X86 )
endif()

# TARGET_SIMD_ARM is the preprocessor symbol the sources test to compile their ARM code paths
if( VVENC_ENABLE_ARM_SIMD )
  add_compile_definitions( TARGET_SIMD_ARM )
  message( STATUS "ARM SIMD intrinsics enabled" )
endif()

if( NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR )
Expand Down
6 changes: 5 additions & 1 deletion include/vvenc/vvencCfg.h
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,11 @@ typedef struct vvenc_config
int8_t m_sliceTypeAdapt; // enable slice type adaptation (STA)
bool m_treatAsSubPic;

int m_RCMaxBitrate; // maximum bitrate in bps (default: 0 (RC disabled or least constrained VBR))
// Converts a maxrate factor f (multiplier of the target bitrate) into the negative
// 4-bit fixed point value understood by m_RCMaxBitrate: f is scaled by 16, rounded
// and negated. The argument is parenthesized so that an expression such as
// VVENC_SET_MAXRATE_FACTOR(1+0.5) is scaled as a whole.
#define VVENC_SET_MAXRATE_FACTOR(f) (-((int)((f)*16+0.5)))
int m_RCMaxBitrate; // maximum bitrate in bps (default: 0 (RC disabled or least constrained VBR)).
// If negative, the absolute value is interpreted as a 4-bit fixed point multiplier of the target bitrate:
// -24, i.e. -1.1000 binary, means the maxrate is set to 1.5x the target bitrate.
// For convenience use VVENC_SET_MAXRATE_FACTOR, e.g. VVENC_SET_MAXRATE_FACTOR(1.5), to set the multiplier.
int m_reservedInt;
double m_reservedDouble[9];

Expand Down
4 changes: 3 additions & 1 deletion source/App/vvencFFapp/EncApp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ int EncApp::encode()
int64_t frameCount = apputils::VVEncAppCfg::getFrameCount( appCfg.m_inputFileName, vvencCfg.m_SourceWidth, vvencCfg.m_SourceHeight, vvencCfg.m_inputBitDepth[0], appCfg.m_packedYUVInput );
frameCount = std::max<int64_t>( 0, frameCount-appCfg.m_FrameSkip );
int64_t framesToEncode = (vvencCfg.m_framesToBeEncoded == 0 || vvencCfg.m_framesToBeEncoded >= frameCount) ? frameCount : vvencCfg.m_framesToBeEncoded;
cStats.init( vvencCfg.m_FrameRate, vvencCfg.m_FrameScale, (int)framesToEncode, "vvenc [info]: " );
cStats.init( vvencCfg.m_FrameRate, vvencCfg.m_FrameScale, (int)framesToEncode, vvencCfg.m_verbosity, "vvenc [info]: " );
bool statsInfoReady = false;

// loop over input YUV data
Expand Down Expand Up @@ -370,6 +370,7 @@ int EncApp::encode()
if( statsInfoReady )
{
msgApp( VVENC_INFO, cStats.getInfoString().c_str() );
fflush( stdout );
}
}

Expand All @@ -383,6 +384,7 @@ int EncApp::encode()
if( appCfg.m_printStats )
{
msgApp( VVENC_INFO, cStats.getFinalStats().c_str() );
fflush( stdout );
}
}

Expand Down
4 changes: 3 additions & 1 deletion source/App/vvencapp/vvencapp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ int main( int argc, char* argv[] )
int64_t framesToEncode = (vvenccfg.m_framesToBeEncoded == 0 || vvenccfg.m_framesToBeEncoded >= frameCount) ? frameCount : vvenccfg.m_framesToBeEncoded;

apputils::Stats cStats;
cStats.init( vvenccfg.m_FrameRate, vvenccfg.m_FrameScale, (int)framesToEncode, "vvenc [info]: " );
cStats.init( vvenccfg.m_FrameRate, vvenccfg.m_FrameScale, (int)framesToEncode, vvenccfg.m_verbosity, "vvenc [info]: " );
bool statsInfoReady = false;

while( !bEof || !bEncodeDone )
Expand Down Expand Up @@ -404,6 +404,7 @@ int main( int argc, char* argv[] )
if( statsInfoReady )
{
msgApp( nullptr, VVENC_INFO, cStats.getInfoString().c_str() );
fflush( stdout );
}
}

Expand All @@ -426,6 +427,7 @@ int main( int argc, char* argv[] )
if( vvencappCfg.m_printStats )
{
msgApp( nullptr, VVENC_INFO, cStats.getFinalStats().c_str() );
fflush( stdout );
}
}

Expand Down
2 changes: 2 additions & 0 deletions source/Lib/CommonLib/AdaptiveLoopFilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ POSSIBILITY OF SUCH DAMAGE.

namespace vvenc {

using namespace x86_simd;

struct AlfClassifier
{
AlfClassifier() {}
Expand Down
2 changes: 2 additions & 0 deletions source/Lib/CommonLib/AffineGradientSearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ namespace vvenc {
//! \ingroup CommonLib
//! \{

using namespace x86_simd;

class AffineGradientSearch
{
public:
Expand Down
19 changes: 19 additions & 0 deletions source/Lib/CommonLib/Buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ struct vvencYUVBuffer;

namespace vvenc {

using namespace x86_simd;
using namespace arm_simd;

// ---------------------------------------------------------------------------
// AreaBuf struct
// ---------------------------------------------------------------------------
Expand All @@ -81,6 +84,22 @@ struct PelBufferOps
template<X86_VEXT vext>
void _initPelBufOpsX86();
#endif

// ARM initialisation hooks, declared only when ENABLE_SIMD_OPT_BUFFER is enabled and
// TARGET_SIMD_ARM is defined. The templated variant is parameterised on the ARM_VEXT level.
// NOTE(review): presumably these install ARM implementations into the function pointers
// declared below - confirm against the ARM source files.
#if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_ARM )
void initPelBufOpsARM();
template<ARM_VEXT vext>
void _initPelBufOpsARM();
#endif

// Pointer stepping helpers. Every macro takes a stride argument for a uniform call shape,
// but only the Y and two-dimensional variants actually use it.
// The statement forms below modify ptr in place and expand to a brace block, so a
// trailing ';' at the call site produces an additional empty statement.
#define INCX( ptr, stride ) { ptr++; }
#define INCY( ptr, stride ) { ptr += ( stride ); }
#define OFFSETX( ptr, stride, x ) { ptr += ( x ); }
#define OFFSETY( ptr, stride, y ) { ptr += ( y ) * ( stride ); }
#define OFFSET( ptr, stride, x, y ) { ptr += ( x ) + ( y ) * ( stride ); }
// The GET_ forms are expressions: they yield the offset pointer and leave ptr unchanged.
#define GET_OFFSETX( ptr, stride, x ) ( ( ptr ) + ( x ) )
#define GET_OFFSETY( ptr, stride, y ) ( ( ptr ) + ( y ) * ( stride ) )
#define GET_OFFSET( ptr, stride, x, y ) ( ( ptr ) + ( x ) + ( y ) * ( stride ) ) // needed in LoopFilter.cpp and some ARM files

void ( *roundGeo ) ( const Pel* src, Pel* dest, const int numSamples, unsigned rshift, int offset, const ClpRng &clpRng);
void ( *addAvg ) ( const Pel* src0, const Pel* src1, Pel* dst, int numsamples, unsigned shift, int offset, const ClpRng& clpRng );
void ( *reco ) ( const Pel* src0, const Pel* src1, Pel* dst, int numSamples, const ClpRng& clpRng );
Expand Down
69 changes: 53 additions & 16 deletions source/Lib/CommonLib/CommonDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -636,22 +636,6 @@ static inline T* aligned_malloc(size_t len, size_t alignement) {
# define ALWAYS_INLINE
#endif

#ifdef TARGET_SIMD_X86
typedef enum
{
UNDEFINED = -1,
SCALAR = 0,
SSE41,
SSE42,
AVX,
AVX2,
AVX512
} X86_VEXT;
#endif

template <typename ValueType> inline ValueType leftShiftU (const ValueType value, const unsigned shift) { return value << shift; }
template <typename ValueType> inline ValueType rightShiftU (const ValueType value, const unsigned shift) { return value >> shift; }

#if defined( _WIN32 ) && defined( TARGET_SIMD_X86 )
static inline unsigned int bit_scan_reverse( int a )
{
Expand All @@ -672,6 +656,59 @@ static inline unsigned int bit_scan_reverse( int a )
}
#endif

#if ENABLE_SIMD_LOG2
// getLog2: with ENABLE_SIMD_LOG2 the result is taken directly from bit_scan_reverse().
static inline int getLog2( int val )
{
  return bit_scan_reverse( val );
}
#else
extern int8_t g_aucLog2[MAX_CU_SIZE + 1];
// getLog2: without ENABLE_SIMD_LOG2 the result is read from the g_aucLog2 table for
// values that index into it; any other value falls back to std::log2, whose result
// is converted to int on return.
static inline int getLog2( int val )
{
  CHECKD( g_aucLog2[2] != 1, "g_aucLog2[] has not been initialized yet." );
  // outside the table range (non-positive or too large): use the library routine
  if( val <= 0 || val >= (int) sizeof( g_aucLog2 ) )
  {
    return std::log2( val );
  }
  return g_aucLog2[val];
}
#endif

// SIMD extension levels, one enum per architecture. They live in separate namespaces
// because both enums declare UNDEFINED and SCALAR.
// The namespaces themselves are declared unconditionally (possibly empty), so that
// `using namespace x86_simd;` / `using namespace arm_simd;` directives stay well-formed
// when ENABLE_SIMD_OPT is off or the matching TARGET_SIMD_* macro is not defined.
// The enums are still only available under the same conditions as before.

namespace x86_simd
{
#if ENABLE_SIMD_OPT && defined( TARGET_SIMD_X86 )
typedef enum
{
UNDEFINED = -1,
SCALAR = 0,
SSE41,
SSE42,
AVX,
AVX2,
AVX512
} X86_VEXT;
#endif // ENABLE_SIMD_OPT && TARGET_SIMD_X86
} // namespace x86_simd

namespace arm_simd
{
#if ENABLE_SIMD_OPT && defined( TARGET_SIMD_ARM )
typedef enum
{
UNDEFINED = -1,
SCALAR = 0,
NEON,
} ARM_VEXT;
#endif // ENABLE_SIMD_OPT && TARGET_SIMD_ARM
} // namespace arm_simd

// Shifts value left by shift bit positions and returns the result as ValueType.
template <typename ValueType>
inline ValueType leftShiftU( const ValueType value, const unsigned shift )
{
  return value << shift;
}

// Shifts value right by shift bit positions and returns the result as ValueType.
template <typename ValueType>
inline ValueType rightShiftU( const ValueType value, const unsigned shift )
{
  return value >> shift;
}

#if ENABLE_SIMD_LOG2 && defined( TARGET_SIMD_X86 )
static inline int floorLog2( int val )
{
Expand Down
2 changes: 1 addition & 1 deletion source/Lib/CommonLib/DepQuant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1518,7 +1518,7 @@ namespace DQIntern

#if ENABLE_SIMD_OPT_QUANT && defined( TARGET_SIMD_X86 )
// if more than one 4x4 coding subblock is available, use SIMD to find first subblock with coefficient larger than threshold
if( firstTestPos >= 16 && tuPars.m_log2SbbWidth == 2 && tuPars.m_log2SbbHeight == 2 && read_x86_extension_flags() > SCALAR )
if( firstTestPos >= 16 && tuPars.m_log2SbbWidth == 2 && tuPars.m_log2SbbHeight == 2 && read_x86_extension_flags() > x86_simd::SCALAR )
{
const int sbbSize = tuPars.m_sbbSize;
// move the pointer to the beginning of the current subblock
Expand Down
2 changes: 2 additions & 0 deletions source/Lib/CommonLib/InterPrediction.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ POSSIBILITY OF SUCH DAMAGE.

namespace vvenc {

using namespace x86_simd;

// forward declaration
class Mv;

Expand Down
8 changes: 6 additions & 2 deletions source/Lib/CommonLib/InterpolationFilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1071,12 +1071,16 @@ void InterpolationFilter::xWeightedGeoBlk(const ClpRngs &clpRngs, const CodingUn
void InterpolationFilter::initInterpolationFilter( bool enable )
{
#if ENABLE_SIMD_OPT_MCIF
#ifdef TARGET_SIMD_X86
if ( enable )
{
#ifdef TARGET_SIMD_X86
initInterpolationFilterX86();
}
#endif

#ifdef TARGET_SIMD_ARM
initInterpolationFilterARM();
#endif
}
#endif
}

Expand Down
9 changes: 9 additions & 0 deletions source/Lib/CommonLib/InterpolationFilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ POSSIBILITY OF SUCH DAMAGE.

namespace vvenc {

using namespace x86_simd;
using namespace arm_simd;

#define IF_INTERNAL_PREC 14 ///< Number of bits for internal precision
#define IF_FILTER_PREC 6 ///< Log2 of sum of filter taps
#define IF_INTERNAL_OFFS (1<<(IF_INTERNAL_PREC-1)) ///< Offset used internally
Expand Down Expand Up @@ -117,6 +120,12 @@ class InterpolationFilter
template <X86_VEXT vext>
void _initInterpolationFilterX86();
#endif

// ARM initialisation hooks, declared only when TARGET_SIMD_ARM is defined.
// The templated variant is parameterised on the ARM_VEXT level.
// NOTE(review): presumably the non-template function dispatches to the template for
// the detected extension level - confirm in the ARM implementation file.
#ifdef TARGET_SIMD_ARM
void initInterpolationFilterARM();
template <ARM_VEXT vext>
void _initInterpolationFilterARM();
#endif

void filterN2_2D(const ComponentID compID, Pel const *src, int srcStride, Pel* dst, int dstStride, int width, int height, int fracX, int fracY, const ClpRng& clpRng);
void filter4x4 (const ComponentID compID, Pel const *src, int srcStride, Pel* dst, int dstStride, int width, int height, int fracX, int fracY, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, bool useAltHpelIf = false, int nFilterIdx = 0);
Expand Down
2 changes: 2 additions & 0 deletions source/Lib/CommonLib/IntraPrediction.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ POSSIBILITY OF SUCH DAMAGE.

namespace vvenc {

using namespace x86_simd;

// ====================================================================================================================
// Class definition
// ====================================================================================================================
Expand Down
9 changes: 0 additions & 9 deletions source/Lib/CommonLib/LoopFilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,6 @@ const uint8_t LoopFilter::sm_betaTable[MAX_QP + 1] =
// utility functions
// ====================================================================================================================

#define INCX( ptr, stride ) { ptr++; }
#define INCY( ptr, stride ) { ptr += ( stride ); }
#define OFFSETX( ptr, stride, x ) { ptr += ( x ); }
#define OFFSETY( ptr, stride, y ) { ptr += ( y ) * ( stride ); }
#define OFFSET( ptr, stride, x, y ) { ptr += ( x ) + ( y ) * ( stride ); }
#define GET_OFFSETX( ptr, stride, x ) ( ( ptr ) + ( x ) )
#define GET_OFFSETY( ptr, stride, y ) ( ( ptr ) + ( y ) * ( stride ) )
#define GET_OFFSET( ptr, stride, x, y ) ( ( ptr ) + ( x ) + ( y ) * ( stride ) )

#define BsSet( val, compIdx ) ( ( val ) << ( ( compIdx ) << 1 ) )
#define BsGet( val, compIdx ) ( ( ( val ) >> ( ( compIdx ) << 1 ) ) & 3 )

Expand Down
1 change: 1 addition & 0 deletions source/Lib/CommonLib/LoopFilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ POSSIBILITY OF SUCH DAMAGE.

namespace vvenc {

using namespace x86_simd;

#define DEBLOCK_SMALLEST_BLOCK 8

Expand Down
2 changes: 2 additions & 0 deletions source/Lib/CommonLib/MCTF.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ POSSIBILITY OF SUCH DAMAGE.

namespace vvenc {

using namespace x86_simd;

class NoMallocThreadPool;

//! \ingroup EncoderLib
Expand Down
2 changes: 2 additions & 0 deletions source/Lib/CommonLib/Quant.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ POSSIBILITY OF SUCH DAMAGE.

namespace vvenc {

using namespace x86_simd;

// ====================================================================================================================
// Constants
// ====================================================================================================================
Expand Down
2 changes: 1 addition & 1 deletion source/Lib/CommonLib/QuantRDOQ2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ int QuantRDOQ2::xRateDistOptQuantFast( TransformUnit &tu, const ComponentID &com

const bool scanFirstBlk = !bUseScalingList && log2CGSize == 4 && cctx.log2CGWidth() == 2;
#if ENABLE_SIMD_OPT_QUANT && defined( TARGET_SIMD_X86 )
const bool isSimd = read_x86_extension_flags() > SCALAR;
const bool isSimd = read_x86_extension_flags() > x86_simd::SCALAR;
#endif

int subSetId = iScanPos >> log2CGSize;
Expand Down
3 changes: 3 additions & 0 deletions source/Lib/CommonLib/RdCost.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ void RdCost::create()
#ifdef TARGET_SIMD_X86
initRdCostX86();
#endif
#ifdef TARGET_SIMD_ARM
initRdCostARM();
#endif
#endif

m_costMode = VVENC_COST_STANDARD_LOSSY;
Expand Down
Loading

0 comments on commit ed1fa2d

Please sign in to comment.