diff --git a/include/box2d/box2d.h b/include/box2d/box2d.h index a23d3f1dd..af3eec88a 100644 --- a/include/box2d/box2d.h +++ b/include/box2d/box2d.h @@ -68,14 +68,15 @@ B2_API void b2World_OverlapPolygon( b2WorldId worldId, const b2Polygon* polygon, /// Cast a ray into the world to collect shapes in the path of the ray. /// Your callback function controls whether you get the closest point, any point, or n-points. /// The ray-cast ignores shapes that contain the starting point. +/// @note The callback function may receive shapes in any order /// @param worldId The world to cast the ray against /// @param origin The start point of the ray /// @param translation The translation of the ray from the start point to the end point /// @param filter Contains bit flags to filter unwanted shapes from the results /// @param fcn A user implemented callback function /// @param context A user context that is passed along to the callback function -/// @note The callback function may receive shapes in any order -B2_API void b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, +/// @return traversal performance counters +B2_API b2TraversalResult b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context ); /// Cast a ray into the world to collect the closest hit. This is a convenience function. diff --git a/include/box2d/collision.h b/include/box2d/collision.h index 0e55b9aa3..73e1ec250 100644 --- a/include/box2d/collision.h +++ b/include/box2d/collision.h @@ -29,7 +29,7 @@ typedef struct b2Hull b2Hull; /// don't use more vertices. 
#define b2_maxPolygonVertices 8 -/// Low level ray-cast input data +/// Low level ray cast input data typedef struct b2RayCastInput { /// Start point of the ray cast @@ -63,7 +63,7 @@ typedef struct b2ShapeCastInput float maxFraction; } b2ShapeCastInput; -/// Low level ray-cast or shape-cast output data +/// Low level ray cast or shape-cast output data typedef struct b2CastOutput { /// The surface normal at the hit point @@ -602,10 +602,12 @@ B2_API b2Manifold b2CollideChainSegmentAndPolygon( const b2ChainSegment* segment /// The default category bit for a tree proxy. Used for collision filtering. #define b2_defaultCategoryBits ( 1 ) -/// Convenience mask bits to use when you don't need collision filtering and just want -/// all results. +/// Convenience mask bits to use when you don't need collision filtering and just want all results. #define b2_defaultMaskBits ( UINT64_MAX ) +#define B2_TREE_32 0 + +#if B2_TREE_32 == 0 /// A node in the dynamic tree. This is private data placed here for performance reasons. typedef struct b2TreeNode { @@ -627,23 +629,62 @@ typedef struct b2TreeNode /// Child 1 index int32_t child1; // 4 - /// Child 2 index - int32_t child2; // 4 + union + { + /// Child 2 index + int32_t child2; - /// User data - // todo could be union with child index - int32_t userData; // 4 + /// User data + int32_t userData; + }; // 4 /// Leaf = 0, free node = -1 - int16_t height; // 2 + uint16_t height; // 2 + uint16_t flags; +} b2TreeNode; - /// Has the AABB been enlarged? 
- bool enlarged; // 1 +#else + +struct b2InternalNode +{ + int32_t child1; + int32_t child2; +}; + +struct b2LeafNode +{ + // limited to 32 bits, see b2TreeNode32::e_category64 + uint32_t categoryBits; + int32_t userData; +}; + +typedef struct b2TreeNode +{ + /// The node bounding box + b2AABB aabb; // 16 + + union + { + struct b2InternalNode internal; + struct b2LeafNode leaf; + }; // 8 + + union + { + /// The node parent index + int32_t parent; + + /// The node freelist next index + int32_t next; + }; // 4 + + uint16_t height; // 2 + uint16_t flags; // 2 - /// Padding for clarity - char pad[5]; } b2TreeNode; +#endif + /// The dynamic tree structure. This should be considered private data. /// It is placed here for performance reasons. typedef struct b2DynamicTree @@ -682,6 +723,13 @@ typedef struct b2DynamicTree int32_t rebuildCapacity; } b2DynamicTree; +/// These are performance results returned by BVH queries. +typedef struct b2TraversalResult +{ + int32_t nodeVisits; + int32_t leafVisits; +} b2TraversalResult; + /// Constructing the tree initializes the node pool. B2_API b2DynamicTree b2DynamicTree_Create( void ); @@ -708,41 +756,43 @@ typedef bool b2TreeQueryCallbackFcn( int32_t proxyId, int32_t userData, void* co B2_API void b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskBits, b2TreeQueryCallbackFcn* callback, void* context ); -/// This function receives clipped raycast input for a proxy. The function +/// This function receives clipped ray cast input for a proxy. The function /// returns the new ray fraction. /// - return a value of 0 to terminate the ray cast /// - return a value less than input->maxFraction to clip the ray /// - return a value of input->maxFraction to continue the ray cast without clipping typedef float b2TreeRayCastCallbackFcn( const b2RayCastInput* input, int32_t proxyId, int32_t userData, void* context ); -/// Ray-cast against the proxies in the tree. 
This relies on the callback -/// to perform a exact ray-cast in the case were the proxy contains a shape. +/// Ray cast against the proxies in the tree. This relies on the callback +/// to perform an exact ray cast in the case where the proxy contains a shape. /// The callback also performs the any collision filtering. This has performance /// roughly equal to k * log(n), where k is the number of collisions and n is the /// number of proxies in the tree. /// Bit-wise filtering using mask bits can greatly improve performance in some scenarios. +/// However, this filtering may be approximate, so the user should still apply filtering to results. /// @param tree the dynamic tree to ray cast -/// @param input the ray-cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1) -/// @param maskBits filter bits: `bool accept = (maskBits & node->categoryBits) != 0;` +/// @param input the ray cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1) +/// @param maskBits mask bit hint: `bool accept = (maskBits & node->categoryBits) != 0;` /// @param callback a callback class that is called for each proxy that is hit by the ray /// @param context user context that is passed to the callback -B2_API void b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, +/// @return performance data +B2_API b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, b2TreeRayCastCallbackFcn* callback, void* context ); -/// This function receives clipped ray-cast input for a proxy. The function +/// This function receives clipped ray cast input for a proxy. The function /// returns the new ray fraction. 
-/// - return a value of 0 to terminate the ray-cast +/// - return a value of 0 to terminate the ray cast /// - return a value less than input->maxFraction to clip the ray /// - return a value of input->maxFraction to continue the ray cast without clipping typedef float b2TreeShapeCastCallbackFcn( const b2ShapeCastInput* input, int32_t proxyId, int32_t userData, void* context ); -/// Ray-cast against the proxies in the tree. This relies on the callback -/// to perform a exact ray-cast in the case were the proxy contains a shape. +/// Ray cast against the proxies in the tree. This relies on the callback +/// to perform an exact ray cast in the case where the proxy contains a shape. /// The callback also performs the any collision filtering. This has performance /// roughly equal to k * log(n), where k is the number of collisions and n is the /// number of proxies in the tree. /// @param tree the dynamic tree to ray cast -/// @param input the ray-cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1). +/// @param input the ray cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1). 
/// @param maskBits filter bits: `bool accept = (maskBits & node->categoryBits) != 0;` /// @param callback a callback class that is called for each proxy that is hit by the shape /// @param context user context that is passed to the callback @@ -780,6 +830,8 @@ B2_API void b2DynamicTree_ShiftOrigin( b2DynamicTree* tree, b2Vec2 newOrigin ); /// Get the number of bytes used by this tree B2_API int b2DynamicTree_GetByteCount( const b2DynamicTree* tree ); +#if B2_TREE_32 == 0 + /// Get proxy user data /// @return the proxy user data or 0 if the id is invalid B2_INLINE int32_t b2DynamicTree_GetUserData( const b2DynamicTree* tree, int32_t proxyId ) @@ -787,6 +839,17 @@ B2_INLINE int32_t b2DynamicTree_GetUserData( const b2DynamicTree* tree, int32_t return tree->nodes[proxyId].userData; } +#else + +/// Get proxy user data +/// @return the proxy user data or 0 if the id is invalid +B2_INLINE int32_t b2DynamicTree_GetUserData( const b2DynamicTree* tree, int32_t proxyId ) +{ + return tree->nodes[proxyId].leaf.userData; +} + +#endif + /// Get the AABB of a proxy B2_INLINE b2AABB b2DynamicTree_GetAABB( const b2DynamicTree* tree, int32_t proxyId ) { diff --git a/include/box2d/types.h b/include/box2d/types.h index ead84a045..672e105b8 100644 --- a/include/box2d/types.h +++ b/include/box2d/types.h @@ -58,6 +58,8 @@ typedef struct b2RayResult b2Vec2 point; b2Vec2 normal; float fraction; + int nodeVisits; + int leafVisits; bool hit; } b2RayResult; diff --git a/samples/sample_benchmark.cpp b/samples/sample_benchmark.cpp index af21f438d..158153f66 100644 --- a/samples/sample_benchmark.cpp +++ b/samples/sample_benchmark.cpp @@ -339,7 +339,6 @@ class BenchmarkTumbler : public Sample circle = { { -5.0f, 5.0f }, 1.0f }; b2CreateCircleShape( bodyId, &shapeDef, &circle ); - // m_motorSpeed = 9.0f; m_motorSpeed = 25.0f; @@ -1554,6 +1553,7 @@ enum QueryType class BenchmarkCast : public Sample { public: + explicit BenchmarkCast( Settings& settings ) : Sample( settings ) { @@ -1561,6 +1561,7 @@ 
class BenchmarkCast : public Sample { g_camera.m_center = { 500.0f, 500.0f }; g_camera.m_zoom = 25.0f * 21.0f; + settings.drawShapes = g_sampleDebug; } m_queryType = e_rayCast; @@ -1569,7 +1570,24 @@ class BenchmarkCast : public Sample m_fill = 0.1f; m_rowCount = g_sampleDebug ? 100 : 1000; m_columnCount = g_sampleDebug ? 100 : 1000; - m_categoryBits = true; + m_minTime = 1e6f; + m_drawIndex = 0; + + g_seed = 1234; + int sampleCount = g_sampleDebug ? 100 : 10000; + m_origins.resize( sampleCount ); + m_translations.resize( sampleCount ); + float extent = m_rowCount * m_grid; + + // Pre-compute rays to avoid randomizer overhead + for ( int i = 0; i < sampleCount; ++i ) + { + b2Vec2 rayStart = RandomVec2( 0.0f, extent ); + b2Vec2 rayEnd = RandomVec2( 0.0f, extent ); + + m_origins[i] = rayStart; + m_translations[i] = rayEnd - rayStart; + } BuildScene(); } @@ -1611,13 +1629,13 @@ class BenchmarkCast : public Sample box = b2MakeBox( halfWidth, ratio * halfWidth ); } - int category = RandomInt( 1, 3 ); - shapeDef.filter.categoryBits = category; - if ( category == 1 ) + int category = RandomInt( 0, 2 ); + shapeDef.filter.categoryBits = 1 << category; + if ( category == 0 ) { shapeDef.customColor = b2_colorBox2DBlue; } - else if ( category == 2 ) + else if ( category == 1 ) { shapeDef.customColor = b2_colorBox2DYellow; } @@ -1638,7 +1656,7 @@ class BenchmarkCast : public Sample void UpdateUI() override { - float height = 320.0f; + float height = 220.0f; ImGui::SetNextWindowPos( ImVec2( 10.0f, g_camera.m_height - height - 50.0f ), ImGuiCond_Once ); ImGui::SetNextWindowSize( ImVec2( 200.0f, height ) ); @@ -1672,16 +1690,16 @@ class BenchmarkCast : public Sample changed = true; } - if ( ImGui::Checkbox( "categories", &m_categoryBits) ) - { - changed = true; - } - const char* queryTypes[] = { "Ray Cast", "Circle Cast", "Overlap" }; int queryType = int( m_queryType ); changed = changed || ImGui::Combo( "Query", &queryType, queryTypes, IM_ARRAYSIZE( queryTypes ) ); m_queryType = 
QueryType( queryType ); + if ( ImGui::Button( "Draw Next" ) ) + { + m_drawIndex = ( m_drawIndex + 1 ) % m_origins.size(); + } + ImGui::PopItemWidth(); ImGui::End(); @@ -1691,42 +1709,73 @@ class BenchmarkCast : public Sample } } - void Step( Settings& settings) override + void Step( Settings& settings ) override { Sample::Step( settings ); - int sampleCount = g_sampleDebug ? 10 : 1000; - - float extent = m_rowCount * m_grid; b2QueryFilter filter = b2DefaultQueryFilter(); filter.maskBits = 1; int hitCount = 0; + int nodeVisits = 0; + int leafVisits = 0; float ms = 0.0f; + int sampleCount = m_origins.size(); - if (m_queryType == e_rayCast) + if ( m_queryType == e_rayCast ) { b2Timer timer = b2CreateTimer(); - b2Vec2 rayStart = b2Vec2_zero; - b2Vec2 rayEnd = b2Vec2_zero; - for (int i = 0; i < sampleCount; ++i) + b2RayResult drawResult = {}; + + for ( int i = 0; i < sampleCount; ++i ) { - rayStart = RandomVec2( 0.0f, extent ); - rayEnd = RandomVec2( 0.0f, extent ); + b2Vec2 origin = m_origins[i]; + b2Vec2 translation = m_translations[i]; + + // todo for breakpoint + if (i == 2) + { + i += 0; + } + + b2RayResult result = b2World_CastRayClosest( m_worldId, origin, translation, filter ); + + if (i == m_drawIndex) + { + drawResult = result; + } - b2RayResult result = b2World_CastRayClosest( m_worldId, rayStart, b2Sub( rayEnd, rayStart ), filter ); + nodeVisits += result.nodeVisits; + leafVisits += result.leafVisits; hitCount += result.hit ? 
1 : 0; } ms = b2GetMilliseconds( &timer ); - - g_draw.DrawSegment( rayStart, rayEnd, b2_colorBeige ); + + m_minTime = b2MinFloat( m_minTime, ms ); + + b2Vec2 p1 = m_origins[m_drawIndex]; + b2Vec2 p2 = p1 + m_translations[m_drawIndex]; + g_draw.DrawSegment( p1, p2, b2_colorWhite ); + g_draw.DrawPoint( p1, 5.0f, b2_colorGreen ); + g_draw.DrawPoint( p2, 5.0f, b2_colorRed ); + if (drawResult.hit) + { + g_draw.DrawPoint( drawResult.point, 5.0f, b2_colorWhite ); + } } - g_draw.DrawString( 5, m_textLine, "hit count = %03d", hitCount ); + g_draw.DrawString( 5, m_textLine, "hit count = %d, node visits = %d, leaf visits = %d", hitCount, nodeVisits, leafVisits ); + m_textLine += m_textIncrement; + + g_draw.DrawString( 5, m_textLine, "total ms = %.3f", ms ); + m_textLine += m_textIncrement; + + g_draw.DrawString( 5, m_textLine, "min total ms = %.3f", m_minTime ); m_textLine += m_textIncrement; - g_draw.DrawString( 5, m_textLine, "ms = %.3f",ms ); + float aveRayCost = 1000.0f * m_minTime / float( sampleCount ); + g_draw.DrawString( 5, m_textLine, "average ray us = %.2f", aveRayCost ); m_textLine += m_textIncrement; } @@ -1739,13 +1788,14 @@ class BenchmarkCast : public Sample std::vector m_origins; std::vector m_translations; + float m_minTime; int m_rowCount, m_columnCount; int m_updateType; + int m_drawIndex; float m_fill; float m_ratio; float m_grid; - bool m_categoryBits; }; static int sampleCast = RegisterSample( "Benchmark", "Cast", BenchmarkCast::Create ); diff --git a/samples/sample_collision.cpp b/samples/sample_collision.cpp index 9241152e4..74760010b 100644 --- a/samples/sample_collision.cpp +++ b/samples/sample_collision.cpp @@ -691,11 +691,14 @@ class DynamicTree : public Sample if ( m_rayDrag ) { b2RayCastInput input = { m_startPoint, b2Sub( m_endPoint, m_startPoint ), 1.0f }; - b2DynamicTree_RayCast( &m_tree, &input, b2_defaultMaskBits, RayCallback, this ); + b2TraversalResult result = b2DynamicTree_RayCast( &m_tree, &input, b2_defaultMaskBits, RayCallback, this 
); g_draw.DrawSegment( m_startPoint, m_endPoint, b2_colorWhite ); g_draw.DrawPoint( m_startPoint, 5.0f, b2_colorGreen ); g_draw.DrawPoint( m_endPoint, 5.0f, b2_colorRed ); + + g_draw.DrawString( 5, m_textLine, "node visits = %d, leaf visits = %d", result.nodeVisits, result.leafVisits ); + m_textLine += m_textIncrement; } b2HexColor c = b2_colorBlue; diff --git a/src/broad_phase.c b/src/broad_phase.c index 7502c2f9f..1dcc1504f 100644 --- a/src/broad_phase.c +++ b/src/broad_phase.c @@ -479,12 +479,11 @@ void b2ValidateNoEnlarged( const b2BroadPhase* bp ) continue; } - if ( node->enlarged == true ) - { - capacity += 0; - } - +#if B2_TREE_32 == 0 B2_ASSERT( node->enlarged == false ); +#else + B2_ASSERT( (node->flags & b2_enlargedNode) == 0 ); +#endif } } #else diff --git a/src/core.c b/src/core.c index 030fb7282..8e8701d8f 100644 --- a/src/core.c +++ b/src/core.c @@ -78,7 +78,7 @@ void b2SetAllocator( b2AllocFcn* allocFcn, b2FreeFcn* freeFcn ) } // Use 32 byte alignment for everything. Works with 256bit SIMD. -#define B2_ALIGNMENT 32 +#define B2_ALIGNMENT 64 void* b2Alloc( int size ) { diff --git a/src/core.h b/src/core.h index a37ec42a0..1890c8b4c 100644 --- a/src/core.h +++ b/src/core.h @@ -178,6 +178,14 @@ extern float b2_lengthUnitsPerMeter; #define b2CheckDef( DEF ) B2_ASSERT( DEF->internalValue == B2_SECRET_COOKIE ) +enum b2TreeNodeFlags +{ + b2_allocatedNode = 0x0001, + b2_enlargedNode = 0x0002, + b2_leafNode = 0x0004, + b2_category64 = 0x0008, +}; + void* b2Alloc( int size ); void b2Free( void* mem, int size ); void* b2GrowAlloc( void* oldMem, int oldSize, int newSize ); diff --git a/src/dynamic_tree.c b/src/dynamic_tree.c index 92885d1fc..4769dc30a 100644 --- a/src/dynamic_tree.c +++ b/src/dynamic_tree.c @@ -15,9 +15,11 @@ // TODO_ERIN // - try incrementally sorting internal nodes by height for better cache efficiency during depth first traversal. 
+#if B2_TREE_32 == 0 + static b2TreeNode b2_defaultTreeNode = { - { { 0.0f, 0.0f }, { 0.0f, 0.0f } }, 0, { B2_NULL_INDEX }, B2_NULL_INDEX, B2_NULL_INDEX, -1, -2, false, - { 0, 0, 0, 0, 0 } }; + .aabb = { { 0.0f, 0.0f }, { 0.0f, 0.0f } }, + 0, { B2_NULL_INDEX }, B2_NULL_INDEX, B2_NULL_INDEX, -1, -2, false, { 0, 0, 0, 0, 0 } }; static inline bool b2IsLeaf( const b2TreeNode* node ) { @@ -1157,9 +1159,11 @@ void b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskB } } -void b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, - b2TreeRayCastCallbackFcn* callback, void* context ) +b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, + b2TreeRayCastCallbackFcn* callback, void* context ) { + b2TraversalResult result = { 0 }; + b2Vec2 p1 = input->origin; b2Vec2 d = input->translation; @@ -1194,7 +1198,11 @@ void b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* inp } const b2TreeNode* node = tree->nodes + nodeId; - if ( b2AABB_Overlaps( node->aabb, segmentAABB ) == false || ( node->categoryBits & maskBits ) == 0 ) + result.nodeVisits += 1; + + b2AABB nodeAABB = node->aabb; + + if ( ( node->categoryBits & maskBits ) == 0 || b2AABB_Overlaps( nodeAABB, segmentAABB ) == false ) { continue; } @@ -1202,8 +1210,8 @@ void b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* inp // Separating axis for segment (Gino, p80). 
// |dot(v, p1 - c)| > dot(|v|, h) // radius extension is added to the node in this case - b2Vec2 c = b2AABB_Center( node->aabb ); - b2Vec2 h = b2AABB_Extents( node->aabb ); + b2Vec2 c = b2AABB_Center( nodeAABB ); + b2Vec2 h = b2AABB_Extents( nodeAABB ); float term1 = b2AbsFloat( b2Dot( v, b2Sub( p1, c ) ) ); float term2 = b2Dot( abs_v, h ); if ( term2 < term1 ) @@ -1216,11 +1224,12 @@ void b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* inp subInput.maxFraction = maxFraction; float value = callback( &subInput, nodeId, node->userData, context ); + result.leafVisits += 1; if ( value == 0.0f ) { // The client has terminated the ray cast. - return; + return result; } if ( 0.0f < value && value < maxFraction ) @@ -1244,6 +1253,8 @@ void b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* inp } } } + + return result; } void b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits, @@ -1472,7 +1483,7 @@ static int32_t b2PartitionMid( int32_t* indices, b2Vec2* centers, int32_t count #else - #define B2_BIN_COUNT 8 +#define B2_BIN_COUNT 8 typedef struct b2TreeBin { @@ -1923,3 +1934,1760 @@ int32_t b2DynamicTree_Rebuild( b2DynamicTree* tree, bool fullBuild ) return leafCount; } + +#else + +/* + typedef struct b2TreeNode +{ + enum + { + e_enlarged = 0x0001, + e_free = 0x0002, + e_leaf = 0x0004, + e_category64 = 0x0008, + }; + + /// The node bounding box + b2AABB aabb; // 16 + + union + { + struct b2InternalNode internal; + struct b2LeafNode leaf; + }; // 8 + + union + { + /// The node parent index + int32_t parent; + + /// The node freelist next index + int32_t next; + }; // 4 + + uint16_t height; // 2 + uint16_t flags; // 2 + +} b2TreeNode; + + */ + +static b2TreeNode b2_defaultTreeNode = { + .aabb = { { 0.0f, 0.0f }, { 0.0f, 0.0f } }, + .internal = { B2_NULL_INDEX, B2_NULL_INDEX }, + .parent = B2_NULL_INDEX, + .height = 0, + .flags = b2_allocatedNode, +}; + +static inline bool b2IsLeaf( 
const b2TreeNode* node ) +{ + return (bool)(node->flags & b2_leafNode); +} + +static inline uint16_t b2MaxUInt16( uint16_t a, uint16_t b ) +{ + return a > b ? a : b; +} + +b2DynamicTree b2DynamicTree_Create( void ) +{ + _Static_assert( ( sizeof( b2TreeNode ) & 0xF ) == 0, "tree node size not a multiple of 16" ); + + b2DynamicTree tree; + tree.root = B2_NULL_INDEX; + tree.nodeCapacity = 16; + tree.nodeCount = 0; + tree.nodes = (b2TreeNode*)b2Alloc( tree.nodeCapacity * sizeof( b2TreeNode ) ); + memset( tree.nodes, 0, tree.nodeCapacity * sizeof( b2TreeNode ) ); + + // Build a linked list for the free list. + for ( int32_t i = 0; i < tree.nodeCapacity - 1; ++i ) + { + tree.nodes[i].next = i + 1; + } + + tree.nodes[tree.nodeCapacity - 1].next = B2_NULL_INDEX; + tree.freeList = 0; + tree.proxyCount = 0; + tree.leafIndices = NULL; + tree.leafBoxes = NULL; + tree.leafCenters = NULL; + tree.binIndices = NULL; + tree.rebuildCapacity = 0; + + return tree; +} + +void b2DynamicTree_Destroy( b2DynamicTree* tree ) +{ + b2Free( tree->nodes, tree->nodeCapacity * sizeof( b2TreeNode ) ); + b2Free( tree->leafIndices, tree->rebuildCapacity * sizeof( int32_t ) ); + b2Free( tree->leafBoxes, tree->rebuildCapacity * sizeof( b2AABB ) ); + b2Free( tree->leafCenters, tree->rebuildCapacity * sizeof( b2Vec2 ) ); + b2Free( tree->binIndices, tree->rebuildCapacity * sizeof( int32_t ) ); + + memset( tree, 0, sizeof( b2DynamicTree ) ); +} + +// Allocate a node from the pool. Grow the pool if necessary. +static int32_t b2AllocateNode( b2DynamicTree* tree ) +{ + // Expand the node pool as needed. + if ( tree->freeList == B2_NULL_INDEX ) + { + B2_ASSERT( tree->nodeCount == tree->nodeCapacity ); + + // The free list is empty. Rebuild a bigger pool. 
+ b2TreeNode* oldNodes = tree->nodes; + int32_t oldCapcity = tree->nodeCapacity; + tree->nodeCapacity += oldCapcity >> 1; + tree->nodes = (b2TreeNode*)b2Alloc( tree->nodeCapacity * sizeof( b2TreeNode ) ); + B2_ASSERT( oldNodes != NULL ); + memcpy( tree->nodes, oldNodes, tree->nodeCount * sizeof( b2TreeNode ) ); + b2Free( oldNodes, oldCapcity * sizeof( b2TreeNode ) ); + + // Build a linked list for the free list + for ( int32_t i = tree->nodeCount; i < tree->nodeCapacity - 1; ++i ) + { + tree->nodes[i].next = i + 1; + } + tree->nodes[tree->nodeCapacity - 1].next = B2_NULL_INDEX; + tree->freeList = tree->nodeCount; + } + + // Peel a node off the free list. + int32_t nodeIndex = tree->freeList; + b2TreeNode* node = tree->nodes + nodeIndex; + tree->freeList = node->next; + *node = b2_defaultTreeNode; + ++tree->nodeCount; + return nodeIndex; +} + +// Return a node to the pool. +static void b2FreeNode( b2DynamicTree* tree, int32_t nodeId ) +{ + B2_ASSERT( 0 <= nodeId && nodeId < tree->nodeCapacity ); + B2_ASSERT( 0 < tree->nodeCount ); + tree->nodes[nodeId].next = tree->freeList; + tree->nodes[nodeId].flags = 0; + tree->freeList = nodeId; + --tree->nodeCount; +} + +// Greedy algorithm for sibling selection using the SAH +// We have three nodes A-(B,C) and want to add a leaf D, there are three choices. +// 1: make a new parent for A and D : E-(A-(B,C), D) +// 2: associate D with B +// a: B is a leaf : A-(E-(B,D), C) +// b: B is an internal node: A-(B{D},C) +// 3: associate D with C +// a: C is a leaf : A-(B, E-(C,D)) +// b: C is an internal node: A-(B, C{D}) +// All of these have a clear cost except when B or C is an internal node. Hence we need to be greedy. + +// The cost for cases 1, 2a, and 3a can be computed using the sibling cost formula. 
+// cost of sibling H = area(union(H, D)) + increased area of ancestors + +// Suppose B (or C) is an internal node, then the lowest cost would be one of two cases: +// case1: D becomes a sibling of B +// case2: D becomes a descendant of B along with a new internal node of area(D). +static int32_t b2FindBestSibling( const b2DynamicTree* tree, b2AABB boxD ) +{ + b2Vec2 centerD = b2AABB_Center( boxD ); + float areaD = b2Perimeter( boxD ); + + const b2TreeNode* nodes = tree->nodes; + int32_t rootIndex = tree->root; + + b2AABB rootBox = nodes[rootIndex].aabb; + + // Area of current node + float areaBase = b2Perimeter( rootBox ); + + // Area of inflated node + float directCost = b2Perimeter( b2AABB_Union( rootBox, boxD ) ); + float inheritedCost = 0.0f; + + int32_t bestSibling = rootIndex; + float bestCost = directCost; + + // Descend the tree from root, following a single greedy path. + int32_t index = rootIndex; + while ( nodes[index].height > 0 ) + { + int32_t child1 = nodes[index].internal.child1; + int32_t child2 = nodes[index].internal.child2; + + // Cost of creating a new parent for this node and the new leaf + float cost = directCost + inheritedCost; + + // Sometimes there are multiple identical costs within tolerance. + // This breaks the ties using the centroid distance. 
+ if ( cost < bestCost ) + { + bestSibling = index; + bestCost = cost; + } + + // Inheritance cost seen by children + inheritedCost += directCost - areaBase; + + bool leaf1 = nodes[child1].height == 0; + bool leaf2 = nodes[child2].height == 0; + + // Cost of descending into child 1 + float lowerCost1 = FLT_MAX; + b2AABB box1 = nodes[child1].aabb; + float directCost1 = b2Perimeter( b2AABB_Union( box1, boxD ) ); + float area1 = 0.0f; + if ( leaf1 ) + { + // Child 1 is a leaf + // Cost of creating new node and increasing area of node P + float cost1 = directCost1 + inheritedCost; + + // Need this here due to while condition above + if ( cost1 < bestCost ) + { + bestSibling = child1; + bestCost = cost1; + } + } + else + { + // Child 1 is an internal node + area1 = b2Perimeter( box1 ); + + // Lower bound cost of inserting under child 1. + lowerCost1 = inheritedCost + directCost1 + b2MinFloat( areaD - area1, 0.0f ); + } + + // Cost of descending into child 2 + float lowerCost2 = FLT_MAX; + b2AABB box2 = nodes[child2].aabb; + float directCost2 = b2Perimeter( b2AABB_Union( box2, boxD ) ); + float area2 = 0.0f; + if ( leaf2 ) + { + // Child 2 is a leaf + // Cost of creating new node and increasing area of node P + float cost2 = directCost2 + inheritedCost; + + // Need this here due to while condition above + if ( cost2 < bestCost ) + { + bestSibling = child2; + bestCost = cost2; + } + } + else + { + // Child 2 is an internal node + area2 = b2Perimeter( box2 ); + + // Lower bound cost of inserting under child 2. This is not the cost + // of child 2, it is the best we can hope for under child 2. + lowerCost2 = inheritedCost + directCost2 + b2MinFloat( areaD - area2, 0.0f ); + } + + if ( leaf1 && leaf2 ) + { + break; + } + + // Can the cost possibly be decreased? 
+ if ( bestCost <= lowerCost1 && bestCost <= lowerCost2 ) + { + break; + } + + if ( lowerCost1 == lowerCost2 && leaf1 == false ) + { + B2_ASSERT( lowerCost1 < FLT_MAX ); + B2_ASSERT( lowerCost2 < FLT_MAX ); + + // No clear choice based on lower bound surface area. This can happen when both + // children fully contain D. Fall back to node distance. + b2Vec2 d1 = b2Sub( b2AABB_Center( box1 ), centerD ); + b2Vec2 d2 = b2Sub( b2AABB_Center( box2 ), centerD ); + lowerCost1 = b2LengthSquared( d1 ); + lowerCost2 = b2LengthSquared( d2 ); + } + + // Descend + if ( lowerCost1 < lowerCost2 && leaf1 == false ) + { + index = child1; + areaBase = area1; + directCost = directCost1; + } + else + { + index = child2; + areaBase = area2; + directCost = directCost2; + } + + B2_ASSERT( nodes[index].height > 0 ); + } + + return bestSibling; +} + +enum b2RotateType +{ + b2_rotateNone, + b2_rotateBF, + b2_rotateBG, + b2_rotateCD, + b2_rotateCE +}; + +// Perform a left or right rotation if node A is imbalanced. +// The tree is updated in place; nothing is returned. 
+static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) +{ + B2_ASSERT( iA != B2_NULL_INDEX ); + + b2TreeNode* nodes = tree->nodes; + + b2TreeNode* A = nodes + iA; + if ( A->height < 2 ) + { + return; + } + + int32_t iB = A->internal.child1; + int32_t iC = A->internal.child2; + B2_ASSERT( 0 <= iB && iB < tree->nodeCapacity ); + B2_ASSERT( 0 <= iC && iC < tree->nodeCapacity ); + + b2TreeNode* B = nodes + iB; + b2TreeNode* C = nodes + iC; + + if ( B->height == 0 ) + { + // B is a leaf and C is internal + B2_ASSERT( C->height > 0 ); + + int32_t iF = C->internal.child1; + int32_t iG = C->internal.child2; + b2TreeNode* F = nodes + iF; + b2TreeNode* G = nodes + iG; + B2_ASSERT( 0 <= iF && iF < tree->nodeCapacity ); + B2_ASSERT( 0 <= iG && iG < tree->nodeCapacity ); + + // Base cost + float costBase = b2Perimeter( C->aabb ); + + // Cost of swapping B and F + b2AABB aabbBG = b2AABB_Union( B->aabb, G->aabb ); + float costBF = b2Perimeter( aabbBG ); + + // Cost of swapping B and G + b2AABB aabbBF = b2AABB_Union( B->aabb, F->aabb ); + float costBG = b2Perimeter( aabbBF ); + + if ( costBase < costBF && costBase < costBG ) + { + // Rotation does not improve cost + return; + } + + if ( costBF < costBG ) + { + // Swap B and F + A->internal.child1 = iF; + C->internal.child1 = iB; + + B->parent = iC; + F->parent = iA; + + C->aabb = aabbBG; + + C->height = 1 + b2MaxUInt16( B->height, G->height ); + A->height = 1 + b2MaxUInt16( C->height, F->height ); + C->flags |= (B->flags | G->flags) & b2_enlargedNode; + A->flags |= (C->flags | F->flags) & b2_enlargedNode; + } + else + { + // Swap B and G + A->internal.child1 = iG; + C->internal.child2 = iB; + + B->parent = iC; + G->parent = iA; + + C->aabb = aabbBF; + + C->height = 1 + b2MaxUInt16( B->height, F->height ); + A->height = 1 + b2MaxUInt16( C->height, G->height ); + C->flags |= (B->flags | F->flags) & b2_enlargedNode; + A->flags |= (C->flags | G->flags) & b2_enlargedNode; + } + } + else if ( C->height == 0 ) + { + // C is a leaf 
and B is internal + B2_ASSERT( B->height > 0 ); + + int iD = B->internal.child1; + int iE = B->internal.child2; + b2TreeNode* D = nodes + iD; + b2TreeNode* E = nodes + iE; + B2_ASSERT( 0 <= iD && iD < tree->nodeCapacity ); + B2_ASSERT( 0 <= iE && iE < tree->nodeCapacity ); + + // Base cost + float costBase = b2Perimeter( B->aabb ); + + // Cost of swapping C and D + b2AABB aabbCE = b2AABB_Union( C->aabb, E->aabb ); + float costCD = b2Perimeter( aabbCE ); + + // Cost of swapping C and E + b2AABB aabbCD = b2AABB_Union( C->aabb, D->aabb ); + float costCE = b2Perimeter( aabbCD ); + + if ( costBase < costCD && costBase < costCE ) + { + // Rotation does not improve cost + return; + } + + if ( costCD < costCE ) + { + // Swap C and D + A->internal.child2 = iD; + B->internal.child1 = iC; + + C->parent = iB; + D->parent = iA; + + B->aabb = aabbCE; + + B->height = 1 + b2MaxUInt16( C->height, E->height ); + A->height = 1 + b2MaxUInt16( B->height, D->height ); + B->flags |= (C->flags | E->flags) & b2_enlargedNode; + A->flags |= (B->flags | D->flags) & b2_enlargedNode; + } + else + { + // Swap C and E + A->internal.child2 = iE; + B->internal.child2 = iC; + + C->parent = iB; + E->parent = iA; + + B->aabb = aabbCD; + B->height = 1 + b2MaxUInt16( C->height, D->height ); + A->height = 1 + b2MaxUInt16( B->height, E->height ); + B->flags |= (C->flags | D->flags) & b2_enlargedNode; + A->flags |= (B->flags | E->flags) & b2_enlargedNode; + } + } + else + { + int iD = B->internal.child1; + int iE = B->internal.child2; + int iF = C->internal.child1; + int iG = C->internal.child2; + + b2TreeNode* D = nodes + iD; + b2TreeNode* E = nodes + iE; + b2TreeNode* F = nodes + iF; + b2TreeNode* G = nodes + iG; + + B2_ASSERT( 0 <= iD && iD < tree->nodeCapacity ); + B2_ASSERT( 0 <= iE && iE < tree->nodeCapacity ); + B2_ASSERT( 0 <= iF && iF < tree->nodeCapacity ); + B2_ASSERT( 0 <= iG && iG < tree->nodeCapacity ); + + // Base cost + float areaB = b2Perimeter( B->aabb ); + float areaC = b2Perimeter( 
C->aabb ); + float costBase = areaB + areaC; + enum b2RotateType bestRotation = b2_rotateNone; + float bestCost = costBase; + + // Cost of swapping B and F + b2AABB aabbBG = b2AABB_Union( B->aabb, G->aabb ); + float costBF = areaB + b2Perimeter( aabbBG ); + if ( costBF < bestCost ) + { + bestRotation = b2_rotateBF; + bestCost = costBF; + } + + // Cost of swapping B and G + b2AABB aabbBF = b2AABB_Union( B->aabb, F->aabb ); + float costBG = areaB + b2Perimeter( aabbBF ); + if ( costBG < bestCost ) + { + bestRotation = b2_rotateBG; + bestCost = costBG; + } + + // Cost of swapping C and D + b2AABB aabbCE = b2AABB_Union( C->aabb, E->aabb ); + float costCD = areaC + b2Perimeter( aabbCE ); + if ( costCD < bestCost ) + { + bestRotation = b2_rotateCD; + bestCost = costCD; + } + + // Cost of swapping C and E + b2AABB aabbCD = b2AABB_Union( C->aabb, D->aabb ); + float costCE = areaC + b2Perimeter( aabbCD ); + if ( costCE < bestCost ) + { + bestRotation = b2_rotateCE; + // bestCost = costCE; + } + + switch ( bestRotation ) + { + case b2_rotateNone: + break; + + case b2_rotateBF: + A->internal.child1 = iF; + C->internal.child1 = iB; + + B->parent = iC; + F->parent = iA; + + C->aabb = aabbBG; + C->height = 1 + b2MaxUInt16( B->height, G->height ); + A->height = 1 + b2MaxUInt16( C->height, F->height ); + C->flags |= (B->flags | G->flags) & b2_enlargedNode; + A->flags |= (C->flags | F->flags) & b2_enlargedNode; + break; + + case b2_rotateBG: + A->internal.child1 = iG; + C->internal.child2 = iB; + + B->parent = iC; + G->parent = iA; + + C->aabb = aabbBF; + C->height = 1 + b2MaxUInt16( B->height, F->height ); + A->height = 1 + b2MaxUInt16( C->height, G->height ); + C->flags |= (B->flags | F->flags) & b2_enlargedNode; + A->flags |= (C->flags | G->flags) & b2_enlargedNode; + break; + + case b2_rotateCD: + A->internal.child2 = iD; + B->internal.child1 = iC; + + C->parent = iB; + D->parent = iA; + + B->aabb = aabbCE; + B->height = 1 + b2MaxUInt16( C->height, E->height ); + A->height = 1 
+ b2MaxUInt16( B->height, D->height ); + B->flags |= (C->flags | E->flags) & b2_enlargedNode; + A->flags |= (B->flags | D->flags) & b2_enlargedNode; + break; + + case b2_rotateCE: + A->internal.child2 = iE; + B->internal.child2 = iC; + + C->parent = iB; + E->parent = iA; + + B->aabb = aabbCD; + B->height = 1 + b2MaxUInt16( C->height, D->height ); + A->height = 1 + b2MaxUInt16( B->height, E->height ); + B->flags |= (C->flags | D->flags) & b2_enlargedNode; + A->flags |= (B->flags | E->flags) & b2_enlargedNode; + break; + + default: + B2_ASSERT( false ); + break; + } + } +} + +static void b2InsertLeaf( b2DynamicTree* tree, int32_t leaf, bool shouldRotate ) +{ + if ( tree->root == B2_NULL_INDEX ) + { + tree->root = leaf; + tree->nodes[tree->root].parent = B2_NULL_INDEX; + return; + } + + // Stage 1: find the best sibling for this node + b2AABB leafAABB = tree->nodes[leaf].aabb; + int32_t sibling = b2FindBestSibling( tree, leafAABB ); + + // Stage 2: create a new parent for the leaf and sibling + int32_t oldParent = tree->nodes[sibling].parent; + int32_t newParent = b2AllocateNode( tree ); + + // warning: node pointer can change after allocation + b2TreeNode* nodes = tree->nodes; + nodes[newParent].parent = oldParent; + nodes[newParent].aabb = b2AABB_Union( leafAABB, nodes[sibling].aabb ); + nodes[newParent].height = nodes[sibling].height + 1; + + if ( oldParent != B2_NULL_INDEX ) + { + // The sibling was not the root. + if ( nodes[oldParent].internal.child1 == sibling ) + { + nodes[oldParent].internal.child1 = newParent; + } + else + { + nodes[oldParent].internal.child2 = newParent; + } + + nodes[newParent].internal.child1 = sibling; + nodes[newParent].internal.child2 = leaf; + nodes[sibling].parent = newParent; + nodes[leaf].parent = newParent; + } + else + { + // The sibling was the root. 
+ nodes[newParent].internal.child1 = sibling; + nodes[newParent].internal.child2 = leaf; + nodes[sibling].parent = newParent; + nodes[leaf].parent = newParent; + tree->root = newParent; + } + + // Stage 3: walk back up the tree fixing heights and AABBs + int32_t index = nodes[leaf].parent; + while ( index != B2_NULL_INDEX ) + { + int32_t child1 = nodes[index].internal.child1; + int32_t child2 = nodes[index].internal.child2; + + B2_ASSERT( child1 != B2_NULL_INDEX ); + B2_ASSERT( child2 != B2_NULL_INDEX ); + + nodes[index].aabb = b2AABB_Union( nodes[child1].aabb, nodes[child2].aabb ); + nodes[index].height = 1 + b2MaxUInt16( nodes[child1].height, nodes[child2].height ); + nodes[index].flags |= (nodes[child1].flags | nodes[child2].flags) & b2_enlargedNode; + + if ( shouldRotate ) + { + b2RotateNodes( tree, index ); + } + + index = nodes[index].parent; + } +} + +static void b2RemoveLeaf( b2DynamicTree* tree, int32_t leaf ) +{ + if ( leaf == tree->root ) + { + tree->root = B2_NULL_INDEX; + return; + } + + b2TreeNode* nodes = tree->nodes; + + int32_t parent = nodes[leaf].parent; + int32_t grandParent = nodes[parent].parent; + int32_t sibling; + if ( nodes[parent].internal.child1 == leaf ) + { + sibling = nodes[parent].internal.child2; + } + else + { + sibling = nodes[parent].internal.child1; + } + + if ( grandParent != B2_NULL_INDEX ) + { + // Destroy parent and connect sibling to grandParent. + if ( nodes[grandParent].internal.child1 == parent ) + { + nodes[grandParent].internal.child1 = sibling; + } + else + { + nodes[grandParent].internal.child2 = sibling; + } + nodes[sibling].parent = grandParent; + b2FreeNode( tree, parent ); + + // Adjust ancestor bounds. 
+ int32_t index = grandParent; + while ( index != B2_NULL_INDEX ) + { + b2TreeNode* node = nodes + index; + b2TreeNode* child1 = nodes + node->internal.child1; + b2TreeNode* child2 = nodes + node->internal.child2; + + // Fast union using SSE + //__m128 aabb1 = _mm_load_ps(&child1->aabb.lowerBound.x); + //__m128 aabb2 = _mm_load_ps(&child2->aabb.lowerBound.x); + //__m128 lower = _mm_min_ps(aabb1, aabb2); + //__m128 upper = _mm_max_ps(aabb1, aabb2); + //__m128 aabb = _mm_shuffle_ps(lower, upper, _MM_SHUFFLE(3, 2, 1, 0)); + //_mm_store_ps(&node->aabb.lowerBound.x, aabb); + + node->aabb = b2AABB_Union( child1->aabb, child2->aabb ); + node->height = 1 + b2MaxUInt16( child1->height, child2->height ); + + index = node->parent; + } + } + else + { + tree->root = sibling; + tree->nodes[sibling].parent = B2_NULL_INDEX; + b2FreeNode( tree, parent ); + } +} + +// Create a proxy in the tree as a leaf node. We return the index of the node instead of a pointer so that we can grow +// the node pool. +int32_t b2DynamicTree_CreateProxy( b2DynamicTree* tree, b2AABB aabb, uint64_t categoryBits, int32_t userData ) +{ + B2_ASSERT( -b2_huge < aabb.lowerBound.x && aabb.lowerBound.x < b2_huge ); + B2_ASSERT( -b2_huge < aabb.lowerBound.y && aabb.lowerBound.y < b2_huge ); + B2_ASSERT( -b2_huge < aabb.upperBound.x && aabb.upperBound.x < b2_huge ); + B2_ASSERT( -b2_huge < aabb.upperBound.y && aabb.upperBound.y < b2_huge ); + + int32_t proxyId = b2AllocateNode( tree ); + b2TreeNode* node = tree->nodes + proxyId; + + node->aabb = aabb; + node->leaf.userData = userData; + node->leaf.categoryBits = (uint32_t)(categoryBits & UINT32_MAX); + node->height = 0; + node->flags |= b2_leafNode; + + bool shouldRotate = true; + b2InsertLeaf( tree, proxyId, shouldRotate ); + + tree->proxyCount += 1; + + // todo temp testing + //b2DynamicTree_Validate( tree ); + + return proxyId; +} + +void b2DynamicTree_DestroyProxy( b2DynamicTree* tree, int32_t proxyId ) +{ + B2_ASSERT( 0 <= proxyId && proxyId < 
tree->nodeCapacity ); + B2_ASSERT( b2IsLeaf( tree->nodes + proxyId ) ); + + b2RemoveLeaf( tree, proxyId ); + b2FreeNode( tree, proxyId ); + + B2_ASSERT( tree->proxyCount > 0 ); + tree->proxyCount -= 1; +} + +int32_t b2DynamicTree_GetProxyCount( const b2DynamicTree* tree ) +{ + return tree->proxyCount; +} + +void b2DynamicTree_MoveProxy( b2DynamicTree* tree, int32_t proxyId, b2AABB aabb ) +{ + B2_ASSERT( b2AABB_IsValid( aabb ) ); + B2_ASSERT( aabb.upperBound.x - aabb.lowerBound.x < b2_huge ); + B2_ASSERT( aabb.upperBound.y - aabb.lowerBound.y < b2_huge ); + B2_ASSERT( 0 <= proxyId && proxyId < tree->nodeCapacity ); + B2_ASSERT( b2IsLeaf( tree->nodes + proxyId ) ); + + b2RemoveLeaf( tree, proxyId ); + + tree->nodes[proxyId].aabb = aabb; + + bool shouldRotate = false; + b2InsertLeaf( tree, proxyId, shouldRotate ); +} + +void b2DynamicTree_EnlargeProxy( b2DynamicTree* tree, int32_t proxyId, b2AABB aabb ) +{ + b2TreeNode* nodes = tree->nodes; + + B2_ASSERT( b2AABB_IsValid( aabb ) ); + B2_ASSERT( aabb.upperBound.x - aabb.lowerBound.x < b2_huge ); + B2_ASSERT( aabb.upperBound.y - aabb.lowerBound.y < b2_huge ); + B2_ASSERT( 0 <= proxyId && proxyId < tree->nodeCapacity ); + B2_ASSERT( b2IsLeaf( tree->nodes + proxyId ) ); + + // Caller must ensure this + B2_ASSERT( b2AABB_Contains( nodes[proxyId].aabb, aabb ) == false ); + + nodes[proxyId].aabb = aabb; + + int32_t parentIndex = nodes[proxyId].parent; + while ( parentIndex != B2_NULL_INDEX ) + { + bool changed = b2EnlargeAABB( &nodes[parentIndex].aabb, aabb ); + nodes[parentIndex].flags |= b2_enlargedNode; + parentIndex = nodes[parentIndex].parent; + + if ( changed == false ) + { + break; + } + } + + while ( parentIndex != B2_NULL_INDEX ) + { + if ( nodes[parentIndex].flags & b2_enlargedNode ) + { + // early out because this ancestor was previously ascended and marked as enlarged + break; + } + + nodes[parentIndex].flags |= b2_enlargedNode; + parentIndex = nodes[parentIndex].parent; + } +} + +int b2DynamicTree_GetHeight( 
const b2DynamicTree* tree ) +{ + if ( tree->root == B2_NULL_INDEX ) + { + return 0; + } + + return tree->nodes[tree->root].height; +} + +float b2DynamicTree_GetAreaRatio( const b2DynamicTree* tree ) +{ + if ( tree->root == B2_NULL_INDEX ) + { + return 0.0f; + } + + const b2TreeNode* root = tree->nodes + tree->root; + float rootArea = b2Perimeter( root->aabb ); + + float totalArea = 0.0f; + for ( int32_t i = 0; i < tree->nodeCapacity; ++i ) + { + const b2TreeNode* node = tree->nodes + i; + if ( node->height < 0 || b2IsLeaf( node ) || i == tree->root ) + { + // Free node in pool + continue; + } + + totalArea += b2Perimeter( node->aabb ); + } + + return totalArea / rootArea; +} + +// Compute the height of a sub-tree. +static int b2ComputeHeight( const b2DynamicTree* tree, int32_t nodeId ) +{ + B2_ASSERT( 0 <= nodeId && nodeId < tree->nodeCapacity ); + b2TreeNode* node = tree->nodes + nodeId; + + if ( b2IsLeaf( node ) ) + { + return 0; + } + + int32_t height1 = b2ComputeHeight( tree, node->internal.child1 ); + int32_t height2 = b2ComputeHeight( tree, node->internal.child2 ); + return 1 + b2MaxInt( height1, height2 ); +} + +int b2DynamicTree_ComputeHeight( const b2DynamicTree* tree ) +{ + int height = b2ComputeHeight( tree, tree->root ); + return height; +} + +#if B2_VALIDATE +static void b2ValidateStructure( const b2DynamicTree* tree, int32_t index ) +{ + if ( index == B2_NULL_INDEX ) + { + return; + } + + if ( index == tree->root ) + { + B2_ASSERT( tree->nodes[index].parent == B2_NULL_INDEX ); + } + + const b2TreeNode* node = tree->nodes + index; + + int32_t child1 = node->internal.child1; + int32_t child2 = node->internal.child2; + + if ( b2IsLeaf( node ) ) + { + B2_ASSERT( node->height == 0 ); + return; + } + + B2_ASSERT( 0 <= child1 && child1 < tree->nodeCapacity ); + B2_ASSERT( 0 <= child2 && child2 < tree->nodeCapacity ); + + B2_ASSERT( tree->nodes[child1].parent == index ); + B2_ASSERT( tree->nodes[child2].parent == index ); + + if ( (tree->nodes[child1].flags | 
tree->nodes[child2].flags) & b2_enlargedNode ) + { + B2_ASSERT( node->flags & b2_enlargedNode ); + } + + b2ValidateStructure( tree, child1 ); + b2ValidateStructure( tree, child2 ); +} + +static void b2ValidateMetrics( const b2DynamicTree* tree, int32_t index ) +{ + if ( index == B2_NULL_INDEX ) + { + return; + } + + const b2TreeNode* node = tree->nodes + index; + + B2_ASSERT( node->flags & b2_allocatedNode ); + + if ( b2IsLeaf( node ) ) + { + B2_ASSERT( node->height == 0 ); + return; + } + + int child1 = node->internal.child1; + int child2 = node->internal.child2; + + B2_ASSERT( 0 <= child1 && child1 < tree->nodeCapacity ); + B2_ASSERT( 0 <= child2 && child2 < tree->nodeCapacity ); + + int height1 = tree->nodes[child1].height; + int height2 = tree->nodes[child2].height; + int height = 1 + b2MaxInt( height1, height2 ); + B2_ASSERT( node->height == height ); + + // b2AABB aabb = b2AABB_Union(tree->nodes[child1].aabb, tree->nodes[child2].aabb); + + B2_ASSERT( b2AABB_Contains( node->aabb, tree->nodes[child1].aabb ) ); + B2_ASSERT( b2AABB_Contains( node->aabb, tree->nodes[child2].aabb ) ); + + // B2_ASSERT(aabb.lowerBound.x == node->aabb.lowerBound.x); + // B2_ASSERT(aabb.lowerBound.y == node->aabb.lowerBound.y); + // B2_ASSERT(aabb.upperBound.x == node->aabb.upperBound.x); + // B2_ASSERT(aabb.upperBound.y == node->aabb.upperBound.y); + + b2ValidateMetrics( tree, child1 ); + b2ValidateMetrics( tree, child2 ); +} +#endif + +void b2DynamicTree_Validate( const b2DynamicTree* tree ) +{ +#if B2_VALIDATE + if ( tree->root == B2_NULL_INDEX ) + { + return; + } + + b2ValidateStructure( tree, tree->root ); + b2ValidateMetrics( tree, tree->root ); + + int32_t freeCount = 0; + int32_t freeIndex = tree->freeList; + while ( freeIndex != B2_NULL_INDEX ) + { + B2_ASSERT( 0 <= freeIndex && freeIndex < tree->nodeCapacity ); + freeIndex = tree->nodes[freeIndex].next; + ++freeCount; + } + + int32_t height = b2DynamicTree_GetHeight( tree ); + int32_t computedHeight = 
b2DynamicTree_ComputeHeight( tree ); + B2_ASSERT( height == computedHeight ); + + B2_ASSERT( tree->nodeCount + freeCount == tree->nodeCapacity ); +#else + B2_MAYBE_UNUSED( tree ); +#endif +} + +int32_t b2DynamicTree_GetMaxBalance( const b2DynamicTree* tree ) +{ + int maxBalance = 0; + for ( int i = 0; i < tree->nodeCapacity; ++i ) + { + const b2TreeNode* node = tree->nodes + i; + if ( node->height <= 1 ) + { + continue; + } + + B2_ASSERT( b2IsLeaf( node ) == false ); + + int child1 = node->internal.child1; + int child2 = node->internal.child2; + int balance = b2AbsInt( tree->nodes[child2].height - tree->nodes[child1].height ); + maxBalance = b2MaxInt( maxBalance, balance ); + } + + return maxBalance; +} + +void b2DynamicTree_RebuildBottomUp( b2DynamicTree* tree ) +{ + int* nodes = b2Alloc( tree->nodeCount * sizeof( int ) ); + int count = 0; + + // Build array of leaves. Free the rest. + for ( int i = 0; i < tree->nodeCapacity; ++i ) + { + if ( (tree->nodes[i].flags & b2_allocatedNode) == 0 ) + { + // free node in pool + continue; + } + + if ( b2IsLeaf( tree->nodes + i ) ) + { + tree->nodes[i].parent = B2_NULL_INDEX; + nodes[count] = i; + ++count; + } + else + { + b2FreeNode( tree, i ); + } + } + + while ( count > 1 ) + { + float minCost = FLT_MAX; + int32_t iMin = -1, jMin = -1; + for ( int32_t i = 0; i < count; ++i ) + { + b2AABB aabbi = tree->nodes[nodes[i]].aabb; + + for ( int32_t j = i + 1; j < count; ++j ) + { + b2AABB aabbj = tree->nodes[nodes[j]].aabb; + b2AABB b = b2AABB_Union( aabbi, aabbj ); + float cost = b2Perimeter( b ); + if ( cost < minCost ) + { + iMin = i; + jMin = j; + minCost = cost; + } + } + } + + int32_t index1 = nodes[iMin]; + int32_t index2 = nodes[jMin]; + b2TreeNode* child1 = tree->nodes + index1; + b2TreeNode* child2 = tree->nodes + index2; + + int32_t parentIndex = b2AllocateNode( tree ); + b2TreeNode* parent = tree->nodes + parentIndex; + parent->internal.child1 = index1; + parent->internal.child2 = index2; + parent->aabb = 
b2AABB_Union( child1->aabb, child2->aabb ); + parent->height = 1 + b2MaxUInt16( child1->height, child2->height ); + parent->parent = B2_NULL_INDEX; + + child1->parent = parentIndex; + child2->parent = parentIndex; + + nodes[jMin] = nodes[count - 1]; + nodes[iMin] = parentIndex; + --count; + } + + tree->root = nodes[0]; + b2Free( nodes, tree->nodeCount * sizeof( b2TreeNode ) ); + + b2DynamicTree_Validate( tree ); +} + +void b2DynamicTree_ShiftOrigin( b2DynamicTree* tree, b2Vec2 newOrigin ) +{ + // shift all AABBs + for ( int32_t i = 0; i < tree->nodeCapacity; ++i ) + { + b2TreeNode* n = tree->nodes + i; + n->aabb.lowerBound.x -= newOrigin.x; + n->aabb.lowerBound.y -= newOrigin.y; + n->aabb.upperBound.x -= newOrigin.x; + n->aabb.upperBound.y -= newOrigin.y; + } +} + +int b2DynamicTree_GetByteCount( const b2DynamicTree* tree ) +{ + size_t size = sizeof( b2DynamicTree ) + sizeof( b2TreeNode ) * tree->nodeCapacity + + tree->rebuildCapacity * ( sizeof( int32_t ) + sizeof( b2AABB ) + sizeof( b2Vec2 ) + sizeof( int32_t ) ); + + return (int)size; +} + +void b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskBits, b2TreeQueryCallbackFcn* callback, + void* context ) +{ + int32_t stack[b2_treeStackSize]; + int32_t stackCount = 0; + stack[stackCount++] = tree->root; + + uint32_t maskBits32 = (uint32_t)( maskBits & UINT32_MAX ); + + while ( stackCount > 0 ) + { + int32_t nodeId = stack[--stackCount]; + if ( nodeId == B2_NULL_INDEX ) + { + continue; + } + + const b2TreeNode* node = tree->nodes + nodeId; + + if ( b2AABB_Overlaps( node->aabb, aabb ) ) + { + if ( b2IsLeaf( node ) && ( node->leaf.categoryBits & maskBits32 ) != 0 ) + { + // callback to user code with proxy id + bool proceed = callback( nodeId, node->leaf.userData, context ); + if ( proceed == false ) + { + return; + } + } + else + { + B2_ASSERT( stackCount < b2_treeStackSize - 1 ); + if ( stackCount < b2_treeStackSize - 1 ) + { + stack[stackCount++] = node->internal.child1; + stack[stackCount++] 
= node->internal.child2; + } + } + } + } +} + +b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, + b2TreeRayCastCallbackFcn* callback, void* context ) +{ + b2TraversalResult result = { 0 }; + + b2Vec2 p1 = input->origin; + b2Vec2 d = input->translation; + + b2Vec2 r = b2Normalize( d ); + + // v is perpendicular to the segment. + b2Vec2 v = b2CrossSV( 1.0f, r ); + b2Vec2 abs_v = b2Abs( v ); + + // Separating axis for segment (Gino, p80). + // |dot(v, p1 - c)| > dot(|v|, h) + + float maxFraction = input->maxFraction; + + b2Vec2 p2 = b2MulAdd( p1, maxFraction, d ); + + // Build a bounding box for the segment. + b2AABB segmentAABB = { b2Min( p1, p2 ), b2Max( p1, p2 ) }; + + int32_t stack[b2_treeStackSize]; + int32_t stackCount = 0; + stack[stackCount++] = tree->root; + + b2RayCastInput subInput = *input; + uint32_t maskBits32 = (uint32_t)( maskBits & UINT32_MAX ); + + while ( stackCount > 0 ) + { + int32_t nodeId = stack[--stackCount]; + if ( nodeId == B2_NULL_INDEX ) + { + continue; + } + + const b2TreeNode* node = tree->nodes + nodeId; + result.nodeVisits += 1; + + b2AABB nodeAABB = node->aabb; + + if ( b2AABB_Overlaps( nodeAABB, segmentAABB ) == false ) + { + continue; + } + + // Separating axis for segment (Gino, p80). + // |dot(v, p1 - c)| > dot(|v|, h) + // radius extension is added to the node in this case + b2Vec2 c = b2AABB_Center( nodeAABB ); + b2Vec2 h = b2AABB_Extents( nodeAABB ); + float term1 = b2AbsFloat( b2Dot( v, b2Sub( p1, c ) ) ); + float term2 = b2Dot( abs_v, h ); + if ( term2 < term1 ) + { + continue; + } + + if ( b2IsLeaf( node ) ) + { + if ( ( node->leaf.categoryBits & maskBits32 ) == 0 ) + { + continue; + } + + subInput.maxFraction = maxFraction; + + float value = callback( &subInput, nodeId, node->leaf.userData, context ); + result.leafVisits += 1; + + if ( value == 0.0f ) + { + // The client has terminated the ray cast. 
+ return result; + } + + if ( 0.0f < value && value < maxFraction ) + { + // Update segment bounding box. + maxFraction = value; + p2 = b2MulAdd( p1, maxFraction, d ); + segmentAABB.lowerBound = b2Min( p1, p2 ); + segmentAABB.upperBound = b2Max( p1, p2 ); + } + } + else + { + B2_ASSERT( stackCount < b2_treeStackSize - 1 ); + if ( stackCount < b2_treeStackSize - 1 ) + { + // TODO_ERIN just put one node on the stack, continue on a child node + // TODO_ERIN test ordering children by nearest to ray origin + stack[stackCount++] = node->internal.child1; + stack[stackCount++] = node->internal.child2; + } + } + } + + return result; +} + +void b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits, + b2TreeShapeCastCallbackFcn* callback, void* context ) +{ + if ( input->count == 0 ) + { + return; + } + + b2AABB originAABB = { input->points[0], input->points[0] }; + for ( int i = 1; i < input->count; ++i ) + { + originAABB.lowerBound = b2Min( originAABB.lowerBound, input->points[i] ); + originAABB.upperBound = b2Max( originAABB.upperBound, input->points[i] ); + } + + b2Vec2 radius = { input->radius, input->radius }; + + originAABB.lowerBound = b2Sub( originAABB.lowerBound, radius ); + originAABB.upperBound = b2Add( originAABB.upperBound, radius ); + + b2Vec2 p1 = b2AABB_Center( originAABB ); + b2Vec2 extension = b2AABB_Extents( originAABB ); + + // v is perpendicular to the segment. + b2Vec2 r = input->translation; + b2Vec2 v = b2CrossSV( 1.0f, r ); + b2Vec2 abs_v = b2Abs( v ); + + // Separating axis for segment (Gino, p80). 
+ // |dot(v, p1 - c)| > dot(|v|, h) + + float maxFraction = input->maxFraction; + + // Build total box for the shape cast + b2Vec2 t = b2MulSV( maxFraction, input->translation ); + b2AABB totalAABB = { + b2Min( originAABB.lowerBound, b2Add( originAABB.lowerBound, t ) ), + b2Max( originAABB.upperBound, b2Add( originAABB.upperBound, t ) ), + }; + + b2ShapeCastInput subInput = *input; + uint32_t maskBits32 = (uint32_t)( maskBits & UINT32_MAX ); + + int32_t stack[b2_treeStackSize]; + int32_t stackCount = 0; + stack[stackCount++] = tree->root; + + while ( stackCount > 0 ) + { + int32_t nodeId = stack[--stackCount]; + if ( nodeId == B2_NULL_INDEX ) + { + continue; + } + + const b2TreeNode* node = tree->nodes + nodeId; + if ( b2AABB_Overlaps( node->aabb, totalAABB ) == false ) + { + continue; + } + + // Separating axis for segment (Gino, p80). + // |dot(v, p1 - c)| > dot(|v|, h) + // radius extension is added to the node in this case + b2Vec2 c = b2AABB_Center( node->aabb ); + b2Vec2 h = b2Add( b2AABB_Extents( node->aabb ), extension ); + float term1 = b2AbsFloat( b2Dot( v, b2Sub( p1, c ) ) ); + float term2 = b2Dot( abs_v, h ); + if ( term2 < term1 ) + { + continue; + } + + if ( b2IsLeaf( node ) && ( node->leaf.categoryBits & maskBits32 ) != 0 ) + { + subInput.maxFraction = maxFraction; + + float value = callback( &subInput, nodeId, node->leaf.userData, context ); + + if ( value == 0.0f ) + { + // The client has terminated the ray cast. + return; + } + + if ( 0.0f < value && value < maxFraction ) + { + // Update segment bounding box. 
+ maxFraction = value; + t = b2MulSV( maxFraction, input->translation ); + totalAABB.lowerBound = b2Min( originAABB.lowerBound, b2Add( originAABB.lowerBound, t ) ); + totalAABB.upperBound = b2Max( originAABB.upperBound, b2Add( originAABB.upperBound, t ) ); + } + } + else + { + B2_ASSERT( stackCount < b2_treeStackSize - 1 ); + if ( stackCount < b2_treeStackSize - 1 ) + { + // TODO_ERIN just put one node on the stack, continue on a child node + // TODO_ERIN test ordering children by nearest to ray origin + stack[stackCount++] = node->internal.child1; + stack[stackCount++] = node->internal.child2; + } + } + } +} + +// Median split heuristic +static int32_t b2PartitionMid( int32_t* indices, b2Vec2* centers, int32_t count ) +{ + // Handle trivial case + if ( count <= 2 ) + { + return count / 2; + } + + // todo SIMD? + b2Vec2 lowerBound = centers[0]; + b2Vec2 upperBound = centers[0]; + + for ( int32_t i = 1; i < count; ++i ) + { + lowerBound = b2Min( lowerBound, centers[i] ); + upperBound = b2Max( upperBound, centers[i] ); + } + + b2Vec2 d = b2Sub( upperBound, lowerBound ); + b2Vec2 c = { 0.5f * ( lowerBound.x + upperBound.x ), 0.5f * ( lowerBound.y + upperBound.y ) }; + + // Partition longest axis using the Hoare partition scheme + // https://en.wikipedia.org/wiki/Quicksort + // https://nicholasvadivelu.com/2021/01/11/array-partition/ + int32_t i1 = 0, i2 = count; + if ( d.x > d.y ) + { + float pivot = c.x; + + while ( i1 < i2 ) + { + while ( i1 < i2 && centers[i1].x < pivot ) + { + i1 += 1; + }; + + while ( i1 < i2 && centers[i2 - 1].x >= pivot ) + { + i2 -= 1; + }; + + if ( i1 < i2 ) + { + // Swap indices + { + int32_t temp = indices[i1]; + indices[i1] = indices[i2 - 1]; + indices[i2 - 1] = temp; + } + + // Swap centers + { + b2Vec2 temp = centers[i1]; + centers[i1] = centers[i2 - 1]; + centers[i2 - 1] = temp; + } + + i1 += 1; + i2 -= 1; + } + } + } + else + { + float pivot = c.y; + + while ( i1 < i2 ) + { + while ( i1 < i2 && centers[i1].y < pivot ) + { + i1 += 1; + 
}; + + while ( i1 < i2 && centers[i2 - 1].y >= pivot ) + { + i2 -= 1; + }; + + if ( i1 < i2 ) + { + // Swap indices + { + int32_t temp = indices[i1]; + indices[i1] = indices[i2 - 1]; + indices[i2 - 1] = temp; + } + + // Swap centers + { + b2Vec2 temp = centers[i1]; + centers[i1] = centers[i2 - 1]; + centers[i2 - 1] = temp; + } + + i1 += 1; + i2 -= 1; + } + } + } + B2_ASSERT( i1 == i2 ); + + if ( i1 > 0 && i1 < count ) + { + return i1; + } + else + { + return count / 2; + } +} + +// Temporary data used to track the rebuild of a tree node +struct b2RebuildItem +{ + int32_t nodeIndex; + int32_t childCount; + + // Leaf indices + int32_t startIndex; + int32_t splitIndex; + int32_t endIndex; +}; + +// Returns root node index +static int32_t b2BuildTree( b2DynamicTree* tree, int32_t leafCount ) +{ + b2TreeNode* nodes = tree->nodes; + int32_t* leafIndices = tree->leafIndices; + + if ( leafCount == 1 ) + { + nodes[leafIndices[0]].parent = B2_NULL_INDEX; + return leafIndices[0]; + } + + b2Vec2* leafCenters = tree->leafCenters; + + // todo large stack item + struct b2RebuildItem stack[b2_treeStackSize]; + int32_t top = 0; + + stack[0].nodeIndex = b2AllocateNode( tree ); + stack[0].childCount = -1; + stack[0].startIndex = 0; + stack[0].endIndex = leafCount; + stack[0].splitIndex = b2PartitionMid( leafIndices, leafCenters, leafCount ); + + while ( true ) + { + struct b2RebuildItem* item = stack + top; + + item->childCount += 1; + + if ( item->childCount == 2 ) + { + // This internal node has both children established + + if ( top == 0 ) + { + // all done + break; + } + + struct b2RebuildItem* parentItem = stack + ( top - 1 ); + b2TreeNode* parentNode = nodes + parentItem->nodeIndex; + + if ( parentItem->childCount == 0 ) + { + B2_ASSERT( parentNode->internal.child1 == B2_NULL_INDEX ); + parentNode->internal.child1 = item->nodeIndex; + } + else + { + B2_ASSERT( parentItem->childCount == 1 ); + B2_ASSERT( parentNode->internal.child2 == B2_NULL_INDEX ); + 
parentNode->internal.child2 = item->nodeIndex; + } + + b2TreeNode* node = nodes + item->nodeIndex; + + B2_ASSERT( node->parent == B2_NULL_INDEX ); + node->parent = parentItem->nodeIndex; + + B2_ASSERT( node->internal.child1 != B2_NULL_INDEX ); + B2_ASSERT( node->internal.child2 != B2_NULL_INDEX ); + b2TreeNode* child1 = nodes + node->internal.child1; + b2TreeNode* child2 = nodes + node->internal.child2; + + node->aabb = b2AABB_Union( child1->aabb, child2->aabb ); + node->height = 1 + b2MaxUInt16( child1->height, child2->height ); + + // Pop stack + top -= 1; + } + else + { + int32_t startIndex, endIndex; + if ( item->childCount == 0 ) + { + startIndex = item->startIndex; + endIndex = item->splitIndex; + } + else + { + B2_ASSERT( item->childCount == 1 ); + startIndex = item->splitIndex; + endIndex = item->endIndex; + } + + int32_t count = endIndex - startIndex; + + if ( count == 1 ) + { + int32_t childIndex = leafIndices[startIndex]; + b2TreeNode* node = nodes + item->nodeIndex; + + if ( item->childCount == 0 ) + { + B2_ASSERT( node->internal.child1 == B2_NULL_INDEX ); + node->internal.child1 = childIndex; + } + else + { + B2_ASSERT( item->childCount == 1 ); + B2_ASSERT( node->internal.child2 == B2_NULL_INDEX ); + node->internal.child2 = childIndex; + } + + b2TreeNode* childNode = nodes + childIndex; + B2_ASSERT( childNode->parent == B2_NULL_INDEX ); + childNode->parent = item->nodeIndex; + } + else + { + B2_ASSERT( count > 0 ); + B2_ASSERT( top < b2_treeStackSize ); + + top += 1; + struct b2RebuildItem* newItem = stack + top; + newItem->nodeIndex = b2AllocateNode( tree ); + newItem->childCount = -1; + newItem->startIndex = startIndex; + newItem->endIndex = endIndex; +#if B2_TREE_HEURISTIC == 0 + newItem->splitIndex = b2PartitionMid( leafIndices + startIndex, leafCenters + startIndex, count ); +#else + newItem->splitIndex = + b2PartitionSAH( leafIndices + startIndex, binIndices + startIndex, leafBoxes + startIndex, count ); +#endif + newItem->splitIndex += 
startIndex; + } + } + } + + b2TreeNode* rootNode = nodes + stack[0].nodeIndex; + B2_ASSERT( rootNode->parent == B2_NULL_INDEX ); + B2_ASSERT( rootNode->internal.child1 != B2_NULL_INDEX ); + B2_ASSERT( rootNode->internal.child2 != B2_NULL_INDEX ); + + b2TreeNode* child1 = nodes + rootNode->internal.child1; + b2TreeNode* child2 = nodes + rootNode->internal.child2; + + rootNode->aabb = b2AABB_Union( child1->aabb, child2->aabb ); + rootNode->height = 1 + b2MaxUInt16( child1->height, child2->height ); + + return stack[0].nodeIndex; +} + +// Not safe to access tree during this operation because it may grow +int32_t b2DynamicTree_Rebuild( b2DynamicTree* tree, bool fullBuild ) +{ + int32_t proxyCount = tree->proxyCount; + if ( proxyCount == 0 ) + { + return 0; + } + + // Ensure capacity for rebuild space + if ( proxyCount > tree->rebuildCapacity ) + { + int32_t newCapacity = proxyCount + proxyCount / 2; + + b2Free( tree->leafIndices, tree->rebuildCapacity * sizeof( int32_t ) ); + tree->leafIndices = b2Alloc( newCapacity * sizeof( int32_t ) ); + +#if B2_TREE_HEURISTIC == 0 + b2Free( tree->leafCenters, tree->rebuildCapacity * sizeof( b2Vec2 ) ); + tree->leafCenters = b2Alloc( newCapacity * sizeof( b2Vec2 ) ); +#else + b2Free( tree->leafBoxes, tree->rebuildCapacity * sizeof( b2AABB ) ); + tree->leafBoxes = b2Alloc( newCapacity * sizeof( b2AABB ) ); + b2Free( tree->binIndices, tree->rebuildCapacity * sizeof( int32_t ) ); + tree->binIndices = b2Alloc( newCapacity * sizeof( int32_t ) ); +#endif + tree->rebuildCapacity = newCapacity; + } + + int32_t leafCount = 0; + int32_t stack[b2_treeStackSize]; + int32_t stackCount = 0; + + int32_t nodeIndex = tree->root; + b2TreeNode* nodes = tree->nodes; + b2TreeNode* node = nodes + nodeIndex; + + // These are the nodes that get sorted to rebuild the tree. + // I'm using indices because the node pool may grow during the build. 
+ int32_t* leafIndices = tree->leafIndices; + +#if B2_TREE_HEURISTIC == 0 + b2Vec2* leafCenters = tree->leafCenters; +#else + b2AABB* leafBoxes = tree->leafBoxes; +#endif + + // Gather all proxy nodes that have grown and all internal nodes that haven't grown. Both are + // considered leaves in the tree rebuild. + // Free all internal nodes that have grown. + // todo use a node growth metric instead of simply enlarged to reduce rebuild size and frequency + // this should be weighed against b2_aabbMargin + while ( true ) + { + if ( node->height == 0 || ( ( node->flags & b2_enlargedNode) == 0 && fullBuild == false ) ) + { + leafIndices[leafCount] = nodeIndex; +#if B2_TREE_HEURISTIC == 0 + leafCenters[leafCount] = b2AABB_Center( node->aabb ); +#else + leafBoxes[leafCount] = node->aabb; +#endif + leafCount += 1; + + // Detach + node->parent = B2_NULL_INDEX; + } + else + { + int32_t doomedNodeIndex = nodeIndex; + + // Handle children + nodeIndex = node->internal.child1; + + B2_ASSERT( stackCount < b2_treeStackSize ); + if ( stackCount < b2_treeStackSize ) + { + stack[stackCount++] = node->internal.child2; + } + + node = nodes + nodeIndex; + + // Remove doomed node + b2FreeNode( tree, doomedNodeIndex ); + + continue; + } + + if ( stackCount == 0 ) + { + break; + } + + nodeIndex = stack[--stackCount]; + node = nodes + nodeIndex; + } + +#if B2_VALIDATE == 1 + int32_t capacity = tree->nodeCapacity; + for ( int32_t i = 0; i < capacity; ++i ) + { + if ( nodes[i].flags & b2_allocatedNode ) + { + B2_ASSERT( (nodes[i].flags & b2_enlargedNode) == 0 ); + } + } +#endif + + B2_ASSERT( leafCount <= proxyCount ); + + tree->root = b2BuildTree( tree, leafCount ); + + b2DynamicTree_Validate( tree ); + + return leafCount; +} + +#endif diff --git a/src/solver.c b/src/solver.c index bd1a81eef..0603f2a31 100644 --- a/src/solver.c +++ b/src/solver.c @@ -1515,8 +1515,7 @@ void b2Solve( b2World* world, b2StepContext* stepContext ) } } - ptrdiff_t blockDiff = baseGraphBlock - graphBlocks; - 
B2_ASSERT( blockDiff == graphBlockCount ); + B2_ASSERT( (ptrdiff_t)(baseGraphBlock - graphBlocks) == graphBlockCount ); b2SolverStage* stage = stages; diff --git a/src/world.c b/src/world.c index e9b7d718a..1a89e193f 100644 --- a/src/world.c +++ b/src/world.c @@ -2065,14 +2065,16 @@ static float RayCastCallback( const b2RayCastInput* input, int proxyId, int shap return input->maxFraction; } -void b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, +b2TraversalResult b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context ) { + b2TraversalResult traversalResult = { 0 }; + b2World* world = b2GetWorldFromId( worldId ); B2_ASSERT( world->locked == false ); if ( world->locked ) { - return; + return traversalResult; } B2_ASSERT( b2Vec2_IsValid( origin ) ); @@ -2084,15 +2086,19 @@ void b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2Qu for ( int i = 0; i < b2_bodyTypeCount; ++i ) { - b2DynamicTree_RayCast( world->broadPhase.trees + i, &input, filter.maskBits, RayCastCallback, &worldContext ); + b2TraversalResult treeResult = b2DynamicTree_RayCast( world->broadPhase.trees + i, &input, filter.maskBits, RayCastCallback, &worldContext ); + traversalResult.nodeVisits += treeResult.nodeVisits; + traversalResult.leafVisits += treeResult.leafVisits; if ( worldContext.fraction == 0.0f ) { - return; + return traversalResult; } input.maxFraction = worldContext.fraction; } + + return traversalResult; } // This callback finds the closest hit. This is the most common callback used in games. 
@@ -2126,7 +2132,9 @@ b2RayResult b2World_CastRayClosest( b2WorldId worldId, b2Vec2 origin, b2Vec2 tra for ( int i = 0; i < b2_bodyTypeCount; ++i ) { - b2DynamicTree_RayCast( world->broadPhase.trees + i, &input, filter.maskBits, RayCastCallback, &worldContext ); + b2TraversalResult treeResult = b2DynamicTree_RayCast( world->broadPhase.trees + i, &input, filter.maskBits, RayCastCallback, &worldContext ); + result.nodeVisits += treeResult.nodeVisits; + result.leafVisits += treeResult.leafVisits; if ( worldContext.fraction == 0.0f ) {