Skip to content

Commit

Permalink
Remove fancy iterators (#852)
Browse files Browse the repository at this point in the history
* removed zip from collider

* removed zip from boolean result

* removed zip from boolean3

* refactored constructors

* refactored mesh_fixes

* refactored impl

* ≈removed reduce_by_key

* more zip removal

* refactored properties

* refactored shared

* refactored smoothing

* refactored sort

* refactored subdivision

* refactored polygon

* removed zip completely

* cleanup

* transform iterator

* strided range

* format

* fix pointer/reference to strided range

* avoid thrust copy

* fix iter

* correct location

---------

Co-authored-by: pca006132 <[email protected]>
  • Loading branch information
elalish and pca006132 authored Jul 7, 2024
1 parent b817d07 commit ddb3cbe
Show file tree
Hide file tree
Showing 18 changed files with 774 additions and 740 deletions.
35 changes: 16 additions & 19 deletions src/collider/src/collider.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,15 +154,13 @@ struct CreateRadixTree {

template <typename T, const bool selfCollision, typename Recorder>
struct FindCollisions {
VecView<const T> queries;
VecView<const Box> nodeBBox_;
VecView<const thrust::pair<int, int>> internalChildren_;
Recorder recorder;

int RecordCollision(int node, thrust::tuple<T, int>& query) {
const T& queryObj = thrust::get<0>(query);
const int queryIdx = thrust::get<1>(query);

bool overlaps = nodeBBox_[node].DoesOverlap(queryObj);
int RecordCollision(int node, const int queryIdx) {
bool overlaps = nodeBBox_[node].DoesOverlap(queries[queryIdx]);
if (overlaps && IsLeaf(node)) {
int leafIdx = Node2Leaf(node);
if (!selfCollision || leafIdx != queryIdx) {
Expand All @@ -172,23 +170,22 @@ struct FindCollisions {
return overlaps && IsInternal(node); // Should traverse into node
}

void operator()(thrust::tuple<T, int> query) {
void operator()(const int queryIdx) {
// stack cannot overflow because radix tree has max depth 30 (Morton code) +
// 32 (index).
int stack[64];
int top = -1;
// Depth-first search
int node = kRoot;
const int queryIdx = thrust::get<1>(query);
// same implies that this query do not have any collision
if (recorder.earlyexit(queryIdx)) return;
while (1) {
int internal = Node2Internal(node);
int child1 = internalChildren_[internal].first;
int child2 = internalChildren_[internal].second;

int traverse1 = RecordCollision(child1, query);
int traverse2 = RecordCollision(child2, query);
int traverse1 = RecordCollision(child1, queryIdx);
int traverse2 = RecordCollision(child2, queryIdx);

if (!traverse1 && !traverse2) {
if (top < 0) break; // done
Expand Down Expand Up @@ -313,30 +310,30 @@ SparseIndices Collider::Collisions(const VecView<const T>& queriesIn) const {
// element can store the sum when using exclusive scan
if (queriesIn.size() < kSequentialThreshold) {
SparseIndices queryTri;
for_each_n(ExecutionPolicy::Seq, zip(queriesIn.cbegin(), countAt(0)),
queriesIn.size(),
for_each_n(ExecutionPolicy::Seq, countAt(0), queriesIn.size(),
FindCollisions<T, selfCollision, SeqCollisionRecorder<inverted>>{
nodeBBox_, internalChildren_, {queryTri}});
queriesIn, nodeBBox_, internalChildren_, {queryTri}});
return queryTri;
} else {
// compute the number of collisions to determine the size for allocation and
// offset, this avoids the need for atomic
Vec<int> counts(queriesIn.size() + 1, 0);
Vec<char> empty(queriesIn.size(), 0);
for_each_n(ExecutionPolicy::Par, zip(queriesIn.cbegin(), countAt(0)),
queriesIn.size(),
for_each_n(ExecutionPolicy::Par, countAt(0), queriesIn.size(),
FindCollisions<T, selfCollision, CountCollisions>{
nodeBBox_, internalChildren_, {counts, empty}});
queriesIn, nodeBBox_, internalChildren_, {counts, empty}});
// compute start index for each query and total count
exclusive_scan(ExecutionPolicy::Par, counts.begin(), counts.end(),
counts.begin(), 0, std::plus<int>());
if (counts.back() == 0) return SparseIndices(0);
SparseIndices queryTri(counts.back());
// actually recording collisions
for_each_n(ExecutionPolicy::Par, zip(queriesIn.cbegin(), countAt(0)),
queriesIn.size(),
for_each_n(ExecutionPolicy::Par, countAt(0), queriesIn.size(),
FindCollisions<T, selfCollision, ParCollisionRecorder<inverted>>{
nodeBBox_, internalChildren_, {queryTri, counts, empty}});
queriesIn,
nodeBBox_,
internalChildren_,
{queryTri, counts, empty}});
return queryTri;
}
}
Expand All @@ -350,7 +347,7 @@ void Collider::UpdateBoxes(const VecView<const Box>& leafBB) {
DEBUG_ASSERT(leafBB.size() == NumLeaves(), userErr,
"must have the same number of updated boxes as original");
// copy in leaf node Boxes
strided_range<Vec<Box>::Iter> leaves(nodeBBox_.begin(), nodeBBox_.end(), 2);
auto leaves = StridedRange(nodeBBox_.begin(), nodeBBox_.end(), 2);
auto policy = autoPolicy(NumInternal());
copy(policy, leafBB.cbegin(), leafBB.cend(), leaves.begin());
// create global counters
Expand Down
95 changes: 41 additions & 54 deletions src/manifold/src/boolean3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,13 @@ glm::vec4 Intersect(const glm::vec3 &pL, const glm::vec3 &pR,
template <const bool inverted>
struct CopyFaceEdges {
const SparseIndices &p1q1;
// const int *p1q1;
// x can be either vert or edge (0 or 1).
SparseIndices &pXq1;
VecView<const Halfedge> halfedgesQ;
const size_t offset;

void operator()(thrust::tuple<size_t, size_t> in) {
int idx = 3 * thrust::get<0>(in);
size_t i = thrust::get<1>(in);
void operator()(const size_t i) {
int idx = 3 * (i + offset);
int pX = p1q1.Get(i, inverted);
int q2 = p1q1.Get(i, !inverted);

Expand All @@ -94,10 +93,10 @@ SparseIndices Filter11(const Manifold::Impl &inP, const Manifold::Impl &inQ,
const SparseIndices &p1q2, const SparseIndices &p2q1) {
ZoneScoped;
SparseIndices p1q1(3 * p1q2.size() + 3 * p2q1.size());
for_each_n(autoPolicy(p1q2.size()), zip(countAt(0_z), countAt(0_z)),
p1q2.size(), CopyFaceEdges<false>({p1q2, p1q1, inQ.halfedge_}));
for_each_n(autoPolicy(p2q1.size()), zip(countAt(p1q2.size()), countAt(0_z)),
p2q1.size(), CopyFaceEdges<true>({p2q1, p1q1, inP.halfedge_}));
for_each_n(autoPolicy(p1q2.size()), countAt(0_z), p1q2.size(),
CopyFaceEdges<false>({p1q2, p1q1, inQ.halfedge_, 0_z}));
for_each_n(autoPolicy(p2q1.size()), countAt(0_z), p2q1.size(),
CopyFaceEdges<true>({p2q1, p1q1, inP.halfedge_, p1q2.size()}));
p1q1.Unique();
return p1q1;
}
Expand Down Expand Up @@ -177,19 +176,21 @@ size_t monobound_quaternary_search(VecView<const int64_t> array, int64_t key) {
}

struct Kernel11 {
VecView<glm::vec4> xyzz;
VecView<int> s;
VecView<const glm::vec3> vertPosP;
VecView<const glm::vec3> vertPosQ;
VecView<const Halfedge> halfedgeP;
VecView<const Halfedge> halfedgeQ;
float expandP;
const float expandP;
VecView<const glm::vec3> normalP;
const SparseIndices &p1q1;

void operator()(thrust::tuple<size_t, glm::vec4 &, int &> inout) {
const int p1 = p1q1.Get(thrust::get<0>(inout), false);
const int q1 = p1q1.Get(thrust::get<0>(inout), true);
glm::vec4 &xyzz11 = thrust::get<1>(inout);
int &s11 = thrust::get<2>(inout);
void operator()(const size_t idx) {
const int p1 = p1q1.Get(idx, false);
const int q1 = p1q1.Get(idx, true);
glm::vec4 &xyzz11 = xyzz[idx];
int &s11 = s[idx];

// For pRL[k], qRL[k], k==0 is the left and k==1 is the right.
int k = 0;
Expand Down Expand Up @@ -262,17 +263,18 @@ std::tuple<Vec<int>, Vec<glm::vec4>> Shadow11(SparseIndices &p1q1,
Vec<int> s11(p1q1.size());
Vec<glm::vec4> xyzz11(p1q1.size());

for_each_n(autoPolicy(p1q1.size()),
zip(countAt(0_z), xyzz11.begin(), s11.begin()), p1q1.size(),
Kernel11({inP.vertPos_, inQ.vertPos_, inP.halfedge_, inQ.halfedge_,
expandP, inP.vertNormal_, p1q1}));
for_each_n(autoPolicy(p1q1.size()), countAt(0_z), p1q1.size(),
Kernel11({xyzz11, s11, inP.vertPos_, inQ.vertPos_, inP.halfedge_,
inQ.halfedge_, expandP, inP.vertNormal_, p1q1}));

p1q1.KeepFinite(xyzz11, s11);

return std::make_tuple(s11, xyzz11);
};

struct Kernel02 {
VecView<int> s;
VecView<float> z;
VecView<const glm::vec3> vertPosP;
VecView<const Halfedge> halfedgeQ;
VecView<const glm::vec3> vertPosQ;
Expand All @@ -281,11 +283,11 @@ struct Kernel02 {
const SparseIndices &p0q2;
const bool forward;

void operator()(thrust::tuple<size_t, int &, float &> inout) {
const int p0 = p0q2.Get(thrust::get<0>(inout), !forward);
const int q2 = p0q2.Get(thrust::get<0>(inout), forward);
int &s02 = thrust::get<1>(inout);
float &z02 = thrust::get<2>(inout);
void operator()(const size_t idx) {
const int p0 = p0q2.Get(idx, !forward);
const int q2 = p0q2.Get(idx, forward);
int &s02 = s[idx];
float &z02 = z[idx];

// For yzzLR[k], k==0 is the left and k==1 is the right.
int k = 0;
Expand Down Expand Up @@ -353,17 +355,18 @@ std::tuple<Vec<int>, Vec<float>> Shadow02(const Manifold::Impl &inP,
Vec<float> z02(p0q2.size());

auto vertNormalP = forward ? inP.vertNormal_ : inQ.vertNormal_;
for_each_n(autoPolicy(p0q2.size()),
zip(countAt(0_z), s02.begin(), z02.begin()), p0q2.size(),
Kernel02({inP.vertPos_, inQ.halfedge_, inQ.vertPos_, expandP,
vertNormalP, p0q2, forward}));
for_each_n(autoPolicy(p0q2.size()), countAt(0_z), p0q2.size(),
Kernel02({s02, z02, inP.vertPos_, inQ.halfedge_, inQ.vertPos_,
expandP, vertNormalP, p0q2, forward}));

p0q2.KeepFinite(z02, s02);

return std::make_tuple(s02, z02);
};

struct Kernel12 {
VecView<int> x;
VecView<glm::vec3> v;
VecView<const int64_t> p0q2;
VecView<const int> s02;
VecView<const float> z02;
Expand All @@ -376,11 +379,11 @@ struct Kernel12 {
const bool forward;
const SparseIndices &p1q2;

void operator()(thrust::tuple<size_t, int &, glm::vec3 &> inout) {
int p1 = p1q2.Get(thrust::get<0>(inout), !forward);
int q2 = p1q2.Get(thrust::get<0>(inout), forward);
int &x12 = thrust::get<1>(inout);
glm::vec3 &v12 = thrust::get<2>(inout);
void operator()(const size_t idx) {
int p1 = p1q2.Get(idx, !forward);
int q2 = p1q2.Get(idx, forward);
int &x12 = x[idx];
glm::vec3 &v12 = v[idx];

// For xzyLR-[k], k==0 is the left and k==1 is the right.
int k = 0;
Expand Down Expand Up @@ -460,9 +463,8 @@ std::tuple<Vec<int>, Vec<glm::vec3>> Intersect12(
Vec<glm::vec3> v12(p1q2.size());

for_each_n(
autoPolicy(p1q2.size()), zip(countAt(0_z), x12.begin(), v12.begin()),
p1q2.size(),
Kernel12({p0q2.AsVec64(), s02, z02, p1q1.AsVec64(), s11, xyzz11,
autoPolicy(p1q2.size()), countAt(0_z), p1q2.size(),
Kernel12({x12, v12, p0q2.AsVec64(), s02, z02, p1q1.AsVec64(), s11, xyzz11,
inP.halfedge_, inQ.halfedge_, inP.vertPos_, forward, p1q2}));

p1q2.KeepFinite(v12, x12);
Expand All @@ -475,26 +477,11 @@ Vec<int> Winding03(const Manifold::Impl &inP, Vec<int> &vertices, Vec<int> &s02,
ZoneScoped;
// verts that are not shadowed (not in p0q2) have winding number zero.
Vec<int> w03(inP.NumVert(), 0);
// checking is slow, so just sort and reduce
auto policy = autoPolicy(vertices.size());
stable_sort(
policy, zip(vertices.begin(), s02.begin()),
zip(vertices.end(), s02.end()),
[](const thrust::tuple<int, int> &a, const thrust::tuple<int, int> &b) {
return thrust::get<0>(a) < thrust::get<0>(b);
});
Vec<int> w03val(w03.size());
Vec<int> w03vert(w03.size());
// sum known s02 values into w03 (winding number)
auto endPair = reduce_by_key<
thrust::pair<decltype(w03val.begin()), decltype(w03val.begin())>>(
policy, vertices.begin(), vertices.end(), s02.begin(), w03vert.begin(),
w03val.begin());
scatter(policy, w03val.begin(), endPair.second, w03vert.begin(), w03.begin());

if (reverse)
transform(policy, w03.begin(), w03.end(), w03.begin(),
thrust::negate<int>());
for_each_n(policy, countAt(0), s02.size(),
[&w03, &vertices, &s02, reverse](const int i) {
AtomicAdd(w03[vertices[i]], s02[i] * (reverse ? -1 : 1));
});
return w03;
};
} // namespace
Expand Down
Loading

0 comments on commit ddb3cbe

Please sign in to comment.