Remove fancy iterators #852

Merged
merged 24 commits on Jul 7, 2024
Changes from 17 commits
33 changes: 15 additions & 18 deletions src/collider/src/collider.cpp
@@ -154,15 +154,13 @@ struct CreateRadixTree {

template <typename T, const bool selfCollision, typename Recorder>
struct FindCollisions {
VecView<const T> queries;
VecView<const Box> nodeBBox_;
VecView<const thrust::pair<int, int>> internalChildren_;
Collaborator:
Wondering if we can remove this thrust::pair as well, maybe replacing it with std::pair?
Not really related to fancy iterators, but I feel the main reason for using thrust::pair was zip.
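A minimal sketch of what that follow-up could look like (hypothetical, not part of this PR): with zip() gone, the child pairs only need `.first`/`.second`, which `std::pair` provides. `std::vector` stands in for `VecView` here and the names mirror the diff.

```cpp
#include <cstdio>
#include <utility>
#include <vector>

// Hypothetical sketch of the reviewer's suggestion: the child-index pairs can
// be plain std::pair, since the traversal only reads .first and .second.
struct FindCollisionsSketch {
  std::vector<std::pair<int, int>> internalChildren_;

  void Visit(int internal) const {
    int child1 = internalChildren_[internal].first;
    int child2 = internalChildren_[internal].second;
    std::printf("children of %d: %d, %d\n", internal, child1, child2);
  }
};

int main() {
  FindCollisionsSketch f{{{1, 2}, {3, 4}}};
  f.Visit(0);
}
```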

Recorder recorder;

int RecordCollision(int node, thrust::tuple<T, int>& query) {
const T& queryObj = thrust::get<0>(query);
const int queryIdx = thrust::get<1>(query);

bool overlaps = nodeBBox_[node].DoesOverlap(queryObj);
int RecordCollision(int node, const int queryIdx) {
bool overlaps = nodeBBox_[node].DoesOverlap(queries[queryIdx]);
if (overlaps && IsLeaf(node)) {
int leafIdx = Node2Leaf(node);
if (!selfCollision || leafIdx != queryIdx) {
@@ -172,23 +170,22 @@ struct FindCollisions {
return overlaps && IsInternal(node); // Should traverse into node
}

void operator()(thrust::tuple<T, int> query) {
void operator()(const int queryIdx) {
// stack cannot overflow because radix tree has max depth 30 (Morton code) +
// 32 (index).
int stack[64];
int top = -1;
// Depth-first search
int node = kRoot;
const int queryIdx = thrust::get<1>(query);
// same implies that this query does not have any collisions
if (recorder.earlyexit(queryIdx)) return;
while (1) {
int internal = Node2Internal(node);
int child1 = internalChildren_[internal].first;
int child2 = internalChildren_[internal].second;

int traverse1 = RecordCollision(child1, query);
int traverse2 = RecordCollision(child2, query);
int traverse1 = RecordCollision(child1, queryIdx);
int traverse2 = RecordCollision(child2, queryIdx);

if (!traverse1 && !traverse2) {
if (top < 0) break; // done
@@ -313,30 +310,30 @@ SparseIndices Collider::Collisions(const VecView<const T>& queriesIn) const {
// element can store the sum when using exclusive scan
if (queriesIn.size() < kSequentialThreshold) {
SparseIndices queryTri;
for_each_n(ExecutionPolicy::Seq, zip(queriesIn.cbegin(), countAt(0)),
queriesIn.size(),
for_each_n(ExecutionPolicy::Seq, countAt(0), queriesIn.size(),
FindCollisions<T, selfCollision, SeqCollisionRecorder<inverted>>{
nodeBBox_, internalChildren_, {queryTri}});
queriesIn, nodeBBox_, internalChildren_, {queryTri}});
return queryTri;
} else {
// compute the number of collisions to determine the size for allocation and
// offset, this avoids the need for atomic
Vec<int> counts(queriesIn.size() + 1, 0);
Vec<char> empty(queriesIn.size(), 0);
for_each_n(ExecutionPolicy::Par, zip(queriesIn.cbegin(), countAt(0)),
queriesIn.size(),
for_each_n(ExecutionPolicy::Par, countAt(0), queriesIn.size(),
FindCollisions<T, selfCollision, CountCollisions>{
nodeBBox_, internalChildren_, {counts, empty}});
queriesIn, nodeBBox_, internalChildren_, {counts, empty}});
// compute start index for each query and total count
exclusive_scan(ExecutionPolicy::Par, counts.begin(), counts.end(),
counts.begin(), 0, std::plus<int>());
if (counts.back() == 0) return SparseIndices(0);
SparseIndices queryTri(counts.back());
// actually recording collisions
for_each_n(ExecutionPolicy::Par, zip(queriesIn.cbegin(), countAt(0)),
queriesIn.size(),
for_each_n(ExecutionPolicy::Par, countAt(0), queriesIn.size(),
FindCollisions<T, selfCollision, ParCollisionRecorder<inverted>>{
nodeBBox_, internalChildren_, {queryTri, counts, empty}});
queriesIn,
nodeBBox_,
internalChildren_,
{queryTri, counts, empty}});
return queryTri;
}
}
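The recurring rewrite in this PR is visible above: the functor now stores its input views and takes a bare index from `countAt(0)`, instead of unpacking a `thrust::tuple` produced by `zip()`. A generic, self-contained sketch of that shape, using `std::for_each` and placeholder names rather than Manifold's `for_each_n`/`countAt` helpers:

```cpp
#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

// After the rewrite the functor holds views of its inputs and takes just an
// index; the zip/tuple plumbing disappears.
struct ScaleOp {
  const std::vector<float>& input;  // was one leg of the zip iterator
  std::vector<float>& output;
  float factor;

  void operator()(int i) const { output[i] = factor * input[i]; }
};

int main() {
  std::vector<float> in = {1.f, 2.f, 3.f}, out(3);

  // countAt(0) stand-in: an explicit index sequence.
  std::vector<int> indices(in.size());
  std::iota(indices.begin(), indices.end(), 0);

  std::for_each(indices.begin(), indices.end(), ScaleOp{in, out, 2.f});
  for (float v : out) std::printf("%g ", v);
  std::printf("\n");
}
```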
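The parallel branch above also relies on the two-pass scheme its comments describe: count matches per query, exclusive-scan the counts into start offsets (with one extra slot holding the total), then write results into a pre-sized buffer without atomics. A stand-alone sketch with plain STL containers and made-up counts:

```cpp
#include <cstdio>
#include <numeric>
#include <vector>

int main() {
  // Placeholder per-query match counts; in the real code these come from a
  // counting pass over the BVH.
  std::vector<int> counts = {2, 0, 3, 1, 0};
  counts.push_back(0);  // extra slot so the scan can store the grand total

  // Exclusive scan turns per-query counts into start offsets.
  std::exclusive_scan(counts.begin(), counts.end(), counts.begin(), 0);
  int total = counts.back();

  // Second pass: each query writes at its own offset, so no atomics needed.
  std::vector<int> perQuery = {2, 0, 3, 1, 0};
  std::vector<int> out(total, -1);
  for (size_t q = 0; q + 1 < counts.size(); ++q)
    for (int k = 0; k < perQuery[q]; ++k)
      out[counts[q] + k] = static_cast<int>(q);

  for (int v : out) std::printf("%d ", v);
  std::printf("(total %d)\n", total);
}
```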
95 changes: 41 additions & 54 deletions src/manifold/src/boolean3.cpp
@@ -68,14 +68,13 @@ glm::vec4 Intersect(const glm::vec3 &pL, const glm::vec3 &pR,
template <const bool inverted>
struct CopyFaceEdges {
const SparseIndices &p1q1;
// const int *p1q1;
// x can be either vert or edge (0 or 1).
SparseIndices &pXq1;
VecView<const Halfedge> halfedgesQ;
const size_t offset;

void operator()(thrust::tuple<size_t, size_t> in) {
int idx = 3 * thrust::get<0>(in);
size_t i = thrust::get<1>(in);
void operator()(const size_t i) {
int idx = 3 * (i + offset);
int pX = p1q1.Get(i, inverted);
int q2 = p1q1.Get(i, !inverted);

@@ -94,10 +93,10 @@ SparseIndices Filter11(const Manifold::Impl &inP, const Manifold::Impl &inQ,
const SparseIndices &p1q2, const SparseIndices &p2q1) {
ZoneScoped;
SparseIndices p1q1(3 * p1q2.size() + 3 * p2q1.size());
for_each_n(autoPolicy(p1q2.size()), zip(countAt(0_z), countAt(0_z)),
p1q2.size(), CopyFaceEdges<false>({p1q2, p1q1, inQ.halfedge_}));
for_each_n(autoPolicy(p2q1.size()), zip(countAt(p1q2.size()), countAt(0_z)),
p2q1.size(), CopyFaceEdges<true>({p2q1, p1q1, inP.halfedge_}));
for_each_n(autoPolicy(p1q2.size()), countAt(0_z), p1q2.size(),
CopyFaceEdges<false>({p1q2, p1q1, inQ.halfedge_, 0_z}));
for_each_n(autoPolicy(p2q1.size()), countAt(0_z), p2q1.size(),
CopyFaceEdges<true>({p2q1, p1q1, inP.halfedge_, p1q2.size()}));
p1q1.Unique();
return p1q1;
}
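In Filter11 the second counting stream only ever supplied a fixed starting offset (0 or `p1q2.size()`), so it becomes the new `offset` member and the functor computes `3 * (i + offset)` itself. The same idea in isolation, with placeholder names and `std::vector` in place of the `SparseIndices`/`VecView` types:

```cpp
#include <cstdio>
#include <vector>

// Sketch: the destination offset that used to come from a second counting
// iterator is now a plain struct field, added to the loop index inside.
struct CopyBatch {
  const std::vector<int>& src;
  std::vector<int>& dst;
  size_t offset;  // where this batch starts in dst

  void operator()(size_t i) const { dst[i + offset] = src[i]; }
};

int main() {
  std::vector<int> a = {1, 2}, b = {3, 4, 5};
  std::vector<int> merged(a.size() + b.size());

  CopyBatch first{a, merged, 0};
  CopyBatch second{b, merged, a.size()};
  for (size_t i = 0; i < a.size(); ++i) first(i);
  for (size_t i = 0; i < b.size(); ++i) second(i);

  for (int v : merged) std::printf("%d ", v);  // 1 2 3 4 5
  std::printf("\n");
}
```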
@@ -177,19 +176,21 @@ size_t monobound_quaternary_search(VecView<const int64_t> array, int64_t key) {
}

struct Kernel11 {
VecView<glm::vec4> xyzz;
VecView<int> s;
VecView<const glm::vec3> vertPosP;
VecView<const glm::vec3> vertPosQ;
VecView<const Halfedge> halfedgeP;
VecView<const Halfedge> halfedgeQ;
float expandP;
const float expandP;
VecView<const glm::vec3> normalP;
const SparseIndices &p1q1;

void operator()(thrust::tuple<size_t, glm::vec4 &, int &> inout) {
const int p1 = p1q1.Get(thrust::get<0>(inout), false);
const int q1 = p1q1.Get(thrust::get<0>(inout), true);
glm::vec4 &xyzz11 = thrust::get<1>(inout);
int &s11 = thrust::get<2>(inout);
void operator()(const size_t idx) {
const int p1 = p1q1.Get(idx, false);
const int q1 = p1q1.Get(idx, true);
glm::vec4 &xyzz11 = xyzz[idx];
int &s11 = s[idx];

// For pRL[k], qRL[k], k==0 is the left and k==1 is the right.
int k = 0;
Expand Down Expand Up @@ -262,17 +263,18 @@ std::tuple<Vec<int>, Vec<glm::vec4>> Shadow11(SparseIndices &p1q1,
Vec<int> s11(p1q1.size());
Vec<glm::vec4> xyzz11(p1q1.size());

for_each_n(autoPolicy(p1q1.size()),
zip(countAt(0_z), xyzz11.begin(), s11.begin()), p1q1.size(),
Kernel11({inP.vertPos_, inQ.vertPos_, inP.halfedge_, inQ.halfedge_,
expandP, inP.vertNormal_, p1q1}));
for_each_n(autoPolicy(p1q1.size()), countAt(0_z), p1q1.size(),
Kernel11({xyzz11, s11, inP.vertPos_, inQ.vertPos_, inP.halfedge_,
inQ.halfedge_, expandP, inP.vertNormal_, p1q1}));

p1q1.KeepFinite(xyzz11, s11);

return std::make_tuple(s11, xyzz11);
};
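Kernel11 now receives its outputs as member views (`xyzz`, `s`) and writes them at `idx`, rather than taking a zip of `countAt(0_z)`, `xyzz11.begin()`, `s11.begin()` and unpacking references. Roughly this shape, sketched with plain vectors and placeholder math:

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Outputs are now plain member views written by index, replacing the
// tuple of references produced by the old zip iterator.
struct KernelSketch {
  std::vector<float>& norm;  // first output, was a zipped reference
  std::vector<int>& sign;    // second output, was a zipped reference
  const std::vector<float>& x;

  void operator()(size_t idx) const {
    norm[idx] = std::fabs(x[idx]);
    sign[idx] = (x[idx] > 0.f) - (x[idx] < 0.f);
  }
};

int main() {
  std::vector<float> x = {-2.f, 0.f, 3.f};
  std::vector<float> norm(x.size());
  std::vector<int> sign(x.size());

  KernelSketch k{norm, sign, x};
  for (size_t i = 0; i < x.size(); ++i) k(i);

  for (size_t i = 0; i < x.size(); ++i)
    std::printf("%g -> (%g, %d)\n", x[i], norm[i], sign[i]);
}
```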

struct Kernel02 {
VecView<int> s;
VecView<float> z;
VecView<const glm::vec3> vertPosP;
VecView<const Halfedge> halfedgeQ;
VecView<const glm::vec3> vertPosQ;
@@ -281,11 +283,11 @@ struct Kernel02 {
const SparseIndices &p0q2;
const bool forward;

void operator()(thrust::tuple<size_t, int &, float &> inout) {
const int p0 = p0q2.Get(thrust::get<0>(inout), !forward);
const int q2 = p0q2.Get(thrust::get<0>(inout), forward);
int &s02 = thrust::get<1>(inout);
float &z02 = thrust::get<2>(inout);
void operator()(const size_t idx) {
const int p0 = p0q2.Get(idx, !forward);
const int q2 = p0q2.Get(idx, forward);
int &s02 = s[idx];
float &z02 = z[idx];

// For yzzLR[k], k==0 is the left and k==1 is the right.
int k = 0;
@@ -353,17 +355,18 @@ std::tuple<Vec<int>, Vec<float>> Shadow02(const Manifold::Impl &inP,
Vec<float> z02(p0q2.size());

auto vertNormalP = forward ? inP.vertNormal_ : inQ.vertNormal_;
for_each_n(autoPolicy(p0q2.size()),
zip(countAt(0_z), s02.begin(), z02.begin()), p0q2.size(),
Kernel02({inP.vertPos_, inQ.halfedge_, inQ.vertPos_, expandP,
vertNormalP, p0q2, forward}));
for_each_n(autoPolicy(p0q2.size()), countAt(0_z), p0q2.size(),
Kernel02({s02, z02, inP.vertPos_, inQ.halfedge_, inQ.vertPos_,
expandP, vertNormalP, p0q2, forward}));

p0q2.KeepFinite(z02, s02);

return std::make_tuple(s02, z02);
};

struct Kernel12 {
VecView<int> x;
VecView<glm::vec3> v;
VecView<const int64_t> p0q2;
VecView<const int> s02;
VecView<const float> z02;
@@ -376,11 +379,11 @@ struct Kernel12 {
const bool forward;
const SparseIndices &p1q2;

void operator()(thrust::tuple<size_t, int &, glm::vec3 &> inout) {
int p1 = p1q2.Get(thrust::get<0>(inout), !forward);
int q2 = p1q2.Get(thrust::get<0>(inout), forward);
int &x12 = thrust::get<1>(inout);
glm::vec3 &v12 = thrust::get<2>(inout);
void operator()(const size_t idx) {
int p1 = p1q2.Get(idx, !forward);
int q2 = p1q2.Get(idx, forward);
int &x12 = x[idx];
glm::vec3 &v12 = v[idx];

// For xzyLR-[k], k==0 is the left and k==1 is the right.
int k = 0;
@@ -460,9 +463,8 @@ std::tuple<Vec<int>, Vec<glm::vec3>> Intersect12(
Vec<glm::vec3> v12(p1q2.size());

for_each_n(
autoPolicy(p1q2.size()), zip(countAt(0_z), x12.begin(), v12.begin()),
p1q2.size(),
Kernel12({p0q2.AsVec64(), s02, z02, p1q1.AsVec64(), s11, xyzz11,
autoPolicy(p1q2.size()), countAt(0_z), p1q2.size(),
Kernel12({x12, v12, p0q2.AsVec64(), s02, z02, p1q1.AsVec64(), s11, xyzz11,
inP.halfedge_, inQ.halfedge_, inP.vertPos_, forward, p1q2}));

p1q2.KeepFinite(v12, x12);
@@ -475,26 +477,11 @@ Vec<int> Winding03(const Manifold::Impl &inP, Vec<int> &vertices, Vec<int> &s02,
ZoneScoped;
// verts that are not shadowed (not in p0q2) have winding number zero.
Vec<int> w03(inP.NumVert(), 0);
// checking is slow, so just sort and reduce
auto policy = autoPolicy(vertices.size());
stable_sort(
policy, zip(vertices.begin(), s02.begin()),
zip(vertices.end(), s02.end()),
[](const thrust::tuple<int, int> &a, const thrust::tuple<int, int> &b) {
return thrust::get<0>(a) < thrust::get<0>(b);
});
Vec<int> w03val(w03.size());
Vec<int> w03vert(w03.size());
// sum known s02 values into w03 (winding number)
auto endPair = reduce_by_key<
thrust::pair<decltype(w03val.begin()), decltype(w03val.begin())>>(
policy, vertices.begin(), vertices.end(), s02.begin(), w03vert.begin(),
w03val.begin());
scatter(policy, w03val.begin(), endPair.second, w03vert.begin(), w03.begin());

if (reverse)
transform(policy, w03.begin(), w03.end(), w03.begin(),
thrust::negate<int>());
for_each_n(policy, countAt(0), s02.size(),
Collaborator:
this is much simpler!

[&w03, &vertices, &s02, reverse](const int i) {
AtomicAdd(w03[vertices[i]], s02[i] * (reverse ? -1 : 1));
});
return w03;
};
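The new Winding03 accumulates each shadow value straight into its vertex's winding number with an atomic add, replacing the old stable_sort + reduce_by_key + scatter chain. Below is a self-contained sketch of that scatter-add pattern using `std::atomic`; Manifold's `AtomicAdd` helper is assumed to provide the equivalent operation on its parallel backends. The trade-off is possible contention on vertices shared by many queries, in exchange for dropping the sort and the temporary buffers.

```cpp
#include <atomic>
#include <cstdio>
#include <vector>

int main() {
  // s02[i] is the shadow contribution of query i, vertices[i] the vertex it
  // belongs to; several i may map to the same vertex, hence the atomics.
  std::vector<int> vertices = {0, 2, 0, 1, 2};
  std::vector<int> s02 = {1, 1, -1, 1, 1};
  bool reverse = false;

  std::vector<std::atomic<int>> w03(3);
  for (auto& w : w03) w.store(0);

  // In the PR this loop body runs inside a parallel for_each_n; the atomic
  // add makes concurrent updates to the same vertex safe.
  for (size_t i = 0; i < s02.size(); ++i)
    w03[vertices[i]].fetch_add(s02[i] * (reverse ? -1 : 1));

  for (size_t v = 0; v < w03.size(); ++v)
    std::printf("w03[%zu] = %d\n", v, w03[v].load());
}
```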
} // namespace