Skip to content

Commit

Permalink
[Memory Snapshot][BE] Clean up record function callback scope (pytorch#130265)
Browse files Browse the repository at this point in the history

Summary: Set the scope to at::RecordScope::USER_SCOPE directly on the at::RecordFunctionCallback object (via `.scopes({at::RecordScope::USER_SCOPE})`), rather than checking `fn.scope()` inside each of the start and end callbacks.

Test Plan:
Ran locally; works as expected.

https://www.internalfb.com/pytorch_memory_visualizer/mvai_gpu_traces/tree/gpu_snapshot/fire-aaronshi-20240704-1709-7a80b83b/0/rank-0_itrn-1503.Jul_04_17_24_02.3577.snapshot.pickle

Differential Revision: D59477046

Pulled By: aaronenyeshi

Pull Request resolved: pytorch#130265
Approved by: https://github.com/davidberard98
  • Loading branch information
aaronenyeshi authored and pytorchmergebot committed Jul 9, 2024
1 parent ded469c commit 6c4efd4
Showing 1 changed file with 17 additions and 21 deletions.
38 changes: 17 additions & 21 deletions torch/csrc/cuda/memory_snapshot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,27 +101,23 @@ void _initRecordAnnotations() {
static c10::once_flag ra_init;
c10::call_once(ra_init, [&] {
// Save user annotations to CCA memory snapshot tool
at::addThreadLocalCallback(at::RecordFunctionCallback(
[](const at::RecordFunction& fn)
-> std::unique_ptr<at::ObserverContext> {
if (fn.scope() != at::RecordScope::USER_SCOPE) {
return nullptr; // only record user-defined scopes.
}
unwind::Frame frame{fn.name(), "START", 0};
auto r = std::make_shared<CapturedTraceback>();
r->recordUserDefinedFrame(frame);
c10::cuda::CUDACachingAllocator::recordAnnotation(r);
return nullptr;
},
[](const at::RecordFunction& fn, at::ObserverContext* ctx_ptr) {
if (fn.scope() != at::RecordScope::USER_SCOPE) {
return; // only record user-defined scopes.
}
unwind::Frame frame{fn.name(), "END", 0};
auto r = std::make_shared<CapturedTraceback>();
r->recordUserDefinedFrame(frame);
c10::cuda::CUDACachingAllocator::recordAnnotation(r);
}));
at::addThreadLocalCallback(
at::RecordFunctionCallback(
[](const at::RecordFunction& fn)
-> std::unique_ptr<at::ObserverContext> {
unwind::Frame frame{fn.name(), "START", 0};
auto r = std::make_shared<CapturedTraceback>();
r->recordUserDefinedFrame(frame);
c10::cuda::CUDACachingAllocator::recordAnnotation(r);
return nullptr;
},
[](const at::RecordFunction& fn, at::ObserverContext* ctx_ptr) {
unwind::Frame frame{fn.name(), "END", 0};
auto r = std::make_shared<CapturedTraceback>();
r->recordUserDefinedFrame(frame);
c10::cuda::CUDACachingAllocator::recordAnnotation(r);
})
.scopes({at::RecordScope::USER_SCOPE}));
});
}

Expand Down

0 comments on commit 6c4efd4

Please sign in to comment.