From ae0c85c507fd4683712a4eacdc5728a7449154a8 Mon Sep 17 00:00:00 2001 From: Roger Waleffe Date: Tue, 21 Nov 2023 11:08:07 -0600 Subject: [PATCH] remove some printing --- src/cpp/src/common/util.cpp | 4 +- src/cpp/src/pipeline/pipeline_gpu.cpp | 78 +++++++++++++-------------- 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/src/cpp/src/common/util.cpp b/src/cpp/src/common/util.cpp index 40515fd1..76a2216d 100644 --- a/src/cpp/src/common/util.cpp +++ b/src/cpp/src/common/util.cpp @@ -203,8 +203,8 @@ torch::Tensor receive_tensor(shared_ptr pg, int worker_i } torch::Tensor sizes = metadata.narrow(0, 0, dim); - std::cout< sizes_vec = std::vector(data_ptr, end); diff --git a/src/cpp/src/pipeline/pipeline_gpu.cpp b/src/cpp/src/pipeline/pipeline_gpu.cpp index 06d8737f..d5520b41 100644 --- a/src/cpp/src/pipeline/pipeline_gpu.cpp +++ b/src/cpp/src/pipeline/pipeline_gpu.cpp @@ -44,8 +44,8 @@ void updateEvalForBatch(Pipeline* pipeline_, shared_ptr batch) { void RemoteLoadWorker::run() { while (!done_) { while (!paused_) { - Timer t = new Timer(false); - t.start(); +// Timer t = new Timer(false); +// t.start(); // NOTE: this "train" is probably not set correctly all the time shared_ptr batch = std::make_shared(pipeline_->dataloader_->train_); @@ -86,8 +86,8 @@ void RemoteLoadWorker::run() { } else { ((PipelineCPU *)pipeline_)->loaded_batches_->blocking_push(batch); } - t.stop(); - std::cout<<"remote load: "<loaded_batches_->blocking_pop(); - t.stop(); - std::cout<<"remote to block: "<(tup); shared_ptr batch = std::get<1>(tup); if (!popped) { @@ -124,8 +124,8 @@ void RemoteToDeviceWorker::run() { batch->creator_id_ = pipeline_->model_->pg_gloo_->pg->getRank(); batch->remoteTo(pipeline_->model_->pg_gloo_->pg, child, tag); - t.stop(); - std::cout<<"remote to: "<loaded_batches_->blocking_pop(); - t.stop(); - std::cout<<"batch to block: "<(tup); shared_ptr batch = std::get<1>(tup); @@ -181,12 +181,12 @@ void ComputeWorkerGPU::run() { while (!done_) { while (!paused_) { - Timer t = new Timer(false); - t.start(); +// Timer t = new Timer(false); +// t.start(); auto tup = ((PipelineGPU *)pipeline_)->device_loaded_batches_[gpu_id_]->blocking_pop(); - t.stop(); - std::cout<<"compute block: "<(tup); shared_ptr batch = std::get<1>(tup); if (!popped) { @@ -276,8 +276,8 @@ void ComputeWorkerGPU::run() { batch->clear(); } } - t.stop(); - std::cout<<"compute: "<device_update_batches_[gpu_id_]->blocking_pop(); - t.stop(); - std::cout<<"batch to host block: "<(tup); shared_ptr batch = std::get<1>(tup); if (!popped) { @@ -339,8 +339,8 @@ void BatchToHostWorker::run() { } ((PipelineGPU *)pipeline_)->update_batches_->blocking_push(batch); - t.stop(); - std::cout<<"batch to host: "<update_batches_->blocking_pop(); - t.stop(); - std::cout<<"remote to host block: "<(tup); shared_ptr batch = std::get<1>(tup); if (!popped) { @@ -391,8 +391,8 @@ void RemoteToHostWorker::run() { lock.unlock(); batch->remoteTo(pipeline_->model_->pg_gloo_->pg, parent, tag); - t.stop(); - std::cout<<"remote to host: "< batch = std::make_shared(pipeline_->dataloader_->train_); @@ -434,8 +434,8 @@ void RemoteListenForUpdatesWorker::run() { batch->remoteReceive(pipeline_->model_->pg_gloo_->pg, child, tag); ((PipelineGPU *)pipeline_)->update_batches_->blocking_push(batch); - t.stop(); - std::cout<<"remote listen: "<