Skip to content

Commit

Permalink
unique feature loading
Browse files Browse the repository at this point in the history
  • Loading branch information
Roger Waleffe authored and Roger Waleffe committed Nov 21, 2023
1 parent 72ce035 commit 7c5dc2d
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 10 deletions.
60 changes: 58 additions & 2 deletions src/cpp/src/data/dataloader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ void DataLoader::getBatchHelper(shared_ptr<Batch> batch, int worker_id) {
batch->dense_graph_.partition_size_ = graph_storage_->getPartitionSize();
}

loadCPUParameters(batch);
// loadCPUParameters(batch);
}

shared_ptr<Batch> DataLoader::getBatch(at::optional<torch::Device> device, bool perform_map, int worker_id) {
Expand Down Expand Up @@ -480,6 +480,8 @@ shared_ptr<Batch> DataLoader::getBatch(at::optional<torch::Device> device, bool

batch->sub_batches_ = sub_batches;

// loadCPUParameters(batch);

return batch;
}

Expand Down Expand Up @@ -645,7 +647,61 @@ void DataLoader::loadCPUParameters(shared_ptr<Batch> batch) {
if (only_root_features_) {
batch->node_features_ = graph_storage_->getNodeFeatures(batch->root_node_indices_);
} else {
batch->node_features_ = graph_storage_->getNodeFeatures(batch->unique_node_indices_);
// batch->node_features_ = graph_storage_->getNodeFeatures(batch->unique_node_indices_);



if (batch->sub_batches_.size() > 0) {
// std::cout << "start\n";
std::vector<torch::Tensor> all_unique_nodes_vec(batch->sub_batches_.size());
// int total_unique_nodes = 0;

// #pragma omp parallel for # TODO
for (int i = 0; i < batch->sub_batches_.size(); i++) {
all_unique_nodes_vec[i] = batch->sub_batches_[i]->unique_node_indices_;
// total_unique_nodes += batch->sub_batches_[i]->unique_node_indices_.size(0);

// std::cout << batch->sub_batches_[i]->unique_node_indices_.sizes() << " "
// << batch->sub_batches_[i]->unique_node_indices_.device() << "\n";
}

// std::cout << "cat\n";
torch::Tensor all_unique_nodes = torch::cat({all_unique_nodes_vec}, 0);
// std::cout << all_unique_nodes.sizes() << "\n";
auto unique_nodes = torch::_unique2(all_unique_nodes, true, true, false);
torch::Tensor unique_indices = std::get<0>(unique_nodes);
torch::Tensor inverse = std::get<1>(unique_nodes);
torch::Tensor unique_features = graph_storage_->getNodeFeatures(unique_indices);
// std::cout << unique_indices.sizes() << "\n";
// std::cout << inverse.sizes() << " " << inverse.device() << "\n";
// std::cout << unique_features.sizes() << " " << unique_features.device() << "\n";
std::cout<<unique_indices.size(0)<<" vs " <<all_unique_nodes.size(0)<<"\n";

// std::cout << "end cat\n";
int count = 0;
int count1 = 0;
int split_size = (int) ceil((float) unique_features.size(0) / batch->sub_batches_.size());
for (int i = 0; i < batch->sub_batches_.size(); i++) {
if (!batch->sub_batches_[i]->node_features_.defined()) {
batch->sub_batches_[i]->unique_node_indices_ = inverse.narrow(0, count, batch->sub_batches_[i]->unique_node_indices_.size(0));
count += batch->sub_batches_[i]->unique_node_indices_.size(0);
// std::cout << batch->sub_batches_[i]->unique_node_indices_.sizes() << "\n";

int size = split_size;
if (count1 + split_size > unique_features.size(0)) size = unique_features.size(0) - count1;
batch->sub_batches_[i]->node_features_ = unique_features.narrow(0, count1, size);
count1 += size;
// std::cout << batch->sub_batches_[i]->node_features_.sizes() << "\n";
}
}
// std::cout << "end\n";
} else {
batch->node_features_ = graph_storage_->getNodeFeatures(batch->unique_node_indices_);
}




}
}
}
Expand Down
49 changes: 41 additions & 8 deletions src/cpp/src/pipeline/pipeline_gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,17 +151,13 @@ void BatchToDeviceWorker::run() {
}

if (batch->sub_batches_.size() > 0) {
// #pragma omp parallel for # TODO
for (int i = 0; i < batch->sub_batches_.size(); i++) {
if (!batch->sub_batches_[i]->node_features_.defined())
batch->sub_batches_[i]->node_features_ = pipeline_->dataloader_->graph_storage_->getNodeFeatures(batch->sub_batches_[i]->unique_node_indices_);
// batch->sub_batches_[i]->node_labels_ = pipeline_->dataloader_->graph_storage_->getNodeLabels(
// batch->sub_batches_[i]->dense_graph_.node_ids_.narrow(0, batch->sub_batches_[i]->dense_graph_.hop_offsets_[-2].item<int64_t>(),
// (batch->sub_batches_[i]->dense_graph_.node_ids_.size(0)-batch->sub_batches_[i]->dense_graph_.hop_offsets_[-2]).item<int64_t>())).flatten(0, 1);
if (!batch->sub_batches_[0]->node_features_.defined()) {
pipeline_->dataloader_->loadCPUParameters(batch);
}
} else {
if (!batch->node_features_.defined())
batch->node_features_ = pipeline_->dataloader_->graph_storage_->getNodeFeatures(batch->unique_node_indices_);
pipeline_->dataloader_->loadCPUParameters(batch);
// batch->node_features_ = pipeline_->dataloader_->graph_storage_->getNodeFeatures(batch->unique_node_indices_);
// batch->node_labels_ = pipeline_->dataloader_->graph_storage_->getNodeLabels(
// batch->dense_graph_.node_ids_.narrow(0, batch->dense_graph_.hop_offsets_[-2].item<int64_t>(),
// (batch->dense_graph_.node_ids_.size(0)-batch->dense_graph_.hop_offsets_[-2]).item<int64_t>())).flatten(0, 1);
Expand Down Expand Up @@ -205,11 +201,48 @@ void ComputeWorkerGPU::run() {
streams_for_multi_guard.emplace_back(*(pipeline_->dataloader_->compute_streams_[i]));
}


int unique_size = 0;
int feat_dim = batch->sub_batches_[0]->node_features_.size(1);
for (int i = 0; i < batch->sub_batches_.size(); i++) {
unique_size += batch->sub_batches_[i]->node_features_.size(0);
}
std::vector<torch::Tensor> unique_features_per_gpu(batch->sub_batches_.size());

// std::cout<<"start"<<"\n";
// std::cout<<unique_size<<"\n";
// std::cout<<feat_dim<<"\n";


#pragma omp parallel
{
#pragma omp for
for (int i = 0; i < batch->sub_batches_.size(); i++) {
CudaStreamGuard stream_guard(*(pipeline_->dataloader_->compute_streams_[i]));
auto device_options = torch::TensorOptions().dtype(torch::kFloat16).device(batch->sub_batches_[i]->node_features_.device());

torch::Tensor unique_node_features = torch::zeros({unique_size, feat_dim}, device_options);
// std::cout<<unique_node_features.sizes()<<"\n";

int count = 0;
for (int j = 0; j < batch->sub_batches_.size(); j++) {
unique_node_features.narrow(0, count, batch->sub_batches_[j]->node_features_.size(0)).copy_(batch->sub_batches_[j]->node_features_);
count += batch->sub_batches_[j]->node_features_.size(0);
// std::cout<<unique_node_features.sizes()<<"\n";
}

unique_features_per_gpu[i] = unique_node_features;
}

#pragma omp for
for (int i = 0; i < batch->sub_batches_.size(); i++) {
CudaStreamGuard stream_guard(*(pipeline_->dataloader_->compute_streams_[i]));
auto device_options = torch::TensorOptions().dtype(torch::kFloat16).device(batch->sub_batches_[i]->node_features_.device());

batch->sub_batches_[i]->node_features_ = torch::zeros({batch->sub_batches_[i]->unique_node_indices_.size(0), feat_dim}, device_options);
torch::index_select_out(batch->sub_batches_[i]->node_features_, unique_features_per_gpu[i], 0, batch->sub_batches_[i]->unique_node_indices_);
// std::cout<<batch->sub_batches_[i]->node_features_.sizes()<<"\n";

pipeline_->model_->device_models_[i]->clear_grad();
pipeline_->model_->device_models_[i]->train_batch(batch->sub_batches_[i], false);
}
Expand Down

0 comments on commit 7c5dc2d

Please sign in to comment.