Skip to content

Commit

Permalink
Add logic to write output with encoded Ids.
Browse files Browse the repository at this point in the history
Differential Revision: D44645325

fbshipit-source-id: 8b3efd3e9785884d7e2d8b1696ea826d511e0cb5
  • Loading branch information
ajinkyaghonge authored and facebook-github-bot committed Apr 6, 2023
1 parent aea927b commit b9aa883
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 17 deletions.
71 changes: 56 additions & 15 deletions fbpcs/pc_translator/PCTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,27 @@
#include <fbpcf/mpc_std_lib/oram/encoder/IOramEncoder.h>
#include <fbpcf/mpc_std_lib/oram/encoder/OramEncoder.h>
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <set>
#include <stdexcept>
#include <string>
#include "fbpcs/emp_games/common/Csv.h"
#include "folly/String.h"

namespace pc_translator {

std::string PCTranslator::encode(const std::string& inputDataset) {
std::string PCTranslator::encode(const std::string& inputDatasetPath) {
auto validInstructionSetNames =
PCTranslator::retrieveInstructionSetNamesForRun(pcsFeatures_);
auto pcInstructionSets =
PCTranslator::retrieveInstructionSets(validInstructionSetNames);
if (pcInstructionSets.empty()) {
// No instruction set found. return the input dataset path.
return inputDataset;
return inputDatasetPath;
}
return PCTranslator::transformDataset(
inputDataset, pcInstructionSets.front());
inputDatasetPath, pcInstructionSets.front());
}

std::string PCTranslator::decode(
Expand Down Expand Up @@ -79,30 +82,43 @@ std::vector<std::string> PCTranslator::retrieveInstructionSetNamesForRun(
}

std::string PCTranslator::transformDataset(
const std::string& inputData,
const std::string& inputDatasetPath,
std::shared_ptr<pc_translator::PCInstructionSet> pcInstructionSet) {
// Parse the input CSV
auto lineNo = 0;
std::vector<std::vector<uint32_t>> inputColums;
std::vector<std::string> outputHeader;
std::vector<std::vector<std::string>> outputContent;
private_measurement::csv::readCsv(
inputData,
inputDatasetPath,
[&](const std::vector<std::string>& header,
const std::vector<std::string>& parts) {
std::vector<uint32_t> inputColumnPerRow;
std::string column;
std::uint32_t value;
bool found = false;
std::vector<std::string> outputContentPerRow;
for (std::vector<std::string>::size_type i = 0; i < header.size();
++i) {
auto& column = header[i];
auto value = std::atoi(parts[i].c_str());
auto iter = std::find(
pcInstructionSet->getGroupByIds().begin(),
pcInstructionSet->getGroupByIds().end(),
column);
if (iter != pcInstructionSet->getGroupByIds().end()) {
column = header[i];
value = std::atoi(parts[i].c_str());
found =
(std::find(
pcInstructionSet->getGroupByIds().begin(),
pcInstructionSet->getGroupByIds().end(),
column) != pcInstructionSet->getGroupByIds().end());
if (found) {
inputColumnPerRow.push_back(value);
} else {
if (lineNo == 0) {
outputHeader.push_back(header[i]);
}
outputContentPerRow.push_back(parts[i]);
}
}

inputColums.push_back(inputColumnPerRow);
outputContent.push_back(outputContentPerRow);
lineNo++;
});

Expand All @@ -114,9 +130,34 @@ std::string PCTranslator::transformDataset(

auto encodedIndexes = encoder->generateORAMIndexes(inputColums);

// TODO: Append the encodedIndexes at the end of publisher output and return
// output path.
return "";
auto dir = inputDatasetPath.substr(0, inputDatasetPath.rfind("/") + 1);
auto output_dataset_path = dir + "transformed_publisher_input.csv";

PCTranslator::putOutputData(
output_dataset_path, outputHeader, outputContent, encodedIndexes);
return output_dataset_path;
}

/**
 * Append the encoded indexes as a trailing "breakdown_id" column and write the
 * resulting table with private_measurement::csv::writeCsv.
 *
 * @param output_dataset_path Destination handed to writeCsv.
 * @param outputHeader Column names; "breakdown_id" is appended in place.
 * @param outputContent Data rows; row i receives encodedIndexes[i] (as a
 *        decimal string) appended in place.
 * @param encodedIndexes One encoded index per row of `outputContent`.
 * @throws std::runtime_error if the number of rows differs from the number of
 *         encoded indexes. Nothing is modified or written in that case.
 */
void PCTranslator::putOutputData(
    const std::string& output_dataset_path,
    std::vector<std::string>& outputHeader,
    std::vector<std::vector<std::string>>& outputContent,
    const std::vector<uint32_t>& encodedIndexes) {
  // Validate before touching the caller's vectors so that a size mismatch
  // leaves both the header and the rows exactly as they were passed in.
  if (outputContent.size() != encodedIndexes.size()) {
    throw std::runtime_error(
        "Encoded index vector size should match the input vector size.");
  }

  outputHeader.push_back("breakdown_id");

  for (std::vector<uint32_t>::size_type row = 0; row < encodedIndexes.size();
       ++row) {
    outputContent[row].push_back(std::to_string(encodedIndexes[row]));
  }

  private_measurement::csv::writeCsv(
      output_dataset_path, outputHeader, outputContent);
}

std::shared_ptr<PCInstructionSet> PCTranslator::parseInstructionSet(
Expand Down
6 changes: 6 additions & 0 deletions fbpcs/pc_translator/PCTranslator.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ class PCTranslator {
/**
 * Transform the CSV dataset located at `inputDatasetPath` according to
 * `pcInstructionSet`.
 *
 * The definition reads the CSV from this path and writes its result to
 * "transformed_publisher_input.csv" in the same directory.
 *
 * @param inputDatasetPath Path of the input CSV (a path, not the CSV
 *        contents).
 * @param pcInstructionSet Instruction set whose group-by ids select the
 *        columns that get encoded.
 * @return Path of the transformed CSV that was written.
 */
std::string transformDataset(
    const std::string& inputDatasetPath,
    std::shared_ptr<pc_translator::PCInstructionSet> pcInstructionSet);

/**
 * Append the encoded indexes to the output table as a "breakdown_id" column
 * and write the table as CSV to `output_dataset_path`.
 *
 * `outputHeader` and `outputContent` are modified in place: the header gains
 * "breakdown_id" and each row gains its encoded index as a string.
 *
 * @throws std::runtime_error if `outputContent` and `encodedIndexes` differ
 *         in size.
 */
void putOutputData(
const std::string& output_dataset_path,
std::vector<std::string>& outputHeader,
std::vector<std::vector<std::string>>& outputContent,
const std::vector<uint32_t>& encodedIndexes);
};

} // namespace pc_translator
22 changes: 20 additions & 2 deletions fbpcs/pc_translator/tests/TestPCTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* LICENSE file in the root directory of this source tree.
*/

#include <fbpcf/io/api/FileIOWrappers.h>
#include <gtest/gtest.h>
#include "../../emp_games/common/TestUtil.h"
#include "fbpcs/pc_translator/PCTranslator.h"
Expand All @@ -16,21 +17,38 @@ class TestPCTranslator : public ::testing::Test {
std::string pcs_features_;
std::string test_instruction_set_base_path_;
std::string test_publisher_input_path_;
std::string test_transformed_output_path_;
std::string expected_transformed_output_path_;

// Prepares the fixture: feature string, instruction set directory, and a
// scratch copy of the publisher input under /tmp.
void SetUp() override {
  pcs_features_ =
      "'num_mpc_container_mutation', 'private_lift_unified_data_process', 'pc_instr_test_instruction_set'";
  std::string baseDir =
      private_measurement::test_util::getBaseDirFromPath(__FILE__);
  test_instruction_set_base_path_ = baseDir + "input_processing/";
  // The translator writes its output next to its input, so the test runs
  // against a copy in /tmp rather than against the checked-in fixture.
  test_publisher_input_path_ = "/tmp/publisher_unittest.csv";
  test_transformed_output_path_ = "/tmp/transformed_publisher_input.csv";
  expected_transformed_output_path_ =
      baseDir + "expected_transformed_publisher_input.csv";
  auto contents =
      fbpcf::io::FileIOWrappers::readFile(baseDir + "publisher_unittest.csv");
  fbpcf::io::FileIOWrappers::writeFile(test_publisher_input_path_, contents);
}

// Removes the scratch files used by the test: first the staged input copy,
// then the transformed output. Failures to remove are ignored.
void TearDown() override {
  const std::string* const scratchFiles[] = {
      &test_publisher_input_path_, &test_transformed_output_path_};
  for (const std::string* file : scratchFiles) {
    std::remove(file->c_str());
  }
}
};

// encode() must return the path of the transformed CSV written next to the
// staged input, and that file must match the checked-in expected output.
TEST_F(TestPCTranslator, TestEncode) {
  auto pcTranslator = std::make_shared<PCTranslator>(
      pcs_features_, test_instruction_set_base_path_);
  auto outputPath = pcTranslator->encode(test_publisher_input_path_);
  auto contents = fbpcf::io::FileIOWrappers::readFile(outputPath);
  auto expectedContents =
      fbpcf::io::FileIOWrappers::readFile(expected_transformed_output_path_);
  EXPECT_EQ(outputPath, test_transformed_output_path_);
  EXPECT_EQ(contents, expectedContents);
}
} // namespace pc_translator
13 changes: 13 additions & 0 deletions fbpcs/pc_translator/tests/expected_transformed_publisher_input.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
id_,opportunity,test_flag,opportunity_timestamp,breakdown_id
cfcd208495d565ef66e7dff9f98764da,1,0,1600000430,0
c4ca4238a0b923820dcc509a6f75849b,1,1,1600000401,1
c81e728d9d4c2f636f067f89cc14862c,0,0,0,2
eccbc87e4b5ce2fe28308fd9f2a7baf3,0,0,0,3
a87ff679a2f3e71d9181a67b7542122c,0,0,0,0
e4da3b7fbbce2345d7772b0674a318d5,1,1,1600000461,4
1679091c5a880faf6fb5e6087eb1b2dc,1,0,1600000052,5
8f14e45fceea167a5a36dedd4bea2543,1,0,1600000831,6
c9f0f895fb98ab9159f51fd0297e236d,1,0,1600000530,7
45c48cce2e2d7fbdea1afc51c7c6ad26,1,0,1600000972,5
d3d9446802a44259755d38e6d163e820,0,0,0,0
6512bd43d9caa6e02c990b0a82652dca,0,0,0,0

0 comments on commit b9aa883

Please sign in to comment.