We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
#include <CSVReader.h> // From previous post.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <map>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

namespace {
using namespace std;

/// In-memory table built from CSV data.
///
/// Rows live in a std::map keyed by the value of column 0 (the primary key);
/// the mapped vector holds the remaining column values in order. Column names
/// are kept as an ordered list of (name, column id) pairs.
class MemTable {
  using ColIdMap = vector<pair<string, int>>;
  using OrderedRows = map<string, vector<string>>;

 public:
  /// Creates an empty table that has a name but no columns and no rows.
  explicit MemTable(string tableName) : m_tblName(std::move(tableName)) {}

  /// Builds a table from `rdr`.
  ///
  /// The first row supplies the column names. In every later row, column 0 is
  /// the primary key and the remaining columns are appended to that key's row.
  /// A row is only stored once a non-key column is seen, so rows consisting of
  /// the key alone are not stored.
  explicit MemTable(string tableName, CSVReader& rdr)
      : m_tblName(std::move(tableName)) {
    bool firstRow = true;
    string pKey;
    for (auto& row : rdr) {
      int colId = -1;
      for (auto& col : row) {
        ++colId;
        if (firstRow) {
          // First Row has Names.
          // Construct Name -> Id mapping.
          m_colIds.push_back({col, colId});
          continue;
        }
        if (colId == 0) {
          // Handle Pkey. Assuming Col0.
          pKey = std::move(col);
        } else {
          // Handle remaining cols .
          m_data[pKey].push_back(col);
        }
      }
      // Mark first row processing done.
      firstRow = false;
    }
  }

  /// Iterator over the rows of the table. Dereferencing yields the iterator
  /// itself, which in turn can be iterated column by column.
  struct RowIter {
    /// Iterator over the column values of one row. `colId == -1` is the end
    /// sentinel.
    struct ColIter {
      ColIter(RowIter& p, long id = -1) : pItr(p), colId(id) {}

      // Equality only looks at the column index, not at the parent row.
      bool operator==(const ColIter& other) const {
        return this == &other || colId == other.colId;
      }
      bool operator!=(const ColIter& other) const { return !(*this == other); }

      // Stepping past the last column resets the index to the end sentinel.
      ColIter& operator++() {
        if (inRange()) {
          ++colId;
        } else {
          colId = -1;
        }
        return *this;
      }
      ColIter operator++(int) {
        auto t = *this;
        ++(*this);
        return t;
      }

      string operator*() const { return pItr.getColVal(colId); }

      /// True while the index refers to a column before the last one, i.e.
      /// while incrementing still lands on a valid column.
      bool inRange() const { return colId >= 0 && colId < pItr.numCols() - 1; }

      RowIter& pItr;
      long colId;
    };

    explicit RowIter(OrderedRows::iterator&& itr) : ptr(itr) {}

    RowIter& operator*() { return *this; }
    RowIter& operator++() {
      ++ptr;
      return *this;
    }
    RowIter operator++(int) {
      auto t = *this;
      ++(*this);
      return t;
    }
    bool operator==(const RowIter& other) const {
      return this == &other || ptr == other.ptr;
    }
    bool operator!=(const RowIter& other) const { return !(*this == other); }

    ColIter begin() { return ColIter(*this, 0); }
    ColIter end() { return ColIter(*this); }

    /// Number of columns in this row, counting the primary key.
    int numCols() const { return static_cast<int>(ptr->second.size()) + 1; }

    /// Value of column `colId`; column 0 is the primary key. No bounds check
    /// is performed.
    string getColVal(size_t colId) const {
      return colId == 0 ? ptr->first : ptr->second[colId - 1];
    }

    OrderedRows::iterator ptr;
  };

  /// Returns row iterators sorted by the string value of `orderingCol`.
  /// @throws std::invalid_argument if the column name is unknown.
  vector<RowIter> orderByColumn(const string& orderingCol) {
    auto colId = toColId(orderingCol);
    if (colId == -1)
      throw std::invalid_argument("Invalid ordering Col: " + orderingCol);
    vector<RowIter> res;
    res.reserve(m_data.size());
    for (auto r : *this) {
      res.push_back(std::move(r));
    }
    std::sort(res.begin(), res.end(), [&](auto&& l, auto&& r) {
      return l.getColVal(colId) < r.getColVal(colId);
    });
    return res;
  }

  RowIter begin() { return RowIter(m_data.begin()); }
  RowIter end() { return RowIter(m_data.end()); }

  /// Looks a row up by primary key; returns end() when there is none.
  RowIter find(const string& k) { return RowIter(m_data.find(k)); }

  /// Maps a column name to its id, or -1 when the name is unknown.
  int toColId(const string& colName) const {
    auto itr = find_if(m_colIds.begin(), m_colIds.end(),
                       [&colName](auto& v) { return v.first == colName; });
    if (itr != m_colIds.end()) {
      return itr->second;
    }
    return -1;
  }

  /// Counts how often each distinct value occurs in column `colName`.
  /// Also writes one "Counting col" line per row to stdout.
  /// @throws std::invalid_argument if the column name is unknown.
  map<string, long long> countBy(const string& colName) {
    auto colId = toColId(colName);
    if (colId == -1) {
      throw std::invalid_argument("Unknown column for table " + m_tblName);
    }
    map<string, long long> result;
    for (auto&& r : *this) {
      const auto& colToCount = r.getColVal(colId);
      cout << "Counting col " << colToCount << "\n";
      ++result[colToCount];
    }
    return result;
  }

  /// Prints the table name, the column names and every row to stdout.
  void printTable() {
    cout << "Table Name = [ " << m_tblName << " ]\n";
    for (auto& [colName, _] : m_colIds) {
      cout << " | " << colName << " | ";
    }
    std::cout << "\n";
    for (auto&& r : *this) {
      for (auto&& c : r) {
        std::cout << " | " << c << " | ";
      }
      std::cout << "\n";
    }
  }

  /// Appends the columns of `data` to this table, pairing rows positionally
  /// (in key order of each table).
  ///
  /// @param data         table whose columns are appended.
  /// @param selectedCols when true, the columns named in `colsToSkip` are
  ///                     left out; when false, `colsToSkip` is ignored.
  /// @param colsToSkip   names of columns of `data` to omit.
  /// @throws std::invalid_argument if this table has rows and the row counts
  ///         differ.
  void extend(MemTable& data, bool selectedCols = false,
              vector<string> colsToSkip = {}) {
    if (!m_data.empty() && m_data.size() != data.size()) {
      throw std::invalid_argument("Not enough rows to match table " + m_tblName);
    }
    // Extend colNames.
    m_tblName += " + ";
    m_tblName += data.name();

    // Names and values must be filtered under the same condition, otherwise
    // the header and the row data drift apart.
    set<int> colIdsToSkip;
    if (selectedCols) {
      for (const auto& c : colsToSkip) {
        colIdsToSkip.insert(data.toColId(c));
      }
    }
    auto canSkipCol = [&](const string& colName) {
      return selectedCols &&
             std::find(colsToSkip.begin(), colsToSkip.end(), colName) !=
                 colsToSkip.end();
    };

    auto newColId = m_colIds.size();
    for (auto& cn : data.colNames()) {
      if (canSkipCol(cn)) continue;
      m_colIds.emplace_back(std::move(cn), static_cast<int>(newColId++));
    }

    auto writer = m_data.begin();
    for (auto&& r : data) {
      // The size check above lets an empty destination through; there is no
      // row to write into then, so stop instead of dereferencing end().
      // NOTE(review): intended semantics for an empty destination are not
      // evident from this file — confirm whether rows should be adopted.
      if (writer == m_data.end()) break;
      const int width = r.numCols();
      for (int cid = 0; cid < width; ++cid) {
        if (colIdsToSkip.count(cid) != 0) continue;
        writer->second.push_back(r.getColVal(cid));
      }
      ++writer;
    }
  }

  /// Appends to every row the columns of the `joinTbl` row that has the same
  /// value in column `joinCol` (the join column itself is not repeated).
  ///
  /// When several rows of `joinTbl` share a join value, the first one in key
  /// order is used. Rows without a match are padded with empty strings so
  /// that every row stays as wide as the extended header.
  ///
  /// @throws std::invalid_argument if this table has rows and the row counts
  ///         differ, or if `joinCol` is missing from either table.
  void join(MemTable& joinTbl, const string& joinCol) {
    if (!m_data.empty() && m_data.size() != joinTbl.size()) {
      throw std::invalid_argument("Not enough rows to match table " + m_tblName);
    }
    // The join column may sit at a different position in each table, so it
    // is resolved separately for both sides.
    const int ownColId = toColId(joinCol);
    const int jColId = joinTbl.toColId(joinCol);
    if (ownColId == -1 || jColId == -1) {
      throw std::invalid_argument("Unknown join column: " + joinCol);
    }

    // Extend colNames.
    m_tblName += " JOIN ";
    m_tblName += joinTbl.name();
    auto newColId = m_colIds.size();
    size_t addedCols = 0;
    for (auto& cn : joinTbl.colNames()) {
      if (cn == joinCol) continue;
      m_colIds.emplace_back(std::move(cn), static_cast<int>(newColId++));
      ++addedCols;
    }

    // Index the other table by join value; emplace keeps the first row seen.
    map<string, RowIter> joinRefs;
    for (auto&& jr : joinTbl) {
      joinRefs.emplace(jr.getColVal(jColId), jr);
    }

    for (auto rowPtr = begin(); rowPtr != end(); ++rowPtr) {
      auto& cells = rowPtr.ptr->second;
      const size_t paddedWidth = cells.size() + addedCols;
      // Copy the key before the row is modified.
      const string jkey = rowPtr.getColVal(ownColId);
      auto jItr = joinRefs.find(jkey);
      if (jItr != joinRefs.end()) {
        auto& rdr = jItr->second;
        const int width = rdr.numCols();
        for (int cid = 0; cid < width; ++cid) {
          if (cid == jColId) continue;
          cells.push_back(rdr.getColVal(cid));
        }
      }
      if (cells.size() < paddedWidth) {
        cells.resize(paddedWidth);
      }
    }
  }

  string& name() { return m_tblName; }
  size_t size() const { return m_data.size(); }

  /// Column names in header order.
  vector<string> colNames() const {
    vector<string> res;
    res.reserve(m_colIds.size());
    transform(m_colIds.begin(), m_colIds.end(), back_inserter(res),
              [](const auto& p) { return p.first; });
    return res;
  }

 private:
  string m_tblName;
  OrderedRows m_data;
  ColIdMap m_colIds;
};
}  // namespace

int main() {
  const string testData = "Id0, Id1,Id2\n1,2,3\n4,5,6\n7,8,9";
  const string testData2 = "Id3, Id1,Id5\na,2,b\nc,5,d\ne,2,f";
  stringstream ss(testData);
  stringstream ss2(testData2);
  CSVReader rdr2(ss2);
  CSVReader rdr(ss);
  MemTable tbl("Test", rdr);
  MemTable tbl2("Test2", rdr2);

  tbl2.join(tbl, "Id1");
  tbl2.printTable();

  for (auto&& [value, count] : tbl.countBy("Id1")) {
    cout << "CountOf " << value << "," << count << "\n";
  }

  for (auto&& v : tbl.orderByColumn("Id1")) {
    for (auto&& c : v) {
      cout << "OrderedView (Id1) " << c << " ";
    }
    cout << "\n";
  }

  tbl.printTable();

  const string lookupKey = "1";
  auto itr = tbl.find(lookupKey);
  if (itr == tbl.end()) {
    // Report the key that was actually looked up.
    cout << "No row with Pkey " << lookupKey;
  } else {
    cout << "Via getColVal " << itr.getColVal(0) << "\n";
    cout << "Num cols = " << itr.numCols() << "\n";
    for (auto&& c : itr) cout << c << ",";
  }
  return 0;
}
Table Name = [ Test2 JOIN Test ] | Id3 | | Id1 | | Id5 | | Id0 | | Id2 | | a | | 2 | | b | | 1 | | 3 | | c | | 5 | | d | | 4 | | 6 | | e | | 2 | | f | | 1 | | 3 | Counting col 2 Counting col 5 Counting col 8 CountOf 2,1 CountOf 5,1 CountOf 8,1 OrderedView (Id1) 1 OrderedView (Id1) 2 OrderedView (Id1) 3 OrderedView (Id1) 4 OrderedView (Id1) 5 OrderedView (Id1) 6 OrderedView (Id1) 7 OrderedView (Id1) 8 OrderedView (Id1) 9 Table Name = [ Test ] | Id0 | | Id1 | | Id2 | | 1 | | 2 | | 3 | | 4 | | 5 | | 6 | | 7 | | 8 | | 9 | Via getColVal 1 Num cols = 3 1,2,3, Ret: 0
The text was updated successfully, but these errors were encountered:
aakshintala
No branches or pull requests
Output
The text was updated successfully, but these errors were encountered: