Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bored... Simple idiomatic MemDb #2

Open
PyRO69 opened this issue Jan 23, 2022 · 0 comments
Open

Bored... Simple idiomatic MemDb #2

PyRO69 opened this issue Jan 23, 2022 · 0 comments
Assignees

Comments

@PyRO69
Copy link
Collaborator

PyRO69 commented Jan 23, 2022

#include <CSVReader.h> // From previous post.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

namespace {
using namespace std;

/// Small in-memory table of strings.
///
/// Rows are stored in a std::map keyed by the value of column 0 (the primary
/// key), so iteration always visits rows in ascending primary-key order.
/// Column names are kept separately as (name, position) pairs.
class MemTable {
    using ColIdMap = std::vector<std::pair<std::string, int>>;
    using OrderedRows = std::map<std::string, std::vector<std::string>>;

public:
    /// Creates an empty table (no columns, no rows) called `tableName`.
    explicit MemTable(std::string tableName) : m_tblName(std::move(tableName)) {}

    /// Builds a table from `rdr`, any range of rows where each row is a range
    /// of values convertible to std::string (e.g. the CSVReader used by the
    /// caller, or a std::vector<std::vector<std::string>>).
    ///
    /// The first row supplies the column names; in every later row the first
    /// value is the primary key and the remaining values are the cells.
    /// Rows that carry only a key (no further cells) are ignored. When two
    /// rows share a primary key the later row replaces the earlier one, so a
    /// row never grows wider than its source line. The source is only read,
    /// never moved from.
    template <typename Reader>
    explicit MemTable(std::string tableName, Reader& rdr) : m_tblName(std::move(tableName)) {
        bool headerPending = true;
        for (auto&& row : rdr) {
            if (headerPending) {
                // Header row: record name -> position.
                int colId = 0;
                for (auto&& col : row) {
                    m_colIds.emplace_back(col, colId++);
                }
                headerPending = false;
                continue;
            }
            std::string pKey;
            std::vector<std::string> cells;
            bool keySeen = false;
            for (auto&& col : row) {
                if (!keySeen) {
                    pKey = col;  // Column 0 is the primary key.
                    keySeen = true;
                } else {
                    cells.push_back(col);
                }
            }
            if (!cells.empty()) {
                m_data.insert_or_assign(std::move(pKey), std::move(cells));
            }
        }
    }

    /// Cursor over the rows of a table. Dereferencing yields the cursor
    /// itself, which in turn is a range over the row's column values.
    struct RowIter {
        /// Cursor over the columns of the row `pItr` currently points at.
        /// `colId == -1` is the past-the-end state.
        struct ColIter {
            ColIter(RowIter& p, long id = -1) : pItr(p), colId(id) {}
            // Equality looks at the column position only, not at the row.
            bool operator==(const ColIter& other) const { return this == &other || colId == other.colId; }
            bool operator!=(const ColIter& other) const { return !(*this == other); }
            /// Advances; stepping past the last column yields the end state.
            ColIter& operator++() {
                if (inRange()) {
                    ++colId;
                } else {
                    colId = -1;
                }
                return *this;
            }
            ColIter operator++(int) { auto t = *this; ++(*this); return t; }
            std::string operator*() const { return pItr.getColVal(static_cast<std::size_t>(colId)); }
            /// True while another column follows the current one.
            bool inRange() const { return colId >= 0 && colId < pItr.numCols() - 1; }
            RowIter& pItr;
            long colId;
        };

        explicit RowIter(OrderedRows::iterator&& itr) : ptr(itr) {}
        RowIter& operator*() { return *this; }
        RowIter& operator++() { ++ptr; return *this; }
        RowIter operator++(int) { auto t = *this; ++(*this); return t; }
        bool operator==(const RowIter& other) const { return this == &other || ptr == other.ptr; }
        bool operator!=(const RowIter& other) const { return !(*this == other); }
        ColIter begin() { return ColIter(*this, 0); }
        ColIter end() { return ColIter(*this); }
        /// Number of columns in this row, primary key included.
        int numCols() const { return static_cast<int>(ptr->second.size()) + 1; }
        /// Value of column `colId` (0 is the primary key).
        /// @throws std::out_of_range if the row has no such column.
        std::string getColVal(std::size_t colId) const {
            return colId == 0 ? ptr->first : ptr->second.at(colId - 1);
        }
        OrderedRows::iterator ptr;
    };

    /// Returns cursors to all rows, ordered by the (string) value of column
    /// `orderingCol`. Rows with equal values keep primary-key order.
    /// @throws std::invalid_argument if the column name is unknown.
    std::vector<RowIter> orderByColumn(const std::string& orderingCol) {
        const int colId = toColId(orderingCol);
        if (colId == -1)
            throw std::invalid_argument("Invalid ordering Col: " + orderingCol);
        std::vector<RowIter> res;
        res.reserve(m_data.size());
        for (auto&& r : *this) {
            res.push_back(r);
        }
        const auto col = static_cast<std::size_t>(colId);
        std::stable_sort(res.begin(), res.end(), [col](const RowIter& l, const RowIter& r) {
            return l.getColVal(col) < r.getColVal(col);
        });
        return res;
    }

    RowIter begin() { return RowIter(m_data.begin()); }
    RowIter end() { return RowIter(m_data.end()); }
    /// Cursor to the row whose primary key is `k`, or end() if there is none.
    RowIter find(const std::string& k) { return RowIter(m_data.find(k)); }

    /// Position of the first column called `colName`, or -1 if unknown.
    int toColId(const std::string& colName) const {
        const auto itr = std::find_if(m_colIds.begin(), m_colIds.end(),
                                      [&colName](const auto& v) { return v.first == colName; });
        return itr != m_colIds.end() ? itr->second : -1;
    }

    /// Counts how many rows carry each distinct value of column `colName`.
    /// Writes one "Counting col <value>" trace line per row to std::cout.
    /// @throws std::invalid_argument if the column name is unknown.
    std::map<std::string, long long> countBy(const std::string& colName) {
        const int colId = toColId(colName);
        if (colId == -1) {
            throw std::invalid_argument("Unknown column for table " + m_tblName);
        }
        std::map<std::string, long long> result;
        for (auto&& r : *this) {
            const std::string colToCount = r.getColVal(static_cast<std::size_t>(colId));
            std::cout << "Counting col " << colToCount << "\n";
            ++result[colToCount];
        }
        return result;
    }

    /// Prints the table name, the header and every row to std::cout.
    void printTable() {
        std::cout << "Table Name = [ " << m_tblName << " ]\n";
        for (const auto& col : m_colIds) {
            std::cout << " | " << col.first << " | ";
        }
        std::cout << "\n";
        for (auto&& r : *this) {
            for (auto&& c : r) {
                std::cout << " | " << c << " | ";
            }
            std::cout << "\n";
        }
    }

    /// Appends the columns of `data` to this table, pairing rows positionally
    /// (n-th row of `data` onto the n-th row of this table, both in
    /// primary-key order).
    ///
    /// @param data          table whose columns are appended.
    /// @param selectedCols  when true, the columns named in `colsToSkip` are
    ///                      left out (both their names and their cells); when
    ///                      false `colsToSkip` is ignored.
    /// @param colsToSkip    names of columns of `data` to leave out.
    /// @throws std::invalid_argument if this table has rows and the row
    ///         counts differ.
    ///
    /// If this table has no rows yet, the rows of `data` are adopted: the
    /// first kept column becomes the primary key.
    void extend(MemTable& data, bool selectedCols = false, std::vector<std::string> colsToSkip = {}) {
        if (!m_data.empty() && m_data.size() != data.size()) {
            throw std::invalid_argument("Not enough rows to match table " + m_tblName);
        }
        // Resolve the skip list to positions once; names and cells are then
        // filtered by the same set so the header always matches the rows.
        std::set<int> colIdsToSkip;
        if (selectedCols) {
            for (const auto& c : colsToSkip) {
                colIdsToSkip.insert(data.toColId(c));
            }
        }
        const bool adoptRows = m_data.empty();

        m_tblName += " + ";
        m_tblName += data.name();

        // Work on a copy of the names so that extending a table with itself
        // does not iterate a vector that is being appended to.
        const std::vector<std::string> incoming = data.colNames();
        auto newColId = m_colIds.size();
        for (std::size_t i = 0; i < incoming.size(); ++i) {
            if (colIdsToSkip.count(static_cast<int>(i)) != 0) continue;
            m_colIds.emplace_back(incoming[i], static_cast<int>(newColId++));
        }

        auto writer = m_data.begin();
        for (auto&& r : data) {
            // Snapshot the kept cells first; the width is fixed before any
            // cell is appended.
            std::vector<std::string> kept;
            const int width = r.numCols();
            for (int cid = 0; cid < width; ++cid) {
                if (colIdsToSkip.count(cid) != 0) continue;
                kept.push_back(r.getColVal(static_cast<std::size_t>(cid)));
            }
            if (adoptRows) {
                if (kept.empty()) continue;
                std::string key = std::move(kept.front());
                kept.erase(kept.begin());
                m_data.insert_or_assign(std::move(key), std::move(kept));
            } else {
                // Row counts were checked to be equal, so writer is valid here.
                for (auto& v : kept) {
                    writer->second.push_back(std::move(v));
                }
                ++writer;
            }
        }
    }

    /// Left-joins `joinTbl` onto this table over the column `joinCol`, which
    /// must exist in both tables (at any position). For every row of this
    /// table the first row of `joinTbl` (in primary-key order) with the same
    /// `joinCol` value contributes its other columns; rows without a match
    /// receive empty cells so that all rows keep the same width.
    ///
    /// @throws std::invalid_argument if this table has rows and the row
    ///         counts differ, or if `joinCol` is missing from either table.
    // NOTE(review): the equal-row-count requirement is kept from the original
    // implementation; a join does not strictly need it — confirm intent.
    void join(MemTable& joinTbl, const std::string& joinCol) {
        if (!m_data.empty() && m_data.size() != joinTbl.size()) {
            throw std::invalid_argument("Not enough rows to match table " + m_tblName);
        }
        // The join column may sit at different positions in the two tables.
        const int ownColId = toColId(joinCol);
        const int otherColId = joinTbl.toColId(joinCol);
        if (ownColId == -1 || otherColId == -1) {
            throw std::invalid_argument("Unknown join column: " + joinCol);
        }

        // Index the other table by join value; emplace keeps the first hit.
        std::map<std::string, RowIter> joinRefs;
        for (auto&& jr : joinTbl) {
            joinRefs.emplace(jr.getColVal(static_cast<std::size_t>(otherColId)), jr);
        }

        m_tblName += " JOIN ";
        m_tblName += joinTbl.name();

        const std::vector<std::string> incoming = joinTbl.colNames();
        auto newColId = m_colIds.size();
        std::size_t added = 0;
        for (std::size_t i = 0; i < incoming.size(); ++i) {
            if (static_cast<int>(i) == otherColId) continue;
            m_colIds.emplace_back(incoming[i], static_cast<int>(newColId++));
            ++added;
        }

        for (auto row = begin(); row != end(); ++row) {
            const std::string jkey = row.getColVal(static_cast<std::size_t>(ownColId));
            auto& cells = row.ptr->second;
            std::size_t appended = 0;
            const auto jItr = joinRefs.find(jkey);
            if (jItr != joinRefs.end()) {
                const RowIter& src = jItr->second;
                // Never read past the other table's header width.
                const std::size_t width =
                    std::min(incoming.size(), static_cast<std::size_t>(src.numCols()));
                for (std::size_t cid = 0; cid < width; ++cid) {
                    if (static_cast<int>(cid) == otherColId) continue;
                    cells.push_back(src.getColVal(cid));
                    ++appended;
                }
            }
            // Pad unmatched (or short) rows so every row matches the header.
            cells.resize(cells.size() + (added - appended));
        }
    }

    std::string& name() {
        return m_tblName;
    }
    const std::string& name() const {
        return m_tblName;
    }
    /// Number of rows.
    std::size_t size() const {
        return m_data.size();
    }
    /// Column names in positional order.
    std::vector<std::string> colNames() const {
        std::vector<std::string> res;
        res.reserve(m_colIds.size());
        for (const auto& p : m_colIds) {
            res.push_back(p.first);
        }
        return res;
    }

private:
    std::string m_tblName;
    OrderedRows m_data;
    ColIdMap m_colIds;
};
    
}

// Demo driver: loads two small CSV fixtures, joins them on "Id1", and
// exercises countBy, orderByColumn, printTable and find on the result.
int main()
{
    std::string testData = {"Id0, Id1,Id2\n1,2,3\n4,5,6\n7,8,9"};
    std::string testData2 = {"Id3, Id1,Id5\na,2,b\nc,5,d\ne,2,f"};
    std::stringstream ss(testData);
    std::stringstream ss2(testData2);
    CSVReader rdr2(ss2);
    CSVReader rdr(ss);
    MemTable tbl("Test", rdr);
    MemTable tbl2("Test2", rdr2);

    // Pull the columns of "Test" into "Test2" wherever Id1 matches.
    tbl2.join(tbl, "Id1");
    tbl2.printTable();

    for (auto&& [value, count] : tbl.countBy("Id1")) {
        std::cout << "CountOf " << value << "," << count << "\n";
    }
    for (auto&& v : tbl.orderByColumn("Id1")) {
        for (auto&& c : v) {
            std::cout << "OrderedView (Id1) " << c << " ";
        }
        std::cout << "\n";
    }
    tbl.printTable();

    // Look a row up by primary key; the not-found message reports the key
    // that was actually searched for.
    const std::string lookupKey = "1";
    auto itr = tbl.find(lookupKey);
    if (itr == tbl.end()) {
        std::cout << "No row with Pkey " << lookupKey;
    } else {
        std::cout << "Via getColVal " << itr.getColVal(0) << "\n";
        std::cout << "Num cols = " << itr.numCols() << "\n";
        for (auto&& c : itr)
            std::cout << c << ",";
    }
    return 0;
}

Output

Table Name = [ Test2 JOIN Test ]
 | Id3 |  | Id1 |  | Id5 |  | Id0 |  | Id2 | 
 | a |  | 2 |  | b |  | 1 |  | 3 | 
 | c |  | 5 |  | d |  | 4 |  | 6 | 
 | e |  | 2 |  | f |  | 1 |  | 3 | 
Counting col 2
Counting col 5
Counting col 8
CountOf 2,1
CountOf 5,1
CountOf 8,1
OrderedView (Id1) 1 OrderedView (Id1) 2 OrderedView (Id1) 3 
OrderedView (Id1) 4 OrderedView (Id1) 5 OrderedView (Id1) 6 
OrderedView (Id1) 7 OrderedView (Id1) 8 OrderedView (Id1) 9 
Table Name = [ Test ]
 | Id0 |  | Id1 |  | Id2 | 
 | 1 |  | 2 |  | 3 | 
 | 4 |  | 5 |  | 6 | 
 | 7 |  | 8 |  | 9 | 
Via getColVal 1
Num cols = 3
1,2,3,
Ret: 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants