Skip to content

Commit

Permalink
Merge pull request hyrise#234 from hyrise/feature/intersect_positions
Browse files Browse the repository at this point in the history
Fixes bug in IntersectPositions and adds functionality to intersect multiple PCs
  • Loading branch information
bastih committed Nov 22, 2013
2 parents 97f60d5 + 11b032a commit 9e8decd
Show file tree
Hide file tree
Showing 6 changed files with 222 additions and 16 deletions.
24 changes: 14 additions & 10 deletions src/lib/access/IntersectPositions.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
// Copyright (c) 2012 Hasso-Plattner-Institut fuer Softwaresystemtechnik GmbH. All rights reserved.
#include "access/IntersectPositions.h"

#include "storage/PointerCalculator.h"

#include <algorithm>

namespace hyrise {
namespace access {

Expand All @@ -13,15 +14,18 @@ std::shared_ptr<PlanOperation> IntersectPositions::parse(const Json::Value&) {
}

// Intersects the position lists of the operation's input tables and adds the
// outcome as the operation's result. Every input has to be a PointerCalculator;
// otherwise a std::runtime_error is thrown.
// NOTE(review): this listing looks like a rendered diff without +/- markers. The
// two-input variant (pc1/pc2/pc3) is presumably the removed pre-change body and
// the `tables`/`pcs` variant the added one; confirm against the repository
// before treating the body below as a single function.
void IntersectPositions::executePlanOperation() {
// (presumably removed) previous variant: exactly two inputs, each cast to a PointerCalculator
const auto& pc1 = std::dynamic_pointer_cast<const PointerCalculator>(getInputTable(0));
const auto& pc2 = std::dynamic_pointer_cast<const PointerCalculator>(getInputTable(1));

// dynamic_pointer_cast yields nullptr when the input is not a PointerCalculator
if (pc1 == nullptr) { throw std::runtime_error("Passed input 0 is not a PC!"); }
if (pc2 == nullptr) { throw std::runtime_error("Passed input 1 is not a PC!"); }

auto pc3 = pc1->intersect(pc2);

addResult(pc3);
// (presumably added) new variant: handles however many input tables are present
const auto& tables = input.getTables();
// one slot per input table; a slot stays nullptr when the cast fails
std::vector<std::shared_ptr<const PointerCalculator>> pcs(tables.size());
std::transform(begin(tables), end(tables),
begin(pcs),
[] (decltype(*begin(tables)) table) {
return std::dynamic_pointer_cast<const PointerCalculator>(table);
});
// NOTE(review): the predicate's parameter type is derived from `tables` although it
// is applied to the elements of `pcs` - presumably this relies on an implicit
// shared_ptr conversion; decltype(*begin(pcs)) looks like the intended type.
// NOTE(review): std::all_of is true for an empty range, so with zero inputs
// intersect_many is called with begin == end - confirm that case is handled.
if (std::all_of(begin(pcs), end(pcs), [] (decltype(*begin(tables)) pc) { return pc != nullptr; })) {
addResult(PointerCalculator::intersect_many(begin(pcs), end(pcs)));
} else {
throw std::runtime_error(_planOperationName + " is only supported for PointerCalculators (IntersectPositions.cpp)");
}
}

const std::string IntersectPositions::vname() {
Expand Down
79 changes: 79 additions & 0 deletions src/lib/helper/PositionsIntersect.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright (c) 2012 Hasso-Plattner-Institut fuer Softwaresystemtechnik GmbH. All rights reserved.
#ifndef SRC_LIB_HELPER_POSITIONSINTERESECT_H_
#define SRC_LIB_HELPER_POSITIONSINTERESECT_H_

#include <algorithm>
#include <iterator>
#include <vector>
// Helper for intersect_pos_list: intersects two ranges that are already
// sorted in ascending order and writes the common values, ascending, to `out`.
//
// `out` is taken by reference so that every value written by a nested call
// (or by std::set_intersection) advances the caller's iterator as well. This
// is what makes plain iterators such as vector.begin() or a raw pointer
// usable as output, not only inserters.
//
// Both iterator types have to be random-access iterators; they may differ.
template <typename Iter1, typename Iter2, typename OutputIter>
void intersect_sorted_pos_lists(Iter1 beg1, Iter1 end1, Iter2 beg2, Iter2 end2, OutputIter& out)
{
  auto size_1 = std::distance(beg1, end1);
  auto size_2 = std::distance(beg2, end2);

  // if one of the inputs is empty,
  // return as intersect is empty
  if ((size_1 <= 0) || (size_2 <= 0)) return;

  // if input 1 and input 2 do not overlap at all,
  // return as intersect is empty
  if ((*(end1 - 1) < *beg2) || (*(end2 - 1) < *beg1)) return;

  // if both lists are very small,
  // use std intersection as iterating is faster than binary search
  if ((size_1 + size_2) < 20) {
    // keep the advanced iterator, otherwise later writes would start over
    out = std::set_intersection(beg1, end1, beg2, end2, out);
    return;
  }

  // make sure input 1 is the larger one; the two iterator types may differ,
  // so the roles are exchanged by re-entering with swapped arguments
  if (size_1 < size_2) {
    intersect_sorted_pos_lists(beg2, end2, beg1, end1, out);
    return;
  }

  // find overlap by searching in smaller input (input 2)
  beg2 = std::lower_bound(beg2, end2, *beg1);
  end2 = std::upper_bound(beg2, end2, *(end1 - 1));
  size_2 = std::distance(beg2, end2);

  // the value ranges may overlap without input 2 having a single element
  // inside input 1's range (e.g. {1..500} vs {0, 1000}); there is no median
  // to pick then, and dereferencing one would read past the range
  if (size_2 <= 0) return;

  // search median of input 2 in input 1,
  // effectively dividing larger input in two.
  // *m <= *(end1 - 1) holds after the narrowing above, so m_in_1 != end1.
  auto m = beg2 + (size_2 / 2);
  auto m_in_1 = std::lower_bound(beg1, end1, *m);

  // and recursively do the rest: left halves, the median itself, right halves
  intersect_sorted_pos_lists(beg1, m_in_1, beg2, m, out);
  if (*m_in_1 == *m) {
    *out++ = *m; // median is part of both inputs
    ++m_in_1;    // and is consumed on the input 1 side as well
  }
  intersect_sorted_pos_lists(m_in_1, end1, m + 1, end2, out);
}

// Produces the sorted intersection of two pos lists. Based on the
// Baeza-Yates algorithm: the median of the smaller list is located in the
// larger list by binary search and both halves are processed recursively, so
// the average complexity adapts to the smaller list size, whereas
// std::set_intersection iterates over both lists with linear complexity.
//
//   beg1, end1     first input range (random-access iterators)
//   beg2, end2     second input range (same iterator type as the first)
//   resultIter     output iterator receiving the common values, ascending
//   first_sorted   pass false if input 1 is not sorted ascending; a sorted
//                  copy is made and the input itself is left untouched
//   second_sorted  same for input 2
//
// The sorted copies are handed to the helper through their own iterators, so
// IterT does not have to be a std::vector iterator.
template <typename IterT, typename OutputIter>
void intersect_pos_list(IterT beg1, IterT end1, IterT beg2, IterT end2, OutputIter resultIter, bool first_sorted=true, bool second_sorted=true)
{
  typedef std::vector<typename std::iterator_traits<IterT>::value_type> sorted_copy_t;
  sorted_copy_t input1_sorted, input2_sorted;

  if (!first_sorted) {
    // copy input 1 and sort it
    input1_sorted.assign(beg1, end1);
    std::sort(input1_sorted.begin(), input1_sorted.end());
  }
  if (!second_sorted) {
    // copy input 2 and sort it
    input2_sorted.assign(beg2, end2);
    std::sort(input2_sorted.begin(), input2_sorted.end());
  }

  if (first_sorted && second_sorted) {
    intersect_sorted_pos_lists(beg1, end1, beg2, end2, resultIter);
  } else if (first_sorted) {
    intersect_sorted_pos_lists(beg1, end1, input2_sorted.begin(), input2_sorted.end(), resultIter);
  } else if (second_sorted) {
    intersect_sorted_pos_lists(input1_sorted.begin(), input1_sorted.end(), beg2, end2, resultIter);
  } else {
    intersect_sorted_pos_lists(input1_sorted.begin(), input1_sorted.end(),
                               input2_sorted.begin(), input2_sorted.end(), resultIter);
  }
}

#endif
30 changes: 24 additions & 6 deletions src/lib/storage/PointerCalculator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <unordered_set>

#include "helper/checked_cast.h"
#include "helper/PositionsIntersect.h"

#include "storage/PrettyPrinter.h"
#include "storage/Store.h"
Expand Down Expand Up @@ -317,25 +318,42 @@ hyrise::storage::atable_ptr_t PointerCalculator::copy_structure(const field_list
}

// Builds a new PointerCalculator (via create) on the same table and fields whose
// position list holds the positions found in both this calculator's and
// `other`'s position list. Both position lists have to be sorted and both
// calculators should point to the same table; each condition is only checked
// by an assert.
// NOTE(review): this listing looks like a rendered diff without +/- markers. The
// pre-sized `result` and the std::set_intersection call are presumably the
// removed lines, the reserve() + intersect_pos_list variant the added ones;
// confirm against the repository before treating the body as one function.
std::shared_ptr<PointerCalculator> PointerCalculator::intersect(const std::shared_ptr<const PointerCalculator>& other) const {
// (presumably removed) list pre-filled with max(size) elements; back_inserter would append after them
pos_list_t *result = new pos_list_t(std::max(pos_list->size(), other->pos_list->size()));
// (presumably added) start empty and only reserve capacity, so appended positions are the only content
pos_list_t *result = new pos_list_t();
result->reserve(std::max(pos_list->size(), other->pos_list->size()));
assert(std::is_sorted(begin(*pos_list), end(*pos_list)) && std::is_sorted(begin(*other->pos_list), end(*other->pos_list)) && "Both lists have to be sorted");
// (presumably removed) linear merge-style intersection
std::set_intersection(pos_list->begin(), pos_list->end(),
other->pos_list->begin(), other->pos_list->end(),
std::back_inserter(*result));

// (presumably added) intersection via the helper from helper/PositionsIntersect.h
intersect_pos_list(
pos_list->begin(), pos_list->end(),
other->pos_list->begin(), other->pos_list->end(),
std::back_inserter(*result));

assert((other->table == this->table) && "Should point to same table");
// the raw `result` pointer is handed to create() together with table and fields
return create(table, result, fields);
}


// Ordering predicate for sorting PointerCalculators by ascending size():
// true when `lx` reports a strictly smaller size than `rx`.
bool PointerCalculator::isSmaller( std::shared_ptr<const PointerCalculator> lx, std::shared_ptr<const PointerCalculator> rx ) {
  const auto left_size = lx->size();
  const auto right_size = rx->size();
  return left_size < right_size;
}

// Intersects all PointerCalculators in [it, it_end) by folding intersect()
// over the range.
//
// The range is sorted in place by ascending size() first (see isSmaller), so
// the caller's container is reordered as a side effect; this is why mutable
// iterators are required.
//
// Returns the single element unchanged for a one-element range, and nullptr
// for an empty range, since there is nothing to intersect.
std::shared_ptr<const PointerCalculator> PointerCalculator::intersect_many(pc_vector::iterator it, pc_vector::iterator it_end) {
  // an empty range has no first element; dereferencing `it` would be undefined behavior
  if (it == it_end) {
    return nullptr;
  }

  std::sort(it, it_end, PointerCalculator::isSmaller);

  std::shared_ptr<const PointerCalculator> base = *it;
  for (++it; it != it_end; ++it) {
    base = base->intersect(*it);
  }
  return base;
}

std::shared_ptr<PointerCalculator> PointerCalculator::unite(const std::shared_ptr<const PointerCalculator>& other) const {
assert((other->table == this->table) && "Should point to same table");
if (pos_list && other->pos_list) {
auto result = new pos_list_t(std::max(pos_list->size(), other->pos_list->size()));
auto result = new pos_list_t();
result->reserve(pos_list->size() + other->pos_list->size());
assert(std::is_sorted(begin(*pos_list), end(*pos_list)) && std::is_sorted(begin(*other->pos_list), end(*other->pos_list)) && "Both lists have to be sorted");
std::set_union(pos_list->begin(), pos_list->end(),
other->pos_list->begin(), other->pos_list->end(),
result->begin());
std::back_inserter(*result));
return create(table, result, copy_vec(fields));
} else {
pos_list_t* positions = nullptr;
Expand Down
3 changes: 3 additions & 0 deletions src/lib/storage/PointerCalculator.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
class PointerCalculator : public AbstractTable,
public SharedFactory<PointerCalculator> {
public:

PointerCalculator(hyrise::storage::c_atable_ptr_t t, pos_list_t *pos = nullptr, field_list_t *f = nullptr);
PointerCalculator(const PointerCalculator& other);

Expand All @@ -27,7 +28,9 @@ class PointerCalculator : public AbstractTable,

typedef std::vector<std::shared_ptr<const PointerCalculator> > pc_vector;
static std::shared_ptr<const PointerCalculator> unite_many(pc_vector::const_iterator it, pc_vector::const_iterator it_end);
static std::shared_ptr<const PointerCalculator> intersect_many(pc_vector::iterator it, pc_vector::iterator it_end);
static std::shared_ptr<PointerCalculator> concatenate_many(pc_vector::const_iterator it, pc_vector::const_iterator it_end);
static bool isSmaller( std::shared_ptr<const PointerCalculator> lx, std::shared_ptr<const PointerCalculator> rx );

const pos_list_t *getPositions() const;
pos_list_t getActualTablePositions() const;
Expand Down
50 changes: 50 additions & 0 deletions test/autojson/intersect.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
{
"operators": {
"-1": {
"type": "TableLoad",
"table": "reference",
"filename": "tables/employees_idx.tbl"
},

"0": {
"type": "TableLoad",
"table": "employees",
"filename": "tables/employees.tbl"
},

"scan_gt0" : {
"type" : "SimpleTableScan",
"predicates" : [
{"type": "GT", "in": 0, "f": "employee_company_id", "vtype": 0, "value":0}
]
},

"scan_gt2" : {
"type" : "SimpleTableScan",
"predicates" : [
{"type": "GT", "in": 0, "f": "employee_company_id", "vtype": 0, "value":2}
]
},

"scan_eq3" : {
"type" : "SimpleTableScan",
"predicates" : [
{"type": "EQ", "in": 0, "f": "employee_company_id", "vtype": 0, "value":3}
]
},

"intersect": {
"type": "IntersectPositions"
}

},
"edges": [
["0", "scan_gt0"],
["0", "scan_gt2"],
["0", "scan_eq3"],

["scan_gt0", "intersect"],
["scan_gt2", "intersect"],
["scan_eq3", "intersect"]
]
}
52 changes: 52 additions & 0 deletions test/autojson/union.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{
"operators": {
"-1": {
"type": "TableLoad",
"table": "reference",
"filename": "tables/employees.tbl"
},

"0": {
"type": "TableLoad",
"table": "employees",
"filename": "tables/employees.tbl"
},

"scan_gt0" : {
"type" : "SimpleTableScan",
"predicates" : [
{"type": "AND"},
{"type": "GT", "in": 0, "f": "employee_company_id", "vtype": 0, "value":0},
{"type": "LT", "in": 0, "f": "employee_company_id", "vtype": 0, "value":3}
]
},

"scan_eq3" : {
"type" : "SimpleTableScan",
"predicates" : [
{"type": "EQ", "in": 0, "f": "employee_company_id", "vtype": 0, "value":3}
]
},

"scan_eq4" : {
"type" : "SimpleTableScan",
"predicates" : [
{"type": "EQ", "in": 0, "f": "employee_company_id", "vtype": 0, "value":4}
]
},

"intersect": {
"type": "UnionScan"
}

},
"edges": [
["0", "scan_gt0"],
["0", "scan_eq3"],
["0", "scan_eq4"],

["scan_gt0", "intersect"],
["scan_eq3", "intersect"],
["scan_eq4", "intersect"]
]
}

0 comments on commit 9e8decd

Please sign in to comment.