Skip to content

Commit

Permalink
Query API
Browse files Browse the repository at this point in the history
  • Loading branch information
programLyrique committed Jan 3, 2022
1 parent be50f98 commit 0be394d
Show file tree
Hide file tree
Showing 7 changed files with 22,224 additions and 2 deletions.
152 changes: 152 additions & 0 deletions src/description.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#ifndef SXPDB_DESCRIPTION_H
#define SXPDB_DESCRIPTION_H


#define R_NO_REMAP
#include <R.h>
#include <Rinternals.h>
#include "Rversion.h"

#include <optional>
#include <vector>
#include <cstdint>
#include <string>

#define UNIONTYPE 40

/**
* Description of a value (aka type...)
*
*/

/**
 * Tells whether at least one element of `value` satisfies `check_na`.
 *
 * `check_na` is invoked as `check_na(value, position)` for the positions
 * 0 .. Rf_length(value) - 1, in increasing order, and must yield something
 * usable as a condition. The scan stops at the first position for which the
 * predicate holds.
 *
 * @return true if the predicate held for some position, false otherwise
 *         (including when `value` has length 0).
 */
template <typename T>
bool na_in(SEXP value, T check_na) {
    const int n_elements = Rf_length(value);

    int position = 0;
    while (position < n_elements) {
        if (check_na(value, position)) {
            return true;
        }
        ++position;
    }

    return false;
}

/**
 * Describes the properties of an R value: its type, whether it is a vector,
 * whether it holds NA, its attributes/class, its length and its number of
 * dimensions.
 *
 * Every optional property that is unset means "not constrained"; the
 * `relax_*` methods unset a property. A description whose `type` is
 * UNIONTYPE stands for the union of the descriptions stored in
 * `descriptions`.
 */
class Description {
public:
    SEXPTYPE type = ANYSXP;                // ANYSXP = type not constrained
    std::optional<bool> is_vector;
    std::optional<bool> has_na;
    std::optional<bool> has_attributes;
    std::optional<bool> has_class;
    std::optional<uint64_t> length;
    std::optional<int> ndims;              // 2 = matrix, otherwise = array
    std::vector<std::string> class_names;
    std::vector<Description> descriptions; // For union types, lists...

public:
    Description() {}
    Description(SEXPTYPE type_) : type(type_) {}

    void relax_na() { has_na.reset(); }
    void relax_vector() { is_vector.reset(); }
    void relax_length() { length.reset(); }
    void relax_attributes() { has_attributes.reset(); }
    void relax_ndims() { ndims.reset(); }
    void relax_class() { has_class.reset(); class_names.clear(); }
    void relax_type() { type = ANYSXP; }

    /**
     * Builds the closest description of `val`: every property is set.
     * The result may be relaxed later on by the caller.
     *
     * For a VECSXP, `descriptions` receives one description per element,
     * computed recursively.
     */
    inline static const Description description_from_value(SEXP val) {
        Description d(TYPEOF(val));

        // Query the length once; it is reused for the recursion on lists below.
        const int length = Rf_length(val);
        d.length = length;

        d.is_vector = length != 1 && d.type != ENVSXP && d.type != LISTSXP;

        d.has_attributes = Rf_length(ATTRIB(val)) > 0;

        SEXP klass = Rf_getAttrib(val, R_ClassSymbol);
        d.has_class = klass != R_NilValue;

        // Dereference the optional: testing `d.has_class` directly would only
        // tell whether the optional is set, which is always the case here.
        if (*d.has_class && TYPEOF(klass) == STRSXP) {
            const int n_classes = Rf_length(klass);
            d.class_names.reserve(n_classes);
            for (int index = 0; index < n_classes; ++index) {
                d.class_names.emplace_back(CHAR(STRING_ELT(klass, index)));
            }
        }

        // Rf_getAttrib yields R_NilValue (length 0) when there is no dim attribute
        SEXP dim = Rf_getAttrib(val, R_DimSymbol);
        d.ndims = Rf_length(dim);

        // NA: only the atomic types below are scanned, any other type is
        // reported as NA-free.
        switch (d.type) {
        case STRSXP:
            d.has_na = na_in(val, [](SEXP vector, int index) -> bool {
                return STRING_ELT(vector, index) == NA_STRING;
            });
            break;
        case CPLXSXP:
            // NA as soon as either the real or the imaginary part is
            d.has_na = na_in(val, [](SEXP vector, int index) -> bool {
                Rcomplex v = COMPLEX_ELT(vector, index);
                return (ISNAN(v.r) || ISNAN(v.i));
            });
            break;
        case REALSXP:
            // NOTE(review): per the R API documentation ISNAN is also true
            // for NaN, not only NA_real_ -- confirm this is intended.
            d.has_na = na_in(val, [](SEXP vector, int index) -> bool {
                return ISNAN(REAL_ELT(vector, index));
            });
            break;
        case LGLSXP:
            d.has_na = na_in(val, [](SEXP vector, int index) -> bool {
                return LOGICAL_ELT(vector, index) == NA_LOGICAL;
            });
            break;
        case INTSXP:
            d.has_na = na_in(val, [](SEXP vector, int index) -> bool {
                return INTEGER_ELT(vector, index) == NA_INTEGER;
            });
            break;
        default:
            d.has_na = false;
        }

        if (d.type == VECSXP) {
            d.descriptions.reserve(length);
            for (int index = 0; index < length; ++index) {
                d.descriptions.push_back(description_from_value(VECTOR_ELT(val, index)));
            }
        }

        return d;
    }

    /**
     * Builds the UNIONTYPE description of `d1` and `d2`.
     *
     * A property is kept only if it is set in both descriptions and has the
     * same value in both; otherwise it is left unset. Class names are not
     * unified. `d1` and `d2` themselves are stored in `descriptions`.
     */
    inline static const Description union_description(const Description& d1, const Description& d2) {
        Description d(UNIONTYPE);

        d.is_vector = unify(d1.is_vector, d2.is_vector);
        d.has_na = unify(d1.has_na, d2.has_na);
        d.has_attributes = unify(d1.has_attributes, d2.has_attributes);
        d.has_class = unify(d1.has_class, d2.has_class); // we do not unify here the class names
        d.length = unify(d1.length, d2.length);
        d.ndims = unify(d1.ndims, d2.ndims);

        d.descriptions = {d1, d2}; // does it work even if d1 or d2 is already an union?

        return d;
    }

private:
    // Returns the common value when both sides are set and equal, an unset
    // optional otherwise. Sharing this helper keeps every property compared
    // against *both* operands.
    template <typename V>
    static std::optional<V> unify(const std::optional<V>& lhs, const std::optional<V>& rhs) {
        if (lhs && rhs && *lhs == *rhs) {
            return lhs;
        }
        return std::nullopt;
    }
};


#endif
76 changes: 76 additions & 0 deletions src/global_store.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,82 @@ SEXP GlobalStore::sample_value() {
return get_value(dist(rand_engine));
}

/**
 * Samples uniformly at random a value of the database matching `d`.
 *
 * The candidate set starts as the union of the per-type indexes selected by
 * `d` (the type of `d` itself, or the types of its members for a UNIONTYPE
 * description). It is then narrowed by each boolean property that is set in
 * `d`: a property set to true keeps only the values in the matching index, a
 * property set to false removes them. Unset properties do not constrain.
 *
 * @return the sampled value, or R_NilValue when no value matches.
 *         NOTE(review): the R_NilValue fallback replaces an unsigned
 *         underflow/failed select -- confirm this is what callers expect.
 */
SEXP GlobalStore::sample_value(const Description& d) {
    roaring::Roaring64Map index;

    // Adds all the values of the given type to the candidates. A type without
    // an entry in types_index has no indexed value, so it contributes nothing
    // (instead of reading past the end of the vector).
    const auto add_type = [&](SEXPTYPE type) {
        if (type < types_index.size()) {
            index |= types_index[type];
        }
    };

    if (d.type != UNIONTYPE) {
        add_type(d.type);
    }
    else {
        for (const auto& desc : d.descriptions) {
            assert(desc.type != UNIONTYPE);
            add_type(desc.type);
        }
    }

    // Narrows the candidates according to one optional boolean property.
    // The "false" case is a set difference: complementing `property` with a
    // bounded flip() would lose the candidates lying outside the flipped range.
    const auto constrain = [&index](const std::optional<bool>& wanted,
                                    const roaring::Roaring64Map& property) {
        if (!wanted) {
            return;
        }
        if (*wanted) {
            index &= property;
        }
        else {
            index -= property;
        }
    };

    constrain(d.has_class, class_index);
    constrain(d.has_attributes, attributes_index);
    constrain(d.is_vector, vector_index);
    constrain(d.has_na, na_index);

    //TODO: length, class names, n dimensions

    //TODO: better API: rather generate a sampler, which will hold the unioned/intersected indexes already, so as not to regenerate it after
    // each request
    // Then we can optimize and shrink to fit

    // Nothing matches: cardinality() - 1 would wrap around below.
    if (index.isEmpty()) {
        return R_NilValue;
    }

    std::uniform_int_distribution<uint64_t> dist(0, index.cardinality() - 1);

    uint64_t element = 0;

    // select() writes the value of the given rank into `element`
    const bool res = index.select(dist(rand_engine), &element);
    assert(res);
    (void) res; // only read by the assert

    return get_value(element);
}

// Marks the indexes as generated.
// For now this is only a placeholder: it sets index_generated and does not
// fill any of the index bitmaps; the TODOs below sketch what remains to be done.
void GlobalStore::build_indexes() {
// TODO: we should really break the current class hierarchy architecture...
// DefaultStore and GenericStore should simply be tables

//TODO: use a build index function from the generic store...

// TODO: we should have an ANYSXP entry, which should be an index of the whole database

index_generated = true;
}


void GlobalStore::write_configuration() {
CSVFile file;
Expand Down
15 changes: 15 additions & 0 deletions src/global_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@
#include "default_store.h"
#include "generic_store.h"
#include "source_ref.h"
#include "roaring.hh"
#include "description.h"

#include <vector>
#include <memory>
#include <random>
#include <unistd.h>



class GlobalStore : Store {
private:
// Stores the names of the various stores, their types, their number of values
Expand All @@ -38,6 +41,14 @@ class GlobalStore : Store {

std::shared_ptr<SourceRefs> src_refs;

//Indexes
bool index_generated = false;
std::vector<roaring::Roaring64Map> types_index;//the index in the vector is the type (from TYPEOF())
roaring::Roaring64Map na_index;//has at least one NA In the vector
roaring::Roaring64Map class_index;//has a class a attribute
roaring::Roaring64Map vector_index;//vector (but not scalar)
roaring::Roaring64Map attributes_index;


public:
GlobalStore(const std::string& description_name, bool _quiet);
Expand Down Expand Up @@ -70,6 +81,10 @@ class GlobalStore : Store {

virtual SEXP sample_value();

virtual SEXP sample_value(const Description& description);

virtual void build_indexes();

virtual bool add_origins(const sexp_hash& hash, const std::string& pkg_name, const std::string& func_name, const std::string& arg_name);

const std::vector<std::tuple<const std::string, const std::string, const std::string>> source_locations(const sexp_hash& key) const {
Expand Down
Loading

0 comments on commit 0be394d

Please sign in to comment.