diff --git a/CHANGES b/CHANGES index 0f86c2b..e7c59cb 100644 --- a/CHANGES +++ b/CHANGES @@ -24,7 +24,7 @@ Initial version 3. Support disk mode 4. Support RLE compression ---- Release version 1.04 (??.01.2014) ------------------------------------- +--- Release version 1.04 (05.03.2014) ------------------------------------- 1. Add cs_trend function 2. Fix hang in case of calculating aggregates for unbounded sequence 3. Fix incorrect construction of string constants @@ -32,5 +32,5 @@ Initial version 5. Fix bug in parallel executor causing crash in case of empty result 6. Add TABLE_is_loaded() function 7. Fix IMCS context cleanup bug in case of nested SPI calls - +8. Add limit parameter to XXX_get() function diff --git a/META.json b/META.json index 10b90b4..e583fb2 100644 --- a/META.json +++ b/META.json @@ -2,7 +2,7 @@ "name": "imcs", "abstract": "In-Memory Columnar Store", "description": "IMCS provides columnar (vertical) store for PostgreSQL. It allows to reach 10-100 advantages in performance because of avoiding disk and MVCC overhead, vector operations, parallel execution and data skipping. IMCS providers a wide range of analytic operators.", - "version": "0.1.3", + "version": "0.1.4", "maintainer": ["Konstantin Kninzhik "], "license": { "PostgreSQL": "http://www.postgresql.org/about/licence" @@ -23,7 +23,7 @@ "imcs": { "file": "imcs--1.1.sql", "docfile": "user_guide.html", - "version": "0.1.3", + "version": "0.1.4", "abstract": "In-Memory Columnar Store" } }, diff --git a/Makefile b/Makefile index a32113d..79b5d1e 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,7 @@ MODULE_big = imcs CUSTOM_COPT = -O3 -Wall -pthread +IMCS_VERSION=1.04 ifdef USE_DISK OBJS = imcs.o func.o smp.o btree.o threadpool.o fileio.o disk.o @@ -42,4 +43,4 @@ endif distrib: rm -f *.o rm -rf results/ regression.diffs regression.out tmp_check/ log/ - cd .. ; tar --exclude=.svn -chvzf imcs-1.03.tar.gz imcs \ No newline at end of file + cd .. 
; tar --exclude=.svn -chvzf imcs-$(IMCS_VERSION).tar.gz imcs \ No newline at end of file diff --git a/btree.c b/btree.c index a5bde1e..7cceffd 100644 --- a/btree.c +++ b/btree.c @@ -496,7 +496,7 @@ bool imcs_search_page_##TYPE(imcs_page_t* pg, imcs_iterator_h iterator, TYPE val return found; \ } \ \ -imcs_iterator_h imcs_search_##TYPE(imcs_timeseries_t* ts, TYPE low, imcs_boundary_kind_t low_boundary, TYPE high, imcs_boundary_kind_t high_boundary) \ +imcs_iterator_h imcs_search_##TYPE(imcs_timeseries_t* ts, TYPE low, imcs_boundary_kind_t low_boundary, TYPE high, imcs_boundary_kind_t high_boundary, imcs_count_t limit) \ { \ imcs_iterator_h iterator = NULL; \ if (ts->root_page != NULL) { \ @@ -524,6 +524,13 @@ imcs_iterator_h imcs_search_##TYPE(imcs_timeseries_t* ts, TYPE low, imcs_boundar iterator->next_pos = 0; \ if (imcs_search_page_##TYPE(ts->root_page, iterator, low, low_boundary, 0)) { \ if (iterator->next_pos <= iterator->last_pos) { \ + if (limit != 0 && iterator->next_pos + limit <= iterator->last_pos) { \ + if (low_boundary == BOUNDARY_OPEN) { \ + iterator->next_pos = iterator->last_pos - limit + 1; \ + } else { \ + iterator->last_pos = iterator->next_pos + limit - 1; \ + } \ + } \ iterator->first_pos = iterator->next_pos; \ return iterator; \ } \ diff --git a/btree.h b/btree.h index fe3ad2d..4a42151 100644 --- a/btree.h +++ b/btree.h @@ -71,7 +71,7 @@ extern imcs_count_t imcs_delete_all(imcs_timeseries_t* ts); extern void imcs_append_##TYPE(imcs_timeseries_t* ts, TYPE val); \ extern bool imcs_first_##TYPE(imcs_timeseries_t* ts, TYPE* val); \ extern bool imcs_last_##TYPE(imcs_timeseries_t* ts, TYPE* val); \ - extern imcs_iterator_h imcs_search_##TYPE(imcs_timeseries_t* ts, TYPE low, imcs_boundary_kind_t low_boundary, TYPE high, imcs_boundary_kind_t high_boundary); \ + extern imcs_iterator_h imcs_search_##TYPE(imcs_timeseries_t* ts, TYPE low, imcs_boundary_kind_t low_boundary, TYPE high, imcs_boundary_kind_t high_boundary, imcs_count_t limit); \ extern bool 
imcs_search_page_##TYPE(imcs_page_t* root, imcs_iterator_h iterator, TYPE val, imcs_boundary_kind_t boundary, int level) IMCS_BTREE_METHODS(int8); diff --git a/imcs--1.1.sql b/imcs--1.1.sql index 2a89f67..8b19269 100644 --- a/imcs--1.1.sql +++ b/imcs--1.1.sql @@ -129,8 +129,8 @@ begin if (timeseries_id is not null) then create_drop_func := create_drop_func||' - drop function '||table_name||'_get('||id_type||','||timestamp_type||','||timestamp_type||'); - drop function '||table_name||'_get('||id_type||'[],'||timestamp_type||','||timestamp_type||'); + drop function '||table_name||'_get('||id_type||','||timestamp_type||','||timestamp_type||',bigint); + drop function '||table_name||'_get('||id_type||'[],'||timestamp_type||','||timestamp_type||',bigint); drop function '||table_name||'_span('||id_type||',bigint,bigint); drop function '||table_name||'_span('||id_type||'[],bigint,bigint); drop function '||table_name||'_concat('||id_type||'[],'||timestamp_type||','||timestamp_type||'); @@ -142,7 +142,7 @@ begin drop function '||table_name||'_count('||id_type||');'; else create_drop_func := create_drop_func||' - drop function '||table_name||'_get('||timestamp_type||','||timestamp_type||'); + drop function '||table_name||'_get('||timestamp_type||','||timestamp_type||',bigint); drop function '||table_name||'_span(bigint,bigint); drop function '||table_name||'_delete('||timestamp_type||'); drop function '||table_name||'_delete('||timestamp_type||','||timestamp_type||'); @@ -201,14 +201,14 @@ begin n := n + 1;'; if (timeseries_id is not null) then - create_getall_func := 'create function '||table_name||'_get(ids '||id_type||'[],from_ts '||timestamp_type||' default null,till_ts '||timestamp_type||' default null) + create_getall_func := 'create function '||table_name||'_get(ids '||id_type||'[],from_ts '||timestamp_type||' default null,till_ts '||timestamp_type||' default null, limit_ts bigint default null) returns setof '||table_name||'_timeseries as $$ declare id '||id_type||'; 
ts '||table_name||'_timeseries; begin foreach id in array ids loop - ts:='||table_name||'_get(id,from_ts,till_ts); + ts:='||table_name||'_get(id,from_ts,till_ts,limit_ts); return next ts; end loop; return; @@ -247,7 +247,7 @@ begin if (timeseries_id is not null) then create_get_func := create_get_func||timeseries_id||' '||id_type||', '; end if; - create_get_func := create_get_func||'from_ts '||timestamp_type||' default null, till_ts '||timestamp_type||' default null) + create_get_func := create_get_func||'from_ts '||timestamp_type||' default null, till_ts '||timestamp_type||' default null, limit_ts bigint default null) returns '||table_name||'_timeseries as $$ declare result '||table_name||'_timeseries; @@ -259,7 +259,7 @@ begin else create_get_func := create_get_func||''''||table_name||'-'||timestamp_id||''''; end if; - create_get_func := create_get_func||',from_ts,till_ts,'||timestamp_tid||'); + create_get_func := create_get_func||',from_ts,till_ts,'||timestamp_tid||',limit_ts); if (search_result is null) then return null; end if; @@ -432,15 +432,15 @@ create function columnar_store_append_bpchar(cs_id cstring, val text, field_type create function columnar_store_append_varchar(cs_id cstring, val text, field_type integer, is_timestamp bool, field_size integer) returns void as 'MODULE_PATHNAME','columnar_store_append_char' language C strict; -create function columnar_store_search_char(cs_id cstring, from_ts "char", till_ts "char", field_type integer) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_int8' language C stable; -create function columnar_store_search_int2(cs_id cstring, from_ts int2, till_ts int2, field_type integer) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_int16' language C stable; -create function columnar_store_search_int4(cs_id cstring, from_ts int4, till_ts int4, field_type integer) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_int32' language C stable; -create function 
columnar_store_search_int8(cs_id cstring, from_ts int8, till_ts int8, field_type integer) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_int64' language C stable; -create function columnar_store_search_date(cs_id cstring, from_ts date, till_ts date, field_type integer) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_int32' language C stable; -create function columnar_store_search_time(cs_id cstring, from_ts time, till_ts time, field_type integer) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_int64' language C stable; -create function columnar_store_search_timestamp(cs_id cstring, from_ts timestamp, till_ts timestamp, field_type integer) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_int64' language C stable; -create function columnar_store_search_float4(cs_id cstring, from_ts float4, till_ts float4, field_type integer) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_float' language C stable; -create function columnar_store_search_float8(cs_id cstring, from_ts float8, till_ts float8, field_type integer) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_double' language C stable; +create function columnar_store_search_char(cs_id cstring, from_ts "char", till_ts "char", field_type integer, limit_ts bigint default null) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_int8' language C stable; +create function columnar_store_search_int2(cs_id cstring, from_ts int2, till_ts int2, field_type integer, limit_ts bigint default null) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_int16' language C stable; +create function columnar_store_search_int4(cs_id cstring, from_ts int4, till_ts int4, field_type integer, limit_ts bigint default null) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_int32' language C stable; +create function columnar_store_search_int8(cs_id cstring, from_ts int8, till_ts int8, field_type integer, limit_ts bigint default 
null) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_int64' language C stable; +create function columnar_store_search_date(cs_id cstring, from_ts date, till_ts date, field_type integer, limit_ts bigint default null) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_int32' language C stable; +create function columnar_store_search_time(cs_id cstring, from_ts time, till_ts time, field_type integer, limit_ts bigint default null) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_int64' language C stable; +create function columnar_store_search_timestamp(cs_id cstring, from_ts timestamp, till_ts timestamp, field_type integer, limit_ts bigint default null) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_int64' language C stable; +create function columnar_store_search_float4(cs_id cstring, from_ts float4, till_ts float4, field_type integer, limit_ts bigint default null) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_float' language C stable; +create function columnar_store_search_float8(cs_id cstring, from_ts float8, till_ts float8, field_type integer, limit_ts bigint default null) returns timeseries as 'MODULE_PATHNAME','columnar_store_search_double' language C stable; create function columnar_store_first_char(id cstring, field_type integer, field_size integer) returns "char" as 'MODULE_PATHNAME','columnar_store_first_int8' language C strict stable; create function columnar_store_first_int2(id cstring, field_type integer, field_size integer) returns int2 as 'MODULE_PATHNAME','columnar_store_first_int16' language C strict stable; @@ -827,10 +827,10 @@ create function cs_abs(timeseries) returns timeseries as 'MODULE_PATHNAME' langu create operator @ (rightarg=timeseries, procedure=cs_abs); create function cs_limit(timeseries, from_pos bigint default 0, till_pos bigint default 9223372036854775807) returns timeseries as 'MODULE_PATHNAME' language C stable strict; -create function cs_head(ts timeseries, n 
bigint) returns timeseries as $$ begin return cs_limit(ts, 0, n-1); end; $$ language plpgsql stable strict; -create function cs_tail(ts timeseries, n bigint) returns timeseries as $$ begin return cs_limit(ts, -n); end; $$ language plpgsql stable strict; -create function cs_cut_head(ts timeseries, n bigint) returns timeseries as $$ begin return cs_limit(ts, n); end; $$ language plpgsql stable strict; -create function cs_cut_tail(ts timeseries, n bigint) returns timeseries as $$ begin return cs_limit(ts, 0, -n-1); end; $$ language plpgsql stable strict; +create function cs_head(ts timeseries, n bigint default 1) returns timeseries as $$ begin return cs_limit(ts, 0, n-1); end; $$ language plpgsql stable strict; +create function cs_tail(ts timeseries, n bigint default 1) returns timeseries as $$ begin return cs_limit(ts, -n); end; $$ language plpgsql stable strict; +create function cs_cut_head(ts timeseries, n bigint default 1) returns timeseries as $$ begin return cs_limit(ts, n); end; $$ language plpgsql stable strict; +create function cs_cut_tail(ts timeseries, n bigint default 1) returns timeseries as $$ begin return cs_limit(ts, 0, -n-1); end; $$ language plpgsql stable strict; create operator << (leftarg=timeseries, rightarg=bigint, procedure=cs_cut_head); create operator >> (leftarg=timeseries, rightarg=bigint, procedure=cs_cut_tail); diff --git a/imcs.c b/imcs.c index 417b061..8aea942 100644 --- a/imcs.c +++ b/imcs.c @@ -1949,6 +1949,7 @@ Datum columnar_store_search_##TYPE(PG_FUNCTION_ARGS) \ TYPE low = 0, high = 0; \ imcs_boundary_kind_t low_boundary = BOUNDARY_OPEN; \ imcs_boundary_kind_t high_boundary = BOUNDARY_OPEN; \ + imcs_count_t limit = PG_ARGISNULL(4) ? 
0 : PG_GETARG_INT64(4); \ if (ts == NULL) { \ PG_RETURN_NULL(); \ } \ @@ -1960,7 +1961,7 @@ Datum columnar_store_search_##TYPE(PG_FUNCTION_ARGS) \ high = PG_GETARG_##PG_TYPE(2); \ high_boundary = BOUNDARY_INCLUSIVE; \ } \ - result = imcs_search_##TYPE(ts, low, low_boundary, high, high_boundary); \ + result = imcs_search_##TYPE(ts, low, low_boundary, high, high_boundary, limit); \ if (result == NULL) { \ PG_RETURN_NULL(); \ } else { \ diff --git a/user_guide.html b/user_guide.html index 7245adb..188d6d7 100644 --- a/user_guide.html +++ b/user_guide.html @@ -245,8 +245,8 @@

Generated data access functions for single timeseriesReturns number of elements in timeseries. -function TABLE_get(low TIMESTAMP_TYPE default null, high TIMESTAMP_TYPE default null) returns TABLE_timeseries -Returns vertical representation of the whole table or its time slice. Returned record contains the same columns as record of the original table, but them have timeseries type instead of original scalar types. These columns can be used in timeseries functions (cs_*). If high or low parameters are not null, then them specify correspondingly upper/lower inclusive boundary for timestamp value. If some or both parameters are omitted, then corresponding boundary is open. +function TABLE_get(low TIMESTAMP_TYPE default null, high TIMESTAMP_TYPE default null, limit_ts bigint default null) returns TABLE_timeseries +Returns the vertical representation of the whole table or of its time slice. The returned record contains the same columns as a record of the original table, but they have timeseries type instead of the original scalar types. These columns can be used in timeseries functions (cs_*). If the high or low parameters are not null, they specify the upper or lower inclusive boundary of the timestamp value, respectively. If one or both parameters are omitted, the corresponding boundary is open. The number of selected elements can be limited by specifying the limit_ts parameter: if the low boundary is open, the last limit_ts elements are selected; otherwise the first limit_ts elements are selected. If limit_ts is null (the default) or 0, the number of elements is not limited. function TABLE_span(from_pos bigint default 0, till_pos bigint default 9223372036854775807) returns TABLE_timeseries @@ -296,11 +296,11 @@

Generated data access functions for multiple timeseries ( Returns number of elements in timeseries. -function TABLE_get(id TIMESERIES_ID_TYPE, TIMESTAMP_TYPE low default null, TIMESTAMP_TYPE high default null) returns TABLE_timeseries -Returns timeseries with specified identifier for the corresponding table or its time slice. Returned record contains the same columns as record of original table, but them have timeseries type instead of original scalar types. These columns can be used in timeseries functions (cs_*). If high or low parameters are not null, then them specify correspondingly upper/lower inclusive boundary for timestamp value. If some or both parameters are omitted, then corresponding boundary is open. +function TABLE_get(id TIMESERIES_ID_TYPE, low TIMESTAMP_TYPE default null, high TIMESTAMP_TYPE default null, limit_ts bigint default null) returns TABLE_timeseries +Returns the timeseries with the specified identifier for the corresponding table or for its time slice. The returned record contains the same columns as a record of the original table, but they have timeseries type instead of the original scalar types. These columns can be used in timeseries functions (cs_*). If the high or low parameters are not null, they specify the upper or lower inclusive boundary of the timestamp value, respectively. If one or both parameters are omitted, the corresponding boundary is open. The number of selected elements can be limited by specifying the limit_ts parameter: if the low boundary is open, the last limit_ts elements are selected; otherwise the first limit_ts elements are selected. If limit_ts is null (the default) or 0, the number of elements is not limited. -function TABLE_get(id TIMESERIES_ID_TYPE[], TIMESTAMP_TYPE low default null, TIMESTAMP_TYPE high default null) returns setof TABLE_timeseries +function TABLE_get(id TIMESERIES_ID_TYPE[], low TIMESTAMP_TYPE default null, high TIMESTAMP_TYPE default null, limit_ts bigint default null) returns setof TABLE_timeseries Does the same as function described above but for array of timeseries identifiers.
For each timeseries identifier this function returns TABLE_timeseries record, so output will contain as much rows as there are identifiers. @@ -658,19 +658,19 @@

Timeseries transformation functions

Extracts subsequence from timeseries. Parameter from_pos specifies start position of subsequence (inclusive) and parameter till_pos specifies end position (inclusive). If till_pos parameter is missed, then subsequence spans till end of timeseries. Values of both from_pos and till_pos parameters can be negative. In this case position is calculated from end of timeseries, i.e. cs_limit(s, from_pos:=-1) extracts last element of the timeseries. -function cs_head(timeseries, n bigint) returns timeseries +function cs_head(timeseries, n bigint default 1) returns timeseries Extracts n first elements of timeseries. This function is equivalent to cs_limit(0, n-1). -function cs_tail(timeseries, n bigint) returns timeseries +function cs_tail(timeseries, n bigint default 1) returns timeseries Extracts n last elements of timeseries. This function is equivalent to cs_limit(-n). -function cs_cut_head(timeseries, n bigint) returns timeseries +function cs_cut_head(timeseries, n bigint default 1) returns timeseries Extracts all except first n elements of timeseries. This function is equivalent to cs_limit(n). -function cs_cut_tail(timeseries, n bigint) returns timeseries +function cs_cut_tail(timeseries, n bigint default 1) returns timeseries Extracts all except last n elements of timeseries. This function is equivalent to cs_limit(0,-n-1).