From bd48841ba78873c4dd03ca0d336a2b993d90ce41 Mon Sep 17 00:00:00 2001 From: Ryan Hamilton Date: Sat, 7 Dec 2024 10:26:57 +0000 Subject: [PATCH] Add kdb tutorial notebook. Saved snapshot is at: https://www.timestored.com/sqlnotebook/examples/kdb-tutorial-notebook.html --- .../kdb-tutorial/pages/getting-started.md | 90 +++++ notebook-examples/kdb-tutorial/pages/joins.md | 137 +++++++ .../kdb-tutorial/pages/keyed-tables.md | 379 ++++++++++++++++++ notebook-examples/kdb-tutorial/pages/qsql.md | 196 +++++++++ .../kdb-tutorial/pages/tables.md | 169 ++++++++ .../kdb-tutorial/pages/time-series-joins.md | 195 +++++++++ 6 files changed, 1166 insertions(+) create mode 100644 notebook-examples/kdb-tutorial/pages/getting-started.md create mode 100644 notebook-examples/kdb-tutorial/pages/joins.md create mode 100644 notebook-examples/kdb-tutorial/pages/keyed-tables.md create mode 100644 notebook-examples/kdb-tutorial/pages/qsql.md create mode 100644 notebook-examples/kdb-tutorial/pages/tables.md create mode 100644 notebook-examples/kdb-tutorial/pages/time-series-joins.md diff --git a/notebook-examples/kdb-tutorial/pages/getting-started.md b/notebook-examples/kdb-tutorial/pages/getting-started.md new file mode 100644 index 0000000..5a7de31 --- /dev/null +++ b/notebook-examples/kdb-tutorial/pages/getting-started.md @@ -0,0 +1,90 @@ +--- +server: localhost:5000 +--- + +# kdb+ Tutorials using SQL Notebooks + +These tutorials are available free at: https://www.timestored.com/kdb-guides/ + +These particular pages were generated using [SQL Notebooks](https://www.timestored.com/sqlnotebook/) within [QStudio](https://www.timestored.com/qstudio/) to demonstrate how easy it is to create tutorials. + +[QStudio](https://www.timestored.com/qstudio/) is a free SQL Client. 
+ +# Getting Started + +### type math expressions directly into kdb + +```sql showcodeonly +10+3 +900-88 +13*2 +90%20 // division is the percentage symbol +``` + +Expressions are evaluated right to left + +```sql showcodeonly +100%10+10 +``` + +```sql +100%10+10 +``` + +## Defining a variable + +```sql showcodeonly +a:13 +a +b:10*9 +b +a+b +c:a+b +c +``` + +## Comments + +comments must be space then slash then comment +within a script there's another format / on new line then closed later + +```sql showcodeonly +b: 1 // comments +b:1/ error as similar to adjectives we will see later +b: 1 // whitespace doesn't matter in kdb +``` + +## Slash Commands + +What objects exist on the server + +```sql showcodeonly +\v // variables +\a // tables +system "v" +system "a" +``` + +Can alter settings e.g. Precision and Console size + +```sql showcodeonly +\P +\c +\c 22 88 + +\l script.q +``` + +If the slash command isn't recognised as a kdb call +it's passed to the underlying OS, e.g. dos commands + +```sql showcodeonly +\cd +\echo test +system "cd" +``` + +```sql showcodeonly +\\ +exit 0; +``` \ No newline at end of file diff --git a/notebook-examples/kdb-tutorial/pages/joins.md b/notebook-examples/kdb-tutorial/pages/joins.md new file mode 100644 index 0000000..9d6e350 --- /dev/null +++ b/notebook-examples/kdb-tutorial/pages/joins.md @@ -0,0 +1,137 @@ +--- +server: localhost:5000 +--- + +# kdb+ Joins + +Sql joins allow pulling corresponding data from one table, onto another to give a combined result table with columns from both. +Most kdb joins rely on column names corresponding (rather than standard sql's more verbose explicit naming). 
+ +# Example Tables + +```sql showcodeonly +stock:([sym:`s#`AAPL`C`FB`MS] + sector:`Tech`Financial`Tech`Financial; + employees:72800 262000 4331 57726); + +trades:([] dt:`s#2015.01.01+0 1 2 3 3 4 5 6 6; + sym:`C`C`MS`C`DBK`AAPL`AAPL`MS`MS; + price:10 10.5 260 11 35.6 1010 1020 255 254; + size:10 100 15 200 55 20 300 200 400); + +fbTrades:([] dt:`s#2015.01.01+1 2 4; sym:`FB; size:1000; book:`A`B`A); +``` + +## stock + +``` +stock:([sym:`s#`AAPL`C`FB`MS] + sector:`Tech`Financial`Tech`Financial; + employees:72800 262000 4331 57726); +stock +``` + +## trades +``` +trades:([] dt:`s#2015.01.01+0 1 2 3 3 4 5 6 6; + sym:`C`C`MS`C`DBK`AAPL`AAPL`MS`MS; + price:10 10.5 260 11 35.6 1010 1020 255 254; + size:10 100 15 200 55 20 300 200 400); +trades +``` + +## fbTrades + +``` +fbTrades:([] dt:`s#2015.01.01+1 2 4; sym:`FB; size:1000; book:`A`B`A); +fbTrades +``` + + + + +# lj - Left Join + +The format of lj is: ``t lj kt`` where t is your source table and kt is your lookup table that MUST be keyed. +**Lj - left join - means for each row in table t, try to look up corresponding values in keyed-table kt, +where there is no match use nulls**. The columns used for matching are the key columns of kt. i.e. +The key columns of kt, must appear in t and their column names MUST match exactly. + +Note: + - Where a lookup table contains non-key columns with the same name as existing columns, the matched columns overwrite the original value. + - Where multiple matches are possible (duplicate keys in keyed table), the first match is always taken. + + +``` +trades lj stock +``` + + +# pj - Plus Join + +``t pj kt`` - Same principle as lj, but existing values are added to where column names match. 
+ +**Pj - plus join - means for each row in table t, try to look up corresponding values in keyed-table kt, where there are matching numeric columns add their values.** + +``` +stock pj ([sym:`FB`C] employees:100000 -260000) +``` + +# ij - Inner Join + +``t ij kt`` - **Where matches occur between t and kt on primary key columns, update or add that column.** +Non-matches are not returned in the result. The columns used for matching are the key columns of kt. +i.e. The key columns of kt, must appear in t and their column names MUST match exactly. + +``` +trades ij stock +``` + +As you can see the result has one less row than there was in trades, row 4 `DBK did not have a match by key lookup in the stock table so was dropped from the result. + +### Different inner join than standard sql + +Notice above that we said the join returns ONLY the first match from the lookup table. This is different than standard SQL which returns the cartesian join of all matches. The code below shows how we could replicate an SQL standard inner join: + +#### t1 +```sql type="table" +q)t1:([] sym:`a`b`c; v:1 2 3); +t1 +``` +#### t2 +```sql type="table" +t2:([sym:`a`a`b] s:100 200 300); +t2 +``` + +```sql type="table" +t1 ij t2 +``` + +```sql type="table" +ungroup t1 ij `sym xgroup t2 +``` + +# uj - Union Join + +**uj - Union-Join all rows/columns from two tables, upserting when keyed, appending when unkeyed or no existing match found.** + +``` +trades uj fbTrades +``` + +Notice: Columns with common names are now one column in the result. Columns that occurred in only one table are also included in the result; where no values existed they are filled with nulls. 
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/notebook-examples/kdb-tutorial/pages/keyed-tables.md b/notebook-examples/kdb-tutorial/pages/keyed-tables.md new file mode 100644 index 0000000..d96aae1 --- /dev/null +++ b/notebook-examples/kdb-tutorial/pages/keyed-tables.md @@ -0,0 +1,379 @@ +--- +server: localhost:5000 +--- + +# Keyed Tables + +## Create a Keyed Table + +A keyed table is a dictionary from one table of keys to another table of values with an equal number of rows. +We can define it as such or we can use the shorter table syntax. Where we place the key columns inside square braces. + + +```sql showcodeonly +k:flip (enlist `id)!enlist `a`b`c`e +v:flip `name`employer`age!(`jane`jim`kim`john; `citi`citi`ms`ts; 11 22 33 55) +k!v +/ this syntax is easier to read. +kt:([id:`a`b`c`e] name:`jane`jim`kim`john; employer:`citi`citi`ms`ts; age:11 22 33 55) +kt~k!v +/ multiple key columns +([id:`a`b`c`e; name:`jane`jim`kim`john] employer:`citi`citi`ms`ts; age:11 22 33 55) +``` +```sql +([id:`a`b`c`e; name:`jane`jim`kim`john] employer:`citi`citi`ms`ts; age:11 22 33 55) +``` + +## Common Keyed Table Functions + +The functions xkey and 0!,1!,2!.. 
allow setting and removing key columns: + +```sql showcodeonly +q)keys kt +,`id + +q)/ changing key columns +q)kt +id| name employer age +--| ----------------- +a | jane citi 11 +b | jim citi 22 +c | kim ms 33 +e | john ts 55 +q)`id`name xkey kt +id name| employer age +-------| ------------ +a jane| citi 11 +b jim | citi 22 +c kim | ms 33 +e john| ts 55 +q) +q)kt +id| name employer age +--| ----------------- +a | jane citi 11 +b | jim citi 22 +c | kim ms 33 +e | john ts 55 +q)`id`name xkey `kt +`kt +q)kt +id name| employer age +-------| ------------ +a jane| citi 11 +b jim | citi 22 +c kim | ms 33 +e john| ts 55 +q)() xkey kt +id name employer age +-------------------- +a jane citi 11 +b jim citi 22 +c kim ms 33 +e john ts 55 +q)`id xkey `kt +`kt + + +q)ut:0!kt +q)ut +id name employer age +-------------------- +a jane citi 11 +b jim citi 22 +c kim ms 33 +e john ts 55 +q)2!ut +id name| employer age +-------| ------------ +a jane| citi 11 +b jim | citi 22 +c kim | ms 33 +e john| ts 55 +q)3!ut +id name employer| age +----------------| --- +a jane citi | 11 +b jim citi | 22 +c kim ms | 33 +e john ts | 55 +q)4!ut +'length +``` + + +## Upserting Data into a Table + +Whereas insert always appends data to unkeyed tables, for keyed tables we use upsert. +Upsert has two different behaviours, if there is an existing key -> update the values else if it's a new key -> insert. + + +```sql showcodeonly +q)kt:([id:`a`b`c`e] name:`jane`jim`kim`john; employer:`citi`citi`ms`ts; age:11 22 33 55) +q)kt +id| name employer age +--| ----------------- +a | jane citi 11 +b | jim citi 22 +c | kim ms 33 +e | john ts 55 +q)nd:([id:`e`f] name:`dan`kate; employer:`walmart`walmart; age:200 200) +q)nd +id| name employer age +--| ----------------- +e | dan walmart 200 +f | kate walmart 200 +q)upsert[ kt; nd] +id| name employer age +--| ----------------- +a | jane citi 11 +b | jim citi 22 +c | kim ms 33 +e | dan walmart 200 +f | kate walmart 200 +``` + +You do not need to upsert all value columns. 
+ + - If the keys do not already exist, any specified columns will be inserted. Unspecified columns will be filled with appropriate nulls. + - If the keys already exist, specified columns will be overwritten with the new value. Other unspecified columns will remain unchanged. + + +```sql showcodeonly +q)upsert[ kt; ([id:`e`f] name:`dan`kate) ] +id| name employer age +--| ----------------- +a | jane citi 11 +b | jim citi 22 +c | kim ms 33 +e | dan ts 55 +f | kate +q)upsert[ kt; ([id:`e`f] name:`dan`kate; employer:`PPP`OOO) ] +id| name employer age +--| ----------------- +a | jane citi 11 +b | jim citi 22 +c | kim ms 33 +e | dan PPP 55 +f | kate OOO + +q)/ upsert data must contain key columns +q)upsert[ kt; ([] name:`dan`kate; employer:`PPP`OOO) ] +'id +q)upsert[ kt; ([] id:`e`f; name:`dan`kate; employer:`PPP`OOO) ] +id| name employer age +--| ----------------- +a | jane citi 11 +b | jim citi 22 +c | kim ms 33 +e | dan PPP 55 +f | kate OOO + +q)/ backtick needed to change underlying table +q)upsert[ `kt; ([] id:`e`f; name:`dan`kate; employer:`PPP`OOO) ] +`kt +q)kt +id| name employer age +--| ----------------- +a | jane citi 11 +b | jim citi 22 +c | kim ms 33 +e | dan PPP 55 +f | kate OOO + +upsert[ kt; `id`name!`z`Alfred ] / single item dictionary + / single item list, must have all columns +upsert[ kt; (`z;`Alfred;`fedex;100) ] +``` + +## Multiple Key Columns + +Tables with more than one key column use compound keys for access and upserts. New Data must contain values for all key columns to allow upserts to succeed. 
+ + +```sql showcodeonly +q)et:([employer:`kx`ms`ms; loc:`NY`NY`LONDON] size:10 2000 1000; area:0.9 15.1 11.2) +q)et +employer loc | size area +---------------| --------- +kx NY | 10 0.9 +ms NY | 2000 15.1 +ms LONDON| 1000 11.2 +q)upsert[ et; ([employer:`kx`ms; loc:`NY`TURKEY] size:9 12) ] +employer loc | size area +---------------| --------- +kx NY | 9 0.9 +ms NY | 2000 15.1 +ms LONDON| 1000 11.2 +ms TURKEY| 12 + +q) / new data must have all key columns +q)upsert[ et; ([employer:`kx`ms] size:9 12) ] +'mismatch +q)upsert[ et; ([employer:`kx`ms; loc:`NY`TURKEY] size:9 12) ] +employer loc | size area +---------------| --------- +kx NY | 9 0.9 +ms NY | 2000 15.1 +ms LONDON| 1000 11.2 +ms TURKEY| 12 + +q) / backtick to actually alter table +q)upsert[ `et; ([employer:`kx`ms; loc:`NY`TURKEY] size:9 12) ] +`et +q)et +employer loc | size area +---------------| --------- +kx NY | 9 0.9 +ms NY | 2000 15.1 +ms LONDON| 1000 11.2 +ms TURKEY| 12 + +q)upsert[ et; ([] employer:`kx`ms; loc:`NY`TURKEY)] // no value columns +``` + +## Selecting Data + +Keyed tables can be accessed in many ways including: + + + - **qSQL** - it provides a universal wrapper for accessing keyed and unkeyed tables. 
+ - **Id Lookup** + - Single - kt \`a + - Multiple - kt[flip enlist \`c\`d] + - **Table Lookup** - kt ([] id:\`a\`b ) + - Table #Take - ([] id:\`a\`b )#kt + + +```sql showcodeonly +q)kt +id| name employer age +--| ----------------- +a | jane citi 11 +b | jim citi 22 +c | kim ms 33 +e | dan PPP 55 +f | kate OOO + +q)select from kt where employer=`citi +id| name employer age +--| ----------------- +a | jane citi 11 +b | jim citi 22 +q)kt `a +name | `jane +employer| `citi +age | 11 +q)kt `aasd / non-existent = nulls +name | ` +employer| ` +age | 0N + +q)kt `a`b +'length +q)kt[flip enlist `a`b] +name employer age +----------------- +jane citi 11 +jim citi 22 + + +q)kt ([] id:`a`b) +name employer age +----------------- +jane citi 11 +jim citi 22 +q)([] id:`a`b)#kt +id| name employer age +--| ----------------- +a | jane citi 11 +b | jim citi 22 + + + +/ We can use the find operator ? +/ to lookup keys that match given values +kt?(`jane;`citi;11) / reverse lookup +``` + +Compound keys work similarly to single keys; the table format for retrieving values is recommended as being clearer in its intent and easier for other developers to read. + +```sql showcodeonly +q)et +employer loc | size area +---------------| --------- +kx NY | 9 0.9 +ms NY | 2000 15.1 +ms LONDON| 1000 11.2 +ms TURKEY| 12 +q)et `ms`LONDON +size| 1000 +area| 11.2 +q)et (`ms`LONDON; `kx`NY) +size area +--------- +1000 11.2 +9 0.9 +q)et ([] employer:`ms`kx; loc:`LONDON`NY) +size area +--------- +1000 11.2 +9 0.9 +q)([] employer:`ms`kx; loc:`LONDON`NY)#et +employer loc | size area +---------------| --------- +ms LONDON| 1000 11.2 +kx NY | 9 0.9 +``` + +## Non-Unique Keys + +One feature to be aware of is that key uniqueness is not enforced on table creation or when using xkey. +Below we demonstrate how we can create a table with repeated keys and the behaviour of accessing such a table. 
+ +```sql showcodeonly +q)lt:([a:1 2 2 3] val:`a`b`c`d) +q)lt +a| val +-| --- +1| a +2| b +2| c +3| d + +q)/ only the first match returned +q)lt 2 +val| b + + +/ these functions do not force uniqueness +q)mt:0!lt +q)mt +a val +----- +1 a +2 b +2 c +3 d +q)`a xkey mt +a| val +-| --- +1| a +2| b +2| c +3| d +q)1!mt +a| val +-| --- +1| a +2| b +2| c +3| d + +``` + + + + + + diff --git a/notebook-examples/kdb-tutorial/pages/qsql.md b/notebook-examples/kdb-tutorial/pages/qsql.md new file mode 100644 index 0000000..e2fa00f --- /dev/null +++ b/notebook-examples/kdb-tutorial/pages/qsql.md @@ -0,0 +1,196 @@ +--- +server: localhost:5000 +--- + +# Q-SQL + +Q-SQL is the subset of sql that kdb provides to allow querying and manipulating tables. +There are a number of specific fundamental concepts underlying q-sql that make it work differently to standard SQL. +Unlike most databases, kdb is column oriented. A column is a list or vector and has order. +This contrasts to row oriented standard sql databases that are based on the concepts of sets and have no order. +The order and specific wording of the syntax is often different to standard sql. Q shortens standard sql syntax where possible. + +## Query Formats + +```sql showcodeonly +q)\l trades.q +(+`date`sym!(2013.09.21 2013.09.22 2013.09.23 2013.09.24 2013.09.25;`RBS`RBS`RBS`RBS +q)\a +`quote`stock`trade +``` +``` +q)select from trade where i<20 +``` + +``` +q)-20 sublist select from trade where date=2024.12.02 +``` + +``` +q)-20 sublist select from trade where date=2024.12.02,sym=`A +``` + +```sql height='100px' +q)select max price from trade where date=2024.12.02,sym=`A +``` + +## Compared to standard SQL: + +You can see from the first line that we don't have "select *", in kdb if you want to select every column we just omit the column names +(typical of kdb to favour brevity). On the second line date=2024.12.02,sym=`A. In kdb comma is the preferred separator of conditional +clauses for reasons we shall see later. 
Rather than standard sql's group by, we use select by in q-sql and the columns are in the + output without having to repeat in the select. + + +# Select From Where + +### select c1, c2 by c3, c4 from table where expression1, expression2 + +A select query contains 4 parts that are evaluated in the order: + +1. Table +2. Where clauses +3. By Aggregates +4. Column / Value selections + +For a query such as: ``select max price by exchange from trade where date=2024.01.02,sym=`AA`` + +The steps are: + +1. Starting from the leftmost where clause, examine each in turn + - Scan the date column entirely and find all positions that match the date 2024.01.02. + - Take the positions that matched in the date column and examine the same positions in the sym column this time to match `AA +2. Take our final list of matching rows, scan the exchange column and group them in order by their exchange value. So that for each distinct exchange we get a list of indices. +3. Extract each list of matching indices from price column giving us a list of prices per exchange. +4. Perform max on each list, meaning we now have one price per exchange. + +## Where Clause: Acts as a logical filter on rows. + + - Commas act as a logical "and", where each condition is evaluated in turn, left to right. 
+ - The OR logical operation is available however due to q's natural right-to-left evaluation, we must use parentheses: ``select from t where (a=1) or (b=2)`` + +```sql showcodeonly +q)\t do[80; select max price from trade where sym=`AAPL,cond="A"] +338 +q)\t do[80; select max price from trade where cond="A",sym=`AAPL] +587 + +q)count select from trade where cond="A" +211597 +q)count select from trade where sym=`AAPL +50106 + +q) / careful to use parentheses with and/or clauses +q)\t do[80; select max price from trade where (cond="A") and sym=`AAPL] +402 +``` + +``` +q)select max price from trade where sym=`AAPL,(cond="A") or cond="B" +``` + +``` +q)-20 sublist select from trade where sym=`AAPL,(cond="A") or cond="B" +``` +The below query is not giving the result you would expect: +``` +q)-20 sublist select from trade where sym=`AAPL,cond="A" or cond="B" +``` + +Notice also the timing and performance difference of the queries: + +```sql showcodeonly +q)\t do[80; select max price from (select from trade where cond="A") where sym=`AAPL] +893 + +q)/ the comma filter in the correct order is fastest +q)\t do[80; a:select max price from trade where sym=`AAPL,cond="A"] +370 +q)\t do[80; b:select max price from trade where cond="A",sym=`AAPL] +615 +q)\t do[80; c:select max price from trade where (cond="A") and sym=`AAPL] +409 +q)\t do[80; d:select max price from (select from trade where (cond="A")) where sym=`AAPL] +884 +q)a~b +1b +q)b~c +1b +q)c~d +1b +``` + +# By Aggregates: + + + - Groups selected column values by an aggregate. + - The aggregates become the keys of the keyed table. + - Is often the most expensive part of a query. 
+ + +### Special case that returns the last row + +``` +select by sym from trade where date=.z.d +``` + +By groups the select columns by the aggregates, making the table keyed by those aggregates in order: + +``` +select 10 sublist price by sym from trade where date=.z.d +``` + +```sql showcodeonly +select price by sym from trade where date=.z.d +select price by sym, cond from trade where date=.z.d +// then by applying an aggregate function, we reduce each list to an atom +select max price by sym from trade where date=.z.d + +// BY - is a costly operation, in time and memory +\ts do[20; select price by sym from trade where date=.z.d] +\ts do[20; select price by sym, cond from trade where date=.z.d] +\ts do[20; select price by sym, cond, date from trade where date=.z.d] +``` + +# Select Parameters: + + - Each entry becomes a column in the output table + - If unspecified, the column name is taken from the last used underlying column. + + +Since vectors are ordered, some things are much easier in qSQL +``` +q)select first price,first time by date from trade where sym=`AAPL +``` +``` +q)select last price,last time by date from trade where sym=`AAPL +``` + +### Open High Low Close = Candlestick +``` +q)select open:first price, high:max price, low:min price, close:last price by date from trade where sym=`AAPL +``` +```sql type='candle' +q)select open:first price, high:max price, low:min price, close:last price by date from trade where sym=`AAPL +``` + + +## select in + +in - checks, for each item of its LHS argument, whether it occurs anywhere in its RHS argument, returning true for that item if so, otherwise false. +In a where clause this is useful for selecting a group of data, rather than specifying each using equals. 
+The below queries return the same result: + +``` +q)-20 sublist select from trade where (sym=`RBS) or (sym=`AAPL) +``` +``` +q)-20 sublist select from trade where sym in `AAPL`RBS +``` + + + + + + + \ No newline at end of file diff --git a/notebook-examples/kdb-tutorial/pages/tables.md b/notebook-examples/kdb-tutorial/pages/tables.md new file mode 100644 index 0000000..3191e0d --- /dev/null +++ b/notebook-examples/kdb-tutorial/pages/tables.md @@ -0,0 +1,169 @@ +--- +server: localhost:5000 +--- +# Tables + +## Create a kdb Table + +In an earlier tutorial I said that lists and dictionaries were the only fundamental data structures in kdb that all others built on top of. +A table is a specially formed dictionary from column names to equal length vectors of data. This simplicity is also powerful as it allows us +to access and manipulate table data using all our previously learnt list/dictionary methods. + +```sql showcodeonly +d:`company`employees!(`ford`bmw;300 100) +t:flip d +type d +type t +``` + +```sql type='table' width='100px' +flip `company`employees!(`ford`bmw;300 100) +``` + +Typically when defining a table we use the q language shorthand notation. Parentheses to contain our table, with vector data assigned to column names, separated by semi-colons. +(The square bracket is for defining keyed tables and we will look at this further in [keyed tables](/markdown?sheet=kdb-tutorials%2Fkeyed-tables)). 
+ +```sql showcodeonly +t~([] company:`ford`bmw; employees:300 100) + +/ must enlist data if creating one row table +([] company:`ford; employees:300) +([] company:enlist `ford; employees:enlist 300) + + +([] syms:`a`b`c; floats:1.1 2.2 3.3; strings:("bob";"jim";"john")) +([] syms:`a`b`c; chars:"aaa"; times:08:00 08:30 09:00) +/ atoms get expanded to fill columns +([] syms:`a`b`c; num:33) +``` +```sql height='150px' +([] syms:`a`b`c; floats:1.1 2.2 3.3; strings:("bob";"jim";"john")) +``` + +## Defining Empty Tables + +Normally when defining a table, you will define it as empty and insert data later, e.g. from a feedhandler. When defining a table the columns should be set to the correct type when possible as this allows type checking inserted data. + + +```sql showcodeonly +t:([] company:`ford`bmw; employees:300 100) +meta t +t:([] company:(); employees:()) +meta t +t:([] company:`symbol$(); employees:`int$()) +meta t +``` +```sql height='120px' +t:([] company:`symbol$(); employees:`int$()); +meta t +``` + +## Common Table Functions + +The most common functions used with tables are shown below: + + +```sql showcodeonly +t:([] company:`ford`bmw; employees:300 100) +t +type t +count t // return number of rows +cols t // retrieve symbol list of column names +meta t + +/ family of xfunctions +`a`b xcols t // reorder columns +`employees xasc t // sort table by a column + +/ List the tables that exist +\a . +system "a ." +tables `. +``` + +## Set Operations + +The set functions that we previously used on lists also work on tables: + +```sql showcodeonly +t:([] company:`ferrari`ford`rover; employees:3 66 200) +u:([] company:`ferrari`bmw`ford; employees:3 88 77) +distinct t +t union u +t except u +t inter u +``` + + +## Accessing a Table - qSQL + +There are three methods for accessing an unkeyed table, qSQL, as a dictionary and as a list. qSQL is the most common method + and we will look at it in much more detail later. 
Unlike standard SQL no * is needed to select all columns and some simple queries would include: + + +```sql showcodeonly + t:([] company:`ferrari`ford`rover`bmw`AA; employees:3 66 200 88 1) + +// qSQL +select from t +select from t where company=`ford +select employees,eFactor:employees%100 from t where company=`ford +``` + +```sql type='table' + carst:([] company:`ferrari`ford`rover`bmw`AA; employees:3 66 200 88 1); +select employees,eFactor:employees%100 from carst where company=`ford +``` + + At the start we demonstrated a table is a dictionary from a list of column name symbols to vectors of data. We can use that + method of accessing a table, by supplying a column name as a lookup, we return that column's data as a list. + +```sql showcodeonly +t[`company] +t[`employees]-:1000 + +// as a list +t[0 1 2] +-3#t +-2?t +``` + +Alternatively if we treat the table as a list of dictionaries, we can index into that list to retrieve multiple items. +Other standard list functions work similarly, returning a number of rows from the table. + +## Inserting Data into a Table + +To insert data into an unkeyed table, we use the insert function to append new rows to the end of the table. +Insert allows multiple formats including single lists, multiple batch lists and insertion of tables. 
+ + +```sql showcodeonly +t:([] company:(); employees:()) +meta t + +insert[`t; (`ferrari;8)] +meta t +insert[`t; (`ferrari;8.22)] / this fails as wrong type +/ why you should specify type during creation +insert[`t; (`ferrari`mg;9 7)] +insert[`t; ([] company:`subaru`hyundai; employees:55 56)] +insert[`t; ([] company:`jeep`mercedes; employees:66 65.666)] + +/ append using table joins (comma) +t:t,([] company:`bmw`skoda; employees:200 300) +``` + +### See Also: + + - Next Lesson: [keyed tables](/markdown?sheet=kdb-tutorials%2Fkeyed-tables) + - [kdb+ Tutorials](https://www.timestored.com/kdb-guides/) + + + + + + + + + + diff --git a/notebook-examples/kdb-tutorial/pages/time-series-joins.md b/notebook-examples/kdb-tutorial/pages/time-series-joins.md new file mode 100644 index 0000000..485461c --- /dev/null +++ b/notebook-examples/kdb-tutorial/pages/time-series-joins.md @@ -0,0 +1,195 @@ +--- +server: localhost:5000 +--- + +# Asof AJ WJ Time Series Joins + +Original Source: https://www.timestored.com/kdb-guides/asof-time-joins-aj-wj + +Kdb has timeseries data specific joins that provide powerful tools for analysing tick data in particular. +Due to kdb being column-oriented and based on ordered lists, the syntax is usually much more concise and the speed much faster than standard sql databases. + + + +# Asof Time Join + +We will use the following simplified trade-t and quote-q tables to demonstrate the various joins. + +``` +t:([] + time:07:00 08:30 09:59 10:00 12:00 16:00t; + sym:`a`a`a`a`b`a; + price:0.9 1.5 1.9 2 9. 10.; + size:100*6?10); +t +``` + +``` +q:([] + time:08:00+`time$60*60000*til 8; + sym:`a`b`a`b`b`a`b`a; + bid:1 9 2 8 8.5 3 7 4.); +q +``` + +# AJ + +- **AJ** aj[ cols; sourceTable; lookupTable] +- **AJ0** aj0[ cols; sourceTable; lookupTable] + +For each row in the source table lookup a matching value in the lookup table, by matching on the columns specified in cols. 
cols is a list of column names where the initial columns MUST match exactly and the last column matches the closest value LESS-THAN-OR-EQUAL to the value in the source table. + + - ``sourceTable`` - The table whose items you want to try and find close matches for, the result will have the same number of rows as this table. + - ``lookupTable`` - The table used for finding matching data to join, the size and schema of this table will strongly affect the speed. + - ``cols`` - A list of columns to use for joining on. The initial columns excluding the last will be matched exactly. The last column matches if an entry less-than-or-equal is found. + + +``` +q)aj[`sym`time; t; q] +``` + +Adding some columns makes it clearer which time columns are which: +``` +fq:update qtime:time,qsym:sym from q; +ft:update ftime:time, fsym:sym from t; +aj[`sym`time; ft; fq] +``` + +# aj0 + +AJ0 is the exact same as aj but returns the lookup table's time column. + +``` +q)aj[`sym`time; ft; fq] +``` + +``` +q)aj0[`sym`time; ft; fq] +``` + +# asof + +Asof is a built-in kdb function, that provides a limited version of AJ, you may find it used occasionally. + +``` +t,'q asof `sym`time#t +``` + +# Union Join + +An alternative method of viewing time-series data for examining sequential events between tables, is using the union join uj to get a combined table then sorting the full table on time. + +``` +q)`time xasc q uj t +``` + +# Running AJ on large tables + +```sql showcodeonly +q)\l trades.q +(+`date`sym!(2013.09.27 2013.09.28 2013.09.29 2013.09.30 2013.10.01;`RBS`RBS`RBS`RBS`.. 
+q)trade:100?trade +q)count each (trade;quote) +100 1700000 +q)meta quote +c | t f a +-----| ----- +date | d s +time | t +sym | s +size | i +cond | c +bid | f +ask | f +asize| j +bsize| j +q)\t r1:aj[`sym`time; trade; quote] +681 +q)\t update `g#sym from `quote +46 +q)meta quote +c | t f a +-----| ----- +date | d s +time | t +sym | s g +size | i +cond | c +bid | f +ask | f +asize| j +bsize| j +q)\t r2:aj[`sym`time; trade; quote] +0 +q)r1~r2 +1b +q) +``` + +Running time-series joins such as AJ on large amounts of data takes a significant amount of time. +By applying a grouped attribute to the sym column we reduced the time from over half a second to under a tenth of a second. +You must be careful running aj/wj's, particularly against on-disk data, it is recommended that you consult the documentation on +code kx or consult an experienced kdb programmer if you have any issues. + + +# Time Window Join + +We will use the following simplified trade-t and quote-q tables to demonstrate the various time window joins. + +``` +tt:([] + time: 09:00 09:04 09:12 09:13t; + sym: `a`a`a`a; + price: 10 11 12 13.); +tt +``` + +``` +qq:([] + time: 09:00+`time$60000*til 13; + sym: `a`a`a`a`a`b`b`b`a`a`a`a`a; + bid: asc 9.+13?10); +qq +``` + +# wj + + +- **WJ** wj[ windows; cols; sourceTab; (lookupTab;(agg0;col0);(agg1;col1))] +- **WJ1** wj1[ windows; cols; sourceTab; (lookupTab;(agg0;col0);(agg1;col1))] + +For each row in the source table, a time window pair is specified, matches on cols are then found and those that occur within the time window have the aggregate functions applied to the selected columns. + + - ``sourceTable`` - The table whose items you want to try and find close matches for, the result will have the same number of rows as this table. + - ``lookupTable`` - The table used for finding matching data to join + - ``cols`` A list of columns to use for joining on. The initial columns excluding the last will be matched exactly. 
The last column will match within the specified windows. + +```sql type="table" +windows:flip tt.time +\: -00:02 00:02t; +windows +``` + +``` +wj[windows; `sym`time; tt; (qq; (::; `bid))] +``` +``` +wj[windows; `sym`time; tt; (qq; (avg; `bid))] +``` + +# WJ1 + +The only difference between wj1 and wj is that where wj pulls in prevailing values not within the time window, wj1 strictly excludes values outside the interval. + +``` +win2:(08:58:00.000 09:02:00.000 09:10:00.000 10:10:00.00; 09:02:00.000 09:06:00.000 09:14:00.000 10:15:00.0); +wj[win2; `sym`time; tt; (qq; (::; `bid))] +``` + +### See Also: + + - [Setting up a Kdb Development Environment](https://www.timestored.com/kdb-guides/developer-environment) - installation, linux/windows tools + - [Commonly encountered kdb limits](https://www.timestored.com/kdb-guides/kdb-database-limits) - rank branch constant errors + + + + \ No newline at end of file