Skip to content

Commit

Permalink
ADD: Implement DBNv2
Browse files Browse the repository at this point in the history
  • Loading branch information
threecgreen committed Oct 27, 2023
1 parent c6d30d9 commit 18c9935
Show file tree
Hide file tree
Showing 90 changed files with 1,603 additions and 430 deletions.
42 changes: 41 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,52 @@

## 0.14.0 - TBD
### Enhancements
- This version begins the transition to DBN version 2 (DBNv2). In this version, the
decoders support decoding both versions of DBN and the DBN encoders default to
keeping the version of the input. However, in a future version, decoders will by default
convert DBNv1 to DBNv2 and support will be dropped for encoding DBNv1.
- Affects `SymbolMappingMsg`, `InstrumentDefMsg`, and `Metadata`. All other record
types and market data schemas are unchanged
- Version 1 structs can be converted to version 2 structs with the `From` trait
- Added `symbol_cstr_len` field to `Metadata` to indicate the length of fixed symbol
strings
- Added `stype_in` and `stype_out` fields to `SymbolMappingMsg` to provide more context
with live symbology updates
- Added smart wrapping to `dbn` CLI help output
- Updated `rtype_dispatch` family of macros to check record length to handle both
versions of records. This is temporary during the transition period
- Added `VersionUpgradePolicy` enum and associated methods to the decoders to
allow specifying how to handle decoding records from prior DBN versions
- Added `Metadata::upgrade()` method to update `Metadata` from a prior DBN version to
the latest version
- Added `-u`/`--upgrade` flags to `dbn` CLI that when passed upgrades DBN data from
previous versions. By default data is decoded as-is
- Added `TOB` flag to denote top-of-book messages
- Added new publisher values in preparation for IFEU.IMPACT and NDEX.IMPACT datasets

### Breaking changes
- The old `InstrumentDefMsg` is now `compat::InstrumentDefMsgV1`
- `compat::InstrumentDefMsgV2` is now an alias for `InstrumentDefMsg`
- The old `SymbolMappingMsg` is now `compat::SymbolMappingMsgV1`
- `compat::SymbolMappingMsgV2` is now an alias for `SymbolMappingMsg`
- Changed `SYMBOL_CSTR_LEN` constant to 71. Previous value is now in
`compat::SYMBOL_CSTR_LEN_V1`
- Changed `DBN_VERSION` constant to 2
- `security_update_action` was converted to a raw `c_char` to safely support adding
variants in the future
- Renamed `_dummy` in `InstrumentDefMsg` to `_reserved`
- Removed `_reserved2`, `_reserved3`, and `_reserved5` from `InstrumentDefMsg`
- Removed `_dummy` from `SymbolMappingMsg`
- Added `upgrade_policy` parameter to `RecordDecoder::with_version` constructor to
control whether records of previous versions will be upgraded
- Added `upgrade_policy` parameter to `DynDecoder` constructors to control whether
records of previous versions will be upgraded
- Renamed the `symbol_map` parameter of the Python `Transcoder` to `symbol_interval_map`
to better reflect the date intervals it contains

### Bug fixes
- Fixed type signature for `Metadata.stype_in` and `Metadata.stype_out` Python methods

## 0.13.0 - 2023-10-20
### Enhancements
- Added `SymbolMappingMsgV2::new` method
Expand Down Expand Up @@ -372,7 +411,8 @@

## 0.2.1 - 2022-12-02
- Added Python DBZ writing example
- Changed [databento-defs](https://crates.io/crates/databento-defs) dependency to crates.io version
- Changed [databento-defs](https://crates.io/crates/databento-defs) dependency to
crates.io version

## 0.2.0 - 2022-11-28
### Enhancements
Expand Down
11 changes: 11 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions c/cbindgen.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ renaming_overrides_prefixing = true
[export.rename]
"FILE" = "FILE"
# Workaround for cbindgen not understanding constants defined in terms of other constants
# TODO(carter): update for V2
"SYMBOL_CSTR_LEN_V1" = "DbnSYMBOL_CSTR_LEN"
"SYMBOL_CSTR_LEN_V2" = "DbnSYMBOL_CSTR_LEN"

[enum]
prefix_with_name = true
Expand Down
10 changes: 5 additions & 5 deletions c/src/compat.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
use dbn::{
compat::{InstrumentDefMsgV2, SymbolMappingMsgV2},
compat::{InstrumentDefMsgV1, SymbolMappingMsgV1},
InstrumentDefMsg, SymbolMappingMsg,
};

/// Converts a V1 InstrumentDefMsg to V2.
#[no_mangle]
pub extern "C" fn from_instrument_def_v1_to_v2(def_v1: &InstrumentDefMsg) -> InstrumentDefMsgV2 {
InstrumentDefMsgV2::from(def_v1)
pub extern "C" fn from_instrument_def_v1_to_v2(def_v1: &InstrumentDefMsgV1) -> InstrumentDefMsg {
InstrumentDefMsg::from(def_v1)
}

/// Converts a V1 SymbolMappingMsg to V2.
#[no_mangle]
pub extern "C" fn from_symbol_mapping_v1_to_v2(def_v1: &SymbolMappingMsg) -> SymbolMappingMsgV2 {
SymbolMappingMsgV2::from(def_v1)
pub extern "C" fn from_symbol_mapping_v1_to_v2(def_v1: &SymbolMappingMsgV1) -> SymbolMappingMsg {
SymbolMappingMsg::from(def_v1)
}
8 changes: 6 additions & 2 deletions c/src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use std::{

use dbn::{
decode::{DecodeDbn, DecodeRecordRef, DynDecoder},
Compression, Metadata, Record, RecordHeader,
Compression, Metadata, Record, RecordHeader, VersionUpgradePolicy,
};

pub type Decoder = DynDecoder<'static, BufReader<File>>;
Expand All @@ -21,7 +21,11 @@ pub type Decoder = DynDecoder<'static, BufReader<File>>;
/// `file` must be a valid file descriptor. This function assumes ownership of `file`.
#[no_mangle]
pub unsafe extern "C" fn DbnDecoder_create(file: RawFd, compression: Compression) -> *mut Decoder {
let decoder = match DynDecoder::new(File::from_raw_fd(file), compression) {
let decoder = match DynDecoder::new(
File::from_raw_fd(file),
compression,
VersionUpgradePolicy::AsIs,
) {
Ok(d) => d,
Err(_) => {
return null_mut();
Expand Down
6 changes: 6 additions & 0 deletions c/src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub const METADATA_MIN_ENCODED_SIZE: usize = 128;
/// - Returns -1 if `buffer` is null.
/// - Returns -2 if `dataset` cannot be parsed.
/// - Returns -3 if the metadata cannot be encoded.
/// - Returns -4 if the version is invalid.
///
/// # Safety
/// This function assumes `dataset` is a valid pointer and `buffer` is of size
Expand All @@ -28,6 +29,7 @@ pub const METADATA_MIN_ENCODED_SIZE: usize = 128;
pub unsafe extern "C" fn encode_metadata(
buffer: *mut c_char,
length: libc::size_t,
version: u8,
dataset: *const c_char,
schema: Schema,
start: u64,
Expand All @@ -43,7 +45,11 @@ pub unsafe extern "C" fn encode_metadata(
return -2;
}
};
if version == 0 || version > dbn::DBN_VERSION {
return -4;
}
let metadata = MetadataBuilder::new()
.version(version)
.dataset(dataset)
.start(start)
.stype_in(Some(SType::InstrumentId))
Expand Down
33 changes: 8 additions & 25 deletions c/src/text_serialization.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
use std::{
ffi::c_char,
io::{self, Write},
mem, slice,
slice,
};

use crate::cfile::CFileRef;
use dbn::{
compat::InstrumentDefMsgV2,
encode::{csv, json, DbnEncodable, EncodeRecord, EncodeRecordRef},
rtype, rtype_ts_out_dispatch, Record, RecordHeader, RecordRef, Schema,
encode::{csv, json, DbnEncodable, EncodeRecordRef},
rtype, rtype_ts_out_dispatch, RecordHeader, RecordRef, Schema,
};

use crate::cfile::CFileRef;

/// The encoding to serialize as.
#[repr(C)]
pub enum TextEncoding {
Expand Down Expand Up @@ -158,22 +158,6 @@ pub unsafe extern "C" fn s_serialize_record(
return SerializeError::NullOptions as libc::c_int;
};
let mut cursor = io::Cursor::new(buffer);
// TODO(carter): reverse when V2 becomes the default
if record.record_size() >= mem::size_of::<InstrumentDefMsgV2>() {
if let Some(def_v2) = record.get::<InstrumentDefMsgV2>() {
let res = match options.encoding {
TextEncoding::Json => {
json::Encoder::new(&mut cursor, false, options.pretty_px, options.pretty_ts)
.encode_record(def_v2)
}
TextEncoding::Csv => {
csv::Encoder::new(&mut cursor, options.pretty_px, options.pretty_ts)
.encode_record(def_v2)
}
};
return write_null_and_ret(cursor, res);
}
};
let res = match options.encoding {
TextEncoding::Json => {
json::Encoder::new(&mut cursor, false, options.pretty_px, options.pretty_ts)
Expand Down Expand Up @@ -264,14 +248,13 @@ fn write_null_and_ret(mut cursor: io::Cursor<&mut [u8]>, res: dbn::Result<()>) -
mod tests {
use std::os::raw::c_char;

use dbn::InstrumentDefMsg;
use dbn::{compat::InstrumentDefMsgV1, InstrumentDefMsg};

use super::*;

#[test]
fn test_serialize_def_v1() {
// TODO(carter): update once DBNv2 is the default
let mut def_v1 = InstrumentDefMsg::default();
let mut def_v1 = InstrumentDefMsgV1::default();
def_v1.raw_symbol = [b'a' as c_char; dbn::compat::SYMBOL_CSTR_LEN_V1];
def_v1.raw_symbol[dbn::compat::SYMBOL_CSTR_LEN_V1 - 1] = 0;
let mut buf = [0; 5000];
Expand Down Expand Up @@ -299,7 +282,7 @@ mod tests {

#[test]
fn test_serialize_def_v2() {
let mut def_v2 = InstrumentDefMsgV2::from(&InstrumentDefMsg::default());
let mut def_v2 = InstrumentDefMsg::default();
def_v2.raw_symbol = [b'a' as c_char; dbn::compat::SYMBOL_CSTR_LEN_V2];
def_v2.raw_symbol[dbn::compat::SYMBOL_CSTR_LEN_V2 - 1] = 0;
let mut buf = [0; 5000];
Expand Down
68 changes: 62 additions & 6 deletions python/databento_dbn.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,25 @@ class SType(Enum):
@classmethod
def variants(cls) -> Iterable[SType]: ...

class VersionUpgradePolicy(Enum):
    """
    How to handle decoding DBN data from a prior version.

    AS_IS
        Decode data from previous versions as-is.
    UPGRADE
        Decode data from previous versions converting it to the latest version.

    """

    AS_IS: str
    UPGRADE: str

    # Both classmethods are typed in terms of this enum rather than `SType`.
    @classmethod
    def from_str(cls, str) -> VersionUpgradePolicy: ...
    @classmethod
    def variants(cls) -> Iterable[VersionUpgradePolicy]: ...

class Metadata(SupportsBytes):
"""
Information about the data contained in a DBN file or stream. DBN requires
Expand Down Expand Up @@ -233,23 +252,23 @@ class Metadata(SupportsBytes):
"""
@property
def stype_in(self) -> str | None:
def stype_in(self) -> SType | None:
"""
The input symbology type to map from.
Returns
-------
str | None
SType | None
"""
@property
def stype_out(self) -> str:
def stype_out(self) -> SType:
"""
The output symbology type to map to.
Returns
-------
str
SType
"""
@property
Expand Down Expand Up @@ -305,7 +324,7 @@ class Metadata(SupportsBytes):
"""
@classmethod
def decode(cls, data: bytes) -> Metadata:
def decode(cls, data: bytes, upgrade_policy: VersionUpgradePolicy | None = None) -> Metadata:
"""
Decode the given Python `bytes` to `Metadata`. Returns a `Metadata`
object with all the DBN metadata attributes.
Expand All @@ -314,6 +333,8 @@ class Metadata(SupportsBytes):
----------
data : bytes
The bytes to decode from.
upgrade_policy : VersionUpgradePolicy
How to decode data from prior DBN versions. Defaults to decoding as-is.
Returns
-------
Expand Down Expand Up @@ -2305,6 +2326,16 @@ class SymbolMappingMsg(Record):
another.
"""

@property
def stype_in(self) -> SType:
"""
The input symbology type.
Returns
-------
SType
"""
@property
def stype_in_symbol(self) -> str:
"""
Expand All @@ -2316,6 +2347,16 @@ class SymbolMappingMsg(Record):
"""
@property
def stype_out(self) -> SType:
"""
The output symbology type.
Returns
-------
SType
"""
@property
def stype_out_symbol(self) -> str:
"""
The output symbol.
Expand Down Expand Up @@ -2412,9 +2453,17 @@ class DBNDecoder:
ts_out : bool, default False
Whether the records include the server send timestamp ts_out. Only needs to be
specified if `has_metadata` is False.
input_version : int, default current DBN version
Specify the DBN version of the input. Only used when decoding data without
metadata.
upgrade_policy : VersionUpgradePolicy
How to decode data from prior DBN versions. Defaults to decoding as-is.
"""

def __init__(self, has_metadata: bool = True, ts_out: bool = False): ...
def __init__(self, has_metadata: bool = True, ts_out: bool = False,
input_version: int = 2,
upgrade_policy: VersionUpgradePolicy | None = None,
): ...

def buffer(self) -> bytes:
"""
Expand Down Expand Up @@ -2498,6 +2547,11 @@ class Transcoder:
schema : Schema | None, default None
The data record schema to encode. This is required for transcoding Live CSV data,
as the tabular format is incompatible with mixed schemas.
input_version : int, default current DBN version
Specify the DBN version of the input. Only used when transcoding data without
metadata.
upgrade_policy : VersionUpgradePolicy
How to decode data from prior DBN versions. Defaults to decoding as-is.
"""

def __init__(
Expand All @@ -2512,6 +2566,8 @@ class Transcoder:
ts_out: bool = False,
symbol_interval_map: dict[int, list[tuple[datetime.date, datetime.date, str]]] | None = None,
schema: Schema | None = None,
input_version: int = 2,
upgrade_policy: VersionUpgradePolicy | None = None,
): ...
def buffer(self) -> bytes:
"""
Expand Down
Loading

0 comments on commit 18c9935

Please sign in to comment.