From 6ea9aecfa173d4ac37c3cd947cf6a47019eb0e4c Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Wed, 25 Oct 2023 16:34:03 -0700 Subject: [PATCH 1/6] FIX: Reduce symbol subscription batch size --- CHANGELOG.md | 5 +++++ databento/live/protocol.py | 6 ++++-- tests/test_live_client.py | 16 +++++++++++++--- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 35f7a86..76f379d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 0.22.2 - TBD + +#### Bug fixes +- Fixed issue where a large unreadable symbol subscription message could be sent + ## 0.22.1 - 2023-10-24 #### Bug fixes diff --git a/databento/live/protocol.py b/databento/live/protocol.py index e71a415..036fece 100644 --- a/databento/live/protocol.py +++ b/databento/live/protocol.py @@ -5,6 +5,7 @@ from collections.abc import Iterable from functools import singledispatchmethod from numbers import Number +from typing import Final import databento_dbn from databento_dbn import Schema @@ -30,7 +31,8 @@ from databento.live.gateway import SubscriptionRequest -RECV_BUFFER_SIZE: int = 64 * 2**10 # 64kb +RECV_BUFFER_SIZE: Final = 64 * 2**10 # 64kb +SYMBOL_LIST_BATCH_SIZE: Final = 64 logger = logging.getLogger(__name__) @@ -278,7 +280,7 @@ def subscribe( stype_in_valid = validate_enum(stype_in, SType, "stype_in") symbols_list = optional_symbols_list_to_list(symbols, stype_in_valid) - for batch in chunk(symbols_list, 128): + for batch in chunk(symbols_list, SYMBOL_LIST_BATCH_SIZE): batch_str = ",".join(batch) message = SubscriptionRequest( schema=validate_enum(schema, Schema, "schema"), diff --git a/tests/test_live_client.py b/tests/test_live_client.py index 628b528..4336d9b 100644 --- a/tests/test_live_client.py +++ b/tests/test_live_client.py @@ -399,14 +399,24 @@ async def test_live_subscribe_large_symbol_list( first_message = mock_live_server.get_message_of_type( gateway.SubscriptionRequest, timeout=1, - ) + ).symbols.split(",") second_message = mock_live_server.get_message_of_type( gateway.SubscriptionRequest, timeout=1, - ) + ).symbols.split(",") + + third_message = mock_live_server.get_message_of_type( + gateway.SubscriptionRequest, + timeout=1, + ).symbols.split(",") + + fourth_message = mock_live_server.get_message_of_type( + gateway.SubscriptionRequest, + timeout=1, + ).symbols.split(",") - reconstructed = first_message.symbols.split(",") + second_message.symbols.split(",") + reconstructed = first_message + second_message + third_message + fourth_message assert reconstructed == large_symbol_list From 745f21d19f9576b9ae615fa9d98a49d1f0e5f9b5 Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Wed, 25 Oct 2023 23:43:34 -0700 Subject: [PATCH 2/6] FIX: Performance of DBNStore.to_df with pretty_ts --- CHANGELOG.md | 1 + databento/common/dbnstore.py | 5 +---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76f379d..6ecea5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ #### Bug fixes - Fixed issue where a large unreadable symbol subscription message could be sent +- Fixed an issue where `DBNStore.to_df` with `pretty_ts=True` was very slow ## 0.22.1 - 2023-10-24 diff --git a/databento/common/dbnstore.py b/databento/common/dbnstore.py index 4f38287..78829ed 100644 --- a/databento/common/dbnstore.py +++ b/databento/common/dbnstore.py @@ -7,7 +7,6 @@ import warnings from collections.abc import Generator from collections.abc import Iterator -from functools import partial from io import BytesIO from os import PathLike from pathlib import Path @@ -1246,9 +1245,7 @@ def _format_px( def _format_pretty_ts(self, df: pd.DataFrame) -> None: for field in self._struct._timestamp_fields: - df[field] = df[field].apply( - partial(pd.to_datetime, utc=True, errors="coerce"), - ) + df[field] = pd.to_datetime(df[field], utc=True, errors="coerce") def _format_set_index(self, df: pd.DataFrame) -> None: index_column = ( From 90e28503291bba744a9d6d47b3c6b5271ec2ff06 Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Thu, 26 Oct 2023 00:07:57 -0700 Subject: [PATCH 3/6] ADD: Symbology mapping tools for CSV and JSON --- CHANGELOG.md | 6 +- databento/__init__.py | 5 + databento/common/bentologging.py | 2 +- databento/common/constants.py | 1 + databento/common/dbnstore.py | 2 +- databento/common/parsing.py | 2 +- databento/common/symbology.py | 273 +++++++++++++++++++++++- databento/live/client.py | 2 +- databento/live/protocol.py | 2 +- databento/live/session.py | 2 +- tests/data/test_data.definition.dbn.zst | Bin 325 -> 317 bytes tests/data/test_data.statistics.dbn.zst | Bin 129 -> 249 bytes tests/test_common_symbology.py | 95 ++++++++- tests/test_historical_bento.py | 6 +- tests/test_live_client.py | 2 +- 15 files changed, 387 insertions(+), 13 deletions(-) create mode 100644 databento/common/constants.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ecea5f..3dfd956 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ # Changelog -## 0.22.2 - TBD +## 0.23.0 - TBD + +#### Enhancements +- Added `map_symbols_csv` function to the `databento` module for using `symbology.json` files to map a symbol column onto a CSV file +- Added `map_symbols_json` function to the `databento` module for using `symbology.json` files to add a symbol key to a file of JSON records #### Bug fixes - Fixed issue where a large unreadable symbol subscription message could be sent diff --git a/databento/__init__.py b/databento/__init__.py index c06776e..92f0c99 100644 --- a/databento/__init__.py +++ b/databento/__init__.py @@ -19,6 +19,7 @@ from databento_dbn import TradeMsg from databento.common import bentologging +from databento.common import symbology from databento.common.dbnstore import DBNStore from databento.common.enums import Delivery from databento.common.enums import FeedMode @@ -35,6 +36,7 @@ from databento.common.publishers import Dataset from databento.common.publishers import Publisher from databento.common.publishers import Venue +from databento.common.symbology import InstrumentMap from databento.historical.api import API_VERSION from databento.historical.client import Historical from databento.live import DBNRecord @@ -60,6 +62,7 @@ "RecordFlags", "Historical", "HistoricalGateway", + "InstrumentMap", "Live", "Packaging", "RollRule", @@ -91,3 +94,5 @@ # Convenience imports enable_logging = bentologging.enable_logging from_dbn = DBNStore.from_file +map_symbols_csv = symbology.map_symbols_csv +map_symbols_json = symbology.map_symbols_json diff --git a/databento/common/bentologging.py b/databento/common/bentologging.py index 44f5bf4..47796c5 100644 --- a/databento/common/bentologging.py +++ b/databento/common/bentologging.py @@ -6,7 +6,7 @@ def enable_logging(level: int | str = logging.INFO) -> None: """ Enable logging for the Databento module. This function should be used for - simple applications and examples. It is advisible to configure your own + simple applications and examples. It is advisable to configure your own logging for serious applications. Parameters diff --git a/databento/common/constants.py b/databento/common/constants.py new file mode 100644 index 0000000..4176b4a --- /dev/null +++ b/databento/common/constants.py @@ -0,0 +1 @@ +ALL_SYMBOLS = "ALL_SYMBOLS" diff --git a/databento/common/dbnstore.py b/databento/common/dbnstore.py index 78829ed..c9c8b63 100644 --- a/databento/common/dbnstore.py +++ b/databento/common/dbnstore.py @@ -1111,8 +1111,8 @@ def _transcode( compression=compression, pretty_px=pretty_px, pretty_ts=pretty_ts, - map_symbols=map_symbols, has_metadata=True, + map_symbols=map_symbols, symbol_map=symbol_map, # type: ignore [arg-type] schema=schema, ) diff --git a/databento/common/parsing.py b/databento/common/parsing.py index ea1a54c..cfa44aa 100644 --- a/databento/common/parsing.py +++ b/databento/common/parsing.py @@ -9,7 +9,7 @@ import pandas as pd from databento_dbn import SType -from databento.common.symbology import ALL_SYMBOLS +from databento.common.constants import ALL_SYMBOLS from databento.common.validation import validate_smart_symbol diff --git a/databento/common/symbology.py b/databento/common/symbology.py index 65dadb0..fdd69ca 100644 --- a/databento/common/symbology.py +++ b/databento/common/symbology.py @@ -1,12 +1,16 @@ from __future__ import annotations import bisect +import csv import datetime as dt import functools import json +import os from collections import defaultdict from collections.abc import Mapping from io import TextIOWrapper +from os import PathLike +from pathlib import Path from typing import Any, ClassVar, NamedTuple, TextIO import pandas as pd @@ -15,8 +19,7 @@ from databento_dbn import SType from databento_dbn import SymbolMappingMsg - -ALL_SYMBOLS = "ALL_SYMBOLS" +from databento.common.parsing import datetime_to_unix_nanoseconds class MappingInterval(NamedTuple): @@ -39,6 +42,126 @@ class MappingInterval(NamedTuple): symbol: str +def _validate_path_pair( + in_file: Path | PathLike[str] | str, + out_file: Path | PathLike[str] | str | None, +) -> tuple[Path, Path]: + in_file_valid = Path(in_file) + + if not in_file_valid.exists(): + raise ValueError(f"{in_file_valid} does not exist") + if not in_file_valid.is_file(): + raise ValueError(f"{in_file_valid} is not a file") + + if out_file is not None: + out_file_valid = Path(out_file) + else: + out_file_valid = in_file_valid.with_name( + f"{in_file_valid.stem}_mapped{in_file_valid.suffix}", + ) + + i = 0 + while out_file_valid.exists(): + out_file_valid = in_file_valid.with_name( + f"{in_file_valid.stem}_mapped_{i}{in_file_valid.suffix}", + ) + i += 1 + + if in_file_valid == out_file_valid: + raise ValueError("The input file cannot be the same path as the output file.") + + return in_file_valid, out_file_valid + + +def map_symbols_csv( + symbology_file: Path | PathLike[str] | str, + csv_file: Path | PathLike[str] | str, + out_file: Path | PathLike[str] | str | None = None, +) -> Path: + """ + Use a `symbology.json` file to map a symbols column onto an existing CSV + file. The result is written to `out_file`. + + Parameters + ---------- + symbology_file: Path | PathLike[str] | str + Path to a `symbology.json` file to use as a symbology source. + csv_file: Path | PathLike[str] | str + Path to a CSV file that contains encoded DBN data; must contain + a `ts_recv` or `ts_event` and `instrument_id` column. + out_file: Path | PathLike[str] | str (optional) + Path to a file to write results to. If unspecified, `_mapped` will be + appended to the `csv_file` name. + + Returns + ------- + Path + The path to the written file. + + Raises + ------ + ValueError + When the input or output paths are invalid. + When the input CSV file does not contain a valid timestamp or instrument_id column. + + See Also + -------- + map_symbols_json + + """ + instrument_map = InstrumentMap() + with open(symbology_file) as input_symbology: + instrument_map.insert_json(json.load(input_symbology)) + return instrument_map.map_symbols_csv( + csv_file=csv_file, + out_file=out_file, + ) + + +def map_symbols_json( + symbology_file: Path | PathLike[str] | str, + json_file: Path | PathLike[str] | str, + out_file: Path | PathLike[str] | str | None = None, +) -> Path: + """ + Use a `symbology.json` file to insert a symbols key into records of an + existing JSON file. The result is written to `out_file`. + + Parameters + ---------- + symbology_file: Path | PathLike[str] | str + Path to a `symbology.json` file to use as a symbology source. + json_file: Path | PathLike[str] | str + Path to a JSON file that contains encoded DBN data. + out_file: Path | PathLike[str] | str (optional) + Path to a file to write results to. If unspecified, `_mapped` will be + appended to the `json_file` name. + + Returns + ------- + Path + The path to the written file. + + Raises + ------ + ValueError + When the input or output paths are invalid. + When the input JSON file does not contain a valid record. + + See Also + -------- + map_symbols_csv + + """ + instrument_map = InstrumentMap() + with open(symbology_file) as input_symbology: + instrument_map.insert_json(json.load(input_symbology)) + return instrument_map.map_symbols_json( + json_file=json_file, + out_file=out_file, + ) + + class InstrumentMap: SYMBOLOGY_RESOLVE_KEYS: ClassVar[tuple[str, ...]] = ( "result", @@ -94,7 +217,7 @@ def resolve( If the InstrumentMap does not contain a mapping for the `instrument_id`. """ - mappings = self._data[instrument_id] + mappings = self._data[int(instrument_id)] for entry in mappings: if entry.start_date <= date < entry.end_date: return entry.symbol @@ -270,6 +393,150 @@ def insert_json( ), ) + def map_symbols_csv( + self, + csv_file: Path | PathLike[str] | str, + out_file: Path | PathLike[str] | str | None = None, + ) -> Path: + """ + Use the loaded symbology data to map a symbols column onto an existing + CSV file. The result is written to `out_file`. + + Parameters + ---------- + csv_file: Path | PathLike[str] | str + Path to a CSV file that contains encoded DBN data; must contain + a `ts_recv` or `ts_event` and `instrument_id` column. + out_file: Path | PathLike[str] | str (optional) + Path to a file to write results to. If unspecified, `_mapped` will be + appended to the `csv_file` name. + + Returns + ------- + Path + The path to the written file. + + Raises + ------ + ValueError + When the input or output paths are invalid. + When the input CSV file does not contain a valid timestamp or instrument_id column. + + See Also + -------- + InstrumentMap.map_symbols_json + + """ + csv_file_valid, out_file_valid = _validate_path_pair(csv_file, out_file) + + with csv_file_valid.open() as input_: + reader = csv.DictReader(input_) + + in_fields = reader.fieldnames + + if in_fields is None: + raise ValueError(f"no CSV header in {csv_file}") + + if "ts_recv" in in_fields: + ts_field = "ts_recv" + elif "ts_event" in in_fields: + ts_field = "ts_event" + else: + raise ValueError( + f"{csv_file} does not have a 'ts_recv' or 'ts_event' column", + ) + + if "instrument_id" not in in_fields: + raise ValueError(f"{csv_file} does not have an 'instrument_id' column") + + out_fields = (*in_fields, "symbol") + + with out_file_valid.open("w") as output: + writer = csv.DictWriter(output, fieldnames=out_fields) + writer.writeheader() + + for row in reader: + ts = datetime_to_unix_nanoseconds(row[ts_field]) + date = pd.Timestamp(ts, unit="ns").date() + instrument_id = row["instrument_id"] + if instrument_id is None: + row["symbol"] = "" + else: + row["symbol"] = self.resolve(instrument_id, date) + + writer.writerow(row) + + return out_file_valid + + def map_symbols_json( + self, + json_file: Path | PathLike[str] | str, + out_file: Path | PathLike[str] | str | None = None, + ) -> Path: + """ + Use the loaded symbology data to insert a symbols key into records of + an existing JSON file. The result is written to `out_file`. + + Parameters + ---------- + json_file: Path | PathLike[str] | str + Path to a JSON file that contains encoded DBN data. + out_file: Path | PathLike[str] | str (optional) + Path to a file to write results to. If unspecified, `_mapped` will be + appended to the `json_file` name. + + Returns + ------- + Path + The path to the written file. + + Raises + ------ + ValueError + When the input or output paths are invalid. + When the input JSON file does not contain a valid record. + + See Also + -------- + InstrumentMap.map_symbols_csv + + """ + json_file_valid, out_file_valid = _validate_path_pair(json_file, out_file) + + with json_file_valid.open() as input_: + with out_file_valid.open("w") as output: + for i, record in enumerate(map(json.loads, input_)): + try: + header = record["hd"] + instrument_id = header["instrument_id"] + except KeyError: + raise ValueError( + f"{json_file}:{i} does not contain a valid JSON encoded record", + ) + + if "ts_recv" in record: + ts_field = record["ts_recv"] + elif "ts_event" in header: + ts_field = header["ts_event"] + else: + raise ValueError( + f"{json_file}:{i} does not have a 'ts_recv' or 'ts_event' key", + ) + + ts = datetime_to_unix_nanoseconds(ts_field) + + date = pd.Timestamp(ts, unit="ns").date() + record["symbol"] = self.resolve(instrument_id, date) + + json.dump( + record, + output, + separators=(",", ":"), + ) + output.write(os.linesep) + + return out_file_valid + def _insert_inverval(self, instrument_id: int, interval: MappingInterval) -> None: """ Insert a SymbolInterval into the map. diff --git a/databento/live/client.py b/databento/live/client.py index 2b09c71..03a74d3 100644 --- a/databento/live/client.py +++ b/databento/live/client.py @@ -16,11 +16,11 @@ from databento_dbn import Schema from databento_dbn import SType +from databento.common.constants import ALL_SYMBOLS from databento.common.cram import BUCKET_ID_LENGTH from databento.common.error import BentoError from databento.common.parsing import optional_datetime_to_unix_nanoseconds from databento.common.publishers import Dataset -from databento.common.symbology import ALL_SYMBOLS from databento.common.validation import validate_enum from databento.common.validation import validate_semantic_string from databento.live import DBNRecord diff --git a/databento/live/protocol.py b/databento/live/protocol.py index 036fece..04e6d74 100644 --- a/databento/live/protocol.py +++ b/databento/live/protocol.py @@ -12,12 +12,12 @@ from databento_dbn import SType from databento.common import cram +from databento.common.constants import ALL_SYMBOLS from databento.common.error import BentoError from databento.common.iterator import chunk from databento.common.parsing import optional_datetime_to_unix_nanoseconds from databento.common.parsing import optional_symbols_list_to_list from databento.common.publishers import Dataset -from databento.common.symbology import ALL_SYMBOLS from databento.common.validation import validate_enum from databento.common.validation import validate_semantic_string from databento.live import DBNRecord diff --git a/databento/live/session.py b/databento/live/session.py index 4bfc602..cc3e447 100644 --- a/databento/live/session.py +++ b/databento/live/session.py @@ -14,9 +14,9 @@ from databento_dbn import Schema from databento_dbn import SType +from databento.common.constants import ALL_SYMBOLS from databento.common.error import BentoError from databento.common.publishers import Dataset -from databento.common.symbology import ALL_SYMBOLS from databento.live import AUTH_TIMEOUT_SECONDS from databento.live import CONNECT_TIMEOUT_SECONDS from databento.live import DBNRecord diff --git a/tests/data/test_data.definition.dbn.zst b/tests/data/test_data.definition.dbn.zst index 76c7c3ed6e5420f475275a8728724f3cec02f241..181e41003cdcf1418613ca09c2c647f5189e0274 100644 GIT binary patch delta 102 zcmV-s0Ga>A0=)u|V+6eh0JM>3S_TRL006juk;*Aee@6fR{|EmCK?DE)PJe&@0000Q z09uXY0V{J|zyd6=A_D}}lOSLw097sln)blPj@;IXCtS!GfGX&y-~z6=VLD^ZjJ#A3 IR>JWDPZsSei2wiq delta 110 zcmV-!0FnQ_0>uK5V+8#M0Q8Y(S_%LE|9=Vq006j=$0I^?K$WlRA3C9aO0r}!BRR910 diff --git a/tests/data/test_data.statistics.dbn.zst b/tests/data/test_data.statistics.dbn.zst index 31cab7f8a4790f08797be89aad9d57cbe39ae330..8a86fb084107b43d7784d4bb4695ad3add1c441a 100644 GIT binary patch literal 249 zcmV<{9 z0e}GVqXhuq27TCH`^D;17G@4)0CBaXV*>yH|NsAgCJY_`j!Xam2>&0=h(gBdR2H&h zp@1WtvkDI$01RLN00j>Kz#Kpb5Lme!gdGB3zKe%tR|E%%LH37GEFc(e&Rr!8JT*{( literal 129 zcmdPcs{c2IfnhE)!xI)4CqKpt1_lOqAEyXCUzY%51}-4{&&2SbK|q3$fzghMq28_U zzSaB)@umM2AG|2@5U&r+WME)t;ALQ}+{*AsXDQ=TcAx None: + """ + Test that a CSV file without mapped symbols is equivelant to a CSV file + with mapped symbols after processing with map_symbols_csv. + """ + store = DBNStore.from_file(test_data_path(schema)) + csv_path = tmp_path / f"test_{schema}.csv" + store.to_csv( + csv_path, + pretty_ts=pretty_ts, + map_symbols=False, + ) + + expected_path = tmp_path / "expected.csv" + store.to_csv( + expected_path, + pretty_ts=pretty_ts, + map_symbols=True, + ) + + outfile = tmp_path / f"test_{schema}_mapped.csv" + written_path = store._instrument_map.map_symbols_csv( + csv_file=csv_path, + out_file=outfile, + ) + + assert outfile == written_path + assert outfile.read_text() == expected_path.read_text() + + +@pytest.mark.parametrize( + "schema", + [pytest.param(s, id=str(s)) for s in Schema.variants()], +) +@pytest.mark.parametrize( + "pretty_ts", + [ + True, + False, + ], +) +def test_instrument_map_symbols_json( + tmp_path: pathlib.Path, + test_data_path: Callable[[Schema], pathlib.Path], + pretty_ts: bool, + schema: Schema, +) -> None: + """ + Test that a JSON file without mapped symbols is equivelant to a JSON file + with mapped symbols after processing with map_symbols_json. + """ + store = DBNStore.from_file(test_data_path(schema)) + json_path = tmp_path / f"test_{schema}.json" + store.to_json( + json_path, + pretty_ts=pretty_ts, + map_symbols=False, + ) + + expected_path = tmp_path / "expected.json" + store.to_json( + expected_path, + pretty_ts=pretty_ts, + map_symbols=True, + ) + + outfile = tmp_path / f"test_{schema}_mapped.json" + written_path = store._instrument_map.map_symbols_json( + json_file=json_path, + out_file=outfile, + ) + + assert outfile == written_path + assert outfile.read_text() == expected_path.read_text() diff --git a/tests/test_historical_bento.py b/tests/test_historical_bento.py index 3b085c0..dd15a03 100644 --- a/tests/test_historical_bento.py +++ b/tests/test_historical_bento.py @@ -1017,7 +1017,11 @@ def test_dbnstore_to_df_with_count( assert len(batch) <= count aggregator.append(batch) - assert expected.equals(pd.concat(aggregator)) + pd.testing.assert_frame_equal( + pd.concat(aggregator), + expected, + check_dtype=False, + ) @pytest.mark.parametrize( diff --git a/tests/test_live_client.py b/tests/test_live_client.py index 4336d9b..c270a04 100644 --- a/tests/test_live_client.py +++ b/tests/test_live_client.py @@ -14,12 +14,12 @@ import databento_dbn import pytest import zstandard +from databento.common.constants import ALL_SYMBOLS from databento.common.cram import BUCKET_ID_LENGTH from databento.common.data import SCHEMA_STRUCT_MAP from databento.common.dbnstore import DBNStore from databento.common.error import BentoError from databento.common.publishers import Dataset -from databento.common.symbology import ALL_SYMBOLS from databento.live import DBNRecord from databento.live import client from databento.live import gateway From 19de83508501f4c48b508350cb4d55a535ddc3a5 Mon Sep 17 00:00:00 2001 From: Renan Gemignani Date: Thu, 26 Oct 2023 14:50:47 +0200 Subject: [PATCH 4/6] ADD: New publisher values for ICE EU/Endex --- CHANGELOG.md | 1 + databento/common/publishers.py | 62 ++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dfd956..6b3ce74 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ #### Enhancements - Added `map_symbols_csv` function to the `databento` module for using `symbology.json` files to map a symbol column onto a CSV file - Added `map_symbols_json` function to the `databento` module for using `symbology.json` files to add a symbol key to a file of JSON records +- Added new publisher values in preparation for IFEU.IMPACT and NDEX.IMPACT datasets #### Bug fixes - Fixed issue where a large unreadable symbol subscription message could be sent diff --git a/databento/common/publishers.py b/databento/common/publishers.py index 69b7134..08f7a01 100644 --- a/databento/common/publishers.py +++ b/databento/common/publishers.py @@ -91,6 +91,10 @@ class Venue(StringyMixin, str, Enum): Cboe BZX Options Exchange. MXOP MEMX LLC Options. + IFEU + ICE Futures Europe (Commodities). + NDEX + ICE Endex. """ @@ -131,6 +135,8 @@ class Venue(StringyMixin, str, Enum): XPHL = "XPHL" BATO = "BATO" MXOP = "MXOP" + IFEU = "IFEU" + NDEX = "NDEX" @classmethod def from_int(cls, value: int) -> Venue: @@ -211,6 +217,10 @@ def from_int(cls, value: int) -> Venue: return Venue.BATO if value == 37: return Venue.MXOP + if value == 38: + return Venue.IFEU + if value == 39: + return Venue.NDEX raise ValueError(f"Integer value {value} does not correspond with any Venue variant") def to_int(self) -> int: @@ -291,6 +301,10 @@ def to_int(self) -> int: return 36 if self == Venue.MXOP: return 37 + if self == Venue.IFEU: + return 38 + if self == Venue.NDEX: + return 39 raise ValueError("Invalid Venue") @property @@ -372,6 +386,10 @@ def description(self) -> str: return "Cboe BZX Options Exchange" if self == Venue.MXOP: return "MEMX LLC Options" + if self == Venue.IFEU: + return "ICE Futures Europe (Commodities)" + if self == Venue.NDEX: + return "ICE Endex" raise ValueError("Unexpected Venue value") @unique @@ -434,6 +452,10 @@ class Dataset(StringyMixin, str, Enum): Nasdaq QBBO. XNAS_NLS Nasdaq NLS. + IFEU_IMPACT + ICE Futures Europe (Commodities) iMpact. + NDEX_IMPACT + ICE Endex iMpact. """ @@ -464,6 +486,8 @@ class Dataset(StringyMixin, str, Enum): XNYS_TRADES = "XNYS.TRADES" XNAS_QBBO = "XNAS.QBBO" XNAS_NLS = "XNAS.NLS" + IFEU_IMPACT = "IFEU.IMPACT" + NDEX_IMPACT = "NDEX.IMPACT" @classmethod def from_int(cls, value: int) -> Dataset: @@ -524,6 +548,10 @@ def from_int(cls, value: int) -> Dataset: return Dataset.XNAS_QBBO if value == 27: return Dataset.XNAS_NLS + if value == 28: + return Dataset.IFEU_IMPACT + if value == 29: + return Dataset.NDEX_IMPACT raise ValueError(f"Integer value {value} does not correspond with any Dataset variant") def to_int(self) -> int: @@ -584,6 +612,10 @@ def to_int(self) -> int: return 26 if self == Dataset.XNAS_NLS: return 27 + if self == Dataset.IFEU_IMPACT: + return 28 + if self == Dataset.NDEX_IMPACT: + return 29 raise ValueError("Invalid Dataset") @property @@ -645,6 +677,10 @@ def description(self) -> str: return "Nasdaq QBBO" if self == Dataset.XNAS_NLS: return "Nasdaq NLS" + if self == Dataset.IFEU_IMPACT: + return "ICE Futures Europe (Commodities) iMpact" + if self == Dataset.NDEX_IMPACT: + return "ICE Endex iMpact" raise ValueError("Unexpected Dataset value") @unique @@ -765,6 +801,10 @@ class Publisher(StringyMixin, str, Enum): DBEQ Plus - FINRA/Nasdaq TRF Carteret. DBEQ_PLUS_FINC DBEQ Plus - FINRA/Nasdaq TRF Chicago. + IFEU_IMPACT_IFEU + ICE Futures Europe (Commodities). + NDEX_IMPACT_NDEX + ICE Endex. """ @@ -824,6 +864,8 @@ class Publisher(StringyMixin, str, Enum): DBEQ_PLUS_FINN = "DBEQ.PLUS.FINN" DBEQ_PLUS_FINY = "DBEQ.PLUS.FINY" DBEQ_PLUS_FINC = "DBEQ.PLUS.FINC" + IFEU_IMPACT_IFEU = "IFEU.IMPACT.IFEU" + NDEX_IMPACT_NDEX = "NDEX.IMPACT.NDEX" @classmethod def from_int(cls, value: int) -> Publisher: @@ -942,6 +984,10 @@ def from_int(cls, value: int) -> Publisher: return Publisher.DBEQ_PLUS_FINY if value == 56: return Publisher.DBEQ_PLUS_FINC + if value == 57: + return Publisher.IFEU_IMPACT_IFEU + if value == 58: + return Publisher.NDEX_IMPACT_NDEX raise ValueError(f"Integer value {value} does not correspond with any Publisher variant") def to_int(self) -> int: @@ -1060,6 +1106,10 @@ def to_int(self) -> int: return 55 if self == Publisher.DBEQ_PLUS_FINC: return 56 + if self == Publisher.IFEU_IMPACT_IFEU: + return 57 + if self == Publisher.NDEX_IMPACT_NDEX: + return 58 raise ValueError("Invalid Publisher") @property def venue(self) -> Venue: @@ -1178,6 +1228,10 @@ def venue(self) -> Venue: return Venue.FINY if self == Publisher.DBEQ_PLUS_FINC: return Venue.FINC + if self == Publisher.IFEU_IMPACT_IFEU: + return Venue.IFEU + if self == Publisher.NDEX_IMPACT_NDEX: + return Venue.NDEX raise ValueError("Unexpected Publisher value") @property def dataset(self) -> Dataset: @@ -1296,6 +1350,10 @@ def dataset(self) -> Dataset: return Dataset.DBEQ_PLUS if self == Publisher.DBEQ_PLUS_FINC: return Dataset.DBEQ_PLUS + if self == Publisher.IFEU_IMPACT_IFEU: + return Dataset.IFEU_IMPACT + if self == Publisher.NDEX_IMPACT_NDEX: + return Dataset.NDEX_IMPACT raise ValueError("Unexpected Publisher value") @property @@ -1415,4 +1473,8 @@ def description(self) -> str: return "DBEQ Plus - FINRA/Nasdaq TRF Carteret" if self == Publisher.DBEQ_PLUS_FINC: return "DBEQ Plus - FINRA/Nasdaq TRF Chicago" + if self == Publisher.IFEU_IMPACT_IFEU: + return "ICE Futures Europe (Commodities)" + if self == Publisher.NDEX_IMPACT_NDEX: + return "ICE Endex" raise ValueError("Unexpected Publisher value") From 9741f40579edaa8a0d343847e378c662fd4d7211 Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Thu, 26 Oct 2023 08:24:28 -0700 Subject: [PATCH 5/6] FIX: Windows line endings for symbol mapping tools --- databento/common/symbology.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/databento/common/symbology.py b/databento/common/symbology.py index fdd69ca..f2c0a51 100644 --- a/databento/common/symbology.py +++ b/databento/common/symbology.py @@ -5,7 +5,6 @@ import datetime as dt import functools import json -import os from collections import defaultdict from collections.abc import Mapping from io import TextIOWrapper @@ -452,7 +451,11 @@ def map_symbols_csv( out_fields = (*in_fields, "symbol") with out_file_valid.open("w") as output: - writer = csv.DictWriter(output, fieldnames=out_fields) + writer = csv.DictWriter( + output, + fieldnames=out_fields, + lineterminator="\n", + ) writer.writeheader() for row in reader: @@ -533,7 +536,7 @@ def map_symbols_json( output, separators=(",", ":"), ) - output.write(os.linesep) + output.write("\n") return out_file_valid From 75e158eeb3c7150bd924631e639ba2eba3af6929 Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Thu, 26 Oct 2023 00:18:24 -0700 Subject: [PATCH 6/6] VER: Release 0.23.0 --- CHANGELOG.md | 2 +- databento/version.py | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b3ce74..05789e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## 0.23.0 - TBD +## 0.23.0 - 2023-10-26 #### Enhancements - Added `map_symbols_csv` function to the `databento` module for using `symbology.json` files to map a symbol column onto a CSV file diff --git a/databento/version.py b/databento/version.py index d74a474..8b301a7 100644 --- a/databento/version.py +++ b/databento/version.py @@ -1 +1 @@ -__version__ = "0.22.1" +__version__ = "0.23.0" diff --git a/pyproject.toml b/pyproject.toml index 4bc144e..d39b4f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databento" -version = "0.22.1" +version = "0.23.0" description = "Official Python client library for Databento" authors = [ "Databento ",