From ee60688284bcd6f1c42391356f75c4ce8fb8d7b5 Mon Sep 17 00:00:00 2001 From: Anthony Mahanna <43019056+aMahanna@users.noreply.github.com> Date: Fri, 19 Jan 2024 10:03:26 -0500 Subject: [PATCH] cleanup `__fetch adb docs` (#35) * cleanup: `__fetch_adb_docs` * set `continue-on-error` --- .github/workflows/build.yml | 1 + adbdgl_adapter/adapter.py | 27 ++++++++++++--------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 840478e..e409b05 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -10,6 +10,7 @@ env: jobs: build: runs-on: ubuntu-latest + continue-on-error: true strategy: matrix: python: ["3.8", "3.9", "3.10", "3.11"] # "3.12" diff --git a/adbdgl_adapter/adapter.py b/adbdgl_adapter/adapter.py index 601055d..a48d856 100644 --- a/adbdgl_adapter/adapter.py +++ b/adbdgl_adapter/adapter.py @@ -267,7 +267,7 @@ def udf_v1_x(v1_df): # 1. Fetch ArangoDB vertices v_col_cursor, v_col_size = self.__fetch_adb_docs( - v_col, meta, **adb_export_kwargs + v_col, False, meta, **adb_export_kwargs ) # 2. Process ArangoDB vertices @@ -294,7 +294,7 @@ def udf_v1_x(v1_df): # 1. Fetch ArangoDB edges e_col_cursor, e_col_size = self.__fetch_adb_docs( - e_col, meta, **adb_export_kwargs + e_col, True, meta, **adb_export_kwargs ) # 2. Process ArangoDB edges @@ -614,6 +614,7 @@ def y_tensor_to_2_column_dataframe(dgl_tensor): def __fetch_adb_docs( self, col: str, + is_edge: bool, meta: Union[Set[str], Dict[str, ADBMetagraphValues]], **adb_export_kwargs: Any, ) -> Tuple[Cursor, int]: @@ -622,6 +623,8 @@ def __fetch_adb_docs( :param col: The ArangoDB collection. :type col: str + :param is_edge: True if **col** is an edge collection. + :type is_edge: bool :param meta: The MetaGraph associated to **col** :type meta: Set[str] | Dict[str, adbdgl_adapter.typings.ADBMetagraphValues] :param adb_export_kwargs: Keyword arguments to specify AQL query options @@ -631,34 +634,28 @@ def __fetch_adb_docs( :rtype: pandas.DataFrame """ - def get_aql_return_value( - meta: Union[Set[str], Dict[str, ADBMetagraphValues]] - ) -> str: + def get_aql_return_value() -> str: """Helper method to formulate the AQL `RETURN` value based on the document attributes specified in **meta** """ - attributes = [] + attributes = ["_key"] + attributes += ["_from", "_to"] if is_edge else [] if type(meta) is set: - attributes = list(meta) + attributes += list(meta) elif type(meta) is dict: for value in meta.values(): if type(value) is str: attributes.append(value) elif type(value) is dict: - attributes.extend(list(value.keys())) + attributes += list(value.keys()) elif callable(value): # Cannot determine which attributes to extract if UDFs are used # Therefore we just return the entire document return "doc" - return f""" - MERGE( - {{ _key: doc._key, _from: doc._from, _to: doc._to }}, - KEEP(doc, {list(attributes)}) - ) - """ + return f"KEEP(doc, {attributes})" col_size: int = self.__db.collection(col).count() @@ -666,7 +663,7 @@ def get_aql_return_value( p.add_task(col) cursor: Cursor = self.__db.aql.execute( - f"FOR doc IN @@col RETURN {get_aql_return_value(meta)}", + f"FOR doc IN @@col RETURN {get_aql_return_value()}", bind_vars={"@col": col}, **{**adb_export_kwargs, **{"stream": True}}, )