added csv upload helper + api option

henryivesjones · Oct 13, 2022 · 26a5acf · 26a5acf
1 parent 3408e15
commit 26a5acf
Show file tree

Hide file tree

Showing 6 changed files with 136 additions and 5 deletions.
diff --git a/examples/upload_csv_to_table.py b/examples/upload_csv_to_table.py
@@ -0,0 +1,12 @@
+from tulip_api import TulipAPI, TulipTable, TulipTableCSVUploader
+
+# API client for the Tulip instance, addressed by its hostname.
+api = TulipAPI("abc.tulip.co")
+
+# CSV file to read and the ID of the Tulip Table that receives its rows.
+filename = "example.csv"
+table_id = "PT929bpqB3s84bbTf"
+
+# Wrap the table, hand it to the uploader together with the CSV file, then run
+# the upload. `execute` returns the number of records that were created.
+table = TulipTable(api, table_id)
+uploader = TulipTableCSVUploader(table, filename)
+uploaded_records = uploader.execute(create_random_id=True, warn_on_failure=True)
+print(f"Uploaded {uploaded_records} to table {table_id}")
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "community-tulip-api"
-version = "0.1.3"
+version = "0.1.4"
 authors = [
   { name="Henry Jones", email="[email protected]" },
 ]
@@ -10,6 +10,7 @@ license = {file = "LICENSE"}
 requires-python = ">=3.6"
 dependencies = [
     "requests",
+    "python-dateutil"
 ]
 classifiers = [
     "Programming Language :: Python :: 3",

diff --git a/tulip_api/__init__.py b/tulip_api/__init__.py
@@ -2,4 +2,5 @@
 from .tulip_api import TulipAPI
 from .tulip_machine import TulipMachine
 from .tulip_table import TulipTable
+from .tulip_table_csv_upload import TulipTableCSVUploader
 from .tulip_table_link import TulipTableLink
diff --git a/tulip_api/tulip_api.py b/tulip_api/tulip_api.py
@@ -25,9 +25,12 @@ def __init__(
         api_key: Union[str, None] = None,
         api_key_secret: Union[str, None] = None,
         auth: Union[str, None] = None,
+        use_full_url: bool = False,
     ):
-        self.host = self._construct_base_url(tulip_url)
-        self.fqdn = tulip_url
+        """
+        use_full_url: if set to true, the tulip_url must include `http://` or `https://` as well as the fqdn. For example `https://abc.tulip.co`
+        """
+        self.host = self._construct_base_url(tulip_url, use_full_url)
 
         self.auth = TulipAPI._provide_api_credentials(
             api_key=api_key, api_key_secret=api_key_secret, auth=auth
@@ -93,7 +96,9 @@ def _provide_api_credentials(
         raise TulipAPINoCredentialsFound()
 
     @staticmethod
-    def _construct_base_url(host):
+    def _construct_base_url(host, use_full_url):
+        """
+        Builds the base URL of the v3 API. The result always ends with `/api/v3/`.
+
+        `host`: the Tulip instance. Either a bare fqdn such as `abc.tulip.co`, or, when
+        `use_full_url` is true, a full URL including the scheme such as `https://abc.tulip.co`.
+
+        `use_full_url`: if true, `host` is used as given (scheme included) instead of
+        being forced to `https://`.
+        """
+        if use_full_url:
+            # Drop a trailing slash from the supplied URL and end with `/`, so this
+            # branch yields the same shape as the default branch below.
+            return f"{host.rstrip('/')}/api/v3/"
         cleaned_host = host.replace("http://", "").replace("https://", "")
         return f"https://{cleaned_host}/api/v3/"
 

diff --git a/tulip_api/tulip_table.py b/tulip_api/tulip_table.py
@@ -1,7 +1,12 @@
+import json
 from typing import Any, Dict, Generator, Iterable, List, Union
 from uuid import uuid4
 
-from .exceptions import TulipAPIInvalidChunkSize, TulipApiTableRecordCreateMustIncludeID
+from .exceptions import (
+    TulipAPIInvalidChunkSize,
+    TulipAPIMalformedRequestError,
+    TulipApiTableRecordCreateMustIncludeID,
+)
 from .tulip_api import TulipAPI
 
 
@@ -204,6 +209,34 @@ def create_record(self, record: dict, create_random_id=False) -> Dict:
             self._construct_records_path(), "POST", json=record
         )
 
+    def create_records(
+        self, records: Iterable[dict], create_random_id=False, warn_on_failure=False
+    ) -> int:
+        """
+        Creates each record in `records` by calling `create_record` once per record.
+
+        Returns the # of successfully created records.
+
+        `create_random_id`: passed through unchanged to every `create_record` call.
+
+        `warn_on_failure`: set to True to keep going after a malformed request. The
+        offending record is printed and, once all records were attempted, the number of
+        failures is printed. When False (the default) the first malformed request is
+        printed and then re-raised, aborting the remaining records.
+        """
+        created_count = 0
+        failed_records = 0
+        for record in records:
+            try:
+                self.create_record(record, create_random_id=create_random_id)
+            except TulipAPIMalformedRequestError as error:
+                failed_records += 1
+                print(f"There was an issue creating the record:\n{json.dumps(record)}")
+                if warn_on_failure:
+                    continue
+                raise error
+            else:
+                # Only records whose request went through are counted.
+                created_count += 1
+
+        if failed_records > 0 and warn_on_failure:
+            print(f"Failed to create {failed_records} records.")
+
+        return created_count
+
+
     def update_record(self, record_id: str, record: dict = {}):
         """
         PUT `/tables/{tableId}/records/{recordId}`

diff --git a/tulip_api/tulip_table_csv_upload.py b/tulip_api/tulip_table_csv_upload.py
@@ -0,0 +1,79 @@
+from csv import DictReader
+from typing import Any, Dict, Iterable, TextIO, Union
+
+from dateutil import parser
+
+from .tulip_table import TulipTable
+
+
+class TulipTableCSVUploader:
+    """
+    Uploads the rows of a CSV file to a Tulip Table, one record per row.
+
+    Every name in the CSV header row must be the name of a column of the table. Each
+    value is coerced to the data type of its column before the record is created.
+
+    `tulip_table`: the table that receives the records.
+
+    `csv_file`: either the path of a CSV file, or an already opened text file object.
+    A file object is read but not closed by this class.
+    """
+
+    # Spellings accepted for boolean columns, compared case-insensitively after
+    # stripping whitespace. The empty string stays falsy, as with `bool("")`.
+    _TRUE_STRINGS = frozenset({"true", "t", "yes", "y", "1"})
+    _FALSE_STRINGS = frozenset({"false", "f", "no", "n", "0", ""})
+
+    tulip_table: TulipTable
+
+    def __init__(self, tulip_table: TulipTable, csv_file: Union[str, TextIO]):
+        self.tulip_table = tulip_table
+        self.csv_file = csv_file
+
+    def execute(self, create_random_id=False, warn_on_failure=False) -> int:
+        """
+        Reads the CSV file and creates one table record per row.
+
+        Returns the # of successfully created records.
+
+        `create_random_id` and `warn_on_failure` are passed through to
+        `TulipTable.create_records`.
+        """
+        if isinstance(self.csv_file, str):
+            # The csv module expects files opened with newline="" so that newlines
+            # inside quoted fields are read correctly.
+            with open(self.csv_file, "r", newline="") as csv_file:
+                return self._upload_records(
+                    csv_file,
+                    create_random_id=create_random_id,
+                    warn_on_failure=warn_on_failure,
+                )
+
+        return self._upload_records(
+            self.csv_file,
+            create_random_id=create_random_id,
+            warn_on_failure=warn_on_failure,
+        )
+
+    def _upload_records(
+        self, file: TextIO, create_random_id=False, warn_on_failure=False
+    ) -> int:
+        """
+        Validates the CSV header against the table columns, then creates the records.
+
+        Rows are coerced lazily, so a row that cannot be coerced raises while the
+        upload is in progress, after earlier rows have already been created.
+        """
+        table_columns = self.tulip_table.get_details()["columns"]
+        column_types = {
+            column["name"]: column["dataType"]["type"] for column in table_columns
+        }
+        reader = DictReader(file)
+        TulipTableCSVUploader._validate_csv_columns(reader.fieldnames, column_types)
+
+        return self.tulip_table.create_records(
+            TulipTableCSVUploader._yield_coerced_records(reader, column_types),
+            warn_on_failure=warn_on_failure,
+            create_random_id=create_random_id,
+        )
+
+    @staticmethod
+    def _coerce_type(value: Any, type: str):
+        """
+        Converts `value` to the Python value matching the Tulip data type `type`.
+
+        Supported types: `string`, `integer`, `float`, `boolean`, `timestamp`.
+
+        Raises `ValueError` if a string cannot be interpreted as a boolean, and
+        `Exception` if `type` is not one of the supported types.
+        """
+        if type == "string":
+            return str(value)
+        if type == "integer":
+            if isinstance(value, str):
+                # Go through float so that text such as "3.0" is accepted.
+                return int(float(value))
+            return int(value)
+        if type == "float":
+            return float(value)
+        if type == "boolean":
+            if isinstance(value, str):
+                # CSV values are text: bool("false") would be True, so the text is
+                # interpreted explicitly instead.
+                normalized = value.strip().lower()
+                if normalized in TulipTableCSVUploader._TRUE_STRINGS:
+                    return True
+                if normalized in TulipTableCSVUploader._FALSE_STRINGS:
+                    return False
+                raise ValueError(f"Cannot interpret {value!r} as a boolean.")
+            return bool(value)
+        if type == "timestamp":
+            # NOTE(review): ignoretz=True discards any UTC offset present in the
+            # input, yet the result is labelled `Z` - confirm this is intended.
+            return parser.parse(value, ignoretz=True).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+        raise Exception(f"Unsupported datatype: {type}. Value: {value}")
+
+    @staticmethod
+    def _coerce_record_types(record: Dict[str, Any], column_types: Dict[str, str]):
+        """
+        Returns a copy of `record` with every value coerced to the type of its column.
+
+        Raises `Exception` if the record has a key that is not in `column_types`.
+        """
+        new_record = {}
+        for column_id, value in record.items():
+            if column_id not in column_types:
+                raise Exception(
+                    f"Column {column_id} found in record, but not in table."
+                )
+            new_record[column_id] = TulipTableCSVUploader._coerce_type(
+                value, column_types[column_id]
+            )
+        return new_record
+
+    @staticmethod
+    def _yield_coerced_records(records: Iterable, column_types: Dict[str, str]):
+        """
+        Lazily yields each record of `records` after coercing its values.
+        """
+        for record in records:
+            yield TulipTableCSVUploader._coerce_record_types(record, column_types)
+
+    @staticmethod
+    def _validate_csv_columns(csv_fieldnames, table_columns):
+        """
+        Raises `Exception` if a CSV header name is not among `table_columns`.
+
+        `csv_fieldnames` may be None, which is what `DictReader.fieldnames` reports
+        for an empty file. In that case there is nothing to validate.
+        """
+        for csv_fieldname in csv_fieldnames or []:
+            if csv_fieldname not in table_columns:
+                raise Exception(
+                    f"Column {csv_fieldname} is not found in the Tulip Table columns."
+                )