Skip to content

Commit

Permalink
added csv upload helper + api option
Browse files Browse the repository at this point in the history
  • Loading branch information
henryivesjones committed Oct 13, 2022
1 parent 3408e15 commit 26a5acf
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 5 deletions.
12 changes: 12 additions & 0 deletions examples/upload_csv_to_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from tulip_api import TulipAPI, TulipTable, TulipTableCSVUploader

# Example: push every row of a local CSV file into an existing Tulip Table.

# Client for the Tulip instance that owns the table.
api = TulipAPI("abc.tulip.co")

filename = "example.csv"
table_id = "PT929bpqB3s84bbTf"

# Bind the table and the CSV file together, then run the upload.
table = TulipTable(api, table_id)
uploader = TulipTableCSVUploader(table, filename)

# create_random_id / warn_on_failure are forwarded to the record-creation call;
# execute() returns the number of records that were created.
uploaded_records = uploader.execute(create_random_id=True, warn_on_failure=True)
print(f"Uploaded {uploaded_records} to table {table_id}")
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "community-tulip-api"
version = "0.1.3"
version = "0.1.4"
authors = [
{ name="Henry Jones", email="[email protected]" },
]
Expand All @@ -10,6 +10,7 @@ license = {file = "LICENSE"}
requires-python = ">=3.6"
dependencies = [
"requests",
"python-dateutil"
]
classifiers = [
"Programming Language :: Python :: 3",
Expand Down
1 change: 1 addition & 0 deletions tulip_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
from .tulip_api import TulipAPI
from .tulip_machine import TulipMachine
from .tulip_table import TulipTable
from .tulip_table_csv_upload import TulipTableCSVUploader
from .tulip_table_link import TulipTableLink
11 changes: 8 additions & 3 deletions tulip_api/tulip_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@ def __init__(
api_key: Union[str, None] = None,
api_key_secret: Union[str, None] = None,
auth: Union[str, None] = None,
use_full_url: bool = False,
):
self.host = self._construct_base_url(tulip_url)
self.fqdn = tulip_url
"""
use_full_url: if set to true, the tulip_url must include `http://` or `https://` as well as the fqdn. For example `https://abc.tulip.co`
"""
self.host = self._construct_base_url(tulip_url, use_full_url)

self.auth = TulipAPI._provide_api_credentials(
api_key=api_key, api_key_secret=api_key_secret, auth=auth
Expand Down Expand Up @@ -93,7 +96,9 @@ def _provide_api_credentials(
raise TulipAPINoCredentialsFound()

@staticmethod
def _construct_base_url(host):
def _construct_base_url(host, use_full_url):
if use_full_url:
return f"{host}/api/v3"
cleaned_host = host.replace("http://", "").replace("https://", "")
return f"https://{cleaned_host}/api/v3/"

Expand Down
35 changes: 34 additions & 1 deletion tulip_api/tulip_table.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import json
from typing import Any, Dict, Generator, Iterable, List, Union
from uuid import uuid4

from .exceptions import TulipAPIInvalidChunkSize, TulipApiTableRecordCreateMustIncludeID
from .exceptions import (
TulipAPIInvalidChunkSize,
TulipAPIMalformedRequestError,
TulipApiTableRecordCreateMustIncludeID,
)
from .tulip_api import TulipAPI


Expand Down Expand Up @@ -204,6 +209,34 @@ def create_record(self, record: dict, create_random_id=False) -> Dict:
self._construct_records_path(), "POST", json=record
)

def create_records(
    self, records: Iterable[dict], create_random_id=False, warn_on_failure=False
) -> int:
    """
    Create each record in `records` by calling `create_record` once per item.

    `create_random_id`: forwarded unchanged to `create_record`.
    `warn_on_failure`: when True, a malformed-request error is reported and the
    remaining records are still attempted; when False the first such error is
    re-raised after being reported.

    Returns the number of records that were created successfully.
    """
    succeeded = 0
    failed = 0
    for entry in records:
        try:
            self.create_record(entry, create_random_id=create_random_id)
        except TulipAPIMalformedRequestError as error:
            failed += 1
            print(f"There was an issue creating the record:\n{json.dumps(entry)}")
            if warn_on_failure:
                # Best-effort mode: move on to the next record.
                continue
            raise error
        else:
            succeeded += 1

    if warn_on_failure and failed:
        print(f"Failed to create {failed} records.")

    return succeeded

def update_record(self, record_id: str, record: dict = {}):
"""
PUT `/tables/{tableId}/records/{recordId}`
Expand Down
79 changes: 79 additions & 0 deletions tulip_api/tulip_table_csv_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from csv import DictReader
from typing import Any, Dict, Iterable, TextIO, Union

from dateutil import parser

from .tulip_table import TulipTable


class TulipTableCSVUploader:
    """
    Upload the rows of a CSV file into a Tulip Table.

    The CSV header row must name columns of the target table. Every cell is
    converted from CSV text to the data type the table declares for that
    column before the record is created.
    """

    tulip_table: TulipTable

    # Case-insensitive spellings recognised for boolean cells. CSV cells are
    # always text, so plain `bool("false")` would wrongly yield True.
    _TRUE_STRINGS = frozenset({"true", "t", "yes", "y", "1"})
    _FALSE_STRINGS = frozenset({"false", "f", "no", "n", "0", ""})

    def __init__(self, tulip_table: TulipTable, csv_file: Union[str, TextIO]):
        """
        tulip_table: the table that receives the records.
        csv_file: a path to a CSV file, or an already opened text file object.
        """
        self.tulip_table = tulip_table
        self.csv_file = csv_file

    def execute(self, create_random_id=False, warn_on_failure=False) -> int:
        """
        Read the CSV and create one table record per row.

        `create_random_id` and `warn_on_failure` are forwarded to
        `TulipTable.create_records`.

        Returns the number of records that were created.
        """
        if isinstance(self.csv_file, str):
            # The csv module requires newline="" so that newlines embedded in
            # quoted fields are handled by the reader, not by the file layer.
            with open(self.csv_file, "r", newline="") as csv_file:
                return self._upload_records(
                    csv_file,
                    create_random_id=create_random_id,
                    warn_on_failure=warn_on_failure,
                )

        # A caller-supplied file object is used as is and left open.
        return self._upload_records(
            self.csv_file,
            create_random_id=create_random_id,
            warn_on_failure=warn_on_failure,
        )

    def _upload_records(
        self, file: TextIO, create_random_id=False, warn_on_failure=False
    ) -> int:
        """
        Validate the CSV header against the table, then stream the coerced
        rows into `TulipTable.create_records`. Returns its record count.
        """
        table_columns = self.tulip_table.get_details()["columns"]
        column_types = {
            column["name"]: column["dataType"]["type"] for column in table_columns
        }
        reader = DictReader(file)
        TulipTableCSVUploader._validate_csv_columns(reader.fieldnames, column_types)

        return self.tulip_table.create_records(
            TulipTableCSVUploader._yield_coerced_records(reader, column_types),
            warn_on_failure=warn_on_failure,
            create_random_id=create_random_id,
        )

    @staticmethod
    def _coerce_type(value: Any, type: str):
        """
        Convert a single CSV cell to the Python value for a table data type.

        value: the raw cell (text when it comes from the CSV reader).
        type: one of "string", "integer", "float", "boolean", "timestamp".

        Raises ValueError when the text cannot be converted, and Exception
        for a data type that is not listed above.
        """
        if type == "string":
            return str(value)
        if type == "integer":
            if isinstance(value, str):
                try:
                    # Direct parse keeps full precision for large integers.
                    return int(value)
                except ValueError:
                    # Accept decimal spellings such as "3.0" by truncating.
                    return int(float(value))
            return int(value)
        if type == "float":
            return float(value)
        if type == "boolean":
            if isinstance(value, str):
                normalized = value.strip().lower()
                if normalized in TulipTableCSVUploader._TRUE_STRINGS:
                    return True
                if normalized in TulipTableCSVUploader._FALSE_STRINGS:
                    return False
                raise ValueError(f"Cannot interpret {value!r} as a boolean.")
            return bool(value)
        if type == "timestamp":
            # ignoretz=True discards any UTC offset in the input, and the
            # result is then formatted with a literal "Z" suffix.
            # NOTE(review): inputs carrying a non-UTC offset are therefore not
            # converted to UTC - confirm this is intended.
            return parser.parse(value, ignoretz=True).strftime("%Y-%m-%dT%H:%M:%SZ")

        raise Exception(f"Unsupported datatype: {type}. Value: {value}")

    @staticmethod
    def _coerce_record_types(record: Dict[str, Any], column_types: Dict[str, str]):
        """
        Return a copy of `record` with every value coerced to its column type.

        Raises Exception when the record has a key that is not a table column.
        """
        new_record = {}
        for column_id, value in record.items():
            if column_id not in column_types:
                raise Exception(
                    f"Column {column_id} found in record, but not in table."
                )
            new_record[column_id] = TulipTableCSVUploader._coerce_type(
                value, column_types[column_id]
            )
        return new_record

    @staticmethod
    def _yield_coerced_records(records: Iterable, column_types: Dict[str, str]):
        """Lazily yield each record after type coercion, one at a time."""
        for record in records:
            yield TulipTableCSVUploader._coerce_record_types(record, column_types)

    @staticmethod
    def _validate_csv_columns(csv_fieldnames, table_columns):
        """
        Check that every CSV header name is a column of the table.

        csv_fieldnames: header names from the CSV reader; None when the file
        has no header row (for example an empty file).
        table_columns: a container of the table's column names.

        Raises Exception on a missing header or an unknown column.
        """
        if csv_fieldnames is None:
            raise Exception("The CSV file is empty or has no header row.")
        for csv_fieldname in csv_fieldnames:
            if csv_fieldname not in table_columns:
                raise Exception(
                    f"Column {csv_fieldname} is not found in the Tulip Table columns."
                )

0 comments on commit 26a5acf

Please sign in to comment.