Skip to content

Commit

Permalink
Fix issues with SF ContentDocument restrictions
Browse files Browse the repository at this point in the history
Workaround for Salesforce (SF) now rejecting, as malformed, ContentDocumentLink queries that previously worked. The errors that were occurring:

> Implementation restriction: ContentDocumentLink requires a filter by a single Id on ContentDocumentId or LinkedEntityId using the equals operator or multiple Id's using the IN operator.

> Implementation restriction: filtering on non-id fields is only permitted when filtering by ContentDocumentLink.LinkedEntityId using the equals operator.
  • Loading branch information
piotrekkr authored Sep 11, 2024
1 parent 1c07e48 commit 4ac97c1
Showing 1 changed file with 24 additions and 8 deletions.
32 changes: 24 additions & 8 deletions src/salesforce_archivist/salesforce/salesforce.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,25 +47,37 @@ def _init_tmp_dir(self) -> str:
return tmp_dir

def _get_content_document_list_query(self) -> str:
select_list = ["LinkedEntityId", "ContentDocumentId"]
select_list = ["LinkedEntityId", "ContentDocumentId", "LinkedEntity.Type"]
if self._archivist_obj.dir_name_field is not None and self._archivist_obj.dir_name_field not in select_list:
select_list.append(self._archivist_obj.dir_name_field)
where_list = ["LinkedEntity.Type = '{obj_type}'".format(obj_type=self._archivist_obj.obj_type)]
where_conditions = []
if self._archivist_obj.modified_date_lt is not None:
where_list.append(
where_conditions.append(
"ContentDocument.ContentModifiedDate < {date}".format(
date=self._archivist_obj.modified_date_lt.strftime("%Y-%m-%dT%H:%M:%SZ")
)
)
if self._archivist_obj.modified_date_gt is not None:
where_list.append(
where_conditions.append(
"ContentDocument.ContentModifiedDate > {date}".format(
date=self._archivist_obj.modified_date_gt.strftime("%Y-%m-%dT%H:%M:%SZ")
)
)
return "SELECT {fields} FROM ContentDocumentLink WHERE {where}".format(
fields=", ".join(select_list), where=" AND ".join(where_list)
)
where = ""
if len(where_conditions):
where = "WHERE {}".format(" AND ".join(where_conditions))
# Using WHERE IN and not using filter on `LinkedEntity.Type` is done because of SF restrictions like:
#
# Implementation restriction: ContentDocumentLink requires a filter by a single Id on ContentDocumentId
# or LinkedEntityId using the equals operator or multiple Id's using the IN operator.
#
# Implementation restriction: filtering on non-id fields is only permitted when filtering
# by ContentDocumentLink.LinkedEntityId using the equals operator.

return (
"SELECT {fields} FROM ContentDocumentLink "
"WHERE ContentDocumentId IN (SELECT Id FROM ContentDocument {where})"
).format(fields=", ".join(select_list), where=where)

def download_content_document_link_list(
self,
Expand All @@ -81,10 +93,14 @@ def download_content_document_link_list(
reader = csv.reader(file)
next(reader)
for row in reader:
# If type is not the same as the object type, skip.
# This is a workaround for restriction on ContentDocumentLink filtering directly in query.
if row[2] != self._archivist_obj.obj_type:
continue
link = ContentDocumentLink(
linked_entity_id=row[0],
content_document_id=row[1],
download_dir_name=row[2] if self._archivist_obj.dir_name_field is not None else None,
download_dir_name=row[3] if self._archivist_obj.dir_name_field is not None else None,
)
document_link_list.add_link(link)

Expand Down

0 comments on commit 4ac97c1

Please sign in to comment.