From 8666f3d76c745973305539302f59b6bf0f68e1ad Mon Sep 17 00:00:00 2001 From: Bernie Hamlin Date: Mon, 8 Apr 2024 10:40:16 +1200 Subject: [PATCH 1/2] Force live object on unserialise This corrects behaviour that potentially allows indexing of draft content. Queues are normally run via a "dev task" which is run in the DevelopmentAdmin context. This controller will set the reading mode to DRAFT. The upshot of this is that an Index job will unserialise a DataObject in DRAFT reading mode (saved as a Data Query param) and the subsequent `toArray` call could get draft content. This is often seen as a link indexed with `?stage=Stage` parameters. The impact is likely limited because an IndexJob is usually queued by a publish action and run shortly afterwards. In these cases it is unlikely that draft changes are present - but is isn't impossible. I checked the ReIndexJob run and it doesn't appear to have this issue because it sets the reading mode in the setup method. I have instead added this to the unserialise function as it seemed a safer method to ensure the reading mode was as expected. --- src/DataObject/DataObjectDocument.php | 25 ++++++++---- tests/DataObject/DataObjectDocumentTest.php | 45 +++++++++++++++++++++ 2 files changed, 63 insertions(+), 7 deletions(-) diff --git a/src/DataObject/DataObjectDocument.php b/src/DataObject/DataObjectDocument.php index 3d99458..0bb0f54 100644 --- a/src/DataObject/DataObjectDocument.php +++ b/src/DataObject/DataObjectDocument.php @@ -606,14 +606,25 @@ public function __serialize(): array public function __unserialize(array $data): void { - $dataObject = DataObject::get_by_id($data['className'], $data['id']); + if (DataObject::has_extension($data['className'], Versioned::class)) { + // get data object in live mode always - needed as queue runners normally use a dev task to run + // so will have a DRAFT reading mode @see SilverStripe\Dev\DevelopmentAdmin + $dataObject = Versioned::withVersionedMode(function () use ($data): ?DataObject { + Versioned::set_stage(Versioned::LIVE); - if (!$dataObject && DataObject::has_extension($data['className'], Versioned::class) && $data['fallback']) { - // get the latest version - usually this is an object that has been deleted - $dataObject = Versioned::get_latest_version( - $data['className'], - $data['id'] - ); + return DataObject::get_by_id($data['className'], $data['id']); + }); + + if (!$dataObject && $data['fallback']) { + // get the latest version - usually this is an object that has been deleted + $dataObject = Versioned::get_latest_version( + $data['className'], + $data['id'] + ); + } + } else { + // un-versioned object so we don't need to worry about stages and fallbacks do not exist + $dataObject = DataObject::get_by_id($data['className'], $data['id']); } if (!$dataObject) { diff --git a/tests/DataObject/DataObjectDocumentTest.php b/tests/DataObject/DataObjectDocumentTest.php index 0d0a729..76c9701 100644 --- a/tests/DataObject/DataObjectDocumentTest.php +++ b/tests/DataObject/DataObjectDocumentTest.php @@ -21,6 +21,7 @@ use SilverStripe\SearchService\Tests\SearchServiceTest; use SilverStripe\Security\Member; use SilverStripe\Subsites\Model\Subsite; +use SilverStripe\Versioned\Versioned; class DataObjectDocumentTest extends SearchServiceTest { @@ -660,4 +661,48 @@ public function testDeletedDataObject(): void unserialize(serialize($doc)); } + public function testToArrayInQueueRun(): void + { + $config = $this->mockConfig(); + $config->set('getSearchableClasses', [ + PageFake::class, + ]); + $config->set('getFieldsForClass', [ + PageFake::class => [ + new Field('title'), + new Field('page_link', 'Link'), + ], + ]); + + Versioned::withVersionedMode(function (): void { + // Reading mode as if run by job - development Admin sets Draft reading mode + // usually via /dev/tasks/ProcessJobQueueTask + // @see SilverStripe\Dev\DevelopmentAdmin + Versioned::set_stage(Versioned::DRAFT); + + $dataObject = PageFake::create(); + $dataObject->Title = 'Published'; + $dataObject->write(); + $dataObject->publishRecursive(); + $dataObject->Title = 'Draft'; + $dataObject->write(); + + $doc = DataObjectDocument::create($dataObject); + + /** @var DataObjectDocument $serialDoc */ + $serialDoc = unserialize(serialize($doc)); + $indexData = $serialDoc->toArray(); + + $this->assertEqualsCanonicalizing( + [ + 'page_content' => '', + 'title' => 'Published', + 'page_link' => '/published', + ], + $indexData, + 'Potential draft content being indexed' + ); + }); + } + } From e95d2345dc69d38c14731abf135d0a9405f108e5 Mon Sep 17 00:00:00 2001 From: Bernie Hamlin Date: Mon, 15 Apr 2024 14:27:29 +1200 Subject: [PATCH 2/2] Force live on adding to index --- src/DataObject/DataObjectDocument.php | 76 +++++++++-------- tests/DataObject/DataObjectDocumentTest.php | 91 ++++++++++++++++----- 2 files changed, 116 insertions(+), 51 deletions(-) diff --git a/src/DataObject/DataObjectDocument.php b/src/DataObject/DataObjectDocument.php index 0bb0f54..b35deef 100644 --- a/src/DataObject/DataObjectDocument.php +++ b/src/DataObject/DataObjectDocument.php @@ -19,6 +19,7 @@ use SilverStripe\ORM\RelationList; use SilverStripe\ORM\UnsavedRelationList; use SilverStripe\SearchService\Exception\IndexConfigurationException; +use SilverStripe\SearchService\Exception\IndexingServiceException; use SilverStripe\SearchService\Extensions\DBFieldExtension; use SilverStripe\SearchService\Extensions\SearchServiceExtension; use SilverStripe\SearchService\Interfaces\DataObjectDocumentInterface; @@ -147,8 +148,10 @@ public function shouldIndex(): bool return false; } - // Dataobject is only in draft - if ($dataObject->hasExtension(Versioned::class) && !$dataObject->isLiveVersion()) { + // DataObject has no published version (or draft changes could cause a doc to be removed) + if ($dataObject->hasExtension(Versioned::class) && !$dataObject->isPublished()) { + // note even if we pass a draft object to the indexer onAddToSearchIndexes will + // set the version to live before adding return false; } @@ -204,24 +207,20 @@ public function getIndexes(): array /** * Generates a map of all the fields and values which will be sent * + * This will always use the current DataObject so you must ensure + * it is in the correct state (eg Live) prior to calling toArray. + * For example the onAddToSearchIndexes method will set the data + * object to LIVE when adding to the index + * + * @see DataObjectDocument::onAddToSearchIndexes() * @throws IndexConfigurationException */ public function toArray(): array { $pageContentField = $this->config()->get('page_content_field'); - if ($this->getDataObject()->hasExtension(Versioned::class)) { - $dataObject = Versioned::withVersionedMode(function () { - Versioned::set_stage(Versioned::LIVE); - - return DataObject::get_by_id($this->getSourceClass(), $this->getDataObject()->ID); - }); - } else { - $dataObject = DataObject::get_by_id( - $this->getSourceClass(), - $this->getDataObject()->ID - ); - } + // assume shouldIndex is called before this + $dataObject = $this->getDataObject(); if (!$dataObject || !$dataObject->exists()) { throw new IndexConfigurationException( @@ -606,25 +605,14 @@ public function __serialize(): array public function __unserialize(array $data): void { - if (DataObject::has_extension($data['className'], Versioned::class)) { - // get data object in live mode always - needed as queue runners normally use a dev task to run - // so will have a DRAFT reading mode @see SilverStripe\Dev\DevelopmentAdmin - $dataObject = Versioned::withVersionedMode(function () use ($data): ?DataObject { - Versioned::set_stage(Versioned::LIVE); - - return DataObject::get_by_id($data['className'], $data['id']); - }); + $dataObject = DataObject::get_by_id($data['className'], $data['id']); - if (!$dataObject && $data['fallback']) { - // get the latest version - usually this is an object that has been deleted - $dataObject = Versioned::get_latest_version( - $data['className'], - $data['id'] - ); - } - } else { - // un-versioned object so we don't need to worry about stages and fallbacks do not exist - $dataObject = DataObject::get_by_id($data['className'], $data['id']); + if (!$dataObject && DataObject::has_extension($data['className'], Versioned::class) && $data['fallback']) { + // get the latest version - usually this is an object that has been deleted + $dataObject = Versioned::get_latest_version( + $data['className'], + $data['id'] + ); } if (!$dataObject) { @@ -639,8 +627,32 @@ public function __unserialize(array $data): void } } + /** + * Add to index event handler + * + * @throws IndexingServiceException + * @return void + */ public function onAddToSearchIndexes(string $event): void { + if ($event === DocumentAddHandler::BEFORE_ADD) { + // make sure DataObject is always live on adding to the index + Versioned::withVersionedMode(function (): void { + Versioned::set_stage(Versioned::LIVE); + + $currentDataObject = $this->getDataObject(); + + $liveDataObject = DataObject::get($currentDataObject->ClassName)->byID($currentDataObject->ID); + + if (!$liveDataObject) { + // unlikely case as indexer calls 'shouldIndex' immediately prior to this + throw new IndexingServiceException('Only published DataObjects may be added to the index'); + } + + $this->setDataObject($liveDataObject); + }); + } + if ($event === DocumentAddHandler::AFTER_ADD) { $this->markIndexed(); } diff --git a/tests/DataObject/DataObjectDocumentTest.php b/tests/DataObject/DataObjectDocumentTest.php index 76c9701..e2b87b7 100644 --- a/tests/DataObject/DataObjectDocumentTest.php +++ b/tests/DataObject/DataObjectDocumentTest.php @@ -11,16 +11,19 @@ use SilverStripe\SearchService\Interfaces\DocumentAddHandler; use SilverStripe\SearchService\Interfaces\DocumentRemoveHandler; use SilverStripe\SearchService\Schema\Field; +use SilverStripe\SearchService\Service\Indexer; use SilverStripe\SearchService\Tests\Fake\DataObjectFake; use SilverStripe\SearchService\Tests\Fake\DataObjectFakePrivate; use SilverStripe\SearchService\Tests\Fake\DataObjectFakeVersioned; use SilverStripe\SearchService\Tests\Fake\DataObjectSubclassFake; use SilverStripe\SearchService\Tests\Fake\ImageFake; use SilverStripe\SearchService\Tests\Fake\PageFake; +use SilverStripe\SearchService\Tests\Fake\ServiceFake; use SilverStripe\SearchService\Tests\Fake\TagFake; use SilverStripe\SearchService\Tests\SearchServiceTest; use SilverStripe\Security\Member; use SilverStripe\Subsites\Model\Subsite; +use SilverStripe\Versioned\ReadingMode; use SilverStripe\Versioned\Versioned; class DataObjectDocumentTest extends SearchServiceTest @@ -624,21 +627,58 @@ public function testExtensionRequired(): void $this->assertEquals($fake, $doc->getDataObject()); } - public function testEvents(): void + public function testMarkIndexedOnEvents(): void { + $dataObject = $this->objFromFixture(DataObjectFakeVersioned::class, 'one'); $mock = $this->getMockBuilder(DataObjectDocument::class) - ->onlyMethods(['markIndexed']) + ->onlyMethods(['markIndexed', 'getDataObject']) ->disableOriginalConstructor() ->getMock(); $mock->expects($this->exactly(2)) ->method('markIndexed'); - $mock->onAddToSearchIndexes(DocumentAddHandler::BEFORE_ADD); + $mock->method('getDataObject') + ->willReturn($dataObject); + $mock->onAddToSearchIndexes(DocumentAddHandler::AFTER_ADD); + // currenlty BEFORE_REMOVE is a noop $mock->onRemoveFromSearchIndexes(DocumentRemoveHandler::BEFORE_REMOVE); $mock->onRemoveFromSearchIndexes(DocumentRemoveHandler::AFTER_REMOVE); } + public function testOnAddToSearchIndexes(): void + { + Versioned::withVersionedMode(function (): void { + // set DRAFT to match state of queue runners which generally + // run through DevelopmentAdmin controller + Versioned::set_stage(Versioned::DRAFT); + + $dataObject = $this->objFromFixture(DataObjectFakeVersioned::class, 'one'); + $dataObject->publishRecursive(); + $document = DataObjectDocument::create($dataObject); + + $queryParams = $document->getDataObject()->getSourceQueryParams(); + $readingMode = ReadingMode::fromDataQueryParams($queryParams); + + $this->assertEquals('Stage.' . Versioned::DRAFT, $readingMode); + + $document->onAddToSearchIndexes(DocumentAddHandler::BEFORE_ADD); + + $queryParams = $document->getDataObject()->getSourceQueryParams(); + $readingMode = ReadingMode::fromDataQueryParams($queryParams); + + $this->assertEquals('Stage.' . Versioned::LIVE, $readingMode); + + $dataObject->doArchive(); + + $this->expectExceptionMessage( + sprintf('Only published DataObjects may be added to the index') + ); + + $document->onAddToSearchIndexes(DocumentAddHandler::BEFORE_ADD); + }); + } + public function testDeletedDataObject(): void { $dataObject = $this->objFromFixture(DataObjectFakeVersioned::class, 'one'); @@ -661,7 +701,7 @@ public function testDeletedDataObject(): void unserialize(serialize($doc)); } - public function testToArrayInQueueRun(): void + public function testIndexDataObjectDocument(): void { $config = $this->mockConfig(); $config->set('getSearchableClasses', [ @@ -674,34 +714,47 @@ public function testToArrayInQueueRun(): void ], ]); - Versioned::withVersionedMode(function (): void { + Versioned::withVersionedMode(function () use ($config): void { // Reading mode as if run by job - development Admin sets Draft reading mode // usually via /dev/tasks/ProcessJobQueueTask // @see SilverStripe\Dev\DevelopmentAdmin Versioned::set_stage(Versioned::DRAFT); $dataObject = PageFake::create(); - $dataObject->Title = 'Published'; - $dataObject->write(); - $dataObject->publishRecursive(); $dataObject->Title = 'Draft'; $dataObject->write(); $doc = DataObjectDocument::create($dataObject); - /** @var DataObjectDocument $serialDoc */ - $serialDoc = unserialize(serialize($doc)); - $indexData = $serialDoc->toArray(); - - $this->assertEqualsCanonicalizing( + $config->set( + 'getIndexesForDocument', [ - 'page_content' => '', - 'title' => 'Published', - 'page_link' => '/published', - ], - $indexData, - 'Potential draft content being indexed' + $doc->getIdentifier() => [ + 'index' => 'data', + ], + ] ); + + $indexer = new Indexer([$doc]); + $indexer->setIndexService($service = new ServiceFake()); + $indexer->setBatchSize(1); + $indexer->processNode(); + + $this->assertCount(0, $service->documents, 'Draft documents should not be indexed'); + + $dataObject->Title = 'Published'; + $dataObject->write(); + $published = $dataObject->publishRecursive(); + $this->assertTrue($published); + $dataObject->flushCache(); + $doc = DataObjectDocument::create($dataObject); + + $indexer = new Indexer([$doc]); + $indexer->setIndexService($service = new ServiceFake()); + $indexer->setBatchSize(1); + $indexer->processNode(); + + $this->assertCount(1, $service->documents, 'Published documents should be indexed'); }); }