diff --git a/Classes/Command/GraphIndexNewCommandController.php b/Classes/Command/GraphIndexNewCommandController.php new file mode 100644 index 0000000..b97398c --- /dev/null +++ b/Classes/Command/GraphIndexNewCommandController.php @@ -0,0 +1,541 @@ +logger->info(sprintf('Starting elasticsearch indexing %s sub processes', $this->useSubProcesses ? 'with' : 'without'), LogEnvironment::fromMethodName(__METHOD__)); + + if ($workspace !== null && $this->workspaceRepository->findByIdentifier($workspace) === null) { + $this->logger->error('The given workspace (' . $workspace . ') does not exist.', LogEnvironment::fromMethodName(__METHOD__)); + $this->quit(1); + } + + $postfix = (string)($postfix ?: time()); + $this->nodeIndexer->setIndexNamePostfix($postfix); + + $createIndicesAndApplyMapping = function (array $dimensionsValues) use ($update, $postfix) { + $this->executeInternalCommand('createInternal', [ + 'dimensionsValues' => json_encode($dimensionsValues), + 'update' => $update, + 'postfix' => $postfix, + ]); + }; + + $buildIndex = function (array $dimensionsValues) use ($workspace, $limit, $postfix) { + $this->build($dimensionsValues, $workspace, $postfix, $limit); + }; + + $refresh = function (array $dimensionsValues) use ($postfix) { + $this->executeInternalCommand('refreshInternal', [ + 'dimensionsValues' => json_encode($dimensionsValues), + 'postfix' => $postfix, + ]); + }; + + $updateAliases = function (array $dimensionsValues) use ($update, $postfix) { + $this->executeInternalCommand('aliasInternal', [ + 'dimensionsValues' => json_encode($dimensionsValues), + 'postfix' => $postfix, + 'update' => $update, + ]); + }; + + $combinations = new ArrayCollection($this->contentDimensionCombinator->getAllAllowedCombinations()); + + $runAndLog = function ($command, string $stepInfo) use ($combinations) { + $timeStart = microtime(true); + $this->output(str_pad($stepInfo . '... ', 20)); + $combinations->map($command); + $this->outputLine('Done (took %s seconds)', [number_format(microtime(true) - $timeStart, 2)]); + }; + + $runAndLog($createIndicesAndApplyMapping, 'Creating indices and apply mapping'); + + if ($this->aliasesExist() === false) { + $runAndLog($updateAliases, 'Set up aliases'); + } + + $runAndLog($buildIndex, 'Indexing nodes'); + + $runAndLog($refresh, 'Refresh indicies'); + $runAndLog($updateAliases, 'Update aliases'); + + $this->outputLine('Update main alias'); + $this->nodeIndexer->updateMainAlias(); + + $this->outputLine(); + $this->outputMemoryUsage(); + } + + /** + * @return bool + * @throws ApiException + * @throws ConfigurationException + * @throws Exception + */ + private function aliasesExist(): bool + { + $aliasName = $this->searchClient->getIndexName(); + $aliasesExist = false; + try { + $aliasesExist = $this->indexDriver->getIndexNamesByAlias($aliasName) !== []; + } catch (ApiException $exception) { + // in case of 404, do not throw an error... + if ($exception->getResponse()->getStatusCode() !== 404) { + throw $exception; + } + } + + return $aliasesExist; + } + + /** + * Build up the node index + * + * @param array $dimensionsValues + * @param string|null $workspace + * @param string|null $postfix + * @param int|null $limit + * @throws ConfigurationException + * @throws Exception + * @throws RuntimeException + * @throws SubProcessException + */ + private function build(array $dimensionsValues, ?string $workspace = null, ?string $postfix = null, ?int $limit = null): void + { + $dimensionsValues = $this->configureNodeIndexer($dimensionsValues, $postfix); + + $this->outputLine(json_encode($dimensionsValues)); + + +// $this->logger->info(vsprintf('Indexing %s nodes to %s', [($limit !== null ? 'the first ' . $limit . ' ' : ''), $this->nodeIndexer->getIndexName()]), LogEnvironment::fromMethodName(__METHOD__)); +// +// if ($workspace === null && $this->settings['indexAllWorkspaces'] === false) { +// $workspace = 'live'; +// } +// +// $buildWorkspaceCommandOptions = static function ($workspace, array $dimensionsValues, ?int $limit, ?string $postfix) { +// return [ +// 'workspace' => $workspace instanceof Workspace ? $workspace->getName() : $workspace, +// 'dimensionsValues' => json_encode($dimensionsValues), +// 'postfix' => $postfix, +// 'limit' => $limit, +// ]; +// }; +// +// $output = ''; +// if ($workspace === null) { +// foreach ($this->workspaceRepository->findAll() as $iteratedWorkspace) { +// $output .= $this->executeInternalCommand('buildWorkspaceInternal', $buildWorkspaceCommandOptions($iteratedWorkspace, $dimensionsValues, $limit, $postfix)); +// } +// } else { +// $output = $this->executeInternalCommand('buildWorkspaceInternal', $buildWorkspaceCommandOptions($workspace, $dimensionsValues, $limit, $postfix)); +// } +// +// $outputArray = explode(PHP_EOL, $output); +// if (count($outputArray) > 0) { +// foreach ($outputArray as $line) { +// $line = trim($line); +// if ($line === '') { +// continue; +// } +// $this->outputLine('+ %s', [$line]); +// } +// } +// +// $this->outputErrorHandling(); + } + + /** + * Internal sub-command to create an index and apply the mapping + * + * @param string $dimensionsValues + * @param bool $update + * @param string|null $postfix + * @throws Exception + * @throws \Flowpack\ElasticSearch\Exception + * @throws \Neos\Flow\Http\Exception + * @throws \Exception + * @Flow\Internal + */ + public function createInternalCommand(string $dimensionsValues, bool $update = false, string $postfix = null): void + { + if ($update === true) { + $this->logger->warning('!!! Update Mode (Development) active!', LogEnvironment::fromMethodName(__METHOD__)); + } else { + $dimensionsValuesArray = $this->configureNodeIndexer(json_decode($dimensionsValues, true), $postfix); + if ($this->nodeIndexer->getIndex()->exists() === true) { + $this->logger->warning(sprintf('Deleted index with the same postfix (%s)!', $postfix), LogEnvironment::fromMethodName(__METHOD__)); + $this->nodeIndexer->getIndex()->delete(); + } + $this->nodeIndexer->getIndex()->create(); + $this->logger->info('Created index ' . $this->nodeIndexer->getIndexName() . ' with dimensions ' . json_encode($dimensionsValuesArray), LogEnvironment::fromMethodName(__METHOD__)); + } + + $this->applyMapping(); + $this->outputErrorHandling(); + } + + /** + * @param string $workspace + * @param string $dimensionsValues + * @param string $postfix + * @param int|null $limit + * @return void + * @Flow\Internal + */ + public function buildWorkspaceInternalCommand(string $workspace, string $dimensionsValues, string $postfix, int $limit = null): void + { + $dimensionsValuesArray = $this->configureNodeIndexer(json_decode($dimensionsValues, true), $postfix); + + $workspaceLogger = function ($workspaceName, $indexedNodes, $dimensions) { + if ($dimensions === []) { + $message = 'Workspace "' . $workspaceName . '" without dimensions done. (Indexed ' . $indexedNodes . ' nodes)'; + } else { + $message = 'Workspace "' . $workspaceName . '" and dimensions "' . json_encode($dimensions) . '" done. (Indexed ' . $indexedNodes . ' nodes)'; + } + $this->outputLine($message); + }; + + $this->workspaceIndexer->indexWithDimensions($workspace, $dimensionsValuesArray, $limit, $workspaceLogger); + + $this->outputErrorHandling(); + } + + /** + * Internal subcommand to refresh the index + * + * @param string $dimensionsValues + * @param string $postfix + * @throws Exception + * @throws \Flowpack\ElasticSearch\Exception + * @throws \Neos\Flow\Http\Exception + * @throws ConfigurationException + * @Flow\Internal + */ + public function refreshInternalCommand(string $dimensionsValues, string $postfix): void + { + $this->configureNodeIndexer(json_decode($dimensionsValues, true), $postfix); + + $this->logger->info(vsprintf('Refreshing index %s', [$this->nodeIndexer->getIndexName()]), LogEnvironment::fromMethodName(__METHOD__)); + $this->nodeIndexer->getIndex()->refresh(); + + $this->outputErrorHandling(); + } + + /** + * @param string $dimensionsValues + * @param string $postfix + * @param bool $update + * @throws Exception + * @throws \Flowpack\ElasticSearch\Exception + * @throws ApiException + * @throws ConfigurationException + * @Flow\Internal + */ + public function aliasInternalCommand(string $dimensionsValues, string $postfix, bool $update = false): void + { + if ($update === true) { + return; + } + $this->configureNodeIndexer(json_decode($dimensionsValues, true), $postfix); + + $this->logger->info(vsprintf('Update alias for index %s', [$this->nodeIndexer->getIndexName()]), LogEnvironment::fromMethodName(__METHOD__)); + $this->nodeIndexer->updateIndexAlias(); + $this->outputErrorHandling(); + } + + /** + * @param array $dimensionsValues + * @param string $postfix + * @return array + */ + private function configureNodeIndexer(array $dimensionsValues, string $postfix): array + { + $this->nodeIndexer->setIndexNamePostfix($postfix); + $this->nodeIndexer->setDimensions($dimensionsValues); + return $dimensionsValues; + } + + /** + * Clean up old indexes (i.e. all but the current one) + * + * @return void + * @throws ConfigurationException + * @throws Exception + */ + public function cleanupCommand(): void + { + $removed = false; + $combinations = $this->contentDimensionCombinator->getAllAllowedCombinations(); + foreach ($combinations as $dimensionsValues) { + try { + $this->nodeIndexer->setDimensions($dimensionsValues); + $removedIndices = $this->nodeIndexer->removeOldIndices(); + + foreach ($removedIndices as $indexToBeRemoved) { + $removed = true; + $this->logger->info('Removing old index ' . $indexToBeRemoved, LogEnvironment::fromMethodName(__METHOD__)); + } + } catch (ApiException $exception) { + $exception->getResponse()->getBody()->rewind(); + $response = json_decode($exception->getResponse()->getBody()->getContents(), false); + $message = sprintf('Nothing removed. ElasticSearch responded with status %s', $response->status); + + if (isset($response->error->type)) { + $this->logger->error(sprintf('%s, saying "%s: %s"', $message, $response->error->type, $response->error->reason), LogEnvironment::fromMethodName(__METHOD__)); + } else { + $this->logger->error(sprintf('%s, saying "%s"', $message, $response->error), LogEnvironment::fromMethodName(__METHOD__)); + } + } + } + if ($removed === false) { + $this->logger->info('Nothing to remove.', LogEnvironment::fromMethodName(__METHOD__)); + } + } + + private function outputErrorHandling(): void + { + if ($this->errorHandlingService->hasError() === false) { + return; + } + + $this->outputLine(); + $this->outputLine('%s Errors where returned while indexing. Check your logs for more information.', [$this->errorHandlingService->getErrorCount()]); + } + + /** + * @param string $command + * @param array $arguments + * @return string + * @throws RuntimeException + * @throws SubProcessException + */ + private function executeInternalCommand(string $command, array $arguments): string + { + ob_start(null, 1 << 20); + + if ($this->useSubProcesses) { + $commandIdentifier = 'flowpack.elasticsearch.contentrepositoryadaptor:nodeindex:' . $command; + $status = Scripts::executeCommand($commandIdentifier, $this->flowSettings, true, array_filter($arguments)); + + if ($status !== true) { + throw new RuntimeException(vsprintf('Command: %s with parameters: %s', [$commandIdentifier, json_encode($arguments)]), 1426767159); + } + } else { + $commandIdentifier = $command . 'Command'; + call_user_func_array([self::class, $commandIdentifier], $arguments); + } + + return ob_get_clean(); + } + + /** + * Create a ContentContext based on the given workspace name + * + * @param string $workspaceName Name of the workspace to set for the context + * @param array $dimensions Optional list of dimensions and their values which should be set + * @return Context + */ + private function createContentContext(string $workspaceName, array $dimensions = []): Context + { + $contextProperties = [ + 'workspaceName' => $workspaceName, + 'invisibleContentShown' => true, + 'inaccessibleContentShown' => true + ]; + + if ($dimensions !== []) { + $contextProperties['dimensions'] = $dimensions; + $contextProperties['targetDimensions'] = array_map(static function ($dimensionValues) { + return array_shift($dimensionValues); + }, $dimensions); + } + + return $this->contextFactory->create($contextProperties); + } + + /** + * Apply the mapping to the current index. + * + * @return void + * @throws Exception + * @throws \Flowpack\ElasticSearch\Exception + * @throws ConfigurationException + * @throws \Neos\Flow\Http\Exception + */ + private function applyMapping(): void + { + $nodeTypeMappingCollection = $this->nodeTypeMappingBuilder->buildMappingInformation($this->nodeIndexer->getIndex()); + foreach ($nodeTypeMappingCollection as $mapping) { + /** @var Mapping $mapping */ + $mapping->apply(); + } + } + + private function outputMemoryUsage(): void + { + $this->outputLine('! Memory usage %s', [Files::bytesToSizeString(memory_get_usage(true))]); + } +} diff --git a/Classes/Eel/ElasticSearchQuery.php b/Classes/Eel/ElasticSearchQuery.php new file mode 100644 index 0000000..ef273e7 --- /dev/null +++ b/Classes/Eel/ElasticSearchQuery.php @@ -0,0 +1,147 @@ +queryFilter('term', ['__parentPath' => $contextNode->getPath()]); + + $workspaceName = $contextNode->getContext()->getWorkspace()->getName(); + + $coordinates = []; + foreach ($contextNode->getNodeData()->getDimensionValues() as $dimensionName => $rawDimensionValues) { + $coordinates[$dimensionName] = reset($rawDimensionValues); + } + $coordinates['_workspace'] = $contextNode->getContext()->getWorkspace()->getName(); + + $dimensionSpacePoint = new DimensionSpace\DimensionSpacePoint($coordinates); + $contentSubgraphIdentifier = new ContentSubgraphIdentifier($workspaceName, $dimensionSpacePoint); + + $edgeFilter = [ + 'path' => '__hierarchyRelations', + 'query' => [ + 'bool' => [ + 'must' => [ + [ + 'match' => [ + '__hierarchyRelations.subgraph' => (string) $contentSubgraphIdentifier, + ], + ], + ], + 'should' => [], + 'must_not' => [] + ] + ] + ]; + + if (!$contextNode->getContext()->isInvisibleContentShown()) { + $edgeFilter['query']['bool']['must_not'][] = [ + 'match' => [ + '__hierarchyRelations.hidden' => true, + ], + ]; + $edgeFilter['query']['bool']['must_not'][] = [ + 'range' => [ + '__hierarchyRelations.hiddenBeforeDateTime' => [ + 'gt' => 'now' + ] + ] + ]; + $edgeFilter['query']['bool']['must_not'][] = [ + 'range' => [ + '__hierarchyRelations.hiddenAfterDateTime' => [ + 'lt' => 'now' + ] + ] + ]; + } + + /* + * @todo make this work + if (!$contextNode->getContext()->isInaccessibleContentShown()) { + $edgeFilter['query']['bool']['minimum_should_match'] = 1; + foreach (array_keys($this->securityContext->getRoles()) as $roleName) { + $edgeFilter['query']['bool']['should'][] = [ + 'term' => [ + '__hierarchyRelations.accessRoles' => $roleName + ], + ]; + } + } + */ + + $this->queryFilter('nested', $edgeFilter); + + $this->contextNode = $contextNode; + + return $this; + } +} diff --git a/Classes/Eel/ElasticSearchQueryBuilder.php b/Classes/Eel/ElasticSearchQueryBuilder.php index 8e74e3f..e1033c1 100644 --- a/Classes/Eel/ElasticSearchQueryBuilder.php +++ b/Classes/Eel/ElasticSearchQueryBuilder.php @@ -11,136 +11,29 @@ * source code. */ -use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Eel\ElasticSearchQuery; use Neos\Flow\Annotations as Flow; -use Neos\Flow\Security\Context as SecurityContext; -use Neos\ContentRepository\Domain\Model\NodeInterface; -use Neos\ContentRepository\Domain\Service\ContentDimensionPresetSourceInterface; -use Neos\ContentRepository\Search\Search\QueryBuilderInterface; -use Neos\ContentRepository\DimensionSpace\DimensionSpace; -use Neos\ContentRepository\InMemoryGraph\Dimension\DimensionSpacePointFactory; -use Neos\ContentRepository\InMemoryGraph\ContentSubgraph\ContentSubgraphIdentifier; +use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Eel\ElasticSearchQueryBuilder as BaseElasticSearchQueryBuilder; /** * Query Builder for ElasticSearch Queries */ -class ElasticSearchQueryBuilder extends ElasticSearchQuery +class ElasticSearchQueryBuilder extends BaseElasticSearchQueryBuilder { - /** - * @Flow\Inject - * @var ContentDimensionPresetSourceInterface - */ - protected $dimensionPresetSource; - - /** - * @Flow\Inject - * @var SecurityContext - */ - protected $securityContext; /** - * @Flow\Inject - * @var DimensionSpacePointFactory + * @var int + * @Flow\InjectConfiguration(path="driver.version", package="Flowpack.ElasticSearch.ContentRepositoryAdaptor") */ - protected $dimensionSpacePointFactory; + protected $driverVersion; /** - * These fields are not accepted in a count request and must therefore be removed before doing so - * * @var array + * @Flow\InjectConfiguration(path="driver.mapping", package="Flowpack.ElasticSearch.ContentRepositoryAdaptor") */ - protected $unsupportedFieldsInCountRequest = ['fields', 'sort', 'from', 'size', 'highlight', 'aggs', 'aggregations']; - - /** - * Amount of total items in response without limit - * - * @var integer - */ - protected $totalItems; - - /** - * Sets the starting point for this query. Search result should only contain nodes that - * match the context of the given node and have it as parent node in their rootline. - * - * @param NodeInterface $contextNode - * @return QueryBuilderInterface - * @api - */ - public function query(NodeInterface $contextNode) - { - // on indexing, the __parentPath is tokenized to contain ALL parent path parts, - // e.g. /foo, /foo/bar/, /foo/bar/baz; to speed up matching.. That's why we use a simple "term" filter here. - // http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-term-filter.html - $this->queryFilter('term', ['__parentPath' => $contextNode->getPath()]); - - $workspaceName = $contextNode->getContext()->getWorkspace()->getName(); - - $coordinates = []; - foreach ($contextNode->getNodeData()->getDimensionValues() as $dimensionName => $rawDimensionValues) { - $coordinates[$dimensionName] = reset($rawDimensionValues); - } - $coordinates['_workspace'] = $contextNode->getContext()->getWorkspace()->getName(); - - $dimensionSpacePoint = new DimensionSpace\DimensionSpacePoint($coordinates); - $contentSubgraphIdentifier = new ContentSubgraphIdentifier($workspaceName, $dimensionSpacePoint); - - $edgeFilter = [ - 'path' => '__hierarchyRelations', - 'query' => [ - 'bool' => [ - 'must' => [ - [ - 'match' => [ - '__hierarchyRelations.subgraph' => (string) $contentSubgraphIdentifier, - ], - ], - ], - 'should' => [], - 'must_not' => [] - ] - ] - ]; - - if (!$contextNode->getContext()->isInvisibleContentShown()) { - $edgeFilter['query']['bool']['must_not'][] = [ - 'match' => [ - '__hierarchyRelations.hidden' => true, - ], - ]; - $edgeFilter['query']['bool']['must_not'][] = [ - 'range' => [ - '__hierarchyRelations.hiddenBeforeDateTime' => [ - 'gt' => 'now' - ] - ] - ]; - $edgeFilter['query']['bool']['must_not'][] = [ - 'range' => [ - '__hierarchyRelations.hiddenAfterDateTime' => [ - 'lt' => 'now' - ] - ] - ]; - } - - /* - * @todo make this work - if (!$contextNode->getContext()->isInaccessibleContentShown()) { - $edgeFilter['query']['bool']['minimum_should_match'] = 1; - foreach (array_keys($this->securityContext->getRoles()) as $roleName) { - $edgeFilter['query']['bool']['should'][] = [ - 'term' => [ - '__hierarchyRelations.accessRoles' => $roleName - ], - ]; - } - } - */ - - $this->queryFilter('nested', $edgeFilter); - - $this->contextNode = $contextNode; + protected $mapping; - return $this; + public function initializeObject() { + // we initialize an adjusted query here + $this->request = new ElasticSearchQuery(...array_values($this->mapping[$this->driverVersion]['query']['arguments'])); } }