From 24edb86ff2483289dbe5202dcc600b79ef84217c Mon Sep 17 00:00:00 2001 From: Stefan Doorn Date: Mon, 24 Aug 2020 14:37:40 +0200 Subject: [PATCH 1/2] Split big sitemaps & generate proper index (closes #79, #106) --- src/Builder/SitemapIndexBuilder.php | 17 +++++++++- src/Command/GenerateSitemapCommand.php | 37 +++++++++++++--------- src/Controller/SitemapController.php | 4 +-- src/Provider/IndexUrlProvider.php | 16 ++++++++-- src/Provider/IndexUrlProviderInterface.php | 2 +- src/Renderer/SitemapRenderer.php | 15 +++++++-- src/Renderer/SitemapRendererInterface.php | 6 +++- src/Routing/SitemapLoader.php | 8 +++-- 8 files changed, 77 insertions(+), 28 deletions(-) diff --git a/src/Builder/SitemapIndexBuilder.php b/src/Builder/SitemapIndexBuilder.php index 8c1f7987..39a0a4de 100644 --- a/src/Builder/SitemapIndexBuilder.php +++ b/src/Builder/SitemapIndexBuilder.php @@ -20,6 +20,9 @@ final class SitemapIndexBuilder implements SitemapIndexBuilderInterface /** @var array */ private $indexProviders = []; + /** @var array */ + private $paths = []; + public function __construct(SitemapIndexFactoryInterface $sitemapIndexFactory) { $this->sitemapIndexFactory = $sitemapIndexFactory; @@ -41,6 +44,18 @@ public function addIndexProvider(IndexUrlProviderInterface $provider): void $this->indexProviders[] = $provider; } + /** + * {@inheritdoc} + */ + public function addPath(UrlProviderInterface $provider, string $path): void + { + if (!array_key_exists($provider->getName(), $this->paths)) { + $this->paths[$provider->getName()] = []; + } + + $this->paths[$provider->getName()][] = $path; + } + /** * {@inheritdoc} */ @@ -53,7 +68,7 @@ public function build(): SitemapInterface foreach ($this->indexProviders as $indexProvider) { /** @var UrlProviderInterface $provider */ foreach ($this->providers as $provider) { - $indexProvider->addProvider($provider); + $indexProvider->addProvider($provider, $this->paths[$provider->getName()]); } $urls[] = $indexProvider->generate(); diff --git 
a/src/Command/GenerateSitemapCommand.php b/src/Command/GenerateSitemapCommand.php index f42e02de..833947f8 100644 --- a/src/Command/GenerateSitemapCommand.php +++ b/src/Command/GenerateSitemapCommand.php @@ -56,43 +56,50 @@ public function __construct( protected function configure(): void { $this->addOption('channel', 'c', InputOption::VALUE_IS_ARRAY | InputOption::VALUE_OPTIONAL, 'Channel codes to generate. If none supplied, all channels will generated.'); + $this->addOption('limit', 'l', InputOption::VALUE_OPTIONAL, 'Limit amount of URLs per sitemap', 50000); } protected function execute(InputInterface $input, OutputInterface $output) { foreach ($this->channels($input) as $channel) { - $this->executeChannel($channel, $output); + $this->executeChannel($channel, $input, $output); } } - private function executeChannel(ChannelInterface $channel, OutputInterface $output) + private function executeChannel(ChannelInterface $channel, InputInterface $input, OutputInterface $output) { - // TODO make sure providers are every time emptied (reset call or smth?) 
foreach ($this->sitemapBuilder->getProviders() as $provider) { $output->writeln(\sprintf('Start generating sitemap "%s" for channel "%s"', $provider->getName(), $channel->getCode())); $sitemap = $this->sitemapBuilder->build($provider, $channel); // TODO use provider instance, not the name - $xml = $this->sitemapRenderer->render($sitemap); - $path = $path = $this->path($channel, \sprintf('%s.xml', $provider->getName())); + $xml = $this->sitemapRenderer->render($sitemap, (int)$input->getOption('limit')); + foreach($xml as $index => $data) { + $path = $path = $this->path($channel, \sprintf('%s_%d.xml', $provider->getName(), $index)); - $this->writer->write( - $path, - $xml - ); + $this->writer->write( + $path, + $data + ); - $output->writeln(\sprintf('Finished generating sitemap "%s" for channel "%s" at path "%s"', $provider->getName(), $channel->getCode(), $path)); + $output->writeln(\sprintf('Finished generating sitemap "%s" (%d) for channel "%s" at path "%s"', $provider->getName(), $index, $channel->getCode(), $path)); + + $this->sitemapIndexBuilder->addPath($provider, $path); + } } $output->writeln(\sprintf('Start generating sitemap index for channel "%s"', $channel->getCode())); $sitemap = $this->sitemapIndexBuilder->build(); $xml = $this->sitemapIndexRenderer->render($sitemap); - $path = $this->path($channel, 'sitemap_index.xml'); - $this->writer->write( - $path, - $xml - ); + foreach($xml as $index => $data) { + $path = $this->path($channel, 'sitemap_index.xml'); + + $this->writer->write( + $path, + $data + ); + } $output->writeln(\sprintf('Finished generating sitemap index for channel "%s" at path "%s"', $channel->getCode(), $path)); } diff --git a/src/Controller/SitemapController.php b/src/Controller/SitemapController.php index cea3f73c..7777bc6f 100644 --- a/src/Controller/SitemapController.php +++ b/src/Controller/SitemapController.php @@ -22,9 +22,9 @@ public function __construct( parent::__construct($reader); } - public function showAction(string $name): 
Response + public function showAction(string $name, int $index): Response { - $path = \sprintf('%s/%s', $this->channelContext->getChannel()->getCode(), \sprintf('%s.xml', $name)); + $path = \sprintf('%s/%s', $this->channelContext->getChannel()->getCode(), \sprintf('%s_%d.xml', $name, $index)); return $this->createResponse($path); } diff --git a/src/Provider/IndexUrlProvider.php b/src/Provider/IndexUrlProvider.php index 2fd2a491..7187c14d 100644 --- a/src/Provider/IndexUrlProvider.php +++ b/src/Provider/IndexUrlProvider.php @@ -21,6 +21,9 @@ final class IndexUrlProvider implements IndexUrlProviderInterface /** @var array */ private $urls = []; + /** @var array */ + private $paths = []; + public function __construct( RouterInterface $router, IndexUrlFactoryInterface $sitemapIndexUrlFactory @@ -29,17 +32,24 @@ public function __construct( $this->sitemapIndexUrlFactory = $sitemapIndexUrlFactory; } - public function addProvider(UrlProviderInterface $provider): void + public function addProvider(UrlProviderInterface $provider, array $paths = []): void { $this->providers[] = $provider; + $this->paths[$provider->getName()] = $paths; } public function generate(): iterable { foreach ($this->providers as $provider) { - $location = $this->router->generate('sylius_sitemap_' . 
$provider->getName()); + $pathCount = count($this->paths[$provider->getName()]); + + for ($i = 0; $i < $pathCount; $i++) { + $params = ['index' => $i]; + + $location = $this->router->generate('sylius_sitemap_'.$provider->getName(), $params); - $this->urls[] = $this->sitemapIndexUrlFactory->createNew($location); + $this->urls[] = $this->sitemapIndexUrlFactory->createNew($location); + } } return $this->urls; diff --git a/src/Provider/IndexUrlProviderInterface.php b/src/Provider/IndexUrlProviderInterface.php index 7f896c03..a2f77117 100644 --- a/src/Provider/IndexUrlProviderInterface.php +++ b/src/Provider/IndexUrlProviderInterface.php @@ -11,5 +11,5 @@ interface IndexUrlProviderInterface */ public function generate(): iterable; - public function addProvider(UrlProviderInterface $provider): void; + public function addProvider(UrlProviderInterface $provider, array $paths = []): void; } diff --git a/src/Renderer/SitemapRenderer.php b/src/Renderer/SitemapRenderer.php index 33c09e20..eb6fd1e5 100644 --- a/src/Renderer/SitemapRenderer.php +++ b/src/Renderer/SitemapRenderer.php @@ -19,8 +19,19 @@ public function __construct(RendererAdapterInterface $adapter) /** * {@inheritdoc} */ - public function render(SitemapInterface $sitemap): string + public function render(SitemapInterface $sitemap, ?int $limit = null): iterable { - return $this->adapter->render($sitemap); + $urls = $sitemap->getUrls(); + $total = count($urls); + + if (null === $limit || $limit < 0) { + $limit = $total; + } + + foreach(array_chunk($urls, $limit) as $slice) { + $sitemap->setUrls($slice); + + yield $this->adapter->render($sitemap); + } } } diff --git a/src/Renderer/SitemapRendererInterface.php b/src/Renderer/SitemapRendererInterface.php index 8c426b66..5192d18c 100644 --- a/src/Renderer/SitemapRendererInterface.php +++ b/src/Renderer/SitemapRendererInterface.php @@ -8,5 +8,9 @@ interface SitemapRendererInterface { - public function render(SitemapInterface $sitemap): string; + + /** + * @return 
string[] + */ + public function render(SitemapInterface $sitemap, ?int $limit = null): iterable; } diff --git a/src/Routing/SitemapLoader.php b/src/Routing/SitemapLoader.php index 08558dc0..c5ebfe87 100644 --- a/src/Routing/SitemapLoader.php +++ b/src/Routing/SitemapLoader.php @@ -34,7 +34,7 @@ public function __construct(SitemapBuilderInterface $sitemapBuilder) public function load($resource, $type = null): RouteCollection { $routes = new RouteCollection(); - + if (true === $this->loaded) { return $routes; } @@ -50,12 +50,14 @@ public function load($resource, $type = null): RouteCollection $routes->add( $name, new Route( - '/sitemap/' . $provider->getName() . '.xml', + '/sitemap/' . $provider->getName() . '/{index}.xml', [ '_controller' => 'sylius.controller.sitemap:showAction', 'name' => $provider->getName(), ], - [], + [ + 'index' => '\d+', + ], [], '', [], From 13bdaa38c179b0ad218ee29bdbb82c726d01da3e Mon Sep 17 00:00:00 2001 From: Stefan Doorn Date: Mon, 24 Aug 2020 14:43:29 +0200 Subject: [PATCH 2/2] Update UPGRADE --- UPGRADE-2.0.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/UPGRADE-2.0.md b/UPGRADE-2.0.md index e3386bde..89ef406c 100644 --- a/UPGRADE-2.0.md +++ b/UPGRADE-2.0.md @@ -12,8 +12,9 @@ ## New features * Generation of sitemaps is done via the CLI, schedule them in a cronjob: - * Sitemap Index: `bin/console sylius:sitemap:generate-index` + * Sitemap Index: `bin/console sylius:sitemap:generate` * Sitemap URLs now support adding images. The default providers do this where possible. It can be disabled using the `images` configuration key. +* Sitemaps are split automatically per 50,000 URLs. This can be adjusted by providing `--limit={int}` to the CLI command. ## Removed features