From 68111ba9d9f869d483853ab35e9a899bc4a72a00 Mon Sep 17 00:00:00 2001
From: Luca Gallinari
Date: Wed, 20 Sep 2023 11:03:18 +0200
Subject: [PATCH] Split big sitemaps & generate proper index

Sitemaps are rendered in chunks of at most --limit URLs (default 50000).
Each chunk is written as <provider>_<n>.xml and served from
/sitemap/<provider>/<n>.xml; the sitemap index lists one entry per
written file.

See https://github.com/stefandoorn/sitemap-plugin/pull/128
---
 src/Builder/SitemapIndexBuilder.php          | 14 +++++++
 src/Builder/SitemapIndexBuilderInterface.php |  7 ++++
 src/Command/GenerateSitemapCommand.php       | 37 ++++++++++-------
 src/Controller/SitemapController.php         |  4 +-
 src/Provider/IndexUrlProvider.php            | 16 ++++++--
 src/Provider/IndexUrlProviderInterface.php   |  5 +++
 src/Renderer/SitemapRenderer.php             | 30 +++++++++++++-
 src/Renderer/SitemapRendererInterface.php    |  7 +++-
 src/Routing/SitemapLoader.php                |  6 ++-
 9 files changed, 103 insertions(+), 23 deletions(-)

diff --git a/src/Builder/SitemapIndexBuilder.php b/src/Builder/SitemapIndexBuilder.php
index 2343e1b3..1af6eb42 100644
--- a/src/Builder/SitemapIndexBuilder.php
+++ b/src/Builder/SitemapIndexBuilder.php
@@ -19,6 +19,9 @@ final class SitemapIndexBuilder implements SitemapIndexBuilderInterface
     /** @var IndexUrlProviderInterface[] */
     private array $indexProviders = [];
 
+    /** @var array<string, string[]> generated sitemap file paths, keyed by provider name */
+    private array $paths = [];
+
     public function __construct(SitemapIndexFactoryInterface $sitemapIndexFactory)
     {
         $this->sitemapIndexFactory = $sitemapIndexFactory;
@@ -38,12 +41,23 @@ public function addIndexProvider(IndexUrlProviderInterface $indexProvider): void
         $this->indexProviders[] = $indexProvider;
     }
 
+    public function addPath(UrlProviderInterface $provider, string $path): void
+    {
+        $this->paths[$provider->getName()][] = $path;
+    }
+
     public function build(): SitemapInterface
     {
         $sitemap = $this->sitemapIndexFactory->createNew();
         $urls = [];
 
+        // The builder is reused for every channel: hand over the paths collected
+        // so far and start empty, so one channel's files never leak into the next index.
+        $paths = $this->paths;
+        $this->paths = [];
+
         foreach ($this->indexProviders as $indexProvider) {
+            $indexProvider->addPaths($paths);
             $urls[] = $indexProvider->generate();
         }
 
diff --git a/src/Builder/SitemapIndexBuilderInterface.php b/src/Builder/SitemapIndexBuilderInterface.php
index 22f6a700..b4d1fad4 100644
--- a/src/Builder/SitemapIndexBuilderInterface.php
+++ b/src/Builder/SitemapIndexBuilderInterface.php
@@ -6,10 +6,17 @@
 
 use SitemapPlugin\Model\SitemapInterface;
 use SitemapPlugin\Provider\IndexUrlProviderInterface;
+use SitemapPlugin\Provider\UrlProviderInterface;
 
 interface SitemapIndexBuilderInterface extends BuilderInterface
 {
     public function addIndexProvider(IndexUrlProviderInterface $indexProvider): void;
 
+    /**
+     * Records one generated sitemap file for the given provider; the index
+     * gets one entry per recorded file.
+     */
+    public function addPath(UrlProviderInterface $provider, string $path): void;
+
     public function build(): SitemapInterface;
 }
diff --git a/src/Command/GenerateSitemapCommand.php b/src/Command/GenerateSitemapCommand.php
index f01c0ba8..b551eadb 100644
--- a/src/Command/GenerateSitemapCommand.php
+++ b/src/Command/GenerateSitemapCommand.php
@@ -55,18 +55,19 @@ public function __construct(
     protected function configure(): void
     {
         $this->addOption('channel', 'c', InputOption::VALUE_IS_ARRAY | InputOption::VALUE_OPTIONAL, 'Channel codes to generate. If none supplied, all channels will generated.');
+        $this->addOption('limit', 'l', InputOption::VALUE_OPTIONAL, 'Limit amount of URLs per sitemap', 50000);
     }
 
     protected function execute(InputInterface $input, OutputInterface $output): int
     {
         foreach ($this->channels($input) as $channel) {
-            $this->executeChannel($channel, $output);
+            $this->executeChannel($channel, $input, $output);
         }
 
         return 0;
     }
 
-    private function executeChannel(ChannelInterface $channel, OutputInterface $output): void
+    private function executeChannel(ChannelInterface $channel, InputInterface $input, OutputInterface $output): void
     {
         $output->writeln(\sprintf('Start generating sitemaps for channel "%s"', $channel->getName()));
 
@@ -76,27 +77,35 @@ private function executeChannel(ChannelInterface $channel, OutputInterface $outp
             $output->writeln(\sprintf('Start generating sitemap "%s" for channel "%s"', $provider->getName(), $channel->getCode()));
 
             $sitemap = $this->sitemapBuilder->build($provider, $channel); // TODO use provider instance, not the name
-            $xml = $this->sitemapRenderer->render($sitemap);
-            $path = $this->path($channel, \sprintf('%s.xml', $provider->getName()));
 
-            $this->writer->write(
-                $path,
-                $xml
-            );
-
-            $output->writeln(\sprintf('Finished generating sitemap "%s" for channel "%s" at path "%s"', $provider->getName(), $channel->getCode(), $path));
+            $xml = $this->sitemapRenderer->render($sitemap, (int) $input->getOption('limit'));
+            foreach ($xml as $index => $data) {
+                $path = $this->path($channel, \sprintf('%s_%d.xml', $provider->getName(), $index));
+                $this->writer->write($path, $data);
+                $output->writeln(
+                    \sprintf(
+                        'Finished generating sitemap "%s" (%d) for channel "%s" at path "%s"',
+                        $provider->getName(),
+                        $index,
+                        $channel->getCode(),
+                        $path
+                    )
+                );
+                // The index builder owns the provider => files map used to build the index.
+                $this->sitemapIndexBuilder->addPath($provider, $path);
+            }
         }
 
         $output->writeln(\sprintf('Start generating sitemap index for channel "%s"', $channel->getCode()));
 
         $sitemap = $this->sitemapIndexBuilder->build();
         $xml = $this->sitemapIndexRenderer->render($sitemap);
         $path = $this->path($channel, 'sitemap_index.xml');
 
-        $this->writer->write(
-            $path,
-            $xml
-        );
+        // Rendered without a limit, so a single document is expected here.
+        foreach ($xml as $data) {
+            $this->writer->write($path, $data);
+        }
 
         $output->writeln(\sprintf('Finished generating sitemap index for channel "%s" at path "%s"', $channel->getCode(), $path));
     }
diff --git a/src/Controller/SitemapController.php b/src/Controller/SitemapController.php
index af6d7632..6185d46e 100644
--- a/src/Controller/SitemapController.php
+++ b/src/Controller/SitemapController.php
@@ -21,9 +21,9 @@ public function __construct(
         parent::__construct($reader);
     }
 
-    public function showAction(string $name): Response
+    public function showAction(string $name, int $index): Response
     {
-        $path = \sprintf('%s/%s', $this->channelContext->getChannel()->getCode(), \sprintf('%s.xml', $name));
+        $path = \sprintf('%s/%s', $this->channelContext->getChannel()->getCode(), \sprintf('%s_%d.xml', $name, $index));
 
         return $this->createResponse($path);
     }
diff --git a/src/Provider/IndexUrlProvider.php b/src/Provider/IndexUrlProvider.php
index d79970e9..29622446 100644
--- a/src/Provider/IndexUrlProvider.php
+++ b/src/Provider/IndexUrlProvider.php
@@ -16,6 +16,9 @@ final class IndexUrlProvider implements IndexUrlProviderInterface
 
     private IndexUrlFactoryInterface $sitemapIndexUrlFactory;
 
+    /** @var array<string, string[]> generated sitemap file paths, keyed by provider name */
+    private array $paths = [];
+
     public function __construct(
         RouterInterface $router,
         IndexUrlFactoryInterface $sitemapIndexUrlFactory
@@ -29,12 +32,21 @@ public function addProvider(UrlProviderInterface $provider): void
         $this->providers[] = $provider;
     }
 
+    public function addPaths(array $paths): void
+    {
+        $this->paths = $paths;
+    }
+
     public function generate(): iterable
     {
         $urls = [];
         foreach ($this->providers as $provider) {
-            $location = $this->router->generate('sylius_sitemap_' . $provider->getName());
-            $urls[] = $this->sitemapIndexUrlFactory->createNew($location);
+            // A provider without any generated file simply contributes no index entries.
+            $pathCount = count($this->paths[$provider->getName()] ?? []);
+            for ($i = 0; $i < $pathCount; ++$i) {
+                $location = $this->router->generate('sylius_sitemap_' . $provider->getName(), ['index' => $i]);
+                $urls[] = $this->sitemapIndexUrlFactory->createNew($location);
+            }
         }
 
         return $urls;
diff --git a/src/Provider/IndexUrlProviderInterface.php b/src/Provider/IndexUrlProviderInterface.php
index ee8ae8d3..0480acb2 100644
--- a/src/Provider/IndexUrlProviderInterface.php
+++ b/src/Provider/IndexUrlProviderInterface.php
@@ -9,4 +9,9 @@ interface IndexUrlProviderInterface
     public function generate(): iterable;
 
     public function addProvider(UrlProviderInterface $provider): void;
+
+    /**
+     * @param array<string, string[]> $paths generated sitemap file paths, keyed by provider name
+     */
+    public function addPaths(array $paths): void;
 }
diff --git a/src/Renderer/SitemapRenderer.php b/src/Renderer/SitemapRenderer.php
index b81acb56..0e05a703 100644
--- a/src/Renderer/SitemapRenderer.php
+++ b/src/Renderer/SitemapRenderer.php
@@ -15,8 +15,34 @@ public function __construct(RendererAdapterInterface $adapter)
         $this->adapter = $adapter;
     }
 
-    public function render(SitemapInterface $sitemap): string
+    /**
+     * Renders the sitemap as one or more XML documents.
+     *
+     * @param int|null $limit maximum number of URLs per document; null or a value below 1 disables splitting
+     *
+     * @return iterable<int, string>
+     */
+    public function render(SitemapInterface $sitemap, ?int $limit = null): iterable
     {
-        return $this->adapter->render($sitemap);
+        $urls = $sitemap->getUrls();
+        $total = count($urls);
+
+        // Nothing to split: still render once so an (empty) document is produced.
+        // This also keeps array_chunk() away from a zero length, which it rejects.
+        if (0 === $total) {
+            yield $this->adapter->render($sitemap);
+
+            return;
+        }
+
+        if (null === $limit || $limit < 1) {
+            $limit = $total;
+        }
+
+        foreach (array_chunk($urls, $limit) as $slice) {
+            $sitemap->setUrls($slice);
+
+            yield $this->adapter->render($sitemap);
+        }
     }
 }
diff --git a/src/Renderer/SitemapRendererInterface.php b/src/Renderer/SitemapRendererInterface.php
index 8c426b66..55c264d9 100644
--- a/src/Renderer/SitemapRendererInterface.php
+++ b/src/Renderer/SitemapRendererInterface.php
@@ -8,5 +8,10 @@
 
 interface SitemapRendererInterface
 {
-    public function render(SitemapInterface $sitemap): string;
+    /**
+     * @param int|null $limit maximum number of URLs per rendered document (null: no splitting)
+     *
+     * @return iterable<int, string> the rendered XML documents, keyed by chunk index
+     */
+    public function render(SitemapInterface $sitemap, ?int $limit = null): iterable;
 }
diff --git a/src/Routing/SitemapLoader.php b/src/Routing/SitemapLoader.php
index cc528714..0031d316 100644
--- a/src/Routing/SitemapLoader.php
+++ b/src/Routing/SitemapLoader.php
@@ -40,12 +40,14 @@ public function load($resource, $type = null)
             $routes->add(
                 $name,
                 new Route(
-                    '/sitemap/' . $provider->getName() . '.xml',
+                    '/sitemap/' . $provider->getName() . '/{index}.xml',
                     [
                         '_controller' => 'sylius.controller.sitemap::showAction',
                         'name' => $provider->getName(),
                     ],
-                    [],
+                    [
+                        'index' => '\d+',
+                    ],
                     [],
                     '',
                     [],