From 898296f7e7d059ad02e8cc3ab2039c177c39a51e Mon Sep 17 00:00:00 2001
From: Alex Skrypnyk
Date: Fri, 21 Feb 2020 14:17:04 +1100
Subject: [PATCH] Refactored updating of minisite instances to use batch and
 fetch archives.

---
 minisite.install                              | 210 +++++++++++++-----
 minisite.module                               |  62 ++++++
 .../UploadBrowseAliasPathautoTest.php         |   2 +-
 3 files changed, 220 insertions(+), 54 deletions(-)

diff --git a/minisite.install b/minisite.install
index 46c468a..c108fa4 100644
--- a/minisite.install
+++ b/minisite.install
@@ -7,8 +7,8 @@
 
 use Drupal\Core\Database\Database;
 use Drupal\Core\File\FileSystemInterface;
-use Drupal\field\Entity\FieldStorageConfig;
 use Drupal\minisite\Minisite;
+use Symfony\Component\HttpFoundation\Request;
 
 /**
  * Implements hook_uninstall().
@@ -207,79 +207,183 @@ function minisite_update_8001() {
 }
 
 /**
- * Updates mime type and file size DB records for all Minisite assets.
+ * Re-saves all Minisite instances to populate the database with asset links.
  */
-function minisite_update_8013(&$sandbox) {
-  $batch_size = 10;
-
-  if (!isset($sandbox['current_minisite_field_id'])) {
-    $minisite_field_ids = [];
-
-    // Get all the entity reference revisions fields.
-    $map = \Drupal::service('entity_field.manager')->getFieldMapByFieldType('minisite');
-    foreach ($map as $entity_type_id => $info) {
-      foreach ($info as $name => $data) {
-        if (FieldStorageConfig::loadByName($entity_type_id, $name)->getSetting('target_type') == 'file') {
-          $minisite_field_ids[] = "$entity_type_id.$name";
-        }
-      }
-    }
+function minisite_update_8002(&$sandbox) {
+  // Number of Minisite instances to process in a single batch.
+  // Trying to keep this as low as possible as some minisites may have
+  // a lot of assets that will take time and resources to process.
+  $batch_size = max(1, (int) getenv('MINISITE_UPDATE_BATCH_SIZE'));
+
+  if (!isset($sandbox['info'])) {
+    module_load_include('module', 'minisite');
+
+    $info = minisite_get_info_all(TRUE);
+
+    if (empty($info)) {
+      $sandbox['#finished'] = 1;
 
-    if (!$minisite_field_ids) {
-      // There are no minisite fields. Return before initializing the sandbox.
-      return;
+      return t('There are no Minisite fields with content in any available entity types.');
     }
 
-    // Initialize the sandbox.
-    $sandbox['current_minisite_field_id'] = 0;
-    $sandbox['minisite_field_ids'] = $minisite_field_ids;
-    $sandbox['max'] = count($minisite_field_ids);
+    $sandbox['info'] = $info;
+    $sandbox['max'] = count($info);
     $sandbox['progress'] = 0;
     $sandbox['updated'] = 0;
   }
 
-  /** @var \Drupal\field\FieldStorageConfigInterface $field_storage */
-  $field_storage = FieldStorageConfig::load($sandbox['minisite_field_ids'][$sandbox['current_minisite_field_id']]);
+  $current_batch_info = array_slice($sandbox['info'], $sandbox['progress'], $batch_size);
 
-  $target_entity_type = \Drupal::entityTypeManager()->getDefinition($field_storage->getTargetEntityTypeId());
+  $stage_file_proxy_is_enabled = \Drupal::service('module_handler')->moduleExists('stage_file_proxy');
 
-  $id = $target_entity_type->getKey('id');
-  $entity_ids = \Drupal::entityQuery($field_storage->getTargetEntityTypeId())
-    ->condition($field_storage->getName(), NULL, 'IS NOT NULL')
-    ->range($sandbox['progress'], $batch_size)
-    ->sort($id, 'ASC')
-    ->accessCheck(FALSE)
-    ->execute();
+  $messages = [];
+  foreach ($current_batch_info as $info) {
+    $sandbox['progress']++;
 
-  foreach ($entity_ids as $entity_id) {
-    $host_entity = \Drupal::entityTypeManager()
-      ->getStorage($field_storage->getTargetEntityTypeId())
-      ->load($entity_id);
+    list($entity_type, $field_name, $entity_id) = explode('__', $info);
+
+    $messages[] = t('Processing Minisite for field @field_name attached to @entity_type with ID @entity_id.', [
+      '@entity_type' => $entity_type,
+      '@field_name' => $field_name,
+      '@entity_id' => $entity_id,
+    ]);
+
+    $host_entity = \Drupal::entityTypeManager()->getStorage($entity_type)->load($entity_id);
+    if (!$host_entity) {
+      // The entity may have been removed after the list was collected.
+      $messages[] = ' ' . t('SKIPPED: Unable to load host entity.');
+      continue;
+    }
+    $field_item_list = $host_entity->get($field_name);
+
+    // Before proceeding with minisite instantiation, we need to check that
+    // the archive file is available in this environment and do our best effort
+    // to fetch files from origin location (works only on the environments with
+    // stage_file_proxy enabled).
+    /** @var \Drupal\file\Entity\File $archive_file */
+    $archive_file = $field_item_list->entity;
+    if (!$archive_file) {
+      // The referenced file entity no longer exists.
+      $messages[] = ' ' . t('SKIPPED: Archive file entity is missing.');
+      continue;
+    }
+    $archive_file_uri = $archive_file->getFileUri();
+    $archive_file_absolute_url = $archive_file->createFileUrl(FALSE);
+    if (!is_readable($archive_file_uri)) {
+      if (!$stage_file_proxy_is_enabled) {
+        $messages[] = ' ' . t('SKIPPED: Archive file is missing in this environment and stage_file_proxy module is not enabled.');
+        continue;
+      }
+
+      _minisite_install_stage_file_proxy_fetch($archive_file_absolute_url);
+      if (!is_readable($archive_file_uri)) {
+        $messages[] = ' ' . t('SKIPPED: Unable to fetch archive file @uri.', [
+          '@uri' => $archive_file_uri,
+        ]);
+        continue;
+      }
+
+      $messages[] = ' ' . t('Fetched archive file @uri.', [
+        '@uri' => $archive_file_uri,
+      ]);
+    }
 
-    $field_item_list = $host_entity->get($field_storage->getName());
     $minisite = Minisite::createInstance($field_item_list);
 
     if ($minisite) {
       $minisite->save();
       $sandbox['updated']++;
+      $messages[] = ' ' . t('SUCCESS: Updated Minisite.');
+    }
+    else {
+      $messages[] = ' ' . t('SKIPPED: Unable to process Minisite.');
     }
   }
 
-  if (count($entity_ids) < $batch_size) {
-    $sandbox['current_minisite_field_id']++;
-    $sandbox['progress'] = 0;
+  $sandbox['#finished'] = $sandbox['progress'] / $sandbox['max'];
+
+  return t('Processed @processed of @total and updated @updated Minisite instances: @messages', [
+    '@total' => $sandbox['max'],
+    '@processed' => $sandbox['progress'],
+    '@updated' => $sandbox['updated'],
+    '@messages' => PHP_EOL . implode(PHP_EOL, $messages),
+  ]);
+}
+
+/**
+ * Fetch file from the specified URI using stage_file_proxy fetcher.
+ *
+ * This is a shortened version of
+ * \Drupal\stage_file_proxy\EventSubscriber\ProxySubscriber::checkFileOrigin.
+ *
+ * Unfortunately, it is not possible to use functionality of stage_file_proxy
+ * transparently within update hooks (send request from the update hook to the
+ * same server) as they can be run via CLI, in which case it is possible that
+ * the request may not reach the website (for example, if PHP runs in a Docker
+ * container separate to the web-server and the internal name of the web-server
+ * is not the same as site's external URI).
+ *
+ * It is also not possible to simply craft a stub request and pass it to the
+ * stage_file_proxy's ProxySubscriber::checkFileOrigin method, since the method
+ * uses 'exit' to perform a redirect (the logic is not separated from the
+ * request stack handling), which will terminate the process itself.
+ *
+ * The only viable solution is to use stage_file_proxy's fetcher manager with
+ * additionally added logic taken from ProxySubscriber::checkFileOrigin method.
+ *
+ * @param string $url
+ *   Absolute URL to the file to download. Absolute is required in order to
+ *   check that this is not an origin server.
+ *
+ * @return bool
+ *   TRUE if file was downloaded, FALSE otherwise.
+ *
+ * @see \Drupal\stage_file_proxy\EventSubscriber\ProxySubscriber::checkFileOrigin
+ */
+function _minisite_install_stage_file_proxy_fetch($url) {
+  $config = \Drupal::configFactory()->get('stage_file_proxy.settings');
+
+  // Get the origin server.
+  $server = $config->get('origin');
+
+  // Quit if no origin given.
+  if (!$server) {
+    return FALSE;
   }
-  else {
-    $sandbox['progress'] += $batch_size;
+
+  $request = Request::create($url);
+
+  // Quit if we are the origin, ignore http(s).
+  $current_host = $request->getHost();
+  if (preg_replace('#^[a-z]*://#u', '', $server) === $current_host) {
+    return FALSE;
   }
-  // Update #finished, 1 if the the whole update has finished.
-  $sandbox['#finished'] = empty($sandbox['max']) ? 1 : ($sandbox['current_minisite_field_id'] / $sandbox['max']);
-
-  if ($sandbox['#finished'] < 1) {
-    return t('Processed @processed of @total and updated @updated assets', [
-      '@total' => $sandbox['max'],
-      '@processed' => $sandbox['progress'],
-      '@updated' => $sandbox['updated'],
-    ]);
+
+  $fetch_manager = \Drupal::getContainer()->get('stage_file_proxy.fetch_manager');
+
+  $file_dir = $fetch_manager->filePublicPath();
+  $request_path = $request->getPathInfo();
+
+  $request_path = mb_substr($request_path, 1);
+
+  if (strpos($request_path, '' . $file_dir) !== 0) {
+    return FALSE;
+  }
+
+  // Note if the origin server files location is different. This
+  // must be the exact path for the remote site's public file
+  // system path, and defaults to the local public file system path.
+  $remote_file_dir = trim((string) $config->get('origin_dir'));
+  if (!$remote_file_dir) {
+    $remote_file_dir = $file_dir;
   }
+
+  $request_path = rawurldecode($request_path);
+  $relative_path = mb_substr($request_path, mb_strlen($file_dir) + 1);
+
+  $options = [
+    'verify' => $config->get('verify'),
+  ];
+
+  return $fetch_manager->fetch($server, $remote_file_dir, $relative_path, $options);
 }
diff --git a/minisite.module b/minisite.module
index 51087a0..2a74c86 100644
--- a/minisite.module
+++ b/minisite.module
@@ -10,6 +10,7 @@ use Drupal\Core\Entity\FieldableEntityInterface;
 use Drupal\Core\Field\FieldDefinitionInterface;
 use Drupal\Core\Routing\RouteMatchInterface;
 use Drupal\Core\Url;
+use Drupal\field\Entity\FieldStorageConfig;
 use Drupal\file\FileInterface;
 use Drupal\minisite\AssetInterface;
 use Drupal\minisite\Exception\ArchiveException;
@@ -128,6 +129,67 @@ function minisite_path_update($path) {
   minisite_path_insert($path);
 }
 
+/**
+ * Get information about all entities that have a minisite archive attached.
+ *
+ * @param bool $flatten
+ *   (optional) Flatten a list instead of providing as a tree.
+ *   Defaults to FALSE.
+ * @param string $flatten_delimiter
+ *   (optional) The delimiter to use if $flatten is TRUE. Defaults to '__'.
+ *
+ * @return array
+ *   If $flatten is FALSE, returns a multidimensional array with hierarchy
+ *   entity_type => field_name => id.
+ *   If $flatten is TRUE, returns an array of flattened values in format
+ *   "entity_type__field_name__entity_id".
+ */
+function minisite_get_info_all($flatten = FALSE, $flatten_delimiter = '__') {
+  $info = [];
+  $minisite_field_ids = [];
+
+  // Collect all entity types with field names.
+  $map = \Drupal::service('entity_field.manager')->getFieldMapByFieldType('minisite');
+  foreach ($map as $entity_type_id => $entity_type_info) {
+    foreach (array_keys($entity_type_info) as $name) {
+      if (FieldStorageConfig::loadByName($entity_type_id, $name)->getSetting('target_type') == 'file') {
+        $minisite_field_ids[] = "$entity_type_id.$name";
+      }
+    }
+  }
+
+  foreach ($minisite_field_ids as $minisite_field_id) {
+    /** @var \Drupal\field\FieldStorageConfigInterface $field_storage */
+    $field_storage = FieldStorageConfig::load($minisite_field_id);
+
+    $entity_type_id = $field_storage->getTargetEntityTypeId();
+    $field_name = $field_storage->getName();
+
+    $target_entity_type_definition = \Drupal::entityTypeManager()->getDefinition($entity_type_id);
+    $id_key = $target_entity_type_definition->getKey('id');
+
+    // Get all entities for this field that have values.
+    $entity_ids = \Drupal::entityQuery($entity_type_id)
+      ->condition($field_name, NULL, 'IS NOT NULL')
+      ->sort($id_key, 'ASC')
+      ->accessCheck(FALSE)
+      ->execute();
+
+    if ($entity_ids) {
+      if ($flatten) {
+        foreach ($entity_ids as $entity_id) {
+          $info[] = $entity_type_id . $flatten_delimiter . $field_name . $flatten_delimiter . $entity_id;
+        }
+      }
+      else {
+        $info[$entity_type_id][$field_name] = array_values($entity_ids);
+      }
+    }
+  }
+
+  return $info;
+}
+
 /**
  * Get Minisite fields from the entity.
  *
diff --git a/tests/src/Functional/UploadBrowseAliasPathautoTest.php b/tests/src/Functional/UploadBrowseAliasPathautoTest.php
index 1bbf625..0725dd4 100644
--- a/tests/src/Functional/UploadBrowseAliasPathautoTest.php
+++ b/tests/src/Functional/UploadBrowseAliasPathautoTest.php
@@ -45,7 +45,7 @@ public function testUploadAndBrowsingAlias() {
     $minisite_description = 'D' . $this->randomMachineName();
 
     // Create pathauto pattern.
-    $this->createPathautoPattern('node', mb_strtolower($this->randomMachineName()) . '/' . '[node:title]');
+    $this->createPattern('node', mb_strtolower($this->randomMachineName()) . '/' . '[node:title]');
 
     // Create a field and a node with Pathauto enabled.
     $edit = [