Skip to content

Commit

Permalink
Refactored updating of minisite instances to use batch and fetch arch…
Browse files Browse the repository at this point in the history
…ives.
  • Loading branch information
Alex Skrypnyk committed Feb 21, 2020
1 parent 9dbc65c commit 898296f
Show file tree
Hide file tree
Showing 3 changed files with 209 additions and 54 deletions.
200 changes: 147 additions & 53 deletions minisite.install
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

use Drupal\Core\Database\Database;
use Drupal\Core\File\FileSystemInterface;
use Drupal\field\Entity\FieldStorageConfig;
use Drupal\minisite\Minisite;
use Symfony\Component\HttpFoundation\Request;

/**
* Implements hook_uninstall().
Expand Down Expand Up @@ -207,79 +207,173 @@ function minisite_update_8001() {
}

/**
* Updates mime type and file size DB records for all Minisite assets.
* Re-saves all Minisite instances to populate the database with asset links.
*/
function minisite_update_8013(&$sandbox) {
$batch_size = 10;

if (!isset($sandbox['current_minisite_field_id'])) {
$minisite_field_ids = [];

// Get all the minisite fields.
$map = \Drupal::service('entity_field.manager')->getFieldMapByFieldType('minisite');
foreach ($map as $entity_type_id => $info) {
foreach ($info as $name => $data) {
if (FieldStorageConfig::loadByName($entity_type_id, $name)->getSetting('target_type') == 'file') {
$minisite_field_ids[] = "$entity_type_id.$name";
}
}
}
function minisite_update_8002(&$sandbox) {
// Number of Minisite instances to process in a single batch.
// Keep this as low as possible, as some minisites may have
// a lot of assets that will take time and resources to process.
$batch_size = getenv('MINISITE_UPDATE_BATCH_SIZE') ?: 1;

if (!isset($sandbox['info'])) {
module_load_include('module', 'minisite');

$info = minisite_get_info_all(TRUE);

if (empty($info)) {
$sandbox['#finished'] = 1;

if (!$minisite_field_ids) {
// There are no minisite fields. Return before initializing the sandbox.
return;
return t('There are no Minisite fields with content in any available entity types.');
}

// Initialize the sandbox.
$sandbox['current_minisite_field_id'] = 0;
$sandbox['minisite_field_ids'] = $minisite_field_ids;
$sandbox['max'] = count($minisite_field_ids);
$sandbox['info'] = $info;
$sandbox['max'] = count($info);
$sandbox['progress'] = 0;
$sandbox['updated'] = 0;
}

/** @var \Drupal\field\FieldStorageConfigInterface $field_storage */
$field_storage = FieldStorageConfig::load($sandbox['minisite_field_ids'][$sandbox['current_minisite_field_id']]);
$current_batch_info = array_slice($sandbox['info'], $sandbox['progress'], $batch_size);

$target_entity_type = \Drupal::entityTypeManager()->getDefinition($field_storage->getTargetEntityTypeId());
$stage_file_proxy_is_enabled = \Drupal::service('module_handler')->moduleExists('stage_file_proxy');

$id = $target_entity_type->getKey('id');
$entity_ids = \Drupal::entityQuery($field_storage->getTargetEntityTypeId())
->condition($field_storage->getName(), NULL, 'IS NOT NULL')
->range($sandbox['progress'], $batch_size)
->sort($id, 'ASC')
->accessCheck(FALSE)
->execute();
$messages = [];
foreach ($current_batch_info as $info) {
$sandbox['progress']++;

foreach ($entity_ids as $entity_id) {
$host_entity = \Drupal::entityTypeManager()
->getStorage($field_storage->getTargetEntityTypeId())
->load($entity_id);
list($entity_type, $field_name, $entity_id) = explode('__', $info);

$messages[] = t('Processing Minisite for field @field_name attached to @entity_type with ID @entity_id.', [
'@entity_type' => $entity_type,
'@field_name' => $field_name,
'@entity_id' => $entity_id,
]);

$host_entity = \Drupal::entityTypeManager()->getStorage($entity_type)->load($entity_id);
$field_item_list = $host_entity->get($field_name);

// Before proceeding with minisite instantiation, we need to check that
// the archive file is available in this environment and make a best effort
// to fetch it from the origin location (works only on environments with
// stage_file_proxy enabled).
/** @var \Drupal\file\Entity\File $archive_file */
$archive_file = $field_item_list->entity;
$archive_file_uri = $archive_file->getFileUri();
$archive_file_absolute_url = $archive_file->createFileUrl(FALSE);
if (!is_readable($archive_file_uri)) {
if (!$stage_file_proxy_is_enabled) {
$messages[] = ' ' . t('SKIPPED: Archive file is missing in this environment and stage_file_proxy module is not enabled.');
continue;
}

_minisite_install_stage_file_proxy_fetch($archive_file_absolute_url);
if (!is_readable($archive_file_uri)) {
$messages[] = ' ' . t('SKIPPED: Unable to fetch archive file @uri.', [
'@uri' => $archive_file_uri,
]);
continue;
}

$messages[] = ' ' . t('Fetched archive file @uri.', [
'@uri' => $archive_file_uri,
]);
}

$field_item_list = $host_entity->get($field_storage->getName());
$minisite = Minisite::createInstance($field_item_list);

if ($minisite) {
$minisite->save();
$sandbox['updated']++;
$messages[] = ' ' . t('SUCCESS: Updated Ministe.');
}
else {
$messages[] = ' ' . t('SKIPPED: Unable to process Ministe.');
}
}

if (count($entity_ids) < $batch_size) {
$sandbox['current_minisite_field_id']++;
$sandbox['progress'] = 0;
$sandbox['#finished'] = $sandbox['progress'] / $sandbox['max'];

return t('Processed @processed of @total and updated @updated Minisite instances: @messages', [
'@total' => $sandbox['max'],
'@processed' => $sandbox['progress'],
'@updated' => $sandbox['updated'],
'@messages' => PHP_EOL . implode(PHP_EOL, $messages),
]);
}

/**
* Fetch file from the specified URI using stage_file_proxy fetcher.
*
* This is a shortened version of
* \Drupal\stage_file_proxy\EventSubscriber\ProxySubscriber::checkFileOrigin.
*
* Unfortunately, it is not possible to use functionality of stage_file_proxy
* transparently within update hooks (send request from the update hook to the
* same server) as they can be run via CLI, in which case it is possible that
* the request may not reach the website (for example, if PHP runs in a Docker
* container separate to the web-server and the internal name of the web-server
* is not the same as site's external URI).
*
* It is also not possible to simply craft a stub request and pass it to the
* stage_file_proxy's ProxySubscriber::checkFileOrigin method, since the method
* uses 'exit' to perform a redirect (the logic is not separated from the
* request stack handling), which will terminate the process itself.
*
* The only viable solution is to use stage_file_proxy's fetcher manager with
* additionally added logic taken from ProxySubscriber::checkFileOrigin method.
*
* @param string $url
* Absolute URL to the file to download. Absolute is required in order to
* check that this is not an origin server.
*
* @return bool
* TRUE if file was downloaded, FALSE otherwise.
*
* @see \Drupal\stage_file_proxy\EventSubscriber\ProxySubscriber::checkFileOrigin
*/
function _minisite_install_stage_file_proxy_fetch($url) {
$config = \Drupal::configFactory()->get('stage_file_proxy.settings');

// Get the origin server.
$server = $config->get('origin');

// Quit if no origin given.
if (!$server) {
return FALSE;
}
else {
$sandbox['progress'] += $batch_size;

$request = Request::create($url);

// Quit if we are the origin, ignore http(s).
$current_host = $request->getHost();
if (preg_replace('#^[a-z]*://#u', '', $server) === $current_host) {
return FALSE;
}
// Update #finished, 1 if the whole update has finished.
$sandbox['#finished'] = empty($sandbox['max']) ? 1 : ($sandbox['current_minisite_field_id'] / $sandbox['max']);

if ($sandbox['#finished'] < 1) {
return t('Processed @processed of @total and updated @updated assets', [
'@total' => $sandbox['max'],
'@processed' => $sandbox['progress'],
'@updated' => $sandbox['updated'],
]);

$fetch_manager = \Drupal::getContainer()->get('stage_file_proxy.fetch_manager');

$file_dir = $fetch_manager->filePublicPath();
$request_path = $request->getPathInfo();

$request_path = mb_substr($request_path, 1);

if (strpos($request_path, '' . $file_dir) !== 0) {
return FALSE;
}

// Note if the origin server files location is different. This
// must be the exact path for the remote site's public file
// system path, and defaults to the local public file system path.
$remote_file_dir = trim($config->get('origin_dir'));
if (!$remote_file_dir) {
$remote_file_dir = $file_dir;
}

$request_path = rawurldecode($request_path);
$relative_path = mb_substr($request_path, mb_strlen($file_dir) + 1);

$options = [
'verify' => $config->get('verify'),
];

return $fetch_manager->fetch($server, $remote_file_dir, $relative_path, $options);
}
61 changes: 61 additions & 0 deletions minisite.module
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use Drupal\Core\Entity\FieldableEntityInterface;
use Drupal\Core\Field\FieldDefinitionInterface;
use Drupal\Core\Routing\RouteMatchInterface;
use Drupal\Core\Url;
use Drupal\field\Entity\FieldStorageConfig;
use Drupal\file\FileInterface;
use Drupal\minisite\AssetInterface;
use Drupal\minisite\Exception\ArchiveException;
Expand Down Expand Up @@ -128,6 +129,66 @@ function minisite_path_update($path) {
minisite_path_insert($path);
}

/**
 * Get information about all entities that have a minisite archive attached.
 *
 * @param bool $flatten
 *   (optional) Flatten a list instead of providing as a tree.
 *   Defaults to FALSE.
 * @param string $flatten_delimiter
 *   (optional) The delimiter to use if $flatten is TRUE. Defaults to '__'.
 *
 * @return array
 *   If $flatten is FALSE, returns a multidimensional array with hierarchy
 *   entity_type => field_name => list of entity ids.
 *   If $flatten is TRUE, returns an array of flattened values in format
 *   "entity_type__field_name__entity_id".
 *   An empty array is returned when no Minisite field has any content.
 */
function minisite_get_info_all($flatten = FALSE, $flatten_delimiter = '__') {
  $info = [];

  // Collect all entity types with field names.
  $map = \Drupal::service('entity_field.manager')->getFieldMapByFieldType('minisite');
  foreach ($map as $entity_type_id => $entity_type_info) {
    foreach (array_keys($entity_type_info) as $field_name) {
      /** @var \Drupal\field\FieldStorageConfigInterface|null $field_storage */
      $field_storage = FieldStorageConfig::loadByName($entity_type_id, $field_name);

      // The field map may list fields without a configurable storage, in
      // which case nothing is loaded; skip those instead of calling a method
      // on NULL.
      if (!$field_storage || $field_storage->getSetting('target_type') !== 'file') {
        continue;
      }

      $id_key = \Drupal::entityTypeManager()
        ->getDefinition($entity_type_id)
        ->getKey('id');

      // Get all entities for this field that have values.
      $entity_ids = \Drupal::entityQuery($entity_type_id)
        ->condition($field_name, NULL, 'IS NOT NULL')
        ->sort($id_key, 'ASC')
        ->accessCheck(FALSE)
        ->execute();

      if (!$entity_ids) {
        continue;
      }

      if ($flatten) {
        foreach ($entity_ids as $entity_id) {
          $info[] = $entity_type_id . $flatten_delimiter . $field_name . $flatten_delimiter . $entity_id;
        }
      }
      else {
        $info[$entity_type_id][$field_name] = array_values($entity_ids);
      }
    }
  }

  return $info;
}

/**
* Get Minisite fields from the entity.
*
Expand Down
2 changes: 1 addition & 1 deletion tests/src/Functional/UploadBrowseAliasPathautoTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public function testUploadAndBrowsingAlias() {
$minisite_description = 'D' . $this->randomMachineName();

// Create pathauto pattern.
$this->createPathautoPattern('node', mb_strtolower($this->randomMachineName()) . '/' . '[node:title]');
$this->createPattern('node', mb_strtolower($this->randomMachineName()) . '/' . '[node:title]');

// Create a field and a node with Pathauto enabled.
$edit = [
Expand Down

0 comments on commit 898296f

Please sign in to comment.