Skip to content

Commit

Permalink
Full sync: Set chunk size of comments dynamically (#41350)
Browse files Browse the repository at this point in the history
* Move filter_objects_and_metadata_by_size to module class and use it by posts and comments

* changelog

* Fixed tests

* changelog

* ID for comments is not an int so let's cast before comparing

* Added tests for comments

* Id field for posts is ID

* Added tests to cover filter_objects_and_metadata_by_size

* Replace deprecated method

* Typo in docblocks

Committed via a GitHub action: https://github.com/Automattic/jetpack/actions/runs/13050060760

Upstream-Ref: Automattic/jetpack@d705b87
  • Loading branch information
darssen authored and matticbot committed Jan 30, 2025
1 parent c9725c4 commit 937b6fe
Show file tree
Hide file tree
Showing 13 changed files with 239 additions and 181 deletions.
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"automattic/jetpack-config": "^3.0.0",
"automattic/jetpack-connection": "^6.3.2-alpha",
"automattic/jetpack-my-jetpack": "^5.4.1-alpha",
"automattic/jetpack-sync": "^4.5.1-alpha",
"automattic/jetpack-sync": "^4.6.0-alpha",
"automattic/jetpack-status": "^5.0.2"
},
"require-dev": {
Expand Down
2 changes: 1 addition & 1 deletion jetpack_vendor/automattic/jetpack-my-jetpack/composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"automattic/jetpack-constants": "^3.0.1",
"automattic/jetpack-plans": "^0.5.1",
"automattic/jetpack-status": "^5.0.2",
"automattic/jetpack-sync": "^4.5.1-alpha",
"automattic/jetpack-sync": "^4.6.0-alpha",
"automattic/jetpack-protect-status": "^0.4.2"
},
"require-dev": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"php": ">=7.2",
"automattic/jetpack-connection": "^6.3.2-alpha",
"automattic/jetpack-plugins-installer": "^0.5.0",
"automattic/jetpack-sync": "^4.5.1-alpha",
"automattic/jetpack-sync": "^4.6.0-alpha",
"automattic/jetpack-protect-models": "^0.4.1",
"automattic/jetpack-plans": "^0.5.1"
},
Expand Down
7 changes: 5 additions & 2 deletions jetpack_vendor/automattic/jetpack-sync/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [4.5.1-alpha] - unreleased
## [4.6.0-alpha] - unreleased

This is an alpha version! The changes listed here are not final.

### Added
- Sync: Full Sync comments now send dynamic chunks if chunk size default is too big

### Changed
- Jetpack Sync: Checksum performance optimizations for Meta Sync Module

Expand Down Expand Up @@ -1385,7 +1388,7 @@ This is an alpha version! The changes listed here are not final.

- Packages: Move sync to a classmapped package

[4.5.1-alpha]: https://github.com/Automattic/jetpack-sync/compare/v4.5.0...v4.5.1-alpha
[4.6.0-alpha]: https://github.com/Automattic/jetpack-sync/compare/v4.5.0...v4.6.0-alpha
[4.5.0]: https://github.com/Automattic/jetpack-sync/compare/v4.4.0...v4.5.0
[4.4.0]: https://github.com/Automattic/jetpack-sync/compare/v4.3.0...v4.4.0
[4.3.0]: https://github.com/Automattic/jetpack-sync/compare/v4.2.0...v4.3.0
Expand Down
2 changes: 1 addition & 1 deletion jetpack_vendor/automattic/jetpack-sync/composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
"link-template": "https://github.com/Automattic/jetpack-sync/compare/v${old}...v${new}"
},
"branch-alias": {
"dev-trunk": "4.5.x-dev"
"dev-trunk": "4.6.x-dev"
},
"dependencies": {
"test-only": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
*/
class Package_Version {

const PACKAGE_VERSION = '4.5.1-alpha';
const PACKAGE_VERSION = '4.6.0-alpha';

const PACKAGE_SLUG = 'sync';

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,25 @@
* Class to handle sync for comments.
*/
class Comments extends Module {

/**
* Max bytes allowed for full sync upload.
* Current Setting : 7MB.
*
* @access public
*
* @var int
*/
const MAX_SIZE_FULL_SYNC = 7000000;
/**
* Max bytes allowed for post meta_value => length.
* Current Setting : 2MB.
*
* @access public
*
* @var int
*/
const MAX_COMMENT_META_LENGTH = 2000000;
/**
* Sync module name.
*
Expand Down Expand Up @@ -573,10 +592,21 @@ public function get_next_chunk( $config, $status, $chunk_size ) {
}
// Get the comment IDs from the comments that were fetched.
$fetched_comment_ids = wp_list_pluck( $comments, 'comment_ID' );
$metadata = $this->get_metadata( $fetched_comment_ids, 'comment', Settings::get_setting( 'comment_meta_whitelist' ) );

// Filter the comments and metadata based on the maximum size constraints.
list( $filtered_comment_ids, $filtered_comments, $filtered_comments_metadata ) = $this->filter_objects_and_metadata_by_size(
'comment',
$comments,
$metadata,
self::MAX_COMMENT_META_LENGTH, // Replace with appropriate comment meta length constant.
self::MAX_SIZE_FULL_SYNC
);

return array(
'object_ids' => $comment_ids, // Still send the original comment IDs since we need them to update the status.
'objects' => $comments,
'meta' => $this->get_metadata( $fetched_comment_ids, 'comment', Settings::get_setting( 'comment_meta_whitelist' ) ),
'object_ids' => $filtered_comment_ids,
'objects' => $filtered_comments,
'meta' => $filtered_comments_metadata,
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -691,4 +691,62 @@ public function total( $config ) {
public function get_where_sql( $config ) { // phpcs:ignore VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable
return '1=1';
}

/**
* Filters objects and metadata based on maximum size constraints.
* It always allows the first object with its metadata, even if they exceed the limit.
*
* @access public
*
* @param string $type The type of objects to filter (e.g., 'post' or 'comment').
* @param array $objects The array of objects to filter (e.g., posts or comments).
* @param array $metadata The array of metadata to filter.
* @param int $max_meta_size Maximum size for individual objects.
* @param int $max_total_size Maximum combined size for objects and metadata.
* @return array An array containing the filtered object IDs, filtered objects, and filtered metadata.
*/
public function filter_objects_and_metadata_by_size( $type, $objects, $metadata, $max_meta_size, $max_total_size ) {
$filtered_objects = array();
$filtered_metadata = array();
$filtered_object_ids = array();
$current_size = 0;

foreach ( $objects as $object ) {
$object_size = strlen( maybe_serialize( $object ) );
$current_metadata = array();
$metadata_size = 0;

foreach ( $metadata as $key => $metadata_item ) {
if ( (int) $metadata_item->{$type . '_id'} === (int) $object->{$this->id_field()} ) {
$metadata_item_size = strlen( maybe_serialize( $metadata_item->meta_value ) );
if ( $metadata_item_size >= $max_meta_size ) {
$metadata_item->meta_value = ''; // Trim metadata if too large.
}
$current_metadata[] = $metadata_item;
$metadata_size += $metadata_item_size >= $max_meta_size ? 0 : $metadata_item_size;

if ( ! empty( $filtered_object_ids ) && ( $current_size + $object_size + $metadata_size ) > $max_total_size ) {
break 2; // Exit both loops.
}
unset( $metadata[ $key ] );
}
}

// Always allow the first object with metadata.
if ( empty( $filtered_object_ids ) || ( $current_size + $object_size + $metadata_size ) <= $max_total_size ) {
$filtered_object_ids[] = strval( $object->{$this->id_field()} );
$filtered_objects[] = $object;
$filtered_metadata = array_merge( $filtered_metadata, $current_metadata );
$current_size += $object_size + $metadata_size;
} else {
break;
}
}

return array(
$filtered_object_ids,
$filtered_objects,
$filtered_metadata,
);
}
}
77 changes: 22 additions & 55 deletions jetpack_vendor/automattic/jetpack-sync/src/modules/class-posts.php
Original file line number Diff line number Diff line change
Expand Up @@ -875,11 +875,29 @@ public function get_next_chunk( $config, $status, $chunk_size ) {
return array();
}

$posts = $this->expand_posts( $post_ids );
$posts_metadata = $this->get_metadata( $post_ids, 'post', Settings::get_setting( 'post_meta_whitelist' ) );
$posts = $this->expand_posts( $post_ids );

// If no posts were fetched, make sure to return the expected structure so that status is updated correctly.
if ( empty( $posts ) ) {
return array(
'object_ids' => $post_ids,
'objects' => array(),
'meta' => array(),
);
}
// Get the post IDs from the posts that were fetched.
$fetched_post_ids = wp_list_pluck( $posts, 'ID' );
$metadata = $this->get_metadata( $fetched_post_ids, 'post', Settings::get_setting( 'post_meta_whitelist' ) );

// Filter the posts and metadata based on the maximum size constraints.
list( $filtered_post_ids, $filtered_posts, $filtered_posts_metadata ) = $this->filter_objects_and_metadata_by_size(
'post',
$posts,
$metadata,
self::MAX_POST_META_LENGTH,
self::MAX_SIZE_FULL_SYNC
);

// Filter posts and metadata based on maximum size constraints.
list( $filtered_post_ids, $filtered_posts, $filtered_posts_metadata ) = $this->filter_posts_and_metadata_max_size( $posts, $posts_metadata );
return array(
'object_ids' => $filtered_post_ids,
'objects' => $filtered_posts,
Expand All @@ -901,57 +919,6 @@ private function expand_posts( $post_ids ) {
return $posts;
}

/**
* Filters posts and metadata based on maximum size constraints.
* It always allows the first post with its metadata even if they exceed the limit, otherwise they will never be synced.
*
* @access public
*
* @param array $posts The array of posts to filter.
* @param array $metadata The array of metadata to filter.
* @return array An array containing the filtered post IDs, filtered posts, and filtered metadata.
*/
public function filter_posts_and_metadata_max_size( $posts, $metadata ) {
$filtered_posts = array();
$filtered_metadata = array();
$filtered_post_ids = array();
$current_size = 0;
foreach ( $posts as $post ) {
$post_content_size = isset( $post->post_content ) ? strlen( $post->post_content ) : 0;
$current_metadata = array();
$metadata_size = 0;
foreach ( $metadata as $key => $metadata_item ) {
if ( (int) $metadata_item->post_id === $post->ID ) {
// Trimming metadata if it exceeds limit. Similar to trim_post_meta.
$metadata_item_size = strlen( maybe_serialize( $metadata_item->meta_value ) );
if ( $metadata_item_size >= self::MAX_POST_META_LENGTH ) {
$metadata_item->meta_value = '';
}
$current_metadata[] = $metadata_item;
$metadata_size += $metadata_item_size >= self::MAX_POST_META_LENGTH ? 0 : $metadata_item_size;
if ( ! empty( $filtered_post_ids ) && ( $current_size + $post_content_size + $metadata_size ) > ( self::MAX_SIZE_FULL_SYNC ) ) {
break 2; // Break both foreach loops.
}
unset( $metadata[ $key ] );
}
}
// Always allow the first post with its metadata.
if ( empty( $filtered_post_ids ) || ( $current_size + $post_content_size + $metadata_size ) <= ( self::MAX_SIZE_FULL_SYNC ) ) {
$filtered_post_ids[] = strval( $post->ID );
$filtered_posts[] = $post;
$filtered_metadata = array_merge( $filtered_metadata, $current_metadata );
$current_size += $post_content_size + $metadata_size;
} else {
break;
}
}
return array(
$filtered_post_ids,
$filtered_posts,
$filtered_metadata,
);
}

/**
* Set the status of the full sync action based on the objects that were sent.
*
Expand Down
2 changes: 1 addition & 1 deletion jetpack_vendor/i18n-map.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
),
'jetpack-sync' => array(
'path' => 'jetpack_vendor/automattic/jetpack-sync',
'ver' => '4.5.1-alpha1738231751',
'ver' => '4.6.0-alpha1738231842',
),
),
);
Loading

0 comments on commit 937b6fe

Please sign in to comment.