Skip to content

Commit

Permalink
Add retry limit to the connection pilot (#5098)
Browse files Browse the repository at this point in the history
* Add retry limit to the connection pilot

* Track “failed attempts” rather than failed retries
  • Loading branch information
WPprodigy committed Dec 13, 2023
1 parent c638f85 commit cc7b432
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 123 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,136 +20,134 @@ protected static function getMethod( $name ) {
/**
* @group jetpack-required
* @preserveGlobalState disabled
* @dataProvider get_test_data__update_backoff_factor
* @dataProvider get_test_data__update_heartbeat_on_failure
*/
public function test__update_backoff_factor( ?int $backoff_factor, int $expected ) {
$option = array(
'site_url' => get_site_url(),
'hashed_site_url' => md5( get_site_url() ),
'cache_site_id' => (int) \Jetpack_Options::get_option( 'id', - 1 ),
'timestamp' => time(),
'backoff_factor' => $backoff_factor,
);
update_option( 'vip_jetpack_connection_pilot_heartbeat', $option, false );

$cp = Connection_Pilot::instance();
$update_backoff_factor = self::getMethod( 'update_backoff_factor' );
$update_backoff_factor->invoke( $cp );

$option = get_option( 'vip_jetpack_connection_pilot_heartbeat' );
$this->assertEquals( $expected, $option['backoff_factor'] );
public function test__update_heartbeat_on_failure( ?int $backoff_factor, int $expected_backoff, int $failed_attempts ) {
$last_failure = time() - 1000;

$this->set_heartbeat( [
'site_url' => 'example.com',
'backoff_factor' => $backoff_factor,
'failed_attempts' => $failed_attempts,
'failure_timestamp' => $last_failure,
] );

$connection_pilot = Connection_Pilot::instance();
$update_backoff_factor = self::getMethod( 'update_heartbeat_on_failure' );
$update_backoff_factor->invoke( $connection_pilot );

$option = $this->get_heartbeat();
$this->assertEquals( $expected_backoff, $option['backoff_factor'] );
$this->assertEquals( $failed_attempts + 1, $option['failed_attempts'] );
$this->assertTrue( $option['failure_timestamp'] > $last_failure );

// Doesn't change.
$this->assertEquals( 'example.com', $option['site_url'] );
}

/**
* @group jetpack-required
* @preserveGlobalState disabled
* @dataProvider get_test_data__update_heartbeat
*/
public function test__update_heartbeat( ?int $backoff_factor, array $expected ) {
$cp = Connection_Pilot::instance();
$update_heartbeat = self::getMethod( 'update_heartbeat' );

if ( $backoff_factor ) {
$update_heartbeat->invokeArgs( $cp, [ $backoff_factor ] );
} else {
$update_heartbeat->invoke( $cp );
}
public function test__update_heartbeat_on_success() {
$this->set_heartbeat( [
'site_url' => 'example.com',
'cache_site_id' => 22,
'backoff_factor' => 24,
'failed_attempts' => 2,
'failure_timestamp' => time() - 1000,
] );

$option = get_option( 'vip_jetpack_connection_pilot_heartbeat' );
$this->assertEquals( $expected['site_url'], $option['site_url'] );
$this->assertEquals( $expected['hashed_site_url'], $option['hashed_site_url'] );
$this->assertEquals( $expected['cache_site_id'], $option['cache_site_id'] );
$this->assertEquals( $expected['backoff_factor'], $option['backoff_factor'] );
$connection_pilot = Connection_Pilot::instance();
$update_heartbeat = self::getMethod( 'update_heartbeat_on_success' );
$update_heartbeat->invoke( $connection_pilot );

$option = $this->get_heartbeat();
$this->assertEquals( get_site_url(), $option['site_url'] );
$this->assertEquals( md5( get_site_url() ), $option['hashed_site_url'] );
$this->assertEquals( \Jetpack_Options::get_option( 'id', - 1 ), $option['cache_site_id'] );

// Resets.
$this->assertEquals( 0, $option['backoff_factor'] );
$this->assertEquals( 0, $option['failed_attempts'] );
$this->assertEquals( 0, $option['failure_timestamp'] );
}

/**
* @group jetpack-required
* @preserveGlobalState disabled
* @dataProvider get_test_data__should_back_off
*/
public function test__should_back_off( ?int $backoff_factor, DateTime $dt, bool $expected ) {
public function test__should_back_off( ?int $backoff_factor, ?DateTime $failure_time, ?DateTime $legacy_time, int $failed_attempts, bool $expected ) {
if ( null !== $backoff_factor ) {
$option = array(
'site_url' => get_site_url(),
'hashed_site_url' => md5( get_site_url() ),
'cache_site_id' => (int) \Jetpack_Options::get_option( 'id', - 1 ),
'timestamp' => $dt->getTimestamp(),
'backoff_factor' => $backoff_factor,
);
update_option( 'vip_jetpack_connection_pilot_heartbeat', $option, false );
$this->set_heartbeat( [
'backoff_factor' => $backoff_factor,
'failed_attempts' => $failed_attempts,
'failure_timestamp' => null === $failure_time ? $legacy_time->getTimestamp() : $failure_time->getTimestamp(),
] );
}

$cp = Connection_Pilot::instance();
$should_back_off = self::getMethod( 'should_back_off' );

$result = $should_back_off->invoke( $cp );
$connection_pilot = Connection_Pilot::instance();
$should_back_off = self::getMethod( 'should_back_off' );

$this->assertEquals( $expected, $result );
$this->assertEquals( $expected, $should_back_off->invoke( $connection_pilot ) );
}

public function get_test_data__update_backoff_factor() {
public function get_test_data__update_heartbeat_on_failure() {
$connection_pilot = Connection_Pilot::instance();
$increments = $connection_pilot::BACKOFF_INCREMENTS;
$max_increment = $connection_pilot::MAX_BACKOFF_FACTOR;

$test_data = [];
foreach ( $increments as $key => $increment ) {
if ( isset( $increments[ $key + 1 ] ) ) {
$test_data[ 'increment_' . $key ] = [ $increment, $increments[ $key + 1 ] ];
$test_data[ 'increment_' . $key ] = [ $increment, $increments[ $key + 1 ], $key ];
}
}

$test_data['null'] = [ null, $increments[0] ];
$test_data['zero'] = [ 0, $increments[0] ];
$test_data['max'] = [ $max_increment, $max_increment ];
$test_data['over_max'] = [ $max_increment + 1000, $max_increment ];
$test_data['null'] = [ null, $increments[0], 1 ];
$test_data['zero'] = [ 0, $increments[0], 2 ];
$test_data['max'] = [ $max_increment, $max_increment, 3 ];
$test_data['over_max'] = [ $max_increment + 1000, $max_increment, 4 ];

return $test_data;
}

public function get_test_data__update_heartbeat() {
public function get_test_data__should_back_off() {
$connection_pilot = Connection_Pilot::instance();
$max_retries = $connection_pilot::MAX_RETRIES;

// [ current backoff factor, last failure's timestamp, legacy timestamp, failed attempts, expected result ]
return [
'null' => [
null,
array(
'site_url' => get_site_url(),
'hashed_site_url' => md5( get_site_url() ),
'cache_site_id' => (int) \Jetpack_Options::get_option( 'id', - 1 ),
'timestamp' => time(),
'backoff_factor' => 0,
),
],
'zero' => [
0,
array(
'site_url' => get_site_url(),
'hashed_site_url' => md5( get_site_url() ),
'cache_site_id' => (int) \Jetpack_Options::get_option( 'id', - 1 ),
'timestamp' => time(),
'backoff_factor' => 0,
),
],
'one' => [
1,
array(
'site_url' => get_site_url(),
'hashed_site_url' => md5( get_site_url() ),
'cache_site_id' => (int) \Jetpack_Options::get_option( 'id', - 1 ),
'timestamp' => time(),
'backoff_factor' => 1,
),
],
'null' => [ null, new DateTime(), null, 0, false ],
'zero' => [ 0, new DateTime(), null, 0, false ],
'one-hour-true' => [ 1, new DateTime(), null, 0, true ],
'one-hour-false' => [ 1, ( new DateTime() )->modify( '-2 hours' ), null, 0, false ],
'eight-hours-true' => [ 8, new DateTime(), null, 0, true ],
'eight-hours-false' => [ 8, ( new DateTime() )->modify( '-9 hours' ), null, 0, false ],
'eight-hours-legacy-true' => [ 8, null, new DateTime(), 0, true ],
'eight-hours-legacy-false' => [ 8, null, ( new DateTime() )->modify( '-9 hours' ), 0, false ],
'exceeds-retry-limit-true' => [ 8, ( new DateTime() )->modify( '-9 hours' ), null, $max_retries + 1, true ],
];
}

public function get_test_data__should_back_off() {
return [
'null' => [ null, new DateTime(), false ],
'zero' => [ 0, new DateTime(), false ],
'one-hour-true' => [ 1, new DateTime(), true ],
'one-hour-false' => [ 1, ( new DateTime() )->modify( '-2 hours' ), false ],
'eight-hours-true' => [ 8, new DateTime(), true ],
'eight-hours-false' => [ 8, ( new DateTime() )->modify( '-9 hours' ), false ],
];
private function get_heartbeat() {
return get_option( 'vip_jetpack_connection_pilot_heartbeat', [] );
}

private function set_heartbeat( $values = [] ) {
$saved = update_option( 'vip_jetpack_connection_pilot_heartbeat', [
'site_url' => $values['site_url'] ?? get_site_url(),
'hashed_site_url' => isset( $values['site_url'] ) ? md5( $values['site_url'] ) : md5( get_site_url() ),
'cache_site_id' => $values['cache_site_id'] ?? (int) \Jetpack_Options::get_option( 'id', - 1 ),
'success_timestamp' => $values['success_timestamp'] ?? time(),
'backoff_factor' => $values['backoff_factor'] ?? 0,
'failed_attempts' => $values['failed_attempts'] ?? 0,
'failure_timestamp' => $values['failure_timestamp'] ?? 0,
], false );

if ( ! $saved ) {
throw new Error( 'Failed to set heartbeat' );
}
}
}
87 changes: 56 additions & 31 deletions vip-jetpack/connection-pilot/class-jetpack-connection-pilot.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,12 @@ class Connection_Pilot {
*/
const BACKOFF_INCREMENTS = [ 1, 12, 24, 48, 96, self::MAX_BACKOFF_FACTOR ];

const MAX_RETRIES = 1;

/**
* The healtcheck option's current data.
*
* Example: [ 'site_url' => 'https://example.go-vip.co', 'hashed_site_url' => '371a92eb7d5d63007db216dbd3b49187', 'cache_site_id' => 1234, 'timestamp' => 1555124370 ]
*
* @var mixed False if doesn't exist, else an array with the data shown above.
* @var array Connection-related data.
*/
private $last_heartbeat;

Expand Down Expand Up @@ -120,7 +120,7 @@ public function run_connection_pilot() {

if ( true === $is_connected ) {
// Everything checks out. Update the heartbeat option and move on.
$this->update_heartbeat();
$this->update_heartbeat_on_success();

// Attempting Akismet connection given that Jetpack is connected.
$skip_akismet = defined( 'VIP_AKISMET_SKIP_LOAD' ) && VIP_AKISMET_SKIP_LOAD;
Expand Down Expand Up @@ -176,7 +176,7 @@ public function reconnect( $prev_connection_error = null ) {

// Reconnection failed.
$this->send_alert( 'Jetpack (re)connection attempt failed.' . $prev_connection_error_message, $connection_attempt );
$this->update_backoff_factor();
$this->update_heartbeat_on_failure();
}

/**
Expand All @@ -185,29 +185,40 @@ public function reconnect( $prev_connection_error = null ) {
* @return bool True if CP should back off, false otherwise.
*/
private function should_back_off(): bool {
if ( ! empty( $this->last_heartbeat['backoff_factor'] ) && ! empty( $this->last_heartbeat['timestamp'] ) ) {
$backoff_factor = min( $this->last_heartbeat['backoff_factor'], self::MAX_BACKOFF_FACTOR );
if ( empty( $this->last_heartbeat['backoff_factor'] ) ) {
return false;
}

$backoff_factor = min( $this->last_heartbeat['backoff_factor'], self::MAX_BACKOFF_FACTOR );
if ( $backoff_factor <= 0 ) {
return false;
}

if ( $backoff_factor > 0 ) {
$seconds_elapsed = time() - $this->last_heartbeat['timestamp'];
$hours_elapsed = $seconds_elapsed / HOUR_IN_SECONDS;
$failed_attempts = $this->last_heartbeat['failed_attempts'] ?? 0;
if ( $failed_attempts >= self::MAX_RETRIES + 1 ) {
return true;
}

if ( $backoff_factor > $hours_elapsed ) {
// We're still in the backoff period.
return true;
}
}
$last_failure = 0;
if ( isset( $this->last_heartbeat['failure_timestamp'] ) ) {
$last_failure = $this->last_heartbeat['failure_timestamp'];
} elseif ( ! empty( $this->last_heartbeat['timestamp'] ) ) {
// Backwards compat.
$last_failure = $this->last_heartbeat['timestamp'];
}

return false;
$hours_elapsed = ( time() - $last_failure ) / HOUR_IN_SECONDS;

// We'll backoff (not attempt reconnection) until the hours elapsed exceeds the backoff factor.
return $backoff_factor > $hours_elapsed;
}

/**
* Updates the backoff factor after a connection attempt has failed
*
* @return void
*/
private function update_backoff_factor(): void {
private function update_heartbeat_on_failure(): void {
$current_backoff_factor = isset( $this->last_heartbeat['backoff_factor'] ) ? (int) $this->last_heartbeat['backoff_factor'] : 0;

$new_backoff_factor = self::MAX_BACKOFF_FACTOR;
Expand All @@ -219,22 +230,32 @@ private function update_backoff_factor(): void {
}
}

$this->update_heartbeat( min( $new_backoff_factor, self::MAX_BACKOFF_FACTOR ) );
$new_heartbeat = $this->last_heartbeat;

// Just want to update some values, not overwrite them all.
$new_heartbeat['backoff_factor'] = $new_backoff_factor;
$new_heartbeat['failed_attempts'] = isset( $new_heartbeat['failed_attempts'] ) ? $new_heartbeat['failed_attempts'] + 1 : 1;
$new_heartbeat['failure_timestamp'] = time();

$update = update_option( self::HEARTBEAT_OPTION_NAME, $new_heartbeat, false );
if ( $update ) {
$this->last_heartbeat = $new_heartbeat;
}
}

/**
* @param int $backoff_factor
*
* @return void
*/
private function update_heartbeat( int $backoff_factor = 0 ): void {
private function update_heartbeat_on_success(): void {
$option = array(
'site_url' => get_site_url(),
'hashed_site_url' => md5( get_site_url() ), // used to protect against S&Rs/imports/syncs
'cache_site_id' => (int) \Jetpack_Options::get_option( 'id', -1 ), // if no id can be retrieved, we'll fall back to -1
'timestamp' => time(),
'backoff_factor' => $backoff_factor,
'site_url' => get_site_url(),
'hashed_site_url' => md5( get_site_url() ), // used to protect against S&Rs/imports/syncs
'cache_site_id' => (int) \Jetpack_Options::get_option( 'id', -1 ),
'success_timestamp' => time(),
);

// Reset these, as we're now successfully connected.
$option['backoff_factor'] = 0;
$option['failed_attempts'] = 0;
$option['failure_timestamp'] = 0;

$update = update_option( self::HEARTBEAT_OPTION_NAME, $option, false );
if ( $update ) {
$this->last_heartbeat = $option;
Expand Down Expand Up @@ -308,17 +329,21 @@ protected function send_alert( $message = '', $wp_error = null ) {
$message .= sprintf( ' Site: %s (ID %d).', get_site_url(), defined( 'VIP_GO_APP_ID' ) ? VIP_GO_APP_ID : 0 );

$last_heartbeat = $this->last_heartbeat;
if ( isset( $last_heartbeat['site_url'], $last_heartbeat['cache_site_id'], $last_heartbeat['timestamp'] ) && -1 != $last_heartbeat['cache_site_id'] ) {
if ( isset( $last_heartbeat['site_url'], $last_heartbeat['cache_site_id'], $last_heartbeat['success_timestamp'] ) && -1 != $last_heartbeat['cache_site_id'] ) {
$message .= sprintf(
' The last known connection was on %s UTC to Cache Site ID %d (%s).',
gmdate( 'F j, H:i', $last_heartbeat['timestamp'] ), $last_heartbeat['cache_site_id'], $last_heartbeat['site_url']
gmdate( 'F j, H:i', $last_heartbeat['success_timestamp'] ), $last_heartbeat['cache_site_id'], $last_heartbeat['site_url']
);
}

if ( isset( $last_heartbeat['backoff_factor'] ) && $last_heartbeat['backoff_factor'] > 0 ) {
$message .= sprintf( ' Backoff Factor: %s hours.', $last_heartbeat['backoff_factor'] );
}

if ( isset( $last_heartbeat['failed_attempts'] ) && $last_heartbeat['failed_attempts'] > 0 ) {
$message .= sprintf( ' Failed Attempts: %s.', $last_heartbeat['failed_attempts'] );
}

if ( is_wp_error( $wp_error ) ) {
$message .= sprintf( ' Error: [%s] %s', $wp_error->get_error_code(), $wp_error->get_error_message() );
}
Expand Down

0 comments on commit cc7b432

Please sign in to comment.