diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
index 4dd7d0e492d9e..f652900e2455d 100644
--- a/src/wp-includes/html-api/class-wp-html-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -417,6 +417,112 @@ public function next_tag( $query = null ) {
 		return false;
 	}
 
+	/**
+	 * Returns the raw HTML content inside a matched tag.
+	 *
+	 * Steps forward through the document until the matched element is no
+	 * longer on the stack of open elements, and does not seek back, so the
+	 * processor is left wherever that stepping stopped.
+	 *
+	 * @since 6.4.0
+	 *
+	 * @throws Exception When unable to allocate a bookmark for internal tracking of the open tag.
+	 *
+	 * @return string|null The inner HTML if available, else NULL.
+	 */
+	public function get_inner_markup() {
+		if ( null === $this->get_tag() ) {
+			return null;
+		}
+
+		$start = $this->current_token;
+		if ( ! parent::set_bookmark( 'start' ) ) {
+			throw new Exception( 'could not allocate bookmark' );
+		}
+
+		// @TODO: add after-pop hook to turn this into a constant boolean check.
+		do {
+			$found_tag = $this->step();
+		} while ( $found_tag && $this->state->stack_of_open_elements->contains_node( $start ) );
+
+		/*
+		 * If there's no tag to bookmark then it means the opened tag has no closing
+		 * and the rest of the document is contained within the inner HTML.
+		 */
+		if ( ! $found_tag ) {
+			$inner_html = $this->substr_bookmark( 'after', 'start' );
+			parent::release_bookmark( 'start' );
+			return $inner_html;
+		}
+
+		if ( ! parent::set_bookmark( 'end' ) ) {
+			parent::release_bookmark( 'start' );
+			throw new Exception( 'could not allocate bookmark' );
+		}
+
+		$inner_html = $this->substr_bookmarks( 'after', 'start', 'before', 'end' );
+		parent::release_bookmark( 'start' );
+		parent::release_bookmark( 'end' );
+
+		return $inner_html;
+	}
+
+	/**
+	 * Returns the raw HTML of a matched tag, including the tag itself.
+	 *
+	 * The result begins at the start of the matched tag. It runs through the
+	 * matching closing tag when stepping stops on it, otherwise it ends just
+	 * before the tag where stepping stopped or at the end of the document.
+	 *
+	 * @since 6.4.0
+	 *
+	 * @throws Exception When unable to allocate a bookmark for internal tracking of the open tag.
+	 *
+	 * @return string|null The outer HTML if available, else NULL.
+	 */
+	public function get_outer_markup() {
+		if ( null === $this->get_tag() ) {
+			return null;
+		}
+
+		$start = $this->current_token;
+		if ( ! parent::set_bookmark( 'start' ) ) {
+			throw new Exception( 'could not allocate bookmark' );
+		}
+
+		// @TODO: add after-pop hook to turn this into a constant boolean check.
+		do {
+			$found_tag = $this->step();
+		} while ( $found_tag && $this->state->stack_of_open_elements->contains_node( $start ) );
+
+		/*
+		 * If there's no tag to bookmark then it means the opened tag has no closing
+		 * and the rest of the document is contained within the outer HTML.
+		 */
+		if ( ! $found_tag ) {
+			$outer_html = $this->substr_bookmark( 'before', 'start' );
+			parent::release_bookmark( 'start' );
+			return $outer_html;
+		}
+
+		if ( ! parent::set_bookmark( 'end' ) ) {
+			parent::release_bookmark( 'start' );
+			throw new Exception( 'could not allocate bookmark' );
+		}
+
+		/*
+		 * Include the closing tag only when stepping stopped on the closer for
+		 * the matched element; otherwise stop just before the current tag.
+		 */
+		$did_close    = $this->get_tag() === $start->node_name && $this->is_tag_closer();
+		$end_position = $did_close ? 'after' : 'before';
+		$outer_html   = $this->substr_bookmarks( 'before', 'start', $end_position, 'end' );
+		parent::release_bookmark( 'start' );
+		parent::release_bookmark( 'end' );
+
+		return $outer_html;
+	}
+
 	/**
 	 * Steps through the HTML document and stop at the next tag, if any.
 	 *
@@ -437,12 +543,9 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
 				$this->state->stack_of_open_elements->pop();
 			}
 
-			parent::next_tag( self::VISIT_EVERYTHING );
-		}
-
-		// Finish stepping when there are no more tokens in the document.
-		if ( null === $this->get_tag() ) {
-			return false;
+			if ( ! parent::next_tag( self::VISIT_EVERYTHING ) ) {
+				return false;
+			}
 		}
 
 		$this->current_token = new WP_HTML_Token(
@@ -722,6 +825,42 @@ private function bookmark_tag() {
 		return "{$this->bookmark_counter}";
 	}
 
+	/**
+	 * Returns a substring of the input HTML document from a bookmark until the end.
+	 *
+	 * @since 6.4.0
+	 *
+	 * @param string $start_position "before" to clip before bookmark, "after" to clip after.
+	 * @param string $start          Bookmark name at which to start clipping.
+	 * @return string Clipped substring of input HTML document.
+	 */
+	private function substr_bookmark( $start_position, $start ) {
+		$start_bookmark = $this->bookmarks[ $start ];
+		$start_offset   = 'before' === $start_position ? $start_bookmark->start : $start_bookmark->end + 1;
+
+		return substr( $this->html, $start_offset );
+	}
+
+	/**
+	 * Returns a substring of the input HTML document delimited by bookmarks.
+	 *
+	 * @since 6.4.0
+	 *
+	 * @param string $start_position "before" to clip before bookmark, "after" to clip after.
+	 * @param string $start          Bookmark name at which to start clipping.
+	 * @param string $end_position   "before" to clip before bookmark, "after" to clip after.
+	 * @param string $end            Bookmark name at which to end clipping.
+	 * @return string Clipped substring of input HTML document.
+	 */
+	private function substr_bookmarks( $start_position, $start, $end_position, $end ) {
+		$start_bookmark = $this->bookmarks[ $start ];
+		$end_bookmark   = $this->bookmarks[ $end ];
+		$start_offset   = 'before' === $start_position ? $start_bookmark->start : $start_bookmark->end + 1;
+		$end_offset     = 'before' === $end_position ? $end_bookmark->start : $end_bookmark->end + 1;
+
+		return substr( $this->html, $start_offset, $end_offset - $start_offset );
+	}
+
 	/*
 	 * HTML semantic overrides for Tag Processor
 	 */
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorGetInnerMarkup.php b/tests/phpunit/tests/html-api/wpHtmlProcessorGetInnerMarkup.php
new file mode 100644
index 0000000000000..d16aac6f9cc91
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessorGetInnerMarkup.php
@@ -0,0 +1,93 @@
+
Inside the P
This is not in the match. +
This is another paragraph not in the match. + +
This is also note in the match.
+Inside the P
Inside the P' ), + ); + + $inner_html = <<This is inside the Match
+ +This is not in the match. +
This is another paragraph not in the match. +
This is also note in the match.
+