From 241d4f0db7416f51ea1db3bc6b313b31cfb4e967 Mon Sep 17 00:00:00 2001 From: VAN BOSSUYT Nicolas Date: Tue, 19 Mar 2024 13:33:59 +0100 Subject: [PATCH] web-html: More spec comment. --- src/web/web-html/builder.cpp | 37 +++++++++++++++++++++++++++++------- src/web/web-html/builder.h | 33 ++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 9 deletions(-) diff --git a/src/web/web-html/builder.cpp b/src/web/web-html/builder.cpp index 77f6b814631..2c16f491d54 100644 --- a/src/web/web-html/builder.cpp +++ b/src/web/web-html/builder.cpp @@ -5,18 +5,18 @@ namespace Web::Html { -void Builder::_switchTo(Mode mode) { - _mode = mode; -} +// 13.2.6 Tree construction +// https://html.spec.whatwg.org/multipage/parsing.html#tree-construction + +// 13.2.2 Parse errors +// https://html.spec.whatwg.org/multipage/parsing.html#parse-errors void Builder::_raise(Str msg) { logError("{}: {}", toStr(_mode), msg); } -// https://html.spec.whatwg.org/multipage/parsing.html#the-initial-insertion-mode -Dom::QuirkMode Builder::_whichQuirkMode(Token const &) { - // NOSPEC: We assume no quirk mode - return Dom::QuirkMode::NO; +void Builder::_switchTo(Mode mode) { + _mode = mode; } // https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token @@ -26,7 +26,16 @@ Strong Builder::_createElementFor(Token const &t) { return el; } +// 13.2.6.4 The rules for parsing tokens in HTML content + +// 13.2.6.4.1 The "initial" insertion mode // https://html.spec.whatwg.org/multipage/parsing.html#the-initial-insertion-mode + +Dom::QuirkMode Builder::_whichQuirkMode(Token const &) { + // NOSPEC: We assume no quirk mode + return Dom::QuirkMode::NO; +} + void Builder::_handleInitialMode(Token const &t) { if (t.type == Token::CHARACTER and (t.rune == '\t' or @@ -87,7 +96,21 @@ void Builder::_handleBeforeHead(Token const &t) { t.rune == '\n' or t.rune == '\f' or t.rune == ' ')) { + // Ignore the token. + } else if (t.type == Token::COMMENT) { + // Insert a comment. + } else if (t.type == Token::DOCTYPE) { + // Parse error. Ignore the token. + _raise(); + } else if (t.type == Token::START_TAG and t.name == "html") { + // Process the token using the rules for the "in body" insertion mode. + _acceptIn(Mode::IN_BODY, t); + } else if (t.type == Token::START_TAG and t.name == "head") { + + } else if (t.type == Token::END_TAG and not(t.name == "head" or t.name == "body" or t.name == "html" or t.name == "br")) { // ignore + _raise(); + } else { } } diff --git a/src/web/web-html/builder.h b/src/web/web-html/builder.h index bee23c5ac33..fa8e2374488 100644 --- a/src/web/web-html/builder.h +++ b/src/web/web-html/builder.h @@ -34,6 +34,9 @@ namespace Web::Html { MODE(AFTER_AFTER_FRAMESET) struct Builder { + // 13.2.6 Tree construction + // https://html.spec.whatwg.org/multipage/parsing.html#tree-construction + enum struct Mode { #define ITER(NAME) NAME, FOREACH_INSERTION_MODE(ITER) @@ -44,19 +47,45 @@ struct Builder { Lexer _lexer; Strong _document; Vec> _openElements; + Opt> _headElement; + Opt> _formElement; Builder(Strong document) : _document(document) { } - void _switchTo(Mode mode); + // 13.2.2 Parse errors + // https://html.spec.whatwg.org/multipage/parsing.html#parse-errors void _raise(Str msg = "parse-error"); - Dom::QuirkMode _whichQuirkMode(Token const &); + // 13.2.6.1 Creating and inserting nodes + // https://html.spec.whatwg.org/multipage/parsing.html#creating-and-inserting-nodes + + void _apropriatePlaceForInsertingANode(); Strong _createElementFor(Token const &t); + void _insertAnElementAtTheAdjustedInsertionLocation(); + + void _insertAForeignElement(Token const &t); + + void _insertAnHtmlElement(); + + void _insertACharacter(); + + void _insertAComment(); + + ////////////////////////////////////////// + + void _switchTo(Mode mode); + + // 13.2.6.4 The rules for parsing tokens in HTML content + + // 13.2.6.4.1 The "initial" insertion mode + // https://html.spec.whatwg.org/multipage/parsing.html#the-initial-insertion-mode + Dom::QuirkMode _whichQuirkMode(Token const &); + void _handleInitialMode(Token const &t); void _handleBeforeHtml(Token const &t);