vapor-community · mattesmohr · Mar 2, 2023 · Mar 4, 2023 · Mar 4, 2023 · Mar 4, 2023
diff --git a/Sources/EagerBeaver/Construction/Parser.swift b/Sources/EagerBeaver/Construction/Parser.swift
@@ -4,14 +4,19 @@ internal class Parser {
     /// A enumeration of possible errors
     internal enum ParserError: Error {
 
+        case missingBodyTag
         case missingHeadTag
         case missingHtmlTag
         case missingDoctypeTag
         case invalidToken
+        case invalidTag
 
         internal var description: String {
 
             switch self {
+            case .missingBodyTag:
+                return "Missing body tag."
+
             case .missingHeadTag:
                 return "Missing head tag."
 
@@ -24,6 +29,8 @@ internal class Parser {
             case .invalidToken:
                 return "Invalid token."
 
+            case .invalidTag:
+                return "Invalid tag."
             }
         }
     }
@@ -84,7 +91,7 @@ internal class Parser {
         }
     }
 
-    /// Inserts the node into the nodes collection
+    /// Inserts the node into the tree
     private func insert(node: HtmlNode) {
 
         self.log(#function)
@@ -173,7 +180,7 @@ internal class Parser {
                     self.nodes.append(ElementNode(token: tag))
 
                 case .endtag:
-                    fatalError()
+                    throw ParserError.invalidTag
                 }
 
             } else {
@@ -200,7 +207,7 @@ internal class Parser {
                     self.nodes.append(ElementNode(token: tag))
 
                 case .endtag:
-                    fatalError()
+                    throw ParserError.invalidTag
                 }
 
             } else {
@@ -240,8 +247,13 @@ internal class Parser {
 
             switch tag.kind {
             case .starttag:
+
                 self.nodes.append(ElementNode(token: tag))
 
+                if tag.name == "meta" || tag.name == "base" || tag.name == "link" {
+                    self.pop()
+                }
+
             case .endtag:
 
                 self.pop()
@@ -300,15 +312,21 @@ internal class Parser {
 
         if let tag = token as? TagToken {
 
-            switch tag.kind {
-            case .starttag:
-                self.nodes.append(ElementNode(token: tag))
+            if tag.name == "body" {
 
-            case .endtag:
-                self.pop()
+                switch tag.kind {
+                case .starttag:
+                    self.nodes.append(ElementNode(token: tag))
+
+                case .endtag:
+                    throw ParserError.invalidTag
+                }
+
+            } else {
+               throw ParserError.missingBodyTag
             }
 
-            return .afterhead
+            return .inbody
         }
 
         if let attribute = token as? AttributeToken {
@@ -328,7 +346,57 @@ internal class Parser {
 
         self.log(#function)
 
-        return .inbody
+        if let comment = token as? CommentToken {
+
+            if let last = self.nodes.last {
+                last.add(child: CommentNode(token: comment))
+            }
+
+            return .inbody
+        }
+
+        if let text = token as? TextToken {
+
+            if let last = self.nodes.last {
+                last.add(child: TextNode(token: text))
+            }
+
+            return .inbody
+        }
+
+        if let tag = token as? TagToken {
+
+            switch tag.kind {
+            case .starttag:
+
+                self.nodes.append(ElementNode(token: tag))
+
+                if tag.name == "input" || tag.name == "img" || tag.name == "area" || tag.name == "embed" || tag.name == "hr" || tag.name == "wbr" || tag.name == "br"  {
+                    self.pop()
+                }
+
+            case .endtag:
+
+                self.pop()
+
+                if tag.name == "body" {
+                    return .afterbody
+                }
+            }
+
+            return .inbody
+        }
+
+        if let attribute = token as? AttributeToken {
+
+            if let last = self.nodes.last {
+                last.add(attribute: AttributeNode(token: attribute))
+            }
+
+            return .inbody
+        }
+
+        throw ParserError.invalidToken
     }
 
     /// Processes the token
@@ -344,6 +412,34 @@ internal class Parser {
 
         self.log(#function)
 
-        return .afterbody
+        if let comment = token as? CommentToken {
+
+            if let last = self.nodes.last {
+                last.add(child: CommentNode(token: comment))
+            }
+
+            return .afterbody
+        }
+
+        if let tag = token as? TagToken {
+
+            if tag.name == "html" {
+
+                switch tag.kind {
+                case .starttag:
+                    throw ParserError.invalidTag
+
+                case .endtag:
+                    self.pop()
+                }
+
+            } else {
+                throw ParserError.missingHtmlTag
+            }
+
+            return .afterbody
+        }
+
+        throw ParserError.invalidToken
     }
 }
diff --git a/Sources/EagerBeaver/HtmlDefinition.swift b/Sources/EagerBeaver/HtmlDefinition.swift
@@ -19,6 +19,11 @@ public class HtmlDefinition {
     }
 
     internal func render() -> String {
-        return "<!DOCTYPE HTML PUBLIC \"\(publicId ?? "")\" \"\(systemId ?? "")\">"
+
+        if let publicId = self.publicId, let systemId = self.systemId {
+            return "<!DOCTYPE HTML PUBLIC \"\(publicId)\" \"\(systemId)\">"
+        }
+
+        return "<!DOCTYPE html>"
     }
 }
diff --git a/Sources/EagerBeaver/Tokenization/Tokenizer.swift b/Sources/EagerBeaver/Tokenization/Tokenizer.swift
@@ -271,6 +271,21 @@ internal class Tokenizer {
             return .starttag
         }
 
+        if character.isLetter {
+
+            if let token = self.token as? TextToken {
+
+                token.data.append(character)
+
+                self.token = token
+
+            } else {
+                self.token = TextToken(data: String(character))
+            }
+
+            return .text
+        }
+
         return .data
     }
 
@@ -321,7 +336,7 @@ internal class Tokenizer {
 
             try self.emit()
 
-            return .text
+            return .data
         }
 
         if character.isLetter || character.isNumber {
@@ -393,7 +408,14 @@ internal class Tokenizer {
 
         self.log(#function, character)
 
-        if character.isLetter {
+        if character.isGreaterThanSign {
+
+            try self.emit()
+
+            return .data
+        }
+
+        if character.isLetter || character.isHyphenMinus {
 
             if let token = self.token as? AttributeToken {
 
@@ -429,7 +451,14 @@ internal class Tokenizer {
 
         self.log(#function, character)
 
-        if character.isLetter {
+        if character.isApostrophe || character.isQuotationMark {
+
+            try self.emit()
+
+            return .afterattributevalue
+        }
+
+        if character.isASCII {
 
             if let token = self.token as? AttributeToken {
 
@@ -441,13 +470,6 @@ internal class Tokenizer {
             return .attributevalue
         }
 
-        if character.isApostrophe || character.isQuotationMark {
-
-            try self.emit()
-
-            return .afterattributevalue
-        }
-
         throw TokenizerError.invalidCharacter(character)
     }
 
@@ -456,6 +478,10 @@ internal class Tokenizer {
 
         self.log(#function, character)
 
+        if character.isWhitespace {
+            return .beforeattributename
+        }
+
         if character.isSolidus {
             return .selfclosing
         }

diff --git a/Tests/EagerBeaverTests/TokenizerTests.swift b/Tests/EagerBeaverTests/TokenizerTests.swift
@@ -20,6 +20,9 @@ final class TokenizerTests: XCTestCase {
 
         // ...when the tag name is missing
         XCTAssertThrowsError(try Tokenizer(log: .information).consume("<>"))
+
+        // ...when the tag name contains a number
+        XCTAssertNoThrow(try Tokenizer(log: .information).consume("<h1>"))
     }
 
     // Tests consuming a end tag
@@ -30,6 +33,9 @@ final class TokenizerTests: XCTestCase {
 
         // ...when the tag name is missing
         XCTAssertThrowsError(try Tokenizer(log: .information).consume("</>"))
+
+        // ...when the tag name contains a number
+        XCTAssertNoThrow(try Tokenizer(log: .information).consume("</h1>"))
     }
 
     // Tests consuming a doctype
@@ -108,13 +114,25 @@ final class TokenizerTests: XCTestCase {
 
         // ...with double quotation mark and no value
         XCTAssertNoThrow(try Tokenizer(log: .information).consume("<html name=\"\">"))
+
+        // ...with a value, containing a number
+        XCTAssertNoThrow(try Tokenizer(log: .information).consume("<html name=\"8\">"))
+
+        // ...with a value, containing a hyphen
+        XCTAssertNoThrow(try Tokenizer(log: .information).consume("<html name=\"-\">"))
+
+        // ...with a name, containing a hyphen
+        XCTAssertNoThrow(try Tokenizer(log: .information).consume("<html name-name=\"\">"))
+
+        // ...with a single name
+        XCTAssertNoThrow(try Tokenizer(log: .information).consume("<html name>"))
     }
 
     // Tests consuming a whole element
     func testElement() throws {
 
         // ...with content
-        XCTAssertNoThrow(try Tokenizer(log: .information).consume("<title>content</tile>"))
+        XCTAssertNoThrow(try Tokenizer(log: .information).consume("<title>content</title>"))
 
         // ...with content seperated by a whitespace
         XCTAssertNoThrow(try Tokenizer(log: .information).consume("<title>content content</title>"))