From 7703d8738636ba7cac62f207edb132f606e10f48 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Thu, 18 Jul 2024 14:31:43 -0700 Subject: [PATCH 01/10] zig update: Update signature for BuildStep makeFn --- build/GenerateDef.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build/GenerateDef.zig b/build/GenerateDef.zig index 9c6a4d6c..508a629f 100644 --- a/build/GenerateDef.zig +++ b/build/GenerateDef.zig @@ -50,8 +50,8 @@ pub fn create(owner: *std.Build, options: Options) std.Build.Module.Import { }; } -fn make(step: *Step, prog_node: std.Progress.Node) !void { - _ = prog_node; +fn make(step: *Step, options: std.Build.Step.MakeOptions) !void { + _ = options; const b = step.owner; const self: *GenerateDef = @fieldParentPtr("step", step); const arena = b.allocator; From c22f743b0a778c9ebbfe1c179c7c2fec5ce1021a Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Tue, 14 May 2024 11:41:39 -0700 Subject: [PATCH 02/10] wip new preprocessor --- src/aro.zig | 1 + src/aro/Diagnostics/messages.def | 10 + src/aro/Driver.zig | 45 + src/aro/NewPreprocessor.zig | 2097 ++++++++++++++++++++++++++++++ src/aro/Tokenizer.zig | 17 + src/aro/Treap.zig | 165 +++ 6 files changed, 2335 insertions(+) create mode 100644 src/aro/NewPreprocessor.zig create mode 100644 src/aro/Treap.zig diff --git a/src/aro.zig b/src/aro.zig index a9b76e0e..45e90154 100644 --- a/src/aro.zig +++ b/src/aro.zig @@ -35,5 +35,6 @@ test { _ = @import("aro/target.zig"); _ = @import("aro/Tokenizer.zig"); _ = @import("aro/toolchains/Linux.zig"); + _ = @import("aro/treap.zig"); _ = @import("aro/Value.zig"); } diff --git a/src/aro/Diagnostics/messages.def b/src/aro/Diagnostics/messages.def index 119e72a2..7700e1b8 100644 --- a/src/aro/Diagnostics/messages.def +++ b/src/aro/Diagnostics/messages.def @@ -2509,3 +2509,13 @@ auto_type_self_initialized .msg = "variable '{s}' declared with deduced type '__auto_type' cannot appear in its own initializer" .extra = .str .kind = .@"error" + +expected_left_angle_bracket + .msg = "expected '<' but got '{s}'" + .extra = .str + .kind = .@"error" + +closing_paren_after + .msg = "expected '(' after '{s}'" + .extra = .str + .kind = .@"error" diff --git a/src/aro/Driver.zig b/src/aro/Driver.zig index db0b1155..2d383bf0 100644 --- a/src/aro/Driver.zig +++ b/src/aro/Driver.zig @@ -9,6 +9,7 @@ const Compilation = @import("Compilation.zig"); const Diagnostics = @import("Diagnostics.zig"); const LangOpts = @import("LangOpts.zig"); const Preprocessor = @import("Preprocessor.zig"); +const NewPreprocessor = @import("NewPreprocessor.zig"); const Source = @import("Source.zig"); const Toolchain = @import("Toolchain.zig"); const target_util = @import("target.zig"); @@ -36,6 +37,7 @@ line_commands: bool = true, /// If true, use `#line ` instead of `# ` for line directives use_line_directives: bool = false, only_preprocess: bool = false, +new_preprocessor: bool = false, only_syntax: bool = false, only_compile: bool = false, only_preprocess_and_compile: bool = false, @@ -236,6 +238,8 @@ pub fn parseArgs( d.only_compile = true; } else if (mem.eql(u8, arg, "-E")) { d.only_preprocess = true; + } else if (mem.eql(u8, arg, "-fnew-preprocessor")) { + d.new_preprocessor = true; } else if (mem.eql(u8, arg, "-P") or mem.eql(u8, arg, "--no-line-commands")) { d.line_commands = false; } else if (mem.eql(u8, arg, "-fuse-line-directives")) { @@ -630,6 +634,47 @@ fn processSource( comptime fast_exit: bool, ) !void { d.comp.generated_buf.items.len = 0; + if (d.new_preprocessor) { + var pp = try 
NewPreprocessor.initDefault(d.comp); + defer pp.deinit(); + if (d.comp.langopts.ms_extensions) { + d.comp.ms_cwd_source_id = source.id; + } + + if (d.verbose_pp) pp.verbose = true; + if (d.only_preprocess) { + pp.preserve_whitespace = true; + if (d.line_commands) { + pp.linemarkers = if (d.use_line_directives) .line_directives else .numeric_directives; + } + } + + try pp.preprocessSources(&.{ source, builtin, user_macros }); + + d.renderErrors(); + + if (d.comp.diagnostics.errors != 0) { + if (fast_exit) std.process.exit(1); // Not linking, no need for cleanup. + return; + } + + const file = if (d.output_name) |some| + std.fs.cwd().createFile(some, .{}) catch |er| + return d.fatal("unable to create output file '{s}': {s}", .{ some, errorDescription(er) }) + else + std.io.getStdOut(); + defer if (d.output_name != null) file.close(); + + var buf_w = std.io.bufferedWriter(file.writer()); + pp.prettyPrintTokens(buf_w.writer()) catch |er| + return d.fatal("unable to write result: {s}", .{errorDescription(er)}); + + buf_w.flush() catch |er| + return d.fatal("unable to write result: {s}", .{errorDescription(er)}); + + std.process.exit(0); // Not linking, no need for cleanup. + return; + } var pp = try Preprocessor.initDefault(d.comp); defer pp.deinit(); diff --git a/src/aro/NewPreprocessor.zig b/src/aro/NewPreprocessor.zig new file mode 100644 index 00000000..77442d26 --- /dev/null +++ b/src/aro/NewPreprocessor.zig @@ -0,0 +1,2097 @@ +const std = @import("std"); +const mem = std.mem; +const Allocator = mem.Allocator; +const assert = std.debug.assert; +const Compilation = @import("Compilation.zig"); +const Error = Compilation.Error; +const Source = @import("Source.zig"); +const Tokenizer = @import("Tokenizer.zig"); +const RawToken = Tokenizer.Token; +const Parser = @import("Parser.zig"); +const Diagnostics = @import("Diagnostics.zig"); +const Tree = @import("Tree.zig"); +const Token = Tree.Token; +const TokenWithExpansionLocs = Tree.TokenWithExpansionLocs; +const Attribute = @import("Attribute.zig"); +const features = @import("features.zig"); +const OldPreprocessor = @import("Preprocessor.zig"); +const Treap = @import("treap.zig"); + +const ParamMap = std.StringHashMapUnmanaged(PreprocessorToken); +const DefineMap = std.StringHashMapUnmanaged(Macro); + +const TokenList = std.ArrayListUnmanaged(PreprocessorToken); +const max_include_depth = 200; + +/// Errors that can be returned when expanding a macro. 
+/// error.UnknownPragma can occur within Preprocessor.pragma() but +/// it is handled there and doesn't escape that function +const MacroError = Error || error{StopPreprocessing}; + +const PreprocessingError = Error || error{PreprocessingFailed}; + +const SpecialMacroFn = fn (*Preprocessor, PreprocessorToken) Error!void; + +fn Range(comptime T: type) type { + return struct { + const Self = @This(); + const Item = T; + + start: u32, + end: u32, + const empty: Self = .{ .start = 0, .end = 0 }; + + fn len(self: Self) u32 { + return self.end - self.start; + } + + fn slice(self: Self, items: []const Item) []const Item { + return items[self.start..self.end]; + } + }; +} + +/// Each macro argument is a list of tokens (represented as a range of Preprocessor.macro_arg_tokens) +const MacroArg = Range(PreprocessorToken); + +/// List of MacroArg's for a macro invocation (represented as a range of Preprocessor.macro_args) +const MacroArgList = Range(MacroArg); + +const PreprocessorToken = struct { + flags: packed struct(u8) { + is_bol: bool = false, + space: bool = false, + _: u6 = undefined, + } = .{}, + id: Tokenizer.Token.Id, + hideset: Treap.Node = null, + loc: Source.Location, + expansion_locs: ?[*]Source.Location = null, + + fn argPosition(self: PreprocessorToken) u32 { + std.debug.assert(self.id == .macro_param); + return self.loc.byte_offset; + } + + fn isVarArg(self: PreprocessorToken) bool { + std.debug.assert(self.id == .macro_param); + return self.loc.line != 0; + } + + pub fn expansionSlice(tok: PreprocessorToken) []const Source.Location { + const locs = tok.expansion_locs orelse return &[0]Source.Location{}; + var i: usize = 0; + while (locs[i].id != .unused) : (i += 1) {} + return locs[0..i]; + } + + pub fn addExpansionLocation(tok: *PreprocessorToken, gpa: std.mem.Allocator, new: []const Source.Location) !void { + if (new.len == 0 or tok.id == .whitespace or tok.id == .macro_ws or tok.id == .placemarker) return; + var list = std.ArrayList(Source.Location).init(gpa); + defer { + @memset(list.items.ptr[list.items.len..list.capacity], .{}); + // Add a sentinel to indicate the end of the list since + // the ArrayList's capacity isn't guaranteed to be exactly + // what we ask for. + if (list.capacity > 0) { + list.items.ptr[list.capacity - 1].byte_offset = 1; + } + tok.expansion_locs = list.items.ptr; + } + + if (tok.expansion_locs) |locs| { + var i: usize = 0; + while (locs[i].id != .unused) : (i += 1) {} + list.items = locs[0..i]; + while (locs[i].byte_offset != 1) : (i += 1) {} + list.capacity = i + 1; + } + + const min_len = @max(list.items.len + new.len + 1, 4); + const wanted_len = std.math.ceilPowerOfTwo(usize, min_len) catch + return error.OutOfMemory; + try list.ensureTotalCapacity(wanted_len); + + for (new) |new_loc| { + if (new_loc.id == .generated) continue; + list.appendAssumeCapacity(new_loc); + } + } + + pub fn free(expansion_locs: ?[*]Source.Location, gpa: std.mem.Allocator) void { + const locs = expansion_locs orelse return; + var i: usize = 0; + while (locs[i].id != .unused) : (i += 1) {} + while (locs[i].byte_offset != 1) : (i += 1) {} + gpa.free(locs[0 .. 
i + 1]); + } + + pub fn dupe(tok: PreprocessorToken, gpa: std.mem.Allocator) !PreprocessorToken { + var copy = tok; + copy.expansion_locs = null; + try copy.addExpansionLocation(gpa, tok.expansionSlice()); + return copy; + } + + pub fn checkMsEof(tok: PreprocessorToken, source: Source, comp: *Compilation) !void { + std.debug.assert(tok.id == .eof); + if (source.buf.len > tok.loc.byte_offset and source.buf[tok.loc.byte_offset] == 0x1A) { + try comp.addDiagnostic(.{ + .tag = .ctrl_z_eof, + .loc = .{ + .id = source.id, + .byte_offset = tok.loc.byte_offset, + .line = tok.loc.line, + }, + }, &.{}); + } + } + + const one: PreprocessorToken = .{ .id = .one, .loc = .{} }; + const zero: PreprocessorToken = .{ .id = .zero, .loc = .{} }; +}; + +const Macro = struct { + /// Tokens constituting the macro body + tokens: []const PreprocessorToken, + + /// Number of arguments for function-like macros + nargs: usize, + + /// If the function type macro has variable number of arguments + var_args: bool, + + /// Location of macro in the source + loc: Source.Location, + + kind: Kind, + + const Kind = union(enum) { + object, + func, + special: *const SpecialMacroFn, + }; + + fn eql(a: Macro, b: Macro, pp: *Preprocessor) bool { + if ((a.kind == .object and b.kind != .object) or (a.kind == .func and b.kind != .func)) return false; + if (!std.meta.eql(a.kind, b.kind)) return false; + if (a.tokens.len != b.tokens.len) return false; + for (a.tokens, b.tokens) |a_tok, b_tok| if (!tokEql(pp, a_tok, b_tok)) return false; + + if (a.kind == .func) { + if (a.var_args != b.var_args) return false; + } + + return true; + } + + fn tokEql(pp: *Preprocessor, a: PreprocessorToken, b: PreprocessorToken) bool { + return mem.eql(u8, pp.tokSlice(a), pp.tokSlice(b)); + } +}; + +const Preprocessor = @This(); + +const ExpansionEntry = struct { + idx: Tree.TokenIndex, + locs: [*]Source.Location, +}; + +const TokenState = struct { + tokens_len: usize, + expansion_entries_len: usize, +}; + +comp: *Compilation, +gpa: mem.Allocator, +arena: std.heap.ArenaAllocator, + +tokens: std.MultiArrayList(PreprocessorToken) = .{}, +/// Do not directly mutate this; must be kept in sync with `tokens` +expansion_entries: std.MultiArrayList(ExpansionEntry) = .{}, + +/// Map from Source.Id to macro name in the `#ifndef` condition which guards the source, if any +include_guards: std.AutoHashMapUnmanaged(Source.Id, []const u8) = .{}, + +char_buf: std.ArrayListUnmanaged(u8) = .{}, + +/// Dump current state to stderr. +verbose: bool = false, +preserve_whitespace: bool = false, + +/// linemarker tokens. Must be .none unless in -E mode (parser does not handle linemarkers) +linemarkers: Linemarkers = .none, + +tokenizers: std.ArrayListUnmanaged(Tokenizer) = .{}, + +expansion_bufs: std.ArrayListUnmanaged(TokenList) = .{}, + +defines: DefineMap = .{}, + +generated_line: u32 = 1, + +counter: u32 = 0, + +if_level: u8 = 0, + +if_kind: std.PackedIntArray(u2, 256) = blk: { + @setEvalBranchQuota(2000); + break :blk std.PackedIntArray(u2, 256).initAllTo(0); +}, + +guard_stack: std.ArrayListUnmanaged(?[]const u8) = .{}, + +macro_arg_tokens: std.ArrayListUnmanaged(MacroArg.Item) = .{}, +macro_args: std.ArrayListUnmanaged(MacroArgList.Item) = .{}, + +safe_strings: std.StringHashMapUnmanaged(void) = .{}, + +treap: Treap, + +pub const parse = Parser.parse; + +pub const Linemarkers = enum { + /// No linemarker tokens. 
Required setting if parser will run + none, + /// #line "filename" + line_directives, + /// # "filename" flags + numeric_directives, +}; + +pub fn init(comp: *Compilation) Preprocessor { + const pp = Preprocessor{ + .comp = comp, + .gpa = comp.gpa, + .arena = std.heap.ArenaAllocator.init(comp.gpa), + .treap = Treap.init(comp.gpa), + }; + comp.pragmaEvent(.before_preprocess); + return pp; +} + +fn addBuiltinMacro(pp: *Preprocessor, name: []const u8, func: *const SpecialMacroFn) !void { + try pp.defines.putNoClobber(pp.gpa, name, .{ + .tokens = &.{}, + .var_args = false, + .loc = .{ .id = .generated }, + .kind = .{ .special = func }, + .nargs = 0, + }); +} + +fn handleLineMacro(pp: *Preprocessor, tok: PreprocessorToken) Error!void { + const start = pp.comp.generated_buf.items.len; + const source = pp.comp.getSource(tok.loc.id); + const w = pp.comp.generated_buf.writer(pp.gpa); + try w.print("{d}\n", .{source.physicalLine(tok.loc)}); + const pasted_tok = try pp.makeGeneratedToken(start, .pp_num, tok); + return pp.ungetToken(pasted_tok); +} + +fn handleFileMacro(pp: *Preprocessor, tok: PreprocessorToken) Error!void { + const start = pp.comp.generated_buf.items.len; + const source = pp.comp.getSource(tok.loc.id); + const w = pp.comp.generated_buf.writer(pp.gpa); + try w.print("\"{s}\"\n", .{source.path}); + const pasted_tok = try pp.makeGeneratedToken(start, .string_literal, tok); + return pp.ungetToken(pasted_tok); +} + +fn handleCounterMacro(pp: *Preprocessor, tok: PreprocessorToken) Error!void { + defer pp.counter += 1; + const start = pp.comp.generated_buf.items.len; + const w = pp.comp.generated_buf.writer(pp.gpa); + try w.print("{d}\n", .{pp.counter}); + const pasted_tok = try pp.makeGeneratedToken(start, .pp_num, tok); + return pp.ungetToken(pasted_tok); +} + +fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: PreprocessorToken) !PreprocessorToken { + const pasted_token = PreprocessorToken{ .id = id, .flags = source.flags, .loc = .{ + .id = .generated, + .byte_offset = @intCast(start), + .line = pp.generated_line, + } }; + pp.generated_line += 1; + // try pasted_token.addExpansionLocation(pp.gpa, &.{source.loc}); + // try pasted_token.addExpansionLocation(pp.gpa, source.expansionSlice()); + return pasted_token; +} + +fn errStr(pp: *Preprocessor, tok: PreprocessorToken, tag: Diagnostics.Tag, str: []const u8) !void { + try pp.comp.addDiagnostic(.{ + .tag = tag, + .loc = tok.loc, + .extra = .{ .str = str }, + }, &.{}); // todo expansion slice +} + +fn errTok(pp: *Preprocessor, tok: PreprocessorToken, tag: Diagnostics.Tag) !void { + try pp.comp.addDiagnostic(.{ + .tag = tag, + .loc = tok.loc, + .extra = .{ .none = {} }, + }, &.{}); // todo expansion slice +} + +fn expectClosing(pp: *Preprocessor, opening: PreprocessorToken, id: Token.Id) !void { + // todo: fix expect + const item = try pp.expect(id, .closing_paren); + if (item.id != id) { + try pp.errTok(opening, .to_match_paren); + } +} + +fn tokFromBool(b: bool) PreprocessorToken { + return if (b) PreprocessorToken.one else PreprocessorToken.zero; +} + +fn handleHasAttribute(pp: *Preprocessor, tok: PreprocessorToken) Error!void { + _ = tok; + const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); + const attr_name = try pp.readToken(); + try pp.expectClosing(l_paren, .r_paren); + + const has_attr = Attribute.fromString(.gnu, null, pp.tokSlice(attr_name)) != null; + return pp.ungetToken(tokFromBool(has_attr)); +} + +fn handleHasCAttribute(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void 
{ + _ = macro_tok; + const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); + var r: TokenList = .{}; + defer r.deinit(pp.gpa); + + var tok: PreprocessorToken = undefined; + while (true) { + tok = try pp.readToken(); + if (tok.id == .comment) continue; + if (tok.id.isDirectiveEnd() or tok.id == .r_paren) break; + try r.append(pp.gpa, tok); + } + try pp.expectClosing(l_paren, .r_paren); +} + +fn handleHasDeclSpecAttribute(pp: *Preprocessor, tok: PreprocessorToken) Error!void { + _ = tok; + const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); + const attr_name = try pp.readToken(); + try pp.expectClosing(l_paren, .r_paren); + + const ident_str = pp.tokSlice(attr_name); + const has_attr = if (pp.comp.langopts.declspec_attrs) Attribute.fromString(.declspec, null, ident_str) != null else false; + return pp.ungetToken(tokFromBool(has_attr)); +} + +fn handleHasFeature(pp: *Preprocessor, tok: PreprocessorToken) Error!void { + _ = tok; + const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); + const attr_name = try pp.readToken(); + try pp.expectClosing(l_paren, .r_paren); + + const ident_str = pp.tokSlice(attr_name); + const has_feature = features.hasFeature(pp.comp, ident_str); + return pp.ungetToken(tokFromBool(has_feature)); +} + +fn handleHasExtension(pp: *Preprocessor, tok: PreprocessorToken) Error!void { + _ = tok; + const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); + const attr_name = try pp.readToken(); + try pp.expectClosing(l_paren, .r_paren); + + const ident_str = pp.tokSlice(attr_name); + const has_extension = features.hasExtension(pp.comp, ident_str); + return pp.ungetToken(tokFromBool(has_extension)); +} + +fn handleHasBuiltin(pp: *Preprocessor, tok: PreprocessorToken) Error!void { + _ = tok; + const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); + const attr_name = try pp.readToken(); + try pp.expectClosing(l_paren, .r_paren); + + const ident_str = pp.tokSlice(attr_name); + const has_builtin = pp.comp.hasBuiltin(ident_str); + return pp.ungetToken(tokFromBool(has_builtin)); +} + +fn handleHasWarning(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { + const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); + const start = pp.char_buf.items.len; + defer pp.char_buf.items.len = start; + + while (true) { + const tok = try pp.readExpandNewline(); + switch (tok.id) { + .nl, .eof => { + try pp.errTok(tok, .unterminated_macro_arg_list); + return pp.ungetToken(PreprocessorToken.zero); + }, + .r_paren => break, + .string_literal => { + const string = pp.tokSlice(tok); + try pp.char_buf.appendSlice(pp.gpa, string[1 .. 
string.len - 1]); + }, + else => { + pp.skipToNl(); + try pp.errTok(tok, .missing_paren_param_list); + try pp.errTok(l_paren, .to_match_paren); + return pp.ungetToken(PreprocessorToken.zero); + }, + } + } + const actual_param = pp.char_buf.items[start..]; + if (actual_param.len == 0) { + try pp.comp.addDiagnostic(.{ + .tag = .expected_arguments, + .loc = macro_tok.loc, + .extra = .{ .arguments = .{ .expected = 1, .actual = 0 } }, + }, &.{}); // todo expansion slice + return pp.ungetToken(PreprocessorToken.zero); + } + if (!mem.startsWith(u8, actual_param, "-W")) { + try pp.errStr(l_paren, .malformed_warning_check, "__has_warning"); + return pp.ungetToken(PreprocessorToken.zero); + } + const warning_name = actual_param[2..]; + const exists = Diagnostics.warningExists(warning_name); + return pp.ungetToken(tokFromBool(exists)); +} + +fn handleHasInclude(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { + return pp.handleHasIncludeExtra(macro_tok, .first); +} + +fn handleHasIncludeNext(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { + return pp.handleHasIncludeExtra(macro_tok, .next); +} + +fn handleHasIncludeExtra(pp: *Preprocessor, macro_tok: PreprocessorToken, which: Compilation.WhichInclude) Error!void { + const l_paren = pp.getToken(); + if (l_paren.id != .l_paren) { + pp.skipToNl(); + return; + } + + var is_std: bool = undefined; + const include_str = pp.readHeaderName(&is_std) catch |err| switch (err) { + error.InvalidInclude => return pp.ungetToken(PreprocessorToken.zero), + else => |e| return e, + }; + try pp.expectClosing(l_paren, .r_paren); + + const filename = include_str[1 .. include_str.len - 1]; + const include_type: Compilation.IncludeType = switch (include_str[0]) { + '"' => .quotes, + '<' => .angle_brackets, + else => unreachable, + }; + + if (which == .first or pp.includeDepth() == 0) { + if (which == .next) { + try pp.comp.addDiagnostic(.{ + .tag = .include_next_outside_header, + .loc = macro_tok.loc, + }, &.{}); + } + const has = try pp.comp.hasInclude(filename, macro_tok.loc.id, include_type, .first); + return pp.ungetToken(tokFromBool(has)); + } + const has = try pp.comp.hasInclude(filename, macro_tok.loc.id, include_type, .next); + return pp.ungetToken(tokFromBool(has)); +} + +fn includeDepth(pp: *Preprocessor) usize { + return pp.tokenizers.items.len - 1; +} + +fn hasEmbedValue(contents_arg: ?[]const u8) []const u8 { + const contents = contents_arg orelse return "0\n"; + if (contents.len == 0) return "2\n"; + return "1\n"; +} + +/// TODO: handle limit/prefix/suffix/etc +fn handleHasEmbed(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { + const l_paren = pp.getToken(); + if (l_paren.id != .l_paren) { + pp.skipToNl(); + return; + } + + var is_std: bool = undefined; + const include_str = pp.readHeaderName(&is_std) catch |err| switch (err) { + error.InvalidInclude => return, + else => |e| return e, + }; + try pp.expectClosing(l_paren, .r_paren); + + const filename = include_str[1 .. 
include_str.len - 1]; + const include_type: Compilation.IncludeType = switch (include_str[0]) { + '"' => .quotes, + '<' => .angle_brackets, + else => unreachable, + }; + + const contents = try pp.comp.findEmbed(filename, macro_tok.loc.id, include_type, 1); + const result = hasEmbedValue(contents); + const start = pp.comp.generated_buf.items.len; + try pp.comp.generated_buf.appendSlice(pp.comp.gpa, result); + const pasted_tok = try pp.makeGeneratedToken(start, .pp_num, macro_tok); + return pp.ungetToken(pasted_tok); +} + +// Skip until newline, ignore other tokens. +fn skipToNl(pp: *Preprocessor) void { + while (true) { + const tok = pp.getToken(); + if (tok.id.isDirectiveEnd()) return; + } +} + +fn readOneIdentifierArgument(pp: *Preprocessor, macro_tok: PreprocessorToken) !?PreprocessorToken { + const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); + _ = l_paren; + var invalid: ?PreprocessorToken = null; + var identifier: ?PreprocessorToken = null; + while (true) { + var tok = pp.getToken(); + tok.id.simplifyMacroKeywordExtra(true); + + switch (tok.id) { + .r_paren, .eof => break, + else => { + if (identifier) |_| invalid = tok else identifier = tok; + }, + } + } + if (invalid) |some| { + try pp.comp.addDiagnostic(.{ + .tag = .missing_tok_builtin, + .loc = some.loc, + .extra = .{ .tok_id_expected = .r_paren }, + }, &.{}); // TODO: expansion slice + return null; + } + if (identifier) |ident| { + if (ident.id == .identifier or ident.id == .extended_identifier) return ident; + } else { + const extra: Diagnostics.Message.Extra = .{ .arguments = .{ .expected = 1, .actual = 0 } }; + try pp.comp.addDiagnostic(.{ .tag = .expected_arguments, .loc = macro_tok.loc, .extra = extra }, &.{}); + } + return null; +} + +fn handleIsIdentifier(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { + if (try pp.readOneIdentifierArgument(macro_tok)) |_| { + return pp.ungetToken(PreprocessorToken.one); + } else { + return pp.ungetToken(PreprocessorToken.zero); + } +} + +fn handlePragmaOperator(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { + _ = pp; + _ = macro_tok; + // TODO +} + +fn addBuiltinMacros(pp: *Preprocessor) !void { + try pp.addBuiltinMacro("__has_attribute", handleHasAttribute); + try pp.addBuiltinMacro("__has_c_attribute", handleHasCAttribute); + try pp.addBuiltinMacro("__has_declspec_attribute", handleHasDeclSpecAttribute); + try pp.addBuiltinMacro("__has_feature", handleHasFeature); + try pp.addBuiltinMacro("__has_extension", handleHasExtension); + try pp.addBuiltinMacro("__has_builtin", handleHasBuiltin); + try pp.addBuiltinMacro("__has_warning", handleHasWarning); + try pp.addBuiltinMacro("__has_include", handleHasInclude); + try pp.addBuiltinMacro("__has_include_next", handleHasIncludeNext); + try pp.addBuiltinMacro("__has_embed", handleHasEmbed); + + try pp.addBuiltinMacro("__is_identifier", handleIsIdentifier); + + try pp.addBuiltinMacro("__FILE__", handleFileMacro); + try pp.addBuiltinMacro("__LINE__", handleLineMacro); + try pp.addBuiltinMacro("__COUNTER__", handleCounterMacro); + try pp.addBuiltinMacro("_Pragma", handlePragmaOperator); +} + +/// Initialize Preprocessor with builtin macros. 
+pub fn initDefault(comp: *Compilation) !Preprocessor { + var pp = init(comp); + errdefer pp.deinit(); + try pp.addBuiltinMacros(); + return pp; +} + +pub fn deinit(pp: *Preprocessor) void { + pp.arena.deinit(); + pp.include_guards.deinit(pp.gpa); + pp.tokens.deinit(pp.gpa); + pp.tokenizers.deinit(pp.gpa); + for (pp.expansion_bufs.items) |*toklist| { + toklist.deinit(pp.gpa); + } + pp.expansion_bufs.deinit(pp.gpa); + pp.defines.deinit(pp.gpa); + pp.char_buf.deinit(pp.gpa); + for (pp.expansion_entries.items(.locs)) |locs| PreprocessorToken.free(locs, pp.gpa); + pp.expansion_entries.deinit(pp.gpa); + pp.guard_stack.deinit(pp.gpa); + pp.macro_arg_tokens.deinit(pp.gpa); + pp.macro_args.deinit(pp.gpa); + pp.safe_strings.deinit(pp.gpa); + pp.treap.deinit(); +} + +/// Preprocess a compilation unit of sources into a parsable list of tokens. +pub fn preprocessSources(pp: *Preprocessor, sources: []const Source) Error!void { + assert(sources.len > 1); + const first = sources[0]; + + for (sources[1..]) |header| { + _ = try pp.preprocess(header); + } + const eof = try pp.preprocess(first); + try pp.addToken(eof); +} + +fn propagateSpace(pp: *Preprocessor, tokens: []PreprocessorToken, template: PreprocessorToken) void { + if (tokens.len > 0) { + tokens[0].flags = template.flags; + } else { + pp.injectSpace(); + } +} + +fn ungetAll(pp: *Preprocessor, tokens: []const PreprocessorToken) !void { + if (tokens.len == 0) return; + const start = pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].items.len; + try pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].appendSlice(pp.gpa, tokens); + std.mem.reverse(PreprocessorToken, pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].items[start..]); +} + +fn addHideSet(pp: *Preprocessor, toks: []PreprocessorToken, hideset: Treap.Node) !void { + for (toks) |*tok| { + switch (tok.id) { + // non-identifiers are not expanded, so we don't need to track their hidesets. + // Track r_paren hideset since it is used for computing the hideset of function-like macro expansions + .identifier, .extended_identifier, .r_paren => { + tok.hideset = try pp.treap.@"union"(tok.hideset, hideset); + }, + else => {}, + } + } +} + +fn stringize(pp: *Preprocessor, tmpl: PreprocessorToken, args_range: MacroArg) !PreprocessorToken { + const start = pp.comp.generated_buf.items.len; + try pp.comp.generated_buf.append(pp.gpa, '"'); + const args = args_range.slice(pp.macro_arg_tokens.items); + for (args, 0..) 
|tok, i| { + const slice = pp.tokSlice(tok); + if (slice.len > 0 and tok.flags.space and i != 0) { + try pp.comp.generated_buf.append(pp.gpa, ' '); + } + try pp.comp.generated_buf.appendSlice(pp.gpa, slice); + } + try pp.comp.generated_buf.append(pp.gpa, '"'); + var tok = tmpl; + tok.id = .string_literal; + tok.loc = .{ + .id = .generated, + .byte_offset = @intCast(start), + .line = pp.generated_line, + }; + pp.generated_line += 1; + return tok; +} + +fn subst(pp: *Preprocessor, macro: *const Macro, macro_tok: PreprocessorToken, args: MacroArgList, hideset_arg: Treap.Node) ![]PreprocessorToken { + _ = macro_tok; + var hideset = hideset_arg; + var r: TokenList = .{}; + defer r.deinit(pp.gpa); + var i: usize = 0; + while (i < macro.tokens.len) : (i += 1) { + const t0 = macro.tokens[i]; + const t1: ?PreprocessorToken = if (i == macro.tokens.len - 1) null else macro.tokens[i + 1]; + + const t0_param = t0.id == .macro_param; + const t1_param = if (t1) |tok| tok.id == .macro_param else false; + + if (t0.id == .hash and t1_param) { + const arg = args.slice(pp.macro_args.items)[t1.?.argPosition()]; + const stringized = try pp.stringize(t0, arg); + try r.append(pp.gpa, stringized); + i += 1; + continue; + } + if (t0.id == .hash_hash and t1_param) { + const arg = args.slice(pp.macro_args.items)[t1.?.argPosition()]; + if (t1.?.isVarArg() and r.items.len > 0 and r.items[r.items.len - 1].id == .comma) { + if (arg.len() == 0) { + _ = r.pop(); + } else { + try r.appendSlice(pp.gpa, arg.slice(pp.macro_arg_tokens.items)); + } + } else if (arg.len() > 0) { + try pp.pasteAndPush(&r, arg.slice(pp.macro_arg_tokens.items)[0]); + try r.appendSlice(pp.gpa, arg.slice(pp.macro_arg_tokens.items)[1..]); + } + i += 1; + continue; + } + if (t0.id == .hash_hash and t1 != null) { + hideset = t1.?.hideset; + try pp.pasteAndPush(&r, t1.?); + i += 1; + continue; + } + if (t0_param and t1 != null and t1.?.id == .hash_hash) { + hideset = t1.?.hideset; + const arg = args.slice(pp.macro_args.items)[t0.argPosition()]; + if (arg.len() == 0) { + i += 1; + } else { + try r.appendSlice(pp.gpa, arg.slice(pp.macro_arg_tokens.items)); + } + continue; + } + if (t0_param) { + const arg = args.slice(pp.macro_args.items)[t0.argPosition()]; + const expanded = try pp.expandAll(arg.slice(pp.macro_arg_tokens.items), t0); + defer pp.gpa.free(expanded); + try r.appendSlice(pp.gpa, expanded); + continue; + } + try r.append(pp.gpa, t0); + } + try pp.addHideSet(r.items, hideset); + return r.toOwnedSlice(pp.gpa); +} + +fn pasteTokens(pp: *Preprocessor, lhs: PreprocessorToken, rhs: PreprocessorToken) !PreprocessorToken { + const start = pp.comp.generated_buf.items.len; + const end = start + pp.tokSlice(lhs).len + pp.tokSlice(rhs).len; + try pp.comp.generated_buf.ensureTotalCapacity(pp.gpa, end + 1); // +1 for a newline + + // We cannot use the same slices here since they might be invalidated by `ensureCapacity` + pp.comp.generated_buf.appendSliceAssumeCapacity(pp.tokSlice(lhs)); + pp.comp.generated_buf.appendSliceAssumeCapacity(pp.tokSlice(rhs)); + pp.comp.generated_buf.appendAssumeCapacity('\n'); + + // Try to tokenize the result. 
+ var tmp_tokenizer = Tokenizer{ + .buf = pp.comp.generated_buf.items, + .langopts = pp.comp.langopts, + .index = @intCast(start), + .source = .generated, + }; + const pasted_token = tmp_tokenizer.nextNoWSComments(); + const next_tok = tmp_tokenizer.next(); + if (next_tok.id != .nl) { + try pp.errStr( + lhs, + .pasting_formed_invalid, + try pp.comp.diagnostics.arena.allocator().dupe(u8, pp.comp.generated_buf.items[start..end]), + ); + } + return pp.makeGeneratedToken(start, pasted_token.id, lhs); +} + +/// Paste `tok` onto the last token in `tokens` +fn pasteAndPush(pp: *Preprocessor, tokens: *TokenList, tok: PreprocessorToken) !void { + const last = tokens.pop(); + const pasted = try pp.pasteTokens(last, tok); + return tokens.append(pp.gpa, pasted); +} + +fn tokenBufferStashReverse(pp: *Preprocessor, tokens: []const PreprocessorToken) !void { + try pp.expansion_bufs.append(pp.gpa, .{}); + try pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].appendSlice(pp.gpa, tokens); + std.mem.reverse(PreprocessorToken, pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].items); +} + +fn tokenBufferUnstash(pp: *Preprocessor) void { + var buf = pp.expansion_bufs.pop(); + buf.deinit(pp.gpa); +} + +fn expandAll(pp: *Preprocessor, tokens: []const PreprocessorToken, tmpl: PreprocessorToken) ![]const PreprocessorToken { + try pp.tokenBufferStashReverse(tokens); + defer pp.tokenBufferUnstash(); + var r: TokenList = .{}; + defer r.deinit(pp.gpa); + while (true) { + const tok = try pp.readExpand(); + if (tok.id == .eof) break; + try r.append(pp.gpa, tok); + } + pp.propagateSpace(r.items, tmpl); + return r.toOwnedSlice(pp.gpa); +} + +fn peekToken(pp: *Preprocessor) !PreprocessorToken { + const tok = try pp.readToken(); + try pp.ungetToken(tok); + return tok; +} + +/// Return a string with the same contents as `name` and whose lifetime is the same as the preprocessor's lifetime +/// If `tok` is not from the generated source, this is just `name`. +/// If `tok` is from the generated source, pointers are invalidated when the underlying ArrayList is resized. 
Therefore, +/// duplicate the string and store it (so we aren't repeatedly copying the same string) +fn getSafeString(pp: *Preprocessor, tok: PreprocessorToken, name: []const u8) ![]const u8 { + if (tok.loc.id != .generated) return name; + const gop = try pp.safe_strings.getOrPut(pp.gpa, name); + if (!gop.found_existing) { + const copy = try pp.arena.allocator().dupe(u8, name); + gop.key_ptr.* = copy; + } + return gop.key_ptr.*; +} + +fn injectSpace(pp: *Preprocessor) void { + var i = pp.expansion_bufs.items.len; + while (i > 0) : (i -= 1) { + var j = pp.expansion_bufs.items[i - 1].items.len; + while (j > 0) : (j -= 1) { + pp.expansion_bufs.items[i - 1].items[j - 1].flags.space = true; + return; + } + } +} + +fn readExpandNewline(pp: *Preprocessor) Error!PreprocessorToken { + const tok = pp.getToken(); + if (!tok.id.isMacroIdentifier()) return tok; + const name = pp.tokSlice(tok); + const macro = pp.defines.getPtr(name) orelse return tok; + + const macro_hideset = tok.hideset; + if (pp.treap.contains(macro_hideset, name)) return tok; + + switch (macro.kind) { + .object => { + const safe_name = try pp.getSafeString(tok, name); + const new_hideset = try pp.treap.addNodeTo(tok.hideset, safe_name); + + const tokens = try pp.subst(macro, tok, MacroArgList.empty, new_hideset); + defer pp.gpa.free(tokens); + pp.propagateSpace(tokens, tok); + try pp.ungetAll(tokens); + return pp.readExpand(); + }, + .func => { + if (!try pp.next(.l_paren)) return tok; + const arg_tokens_start = pp.macro_arg_tokens.items.len; + defer pp.macro_arg_tokens.items.len = arg_tokens_start; + const macro_args_start = pp.macro_args.items.len; + defer pp.macro_args.items.len = macro_args_start; + + const args = pp.readArgs(tok, macro) catch |err| switch (err) { + error.IncorrectArgumentCount => return PreprocessorToken.zero, + error.UnterminatedMacroArgumentList => { + try pp.errTok(tok, .unterminated_macro_arg_list); + return PreprocessorToken.zero; + }, + else => |e| return e, + }; + const r_paren = pp.getToken(); + std.debug.assert(r_paren.id == .r_paren); + const safe_name = try pp.getSafeString(tok, name); + + const intersection = try pp.treap.intersection(macro_hideset, r_paren.hideset); + const hideset = try pp.treap.addNodeTo(intersection, safe_name); + const tokens = try pp.subst(macro, tok, args, hideset); + defer pp.gpa.free(tokens); + pp.propagateSpace(tokens, tok); + try pp.ungetAll(tokens); + return pp.readExpand(); + }, + .special => |func| { + try func(pp, tok); + return pp.readExpand(); + }, + } +} + +fn readMacroArg(pp: *Preprocessor, end: *bool, readall: bool) !MacroArg { + var level: i32 = 0; + const start: u32 = @intCast(pp.macro_arg_tokens.items.len); + while (true) { + var tok = pp.getToken(); + if (tok.id == .eof) { + return error.UnterminatedMacroArgumentList; + } + if (tok.id == .nl) continue; + if (tok.flags.is_bol and tok.id == .hash) { + try pp.readDirective(); + continue; + } + if (level == 0 and tok.id == .r_paren) { + try pp.ungetToken(tok); + end.* = true; + break; + } + if (level == 0 and tok.id == .comma and !readall) { + break; + } + if (tok.id == .l_paren) { + level += 1; + } + if (tok.id == .r_paren) { + level -= 1; + } + if (tok.flags.is_bol) { + tok.flags = .{ .is_bol = false, .space = true }; + } + try pp.macro_arg_tokens.append(pp.gpa, tok); + } + return .{ .start = start, .end = @intCast(pp.macro_arg_tokens.items.len) }; +} + +fn doReadArgs(pp: *Preprocessor, macro: *const Macro) !MacroArgList { + const start: u32 = @intCast(pp.macro_args.items.len); + var end = false; + while 
(!end) { + const in_ellipsis = macro.var_args and (pp.macro_args.items.len - start) + 1 == macro.nargs; + const arg_range = try pp.readMacroArg(&end, in_ellipsis); + try pp.macro_args.append(pp.gpa, arg_range); + } + if (macro.var_args and (pp.macro_args.items.len - start) + 1 == macro.nargs) { + try pp.macro_args.append(pp.gpa, MacroArg.empty); + } + return .{ .start = start, .end = @intCast(pp.macro_args.items.len) }; +} + +fn readArgs(pp: *Preprocessor, ident: PreprocessorToken, macro: *const Macro) !MacroArgList { + if (macro.nargs == 0 and (try pp.peekToken()).id == .r_paren) { + return MacroArgList.empty; + } + const args = try pp.doReadArgs(macro); + if (args.len() != macro.nargs) { + const extra = Diagnostics.Message.Extra{ + .arguments = .{ .expected = @intCast(macro.nargs), .actual = @intCast(args.len()) }, + }; + try pp.comp.addDiagnostic( + .{ .tag = .expected_arguments, .loc = ident.loc, .extra = extra }, + &.{}, // TODO: expansion slice + ); + return error.IncorrectArgumentCount; + } + return args; +} + +fn readExpand(pp: *Preprocessor) Error!PreprocessorToken { + while (true) { + const tok = try pp.readExpandNewline(); + if (tok.id != .nl) return tok; + } +} + +/// # number "file" flags +/// TODO: validate that the pp_num token is solely digits +/// if not, emit `GNU line marker directive requires a simple digit sequence` +fn readLinemarker(pp: *Preprocessor) !void { + const name = pp.getToken(); + if (name.id.isDirectiveEnd()) return; + if (name.id != .string_literal) try pp.errTok(name, .line_invalid_filename); + + const flag_1 = pp.getToken(); + if (flag_1.id.isDirectiveEnd()) return; + const flag_2 = pp.getToken(); + if (flag_2.id.isDirectiveEnd()) return; + const flag_3 = pp.getToken(); + if (flag_3.id.isDirectiveEnd()) return; + const flag_4 = pp.getToken(); + if (flag_4.id.isDirectiveEnd()) return; + try pp.expectNewline(); +} + +fn readIdent(pp: *Preprocessor) !?PreprocessorToken { + const tok = pp.getToken(); + if (!tok.id.isMacroIdentifier()) { + try pp.errTok(tok, .macro_name_must_be_identifier); + return null; + } + return tok; +} + +fn ungetToken(pp: *Preprocessor, tok: PreprocessorToken) !void { + if (tok.id == .eof) return; + if (pp.isBufferEmpty()) { + try pp.expansion_bufs.append(pp.gpa, .{}); + } + try pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].append(pp.gpa, tok); +} + +fn hashHashCheck(pp: *Preprocessor, toks: []const PreprocessorToken) !void { + if (toks.len == 0) return; + if (toks[0].id == .hash_hash) { + return pp.errTok(toks[0], .hash_hash_at_start); + } + if (toks[toks.len - 1].id == .hash_hash) { + return pp.errTok(toks[toks.len - 1], .hash_hash_at_end); + } +} + +fn readObjMacro(pp: *Preprocessor, name: PreprocessorToken) !void { + var body: TokenList = .{}; + errdefer body.deinit(pp.gpa); + + while (true) { + const tok = pp.getToken(); + if (tok.id.isDirectiveEnd()) break; + + try body.append(pp.gpa, tok); + } + try pp.hashHashCheck(body.items); + const macro: Macro = .{ + .tokens = body.items, + .var_args = false, + .loc = undefined, + .kind = .object, + .nargs = undefined, + }; + try pp.defineMacro(name, macro); +} + +/// Defines a new macro and warns if it is a duplicate +fn defineMacro(pp: *Preprocessor, name_tok: PreprocessorToken, macro: Macro) Error!void { + const name_str = pp.tokSlice(name_tok); + const gop = try pp.defines.getOrPut(pp.gpa, name_str); + if (gop.found_existing and !gop.value_ptr.eql(macro, pp)) { + const tag: Diagnostics.Tag = if (gop.value_ptr.kind == .special) .builtin_macro_redefined else 
.macro_redefined; + const start = pp.comp.diagnostics.list.items.len; + try pp.comp.addDiagnostic(.{ + .tag = tag, + .loc = name_tok.loc, + .extra = .{ .str = name_str }, + }, &.{}); + if (gop.value_ptr.kind != .special and pp.comp.diagnostics.list.items.len != start) { + try pp.comp.addDiagnostic(.{ + .tag = .previous_definition, + .loc = gop.value_ptr.loc, + }, &.{}); + } + } + gop.value_ptr.* = macro; +} + +/// Get raw token source string. +/// Returned slice is invalidated when comp.generated_buf is updated. +pub fn tokSlice(pp: *Preprocessor, token: PreprocessorToken) []const u8 { + if (token.id.lexeme()) |some| return some; + const source = pp.comp.getSource(token.loc.id); + var tmp_tokenizer = Tokenizer{ + .buf = source.buf, + .langopts = pp.comp.langopts, + .index = token.loc.byte_offset, + .source = .generated, + }; + const tok = tmp_tokenizer.next(); + return tmp_tokenizer.buf[tok.start..tok.end]; +} + +fn expect(pp: *Preprocessor, expected: Tokenizer.Token.Id, tag: Diagnostics.Tag) !PreprocessorToken { + const tok = pp.getToken(); + if (tok.id != expected) { + try pp.errTok(tok, tag); + } + return tok; +} + +fn makeMacroToken(position: usize, is_vararg: bool) PreprocessorToken { + return .{ + .id = .macro_param, + .hideset = null, + .loc = .{ + .id = .unused, + .byte_offset = @intCast(position), + .line = @intFromBool(is_vararg), + }, + }; +} + +fn next(pp: *Preprocessor, id: Tokenizer.Token.Id) !bool { + const tok = pp.getToken(); + if (tok.id == id) return true; + try pp.ungetToken(tok); + return false; +} + +/// Returns true for vararg function-like macro, false otherwise +fn readFunclikeMacroParams(pp: *Preprocessor, name: PreprocessorToken, l_paren: PreprocessorToken, params: *ParamMap) !bool { + _ = name; + var pos: usize = 0; + while (true) { + var tok = pp.getToken(); + if (tok.id == .r_paren) return false; + if (pos != 0) { + if (tok.id != .comma) { + switch (tok.id) { + .nl, .eof => {}, + else => pp.skipToNl(), + } + try pp.errTok(tok, .expected_comma_param_list); + return error.InvalidMacroDef; + } + tok = pp.getToken(); + } + if (tok.id.isDirectiveEnd()) { + try pp.errTok(tok, .missing_paren_param_list); + return false; + } + if (tok.id == .ellipsis) { + try params.put(pp.gpa, "__VA_ARGS__", makeMacroToken(pos, true)); + pos += 1; + const r_paren = pp.getToken(); + if (r_paren.id != .r_paren) { + try pp.errTok(r_paren, .missing_paren_param_list); + try pp.errTok(l_paren, .to_match_paren); + return error.InvalidMacroDef; + } + return true; + } + if (!tok.id.isMacroIdentifier()) { + try pp.errTok(tok, .invalid_token_param_list); + return error.InvalidMacroDef; + } + const arg = pp.tokSlice(tok); + if (try pp.next(.ellipsis)) { + const r_paren = pp.getToken(); + if (r_paren.id != .r_paren) { + try pp.errTok(r_paren, .missing_paren_param_list); + try pp.errTok(l_paren, .to_match_paren); + pp.skipToNl(); + } + try params.put(pp.gpa, arg, makeMacroToken(pos, true)); + pos += 1; + return true; + } + try params.put(pp.gpa, arg, makeMacroToken(pos, false)); + pos += 1; + } +} + +fn readFunclikeMacroBody(pp: *Preprocessor, params: *const ParamMap) ![]const PreprocessorToken { + var tokens: TokenList = .{}; + errdefer tokens.deinit(pp.gpa); + while (true) { + const tok = pp.getToken(); + if (tok.id.isDirectiveEnd()) { + return tokens.toOwnedSlice(pp.gpa); + } + if (tok.id.isMacroIdentifier()) { + // const subst = params. 
+ if (params.get(pp.tokSlice(tok))) |sub| { + var copy = sub; + copy.flags.space = tok.flags.space; + try tokens.append(pp.gpa, copy); + continue; + } + } + try tokens.append(pp.gpa, tok); + } +} + +fn readFuncLikeMacro(pp: *Preprocessor, name: PreprocessorToken, l_paren: PreprocessorToken) Error!void { + var params: ParamMap = .{}; + defer params.deinit(pp.gpa); + const is_vararg = pp.readFunclikeMacroParams(name, l_paren, ¶ms) catch |err| switch (err) { + error.InvalidMacroDef => blk: { + pp.skipToNl(); + break :blk false; + }, + else => |e| return e, + }; + const body = try pp.readFunclikeMacroBody(¶ms); + errdefer pp.gpa.free(body); + try pp.hashHashCheck(body); + const macro: Macro = .{ + .tokens = body, + .var_args = is_vararg, + .loc = undefined, + .kind = .func, + .nargs = params.count(), + }; + try pp.defineMacro(name, macro); +} + +fn readDefine(pp: *Preprocessor) !void { + const name = try pp.readIdent() orelse { + pp.skipToNl(); + return; + }; + const next_tok = pp.getToken(); + if (next_tok.id == .l_paren and !next_tok.flags.space) { + try pp.readFuncLikeMacro(name, next_tok); + return; + } + try pp.ungetToken(next_tok); + try pp.readObjMacro(name); +} + +fn doSkipSpace(pp: *Preprocessor) bool { + const saved_tokenizer = pp.tokenizers.items[pp.tokenizers.items.len - 1]; + const tok = pp.tokenizers.items[pp.tokenizers.items.len - 1].next(); + switch (tok.id) { + .eof => return false, + .whitespace, .comment => return true, + else => { + pp.tokenizers.items[pp.tokenizers.items.len - 1] = saved_tokenizer; + return false; + }, + } +} + +/// Skips spaces including comments. +/// Returns true if at least one space is skipped. +fn skipSpace(pp: *Preprocessor) bool { + if (!pp.doSkipSpace()) { + return false; + } + while (pp.doSkipSpace()) {} + return true; +} + +/// Read the next raw token from the tokenizer stack +fn lexToken(pp: *Preprocessor) PreprocessorToken { + if (pp.skipSpace()) { + return .{ .id = .whitespace, .loc = undefined }; + } + const tok = pp.tokenizers.items[pp.tokenizers.items.len - 1].next(); + return .{ + .id = tok.id, + .flags = .{ + .is_bol = tok.bol, + }, + .loc = .{ + .id = tok.source, + .byte_offset = tok.start, + .line = tok.line, + }, + }; +} + +/// Read the next token without expanding it +fn getToken(pp: *Preprocessor) PreprocessorToken { + if (!pp.isBufferEmpty() and pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].items.len > 0) { + return pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].pop(); + } + if (pp.expansion_bufs.items.len > 1) { + return .{ .id = .eof, .loc = undefined }; + } + const bol = pp.tokenizers.items[pp.tokenizers.items.len - 1].bol; + var tok = pp.lexToken(); + while (tok.id == .whitespace) { + tok = pp.lexToken(); + tok.flags.space = true; + } + tok.flags.is_bol = bol; + return tok; +} + +fn readDefinedOp(pp: *Preprocessor) !PreprocessorToken { + var tok = pp.getToken(); + if (tok.id == .l_paren) { + tok = pp.getToken(); + const r_paren = pp.getToken(); + if (r_paren.id != .r_paren) { + try pp.errStr(r_paren, .closing_paren_after, "defined"); + } + } + if (!tok.id.isMacroIdentifier()) { + try pp.errTok(tok, .macro_name_must_be_identifier); + } + const slice = pp.tokSlice(tok); + if (pp.defines.contains(slice)) { + return PreprocessorToken.one; + } + return PreprocessorToken.zero; +} + +fn readIntExprLine(pp: *Preprocessor) !void { + while (true) { + const tok = try pp.readExpandNewline(); + if (tok.id.isDirectiveEnd()) break; + if (tok.id == .keyword_defined) { + const result = try pp.readDefinedOp(); + try 
pp.addToken(result); + } else if (tok.id.isMacroIdentifier()) { + try pp.addToken(PreprocessorToken.zero); + } else { + try pp.addToken(tok); + } + } + try pp.addToken(.{ .id = .eof, .loc = .{} }); +} + +fn readConstexpr(pp: *Preprocessor) !bool { + const start = pp.tokens.len; + defer pp.tokens.len = start; + try pp.readIntExprLine(); + + var oldpp = try OldPreprocessor.initDefault(pp.comp); + defer oldpp.deinit(); + + var i: usize = start; + while (i < pp.tokens.len) : (i += 1) { + const tok = pp.tokens.get(i); + try oldpp.tokens.append(pp.gpa, .{ .id = tok.id, .loc = tok.loc }); + } + + var parser = Parser{ + .pp = &oldpp, + .comp = pp.comp, + .gpa = pp.gpa, + .tok_ids = pp.tokens.items(.id)[start..], + .tok_i = 0, + .arena = undefined, + .in_macro = true, + .strings = std.ArrayListAligned(u8, 4).init(pp.comp.gpa), + + .data = undefined, + .value_map = undefined, + .labels = undefined, + .decl_buf = undefined, + .list_buf = undefined, + .param_buf = undefined, + .enum_buf = undefined, + .record_buf = undefined, + .attr_buf = undefined, + .field_attr_buf = undefined, + .string_ids = undefined, + }; + defer parser.strings.deinit(); + return parser.macroExpr(); +} + +/// #line number "file" +/// TODO: validate that the pp_num token is solely digits +fn readLine(pp: *Preprocessor) Error!void { + const digits = pp.getToken(); + if (digits.id != .pp_num) try pp.errTok(digits, .line_simple_digit); + + if (digits.id.isDirectiveEnd()) return; + const name = pp.getToken(); + if (name.id.isDirectiveEnd()) return; + if (name.id != .string_literal) try pp.errTok(name, .line_invalid_filename); + try pp.expectNewline(); +} + +fn readPragma(pp: *Preprocessor) Error!void { + _ = pp; + // TODO +} + +fn readUndef(pp: *Preprocessor) Error!void { + const name = try pp.readIdent() orelse { + pp.skipToNl(); + return; + }; + try pp.expectNewline(); + _ = pp.defines.remove(pp.tokSlice(name)); +} + +/// Skip until after a newline, error if extra tokens before it. +fn expectNewline(pp: *Preprocessor) !void { + var sent_err = false; + while (true) { + const tok = pp.getToken(); + if (tok.id.isDirectiveEnd()) return; + if (tok.id == .whitespace or tok.id == .comment) continue; + if (!sent_err) { + sent_err = true; + try pp.errTok(tok, .extra_tokens_directive_end); + } + } +} + +/// TODO: pragma once +fn readIncludeExtra(pp: *Preprocessor, include_token: PreprocessorToken, which: Compilation.WhichInclude) Error!void { + var is_std: bool = undefined; + const include_str = pp.readHeaderName(&is_std) catch |err| switch (err) { + error.InvalidInclude => return, + else => |e| return e, + }; + try pp.expectNewline(); + + const filename = include_str[1 .. 
include_str.len - 1]; + const include_type: Compilation.IncludeType = switch (include_str[0]) { + '"' => .quotes, + '<' => .angle_brackets, + else => unreachable, + }; + const tok: RawToken = .{ .id = include_token.id, .source = include_token.loc.id, .start = include_token.loc.byte_offset, .line = include_token.loc.line }; + const source = (try pp.comp.findInclude(filename, tok, include_type, which)) orelse return pp.fatalNotFound(include_token, filename); + if (pp.include_guards.get(source.id)) |guard| { + if (pp.defines.contains(guard)) return; + } + const guard = pp.findIncludeGuard(source); + try pp.guard_stack.append(pp.gpa, guard); + + try pp.tokenizers.append(pp.gpa, .{ + .buf = source.buf, + .langopts = pp.comp.langopts, + .index = 0, + .source = source.id, + }); +} + +/// Read a header name delimited by quotes or angle brackets +fn readHeaderFileName(pp: *Preprocessor, is_std: *bool) !?[]const u8 { + if (!pp.isBufferEmpty()) return null; + _ = pp.skipSpace(); + + var close: u8 = undefined; + var tokenizer = pp.tokenizers.items[pp.tokenizers.items.len - 1]; + defer pp.tokenizers.items[pp.tokenizers.items.len - 1] = tokenizer; + + if (tokenizer.buf[tokenizer.index..].len < 2) { + return null; + } + const start = tokenizer.index; + switch (tokenizer.buf[tokenizer.index..][0]) { + '"' => { + is_std.* = false; + close = '"'; + }, + '<' => { + is_std.* = true; + close = '>'; + }, + else => return null, + } + tokenizer.index += 1; + while (tokenizer.index < tokenizer.buf.len and tokenizer.buf[tokenizer.index] != close and tokenizer.buf[tokenizer.index] != '\n') : (tokenizer.index += 1) {} + + if (tokenizer.index == tokenizer.buf.len or tokenizer.buf[tokenizer.index] != close) { + try pp.errTok(.{ .id = undefined, .loc = .{ .id = tokenizer.source, .byte_offset = tokenizer.index, .line = tokenizer.line } }, .header_str_closing); + try pp.errTok(.{ .id = undefined, .loc = .{ .id = tokenizer.source, .byte_offset = start, .line = tokenizer.line } }, .header_str_match); + return error.InvalidInclude; + } + + tokenizer.index += 1; + + const buf = tokenizer.buf[start..tokenizer.index]; + if (buf.len == 2) { + try pp.errTok(.{ .id = .nl, .loc = .{ .id = tokenizer.source, .byte_offset = start, .line = tokenizer.line } }, .empty_filename); + return error.InvalidInclude; + } + return buf; +} + +fn isBufferEmpty(pp: *const Preprocessor) bool { + return pp.expansion_bufs.items.len == 0; +} + +/// Read a delimited header name, or a macro expanded one +fn readHeaderName(pp: *Preprocessor, is_std: *bool) ![]const u8 { + if (try pp.readHeaderFileName(is_std)) |path| return path; + + // If a token following #include does not start with < nor ", + // try to read the token as a regular token. Macro-expanded + // form may be a valid header file path. 
+ const tok = try pp.readExpandNewline(); + if (tok.id.isDirectiveEnd()) { + try pp.errTok(tok, .expected_filename); + return error.InvalidInclude; + } + if (tok.id == .string_literal) { + is_std.* = false; + return pp.tokSlice(tok); + } + if (tok.id != .angle_bracket_left) { + try pp.errStr(tok, .expected_left_angle_bracket, pp.tokSlice(tok)); + return error.InvalidInclude; + } + const start = pp.char_buf.items.len; + try pp.char_buf.append(pp.gpa, '<'); + defer pp.char_buf.items.len = start; + const writer = pp.char_buf.writer(pp.gpa); + while (true) { + const path_tok = try pp.readExpandNewline(); + if (path_tok.id == .nl) { + try pp.errTok(path_tok, .header_str_closing); + try pp.errTok(tok, .header_str_match); + return error.InvalidInclude; + } + if (path_tok.id == .angle_bracket_right) { + break; + } + try pp.prettyPrintToken(writer, path_tok); + } + is_std.* = true; + try pp.char_buf.append(pp.gpa, '>'); + return pp.gpa.dupe(u8, pp.char_buf.items[start..]); +} + +fn readInclude(pp: *Preprocessor, include_token: PreprocessorToken) Error!void { + return pp.readIncludeExtra(include_token, .first); +} + +fn readIncludeNext(pp: *Preprocessor, include_token: PreprocessorToken) Error!void { + return pp.readIncludeExtra(include_token, .next); +} + +fn readErrorMessage(pp: *Preprocessor, directive_tok: PreprocessorToken, tag: Diagnostics.Tag) !void { + const char_top = pp.char_buf.items.len; + defer pp.char_buf.items.len = char_top; + var i: usize = 0; + while (true) : (i += 1) { + const tok = pp.getToken(); + if (tok.id.isDirectiveEnd()) break; + const slice = pp.tokSlice(tok); + if (slice.len > 0 and tok.flags.space and i != 0) { + try pp.char_buf.append(pp.gpa, ' '); + } + try pp.char_buf.appendSlice(pp.gpa, slice); + } + const slice = pp.char_buf.items[char_top..]; + const duped = try pp.comp.diagnostics.arena.allocator().dupe(u8, slice); + try pp.comp.addDiagnostic(.{ + .tag = tag, + .loc = directive_tok.loc, + .extra = .{ .str = duped }, + }, &.{}); +} + +fn clearGuard(pp: *Preprocessor) void { + pp.guard_stack.items[pp.guard_stack.items.len - 1] = null; +} + +fn readDirective(pp: *Preprocessor) Error!void { + const directive = pp.getToken(); + if (directive.id.isDirectiveEnd()) return; + if (directive.id == .pp_num) { + return pp.readLinemarker(); + } + + const until_else = 0; + const until_endif = 1; + const until_endif_seen_else = 2; + + switch (directive.id) { + .keyword_define => try pp.readDefine(), + .keyword_elif => { + if (pp.if_level == 0) { + try pp.errTok(directive, .elif_without_if); + pp.if_level += 1; + pp.if_kind.set(pp.if_level, until_else); + } else if (pp.if_level == 1) { + pp.clearGuard(); + } + switch (pp.if_kind.get(pp.if_level)) { + until_else => if (try pp.readConstexpr()) { + pp.if_kind.set(pp.if_level, until_endif); + if (pp.verbose) { + pp.verboseLog(directive, "entering then branch of #elif", .{}); + } + } else { + try pp.skip(.until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #elif", .{}); + } + }, + until_endif => try pp.skip(.until_endif), + until_endif_seen_else => { + try pp.errTok(directive, .elif_after_else); + pp.skipToNl(); + }, + else => unreachable, + } + }, + .keyword_else => { + try pp.expectNewline(); + if (pp.if_level == 0) { + try pp.errTok(directive, .else_without_if); + return; + } else if (pp.if_level == 1) { + pp.clearGuard(); + } + switch (pp.if_kind.get(pp.if_level)) { + until_else => { + pp.if_kind.set(pp.if_level, until_endif_seen_else); + if (pp.verbose) { + pp.verboseLog(directive, "#else branch 
here", .{}); + } + }, + until_endif => try pp.skip(.until_endif_seen_else), + until_endif_seen_else => { + try pp.errTok(directive, .else_after_else); + pp.skipToNl(); + }, + else => unreachable, + } + }, + .keyword_endif => { + try pp.expectNewline(); + if (pp.if_level == 0) { + pp.clearGuard(); + try pp.errTok(directive, .endif_without_if); + return; + } else if (pp.if_level == 1) { + var tokenizer = &pp.tokenizers.items[pp.tokenizers.items.len - 1]; + const saved_tokenizer = tokenizer.*; + defer tokenizer.* = saved_tokenizer; + + var next_tok = tokenizer.nextNoWS(); + while (next_tok.id == .nl) : (next_tok = tokenizer.nextNoWS()) {} + if (next_tok.id != .eof) pp.clearGuard(); + } + pp.if_level -= 1; + }, + .keyword_error => try pp.readErrorMessage(directive, .error_directive), + .keyword_if => { + const sum, const overflowed = @addWithOverflow(pp.if_level, 1); + if (overflowed != 0) + return pp.fatal(directive, "too many #if nestings", .{}); + pp.if_level = sum; + + if (try pp.readConstexpr()) { + pp.if_kind.set(pp.if_level, until_endif); + if (pp.verbose) { + pp.verboseLog(directive, "entering then branch of #if", .{}); + } + } else { + pp.if_kind.set(pp.if_level, until_else); + try pp.skip(.until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #if", .{}); + } + } + }, + .keyword_ifdef => { + const sum, const overflowed = @addWithOverflow(pp.if_level, 1); + if (overflowed != 0) + return pp.fatal(directive, "too many #if nestings", .{}); + pp.if_level = sum; + + const macro_name = (try pp.expectMacroName()) orelse return; + try pp.expectNewline(); + if (pp.defines.get(macro_name) != null) { + pp.if_kind.set(pp.if_level, until_endif); + if (pp.verbose) { + pp.verboseLog(directive, "entering then branch of #ifdef", .{}); + } + } else { + pp.if_kind.set(pp.if_level, until_else); + try pp.skip(.until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #ifdef", .{}); + } + } + }, + .keyword_ifndef => { + const sum, const overflowed = @addWithOverflow(pp.if_level, 1); + if (overflowed != 0) + return pp.fatal(directive, "too many #if nestings", .{}); + pp.if_level = sum; + + const macro_name = (try pp.expectMacroName()) orelse return; + try pp.expectNewline(); + if (pp.defines.get(macro_name) == null) { + pp.if_kind.set(pp.if_level, until_endif); + } else { + pp.if_kind.set(pp.if_level, until_else); + try pp.skip(.until_else); + } + }, + .keyword_elifdef => { + if (pp.if_level == 0) { + try pp.errTok(directive, .elifdef_without_if); + pp.if_level += 1; + pp.if_kind.set(pp.if_level, until_else); + } else if (pp.if_level == 1) { + pp.clearGuard(); + } + switch (pp.if_kind.get(pp.if_level)) { + until_else => { + const macro_name = try pp.expectMacroName(); + if (macro_name == null) { + pp.if_kind.set(pp.if_level, until_else); + try pp.skip(.until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #elifdef", .{}); + } + } else { + try pp.expectNewline(); + if (pp.defines.get(macro_name.?) 
!= null) { + pp.if_kind.set(pp.if_level, until_endif); + if (pp.verbose) { + pp.verboseLog(directive, "entering then branch of #elifdef", .{}); + } + } else { + pp.if_kind.set(pp.if_level, until_else); + try pp.skip(.until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #elifdef", .{}); + } + } + } + }, + until_endif => try pp.skip(.until_endif), + until_endif_seen_else => { + try pp.errTok(directive, .elifdef_after_else); + pp.skipToNl(); + }, + else => unreachable, + } + }, + .keyword_elifndef => { + if (pp.if_level == 0) { + try pp.errTok(directive, .elifdef_without_if); + pp.if_level += 1; + pp.if_kind.set(pp.if_level, until_else); + } else if (pp.if_level == 1) { + pp.clearGuard(); + } + switch (pp.if_kind.get(pp.if_level)) { + until_else => { + const macro_name = try pp.expectMacroName(); + if (macro_name == null) { + pp.if_kind.set(pp.if_level, until_else); + try pp.skip(.until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #elifndef", .{}); + } + } else { + try pp.expectNewline(); + if (pp.defines.get(macro_name.?) == null) { + pp.if_kind.set(pp.if_level, until_endif); + if (pp.verbose) { + pp.verboseLog(directive, "entering then branch of #elifndef", .{}); + } + } else { + pp.if_kind.set(pp.if_level, until_else); + try pp.skip(.until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #elifndef", .{}); + } + } + } + }, + until_endif => try pp.skip(.until_endif), + until_endif_seen_else => { + try pp.errTok(directive, .elifdef_after_else); + pp.skipToNl(); + }, + else => unreachable, + } + }, + .keyword_include => try pp.readInclude(directive), + .keyword_include_next => try pp.readIncludeNext(directive), + .keyword_line => try pp.readLine(), + .keyword_pragma => try pp.readPragma(), + .keyword_undef => try pp.readUndef(), + .keyword_warning => try pp.readErrorMessage(directive, .warning_directive), + .keyword_embed => try pp.readEmbed(directive), + else => try pp.errTok(directive, .invalid_preprocessing_directive), + } +} + +/// TODO: handle limit/prefix/suffix/etc +fn readEmbed(pp: *Preprocessor, directive_tok: PreprocessorToken) Error!void { + var is_std: bool = undefined; + const include_str = pp.readHeaderName(&is_std) catch |err| switch (err) { + error.InvalidInclude => return, + else => |e| return e, + }; + + const filename = include_str[1 .. 
include_str.len - 1]; + const include_type: Compilation.IncludeType = switch (include_str[0]) { + '"' => .quotes, + '<' => .angle_brackets, + else => unreachable, + }; + + const limit = std.math.maxInt(u32); + const embed_bytes = (try pp.comp.findEmbed(filename, directive_tok.loc.id, include_type, limit)) orelse + return pp.fatalNotFound(directive_tok, filename); + defer pp.comp.gpa.free(embed_bytes); + + try pp.ensureUnusedTokenCapacity(2 * embed_bytes.len - 1); // N bytes and N-1 commas + + // TODO: We currently only support systems with CHAR_BIT == 8 + // If the target's CHAR_BIT is not 8, we need to write out correctly-sized embed_bytes + // and correctly account for the target's endianness + const writer = pp.comp.generated_buf.writer(pp.gpa); + + { + const byte = embed_bytes[0]; + const start = pp.comp.generated_buf.items.len; + try writer.print("{d}", .{byte}); + var generated = try pp.makeGeneratedToken(start, .embed_byte, directive_tok); + generated.flags.is_bol = true; + pp.addTokenAssumeCapacity(generated); + } + + for (embed_bytes[1..]) |byte| { + const start = pp.comp.generated_buf.items.len; + try writer.print(",{d}", .{byte}); + pp.addTokenAssumeCapacity(.{ .id = .comma, .loc = .{ .id = .generated, .byte_offset = @intCast(start) } }); + pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start + 1, .embed_byte, directive_tok)); + } + try pp.comp.generated_buf.append(pp.gpa, '\n'); +} + +fn readToken(pp: *Preprocessor) Error!PreprocessorToken { + while (true) { + const tok = try pp.readExpand(); + if (tok.flags.is_bol and tok.id == .hash and tok.hideset == null) { + try pp.readDirective(); + continue; + } + return tok; + } +} + +pub fn preprocess(pp: *Preprocessor, source: Source) !PreprocessorToken { + const guard = pp.findIncludeGuard(source); + try pp.guard_stack.append(pp.gpa, guard); + + try pp.tokenizers.append(pp.gpa, .{ + .buf = source.buf, + .langopts = pp.comp.langopts, + .index = 0, + .source = source.id, + }); + while (true) { + const tok = try pp.readToken(); + if (tok.id == .eof) { + const tokenizer = pp.tokenizers.pop(); + const guard_name = pp.guard_stack.pop(); + if (guard_name) |name| { + try pp.include_guards.put(pp.gpa, tokenizer.source, name); + } + if (pp.tokenizers.items.len == 0) { + return tok; + } + } else { + try pp.addToken(tok); + } + } +} + +// After how many empty lines are needed to replace them with linemarkers. +const collapse_newlines = 8; + +/// Pretty print tokens and try to preserve whitespace. 
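When a source's tokenizer is popped in `preprocess` above, the `#ifndef` guard found by `findIncludeGuard` is recorded in `include_guards`. The consumer of that map is not shown in this hunk; the following is only a minimal sketch, assuming the usual include-guard optimization, of how such an entry could be consulted before re-reading an already-included file. The helper name is hypothetical and not part of this patch.

    // Hypothetical helper (not in this patch): a source whose recorded include
    // guard macro is already defined does not need to be tokenized again.
    fn alreadyGuarded(pp: *Preprocessor, source: Source) bool {
        const guard = pp.include_guards.get(source.id) orelse return false;
        return pp.defines.contains(guard);
    }
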
+pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void { + var i: usize = 0; + while (i < pp.tokens.len) : (i += 1) { + const tok = pp.tokens.get(i); + if (tok.id == .eof) break; + try pp.prettyPrintToken(w, tok); + } + try w.writeByte('\n'); +} + +fn prettyPrintToken(pp: *Preprocessor, w: anytype, tok: PreprocessorToken) !void { + if (tok.flags.is_bol) { + try w.writeByte('\n'); + } + if (tok.flags.space) { + try w.writeByte(' '); + } + if (tok.id.lexeme()) |some| { + try w.writeAll(some); + } else { + try w.writeAll(pp.tokSlice(tok)); + } +} + +pub fn expansionSlice(pp: *Preprocessor, tok: Tree.TokenIndex) []Source.Location { + const S = struct { + fn order_token_index(context: void, lhs: Tree.TokenIndex, rhs: Tree.TokenIndex) std.math.Order { + _ = context; + return std.math.order(lhs, rhs); + } + }; + + const indices = pp.expansion_entries.items(.idx); + const idx = std.sort.binarySearch(Tree.TokenIndex, tok, indices, {}, S.order_token_index) orelse return &.{}; + const locs = pp.expansion_entries.items(.locs)[idx]; + var i: usize = 0; + while (locs[i].id != .unused) : (i += 1) {} + return locs[0..i]; +} + +pub fn addToken(pp: *Preprocessor, tok: PreprocessorToken) !void { + if (tok.expansion_locs) |expansion_locs| { + try pp.expansion_entries.append(pp.gpa, .{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs }); + } + try pp.tokens.append(pp.gpa, tok); +} + +pub fn addTokenAssumeCapacity(pp: *Preprocessor, tok: PreprocessorToken) void { + if (tok.expansion_locs) |expansion_locs| { + pp.expansion_entries.appendAssumeCapacity(.{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs }); + } + pp.tokens.appendAssumeCapacity(tok); +} + +pub fn ensureTotalTokenCapacity(pp: *Preprocessor, capacity: usize) !void { + try pp.tokens.ensureTotalCapacity(pp.gpa, capacity); + try pp.expansion_entries.ensureTotalCapacity(pp.gpa, capacity); +} + +pub fn ensureUnusedTokenCapacity(pp: *Preprocessor, capacity: usize) !void { + try pp.tokens.ensureUnusedCapacity(pp.gpa, capacity); + try pp.expansion_entries.ensureUnusedCapacity(pp.gpa, capacity); +} + +fn skip( + pp: *Preprocessor, + cont: enum { until_else, until_endif, until_endif_seen_else }, +) Error!void { + var ifs_seen: u32 = 0; + var line_start = true; + var tokenizer = &pp.tokenizers.items[pp.tokenizers.items.len - 1]; + + while (tokenizer.index < tokenizer.buf.len) { + if (line_start) { + const saved_tokenizer = tokenizer.*; + const hash = tokenizer.nextNoWS(); + if (hash.id == .nl) continue; + line_start = false; + if (hash.id != .hash) continue; + const directive = tokenizer.nextNoWS(); + switch (directive.id) { + .keyword_else => { + if (ifs_seen != 0) continue; + if (cont == .until_endif_seen_else) { + // try pp.err(directive, .else_after_else); + continue; + } + tokenizer.* = saved_tokenizer; + return; + }, + .keyword_elif => { + if (ifs_seen != 0 or cont == .until_endif) continue; + if (cont == .until_endif_seen_else) { + // try pp.err(directive, .elif_after_else); + continue; + } + tokenizer.* = saved_tokenizer; + return; + }, + .keyword_elifdef => { + if (ifs_seen != 0 or cont == .until_endif) continue; + if (cont == .until_endif_seen_else) { + // try pp.err(directive, .elifdef_after_else); + continue; + } + tokenizer.* = saved_tokenizer; + return; + }, + .keyword_elifndef => { + if (ifs_seen != 0 or cont == .until_endif) continue; + if (cont == .until_endif_seen_else) { + // try pp.err(directive, .elifndef_after_else); + continue; + } + tokenizer.* = saved_tokenizer; + return; + }, + .keyword_endif => { + if (ifs_seen == 
0) { + tokenizer.* = saved_tokenizer; + return; + } + ifs_seen -= 1; + }, + .keyword_if, .keyword_ifdef, .keyword_ifndef => ifs_seen += 1, + else => {}, + } + } else if (tokenizer.buf[tokenizer.index] == '\n') { + line_start = true; + tokenizer.index += 1; + tokenizer.line += 1; + tokenizer.bol = true; + if (pp.preserve_whitespace) { + try pp.addToken(.{ .id = .nl, .loc = .{ + .id = tokenizer.source, + .line = tokenizer.line, + } }); + } + } else { + line_start = false; + tokenizer.index += 1; + } + } else { + return pp.errTok(.{ .id = .eof, .loc = .{ .id = tokenizer.source, .byte_offset = tokenizer.index, .line = tokenizer.line } }, .unterminated_conditional_directive); + } +} + +fn verboseLog(pp: *Preprocessor, tok: PreprocessorToken, comptime fmt: []const u8, args: anytype) void { + const source = pp.comp.getSource(tok.loc.id); + const line_col = source.lineCol(tok.loc); + + const stderr = std.io.getStdErr().writer(); + var buf_writer = std.io.bufferedWriter(stderr); + const writer = buf_writer.writer(); + defer buf_writer.flush() catch {}; + writer.print("{s}:{d}:{d}: ", .{ source.path, line_col.line_no, line_col.col }) catch return; + writer.print(fmt, args) catch return; + writer.writeByte('\n') catch return; + writer.writeAll(line_col.line) catch return; + writer.writeByte('\n') catch return; +} + +fn fatal(pp: *Preprocessor, tok: PreprocessorToken, comptime fmt: []const u8, args: anytype) Compilation.Error { + try pp.comp.diagnostics.list.append(pp.gpa, .{ + .tag = .cli_error, + .kind = .@"fatal error", + .extra = .{ .str = try std.fmt.allocPrint(pp.comp.diagnostics.arena.allocator(), fmt, args) }, + .loc = tok.loc, + }); + return error.FatalError; +} + +fn fatalNotFound(pp: *Preprocessor, tok: PreprocessorToken, filename: []const u8) Compilation.Error { + const old = pp.comp.diagnostics.fatal_errors; + pp.comp.diagnostics.fatal_errors = true; + defer pp.comp.diagnostics.fatal_errors = old; + + try pp.comp.diagnostics.addExtra(pp.comp.langopts, .{ .tag = .cli_error, .loc = tok.loc, .extra = .{ + .str = try std.fmt.allocPrint(pp.comp.diagnostics.arena.allocator(), "'{s}' not found", .{filename}), + } }, tok.expansionSlice(), false); + unreachable; // addExtra should've returned FatalError +} + +/// Consume next token, error if it is not an identifier. +fn expectMacroName(pp: *Preprocessor) Error!?[]const u8 { + const macro_name = pp.getToken(); + if (!macro_name.id.isMacroIdentifier()) { + try pp.errTok(macro_name, .macro_name_missing); + pp.skipToNl(); + return null; + } + return pp.tokSlice(macro_name); +} + +/// Return the name of the #ifndef guard macro that starts a source, if any. 
+/// If a source starts with `#ifndef IDENTIFIER`, return `IDENTIFIER` +/// This function does not validate that the entire source is guarded by the +/// initial ifndef, if any +fn findIncludeGuard(pp: *Preprocessor, source: Source) ?[]const u8 { + var tokenizer = Tokenizer{ + .buf = source.buf, + .langopts = pp.comp.langopts, + .source = source.id, + }; + var hash = tokenizer.nextNoWS(); + while (hash.id == .nl) hash = tokenizer.nextNoWS(); + if (hash.id != .hash) return null; + const ifndef = tokenizer.nextNoWS(); + if (ifndef.id != .keyword_ifndef) return null; + const guard = tokenizer.nextNoWS(); + if (guard.id != .identifier) return null; + return pp.tokSlice(.{ .id = guard.id, .loc = .{ .id = guard.source, .byte_offset = guard.start, .line = guard.line } }); +} diff --git a/src/aro/Tokenizer.zig b/src/aro/Tokenizer.zig index 8ee38126..33f76429 100644 --- a/src/aro/Tokenizer.zig +++ b/src/aro/Tokenizer.zig @@ -10,6 +10,7 @@ pub const Token = struct { start: u32 = 0, end: u32 = 0, line: u32 = 0, + bol: bool = false, pub const Id = enum(u8) { invalid, @@ -323,6 +324,10 @@ pub const Token = struct { /// A comment token if asked to preserve comments. comment, + pub fn isDirectiveEnd(id: Id) bool { + return id == .nl or id == .eof; + } + /// Return true if token is identifier or keyword. pub fn isMacroIdentifier(id: Id) bool { switch (id) { @@ -1030,6 +1035,7 @@ index: u32 = 0, source: Source.Id, langopts: LangOpts, line: u32 = 1, +bol: bool = true, pub fn next(self: *Tokenizer) Token { var state: enum { @@ -1077,6 +1083,8 @@ pub fn next(self: *Tokenizer) Token { var start = self.index; var id: Token.Id = .eof; + const bol = self.bol; + self.bol = false; while (self.index < self.buf.len) : (self.index += 1) { const c = self.buf[self.index]; @@ -1086,6 +1094,7 @@ pub fn next(self: *Tokenizer) Token { id = .nl; self.index += 1; self.line += 1; + self.bol = true; break; }, '"' => { @@ -1265,6 +1274,7 @@ pub fn next(self: *Tokenizer) Token { }, '\n' => { id = .unterminated_string_literal; + self.bol = true; break; }, '\r' => unreachable, @@ -1281,6 +1291,7 @@ pub fn next(self: *Tokenizer) Token { }, '\n' => { id = .unterminated_char_literal; + self.bol = true; break; }, else => { @@ -1297,6 +1308,7 @@ pub fn next(self: *Tokenizer) Token { }, '\n' => { id = .unterminated_char_literal; + self.bol = true; break; }, else => {}, @@ -1304,6 +1316,7 @@ pub fn next(self: *Tokenizer) Token { .char_escape_sequence => switch (c) { '\r', '\n' => { id = .unterminated_char_literal; + self.bol = true; break; }, else => state = .char_literal, @@ -1311,6 +1324,7 @@ pub fn next(self: *Tokenizer) Token { .string_escape_sequence => switch (c) { '\r', '\n' => { id = .unterminated_string_literal; + self.bol = true; break; }, else => state = .string_literal, @@ -1624,6 +1638,7 @@ pub fn next(self: *Tokenizer) Token { }, .line_comment => switch (c) { '\n' => { + self.bol = true; if (self.langopts.preserve_comments) { id = .comment; break; @@ -1656,6 +1671,7 @@ pub fn next(self: *Tokenizer) Token { }, .multi_line_comment_done => switch (c) { '\n' => { + self.bol = true; start = self.index; id = .nl; self.index += 1; @@ -1782,6 +1798,7 @@ pub fn next(self: *Tokenizer) Token { .end = self.index, .line = self.line, .source = self.source, + .bol = bol, }; } diff --git a/src/aro/Treap.zig b/src/aro/Treap.zig new file mode 100644 index 00000000..74d3005d --- /dev/null +++ b/src/aro/Treap.zig @@ -0,0 +1,165 @@ +/// Persistent treap data structure. Nodes are immutable and set operations do not invalidate +/// existing nodes. 
+/// Adapted from https://arxiv.org/pdf/1301.3388 +const std = @import("std"); + +const Treap = @This(); +const Key = []const u8; + +pub const Node = ?*const Item; + +const Item = struct { + key: Key, + left: Node, + right: Node, + + fn priority(node: *const Item) u64 { + return std.hash.Wyhash.hash(0, node.key); + } + + const HashContext = struct { + pub fn hash(self: @This(), s: *const Item) u64 { + _ = self; + return std.hash.Wyhash.hash(0, std.mem.asBytes(s)); + } + pub fn eql(self: @This(), a: *const Item, b: *const Item) bool { + _ = self; + return a.left == b.left and a.right == b.right and std.mem.eql(u8, a.key, b.key); + } + }; +}; + +allocator: std.mem.Allocator, +node_arena: std.heap.ArenaAllocator, +/// nodes are hash-consed so structural equality can be determined by comparing pointers +nodes: std.HashMapUnmanaged(*const Item, void, Item.HashContext, std.hash_map.default_max_load_percentage) = .{}, + +pub fn init(allocator: std.mem.Allocator) Treap { + return .{ .allocator = allocator, .node_arena = std.heap.ArenaAllocator.init(allocator) }; +} + +pub fn deinit(self: *Treap) void { + self.nodes.deinit(self.allocator); + self.node_arena.deinit(); +} + +fn makeNode(self: *Treap, key: Key, left: Node, right: Node) !Node { + const node: Item = .{ .key = key, .left = left, .right = right }; + const gop = try self.nodes.getOrPut(self.allocator, &node); + if (gop.found_existing) return gop.key_ptr.*; + + const new_node = try self.node_arena.allocator().create(Item); + new_node.* = .{ + .key = key, + .left = left, + .right = right, + }; + gop.key_ptr.* = new_node; + return new_node; +} + +fn join(self: *Treap, t1_arg: Node, t2_arg: Node) !Node { + const t1 = t1_arg orelse return t2_arg; + const t2 = t2_arg orelse return t1_arg; + if (t1.priority() < t2.priority()) { + return self.makeNode(t2.key, try self.join(t1, t2.left), t2.right); + } else { + return self.makeNode(t1.key, t1.left, try self.join(t1.right, t2)); + } +} + +fn split(self: *Treap, t_arg: Node, key: Key) !struct { Node, Node } { + const t = t_arg orelse return .{ null, null }; + switch (std.mem.order(u8, key, t.key)) { + .lt => { + const l1, const l2 = try self.split(t.left, key); + return .{ l1, try self.makeNode(t.key, l2, t.right) }; + }, + .eq, .gt => { + const r1, const r2 = try self.split(t.right, key); + return .{ try self.makeNode(t.key, t.left, r1), r2 }; + }, + } +} + +fn add(self: *Treap, t1_arg: Node, t2_arg: Node) !Node { + const t1 = t1_arg orelse return t2_arg; + const t2 = t2_arg orelse return t1_arg; + std.debug.assert(!std.mem.eql(u8, t1.key, t2.key)); + if (t1.priority() < t2.priority()) { + const l1, const r1 = try self.split(t1, t2.key); + return self.makeNode(t2.key, try self.add(l1, t2.left), try self.add(r1, t2.right)); + } else { + const l2, const r2 = try self.split(t2, t1.key); + return self.makeNode(t1.key, try self.add(t1.left, l2), try self.add(t1.right, r2)); + } +} + +pub fn addNodeTo(self: *Treap, t1: Node, key: Key) !Node { + std.debug.assert(!self.contains(t1, key)); + const node = try self.makeNode(key, null, null); + return self.add(t1, node); +} + +pub fn @"union"(self: *Treap, t1_arg: Node, t2_arg: Node) !Node { + if (t1_arg == t2_arg) return t1_arg; + const t1 = t1_arg orelse return t2_arg; + const t2 = t2_arg orelse return t1_arg; + + if (std.mem.eql(u8, t1.key, t2.key)) { + return self.makeNode(t1.key, try self.@"union"(t1.left, t2.left), try self.@"union"(t1.right, t2.right)); + } else if (t1.priority() < t2.priority()) { + const l1, const r1 = try self.split(t1, t2.key); + 
return self.makeNode(t2.key, try self.@"union"(l1, t2.left), try self.@"union"(r1, t2.right)); + } else { + const l2, const r2 = try self.split(t2, t1.key); + return self.makeNode(t1.key, try self.@"union"(t1.left, l2), try self.@"union"(t1.right, r2)); + } +} + +pub fn intersection(self: *Treap, t1_arg: Node, t2_arg: Node) !Node { + if (t1_arg == t2_arg) return t1_arg; + const t1 = t1_arg orelse return null; + const t2 = t2_arg orelse return null; + + if (std.mem.eql(u8, t1.key, t2.key)) { + return self.makeNode(t1.key, try self.intersection(t1.left, t2.left), try self.intersection(t1.right, t2.right)); + } else if (t1.priority() < t2.priority()) { + const l1, const r1 = try self.split(t1, t2.key); + return self.join(try self.intersection(l1, t2.left), try self.intersection(r1, t2.right)); + } else { + const l2, const r2 = try self.split(t2, t1.key); + return self.join(try self.intersection(t1.left, l2), try self.intersection(t1.right, r2)); + } +} + +pub fn contains(self: *Treap, t_arg: Node, key: Key) bool { + const t = t_arg orelse return false; + return switch (std.mem.order(u8, key, t.key)) { + .eq => true, + .lt => self.contains(t.left, key), + .gt => self.contains(t.right, key), + }; +} + +test add { + var treap = Treap.init(std.testing.allocator); + defer treap.deinit(); + + const tree1 = try treap.addNodeTo(null, "1"); + const tree2 = try treap.addNodeTo(tree1, "2"); + const tree3 = try treap.addNodeTo(tree2, "3"); + const tree4 = try treap.addNodeTo(tree3, "4"); + + try std.testing.expect(treap.contains(tree1, "1")); + try std.testing.expect(!treap.contains(tree1, "2")); + try std.testing.expect(treap.contains(tree2, "1")); + try std.testing.expect(treap.contains(tree2, "2")); + try std.testing.expect(!treap.contains(tree2, "3")); + try std.testing.expect(treap.contains(tree3, "1")); + try std.testing.expect(treap.contains(tree3, "2")); + try std.testing.expect(treap.contains(tree3, "3")); + try std.testing.expect(!treap.contains(tree3, "4")); + + try std.testing.expect(treap.contains(tree4, "4")); +} From d96db70e6d32747d78097d1ef8aaf8a74215bc88 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Wed, 17 Jul 2024 13:32:15 -0700 Subject: [PATCH 03/10] Preprocessor: connect new preprocessor to parser --- src/aro/Driver.zig | 45 - src/aro/NewPreprocessor.zig | 2097 --------------- src/aro/Parser.zig | 33 + src/aro/Pragma.zig | 4 +- src/aro/Preprocessor.zig | 4756 ++++++++++++----------------------- src/aro/Tree.zig | 46 +- src/aro/pragmas/gcc.zig | 10 +- test/runner.zig | 6 +- 8 files changed, 1693 insertions(+), 5304 deletions(-) delete mode 100644 src/aro/NewPreprocessor.zig diff --git a/src/aro/Driver.zig b/src/aro/Driver.zig index 2d383bf0..db0b1155 100644 --- a/src/aro/Driver.zig +++ b/src/aro/Driver.zig @@ -9,7 +9,6 @@ const Compilation = @import("Compilation.zig"); const Diagnostics = @import("Diagnostics.zig"); const LangOpts = @import("LangOpts.zig"); const Preprocessor = @import("Preprocessor.zig"); -const NewPreprocessor = @import("NewPreprocessor.zig"); const Source = @import("Source.zig"); const Toolchain = @import("Toolchain.zig"); const target_util = @import("target.zig"); @@ -37,7 +36,6 @@ line_commands: bool = true, /// If true, use `#line ` instead of `# ` for line directives use_line_directives: bool = false, only_preprocess: bool = false, -new_preprocessor: bool = false, only_syntax: bool = false, only_compile: bool = false, only_preprocess_and_compile: bool = false, @@ -238,8 +236,6 @@ pub fn parseArgs( d.only_compile = true; } else if (mem.eql(u8, arg, "-E")) { 
d.only_preprocess = true; - } else if (mem.eql(u8, arg, "-fnew-preprocessor")) { - d.new_preprocessor = true; } else if (mem.eql(u8, arg, "-P") or mem.eql(u8, arg, "--no-line-commands")) { d.line_commands = false; } else if (mem.eql(u8, arg, "-fuse-line-directives")) { @@ -634,47 +630,6 @@ fn processSource( comptime fast_exit: bool, ) !void { d.comp.generated_buf.items.len = 0; - if (d.new_preprocessor) { - var pp = try NewPreprocessor.initDefault(d.comp); - defer pp.deinit(); - if (d.comp.langopts.ms_extensions) { - d.comp.ms_cwd_source_id = source.id; - } - - if (d.verbose_pp) pp.verbose = true; - if (d.only_preprocess) { - pp.preserve_whitespace = true; - if (d.line_commands) { - pp.linemarkers = if (d.use_line_directives) .line_directives else .numeric_directives; - } - } - - try pp.preprocessSources(&.{ source, builtin, user_macros }); - - d.renderErrors(); - - if (d.comp.diagnostics.errors != 0) { - if (fast_exit) std.process.exit(1); // Not linking, no need for cleanup. - return; - } - - const file = if (d.output_name) |some| - std.fs.cwd().createFile(some, .{}) catch |er| - return d.fatal("unable to create output file '{s}': {s}", .{ some, errorDescription(er) }) - else - std.io.getStdOut(); - defer if (d.output_name != null) file.close(); - - var buf_w = std.io.bufferedWriter(file.writer()); - pp.prettyPrintTokens(buf_w.writer()) catch |er| - return d.fatal("unable to write result: {s}", .{errorDescription(er)}); - - buf_w.flush() catch |er| - return d.fatal("unable to write result: {s}", .{errorDescription(er)}); - - std.process.exit(0); // Not linking, no need for cleanup. - return; - } var pp = try Preprocessor.initDefault(d.comp); defer pp.deinit(); diff --git a/src/aro/NewPreprocessor.zig b/src/aro/NewPreprocessor.zig deleted file mode 100644 index 77442d26..00000000 --- a/src/aro/NewPreprocessor.zig +++ /dev/null @@ -1,2097 +0,0 @@ -const std = @import("std"); -const mem = std.mem; -const Allocator = mem.Allocator; -const assert = std.debug.assert; -const Compilation = @import("Compilation.zig"); -const Error = Compilation.Error; -const Source = @import("Source.zig"); -const Tokenizer = @import("Tokenizer.zig"); -const RawToken = Tokenizer.Token; -const Parser = @import("Parser.zig"); -const Diagnostics = @import("Diagnostics.zig"); -const Tree = @import("Tree.zig"); -const Token = Tree.Token; -const TokenWithExpansionLocs = Tree.TokenWithExpansionLocs; -const Attribute = @import("Attribute.zig"); -const features = @import("features.zig"); -const OldPreprocessor = @import("Preprocessor.zig"); -const Treap = @import("treap.zig"); - -const ParamMap = std.StringHashMapUnmanaged(PreprocessorToken); -const DefineMap = std.StringHashMapUnmanaged(Macro); - -const TokenList = std.ArrayListUnmanaged(PreprocessorToken); -const max_include_depth = 200; - -/// Errors that can be returned when expanding a macro. 
-/// error.UnknownPragma can occur within Preprocessor.pragma() but -/// it is handled there and doesn't escape that function -const MacroError = Error || error{StopPreprocessing}; - -const PreprocessingError = Error || error{PreprocessingFailed}; - -const SpecialMacroFn = fn (*Preprocessor, PreprocessorToken) Error!void; - -fn Range(comptime T: type) type { - return struct { - const Self = @This(); - const Item = T; - - start: u32, - end: u32, - const empty: Self = .{ .start = 0, .end = 0 }; - - fn len(self: Self) u32 { - return self.end - self.start; - } - - fn slice(self: Self, items: []const Item) []const Item { - return items[self.start..self.end]; - } - }; -} - -/// Each macro argument is a list of tokens (represented as a range of Preprocessor.macro_arg_tokens) -const MacroArg = Range(PreprocessorToken); - -/// List of MacroArg's for a macro invocation (represented as a range of Preprocessor.macro_args) -const MacroArgList = Range(MacroArg); - -const PreprocessorToken = struct { - flags: packed struct(u8) { - is_bol: bool = false, - space: bool = false, - _: u6 = undefined, - } = .{}, - id: Tokenizer.Token.Id, - hideset: Treap.Node = null, - loc: Source.Location, - expansion_locs: ?[*]Source.Location = null, - - fn argPosition(self: PreprocessorToken) u32 { - std.debug.assert(self.id == .macro_param); - return self.loc.byte_offset; - } - - fn isVarArg(self: PreprocessorToken) bool { - std.debug.assert(self.id == .macro_param); - return self.loc.line != 0; - } - - pub fn expansionSlice(tok: PreprocessorToken) []const Source.Location { - const locs = tok.expansion_locs orelse return &[0]Source.Location{}; - var i: usize = 0; - while (locs[i].id != .unused) : (i += 1) {} - return locs[0..i]; - } - - pub fn addExpansionLocation(tok: *PreprocessorToken, gpa: std.mem.Allocator, new: []const Source.Location) !void { - if (new.len == 0 or tok.id == .whitespace or tok.id == .macro_ws or tok.id == .placemarker) return; - var list = std.ArrayList(Source.Location).init(gpa); - defer { - @memset(list.items.ptr[list.items.len..list.capacity], .{}); - // Add a sentinel to indicate the end of the list since - // the ArrayList's capacity isn't guaranteed to be exactly - // what we ask for. - if (list.capacity > 0) { - list.items.ptr[list.capacity - 1].byte_offset = 1; - } - tok.expansion_locs = list.items.ptr; - } - - if (tok.expansion_locs) |locs| { - var i: usize = 0; - while (locs[i].id != .unused) : (i += 1) {} - list.items = locs[0..i]; - while (locs[i].byte_offset != 1) : (i += 1) {} - list.capacity = i + 1; - } - - const min_len = @max(list.items.len + new.len + 1, 4); - const wanted_len = std.math.ceilPowerOfTwo(usize, min_len) catch - return error.OutOfMemory; - try list.ensureTotalCapacity(wanted_len); - - for (new) |new_loc| { - if (new_loc.id == .generated) continue; - list.appendAssumeCapacity(new_loc); - } - } - - pub fn free(expansion_locs: ?[*]Source.Location, gpa: std.mem.Allocator) void { - const locs = expansion_locs orelse return; - var i: usize = 0; - while (locs[i].id != .unused) : (i += 1) {} - while (locs[i].byte_offset != 1) : (i += 1) {} - gpa.free(locs[0 .. 
i + 1]); - } - - pub fn dupe(tok: PreprocessorToken, gpa: std.mem.Allocator) !PreprocessorToken { - var copy = tok; - copy.expansion_locs = null; - try copy.addExpansionLocation(gpa, tok.expansionSlice()); - return copy; - } - - pub fn checkMsEof(tok: PreprocessorToken, source: Source, comp: *Compilation) !void { - std.debug.assert(tok.id == .eof); - if (source.buf.len > tok.loc.byte_offset and source.buf[tok.loc.byte_offset] == 0x1A) { - try comp.addDiagnostic(.{ - .tag = .ctrl_z_eof, - .loc = .{ - .id = source.id, - .byte_offset = tok.loc.byte_offset, - .line = tok.loc.line, - }, - }, &.{}); - } - } - - const one: PreprocessorToken = .{ .id = .one, .loc = .{} }; - const zero: PreprocessorToken = .{ .id = .zero, .loc = .{} }; -}; - -const Macro = struct { - /// Tokens constituting the macro body - tokens: []const PreprocessorToken, - - /// Number of arguments for function-like macros - nargs: usize, - - /// If the function type macro has variable number of arguments - var_args: bool, - - /// Location of macro in the source - loc: Source.Location, - - kind: Kind, - - const Kind = union(enum) { - object, - func, - special: *const SpecialMacroFn, - }; - - fn eql(a: Macro, b: Macro, pp: *Preprocessor) bool { - if ((a.kind == .object and b.kind != .object) or (a.kind == .func and b.kind != .func)) return false; - if (!std.meta.eql(a.kind, b.kind)) return false; - if (a.tokens.len != b.tokens.len) return false; - for (a.tokens, b.tokens) |a_tok, b_tok| if (!tokEql(pp, a_tok, b_tok)) return false; - - if (a.kind == .func) { - if (a.var_args != b.var_args) return false; - } - - return true; - } - - fn tokEql(pp: *Preprocessor, a: PreprocessorToken, b: PreprocessorToken) bool { - return mem.eql(u8, pp.tokSlice(a), pp.tokSlice(b)); - } -}; - -const Preprocessor = @This(); - -const ExpansionEntry = struct { - idx: Tree.TokenIndex, - locs: [*]Source.Location, -}; - -const TokenState = struct { - tokens_len: usize, - expansion_entries_len: usize, -}; - -comp: *Compilation, -gpa: mem.Allocator, -arena: std.heap.ArenaAllocator, - -tokens: std.MultiArrayList(PreprocessorToken) = .{}, -/// Do not directly mutate this; must be kept in sync with `tokens` -expansion_entries: std.MultiArrayList(ExpansionEntry) = .{}, - -/// Map from Source.Id to macro name in the `#ifndef` condition which guards the source, if any -include_guards: std.AutoHashMapUnmanaged(Source.Id, []const u8) = .{}, - -char_buf: std.ArrayListUnmanaged(u8) = .{}, - -/// Dump current state to stderr. -verbose: bool = false, -preserve_whitespace: bool = false, - -/// linemarker tokens. Must be .none unless in -E mode (parser does not handle linemarkers) -linemarkers: Linemarkers = .none, - -tokenizers: std.ArrayListUnmanaged(Tokenizer) = .{}, - -expansion_bufs: std.ArrayListUnmanaged(TokenList) = .{}, - -defines: DefineMap = .{}, - -generated_line: u32 = 1, - -counter: u32 = 0, - -if_level: u8 = 0, - -if_kind: std.PackedIntArray(u2, 256) = blk: { - @setEvalBranchQuota(2000); - break :blk std.PackedIntArray(u2, 256).initAllTo(0); -}, - -guard_stack: std.ArrayListUnmanaged(?[]const u8) = .{}, - -macro_arg_tokens: std.ArrayListUnmanaged(MacroArg.Item) = .{}, -macro_args: std.ArrayListUnmanaged(MacroArgList.Item) = .{}, - -safe_strings: std.StringHashMapUnmanaged(void) = .{}, - -treap: Treap, - -pub const parse = Parser.parse; - -pub const Linemarkers = enum { - /// No linemarker tokens. 
Required setting if parser will run - none, - /// #line "filename" - line_directives, - /// # "filename" flags - numeric_directives, -}; - -pub fn init(comp: *Compilation) Preprocessor { - const pp = Preprocessor{ - .comp = comp, - .gpa = comp.gpa, - .arena = std.heap.ArenaAllocator.init(comp.gpa), - .treap = Treap.init(comp.gpa), - }; - comp.pragmaEvent(.before_preprocess); - return pp; -} - -fn addBuiltinMacro(pp: *Preprocessor, name: []const u8, func: *const SpecialMacroFn) !void { - try pp.defines.putNoClobber(pp.gpa, name, .{ - .tokens = &.{}, - .var_args = false, - .loc = .{ .id = .generated }, - .kind = .{ .special = func }, - .nargs = 0, - }); -} - -fn handleLineMacro(pp: *Preprocessor, tok: PreprocessorToken) Error!void { - const start = pp.comp.generated_buf.items.len; - const source = pp.comp.getSource(tok.loc.id); - const w = pp.comp.generated_buf.writer(pp.gpa); - try w.print("{d}\n", .{source.physicalLine(tok.loc)}); - const pasted_tok = try pp.makeGeneratedToken(start, .pp_num, tok); - return pp.ungetToken(pasted_tok); -} - -fn handleFileMacro(pp: *Preprocessor, tok: PreprocessorToken) Error!void { - const start = pp.comp.generated_buf.items.len; - const source = pp.comp.getSource(tok.loc.id); - const w = pp.comp.generated_buf.writer(pp.gpa); - try w.print("\"{s}\"\n", .{source.path}); - const pasted_tok = try pp.makeGeneratedToken(start, .string_literal, tok); - return pp.ungetToken(pasted_tok); -} - -fn handleCounterMacro(pp: *Preprocessor, tok: PreprocessorToken) Error!void { - defer pp.counter += 1; - const start = pp.comp.generated_buf.items.len; - const w = pp.comp.generated_buf.writer(pp.gpa); - try w.print("{d}\n", .{pp.counter}); - const pasted_tok = try pp.makeGeneratedToken(start, .pp_num, tok); - return pp.ungetToken(pasted_tok); -} - -fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: PreprocessorToken) !PreprocessorToken { - const pasted_token = PreprocessorToken{ .id = id, .flags = source.flags, .loc = .{ - .id = .generated, - .byte_offset = @intCast(start), - .line = pp.generated_line, - } }; - pp.generated_line += 1; - // try pasted_token.addExpansionLocation(pp.gpa, &.{source.loc}); - // try pasted_token.addExpansionLocation(pp.gpa, source.expansionSlice()); - return pasted_token; -} - -fn errStr(pp: *Preprocessor, tok: PreprocessorToken, tag: Diagnostics.Tag, str: []const u8) !void { - try pp.comp.addDiagnostic(.{ - .tag = tag, - .loc = tok.loc, - .extra = .{ .str = str }, - }, &.{}); // todo expansion slice -} - -fn errTok(pp: *Preprocessor, tok: PreprocessorToken, tag: Diagnostics.Tag) !void { - try pp.comp.addDiagnostic(.{ - .tag = tag, - .loc = tok.loc, - .extra = .{ .none = {} }, - }, &.{}); // todo expansion slice -} - -fn expectClosing(pp: *Preprocessor, opening: PreprocessorToken, id: Token.Id) !void { - // todo: fix expect - const item = try pp.expect(id, .closing_paren); - if (item.id != id) { - try pp.errTok(opening, .to_match_paren); - } -} - -fn tokFromBool(b: bool) PreprocessorToken { - return if (b) PreprocessorToken.one else PreprocessorToken.zero; -} - -fn handleHasAttribute(pp: *Preprocessor, tok: PreprocessorToken) Error!void { - _ = tok; - const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); - const attr_name = try pp.readToken(); - try pp.expectClosing(l_paren, .r_paren); - - const has_attr = Attribute.fromString(.gnu, null, pp.tokSlice(attr_name)) != null; - return pp.ungetToken(tokFromBool(has_attr)); -} - -fn handleHasCAttribute(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void 
{ - _ = macro_tok; - const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); - var r: TokenList = .{}; - defer r.deinit(pp.gpa); - - var tok: PreprocessorToken = undefined; - while (true) { - tok = try pp.readToken(); - if (tok.id == .comment) continue; - if (tok.id.isDirectiveEnd() or tok.id == .r_paren) break; - try r.append(pp.gpa, tok); - } - try pp.expectClosing(l_paren, .r_paren); -} - -fn handleHasDeclSpecAttribute(pp: *Preprocessor, tok: PreprocessorToken) Error!void { - _ = tok; - const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); - const attr_name = try pp.readToken(); - try pp.expectClosing(l_paren, .r_paren); - - const ident_str = pp.tokSlice(attr_name); - const has_attr = if (pp.comp.langopts.declspec_attrs) Attribute.fromString(.declspec, null, ident_str) != null else false; - return pp.ungetToken(tokFromBool(has_attr)); -} - -fn handleHasFeature(pp: *Preprocessor, tok: PreprocessorToken) Error!void { - _ = tok; - const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); - const attr_name = try pp.readToken(); - try pp.expectClosing(l_paren, .r_paren); - - const ident_str = pp.tokSlice(attr_name); - const has_feature = features.hasFeature(pp.comp, ident_str); - return pp.ungetToken(tokFromBool(has_feature)); -} - -fn handleHasExtension(pp: *Preprocessor, tok: PreprocessorToken) Error!void { - _ = tok; - const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); - const attr_name = try pp.readToken(); - try pp.expectClosing(l_paren, .r_paren); - - const ident_str = pp.tokSlice(attr_name); - const has_extension = features.hasExtension(pp.comp, ident_str); - return pp.ungetToken(tokFromBool(has_extension)); -} - -fn handleHasBuiltin(pp: *Preprocessor, tok: PreprocessorToken) Error!void { - _ = tok; - const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); - const attr_name = try pp.readToken(); - try pp.expectClosing(l_paren, .r_paren); - - const ident_str = pp.tokSlice(attr_name); - const has_builtin = pp.comp.hasBuiltin(ident_str); - return pp.ungetToken(tokFromBool(has_builtin)); -} - -fn handleHasWarning(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { - const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); - const start = pp.char_buf.items.len; - defer pp.char_buf.items.len = start; - - while (true) { - const tok = try pp.readExpandNewline(); - switch (tok.id) { - .nl, .eof => { - try pp.errTok(tok, .unterminated_macro_arg_list); - return pp.ungetToken(PreprocessorToken.zero); - }, - .r_paren => break, - .string_literal => { - const string = pp.tokSlice(tok); - try pp.char_buf.appendSlice(pp.gpa, string[1 .. 
string.len - 1]); - }, - else => { - pp.skipToNl(); - try pp.errTok(tok, .missing_paren_param_list); - try pp.errTok(l_paren, .to_match_paren); - return pp.ungetToken(PreprocessorToken.zero); - }, - } - } - const actual_param = pp.char_buf.items[start..]; - if (actual_param.len == 0) { - try pp.comp.addDiagnostic(.{ - .tag = .expected_arguments, - .loc = macro_tok.loc, - .extra = .{ .arguments = .{ .expected = 1, .actual = 0 } }, - }, &.{}); // todo expansion slice - return pp.ungetToken(PreprocessorToken.zero); - } - if (!mem.startsWith(u8, actual_param, "-W")) { - try pp.errStr(l_paren, .malformed_warning_check, "__has_warning"); - return pp.ungetToken(PreprocessorToken.zero); - } - const warning_name = actual_param[2..]; - const exists = Diagnostics.warningExists(warning_name); - return pp.ungetToken(tokFromBool(exists)); -} - -fn handleHasInclude(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { - return pp.handleHasIncludeExtra(macro_tok, .first); -} - -fn handleHasIncludeNext(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { - return pp.handleHasIncludeExtra(macro_tok, .next); -} - -fn handleHasIncludeExtra(pp: *Preprocessor, macro_tok: PreprocessorToken, which: Compilation.WhichInclude) Error!void { - const l_paren = pp.getToken(); - if (l_paren.id != .l_paren) { - pp.skipToNl(); - return; - } - - var is_std: bool = undefined; - const include_str = pp.readHeaderName(&is_std) catch |err| switch (err) { - error.InvalidInclude => return pp.ungetToken(PreprocessorToken.zero), - else => |e| return e, - }; - try pp.expectClosing(l_paren, .r_paren); - - const filename = include_str[1 .. include_str.len - 1]; - const include_type: Compilation.IncludeType = switch (include_str[0]) { - '"' => .quotes, - '<' => .angle_brackets, - else => unreachable, - }; - - if (which == .first or pp.includeDepth() == 0) { - if (which == .next) { - try pp.comp.addDiagnostic(.{ - .tag = .include_next_outside_header, - .loc = macro_tok.loc, - }, &.{}); - } - const has = try pp.comp.hasInclude(filename, macro_tok.loc.id, include_type, .first); - return pp.ungetToken(tokFromBool(has)); - } - const has = try pp.comp.hasInclude(filename, macro_tok.loc.id, include_type, .next); - return pp.ungetToken(tokFromBool(has)); -} - -fn includeDepth(pp: *Preprocessor) usize { - return pp.tokenizers.items.len - 1; -} - -fn hasEmbedValue(contents_arg: ?[]const u8) []const u8 { - const contents = contents_arg orelse return "0\n"; - if (contents.len == 0) return "2\n"; - return "1\n"; -} - -/// TODO: handle limit/prefix/suffix/etc -fn handleHasEmbed(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { - const l_paren = pp.getToken(); - if (l_paren.id != .l_paren) { - pp.skipToNl(); - return; - } - - var is_std: bool = undefined; - const include_str = pp.readHeaderName(&is_std) catch |err| switch (err) { - error.InvalidInclude => return, - else => |e| return e, - }; - try pp.expectClosing(l_paren, .r_paren); - - const filename = include_str[1 .. 
include_str.len - 1]; - const include_type: Compilation.IncludeType = switch (include_str[0]) { - '"' => .quotes, - '<' => .angle_brackets, - else => unreachable, - }; - - const contents = try pp.comp.findEmbed(filename, macro_tok.loc.id, include_type, 1); - const result = hasEmbedValue(contents); - const start = pp.comp.generated_buf.items.len; - try pp.comp.generated_buf.appendSlice(pp.comp.gpa, result); - const pasted_tok = try pp.makeGeneratedToken(start, .pp_num, macro_tok); - return pp.ungetToken(pasted_tok); -} - -// Skip until newline, ignore other tokens. -fn skipToNl(pp: *Preprocessor) void { - while (true) { - const tok = pp.getToken(); - if (tok.id.isDirectiveEnd()) return; - } -} - -fn readOneIdentifierArgument(pp: *Preprocessor, macro_tok: PreprocessorToken) !?PreprocessorToken { - const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); - _ = l_paren; - var invalid: ?PreprocessorToken = null; - var identifier: ?PreprocessorToken = null; - while (true) { - var tok = pp.getToken(); - tok.id.simplifyMacroKeywordExtra(true); - - switch (tok.id) { - .r_paren, .eof => break, - else => { - if (identifier) |_| invalid = tok else identifier = tok; - }, - } - } - if (invalid) |some| { - try pp.comp.addDiagnostic(.{ - .tag = .missing_tok_builtin, - .loc = some.loc, - .extra = .{ .tok_id_expected = .r_paren }, - }, &.{}); // TODO: expansion slice - return null; - } - if (identifier) |ident| { - if (ident.id == .identifier or ident.id == .extended_identifier) return ident; - } else { - const extra: Diagnostics.Message.Extra = .{ .arguments = .{ .expected = 1, .actual = 0 } }; - try pp.comp.addDiagnostic(.{ .tag = .expected_arguments, .loc = macro_tok.loc, .extra = extra }, &.{}); - } - return null; -} - -fn handleIsIdentifier(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { - if (try pp.readOneIdentifierArgument(macro_tok)) |_| { - return pp.ungetToken(PreprocessorToken.one); - } else { - return pp.ungetToken(PreprocessorToken.zero); - } -} - -fn handlePragmaOperator(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { - _ = pp; - _ = macro_tok; - // TODO -} - -fn addBuiltinMacros(pp: *Preprocessor) !void { - try pp.addBuiltinMacro("__has_attribute", handleHasAttribute); - try pp.addBuiltinMacro("__has_c_attribute", handleHasCAttribute); - try pp.addBuiltinMacro("__has_declspec_attribute", handleHasDeclSpecAttribute); - try pp.addBuiltinMacro("__has_feature", handleHasFeature); - try pp.addBuiltinMacro("__has_extension", handleHasExtension); - try pp.addBuiltinMacro("__has_builtin", handleHasBuiltin); - try pp.addBuiltinMacro("__has_warning", handleHasWarning); - try pp.addBuiltinMacro("__has_include", handleHasInclude); - try pp.addBuiltinMacro("__has_include_next", handleHasIncludeNext); - try pp.addBuiltinMacro("__has_embed", handleHasEmbed); - - try pp.addBuiltinMacro("__is_identifier", handleIsIdentifier); - - try pp.addBuiltinMacro("__FILE__", handleFileMacro); - try pp.addBuiltinMacro("__LINE__", handleLineMacro); - try pp.addBuiltinMacro("__COUNTER__", handleCounterMacro); - try pp.addBuiltinMacro("_Pragma", handlePragmaOperator); -} - -/// Initialize Preprocessor with builtin macros. 
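Each handler registered in `addBuiltinMacros` above follows the same lazy pattern: it computes its result, turns it into a single token (via `makeGeneratedToken`, or the prebuilt `one`/`zero` tokens), and pushes that token back with `ungetToken`, so the enclosing `readExpand` loop picks it up as the macro's replacement. For example, since `handleCounterMacro` prints `pp.counter` and only then increments it, three successive uses of `__COUNTER__` in a translation unit expand to the pp-numbers 0, 1, and 2.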
-pub fn initDefault(comp: *Compilation) !Preprocessor { - var pp = init(comp); - errdefer pp.deinit(); - try pp.addBuiltinMacros(); - return pp; -} - -pub fn deinit(pp: *Preprocessor) void { - pp.arena.deinit(); - pp.include_guards.deinit(pp.gpa); - pp.tokens.deinit(pp.gpa); - pp.tokenizers.deinit(pp.gpa); - for (pp.expansion_bufs.items) |*toklist| { - toklist.deinit(pp.gpa); - } - pp.expansion_bufs.deinit(pp.gpa); - pp.defines.deinit(pp.gpa); - pp.char_buf.deinit(pp.gpa); - for (pp.expansion_entries.items(.locs)) |locs| PreprocessorToken.free(locs, pp.gpa); - pp.expansion_entries.deinit(pp.gpa); - pp.guard_stack.deinit(pp.gpa); - pp.macro_arg_tokens.deinit(pp.gpa); - pp.macro_args.deinit(pp.gpa); - pp.safe_strings.deinit(pp.gpa); - pp.treap.deinit(); -} - -/// Preprocess a compilation unit of sources into a parsable list of tokens. -pub fn preprocessSources(pp: *Preprocessor, sources: []const Source) Error!void { - assert(sources.len > 1); - const first = sources[0]; - - for (sources[1..]) |header| { - _ = try pp.preprocess(header); - } - const eof = try pp.preprocess(first); - try pp.addToken(eof); -} - -fn propagateSpace(pp: *Preprocessor, tokens: []PreprocessorToken, template: PreprocessorToken) void { - if (tokens.len > 0) { - tokens[0].flags = template.flags; - } else { - pp.injectSpace(); - } -} - -fn ungetAll(pp: *Preprocessor, tokens: []const PreprocessorToken) !void { - if (tokens.len == 0) return; - const start = pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].items.len; - try pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].appendSlice(pp.gpa, tokens); - std.mem.reverse(PreprocessorToken, pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].items[start..]); -} - -fn addHideSet(pp: *Preprocessor, toks: []PreprocessorToken, hideset: Treap.Node) !void { - for (toks) |*tok| { - switch (tok.id) { - // non-identifiers are not expanded, so we don't need to track their hidesets. - // Track r_paren hideset since it is used for computing the hideset of function-like macro expansions - .identifier, .extended_identifier, .r_paren => { - tok.hideset = try pp.treap.@"union"(tok.hideset, hideset); - }, - else => {}, - } - } -} - -fn stringize(pp: *Preprocessor, tmpl: PreprocessorToken, args_range: MacroArg) !PreprocessorToken { - const start = pp.comp.generated_buf.items.len; - try pp.comp.generated_buf.append(pp.gpa, '"'); - const args = args_range.slice(pp.macro_arg_tokens.items); - for (args, 0..) 
|tok, i| { - const slice = pp.tokSlice(tok); - if (slice.len > 0 and tok.flags.space and i != 0) { - try pp.comp.generated_buf.append(pp.gpa, ' '); - } - try pp.comp.generated_buf.appendSlice(pp.gpa, slice); - } - try pp.comp.generated_buf.append(pp.gpa, '"'); - var tok = tmpl; - tok.id = .string_literal; - tok.loc = .{ - .id = .generated, - .byte_offset = @intCast(start), - .line = pp.generated_line, - }; - pp.generated_line += 1; - return tok; -} - -fn subst(pp: *Preprocessor, macro: *const Macro, macro_tok: PreprocessorToken, args: MacroArgList, hideset_arg: Treap.Node) ![]PreprocessorToken { - _ = macro_tok; - var hideset = hideset_arg; - var r: TokenList = .{}; - defer r.deinit(pp.gpa); - var i: usize = 0; - while (i < macro.tokens.len) : (i += 1) { - const t0 = macro.tokens[i]; - const t1: ?PreprocessorToken = if (i == macro.tokens.len - 1) null else macro.tokens[i + 1]; - - const t0_param = t0.id == .macro_param; - const t1_param = if (t1) |tok| tok.id == .macro_param else false; - - if (t0.id == .hash and t1_param) { - const arg = args.slice(pp.macro_args.items)[t1.?.argPosition()]; - const stringized = try pp.stringize(t0, arg); - try r.append(pp.gpa, stringized); - i += 1; - continue; - } - if (t0.id == .hash_hash and t1_param) { - const arg = args.slice(pp.macro_args.items)[t1.?.argPosition()]; - if (t1.?.isVarArg() and r.items.len > 0 and r.items[r.items.len - 1].id == .comma) { - if (arg.len() == 0) { - _ = r.pop(); - } else { - try r.appendSlice(pp.gpa, arg.slice(pp.macro_arg_tokens.items)); - } - } else if (arg.len() > 0) { - try pp.pasteAndPush(&r, arg.slice(pp.macro_arg_tokens.items)[0]); - try r.appendSlice(pp.gpa, arg.slice(pp.macro_arg_tokens.items)[1..]); - } - i += 1; - continue; - } - if (t0.id == .hash_hash and t1 != null) { - hideset = t1.?.hideset; - try pp.pasteAndPush(&r, t1.?); - i += 1; - continue; - } - if (t0_param and t1 != null and t1.?.id == .hash_hash) { - hideset = t1.?.hideset; - const arg = args.slice(pp.macro_args.items)[t0.argPosition()]; - if (arg.len() == 0) { - i += 1; - } else { - try r.appendSlice(pp.gpa, arg.slice(pp.macro_arg_tokens.items)); - } - continue; - } - if (t0_param) { - const arg = args.slice(pp.macro_args.items)[t0.argPosition()]; - const expanded = try pp.expandAll(arg.slice(pp.macro_arg_tokens.items), t0); - defer pp.gpa.free(expanded); - try r.appendSlice(pp.gpa, expanded); - continue; - } - try r.append(pp.gpa, t0); - } - try pp.addHideSet(r.items, hideset); - return r.toOwnedSlice(pp.gpa); -} - -fn pasteTokens(pp: *Preprocessor, lhs: PreprocessorToken, rhs: PreprocessorToken) !PreprocessorToken { - const start = pp.comp.generated_buf.items.len; - const end = start + pp.tokSlice(lhs).len + pp.tokSlice(rhs).len; - try pp.comp.generated_buf.ensureTotalCapacity(pp.gpa, end + 1); // +1 for a newline - - // We cannot use the same slices here since they might be invalidated by `ensureCapacity` - pp.comp.generated_buf.appendSliceAssumeCapacity(pp.tokSlice(lhs)); - pp.comp.generated_buf.appendSliceAssumeCapacity(pp.tokSlice(rhs)); - pp.comp.generated_buf.appendAssumeCapacity('\n'); - - // Try to tokenize the result. 
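In other words, the spellings of the two operands are appended to the generated buffer and re-lexed below; if the re-lex does not consume the whole pasted spelling as one token (the follow-up token is not the trailing newline), the paste was invalid. For example, pasting `x` onto `1` yields the single identifier `x1`, while pasting `+` onto `-` leaves the `-` unconsumed and raises the `pasting_formed_invalid` diagnostic.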
- var tmp_tokenizer = Tokenizer{ - .buf = pp.comp.generated_buf.items, - .langopts = pp.comp.langopts, - .index = @intCast(start), - .source = .generated, - }; - const pasted_token = tmp_tokenizer.nextNoWSComments(); - const next_tok = tmp_tokenizer.next(); - if (next_tok.id != .nl) { - try pp.errStr( - lhs, - .pasting_formed_invalid, - try pp.comp.diagnostics.arena.allocator().dupe(u8, pp.comp.generated_buf.items[start..end]), - ); - } - return pp.makeGeneratedToken(start, pasted_token.id, lhs); -} - -/// Paste `tok` onto the last token in `tokens` -fn pasteAndPush(pp: *Preprocessor, tokens: *TokenList, tok: PreprocessorToken) !void { - const last = tokens.pop(); - const pasted = try pp.pasteTokens(last, tok); - return tokens.append(pp.gpa, pasted); -} - -fn tokenBufferStashReverse(pp: *Preprocessor, tokens: []const PreprocessorToken) !void { - try pp.expansion_bufs.append(pp.gpa, .{}); - try pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].appendSlice(pp.gpa, tokens); - std.mem.reverse(PreprocessorToken, pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].items); -} - -fn tokenBufferUnstash(pp: *Preprocessor) void { - var buf = pp.expansion_bufs.pop(); - buf.deinit(pp.gpa); -} - -fn expandAll(pp: *Preprocessor, tokens: []const PreprocessorToken, tmpl: PreprocessorToken) ![]const PreprocessorToken { - try pp.tokenBufferStashReverse(tokens); - defer pp.tokenBufferUnstash(); - var r: TokenList = .{}; - defer r.deinit(pp.gpa); - while (true) { - const tok = try pp.readExpand(); - if (tok.id == .eof) break; - try r.append(pp.gpa, tok); - } - pp.propagateSpace(r.items, tmpl); - return r.toOwnedSlice(pp.gpa); -} - -fn peekToken(pp: *Preprocessor) !PreprocessorToken { - const tok = try pp.readToken(); - try pp.ungetToken(tok); - return tok; -} - -/// Return a string with the same contents as `name` and whose lifetime is the same as the preprocessor's lifetime -/// If `tok` is not from the generated source, this is just `name`. -/// If `tok` is from the generated source, pointers are invalidated when the underlying ArrayList is resized. 
Therefore, -/// duplicate the string and store it (so we aren't repeatedly copying the same string) -fn getSafeString(pp: *Preprocessor, tok: PreprocessorToken, name: []const u8) ![]const u8 { - if (tok.loc.id != .generated) return name; - const gop = try pp.safe_strings.getOrPut(pp.gpa, name); - if (!gop.found_existing) { - const copy = try pp.arena.allocator().dupe(u8, name); - gop.key_ptr.* = copy; - } - return gop.key_ptr.*; -} - -fn injectSpace(pp: *Preprocessor) void { - var i = pp.expansion_bufs.items.len; - while (i > 0) : (i -= 1) { - var j = pp.expansion_bufs.items[i - 1].items.len; - while (j > 0) : (j -= 1) { - pp.expansion_bufs.items[i - 1].items[j - 1].flags.space = true; - return; - } - } -} - -fn readExpandNewline(pp: *Preprocessor) Error!PreprocessorToken { - const tok = pp.getToken(); - if (!tok.id.isMacroIdentifier()) return tok; - const name = pp.tokSlice(tok); - const macro = pp.defines.getPtr(name) orelse return tok; - - const macro_hideset = tok.hideset; - if (pp.treap.contains(macro_hideset, name)) return tok; - - switch (macro.kind) { - .object => { - const safe_name = try pp.getSafeString(tok, name); - const new_hideset = try pp.treap.addNodeTo(tok.hideset, safe_name); - - const tokens = try pp.subst(macro, tok, MacroArgList.empty, new_hideset); - defer pp.gpa.free(tokens); - pp.propagateSpace(tokens, tok); - try pp.ungetAll(tokens); - return pp.readExpand(); - }, - .func => { - if (!try pp.next(.l_paren)) return tok; - const arg_tokens_start = pp.macro_arg_tokens.items.len; - defer pp.macro_arg_tokens.items.len = arg_tokens_start; - const macro_args_start = pp.macro_args.items.len; - defer pp.macro_args.items.len = macro_args_start; - - const args = pp.readArgs(tok, macro) catch |err| switch (err) { - error.IncorrectArgumentCount => return PreprocessorToken.zero, - error.UnterminatedMacroArgumentList => { - try pp.errTok(tok, .unterminated_macro_arg_list); - return PreprocessorToken.zero; - }, - else => |e| return e, - }; - const r_paren = pp.getToken(); - std.debug.assert(r_paren.id == .r_paren); - const safe_name = try pp.getSafeString(tok, name); - - const intersection = try pp.treap.intersection(macro_hideset, r_paren.hideset); - const hideset = try pp.treap.addNodeTo(intersection, safe_name); - const tokens = try pp.subst(macro, tok, args, hideset); - defer pp.gpa.free(tokens); - pp.propagateSpace(tokens, tok); - try pp.ungetAll(tokens); - return pp.readExpand(); - }, - .special => |func| { - try func(pp, tok); - return pp.readExpand(); - }, - } -} - -fn readMacroArg(pp: *Preprocessor, end: *bool, readall: bool) !MacroArg { - var level: i32 = 0; - const start: u32 = @intCast(pp.macro_arg_tokens.items.len); - while (true) { - var tok = pp.getToken(); - if (tok.id == .eof) { - return error.UnterminatedMacroArgumentList; - } - if (tok.id == .nl) continue; - if (tok.flags.is_bol and tok.id == .hash) { - try pp.readDirective(); - continue; - } - if (level == 0 and tok.id == .r_paren) { - try pp.ungetToken(tok); - end.* = true; - break; - } - if (level == 0 and tok.id == .comma and !readall) { - break; - } - if (tok.id == .l_paren) { - level += 1; - } - if (tok.id == .r_paren) { - level -= 1; - } - if (tok.flags.is_bol) { - tok.flags = .{ .is_bol = false, .space = true }; - } - try pp.macro_arg_tokens.append(pp.gpa, tok); - } - return .{ .start = start, .end = @intCast(pp.macro_arg_tokens.items.len) }; -} - -fn doReadArgs(pp: *Preprocessor, macro: *const Macro) !MacroArgList { - const start: u32 = @intCast(pp.macro_args.items.len); - var end = false; - while 
(!end) { - const in_ellipsis = macro.var_args and (pp.macro_args.items.len - start) + 1 == macro.nargs; - const arg_range = try pp.readMacroArg(&end, in_ellipsis); - try pp.macro_args.append(pp.gpa, arg_range); - } - if (macro.var_args and (pp.macro_args.items.len - start) + 1 == macro.nargs) { - try pp.macro_args.append(pp.gpa, MacroArg.empty); - } - return .{ .start = start, .end = @intCast(pp.macro_args.items.len) }; -} - -fn readArgs(pp: *Preprocessor, ident: PreprocessorToken, macro: *const Macro) !MacroArgList { - if (macro.nargs == 0 and (try pp.peekToken()).id == .r_paren) { - return MacroArgList.empty; - } - const args = try pp.doReadArgs(macro); - if (args.len() != macro.nargs) { - const extra = Diagnostics.Message.Extra{ - .arguments = .{ .expected = @intCast(macro.nargs), .actual = @intCast(args.len()) }, - }; - try pp.comp.addDiagnostic( - .{ .tag = .expected_arguments, .loc = ident.loc, .extra = extra }, - &.{}, // TODO: expansion slice - ); - return error.IncorrectArgumentCount; - } - return args; -} - -fn readExpand(pp: *Preprocessor) Error!PreprocessorToken { - while (true) { - const tok = try pp.readExpandNewline(); - if (tok.id != .nl) return tok; - } -} - -/// # number "file" flags -/// TODO: validate that the pp_num token is solely digits -/// if not, emit `GNU line marker directive requires a simple digit sequence` -fn readLinemarker(pp: *Preprocessor) !void { - const name = pp.getToken(); - if (name.id.isDirectiveEnd()) return; - if (name.id != .string_literal) try pp.errTok(name, .line_invalid_filename); - - const flag_1 = pp.getToken(); - if (flag_1.id.isDirectiveEnd()) return; - const flag_2 = pp.getToken(); - if (flag_2.id.isDirectiveEnd()) return; - const flag_3 = pp.getToken(); - if (flag_3.id.isDirectiveEnd()) return; - const flag_4 = pp.getToken(); - if (flag_4.id.isDirectiveEnd()) return; - try pp.expectNewline(); -} - -fn readIdent(pp: *Preprocessor) !?PreprocessorToken { - const tok = pp.getToken(); - if (!tok.id.isMacroIdentifier()) { - try pp.errTok(tok, .macro_name_must_be_identifier); - return null; - } - return tok; -} - -fn ungetToken(pp: *Preprocessor, tok: PreprocessorToken) !void { - if (tok.id == .eof) return; - if (pp.isBufferEmpty()) { - try pp.expansion_bufs.append(pp.gpa, .{}); - } - try pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].append(pp.gpa, tok); -} - -fn hashHashCheck(pp: *Preprocessor, toks: []const PreprocessorToken) !void { - if (toks.len == 0) return; - if (toks[0].id == .hash_hash) { - return pp.errTok(toks[0], .hash_hash_at_start); - } - if (toks[toks.len - 1].id == .hash_hash) { - return pp.errTok(toks[toks.len - 1], .hash_hash_at_end); - } -} - -fn readObjMacro(pp: *Preprocessor, name: PreprocessorToken) !void { - var body: TokenList = .{}; - errdefer body.deinit(pp.gpa); - - while (true) { - const tok = pp.getToken(); - if (tok.id.isDirectiveEnd()) break; - - try body.append(pp.gpa, tok); - } - try pp.hashHashCheck(body.items); - const macro: Macro = .{ - .tokens = body.items, - .var_args = false, - .loc = undefined, - .kind = .object, - .nargs = undefined, - }; - try pp.defineMacro(name, macro); -} - -/// Defines a new macro and warns if it is a duplicate -fn defineMacro(pp: *Preprocessor, name_tok: PreprocessorToken, macro: Macro) Error!void { - const name_str = pp.tokSlice(name_tok); - const gop = try pp.defines.getOrPut(pp.gpa, name_str); - if (gop.found_existing and !gop.value_ptr.eql(macro, pp)) { - const tag: Diagnostics.Tag = if (gop.value_ptr.kind == .special) .builtin_macro_redefined else 
.macro_redefined; - const start = pp.comp.diagnostics.list.items.len; - try pp.comp.addDiagnostic(.{ - .tag = tag, - .loc = name_tok.loc, - .extra = .{ .str = name_str }, - }, &.{}); - if (gop.value_ptr.kind != .special and pp.comp.diagnostics.list.items.len != start) { - try pp.comp.addDiagnostic(.{ - .tag = .previous_definition, - .loc = gop.value_ptr.loc, - }, &.{}); - } - } - gop.value_ptr.* = macro; -} - -/// Get raw token source string. -/// Returned slice is invalidated when comp.generated_buf is updated. -pub fn tokSlice(pp: *Preprocessor, token: PreprocessorToken) []const u8 { - if (token.id.lexeme()) |some| return some; - const source = pp.comp.getSource(token.loc.id); - var tmp_tokenizer = Tokenizer{ - .buf = source.buf, - .langopts = pp.comp.langopts, - .index = token.loc.byte_offset, - .source = .generated, - }; - const tok = tmp_tokenizer.next(); - return tmp_tokenizer.buf[tok.start..tok.end]; -} - -fn expect(pp: *Preprocessor, expected: Tokenizer.Token.Id, tag: Diagnostics.Tag) !PreprocessorToken { - const tok = pp.getToken(); - if (tok.id != expected) { - try pp.errTok(tok, tag); - } - return tok; -} - -fn makeMacroToken(position: usize, is_vararg: bool) PreprocessorToken { - return .{ - .id = .macro_param, - .hideset = null, - .loc = .{ - .id = .unused, - .byte_offset = @intCast(position), - .line = @intFromBool(is_vararg), - }, - }; -} - -fn next(pp: *Preprocessor, id: Tokenizer.Token.Id) !bool { - const tok = pp.getToken(); - if (tok.id == id) return true; - try pp.ungetToken(tok); - return false; -} - -/// Returns true for vararg function-like macro, false otherwise -fn readFunclikeMacroParams(pp: *Preprocessor, name: PreprocessorToken, l_paren: PreprocessorToken, params: *ParamMap) !bool { - _ = name; - var pos: usize = 0; - while (true) { - var tok = pp.getToken(); - if (tok.id == .r_paren) return false; - if (pos != 0) { - if (tok.id != .comma) { - switch (tok.id) { - .nl, .eof => {}, - else => pp.skipToNl(), - } - try pp.errTok(tok, .expected_comma_param_list); - return error.InvalidMacroDef; - } - tok = pp.getToken(); - } - if (tok.id.isDirectiveEnd()) { - try pp.errTok(tok, .missing_paren_param_list); - return false; - } - if (tok.id == .ellipsis) { - try params.put(pp.gpa, "__VA_ARGS__", makeMacroToken(pos, true)); - pos += 1; - const r_paren = pp.getToken(); - if (r_paren.id != .r_paren) { - try pp.errTok(r_paren, .missing_paren_param_list); - try pp.errTok(l_paren, .to_match_paren); - return error.InvalidMacroDef; - } - return true; - } - if (!tok.id.isMacroIdentifier()) { - try pp.errTok(tok, .invalid_token_param_list); - return error.InvalidMacroDef; - } - const arg = pp.tokSlice(tok); - if (try pp.next(.ellipsis)) { - const r_paren = pp.getToken(); - if (r_paren.id != .r_paren) { - try pp.errTok(r_paren, .missing_paren_param_list); - try pp.errTok(l_paren, .to_match_paren); - pp.skipToNl(); - } - try params.put(pp.gpa, arg, makeMacroToken(pos, true)); - pos += 1; - return true; - } - try params.put(pp.gpa, arg, makeMacroToken(pos, false)); - pos += 1; - } -} - -fn readFunclikeMacroBody(pp: *Preprocessor, params: *const ParamMap) ![]const PreprocessorToken { - var tokens: TokenList = .{}; - errdefer tokens.deinit(pp.gpa); - while (true) { - const tok = pp.getToken(); - if (tok.id.isDirectiveEnd()) { - return tokens.toOwnedSlice(pp.gpa); - } - if (tok.id.isMacroIdentifier()) { - // const subst = params. 
- if (params.get(pp.tokSlice(tok))) |sub| { - var copy = sub; - copy.flags.space = tok.flags.space; - try tokens.append(pp.gpa, copy); - continue; - } - } - try tokens.append(pp.gpa, tok); - } -} - -fn readFuncLikeMacro(pp: *Preprocessor, name: PreprocessorToken, l_paren: PreprocessorToken) Error!void { - var params: ParamMap = .{}; - defer params.deinit(pp.gpa); - const is_vararg = pp.readFunclikeMacroParams(name, l_paren, ¶ms) catch |err| switch (err) { - error.InvalidMacroDef => blk: { - pp.skipToNl(); - break :blk false; - }, - else => |e| return e, - }; - const body = try pp.readFunclikeMacroBody(¶ms); - errdefer pp.gpa.free(body); - try pp.hashHashCheck(body); - const macro: Macro = .{ - .tokens = body, - .var_args = is_vararg, - .loc = undefined, - .kind = .func, - .nargs = params.count(), - }; - try pp.defineMacro(name, macro); -} - -fn readDefine(pp: *Preprocessor) !void { - const name = try pp.readIdent() orelse { - pp.skipToNl(); - return; - }; - const next_tok = pp.getToken(); - if (next_tok.id == .l_paren and !next_tok.flags.space) { - try pp.readFuncLikeMacro(name, next_tok); - return; - } - try pp.ungetToken(next_tok); - try pp.readObjMacro(name); -} - -fn doSkipSpace(pp: *Preprocessor) bool { - const saved_tokenizer = pp.tokenizers.items[pp.tokenizers.items.len - 1]; - const tok = pp.tokenizers.items[pp.tokenizers.items.len - 1].next(); - switch (tok.id) { - .eof => return false, - .whitespace, .comment => return true, - else => { - pp.tokenizers.items[pp.tokenizers.items.len - 1] = saved_tokenizer; - return false; - }, - } -} - -/// Skips spaces including comments. -/// Returns true if at least one space is skipped. -fn skipSpace(pp: *Preprocessor) bool { - if (!pp.doSkipSpace()) { - return false; - } - while (pp.doSkipSpace()) {} - return true; -} - -/// Read the next raw token from the tokenizer stack -fn lexToken(pp: *Preprocessor) PreprocessorToken { - if (pp.skipSpace()) { - return .{ .id = .whitespace, .loc = undefined }; - } - const tok = pp.tokenizers.items[pp.tokenizers.items.len - 1].next(); - return .{ - .id = tok.id, - .flags = .{ - .is_bol = tok.bol, - }, - .loc = .{ - .id = tok.source, - .byte_offset = tok.start, - .line = tok.line, - }, - }; -} - -/// Read the next token without expanding it -fn getToken(pp: *Preprocessor) PreprocessorToken { - if (!pp.isBufferEmpty() and pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].items.len > 0) { - return pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].pop(); - } - if (pp.expansion_bufs.items.len > 1) { - return .{ .id = .eof, .loc = undefined }; - } - const bol = pp.tokenizers.items[pp.tokenizers.items.len - 1].bol; - var tok = pp.lexToken(); - while (tok.id == .whitespace) { - tok = pp.lexToken(); - tok.flags.space = true; - } - tok.flags.is_bol = bol; - return tok; -} - -fn readDefinedOp(pp: *Preprocessor) !PreprocessorToken { - var tok = pp.getToken(); - if (tok.id == .l_paren) { - tok = pp.getToken(); - const r_paren = pp.getToken(); - if (r_paren.id != .r_paren) { - try pp.errStr(r_paren, .closing_paren_after, "defined"); - } - } - if (!tok.id.isMacroIdentifier()) { - try pp.errTok(tok, .macro_name_must_be_identifier); - } - const slice = pp.tokSlice(tok); - if (pp.defines.contains(slice)) { - return PreprocessorToken.one; - } - return PreprocessorToken.zero; -} - -fn readIntExprLine(pp: *Preprocessor) !void { - while (true) { - const tok = try pp.readExpandNewline(); - if (tok.id.isDirectiveEnd()) break; - if (tok.id == .keyword_defined) { - const result = try pp.readDefinedOp(); - try 
pp.addToken(result); - } else if (tok.id.isMacroIdentifier()) { - try pp.addToken(PreprocessorToken.zero); - } else { - try pp.addToken(tok); - } - } - try pp.addToken(.{ .id = .eof, .loc = .{} }); -} - -fn readConstexpr(pp: *Preprocessor) !bool { - const start = pp.tokens.len; - defer pp.tokens.len = start; - try pp.readIntExprLine(); - - var oldpp = try OldPreprocessor.initDefault(pp.comp); - defer oldpp.deinit(); - - var i: usize = start; - while (i < pp.tokens.len) : (i += 1) { - const tok = pp.tokens.get(i); - try oldpp.tokens.append(pp.gpa, .{ .id = tok.id, .loc = tok.loc }); - } - - var parser = Parser{ - .pp = &oldpp, - .comp = pp.comp, - .gpa = pp.gpa, - .tok_ids = pp.tokens.items(.id)[start..], - .tok_i = 0, - .arena = undefined, - .in_macro = true, - .strings = std.ArrayListAligned(u8, 4).init(pp.comp.gpa), - - .data = undefined, - .value_map = undefined, - .labels = undefined, - .decl_buf = undefined, - .list_buf = undefined, - .param_buf = undefined, - .enum_buf = undefined, - .record_buf = undefined, - .attr_buf = undefined, - .field_attr_buf = undefined, - .string_ids = undefined, - }; - defer parser.strings.deinit(); - return parser.macroExpr(); -} - -/// #line number "file" -/// TODO: validate that the pp_num token is solely digits -fn readLine(pp: *Preprocessor) Error!void { - const digits = pp.getToken(); - if (digits.id != .pp_num) try pp.errTok(digits, .line_simple_digit); - - if (digits.id.isDirectiveEnd()) return; - const name = pp.getToken(); - if (name.id.isDirectiveEnd()) return; - if (name.id != .string_literal) try pp.errTok(name, .line_invalid_filename); - try pp.expectNewline(); -} - -fn readPragma(pp: *Preprocessor) Error!void { - _ = pp; - // TODO -} - -fn readUndef(pp: *Preprocessor) Error!void { - const name = try pp.readIdent() orelse { - pp.skipToNl(); - return; - }; - try pp.expectNewline(); - _ = pp.defines.remove(pp.tokSlice(name)); -} - -/// Skip until after a newline, error if extra tokens before it. -fn expectNewline(pp: *Preprocessor) !void { - var sent_err = false; - while (true) { - const tok = pp.getToken(); - if (tok.id.isDirectiveEnd()) return; - if (tok.id == .whitespace or tok.id == .comment) continue; - if (!sent_err) { - sent_err = true; - try pp.errTok(tok, .extra_tokens_directive_end); - } - } -} - -/// TODO: pragma once -fn readIncludeExtra(pp: *Preprocessor, include_token: PreprocessorToken, which: Compilation.WhichInclude) Error!void { - var is_std: bool = undefined; - const include_str = pp.readHeaderName(&is_std) catch |err| switch (err) { - error.InvalidInclude => return, - else => |e| return e, - }; - try pp.expectNewline(); - - const filename = include_str[1 .. 
include_str.len - 1]; - const include_type: Compilation.IncludeType = switch (include_str[0]) { - '"' => .quotes, - '<' => .angle_brackets, - else => unreachable, - }; - const tok: RawToken = .{ .id = include_token.id, .source = include_token.loc.id, .start = include_token.loc.byte_offset, .line = include_token.loc.line }; - const source = (try pp.comp.findInclude(filename, tok, include_type, which)) orelse return pp.fatalNotFound(include_token, filename); - if (pp.include_guards.get(source.id)) |guard| { - if (pp.defines.contains(guard)) return; - } - const guard = pp.findIncludeGuard(source); - try pp.guard_stack.append(pp.gpa, guard); - - try pp.tokenizers.append(pp.gpa, .{ - .buf = source.buf, - .langopts = pp.comp.langopts, - .index = 0, - .source = source.id, - }); -} - -/// Read a header name delimited by quotes or angle brackets -fn readHeaderFileName(pp: *Preprocessor, is_std: *bool) !?[]const u8 { - if (!pp.isBufferEmpty()) return null; - _ = pp.skipSpace(); - - var close: u8 = undefined; - var tokenizer = pp.tokenizers.items[pp.tokenizers.items.len - 1]; - defer pp.tokenizers.items[pp.tokenizers.items.len - 1] = tokenizer; - - if (tokenizer.buf[tokenizer.index..].len < 2) { - return null; - } - const start = tokenizer.index; - switch (tokenizer.buf[tokenizer.index..][0]) { - '"' => { - is_std.* = false; - close = '"'; - }, - '<' => { - is_std.* = true; - close = '>'; - }, - else => return null, - } - tokenizer.index += 1; - while (tokenizer.index < tokenizer.buf.len and tokenizer.buf[tokenizer.index] != close and tokenizer.buf[tokenizer.index] != '\n') : (tokenizer.index += 1) {} - - if (tokenizer.index == tokenizer.buf.len or tokenizer.buf[tokenizer.index] != close) { - try pp.errTok(.{ .id = undefined, .loc = .{ .id = tokenizer.source, .byte_offset = tokenizer.index, .line = tokenizer.line } }, .header_str_closing); - try pp.errTok(.{ .id = undefined, .loc = .{ .id = tokenizer.source, .byte_offset = start, .line = tokenizer.line } }, .header_str_match); - return error.InvalidInclude; - } - - tokenizer.index += 1; - - const buf = tokenizer.buf[start..tokenizer.index]; - if (buf.len == 2) { - try pp.errTok(.{ .id = .nl, .loc = .{ .id = tokenizer.source, .byte_offset = start, .line = tokenizer.line } }, .empty_filename); - return error.InvalidInclude; - } - return buf; -} - -fn isBufferEmpty(pp: *const Preprocessor) bool { - return pp.expansion_bufs.items.len == 0; -} - -/// Read a delimited header name, or a macro expanded one -fn readHeaderName(pp: *Preprocessor, is_std: *bool) ![]const u8 { - if (try pp.readHeaderFileName(is_std)) |path| return path; - - // If a token following #include does not start with < nor ", - // try to read the token as a regular token. Macro-expanded - // form may be a valid header file path. 
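- // For example, a computed include such as
- //   #define HDR "config.h"
- //   #include HDR
- // takes this path: `HDR` is macro-expanded and the resulting string literal
- // (or `<` ... `>` token sequence) is used as the header path. The name
- // "config.h" here is purely illustrative.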
- const tok = try pp.readExpandNewline(); - if (tok.id.isDirectiveEnd()) { - try pp.errTok(tok, .expected_filename); - return error.InvalidInclude; - } - if (tok.id == .string_literal) { - is_std.* = false; - return pp.tokSlice(tok); - } - if (tok.id != .angle_bracket_left) { - try pp.errStr(tok, .expected_left_angle_bracket, pp.tokSlice(tok)); - return error.InvalidInclude; - } - const start = pp.char_buf.items.len; - try pp.char_buf.append(pp.gpa, '<'); - defer pp.char_buf.items.len = start; - const writer = pp.char_buf.writer(pp.gpa); - while (true) { - const path_tok = try pp.readExpandNewline(); - if (path_tok.id == .nl) { - try pp.errTok(path_tok, .header_str_closing); - try pp.errTok(tok, .header_str_match); - return error.InvalidInclude; - } - if (path_tok.id == .angle_bracket_right) { - break; - } - try pp.prettyPrintToken(writer, path_tok); - } - is_std.* = true; - try pp.char_buf.append(pp.gpa, '>'); - return pp.gpa.dupe(u8, pp.char_buf.items[start..]); -} - -fn readInclude(pp: *Preprocessor, include_token: PreprocessorToken) Error!void { - return pp.readIncludeExtra(include_token, .first); -} - -fn readIncludeNext(pp: *Preprocessor, include_token: PreprocessorToken) Error!void { - return pp.readIncludeExtra(include_token, .next); -} - -fn readErrorMessage(pp: *Preprocessor, directive_tok: PreprocessorToken, tag: Diagnostics.Tag) !void { - const char_top = pp.char_buf.items.len; - defer pp.char_buf.items.len = char_top; - var i: usize = 0; - while (true) : (i += 1) { - const tok = pp.getToken(); - if (tok.id.isDirectiveEnd()) break; - const slice = pp.tokSlice(tok); - if (slice.len > 0 and tok.flags.space and i != 0) { - try pp.char_buf.append(pp.gpa, ' '); - } - try pp.char_buf.appendSlice(pp.gpa, slice); - } - const slice = pp.char_buf.items[char_top..]; - const duped = try pp.comp.diagnostics.arena.allocator().dupe(u8, slice); - try pp.comp.addDiagnostic(.{ - .tag = tag, - .loc = directive_tok.loc, - .extra = .{ .str = duped }, - }, &.{}); -} - -fn clearGuard(pp: *Preprocessor) void { - pp.guard_stack.items[pp.guard_stack.items.len - 1] = null; -} - -fn readDirective(pp: *Preprocessor) Error!void { - const directive = pp.getToken(); - if (directive.id.isDirectiveEnd()) return; - if (directive.id == .pp_num) { - return pp.readLinemarker(); - } - - const until_else = 0; - const until_endif = 1; - const until_endif_seen_else = 2; - - switch (directive.id) { - .keyword_define => try pp.readDefine(), - .keyword_elif => { - if (pp.if_level == 0) { - try pp.errTok(directive, .elif_without_if); - pp.if_level += 1; - pp.if_kind.set(pp.if_level, until_else); - } else if (pp.if_level == 1) { - pp.clearGuard(); - } - switch (pp.if_kind.get(pp.if_level)) { - until_else => if (try pp.readConstexpr()) { - pp.if_kind.set(pp.if_level, until_endif); - if (pp.verbose) { - pp.verboseLog(directive, "entering then branch of #elif", .{}); - } - } else { - try pp.skip(.until_else); - if (pp.verbose) { - pp.verboseLog(directive, "entering else branch of #elif", .{}); - } - }, - until_endif => try pp.skip(.until_endif), - until_endif_seen_else => { - try pp.errTok(directive, .elif_after_else); - pp.skipToNl(); - }, - else => unreachable, - } - }, - .keyword_else => { - try pp.expectNewline(); - if (pp.if_level == 0) { - try pp.errTok(directive, .else_without_if); - return; - } else if (pp.if_level == 1) { - pp.clearGuard(); - } - switch (pp.if_kind.get(pp.if_level)) { - until_else => { - pp.if_kind.set(pp.if_level, until_endif_seen_else); - if (pp.verbose) { - pp.verboseLog(directive, "#else branch 
here", .{}); - } - }, - until_endif => try pp.skip(.until_endif_seen_else), - until_endif_seen_else => { - try pp.errTok(directive, .else_after_else); - pp.skipToNl(); - }, - else => unreachable, - } - }, - .keyword_endif => { - try pp.expectNewline(); - if (pp.if_level == 0) { - pp.clearGuard(); - try pp.errTok(directive, .endif_without_if); - return; - } else if (pp.if_level == 1) { - var tokenizer = &pp.tokenizers.items[pp.tokenizers.items.len - 1]; - const saved_tokenizer = tokenizer.*; - defer tokenizer.* = saved_tokenizer; - - var next_tok = tokenizer.nextNoWS(); - while (next_tok.id == .nl) : (next_tok = tokenizer.nextNoWS()) {} - if (next_tok.id != .eof) pp.clearGuard(); - } - pp.if_level -= 1; - }, - .keyword_error => try pp.readErrorMessage(directive, .error_directive), - .keyword_if => { - const sum, const overflowed = @addWithOverflow(pp.if_level, 1); - if (overflowed != 0) - return pp.fatal(directive, "too many #if nestings", .{}); - pp.if_level = sum; - - if (try pp.readConstexpr()) { - pp.if_kind.set(pp.if_level, until_endif); - if (pp.verbose) { - pp.verboseLog(directive, "entering then branch of #if", .{}); - } - } else { - pp.if_kind.set(pp.if_level, until_else); - try pp.skip(.until_else); - if (pp.verbose) { - pp.verboseLog(directive, "entering else branch of #if", .{}); - } - } - }, - .keyword_ifdef => { - const sum, const overflowed = @addWithOverflow(pp.if_level, 1); - if (overflowed != 0) - return pp.fatal(directive, "too many #if nestings", .{}); - pp.if_level = sum; - - const macro_name = (try pp.expectMacroName()) orelse return; - try pp.expectNewline(); - if (pp.defines.get(macro_name) != null) { - pp.if_kind.set(pp.if_level, until_endif); - if (pp.verbose) { - pp.verboseLog(directive, "entering then branch of #ifdef", .{}); - } - } else { - pp.if_kind.set(pp.if_level, until_else); - try pp.skip(.until_else); - if (pp.verbose) { - pp.verboseLog(directive, "entering else branch of #ifdef", .{}); - } - } - }, - .keyword_ifndef => { - const sum, const overflowed = @addWithOverflow(pp.if_level, 1); - if (overflowed != 0) - return pp.fatal(directive, "too many #if nestings", .{}); - pp.if_level = sum; - - const macro_name = (try pp.expectMacroName()) orelse return; - try pp.expectNewline(); - if (pp.defines.get(macro_name) == null) { - pp.if_kind.set(pp.if_level, until_endif); - } else { - pp.if_kind.set(pp.if_level, until_else); - try pp.skip(.until_else); - } - }, - .keyword_elifdef => { - if (pp.if_level == 0) { - try pp.errTok(directive, .elifdef_without_if); - pp.if_level += 1; - pp.if_kind.set(pp.if_level, until_else); - } else if (pp.if_level == 1) { - pp.clearGuard(); - } - switch (pp.if_kind.get(pp.if_level)) { - until_else => { - const macro_name = try pp.expectMacroName(); - if (macro_name == null) { - pp.if_kind.set(pp.if_level, until_else); - try pp.skip(.until_else); - if (pp.verbose) { - pp.verboseLog(directive, "entering else branch of #elifdef", .{}); - } - } else { - try pp.expectNewline(); - if (pp.defines.get(macro_name.?) 
!= null) { - pp.if_kind.set(pp.if_level, until_endif); - if (pp.verbose) { - pp.verboseLog(directive, "entering then branch of #elifdef", .{}); - } - } else { - pp.if_kind.set(pp.if_level, until_else); - try pp.skip(.until_else); - if (pp.verbose) { - pp.verboseLog(directive, "entering else branch of #elifdef", .{}); - } - } - } - }, - until_endif => try pp.skip(.until_endif), - until_endif_seen_else => { - try pp.errTok(directive, .elifdef_after_else); - pp.skipToNl(); - }, - else => unreachable, - } - }, - .keyword_elifndef => { - if (pp.if_level == 0) { - try pp.errTok(directive, .elifdef_without_if); - pp.if_level += 1; - pp.if_kind.set(pp.if_level, until_else); - } else if (pp.if_level == 1) { - pp.clearGuard(); - } - switch (pp.if_kind.get(pp.if_level)) { - until_else => { - const macro_name = try pp.expectMacroName(); - if (macro_name == null) { - pp.if_kind.set(pp.if_level, until_else); - try pp.skip(.until_else); - if (pp.verbose) { - pp.verboseLog(directive, "entering else branch of #elifndef", .{}); - } - } else { - try pp.expectNewline(); - if (pp.defines.get(macro_name.?) == null) { - pp.if_kind.set(pp.if_level, until_endif); - if (pp.verbose) { - pp.verboseLog(directive, "entering then branch of #elifndef", .{}); - } - } else { - pp.if_kind.set(pp.if_level, until_else); - try pp.skip(.until_else); - if (pp.verbose) { - pp.verboseLog(directive, "entering else branch of #elifndef", .{}); - } - } - } - }, - until_endif => try pp.skip(.until_endif), - until_endif_seen_else => { - try pp.errTok(directive, .elifdef_after_else); - pp.skipToNl(); - }, - else => unreachable, - } - }, - .keyword_include => try pp.readInclude(directive), - .keyword_include_next => try pp.readIncludeNext(directive), - .keyword_line => try pp.readLine(), - .keyword_pragma => try pp.readPragma(), - .keyword_undef => try pp.readUndef(), - .keyword_warning => try pp.readErrorMessage(directive, .warning_directive), - .keyword_embed => try pp.readEmbed(directive), - else => try pp.errTok(directive, .invalid_preprocessing_directive), - } -} - -/// TODO: handle limit/prefix/suffix/etc -fn readEmbed(pp: *Preprocessor, directive_tok: PreprocessorToken) Error!void { - var is_std: bool = undefined; - const include_str = pp.readHeaderName(&is_std) catch |err| switch (err) { - error.InvalidInclude => return, - else => |e| return e, - }; - - const filename = include_str[1 .. 
include_str.len - 1]; - const include_type: Compilation.IncludeType = switch (include_str[0]) { - '"' => .quotes, - '<' => .angle_brackets, - else => unreachable, - }; - - const limit = std.math.maxInt(u32); - const embed_bytes = (try pp.comp.findEmbed(filename, directive_tok.loc.id, include_type, limit)) orelse - return pp.fatalNotFound(directive_tok, filename); - defer pp.comp.gpa.free(embed_bytes); - - try pp.ensureUnusedTokenCapacity(2 * embed_bytes.len - 1); // N bytes and N-1 commas - - // TODO: We currently only support systems with CHAR_BIT == 8 - // If the target's CHAR_BIT is not 8, we need to write out correctly-sized embed_bytes - // and correctly account for the target's endianness - const writer = pp.comp.generated_buf.writer(pp.gpa); - - { - const byte = embed_bytes[0]; - const start = pp.comp.generated_buf.items.len; - try writer.print("{d}", .{byte}); - var generated = try pp.makeGeneratedToken(start, .embed_byte, directive_tok); - generated.flags.is_bol = true; - pp.addTokenAssumeCapacity(generated); - } - - for (embed_bytes[1..]) |byte| { - const start = pp.comp.generated_buf.items.len; - try writer.print(",{d}", .{byte}); - pp.addTokenAssumeCapacity(.{ .id = .comma, .loc = .{ .id = .generated, .byte_offset = @intCast(start) } }); - pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start + 1, .embed_byte, directive_tok)); - } - try pp.comp.generated_buf.append(pp.gpa, '\n'); -} - -fn readToken(pp: *Preprocessor) Error!PreprocessorToken { - while (true) { - const tok = try pp.readExpand(); - if (tok.flags.is_bol and tok.id == .hash and tok.hideset == null) { - try pp.readDirective(); - continue; - } - return tok; - } -} - -pub fn preprocess(pp: *Preprocessor, source: Source) !PreprocessorToken { - const guard = pp.findIncludeGuard(source); - try pp.guard_stack.append(pp.gpa, guard); - - try pp.tokenizers.append(pp.gpa, .{ - .buf = source.buf, - .langopts = pp.comp.langopts, - .index = 0, - .source = source.id, - }); - while (true) { - const tok = try pp.readToken(); - if (tok.id == .eof) { - const tokenizer = pp.tokenizers.pop(); - const guard_name = pp.guard_stack.pop(); - if (guard_name) |name| { - try pp.include_guards.put(pp.gpa, tokenizer.source, name); - } - if (pp.tokenizers.items.len == 0) { - return tok; - } - } else { - try pp.addToken(tok); - } - } -} - -// After how many empty lines are needed to replace them with linemarkers. -const collapse_newlines = 8; - -/// Pretty print tokens and try to preserve whitespace. 
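-/// A newline is emitted before any token flagged as starting a line, and a
-/// single space before any token flagged as having leading whitespace, so the
-/// output roughly follows the layout of the original source.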
-pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void { - var i: usize = 0; - while (i < pp.tokens.len) : (i += 1) { - const tok = pp.tokens.get(i); - if (tok.id == .eof) break; - try pp.prettyPrintToken(w, tok); - } - try w.writeByte('\n'); -} - -fn prettyPrintToken(pp: *Preprocessor, w: anytype, tok: PreprocessorToken) !void { - if (tok.flags.is_bol) { - try w.writeByte('\n'); - } - if (tok.flags.space) { - try w.writeByte(' '); - } - if (tok.id.lexeme()) |some| { - try w.writeAll(some); - } else { - try w.writeAll(pp.tokSlice(tok)); - } -} - -pub fn expansionSlice(pp: *Preprocessor, tok: Tree.TokenIndex) []Source.Location { - const S = struct { - fn order_token_index(context: void, lhs: Tree.TokenIndex, rhs: Tree.TokenIndex) std.math.Order { - _ = context; - return std.math.order(lhs, rhs); - } - }; - - const indices = pp.expansion_entries.items(.idx); - const idx = std.sort.binarySearch(Tree.TokenIndex, tok, indices, {}, S.order_token_index) orelse return &.{}; - const locs = pp.expansion_entries.items(.locs)[idx]; - var i: usize = 0; - while (locs[i].id != .unused) : (i += 1) {} - return locs[0..i]; -} - -pub fn addToken(pp: *Preprocessor, tok: PreprocessorToken) !void { - if (tok.expansion_locs) |expansion_locs| { - try pp.expansion_entries.append(pp.gpa, .{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs }); - } - try pp.tokens.append(pp.gpa, tok); -} - -pub fn addTokenAssumeCapacity(pp: *Preprocessor, tok: PreprocessorToken) void { - if (tok.expansion_locs) |expansion_locs| { - pp.expansion_entries.appendAssumeCapacity(.{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs }); - } - pp.tokens.appendAssumeCapacity(tok); -} - -pub fn ensureTotalTokenCapacity(pp: *Preprocessor, capacity: usize) !void { - try pp.tokens.ensureTotalCapacity(pp.gpa, capacity); - try pp.expansion_entries.ensureTotalCapacity(pp.gpa, capacity); -} - -pub fn ensureUnusedTokenCapacity(pp: *Preprocessor, capacity: usize) !void { - try pp.tokens.ensureUnusedCapacity(pp.gpa, capacity); - try pp.expansion_entries.ensureUnusedCapacity(pp.gpa, capacity); -} - -fn skip( - pp: *Preprocessor, - cont: enum { until_else, until_endif, until_endif_seen_else }, -) Error!void { - var ifs_seen: u32 = 0; - var line_start = true; - var tokenizer = &pp.tokenizers.items[pp.tokenizers.items.len - 1]; - - while (tokenizer.index < tokenizer.buf.len) { - if (line_start) { - const saved_tokenizer = tokenizer.*; - const hash = tokenizer.nextNoWS(); - if (hash.id == .nl) continue; - line_start = false; - if (hash.id != .hash) continue; - const directive = tokenizer.nextNoWS(); - switch (directive.id) { - .keyword_else => { - if (ifs_seen != 0) continue; - if (cont == .until_endif_seen_else) { - // try pp.err(directive, .else_after_else); - continue; - } - tokenizer.* = saved_tokenizer; - return; - }, - .keyword_elif => { - if (ifs_seen != 0 or cont == .until_endif) continue; - if (cont == .until_endif_seen_else) { - // try pp.err(directive, .elif_after_else); - continue; - } - tokenizer.* = saved_tokenizer; - return; - }, - .keyword_elifdef => { - if (ifs_seen != 0 or cont == .until_endif) continue; - if (cont == .until_endif_seen_else) { - // try pp.err(directive, .elifdef_after_else); - continue; - } - tokenizer.* = saved_tokenizer; - return; - }, - .keyword_elifndef => { - if (ifs_seen != 0 or cont == .until_endif) continue; - if (cont == .until_endif_seen_else) { - // try pp.err(directive, .elifndef_after_else); - continue; - } - tokenizer.* = saved_tokenizer; - return; - }, - .keyword_endif => { - if (ifs_seen == 
0) { - tokenizer.* = saved_tokenizer; - return; - } - ifs_seen -= 1; - }, - .keyword_if, .keyword_ifdef, .keyword_ifndef => ifs_seen += 1, - else => {}, - } - } else if (tokenizer.buf[tokenizer.index] == '\n') { - line_start = true; - tokenizer.index += 1; - tokenizer.line += 1; - tokenizer.bol = true; - if (pp.preserve_whitespace) { - try pp.addToken(.{ .id = .nl, .loc = .{ - .id = tokenizer.source, - .line = tokenizer.line, - } }); - } - } else { - line_start = false; - tokenizer.index += 1; - } - } else { - return pp.errTok(.{ .id = .eof, .loc = .{ .id = tokenizer.source, .byte_offset = tokenizer.index, .line = tokenizer.line } }, .unterminated_conditional_directive); - } -} - -fn verboseLog(pp: *Preprocessor, tok: PreprocessorToken, comptime fmt: []const u8, args: anytype) void { - const source = pp.comp.getSource(tok.loc.id); - const line_col = source.lineCol(tok.loc); - - const stderr = std.io.getStdErr().writer(); - var buf_writer = std.io.bufferedWriter(stderr); - const writer = buf_writer.writer(); - defer buf_writer.flush() catch {}; - writer.print("{s}:{d}:{d}: ", .{ source.path, line_col.line_no, line_col.col }) catch return; - writer.print(fmt, args) catch return; - writer.writeByte('\n') catch return; - writer.writeAll(line_col.line) catch return; - writer.writeByte('\n') catch return; -} - -fn fatal(pp: *Preprocessor, tok: PreprocessorToken, comptime fmt: []const u8, args: anytype) Compilation.Error { - try pp.comp.diagnostics.list.append(pp.gpa, .{ - .tag = .cli_error, - .kind = .@"fatal error", - .extra = .{ .str = try std.fmt.allocPrint(pp.comp.diagnostics.arena.allocator(), fmt, args) }, - .loc = tok.loc, - }); - return error.FatalError; -} - -fn fatalNotFound(pp: *Preprocessor, tok: PreprocessorToken, filename: []const u8) Compilation.Error { - const old = pp.comp.diagnostics.fatal_errors; - pp.comp.diagnostics.fatal_errors = true; - defer pp.comp.diagnostics.fatal_errors = old; - - try pp.comp.diagnostics.addExtra(pp.comp.langopts, .{ .tag = .cli_error, .loc = tok.loc, .extra = .{ - .str = try std.fmt.allocPrint(pp.comp.diagnostics.arena.allocator(), "'{s}' not found", .{filename}), - } }, tok.expansionSlice(), false); - unreachable; // addExtra should've returned FatalError -} - -/// Consume next token, error if it is not an identifier. -fn expectMacroName(pp: *Preprocessor) Error!?[]const u8 { - const macro_name = pp.getToken(); - if (!macro_name.id.isMacroIdentifier()) { - try pp.errTok(macro_name, .macro_name_missing); - pp.skipToNl(); - return null; - } - return pp.tokSlice(macro_name); -} - -/// Return the name of the #ifndef guard macro that starts a source, if any. 
-/// If a source starts with `#ifndef IDENTIFIER`, return `IDENTIFIER` -/// This function does not validate that the entire source is guarded by the -/// initial ifndef, if any -fn findIncludeGuard(pp: *Preprocessor, source: Source) ?[]const u8 { - var tokenizer = Tokenizer{ - .buf = source.buf, - .langopts = pp.comp.langopts, - .source = source.id, - }; - var hash = tokenizer.nextNoWS(); - while (hash.id == .nl) hash = tokenizer.nextNoWS(); - if (hash.id != .hash) return null; - const ifndef = tokenizer.nextNoWS(); - if (ifndef.id != .keyword_ifndef) return null; - const guard = tokenizer.nextNoWS(); - if (guard.id != .identifier) return null; - return pp.tokSlice(.{ .id = guard.id, .loc = .{ .id = guard.source, .byte_offset = guard.start, .line = guard.line } }); -} diff --git a/src/aro/Parser.zig b/src/aro/Parser.zig index 270d0a33..17c43e25 100644 --- a/src/aro/Parser.zig +++ b/src/aro/Parser.zig @@ -7097,6 +7097,10 @@ fn unExpr(p: *Parser) Error!Result { return operand; }, .plus_plus => { + if (p.in_macro) { + try p.err(.invalid_preproc_operator); + return error.ParsingFailed; + } p.tok_i += 1; var operand = try p.castExpr(); @@ -7123,6 +7127,10 @@ fn unExpr(p: *Parser) Error!Result { return operand; }, .minus_minus => { + if (p.in_macro) { + try p.err(.invalid_preproc_operator); + return error.ParsingFailed; + } p.tok_i += 1; var operand = try p.castExpr(); @@ -7423,6 +7431,10 @@ fn suffixExpr(p: *Parser, lhs: Result) Error!Result { switch (p.tok_ids[p.tok_i]) { .l_paren => return p.callExpr(lhs), .plus_plus => { + if (p.in_macro) { + try p.err(.invalid_preproc_operator); + return error.ParsingFailed; + } defer p.tok_i += 1; var operand = lhs; @@ -7441,6 +7453,10 @@ fn suffixExpr(p: *Parser, lhs: Result) Error!Result { return operand; }, .minus_minus => { + if (p.in_macro) { + try p.err(.invalid_preproc_operator); + return error.ParsingFailed; + } defer p.tok_i += 1; var operand = lhs; @@ -7459,6 +7475,10 @@ fn suffixExpr(p: *Parser, lhs: Result) Error!Result { return operand; }, .l_bracket => { + if (p.in_macro) { + try p.err(.invalid_preproc_operator); + return error.ParsingFailed; + } const l_bracket = p.tok_i; p.tok_i += 1; var index = try p.expr(); @@ -7495,11 +7515,19 @@ fn suffixExpr(p: *Parser, lhs: Result) Error!Result { return ptr; }, .period => { + if (p.in_macro) { + try p.err(.invalid_preproc_operator); + return error.ParsingFailed; + } p.tok_i += 1; const name = try p.expectIdentifier(); return p.fieldAccess(lhs, name, false); }, .arrow => { + if (p.in_macro) { + try p.err(.invalid_preproc_operator); + return error.ParsingFailed; + } p.tok_i += 1; const name = try p.expectIdentifier(); if (lhs.ty.isArray()) { @@ -8039,6 +8067,11 @@ fn makePredefinedIdentifier(p: *Parser, strings_top: usize) !Result { } fn stringLiteral(p: *Parser) Error!Result { + if (p.in_macro) { + try p.err(.invalid_preproc_expr_start); + return error.ParsingFailed; + } + var string_end = p.tok_i; var string_kind: text_literal.Kind = .char; while (text_literal.Kind.classify(p.tok_ids[string_end], .string_literal)) |next| : (string_end += 1) { diff --git a/src/aro/Pragma.zig b/src/aro/Pragma.zig index 279ac5f0..3f698c31 100644 --- a/src/aro/Pragma.zig +++ b/src/aro/Pragma.zig @@ -57,8 +57,8 @@ pub fn pasteTokens(pp: *Preprocessor, start_idx: TokenIndex) ![]const u8 { .r_paren => rparen_count += 1, .string_literal => { if (rparen_count != 0) return error.ExpectedStringLiteral; - const str = pp.expandedSlice(tok); - try pp.char_buf.appendSlice(str[1 .. 
str.len - 1]); + const str = pp.tokSlice(tok); + try pp.char_buf.appendSlice(pp.gpa, str[1 .. str.len - 1]); }, else => return error.ExpectedStringLiteral, } diff --git a/src/aro/Preprocessor.zig b/src/aro/Preprocessor.zig index 3fd98882..33df74c8 100644 --- a/src/aro/Preprocessor.zig +++ b/src/aro/Preprocessor.zig @@ -14,10 +14,13 @@ const Token = Tree.Token; const TokenWithExpansionLocs = Tree.TokenWithExpansionLocs; const Attribute = @import("Attribute.zig"); const features = @import("features.zig"); -const Hideset = @import("Hideset.zig"); +const OldPreprocessor = @import("Preprocessor.zig"); +const Treap = @import("Treap.zig"); +const ParamMap = std.StringHashMapUnmanaged(PreprocessorToken); const DefineMap = std.StringHashMapUnmanaged(Macro); -const RawTokenList = std.ArrayList(RawToken); + +const TokenList = std.ArrayListUnmanaged(PreprocessorToken); const max_include_depth = 200; /// Errors that can be returned when expanding a macro. @@ -25,40 +28,72 @@ const max_include_depth = 200; /// it is handled there and doesn't escape that function const MacroError = Error || error{StopPreprocessing}; +const PreprocessingError = Error || error{PreprocessingFailed}; + +const SpecialMacroFn = fn (*Preprocessor, PreprocessorToken) Error!void; + +fn Range(comptime T: type) type { + return struct { + const Self = @This(); + const Item = T; + + start: u32, + end: u32, + const empty: Self = .{ .start = 0, .end = 0 }; + + fn len(self: Self) u32 { + return self.end - self.start; + } + + fn slice(self: Self, items: []const Item) []const Item { + return items[self.start..self.end]; + } + }; +} + +/// Each macro argument is a list of tokens (represented as a range of Preprocessor.macro_arg_tokens) +const MacroArg = Range(PreprocessorToken); + +/// List of MacroArg's for a macro invocation (represented as a range of Preprocessor.macro_args) +const MacroArgList = Range(MacroArg); + +const PreprocessorToken = TokenWithExpansionLocs; + const Macro = struct { - /// Parameters of the function type macro - params: []const []const u8, + /// Tokens constituting the macro body + tokens: []const PreprocessorToken, - /// Token constituting the macro body - tokens: []const RawToken, + /// Number of arguments for function-like macros + nargs: usize, /// If the function type macro has variable number of arguments var_args: bool, - /// Is a function type macro - is_func: bool, - - /// Is a predefined macro - is_builtin: bool = false, - /// Location of macro in the source loc: Source.Location, + kind: Kind, + + const Kind = union(enum) { + object, + func, + special: *const SpecialMacroFn, + }; + fn eql(a: Macro, b: Macro, pp: *Preprocessor) bool { + if ((a.kind == .object and b.kind != .object) or (a.kind == .func and b.kind != .func)) return false; + if (!std.meta.eql(a.kind, b.kind)) return false; if (a.tokens.len != b.tokens.len) return false; - if (a.is_builtin != b.is_builtin) return false; for (a.tokens, b.tokens) |a_tok, b_tok| if (!tokEql(pp, a_tok, b_tok)) return false; - if (a.is_func and b.is_func) { + if (a.kind == .func) { if (a.var_args != b.var_args) return false; - if (a.params.len != b.params.len) return false; - for (a.params, b.params) |a_param, b_param| if (!mem.eql(u8, a_param, b_param)) return false; } return true; } - fn tokEql(pp: *Preprocessor, a: RawToken, b: RawToken) bool { + fn tokEql(pp: *Preprocessor, a: PreprocessorToken, b: PreprocessorToken) bool { return mem.eql(u8, pp.tokSlice(a), pp.tokSlice(b)); } }; @@ -78,27 +113,15 @@ const TokenState = struct { comp: *Compilation, gpa: 
mem.Allocator, arena: std.heap.ArenaAllocator, -defines: DefineMap = .{}, -/// Do not directly mutate this; use addToken / addTokenAssumeCapacity / ensureTotalTokenCapacity / ensureUnusedTokenCapacity -tokens: Token.List = .{}, + +tokens: std.MultiArrayList(Token) = .{}, /// Do not directly mutate this; must be kept in sync with `tokens` expansion_entries: std.MultiArrayList(ExpansionEntry) = .{}, -token_buf: RawTokenList, -char_buf: std.ArrayList(u8), -/// Counter that is incremented each time preprocess() is called -/// Can be used to distinguish multiple preprocessings of the same file -preprocess_count: u32 = 0, -generated_line: u32 = 1, -add_expansion_nl: u32 = 0, -include_depth: u8 = 0, -counter: u32 = 0, -expansion_source_loc: Source.Location = undefined, -poisoned_identifiers: std.StringHashMap(void), + /// Map from Source.Id to macro name in the `#ifndef` condition which guards the source, if any include_guards: std.AutoHashMapUnmanaged(Source.Id, []const u8) = .{}, -/// Memory is retained to avoid allocation on every single token. -top_expansion_buf: ExpandBuf, +char_buf: std.ArrayListUnmanaged(u8) = .{}, /// Dump current state to stderr. verbose: bool = false, @@ -107,7 +130,35 @@ preserve_whitespace: bool = false, /// linemarker tokens. Must be .none unless in -E mode (parser does not handle linemarkers) linemarkers: Linemarkers = .none, -hideset: Hideset, +tokenizers: std.ArrayListUnmanaged(Tokenizer) = .{}, + +expansion_bufs: std.ArrayListUnmanaged(TokenList) = .{}, + +defines: DefineMap = .{}, + +generated_line: u32 = 1, + +counter: u32 = 0, + +if_level: u8 = 0, + +preprocess_count: u32 = 0, + +poisoned_identifiers: std.StringHashMap(void), + +if_kind: std.PackedIntArray(u2, 256) = blk: { + @setEvalBranchQuota(2000); + break :blk std.PackedIntArray(u2, 256).initAllTo(0); +}, + +guard_stack: std.ArrayListUnmanaged(?[]const u8) = .{}, + +macro_arg_tokens: std.ArrayListUnmanaged(MacroArg.Item) = .{}, +macro_args: std.ArrayListUnmanaged(MacroArgList.Item) = .{}, + +safe_strings: std.StringHashMapUnmanaged(void) = .{}, + +treap: Treap, pub const parse = Parser.parse; @@ -125,2854 +176,1578 @@ pub fn init(comp: *Compilation) Preprocessor { .comp = comp, .gpa = comp.gpa, .arena = std.heap.ArenaAllocator.init(comp.gpa), - .token_buf = RawTokenList.init(comp.gpa), - .char_buf = std.ArrayList(u8).init(comp.gpa), .poisoned_identifiers = std.StringHashMap(void).init(comp.gpa), - .top_expansion_buf = ExpandBuf.init(comp.gpa), - .hideset = .{ .comp = comp }, + .treap = Treap.init(comp.gpa), }; comp.pragmaEvent(.before_preprocess); return pp; } -/// Initialize Preprocessor with builtin macros. 
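-/// Callers are expected to pair this with `deinit`, e.g.
-///     var pp = try Preprocessor.initDefault(comp);
-///     defer pp.deinit();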
-pub fn initDefault(comp: *Compilation) !Preprocessor { - var pp = init(comp); - errdefer pp.deinit(); - try pp.addBuiltinMacros(); - return pp; -} - -const builtin_macros = struct { - const args = [1][]const u8{"X"}; - - const has_attribute = [1]RawToken{.{ - .id = .macro_param_has_attribute, - .source = .generated, - }}; - const has_c_attribute = [1]RawToken{.{ - .id = .macro_param_has_c_attribute, - .source = .generated, - }}; - const has_declspec_attribute = [1]RawToken{.{ - .id = .macro_param_has_declspec_attribute, - .source = .generated, - }}; - const has_warning = [1]RawToken{.{ - .id = .macro_param_has_warning, - .source = .generated, - }}; - const has_feature = [1]RawToken{.{ - .id = .macro_param_has_feature, - .source = .generated, - }}; - const has_extension = [1]RawToken{.{ - .id = .macro_param_has_extension, - .source = .generated, - }}; - const has_builtin = [1]RawToken{.{ - .id = .macro_param_has_builtin, - .source = .generated, - }}; - const has_include = [1]RawToken{.{ - .id = .macro_param_has_include, - .source = .generated, - }}; - const has_include_next = [1]RawToken{.{ - .id = .macro_param_has_include_next, - .source = .generated, - }}; - const has_embed = [1]RawToken{.{ - .id = .macro_param_has_embed, - .source = .generated, - }}; - - const is_identifier = [1]RawToken{.{ - .id = .macro_param_is_identifier, - .source = .generated, - }}; - - const pragma_operator = [1]RawToken{.{ - .id = .macro_param_pragma_operator, - .source = .generated, - }}; - - const file = [1]RawToken{.{ - .id = .macro_file, - .source = .generated, - }}; - const line = [1]RawToken{.{ - .id = .macro_line, - .source = .generated, - }}; - const counter = [1]RawToken{.{ - .id = .macro_counter, - .source = .generated, - }}; -}; - -fn addBuiltinMacro(pp: *Preprocessor, name: []const u8, is_func: bool, tokens: []const RawToken) !void { +fn addBuiltinMacro(pp: *Preprocessor, name: []const u8, func: *const SpecialMacroFn) !void { try pp.defines.putNoClobber(pp.gpa, name, .{ - .params = &builtin_macros.args, - .tokens = tokens, + .tokens = &.{}, .var_args = false, - .is_func = is_func, .loc = .{ .id = .generated }, - .is_builtin = true, + .kind = .{ .special = func }, + .nargs = 0, }); } -pub fn addBuiltinMacros(pp: *Preprocessor) !void { - try pp.addBuiltinMacro("__has_attribute", true, &builtin_macros.has_attribute); - try pp.addBuiltinMacro("__has_c_attribute", true, &builtin_macros.has_c_attribute); - try pp.addBuiltinMacro("__has_declspec_attribute", true, &builtin_macros.has_declspec_attribute); - try pp.addBuiltinMacro("__has_warning", true, &builtin_macros.has_warning); - try pp.addBuiltinMacro("__has_feature", true, &builtin_macros.has_feature); - try pp.addBuiltinMacro("__has_extension", true, &builtin_macros.has_extension); - try pp.addBuiltinMacro("__has_builtin", true, &builtin_macros.has_builtin); - try pp.addBuiltinMacro("__has_include", true, &builtin_macros.has_include); - try pp.addBuiltinMacro("__has_include_next", true, &builtin_macros.has_include_next); - try pp.addBuiltinMacro("__has_embed", true, &builtin_macros.has_embed); - try pp.addBuiltinMacro("__is_identifier", true, &builtin_macros.is_identifier); - try pp.addBuiltinMacro("_Pragma", true, &builtin_macros.pragma_operator); - - try pp.addBuiltinMacro("__FILE__", false, &builtin_macros.file); - try pp.addBuiltinMacro("__LINE__", false, &builtin_macros.line); - try pp.addBuiltinMacro("__COUNTER__", false, &builtin_macros.counter); +fn handleLineMacro(pp: *Preprocessor, tok: PreprocessorToken) Error!void { + const start = 
pp.comp.generated_buf.items.len; + const source = pp.comp.getSource(tok.loc.id); + const w = pp.comp.generated_buf.writer(pp.gpa); + try w.print("{d}\n", .{source.physicalLine(tok.loc)}); + const pasted_tok = try pp.makeGeneratedToken(start, .pp_num, tok); + return pp.ungetToken(pasted_tok); } -pub fn deinit(pp: *Preprocessor) void { - pp.defines.deinit(pp.gpa); - pp.tokens.deinit(pp.gpa); - pp.arena.deinit(); - pp.token_buf.deinit(); - pp.char_buf.deinit(); - pp.poisoned_identifiers.deinit(); - pp.include_guards.deinit(pp.gpa); - pp.top_expansion_buf.deinit(); - pp.hideset.deinit(); - for (pp.expansion_entries.items(.locs)) |locs| TokenWithExpansionLocs.free(locs, pp.gpa); - pp.expansion_entries.deinit(pp.gpa); +fn handleFileMacro(pp: *Preprocessor, tok: PreprocessorToken) Error!void { + const start = pp.comp.generated_buf.items.len; + const source = pp.comp.getSource(tok.loc.id); + const w = pp.comp.generated_buf.writer(pp.gpa); + try w.print("\"{s}\"\n", .{source.path}); + const pasted_tok = try pp.makeGeneratedToken(start, .string_literal, tok); + return pp.ungetToken(pasted_tok); } -/// Free buffers that are not needed after preprocessing -fn clearBuffers(pp: *Preprocessor) void { - pp.token_buf.clearAndFree(); - pp.char_buf.clearAndFree(); - pp.top_expansion_buf.clearAndFree(); - pp.hideset.clearAndFree(); +fn handleCounterMacro(pp: *Preprocessor, tok: PreprocessorToken) Error!void { + defer pp.counter += 1; + const start = pp.comp.generated_buf.items.len; + const w = pp.comp.generated_buf.writer(pp.gpa); + try w.print("{d}\n", .{pp.counter}); + const pasted_tok = try pp.makeGeneratedToken(start, .pp_num, tok); + return pp.ungetToken(pasted_tok); } -pub fn expansionSlice(pp: *Preprocessor, tok: Tree.TokenIndex) []Source.Location { - const S = struct { - fn order_token_index(context: void, lhs: Tree.TokenIndex, rhs: Tree.TokenIndex) std.math.Order { - _ = context; - return std.math.order(lhs, rhs); - } - }; +fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: PreprocessorToken) !PreprocessorToken { + const pasted_token = PreprocessorToken{ .id = id, .flags = source.flags, .loc = .{ + .id = .generated, + .byte_offset = @intCast(start), + .line = pp.generated_line, + } }; + pp.generated_line += 1; + // try pasted_token.addExpansionLocation(pp.gpa, &.{source.loc}); + // try pasted_token.addExpansionLocation(pp.gpa, source.expansionSlice()); + return pasted_token; +} - const indices = pp.expansion_entries.items(.idx); - const idx = std.sort.binarySearch(Tree.TokenIndex, tok, indices, {}, S.order_token_index) orelse return &.{}; - const locs = pp.expansion_entries.items(.locs)[idx]; - var i: usize = 0; - while (locs[i].id != .unused) : (i += 1) {} - return locs[0..i]; +fn errStr(pp: *Preprocessor, tok: PreprocessorToken, tag: Diagnostics.Tag, str: []const u8) !void { + try pp.comp.addDiagnostic(.{ + .tag = tag, + .loc = tok.loc, + .extra = .{ .str = str }, + }, &.{}); // todo expansion slice } -/// Preprocess a compilation unit of sources into a parsable list of tokens. 
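-/// `sources` must contain at least two entries: the main source first, followed
-/// by the remaining sources (e.g. buffers of builtin and user-defined macros),
-/// which are preprocessed before the main source.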
-pub fn preprocessSources(pp: *Preprocessor, sources: []const Source) Error!void { - assert(sources.len > 1); - const first = sources[0]; - try pp.addIncludeStart(first); - for (sources[1..]) |header| { - try pp.addIncludeStart(header); - _ = try pp.preprocess(header); +fn errTok(pp: *Preprocessor, tok: PreprocessorToken, tag: Diagnostics.Tag) !void { + try pp.comp.addDiagnostic(.{ + .tag = tag, + .loc = tok.loc, + .extra = .{ .none = {} }, + }, &.{}); // todo expansion slice +} + +fn expectClosing(pp: *Preprocessor, opening: PreprocessorToken, id: Token.Id) !void { + // todo: fix expect + const item = try pp.expect(id, .closing_paren); + if (item.id != id) { + try pp.errTok(opening, .to_match_paren); } - try pp.addIncludeResume(first.id, 0, 1); - const eof = try pp.preprocess(first); - try pp.addToken(eof); - pp.clearBuffers(); } -/// Preprocess a source file, returns eof token. -pub fn preprocess(pp: *Preprocessor, source: Source) Error!TokenWithExpansionLocs { - const eof = pp.preprocessExtra(source) catch |er| switch (er) { - // This cannot occur in the main file and is handled in `include`. - error.StopPreprocessing => unreachable, - else => |e| return e, - }; - try eof.checkMsEof(source, pp.comp); - return eof; +fn tokFromBool(b: bool) PreprocessorToken { + return if (b) PreprocessorToken.one else PreprocessorToken.zero; } -/// Tokenize a file without any preprocessing, returns eof token. -pub fn tokenize(pp: *Preprocessor, source: Source) Error!Token { - assert(pp.linemarkers == .none); - assert(pp.preserve_whitespace == false); - var tokenizer = Tokenizer{ - .buf = source.buf, - .comp = pp.comp, - .source = source.id, - }; +fn handleHasAttribute(pp: *Preprocessor, ident_tok: PreprocessorToken) Error!void { + const l_paren = try pp.expectLParen(ident_tok); + const attr_name = try pp.readToken(); + try pp.expectClosing(l_paren, .r_paren); + + const has_attr = Attribute.fromString(.gnu, null, pp.tokSlice(attr_name)) != null; + return pp.ungetToken(tokFromBool(has_attr)); +} - // Estimate how many new tokens this source will contain. 
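- // (heuristic: roughly one token per 8 bytes of source text)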
- const estimated_token_count = source.buf.len / 8; - try pp.ensureTotalTokenCapacity(pp.tokens.len + estimated_token_count); +fn handleHasCAttribute(pp: *Preprocessor, ident_tok: PreprocessorToken) Error!void { + const l_paren = try pp.expectLParen(ident_tok); + var r: TokenList = .{}; + defer r.deinit(pp.gpa); + var tok: PreprocessorToken = undefined; while (true) { - const tok = tokenizer.next(); - if (tok.id == .eof) return tokFromRaw(tok); - try pp.addToken(tokFromRaw(tok)); + tok = try pp.readToken(); + if (tok.id == .comment) continue; + if (tok.id.isDirectiveEnd() or tok.id == .r_paren) break; + try r.append(pp.gpa, tok); } + try pp.expectClosing(l_paren, .r_paren); } -pub fn addIncludeStart(pp: *Preprocessor, source: Source) !void { - if (pp.linemarkers == .none) return; - try pp.addToken(.{ .id = .include_start, .loc = .{ - .id = source.id, - .byte_offset = std.math.maxInt(u32), - .line = 1, - } }); -} +fn handleHasDeclSpecAttribute(pp: *Preprocessor, ident_tok: PreprocessorToken) Error!void { + const l_paren = try pp.expectLParen(ident_tok); + const attr_name = try pp.readToken(); + try pp.expectClosing(l_paren, .r_paren); -pub fn addIncludeResume(pp: *Preprocessor, source: Source.Id, offset: u32, line: u32) !void { - if (pp.linemarkers == .none) return; - try pp.addToken(.{ .id = .include_resume, .loc = .{ - .id = source, - .byte_offset = offset, - .line = line, - } }); + const ident_str = pp.tokSlice(attr_name); + const has_attr = if (pp.comp.langopts.declspec_attrs) Attribute.fromString(.declspec, null, ident_str) != null else false; + return pp.ungetToken(tokFromBool(has_attr)); } -fn invalidTokenDiagnostic(tok_id: Token.Id) Diagnostics.Tag { - return switch (tok_id) { - .unterminated_string_literal => .unterminated_string_literal_warning, - .empty_char_literal => .empty_char_literal_warning, - .unterminated_char_literal => .unterminated_char_literal_warning, - else => unreachable, - }; -} +fn handleHasFeature(pp: *Preprocessor, ident_tok: PreprocessorToken) Error!void { + const l_paren = try pp.expectLParen(ident_tok); + const attr_name = try pp.readToken(); + try pp.expectClosing(l_paren, .r_paren); -/// Return the name of the #ifndef guard macro that starts a source, if any. 
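-/// The result is used to populate `include_guards`, allowing a later #include
-/// of the same header to be skipped once its guard macro is defined.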
-fn findIncludeGuard(pp: *Preprocessor, source: Source) ?[]const u8 { - var tokenizer = Tokenizer{ - .buf = source.buf, - .langopts = pp.comp.langopts, - .source = source.id, - }; - var hash = tokenizer.nextNoWS(); - while (hash.id == .nl) hash = tokenizer.nextNoWS(); - if (hash.id != .hash) return null; - const ifndef = tokenizer.nextNoWS(); - if (ifndef.id != .keyword_ifndef) return null; - const guard = tokenizer.nextNoWS(); - if (guard.id != .identifier) return null; - return pp.tokSlice(guard); + const ident_str = pp.tokSlice(attr_name); + const has_feature = features.hasFeature(pp.comp, ident_str); + return pp.ungetToken(tokFromBool(has_feature)); } -fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!TokenWithExpansionLocs { - var guard_name = pp.findIncludeGuard(source); +fn handleHasExtension(pp: *Preprocessor, ident_tok: PreprocessorToken) Error!void { + const l_paren = try pp.expectLParen(ident_tok); + const attr_name = try pp.readToken(); + try pp.expectClosing(l_paren, .r_paren); - pp.preprocess_count += 1; - var tokenizer = Tokenizer{ - .buf = source.buf, - .langopts = pp.comp.langopts, - .source = source.id, - }; + const ident_str = pp.tokSlice(attr_name); + const has_extension = features.hasExtension(pp.comp, ident_str); + return pp.ungetToken(tokFromBool(has_extension)); +} - // Estimate how many new tokens this source will contain. - const estimated_token_count = source.buf.len / 8; - try pp.ensureTotalTokenCapacity(pp.tokens.len + estimated_token_count); +fn handleHasBuiltin(pp: *Preprocessor, ident_tok: PreprocessorToken) Error!void { + const l_paren = try pp.expectLParen(ident_tok); + const attr_name = try pp.readToken(); + try pp.expectClosing(l_paren, .r_paren); - var if_level: u8 = 0; - var if_kind = std.PackedIntArray(u2, 256).init([1]u2{0} ** 256); - const until_else = 0; - const until_endif = 1; - const until_endif_seen_else = 2; + const ident_str = pp.tokSlice(attr_name); + const has_builtin = pp.comp.hasBuiltin(ident_str); + return pp.ungetToken(tokFromBool(has_builtin)); +} + +fn handleHasWarning(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { + const l_paren = try pp.expectLParen(macro_tok); + const start = pp.char_buf.items.len; + defer pp.char_buf.items.len = start; - var start_of_line = true; while (true) { - var tok = tokenizer.next(); + const tok = try pp.readExpandNewline(); switch (tok.id) { - .hash => if (!start_of_line) try pp.addToken(tokFromRaw(tok)) else { - const directive = tokenizer.nextNoWS(); - switch (directive.id) { - .keyword_error, .keyword_warning => { - // #error tokens.. - pp.top_expansion_buf.items.len = 0; - const char_top = pp.char_buf.items.len; - defer pp.char_buf.items.len = char_top; - - while (true) { - tok = tokenizer.next(); - if (tok.id == .nl or tok.id == .eof) break; - if (tok.id == .whitespace) tok.id = .macro_ws; - try pp.top_expansion_buf.append(tokFromRaw(tok)); - } - try pp.stringify(pp.top_expansion_buf.items); - const slice = pp.char_buf.items[char_top + 1 .. 
pp.char_buf.items.len - 2]; - const duped = try pp.comp.diagnostics.arena.allocator().dupe(u8, slice); - - try pp.comp.addDiagnostic(.{ - .tag = if (directive.id == .keyword_error) .error_directive else .warning_directive, - .loc = .{ .id = tok.source, .byte_offset = directive.start, .line = directive.line }, - .extra = .{ .str = duped }, - }, &.{}); - }, - .keyword_if => { - const sum, const overflowed = @addWithOverflow(if_level, 1); - if (overflowed != 0) - return pp.fatal(directive, "too many #if nestings", .{}); - if_level = sum; - - if (try pp.expr(&tokenizer)) { - if_kind.set(if_level, until_endif); - if (pp.verbose) { - pp.verboseLog(directive, "entering then branch of #if", .{}); - } - } else { - if_kind.set(if_level, until_else); - try pp.skip(&tokenizer, .until_else); - if (pp.verbose) { - pp.verboseLog(directive, "entering else branch of #if", .{}); - } - } - }, - .keyword_ifdef => { - const sum, const overflowed = @addWithOverflow(if_level, 1); - if (overflowed != 0) - return pp.fatal(directive, "too many #if nestings", .{}); - if_level = sum; - - const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue; - try pp.expectNl(&tokenizer); - if (pp.defines.get(macro_name) != null) { - if_kind.set(if_level, until_endif); - if (pp.verbose) { - pp.verboseLog(directive, "entering then branch of #ifdef", .{}); - } - } else { - if_kind.set(if_level, until_else); - try pp.skip(&tokenizer, .until_else); - if (pp.verbose) { - pp.verboseLog(directive, "entering else branch of #ifdef", .{}); - } - } - }, - .keyword_ifndef => { - const sum, const overflowed = @addWithOverflow(if_level, 1); - if (overflowed != 0) - return pp.fatal(directive, "too many #if nestings", .{}); - if_level = sum; - - const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue; - try pp.expectNl(&tokenizer); - if (pp.defines.get(macro_name) == null) { - if_kind.set(if_level, until_endif); - } else { - if_kind.set(if_level, until_else); - try pp.skip(&tokenizer, .until_else); - } - }, - .keyword_elif => { - if (if_level == 0) { - try pp.err(directive, .elif_without_if); - if_level += 1; - if_kind.set(if_level, until_else); - } else if (if_level == 1) { - guard_name = null; - } - switch (if_kind.get(if_level)) { - until_else => if (try pp.expr(&tokenizer)) { - if_kind.set(if_level, until_endif); - if (pp.verbose) { - pp.verboseLog(directive, "entering then branch of #elif", .{}); - } - } else { - try pp.skip(&tokenizer, .until_else); - if (pp.verbose) { - pp.verboseLog(directive, "entering else branch of #elif", .{}); - } - }, - until_endif => try pp.skip(&tokenizer, .until_endif), - until_endif_seen_else => { - try pp.err(directive, .elif_after_else); - skipToNl(&tokenizer); - }, - else => unreachable, - } - }, - .keyword_elifdef => { - if (if_level == 0) { - try pp.err(directive, .elifdef_without_if); - if_level += 1; - if_kind.set(if_level, until_else); - } else if (if_level == 1) { - guard_name = null; - } - switch (if_kind.get(if_level)) { - until_else => { - const macro_name = try pp.expectMacroName(&tokenizer); - if (macro_name == null) { - if_kind.set(if_level, until_else); - try pp.skip(&tokenizer, .until_else); - if (pp.verbose) { - pp.verboseLog(directive, "entering else branch of #elifdef", .{}); - } - } else { - try pp.expectNl(&tokenizer); - if (pp.defines.get(macro_name.?) 
!= null) { - if_kind.set(if_level, until_endif); - if (pp.verbose) { - pp.verboseLog(directive, "entering then branch of #elifdef", .{}); - } - } else { - if_kind.set(if_level, until_else); - try pp.skip(&tokenizer, .until_else); - if (pp.verbose) { - pp.verboseLog(directive, "entering else branch of #elifdef", .{}); - } - } - } - }, - until_endif => try pp.skip(&tokenizer, .until_endif), - until_endif_seen_else => { - try pp.err(directive, .elifdef_after_else); - skipToNl(&tokenizer); - }, - else => unreachable, - } - }, - .keyword_elifndef => { - if (if_level == 0) { - try pp.err(directive, .elifdef_without_if); - if_level += 1; - if_kind.set(if_level, until_else); - } else if (if_level == 1) { - guard_name = null; - } - switch (if_kind.get(if_level)) { - until_else => { - const macro_name = try pp.expectMacroName(&tokenizer); - if (macro_name == null) { - if_kind.set(if_level, until_else); - try pp.skip(&tokenizer, .until_else); - if (pp.verbose) { - pp.verboseLog(directive, "entering else branch of #elifndef", .{}); - } - } else { - try pp.expectNl(&tokenizer); - if (pp.defines.get(macro_name.?) == null) { - if_kind.set(if_level, until_endif); - if (pp.verbose) { - pp.verboseLog(directive, "entering then branch of #elifndef", .{}); - } - } else { - if_kind.set(if_level, until_else); - try pp.skip(&tokenizer, .until_else); - if (pp.verbose) { - pp.verboseLog(directive, "entering else branch of #elifndef", .{}); - } - } - } - }, - until_endif => try pp.skip(&tokenizer, .until_endif), - until_endif_seen_else => { - try pp.err(directive, .elifdef_after_else); - skipToNl(&tokenizer); - }, - else => unreachable, - } - }, - .keyword_else => { - try pp.expectNl(&tokenizer); - if (if_level == 0) { - try pp.err(directive, .else_without_if); - continue; - } else if (if_level == 1) { - guard_name = null; - } - switch (if_kind.get(if_level)) { - until_else => { - if_kind.set(if_level, until_endif_seen_else); - if (pp.verbose) { - pp.verboseLog(directive, "#else branch here", .{}); - } - }, - until_endif => try pp.skip(&tokenizer, .until_endif_seen_else), - until_endif_seen_else => { - try pp.err(directive, .else_after_else); - skipToNl(&tokenizer); - }, - else => unreachable, - } - }, - .keyword_endif => { - try pp.expectNl(&tokenizer); - if (if_level == 0) { - guard_name = null; - try pp.err(directive, .endif_without_if); - continue; - } else if (if_level == 1) { - const saved_tokenizer = tokenizer; - defer tokenizer = saved_tokenizer; - - var next = tokenizer.nextNoWS(); - while (next.id == .nl) : (next = tokenizer.nextNoWS()) {} - if (next.id != .eof) guard_name = null; - } - if_level -= 1; - }, - .keyword_define => try pp.define(&tokenizer), - .keyword_undef => { - const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue; - - _ = pp.defines.remove(macro_name); - try pp.expectNl(&tokenizer); - }, - .keyword_include => { - try pp.include(&tokenizer, .first); - continue; - }, - .keyword_include_next => { - try pp.comp.addDiagnostic(.{ - .tag = .include_next, - .loc = .{ .id = tok.source, .byte_offset = directive.start, .line = directive.line }, - }, &.{}); - if (pp.include_depth == 0) { - try pp.comp.addDiagnostic(.{ - .tag = .include_next_outside_header, - .loc = .{ .id = tok.source, .byte_offset = directive.start, .line = directive.line }, - }, &.{}); - try pp.include(&tokenizer, .first); - } else { - try pp.include(&tokenizer, .next); - } - }, - .keyword_embed => try pp.embed(&tokenizer), - .keyword_pragma => { - try pp.pragma(&tokenizer, directive, null, &.{}); - continue; - }, 
- .keyword_line => { - // #line number "file" - const digits = tokenizer.nextNoWS(); - if (digits.id != .pp_num) try pp.err(digits, .line_simple_digit); - // TODO: validate that the pp_num token is solely digits - - if (digits.id == .eof or digits.id == .nl) continue; - const name = tokenizer.nextNoWS(); - if (name.id == .eof or name.id == .nl) continue; - if (name.id != .string_literal) try pp.err(name, .line_invalid_filename); - try pp.expectNl(&tokenizer); - }, - .pp_num => { - // # number "file" flags - // TODO: validate that the pp_num token is solely digits - // if not, emit `GNU line marker directive requires a simple digit sequence` - const name = tokenizer.nextNoWS(); - if (name.id == .eof or name.id == .nl) continue; - if (name.id != .string_literal) try pp.err(name, .line_invalid_filename); - - const flag_1 = tokenizer.nextNoWS(); - if (flag_1.id == .eof or flag_1.id == .nl) continue; - const flag_2 = tokenizer.nextNoWS(); - if (flag_2.id == .eof or flag_2.id == .nl) continue; - const flag_3 = tokenizer.nextNoWS(); - if (flag_3.id == .eof or flag_3.id == .nl) continue; - const flag_4 = tokenizer.nextNoWS(); - if (flag_4.id == .eof or flag_4.id == .nl) continue; - try pp.expectNl(&tokenizer); - }, - .nl => {}, - .eof => { - if (if_level != 0) try pp.err(tok, .unterminated_conditional_directive); - return tokFromRaw(directive); - }, - else => { - try pp.err(tok, .invalid_preprocessing_directive); - skipToNl(&tokenizer); - }, - } - if (pp.preserve_whitespace) { - tok.id = .nl; - try pp.addToken(tokFromRaw(tok)); - } - }, - .whitespace => if (pp.preserve_whitespace) try pp.addToken(tokFromRaw(tok)), - .nl => { - start_of_line = true; - if (pp.preserve_whitespace) try pp.addToken(tokFromRaw(tok)); - }, - .eof => { - if (if_level != 0) try pp.err(tok, .unterminated_conditional_directive); - // The following check needs to occur here and not at the top of the function - // because a pragma may change the level during preprocessing - if (source.buf.len > 0 and source.buf[source.buf.len - 1] != '\n') { - try pp.err(tok, .newline_eof); - } - if (guard_name) |name| { - if (try pp.include_guards.fetchPut(pp.gpa, source.id, name)) |prev| { - assert(mem.eql(u8, name, prev.value)); - } - } - return tokFromRaw(tok); + .nl, .eof => { + try pp.errTok(tok, .unterminated_macro_arg_list); + return pp.ungetToken(PreprocessorToken.zero); }, - .unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| { - start_of_line = false; - try pp.err(tok, invalidTokenDiagnostic(tag)); - try pp.expandMacro(&tokenizer, tok); + .r_paren => break, + .string_literal => { + const string = pp.tokSlice(tok); + try pp.char_buf.appendSlice(pp.gpa, string[1 .. string.len - 1]); }, - .unterminated_comment => try pp.err(tok, .unterminated_comment), else => { - if (tok.id.isMacroIdentifier() and pp.poisoned_identifiers.get(pp.tokSlice(tok)) != null) { - try pp.err(tok, .poisoned_identifier); - } - // Add the token to the buffer doing any necessary expansions. 
- start_of_line = false; - try pp.expandMacro(&tokenizer, tok); + pp.skipToNl(); + try pp.errTok(tok, .missing_paren_param_list); + try pp.errTok(l_paren, .to_match_paren); + return pp.ungetToken(PreprocessorToken.zero); }, } } + const actual_param = pp.char_buf.items[start..]; + if (actual_param.len == 0) { + try pp.comp.addDiagnostic(.{ + .tag = .expected_arguments, + .loc = macro_tok.loc, + .extra = .{ .arguments = .{ .expected = 1, .actual = 0 } }, + }, &.{}); // todo expansion slice + return pp.ungetToken(PreprocessorToken.zero); + } + if (!mem.startsWith(u8, actual_param, "-W")) { + try pp.errStr(l_paren, .malformed_warning_check, "__has_warning"); + return pp.ungetToken(PreprocessorToken.zero); + } + const warning_name = actual_param[2..]; + const exists = Diagnostics.warningExists(warning_name); + return pp.ungetToken(tokFromBool(exists)); } -/// Get raw token source string. -/// Returned slice is invalidated when comp.generated_buf is updated. -pub fn tokSlice(pp: *Preprocessor, token: anytype) []const u8 { - if (token.id.lexeme()) |some| return some; - const source = pp.comp.getSource(token.source); - return source.buf[token.start..token.end]; +fn handleHasInclude(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { + return pp.handleHasIncludeExtra(macro_tok, .first); } -/// Convert a token from the Tokenizer into a token used by the parser. -fn tokFromRaw(raw: RawToken) TokenWithExpansionLocs { - return .{ - .id = raw.id, - .loc = .{ - .id = raw.source, - .byte_offset = raw.start, - .line = raw.line, - }, - }; +fn handleHasIncludeNext(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { + return pp.handleHasIncludeExtra(macro_tok, .next); } -fn err(pp: *Preprocessor, raw: RawToken, tag: Diagnostics.Tag) !void { - try pp.comp.addDiagnostic(.{ - .tag = tag, - .loc = .{ - .id = raw.source, - .byte_offset = raw.start, - .line = raw.line, - }, - }, &.{}); +fn handleHasIncludeExtra(pp: *Preprocessor, macro_tok: PreprocessorToken, which: Compilation.WhichInclude) Error!void { + const l_paren = pp.getToken(); + if (l_paren.id != .l_paren) { + pp.skipToNl(); + return; + } + + var is_std: bool = undefined; + const include_str = pp.readHeaderName(&is_std) catch |err| switch (err) { + error.InvalidInclude => return pp.ungetToken(PreprocessorToken.zero), + else => |e| return e, + }; + try pp.expectClosing(l_paren, .r_paren); + + const filename = include_str[1 .. 
include_str.len - 1]; + const include_type: Compilation.IncludeType = switch (include_str[0]) { + '"' => .quotes, + '<' => .angle_brackets, + else => unreachable, + }; + + if (which == .first or pp.includeDepth() == 0) { + if (which == .next) { + try pp.comp.addDiagnostic(.{ + .tag = .include_next_outside_header, + .loc = macro_tok.loc, + }, &.{}); + } + const has = try pp.comp.hasInclude(filename, macro_tok.loc.id, include_type, .first); + return pp.ungetToken(tokFromBool(has)); + } + const has = try pp.comp.hasInclude(filename, macro_tok.loc.id, include_type, .next); + return pp.ungetToken(tokFromBool(has)); } -fn errStr(pp: *Preprocessor, tok: TokenWithExpansionLocs, tag: Diagnostics.Tag, str: []const u8) !void { - try pp.comp.addDiagnostic(.{ - .tag = tag, - .loc = tok.loc, - .extra = .{ .str = str }, - }, tok.expansionSlice()); +fn includeDepth(pp: *Preprocessor) usize { + return pp.tokenizers.items.len - 1; } -fn fatal(pp: *Preprocessor, raw: RawToken, comptime fmt: []const u8, args: anytype) Compilation.Error { - try pp.comp.diagnostics.list.append(pp.gpa, .{ - .tag = .cli_error, - .kind = .@"fatal error", - .extra = .{ .str = try std.fmt.allocPrint(pp.comp.diagnostics.arena.allocator(), fmt, args) }, - .loc = .{ - .id = raw.source, - .byte_offset = raw.start, - .line = raw.line, - }, - }); - return error.FatalError; +fn hasEmbedValue(contents_arg: ?[]const u8) []const u8 { + const contents = contents_arg orelse return "0\n"; + if (contents.len == 0) return "2\n"; + return "1\n"; } -fn fatalNotFound(pp: *Preprocessor, tok: TokenWithExpansionLocs, filename: []const u8) Compilation.Error { - const old = pp.comp.diagnostics.fatal_errors; - pp.comp.diagnostics.fatal_errors = true; - defer pp.comp.diagnostics.fatal_errors = old; +/// TODO: handle limit/prefix/suffix/etc +fn handleHasEmbed(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { + const l_paren = pp.getToken(); + if (l_paren.id != .l_paren) { + pp.skipToNl(); + return; + } - try pp.comp.diagnostics.addExtra(pp.comp.langopts, .{ .tag = .cli_error, .loc = tok.loc, .extra = .{ - .str = try std.fmt.allocPrint(pp.comp.diagnostics.arena.allocator(), "'{s}' not found", .{filename}), - } }, tok.expansionSlice(), false); - unreachable; // addExtra should've returned FatalError -} + var is_std: bool = undefined; + const include_str = pp.readHeaderName(&is_std) catch |err| switch (err) { + error.InvalidInclude => return, + else => |e| return e, + }; + try pp.expectClosing(l_paren, .r_paren); -fn verboseLog(pp: *Preprocessor, raw: RawToken, comptime fmt: []const u8, args: anytype) void { - const source = pp.comp.getSource(raw.source); - const line_col = source.lineCol(.{ .id = raw.source, .line = raw.line, .byte_offset = raw.start }); + const filename = include_str[1 .. 
include_str.len - 1]; + const include_type: Compilation.IncludeType = switch (include_str[0]) { + '"' => .quotes, + '<' => .angle_brackets, + else => unreachable, + }; - const stderr = std.io.getStdErr().writer(); - var buf_writer = std.io.bufferedWriter(stderr); - const writer = buf_writer.writer(); - defer buf_writer.flush() catch {}; - writer.print("{s}:{d}:{d}: ", .{ source.path, line_col.line_no, line_col.col }) catch return; - writer.print(fmt, args) catch return; - writer.writeByte('\n') catch return; - writer.writeAll(line_col.line) catch return; - writer.writeByte('\n') catch return; + const contents = try pp.comp.findEmbed(filename, macro_tok.loc.id, include_type, 1); + const result = hasEmbedValue(contents); + const start = pp.comp.generated_buf.items.len; + try pp.comp.generated_buf.appendSlice(pp.comp.gpa, result); + const pasted_tok = try pp.makeGeneratedToken(start, .pp_num, macro_tok); + return pp.ungetToken(pasted_tok); } -/// Consume next token, error if it is not an identifier. -fn expectMacroName(pp: *Preprocessor, tokenizer: *Tokenizer) Error!?[]const u8 { - const macro_name = tokenizer.nextNoWS(); - if (!macro_name.id.isMacroIdentifier()) { - try pp.err(macro_name, .macro_name_missing); - skipToNl(tokenizer); - return null; +// Skip until newline, ignore other tokens. +fn skipToNl(pp: *Preprocessor) void { + while (true) { + const tok = pp.getToken(); + if (tok.id.isDirectiveEnd()) return; } - return pp.tokSlice(macro_name); } -/// Skip until after a newline, error if extra tokens before it. -fn expectNl(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { - var sent_err = false; +fn readOneIdentifierArgument(pp: *Preprocessor, macro_tok: PreprocessorToken) !?PreprocessorToken { + const l_paren = try pp.expect(.l_paren, .missing_lparen_after_builtin); + _ = l_paren; + var invalid: ?PreprocessorToken = null; + var identifier: ?PreprocessorToken = null; while (true) { - const tok = tokenizer.next(); - if (tok.id == .nl or tok.id == .eof) return; - if (tok.id == .whitespace or tok.id == .comment) continue; - if (!sent_err) { - sent_err = true; - try pp.err(tok, .extra_tokens_directive_end); + var tok = pp.getToken(); + tok.id.simplifyMacroKeywordExtra(true); + + switch (tok.id) { + .r_paren, .eof => break, + else => { + if (identifier) |_| invalid = tok else identifier = tok; + }, } } + if (invalid) |some| { + try pp.comp.addDiagnostic(.{ + .tag = .missing_tok_builtin, + .loc = some.loc, + .extra = .{ .tok_id_expected = .r_paren }, + }, &.{}); // TODO: expansion slice + return null; + } + if (identifier) |ident| { + if (ident.id == .identifier or ident.id == .extended_identifier) return ident; + } else { + const extra: Diagnostics.Message.Extra = .{ .arguments = .{ .expected = 1, .actual = 0 } }; + try pp.comp.addDiagnostic(.{ .tag = .expected_arguments, .loc = macro_tok.loc, .extra = extra }, &.{}); + } + return null; } -fn getTokenState(pp: *const Preprocessor) TokenState { - return .{ - .tokens_len = pp.tokens.len, - .expansion_entries_len = pp.expansion_entries.len, - }; +fn handleIsIdentifier(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { + if (try pp.readOneIdentifierArgument(macro_tok)) |_| { + return pp.ungetToken(PreprocessorToken.one); + } else { + return pp.ungetToken(PreprocessorToken.zero); + } } -fn restoreTokenState(pp: *Preprocessor, state: TokenState) void { - pp.tokens.len = state.tokens_len; - pp.expansion_entries.len = state.expansion_entries_len; +fn handlePragmaOperator(pp: *Preprocessor, macro_tok: PreprocessorToken) Error!void { + 
_ = pp; + _ = macro_tok; + // TODO } -/// Consume all tokens until a newline and parse the result into a boolean. -fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool { - const token_state = pp.getTokenState(); - defer { - for (pp.top_expansion_buf.items) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); - pp.restoreTokenState(token_state); - } +pub fn addBuiltinMacros(pp: *Preprocessor) !void { + try pp.addBuiltinMacro("__has_attribute", handleHasAttribute); + try pp.addBuiltinMacro("__has_c_attribute", handleHasCAttribute); + try pp.addBuiltinMacro("__has_declspec_attribute", handleHasDeclSpecAttribute); + try pp.addBuiltinMacro("__has_feature", handleHasFeature); + try pp.addBuiltinMacro("__has_extension", handleHasExtension); + try pp.addBuiltinMacro("__has_builtin", handleHasBuiltin); + try pp.addBuiltinMacro("__has_warning", handleHasWarning); + try pp.addBuiltinMacro("__has_include", handleHasInclude); + try pp.addBuiltinMacro("__has_include_next", handleHasIncludeNext); + try pp.addBuiltinMacro("__has_embed", handleHasEmbed); - pp.top_expansion_buf.items.len = 0; - const eof = while (true) { - const tok = tokenizer.next(); - switch (tok.id) { - .nl, .eof => break tok, - .whitespace => if (pp.top_expansion_buf.items.len == 0) continue, - else => {}, - } - try pp.top_expansion_buf.append(tokFromRaw(tok)); - } else unreachable; - if (pp.top_expansion_buf.items.len != 0) { - pp.expansion_source_loc = pp.top_expansion_buf.items[0].loc; - pp.hideset.clearRetainingCapacity(); - try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, pp.top_expansion_buf.items.len, false, .expr); - } - for (pp.top_expansion_buf.items) |tok| { - if (tok.id == .macro_ws) continue; - if (!tok.id.validPreprocessorExprStart()) { - try pp.comp.addDiagnostic(.{ - .tag = .invalid_preproc_expr_start, - .loc = tok.loc, - }, tok.expansionSlice()); - return false; - } - break; - } else { - try pp.err(eof, .expected_value_in_expr); - return false; - } + try pp.addBuiltinMacro("__is_identifier", handleIsIdentifier); - // validate the tokens in the expression - try pp.ensureUnusedTokenCapacity(pp.top_expansion_buf.items.len); - var i: usize = 0; - const items = pp.top_expansion_buf.items; - while (i < items.len) : (i += 1) { - var tok = items[i]; - switch (tok.id) { - .string_literal, - .string_literal_utf_16, - .string_literal_utf_8, - .string_literal_utf_32, - .string_literal_wide, - => { - try pp.comp.addDiagnostic(.{ - .tag = .string_literal_in_pp_expr, - .loc = tok.loc, - }, tok.expansionSlice()); - return false; - }, - .plus_plus, - .minus_minus, - .plus_equal, - .minus_equal, - .asterisk_equal, - .slash_equal, - .percent_equal, - .angle_bracket_angle_bracket_left_equal, - .angle_bracket_angle_bracket_right_equal, - .ampersand_equal, - .caret_equal, - .pipe_equal, - .l_bracket, - .r_bracket, - .l_brace, - .r_brace, - .ellipsis, - .semicolon, - .hash, - .hash_hash, - .equal, - .arrow, - .period, - => { - try pp.comp.addDiagnostic(.{ - .tag = .invalid_preproc_operator, - .loc = tok.loc, - }, tok.expansionSlice()); - return false; - }, - .macro_ws, .whitespace => continue, - .keyword_false => tok.id = .zero, - .keyword_true => tok.id = .one, - else => if (tok.id.isMacroIdentifier()) { - if (tok.id == .keyword_defined) { - const tokens_consumed = try pp.handleKeywordDefined(&tok, items[i + 1 ..], eof); - i += tokens_consumed; - } else { - try pp.errStr(tok, .undefined_macro, pp.expandedSlice(tok)); + try pp.addBuiltinMacro("__FILE__", handleFileMacro); + try 
pp.addBuiltinMacro("__LINE__", handleLineMacro); + try pp.addBuiltinMacro("__COUNTER__", handleCounterMacro); + try pp.addBuiltinMacro("_Pragma", handlePragmaOperator); +} - if (i + 1 < pp.top_expansion_buf.items.len and - pp.top_expansion_buf.items[i + 1].id == .l_paren) - { - try pp.errStr(tok, .fn_macro_undefined, pp.expandedSlice(tok)); - return false; - } +/// Initialize Preprocessor with builtin macros. +pub fn initDefault(comp: *Compilation) !Preprocessor { + var pp = init(comp); + errdefer pp.deinit(); + try pp.addBuiltinMacros(); + return pp; +} - tok.id = .zero; // undefined macro - } - }, - } - pp.addTokenAssumeCapacity(tok); +pub fn deinit(pp: *Preprocessor) void { + pp.arena.deinit(); + pp.include_guards.deinit(pp.gpa); + pp.tokens.deinit(pp.gpa); + pp.tokenizers.deinit(pp.gpa); + for (pp.expansion_bufs.items) |*toklist| { + toklist.deinit(pp.gpa); } - try pp.addToken(.{ - .id = .eof, - .loc = tokFromRaw(eof).loc, - }); - - // Actually parse it. - var parser = Parser{ - .pp = pp, - .comp = pp.comp, - .gpa = pp.gpa, - .tok_ids = pp.tokens.items(.id), - .tok_i = @intCast(token_state.tokens_len), - .arena = pp.arena.allocator(), - .in_macro = true, - .strings = std.ArrayListAligned(u8, 4).init(pp.comp.gpa), - - .data = undefined, - .value_map = undefined, - .labels = undefined, - .decl_buf = undefined, - .list_buf = undefined, - .param_buf = undefined, - .enum_buf = undefined, - .record_buf = undefined, - .attr_buf = undefined, - .field_attr_buf = undefined, - .string_ids = undefined, - }; - defer parser.strings.deinit(); - return parser.macroExpr(); + pp.expansion_bufs.deinit(pp.gpa); + pp.defines.deinit(pp.gpa); + pp.char_buf.deinit(pp.gpa); + for (pp.expansion_entries.items(.locs)) |locs| PreprocessorToken.free(locs, pp.gpa); + pp.expansion_entries.deinit(pp.gpa); + pp.guard_stack.deinit(pp.gpa); + pp.macro_arg_tokens.deinit(pp.gpa); + pp.macro_args.deinit(pp.gpa); + pp.safe_strings.deinit(pp.gpa); + pp.treap.deinit(); + pp.poisoned_identifiers.deinit(); } -/// Turns macro_tok from .keyword_defined into .zero or .one depending on whether the argument is defined -/// Returns the number of tokens consumed -fn handleKeywordDefined(pp: *Preprocessor, macro_tok: *TokenWithExpansionLocs, tokens: []const TokenWithExpansionLocs, eof: RawToken) !usize { - std.debug.assert(macro_tok.id == .keyword_defined); - var it = TokenIterator.init(tokens); - const first = it.nextNoWS() orelse { - try pp.err(eof, .macro_name_missing); - return it.i; - }; - switch (first.id) { - .l_paren => {}, - else => { - if (!first.id.isMacroIdentifier()) { - try pp.errStr(first, .macro_name_must_be_identifier, pp.expandedSlice(first)); - } - macro_tok.id = if (pp.defines.contains(pp.expandedSlice(first))) .one else .zero; - return it.i; - }, - } - const second = it.nextNoWS() orelse { - try pp.err(eof, .macro_name_missing); - return it.i; - }; - if (!second.id.isMacroIdentifier()) { - try pp.comp.addDiagnostic(.{ - .tag = .macro_name_must_be_identifier, - .loc = second.loc, - }, second.expansionSlice()); - return it.i; - } - macro_tok.id = if (pp.defines.contains(pp.expandedSlice(second))) .one else .zero; +/// Preprocess a compilation unit of sources into a parsable list of tokens. 
+pub fn preprocessSources(pp: *Preprocessor, sources: []const Source) Error!void { + assert(sources.len > 1); + const first = sources[0]; - const last = it.nextNoWS(); - if (last == null or last.?.id != .r_paren) { - const tok = last orelse tokFromRaw(eof); - try pp.comp.addDiagnostic(.{ - .tag = .closing_paren, - .loc = tok.loc, - }, tok.expansionSlice()); - try pp.comp.addDiagnostic(.{ - .tag = .to_match_paren, - .loc = first.loc, - }, first.expansionSlice()); + for (sources[1..]) |header| { + _ = try pp.preprocess(header); } - - return it.i; + const eof = try pp.preprocess(first); + try pp.addToken(eof); } -/// Skip until #else #elif #endif, return last directive token id. -/// Also skips nested #if ... #endifs. -fn skip( - pp: *Preprocessor, - tokenizer: *Tokenizer, - cont: enum { until_else, until_endif, until_endif_seen_else }, -) Error!void { - var ifs_seen: u32 = 0; - var line_start = true; - while (tokenizer.index < tokenizer.buf.len) { - if (line_start) { - const saved_tokenizer = tokenizer.*; - const hash = tokenizer.nextNoWS(); - if (hash.id == .nl) continue; - line_start = false; - if (hash.id != .hash) continue; - const directive = tokenizer.nextNoWS(); - switch (directive.id) { - .keyword_else => { - if (ifs_seen != 0) continue; - if (cont == .until_endif_seen_else) { - try pp.err(directive, .else_after_else); - continue; - } - tokenizer.* = saved_tokenizer; - return; - }, - .keyword_elif => { - if (ifs_seen != 0 or cont == .until_endif) continue; - if (cont == .until_endif_seen_else) { - try pp.err(directive, .elif_after_else); - continue; - } - tokenizer.* = saved_tokenizer; - return; - }, - .keyword_elifdef => { - if (ifs_seen != 0 or cont == .until_endif) continue; - if (cont == .until_endif_seen_else) { - try pp.err(directive, .elifdef_after_else); - continue; - } - tokenizer.* = saved_tokenizer; - return; - }, - .keyword_elifndef => { - if (ifs_seen != 0 or cont == .until_endif) continue; - if (cont == .until_endif_seen_else) { - try pp.err(directive, .elifndef_after_else); - continue; - } - tokenizer.* = saved_tokenizer; - return; - }, - .keyword_endif => { - if (ifs_seen == 0) { - tokenizer.* = saved_tokenizer; - return; - } - ifs_seen -= 1; - }, - .keyword_if, .keyword_ifdef, .keyword_ifndef => ifs_seen += 1, - else => {}, - } - } else if (tokenizer.buf[tokenizer.index] == '\n') { - line_start = true; - tokenizer.index += 1; - tokenizer.line += 1; - if (pp.preserve_whitespace) { - try pp.addToken(.{ .id = .nl, .loc = .{ - .id = tokenizer.source, - .line = tokenizer.line, - } }); - } - } else { - line_start = false; - tokenizer.index += 1; - } +fn propagateSpace(pp: *Preprocessor, tokens: []PreprocessorToken, template: PreprocessorToken) void { + if (tokens.len > 0) { + tokens[0].flags = template.flags; } else { - const eof = tokenizer.next(); - return pp.err(eof, .unterminated_conditional_directive); + pp.injectSpace(); } } -// Skip until newline, ignore other tokens. 
-fn skipToNl(tokenizer: *Tokenizer) void { - while (true) { - const tok = tokenizer.next(); - if (tok.id == .nl or tok.id == .eof) return; - } +fn ungetAll(pp: *Preprocessor, tokens: []const PreprocessorToken) !void { + if (tokens.len == 0) return; + const start = pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].items.len; + try pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].appendSlice(pp.gpa, tokens); + std.mem.reverse(PreprocessorToken, pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].items[start..]); } -const ExpandBuf = std.ArrayList(TokenWithExpansionLocs); -fn removePlacemarkers(buf: *ExpandBuf) void { - var i: usize = buf.items.len -% 1; - while (i < buf.items.len) : (i -%= 1) { - if (buf.items[i].id == .placemarker) { - const placemarker = buf.orderedRemove(i); - TokenWithExpansionLocs.free(placemarker.expansion_locs, buf.allocator); +fn addHideSet(pp: *Preprocessor, toks: []PreprocessorToken, hideset: Treap.Node) !void { + for (toks) |*tok| { + switch (tok.id) { + // non-identifiers are not expanded, so we don't need to track their hidesets. + // Track r_paren hideset since it is used for computing the hideset of function-like macro expansions + .identifier, .extended_identifier, .r_paren => { + tok.hideset = try pp.treap.@"union"(tok.hideset, hideset); + }, + else => {}, } } } -const MacroArguments = std.ArrayList([]const TokenWithExpansionLocs); -fn deinitMacroArguments(allocator: Allocator, args: *const MacroArguments) void { - for (args.items) |item| { - for (item) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, allocator); - allocator.free(item); +fn stringize(pp: *Preprocessor, tmpl: PreprocessorToken, args_range: MacroArg) !PreprocessorToken { + const start = pp.comp.generated_buf.items.len; + try pp.comp.generated_buf.append(pp.gpa, '"'); + const args = args_range.slice(pp.macro_arg_tokens.items); + for (args, 0..) |tok, i| { + const slice = pp.tokSlice(tok); + if (slice.len > 0 and tok.flags.space and i != 0) { + try pp.comp.generated_buf.append(pp.gpa, ' '); + } + try pp.comp.generated_buf.appendSlice(pp.gpa, slice); } - args.deinit(); + try pp.comp.generated_buf.append(pp.gpa, '"'); + var tok = tmpl; + tok.id = .string_literal; + tok.loc = .{ + .id = .generated, + .byte_offset = @intCast(start), + .line = pp.generated_line, + }; + pp.generated_line += 1; + return tok; } -fn expandObjMacro(pp: *Preprocessor, simple_macro: *const Macro) Error!ExpandBuf { - var buf = ExpandBuf.init(pp.gpa); - errdefer buf.deinit(); - if (simple_macro.tokens.len == 0) { - try buf.append(.{ .id = .placemarker, .loc = .{ .id = .generated } }); - return buf; - } - try buf.ensureTotalCapacity(simple_macro.tokens.len); - - // Add all of the simple_macros tokens to the new buffer handling any concats. 
+fn subst(pp: *Preprocessor, macro: *const Macro, macro_tok: PreprocessorToken, args: MacroArgList, hideset_arg: Treap.Node) ![]PreprocessorToken { + _ = macro_tok; + var hideset = hideset_arg; + var r: TokenList = .{}; + defer r.deinit(pp.gpa); var i: usize = 0; - while (i < simple_macro.tokens.len) : (i += 1) { - const raw = simple_macro.tokens[i]; - const tok = tokFromRaw(raw); - switch (raw.id) { - .hash_hash => { - var rhs = tokFromRaw(simple_macro.tokens[i + 1]); - i += 1; - while (true) { - if (rhs.id == .whitespace) { - rhs = tokFromRaw(simple_macro.tokens[i + 1]); - i += 1; - } else if (rhs.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) { - rhs = tokFromRaw(simple_macro.tokens[i + 1]); - i += 1; - } else break; + while (i < macro.tokens.len) : (i += 1) { + const t0 = macro.tokens[i]; + const t1: ?PreprocessorToken = if (i == macro.tokens.len - 1) null else macro.tokens[i + 1]; + + const t0_param = t0.id == .macro_param; + const t1_param = if (t1) |tok| tok.id == .macro_param else false; + + if (t0.id == .hash and t1_param) { + const arg = args.slice(pp.macro_args.items)[t1.?.argPosition()]; + const stringized = try pp.stringize(t0, arg); + try r.append(pp.gpa, stringized); + i += 1; + continue; + } + if (t0.id == .hash_hash and t1_param) { + const arg = args.slice(pp.macro_args.items)[t1.?.argPosition()]; + if (t1.?.isVarArg() and r.items.len > 0 and r.items[r.items.len - 1].id == .comma) { + if (arg.len() == 0) { + _ = r.pop(); + } else { + try r.appendSlice(pp.gpa, arg.slice(pp.macro_arg_tokens.items)); } - try pp.pasteTokens(&buf, &.{rhs}); - }, - .whitespace => if (pp.preserve_whitespace) buf.appendAssumeCapacity(tok), - .macro_file => { - const start = pp.comp.generated_buf.items.len; - const source = pp.comp.getSource(pp.expansion_source_loc.id); - const w = pp.comp.generated_buf.writer(pp.gpa); - try w.print("\"{s}\"\n", .{source.path}); - - buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .string_literal, tok)); - }, - .macro_line => { - const start = pp.comp.generated_buf.items.len; - const source = pp.comp.getSource(pp.expansion_source_loc.id); - const w = pp.comp.generated_buf.writer(pp.gpa); - try w.print("{d}\n", .{source.physicalLine(pp.expansion_source_loc)}); - - buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .pp_num, tok)); - }, - .macro_counter => { - defer pp.counter += 1; - const start = pp.comp.generated_buf.items.len; - const w = pp.comp.generated_buf.writer(pp.gpa); - try w.print("{d}\n", .{pp.counter}); - - buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .pp_num, tok)); - }, - else => buf.appendAssumeCapacity(tok), + } else if (arg.len() > 0) { + try pp.pasteAndPush(&r, arg.slice(pp.macro_arg_tokens.items)[0]); + try r.appendSlice(pp.gpa, arg.slice(pp.macro_arg_tokens.items)[1..]); + } + i += 1; + continue; } + if (t0.id == .hash_hash and t1 != null) { + hideset = t1.?.hideset; + try pp.pasteAndPush(&r, t1.?); + i += 1; + continue; + } + if (t0_param and t1 != null and t1.?.id == .hash_hash) { + hideset = t1.?.hideset; + const arg = args.slice(pp.macro_args.items)[t0.argPosition()]; + if (arg.len() == 0) { + i += 1; + } else { + try r.appendSlice(pp.gpa, arg.slice(pp.macro_arg_tokens.items)); + } + continue; + } + if (t0_param) { + const arg = args.slice(pp.macro_args.items)[t0.argPosition()]; + const expanded = try pp.expandAll(arg.slice(pp.macro_arg_tokens.items), t0); + defer pp.gpa.free(expanded); + try r.appendSlice(pp.gpa, expanded); + continue; + } + try r.append(pp.gpa, t0); } - - return buf; -} - 
-/// Join a possibly-parenthesized series of string literal tokens into a single string without -/// leading or trailing quotes. The returned slice is invalidated if pp.char_buf changes. -/// Returns error.ExpectedStringLiteral if parentheses are not balanced, a non-string-literal -/// is encountered, or if no string literals are encountered -/// TODO: destringize (replace all '\\' with a single `\` and all '\"' with a '"') -fn pasteStringsUnsafe(pp: *Preprocessor, toks: []const TokenWithExpansionLocs) ![]const u8 { - const char_top = pp.char_buf.items.len; - defer pp.char_buf.items.len = char_top; - var unwrapped = toks; - if (toks.len >= 2 and toks[0].id == .l_paren and toks[toks.len - 1].id == .r_paren) { - unwrapped = toks[1 .. toks.len - 1]; - } - if (unwrapped.len == 0) return error.ExpectedStringLiteral; - - for (unwrapped) |tok| { - if (tok.id == .macro_ws) continue; - if (tok.id != .string_literal) return error.ExpectedStringLiteral; - const str = pp.expandedSlice(tok); - try pp.char_buf.appendSlice(str[1 .. str.len - 1]); - } - return pp.char_buf.items[char_top..]; + try pp.addHideSet(r.items, hideset); + return r.toOwnedSlice(pp.gpa); } -/// Handle the _Pragma operator (implemented as a builtin macro) -fn pragmaOperator(pp: *Preprocessor, arg_tok: TokenWithExpansionLocs, operator_loc: Source.Location) !void { - const arg_slice = pp.expandedSlice(arg_tok); - const content = arg_slice[1 .. arg_slice.len - 1]; - const directive = "#pragma "; +fn pasteTokens(pp: *Preprocessor, lhs: PreprocessorToken, rhs: PreprocessorToken) !PreprocessorToken { + const start = pp.comp.generated_buf.items.len; + const end = start + pp.tokSlice(lhs).len + pp.tokSlice(rhs).len; + try pp.comp.generated_buf.ensureTotalCapacity(pp.gpa, end + 1); // +1 for a newline - pp.char_buf.clearRetainingCapacity(); - const total_len = directive.len + content.len + 1; // destringify can never grow the string, + 1 for newline - try pp.char_buf.ensureUnusedCapacity(total_len); - pp.char_buf.appendSliceAssumeCapacity(directive); - pp.destringify(content); - pp.char_buf.appendAssumeCapacity('\n'); + // We cannot use the same slices here since they might be invalidated by `ensureCapacity` + pp.comp.generated_buf.appendSliceAssumeCapacity(pp.tokSlice(lhs)); + pp.comp.generated_buf.appendSliceAssumeCapacity(pp.tokSlice(rhs)); + pp.comp.generated_buf.appendAssumeCapacity('\n'); - const start = pp.comp.generated_buf.items.len; - try pp.comp.generated_buf.appendSlice(pp.gpa, pp.char_buf.items); + // Try to tokenize the result. 
var tmp_tokenizer = Tokenizer{ .buf = pp.comp.generated_buf.items, .langopts = pp.comp.langopts, .index = @intCast(start), .source = .generated, - .line = pp.generated_line, }; - pp.generated_line += 1; - const hash_tok = tmp_tokenizer.next(); - assert(hash_tok.id == .hash); - const pragma_tok = tmp_tokenizer.next(); - assert(pragma_tok.id == .keyword_pragma); - try pp.pragma(&tmp_tokenizer, pragma_tok, operator_loc, arg_tok.expansionSlice()); -} - -/// Inverts the output of the preprocessor stringify (#) operation -/// (except all whitespace is condensed to a single space) -/// writes output to pp.char_buf; assumes capacity is sufficient -/// backslash backslash -> backslash -/// backslash doublequote -> doublequote -/// All other characters remain the same -fn destringify(pp: *Preprocessor, str: []const u8) void { - var state: enum { start, backslash_seen } = .start; - for (str) |c| { - switch (c) { - '\\' => { - if (state == .backslash_seen) pp.char_buf.appendAssumeCapacity(c); - state = if (state == .start) .backslash_seen else .start; - }, - else => { - if (state == .backslash_seen and c != '"') pp.char_buf.appendAssumeCapacity('\\'); - pp.char_buf.appendAssumeCapacity(c); - state = .start; - }, - } + const pasted_token = tmp_tokenizer.nextNoWSComments(); + const next_tok = tmp_tokenizer.next(); + if (next_tok.id != .nl) { + try pp.errStr( + lhs, + .pasting_formed_invalid, + try pp.comp.diagnostics.arena.allocator().dupe(u8, pp.comp.generated_buf.items[start..end]), + ); } + return pp.makeGeneratedToken(start, pasted_token.id, lhs); } -/// Stringify `tokens` into pp.char_buf. -/// See https://gcc.gnu.org/onlinedocs/gcc-11.2.0/cpp/Stringizing.html#Stringizing -fn stringify(pp: *Preprocessor, tokens: []const TokenWithExpansionLocs) !void { - try pp.char_buf.append('"'); - var ws_state: enum { start, need, not_needed } = .start; - for (tokens) |tok| { - if (tok.id == .macro_ws) { - if (ws_state == .start) continue; - ws_state = .need; - continue; - } - if (ws_state == .need) try pp.char_buf.append(' '); - ws_state = .not_needed; - - // backslashes not inside strings are not escaped - const is_str = switch (tok.id) { - .string_literal, - .string_literal_utf_16, - .string_literal_utf_8, - .string_literal_utf_32, - .string_literal_wide, - .char_literal, - .char_literal_utf_16, - .char_literal_utf_32, - .char_literal_wide, - => true, - else => false, - }; - - for (pp.expandedSlice(tok)) |c| { - if (c == '"') - try pp.char_buf.appendSlice("\\\"") - else if (c == '\\' and is_str) - try pp.char_buf.appendSlice("\\\\") - else - try pp.char_buf.append(c); - } - } - try pp.char_buf.ensureUnusedCapacity(2); - if (pp.char_buf.items[pp.char_buf.items.len - 1] != '\\') { - pp.char_buf.appendSliceAssumeCapacity("\"\n"); - return; - } - pp.char_buf.appendAssumeCapacity('"'); - var tokenizer: Tokenizer = .{ - .buf = pp.char_buf.items, - .index = 0, - .source = .generated, - .langopts = pp.comp.langopts, - .line = 0, - }; - const item = tokenizer.next(); - if (item.id == .unterminated_string_literal) { - const tok = tokens[tokens.len - 1]; - try pp.comp.addDiagnostic(.{ - .tag = .invalid_pp_stringify_escape, - .loc = tok.loc, - }, tok.expansionSlice()); - pp.char_buf.items.len -= 2; // erase unpaired backslash and appended end quote - pp.char_buf.appendAssumeCapacity('"'); - } - pp.char_buf.appendAssumeCapacity('\n'); +/// Paste `tok` onto the last token in `tokens` +fn pasteAndPush(pp: *Preprocessor, tokens: *TokenList, tok: PreprocessorToken) !void { + const last = tokens.pop(); + const pasted = try 
pp.pasteTokens(last, tok); + return tokens.append(pp.gpa, pasted); } -fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const TokenWithExpansionLocs, embed_args: ?*[]const TokenWithExpansionLocs, first: TokenWithExpansionLocs) !?[]const u8 { - if (param_toks.len == 0) { - try pp.comp.addDiagnostic(.{ - .tag = .expected_filename, - .loc = first.loc, - }, first.expansionSlice()); - return null; - } - - const char_top = pp.char_buf.items.len; - defer pp.char_buf.items.len = char_top; - - // Trim leading/trailing whitespace - var begin: usize = 0; - var end: usize = param_toks.len; - while (begin < end and param_toks[begin].id == .macro_ws) : (begin += 1) {} - while (end > begin and param_toks[end - 1].id == .macro_ws) : (end -= 1) {} - const params = param_toks[begin..end]; - - if (params.len == 0) { - try pp.comp.addDiagnostic(.{ - .tag = .expected_filename, - .loc = first.loc, - }, first.expansionSlice()); - return null; - } - // no string pasting - if (embed_args == null and params[0].id == .string_literal and params.len > 1) { - try pp.comp.addDiagnostic(.{ - .tag = .closing_paren, - .loc = params[1].loc, - }, params[1].expansionSlice()); - return null; - } - - for (params, 0..) |tok, i| { - const str = pp.expandedSliceExtra(tok, .preserve_macro_ws); - try pp.char_buf.appendSlice(str); - if (embed_args) |some| { - if ((i == 0 and tok.id == .string_literal) or tok.id == .angle_bracket_right) { - some.* = params[i + 1 ..]; - break; - } - } - } - - const include_str = pp.char_buf.items[char_top..]; - if (include_str.len < 3) { - if (include_str.len == 0) { - try pp.comp.addDiagnostic(.{ - .tag = .expected_filename, - .loc = first.loc, - }, first.expansionSlice()); - return null; - } - try pp.comp.addDiagnostic(.{ - .tag = .empty_filename, - .loc = params[0].loc, - }, params[0].expansionSlice()); - return null; - } - - switch (include_str[0]) { - '<' => { - if (include_str[include_str.len - 1] != '>') { - // Ugly hack to find out where the '>' should go, since we don't have the closing ')' location - const start = params[0].loc; - try pp.comp.addDiagnostic(.{ - .tag = .header_str_closing, - .loc = .{ .id = start.id, .byte_offset = start.byte_offset + @as(u32, @intCast(include_str.len)) + 1, .line = start.line }, - }, params[0].expansionSlice()); - try pp.comp.addDiagnostic(.{ - .tag = .header_str_match, - .loc = params[0].loc, - }, params[0].expansionSlice()); - return null; - } - return include_str; - }, - '"' => return include_str, - else => { - try pp.comp.addDiagnostic(.{ - .tag = .expected_filename, - .loc = params[0].loc, - }, params[0].expansionSlice()); - return null; - }, - } +fn tokenBufferStashReverse(pp: *Preprocessor, tokens: []const PreprocessorToken) !void { + try pp.expansion_bufs.append(pp.gpa, .{}); + try pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].appendSlice(pp.gpa, tokens); + std.mem.reverse(PreprocessorToken, pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].items); } -fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []const TokenWithExpansionLocs, src_loc: Source.Location) Error!bool { - switch (builtin) { - .macro_param_has_attribute, - .macro_param_has_declspec_attribute, - .macro_param_has_feature, - .macro_param_has_extension, - .macro_param_has_builtin, - => { - var invalid: ?TokenWithExpansionLocs = null; - var identifier: ?TokenWithExpansionLocs = null; - for (param_toks) |tok| { - if (tok.id == .macro_ws) continue; - if (tok.id == .comment) continue; - if (!tok.id.isMacroIdentifier()) { - invalid = tok; - 
break; - } - if (identifier) |_| invalid = tok else identifier = tok; - } - if (identifier == null and invalid == null) invalid = .{ .id = .eof, .loc = src_loc }; - if (invalid) |some| { - try pp.comp.addDiagnostic( - .{ .tag = .feature_check_requires_identifier, .loc = some.loc }, - some.expansionSlice(), - ); - return false; - } - - const ident_str = pp.expandedSlice(identifier.?); - return switch (builtin) { - .macro_param_has_attribute => Attribute.fromString(.gnu, null, ident_str) != null, - .macro_param_has_declspec_attribute => { - return if (pp.comp.langopts.declspec_attrs) - Attribute.fromString(.declspec, null, ident_str) != null - else - false; - }, - .macro_param_has_feature => features.hasFeature(pp.comp, ident_str), - .macro_param_has_extension => features.hasExtension(pp.comp, ident_str), - .macro_param_has_builtin => pp.comp.hasBuiltin(ident_str), - else => unreachable, - }; - }, - .macro_param_has_warning => { - const actual_param = pp.pasteStringsUnsafe(param_toks) catch |er| switch (er) { - error.ExpectedStringLiteral => { - try pp.errStr(param_toks[0], .expected_str_literal_in, "__has_warning"); - return false; - }, - else => |e| return e, - }; - if (!mem.startsWith(u8, actual_param, "-W")) { - try pp.errStr(param_toks[0], .malformed_warning_check, "__has_warning"); - return false; - } - const warning_name = actual_param[2..]; - return Diagnostics.warningExists(warning_name); - }, - .macro_param_is_identifier => { - var invalid: ?TokenWithExpansionLocs = null; - var identifier: ?TokenWithExpansionLocs = null; - for (param_toks) |tok| switch (tok.id) { - .macro_ws => continue, - .comment => continue, - else => { - if (identifier) |_| invalid = tok else identifier = tok; - }, - }; - if (identifier == null and invalid == null) invalid = .{ .id = .eof, .loc = src_loc }; - if (invalid) |some| { - try pp.comp.addDiagnostic(.{ - .tag = .missing_tok_builtin, - .loc = some.loc, - .extra = .{ .tok_id_expected = .r_paren }, - }, some.expansionSlice()); - return false; - } +fn tokenBufferUnstash(pp: *Preprocessor) void { + var buf = pp.expansion_bufs.pop(); + buf.deinit(pp.gpa); +} - const id = identifier.?.id; - return id == .identifier or id == .extended_identifier; - }, - .macro_param_has_include, .macro_param_has_include_next => { - const include_str = (try pp.reconstructIncludeString(param_toks, null, param_toks[0])) orelse return false; - const include_type: Compilation.IncludeType = switch (include_str[0]) { - '"' => .quotes, - '<' => .angle_brackets, - else => unreachable, - }; - const filename = include_str[1 .. 
include_str.len - 1]; - if (builtin == .macro_param_has_include or pp.include_depth == 0) { - if (builtin == .macro_param_has_include_next) { - try pp.comp.addDiagnostic(.{ - .tag = .include_next_outside_header, - .loc = src_loc, - }, &.{}); - } - return pp.comp.hasInclude(filename, src_loc.id, include_type, .first); - } - return pp.comp.hasInclude(filename, src_loc.id, include_type, .next); - }, - else => unreachable, +fn expandAll(pp: *Preprocessor, tokens: []const PreprocessorToken, tmpl: PreprocessorToken) ![]const PreprocessorToken { + try pp.tokenBufferStashReverse(tokens); + defer pp.tokenBufferUnstash(); + var r: TokenList = .{}; + defer r.deinit(pp.gpa); + while (true) { + const tok = try pp.readExpand(); + if (tok.id == .eof) break; + try r.append(pp.gpa, tok); } + pp.propagateSpace(r.items, tmpl); + return r.toOwnedSlice(pp.gpa); +} + +fn peekToken(pp: *Preprocessor) !PreprocessorToken { + const tok = try pp.readToken(); + try pp.ungetToken(tok); + return tok; } -/// Treat whitespace-only paste arguments as empty -fn getPasteArgs(args: []const TokenWithExpansionLocs) []const TokenWithExpansionLocs { - for (args) |tok| { - if (tok.id != .macro_ws) return args; +/// Return a string with the same contents as `name` and whose lifetime is the same as the preprocessor's lifetime +/// If `tok` is not from the generated source, this is just `name`. +/// If `tok` is from the generated source, pointers are invalidated when the underlying ArrayList is resized. Therefore, +/// duplicate the string and store it (so we aren't repeatedly copying the same string) +fn getSafeString(pp: *Preprocessor, tok: PreprocessorToken, name: []const u8) ![]const u8 { + if (tok.loc.id != .generated) return name; + const gop = try pp.safe_strings.getOrPut(pp.gpa, name); + if (!gop.found_existing) { + const copy = try pp.arena.allocator().dupe(u8, name); + gop.key_ptr.* = copy; } - return &[1]TokenWithExpansionLocs{.{ - .id = .placemarker, - .loc = .{ .id = .generated, .byte_offset = 0, .line = 0 }, - }}; + return gop.key_ptr.*; } -fn expandFuncMacro( - pp: *Preprocessor, - macro_tok: TokenWithExpansionLocs, - func_macro: *const Macro, - args: *const MacroArguments, - expanded_args: *const MacroArguments, - hideset_arg: Hideset.Index, -) MacroError!ExpandBuf { - var hideset = hideset_arg; - var buf = ExpandBuf.init(pp.gpa); - try buf.ensureTotalCapacity(func_macro.tokens.len); - errdefer buf.deinit(); - - var expanded_variable_arguments = ExpandBuf.init(pp.gpa); - defer expanded_variable_arguments.deinit(); - var variable_arguments = ExpandBuf.init(pp.gpa); - defer variable_arguments.deinit(); - - if (func_macro.var_args) { - var i: usize = func_macro.params.len; - while (i < expanded_args.items.len) : (i += 1) { - try variable_arguments.appendSlice(args.items[i]); - try expanded_variable_arguments.appendSlice(expanded_args.items[i]); - if (i != expanded_args.items.len - 1) { - const comma = TokenWithExpansionLocs{ .id = .comma, .loc = .{ .id = .generated } }; - try variable_arguments.append(comma); - try expanded_variable_arguments.append(comma); - } +fn injectSpace(pp: *Preprocessor) void { + var i = pp.expansion_bufs.items.len; + while (i > 0) : (i -= 1) { + var j = pp.expansion_bufs.items[i - 1].items.len; + while (j > 0) : (j -= 1) { + pp.expansion_bufs.items[i - 1].items[j - 1].flags.space = true; + return; } } +} - // token concatenation and expansion phase - var tok_i: usize = 0; - while (tok_i < func_macro.tokens.len) : (tok_i += 1) { - const raw = func_macro.tokens[tok_i]; - switch (raw.id) { - 
.hash_hash => while (tok_i + 1 < func_macro.tokens.len) { - const raw_next = func_macro.tokens[tok_i + 1]; - tok_i += 1; - - var va_opt_buf = ExpandBuf.init(pp.gpa); - defer va_opt_buf.deinit(); - - const next = switch (raw_next.id) { - .macro_ws => continue, - .hash_hash => continue, - .comment => if (!pp.comp.langopts.preserve_comments_in_macros) - continue - else - &[1]TokenWithExpansionLocs{tokFromRaw(raw_next)}, - .macro_param, .macro_param_no_expand => getPasteArgs(args.items[raw_next.end]), - .keyword_va_args => variable_arguments.items, - .keyword_va_opt => blk: { - try pp.expandVaOpt(&va_opt_buf, raw_next, variable_arguments.items.len != 0); - if (va_opt_buf.items.len == 0) break; - break :blk va_opt_buf.items; - }, - else => &[1]TokenWithExpansionLocs{tokFromRaw(raw_next)}, - }; - try pp.pasteTokens(&buf, next); - if (next.len != 0) break; - }, - .macro_param_no_expand => { - if (tok_i + 1 < func_macro.tokens.len and func_macro.tokens[tok_i + 1].id == .hash_hash) { - hideset = pp.hideset.get(tokFromRaw(func_macro.tokens[tok_i + 1]).loc); - } - const slice = getPasteArgs(args.items[raw.end]); - const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line }; - try bufCopyTokens(&buf, slice, &.{raw_loc}); - }, - .macro_param => { - if (tok_i + 1 < func_macro.tokens.len and func_macro.tokens[tok_i + 1].id == .hash_hash) { - hideset = pp.hideset.get(tokFromRaw(func_macro.tokens[tok_i + 1]).loc); - } - const arg = expanded_args.items[raw.end]; - const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line }; - try bufCopyTokens(&buf, arg, &.{raw_loc}); - }, - .keyword_va_args => { - const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line }; - try bufCopyTokens(&buf, expanded_variable_arguments.items, &.{raw_loc}); - }, - .keyword_va_opt => { - try pp.expandVaOpt(&buf, raw, variable_arguments.items.len != 0); - }, - .stringify_param, .stringify_va_args => { - const arg = if (raw.id == .stringify_va_args) - variable_arguments.items - else - args.items[raw.end]; - - pp.char_buf.clearRetainingCapacity(); - try pp.stringify(arg); - - const start = pp.comp.generated_buf.items.len; - try pp.comp.generated_buf.appendSlice(pp.gpa, pp.char_buf.items); +fn readExpandNewline(pp: *Preprocessor) Error!PreprocessorToken { + const tok = pp.getToken(); + if (!tok.id.isMacroIdentifier()) return tok; + const name = pp.tokSlice(tok); + const macro = pp.defines.getPtr(name) orelse return tok; - try buf.append(try pp.makeGeneratedToken(start, .string_literal, tokFromRaw(raw))); - }, - .macro_param_has_attribute, - .macro_param_has_declspec_attribute, - .macro_param_has_warning, - .macro_param_has_feature, - .macro_param_has_extension, - .macro_param_has_builtin, - .macro_param_has_include, - .macro_param_has_include_next, - .macro_param_is_identifier, - => { - const arg = expanded_args.items[0]; - const result = if (arg.len == 0) blk: { - const extra = Diagnostics.Message.Extra{ .arguments = .{ .expected = 1, .actual = 0 } }; - try pp.comp.addDiagnostic(.{ .tag = .expected_arguments, .loc = macro_tok.loc, .extra = extra }, &.{}); - break :blk false; - } else try pp.handleBuiltinMacro(raw.id, arg, macro_tok.loc); - const start = pp.comp.generated_buf.items.len; - const w = pp.comp.generated_buf.writer(pp.gpa); - try w.print("{}\n", .{@intFromBool(result)}); - try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw))); - }, - .macro_param_has_c_attribute => { - const arg = 
expanded_args.items[0]; - const not_found = "0\n"; - const result = if (arg.len == 0) blk: { - const extra = Diagnostics.Message.Extra{ .arguments = .{ .expected = 1, .actual = 0 } }; - try pp.comp.addDiagnostic(.{ .tag = .expected_arguments, .loc = macro_tok.loc, .extra = extra }, &.{}); - break :blk not_found; - } else res: { - var invalid: ?TokenWithExpansionLocs = null; - var vendor_ident: ?TokenWithExpansionLocs = null; - var colon_colon: ?TokenWithExpansionLocs = null; - var attr_ident: ?TokenWithExpansionLocs = null; - for (arg) |tok| { - if (tok.id == .macro_ws) continue; - if (tok.id == .comment) continue; - if (tok.id == .colon_colon) { - if (colon_colon != null or attr_ident == null) { - invalid = tok; - break; - } - vendor_ident = attr_ident; - attr_ident = null; - colon_colon = tok; - continue; - } - if (!tok.id.isMacroIdentifier()) { - invalid = tok; - break; - } - if (attr_ident) |_| { - invalid = tok; - break; - } else attr_ident = tok; - } - if (vendor_ident != null and attr_ident == null) { - invalid = vendor_ident; - } else if (attr_ident == null and invalid == null) { - invalid = .{ .id = .eof, .loc = macro_tok.loc }; - } - if (invalid) |some| { - try pp.comp.addDiagnostic( - .{ .tag = .feature_check_requires_identifier, .loc = some.loc }, - some.expansionSlice(), - ); - break :res not_found; - } - if (vendor_ident) |some| { - const vendor_str = pp.expandedSlice(some); - const attr_str = pp.expandedSlice(attr_ident.?); - const exists = Attribute.fromString(.gnu, vendor_str, attr_str) != null; - - const start = pp.comp.generated_buf.items.len; - try pp.comp.generated_buf.appendSlice(pp.gpa, if (exists) "1\n" else "0\n"); - try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw))); - continue; - } - if (!pp.comp.langopts.standard.atLeast(.c23)) break :res not_found; - - const attrs = std.StaticStringMap([]const u8).initComptime(.{ - .{ "deprecated", "201904L\n" }, - .{ "fallthrough", "201904L\n" }, - .{ "maybe_unused", "201904L\n" }, - .{ "nodiscard", "202003L\n" }, - .{ "noreturn", "202202L\n" }, - .{ "_Noreturn", "202202L\n" }, - .{ "unsequenced", "202207L\n" }, - .{ "reproducible", "202207L\n" }, - }); - - const attr_str = Attribute.normalize(pp.expandedSlice(attr_ident.?)); - break :res attrs.get(attr_str) orelse not_found; - }; - const start = pp.comp.generated_buf.items.len; - try pp.comp.generated_buf.appendSlice(pp.gpa, result); - try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw))); - }, - .macro_param_has_embed => { - const arg = expanded_args.items[0]; - const not_found = "0\n"; - const result = if (arg.len == 0) blk: { - const extra = Diagnostics.Message.Extra{ .arguments = .{ .expected = 1, .actual = 0 } }; - try pp.comp.addDiagnostic(.{ .tag = .expected_arguments, .loc = macro_tok.loc, .extra = extra }, &.{}); - break :blk not_found; - } else res: { - var embed_args: []const TokenWithExpansionLocs = &.{}; - const include_str = (try pp.reconstructIncludeString(arg, &embed_args, arg[0])) orelse - break :res not_found; - - var prev = tokFromRaw(raw); - prev.id = .eof; - var it: struct { - i: u32 = 0, - slice: []const TokenWithExpansionLocs, - prev: TokenWithExpansionLocs, - fn next(it: *@This()) TokenWithExpansionLocs { - while (it.i < it.slice.len) switch (it.slice[it.i].id) { - .macro_ws, .whitespace => it.i += 1, - else => break, - } else return it.prev; - defer it.i += 1; - it.prev = it.slice[it.i]; - it.prev.id = .eof; - return it.slice[it.i]; - } - } = .{ .slice = embed_args, .prev = prev }; - - while (true) { - 
const param_first = it.next(); - if (param_first.id == .eof) break; - if (param_first.id != .identifier) { - try pp.comp.addDiagnostic( - .{ .tag = .malformed_embed_param, .loc = param_first.loc }, - param_first.expansionSlice(), - ); - continue; - } + const macro_hideset = tok.hideset; + if (pp.treap.contains(macro_hideset, name)) return tok; - const char_top = pp.char_buf.items.len; - defer pp.char_buf.items.len = char_top; - - const maybe_colon = it.next(); - const param = switch (maybe_colon.id) { - .colon_colon => blk: { - // vendor::param - const param = it.next(); - if (param.id != .identifier) { - try pp.comp.addDiagnostic( - .{ .tag = .malformed_embed_param, .loc = param.loc }, - param.expansionSlice(), - ); - continue; - } - const l_paren = it.next(); - if (l_paren.id != .l_paren) { - try pp.comp.addDiagnostic( - .{ .tag = .malformed_embed_param, .loc = l_paren.loc }, - l_paren.expansionSlice(), - ); - continue; - } - break :blk "doesn't exist"; - }, - .l_paren => Attribute.normalize(pp.expandedSlice(param_first)), - else => { - try pp.comp.addDiagnostic( - .{ .tag = .malformed_embed_param, .loc = maybe_colon.loc }, - maybe_colon.expansionSlice(), - ); - continue; - }, - }; - - var arg_count: u32 = 0; - var first_arg: TokenWithExpansionLocs = undefined; - while (true) { - const next = it.next(); - if (next.id == .eof) { - try pp.comp.addDiagnostic( - .{ .tag = .malformed_embed_limit, .loc = param_first.loc }, - param_first.expansionSlice(), - ); - break; - } - if (next.id == .r_paren) break; - arg_count += 1; - if (arg_count == 1) first_arg = next; - } + switch (macro.kind) { + .object => { + const safe_name = try pp.getSafeString(tok, name); + const new_hideset = try pp.treap.addNodeTo(tok.hideset, safe_name); - if (std.mem.eql(u8, param, "limit")) { - if (arg_count != 1) { - try pp.comp.addDiagnostic( - .{ .tag = .malformed_embed_limit, .loc = param_first.loc }, - param_first.expansionSlice(), - ); - continue; - } - if (first_arg.id != .pp_num) { - try pp.comp.addDiagnostic( - .{ .tag = .malformed_embed_limit, .loc = param_first.loc }, - param_first.expansionSlice(), - ); - continue; - } - _ = std.fmt.parseInt(u32, pp.expandedSlice(first_arg), 10) catch { - break :res not_found; - }; - } else if (!std.mem.eql(u8, param, "prefix") and !std.mem.eql(u8, param, "suffix") and - !std.mem.eql(u8, param, "if_empty")) - { - break :res not_found; - } - } + const tokens = try pp.subst(macro, tok, MacroArgList.empty, new_hideset); + defer pp.gpa.free(tokens); + pp.propagateSpace(tokens, tok); + try pp.ungetAll(tokens); + return pp.readExpand(); + }, + .func => { + if (!try pp.next(.l_paren)) return tok; + const arg_tokens_start = pp.macro_arg_tokens.items.len; + defer pp.macro_arg_tokens.items.len = arg_tokens_start; + const macro_args_start = pp.macro_args.items.len; + defer pp.macro_args.items.len = macro_args_start; + + const args = pp.readArgs(tok, macro) catch |err| switch (err) { + error.IncorrectArgumentCount => return PreprocessorToken.zero, + error.UnterminatedMacroArgumentList => { + try pp.errTok(tok, .unterminated_macro_arg_list); + return PreprocessorToken.zero; + }, + else => |e| return e, + }; + const r_paren = pp.getToken(); + std.debug.assert(r_paren.id == .r_paren); + const safe_name = try pp.getSafeString(tok, name); + + const intersection = try pp.treap.intersection(macro_hideset, r_paren.hideset); + const hideset = try pp.treap.addNodeTo(intersection, safe_name); + const tokens = try pp.subst(macro, tok, args, hideset); + defer pp.gpa.free(tokens); + 
pp.propagateSpace(tokens, tok); + try pp.ungetAll(tokens); + return pp.readExpand(); + }, + .special => |func| { + try func(pp, tok); + return pp.readExpand(); + }, + } +} - const include_type: Compilation.IncludeType = switch (include_str[0]) { - '"' => .quotes, - '<' => .angle_brackets, - else => unreachable, - }; - const filename = include_str[1 .. include_str.len - 1]; - const contents = (try pp.comp.findEmbed(filename, arg[0].loc.id, include_type, 1)) orelse - break :res not_found; - - defer pp.comp.gpa.free(contents); - break :res if (contents.len != 0) "1\n" else "2\n"; - }; - const start = pp.comp.generated_buf.items.len; - try pp.comp.generated_buf.appendSlice(pp.comp.gpa, result); - try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw))); - }, - .macro_param_pragma_operator => { - const param_toks = expanded_args.items[0]; - // Clang and GCC require exactly one token (so, no parentheses or string pasting) - // even though their error messages indicate otherwise. Ours is slightly more - // descriptive. - var invalid: ?TokenWithExpansionLocs = null; - var string: ?TokenWithExpansionLocs = null; - for (param_toks) |tok| switch (tok.id) { - .string_literal => { - if (string) |_| invalid = tok else string = tok; - }, - .macro_ws => continue, - .comment => continue, - else => { - invalid = tok; - break; - }, - }; - if (string == null and invalid == null) invalid = .{ .loc = macro_tok.loc, .id = .eof }; - if (invalid) |some| try pp.comp.addDiagnostic( - .{ .tag = .pragma_operator_string_literal, .loc = some.loc }, - some.expansionSlice(), - ) else try pp.pragmaOperator(string.?, macro_tok.loc); - }, - .comma => { - if (tok_i + 2 < func_macro.tokens.len and func_macro.tokens[tok_i + 1].id == .hash_hash) { - const hash_hash = func_macro.tokens[tok_i + 1]; - var maybe_va_args = func_macro.tokens[tok_i + 2]; - var consumed: usize = 2; - if (maybe_va_args.id == .macro_ws and tok_i + 3 < func_macro.tokens.len) { - consumed = 3; - maybe_va_args = func_macro.tokens[tok_i + 3]; - } - if (maybe_va_args.id == .keyword_va_args) { - // GNU extension: `, ##__VA_ARGS__` deletes the comma if __VA_ARGS__ is empty - tok_i += consumed; - if (func_macro.params.len == expanded_args.items.len) { - // Empty __VA_ARGS__, drop the comma - try pp.err(hash_hash, .comma_deletion_va_args); - } else if (func_macro.params.len == 0 and expanded_args.items.len == 1 and expanded_args.items[0].len == 0) { - // Ambiguous whether this is "empty __VA_ARGS__" or "__VA_ARGS__ omitted" - if (pp.comp.langopts.standard.isGNU()) { - // GNU standard, drop the comma - try pp.err(hash_hash, .comma_deletion_va_args); - } else { - // C standard, retain the comma - try buf.append(tokFromRaw(raw)); - } - } else { - try buf.append(tokFromRaw(raw)); - if (expanded_variable_arguments.items.len > 0 or variable_arguments.items.len == func_macro.params.len) { - try pp.err(hash_hash, .comma_deletion_va_args); - } - const raw_loc = Source.Location{ - .id = maybe_va_args.source, - .byte_offset = maybe_va_args.start, - .line = maybe_va_args.line, - }; - try bufCopyTokens(&buf, expanded_variable_arguments.items, &.{raw_loc}); - } - continue; - } - } - // Regular comma, no token pasting with __VA_ARGS__ - try buf.append(tokFromRaw(raw)); - }, - else => try buf.append(tokFromRaw(raw)), +fn readMacroArg(pp: *Preprocessor, end: *bool, readall: bool) !MacroArg { + var level: i32 = 0; + const start: u32 = @intCast(pp.macro_arg_tokens.items.len); + while (true) { + var tok = pp.getToken(); + if (tok.id == .eof) { + return 
error.UnterminatedMacroArgumentList; + } + if (tok.id == .nl) continue; + if (tok.flags.is_bol and tok.id == .hash) { + try pp.readDirective(); + continue; + } + if (level == 0 and tok.id == .r_paren) { + try pp.ungetToken(tok); + end.* = true; + break; + } + if (level == 0 and tok.id == .comma and !readall) { + break; + } + if (tok.id == .l_paren) { + level += 1; + } + if (tok.id == .r_paren) { + level -= 1; + } + if (tok.flags.is_bol) { + tok.flags = .{ .is_bol = false, .space = true }; } + try pp.macro_arg_tokens.append(pp.gpa, tok); } - removePlacemarkers(&buf); + return .{ .start = start, .end = @intCast(pp.macro_arg_tokens.items.len) }; +} - const macro_expansion_locs = macro_tok.expansionSlice(); - for (buf.items) |*tok| { - try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc}); - try tok.addExpansionLocation(pp.gpa, macro_expansion_locs); - const tok_hidelist = pp.hideset.get(tok.loc); - const new_hidelist = try pp.hideset.@"union"(tok_hidelist, hideset); - try pp.hideset.put(tok.loc, new_hidelist); +fn doReadArgs(pp: *Preprocessor, macro: *const Macro) !MacroArgList { + const start: u32 = @intCast(pp.macro_args.items.len); + var end = false; + while (!end) { + const in_ellipsis = macro.var_args and (pp.macro_args.items.len - start) + 1 == macro.nargs; + const arg_range = try pp.readMacroArg(&end, in_ellipsis); + try pp.macro_args.append(pp.gpa, arg_range); + } + if (macro.var_args and (pp.macro_args.items.len - start) + 1 == macro.nargs) { + try pp.macro_args.append(pp.gpa, MacroArg.empty); } + return .{ .start = start, .end = @intCast(pp.macro_args.items.len) }; +} - return buf; +fn readArgs(pp: *Preprocessor, ident: PreprocessorToken, macro: *const Macro) !MacroArgList { + if (macro.nargs == 0 and (try pp.peekToken()).id == .r_paren) { + return MacroArgList.empty; + } + const args = try pp.doReadArgs(macro); + if (args.len() != macro.nargs) { + const extra = Diagnostics.Message.Extra{ + .arguments = .{ .expected = @intCast(macro.nargs), .actual = @intCast(args.len()) }, + }; + try pp.comp.addDiagnostic( + .{ .tag = .expected_arguments, .loc = ident.loc, .extra = extra }, + &.{}, // TODO: expansion slice + ); + return error.IncorrectArgumentCount; + } + return args; } -fn expandVaOpt( - pp: *Preprocessor, - buf: *ExpandBuf, - raw: RawToken, - should_expand: bool, -) !void { - if (!should_expand) return; - - const source = pp.comp.getSource(raw.source); - var tokenizer: Tokenizer = .{ - .buf = source.buf, - .index = raw.start, - .source = raw.source, - .langopts = pp.comp.langopts, - .line = raw.line, - }; - while (tokenizer.index < raw.end) { - const tok = tokenizer.next(); - try buf.append(tokFromRaw(tok)); +fn readExpand(pp: *Preprocessor) Error!PreprocessorToken { + while (true) { + const tok = try pp.readExpandNewline(); + if (tok.id != .nl) return tok; + } +} + +/// # number "file" flags +/// TODO: validate that the pp_num token is solely digits +/// if not, emit `GNU line marker directive requires a simple digit sequence` +fn readLinemarker(pp: *Preprocessor) !void { + const name = pp.getToken(); + if (name.id.isDirectiveEnd()) return; + if (name.id != .string_literal) try pp.errTok(name, .line_invalid_filename); + + const flag_1 = pp.getToken(); + if (flag_1.id.isDirectiveEnd()) return; + const flag_2 = pp.getToken(); + if (flag_2.id.isDirectiveEnd()) return; + const flag_3 = pp.getToken(); + if (flag_3.id.isDirectiveEnd()) return; + const flag_4 = pp.getToken(); + if (flag_4.id.isDirectiveEnd()) return; + try pp.expectNewline(); +} + +fn readIdent(pp: *Preprocessor) 
!?PreprocessorToken { + const tok = pp.getToken(); + if (!tok.id.isMacroIdentifier()) { + try pp.errTok(tok, .macro_name_must_be_identifier); + return null; } + return tok; } -fn bufCopyTokens(buf: *ExpandBuf, tokens: []const TokenWithExpansionLocs, src: []const Source.Location) !void { - try buf.ensureUnusedCapacity(tokens.len); - for (tokens) |tok| { - var copy = try tok.dupe(buf.allocator); - errdefer TokenWithExpansionLocs.free(copy.expansion_locs, buf.allocator); - try copy.addExpansionLocation(buf.allocator, src); - buf.appendAssumeCapacity(copy); +fn ungetToken(pp: *Preprocessor, tok: PreprocessorToken) !void { + if (tok.id == .eof) return; + if (pp.isBufferEmpty()) { + try pp.expansion_bufs.append(pp.gpa, .{}); } + try pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].append(pp.gpa, tok); } -fn nextBufToken( - pp: *Preprocessor, - tokenizer: *Tokenizer, - buf: *ExpandBuf, - start_idx: *usize, - end_idx: *usize, - extend_buf: bool, -) Error!TokenWithExpansionLocs { - start_idx.* += 1; - if (start_idx.* == buf.items.len and start_idx.* >= end_idx.*) { - if (extend_buf) { - const raw_tok = tokenizer.next(); - if (raw_tok.id.isMacroIdentifier() and - pp.poisoned_identifiers.get(pp.tokSlice(raw_tok)) != null) - try pp.err(raw_tok, .poisoned_identifier); - - if (raw_tok.id == .nl) pp.add_expansion_nl += 1; - - const new_tok = tokFromRaw(raw_tok); - end_idx.* += 1; - try buf.append(new_tok); - return new_tok; - } else { - return TokenWithExpansionLocs{ .id = .eof, .loc = .{ .id = .generated } }; - } - } else { - return buf.items[start_idx.*]; +fn hashHashCheck(pp: *Preprocessor, toks: []const PreprocessorToken) !void { + if (toks.len == 0) return; + if (toks[0].id == .hash_hash) { + return pp.errTok(toks[0], .hash_hash_at_start); + } + if (toks[toks.len - 1].id == .hash_hash) { + return pp.errTok(toks[toks.len - 1], .hash_hash_at_end); } } -fn collectMacroFuncArguments( - pp: *Preprocessor, - tokenizer: *Tokenizer, - buf: *ExpandBuf, - start_idx: *usize, - end_idx: *usize, - extend_buf: bool, - is_builtin: bool, - r_paren: *TokenWithExpansionLocs, -) !MacroArguments { - const name_tok = buf.items[start_idx.*]; - const saved_tokenizer = tokenizer.*; - const old_end = end_idx.*; +fn readObjMacro(pp: *Preprocessor, name: PreprocessorToken) !void { + var body: TokenList = .{}; + errdefer body.deinit(pp.gpa); while (true) { - const tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf); + const tok = pp.getToken(); + if (tok.id.isDirectiveEnd()) break; switch (tok.id) { - .nl, .whitespace, .macro_ws => {}, - .l_paren => break, - else => { - if (is_builtin) { - try pp.errStr(name_tok, .missing_lparen_after_builtin, pp.expandedSlice(name_tok)); - } - // Not a macro function call, go over normal identifier, rewind - tokenizer.* = saved_tokenizer; - end_idx.* = old_end; - return error.MissingLParen; - }, + .unterminated_comment => try pp.errTok(tok, .unterminated_comment), + else => try body.append(pp.gpa, tok), } } + try pp.hashHashCheck(body.items); + const macro: Macro = .{ + .tokens = body.items, + .var_args = false, + .loc = name.loc, + .kind = .object, + .nargs = undefined, + }; + try pp.defineMacro(name, macro); +} - // collect the arguments. 
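// Illustrative sketch, not part of the patch: the old argument-collection loop
// that begins below is replaced by readMacroArg/readArgs earlier in this file.
// The key rule is the same in both: a comma only terminates an argument at
// parenthesis depth zero, so `M(a, (b, c))` passes two arguments, not three.
// The depth-tracking idea over a plain string (countArgs is illustrative only):
const std = @import("std");

fn countArgs(call: []const u8) usize {
    var depth: usize = 0;
    var args: usize = 1;
    for (call) |c| switch (c) {
        '(' => depth += 1,
        ')' => depth -= 1,
        ',' => {
            if (depth == 0) args += 1;
        },
        else => {},
    };
    return args;
}

test "commas inside parentheses do not split arguments" {
    try std.testing.expectEqual(@as(usize, 2), countArgs("a, (b, c)"));
    try std.testing.expectEqual(@as(usize, 3), countArgs("a, b, c"));
}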
- var parens: u32 = 0; - var args = MacroArguments.init(pp.gpa); - errdefer deinitMacroArguments(pp.gpa, &args); - var curArgument = std.ArrayList(TokenWithExpansionLocs).init(pp.gpa); - defer curArgument.deinit(); - while (true) { - var tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf); - tok.flags.is_macro_arg = true; - switch (tok.id) { - .comma => { - if (parens == 0) { - const owned = try curArgument.toOwnedSlice(); - errdefer pp.gpa.free(owned); - try args.append(owned); - } else { - const duped = try tok.dupe(pp.gpa); - errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa); - try curArgument.append(duped); - } - }, - .l_paren => { - const duped = try tok.dupe(pp.gpa); - errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa); - try curArgument.append(duped); - parens += 1; - }, - .r_paren => { - if (parens == 0) { - const owned = try curArgument.toOwnedSlice(); - errdefer pp.gpa.free(owned); - try args.append(owned); - r_paren.* = tok; - break; - } else { - const duped = try tok.dupe(pp.gpa); - errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa); - try curArgument.append(duped); - parens -= 1; - } - }, - .eof => { - { - const owned = try curArgument.toOwnedSlice(); - errdefer pp.gpa.free(owned); - try args.append(owned); - } - tokenizer.* = saved_tokenizer; - try pp.comp.addDiagnostic( - .{ .tag = .unterminated_macro_arg_list, .loc = name_tok.loc }, - name_tok.expansionSlice(), - ); - return error.Unterminated; - }, - .nl, .whitespace => { - try curArgument.append(.{ .id = .macro_ws, .loc = tok.loc }); - }, - else => { - const duped = try tok.dupe(pp.gpa); - errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa); - try curArgument.append(duped); - }, +/// Defines a new macro and warns if it is a duplicate +fn defineMacro(pp: *Preprocessor, name_tok: PreprocessorToken, macro: Macro) Error!void { + const name_str = pp.tokSlice(name_tok); + const gop = try pp.defines.getOrPut(pp.gpa, name_str); + if (gop.found_existing and !gop.value_ptr.eql(macro, pp)) { + const tag: Diagnostics.Tag = if (gop.value_ptr.kind == .special) .builtin_macro_redefined else .macro_redefined; + const start = pp.comp.diagnostics.list.items.len; + try pp.comp.addDiagnostic(.{ + .tag = tag, + .loc = name_tok.loc, + .extra = .{ .str = name_str }, + }, &.{}); + if (gop.value_ptr.kind != .special and pp.comp.diagnostics.list.items.len != start) { + try pp.comp.addDiagnostic(.{ + .tag = .previous_definition, + .loc = gop.value_ptr.loc, + }, &.{}); } } - - return args; + gop.value_ptr.* = macro; } -fn removeExpandedTokens(pp: *Preprocessor, buf: *ExpandBuf, start: usize, len: usize, moving_end_idx: *usize) !void { - for (buf.items[start .. start + len]) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); - try buf.replaceRange(start, len, &.{}); - moving_end_idx.* -|= len; +/// Get raw token source string. +/// Returned slice is invalidated when comp.generated_buf is updated. +pub fn tokSlice(pp: *Preprocessor, token: anytype) []const u8 { + if (token.id.lexeme()) |some| return some; + const source = pp.comp.getSource(token.loc.id); + var tmp_tokenizer = Tokenizer{ + .buf = source.buf, + .langopts = pp.comp.langopts, + .index = token.loc.byte_offset, + .source = .generated, + }; + const tok = tmp_tokenizer.next(); + return tmp_tokenizer.buf[tok.start..tok.end]; } -/// The behavior of `defined` depends on whether we are in a preprocessor -/// expression context (#if or #elif) or not. 
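// Illustrative sketch, not part of the patch: defineMacro above only diagnoses a
// redefinition when the new macro is not token-for-token equal to the old one,
// and then also points at the previous definition:
//
//     #define N 1
//     #define N 1   // accepted, definitions compare equal
//     #define N 2   // warning: 'N' macro redefined; note: previous definition
//
// The getOrPut-then-overwrite shape it uses, reduced to integers:
const std = @import("std");

fn defineSketch(gpa: std.mem.Allocator, defines: *std.StringHashMapUnmanaged(u32), name: []const u8, value: u32) !bool {
    const gop = try defines.getOrPut(gpa, name);
    const redefined = gop.found_existing and gop.value_ptr.* != value;
    gop.value_ptr.* = value; // the newest definition always wins
    return redefined; // caller would emit the macro_redefined warning
}

test "redefinition detection" {
    const gpa = std.testing.allocator;
    var defines: std.StringHashMapUnmanaged(u32) = .{};
    defer defines.deinit(gpa);
    try std.testing.expect(!try defineSketch(gpa, &defines, "N", 1));
    try std.testing.expect(!try defineSketch(gpa, &defines, "N", 1));
    try std.testing.expect(try defineSketch(gpa, &defines, "N", 2));
}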
-/// In a non-expression context it's just an identifier. Within a preprocessor -/// expression it is a unary operator or one-argument function. -const EvalContext = enum { - expr, - non_expr, -}; - -/// Helper for safely iterating over a slice of tokens while skipping whitespace -const TokenIterator = struct { - toks: []const TokenWithExpansionLocs, - i: usize, +fn expect(pp: *Preprocessor, expected: Tokenizer.Token.Id, tag: Diagnostics.Tag) !PreprocessorToken { + const tok = pp.getToken(); + if (tok.id != expected) { + try pp.comp.addDiagnostic(.{ + .tag = tag, + .loc = tok.loc, + .extra = .{ .none = {} }, + }, &.{}); // todo expansion slice + try pp.errTok(tok, tag); + } + return tok; +} - fn init(toks: []const TokenWithExpansionLocs) TokenIterator { - return .{ .toks = toks, .i = 0 }; +fn expectLParen(pp: *Preprocessor, tok: PreprocessorToken) !PreprocessorToken { + const l_paren = pp.getToken(); + if (l_paren.id != .l_paren) { + try pp.comp.addDiagnostic(.{ + .tag = .missing_lparen_after_builtin, + .loc = tok.loc, + .extra = .{ .str = pp.tokSlice(tok) }, + }, &.{}); // todo expansion slice } + return l_paren; +} + +fn makeMacroToken(position: usize, is_vararg: bool) PreprocessorToken { + return .{ + .id = .macro_param, + .hideset = null, + .loc = .{ + .id = .unused, + .byte_offset = @intCast(position), + .line = @intFromBool(is_vararg), + }, + }; +} - fn nextNoWS(self: *TokenIterator) ?TokenWithExpansionLocs { - while (self.i < self.toks.len) : (self.i += 1) { - const tok = self.toks[self.i]; - if (tok.id == .whitespace or tok.id == .macro_ws) continue; +fn next(pp: *Preprocessor, id: Tokenizer.Token.Id) !bool { + const tok = pp.getToken(); + if (tok.id == id) return true; + try pp.ungetToken(tok); + return false; +} - self.i += 1; - return tok; +/// Returns true for vararg function-like macro, false otherwise +fn readFunclikeMacroParams(pp: *Preprocessor, name: PreprocessorToken, l_paren: PreprocessorToken, params: *ParamMap) !bool { + _ = name; + var pos: usize = 0; + while (true) { + var tok = pp.getToken(); + switch (tok.id) { + .r_paren => return false, + .unterminated_comment => { + try pp.errTok(tok, .unterminated_comment); + return false; + }, + else => {}, } - return null; - } -}; - -fn expandMacroExhaustive( - pp: *Preprocessor, - tokenizer: *Tokenizer, - buf: *ExpandBuf, - start_idx: usize, - end_idx: usize, - extend_buf: bool, - eval_ctx: EvalContext, -) MacroError!void { - var moving_end_idx = end_idx; - var advance_index: usize = 0; - // rescan loop - var do_rescan = true; - while (do_rescan) { - do_rescan = false; - // expansion loop - var idx: usize = start_idx + advance_index; - while (idx < moving_end_idx) { - const macro_tok = buf.items[idx]; - if (macro_tok.id == .keyword_defined and eval_ctx == .expr) { - idx += 1; - var it = TokenIterator.init(buf.items[idx..moving_end_idx]); - if (it.nextNoWS()) |tok| { - switch (tok.id) { - .l_paren => { - _ = it.nextNoWS(); // eat (what should be) identifier - _ = it.nextNoWS(); // eat (what should be) r paren - }, - .identifier, .extended_identifier => {}, - else => {}, - } + if (pos != 0) { + if (tok.id != .comma) { + switch (tok.id) { + .nl, .eof => {}, + else => pp.skipToNl(), } - idx += it.i; - continue; + try pp.errTok(tok, .expected_comma_param_list); + return error.InvalidMacroDef; } - if (!macro_tok.id.isMacroIdentifier() or macro_tok.flags.expansion_disabled) { - idx += 1; - continue; + tok = pp.getToken(); + } + if (tok.id.isDirectiveEnd()) { + try pp.errTok(tok, .missing_paren_param_list); + return false; + } + if 
(tok.id == .ellipsis) { + try params.put(pp.gpa, "__VA_ARGS__", makeMacroToken(pos, true)); + pos += 1; + const r_paren = pp.getToken(); + if (r_paren.id != .r_paren) { + try pp.errTok(r_paren, .missing_paren_param_list); + try pp.errTok(l_paren, .to_match_paren); + return error.InvalidMacroDef; } - const expanded = pp.expandedSlice(macro_tok); - const macro = pp.defines.getPtr(expanded) orelse { - idx += 1; - continue; - }; - const macro_hidelist = pp.hideset.get(macro_tok.loc); - if (pp.hideset.contains(macro_hidelist, expanded)) { - idx += 1; - continue; + return true; + } + if (!tok.id.isMacroIdentifier()) { + try pp.errTok(tok, .invalid_token_param_list); + return error.InvalidMacroDef; + } + const arg = pp.tokSlice(tok); + if (try pp.next(.ellipsis)) { + const r_paren = pp.getToken(); + if (r_paren.id != .r_paren) { + try pp.errTok(r_paren, .missing_paren_param_list); + try pp.errTok(l_paren, .to_match_paren); + pp.skipToNl(); } + try params.put(pp.gpa, arg, makeMacroToken(pos, true)); + pos += 1; + return true; + } + try params.put(pp.gpa, arg, makeMacroToken(pos, false)); + pos += 1; + } +} - macro_handler: { - if (macro.is_func) { - var r_paren: TokenWithExpansionLocs = undefined; - var macro_scan_idx = idx; - // to be saved in case this doesn't turn out to be a call - const args = pp.collectMacroFuncArguments( - tokenizer, - buf, - ¯o_scan_idx, - &moving_end_idx, - extend_buf, - macro.is_builtin, - &r_paren, - ) catch |er| switch (er) { - error.MissingLParen => { - if (!buf.items[idx].flags.is_macro_arg) buf.items[idx].flags.expansion_disabled = true; - idx += 1; - break :macro_handler; - }, - error.Unterminated => { - if (pp.comp.langopts.emulate == .gcc) idx += 1; - try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx, &moving_end_idx); - break :macro_handler; - }, - else => |e| return e, - }; - assert(r_paren.id == .r_paren); - var free_arg_expansion_locs = false; - defer { - for (args.items) |item| { - if (free_arg_expansion_locs) for (item) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); - pp.gpa.free(item); - } - args.deinit(); - } - const r_paren_hidelist = pp.hideset.get(r_paren.loc); - var hs = try pp.hideset.intersection(macro_hidelist, r_paren_hidelist); - hs = try pp.hideset.prepend(macro_tok.loc, hs); - - var args_count: u32 = @intCast(args.items.len); - // if the macro has zero arguments g() args_count is still 1 - // an empty token list g() and a whitespace-only token list g( ) - // counts as zero arguments for the purposes of argument-count validation - if (args_count == 1 and macro.params.len == 0) { - for (args.items[0]) |tok| { - if (tok.id != .macro_ws) break; - } else { - args_count = 0; - } - } - - // Validate argument count. 
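// Illustrative sketch, not part of the patch: readFunclikeMacroParams records
// each parameter as a .macro_param token built by makeMacroToken, which reuses
// the location fields as storage: byte_offset holds the parameter index and
// line holds the is-vararg flag, so `#define F(a, b, ...)` maps a -> 0, b -> 1
// and __VA_ARGS__ -> 2 (vararg). Decoding that packing (names illustrative):
const std = @import("std");

const Param = struct { index: u32, is_vararg: bool };

fn decodeParam(byte_offset: u32, line: u32) Param {
    return .{ .index = byte_offset, .is_vararg = line != 0 };
}

test "parameter encoding round-trip" {
    const p = decodeParam(2, 1); // third parameter, declared via `...`
    try std.testing.expectEqual(@as(u32, 2), p.index);
    try std.testing.expect(p.is_vararg);
}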
- const extra = Diagnostics.Message.Extra{ - .arguments = .{ .expected = @intCast(macro.params.len), .actual = args_count }, - }; - if (macro.var_args and args_count < macro.params.len) { - free_arg_expansion_locs = true; - try pp.comp.addDiagnostic( - .{ .tag = .expected_at_least_arguments, .loc = buf.items[idx].loc, .extra = extra }, - buf.items[idx].expansionSlice(), - ); - idx += 1; - try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx + 1, &moving_end_idx); - continue; - } - if (!macro.var_args and args_count != macro.params.len) { - free_arg_expansion_locs = true; - try pp.comp.addDiagnostic( - .{ .tag = .expected_arguments, .loc = buf.items[idx].loc, .extra = extra }, - buf.items[idx].expansionSlice(), - ); - idx += 1; - try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx + 1, &moving_end_idx); - continue; - } - var expanded_args = MacroArguments.init(pp.gpa); - defer deinitMacroArguments(pp.gpa, &expanded_args); - try expanded_args.ensureTotalCapacity(args.items.len); - for (args.items) |arg| { - var expand_buf = ExpandBuf.init(pp.gpa); - errdefer expand_buf.deinit(); - try expand_buf.appendSlice(arg); - - try pp.expandMacroExhaustive(tokenizer, &expand_buf, 0, expand_buf.items.len, false, eval_ctx); +fn readFunclikeMacroBody(pp: *Preprocessor, params: *const ParamMap) ![]const PreprocessorToken { + var tokens: TokenList = .{}; + errdefer tokens.deinit(pp.gpa); + while (true) { + const tok = pp.getToken(); + if (tok.id.isDirectiveEnd()) { + return tokens.toOwnedSlice(pp.gpa); + } + if (tok.id.isMacroIdentifier()) { + // const subst = params. + if (params.get(pp.tokSlice(tok))) |sub| { + var copy = sub; + copy.flags.space = tok.flags.space; + try tokens.append(pp.gpa, copy); + continue; + } + } + try tokens.append(pp.gpa, tok); + } +} - expanded_args.appendAssumeCapacity(try expand_buf.toOwnedSlice()); - } +fn readFuncLikeMacro(pp: *Preprocessor, name: PreprocessorToken, l_paren: PreprocessorToken) Error!void { + var params: ParamMap = .{}; + defer params.deinit(pp.gpa); + const is_vararg = pp.readFunclikeMacroParams(name, l_paren, ¶ms) catch |err| switch (err) { + error.InvalidMacroDef => blk: { + pp.skipToNl(); + break :blk false; + }, + else => |e| return e, + }; + const body = try pp.readFunclikeMacroBody(¶ms); + errdefer pp.gpa.free(body); + try pp.hashHashCheck(body); + const macro: Macro = .{ + .tokens = body, + .var_args = is_vararg, + .loc = name.loc, + .kind = .func, + .nargs = params.count(), + }; + try pp.defineMacro(name, macro); +} - var res = try pp.expandFuncMacro(macro_tok, macro, &args, &expanded_args, hs); - defer res.deinit(); - const tokens_added = res.items.len; - const tokens_removed = macro_scan_idx - idx + 1; - for (buf.items[idx .. idx + tokens_removed]) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); - try buf.replaceRange(idx, tokens_removed, res.items); - - moving_end_idx += tokens_added; - // Overflow here means that we encountered an unterminated argument list - // while expanding the body of this macro. - moving_end_idx -|= tokens_removed; - idx += tokens_added; - do_rescan = true; - } else { - const res = try pp.expandObjMacro(macro); - defer res.deinit(); - - const hs = try pp.hideset.prepend(macro_tok.loc, macro_hidelist); - - const macro_expansion_locs = macro_tok.expansionSlice(); - var increment_idx_by = res.items.len; - for (res.items, 0..) 
|*tok, i| { - tok.flags.is_macro_arg = macro_tok.flags.is_macro_arg; - try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc}); - try tok.addExpansionLocation(pp.gpa, macro_expansion_locs); - - const tok_hidelist = pp.hideset.get(tok.loc); - const new_hidelist = try pp.hideset.@"union"(tok_hidelist, hs); - try pp.hideset.put(tok.loc, new_hidelist); - - if (tok.id == .keyword_defined and eval_ctx == .expr) { - try pp.comp.addDiagnostic(.{ - .tag = .expansion_to_defined, - .loc = tok.loc, - }, tok.expansionSlice()); - } +fn readDefine(pp: *Preprocessor) !void { + const name = try pp.readIdent() orelse { + pp.skipToNl(); + return; + }; + const next_tok = pp.getToken(); + if (next_tok.id == .l_paren and !next_tok.flags.space) { + try pp.readFuncLikeMacro(name, next_tok); + return; + } + try pp.ungetToken(next_tok); + try pp.readObjMacro(name); +} - if (i < increment_idx_by and (tok.id == .keyword_defined or pp.defines.contains(pp.expandedSlice(tok.*)))) { - increment_idx_by = i; - } - } +fn doSkipSpace(pp: *Preprocessor) bool { + const saved_tokenizer = pp.tokenizers.items[pp.tokenizers.items.len - 1]; + const tok = pp.tokenizers.items[pp.tokenizers.items.len - 1].next(); + switch (tok.id) { + .eof => return false, + .whitespace, .comment => return true, + else => { + pp.tokenizers.items[pp.tokenizers.items.len - 1] = saved_tokenizer; + return false; + }, + } +} - TokenWithExpansionLocs.free(buf.items[idx].expansion_locs, pp.gpa); - try buf.replaceRange(idx, 1, res.items); - idx += increment_idx_by; - moving_end_idx = moving_end_idx + res.items.len - 1; - do_rescan = true; - } - } - if (idx - start_idx == advance_index + 1 and !do_rescan) { - advance_index += 1; - } - } // end of replacement phase +/// Skips spaces including comments. +/// Returns true if at least one space is skipped. 
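// Illustrative sketch, not part of the patch: readDefine above decides between
// an object-like and a function-like macro purely by whether the `(` follows
// the name with no intervening whitespace, as the standard requires:
//
//     #define F(x) x     // function-like
//     #define F (x) x    // object-like, expands to `(x) x`
//
// The same check over the two flags involved (names illustrative):
const std = @import("std");

fn isFunctionLike(next_is_l_paren: bool, space_before_paren: bool) bool {
    return next_is_l_paren and !space_before_paren;
}

test "whitespace before ( makes the macro object-like" {
    try std.testing.expect(isFunctionLike(true, false));
    try std.testing.expect(!isFunctionLike(true, true));
    try std.testing.expect(!isFunctionLike(false, false));
}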
+fn skipSpace(pp: *Preprocessor) bool { + if (!pp.doSkipSpace()) { + return false; } - // end of scanning phase + while (pp.doSkipSpace()) {} + return true; +} - // trim excess buffer - for (buf.items[moving_end_idx..]) |item| { - TokenWithExpansionLocs.free(item.expansion_locs, pp.gpa); +/// Read the next raw token from the tokenizer stack +fn lexToken(pp: *Preprocessor) PreprocessorToken { + if (pp.skipSpace()) { + return .{ .id = .whitespace, .loc = undefined }; } - buf.items.len = moving_end_idx; + const tok = pp.tokenizers.items[pp.tokenizers.items.len - 1].next(); + return .{ + .id = tok.id, + .flags = .{ + .is_bol = tok.bol, + }, + .loc = .{ + .id = tok.source, + .byte_offset = tok.start, + .line = tok.line, + }, + }; } -/// Try to expand a macro after a possible candidate has been read from the `tokenizer` -/// into the `raw` token passed as argument -fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroError!void { - var source_tok = tokFromRaw(raw); - if (!raw.id.isMacroIdentifier()) { - source_tok.id.simplifyMacroKeyword(); - return pp.addToken(source_tok); +/// Read the next token without expanding it +fn getToken(pp: *Preprocessor) PreprocessorToken { + if (!pp.isBufferEmpty() and pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].items.len > 0) { + return pp.expansion_bufs.items[pp.expansion_bufs.items.len - 1].pop(); + } + if (pp.expansion_bufs.items.len > 1) { + return .{ .id = .eof, .loc = undefined }; } - pp.top_expansion_buf.items.len = 0; - try pp.top_expansion_buf.append(source_tok); - pp.expansion_source_loc = source_tok.loc; + const bol = pp.tokenizers.items[pp.tokenizers.items.len - 1].bol; + var tok = pp.lexToken(); + while (tok.id == .whitespace) { + tok = pp.lexToken(); + tok.flags.space = true; + } + tok.flags.is_bol = bol; + return tok; +} - pp.hideset.clearRetainingCapacity(); - try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr); - try pp.ensureUnusedTokenCapacity(pp.top_expansion_buf.items.len); - for (pp.top_expansion_buf.items) |*tok| { - if (tok.id == .macro_ws and !pp.preserve_whitespace) { - TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); - continue; - } - if (tok.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) { - TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); - continue; - } - if (tok.id == .placemarker) { - TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); - continue; - } - tok.id.simplifyMacroKeywordExtra(true); - pp.addTokenAssumeCapacity(tok.*); - } - if (pp.preserve_whitespace) { - try pp.ensureUnusedTokenCapacity(pp.add_expansion_nl); - while (pp.add_expansion_nl > 0) : (pp.add_expansion_nl -= 1) { - pp.addTokenAssumeCapacity(.{ .id = .nl, .loc = .{ - .id = tokenizer.source, - .line = tokenizer.line, - } }); +fn readDefinedOp(pp: *Preprocessor) !PreprocessorToken { + var tok = pp.getToken(); + if (tok.id == .l_paren) { + tok = pp.getToken(); + const r_paren = pp.getToken(); + if (r_paren.id != .r_paren) { + try pp.errStr(r_paren, .closing_paren_after, "defined"); } } + if (!tok.id.isMacroIdentifier()) { + try pp.errTok(tok, .macro_name_must_be_identifier); + } + const slice = pp.tokSlice(tok); + if (pp.defines.contains(slice)) { + return PreprocessorToken.one; + } + return PreprocessorToken.zero; } -fn expandedSliceExtra(pp: *const Preprocessor, tok: anytype, macro_ws_handling: enum { single_macro_ws, preserve_macro_ws }) []const u8 { - if (tok.id.lexeme()) |some| { - if (!tok.id.allowsDigraphs(pp.comp.langopts) and !(tok.id == .macro_ws 
and macro_ws_handling == .preserve_macro_ws)) return some; +fn readIntExprLine(pp: *Preprocessor) !void { + while (true) { + const tok = try pp.readExpandNewline(); + if (tok.id.isDirectiveEnd()) break; + if (tok.id == .keyword_defined) { + const result = try pp.readDefinedOp(); + try pp.addToken(result); + } else if (tok.id.isMacroIdentifier()) { + try pp.addToken(PreprocessorToken.zero); + } else { + try pp.addToken(tok); + } } - var tmp_tokenizer = Tokenizer{ - .buf = pp.comp.getSource(tok.loc.id).buf, - .langopts = pp.comp.langopts, - .index = tok.loc.byte_offset, - .source = .generated, + try pp.addToken(.{ .id = .eof, .loc = .{} }); +} + +fn readConstexpr(pp: *Preprocessor) !bool { + const start = pp.tokens.len; + defer pp.tokens.len = start; + try pp.readIntExprLine(); + + var parser = Parser{ + .pp = pp, + .comp = pp.comp, + .gpa = pp.gpa, + .tok_ids = pp.tokens.items(.id), + .tok_i = @intCast(start), + .arena = undefined, + .in_macro = true, + .strings = std.ArrayListAligned(u8, 4).init(pp.comp.gpa), + + .data = undefined, + .value_map = undefined, + .labels = undefined, + .decl_buf = undefined, + .list_buf = undefined, + .param_buf = undefined, + .enum_buf = undefined, + .record_buf = undefined, + .attr_buf = undefined, + .field_attr_buf = undefined, + .string_ids = undefined, }; - if (tok.id == .macro_string) { - while (true) : (tmp_tokenizer.index += 1) { - if (tmp_tokenizer.buf[tmp_tokenizer.index] == '>') break; - } - return tmp_tokenizer.buf[tok.loc.byte_offset .. tmp_tokenizer.index + 1]; - } - const res = tmp_tokenizer.next(); - return tmp_tokenizer.buf[res.start..res.end]; + defer parser.strings.deinit(); + return parser.macroExpr(); } -/// Get expanded token source string. -pub fn expandedSlice(pp: *const Preprocessor, tok: anytype) []const u8 { - return pp.expandedSliceExtra(tok, .single_macro_ws); +/// #line number "file" +/// TODO: validate that the pp_num token is solely digits +fn readLine(pp: *Preprocessor) Error!void { + const digits = pp.getToken(); + if (digits.id != .pp_num) try pp.errTok(digits, .line_simple_digit); + + if (digits.id.isDirectiveEnd()) return; + const name = pp.getToken(); + if (name.id.isDirectiveEnd()) return; + if (name.id != .string_literal) try pp.errTok(name, .line_invalid_filename); + try pp.expectNewline(); } -/// Concat two tokens and add the result to pp.generated -fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const TokenWithExpansionLocs) Error!void { - const lhs = while (lhs_toks.popOrNull()) |lhs| { - if ((pp.comp.langopts.preserve_comments_in_macros and lhs.id == .comment) or - (lhs.id != .macro_ws and lhs.id != .comment)) - break lhs; +fn readPragma(pp: *Preprocessor) Error!void { + // TODO + pp.skipToNl(); +} - TokenWithExpansionLocs.free(lhs.expansion_locs, pp.gpa); - } else { - return bufCopyTokens(lhs_toks, rhs_toks, &.{}); +fn readUndef(pp: *Preprocessor) Error!void { + const name = try pp.readIdent() orelse { + pp.skipToNl(); + return; }; + try pp.expectNewline(); + _ = pp.defines.remove(pp.tokSlice(name)); +} - var rhs_rest: u32 = 1; - const rhs = for (rhs_toks) |rhs| { - if ((pp.comp.langopts.preserve_comments_in_macros and rhs.id == .comment) or - (rhs.id != .macro_ws and rhs.id != .comment)) - break rhs; +/// Skip until after a newline, error if extra tokens before it. 
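// Illustrative sketch, not part of the patch: readIntExprLine and readConstexpr
// above turn a #if/#elif line into tokens for Parser.macroExpr. `defined X` and
// `defined(X)` are folded to 1 or 0 up front, and any identifier that survives
// macro expansion evaluates to 0, so with FOO undefined
//
//     #if defined(FOO) || FOO + 1 > 2    // becomes 0 || 0 + 1 > 2  -> false
//
// The token-to-value rule in isolation (ifValue is illustrative only):
const std = @import("std");

fn ifValue(is_defined_op: bool, is_identifier: bool, is_defined: bool, literal: i64) i64 {
    if (is_defined_op) return @intFromBool(is_defined);
    if (is_identifier) return 0;
    return literal;
}

test "#if with an undefined identifier" {
    const defined_foo = ifValue(true, false, false, 0); // defined(FOO) -> 0
    const bare_foo = ifValue(false, true, false, 0); // FOO -> 0
    try std.testing.expect(!((defined_foo != 0) or (bare_foo + 1 > 2)));
}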
+fn expectNewline(pp: *Preprocessor) !void { + var sent_err = false; + while (true) { + const tok = pp.getToken(); + if (tok.id.isDirectiveEnd()) return; + if (tok.id == .whitespace or tok.id == .comment) continue; + if (!sent_err) { + sent_err = true; + try pp.errTok(tok, .extra_tokens_directive_end); + } + } +} - rhs_rest += 1; - } else { - return lhs_toks.appendAssumeCapacity(lhs); +/// TODO: pragma once +fn readIncludeExtra(pp: *Preprocessor, include_token: PreprocessorToken, which: Compilation.WhichInclude) Error!void { + var is_std: bool = undefined; + const include_str = pp.readHeaderName(&is_std) catch |err| switch (err) { + error.InvalidInclude => return, + else => |e| return e, }; - defer TokenWithExpansionLocs.free(lhs.expansion_locs, pp.gpa); + try pp.expectNewline(); - const start = pp.comp.generated_buf.items.len; - const end = start + pp.expandedSlice(lhs).len + pp.expandedSlice(rhs).len; - try pp.comp.generated_buf.ensureTotalCapacity(pp.gpa, end + 1); // +1 for a newline - // We cannot use the same slices here since they might be invalidated by `ensureCapacity` - pp.comp.generated_buf.appendSliceAssumeCapacity(pp.expandedSlice(lhs)); - pp.comp.generated_buf.appendSliceAssumeCapacity(pp.expandedSlice(rhs)); - pp.comp.generated_buf.appendAssumeCapacity('\n'); - - // Try to tokenize the result. - var tmp_tokenizer = Tokenizer{ - .buf = pp.comp.generated_buf.items, - .langopts = pp.comp.langopts, - .index = @intCast(start), - .source = .generated, + const filename = include_str[1 .. include_str.len - 1]; + const include_type: Compilation.IncludeType = switch (include_str[0]) { + '"' => .quotes, + '<' => .angle_brackets, + else => unreachable, }; - const pasted_token = tmp_tokenizer.nextNoWSComments(); - const next = tmp_tokenizer.nextNoWSComments(); - const pasted_id = if (lhs.id == .placemarker and rhs.id == .placemarker) - .placemarker - else - pasted_token.id; - try lhs_toks.append(try pp.makeGeneratedToken(start, pasted_id, lhs)); - - if (next.id != .nl and next.id != .eof) { - try pp.errStr( - lhs, - .pasting_formed_invalid, - try pp.comp.diagnostics.arena.allocator().dupe(u8, pp.comp.generated_buf.items[start..end]), - ); - try lhs_toks.append(tokFromRaw(next)); + const tok: RawToken = .{ .id = include_token.id, .source = include_token.loc.id, .start = include_token.loc.byte_offset, .line = include_token.loc.line }; + const source = (try pp.comp.findInclude(filename, tok, include_type, which)) orelse return pp.fatalNotFound(include_token, filename); + if (pp.include_guards.get(source.id)) |guard| { + if (pp.defines.contains(guard)) return; } + const guard = pp.findIncludeGuard(source); + try pp.guard_stack.append(pp.gpa, guard); - try bufCopyTokens(lhs_toks, rhs_toks[rhs_rest..], &.{}); -} - -fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: TokenWithExpansionLocs) !TokenWithExpansionLocs { - var pasted_token = TokenWithExpansionLocs{ .id = id, .loc = .{ - .id = .generated, - .byte_offset = @intCast(start), - .line = pp.generated_line, - } }; - pp.generated_line += 1; - try pasted_token.addExpansionLocation(pp.gpa, &.{source.loc}); - try pasted_token.addExpansionLocation(pp.gpa, source.expansionSlice()); - return pasted_token; -} - -/// Defines a new macro and warns if it is a duplicate -fn defineMacro(pp: *Preprocessor, name_tok: RawToken, macro: Macro) Error!void { - const name_str = pp.tokSlice(name_tok); - const gop = try pp.defines.getOrPut(pp.gpa, name_str); - if (gop.found_existing and !gop.value_ptr.eql(macro, pp)) { - const tag: 
Diagnostics.Tag = if (gop.value_ptr.is_builtin) .builtin_macro_redefined else .macro_redefined; - const start = pp.comp.diagnostics.list.items.len; + if (pp.tokenizers.items.len > max_include_depth) { try pp.comp.addDiagnostic(.{ - .tag = tag, - .loc = .{ .id = name_tok.source, .byte_offset = name_tok.start, .line = name_tok.line }, - .extra = .{ .str = name_str }, + .tag = .too_many_includes, + .loc = include_token.loc, }, &.{}); - if (!gop.value_ptr.is_builtin and pp.comp.diagnostics.list.items.len != start) { - try pp.comp.addDiagnostic(.{ - .tag = .previous_definition, - .loc = gop.value_ptr.loc, - }, &.{}); - } + return error.FatalError; } - if (pp.verbose) { - pp.verboseLog(name_tok, "macro {s} defined", .{name_str}); - } - gop.value_ptr.* = macro; + try pp.tokenizers.append(pp.gpa, .{ + .buf = source.buf, + .langopts = pp.comp.langopts, + .index = 0, + .source = source.id, + }); } -/// Handle a #define directive. -fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { - // Get macro name and validate it. - const macro_name = tokenizer.nextNoWS(); - if (macro_name.id == .keyword_defined) { - try pp.err(macro_name, .defined_as_macro_name); - return skipToNl(tokenizer); +/// Read a header name delimited by quotes or angle brackets +fn readHeaderFileName(pp: *Preprocessor, is_std: *bool) !?[]const u8 { + if (!pp.isBufferEmpty()) return null; + _ = pp.skipSpace(); + + var close: u8 = undefined; + var tokenizer = pp.tokenizers.items[pp.tokenizers.items.len - 1]; + defer pp.tokenizers.items[pp.tokenizers.items.len - 1] = tokenizer; + + if (tokenizer.buf[tokenizer.index..].len < 2) { + return null; } - if (!macro_name.id.isMacroIdentifier()) { - try pp.err(macro_name, .macro_name_must_be_identifier); - return skipToNl(tokenizer); - } - var macro_name_token_id = macro_name.id; - macro_name_token_id.simplifyMacroKeyword(); - switch (macro_name_token_id) { - .identifier, .extended_identifier => {}, - else => if (macro_name_token_id.isMacroIdentifier()) { - try pp.err(macro_name, .keyword_macro); + const start = tokenizer.index; + switch (tokenizer.buf[tokenizer.index..][0]) { + '"' => { + is_std.* = false; + close = '"'; + }, + '<' => { + is_std.* = true; + close = '>'; }, + else => return null, } + tokenizer.index += 1; + while (tokenizer.index < tokenizer.buf.len and tokenizer.buf[tokenizer.index] != close and tokenizer.buf[tokenizer.index] != '\n') : (tokenizer.index += 1) {} - // Check for function macros and empty defines. - var first = tokenizer.next(); - switch (first.id) { - .nl, .eof => return pp.defineMacro(macro_name, .{ - .params = &.{}, - .tokens = &.{}, - .var_args = false, - .loc = tokFromRaw(macro_name).loc, - .is_func = false, - }), - .whitespace => first = tokenizer.next(), - .l_paren => return pp.defineFn(tokenizer, macro_name, first), - else => try pp.err(first, .whitespace_after_macro_name), - } - if (first.id == .hash_hash) { - try pp.err(first, .hash_hash_at_start); - return skipToNl(tokenizer); + if (tokenizer.index == tokenizer.buf.len or tokenizer.buf[tokenizer.index] != close) { + try pp.errTok(.{ .id = undefined, .loc = .{ .id = tokenizer.source, .byte_offset = tokenizer.index, .line = tokenizer.line } }, .header_str_closing); + try pp.errTok(.{ .id = undefined, .loc = .{ .id = tokenizer.source, .byte_offset = start, .line = tokenizer.line } }, .header_str_match); + return error.InvalidInclude; } - first.id.simplifyMacroKeyword(); - pp.token_buf.items.len = 0; // Safe to use since we can only be in one directive at a time. 
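// Illustrative sketch, not part of the patch: readIncludeExtra above does not
// recurse into the included file. It pushes a fresh Tokenizer for the new source
// onto pp.tokenizers and lets getToken keep reading from the top of that stack,
// reporting too_many_includes once the stack grows past max_include_depth.
// The stack-of-sources shape with a tiny illustrative depth limit:
const std = @import("std");

fn pushInclude(gpa: std.mem.Allocator, stack: *std.ArrayListUnmanaged(u32), id: u32, max_depth: usize) !void {
    if (stack.items.len > max_depth) return error.TooManyIncludes;
    try stack.append(gpa, id);
}

test "include depth limit" {
    const gpa = std.testing.allocator;
    var stack: std.ArrayListUnmanaged(u32) = .{};
    defer stack.deinit(gpa);
    try pushInclude(gpa, &stack, 0, 1);
    try pushInclude(gpa, &stack, 1, 1);
    try std.testing.expectError(error.TooManyIncludes, pushInclude(gpa, &stack, 2, 1));
}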
+ tokenizer.index += 1; - var need_ws = false; - // Collect the token body and validate any ## found. - var tok = first; - while (true) { - tok.id.simplifyMacroKeyword(); - switch (tok.id) { - .hash_hash => { - const next = tokenizer.nextNoWSComments(); - switch (next.id) { - .nl, .eof => { - try pp.err(tok, .hash_hash_at_end); - return; - }, - .hash_hash => { - try pp.err(next, .hash_hash_at_end); - return; - }, - else => {}, - } - try pp.token_buf.append(tok); - try pp.token_buf.append(next); - }, - .nl, .eof => break, - .comment => if (pp.comp.langopts.preserve_comments_in_macros) { - if (need_ws) { - need_ws = false; - try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); - } - try pp.token_buf.append(tok); - }, - .whitespace => need_ws = true, - .unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| { - try pp.err(tok, invalidTokenDiagnostic(tag)); - try pp.token_buf.append(tok); - }, - .unterminated_comment => try pp.err(tok, .unterminated_comment), - else => { - if (tok.id != .whitespace and need_ws) { - need_ws = false; - try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); - } - try pp.token_buf.append(tok); - }, - } - tok = tokenizer.next(); + const buf = tokenizer.buf[start..tokenizer.index]; + if (buf.len == 2) { + try pp.errTok(.{ .id = .nl, .loc = .{ .id = tokenizer.source, .byte_offset = start, .line = tokenizer.line } }, .empty_filename); + return error.InvalidInclude; } + return buf; +} - const list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items); - try pp.defineMacro(macro_name, .{ - .loc = tokFromRaw(macro_name).loc, - .tokens = list, - .params = undefined, - .is_func = false, - .var_args = false, - }); +fn isBufferEmpty(pp: *const Preprocessor) bool { + return pp.expansion_bufs.items.len == 0; } -/// Handle a function like #define directive. -fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_paren: RawToken) Error!void { - assert(macro_name.id.isMacroIdentifier()); - var params = std.ArrayList([]const u8).init(pp.gpa); - defer params.deinit(); +/// Read a delimited header name, or a macro expanded one +fn readHeaderName(pp: *Preprocessor, is_std: *bool) ![]const u8 { + if (try pp.readHeaderFileName(is_std)) |path| return path; - // Parse the parameter list. - var gnu_var_args: []const u8 = ""; - var var_args = false; + // If a token following #include does not start with < nor ", + // try to read the token as a regular token. Macro-expanded + // form may be a valid header file path. 
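// Illustrative sketch, not part of the patch: this fallback is what makes a
// macro-expanded include operand work, e.g.
//
//     #define INCFILE "config.h"
//     #include INCFILE
//
// A string literal result is used directly; a `<` instead starts re-assembling
// the path from the expanded tokens until `>` is read, as the loop below does
// into pp.char_buf. The re-assembly idea over plain strings (joinHeaderName is
// illustrative only):
const std = @import("std");

fn joinHeaderName(gpa: std.mem.Allocator, pieces: []const []const u8) ![]u8 {
    var buf: std.ArrayListUnmanaged(u8) = .{};
    errdefer buf.deinit(gpa);
    try buf.append(gpa, '<');
    for (pieces) |piece| try buf.appendSlice(gpa, piece);
    try buf.append(gpa, '>');
    return buf.toOwnedSlice(gpa);
}

test "reassemble <stdio.h> from expanded tokens" {
    const gpa = std.testing.allocator;
    const name = try joinHeaderName(gpa, &.{ "stdio", ".", "h" });
    defer gpa.free(name);
    try std.testing.expectEqualStrings("<stdio.h>", name);
}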
+ const tok = try pp.readExpandNewline(); + if (tok.id.isDirectiveEnd()) { + try pp.errTok(tok, .expected_filename); + return error.InvalidInclude; + } + if (tok.id == .string_literal) { + is_std.* = false; + return pp.tokSlice(tok); + } + if (tok.id != .angle_bracket_left) { + try pp.errStr(tok, .expected_left_angle_bracket, pp.tokSlice(tok)); + return error.InvalidInclude; + } + const start = pp.char_buf.items.len; + try pp.char_buf.append(pp.gpa, '<'); + defer pp.char_buf.items.len = start; + const writer = pp.char_buf.writer(pp.gpa); while (true) { - var tok = tokenizer.nextNoWS(); - if (tok.id == .r_paren) break; - if (tok.id == .eof) return pp.err(tok, .unterminated_macro_param_list); - if (tok.id == .ellipsis) { - var_args = true; - const r_paren = tokenizer.nextNoWS(); - if (r_paren.id != .r_paren) { - try pp.err(r_paren, .missing_paren_param_list); - try pp.err(l_paren, .to_match_paren); - return skipToNl(tokenizer); - } - break; + const path_tok = try pp.readExpandNewline(); + if (path_tok.id == .nl) { + try pp.errTok(path_tok, .header_str_closing); + try pp.errTok(tok, .header_str_match); + return error.InvalidInclude; } - if (!tok.id.isMacroIdentifier()) { - try pp.err(tok, .invalid_token_param_list); - return skipToNl(tokenizer); - } - - try params.append(pp.tokSlice(tok)); - - tok = tokenizer.nextNoWS(); - if (tok.id == .ellipsis) { - try pp.err(tok, .gnu_va_macro); - gnu_var_args = params.pop(); - const r_paren = tokenizer.nextNoWS(); - if (r_paren.id != .r_paren) { - try pp.err(r_paren, .missing_paren_param_list); - try pp.err(l_paren, .to_match_paren); - return skipToNl(tokenizer); - } - break; - } else if (tok.id == .r_paren) { + if (path_tok.id == .angle_bracket_right) { break; - } else if (tok.id != .comma) { - try pp.err(tok, .expected_comma_param_list); - return skipToNl(tokenizer); } + try pp.prettyPrintToken(writer, path_tok.toTreeToken()); } + is_std.* = true; + try pp.char_buf.append(pp.gpa, '>'); + return pp.gpa.dupe(u8, pp.char_buf.items[start..]); +} - var need_ws = false; - // Collect the body tokens and validate # and ##'s found. - pp.token_buf.items.len = 0; // Safe to use since we can only be in one directive at a time. - tok_loop: while (true) { - var tok = tokenizer.next(); - switch (tok.id) { - .nl, .eof => break, - .whitespace => need_ws = pp.token_buf.items.len != 0, - .comment => if (!pp.comp.langopts.preserve_comments_in_macros) continue else { - if (need_ws) { - need_ws = false; - try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); - } - try pp.token_buf.append(tok); - }, - .hash => { - if (tok.id != .whitespace and need_ws) { - need_ws = false; - try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); - } - const param = tokenizer.nextNoWS(); - blk: { - if (var_args and param.id == .keyword_va_args) { - tok.id = .stringify_va_args; - try pp.token_buf.append(tok); - continue :tok_loop; - } - if (!param.id.isMacroIdentifier()) break :blk; - const s = pp.tokSlice(param); - if (mem.eql(u8, s, gnu_var_args)) { - tok.id = .stringify_va_args; - try pp.token_buf.append(tok); - continue :tok_loop; - } - for (params.items, 0..) 
|p, i| { - if (mem.eql(u8, p, s)) { - tok.id = .stringify_param; - tok.end = @intCast(i); - try pp.token_buf.append(tok); - continue :tok_loop; - } - } - } - try pp.err(param, .hash_not_followed_param); - return skipToNl(tokenizer); - }, - .hash_hash => { - need_ws = false; - // if ## appears at the beginning, the token buf is still empty - // in this case, error out - if (pp.token_buf.items.len == 0) { - try pp.err(tok, .hash_hash_at_start); - return skipToNl(tokenizer); - } - const saved_tokenizer = tokenizer.*; - const next = tokenizer.nextNoWSComments(); - if (next.id == .nl or next.id == .eof) { - try pp.err(tok, .hash_hash_at_end); - return; - } - tokenizer.* = saved_tokenizer; - // convert the previous token to .macro_param_no_expand if it was .macro_param - if (pp.token_buf.items[pp.token_buf.items.len - 1].id == .macro_param) { - pp.token_buf.items[pp.token_buf.items.len - 1].id = .macro_param_no_expand; - } - try pp.token_buf.append(tok); - }, - .unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| { - try pp.err(tok, invalidTokenDiagnostic(tag)); - try pp.token_buf.append(tok); - }, - .unterminated_comment => try pp.err(tok, .unterminated_comment), - else => { - if (tok.id != .whitespace and need_ws) { - need_ws = false; - try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); - } - if (var_args and tok.id == .keyword_va_args) { - // do nothing - } else if (var_args and tok.id == .keyword_va_opt) { - const opt_l_paren = tokenizer.next(); - if (opt_l_paren.id != .l_paren) { - try pp.err(opt_l_paren, .va_opt_lparen); - return skipToNl(tokenizer); - } - tok.start = opt_l_paren.end; - - var parens: u32 = 0; - while (true) { - const opt_tok = tokenizer.next(); - switch (opt_tok.id) { - .l_paren => parens += 1, - .r_paren => if (parens == 0) { - break; - } else { - parens -= 1; - }, - .nl, .eof => { - try pp.err(opt_tok, .va_opt_rparen); - try pp.err(opt_l_paren, .to_match_paren); - return skipToNl(tokenizer); - }, - .whitespace => {}, - else => tok.end = opt_tok.end, - } - } - } else if (tok.id.isMacroIdentifier()) { - tok.id.simplifyMacroKeyword(); - const s = pp.tokSlice(tok); - if (mem.eql(u8, gnu_var_args, s)) { - tok.id = .keyword_va_args; - } else for (params.items, 0..) 
|param, i| { - if (mem.eql(u8, param, s)) { - // NOTE: it doesn't matter to assign .macro_param_no_expand - // here in case a ## was the previous token, because - // ## processing will eat this token with the same semantics - tok.id = .macro_param; - tok.end = @intCast(i); - break; - } - } - } - try pp.token_buf.append(tok); - }, +fn readInclude(pp: *Preprocessor, include_token: PreprocessorToken) Error!void { + return pp.readIncludeExtra(include_token, .first); +} + +fn readIncludeNext(pp: *Preprocessor, include_token: PreprocessorToken) Error!void { + return pp.readIncludeExtra(include_token, .next); +} + +fn readErrorMessage(pp: *Preprocessor, directive_tok: PreprocessorToken, tag: Diagnostics.Tag) !void { + const char_top = pp.char_buf.items.len; + defer pp.char_buf.items.len = char_top; + var i: usize = 0; + while (true) : (i += 1) { + const tok = pp.getToken(); + if (tok.id.isDirectiveEnd()) break; + const slice = pp.tokSlice(tok); + if (slice.len > 0 and tok.flags.space and i != 0) { + try pp.char_buf.append(pp.gpa, ' '); } + try pp.char_buf.appendSlice(pp.gpa, slice); } - - const param_list = try pp.arena.allocator().dupe([]const u8, params.items); - const token_list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items); - try pp.defineMacro(macro_name, .{ - .is_func = true, - .params = param_list, - .var_args = var_args or gnu_var_args.len != 0, - .tokens = token_list, - .loc = tokFromRaw(macro_name).loc, - }); + const slice = pp.char_buf.items[char_top..]; + const duped = try pp.comp.diagnostics.arena.allocator().dupe(u8, slice); + try pp.comp.addDiagnostic(.{ + .tag = tag, + .loc = directive_tok.loc, + .extra = .{ .str = duped }, + }, &.{}); } -/// Handle an #embed directive -/// embedDirective : ("FILENAME" | ) embedParam* -/// embedParam : IDENTIFIER (:: IDENTIFIER)? '(' ')' -fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void { - const first = tokenizer.nextNoWS(); - const filename_tok = pp.findIncludeFilenameToken(first, tokenizer, .ignore_trailing_tokens) catch |er| switch (er) { - error.InvalidInclude => return, - else => |e| return e, - }; - defer TokenWithExpansionLocs.free(filename_tok.expansion_locs, pp.gpa); +fn clearGuard(pp: *Preprocessor) void { + pp.guard_stack.items[pp.guard_stack.items.len - 1] = null; +} - // Check for empty filename. - const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws); - if (tok_slice.len < 3) { - try pp.err(first, .empty_filename); - return; +fn readDirective(pp: *Preprocessor) Error!void { + const directive = pp.getToken(); + if (directive.id.isDirectiveEnd()) return; + if (directive.id == .pp_num) { + return pp.readLinemarker(); } - const filename = tok_slice[1 .. 
tok_slice.len - 1]; - const include_type: Compilation.IncludeType = switch (filename_tok.id) { - .string_literal => .quotes, - .macro_string => .angle_brackets, - else => unreachable, - }; - // Index into `token_buf` - const Range = struct { - start: u32, - end: u32, + const until_else = 0; + const until_endif = 1; + const until_endif_seen_else = 2; - fn expand(opt_range: ?@This(), pp_: *Preprocessor, tokenizer_: *Tokenizer) !void { - const range = opt_range orelse return; - const slice = pp_.token_buf.items[range.start..range.end]; - for (slice) |tok| { - try pp_.expandMacro(tokenizer_, tok); + switch (directive.id) { + .keyword_define => try pp.readDefine(), + .keyword_elif => { + if (pp.if_level == 0) { + try pp.errTok(directive, .elif_without_if); + pp.if_level += 1; + pp.if_kind.set(pp.if_level, until_else); + } else if (pp.if_level == 1) { + pp.clearGuard(); } - } - }; - pp.token_buf.items.len = 0; - - var limit: ?u32 = null; - var prefix: ?Range = null; - var suffix: ?Range = null; - var if_empty: ?Range = null; - while (true) { - const param_first = tokenizer.nextNoWS(); - switch (param_first.id) { - .nl, .eof => break, - .identifier => {}, - else => { - try pp.err(param_first, .malformed_embed_param); - continue; - }, - } + switch (pp.if_kind.get(pp.if_level)) { + until_else => if (try pp.readConstexpr()) { + pp.if_kind.set(pp.if_level, until_endif); + if (pp.verbose) { + pp.verboseLog(directive, "entering then branch of #elif", .{}); + } + } else { + try pp.skip(.until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #elif", .{}); + } + }, + until_endif => try pp.skip(.until_endif), + until_endif_seen_else => { + try pp.errTok(directive, .elif_after_else); + pp.skipToNl(); + }, + else => unreachable, + } + }, + .keyword_else => { + try pp.expectNewline(); + if (pp.if_level == 0) { + try pp.errTok(directive, .else_without_if); + return; + } else if (pp.if_level == 1) { + pp.clearGuard(); + } + switch (pp.if_kind.get(pp.if_level)) { + until_else => { + pp.if_kind.set(pp.if_level, until_endif_seen_else); + if (pp.verbose) { + pp.verboseLog(directive, "#else branch here", .{}); + } + }, + until_endif => try pp.skip(.until_endif_seen_else), + until_endif_seen_else => { + try pp.errTok(directive, .else_after_else); + pp.skipToNl(); + }, + else => unreachable, + } + }, + .keyword_endif => { + try pp.expectNewline(); + if (pp.if_level == 0) { + pp.clearGuard(); + try pp.errTok(directive, .endif_without_if); + return; + } else if (pp.if_level == 1) { + var tokenizer = &pp.tokenizers.items[pp.tokenizers.items.len - 1]; + const saved_tokenizer = tokenizer.*; + defer tokenizer.* = saved_tokenizer; - const char_top = pp.char_buf.items.len; - defer pp.char_buf.items.len = char_top; - - const maybe_colon = tokenizer.colonColon(); - const param = switch (maybe_colon.id) { - .colon_colon => blk: { - // vendor::param - const param = tokenizer.nextNoWS(); - if (param.id != .identifier) { - try pp.err(param, .malformed_embed_param); - continue; + var next_tok = tokenizer.nextNoWS(); + while (next_tok.id == .nl) : (next_tok = tokenizer.nextNoWS()) {} + if (next_tok.id != .eof) pp.clearGuard(); + } + pp.if_level -= 1; + }, + .keyword_error => try pp.readErrorMessage(directive, .error_directive), + .keyword_if => { + const sum, const overflowed = @addWithOverflow(pp.if_level, 1); + if (overflowed != 0) + return pp.fatal(directive, "too many #if nestings", .{}); + pp.if_level = sum; + + if (try pp.readConstexpr()) { + pp.if_kind.set(pp.if_level, until_endif); + if 
(pp.verbose) { + pp.verboseLog(directive, "entering then branch of #if", .{}); } - const l_paren = tokenizer.nextNoWS(); - if (l_paren.id != .l_paren) { - try pp.err(l_paren, .malformed_embed_param); - continue; + } else { + pp.if_kind.set(pp.if_level, until_else); + try pp.skip(.until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #if", .{}); } - try pp.char_buf.appendSlice(Attribute.normalize(pp.tokSlice(param_first))); - try pp.char_buf.appendSlice("::"); - try pp.char_buf.appendSlice(Attribute.normalize(pp.tokSlice(param))); - break :blk pp.char_buf.items; - }, - .l_paren => Attribute.normalize(pp.tokSlice(param_first)), - else => { - try pp.err(maybe_colon, .malformed_embed_param); - continue; - }, - }; - - const start: u32 = @intCast(pp.token_buf.items.len); - while (true) { - const next = tokenizer.nextNoWS(); - if (next.id == .r_paren) break; - if (next.id == .eof) { - try pp.err(maybe_colon, .malformed_embed_param); - break; } - try pp.token_buf.append(next); - } - const end: u32 = @intCast(pp.token_buf.items.len); - - if (std.mem.eql(u8, param, "limit")) { - if (limit != null) { - try pp.errStr(tokFromRaw(param_first), .duplicate_embed_param, "limit"); - continue; + }, + .keyword_ifdef => { + const sum, const overflowed = @addWithOverflow(pp.if_level, 1); + if (overflowed != 0) + return pp.fatal(directive, "too many #if nestings", .{}); + pp.if_level = sum; + + const macro_name = (try pp.expectMacroName()) orelse return; + try pp.expectNewline(); + if (pp.defines.get(macro_name) != null) { + pp.if_kind.set(pp.if_level, until_endif); + if (pp.verbose) { + pp.verboseLog(directive, "entering then branch of #ifdef", .{}); + } + } else { + pp.if_kind.set(pp.if_level, until_else); + try pp.skip(.until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #ifdef", .{}); + } } - if (start + 1 != end) { - try pp.err(param_first, .malformed_embed_limit); - continue; + }, + .keyword_ifndef => { + const sum, const overflowed = @addWithOverflow(pp.if_level, 1); + if (overflowed != 0) + return pp.fatal(directive, "too many #if nestings", .{}); + pp.if_level = sum; + + const macro_name = (try pp.expectMacroName()) orelse return; + try pp.expectNewline(); + if (pp.defines.get(macro_name) == null) { + pp.if_kind.set(pp.if_level, until_endif); + } else { + pp.if_kind.set(pp.if_level, until_else); + try pp.skip(.until_else); } - const limit_tok = pp.token_buf.items[start]; - if (limit_tok.id != .pp_num) { - try pp.err(param_first, .malformed_embed_limit); - continue; + }, + .keyword_elifdef => { + if (pp.if_level == 0) { + try pp.errTok(directive, .elifdef_without_if); + pp.if_level += 1; + pp.if_kind.set(pp.if_level, until_else); + } else if (pp.if_level == 1) { + pp.clearGuard(); } - limit = std.fmt.parseInt(u32, pp.tokSlice(limit_tok), 10) catch { - try pp.err(limit_tok, .malformed_embed_limit); - continue; - }; - pp.token_buf.items.len = start; - } else if (std.mem.eql(u8, param, "prefix")) { - if (prefix != null) { - try pp.errStr(tokFromRaw(param_first), .duplicate_embed_param, "prefix"); - continue; + switch (pp.if_kind.get(pp.if_level)) { + until_else => { + const macro_name = try pp.expectMacroName(); + if (macro_name == null) { + pp.if_kind.set(pp.if_level, until_else); + try pp.skip(.until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #elifdef", .{}); + } + } else { + try pp.expectNewline(); + if (pp.defines.get(macro_name.?) 
!= null) { + pp.if_kind.set(pp.if_level, until_endif); + if (pp.verbose) { + pp.verboseLog(directive, "entering then branch of #elifdef", .{}); + } + } else { + pp.if_kind.set(pp.if_level, until_else); + try pp.skip(.until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #elifdef", .{}); + } + } + } + }, + until_endif => try pp.skip(.until_endif), + until_endif_seen_else => { + try pp.errTok(directive, .elifdef_after_else); + pp.skipToNl(); + }, + else => unreachable, } - prefix = .{ .start = start, .end = end }; - } else if (std.mem.eql(u8, param, "suffix")) { - if (suffix != null) { - try pp.errStr(tokFromRaw(param_first), .duplicate_embed_param, "suffix"); - continue; + }, + .keyword_elifndef => { + if (pp.if_level == 0) { + try pp.errTok(directive, .elifdef_without_if); + pp.if_level += 1; + pp.if_kind.set(pp.if_level, until_else); + } else if (pp.if_level == 1) { + pp.clearGuard(); } - suffix = .{ .start = start, .end = end }; - } else if (std.mem.eql(u8, param, "if_empty")) { - if (if_empty != null) { - try pp.errStr(tokFromRaw(param_first), .duplicate_embed_param, "if_empty"); - continue; + switch (pp.if_kind.get(pp.if_level)) { + until_else => { + const macro_name = try pp.expectMacroName(); + if (macro_name == null) { + pp.if_kind.set(pp.if_level, until_else); + try pp.skip(.until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #elifndef", .{}); + } + } else { + try pp.expectNewline(); + if (pp.defines.get(macro_name.?) == null) { + pp.if_kind.set(pp.if_level, until_endif); + if (pp.verbose) { + pp.verboseLog(directive, "entering then branch of #elifndef", .{}); + } + } else { + pp.if_kind.set(pp.if_level, until_else); + try pp.skip(.until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #elifndef", .{}); + } + } + } + }, + until_endif => try pp.skip(.until_endif), + until_endif_seen_else => { + try pp.errTok(directive, .elifdef_after_else); + pp.skipToNl(); + }, + else => unreachable, } - if_empty = .{ .start = start, .end = end }; - } else { - try pp.errStr( - tokFromRaw(param_first), - .unsupported_embed_param, - try pp.comp.diagnostics.arena.allocator().dupe(u8, param), - ); - pp.token_buf.items.len = start; - } + }, + .keyword_include => try pp.readInclude(directive), + .keyword_include_next => try pp.readIncludeNext(directive), + .keyword_line => try pp.readLine(), + .keyword_pragma => try pp.readPragma(), + .keyword_undef => try pp.readUndef(), + .keyword_warning => try pp.readErrorMessage(directive, .warning_directive), + .keyword_embed => try pp.readEmbed(directive), + else => try pp.errTok(directive, .invalid_preprocessing_directive), } +} + +/// TODO: handle limit/prefix/suffix/etc +fn readEmbed(pp: *Preprocessor, directive_tok: PreprocessorToken) Error!void { + var is_std: bool = undefined; + const include_str = pp.readHeaderName(&is_std) catch |err| switch (err) { + error.InvalidInclude => return, + else => |e| return e, + }; + + const filename = include_str[1 .. 
include_str.len - 1]; + const include_type: Compilation.IncludeType = switch (include_str[0]) { + '"' => .quotes, + '<' => .angle_brackets, + else => unreachable, + }; - const embed_bytes = (try pp.comp.findEmbed(filename, first.source, include_type, limit)) orelse - return pp.fatalNotFound(filename_tok, filename); + const limit = std.math.maxInt(u32); + const embed_bytes = (try pp.comp.findEmbed(filename, directive_tok.loc.id, include_type, limit)) orelse + return pp.fatalNotFound(directive_tok, filename); defer pp.comp.gpa.free(embed_bytes); - try Range.expand(prefix, pp, tokenizer); - - if (embed_bytes.len == 0) { - try Range.expand(if_empty, pp, tokenizer); - try Range.expand(suffix, pp, tokenizer); - return; - } + if (embed_bytes.len == 0) return; try pp.ensureUnusedTokenCapacity(2 * embed_bytes.len - 1); // N bytes and N-1 commas @@ -2985,102 +1760,117 @@ fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void { const byte = embed_bytes[0]; const start = pp.comp.generated_buf.items.len; try writer.print("{d}", .{byte}); - pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start, .embed_byte, filename_tok)); + var generated = try pp.makeGeneratedToken(start, .embed_byte, directive_tok); + generated.flags.is_bol = true; + pp.addTokenAssumeCapacity(generated); } for (embed_bytes[1..]) |byte| { const start = pp.comp.generated_buf.items.len; try writer.print(",{d}", .{byte}); pp.addTokenAssumeCapacity(.{ .id = .comma, .loc = .{ .id = .generated, .byte_offset = @intCast(start) } }); - pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start + 1, .embed_byte, filename_tok)); + pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start + 1, .embed_byte, directive_tok)); } try pp.comp.generated_buf.append(pp.gpa, '\n'); +} - try Range.expand(suffix, pp, tokenizer); +fn readToken(pp: *Preprocessor) Error!PreprocessorToken { + while (true) { + const tok = try pp.readExpand(); + if (tok.flags.is_bol and tok.id == .hash and tok.hideset == null) { + try pp.readDirective(); + continue; + } + return tok; + } } -// Handle a #include directive. 
-fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInclude) MacroError!void { - const first = tokenizer.nextNoWS(); - const new_source = findIncludeSource(pp, tokenizer, first, which) catch |er| switch (er) { - error.InvalidInclude => return, - else => |e| return e, - }; +pub fn preprocess(pp: *Preprocessor, source: Source) !PreprocessorToken { + const guard = pp.findIncludeGuard(source); + try pp.guard_stack.append(pp.gpa, guard); - // Prevent stack overflow - pp.include_depth += 1; - defer pp.include_depth -= 1; - if (pp.include_depth > max_include_depth) { - try pp.comp.addDiagnostic(.{ - .tag = .too_many_includes, - .loc = .{ .id = first.source, .byte_offset = first.start, .line = first.line }, - }, &.{}); - return error.StopPreprocessing; + try pp.tokenizers.append(pp.gpa, .{ + .buf = source.buf, + .langopts = pp.comp.langopts, + .index = 0, + .source = source.id, + }); + while (true) { + const tok = try pp.readToken(); + if (tok.id == .eof) { + const tokenizer = pp.tokenizers.pop(); + const guard_name = pp.guard_stack.pop(); + if (guard_name) |name| { + try pp.include_guards.put(pp.gpa, tokenizer.source, name); + } + if (pp.tokenizers.items.len == 0) { + return tok; + } + } else { + switch (tok.id) { + .unterminated_comment => try pp.errTok(tok, .unterminated_comment), + else => try pp.addToken(tok), + } + } } +} - if (pp.include_guards.get(new_source.id)) |guard| { - if (pp.defines.contains(guard)) return; +// After how many empty lines are needed to replace them with linemarkers. +const collapse_newlines = 8; + +/// Pretty print tokens and try to preserve whitespace. +pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void { + var i: usize = 0; + while (i < pp.tokens.len) : (i += 1) { + const tok = pp.tokens.get(i); + if (tok.id == .eof) break; + try pp.prettyPrintToken(w, tok); } + try w.writeByte('\n'); +} - if (pp.verbose) { - pp.verboseLog(first, "include file {s}", .{new_source.path}); +fn prettyPrintToken(pp: *Preprocessor, w: anytype, tok: Token) !void { + if (tok.flags.is_bol) { + try w.writeByte('\n'); + } + if (tok.flags.space) { + try w.writeByte(' '); + } + if (tok.id.lexeme()) |some| { + try w.writeAll(some); + } else { + try w.writeAll(pp.tokSlice(tok)); } +} - const token_state = pp.getTokenState(); - try pp.addIncludeStart(new_source); - const eof = pp.preprocessExtra(new_source) catch |er| switch (er) { - error.StopPreprocessing => { - for (pp.expansion_entries.items(.locs)[token_state.expansion_entries_len..]) |loc| TokenWithExpansionLocs.free(loc, pp.gpa); - pp.restoreTokenState(token_state); - return; - }, - else => |e| return e, +pub fn expansionSlice(pp: *Preprocessor, tok: Tree.TokenIndex) []Source.Location { + const S = struct { + fn order_token_index(context: void, lhs: Tree.TokenIndex, rhs: Tree.TokenIndex) std.math.Order { + _ = context; + return std.math.order(lhs, rhs); + } }; - try eof.checkMsEof(new_source, pp.comp); - if (pp.preserve_whitespace and pp.tokens.items(.id)[pp.tokens.len - 1] != .nl) { - try pp.addToken(.{ .id = .nl, .loc = .{ - .id = tokenizer.source, - .line = tokenizer.line, - } }); - } - if (pp.linemarkers == .none) return; - var next = first; - while (true) { - var tmp = tokenizer.*; - next = tmp.nextNoWS(); - if (next.id != .nl) break; - tokenizer.* = tmp; - } - try pp.addIncludeResume(next.source, next.end, next.line); -} - -/// tokens that are part of a pragma directive can happen in 3 ways: -/// 1. directly in the text via `#pragma ...` -/// 2. Via a string literal argument to `_Pragma` -/// 3. 
Via a stringified macro argument which is used as an argument to `_Pragma` -/// operator_loc: Location of `_Pragma`; null if this is from #pragma -/// arg_locs: expansion locations of the argument to _Pragma. empty if #pragma or a raw string literal was used -fn makePragmaToken(pp: *Preprocessor, raw: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !TokenWithExpansionLocs { - var tok = tokFromRaw(raw); - if (operator_loc) |loc| { - try tok.addExpansionLocation(pp.gpa, &.{loc}); - } - try tok.addExpansionLocation(pp.gpa, arg_locs); - return tok; + + const indices = pp.expansion_entries.items(.idx); + const idx = std.sort.binarySearch(Tree.TokenIndex, tok, indices, {}, S.order_token_index) orelse return &.{}; + const locs = pp.expansion_entries.items(.locs)[idx]; + var i: usize = 0; + while (locs[i].id != .unused) : (i += 1) {} + return locs[0..i]; } -pub fn addToken(pp: *Preprocessor, tok: TokenWithExpansionLocs) !void { +pub fn addToken(pp: *Preprocessor, tok: PreprocessorToken) !void { if (tok.expansion_locs) |expansion_locs| { try pp.expansion_entries.append(pp.gpa, .{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs }); } - try pp.tokens.append(pp.gpa, .{ .id = tok.id, .loc = tok.loc }); + try pp.tokens.append(pp.gpa, tok.toTreeToken()); } -pub fn addTokenAssumeCapacity(pp: *Preprocessor, tok: TokenWithExpansionLocs) void { +pub fn addTokenAssumeCapacity(pp: *Preprocessor, tok: PreprocessorToken) void { if (tok.expansion_locs) |expansion_locs| { pp.expansion_entries.appendAssumeCapacity(.{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs }); } - pp.tokens.appendAssumeCapacity(.{ .id = tok.id, .loc = tok.loc }); + pp.tokens.appendAssumeCapacity(tok.toTreeToken()); } pub fn ensureTotalTokenCapacity(pp: *Preprocessor, capacity: usize) !void { @@ -3093,466 +1883,152 @@ pub fn ensureUnusedTokenCapacity(pp: *Preprocessor, capacity: usize) !void { try pp.expansion_entries.ensureUnusedCapacity(pp.gpa, capacity); } -/// Handle a pragma directive -fn pragma(pp: *Preprocessor, tokenizer: *Tokenizer, pragma_tok: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !void { - const name_tok = tokenizer.nextNoWS(); - if (name_tok.id == .nl or name_tok.id == .eof) return; - - const name = pp.tokSlice(name_tok); - try pp.addToken(try pp.makePragmaToken(pragma_tok, operator_loc, arg_locs)); - const pragma_start: u32 = @intCast(pp.tokens.len); - - const pragma_name_tok = try pp.makePragmaToken(name_tok, operator_loc, arg_locs); - try pp.addToken(pragma_name_tok); - while (true) { - const next_tok = tokenizer.next(); - if (next_tok.id == .whitespace) continue; - if (next_tok.id == .eof) { - try pp.addToken(.{ - .id = .nl, - .loc = .{ .id = .generated }, - }); - break; - } - try pp.addToken(try pp.makePragmaToken(next_tok, operator_loc, arg_locs)); - if (next_tok.id == .nl) break; - } - if (pp.comp.getPragma(name)) |prag| unknown: { - return prag.preprocessorCB(pp, pragma_start) catch |er| switch (er) { - error.UnknownPragma => break :unknown, - else => |e| return e, - }; - } - return pp.comp.addDiagnostic(.{ - .tag = .unknown_pragma, - .loc = pragma_name_tok.loc, - }, pragma_name_tok.expansionSlice()); -} - -fn findIncludeFilenameToken( +fn skip( pp: *Preprocessor, - first_token: RawToken, - tokenizer: *Tokenizer, - trailing_token_behavior: enum { ignore_trailing_tokens, expect_nl_eof }, -) !TokenWithExpansionLocs { - var first = first_token; - - if (first.id == .angle_bracket_left) to_end: { - // The tokenizer does not handle include strings 
so do it here. - while (tokenizer.index < tokenizer.buf.len) : (tokenizer.index += 1) { - switch (tokenizer.buf[tokenizer.index]) { - '>' => { - tokenizer.index += 1; - first.end = tokenizer.index; - first.id = .macro_string; - break :to_end; + cont: enum { until_else, until_endif, until_endif_seen_else }, +) Error!void { + var ifs_seen: u32 = 0; + var line_start = true; + var tokenizer = &pp.tokenizers.items[pp.tokenizers.items.len - 1]; + + while (tokenizer.index < tokenizer.buf.len) { + if (line_start) { + const saved_tokenizer = tokenizer.*; + const hash = tokenizer.nextNoWS(); + if (hash.id == .nl) continue; + line_start = false; + if (hash.id != .hash) continue; + const directive = tokenizer.nextNoWS(); + switch (directive.id) { + .keyword_else => { + if (ifs_seen != 0) continue; + if (cont == .until_endif_seen_else) { + // try pp.err(directive, .else_after_else); + continue; + } + tokenizer.* = saved_tokenizer; + return; + }, + .keyword_elif => { + if (ifs_seen != 0 or cont == .until_endif) continue; + if (cont == .until_endif_seen_else) { + // try pp.err(directive, .elif_after_else); + continue; + } + tokenizer.* = saved_tokenizer; + return; + }, + .keyword_elifdef => { + if (ifs_seen != 0 or cont == .until_endif) continue; + if (cont == .until_endif_seen_else) { + // try pp.err(directive, .elifdef_after_else); + continue; + } + tokenizer.* = saved_tokenizer; + return; + }, + .keyword_elifndef => { + if (ifs_seen != 0 or cont == .until_endif) continue; + if (cont == .until_endif_seen_else) { + // try pp.err(directive, .elifndef_after_else); + continue; + } + tokenizer.* = saved_tokenizer; + return; + }, + .keyword_endif => { + if (ifs_seen == 0) { + tokenizer.* = saved_tokenizer; + return; + } + ifs_seen -= 1; }, - '\n' => break, + .keyword_if, .keyword_ifdef, .keyword_ifndef => ifs_seen += 1, else => {}, } - } - try pp.comp.addDiagnostic(.{ - .tag = .header_str_closing, - .loc = .{ .id = first.source, .byte_offset = tokenizer.index, .line = first.line }, - }, &.{}); - try pp.err(first, .header_str_match); - } - - const source_tok = tokFromRaw(first); - const filename_tok, const expanded_trailing = switch (source_tok.id) { - .string_literal, .macro_string => .{ source_tok, false }, - else => expanded: { - // Try to expand if the argument is a macro. - pp.top_expansion_buf.items.len = 0; - defer for (pp.top_expansion_buf.items) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); - try pp.top_expansion_buf.append(source_tok); - pp.expansion_source_loc = source_tok.loc; - - try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr); - var trailing_toks: []const TokenWithExpansionLocs = &.{}; - const include_str = (try pp.reconstructIncludeString(pp.top_expansion_buf.items, &trailing_toks, tokFromRaw(first))) orelse { - try pp.expectNl(tokenizer); - return error.InvalidInclude; - }; - const start = pp.comp.generated_buf.items.len; - try pp.comp.generated_buf.appendSlice(pp.gpa, include_str); - - break :expanded .{ try pp.makeGeneratedToken(start, switch (include_str[0]) { - '"' => .string_literal, - '<' => .macro_string, - else => unreachable, - }, pp.top_expansion_buf.items[0]), trailing_toks.len != 0 }; - }, - }; - - switch (trailing_token_behavior) { - .expect_nl_eof => { - // Error on extra tokens. 
- const nl = tokenizer.nextNoWS(); - if ((nl.id != .nl and nl.id != .eof) or expanded_trailing) { - skipToNl(tokenizer); - try pp.comp.diagnostics.addExtra(pp.comp.langopts, .{ - .tag = .extra_tokens_directive_end, - .loc = filename_tok.loc, - }, filename_tok.expansionSlice(), false); + } else if (tokenizer.buf[tokenizer.index] == '\n') { + line_start = true; + tokenizer.index += 1; + tokenizer.line += 1; + tokenizer.bol = true; + if (pp.preserve_whitespace) { + try pp.addToken(.{ .id = .nl, .loc = .{ + .id = tokenizer.source, + .line = tokenizer.line, + } }); } - }, - .ignore_trailing_tokens => if (expanded_trailing) { - try pp.comp.diagnostics.addExtra(pp.comp.langopts, .{ - .tag = .extra_tokens_directive_end, - .loc = filename_tok.loc, - }, filename_tok.expansionSlice(), false); - }, + } else { + line_start = false; + tokenizer.index += 1; + } + } else { + return pp.errTok(.{ .id = .eof, .loc = .{ .id = tokenizer.source, .byte_offset = tokenizer.index, .line = tokenizer.line } }, .unterminated_conditional_directive); } - return filename_tok; } -fn findIncludeSource(pp: *Preprocessor, tokenizer: *Tokenizer, first: RawToken, which: Compilation.WhichInclude) !Source { - const filename_tok = try pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof); - defer TokenWithExpansionLocs.free(filename_tok.expansion_locs, pp.gpa); - - // Check for empty filename. - const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws); - if (tok_slice.len < 3) { - try pp.err(first, .empty_filename); - return error.InvalidInclude; - } - - // Find the file. - const filename = tok_slice[1 .. tok_slice.len - 1]; - const include_type: Compilation.IncludeType = switch (filename_tok.id) { - .string_literal => .quotes, - .macro_string => .angle_brackets, - else => unreachable, - }; +fn verboseLog(pp: *Preprocessor, tok: PreprocessorToken, comptime fmt: []const u8, args: anytype) void { + const source = pp.comp.getSource(tok.loc.id); + const line_col = source.lineCol(tok.loc); - return (try pp.comp.findInclude(filename, first, include_type, which)) orelse - return pp.fatalNotFound(filename_tok, filename); + const stderr = std.io.getStdErr().writer(); + var buf_writer = std.io.bufferedWriter(stderr); + const writer = buf_writer.writer(); + defer buf_writer.flush() catch {}; + writer.print("{s}:{d}:{d}: ", .{ source.path, line_col.line_no, line_col.col }) catch return; + writer.print(fmt, args) catch return; + writer.writeByte('\n') catch return; + writer.writeAll(line_col.line) catch return; + writer.writeByte('\n') catch return; } -fn printLinemarker( - pp: *Preprocessor, - w: anytype, - line_no: u32, - source: Source, - start_resume: enum(u8) { start, @"resume", none }, -) !void { - try w.writeByte('#'); - if (pp.linemarkers == .line_directives) try w.writeAll("line"); - try w.print(" {d} \"", .{line_no}); - for (source.path) |byte| switch (byte) { - '\n' => try w.writeAll("\\n"), - '\r' => try w.writeAll("\\r"), - '\t' => try w.writeAll("\\t"), - '\\' => try w.writeAll("\\\\"), - '"' => try w.writeAll("\\\""), - ' ', '!', '#'...'&', '('...'[', ']'...'~' => try w.writeByte(byte), - // Use hex escapes for any non-ASCII/unprintable characters. - // This ensures that the parsed version of this string will end up - // containing the same bytes as the input regardless of encoding. 
- else => { - try w.writeAll("\\x"); - try std.fmt.formatInt(byte, 16, .lower, .{ .width = 2, .fill = '0' }, w); - }, - }; - try w.writeByte('"'); - if (pp.linemarkers == .numeric_directives) { - switch (start_resume) { - .none => {}, - .start => try w.writeAll(" 1"), - .@"resume" => try w.writeAll(" 2"), - } - switch (source.kind) { - .user => {}, - .system => try w.writeAll(" 3"), - .extern_c_system => try w.writeAll(" 3 4"), - } - } - try w.writeByte('\n'); +fn fatal(pp: *Preprocessor, tok: PreprocessorToken, comptime fmt: []const u8, args: anytype) Compilation.Error { + try pp.comp.diagnostics.list.append(pp.gpa, .{ + .tag = .cli_error, + .kind = .@"fatal error", + .extra = .{ .str = try std.fmt.allocPrint(pp.comp.diagnostics.arena.allocator(), fmt, args) }, + .loc = tok.loc, + }); + return error.FatalError; } -// After how many empty lines are needed to replace them with linemarkers. -const collapse_newlines = 8; - -/// Pretty print tokens and try to preserve whitespace. -pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void { - const tok_ids = pp.tokens.items(.id); - - var i: u32 = 0; - var last_nl = true; - outer: while (true) : (i += 1) { - var cur: Token = pp.tokens.get(i); - switch (cur.id) { - .eof => { - if (!last_nl) try w.writeByte('\n'); - return; - }, - .nl => { - var newlines: u32 = 0; - for (tok_ids[i..], i..) |id, j| { - if (id == .nl) { - newlines += 1; - } else if (id == .eof) { - if (!last_nl) try w.writeByte('\n'); - return; - } else if (id != .whitespace) { - if (pp.linemarkers == .none) { - if (newlines < 2) break; - } else if (newlines < collapse_newlines) { - break; - } - - i = @intCast((j - 1) - @intFromBool(tok_ids[j - 1] == .whitespace)); - if (!last_nl) try w.writeAll("\n"); - if (pp.linemarkers != .none) { - const next = pp.tokens.get(i); - const source = pp.comp.getSource(next.loc.id); - const line_col = source.lineCol(next.loc); - try pp.printLinemarker(w, line_col.line_no, source, .none); - last_nl = true; - } - continue :outer; - } - } - last_nl = true; - try w.writeAll("\n"); - }, - .keyword_pragma => { - const pragma_name = pp.expandedSlice(pp.tokens.get(i + 1)); - const end_idx = mem.indexOfScalarPos(Token.Id, tok_ids, i, .nl) orelse i + 1; - const pragma_len = @as(u32, @intCast(end_idx)) - i; - - if (pp.comp.getPragma(pragma_name)) |prag| { - if (!prag.shouldPreserveTokens(pp, i + 1)) { - try w.writeByte('\n'); - i += pragma_len; - cur = pp.tokens.get(i); - continue; - } - } - try w.writeAll("#pragma"); - i += 1; - while (true) : (i += 1) { - cur = pp.tokens.get(i); - if (cur.id == .nl) { - try w.writeByte('\n'); - last_nl = true; - break; - } - try w.writeByte(' '); - const slice = pp.expandedSlice(cur); - try w.writeAll(slice); - } - }, - .whitespace => { - var slice = pp.expandedSlice(cur); - while (mem.indexOfScalar(u8, slice, '\n')) |some| { - if (pp.linemarkers != .none) try w.writeByte('\n'); - slice = slice[some + 1 ..]; - } - for (slice) |_| try w.writeByte(' '); - last_nl = false; - }, - .include_start => { - const source = pp.comp.getSource(cur.loc.id); +fn fatalNotFound(pp: *Preprocessor, tok: PreprocessorToken, filename: []const u8) Compilation.Error { + const old = pp.comp.diagnostics.fatal_errors; + pp.comp.diagnostics.fatal_errors = true; + defer pp.comp.diagnostics.fatal_errors = old; - try pp.printLinemarker(w, 1, source, .start); - last_nl = true; - }, - .include_resume => { - const source = pp.comp.getSource(cur.loc.id); - const line_col = source.lineCol(cur.loc); - if (!last_nl) try w.writeAll("\n"); + try 
pp.comp.diagnostics.addExtra(pp.comp.langopts, .{ .tag = .cli_error, .loc = tok.loc, .extra = .{ + .str = try std.fmt.allocPrint(pp.comp.diagnostics.arena.allocator(), "'{s}' not found", .{filename}), + } }, tok.expansionSlice(), false); + unreachable; // addExtra should've returned FatalError +} - try pp.printLinemarker(w, line_col.line_no, source, .@"resume"); - last_nl = true; - }, - else => { - const slice = pp.expandedSlice(cur); - try w.writeAll(slice); - last_nl = false; - }, - } +/// Consume next token, error if it is not an identifier. +fn expectMacroName(pp: *Preprocessor) Error!?[]const u8 { + const macro_name = pp.getToken(); + if (!macro_name.id.isMacroIdentifier()) { + try pp.errTok(macro_name, .macro_name_missing); + pp.skipToNl(); + return null; } + return pp.tokSlice(macro_name); } -test "Preserve pragma tokens sometimes" { - const allocator = std.testing.allocator; - const Test = struct { - fn runPreprocessor(source_text: []const u8) ![]const u8 { - var buf = std.ArrayList(u8).init(allocator); - defer buf.deinit(); - - var comp = Compilation.init(allocator); - defer comp.deinit(); - - try comp.addDefaultPragmaHandlers(); - - var pp = Preprocessor.init(&comp); - defer pp.deinit(); - - pp.preserve_whitespace = true; - assert(pp.linemarkers == .none); - - const test_runner_macros = try comp.addSourceFromBuffer("", source_text); - const eof = try pp.preprocess(test_runner_macros); - try pp.addToken(eof); - try pp.prettyPrintTokens(buf.writer()); - return allocator.dupe(u8, buf.items); - } - - fn check(source_text: []const u8, expected: []const u8) !void { - const output = try runPreprocessor(source_text); - defer allocator.free(output); - - try std.testing.expectEqualStrings(expected, output); - } - }; - const preserve_gcc_diagnostic = - \\#pragma GCC diagnostic error "-Wnewline-eof" - \\#pragma GCC warning error "-Wnewline-eof" - \\int x; - \\#pragma GCC ignored error "-Wnewline-eof" - \\ - ; - try Test.check(preserve_gcc_diagnostic, preserve_gcc_diagnostic); - - const omit_once = - \\#pragma once - \\int x; - \\#pragma once - \\ - ; - // TODO should only be one newline afterwards when emulating clang - try Test.check(omit_once, "\nint x;\n\n"); - - const omit_poison = - \\#pragma GCC poison foobar - \\ - ; - try Test.check(omit_poison, "\n"); -} - -test "destringify" { - const allocator = std.testing.allocator; - const Test = struct { - fn testDestringify(pp: *Preprocessor, stringified: []const u8, destringified: []const u8) !void { - pp.char_buf.clearRetainingCapacity(); - try pp.char_buf.ensureUnusedCapacity(stringified.len); - pp.destringify(stringified); - try std.testing.expectEqualStrings(destringified, pp.char_buf.items); - } - }; - var comp = Compilation.init(allocator); - defer comp.deinit(); - var pp = Preprocessor.init(&comp); - defer pp.deinit(); - - try Test.testDestringify(&pp, "hello\tworld\n", "hello\tworld\n"); - try Test.testDestringify(&pp, - \\ \"FOO BAR BAZ\" - , - \\ "FOO BAR BAZ" - ); - try Test.testDestringify(&pp, - \\ \\t\\n - \\ - , - \\ \t\n - \\ - ); -} - -test "Include guards" { - const Test = struct { - /// This is here so that when #elifdef / #elifndef are added we don't forget - /// to test that they don't accidentally break include guard detection - fn pairsWithIfndef(tok_id: RawToken.Id) bool { - return switch (tok_id) { - .keyword_elif, - .keyword_elifdef, - .keyword_elifndef, - .keyword_else, - => true, - - .keyword_include, - .keyword_include_next, - .keyword_embed, - .keyword_define, - .keyword_defined, - .keyword_undef, - 
.keyword_ifdef, - .keyword_ifndef, - .keyword_error, - .keyword_warning, - .keyword_pragma, - .keyword_line, - .keyword_endif, - => false, - else => unreachable, - }; - } - - fn skippable(tok_id: RawToken.Id) bool { - return switch (tok_id) { - .keyword_defined, .keyword_va_args, .keyword_va_opt, .keyword_endif => true, - else => false, - }; - } - - fn testIncludeGuard(allocator: std.mem.Allocator, comptime template: []const u8, tok_id: RawToken.Id, expected_guards: u32) !void { - var comp = Compilation.init(allocator); - defer comp.deinit(); - var pp = Preprocessor.init(&comp); - defer pp.deinit(); - - const path = try std.fs.path.join(allocator, &.{ ".", "bar.h" }); - defer allocator.free(path); - - _ = try comp.addSourceFromBuffer(path, "int bar = 5;\n"); - - var buf = std.ArrayList(u8).init(allocator); - defer buf.deinit(); - - var writer = buf.writer(); - switch (tok_id) { - .keyword_include, .keyword_include_next => try writer.print(template, .{ tok_id.lexeme().?, " \"bar.h\"" }), - .keyword_define, .keyword_undef => try writer.print(template, .{ tok_id.lexeme().?, " BAR" }), - .keyword_ifndef, - .keyword_ifdef, - .keyword_elifdef, - .keyword_elifndef, - => try writer.print(template, .{ tok_id.lexeme().?, " BAR\n#endif" }), - else => try writer.print(template, .{ tok_id.lexeme().?, "" }), - } - const source = try comp.addSourceFromBuffer("test.h", buf.items); - _ = try pp.preprocess(source); - - try std.testing.expectEqual(expected_guards, pp.include_guards.count()); - } +/// Return the name of the #ifndef guard macro that starts a source, if any. +/// If a source starts with `#ifndef IDENTIFIER`, return `IDENTIFIER` +/// This function does not validate that the entire source is guarded by the +/// initial ifndef, if any +fn findIncludeGuard(pp: *Preprocessor, source: Source) ?[]const u8 { + var tokenizer = Tokenizer{ + .buf = source.buf, + .langopts = pp.comp.langopts, + .source = source.id, }; - const tags = std.meta.tags(RawToken.Id); - for (tags) |tag| { - if (Test.skippable(tag)) continue; - var copy = tag; - copy.simplifyMacroKeyword(); - if (copy != tag or tag == .keyword_else) { - const inside_ifndef_template = - \\//Leading comment (should be ignored) - \\ - \\#ifndef FOO - \\#{s}{s} - \\#endif - ; - const expected_guards: u32 = if (Test.pairsWithIfndef(tag)) 0 else 1; - try Test.testIncludeGuard(std.testing.allocator, inside_ifndef_template, tag, expected_guards); - - const outside_ifndef_template = - \\#ifndef FOO - \\#endif - \\#{s}{s} - ; - try Test.testIncludeGuard(std.testing.allocator, outside_ifndef_template, tag, 0); - } - } + var hash = tokenizer.nextNoWS(); + while (hash.id == .nl) hash = tokenizer.nextNoWS(); + if (hash.id != .hash) return null; + const ifndef = tokenizer.nextNoWS(); + if (ifndef.id != .keyword_ifndef) return null; + const guard = tokenizer.nextNoWS(); + if (guard.id != .identifier) return null; + return pp.tokSlice(.{ .id = guard.id, .loc = .{ .id = guard.source, .byte_offset = guard.start, .line = guard.line } }); } diff --git a/src/aro/Tree.zig b/src/aro/Tree.zig index f176930a..a58f42ea 100644 --- a/src/aro/Tree.zig +++ b/src/aro/Tree.zig @@ -6,11 +6,19 @@ const Compilation = @import("Compilation.zig"); const number_affixes = @import("Tree/number_affixes.zig"); const Source = @import("Source.zig"); const Tokenizer = @import("Tokenizer.zig"); +const Treap = @import("Treap.zig"); const Type = @import("Type.zig"); const Value = @import("Value.zig"); const StringInterner = @import("StringInterner.zig"); +const Flags = packed struct(u8) { + 
is_bol: bool = false, + space: bool = false, + _: u6 = undefined, +}; + pub const Token = struct { + flags: Flags, id: Id, loc: Source.Location, @@ -21,25 +29,36 @@ pub const Token = struct { }; pub const TokenWithExpansionLocs = struct { - id: Token.Id, - flags: packed struct { - expansion_disabled: bool = false, - is_macro_arg: bool = false, - } = .{}, - /// This location contains the actual token slice which might be generated. - /// If it is generated then there is guaranteed to be at least one - /// expansion location. + const Self = @This(); + + flags: Flags = .{}, + id: Tokenizer.Token.Id, + hideset: Treap.Node = null, loc: Source.Location, expansion_locs: ?[*]Source.Location = null, - pub fn expansionSlice(tok: TokenWithExpansionLocs) []const Source.Location { + pub fn toTreeToken(self: Self) Token { + return .{ .flags = self.flags, .id = self.id, .loc = self.loc }; + } + + pub fn argPosition(self: Self) u32 { + std.debug.assert(self.id == .macro_param); + return self.loc.byte_offset; + } + + pub fn isVarArg(self: Self) bool { + std.debug.assert(self.id == .macro_param); + return self.loc.line != 0; + } + + pub fn expansionSlice(tok: Self) []const Source.Location { const locs = tok.expansion_locs orelse return &[0]Source.Location{}; var i: usize = 0; while (locs[i].id != .unused) : (i += 1) {} return locs[0..i]; } - pub fn addExpansionLocation(tok: *TokenWithExpansionLocs, gpa: std.mem.Allocator, new: []const Source.Location) !void { + pub fn addExpansionLocation(tok: *Self, gpa: std.mem.Allocator, new: []const Source.Location) !void { if (new.len == 0 or tok.id == .whitespace or tok.id == .macro_ws or tok.id == .placemarker) return; var list = std.ArrayList(Source.Location).init(gpa); defer { @@ -80,14 +99,14 @@ pub const TokenWithExpansionLocs = struct { gpa.free(locs[0 .. 
i + 1]); } - pub fn dupe(tok: TokenWithExpansionLocs, gpa: std.mem.Allocator) !TokenWithExpansionLocs { + pub fn dupe(tok: Self, gpa: std.mem.Allocator) !Self { var copy = tok; copy.expansion_locs = null; try copy.addExpansionLocation(gpa, tok.expansionSlice()); return copy; } - pub fn checkMsEof(tok: TokenWithExpansionLocs, source: Source, comp: *Compilation) !void { + pub fn checkMsEof(tok: Self, source: Source, comp: *Compilation) !void { std.debug.assert(tok.id == .eof); if (source.buf.len > tok.loc.byte_offset and source.buf[tok.loc.byte_offset] == 0x1A) { try comp.addDiagnostic(.{ @@ -100,6 +119,9 @@ pub const TokenWithExpansionLocs = struct { }, &.{}); } } + + pub const one: Self = .{ .id = .one, .loc = .{} }; + pub const zero: Self = .{ .id = .zero, .loc = .{} }; }; pub const TokenIndex = u32; diff --git a/src/aro/pragmas/gcc.zig b/src/aro/pragmas/gcc.zig index 91ab750b..8887b632 100644 --- a/src/aro/pragmas/gcc.zig +++ b/src/aro/pragmas/gcc.zig @@ -69,7 +69,7 @@ fn diagnosticHandler(self: *GCC, pp: *Preprocessor, start_idx: TokenIndex) Pragm const diagnostic_tok = pp.tokens.get(start_idx); if (diagnostic_tok.id == .nl) return; - const diagnostic = std.meta.stringToEnum(Directive.Diagnostics, pp.expandedSlice(diagnostic_tok)) orelse + const diagnostic = std.meta.stringToEnum(Directive.Diagnostics, pp.tokSlice(diagnostic_tok)) orelse return error.UnknownPragma; switch (diagnostic) { @@ -112,7 +112,7 @@ fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex const directive_tok = pp.tokens.get(start_idx + 1); if (directive_tok.id == .nl) return; - const gcc_pragma = std.meta.stringToEnum(Directive, pp.expandedSlice(directive_tok)) orelse + const gcc_pragma = std.meta.stringToEnum(Directive, pp.tokSlice(directive_tok)) orelse return pp.comp.addDiagnostic(.{ .tag = .unknown_gcc_pragma, .loc = directive_tok.loc, @@ -159,7 +159,7 @@ fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex .loc = tok.loc, }, pp.expansionSlice(start_idx + i)); } - const str = pp.expandedSlice(tok); + const str = pp.tokSlice(tok); if (pp.defines.get(str) != null) { try pp.comp.addDiagnostic(.{ .tag = .pragma_poison_macro, @@ -177,7 +177,7 @@ fn parserHandler(pragma: *Pragma, p: *Parser, start_idx: TokenIndex) Compilation var self: *GCC = @fieldParentPtr("pragma", pragma); const directive_tok = p.pp.tokens.get(start_idx + 1); if (directive_tok.id == .nl) return; - const name = p.pp.expandedSlice(directive_tok); + const name = p.pp.tokSlice(directive_tok); if (mem.eql(u8, name, "diagnostic")) { return self.diagnosticHandler(p.pp, start_idx + 2) catch |err| switch (err) { error.UnknownPragma => {}, // handled during preprocessing @@ -190,7 +190,7 @@ fn parserHandler(pragma: *Pragma, p: *Parser, start_idx: TokenIndex) Compilation fn preserveTokens(_: *Pragma, pp: *Preprocessor, start_idx: TokenIndex) bool { const next = pp.tokens.get(start_idx + 1); if (next.id != .nl) { - const name = pp.expandedSlice(next); + const name = pp.tokSlice(next); if (mem.eql(u8, name, "poison")) { return false; } diff --git a/test/runner.zig b/test/runner.zig index a1d6cbf7..8e8a0fff 100644 --- a/test/runner.zig +++ b/test/runner.zig @@ -239,7 +239,7 @@ pub fn main() !void { try pp.addToken(eof); if (pp.defines.get("TESTS_SKIPPED")) |macro| { - if (macro.is_func or macro.tokens.len != 1 or macro.tokens[0].id != .pp_num) { + if (macro.kind == .func or macro.tokens.len != 1 or macro.tokens[0].id != .pp_num) { fail_count += 1; std.debug.print("invalid TESTS_SKIPPED, definition should 
contain exactly one integer literal {}\n", .{macro}); continue; @@ -380,7 +380,7 @@ pub fn main() !void { if (pp.defines.get("EXPECTED_OUTPUT")) |macro| blk: { if (comp.diagnostics.errors != 0) break :blk; - if (macro.is_func) { + if (macro.kind == .func) { fail_count += 1; std.debug.print("invalid EXPECTED_OUTPUT {}\n", .{macro}); continue; @@ -470,7 +470,7 @@ fn checkExpectedErrors(pp: *aro.Preprocessor, buf: *std.ArrayList(u8)) !?bool { defer m.deinit(); aro.Diagnostics.renderMessages(pp.comp, &m); - if (macro.is_func) { + if (macro.kind == .func) { std.debug.print("invalid EXPECTED_ERRORS {}\n", .{macro}); return false; } From efb35a1a1c777887cb0f223c2627a939f27159f5 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Thu, 18 Jul 2024 14:49:57 -0700 Subject: [PATCH 04/10] Preprocessor: basic pragma directive support --- src/aro/Preprocessor.zig | 80 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 8 deletions(-) diff --git a/src/aro/Preprocessor.zig b/src/aro/Preprocessor.zig index 33df74c8..d0eae66c 100644 --- a/src/aro/Preprocessor.zig +++ b/src/aro/Preprocessor.zig @@ -1309,9 +1309,39 @@ fn readLine(pp: *Preprocessor) Error!void { try pp.expectNewline(); } -fn readPragma(pp: *Preprocessor) Error!void { - // TODO - pp.skipToNl(); +fn readPragma(pp: *Preprocessor, pragma_tok: PreprocessorToken) Error!void { + const name_tok = pp.getToken(); + if (name_tok.id == .nl or name_tok.id == .eof) return; + + try pp.addToken(pragma_tok); + + const pragma_start: u32 = @intCast(pp.tokens.len); + try pp.addToken(name_tok); + + while (true) { + const next_tok = pp.getToken(); + if (next_tok.id == .eof) { + try pp.addToken(.{ + .id = .nl, + .loc = .{ .id = .generated }, + }); + break; + } + try pp.addToken(next_tok); + if (next_tok.id == .nl) break; + } + const name = pp.tokSlice(name_tok); + if (pp.comp.getPragma(name)) |prag| unknown: { + return prag.preprocessorCB(pp, pragma_start) catch |er| switch (er) { + error.UnknownPragma => break :unknown, + error.StopPreprocessing => { + _ = pp.tokenizers.pop(); + return; + }, + else => |e| return e, + }; + } + return pp.errTok(name_tok, .unknown_pragma); } fn readUndef(pp: *Preprocessor) Error!void { @@ -1367,6 +1397,7 @@ fn readIncludeExtra(pp: *Preprocessor, include_token: PreprocessorToken, which: }, &.{}); return error.FatalError; } + pp.preprocess_count += 1; try pp.tokenizers.append(pp.gpa, .{ .buf = source.buf, .langopts = pp.comp.langopts, @@ -1719,7 +1750,7 @@ fn readDirective(pp: *Preprocessor) Error!void { .keyword_include => try pp.readInclude(directive), .keyword_include_next => try pp.readIncludeNext(directive), .keyword_line => try pp.readLine(), - .keyword_pragma => try pp.readPragma(), + .keyword_pragma => try pp.readPragma(directive), .keyword_undef => try pp.readUndef(), .keyword_warning => try pp.readErrorMessage(directive, .warning_directive), .keyword_embed => try pp.readEmbed(directive), @@ -1789,6 +1820,7 @@ pub fn preprocess(pp: *Preprocessor, source: Source) !PreprocessorToken { const guard = pp.findIncludeGuard(source); try pp.guard_stack.append(pp.gpa, guard); + pp.preprocess_count += 1; try pp.tokenizers.append(pp.gpa, .{ .buf = source.buf, .langopts = pp.comp.langopts, @@ -1820,11 +1852,43 @@ const collapse_newlines = 8; /// Pretty print tokens and try to preserve whitespace. 
pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void { - var i: usize = 0; + const tok_ids = pp.tokens.items(.id); + var i: u32 = 0; + var last_nl = false; while (i < pp.tokens.len) : (i += 1) { - const tok = pp.tokens.get(i); - if (tok.id == .eof) break; - try pp.prettyPrintToken(w, tok); + var cur: Token = pp.tokens.get(i); + switch (cur.id) { + .eof => break, + .keyword_pragma => { + const pragma_name = pp.tokSlice(pp.tokens.get(i + 1)); + const end_idx = mem.indexOfScalarPos(Token.Id, tok_ids, i, .nl) orelse i + 1; + const pragma_len = @as(u32, @intCast(end_idx)) - i; + + if (pp.comp.getPragma(pragma_name)) |prag| { + if (!prag.shouldPreserveTokens(pp, i + 1)) { + try w.writeByte('\n'); + i += pragma_len; + cur = pp.tokens.get(i); + continue; + } + } + try w.writeAll("#pragma"); + i += 1; + while (true) : (i += 1) { + cur = pp.tokens.get(i); + if (cur.id == .nl) { + try w.writeByte('\n'); + last_nl = true; + break; + } + try w.writeByte(' '); + const slice = pp.tokSlice(cur); + try w.writeAll(slice); + } + + }, + else => try pp.prettyPrintToken(w, cur), + } } try w.writeByte('\n'); } From e8e87cf9649383ed42f76d328c1b7d516ad4e13f Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Thu, 18 Jul 2024 16:19:04 -0700 Subject: [PATCH 05/10] Preprocessor: allocate space before appending --- src/aro/Preprocessor.zig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/aro/Preprocessor.zig b/src/aro/Preprocessor.zig index d0eae66c..3bdfd4d9 100644 --- a/src/aro/Preprocessor.zig +++ b/src/aro/Preprocessor.zig @@ -600,10 +600,10 @@ fn stringize(pp: *Preprocessor, tmpl: PreprocessorToken, args_range: MacroArg) ! const args = args_range.slice(pp.macro_arg_tokens.items); for (args, 0..) |tok, i| { const slice = pp.tokSlice(tok); - if (slice.len > 0 and tok.flags.space and i != 0) { - try pp.comp.generated_buf.append(pp.gpa, ' '); - } - try pp.comp.generated_buf.appendSlice(pp.gpa, slice); + const needs_space = slice.len > 0 and tok.flags.space and i != 0; + const bytes_needed = slice.len + @intFromBool(needs_space); + try pp.comp.generated_buf.ensureUnusedCapacity(pp.gpa, bytes_needed); + pp.comp.generated_buf.appendSliceAssumeCapacity(pp.tokSlice(tok)); } try pp.comp.generated_buf.append(pp.gpa, '"'); var tok = tmpl; From a4a66abef8c44d746daad2320903ad72dab1e513 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Thu, 18 Jul 2024 21:12:45 -0700 Subject: [PATCH 06/10] Preprocessor: fix stringification --- src/aro/Preprocessor.zig | 69 ++++++++++++++++++++++++++++++++++------ 1 file changed, 59 insertions(+), 10 deletions(-) diff --git a/src/aro/Preprocessor.zig b/src/aro/Preprocessor.zig index 3bdfd4d9..ce432006 100644 --- a/src/aro/Preprocessor.zig +++ b/src/aro/Preprocessor.zig @@ -595,17 +595,67 @@ fn addHideSet(pp: *Preprocessor, toks: []PreprocessorToken, hideset: Treap.Node) } fn stringize(pp: *Preprocessor, tmpl: PreprocessorToken, args_range: MacroArg) !PreprocessorToken { + const char_buf_top = pp.char_buf.items.len; + defer pp.char_buf.items.len = char_buf_top; + const start = pp.comp.generated_buf.items.len; - try pp.comp.generated_buf.append(pp.gpa, '"'); + try pp.char_buf.append(pp.gpa, '"'); const args = args_range.slice(pp.macro_arg_tokens.items); - for (args, 0..) 
|tok, i| { - const slice = pp.tokSlice(tok); - const needs_space = slice.len > 0 and tok.flags.space and i != 0; - const bytes_needed = slice.len + @intFromBool(needs_space); - try pp.comp.generated_buf.ensureUnusedCapacity(pp.gpa, bytes_needed); - pp.comp.generated_buf.appendSliceAssumeCapacity(pp.tokSlice(tok)); + for (args) |tok| { + if (tok.flags.space and pp.char_buf.items.len - 1 > char_buf_top) { + try pp.char_buf.append(pp.gpa, ' '); + } + // backslashes not inside strings are not escaped + const is_str = switch (tok.id) { + .string_literal, + .string_literal_utf_16, + .string_literal_utf_8, + .string_literal_utf_32, + .string_literal_wide, + .char_literal, + .char_literal_utf_16, + .char_literal_utf_32, + .char_literal_wide, + => true, + else => false, + }; + + for (pp.tokSlice(tok)) |c| { + if (c == '"') + try pp.char_buf.appendSlice(pp.gpa, "\\\"") + else if (c == '\\' and is_str) + try pp.char_buf.appendSlice(pp.gpa, "\\\\") + else + try pp.char_buf.append(pp.gpa, c); + } } - try pp.comp.generated_buf.append(pp.gpa, '"'); + try pp.char_buf.ensureUnusedCapacity(pp.gpa, 2); + if (pp.char_buf.items[pp.char_buf.items.len - 1] != '\\') { + pp.char_buf.appendSliceAssumeCapacity("\"\n"); + } else { + pp.char_buf.appendAssumeCapacity('"'); + var tokenizer: Tokenizer = .{ + .buf = pp.char_buf.items, + .index = 0, + .source = .generated, + .langopts = pp.comp.langopts, + .line = 0, + }; + const item = tokenizer.next(); + if (item.id == .unterminated_string_literal) { + const tok = args[args.len - 1]; + try pp.comp.addDiagnostic(.{ + .tag = .invalid_pp_stringify_escape, + .loc = tok.loc, + }, tok.expansionSlice()); + pp.char_buf.items.len -= 2; // erase unpaired backslash and appended end quote + pp.char_buf.appendAssumeCapacity('"'); + } + pp.char_buf.appendAssumeCapacity('\n'); + } + + try pp.comp.generated_buf.appendSlice(pp.gpa, pp.char_buf.items[char_buf_top..]); + var tok = tmpl; tok.id = .string_literal; tok.loc = .{ @@ -1885,9 +1935,8 @@ pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void { const slice = pp.tokSlice(cur); try w.writeAll(slice); } - }, - else => try pp.prettyPrintToken(w, cur), + else => try pp.prettyPrintToken(w, cur), } } try w.writeByte('\n'); From e49dde70aba67c2940f385af588bd61539bc4376 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Fri, 19 Jul 2024 14:26:00 -0700 Subject: [PATCH 07/10] Preprocessor: start working on expansion locations --- src/aro/Preprocessor.zig | 53 +++++++++++++++++----------------- src/aro/Tree.zig | 61 ++++++++++++++++++++-------------------- 2 files changed, 58 insertions(+), 56 deletions(-) diff --git a/src/aro/Preprocessor.zig b/src/aro/Preprocessor.zig index ce432006..6b0c8beb 100644 --- a/src/aro/Preprocessor.zig +++ b/src/aro/Preprocessor.zig @@ -221,14 +221,14 @@ fn handleCounterMacro(pp: *Preprocessor, tok: PreprocessorToken) Error!void { } fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: PreprocessorToken) !PreprocessorToken { - const pasted_token = PreprocessorToken{ .id = id, .flags = source.flags, .loc = .{ + var pasted_token = PreprocessorToken{ .id = id, .flags = source.flags, .loc = .{ .id = .generated, .byte_offset = @intCast(start), .line = pp.generated_line, } }; pp.generated_line += 1; - // try pasted_token.addExpansionLocation(pp.gpa, &.{source.loc}); - // try pasted_token.addExpansionLocation(pp.gpa, source.expansionSlice()); + try pasted_token.addExpansionLocation(pp.gpa, source.loc); + // try pasted_token.addExpansionLocation(pp.gpa, source.expansionSlice()); TODO return 
pasted_token; } @@ -668,7 +668,6 @@ fn stringize(pp: *Preprocessor, tmpl: PreprocessorToken, args_range: MacroArg) ! } fn subst(pp: *Preprocessor, macro: *const Macro, macro_tok: PreprocessorToken, args: MacroArgList, hideset_arg: Treap.Node) ![]PreprocessorToken { - _ = macro_tok; var hideset = hideset_arg; var r: TokenList = .{}; defer r.deinit(pp.gpa); @@ -728,6 +727,10 @@ fn subst(pp: *Preprocessor, macro: *const Macro, macro_tok: PreprocessorToken, a try r.append(pp.gpa, t0); } try pp.addHideSet(r.items, hideset); + for (r.items) |*tok| { + try tok.addExpansionLocation(pp.gpa, macro_tok.loc); + try tok.addExpansionLocationList(pp.gpa, macro_tok.loc_list); + } return r.toOwnedSlice(pp.gpa); } @@ -838,6 +841,10 @@ fn readExpandNewline(pp: *Preprocessor) Error!PreprocessorToken { const new_hideset = try pp.treap.addNodeTo(tok.hideset, safe_name); const tokens = try pp.subst(macro, tok, MacroArgList.empty, new_hideset); + for (tokens) |*t| { + try t.addExpansionLocation(pp.gpa, tok.loc); + try t.addExpansionLocationList(pp.gpa, tok.loc_list); + } defer pp.gpa.free(tokens); pp.propagateSpace(tokens, tok); try pp.ungetAll(tokens); @@ -1830,8 +1837,6 @@ fn readEmbed(pp: *Preprocessor, directive_tok: PreprocessorToken) Error!void { if (embed_bytes.len == 0) return; - try pp.ensureUnusedTokenCapacity(2 * embed_bytes.len - 1); // N bytes and N-1 commas - // TODO: We currently only support systems with CHAR_BIT == 8 // If the target's CHAR_BIT is not 8, we need to write out correctly-sized embed_bytes // and correctly account for the target's endianness @@ -1843,14 +1848,14 @@ fn readEmbed(pp: *Preprocessor, directive_tok: PreprocessorToken) Error!void { try writer.print("{d}", .{byte}); var generated = try pp.makeGeneratedToken(start, .embed_byte, directive_tok); generated.flags.is_bol = true; - pp.addTokenAssumeCapacity(generated); + try pp.addToken(generated); } for (embed_bytes[1..]) |byte| { const start = pp.comp.generated_buf.items.len; try writer.print(",{d}", .{byte}); - pp.addTokenAssumeCapacity(.{ .id = .comma, .loc = .{ .id = .generated, .byte_offset = @intCast(start) } }); - pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start + 1, .embed_byte, directive_tok)); + try pp.addToken(.{ .id = .comma, .loc = .{ .id = .generated, .byte_offset = @intCast(start) } }); + try pp.addToken(try pp.makeGeneratedToken(start + 1, .embed_byte, directive_tok)); } try pp.comp.generated_buf.append(pp.gpa, '\n'); } @@ -1973,27 +1978,23 @@ pub fn expansionSlice(pp: *Preprocessor, tok: Tree.TokenIndex) []Source.Location } pub fn addToken(pp: *Preprocessor, tok: PreprocessorToken) !void { - if (tok.expansion_locs) |expansion_locs| { - try pp.expansion_entries.append(pp.gpa, .{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs }); - } - try pp.tokens.append(pp.gpa, tok.toTreeToken()); -} + var r: std.ArrayListUnmanaged(Source.Location) = .{}; + defer r.deinit(pp.gpa); -pub fn addTokenAssumeCapacity(pp: *Preprocessor, tok: PreprocessorToken) void { - if (tok.expansion_locs) |expansion_locs| { - pp.expansion_entries.appendAssumeCapacity(.{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs }); + var it = tok.loc_list.first; + while (it) |node| : (it = node.next) { + try r.append(pp.gpa, node.data); } - pp.tokens.appendAssumeCapacity(tok.toTreeToken()); -} + if (r.items.len > 0) { + // std.debug.print("gottem\n", .{}); + try r.append(pp.gpa, .{ .id = .unused, .byte_offset = 1 }); + try pp.expansion_entries.ensureUnusedCapacity(pp.gpa, 1); -pub fn ensureTotalTokenCapacity(pp: *Preprocessor, 
capacity: usize) !void { - try pp.tokens.ensureTotalCapacity(pp.gpa, capacity); - try pp.expansion_entries.ensureTotalCapacity(pp.gpa, capacity); -} + const items = try r.toOwnedSlice(pp.gpa); // TODO: reverse? + pp.expansion_entries.appendAssumeCapacity(.{ .idx = @intCast(pp.tokens.len), .locs = items.ptr }); + } -pub fn ensureUnusedTokenCapacity(pp: *Preprocessor, capacity: usize) !void { - try pp.tokens.ensureUnusedCapacity(pp.gpa, capacity); - try pp.expansion_entries.ensureUnusedCapacity(pp.gpa, capacity); + try pp.tokens.append(pp.gpa, tok.toTreeToken()); } fn skip( diff --git a/src/aro/Tree.zig b/src/aro/Tree.zig index a58f42ea..5756d37d 100644 --- a/src/aro/Tree.zig +++ b/src/aro/Tree.zig @@ -28,6 +28,8 @@ pub const Token = struct { pub const NumberSuffix = number_affixes.Suffix; }; +const LocList = std.SinglyLinkedList(Source.Location); + pub const TokenWithExpansionLocs = struct { const Self = @This(); @@ -36,6 +38,7 @@ pub const TokenWithExpansionLocs = struct { hideset: Treap.Node = null, loc: Source.Location, expansion_locs: ?[*]Source.Location = null, + loc_list: LocList = .{}, pub fn toTreeToken(self: Self) Token { return .{ .flags = self.flags, .id = self.id, .loc = self.loc }; @@ -58,37 +61,35 @@ pub const TokenWithExpansionLocs = struct { return locs[0..i]; } - pub fn addExpansionLocation(tok: *Self, gpa: std.mem.Allocator, new: []const Source.Location) !void { - if (new.len == 0 or tok.id == .whitespace or tok.id == .macro_ws or tok.id == .placemarker) return; - var list = std.ArrayList(Source.Location).init(gpa); - defer { - @memset(list.items.ptr[list.items.len..list.capacity], .{}); - // Add a sentinel to indicate the end of the list since - // the ArrayList's capacity isn't guaranteed to be exactly - // what we ask for. - if (list.capacity > 0) { - list.items.ptr[list.capacity - 1].byte_offset = 1; - } - tok.expansion_locs = list.items.ptr; - } - - if (tok.expansion_locs) |locs| { - var i: usize = 0; - while (locs[i].id != .unused) : (i += 1) {} - list.items = locs[0..i]; - while (locs[i].byte_offset != 1) : (i += 1) {} - list.capacity = i + 1; - } - - const min_len = @max(list.items.len + new.len + 1, 4); - const wanted_len = std.math.ceilPowerOfTwo(usize, min_len) catch - return error.OutOfMemory; - try list.ensureTotalCapacity(wanted_len); + pub fn addExpansionLocationList(tok: *Self, gpa: std.mem.Allocator, list: LocList) !void { + const first = tok.loc_list.first orelse return; + const new_list = list.first orelse return; + const end = first.findLast(); + // end.insertAfter(new_list); + _ = end; + _ = new_list; + // _ = end; + // const last = tok.loc_list.first.?.findLast(); + // _ = list; + // const last = first.findLast(); + // last.insertAfter(first); + _ = gpa; + // _ = last; + // // var it = list.first; + // // while (it) |node| : (it = node.next) { + // // // try r.append(pp.gpa, node.data); + // // } + // _ = tok; + // _ = gpa; + } - for (new) |new_loc| { - if (new_loc.id == .generated) continue; - list.appendAssumeCapacity(new_loc); - } + pub fn addExpansionLocation(tok: *Self, gpa: std.mem.Allocator, loc: Source.Location) !void { + _ = tok; + _ = gpa; + _ = loc; + // const node = try gpa.create(LocList.Node); + // node.* = .{ .data = loc }; + // tok.loc_list.prepend(node); } pub fn free(expansion_locs: ?[*]Source.Location, gpa: std.mem.Allocator) void { From 1f87ef0940d3efb9217414d643e9fa40fdeacaab Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Tue, 23 Jul 2024 23:07:39 -0700 Subject: [PATCH 08/10] Preprocessor: start fixing line markers --- 
src/aro/Preprocessor.zig | 121 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 120 insertions(+), 1 deletion(-) diff --git a/src/aro/Preprocessor.zig b/src/aro/Preprocessor.zig index 6b0c8beb..372b3f3a 100644 --- a/src/aro/Preprocessor.zig +++ b/src/aro/Preprocessor.zig @@ -559,9 +559,12 @@ pub fn preprocessSources(pp: *Preprocessor, sources: []const Source) Error!void assert(sources.len > 1); const first = sources[0]; + try pp.addIncludeStart(first); for (sources[1..]) |header| { + try pp.addIncludeStart(header); _ = try pp.preprocess(header); } + try pp.addIncludeResume(first.id, 0, 1); const eof = try pp.preprocess(first); try pp.addToken(eof); } @@ -1101,6 +1104,24 @@ fn makeMacroToken(position: usize, is_vararg: bool) PreprocessorToken { }; } +pub fn addIncludeStart(pp: *Preprocessor, source: Source) !void { + if (pp.linemarkers == .none) return; + try pp.addToken(.{ .id = .include_start, .loc = .{ + .id = source.id, + .byte_offset = std.math.maxInt(u32), + .line = 1, + } }); +} + +pub fn addIncludeResume(pp: *Preprocessor, source: Source.Id, offset: u32, line: u32) !void { + if (pp.linemarkers == .none) return; + try pp.addToken(.{ .id = .include_resume, .loc = .{ + .id = source, + .byte_offset = offset, + .line = line, + } }); +} + fn next(pp: *Preprocessor, id: Tokenizer.Token.Id) !bool { const tok = pp.getToken(); if (tok.id == id) return true; @@ -1455,6 +1476,7 @@ fn readIncludeExtra(pp: *Preprocessor, include_token: PreprocessorToken, which: return error.FatalError; } pp.preprocess_count += 1; + try pp.addIncludeStart(source); try pp.tokenizers.append(pp.gpa, .{ .buf = source.buf, .langopts = pp.comp.langopts, @@ -1886,6 +1908,17 @@ pub fn preprocess(pp: *Preprocessor, source: Source) !PreprocessorToken { const tok = try pp.readToken(); if (tok.id == .eof) { const tokenizer = pp.tokenizers.pop(); + + if (pp.tokenizers.items.len > 0) { + var next_tok: RawToken = undefined; + var tmp = pp.tokenizers.items[pp.tokenizers.items.len - 1]; + while (true) { + next_tok = tmp.nextNoWS(); + if (next_tok.id != .nl) break; + } + try pp.addIncludeResume(next_tok.source, next_tok.end, next_tok.line); + } + const guard_name = pp.guard_stack.pop(); if (guard_name) |name| { try pp.include_guards.put(pp.gpa, tokenizer.source, name); @@ -1910,10 +1943,40 @@ pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void { const tok_ids = pp.tokens.items(.id); var i: u32 = 0; var last_nl = false; - while (i < pp.tokens.len) : (i += 1) { + outer: while (i < pp.tokens.len) : (i += 1) { var cur: Token = pp.tokens.get(i); switch (cur.id) { .eof => break, + .nl => { + var newlines: u32 = 0; + for (tok_ids[i..], i..) 
|id, j| { + if (id == .nl) { + newlines += 1; + } else if (id == .eof) { + if (!last_nl) try w.writeByte('\n'); + return; + } else if (id != .whitespace) { + if (pp.linemarkers == .none) { + if (newlines < 2) break; + } else if (newlines < collapse_newlines) { + break; + } + + i = @intCast((j - 1) - @intFromBool(tok_ids[j - 1] == .whitespace)); + if (!last_nl) try w.writeAll("\n"); + if (pp.linemarkers != .none) { + const next_tok = pp.tokens.get(i); + const source = pp.comp.getSource(next_tok.loc.id); + const line_col = source.lineCol(next_tok.loc); + try pp.printLinemarker(w, line_col.line_no, source, .none); + last_nl = true; + } + continue :outer; + } + } + last_nl = true; + try w.writeAll("\n"); + }, .keyword_pragma => { const pragma_name = pp.tokSlice(pp.tokens.get(i + 1)); const end_idx = mem.indexOfScalarPos(Token.Id, tok_ids, i, .nl) orelse i + 1; @@ -1941,12 +2004,68 @@ pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void { try w.writeAll(slice); } }, + .include_start => { + const source = pp.comp.getSource(cur.loc.id); + + try pp.printLinemarker(w, 1, source, .start); + last_nl = true; + }, + .include_resume => { + const source = pp.comp.getSource(cur.loc.id); + const line_col = source.lineCol(cur.loc); + if (!last_nl) try w.writeAll("\n"); + + try pp.printLinemarker(w, line_col.line_no, source, .@"resume"); + last_nl = true; + }, else => try pp.prettyPrintToken(w, cur), } } try w.writeByte('\n'); } +fn printLinemarker( + pp: *Preprocessor, + w: anytype, + line_no: u32, + source: Source, + start_resume: enum(u8) { start, @"resume", none }, +) !void { + try w.writeByte('#'); + if (pp.linemarkers == .line_directives) try w.writeAll("line"); + try w.print(" {d} \"", .{line_no}); + for (source.path) |byte| switch (byte) { + '\n' => try w.writeAll("\\n"), + '\r' => try w.writeAll("\\r"), + '\t' => try w.writeAll("\\t"), + '\\' => try w.writeAll("\\\\"), + '"' => try w.writeAll("\\\""), + ' ', '!', '#'...'&', '('...'[', ']'...'~' => try w.writeByte(byte), + // Use hex escapes for any non-ASCII/unprintable characters. + // This ensures that the parsed version of this string will end up + // containing the same bytes as the input regardless of encoding. 
+        else => {
+            try w.writeAll("\\x");
+            try std.fmt.formatInt(byte, 16, .lower, .{ .width = 2, .fill = '0' }, w);
+        },
+    };
+    try w.writeByte('"');
+    if (pp.linemarkers == .numeric_directives) {
+        switch (start_resume) {
+            .none => {},
+            .start => try w.writeAll(" 1"),
+            .@"resume" => try w.writeAll(" 2"),
+        }
+        switch (source.kind) {
+            .user => {},
+            .system => try w.writeAll(" 3"),
+            .extern_c_system => try w.writeAll(" 3 4"),
+        }
+    }
+    try w.writeByte('\n');
+}
+
 fn prettyPrintToken(pp: *Preprocessor, w: anytype, tok: Token) !void {
     if (tok.flags.is_bol) {
         try w.writeByte('\n');

From e6fe13a995eb313fd52fb28128b7280155423d10 Mon Sep 17 00:00:00 2001
From: Evan Haas
Date: Thu, 25 Jul 2024 11:34:19 -0700
Subject: [PATCH 09/10] import: fix spelling

---
 src/aro.zig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/aro.zig b/src/aro.zig
index 45e90154..73f105f4 100644
--- a/src/aro.zig
+++ b/src/aro.zig
@@ -35,6 +35,6 @@ test {
     _ = @import("aro/target.zig");
     _ = @import("aro/Tokenizer.zig");
     _ = @import("aro/toolchains/Linux.zig");
-    _ = @import("aro/treap.zig");
+    _ = @import("aro/Treap.zig");
     _ = @import("aro/Value.zig");
 }

From 0d8bfaeec088105645cdc0c09ddec88772991181 Mon Sep 17 00:00:00 2001
From: Evan Haas
Date: Thu, 25 Jul 2024 22:12:43 -0700
Subject: [PATCH 10/10] Preprocessor: ignore newlines before func-like macro left paren

---
 src/aro/Preprocessor.zig | 16 +++++++++++++---
 ...standard-redefinition-reexamination-example.c | 5 ++---
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/aro/Preprocessor.zig b/src/aro/Preprocessor.zig
index 372b3f3a..13008292 100644
--- a/src/aro/Preprocessor.zig
+++ b/src/aro/Preprocessor.zig
@@ -854,7 +854,7 @@ fn readExpandNewline(pp: *Preprocessor) Error!PreprocessorToken {
             return pp.readExpand();
         },
         .func => {
-            if (!try pp.next(.l_paren)) return tok;
+            if (!try pp.getMacroLParen()) return tok;
             const arg_tokens_start = pp.macro_arg_tokens.items.len;
             defer pp.macro_arg_tokens.items.len = arg_tokens_start;
             const macro_args_start = pp.macro_args.items.len;
@@ -1129,6 +1129,17 @@ fn next(pp: *Preprocessor, id: Tokenizer.Token.Id) !bool {
     return false;
 }
 
+fn getMacroLParen(pp: *Preprocessor) !bool {
+    while (true) {
+        const tok = pp.getToken();
+        if (tok.id == .nl) continue;
+
+        if (tok.id == .l_paren) return true;
+        try pp.ungetToken(tok);
+        return false;
+    }
+}
+
 /// Returns true for vararg function-like macro, false otherwise
 fn readFunclikeMacroParams(pp: *Preprocessor, name: PreprocessorToken, l_paren: PreprocessorToken, params: *ParamMap) !bool {
     _ = name;
@@ -2017,7 +2028,7 @@ pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void {
 
             try pp.printLinemarker(w, line_col.line_no, source, .@"resume");
             last_nl = true;
-            },
+        },
         else => try pp.prettyPrintToken(w, cur),
     }
 }
@@ -2065,7 +2076,6 @@ fn printLinemarker(
     try w.writeByte('\n');
 }
 
-
 fn prettyPrintToken(pp: *Preprocessor, w: anytype, tok: Token) !void {
     if (tok.flags.is_bol) {
         try w.writeByte('\n');
diff --git a/test/cases/expanded/standard-redefinition-reexamination-example.c b/test/cases/expanded/standard-redefinition-reexamination-example.c
index 5e5688ce..a9673c54 100644
--- a/test/cases/expanded/standard-redefinition-reexamination-example.c
+++ b/test/cases/expanded/standard-redefinition-reexamination-example.c
@@ -1,5 +1,4 @@
 f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1);
-f(2 * (2+(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))
-^m(0,1);
-int i[] = { 1, 23, 4, 5, };
+f(2 * (2+(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1);
+int i[] = { 1, 23, 4, 5, };
 char c[2][6] = { "hello", "" };
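
Note on PATCH 10: skipping .nl tokens in getMacroLParen means a function-like macro name may be separated from the '(' of its invocation by newlines, which is the usual C preprocessor behavior (only in a macro definition must the '(' immediately follow the name). The change to the expected test output above reflects this. A minimal C illustration of the accepted input, using a hypothetical macro DOUBLE rather than anything from the test suite:

    #define DOUBLE(x) ((x) * 2)

    int a = DOUBLE   /* the '(' of the invocation appears on the next line */
    (21);            /* still recognized as an invocation; expands to ((21) * 2) */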
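
Note on the linemarker changes earlier in the series: the include_start/include_resume tokens are printed by printLinemarker as GNU-cpp-style line markers in -E output when numeric directives are selected. A sketch of the expected shape, with the file name, include path, and line numbers invented purely for illustration:

    # 1 "main.c"
    # 1 "/usr/include/stdio.h" 1 3
    # 4 "main.c" 2

Here flag 1 marks entry into an included file, 2 marks resuming the including file, and 3 (or "3 4") tags system and extern "C" system headers, matching the values written by printLinemarker above; with -fuse-line-directives the same locations are emitted as #line directives without these flags.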