From 6831422ce9bd32dcf86b220f23a170e573414e80 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Mon, 5 Feb 2024 00:31:34 -0800 Subject: [PATCH 1/7] Tree: store expansion locations separately from tokens --- src/aro/Parser.zig | 2 +- src/aro/Preprocessor.zig | 262 ++++++++++++++++++++++-------------- src/aro/Tree.zig | 23 ++-- src/aro/pragmas/gcc.zig | 18 +-- src/aro/pragmas/message.zig | 2 +- src/aro/pragmas/once.zig | 2 +- src/aro/pragmas/pack.zig | 2 +- test/record_runner.zig | 2 +- test/runner.zig | 2 +- 9 files changed, 188 insertions(+), 127 deletions(-) diff --git a/src/aro/Parser.zig b/src/aro/Parser.zig index 704c672d..674d8916 100644 --- a/src/aro/Parser.zig +++ b/src/aro/Parser.zig @@ -403,7 +403,7 @@ pub fn errExtra(p: *Parser, tag: Diagnostics.Tag, tok_i: TokenIndex, extra: Diag .tag = tag, .loc = loc, .extra = extra, - }, tok.expansionSlice()); + }, p.pp.expansionSlice(tok_i)); } pub fn errTok(p: *Parser, tag: Diagnostics.Tag, tok_i: TokenIndex) Compilation.Error!void { diff --git a/src/aro/Preprocessor.zig b/src/aro/Preprocessor.zig index b85fc462..2f448d77 100644 --- a/src/aro/Preprocessor.zig +++ b/src/aro/Preprocessor.zig @@ -9,7 +9,9 @@ const Tokenizer = @import("Tokenizer.zig"); const RawToken = Tokenizer.Token; const Parser = @import("Parser.zig"); const Diagnostics = @import("Diagnostics.zig"); -const Token = @import("Tree.zig").Token; +const Tree = @import("Tree.zig"); +const Token = Tree.Token; +const TokenWithExpansionLocs = Tree.TokenWithExpansionLocs; const Attribute = @import("Attribute.zig"); const features = @import("features.zig"); const Hideset = @import("Hideset.zig"); @@ -63,11 +65,17 @@ const Macro = struct { const Preprocessor = @This(); +const ExpansionEntry = struct { + idx: u32, + locs: [*]Source.Location, +}; + comp: *Compilation, gpa: mem.Allocator, arena: std.heap.ArenaAllocator, defines: DefineMap = .{}, tokens: Token.List = .{}, +expansion_locs: std.MultiArrayList(ExpansionEntry), token_buf: RawTokenList, char_buf: std.ArrayList(u8), /// Counter that is incremented each time preprocess() is called @@ -115,6 +123,7 @@ pub fn init(comp: *Compilation) Preprocessor { .poisoned_identifiers = std.StringHashMap(void).init(comp.gpa), .top_expansion_buf = ExpandBuf.init(comp.gpa), .hideset = .{ .comp = comp }, + .expansion_locs = .{}, }; comp.pragmaEvent(.before_preprocess); return pp; @@ -228,7 +237,6 @@ pub fn addBuiltinMacros(pp: *Preprocessor) !void { pub fn deinit(pp: *Preprocessor) void { pp.defines.deinit(pp.gpa); - for (pp.tokens.items(.expansion_locs)) |loc| Token.free(loc, pp.gpa); pp.tokens.deinit(pp.gpa); pp.arena.deinit(); pp.token_buf.deinit(); @@ -237,6 +245,24 @@ pub fn deinit(pp: *Preprocessor) void { pp.include_guards.deinit(pp.gpa); pp.top_expansion_buf.deinit(); pp.hideset.deinit(); + for (pp.expansion_locs.items(.locs)) |locs| TokenWithExpansionLocs.free(locs, pp.gpa); + pp.expansion_locs.deinit(pp.gpa); +} + +pub fn expansionSlice(pp: *Preprocessor, tok: Tree.TokenIndex) []Source.Location { + const S = struct { + fn order_token_index(context: void, lhs: Tree.TokenIndex, rhs: Tree.TokenIndex) std.math.Order { + _ = context; + return std.math.order(lhs, rhs); + } + }; + + const indices = pp.expansion_locs.items(.idx); + const idx = std.sort.binarySearch(Tree.TokenIndex, tok, indices, {}, S.order_token_index) orelse return &.{}; + const locs = pp.expansion_locs.items(.locs)[idx]; + var i: usize = 0; + while (locs[i].id != .unused) : (i += 1) {} + return locs[0..i]; } /// Preprocess a compilation unit of sources into a parsable list of 
tokens. @@ -250,11 +276,11 @@ pub fn preprocessSources(pp: *Preprocessor, sources: []const Source) Error!void } try pp.addIncludeResume(first.id, 0, 1); const eof = try pp.preprocess(first); - try pp.tokens.append(pp.comp.gpa, eof); + try pp.addToken(eof); } /// Preprocess a source file, returns eof token. -pub fn preprocess(pp: *Preprocessor, source: Source) Error!Token { +pub fn preprocess(pp: *Preprocessor, source: Source) Error!TokenWithExpansionLocs { const eof = pp.preprocessExtra(source) catch |er| switch (er) { // This cannot occur in the main file and is handled in `include`. error.StopPreprocessing => unreachable, @@ -276,18 +302,18 @@ pub fn tokenize(pp: *Preprocessor, source: Source) Error!Token { // Estimate how many new tokens this source will contain. const estimated_token_count = source.buf.len / 8; - try pp.tokens.ensureTotalCapacity(pp.gpa, pp.tokens.len + estimated_token_count); + try pp.ensureTotalTokenCapacity(pp.tokens.len + estimated_token_count); while (true) { const tok = tokenizer.next(); if (tok.id == .eof) return tokFromRaw(tok); - try pp.tokens.append(pp.gpa, tokFromRaw(tok)); + try pp.addToken(tokFromRaw(tok)); } } pub fn addIncludeStart(pp: *Preprocessor, source: Source) !void { if (pp.linemarkers == .none) return; - try pp.tokens.append(pp.gpa, .{ .id = .include_start, .loc = .{ + try pp.addToken(.{ .id = .include_start, .loc = .{ .id = source.id, .byte_offset = std.math.maxInt(u32), .line = 1, @@ -296,7 +322,7 @@ pub fn addIncludeStart(pp: *Preprocessor, source: Source) !void { pub fn addIncludeResume(pp: *Preprocessor, source: Source.Id, offset: u32, line: u32) !void { if (pp.linemarkers == .none) return; - try pp.tokens.append(pp.gpa, .{ .id = .include_resume, .loc = .{ + try pp.addToken(.{ .id = .include_resume, .loc = .{ .id = source, .byte_offset = offset, .line = line, @@ -329,7 +355,7 @@ fn findIncludeGuard(pp: *Preprocessor, source: Source) ?[]const u8 { return pp.tokSlice(guard); } -fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token { +fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!TokenWithExpansionLocs { var guard_name = pp.findIncludeGuard(source); pp.preprocess_count += 1; @@ -341,7 +367,7 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token { // Estimate how many new tokens this source will contain. 
const estimated_token_count = source.buf.len / 8; - try pp.tokens.ensureTotalCapacity(pp.gpa, pp.tokens.len + estimated_token_count); + try pp.ensureTotalTokenCapacity(pp.tokens.len + estimated_token_count); var if_level: u8 = 0; var if_kind = std.PackedIntArray(u2, 256).init([1]u2{0} ** 256); @@ -353,7 +379,7 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token { while (true) { var tok = tokenizer.next(); switch (tok.id) { - .hash => if (!start_of_line) try pp.tokens.append(pp.gpa, tokFromRaw(tok)) else { + .hash => if (!start_of_line) try pp.addToken(tokFromRaw(tok)) else { const directive = tokenizer.nextNoWS(); switch (directive.id) { .keyword_error, .keyword_warning => { @@ -655,13 +681,13 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token { } if (pp.preserve_whitespace) { tok.id = .nl; - try pp.tokens.append(pp.gpa, tokFromRaw(tok)); + try pp.addToken(tokFromRaw(tok)); } }, - .whitespace => if (pp.preserve_whitespace) try pp.tokens.append(pp.gpa, tokFromRaw(tok)), + .whitespace => if (pp.preserve_whitespace) try pp.addToken(tokFromRaw(tok)), .nl => { start_of_line = true; - if (pp.preserve_whitespace) try pp.tokens.append(pp.gpa, tokFromRaw(tok)); + if (pp.preserve_whitespace) try pp.addToken(tokFromRaw(tok)); }, .eof => { if (if_level != 0) try pp.err(tok, .unterminated_conditional_directive); @@ -697,14 +723,14 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token { /// Get raw token source string. /// Returned slice is invalidated when comp.generated_buf is updated. -pub fn tokSlice(pp: *Preprocessor, token: RawToken) []const u8 { +pub fn tokSlice(pp: *Preprocessor, token: anytype) []const u8 { if (token.id.lexeme()) |some| return some; const source = pp.comp.getSource(token.source); return source.buf[token.start..token.end]; } /// Convert a token from the Tokenizer into a token used by the parser. -fn tokFromRaw(raw: RawToken) Token { +fn tokFromRaw(raw: RawToken) TokenWithExpansionLocs { return .{ .id = raw.id, .loc = .{ @@ -726,7 +752,7 @@ fn err(pp: *Preprocessor, raw: RawToken, tag: Diagnostics.Tag) !void { }, &.{}); } -fn errStr(pp: *Preprocessor, tok: Token, tag: Diagnostics.Tag, str: []const u8) !void { +fn errStr(pp: *Preprocessor, tok: TokenWithExpansionLocs, tag: Diagnostics.Tag, str: []const u8) !void { try pp.comp.addDiagnostic(.{ .tag = tag, .loc = tok.loc, @@ -748,7 +774,7 @@ fn fatal(pp: *Preprocessor, raw: RawToken, comptime fmt: []const u8, args: anyty return error.FatalError; } -fn fatalNotFound(pp: *Preprocessor, tok: Token, filename: []const u8) Compilation.Error { +fn fatalNotFound(pp: *Preprocessor, tok: TokenWithExpansionLocs, filename: []const u8) Compilation.Error { const old = pp.comp.diagnostics.fatal_errors; pp.comp.diagnostics.fatal_errors = true; defer pp.comp.diagnostics.fatal_errors = old; @@ -802,9 +828,11 @@ fn expectNl(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { /// Consume all tokens until a newline and parse the result into a boolean. 
fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool { const start = pp.tokens.len; + const locs_start = pp.expansion_locs.len; defer { - for (pp.top_expansion_buf.items) |tok| Token.free(tok.expansion_locs, pp.gpa); + for (pp.top_expansion_buf.items) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); pp.tokens.len = start; + pp.expansion_locs.len = locs_start; } pp.top_expansion_buf.items.len = 0; @@ -838,7 +866,7 @@ fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool { } // validate the tokens in the expression - try pp.tokens.ensureUnusedCapacity(pp.gpa, pp.top_expansion_buf.items.len); + try pp.ensureUnusedTokenCapacity(pp.top_expansion_buf.items.len); var i: usize = 0; const items = pp.top_expansion_buf.items; while (i < items.len) : (i += 1) { @@ -907,9 +935,9 @@ fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool { } }, } - pp.tokens.appendAssumeCapacity(tok); + pp.addTokenAssumeCapacity(tok); } - try pp.tokens.append(pp.gpa, .{ + try pp.addToken(.{ .id = .eof, .loc = tokFromRaw(eof).loc, }); @@ -943,7 +971,7 @@ fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool { /// Turns macro_tok from .keyword_defined into .zero or .one depending on whether the argument is defined /// Returns the number of tokens consumed -fn handleKeywordDefined(pp: *Preprocessor, macro_tok: *Token, tokens: []const Token, eof: RawToken) !usize { +fn handleKeywordDefined(pp: *Preprocessor, macro_tok: *TokenWithExpansionLocs, tokens: []const TokenWithExpansionLocs, eof: RawToken) !usize { std.debug.assert(macro_tok.id == .keyword_defined); var it = TokenIterator.init(tokens); const first = it.nextNoWS() orelse { @@ -1058,7 +1086,7 @@ fn skip( tokenizer.index += 1; tokenizer.line += 1; if (pp.preserve_whitespace) { - try pp.tokens.append(pp.gpa, .{ .id = .nl, .loc = .{ + try pp.addToken(.{ .id = .nl, .loc = .{ .id = tokenizer.source, .line = tokenizer.line, } }); @@ -1081,21 +1109,21 @@ fn skipToNl(tokenizer: *Tokenizer) void { } } -const ExpandBuf = std.ArrayList(Token); +const ExpandBuf = std.ArrayList(TokenWithExpansionLocs); fn removePlacemarkers(buf: *ExpandBuf) void { var i: usize = buf.items.len -% 1; while (i < buf.items.len) : (i -%= 1) { if (buf.items[i].id == .placemarker) { const placemarker = buf.orderedRemove(i); - Token.free(placemarker.expansion_locs, buf.allocator); + TokenWithExpansionLocs.free(placemarker.expansion_locs, buf.allocator); } } } -const MacroArguments = std.ArrayList([]const Token); +const MacroArguments = std.ArrayList([]const TokenWithExpansionLocs); fn deinitMacroArguments(allocator: Allocator, args: *const MacroArguments) void { for (args.items) |item| { - for (item) |tok| Token.free(tok.expansion_locs, allocator); + for (item) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, allocator); allocator.free(item); } args.deinit(); @@ -1167,7 +1195,7 @@ fn expandObjMacro(pp: *Preprocessor, simple_macro: *const Macro) Error!ExpandBuf /// Returns error.ExpectedStringLiteral if parentheses are not balanced, a non-string-literal /// is encountered, or if no string literals are encountered /// TODO: destringize (replace all '\\' with a single `\` and all '\"' with a '"') -fn pasteStringsUnsafe(pp: *Preprocessor, toks: []const Token) ![]const u8 { +fn pasteStringsUnsafe(pp: *Preprocessor, toks: []const TokenWithExpansionLocs) ![]const u8 { const char_top = pp.char_buf.items.len; defer pp.char_buf.items.len = char_top; var unwrapped = toks; @@ -1186,7 +1214,7 @@ fn pasteStringsUnsafe(pp: *Preprocessor, toks: []const 
Token) ![]const u8 { } /// Handle the _Pragma operator (implemented as a builtin macro) -fn pragmaOperator(pp: *Preprocessor, arg_tok: Token, operator_loc: Source.Location) !void { +fn pragmaOperator(pp: *Preprocessor, arg_tok: TokenWithExpansionLocs, operator_loc: Source.Location) !void { const arg_slice = pp.expandedSlice(arg_tok); const content = arg_slice[1 .. arg_slice.len - 1]; const directive = "#pragma "; @@ -1240,7 +1268,7 @@ fn destringify(pp: *Preprocessor, str: []const u8) void { /// Stringify `tokens` into pp.char_buf. /// See https://gcc.gnu.org/onlinedocs/gcc-11.2.0/cpp/Stringizing.html#Stringizing -fn stringify(pp: *Preprocessor, tokens: []const Token) !void { +fn stringify(pp: *Preprocessor, tokens: []const TokenWithExpansionLocs) !void { try pp.char_buf.append('"'); var ws_state: enum { start, need, not_needed } = .start; for (tokens) |tok| { @@ -1287,7 +1315,7 @@ fn stringify(pp: *Preprocessor, tokens: []const Token) !void { try pp.char_buf.appendSlice("\"\n"); } -fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const Token, embed_args: ?*[]const Token, first: Token) !?[]const u8 { +fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const TokenWithExpansionLocs, embed_args: ?*[]const TokenWithExpansionLocs, first: TokenWithExpansionLocs) !?[]const u8 { assert(param_toks.len != 0); const char_top = pp.char_buf.items.len; defer pp.char_buf.items.len = char_top; @@ -1370,7 +1398,7 @@ fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const Token, embed_ } } -fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []const Token, src_loc: Source.Location) Error!bool { +fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []const TokenWithExpansionLocs, src_loc: Source.Location) Error!bool { switch (builtin) { .macro_param_has_attribute, .macro_param_has_declspec_attribute, @@ -1378,8 +1406,8 @@ fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []con .macro_param_has_extension, .macro_param_has_builtin, => { - var invalid: ?Token = null; - var identifier: ?Token = null; + var invalid: ?TokenWithExpansionLocs = null; + var identifier: ?TokenWithExpansionLocs = null; for (param_toks) |tok| { if (tok.id == .macro_ws) continue; if (tok.id == .comment) continue; @@ -1429,8 +1457,8 @@ fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []con return Diagnostics.warningExists(warning_name); }, .macro_param_is_identifier => { - var invalid: ?Token = null; - var identifier: ?Token = null; + var invalid: ?TokenWithExpansionLocs = null; + var identifier: ?TokenWithExpansionLocs = null; for (param_toks) |tok| switch (tok.id) { .macro_ws => continue, .comment => continue, @@ -1475,11 +1503,11 @@ fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []con } /// Treat whitespace-only paste arguments as empty -fn getPasteArgs(args: []const Token) []const Token { +fn getPasteArgs(args: []const TokenWithExpansionLocs) []const TokenWithExpansionLocs { for (args) |tok| { if (tok.id != .macro_ws) return args; } - return &[1]Token{.{ + return &[1]TokenWithExpansionLocs{.{ .id = .placemarker, .loc = .{ .id = .generated, .byte_offset = 0, .line = 0 }, }}; @@ -1507,7 +1535,7 @@ fn expandFuncMacro( try variable_arguments.appendSlice(args.items[i]); try expanded_variable_arguments.appendSlice(expanded_args.items[i]); if (i != expanded_args.items.len - 1) { - const comma = Token{ .id = .comma, .loc = .{ .id = .generated } }; + const comma = TokenWithExpansionLocs{ 
.id = .comma, .loc = .{ .id = .generated } }; try variable_arguments.append(comma); try expanded_variable_arguments.append(comma); } @@ -1532,7 +1560,7 @@ fn expandFuncMacro( .comment => if (!pp.comp.langopts.preserve_comments_in_macros) continue else - &[1]Token{tokFromRaw(raw_next)}, + &[1]TokenWithExpansionLocs{tokFromRaw(raw_next)}, .macro_param, .macro_param_no_expand => getPasteArgs(args.items[raw_next.end]), .keyword_va_args => variable_arguments.items, .keyword_va_opt => blk: { @@ -1540,7 +1568,7 @@ fn expandFuncMacro( if (va_opt_buf.items.len == 0) break; break :blk va_opt_buf.items; }, - else => &[1]Token{tokFromRaw(raw_next)}, + else => &[1]TokenWithExpansionLocs{tokFromRaw(raw_next)}, }; try pp.pasteTokens(&buf, next); @@ -1606,10 +1634,10 @@ fn expandFuncMacro( try pp.comp.addDiagnostic(.{ .tag = .expected_arguments, .loc = loc, .extra = extra }, &.{}); break :blk not_found; } else res: { - var invalid: ?Token = null; - var vendor_ident: ?Token = null; - var colon_colon: ?Token = null; - var attr_ident: ?Token = null; + var invalid: ?TokenWithExpansionLocs = null; + var vendor_ident: ?TokenWithExpansionLocs = null; + var colon_colon: ?TokenWithExpansionLocs = null; + var attr_ident: ?TokenWithExpansionLocs = null; for (arg) |tok| { if (tok.id == .macro_ws) continue; if (tok.id == .comment) continue; @@ -1682,7 +1710,7 @@ fn expandFuncMacro( try pp.comp.addDiagnostic(.{ .tag = .expected_arguments, .loc = loc, .extra = extra }, &.{}); break :blk not_found; } else res: { - var embed_args: []const Token = &.{}; + var embed_args: []const TokenWithExpansionLocs = &.{}; const include_str = (try pp.reconstructIncludeString(arg, &embed_args, arg[0])) orelse break :res not_found; @@ -1690,9 +1718,9 @@ fn expandFuncMacro( prev.id = .eof; var it: struct { i: u32 = 0, - slice: []const Token, - prev: Token, - fn next(it: *@This()) Token { + slice: []const TokenWithExpansionLocs, + prev: TokenWithExpansionLocs, + fn next(it: *@This()) TokenWithExpansionLocs { while (it.i < it.slice.len) switch (it.slice[it.i].id) { .macro_ws, .whitespace => it.i += 1, else => break, @@ -1751,7 +1779,7 @@ fn expandFuncMacro( }; var arg_count: u32 = 0; - var first_arg: Token = undefined; + var first_arg: TokenWithExpansionLocs = undefined; while (true) { const next = it.next(); if (next.id == .eof) { @@ -1812,8 +1840,8 @@ fn expandFuncMacro( // Clang and GCC require exactly one token (so, no parentheses or string pasting) // even though their error messages indicate otherwise. Ours is slightly more // descriptive. 
- var invalid: ?Token = null; - var string: ?Token = null; + var invalid: ?TokenWithExpansionLocs = null; + var string: ?TokenWithExpansionLocs = null; for (param_toks) |tok| switch (tok.id) { .string_literal => { if (string) |_| invalid = tok else string = tok; @@ -1903,11 +1931,11 @@ fn expandVaOpt( } } -fn bufCopyTokens(buf: *ExpandBuf, tokens: []const Token, src: []const Source.Location) !void { +fn bufCopyTokens(buf: *ExpandBuf, tokens: []const TokenWithExpansionLocs, src: []const Source.Location) !void { try buf.ensureUnusedCapacity(tokens.len); for (tokens) |tok| { var copy = try tok.dupe(buf.allocator); - errdefer Token.free(copy.expansion_locs, buf.allocator); + errdefer TokenWithExpansionLocs.free(copy.expansion_locs, buf.allocator); try copy.addExpansionLocation(buf.allocator, src); buf.appendAssumeCapacity(copy); } @@ -1920,7 +1948,7 @@ fn nextBufToken( start_idx: *usize, end_idx: *usize, extend_buf: bool, -) Error!Token { +) Error!TokenWithExpansionLocs { start_idx.* += 1; if (start_idx.* == buf.items.len and start_idx.* >= end_idx.*) { if (extend_buf) { @@ -1936,7 +1964,7 @@ fn nextBufToken( try buf.append(new_tok); return new_tok; } else { - return Token{ .id = .eof, .loc = .{ .id = .generated } }; + return TokenWithExpansionLocs{ .id = .eof, .loc = .{ .id = .generated } }; } } else { return buf.items[start_idx.*]; @@ -1951,7 +1979,7 @@ fn collectMacroFuncArguments( end_idx: *usize, extend_buf: bool, is_builtin: bool, - r_paren: *Token, + r_paren: *TokenWithExpansionLocs, ) !MacroArguments { const name_tok = buf.items[start_idx.*]; const saved_tokenizer = tokenizer.*; @@ -1978,7 +2006,7 @@ fn collectMacroFuncArguments( var parens: u32 = 0; var args = MacroArguments.init(pp.gpa); errdefer deinitMacroArguments(pp.gpa, &args); - var curArgument = std.ArrayList(Token).init(pp.gpa); + var curArgument = std.ArrayList(TokenWithExpansionLocs).init(pp.gpa); defer curArgument.deinit(); while (true) { var tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf); @@ -1991,13 +2019,13 @@ fn collectMacroFuncArguments( try args.append(owned); } else { const duped = try tok.dupe(pp.gpa); - errdefer Token.free(duped.expansion_locs, pp.gpa); + errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa); try curArgument.append(duped); } }, .l_paren => { const duped = try tok.dupe(pp.gpa); - errdefer Token.free(duped.expansion_locs, pp.gpa); + errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa); try curArgument.append(duped); parens += 1; }, @@ -2010,7 +2038,7 @@ fn collectMacroFuncArguments( break; } else { const duped = try tok.dupe(pp.gpa); - errdefer Token.free(duped.expansion_locs, pp.gpa); + errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa); try curArgument.append(duped); parens -= 1; } @@ -2033,7 +2061,7 @@ fn collectMacroFuncArguments( }, else => { const duped = try tok.dupe(pp.gpa); - errdefer Token.free(duped.expansion_locs, pp.gpa); + errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa); try curArgument.append(duped); }, } @@ -2043,7 +2071,7 @@ fn collectMacroFuncArguments( } fn removeExpandedTokens(pp: *Preprocessor, buf: *ExpandBuf, start: usize, len: usize, moving_end_idx: *usize) !void { - for (buf.items[start .. start + len]) |tok| Token.free(tok.expansion_locs, pp.gpa); + for (buf.items[start .. 
start + len]) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); try buf.replaceRange(start, len, &.{}); moving_end_idx.* -|= len; } @@ -2059,14 +2087,14 @@ const EvalContext = enum { /// Helper for safely iterating over a slice of tokens while skipping whitespace const TokenIterator = struct { - toks: []const Token, + toks: []const TokenWithExpansionLocs, i: usize, - fn init(toks: []const Token) TokenIterator { + fn init(toks: []const TokenWithExpansionLocs) TokenIterator { return .{ .toks = toks, .i = 0 }; } - fn nextNoWS(self: *TokenIterator) ?Token { + fn nextNoWS(self: *TokenIterator) ?TokenWithExpansionLocs { while (self.i < self.toks.len) : (self.i += 1) { const tok = self.toks[self.i]; if (tok.id == .whitespace or tok.id == .macro_ws) continue; @@ -2130,7 +2158,7 @@ fn expandMacroExhaustive( macro_handler: { if (macro.is_func) { - var r_paren: Token = undefined; + var r_paren: TokenWithExpansionLocs = undefined; var macro_scan_idx = idx; // to be saved in case this doesn't turn out to be a call const args = pp.collectMacroFuncArguments( @@ -2226,7 +2254,7 @@ fn expandMacroExhaustive( } const tokens_removed = macro_scan_idx - idx + 1; - for (buf.items[idx .. idx + tokens_removed]) |tok| Token.free(tok.expansion_locs, pp.gpa); + for (buf.items[idx .. idx + tokens_removed]) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); try buf.replaceRange(idx, tokens_removed, res.items); moving_end_idx += tokens_added; @@ -2264,7 +2292,7 @@ fn expandMacroExhaustive( } } - Token.free(buf.items[idx].expansion_locs, pp.gpa); + TokenWithExpansionLocs.free(buf.items[idx].expansion_locs, pp.gpa); try buf.replaceRange(idx, 1, res.items); idx += increment_idx_by; moving_end_idx = moving_end_idx + res.items.len - 1; @@ -2280,7 +2308,7 @@ fn expandMacroExhaustive( // trim excess buffer for (buf.items[moving_end_idx..]) |item| { - Token.free(item.expansion_locs, pp.gpa); + TokenWithExpansionLocs.free(item.expansion_locs, pp.gpa); } buf.items.len = moving_end_idx; } @@ -2291,7 +2319,7 @@ fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroErr var source_tok = tokFromRaw(raw); if (!raw.id.isMacroIdentifier()) { source_tok.id.simplifyMacroKeyword(); - return pp.tokens.append(pp.gpa, source_tok); + return pp.addToken(source_tok); } pp.top_expansion_buf.items.len = 0; try pp.top_expansion_buf.append(source_tok); @@ -2299,27 +2327,27 @@ fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroErr pp.hideset.clearRetainingCapacity(); try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr); - try pp.tokens.ensureUnusedCapacity(pp.gpa, pp.top_expansion_buf.items.len); + try pp.ensureUnusedTokenCapacity(pp.top_expansion_buf.items.len); for (pp.top_expansion_buf.items) |*tok| { if (tok.id == .macro_ws and !pp.preserve_whitespace) { - Token.free(tok.expansion_locs, pp.gpa); + TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); continue; } if (tok.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) { - Token.free(tok.expansion_locs, pp.gpa); + TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); continue; } if (tok.id == .placemarker) { - Token.free(tok.expansion_locs, pp.gpa); + TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); continue; } tok.id.simplifyMacroKeywordExtra(true); - pp.tokens.appendAssumeCapacity(tok.*); + pp.addTokenAssumeCapacity(tok.*); } if (pp.preserve_whitespace) { - try pp.tokens.ensureUnusedCapacity(pp.gpa, pp.add_expansion_nl); + try 
pp.ensureUnusedTokenCapacity(pp.add_expansion_nl); while (pp.add_expansion_nl > 0) : (pp.add_expansion_nl -= 1) { - pp.tokens.appendAssumeCapacity(.{ .id = .nl, .loc = .{ + pp.addTokenAssumeCapacity(.{ .id = .nl, .loc = .{ .id = tokenizer.source, .line = tokenizer.line, } }); @@ -2327,7 +2355,7 @@ fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroErr } } -fn expandedSliceExtra(pp: *const Preprocessor, tok: Token, macro_ws_handling: enum { single_macro_ws, preserve_macro_ws }) []const u8 { +fn expandedSliceExtra(pp: *const Preprocessor, tok: anytype, macro_ws_handling: enum { single_macro_ws, preserve_macro_ws }) []const u8 { if (tok.id.lexeme()) |some| { if (!tok.id.allowsDigraphs(pp.comp.langopts) and !(tok.id == .macro_ws and macro_ws_handling == .preserve_macro_ws)) return some; } @@ -2348,18 +2376,18 @@ fn expandedSliceExtra(pp: *const Preprocessor, tok: Token, macro_ws_handling: en } /// Get expanded token source string. -pub fn expandedSlice(pp: *const Preprocessor, tok: Token) []const u8 { +pub fn expandedSlice(pp: *const Preprocessor, tok: anytype) []const u8 { return pp.expandedSliceExtra(tok, .single_macro_ws); } /// Concat two tokens and add the result to pp.generated -fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const Token) Error!void { +fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const TokenWithExpansionLocs) Error!void { const lhs = while (lhs_toks.popOrNull()) |lhs| { if ((pp.comp.langopts.preserve_comments_in_macros and lhs.id == .comment) or (lhs.id != .macro_ws and lhs.id != .comment)) break lhs; - Token.free(lhs.expansion_locs, pp.gpa); + TokenWithExpansionLocs.free(lhs.expansion_locs, pp.gpa); } else { return bufCopyTokens(lhs_toks, rhs_toks, &.{}); }; @@ -2374,7 +2402,7 @@ fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const Token) } else { return lhs_toks.appendAssumeCapacity(lhs); }; - defer Token.free(lhs.expansion_locs, pp.gpa); + defer TokenWithExpansionLocs.free(lhs.expansion_locs, pp.gpa); const start = pp.comp.generated_buf.items.len; const end = start + pp.expandedSlice(lhs).len + pp.expandedSlice(rhs).len; @@ -2411,8 +2439,8 @@ fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const Token) try bufCopyTokens(lhs_toks, rhs_toks[rhs_rest..], &.{}); } -fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: Token) !Token { - var pasted_token = Token{ .id = id, .loc = .{ +fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: TokenWithExpansionLocs) !TokenWithExpansionLocs { + var pasted_token = TokenWithExpansionLocs{ .id = id, .loc = .{ .id = .generated, .byte_offset = @intCast(start), .line = pp.generated_line, @@ -2744,7 +2772,7 @@ fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void { error.InvalidInclude => return, else => |e| return e, }; - defer Token.free(filename_tok.expansion_locs, pp.gpa); + defer TokenWithExpansionLocs.free(filename_tok.expansion_locs, pp.gpa); // Check for empty filename. 
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws); @@ -2889,7 +2917,7 @@ fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void { return; } - try pp.tokens.ensureUnusedCapacity(pp.comp.gpa, 2 * embed_bytes.len - 1); // N bytes and N-1 commas + try pp.ensureUnusedTokenCapacity(2 * embed_bytes.len - 1); // N bytes and N-1 commas // TODO: We currently only support systems with CHAR_BIT == 8 // If the target's CHAR_BIT is not 8, we need to write out correctly-sized embed_bytes @@ -2900,14 +2928,14 @@ fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void { const byte = embed_bytes[0]; const start = pp.comp.generated_buf.items.len; try writer.print("{d}", .{byte}); - pp.tokens.appendAssumeCapacity(try pp.makeGeneratedToken(start, .embed_byte, filename_tok)); + pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start, .embed_byte, filename_tok)); } for (embed_bytes[1..]) |byte| { const start = pp.comp.generated_buf.items.len; try writer.print(",{d}", .{byte}); - pp.tokens.appendAssumeCapacity(.{ .id = .comma, .loc = .{ .id = .generated, .byte_offset = @intCast(start) } }); - pp.tokens.appendAssumeCapacity(try pp.makeGeneratedToken(start + 1, .embed_byte, filename_tok)); + pp.addTokenAssumeCapacity(.{ .id = .comma, .loc = .{ .id = .generated, .byte_offset = @intCast(start) } }); + pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start + 1, .embed_byte, filename_tok)); } try pp.comp.generated_buf.append(pp.gpa, '\n'); @@ -2942,18 +2970,20 @@ fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInc } const tokens_start = pp.tokens.len; + const locs_start = pp.expansion_locs.len; try pp.addIncludeStart(new_source); const eof = pp.preprocessExtra(new_source) catch |er| switch (er) { error.StopPreprocessing => { - for (pp.tokens.items(.expansion_locs)[tokens_start..]) |loc| Token.free(loc, pp.gpa); + for (pp.expansion_locs.items(.locs)[locs_start..]) |loc| TokenWithExpansionLocs.free(loc, pp.gpa); pp.tokens.len = tokens_start; + pp.expansion_locs.len = locs_start; return; }, else => |e| return e, }; try eof.checkMsEof(new_source, pp.comp); if (pp.preserve_whitespace and pp.tokens.items(.id)[pp.tokens.len - 1] != .nl) { - try pp.tokens.append(pp.gpa, .{ .id = .nl, .loc = .{ + try pp.addToken(.{ .id = .nl, .loc = .{ .id = tokenizer.source, .line = tokenizer.line, } }); @@ -2975,7 +3005,7 @@ fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInc /// 3. Via a stringified macro argument which is used as an argument to `_Pragma` /// operator_loc: Location of `_Pragma`; null if this is from #pragma /// arg_locs: expansion locations of the argument to _Pragma. 
empty if #pragma or a raw string literal was used -fn makePragmaToken(pp: *Preprocessor, raw: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !Token { +fn makePragmaToken(pp: *Preprocessor, raw: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !TokenWithExpansionLocs { var tok = tokFromRaw(raw); if (operator_loc) |loc| { try tok.addExpansionLocation(pp.gpa, &.{loc}); @@ -2984,28 +3014,54 @@ fn makePragmaToken(pp: *Preprocessor, raw: RawToken, operator_loc: ?Source.Locat return tok; } +pub fn addToken(pp: *Preprocessor, tok: TokenWithExpansionLocs) !void { + const idx: u32 = @intCast(pp.tokens.len); + try pp.tokens.append(pp.gpa, .{ .id = tok.id, .loc = tok.loc }); + if (tok.expansion_locs) |expansion_locs| { + try pp.expansion_locs.append(pp.gpa, .{ .idx = idx, .locs = expansion_locs }); + } +} + +pub fn addTokenAssumeCapacity(pp: *Preprocessor, tok: TokenWithExpansionLocs) void { + const idx: u32 = @intCast(pp.tokens.len); + pp.tokens.appendAssumeCapacity(.{ .id = tok.id, .loc = tok.loc }); + if (tok.expansion_locs) |expansion_locs| { + pp.expansion_locs.appendAssumeCapacity(.{ .idx = idx, .locs = expansion_locs }); + } +} + +pub fn ensureTotalTokenCapacity(pp: *Preprocessor, capacity: usize) !void { + try pp.tokens.ensureTotalCapacity(pp.gpa, capacity); + try pp.expansion_locs.ensureTotalCapacity(pp.gpa, capacity); +} + +pub fn ensureUnusedTokenCapacity(pp: *Preprocessor, capacity: usize) !void { + try pp.tokens.ensureUnusedCapacity(pp.gpa, capacity); + try pp.expansion_locs.ensureUnusedCapacity(pp.gpa, capacity); +} + /// Handle a pragma directive fn pragma(pp: *Preprocessor, tokenizer: *Tokenizer, pragma_tok: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !void { const name_tok = tokenizer.nextNoWS(); if (name_tok.id == .nl or name_tok.id == .eof) return; const name = pp.tokSlice(name_tok); - try pp.tokens.append(pp.gpa, try pp.makePragmaToken(pragma_tok, operator_loc, arg_locs)); + try pp.addToken(try pp.makePragmaToken(pragma_tok, operator_loc, arg_locs)); const pragma_start: u32 = @intCast(pp.tokens.len); const pragma_name_tok = try pp.makePragmaToken(name_tok, operator_loc, arg_locs); - try pp.tokens.append(pp.gpa, pragma_name_tok); + try pp.addToken(pragma_name_tok); while (true) { const next_tok = tokenizer.next(); if (next_tok.id == .whitespace) continue; if (next_tok.id == .eof) { - try pp.tokens.append(pp.gpa, .{ + try pp.addToken(.{ .id = .nl, .loc = .{ .id = .generated }, }); break; } - try pp.tokens.append(pp.gpa, try pp.makePragmaToken(next_tok, operator_loc, arg_locs)); + try pp.addToken(try pp.makePragmaToken(next_tok, operator_loc, arg_locs)); if (next_tok.id == .nl) break; } if (pp.comp.getPragma(name)) |prag| unknown: { @@ -3025,7 +3081,7 @@ fn findIncludeFilenameToken( first_token: RawToken, tokenizer: *Tokenizer, trailing_token_behavior: enum { ignore_trailing_tokens, expect_nl_eof }, -) !Token { +) !TokenWithExpansionLocs { var first = first_token; if (first.id == .angle_bracket_left) to_end: { @@ -3055,12 +3111,12 @@ fn findIncludeFilenameToken( else => expanded: { // Try to expand if the argument is a macro. 
pp.top_expansion_buf.items.len = 0; - defer for (pp.top_expansion_buf.items) |tok| Token.free(tok.expansion_locs, pp.gpa); + defer for (pp.top_expansion_buf.items) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); try pp.top_expansion_buf.append(source_tok); pp.expansion_source_loc = source_tok.loc; try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr); - var trailing_toks: []const Token = &.{}; + var trailing_toks: []const TokenWithExpansionLocs = &.{}; const include_str = (try pp.reconstructIncludeString(pp.top_expansion_buf.items, &trailing_toks, tokFromRaw(first))) orelse { try pp.expectNl(tokenizer); return error.InvalidInclude; @@ -3100,7 +3156,7 @@ fn findIncludeFilenameToken( fn findIncludeSource(pp: *Preprocessor, tokenizer: *Tokenizer, first: RawToken, which: Compilation.WhichInclude) !Source { const filename_tok = try pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof); - defer Token.free(filename_tok.expansion_locs, pp.gpa); + defer TokenWithExpansionLocs.free(filename_tok.expansion_locs, pp.gpa); // Check for empty filename. const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws); @@ -3287,7 +3343,7 @@ test "Preserve pragma tokens sometimes" { const test_runner_macros = try comp.addSourceFromBuffer("", source_text); const eof = try pp.preprocess(test_runner_macros); - try pp.tokens.append(pp.gpa, eof); + try pp.addToken(eof); try pp.prettyPrintTokens(buf.writer()); return allocator.dupe(u8, buf.items); } diff --git a/src/aro/Tree.zig b/src/aro/Tree.zig index bb764002..6db4b36d 100644 --- a/src/aro/Tree.zig +++ b/src/aro/Tree.zig @@ -12,6 +12,16 @@ const StringInterner = @import("StringInterner.zig"); pub const Token = struct { id: Id, + loc: Source.Location, + + pub const List = std.MultiArrayList(Token); + pub const Id = Tokenizer.Token.Id; + pub const NumberPrefix = number_affixes.Prefix; + pub const NumberSuffix = number_affixes.Suffix; +}; + +pub const TokenWithExpansionLocs = struct { + id: Token.Id, flags: packed struct { expansion_disabled: bool = false, is_macro_arg: bool = false, @@ -22,14 +32,14 @@ pub const Token = struct { loc: Source.Location, expansion_locs: ?[*]Source.Location = null, - pub fn expansionSlice(tok: Token) []const Source.Location { + pub fn expansionSlice(tok: TokenWithExpansionLocs) []const Source.Location { const locs = tok.expansion_locs orelse return &[0]Source.Location{}; var i: usize = 0; while (locs[i].id != .unused) : (i += 1) {} return locs[0..i]; } - pub fn addExpansionLocation(tok: *Token, gpa: std.mem.Allocator, new: []const Source.Location) !void { + pub fn addExpansionLocation(tok: *TokenWithExpansionLocs, gpa: std.mem.Allocator, new: []const Source.Location) !void { if (new.len == 0 or tok.id == .whitespace or tok.id == .macro_ws or tok.id == .placemarker) return; var list = std.ArrayList(Source.Location).init(gpa); defer { @@ -70,14 +80,14 @@ pub const Token = struct { gpa.free(locs[0 .. 
i + 1]); } - pub fn dupe(tok: Token, gpa: std.mem.Allocator) !Token { + pub fn dupe(tok: TokenWithExpansionLocs, gpa: std.mem.Allocator) !TokenWithExpansionLocs { var copy = tok; copy.expansion_locs = null; try copy.addExpansionLocation(gpa, tok.expansionSlice()); return copy; } - pub fn checkMsEof(tok: Token, source: Source, comp: *Compilation) !void { + pub fn checkMsEof(tok: TokenWithExpansionLocs, source: Source, comp: *Compilation) !void { std.debug.assert(tok.id == .eof); if (source.buf.len > tok.loc.byte_offset and source.buf[tok.loc.byte_offset] == 0x1A) { try comp.addDiagnostic(.{ @@ -90,11 +100,6 @@ pub const Token = struct { }, &.{}); } } - - pub const List = std.MultiArrayList(Token); - pub const Id = Tokenizer.Token.Id; - pub const NumberPrefix = number_affixes.Prefix; - pub const NumberSuffix = number_affixes.Suffix; }; pub const TokenIndex = u32; diff --git a/src/aro/pragmas/gcc.zig b/src/aro/pragmas/gcc.zig index f55b3a1a..83a4a134 100644 --- a/src/aro/pragmas/gcc.zig +++ b/src/aro/pragmas/gcc.zig @@ -80,7 +80,7 @@ fn diagnosticHandler(self: *GCC, pp: *Preprocessor, start_idx: TokenIndex) Pragm .tag = .pragma_requires_string_literal, .loc = diagnostic_tok.loc, .extra = .{ .str = "GCC diagnostic" }, - }, diagnostic_tok.expansionSlice()); + }, pp.expansionSlice(start_idx)); }, else => |e| return e, }; @@ -90,7 +90,7 @@ fn diagnosticHandler(self: *GCC, pp: *Preprocessor, start_idx: TokenIndex) Pragm .tag = .malformed_warning_check, .loc = next.loc, .extra = .{ .str = "GCC diagnostic" }, - }, next.expansionSlice()); + }, pp.expansionSlice(start_idx + 1)); } const new_kind: Diagnostics.Kind = switch (diagnostic) { .ignored => .off, @@ -116,7 +116,7 @@ fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex return pp.comp.addDiagnostic(.{ .tag = .unknown_gcc_pragma, .loc = directive_tok.loc, - }, directive_tok.expansionSlice()); + }, pp.expansionSlice(start_idx + 1)); switch (gcc_pragma) { .warning, .@"error" => { @@ -126,7 +126,7 @@ fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex .tag = .pragma_requires_string_literal, .loc = directive_tok.loc, .extra = .{ .str = @tagName(gcc_pragma) }, - }, directive_tok.expansionSlice()); + }, pp.expansionSlice(start_idx + 1)); }, else => |e| return e, }; @@ -134,7 +134,7 @@ fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex const diagnostic_tag: Diagnostics.Tag = if (gcc_pragma == .warning) .pragma_warning_message else .pragma_error_message; return pp.comp.addDiagnostic( .{ .tag = diagnostic_tag, .loc = directive_tok.loc, .extra = extra }, - directive_tok.expansionSlice(), + pp.expansionSlice(start_idx + 1), ); }, .diagnostic => return self.diagnosticHandler(pp, start_idx + 2) catch |err| switch (err) { @@ -143,12 +143,12 @@ fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex return pp.comp.addDiagnostic(.{ .tag = .unknown_gcc_pragma_directive, .loc = tok.loc, - }, tok.expansionSlice()); + }, pp.expansionSlice(start_idx + 2)); }, else => |e| return e, }, .poison => { - var i: usize = 2; + var i: u32 = 2; while (true) : (i += 1) { const tok = pp.tokens.get(start_idx + i); if (tok.id == .nl) break; @@ -157,14 +157,14 @@ fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex return pp.comp.addDiagnostic(.{ .tag = .pragma_poison_identifier, .loc = tok.loc, - }, tok.expansionSlice()); + }, pp.expansionSlice(start_idx + i)); } const str = pp.expandedSlice(tok); if (pp.defines.get(str) != null) { try 
pp.comp.addDiagnostic(.{ .tag = .pragma_poison_macro, .loc = tok.loc, - }, tok.expansionSlice()); + }, pp.expansionSlice(start_idx + i)); } try pp.poisoned_identifiers.put(str, {}); } diff --git a/src/aro/pragmas/message.zig b/src/aro/pragmas/message.zig index 7786c205..a42b5a08 100644 --- a/src/aro/pragmas/message.zig +++ b/src/aro/pragmas/message.zig @@ -28,7 +28,7 @@ fn deinit(pragma: *Pragma, comp: *Compilation) void { fn preprocessorHandler(_: *Pragma, pp: *Preprocessor, start_idx: TokenIndex) Pragma.Error!void { const message_tok = pp.tokens.get(start_idx); - const message_expansion_locs = message_tok.expansionSlice(); + const message_expansion_locs = pp.expansionSlice(start_idx); const str = Pragma.pasteTokens(pp, start_idx + 1) catch |err| switch (err) { error.ExpectedStringLiteral => { diff --git a/src/aro/pragmas/once.zig b/src/aro/pragmas/once.zig index 53b59bb1..790e5e12 100644 --- a/src/aro/pragmas/once.zig +++ b/src/aro/pragmas/once.zig @@ -45,7 +45,7 @@ fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex try pp.comp.addDiagnostic(.{ .tag = .extra_tokens_directive_end, .loc = name_tok.loc, - }, next.expansionSlice()); + }, pp.expansionSlice(start_idx + 1)); } const seen = self.preprocess_count == pp.preprocess_count; const prev = try self.pragma_once.fetchPut(name_tok.loc.id, {}); diff --git a/src/aro/pragmas/pack.zig b/src/aro/pragmas/pack.zig index 1fab0eca..61306e88 100644 --- a/src/aro/pragmas/pack.zig +++ b/src/aro/pragmas/pack.zig @@ -37,7 +37,7 @@ fn parserHandler(pragma: *Pragma, p: *Parser, start_idx: TokenIndex) Compilation return p.comp.addDiagnostic(.{ .tag = .pragma_pack_lparen, .loc = l_paren.loc, - }, l_paren.expansionSlice()); + }, p.pp.expansionSlice(idx)); } idx += 1; diff --git a/test/record_runner.zig b/test/record_runner.zig index d19b1279..e1f6cfed 100644 --- a/test/record_runner.zig +++ b/test/record_runner.zig @@ -281,7 +281,7 @@ fn singleRun(alloc: std.mem.Allocator, test_dir: []const u8, test_case: TestCase stats.progress.log("could not preprocess file '{s}': {s}\n", .{ path, @errorName(err) }); return; }; - try pp.tokens.append(alloc, eof); + try pp.addToken(eof); var tree = try aro.Parser.parse(&pp); defer tree.deinit(); diff --git a/test/runner.zig b/test/runner.zig index 486f61c0..77c18246 100644 --- a/test/runner.zig +++ b/test/runner.zig @@ -237,7 +237,7 @@ pub fn main() !void { progress.log("could not preprocess file '{s}': {s}\n", .{ path, @errorName(err) }); continue; }; - try pp.tokens.append(gpa, eof); + try pp.addToken(eof); if (pp.defines.get("TESTS_SKIPPED")) |macro| { if (macro.is_func or macro.tokens.len != 1 or macro.tokens[0].id != .pp_num) { From 992da7db9fdf05c77ab678b448ff77a4e62e815c Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Wed, 7 Feb 2024 22:05:16 -0800 Subject: [PATCH 2/7] Preprocessor: store token and expansion len together when deleting tokens --- src/aro/Preprocessor.zig | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/src/aro/Preprocessor.zig b/src/aro/Preprocessor.zig index 2f448d77..60985df3 100644 --- a/src/aro/Preprocessor.zig +++ b/src/aro/Preprocessor.zig @@ -70,12 +70,17 @@ const ExpansionEntry = struct { locs: [*]Source.Location, }; +const TokenState = struct { + tokens_len: usize, + expansion_locs_len: usize, +}; + comp: *Compilation, gpa: mem.Allocator, arena: std.heap.ArenaAllocator, defines: DefineMap = .{}, tokens: Token.List = .{}, -expansion_locs: std.MultiArrayList(ExpansionEntry), +expansion_locs: 
std.MultiArrayList(ExpansionEntry) = .{}, token_buf: RawTokenList, char_buf: std.ArrayList(u8), /// Counter that is incremented each time preprocess() is called @@ -123,7 +128,6 @@ pub fn init(comp: *Compilation) Preprocessor { .poisoned_identifiers = std.StringHashMap(void).init(comp.gpa), .top_expansion_buf = ExpandBuf.init(comp.gpa), .hideset = .{ .comp = comp }, - .expansion_locs = .{}, }; comp.pragmaEvent(.before_preprocess); return pp; @@ -825,14 +829,24 @@ fn expectNl(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { } } +fn getTokenState(pp: *const Preprocessor) TokenState { + return .{ + .tokens_len = pp.tokens.len, + .expansion_locs_len = pp.expansion_locs.len, + }; +} + +fn restoreTokenState(pp: *Preprocessor, state: TokenState) void { + pp.tokens.len = state.tokens_len; + pp.expansion_locs.len = state.expansion_locs_len; +} + /// Consume all tokens until a newline and parse the result into a boolean. fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool { - const start = pp.tokens.len; - const locs_start = pp.expansion_locs.len; + const token_state = pp.getTokenState(); defer { for (pp.top_expansion_buf.items) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa); - pp.tokens.len = start; - pp.expansion_locs.len = locs_start; + pp.restoreTokenState(token_state); } pp.top_expansion_buf.items.len = 0; @@ -948,7 +962,7 @@ fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool { .comp = pp.comp, .gpa = pp.gpa, .tok_ids = pp.tokens.items(.id), - .tok_i = @intCast(start), + .tok_i = @intCast(token_state.tokens_len), .arena = pp.arena.allocator(), .in_macro = true, .strings = std.ArrayList(u8).init(pp.comp.gpa), @@ -2969,14 +2983,12 @@ fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInc pp.verboseLog(first, "include file {s}", .{new_source.path}); } - const tokens_start = pp.tokens.len; - const locs_start = pp.expansion_locs.len; + const token_state = pp.getTokenState(); try pp.addIncludeStart(new_source); const eof = pp.preprocessExtra(new_source) catch |er| switch (er) { error.StopPreprocessing => { - for (pp.expansion_locs.items(.locs)[locs_start..]) |loc| TokenWithExpansionLocs.free(loc, pp.gpa); - pp.tokens.len = tokens_start; - pp.expansion_locs.len = locs_start; + for (pp.expansion_locs.items(.locs)[token_state.expansion_locs_len..]) |loc| TokenWithExpansionLocs.free(loc, pp.gpa); + pp.restoreTokenState(token_state); return; }, else => |e| return e, From 0a279bfce72482c0b7313c544536cde41e18cc52 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Thu, 8 Feb 2024 22:16:43 -0800 Subject: [PATCH 3/7] Preprocessor: Add expansion locs before tokens --- src/aro/Preprocessor.zig | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/aro/Preprocessor.zig b/src/aro/Preprocessor.zig index 60985df3..b79e1938 100644 --- a/src/aro/Preprocessor.zig +++ b/src/aro/Preprocessor.zig @@ -3027,19 +3027,17 @@ fn makePragmaToken(pp: *Preprocessor, raw: RawToken, operator_loc: ?Source.Locat } pub fn addToken(pp: *Preprocessor, tok: TokenWithExpansionLocs) !void { - const idx: u32 = @intCast(pp.tokens.len); - try pp.tokens.append(pp.gpa, .{ .id = tok.id, .loc = tok.loc }); if (tok.expansion_locs) |expansion_locs| { - try pp.expansion_locs.append(pp.gpa, .{ .idx = idx, .locs = expansion_locs }); + try pp.expansion_locs.append(pp.gpa, .{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs }); } + try pp.tokens.append(pp.gpa, .{ .id = tok.id, .loc = tok.loc }); } pub fn addTokenAssumeCapacity(pp: *Preprocessor, tok: 
TokenWithExpansionLocs) void { - const idx: u32 = @intCast(pp.tokens.len); - pp.tokens.appendAssumeCapacity(.{ .id = tok.id, .loc = tok.loc }); if (tok.expansion_locs) |expansion_locs| { - pp.expansion_locs.appendAssumeCapacity(.{ .idx = idx, .locs = expansion_locs }); + pp.expansion_locs.appendAssumeCapacity(.{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs }); } + pp.tokens.appendAssumeCapacity(.{ .id = tok.id, .loc = tok.loc }); } pub fn ensureTotalTokenCapacity(pp: *Preprocessor, capacity: usize) !void { From be63453cd59234f89b4cf13fc8382662edeca60e Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Thu, 8 Feb 2024 22:28:49 -0800 Subject: [PATCH 4/7] Preprocessor: rename expansion_locs to expansion_entries --- src/aro/Preprocessor.zig | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/aro/Preprocessor.zig b/src/aro/Preprocessor.zig index b79e1938..5b9b0e8a 100644 --- a/src/aro/Preprocessor.zig +++ b/src/aro/Preprocessor.zig @@ -72,15 +72,17 @@ const ExpansionEntry = struct { const TokenState = struct { tokens_len: usize, - expansion_locs_len: usize, + expansion_entries_len: usize, }; comp: *Compilation, gpa: mem.Allocator, arena: std.heap.ArenaAllocator, defines: DefineMap = .{}, +/// Do not directly mutate this; use addToken / addTokenAssumeCapacity / ensureTotalTokenCapacity / ensureUnusedTokenCapacity tokens: Token.List = .{}, -expansion_locs: std.MultiArrayList(ExpansionEntry) = .{}, +/// Do not directly mutate this; must be kept in sync with `tokens` +expansion_entries: std.MultiArrayList(ExpansionEntry) = .{}, token_buf: RawTokenList, char_buf: std.ArrayList(u8), /// Counter that is incremented each time preprocess() is called @@ -249,8 +251,8 @@ pub fn deinit(pp: *Preprocessor) void { pp.include_guards.deinit(pp.gpa); pp.top_expansion_buf.deinit(); pp.hideset.deinit(); - for (pp.expansion_locs.items(.locs)) |locs| TokenWithExpansionLocs.free(locs, pp.gpa); - pp.expansion_locs.deinit(pp.gpa); + for (pp.expansion_entries.items(.locs)) |locs| TokenWithExpansionLocs.free(locs, pp.gpa); + pp.expansion_entries.deinit(pp.gpa); } pub fn expansionSlice(pp: *Preprocessor, tok: Tree.TokenIndex) []Source.Location { @@ -261,9 +263,9 @@ pub fn expansionSlice(pp: *Preprocessor, tok: Tree.TokenIndex) []Source.Location } }; - const indices = pp.expansion_locs.items(.idx); + const indices = pp.expansion_entries.items(.idx); const idx = std.sort.binarySearch(Tree.TokenIndex, tok, indices, {}, S.order_token_index) orelse return &.{}; - const locs = pp.expansion_locs.items(.locs)[idx]; + const locs = pp.expansion_entries.items(.locs)[idx]; var i: usize = 0; while (locs[i].id != .unused) : (i += 1) {} return locs[0..i]; @@ -832,13 +834,13 @@ fn expectNl(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { fn getTokenState(pp: *const Preprocessor) TokenState { return .{ .tokens_len = pp.tokens.len, - .expansion_locs_len = pp.expansion_locs.len, + .expansion_entries_len = pp.expansion_entries.len, }; } fn restoreTokenState(pp: *Preprocessor, state: TokenState) void { pp.tokens.len = state.tokens_len; - pp.expansion_locs.len = state.expansion_locs_len; + pp.expansion_entries.len = state.expansion_entries_len; } /// Consume all tokens until a newline and parse the result into a boolean. 
@@ -2987,7 +2989,7 @@ fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInc try pp.addIncludeStart(new_source); const eof = pp.preprocessExtra(new_source) catch |er| switch (er) { error.StopPreprocessing => { - for (pp.expansion_locs.items(.locs)[token_state.expansion_locs_len..]) |loc| TokenWithExpansionLocs.free(loc, pp.gpa); + for (pp.expansion_entries.items(.locs)[token_state.expansion_entries_len..]) |loc| TokenWithExpansionLocs.free(loc, pp.gpa); pp.restoreTokenState(token_state); return; }, @@ -3028,26 +3030,26 @@ fn makePragmaToken(pp: *Preprocessor, raw: RawToken, operator_loc: ?Source.Locat pub fn addToken(pp: *Preprocessor, tok: TokenWithExpansionLocs) !void { if (tok.expansion_locs) |expansion_locs| { - try pp.expansion_locs.append(pp.gpa, .{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs }); + try pp.expansion_entries.append(pp.gpa, .{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs }); } try pp.tokens.append(pp.gpa, .{ .id = tok.id, .loc = tok.loc }); } pub fn addTokenAssumeCapacity(pp: *Preprocessor, tok: TokenWithExpansionLocs) void { if (tok.expansion_locs) |expansion_locs| { - pp.expansion_locs.appendAssumeCapacity(.{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs }); + pp.expansion_entries.appendAssumeCapacity(.{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs }); } pp.tokens.appendAssumeCapacity(.{ .id = tok.id, .loc = tok.loc }); } pub fn ensureTotalTokenCapacity(pp: *Preprocessor, capacity: usize) !void { try pp.tokens.ensureTotalCapacity(pp.gpa, capacity); - try pp.expansion_locs.ensureTotalCapacity(pp.gpa, capacity); + try pp.expansion_entries.ensureTotalCapacity(pp.gpa, capacity); } pub fn ensureUnusedTokenCapacity(pp: *Preprocessor, capacity: usize) !void { try pp.tokens.ensureUnusedCapacity(pp.gpa, capacity); - try pp.expansion_locs.ensureUnusedCapacity(pp.gpa, capacity); + try pp.expansion_entries.ensureUnusedCapacity(pp.gpa, capacity); } /// Handle a pragma directive From ff6fd83085c504fa8e626bd67ea7011d329d1a15 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Thu, 8 Feb 2024 22:35:31 -0800 Subject: [PATCH 5/7] Preprocessor: use TokenIndex instead ofu32 for ExpansionEntry index --- src/aro/Preprocessor.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aro/Preprocessor.zig b/src/aro/Preprocessor.zig index 5b9b0e8a..b666dee0 100644 --- a/src/aro/Preprocessor.zig +++ b/src/aro/Preprocessor.zig @@ -66,7 +66,7 @@ const Macro = struct { const Preprocessor = @This(); const ExpansionEntry = struct { - idx: u32, + idx: Tree.TokenIndex, locs: [*]Source.Location, }; From 58c2ba449c2cbe29e47f7528466e867d81ef27ab Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Thu, 8 Feb 2024 23:48:03 -0800 Subject: [PATCH 6/7] tests: use addToken instead of directly appending to preprocessor tokens --- test/fuzz/fuzz_lib.zig | 2 +- test/runner.zig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/fuzz/fuzz_lib.zig b/test/fuzz/fuzz_lib.zig index 86f40f5d..089eda09 100644 --- a/test/fuzz/fuzz_lib.zig +++ b/test/fuzz/fuzz_lib.zig @@ -45,7 +45,7 @@ fn processSource(comp: *Compilation, builtin: Source, user_source: Source) !void _ = try pp.preprocess(builtin); const eof = try pp.preprocess(user_source); - try pp.tokens.append(pp.comp.gpa, eof); + try pp.addToken(eof); var tree = try Parser.parse(&pp); defer tree.deinit(); diff --git a/test/runner.zig b/test/runner.zig index 77c18246..6afa376e 100644 --- a/test/runner.zig +++ b/test/runner.zig @@ -80,7 +80,7 @@ fn testOne(allocator: 
std.mem.Allocator, path: []const u8, test_dir: []const u8) _ = try pp.preprocess(user_macros); const eof = try pp.preprocess(file); - try pp.tokens.append(allocator, eof); + try pp.addToken(eof); var tree = try aro.Parser.parse(&pp); defer tree.deinit(); From f21caa8f38e8d0079b98a890309a8576ae33a9b7 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Fri, 9 Feb 2024 09:15:53 -0800 Subject: [PATCH 7/7] Preprocessor: clear temporary buffers after preprocessing --- src/aro/Hideset.zig | 6 ++++++ src/aro/Preprocessor.zig | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/src/aro/Hideset.zig b/src/aro/Hideset.zig index 7cebb860..433be9f3 100644 --- a/src/aro/Hideset.zig +++ b/src/aro/Hideset.zig @@ -81,6 +81,12 @@ pub fn clearRetainingCapacity(self: *Hideset) void { self.map.clearRetainingCapacity(); } +pub fn clearAndFree(self: *Hideset) void { + self.map.clearAndFree(self.comp.gpa); + self.intersection_map.clearAndFree(self.comp.gpa); + self.linked_list.shrinkAndFree(self.comp.gpa, 0); +} + /// Iterator is invalidated if the underlying MultiArrayList slice is reallocated due to resize fn iterator(self: *const Hideset, idx: Index) Iterator { return Iterator{ diff --git a/src/aro/Preprocessor.zig b/src/aro/Preprocessor.zig index b666dee0..dc7be1ff 100644 --- a/src/aro/Preprocessor.zig +++ b/src/aro/Preprocessor.zig @@ -255,6 +255,14 @@ pub fn deinit(pp: *Preprocessor) void { pp.expansion_entries.deinit(pp.gpa); } +/// Free buffers that are not needed after preprocessing +fn clearBuffers(pp: *Preprocessor) void { + pp.token_buf.clearAndFree(); + pp.char_buf.clearAndFree(); + pp.top_expansion_buf.clearAndFree(); + pp.hideset.clearAndFree(); +} + pub fn expansionSlice(pp: *Preprocessor, tok: Tree.TokenIndex) []Source.Location { const S = struct { fn order_token_index(context: void, lhs: Tree.TokenIndex, rhs: Tree.TokenIndex) std.math.Order { @@ -283,6 +291,7 @@ pub fn preprocessSources(pp: *Preprocessor, sources: []const Source) Error!void try pp.addIncludeResume(first.id, 0, 1); const eof = try pp.preprocess(first); try pp.addToken(eof); + pp.clearBuffers(); } /// Preprocess a source file, returns eof token.
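A note on the overall design of this series, for reviewers reading the patches in sequence: expansion locations move out of the per-token struct (the fields that tracked expansion move into a new TokenWithExpansionLocs struct used transiently inside the preprocessor) and into a sparse side table, expansion_entries: std.MultiArrayList(ExpansionEntry), which the parser and the pragma handlers query through pp.expansionSlice(token_index). Entries are appended in the same order tokens are, so the table is sorted by token index, which is what makes the binary search in expansionSlice valid. Below is a minimal, self-contained sketch of that lookup scheme, not the actual aro code: the u32 stand-in for Source.Location, the slice-valued locs field, and the test values are simplifications for illustration.

    const std = @import("std");

    const TokenIndex = u32;

    // Simplified stand-in for aro's ExpansionEntry, which stores a
    // sentinel-terminated [*]Source.Location rather than a slice.
    const ExpansionEntry = struct {
        idx: TokenIndex,
        locs: []const u32,
    };

    // Entries are appended in token order, so they are sorted by idx and a
    // binary search finds the (rare) token that has expansion locations;
    // every other token gets an empty slice.
    fn expansionSlice(entries: []const ExpansionEntry, tok: TokenIndex) []const u32 {
        var lo: usize = 0;
        var hi: usize = entries.len;
        while (lo < hi) {
            const mid = lo + (hi - lo) / 2;
            switch (std.math.order(tok, entries[mid].idx)) {
                .eq => return entries[mid].locs,
                .lt => hi = mid,
                .gt => lo = mid + 1,
            }
        }
        return &.{};
    }

    test "sparse expansion-location lookup" {
        const entries = [_]ExpansionEntry{
            .{ .idx = 3, .locs = &[_]u32{ 10, 11 } },
            .{ .idx = 7, .locs = &[_]u32{20} },
        };
        try std.testing.expectEqual(@as(usize, 2), expansionSlice(&entries, 3).len);
        try std.testing.expectEqual(@as(usize, 1), expansionSlice(&entries, 7).len);
        try std.testing.expectEqual(@as(usize, 0), expansionSlice(&entries, 5).len);
    }

Compared with the old per-token expansion_locs: ?[*]Source.Location field, this keeps the parser-facing Token at just id and loc in the hot token MultiArrayList, while only tokens that actually came from a macro expansion pay for the extra storage. The cost is that tokens and expansion_entries must now grow and shrink together, which is why the series funnels all appends through addToken / addTokenAssumeCapacity and all rollbacks through the TokenState helpers.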