Skip to content

Commit

Permalink
Preprocessor: use hidesets to manage token expansion disabling
Browse files Browse the repository at this point in the history
  • Loading branch information
ehaas committed Nov 30, 2023
1 parent 521b2f6 commit b3e6f44
Show file tree
Hide file tree
Showing 6 changed files with 224 additions and 6 deletions.
176 changes: 176 additions & 0 deletions src/aro/Hideset.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
const std = @import("std");
const mem = std.mem;
const Allocator = mem.Allocator;
const Source = @import("Source.zig");
const Compilation = @import("Compilation.zig");
const Tokenizer = @import("Tokenizer.zig");

/// This file is itself the Hideset struct: per-token sets of macro names whose
/// further expansion is disabled, stored as singly-linked lists in a shared pool.
pub const Hideset = @This();

/// Hash/equality context for `map`: an `Identifier` is hashed by its raw
/// 64 bits and compared field-wise.
const HashContext = struct {
    pub fn hash(self: HashContext, key: Identifier) u64 {
        _ = self;
        return std.hash.Wyhash.hash(0, mem.asBytes(&key));
    }
    pub fn eql(self: HashContext, lhs: Identifier, rhs: Identifier) bool {
        _ = self;
        return lhs.id == rhs.id and lhs.byte_offset == rhs.byte_offset;
    }
};

/// Identifies a token by its source file and byte offset, packed so the whole
/// key fits in 64 bits for hashing.
const Identifier = packed struct(u64) {
    id: Source.Id = .unused,
    byte_offset: u32 = 0,

    /// Recover the identifier's spelling by re-tokenizing at the stored position.
    fn slice(self: Identifier, comp: *const Compilation) []const u8 {
        var tokenizer: Tokenizer = .{
            .buf = comp.getSource(self.id).buf,
            .langopts = comp.langopts,
            .index = self.byte_offset,
            .source = .generated,
        };
        const tok = tokenizer.next();
        return tokenizer.buf[tok.start..tok.end];
    }
};

/// One node of a singly-linked hide list, stored in the shared `linked_list` pool.
const Item = struct {
    /// The hidden macro name this node carries.
    name: Identifier = .{},
    /// Index of the next node in the list, or `.sentinel` to terminate it.
    next: Index = .sentinel,

    const List = std.MultiArrayList(Item);
};

/// Index of a node in `linked_list`; `.sentinel` doubles as the empty list.
const Index = enum(u32) {
    sentinel = std.math.maxInt(u32),
    _,
};

/// Maps a token (keyed by source position) to the head of its hide list.
map: std.HashMapUnmanaged(Identifier, Index, HashContext, std.hash_map.default_max_load_percentage) = .{},
/// Scratch set of identifier spellings; cleared and reused by `intersection`.
intersection_map: std.StringHashMapUnmanaged(void) = .{},
/// Backing pool for all hide-list nodes, chained through `Item.next`.
linked_list: Item.List = .{},
/// Next free slot in `linked_list`; reset to 0 by `clearRetainingCapacity` so slots are recycled.
next_idx: Index = @enumFromInt(0),
comp: *const Compilation,

/// Walks a hide list from a given head index until `.sentinel`.
/// NOTE: holds a `MultiArrayList` slice — invalidated if `linked_list` grows.
const Iterator = struct {
    slice: Item.List.Slice,
    i: Index,

    fn next(self: *Iterator) ?Identifier {
        if (self.i == .sentinel) return null;
        const cur = @intFromEnum(self.i);
        self.i = self.slice.items(.next)[cur];
        return self.slice.items(.name)[cur];
    }
};

/// Create an empty hideset; allocations use `comp.gpa`.
pub fn init(comp: *const Compilation) Hideset {
    return .{ .comp = comp };
}

/// Free all owned storage. Deallocation cannot fail.
pub fn deinit(self: *Hideset) void {
    const gpa = self.comp.gpa;
    self.map.deinit(gpa);
    self.intersection_map.deinit(gpa);
    self.linked_list.deinit(gpa);
}

/// Reset for reuse without freeing memory. `linked_list` is deliberately kept:
/// resetting `next_idx` to 0 makes `allocate` recycle its slots. The
/// `intersection_map` is not touched here because `intersection` clears it
/// itself before each use.
pub fn clearRetainingCapacity(self: *Hideset) void {
    self.next_idx = @enumFromInt(0);
    self.map.clearRetainingCapacity();
}

/// Return an iterator over the hide list headed at `idx`.
/// The iterator is invalidated if `linked_list` grows while it is live.
pub fn iterator(self: *const Hideset, idx: Index) Iterator {
    return .{
        .slice = self.linked_list.slice(),
        .i = idx,
    };
}

/// Look up the hide list for `name`; an absent entry is the empty list.
pub fn get(self: *const Hideset, name: Identifier) Index {
    if (self.map.get(name)) |idx| return idx;
    return .sentinel;
}

/// Associate `key` with the hide list headed at `value`, replacing any
/// previous association.
pub fn put(self: *Hideset, key: Identifier, value: Index) !void {
    try self.map.put(self.comp.gpa, key, value);
}

/// Pre-reserve storage for `new_size` hide-list nodes in the shared pool.
pub fn ensureTotalCapacity(self: *Hideset, new_size: usize) !void {
    try self.linked_list.ensureTotalCapacity(self.comp.gpa, new_size);
}

/// Allocates a new item and returns its index. Slots below `next_idx` are
/// considered live; slots at or above it (left over from a previous
/// generation) are recycled before the pool is grown.
fn allocate(self: *Hideset, name: Identifier) !Index {
    const cur = @intFromEnum(self.next_idx);
    const slot: Index = if (cur < self.linked_list.len)
        self.next_idx
    else
        @enumFromInt(try self.linked_list.addOne(self.comp.gpa));
    self.next_idx = @enumFromInt(@intFromEnum(slot) + 1);
    self.linked_list.set(@intFromEnum(slot), .{ .name = name });
    return slot;
}

/// Create a new list with `name` at the front followed by `tail`.
pub fn prepend(self: *Hideset, name: Identifier, tail: Index) !Index {
    const head = try self.allocate(name);
    self.linked_list.items(.next)[@intFromEnum(head)] = tail;
    return head;
}

/// Copy a, then attach b at the end. Returns `b` unchanged when `a` is empty.
/// Neither input list is modified.
pub fn @"union"(self: *Hideset, a: Index, b: Index) !Index {
    if (a == .sentinel) return b;

    // Reserve every node we will copy up front: `allocate` may otherwise call
    // `addOne` mid-loop, reallocating `linked_list` and invalidating the
    // `Slice` held by the live iterator below.
    var a_len: usize = 0;
    var count_it = self.iterator(a);
    while (count_it.next()) |_| a_len += 1;
    try self.linked_list.ensureUnusedCapacity(self.comp.gpa, a_len);

    var head: Index = b;
    var prev: Index = .sentinel;
    var it = self.iterator(a);
    while (it.next()) |name| {
        const new_idx = try self.allocate(name);
        if (head == b) head = new_idx; // first copied node becomes the head
        if (prev != .sentinel) {
            self.linked_list.items(.next)[@intFromEnum(prev)] = new_idx;
        }
        prev = new_idx;
    }
    // Splice the unmodified `b` onto the tail of the copy of `a`.
    if (prev != .sentinel) {
        self.linked_list.items(.next)[@intFromEnum(prev)] = b;
    }
    return head;
}

/// Whether any identifier in `list` has the spelling `name`.
pub fn contains(self: *const Hideset, list: Index, name: []const u8) bool {
    var it = self.iterator(list);
    while (it.next()) |ident| {
        if (mem.eql(u8, name, ident.slice(self.comp))) return true;
    }
    return false;
}

/// Build a new list of the identifiers (compared by spelling) present in both
/// `a` and `b`, in `b`'s order. Returns `.sentinel` (the empty list) when
/// either input is empty. Neither input list is modified.
pub fn intersection(self: *Hideset, a: Index, b: Index) !Index {
    if (a == .sentinel or b == .sentinel) return .sentinel;
    self.intersection_map.clearRetainingCapacity();

    // Record the spellings occurring in `a`. No nodes are allocated during
    // this walk, so the iterator's slice stays valid.
    var it = self.iterator(a);
    while (it.next()) |name| {
        try self.intersection_map.put(self.comp.gpa, name.slice(self.comp), {});
    }

    // Reserve every node we might copy up front: `allocate` may otherwise call
    // `addOne` mid-loop, reallocating `linked_list` and invalidating the
    // `Slice` held by the live iterator over `b`.
    var b_len: usize = 0;
    it = self.iterator(b);
    while (it.next()) |_| b_len += 1;
    try self.linked_list.ensureUnusedCapacity(self.comp.gpa, b_len);

    var head: Index = .sentinel;
    var prev: Index = .sentinel;
    it = self.iterator(b);
    while (it.next()) |name| {
        if (!self.intersection_map.contains(name.slice(self.comp))) continue;
        const new_idx = try self.allocate(name);
        if (head == .sentinel) head = new_idx;
        if (prev != .sentinel) {
            self.linked_list.items(.next)[@intFromEnum(prev)] = new_idx;
        }
        prev = new_idx;
    }
    return head;
}
44 changes: 40 additions & 4 deletions src/aro/Preprocessor.zig
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ const Diagnostics = @import("Diagnostics.zig");
const Token = @import("Tree.zig").Token;
const Attribute = @import("Attribute.zig");
const features = @import("features.zig");
const Hideset = @import("Hideset.zig");

const DefineMap = std.StringHashMapUnmanaged(Macro);
const RawTokenList = std.ArrayList(RawToken);
Expand Down Expand Up @@ -93,6 +94,8 @@ preserve_whitespace: bool = false,
/// linemarker tokens. Must be .none unless in -E mode (parser does not handle linemarkers)
linemarkers: Linemarkers = .none,

hideset: Hideset,

pub const parse = Parser.parse;

pub const Linemarkers = enum {
Expand All @@ -113,6 +116,7 @@ pub fn init(comp: *Compilation) Preprocessor {
.char_buf = std.ArrayList(u8).init(comp.gpa),
.poisoned_identifiers = std.StringHashMap(void).init(comp.gpa),
.top_expansion_buf = ExpandBuf.init(comp.gpa),
.hideset = Hideset.init(comp),
};
comp.pragmaEvent(.before_preprocess);
return pp;
Expand Down Expand Up @@ -236,6 +240,7 @@ pub fn deinit(pp: *Preprocessor) void {
pp.poisoned_identifiers.deinit();
pp.include_guards.deinit(pp.gpa);
pp.top_expansion_buf.deinit();
pp.hideset.deinit();
}

/// Preprocess a compilation unit of sources into a parsable list of tokens.
Expand Down Expand Up @@ -341,6 +346,7 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
// Estimate how many new tokens this source will contain.
const estimated_token_count = source.buf.len / 8;
try pp.tokens.ensureTotalCapacity(pp.gpa, pp.tokens.len + estimated_token_count);
try pp.hideset.ensureTotalCapacity(1024);

var if_level: u8 = 0;
var if_kind = std.PackedIntArray(u2, 256).init([1]u2{0} ** 256);
Expand Down Expand Up @@ -818,6 +824,7 @@ fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool {
} else unreachable;
if (pp.top_expansion_buf.items.len != 0) {
pp.expansion_source_loc = pp.top_expansion_buf.items[0].loc;
pp.hideset.clearRetainingCapacity();
try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, pp.top_expansion_buf.items.len, false, .expr);
}
for (pp.top_expansion_buf.items) |tok| {
Expand Down Expand Up @@ -1948,6 +1955,7 @@ fn collectMacroFuncArguments(
end_idx: *usize,
extend_buf: bool,
is_builtin: bool,
r_paren: *Token,
) !MacroArguments {
const name_tok = buf.items[start_idx.*];
const saved_tokenizer = tokenizer.*;
Expand Down Expand Up @@ -2002,6 +2010,7 @@ fn collectMacroFuncArguments(
const owned = try curArgument.toOwnedSlice();
errdefer pp.gpa.free(owned);
try args.append(owned);
r_paren.* = tok;
break;
} else {
const duped = try tok.dupe(pp.gpa);
Expand Down Expand Up @@ -2108,13 +2117,24 @@ fn expandMacroExhaustive(
idx += it.i;
continue;
}
const macro_entry = pp.defines.getPtr(pp.expandedSlice(macro_tok));
if (macro_entry == null or !shouldExpand(buf.items[idx], macro_entry.?)) {
if (!macro_tok.id.isMacroIdentifier() or macro_tok.flags.expansion_disabled) {
idx += 1;
continue;
}
if (macro_entry) |macro| macro_handler: {
const expanded = pp.expandedSlice(macro_tok);
const macro = pp.defines.getPtr(expanded) orelse {
idx += 1;
continue;
};
const macro_hidelist = pp.hideset.get(.{ .id = macro_tok.loc.id, .byte_offset = macro_tok.loc.byte_offset });
if (pp.hideset.contains(macro_hidelist, expanded)) {
idx += 1;
continue;
}

macro_handler: {
if (macro.is_func) {
var r_paren: Token = undefined;
var macro_scan_idx = idx;
// to be saved in case this doesn't turn out to be a call
const args = pp.collectMacroFuncArguments(
Expand All @@ -2124,6 +2144,7 @@ fn expandMacroExhaustive(
&moving_end_idx,
extend_buf,
macro.is_builtin,
&r_paren,
) catch |er| switch (er) {
error.MissingLParen => {
if (!buf.items[idx].flags.is_macro_arg) buf.items[idx].flags.expansion_disabled = true;
Expand All @@ -2137,12 +2158,16 @@ fn expandMacroExhaustive(
},
else => |e| return e,
};
assert(r_paren.id == .r_paren);
defer {
for (args.items) |item| {
pp.gpa.free(item);
}
args.deinit();
}
const r_paren_hidelist = pp.hideset.get(.{ .id = r_paren.loc.id, .byte_offset = r_paren.loc.byte_offset });
var hs = try pp.hideset.intersection(macro_hidelist, r_paren_hidelist);
hs = try pp.hideset.prepend(.{ .id = macro_tok.loc.id, .byte_offset = macro_tok.loc.byte_offset }, hs);

var args_count: u32 = @intCast(args.items.len);
// if the macro has zero arguments g() args_count is still 1
Expand Down Expand Up @@ -2199,6 +2224,9 @@ fn expandMacroExhaustive(
for (res.items) |*tok| {
try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc});
try tok.addExpansionLocation(pp.gpa, macro_expansion_locs);
const tok_hidelist = pp.hideset.get(.{ .id = tok.loc.id, .byte_offset = tok.loc.byte_offset });
const new_hidelist = try pp.hideset.@"union"(tok_hidelist, hs);
try pp.hideset.put(.{ .id = tok.loc.id, .byte_offset = tok.loc.byte_offset }, new_hidelist);
}

const tokens_removed = macro_scan_idx - idx + 1;
Expand All @@ -2215,12 +2243,19 @@ fn expandMacroExhaustive(
const res = try pp.expandObjMacro(macro);
defer res.deinit();

const hs = try pp.hideset.prepend(.{ .id = macro_tok.loc.id, .byte_offset = macro_tok.loc.byte_offset }, macro_hidelist);

const macro_expansion_locs = macro_tok.expansionSlice();
var increment_idx_by = res.items.len;
for (res.items, 0..) |*tok, i| {
tok.flags.is_macro_arg = macro_tok.flags.is_macro_arg;
try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc});
try tok.addExpansionLocation(pp.gpa, macro_expansion_locs);

const tok_hidelist = pp.hideset.get(.{ .id = tok.loc.id, .byte_offset = tok.loc.byte_offset });
const new_hidelist = try pp.hideset.@"union"(tok_hidelist, hs);
try pp.hideset.put(.{ .id = tok.loc.id, .byte_offset = tok.loc.byte_offset }, new_hidelist);

if (tok.id == .keyword_defined and eval_ctx == .expr) {
try pp.comp.addDiagnostic(.{
.tag = .expansion_to_defined,
Expand Down Expand Up @@ -2266,6 +2301,7 @@ fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroErr
try pp.top_expansion_buf.append(source_tok);
pp.expansion_source_loc = source_tok.loc;

pp.hideset.clearRetainingCapacity();
try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr);
try pp.tokens.ensureUnusedCapacity(pp.gpa, pp.top_expansion_buf.items.len);
for (pp.top_expansion_buf.items) |*tok| {
Expand Down Expand Up @@ -2312,7 +2348,7 @@ fn expandedSliceExtra(pp: *const Preprocessor, tok: Token, macro_ws_handling: en
}

/// Get expanded token source string.
pub fn expandedSlice(pp: *Preprocessor, tok: Token) []const u8 {
pub fn expandedSlice(pp: *const Preprocessor, tok: Token) []const u8 {
return pp.expandedSliceExtra(tok, .single_macro_ws);
}

Expand Down
1 change: 1 addition & 0 deletions test/cases/expanded/recursive call non-expanded parens.c
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1 2 1 bar
2 changes: 1 addition & 1 deletion test/cases/expanded/unspecified expansion.c
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2*f(9)
2*9*g
5 changes: 5 additions & 0 deletions test/cases/recursive call non-expanded parens.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
//aro-args -E -P
#define foo(X) 1 bar
#define bar(X) 2 foo

foo(X)(Y)(Z)
2 changes: 1 addition & 1 deletion test/cases/unspecified expansion.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//aro-args -E -P
// This can either expand as 2*f(9) or as 2*9*g (see 6.10.3.4 in the standard)
// Currently arocc does the former, but gcc and clang do the latter
// We follow gcc and clang in expanding it to 2*9*g

#define f(a) a*g
#define g(a) f(a)
Expand Down

0 comments on commit b3e6f44

Please sign in to comment.