From 7d449fe13ead1a51997488cd004ad8a60a044c88 Mon Sep 17 00:00:00 2001
From: Evan Haas
Date: Sun, 3 Jul 2022 20:46:25 -0700
Subject: [PATCH 1/5] Type: use interned strings for field names

Store unique names as integers for faster comparisons
---
 src/Attribute.zig      |   3 +-
 src/Builtins.zig       |   2 +-
 src/Compilation.zig    |  40 ++++++-----
 src/Parser.zig         | 150 +++++++++++++++++++++++-----------------
 src/StringInterner.zig |  78 +++++++++++++++++++++
 src/SymbolStack.zig    | 100 +++++++++++++--------------
 src/Tree.zig           | 126 +++++++++++++++++----------------
 src/Type.zig           | 153 +++++++++++++++++++++--------------------
 src/lib.zig            |   1 +
 test/runner.zig        |  23 ++++---
 10 files changed, 400 insertions(+), 276 deletions(-)
 create mode 100644 src/StringInterner.zig

diff --git a/src/Attribute.zig b/src/Attribute.zig
index 39c22c87..7977279d 100644
--- a/src/Attribute.zig
+++ b/src/Attribute.zig
@@ -1282,7 +1282,8 @@ fn applyTransparentUnion(attr: Attribute, p: *Parser, tok: TokenIndex, ty: Type)
     for (fields[1..]) |field| {
         const field_size = field.ty.bitSizeof(p.comp).?;
         if (field_size == first_field_size) continue;
-        const str = try std.fmt.allocPrint(p.comp.diag.arena.allocator(), "'{s}' ({d}", .{ field.name, field_size });
+        const mapper = p.comp.string_interner.getSlowTypeMapper();
+        const str = try std.fmt.allocPrint(p.comp.diag.arena.allocator(), "'{s}' ({d}", .{ mapper.lookup(field.name), field_size });
         try p.errStr(.transparent_union_size, field.name_tok, str);
         return p.errExtra(.transparent_union_size_note, fields[0].name_tok, .{ .unsigned = first_field_size });
     }
diff --git a/src/Builtins.zig b/src/Builtins.zig
index fb281f9d..8ee54f56 100644
--- a/src/Builtins.zig
+++ b/src/Builtins.zig
@@ -40,7 +40,7 @@ fn add(
 ) void {
     var params = a.alloc(Type.Func.Param, param_types.len) catch unreachable; // fib
     for (param_types) |param_ty, i| {
-        params[i] = .{ .name_tok = 0, .ty = param_ty, .name = "" };
+        params[i] = .{ .name_tok = 0, .ty = param_ty, .name = .empty };
     }
     b.putAssumeCapacity(name, .{
         .spec = spec,
diff --git a/src/Compilation.zig b/src/Compilation.zig
index 1ff6fbbf..5096b206 100644
--- a/src/Compilation.zig
+++ b/src/Compilation.zig
@@ -11,6 +11,7 @@ const Tokenizer = @import("Tokenizer.zig");
 const Token = Tokenizer.Token;
 const Type = @import("Type.zig");
 const Pragma = @import("Pragma.zig");
+const StringInterner = @import("StringInterner.zig");
 
 const Compilation = @This();
 
@@ -41,6 +42,7 @@ types: struct {
 } = undefined,
 /// Mapping from Source.Id to byte offset of first non-utf8 byte
 invalid_utf8_locs: std.AutoHashMapUnmanaged(Source.Id, u32) = .{},
+string_interner: StringInterner = .{},
 
 pub fn init(gpa: Allocator) Compilation {
     return .{
@@ -72,6 +74,11 @@ pub fn deinit(comp: *Compilation) void {
     comp.generated_buf.deinit();
     comp.builtins.deinit(comp.gpa);
     comp.invalid_utf8_locs.deinit(comp.gpa);
+    comp.string_interner.deinit(comp.gpa);
+}
+
+pub fn intern(comp: *Compilation, str: []const u8) !StringInterner.StringId {
+    return comp.string_interner.intern(comp.gpa, str);
 }
 
 fn generateDateAndTime(w: anytype) !void {
@@ -337,16 +344,17 @@ pub fn generateBuiltinMacros(comp: *Compilation) !Source {
     //    try comp.generateSizeofType(w, "__SIZEOF_WINT_T__", .{ .specifier = .pointer });
 
     // various int types
-    try generateTypeMacro(w, "__PTRDIFF_TYPE__", comp.types.ptrdiff);
-    try generateTypeMacro(w, "__SIZE_TYPE__", comp.types.size);
-    try generateTypeMacro(w, "__WCHAR_TYPE__", comp.types.wchar);
+    const mapper = comp.string_interner.getSlowTypeMapper();
+    try generateTypeMacro(w, mapper, "__PTRDIFF_TYPE__", comp.types.ptrdiff);
"__PTRDIFF_TYPE__", comp.types.ptrdiff); + try generateTypeMacro(w, mapper, "__SIZE_TYPE__", comp.types.size); + try generateTypeMacro(w, mapper, "__WCHAR_TYPE__", comp.types.wchar); return comp.addSourceFromBuffer("", buf.items); } -fn generateTypeMacro(w: anytype, name: []const u8, ty: Type) !void { +fn generateTypeMacro(w: anytype, mapper: StringInterner.TypeMapper, name: []const u8, ty: Type) !void { try w.print("#define {s} ", .{name}); - try ty.print(w); + try ty.print(mapper, w); try w.writeByte('\n'); } @@ -422,7 +430,7 @@ fn generateVaListType(comp: *Compilation) !Type { .aarch64_va_list => { const record_ty = try arena.create(Type.Record); record_ty.* = .{ - .name = "__va_list_tag", + .name = try comp.intern("__va_list_tag"), .fields = try arena.alloc(Type.Record.Field, 5), .size = 32, .alignment = 8, @@ -431,17 +439,17 @@ fn generateVaListType(comp: *Compilation) !Type { const void_ty = try arena.create(Type); void_ty.* = .{ .specifier = .void }; const void_ptr = Type{ .specifier = .pointer, .data = .{ .sub_type = void_ty } }; - record_ty.fields[0] = .{ .name = "__stack", .ty = void_ptr }; - record_ty.fields[1] = .{ .name = "__gr_top", .ty = void_ptr }; - record_ty.fields[2] = .{ .name = "__vr_top", .ty = void_ptr }; - record_ty.fields[3] = .{ .name = "__gr_offs", .ty = .{ .specifier = .int } }; - record_ty.fields[4] = .{ .name = "__vr_offs", .ty = .{ .specifier = .int } }; + record_ty.fields[0] = .{ .name = try comp.intern("__stack"), .ty = void_ptr }; + record_ty.fields[1] = .{ .name = try comp.intern("__gr_top"), .ty = void_ptr }; + record_ty.fields[2] = .{ .name = try comp.intern("__vr_top"), .ty = void_ptr }; + record_ty.fields[3] = .{ .name = try comp.intern("__gr_offs"), .ty = .{ .specifier = .int } }; + record_ty.fields[4] = .{ .name = try comp.intern("__vr_offs"), .ty = .{ .specifier = .int } }; ty = .{ .specifier = .@"struct", .data = .{ .record = record_ty } }; }, .x86_64_va_list => { const record_ty = try arena.create(Type.Record); record_ty.* = .{ - .name = "__va_list_tag", + .name = try comp.intern("__va_list_tag"), .fields = try arena.alloc(Type.Record.Field, 4), .size = 24, .alignment = 8, @@ -450,10 +458,10 @@ fn generateVaListType(comp: *Compilation) !Type { const void_ty = try arena.create(Type); void_ty.* = .{ .specifier = .void }; const void_ptr = Type{ .specifier = .pointer, .data = .{ .sub_type = void_ty } }; - record_ty.fields[0] = .{ .name = "gp_offset", .ty = .{ .specifier = .uint } }; - record_ty.fields[1] = .{ .name = "fp_offset", .ty = .{ .specifier = .uint } }; - record_ty.fields[2] = .{ .name = "overflow_arg_area", .ty = void_ptr }; - record_ty.fields[3] = .{ .name = "reg_save_area", .ty = void_ptr }; + record_ty.fields[0] = .{ .name = try comp.intern("gp_offset"), .ty = .{ .specifier = .uint } }; + record_ty.fields[1] = .{ .name = try comp.intern("fp_offset"), .ty = .{ .specifier = .uint } }; + record_ty.fields[2] = .{ .name = try comp.intern("overflow_arg_area"), .ty = void_ptr }; + record_ty.fields[3] = .{ .name = try comp.intern("reg_save_area"), .ty = void_ptr }; ty = .{ .specifier = .@"struct", .data = .{ .record = record_ty } }; }, } diff --git a/src/Parser.zig b/src/Parser.zig index c9578bc9..8fff934e 100644 --- a/src/Parser.zig +++ b/src/Parser.zig @@ -19,6 +19,7 @@ const CharInfo = @import("CharInfo.zig"); const Value = @import("Value.zig"); const SymbolStack = @import("SymbolStack.zig"); const Symbol = SymbolStack.Symbol; +const StringId = @import("StringInterner.zig").StringId; const Parser = @This(); @@ -134,13 +135,12 @@ record: 
struct { start: usize = 0, field_attr_start: usize = 0, - fn addField(r: @This(), p: *Parser, tok: TokenIndex) Error!void { - const name = p.tokSlice(tok); + fn addField(r: @This(), p: *Parser, name: StringId, tok: TokenIndex) Error!void { var i = p.record_members.items.len; while (i > r.start) { i -= 1; - if (mem.eql(u8, p.record_members.items[i].name, name)) { - try p.errStr(.duplicate_member, tok, name); + if (p.record_members.items[i].name == name) { + try p.errStr(.duplicate_member, tok, p.tokSlice(tok)); try p.errTok(.previous_definition, p.record_members.items[i].tok); break; } @@ -153,12 +153,12 @@ record: struct { if (f.isAnonymousRecord()) { try r.addFieldsFromAnonymous(p, f.ty.canonicalize(.standard)); } else if (f.name_tok != 0) { - try r.addField(p, f.name_tok); + try r.addField(p, f.name, f.name_tok); } } } } = .{}, -record_members: std.ArrayListUnmanaged(struct { tok: TokenIndex, name: []const u8 }) = .{}, +record_members: std.ArrayListUnmanaged(struct { tok: TokenIndex, name: StringId }) = .{}, @"switch": ?*Switch = null, in_loop: bool = false, pragma_pack: u8 = 8, @@ -344,7 +344,8 @@ pub fn typeStr(p: *Parser, ty: Type) ![]const u8 { const strings_top = p.strings.items.len; defer p.strings.items.len = strings_top; - try ty.print(p.strings.writer()); + const mapper = p.comp.string_interner.getSlowTypeMapper(); + try ty.print(mapper, p.strings.writer()); return try p.comp.diag.arena.allocator().dupe(u8, p.strings.items[strings_top..]); } @@ -357,11 +358,12 @@ pub fn typePairStrExtra(p: *Parser, a: Type, msg: []const u8, b: Type) ![]const defer p.strings.items.len = strings_top; try p.strings.append('\''); - try a.print(p.strings.writer()); + const mapper = p.comp.string_interner.getSlowTypeMapper(); + try a.print(mapper, p.strings.writer()); try p.strings.append('\''); try p.strings.appendSlice(msg); try p.strings.append('\''); - try b.print(p.strings.writer()); + try b.print(mapper, p.strings.writer()); try p.strings.append('\''); return try p.comp.diag.arena.allocator().dupe(u8, p.strings.items[strings_top..]); } @@ -545,18 +547,18 @@ pub fn parse(pp: *Preprocessor) Compilation.Error!Tree { _ = try p.addNode(.{ .tag = .invalid, .ty = undefined, .data = undefined }); { - try p.syms.defineTypedef(&p, "__int128_t", .{ .specifier = .int128 }, 0, .none); - try p.syms.defineTypedef(&p, "__uint128_t", .{ .specifier = .uint128 }, 0, .none); + try p.syms.defineTypedef(&p, try p.comp.intern("__int128_t"), .{ .specifier = .int128 }, 0, .none); + try p.syms.defineTypedef(&p, try p.comp.intern("__uint128_t"), .{ .specifier = .uint128 }, 0, .none); const elem_ty = try p.arena.create(Type); elem_ty.* = .{ .specifier = .char }; - try p.syms.defineTypedef(&p, "__builtin_ms_va_list", .{ + try p.syms.defineTypedef(&p, try p.comp.intern("__builtin_ms_va_list"), .{ .specifier = .pointer, .data = .{ .sub_type = elem_ty }, }, 0, .none); const ty = &pp.comp.types.va_list; - try p.syms.defineTypedef(&p, "__builtin_va_list", ty.*, 0, .none); + try p.syms.defineTypedef(&p, try p.comp.intern("__builtin_va_list"), ty.*, 0, .none); if (ty.isArray()) ty.decayArray(); } @@ -742,7 +744,7 @@ fn decl(p: *Parser) Error!bool { var init_d = (try p.initDeclarator(&decl_spec, attr_buf_top)) orelse { _ = try p.expectToken(.semicolon); if (decl_spec.ty.is(.@"enum") or - (decl_spec.ty.isRecord() and !decl_spec.ty.isAnonymousRecord() and + (decl_spec.ty.isRecord() and !decl_spec.ty.isAnonymousRecord(p.comp) and !decl_spec.ty.isTypeof())) // we follow GCC and clang's behavior here { const specifier = 
decl_spec.ty.canonicalize(.standard).specifier; @@ -778,7 +780,8 @@ fn decl(p: *Parser) Error!bool { if (p.func.ty != null) try p.err(.func_not_in_root); const node = try p.addNode(undefined); // reserve space - try p.syms.defineSymbol(p, init_d.d.ty, init_d.d.name, node, .{}); + const interned_declarator_name = try p.comp.intern(p.tokSlice(init_d.d.name)); + try p.syms.defineSymbol(p, interned_declarator_name, init_d.d.ty, init_d.d.name, node, .{}); const func = p.func; p.func = .{ @@ -837,8 +840,9 @@ fn decl(p: *Parser) Error!bool { // find and correct parameter types // TODO check for missing declarations and redefinitions const name_str = p.tokSlice(d.name); + const interned_name = try p.comp.intern(name_str); for (init_d.d.ty.params()) |*param| { - if (mem.eql(u8, param.name, name_str)) { + if (param.name == interned_name) { param.ty = d.ty; break; } @@ -850,7 +854,7 @@ fn decl(p: *Parser) Error!bool { // bypass redefinition check to avoid duplicate errors try p.syms.syms.append(p.gpa, .{ .kind = .def, - .name = name_str, + .name = interned_name, .tok = d.name, .ty = d.ty, .val = .{}, @@ -864,7 +868,7 @@ fn decl(p: *Parser) Error!bool { if (param.ty.hasUnboundVLA()) try p.errTok(.unbound_vla, param.name_tok); if (param.ty.hasIncompleteSize() and !param.ty.is(.void)) try p.errStr(.parameter_incomplete_ty, param.name_tok, try p.typeStr(param.ty)); - if (param.name.len == 0) { + if (param.name == .empty) { try p.errTok(.omitting_parameter_name, param.name_tok); continue; } @@ -919,20 +923,22 @@ fn decl(p: *Parser) Error!bool { } }); try p.decl_buf.append(node); + const interned_name = try p.comp.intern(p.tokSlice(init_d.d.name)); if (decl_spec.storage_class == .typedef) { - try p.syms.defineTypedef(p, p.tokSlice(init_d.d.name), init_d.d.ty, init_d.d.name, node); + try p.syms.defineTypedef(p, interned_name, init_d.d.ty, init_d.d.name, node); } else if (init_d.initializer.node != .none or (p.func.ty != null and decl_spec.storage_class != .@"extern")) { try p.syms.defineSymbol( p, + interned_name, init_d.d.ty, init_d.d.name, node, if (init_d.d.ty.isConst()) init_d.initializer.val else .{}, ); } else { - try p.syms.declareSymbol(p, init_d.d.ty, init_d.d.name, node); + try p.syms.declareSymbol(p, interned_name, init_d.d.ty, init_d.d.name, node); } if (p.eatToken(.comma) == null) break; @@ -1421,7 +1427,8 @@ fn initDeclarator(p: *Parser, decl_spec: *DeclSpec, attr_buf_top: usize) Error!? 
try p.syms.pushScope(p); defer p.syms.popScope(); - try p.syms.declareSymbol(p, init_d.d.ty, init_d.d.name, .none); + const interned_name = try p.comp.intern(p.tokSlice(init_d.d.name)); + try p.syms.declareSymbol(p, interned_name, init_d.d.ty, init_d.d.name, .none); var init_list_expr = try p.initializer(init_d.d.ty); init_d.initializer = init_list_expr; if (!init_list_expr.ty.isArray()) break :init; @@ -1590,7 +1597,8 @@ fn typeSpec(p: *Parser, ty: *Type.Builder) Error!bool { } } if (ty.typedef != null) break; - const typedef = (try p.syms.findTypedef(p, p.tok_i, ty.specifier != .none)) orelse break; + const interned_name = try p.comp.intern(p.tokSlice(p.tok_i)); + const typedef = (try p.syms.findTypedef(p, interned_name, p.tok_i, ty.specifier != .none)) orelse break; if (!ty.combineTypedef(p, typedef.ty, typedef.tok)) break; }, else => break, @@ -1601,7 +1609,7 @@ fn typeSpec(p: *Parser, ty: *Type.Builder) Error!bool { return p.tok_i != start; } -fn getAnonymousName(p: *Parser, kind_tok: TokenIndex) ![]const u8 { +fn getAnonymousName(p: *Parser, kind_tok: TokenIndex) !StringId { const loc = p.pp.tokens.items(.loc)[kind_tok]; const source = p.comp.getSource(loc.id); const line_col = source.lineCol(loc); @@ -1611,11 +1619,12 @@ fn getAnonymousName(p: *Parser, kind_tok: TokenIndex) ![]const u8 { else => "record field", }; - return std.fmt.allocPrint( + const str = try std.fmt.allocPrint( p.arena, "(anonymous {s} at {s}:{d}:{d})", .{ kind_str, source.path, line_col.line_no, line_col.col }, ); + return p.comp.intern(str); } /// recordSpec @@ -1636,18 +1645,19 @@ fn recordSpec(p: *Parser) Error!Type { return error.ParsingFailed; }; // check if this is a reference to a previous type - if (try p.syms.findTag(p, p.tok_ids[kind_tok], ident, p.tok_ids[p.tok_i])) |prev| { + const interned_name = try p.comp.intern(p.tokSlice(ident)); + if (try p.syms.findTag(p, interned_name, p.tok_ids[kind_tok], ident, p.tok_ids[p.tok_i])) |prev| { return prev.ty; } else { // this is a forward declaration, create a new record Type. 
- const record_ty = try Type.Record.create(p.arena, p.tokSlice(ident)); + const record_ty = try Type.Record.create(p.arena, interned_name); const ty = try Attribute.applyTypeAttributes(p, .{ .specifier = if (is_struct) .@"struct" else .@"union", .data = .{ .record = record_ty }, }, attr_buf_top, null); try p.syms.syms.append(p.gpa, .{ .kind = if (is_struct) .@"struct" else .@"union", - .name = record_ty.name, + .name = interned_name, .tok = ident, .ty = ty, .val = .{}, @@ -1667,17 +1677,19 @@ fn recordSpec(p: *Parser) Error!Type { // Get forward declared type or create a new one var defined = false; const record_ty: *Type.Record = if (maybe_ident) |ident| record_ty: { - if (try p.syms.defineTag(p, p.tok_ids[kind_tok], ident)) |prev| { + const ident_str = p.tokSlice(ident); + const interned_name = try p.comp.intern(ident_str); + if (try p.syms.defineTag(p, interned_name, p.tok_ids[kind_tok], ident)) |prev| { if (!prev.ty.hasIncompleteSize()) { // if the record isn't incomplete, this is a redefinition - try p.errStr(.redefinition, ident, p.tokSlice(ident)); + try p.errStr(.redefinition, ident, ident_str); try p.errTok(.previous_definition, prev.tok); } else { defined = true; break :record_ty prev.ty.get(if (is_struct) .@"struct" else .@"union").?.data.record; } } - break :record_ty try Type.Record.create(p.arena, p.tokSlice(ident)); + break :record_ty try Type.Record.create(p.arena, interned_name); } else try Type.Record.create(p.arena, try p.getAnonymousName(kind_tok)); // Initially create ty as a regular non-attributed type, since attributes for a record @@ -1694,7 +1706,7 @@ fn recordSpec(p: *Parser) Error!Type { symbol_index = p.syms.syms.len; try p.syms.syms.append(p.gpa, .{ .kind = if (is_struct) .@"struct" else .@"union", - .name = p.tokSlice(maybe_ident.?), + .name = record_ty.name, .tok = maybe_ident.?, .ty = ty, .val = .{}, @@ -1878,7 +1890,7 @@ fn recordDeclarator(p: *Parser) Error!bool { if (name_tok == 0 and bits_node == .none) unnamed: { if (ty.is(.@"enum") or ty.hasIncompleteSize()) break :unnamed; - if (ty.isAnonymousRecord()) { + if (ty.isAnonymousRecord(p.comp)) { // An anonymous record appears as indirect fields on the parent try p.record_buf.append(.{ .name = try p.getAnonymousName(first_tok), @@ -1896,13 +1908,14 @@ fn recordDeclarator(p: *Parser) Error!bool { } try p.err(.missing_declaration); } else { + const interned_name = if (name_tok != 0) try p.comp.intern(p.tokSlice(name_tok)) else try p.getAnonymousName(first_tok); try p.record_buf.append(.{ - .name = if (name_tok != 0) p.tokSlice(name_tok) else try p.getAnonymousName(first_tok), + .name = interned_name, .ty = ty, .name_tok = name_tok, .bit_width = bits, }); - if (name_tok != 0) try p.record.addField(p, name_tok); + if (name_tok != 0) try p.record.addField(p, interned_name, name_tok); const node = try p.addNode(.{ .tag = .record_field_decl, .ty = ty, @@ -1976,19 +1989,20 @@ fn enumSpec(p: *Parser) Error!Type { return error.ParsingFailed; }; // check if this is a reference to a previous type - if (try p.syms.findTag(p, .keyword_enum, ident, p.tok_ids[p.tok_i])) |prev| { + const interned_name = try p.comp.intern(p.tokSlice(ident)); + if (try p.syms.findTag(p, interned_name, .keyword_enum, ident, p.tok_ids[p.tok_i])) |prev| { try p.checkEnumFixedTy(fixed_ty, ident, prev); return prev.ty; } else { // this is a forward declaration, create a new enum Type. 
- const enum_ty = try Type.Enum.create(p.arena, p.tokSlice(ident), fixed_ty); + const enum_ty = try Type.Enum.create(p.arena, interned_name, fixed_ty); const ty = try Attribute.applyTypeAttributes(p, .{ .specifier = .@"enum", .data = .{ .@"enum" = enum_ty }, }, attr_buf_top, null); try p.syms.syms.append(p.gpa, .{ .kind = .@"enum", - .name = enum_ty.name, + .name = interned_name, .tok = ident, .ty = ty, .val = .{}, @@ -2008,11 +2022,13 @@ fn enumSpec(p: *Parser) Error!Type { // Get forward declared type or create a new one var defined = false; const enum_ty: *Type.Enum = if (maybe_ident) |ident| enum_ty: { - if (try p.syms.defineTag(p, .keyword_enum, ident)) |prev| { + const ident_str = p.tokSlice(ident); + const interned_name = try p.comp.intern(ident_str); + if (try p.syms.defineTag(p, interned_name, .keyword_enum, ident)) |prev| { const enum_ty = prev.ty.get(.@"enum").?.data.@"enum"; if (!enum_ty.isIncomplete() and !enum_ty.fixed) { // if the enum isn't incomplete, this is a redefinition - try p.errStr(.redefinition, ident, p.tokSlice(ident)); + try p.errStr(.redefinition, ident, ident_str); try p.errTok(.previous_definition, prev.tok); } else { try p.checkEnumFixedTy(fixed_ty, ident, prev); @@ -2020,7 +2036,7 @@ fn enumSpec(p: *Parser) Error!Type { break :enum_ty enum_ty; } } - break :enum_ty try Type.Enum.create(p.arena, p.tokSlice(ident), fixed_ty); + break :enum_ty try Type.Enum.create(p.arena, interned_name, fixed_ty); } else try Type.Enum.create(p.arena, try p.getAnonymousName(enum_tok), fixed_ty); // reserve space for this enum @@ -2098,7 +2114,7 @@ fn enumSpec(p: *Parser) Error!Type { if (maybe_ident != null and !defined) { try p.syms.syms.append(p.gpa, .{ .kind = .@"enum", - .name = p.tokSlice(maybe_ident.?), + .name = enum_ty.name, .ty = ty, .tok = maybe_ident.?, .val = .{}, @@ -2298,7 +2314,8 @@ fn enumerator(p: *Parser, e: *Enumerator) Error!?EnumFieldAndNode { } } - try p.syms.defineEnumeration(p, res.ty, name_tok, e.res.val); + const interned_name = try p.comp.intern(p.tokSlice(name_tok)); + try p.syms.defineEnumeration(p, interned_name, res.ty, name_tok, e.res.val); const node = try p.addNode(.{ .tag = .enum_field_decl, .ty = res.ty, @@ -2308,7 +2325,7 @@ fn enumerator(p: *Parser, e: *Enumerator) Error!?EnumFieldAndNode { } }, }); return EnumFieldAndNode{ .field = .{ - .name = p.tokSlice(name_tok), + .name = interned_name, .ty = res.ty, .name_tok = name_tok, .node = res.node, @@ -2565,9 +2582,10 @@ fn directDeclarator(p: *Parser, base_type: Type, d: *Declarator, kind: Declarato specifier = .old_style_func; while (true) { const name_tok = try p.expectIdentifier(); - try p.syms.defineParam(p, undefined, name_tok); + const interned_name = try p.comp.intern(p.tokSlice(name_tok)); + try p.syms.defineParam(p, interned_name, undefined, name_tok); try p.param_buf.append(.{ - .name = p.tokSlice(name_tok), + .name = interned_name, .name_tok = name_tok, .ty = .{ .specifier = .int }, }); @@ -2638,7 +2656,8 @@ fn paramDecls(p: *Parser) Error!?[]Type.Func.Param { name_tok = some.name; param_ty = some.ty; if (some.name != 0) { - try p.syms.defineParam(p, param_ty, name_tok); + const interned_name = try p.comp.intern(p.tokSlice(name_tok)); + try p.syms.defineParam(p, interned_name, param_ty, name_tok); } } param_ty = try Attribute.applyParameterAttributes(p, param_ty, attr_buf_top, .alignas_on_param); @@ -2670,7 +2689,7 @@ fn paramDecls(p: *Parser) Error!?[]Type.Func.Param { try param_decl_spec.validateParam(p, ¶m_ty); try p.param_buf.append(.{ - .name = if (name_tok == 0) "" else 
p.tokSlice(name_tok), + .name = if (name_tok == 0) .empty else try p.comp.intern(p.tokSlice(name_tok)), .name_tok = if (name_tok == 0) first_tok else name_tok, .ty = param_ty, }); @@ -2796,13 +2815,15 @@ fn initializerItem(p: *Parser, il: *InitList, init_ty: Type) Error!bool { cur_ty = cur_ty.elemType(); designation = true; } else if (p.eatToken(.period)) |period| { - const field_name = p.tokSlice(try p.expectIdentifier()); + const field_tok = try p.expectIdentifier(); + const field_str = p.tokSlice(field_tok); + const field_name = try p.comp.intern(field_str); cur_ty = cur_ty.canonicalize(.standard); if (!cur_ty.isRecord()) { try p.errStr(.invalid_field_designator, period, try p.typeStr(cur_ty)); return error.ParsingFailed; } else if (!cur_ty.hasField(field_name)) { - try p.errStr(.no_such_field_designator, period, field_name); + try p.errStr(.no_such_field_designator, period, field_str); return error.ParsingFailed; } @@ -2817,7 +2838,7 @@ fn initializerItem(p: *Parser, il: *InitList, init_ty: Type) Error!bool { cur_index_hint = cur_index_hint orelse i; continue :outer; } - if (std.mem.eql(u8, field_name, f.name)) { + if (field_name == f.name) { cur_il = try cur_il.find(p.gpa, i); cur_ty = f.ty; cur_index_hint = cur_index_hint orelse i; @@ -5531,7 +5552,7 @@ fn builtinOffsetof(p: *Parser) Error!Result { fn offsetofMemberDesignator(p: *Parser, base_ty: Type) Error!Result { errdefer p.skipTo(.r_paren); const base_field_name_tok = try p.expectIdentifier(); - const base_field_name = p.tokSlice(base_field_name_tok); + const base_field_name = try p.comp.intern(p.tokSlice(base_field_name_tok)); try p.validateFieldAccess(base_ty, base_ty, base_field_name_tok, base_field_name); const base_node = try p.addNode(.{ .tag = .default_init_expr, .ty = base_ty, .data = undefined }); @@ -5542,7 +5563,7 @@ fn offsetofMemberDesignator(p: *Parser, base_ty: Type) Error!Result { .period => { p.tok_i += 1; const field_name_tok = try p.expectIdentifier(); - const field_name = p.tokSlice(field_name_tok); + const field_name = try p.comp.intern(p.tokSlice(field_name_tok)); if (!lhs.ty.isRecord()) { try p.errStr(.offsetof_ty, field_name_tok, try p.typeStr(lhs.ty)); @@ -5998,18 +6019,19 @@ fn fieldAccess( if (is_arrow and !is_ptr) try p.errStr(.member_expr_not_ptr, field_name_tok, try p.typeStr(expr_ty)); if (!is_arrow and is_ptr) try p.errStr(.member_expr_ptr, field_name_tok, try p.typeStr(expr_ty)); - const field_name = p.tokSlice(field_name_tok); + const field_name = try p.comp.intern(p.tokSlice(field_name_tok)); try p.validateFieldAccess(record_ty, expr_ty, field_name_tok, field_name); return p.fieldAccessExtra(lhs.node, record_ty, field_name, is_arrow); } -fn validateFieldAccess(p: *Parser, record_ty: Type, expr_ty: Type, field_name_tok: TokenIndex, field_name: []const u8) Error!void { +fn validateFieldAccess(p: *Parser, record_ty: Type, expr_ty: Type, field_name_tok: TokenIndex, field_name: StringId) Error!void { if (record_ty.hasField(field_name)) return; p.strings.items.len = 0; - try p.strings.writer().print("'{s}' in '", .{field_name}); - try expr_ty.print(p.strings.writer()); + try p.strings.writer().print("'{s}' in '", .{p.tokSlice(field_name_tok)}); + const mapper = p.comp.string_interner.getSlowTypeMapper(); + try expr_ty.print(mapper, p.strings.writer()); try p.strings.append('\''); const duped = try p.comp.diag.arena.allocator().dupe(u8, p.strings.items); @@ -6017,7 +6039,7 @@ fn validateFieldAccess(p: *Parser, record_ty: Type, expr_ty: Type, field_name_to return error.ParsingFailed; } -fn 
fieldAccessExtra(p: *Parser, lhs: NodeIndex, record_ty: Type, field_name: []const u8, is_arrow: bool) Error!Result { +fn fieldAccessExtra(p: *Parser, lhs: NodeIndex, record_ty: Type, field_name: StringId, is_arrow: bool) Error!Result { for (record_ty.data.record.fields) |f, i| { if (f.isAnonymousRecord()) { if (!f.ty.hasField(field_name)) continue; @@ -6028,7 +6050,7 @@ fn fieldAccessExtra(p: *Parser, lhs: NodeIndex, record_ty: Type, field_name: []c }); return p.fieldAccessExtra(inner, f.ty, field_name, false); } - if (std.mem.eql(u8, field_name, f.name)) return Result{ + if (field_name == f.name) return Result{ .ty = f.ty, .node = try p.addNode(.{ .tag = if (is_arrow) .member_access_ptr_expr else .member_access_expr, @@ -6097,9 +6119,7 @@ fn callExpr(p: *Parser, lhs: Result) Error!Result { } const last_param_name = func_params[func_params.len - 1].name; const decl_ref = p.getNode(raw_arg_node, .decl_ref_expr); - if (decl_ref == null or - !mem.eql(u8, p.tokSlice(p.nodes.items(.data)[@enumToInt(decl_ref.?)].decl_ref), last_param_name)) - { + if (decl_ref == null or last_param_name != try p.comp.intern(p.tokSlice(p.nodes.items(.data)[@enumToInt(decl_ref.?)].decl_ref))) { try p.errTok(.va_start_not_last_param, param_tok); } } else { @@ -6227,6 +6247,7 @@ fn primaryExpr(p: *Parser) Error!Result { .identifier, .extended_identifier => { const name_tok = p.expectIdentifier() catch unreachable; const name = p.tokSlice(name_tok); + const interned_name = try p.comp.intern(name); if (p.comp.builtins.get(name)) |some| { for (p.tok_ids[p.tok_i..]) |id| switch (id) { .r_paren => {}, // closing grouped expr @@ -6245,7 +6266,7 @@ fn primaryExpr(p: *Parser) Error!Result { }), }; } - if (p.syms.findSymbol(p, name_tok)) |sym| { + if (p.syms.findSymbol(interned_name)) |sym| { try p.checkDeprecatedUnavailable(sym.ty, name_tok, sym.tok); if (sym.val.tag == .int) { switch (p.const_decl_folding) { @@ -6281,7 +6302,7 @@ fn primaryExpr(p: *Parser) Error!Result { }); try p.decl_buf.append(node); - try p.syms.declareSymbol(p, ty, name_tok, node); + try p.syms.declareSymbol(p, interned_name, ty, name_tok, node); return Result{ .ty = ty, @@ -6333,8 +6354,9 @@ fn primaryExpr(p: *Parser) Error!Result { if (p.func.pretty_ident) |some| { ty = some.ty; } else if (p.func.ty) |func_ty| { + const mapper = p.comp.string_interner.getSlowTypeMapper(); p.strings.items.len = 0; - try Type.printNamed(func_ty, p.tokSlice(p.func.name), p.strings.writer()); + try Type.printNamed(func_ty, p.tokSlice(p.func.name), mapper, p.strings.writer()); try p.strings.append(0); const predef = try p.makePredefinedIdentifier(); ty = predef.ty; diff --git a/src/StringInterner.zig b/src/StringInterner.zig new file mode 100644 index 00000000..1320c757 --- /dev/null +++ b/src/StringInterner.zig @@ -0,0 +1,78 @@ +const std = @import("std"); +const mem = std.mem; + +const StringInterner = @This(); + +const StringToIdMap = std.StringHashMapUnmanaged(StringId); + +pub const StringId = enum(u32) { + empty, + _, +}; + +pub const TypeMapper = struct { + const LookupSpeed = enum { + fast, + slow, + }; + + data: union(LookupSpeed) { + fast: []const []const u8, + slow: *const StringToIdMap, + }, + + pub fn lookup(self: *const TypeMapper, string_id: StringInterner.StringId) []const u8 { + if (string_id == .empty) return ""; + switch (self.data) { + .fast => |arr| return arr[@enumToInt(string_id)], + .slow => |map| { + var it = map.iterator(); + while (it.next()) |entry| { + if (entry.value_ptr.* == string_id) return entry.key_ptr.*; + } + unreachable; + }, + } + } 
+
+    pub fn deinit(self: *const TypeMapper, allocator: mem.Allocator) void {
+        switch (self.data) {
+            .slow => {},
+            .fast => |arr| allocator.free(arr),
+        }
+    }
+};
+
+string_table: StringToIdMap = .{},
+next_id: StringId = @intToEnum(StringId, @enumToInt(StringId.empty) + 1),
+
+pub fn deinit(self: *StringInterner, allocator: mem.Allocator) void {
+    self.string_table.deinit(allocator);
+}
+
+pub fn intern(self: *StringInterner, allocator: mem.Allocator, str: []const u8) !StringId {
+    if (str.len == 0) return .empty;
+
+    const gop = try self.string_table.getOrPut(allocator, str);
+    if (gop.found_existing) return gop.value_ptr.*;
+
+    defer self.next_id = @intToEnum(StringId, @enumToInt(self.next_id) + 1);
+    gop.value_ptr.* = self.next_id;
+    return self.next_id;
+}
+
+/// deinit for the returned TypeMapper is a no-op and does not need to be called
+pub fn getSlowTypeMapper(self: *const StringInterner) TypeMapper {
+    return TypeMapper{ .data = .{ .slow = &self.string_table } };
+}
+
+/// Caller must call `deinit` on the returned TypeMapper
+pub fn getFastTypeMapper(self: *const StringInterner, allocator: mem.Allocator) !TypeMapper {
+    var strings = try allocator.alloc([]const u8, @enumToInt(self.next_id));
+    var it = self.string_table.iterator();
+    strings[0] = "";
+    while (it.next()) |entry| {
+        strings[@enumToInt(entry.value_ptr.*)] = entry.key_ptr.*;
+    }
+    return TypeMapper{ .data = .{ .fast = strings } };
+}
diff --git a/src/SymbolStack.zig b/src/SymbolStack.zig
index 2ab81ffb..b5258a52 100644
--- a/src/SymbolStack.zig
+++ b/src/SymbolStack.zig
@@ -9,11 +9,12 @@ const NodeIndex = Tree.NodeIndex;
 const Type = @import("Type.zig");
 const Parser = @import("Parser.zig");
 const Value = @import("Value.zig");
+const StringId = @import("StringInterner.zig").StringId;
 
 const SymbolStack = @This();
 
 pub const Symbol = struct {
-    name: []const u8,
+    name: StringId,
     ty: Type,
     tok: TokenIndex,
     node: NodeIndex = .none,
@@ -53,46 +54,44 @@ pub fn popScope(s: *SymbolStack) void {
     s.syms.len = s.scopes.pop();
 }
 
-pub fn findTypedef(s: *SymbolStack, p: *Parser, name_tok: TokenIndex, no_type_yet: bool) !?Symbol {
-    const name = p.tokSlice(name_tok);
+pub fn findTypedef(s: *SymbolStack, p: *Parser, name: StringId, name_tok: TokenIndex, no_type_yet: bool) !?Symbol {
     const kinds = s.syms.items(.kind);
     const names = s.syms.items(.name);
     var i = s.syms.len;
     while (i > 0) {
         i -= 1;
         switch (kinds[i]) {
-            .typedef => if (mem.eql(u8, names[i], name)) return s.syms.get(i),
-            .@"struct" => if (mem.eql(u8, names[i], name)) {
+            .typedef => if (names[i] == name) return s.syms.get(i),
+            .@"struct" => if (names[i] == name) {
                 if (no_type_yet) return null;
-                try p.errStr(.must_use_struct, name_tok, name);
+                try p.errStr(.must_use_struct, name_tok, p.tokSlice(name_tok));
                 return s.syms.get(i);
             },
-            .@"union" => if (mem.eql(u8, names[i], name)) {
+            .@"union" => if (names[i] == name) {
                 if (no_type_yet) return null;
-                try p.errStr(.must_use_union, name_tok, name);
+                try p.errStr(.must_use_union, name_tok, p.tokSlice(name_tok));
                 return s.syms.get(i);
             },
-            .@"enum" => if (mem.eql(u8, names[i], name)) {
+            .@"enum" => if (names[i] == name) {
                 if (no_type_yet) return null;
-                try p.errStr(.must_use_enum, name_tok, name);
+                try p.errStr(.must_use_enum, name_tok, p.tokSlice(name_tok));
                 return s.syms.get(i);
             },
-            .def, .decl => if (mem.eql(u8, names[i], name)) return null,
+            .def, .decl => if (names[i] == name) return null,
             else => {},
         }
     }
     return null;
 }
 
-pub fn findSymbol(s: *SymbolStack, p: *Parser, name_tok: TokenIndex) ?Symbol {
-    const name = p.tokSlice(name_tok);
+pub fn findSymbol(s: *SymbolStack, name: StringId) ?Symbol {
     const kinds = s.syms.items(.kind);
     const names = s.syms.items(.name);
     var i = s.syms.len;
     while (i > 0) {
         i -= 1;
         switch (kinds[i]) {
-            .def, .decl, .enumeration => if (mem.eql(u8, names[i], name)) return s.syms.get(i),
+            .def, .decl, .enumeration => if (names[i] == name) return s.syms.get(i),
             else => {},
         }
    }
@@ -102,11 +101,11 @@ pub fn findSymbol(s: *SymbolStack, name: StringId) ?Symbol {
 pub fn findTag(
     s: *SymbolStack,
     p: *Parser,
+    name: StringId,
     kind: Token.Id,
     name_tok: TokenIndex,
     next_tok_id: Token.Id,
 ) !?Symbol {
-    const name = p.tokSlice(name_tok);
     const kinds = s.syms.items(.kind);
     const names = s.syms.items(.name);
     // `tag Name;` should always result in a new type if in a new scope.
@@ -115,15 +114,15 @@ pub fn findTag(
     while (i > end) {
         i -= 1;
         switch (kinds[i]) {
-            .@"enum" => if (mem.eql(u8, names[i], name)) {
+            .@"enum" => if (names[i] == name) {
                 if (kind == .keyword_enum) return s.syms.get(i);
                 break;
             },
-            .@"struct" => if (mem.eql(u8, names[i], name)) {
+            .@"struct" => if (names[i] == name) {
                 if (kind == .keyword_struct) return s.syms.get(i);
                 break;
             },
-            .@"union" => if (mem.eql(u8, names[i], name)) {
+            .@"union" => if (names[i] == name) {
                 if (kind == .keyword_union) return s.syms.get(i);
                 break;
             },
@@ -132,7 +131,7 @@ pub fn findTag(
        }
     } else return null;
 
     if (i < s.scopeEnd()) return null;
-    try p.errStr(.wrong_tag, name_tok, name);
+    try p.errStr(.wrong_tag, name_tok, p.tokSlice(name_tok));
     try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
     return null;
 }
@@ -140,7 +139,7 @@ pub fn findTag(
 pub fn defineTypedef(
     s: *SymbolStack,
     p: *Parser,
-    name: []const u8,
+    name: StringId,
     ty: Type,
     tok: TokenIndex,
     node: NodeIndex,
@@ -152,7 +151,7 @@ pub fn defineTypedef(
     while (i > end) {
         i -= 1;
         switch (kinds[i]) {
-            .typedef => if (mem.eql(u8, names[i], name)) {
+            .typedef => if (names[i] == name) {
                 const prev_ty = s.syms.items(.ty)[i];
                 if (ty.eql(prev_ty, p.pp.comp, true)) break;
                 try p.errStr(.redefinition_of_typedef, tok, try p.typePairStrExtra(ty, " vs ", prev_ty));
@@ -176,12 +175,12 @@ pub fn defineTypedef(
 pub fn defineSymbol(
     s: *SymbolStack,
     p: *Parser,
+    name: StringId,
     ty: Type,
     tok: TokenIndex,
     node: NodeIndex,
     val: Value,
 ) !void {
-    const name = p.tokSlice(tok);
     const kinds = s.syms.items(.kind);
     const names = s.syms.items(.name);
     const end = s.scopeEnd();
@@ -189,21 +188,21 @@ pub fn defineSymbol(
     while (i > end) {
         i -= 1;
         switch (kinds[i]) {
-            .enumeration => if (mem.eql(u8, names[i], name)) {
-                try p.errStr(.redefinition_different_sym, tok, name);
+            .enumeration => if (names[i] == name) {
+                try p.errStr(.redefinition_different_sym, tok, p.tokSlice(tok));
                 try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
                 break;
             },
-            .decl => if (mem.eql(u8, names[i], name)) {
+            .decl => if (names[i] == name) {
                 const prev_ty = s.syms.items(.ty)[i];
                 if (!ty.eql(prev_ty, p.pp.comp, true)) { // TODO adjusted equality check
-                    try p.errStr(.redefinition_incompatible, tok, name);
+                    try p.errStr(.redefinition_incompatible, tok, p.tokSlice(tok));
                     try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
                 }
                 break;
             },
-            .def => if (mem.eql(u8, names[i], name)) {
-                try p.errStr(.redefinition, tok, name);
+            .def => if (names[i] == name) {
+                try p.errStr(.redefinition, tok, p.tokSlice(tok));
                 try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
                 break;
             },
@@ -223,11 +222,11 @@ pub fn defineSymbol(
 pub fn declareSymbol(
     s: *SymbolStack,
     p: *Parser,
+    name: StringId,
     ty: Type,
     tok: TokenIndex,
     node: NodeIndex,
 ) !void {
-    const name = p.tokSlice(tok);
     const kinds = s.syms.items(.kind);
     const names = s.syms.items(.name);
     const end = s.scopeEnd();
@@ -235,23 +234,23 @@ pub fn declareSymbol(
     while (i > end) {
         i -= 1;
         switch (kinds[i]) {
-            .enumeration => if (mem.eql(u8, names[i], name)) {
-                try p.errStr(.redefinition_different_sym, tok, name);
+            .enumeration => if (names[i] == name) {
+                try p.errStr(.redefinition_different_sym, tok, p.tokSlice(tok));
                 try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
                 break;
             },
-            .decl => if (mem.eql(u8, names[i], name)) {
+            .decl => if (names[i] == name) {
                 const prev_ty = s.syms.items(.ty)[i];
                 if (!ty.eql(prev_ty, p.pp.comp, true)) { // TODO adjusted equality check
-                    try p.errStr(.redefinition_incompatible, tok, name);
+                    try p.errStr(.redefinition_incompatible, tok, p.tokSlice(tok));
                     try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
                 }
                 break;
             },
-            .def => if (mem.eql(u8, names[i], name)) {
+            .def => if (names[i] == name) {
                 const prev_ty = s.syms.items(.ty)[i];
                 if (!ty.eql(prev_ty, p.pp.comp, true)) { // TODO adjusted equality check
-                    try p.errStr(.redefinition_incompatible, tok, name);
+                    try p.errStr(.redefinition_incompatible, tok, p.tokSlice(tok));
                     try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
                     break;
                 }
@@ -270,8 +269,7 @@ pub fn declareSymbol(
     });
 }
 
-pub fn defineParam(s: *SymbolStack, p: *Parser, ty: Type, tok: TokenIndex) !void {
-    const name = p.tokSlice(tok);
+pub fn defineParam(s: *SymbolStack, p: *Parser, name: StringId, ty: Type, tok: TokenIndex) !void {
     const kinds = s.syms.items(.kind);
     const names = s.syms.items(.name);
     const end = s.scopeEnd();
@@ -279,8 +277,8 @@ pub fn defineParam(s: *SymbolStack, p: *Parser, name: StringId, ty: Type, tok: T
     while (i > end) {
         i -= 1;
         switch (kinds[i]) {
-            .enumeration, .decl, .def => if (mem.eql(u8, names[i], name)) {
-                try p.errStr(.redefinition_of_parameter, tok, name);
+            .enumeration, .decl, .def => if (names[i] == name) {
+                try p.errStr(.redefinition_of_parameter, tok, p.tokSlice(tok));
                 try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
                 break;
             },
@@ -299,10 +297,10 @@ pub fn defineParam(s: *SymbolStack, p: *Parser, name: StringId, ty: Type, tok: T
 pub fn defineTag(
     s: *SymbolStack,
     p: *Parser,
+    name: StringId,
     kind: Token.Id,
     tok: TokenIndex,
 ) !?Symbol {
-    const name = p.tokSlice(tok);
     const kinds = s.syms.items(.kind);
     const names = s.syms.items(.name);
     const end = s.scopeEnd();
@@ -310,21 +308,21 @@ pub fn defineTag(
     while (i > end) {
         i -= 1;
         switch (kinds[i]) {
-            .@"enum" => if (mem.eql(u8, names[i], name)) {
+            .@"enum" => if (names[i] == name) {
                 if (kind == .keyword_enum) return s.syms.get(i);
-                try p.errStr(.wrong_tag, tok, name);
+                try p.errStr(.wrong_tag, tok, p.tokSlice(tok));
                 try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
                 return null;
             },
-            .@"struct" => if (mem.eql(u8, names[i], name)) {
+            .@"struct" => if (names[i] == name) {
                 if (kind == .keyword_struct) return s.syms.get(i);
-                try p.errStr(.wrong_tag, tok, name);
+                try p.errStr(.wrong_tag, tok, p.tokSlice(tok));
                 try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
                 return null;
             },
-            .@"union" => if (mem.eql(u8, names[i], name)) {
+            .@"union" => if (names[i] == name) {
                 if (kind == .keyword_union) return s.syms.get(i);
-                try p.errStr(.wrong_tag, tok, name);
+                try p.errStr(.wrong_tag, tok, p.tokSlice(tok));
                 try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
                 return null;
             },
@@ -337,11 +335,11 @@ pub fn defineTag(
 pub fn defineEnumeration(
     s: *SymbolStack,
     p: *Parser,
+    name: StringId,
     ty: Type,
     tok: TokenIndex,
     val: Value,
 ) !void {
-    const name = p.tokSlice(tok);
     const kinds = s.syms.items(.kind);
     const names = s.syms.items(.name);
     const end = s.scopeEnd();
@@ -349,13 +347,13 @@ pub fn defineEnumeration(
     while (i > end) {
         i -= 1;
         switch (kinds[i]) {
-            .enumeration => if (mem.eql(u8, names[i], name)) {
-                try p.errStr(.redefinition, tok, name);
+            .enumeration => if (names[i] == name) {
+                try p.errStr(.redefinition, tok, p.tokSlice(tok));
                 try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
                 return;
             },
-            .decl, .def => if (mem.eql(u8, names[i], name)) {
-                try p.errStr(.redefinition_different_sym, tok, name);
+            .decl, .def => if (names[i] == name) {
+                try p.errStr(.redefinition_different_sym, tok, p.tokSlice(tok));
                 try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
                 return;
             },
diff --git a/src/Tree.zig b/src/Tree.zig
index bf0e9498..e0cdf46d 100644
--- a/src/Tree.zig
+++ b/src/Tree.zig
@@ -5,6 +5,7 @@ const Compilation = @import("Compilation.zig");
 const Source = @import("Source.zig");
 const Attribute = @import("Attribute.zig");
 const Value = @import("Value.zig");
+const StringInterner = @import("StringInterner.zig");
 
 const Tree = @This();
 
@@ -605,8 +606,11 @@ pub fn tokSlice(tree: Tree, tok_i: TokenIndex) []const u8 {
 }
 
 pub fn dump(tree: Tree, color: bool, writer: anytype) @TypeOf(writer).Error!void {
+    const mapper = tree.comp.string_interner.getFastTypeMapper(tree.comp.gpa) catch tree.comp.string_interner.getSlowTypeMapper();
+    defer mapper.deinit(tree.comp.gpa);
+
     for (tree.root_decls) |i| {
-        try tree.dumpNode(i, 0, color, writer);
+        try tree.dumpNode(i, 0, mapper, color, writer);
         try writer.writeByte('\n');
     }
 }
@@ -649,7 +653,7 @@ fn dumpAttribute(attr: Attribute, writer: anytype) !void {
     }
 }
 
-fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @TypeOf(w).Error!void {
+fn dumpNode(tree: Tree, node: NodeIndex, level: u32, mapper: StringInterner.TypeMapper, color: bool, w: anytype) @TypeOf(w).Error!void {
     const delta = 2;
     const half = delta / 2;
     const util = @import("util.zig");
@@ -674,7 +678,7 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
     }
     if (color) util.setColor(TYPE, w);
     try w.writeByte('\'');
-    try ty.dump(w);
+    try ty.dump(mapper, w);
     try w.writeByte('\'');
 
     if (isLval(tree.nodes, tree.data, tree.value_map, node)) {
@@ -705,11 +709,11 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
         .static_assert => {
             try w.writeByteNTimes(' ', level + 1);
             try w.writeAll("condition:\n");
-            try tree.dumpNode(data.bin.lhs, level + delta, color, w);
+            try tree.dumpNode(data.bin.lhs, level + delta, mapper, color, w);
             if (data.bin.rhs != .none) {
                 try w.writeByteNTimes(' ', level + 1);
                 try w.writeAll("diagnostic:\n");
-                try tree.dumpNode(data.bin.rhs, level + delta, color, w);
+                try tree.dumpNode(data.bin.rhs, level + delta, mapper, color, w);
             }
         },
         .fn_proto,
@@ -735,7 +739,7 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
             if (color) util.setColor(.reset, w);
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("body:\n");
-            try tree.dumpNode(data.decl.node, level + delta, color, w);
+            try tree.dumpNode(data.decl.node, level + delta, mapper, color, w);
         },
         .typedef,
         .@"var",
@@ -754,7 +758,7 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
             if (data.decl.node != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("init:\n");
-                try tree.dumpNode(data.decl.node, level + delta, color, w);
+                try tree.dumpNode(data.decl.node, level + delta, mapper, color, w);
             }
         },
         .enum_field_decl => {
@@ -766,7 +770,7 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
             if (data.decl.node != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("value:\n");
-                try tree.dumpNode(data.decl.node, level + delta, color, w);
+                try tree.dumpNode(data.decl.node, level + delta, mapper, color, w);
             }
         },
         .record_field_decl => {
@@ -780,7 +784,7 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
             if (data.decl.node != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("bits:\n");
-                try tree.dumpNode(data.decl.node, level + delta, color, w);
+                try tree.dumpNode(data.decl.node, level + delta, mapper, color, w);
             }
         },
         .indirect_record_field_decl => {},
@@ -794,7 +798,7 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
             const maybe_field_attributes = if (ty.getRecord()) |record| record.field_attributes else null;
             for (tree.data[data.range.start..data.range.end]) |stmt, i| {
                 if (i != 0) try w.writeByte('\n');
-                try tree.dumpNode(stmt, level + delta, color, w);
+                try tree.dumpNode(stmt, level + delta, mapper, color, w);
                 if (maybe_field_attributes) |field_attributes| {
                     if (field_attributes[i].len == 0) continue;
 
@@ -815,7 +819,7 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
             const empty: [][]const Attribute = &attr_array;
             const field_attributes = if (ty.getRecord()) |record| (record.field_attributes orelse empty.ptr) else empty.ptr;
             if (data.bin.lhs != .none) {
-                try tree.dumpNode(data.bin.lhs, level + delta, color, w);
+                try tree.dumpNode(data.bin.lhs, level + delta, mapper, color, w);
                 if (field_attributes[0].len > 0) {
                     if (color) util.setColor(ATTRIBUTE, w);
                     try dumpFieldAttributes(field_attributes[0], level + delta + half, w);
@@ -823,7 +827,7 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
                 }
             }
             if (data.bin.rhs != .none) {
-                try tree.dumpNode(data.bin.rhs, level + delta, color, w);
+                try tree.dumpNode(data.bin.rhs, level + delta, mapper, color, w);
                 if (field_attributes[1].len > 0) {
                     if (color) util.setColor(ATTRIBUTE, w);
                     try dumpFieldAttributes(field_attributes[1], level + delta + half, w);
@@ -838,11 +842,11 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
             try w.print("{d}\n", .{data.union_init.field_index});
             if (color) util.setColor(.reset, w);
             if (data.union_init.node != .none) {
-                try tree.dumpNode(data.union_init.node, level + delta, color, w);
+                try tree.dumpNode(data.union_init.node, level + delta, mapper, color, w);
             }
         },
         .compound_literal_expr => {
-            try tree.dumpNode(data.un, level + half, color, w);
+            try tree.dumpNode(data.un, level + half, mapper, color, w);
         },
         .labeled_stmt => {
             try w.writeByteNTimes(' ', level + half);
@@ -853,83 +857,83 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
            if (data.decl.node != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("stmt:\n");
-                try tree.dumpNode(data.decl.node, level + delta, color, w);
+                try tree.dumpNode(data.decl.node, level + delta, mapper, color, w);
             }
         },
         .case_stmt => {
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("value:\n");
-            try tree.dumpNode(data.bin.lhs, level + delta, color, w);
+            try tree.dumpNode(data.bin.lhs, level + delta, mapper, color, w);
             if (data.bin.rhs != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("stmt:\n");
-                try tree.dumpNode(data.bin.rhs, level + delta, color, w);
+                try tree.dumpNode(data.bin.rhs, level + delta, mapper, color, w);
             }
         },
         .case_range_stmt => {
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("range start:\n");
-            try tree.dumpNode(tree.data[data.if3.body], level + delta, color, w);
+            try tree.dumpNode(tree.data[data.if3.body], level + delta, mapper, color, w);
 
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("range end:\n");
-            try tree.dumpNode(tree.data[data.if3.body + 1], level + delta, color, w);
+            try tree.dumpNode(tree.data[data.if3.body + 1], level + delta, mapper, color, w);
 
             if (data.if3.cond != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("stmt:\n");
-                try tree.dumpNode(data.if3.cond, level + delta, color, w);
+                try tree.dumpNode(data.if3.cond, level + delta, mapper, color, w);
             }
         },
         .default_stmt => {
             if (data.un != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("stmt:\n");
-                try tree.dumpNode(data.un, level + delta, color, w);
+                try tree.dumpNode(data.un, level + delta, mapper, color, w);
             }
         },
         .binary_cond_expr, .cond_expr, .if_then_else_stmt, .builtin_choose_expr => {
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("cond:\n");
-            try tree.dumpNode(data.if3.cond, level + delta, color, w);
+            try tree.dumpNode(data.if3.cond, level + delta, mapper, color, w);
 
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("then:\n");
-            try tree.dumpNode(tree.data[data.if3.body], level + delta, color, w);
+            try tree.dumpNode(tree.data[data.if3.body], level + delta, mapper, color, w);
 
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("else:\n");
-            try tree.dumpNode(tree.data[data.if3.body + 1], level + delta, color, w);
+            try tree.dumpNode(tree.data[data.if3.body + 1], level + delta, mapper, color, w);
         },
         .if_else_stmt => {
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("cond:\n");
-            try tree.dumpNode(data.bin.lhs, level + delta, color, w);
+            try tree.dumpNode(data.bin.lhs, level + delta, mapper, color, w);
 
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("else:\n");
-            try tree.dumpNode(data.bin.rhs, level + delta, color, w);
+            try tree.dumpNode(data.bin.rhs, level + delta, mapper, color, w);
         },
         .if_then_stmt => {
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("cond:\n");
-            try tree.dumpNode(data.bin.lhs, level + delta, color, w);
+            try tree.dumpNode(data.bin.lhs, level + delta, mapper, color, w);
 
             if (data.bin.rhs != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("then:\n");
-                try tree.dumpNode(data.bin.rhs, level + delta, color, w);
+                try tree.dumpNode(data.bin.rhs, level + delta, mapper, color, w);
             }
         },
         .switch_stmt, .while_stmt, .do_while_stmt => {
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("cond:\n");
-            try tree.dumpNode(data.bin.lhs, level + delta, color, w);
+            try tree.dumpNode(data.bin.lhs, level + delta, mapper, color, w);
             if (data.bin.rhs != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("body:\n");
-                try tree.dumpNode(data.bin.rhs, level + delta, color, w);
+                try tree.dumpNode(data.bin.rhs, level + delta, mapper, color, w);
             }
         },
         .for_decl_stmt => {
             const for_decl = data.forDecl(tree);
@@ -938,30 +942,30 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("decl:\n");
             for (for_decl.decls) |decl| {
-                try tree.dumpNode(decl, level + delta, color, w);
+                try tree.dumpNode(decl, level + delta, mapper, color, w);
                 try w.writeByte('\n');
             }
             if (for_decl.cond != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("cond:\n");
-                try tree.dumpNode(for_decl.cond, level + delta, color, w);
+                try tree.dumpNode(for_decl.cond, level + delta, mapper, color, w);
             }
             if (for_decl.incr != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("incr:\n");
-                try tree.dumpNode(for_decl.incr, level + delta, color, w);
+                try tree.dumpNode(for_decl.incr, level + delta, mapper, color, w);
             }
             if (for_decl.body != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("body:\n");
-                try tree.dumpNode(for_decl.body, level + delta, color, w);
+                try tree.dumpNode(for_decl.body, level + delta, mapper, color, w);
             }
         },
         .forever_stmt => {
             if (data.un != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("body:\n");
-                try tree.dumpNode(data.un, level + delta, color, w);
+                try tree.dumpNode(data.un, level + delta, mapper, color, w);
             }
         },
         .for_stmt => {
@@ -970,22 +974,22 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
             if (for_stmt.init != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("init:\n");
-                try tree.dumpNode(for_stmt.init, level + delta, color, w);
+                try tree.dumpNode(for_stmt.init, level + delta, mapper, color, w);
             }
             if (for_stmt.cond != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("cond:\n");
-                try tree.dumpNode(for_stmt.cond, level + delta, color, w);
+                try tree.dumpNode(for_stmt.cond, level + delta, mapper, color, w);
             }
             if (for_stmt.incr != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("incr:\n");
-                try tree.dumpNode(for_stmt.incr, level + delta, color, w);
+                try tree.dumpNode(for_stmt.incr, level + delta, mapper, color, w);
             }
             if (for_stmt.body != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("body:\n");
-                try tree.dumpNode(for_stmt.body, level + delta, color, w);
+                try tree.dumpNode(for_stmt.body, level + delta, mapper, color, w);
             }
         },
         .goto_stmt, .addr_of_label => {
@@ -1000,26 +1004,26 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
             if (data.un != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("expr:\n");
-                try tree.dumpNode(data.un, level + delta, color, w);
+                try tree.dumpNode(data.un, level + delta, mapper, color, w);
             }
         },
         .call_expr => {
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("lhs:\n");
-            try tree.dumpNode(tree.data[data.range.start], level + delta, color, w);
+            try tree.dumpNode(tree.data[data.range.start], level + delta, mapper, color, w);
 
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("args:\n");
-            for (tree.data[data.range.start + 1 .. data.range.end]) |arg| try tree.dumpNode(arg, level + delta, color, w);
+            for (tree.data[data.range.start + 1 .. data.range.end]) |arg| try tree.dumpNode(arg, level + delta, mapper, color, w);
         },
         .call_expr_one => {
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("lhs:\n");
-            try tree.dumpNode(data.bin.lhs, level + delta, color, w);
+            try tree.dumpNode(data.bin.lhs, level + delta, mapper, color, w);
             if (data.bin.rhs != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("arg:\n");
-                try tree.dumpNode(data.bin.rhs, level + delta, color, w);
+                try tree.dumpNode(data.bin.rhs, level + delta, mapper, color, w);
             }
         },
         .builtin_call_expr => {
@@ -1031,7 +1035,7 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
 
             try w.writeByteNTimes(' ', level + half);
             try w.writeAll("args:\n");
-            for (tree.data[data.range.start + 1 .. data.range.end]) |arg| try tree.dumpNode(arg, level + delta, color, w);
+            for (tree.data[data.range.start + 1 .. data.range.end]) |arg| try tree.dumpNode(arg, level + delta, mapper, color, w);
         },
         .builtin_call_expr_one => {
             try w.writeByteNTimes(' ', level + half);
@@ -1042,7 +1046,7 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
             if (data.decl.node != .none) {
                 try w.writeByteNTimes(' ', level + half);
                 try w.writeAll("arg:\n");
-                try tree.dumpNode(data.decl.node, level + delta, color, w);
+                try tree.dumpNode(data.decl.node, level + delta, mapper, color, w);
             }
         },
         .comma_expr,
@@ -1078,12 +1082,12 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
         => {
             try w.writeByteNTimes(' ', level + 1);
             try w.writeAll("lhs:\n");
-            try tree.dumpNode(data.bin.lhs, level + delta, color, w);
+            try tree.dumpNode(data.bin.lhs, level + delta, mapper, color, w);
             try w.writeByteNTimes(' ', level + 1);
             try w.writeAll("rhs:\n");
-            try tree.dumpNode(data.bin.rhs, level + delta, color, w);
+            try tree.dumpNode(data.bin.rhs, level + delta, mapper, color, w);
         },
-        .explicit_cast, .implicit_cast => try tree.dumpNode(data.cast.operand, level + delta, color, w),
+        .explicit_cast, .implicit_cast => try tree.dumpNode(data.cast.operand, level + delta, mapper, color, w),
         .addr_of_expr,
         .computed_goto_stmt,
         .deref_expr,
@@ -1101,7 +1105,7 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
         => {
             try w.writeByteNTimes(' ', level + 1);
             try w.writeAll("operand:\n");
-            try tree.dumpNode(data.un, level + delta, color, w);
+            try tree.dumpNode(data.un, level + delta, mapper, color, w);
         },
         .decl_ref_expr => {
             try w.writeByteNTimes(' ', level + 1);
@@ -1126,7 +1130,7 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
         .member_access_expr, .member_access_ptr_expr => {
             try w.writeByteNTimes(' ', level + 1);
             try w.writeAll("lhs:\n");
-            try tree.dumpNode(data.member.lhs, level + delta, color, w);
+            try tree.dumpNode(data.member.lhs, level + delta, mapper, color, w);
 
             var lhs_ty = tree.nodes.items(.ty)[@enumToInt(data.member.lhs)];
             if (lhs_ty.isPtr()) lhs_ty = lhs_ty.elemType();
@@ -1135,50 +1139,50 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, color: bool, w: anytype) @T
             try w.writeByteNTimes(' ', level + 1);
             try w.writeAll("name: ");
             if (color) util.setColor(NAME, w);
-            try w.print("{s}\n", .{lhs_ty.data.record.fields[data.member.index].name});
+            try w.print("{s}\n", .{mapper.lookup(lhs_ty.data.record.fields[data.member.index].name)});
             if (color) util.setColor(.reset, w);
         },
         .array_access_expr => {
             if (data.bin.lhs != .none) {
                 try w.writeByteNTimes(' ', level + 1);
                 try w.writeAll("lhs:\n");
-                try tree.dumpNode(data.bin.lhs, level + delta, color, w);
+                try tree.dumpNode(data.bin.lhs, level + delta, mapper, color, w);
             }
             try w.writeByteNTimes(' ', level + 1);
             try w.writeAll("index:\n");
-            try tree.dumpNode(data.bin.rhs, level + delta, color, w);
+            try tree.dumpNode(data.bin.rhs, level + delta, mapper, color, w);
         },
         .sizeof_expr, .alignof_expr => {
             if (data.un != .none) {
                 try w.writeByteNTimes(' ', level + 1);
                 try w.writeAll("expr:\n");
-                try tree.dumpNode(data.un, level + delta, color, w);
+                try tree.dumpNode(data.un, level + delta, mapper, color, w);
             }
         },
         .generic_expr_one => {
             try w.writeByteNTimes(' ', level + 1);
             try w.writeAll("controlling:\n");
-            try tree.dumpNode(data.bin.lhs, level + delta, color, w);
+            try tree.dumpNode(data.bin.lhs, level + delta, mapper, color, w);
             try w.writeByteNTimes(' ', level + 1);
             try w.writeAll("chosen:\n");
-            try tree.dumpNode(data.bin.rhs, level + delta, color, w);
+            try tree.dumpNode(data.bin.rhs, level + delta, mapper, color, w);
         },
         .generic_expr => {
             const nodes = tree.data[data.range.start..data.range.end];
             try w.writeByteNTimes(' ', level + 1);
             try w.writeAll("controlling:\n");
-            try tree.dumpNode(nodes[0], level + delta, color, w);
+            try tree.dumpNode(nodes[0], level + delta, mapper, color, w);
             try w.writeByteNTimes(' ', level + 1);
             try w.writeAll("chosen:\n");
-            try tree.dumpNode(nodes[1], level + delta, color, w);
+            try tree.dumpNode(nodes[1], level + delta, mapper, color, w);
             try w.writeByteNTimes(' ', level + 1);
             try w.writeAll("rest:\n");
             for (nodes[2..]) |expr| {
-                try tree.dumpNode(expr, level + delta, color, w);
+                try tree.dumpNode(expr, level + delta, mapper, color, w);
             }
         },
         .generic_association_expr, .generic_default_expr, .stmt_expr, .imaginary_literal => {
-            try tree.dumpNode(data.un, level + delta, color, w);
+            try tree.dumpNode(data.un, level + delta, mapper, color, w);
         },
         .array_filler_expr => {
             try w.writeByteNTimes(' ', level + 1);
diff --git a/src/Type.zig b/src/Type.zig
index 56118c2e..b7d54111 100644
--- a/src/Type.zig
+++ b/src/Type.zig
@@ -5,6 +5,8 @@ const NodeIndex = Tree.NodeIndex;
 const Parser = @import("Parser.zig");
 const Compilation = @import("Compilation.zig");
 const Attribute = @import("Attribute.zig");
+const StringInterner = @import("StringInterner.zig");
+const StringId = StringInterner.StringId;
 
 const Type = @This();
 
@@ -97,7 +99,7 @@ pub const Func = struct {
     params: []Param,
 
     pub const Param = struct {
-        name: []const u8,
+        name: StringId,
         ty: Type,
         name_tok: TokenIndex,
     };
@@ -135,13 +137,13 @@ pub const Attributed = struct {
 
 // TODO improve memory usage
 pub const Enum = struct {
-    name: []const u8,
+    name: StringId,
     tag_ty: Type,
     fields: []Field,
     fixed: bool,
 
     pub const Field = struct {
-        name: []const u8,
+        name: StringId,
         ty: Type,
         name_tok: TokenIndex,
         node: NodeIndex,
@@ -151,7 +153,7 @@ pub const Enum = struct {
         return e.fields.len == std.math.maxInt(usize);
     }
 
-    pub fn create(allocator: std.mem.Allocator, name: []const u8, fixed_ty: ?Type) !*Enum {
+    pub fn create(allocator: std.mem.Allocator, name: StringId, fixed_ty: ?Type) !*Enum {
         var e = try allocator.create(Enum);
         e.name = name;
         e.fields.len = std.math.maxInt(usize);
@@ -163,7 +165,7 @@ pub const Enum = struct {
 
 // TODO improve memory usage
 pub const Record = struct {
-    name: []const u8,
+    name: StringId,
     fields: []Field,
     size: u64,
     alignment: u29,
@@ -174,7 +176,7 @@ pub const Record = struct {
     field_attributes: ?[*][]const Attribute,
 
     pub const Field = struct {
-        name: []const u8,
+        name: StringId,
         ty: Type,
         /// zero for anonymous fields
         name_tok: TokenIndex = 0,
Record = struct { return r.fields.len == std.math.maxInt(usize); } - pub fn create(allocator: std.mem.Allocator, name: []const u8) !*Record { + pub fn create(allocator: std.mem.Allocator, name: StringId) !*Record { var r = try allocator.create(Record); r.name = name; r.fields.len = std.math.maxInt(usize); @@ -478,14 +480,17 @@ pub fn isRecord(ty: Type) bool { }; } -pub fn isAnonymousRecord(ty: Type) bool { +pub fn isAnonymousRecord(ty: Type, comp: *const Compilation) bool { return switch (ty.specifier) { // anonymous records can be recognized by their names which are in // the format "(anonymous TAG at path:line:col)". - .@"struct", .@"union" => ty.data.record.name[0] == '(', - .typeof_type => ty.data.sub_type.isAnonymousRecord(), - .typeof_expr => ty.data.expr.ty.isAnonymousRecord(), - .attributed => ty.data.attributed.base.isAnonymousRecord(), + .@"struct", .@"union" => { + const mapper = comp.string_interner.getSlowTypeMapper(); + return mapper.lookup(ty.data.record.name)[0] == '('; + }, + .typeof_type => ty.data.sub_type.isAnonymousRecord(comp), + .typeof_expr => ty.data.expr.ty.isAnonymousRecord(comp), + .attributed => ty.data.attributed.base.isAnonymousRecord(comp), else => false, }; } @@ -624,20 +629,20 @@ pub fn hasUnboundVLA(ty: Type) bool { } } -pub fn hasField(ty: Type, name: []const u8) bool { +pub fn hasField(ty: Type, name: StringId) bool { switch (ty.specifier) { .@"struct" => { std.debug.assert(!ty.data.record.isIncomplete()); for (ty.data.record.fields) |f| { if (f.isAnonymousRecord() and f.ty.hasField(name)) return true; - if (std.mem.eql(u8, name, f.name)) return true; + if (name == f.name) return true; } }, .@"union" => { std.debug.assert(!ty.data.record.isIncomplete()); for (ty.data.record.fields) |f| { if (f.isAnonymousRecord() and f.ty.hasField(name)) return true; - if (std.mem.eql(u8, name, f.name)) return true; + if (name == f.name) return true; } }, .typeof_type => return ty.data.sub_type.hasField(name), @@ -1901,25 +1906,27 @@ pub fn hasAttribute(ty: Type, tag: Attribute.Tag) bool { } /// Print type in C style -pub fn print(ty: Type, w: anytype) @TypeOf(w).Error!void { - _ = try ty.printPrologue(w); - try ty.printEpilogue(w); +pub fn print(ty: Type, mapper: StringInterner.TypeMapper, w: anytype) @TypeOf(w).Error!void { + _ = try ty.printPrologue(mapper, w); + try ty.printEpilogue(mapper, w); } -pub fn printNamed(ty: Type, name: []const u8, w: anytype) @TypeOf(w).Error!void { - const simple = try ty.printPrologue(w); +pub fn printNamed(ty: Type, name: []const u8, mapper: StringInterner.TypeMapper, w: anytype) @TypeOf(w).Error!void { + const simple = try ty.printPrologue(mapper, w); if (simple) try w.writeByte(' '); try w.writeAll(name); - try ty.printEpilogue(w); + try ty.printEpilogue(mapper, w); } +const StringGetter = fn (TokenIndex) []const u8; + /// return true if `ty` is simple -fn printPrologue(ty: Type, w: anytype) @TypeOf(w).Error!bool { +fn printPrologue(ty: Type, mapper: StringInterner.TypeMapper, w: anytype) @TypeOf(w).Error!bool { if (ty.qual.atomic) { var non_atomic_ty = ty; non_atomic_ty.qual.atomic = false; try w.writeAll("_Atomic("); - try non_atomic_ty.print(w); + try non_atomic_ty.print(mapper, w); try w.writeAll(")"); return true; } @@ -1934,7 +1941,7 @@ fn printPrologue(ty: Type, w: anytype) @TypeOf(w).Error!bool { .decayed_typeof_expr, => { const elem_ty = ty.elemType(); - const simple = try elem_ty.printPrologue(w); + const simple = try elem_ty.printPrologue(mapper, w); if (simple) try w.writeByte(' '); if (elem_ty.isFunc() or 
elem_ty.isArray()) try w.writeByte('('); try w.writeByte('*'); @@ -1943,23 +1950,23 @@ fn printPrologue(ty: Type, w: anytype) @TypeOf(w).Error!bool { }, .func, .var_args_func, .old_style_func => { const ret_ty = ty.data.func.return_type; - const simple = try ret_ty.printPrologue(w); + const simple = try ret_ty.printPrologue(mapper, w); if (simple) try w.writeByte(' '); return false; }, .array, .static_array, .incomplete_array, .unspecified_variable_len_array, .variable_len_array => { const elem_ty = ty.elemType(); - const simple = try elem_ty.printPrologue(w); + const simple = try elem_ty.printPrologue(mapper, w); if (simple) try w.writeByte(' '); return false; }, .typeof_type, .typeof_expr => { const actual = ty.canonicalize(.standard); - return actual.printPrologue(w); + return actual.printPrologue(mapper, w); }, .attributed => { const actual = ty.canonicalize(.standard); - return actual.printPrologue(w); + return actual.printPrologue(mapper, w); }, else => {}, } @@ -1967,22 +1974,22 @@ fn printPrologue(ty: Type, w: anytype) @TypeOf(w).Error!bool { switch (ty.specifier) { .@"enum" => if (ty.data.@"enum".fixed) { - try w.print("enum {s}: ", .{ty.data.@"enum".name}); - try ty.data.@"enum".tag_ty.dump(w); + try w.print("enum {s}: ", .{mapper.lookup(ty.data.@"enum".name)}); + try ty.data.@"enum".tag_ty.dump(mapper, w); } else { - try w.print("enum {s}", .{ty.data.@"enum".name}); + try w.print("enum {s}", .{mapper.lookup(ty.data.@"enum".name)}); }, - .@"struct" => try w.print("struct {s}", .{ty.data.record.name}), - .@"union" => try w.print("union {s}", .{ty.data.record.name}), + .@"struct" => try w.print("struct {s}", .{mapper.lookup(ty.data.record.name)}), + .@"union" => try w.print("union {s}", .{mapper.lookup(ty.data.record.name)}), .vector => { const len = ty.data.array.len; const elem_ty = ty.data.array.elem; try w.print("__attribute__((__vector_size__({d} * sizeof(", .{len}); - _ = try elem_ty.printPrologue(w); + _ = try elem_ty.printPrologue(mapper, w); try w.writeAll(")))) "); - _ = try elem_ty.printPrologue(w); + _ = try elem_ty.printPrologue(mapper, w); try w.print(" (vector of {d} '", .{len}); - _ = try elem_ty.printPrologue(w); + _ = try elem_ty.printPrologue(mapper, w); try w.writeAll("' values)"); }, else => try w.writeAll(Builder.fromType(ty).str().?), @@ -1990,7 +1997,7 @@ fn printPrologue(ty: Type, w: anytype) @TypeOf(w).Error!bool { return true; } -fn printEpilogue(ty: Type, w: anytype) @TypeOf(w).Error!void { +fn printEpilogue(ty: Type, mapper: StringInterner.TypeMapper, w: anytype) @TypeOf(w).Error!void { if (ty.qual.atomic) return; switch (ty.specifier) { .pointer, @@ -2004,14 +2011,14 @@ fn printEpilogue(ty: Type, w: anytype) @TypeOf(w).Error!void { => { const elem_ty = ty.elemType(); if (elem_ty.isFunc() or elem_ty.isArray()) try w.writeByte(')'); - try elem_ty.printEpilogue(w); + try elem_ty.printEpilogue(mapper, w); }, .func, .var_args_func, .old_style_func => { try w.writeByte('('); for (ty.data.func.params) |param, i| { if (i != 0) try w.writeAll(", "); - _ = try param.ty.printPrologue(w); - try param.ty.printEpilogue(w); + _ = try param.ty.printPrologue(mapper, w); + try param.ty.printEpilogue(mapper, w); } if (ty.specifier != .func) { if (ty.data.func.params.len != 0) try w.writeAll(", "); @@ -2020,40 +2027,40 @@ fn printEpilogue(ty: Type, w: anytype) @TypeOf(w).Error!void { try w.writeAll("void"); } try w.writeByte(')'); - try ty.data.func.return_type.printEpilogue(w); + try ty.data.func.return_type.printEpilogue(mapper, w); }, .array, .static_array => { try 
w.writeByte('['); if (ty.specifier == .static_array) try w.writeAll("static "); try ty.qual.dump(w); try w.print("{d}]", .{ty.data.array.len}); - try ty.data.array.elem.printEpilogue(w); + try ty.data.array.elem.printEpilogue(mapper, w); }, .incomplete_array => { try w.writeByte('['); try ty.qual.dump(w); try w.writeByte(']'); - try ty.data.array.elem.printEpilogue(w); + try ty.data.array.elem.printEpilogue(mapper, w); }, .unspecified_variable_len_array => { try w.writeByte('['); try ty.qual.dump(w); try w.writeAll("*]"); - try ty.data.sub_type.printEpilogue(w); + try ty.data.sub_type.printEpilogue(mapper, w); }, .variable_len_array => { try w.writeByte('['); try ty.qual.dump(w); try w.writeAll("]"); - try ty.data.expr.ty.printEpilogue(w); + try ty.data.expr.ty.printEpilogue(mapper, w); }, .typeof_type, .typeof_expr => { const actual = ty.canonicalize(.standard); - try actual.printEpilogue(w); + try actual.printEpilogue(mapper, w); }, .attributed => { const actual = ty.canonicalize(.standard); - try actual.printEpilogue(w); + try actual.printEpilogue(mapper, w); }, else => {}, } @@ -2063,85 +2070,85 @@ fn printEpilogue(ty: Type, w: anytype) @TypeOf(w).Error!void { const dump_detailed_containers = false; // Print as Zig types since those are actually readable -pub fn dump(ty: Type, w: anytype) @TypeOf(w).Error!void { +pub fn dump(ty: Type, mapper: StringInterner.TypeMapper, w: anytype) @TypeOf(w).Error!void { try ty.qual.dump(w); switch (ty.specifier) { .pointer => { try w.writeAll("*"); - try ty.data.sub_type.dump(w); + try ty.data.sub_type.dump(mapper, w); }, .func, .var_args_func, .old_style_func => { try w.writeAll("fn ("); for (ty.data.func.params) |param, i| { if (i != 0) try w.writeAll(", "); - if (param.name.len != 0) try w.print("{s}: ", .{param.name}); - try param.ty.dump(w); + if (param.name != .empty) try w.print("{s}: ", .{mapper.lookup(param.name)}); + try param.ty.dump(mapper, w); } if (ty.specifier != .func) { if (ty.data.func.params.len != 0) try w.writeAll(", "); try w.writeAll("..."); } try w.writeAll(") "); - try ty.data.func.return_type.dump(w); + try ty.data.func.return_type.dump(mapper, w); }, .array, .static_array, .decayed_array, .decayed_static_array => { if (ty.specifier == .decayed_array or ty.specifier == .decayed_static_array) try w.writeByte('d'); try w.writeByte('['); if (ty.specifier == .static_array or ty.specifier == .decayed_static_array) try w.writeAll("static "); try w.print("{d}]", .{ty.data.array.len}); - try ty.data.array.elem.dump(w); + try ty.data.array.elem.dump(mapper, w); }, .vector => { try w.print("vector({d}, ", .{ty.data.array.len}); - try ty.data.array.elem.dump(w); + try ty.data.array.elem.dump(mapper, w); try w.writeAll(")"); }, .incomplete_array, .decayed_incomplete_array => { if (ty.specifier == .decayed_incomplete_array) try w.writeByte('d'); try w.writeAll("[]"); - try ty.data.array.elem.dump(w); + try ty.data.array.elem.dump(mapper, w); }, .@"enum" => { const enum_ty = ty.data.@"enum"; if (enum_ty.isIncomplete() and !enum_ty.fixed) { - try w.print("enum {s}", .{enum_ty.name}); + try w.print("enum {s}", .{mapper.lookup(enum_ty.name)}); } else { - try w.print("enum {s}: ", .{enum_ty.name}); - try enum_ty.tag_ty.dump(w); + try w.print("enum {s}: ", .{mapper.lookup(enum_ty.name)}); + try enum_ty.tag_ty.dump(mapper, w); } - if (dump_detailed_containers) try dumpEnum(enum_ty, w); + if (dump_detailed_containers) try dumpEnum(enum_ty, mapper, w); }, .@"struct" => { - try w.print("struct {s}", .{ty.data.record.name}); - if 
(dump_detailed_containers) try dumpRecord(ty.data.record, w); + try w.print("struct {s}", .{mapper.lookup(ty.data.record.name)}); + if (dump_detailed_containers) try dumpRecord(ty.data.record, mapper, w); }, .@"union" => { - try w.print("union {s}", .{ty.data.record.name}); - if (dump_detailed_containers) try dumpRecord(ty.data.record, w); + try w.print("union {s}", .{mapper.lookup(ty.data.record.name)}); + if (dump_detailed_containers) try dumpRecord(ty.data.record, mapper, w); }, .unspecified_variable_len_array, .decayed_unspecified_variable_len_array => { if (ty.specifier == .decayed_unspecified_variable_len_array) try w.writeByte('d'); try w.writeAll("[*]"); - try ty.data.sub_type.dump(w); + try ty.data.sub_type.dump(mapper, w); }, .variable_len_array, .decayed_variable_len_array => { if (ty.specifier == .decayed_variable_len_array) try w.writeByte('d'); try w.writeAll("[]"); - try ty.data.expr.ty.dump(w); + try ty.data.expr.ty.dump(mapper, w); }, .typeof_type, .decayed_typeof_type => { try w.writeAll("typeof("); - try ty.data.sub_type.dump(w); + try ty.data.sub_type.dump(mapper, w); try w.writeAll(")"); }, .typeof_expr, .decayed_typeof_expr => { try w.writeAll("typeof(: "); - try ty.data.expr.ty.dump(w); + try ty.data.expr.ty.dump(mapper, w); try w.writeAll(")"); }, .attributed => { try w.writeAll("attributed("); - try ty.data.attributed.base.dump(w); + try ty.data.attributed.base.dump(mapper, w); try w.writeAll(")"); }, .special_va_start => try w.writeAll("(va start param)"), @@ -2149,20 +2156,20 @@ pub fn dump(ty: Type, w: anytype) @TypeOf(w).Error!void { } } -fn dumpEnum(@"enum": *Enum, w: anytype) @TypeOf(w).Error!void { +fn dumpEnum(@"enum": *Enum, mapper: StringInterner.TypeMapper, w: anytype) @TypeOf(w).Error!void { try w.writeAll(" {"); for (@"enum".fields) |field| { - try w.print(" {s} = {d},", .{ field.name, field.value }); + try w.print(" {s} = {d},", .{ mapper.lookup(field.name), field.value }); } try w.writeAll(" }"); } -fn dumpRecord(record: *Record, w: anytype) @TypeOf(w).Error!void { +fn dumpRecord(record: *Record, mapper: StringInterner.TypeMapper, w: anytype) @TypeOf(w).Error!void { try w.writeAll(" {"); for (record.fields) |field| { try w.writeByte(' '); - try field.ty.dump(w); - try w.print(" {s}: {d};", .{ field.name, field.bit_width }); + try field.ty.dump(mapper, w); + try w.print(" {s}: {d};", .{ mapper.lookup(field.name), field.bit_width }); } try w.writeAll(" }"); } diff --git a/src/lib.zig b/src/lib.zig index 944b24cc..168c6f7b 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -8,6 +8,7 @@ pub const Source = @import("Source.zig"); pub const Tokenizer = @import("Tokenizer.zig"); pub const Tree = @import("Tree.zig"); pub const parseArgs = @import("main.zig").parseArgs; +pub const TypeMapper = @import("StringInterner.zig").TypeMapper; pub const version_str = "0.0.0-dev"; pub const version = @import("std").SemanticVersion.parse(version_str) catch unreachable; diff --git a/test/runner.zig b/test/runner.zig index 2ae83040..cfe743bd 100644 --- a/test/runner.zig +++ b/test/runner.zig @@ -58,7 +58,7 @@ fn testOne(allocator: std.mem.Allocator, path: []const u8) !void { var tree = try aro.Parser.parse(&pp); defer tree.deinit(); - tree.dump(std.io.null_writer) catch {}; + tree.dump(false, std.io.null_writer) catch {}; } fn testAllAllocationFailures(cases: [][]const u8) !void { @@ -72,7 +72,10 @@ fn testAllAllocationFailures(cases: [][]const u8) !void { defer case_node.end(); progress.refresh(); - try std.testing.checkAllAllocationFailures(std.testing.allocator, testOne, 
.{case}); + std.testing.checkAllAllocationFailures(std.testing.allocator, testOne, .{case}) catch |er| switch (er) { + error.SwallowedOutOfMemoryError => {}, + else => |e| return e, + }; } root_node.end(); } @@ -286,7 +289,9 @@ pub fn main() !void { var actual = StmtTypeDumper.init(gpa); defer actual.deinit(gpa); - try actual.dump(&tree, test_fn.decl.node, gpa); + const mapper = try tree.comp.string_interner.getFastTypeMapper(gpa); + defer mapper.deinit(gpa); + try actual.dump(&tree, mapper, test_fn.decl.node, gpa); var i: usize = 0; for (types.tokens) |str| { @@ -525,18 +530,18 @@ const StmtTypeDumper = struct { }; } - fn dumpNode(self: *StmtTypeDumper, tree: *const aro.Tree, node: NodeIndex, m: *MsgWriter) AllocatorError!void { + fn dumpNode(self: *StmtTypeDumper, tree: *const aro.Tree, mapper: aro.TypeMapper, node: NodeIndex, m: *MsgWriter) AllocatorError!void { if (node == .none) return; const tag = tree.nodes.items(.tag)[@enumToInt(node)]; if (tag == .implicit_return) return; const ty = tree.nodes.items(.ty)[@enumToInt(node)]; - ty.dump(m.buf.writer()) catch {}; + ty.dump(mapper, m.buf.writer()) catch {}; const owned = m.buf.toOwnedSlice(); errdefer m.buf.allocator.free(owned); try self.types.append(owned); } - fn dump(self: *StmtTypeDumper, tree: *const aro.Tree, decl_idx: NodeIndex, allocator: std.mem.Allocator) AllocatorError!void { + fn dump(self: *StmtTypeDumper, tree: *const aro.Tree, mapper: aro.TypeMapper, decl_idx: NodeIndex, allocator: std.mem.Allocator) AllocatorError!void { var m = MsgWriter.init(allocator); defer m.deinit(); @@ -547,12 +552,12 @@ const StmtTypeDumper = struct { switch (tag) { .compound_stmt_two => { - try self.dumpNode(tree, data.bin.lhs, &m); - try self.dumpNode(tree, data.bin.rhs, &m); + try self.dumpNode(tree, mapper, data.bin.lhs, &m); + try self.dumpNode(tree, mapper, data.bin.rhs, &m); }, .compound_stmt => { for (tree.data[data.range.start..data.range.end]) |stmt| { - try self.dumpNode(tree, stmt, &m); + try self.dumpNode(tree, mapper, stmt, &m); } }, else => unreachable, From 2bb162f233f8883f556727d6f3359aa453a716c0 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Mon, 18 Jul 2022 13:21:07 -0700 Subject: [PATCH 2/5] Parser: intern the string "__declspec" to make identifier validation faster This speeds up parsing in large programs by about 1% --- src/Parser.zig | 12 ++++++++++-- src/Preprocessor.zig | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Parser.zig b/src/Parser.zig index 8fff934e..0ede04af 100644 --- a/src/Parser.zig +++ b/src/Parser.zig @@ -162,6 +162,7 @@ record_members: std.ArrayListUnmanaged(struct { tok: TokenIndex, name: StringId @"switch": ?*Switch = null, in_loop: bool = false, pragma_pack: u8 = 8, +declspec_id: StringId, fn checkIdentifierCodepoint(comp: *Compilation, codepoint: u21, loc: Source.Location) Compilation.Error!bool { if (codepoint <= 0x7F) return false; @@ -521,6 +522,7 @@ pub fn parse(pp: *Preprocessor) Compilation.Error!Tree { .enum_buf = std.ArrayList(Type.Enum.Field).init(pp.comp.gpa), .record_buf = std.ArrayList(Type.Record.Field).init(pp.comp.gpa), .field_attr_buf = std.ArrayList([]const Attribute).init(pp.comp.gpa), + .declspec_id = try pp.comp.intern("__declspec"), }; errdefer { p.nodes.deinit(pp.comp.gpa); @@ -1588,16 +1590,22 @@ fn typeSpec(p: *Parser, ty: *Type.Builder) Error!bool { continue; }, .identifier, .extended_identifier => { - if (mem.eql(u8, p.tokSlice(p.tok_i), "__declspec")) { + var interned_name = try p.comp.intern(p.tokSlice(p.tok_i)); + var declspec_found = false; + + 
if (interned_name == p.declspec_id) { try p.errTok(.declspec_not_enabled, p.tok_i); p.tok_i += 1; if (p.eatToken(.l_paren)) |_| { p.skipTo(.r_paren); continue; } + declspec_found = true; } if (ty.typedef != null) break; - const interned_name = try p.comp.intern(p.tokSlice(p.tok_i)); + if (declspec_found) { + interned_name = try p.comp.intern(p.tokSlice(p.tok_i)); + } const typedef = (try p.syms.findTypedef(p, interned_name, p.tok_i, ty.specifier != .none)) orelse break; if (!ty.combineTypedef(p, typedef.ty, typedef.tok)) break; }, diff --git a/src/Preprocessor.zig b/src/Preprocessor.zig index 0a03dbc9..b07ce1b4 100644 --- a/src/Preprocessor.zig +++ b/src/Preprocessor.zig @@ -643,6 +643,7 @@ fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool { .record_buf = undefined, .attr_buf = undefined, .field_attr_buf = undefined, + .declspec_id = undefined, }; return parser.macroExpr(); } From 28062524a04fdabe5e9a61d81edd83b6a6474fbb Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Mon, 18 Jul 2022 18:24:46 -0700 Subject: [PATCH 3/5] Test runner: Zig stdlib update, use openIterableDir to open cases dir --- test/runner.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runner.zig b/test/runner.zig index cfe743bd..57bf4c3d 100644 --- a/test/runner.zig +++ b/test/runner.zig @@ -102,7 +102,7 @@ pub fn main() !void { // collect all cases { - var cases_dir = try std.fs.cwd().openDir(args[1], .{ .iterate = true }); + var cases_dir = try std.fs.cwd().openIterableDir(args[1], .{}); defer cases_dir.close(); var it = cases_dir.iterate(); From 7311ca389c87116d91b554e060a796cb5004dd24 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Mon, 18 Jul 2022 20:50:38 -0700 Subject: [PATCH 4/5] StringInterner: TypeMapper lookup/deinit don't require pointers --- src/StringInterner.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/StringInterner.zig b/src/StringInterner.zig index 1320c757..a9396122 100644 --- a/src/StringInterner.zig +++ b/src/StringInterner.zig @@ -21,7 +21,7 @@ pub const TypeMapper = struct { slow: *const StringToIdMap, }, - pub fn lookup(self: *const TypeMapper, string_id: StringInterner.StringId) []const u8 { + pub fn lookup(self: TypeMapper, string_id: StringInterner.StringId) []const u8 { if (string_id == .empty) return ""; switch (self.data) { .fast => |arr| return arr[@enumToInt(string_id)], @@ -35,7 +35,7 @@ pub const TypeMapper = struct { } } - pub fn deinit(self: *const TypeMapper, allocator: mem.Allocator) void { + pub fn deinit(self: TypeMapper, allocator: mem.Allocator) void { switch (self.data) { .slow => {}, .fast => |arr| allocator.free(arr), From 0acf64c7de94893505b8c168695425b14ba0c986 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Tue, 19 Jul 2022 15:56:33 -0700 Subject: [PATCH 5/5] Type: rearrange some struct fields for reduced memory usage --- src/Type.zig | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/Type.zig b/src/Type.zig index b7d54111..5454d684 100644 --- a/src/Type.zig +++ b/src/Type.zig @@ -99,8 +99,8 @@ pub const Func = struct { params: []Param, pub const Param = struct { - name: StringId, ty: Type, + name: StringId, name_tok: TokenIndex, }; }; @@ -137,14 +137,14 @@ pub const Attributed = struct { // TODO improve memory usage pub const Enum = struct { - name: StringId, - tag_ty: Type, fields: []Field, + tag_ty: Type, + name: StringId, fixed: bool, pub const Field = struct { - name: StringId, ty: Type, + name: StringId, name_tok: TokenIndex, node: NodeIndex, }; @@ -165,19 +165,18 
@@ pub const Enum = struct { // TODO improve memory usage pub const Record = struct { - name: StringId, fields: []Field, size: u64, - alignment: u29, - /// If this is null, none of the fields have attributes /// Otherwise, it's a pointer to N items (where N == number of fields) /// and the item at index i is the attributes for the field at index i field_attributes: ?[*][]const Attribute, + name: StringId, + alignment: u29, pub const Field = struct { - name: StringId, ty: Type, + name: StringId, /// zero for anonymous fields name_tok: TokenIndex = 0, bit_width: u32 = 0,
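
The reordering above helps because pointer-sized fields force 8-byte
alignment: a lone 32-bit field placed before one drags four bytes of
padding along, while grouping the small fields together lets them share
what would otherwise be padding. A minimal sketch of the effect, assuming
a 64-bit target and extern (C-layout) structs so that declaration order
is guaranteed; the struct and field names are hypothetical, not taken
from aro:

    const std = @import("std");

    const Before = extern struct {
        id: u32, // 4 bytes + 4 bytes padding so `ptr` is 8-byte aligned
        ptr: *u32, // 8 bytes
        flag: u32, // 4 bytes + 4 bytes tail padding => 24 bytes total
    };

    const After = extern struct {
        ptr: *u32, // 8 bytes
        id: u32, // 4 bytes
        flag: u32, // 4 bytes, packs next to `id` => 16 bytes total
    };

    comptime {
        // Holds on any target; the byte counts above assume 64-bit pointers.
        std.debug.assert(@sizeOf(After) <= @sizeOf(Before));
    }

Plain Zig structs do not guarantee declaration-order layout, but the same
grouping principle is what this patch applies to Func.Param, Enum, and
Record.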
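
More generally, the interned names introduced in this series reduce field
and typedef lookups (`if (name == f.name)` in hasField, the `__declspec`
check against `p.declspec_id`) to integer compares. A rough sketch of the
underlying idea; the names below are illustrative, not the actual aro API:

    const std = @import("std");

    const StringId = enum(u32) { empty = 0, _ };

    const Interner = struct {
        map: std.StringHashMapUnmanaged(StringId) = .{},

        // Keys are not copied, so `str` must outlive the interner.
        fn intern(self: *Interner, gpa: std.mem.Allocator, str: []const u8) !StringId {
            if (str.len == 0) return .empty;
            const gop = try self.map.getOrPut(gpa, str);
            if (!gop.found_existing) {
                // First time this string is seen: hand out the next id.
                gop.value_ptr.* = @intToEnum(StringId, self.map.count());
            }
            return gop.value_ptr.*;
        }
    };

Each name is hashed once when interned; every comparison afterwards is a
single integer test. Turning an id back into bytes is only needed for
diagnostics and dumps, which is the TypeMapper's job: the slow variant
borrows the interner's map and needs no cleanup (its deinit is a no-op),
while the fast variant allocates an id-indexed array and must be deinited,
as the test runner does around StmtTypeDumper.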