diff --git a/src/Attribute.zig b/src/Attribute.zig index 966d240c..23e038d4 100644 --- a/src/Attribute.zig +++ b/src/Attribute.zig @@ -60,7 +60,7 @@ pub const ArgumentType = enum { fn fromType(comptime T: type) ArgumentType { return switch (T) { - []const u8 => .string, + Value.ByteRange => .string, Identifier => .identifier, u32 => .int, Alignment => .alignment, @@ -263,10 +263,17 @@ fn diagnoseField( .bytes => { const bytes = val.data.bytes.trim(1); // remove null terminator if (wanted == Value.ByteRange) { + std.debug.assert(node.tag == .string_literal_expr); + if (!node.ty.elemType().is(.char) and !node.ty.elemType().is(.uchar)) { + return Diagnostics.Message{ + .tag = .attribute_requires_string, + .extra = .{ .str = decl.name }, + }; + } @field(@field(arguments, decl.name), field.name) = bytes; return null; } else if (@typeInfo(wanted) == .Enum and @hasDecl(wanted, "opts") and wanted.opts.enum_kind == .string) { - const str = bytes.slice(strings); + const str = bytes.slice(strings, .@"1"); if (std.meta.stringToEnum(wanted, str)) |enum_val| { @field(@field(arguments, decl.name), field.name) = enum_val; return null; diff --git a/src/Compilation.zig b/src/Compilation.zig index c0c3126a..49846b8a 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -238,6 +238,8 @@ pub fn generateBuiltinMacros(comp: *Compilation) !Source { \\#define __STDC_NO_COMPLEX__ 1 \\#define __STDC_NO_THREADS__ 1 \\#define __STDC_NO_VLA__ 1 + \\#define __STDC_UTF_16__ 1 + \\#define __STDC_UTF_32__ 1 \\ ); if (comp.langopts.standard.StdCVersionMacro()) |stdc_version| { @@ -1428,6 +1430,20 @@ pub fn hasBuiltinFunction(comp: *const Compilation, builtin: Builtin) bool { } } +pub const CharUnitSize = enum(u32) { + @"1" = 1, + @"2" = 2, + @"4" = 4, + + pub fn Type(comptime self: CharUnitSize) type { + return switch (self) { + .@"1" => u8, + .@"2" => u16, + .@"4" => u32, + }; + } +}; + pub const renderErrors = Diagnostics.render; test "addSourceFromReader" { diff --git a/src/Diagnostics.zig b/src/Diagnostics.zig index 7c8ae4fd..64ab6ca5 100644 --- a/src/Diagnostics.zig +++ b/src/Diagnostics.zig @@ -179,6 +179,7 @@ pub const Options = packed struct { @"invalid-source-encoding": Kind = .default, @"four-char-constants": Kind = .default, @"unknown-escape-sequence": Kind = .default, + @"invalid-pp-token": Kind = .default, }; const messages = struct { @@ -2510,6 +2511,42 @@ const messages = struct { const opt = "unknown-escape-sequence"; const extra = .invalid_escape; }; + pub const attribute_requires_string = struct { + const msg = "attribute '{s}' requires an ordinary string"; + const kind = .@"error"; + const extra = .str; + }; + pub const unterminated_string_literal_warning = struct { + const msg = "missing terminating '\"' character"; + const kind = .warning; + const opt = "invalid-pp-token"; + }; + pub const unterminated_string_literal_error = struct { + const msg = "missing terminating '\"' character"; + const kind = .@"error"; + }; + pub const empty_char_literal_warning = struct { + const msg = "empty character constant"; + const kind = .warning; + const opt = "invalid-pp-token"; + }; + pub const empty_char_literal_error = struct { + const msg = "empty character constant"; + const kind = .@"error"; + }; + pub const unterminated_char_literal_warning = struct { + const msg = "missing terminating ' character"; + const kind = .warning; + const opt = "invalid-pp-token"; + }; + pub const unterminated_char_literal_error = struct { + const msg = "missing terminating ' character"; + const kind = .@"error"; + }; + pub 
const unterminated_comment = struct { + const msg = "unterminated comment"; + const kind = .@"error"; + }; }; list: std.ArrayListUnmanaged(Message) = .{}, diff --git a/src/Ir.zig b/src/Ir.zig index 43739252..4c45f78b 100644 --- a/src/Ir.zig +++ b/src/Ir.zig @@ -552,7 +552,7 @@ fn writeValue(ir: Ir, val_ref: Interner.Ref, color: bool, w: anytype) !void { switch (v.tag) { .unavailable => try w.writeAll(" unavailable"), .int => try w.print("{d}", .{v.data.int}), - .bytes => try w.print("\"{s}\"", .{v.data.bytes.slice(ir.strings)}), + .bytes => try w.print("\"{s}\"", .{v.data.bytes.slice(ir.strings, .@"1")}), // std.fmt does @as instead of @floatCast .float => try w.print("{d}", .{@as(f64, @floatCast(v.data.float))}), else => try w.print("({s})", .{@tagName(v.tag)}), diff --git a/src/Parser.zig b/src/Parser.zig index 93da1d3e..660fbf10 100644 --- a/src/Parser.zig +++ b/src/Parser.zig @@ -17,7 +17,7 @@ const NodeList = std.ArrayList(NodeIndex); const InitList = @import("InitList.zig"); const Attribute = @import("Attribute.zig"); const CharInfo = @import("CharInfo.zig"); -const CharLiteral = @import("CharLiteral.zig"); +const TextLiteral = @import("TextLiteral.zig"); const Value = @import("Value.zig"); const SymbolStack = @import("SymbolStack.zig"); const Symbol = SymbolStack.Symbol; @@ -468,7 +468,7 @@ fn checkDeprecatedUnavailable(p: *Parser, ty: Type, usage_tok: TokenIndex, decl_ defer p.strings.items.len = strings_top; const w = p.strings.writer(); - const msg_str = p.retainedString(@"error".msg); + const msg_str = p.attributeMessageString(@"error".msg); try w.print("call to '{s}' declared with attribute error: {s}", .{ p.tokSlice(@"error".__name_tok), msg_str }); const str = try p.comp.diag.arena.allocator().dupe(u8, p.strings.items[strings_top..]); try p.errStr(.error_attribute, usage_tok, str); @@ -478,7 +478,7 @@ fn checkDeprecatedUnavailable(p: *Parser, ty: Type, usage_tok: TokenIndex, decl_ defer p.strings.items.len = strings_top; const w = p.strings.writer(); - const msg_str = p.retainedString(warning.msg); + const msg_str = p.attributeMessageString(warning.msg); try w.print("call to '{s}' declared with attribute warning: {s}", .{ p.tokSlice(warning.__name_tok), msg_str }); const str = try p.comp.diag.arena.allocator().dupe(u8, p.strings.items[strings_top..]); try p.errStr(.warning_attribute, usage_tok, str); @@ -493,9 +493,10 @@ fn checkDeprecatedUnavailable(p: *Parser, ty: Type, usage_tok: TokenIndex, decl_ } } +/// Assumes that the specified range was created by an ordinary or `u8` string literal /// Returned slice is invalidated if additional strings are added to p.retained_strings -fn retainedString(p: *Parser, range: Value.ByteRange) []const u8 { - return range.slice(p.retained_strings.items); +fn attributeMessageString(p: *Parser, range: Value.ByteRange) []const u8 { + return range.slice(p.retained_strings.items, .@"1"); } fn errDeprecated(p: *Parser, tag: Diagnostics.Tag, tok_i: TokenIndex, msg: ?Value.ByteRange) Compilation.Error!void { @@ -511,7 +512,7 @@ fn errDeprecated(p: *Parser, tag: Diagnostics.Tag, tok_i: TokenIndex, msg: ?Valu }; try w.writeAll(reason); if (msg) |m| { - const str = p.retainedString(m); + const str = p.attributeMessageString(m); try w.print(": {s}", .{str}); } const str = try p.comp.diag.arena.allocator().dupe(u8, p.strings.items[strings_top..]); @@ -1153,17 +1154,13 @@ fn staticAssertMessage(p: *Parser, cond_node: NodeIndex, message: Result) !?[]co try buf.appendSlice(")'"); } if (message.node != .none) { + 
assert(p.nodes.items(.tag)[@intFromEnum(message.node)] == .string_literal_expr); if (buf.items.len > 0) { try buf.append(' '); } - const data = message.val.data.bytes; - try buf.ensureUnusedCapacity(data.len()); - try Tree.dumpStr( - p.retained_strings.items, - data, - p.nodes.items(.tag)[@intFromEnum(message.node)], - buf.writer(), - ); + const byte_range = message.val.data.bytes; + try buf.ensureUnusedCapacity(byte_range.len()); + try byte_range.dumpString(message.ty, p.comp, p.retained_strings.items, buf.writer()); } return try p.comp.diag.arena.allocator().dupe(u8, buf.items); } @@ -1184,6 +1181,7 @@ fn staticAssert(p: *Parser) Error!bool { .string_literal_utf_8, .string_literal_utf_32, .string_literal_wide, + .unterminated_string_literal, => try p.stringLiteral(), else => { try p.err(.expected_str_literal); @@ -3953,7 +3951,7 @@ fn assembly(p: *Parser, kind: enum { global, decl_label, stmt }) Error!?NodeInde fn asmStr(p: *Parser) Error!Result { var i = p.tok_i; while (true) : (i += 1) switch (p.tok_ids[i]) { - .string_literal => {}, + .string_literal, .unterminated_string_literal => {}, .string_literal_utf_16, .string_literal_utf_8, .string_literal_utf_32 => { try p.errStr(.invalid_asm_str, p.tok_i, "unicode"); return error.ParsingFailed; @@ -7461,12 +7459,15 @@ fn primaryExpr(p: *Parser) Error!Result { .string_literal_utf_8, .string_literal_utf_32, .string_literal_wide, + .unterminated_string_literal, => return p.stringLiteral(), .char_literal, .char_literal_utf_8, .char_literal_utf_16, .char_literal_utf_32, .char_literal_wide, + .empty_char_literal, + .unterminated_char_literal, => return p.charLiteral(), .zero => { p.tok_i += 1; @@ -7523,131 +7524,123 @@ fn makePredefinedIdentifier(p: *Parser, start: u32) !Result { } fn stringLiteral(p: *Parser) Error!Result { - var start = p.tok_i; - // use 1 for wchar_t - var width: ?u8 = null; - var is_u8_literal = false; - while (true) { - switch (p.tok_ids[p.tok_i]) { - .string_literal => {}, - .string_literal_utf_16 => if (width) |some| { - if (some != 16) try p.err(.unsupported_str_cat); - } else { - width = 16; - }, - .string_literal_utf_8 => { - is_u8_literal = true; - if (width) |some| { - if (some != 8) try p.err(.unsupported_str_cat); - } else { - width = 8; + var string_end = p.tok_i; + var string_kind: TextLiteral.Kind = .char; + while (TextLiteral.Kind.classify(p.tok_ids[string_end], .string_literal)) |next| : (string_end += 1) { + string_kind = string_kind.concat(next) catch { + try p.errTok(.unsupported_str_cat, string_end); + while (p.tok_ids[p.tok_i].isStringLiteral()) : (p.tok_i += 1) {} + return error.ParsingFailed; + }; + if (string_kind == .unterminated) { + try p.errTok(.unterminated_string_literal_error, string_end); + p.tok_i = string_end + 1; + return error.ParsingFailed; + } + } + assert(string_end > p.tok_i); + + const char_width = string_kind.charUnitSize(p.comp); + + const retain_start = mem.alignForward(usize, p.retained_strings.items.len, string_kind.internalStorageAlignment(p.comp)); + try p.retained_strings.resize(retain_start); + + while (p.tok_i < string_end) : (p.tok_i += 1) { + const this_kind = TextLiteral.Kind.classify(p.tok_ids[p.tok_i], .string_literal).?; + const slice = this_kind.contentSlice(p.tokSlice(p.tok_i)); + var char_literal_parser = TextLiteral.Parser.init(slice, this_kind, 0x10ffff, p.comp); + + try p.retained_strings.ensureUnusedCapacity((slice.len + 1) * @intFromEnum(char_width)); // +1 for null terminator + while (char_literal_parser.next()) |item| switch (item) { + .value => |v| { + switch 
(char_width) { + .@"1" => p.retained_strings.appendAssumeCapacity(@intCast(v)), + .@"2" => { + const word: u16 = @intCast(v); + p.retained_strings.appendSliceAssumeCapacity(mem.asBytes(&word)); + }, + .@"4" => p.retained_strings.appendSliceAssumeCapacity(mem.asBytes(&v)), } }, - .string_literal_utf_32 => if (width) |some| { - if (some != 32) try p.err(.unsupported_str_cat); - } else { - width = 32; + .codepoint => |c| { + switch (char_width) { + .@"1" => { + var buf: [4]u8 = undefined; + const written = std.unicode.utf8Encode(c, &buf) catch unreachable; + const encoded = buf[0..written]; + p.retained_strings.appendSliceAssumeCapacity(encoded); + }, + .@"2" => { + var utf16_buf: [2]u16 = undefined; + var utf8_buf: [4]u8 = undefined; + const utf8_written = std.unicode.utf8Encode(c, &utf8_buf) catch unreachable; + const utf16_written = std.unicode.utf8ToUtf16Le(&utf16_buf, utf8_buf[0..utf8_written]) catch unreachable; + const bytes = std.mem.sliceAsBytes(utf16_buf[0..utf16_written]); + p.retained_strings.appendSliceAssumeCapacity(bytes); + }, + .@"4" => { + const val: u32 = c; + p.retained_strings.appendSliceAssumeCapacity(mem.asBytes(&val)); + }, + } }, - .string_literal_wide => if (width) |some| { - if (some != 1) try p.err(.unsupported_str_cat); - } else { - width = 1; + .improperly_encoded => |bytes| p.retained_strings.appendSliceAssumeCapacity(bytes), + .utf8_text => |view| { + switch (char_width) { + .@"1" => p.retained_strings.appendSliceAssumeCapacity(view.bytes), + .@"2" => { + var capacity_slice: []align(@alignOf(u16)) u8 = @alignCast(p.retained_strings.unusedCapacitySlice()); + const dest_len = std.mem.alignBackward(usize, capacity_slice.len, 2); + var dest = std.mem.bytesAsSlice(u16, capacity_slice[0..dest_len]); + const words_written = std.unicode.utf8ToUtf16Le(dest, view.bytes) catch unreachable; + p.retained_strings.resize(p.retained_strings.items.len + words_written * 2) catch unreachable; + }, + .@"4" => { + var it = view.iterator(); + while (it.nextCodepoint()) |codepoint| { + const val: u32 = codepoint; + p.retained_strings.appendSliceAssumeCapacity(mem.asBytes(&val)); + } + }, + } }, - else => break, - } - p.tok_i += 1; - } - if (width == null) width = 8; - if (width.? != 8) return p.todo("unicode string literals"); - - const string_start = p.retained_strings.items.len; - while (start < p.tok_i) : (start += 1) { - var slice = p.tokSlice(start); - slice = slice[0 .. slice.len - 1]; - var i = mem.indexOf(u8, slice, "\"").? + 1; - try p.retained_strings.ensureUnusedCapacity(slice.len); - while (i < slice.len) : (i += 1) { - switch (slice[i]) { - '\\' => { - i += 1; - switch (slice[i]) { - '\n' => i += 1, - '\r' => i += 2, - '\'', '\"', '\\', '?' 
=> |c| p.retained_strings.appendAssumeCapacity(c), - 'n' => p.retained_strings.appendAssumeCapacity('\n'), - 'r' => p.retained_strings.appendAssumeCapacity('\r'), - 't' => p.retained_strings.appendAssumeCapacity('\t'), - 'a' => p.retained_strings.appendAssumeCapacity(0x07), - 'b' => p.retained_strings.appendAssumeCapacity(0x08), - 'e' => { - try p.errExtra(.non_standard_escape_char, start, .{ .invalid_escape = .{ .char = 'e', .offset = @intCast(i) } }); - p.retained_strings.appendAssumeCapacity(0x1B); - }, - 'f' => p.retained_strings.appendAssumeCapacity(0x0C), - 'v' => p.retained_strings.appendAssumeCapacity(0x0B), - 'x' => p.retained_strings.appendAssumeCapacity(try p.parseNumberEscape(start, 16, slice, &i)), - '0'...'7' => p.retained_strings.appendAssumeCapacity(try p.parseNumberEscape(start, 8, slice, &i)), - 'u' => try p.parseUnicodeEscape(start, 4, slice, &i), - 'U' => try p.parseUnicodeEscape(start, 8, slice, &i), - else => unreachable, - } - }, - else => |c| p.retained_strings.appendAssumeCapacity(c), - } + }; + for (char_literal_parser.errors.constSlice()) |item| { + try p.errExtra(item.tag, p.tok_i, item.extra); } } - try p.retained_strings.append(0); - const slice = p.retained_strings.items[string_start..]; + p.retained_strings.appendNTimesAssumeCapacity(0, @intFromEnum(char_width)); + const slice = p.retained_strings.items[retain_start..]; const arr_ty = try p.arena.create(Type.Array); - const specifier: Type.Specifier = if (is_u8_literal and p.comp.langopts.hasChar8_T()) .uchar else .char; - - arr_ty.* = .{ .elem = .{ .specifier = specifier }, .len = slice.len }; + arr_ty.* = .{ .elem = string_kind.elementType(p.comp), .len = @divExact(slice.len, @intFromEnum(char_width)) }; var res: Result = .{ .ty = .{ .specifier = .array, .data = .{ .array = arr_ty }, }, - .val = Value.bytes(@intCast(string_start), @intCast(p.retained_strings.items.len)), + .val = Value.bytes(@intCast(retain_start), @intCast(p.retained_strings.items.len)), }; res.node = try p.addNode(.{ .tag = .string_literal_expr, .ty = res.ty, .data = undefined }); if (!p.in_macro) try p.value_map.put(res.node, res.val); return res; } -fn parseNumberEscape(p: *Parser, tok: TokenIndex, base: u8, slice: []const u8, i: *usize) !u8 { - if (base == 16) i.* += 1; // skip x - var char: u8 = 0; - var reported = false; - while (i.* < slice.len) : (i.* += 1) { - const val = std.fmt.charToDigit(slice[i.*], base) catch break; // validated by Tokenizer - const product, const overflowed = @mulWithOverflow(char, base); - if (overflowed != 0 and !reported) { - try p.errExtra(.escape_sequence_overflow, tok, .{ .unsigned = i.* }); - reported = true; - } - char = product + val; - } - i.* -= 1; - return char; -} - -fn parseUnicodeEscape(p: *Parser, tok: TokenIndex, count: u8, slice: []const u8, i: *usize) !void { - const c = std.fmt.parseInt(u21, slice[i.* + 1 ..][0..count], 16) catch 0x110000; // count validated by tokenizer - i.* += count + 1; - if (!std.unicode.utf8ValidCodepoint(c) or (c < 0xa0 and c != '$' and c != '@' and c != '`')) { - try p.errExtra(.invalid_universal_character, tok, .{ .unsigned = i.* - count - 2 }); - return; - } - var buf: [4]u8 = undefined; - const to_write = std.unicode.utf8Encode(c, &buf) catch unreachable; // validated above - p.retained_strings.appendSliceAssumeCapacity(buf[0..to_write]); -} - fn charLiteral(p: *Parser) Error!Result { defer p.tok_i += 1; const tok_id = p.tok_ids[p.tok_i]; - const char_kind = CharLiteral.Kind.classify(tok_id); + const char_kind = TextLiteral.Kind.classify(tok_id, 
.char_literal) orelse { + if (tok_id == .empty_char_literal) { + try p.err(.empty_char_literal_error); + } else if (tok_id == .unterminated_char_literal) { + try p.err(.unterminated_char_literal_error); + } else unreachable; + return .{ + .ty = Type.int, + .val = Value.int(0), + .node = try p.addNode(.{ .tag = .char_literal, .ty = Type.int, .data = undefined }), + }; + }; var val: u32 = 0; const slice = char_kind.contentSlice(p.tokSlice(p.tok_i)); @@ -7656,7 +7649,8 @@ fn charLiteral(p: *Parser) Error!Result { // fast path: single unescaped ASCII char val = slice[0]; } else { - var char_literal_parser = CharLiteral.Parser.init(slice, char_kind, p.comp); + const max_codepoint = char_kind.maxCodepoint(p.comp); + var char_literal_parser = TextLiteral.Parser.init(slice, char_kind, max_codepoint, p.comp); const max_chars_expected = 4; var stack_fallback = std.heap.stackFallback(max_chars_expected * @sizeOf(u32), p.comp.gpa); @@ -7664,20 +7658,21 @@ fn charLiteral(p: *Parser) Error!Result { defer chars.deinit(); while (char_literal_parser.next()) |item| switch (item) { - .value => |c| try chars.append(c), + .value => |v| try chars.append(v), + .codepoint => |c| try chars.append(c), .improperly_encoded => |s| { try chars.ensureUnusedCapacity(s.len); for (s) |c| chars.appendAssumeCapacity(c); }, .utf8_text => |view| { var it = view.iterator(); - var max_codepoint: u21 = 0; + var max_codepoint_seen: u21 = 0; try chars.ensureUnusedCapacity(view.bytes.len); while (it.nextCodepoint()) |c| { - max_codepoint = @max(max_codepoint, c); + max_codepoint_seen = @max(max_codepoint_seen, c); chars.appendAssumeCapacity(c); } - if (max_codepoint > char_kind.maxCodepoint(p.comp)) { + if (max_codepoint_seen > max_codepoint) { char_literal_parser.err(.char_too_large, .{ .none = {} }); } }, diff --git a/src/Preprocessor.zig b/src/Preprocessor.zig index ca1b41ce..b0e28f9e 100644 --- a/src/Preprocessor.zig +++ b/src/Preprocessor.zig @@ -266,6 +266,15 @@ pub fn addIncludeResume(pp: *Preprocessor, source: Source.Id, offset: u32, line: } }); } +fn invalidTokenDiagnostic(tok_id: Token.Id) Diagnostics.Tag { + return switch (tok_id) { + .unterminated_string_literal => .unterminated_string_literal_warning, + .empty_char_literal => .empty_char_literal_warning, + .unterminated_char_literal => .unterminated_char_literal_warning, + else => unreachable, + }; +} + /// Return the name of the #ifndef guard macro that starts a source, if any. 
fn findIncludeGuard(pp: *Preprocessor, source: Source) ?[]const u8 { var tokenizer = Tokenizer{ @@ -631,6 +640,12 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token { } return tokFromRaw(tok); }, + .unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| { + start_of_line = false; + try pp.err(tok, invalidTokenDiagnostic(tag)); + try pp.expandMacro(&tokenizer, tok); + }, + .unterminated_comment => try pp.err(tok, .unterminated_comment), else => { if (tok.id.isMacroIdentifier() and pp.poisoned_identifiers.get(pp.tokSlice(tok)) != null) { try pp.err(tok, .poisoned_identifier); @@ -1239,7 +1254,7 @@ fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const Token) !?[]co } for (params) |tok| { - const str = pp.expandedSliceExtra(tok, .preserve_macro_ws, false); + const str = pp.expandedSliceExtra(tok, .preserve_macro_ws); try pp.char_buf.appendSlice(str); } @@ -1985,12 +2000,7 @@ fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroErr } } -fn expandedSliceExtra( - pp: *const Preprocessor, - tok: Token, - macro_ws_handling: enum { single_macro_ws, preserve_macro_ws }, - path_escapes: bool, -) []const u8 { +fn expandedSliceExtra(pp: *const Preprocessor, tok: Token, macro_ws_handling: enum { single_macro_ws, preserve_macro_ws }) []const u8 { if (tok.id.lexeme()) |some| { if (!tok.id.allowsDigraphs(pp.comp) and !(tok.id == .macro_ws and macro_ws_handling == .preserve_macro_ws)) return some; } @@ -1999,7 +2009,6 @@ fn expandedSliceExtra( .comp = pp.comp, .index = tok.loc.byte_offset, .source = .generated, - .path_escapes = path_escapes, }; if (tok.id == .macro_string) { while (true) : (tmp_tokenizer.index += 1) { @@ -2013,7 +2022,7 @@ fn expandedSliceExtra( /// Get expanded token source string. pub fn expandedSlice(pp: *Preprocessor, tok: Token) []const u8 { - return pp.expandedSliceExtra(tok, .single_macro_ws, false); + return pp.expandedSliceExtra(tok, .single_macro_ws); } /// Concat two tokens and add the result to pp.generated @@ -2182,6 +2191,11 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { try pp.token_buf.append(tok); }, .whitespace => need_ws = true, + .unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| { + try pp.err(tok, invalidTokenDiagnostic(tag)); + try pp.token_buf.append(tok); + }, + .unterminated_comment => try pp.err(tok, .unterminated_comment), else => { if (tok.id != .whitespace and need_ws) { need_ws = false; @@ -2323,6 +2337,11 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa } try pp.token_buf.append(tok); }, + .unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| { + try pp.err(tok, invalidTokenDiagnostic(tag)); + try pp.token_buf.append(tok); + }, + .unterminated_comment => try pp.err(tok, .unterminated_comment), else => { if (tok.id != .whitespace and need_ws) { need_ws = false; @@ -2368,8 +2387,6 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa /// Handle an #embed directive fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void { - tokenizer.path_escapes = true; - defer tokenizer.path_escapes = false; const first = tokenizer.nextNoWS(); const filename_tok = pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof) catch |er| switch (er) { error.InvalidInclude => return, @@ -2377,7 +2394,7 @@ fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void { }; // Check for empty filename. 
- const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws, true); + const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws); if (tok_slice.len < 3) { try pp.err(first, .empty_filename); return; @@ -2419,8 +2436,6 @@ fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void { // Handle a #include directive. fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInclude) MacroError!void { - tokenizer.path_escapes = true; - defer tokenizer.path_escapes = false; const first = tokenizer.nextNoWS(); const new_source = findIncludeSource(pp, tokenizer, first, which) catch |er| switch (er) { error.InvalidInclude => return, @@ -2586,7 +2601,7 @@ fn findIncludeSource(pp: *Preprocessor, tokenizer: *Tokenizer, first: RawToken, const filename_tok = try pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof); // Check for empty filename. - const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws, true); + const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws); if (tok_slice.len < 3) { try pp.err(first, .empty_filename); return error.InvalidInclude; diff --git a/src/Source.zig b/src/Source.zig index c7f401cf..6986f88b 100644 --- a/src/Source.zig +++ b/src/Source.zig @@ -74,7 +74,10 @@ pub fn lineCol(source: Source, loc: Location) LineCol { i += 1; continue; }; - const cp = std.unicode.utf8Decode(source.buf[i..][0..len]) catch unreachable; + const cp = std.unicode.utf8Decode(source.buf[i..][0..len]) catch { + i += 1; + continue; + }; width += codepointWidth(cp); i += len; } diff --git a/src/CharLiteral.zig b/src/TextLiteral.zig similarity index 68% rename from src/CharLiteral.zig rename to src/TextLiteral.zig index 7c47ac7f..4364a1d8 100644 --- a/src/CharLiteral.zig +++ b/src/TextLiteral.zig @@ -1,3 +1,5 @@ +//! Parsing and classification of string and character literals + const std = @import("std"); const Compilation = @import("Compilation.zig"); const Type = @import("Type.zig"); @@ -6,8 +8,10 @@ const Tokenizer = @import("Tokenizer.zig"); const mem = std.mem; pub const Item = union(enum) { - /// decoded escape + /// decoded hex or character escape value: u32, + /// validated unicode codepoint + codepoint: u21, /// Char literal in the source text is not utf8 encoded improperly_encoded: []const u8, /// 1 or more unescaped bytes @@ -25,28 +29,41 @@ pub const Kind = enum { utf_8, utf_16, utf_32, - - pub fn classify(id: Tokenizer.Token.Id) Kind { - return switch (id) { - .char_literal, - .string_literal, - => .char, - .char_literal_utf_8, - .string_literal_utf_8, - => .utf_8, - .char_literal_wide, - .string_literal_wide, - => .wide, - .char_literal_utf_16, - .string_literal_utf_16, - => .utf_16, - .char_literal_utf_32, - .string_literal_utf_32, - => .utf_32, - else => unreachable, + /// Error kind that halts parsing + unterminated, + + pub fn classify(id: Tokenizer.Token.Id, context: enum { string_literal, char_literal }) ?Kind { + return switch (context) { + .string_literal => switch (id) { + .string_literal => .char, + .string_literal_utf_8 => .utf_8, + .string_literal_wide => .wide, + .string_literal_utf_16 => .utf_16, + .string_literal_utf_32 => .utf_32, + .unterminated_string_literal => .unterminated, + else => null, + }, + .char_literal => switch (id) { + .char_literal => .char, + .char_literal_utf_8 => .utf_8, + .char_literal_wide => .wide, + .char_literal_utf_16 => .utf_16, + .char_literal_utf_32 => .utf_32, + else => null, + }, }; } + /// Should only be called for string literals. 
Determines the result kind of two adjacent string + /// literals + pub fn concat(self: Kind, other: Kind) !Kind { + if (self == .unterminated or other == .unterminated) return .unterminated; + if (self == other) return self; // can always concat with own kind + if (self == .char) return other; // char + X -> X + if (other == .char) return self; // X + char -> X + return error.CannotConcat; + } + /// Largest unicode codepoint that can be represented by this character kind /// May be smaller than the largest value that can be represented. /// For example u8 char literals may only specify 0-127 via literals or @@ -58,6 +75,7 @@ pub const Kind = enum { .utf_8 => std.math.maxInt(u7), .utf_16 => std.math.maxInt(u16), .utf_32 => 0x10FFFF, + .unterminated => unreachable, }); } @@ -68,9 +86,11 @@ pub const Kind = enum { .wide => comp.types.wchar.maxInt(comp), .utf_16 => std.math.maxInt(u16), .utf_32 => std.math.maxInt(u32), + .unterminated => unreachable, }); } + /// The C type of a character literal of this kind pub fn charLiteralType(kind: Kind, comp: *const Compilation) Type { return switch (kind) { .char => Type.int, @@ -78,10 +98,11 @@ pub const Kind = enum { .utf_8 => .{ .specifier = .uchar }, .utf_16 => comp.types.uint_least16_t, .utf_32 => comp.types.uint_least32_t, + .unterminated => unreachable, }; } - /// Return the actual contents of the string literal with leading / trailing quotes and + /// Return the actual contents of the literal with leading / trailing quotes and /// specifiers removed pub fn contentSlice(kind: Kind, delimited: []const u8) []const u8 { const end = delimited.len - 1; // remove trailing quote @@ -91,6 +112,40 @@ pub const Kind = enum { .utf_8 => delimited[3..end], .utf_16 => delimited[2..end], .utf_32 => delimited[2..end], + .unterminated => unreachable, + }; + } + + /// The size of a character unit for a string literal of this kind + pub fn charUnitSize(kind: Kind, comp: *const Compilation) Compilation.CharUnitSize { + return switch (kind) { + .char => .@"1", + .wide => switch (comp.types.wchar.sizeof(comp).?) 
{ + 2 => .@"2", + 4 => .@"4", + else => unreachable, + }, + .utf_8 => .@"1", + .utf_16 => .@"2", + .utf_32 => .@"4", + .unterminated => unreachable, + }; + } + + /// Required alignment within aro (on compiler host) for writing to retained_strings + pub fn internalStorageAlignment(kind: Kind, comp: *const Compilation) usize { + return switch (kind.charUnitSize(comp)) { + inline else => |size| @alignOf(size.Type()), + }; + } + + /// The C type of an element of a string literal of this kind + pub fn elementType(kind: Kind, comp: *const Compilation) Type { + return switch (kind) { + .unterminated => unreachable, + .char => .{ .specifier = .char }, + .utf_8 => if (comp.langopts.hasChar8_T()) .{ .specifier = .uchar } else .{ .specifier = .char }, + else => kind.charLiteralType(comp), }; } }; @@ -99,23 +154,38 @@ pub const Parser = struct { literal: []const u8, i: usize = 0, kind: Kind, + max_codepoint: u21, /// We only want to issue a max of 1 error per char literal errored: bool = false, errors: std.BoundedArray(CharDiagnostic, 4) = .{}, comp: *const Compilation, - pub fn init(literal: []const u8, kind: Kind, comp: *const Compilation) Parser { + pub fn init(literal: []const u8, kind: Kind, max_codepoint: u21, comp: *const Compilation) Parser { return .{ .literal = literal, .comp = comp, .kind = kind, + .max_codepoint = max_codepoint, + }; + } + + fn prefixLen(self: *const Parser) usize { + return switch (self.kind) { + .unterminated => unreachable, + .char => 0, + .utf_8 => 2, + .wide, .utf_16, .utf_32 => 1, }; } pub fn err(self: *Parser, tag: Diagnostics.Tag, extra: Diagnostics.Message.Extra) void { if (self.errored) return; self.errored = true; - self.errors.append(.{ .tag = tag, .extra = extra }) catch {}; + const diagnostic = .{ .tag = tag, .extra = extra }; + self.errors.append(diagnostic) catch { + _ = self.errors.pop(); + self.errors.append(diagnostic) catch unreachable; + }; } pub fn warn(self: *Parser, tag: Diagnostics.Tag, extra: Diagnostics.Message.Extra) void { @@ -134,9 +204,9 @@ pub const Parser = struct { const view = std.unicode.Utf8View.init(unescaped_slice) catch { if (self.kind != .char) { self.err(.illegal_char_encoding_error, .{ .none = {} }); - } else { - self.warn(.illegal_char_encoding_warning, .{ .none = {} }); + return null; } + self.warn(.illegal_char_encoding_warning, .{ .none = {} }); return .{ .improperly_encoded = self.literal[start..self.i] }; }; return .{ .utf8_text = view }; @@ -180,7 +250,7 @@ pub const Parser = struct { self.i += expected_len; if (overflowed) { - self.err(.escape_sequence_overflow, .{ .unsigned = start }); + self.err(.escape_sequence_overflow, .{ .unsigned = start + self.prefixLen() }); return null; } @@ -190,12 +260,13 @@ pub const Parser = struct { } if (val > std.math.maxInt(u21) or !std.unicode.utf8ValidCodepoint(@intCast(val))) { - self.err(.invalid_universal_character, .{ .unsigned = start }); + self.err(.invalid_universal_character, .{ .unsigned = start + self.prefixLen() }); return null; } - if (val > self.kind.maxCodepoint(self.comp)) { + if (val > self.max_codepoint) { self.err(.char_too_large, .{ .none = {} }); + return null; } if (val < 0xA0 and (val != '$' and val != '@' and val != '`')) { @@ -216,7 +287,7 @@ pub const Parser = struct { } self.warn(.c89_ucn_in_literal, .{ .none = {} }); - return .{ .value = val }; + return .{ .codepoint = @intCast(val) }; } fn parseEscapedChar(self: *Parser) Item { @@ -259,6 +330,7 @@ pub const Parser = struct { var val: u32 = 0; var count: usize = 0; var overflowed = false; + const start = 
self.i; defer self.i += count; const slice = switch (base) { .octal => self.literal[self.i..@min(self.literal.len, self.i + 3)], // max 3 chars @@ -275,7 +347,8 @@ pub const Parser = struct { count += 1; } if (overflowed or val > self.kind.maxInt(self.comp)) { - self.err(.escape_sequence_overflow, .{ .unsigned = 0 }); + self.err(.escape_sequence_overflow, .{ .unsigned = start + self.prefixLen() }); + return 0; } if (count == 0) { std.debug.assert(base == .hex); diff --git a/src/Tokenizer.zig b/src/Tokenizer.zig index 4e37d764..3c9a85a0 100644 --- a/src/Tokenizer.zig +++ b/src/Tokenizer.zig @@ -30,6 +30,10 @@ pub const Token = struct { string_literal_utf_32, string_literal_wide, + /// Any string literal with an embedded newline or EOF + /// Always a parser error; by default just a warning from preprocessor + unterminated_string_literal, + // only generated by preprocessor macro_string, @@ -40,6 +44,17 @@ pub const Token = struct { char_literal_utf_32, char_literal_wide, + /// Any character literal with nothing inside the quotes + /// Always a parser error; by default just a warning from preprocessor + empty_char_literal, + + /// Any character literal with an embedded newline or EOF + /// Always a parser error; by default just a warning from preprocessor + unterminated_char_literal, + + /// `/* */` style comment without a closing `*/` before EOF + unterminated_comment, + /// Integer literal tokens generated by preprocessor. one, zero, @@ -470,6 +485,7 @@ pub const Token = struct { return switch (id) { .include_start, .include_resume, + .unterminated_comment, // Fatal error; parsing should not be attempted => unreachable, .invalid, @@ -480,6 +496,9 @@ pub const Token = struct { .string_literal_utf_8, .string_literal_utf_32, .string_literal_wide, + .unterminated_string_literal, + .unterminated_char_literal, + .empty_char_literal, .char_literal, .char_literal_utf_8, .char_literal_utf_16, @@ -984,8 +1003,6 @@ index: u32 = 0, source: Source.Id, comp: *const Compilation, line: u32 = 1, -/// Used to parse include strings with Windows style paths. 
-path_escapes: bool = false, pub fn next(self: *Tokenizer) Token { var state: enum { @@ -996,14 +1013,10 @@ pub fn next(self: *Tokenizer) Token { U, L, string_literal, - path_escape, char_literal_start, char_literal, char_escape_sequence, - escape_sequence, - octal_escape, - hex_escape, - unicode_escape, + string_escape_sequence, identifier, extended_identifier, equal, @@ -1038,8 +1051,6 @@ pub fn next(self: *Tokenizer) Token { var start = self.index; var id: Token.Id = .eof; - var return_state = state; - var counter: u32 = 0; while (self.index < self.buf.len) : (self.index += 1) { const c = self.buf[self.index]; switch (state) { @@ -1219,29 +1230,30 @@ pub fn next(self: *Tokenizer) Token { }, .string_literal => switch (c) { '\\' => { - return_state = .string_literal; - state = if (self.path_escapes) .path_escape else .escape_sequence; + state = .string_escape_sequence; }, '"' => { self.index += 1; break; }, '\n' => { - id = .invalid; + id = .unterminated_string_literal; break; }, '\r' => unreachable, else => {}, }, - .path_escape => { - state = .string_literal; - }, .char_literal_start => switch (c) { '\\' => { state = .char_escape_sequence; }, - '\'', '\n' => { - id = .invalid; + '\'' => { + id = .empty_char_literal; + self.index += 1; + break; + }, + '\n' => { + id = .unterminated_char_literal; break; }, else => { @@ -1257,7 +1269,7 @@ pub fn next(self: *Tokenizer) Token { break; }, '\n' => { - id = .invalid; + id = .unterminated_char_literal; break; }, else => {}, @@ -1266,55 +1278,9 @@ pub fn next(self: *Tokenizer) Token { '\r', '\n' => unreachable, // removed by line splicing else => state = .char_literal, }, - .escape_sequence => switch (c) { - '\'', '"', '?', '\\', 'a', 'b', 'e', 'f', 'n', 'r', 't', 'v' => { - state = return_state; - }, + .string_escape_sequence => switch (c) { '\r', '\n' => unreachable, // removed by line splicing - '0'...'7' => { - counter = 1; - state = .octal_escape; - }, - 'x' => state = .hex_escape, - 'u' => { - counter = 4; - state = .unicode_escape; - }, - 'U' => { - counter = 8; - state = .unicode_escape; - }, - else => { - id = .invalid; - break; - }, - }, - .octal_escape => switch (c) { - '0'...'7' => { - counter += 1; - if (counter == 3) state = return_state; - }, - else => { - self.index -= 1; - state = return_state; - }, - }, - .hex_escape => switch (c) { - '0'...'9', 'a'...'f', 'A'...'F' => {}, - else => { - self.index -= 1; - state = return_state; - }, - }, - .unicode_escape => switch (c) { - '0'...'9', 'a'...'f', 'A'...'F' => { - counter -= 1; - if (counter == 0) state = return_state; - }, - else => { - id = .invalid; - break; - }, + else => state = .string_literal, }, .identifier, .extended_identifier => switch (c) { 'a'...'z', 'A'...'Z', '_', '0'...'9' => {}, @@ -1732,19 +1698,18 @@ pub fn next(self: *Tokenizer) Token { .start, .line_comment => {}, .u, .u8, .U, .L, .identifier => id = Token.getTokenId(self.comp, self.buf[start..self.index]), .extended_identifier => id = .extended_identifier, - .period2, - .string_literal, - .path_escape, - .char_literal_start, - .char_literal, - .escape_sequence, - .char_escape_sequence, - .octal_escape, - .hex_escape, - .unicode_escape, + + .period2 => { + self.index -= 1; + id = .period; + }, + .multi_line_comment, .multi_line_comment_asterisk, - => id = .invalid, + => id = .unterminated_comment, + + .char_escape_sequence, .char_literal, .char_literal_start => id = .unterminated_char_literal, + .string_escape_sequence, .string_literal => id = .unterminated_string_literal, .whitespace => id = .whitespace, 
.multi_line_comment_done => id = .whitespace, @@ -2114,7 +2079,7 @@ test "extended identifiers" { try expectTokens("0x0\u{E0000}", &.{ .pp_num, .extended_identifier }); try expectTokens("\"\\0\u{E0000}\"", &.{.string_literal}); try expectTokens("\"\\x\u{E0000}\"", &.{.string_literal}); - try expectTokens("\"\\u\u{E0000}\"", &.{ .invalid, .extended_identifier, .invalid }); + try expectTokens("\"\\u\u{E0000}\"", &.{.string_literal}); try expectTokens("1e\u{E0000}", &.{ .pp_num, .extended_identifier }); try expectTokens("1e1\u{E0000}", &.{ .pp_num, .extended_identifier }); } diff --git a/src/Tree.zig b/src/Tree.zig index 5c25ec66..f6335014 100644 --- a/src/Tree.zig +++ b/src/Tree.zig @@ -656,17 +656,6 @@ pub fn isLvalExtra(nodes: Node.List.Slice, extra: []const NodeIndex, value_map: } } -pub fn dumpStr(retained_strings: []const u8, range: Value.ByteRange, tag: Tag, writer: anytype) !void { - switch (tag) { - .string_literal_expr => { - const lit_range = range.trim(1); // remove null-terminator - const str = lit_range.slice(retained_strings); - try writer.print("\"{}\"", .{std.zig.fmtEscapes(str)}); - }, - else => unreachable, - } -} - pub fn tokSlice(tree: Tree, tok_i: TokenIndex) []const u8 { if (tree.tokens.items(.id)[tok_i].lexeme()) |some| return some; const loc = tree.tokens.items(.loc)[tok_i]; @@ -716,8 +705,8 @@ fn dumpAttribute(attr: Attribute, strings: []const u8, writer: anytype) !void { try writer.writeAll(f.name); try writer.writeAll(": "); switch (f.type) { - Value.ByteRange => try writer.print("\"{s}\"", .{@field(args, f.name).slice(strings)}), - ?Value.ByteRange => try writer.print("\"{?s}\"", .{if (@field(args, f.name)) |range| range.slice(strings) else null}), + Value.ByteRange => try writer.print("\"{s}\"", .{@field(args, f.name).slice(strings, .@"1")}), + ?Value.ByteRange => try writer.print("\"{?s}\"", .{if (@field(args, f.name)) |range| range.slice(strings, .@"1") else null}), else => switch (@typeInfo(f.type)) { .Enum => try writer.writeAll(@tagName(@field(args, f.name))), else => try writer.print("{any}", .{@field(args, f.name)}), diff --git a/src/Value.zig b/src/Value.zig index 58a058c3..1577db93 100644 --- a/src/Value.zig +++ b/src/Value.zig @@ -18,8 +18,40 @@ pub const ByteRange = struct { return .{ .start = self.start, .end = self.end - amount }; } - pub fn slice(self: ByteRange, all_bytes: []const u8) []const u8 { - return all_bytes[self.start..self.end]; + pub fn slice(self: ByteRange, all_bytes: []const u8, comptime size: Compilation.CharUnitSize) []const size.Type() { + switch (size) { + inline else => |sz| { + const aligned: []align(@alignOf(sz.Type())) const u8 = @alignCast(all_bytes[self.start..self.end]); + return std.mem.bytesAsSlice(sz.Type(), aligned); + }, + } + } + + pub fn dumpString(range: ByteRange, ty: Type, comp: *const Compilation, strings: []const u8, w: anytype) !void { + const size: Compilation.CharUnitSize = @enumFromInt(ty.elemType().sizeof(comp).?); + const without_null = range.trim(@intFromEnum(size)); + switch (size) { + inline .@"1", .@"2" => |sz| { + const data_slice = without_null.slice(strings, sz); + const formatter = if (sz == .@"1") std.zig.fmtEscapes(data_slice) else std.unicode.fmtUtf16le(data_slice); + try w.print("\"{}\"", .{formatter}); + }, + .@"4" => { + try w.writeByte('"'); + const data_slice = without_null.slice(strings, .@"4"); + var buf: [4]u8 = undefined; + for (data_slice) |item| { + if (item <= std.math.maxInt(u21) and std.unicode.utf8ValidCodepoint(@intCast(item))) { + const codepoint: u21 = @intCast(item); + const 
written = std.unicode.utf8Encode(codepoint, &buf) catch unreachable; + try w.print("{s}", .{buf[0..written]}); + } else { + try w.print("\\x{x}", .{item}); + } + } + try w.writeByte('"'); + }, + } } }; @@ -593,7 +625,7 @@ pub fn dump(v: Value, ty: Type, comp: *Compilation, strings: []const u8, w: anyt } else { try w.print("{d}", .{v.signExtend(ty, comp)}); }, - .bytes => try w.print("\"{s}\"", .{v.data.bytes.slice(strings)}), + .bytes => try v.data.bytes.dumpString(ty, comp, strings, w), // std.fmt does @as instead of @floatCast .float => try w.print("{d}", .{@as(f64, @floatCast(v.data.float))}), else => try w.print("({s})", .{@tagName(v.tag)}), diff --git a/src/codegen/x86_64.zig b/src/codegen/x86_64.zig index bc8b43df..aa96b4df 100644 --- a/src/codegen/x86_64.zig +++ b/src/codegen/x86_64.zig @@ -177,7 +177,7 @@ fn genNode(func: *Fn, node: NodeIndex) Codegen.Error!Value { .int_literal => return Value{ .immediate = @bitCast(data.int) }, .string_literal_expr => { const range = func.c.tree.value_map.get(node).?.data.bytes; - const str_bytes = range.slice(func.c.tree.strings); + const str_bytes = range.slice(func.c.tree.strings, .@"1"); const section = try func.c.obj.getSection(.strings); const start = section.items.len; try section.appendSlice(str_bytes); diff --git a/test/cases/attributes.c b/test/cases/attributes.c index 73b59567..2363b8eb 100644 --- a/test/cases/attributes.c +++ b/test/cases/attributes.c @@ -107,6 +107,8 @@ typedef struct { __attribute__((aligned(32))) char aligned_arr[] = {1, 2, 3}; _Static_assert(sizeof(aligned_arr) == 3, ""); +__attribute__((section(1))) int Z; + __attribute__(()) // test attribute at eof #define TESTS_SKIPPED 1 @@ -119,4 +121,5 @@ __attribute__(()) // test attribute at eof "attributes.c:36:5: error: fallthrough annotation does not directly precede switch label" \ "attributes.c:40:20: error: 'noreturn' attribute cannot be applied to a statement" \ "attributes.c:76:6: error: cannot call non function type 'int'" \ - "attributes.c:110:18: error: expected identifier or '('" \ + "attributes.c:110:24: error: Attribute argument is invalid, expected a string but got an integer constant" \ + "attributes.c:112:18: error: expected identifier or '('" \ diff --git a/test/cases/stringify invalid.c b/test/cases/stringify invalid.c index 84e3d172..6d551d29 100644 --- a/test/cases/stringify invalid.c +++ b/test/cases/stringify invalid.c @@ -1,7 +1,5 @@ -// clang also reports: warning: missing terminating '"' character [-Winvalid-pp-token] -#define TESTS_SKIPPED 1 - -#define EXPECTED_ERRORS "stringify invalid.c:15:1: error: expected ';', found '}'" +#define EXPECTED_ERRORS "stringify invalid.c:9:20: warning: missing terminating '\"' character [-Winvalid-pp-token]" \ + "stringify invalid.c:13:1: error: expected ';', found '}'" void foo(void) { diff --git a/test/cases/strings.c b/test/cases/strings.c index 402f4633..148c5e1b 100644 --- a/test/cases/strings.c +++ b/test/cases/strings.c @@ -1,5 +1,5 @@ _Static_assert(1, "foo" "\n" "bar"); -_Static_assert(1, "foo" "\x606262 "); +_Static_assert(1, "foo" "abc\x606262 "); _Static_assert(1, "\000062"); _Static_assert(1, "\U00110000"); _Static_assert(1, "\u0062"); @@ -15,12 +15,12 @@ _Static_assert(1, "\u0060"); _Static_assert(1, "aaァ\e[1;"); #pragma GCC diagnostic pop -#define EXPECTED_ERRORS "strings.c:2:30: error: escape sequence out of range" \ - "strings.c:4:20: error: invalid universal character" \ - "strings.c:5:20: error: invalid universal character" \ +#define EXPECTED_ERRORS "strings.c:2:29: error: escape sequence out 
of range" \ + "strings.c:4:19: error: invalid universal character" \ + "strings.c:5:19: error: character 'b' cannot be specified by a universal character name" \ "strings.c:7:9: warning: multi-character character constant [-Wmultichar]" \ "strings.c:7:9: warning: character constant too long for its type" \ - "strings.c:9:20: error: invalid universal character" \ - "strings.c:10:20: error: invalid universal character" \ - "strings.c:11:20: error: invalid universal character" \ - "strings.c:15:24: warning: use of non-standard escape character '\\e' [-Wpedantic]" \ + "strings.c:9:19: error: invalid universal character" \ + "strings.c:10:19: error: invalid universal character" \ + "strings.c:11:19: error: invalid universal character" \ + "strings.c:15:23: warning: use of non-standard escape character '\\e' [-Wpedantic]" \ diff --git a/test/cases/unterminated char literal.c b/test/cases/unterminated char literal.c new file mode 100644 index 00000000..951ca6e4 --- /dev/null +++ b/test/cases/unterminated char literal.c @@ -0,0 +1,17 @@ +#define A 'b +#define B '' +#define C(X) '' +#define D(X) 'A + +#define EXPECTED_ERRORS "unterminated char literal.c:1:11: warning: missing terminating ' character [-Winvalid-pp-token]" \ + "unterminated char literal.c:2:11: warning: empty character constant [-Winvalid-pp-token]" \ + "unterminated char literal.c:3:14: warning: empty character constant [-Winvalid-pp-token]" \ + "unterminated char literal.c:4:14: warning: missing terminating ' character [-Winvalid-pp-token]" \ + "unterminated char literal.c:16:10: warning: empty character constant [-Winvalid-pp-token]" \ + "unterminated char literal.c:17:10: warning: missing terminating ' character [-Winvalid-pp-token]" \ + "unterminated char literal.c:16:10: error: empty character constant" \ + "unterminated char literal.c:17:10: error: missing terminating ' character" \ + "unterminated char literal.c:17:11: error: expected ';' before end of file" \ + +char c = u8''; +char d = ' \ No newline at end of file diff --git a/test/cases/unterminated comment.c b/test/cases/unterminated comment.c new file mode 100644 index 00000000..91b22923 --- /dev/null +++ b/test/cases/unterminated comment.c @@ -0,0 +1,4 @@ +#define EXPECTED_ERRORS "unterminated comment.c:4:7: error: unterminated comment" \ + "unterminated comment.c:4:6: error: expected ';' before end of file" \ + +int x /** \ No newline at end of file diff --git a/test/cases/unterminated string literal.c b/test/cases/unterminated string literal.c new file mode 100644 index 00000000..d5bbcc9b --- /dev/null +++ b/test/cases/unterminated string literal.c @@ -0,0 +1,11 @@ +#define EXPECTED_ERRORS "unterminated string literal.c:9:12: warning: missing terminating '\"' character [-Winvalid-pp-token]" \ + "unterminated string literal.c:10:20: warning: missing terminating '\"' character [-Winvalid-pp-token]" \ + "unterminated string literal.c:11:12: warning: missing terminating '\"' character [-Winvalid-pp-token]" \ + "unterminated string literal.c:9:12: error: missing terminating '\"' character" \ + "unterminated string literal.c:10:20: error: missing terminating '\"' character" \ + "unterminated string literal.c:11:12: error: missing terminating '\"' character" \ + + +char A[] = "hello +char B[] = "hello" "world +char C[] = " \ No newline at end of file diff --git a/test/cases/wide character constants.c b/test/cases/wide character constants.c index 99d512d4..c174fbef 100644 --- a/test/cases/wide character constants.c +++ b/test/cases/wide character constants.c @@ -55,13 +55,13 
@@ int Z = 'ABC\D'; "wide character constants.c:10:16: error: wide character literals may not contain multiple characters" \ "wide character constants.c:11:16: error: Unicode character literals may not contain multiple characters" \ "wide character constants.c:14:16: warning: multi-character character constant [-Wfour-char-constants]" \ - "wide character constants.c:20:19: error: escape sequence out of range" \ + "wide character constants.c:20:21: error: escape sequence out of range" \ "wide character constants.c:22:19: error: character too large for enclosing character literal type" \ - "wide character constants.c:25:19: error: invalid universal character" \ + "wide character constants.c:25:20: error: invalid universal character" \ "wide character constants.c:26:19: error: character too large for enclosing character literal type" \ "wide character constants.c:27:19: error: Unicode character literals may not contain multiple characters" \ "wide character constants.c:28:19: error: Unicode character literals may not contain multiple characters" \ - "wide character constants.c:29:19: error: escape sequence out of range" \ + "wide character constants.c:29:20: error: escape sequence out of range" \ "wide character constants.c:33:9: error: Unicode character literals may not contain multiple characters" \ "wide character constants.c:35:9: error: character too large for enclosing character literal type" \ "wide character constants.c:36:9: error: character 'A' cannot be specified by a universal character name" \ diff --git a/test/cases/wide strings.c b/test/cases/wide strings.c new file mode 100644 index 00000000..2e37b07d --- /dev/null +++ b/test/cases/wide strings.c @@ -0,0 +1,79 @@ +//aro-args -std=c2x +#include <stdint.h> +typedef __WCHAR_TYPE__ wchar_t; + +uint8_t b[] = u8""; +_Static_assert(sizeof(b) == sizeof(uint8_t[1])); +char c[] = ""; +_Static_assert(sizeof(c) == 1); +wchar_t d[] = L""; +_Static_assert(sizeof(d) == sizeof(wchar_t[1])); +uint16_t e[] = u""; +_Static_assert(sizeof(e) == sizeof(uint16_t[1])); +uint32_t f[] = U""; +_Static_assert(sizeof(f) == sizeof(uint32_t[1])); + +uint16_t A[] = u"abc"; +_Static_assert(sizeof(A) == 8); + +uint32_t B[] = U"ABC"; +_Static_assert(sizeof(B) == 16); + +wchar_t C[] = L"ABC"; +_Static_assert(sizeof(C) == sizeof(wchar_t) * 4); + +uint16_t D[] = u"a" U"b"; + +uint16_t E[] = u"a" u"bc"; +_Static_assert(sizeof(E) == 8); + +uint32_t F[] = U"A" "BC"; +_Static_assert(sizeof(F) == 16); + +uint16_t G[] = u"🤗"; +_Static_assert(sizeof(G) == 6); + +uint16_t H[] = u"\U0001F917"; +_Static_assert(sizeof(H) == 6); + +uint32_t I[] = U"🤗"; +_Static_assert(sizeof(I) == 8); + +uint8_t J[] = u8"🤗"; +_Static_assert(sizeof(J) == 5); + +uint8_t K[] = u8"\U0001F917"; +_Static_assert(sizeof(K) == 5); + +uint16_t L[] = u"\xFFFFF"; + +uint8_t M[] = u8"\xFFF"; + +_Static_assert(1 == 2, u"😬\U0001f62c"); +_Static_assert(1 == 2, U"😬\U0001f62c"); + +char foo[] = "\u0020\u0020\u0020\u0020\xFFFFFFFF"; + +wchar_t N[] = "word" L"" "a"; +_Static_assert(sizeof(N) == sizeof(wchar_t) * 6); +uint32_t O[] = "word" U"" "a"; +_Static_assert(sizeof(O) == sizeof(uint32_t) * 6); +uint16_t P[] = "word" u"" "a"; +_Static_assert(sizeof(P) == sizeof(uint16_t) * 6); + +uint32_t Q[] = U"abc\ndef\xFFghi"; +_Static_assert(sizeof(Q) == sizeof(uint32_t) * 12); + +uint32_t R[] = U"a" U'b'; +uint32_t S[] = U'a'; +uint32_t T[] = { U'a', U'b'}; + +#define EXPECTED_ERRORS "wide strings.c:25:21: error: unsupported string literal concatenation" \ + "wide strings.c:48:18: error: escape sequence out of range" \ + "wide 
strings.c:50:18: error: escape sequence out of range" \ + "wide strings.c:52:1: error: static assertion failed \"😬😬\"" \ + "wide strings.c:53:1: error: static assertion failed \"😬😬\"" \ + "wide strings.c:55:39: error: escape sequence out of range" \ + "wide strings.c:67:21: error: expected ';', found 'a character literal'" \ + "wide strings.c:68:16: error: array initializer must be an initializer list or wide string literal" \ +
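
Reviewer note (a standalone sketch, not part of the patch): the concatenation rule added in src/TextLiteral.zig is small enough to check in isolation. The Kind enum below is a local copy of the patch's logic, not an import of the real module, and the `zig test` cases mirror the interesting lines of the new wide strings.c test.

const std = @import("std");

const Kind = enum {
    char,
    wide,
    utf_8,
    utf_16,
    utf_32,
    unterminated,

    // Same rule as TextLiteral.Kind.concat in the patch: `char` combines with
    // every other kind; two distinct non-char kinds cannot be mixed.
    fn concat(self: Kind, other: Kind) !Kind {
        if (self == .unterminated or other == .unterminated) return .unterminated;
        if (self == other) return self; // can always concat with own kind
        if (self == .char) return other; // char + X -> X
        if (other == .char) return self; // X + char -> X
        return error.CannotConcat;
    }
};

test "adjacent string literal kinds" {
    try std.testing.expectEqual(Kind.utf_16, try Kind.char.concat(.utf_16)); // "word" u"" -> char16_t[]
    try std.testing.expectEqual(Kind.wide, try Kind.wide.concat(.char)); // L"" "a" -> wchar_t[]
    try std.testing.expectError(error.CannotConcat, Kind.utf_16.concat(.utf_32)); // u"a" U"b" -> error
}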
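
In the same spirit, a minimal sketch of the ByteRange.slice change in src/Value.zig: string data stays raw bytes in retained_strings, and wide literals are read back by reinterpreting those bytes at the element width given by the new Compilation.CharUnitSize. `unitSlice` below is a hypothetical local stand-in for ByteRange.slice without the start/end bookkeeping; the enum is copied from the patch.

const std = @import("std");

const CharUnitSize = enum(u32) {
    @"1" = 1,
    @"2" = 2,
    @"4" = 4,

    fn Type(comptime self: CharUnitSize) type {
        return switch (self) {
            .@"1" => u8,
            .@"2" => u16,
            .@"4" => u32,
        };
    }
};

// Reinterpret stored bytes at the requested unit width, as ByteRange.slice does.
fn unitSlice(all_bytes: []const u8, comptime size: CharUnitSize) []const size.Type() {
    const aligned: []align(@alignOf(size.Type())) const u8 = @alignCast(all_bytes);
    return std.mem.bytesAsSlice(size.Type(), aligned);
}

test "read a UTF-16 literal back as u16 code units" {
    // Build the stored bytes the way the parser does: native-endian u16 units
    // plus a u16 null terminator. This is also why the patch aligns retain_start
    // with internalStorageAlignment before writing.
    const units = [_]u16{ 'h', 'i', 0 };
    var stored: [6]u8 align(2) = undefined;
    @memcpy(&stored, std.mem.sliceAsBytes(&units));
    try std.testing.expectEqualSlices(u16, &units, unitSlice(&stored, .@"2"));
}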
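
Lastly, the width-2 codepoint path in Parser.stringLiteral goes through UTF-8 on its way to UTF-16LE. Assuming only documented std.unicode behavior, this shows why the new test asserts sizeof(G) == 6 for u"🤗": the emoji takes a surrogate pair (four bytes) plus a two-byte null terminator.

const std = @import("std");

test "codepoint to UTF-16LE, as in the width-2 string path" {
    var utf8_buf: [4]u8 = undefined;
    var utf16_buf: [2]u16 = undefined;
    const c: u21 = 0x1F917; // U+1F917, outside the BMP
    const utf8_len = try std.unicode.utf8Encode(c, &utf8_buf);
    const utf16_len = try std.unicode.utf8ToUtf16Le(&utf16_buf, utf8_buf[0..utf8_len]);
    // Two u16 code units (a surrogate pair) occupy four bytes in retained_strings.
    try std.testing.expectEqual(@as(usize, 2), utf16_len);
    try std.testing.expectEqual(@as(usize, 4), std.mem.sliceAsBytes(utf16_buf[0..utf16_len]).len);
}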