Skip to content

Commit

Permalink
CharInfo: Remove ascii check from xidStart / xidContinue
Browse files Browse the repository at this point in the history
Tokenizer takes care of basic char set characters in identifiers
  • Loading branch information
ehaas committed Nov 8, 2023
1 parent c0214a9 commit b0ed72f
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 32 deletions.
20 changes: 5 additions & 15 deletions src/CharInfo.zig
Original file line number Diff line number Diff line change
Expand Up @@ -489,19 +489,15 @@ pub fn homoglyph(codepoint: u21) ?u21 {
}

/// Returns whether codepoint `c` is marked in the XID_Start lookup tables
/// (`tables.ascii_start` for the ASCII range, otherwise the
/// `tables.trie_start` / `tables.leaf` bitmap).
pub fn isXidStart(c: u21) bool {
// ASCII range: answer directly from the per-character bool table.
if (c < tables.ascii_start.len) {
return tables.ascii_start[c];
}
// The bitmap lookup below is only meant for non-ASCII codepoints.
assert(c > 0x7F);
// Each leaf byte covers 8 codepoints, and each trie entry covers
// `tables.chunk` leaf bytes, so this is the trie slot for `c`.
const idx = c / 8 / tables.chunk;
// Codepoints past the end of the trie use chunk 0.
const chunk: usize = if (idx < tables.trie_start.len) tables.trie_start[idx] else 0;
// NOTE(review): the `/ 2` suggests leaf chunks are addressed at half-chunk
// granularity (overlapping chunks) — confirm against the table generator.
const offset = chunk * tables.chunk / 2 + c / 8 % tables.chunk;
// Test the bit for `c` within its leaf byte (bit index is `c % 8`).
return (tables.leaf[offset] >> (@as(u3, @intCast(c % 8)))) & 1 != 0;
}

pub fn isXidContinue(c: u21) bool {
if (c < tables.ascii_continue.len) {
return tables.ascii_continue[c];
}
assert(c > 0x7F);
const idx = c / 8 / tables.chunk;
const chunk: usize = if (idx < tables.trie_continue.len) tables.trie_continue[idx] else 0;
const offset = chunk * tables.chunk / 2 + c / 8 % tables.chunk;
Expand All @@ -510,7 +506,7 @@ pub fn isXidContinue(c: u21) bool {

test "isXidStart / isXidContinue panic check" {
const std = @import("std");
for (0..std.math.maxInt(u21)) |i| {
for (0x80..0x110000) |i| {
const c: u21 = @intCast(i);
if (std.unicode.utf8ValidCodepoint(c)) {
_ = isXidStart(c);
Expand All @@ -521,21 +517,15 @@ test "isXidStart / isXidContinue panic check" {

// Spot checks for `isXidStart` on a handful of ASCII and non-ASCII codepoints.
test isXidStart {
const std = @import("std");
// ASCII: letters are accepted; a digit and a space are rejected.
try std.testing.expect(isXidStart('a'));
try std.testing.expect(isXidStart('Z'));
try std.testing.expect(!isXidStart('0'));
try std.testing.expect(!isXidStart(' '));
// Non-ASCII samples that must all be rejected: '᠑' (a Mongolian digit),
// the trademark sign, the pound sign, and an emoji outside the BMP.
try std.testing.expect(!isXidStart('᠑'));
try std.testing.expect(!isXidStart('™'));
try std.testing.expect(!isXidStart('£'));
try std.testing.expect(!isXidStart('\u{1f914}')); // 🤔
}

test isXidContinue {
const std = @import("std");
try std.testing.expect(isXidContinue('a'));
try std.testing.expect(isXidContinue('Z'));
try std.testing.expect(isXidContinue('0'));
try std.testing.expect(!isXidContinue(' '));
try std.testing.expect(isXidContinue('᠑'));
try std.testing.expect(!isXidContinue('™'));
try std.testing.expect(!isXidContinue('£'));
try std.testing.expect(!isXidContinue('\u{1f914}')); // 🤔
Expand Down
17 changes: 0 additions & 17 deletions src/unicode/identifier_tables.zig
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,6 @@
//! and Unicode Standard Annex #31 https://www.unicode.org/reports/tr31/
//! Licensed under the MIT License and the Unicode license

// One-letter aliases for `true` / `false`, used to keep the bool table
// literals in this file compact.
const T: bool = true;
const F: bool = false;

/// Per-character table for the 128 ASCII codepoints, indexed by codepoint:
/// `T` where the character may start an identifier.
/// Each row below holds 32 entries; the `T` runs in the last two rows line up
/// with 'A'...'Z' and 'a'...'z'. '_' (index 95) is `F` here.
pub const ascii_start: [128]bool align(64) = .{
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F,
F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F,
};

/// Per-character table for the 128 ASCII codepoints, indexed by codepoint:
/// `T` where the character may continue an identifier.
/// Each row below holds 32 entries; the `T` entries line up with '0'...'9',
/// 'A'...'Z', '_' (index 95) and 'a'...'z'.
pub const ascii_continue: [128]bool align(64) = .{
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F, F,
F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, T,
F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F,
};

/// Chunk size used by the trie lookups in `CharInfo.zig`: a codepoint's trie
/// slot is `c / 8 / chunk` and its position within a chunk is `c / 8 % chunk`.
/// NOTE(review): presumably the number of leaf bytes per chunk — confirm
/// against the table generator.
pub const chunk = 64;

pub const trie_start: [402]u8 align(8) = .{
Expand Down

0 comments on commit b0ed72f

Please sign in to comment.