Skip to content

Commit

Permalink
emoji presentation: store single codepoints instead of ranges
Browse files Browse the repository at this point in the history
  • Loading branch information
Jules-Bertholet committed Feb 14, 2024
1 parent d1afcac commit 3de94ad
Show file tree
Hide file tree
Showing 2 changed files with 230 additions and 224 deletions.
42 changes: 15 additions & 27 deletions scripts/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,29 +389,29 @@ def make_tables(
return tables


def variation_sequences() -> "list[tuple[int, int]]":
def load_variation_sequences(width_map) -> "list[int]":
"""Outputs a list of character ranges, corresponding to all the valid characters for starting
an emoji presentation sequence."""
an emoji presentation sequence, excluding those that are always wide."""

with fetch_open("emoji/emoji-variation-sequences.txt") as sequences:
sequence = re.compile(r"^([0-9A-F]+)\s+FE0F\s*;\s+emoji style")
ranges = []
codepoints = []
for line in sequences.readlines():
if match := sequence.match(line):
cp = int(match.group(1), 16)
if ranges != [] and ranges[-1][1] == cp - 1:
ranges[-1] = (ranges[-1][0], cp)
else:
ranges.append((cp, cp))

return ranges
if width_map[cp] == EffectiveWidth.WIDE:
# this character would be width 2 even outside a variation sequence,
# so we don't need to store its info
continue
codepoints.append(cp)
return codepoints


def emit_module(
out_name: str,
unicode_version: "tuple[int, int, int]",
tables: "list[Table]",
emoji_variations: "list[tuple[int, int]]",
emoji_variations: "list[int]",
):
"""Outputs a Rust module to `out_name` using table data from `tables`.
If `TABLE_CFGS` is edited, you may need to edit the included code for `lookup_width`.
Expand Down Expand Up @@ -498,19 +498,7 @@ def emit_module(
/// Emoji presentation sequences are considered to have width 2.
#[inline]
pub fn starts_emoji_presentation_seq(c: char) -> bool {
use core::cmp::Ordering::{Equal, Greater, Less};
EMOJI_PRESENTATION_RANGES
.binary_search_by(|&(lo, hi)| {
if lo > c {
Greater
} else if hi < c {
Less
} else {
Equal
}
})
.is_ok()
EMOJI_PRESENTATION_RANGES.binary_search(&c).is_ok()
}
"""
)
Expand Down Expand Up @@ -570,11 +558,11 @@ def emit_module(
f"""
/// Each tuple corresponds to a range (inclusive at both ends)
/// of characters that can start an emoji presentation sequence.
static EMOJI_PRESENTATION_RANGES: [(char, char); {len(emoji_variations)}] = [
static EMOJI_PRESENTATION_RANGES: [char; {len(emoji_variations)}] = [
"""
)
for lo, hi in emoji_variations:
module.write(f" ('\\u{{{lo:X}}}', '\\u{{{hi:X}}}'),\n")
for cp in emoji_variations:
module.write(f" '\\u{{{cp:X}}}',\n")
module.write(" ];\n")

module.write("}\n")
Expand Down Expand Up @@ -612,7 +600,7 @@ def main(module_filename: str):
width_map[0x00AD] = EffectiveWidth.NARROW

tables = make_tables(TABLE_CFGS, enumerate(width_map))
emoji_variations = variation_sequences()
emoji_variations = load_variation_sequences(width_map)

print("------------------------")
total_size = 0
Expand Down
Loading

0 comments on commit 3de94ad

Please sign in to comment.