Skip to content

Commit

Permalink
Generate case_folding_table.rs in advance
Browse files (browse the repository at this point in the history)
  • Loading branch information
Thom Chiovoloni committed Jul 7, 2020
1 parent 7cb56f9 commit 3683b08
Show file tree
Hide file tree
Showing 5 changed files with 1,431 additions and 26 deletions.
11 changes: 6 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ description = "Unicode caseless matching"
repository = "https://github.com/SimonSapin/rust-caseless"
license = "MIT"

build = "src/build.rs"

[build-dependencies]
regex = "1.0"

[dependencies]
unicode-normalization = "0.1"

[workspace]
members = [
".",
"print-table",
]
10 changes: 10 additions & 0 deletions print-table/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[package]
name = "print-table"
version = "0.2.1"
authors = ["Simon Sapin <[email protected]>"]
license = "MIT"
publish = false

[dependencies]
regex = "1"

27 changes: 8 additions & 19 deletions src/build.rs → print-table/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
extern crate regex;

use std::char;
use std::env;
use std::fs::File;
use std::io::Write;
use std::path::Path;
use regex::Regex;

// Case folding a single code point can give up to this many code points.
const MAX_FOLDED_CODE_POINTS: usize = 3;

fn main() {
let mut lines = include_str!("../CaseFolding.txt").lines();
let mut lines = include_str!("../../CaseFolding.txt").lines();
let first_line = lines.next().unwrap();
let version_regex = Regex::new(r"^# CaseFolding-(\d+)\.(\d+)\.(\d+).txt$").unwrap();
let unicode_version = &version_regex.captures(first_line).unwrap();
Expand All @@ -21,15 +17,8 @@ fn main() {
unicode_version[3].parse().unwrap(),
);

let dst = Path::new(&env::var("OUT_DIR").unwrap()).join("case_folding_data.rs");
let f = &mut File::create(&dst).unwrap();

macro_rules! w {
($($args: tt)+) => { (write!(f, $($args)+)).unwrap(); }
};

w!("pub const UNICODE_VERSION: (u64, u64, u64) = ({}, {}, {});\n", major, minor, patch);
w!("const CASE_FOLDING_TABLE: &'static [(char, [char; 3])] = &[\n");
print!("pub const UNICODE_VERSION: (u64, u64, u64) = ({}, {}, {});\n", major, minor, patch);
print!("pub const CASE_FOLDING_TABLE: &'static [(char, [char; 3])] = &[\n");

// Entry with C (common case folding) or F (full case folding) status
let c_or_f_entry = Regex::new(r"^([0-9A-F]+); [CF]; ([0-9A-F ]+);").unwrap();
Expand All @@ -42,17 +31,17 @@ fn main() {
let blanks = MAX_FOLDED_CODE_POINTS - to.len();
let mut to = to.into_iter();
let first_to = to.next().unwrap();
w!(" ('{}', ['{}'", hex_to_escaped(from), first_to);
print!(" ('{}', ['{}'", hex_to_escaped(from), first_to);
for c in to {
w!(", '{}'", c);
print!(", '{}'", c);
}
for _ in 0..blanks {
w!(", '\\0'");
print!(", '\\0'");
}
w!("]),\n");
print!("]),\n");
}
}
w!("];\n");
print!("];\n");
}


Expand Down
Loading

0 comments on commit 3683b08

Please sign in to comment.