From 6c49a4129c43c28eeba3ce6e644e2c2b2cf3c6be Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Wed, 8 Feb 2023 11:32:33 +0100 Subject: [PATCH 001/207] refactor --- Cargo.toml | 1 + src/core.rs | 440 ++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 488 ++-------------------------------------------------- src/load.rs | 26 +++ src/util.rs | 72 ++++++++ 5 files changed, 553 insertions(+), 474 deletions(-) create mode 100644 src/core.rs create mode 100644 src/load.rs create mode 100644 src/util.rs diff --git a/Cargo.toml b/Cargo.toml index 1fb806bc..6ab3a53c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ fancy-regex = "0.10.0" regex = "1.7.0" rustc-hash = "1.1.0" bstr = "1.0.1" +linefeed = "0.6" [profile.release] incremental = true diff --git a/src/core.rs b/src/core.rs new file mode 100644 index 00000000..ab0dc5eb --- /dev/null +++ b/src/core.rs @@ -0,0 +1,440 @@ +use std::collections::HashSet; +use std::thread; + +use fancy_regex::Regex; +use rustc_hash::FxHashMap as HashMap; + +#[path = "util.rs"] +mod util; + +const MAX_NUM_THREADS: usize = 128; + +// Various performance notes: +// +// Regex +// ===== +// Most of the time is spent in regex. The easiest way to speed this up is by using less fancy +// regex features. For instance, using a regex parse-able by `regex` crate is 3x faster than +// the usual regex we use. +// +// However, given that we're using a regex parse-able by `regex`, there isn't much difference +// between using the `regex` crate and using the `fancy_regex` crate. +// +// There is an important interaction between threading, `regex` and `fancy_regex`. +// When using `fancy_regex`, we hit `regex.find_at`. It turns out that this causes contention on +// some mutable scratch space inside of `regex`. This absolutely kills performance. When using plain +// old `regex`, we don't hit this, because `find_iter` has a different code path. 
+// Related: https://github.com/rust-lang/regex/blob/master/PERFORMANCE.md +// Anyway, the way we get around this is with having a (mostly) thread local clone of the regex for +// each thread. +// +// Threading +// ========= +// I tried using `rayon`. It wasn't really faster than using Python threads and releasing the GIL. +// So goodbye `rayon`! Let thread count etc be in control of our Python users. +// +// Caching +// ======= +// The reference tokeniser has an lru cache over the equivalent of `byte_pair_encode`. +// Originally, we had one too! Without it, we were only vaguely faster than Python. +// I used an RWLock to protect the cache. This didn't seem to hurt single threaded performance +// noticeably, but it did affect multi-threaded performance. Weirdly, it seemed to affect +// multi-threaded performance even when I only had readers (maybed I messed something up?). +// Anyway, I realised that we could get rid of the cache, if we treat the set of tokens as a cache! +// These are exactly the set or merges that are likely to be hot. And now we don't have to think +// about interior mutability, memory use, or cloning. +// +// Hashing +// ======= +// We use FxHashMap instead of the standard HashMap. This is maybe like a 5-10% win? +// The current implementation ends up doing a lot of hashing of bytes. In theory, this could be made +// to be hashing of two-tuples of ints, which looks like it may also be a couple percent faster. + +use std::num::NonZeroU64; +pub struct FakeThreadId(NonZeroU64); + +fn hash_current_thread() -> usize { + // It's easier to use unsafe than to use nightly. Rust has this nice u64 thread id counter + // that works great for our use case of avoiding collisions in our array. Unfortunately, + // it's private. 
However, there are only so many ways you can layout a u64, so just transmute + // https://github.com/rust-lang/rust/issues/67939 + const _: [u8; 8] = [0; std::mem::size_of::()]; + const _: [u8; 8] = [0; std::mem::size_of::()]; + let x = unsafe { + std::mem::transmute::(thread::current().id()).0 + }; + u64::from(x) as usize +} + +pub struct CoreBPENative { + encoder: HashMap, usize>, + special_tokens_encoder: HashMap, + decoder: HashMap>, + special_tokens_decoder: HashMap>, + regex_tls: Vec, + special_regex_tls: Vec, + sorted_token_bytes: Vec>, +} + +impl CoreBPENative { + fn _get_tl_regex(&self) -> &Regex { + // See performance notes above for what this is about + // It's also a little janky, please make a better version of it! + // However, it's nice that this doesn't leak memory to short-lived threads + &self.regex_tls[hash_current_thread() % MAX_NUM_THREADS] + } + + fn _get_tl_special_regex(&self) -> &Regex { + &self.special_regex_tls[hash_current_thread() % MAX_NUM_THREADS] + } + + pub fn _decode_native(&self, tokens: &[usize]) -> Vec { + let mut ret = Vec::with_capacity(tokens.len() * 2); + for token in tokens { + let token_bytes = self + .decoder + .get(token) + .unwrap_or_else(|| &self.special_tokens_decoder[token]); + ret.extend(token_bytes); + } + ret + } + + pub fn _encode_ordinary_native(&self, text: &str) -> Vec { + // This is the core of the encoding logic; the other functions in here + // just make things complicated :-) + let regex = self._get_tl_regex(); + let mut ret = vec![]; + for mat in regex.find_iter(text) { + let piece = mat.unwrap().as_str().as_bytes(); + if let Some(token) = self.encoder.get(piece) { + ret.push(*token); + continue; + } + ret.extend(&util::byte_pair_encode(piece, &self.encoder)); + } + ret + } + + pub fn _encode_native(&self, text: &str, allowed_special: &HashSet<&str>) -> (Vec, usize) { + let special_regex = self._get_tl_special_regex(); + let regex = self._get_tl_regex(); + let mut ret = vec![]; + + let mut start = 0; + 
let mut last_piece_token_len = 0; + loop { + let mut next_special; + let mut start_find = start; + loop { + // Find the next allowed special token, if any + next_special = special_regex.find_from_pos(text, start_find).unwrap(); + match next_special { + Some(m) => { + if allowed_special.contains(&text[m.start()..m.end()]) { + break; + } + start_find = m.start() + 1; + } + None => break, + } + } + let end = next_special.map_or(text.len(), |m| m.start()); + + // Okay, here we go, compare this logic to _encode_ordinary_native + for mat in regex.find_iter(&text[start..end]) { + let piece = mat.unwrap().as_str().as_bytes(); + if let Some(token) = self.encoder.get(piece) { + last_piece_token_len = 1; + ret.push(*token); + continue; + } + let tokens = util::byte_pair_encode(piece, &self.encoder); + last_piece_token_len = tokens.len(); + ret.extend(&tokens); + } + + match next_special { + // And here we push the special token + Some(m) => { + let piece = m.as_str(); + let token = self.special_tokens_encoder[piece]; + ret.push(token); + start = m.end(); + last_piece_token_len = 0; + } + None => break, + } + } + + // last_piece_token_len is how many tokens came from the last regex split. 
This is used + // for determining unstable tokens, since you can't merge across (stable) regex splits + (ret, last_piece_token_len) + } + + pub fn _encode_bytes(&self, bytes: &[u8]) -> Vec { + match std::str::from_utf8(bytes) { + Ok(text) => self._encode_ordinary_native(text), + Err(e) => { + let text = unsafe { std::str::from_utf8_unchecked(&bytes[..e.valid_up_to()]) }; + let (tokens, last_piece_token_len) = self._encode_native(text, &HashSet::new()); + let (mut tokens, last_piece_token_len) = + self._increase_last_piece_token_len(tokens, last_piece_token_len); + if !tokens.is_empty() && last_piece_token_len > 0 { + // Lop off the tokens from the last piece and run BPE on the remaining bytes + // Somewhat niche, but this may not be correct if we'd have had a regex + // split between the valid UTF-8 and the invalid bytes, which is why this + // method is private + let mut unstable_bytes = + self._decode_native(&tokens[tokens.len() - last_piece_token_len..]); + unstable_bytes.extend_from_slice(&bytes[e.valid_up_to()..]); + + tokens.truncate(tokens.len() - last_piece_token_len); + tokens.extend(util::byte_pair_encode(&unstable_bytes, &self.encoder)); + } + tokens + } + } + } + + fn _increase_last_piece_token_len( + &self, + tokens: Vec, + mut last_piece_token_len: usize, + ) -> (Vec, usize) { + // Unfortunately, the locations where our regex splits can be unstable. + // For the purposes of determining unstable tokens, unstable regex splitting + // is only a problem if a split that was present disappears, since this can + // lead to merging of tokens otherwise thought to be stable. + // cl100k_base makes our life hard by including the \s*[\r\n]+ + // pattern. This can e.g. cause "\n" + " " to become "\n \n". 
+ // Here is a quick and dirty fix: + { + let token_is_all_space = |token| { + self.decoder + .get(token) + .map(|token_bytes| { + token_bytes + .iter() + .rev() + .all(|&b| [b' ', b'\n', b'\t'].contains(&b)) + }) + .unwrap_or(false) + }; + if last_piece_token_len > 0 + && token_is_all_space(&tokens[tokens.len() - last_piece_token_len]) + { + while (last_piece_token_len < tokens.len()) + && token_is_all_space(&tokens[tokens.len() - last_piece_token_len - 1]) + { + last_piece_token_len += 1; + } + } + } + debug_assert!(last_piece_token_len <= tokens.len()); + + (tokens, last_piece_token_len) + } + + pub fn _encode_unstable_native( + &self, + text: &str, + allowed_special: &HashSet<&str>, + ) -> (Vec, HashSet>) { + let (tokens, last_piece_token_len) = self._encode_native(text, allowed_special); + if last_piece_token_len == 0 { + // If last_piece_token_len is zero, the last token was a special token and we have + // no unstable bytes + return (tokens, HashSet::new()); + } + let (mut tokens, last_piece_token_len) = + self._increase_last_piece_token_len(tokens, last_piece_token_len); + + let unstable_bytes = self._decode_native(&tokens[tokens.len() - last_piece_token_len..]); + tokens.truncate(tokens.len() - last_piece_token_len); + + // TODO: we should try harder to find additional stable tokens + // This would reduce the amount of retokenising when determining completions + // Refer to the logic in an older version of this file + + let mut completions = HashSet::new(); + if unstable_bytes.is_empty() { + return (tokens, completions); + } + + // This is the easy bit. Just find all single tokens that start with unstable_bytes + // (including tokens that exactly match unstable_bytes) + // Separating this from the loop below helps with performance in a common case. 
+ let mut point = self + .sorted_token_bytes + .partition_point(|x| x.as_slice() < unstable_bytes.as_slice()); + while point < self.sorted_token_bytes.len() + && self.sorted_token_bytes[point].starts_with(&unstable_bytes) + { + completions.insert(vec![ + self.encoder[self.sorted_token_bytes[point].as_slice()], + ]); + point += 1; + } + + // Now apply even more brute force. At every (other) possible position for the straddling + // token, concatenate additional bytes from that token (if any) to unstable_bytes, + // and retokenise the whole thing and see what we get. + for i in 1..unstable_bytes.len() { + let prefix = &unstable_bytes[..i]; + let suffix = &unstable_bytes[i..]; + let mut point = self + .sorted_token_bytes + .partition_point(|x| x.as_slice() < suffix); + // TODO: Perf optimisation if suffix starts with " "? + while point < self.sorted_token_bytes.len() + && self.sorted_token_bytes[point].starts_with(suffix) + { + let possibility = [prefix, self.sorted_token_bytes[point].as_slice()].concat(); + let encoded = match std::str::from_utf8(&possibility) { + // Morally, this is byte_pair_encode(&possibility, &self.encoder) + // But we might have introduced a regex split which would prevent merges. + // (particularly possible in the presence of unstable regex splits) + // So convert to UTF-8 and do regex splitting. + // E.g. with cl100k_base " !" gets split to " " + " !", + // but byte_pair_encode(" !") != byte_pair_encode(" ") + Ok(s) => self._encode_ordinary_native(s), + + // Technically, whether or not this arm is correct depends on whether there + // would be a regex split before the UTF-8 truncation point. 
+ // Probably niche enough that no one will ever notice (after all, people didn't + // notice all the big holes in the previous unstable token implementation) + Err(_) => util::byte_pair_encode(&possibility, &self.encoder), + // Something like the following is intriguing but incorrect: + // Err(e) => self._encode_ordinary_native(unsafe { + // std::str::from_utf8_unchecked(&possibility[..e.valid_up_to()]) + // }), + }; + let mut seq = Vec::new(); + let mut seq_len = 0; + for token in encoded { + seq.push(token); + seq_len += self.decoder[&token].len(); + if seq_len >= unstable_bytes.len() { + break; + } + } + completions.insert(seq); + point += 1; + } + } + + // This is also not straightforward. While we generally assume that regex splits are stable, + // unfortunately, they are not. That is, if adding bytes were to make a split appear in + // unstable_bytes, this could make tokens possible which our logic would otherwise think + // would be merged. + // For example, with gpt2, the use of \s+(?!\S) means that "\n\n" could + // develop a split, e.g. "\n\n0" splits into "\n"+"\n"+"0", making "\n" a possible token. 
+ // Here is a quick and dirty fix: + // This isn't right if we ever remove \s+(?!\S) + if unstable_bytes.len() > 1 { + let last_decoded = bstr::decode_last_utf8(unstable_bytes.as_slice()); + if unstable_bytes.len() - last_decoded.1 > 0 + && last_decoded.0.map_or(false, |c| c.is_whitespace()) + { + let mut reencoded = util::byte_pair_encode( + &unstable_bytes[..unstable_bytes.len() - last_decoded.1], + &self.encoder, + ); + reencoded.extend(util::byte_pair_encode( + &unstable_bytes[unstable_bytes.len() - last_decoded.1..], + &self.encoder, + )); + completions.insert(reencoded); + } + } + + (tokens, completions) + } + + pub fn encode_single_token(&self, piece: &[u8]) -> Result> { + if let Some(token) = self.encoder.get(piece).copied() { + return Ok(token); + } + if let Ok(piece_str) = std::str::from_utf8(piece) { + if let Some(token) = self.special_tokens_encoder.get(piece_str).copied() { + return Ok(token); + } + } + Err(piece.to_owned()) + } + + fn encode_single_piece(&self, piece: &[u8]) -> Vec { + if let Some(token) = self.encoder.get(piece) { + return vec![*token]; + } + util::byte_pair_encode(piece, &self.encoder) + } + + // ==================== + // Decoding + // ==================== + + pub fn decode_single_token_bytes(&self, token: usize) -> Result<&[u8], String> { + if let Some(bytes) = self.decoder.get(&token) { + return Ok(bytes); + } + if let Some(bytes) = self.special_tokens_decoder.get(&token) { + return Ok(bytes); + } + Err(token.to_string()) + } + + // ==================== + // Miscellaneous + // ==================== + + pub fn token_byte_values(&self) -> &Vec> { + &self.sorted_token_bytes + } + + pub fn new( + encoder: HashMap, usize>, + special_tokens_encoder: HashMap, + pattern: &str, + ) -> Result { + let regex = Regex::new(pattern)?; + // .map_err(|e| PyErr::new::(e.to_string()))?; + + let special_regex = { + let _parts = special_tokens_encoder + .keys() + .map(|s| fancy_regex::escape(s)) + .collect::>(); + Regex::new(&_parts.join("|"))? 
+ + // .map_err(|e| PyErr::new::(e.to_string()))? + }; + + let decoder: HashMap> = + encoder.iter().map(|(k, v)| (*v, k.clone())).collect(); + + assert!(encoder.len() == decoder.len()); + + let special_tokens_decoder: HashMap> = special_tokens_encoder + .iter() + .map(|(k, v)| (*v, k.as_bytes().to_vec())) + .collect(); + + // Clone because I don't know how to tell Rust I'm not going to change the map + let mut sorted_token_bytes: Vec> = encoder.keys().cloned().collect(); + sorted_token_bytes.sort(); + + Ok(CoreBPENative { + encoder, + special_tokens_encoder, + decoder, + special_tokens_decoder, + regex_tls: (0..MAX_NUM_THREADS).map(|_| regex.clone()).collect(), + special_regex_tls: (0..MAX_NUM_THREADS) + .map(|_| special_regex.clone()) + .collect(), + sorted_token_bytes, + }) + } +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 8235dbb1..7f0edd89 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,388 +2,19 @@ #![allow(clippy::borrow_deref_ref)] use std::collections::HashSet; -use std::thread; -use fancy_regex::Regex; use pyo3::exceptions; use pyo3::prelude::*; use pyo3::types::{PyBytes, PyList, PyTuple}; use pyo3::PyResult; use rustc_hash::FxHashMap as HashMap; -fn _byte_pair_merge(piece: &[u8], ranks: &HashMap, usize>) -> Vec> { - let mut parts: Vec<_> = (0..piece.len()).map(|i| i..i + 1).collect(); +mod util; +mod core; - // If you have n parts and m merges, this does O(mn) work - // We could do something with a heap and do O(m log n) work - - // Note that we hash bytes, not token pairs. As long as we train BPE the way we - // currently do, this is equivalent. An easy way to break this would be to decouple - // merge priority from token index or to prevent specific token merges. 
- loop { - if parts.len() == 1 { - break; - } - let mut min_rank: Option<(usize, usize)> = None; - for i in 0..parts.len() - 1 { - let rank = if let Some(r) = ranks.get(&piece[parts[i].start..parts[i + 1].end]) { - *r - } else { - continue; - }; - if min_rank.is_none() || rank < min_rank.unwrap().0 { - min_rank = Some((rank, i)); - } - } - if let Some((_, i)) = min_rank { - parts[i] = parts[i].start..parts[i + 1].end; - parts.remove(i + 1); - } else { - break; - } - } - parts -} - -pub fn byte_pair_encode(piece: &[u8], ranks: &HashMap, usize>) -> Vec { - if piece.len() == 1 { - return vec![ranks[piece]]; - } - _byte_pair_merge(piece, ranks) - .iter() - .map(|p| ranks[&piece[p.start..p.end]]) - .collect() -} - -pub fn byte_pair_split<'a>(piece: &'a [u8], ranks: &HashMap, usize>) -> Vec<&'a [u8]> { - if piece.len() == 1 { - return vec![piece]; - } - _byte_pair_merge(piece, ranks) - .iter() - .map(|p| &piece[p.start..p.end]) - .collect() -} - -// Various performance notes: -// -// Regex -// ===== -// Most of the time is spent in regex. The easiest way to speed this up is by using less fancy -// regex features. For instance, using a regex parse-able by `regex` crate is 3x faster than -// the usual regex we use. -// -// However, given that we're using a regex parse-able by `regex`, there isn't much difference -// between using the `regex` crate and using the `fancy_regex` crate. -// -// There is an important interaction between threading, `regex` and `fancy_regex`. -// When using `fancy_regex`, we hit `regex.find_at`. It turns out that this causes contention on -// some mutable scratch space inside of `regex`. This absolutely kills performance. When using plain -// old `regex`, we don't hit this, because `find_iter` has a different code path. -// Related: https://github.com/rust-lang/regex/blob/master/PERFORMANCE.md -// Anyway, the way we get around this is with having a (mostly) thread local clone of the regex for -// each thread. 
-// -// Threading -// ========= -// I tried using `rayon`. It wasn't really faster than using Python threads and releasing the GIL. -// So goodbye `rayon`! Let thread count etc be in control of our Python users. -// -// Caching -// ======= -// The reference tokeniser has an lru cache over the equivalent of `byte_pair_encode`. -// Originally, we had one too! Without it, we were only vaguely faster than Python. -// I used an RWLock to protect the cache. This didn't seem to hurt single threaded performance -// noticeably, but it did affect multi-threaded performance. Weirdly, it seemed to affect -// multi-threaded performance even when I only had readers (maybed I messed something up?). -// Anyway, I realised that we could get rid of the cache, if we treat the set of tokens as a cache! -// These are exactly the set or merges that are likely to be hot. And now we don't have to think -// about interior mutability, memory use, or cloning. -// -// Hashing -// ======= -// We use FxHashMap instead of the standard HashMap. This is maybe like a 5-10% win? -// The current implementation ends up doing a lot of hashing of bytes. In theory, this could be made -// to be hashing of two-tuples of ints, which looks like it may also be a couple percent faster. - -use std::num::NonZeroU64; -pub struct FakeThreadId(NonZeroU64); - -fn hash_current_thread() -> usize { - // It's easier to use unsafe than to use nightly. Rust has this nice u64 thread id counter - // that works great for our use case of avoiding collisions in our array. Unfortunately, - // it's private. 
However, there are only so many ways you can layout a u64, so just transmute - // https://github.com/rust-lang/rust/issues/67939 - const _: [u8; 8] = [0; std::mem::size_of::()]; - const _: [u8; 8] = [0; std::mem::size_of::()]; - let x = unsafe { - std::mem::transmute::(thread::current().id()).0 - }; - u64::from(x) as usize -} - -const MAX_NUM_THREADS: usize = 128; #[pyclass] struct CoreBPE { - encoder: HashMap, usize>, - special_tokens_encoder: HashMap, - decoder: HashMap>, - special_tokens_decoder: HashMap>, - regex_tls: Vec, - special_regex_tls: Vec, - sorted_token_bytes: Vec>, -} - -impl CoreBPE { - fn _get_tl_regex(&self) -> &Regex { - // See performance notes above for what this is about - // It's also a little janky, please make a better version of it! - // However, it's nice that this doesn't leak memory to short-lived threads - &self.regex_tls[hash_current_thread() % MAX_NUM_THREADS] - } - - fn _get_tl_special_regex(&self) -> &Regex { - &self.special_regex_tls[hash_current_thread() % MAX_NUM_THREADS] - } - - fn _decode_native(&self, tokens: &[usize]) -> Vec { - let mut ret = Vec::with_capacity(tokens.len() * 2); - for token in tokens { - let token_bytes = self - .decoder - .get(token) - .unwrap_or_else(|| &self.special_tokens_decoder[token]); - ret.extend(token_bytes); - } - ret - } - - fn _encode_ordinary_native(&self, text: &str) -> Vec { - // This is the core of the encoding logic; the other functions in here - // just make things complicated :-) - let regex = self._get_tl_regex(); - let mut ret = vec![]; - for mat in regex.find_iter(text) { - let piece = mat.unwrap().as_str().as_bytes(); - if let Some(token) = self.encoder.get(piece) { - ret.push(*token); - continue; - } - ret.extend(&byte_pair_encode(piece, &self.encoder)); - } - ret - } - - fn _encode_native(&self, text: &str, allowed_special: &HashSet<&str>) -> (Vec, usize) { - let special_regex = self._get_tl_special_regex(); - let regex = self._get_tl_regex(); - let mut ret = vec![]; - - let mut 
start = 0; - let mut last_piece_token_len = 0; - loop { - let mut next_special; - let mut start_find = start; - loop { - // Find the next allowed special token, if any - next_special = special_regex.find_from_pos(text, start_find).unwrap(); - match next_special { - Some(m) => { - if allowed_special.contains(&text[m.start()..m.end()]) { - break; - } - start_find = m.start() + 1; - } - None => break, - } - } - let end = next_special.map_or(text.len(), |m| m.start()); - - // Okay, here we go, compare this logic to _encode_ordinary_native - for mat in regex.find_iter(&text[start..end]) { - let piece = mat.unwrap().as_str().as_bytes(); - if let Some(token) = self.encoder.get(piece) { - last_piece_token_len = 1; - ret.push(*token); - continue; - } - let tokens = byte_pair_encode(piece, &self.encoder); - last_piece_token_len = tokens.len(); - ret.extend(&tokens); - } - - match next_special { - // And here we push the special token - Some(m) => { - let piece = m.as_str(); - let token = self.special_tokens_encoder[piece]; - ret.push(token); - start = m.end(); - last_piece_token_len = 0; - } - None => break, - } - } - - // last_piece_token_len is how many tokens came from the last regex split. This is used - // for determining unstable tokens, since you can't merge across (stable) regex splits - (ret, last_piece_token_len) - } - - fn _increase_last_piece_token_len( - &self, - tokens: Vec, - mut last_piece_token_len: usize, - ) -> (Vec, usize) { - // Unfortunately, the locations where our regex splits can be unstable. - // For the purposes of determining unstable tokens, unstable regex splitting - // is only a problem if a split that was present disappears, since this can - // lead to merging of tokens otherwise thought to be stable. - // cl100k_base makes our life hard by including the \s*[\r\n]+ - // pattern. This can e.g. cause "\n" + " " to become "\n \n". 
- // Here is a quick and dirty fix: - { - let token_is_all_space = |token| { - self.decoder - .get(token) - .map(|token_bytes| { - token_bytes - .iter() - .rev() - .all(|&b| [b' ', b'\n', b'\t'].contains(&b)) - }) - .unwrap_or(false) - }; - if last_piece_token_len > 0 - && token_is_all_space(&tokens[tokens.len() - last_piece_token_len]) - { - while (last_piece_token_len < tokens.len()) - && token_is_all_space(&tokens[tokens.len() - last_piece_token_len - 1]) - { - last_piece_token_len += 1; - } - } - } - debug_assert!(last_piece_token_len <= tokens.len()); - - (tokens, last_piece_token_len) - } - - fn _encode_unstable_native( - &self, - text: &str, - allowed_special: &HashSet<&str>, - ) -> (Vec, HashSet>) { - let (tokens, last_piece_token_len) = self._encode_native(text, allowed_special); - if last_piece_token_len == 0 { - // If last_piece_token_len is zero, the last token was a special token and we have - // no unstable bytes - return (tokens, HashSet::new()); - } - let (mut tokens, last_piece_token_len) = - self._increase_last_piece_token_len(tokens, last_piece_token_len); - - let unstable_bytes = self._decode_native(&tokens[tokens.len() - last_piece_token_len..]); - tokens.truncate(tokens.len() - last_piece_token_len); - - // TODO: we should try harder to find additional stable tokens - // This would reduce the amount of retokenising when determining completions - // Refer to the logic in an older version of this file - - let mut completions = HashSet::new(); - if unstable_bytes.is_empty() { - return (tokens, completions); - } - - // This is the easy bit. Just find all single tokens that start with unstable_bytes - // (including tokens that exactly match unstable_bytes) - // Separating this from the loop below helps with performance in a common case. 
- let mut point = self - .sorted_token_bytes - .partition_point(|x| x.as_slice() < unstable_bytes.as_slice()); - while point < self.sorted_token_bytes.len() - && self.sorted_token_bytes[point].starts_with(&unstable_bytes) - { - completions.insert(vec![ - self.encoder[self.sorted_token_bytes[point].as_slice()], - ]); - point += 1; - } - - // Now apply even more brute force. At every (other) possible position for the straddling - // token, concatenate additional bytes from that token (if any) to unstable_bytes, - // and retokenise the whole thing and see what we get. - for i in 1..unstable_bytes.len() { - let prefix = &unstable_bytes[..i]; - let suffix = &unstable_bytes[i..]; - let mut point = self - .sorted_token_bytes - .partition_point(|x| x.as_slice() < suffix); - // TODO: Perf optimisation if suffix starts with " "? - while point < self.sorted_token_bytes.len() - && self.sorted_token_bytes[point].starts_with(suffix) - { - let possibility = [prefix, self.sorted_token_bytes[point].as_slice()].concat(); - let encoded = match std::str::from_utf8(&possibility) { - // Morally, this is byte_pair_encode(&possibility, &self.encoder) - // But we might have introduced a regex split which would prevent merges. - // (particularly possible in the presence of unstable regex splits) - // So convert to UTF-8 and do regex splitting. - // E.g. with cl100k_base " !" gets split to " " + " !", - // but byte_pair_encode(" !") != byte_pair_encode(" ") - Ok(s) => self._encode_ordinary_native(s), - - // Technically, whether or not this arm is correct depends on whether there - // would be a regex split before the UTF-8 truncation point. 
- // Probably niche enough that no one will ever notice (after all, people didn't - // notice all the big holes in the previous unstable token implementation) - Err(_) => byte_pair_encode(&possibility, &self.encoder), - // Something like the following is intriguing but incorrect: - // Err(e) => self._encode_ordinary_native(unsafe { - // std::str::from_utf8_unchecked(&possibility[..e.valid_up_to()]) - // }), - }; - let mut seq = Vec::new(); - let mut seq_len = 0; - for token in encoded { - seq.push(token); - seq_len += self.decoder[&token].len(); - if seq_len >= unstable_bytes.len() { - break; - } - } - completions.insert(seq); - point += 1; - } - } - - // This is also not straightforward. While we generally assume that regex splits are stable, - // unfortunately, they are not. That is, if adding bytes were to make a split appear in - // unstable_bytes, this could make tokens possible which our logic would otherwise think - // would be merged. - // For example, with gpt2, the use of \s+(?!\S) means that "\n\n" could - // develop a split, e.g. "\n\n0" splits into "\n"+"\n"+"0", making "\n" a possible token. 
- // Here is a quick and dirty fix: - // This isn't right if we ever remove \s+(?!\S) - if unstable_bytes.len() > 1 { - let last_decoded = bstr::decode_last_utf8(unstable_bytes.as_slice()); - if unstable_bytes.len() - last_decoded.1 > 0 - && last_decoded.0.map_or(false, |c| c.is_whitespace()) - { - let mut reencoded = byte_pair_encode( - &unstable_bytes[..unstable_bytes.len() - last_decoded.1], - &self.encoder, - ); - reencoded.extend(byte_pair_encode( - &unstable_bytes[unstable_bytes.len() - last_decoded.1..], - &self.encoder, - )); - completions.insert(reencoded); - } - } - - (tokens, completions) - } + native: core::CoreBPENative, } #[pymethods] @@ -394,43 +25,9 @@ impl CoreBPE { special_tokens_encoder: HashMap, pattern: &str, ) -> PyResult { - let regex = Regex::new(pattern) + let native = core::CoreBPENative::new(encoder, special_tokens_encoder, pattern) .map_err(|e| PyErr::new::(e.to_string()))?; - - let special_regex = { - let _parts = special_tokens_encoder - .keys() - .map(|s| fancy_regex::escape(s)) - .collect::>(); - Regex::new(&_parts.join("|")) - .map_err(|e| PyErr::new::(e.to_string()))? 
- }; - - let decoder: HashMap> = - encoder.iter().map(|(k, v)| (*v, k.clone())).collect(); - - assert!(encoder.len() == decoder.len()); - - let special_tokens_decoder: HashMap> = special_tokens_encoder - .iter() - .map(|(k, v)| (*v, k.as_bytes().to_vec())) - .collect(); - - // Clone because I don't know how to tell Rust I'm not going to change the map - let mut sorted_token_bytes: Vec> = encoder.keys().cloned().collect(); - sorted_token_bytes.sort(); - - Ok(CoreBPE { - encoder, - special_tokens_encoder, - decoder, - special_tokens_decoder, - regex_tls: (0..MAX_NUM_THREADS).map(|_| regex.clone()).collect(), - special_regex_tls: (0..MAX_NUM_THREADS) - .map(|_| special_regex.clone()) - .collect(), - sorted_token_bytes, - }) + Ok(CoreBPE { native }) } // ==================== @@ -438,37 +35,16 @@ impl CoreBPE { // ==================== fn encode_ordinary(&self, py: Python, text: &str) -> Vec { - py.allow_threads(|| self._encode_ordinary_native(text)) + py.allow_threads(|| self.native._encode_ordinary_native(text)) } fn encode(&self, py: Python, text: &str, allowed_special: HashSet<&str>) -> Vec { - py.allow_threads(|| self._encode_native(text, &allowed_special).0) + py.allow_threads(|| self.native._encode_native(text, &allowed_special).0) } fn _encode_bytes(&self, py: Python, bytes: &[u8]) -> Vec { py.allow_threads(|| { - match std::str::from_utf8(bytes) { - Ok(text) => self._encode_ordinary_native(text), - Err(e) => { - let text = unsafe { std::str::from_utf8_unchecked(&bytes[..e.valid_up_to()]) }; - let (tokens, last_piece_token_len) = self._encode_native(text, &HashSet::new()); - let (mut tokens, last_piece_token_len) = - self._increase_last_piece_token_len(tokens, last_piece_token_len); - if !tokens.is_empty() && last_piece_token_len > 0 { - // Lop off the tokens from the last piece and run BPE on the remaining bytes - // Somewhat niche, but this may not be correct if we'd have had a regex - // split between the valid UTF-8 and the invalid bytes, which is why this - 
// method is private - let mut unstable_bytes = - self._decode_native(&tokens[tokens.len() - last_piece_token_len..]); - unstable_bytes.extend_from_slice(&bytes[e.valid_up_to()..]); - - tokens.truncate(tokens.len() - last_piece_token_len); - tokens.extend(byte_pair_encode(&unstable_bytes, &self.encoder)); - } - tokens - } - } + self.native._encode_bytes(bytes) }) } @@ -479,29 +55,14 @@ impl CoreBPE { allowed_special: HashSet<&str>, ) -> Py { let (tokens, completions) = - py.allow_threads(|| self._encode_unstable_native(text, &allowed_special)); + py.allow_threads(|| self.native._encode_unstable_native(text, &allowed_special)); let py_completions = PyList::new(py, completions.iter().map(|seq| PyList::new(py, &seq[..]))); (tokens, py_completions).into_py(py) } fn encode_single_token(&self, piece: &[u8]) -> PyResult { - if let Some(token) = self.encoder.get(piece).copied() { - return Ok(token); - } - if let Ok(piece_str) = std::str::from_utf8(piece) { - if let Some(token) = self.special_tokens_encoder.get(piece_str).copied() { - return Ok(token); - } - } - Err(PyErr::new::(piece.to_owned())) - } - - fn encode_single_piece(&self, piece: &[u8]) -> Vec { - if let Some(token) = self.encoder.get(piece) { - return vec![*token]; - } - byte_pair_encode(piece, &self.encoder) + self.native.encode_single_token(piece).map_err(|e| PyErr::new::(e)) } // ==================== @@ -509,18 +70,13 @@ impl CoreBPE { // ==================== fn decode_bytes(&self, py: Python, tokens: Vec) -> Py { - let bytes = py.allow_threads(|| self._decode_native(&tokens)); + let bytes = py.allow_threads(|| self.native._decode_native(&tokens)); PyBytes::new(py, &bytes).into() } fn decode_single_token_bytes(&self, py: Python, token: usize) -> PyResult> { - if let Some(bytes) = self.decoder.get(&token) { - return Ok(PyBytes::new(py, bytes).into()); - } - if let Some(bytes) = self.special_tokens_decoder.get(&token) { - return Ok(PyBytes::new(py, bytes).into()); - } - Err(PyErr::new::(token.to_string())) + 
self.native.decode_single_token_bytes(token).map(|bytes| PyBytes::new(py, &bytes).into()) + .map_err(|e| PyErr::new::(e)) } // ==================== @@ -528,7 +84,7 @@ impl CoreBPE { // ==================== fn token_byte_values(&self, py: Python) -> Vec> { - self.sorted_token_bytes + self.native.token_byte_values() .iter() .map(|x| PyBytes::new(py, x).into()) .collect() @@ -541,19 +97,3 @@ fn _tiktoken(_py: Python, m: &PyModule) -> PyResult<()> { Ok(()) } -#[cfg(test)] -mod tests { - use rustc_hash::FxHashMap as HashMap; - - use crate::byte_pair_split; - - #[test] - fn very_simple_test() { - let mut ranks = HashMap::default(); - ranks.insert(b"ab".to_vec(), 1); - ranks.insert(b"cd".to_vec(), 2); - - let res = byte_pair_split(b"abcd", &ranks); - assert_eq!(res, vec![b"ab", b"cd"]); - } -} diff --git a/src/load.rs b/src/load.rs new file mode 100644 index 00000000..2792aadd --- /dev/null +++ b/src/load.rs @@ -0,0 +1,26 @@ + +use rustc_hash::FxHashMap as HashMap; +use linefeed::chars::is_printable; + +pub fn data_gym_to_mergeable_bpe_ranks(vocab_bpe_file: String, encoder_json_file: String) -> HashMap, usize> { + let rank_to_intbyte = (0..256) + .filter(|x| is_printable(x) && x != ' ') + .collect::>(); + + let mut data_gym_byte_to_byte = HashMap::default(); + for b in rank_to_intbyte.iter() { + data_gym_byte_to_byte.insert(b, b); + } + + let mut n = 0; + for b in 0..256 { + if !rank_to_intbyte.contains(b) { + rank_to_intbyte.push(b); + data_gym_byte_to_byte.insert(256 + n, b); + n += 1; + } + } + assert!(rank_to_intbyte.len() == 256); + + ranks +} data_gym_to_mergeable_bpe_ranks \ No newline at end of file diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 00000000..4c360a3e --- /dev/null +++ b/src/util.rs @@ -0,0 +1,72 @@ +use rustc_hash::FxHashMap as HashMap; + + +pub fn _byte_pair_merge(piece: &[u8], ranks: &HashMap, usize>) -> Vec> { + let mut parts: Vec<_> = (0..piece.len()).map(|i| i..i + 1).collect(); + + // If you have n parts and m merges, this 
does O(mn) work + // We could do something with a heap and do O(m log n) work + + // Note that we hash bytes, not token pairs. As long as we train BPE the way we + // currently do, this is equivalent. An easy way to break this would be to decouple + // merge priority from token index or to prevent specific token merges. + loop { + if parts.len() == 1 { + break; + } + let mut min_rank: Option<(usize, usize)> = None; + for i in 0..parts.len() - 1 { + let rank = if let Some(r) = ranks.get(&piece[parts[i].start..parts[i + 1].end]) { + *r + } else { + continue; + }; + if min_rank.is_none() || rank < min_rank.unwrap().0 { + min_rank = Some((rank, i)); + } + } + if let Some((_, i)) = min_rank { + parts[i] = parts[i].start..parts[i + 1].end; + parts.remove(i + 1); + } else { + break; + } + } + parts +} + +pub fn byte_pair_encode(piece: &[u8], ranks: &HashMap, usize>) -> Vec { + if piece.len() == 1 { + return vec![ranks[piece]]; + } + _byte_pair_merge(piece, ranks) + .iter() + .map(|p| ranks[&piece[p.start..p.end]]) + .collect() +} + +#[cfg(test)] +mod tests { + use rustc_hash::FxHashMap as HashMap; + + use crate::util::_byte_pair_merge; + pub fn byte_pair_split<'a>(piece: &'a [u8], ranks: &HashMap, usize>) -> Vec<&'a [u8]> { + if piece.len() == 1 { + return vec![piece]; + } + _byte_pair_merge(piece, ranks) + .iter() + .map(|p| &piece[p.start..p.end]) + .collect() + } + + #[test] + fn very_simple_test() { + let mut ranks = HashMap::default(); + ranks.insert(b"ab".to_vec(), 1); + ranks.insert(b"cd".to_vec(), 2); + + let res = byte_pair_split(b"abcd", &ranks); + assert_eq!(res, vec![b"ab", b"cd"]); + } +} \ No newline at end of file From 09dada409a238884f6d776ab1371cdd12d030d8a Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Thu, 9 Feb 2023 15:44:02 +0100 Subject: [PATCH 002/207] ported loading to Rust --- Cargo.toml | 6 +- src/lib.rs | 15 ++++ src/load.rs | 153 +++++++++++++++++++++++++++++++++--- tests/test_simple_public.py | 15 ++++ tiktoken/load.py | 5 ++ 5 files 
changed, 184 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6ab3a53c..35c270f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,13 +10,17 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.17.3", features = ["extension-module"] } +jni = "0.20.0" # tiktoken dependencies fancy-regex = "0.10.0" regex = "1.7.0" rustc-hash = "1.1.0" bstr = "1.0.1" -linefeed = "0.6" +reqwest = { version = "0.11.14", features = ["blocking"] } +sha1 = "0.10.5" +json = "0.12.4" +base64 = "0.21.0" [profile.release] incremental = true diff --git a/src/lib.rs b/src/lib.rs index 7f0edd89..0da1eca8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,7 @@ use rustc_hash::FxHashMap as HashMap; mod util; mod core; +mod load; #[pyclass] struct CoreBPE { @@ -91,9 +92,23 @@ impl CoreBPE { } } +// pub fn py_data_gym_to_mergable_bpe_ranks(py: Python, vocab_bpe_file: &str, encoder_json_file: &str) -> PyResult> { +#[pyfunction] +pub fn py_data_gym_to_mergable_bpe_ranks(py: Python, vocab_bpe_file: &str, encoder_json_file: &str) -> PyResult, usize>> { + let ranks = load::data_gym_to_mergeable_bpe_ranks(vocab_bpe_file, encoder_json_file) + .map_err(|e| PyErr::new::(e.to_string()))?; + + Ok(ranks) + // Ok(ranks + // .iter() + // .map(|(k, v)| (PyBytes::new(py, k).into(), *v)) + // .collect::>()) +} + #[pymodule] fn _tiktoken(_py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; + m.add_function(wrap_pyfunction!(crate::py_data_gym_to_mergable_bpe_ranks, m)?)?; Ok(()) } diff --git a/src/load.rs b/src/load.rs index 2792aadd..0db3fb1b 100644 --- a/src/load.rs +++ b/src/load.rs @@ -1,20 +1,88 @@ use rustc_hash::FxHashMap as HashMap; -use linefeed::chars::is_printable; +use std::{env, path::PathBuf}; +use sha1::{Sha1, Digest}; +use std::error::Error; +use json; -pub fn data_gym_to_mergeable_bpe_ranks(vocab_bpe_file: String, encoder_json_file: String) -> HashMap, usize> { - let rank_to_intbyte = (0..256) - .filter(|x| is_printable(x) && x != ' ') +fn 
read_file(blobpath: &str) -> Result, Box> { + // TODO: support blobs? + + if !(blobpath.starts_with("http") || blobpath.starts_with("https")) { + return Ok(std::fs::read(blobpath)?); + } + + Ok(reqwest::blocking::get(blobpath)?.bytes()?.to_vec()) +} + +fn get_tiktoken_cache_dir() -> PathBuf { + match env::var_os("TIKTOKEN_CACHE_DIR") { + Some(v) => PathBuf::from(v), + None => { + match env::var_os("DATA_GYM_CACHE_DIR") { + Some(v) => PathBuf::from(v), + None => { + let mut temp_dir = env::temp_dir(); + temp_dir.push("data-gym-cache"); + + temp_dir + } + } + } + } +} + +fn sha1_as_hex(s: &str) -> String { + let mut hasher = Sha1::new(); + hasher.update(s.as_bytes()); + let result = hasher.finalize(); + + format!("{:x}", result) +} + +fn read_file_cached(blobpath: &str) -> Result, Box> { + let mut cache_path = get_tiktoken_cache_dir(); + + if !cache_path.exists() { + std::fs::create_dir_all(&cache_path)?; + // return read_file(blobpath); + } + + cache_path.push(sha1_as_hex(blobpath)); + + println!("cache_path: {:?}", cache_path); + + if cache_path.exists() { + let catch_path_str = cache_path.into_os_string().into_string() + .or(Err(format!("Unable to convert path")))?; // TODO: how to include path here!? 
+ return read_file(&catch_path_str); + } + + let content = read_file(blobpath)?; + + std::fs::write(cache_path, &content)?; + + Ok(content) +} + +fn is_printable(u: u8) -> bool { + // printable ascii characters according to python + !(u <= 31 || (u >= 127 && u <= 160) || u == 173) +} + +pub fn data_gym_to_mergeable_bpe_ranks(vocab_bpe_file: &str, encoder_json_file: &str) -> Result, usize>, Box> { + let mut rank_to_intbyte = (0..=255) + .filter(|x| is_printable(*x) && (*x as char) != ' ') .collect::>(); let mut data_gym_byte_to_byte = HashMap::default(); for b in rank_to_intbyte.iter() { - data_gym_byte_to_byte.insert(b, b); + data_gym_byte_to_byte.insert(*b as u32, *b); } let mut n = 0; - for b in 0..256 { - if !rank_to_intbyte.contains(b) { + for b in 0..=255 { + if !rank_to_intbyte.contains(&b) { rank_to_intbyte.push(b); data_gym_byte_to_byte.insert(256 + n, b); n += 1; @@ -22,5 +90,72 @@ pub fn data_gym_to_mergeable_bpe_ranks(vocab_bpe_file: String, encoder_json_file } assert!(rank_to_intbyte.len() == 256); - ranks -} data_gym_to_mergeable_bpe_ranks \ No newline at end of file + // vocab_bpe contains the merges along with associated ranks + let cached_vocab = read_file_cached(vocab_bpe_file)?; + let vocab_bpe_contents = std::str::from_utf8(&cached_vocab)? + .split("\n").collect::>(); + + let bpe_merges = &vocab_bpe_contents[1..(vocab_bpe_contents.len() - 1)] + .iter() + .map(|&s| s.split_whitespace()) + // TODO: would be nice to propagate the error?! 
+ .map(|mut sp| (sp.next().unwrap(), sp.next().unwrap())) + .collect::>(); + + let decode_data_gym = + |value: &str| value.chars().map(|c| { + if !data_gym_byte_to_byte.contains_key(&(c as u32)) { + panic!("Unknown character: {} {}", c, c as u32); + } + + data_gym_byte_to_byte[&(c as u32)] + } ).collect::>(); + + // # add the single byte tokens + let mut bpe_ranks = + rank_to_intbyte + .iter() + .enumerate() + .map(|(i, b)| (vec![*b], i)) + .collect::, usize>>(); + + // add the merged tokens + let mut n = bpe_ranks.len(); + for (first, second) in bpe_merges { + bpe_ranks.insert([decode_data_gym(first), decode_data_gym(second)].concat(), n); + n += 1; + } + + // check that the encoder file matches the merges file + // this sanity check is important since tiktoken assumes that ranks are ordered the same + // as merge priority + let cached_encoder = read_file_cached(encoder_json_file)?; + let encoder_json = json::parse(&std::str::from_utf8(&cached_encoder)?)?; + + let mut encoder_json_loaded = encoder_json.entries() + .map(|(k, v)| (decode_data_gym(k), v.as_usize().unwrap())) + .collect::, usize>>(); + + // drop these two special tokens if present, since they're not mergeable bpe tokens + encoder_json_loaded.remove(&decode_data_gym("<|endoftext|>")); + encoder_json_loaded.remove(&decode_data_gym("<|startoftext|>")); + + assert!(bpe_ranks == encoder_json_loaded); + + Ok(bpe_ranks) +} + +pub fn load_tiktoken_bpe(tiktoken_bpe_file: &str) -> Result, usize>, Box> { + use base64::{engine::general_purpose, Engine as _}; + + let content = read_file_cached(tiktoken_bpe_file)?; + + Ok(std::str::from_utf8(&content)? 
+ .lines() + .filter(|s| s.len() > 0) + .map(|s| s.split_whitespace()) + .map(|mut sp| (sp.next().unwrap(), sp.next().unwrap())) + .map(|(first, second)| (general_purpose::STANDARD.decode(&first).unwrap(), second.parse::().unwrap())) + .collect::, usize>>()) +} + diff --git a/tests/test_simple_public.py b/tests/test_simple_public.py index 44109234..ab63babd 100644 --- a/tests/test_simple_public.py +++ b/tests/test_simple_public.py @@ -24,3 +24,18 @@ def test_encoding_for_model(): assert enc.name == "gpt2" enc = tiktoken.encoding_for_model("text-davinci-003") assert enc.name == "p50k_base" + +def test_loading(): + x = tiktoken.load.data_gym_to_mergeable_bpe_ranks( + vocab_bpe_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe", + encoder_json_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json", + ) + + print(len(x)) + + y = tiktoken._tiktoken.py_data_gym_to_mergable_bpe_ranks( + vocab_bpe_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe", + encoder_json_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json", + ) + + print(len(y)) \ No newline at end of file diff --git a/tiktoken/load.py b/tiktoken/load.py index c5881068..c8f3dbd9 100644 --- a/tiktoken/load.py +++ b/tiktoken/load.py @@ -55,6 +55,7 @@ def data_gym_to_mergeable_bpe_ranks( # NB: do not add caching to this function rank_to_intbyte = [b for b in range(2**8) if chr(b).isprintable() and chr(b) != " "] + print(f"rank_to_intbyte: {len(rank_to_intbyte)}") data_gym_byte_to_byte = {chr(b): b for b in rank_to_intbyte} n = 0 for b in range(2**8): @@ -73,6 +74,10 @@ def decode_data_gym(value: str) -> bytes: # add the single byte tokens bpe_ranks = {bytes([b]): i for i, b in enumerate(rank_to_intbyte)} + + # print(len(rank_to_intbyte)) + print(f"py data gym: {len(data_gym_byte_to_byte)} '{data_gym_byte_to_byte[chr(288)]}'") + # add the merged tokens n = len(bpe_ranks) for first, second in 
bpe_merges: From 4c6afcb31428e397a2bbdcae91408556f352153f Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Thu, 9 Feb 2023 16:12:16 +0100 Subject: [PATCH 003/207] fixed? --- src/load.rs | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/load.rs b/src/load.rs index 0db3fb1b..c919c3b2 100644 --- a/src/load.rs +++ b/src/load.rs @@ -5,7 +5,9 @@ use sha1::{Sha1, Digest}; use std::error::Error; use json; -fn read_file(blobpath: &str) -> Result, Box> { +type Result = std::result::Result>; + +fn read_file(blobpath: &str) -> Result> { // TODO: support blobs? if !(blobpath.starts_with("http") || blobpath.starts_with("https")) { @@ -40,12 +42,11 @@ fn sha1_as_hex(s: &str) -> String { format!("{:x}", result) } -fn read_file_cached(blobpath: &str) -> Result, Box> { +fn read_file_cached(blobpath: &str) -> Result> { let mut cache_path = get_tiktoken_cache_dir(); if !cache_path.exists() { std::fs::create_dir_all(&cache_path)?; - // return read_file(blobpath); } cache_path.push(sha1_as_hex(blobpath)); @@ -54,7 +55,11 @@ fn read_file_cached(blobpath: &str) -> Result, Box> { if cache_path.exists() { let catch_path_str = cache_path.into_os_string().into_string() - .or(Err(format!("Unable to convert path")))?; // TODO: how to include path here!? 
+ .or(Err( { + // let cache_path_lossy_str = cache_path.to_string_lossy().to_string(); + // format!("Unable to convert path {cache_path_lossy_str}") + format!("Unable to convert path") + }))?; return read_file(&catch_path_str); } @@ -70,15 +75,15 @@ fn is_printable(u: u8) -> bool { !(u <= 31 || (u >= 127 && u <= 160) || u == 173) } -pub fn data_gym_to_mergeable_bpe_ranks(vocab_bpe_file: &str, encoder_json_file: &str) -> Result, usize>, Box> { +pub fn data_gym_to_mergeable_bpe_ranks(vocab_bpe_file: &str, encoder_json_file: &str) -> Result, usize>> { let mut rank_to_intbyte = (0..=255) .filter(|x| is_printable(*x) && (*x as char) != ' ') .collect::>(); - let mut data_gym_byte_to_byte = HashMap::default(); - for b in rank_to_intbyte.iter() { - data_gym_byte_to_byte.insert(*b as u32, *b); - } + let mut data_gym_byte_to_byte = rank_to_intbyte + .into_iter() + .map(|x| (x as u32, x)) + .collect::>(); let mut n = 0; for b in 0..=255 { @@ -98,8 +103,11 @@ pub fn data_gym_to_mergeable_bpe_ranks(vocab_bpe_file: &str, encoder_json_file: let bpe_merges = &vocab_bpe_contents[1..(vocab_bpe_contents.len() - 1)] .iter() .map(|&s| s.split_whitespace()) - // TODO: would be nice to propagate the error?! 
- .map(|mut sp| (sp.next().unwrap(), sp.next().unwrap())) + .map(|mut sp| sp.take(2).collect::>()) + .filter(|v| v.len() == 2) + .map(|v| (v[0], v[1])) + // .map(|mut sp| (sp.next()?, sp.next()?)) + // .map(|mut sp| (sp.next().unwrap(), sp.next().unwrap())) .collect::>(); let decode_data_gym = @@ -145,7 +153,7 @@ pub fn data_gym_to_mergeable_bpe_ranks(vocab_bpe_file: &str, encoder_json_file: Ok(bpe_ranks) } -pub fn load_tiktoken_bpe(tiktoken_bpe_file: &str) -> Result, usize>, Box> { +pub fn load_tiktoken_bpe(tiktoken_bpe_file: &str) -> Result, usize>> { use base64::{engine::general_purpose, Engine as _}; let content = read_file_cached(tiktoken_bpe_file)?; From 502a6ec00e3ba004030a27539b20c834df8e95c8 Mon Sep 17 00:00:00 2001 From: Jack Gerrits Date: Thu, 9 Feb 2023 10:21:57 -0500 Subject: [PATCH 004/207] Propagate errors from split whitespace in bpe_merges --- src/load.rs | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/load.rs b/src/load.rs index c919c3b2..874cb661 100644 --- a/src/load.rs +++ b/src/load.rs @@ -28,7 +28,7 @@ fn get_tiktoken_cache_dir() -> PathBuf { temp_dir.push("data-gym-cache"); temp_dir - } + } } } } @@ -50,12 +50,12 @@ fn read_file_cached(blobpath: &str) -> Result> { } cache_path.push(sha1_as_hex(blobpath)); - + println!("cache_path: {:?}", cache_path); if cache_path.exists() { let catch_path_str = cache_path.into_os_string().into_string() - .or(Err( { + .or(Err( { // let cache_path_lossy_str = cache_path.to_string_lossy().to_string(); // format!("Unable to convert path {cache_path_lossy_str}") format!("Unable to convert path") @@ -100,15 +100,18 @@ pub fn data_gym_to_mergeable_bpe_ranks(vocab_bpe_file: &str, encoder_json_file: let vocab_bpe_contents = std::str::from_utf8(&cached_vocab)? 
.split("\n").collect::>(); - let bpe_merges = &vocab_bpe_contents[1..(vocab_bpe_contents.len() - 1)] + let bpe_merges = match vocab_bpe_contents[1..(vocab_bpe_contents.len() - 1)] .iter() .map(|&s| s.split_whitespace()) - .map(|mut sp| sp.take(2).collect::>()) - .filter(|v| v.len() == 2) - .map(|v| (v[0], v[1])) - // .map(|mut sp| (sp.next()?, sp.next()?)) - // .map(|mut sp| (sp.next().unwrap(), sp.next().unwrap())) - .collect::>(); + .map(|mut sp| match (sp.next(), sp.next()) { + (Some(a), Some(b)) => Some((a, b)), + _ => None, + }) + .collect::>>() + { + Some(v) => v, + None => return Err("Unable to parse vocab_bpe file".into()), + }; let decode_data_gym = |value: &str| value.chars().map(|c| { @@ -126,7 +129,7 @@ pub fn data_gym_to_mergeable_bpe_ranks(vocab_bpe_file: &str, encoder_json_file: .enumerate() .map(|(i, b)| (vec![*b], i)) .collect::, usize>>(); - + // add the merged tokens let mut n = bpe_ranks.len(); for (first, second) in bpe_merges { @@ -143,7 +146,7 @@ pub fn data_gym_to_mergeable_bpe_ranks(vocab_bpe_file: &str, encoder_json_file: let mut encoder_json_loaded = encoder_json.entries() .map(|(k, v)| (decode_data_gym(k), v.as_usize().unwrap())) .collect::, usize>>(); - + // drop these two special tokens if present, since they're not mergeable bpe tokens encoder_json_loaded.remove(&decode_data_gym("<|endoftext|>")); encoder_json_loaded.remove(&decode_data_gym("<|startoftext|>")); From c685d9982cd22b3cfcdb4e8b6c180971b8935f87 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Thu, 9 Feb 2023 18:19:25 +0100 Subject: [PATCH 005/207] moving registry into rust --- Cargo.toml | 1 + src/core.rs | 24 ++++++++++--- src/lib.rs | 34 +++++++++++++++++- src/load.rs | 11 +++--- src/openai_public.rs | 86 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 144 insertions(+), 12 deletions(-) create mode 100644 src/openai_public.rs diff --git a/Cargo.toml b/Cargo.toml index 35c270f4..b3290a7e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ reqwest 
= { version = "0.11.14", features = ["blocking"] } sha1 = "0.10.5" json = "0.12.4" base64 = "0.21.0" +lazy_static = "1.4.0" [profile.release] incremental = true diff --git a/src/core.rs b/src/core.rs index ab0dc5eb..a5744428 100644 --- a/src/core.rs +++ b/src/core.rs @@ -116,7 +116,8 @@ impl CoreBPENative { ret } - pub fn _encode_native(&self, text: &str, allowed_special: &HashSet<&str>) -> (Vec, usize) { + pub fn _encode_native(&self, text: &str, allowed_special: &HashSet<&str>, max_tokens: Option) -> (Vec, usize, usize) { + let max_tokens = max_tokens.unwrap_or(usize::MAX); let special_regex = self._get_tl_special_regex(); let regex = self._get_tl_regex(); let mut ret = vec![]; @@ -147,11 +148,20 @@ impl CoreBPENative { if let Some(token) = self.encoder.get(piece) { last_piece_token_len = 1; ret.push(*token); + + if ret.len() >= max_tokens { + return (ret, last_piece_token_len, start); + } continue; } let tokens = util::byte_pair_encode(piece, &self.encoder); last_piece_token_len = tokens.len(); - ret.extend(&tokens); + for token in tokens { + ret.push(token); + if ret.len() >= max_tokens { + return (ret, last_piece_token_len, start); + } + } } match next_special { @@ -160,8 +170,12 @@ impl CoreBPENative { let piece = m.as_str(); let token = self.special_tokens_encoder[piece]; ret.push(token); + start = m.end(); last_piece_token_len = 0; + if ret.len() >= max_tokens { + return (ret, last_piece_token_len, start); + } } None => break, } @@ -169,7 +183,7 @@ impl CoreBPENative { // last_piece_token_len is how many tokens came from the last regex split. 
This is used // for determining unstable tokens, since you can't merge across (stable) regex splits - (ret, last_piece_token_len) + (ret, last_piece_token_len, start) } pub fn _encode_bytes(&self, bytes: &[u8]) -> Vec { @@ -177,7 +191,7 @@ impl CoreBPENative { Ok(text) => self._encode_ordinary_native(text), Err(e) => { let text = unsafe { std::str::from_utf8_unchecked(&bytes[..e.valid_up_to()]) }; - let (tokens, last_piece_token_len) = self._encode_native(text, &HashSet::new()); + let (tokens, last_piece_token_len, _) = self._encode_native(text, &HashSet::new(), None); let (mut tokens, last_piece_token_len) = self._increase_last_piece_token_len(tokens, last_piece_token_len); if !tokens.is_empty() && last_piece_token_len > 0 { @@ -241,7 +255,7 @@ impl CoreBPENative { text: &str, allowed_special: &HashSet<&str>, ) -> (Vec, HashSet>) { - let (tokens, last_piece_token_len) = self._encode_native(text, allowed_special); + let (tokens, last_piece_token_len, _) = self._encode_native(text, allowed_special, None); if last_piece_token_len == 0 { // If last_piece_token_len is zero, the last token was a special token and we have // no unstable bytes diff --git a/src/lib.rs b/src/lib.rs index 0da1eca8..c69b2e3e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,10 @@ use rustc_hash::FxHashMap as HashMap; mod util; mod core; mod load; +mod openai_public; + +#[macro_use] +extern crate lazy_static; #[pyclass] struct CoreBPE { @@ -40,7 +44,7 @@ impl CoreBPE { } fn encode(&self, py: Python, text: &str, allowed_special: HashSet<&str>) -> Vec { - py.allow_threads(|| self.native._encode_native(text, &allowed_special).0) + py.allow_threads(|| self.native._encode_native(text, &allowed_special, None).0) } fn _encode_bytes(&self, py: Python, bytes: &[u8]) -> Vec { @@ -112,3 +116,31 @@ fn _tiktoken(_py: Python, m: &PyModule) -> PyResult<()> { Ok(()) } +use jni::JNIEnv; +// These objects are what you should use as arguments to your native +// function. 
They carry extra lifetime information to prevent them escaping +// this context and getting used after being GC'd. +use jni::objects::{JClass, JString}; + +// This is just a pointer. We'll be returning it from our function. We +// can't return one of the objects with lifetime information because the +// lifetime checker won't let us. +use jni::sys::jstring; + +// pub extern "system" fn Java_tiktoken_Encoding_encode(env: JNIEnv, +// class: JClass, +// input: JString) +// -> jstring { +// // First, we have to get the string out of Java. Check out the `strings` +// // module for more info on how this works. +// let input: String = +// env.get_string(input).expect("Couldn't get java string!").into(); + +// // Then we have to create a new Java string to return. Again, more info +// // in the `strings` module. +// let output = env.new_string(format!("Hello, {}!", input)) +// .expect("Couldn't create java string!"); + +// // Finally, extract the raw pointer to return. +// output.into_inner() +// } \ No newline at end of file diff --git a/src/load.rs b/src/load.rs index 874cb661..4d1e5176 100644 --- a/src/load.rs +++ b/src/load.rs @@ -81,8 +81,8 @@ pub fn data_gym_to_mergeable_bpe_ranks(vocab_bpe_file: &str, encoder_json_file: .collect::>(); let mut data_gym_byte_to_byte = rank_to_intbyte - .into_iter() - .map(|x| (x as u32, x)) + .iter() + .map(|&x| (x as u32, x)) .collect::>(); let mut n = 0; @@ -115,10 +115,9 @@ pub fn data_gym_to_mergeable_bpe_ranks(vocab_bpe_file: &str, encoder_json_file: let decode_data_gym = |value: &str| value.chars().map(|c| { - if !data_gym_byte_to_byte.contains_key(&(c as u32)) { - panic!("Unknown character: {} {}", c, c as u32); - } - + // if !data_gym_byte_to_byte.contains_key(&(c as u32)) { + // panic!("Unknown character: {} {}", c, c as u32); + // } data_gym_byte_to_byte[&(c as u32)] } ).collect::>(); diff --git a/src/openai_public.rs b/src/openai_public.rs new file mode 100644 index 00000000..dc0007fa --- /dev/null +++ 
b/src/openai_public.rs @@ -0,0 +1,86 @@ + +use rustc_hash::FxHashMap as HashMap; +use std::error::Error; + +#[path = "load.rs"] +mod load; + +type Result = std::result::Result>; + +lazy_static! { + static ref REGISTRY: HashMap = [ + EncodingLazy::new( + "gpt2".into(), + 50257, + r"'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+".into(), + [ + ("<|endoftext|>".into(), 50256), + ].into_iter().collect(), + EncoderLoadingStrategy::DataGym( + DataGymDef { + vocab_bpe_file: "https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe".to_string(), + encoder_json_file: "https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json".to_string() + } + )) + ] + .into_iter() + .map(|enc| (enc.name.clone(), enc)) + .collect::>(); +} +struct DataGymDef { + vocab_bpe_file: String, + encoder_json_file: String, +} + +enum EncoderLoadingStrategy { + BPE(String), + DataGym(DataGymDef), +} + +struct EncodingLazy { + name: String, + explicit_n_vocab: usize, + pat_str: String, + special_tokens: HashMap, + mergeable_ranks: Option, usize>>, + loading_strategy: EncoderLoadingStrategy, +} + +impl EncodingLazy { + fn new(name: String, + explicit_n_vocab: usize, + pat_str: String, + special_tokens: HashMap, + loading_strategy: EncoderLoadingStrategy) -> Self { + EncodingLazy { + name, + explicit_n_vocab, + pat_str, + special_tokens, + mergeable_ranks: None, + loading_strategy + } + } + + fn get(&mut self) -> Result<&HashMap, usize>> { + if self.mergeable_ranks.is_none() { + self.mergeable_ranks = Some(match &self.loading_strategy { + EncoderLoadingStrategy::BPE(path) => Self::load_bpe(&path)?, + EncoderLoadingStrategy::DataGym(def) => Self::load_data_gym(&def)?, + }) + } + + Ok(&self.mergeable_ranks.unwrap()) + } + + fn load_bpe(path: &str) -> Result, usize>> { + load::load_tiktoken_bpe(path) + } + + fn load_data_gym(def: &DataGymDef) -> Result, usize>> { + load::data_gym_to_mergeable_bpe_ranks(&def.vocab_bpe_file, 
&def.encoder_json_file) + } +} + + + From 6dcfa9136478137861b712bf5298e2987af3d8e6 Mon Sep 17 00:00:00 2001 From: Jack Gerrits Date: Thu, 9 Feb 2023 14:52:59 -0500 Subject: [PATCH 006/207] return ref --- src/openai_public.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/openai_public.rs b/src/openai_public.rs index dc0007fa..ff10c86a 100644 --- a/src/openai_public.rs +++ b/src/openai_public.rs @@ -11,7 +11,7 @@ lazy_static! { static ref REGISTRY: HashMap = [ EncodingLazy::new( "gpt2".into(), - 50257, + 50257, r"'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+".into(), [ ("<|endoftext|>".into(), 50256), @@ -19,7 +19,7 @@ lazy_static! { EncoderLoadingStrategy::DataGym( DataGymDef { vocab_bpe_file: "https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe".to_string(), - encoder_json_file: "https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json".to_string() + encoder_json_file: "https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json".to_string() } )) ] @@ -41,13 +41,13 @@ struct EncodingLazy { name: String, explicit_n_vocab: usize, pat_str: String, - special_tokens: HashMap, + special_tokens: HashMap, mergeable_ranks: Option, usize>>, loading_strategy: EncoderLoadingStrategy, } impl EncodingLazy { - fn new(name: String, + fn new(name: String, explicit_n_vocab: usize, pat_str: String, special_tokens: HashMap, @@ -70,7 +70,7 @@ impl EncodingLazy { }) } - Ok(&self.mergeable_ranks.unwrap()) + Ok(self.mergeable_ranks.as_ref().expect("mergeable_ranks should be loaded by now")) } fn load_bpe(path: &str) -> Result, usize>> { From 25306a5a5b4d94a03d05927ff008f108e0ee0af6 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Fri, 10 Feb 2023 20:21:27 +0100 Subject: [PATCH 007/207] split into multiple libs --- Cargo.toml | 31 +------ core/Cargo.toml | 24 +++++ src/core.rs => core/src/lib.rs | 6 +- {src => core/src}/load.rs | 0 {src => 
core/src}/openai_public.rs | 46 +++++++++- {src => core/src}/util.rs | 0 java/pom.xml | 91 +++++++++++++++++++ java/src/main/java/tiktoken/Encoding.java | 21 +++++ java/src/test/java/tiktoken/EncodingTest.java | 16 ++++ jni/src/lib.rs | 50 ++++++++++ python/Cargo.toml | 17 ++++ {src => python/src}/lib.rs | 55 +---------- setup.py | 1 + 13 files changed, 278 insertions(+), 80 deletions(-) create mode 100644 core/Cargo.toml rename src/core.rs => core/src/lib.rs (99%) rename {src => core/src}/load.rs (100%) rename {src => core/src}/openai_public.rs (59%) rename {src => core/src}/util.rs (100%) create mode 100644 java/pom.xml create mode 100644 java/src/main/java/tiktoken/Encoding.java create mode 100644 java/src/test/java/tiktoken/EncodingTest.java create mode 100644 jni/src/lib.rs create mode 100644 python/Cargo.toml rename {src => python/src}/lib.rs (54%) diff --git a/Cargo.toml b/Cargo.toml index b3290a7e..abe35ef4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,27 +1,6 @@ -[package] -name = "tiktoken" -version = "0.2.0" -edition = "2021" -rust-version = "1.57.0" +[workspace] -[lib] -name = "_tiktoken" -crate-type = ["cdylib"] - -[dependencies] -pyo3 = { version = "0.17.3", features = ["extension-module"] } -jni = "0.20.0" - -# tiktoken dependencies -fancy-regex = "0.10.0" -regex = "1.7.0" -rustc-hash = "1.1.0" -bstr = "1.0.1" -reqwest = { version = "0.11.14", features = ["blocking"] } -sha1 = "0.10.5" -json = "0.12.4" -base64 = "0.21.0" -lazy_static = "1.4.0" - -[profile.release] -incremental = true +members = [ + "core", + "python", +] \ No newline at end of file diff --git a/core/Cargo.toml b/core/Cargo.toml new file mode 100644 index 00000000..d57371e8 --- /dev/null +++ b/core/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "tiktoken_core" +version = "0.2.0" +edition = "2021" +rust-version = "1.57.0" + +[lib] +name = "_tiktoken_core" +crate-type = ["lib"] + +[dependencies] +# tiktoken dependencies +fancy-regex = "0.10.0" +regex = "1.7.0" +rustc-hash = "1.1.0" 
+bstr = "1.0.1" +reqwest = { version = "0.11.14", features = ["blocking"] } +sha1 = "0.10.5" +json = "0.12.4" +base64 = "0.21.0" +lazy_static = "1.4.0" + +[profile.release] +incremental = true diff --git a/src/core.rs b/core/src/lib.rs similarity index 99% rename from src/core.rs rename to core/src/lib.rs index a5744428..2942b571 100644 --- a/src/core.rs +++ b/core/src/lib.rs @@ -4,8 +4,12 @@ use std::thread; use fancy_regex::Regex; use rustc_hash::FxHashMap as HashMap; -#[path = "util.rs"] mod util; +mod load; +mod openai_public; + +#[macro_use] +extern crate lazy_static; const MAX_NUM_THREADS: usize = 128; diff --git a/src/load.rs b/core/src/load.rs similarity index 100% rename from src/load.rs rename to core/src/load.rs diff --git a/src/openai_public.rs b/core/src/openai_public.rs similarity index 59% rename from src/openai_public.rs rename to core/src/openai_public.rs index ff10c86a..d7896942 100644 --- a/src/openai_public.rs +++ b/core/src/openai_public.rs @@ -8,7 +8,8 @@ mod load; type Result = std::result::Result>; lazy_static! { - static ref REGISTRY: HashMap = [ + pub static ref REGISTRY: HashMap = [ + // TODO EncodingLazy::new( "gpt2".into(), 50257, @@ -26,7 +27,50 @@ lazy_static! 
{ .into_iter() .map(|enc| (enc.name.clone(), enc)) .collect::>(); + + pub static ref MODEL_TO_ENCODING: HashMap = [ + // text + ("text-davinci-003", "p50k_base"), + ("text-davinci-002", "p50k_base"), + ("text-davinci-001", "r50k_base"), + ("text-curie-001", "r50k_base"), + ("text-babbage-001", "r50k_base"), + ("text-ada-001", "r50k_base"), + ("davinci", "r50k_base"), + ("curie", "r50k_base"), + ("babbage", "r50k_base"), + ("ada", "r50k_base"), + // code + ("code-davinci-002", "p50k_base"), + ("code-davinci-001", "p50k_base"), + ("code-cushman-002", "p50k_base"), + ("code-cushman-001", "p50k_base"), + ("davinci-codex", "p50k_base"), + ("cushman-codex", "p50k_base"), + // edit + ("text-davinci-edit-001", "p50k_edit"), + ("code-davinci-edit-001", "p50k_edit"), + // embeddings + ("text-embedding-ada-002", "cl100k_base"), + // old embeddings + ("text-similarity-davinci-001", "r50k_base"), + ("text-similarity-curie-001", "r50k_base"), + ("text-similarity-babbage-001", "r50k_base"), + ("text-similarity-ada-001", "r50k_base"), + ("text-search-davinci-doc-001", "r50k_base"), + ("text-search-curie-doc-001", "r50k_base"), + ("text-search-babbage-doc-001", "r50k_base"), + ("text-search-ada-doc-001", "r50k_base"), + ("code-search-babbage-code-001", "r50k_base"), + ("code-search-ada-code-001", "r50k_base"), + // open source + ("gpt2", "gpt2"), + ] + .into_iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect::>(); } + struct DataGymDef { vocab_bpe_file: String, encoder_json_file: String, diff --git a/src/util.rs b/core/src/util.rs similarity index 100% rename from src/util.rs rename to core/src/util.rs diff --git a/java/pom.xml b/java/pom.xml new file mode 100644 index 00000000..ead3f91c --- /dev/null +++ b/java/pom.xml @@ -0,0 +1,91 @@ + + + + 4.0.0 + + tiktoken + tiktoken + 1.0-SNAPSHOT + + tiktoken + https://github.com/openai/tiktoken + + + UTF-8 + 1.7 + 1.7 + + + + + junit + junit + 4.11 + test + + + + + + + + + maven-clean-plugin + 3.1.0 + + + + 
maven-resources-plugin + 3.0.2 + + + maven-compiler-plugin + 3.8.0 + + + maven-surefire-plugin + 2.22.1 + + + maven-jar-plugin + 3.0.2 + + + maven-install-plugin + 2.5.2 + + + maven-deploy-plugin + 2.8.2 + + + + maven-site-plugin + 3.7.1 + + + maven-project-info-reports-plugin + 3.0.0 + + + org.apache.maven.plugins + maven-surefire-plugin + 2.17 + + + surefire-test + test + + test + + + + + -Djava.library.path=${project.build.directory}/bin/natives/linux_64 + + + + + + diff --git a/java/src/main/java/tiktoken/Encoding.java b/java/src/main/java/tiktoken/Encoding.java new file mode 100644 index 00000000..f4ecbbf6 --- /dev/null +++ b/java/src/main/java/tiktoken/Encoding.java @@ -0,0 +1,21 @@ +package tiktoken; + +import java.util.Set; + +public class Encoding +{ + static { + System.loadLibrary("_tiktoken"); + } + + // initialized by init + private long handle; + + private native void init(String modelName); + + public native long[] encode(String text, Set allowedSpecialTokens, long maxTokenLength); + + public Encoding(String modelName) { + init(modelName); + } +} diff --git a/java/src/test/java/tiktoken/EncodingTest.java b/java/src/test/java/tiktoken/EncodingTest.java new file mode 100644 index 00000000..8c2eba46 --- /dev/null +++ b/java/src/test/java/tiktoken/EncodingTest.java @@ -0,0 +1,16 @@ +package tiktoken; + +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +public class EncodingTest +{ + @Test + public void shouldAnswerWithTrue() + { + Encoding encoding = new Encoding("text-davinci-001"); + + assertTrue( true ); + } +} diff --git a/jni/src/lib.rs b/jni/src/lib.rs new file mode 100644 index 00000000..56e73d5d --- /dev/null +++ b/jni/src/lib.rs @@ -0,0 +1,50 @@ + +use jni::JNIEnv; +// These objects are what you should use as arguments to your native +// function. They carry extra lifetime information to prevent them escaping +// this context and getting used after being GC'd. 
+use jni::objects::{JClass, JObject, JString}; + +// This is just a pointer. We'll be returning it from our function. We +// can't return one of the objects with lifetime information because the +// lifetime checker won't let us. +use jni::sys::{jstring, jlong}; + +pub extern "system" fn Java_tiktoken_Encoding_init( + env: JNIEnv, + obj: JObject, + model_name: JString +) { + use openai_public::{REGISTRY, MODEL_TO_ENCODING}; + + // First, we have to get the string out of Java. Check out the `strings` + // module for more info on how this works. + let model_name: String = + env.get_string(model_name).expect("Unable to get Java model name").into(); + + let encoding_name = openai_public::MODEL_TO_ENCODING.get(&model_name).expect("Unable to find model"); + + let encoding = openai_public::REGISTRY.get(encoding_name).expect("Unable to find encoding"); + + let encoding_ptr = Box::into_raw(Box::new(encoding)) as jlong; + + env.set_field(obj, "handle", "J", jni::objects::JValue::Long(encoding_ptr)).expect("Unable to store handle"); +} + +// pub extern "system" fn Java_tiktoken_Encoding_encode(env: JNIEnv, +// class: JClass, +// input: JString) +// -> jstring { +// // First, we have to get the string out of Java. Check out the `strings` +// // module for more info on how this works. +// let input: String = +// env.get_string(input).expect("Couldn't get java string!").into(); + +// // Then we have to create a new Java string to return. Again, more info +// // in the `strings` module. +// let output = env.new_string(format!("Hello, {}!", input)) +// .expect("Couldn't create java string!"); + +// // Finally, extract the raw pointer to return. 
+// output.into_inner() +// } \ No newline at end of file diff --git a/python/Cargo.toml b/python/Cargo.toml new file mode 100644 index 00000000..e674905f --- /dev/null +++ b/python/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "tiktoken" +version = "0.2.0" +edition = "2021" +rust-version = "1.57.0" + +[lib] +name = "_tiktoken" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = { version = "0.17.3", features = ["extension-module"] } +tiktoken_core = { path = "../core" } +rustc-hash = "1.1.0" + +[profile.release] +incremental = true diff --git a/src/lib.rs b/python/src/lib.rs similarity index 54% rename from src/lib.rs rename to python/src/lib.rs index c69b2e3e..599105e2 100644 --- a/src/lib.rs +++ b/python/src/lib.rs @@ -1,4 +1,3 @@ -// This check is new and seems buggy (possibly with PyO3 interaction) #![allow(clippy::borrow_deref_ref)] use std::collections::HashSet; @@ -9,17 +8,11 @@ use pyo3::types::{PyBytes, PyList, PyTuple}; use pyo3::PyResult; use rustc_hash::FxHashMap as HashMap; -mod util; -mod core; -mod load; -mod openai_public; - -#[macro_use] -extern crate lazy_static; +use _tiktoken_core::CoreBPENative; #[pyclass] struct CoreBPE { - native: core::CoreBPENative, + native: CoreBPENative, } #[pymethods] @@ -30,7 +23,7 @@ impl CoreBPE { special_tokens_encoder: HashMap, pattern: &str, ) -> PyResult { - let native = core::CoreBPENative::new(encoder, special_tokens_encoder, pattern) + let native = CoreBPENative::new(encoder, special_tokens_encoder, pattern) .map_err(|e| PyErr::new::(e.to_string()))?; Ok(CoreBPE { native }) } @@ -96,51 +89,9 @@ impl CoreBPE { } } -// pub fn py_data_gym_to_mergable_bpe_ranks(py: Python, vocab_bpe_file: &str, encoder_json_file: &str) -> PyResult> { -#[pyfunction] -pub fn py_data_gym_to_mergable_bpe_ranks(py: Python, vocab_bpe_file: &str, encoder_json_file: &str) -> PyResult, usize>> { - let ranks = load::data_gym_to_mergeable_bpe_ranks(vocab_bpe_file, encoder_json_file) - .map_err(|e| PyErr::new::(e.to_string()))?; - - 
Ok(ranks) - // Ok(ranks - // .iter() - // .map(|(k, v)| (PyBytes::new(py, k).into(), *v)) - // .collect::>()) -} #[pymodule] fn _tiktoken(_py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; - m.add_function(wrap_pyfunction!(crate::py_data_gym_to_mergable_bpe_ranks, m)?)?; Ok(()) } - -use jni::JNIEnv; -// These objects are what you should use as arguments to your native -// function. They carry extra lifetime information to prevent them escaping -// this context and getting used after being GC'd. -use jni::objects::{JClass, JString}; - -// This is just a pointer. We'll be returning it from our function. We -// can't return one of the objects with lifetime information because the -// lifetime checker won't let us. -use jni::sys::jstring; - -// pub extern "system" fn Java_tiktoken_Encoding_encode(env: JNIEnv, -// class: JClass, -// input: JString) -// -> jstring { -// // First, we have to get the string out of Java. Check out the `strings` -// // module for more info on how this works. -// let input: String = -// env.get_string(input).expect("Couldn't get java string!").into(); - -// // Then we have to create a new Java string to return. Again, more info -// // in the `strings` module. -// let output = env.new_string(format!("Hello, {}!", input)) -// .expect("Couldn't create java string!"); - -// // Finally, extract the raw pointer to return. 
-// output.into_inner() -// } \ No newline at end of file diff --git a/setup.py b/setup.py index a22e8e5d..a004c653 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,7 @@ # Between our use of editable installs and wanting to use Rust for performance sensitive # code, it makes sense to just always use --release debug=False, + args=["-p", "python"], ) ], package_data={"tiktoken": ["py.typed"]}, From ba69bd915dd90b36c39da2a15c08d6abddf515f5 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Fri, 10 Feb 2023 20:30:23 +0100 Subject: [PATCH 008/207] fixed core --- core/src/openai_public.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/openai_public.rs b/core/src/openai_public.rs index d7896942..7d91ea30 100644 --- a/core/src/openai_public.rs +++ b/core/src/openai_public.rs @@ -81,7 +81,7 @@ enum EncoderLoadingStrategy { DataGym(DataGymDef), } -struct EncodingLazy { +pub struct EncodingLazy { name: String, explicit_n_vocab: usize, pat_str: String, From c98f8240ed5d63ebc086e1e7d42ca681b174c381 Mon Sep 17 00:00:00 2001 From: Jack Gerrits Date: Fri, 10 Feb 2023 14:34:53 -0500 Subject: [PATCH 009/207] Fix python setup.py for package in workspace --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a004c653..96ad5d6a 100644 --- a/setup.py +++ b/setup.py @@ -7,10 +7,10 @@ RustExtension( "tiktoken._tiktoken", binding=Binding.PyO3, + path="python/Cargo.toml", # Between our use of editable installs and wanting to use Rust for performance sensitive # code, it makes sense to just always use --release debug=False, - args=["-p", "python"], ) ], package_data={"tiktoken": ["py.typed"]}, From f248694fa94c2f724ef7bd9ed0ef340ae9d06d8b Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Fri, 10 Feb 2023 22:50:13 +0100 Subject: [PATCH 010/207] java bindings... 
--- Cargo.toml | 1 + core/src/lib.rs | 2 +- core/src/openai_public.rs | 58 +++++++++++++++++++---- java/pom.xml | 2 +- java/src/main/java/tiktoken/Encoding.java | 4 +- jni/Cargo.toml | 17 +++++++ jni/src/lib.rs | 19 +++++--- 7 files changed, 82 insertions(+), 21 deletions(-) create mode 100644 jni/Cargo.toml diff --git a/Cargo.toml b/Cargo.toml index abe35ef4..f07251fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,4 +3,5 @@ members = [ "core", "python", + "jni", ] \ No newline at end of file diff --git a/core/src/lib.rs b/core/src/lib.rs index 2942b571..52cacb04 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -6,7 +6,7 @@ use rustc_hash::FxHashMap as HashMap; mod util; mod load; -mod openai_public; +pub mod openai_public; #[macro_use] extern crate lazy_static; diff --git a/core/src/openai_public.rs b/core/src/openai_public.rs index 7d91ea30..dfa8c5f2 100644 --- a/core/src/openai_public.rs +++ b/core/src/openai_public.rs @@ -9,25 +9,63 @@ type Result = std::result::Result>; lazy_static! 
{ pub static ref REGISTRY: HashMap = [ - // TODO EncodingLazy::new( "gpt2".into(), - 50257, + Some(50257), r"'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+".into(), - [ - ("<|endoftext|>".into(), 50256), - ].into_iter().collect(), + [ ("<|endoftext|>".into(), 50256), ].into_iter().collect(), EncoderLoadingStrategy::DataGym( DataGymDef { - vocab_bpe_file: "https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe".to_string(), - encoder_json_file: "https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json".to_string() + vocab_bpe_file: "https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe".into(), + encoder_json_file: "https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json".into() } - )) + )), + EncodingLazy::new( + "r50k_base".into(), + Some(50257), + r"'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+".into(), + [ ("<|endoftext|>".into(), 50256), ].into_iter().collect(), + EncoderLoadingStrategy::BPE("https://openaipublic.blob.core.windows.net/encodings/r50k_base.tiktoken".into()) + ), + EncodingLazy::new( + "p50k_base".into(), + Some(50281), + r"'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+".into(), + [ ("<|endoftext|>".into(), 50256), ].into_iter().collect(), + EncoderLoadingStrategy::BPE("https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken".into()) + ), + EncodingLazy::new( + "p50k_edit".into(), + Some(50281), + r"'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+".into(), + [ + ("<|endoftext|>".into(), 50256), + ("<|fim_prefix|>".into(), 50281), + ("<|fim_middle|>".into(), 50282), + ("<|fim_suffix|>".into(), 50283), + ].into_iter().collect(), + EncoderLoadingStrategy::BPE("https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken".into()) + ), + EncodingLazy::new( + "cl100k_base".into(), + None, + 
r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+".into(), + [ + ("<|endoftext|>".into(), 100257), + ("<|fim_prefix|>".into(), 100258), + ("<|fim_middle|>".into(), 100259), + ("<|fim_suffix|>".into(), 100260), + ("<|endofprompt|>".into(), 100276), + ].into_iter().collect(), + EncoderLoadingStrategy::BPE("https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken".into()) + ), ] .into_iter() .map(|enc| (enc.name.clone(), enc)) .collect::>(); + + pub static ref MODEL_TO_ENCODING: HashMap = [ // text ("text-davinci-003", "p50k_base"), @@ -83,7 +121,7 @@ enum EncoderLoadingStrategy { pub struct EncodingLazy { name: String, - explicit_n_vocab: usize, + explicit_n_vocab: Option, pat_str: String, special_tokens: HashMap, mergeable_ranks: Option, usize>>, @@ -92,7 +130,7 @@ pub struct EncodingLazy { impl EncodingLazy { fn new(name: String, - explicit_n_vocab: usize, + explicit_n_vocab: Option, pat_str: String, special_tokens: HashMap, loading_strategy: EncoderLoadingStrategy) -> Self { diff --git a/java/pom.xml b/java/pom.xml index ead3f91c..c3a32888 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -82,7 +82,7 @@ - -Djava.library.path=${project.build.directory}/bin/natives/linux_64 + -Djava.library.path=${project.build.directory}/../../target/debug/ diff --git a/java/src/main/java/tiktoken/Encoding.java b/java/src/main/java/tiktoken/Encoding.java index f4ecbbf6..86a02d83 100644 --- a/java/src/main/java/tiktoken/Encoding.java +++ b/java/src/main/java/tiktoken/Encoding.java @@ -5,7 +5,7 @@ public class Encoding { static { - System.loadLibrary("_tiktoken"); + System.loadLibrary("_tiktoken_jni"); } // initialized by init @@ -16,6 +16,6 @@ public class Encoding public native long[] encode(String text, Set allowedSpecialTokens, long maxTokenLength); public Encoding(String modelName) { - init(modelName); + this.init(modelName); } } diff --git a/jni/Cargo.toml b/jni/Cargo.toml new file mode 100644 
index 00000000..0a7651d7 --- /dev/null +++ b/jni/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "tiktoken_jni" +version = "0.2.0" +edition = "2021" +rust-version = "1.57.0" + +[lib] +name = "_tiktoken_jni" +crate-type = ["cdylib"] + +[dependencies] +tiktoken_core = { path = "../core" } +rustc-hash = "1.1.0" +jni = "0.20.0" + +[profile.release] +incremental = true diff --git a/jni/src/lib.rs b/jni/src/lib.rs index 56e73d5d..cc6ed9cc 100644 --- a/jni/src/lib.rs +++ b/jni/src/lib.rs @@ -1,30 +1,33 @@ + use jni::JNIEnv; // These objects are what you should use as arguments to your native // function. They carry extra lifetime information to prevent them escaping // this context and getting used after being GC'd. -use jni::objects::{JClass, JObject, JString}; +use jni::objects::{JObject, JString}; // This is just a pointer. We'll be returning it from our function. We // can't return one of the objects with lifetime information because the // lifetime checker won't let us. -use jni::sys::{jstring, jlong}; +use jni::sys::{jlong}; + +use _tiktoken_core; +#[no_mangle] pub extern "system" fn Java_tiktoken_Encoding_init( env: JNIEnv, obj: JObject, model_name: JString ) { - use openai_public::{REGISTRY, MODEL_TO_ENCODING}; - + // First, we have to get the string out of Java. Check out the `strings` // module for more info on how this works. 
let model_name: String = env.get_string(model_name).expect("Unable to get Java model name").into(); - let encoding_name = openai_public::MODEL_TO_ENCODING.get(&model_name).expect("Unable to find model"); + let encoding_name = _tiktoken_core::openai_public::MODEL_TO_ENCODING.get(&model_name).expect("Unable to find model"); - let encoding = openai_public::REGISTRY.get(encoding_name).expect("Unable to find encoding"); + let encoding = _tiktoken_core::openai_public::REGISTRY.get(encoding_name).expect("Unable to find encoding"); let encoding_ptr = Box::into_raw(Box::new(encoding)) as jlong; @@ -33,7 +36,9 @@ pub extern "system" fn Java_tiktoken_Encoding_init( // pub extern "system" fn Java_tiktoken_Encoding_encode(env: JNIEnv, // class: JClass, -// input: JString) +// input: JString, +// allowedSpecialTokens: JObject, +// maxTokenLength: jlong) // -> jstring { // // First, we have to get the string out of Java. Check out the `strings` // // module for more info on how this works. From 2f371f60f7b3ac97abea447e0385a0fa7e814a01 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Fri, 10 Feb 2023 23:05:29 +0100 Subject: [PATCH 011/207] more TODO --- java/src/main/java/tiktoken/Encoding.java | 2 ++ jni/src/lib.rs | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/java/src/main/java/tiktoken/Encoding.java b/java/src/main/java/tiktoken/Encoding.java index 86a02d83..d0ab711d 100644 --- a/java/src/main/java/tiktoken/Encoding.java +++ b/java/src/main/java/tiktoken/Encoding.java @@ -18,4 +18,6 @@ public class Encoding public Encoding(String modelName) { this.init(modelName); } + + // TODO: close() and/or closeable interface } diff --git a/jni/src/lib.rs b/jni/src/lib.rs index cc6ed9cc..82a1b5e5 100644 --- a/jni/src/lib.rs +++ b/jni/src/lib.rs @@ -27,8 +27,12 @@ pub extern "system" fn Java_tiktoken_Encoding_init( let encoding_name = _tiktoken_core::openai_public::MODEL_TO_ENCODING.get(&model_name).expect("Unable to find model"); + // TODO: this is actually mergable_ranks 
(lazy) let encoding = _tiktoken_core::openai_public::REGISTRY.get(encoding_name).expect("Unable to find encoding"); + // TODO: initialize the CoreBPE object + + // TODO: this should be CoreBPE let encoding_ptr = Box::into_raw(Box::new(encoding)) as jlong; env.set_field(obj, "handle", "J", jni::objects::JValue::Long(encoding_ptr)).expect("Unable to store handle"); From 1d3f707a27963d4b90d1d47a2a91e69632cbed8d Mon Sep 17 00:00:00 2001 From: Jack Gerrits Date: Fri, 10 Feb 2023 22:09:29 -0500 Subject: [PATCH 012/207] Initial VERY rough impl of JNI layer --- core/src/openai_public.rs | 57 +++++---- java/src/main/java/tiktoken/Encoding.java | 13 ++- java/src/test/java/tiktoken/EncodingTest.java | 10 +- jni/src/lib.rs | 109 ++++++++++++------ 4 files changed, 127 insertions(+), 62 deletions(-) diff --git a/core/src/openai_public.rs b/core/src/openai_public.rs index dfa8c5f2..17ff7e27 100644 --- a/core/src/openai_public.rs +++ b/core/src/openai_public.rs @@ -1,6 +1,7 @@ use rustc_hash::FxHashMap as HashMap; use std::error::Error; +use std::sync::RwLock; #[path = "load.rs"] mod load; @@ -8,7 +9,7 @@ mod load; type Result = std::result::Result>; lazy_static! { - pub static ref REGISTRY: HashMap = [ + pub static ref REGISTRY: HashMap = [ EncodingLazy::new( "gpt2".into(), Some(50257), @@ -38,7 +39,7 @@ lazy_static! { "p50k_edit".into(), Some(50281), r"'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+".into(), - [ + [ ("<|endoftext|>".into(), 50256), ("<|fim_prefix|>".into(), 50281), ("<|fim_middle|>".into(), 50282), @@ -50,7 +51,7 @@ lazy_static! { "cl100k_base".into(), None, r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+".into(), - [ + [ ("<|endoftext|>".into(), 100257), ("<|fim_prefix|>".into(), 100258), ("<|fim_middle|>".into(), 100259), @@ -109,11 +110,13 @@ lazy_static! 
{ .collect::>(); } +#[derive(Clone, PartialEq, Eq, Hash)] struct DataGymDef { vocab_bpe_file: String, encoder_json_file: String, } +#[derive(Clone, PartialEq, Eq, Hash)] enum EncoderLoadingStrategy { BPE(String), DataGym(DataGymDef), @@ -122,12 +125,29 @@ enum EncoderLoadingStrategy { pub struct EncodingLazy { name: String, explicit_n_vocab: Option, - pat_str: String, - special_tokens: HashMap, - mergeable_ranks: Option, usize>>, + pub pat_str: String, + pub special_tokens: HashMap, + mergeable_ranks: RwLock, usize>>>, loading_strategy: EncoderLoadingStrategy, } +fn load_bpe(path: &str) -> Result, usize>> { + load::load_tiktoken_bpe(path) +} + +fn load_data_gym(def: &DataGymDef) -> Result, usize>> { + load::data_gym_to_mergeable_bpe_ranks(&def.vocab_bpe_file, &def.encoder_json_file) +} + +// #[memoize] +fn load_mergeable_ranks(loading_strategy: &EncoderLoadingStrategy) -> Result, usize>> +{ + match loading_strategy { + EncoderLoadingStrategy::BPE(path) => load_bpe(&path), + EncoderLoadingStrategy::DataGym(def) => load_data_gym(&def), + } +} + impl EncodingLazy { fn new(name: String, explicit_n_vocab: Option, @@ -139,28 +159,23 @@ impl EncodingLazy { explicit_n_vocab, pat_str, special_tokens, - mergeable_ranks: None, + mergeable_ranks: RwLock::new(None), loading_strategy } } - fn get(&mut self) -> Result<&HashMap, usize>> { - if self.mergeable_ranks.is_none() { - self.mergeable_ranks = Some(match &self.loading_strategy { - EncoderLoadingStrategy::BPE(path) => Self::load_bpe(&path)?, - EncoderLoadingStrategy::DataGym(def) => Self::load_data_gym(&def)?, - }) + pub fn get(&self) -> Result, usize>> { + { + let read = self.mergeable_ranks.read().unwrap(); + if read.is_some() { + return Ok(read.as_ref().unwrap().clone()); + } } - Ok(self.mergeable_ranks.as_ref().expect("mergeable_ranks should be loaded by now")) - } - - fn load_bpe(path: &str) -> Result, usize>> { - load::load_tiktoken_bpe(path) - } + let mut write = self.mergeable_ranks.write().unwrap(); + *write = 
Some(load_mergeable_ranks(&self.loading_strategy)?); - fn load_data_gym(def: &DataGymDef) -> Result, usize>> { - load::data_gym_to_mergeable_bpe_ranks(&def.vocab_bpe_file, &def.encoder_json_file) + Ok(write.as_ref().unwrap().clone()) } } diff --git a/java/src/main/java/tiktoken/Encoding.java b/java/src/main/java/tiktoken/Encoding.java index d0ab711d..cc041e48 100644 --- a/java/src/main/java/tiktoken/Encoding.java +++ b/java/src/main/java/tiktoken/Encoding.java @@ -1,8 +1,6 @@ package tiktoken; -import java.util.Set; - -public class Encoding +public class Encoding implements AutoCloseable { static { System.loadLibrary("_tiktoken_jni"); @@ -13,11 +11,16 @@ public class Encoding private native void init(String modelName); - public native long[] encode(String text, Set allowedSpecialTokens, long maxTokenLength); + public native long[] encode(String text, String[] allowedSpecialTokens, long maxTokenLength); + + private native void destroy(); + public Encoding(String modelName) { this.init(modelName); } - // TODO: close() and/or closeable interface + public void close() throws Exception { + destroy(); + } } diff --git a/java/src/test/java/tiktoken/EncodingTest.java b/java/src/test/java/tiktoken/EncodingTest.java index 8c2eba46..591c5261 100644 --- a/java/src/test/java/tiktoken/EncodingTest.java +++ b/java/src/test/java/tiktoken/EncodingTest.java @@ -1,16 +1,22 @@ package tiktoken; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertTrue; import org.junit.Test; -public class EncodingTest +public class EncodingTest { @Test - public void shouldAnswerWithTrue() + public void shouldAnswerWithTrue() throws Exception { Encoding encoding = new Encoding("text-davinci-001"); + long[] a = encoding.encode("test", new String[0], 0); + + encoding.close(); + assertTrue( true ); + assertArrayEquals(new long[] {9288}, a); } } diff --git a/jni/src/lib.rs b/jni/src/lib.rs index 82a1b5e5..209f23b9 100644 --- a/jni/src/lib.rs +++ b/jni/src/lib.rs @@ -1,5 
+1,7 @@ +use std::collections::HashSet; +use std::sync::MutexGuard; - +use _tiktoken_core::openai_public::EncodingLazy; use jni::JNIEnv; // These objects are what you should use as arguments to your native // function. They carry extra lifetime information to prevent them escaping @@ -9,51 +11,90 @@ use jni::objects::{JObject, JString}; // This is just a pointer. We'll be returning it from our function. We // can't return one of the objects with lifetime information because the // lifetime checker won't let us. -use jni::sys::{jlong}; +use jni::sys::{jarray, jlong}; -use _tiktoken_core; +use _tiktoken_core::{self, CoreBPENative}; #[no_mangle] -pub extern "system" fn Java_tiktoken_Encoding_init( - env: JNIEnv, - obj: JObject, - model_name: JString -) { - +pub extern "system" fn Java_tiktoken_Encoding_init(env: JNIEnv, obj: JObject, model_name: JString) { // First, we have to get the string out of Java. Check out the `strings` // module for more info on how this works. - let model_name: String = - env.get_string(model_name).expect("Unable to get Java model name").into(); + let model_name: String = env + .get_string(model_name) + .expect("Unable to get Java model name") + .into(); - let encoding_name = _tiktoken_core::openai_public::MODEL_TO_ENCODING.get(&model_name).expect("Unable to find model"); + let encoding_name = _tiktoken_core::openai_public::MODEL_TO_ENCODING + .get(&model_name) + .expect("Unable to find model"); // TODO: this is actually mergable_ranks (lazy) - let encoding = _tiktoken_core::openai_public::REGISTRY.get(encoding_name).expect("Unable to find encoding"); + let mut encoding = _tiktoken_core::openai_public::REGISTRY + .get(encoding_name) + .expect("Unable to find encoding"); // TODO: initialize the CoreBPE object // TODO: this should be CoreBPE - let encoding_ptr = Box::into_raw(Box::new(encoding)) as jlong; - env.set_field(obj, "handle", "J", jni::objects::JValue::Long(encoding_ptr)).expect("Unable to store handle"); + let bpe_native = 
CoreBPENative::new( + encoding.get().unwrap(), + encoding.special_tokens.clone(), + &encoding.pat_str, + ) + .unwrap(); + + unsafe { + env.set_rust_field(obj, "handle", bpe_native).unwrap(); + } + + // env.set_field(obj, "handle", "J", jni::objects::JValue::Long(encoding_ptr)).expect("Unable to store handle"); } -// pub extern "system" fn Java_tiktoken_Encoding_encode(env: JNIEnv, -// class: JClass, -// input: JString, -// allowedSpecialTokens: JObject, -// maxTokenLength: jlong) -// -> jstring { -// // First, we have to get the string out of Java. Check out the `strings` -// // module for more info on how this works. -// let input: String = -// env.get_string(input).expect("Couldn't get java string!").into(); - -// // Then we have to create a new Java string to return. Again, more info -// // in the `strings` module. -// let output = env.new_string(format!("Hello, {}!", input)) -// .expect("Couldn't create java string!"); - -// // Finally, extract the raw pointer to return. -// output.into_inner() -// } \ No newline at end of file +#[no_mangle] +pub extern "system" fn Java_tiktoken_Encoding_destroy(env: JNIEnv, obj: JObject) { + unsafe { + let _: CoreBPENative = env.take_rust_field(obj, "handle").unwrap(); + } +} + +#[no_mangle] +pub extern "system" fn Java_tiktoken_Encoding_encode( + env: JNIEnv, + obj: JObject, + text: JString, + allowedSpecialTokens: jarray, + maxTokenLength: jlong, +) -> jarray { + let encoding: MutexGuard = unsafe { env.get_rust_field(obj, "handle").unwrap() }; + + let enc = encoding; + let input: String = env + .get_string(text) + .expect("Couldn't get java string!") + .into(); + + let len = env.get_array_length(allowedSpecialTokens).unwrap(); + let mut strings: Vec = Vec::with_capacity(len as usize); + for i in 0..len { + let element: JObject = env + .get_object_array_element(allowedSpecialTokens, i) + .unwrap(); + let current: String = env.get_string(element.into()).unwrap().into(); + strings.push(current); + } + + let v2: HashSet<&str> = 
strings.iter().map(|s| &**s).collect(); + + let (tokens, _, _) = enc._encode_native(&input, &v2, Some(maxTokenLength as usize)); + + let mut output = env + .new_long_array(tokens.len().try_into().unwrap()) + .unwrap(); + + let array_of_u64 = tokens.iter().map(|x| *x as i64).collect::>(); + env.set_long_array_region(output, 0, array_of_u64.as_slice()) + .unwrap(); + + output +} From 95aef95cb37dbc86d4432acd2774b366034bb483 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 19 Feb 2023 14:32:49 +0100 Subject: [PATCH 013/207] Add wasm-bindgen, inline ranks --- .gitignore | 3 + Cargo.toml | 6 +- scripts/download_ranks.py | 128 ++++++++++++++++++++ src/lib.rs | 238 ++++++++++++++++++++++++++------------ 4 files changed, 300 insertions(+), 75 deletions(-) create mode 100644 scripts/download_ranks.py diff --git a/.gitignore b/.gitignore index 9e090c8e..1c1340f1 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,6 @@ htmlcov Cargo.lock target/ + +# WASM +ranks/ \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 1fb806bc..8a8243d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,10 @@ name = "_tiktoken" crate-type = ["cdylib"] [dependencies] -pyo3 = { version = "0.17.3", features = ["extension-module"] } +wasm-bindgen = "0.2.83" +js-sys = "0.3.61" +anyhow = "1.0.69" +base64 = "0.21.0" # tiktoken dependencies fancy-regex = "0.10.0" @@ -19,3 +22,4 @@ bstr = "1.0.1" [profile.release] incremental = true +opt-level = "s" \ No newline at end of file diff --git a/scripts/download_ranks.py b/scripts/download_ranks.py new file mode 100644 index 00000000..c1583877 --- /dev/null +++ b/scripts/download_ranks.py @@ -0,0 +1,128 @@ +import base64 +import hashlib +import json +import os +import tempfile +import uuid + +import blobfile +import requests + + +def read_file(blobpath: str) -> bytes: + if not blobpath.startswith("http://") and not blobpath.startswith("https://"): + with blobfile.BlobFile(blobpath, "rb") as f: + return f.read() + # avoiding blobfile 
for public files helps avoid auth issues, like MFA prompts + return requests.get(blobpath).content + + +def read_file_cached(blobpath: str) -> bytes: + if "TIKTOKEN_CACHE_DIR" in os.environ: + cache_dir = os.environ["TIKTOKEN_CACHE_DIR"] + elif "DATA_GYM_CACHE_DIR" in os.environ: + cache_dir = os.environ["DATA_GYM_CACHE_DIR"] + else: + cache_dir = os.path.join(tempfile.gettempdir(), "data-gym-cache") + + if cache_dir == "": + # disable caching + return read_file(blobpath) + + cache_key = hashlib.sha1(blobpath.encode()).hexdigest() + + cache_path = os.path.join(cache_dir, cache_key) + if os.path.exists(cache_path): + with open(cache_path, "rb") as f: + return f.read() + + contents = read_file(blobpath) + + os.makedirs(cache_dir, exist_ok=True) + tmp_filename = cache_path + "." + str(uuid.uuid4()) + ".tmp" + with open(tmp_filename, "wb") as f: + f.write(contents) + os.rename(tmp_filename, cache_path) + + return contents + + +def data_gym_to_mergeable_bpe_ranks( + vocab_bpe_file: str, encoder_json_file: str +) -> dict[bytes, int]: + # NB: do not add caching to this function + rank_to_intbyte = [b for b in range( + 2**8) if chr(b).isprintable() and chr(b) != " "] + + data_gym_byte_to_byte = {chr(b): b for b in rank_to_intbyte} + n = 0 + for b in range(2**8): + if b not in rank_to_intbyte: + rank_to_intbyte.append(b) + data_gym_byte_to_byte[chr(2**8 + n)] = b + n += 1 + assert len(rank_to_intbyte) == 2**8 + + # vocab_bpe contains the merges along with associated ranks + vocab_bpe_contents = read_file_cached(vocab_bpe_file).decode() + bpe_merges = [tuple(merge_str.split()) + for merge_str in vocab_bpe_contents.split("\n")[1:-1]] + + def decode_data_gym(value: str) -> bytes: + return bytes(data_gym_byte_to_byte[b] for b in value) + + # add the single byte tokens + bpe_ranks = {bytes([b]): i for i, b in enumerate(rank_to_intbyte)} + # add the merged tokens + n = len(bpe_ranks) + for first, second in bpe_merges: + bpe_ranks[decode_data_gym(first) + decode_data_gym(second)] 
= n + n += 1 + + # check that the encoder file matches the merges file + # this sanity check is important since tiktoken assumes that ranks are ordered the same + # as merge priority + encoder_json = json.loads(read_file_cached(encoder_json_file)) + encoder_json_loaded = {decode_data_gym( + k): v for k, v in encoder_json.items()} + # drop these two special tokens if present, since they're not mergeable bpe tokens + encoder_json_loaded.pop(b"<|endoftext|>", None) + encoder_json_loaded.pop(b"<|startoftext|>", None) + assert bpe_ranks == encoder_json_loaded + + return bpe_ranks + + +def load_tiktoken_bpe(tiktoken_bpe_file: str) -> dict[bytes, int]: + # NB: do not add caching to this function + contents = read_file_cached(tiktoken_bpe_file) + return { + base64.b64decode(token): int(rank) + for token, rank in (line.split() for line in contents.splitlines() if line) + } + + +def dump_tiktoken_bpe(bpe_ranks: dict[bytes, int], tiktoken_bpe_file: str) -> None: + with blobfile.BlobFile(tiktoken_bpe_file, "wb") as f: + for token, rank in sorted(bpe_ranks.items(), key=lambda x: x[1]): + f.write(base64.b64encode(token) + + b" " + str(rank).encode() + b"\n") + + +if __name__ == "__main__": + dump_tiktoken_bpe(data_gym_to_mergeable_bpe_ranks( + vocab_bpe_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe", + encoder_json_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json", + ), "./ranks/gpt2.tiktoken") + + dump_tiktoken_bpe(load_tiktoken_bpe( + "https://openaipublic.blob.core.windows.net/encodings/r50k_base.tiktoken" + ), "./ranks/r50k_base.tiktoken") + + dump_tiktoken_bpe(load_tiktoken_bpe( + "https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken" + ), "./ranks/p50k_base.tiktoken") + + dump_tiktoken_bpe(load_tiktoken_bpe( + "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken" + ), "./ranks/cl100k_base.tiktoken") diff --git a/src/lib.rs b/src/lib.rs index 8235dbb1..5004ab9d 
100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,132 @@ -// This check is new and seems buggy (possibly with PyO3 interaction) -#![allow(clippy::borrow_deref_ref)] - -use std::collections::HashSet; -use std::thread; - +use anyhow::{anyhow, Result}; +use base64::{engine::general_purpose, Engine as _}; use fancy_regex::Regex; -use pyo3::exceptions; -use pyo3::prelude::*; -use pyo3::types::{PyBytes, PyList, PyTuple}; -use pyo3::PyResult; use rustc_hash::FxHashMap as HashMap; +use std::collections::HashSet; + +use wasm_bindgen::prelude::*; + +const ENDOFTEXT: &'static str = "<|endoftext|>"; + +const FIM_PREFIX: &'static str = "<|fim_prefix|>"; + +const FIM_MIDDLE: &'static str = "<|fim_middle|>"; + +const FIM_SUFFIX: &'static str = "<|fim_suffix|>"; + +const ENDOFPROMPT: &'static str = "<|endofprompt|>"; + +#[wasm_bindgen(typescript_custom_section)] +const TS_APPEND_CONTENT: &'static str = r#" +export type BPEEncoding = "gpt2" | "r50k_base" | "p50k_base" | "p50k_edit" | "cl100k_base"; +"#; + +#[wasm_bindgen] +extern "C" { + #[wasm_bindgen(typescript_type = "BPEEncoding")] + pub type BPEEncoding; +} + +#[wasm_bindgen] +pub struct JsBPE { + bpe: Option, +} + +#[wasm_bindgen] +impl JsBPE { + fn get_encoder(tiktoken_bfe: &str) -> Result, usize>> { + let mut encoder = HashMap::default(); + for line in tiktoken_bfe.lines() { + let mut parts = line.split(' '); + let token = &general_purpose::STANDARD.decode(parts.next().unwrap())?; + let rank: usize = parts.next().unwrap().parse().unwrap(); + encoder.insert(token.clone(), rank); + } + + Ok(encoder) + } + + fn gpt2() -> Result { + let mut special_tokens = HashMap::default(); + special_tokens.insert(String::from(ENDOFTEXT), 50256); + + CoreBPE::new( + JsBPE::get_encoder(include_str!("../ranks/gpt2.tiktoken")).unwrap(), + special_tokens, + "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", + ) + } + + fn r50k_base() -> Result { + let mut special_tokens = HashMap::default(); + 
special_tokens.insert(String::from(ENDOFTEXT), 50256); + + CoreBPE::new( + JsBPE::get_encoder(include_str!("../ranks/r50k_base.tiktoken")).unwrap(), + special_tokens, + "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", + ) + } + + fn p50k_base() -> Result { + let mut special_tokens = HashMap::default(); + special_tokens.insert(String::from(ENDOFTEXT), 50256); + + CoreBPE::new( + JsBPE::get_encoder(include_str!("../ranks/p50k_base.tiktoken")).unwrap(), + special_tokens, + "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", + ) + } + + fn p50k_edit() -> Result { + let mut special_tokens = HashMap::default(); + special_tokens.insert(String::from(ENDOFTEXT), 50256); + special_tokens.insert(String::from(FIM_PREFIX), 50281); + special_tokens.insert(String::from(FIM_MIDDLE), 50282); + special_tokens.insert(String::from(FIM_SUFFIX), 50283); + + CoreBPE::new( + JsBPE::get_encoder(include_str!("../ranks/p50k_base.tiktoken")).unwrap(), + special_tokens, + "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", + ) + } + + fn cl100k_base() -> Result { + let mut special_tokens = HashMap::default(); + special_tokens.insert(String::from(ENDOFTEXT), 100257); + special_tokens.insert(String::from(FIM_PREFIX), 100258); + special_tokens.insert(String::from(FIM_MIDDLE), 100259); + special_tokens.insert(String::from(FIM_SUFFIX), 100260); + special_tokens.insert(String::from(ENDOFPROMPT), 100276); + + CoreBPE::new( + JsBPE::get_encoder(include_str!("../ranks/cl100k_base.tiktoken")).unwrap(), + special_tokens, + "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", + ) + } + + pub fn new(encoding: BPEEncoding) -> Self { + let bpe = match encoding.as_string().unwrap().as_str() { + "gpt2" => JsBPE::gpt2(), + "r50k_base" => JsBPE::r50k_base(), + "p50k_base" => JsBPE::p50k_base(), + "p50k_edit" => 
JsBPE::p50k_edit(), + "cl100k_base" => JsBPE::cl100k_base(), + _ => Err(anyhow!("Invalid encoder type")), + }; + + JsBPE { + bpe: Some(bpe.unwrap()), + } + } + + pub fn encode(&self, input: &str) -> Vec { + self.bpe.as_ref().unwrap().encode(&input) + } +} fn _byte_pair_merge(piece: &[u8], ranks: &HashMap, usize>) -> Vec> { let mut parts: Vec<_> = (0..piece.len()).map(|i| i..i + 1).collect(); @@ -109,28 +226,13 @@ pub fn byte_pair_split<'a>(piece: &'a [u8], ranks: &HashMap, usize>) -> use std::num::NonZeroU64; pub struct FakeThreadId(NonZeroU64); -fn hash_current_thread() -> usize { - // It's easier to use unsafe than to use nightly. Rust has this nice u64 thread id counter - // that works great for our use case of avoiding collisions in our array. Unfortunately, - // it's private. However, there are only so many ways you can layout a u64, so just transmute - // https://github.com/rust-lang/rust/issues/67939 - const _: [u8; 8] = [0; std::mem::size_of::()]; - const _: [u8; 8] = [0; std::mem::size_of::()]; - let x = unsafe { - std::mem::transmute::(thread::current().id()).0 - }; - u64::from(x) as usize -} - -const MAX_NUM_THREADS: usize = 128; -#[pyclass] struct CoreBPE { encoder: HashMap, usize>, special_tokens_encoder: HashMap, decoder: HashMap>, special_tokens_decoder: HashMap>, - regex_tls: Vec, - special_regex_tls: Vec, + regex: Regex, + special_regex: Regex, sorted_token_bytes: Vec>, } @@ -139,11 +241,11 @@ impl CoreBPE { // See performance notes above for what this is about // It's also a little janky, please make a better version of it! 
// However, it's nice that this doesn't leak memory to short-lived threads - &self.regex_tls[hash_current_thread() % MAX_NUM_THREADS] + &self.regex } fn _get_tl_special_regex(&self) -> &Regex { - &self.special_regex_tls[hash_current_thread() % MAX_NUM_THREADS] + &self.special_regex } fn _decode_native(&self, tokens: &[usize]) -> Vec { @@ -386,24 +488,20 @@ impl CoreBPE { } } -#[pymethods] impl CoreBPE { - #[new] fn new( encoder: HashMap, usize>, special_tokens_encoder: HashMap, pattern: &str, - ) -> PyResult { - let regex = Regex::new(pattern) - .map_err(|e| PyErr::new::(e.to_string()))?; + ) -> Result { + let regex = Regex::new(pattern)?; let special_regex = { let _parts = special_tokens_encoder .keys() .map(|s| fancy_regex::escape(s)) .collect::>(); - Regex::new(&_parts.join("|")) - .map_err(|e| PyErr::new::(e.to_string()))? + Regex::new(&_parts.join("|"))? }; let decoder: HashMap> = @@ -425,10 +523,8 @@ impl CoreBPE { special_tokens_encoder, decoder, special_tokens_decoder, - regex_tls: (0..MAX_NUM_THREADS).map(|_| regex.clone()).collect(), - special_regex_tls: (0..MAX_NUM_THREADS) - .map(|_| special_regex.clone()) - .collect(), + regex, + special_regex, sorted_token_bytes, }) } @@ -437,16 +533,22 @@ impl CoreBPE { // Encoding // ==================== - fn encode_ordinary(&self, py: Python, text: &str) -> Vec { - py.allow_threads(|| self._encode_ordinary_native(text)) + fn encode_ordinary(&self, text: &str) -> Vec { + self._encode_ordinary_native(text) } - fn encode(&self, py: Python, text: &str, allowed_special: HashSet<&str>) -> Vec { - py.allow_threads(|| self._encode_native(text, &allowed_special).0) + fn encode(&self, text: &str) -> Vec { + let allowed_special = self + .special_tokens_encoder + .keys() + .map(|s| s.as_str()) + .collect(); + + self._encode_native(text, &allowed_special).0 } - fn _encode_bytes(&self, py: Python, bytes: &[u8]) -> Vec { - py.allow_threads(|| { + fn _encode_bytes(&self, bytes: &[u8]) -> Vec { + { match std::str::from_utf8(bytes) 
{ Ok(text) => self._encode_ordinary_native(text), Err(e) => { @@ -469,23 +571,18 @@ impl CoreBPE { tokens } } - }) + } } fn encode_with_unstable( &self, - py: Python, text: &str, allowed_special: HashSet<&str>, - ) -> Py { - let (tokens, completions) = - py.allow_threads(|| self._encode_unstable_native(text, &allowed_special)); - let py_completions = - PyList::new(py, completions.iter().map(|seq| PyList::new(py, &seq[..]))); - (tokens, py_completions).into_py(py) + ) -> (Vec, HashSet>) { + self._encode_unstable_native(text, &allowed_special) } - fn encode_single_token(&self, piece: &[u8]) -> PyResult { + fn encode_single_token(&self, piece: &[u8]) -> Result { if let Some(token) = self.encoder.get(piece).copied() { return Ok(token); } @@ -494,7 +591,7 @@ impl CoreBPE { return Ok(token); } } - Err(PyErr::new::(piece.to_owned())) + Err(anyhow!("Unable to encode single token: {:?}", piece)) } fn encode_single_piece(&self, piece: &[u8]) -> Vec { @@ -508,39 +605,32 @@ impl CoreBPE { // Decoding // ==================== - fn decode_bytes(&self, py: Python, tokens: Vec) -> Py { - let bytes = py.allow_threads(|| self._decode_native(&tokens)); - PyBytes::new(py, &bytes).into() + fn decode_bytes(&self, tokens: Vec) -> Vec { + self._decode_native(&tokens) } - fn decode_single_token_bytes(&self, py: Python, token: usize) -> PyResult> { + fn decode_single_token_bytes(&self, token: usize) -> Result> { if let Some(bytes) = self.decoder.get(&token) { - return Ok(PyBytes::new(py, bytes).into()); + return Ok(bytes.clone()); } if let Some(bytes) = self.special_tokens_decoder.get(&token) { - return Ok(PyBytes::new(py, bytes).into()); + return Ok(bytes.clone()); } - Err(PyErr::new::(token.to_string())) + Err(anyhow!( + "Token not found in the vocabulary: {}", + token.to_string() + )) } // ==================== // Miscellaneous // ==================== - fn token_byte_values(&self, py: Python) -> Vec> { - self.sorted_token_bytes - .iter() - .map(|x| PyBytes::new(py, x).into()) - .collect() 
+ fn token_byte_values(&self) -> Vec> { + self.sorted_token_bytes.clone() } } -#[pymodule] -fn _tiktoken(_py: Python, m: &PyModule) -> PyResult<()> { - m.add_class::()?; - Ok(()) -} - #[cfg(test)] mod tests { use rustc_hash::FxHashMap as HashMap; From 799df7fe862d2c3c0a119be79395c8bbf84f88c6 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 19 Feb 2023 15:28:14 +0100 Subject: [PATCH 014/207] v0.2.1 --- package.json | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 package.json diff --git a/package.json b/package.json new file mode 100644 index 00000000..169fc77d --- /dev/null +++ b/package.json @@ -0,0 +1,24 @@ +{ + "name": "@dqbd/tiktoken", + "version": "0.2.1", + "description": "Javascript bindings for tiktoken", + "files": [ + "dist/**/*", + "package.json" + ], + "license": "Apache-2.0", + "main": "dist/node/_tiktoken.js", + "browser": "dist/web/_tiktoken.js", + "types": "dist/node/_tiktoken.d.ts", + "repository": { + "type": "git", + "url": "https://github.com/dqbd/tiktoken" + }, + "devDependencies": {}, + "scripts": { + "build": "rm -rf dist/ && npm run build:node && npm run build:bundler && npm run build:web", + "build:bundler": "wasm-pack build --target bundler --release --out-dir dist/bundler && rm dist/bundler/.gitignore", + "build:node": "wasm-pack build --target nodejs --release --out-dir dist/node && rm dist/node/.gitignore", + "build:web": "wasm-pack build --target no-modules --release --out-dir dist/web && rm dist/web/.gitignore" + } +} From 5a32b889811f17f6e9c32cdbc1fa55243f273386 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 19 Feb 2023 15:30:38 +0100 Subject: [PATCH 015/207] Update README.md, polish API --- .gitignore | 3 +- README.md | 108 +++++++------------------------------------ src/lib.rs | 133 +++++++++++++++++++++++++++++++++++++++++++---------- yarn.lock | 4 ++ 4 files changed, 130 insertions(+), 118 deletions(-) create mode 100644 yarn.lock diff --git a/.gitignore b/.gitignore index 
1c1340f1..6b0a18bd 100644 --- a/.gitignore +++ b/.gitignore @@ -42,4 +42,5 @@ Cargo.lock target/ # WASM -ranks/ \ No newline at end of file +ranks/ +node_modules \ No newline at end of file diff --git a/README.md b/README.md index d9e461d9..b86b5362 100644 --- a/README.md +++ b/README.md @@ -1,104 +1,28 @@ # ⏳ tiktoken -tiktoken is a fast [BPE](https://en.wikipedia.org/wiki/Byte_pair_encoding) tokeniser for use with -OpenAI's models. +tiktoken is a [BPE](https://en.wikipedia.org/wiki/Byte_pair_encoding) tokeniser for use with +OpenAI's models, forked from the original tiktoken library to provide NPM bindings for Node and other JS runtimes. -```python -import tiktoken -enc = tiktoken.get_encoding("gpt2") -assert enc.decode(enc.encode("hello world")) == "hello world" +```typescript +import assert from "node:assert"; +import { get_encoding, encoding_for_model } from "@dqbd/tiktoken"; -# To get the tokeniser corresponding to a specific model in the OpenAI API: -enc = tiktoken.encoding_for_model("text-davinci-003") -``` +const enc = get_encoding("gpt2"); +assert( + new TextDecoder().decode(enc.decode(enc.encode("hello world"))) === + "hello world" +); -The open source version of `tiktoken` can be installed from PyPI: +// To get the tokeniser corresponding to a specific model in the OpenAI API: +const enc = encoding_for_model("text-davinci-003"); ``` -pip install tiktoken -``` - -The tokeniser API is documented in `tiktoken/core.py`. - -Example code using `tiktoken` can be found in the -[OpenAI Cookbook](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb). - - -## Performance - -`tiktoken` is between 3-6x faster than a comparable open source tokeniser: - -![image](./perf.svg) - -Performance measured on 1GB of text using the GPT-2 tokeniser, using `GPT2TokenizerFast` from -`tokenizers==0.13.2` and `transformers==4.24.0`. 
- - -## Getting help - -Please post questions in the [issue tracker](https://github.com/openai/tiktoken/issues). - -If you work at OpenAI, make sure to check the internal documentation or feel free to contact -@shantanu. - -## Extending tiktoken - -You may wish to extend `tiktoken` to support new encodings. There are two ways to do this. - - -**Create your `Encoding` object exactly the way you want and simply pass it around.** - -```python -cl100k_base = tiktoken.get_encoding("cl100k_base") - -# In production, load the arguments directly instead of accessing private attributes -# See openai_public.py for examples of arguments for specific encodings -enc = tiktoken.Encoding( - # If you're changing the set of special tokens, make sure to use a different name - # It should be clear from the name what behaviour to expect. - name="cl100k_im", - pat_str=cl100k_base._pat_str, - mergeable_ranks=cl100k_base._mergeable_ranks, - special_tokens={ - **cl100k_base._special_tokens, - "<|im_start|>": 100264, - "<|im_end|>": 100265, - } -) -``` - -**Use the `tiktoken_ext` plugin mechanism to register your `Encoding` objects with `tiktoken`.** - -This is only useful if you need `tiktoken.get_encoding` to find your encoding, otherwise prefer -option 1. - -To do this, you'll need to create a namespace package under `tiktoken_ext`. +The open source version of `tiktoken` can be installed from PyPI: -Layout your project like this, making sure to omit the `tiktoken_ext/__init__.py` file: -``` -my_tiktoken_extension -├── tiktoken_ext -│   └── my_encodings.py -└── setup.py ``` - -`my_encodings.py` should be a module that contains a variable named `ENCODING_CONSTRUCTORS`. -This is a dictionary from an encoding name to a function that takes no arguments and returns -arguments that can be passed to `tiktoken.Encoding` to construct that encoding. For an example, see -`tiktoken_ext/openai_public.py`. For precise details, see `tiktoken/registry.py`. 
- -Your `setup.py` should look something like this: -```python -from setuptools import setup, find_namespace_packages - -setup( - name="my_tiktoken_extension", - packages=find_namespace_packages(include=['tiktoken_ext.*']) - install_requires=["tiktoken"], - ... -) +npm install @dqbd/tiktoken ``` -Then simply `pip install my_tiktoken_extension` and you should be able to use your custom encodings! -Make sure **not** to use an editable install. +## Acknowledgements +- https://github.com/zurawiki/tiktoken-rs diff --git a/src/lib.rs b/src/lib.rs index 5004ab9d..22863137 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,22 +18,72 @@ const ENDOFPROMPT: &'static str = "<|endofprompt|>"; #[wasm_bindgen(typescript_custom_section)] const TS_APPEND_CONTENT: &'static str = r#" -export type BPEEncoding = "gpt2" | "r50k_base" | "p50k_base" | "p50k_edit" | "cl100k_base"; +export type TiktokenEmbedding = "gpt2" | "r50k_base" | "p50k_base" | "p50k_edit" | "cl100k_base"; +export type TiktokenModel = + | "text-davinci-003" + | "text-davinci-002" + | "text-davinci-001" + | "text-curie-001" + | "text-babbage-001" + | "text-ada-001" + | "davinci" + | "curie" + | "babbage" + | "ada" + | "code-davinci-002" + | "code-davinci-001" + | "code-cushman-002" + | "code-cushman-001" + | "davinci-codex" + | "cushman-codex" + | "text-davinci-edit-001" + | "code-davinci-edit-001" + | "text-embedding-ada-002" + | "text-similarity-davinci-001" + | "text-similarity-curie-001" + | "text-similarity-babbage-001" + | "text-similarity-ada-001" + | "text-search-davinci-doc-001" + | "text-search-curie-doc-001" + | "text-search-babbage-doc-001" + | "text-search-ada-doc-001" + | "code-search-babbage-code-001" + | "code-search-ada-code-001" + | "gpt2"; + "#; #[wasm_bindgen] extern "C" { - #[wasm_bindgen(typescript_type = "BPEEncoding")] - pub type BPEEncoding; + #[wasm_bindgen(typescript_type = "TiktokenEmbedding")] + pub type TiktokenEmbedding; + + #[wasm_bindgen(typescript_type = "TiktokenModel")] + pub type 
TiktokenModel; } #[wasm_bindgen] -pub struct JsBPE { +pub struct Tiktoken { bpe: Option, } #[wasm_bindgen] -impl JsBPE { +impl Tiktoken { + fn new(encoding: &str) -> Self { + let bpe = match encoding { + "gpt2" => Tiktoken::gpt2(), + "r50k_base" => Tiktoken::r50k_base(), + "p50k_base" => Tiktoken::p50k_base(), + "p50k_edit" => Tiktoken::p50k_edit(), + "cl100k_base" => Tiktoken::cl100k_base(), + _ => Err(anyhow!("Invalid encoder type")), + }; + + Tiktoken { + bpe: Some(bpe.unwrap()), + } + } + fn get_encoder(tiktoken_bfe: &str) -> Result, usize>> { let mut encoder = HashMap::default(); for line in tiktoken_bfe.lines() { @@ -51,7 +101,7 @@ impl JsBPE { special_tokens.insert(String::from(ENDOFTEXT), 50256); CoreBPE::new( - JsBPE::get_encoder(include_str!("../ranks/gpt2.tiktoken")).unwrap(), + Tiktoken::get_encoder(include_str!("../ranks/gpt2.tiktoken")).unwrap(), special_tokens, "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", ) @@ -62,7 +112,7 @@ impl JsBPE { special_tokens.insert(String::from(ENDOFTEXT), 50256); CoreBPE::new( - JsBPE::get_encoder(include_str!("../ranks/r50k_base.tiktoken")).unwrap(), + Tiktoken::get_encoder(include_str!("../ranks/r50k_base.tiktoken")).unwrap(), special_tokens, "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", ) @@ -73,7 +123,7 @@ impl JsBPE { special_tokens.insert(String::from(ENDOFTEXT), 50256); CoreBPE::new( - JsBPE::get_encoder(include_str!("../ranks/p50k_base.tiktoken")).unwrap(), + Tiktoken::get_encoder(include_str!("../ranks/p50k_base.tiktoken")).unwrap(), special_tokens, "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", ) @@ -87,7 +137,7 @@ impl JsBPE { special_tokens.insert(String::from(FIM_SUFFIX), 50283); CoreBPE::new( - JsBPE::get_encoder(include_str!("../ranks/p50k_base.tiktoken")).unwrap(), + Tiktoken::get_encoder(include_str!("../ranks/p50k_base.tiktoken")).unwrap(), special_tokens, "'s|'t|'re|'ve|'m|'ll|'d| 
?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", ) @@ -102,30 +152,63 @@ impl JsBPE { special_tokens.insert(String::from(ENDOFPROMPT), 100276); CoreBPE::new( - JsBPE::get_encoder(include_str!("../ranks/cl100k_base.tiktoken")).unwrap(), + Tiktoken::get_encoder(include_str!("../ranks/cl100k_base.tiktoken")).unwrap(), special_tokens, "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", ) } - pub fn new(encoding: BPEEncoding) -> Self { - let bpe = match encoding.as_string().unwrap().as_str() { - "gpt2" => JsBPE::gpt2(), - "r50k_base" => JsBPE::r50k_base(), - "p50k_base" => JsBPE::p50k_base(), - "p50k_edit" => JsBPE::p50k_edit(), - "cl100k_base" => JsBPE::cl100k_base(), - _ => Err(anyhow!("Invalid encoder type")), - }; - - JsBPE { - bpe: Some(bpe.unwrap()), - } - } - pub fn encode(&self, input: &str) -> Vec { self.bpe.as_ref().unwrap().encode(&input) } + + pub fn decode(&self, tokens: Vec) -> Vec { + self.bpe.as_ref().unwrap().decode_bytes(tokens) + } +} + +#[wasm_bindgen] +pub fn get_encoding(encoding: TiktokenEmbedding) -> Tiktoken { + Tiktoken::new(encoding.as_string().unwrap().as_str()) +} + +#[wasm_bindgen] +pub fn encoding_for_model(encoding: TiktokenModel) -> Tiktoken { + let encoding = match encoding.as_string().unwrap().as_str() { + "text-davinci-003" => "p50k_base", + "text-davinci-002" => "p50k_base", + "text-davinci-001" => "r50k_base", + "text-curie-001" => "r50k_base", + "text-babbage-001" => "r50k_base", + "text-ada-001" => "r50k_base", + "davinci" => "r50k_base", + "curie" => "r50k_base", + "babbage" => "r50k_base", + "ada" => "r50k_base", + "code-davinci-002" => "p50k_base", + "code-davinci-001" => "p50k_base", + "code-cushman-002" => "p50k_base", + "code-cushman-001" => "p50k_base", + "davinci-codex" => "p50k_base", + "cushman-codex" => "p50k_base", + "text-davinci-edit-001" => "p50k_edit", + "code-davinci-edit-001" => "p50k_edit", + "text-embedding-ada-002" 
=> "cl100k_base", + "text-similarity-davinci-001" => "r50k_base", + "text-similarity-curie-001" => "r50k_base", + "text-similarity-babbage-001" => "r50k_base", + "text-similarity-ada-001" => "r50k_base", + "text-search-davinci-doc-001" => "r50k_base", + "text-search-curie-doc-001" => "r50k_base", + "text-search-babbage-doc-001" => "r50k_base", + "text-search-ada-doc-001" => "r50k_base", + "code-search-babbage-code-001" => "r50k_base", + "code-search-ada-code-001" => "r50k_base", + "gpt2" => "gpt2", + &_ => "", + }; + + Tiktoken::new(encoding) } fn _byte_pair_merge(piece: &[u8], ranks: &HashMap, usize>) -> Vec> { diff --git a/yarn.lock b/yarn.lock new file mode 100644 index 00000000..fb57ccd1 --- /dev/null +++ b/yarn.lock @@ -0,0 +1,4 @@ +# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. +# yarn lockfile v1 + + From 97d8dea88f6784916c62aba3c7f98af5be6d55f5 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Wed, 22 Feb 2023 16:19:50 +0100 Subject: [PATCH 016/207] improve error handling --- jni/src/lib.rs | 132 ++++++++++++++++++++++++++++--------------------- 1 file changed, 77 insertions(+), 55 deletions(-) diff --git a/jni/src/lib.rs b/jni/src/lib.rs index 209f23b9..32f2b340 100644 --- a/jni/src/lib.rs +++ b/jni/src/lib.rs @@ -15,40 +15,62 @@ use jni::sys::{jarray, jlong}; use _tiktoken_core::{self, CoreBPENative}; -#[no_mangle] -pub extern "system" fn Java_tiktoken_Encoding_init(env: JNIEnv, obj: JObject, model_name: JString) { - // First, we have to get the string out of Java. Check out the `strings` - // module for more info on how this works. 
- let model_name: String = env - .get_string(model_name) - .expect("Unable to get Java model name") - .into(); - - let encoding_name = _tiktoken_core::openai_public::MODEL_TO_ENCODING - .get(&model_name) - .expect("Unable to find model"); +use jni::errors::Error; - // TODO: this is actually mergable_ranks (lazy) - let mut encoding = _tiktoken_core::openai_public::REGISTRY - .get(encoding_name) - .expect("Unable to find encoding"); - - // TODO: initialize the CoreBPE object +fn unwrap_or_throw(env: &JNIEnv, result: Result, default: T) -> T { + // Check if an exception is already thrown + if env.exception_check().unwrap() { + return default; + } - // TODO: this should be CoreBPE + match result { + Ok(tokenizer) => tokenizer, + Err(error) => { + let exception_class = env + .find_class("java/lang/Exception") + .unwrap(); + env.throw_new(exception_class, format!("{}", error)) + .unwrap(); + default + } + } +} - let bpe_native = CoreBPENative::new( - encoding.get().unwrap(), - encoding.special_tokens.clone(), - &encoding.pat_str, - ) - .unwrap(); +#[no_mangle] +pub extern "system" fn Java_tiktoken_Encoding_init(env: JNIEnv, obj: JObject, model_name: JString) { + let result = || -> Result<(), Error> { + // First, we have to get the string out of Java. Check out the `strings` + // module for more info on how this works. + let model_name: String = env + .get_string(model_name)? 
+ .into(); + + let encoding_name = _tiktoken_core::openai_public::MODEL_TO_ENCODING + .get(&model_name) + .expect("Unable to find model"); + + // TODO: this is actually mergable_ranks (lazy) + let mut encoding = _tiktoken_core::openai_public::REGISTRY + .get(encoding_name) + .expect("Unable to find encoding"); + + // TODO: initialize the CoreBPE object + + // TODO: this should be CoreBPE + + let bpe_native = CoreBPENative::new( + encoding.get().unwrap(), + encoding.special_tokens.clone(), + &encoding.pat_str, + ) + .unwrap(); - unsafe { - env.set_rust_field(obj, "handle", bpe_native).unwrap(); - } + Ok(unsafe { + env.set_rust_field(obj, "handle", bpe_native).unwrap(); + }) + }(); - // env.set_field(obj, "handle", "J", jni::objects::JValue::Long(encoding_ptr)).expect("Unable to store handle"); + unwrap_or_throw(&env, result, ()) } #[no_mangle] @@ -66,35 +88,35 @@ pub extern "system" fn Java_tiktoken_Encoding_encode( allowedSpecialTokens: jarray, maxTokenLength: jlong, ) -> jarray { - let encoding: MutexGuard = unsafe { env.get_rust_field(obj, "handle").unwrap() }; - - let enc = encoding; - let input: String = env - .get_string(text) - .expect("Couldn't get java string!") - .into(); - - let len = env.get_array_length(allowedSpecialTokens).unwrap(); - let mut strings: Vec = Vec::with_capacity(len as usize); - for i in 0..len { - let element: JObject = env - .get_object_array_element(allowedSpecialTokens, i) - .unwrap(); - let current: String = env.get_string(element.into()).unwrap().into(); - strings.push(current); - } + let result = || -> Result { + let encoding: MutexGuard = unsafe { env.get_rust_field(obj, "handle")? }; - let v2: HashSet<&str> = strings.iter().map(|s| &**s).collect(); + let enc = encoding; + let input: String = env + .get_string(text)? 
+ .into(); - let (tokens, _, _) = enc._encode_native(&input, &v2, Some(maxTokenLength as usize)); + let len = env.get_array_length(allowedSpecialTokens)?; + let mut strings: Vec = Vec::with_capacity(len as usize); + for i in 0..len { + let element: JObject = env + .get_object_array_element(allowedSpecialTokens, i)?; + let current: String = env.get_string(element.into())?.into(); + strings.push(current); + } - let mut output = env - .new_long_array(tokens.len().try_into().unwrap()) - .unwrap(); + let v2: HashSet<&str> = strings.iter().map(|s| &**s).collect(); - let array_of_u64 = tokens.iter().map(|x| *x as i64).collect::>(); - env.set_long_array_region(output, 0, array_of_u64.as_slice()) - .unwrap(); + let (tokens, _, _) = enc._encode_native(&input, &v2, Some(maxTokenLength as usize)); + + let mut output = env + .new_long_array(tokens.len().try_into().unwrap())?; + + let array_of_u64 = tokens.iter().map(|x| *x as i64).collect::>(); + env.set_long_array_region(output, 0, array_of_u64.as_slice())?; + + Ok(output) + }(); - output + unwrap_or_throw(&env, result, JObject::null().into_raw()) } From 3139d04e126f596eb6838b58732d830ffa5f63a7 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 23 Feb 2023 04:13:01 +0100 Subject: [PATCH 017/207] chore: add README --- README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b86b5362..f2354535 100644 --- a/README.md +++ b/README.md @@ -17,12 +17,22 @@ assert( const enc = encoding_for_model("text-davinci-003"); ``` -The open source version of `tiktoken` can be installed from PyPI: +The open source version of `tiktoken` can be installed from NPM: ``` npm install @dqbd/tiktoken ``` +Please note there are some missing features which are present in the Python version but not in the JS version. + ## Acknowledgements - https://github.com/zurawiki/tiktoken-rs + +## Tasks to do before creating an upstream PR + +1. 
Add back the pyo3 bindings, so we can build both Python version and JS version at the same time +2. Allow loading of embeddings via an argument. This is needed to make the resulting WASM blob smaller, as it is currently inlined during build. +3. Examine the possibility of reintroduction of multithreading (not sure, if that is even needed however due to the sheer perf. difference between other JS libraries) +4. Feature parity match - adding special tokens support etc. +5. Investigate better packaging support for browsers and other runtimes. \ No newline at end of file From 02d132e941e75d0d8fa3fc02a50e5f27f1ab23a3 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 23 Feb 2023 12:28:47 +0100 Subject: [PATCH 018/207] feat: add option to extend special tokens and to provide custom bfe --- Cargo.toml | 8 +- src/lib.rs | 266 ++++++++++++++++++++++++++++++++++------------------- 2 files changed, 180 insertions(+), 94 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8a8243d5..34b1c8fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,8 @@ wasm-bindgen = "0.2.83" js-sys = "0.3.61" anyhow = "1.0.69" base64 = "0.21.0" +gloo-utils = { version = "0.1", features = ["serde"] } +serde = { version = "1.0", features = ["derive"] } # tiktoken dependencies fancy-regex = "0.10.0" @@ -22,4 +24,8 @@ bstr = "1.0.1" [profile.release] incremental = true -opt-level = "s" \ No newline at end of file +opt-level = "s" + +[features] +default = ["inline"] +inline = [] \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 22863137..a1fe9949 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,90 +1,47 @@ use anyhow::{anyhow, Result}; use base64::{engine::general_purpose, Engine as _}; use fancy_regex::Regex; +use gloo_utils::format::JsValueSerdeExt; use rustc_hash::FxHashMap as HashMap; use std::collections::HashSet; use wasm_bindgen::prelude::*; +#[cfg(feature = "inline")] const ENDOFTEXT: &'static str = "<|endoftext|>"; +#[cfg(feature = "inline")] const FIM_PREFIX: 
&'static str = "<|fim_prefix|>"; +#[cfg(feature = "inline")] const FIM_MIDDLE: &'static str = "<|fim_middle|>"; +#[cfg(feature = "inline")] const FIM_SUFFIX: &'static str = "<|fim_suffix|>"; +#[cfg(feature = "inline")] const ENDOFPROMPT: &'static str = "<|endofprompt|>"; -#[wasm_bindgen(typescript_custom_section)] -const TS_APPEND_CONTENT: &'static str = r#" -export type TiktokenEmbedding = "gpt2" | "r50k_base" | "p50k_base" | "p50k_edit" | "cl100k_base"; -export type TiktokenModel = - | "text-davinci-003" - | "text-davinci-002" - | "text-davinci-001" - | "text-curie-001" - | "text-babbage-001" - | "text-ada-001" - | "davinci" - | "curie" - | "babbage" - | "ada" - | "code-davinci-002" - | "code-davinci-001" - | "code-cushman-002" - | "code-cushman-001" - | "davinci-codex" - | "cushman-codex" - | "text-davinci-edit-001" - | "code-davinci-edit-001" - | "text-embedding-ada-002" - | "text-similarity-davinci-001" - | "text-similarity-curie-001" - | "text-similarity-babbage-001" - | "text-similarity-ada-001" - | "text-search-davinci-doc-001" - | "text-search-curie-doc-001" - | "text-search-babbage-doc-001" - | "text-search-ada-doc-001" - | "code-search-babbage-code-001" - | "code-search-ada-code-001" - | "gpt2"; - -"#; - -#[wasm_bindgen] -extern "C" { - #[wasm_bindgen(typescript_type = "TiktokenEmbedding")] - pub type TiktokenEmbedding; - - #[wasm_bindgen(typescript_type = "TiktokenModel")] - pub type TiktokenModel; -} - -#[wasm_bindgen] -pub struct Tiktoken { - bpe: Option, +struct CoreBPEConstructor { + encoder: HashMap, usize>, + special_tokens: HashMap, + pat_str: String, } -#[wasm_bindgen] -impl Tiktoken { - fn new(encoding: &str) -> Self { - let bpe = match encoding { - "gpt2" => Tiktoken::gpt2(), - "r50k_base" => Tiktoken::r50k_base(), - "p50k_base" => Tiktoken::p50k_base(), - "p50k_edit" => Tiktoken::p50k_edit(), - "cl100k_base" => Tiktoken::cl100k_base(), - _ => Err(anyhow!("Invalid encoder type")), - }; - - Tiktoken { - bpe: Some(bpe.unwrap()), +impl 
CoreBPEConstructor { + fn new( + tiktoken_bfe: &str, + special_tokens: Option>, + pat_str: &str, + ) -> Self { + CoreBPEConstructor { + encoder: CoreBPEConstructor::parse_bfe(tiktoken_bfe).unwrap(), + special_tokens: special_tokens.unwrap_or_default(), + pat_str: String::from(pat_str), } } - fn get_encoder(tiktoken_bfe: &str) -> Result, usize>> { + fn parse_bfe(tiktoken_bfe: &str) -> Result, usize>> { let mut encoder = HashMap::default(); for line in tiktoken_bfe.lines() { let mut parts = line.split(' '); @@ -96,54 +53,59 @@ impl Tiktoken { Ok(encoder) } - fn gpt2() -> Result { + #[cfg(feature = "inline")] + fn gpt2() -> Self { let mut special_tokens = HashMap::default(); special_tokens.insert(String::from(ENDOFTEXT), 50256); - CoreBPE::new( - Tiktoken::get_encoder(include_str!("../ranks/gpt2.tiktoken")).unwrap(), - special_tokens, + CoreBPEConstructor::new( + include_str!("../ranks/gpt2.tiktoken"), + Some(special_tokens), "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", ) } - fn r50k_base() -> Result { + #[cfg(feature = "inline")] + fn r50k_base() -> Self { let mut special_tokens = HashMap::default(); special_tokens.insert(String::from(ENDOFTEXT), 50256); - CoreBPE::new( - Tiktoken::get_encoder(include_str!("../ranks/r50k_base.tiktoken")).unwrap(), - special_tokens, + CoreBPEConstructor::new( + include_str!("../ranks/r50k_base.tiktoken"), + Some(special_tokens), "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", ) } - fn p50k_base() -> Result { + #[cfg(feature = "inline")] + fn p50k_base() -> Self { let mut special_tokens = HashMap::default(); special_tokens.insert(String::from(ENDOFTEXT), 50256); - CoreBPE::new( - Tiktoken::get_encoder(include_str!("../ranks/p50k_base.tiktoken")).unwrap(), - special_tokens, + CoreBPEConstructor::new( + include_str!("../ranks/p50k_base.tiktoken"), + Some(special_tokens), "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", ) } 
- fn p50k_edit() -> Result { + #[cfg(feature = "inline")] + fn p50k_edit() -> Self { let mut special_tokens = HashMap::default(); special_tokens.insert(String::from(ENDOFTEXT), 50256); special_tokens.insert(String::from(FIM_PREFIX), 50281); special_tokens.insert(String::from(FIM_MIDDLE), 50282); special_tokens.insert(String::from(FIM_SUFFIX), 50283); - CoreBPE::new( - Tiktoken::get_encoder(include_str!("../ranks/p50k_base.tiktoken")).unwrap(), - special_tokens, + CoreBPEConstructor::new( + include_str!("../ranks/p50k_base.tiktoken"), + Some(special_tokens), "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", ) } - fn cl100k_base() -> Result { + #[cfg(feature = "inline")] + fn cl100k_base() -> Self { let mut special_tokens = HashMap::default(); special_tokens.insert(String::from(ENDOFTEXT), 100257); special_tokens.insert(String::from(FIM_PREFIX), 100258); @@ -151,30 +113,143 @@ impl Tiktoken { special_tokens.insert(String::from(FIM_SUFFIX), 100260); special_tokens.insert(String::from(ENDOFPROMPT), 100276); - CoreBPE::new( - Tiktoken::get_encoder(include_str!("../ranks/cl100k_base.tiktoken")).unwrap(), - special_tokens, + CoreBPEConstructor::new( + include_str!("../ranks/cl100k_base.tiktoken"), + Some(special_tokens), "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", ) } +} + +#[wasm_bindgen] +pub struct Tiktoken { + bpe: CoreBPE, +} + +#[wasm_bindgen] +impl Tiktoken { + #[wasm_bindgen(constructor)] + pub fn new(tiktoken_bfe: &str, special_tokens: JsValue, pat_str: &str) -> Self { + let constructor = CoreBPEConstructor::new( + tiktoken_bfe, + special_tokens.into_serde::>().ok(), + pat_str, + ); + + Tiktoken { + bpe: CoreBPE::new( + constructor.encoder, + constructor.special_tokens, + &constructor.pat_str, + ) + .unwrap(), + } + } + + #[cfg(feature = "inline")] + fn with_encoding( + encoding: &str, + extend_special_tokens: &Option>, + ) -> Self { + 
let mut bpe: CoreBPEConstructor = match encoding { + "gpt2" => CoreBPEConstructor::gpt2(), + "r50k_base" => CoreBPEConstructor::r50k_base(), + "p50k_base" => CoreBPEConstructor::p50k_base(), + "p50k_edit" => CoreBPEConstructor::p50k_edit(), + "cl100k_base" => CoreBPEConstructor::cl100k_base(), + &_ => unreachable!(), + }; + + match extend_special_tokens { + Some(tokens) => bpe.special_tokens.extend(tokens.clone()), + _ => (), + }; + + Tiktoken { + bpe: CoreBPE::new(bpe.encoder, bpe.special_tokens, &bpe.pat_str).unwrap(), + } + } pub fn encode(&self, input: &str) -> Vec { - self.bpe.as_ref().unwrap().encode(&input) + self.bpe.encode(&input) } pub fn decode(&self, tokens: Vec) -> Vec { - self.bpe.as_ref().unwrap().decode_bytes(tokens) + self.bpe.decode_bytes(tokens) } } -#[wasm_bindgen] -pub fn get_encoding(encoding: TiktokenEmbedding) -> Tiktoken { - Tiktoken::new(encoding.as_string().unwrap().as_str()) +#[cfg(feature = "inline")] +#[wasm_bindgen(typescript_custom_section)] +const _: &'static str = r#" +export type TiktokenEmbedding = "gpt2" | "r50k_base" | "p50k_base" | "p50k_edit" | "cl100k_base"; + +/** + * @param {TiktokenEmbedding} encoding + * @param {Record} [extend_special_tokens] + * @returns {Tiktoken} + */ +export function get_encoding(encoding: TiktokenEmbedding, extend_special_tokens?: Record): Tiktoken; +"#; + +#[cfg(feature = "inline")] +#[wasm_bindgen(skip_typescript)] +pub fn get_encoding(encoding: &str, extend_special_tokens: JsValue) -> Tiktoken { + Tiktoken::with_encoding( + encoding, + &extend_special_tokens + .into_serde::>() + .ok(), + ) } -#[wasm_bindgen] -pub fn encoding_for_model(encoding: TiktokenModel) -> Tiktoken { - let encoding = match encoding.as_string().unwrap().as_str() { +#[cfg(feature = "inline")] +#[wasm_bindgen(typescript_custom_section)] +const _: &'static str = r#" +export type TiktokenModel = + | "text-davinci-003" + | "text-davinci-002" + | "text-davinci-001" + | "text-curie-001" + | "text-babbage-001" + | "text-ada-001" + 
| "davinci" + | "curie" + | "babbage" + | "ada" + | "code-davinci-002" + | "code-davinci-001" + | "code-cushman-002" + | "code-cushman-001" + | "davinci-codex" + | "cushman-codex" + | "text-davinci-edit-001" + | "code-davinci-edit-001" + | "text-embedding-ada-002" + | "text-similarity-davinci-001" + | "text-similarity-curie-001" + | "text-similarity-babbage-001" + | "text-similarity-ada-001" + | "text-search-davinci-doc-001" + | "text-search-curie-doc-001" + | "text-search-babbage-doc-001" + | "text-search-ada-doc-001" + | "code-search-babbage-code-001" + | "code-search-ada-code-001" + | "gpt2"; + +/** + * @param {TiktokenModel} encoding + * @param {Record} [extend_special_tokens] + * @returns {Tiktoken} + */ +export function encoding_for_model(model: TiktokenModel, extend_special_tokens?: Record): Tiktoken; +"#; + +#[cfg(feature = "inline")] +#[wasm_bindgen(skip_typescript)] +pub fn encoding_for_model(model: &str, extend_special_tokens: JsValue) -> Tiktoken { + let encoding = match model { "text-davinci-003" => "p50k_base", "text-davinci-002" => "p50k_base", "text-davinci-001" => "r50k_base", @@ -208,7 +283,12 @@ pub fn encoding_for_model(encoding: TiktokenModel) -> Tiktoken { &_ => "", }; - Tiktoken::new(encoding) + Tiktoken::with_encoding( + encoding, + &extend_special_tokens + .into_serde::>() + .ok(), + ) } fn _byte_pair_merge(piece: &[u8], ranks: &HashMap, usize>) -> Vec> { From 3a39f247647e4961265f96bc30a4a82f1401d40e Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 23 Feb 2023 23:28:19 +0100 Subject: [PATCH 019/207] Bump version, update README --- README.md | 35 +++++++++++++++++++++++++++++------ package.json | 2 +- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index f2354535..f2c7c5fb 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,18 @@ tiktoken is a [BPE](https://en.wikipedia.org/wiki/Byte_pair_encoding) tokeniser for use with OpenAI's models, forked from the original tiktoken library to provide NPM 
bindings for Node and other JS runtimes. +The open source version of `tiktoken` can be installed from NPM: + +``` +npm install @dqbd/tiktoken +``` + +> Please note there are some missing features which are present in the Python version but not in the JS version. + +## Usage + +Basic usage follows: + ```typescript import assert from "node:assert"; import { get_encoding, encoding_for_model } from "@dqbd/tiktoken"; @@ -15,15 +27,26 @@ assert( // To get the tokeniser corresponding to a specific model in the OpenAI API: const enc = encoding_for_model("text-davinci-003"); + +// Extend existing encoding with custom special tokens +const enc = encoding_for_model("gpt2", { + "<|im_start|>": 100264, + "<|im_end|>": 100265, +}); ``` -The open source version of `tiktoken` can be installed from NPM: +If desired, you can create a Tiktoken instance directly with custom ranks, special tokens and regex pattern: -``` -npm install @dqbd/tiktoken -``` +```typescript +import { Tiktoken } from "../pkg"; +import { readFileSync } from "fs"; -Please note there are some missing features which are present in the Python version but not in the JS version. +const encoder = new Tiktoken( + readFileSync("./ranks/gpt2.tiktoken").toString("utf-8"), + { "<|endoftext|>": 50256, "<|im_start|>": 100264, "<|im_end|>": 100265 }, + "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+" +); +``` ## Acknowledgements @@ -35,4 +58,4 @@ Please note there are some missing features which are present in the Python vers 2. Allow loading of embeddings via an argument. This is needed to make the resulting WASM blob smaller, as it is currently inlined during build. 3. Examine the possibility of reintroduction of multithreading (not sure, if that is even needed however due to the sheer perf. difference between other JS libraries) 4. Feature parity match - adding special tokens support etc. -5. Investigate better packaging support for browsers and other runtimes. 
\ No newline at end of file +5. Investigate better packaging support for browsers and other runtimes. diff --git a/package.json b/package.json index 169fc77d..1a9f6dcf 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "0.2.1", + "version": "0.3.0", "description": "Javascript bindings for tiktoken", "files": [ "dist/**/*", From d0dc32bb6d3f27b307c30e69ad1c53d70235a0ed Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 23 Feb 2023 23:31:08 +0100 Subject: [PATCH 020/207] Improve error handling, add support for parameters --- src/lib.rs | 242 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 177 insertions(+), 65 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a1fe9949..bd2aed16 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,10 +1,10 @@ -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Error}; use base64::{engine::general_purpose, Engine as _}; use fancy_regex::Regex; use gloo_utils::format::JsValueSerdeExt; use rustc_hash::FxHashMap as HashMap; use std::collections::HashSet; - +use std::result::Result; use wasm_bindgen::prelude::*; #[cfg(feature = "inline")] @@ -41,7 +41,7 @@ impl CoreBPEConstructor { } } - fn parse_bfe(tiktoken_bfe: &str) -> Result, usize>> { + fn parse_bfe(tiktoken_bfe: &str) -> Result, usize>, Error> { let mut encoder = HashMap::default(); for line in tiktoken_bfe.lines() { let mut parts = line.split(' '); @@ -123,6 +123,7 @@ impl CoreBPEConstructor { #[wasm_bindgen] pub struct Tiktoken { + special_tokens_set: HashSet, bpe: CoreBPE, } @@ -137,6 +138,11 @@ impl Tiktoken { ); Tiktoken { + special_tokens_set: constructor + .special_tokens + .keys() + .map(|s| s.clone()) + .collect(), bpe: CoreBPE::new( constructor.encoder, constructor.special_tokens, @@ -150,33 +156,140 @@ impl Tiktoken { fn with_encoding( encoding: &str, extend_special_tokens: &Option>, - ) -> Self { - let mut bpe: CoreBPEConstructor = match encoding { - "gpt2" => CoreBPEConstructor::gpt2(), - 
"r50k_base" => CoreBPEConstructor::r50k_base(), - "p50k_base" => CoreBPEConstructor::p50k_base(), - "p50k_edit" => CoreBPEConstructor::p50k_edit(), - "cl100k_base" => CoreBPEConstructor::cl100k_base(), - &_ => unreachable!(), - }; + ) -> Result { + let mut constructor: CoreBPEConstructor = match encoding { + "gpt2" => Ok(CoreBPEConstructor::gpt2()), + "r50k_base" => Ok(CoreBPEConstructor::r50k_base()), + "p50k_base" => Ok(CoreBPEConstructor::p50k_base()), + "p50k_edit" => Ok(CoreBPEConstructor::p50k_edit()), + "cl100k_base" => Ok(CoreBPEConstructor::cl100k_base()), + &_ => Err(JsError::new("Invalid encoding")), + }?; + + if let Some(tokens) = extend_special_tokens { + constructor.special_tokens.extend(tokens.clone()); + } - match extend_special_tokens { - Some(tokens) => bpe.special_tokens.extend(tokens.clone()), - _ => (), - }; + Ok(Tiktoken { + // TODO: can we avoid cloning here? + special_tokens_set: constructor + .special_tokens + .keys() + .map(|s| s.clone()) + .collect(), + bpe: CoreBPE::new( + constructor.encoder, + constructor.special_tokens, + &constructor.pat_str, + ) + .unwrap(), + }) + } - Tiktoken { - bpe: CoreBPE::new(bpe.encoder, bpe.special_tokens, &bpe.pat_str).unwrap(), - } + pub fn encode( + &self, + text: &str, + allowed_special: JsValue, + disallowed_special: JsValue, + ) -> Result, JsError> { + let allowed_tokens = + self.validate_allowed_tokens(text, &allowed_special, &disallowed_special)?; + + Ok(self + .bpe + .encode(&text, allowed_tokens.iter().map(AsRef::as_ref).collect())) } - pub fn encode(&self, input: &str) -> Vec { - self.bpe.encode(&input) + pub fn encode_ordinary(&self, text: &str) -> Vec { + self.bpe.encode_ordinary(&text) + } + + pub fn encode_with_unstable( + &self, + text: &str, + allowed_special: JsValue, + disallowed_special: JsValue, + ) -> Result { + let allowed_tokens = + self.validate_allowed_tokens(text, &allowed_special, &disallowed_special)?; + + JsValue::from_serde( + &self + .bpe + .encode_with_unstable(&text, 
allowed_tokens.iter().map(AsRef::as_ref).collect()), + ) + .map_err(|e| { + JsError::new(&format!( + "Failed to serialize encode_with_unstable result: {}", + e + )) + }) + } + + pub fn encode_single_token(&self, bytes: &[u8]) -> usize { + self.bpe.encode_single_token(&bytes).unwrap_throw() + } + + #[wasm_bindgen(skip_typescript)] + pub fn _encode_single_piece(&self, bytes: &[u8]) -> Vec { + self.bpe.encode_single_piece(&bytes) } pub fn decode(&self, tokens: Vec) -> Vec { self.bpe.decode_bytes(tokens) } + + pub fn decode_single_token_bytes(&self, token: usize) -> Vec { + self.bpe.decode_single_token_bytes(token).unwrap_throw() + } + + pub fn token_byte_values(&self) -> JsValue { + JsValue::from_serde(&self.bpe.token_byte_values()).unwrap_throw() + } + + fn validate_allowed_tokens( + &self, + text: &str, + allowed_special_param: &JsValue, + disallowed_special_param: &JsValue, + ) -> Result, JsError> { + let allowed_special: HashSet = match allowed_special_param.as_string() { + Some(value) => match value.as_str() { + "all" => Ok(self.special_tokens_set.clone()), + _ => Err(JsError::new("Invalid value for allowed_special")), + }, + _ => Ok(JsValue::into_serde(&allowed_special_param).unwrap_or_default()), + }?; + + let disallowed_special: HashSet = match disallowed_special_param.as_string() { + Some(value) => match value.as_str() { + "all" => Ok(&self.special_tokens_set - &allowed_special), + _ => Err(JsError::new("Invalid value for disallowed_special")), + }, + _ => Ok(JsValue::into_serde(&disallowed_special_param).unwrap_or_default()), + }?; + + if !disallowed_special.is_empty() { + if let Some(found) = Tiktoken::special_token_regex(&disallowed_special).find(text)? 
{ + return Err(JsError::new(&format!( + "The text contains a special token that is not allowed: {}", + found.as_str() + ))); + } + } + + return Ok(allowed_special); + } + + fn special_token_regex(tokens: &HashSet) -> Regex { + let inner = tokens + .iter() + .map(|token| regex::escape(token)) + .collect::>() + .join("|"); + + Regex::new(&format!("({})", inner)).unwrap_throw() + } } #[cfg(feature = "inline")] @@ -194,7 +307,7 @@ export function get_encoding(encoding: TiktokenEmbedding, extend_special_tokens? #[cfg(feature = "inline")] #[wasm_bindgen(skip_typescript)] -pub fn get_encoding(encoding: &str, extend_special_tokens: JsValue) -> Tiktoken { +pub fn get_encoding(encoding: &str, extend_special_tokens: JsValue) -> Result { Tiktoken::with_encoding( encoding, &extend_special_tokens @@ -248,40 +361,45 @@ export function encoding_for_model(model: TiktokenModel, extend_special_tokens?: #[cfg(feature = "inline")] #[wasm_bindgen(skip_typescript)] -pub fn encoding_for_model(model: &str, extend_special_tokens: JsValue) -> Tiktoken { +pub fn encoding_for_model( + model: &str, + extend_special_tokens: JsValue, +) -> Result { let encoding = match model { - "text-davinci-003" => "p50k_base", - "text-davinci-002" => "p50k_base", - "text-davinci-001" => "r50k_base", - "text-curie-001" => "r50k_base", - "text-babbage-001" => "r50k_base", - "text-ada-001" => "r50k_base", - "davinci" => "r50k_base", - "curie" => "r50k_base", - "babbage" => "r50k_base", - "ada" => "r50k_base", - "code-davinci-002" => "p50k_base", - "code-davinci-001" => "p50k_base", - "code-cushman-002" => "p50k_base", - "code-cushman-001" => "p50k_base", - "davinci-codex" => "p50k_base", - "cushman-codex" => "p50k_base", - "text-davinci-edit-001" => "p50k_edit", - "code-davinci-edit-001" => "p50k_edit", - "text-embedding-ada-002" => "cl100k_base", - "text-similarity-davinci-001" => "r50k_base", - "text-similarity-curie-001" => "r50k_base", - "text-similarity-babbage-001" => "r50k_base", - 
"text-similarity-ada-001" => "r50k_base", - "text-search-davinci-doc-001" => "r50k_base", - "text-search-curie-doc-001" => "r50k_base", - "text-search-babbage-doc-001" => "r50k_base", - "text-search-ada-doc-001" => "r50k_base", - "code-search-babbage-code-001" => "r50k_base", - "code-search-ada-code-001" => "r50k_base", - "gpt2" => "gpt2", - &_ => "", - }; + "text-davinci-003" => Ok("p50k_base"), + "text-davinci-002" => Ok("p50k_base"), + "text-davinci-001" => Ok("r50k_base"), + "text-curie-001" => Ok("r50k_base"), + "text-babbage-001" => Ok("r50k_base"), + "text-ada-001" => Ok("r50k_base"), + "davinci" => Ok("r50k_base"), + "curie" => Ok("r50k_base"), + "babbage" => Ok("r50k_base"), + "ada" => Ok("r50k_base"), + "code-davinci-002" => Ok("p50k_base"), + "code-davinci-001" => Ok("p50k_base"), + "code-cushman-002" => Ok("p50k_base"), + "code-cushman-001" => Ok("p50k_base"), + "davinci-codex" => Ok("p50k_base"), + "cushman-codex" => Ok("p50k_base"), + "text-davinci-edit-001" => Ok("p50k_edit"), + "code-davinci-edit-001" => Ok("p50k_edit"), + "text-embedding-ada-002" => Ok("cl100k_base"), + "text-similarity-davinci-001" => Ok("r50k_base"), + "text-similarity-curie-001" => Ok("r50k_base"), + "text-similarity-babbage-001" => Ok("r50k_base"), + "text-similarity-ada-001" => Ok("r50k_base"), + "text-search-davinci-doc-001" => Ok("r50k_base"), + "text-search-curie-doc-001" => Ok("r50k_base"), + "text-search-babbage-doc-001" => Ok("r50k_base"), + "text-search-ada-doc-001" => Ok("r50k_base"), + "code-search-babbage-code-001" => Ok("r50k_base"), + "code-search-ada-code-001" => Ok("r50k_base"), + "gpt2" => Ok("gpt2"), + model => Err(JsError::new( + format!("Invalid model: {}", model.to_string()).as_str(), + )), + }?; Tiktoken::with_encoding( encoding, @@ -656,7 +774,7 @@ impl CoreBPE { encoder: HashMap, usize>, special_tokens_encoder: HashMap, pattern: &str, - ) -> Result { + ) -> Result { let regex = Regex::new(pattern)?; let special_regex = { @@ -700,13 +818,7 @@ impl CoreBPE 
{ self._encode_ordinary_native(text) } - fn encode(&self, text: &str) -> Vec { - let allowed_special = self - .special_tokens_encoder - .keys() - .map(|s| s.as_str()) - .collect(); - + fn encode(&self, text: &str, allowed_special: HashSet<&str>) -> Vec { self._encode_native(text, &allowed_special).0 } @@ -745,7 +857,7 @@ impl CoreBPE { self._encode_unstable_native(text, &allowed_special) } - fn encode_single_token(&self, piece: &[u8]) -> Result { + fn encode_single_token(&self, piece: &[u8]) -> Result { if let Some(token) = self.encoder.get(piece).copied() { return Ok(token); } @@ -772,7 +884,7 @@ impl CoreBPE { self._decode_native(&tokens) } - fn decode_single_token_bytes(&self, token: usize) -> Result> { + fn decode_single_token_bytes(&self, token: usize) -> Result, Error> { if let Some(bytes) = self.decoder.get(&token) { return Ok(bytes.clone()); } From 90ee9f69fb8261dfe28a22c49b25d68eb8a18d7a Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 24 Feb 2023 00:43:29 +0100 Subject: [PATCH 021/207] Fix `any` in TS files, add core tests --- package.json | 16 +- scripts/override_any.ts | 38 ++ tests/test_simple_public.test.ts | 51 ++ yarn.lock | 953 +++++++++++++++++++++++++++++++ 4 files changed, 1054 insertions(+), 4 deletions(-) create mode 100644 scripts/override_any.ts create mode 100644 tests/test_simple_public.test.ts diff --git a/package.json b/package.json index 1a9f6dcf..25f7231c 100644 --- a/package.json +++ b/package.json @@ -14,11 +14,19 @@ "type": "git", "url": "https://github.com/dqbd/tiktoken" }, - "devDependencies": {}, + "devDependencies": { + "ts-morph": "^17.0.1", + "tsx": "^3.12.3", + "typescript": "^4.9.5", + "vitest": "^0.28.5" + }, "scripts": { - "build": "rm -rf dist/ && npm run build:node && npm run build:bundler && npm run build:web", + "build": "rm -rf dist/ && yarn run build:node && yarn run build:bundler && yarn run build:web && yarn run build:cleanup", "build:bundler": "wasm-pack build --target bundler --release --out-dir 
dist/bundler && rm dist/bundler/.gitignore", "build:node": "wasm-pack build --target nodejs --release --out-dir dist/node && rm dist/node/.gitignore", - "build:web": "wasm-pack build --target no-modules --release --out-dir dist/web && rm dist/web/.gitignore" - } + "build:web": "wasm-pack build --target no-modules --release --out-dir dist/web && rm dist/web/.gitignore", + "build:cleanup": "tsx scripts/override_any.ts", + "test": "yarn vitest" + }, + "dependencies": {} } diff --git a/scripts/override_any.ts b/scripts/override_any.ts new file mode 100644 index 00000000..0974e9b8 --- /dev/null +++ b/scripts/override_any.ts @@ -0,0 +1,38 @@ +import { Project, ts } from "ts-morph"; + +const project = new Project(); +project.addSourceFilesAtPaths("./dist/**/*.ts"); + +for (const filename of [ + "./dist/bundler/_tiktoken.d.ts", + "./dist/node/_tiktoken.d.ts", + "./dist/web/_tiktoken.d.ts", +]) { + const sourceFile = project.getSourceFileOrThrow(filename); + const cls = sourceFile.getFirstDescendantByKindOrThrow( + ts.SyntaxKind.ClassDeclaration + ); + + cls + .getConstructors()[0] + .getParameterOrThrow("special_tokens") + .set({ type: "Record" }); + + for (const method of ["encode", "encode_with_unstable"]) { + cls + .getMethodOrThrow(method) + .getParameterOrThrow("allowed_special") + .set({ type: `"all" | string[]`, hasQuestionToken: true }); + + cls + .getMethodOrThrow(method) + .getParameterOrThrow("disallowed_special") + .set({ type: `"all" | string[]`, hasQuestionToken: true }); + } + + cls + .getMemberOrThrow("token_byte_values") + .set({ returnType: "Array>" }); + + sourceFile.saveSync(); +} diff --git a/tests/test_simple_public.test.ts b/tests/test_simple_public.test.ts new file mode 100644 index 00000000..115a9467 --- /dev/null +++ b/tests/test_simple_public.test.ts @@ -0,0 +1,51 @@ +import { it, expect } from "vitest"; +import { encoding_for_model, get_encoding } from "../"; + +it("encoding_for_model initialization", () => { + expect(() => 
encoding_for_model("gpt2")).not.toThrowError(); + + // @ts-expect-error + expect(() => encoding_for_model("gpt2-unknown")).toThrowError( + "Invalid model" + ); +}); + +it("get_encoding initialization", () => { + expect(() => get_encoding("cl100k_base")).not.toThrowError(); + + // @ts-expect-error + expect(() => get_encoding("unknown")).toThrowError("Invalid encoding"); +}); + +it("test_simple", () => { + const enc = get_encoding("gpt2"); + expect(enc.encode("hello world")).toStrictEqual( + new Uint32Array([31373, 995]) + ); + + expect( + new TextDecoder().decode(enc.decode(new Uint32Array([31373, 995]))) + ).toStrictEqual("hello world"); + + expect(enc.encode("hello <|endoftext|>", "all")).toStrictEqual( + new Uint32Array([31373, 220, 50256]) + ); +}); + +it("test_simple", () => { + const decoder = new TextDecoder(); + const enc = get_encoding("cl100k_base"); + expect(enc.encode("hello world")).toStrictEqual( + new Uint32Array([15339, 1917]) + ); +}); + +it("test_custom_tokens", () => { + const enc = encoding_for_model("gpt2", { + "<|im_start|>": 100264, + "<|im_end|>": 100265, + }); + expect(enc.encode("<|im_start|>test<|im_end|>", "all")).toStrictEqual( + new Uint32Array([100264, 9288, 100265]) + ); +}); diff --git a/yarn.lock b/yarn.lock index fb57ccd1..c4f5d085 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2,3 +2,956 @@ # yarn lockfile v1 +"@esbuild-kit/cjs-loader@^2.4.2": + version "2.4.2" + resolved "https://registry.yarnpkg.com/@esbuild-kit/cjs-loader/-/cjs-loader-2.4.2.tgz#cb4dde00fbf744a68c4f20162ea15a8242d0fa54" + integrity sha512-BDXFbYOJzT/NBEtp71cvsrGPwGAMGRB/349rwKuoxNSiKjPraNNnlK6MIIabViCjqZugu6j+xeMDlEkWdHHJSg== + dependencies: + "@esbuild-kit/core-utils" "^3.0.0" + get-tsconfig "^4.4.0" + +"@esbuild-kit/core-utils@^3.0.0": + version "3.1.0" + resolved "https://registry.yarnpkg.com/@esbuild-kit/core-utils/-/core-utils-3.1.0.tgz#49945d533dbd5e1b7620aa0fc522c15e6ec089c5" + integrity 
sha512-Uuk8RpCg/7fdHSceR1M6XbSZFSuMrxcePFuGgyvsBn+u339dk5OeL4jv2EojwTN2st/unJGsVm4qHWjWNmJ/tw== + dependencies: + esbuild "~0.17.6" + source-map-support "^0.5.21" + +"@esbuild-kit/esm-loader@^2.5.5": + version "2.5.5" + resolved "https://registry.yarnpkg.com/@esbuild-kit/esm-loader/-/esm-loader-2.5.5.tgz#b82da14fcee3fc1d219869756c06f43f67d1ca71" + integrity sha512-Qwfvj/qoPbClxCRNuac1Du01r9gvNOT+pMYtJDapfB1eoGN1YlJ1BixLyL9WVENRx5RXgNLdfYdx/CuswlGhMw== + dependencies: + "@esbuild-kit/core-utils" "^3.0.0" + get-tsconfig "^4.4.0" + +"@esbuild/android-arm64@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.16.17.tgz#cf91e86df127aa3d141744edafcba0abdc577d23" + integrity sha512-MIGl6p5sc3RDTLLkYL1MyL8BMRN4tLMRCn+yRJJmEDvYZ2M7tmAf80hx1kbNEUX2KJ50RRtxZ4JHLvCfuB6kBg== + +"@esbuild/android-arm64@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.17.10.tgz#ad2ee47dd021035abdfb0c38848ff77a1e1918c4" + integrity sha512-ht1P9CmvrPF5yKDtyC+z43RczVs4rrHpRqrmIuoSvSdn44Fs1n6DGlpZKdK6rM83pFLbVaSUwle8IN+TPmkv7g== + +"@esbuild/android-arm@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.16.17.tgz#025b6246d3f68b7bbaa97069144fb5fb70f2fff2" + integrity sha512-N9x1CMXVhtWEAMS7pNNONyA14f71VPQN9Cnavj1XQh6T7bskqiLLrSca4O0Vr8Wdcga943eThxnVp3JLnBMYtw== + +"@esbuild/android-arm@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.17.10.tgz#bb5a68af8adeb94b30eadee7307404dc5237d076" + integrity sha512-7YEBfZ5lSem9Tqpsz+tjbdsEshlO9j/REJrfv4DXgKTt1+/MHqGwbtlyxQuaSlMeUZLxUKBaX8wdzlTfHkmnLw== + +"@esbuild/android-x64@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.16.17.tgz#c820e0fef982f99a85c4b8bfdd582835f04cd96e" + integrity 
sha512-a3kTv3m0Ghh4z1DaFEuEDfz3OLONKuFvI4Xqczqx4BqLyuFaFkuaG4j2MtA6fuWEFeC5x9IvqnX7drmRq/fyAQ== + +"@esbuild/android-x64@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.17.10.tgz#751d5d8ae9ece1efa9627b689c888eb85b102360" + integrity sha512-CYzrm+hTiY5QICji64aJ/xKdN70IK8XZ6iiyq0tZkd3tfnwwSWTYH1t3m6zyaaBxkuj40kxgMyj1km/NqdjQZA== + +"@esbuild/darwin-arm64@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.16.17.tgz#edef4487af6b21afabba7be5132c26d22379b220" + integrity sha512-/2agbUEfmxWHi9ARTX6OQ/KgXnOWfsNlTeLcoV7HSuSTv63E4DqtAc+2XqGw1KHxKMHGZgbVCZge7HXWX9Vn+w== + +"@esbuild/darwin-arm64@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.17.10.tgz#85601ee7efb2129cd3218d5bcbe8da1173bc1e8b" + integrity sha512-3HaGIowI+nMZlopqyW6+jxYr01KvNaLB5znXfbyyjuo4lE0VZfvFGcguIJapQeQMS4cX/NEispwOekJt3gr5Dg== + +"@esbuild/darwin-x64@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.16.17.tgz#42829168730071c41ef0d028d8319eea0e2904b4" + integrity sha512-2By45OBHulkd9Svy5IOCZt376Aa2oOkiE9QWUK9fe6Tb+WDr8hXL3dpqi+DeLiMed8tVXspzsTAvd0jUl96wmg== + +"@esbuild/darwin-x64@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.17.10.tgz#362c7e988c61fe72d5edef4f717e4b4fc728da98" + integrity sha512-J4MJzGchuCRG5n+B4EHpAMoJmBeAE1L3wGYDIN5oWNqX0tEr7VKOzw0ymSwpoeSpdCa030lagGUfnfhS7OvzrQ== + +"@esbuild/freebsd-arm64@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.16.17.tgz#1f4af488bfc7e9ced04207034d398e793b570a27" + integrity sha512-mt+cxZe1tVx489VTb4mBAOo2aKSnJ33L9fr25JXpqQqzbUIw/yzIzi+NHwAXK2qYV1lEFp4OoVeThGjUbmWmdw== + +"@esbuild/freebsd-arm64@0.17.10": + version "0.17.10" + resolved 
"https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.17.10.tgz#e8a85a46ede7c3a048a12f16b9d551d25adc8bb1" + integrity sha512-ZkX40Z7qCbugeK4U5/gbzna/UQkM9d9LNV+Fro8r7HA7sRof5Rwxc46SsqeMvB5ZaR0b1/ITQ/8Y1NmV2F0fXQ== + +"@esbuild/freebsd-x64@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.16.17.tgz#636306f19e9bc981e06aa1d777302dad8fddaf72" + integrity sha512-8ScTdNJl5idAKjH8zGAsN7RuWcyHG3BAvMNpKOBaqqR7EbUhhVHOqXRdL7oZvz8WNHL2pr5+eIT5c65kA6NHug== + +"@esbuild/freebsd-x64@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.17.10.tgz#cd0a1b68bffbcb5b65e65b3fd542e8c7c3edd86b" + integrity sha512-0m0YX1IWSLG9hWh7tZa3kdAugFbZFFx9XrvfpaCMMvrswSTvUZypp0NFKriUurHpBA3xsHVE9Qb/0u2Bbi/otg== + +"@esbuild/linux-arm64@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.16.17.tgz#a003f7ff237c501e095d4f3a09e58fc7b25a4aca" + integrity sha512-7S8gJnSlqKGVJunnMCrXHU9Q8Q/tQIxk/xL8BqAP64wchPCTzuM6W3Ra8cIa1HIflAvDnNOt2jaL17vaW+1V0g== + +"@esbuild/linux-arm64@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.17.10.tgz#13b183f432512ed9d9281cc89476caeebe9e9123" + integrity sha512-g1EZJR1/c+MmCgVwpdZdKi4QAJ8DCLP5uTgLWSAVd9wlqk9GMscaNMEViG3aE1wS+cNMzXXgdWiW/VX4J+5nTA== + +"@esbuild/linux-arm@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.16.17.tgz#b591e6a59d9c4fe0eeadd4874b157ab78cf5f196" + integrity sha512-iihzrWbD4gIT7j3caMzKb/RsFFHCwqqbrbH9SqUSRrdXkXaygSZCZg1FybsZz57Ju7N/SHEgPyaR0LZ8Zbe9gQ== + +"@esbuild/linux-arm@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.17.10.tgz#dd11e0a5faa3ea94dc80278a601c3be7b4fdf1da" + integrity sha512-whRdrrl0X+9D6o5f0sTZtDM9s86Xt4wk1bf7ltx6iQqrIIOH+sre1yjpcCdrVXntQPCNw/G+XqsD4HuxeS+2QA== + 
+"@esbuild/linux-ia32@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.16.17.tgz#24333a11027ef46a18f57019450a5188918e2a54" + integrity sha512-kiX69+wcPAdgl3Lonh1VI7MBr16nktEvOfViszBSxygRQqSpzv7BffMKRPMFwzeJGPxcio0pdD3kYQGpqQ2SSg== + +"@esbuild/linux-ia32@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.17.10.tgz#4d836f87b92807d9292379963c4888270d282405" + integrity sha512-1vKYCjfv/bEwxngHERp7huYfJ4jJzldfxyfaF7hc3216xiDA62xbXJfRlradiMhGZbdNLj2WA1YwYFzs9IWNPw== + +"@esbuild/linux-loong64@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.16.17.tgz#d5ad459d41ed42bbd4d005256b31882ec52227d8" + integrity sha512-dTzNnQwembNDhd654cA4QhbS9uDdXC3TKqMJjgOWsC0yNCbpzfWoXdZvp0mY7HU6nzk5E0zpRGGx3qoQg8T2DQ== + +"@esbuild/linux-loong64@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.17.10.tgz#92eb2ee200c17ef12c7fb3b648231948699e7a4c" + integrity sha512-mvwAr75q3Fgc/qz3K6sya3gBmJIYZCgcJ0s7XshpoqIAIBszzfXsqhpRrRdVFAyV1G9VUjj7VopL2HnAS8aHFA== + +"@esbuild/linux-mips64el@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.16.17.tgz#4e5967a665c38360b0a8205594377d4dcf9c3726" + integrity sha512-ezbDkp2nDl0PfIUn0CsQ30kxfcLTlcx4Foz2kYv8qdC6ia2oX5Q3E/8m6lq84Dj/6b0FrkgD582fJMIfHhJfSw== + +"@esbuild/linux-mips64el@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.17.10.tgz#14f7d50c40fe7f7ee545a9bd07c6f6e4cba5570e" + integrity sha512-XilKPgM2u1zR1YuvCsFQWl9Fc35BqSqktooumOY2zj7CSn5czJn279j9TE1JEqSqz88izJo7yE4x3LSf7oxHzg== + +"@esbuild/linux-ppc64@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.16.17.tgz#206443a02eb568f9fdf0b438fbd47d26e735afc8" + integrity 
sha512-dzS678gYD1lJsW73zrFhDApLVdM3cUF2MvAa1D8K8KtcSKdLBPP4zZSLy6LFZ0jYqQdQ29bjAHJDgz0rVbLB3g== + +"@esbuild/linux-ppc64@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.17.10.tgz#1ab5802e93ae511ce9783e1cb95f37df0f84c4af" + integrity sha512-kM4Rmh9l670SwjlGkIe7pYWezk8uxKHX4Lnn5jBZYBNlWpKMBCVfpAgAJqp5doLobhzF3l64VZVrmGeZ8+uKmQ== + +"@esbuild/linux-riscv64@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.16.17.tgz#c351e433d009bf256e798ad048152c8d76da2fc9" + integrity sha512-ylNlVsxuFjZK8DQtNUwiMskh6nT0vI7kYl/4fZgV1llP5d6+HIeL/vmmm3jpuoo8+NuXjQVZxmKuhDApK0/cKw== + +"@esbuild/linux-riscv64@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.17.10.tgz#4fae25201ef7ad868731d16c8b50b0e386c4774a" + integrity sha512-r1m9ZMNJBtOvYYGQVXKy+WvWd0BPvSxMsVq8Hp4GzdMBQvfZRvRr5TtX/1RdN6Va8JMVQGpxqde3O+e8+khNJQ== + +"@esbuild/linux-s390x@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.16.17.tgz#661f271e5d59615b84b6801d1c2123ad13d9bd87" + integrity sha512-gzy7nUTO4UA4oZ2wAMXPNBGTzZFP7mss3aKR2hH+/4UUkCOyqmjXiKpzGrY2TlEUhbbejzXVKKGazYcQTZWA/w== + +"@esbuild/linux-s390x@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.17.10.tgz#126254d8335bb3586918b1ca60beb4abb46e6d54" + integrity sha512-LsY7QvOLPw9WRJ+fU5pNB3qrSfA00u32ND5JVDrn/xG5hIQo3kvTxSlWFRP0NJ0+n6HmhPGG0Q4jtQsb6PFoyg== + +"@esbuild/linux-x64@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.16.17.tgz#e4ba18e8b149a89c982351443a377c723762b85f" + integrity sha512-mdPjPxfnmoqhgpiEArqi4egmBAMYvaObgn4poorpUaqmvzzbvqbowRllQ+ZgzGVMGKaPkqUmPDOOFQRUFDmeUw== + +"@esbuild/linux-x64@0.17.10": + version "0.17.10" + resolved 
"https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.17.10.tgz#7fa4667b2df81ea0538e1b75e607cf04e526ce91" + integrity sha512-zJUfJLebCYzBdIz/Z9vqwFjIA7iSlLCFvVi7glMgnu2MK7XYigwsonXshy9wP9S7szF+nmwrelNaP3WGanstEg== + +"@esbuild/netbsd-x64@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.16.17.tgz#7d4f4041e30c5c07dd24ffa295c73f06038ec775" + integrity sha512-/PzmzD/zyAeTUsduZa32bn0ORug+Jd1EGGAUJvqfeixoEISYpGnAezN6lnJoskauoai0Jrs+XSyvDhppCPoKOA== + +"@esbuild/netbsd-x64@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.17.10.tgz#2d24727ddc2305619685bf237a46d6087a02ee9a" + integrity sha512-lOMkailn4Ok9Vbp/q7uJfgicpDTbZFlXlnKT2DqC8uBijmm5oGtXAJy2ZZVo5hX7IOVXikV9LpCMj2U8cTguWA== + +"@esbuild/openbsd-x64@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.16.17.tgz#970fa7f8470681f3e6b1db0cc421a4af8060ec35" + integrity sha512-2yaWJhvxGEz2RiftSk0UObqJa/b+rIAjnODJgv2GbGGpRwAfpgzyrg1WLK8rqA24mfZa9GvpjLcBBg8JHkoodg== + +"@esbuild/openbsd-x64@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.17.10.tgz#bf3fc38ee6ecf028c1f0cfe11f61d53cc75fef12" + integrity sha512-/VE0Kx6y7eekqZ+ZLU4AjMlB80ov9tEz4H067Y0STwnGOYL8CsNg4J+cCmBznk1tMpxMoUOf0AbWlb1d2Pkbig== + +"@esbuild/sunos-x64@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.16.17.tgz#abc60e7c4abf8b89fb7a4fe69a1484132238022c" + integrity sha512-xtVUiev38tN0R3g8VhRfN7Zl42YCJvyBhRKw1RJjwE1d2emWTVToPLNEQj/5Qxc6lVFATDiy6LjVHYhIPrLxzw== + +"@esbuild/sunos-x64@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.17.10.tgz#8deabd6dfec6256f80bb101bc59d29dbae99c69b" + integrity sha512-ERNO0838OUm8HfUjjsEs71cLjLMu/xt6bhOlxcJ0/1MG3hNqCmbWaS+w/8nFLa0DDjbwZQuGKVtCUJliLmbVgg== + +"@esbuild/win32-arm64@0.16.17": + 
version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.16.17.tgz#7b0ff9e8c3265537a7a7b1fd9a24e7bd39fcd87a" + integrity sha512-ga8+JqBDHY4b6fQAmOgtJJue36scANy4l/rL97W+0wYmijhxKetzZdKOJI7olaBaMhWt8Pac2McJdZLxXWUEQw== + +"@esbuild/win32-arm64@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.17.10.tgz#1ec1ee04c788c4c57a83370b6abf79587b3e4965" + integrity sha512-fXv+L+Bw2AeK+XJHwDAQ9m3NRlNemG6Z6ijLwJAAVdu4cyoFbBWbEtyZzDeL+rpG2lWI51cXeMt70HA8g2MqIg== + +"@esbuild/win32-ia32@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.16.17.tgz#e90fe5267d71a7b7567afdc403dfd198c292eb09" + integrity sha512-WnsKaf46uSSF/sZhwnqE4L/F89AYNMiD4YtEcYekBt9Q7nj0DiId2XH2Ng2PHM54qi5oPrQ8luuzGszqi/veig== + +"@esbuild/win32-ia32@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.17.10.tgz#a362528d7f3ad5d44fa8710a96764677ef92ebe9" + integrity sha512-3s+HADrOdCdGOi5lnh5DMQEzgbsFsd4w57L/eLKKjMnN0CN4AIEP0DCP3F3N14xnxh3ruNc32A0Na9zYe1Z/AQ== + +"@esbuild/win32-x64@0.16.17": + version "0.16.17" + resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.16.17.tgz#c5a1a4bfe1b57f0c3e61b29883525c6da3e5c091" + integrity sha512-y+EHuSchhL7FjHgvQL/0fnnFmO4T1bhvWANX6gcnqTjtnKWbTvUMCpGnv2+t+31d7RzyEAYAd4u2fnIhHL6N/Q== + +"@esbuild/win32-x64@0.17.10": + version "0.17.10" + resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.17.10.tgz#ac779220f2da96afd480fb3f3148a292f66e7fc3" + integrity sha512-oP+zFUjYNaMNmjTwlFtWep85hvwUu19cZklB3QsBOcZSs6y7hmH4LNCJ7075bsqzYaNvZFXJlAVaQ2ApITDXtw== + +"@nodelib/fs.scandir@2.1.5": + version "2.1.5" + resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5" + integrity sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g== + 
dependencies: + "@nodelib/fs.stat" "2.0.5" + run-parallel "^1.1.9" + +"@nodelib/fs.stat@2.0.5", "@nodelib/fs.stat@^2.0.2": + version "2.0.5" + resolved "https://registry.yarnpkg.com/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz#5bd262af94e9d25bd1e71b05deed44876a222e8b" + integrity sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A== + +"@nodelib/fs.walk@^1.2.3": + version "1.2.8" + resolved "https://registry.yarnpkg.com/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz#e95737e8bb6746ddedf69c556953494f196fe69a" + integrity sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg== + dependencies: + "@nodelib/fs.scandir" "2.1.5" + fastq "^1.6.0" + +"@ts-morph/common@~0.18.0": + version "0.18.1" + resolved "https://registry.yarnpkg.com/@ts-morph/common/-/common-0.18.1.tgz#ca40c3a62c3f9e17142e0af42633ad63efbae0ec" + integrity sha512-RVE+zSRICWRsfrkAw5qCAK+4ZH9kwEFv5h0+/YeHTLieWP7F4wWq4JsKFuNWG+fYh/KF+8rAtgdj5zb2mm+DVA== + dependencies: + fast-glob "^3.2.12" + minimatch "^5.1.0" + mkdirp "^1.0.4" + path-browserify "^1.0.1" + +"@types/chai-subset@^1.3.3": + version "1.3.3" + resolved "https://registry.yarnpkg.com/@types/chai-subset/-/chai-subset-1.3.3.tgz#97893814e92abd2c534de422cb377e0e0bdaac94" + integrity sha512-frBecisrNGz+F4T6bcc+NLeolfiojh5FxW2klu669+8BARtyQv2C/GkNW6FUodVe4BroGMP/wER/YDGc7rEllw== + dependencies: + "@types/chai" "*" + +"@types/chai@*", "@types/chai@^4.3.4": + version "4.3.4" + resolved "https://registry.yarnpkg.com/@types/chai/-/chai-4.3.4.tgz#e913e8175db8307d78b4e8fa690408ba6b65dee4" + integrity sha512-KnRanxnpfpjUTqTCXslZSEdLfXExwgNxYPdiO2WGUj8+HDjFi8R3k5RVKPeSCzLjCcshCAtVO2QBbVuAV4kTnw== + +"@types/node@*": + version "18.14.1" + resolved "https://registry.yarnpkg.com/@types/node/-/node-18.14.1.tgz#90dad8476f1e42797c49d6f8b69aaf9f876fc69f" + integrity sha512-QH+37Qds3E0eDlReeboBxfHbX9omAcBCXEzswCu6jySP642jiM3cYSIkU/REqwhCUqXdonHFuBfJDiAJxMNhaQ== + +"@vitest/expect@0.28.5": + version 
"0.28.5" + resolved "https://registry.yarnpkg.com/@vitest/expect/-/expect-0.28.5.tgz#d5a6eccd014e9ad66fe87a20d16426a2815c0e8a" + integrity sha512-gqTZwoUTwepwGIatnw4UKpQfnoyV0Z9Czn9+Lo2/jLIt4/AXLTn+oVZxlQ7Ng8bzcNkR+3DqLJ08kNr8jRmdNQ== + dependencies: + "@vitest/spy" "0.28.5" + "@vitest/utils" "0.28.5" + chai "^4.3.7" + +"@vitest/runner@0.28.5": + version "0.28.5" + resolved "https://registry.yarnpkg.com/@vitest/runner/-/runner-0.28.5.tgz#4a18fe0e40b25569763f9f1f64b799d1629b3026" + integrity sha512-NKkHtLB+FGjpp5KmneQjTcPLWPTDfB7ie+MmF1PnUBf/tGe2OjGxWyB62ySYZ25EYp9krR5Bw0YPLS/VWh1QiA== + dependencies: + "@vitest/utils" "0.28.5" + p-limit "^4.0.0" + pathe "^1.1.0" + +"@vitest/spy@0.28.5": + version "0.28.5" + resolved "https://registry.yarnpkg.com/@vitest/spy/-/spy-0.28.5.tgz#b69affa0786200251b9e5aac5c58bbfb1b3273c9" + integrity sha512-7if6rsHQr9zbmvxN7h+gGh2L9eIIErgf8nSKYDlg07HHimCxp4H6I/X/DPXktVPPLQfiZ1Cw2cbDIx9fSqDjGw== + dependencies: + tinyspy "^1.0.2" + +"@vitest/utils@0.28.5": + version "0.28.5" + resolved "https://registry.yarnpkg.com/@vitest/utils/-/utils-0.28.5.tgz#7b82b528df86adfbd4a1f6a3b72c39790e81de0d" + integrity sha512-UyZdYwdULlOa4LTUSwZ+Paz7nBHGTT72jKwdFSV4IjHF1xsokp+CabMdhjvVhYwkLfO88ylJT46YMilnkSARZA== + dependencies: + cli-truncate "^3.1.0" + diff "^5.1.0" + loupe "^2.3.6" + picocolors "^1.0.0" + pretty-format "^27.5.1" + +acorn-walk@^8.2.0: + version "8.2.0" + resolved "https://registry.yarnpkg.com/acorn-walk/-/acorn-walk-8.2.0.tgz#741210f2e2426454508853a2f44d0ab83b7f69c1" + integrity sha512-k+iyHEuPgSw6SbuDpGQM+06HQUa04DZ3o+F6CSzXMvvI5KMvnaEqXe+YVe555R9nn6GPt404fos4wcgpw12SDA== + +acorn@^8.8.1, acorn@^8.8.2: + version "8.8.2" + resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.8.2.tgz#1b2f25db02af965399b9776b0c2c391276d37c4a" + integrity sha512-xjIYgE8HBrkpd/sJqOGNspf8uHG+NOHGOw6a/Urj8taM2EXfdNAH2oFcPeIFfsv3+kz/mJrS5VuMqbNLjCa2vw== + +ansi-regex@^5.0.1: + version "5.0.1" + resolved 
"https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.1.tgz#082cb2c89c9fe8659a311a53bd6a4dc5301db304" + integrity sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ== + +ansi-regex@^6.0.1: + version "6.0.1" + resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-6.0.1.tgz#3183e38fae9a65d7cb5e53945cd5897d0260a06a" + integrity sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA== + +ansi-styles@^5.0.0: + version "5.2.0" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-5.2.0.tgz#07449690ad45777d1924ac2abb2fc8895dba836b" + integrity sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA== + +ansi-styles@^6.0.0: + version "6.2.1" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-6.2.1.tgz#0e62320cf99c21afff3b3012192546aacbfb05c5" + integrity sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug== + +assertion-error@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/assertion-error/-/assertion-error-1.1.0.tgz#e60b6b0e8f301bd97e5375215bda406c85118c0b" + integrity sha512-jgsaNduz+ndvGyFt3uSuWqvy4lCnIJiovtouQN5JZHOKCS2QuhEdbcQHFhVksz2N2U9hXJo8odG7ETyWlEeuDw== + +balanced-match@^1.0.0: + version "1.0.2" + resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee" + integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw== + +brace-expansion@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-2.0.1.tgz#1edc459e0f0c548486ecf9fc99f2221364b9a0ae" + integrity sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA== + dependencies: + balanced-match "^1.0.0" + +braces@^3.0.2: + version "3.0.2" + resolved 
"https://registry.yarnpkg.com/braces/-/braces-3.0.2.tgz#3454e1a462ee8d599e236df336cd9ea4f8afe107" + integrity sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A== + dependencies: + fill-range "^7.0.1" + +buffer-from@^1.0.0: + version "1.1.2" + resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.2.tgz#2b146a6fd72e80b4f55d255f35ed59a3a9a41bd5" + integrity sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ== + +cac@^6.7.14: + version "6.7.14" + resolved "https://registry.yarnpkg.com/cac/-/cac-6.7.14.tgz#804e1e6f506ee363cb0e3ccbb09cad5dd9870959" + integrity sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ== + +chai@^4.3.7: + version "4.3.7" + resolved "https://registry.yarnpkg.com/chai/-/chai-4.3.7.tgz#ec63f6df01829088e8bf55fca839bcd464a8ec51" + integrity sha512-HLnAzZ2iupm25PlN0xFreAlBA5zaBSv3og0DdeGA4Ar6h6rJ3A0rolRUKJhSF2V10GZKDgWF/VmAEsNWjCRB+A== + dependencies: + assertion-error "^1.1.0" + check-error "^1.0.2" + deep-eql "^4.1.2" + get-func-name "^2.0.0" + loupe "^2.3.1" + pathval "^1.1.1" + type-detect "^4.0.5" + +check-error@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/check-error/-/check-error-1.0.2.tgz#574d312edd88bb5dd8912e9286dd6c0aed4aac82" + integrity sha512-BrgHpW9NURQgzoNyjfq0Wu6VFO6D7IZEmJNdtgNqpzGG8RuNFHt2jQxWlAs4HMe119chBnv+34syEZtc6IhLtA== + +cli-truncate@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/cli-truncate/-/cli-truncate-3.1.0.tgz#3f23ab12535e3d73e839bb43e73c9de487db1389" + integrity sha512-wfOBkjXteqSnI59oPcJkcPl/ZmwvMMOj340qUIY1SKZCv0B9Cf4D4fAucRkIKQmsIuYK3x1rrgU7MeGRruiuiA== + dependencies: + slice-ansi "^5.0.0" + string-width "^5.0.0" + +code-block-writer@^11.0.3: + version "11.0.3" + resolved "https://registry.yarnpkg.com/code-block-writer/-/code-block-writer-11.0.3.tgz#9eec2993edfb79bfae845fbc093758c0a0b73b76" + integrity 
sha512-NiujjUFB4SwScJq2bwbYUtXbZhBSlY6vYzm++3Q6oC+U+injTqfPYFK8wS9COOmb2lueqp0ZRB4nK1VYeHgNyw== + +debug@^4.3.4: + version "4.3.4" + resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865" + integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ== + dependencies: + ms "2.1.2" + +deep-eql@^4.1.2: + version "4.1.3" + resolved "https://registry.yarnpkg.com/deep-eql/-/deep-eql-4.1.3.tgz#7c7775513092f7df98d8df9996dd085eb668cc6d" + integrity sha512-WaEtAOpRA1MQ0eohqZjpGD8zdI0Ovsm8mmFhaDN8dvDZzyoUMcYDnf5Y6iu7HTXxf8JDS23qWa4a+hKCDyOPzw== + dependencies: + type-detect "^4.0.0" + +diff@^5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/diff/-/diff-5.1.0.tgz#bc52d298c5ea8df9194800224445ed43ffc87e40" + integrity sha512-D+mk+qE8VC/PAUrlAU34N+VfXev0ghe5ywmpqrawphmVZc1bEfn56uo9qpyGp1p4xpzOHkSW4ztBd6L7Xx4ACw== + +eastasianwidth@^0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/eastasianwidth/-/eastasianwidth-0.2.0.tgz#696ce2ec0aa0e6ea93a397ffcf24aa7840c827cb" + integrity sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA== + +emoji-regex@^9.2.2: + version "9.2.2" + resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-9.2.2.tgz#840c8803b0d8047f4ff0cf963176b32d4ef3ed72" + integrity sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg== + +esbuild@^0.16.14: + version "0.16.17" + resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.16.17.tgz#fc2c3914c57ee750635fee71b89f615f25065259" + integrity sha512-G8LEkV0XzDMNwXKgM0Jwu3nY3lSTwSGY6XbxM9cr9+s0T/qSV1q1JVPBGzm3dcjhCic9+emZDmMffkwgPeOeLg== + optionalDependencies: + "@esbuild/android-arm" "0.16.17" + "@esbuild/android-arm64" "0.16.17" + "@esbuild/android-x64" "0.16.17" + "@esbuild/darwin-arm64" "0.16.17" + "@esbuild/darwin-x64" "0.16.17" + "@esbuild/freebsd-arm64" "0.16.17" + "@esbuild/freebsd-x64" "0.16.17" 
+ "@esbuild/linux-arm" "0.16.17" + "@esbuild/linux-arm64" "0.16.17" + "@esbuild/linux-ia32" "0.16.17" + "@esbuild/linux-loong64" "0.16.17" + "@esbuild/linux-mips64el" "0.16.17" + "@esbuild/linux-ppc64" "0.16.17" + "@esbuild/linux-riscv64" "0.16.17" + "@esbuild/linux-s390x" "0.16.17" + "@esbuild/linux-x64" "0.16.17" + "@esbuild/netbsd-x64" "0.16.17" + "@esbuild/openbsd-x64" "0.16.17" + "@esbuild/sunos-x64" "0.16.17" + "@esbuild/win32-arm64" "0.16.17" + "@esbuild/win32-ia32" "0.16.17" + "@esbuild/win32-x64" "0.16.17" + +esbuild@~0.17.6: + version "0.17.10" + resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.17.10.tgz#3be050561b34c5dc05b46978f4e1f326d5cc9437" + integrity sha512-n7V3v29IuZy5qgxx25TKJrEm0FHghAlS6QweUcyIgh/U0zYmQcvogWROitrTyZId1mHSkuhhuyEXtI9OXioq7A== + optionalDependencies: + "@esbuild/android-arm" "0.17.10" + "@esbuild/android-arm64" "0.17.10" + "@esbuild/android-x64" "0.17.10" + "@esbuild/darwin-arm64" "0.17.10" + "@esbuild/darwin-x64" "0.17.10" + "@esbuild/freebsd-arm64" "0.17.10" + "@esbuild/freebsd-x64" "0.17.10" + "@esbuild/linux-arm" "0.17.10" + "@esbuild/linux-arm64" "0.17.10" + "@esbuild/linux-ia32" "0.17.10" + "@esbuild/linux-loong64" "0.17.10" + "@esbuild/linux-mips64el" "0.17.10" + "@esbuild/linux-ppc64" "0.17.10" + "@esbuild/linux-riscv64" "0.17.10" + "@esbuild/linux-s390x" "0.17.10" + "@esbuild/linux-x64" "0.17.10" + "@esbuild/netbsd-x64" "0.17.10" + "@esbuild/openbsd-x64" "0.17.10" + "@esbuild/sunos-x64" "0.17.10" + "@esbuild/win32-arm64" "0.17.10" + "@esbuild/win32-ia32" "0.17.10" + "@esbuild/win32-x64" "0.17.10" + +fast-glob@^3.2.12: + version "3.2.12" + resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.2.12.tgz#7f39ec99c2e6ab030337142da9e0c18f37afae80" + integrity sha512-DVj4CQIYYow0BlaelwK1pHl5n5cRSJfM60UA0zK891sVInoPri2Ekj7+e1CT3/3qxXenpI+nBBmQAcJPJgaj4w== + dependencies: + "@nodelib/fs.stat" "^2.0.2" + "@nodelib/fs.walk" "^1.2.3" + glob-parent "^5.1.2" + merge2 "^1.3.0" + micromatch "^4.0.4" + +fastq@^1.6.0: + 
version "1.15.0" + resolved "https://registry.yarnpkg.com/fastq/-/fastq-1.15.0.tgz#d04d07c6a2a68fe4599fea8d2e103a937fae6b3a" + integrity sha512-wBrocU2LCXXa+lWBt8RoIRD89Fi8OdABODa/kEnyeyjS5aZO5/GNvI5sEINADqP/h8M29UHTHUb53sUu5Ihqdw== + dependencies: + reusify "^1.0.4" + +fill-range@^7.0.1: + version "7.0.1" + resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.0.1.tgz#1919a6a7c75fe38b2c7c77e5198535da9acdda40" + integrity sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ== + dependencies: + to-regex-range "^5.0.1" + +fsevents@~2.3.2: + version "2.3.2" + resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.2.tgz#8a526f78b8fdf4623b709e0b975c52c24c02fd1a" + integrity sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA== + +function-bind@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d" + integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A== + +get-func-name@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/get-func-name/-/get-func-name-2.0.0.tgz#ead774abee72e20409433a066366023dd6887a41" + integrity sha512-Hm0ixYtaSZ/V7C8FJrtZIuBBI+iSgL+1Aq82zSu8VQNB4S3Gk8e7Qs3VwBDJAhmRZcFqkl3tQu36g/Foh5I5ig== + +get-tsconfig@^4.4.0: + version "4.4.0" + resolved "https://registry.yarnpkg.com/get-tsconfig/-/get-tsconfig-4.4.0.tgz#64eee64596668a81b8fce18403f94f245ee0d4e5" + integrity sha512-0Gdjo/9+FzsYhXCEFueo2aY1z1tpXrxWZzP7k8ul9qt1U5o8rYJwTJYmaeHdrVosYIVYkOy2iwCJ9FdpocJhPQ== + +glob-parent@^5.1.2: + version "5.1.2" + resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-5.1.2.tgz#869832c58034fe68a4093c17dc15e8340d8401c4" + integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow== + dependencies: + is-glob "^4.0.1" + +has@^1.0.3: + version "1.0.3" + resolved 
"https://registry.yarnpkg.com/has/-/has-1.0.3.tgz#722d7cbfc1f6aa8241f16dd814e011e1f41e8796" + integrity sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw== + dependencies: + function-bind "^1.1.1" + +is-core-module@^2.9.0: + version "2.11.0" + resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.11.0.tgz#ad4cb3e3863e814523c96f3f58d26cc570ff0144" + integrity sha512-RRjxlvLDkD1YJwDbroBHMb+cukurkDWNyHx7D3oNB5x9rb5ogcksMC5wHCadcXoo67gVr/+3GFySh3134zi6rw== + dependencies: + has "^1.0.3" + +is-extglob@^2.1.1: + version "2.1.1" + resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-2.1.1.tgz#a88c02535791f02ed37c76a1b9ea9773c833f8c2" + integrity sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ== + +is-fullwidth-code-point@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-4.0.0.tgz#fae3167c729e7463f8461ce512b080a49268aa88" + integrity sha512-O4L094N2/dZ7xqVdrXhh9r1KODPJpFms8B5sGdJLPy664AgvXsreZUyCQQNItZRDlYug4xStLjNp/sz3HvBowQ== + +is-glob@^4.0.1: + version "4.0.3" + resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-4.0.3.tgz#64f61e42cbbb2eec2071a9dac0b28ba1e65d5084" + integrity sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg== + dependencies: + is-extglob "^2.1.1" + +is-number@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/is-number/-/is-number-7.0.0.tgz#7535345b896734d5f80c4d06c50955527a14f12b" + integrity sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng== + +jsonc-parser@^3.2.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/jsonc-parser/-/jsonc-parser-3.2.0.tgz#31ff3f4c2b9793f89c67212627c51c6394f88e76" + integrity sha512-gfFQZrcTc8CnKXp6Y4/CBT3fTc0OVuDofpre4aEeEpSBPV5X5v4+Vmx+8snU7RLPrNHPKSgLxGo9YuQzz20o+w== + +local-pkg@^0.4.2: + version "0.4.3" + resolved 
"https://registry.yarnpkg.com/local-pkg/-/local-pkg-0.4.3.tgz#0ff361ab3ae7f1c19113d9bb97b98b905dbc4963" + integrity sha512-SFppqq5p42fe2qcZQqqEOiVRXl+WCP1MdT6k7BDEW1j++sp5fIY+/fdRQitvKgB5BrBcmrs5m/L0v2FrU5MY1g== + +loupe@^2.3.1, loupe@^2.3.6: + version "2.3.6" + resolved "https://registry.yarnpkg.com/loupe/-/loupe-2.3.6.tgz#76e4af498103c532d1ecc9be102036a21f787b53" + integrity sha512-RaPMZKiMy8/JruncMU5Bt6na1eftNoo++R4Y+N2FrxkDVTrGvcyzFTsaGif4QTeKESheMGegbhw6iUAq+5A8zA== + dependencies: + get-func-name "^2.0.0" + +merge2@^1.3.0: + version "1.4.1" + resolved "https://registry.yarnpkg.com/merge2/-/merge2-1.4.1.tgz#4368892f885e907455a6fd7dc55c0c9d404990ae" + integrity sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg== + +micromatch@^4.0.4: + version "4.0.5" + resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-4.0.5.tgz#bc8999a7cbbf77cdc89f132f6e467051b49090c6" + integrity sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA== + dependencies: + braces "^3.0.2" + picomatch "^2.3.1" + +minimatch@^5.1.0: + version "5.1.6" + resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-5.1.6.tgz#1cfcb8cf5522ea69952cd2af95ae09477f122a96" + integrity sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g== + dependencies: + brace-expansion "^2.0.1" + +mkdirp@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e" + integrity sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw== + +mlly@^1.1.0, mlly@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/mlly/-/mlly-1.1.1.tgz#f1838b14795e2cc284aa4ebcc76a258a52e6f537" + integrity sha512-Jnlh4W/aI4GySPo6+DyTN17Q75KKbLTyFK8BrGhjNP4rxuUjbRWhE6gHg3bs33URWAF44FRm7gdQA348i3XxRw== + dependencies: + acorn "^8.8.2" + pathe "^1.1.0" + pkg-types "^1.0.1" + ufo "^1.1.0" + 
+ms@2.1.2: + version "2.1.2" + resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009" + integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== + +nanoid@^3.3.4: + version "3.3.4" + resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.4.tgz#730b67e3cd09e2deacf03c027c81c9d9dbc5e8ab" + integrity sha512-MqBkQh/OHTS2egovRtLk45wEyNXwF+cokD+1YPf9u5VfJiRdAiRwB2froX5Co9Rh20xs4siNPm8naNotSD6RBw== + +p-limit@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-4.0.0.tgz#914af6544ed32bfa54670b061cafcbd04984b644" + integrity sha512-5b0R4txpzjPWVw/cXXUResoD4hb6U/x9BH08L7nw+GN1sezDzPdxeRvpc9c433fZhBan/wusjbCsqwqm4EIBIQ== + dependencies: + yocto-queue "^1.0.0" + +path-browserify@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/path-browserify/-/path-browserify-1.0.1.tgz#d98454a9c3753d5790860f16f68867b9e46be1fd" + integrity sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g== + +path-parse@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" + integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== + +pathe@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/pathe/-/pathe-1.1.0.tgz#e2e13f6c62b31a3289af4ba19886c230f295ec03" + integrity sha512-ODbEPR0KKHqECXW1GoxdDb+AZvULmXjVPy4rt+pGo2+TnjJTIPJQSVS6N63n8T2Ip+syHhbn52OewKicV0373w== + +pathval@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/pathval/-/pathval-1.1.1.tgz#8534e77a77ce7ac5a2512ea21e0fdb8fcf6c3d8d" + integrity sha512-Dp6zGqpTdETdR63lehJYPeIOqpiNBNtc7BpWSLrOje7UaIsE5aY92r/AunQA7rsXvet3lrJ3JnZX29UPTKXyKQ== + +picocolors@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/picocolors/-/picocolors-1.0.0.tgz#cb5bdc74ff3f51892236eaf79d68bc44564ab81c" + 
integrity sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ== + +picomatch@^2.3.1: + version "2.3.1" + resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.1.tgz#3ba3833733646d9d3e4995946c1365a67fb07a42" + integrity sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA== + +pkg-types@^1.0.1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/pkg-types/-/pkg-types-1.0.2.tgz#c233efc5210a781e160e0cafd60c0d0510a4b12e" + integrity sha512-hM58GKXOcj8WTqUXnsQyJYXdeAPbythQgEF3nTcEo+nkD49chjQ9IKm/QJy9xf6JakXptz86h7ecP2024rrLaQ== + dependencies: + jsonc-parser "^3.2.0" + mlly "^1.1.1" + pathe "^1.1.0" + +postcss@^8.4.21: + version "8.4.21" + resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.4.21.tgz#c639b719a57efc3187b13a1d765675485f4134f4" + integrity sha512-tP7u/Sn/dVxK2NnruI4H9BG+x+Wxz6oeZ1cJ8P6G/PZY0IKk4k/63TDsQf2kQq3+qoJeLm2kIBUNlZe3zgb4Zg== + dependencies: + nanoid "^3.3.4" + picocolors "^1.0.0" + source-map-js "^1.0.2" + +pretty-format@^27.5.1: + version "27.5.1" + resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-27.5.1.tgz#2181879fdea51a7a5851fb39d920faa63f01d88e" + integrity sha512-Qb1gy5OrP5+zDf2Bvnzdl3jsTf1qXVMazbvCoKhtKqVs4/YK4ozX4gKQJJVyNe+cajNPn0KoC0MC3FUmaHWEmQ== + dependencies: + ansi-regex "^5.0.1" + ansi-styles "^5.0.0" + react-is "^17.0.1" + +queue-microtask@^1.2.2: + version "1.2.3" + resolved "https://registry.yarnpkg.com/queue-microtask/-/queue-microtask-1.2.3.tgz#4929228bbc724dfac43e0efb058caf7b6cfb6243" + integrity sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A== + +react-is@^17.0.1: + version "17.0.2" + resolved "https://registry.yarnpkg.com/react-is/-/react-is-17.0.2.tgz#e691d4a8e9c789365655539ab372762b0efb54f0" + integrity sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w== + +resolve@^1.22.1: + version "1.22.1" + resolved 
"https://registry.yarnpkg.com/resolve/-/resolve-1.22.1.tgz#27cb2ebb53f91abb49470a928bba7558066ac177" + integrity sha512-nBpuuYuY5jFsli/JIs1oldw6fOQCBioohqWZg/2hiaOybXOft4lonv85uDOKXdf8rhyK159cxU5cDcK/NKk8zw== + dependencies: + is-core-module "^2.9.0" + path-parse "^1.0.7" + supports-preserve-symlinks-flag "^1.0.0" + +reusify@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/reusify/-/reusify-1.0.4.tgz#90da382b1e126efc02146e90845a88db12925d76" + integrity sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw== + +rollup@^3.10.0: + version "3.17.2" + resolved "https://registry.yarnpkg.com/rollup/-/rollup-3.17.2.tgz#a4ecd29c488672a0606e41ef57474fad715750a9" + integrity sha512-qMNZdlQPCkWodrAZ3qnJtvCAl4vpQ8q77uEujVCCbC/6CLB7Lcmvjq7HyiOSnf4fxTT9XgsE36oLHJBH49xjqA== + optionalDependencies: + fsevents "~2.3.2" + +run-parallel@^1.1.9: + version "1.2.0" + resolved "https://registry.yarnpkg.com/run-parallel/-/run-parallel-1.2.0.tgz#66d1368da7bdf921eb9d95bd1a9229e7f21a43ee" + integrity sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA== + dependencies: + queue-microtask "^1.2.2" + +siginfo@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/siginfo/-/siginfo-2.0.0.tgz#32e76c70b79724e3bb567cb9d543eb858ccfaf30" + integrity sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g== + +slice-ansi@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-5.0.0.tgz#b73063c57aa96f9cd881654b15294d95d285c42a" + integrity sha512-FC+lgizVPfie0kkhqUScwRu1O/lF6NOgJmlCgK+/LYxDCTk8sGelYaHDhFcDN+Sn3Cv+3VSa4Byeo+IMCzpMgQ== + dependencies: + ansi-styles "^6.0.0" + is-fullwidth-code-point "^4.0.0" + +source-map-js@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-1.0.2.tgz#adbc361d9c62df380125e7f161f71c826f1e490c" + integrity 
sha512-R0XvVJ9WusLiqTCEiGCmICCMplcCkIwwR11mOSD9CR5u+IXYdiseeEuXCVAjS54zqwkLcPNnmU4OeJ6tUrWhDw== + +source-map-support@^0.5.21: + version "0.5.21" + resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.21.tgz#04fe7c7f9e1ed2d662233c28cb2b35b9f63f6e4f" + integrity sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w== + dependencies: + buffer-from "^1.0.0" + source-map "^0.6.0" + +source-map@^0.6.0, source-map@^0.6.1: + version "0.6.1" + resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263" + integrity sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g== + +stackback@0.0.2: + version "0.0.2" + resolved "https://registry.yarnpkg.com/stackback/-/stackback-0.0.2.tgz#1ac8a0d9483848d1695e418b6d031a3c3ce68e3b" + integrity sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw== + +std-env@^3.3.1: + version "3.3.2" + resolved "https://registry.yarnpkg.com/std-env/-/std-env-3.3.2.tgz#af27343b001616015534292178327b202b9ee955" + integrity sha512-uUZI65yrV2Qva5gqE0+A7uVAvO40iPo6jGhs7s8keRfHCmtg+uB2X6EiLGCI9IgL1J17xGhvoOqSz79lzICPTA== + +string-width@^5.0.0: + version "5.1.2" + resolved "https://registry.yarnpkg.com/string-width/-/string-width-5.1.2.tgz#14f8daec6d81e7221d2a357e668cab73bdbca794" + integrity sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA== + dependencies: + eastasianwidth "^0.2.0" + emoji-regex "^9.2.2" + strip-ansi "^7.0.1" + +strip-ansi@^7.0.1: + version "7.0.1" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-7.0.1.tgz#61740a08ce36b61e50e65653f07060d000975fb2" + integrity sha512-cXNxvT8dFNRVfhVME3JAe98mkXDYN2O1l7jmcwMnOslDeESg1rF/OZMtK0nRAhiari1unG5cD4jG3rapUAkLbw== + dependencies: + ansi-regex "^6.0.1" + +strip-literal@^1.0.0: + version "1.0.1" + resolved 
"https://registry.yarnpkg.com/strip-literal/-/strip-literal-1.0.1.tgz#0115a332710c849b4e46497891fb8d585e404bd2" + integrity sha512-QZTsipNpa2Ppr6v1AmJHESqJ3Uz247MUS0OjrnnZjFAvEoWqxuyFuXn2xLgMtRnijJShAa1HL0gtJyUs7u7n3Q== + dependencies: + acorn "^8.8.2" + +supports-preserve-symlinks-flag@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz#6eda4bd344a3c94aea376d4cc31bc77311039e09" + integrity sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w== + +tinybench@^2.3.1: + version "2.3.1" + resolved "https://registry.yarnpkg.com/tinybench/-/tinybench-2.3.1.tgz#14f64e6b77d7ef0b1f6ab850c7a808c6760b414d" + integrity sha512-hGYWYBMPr7p4g5IarQE7XhlyWveh1EKhy4wUBS1LrHXCKYgvz+4/jCqgmJqZxxldesn05vccrtME2RLLZNW7iA== + +tinypool@^0.3.1: + version "0.3.1" + resolved "https://registry.yarnpkg.com/tinypool/-/tinypool-0.3.1.tgz#a99c2e446aba9be05d3e1cb756d6aed7af4723b6" + integrity sha512-zLA1ZXlstbU2rlpA4CIeVaqvWq41MTWqLY3FfsAXgC8+f7Pk7zroaJQxDgxn1xNudKW6Kmj4808rPFShUlIRmQ== + +tinyspy@^1.0.2: + version "1.1.1" + resolved "https://registry.yarnpkg.com/tinyspy/-/tinyspy-1.1.1.tgz#0cb91d5157892af38cb2d217f5c7e8507a5bf092" + integrity sha512-UVq5AXt/gQlti7oxoIg5oi/9r0WpF7DGEVwXgqWSMmyN16+e3tl5lIvTaOpJ3TAtu5xFzWccFRM4R5NaWHF+4g== + +to-regex-range@^5.0.1: + version "5.0.1" + resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4" + integrity sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ== + dependencies: + is-number "^7.0.0" + +ts-morph@^17.0.1: + version "17.0.1" + resolved "https://registry.yarnpkg.com/ts-morph/-/ts-morph-17.0.1.tgz#d85df4fcf9a1fcda1b331d52c00655f381c932d1" + integrity sha512-10PkHyXmrtsTvZSL+cqtJLTgFXkU43Gd0JCc0Rw6GchWbqKe0Rwgt1v3ouobTZwQzF1mGhDeAlWYBMGRV7y+3g== + dependencies: + "@ts-morph/common" "~0.18.0" + 
code-block-writer "^11.0.3" + +tsx@^3.12.3: + version "3.12.3" + resolved "https://registry.yarnpkg.com/tsx/-/tsx-3.12.3.tgz#b29f6c9246d4e3ea46451cd81d7cbc98f45c4b8a" + integrity sha512-Wc5BFH1xccYTXaQob+lEcimkcb/Pq+0en2s+ruiX0VEIC80nV7/0s7XRahx8NnsoCnpCVUPz8wrqVSPi760LkA== + dependencies: + "@esbuild-kit/cjs-loader" "^2.4.2" + "@esbuild-kit/core-utils" "^3.0.0" + "@esbuild-kit/esm-loader" "^2.5.5" + optionalDependencies: + fsevents "~2.3.2" + +type-detect@^4.0.0, type-detect@^4.0.5: + version "4.0.8" + resolved "https://registry.yarnpkg.com/type-detect/-/type-detect-4.0.8.tgz#7646fb5f18871cfbb7749e69bd39a6388eb7450c" + integrity sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g== + +typescript@^4.9.5: + version "4.9.5" + resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.9.5.tgz#095979f9bcc0d09da324d58d03ce8f8374cbe65a" + integrity sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g== + +ufo@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/ufo/-/ufo-1.1.0.tgz#a5c4c814b0a98f7e0ca42c478688663fd3e3c037" + integrity sha512-LQc2s/ZDMaCN3QLpa+uzHUOQ7SdV0qgv3VBXOolQGXTaaZpIur6PwUclF5nN2hNkiTRcUugXd1zFOW3FLJ135Q== + +vite-node@0.28.5: + version "0.28.5" + resolved "https://registry.yarnpkg.com/vite-node/-/vite-node-0.28.5.tgz#56d0f78846ea40fddf2e28390899df52a4738006" + integrity sha512-LmXb9saMGlrMZbXTvOveJKwMTBTNUH66c8rJnQ0ZPNX+myPEol64+szRzXtV5ORb0Hb/91yq+/D3oERoyAt6LA== + dependencies: + cac "^6.7.14" + debug "^4.3.4" + mlly "^1.1.0" + pathe "^1.1.0" + picocolors "^1.0.0" + source-map "^0.6.1" + source-map-support "^0.5.21" + vite "^3.0.0 || ^4.0.0" + +"vite@^3.0.0 || ^4.0.0": + version "4.1.4" + resolved "https://registry.yarnpkg.com/vite/-/vite-4.1.4.tgz#170d93bcff97e0ebc09764c053eebe130bfe6ca0" + integrity sha512-3knk/HsbSTKEin43zHu7jTwYWv81f8kgAL99G5NWBcA1LKvtvcVAC4JjBH1arBunO9kQka+1oGbrMKOjk4ZrBg== + dependencies: + esbuild "^0.16.14" + postcss 
"^8.4.21" + resolve "^1.22.1" + rollup "^3.10.0" + optionalDependencies: + fsevents "~2.3.2" + +vitest@^0.28.5: + version "0.28.5" + resolved "https://registry.yarnpkg.com/vitest/-/vitest-0.28.5.tgz#94410a8924cd7189e4f1adffa8c5cde809cbf2f9" + integrity sha512-pyCQ+wcAOX7mKMcBNkzDwEHRGqQvHUl0XnoHR+3Pb1hytAHISgSxv9h0gUiSiYtISXUU3rMrKiKzFYDrI6ZIHA== + dependencies: + "@types/chai" "^4.3.4" + "@types/chai-subset" "^1.3.3" + "@types/node" "*" + "@vitest/expect" "0.28.5" + "@vitest/runner" "0.28.5" + "@vitest/spy" "0.28.5" + "@vitest/utils" "0.28.5" + acorn "^8.8.1" + acorn-walk "^8.2.0" + cac "^6.7.14" + chai "^4.3.7" + debug "^4.3.4" + local-pkg "^0.4.2" + pathe "^1.1.0" + picocolors "^1.0.0" + source-map "^0.6.1" + std-env "^3.3.1" + strip-literal "^1.0.0" + tinybench "^2.3.1" + tinypool "^0.3.1" + tinyspy "^1.0.2" + vite "^3.0.0 || ^4.0.0" + vite-node "0.28.5" + why-is-node-running "^2.2.2" + +why-is-node-running@^2.2.2: + version "2.2.2" + resolved "https://registry.yarnpkg.com/why-is-node-running/-/why-is-node-running-2.2.2.tgz#4185b2b4699117819e7154594271e7e344c9973e" + integrity sha512-6tSwToZxTOcotxHeA+qGCq1mVzKR3CwcJGmVcY+QE8SHy6TnpFnh8PAvPNHYr7EcuVeG0QSMxtYCuO1ta/G/oA== + dependencies: + siginfo "^2.0.0" + stackback "0.0.2" + +yocto-queue@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-1.0.0.tgz#7f816433fb2cbc511ec8bf7d263c3b58a1a3c251" + integrity sha512-9bnSc/HEW2uRy67wc+T8UwauLuPJVn28jb+GtJY16iiKWyvmYJRXVT4UamsAEGQfPohgr2q4Tq0sQbQlxTfi1g== From f7fe7174b0cfd38156cdec49a7d10a354380852f Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 24 Feb 2023 14:07:07 +0100 Subject: [PATCH 022/207] Update README.md, add tests, fix disallowed special bug --- Cargo.toml | 1 + README.md | 103 ++++++++++++++++++++++++++--- package.json | 24 +++++++ src/lib.rs | 20 ++++-- tests/test_simple_public.test.ts | 107 +++++++++++++++++++++++++------ 5 files changed, 220 insertions(+), 35 deletions(-) diff --git a/Cargo.toml 
b/Cargo.toml index 34b1c8fe..2ee67eeb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ bstr = "1.0.1" [profile.release] incremental = true opt-level = "s" +lto = true [features] default = ["inline"] diff --git a/README.md b/README.md index f2c7c5fb..82aa2c90 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,6 @@ The open source version of `tiktoken` can be installed from NPM: npm install @dqbd/tiktoken ``` -> Please note there are some missing features which are present in the Python version but not in the JS version. - ## Usage Basic usage follows: @@ -48,14 +46,99 @@ const encoder = new Tiktoken( ); ``` -## Acknowledgements +## Compatibility -- https://github.com/zurawiki/tiktoken-rs +As this is a WASM library, there might be some issues with specific runtimes. If you encounter any issues, please open an issue. + +| Runtime | Status | Notes | +| ------------------- | ------ | ------------------------------- | +| Node.js | ✅ | | +| Bun | ✅ | | +| Vite | ✅ | See [here](#vite) for notes | +| Next.js | ✅ 🚧 | See [here](#nextjs) for caveats | +| Vercel Edge Runtime | 🚧 | Work in progress | +| Cloudflare Workers | 🚧 | Untested | +| Deno | ❌ | Currently unsupported | + +### [Vite](#vite) + +If you are using Vite, you will need to add both the `vite-plugin-wasm` and `vite-plugin-top-level-await`. Add the following to your `vite.config.js`: + +```js +import wasm from "vite-plugin-wasm"; +import topLevelAwait from "vite-plugin-top-level-await"; +import { defineConfig } from "vite"; + +export default defineConfig({ + plugins: [wasm(), topLevelAwait()], +}); +``` + +### [Next.js](#nextjs) + +Both API routes and `/pages` are supported with some caveats. To overcome issues with importing `/node` variant and incorrect `__dirname` resolution, you can import the package from `@dqbd/tiktoken/bundler` instead. 
-## Tasks to do before creating an upstream PR +```typescript +import { get_encoding } from "@dqbd/tiktoken/bundler"; +import { NextApiRequest, NextApiResponse } from "next"; + +export default function handler(req: NextApiRequest, res: NextApiResponse) { + return res.status(200).json({ + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + message: get_encoding("gpt2").encode(`Hello World ${Math.random()}`), + }); +} +``` + +Additional Webpack configuration is also required, see https://github.com/vercel/next.js/issues/29362. + +```typescript +class WasmChunksFixPlugin { + apply(compiler) { + compiler.hooks.thisCompilation.tap("WasmChunksFixPlugin", (compilation) => { + compilation.hooks.processAssets.tap( + { name: "WasmChunksFixPlugin" }, + (assets) => + Object.entries(assets).forEach(([pathname, source]) => { + if (!pathname.match(/\.wasm$/)) return; + compilation.deleteAsset(pathname); + + const name = pathname.split("/")[1]; + const info = compilation.assetsInfo.get(pathname); + compilation.emitAsset(name, source, info); + }) + ); + }); + } +} + +const config = { + webpack(config, { isServer, dev }) { + config.experiments = { + asyncWebAssembly: true, + layers: true, + }; + + if (!dev && isServer) { + config.output.webassemblyModuleFilename = "chunks/[id].wasm"; + config.plugins.push(new WasmChunksFixPlugin()); + } + + return config; + }, +}; +``` -1. Add back the pyo3 bindings, so we can build both Python version and JS version at the same time -2. Allow loading of embeddings via an argument. This is needed to make the resulting WASM blob smaller, as it is currently inlined during build. -3. Examine the possibility of reintroduction of multithreading (not sure, if that is even needed however due to the sheer perf. difference between other JS libraries) -4. Feature parity match - adding special tokens support etc. -5. Investigate better packaging support for browsers and other runtimes. 
+To properly resolve `tsconfig.json`, use either `moduleResolution: "node16"` or `moduleResolution: "nodenext"`: + +```json +{ + "compilerOptions": { + "moduleResolution": "node16" + } +} +``` + +## Acknowledgements + +- https://github.com/zurawiki/tiktoken-rs diff --git a/package.json b/package.json index 25f7231c..4d033085 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,30 @@ "main": "dist/node/_tiktoken.js", "browser": "dist/web/_tiktoken.js", "types": "dist/node/_tiktoken.d.ts", + "exports": { + ".": { + "node": { + "types": "./dist/node/_tiktoken.d.ts", + "default": "./dist/node/_tiktoken.js" + }, + "default": { + "types": "./dist/bundler/_tiktoken.d.js", + "default": "./dist/bundler/_tiktoken.mjs" + } + }, + "./bundler": { + "types": "./dist/bundler/_tiktoken.d.ts", + "default": "./dist/bundler/_tiktoken.mjs" + }, + "./web": { + "types": "./dist/web/_tiktoken.d.ts", + "default": "./dist/web/_tiktoken.js" + }, + "./node": { + "types": "./dist/node/_tiktoken.d.ts", + "default": "./dist/node/_tiktoken.js" + } + }, "repository": { "type": "git", "url": "https://github.com/dqbd/tiktoken" diff --git a/src/lib.rs b/src/lib.rs index bd2aed16..e4fca796 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -123,6 +123,7 @@ impl CoreBPEConstructor { #[wasm_bindgen] pub struct Tiktoken { + name: Option, special_tokens_set: HashSet, bpe: CoreBPE, } @@ -138,6 +139,7 @@ impl Tiktoken { ); Tiktoken { + name: None, special_tokens_set: constructor .special_tokens .keys() @@ -171,6 +173,7 @@ impl Tiktoken { } Ok(Tiktoken { + name: Some(String::from(encoding)), // TODO: can we avoid cloning here? 
special_tokens_set: constructor .special_tokens @@ -186,6 +189,11 @@ impl Tiktoken { }) } + #[wasm_bindgen(getter)] + pub fn name(&self) -> Option { + self.name.clone() + } + pub fn encode( &self, text: &str, @@ -261,13 +269,11 @@ impl Tiktoken { _ => Ok(JsValue::into_serde(&allowed_special_param).unwrap_or_default()), }?; - let disallowed_special: HashSet = match disallowed_special_param.as_string() { - Some(value) => match value.as_str() { - "all" => Ok(&self.special_tokens_set - &allowed_special), - _ => Err(JsError::new("Invalid value for disallowed_special")), - }, - _ => Ok(JsValue::into_serde(&disallowed_special_param).unwrap_or_default()), - }?; + let disallowed_special: HashSet = + match JsValue::into_serde::>(&disallowed_special_param) { + Ok(value) => value, + Err(_) => &self.special_tokens_set - &allowed_special, + }; if !disallowed_special.is_empty() { if let Some(found) = Tiktoken::special_token_regex(&disallowed_special).find(text)? { diff --git a/tests/test_simple_public.test.ts b/tests/test_simple_public.test.ts index 115a9467..e39dbe36 100644 --- a/tests/test_simple_public.test.ts +++ b/tests/test_simple_public.test.ts @@ -1,9 +1,8 @@ -import { it, expect } from "vitest"; +import { it, expect, describe } from "vitest"; import { encoding_for_model, get_encoding } from "../"; it("encoding_for_model initialization", () => { expect(() => encoding_for_model("gpt2")).not.toThrowError(); - // @ts-expect-error expect(() => encoding_for_model("gpt2-unknown")).toThrowError( "Invalid model" @@ -12,32 +11,76 @@ it("encoding_for_model initialization", () => { it("get_encoding initialization", () => { expect(() => get_encoding("cl100k_base")).not.toThrowError(); - // @ts-expect-error expect(() => get_encoding("unknown")).toThrowError("Invalid encoding"); }); -it("test_simple", () => { +describe("gpt2", () => { const enc = get_encoding("gpt2"); - expect(enc.encode("hello world")).toStrictEqual( - new Uint32Array([31373, 995]) - ); - expect( - new 
TextDecoder().decode(enc.decode(new Uint32Array([31373, 995]))) - ).toStrictEqual("hello world"); + it("encodes hello world string", () => { + expect(enc.encode("hello world")).toStrictEqual( + new Uint32Array([31373, 995]) + ); + }); - expect(enc.encode("hello <|endoftext|>", "all")).toStrictEqual( - new Uint32Array([31373, 220, 50256]) - ); + it("decodes hello world string", () => { + expect( + new TextDecoder().decode(enc.decode(new Uint32Array([31373, 995]))) + ).toStrictEqual("hello world"); + }); + + it("encodes hello world string, all allowed special characters", () => { + expect(enc.encode("hello <|endoftext|>", "all")).toStrictEqual( + new Uint32Array([31373, 220, 50256]) + ); + }); }); -it("test_simple", () => { - const decoder = new TextDecoder(); +describe("cl100k_base", () => { const enc = get_encoding("cl100k_base"); - expect(enc.encode("hello world")).toStrictEqual( - new Uint32Array([15339, 1917]) - ); + + it("encodes hello world string", () => { + expect(enc.encode("hello world")).toStrictEqual( + new Uint32Array([15339, 1917]) + ); + }); + + it("decodes hello world string", () => { + expect( + new TextDecoder().decode(enc.decode(new Uint32Array([15339, 1917]))) + ).toStrictEqual("hello world"); + }); + + it("encodes hello world string, all allowed special characters", () => { + expect(enc.encode("hello <|endoftext|>", "all")).toStrictEqual( + new Uint32Array([15339, 220, 100257]) + ); + }); +}); + +it("test_simple", () => { + const encodings = [ + "gpt2", + "r50k_base", + "p50k_base", + "p50k_edit", + "cl100k_base", + ] as const; + + for (const encoding of encodings) { + const enc = get_encoding(encoding); + for (let token = 0; token < 10_000; token++) { + expect( + enc.encode_single_token(enc.decode_single_token_bytes(token)) + ).toStrictEqual(token); + } + } +}); + +it("test_encoding_for_model", () => { + expect(encoding_for_model("gpt2").name).toEqual("gpt2"); + expect(encoding_for_model("text-davinci-003").name).toEqual("p50k_base"); }); 
it("test_custom_tokens", () => { @@ -49,3 +92,31 @@ it("test_custom_tokens", () => { new Uint32Array([100264, 9288, 100265]) ); }); + +it("encode string tokens", () => { + const enc = get_encoding("gpt2", { "<|im_start|>": 100264 }); + + expect(enc.encode("hello world")).toStrictEqual( + new Uint32Array([31373, 995]) + ); + + expect(enc.encode("<|endoftext|>", ["<|endoftext|>"])).toStrictEqual( + new Uint32Array([50256]) + ); + + expect(enc.encode("<|endoftext|>", "all")).toStrictEqual( + new Uint32Array([50256]) + ); + + expect(() => enc.encode("<|endoftext|>")).toThrowError( + "The text contains a special token that is not allowed" + ); + + expect(() => enc.encode("<|im_start|>")).toThrowError( + "The text contains a special token that is not allowed" + ); + + expect(enc.encode("<|endoftext|>", [], [])).toStrictEqual( + new Uint32Array([27, 91, 437, 1659, 5239, 91, 29]) + ); +}); From d38c936f17393a5735bac69e57e2bbbee06855b9 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 24 Feb 2023 14:24:17 +0100 Subject: [PATCH 023/207] Validate the values properly --- src/lib.rs | 16 +++++++++++----- tests/test_simple_public.test.ts | 14 ++++++++++++++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e4fca796..3cb06ed5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -269,11 +269,17 @@ impl Tiktoken { _ => Ok(JsValue::into_serde(&allowed_special_param).unwrap_or_default()), }?; - let disallowed_special: HashSet = - match JsValue::into_serde::>(&disallowed_special_param) { - Ok(value) => value, - Err(_) => &self.special_tokens_set - &allowed_special, - }; + let disallowed_special = JsValue::into_serde::>(&disallowed_special_param) + .or_else(|_| { + match disallowed_special_param + .as_string() + .unwrap_or(String::from("all")) + .as_str() + { + "all" => Ok(&self.special_tokens_set - &allowed_special), + _ => Err(JsError::new("Invalid value for disallowed_special")), + } + })?; if !disallowed_special.is_empty() { if let 
Some(found) = Tiktoken::special_token_regex(&disallowed_special).find(text)? { diff --git a/tests/test_simple_public.test.ts b/tests/test_simple_public.test.ts index e39dbe36..7bb5f012 100644 --- a/tests/test_simple_public.test.ts +++ b/tests/test_simple_public.test.ts @@ -120,3 +120,17 @@ it("encode string tokens", () => { new Uint32Array([27, 91, 437, 1659, 5239, 91, 29]) ); }); + +it("invalid (dis)allowed_tokens", () => { + const enc = get_encoding("gpt2"); + + // @ts-expect-error + expect(() => enc.encode("hello world", "invalid-string")).toThrowError( + "Invalid value for allowed_special" + ); + + // @ts-expect-error + expect(() => enc.encode("hello world", [], "invalid-string")).toThrowError( + "Invalid value for disallowed_special" + ); +}); From 53628a47a2849cdf6ccd7db720161f68ae4ed543 Mon Sep 17 00:00:00 2001 From: Jack Gerrits Date: Fri, 24 Feb 2023 10:41:35 -0500 Subject: [PATCH 024/207] Improve error handling in JNI functions --- jni/src/lib.rs | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/jni/src/lib.rs b/jni/src/lib.rs index 32f2b340..778bef84 100644 --- a/jni/src/lib.rs +++ b/jni/src/lib.rs @@ -15,11 +15,11 @@ use jni::sys::{jarray, jlong}; use _tiktoken_core::{self, CoreBPENative}; -use jni::errors::Error; +type Result = std::result::Result>; -fn unwrap_or_throw(env: &JNIEnv, result: Result, default: T) -> T { +fn unwrap_or_throw(env: &JNIEnv, result: Result, default: T) -> T { // Check if an exception is already thrown - if env.exception_check().unwrap() { + if env.exception_check().expect("exception_check() failed") { return default; } @@ -28,9 +28,9 @@ fn unwrap_or_throw(env: &JNIEnv, result: Result, default: T) -> T { Err(error) => { let exception_class = env .find_class("java/lang/Exception") - .unwrap(); + .expect("Unable to find exception class"); env.throw_new(exception_class, format!("{}", error)) - .unwrap(); + .expect("Unable to throw exception"); default } } @@ -38,7 +38,7 @@ fn 
unwrap_or_throw(env: &JNIEnv, result: Result, default: T) -> T { #[no_mangle] pub extern "system" fn Java_tiktoken_Encoding_init(env: JNIEnv, obj: JObject, model_name: JString) { - let result = || -> Result<(), Error> { + let result = || -> Result<()> { // First, we have to get the string out of Java. Check out the `strings` // module for more info on how this works. let model_name: String = env @@ -46,27 +46,24 @@ pub extern "system" fn Java_tiktoken_Encoding_init(env: JNIEnv, obj: JObject, mo .into(); let encoding_name = _tiktoken_core::openai_public::MODEL_TO_ENCODING - .get(&model_name) - .expect("Unable to find model"); + .get(&model_name).ok_or("Unable to find model")?; // TODO: this is actually mergable_ranks (lazy) - let mut encoding = _tiktoken_core::openai_public::REGISTRY - .get(encoding_name) - .expect("Unable to find encoding"); + let encoding = _tiktoken_core::openai_public::REGISTRY + .get(encoding_name).ok_or("Unable to find encoding")?; // TODO: initialize the CoreBPE object // TODO: this should be CoreBPE let bpe_native = CoreBPENative::new( - encoding.get().unwrap(), + encoding.get()?, encoding.special_tokens.clone(), &encoding.pat_str, - ) - .unwrap(); + )?; Ok(unsafe { - env.set_rust_field(obj, "handle", bpe_native).unwrap(); + env.set_rust_field(obj, "handle", bpe_native)?; }) }(); @@ -76,7 +73,7 @@ pub extern "system" fn Java_tiktoken_Encoding_init(env: JNIEnv, obj: JObject, mo #[no_mangle] pub extern "system" fn Java_tiktoken_Encoding_destroy(env: JNIEnv, obj: JObject) { unsafe { - let _: CoreBPENative = env.take_rust_field(obj, "handle").unwrap(); + let _: CoreBPENative = env.take_rust_field(obj, "handle").expect("Unable to get handle during destruction"); } } @@ -88,7 +85,7 @@ pub extern "system" fn Java_tiktoken_Encoding_encode( allowedSpecialTokens: jarray, maxTokenLength: jlong, ) -> jarray { - let result = || -> Result { + let result = || -> Result { let encoding: MutexGuard = unsafe { env.get_rust_field(obj, "handle")? 
}; let enc = encoding; @@ -109,8 +106,8 @@ pub extern "system" fn Java_tiktoken_Encoding_encode( let (tokens, _, _) = enc._encode_native(&input, &v2, Some(maxTokenLength as usize)); - let mut output = env - .new_long_array(tokens.len().try_into().unwrap())?; + let output = env + .new_long_array(tokens.len().try_into()?)?; let array_of_u64 = tokens.iter().map(|x| *x as i64).collect::>(); env.set_long_array_region(output, 0, array_of_u64.as_slice())?; From 87603e98137c0fe01cd4c57a379dc3900375305d Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 25 Feb 2023 00:21:23 +0100 Subject: [PATCH 025/207] Bump version to 0.4.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 4d033085..5692f79d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "0.3.0", + "version": "0.4.0", "description": "Javascript bindings for tiktoken", "files": [ "dist/**/*", From e3ab3f643e79ef560823e4161f380926fd6edf4c Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Mon, 27 Feb 2023 12:17:37 +0100 Subject: [PATCH 026/207] moved config into json --- MANIFEST.in | 1 + core/src/openai_public.rs | 139 ++++++------------ java/pom.xml | 43 ++++-- java/src/main/java/tiktoken/Encoding.java | 15 +- ...{EncodingTest.java => EncodingTestIT.java} | 5 +- java/src/tiktoken_Encoding.h | 29 ++++ jni/Cargo.toml | 5 + setup.py | 5 +- tests/test_simple_public.py | 15 -- tiktoken/load.py | 4 - tiktoken/model.py | 47 +----- tiktoken/model_to_encoding.json | 32 ++++ tiktoken/registry.json | 50 +++++++ tiktoken/registry.py | 72 ++++----- tiktoken_ext/openai_public.py | 87 ----------- 15 files changed, 241 insertions(+), 308 deletions(-) rename java/src/test/java/tiktoken/{EncodingTest.java => EncodingTestIT.java} (79%) create mode 100644 java/src/tiktoken_Encoding.h create mode 100644 tiktoken/model_to_encoding.json create mode 100644 tiktoken/registry.json delete mode 100644 
tiktoken_ext/openai_public.py diff --git a/MANIFEST.in b/MANIFEST.in index 7f25b271..321b66e2 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -6,3 +6,4 @@ global-include py.typed recursive-include scripts *.py recursive-include tests *.py recursive-include src *.rs +include tiktoken *.json \ No newline at end of file diff --git a/core/src/openai_public.rs b/core/src/openai_public.rs index 17ff7e27..2a89843a 100644 --- a/core/src/openai_public.rs +++ b/core/src/openai_public.rs @@ -2,6 +2,7 @@ use rustc_hash::FxHashMap as HashMap; use std::error::Error; use std::sync::RwLock; +use json; #[path = "load.rs"] mod load; @@ -9,105 +10,47 @@ mod load; type Result = std::result::Result>; lazy_static! { - pub static ref REGISTRY: HashMap = [ - EncodingLazy::new( - "gpt2".into(), - Some(50257), - r"'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+".into(), - [ ("<|endoftext|>".into(), 50256), ].into_iter().collect(), - EncoderLoadingStrategy::DataGym( - DataGymDef { - vocab_bpe_file: "https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe".into(), - encoder_json_file: "https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json".into() - } - )), - EncodingLazy::new( - "r50k_base".into(), - Some(50257), - r"'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+".into(), - [ ("<|endoftext|>".into(), 50256), ].into_iter().collect(), - EncoderLoadingStrategy::BPE("https://openaipublic.blob.core.windows.net/encodings/r50k_base.tiktoken".into()) - ), - EncodingLazy::new( - "p50k_base".into(), - Some(50281), - r"'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+".into(), - [ ("<|endoftext|>".into(), 50256), ].into_iter().collect(), - EncoderLoadingStrategy::BPE("https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken".into()) - ), - EncodingLazy::new( - "p50k_edit".into(), - Some(50281), - r"'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| 
?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+".into(), - [ - ("<|endoftext|>".into(), 50256), - ("<|fim_prefix|>".into(), 50281), - ("<|fim_middle|>".into(), 50282), - ("<|fim_suffix|>".into(), 50283), - ].into_iter().collect(), - EncoderLoadingStrategy::BPE("https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken".into()) - ), - EncodingLazy::new( - "cl100k_base".into(), - None, - r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+".into(), - [ - ("<|endoftext|>".into(), 100257), - ("<|fim_prefix|>".into(), 100258), - ("<|fim_middle|>".into(), 100259), - ("<|fim_suffix|>".into(), 100260), - ("<|endofprompt|>".into(), 100276), - ].into_iter().collect(), - EncoderLoadingStrategy::BPE("https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken".into()) - ), - ] - .into_iter() + pub static ref REGISTRY: HashMap = { + // TODO: error handling + json::parse(include_str!("../../tiktoken/registry.json")) + .expect("Failed to parse internal JSON") + .entries() + .map(|(key, value)| { + let loading_strategy = if value.has_key("data_gym_to_mergeable_bpe_ranks") { + EncoderLoadingStrategy::DataGym( + DataGymDef { + vocab_bpe_file: value["data_gym_to_mergeable_bpe_ranks"]["vocab_bpe_file"].as_str().expect("error").into(), + encoder_json_file: value["data_gym_to_mergeable_bpe_ranks"]["encoder_json_file"].as_str().expect("error").into() + }) + } + else if value.has_key("load_tiktoken_bpe") { + EncoderLoadingStrategy::BPE(value["load_tiktoken_bpe"].as_str().expect("fail").into()) + } + else { + panic!("Invalid encoding"); + }; + + EncodingLazy::new( + key.into(), + value["explicit_n_vocab"].as_usize(), + value["pat_str"].as_str().expect("foo").into(), + value["special_tokens"].entries() + .map(|(key, value)| (key.into(), value.as_usize().expect("foo"))) + .collect::>(), + loading_strategy + ) + }) + .map(|enc| (enc.name.clone(), enc)) - .collect::>(); - - - - pub static ref MODEL_TO_ENCODING: HashMap 
= [ - // text - ("text-davinci-003", "p50k_base"), - ("text-davinci-002", "p50k_base"), - ("text-davinci-001", "r50k_base"), - ("text-curie-001", "r50k_base"), - ("text-babbage-001", "r50k_base"), - ("text-ada-001", "r50k_base"), - ("davinci", "r50k_base"), - ("curie", "r50k_base"), - ("babbage", "r50k_base"), - ("ada", "r50k_base"), - // code - ("code-davinci-002", "p50k_base"), - ("code-davinci-001", "p50k_base"), - ("code-cushman-002", "p50k_base"), - ("code-cushman-001", "p50k_base"), - ("davinci-codex", "p50k_base"), - ("cushman-codex", "p50k_base"), - // edit - ("text-davinci-edit-001", "p50k_edit"), - ("code-davinci-edit-001", "p50k_edit"), - // embeddings - ("text-embedding-ada-002", "cl100k_base"), - // old embeddings - ("text-similarity-davinci-001", "r50k_base"), - ("text-similarity-curie-001", "r50k_base"), - ("text-similarity-babbage-001", "r50k_base"), - ("text-similarity-ada-001", "r50k_base"), - ("text-search-davinci-doc-001", "r50k_base"), - ("text-search-curie-doc-001", "r50k_base"), - ("text-search-babbage-doc-001", "r50k_base"), - ("text-search-ada-doc-001", "r50k_base"), - ("code-search-babbage-code-001", "r50k_base"), - ("code-search-ada-code-001", "r50k_base"), - // open source - ("gpt2", "gpt2"), - ] - .into_iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect::>(); + .collect::>() + }; + + pub static ref MODEL_TO_ENCODING: HashMap = + json::parse(include_str!("../../tiktoken/model_to_encoding.json")) + .expect("Failed to parse internal JSON") + .entries() + .map(|(k, v)| (k.into(), v.as_str().expect("foo").into())) + .collect::>(); } #[derive(Clone, PartialEq, Eq, Hash)] diff --git a/java/pom.xml b/java/pom.xml index c3a32888..b93feb95 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -10,6 +10,7 @@ tiktoken https://github.com/openai/tiktoken + jar UTF-8 @@ -24,9 +25,23 @@ 4.11 test + + org.scijava + native-lib-loader + 2.4.0 + + + + ${project.build.directory}/../../target/release/ + 
${project.build.directory}/classes/natives/linux_64 + + lib_tiktoken_jni.so + + + @@ -69,22 +84,18 @@ 3.0.0 - org.apache.maven.plugins - maven-surefire-plugin - 2.17 - - - surefire-test - test - - test - - - - - -Djava.library.path=${project.build.directory}/../../target/debug/ - - + org.apache.maven.plugins + maven-failsafe-plugin + 2.22.1 + + + + integration-test + verify + + + + diff --git a/java/src/main/java/tiktoken/Encoding.java b/java/src/main/java/tiktoken/Encoding.java index cc041e48..bfc75868 100644 --- a/java/src/main/java/tiktoken/Encoding.java +++ b/java/src/main/java/tiktoken/Encoding.java @@ -1,9 +1,19 @@ package tiktoken; +import org.scijava.nativelib.NativeLoader; +import java.io.IOException; + public class Encoding implements AutoCloseable { static { - System.loadLibrary("_tiktoken_jni"); + // TODO: unpack the library from the jar + // System.loadLibrary("_tiktoken_jni"); + try { + NativeLoader.loadLibrary("_tiktoken_jni"); + } + catch(IOException e) { + throw new RuntimeException(e); + } } // initialized by init @@ -11,10 +21,9 @@ public class Encoding implements AutoCloseable private native void init(String modelName); - public native long[] encode(String text, String[] allowedSpecialTokens, long maxTokenLength); - private native void destroy(); + public native long[] encode(String text, String[] allowedSpecialTokens, long maxTokenLength); public Encoding(String modelName) { this.init(modelName); diff --git a/java/src/test/java/tiktoken/EncodingTest.java b/java/src/test/java/tiktoken/EncodingTestIT.java similarity index 79% rename from java/src/test/java/tiktoken/EncodingTest.java rename to java/src/test/java/tiktoken/EncodingTestIT.java index 591c5261..602a1ef9 100644 --- a/java/src/test/java/tiktoken/EncodingTest.java +++ b/java/src/test/java/tiktoken/EncodingTestIT.java @@ -1,11 +1,11 @@ package tiktoken; import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertTrue; import org.junit.Test; -public class 
EncodingTest +// run test: mvn failsafe:integration-test +public class EncodingTestIT { @Test public void shouldAnswerWithTrue() throws Exception @@ -16,7 +16,6 @@ public void shouldAnswerWithTrue() throws Exception encoding.close(); - assertTrue( true ); assertArrayEquals(new long[] {9288}, a); } } diff --git a/java/src/tiktoken_Encoding.h b/java/src/tiktoken_Encoding.h new file mode 100644 index 00000000..030d77fe --- /dev/null +++ b/java/src/tiktoken_Encoding.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class tiktoken_Encoding */ + +#ifndef _Included_tiktoken_Encoding +#define _Included_tiktoken_Encoding +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: tiktoken_Encoding + * Method: init + * Signature: (Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_tiktoken_Encoding_init + (JNIEnv *, jobject, jstring); + +/* + * Class: tiktoken_Encoding + * Method: encode + * Signature: (Ljava/lang/String;[Ljava/lang/String;J)[J + */ +JNIEXPORT jlongArray JNICALL Java_tiktoken_Encoding_encode + (JNIEnv *, jobject, jstring, jobjectArray, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/jni/Cargo.toml b/jni/Cargo.toml index 0a7651d7..9ac05ad3 100644 --- a/jni/Cargo.toml +++ b/jni/Cargo.toml @@ -15,3 +15,8 @@ jni = "0.20.0" [profile.release] incremental = true +opt-level = 'z' # Optimize for size +lto = true # Enable link-time optimization +codegen-units = 1 # Reduce number of codegen units to increase optimizations +panic = 'abort' # Abort on panic +strip = true # Strip symbols from binary* \ No newline at end of file diff --git a/setup.py b/setup.py index 96ad5d6a..246487b0 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,8 @@ debug=False, ) ], - package_data={"tiktoken": ["py.typed"]}, - packages=["tiktoken", "tiktoken_ext"], + include_package_data=True, + package_data={ "tiktoken": ["py.typed", "registry.json", "model_to_encoding.json"] }, + packages=["tiktoken"], zip_safe=False, ) diff --git 
a/tests/test_simple_public.py b/tests/test_simple_public.py index ab63babd..44109234 100644 --- a/tests/test_simple_public.py +++ b/tests/test_simple_public.py @@ -24,18 +24,3 @@ def test_encoding_for_model(): assert enc.name == "gpt2" enc = tiktoken.encoding_for_model("text-davinci-003") assert enc.name == "p50k_base" - -def test_loading(): - x = tiktoken.load.data_gym_to_mergeable_bpe_ranks( - vocab_bpe_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe", - encoder_json_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json", - ) - - print(len(x)) - - y = tiktoken._tiktoken.py_data_gym_to_mergable_bpe_ranks( - vocab_bpe_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe", - encoder_json_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json", - ) - - print(len(y)) \ No newline at end of file diff --git a/tiktoken/load.py b/tiktoken/load.py index c8f3dbd9..5537ecf4 100644 --- a/tiktoken/load.py +++ b/tiktoken/load.py @@ -55,7 +55,6 @@ def data_gym_to_mergeable_bpe_ranks( # NB: do not add caching to this function rank_to_intbyte = [b for b in range(2**8) if chr(b).isprintable() and chr(b) != " "] - print(f"rank_to_intbyte: {len(rank_to_intbyte)}") data_gym_byte_to_byte = {chr(b): b for b in rank_to_intbyte} n = 0 for b in range(2**8): @@ -75,9 +74,6 @@ def decode_data_gym(value: str) -> bytes: # add the single byte tokens bpe_ranks = {bytes([b]): i for i, b in enumerate(rank_to_intbyte)} - # print(len(rank_to_intbyte)) - print(f"py data gym: {len(data_gym_byte_to_byte)} '{data_gym_byte_to_byte[chr(288)]}'") - # add the merged tokens n = len(bpe_ranks) for first, second in bpe_merges: diff --git a/tiktoken/model.py b/tiktoken/model.py index 66e9e046..b3d3ba59 100644 --- a/tiktoken/model.py +++ b/tiktoken/model.py @@ -2,47 +2,16 @@ from .core import Encoding from .registry import get_encoding +import json -# TODO: this will likely be replaced by an API 
endpoint -MODEL_TO_ENCODING: dict[str, str] = { - # text - "text-davinci-003": "p50k_base", - "text-davinci-002": "p50k_base", - "text-davinci-001": "r50k_base", - "text-curie-001": "r50k_base", - "text-babbage-001": "r50k_base", - "text-ada-001": "r50k_base", - "davinci": "r50k_base", - "curie": "r50k_base", - "babbage": "r50k_base", - "ada": "r50k_base", - # code - "code-davinci-002": "p50k_base", - "code-davinci-001": "p50k_base", - "code-cushman-002": "p50k_base", - "code-cushman-001": "p50k_base", - "davinci-codex": "p50k_base", - "cushman-codex": "p50k_base", - # edit - "text-davinci-edit-001": "p50k_edit", - "code-davinci-edit-001": "p50k_edit", - # embeddings - "text-embedding-ada-002": "cl100k_base", - # old embeddings - "text-similarity-davinci-001": "r50k_base", - "text-similarity-curie-001": "r50k_base", - "text-similarity-babbage-001": "r50k_base", - "text-similarity-ada-001": "r50k_base", - "text-search-davinci-doc-001": "r50k_base", - "text-search-curie-doc-001": "r50k_base", - "text-search-babbage-doc-001": "r50k_base", - "text-search-ada-doc-001": "r50k_base", - "code-search-babbage-code-001": "r50k_base", - "code-search-ada-code-001": "r50k_base", - # open source - "gpt2": "gpt2", -} +try: + import importlib.resources as pkg_resources +except ImportError: + # Try backported to PY<37 `importlib_resources`. 
+ import importlib_resources as pkg_resources +# TODO: this will likely be replaced by an API endpoint +MODEL_TO_ENCODING: dict[str, str] = json.loads(pkg_resources.read_text("tiktoken", "model_to_encoding.json")) def encoding_for_model(model_name: str) -> Encoding: try: diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json new file mode 100644 index 00000000..987ba143 --- /dev/null +++ b/tiktoken/model_to_encoding.json @@ -0,0 +1,32 @@ +{ + "text-davinci-003": "p50k_base", + "text-davinci-002": "p50k_base", + "text-davinci-001": "r50k_base", + "text-curie-001": "r50k_base", + "text-babbage-001": "r50k_base", + "text-ada-001": "r50k_base", + "davinci": "r50k_base", + "curie": "r50k_base", + "babbage": "r50k_base", + "ada": "r50k_base", + "code-davinci-002": "p50k_base", + "code-davinci-001": "p50k_base", + "code-cushman-002": "p50k_base", + "code-cushman-001": "p50k_base", + "davinci-codex": "p50k_base", + "cushman-codex": "p50k_base", + "text-davinci-edit-001": "p50k_edit", + "code-davinci-edit-001": "p50k_edit", + "text-embedding-ada-002": "cl100k_base", + "text-similarity-davinci-001": "r50k_base", + "text-similarity-curie-001": "r50k_base", + "text-similarity-babbage-001": "r50k_base", + "text-similarity-ada-001": "r50k_base", + "text-search-davinci-doc-001": "r50k_base", + "text-search-curie-doc-001": "r50k_base", + "text-search-babbage-doc-001": "r50k_base", + "text-search-ada-doc-001": "r50k_base", + "code-search-babbage-code-001": "r50k_base", + "code-search-ada-code-001": "r50k_base", + "gpt2": "gpt2" +} \ No newline at end of file diff --git a/tiktoken/registry.json b/tiktoken/registry.json new file mode 100644 index 00000000..aa3ee530 --- /dev/null +++ b/tiktoken/registry.json @@ -0,0 +1,50 @@ +{ + "gpt2": { + "data_gym_to_mergeable_bpe_ranks": { + "vocab_bpe_file": "https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe", + "encoder_json_file": 
"https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json" + }, + "explicit_n_vocab": 50257, + "pat_str": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", + "special_tokens": { + "<|endoftext|>": 50256 + } + }, + "r50k_base": { + "load_tiktoken_bpe": "https://openaipublic.blob.core.windows.net/encodings/r50k_base.tiktoken", + "explicit_n_vocab": 50257, + "pat_str": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", + "special_tokens": { + "<|endoftext|>": 50256 + } + }, + "p50k_base": { + "load_tiktoken_bpe": "https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken", + "explicit_n_vocab": 50281, + "pat_str": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", + "special_tokens": { + "<|endoftext|>": 50256 + } + }, + "p50k_edit": { + "load_tiktoken_bpe": "https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken", + "special_tokens": { + "<|endoftext|>": 50256, + "<|fim_prefix|>": 50281, + "<|fim_middle|>": 50282, + "<|fim_suffix|>": 50283 + }, + "pat_str": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+" + }, + "cl100k_base": { + "load_tiktoken_bpe": "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken", + "special_tokens": { + "<|endoftext|>": 100257, + "<|fim_prefix|>": 100258, + "<|fim_middle|>": 100259, + "<|fim_suffix|>": 100260, + "<|endofprompt|>": 100276 + }, + "pat_str": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + } +} \ No newline at end of file diff --git a/tiktoken/registry.py b/tiktoken/registry.py index 52d8ec2d..0a55d27e 100644 --- a/tiktoken/registry.py +++ b/tiktoken/registry.py @@ -3,46 +3,32 @@ import importlib import pkgutil import threading +import json from typing import Any, Callable, Optional -import tiktoken_ext - from tiktoken.core import 
Encoding +from tiktoken.load import data_gym_to_mergeable_bpe_ranks, load_tiktoken_bpe _lock = threading.RLock() ENCODINGS: dict[str, Encoding] = {} -ENCODING_CONSTRUCTORS: Optional[dict[str, Callable[[], dict[str, Any]]]] = None - +ENCODING_DEFS: dict[str, Any] = None -def _find_constructors() -> None: - global ENCODING_CONSTRUCTORS - with _lock: - if ENCODING_CONSTRUCTORS is not None: - return - ENCODING_CONSTRUCTORS = {} +def _load_encoding_defs(): + global ENCODING_DEFS + if not ENCODING_DEFS is None: + return ENCODING_DEFS - # tiktoken_ext is a namespace package - # submodules inside tiktoken_ext will be inspected for ENCODING_CONSTRUCTORS attributes - # - we use namespace package pattern so `pkgutil.iter_modules` is fast - # - it's a separate top-level package because namespace subpackages of non-namespace - # packages don't quite do what you want with editable installs - plugin_mods = pkgutil.iter_modules(tiktoken_ext.__path__, tiktoken_ext.__name__ + ".") + try: + import importlib.resources as pkg_resources + except ImportError: + # Try backported to PY<37 `importlib_resources`. 
+ import importlib_resources as pkg_resources - for _, mod_name, _ in plugin_mods: - mod = importlib.import_module(mod_name) - try: - constructors = mod.ENCODING_CONSTRUCTORS - except AttributeError as e: - raise ValueError( - f"tiktoken plugin {mod_name} does not define ENCODING_CONSTRUCTORS" - ) from e - for enc_name, constructor in constructors.items(): - if enc_name in ENCODING_CONSTRUCTORS: - raise ValueError( - f"Duplicate encoding name {enc_name} in tiktoken plugin {mod_name}" - ) - ENCODING_CONSTRUCTORS[enc_name] = constructor + # read registry.json + # note: was trying to place it into /data/registry.json but python packaging is always unhappy + ENCODING_DEFS = json.loads(pkg_resources.read_text("tiktoken", "registry.json")) + return ENCODING_DEFS def get_encoding(encoding_name: str) -> Encoding: if encoding_name in ENCODINGS: @@ -52,22 +38,26 @@ def get_encoding(encoding_name: str) -> Encoding: if encoding_name in ENCODINGS: return ENCODINGS[encoding_name] - if ENCODING_CONSTRUCTORS is None: - _find_constructors() - assert ENCODING_CONSTRUCTORS is not None - - if encoding_name not in ENCODING_CONSTRUCTORS: + _load_encoding_defs() + if encoding_name not in ENCODING_DEFS: raise ValueError(f"Unknown encoding {encoding_name}") - constructor = ENCODING_CONSTRUCTORS[encoding_name] - enc = Encoding(**constructor()) + encoding_def = dict(ENCODING_DEFS[encoding_name]) + encoding_def["name"] = encoding_name + + if "load_tiktoken_bpe" in encoding_def: + encoding_def["mergeable_ranks"] = load_tiktoken_bpe(encoding_def["load_tiktoken_bpe"]) + del encoding_def["load_tiktoken_bpe"] + elif "data_gym_to_mergeable_bpe_ranks" in encoding_def: + encoding_def["mergeable_ranks"] = data_gym_to_mergeable_bpe_ranks(**encoding_def["data_gym_to_mergeable_bpe_ranks"]) + del encoding_def["data_gym_to_mergeable_bpe_ranks"] + else: + raise ValueError(f"Unknown loader {encoding_name}") + enc = Encoding(**encoding_def) ENCODINGS[encoding_name] = enc return enc def list_encoding_names() 
-> list[str]: with _lock: - if ENCODING_CONSTRUCTORS is None: - _find_constructors() - assert ENCODING_CONSTRUCTORS is not None - return list(ENCODING_CONSTRUCTORS) + return list(_load_encoding_defs().keys()) diff --git a/tiktoken_ext/openai_public.py b/tiktoken_ext/openai_public.py deleted file mode 100644 index a64db9f6..00000000 --- a/tiktoken_ext/openai_public.py +++ /dev/null @@ -1,87 +0,0 @@ -from tiktoken.load import data_gym_to_mergeable_bpe_ranks, load_tiktoken_bpe - -ENDOFTEXT = "<|endoftext|>" -FIM_PREFIX = "<|fim_prefix|>" -FIM_MIDDLE = "<|fim_middle|>" -FIM_SUFFIX = "<|fim_suffix|>" -ENDOFPROMPT = "<|endofprompt|>" - - -def gpt2(): - mergeable_ranks = data_gym_to_mergeable_bpe_ranks( - vocab_bpe_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe", - encoder_json_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json", - ) - return { - "name": "gpt2", - "explicit_n_vocab": 50257, - "pat_str": r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""", - "mergeable_ranks": mergeable_ranks, - "special_tokens": {"<|endoftext|>": 50256}, - } - - -def r50k_base(): - mergeable_ranks = load_tiktoken_bpe( - "https://openaipublic.blob.core.windows.net/encodings/r50k_base.tiktoken" - ) - return { - "name": "r50k_base", - "explicit_n_vocab": 50257, - "pat_str": r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""", - "mergeable_ranks": mergeable_ranks, - "special_tokens": {ENDOFTEXT: 50256}, - } - - -def p50k_base(): - mergeable_ranks = load_tiktoken_bpe( - "https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken" - ) - return { - "name": "p50k_base", - "explicit_n_vocab": 50281, - "pat_str": r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""", - "mergeable_ranks": mergeable_ranks, - "special_tokens": {ENDOFTEXT: 50256}, - } - - -def p50k_edit(): - mergeable_ranks = load_tiktoken_bpe( - 
"https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken" - ) - special_tokens = {ENDOFTEXT: 50256, FIM_PREFIX: 50281, FIM_MIDDLE: 50282, FIM_SUFFIX: 50283} - return { - "name": "p50k_edit", - "pat_str": r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""", - "mergeable_ranks": mergeable_ranks, - "special_tokens": special_tokens, - } - - -def cl100k_base(): - mergeable_ranks = load_tiktoken_bpe( - "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken" - ) - special_tokens = { - ENDOFTEXT: 100257, - FIM_PREFIX: 100258, - FIM_MIDDLE: 100259, - FIM_SUFFIX: 100260, - ENDOFPROMPT: 100276, - } - return { - "name": "cl100k_base", - "pat_str": r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+""", - "mergeable_ranks": mergeable_ranks, - "special_tokens": special_tokens, - } - - -ENCODING_CONSTRUCTORS = { - "gpt2": gpt2, - "r50k_base": r50k_base, - "p50k_base": p50k_base, - "cl100k_base": cl100k_base, -} From 22584d4eb31137b88c0f0c68fe7ee0c0b7790b6c Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Mon, 27 Feb 2023 12:54:59 +0100 Subject: [PATCH 027/207] add github action --- .github/workflows/build_wheels.yml | 49 ++++++++++++++++++++++++++++++ Cargo.toml | 10 +++++- core/Cargo.toml | 3 -- java/src/tiktoken_Encoding.h | 29 ------------------ jni/Cargo.toml | 7 ----- python/Cargo.toml | 3 -- 6 files changed, 58 insertions(+), 43 deletions(-) delete mode 100644 java/src/tiktoken_Encoding.h diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index d2e8dc27..081ecc5f 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -51,3 +51,52 @@ jobs: with: name: dist path: ./dist/*.tar.gz + + build_jni: + name: jni on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + include: + - os: 
ubuntu-latest + outdir: linux_64 + - os: windows-latest + outdir: windows_64 + - os: macos-latest + outdir: osx_64 + steps: + - uses: actions/checkout@v3 + + - name: Install rust toolchain + uses: actions-rs/toolchain@v1 + with: + # stable doesn't have --out-dir + toolchain: nightly + + - name: Build + working-directory: ./jni + # TODO: 32bit vs 64bit? + # https://github.com/scijava/native-lib-loader + run: cargo build --release -Z unstable-options --out-dir ../build/natives/${{ env.outdir }} + + - uses: actions/upload-artifact@v3 + with: + name: natives + path: ./build/* + + build_java: + name: java + runs-on: ubuntu-latest + needs: [build_jni] + + steps: + - name: Load outputs + uses: actions/download-artifact@v3 + with: + name: natives + path: natives + + - name: debug + run: find natives diff --git a/Cargo.toml b/Cargo.toml index f07251fd..bcc4bb52 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,4 +4,12 @@ members = [ "core", "python", "jni", -] \ No newline at end of file +] + +[profile.release] +incremental = true +opt-level = 'z' # Optimize for size +lto = true # Enable link-time optimization +codegen-units = 1 # Reduce number of codegen units to increase optimizations +panic = 'abort' # Abort on panic +strip = true # Strip symbols from binary* \ No newline at end of file diff --git a/core/Cargo.toml b/core/Cargo.toml index d57371e8..53688fd4 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -19,6 +19,3 @@ sha1 = "0.10.5" json = "0.12.4" base64 = "0.21.0" lazy_static = "1.4.0" - -[profile.release] -incremental = true diff --git a/java/src/tiktoken_Encoding.h b/java/src/tiktoken_Encoding.h deleted file mode 100644 index 030d77fe..00000000 --- a/java/src/tiktoken_Encoding.h +++ /dev/null @@ -1,29 +0,0 @@ -/* DO NOT EDIT THIS FILE - it is machine generated */ -#include -/* Header for class tiktoken_Encoding */ - -#ifndef _Included_tiktoken_Encoding -#define _Included_tiktoken_Encoding -#ifdef __cplusplus -extern "C" { -#endif -/* - * Class: tiktoken_Encoding 
- * Method: init - * Signature: (Ljava/lang/String;)V - */ -JNIEXPORT void JNICALL Java_tiktoken_Encoding_init - (JNIEnv *, jobject, jstring); - -/* - * Class: tiktoken_Encoding - * Method: encode - * Signature: (Ljava/lang/String;[Ljava/lang/String;J)[J - */ -JNIEXPORT jlongArray JNICALL Java_tiktoken_Encoding_encode - (JNIEnv *, jobject, jstring, jobjectArray, jlong); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/jni/Cargo.toml b/jni/Cargo.toml index 9ac05ad3..2deba506 100644 --- a/jni/Cargo.toml +++ b/jni/Cargo.toml @@ -13,10 +13,3 @@ tiktoken_core = { path = "../core" } rustc-hash = "1.1.0" jni = "0.20.0" -[profile.release] -incremental = true -opt-level = 'z' # Optimize for size -lto = true # Enable link-time optimization -codegen-units = 1 # Reduce number of codegen units to increase optimizations -panic = 'abort' # Abort on panic -strip = true # Strip symbols from binary* \ No newline at end of file diff --git a/python/Cargo.toml b/python/Cargo.toml index e674905f..7febd473 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -12,6 +12,3 @@ crate-type = ["cdylib"] pyo3 = { version = "0.17.3", features = ["extension-module"] } tiktoken_core = { path = "../core" } rustc-hash = "1.1.0" - -[profile.release] -incremental = true From 42efde424e5e36c8fc19beee8f8de64e353800f2 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Mon, 27 Feb 2023 12:58:16 +0100 Subject: [PATCH 028/207] add jar build --- .github/workflows/build_jar.yml | 57 ++++++++++++++++++++++++++++++ .github/workflows/build_wheels.yml | 49 ------------------------- 2 files changed, 57 insertions(+), 49 deletions(-) create mode 100644 .github/workflows/build_jar.yml diff --git a/.github/workflows/build_jar.yml b/.github/workflows/build_jar.yml new file mode 100644 index 00000000..212db67d --- /dev/null +++ b/.github/workflows/build_jar.yml @@ -0,0 +1,57 @@ +name: Build Java JAR + +on: [push, pull_request, workflow_dispatch] + +concurrency: + group: ${{ github.workflow }}-${{ 
github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + build_jni: + name: jni on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + include: + - os: ubuntu-latest + outdir: linux_64 + - os: windows-latest + outdir: windows_64 + - os: macos-latest + outdir: osx_64 + steps: + - uses: actions/checkout@v3 + + - name: Install rust toolchain + uses: actions-rs/toolchain@v1 + with: + # stable doesn't have --out-dir + toolchain: nightly + + - name: Build + working-directory: ./jni + # TODO: 32bit vs 64bit? + # https://github.com/scijava/native-lib-loader + run: cargo build --release -Z unstable-options --out-dir ../build/natives/${{ env.outdir }} + + - uses: actions/upload-artifact@v3 + with: + name: natives + path: ./build/* + + build_java: + name: java + runs-on: ubuntu-latest + needs: [build_jni] + + steps: + - name: Load outputs + uses: actions/download-artifact@v3 + with: + name: natives + path: natives + + - name: debug + run: find natives diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 081ecc5f..d2e8dc27 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -51,52 +51,3 @@ jobs: with: name: dist path: ./dist/*.tar.gz - - build_jni: - name: jni on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - include: - - os: ubuntu-latest - outdir: linux_64 - - os: windows-latest - outdir: windows_64 - - os: macos-latest - outdir: osx_64 - steps: - - uses: actions/checkout@v3 - - - name: Install rust toolchain - uses: actions-rs/toolchain@v1 - with: - # stable doesn't have --out-dir - toolchain: nightly - - - name: Build - working-directory: ./jni - # TODO: 32bit vs 64bit? 
- # https://github.com/scijava/native-lib-loader - run: cargo build --release -Z unstable-options --out-dir ../build/natives/${{ env.outdir }} - - - uses: actions/upload-artifact@v3 - with: - name: natives - path: ./build/* - - build_java: - name: java - runs-on: ubuntu-latest - needs: [build_jni] - - steps: - - name: Load outputs - uses: actions/download-artifact@v3 - with: - name: natives - path: natives - - - name: debug - run: find natives From 484f15a7a7f624a5d05377ef7b04a00dad47ee52 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Mon, 27 Feb 2023 12:59:32 +0100 Subject: [PATCH 029/207] fix rust build --- .github/workflows/build_jar.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_jar.yml b/.github/workflows/build_jar.yml index 212db67d..8530db52 100644 --- a/.github/workflows/build_jar.yml +++ b/.github/workflows/build_jar.yml @@ -29,6 +29,7 @@ jobs: with: # stable doesn't have --out-dir toolchain: nightly + override: true - name: Build working-directory: ./jni From 6c45bf698acf13a0872b83f6c0ca16bc507d67eb Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Mon, 27 Feb 2023 13:13:20 +0100 Subject: [PATCH 030/207] build jar --- .github/workflows/build_jar.yml | 18 ++++++++++++++++-- java/pom.xml | 7 ++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build_jar.yml b/.github/workflows/build_jar.yml index 8530db52..4a06c24c 100644 --- a/.github/workflows/build_jar.yml +++ b/.github/workflows/build_jar.yml @@ -35,12 +35,12 @@ jobs: working-directory: ./jni # TODO: 32bit vs 64bit? 
# https://github.com/scijava/native-lib-loader - run: cargo build --release -Z unstable-options --out-dir ../build/natives/${{ env.outdir }} + run: cargo build --release -Z unstable-options --out-dir ../build/natives/${{ matrix.outdir }}/ - uses: actions/upload-artifact@v3 with: name: natives - path: ./build/* + path: ./build/natives/* build_java: name: java @@ -56,3 +56,17 @@ jobs: - name: debug run: find natives + + - name: Set up JDK 11 + uses: actions/setup-java@v3 + with: + java-version: '11' + distribution: 'microsoft' + architecture: x64 + cache: maven + + - name: Build with Maven + run: mvn --batch-mode package failsafe:integration-test + + # TODO: publish to maven + diff --git a/java/pom.xml b/java/pom.xml index b93feb95..969a8f78 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -35,11 +35,8 @@ - ${project.build.directory}/../../target/release/ - ${project.build.directory}/classes/natives/linux_64 - - lib_tiktoken_jni.so - + ${project.build.directory}/../../natives/ + ${project.build.directory}/classes/natives/ From b883d0c5ea5b558b31bed7a42f80061f1acc0f57 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Mon, 27 Feb 2023 13:21:33 +0100 Subject: [PATCH 031/207] fix path add matrix java test --- .github/workflows/build_jar.yml | 12 +++++++----- java/pom.xml | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build_jar.yml b/.github/workflows/build_jar.yml index 4a06c24c..030484aa 100644 --- a/.github/workflows/build_jar.yml +++ b/.github/workflows/build_jar.yml @@ -44,7 +44,11 @@ jobs: build_java: name: java - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] needs: [build_jni] steps: @@ -54,9 +58,6 @@ jobs: name: natives path: natives - - name: debug - run: find natives - - name: Set up JDK 11 uses: actions/setup-java@v3 with: @@ -66,7 +67,8 @@ jobs: cache: maven - name: Build with Maven + working-directory: ./java run: mvn 
--batch-mode package failsafe:integration-test - # TODO: publish to maven + # TODO: publish to maven (only from ubuntu) diff --git a/java/pom.xml b/java/pom.xml index 969a8f78..c0182ccb 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -35,7 +35,7 @@ - ${project.build.directory}/../../natives/ + ${project.basedir}/../natives/ ${project.build.directory}/classes/natives/ From 98070f13746054dfc9d91a9e96a2c74aec7c345a Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Mon, 27 Feb 2023 13:28:26 +0100 Subject: [PATCH 032/207] fix java --- .github/workflows/build_jar.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/build_jar.yml b/.github/workflows/build_jar.yml index 030484aa..4b0d4476 100644 --- a/.github/workflows/build_jar.yml +++ b/.github/workflows/build_jar.yml @@ -52,6 +52,8 @@ jobs: needs: [build_jni] steps: + - uses: actions/checkout@v3 + - name: Load outputs uses: actions/download-artifact@v3 with: @@ -70,5 +72,10 @@ jobs: working-directory: ./java run: mvn --batch-mode package failsafe:integration-test + - uses: actions/upload-artifact@v3 + with: + name: java + path: ./java/target/*.jar + # TODO: publish to maven (only from ubuntu) From 4e70da002e5d3a053c0877bb07a3de2160c65ccb Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Tue, 28 Feb 2023 17:17:16 +0100 Subject: [PATCH 033/207] cleanup --- core/src/load.rs | 3 --- core/src/openai_public.rs | 1 - jni/Cargo.toml | 2 ++ jni/build.rs | 7 +++++++ jni/src/lib.rs | 15 +++++---------- 5 files changed, 14 insertions(+), 14 deletions(-) create mode 100644 jni/build.rs diff --git a/core/src/load.rs b/core/src/load.rs index 4d1e5176..975f5fcd 100644 --- a/core/src/load.rs +++ b/core/src/load.rs @@ -115,9 +115,6 @@ pub fn data_gym_to_mergeable_bpe_ranks(vocab_bpe_file: &str, encoder_json_file: let decode_data_gym = |value: &str| value.chars().map(|c| { - // if !data_gym_byte_to_byte.contains_key(&(c as u32)) { - // panic!("Unknown character: {} {}", c, c as u32); - // } 
data_gym_byte_to_byte[&(c as u32)] } ).collect::>(); diff --git a/core/src/openai_public.rs b/core/src/openai_public.rs index 2a89843a..24e0ab99 100644 --- a/core/src/openai_public.rs +++ b/core/src/openai_public.rs @@ -11,7 +11,6 @@ type Result = std::result::Result>; lazy_static! { pub static ref REGISTRY: HashMap = { - // TODO: error handling json::parse(include_str!("../../tiktoken/registry.json")) .expect("Failed to parse internal JSON") .entries() diff --git a/jni/Cargo.toml b/jni/Cargo.toml index 2deba506..7c6d4155 100644 --- a/jni/Cargo.toml +++ b/jni/Cargo.toml @@ -13,3 +13,5 @@ tiktoken_core = { path = "../core" } rustc-hash = "1.1.0" jni = "0.20.0" +[build-dependencies] +json = "0.12.4" diff --git a/jni/build.rs b/jni/build.rs new file mode 100644 index 00000000..9c866413 --- /dev/null +++ b/jni/build.rs @@ -0,0 +1,7 @@ +use json; + +fn main() { + json::parse(include_str!("../tiktoken/registry.json")).expect("Failed to parse internal JSON"); + json::parse(include_str!("../tiktoken/model_to_encoding.json")).expect("Failed to parse internal JSON"); + println!("JSON Parsing validated"); +} diff --git a/jni/src/lib.rs b/jni/src/lib.rs index 778bef84..6bd99d6d 100644 --- a/jni/src/lib.rs +++ b/jni/src/lib.rs @@ -48,14 +48,9 @@ pub extern "system" fn Java_tiktoken_Encoding_init(env: JNIEnv, obj: JObject, mo let encoding_name = _tiktoken_core::openai_public::MODEL_TO_ENCODING .get(&model_name).ok_or("Unable to find model")?; - // TODO: this is actually mergable_ranks (lazy) let encoding = _tiktoken_core::openai_public::REGISTRY .get(encoding_name).ok_or("Unable to find encoding")?; - // TODO: initialize the CoreBPE object - - // TODO: this should be CoreBPE - let bpe_native = CoreBPENative::new( encoding.get()?, encoding.special_tokens.clone(), @@ -82,8 +77,8 @@ pub extern "system" fn Java_tiktoken_Encoding_encode( env: JNIEnv, obj: JObject, text: JString, - allowedSpecialTokens: jarray, - maxTokenLength: jlong, + allowed_special_tokens: jarray, + 
max_token_length: jlong, ) -> jarray { let result = || -> Result { let encoding: MutexGuard = unsafe { env.get_rust_field(obj, "handle")? }; @@ -93,18 +88,18 @@ pub extern "system" fn Java_tiktoken_Encoding_encode( .get_string(text)? .into(); - let len = env.get_array_length(allowedSpecialTokens)?; + let len = env.get_array_length(allowed_special_tokens)?; let mut strings: Vec = Vec::with_capacity(len as usize); for i in 0..len { let element: JObject = env - .get_object_array_element(allowedSpecialTokens, i)?; + .get_object_array_element(allowed_special_tokens, i)?; let current: String = env.get_string(element.into())?.into(); strings.push(current); } let v2: HashSet<&str> = strings.iter().map(|s| &**s).collect(); - let (tokens, _, _) = enc._encode_native(&input, &v2, Some(maxTokenLength as usize)); + let (tokens, _, _) = enc._encode_native(&input, &v2, Some(max_token_length as usize)); let output = env .new_long_array(tokens.len().try_into()?)?; From 381d32b69d406a46c5905887dd851f38575c8230 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Tue, 28 Feb 2023 17:19:20 +0100 Subject: [PATCH 034/207] update groupid --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index c0182ccb..61cbf01c 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -4,7 +4,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - tiktoken + com.openai tiktoken 1.0-SNAPSHOT From f1560bd3b5ab02f1dc4ea0cfd3f4955878516f0b Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Tue, 28 Feb 2023 17:26:38 +0100 Subject: [PATCH 035/207] remove comments --- java/src/main/java/tiktoken/Encoding.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/java/src/main/java/tiktoken/Encoding.java b/java/src/main/java/tiktoken/Encoding.java index bfc75868..1773225d 100644 --- a/java/src/main/java/tiktoken/Encoding.java +++ b/java/src/main/java/tiktoken/Encoding.java @@ -6,9 +6,8 @@ 
public class Encoding implements AutoCloseable { static { - // TODO: unpack the library from the jar - // System.loadLibrary("_tiktoken_jni"); try { + // load from JAR NativeLoader.loadLibrary("_tiktoken_jni"); } catch(IOException e) { From 01d4f9e88ba19414fc162eb09e817f34e8ff0a4b Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 1 Mar 2023 23:13:42 +0100 Subject: [PATCH 036/207] Move to separate js folder --- js/.gitignore | 3 + js/Cargo.toml | 28 ++ package.json => js/package.json | 0 {scripts => js/scripts}/download_ranks.py | 0 {scripts => js/scripts}/override_any.ts | 0 js/src/lib.rs | 432 ++++++++++++++++++++++ js/test/test_simple_public.test.ts | 136 +++++++ yarn.lock => js/yarn.lock | 0 8 files changed, 599 insertions(+) create mode 100644 js/.gitignore create mode 100644 js/Cargo.toml rename package.json => js/package.json (100%) rename {scripts => js/scripts}/download_ranks.py (100%) rename {scripts => js/scripts}/override_any.ts (100%) create mode 100644 js/src/lib.rs create mode 100644 js/test/test_simple_public.test.ts rename yarn.lock => js/yarn.lock (100%) diff --git a/js/.gitignore b/js/.gitignore new file mode 100644 index 00000000..d27528eb --- /dev/null +++ b/js/.gitignore @@ -0,0 +1,3 @@ +# WASM +ranks/ +node_modules \ No newline at end of file diff --git a/js/Cargo.toml b/js/Cargo.toml new file mode 100644 index 00000000..fac21a29 --- /dev/null +++ b/js/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "tiktoken_js" +version = "0.2.0" +edition = "2021" +rust-version = "1.57.0" + +[lib] +name = "_tiktoken_js" +crate-type = ["rlib", "cdylib"] + +[dependencies] +tiktoken_core = { path = "../core" } +# tiktoken dependencies +fancy-regex = "0.10.0" +regex = "1.7.0" +rustc-hash = "1.1.0" +bstr = "1.0.1" +wasm-bindgen = "0.2.83" +js-sys = "0.3.61" +anyhow = "1.0.69" +base64 = "0.21.0" +gloo-utils = { version = "0.1", features = ["serde"] } +serde = { version = "1.0", features = ["derive"] } +reqwest = { version = "0.11.14", features = ["blocking"] 
} + +[features] +default = ["inline"] +inline = [] \ No newline at end of file diff --git a/package.json b/js/package.json similarity index 100% rename from package.json rename to js/package.json diff --git a/scripts/download_ranks.py b/js/scripts/download_ranks.py similarity index 100% rename from scripts/download_ranks.py rename to js/scripts/download_ranks.py diff --git a/scripts/override_any.ts b/js/scripts/override_any.ts similarity index 100% rename from scripts/override_any.ts rename to js/scripts/override_any.ts diff --git a/js/src/lib.rs b/js/src/lib.rs new file mode 100644 index 00000000..62345196 --- /dev/null +++ b/js/src/lib.rs @@ -0,0 +1,432 @@ +use _tiktoken_core::CoreBPENative; +use anyhow::Error; +use base64::{engine::general_purpose, Engine as _}; +use fancy_regex::Regex; +use gloo_utils::format::JsValueSerdeExt; +use rustc_hash::FxHashMap as HashMap; +use std::collections::HashSet; +use std::result::Result; +use wasm_bindgen::prelude::*; + +#[cfg(feature = "inline")] +const ENDOFTEXT: &'static str = "<|endoftext|>"; + +#[cfg(feature = "inline")] +const FIM_PREFIX: &'static str = "<|fim_prefix|>"; + +#[cfg(feature = "inline")] +const FIM_MIDDLE: &'static str = "<|fim_middle|>"; + +#[cfg(feature = "inline")] +const FIM_SUFFIX: &'static str = "<|fim_suffix|>"; + +#[cfg(feature = "inline")] +const ENDOFPROMPT: &'static str = "<|endofprompt|>"; + +struct CoreBPEConstructor { + encoder: HashMap, usize>, + special_tokens: HashMap, + pat_str: String, +} + +impl CoreBPEConstructor { + fn new( + tiktoken_bfe: &str, + special_tokens: Option>, + pat_str: &str, + ) -> Self { + CoreBPEConstructor { + encoder: CoreBPEConstructor::parse_bfe(tiktoken_bfe).unwrap(), + special_tokens: special_tokens.unwrap_or_default(), + pat_str: String::from(pat_str), + } + } + + fn parse_bfe(tiktoken_bfe: &str) -> Result, usize>, Error> { + let mut encoder = HashMap::default(); + for line in tiktoken_bfe.lines() { + let mut parts = line.split(' '); + let token = 
&general_purpose::STANDARD.decode(parts.next().unwrap())?; + let rank: usize = parts.next().unwrap().parse().unwrap(); + encoder.insert(token.clone(), rank); + } + + Ok(encoder) + } + + #[cfg(feature = "inline")] + fn gpt2() -> Self { + let mut special_tokens = HashMap::default(); + special_tokens.insert(String::from(ENDOFTEXT), 50256); + + CoreBPEConstructor::new( + include_str!("../ranks/gpt2.tiktoken"), + Some(special_tokens), + "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", + ) + } + + #[cfg(feature = "inline")] + fn r50k_base() -> Self { + let mut special_tokens = HashMap::default(); + special_tokens.insert(String::from(ENDOFTEXT), 50256); + + CoreBPEConstructor::new( + include_str!("../ranks/r50k_base.tiktoken"), + Some(special_tokens), + "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", + ) + } + + #[cfg(feature = "inline")] + fn p50k_base() -> Self { + let mut special_tokens = HashMap::default(); + special_tokens.insert(String::from(ENDOFTEXT), 50256); + + CoreBPEConstructor::new( + include_str!("../ranks/p50k_base.tiktoken"), + Some(special_tokens), + "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", + ) + } + + #[cfg(feature = "inline")] + fn p50k_edit() -> Self { + let mut special_tokens = HashMap::default(); + special_tokens.insert(String::from(ENDOFTEXT), 50256); + special_tokens.insert(String::from(FIM_PREFIX), 50281); + special_tokens.insert(String::from(FIM_MIDDLE), 50282); + special_tokens.insert(String::from(FIM_SUFFIX), 50283); + + CoreBPEConstructor::new( + include_str!("../ranks/p50k_base.tiktoken"), + Some(special_tokens), + "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", + ) + } + + #[cfg(feature = "inline")] + fn cl100k_base() -> Self { + let mut special_tokens = HashMap::default(); + special_tokens.insert(String::from(ENDOFTEXT), 100257); + special_tokens.insert(String::from(FIM_PREFIX), 
100258); + special_tokens.insert(String::from(FIM_MIDDLE), 100259); + special_tokens.insert(String::from(FIM_SUFFIX), 100260); + special_tokens.insert(String::from(ENDOFPROMPT), 100276); + + CoreBPEConstructor::new( + include_str!("../ranks/cl100k_base.tiktoken"), + Some(special_tokens), + "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", + ) + } +} + +#[wasm_bindgen] +pub struct Tiktoken { + name: Option, + special_tokens_set: HashSet, + bpe: CoreBPENative, +} + +#[wasm_bindgen] +impl Tiktoken { + #[wasm_bindgen(constructor)] + pub fn new(tiktoken_bfe: &str, special_tokens: JsValue, pat_str: &str) -> Self { + let constructor = CoreBPEConstructor::new( + tiktoken_bfe, + special_tokens.into_serde::>().ok(), + pat_str, + ); + + Tiktoken { + name: None, + special_tokens_set: constructor + .special_tokens + .keys() + .map(|s| s.clone()) + .collect(), + bpe: CoreBPENative::new( + constructor.encoder, + constructor.special_tokens, + &constructor.pat_str, + ) + .unwrap(), + } + } + + #[cfg(feature = "inline")] + fn with_encoding( + encoding: &str, + extend_special_tokens: &Option>, + ) -> Result { + let mut constructor: CoreBPEConstructor = match encoding { + "gpt2" => Ok(CoreBPEConstructor::gpt2()), + "r50k_base" => Ok(CoreBPEConstructor::r50k_base()), + "p50k_base" => Ok(CoreBPEConstructor::p50k_base()), + "p50k_edit" => Ok(CoreBPEConstructor::p50k_edit()), + "cl100k_base" => Ok(CoreBPEConstructor::cl100k_base()), + &_ => Err(JsError::new("Invalid encoding")), + }?; + + if let Some(tokens) = extend_special_tokens { + constructor.special_tokens.extend(tokens.clone()); + } + + Ok(Tiktoken { + name: Some(String::from(encoding)), + // TODO: can we avoid cloning here? 
+ special_tokens_set: constructor + .special_tokens + .keys() + .map(|s| s.clone()) + .collect(), + bpe: CoreBPENative::new( + constructor.encoder, + constructor.special_tokens, + &constructor.pat_str, + ) + .unwrap(), + }) + } + + #[wasm_bindgen(getter)] + pub fn name(&self) -> Option { + self.name.clone() + } + + pub fn encode( + &self, + text: &str, + allowed_special: JsValue, + disallowed_special: JsValue, + ) -> Result, JsError> { + let allowed_tokens = + self.validate_allowed_tokens(text, &allowed_special, &disallowed_special)?; + + Ok(self + .bpe + ._encode_native( + &text, + &allowed_tokens.iter().map(AsRef::as_ref).collect(), + Some(0), + ) + .0) + } + + pub fn encode_ordinary(&self, text: &str) -> Vec { + self.bpe._encode_ordinary_native(&text) + } + + pub fn encode_with_unstable( + &self, + text: &str, + allowed_special: JsValue, + disallowed_special: JsValue, + ) -> Result { + let allowed_tokens = + self.validate_allowed_tokens(text, &allowed_special, &disallowed_special)?; + + JsValue::from_serde( + &self.bpe._encode_unstable_native( + &text, + &allowed_tokens.iter().map(AsRef::as_ref).collect(), + ), + ) + .map_err(|e| { + JsError::new(&format!( + "Failed to serialize encode_with_unstable result: {}", + e + )) + }) + } + + pub fn encode_single_token(&self, bytes: &[u8]) -> usize { + self.bpe.encode_single_token(&bytes).unwrap_throw() + } + + #[wasm_bindgen(skip_typescript)] + pub fn _encode_single_piece(&self, bytes: &[u8]) -> Vec { + self.bpe.encode_single_piece(&bytes) + } + + pub fn decode(&self, tokens: Vec) -> Vec { + self.bpe._decode_native(&tokens) + } + + pub fn decode_single_token_bytes(&self, token: usize) -> Vec { + self.bpe + .decode_single_token_bytes(token) + .unwrap_throw() + .to_vec() + } + + pub fn token_byte_values(&self) -> JsValue { + JsValue::from_serde(&self.bpe.token_byte_values()).unwrap_throw() + } + + fn validate_allowed_tokens( + &self, + text: &str, + allowed_special_param: &JsValue, + disallowed_special_param: &JsValue, + 
) -> Result, JsError> { + let allowed_special: HashSet = match allowed_special_param.as_string() { + Some(value) => match value.as_str() { + "all" => Ok(self.special_tokens_set.clone()), + _ => Err(JsError::new("Invalid value for allowed_special")), + }, + _ => Ok(JsValue::into_serde(&allowed_special_param).unwrap_or_default()), + }?; + + let disallowed_special = JsValue::into_serde::>(&disallowed_special_param) + .or_else(|_| { + match disallowed_special_param + .as_string() + .unwrap_or(String::from("all")) + .as_str() + { + "all" => Ok(&self.special_tokens_set - &allowed_special), + _ => Err(JsError::new("Invalid value for disallowed_special")), + } + })?; + + if !disallowed_special.is_empty() { + if let Some(found) = Tiktoken::special_token_regex(&disallowed_special).find(text)? { + return Err(JsError::new(&format!( + "The text contains a special token that is not allowed: {}", + found.as_str() + ))); + } + } + + return Ok(allowed_special); + } + + fn special_token_regex(tokens: &HashSet) -> Regex { + let inner = tokens + .iter() + .map(|token| regex::escape(token)) + .collect::>() + .join("|"); + + Regex::new(&format!("({})", inner)).unwrap_throw() + } +} + +#[cfg(feature = "inline")] +#[wasm_bindgen(typescript_custom_section)] +const _: &'static str = r#" +export type TiktokenEmbedding = "gpt2" | "r50k_base" | "p50k_base" | "p50k_edit" | "cl100k_base"; + +/** + * @param {TiktokenEmbedding} encoding + * @param {Record} [extend_special_tokens] + * @returns {Tiktoken} + */ +export function get_encoding(encoding: TiktokenEmbedding, extend_special_tokens?: Record): Tiktoken; +"#; + +#[cfg(feature = "inline")] +#[wasm_bindgen(skip_typescript)] +pub fn get_encoding(encoding: &str, extend_special_tokens: JsValue) -> Result { + Tiktoken::with_encoding( + encoding, + &extend_special_tokens + .into_serde::>() + .ok(), + ) +} + +#[cfg(feature = "inline")] +#[wasm_bindgen(typescript_custom_section)] +const _: &'static str = r#" +export type TiktokenModel = + | 
"text-davinci-003" + | "text-davinci-002" + | "text-davinci-001" + | "text-curie-001" + | "text-babbage-001" + | "text-ada-001" + | "davinci" + | "curie" + | "babbage" + | "ada" + | "code-davinci-002" + | "code-davinci-001" + | "code-cushman-002" + | "code-cushman-001" + | "davinci-codex" + | "cushman-codex" + | "text-davinci-edit-001" + | "code-davinci-edit-001" + | "text-embedding-ada-002" + | "text-similarity-davinci-001" + | "text-similarity-curie-001" + | "text-similarity-babbage-001" + | "text-similarity-ada-001" + | "text-search-davinci-doc-001" + | "text-search-curie-doc-001" + | "text-search-babbage-doc-001" + | "text-search-ada-doc-001" + | "code-search-babbage-code-001" + | "code-search-ada-code-001" + | "gpt2"; + +/** + * @param {TiktokenModel} encoding + * @param {Record} [extend_special_tokens] + * @returns {Tiktoken} + */ +export function encoding_for_model(model: TiktokenModel, extend_special_tokens?: Record): Tiktoken; +"#; + +#[cfg(feature = "inline")] +#[wasm_bindgen(skip_typescript)] +pub fn encoding_for_model( + model: &str, + extend_special_tokens: JsValue, +) -> Result { + let encoding = match model { + "text-davinci-003" => Ok("p50k_base"), + "text-davinci-002" => Ok("p50k_base"), + "text-davinci-001" => Ok("r50k_base"), + "text-curie-001" => Ok("r50k_base"), + "text-babbage-001" => Ok("r50k_base"), + "text-ada-001" => Ok("r50k_base"), + "davinci" => Ok("r50k_base"), + "curie" => Ok("r50k_base"), + "babbage" => Ok("r50k_base"), + "ada" => Ok("r50k_base"), + "code-davinci-002" => Ok("p50k_base"), + "code-davinci-001" => Ok("p50k_base"), + "code-cushman-002" => Ok("p50k_base"), + "code-cushman-001" => Ok("p50k_base"), + "davinci-codex" => Ok("p50k_base"), + "cushman-codex" => Ok("p50k_base"), + "text-davinci-edit-001" => Ok("p50k_edit"), + "code-davinci-edit-001" => Ok("p50k_edit"), + "text-embedding-ada-002" => Ok("cl100k_base"), + "text-similarity-davinci-001" => Ok("r50k_base"), + "text-similarity-curie-001" => Ok("r50k_base"), + 
"text-similarity-babbage-001" => Ok("r50k_base"), + "text-similarity-ada-001" => Ok("r50k_base"), + "text-search-davinci-doc-001" => Ok("r50k_base"), + "text-search-curie-doc-001" => Ok("r50k_base"), + "text-search-babbage-doc-001" => Ok("r50k_base"), + "text-search-ada-doc-001" => Ok("r50k_base"), + "code-search-babbage-code-001" => Ok("r50k_base"), + "code-search-ada-code-001" => Ok("r50k_base"), + "gpt2" => Ok("gpt2"), + model => Err(JsError::new( + format!("Invalid model: {}", model.to_string()).as_str(), + )), + }?; + + Tiktoken::with_encoding( + encoding, + &extend_special_tokens + .into_serde::>() + .ok(), + ) +} diff --git a/js/test/test_simple_public.test.ts b/js/test/test_simple_public.test.ts new file mode 100644 index 00000000..7bb5f012 --- /dev/null +++ b/js/test/test_simple_public.test.ts @@ -0,0 +1,136 @@ +import { it, expect, describe } from "vitest"; +import { encoding_for_model, get_encoding } from "../"; + +it("encoding_for_model initialization", () => { + expect(() => encoding_for_model("gpt2")).not.toThrowError(); + // @ts-expect-error + expect(() => encoding_for_model("gpt2-unknown")).toThrowError( + "Invalid model" + ); +}); + +it("get_encoding initialization", () => { + expect(() => get_encoding("cl100k_base")).not.toThrowError(); + // @ts-expect-error + expect(() => get_encoding("unknown")).toThrowError("Invalid encoding"); +}); + +describe("gpt2", () => { + const enc = get_encoding("gpt2"); + + it("encodes hello world string", () => { + expect(enc.encode("hello world")).toStrictEqual( + new Uint32Array([31373, 995]) + ); + }); + + it("decodes hello world string", () => { + expect( + new TextDecoder().decode(enc.decode(new Uint32Array([31373, 995]))) + ).toStrictEqual("hello world"); + }); + + it("encodes hello world string, all allowed special characters", () => { + expect(enc.encode("hello <|endoftext|>", "all")).toStrictEqual( + new Uint32Array([31373, 220, 50256]) + ); + }); +}); + +describe("cl100k_base", () => { + const enc = 
get_encoding("cl100k_base"); + + it("encodes hello world string", () => { + expect(enc.encode("hello world")).toStrictEqual( + new Uint32Array([15339, 1917]) + ); + }); + + it("decodes hello world string", () => { + expect( + new TextDecoder().decode(enc.decode(new Uint32Array([15339, 1917]))) + ).toStrictEqual("hello world"); + }); + + it("encodes hello world string, all allowed special characters", () => { + expect(enc.encode("hello <|endoftext|>", "all")).toStrictEqual( + new Uint32Array([15339, 220, 100257]) + ); + }); +}); + +it("test_simple", () => { + const encodings = [ + "gpt2", + "r50k_base", + "p50k_base", + "p50k_edit", + "cl100k_base", + ] as const; + + for (const encoding of encodings) { + const enc = get_encoding(encoding); + for (let token = 0; token < 10_000; token++) { + expect( + enc.encode_single_token(enc.decode_single_token_bytes(token)) + ).toStrictEqual(token); + } + } +}); + +it("test_encoding_for_model", () => { + expect(encoding_for_model("gpt2").name).toEqual("gpt2"); + expect(encoding_for_model("text-davinci-003").name).toEqual("p50k_base"); +}); + +it("test_custom_tokens", () => { + const enc = encoding_for_model("gpt2", { + "<|im_start|>": 100264, + "<|im_end|>": 100265, + }); + expect(enc.encode("<|im_start|>test<|im_end|>", "all")).toStrictEqual( + new Uint32Array([100264, 9288, 100265]) + ); +}); + +it("encode string tokens", () => { + const enc = get_encoding("gpt2", { "<|im_start|>": 100264 }); + + expect(enc.encode("hello world")).toStrictEqual( + new Uint32Array([31373, 995]) + ); + + expect(enc.encode("<|endoftext|>", ["<|endoftext|>"])).toStrictEqual( + new Uint32Array([50256]) + ); + + expect(enc.encode("<|endoftext|>", "all")).toStrictEqual( + new Uint32Array([50256]) + ); + + expect(() => enc.encode("<|endoftext|>")).toThrowError( + "The text contains a special token that is not allowed" + ); + + expect(() => enc.encode("<|im_start|>")).toThrowError( + "The text contains a special token that is not allowed" + ); + + 
expect(enc.encode("<|endoftext|>", [], [])).toStrictEqual( + new Uint32Array([27, 91, 437, 1659, 5239, 91, 29]) + ); +}); + +it("invalid (dis)allowed_tokens", () => { + const enc = get_encoding("gpt2"); + + // @ts-expect-error + expect(() => enc.encode("hello world", "invalid-string")).toThrowError( + "Invalid value for allowed_special" + ); + + // @ts-expect-error + expect(() => enc.encode("hello world", [], "invalid-string")).toThrowError( + "Invalid value for disallowed_special" + ); +}); diff --git a/yarn.lock b/js/yarn.lock similarity index 100% rename from yarn.lock rename to js/yarn.lock From ef77b1a2ea2ef0b761fefd0fef913bbaed2f30a6 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 1 Mar 2023 23:36:33 +0100 Subject: [PATCH 037/207] Make sure JS builds --- Cargo.toml | 1 + core/src/lib.rs | 4 ++++ js/Cargo.toml | 3 --- js/scripts/override_any.ts | 6 +++--- js/src/lib.rs | 5 ----- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c251cf87..6709ee3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ members = [ "core", "python", + "js", "jni", ] diff --git a/core/src/lib.rs b/core/src/lib.rs index 52cacb04..6477ba85 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -5,9 +5,13 @@ use fancy_regex::Regex; use rustc_hash::FxHashMap as HashMap; mod util; +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] mod load; + +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] pub mod openai_public; +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] #[macro_use] extern crate lazy_static; diff --git a/js/Cargo.toml b/js/Cargo.toml index fac21a29..44d1081f 100644 --- a/js/Cargo.toml +++ b/js/Cargo.toml @@ -16,12 +16,9 @@ regex = "1.7.0" rustc-hash = "1.1.0" bstr = "1.0.1" wasm-bindgen = "0.2.83" -js-sys = "0.3.61" anyhow = "1.0.69" base64 = "0.21.0" gloo-utils = { version = "0.1", features = ["serde"] } -serde = { version = "1.0", features = ["derive"] } -reqwest = { version = 
"0.11.14", features = ["blocking"] } [features] default = ["inline"] diff --git a/js/scripts/override_any.ts b/js/scripts/override_any.ts index 0974e9b8..c07d1ae3 100644 --- a/js/scripts/override_any.ts +++ b/js/scripts/override_any.ts @@ -4,9 +4,9 @@ const project = new Project(); project.addSourceFilesAtPaths("./dist/**/*.ts"); for (const filename of [ - "./dist/bundler/_tiktoken.d.ts", - "./dist/node/_tiktoken.d.ts", - "./dist/web/_tiktoken.d.ts", + "./dist/bundler/_tiktoken_js.d.ts", + "./dist/node/_tiktoken_js.d.ts", + "./dist/web/_tiktoken_js.d.ts", ]) { const sourceFile = project.getSourceFileOrThrow(filename); const cls = sourceFile.getFirstDescendantByKindOrThrow( diff --git a/js/src/lib.rs b/js/src/lib.rs index 62345196..c575002b 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -245,11 +245,6 @@ impl Tiktoken { self.bpe.encode_single_token(&bytes).unwrap_throw() } - #[wasm_bindgen(skip_typescript)] - pub fn _encode_single_piece(&self, bytes: &[u8]) -> Vec { - self.bpe.encode_single_piece(&bytes) - } - pub fn decode(&self, tokens: Vec) -> Vec { self.bpe._decode_native(&tokens) } From b04f0cf786f94dae449b0799bf5cdbf70c9d1e42 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 2 Mar 2023 15:10:32 +0100 Subject: [PATCH 038/207] Attempt to fix sdist --- MANIFEST.in | 3 +- js/package.json | 26 +++--- pyproject.toml | 2 + tests/test_simple_public.test.ts | 136 ------------------------------- 4 files changed, 17 insertions(+), 150 deletions(-) delete mode 100644 tests/test_simple_public.test.ts diff --git a/MANIFEST.in b/MANIFEST.in index 321b66e2..c1c1fc51 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,5 +5,6 @@ include Makefile global-include py.typed recursive-include scripts *.py recursive-include tests *.py -recursive-include src *.rs +recursive-include core *.rs *.toml +recursive-include python *.rs *.toml include tiktoken *.json \ No newline at end of file diff --git a/js/package.json b/js/package.json index 5692f79d..5b35de74 100644 --- 
a/js/package.json +++ b/js/package.json @@ -7,31 +7,31 @@ "package.json" ], "license": "Apache-2.0", - "main": "dist/node/_tiktoken.js", - "browser": "dist/web/_tiktoken.js", - "types": "dist/node/_tiktoken.d.ts", + "main": "dist/node/_tiktoken_js.js", + "browser": "dist/web/_tiktoken_js.js", + "types": "dist/node/_tiktoken_js.d.ts", "exports": { ".": { "node": { - "types": "./dist/node/_tiktoken.d.ts", - "default": "./dist/node/_tiktoken.js" + "types": "./dist/node/_tiktoken_js.d.ts", + "default": "./dist/node/_tiktoken_js.js" }, "default": { - "types": "./dist/bundler/_tiktoken.d.js", - "default": "./dist/bundler/_tiktoken.mjs" + "types": "./dist/bundler/_tiktoken_js.d.js", + "default": "./dist/bundler/_tiktoken_js.mjs" } }, "./bundler": { - "types": "./dist/bundler/_tiktoken.d.ts", - "default": "./dist/bundler/_tiktoken.mjs" + "types": "./dist/bundler/_tiktoken_js.d.ts", + "default": "./dist/bundler/_tiktoken_js.mjs" }, "./web": { - "types": "./dist/web/_tiktoken.d.ts", - "default": "./dist/web/_tiktoken.js" + "types": "./dist/web/_tiktoken_js.d.ts", + "default": "./dist/web/_tiktoken_js.js" }, "./node": { - "types": "./dist/node/_tiktoken.d.ts", - "default": "./dist/node/_tiktoken.js" + "types": "./dist/node/_tiktoken_js.d.ts", + "default": "./dist/node/_tiktoken_js.js" } }, "repository": { diff --git a/pyproject.toml b/pyproject.toml index 771b72ac..68d47ffe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,3 +38,5 @@ test-skip = "*-macosx_arm64" before-test = "pip install pytest" test-command = "pytest {project}/tests" +[tool.check-manifest] +ignore = ["js", "jni", "java"] \ No newline at end of file diff --git a/tests/test_simple_public.test.ts b/tests/test_simple_public.test.ts deleted file mode 100644 index 7bb5f012..00000000 --- a/tests/test_simple_public.test.ts +++ /dev/null @@ -1,136 +0,0 @@ -import { it, expect, describe } from "vitest"; -import { encoding_for_model, get_encoding } from "../"; - -it("encoding_for_model initialization", () => { - 
expect(() => encoding_for_model("gpt2")).not.toThrowError(); - // @ts-expect-error - expect(() => encoding_for_model("gpt2-unknown")).toThrowError( - "Invalid model" - ); -}); - -it("get_encoding initialization", () => { - expect(() => get_encoding("cl100k_base")).not.toThrowError(); - // @ts-expect-error - expect(() => get_encoding("unknown")).toThrowError("Invalid encoding"); -}); - -describe("gpt2", () => { - const enc = get_encoding("gpt2"); - - it("encodes hello world string", () => { - expect(enc.encode("hello world")).toStrictEqual( - new Uint32Array([31373, 995]) - ); - }); - - it("decodes hello world string", () => { - expect( - new TextDecoder().decode(enc.decode(new Uint32Array([31373, 995]))) - ).toStrictEqual("hello world"); - }); - - it("encodes hello world string, all allowed special characters", () => { - expect(enc.encode("hello <|endoftext|>", "all")).toStrictEqual( - new Uint32Array([31373, 220, 50256]) - ); - }); -}); - -describe("cl100k_base", () => { - const enc = get_encoding("cl100k_base"); - - it("encodes hello world string", () => { - expect(enc.encode("hello world")).toStrictEqual( - new Uint32Array([15339, 1917]) - ); - }); - - it("decodes hello world string", () => { - expect( - new TextDecoder().decode(enc.decode(new Uint32Array([15339, 1917]))) - ).toStrictEqual("hello world"); - }); - - it("encodes hello world string, all allowed special characters", () => { - expect(enc.encode("hello <|endoftext|>", "all")).toStrictEqual( - new Uint32Array([15339, 220, 100257]) - ); - }); -}); - -it("test_simple", () => { - const encodings = [ - "gpt2", - "r50k_base", - "p50k_base", - "p50k_edit", - "cl100k_base", - ] as const; - - for (const encoding of encodings) { - const enc = get_encoding(encoding); - for (let token = 0; token < 10_000; token++) { - expect( - enc.encode_single_token(enc.decode_single_token_bytes(token)) - ).toStrictEqual(token); - } - } -}); - -it("test_encoding_for_model", () => { - 
expect(encoding_for_model("gpt2").name).toEqual("gpt2"); - expect(encoding_for_model("text-davinci-003").name).toEqual("p50k_base"); -}); - -it("test_custom_tokens", () => { - const enc = encoding_for_model("gpt2", { - "<|im_start|>": 100264, - "<|im_end|>": 100265, - }); - expect(enc.encode("<|im_start|>test<|im_end|>", "all")).toStrictEqual( - new Uint32Array([100264, 9288, 100265]) - ); -}); - -it("encode string tokens", () => { - const enc = get_encoding("gpt2", { "<|im_start|>": 100264 }); - - expect(enc.encode("hello world")).toStrictEqual( - new Uint32Array([31373, 995]) - ); - - expect(enc.encode("<|endoftext|>", ["<|endoftext|>"])).toStrictEqual( - new Uint32Array([50256]) - ); - - expect(enc.encode("<|endoftext|>", "all")).toStrictEqual( - new Uint32Array([50256]) - ); - - expect(() => enc.encode("<|endoftext|>")).toThrowError( - "The text contains a special token that is not allowed" - ); - - expect(() => enc.encode("<|im_start|>")).toThrowError( - "The text contains a special token that is not allowed" - ); - - expect(enc.encode("<|endoftext|>", [], [])).toStrictEqual( - new Uint32Array([27, 91, 437, 1659, 5239, 91, 29]) - ); -}); - -it("invalid (dis)allowed_tokens", () => { - const enc = get_encoding("gpt2"); - - // @ts-expect-error - expect(() => enc.encode("hello world", "invalid-string")).toThrowError( - "Invalid value for allowed_special" - ); - - // @ts-expect-error - expect(() => enc.encode("hello world", [], "invalid-string")).toThrowError( - "Invalid value for disallowed_special" - ); -}); From bbcb5914f7ac855c2bf687d59640dc51a1eb3e29 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 2 Mar 2023 15:14:59 +0100 Subject: [PATCH 039/207] Match sdist --- MANIFEST.in | 3 +++ pyproject.toml | 5 +---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index c1c1fc51..a841992e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -7,4 +7,7 @@ recursive-include scripts *.py recursive-include tests *.py recursive-include 
core *.rs *.toml recursive-include python *.rs *.toml +recursive-exclude jni * +recursive-exclude java * +recursive-exclude js * include tiktoken *.json \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 68d47ffe..3318ca00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,4 @@ macos.archs = ["x86_64", "arm64"] test-skip = "*-macosx_arm64" before-test = "pip install pytest" -test-command = "pytest {project}/tests" - -[tool.check-manifest] -ignore = ["js", "jni", "java"] \ No newline at end of file +test-command = "pytest {project}/tests" \ No newline at end of file From d1c4af2777075dc01e97532fb145b81a6d580b35 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 2 Mar 2023 15:23:37 +0100 Subject: [PATCH 040/207] Remove the _js suffix --- js/Cargo.toml | 2 +- js/package.json | 26 +++++++++++++------------- js/scripts/override_any.ts | 6 +++--- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/js/Cargo.toml b/js/Cargo.toml index 44d1081f..012584e6 100644 --- a/js/Cargo.toml +++ b/js/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" rust-version = "1.57.0" [lib] -name = "_tiktoken_js" +name = "_tiktoken" crate-type = ["rlib", "cdylib"] [dependencies] diff --git a/js/package.json b/js/package.json index 5b35de74..5692f79d 100644 --- a/js/package.json +++ b/js/package.json @@ -7,31 +7,31 @@ "package.json" ], "license": "Apache-2.0", - "main": "dist/node/_tiktoken_js.js", - "browser": "dist/web/_tiktoken_js.js", - "types": "dist/node/_tiktoken_js.d.ts", + "main": "dist/node/_tiktoken.js", + "browser": "dist/web/_tiktoken.js", + "types": "dist/node/_tiktoken.d.ts", "exports": { ".": { "node": { - "types": "./dist/node/_tiktoken_js.d.ts", - "default": "./dist/node/_tiktoken_js.js" + "types": "./dist/node/_tiktoken.d.ts", + "default": "./dist/node/_tiktoken.js" }, "default": { - "types": "./dist/bundler/_tiktoken_js.d.js", - "default": "./dist/bundler/_tiktoken_js.mjs" + "types": "./dist/bundler/_tiktoken.d.js", + "default": 
"./dist/bundler/_tiktoken.mjs" } }, "./bundler": { - "types": "./dist/bundler/_tiktoken_js.d.ts", - "default": "./dist/bundler/_tiktoken_js.mjs" + "types": "./dist/bundler/_tiktoken.d.ts", + "default": "./dist/bundler/_tiktoken.mjs" }, "./web": { - "types": "./dist/web/_tiktoken_js.d.ts", - "default": "./dist/web/_tiktoken_js.js" + "types": "./dist/web/_tiktoken.d.ts", + "default": "./dist/web/_tiktoken.js" }, "./node": { - "types": "./dist/node/_tiktoken_js.d.ts", - "default": "./dist/node/_tiktoken_js.js" + "types": "./dist/node/_tiktoken.d.ts", + "default": "./dist/node/_tiktoken.js" } }, "repository": { diff --git a/js/scripts/override_any.ts b/js/scripts/override_any.ts index c07d1ae3..0974e9b8 100644 --- a/js/scripts/override_any.ts +++ b/js/scripts/override_any.ts @@ -4,9 +4,9 @@ const project = new Project(); project.addSourceFilesAtPaths("./dist/**/*.ts"); for (const filename of [ - "./dist/bundler/_tiktoken_js.d.ts", - "./dist/node/_tiktoken_js.d.ts", - "./dist/web/_tiktoken_js.d.ts", + "./dist/bundler/_tiktoken.d.ts", + "./dist/node/_tiktoken.d.ts", + "./dist/web/_tiktoken.d.ts", ]) { const sourceFile = project.getSourceFileOrThrow(filename); const cls = sourceFile.getFirstDescendantByKindOrThrow( From 33207e6c4f2f7117c09fdb590ba83c9be5089bad Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 2 Mar 2023 15:29:01 +0100 Subject: [PATCH 041/207] Update to newer build wheels --- .github/workflows/build_wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index d2e8dc27..383d4bea 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -21,7 +21,7 @@ jobs: steps: - uses: actions/checkout@v3 - - uses: pypa/cibuildwheel@v2.11.3 + - uses: pypa/cibuildwheel@v2.12.0 env: CIBW_BUILD: "cp${{ matrix.python-version}}-*" From 2370284c5c3adf3c61a7bb7f8c175248f681cf93 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 5 Mar 2023 
16:52:10 +0100 Subject: [PATCH 042/207] Fix wrong result for None --- js/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/src/lib.rs b/js/src/lib.rs index c575002b..f5ff4e6f 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -209,7 +209,7 @@ impl Tiktoken { ._encode_native( &text, &allowed_tokens.iter().map(AsRef::as_ref).collect(), - Some(0), + None, ) .0) } From 98ac9530ea1ac0a83db3f87cba18503baad694f1 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 5 Mar 2023 16:52:26 +0100 Subject: [PATCH 043/207] Add CI step to build and test --- .github/workflows/build_js.yml | 24 +++++ js/package.json | 3 +- js/scripts/download_ranks.py | 128 -------------------------- js/scripts/download_ranks.ts | 162 +++++++++++++++++++++++++++++++++ js/tsconfig.json | 6 ++ js/yarn.lock | 5 + 6 files changed, 199 insertions(+), 129 deletions(-) create mode 100644 .github/workflows/build_js.yml delete mode 100644 js/scripts/download_ranks.py create mode 100644 js/scripts/download_ranks.ts create mode 100644 js/tsconfig.json diff --git a/.github/workflows/build_js.yml b/.github/workflows/build_js.yml new file mode 100644 index 00000000..78472c94 --- /dev/null +++ b/.github/workflows/build_js.yml @@ -0,0 +1,24 @@ +name: Build JS + +on: [push, pull_request, workflow_dispatch] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + build_js: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-node@v1 + with: + node-version: 18 + registry-url: "https://registry.npmjs.org" + cache: yarn + - run: yarn install --frozen-lockfile + working-directory: ./js + - run: yarn run build + working-directory: ./js + - run: yarn run test + working-directory: ./js diff --git a/js/package.json b/js/package.json index 5692f79d..af93f11f 100644 --- a/js/package.json +++ b/js/package.json @@ -45,7 +45,8 @@ "vitest": "^0.28.5" }, "scripts": { - "build": "rm -rf dist/ && yarn 
run build:node && yarn run build:bundler && yarn run build:web && yarn run build:cleanup", + "build": "rm -rf dist/ ranks/ && yarn run ranks && yarn run build:node && yarn run build:bundler && yarn run build:web && yarn run build:cleanup", + "ranks": "tsx scripts/download_ranks.ts", "build:bundler": "wasm-pack build --target bundler --release --out-dir dist/bundler && rm dist/bundler/.gitignore", "build:node": "wasm-pack build --target nodejs --release --out-dir dist/node && rm dist/node/.gitignore", "build:web": "wasm-pack build --target no-modules --release --out-dir dist/web && rm dist/web/.gitignore", diff --git a/js/scripts/download_ranks.py b/js/scripts/download_ranks.py deleted file mode 100644 index c1583877..00000000 --- a/js/scripts/download_ranks.py +++ /dev/null @@ -1,128 +0,0 @@ -import base64 -import hashlib -import json -import os -import tempfile -import uuid - -import blobfile -import requests - - -def read_file(blobpath: str) -> bytes: - if not blobpath.startswith("http://") and not blobpath.startswith("https://"): - with blobfile.BlobFile(blobpath, "rb") as f: - return f.read() - # avoiding blobfile for public files helps avoid auth issues, like MFA prompts - return requests.get(blobpath).content - - -def read_file_cached(blobpath: str) -> bytes: - if "TIKTOKEN_CACHE_DIR" in os.environ: - cache_dir = os.environ["TIKTOKEN_CACHE_DIR"] - elif "DATA_GYM_CACHE_DIR" in os.environ: - cache_dir = os.environ["DATA_GYM_CACHE_DIR"] - else: - cache_dir = os.path.join(tempfile.gettempdir(), "data-gym-cache") - - if cache_dir == "": - # disable caching - return read_file(blobpath) - - cache_key = hashlib.sha1(blobpath.encode()).hexdigest() - - cache_path = os.path.join(cache_dir, cache_key) - if os.path.exists(cache_path): - with open(cache_path, "rb") as f: - return f.read() - - contents = read_file(blobpath) - - os.makedirs(cache_dir, exist_ok=True) - tmp_filename = cache_path + "." 
+ str(uuid.uuid4()) + ".tmp" - with open(tmp_filename, "wb") as f: - f.write(contents) - os.rename(tmp_filename, cache_path) - - return contents - - -def data_gym_to_mergeable_bpe_ranks( - vocab_bpe_file: str, encoder_json_file: str -) -> dict[bytes, int]: - # NB: do not add caching to this function - rank_to_intbyte = [b for b in range( - 2**8) if chr(b).isprintable() and chr(b) != " "] - - data_gym_byte_to_byte = {chr(b): b for b in rank_to_intbyte} - n = 0 - for b in range(2**8): - if b not in rank_to_intbyte: - rank_to_intbyte.append(b) - data_gym_byte_to_byte[chr(2**8 + n)] = b - n += 1 - assert len(rank_to_intbyte) == 2**8 - - # vocab_bpe contains the merges along with associated ranks - vocab_bpe_contents = read_file_cached(vocab_bpe_file).decode() - bpe_merges = [tuple(merge_str.split()) - for merge_str in vocab_bpe_contents.split("\n")[1:-1]] - - def decode_data_gym(value: str) -> bytes: - return bytes(data_gym_byte_to_byte[b] for b in value) - - # add the single byte tokens - bpe_ranks = {bytes([b]): i for i, b in enumerate(rank_to_intbyte)} - # add the merged tokens - n = len(bpe_ranks) - for first, second in bpe_merges: - bpe_ranks[decode_data_gym(first) + decode_data_gym(second)] = n - n += 1 - - # check that the encoder file matches the merges file - # this sanity check is important since tiktoken assumes that ranks are ordered the same - # as merge priority - encoder_json = json.loads(read_file_cached(encoder_json_file)) - encoder_json_loaded = {decode_data_gym( - k): v for k, v in encoder_json.items()} - # drop these two special tokens if present, since they're not mergeable bpe tokens - encoder_json_loaded.pop(b"<|endoftext|>", None) - encoder_json_loaded.pop(b"<|startoftext|>", None) - assert bpe_ranks == encoder_json_loaded - - return bpe_ranks - - -def load_tiktoken_bpe(tiktoken_bpe_file: str) -> dict[bytes, int]: - # NB: do not add caching to this function - contents = read_file_cached(tiktoken_bpe_file) - return { - base64.b64decode(token): 
int(rank) - for token, rank in (line.split() for line in contents.splitlines() if line) - } - - -def dump_tiktoken_bpe(bpe_ranks: dict[bytes, int], tiktoken_bpe_file: str) -> None: - with blobfile.BlobFile(tiktoken_bpe_file, "wb") as f: - for token, rank in sorted(bpe_ranks.items(), key=lambda x: x[1]): - f.write(base64.b64encode(token) + - b" " + str(rank).encode() + b"\n") - - -if __name__ == "__main__": - dump_tiktoken_bpe(data_gym_to_mergeable_bpe_ranks( - vocab_bpe_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe", - encoder_json_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json", - ), "./ranks/gpt2.tiktoken") - - dump_tiktoken_bpe(load_tiktoken_bpe( - "https://openaipublic.blob.core.windows.net/encodings/r50k_base.tiktoken" - ), "./ranks/r50k_base.tiktoken") - - dump_tiktoken_bpe(load_tiktoken_bpe( - "https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken" - ), "./ranks/p50k_base.tiktoken") - - dump_tiktoken_bpe(load_tiktoken_bpe( - "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken" - ), "./ranks/cl100k_base.tiktoken") diff --git a/js/scripts/download_ranks.ts b/js/scripts/download_ranks.ts new file mode 100644 index 00000000..269ed1ba --- /dev/null +++ b/js/scripts/download_ranks.ts @@ -0,0 +1,162 @@ +import assert from "node:assert"; +import fs from "node:fs/promises"; +import path from "node:path"; + +// printable ascii characters according to python +function is_printable(u: number): boolean { + return !(u <= 31 || (u >= 127 && u <= 160) || u == 173); +} + +function data_gym_to_mergeable_bpe_ranks( + vocal_bpe_contents: string, + encoder_json_contents: string +) { + const rank_to_intbyte = Array.from({ length: 2 ** 8 }, (_, i) => i).filter( + (i) => is_printable(i) && String.fromCharCode(i) !== " " + ); + + const data_gym_byte_to_byte = rank_to_intbyte.reduce>( + (memo, item) => { + memo[String.fromCharCode(item)] = item; + return memo; + }, + {} + ); 
+ + let n = 0; + for (let b = 0; b < 2 ** 8; b++) { + if (!rank_to_intbyte.includes(b)) { + rank_to_intbyte.push(b); + data_gym_byte_to_byte[String.fromCharCode(2 ** 8 + n)] = b; + n += 1; + } + } + + assert( + rank_to_intbyte.length === 2 ** 8, + "rank_to_intbyte.length must be 2**8" + ); + + // vocab_bpe contains the merges along with associated ranks + const bpe_merges = vocal_bpe_contents + .split("\n") + .slice(1, -1) + .map((merge_str) => merge_str.split(" ")); + + function decode_data_gym(value: string) { + return value.split("").map((b) => data_gym_byte_to_byte[b]); + } + + // add the single byte tokens + const bpe_ranks = Object.fromEntries(rank_to_intbyte.map((b, i) => [b, i])); + + // add the merged tokens + n = rank_to_intbyte.length; + for (const [first, second] of bpe_merges) { + bpe_ranks[ + [...decode_data_gym(first), ...decode_data_gym(second)].join(",") + ] = n; + n += 1; + } + + // check that the encoder file matches the merges file + // this sanity check is important since tiktoken assumes that ranks are ordered the same + // as merge priority + const encoder_json: Record = JSON.parse( + encoder_json_contents + ); + + const encoder_json_loaded = Object.fromEntries( + Object.entries(encoder_json).map(([k, v]) => [ + decode_data_gym(k).join(","), + v, + ]) + ); + + // drop these two special tokens if present, since they're not mergeable bpe tokens + delete encoder_json_loaded[decode_data_gym("<|endoftext|>").join(",")]; + delete encoder_json_loaded[decode_data_gym("<|startoftext|>").join(",")]; + + function normalize_map(items: Record) { + return JSON.stringify( + Object.keys(items) + .sort() + .map((key) => [key, items[key]]) + ); + } + + assert(normalize_map(bpe_ranks) === normalize_map(encoder_json_loaded)); + return bpe_ranks; +} + +function load_tiktoken_bpe(tiktoken_bpe_file: string) { + return Object.fromEntries( + tiktoken_bpe_file + .split("\n") + .map((line) => line.trim() && line.split(" ")) + .filter((x): x is Array => !!x && 
Array.isArray(x)) + .map(([token, rank]) => [ + Buffer.from(token, "base64").join(","), + Number.parseInt(rank, 10), + ]) + ); +} + +function dump_tiktoken_bpe(bpe_ranks: Record) { + return ( + Object.entries(bpe_ranks) + .sort((a, b) => a[1] - b[1]) + .map(([token_str, rank]) => + [ + Buffer.from( + token_str.split(",").map((i) => Number.parseInt(i, 10)) + ).toString("base64"), + rank, + ].join(" ") + ) + .join("\n") + "\n" + ); +} + +async function requestText(url: string) { + return await fetch(url).then((a) => a.text()); +} + +async function main() { + try { + await fs.mkdir(path.resolve(__dirname, "../ranks"), { recursive: true }); + } catch {} + + const registry = JSON.parse( + await fs.readFile(path.resolve(__dirname, "../../tiktoken/registry.json"), { + encoding: "utf-8", + }) + ); + + for (const name in registry) { + console.log(name); + const data = registry[name]; + + let ranks: Record | null = null; + + if (data.data_gym_to_mergeable_bpe_ranks) { + ranks = data_gym_to_mergeable_bpe_ranks( + await requestText(data.data_gym_to_mergeable_bpe_ranks.vocab_bpe_file), + await requestText( + data.data_gym_to_mergeable_bpe_ranks.encoder_json_file + ) + ); + } else if (data.load_tiktoken_bpe) { + ranks = load_tiktoken_bpe(await requestText(data.load_tiktoken_bpe)); + } + + if (ranks != null) { + await fs.writeFile( + path.resolve(__dirname, `../ranks/${name}.tiktoken`), + dump_tiktoken_bpe(ranks) + ); + } + } +} + +main(); diff --git a/js/tsconfig.json b/js/tsconfig.json new file mode 100644 index 00000000..3e4bff18 --- /dev/null +++ b/js/tsconfig.json @@ -0,0 +1,6 @@ +{ + "compilerOptions": { + "esModuleInterop": true, + "strict": true + } +} diff --git a/js/yarn.lock b/js/yarn.lock index c4f5d085..b76be6fa 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -294,6 +294,11 @@ resolved "https://registry.yarnpkg.com/@types/node/-/node-18.14.1.tgz#90dad8476f1e42797c49d6f8b69aaf9f876fc69f" integrity 
sha512-QH+37Qds3E0eDlReeboBxfHbX9omAcBCXEzswCu6jySP642jiM3cYSIkU/REqwhCUqXdonHFuBfJDiAJxMNhaQ== +"@types/node@^18.14.4": + version "18.14.4" + resolved "https://registry.yarnpkg.com/@types/node/-/node-18.14.4.tgz#0e64ec0b35a772e1e3d849f9a0ff61782d0cb647" + integrity sha512-VhCw7I7qO2X49+jaKcAUwi3rR+hbxT5VcYF493+Z5kMLI0DL568b7JI4IDJaxWFH0D/xwmGJNoXisyX+w7GH/g== + "@vitest/expect@0.28.5": version "0.28.5" resolved "https://registry.yarnpkg.com/@vitest/expect/-/expect-0.28.5.tgz#d5a6eccd014e9ad66fe87a20d16426a2815c0e8a" From d989c22e4dcd8b28cbdbac535c63cb5fb53b4c4f Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 5 Mar 2023 16:57:22 +0100 Subject: [PATCH 044/207] CI: install initialize wasm-pack --- .github/workflows/build_js.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/build_js.yml b/.github/workflows/build_js.yml index 78472c94..adc3d714 100644 --- a/.github/workflows/build_js.yml +++ b/.github/workflows/build_js.yml @@ -16,6 +16,10 @@ jobs: node-version: 18 registry-url: "https://registry.npmjs.org" cache: yarn + + - name: Install + run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh + - run: yarn install --frozen-lockfile working-directory: ./js - run: yarn run build From 01bf979f58858e5e4a3b19ab1644fc38c1d49dfd Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 5 Mar 2023 17:47:05 +0100 Subject: [PATCH 045/207] Fix Python CI --- pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3318ca00..207e6334 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,10 @@ requires = ["setuptools>=62.4", "wheel", "setuptools-rust>=1.5.2"] build-frontend = "build" build-verbosity = 1 -linux.before-all = "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y" +linux.before-all = [ + "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y", + "ulimit -n 1024 && yum install -y openssl-devel" +] 
linux.environment = { PATH = "$PATH:$HOME/.cargo/bin" } macos.before-all = "rustup target add aarch64-apple-darwin" From 82cd4139ee3372a4124b664034ea99bb13ca048b Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 5 Mar 2023 17:54:51 +0100 Subject: [PATCH 046/207] Replace yum with apt --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 207e6334..99325236 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ build-verbosity = 1 linux.before-all = [ "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y", - "ulimit -n 1024 && yum install -y openssl-devel" + "apt-get install -y pkg-config libssl-dev" ] linux.environment = { PATH = "$PATH:$HOME/.cargo/bin" } macos.before-all = "rustup target add aarch64-apple-darwin" From 7db26cba189a62935027cf1117c50efd76d4db4f Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 5 Mar 2023 17:59:47 +0100 Subject: [PATCH 047/207] Try yum once again --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 99325236..f6516f39 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,8 +22,8 @@ build-frontend = "build" build-verbosity = 1 linux.before-all = [ - "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y", - "apt-get install -y pkg-config libssl-dev" + "ulimit -n 1024 && yum install -y openssl-devel", + "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y" ] linux.environment = { PATH = "$PATH:$HOME/.cargo/bin" } macos.before-all = "rustup target add aarch64-apple-darwin" From 08403a144507fd93a0fd0df1debd2ee82b96ab68 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 5 Mar 2023 18:50:15 +0100 Subject: [PATCH 048/207] debug ci --- .github/workflows/build_jar.yml | 81 ------------------------------ .github/workflows/build_js.yml | 28 ----------- .github/workflows/build_wheels.yml | 6 +-- pyproject.toml | 9 +++- 4 
files changed, 10 insertions(+), 114 deletions(-) delete mode 100644 .github/workflows/build_jar.yml delete mode 100644 .github/workflows/build_js.yml diff --git a/.github/workflows/build_jar.yml b/.github/workflows/build_jar.yml deleted file mode 100644 index 4b0d4476..00000000 --- a/.github/workflows/build_jar.yml +++ /dev/null @@ -1,81 +0,0 @@ -name: Build Java JAR - -on: [push, pull_request, workflow_dispatch] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - build_jni: - name: jni on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - include: - - os: ubuntu-latest - outdir: linux_64 - - os: windows-latest - outdir: windows_64 - - os: macos-latest - outdir: osx_64 - steps: - - uses: actions/checkout@v3 - - - name: Install rust toolchain - uses: actions-rs/toolchain@v1 - with: - # stable doesn't have --out-dir - toolchain: nightly - override: true - - - name: Build - working-directory: ./jni - # TODO: 32bit vs 64bit? 
- # https://github.com/scijava/native-lib-loader - run: cargo build --release -Z unstable-options --out-dir ../build/natives/${{ matrix.outdir }}/ - - - uses: actions/upload-artifact@v3 - with: - name: natives - path: ./build/natives/* - - build_java: - name: java - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - needs: [build_jni] - - steps: - - uses: actions/checkout@v3 - - - name: Load outputs - uses: actions/download-artifact@v3 - with: - name: natives - path: natives - - - name: Set up JDK 11 - uses: actions/setup-java@v3 - with: - java-version: '11' - distribution: 'microsoft' - architecture: x64 - cache: maven - - - name: Build with Maven - working-directory: ./java - run: mvn --batch-mode package failsafe:integration-test - - - uses: actions/upload-artifact@v3 - with: - name: java - path: ./java/target/*.jar - - # TODO: publish to maven (only from ubuntu) - diff --git a/.github/workflows/build_js.yml b/.github/workflows/build_js.yml deleted file mode 100644 index adc3d714..00000000 --- a/.github/workflows/build_js.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Build JS - -on: [push, pull_request, workflow_dispatch] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - build_js: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-node@v1 - with: - node-version: 18 - registry-url: "https://registry.npmjs.org" - cache: yarn - - - name: Install - run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - - - run: yarn install --frozen-lockfile - working-directory: ./js - - run: yarn run build - working-directory: ./js - - run: yarn run test - working-directory: ./js diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 383d4bea..b42b299e 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -15,15 
+15,15 @@ jobs: matrix: # cibuildwheel builds linux wheels inside a manylinux container # it also takes care of procuring the correct python version for us - os: [ubuntu-latest, windows-latest, macos-latest] - python-version: [38, 39, 310, 311] + os: [ubuntu-latest] + python-version: [38] steps: - uses: actions/checkout@v3 - uses: pypa/cibuildwheel@v2.12.0 env: - CIBW_BUILD: "cp${{ matrix.python-version}}-*" + CIBW_BUILD: "cp${{ matrix.python-version }}-*" - uses: actions/upload-artifact@v3 with: diff --git a/pyproject.toml b/pyproject.toml index f6516f39..7aa51972 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,8 +22,13 @@ build-frontend = "build" build-verbosity = 1 linux.before-all = [ - "ulimit -n 1024 && yum install -y openssl-devel", - "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y" + "ulimit -n 1024", + "(command -v apk || echo 'no-apk')", + "(command -v yum || echo 'no-yum')", + "(command -v apt-get || echo 'no-apt-get')", + "(command -v dnf || echo 'no-dnf')", + "yum install openssl-devel -y", + "(curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y)" ] linux.environment = { PATH = "$PATH:$HOME/.cargo/bin" } macos.before-all = "rustup target add aarch64-apple-darwin" From 12805e372468e6938cfa6a7e6a3312ea9a96e9e4 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 5 Mar 2023 18:58:09 +0100 Subject: [PATCH 049/207] Try CI again --- pyproject.toml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7aa51972..3cbc32a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,11 +23,8 @@ build-verbosity = 1 linux.before-all = [ "ulimit -n 1024", - "(command -v apk || echo 'no-apk')", - "(command -v yum || echo 'no-yum')", - "(command -v apt-get || echo 'no-apt-get')", - "(command -v dnf || echo 'no-dnf')", - "yum install openssl-devel -y", + "(command -v yum || yum install openssl-devel -y)", + "(command -v apk || apk add openssl-dev -y)", "(curl 
--proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y)" ] linux.environment = { PATH = "$PATH:$HOME/.cargo/bin" } From ae30c13eba7844783fb6f6c94cbb3ba87f591190 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 5 Mar 2023 19:01:24 +0100 Subject: [PATCH 050/207] Fix CI again --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3cbc32a5..a3d6e5a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,8 +23,8 @@ build-verbosity = 1 linux.before-all = [ "ulimit -n 1024", - "(command -v yum || yum install openssl-devel -y)", - "(command -v apk || apk add openssl-dev -y)", + "((command -v yum && yum install openssl-devel -y) || echo 'no yum found')", + "((command -v apk && apk add openssl-dev -y) || echo 'no apk found')", "(curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y)" ] linux.environment = { PATH = "$PATH:$HOME/.cargo/bin" } From dabb296e61553ffccdc1dcb43dcdd9150bee03af Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 5 Mar 2023 19:08:35 +0100 Subject: [PATCH 051/207] invalid `-y` command --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a3d6e5a3..24d26dab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ build-verbosity = 1 linux.before-all = [ "ulimit -n 1024", "((command -v yum && yum install openssl-devel -y) || echo 'no yum found')", - "((command -v apk && apk add openssl-dev -y) || echo 'no apk found')", + "((command -v apk && apk add --no-cache openssl openssl-dev) || echo 'no apk found')", "(curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y)" ] linux.environment = { PATH = "$PATH:$HOME/.cargo/bin" } From 8e4068201b221b71eaa6f1c697cacfca79c98079 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 5 Mar 2023 19:22:51 +0100 Subject: [PATCH 052/207] Revert "debug ci" This reverts commit 
08403a144507fd93a0fd0df1debd2ee82b96ab68. --- .github/workflows/build_jar.yml | 81 ++++++++++++++++++++++++++++++ .github/workflows/build_js.yml | 28 +++++++++++ .github/workflows/build_wheels.yml | 6 +-- 3 files changed, 112 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/build_jar.yml create mode 100644 .github/workflows/build_js.yml diff --git a/.github/workflows/build_jar.yml b/.github/workflows/build_jar.yml new file mode 100644 index 00000000..4b0d4476 --- /dev/null +++ b/.github/workflows/build_jar.yml @@ -0,0 +1,81 @@ +name: Build Java JAR + +on: [push, pull_request, workflow_dispatch] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + build_jni: + name: jni on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + include: + - os: ubuntu-latest + outdir: linux_64 + - os: windows-latest + outdir: windows_64 + - os: macos-latest + outdir: osx_64 + steps: + - uses: actions/checkout@v3 + + - name: Install rust toolchain + uses: actions-rs/toolchain@v1 + with: + # stable doesn't have --out-dir + toolchain: nightly + override: true + + - name: Build + working-directory: ./jni + # TODO: 32bit vs 64bit? 
+ # https://github.com/scijava/native-lib-loader + run: cargo build --release -Z unstable-options --out-dir ../build/natives/${{ matrix.outdir }}/ + + - uses: actions/upload-artifact@v3 + with: + name: natives + path: ./build/natives/* + + build_java: + name: java + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + needs: [build_jni] + + steps: + - uses: actions/checkout@v3 + + - name: Load outputs + uses: actions/download-artifact@v3 + with: + name: natives + path: natives + + - name: Set up JDK 11 + uses: actions/setup-java@v3 + with: + java-version: '11' + distribution: 'microsoft' + architecture: x64 + cache: maven + + - name: Build with Maven + working-directory: ./java + run: mvn --batch-mode package failsafe:integration-test + + - uses: actions/upload-artifact@v3 + with: + name: java + path: ./java/target/*.jar + + # TODO: publish to maven (only from ubuntu) + diff --git a/.github/workflows/build_js.yml b/.github/workflows/build_js.yml new file mode 100644 index 00000000..adc3d714 --- /dev/null +++ b/.github/workflows/build_js.yml @@ -0,0 +1,28 @@ +name: Build JS + +on: [push, pull_request, workflow_dispatch] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + build_js: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-node@v1 + with: + node-version: 18 + registry-url: "https://registry.npmjs.org" + cache: yarn + + - name: Install + run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh + + - run: yarn install --frozen-lockfile + working-directory: ./js + - run: yarn run build + working-directory: ./js + - run: yarn run test + working-directory: ./js diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index b42b299e..383d4bea 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -15,15 
+15,15 @@ jobs: matrix: # cibuildwheel builds linux wheels inside a manylinux container # it also takes care of procuring the correct python version for us - os: [ubuntu-latest] - python-version: [38] + os: [ubuntu-latest, windows-latest, macos-latest] + python-version: [38, 39, 310, 311] steps: - uses: actions/checkout@v3 - uses: pypa/cibuildwheel@v2.12.0 env: - CIBW_BUILD: "cp${{ matrix.python-version }}-*" + CIBW_BUILD: "cp${{ matrix.python-version}}-*" - uses: actions/upload-artifact@v3 with: From 4866cf9fa1c3a3c3eb1c80fd245f6adc95752681 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Mon, 6 Mar 2023 00:38:44 +0100 Subject: [PATCH 053/207] Optimize performace, enable/disable features of core --- Cargo.toml | 2 +- core/Cargo.toml | 5 +++++ core/src/lib.rs | 14 +++++++++----- jni/Cargo.toml | 2 +- js/Cargo.toml | 4 ++-- python/Cargo.toml | 2 +- 6 files changed, 19 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6709ee3a..b8bf6948 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ members = [ [profile.release] incremental = true -opt-level = 'z' # Optimize for size +opt-level = 's' # Optimize for size lto = true # Enable link-time optimization codegen-units = 1 # Reduce number of codegen units to increase optimizations panic = 'abort' # Abort on panic diff --git a/core/Cargo.toml b/core/Cargo.toml index 53688fd4..966e20b7 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -19,3 +19,8 @@ sha1 = "0.10.5" json = "0.12.4" base64 = "0.21.0" lazy_static = "1.4.0" + +[features] +default = [] +lazyload = [] +multithreading = [] \ No newline at end of file diff --git a/core/src/lib.rs b/core/src/lib.rs index 6477ba85..7ee05572 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -5,18 +5,22 @@ use fancy_regex::Regex; use rustc_hash::FxHashMap as HashMap; mod util; -#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +#[cfg(feature = "lazyload")] mod load; -#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] 
+#[cfg(feature = "lazyload")] pub mod openai_public; -#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +#[cfg(feature = "lazyload")] #[macro_use] extern crate lazy_static; +#[cfg(feature = "multithreading")] const MAX_NUM_THREADS: usize = 128; +#[cfg(not(feature = "multithreading"))] +const MAX_NUM_THREADS: usize = 1; + // Various performance notes: // // Regex @@ -421,7 +425,7 @@ impl CoreBPENative { pattern: &str, ) -> Result { let regex = Regex::new(pattern)?; - // .map_err(|e| PyErr::new::(e.to_string()))?; + // .map_err(|e| PyErr::new::(e.to_string()))?; let special_regex = { let _parts = special_tokens_encoder @@ -430,7 +434,7 @@ impl CoreBPENative { .collect::>(); Regex::new(&_parts.join("|"))? - // .map_err(|e| PyErr::new::(e.to_string()))? + // .map_err(|e| PyErr::new::(e.to_string()))? }; let decoder: HashMap> = diff --git a/jni/Cargo.toml b/jni/Cargo.toml index 7c6d4155..4309eef4 100644 --- a/jni/Cargo.toml +++ b/jni/Cargo.toml @@ -9,7 +9,7 @@ name = "_tiktoken_jni" crate-type = ["cdylib"] [dependencies] -tiktoken_core = { path = "../core" } +tiktoken_core = { path = "../core", features = ["multithreading", "lazyload"] } rustc-hash = "1.1.0" jni = "0.20.0" diff --git a/js/Cargo.toml b/js/Cargo.toml index 012584e6..d1e5f655 100644 --- a/js/Cargo.toml +++ b/js/Cargo.toml @@ -6,10 +6,10 @@ rust-version = "1.57.0" [lib] name = "_tiktoken" -crate-type = ["rlib", "cdylib"] +crate-type = ["cdylib"] [dependencies] -tiktoken_core = { path = "../core" } +tiktoken_core = { path = "../core", features = [] } # tiktoken dependencies fancy-regex = "0.10.0" regex = "1.7.0" diff --git a/python/Cargo.toml b/python/Cargo.toml index 7febd473..7e97d03b 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -10,5 +10,5 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.17.3", features = ["extension-module"] } -tiktoken_core = { path = "../core" } +tiktoken_core = { path = "../core", features = ["multithreading"] } rustc-hash = "1.1.0" From 
33bb13d3f3817bcf2e381eba1efb80a21f0054a6 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Tue, 7 Mar 2023 21:56:47 +0100 Subject: [PATCH 054/207] Add custom initialisation, bundler changes --- js/package.json | 54 ++++++-------- js/scripts/override_any.ts | 116 ++++++++++++++++++++++++++++- js/src/utils/download_ranks.ts | 108 +++++++++++++++++++++++++++ js/test/test_simple_public.test.ts | 2 +- js/tsconfig.json | 13 +++- 5 files changed, 254 insertions(+), 39 deletions(-) create mode 100644 js/src/utils/download_ranks.ts diff --git a/js/package.json b/js/package.json index af93f11f..f96075ee 100644 --- a/js/package.json +++ b/js/package.json @@ -2,56 +2,44 @@ "name": "@dqbd/tiktoken", "version": "0.4.0", "description": "Javascript bindings for tiktoken", - "files": [ - "dist/**/*", - "package.json" - ], "license": "Apache-2.0", - "main": "dist/node/_tiktoken.js", - "browser": "dist/web/_tiktoken.js", - "types": "dist/node/_tiktoken.d.ts", + "main": "node/_tiktoken.js", + "types": "node/_tiktoken.d.ts", "exports": { ".": { "node": { - "types": "./dist/node/_tiktoken.d.ts", - "default": "./dist/node/_tiktoken.js" + "types": "./node/_tiktoken.d.ts", + "default": "./node/_tiktoken.js" }, "default": { - "types": "./dist/bundler/_tiktoken.d.js", - "default": "./dist/bundler/_tiktoken.mjs" + "types": "./bundler/_tiktoken.d.js", + "default": "./bundler/_tiktoken.js" } }, - "./bundler": { - "types": "./dist/bundler/_tiktoken.d.ts", - "default": "./dist/bundler/_tiktoken.mjs" - }, - "./web": { - "types": "./dist/web/_tiktoken.d.ts", - "default": "./dist/web/_tiktoken.js" - }, - "./node": { - "types": "./dist/node/_tiktoken.d.ts", - "default": "./dist/node/_tiktoken.js" + "./init": "./init.js", + "./tiktoken.wasm": { + "types": "./tiktoken.wasm.d.ts", + "default": "./tiktoken.wasm" } }, + "scripts": { + "build": "rm -rf dist/ ranks/ && yarn run ranks && yarn run build:node && yarn run build:bundler && yarn run build:cleanup", + "ranks": "tsx scripts/download_ranks.ts", + 
"build:bundler": "wasm-pack build --target bundler --release --out-dir dist/bundler && rm dist/bundler/.gitignore", + "build:node": "wasm-pack build --target nodejs --release --out-dir dist/node && rm dist/node/.gitignore", + "build:cleanup": "tsx scripts/override_any.ts", + "test": "yarn vitest" + }, "repository": { "type": "git", "url": "https://github.com/dqbd/tiktoken" }, + "dependencies": {}, "devDependencies": { + "@types/node": "^18.14.4", "ts-morph": "^17.0.1", "tsx": "^3.12.3", "typescript": "^4.9.5", "vitest": "^0.28.5" - }, - "scripts": { - "build": "rm -rf dist/ ranks/ && yarn run ranks && yarn run build:node && yarn run build:bundler && yarn run build:web && yarn run build:cleanup", - "ranks": "tsx scripts/download_ranks.ts", - "build:bundler": "wasm-pack build --target bundler --release --out-dir dist/bundler && rm dist/bundler/.gitignore", - "build:node": "wasm-pack build --target nodejs --release --out-dir dist/node && rm dist/node/.gitignore", - "build:web": "wasm-pack build --target no-modules --release --out-dir dist/web && rm dist/web/.gitignore", - "build:cleanup": "tsx scripts/override_any.ts", - "test": "yarn vitest" - }, - "dependencies": {} + } } diff --git a/js/scripts/override_any.ts b/js/scripts/override_any.ts index 0974e9b8..262a791e 100644 --- a/js/scripts/override_any.ts +++ b/js/scripts/override_any.ts @@ -1,12 +1,14 @@ import { Project, ts } from "ts-morph"; +import * as fs from "node:fs"; +import * as path from "node:path"; const project = new Project(); -project.addSourceFilesAtPaths("./dist/**/*.ts"); +project.addSourceFilesAtPaths(["./dist/**/*.ts", "./dist/**/*.js"]); +// make sure the types are correct for (const filename of [ "./dist/bundler/_tiktoken.d.ts", "./dist/node/_tiktoken.d.ts", - "./dist/web/_tiktoken.d.ts", ]) { const sourceFile = project.getSourceFileOrThrow(filename); const cls = sourceFile.getFirstDescendantByKindOrThrow( @@ -36,3 +38,113 @@ for (const filename of [ sourceFile.saveSync(); } + +// use only a 
single WASM binary +fs.copyFileSync( + path.resolve(__dirname, "../dist/bundler/_tiktoken_bg.wasm"), + path.resolve(__dirname, "../dist/tiktoken.wasm") +); + +fs.copyFileSync( + path.resolve(__dirname, "../dist/bundler/_tiktoken_bg.wasm.d.ts"), + path.resolve(__dirname, "../dist/tiktoken.wasm.d.ts") +); + +// remove unnecessary files +for (const folder of ["bundler", "node"]) { + fs.rmSync(path.resolve(__dirname, `../dist/${folder}/package.json`)); + fs.rmSync(path.resolve(__dirname, `../dist/${folder}/README.md`)); +} + +function replaceContent(file: string, transform: (content: string) => string) { + const options = { encoding: "utf-8" } as const; + fs.writeFileSync( + path.resolve(__dirname, file), + transform(fs.readFileSync(path.resolve(__dirname, file), options)), + options + ); +} + +// bundler +{ + replaceContent("../dist/bundler/_tiktoken.js", (src) => + src.replaceAll(`"./_tiktoken_bg.wasm"`, `"../tiktoken.wasm"`) + ); + + fs.rmSync(path.resolve(__dirname, "../dist/bundler/_tiktoken_bg.wasm")); + fs.rmSync(path.resolve(__dirname, "../dist/bundler/_tiktoken_bg.wasm.d.ts")); +} + +// node +{ + replaceContent("../dist/node/_tiktoken.js", (src) => + src + .replaceAll("__wbindgen_placeholder__", `./_tiktoken_bg.js`) + .replace("'_tiktoken_bg.wasm'", `'../tiktoken.wasm'`) + ); + + fs.rmSync(path.resolve(__dirname, "../dist/node/_tiktoken_bg.wasm")); + fs.rmSync(path.resolve(__dirname, "../dist/node/_tiktoken_bg.wasm.d.ts")); +} + +{ + fs.writeFileSync( + path.resolve(__dirname, "../dist/init.js"), + ` +import * as imports from "./bundler/_tiktoken_bg.js"; + +export async function init(cb) { + const res = await cb({ + "./_tiktoken_bg.js": imports, + }); + + const instance = + "instance" in res && res.instance instanceof WebAssembly.Instance + ? res.instance + : res instanceof WebAssembly.Instance + ? 
res + : null; + + if (instance == null) throw new Error("Missing instance"); + imports.__wbg_set_wasm(instance.exports); + return imports; +} + +export * from "./bundler/_tiktoken_bg.js"; + `.trim(), + { encoding: "utf-8" } + ); + + fs.writeFileSync( + path.resolve(__dirname, "../dist/init.d.ts"), + ` +/* tslint:disable */ +/* eslint-disable */ +export * from "./bundler/_tiktoken"; +export function init( + callback: ( + imports: WebAssembly.Imports + ) => Promise +): Promise; + `.trim(), + { encoding: "utf-8" } + ); +} + +{ + const pkg = JSON.parse( + fs.readFileSync(path.resolve(__dirname, "../package.json"), { + encoding: "utf-8", + }) + ); + + delete pkg.devDependencies; + delete pkg.scripts; + pkg.files = ["**/*"]; + + fs.writeFileSync( + path.resolve(__dirname, "../dist/package.json"), + JSON.stringify(pkg, null, 2), + { encoding: "utf-8" } + ); +} diff --git a/js/src/utils/download_ranks.ts b/js/src/utils/download_ranks.ts new file mode 100644 index 00000000..70db0dde --- /dev/null +++ b/js/src/utils/download_ranks.ts @@ -0,0 +1,108 @@ +function assert(condition: unknown, message?: string): asserts condition { + if (!condition) { + throw new Error(message); + } +} + +// printable ascii characters according to python +function is_printable(u: number): boolean { + return !(u <= 31 || (u >= 127 && u <= 160) || u == 173); +} + +export function data_gym_to_mergeable_bpe_ranks( + vocal_bpe_contents: string, + encoder_json_contents: string +) { + const rank_to_intbyte = Array.from({ length: 2 ** 8 }, (_, i) => i).filter( + (i) => is_printable(i) && String.fromCharCode(i) !== " " + ); + + const data_gym_byte_to_byte = rank_to_intbyte.reduce>( + (memo, item) => { + memo[String.fromCharCode(item)] = item; + return memo; + }, + {} + ); + + let n = 0; + for (let b = 0; b < 2 ** 8; b++) { + if (!rank_to_intbyte.includes(b)) { + rank_to_intbyte.push(b); + data_gym_byte_to_byte[String.fromCharCode(2 ** 8 + n)] = b; + n += 1; + } + } + + assert( + rank_to_intbyte.length === 
2 ** 8, + "rank_to_intbyte.length must be 2**8" + ); + + // vocab_bpe contains the merges along with associated ranks + const bpe_merges = vocal_bpe_contents + .split("\n") + .slice(1, -1) + .map((merge_str) => merge_str.split(" ")); + + function decode_data_gym(value: string) { + return value.split("").map((b) => data_gym_byte_to_byte[b]); + } + + // add the single byte tokens + const bpe_ranks = Object.fromEntries(rank_to_intbyte.map((b, i) => [b, i])); + + // add the merged tokens + n = rank_to_intbyte.length; + for (const [first, second] of bpe_merges) { + bpe_ranks[ + [...decode_data_gym(first), ...decode_data_gym(second)].join(",") + ] = n; + n += 1; + } + + // check that the encoder file matches the merges file + // this sanity check is important since tiktoken assumes that ranks are ordered the same + // as merge priority + const encoder_json: Record = JSON.parse( + encoder_json_contents + ); + + const encoder_json_loaded = Object.fromEntries( + Object.entries(encoder_json).map(([k, v]) => [ + decode_data_gym(k).join(","), + v, + ]) + ); + + // drop these two special tokens if present, since they're not mergeable bpe tokens + delete encoder_json_loaded[decode_data_gym("<|endoftext|>").join(",")]; + delete encoder_json_loaded[decode_data_gym("<|startoftext|>").join(",")]; + + function normalize_map(items: Record) { + return JSON.stringify( + Object.keys(items) + .sort() + .map((key) => [key, items[key]]) + ); + } + + assert(normalize_map(bpe_ranks) === normalize_map(encoder_json_loaded)); + return dump_tiktoken_bpe(bpe_ranks); +} + +export function dump_tiktoken_bpe(bpe_ranks: Record) { + return ( + Object.entries(bpe_ranks) + .sort((a, b) => a[1] - b[1]) + .map(([token_str, rank]) => + [ + Buffer.from( + token_str.split(",").map((i) => Number.parseInt(i, 10)) + ).toString("base64"), + rank, + ].join(" ") + ) + .join("\n") + "\n" + ); +} diff --git a/js/test/test_simple_public.test.ts b/js/test/test_simple_public.test.ts index 7bb5f012..1c63d5cf 100644 --- 
a/js/test/test_simple_public.test.ts +++ b/js/test/test_simple_public.test.ts @@ -1,5 +1,5 @@ import { it, expect, describe } from "vitest"; -import { encoding_for_model, get_encoding } from "../"; +import { encoding_for_model, get_encoding } from "../dist"; it("encoding_for_model initialization", () => { expect(() => encoding_for_model("gpt2")).not.toThrowError(); diff --git a/js/tsconfig.json b/js/tsconfig.json index 3e4bff18..1c4bee1b 100644 --- a/js/tsconfig.json +++ b/js/tsconfig.json @@ -1,6 +1,13 @@ { "compilerOptions": { - "esModuleInterop": true, - "strict": true - } + "target": "ES2022", + "lib": ["ESNext", "DOM"], + "module": "ES2020", + "moduleResolution": "node", + "strict": true, + "declaration": true, + "outDir": "./dist" + }, + "include": ["./**/*.ts", "./**/*.js"], + "exclude": ["node_modules", "dist"] } From 03cdbb3ad12d466c5a998f0eb3540481797c4ea4 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 8 Mar 2023 01:25:03 +0100 Subject: [PATCH 055/207] Flatten the structure --- js/Cargo.toml | 2 +- js/README.md | 48 +- js/package.json | 36 +- .../{download_ranks.ts => inline_ranks.ts} | 12 +- .../{override_any.ts => post_process.ts} | 92 +-- js/tsconfig.json | 3 +- js/yarn.lock | 642 +++++++++++++++++- 7 files changed, 717 insertions(+), 118 deletions(-) rename js/scripts/{download_ranks.ts => inline_ranks.ts} (95%) rename js/scripts/{override_any.ts => post_process.ts} (59%) diff --git a/js/Cargo.toml b/js/Cargo.toml index d1e5f655..c0698810 100644 --- a/js/Cargo.toml +++ b/js/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" rust-version = "1.57.0" [lib] -name = "_tiktoken" +name = "tiktoken" crate-type = ["cdylib"] [dependencies] diff --git a/js/README.md b/js/README.md index 82aa2c90..85cbda3b 100644 --- a/js/README.md +++ b/js/README.md @@ -55,7 +55,7 @@ As this is a WASM library, there might be some issues with specific runtimes. 
If | Node.js | ✅ | | | Bun | ✅ | | | Vite | ✅ | See [here](#vite) for notes | -| Next.js | ✅ 🚧 | See [here](#nextjs) for caveats | +| Next.js | ✅ | See [here](#nextjs) for caveats | | Vercel Edge Runtime | 🚧 | Work in progress | | Cloudflare Workers | 🚧 | Untested | | Deno | ❌ | Currently unsupported | @@ -76,42 +76,23 @@ export default defineConfig({ ### [Next.js](#nextjs) -Both API routes and `/pages` are supported with some caveats. To overcome issues with importing `/node` variant and incorrect `__dirname` resolution, you can import the package from `@dqbd/tiktoken/bundler` instead. +Both API routes and `/pages` are supported with the following configuration. To overcome issues with importing Node.js version, you can import the package from `@dqbd/tiktoken/bundler` instead. ```typescript import { get_encoding } from "@dqbd/tiktoken/bundler"; import { NextApiRequest, NextApiResponse } from "next"; export default function handler(req: NextApiRequest, res: NextApiResponse) { - return res.status(200).json({ - // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment - message: get_encoding("gpt2").encode(`Hello World ${Math.random()}`), - }); + const encoder = get_encoding("gpt2"); + const message = encoder.encode(`Hello World ${Math.random()}`); + encoder.free(); + return res.status(200).json({ message }); } ``` -Additional Webpack configuration is also required, see https://github.com/vercel/next.js/issues/29362. +Additional Webpack configuration is required. 
```typescript -class WasmChunksFixPlugin { - apply(compiler) { - compiler.hooks.thisCompilation.tap("WasmChunksFixPlugin", (compilation) => { - compilation.hooks.processAssets.tap( - { name: "WasmChunksFixPlugin" }, - (assets) => - Object.entries(assets).forEach(([pathname, source]) => { - if (!pathname.match(/\.wasm$/)) return; - compilation.deleteAsset(pathname); - - const name = pathname.split("/")[1]; - const info = compilation.assetsInfo.get(pathname); - compilation.emitAsset(name, source, info); - }) - ); - }); - } -} - const config = { webpack(config, { isServer, dev }) { config.experiments = { @@ -119,26 +100,11 @@ const config = { layers: true, }; - if (!dev && isServer) { - config.output.webassemblyModuleFilename = "chunks/[id].wasm"; - config.plugins.push(new WasmChunksFixPlugin()); - } - return config; }, }; ``` -To properly resolve `tsconfig.json`, use either `moduleResolution: "node16"` or `moduleResolution: "nodenext"`: - -```json -{ - "compilerOptions": { - "moduleResolution": "node16" - } -} -``` - ## Acknowledgements - https://github.com/zurawiki/tiktoken-rs diff --git a/js/package.json b/js/package.json index f96075ee..1509f763 100644 --- a/js/package.json +++ b/js/package.json @@ -1,33 +1,16 @@ { "name": "@dqbd/tiktoken", - "version": "0.4.0", + "version": "1.0.0-alpha.1", "description": "Javascript bindings for tiktoken", - "license": "Apache-2.0", - "main": "node/_tiktoken.js", - "types": "node/_tiktoken.d.ts", - "exports": { - ".": { - "node": { - "types": "./node/_tiktoken.d.ts", - "default": "./node/_tiktoken.js" - }, - "default": { - "types": "./bundler/_tiktoken.d.js", - "default": "./bundler/_tiktoken.js" - } - }, - "./init": "./init.js", - "./tiktoken.wasm": { - "types": "./tiktoken.wasm.d.ts", - "default": "./tiktoken.wasm" - } - }, + "license": "MIT", "scripts": { - "build": "rm -rf dist/ ranks/ && yarn run ranks && yarn run build:node && yarn run build:bundler && yarn run build:cleanup", - "ranks": "tsx scripts/download_ranks.ts", - 
"build:bundler": "wasm-pack build --target bundler --release --out-dir dist/bundler && rm dist/bundler/.gitignore", - "build:node": "wasm-pack build --target nodejs --release --out-dir dist/node && rm dist/node/.gitignore", - "build:cleanup": "tsx scripts/override_any.ts", + "build": "run-s build:*", + "build:cleanup": "rm -rf dist/", + "build:rank": "tsx scripts/inline_ranks.ts", + "build:wasm": "run-p wasm:*", + "build:postprocess": "tsx scripts/post_process.ts", + "wasm:bundler": "wasm-pack build --target bundler --release --out-dir dist && rm -rf dist/.gitignore dist/README.md dist/package.json", + "wasm:node": "wasm-pack build --target nodejs --release --out-dir dist/node && rm -rf dist/node/.gitignore dist/node/README.md dist/node/package.json", "test": "yarn vitest" }, "repository": { @@ -37,6 +20,7 @@ "dependencies": {}, "devDependencies": { "@types/node": "^18.14.4", + "npm-run-all": "^4.1.5", "ts-morph": "^17.0.1", "tsx": "^3.12.3", "typescript": "^4.9.5", diff --git a/js/scripts/download_ranks.ts b/js/scripts/inline_ranks.ts similarity index 95% rename from js/scripts/download_ranks.ts rename to js/scripts/inline_ranks.ts index 269ed1ba..6e72a2f8 100644 --- a/js/scripts/download_ranks.ts +++ b/js/scripts/inline_ranks.ts @@ -137,6 +137,13 @@ async function main() { console.log(name); const data = registry[name]; + const targetFile = path.resolve(__dirname, `../ranks/${name}.tiktoken`); + + try { + await fs.stat(targetFile); + continue; + } catch {} + let ranks: Record | null = null; if (data.data_gym_to_mergeable_bpe_ranks) { @@ -151,10 +158,7 @@ async function main() { } if (ranks != null) { - await fs.writeFile( - path.resolve(__dirname, `../ranks/${name}.tiktoken`), - dump_tiktoken_bpe(ranks) - ); + await fs.writeFile(targetFile, dump_tiktoken_bpe(ranks)); } } } diff --git a/js/scripts/override_any.ts b/js/scripts/post_process.ts similarity index 59% rename from js/scripts/override_any.ts rename to js/scripts/post_process.ts index 262a791e..74070143 
100644 --- a/js/scripts/override_any.ts +++ b/js/scripts/post_process.ts @@ -6,10 +6,7 @@ const project = new Project(); project.addSourceFilesAtPaths(["./dist/**/*.ts", "./dist/**/*.js"]); // make sure the types are correct -for (const filename of [ - "./dist/bundler/_tiktoken.d.ts", - "./dist/node/_tiktoken.d.ts", -]) { +for (const filename of ["./dist/tiktoken.d.ts", "./dist/node/tiktoken.d.ts"]) { const sourceFile = project.getSourceFileOrThrow(filename); const cls = sourceFile.getFirstDescendantByKindOrThrow( ts.SyntaxKind.ClassDeclaration @@ -39,63 +36,48 @@ for (const filename of [ sourceFile.saveSync(); } -// use only a single WASM binary -fs.copyFileSync( - path.resolve(__dirname, "../dist/bundler/_tiktoken_bg.wasm"), - path.resolve(__dirname, "../dist/tiktoken.wasm") -); - -fs.copyFileSync( - path.resolve(__dirname, "../dist/bundler/_tiktoken_bg.wasm.d.ts"), - path.resolve(__dirname, "../dist/tiktoken.wasm.d.ts") -); - -// remove unnecessary files -for (const folder of ["bundler", "node"]) { - fs.rmSync(path.resolve(__dirname, `../dist/${folder}/package.json`)); - fs.rmSync(path.resolve(__dirname, `../dist/${folder}/README.md`)); -} - -function replaceContent(file: string, transform: (content: string) => string) { - const options = { encoding: "utf-8" } as const; - fs.writeFileSync( - path.resolve(__dirname, file), - transform(fs.readFileSync(path.resolve(__dirname, file), options)), - options - ); -} - // bundler { - replaceContent("../dist/bundler/_tiktoken.js", (src) => - src.replaceAll(`"./_tiktoken_bg.wasm"`, `"../tiktoken.wasm"`) + fs.writeFileSync( + path.resolve(__dirname, "../dist/bundler.js"), + `export * from "./tiktoken"; `.trim(), + { encoding: "utf-8" } ); - fs.rmSync(path.resolve(__dirname, "../dist/bundler/_tiktoken_bg.wasm")); - fs.rmSync(path.resolve(__dirname, "../dist/bundler/_tiktoken_bg.wasm.d.ts")); + fs.writeFileSync( + path.resolve(__dirname, "../dist/bundler.d.ts"), + `export * from "./tiktoken"; `.trim(), + { encoding: "utf-8" } 
+ ); } // node { - replaceContent("../dist/node/_tiktoken.js", (src) => - src - .replaceAll("__wbindgen_placeholder__", `./_tiktoken_bg.js`) - .replace("'_tiktoken_bg.wasm'", `'../tiktoken.wasm'`) + const options = { encoding: "utf-8" } as const; + fs.writeFileSync( + path.resolve(__dirname, "../dist/tiktoken.node.js"), + fs + .readFileSync( + path.resolve(__dirname, "../dist/node/tiktoken.js"), + options + ) + .replaceAll("__wbindgen_placeholder__", `./tiktoken_bg.js`), + options ); - fs.rmSync(path.resolve(__dirname, "../dist/node/_tiktoken_bg.wasm")); - fs.rmSync(path.resolve(__dirname, "../dist/node/_tiktoken_bg.wasm.d.ts")); + fs.rmSync(path.resolve(__dirname, "../dist/node"), { recursive: true }); } +// package.json { fs.writeFileSync( path.resolve(__dirname, "../dist/init.js"), ` -import * as imports from "./bundler/_tiktoken_bg.js"; +import * as imports from "./tiktoken_bg.js"; export async function init(cb) { const res = await cb({ - "./_tiktoken_bg.js": imports, + "./tiktoken_bg.js": imports, }); const instance = @@ -110,7 +92,7 @@ export async function init(cb) { return imports; } -export * from "./bundler/_tiktoken_bg.js"; +export * from "./tiktoken_bg.js"; `.trim(), { encoding: "utf-8" } ); @@ -120,7 +102,7 @@ export * from "./bundler/_tiktoken_bg.js"; ` /* tslint:disable */ /* eslint-disable */ -export * from "./bundler/_tiktoken"; +export * from "./tiktoken"; export function init( callback: ( imports: WebAssembly.Imports @@ -142,6 +124,28 @@ export function init( delete pkg.scripts; pkg.files = ["**/*"]; + pkg["main"] = "tiktoken.node.js"; + pkg["types"] = "tiktoken.d.ts"; + pkg["exports"] = { + ".": { + types: "./tiktoken.d.ts", + default: "./tiktoken.js", + node: "./tiktoken.node.js", + }, + "./bundler": { + types: "./bundler.d.ts", + default: "./bundler.js", + }, + "./init": { + types: "./init.d.ts", + default: "./init.js", + }, + "./tiktoken_bg.wasm": { + types: "./tiktoken_bg.wasm.d.ts", + default: "./tiktoken_bg.wasm", + }, + }; + 
fs.writeFileSync( path.resolve(__dirname, "../dist/package.json"), JSON.stringify(pkg, null, 2), diff --git a/js/tsconfig.json b/js/tsconfig.json index 1c4bee1b..3c5ff0ae 100644 --- a/js/tsconfig.json +++ b/js/tsconfig.json @@ -6,7 +6,8 @@ "moduleResolution": "node", "strict": true, "declaration": true, - "outDir": "./dist" + "outDir": "./dist", + "allowSyntheticDefaultImports": true, }, "include": ["./**/*.ts", "./**/*.js"], "exclude": ["node_modules", "dist"] diff --git a/js/yarn.lock b/js/yarn.lock index b76be6fa..06883178 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -355,6 +355,13 @@ ansi-regex@^6.0.1: resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-6.0.1.tgz#3183e38fae9a65d7cb5e53945cd5897d0260a06a" integrity sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA== +ansi-styles@^3.2.1: + version "3.2.1" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-3.2.1.tgz#41fbb20243e50b12be0f04b8dedbf07520ce841d" + integrity sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA== + dependencies: + color-convert "^1.9.0" + ansi-styles@^5.0.0: version "5.2.0" resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-5.2.0.tgz#07449690ad45777d1924ac2abb2fc8895dba836b" @@ -370,11 +377,24 @@ assertion-error@^1.1.0: resolved "https://registry.yarnpkg.com/assertion-error/-/assertion-error-1.1.0.tgz#e60b6b0e8f301bd97e5375215bda406c85118c0b" integrity sha512-jgsaNduz+ndvGyFt3uSuWqvy4lCnIJiovtouQN5JZHOKCS2QuhEdbcQHFhVksz2N2U9hXJo8odG7ETyWlEeuDw== +available-typed-arrays@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/available-typed-arrays/-/available-typed-arrays-1.0.5.tgz#92f95616501069d07d10edb2fc37d3e1c65123b7" + integrity sha512-DMD0KiN46eipeziST1LPP/STfDU0sufISXmjSgvVsoU2tqxctQeASejWcfNtxYKqETM1UxQ8sp2OrSBWpHY6sw== + balanced-match@^1.0.0: version "1.0.2" resolved 
"https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee" integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw== +brace-expansion@^1.1.7: + version "1.1.11" + resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" + integrity sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA== + dependencies: + balanced-match "^1.0.0" + concat-map "0.0.1" + brace-expansion@^2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-2.0.1.tgz#1edc459e0f0c548486ecf9fc99f2221364b9a0ae" @@ -399,6 +419,14 @@ cac@^6.7.14: resolved "https://registry.yarnpkg.com/cac/-/cac-6.7.14.tgz#804e1e6f506ee363cb0e3ccbb09cad5dd9870959" integrity sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ== +call-bind@^1.0.0, call-bind@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/call-bind/-/call-bind-1.0.2.tgz#b1d4e89e688119c3c9a903ad30abb2f6a919be3c" + integrity sha512-7O+FbCihrB5WGbFYesctwmTKae6rOiIzmz1icreWJ+0aA7LJfuqhEso2T9ncpcFtzMQtzXf2QGGueWJGTYsqrA== + dependencies: + function-bind "^1.1.1" + get-intrinsic "^1.0.2" + chai@^4.3.7: version "4.3.7" resolved "https://registry.yarnpkg.com/chai/-/chai-4.3.7.tgz#ec63f6df01829088e8bf55fca839bcd464a8ec51" @@ -412,6 +440,15 @@ chai@^4.3.7: pathval "^1.1.1" type-detect "^4.0.5" +chalk@^2.4.1: + version "2.4.2" + resolved "https://registry.yarnpkg.com/chalk/-/chalk-2.4.2.tgz#cd42541677a54333cf541a49108c1432b44c9424" + integrity sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ== + dependencies: + ansi-styles "^3.2.1" + escape-string-regexp "^1.0.5" + supports-color "^5.3.0" + check-error@^1.0.2: version "1.0.2" resolved 
"https://registry.yarnpkg.com/check-error/-/check-error-1.0.2.tgz#574d312edd88bb5dd8912e9286dd6c0aed4aac82" @@ -430,6 +467,34 @@ code-block-writer@^11.0.3: resolved "https://registry.yarnpkg.com/code-block-writer/-/code-block-writer-11.0.3.tgz#9eec2993edfb79bfae845fbc093758c0a0b73b76" integrity sha512-NiujjUFB4SwScJq2bwbYUtXbZhBSlY6vYzm++3Q6oC+U+injTqfPYFK8wS9COOmb2lueqp0ZRB4nK1VYeHgNyw== +color-convert@^1.9.0: + version "1.9.3" + resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8" + integrity sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg== + dependencies: + color-name "1.1.3" + +color-name@1.1.3: + version "1.1.3" + resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.3.tgz#a7d0558bd89c42f795dd42328f740831ca53bc25" + integrity sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw== + +concat-map@0.0.1: + version "0.0.1" + resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" + integrity sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg== + +cross-spawn@^6.0.5: + version "6.0.5" + resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-6.0.5.tgz#4a5ec7c64dfae22c3a14124dbacdee846d80cbc4" + integrity sha512-eTVLrBSt7fjbDygz805pMnstIs2VTBNkRm0qxZd+M7A5XDdxVRWO5MxGBXZhjY4cqLYLdtrGqRf8mBPmzwSpWQ== + dependencies: + nice-try "^1.0.4" + path-key "^2.0.1" + semver "^5.5.0" + shebang-command "^1.2.0" + which "^1.2.9" + debug@^4.3.4: version "4.3.4" resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865" @@ -444,6 +509,14 @@ deep-eql@^4.1.2: dependencies: type-detect "^4.0.0" +define-properties@^1.1.3, define-properties@^1.1.4: + version "1.2.0" + resolved 
"https://registry.yarnpkg.com/define-properties/-/define-properties-1.2.0.tgz#52988570670c9eacedd8064f4a990f2405849bd5" + integrity sha512-xvqAVKGfT1+UAvPwKTVw/njhdQ8ZhXK4lI0bCIuCMrp2up9nPnaDftrLtmpTazqd1o+UY4zgzU+avtMbDP+ldA== + dependencies: + has-property-descriptors "^1.0.0" + object-keys "^1.1.1" + diff@^5.1.0: version "5.1.0" resolved "https://registry.yarnpkg.com/diff/-/diff-5.1.0.tgz#bc52d298c5ea8df9194800224445ed43ffc87e40" @@ -459,6 +532,70 @@ emoji-regex@^9.2.2: resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-9.2.2.tgz#840c8803b0d8047f4ff0cf963176b32d4ef3ed72" integrity sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg== +error-ex@^1.3.1: + version "1.3.2" + resolved "https://registry.yarnpkg.com/error-ex/-/error-ex-1.3.2.tgz#b4ac40648107fdcdcfae242f428bea8a14d4f1bf" + integrity sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g== + dependencies: + is-arrayish "^0.2.1" + +es-abstract@^1.19.0, es-abstract@^1.20.4: + version "1.21.1" + resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.21.1.tgz#e6105a099967c08377830a0c9cb589d570dd86c6" + integrity sha512-QudMsPOz86xYz/1dG1OuGBKOELjCh99IIWHLzy5znUB6j8xG2yMA7bfTV86VSqKF+Y/H08vQPR+9jyXpuC6hfg== + dependencies: + available-typed-arrays "^1.0.5" + call-bind "^1.0.2" + es-set-tostringtag "^2.0.1" + es-to-primitive "^1.2.1" + function-bind "^1.1.1" + function.prototype.name "^1.1.5" + get-intrinsic "^1.1.3" + get-symbol-description "^1.0.0" + globalthis "^1.0.3" + gopd "^1.0.1" + has "^1.0.3" + has-property-descriptors "^1.0.0" + has-proto "^1.0.1" + has-symbols "^1.0.3" + internal-slot "^1.0.4" + is-array-buffer "^3.0.1" + is-callable "^1.2.7" + is-negative-zero "^2.0.2" + is-regex "^1.1.4" + is-shared-array-buffer "^1.0.2" + is-string "^1.0.7" + is-typed-array "^1.1.10" + is-weakref "^1.0.2" + object-inspect "^1.12.2" + object-keys "^1.1.1" + object.assign "^4.1.4" + regexp.prototype.flags 
"^1.4.3" + safe-regex-test "^1.0.0" + string.prototype.trimend "^1.0.6" + string.prototype.trimstart "^1.0.6" + typed-array-length "^1.0.4" + unbox-primitive "^1.0.2" + which-typed-array "^1.1.9" + +es-set-tostringtag@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/es-set-tostringtag/-/es-set-tostringtag-2.0.1.tgz#338d502f6f674301d710b80c8592de8a15f09cd8" + integrity sha512-g3OMbtlwY3QewlqAiMLI47KywjWZoEytKr8pf6iTC8uJq5bIAH52Z9pnQ8pVL6whrCto53JZDuUIsifGeLorTg== + dependencies: + get-intrinsic "^1.1.3" + has "^1.0.3" + has-tostringtag "^1.0.0" + +es-to-primitive@^1.2.1: + version "1.2.1" + resolved "https://registry.yarnpkg.com/es-to-primitive/-/es-to-primitive-1.2.1.tgz#e55cd4c9cdc188bcefb03b366c736323fc5c898a" + integrity sha512-QCOllgZJtaUo9miYBcLChTUaHNjJF3PYs1VidD7AwiEj1kYxKeQTctLAezAOH5ZKRH0g2IgPn6KwB4IT8iRpvA== + dependencies: + is-callable "^1.1.4" + is-date-object "^1.0.1" + is-symbol "^1.0.2" + esbuild@^0.16.14: version "0.16.17" resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.16.17.tgz#fc2c3914c57ee750635fee71b89f615f25065259" @@ -515,6 +652,11 @@ esbuild@~0.17.6: "@esbuild/win32-ia32" "0.17.10" "@esbuild/win32-x64" "0.17.10" +escape-string-regexp@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4" + integrity sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg== + fast-glob@^3.2.12: version "3.2.12" resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.2.12.tgz#7f39ec99c2e6ab030337142da9e0c18f37afae80" @@ -540,6 +682,13 @@ fill-range@^7.0.1: dependencies: to-regex-range "^5.0.1" +for-each@^0.3.3: + version "0.3.3" + resolved "https://registry.yarnpkg.com/for-each/-/for-each-0.3.3.tgz#69b447e88a0a5d32c3e7084f3f1710034b21376e" + integrity sha512-jqYfLp7mo9vIyQf8ykW2v7A+2N4QjeCeI5+Dz9XraiO1ign81wjiH7Fb9vSOWvQfNtmSa4H2RoQTrrXivdUZmw== + dependencies: + is-callable 
"^1.1.3" + fsevents@~2.3.2: version "2.3.2" resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.2.tgz#8a526f78b8fdf4623b709e0b975c52c24c02fd1a" @@ -550,11 +699,43 @@ function-bind@^1.1.1: resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d" integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A== +function.prototype.name@^1.1.5: + version "1.1.5" + resolved "https://registry.yarnpkg.com/function.prototype.name/-/function.prototype.name-1.1.5.tgz#cce0505fe1ffb80503e6f9e46cc64e46a12a9621" + integrity sha512-uN7m/BzVKQnCUF/iW8jYea67v++2u7m5UgENbHRtdDVclOUP+FMPlCNdmk0h/ysGyo2tavMJEDqJAkJdRa1vMA== + dependencies: + call-bind "^1.0.2" + define-properties "^1.1.3" + es-abstract "^1.19.0" + functions-have-names "^1.2.2" + +functions-have-names@^1.2.2: + version "1.2.3" + resolved "https://registry.yarnpkg.com/functions-have-names/-/functions-have-names-1.2.3.tgz#0404fe4ee2ba2f607f0e0ec3c80bae994133b834" + integrity sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ== + get-func-name@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/get-func-name/-/get-func-name-2.0.0.tgz#ead774abee72e20409433a066366023dd6887a41" integrity sha512-Hm0ixYtaSZ/V7C8FJrtZIuBBI+iSgL+1Aq82zSu8VQNB4S3Gk8e7Qs3VwBDJAhmRZcFqkl3tQu36g/Foh5I5ig== +get-intrinsic@^1.0.2, get-intrinsic@^1.1.1, get-intrinsic@^1.1.3, get-intrinsic@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/get-intrinsic/-/get-intrinsic-1.2.0.tgz#7ad1dc0535f3a2904bba075772763e5051f6d05f" + integrity sha512-L049y6nFOuom5wGyRc3/gdTLO94dySVKRACj1RmJZBQXlbTMhtNIgkWkUHq+jYmZvKf14EW1EoJnnjbmoHij0Q== + dependencies: + function-bind "^1.1.1" + has "^1.0.3" + has-symbols "^1.0.3" + +get-symbol-description@^1.0.0: + version "1.0.0" + resolved 
"https://registry.yarnpkg.com/get-symbol-description/-/get-symbol-description-1.0.0.tgz#7fdb81c900101fbd564dd5f1a30af5aadc1e58d6" + integrity sha512-2EmdH1YvIQiZpltCNgkuiUnyukzxM/R6NDJX31Ke3BG1Nq5b0S2PhX59UKi9vZpPDQVdqn+1IcaAwnzTT5vCjw== + dependencies: + call-bind "^1.0.2" + get-intrinsic "^1.1.1" + get-tsconfig@^4.4.0: version "4.4.0" resolved "https://registry.yarnpkg.com/get-tsconfig/-/get-tsconfig-4.4.0.tgz#64eee64596668a81b8fce18403f94f245ee0d4e5" @@ -567,6 +748,59 @@ glob-parent@^5.1.2: dependencies: is-glob "^4.0.1" +globalthis@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/globalthis/-/globalthis-1.0.3.tgz#5852882a52b80dc301b0660273e1ed082f0b6ccf" + integrity sha512-sFdI5LyBiNTHjRd7cGPWapiHWMOXKyuBNX/cWJ3NfzrZQVa8GI/8cofCl74AOVqq9W5kNmguTIzJ/1s2gyI9wA== + dependencies: + define-properties "^1.1.3" + +gopd@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/gopd/-/gopd-1.0.1.tgz#29ff76de69dac7489b7c0918a5788e56477c332c" + integrity sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA== + dependencies: + get-intrinsic "^1.1.3" + +graceful-fs@^4.1.2: + version "4.2.10" + resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.10.tgz#147d3a006da4ca3ce14728c7aefc287c367d7a6c" + integrity sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA== + +has-bigints@^1.0.1, has-bigints@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/has-bigints/-/has-bigints-1.0.2.tgz#0871bd3e3d51626f6ca0966668ba35d5602d6eaa" + integrity sha512-tSvCKtBr9lkF0Ex0aQiP9N+OpV4zi2r/Nee5VkRDbaqv35RLYMzbwQfFSZZH0kR+Rd6302UJZ2p/bJCEoR3VoQ== + +has-flag@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-3.0.0.tgz#b5d454dc2199ae225699f3467e5a07f3b955bafd" + integrity sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw== + +has-property-descriptors@^1.0.0: + version "1.0.0" + 
resolved "https://registry.yarnpkg.com/has-property-descriptors/-/has-property-descriptors-1.0.0.tgz#610708600606d36961ed04c196193b6a607fa861" + integrity sha512-62DVLZGoiEBDHQyqG4w9xCuZ7eJEwNmJRWw2VY84Oedb7WFcA27fiEVe8oUQx9hAUJ4ekurquucTGwsyO1XGdQ== + dependencies: + get-intrinsic "^1.1.1" + +has-proto@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/has-proto/-/has-proto-1.0.1.tgz#1885c1305538958aff469fef37937c22795408e0" + integrity sha512-7qE+iP+O+bgF9clE5+UoBFzE65mlBiVj3tKCrlNQ0Ogwm0BjpT/gK4SlLYDMybDh5I3TCTKnPPa0oMG7JDYrhg== + +has-symbols@^1.0.2, has-symbols@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.3.tgz#bb7b2c4349251dce87b125f7bdf874aa7c8b39f8" + integrity sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A== + +has-tostringtag@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/has-tostringtag/-/has-tostringtag-1.0.0.tgz#7e133818a7d394734f941e73c3d3f9291e658b25" + integrity sha512-kFjcSNhnlGV1kyoGk7OXKSawH5JOb/LzUc5w9B02hOTO0dfFRjbHQKvg1d6cf3HbeUmtU9VbbV3qzZ2Teh97WQ== + dependencies: + has-symbols "^1.0.2" + has@^1.0.3: version "1.0.3" resolved "https://registry.yarnpkg.com/has/-/has-1.0.3.tgz#722d7cbfc1f6aa8241f16dd814e011e1f41e8796" @@ -574,6 +808,54 @@ has@^1.0.3: dependencies: function-bind "^1.1.1" +hosted-git-info@^2.1.4: + version "2.8.9" + resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-2.8.9.tgz#dffc0bf9a21c02209090f2aa69429e1414daf3f9" + integrity sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw== + +internal-slot@^1.0.4: + version "1.0.5" + resolved "https://registry.yarnpkg.com/internal-slot/-/internal-slot-1.0.5.tgz#f2a2ee21f668f8627a4667f309dc0f4fb6674986" + integrity sha512-Y+R5hJrzs52QCG2laLn4udYVnxsfny9CpOhNhUvk/SSSVyF6T27FzRbF0sroPidSu3X8oEAkOn2K804mjpt6UQ== + dependencies: + get-intrinsic "^1.2.0" + has "^1.0.3" + side-channel "^1.0.4" + 
+is-array-buffer@^3.0.1: + version "3.0.2" + resolved "https://registry.yarnpkg.com/is-array-buffer/-/is-array-buffer-3.0.2.tgz#f2653ced8412081638ecb0ebbd0c41c6e0aecbbe" + integrity sha512-y+FyyR/w8vfIRq4eQcM1EYgSTnmHXPqaF+IgzgraytCFq5Xh8lllDVmAZolPJiZttZLeFSINPYMaEJ7/vWUa1w== + dependencies: + call-bind "^1.0.2" + get-intrinsic "^1.2.0" + is-typed-array "^1.1.10" + +is-arrayish@^0.2.1: + version "0.2.1" + resolved "https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.2.1.tgz#77c99840527aa8ecb1a8ba697b80645a7a926a9d" + integrity sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg== + +is-bigint@^1.0.1: + version "1.0.4" + resolved "https://registry.yarnpkg.com/is-bigint/-/is-bigint-1.0.4.tgz#08147a1875bc2b32005d41ccd8291dffc6691df3" + integrity sha512-zB9CruMamjym81i2JZ3UMn54PKGsQzsJeo6xvN3HJJ4CAsQNB6iRutp2To77OfCNuoxspsIhzaPoO1zyCEhFOg== + dependencies: + has-bigints "^1.0.1" + +is-boolean-object@^1.1.0: + version "1.1.2" + resolved "https://registry.yarnpkg.com/is-boolean-object/-/is-boolean-object-1.1.2.tgz#5c6dc200246dd9321ae4b885a114bb1f75f63719" + integrity sha512-gDYaKHJmnj4aWxyj6YHyXVpdQawtVLHU5cb+eztPGczf6cjuTdwve5ZIEfgXqH4e57An1D1AKf8CZ3kYrQRqYA== + dependencies: + call-bind "^1.0.2" + has-tostringtag "^1.0.0" + +is-callable@^1.1.3, is-callable@^1.1.4, is-callable@^1.2.7: + version "1.2.7" + resolved "https://registry.yarnpkg.com/is-callable/-/is-callable-1.2.7.tgz#3bc2a85ea742d9e36205dcacdd72ca1fdc51b055" + integrity sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA== + is-core-module@^2.9.0: version "2.11.0" resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.11.0.tgz#ad4cb3e3863e814523c96f3f58d26cc570ff0144" @@ -581,6 +863,13 @@ is-core-module@^2.9.0: dependencies: has "^1.0.3" +is-date-object@^1.0.1: + version "1.0.5" + resolved "https://registry.yarnpkg.com/is-date-object/-/is-date-object-1.0.5.tgz#0841d5536e724c25597bf6ea62e1bd38298df31f" + 
integrity sha512-9YQaSxsAiSwcvS33MBk3wTCVnWK+HhF8VZR2jRxehM16QcVOdHqPn4VPHmRK4lSr38n9JriurInLcP90xsYNfQ== + dependencies: + has-tostringtag "^1.0.0" + is-extglob@^2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-2.1.1.tgz#a88c02535791f02ed37c76a1b9ea9773c833f8c2" @@ -598,16 +887,95 @@ is-glob@^4.0.1: dependencies: is-extglob "^2.1.1" +is-negative-zero@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/is-negative-zero/-/is-negative-zero-2.0.2.tgz#7bf6f03a28003b8b3965de3ac26f664d765f3150" + integrity sha512-dqJvarLawXsFbNDeJW7zAz8ItJ9cd28YufuuFzh0G8pNHjJMnY08Dv7sYX2uF5UpQOwieAeOExEYAWWfu7ZZUA== + +is-number-object@^1.0.4: + version "1.0.7" + resolved "https://registry.yarnpkg.com/is-number-object/-/is-number-object-1.0.7.tgz#59d50ada4c45251784e9904f5246c742f07a42fc" + integrity sha512-k1U0IRzLMo7ZlYIfzRu23Oh6MiIFasgpb9X76eqfFZAqwH44UI4KTBvBYIZ1dSL9ZzChTB9ShHfLkR4pdW5krQ== + dependencies: + has-tostringtag "^1.0.0" + is-number@^7.0.0: version "7.0.0" resolved "https://registry.yarnpkg.com/is-number/-/is-number-7.0.0.tgz#7535345b896734d5f80c4d06c50955527a14f12b" integrity sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng== +is-regex@^1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/is-regex/-/is-regex-1.1.4.tgz#eef5663cd59fa4c0ae339505323df6854bb15958" + integrity sha512-kvRdxDsxZjhzUX07ZnLydzS1TU/TJlTUHHY4YLL87e37oUA49DfkLqgy+VjFocowy29cKvcSiu+kIv728jTTVg== + dependencies: + call-bind "^1.0.2" + has-tostringtag "^1.0.0" + +is-shared-array-buffer@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/is-shared-array-buffer/-/is-shared-array-buffer-1.0.2.tgz#8f259c573b60b6a32d4058a1a07430c0a7344c79" + integrity sha512-sqN2UDu1/0y6uvXyStCOzyhAjCSlHceFoMKJW8W9EU9cvic/QdsZ0kEU93HEy3IUEFZIiH/3w+AH/UQbPHNdhA== + dependencies: + call-bind "^1.0.2" + +is-string@^1.0.5, is-string@^1.0.7: + version "1.0.7" + resolved 
"https://registry.yarnpkg.com/is-string/-/is-string-1.0.7.tgz#0dd12bf2006f255bb58f695110eff7491eebc0fd" + integrity sha512-tE2UXzivje6ofPW7l23cjDOMa09gb7xlAqG6jG5ej6uPV32TlWP3NKPigtaGeHNu9fohccRYvIiZMfOOnOYUtg== + dependencies: + has-tostringtag "^1.0.0" + +is-symbol@^1.0.2, is-symbol@^1.0.3: + version "1.0.4" + resolved "https://registry.yarnpkg.com/is-symbol/-/is-symbol-1.0.4.tgz#a6dac93b635b063ca6872236de88910a57af139c" + integrity sha512-C/CPBqKWnvdcxqIARxyOh4v1UUEOCHpgDa0WYgpKDFMszcrPcffg5uhwSgPCLD2WWxmq6isisz87tzT01tuGhg== + dependencies: + has-symbols "^1.0.2" + +is-typed-array@^1.1.10, is-typed-array@^1.1.9: + version "1.1.10" + resolved "https://registry.yarnpkg.com/is-typed-array/-/is-typed-array-1.1.10.tgz#36a5b5cb4189b575d1a3e4b08536bfb485801e3f" + integrity sha512-PJqgEHiWZvMpaFZ3uTc8kHPM4+4ADTlDniuQL7cU/UDA0Ql7F70yGfHph3cLNe+c9toaigv+DFzTJKhc2CtO6A== + dependencies: + available-typed-arrays "^1.0.5" + call-bind "^1.0.2" + for-each "^0.3.3" + gopd "^1.0.1" + has-tostringtag "^1.0.0" + +is-weakref@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/is-weakref/-/is-weakref-1.0.2.tgz#9529f383a9338205e89765e0392efc2f100f06f2" + integrity sha512-qctsuLZmIQ0+vSSMfoVvyFe2+GSEvnmZ2ezTup1SBse9+twCCeial6EEi3Nc2KFcf6+qz2FBPnjXsk8xhKSaPQ== + dependencies: + call-bind "^1.0.2" + +isexe@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10" + integrity sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw== + +json-parse-better-errors@^1.0.1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/json-parse-better-errors/-/json-parse-better-errors-1.0.2.tgz#bb867cfb3450e69107c131d1c514bab3dc8bcaa9" + integrity sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw== + jsonc-parser@^3.2.0: version "3.2.0" resolved 
"https://registry.yarnpkg.com/jsonc-parser/-/jsonc-parser-3.2.0.tgz#31ff3f4c2b9793f89c67212627c51c6394f88e76" integrity sha512-gfFQZrcTc8CnKXp6Y4/CBT3fTc0OVuDofpre4aEeEpSBPV5X5v4+Vmx+8snU7RLPrNHPKSgLxGo9YuQzz20o+w== +load-json-file@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/load-json-file/-/load-json-file-4.0.0.tgz#2f5f45ab91e33216234fd53adab668eb4ec0993b" + integrity sha512-Kx8hMakjX03tiGTLAIdJ+lL0htKnXjEZN6hk/tozf/WOuYGdZBJrZ+rCJRbVCugsjB3jMLn9746NsQIf5VjBMw== + dependencies: + graceful-fs "^4.1.2" + parse-json "^4.0.0" + pify "^3.0.0" + strip-bom "^3.0.0" + local-pkg@^0.4.2: version "0.4.3" resolved "https://registry.yarnpkg.com/local-pkg/-/local-pkg-0.4.3.tgz#0ff361ab3ae7f1c19113d9bb97b98b905dbc4963" @@ -620,6 +988,11 @@ loupe@^2.3.1, loupe@^2.3.6: dependencies: get-func-name "^2.0.0" +memorystream@^0.3.1: + version "0.3.1" + resolved "https://registry.yarnpkg.com/memorystream/-/memorystream-0.3.1.tgz#86d7090b30ce455d63fbae12dda51a47ddcaf9b2" + integrity sha512-S3UwM3yj5mtUSEfP41UZmt/0SCoVYUcU1rkXv+BQ5Ig8ndL4sPoJNBUJERafdPb5jjHJGuMgytgKvKIf58XNBw== + merge2@^1.3.0: version "1.4.1" resolved "https://registry.yarnpkg.com/merge2/-/merge2-1.4.1.tgz#4368892f885e907455a6fd7dc55c0c9d404990ae" @@ -633,6 +1006,13 @@ micromatch@^4.0.4: braces "^3.0.2" picomatch "^2.3.1" +minimatch@^3.0.4: + version "3.1.2" + resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b" + integrity sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw== + dependencies: + brace-expansion "^1.1.7" + minimatch@^5.1.0: version "5.1.6" resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-5.1.6.tgz#1cfcb8cf5522ea69952cd2af95ae09477f122a96" @@ -665,6 +1045,56 @@ nanoid@^3.3.4: resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.4.tgz#730b67e3cd09e2deacf03c027c81c9d9dbc5e8ab" integrity 
sha512-MqBkQh/OHTS2egovRtLk45wEyNXwF+cokD+1YPf9u5VfJiRdAiRwB2froX5Co9Rh20xs4siNPm8naNotSD6RBw== +nice-try@^1.0.4: + version "1.0.5" + resolved "https://registry.yarnpkg.com/nice-try/-/nice-try-1.0.5.tgz#a3378a7696ce7d223e88fc9b764bd7ef1089e366" + integrity sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ== + +normalize-package-data@^2.3.2: + version "2.5.0" + resolved "https://registry.yarnpkg.com/normalize-package-data/-/normalize-package-data-2.5.0.tgz#e66db1838b200c1dfc233225d12cb36520e234a8" + integrity sha512-/5CMN3T0R4XTj4DcGaexo+roZSdSFW/0AOOTROrjxzCG1wrWXEsGbRKevjlIL+ZDE4sZlJr5ED4YW0yqmkK+eA== + dependencies: + hosted-git-info "^2.1.4" + resolve "^1.10.0" + semver "2 || 3 || 4 || 5" + validate-npm-package-license "^3.0.1" + +npm-run-all@^4.1.5: + version "4.1.5" + resolved "https://registry.yarnpkg.com/npm-run-all/-/npm-run-all-4.1.5.tgz#04476202a15ee0e2e214080861bff12a51d98fba" + integrity sha512-Oo82gJDAVcaMdi3nuoKFavkIHBRVqQ1qvMb+9LHk/cF4P6B2m8aP04hGf7oL6wZ9BuGwX1onlLhpuoofSyoQDQ== + dependencies: + ansi-styles "^3.2.1" + chalk "^2.4.1" + cross-spawn "^6.0.5" + memorystream "^0.3.1" + minimatch "^3.0.4" + pidtree "^0.3.0" + read-pkg "^3.0.0" + shell-quote "^1.6.1" + string.prototype.padend "^3.0.0" + +object-inspect@^1.12.2, object-inspect@^1.9.0: + version "1.12.3" + resolved "https://registry.yarnpkg.com/object-inspect/-/object-inspect-1.12.3.tgz#ba62dffd67ee256c8c086dfae69e016cd1f198b9" + integrity sha512-geUvdk7c+eizMNUDkRpW1wJwgfOiOeHbxBR/hLXK1aT6zmVSO0jsQcs7fj6MGw89jC/cjGfLcNOrtMYtGqm81g== + +object-keys@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/object-keys/-/object-keys-1.1.1.tgz#1c47f272df277f3b1daf061677d9c82e2322c60e" + integrity sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA== + +object.assign@^4.1.4: + version "4.1.4" + resolved 
"https://registry.yarnpkg.com/object.assign/-/object.assign-4.1.4.tgz#9673c7c7c351ab8c4d0b516f4343ebf4dfb7799f" + integrity sha512-1mxKf0e58bvyjSCtKYY4sRe9itRk3PJpquJOjeIkz885CczcI4IvJJDLPS72oowuSh+pBxUFROpX+TU++hxhZQ== + dependencies: + call-bind "^1.0.2" + define-properties "^1.1.4" + has-symbols "^1.0.3" + object-keys "^1.1.1" + p-limit@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-4.0.0.tgz#914af6544ed32bfa54670b061cafcbd04984b644" @@ -672,16 +1102,36 @@ p-limit@^4.0.0: dependencies: yocto-queue "^1.0.0" +parse-json@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-4.0.0.tgz#be35f5425be1f7f6c747184f98a788cb99477ee0" + integrity sha512-aOIos8bujGN93/8Ox/jPLh7RwVnPEysynVFE+fQZyg6jKELEHwzgKdLRFHUgXJL6kylijVSBC4BvN9OmsB48Rw== + dependencies: + error-ex "^1.3.1" + json-parse-better-errors "^1.0.1" + path-browserify@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/path-browserify/-/path-browserify-1.0.1.tgz#d98454a9c3753d5790860f16f68867b9e46be1fd" integrity sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g== +path-key@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/path-key/-/path-key-2.0.1.tgz#411cadb574c5a140d3a4b1910d40d80cc9f40b40" + integrity sha512-fEHGKCSmUSDPv4uoj8AlD+joPlq3peND+HRYyxFz4KPw4z926S/b8rIuFs2FYJg3BwsxJf6A9/3eIdLaYC+9Dw== + path-parse@^1.0.7: version "1.0.7" resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== +path-type@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/path-type/-/path-type-3.0.0.tgz#cef31dc8e0a1a3bb0d105c0cd97cf3bf47f4e36f" + integrity sha512-T2ZUsdZFHgA3u4e5PfPbjd7HDDpxPnQb5jN0SrDsjNSuVXHJqtwTnWqG0B1jZrgmJ/7lj1EmVIByWt1gxGkWvg== + dependencies: + pify "^3.0.0" + pathe@^1.1.0: version "1.1.0" 
resolved "https://registry.yarnpkg.com/pathe/-/pathe-1.1.0.tgz#e2e13f6c62b31a3289af4ba19886c230f295ec03" @@ -702,6 +1152,16 @@ picomatch@^2.3.1: resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.1.tgz#3ba3833733646d9d3e4995946c1365a67fb07a42" integrity sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA== +pidtree@^0.3.0: + version "0.3.1" + resolved "https://registry.yarnpkg.com/pidtree/-/pidtree-0.3.1.tgz#ef09ac2cc0533df1f3250ccf2c4d366b0d12114a" + integrity sha512-qQbW94hLHEqCg7nhby4yRC7G2+jYHY4Rguc2bjw7Uug4GIJuu1tvf2uHaZv5Q8zdt+WKJ6qK1FOI6amaWUo5FA== + +pify@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/pify/-/pify-3.0.0.tgz#e5a4acd2c101fdf3d9a4d07f0dbc4db49dd28176" + integrity sha512-C3FsVNH1udSEX48gGX1xfvwTWfsYWj5U+8/uK15BGzIGrKoUpghX8hWZwa/OFnakBiiVNmBvemTJR5mcy7iPcg== + pkg-types@^1.0.1: version "1.0.2" resolved "https://registry.yarnpkg.com/pkg-types/-/pkg-types-1.0.2.tgz#c233efc5210a781e160e0cafd60c0d0510a4b12e" @@ -739,7 +1199,25 @@ react-is@^17.0.1: resolved "https://registry.yarnpkg.com/react-is/-/react-is-17.0.2.tgz#e691d4a8e9c789365655539ab372762b0efb54f0" integrity sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w== -resolve@^1.22.1: +read-pkg@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/read-pkg/-/read-pkg-3.0.0.tgz#9cbc686978fee65d16c00e2b19c237fcf6e38389" + integrity sha512-BLq/cCO9two+lBgiTYNqD6GdtK8s4NpaWrl6/rCO9w0TUS8oJl7cmToOZfRYllKTISY6nt1U7jQ53brmKqY6BA== + dependencies: + load-json-file "^4.0.0" + normalize-package-data "^2.3.2" + path-type "^3.0.0" + +regexp.prototype.flags@^1.4.3: + version "1.4.3" + resolved "https://registry.yarnpkg.com/regexp.prototype.flags/-/regexp.prototype.flags-1.4.3.tgz#87cab30f80f66660181a3bb7bf5981a872b367ac" + integrity sha512-fjggEOO3slI6Wvgjwflkc4NFRCTZAu5CnNfBd5qOMYhWdn67nJBBu34/TkD++eeFmd8C9r9jfXJ27+nSiRkSUA== + dependencies: + call-bind "^1.0.2" + 
define-properties "^1.1.3" + functions-have-names "^1.2.2" + +resolve@^1.10.0, resolve@^1.22.1: version "1.22.1" resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.22.1.tgz#27cb2ebb53f91abb49470a928bba7558066ac177" integrity sha512-nBpuuYuY5jFsli/JIs1oldw6fOQCBioohqWZg/2hiaOybXOft4lonv85uDOKXdf8rhyK159cxU5cDcK/NKk8zw== @@ -767,6 +1245,46 @@ run-parallel@^1.1.9: dependencies: queue-microtask "^1.2.2" +safe-regex-test@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/safe-regex-test/-/safe-regex-test-1.0.0.tgz#793b874d524eb3640d1873aad03596db2d4f2295" + integrity sha512-JBUUzyOgEwXQY1NuPtvcj/qcBDbDmEvWufhlnXZIm75DEHp+afM1r1ujJpJsV/gSM4t59tpDyPi1sd6ZaPFfsA== + dependencies: + call-bind "^1.0.2" + get-intrinsic "^1.1.3" + is-regex "^1.1.4" + +"semver@2 || 3 || 4 || 5", semver@^5.5.0: + version "5.7.1" + resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.1.tgz#a954f931aeba508d307bbf069eff0c01c96116f7" + integrity sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ== + +shebang-command@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-1.2.0.tgz#44aac65b695b03398968c39f363fee5deafdf1ea" + integrity sha512-EV3L1+UQWGor21OmnvojK36mhg+TyIKDh3iFBKBohr5xeXIhNBcx8oWdgkTEEQ+BEFFYdLRuqMfd5L84N1V5Vg== + dependencies: + shebang-regex "^1.0.0" + +shebang-regex@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-1.0.0.tgz#da42f49740c0b42db2ca9728571cb190c98efea3" + integrity sha512-wpoSFAxys6b2a2wHZ1XpDSgD7N9iVjg29Ph9uV/uaP9Ex/KXlkTZTeddxDPSYQpgvzKLGJke2UU0AzoGCjNIvQ== + +shell-quote@^1.6.1: + version "1.8.0" + resolved "https://registry.yarnpkg.com/shell-quote/-/shell-quote-1.8.0.tgz#20d078d0eaf71d54f43bd2ba14a1b5b9bfa5c8ba" + integrity sha512-QHsz8GgQIGKlRi24yFc6a6lN69Idnx634w49ay6+jA5yFh7a1UY+4Rp6HPx/L/1zcEDPEij8cIsiqR6bQsE5VQ== + +side-channel@^1.0.4: + version "1.0.4" + resolved 
"https://registry.yarnpkg.com/side-channel/-/side-channel-1.0.4.tgz#efce5c8fdc104ee751b25c58d4290011fa5ea2cf" + integrity sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw== + dependencies: + call-bind "^1.0.0" + get-intrinsic "^1.0.2" + object-inspect "^1.9.0" + siginfo@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/siginfo/-/siginfo-2.0.0.tgz#32e76c70b79724e3bb567cb9d543eb858ccfaf30" @@ -798,6 +1316,32 @@ source-map@^0.6.0, source-map@^0.6.1: resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263" integrity sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g== +spdx-correct@^3.0.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/spdx-correct/-/spdx-correct-3.2.0.tgz#4f5ab0668f0059e34f9c00dce331784a12de4e9c" + integrity sha512-kN9dJbvnySHULIluDHy32WHRUu3Og7B9sbY7tsFLctQkIqnMh3hErYgdMjTYuqmcXX+lK5T1lnUt3G7zNswmZA== + dependencies: + spdx-expression-parse "^3.0.0" + spdx-license-ids "^3.0.0" + +spdx-exceptions@^2.1.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/spdx-exceptions/-/spdx-exceptions-2.3.0.tgz#3f28ce1a77a00372683eade4a433183527a2163d" + integrity sha512-/tTrYOC7PPI1nUAgx34hUpqXuyJG+DTHJTnIULG4rDygi4xu/tfgmq1e1cIRwRzwZgo4NLySi+ricLkZkw4i5A== + +spdx-expression-parse@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/spdx-expression-parse/-/spdx-expression-parse-3.0.1.tgz#cf70f50482eefdc98e3ce0a6833e4a53ceeba679" + integrity sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q== + dependencies: + spdx-exceptions "^2.1.0" + spdx-license-ids "^3.0.0" + +spdx-license-ids@^3.0.0: + version "3.0.12" + resolved "https://registry.yarnpkg.com/spdx-license-ids/-/spdx-license-ids-3.0.12.tgz#69077835abe2710b65f03969898b6637b505a779" + integrity 
sha512-rr+VVSXtRhO4OHbXUiAF7xW3Bo9DuuF6C5jH+q/x15j2jniycgKbxU09Hr0WqlSLUs4i4ltHGXqTe7VHclYWyA== + stackback@0.0.2: version "0.0.2" resolved "https://registry.yarnpkg.com/stackback/-/stackback-0.0.2.tgz#1ac8a0d9483848d1695e418b6d031a3c3ce68e3b" @@ -817,6 +1361,33 @@ string-width@^5.0.0: emoji-regex "^9.2.2" strip-ansi "^7.0.1" +string.prototype.padend@^3.0.0: + version "3.1.4" + resolved "https://registry.yarnpkg.com/string.prototype.padend/-/string.prototype.padend-3.1.4.tgz#2c43bb3a89eb54b6750de5942c123d6c98dd65b6" + integrity sha512-67otBXoksdjsnXXRUq+KMVTdlVRZ2af422Y0aTyTjVaoQkGr3mxl2Bc5emi7dOQ3OGVVQQskmLEWwFXwommpNw== + dependencies: + call-bind "^1.0.2" + define-properties "^1.1.4" + es-abstract "^1.20.4" + +string.prototype.trimend@^1.0.6: + version "1.0.6" + resolved "https://registry.yarnpkg.com/string.prototype.trimend/-/string.prototype.trimend-1.0.6.tgz#c4a27fa026d979d79c04f17397f250a462944533" + integrity sha512-JySq+4mrPf9EsDBEDYMOb/lM7XQLulwg5R/m1r0PXEFqrV0qHvl58sdTilSXtKOflCsK2E8jxf+GKC0T07RWwQ== + dependencies: + call-bind "^1.0.2" + define-properties "^1.1.4" + es-abstract "^1.20.4" + +string.prototype.trimstart@^1.0.6: + version "1.0.6" + resolved "https://registry.yarnpkg.com/string.prototype.trimstart/-/string.prototype.trimstart-1.0.6.tgz#e90ab66aa8e4007d92ef591bbf3cd422c56bdcf4" + integrity sha512-omqjMDaY92pbn5HOX7f9IccLA+U1tA9GvtU4JrodiXFfYB7jPzzHpRzpglLAjtUV6bB557zwClJezTqnAiYnQA== + dependencies: + call-bind "^1.0.2" + define-properties "^1.1.4" + es-abstract "^1.20.4" + strip-ansi@^7.0.1: version "7.0.1" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-7.0.1.tgz#61740a08ce36b61e50e65653f07060d000975fb2" @@ -824,6 +1395,11 @@ strip-ansi@^7.0.1: dependencies: ansi-regex "^6.0.1" +strip-bom@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/strip-bom/-/strip-bom-3.0.0.tgz#2334c18e9c759f7bdd56fdef7e9ae3d588e68ed3" + integrity 
sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA== + strip-literal@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/strip-literal/-/strip-literal-1.0.1.tgz#0115a332710c849b4e46497891fb8d585e404bd2" @@ -831,6 +1407,13 @@ strip-literal@^1.0.0: dependencies: acorn "^8.8.2" +supports-color@^5.3.0: + version "5.5.0" + resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-5.5.0.tgz#e2e69a44ac8772f78a1ec0b35b689df6530efc8f" + integrity sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow== + dependencies: + has-flag "^3.0.0" + supports-preserve-symlinks-flag@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz#6eda4bd344a3c94aea376d4cc31bc77311039e09" @@ -882,6 +1465,15 @@ type-detect@^4.0.0, type-detect@^4.0.5: resolved "https://registry.yarnpkg.com/type-detect/-/type-detect-4.0.8.tgz#7646fb5f18871cfbb7749e69bd39a6388eb7450c" integrity sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g== +typed-array-length@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/typed-array-length/-/typed-array-length-1.0.4.tgz#89d83785e5c4098bec72e08b319651f0eac9c1bb" + integrity sha512-KjZypGq+I/H7HI5HlOoGHkWUUGq+Q0TPhQurLbyrVrvnKTBgzLhIJ7j6J/XTQOi0d1RjyZ0wdas8bKs2p0x3Ng== + dependencies: + call-bind "^1.0.2" + for-each "^0.3.3" + is-typed-array "^1.1.9" + typescript@^4.9.5: version "4.9.5" resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.9.5.tgz#095979f9bcc0d09da324d58d03ce8f8374cbe65a" @@ -892,6 +1484,24 @@ ufo@^1.1.0: resolved "https://registry.yarnpkg.com/ufo/-/ufo-1.1.0.tgz#a5c4c814b0a98f7e0ca42c478688663fd3e3c037" integrity sha512-LQc2s/ZDMaCN3QLpa+uzHUOQ7SdV0qgv3VBXOolQGXTaaZpIur6PwUclF5nN2hNkiTRcUugXd1zFOW3FLJ135Q== +unbox-primitive@^1.0.2: + version "1.0.2" + resolved 
"https://registry.yarnpkg.com/unbox-primitive/-/unbox-primitive-1.0.2.tgz#29032021057d5e6cdbd08c5129c226dff8ed6f9e" + integrity sha512-61pPlCD9h51VoreyJ0BReideM3MDKMKnh6+V9L08331ipq6Q8OFXZYiqP6n/tbHx4s5I9uRhcye6BrbkizkBDw== + dependencies: + call-bind "^1.0.2" + has-bigints "^1.0.2" + has-symbols "^1.0.3" + which-boxed-primitive "^1.0.2" + +validate-npm-package-license@^3.0.1: + version "3.0.4" + resolved "https://registry.yarnpkg.com/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz#fc91f6b9c7ba15c857f4cb2c5defeec39d4f410a" + integrity sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew== + dependencies: + spdx-correct "^3.0.0" + spdx-expression-parse "^3.0.0" + vite-node@0.28.5: version "0.28.5" resolved "https://registry.yarnpkg.com/vite-node/-/vite-node-0.28.5.tgz#56d0f78846ea40fddf2e28390899df52a4738006" @@ -948,6 +1558,36 @@ vitest@^0.28.5: vite-node "0.28.5" why-is-node-running "^2.2.2" +which-boxed-primitive@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz#13757bc89b209b049fe5d86430e21cf40a89a8e6" + integrity sha512-bwZdv0AKLpplFY2KZRX6TvyuN7ojjr7lwkg6ml0roIy9YeuSr7JS372qlNW18UQYzgYK9ziGcerWqZOmEn9VNg== + dependencies: + is-bigint "^1.0.1" + is-boolean-object "^1.1.0" + is-number-object "^1.0.4" + is-string "^1.0.5" + is-symbol "^1.0.3" + +which-typed-array@^1.1.9: + version "1.1.9" + resolved "https://registry.yarnpkg.com/which-typed-array/-/which-typed-array-1.1.9.tgz#307cf898025848cf995e795e8423c7f337efbde6" + integrity sha512-w9c4xkx6mPidwp7180ckYWfMmvxpjlZuIudNtDf4N/tTAUB8VJbX25qZoAsrtGuYNnGw3pa0AXgbGKRB8/EceA== + dependencies: + available-typed-arrays "^1.0.5" + call-bind "^1.0.2" + for-each "^0.3.3" + gopd "^1.0.1" + has-tostringtag "^1.0.0" + is-typed-array "^1.1.10" + +which@^1.2.9: + version "1.3.1" + resolved "https://registry.yarnpkg.com/which/-/which-1.3.1.tgz#a45043d54f5805316da8d62f9f50918d3da70b0a" + 
integrity sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ== + dependencies: + isexe "^2.0.0" + why-is-node-running@^2.2.2: version "2.2.2" resolved "https://registry.yarnpkg.com/why-is-node-running/-/why-is-node-running-2.2.2.tgz#4185b2b4699117819e7154594271e7e344c9973e" From 7c8cd78300e657a549854b3eeb984ea980c21055 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 8 Mar 2023 01:33:13 +0100 Subject: [PATCH 056/207] Run WASM build sequentially in CI for now --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index 1509f763..ed65474f 100644 --- a/js/package.json +++ b/js/package.json @@ -7,7 +7,7 @@ "build": "run-s build:*", "build:cleanup": "rm -rf dist/", "build:rank": "tsx scripts/inline_ranks.ts", - "build:wasm": "run-p wasm:*", + "build:wasm": "run-s wasm:*", "build:postprocess": "tsx scripts/post_process.ts", "wasm:bundler": "wasm-pack build --target bundler --release --out-dir dist && rm -rf dist/.gitignore dist/README.md dist/package.json", "wasm:node": "wasm-pack build --target nodejs --release --out-dir dist/node && rm -rf dist/node/.gitignore dist/node/README.md dist/node/package.json", From fcb52cf097d5509faea618fd1960d962b79937bf Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 8 Mar 2023 01:34:00 +0100 Subject: [PATCH 057/207] Rename TiktokenEmbedding to TiktokenEncoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes Rename TiktokenEmbedding → TiktokenEncoding? 
dqbd/tiktoken#4 --- js/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/js/src/lib.rs b/js/src/lib.rs index f5ff4e6f..9993d419 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -312,14 +312,14 @@ impl Tiktoken { #[cfg(feature = "inline")] #[wasm_bindgen(typescript_custom_section)] const _: &'static str = r#" -export type TiktokenEmbedding = "gpt2" | "r50k_base" | "p50k_base" | "p50k_edit" | "cl100k_base"; +export type TiktokenEncoding = "gpt2" | "r50k_base" | "p50k_base" | "p50k_edit" | "cl100k_base"; /** - * @param {TiktokenEmbedding} encoding + * @param {TiktokenEncoding} encoding * @param {Record} [extend_special_tokens] * @returns {Tiktoken} */ -export function get_encoding(encoding: TiktokenEmbedding, extend_special_tokens?: Record): Tiktoken; +export function get_encoding(encoding: TiktokenEncoding, extend_special_tokens?: Record): Tiktoken; "#; #[cfg(feature = "inline")] From 945d4f2124cb38c16395646e1da9e66d466c79d6 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 8 Mar 2023 01:38:31 +0100 Subject: [PATCH 058/207] Reverse order of default --- js/scripts/post_process.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/scripts/post_process.ts b/js/scripts/post_process.ts index 74070143..4eec3542 100644 --- a/js/scripts/post_process.ts +++ b/js/scripts/post_process.ts @@ -129,8 +129,8 @@ export function init( pkg["exports"] = { ".": { types: "./tiktoken.d.ts", - default: "./tiktoken.js", node: "./tiktoken.node.js", + default: "./tiktoken.js", }, "./bundler": { types: "./bundler.d.ts", From 680fbc5562157bb09e77c907c52bf6aae6f420f1 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 8 Mar 2023 01:49:26 +0100 Subject: [PATCH 059/207] Update README.md --- js/README.md | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/js/README.md b/js/README.md index 85cbda3b..c757851d 100644 --- a/js/README.md +++ b/js/README.md @@ -31,6 +31,9 @@ const enc = 
encoding_for_model("gpt2", { "<|im_start|>": 100264, "<|im_end|>": 100265, }); + +// don't forget to free the encoder after it is not used +enc.free(); ``` If desired, you can create a Tiktoken instance directly with custom ranks, special tokens and regex pattern: @@ -50,15 +53,15 @@ const encoder = new Tiktoken( As this is a WASM library, there might be some issues with specific runtimes. If you encounter any issues, please open an issue. -| Runtime | Status | Notes | -| ------------------- | ------ | ------------------------------- | -| Node.js | ✅ | | -| Bun | ✅ | | -| Vite | ✅ | See [here](#vite) for notes | -| Next.js | ✅ | See [here](#nextjs) for caveats | -| Vercel Edge Runtime | 🚧 | Work in progress | -| Cloudflare Workers | 🚧 | Untested | -| Deno | ❌ | Currently unsupported | +| Runtime | Status | Notes | +| ------------------- | ------ | ------------------------------------------ | +| Node.js | ✅ | | +| Bun | ✅ | | +| Vite | ✅ | See [here](#vite) for notes | +| Next.js | ✅ | See [here](#nextjs) for notes | +| Vercel Edge Runtime | ✅ | See [here](#vercel-edge-runtime) for notes | +| Cloudflare Workers | 🚧 | Untested | +| Deno | ❌ | Currently unsupported | ### [Vite](#vite) @@ -105,6 +108,27 @@ const config = { }; ``` +### [Vercel Edge Runtime](#vercel-edge-runtime) + +Vercel Edge Runtime does support WASM modules by adding a `?module` suffix. 
Initialize the encoder with the following snippet: + +```typescript +import wasm from "@dqbd/tiktoken/tiktoken_bg.wasm?module"; +import { init, get_encoding } from "@dqbd/tiktoken/init"; + +export const config = { runtime: "edge" }; + +export default async function (req: Request) { + await init((imports) => WebAssembly.instantiate(wasm, imports)); + + const encoder = get_encoding("cl100k_base"); + const tokens = encoder.encode("hello world"); + encoder.free(); + + return new Response(`${encoder.encode("hello world")}`); +} +``` + ## Acknowledgements - https://github.com/zurawiki/tiktoken-rs From c1d11fb1912d110f4829ed019daab6cbdba04f34 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 8 Mar 2023 01:59:52 +0100 Subject: [PATCH 060/207] Add gpt-3.5-turbo support in types and matchers --- js/src/lib.rs | 6 +++++- js/test/test_simple_public.test.ts | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/js/src/lib.rs b/js/src/lib.rs index 9993d419..dbbc883a 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -366,7 +366,9 @@ export type TiktokenModel = | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" - | "gpt2"; + | "gpt2" + | "gpt-3.5-turbo" + | "gpt-3.5-turbo-0301"; /** * @param {TiktokenModel} encoding @@ -413,6 +415,8 @@ pub fn encoding_for_model( "code-search-babbage-code-001" => Ok("r50k_base"), "code-search-ada-code-001" => Ok("r50k_base"), "gpt2" => Ok("gpt2"), + "gpt-3.5-turbo" => Ok("cl100k_base"), + "gpt-3.5-turbo-0301" => Ok("cl100k_base"), model => Err(JsError::new( format!("Invalid model: {}", model.to_string()).as_str(), )), diff --git a/js/test/test_simple_public.test.ts b/js/test/test_simple_public.test.ts index 1c63d5cf..85dcd52a 100644 --- a/js/test/test_simple_public.test.ts +++ b/js/test/test_simple_public.test.ts @@ -81,6 +81,7 @@ it("test_simple", () => { it("test_encoding_for_model", () => { expect(encoding_for_model("gpt2").name).toEqual("gpt2"); 
expect(encoding_for_model("text-davinci-003").name).toEqual("p50k_base"); + expect(encoding_for_model("gpt-3.5-turbo").name).toEqual("cl100k_base"); }); it("test_custom_tokens", () => { From 268bc5c7ee3a995aebd00d9389391a208b9e8d3c Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 8 Mar 2023 02:03:47 +0100 Subject: [PATCH 061/207] Add README.md --- js/scripts/post_process.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/js/scripts/post_process.ts b/js/scripts/post_process.ts index 4eec3542..83c0a4ad 100644 --- a/js/scripts/post_process.ts +++ b/js/scripts/post_process.ts @@ -113,6 +113,7 @@ export function init( ); } +// package.json, README.md { const pkg = JSON.parse( fs.readFileSync(path.resolve(__dirname, "../package.json"), { @@ -151,4 +152,9 @@ export function init( JSON.stringify(pkg, null, 2), { encoding: "utf-8" } ); + + fs.copyFileSync( + path.resolve(__dirname, "../README.md"), + path.resolve(__dirname, "../dist/README.md") + ); } From 15dd0f254ed32c44bd6b48331214c58513ba9f1c Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 10 Mar 2023 20:00:21 +0100 Subject: [PATCH 062/207] Add caveats for CFW --- js/README.md | 49 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/js/README.md b/js/README.md index c757851d..aa4818b8 100644 --- a/js/README.md +++ b/js/README.md @@ -53,15 +53,15 @@ const encoder = new Tiktoken( As this is a WASM library, there might be some issues with specific runtimes. If you encounter any issues, please open an issue. 
-| Runtime | Status | Notes | -| ------------------- | ------ | ------------------------------------------ | -| Node.js | ✅ | | -| Bun | ✅ | | -| Vite | ✅ | See [here](#vite) for notes | -| Next.js | ✅ | See [here](#nextjs) for notes | -| Vercel Edge Runtime | ✅ | See [here](#vercel-edge-runtime) for notes | -| Cloudflare Workers | 🚧 | Untested | -| Deno | ❌ | Currently unsupported | +| Runtime | Status | Notes | +| ------------------- | ------ | ------------------------------------------- | +| Node.js | ✅ | | +| Bun | ✅ | | +| Vite | ✅ | See [here](#vite) for notes | +| Next.js | ✅ | See [here](#nextjs) for notes | +| Vercel Edge Runtime | ✅ | See [here](#vercel-edge-runtime) for notes | +| Cloudflare Workers | 🚧 | See [here](#cloudflare-workers) for caveats | +| Deno | ❌ | Currently unsupported | ### [Vite](#vite) @@ -129,6 +129,37 @@ export default async function (req: Request) { } ``` +### [Cloudflare Workers](#cloudflare-workers) + +Similar to Vercel Edge Runtime, Cloudflare Workers must import the WASM binary file manually. However, users need to point directly at the WASM binary, including `node_modules` prefix in some cases. 
+ +Add the following rule to the `wrangler.toml` to upload WASM during build: + +```toml +[[rules]] +globs = ["**/*.wasm"] +type = "CompiledWasm" +``` + +Initialize the encoder with the following snippet: + +```javascript +import wasm from "./node_modules/@dqbd/tiktoken/tiktoken_bg.wasm"; +import { get_encoding, init } from "@dqbd/tiktoken/init"; + +export default { + async fetch() { + await init((imports) => WebAssembly.instantiate(wasm, imports)); + const encoder = get_encoder("cl100k_base"); + const tokens = encoder.encode("hello world"); + encoder.free(); + return new Response(`${tokens}`); + }, +}; +``` + +```typescript ## Acknowledgements - https://github.com/zurawiki/tiktoken-rs +``` From 2c7e0e43a69522f99eb43433fd957aad3e48272b Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 11 Mar 2023 02:06:57 +0100 Subject: [PATCH 063/207] Create a lite build which defers loading of weights to consumers --- js/package.json | 2 +- js/scripts/post_process.ts | 143 +++++++++++++++++++++++---------- js/scripts/tsconfig.json | 11 +++ js/src/utils/download_ranks.ts | 108 ------------------------- js/tsconfig.json | 4 +- 5 files changed, 116 insertions(+), 152 deletions(-) create mode 100644 js/scripts/tsconfig.json delete mode 100644 js/src/utils/download_ranks.ts diff --git a/js/package.json b/js/package.json index ed65474f..1b056de0 100644 --- a/js/package.json +++ b/js/package.json @@ -10,7 +10,7 @@ "build:wasm": "run-s wasm:*", "build:postprocess": "tsx scripts/post_process.ts", "wasm:bundler": "wasm-pack build --target bundler --release --out-dir dist && rm -rf dist/.gitignore dist/README.md dist/package.json", - "wasm:node": "wasm-pack build --target nodejs --release --out-dir dist/node && rm -rf dist/node/.gitignore dist/node/README.md dist/node/package.json", + "wasm:lite": "wasm-pack build --target bundler --release --out-dir dist/lite --no-default-features && rm -rf dist/lite/.gitignore dist/lite/README.md dist/lite/package.json", "test": "yarn vitest" }, 
"repository": { diff --git a/js/scripts/post_process.ts b/js/scripts/post_process.ts index 83c0a4ad..82e8df2b 100644 --- a/js/scripts/post_process.ts +++ b/js/scripts/post_process.ts @@ -1,4 +1,4 @@ -import { Project, ts } from "ts-morph"; +import { Project, StructureKind, ts } from "ts-morph"; import * as fs from "node:fs"; import * as path from "node:path"; @@ -6,7 +6,7 @@ const project = new Project(); project.addSourceFilesAtPaths(["./dist/**/*.ts", "./dist/**/*.js"]); // make sure the types are correct -for (const filename of ["./dist/tiktoken.d.ts", "./dist/node/tiktoken.d.ts"]) { +for (const filename of ["./dist/tiktoken.d.ts", "./dist/lite/tiktoken.d.ts"]) { const sourceFile = project.getSourceFileOrThrow(filename); const cls = sourceFile.getFirstDescendantByKindOrThrow( ts.SyntaxKind.ClassDeclaration @@ -36,43 +36,86 @@ for (const filename of ["./dist/tiktoken.d.ts", "./dist/node/tiktoken.d.ts"]) { sourceFile.saveSync(); } -// bundler -{ - fs.writeFileSync( - path.resolve(__dirname, "../dist/bundler.js"), - `export * from "./tiktoken"; `.trim(), - { encoding: "utf-8" } - ); +for (const filename of [ + "./dist/tiktoken_bg.js", + "./dist/lite/tiktoken_bg.js", +]) { + const targetFileName = filename.replace("_bg", ".node"); + const sourceFile = project.getSourceFileOrThrow(filename); - fs.writeFileSync( - path.resolve(__dirname, "../dist/bundler.d.ts"), - `export * from "./tiktoken"; `.trim(), - { encoding: "utf-8" } - ); -} + sourceFile.insertStatements(0, [ + `let imports = {};`, + `imports["./tiktoken_bg.js"] = module.exports;`, + ]); + + for (const cls of sourceFile.getClasses().filter((x) => x.isExported())) { + cls.set({ + ...cls.getStructure(), + kind: StructureKind.Class, + isExported: false, + }); + + sourceFile.insertStatements(cls.getChildIndex() + 1, [ + `module.exports.${cls.getName()} = ${cls.getName()};`, + ]); + } -// node -{ - const options = { encoding: "utf-8" } as const; - fs.writeFileSync( - path.resolve(__dirname, 
"../dist/tiktoken.node.js"), - fs - .readFileSync( - path.resolve(__dirname, "../dist/node/tiktoken.js"), - options - ) - .replaceAll("__wbindgen_placeholder__", `./tiktoken_bg.js`), - options - ); + for (const fn of sourceFile.getFunctions().filter((f) => f.isExported())) { + fn.set({ + ...fn.getStructure(), + kind: StructureKind.Function, + isExported: false, + }); + + sourceFile.insertStatements(fn.getChildIndex(), [ + `module.exports.${fn.getName()} = ${fn.getText()};`, + ]); + + sourceFile + .getDescendantsOfKind(ts.SyntaxKind.FunctionExpression) + .filter((x) => x.getName() === fn.getName()) + .forEach((f) => f.removeName()); + + fn.remove(); + } - fs.rmSync(path.resolve(__dirname, "../dist/node"), { recursive: true }); + sourceFile.addStatements([ + `const path = require("path").join(__dirname, "tiktoken_bg.wasm");`, + `const bytes = require("fs").readFileSync(path);`, + + `const wasmModule = new WebAssembly.Module(bytes);`, + `const wasmInstance = new WebAssembly.Instance(wasmModule, imports);`, + `wasm = wasmInstance.exports;`, + `module.exports.__wasm = wasm;`, + ]); + + sourceFile.copy(targetFileName, { overwrite: true }).saveSync(); } -// package.json -{ - fs.writeFileSync( - path.resolve(__dirname, "../dist/init.js"), - ` +for (const targetFile of [ + path.resolve(__dirname, "../dist"), + path.resolve(__dirname, "../dist/lite"), +]) { + // bundler + { + fs.writeFileSync( + path.resolve(targetFile, "bundler.js"), + `export * from "./tiktoken";`.trim(), + { encoding: "utf-8" } + ); + + fs.writeFileSync( + path.resolve(targetFile, "bundler.d.ts"), + `export * from "./tiktoken";`.trim(), + { encoding: "utf-8" } + ); + } + + // init.js + { + fs.writeFileSync( + path.resolve(targetFile, "init.js"), + ` import * as imports from "./tiktoken_bg.js"; export async function init(cb) { @@ -94,12 +137,12 @@ export async function init(cb) { export * from "./tiktoken_bg.js"; `.trim(), - { encoding: "utf-8" } - ); + { encoding: "utf-8" } + ); - fs.writeFileSync( - 
path.resolve(__dirname, "../dist/init.d.ts"), - ` + fs.writeFileSync( + path.resolve(targetFile, "init.d.ts"), + ` /* tslint:disable */ /* eslint-disable */ export * from "./tiktoken"; @@ -109,8 +152,9 @@ export function init( ) => Promise ): Promise; `.trim(), - { encoding: "utf-8" } - ); + { encoding: "utf-8" } + ); + } } // package.json, README.md @@ -145,6 +189,23 @@ export function init( types: "./tiktoken_bg.wasm.d.ts", default: "./tiktoken_bg.wasm", }, + "./lite": { + types: "./lite/tiktoken.d.ts", + node: "./lite/tiktoken.node.js", + default: "./lite/tiktoken.js", + }, + "./lite/bundler": { + types: "./lite/bundler.d.ts", + default: "./lite/bundler.js", + }, + "./lite/init": { + types: "./lite/init.d.ts", + default: "./lite/init.js", + }, + "./lite/tiktoken_bg.wasm": { + types: "./lite/tiktoken_bg.wasm.d.ts", + default: "./lite/tiktoken_bg.wasm", + }, }; fs.writeFileSync( diff --git a/js/scripts/tsconfig.json b/js/scripts/tsconfig.json new file mode 100644 index 00000000..55681af1 --- /dev/null +++ b/js/scripts/tsconfig.json @@ -0,0 +1,11 @@ +{ + "compilerOptions": { + "target": "ES2022", + "moduleResolution": "node", + "strict": true, + "declaration": true, + "allowSyntheticDefaultImports": true + }, + "include": ["./**/*.ts"], + "exclude": ["node_modules"] +} diff --git a/js/src/utils/download_ranks.ts b/js/src/utils/download_ranks.ts deleted file mode 100644 index 70db0dde..00000000 --- a/js/src/utils/download_ranks.ts +++ /dev/null @@ -1,108 +0,0 @@ -function assert(condition: unknown, message?: string): asserts condition { - if (!condition) { - throw new Error(message); - } -} - -// printable ascii characters according to python -function is_printable(u: number): boolean { - return !(u <= 31 || (u >= 127 && u <= 160) || u == 173); -} - -export function data_gym_to_mergeable_bpe_ranks( - vocal_bpe_contents: string, - encoder_json_contents: string -) { - const rank_to_intbyte = Array.from({ length: 2 ** 8 }, (_, i) => i).filter( - (i) => is_printable(i) 
&& String.fromCharCode(i) !== " " - ); - - const data_gym_byte_to_byte = rank_to_intbyte.reduce>( - (memo, item) => { - memo[String.fromCharCode(item)] = item; - return memo; - }, - {} - ); - - let n = 0; - for (let b = 0; b < 2 ** 8; b++) { - if (!rank_to_intbyte.includes(b)) { - rank_to_intbyte.push(b); - data_gym_byte_to_byte[String.fromCharCode(2 ** 8 + n)] = b; - n += 1; - } - } - - assert( - rank_to_intbyte.length === 2 ** 8, - "rank_to_intbyte.length must be 2**8" - ); - - // vocab_bpe contains the merges along with associated ranks - const bpe_merges = vocal_bpe_contents - .split("\n") - .slice(1, -1) - .map((merge_str) => merge_str.split(" ")); - - function decode_data_gym(value: string) { - return value.split("").map((b) => data_gym_byte_to_byte[b]); - } - - // add the single byte tokens - const bpe_ranks = Object.fromEntries(rank_to_intbyte.map((b, i) => [b, i])); - - // add the merged tokens - n = rank_to_intbyte.length; - for (const [first, second] of bpe_merges) { - bpe_ranks[ - [...decode_data_gym(first), ...decode_data_gym(second)].join(",") - ] = n; - n += 1; - } - - // check that the encoder file matches the merges file - // this sanity check is important since tiktoken assumes that ranks are ordered the same - // as merge priority - const encoder_json: Record = JSON.parse( - encoder_json_contents - ); - - const encoder_json_loaded = Object.fromEntries( - Object.entries(encoder_json).map(([k, v]) => [ - decode_data_gym(k).join(","), - v, - ]) - ); - - // drop these two special tokens if present, since they're not mergeable bpe tokens - delete encoder_json_loaded[decode_data_gym("<|endoftext|>").join(",")]; - delete encoder_json_loaded[decode_data_gym("<|startoftext|>").join(",")]; - - function normalize_map(items: Record) { - return JSON.stringify( - Object.keys(items) - .sort() - .map((key) => [key, items[key]]) - ); - } - - assert(normalize_map(bpe_ranks) === normalize_map(encoder_json_loaded)); - return dump_tiktoken_bpe(bpe_ranks); -} - 
-export function dump_tiktoken_bpe(bpe_ranks: Record) { - return ( - Object.entries(bpe_ranks) - .sort((a, b) => a[1] - b[1]) - .map(([token_str, rank]) => - [ - Buffer.from( - token_str.split(",").map((i) => Number.parseInt(i, 10)) - ).toString("base64"), - rank, - ].join(" ") - ) - .join("\n") + "\n" - ); -} diff --git a/js/tsconfig.json b/js/tsconfig.json index 3c5ff0ae..cef970e8 100644 --- a/js/tsconfig.json +++ b/js/tsconfig.json @@ -7,8 +7,8 @@ "strict": true, "declaration": true, "outDir": "./dist", - "allowSyntheticDefaultImports": true, + "allowSyntheticDefaultImports": true }, - "include": ["./**/*.ts", "./**/*.js"], + "include": ["./src/**/*.ts"], "exclude": ["node_modules", "dist"] } From 586d205b4fa5f6561e0e3580425a94ad7e11c62e Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 11 Mar 2023 02:20:35 +0100 Subject: [PATCH 064/207] Fix README.md --- js/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/js/README.md b/js/README.md index aa4818b8..4f83ef09 100644 --- a/js/README.md +++ b/js/README.md @@ -158,8 +158,7 @@ export default { }; ``` -```typescript ## Acknowledgements - https://github.com/zurawiki/tiktoken-rs -``` + From 6d8c1dc792484c70dbbdf3bb66a3415300ff84bb Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 11 Mar 2023 12:39:01 +0100 Subject: [PATCH 065/207] Cleanup --- js/scripts/inline_ranks.ts | 90 ++++++++------ js/scripts/post_process.ts | 232 ++++++++++++++++--------------------- js/src/init.ts | 22 ++++ 3 files changed, 177 insertions(+), 167 deletions(-) create mode 100644 js/src/init.ts diff --git a/js/scripts/inline_ranks.ts b/js/scripts/inline_ranks.ts index 6e72a2f8..13e0513c 100644 --- a/js/scripts/inline_ranks.ts +++ b/js/scripts/inline_ranks.ts @@ -1,9 +1,8 @@ -import assert from "node:assert"; import fs from "node:fs/promises"; import path from "node:path"; -// printable ascii characters according to python function is_printable(u: number): boolean { + // printable ascii characters according 
to python return !(u <= 31 || (u >= 127 && u <= 160) || u == 173); } @@ -32,10 +31,9 @@ function data_gym_to_mergeable_bpe_ranks( } } - assert( - rank_to_intbyte.length === 2 ** 8, - "rank_to_intbyte.length must be 2**8" - ); + if (rank_to_intbyte.length !== 2 ** 8) { + throw new Error("rank_to_intbyte.length must be 2**8"); + } // vocab_bpe contains the merges along with associated ranks const bpe_merges = vocal_bpe_contents @@ -85,21 +83,11 @@ function data_gym_to_mergeable_bpe_ranks( ); } - assert(normalize_map(bpe_ranks) === normalize_map(encoder_json_loaded)); - return bpe_ranks; -} + if (normalize_map(bpe_ranks) !== normalize_map(encoder_json_loaded)) { + throw new Error("bpe_ranks !== encoder_json_loaded"); + } -function load_tiktoken_bpe(tiktoken_bpe_file: string) { - return Object.fromEntries( - tiktoken_bpe_file - .split("\n") - .map((line) => line.trim() && line.split(" ")) - .filter((x): x is Array => !!x && Array.isArray(x)) - .map(([token, rank]) => [ - Buffer.from(token, "base64").join(","), - Number.parseInt(rank, 10), - ]) - ); + return bpe_ranks; } function dump_tiktoken_bpe(bpe_ranks: Record) { @@ -118,8 +106,48 @@ function dump_tiktoken_bpe(bpe_ranks: Record) { ); } -async function requestText(url: string) { - return await fetch(url).then((a) => a.text()); +export async function load( + registry: ( + | { load_tiktoken_bpe: string } + | { + data_gym_to_mergeable_bpe_ranks: { + vocab_bpe_file: string; + encoder_json_file: string; + }; + } + ) & { + explicit_n_vocab: number; + pat_str: string; + special_tokens: Record; + }, + customFetch?: (url: string) => Promise +) { + const ofetch = customFetch + ? 
customFetch + : (url: string) => fetch(url).then((r) => r.text()); + + if ("data_gym_to_mergeable_bpe_ranks" in registry) { + const [vocab_bpe, encoder_json] = await Promise.all([ + ofetch(registry.data_gym_to_mergeable_bpe_ranks.vocab_bpe_file), + ofetch(registry.data_gym_to_mergeable_bpe_ranks.encoder_json_file), + ]); + + return { + explicit_n_vocab: registry.explicit_n_vocab, + pat_str: registry.pat_str, + special_tokens: registry.special_tokens, + bpe_ranks: dump_tiktoken_bpe( + data_gym_to_mergeable_bpe_ranks(vocab_bpe, encoder_json) + ), + }; + } else { + return { + explicit_n_vocab: registry.explicit_n_vocab, + pat_str: registry.pat_str, + special_tokens: registry.special_tokens, + bpe_ranks: await ofetch(registry.load_tiktoken_bpe), + }; + } } async function main() { @@ -144,22 +172,8 @@ async function main() { continue; } catch {} - let ranks: Record | null = null; - - if (data.data_gym_to_mergeable_bpe_ranks) { - ranks = data_gym_to_mergeable_bpe_ranks( - await requestText(data.data_gym_to_mergeable_bpe_ranks.vocab_bpe_file), - await requestText( - data.data_gym_to_mergeable_bpe_ranks.encoder_json_file - ) - ); - } else if (data.load_tiktoken_bpe) { - ranks = load_tiktoken_bpe(await requestText(data.load_tiktoken_bpe)); - } - - if (ranks != null) { - await fs.writeFile(targetFile, dump_tiktoken_bpe(ranks)); - } + const result = await load(data); + await fs.writeFile(targetFile, result.bpe_ranks, { encoding: "utf-8" }); } } diff --git a/js/scripts/post_process.ts b/js/scripts/post_process.ts index 82e8df2b..01552187 100644 --- a/js/scripts/post_process.ts +++ b/js/scripts/post_process.ts @@ -1,157 +1,131 @@ -import { Project, StructureKind, ts } from "ts-morph"; +import { Project, ScriptTarget, StructureKind, ts } from "ts-morph"; import * as fs from "node:fs"; import * as path from "node:path"; -const project = new Project(); -project.addSourceFilesAtPaths(["./dist/**/*.ts", "./dist/**/*.js"]); - -// make sure the types are correct -for (const filename 
of ["./dist/tiktoken.d.ts", "./dist/lite/tiktoken.d.ts"]) { - const sourceFile = project.getSourceFileOrThrow(filename); - const cls = sourceFile.getFirstDescendantByKindOrThrow( - ts.SyntaxKind.ClassDeclaration - ); - - cls - .getConstructors()[0] - .getParameterOrThrow("special_tokens") - .set({ type: "Record" }); +for (const baseDir of [ + path.resolve(__dirname, "../dist"), + path.resolve(__dirname, "../dist/lite"), +]) { + // fix `any` types + { + const sourceFile = new Project().addSourceFileAtPath( + path.resolve(baseDir, "tiktoken.d.ts") + ); + const cls = sourceFile.getFirstDescendantByKindOrThrow( + ts.SyntaxKind.ClassDeclaration + ); - for (const method of ["encode", "encode_with_unstable"]) { cls - .getMethodOrThrow(method) - .getParameterOrThrow("allowed_special") - .set({ type: `"all" | string[]`, hasQuestionToken: true }); + .getConstructors()[0] + .getParameterOrThrow("special_tokens") + .set({ type: "Record" }); + + for (const method of ["encode", "encode_with_unstable"]) { + cls + .getMethodOrThrow(method) + .getParameterOrThrow("allowed_special") + .set({ type: `"all" | string[]`, hasQuestionToken: true }); + + cls + .getMethodOrThrow(method) + .getParameterOrThrow("disallowed_special") + .set({ type: `"all" | string[]`, hasQuestionToken: true }); + } cls - .getMethodOrThrow(method) - .getParameterOrThrow("disallowed_special") - .set({ type: `"all" | string[]`, hasQuestionToken: true }); - } - - cls - .getMemberOrThrow("token_byte_values") - .set({ returnType: "Array>" }); + .getMemberOrThrow("token_byte_values") + .set({ returnType: "Array>" }); - sourceFile.saveSync(); -} - -for (const filename of [ - "./dist/tiktoken_bg.js", - "./dist/lite/tiktoken_bg.js", -]) { - const targetFileName = filename.replace("_bg", ".node"); - const sourceFile = project.getSourceFileOrThrow(filename); - - sourceFile.insertStatements(0, [ - `let imports = {};`, - `imports["./tiktoken_bg.js"] = module.exports;`, - ]); - - for (const cls of 
sourceFile.getClasses().filter((x) => x.isExported())) { - cls.set({ - ...cls.getStructure(), - kind: StructureKind.Class, - isExported: false, - }); - - sourceFile.insertStatements(cls.getChildIndex() + 1, [ - `module.exports.${cls.getName()} = ${cls.getName()};`, - ]); + sourceFile.saveSync(); } - for (const fn of sourceFile.getFunctions().filter((f) => f.isExported())) { - fn.set({ - ...fn.getStructure(), - kind: StructureKind.Function, - isExported: false, - }); + // tiktoken.node.js + { + const sourceFile = new Project().addSourceFileAtPath( + path.resolve(baseDir, "tiktoken_bg.js") + ); + sourceFile.insertStatements(0, [ + `let imports = {};`, + `imports["./tiktoken_bg.js"] = module.exports;`, + ]); - sourceFile.insertStatements(fn.getChildIndex(), [ - `module.exports.${fn.getName()} = ${fn.getText()};`, + for (const cls of sourceFile.getClasses().filter((x) => x.isExported())) { + cls.set({ + ...cls.getStructure(), + kind: StructureKind.Class, + isExported: false, + }); + + sourceFile.insertStatements(cls.getChildIndex() + 1, [ + `module.exports.${cls.getName()} = ${cls.getName()};`, + ]); + } + + for (const fn of sourceFile.getFunctions().filter((f) => f.isExported())) { + fn.set({ + ...fn.getStructure(), + kind: StructureKind.Function, + isExported: false, + }); + + sourceFile.insertStatements(fn.getChildIndex(), [ + `module.exports.${fn.getName()} = ${fn.getText()};`, + ]); + + sourceFile + .getDescendantsOfKind(ts.SyntaxKind.FunctionExpression) + .filter((x) => x.getName() === fn.getName()) + .forEach((f) => f.removeName()); + + fn.remove(); + } + + sourceFile.addStatements([ + `const path = require("path").join(__dirname, "tiktoken_bg.wasm");`, + `const bytes = require("fs").readFileSync(path);`, + `const wasmModule = new WebAssembly.Module(bytes);`, + `const wasmInstance = new WebAssembly.Instance(wasmModule, imports);`, + `wasm = wasmInstance.exports;`, + `module.exports.__wasm = wasm;`, ]); sourceFile - 
.getDescendantsOfKind(ts.SyntaxKind.FunctionExpression) - .filter((x) => x.getName() === fn.getName()) - .forEach((f) => f.removeName()); - - fn.remove(); + .copy(path.resolve(baseDir, "tiktoken.node.js"), { overwrite: true }) + .saveSync(); } - sourceFile.addStatements([ - `const path = require("path").join(__dirname, "tiktoken_bg.wasm");`, - `const bytes = require("fs").readFileSync(path);`, - - `const wasmModule = new WebAssembly.Module(bytes);`, - `const wasmInstance = new WebAssembly.Instance(wasmModule, imports);`, - `wasm = wasmInstance.exports;`, - `module.exports.__wasm = wasm;`, - ]); - - sourceFile.copy(targetFileName, { overwrite: true }).saveSync(); -} - -for (const targetFile of [ - path.resolve(__dirname, "../dist"), - path.resolve(__dirname, "../dist/lite"), -]) { - // bundler + // init.js { - fs.writeFileSync( - path.resolve(targetFile, "bundler.js"), - `export * from "./tiktoken";`.trim(), - { encoding: "utf-8" } - ); - - fs.writeFileSync( - path.resolve(targetFile, "bundler.d.ts"), - `export * from "./tiktoken";`.trim(), - { encoding: "utf-8" } - ); + const sourceFile = new Project({ + compilerOptions: { + target: ScriptTarget.ES2022, + moduleResolution: ts.ModuleResolutionKind.NodeJs, + strict: true, + declaration: true, + }, + }).addSourceFileAtPath("./src/init.ts"); + + const emitOutput = sourceFile.getEmitOutput(); + for (const file of emitOutput.getOutputFiles()) { + fs.writeFileSync( + path.resolve(baseDir, path.basename(file.getFilePath())), + file.getText(), + { encoding: "utf-8" } + ); + } } - // init.js + // bundler.js { fs.writeFileSync( - path.resolve(targetFile, "init.js"), - ` -import * as imports from "./tiktoken_bg.js"; - -export async function init(cb) { - const res = await cb({ - "./tiktoken_bg.js": imports, - }); - - const instance = - "instance" in res && res.instance instanceof WebAssembly.Instance - ? res.instance - : res instanceof WebAssembly.Instance - ? 
res - : null; - - if (instance == null) throw new Error("Missing instance"); - imports.__wbg_set_wasm(instance.exports); - return imports; -} - -export * from "./tiktoken_bg.js"; - `.trim(), + path.resolve(baseDir, "bundler.js"), + `export * from "./tiktoken";`.trim(), { encoding: "utf-8" } ); fs.writeFileSync( - path.resolve(targetFile, "init.d.ts"), - ` -/* tslint:disable */ -/* eslint-disable */ -export * from "./tiktoken"; -export function init( - callback: ( - imports: WebAssembly.Imports - ) => Promise -): Promise; - `.trim(), + path.resolve(baseDir, "bundler.d.ts"), + `export * from "./tiktoken";`.trim(), { encoding: "utf-8" } ); } diff --git a/js/src/init.ts b/js/src/init.ts new file mode 100644 index 00000000..62940863 --- /dev/null +++ b/js/src/init.ts @@ -0,0 +1,22 @@ +// @ts-expect-error +import * as imports from "./tiktoken_bg.js"; + +export async function init( + callback: ( + imports: WebAssembly.Imports + ) => Promise +): Promise { + const result = await callback({ "./tiktoken_bg.js": imports }); + const instance = + "instance" in result && result.instance instanceof WebAssembly.Instance + ? result.instance + : result instanceof WebAssembly.Instance + ? 
result + : null; + if (instance == null) throw new Error("Missing instance"); + imports.__wbg_set_wasm(instance.exports); + return imports; +} + +// @ts-expect-error +export * from "./tiktoken.js"; From 64178e567855f72983b9ff37ebd366a2a20f5647 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 11 Mar 2023 17:11:06 +0100 Subject: [PATCH 066/207] Expose loading script --- js/package.json | 2 +- js/scripts/inline_ranks.ts | 150 +----------------------------------- js/scripts/post_process.ts | 64 ++++++++++++++-- js/src/init.ts | 4 +- js/src/load.ts | 151 +++++++++++++++++++++++++++++++++++++ 5 files changed, 213 insertions(+), 158 deletions(-) create mode 100644 js/src/load.ts diff --git a/js/package.json b/js/package.json index 1b056de0..1947d87d 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.0-alpha.1", + "version": "1.0.0-alpha.2", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { diff --git a/js/scripts/inline_ranks.ts b/js/scripts/inline_ranks.ts index 13e0513c..993d59ff 100644 --- a/js/scripts/inline_ranks.ts +++ b/js/scripts/inline_ranks.ts @@ -1,154 +1,6 @@ import fs from "node:fs/promises"; import path from "node:path"; - -function is_printable(u: number): boolean { - // printable ascii characters according to python - return !(u <= 31 || (u >= 127 && u <= 160) || u == 173); -} - -function data_gym_to_mergeable_bpe_ranks( - vocal_bpe_contents: string, - encoder_json_contents: string -) { - const rank_to_intbyte = Array.from({ length: 2 ** 8 }, (_, i) => i).filter( - (i) => is_printable(i) && String.fromCharCode(i) !== " " - ); - - const data_gym_byte_to_byte = rank_to_intbyte.reduce>( - (memo, item) => { - memo[String.fromCharCode(item)] = item; - return memo; - }, - {} - ); - - let n = 0; - for (let b = 0; b < 2 ** 8; b++) { - if (!rank_to_intbyte.includes(b)) { - rank_to_intbyte.push(b); - data_gym_byte_to_byte[String.fromCharCode(2 ** 8 + n)] = b; - n += 
1; - } - } - - if (rank_to_intbyte.length !== 2 ** 8) { - throw new Error("rank_to_intbyte.length must be 2**8"); - } - - // vocab_bpe contains the merges along with associated ranks - const bpe_merges = vocal_bpe_contents - .split("\n") - .slice(1, -1) - .map((merge_str) => merge_str.split(" ")); - - function decode_data_gym(value: string) { - return value.split("").map((b) => data_gym_byte_to_byte[b]); - } - - // add the single byte tokens - const bpe_ranks = Object.fromEntries(rank_to_intbyte.map((b, i) => [b, i])); - - // add the merged tokens - n = rank_to_intbyte.length; - for (const [first, second] of bpe_merges) { - bpe_ranks[ - [...decode_data_gym(first), ...decode_data_gym(second)].join(",") - ] = n; - n += 1; - } - - // check that the encoder file matches the merges file - // this sanity check is important since tiktoken assumes that ranks are ordered the same - // as merge priority - const encoder_json: Record = JSON.parse( - encoder_json_contents - ); - - const encoder_json_loaded = Object.fromEntries( - Object.entries(encoder_json).map(([k, v]) => [ - decode_data_gym(k).join(","), - v, - ]) - ); - - // drop these two special tokens if present, since they're not mergeable bpe tokens - delete encoder_json_loaded[decode_data_gym("<|endoftext|>").join(",")]; - delete encoder_json_loaded[decode_data_gym("<|startoftext|>").join(",")]; - - function normalize_map(items: Record) { - return JSON.stringify( - Object.keys(items) - .sort() - .map((key) => [key, items[key]]) - ); - } - - if (normalize_map(bpe_ranks) !== normalize_map(encoder_json_loaded)) { - throw new Error("bpe_ranks !== encoder_json_loaded"); - } - - return bpe_ranks; -} - -function dump_tiktoken_bpe(bpe_ranks: Record) { - return ( - Object.entries(bpe_ranks) - .sort((a, b) => a[1] - b[1]) - .map(([token_str, rank]) => - [ - Buffer.from( - token_str.split(",").map((i) => Number.parseInt(i, 10)) - ).toString("base64"), - rank, - ].join(" ") - ) - .join("\n") + "\n" - ); -} - -export async 
function load( - registry: ( - | { load_tiktoken_bpe: string } - | { - data_gym_to_mergeable_bpe_ranks: { - vocab_bpe_file: string; - encoder_json_file: string; - }; - } - ) & { - explicit_n_vocab: number; - pat_str: string; - special_tokens: Record; - }, - customFetch?: (url: string) => Promise -) { - const ofetch = customFetch - ? customFetch - : (url: string) => fetch(url).then((r) => r.text()); - - if ("data_gym_to_mergeable_bpe_ranks" in registry) { - const [vocab_bpe, encoder_json] = await Promise.all([ - ofetch(registry.data_gym_to_mergeable_bpe_ranks.vocab_bpe_file), - ofetch(registry.data_gym_to_mergeable_bpe_ranks.encoder_json_file), - ]); - - return { - explicit_n_vocab: registry.explicit_n_vocab, - pat_str: registry.pat_str, - special_tokens: registry.special_tokens, - bpe_ranks: dump_tiktoken_bpe( - data_gym_to_mergeable_bpe_ranks(vocab_bpe, encoder_json) - ), - }; - } else { - return { - explicit_n_vocab: registry.explicit_n_vocab, - pat_str: registry.pat_str, - special_tokens: registry.special_tokens, - bpe_ranks: await ofetch(registry.load_tiktoken_bpe), - }; - } -} +import { load } from "../src/load"; async function main() { try { diff --git a/js/scripts/post_process.ts b/js/scripts/post_process.ts index 01552187..b36afd0f 100644 --- a/js/scripts/post_process.ts +++ b/js/scripts/post_process.ts @@ -39,7 +39,7 @@ for (const baseDir of [ sourceFile.saveSync(); } - // tiktoken.node.js + // tiktoken.cjs { const sourceFile = new Project().addSourceFileAtPath( path.resolve(baseDir, "tiktoken_bg.js") @@ -90,7 +90,7 @@ for (const baseDir of [ ]); sourceFile - .copy(path.resolve(baseDir, "tiktoken.node.js"), { overwrite: true }) + .copy(path.resolve(baseDir, "tiktoken.cjs"), { overwrite: true }) .saveSync(); } @@ -103,7 +103,7 @@ for (const baseDir of [ strict: true, declaration: true, }, - }).addSourceFileAtPath("./src/init.ts"); + }).addSourceFileAtPath(path.resolve(__dirname, "../src/init.ts")); const emitOutput = sourceFile.getEmitOutput(); for (const 
file of emitOutput.getOutputFiles()) { @@ -115,6 +115,34 @@ for (const baseDir of [ } } + // load.js and load.cjs + { + for (const module of [ts.ModuleKind.CommonJS, ts.ModuleKind.ES2022]) { + const sourceFile = new Project({ + compilerOptions: { + target: ScriptTarget.ES2022, + module, + moduleResolution: ts.ModuleResolutionKind.NodeJs, + strict: true, + declaration: true, + }, + }).addSourceFileAtPath(path.resolve(__dirname, "../src/load.ts")); + + const emitOutput = sourceFile.getEmitOutput(); + for (const file of emitOutput.getOutputFiles()) { + let targetFile = path.basename(file.getFilePath()); + + if (module === ts.ModuleKind.CommonJS) { + targetFile = targetFile.replace(".js", ".cjs"); + } + + fs.writeFileSync(path.resolve(baseDir, targetFile), file.getText(), { + encoding: "utf-8", + }); + } + } + } + // bundler.js { fs.writeFileSync( @@ -128,6 +156,20 @@ for (const baseDir of [ `export * from "./tiktoken";`.trim(), { encoding: "utf-8" } ); + + fs.writeFileSync( + path.resolve(baseDir, "tiktoken_bg.d.ts"), + `export * from "./tiktoken";`.trim(), + { encoding: "utf-8" } + ); + } + + if (!baseDir.includes("/lite")) { + fs.writeFileSync( + path.resolve(baseDir, "lite.d.ts"), + `export * from "./lite/tiktoken";`.trim(), + { encoding: "utf-8" } + ); } } @@ -143,12 +185,12 @@ for (const baseDir of [ delete pkg.scripts; pkg.files = ["**/*"]; - pkg["main"] = "tiktoken.node.js"; + pkg["main"] = "tiktoken.cjs"; pkg["types"] = "tiktoken.d.ts"; pkg["exports"] = { ".": { types: "./tiktoken.d.ts", - node: "./tiktoken.node.js", + node: "./tiktoken.cjs", default: "./tiktoken.js", }, "./bundler": { @@ -159,13 +201,18 @@ for (const baseDir of [ types: "./init.d.ts", default: "./init.js", }, + "./load": { + types: "./load.d.ts", + node: "./load.cjs", + default: "./load.js", + }, "./tiktoken_bg.wasm": { types: "./tiktoken_bg.wasm.d.ts", default: "./tiktoken_bg.wasm", }, "./lite": { types: "./lite/tiktoken.d.ts", - node: "./lite/tiktoken.node.js", + node: 
"./lite/tiktoken.cjs", default: "./lite/tiktoken.js", }, "./lite/bundler": { @@ -176,6 +223,11 @@ for (const baseDir of [ types: "./lite/init.d.ts", default: "./lite/init.js", }, + "./lite/load": { + types: "./lite/load.d.ts", + node: "./lite/load.cjs", + default: "./lite/load.js", + }, "./lite/tiktoken_bg.wasm": { types: "./lite/tiktoken_bg.wasm.d.ts", default: "./lite/tiktoken_bg.wasm", diff --git a/js/src/init.ts b/js/src/init.ts index 62940863..c26d5152 100644 --- a/js/src/init.ts +++ b/js/src/init.ts @@ -1,5 +1,5 @@ // @ts-expect-error -import * as imports from "./tiktoken_bg.js"; +import * as imports from "./tiktoken_bg"; export async function init( callback: ( @@ -19,4 +19,4 @@ export async function init( } // @ts-expect-error -export * from "./tiktoken.js"; +export * from "./tiktoken_bg"; diff --git a/js/src/load.ts b/js/src/load.ts new file mode 100644 index 00000000..eba598e2 --- /dev/null +++ b/js/src/load.ts @@ -0,0 +1,151 @@ +import fs from "node:fs/promises"; +import path from "node:path"; + +function is_printable(u: number): boolean { + // printable ascii characters according to python + return !(u <= 31 || (u >= 127 && u <= 160) || u == 173); +} + +function data_gym_to_mergeable_bpe_ranks( + vocal_bpe_contents: string, + encoder_json_contents: string +) { + const rank_to_intbyte = Array.from({ length: 2 ** 8 }, (_, i) => i).filter( + (i) => is_printable(i) && String.fromCharCode(i) !== " " + ); + + const data_gym_byte_to_byte = rank_to_intbyte.reduce>( + (memo, item) => { + memo[String.fromCharCode(item)] = item; + return memo; + }, + {} + ); + + let n = 0; + for (let b = 0; b < 2 ** 8; b++) { + if (!rank_to_intbyte.includes(b)) { + rank_to_intbyte.push(b); + data_gym_byte_to_byte[String.fromCharCode(2 ** 8 + n)] = b; + n += 1; + } + } + + if (rank_to_intbyte.length !== 2 ** 8) { + throw new Error("rank_to_intbyte.length must be 2**8"); + } + + // vocab_bpe contains the merges along with associated ranks + const bpe_merges = vocal_bpe_contents + 
.split("\n") + .slice(1, -1) + .map((merge_str) => merge_str.split(" ")); + + function decode_data_gym(value: string) { + return value.split("").map((b) => data_gym_byte_to_byte[b]); + } + + // add the single byte tokens + const bpe_ranks = Object.fromEntries(rank_to_intbyte.map((b, i) => [b, i])); + + // add the merged tokens + n = rank_to_intbyte.length; + for (const [first, second] of bpe_merges) { + bpe_ranks[ + [...decode_data_gym(first), ...decode_data_gym(second)].join(",") + ] = n; + n += 1; + } + + // check that the encoder file matches the merges file + // this sanity check is important since tiktoken assumes that ranks are ordered the same + // as merge priority + const encoder_json: Record = JSON.parse( + encoder_json_contents + ); + + const encoder_json_loaded = Object.fromEntries( + Object.entries(encoder_json).map(([k, v]) => [ + decode_data_gym(k).join(","), + v, + ]) + ); + + // drop these two special tokens if present, since they're not mergeable bpe tokens + delete encoder_json_loaded[decode_data_gym("<|endoftext|>").join(",")]; + delete encoder_json_loaded[decode_data_gym("<|startoftext|>").join(",")]; + + function normalize_map(items: Record) { + return JSON.stringify( + Object.keys(items) + .sort() + .map((key) => [key, items[key]]) + ); + } + + if (normalize_map(bpe_ranks) !== normalize_map(encoder_json_loaded)) { + throw new Error("bpe_ranks !== encoder_json_loaded"); + } + + return bpe_ranks; +} + +function dump_tiktoken_bpe(bpe_ranks: Record) { + return ( + Object.entries(bpe_ranks) + .sort((a, b) => a[1] - b[1]) + .map(([token_str, rank]) => + [ + Buffer.from( + token_str.split(",").map((i) => Number.parseInt(i, 10)) + ).toString("base64"), + rank, + ].join(" ") + ) + .join("\n") + "\n" + ); +} + +export async function load( + registry: ( + | { load_tiktoken_bpe: string } + | { + data_gym_to_mergeable_bpe_ranks: { + vocab_bpe_file: string; + encoder_json_file: string; + }; + } + ) & { + explicit_n_vocab: number; + pat_str: string; + 
special_tokens: Record; + }, + customFetch?: (url: string) => Promise +) { + const ofetch = customFetch + ? customFetch + : (url: string) => fetch(url).then((r) => r.text()); + + if ("data_gym_to_mergeable_bpe_ranks" in registry) { + const [vocab_bpe, encoder_json] = await Promise.all([ + ofetch(registry.data_gym_to_mergeable_bpe_ranks.vocab_bpe_file), + ofetch(registry.data_gym_to_mergeable_bpe_ranks.encoder_json_file), + ]); + + return { + explicit_n_vocab: registry.explicit_n_vocab, + pat_str: registry.pat_str, + special_tokens: registry.special_tokens, + bpe_ranks: dump_tiktoken_bpe( + data_gym_to_mergeable_bpe_ranks(vocab_bpe, encoder_json) + ), + }; + } else { + return { + explicit_n_vocab: registry.explicit_n_vocab, + pat_str: registry.pat_str, + special_tokens: registry.special_tokens, + bpe_ranks: await ofetch(registry.load_tiktoken_bpe), + }; + } +} From 17dd0ba5ec00d0e6c1b8263369da6896cd441194 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 11 Mar 2023 17:46:00 +0100 Subject: [PATCH 067/207] Add polyfill for Buffer.from --- js/src/load.ts | 97 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 93 insertions(+), 4 deletions(-) diff --git a/js/src/load.ts b/js/src/load.ts index eba598e2..895919c9 100644 --- a/js/src/load.ts +++ b/js/src/load.ts @@ -1,5 +1,94 @@ -import fs from "node:fs/promises"; -import path from "node:path"; +/** +Copyright (c) 2014 Jameson Little + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + */ +const lookup: string[] = []; +const revLookup: number[] = []; + +const code = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +for (var i = 0, len = code.length; i < len; ++i) { + lookup[i] = code[i]; + revLookup[code.charCodeAt(i)] = i; +} + +// Support decoding URL-safe base64 strings, as Node.js does. +// See: https://en.wikipedia.org/wiki/Base64#URL_applications +revLookup["-".charCodeAt(0)] = 62; +revLookup["_".charCodeAt(0)] = 63; + +function tripletToBase64(num: number) { + return ( + lookup[(num >> 18) & 0x3f] + + lookup[(num >> 12) & 0x3f] + + lookup[(num >> 6) & 0x3f] + + lookup[num & 0x3f] + ); +} + +function encodeChunk(uint8: number[], start: number, end: number) { + var tmp; + var output = []; + for (var i = start; i < end; i += 3) { + tmp = + ((uint8[i] << 16) & 0xff0000) + + ((uint8[i + 1] << 8) & 0xff00) + + (uint8[i + 2] & 0xff); + output.push(tripletToBase64(tmp)); + } + return output.join(""); +} + +function fromByteArray(uint8: number[]) { + var tmp; + var len = uint8.length; + var extraBytes = len % 3; // if we have 1 byte left, pad 2 bytes + var parts = []; + var maxChunkLength = 16383; // must be multiple of 3 + + // go through the array every three bytes, we'll deal with trailing stuff later + for (var i = 0, len2 = len - extraBytes; i < len2; i += maxChunkLength) { + parts.push( + encodeChunk( + uint8, + i, + i + maxChunkLength > len2 ? 
len2 : i + maxChunkLength + ) + ); + } + + // pad the end with zeros, but make sure to not forget the extra bytes + if (extraBytes === 1) { + tmp = uint8[len - 1]; + parts.push(lookup[tmp >> 2] + lookup[(tmp << 4) & 0x3f] + "=="); + } else if (extraBytes === 2) { + tmp = (uint8[len - 2] << 8) + uint8[len - 1]; + parts.push( + lookup[tmp >> 10] + + lookup[(tmp >> 4) & 0x3f] + + lookup[(tmp << 2) & 0x3f] + + "=" + ); + } + + return parts.join(""); +} function is_printable(u: number): boolean { // printable ascii characters according to python @@ -96,9 +185,9 @@ function dump_tiktoken_bpe(bpe_ranks: Record) { .sort((a, b) => a[1] - b[1]) .map(([token_str, rank]) => [ - Buffer.from( + fromByteArray( token_str.split(",").map((i) => Number.parseInt(i, 10)) - ).toString("base64"), + ), rank, ].join(" ") ) From e6f072654be412e5af7b1a74e6da2f379c767aaa Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 11 Mar 2023 21:03:46 +0100 Subject: [PATCH 068/207] Fix exports for CJS (Node ESM) --- js/scripts/post_process.ts | 102 +++++++++++++++++++++++++------------ 1 file changed, 70 insertions(+), 32 deletions(-) diff --git a/js/scripts/post_process.ts b/js/scripts/post_process.ts index b36afd0f..9da4e124 100644 --- a/js/scripts/post_process.ts +++ b/js/scripts/post_process.ts @@ -6,6 +6,7 @@ for (const baseDir of [ path.resolve(__dirname, "../dist"), path.resolve(__dirname, "../dist/lite"), ]) { + let publicExports: string[] = []; // fix `any` types { const sourceFile = new Project().addSourceFileAtPath( @@ -36,18 +37,27 @@ for (const baseDir of [ .getMemberOrThrow("token_byte_values") .set({ returnType: "Array>" }); + publicExports = sourceFile + .getExportSymbols() + .filter((sym) => + sym + .getDeclarations() + .some( + (dcl) => + dcl.isKind(ts.SyntaxKind.ClassDeclaration) || + dcl.isKind(ts.SyntaxKind.FunctionDeclaration) + ) + ) + .map((i) => i.getName()); + sourceFile.saveSync(); } - // tiktoken.cjs + // tiktoken_bg.cjs { const sourceFile = new 
Project().addSourceFileAtPath( path.resolve(baseDir, "tiktoken_bg.js") ); - sourceFile.insertStatements(0, [ - `let imports = {};`, - `imports["./tiktoken_bg.js"] = module.exports;`, - ]); for (const cls of sourceFile.getClasses().filter((x) => x.isExported())) { cls.set({ @@ -80,38 +90,64 @@ for (const baseDir of [ fn.remove(); } - sourceFile.addStatements([ - `const path = require("path").join(__dirname, "tiktoken_bg.wasm");`, - `const bytes = require("fs").readFileSync(path);`, - `const wasmModule = new WebAssembly.Module(bytes);`, - `const wasmInstance = new WebAssembly.Instance(wasmModule, imports);`, - `wasm = wasmInstance.exports;`, - `module.exports.__wasm = wasm;`, - ]); - sourceFile - .copy(path.resolve(baseDir, "tiktoken.cjs"), { overwrite: true }) + .copy(path.resolve(baseDir, "tiktoken_bg.cjs"), { overwrite: true }) .saveSync(); } - // init.js + // tiktoken.js + { + fs.writeFileSync( + path.resolve(baseDir, "tiktoken.cjs"), + [ + `const wasm = require("./tiktoken_bg.cjs");`, + `let imports = {};`, + `imports["./tiktoken_bg.js"] = wasm;`, + `const path = require("path").join(__dirname, "tiktoken_bg.wasm");`, + `const bytes = require("fs").readFileSync(path);`, + `const wasmModule = new WebAssembly.Module(bytes);`, + `const wasmInstance = new WebAssembly.Instance(wasmModule, imports);`, + `wasm.__wbg_set_wasm(wasmInstance.exports);`, + ...publicExports.map((name) => `exports["${name}"] = wasm["${name}"];`), + ].join("\n"), + { encoding: "utf-8" } + ); + } + + // init.js and init.cjs { - const sourceFile = new Project({ - compilerOptions: { - target: ScriptTarget.ES2022, - moduleResolution: ts.ModuleResolutionKind.NodeJs, - strict: true, - declaration: true, - }, - }).addSourceFileAtPath(path.resolve(__dirname, "../src/init.ts")); - - const emitOutput = sourceFile.getEmitOutput(); - for (const file of emitOutput.getOutputFiles()) { - fs.writeFileSync( - path.resolve(baseDir, path.basename(file.getFilePath())), - file.getText(), - { encoding: "utf-8" } - 
); + for (const module of [ts.ModuleKind.CommonJS, ts.ModuleKind.ES2022]) { + const sourceFile = new Project({ + compilerOptions: { + target: ScriptTarget.ES2022, + module, + moduleResolution: ts.ModuleResolutionKind.NodeJs, + strict: true, + declaration: true, + }, + }).addSourceFileAtPath(path.resolve(__dirname, "../src/init.ts")); + + const emitOutput = sourceFile.getEmitOutput(); + for (const file of emitOutput.getOutputFiles()) { + let targetFile = path.basename(file.getFilePath()); + + let source = file.getText(); + if (module === ts.ModuleKind.CommonJS) { + targetFile = targetFile.replace(".js", ".cjs"); + source = source + .replaceAll(`"./tiktoken_bg"`, `"./tiktoken_bg.cjs"`) + .replaceAll( + `exports.init = init;`, + `exports.init = init;\n${publicExports + .map((name) => `exports["${name}"] = imports["${name}"];`) + .join("\n")}` + ); + } + + fs.writeFileSync(path.resolve(baseDir, targetFile), source, { + encoding: "utf-8", + }); + } } } @@ -199,6 +235,7 @@ for (const baseDir of [ }, "./init": { types: "./init.d.ts", + node: "./init.cjs", default: "./init.js", }, "./load": { @@ -221,6 +258,7 @@ for (const baseDir of [ }, "./lite/init": { types: "./lite/init.d.ts", + node: "./lite/init.cjs", default: "./lite/init.js", }, "./lite/load": { From 1ee26c35324dd95af1a0216b12426968ad8a959a Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 11 Mar 2023 21:37:51 +0100 Subject: [PATCH 069/207] Remove bundler, as it is unnecessary --- js/README.md | 31 +++++++++++++++---------------- js/package.json | 2 +- js/scripts/post_process.ts | 22 +--------------------- 3 files changed, 17 insertions(+), 38 deletions(-) diff --git a/js/README.md b/js/README.md index 4f83ef09..20be6ddb 100644 --- a/js/README.md +++ b/js/README.md @@ -79,21 +79,7 @@ export default defineConfig({ ### [Next.js](#nextjs) -Both API routes and `/pages` are supported with the following configuration. 
To overcome issues with importing Node.js version, you can import the package from `@dqbd/tiktoken/bundler` instead. - -```typescript -import { get_encoding } from "@dqbd/tiktoken/bundler"; -import { NextApiRequest, NextApiResponse } from "next"; - -export default function handler(req: NextApiRequest, res: NextApiResponse) { - const encoder = get_encoding("gpt2"); - const message = encoder.encode(`Hello World ${Math.random()}`); - encoder.free(); - return res.status(200).json({ message }); -} -``` - -Additional Webpack configuration is required. +Both API routes and `/pages` are supported with the following `next.config.js` configuration. ```typescript const config = { @@ -108,6 +94,20 @@ const config = { }; ``` +Here is an example usage in API routes: + +```typescript +import { get_encoding } from "@dqbd/tiktoken"; +import { NextApiRequest, NextApiResponse } from "next"; + +export default function handler(req: NextApiRequest, res: NextApiResponse) { + const encoder = get_encoding("gpt2"); + const message = encoder.encode(`Hello World ${Math.random()}`); + encoder.free(); + return res.status(200).json({ message }); +} +``` + ### [Vercel Edge Runtime](#vercel-edge-runtime) Vercel Edge Runtime does support WASM modules by adding a `?module` suffix. 
Initialize the encoder with the following snippet: @@ -161,4 +161,3 @@ export default { ## Acknowledgements - https://github.com/zurawiki/tiktoken-rs - diff --git a/js/package.json b/js/package.json index 1947d87d..1898bbad 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.0-alpha.2", + "version": "1.0.0-alpha.3", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { diff --git a/js/scripts/post_process.ts b/js/scripts/post_process.ts index 9da4e124..7eaf9636 100644 --- a/js/scripts/post_process.ts +++ b/js/scripts/post_process.ts @@ -179,20 +179,8 @@ for (const baseDir of [ } } - // bundler.js + // tiktoken_bg.d.ts { - fs.writeFileSync( - path.resolve(baseDir, "bundler.js"), - `export * from "./tiktoken";`.trim(), - { encoding: "utf-8" } - ); - - fs.writeFileSync( - path.resolve(baseDir, "bundler.d.ts"), - `export * from "./tiktoken";`.trim(), - { encoding: "utf-8" } - ); - fs.writeFileSync( path.resolve(baseDir, "tiktoken_bg.d.ts"), `export * from "./tiktoken";`.trim(), @@ -229,10 +217,6 @@ for (const baseDir of [ node: "./tiktoken.cjs", default: "./tiktoken.js", }, - "./bundler": { - types: "./bundler.d.ts", - default: "./bundler.js", - }, "./init": { types: "./init.d.ts", node: "./init.cjs", @@ -252,10 +236,6 @@ for (const baseDir of [ node: "./lite/tiktoken.cjs", default: "./lite/tiktoken.js", }, - "./lite/bundler": { - types: "./lite/bundler.d.ts", - default: "./lite/bundler.js", - }, "./lite/init": { types: "./lite/init.d.ts", node: "./lite/init.cjs", From 8984ea764ad338701b83bf60b76458fca771d38c Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 11 Mar 2023 21:42:11 +0100 Subject: [PATCH 070/207] Add disclaimer for CFW --- js/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/js/README.md b/js/README.md index 20be6ddb..010192c0 100644 --- a/js/README.md +++ b/js/README.md @@ -131,6 +131,8 @@ export default async function (req: Request) { ### 
[Cloudflare Workers](#cloudflare-workers) +> Currently work in progress, investigating crashes and workarounds to compress ranks. + Similar to Vercel Edge Runtime, Cloudflare Workers must import the WASM binary file manually. However, users need to point directly at the WASM binary, including `node_modules` prefix in some cases. Add the following rule to the `wrangler.toml` to upload WASM during build: From 9c8caec715dc8cef5eb3a9d234dc2998f30c26d0 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Tue, 14 Mar 2023 19:21:25 +0100 Subject: [PATCH 071/207] Add support for GPT-4 --- js/src/lib.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/js/src/lib.rs b/js/src/lib.rs index dbbc883a..571ee008 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -133,9 +133,10 @@ pub struct Tiktoken { impl Tiktoken { #[wasm_bindgen(constructor)] pub fn new(tiktoken_bfe: &str, special_tokens: JsValue, pat_str: &str) -> Self { + let constructor = CoreBPEConstructor::new( tiktoken_bfe, - special_tokens.into_serde::>().ok(), + Some(HashMap::default()), pat_str, ); @@ -367,6 +368,8 @@ export type TiktokenModel = | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt2" + | "gpt-4" + | "gpt-4-32k" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301"; @@ -417,6 +420,8 @@ pub fn encoding_for_model( "gpt2" => Ok("gpt2"), "gpt-3.5-turbo" => Ok("cl100k_base"), "gpt-3.5-turbo-0301" => Ok("cl100k_base"), + "gpt-4" => Ok("cl100k_base"), + "gpt-4-32k" => Ok("cl100k_base"), model => Err(JsError::new( format!("Invalid model: {}", model.to_string()).as_str(), )), From 103d0107006a1c2e804e2d2a4740b136a5c05816 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Mar 2023 11:56:53 +0100 Subject: [PATCH 072/207] Expose model_to_encoding.json and registry.json --- js/README.md | 36 +++++++++++++++++++++++++++++++----- js/scripts/post_process.ts | 12 ++++++++++++ 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/js/README.md b/js/README.md index 010192c0..5a5cc890 
100644 --- a/js/README.md +++ b/js/README.md @@ -82,6 +82,7 @@ export default defineConfig({ Both API routes and `/pages` are supported with the following `next.config.js` configuration. ```typescript +// next.config.js const config = { webpack(config, { isServer, dev }) { config.experiments = { @@ -94,17 +95,42 @@ const config = { }; ``` -Here is an example usage in API routes: +Usage in pages: + +```tsx +import { get_encoding } from "@dqbd/tiktoken"; +import { useState } from "react"; + +const encoding = get_encoding("cl100k_base"); + +export default function Home() { + const [input, setInput] = useState("hello world"); + const tokens = encoding.encode(input); + + return ( +
+ setInput(e.target.value)} + /> +
{tokens.toString()}
+
+ ); +} +``` + +Usage in API routes: ```typescript import { get_encoding } from "@dqbd/tiktoken"; import { NextApiRequest, NextApiResponse } from "next"; export default function handler(req: NextApiRequest, res: NextApiResponse) { - const encoder = get_encoding("gpt2"); - const message = encoder.encode(`Hello World ${Math.random()}`); - encoder.free(); - return res.status(200).json({ message }); + const encoding = get_encoding("cl100k_base"); + const tokens = encoding.encode("hello world"); + encoding.free(); + return res.status(200).json({ tokens }); } ``` diff --git a/js/scripts/post_process.ts b/js/scripts/post_process.ts index 7eaf9636..dc063414 100644 --- a/js/scripts/post_process.ts +++ b/js/scripts/post_process.ts @@ -250,6 +250,8 @@ for (const baseDir of [ types: "./lite/tiktoken_bg.wasm.d.ts", default: "./lite/tiktoken_bg.wasm", }, + "./model_to_encoding.json": "./model_to_encoding.json", + "./registry.json": "./registry.json", }; fs.writeFileSync( @@ -262,4 +264,14 @@ for (const baseDir of [ path.resolve(__dirname, "../README.md"), path.resolve(__dirname, "../dist/README.md") ); + + fs.copyFileSync( + path.resolve(__dirname, "../../tiktoken/model_to_encoding.json"), + path.resolve(__dirname, "../dist/model_to_encoding.json") + ); + + fs.copyFileSync( + path.resolve(__dirname, "../../tiktoken/registry.json"), + path.resolve(__dirname, "../dist/registry.json") + ); } From 79ac0360ded94e219bfef4ee3328a512933855b5 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Mar 2023 11:56:59 +0100 Subject: [PATCH 073/207] Bump to 1.0.0-alpha.5 --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index 1898bbad..67ba7d9f 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.0-alpha.3", + "version": "1.0.0-alpha.5", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { From 
f7487547ea07a7ff4b0ec6e7b72b03c0581b6743 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Mar 2023 13:12:03 +0100 Subject: [PATCH 074/207] Fix lite crash, add README.md --- js/README.md | 22 ++++++++++++++++++++++ js/src/lib.rs | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/js/README.md b/js/README.md index 5a5cc890..7863f80f 100644 --- a/js/README.md +++ b/js/README.md @@ -49,6 +49,28 @@ const encoder = new Tiktoken( ); ``` +For more constrained runtimes (eg. Edge Runtime), use the `@dqbd/tiktoken/lite` module, which does not inline the ranks into the WASM binary, reducing the binary size from 1.8 MB gzipped down to ~270 kB gzipped. + +```typescript +const { Tiktoken } = require("@dqbd/tiktoken/lite"); +const { load } = require("@dqbd/tiktoken/load"); +const registry = require("@dqbd/tiktoken/registry.json"); +const models = require("@dqbd/tiktoken/model_to_encoding.json"); + +async function main() { + const model = await load(registry[models["gpt2"]]); + const encoder = new Tiktoken( + model.bpe_ranks, + model.special_tokens, + model.pat_str + ); + const tokens = encoding.encode("hello world"); + encoder.free(); +} + +main(); +``` + ## Compatibility As this is a WASM library, there might be some issues with specific runtimes. If you encounter any issues, please open an issue. 
diff --git a/js/src/lib.rs b/js/src/lib.rs index 571ee008..4d956559 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -136,7 +136,7 @@ impl Tiktoken { let constructor = CoreBPEConstructor::new( tiktoken_bfe, - Some(HashMap::default()), + special_tokens.into_serde::>().ok(), pat_str, ); From bfe38177e128f50ca1585dca34e3bf73f4e92267 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Mar 2023 13:23:19 +0100 Subject: [PATCH 075/207] Update README.md --- js/README.md | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/js/README.md b/js/README.md index 7863f80f..01946036 100644 --- a/js/README.md +++ b/js/README.md @@ -11,7 +11,7 @@ npm install @dqbd/tiktoken ## Usage -Basic usage follows: +Basic usage follows, which includes all the OpenAI encoders and ranks: ```typescript import assert from "node:assert"; @@ -36,6 +36,28 @@ const enc = encoding_for_model("gpt2", { enc.free(); ``` +In constrained environments (eg. Edge Runtime, Cloudflare Workers), where you don't want to load all the encoders at once, you can use the lightweight WASM binary via `@dqbd/tiktoken/lite` and load the appropriate encoder data from registry manually. + +```typescript +const { Tiktoken } = require("@dqbd/tiktoken/lite"); +const { load } = require("@dqbd/tiktoken/load"); +const registry = require("@dqbd/tiktoken/registry.json"); +const models = require("@dqbd/tiktoken/model_to_encoding.json"); + +async function main() { + const model = await load(registry[models["gpt-3.5-turbo"]]); + const encoder = new Tiktoken( + model.bpe_ranks, + model.special_tokens, + model.pat_str + ); + const tokens = encoder.encode("hello world"); + encoder.free(); +} + +main(); +``` + If desired, you can create a Tiktoken instance directly with custom ranks, special tokens and regex pattern: ```typescript @@ -49,23 +71,18 @@ const encoder = new Tiktoken( ); ``` -For more constrained runtimes (eg.
Edge Runtime), use the `@dqbd/tiktoken/lite` module, which does not inline the ranks into the WASM binary, reducing the binary size from 1.8 MB gzipped down to ~270 kB gzipped. +Finally, you can use a custom `init` function to override the WASM initialization logic for non-Node environments. This is useful if you are using a bundler that does not support WASM ESM integration. ```typescript -const { Tiktoken } = require("@dqbd/tiktoken/lite"); -const { load } = require("@dqbd/tiktoken/load"); -const registry = require("@dqbd/tiktoken/registry.json"); -const models = require("@dqbd/tiktoken/model_to_encoding.json"); +import { get_encoding, init } from "@dqbd/tiktoken/init"; async function main() { - const model = await load(registry[models["gpt2"]]); - const encoder = new Tiktoken( - model.bpe_ranks, - model.special_tokens, - model.pat_str - ); + const wasm = "..."; // fetch the WASM binary somehow + await init((imports) => WebAssembly.instantiate(wasm, imports)); + + const encoding = get_encoding("cl100k_base"); const tokens = encoding.encode("hello world"); - encoder.free(); + encoding.free(); } main(); From 9847f4d5804a6ba8667a9706bdc011fe0fb65ff7 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Mar 2023 13:44:10 +0100 Subject: [PATCH 076/207] Bump to 1.0.0-alpha.6 --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index 67ba7d9f..ab5f0803 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.0-alpha.5", + "version": "1.0.0-alpha.6", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { From c01af191edc5ddf138c4ec0a4d2bc37f08fbb12e Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Mar 2023 16:03:44 +0100 Subject: [PATCH 077/207] Add JSON importable modules --- js/scripts/inline_ranks.ts | 13 ++++++++++--- js/scripts/post_process.ts | 27 +++++++++++++++++++++++---- 2 files changed, 33
insertions(+), 7 deletions(-) diff --git a/js/scripts/inline_ranks.ts b/js/scripts/inline_ranks.ts index 993d59ff..9dc11f46 100644 --- a/js/scripts/inline_ranks.ts +++ b/js/scripts/inline_ranks.ts @@ -17,15 +17,22 @@ async function main() { console.log(name); const data = registry[name]; - const targetFile = path.resolve(__dirname, `../ranks/${name}.tiktoken`); + const tiktokenFile = path.resolve(__dirname, `../ranks/${name}.tiktoken`); + const jsonFile = path.resolve(__dirname, `../ranks/${name}.json`); try { - await fs.stat(targetFile); + await Promise.all([fs.stat(tiktokenFile), fs.stat(jsonFile)]); continue; } catch {} const result = await load(data); - await fs.writeFile(targetFile, result.bpe_ranks, { encoding: "utf-8" }); + + await Promise.all([ + fs.writeFile(tiktokenFile, result.bpe_ranks, { encoding: "utf-8" }), + fs.writeFile(jsonFile, JSON.stringify(result), { + encoding: "utf-8", + }), + ]); } } diff --git a/js/scripts/post_process.ts b/js/scripts/post_process.ts index dc063414..5b6cd969 100644 --- a/js/scripts/post_process.ts +++ b/js/scripts/post_process.ts @@ -254,12 +254,25 @@ for (const baseDir of [ "./registry.json": "./registry.json", }; - fs.writeFileSync( - path.resolve(__dirname, "../dist/package.json"), - JSON.stringify(pkg, null, 2), - { encoding: "utf-8" } + const registry = JSON.parse( + fs.readFileSync(path.resolve(__dirname, "../../tiktoken/registry.json"), { + encoding: "utf-8", + }) ); + fs.mkdirSync(path.resolve(__dirname, "../dist/encoders"), { + recursive: true, + }); + + for (const key in registry) { + fs.copyFileSync( + path.resolve(__dirname, `../ranks/${key}.json`), + path.resolve(__dirname, `../dist/encoders/${key}.json`) + ); + + pkg["exports"][`./encoders/${key}.json`] = `./encoders/${key}.json`; + } + fs.copyFileSync( path.resolve(__dirname, "../README.md"), path.resolve(__dirname, "../dist/README.md") @@ -274,4 +287,10 @@ for (const baseDir of [ path.resolve(__dirname, "../../tiktoken/registry.json"), 
path.resolve(__dirname, "../dist/registry.json") ); + + fs.writeFileSync( + path.resolve(__dirname, "../dist/package.json"), + JSON.stringify(pkg, null, 2), + { encoding: "utf-8" } + ); } From 23bb57d0c3f5d6e016551b687f5dc8191081c9dc Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Mar 2023 16:03:52 +0100 Subject: [PATCH 078/207] Bump to 1.0.0-alpha.7 --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index ab5f0803..ac7247a4 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.0-alpha.6", + "version": "1.0.0-alpha.7", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { From a3baa6ca183e76a0fd4d2bb2104726a506ea8b1b Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Mar 2023 18:50:07 +0100 Subject: [PATCH 079/207] Bump to 1.0.0-alpha.8 --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index ac7247a4..11bc95ea 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.0-alpha.7", + "version": "1.0.0-alpha.8", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { From 6ac1a1ae1621f30c585bd89ca595284409f26088 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Mar 2023 18:53:18 +0100 Subject: [PATCH 080/207] Compress ranks --- js/scripts/inline_ranks.ts | 82 ++++++++++++++++++++++++++++++++++---- js/src/lib.rs | 24 ++++++++--- js/src/load.ts | 2 +- 3 files changed, 94 insertions(+), 14 deletions(-) diff --git a/js/scripts/inline_ranks.ts b/js/scripts/inline_ranks.ts index 9dc11f46..f508fd85 100644 --- a/js/scripts/inline_ranks.ts +++ b/js/scripts/inline_ranks.ts @@ -2,6 +2,61 @@ import fs from "node:fs/promises"; import path from "node:path"; import { load } from "../src/load"; +function compress_tiktoken_bpe(tiktoken_bpe_file: string) { + 
const original = tiktoken_bpe_file + .split("\n") + .map((line) => line.trim() && line.split(" ")) + .filter((x): x is Array => !!x && Array.isArray(x)) + .map(([token, rank]) => [token, Number.parseInt(rank, 10)] as const) + .sort((a, b) => a[1] - b[1]); + + const newTokens = original.reduce< + Array<{ offset: number; tokens: string[] }> + >((memo, item) => { + if (memo.length === 0) return [{ offset: item[1], tokens: [item[0]] }]; + const lastSplit = memo[memo.length - 1]; + const nextOffset = lastSplit.offset + lastSplit.tokens.length; + + if (nextOffset === item[1]) { + lastSplit.tokens.push(item[0]); + return memo; + } + + return [...memo, { offset: item[1], tokens: [item[0]] }]; + }, []); + + const compressed = newTokens + .map((x) => `! ${x.offset} ${x.tokens.join(" ")}`) + .join("\n"); + + // make sure the compressed and the original files are the same + const tiktokenOld = compressed + .split("\n") + .filter(Boolean) + .reduce>((memo, x) => { + const [_, offsetStr, ...tokens] = x.split(" "); + const offset = Number.parseInt(offsetStr, 10); + tokens.forEach((token, i) => (memo[token] = offset + i)); + return memo; + }, {}); + + function normalize_map(items: Record) { + return JSON.stringify( + Object.keys(items) + .sort() + .map((key) => [key, items[key]]) + ); + } + + if ( + normalize_map(tiktokenOld) !== normalize_map(Object.fromEntries(original)) + ) { + throw new Error("Invalid compression"); + } + + return compressed; +} + async function main() { try { await fs.mkdir(path.resolve(__dirname, "../ranks"), { recursive: true }); @@ -18,21 +73,34 @@ async function main() { const data = registry[name]; const tiktokenFile = path.resolve(__dirname, `../ranks/${name}.tiktoken`); + const tiktokenCompressedFile = path.resolve( + __dirname, + `../ranks/${name}.compress.tiktoken` + ); const jsonFile = path.resolve(__dirname, `../ranks/${name}.json`); try { - await Promise.all([fs.stat(tiktokenFile), fs.stat(jsonFile)]); + await Promise.all([ + 
fs.stat(tiktokenFile), + fs.stat(jsonFile), + fs.stat(tiktokenCompressedFile), + ]); continue; } catch {} const result = await load(data); + await fs.writeFile(tiktokenFile, result.bpe_ranks, { encoding: "utf-8" }); + + const compress = compress_tiktoken_bpe(result.bpe_ranks); + await fs.writeFile(tiktokenCompressedFile, compress, { + encoding: "utf-8", + }); - await Promise.all([ - fs.writeFile(tiktokenFile, result.bpe_ranks, { encoding: "utf-8" }), - fs.writeFile(jsonFile, JSON.stringify(result), { - encoding: "utf-8", - }), - ]); + await fs.writeFile( + jsonFile, + JSON.stringify({ ...result, bpe_ranks: compress }), + { encoding: "utf-8" } + ); } } diff --git a/js/src/lib.rs b/js/src/lib.rs index 4d956559..fd85d9c9 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -44,11 +44,24 @@ impl CoreBPEConstructor { fn parse_bfe(tiktoken_bfe: &str) -> Result, usize>, Error> { let mut encoder = HashMap::default(); - for line in tiktoken_bfe.lines() { - let mut parts = line.split(' '); - let token = &general_purpose::STANDARD.decode(parts.next().unwrap())?; - let rank: usize = parts.next().unwrap().parse().unwrap(); - encoder.insert(token.clone(), rank); + if tiktoken_bfe.chars().next().unwrap() == '!' 
{ + for line in tiktoken_bfe.lines() { + let mut parts = line.split(' '); + parts.next().unwrap(); + + let offset: i32 = parts.next().unwrap().parse()?; + for (pos, token) in parts.enumerate() { + let token = &general_purpose::STANDARD.decode(token)?; + encoder.insert(token.clone(), (offset as usize) + pos); + } + } + } else { + for line in tiktoken_bfe.lines() { + let mut parts = line.split(' '); + let token = &general_purpose::STANDARD.decode(parts.next().unwrap())?; + let rank: usize = parts.next().unwrap().parse().unwrap(); + encoder.insert(token.clone(), rank); + } } Ok(encoder) @@ -133,7 +146,6 @@ pub struct Tiktoken { impl Tiktoken { #[wasm_bindgen(constructor)] pub fn new(tiktoken_bfe: &str, special_tokens: JsValue, pat_str: &str) -> Self { - let constructor = CoreBPEConstructor::new( tiktoken_bfe, special_tokens.into_serde::>().ok(), diff --git a/js/src/load.ts b/js/src/load.ts index 895919c9..e4634006 100644 --- a/js/src/load.ts +++ b/js/src/load.ts @@ -205,7 +205,7 @@ export async function load( }; } ) & { - explicit_n_vocab: number; + explicit_n_vocab?: number; pat_str: string; special_tokens: Record; }, From 7efba72caa37585e0f8671cae456e601182b2477 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Mar 2023 18:55:52 +0100 Subject: [PATCH 081/207] Use compressed version to make main WASM smaller --- js/src/lib.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/js/src/lib.rs b/js/src/lib.rs index fd85d9c9..e780f3ef 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -73,7 +73,7 @@ impl CoreBPEConstructor { special_tokens.insert(String::from(ENDOFTEXT), 50256); CoreBPEConstructor::new( - include_str!("../ranks/gpt2.tiktoken"), + include_str!("../ranks/gpt2.compress.tiktoken"), Some(special_tokens), "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", ) @@ -85,7 +85,7 @@ impl CoreBPEConstructor { special_tokens.insert(String::from(ENDOFTEXT), 50256); CoreBPEConstructor::new( - 
include_str!("../ranks/r50k_base.tiktoken"), + include_str!("../ranks/r50k_base.compress.tiktoken"), Some(special_tokens), "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", ) @@ -97,7 +97,7 @@ impl CoreBPEConstructor { special_tokens.insert(String::from(ENDOFTEXT), 50256); CoreBPEConstructor::new( - include_str!("../ranks/p50k_base.tiktoken"), + include_str!("../ranks/p50k_base.compress.tiktoken"), Some(special_tokens), "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", ) @@ -112,7 +112,7 @@ impl CoreBPEConstructor { special_tokens.insert(String::from(FIM_SUFFIX), 50283); CoreBPEConstructor::new( - include_str!("../ranks/p50k_base.tiktoken"), + include_str!("../ranks/p50k_base.compress.tiktoken"), Some(special_tokens), "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", ) @@ -128,7 +128,7 @@ impl CoreBPEConstructor { special_tokens.insert(String::from(ENDOFPROMPT), 100276); CoreBPEConstructor::new( - include_str!("../ranks/cl100k_base.tiktoken"), + include_str!("../ranks/cl100k_base.compress.tiktoken"), Some(special_tokens), "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", ) From 4d4b921e519c5679833c1f193310fdbeabd75e3c Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Mar 2023 19:00:12 +0100 Subject: [PATCH 082/207] Bump to 1.0.0-alpha.10 --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index 11bc95ea..1c405e7d 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.0-alpha.8", + "version": "1.0.0-alpha.10", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { From efe372862d19d2da8b466d0b1432a7344558d978 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Mar 2023 19:13:32 +0100 Subject: [PATCH 083/207] 
Update README.md --- js/README.md | 56 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/js/README.md b/js/README.md index 01946036..45fda50c 100644 --- a/js/README.md +++ b/js/README.md @@ -36,7 +36,22 @@ const enc = encoding_for_model("gpt2", { enc.free(); ``` -In constrained environments (eg. Edge Runtime, Cloudflare Workers), where you don't want to load all the encoders at once, you can use the lightweight WASM binary via `@dqbd/tiktoken/lite` and load the appropriate encoder data from registry manually. +In constrained environments (eg. Edge Runtime, Cloudflare Workers), where you don't want to load all the encoders at once, you can use the lightweight WASM binary via `@dqbd/tiktoken/lite`. + +```typescript +const { Tiktoken } = require("@dqbd/tiktoken/lite"); +const cl100k_base = require("@dqbd/tiktoken/encoders/cl100k_base.json"); + +const encoding = new Tiktoken( + cl100k_base.bpe_ranks, + cl100k_base.special_tokens, + cl100k_base.pat_str +); +const tokens = encoding.encode("hello world"); +encoding.free(); +``` + +If you want to fetch the latest ranks, use the `load` function: ```typescript const { Tiktoken } = require("@dqbd/tiktoken/lite"); @@ -98,7 +113,7 @@ As this is a WASM library, there might be some issues with specific runtimes. If | Bun | ✅ | | | Vite | ✅ | See [here](#vite) for notes | | Next.js | ✅ | See [here](#nextjs) for notes | -| Vercel Edge Runtime | ✅ | See [here](#vercel-edge-runtime) for notes | +| Vercel Edge Runtime | 🚧 | See [here](#vercel-edge-runtime) for notes | | Cloudflare Workers | 🚧 | See [here](#cloudflare-workers) for caveats | | Deno | ❌ | Currently unsupported | @@ -178,27 +193,33 @@ export default function handler(req: NextApiRequest, res: NextApiResponse) { Vercel Edge Runtime does support WASM modules by adding a `?module` suffix. 
Initialize the encoder with the following snippet: ```typescript -import wasm from "@dqbd/tiktoken/tiktoken_bg.wasm?module"; -import { init, get_encoding } from "@dqbd/tiktoken/init"; +// @ts-expect-error +import wasm from "@dqbd/tiktoken/lite/tiktoken_bg.wasm?module"; +import model from "@dqbd/tiktoken/encoders/cl100k_base.json"; +import { init, Tiktoken } from "@dqbd/tiktoken/lite/init"; export const config = { runtime: "edge" }; export default async function (req: Request) { await init((imports) => WebAssembly.instantiate(wasm, imports)); - const encoder = get_encoding("cl100k_base"); - const tokens = encoder.encode("hello world"); - encoder.free(); + const encoding = new Tiktoken( + model.bpe_ranks, + model.special_tokens, + model.pat_str + ); - return new Response(`${encoder.encode("hello world")}`); + const tokens = encoding.encode("hello world"); + encoding.free(); + + return new Response(`${tokens}`); } + ``` ### [Cloudflare Workers](#cloudflare-workers) -> Currently work in progress, investigating crashes and workarounds to compress ranks. - -Similar to Vercel Edge Runtime, Cloudflare Workers must import the WASM binary file manually. However, users need to point directly at the WASM binary, including `node_modules` prefix in some cases. +Similar to Vercel Edge Runtime, Cloudflare Workers must import the WASM binary file manually and use the `@dqbd/tiktoken/lite` version to fit the 1 MB limit. However, users need to point directly at the WASM binary via a relative path (including `./node_modules/`). 
Add the following rule to the `wrangler.toml` to upload WASM during build: @@ -211,14 +232,19 @@ type = "CompiledWasm" Initialize the encoder with the following snippet: ```javascript -import wasm from "./node_modules/@dqbd/tiktoken/tiktoken_bg.wasm"; -import { get_encoding, init } from "@dqbd/tiktoken/init"; +import { init, Tiktoken } from "@dqbd/tiktoken/lite/init"; +import wasm from "./node_modules/@dqbd/tiktoken/lite/tiktoken_bg.wasm"; +import model from "@dqbd/tiktoken/encoders/cl100k_base.json"; export default { async fetch() { await init((imports) => WebAssembly.instantiate(wasm, imports)); - const encoder = get_encoder("cl100k_base"); - const tokens = encoder.encode("hello world"); + const encoder = new Tiktoken( + model.bpe_ranks, + model.special_tokens, + model.pat_str + ); + const tokens = encoder.encode("test"); encoder.free(); return new Response(`${tokens}`); }, From 8476ecaed92ef0304e508e8e7c16ebf9ea78bfc0 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Mar 2023 19:22:21 +0100 Subject: [PATCH 084/207] Bump to 1.0.0 --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index 1c405e7d..2d708208 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.0-alpha.10", + "version": "1.0.0", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { From e1c4313a35a32e7b586aee2094192c0457a2bfa0 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 16 Mar 2023 02:44:43 +0100 Subject: [PATCH 085/207] Fix issues with duplicate initialization --- js/src/init.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/js/src/init.ts b/js/src/init.ts index c26d5152..135099e7 100644 --- a/js/src/init.ts +++ b/js/src/init.ts @@ -1,11 +1,13 @@ // @ts-expect-error import * as imports from "./tiktoken_bg"; +let isInitialized = false; export async function init( callback: ( imports: WebAssembly.Imports ) => Promise ): 
Promise { + if (isInitialized) return imports; const result = await callback({ "./tiktoken_bg.js": imports }); const instance = "instance" in result && result.instance instanceof WebAssembly.Instance @@ -15,6 +17,7 @@ export async function init( : null; if (instance == null) throw new Error("Missing instance"); imports.__wbg_set_wasm(instance.exports); + isInitialized = true; return imports; } From c4dfaadb3df08f6f8c64cec5761faa0fd85ef042 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 16 Mar 2023 02:44:50 +0100 Subject: [PATCH 086/207] Bump to 1.0.1 --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index 2d708208..a55c1902 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.0", + "version": "1.0.1", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { From 68efa86723b67247a039548f03fa2286dfcf030a Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 16 Mar 2023 02:49:21 +0100 Subject: [PATCH 087/207] Clarifies usage Fixes Crash in Cloudflare Workers / Vercel Edge Runtime dqbd/tiktoken#20 --- js/README.md | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/js/README.md b/js/README.md index 45fda50c..1756d4a9 100644 --- a/js/README.md +++ b/js/README.md @@ -107,15 +107,15 @@ main(); As this is a WASM library, there might be some issues with specific runtimes. If you encounter any issues, please open an issue. 
-| Runtime | Status | Notes | -| ------------------- | ------ | ------------------------------------------- | -| Node.js | ✅ | | -| Bun | ✅ | | -| Vite | ✅ | See [here](#vite) for notes | -| Next.js | ✅ | See [here](#nextjs) for notes | -| Vercel Edge Runtime | 🚧 | See [here](#vercel-edge-runtime) for notes | -| Cloudflare Workers | 🚧 | See [here](#cloudflare-workers) for caveats | -| Deno | ❌ | Currently unsupported | +| Runtime | Status | Notes | +| ------------------- | ------ | ------------------------------------------ | +| Node.js | ✅ | | +| Bun | ✅ | | +| Vite | ✅ | See [here](#vite) for notes | +| Next.js | ✅ | See [here](#nextjs) for notes | +| Vercel Edge Runtime | ✅ | See [here](#vercel-edge-runtime) for notes | +| Cloudflare Workers | ✅ | See [here](#cloudflare-workers) for notes | +| Deno | ❌ | Currently unsupported | ### [Vite](#vite) @@ -214,7 +214,6 @@ export default async function (req: Request) { return new Response(`${tokens}`); } - ``` ### [Cloudflare Workers](#cloudflare-workers) From 481fb453c9f406d7b7c8b7e10cf7246b4a944d35 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 16 Mar 2023 02:49:59 +0100 Subject: [PATCH 088/207] Bump to 1.0.2 --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index a55c1902..d45178a1 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.1", + "version": "1.0.2", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { From 6823b8e59260c90e30f71e795e68aac65ed92bd2 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 19 Mar 2023 18:09:08 +0100 Subject: [PATCH 089/207] Update README.md with information about Create React App --- js/README.md | 48 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/js/README.md b/js/README.md index 1756d4a9..f9b1ac23 100644 --- a/js/README.md +++ b/js/README.md @@ 
-107,15 +107,16 @@ main(); As this is a WASM library, there might be some issues with specific runtimes. If you encounter any issues, please open an issue. -| Runtime | Status | Notes | -| ------------------- | ------ | ------------------------------------------ | -| Node.js | ✅ | | -| Bun | ✅ | | -| Vite | ✅ | See [here](#vite) for notes | -| Next.js | ✅ | See [here](#nextjs) for notes | -| Vercel Edge Runtime | ✅ | See [here](#vercel-edge-runtime) for notes | -| Cloudflare Workers | ✅ | See [here](#cloudflare-workers) for notes | -| Deno | ❌ | Currently unsupported | +| Runtime | Status | Notes | +| ---------------------------- | ------ | ------------------------------------------ | +| Node.js | ✅ | | +| Bun | ✅ | | +| Vite | ✅ | See [here](#vite) for notes | +| Next.js | ✅ | See [here](#nextjs) for notes | +| Create React App (via Craco) | ✅ | See [here](#create-react-app) for notes | +| Vercel Edge Runtime | ✅ | See [here](#vercel-edge-runtime) for notes | +| Cloudflare Workers | ✅ | See [here](#cloudflare-workers) for notes | +| Deno | ❌ | Currently unsupported | ### [Vite](#vite) @@ -188,6 +189,35 @@ export default function handler(req: NextApiRequest, res: NextApiResponse) { } ``` +### [Create React App](#create-react-app) + +By default, the Webpack configuration found in Create React App does not support WASM ESM modules. To add support, please do the following: + +1. Swap `react-scripts` with `craco`, using the guide found here: https://craco.js.org/docs/getting-started/. +2.
Add the following to `craco.config.js`: + +```js +module.exports = { + webpack: { + configure: (config) => { + config.experiments = { + asyncWebAssembly: true, + layers: true, + }; + + // turn off static file serving of WASM files + // we need to let Webpack handle WASM import + config.module.rules + .find((i) => "oneOf" in i) + .oneOf.find((i) => i.type === "asset/resource") + .exclude.push(/\.wasm$/); + + return config; + }, + }, +}; +``` + ### [Vercel Edge Runtime](#vercel-edge-runtime) Vercel Edge Runtime does support WASM modules by adding a `?module` suffix. Initialize the encoder with the following snippet: From b9a03a7293a3c958eb1bf6891a05ae5a99033e06 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Mon, 20 Mar 2023 00:43:43 +0100 Subject: [PATCH 090/207] Add custom exception when not initialized --- js/scripts/post_process.ts | 54 +++++++++++++++++++++++++++++- js/test/init_error.test.ts | 18 ++++++++++ js/test/test_simple_public.test.ts | 2 ++ 3 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 js/test/init_error.test.ts diff --git a/js/scripts/post_process.ts b/js/scripts/post_process.ts index 5b6cd969..fa1dbd6e 100644 --- a/js/scripts/post_process.ts +++ b/js/scripts/post_process.ts @@ -1,4 +1,12 @@ -import { Project, ScriptTarget, StructureKind, ts } from "ts-morph"; +import { + ConstructorDeclaration, + FunctionDeclaration, + MethodDeclaration, + Project, + ScriptTarget, + StructureKind, + ts, +} from "ts-morph"; import * as fs from "node:fs"; import * as path from "node:path"; @@ -53,6 +61,50 @@ for (const baseDir of [ sourceFile.saveSync(); } + // tiktoken_bg.js + { + const sourceFile = new Project().addSourceFileAtPath( + path.resolve(baseDir, "tiktoken_bg.js") + ); + + function prependWasmCheck( + call: FunctionDeclaration | MethodDeclaration | ConstructorDeclaration + ) { + if ( + call instanceof FunctionDeclaration && + call.getName() === "__wbg_set_wasm" + ) { + return; + } + + const statements = call + 
.getDescendantsOfKind(ts.SyntaxKind.Identifier) + .filter((i) => i.getText() === "wasm"); + + if (statements.length > 0) { + call.insertStatements( + 0, + `if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized.");` + ); + } + } + + for (const cls of sourceFile.getClasses().filter((i) => i.isExported())) { + for (const method of cls.getMethods()) { + prependWasmCheck(method); + } + + for (const constructor of cls.getConstructors()) { + prependWasmCheck(constructor); + } + } + for (const fn of sourceFile.getFunctions().filter((i) => i.isExported())) { + prependWasmCheck(fn); + } + + sourceFile.saveSync(); + } + // tiktoken_bg.cjs { const sourceFile = new Project().addSourceFileAtPath( diff --git a/js/test/init_error.test.ts b/js/test/init_error.test.ts new file mode 100644 index 00000000..f7c39d64 --- /dev/null +++ b/js/test/init_error.test.ts @@ -0,0 +1,18 @@ +import { it, expect } from "vitest"; +import { encoding_for_model, get_encoding, Tiktoken } from "../dist/init"; +import model from "../dist/encoders/cl100k_base.json"; + +it("use before initialization", () => { + expect(() => encoding_for_model("gpt2")).toThrowError( + "@dqbd/tiktoken: WASM binary has not been propery initialized." + ); + expect(() => get_encoding("gpt2")).toThrowError( + "@dqbd/tiktoken: WASM binary has not been propery initialized." + ); + + expect( + () => new Tiktoken(model.bpe_ranks, model.special_tokens, model.pat_str) + ).toThrowError( + "@dqbd/tiktoken: WASM binary has not been propery initialized." 
+ ); +}); diff --git a/js/test/test_simple_public.test.ts b/js/test/test_simple_public.test.ts index 85dcd52a..e74a539f 100644 --- a/js/test/test_simple_public.test.ts +++ b/js/test/test_simple_public.test.ts @@ -135,3 +135,5 @@ it("invalid (dis)allowed_tokens", () => { "Invalid value for disallowed_special" ); }); + +it("invalid"); From 6bba615d0dcb9a710fad274c2dfe7787f5286cfd Mon Sep 17 00:00:00 2001 From: Christoph Witzko Date: Tue, 4 Apr 2023 11:24:32 +0200 Subject: [PATCH 091/207] Add gpt-4 to model_to_encoding.json --- tiktoken/model_to_encoding.json | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index d4eccd9a..6a90bbe8 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -30,5 +30,9 @@ "code-search-ada-code-001": "r50k_base", "gpt2": "gpt2", "gpt-3.5-turbo": "cl100k_base", - "gpt-3.5-turbo-0301": "cl100k_base" -} \ No newline at end of file + "gpt-3.5-turbo-0301": "cl100k_base", + "gpt-4": "cl100k_base", + "gpt-4-0314": "cl100k_base", + "gpt-4-32k": "cl100k_base", + "gpt-4-32k-0314": "cl100k_base" +} From 10e17c32bc8e7ab11b62d3555ead61b56543971c Mon Sep 17 00:00:00 2001 From: Christoph Witzko Date: Tue, 4 Apr 2023 16:11:45 +0200 Subject: [PATCH 092/207] Update lib.rs --- js/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/js/src/lib.rs b/js/src/lib.rs index e780f3ef..3b40312e 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -381,7 +381,9 @@ export type TiktokenModel = | "code-search-ada-code-001" | "gpt2" | "gpt-4" + | "gpt-4-0314" | "gpt-4-32k" + | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301"; @@ -433,7 +435,9 @@ pub fn encoding_for_model( "gpt-3.5-turbo" => Ok("cl100k_base"), "gpt-3.5-turbo-0301" => Ok("cl100k_base"), "gpt-4" => Ok("cl100k_base"), + "gpt-4-0314" => Ok("cl100k_base"), "gpt-4-32k" => Ok("cl100k_base"), + "gpt-4-32k-0314" => Ok("cl100k_base"), model => Err(JsError::new( format!("Invalid 
model: {}", model.to_string()).as_str(), )), From 7261f281842d7d2132c335fb3a4b4655ff59e522 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 5 Apr 2023 10:01:40 +0200 Subject: [PATCH 093/207] Bump to 1.0.3 --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index d45178a1..a4024a27 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.2", + "version": "1.0.3", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { From ff230c81a5857f86c67a7e087a5b884e076c69c5 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 8 Apr 2023 20:07:20 +0200 Subject: [PATCH 094/207] Fix Next.js + Webpack import issue due to missing exports entry --- js/scripts/post_process.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/js/scripts/post_process.ts b/js/scripts/post_process.ts index fa1dbd6e..c83bc1b8 100644 --- a/js/scripts/post_process.ts +++ b/js/scripts/post_process.ts @@ -283,6 +283,10 @@ for (const baseDir of [ types: "./tiktoken_bg.wasm.d.ts", default: "./tiktoken_bg.wasm", }, + "./tiktoken_bg.wasm?module": { + types: "./tiktoken_bg.wasm.d.ts", + default: "./tiktoken_bg.wasm?module", + }, "./lite": { types: "./lite/tiktoken.d.ts", node: "./lite/tiktoken.cjs", @@ -302,6 +306,10 @@ for (const baseDir of [ types: "./lite/tiktoken_bg.wasm.d.ts", default: "./lite/tiktoken_bg.wasm", }, + "./lite/tiktoken_bg.wasm?module": { + types: "./lite/tiktoken_bg.wasm.d.ts", + default: "./lite/tiktoken_bg.wasm?module", + }, "./model_to_encoding.json": "./model_to_encoding.json", "./registry.json": "./registry.json", }; From 83c2a857e1f8c299a61e84bc01b075a8acb31598 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 8 Apr 2023 20:08:01 +0200 Subject: [PATCH 095/207] Bump to 1.0.4 --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index a4024a27..37fbfbe3 
100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.3", + "version": "1.0.4", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { From d40517f826afe01e8abbbfd14b1de213073a7b06 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 12 Apr 2023 00:40:04 +0200 Subject: [PATCH 096/207] Reimplement node_modules detection for Next 13 appDir --- js/package.json | 1 + js/scripts/post_process.ts | 62 +++++++++++++++++++++++++++++++------- js/yarn.lock | 5 +++ 3 files changed, 57 insertions(+), 11 deletions(-) diff --git a/js/package.json b/js/package.json index 37fbfbe3..a047fb8c 100644 --- a/js/package.json +++ b/js/package.json @@ -21,6 +21,7 @@ "devDependencies": { "@types/node": "^18.14.4", "npm-run-all": "^4.1.5", + "outdent": "^0.8.0", "ts-morph": "^17.0.1", "tsx": "^3.12.3", "typescript": "^4.9.5", diff --git a/js/scripts/post_process.ts b/js/scripts/post_process.ts index c83bc1b8..87651dab 100644 --- a/js/scripts/post_process.ts +++ b/js/scripts/post_process.ts @@ -10,6 +10,8 @@ import { import * as fs from "node:fs"; import * as path from "node:path"; +import outdent from "outdent"; + for (const baseDir of [ path.resolve(__dirname, "../dist"), path.resolve(__dirname, "../dist/lite"), @@ -149,19 +151,57 @@ for (const baseDir of [ // tiktoken.js { + const relativeDir = path.relative( + path.resolve(__dirname, "../dist"), + baseDir + ); + fs.writeFileSync( path.resolve(baseDir, "tiktoken.cjs"), - [ - `const wasm = require("./tiktoken_bg.cjs");`, - `let imports = {};`, - `imports["./tiktoken_bg.js"] = wasm;`, - `const path = require("path").join(__dirname, "tiktoken_bg.wasm");`, - `const bytes = require("fs").readFileSync(path);`, - `const wasmModule = new WebAssembly.Module(bytes);`, - `const wasmInstance = new WebAssembly.Instance(wasmModule, imports);`, - `wasm.__wbg_set_wasm(wasmInstance.exports);`, - ...publicExports.map((name) => `exports["${name}"] = wasm["${name}"];`), - 
].join("\n"), + outdent` + const wasm = require("./tiktoken_bg.cjs"); + let imports = {}; + imports["./tiktoken_bg.js"] = wasm; + const path = require("path"); + const fs = require("fs"); + + const candidates = __dirname + .split(path.sep) + .reduce((memo, _, index, array) => { + const prefix = array.slice(0, index + 1).join(path.sep) + path.sep; + memo.push( + prefix.includes("node_modules" + path.sep) + ? path.join(prefix, "./tiktoken_bg.wasm") + : path.join( + prefix, + "node_modules", + "@dqbd", + "tiktoken", + "${relativeDir}", + "./tiktoken_bg.wasm" + ) + ); + return memo; + }, []) + .reverse(); + + let bytes = null; + for (const candidate of candidates) { + try { + bytes = fs.readFileSync(candidate); + break; + } catch {} + } + + if (bytes == null) throw new Error("Missing tiktoken_bg.wasm"); + const wasmModule = new WebAssembly.Module(bytes); + const wasmInstance = new WebAssembly.Instance(wasmModule, imports); + wasm.__wbg_set_wasm(wasmInstance.exports); + ` + + "\n" + + publicExports + .map((name) => `exports["${name}"] = wasm["${name}"];`) + .join("\n"), { encoding: "utf-8" } ); } diff --git a/js/yarn.lock b/js/yarn.lock index 06883178..a60606e4 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1095,6 +1095,11 @@ object.assign@^4.1.4: has-symbols "^1.0.3" object-keys "^1.1.1" +outdent@^0.8.0: + version "0.8.0" + resolved "https://registry.yarnpkg.com/outdent/-/outdent-0.8.0.tgz#2ebc3e77bf49912543f1008100ff8e7f44428eb0" + integrity sha512-KiOAIsdpUTcAXuykya5fnVVT+/5uS0Q1mrkRHcF89tpieSmY33O/tmc54CqwA+bfhbtEfZUNLHaPUiB9X3jt1A== + p-limit@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-4.0.0.tgz#914af6544ed32bfa54670b061cafcbd04984b644" From 496168ce29911febe582563a96d6962b3cb31da4 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 12 Apr 2023 00:40:17 +0200 Subject: [PATCH 097/207] Bump to 1.0.5 --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index 
a047fb8c..d8c321f7 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.4", + "version": "1.0.5", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { From 0c0cf001faa695f3288a3cb1e67e1e12ead82820 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 12 Apr 2023 00:55:26 +0200 Subject: [PATCH 098/207] Fix broken resolution when used in tests --- js/scripts/post_process.ts | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/js/scripts/post_process.ts b/js/scripts/post_process.ts index 87651dab..a8181310 100644 --- a/js/scripts/post_process.ts +++ b/js/scripts/post_process.ts @@ -169,21 +169,21 @@ for (const baseDir of [ .split(path.sep) .reduce((memo, _, index, array) => { const prefix = array.slice(0, index + 1).join(path.sep) + path.sep; - memo.push( - prefix.includes("node_modules" + path.sep) - ? path.join(prefix, "./tiktoken_bg.wasm") - : path.join( - prefix, - "node_modules", - "@dqbd", - "tiktoken", - "${relativeDir}", - "./tiktoken_bg.wasm" - ) - ); + if (!prefix.includes("node_modules" + path.sep)) { + memo.unshift( + path.join( + prefix, + "node_modules", + "@dqbd", + "tiktoken", + "${relativeDir}", + "./tiktoken_bg.wasm" + ) + ); + } return memo; }, []) - .reverse(); + candidates.unshift(path.join(__dirname, "./tiktoken_bg.wasm")); let bytes = null; for (const candidate of candidates) { From 26bf591198b1077361f7a9c47e2ca826d32cccc9 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 12 Apr 2023 01:03:54 +0200 Subject: [PATCH 099/207] Bump to 1.0.6 --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index d8c321f7..c1d5dea1 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.5", + "version": "1.0.6", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { From 
33405e996c0c5e351867a0946398daf6400ac352 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 28 Apr 2023 13:16:36 +0200 Subject: [PATCH 100/207] Use override for edge-light / NextJS appHandlers --- js/scripts/post_process.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/js/scripts/post_process.ts b/js/scripts/post_process.ts index a8181310..9b7255de 100644 --- a/js/scripts/post_process.ts +++ b/js/scripts/post_process.ts @@ -306,16 +306,19 @@ for (const baseDir of [ pkg["exports"] = { ".": { types: "./tiktoken.d.ts", + "edge-light": "./tiktoken.js", node: "./tiktoken.cjs", default: "./tiktoken.js", }, "./init": { types: "./init.d.ts", + "edge-light": "./init.js", node: "./init.cjs", default: "./init.js", }, "./load": { types: "./load.d.ts", + "edge-light": "./load.js", node: "./load.cjs", default: "./load.js", }, @@ -329,16 +332,19 @@ for (const baseDir of [ }, "./lite": { types: "./lite/tiktoken.d.ts", + "edge-light": "./lite/tiktoken.js", node: "./lite/tiktoken.cjs", default: "./lite/tiktoken.js", }, "./lite/init": { types: "./lite/init.d.ts", + "edge-light": "./lite/init.js", node: "./lite/init.cjs", default: "./lite/init.js", }, "./lite/load": { types: "./lite/load.d.ts", + "edge-light": "./lite/load.js", node: "./lite/load.cjs", default: "./lite/load.js", }, From a54dd5746afafe8ba9219a73ba0ae5160b580a77 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 28 Apr 2023 13:18:18 +0200 Subject: [PATCH 101/207] Publish 1.0.7-alpha.0 build --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index c1d5dea1..daf0f0ab 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.6", + "version": "1.0.7-alpha.0", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { From ea11d75dbad8a821caccb6cc852cf24b65dec045 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 28 Apr 2023 13:38:51 +0200 Subject: [PATCH 
102/207] Bump to 1.0.7 --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index daf0f0ab..627881c8 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.7-alpha.0", + "version": "1.0.7", "description": "Javascript bindings for tiktoken", "license": "MIT", "scripts": { From 754a378c1e50e0cf453cc93f9658149608b3cb4a Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Mon, 8 May 2023 16:58:01 +0200 Subject: [PATCH 103/207] Remove Java bindings --- .github/workflows/build_jar.yml | 81 --------- Cargo.toml | 3 +- core/Cargo.toml | 6 - core/src/lib.rs | 163 ++++++++++++++--- core/src/load.rs | 168 ------------------ core/src/openai_public.rs | 125 ------------- core/src/util.rs | 136 -------------- java/pom.xml | 99 ----------- java/src/main/java/tiktoken/Encoding.java | 34 ---- .../test/java/tiktoken/EncodingTestIT.java | 21 --- jni/Cargo.toml | 17 -- jni/build.rs | 7 - jni/src/lib.rs | 114 ------------ 13 files changed, 143 insertions(+), 831 deletions(-) delete mode 100644 .github/workflows/build_jar.yml delete mode 100644 core/src/load.rs delete mode 100644 core/src/openai_public.rs delete mode 100644 core/src/util.rs delete mode 100644 java/pom.xml delete mode 100644 java/src/main/java/tiktoken/Encoding.java delete mode 100644 java/src/test/java/tiktoken/EncodingTestIT.java delete mode 100644 jni/Cargo.toml delete mode 100644 jni/build.rs delete mode 100644 jni/src/lib.rs diff --git a/.github/workflows/build_jar.yml b/.github/workflows/build_jar.yml deleted file mode 100644 index 4b0d4476..00000000 --- a/.github/workflows/build_jar.yml +++ /dev/null @@ -1,81 +0,0 @@ -name: Build Java JAR - -on: [push, pull_request, workflow_dispatch] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - build_jni: - name: jni on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - 
fail-fast: false - matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - include: - - os: ubuntu-latest - outdir: linux_64 - - os: windows-latest - outdir: windows_64 - - os: macos-latest - outdir: osx_64 - steps: - - uses: actions/checkout@v3 - - - name: Install rust toolchain - uses: actions-rs/toolchain@v1 - with: - # stable doesn't have --out-dir - toolchain: nightly - override: true - - - name: Build - working-directory: ./jni - # TODO: 32bit vs 64bit? - # https://github.com/scijava/native-lib-loader - run: cargo build --release -Z unstable-options --out-dir ../build/natives/${{ matrix.outdir }}/ - - - uses: actions/upload-artifact@v3 - with: - name: natives - path: ./build/natives/* - - build_java: - name: java - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - needs: [build_jni] - - steps: - - uses: actions/checkout@v3 - - - name: Load outputs - uses: actions/download-artifact@v3 - with: - name: natives - path: natives - - - name: Set up JDK 11 - uses: actions/setup-java@v3 - with: - java-version: '11' - distribution: 'microsoft' - architecture: x64 - cache: maven - - - name: Build with Maven - working-directory: ./java - run: mvn --batch-mode package failsafe:integration-test - - - uses: actions/upload-artifact@v3 - with: - name: java - path: ./java/target/*.jar - - # TODO: publish to maven (only from ubuntu) - diff --git a/Cargo.toml b/Cargo.toml index b8bf6948..937b8684 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,8 +3,7 @@ members = [ "core", "python", - "js", - "jni", + "js" ] [profile.release] diff --git a/core/Cargo.toml b/core/Cargo.toml index ef25f145..44a28bbc 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -14,13 +14,7 @@ fancy-regex = "0.10.0" regex = "1.7.0" rustc-hash = "1.1.0" bstr = "1.0.1" -reqwest = { version = "0.11.14", features = ["blocking"] } -sha1 = "0.10.5" -json = "0.12.4" -base64 = "0.21.0" -lazy_static = "1.4.0" [features] default = [] 
-lazyload = [] multithreading = [] \ No newline at end of file diff --git a/core/src/lib.rs b/core/src/lib.rs index 7ee05572..c719a6d1 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -4,23 +4,121 @@ use std::thread; use fancy_regex::Regex; use rustc_hash::FxHashMap as HashMap; -mod util; -#[cfg(feature = "lazyload")] -mod load; - -#[cfg(feature = "lazyload")] -pub mod openai_public; - -#[cfg(feature = "lazyload")] -#[macro_use] -extern crate lazy_static; - #[cfg(feature = "multithreading")] const MAX_NUM_THREADS: usize = 128; #[cfg(not(feature = "multithreading"))] const MAX_NUM_THREADS: usize = 1; +fn _byte_pair_merge( + piece: &[u8], + ranks: &HashMap, usize>, + f: impl Fn(std::ops::Range) -> T, +) -> Vec { + // This is a vector of (start, rank). + // The rank is of the byte pair starting at position start. + // The rank of the last item in the vector is not a valid value. + let mut parts: Vec<(usize, usize)> = (0..piece.len() + 1).map(|i| (i, usize::MAX)).collect(); + + // NOTE: using a macro here because a closure fails to get inlined + // according to optimization remarks. + // A closure also cannot capture a reference to `piece` without + // the borrow checker complaining about the mutable borrows during + // the assignments later in this code. + macro_rules! get_rank { + ($start_idx:expr, $skip:expr) => {{ + let start_idx: usize = $start_idx; + let skip: usize = $skip; + if (start_idx + skip + 2) < parts.len() { + ranks + .get(&piece[parts[start_idx].0..parts[start_idx + skip + 2].0]) + .map(|r| *r) + } else { + None + } + }}; + ($idx:expr) => {{ + get_rank!($idx, 0) + }}; + } + + // We look up the ranks once in the beggining and iteratively update + // them during each merge, which reduces the number of rank lookups. 
+ for i in 0..parts.len() - 2 { + match get_rank!(i) { + Some(rank) => { + // usize::MAX is a sentinel value and cannot be a valid rank + debug_assert!(rank != usize::MAX); + parts[i].1 = rank; + } + None => { + continue; + } + }; + } + + // If you have n parts and m merges, this does O(mn) work. + // We could do something with a heap and do O(m log n) work. + // It is important to consider that n is often small (<100), and as such + // the cache-locality benefits outweigh the algorithmic complexity downsides + // of the `parts` vector data structure above. + + // Note that we hash bytes, not token pairs. As long as we train BPE the way we + // currently do, this is equivalent. An easy way to break this would be to decouple + // merge priority from token index or to prevent specific token merges. + loop { + if parts.len() == 1 { + break; + } + + // usize::MAX is a sentinel rank value allowing us to + // take the min more quickly + let mut min_rank: (usize, usize) = (usize::MAX, 0); + for (i, &(_, rank)) in parts[..parts.len() - 1].iter().enumerate() { + if rank < min_rank.0 { + min_rank = (rank, i); + } + } + + if min_rank.0 != usize::MAX { + let i = min_rank.1; + + // NOTE: We are about to remove parts[i + 1]. We do not do it + // yet because there are cache-locality benefits to updating + // parts[i] and parts[i-1] before removing, which could thrash + // the cache. Thus, we update the rank calculation by skipping over + // parts[i + 1], by invoking `get_rank!` with `skip = 1`. 
+ parts[i].1 = get_rank!(i, 1).unwrap_or(usize::MAX); + if i > 0 { + parts[i - 1].1 = get_rank!(i - 1, 1).unwrap_or(usize::MAX); + } + + parts.remove(i + 1); + } else { + break; + } + } + let mut out: Vec = Vec::with_capacity(parts.len() - 1); + for i in 0..parts.len() - 1 { + out.push(f(parts[i].0..parts[i + 1].0)); + } + out +} + +pub fn byte_pair_encode(piece: &[u8], ranks: &HashMap, usize>) -> Vec { + if piece.len() == 1 { + return vec![ranks[piece]]; + } + _byte_pair_merge(piece, ranks, |p| ranks[&piece[p.start..p.end]]) +} + +pub fn byte_pair_split<'a>(piece: &'a [u8], ranks: &HashMap, usize>) -> Vec<&'a [u8]> { + if piece.len() == 1 { + return vec![piece]; + } + _byte_pair_merge(piece, ranks, |p| &piece[p.start..p.end]) +} + // Various performance notes: // // Regex @@ -123,12 +221,17 @@ impl CoreBPENative { ret.push(*token); continue; } - ret.extend(&util::byte_pair_encode(piece, &self.encoder)); + ret.extend(&byte_pair_encode(piece, &self.encoder)); } ret } - pub fn _encode_native(&self, text: &str, allowed_special: &HashSet<&str>, max_tokens: Option) -> (Vec, usize, usize) { + pub fn _encode_native( + &self, + text: &str, + allowed_special: &HashSet<&str>, + max_tokens: Option, + ) -> (Vec, usize, usize) { let max_tokens = max_tokens.unwrap_or(usize::MAX); let special_regex = self._get_tl_special_regex(); let regex = self._get_tl_regex(); @@ -166,7 +269,7 @@ impl CoreBPENative { } continue; } - let tokens = util::byte_pair_encode(piece, &self.encoder); + let tokens = byte_pair_encode(piece, &self.encoder); last_piece_token_len = tokens.len(); for token in tokens { ret.push(token); @@ -203,7 +306,8 @@ impl CoreBPENative { Ok(text) => self._encode_ordinary_native(text), Err(e) => { let text = unsafe { std::str::from_utf8_unchecked(&bytes[..e.valid_up_to()]) }; - let (tokens, last_piece_token_len, _) = self._encode_native(text, &HashSet::new(), None); + let (tokens, last_piece_token_len, _) = + self._encode_native(text, &HashSet::new(), None); let (mut 
tokens, last_piece_token_len) = self._increase_last_piece_token_len(tokens, last_piece_token_len); if !tokens.is_empty() && last_piece_token_len > 0 { @@ -216,7 +320,7 @@ impl CoreBPENative { unstable_bytes.extend_from_slice(&bytes[e.valid_up_to()..]); tokens.truncate(tokens.len() - last_piece_token_len); - tokens.extend(util::byte_pair_encode(&unstable_bytes, &self.encoder)); + tokens.extend(byte_pair_encode(&unstable_bytes, &self.encoder)); } tokens } @@ -330,7 +434,7 @@ impl CoreBPENative { // would be a regex split before the UTF-8 truncation point. // Probably niche enough that no one will ever notice (after all, people didn't // notice all the big holes in the previous unstable token implementation) - Err(_) => util::byte_pair_encode(&possibility, &self.encoder), + Err(_) => byte_pair_encode(&possibility, &self.encoder), // Something like the following is intriguing but incorrect: // Err(e) => self._encode_ordinary_native(unsafe { // std::str::from_utf8_unchecked(&possibility[..e.valid_up_to()]) @@ -363,11 +467,11 @@ impl CoreBPENative { if unstable_bytes.len() - last_decoded.1 > 0 && last_decoded.0.map_or(false, |c| c.is_whitespace()) { - let mut reencoded = util::byte_pair_encode( + let mut reencoded = byte_pair_encode( &unstable_bytes[..unstable_bytes.len() - last_decoded.1], &self.encoder, ); - reencoded.extend(util::byte_pair_encode( + reencoded.extend(byte_pair_encode( &unstable_bytes[unstable_bytes.len() - last_decoded.1..], &self.encoder, )); @@ -394,7 +498,7 @@ impl CoreBPENative { if let Some(token) = self.encoder.get(piece) { return vec![*token]; } - util::byte_pair_encode(piece, &self.encoder) + byte_pair_encode(piece, &self.encoder) } // ==================== @@ -463,4 +567,21 @@ impl CoreBPENative { sorted_token_bytes, }) } -} \ No newline at end of file +} + +#[cfg(test)] +mod tests { + use rustc_hash::FxHashMap as HashMap; + + use crate::byte_pair_split; + + #[test] + fn very_simple_test() { + let mut ranks = HashMap::default(); + 
ranks.insert(b"ab".to_vec(), 1); + ranks.insert(b"cd".to_vec(), 2); + + let res = byte_pair_split(b"abcd", &ranks); + assert_eq!(res, vec![b"ab", b"cd"]); + } +} diff --git a/core/src/load.rs b/core/src/load.rs deleted file mode 100644 index 975f5fcd..00000000 --- a/core/src/load.rs +++ /dev/null @@ -1,168 +0,0 @@ - -use rustc_hash::FxHashMap as HashMap; -use std::{env, path::PathBuf}; -use sha1::{Sha1, Digest}; -use std::error::Error; -use json; - -type Result = std::result::Result>; - -fn read_file(blobpath: &str) -> Result> { - // TODO: support blobs? - - if !(blobpath.starts_with("http") || blobpath.starts_with("https")) { - return Ok(std::fs::read(blobpath)?); - } - - Ok(reqwest::blocking::get(blobpath)?.bytes()?.to_vec()) -} - -fn get_tiktoken_cache_dir() -> PathBuf { - match env::var_os("TIKTOKEN_CACHE_DIR") { - Some(v) => PathBuf::from(v), - None => { - match env::var_os("DATA_GYM_CACHE_DIR") { - Some(v) => PathBuf::from(v), - None => { - let mut temp_dir = env::temp_dir(); - temp_dir.push("data-gym-cache"); - - temp_dir - } - } - } - } -} - -fn sha1_as_hex(s: &str) -> String { - let mut hasher = Sha1::new(); - hasher.update(s.as_bytes()); - let result = hasher.finalize(); - - format!("{:x}", result) -} - -fn read_file_cached(blobpath: &str) -> Result> { - let mut cache_path = get_tiktoken_cache_dir(); - - if !cache_path.exists() { - std::fs::create_dir_all(&cache_path)?; - } - - cache_path.push(sha1_as_hex(blobpath)); - - println!("cache_path: {:?}", cache_path); - - if cache_path.exists() { - let catch_path_str = cache_path.into_os_string().into_string() - .or(Err( { - // let cache_path_lossy_str = cache_path.to_string_lossy().to_string(); - // format!("Unable to convert path {cache_path_lossy_str}") - format!("Unable to convert path") - }))?; - return read_file(&catch_path_str); - } - - let content = read_file(blobpath)?; - - std::fs::write(cache_path, &content)?; - - Ok(content) -} - -fn is_printable(u: u8) -> bool { - // printable ascii characters 
according to python - !(u <= 31 || (u >= 127 && u <= 160) || u == 173) -} - -pub fn data_gym_to_mergeable_bpe_ranks(vocab_bpe_file: &str, encoder_json_file: &str) -> Result, usize>> { - let mut rank_to_intbyte = (0..=255) - .filter(|x| is_printable(*x) && (*x as char) != ' ') - .collect::>(); - - let mut data_gym_byte_to_byte = rank_to_intbyte - .iter() - .map(|&x| (x as u32, x)) - .collect::>(); - - let mut n = 0; - for b in 0..=255 { - if !rank_to_intbyte.contains(&b) { - rank_to_intbyte.push(b); - data_gym_byte_to_byte.insert(256 + n, b); - n += 1; - } - } - assert!(rank_to_intbyte.len() == 256); - - // vocab_bpe contains the merges along with associated ranks - let cached_vocab = read_file_cached(vocab_bpe_file)?; - let vocab_bpe_contents = std::str::from_utf8(&cached_vocab)? - .split("\n").collect::>(); - - let bpe_merges = match vocab_bpe_contents[1..(vocab_bpe_contents.len() - 1)] - .iter() - .map(|&s| s.split_whitespace()) - .map(|mut sp| match (sp.next(), sp.next()) { - (Some(a), Some(b)) => Some((a, b)), - _ => None, - }) - .collect::>>() - { - Some(v) => v, - None => return Err("Unable to parse vocab_bpe file".into()), - }; - - let decode_data_gym = - |value: &str| value.chars().map(|c| { - data_gym_byte_to_byte[&(c as u32)] - } ).collect::>(); - - // # add the single byte tokens - let mut bpe_ranks = - rank_to_intbyte - .iter() - .enumerate() - .map(|(i, b)| (vec![*b], i)) - .collect::, usize>>(); - - // add the merged tokens - let mut n = bpe_ranks.len(); - for (first, second) in bpe_merges { - bpe_ranks.insert([decode_data_gym(first), decode_data_gym(second)].concat(), n); - n += 1; - } - - // check that the encoder file matches the merges file - // this sanity check is important since tiktoken assumes that ranks are ordered the same - // as merge priority - let cached_encoder = read_file_cached(encoder_json_file)?; - let encoder_json = json::parse(&std::str::from_utf8(&cached_encoder)?)?; - - let mut encoder_json_loaded = encoder_json.entries() - 
.map(|(k, v)| (decode_data_gym(k), v.as_usize().unwrap())) - .collect::, usize>>(); - - // drop these two special tokens if present, since they're not mergeable bpe tokens - encoder_json_loaded.remove(&decode_data_gym("<|endoftext|>")); - encoder_json_loaded.remove(&decode_data_gym("<|startoftext|>")); - - assert!(bpe_ranks == encoder_json_loaded); - - Ok(bpe_ranks) -} - -pub fn load_tiktoken_bpe(tiktoken_bpe_file: &str) -> Result, usize>> { - use base64::{engine::general_purpose, Engine as _}; - - let content = read_file_cached(tiktoken_bpe_file)?; - - Ok(std::str::from_utf8(&content)? - .lines() - .filter(|s| s.len() > 0) - .map(|s| s.split_whitespace()) - .map(|mut sp| (sp.next().unwrap(), sp.next().unwrap())) - .map(|(first, second)| (general_purpose::STANDARD.decode(&first).unwrap(), second.parse::().unwrap())) - .collect::, usize>>()) -} - diff --git a/core/src/openai_public.rs b/core/src/openai_public.rs deleted file mode 100644 index 24e0ab99..00000000 --- a/core/src/openai_public.rs +++ /dev/null @@ -1,125 +0,0 @@ - -use rustc_hash::FxHashMap as HashMap; -use std::error::Error; -use std::sync::RwLock; -use json; - -#[path = "load.rs"] -mod load; - -type Result = std::result::Result>; - -lazy_static! 
{ - pub static ref REGISTRY: HashMap = { - json::parse(include_str!("../../tiktoken/registry.json")) - .expect("Failed to parse internal JSON") - .entries() - .map(|(key, value)| { - let loading_strategy = if value.has_key("data_gym_to_mergeable_bpe_ranks") { - EncoderLoadingStrategy::DataGym( - DataGymDef { - vocab_bpe_file: value["data_gym_to_mergeable_bpe_ranks"]["vocab_bpe_file"].as_str().expect("error").into(), - encoder_json_file: value["data_gym_to_mergeable_bpe_ranks"]["encoder_json_file"].as_str().expect("error").into() - }) - } - else if value.has_key("load_tiktoken_bpe") { - EncoderLoadingStrategy::BPE(value["load_tiktoken_bpe"].as_str().expect("fail").into()) - } - else { - panic!("Invalid encoding"); - }; - - EncodingLazy::new( - key.into(), - value["explicit_n_vocab"].as_usize(), - value["pat_str"].as_str().expect("foo").into(), - value["special_tokens"].entries() - .map(|(key, value)| (key.into(), value.as_usize().expect("foo"))) - .collect::>(), - loading_strategy - ) - }) - - .map(|enc| (enc.name.clone(), enc)) - .collect::>() - }; - - pub static ref MODEL_TO_ENCODING: HashMap = - json::parse(include_str!("../../tiktoken/model_to_encoding.json")) - .expect("Failed to parse internal JSON") - .entries() - .map(|(k, v)| (k.into(), v.as_str().expect("foo").into())) - .collect::>(); -} - -#[derive(Clone, PartialEq, Eq, Hash)] -struct DataGymDef { - vocab_bpe_file: String, - encoder_json_file: String, -} - -#[derive(Clone, PartialEq, Eq, Hash)] -enum EncoderLoadingStrategy { - BPE(String), - DataGym(DataGymDef), -} - -pub struct EncodingLazy { - name: String, - explicit_n_vocab: Option, - pub pat_str: String, - pub special_tokens: HashMap, - mergeable_ranks: RwLock, usize>>>, - loading_strategy: EncoderLoadingStrategy, -} - -fn load_bpe(path: &str) -> Result, usize>> { - load::load_tiktoken_bpe(path) -} - -fn load_data_gym(def: &DataGymDef) -> Result, usize>> { - load::data_gym_to_mergeable_bpe_ranks(&def.vocab_bpe_file, &def.encoder_json_file) -} - -// 
#[memoize] -fn load_mergeable_ranks(loading_strategy: &EncoderLoadingStrategy) -> Result, usize>> -{ - match loading_strategy { - EncoderLoadingStrategy::BPE(path) => load_bpe(&path), - EncoderLoadingStrategy::DataGym(def) => load_data_gym(&def), - } -} - -impl EncodingLazy { - fn new(name: String, - explicit_n_vocab: Option, - pat_str: String, - special_tokens: HashMap, - loading_strategy: EncoderLoadingStrategy) -> Self { - EncodingLazy { - name, - explicit_n_vocab, - pat_str, - special_tokens, - mergeable_ranks: RwLock::new(None), - loading_strategy - } - } - - pub fn get(&self) -> Result, usize>> { - { - let read = self.mergeable_ranks.read().unwrap(); - if read.is_some() { - return Ok(read.as_ref().unwrap().clone()); - } - } - - let mut write = self.mergeable_ranks.write().unwrap(); - *write = Some(load_mergeable_ranks(&self.loading_strategy)?); - - Ok(write.as_ref().unwrap().clone()) - } -} - - - diff --git a/core/src/util.rs b/core/src/util.rs deleted file mode 100644 index b9605a18..00000000 --- a/core/src/util.rs +++ /dev/null @@ -1,136 +0,0 @@ -use rustc_hash::FxHashMap as HashMap; - -fn _byte_pair_merge( - piece: &[u8], - ranks: &HashMap, usize>, - f: impl Fn(std::ops::Range) -> T, -) -> Vec { - // This is a vector of (start, rank). - // The rank is of the byte pair starting at position start. - // The rank of the last item in the vector is not a valid value. - let mut parts: Vec<(usize, usize)> = (0..piece.len() + 1).map(|i| (i, usize::MAX)).collect(); - - // NOTE: using a macro here because a closure fails to get inlined - // according to optimization remarks. - // A closure also cannot capture a reference to `piece` without - // the borrow checker complaining about the mutable borrows during - // the assignments later in this code. - macro_rules! 
get_rank { - ($start_idx:expr, $skip:expr) => {{ - let start_idx: usize = $start_idx; - let skip: usize = $skip; - if (start_idx + skip + 2) < parts.len() { - ranks - .get(&piece[parts[start_idx].0..parts[start_idx + skip + 2].0]) - .map(|r| *r) - } else { - None - } - }}; - ($idx:expr) => {{ - get_rank!($idx, 0) - }}; - } - - // We look up the ranks once in the beggining and iteratively update - // them during each merge, which reduces the number of rank lookups. - for i in 0..parts.len() - 2 { - match get_rank!(i) { - Some(rank) => { - // usize::MAX is a sentinel value and cannot be a valid rank - debug_assert!(rank != usize::MAX); - parts[i].1 = rank; - } - None => { - continue; - } - }; - } - - // If you have n parts and m merges, this does O(mn) work. - // We could do something with a heap and do O(m log n) work. - // It is important to consider that n is often small (<100), and as such - // the cache-locality benefits outweigh the algorithmic complexity downsides - // of the `parts` vector data structure above. - - // Note that we hash bytes, not token pairs. As long as we train BPE the way we - // currently do, this is equivalent. An easy way to break this would be to decouple - // merge priority from token index or to prevent specific token merges. - loop { - if parts.len() == 1 { - break; - } - - // usize::MAX is a sentinel rank value allowing us to - // take the min more quickly - let mut min_rank: (usize, usize) = (usize::MAX, 0); - for (i, &(_, rank)) in parts[..parts.len() - 1].iter().enumerate() { - if rank < min_rank.0 { - min_rank = (rank, i); - } - } - - if min_rank.0 != usize::MAX { - let i = min_rank.1; - - // NOTE: We are about to remove parts[i + 1]. We do not do it - // yet because there are cache-locality benefits to updating - // parts[i] and parts[i-1] before removing, which could thrash - // the cache. Thus, we update the rank calculation by skipping over - // parts[i + 1], by invoking `get_rank!` with `skip = 1`. 
- parts[i].1 = get_rank!(i, 1).unwrap_or(usize::MAX); - if i > 0 { - parts[i - 1].1 = get_rank!(i - 1, 1).unwrap_or(usize::MAX); - } - - parts.remove(i + 1); - } else { - break; - } - } - let mut out: Vec = Vec::with_capacity(parts.len() - 1); - for i in 0..parts.len() - 1 { - out.push(f(parts[i].0..parts[i + 1].0)); - } - out -} - -pub fn byte_pair_encode(piece: &[u8], ranks: &HashMap, usize>) -> Vec { - if piece.len() == 1 { - return vec![ranks[piece]]; - } - _byte_pair_merge(piece, ranks, |p| ranks[&piece[p.start..p.end]]) -} - -pub fn byte_pair_split<'a>(piece: &'a [u8], ranks: &HashMap, usize>) -> Vec<&'a [u8]> { - if piece.len() == 1 { - return vec![piece]; - } - _byte_pair_merge(piece, ranks, |p| &piece[p.start..p.end]) -} - -#[cfg(test)] -mod tests { - use rustc_hash::FxHashMap as HashMap; - - use crate::util::_byte_pair_merge; - pub fn byte_pair_split<'a>(piece: &'a [u8], ranks: &HashMap, usize>) -> Vec<&'a [u8]> { - if piece.len() == 1 { - return vec![piece]; - } - _byte_pair_merge(piece, ranks) - .iter() - .map(|p| &piece[p.start..p.end]) - .collect() - } - - #[test] - fn very_simple_test() { - let mut ranks = HashMap::default(); - ranks.insert(b"ab".to_vec(), 1); - ranks.insert(b"cd".to_vec(), 2); - - let res = byte_pair_split(b"abcd", &ranks); - assert_eq!(res, vec![b"ab", b"cd"]); - } -} \ No newline at end of file diff --git a/java/pom.xml b/java/pom.xml deleted file mode 100644 index 61cbf01c..00000000 --- a/java/pom.xml +++ /dev/null @@ -1,99 +0,0 @@ - - - - 4.0.0 - - com.openai - tiktoken - 1.0-SNAPSHOT - - tiktoken - https://github.com/openai/tiktoken - jar - - - UTF-8 - 1.7 - 1.7 - - - - - junit - junit - 4.11 - test - - - org.scijava - native-lib-loader - 2.4.0 - - - - - - - ${project.basedir}/../natives/ - ${project.build.directory}/classes/natives/ - - - - - - - maven-clean-plugin - 3.1.0 - - - - maven-resources-plugin - 3.0.2 - - - maven-compiler-plugin - 3.8.0 - - - maven-surefire-plugin - 2.22.1 - - - maven-jar-plugin - 3.0.2 - - - 
maven-install-plugin - 2.5.2 - - - maven-deploy-plugin - 2.8.2 - - - - maven-site-plugin - 3.7.1 - - - maven-project-info-reports-plugin - 3.0.0 - - - org.apache.maven.plugins - maven-failsafe-plugin - 2.22.1 - - - - integration-test - verify - - - - - - - - diff --git a/java/src/main/java/tiktoken/Encoding.java b/java/src/main/java/tiktoken/Encoding.java deleted file mode 100644 index 1773225d..00000000 --- a/java/src/main/java/tiktoken/Encoding.java +++ /dev/null @@ -1,34 +0,0 @@ -package tiktoken; - -import org.scijava.nativelib.NativeLoader; -import java.io.IOException; - -public class Encoding implements AutoCloseable -{ - static { - try { - // load from JAR - NativeLoader.loadLibrary("_tiktoken_jni"); - } - catch(IOException e) { - throw new RuntimeException(e); - } - } - - // initialized by init - private long handle; - - private native void init(String modelName); - - private native void destroy(); - - public native long[] encode(String text, String[] allowedSpecialTokens, long maxTokenLength); - - public Encoding(String modelName) { - this.init(modelName); - } - - public void close() throws Exception { - destroy(); - } -} diff --git a/java/src/test/java/tiktoken/EncodingTestIT.java b/java/src/test/java/tiktoken/EncodingTestIT.java deleted file mode 100644 index 602a1ef9..00000000 --- a/java/src/test/java/tiktoken/EncodingTestIT.java +++ /dev/null @@ -1,21 +0,0 @@ -package tiktoken; - -import static org.junit.Assert.assertArrayEquals; - -import org.junit.Test; - -// run test: mvn failsafe:integration-test -public class EncodingTestIT -{ - @Test - public void shouldAnswerWithTrue() throws Exception - { - Encoding encoding = new Encoding("text-davinci-001"); - - long[] a = encoding.encode("test", new String[0], 0); - - encoding.close(); - - assertArrayEquals(new long[] {9288}, a); - } -} diff --git a/jni/Cargo.toml b/jni/Cargo.toml deleted file mode 100644 index 4309eef4..00000000 --- a/jni/Cargo.toml +++ /dev/null @@ -1,17 +0,0 @@ -[package] -name = 
"tiktoken_jni" -version = "0.2.0" -edition = "2021" -rust-version = "1.57.0" - -[lib] -name = "_tiktoken_jni" -crate-type = ["cdylib"] - -[dependencies] -tiktoken_core = { path = "../core", features = ["multithreading", "lazyload"] } -rustc-hash = "1.1.0" -jni = "0.20.0" - -[build-dependencies] -json = "0.12.4" diff --git a/jni/build.rs b/jni/build.rs deleted file mode 100644 index 9c866413..00000000 --- a/jni/build.rs +++ /dev/null @@ -1,7 +0,0 @@ -use json; - -fn main() { - json::parse(include_str!("../tiktoken/registry.json")).expect("Failed to parse internal JSON"); - json::parse(include_str!("../tiktoken/model_to_encoding.json")).expect("Failed to parse internal JSON"); - println!("JSON Parsing validated"); -} diff --git a/jni/src/lib.rs b/jni/src/lib.rs deleted file mode 100644 index 6bd99d6d..00000000 --- a/jni/src/lib.rs +++ /dev/null @@ -1,114 +0,0 @@ -use std::collections::HashSet; -use std::sync::MutexGuard; - -use _tiktoken_core::openai_public::EncodingLazy; -use jni::JNIEnv; -// These objects are what you should use as arguments to your native -// function. They carry extra lifetime information to prevent them escaping -// this context and getting used after being GC'd. -use jni::objects::{JObject, JString}; - -// This is just a pointer. We'll be returning it from our function. We -// can't return one of the objects with lifetime information because the -// lifetime checker won't let us. 
-use jni::sys::{jarray, jlong}; - -use _tiktoken_core::{self, CoreBPENative}; - -type Result = std::result::Result>; - -fn unwrap_or_throw(env: &JNIEnv, result: Result, default: T) -> T { - // Check if an exception is already thrown - if env.exception_check().expect("exception_check() failed") { - return default; - } - - match result { - Ok(tokenizer) => tokenizer, - Err(error) => { - let exception_class = env - .find_class("java/lang/Exception") - .expect("Unable to find exception class"); - env.throw_new(exception_class, format!("{}", error)) - .expect("Unable to throw exception"); - default - } - } -} - -#[no_mangle] -pub extern "system" fn Java_tiktoken_Encoding_init(env: JNIEnv, obj: JObject, model_name: JString) { - let result = || -> Result<()> { - // First, we have to get the string out of Java. Check out the `strings` - // module for more info on how this works. - let model_name: String = env - .get_string(model_name)? - .into(); - - let encoding_name = _tiktoken_core::openai_public::MODEL_TO_ENCODING - .get(&model_name).ok_or("Unable to find model")?; - - let encoding = _tiktoken_core::openai_public::REGISTRY - .get(encoding_name).ok_or("Unable to find encoding")?; - - let bpe_native = CoreBPENative::new( - encoding.get()?, - encoding.special_tokens.clone(), - &encoding.pat_str, - )?; - - Ok(unsafe { - env.set_rust_field(obj, "handle", bpe_native)?; - }) - }(); - - unwrap_or_throw(&env, result, ()) -} - -#[no_mangle] -pub extern "system" fn Java_tiktoken_Encoding_destroy(env: JNIEnv, obj: JObject) { - unsafe { - let _: CoreBPENative = env.take_rust_field(obj, "handle").expect("Unable to get handle during destruction"); - } -} - -#[no_mangle] -pub extern "system" fn Java_tiktoken_Encoding_encode( - env: JNIEnv, - obj: JObject, - text: JString, - allowed_special_tokens: jarray, - max_token_length: jlong, -) -> jarray { - let result = || -> Result { - let encoding: MutexGuard = unsafe { env.get_rust_field(obj, "handle")? 
}; - - let enc = encoding; - let input: String = env - .get_string(text)? - .into(); - - let len = env.get_array_length(allowed_special_tokens)?; - let mut strings: Vec = Vec::with_capacity(len as usize); - for i in 0..len { - let element: JObject = env - .get_object_array_element(allowed_special_tokens, i)?; - let current: String = env.get_string(element.into())?.into(); - strings.push(current); - } - - let v2: HashSet<&str> = strings.iter().map(|s| &**s).collect(); - - let (tokens, _, _) = enc._encode_native(&input, &v2, Some(max_token_length as usize)); - - let output = env - .new_long_array(tokens.len().try_into()?)?; - - let array_of_u64 = tokens.iter().map(|x| *x as i64).collect::>(); - env.set_long_array_region(output, 0, array_of_u64.as_slice())?; - - Ok(output) - }(); - - unwrap_or_throw(&env, result, JObject::null().into_raw()) -} From 9390c0389453346eceb5d330ec288d11c98d7e79 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Mon, 8 May 2023 17:01:56 +0200 Subject: [PATCH 104/207] Replace README.md for clarity --- README.md | 318 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 250 insertions(+), 68 deletions(-) diff --git a/README.md b/README.md index 80d5b9f6..bfbe4a9f 100644 --- a/README.md +++ b/README.md @@ -1,103 +1,285 @@ -# ⏳ tiktoken +# ⏳ @dqbd/tiktoken -tiktoken is a fast [BPE](https://en.wikipedia.org/wiki/Byte_pair_encoding) tokeniser for use with -OpenAI's models. +tiktoken is a [BPE](https://en.wikipedia.org/wiki/Byte_pair_encoding) tokeniser for use with +OpenAI's models, forked from the original tiktoken library to provide NPM bindings for Node and other JS runtimes. 
-```python -import tiktoken -enc = tiktoken.get_encoding("cl100k_base") -assert enc.decode(enc.encode("hello world")) == "hello world" +The open source version of `tiktoken` can be installed from NPM: -# To get the tokeniser corresponding to a specific model in the OpenAI API: -enc = tiktoken.encoding_for_model("gpt-4") ``` +npm install @dqbd/tiktoken +``` + +## Usage + +Basic usage follows, which includes all the OpenAI encoders and ranks: + +```typescript +import assert from "node:assert"; +import { get_encoding, encoding_for_model } from "@dqbd/tiktoken"; + +const enc = get_encoding("gpt2"); +assert( + new TextDecoder().decode(enc.decode(enc.encode("hello world"))) === + "hello world" +); + +// To get the tokeniser corresponding to a specific model in the OpenAI API: +const enc = encoding_for_model("text-davinci-003"); -The open source version of `tiktoken` can be installed from PyPI: +// Extend existing encoding with custom special tokens +const enc = encoding_for_model("gpt2", { + "<|im_start|>": 100264, + "<|im_end|>": 100265, +}); + +// don't forget to free the encoder after it is not used +enc.free(); ``` -pip install tiktoken + +In constrained environments (eg. Edge Runtime, Cloudflare Workers), where you don't want to load all the encoders at once, you can use the lightweight WASM binary via `@dqbd/tiktoken/lite`. + +```typescript +const { Tiktoken } = require("@dqbd/tiktoken/lite"); +const cl100k_base = require("@dqbd/tiktoken/encoders/cl100k_base.json"); + +const encoding = new Tiktoken( + cl100k_base.bpe_ranks, + cl100k_base.special_tokens, + cl100k_base.pat_str +); +const tokens = encoding.encode("hello world"); +encoding.free(); ``` -The tokeniser API is documented in `tiktoken/core.py`. 
+If you want to fetch the latest ranks, use the `load` function: + +```typescript +const { Tiktoken } = require("@dqbd/tiktoken/lite"); +const { load } = require("@dqbd/tiktoken/load"); +const registry = require("@dqbd/tiktoken/registry.json"); +const models = require("@dqbd/tiktoken/model_to_encoding.json"); -Example code using `tiktoken` can be found in the -[OpenAI Cookbook](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb). +async function main() { + const model = await load(registry[models["gpt-3.5-turbo"]]); + const encoder = new Tiktoken( + model.bpe_ranks, + model.special_tokens, + model.pat_str + ); + const tokens = encoder.encode("hello world"); + encoder.free(); +} +main(); +``` -## Performance +If desired, you can create a Tiktoken instance directly with custom ranks, special tokens and regex pattern: -`tiktoken` is between 3-6x faster than a comparable open source tokeniser: +```typescript +import { Tiktoken } from "../pkg"; +import { readFileSync } from "fs"; -![image](https://raw.githubusercontent.com/openai/tiktoken/main/perf.svg) +const encoder = new Tiktoken( + readFileSync("./ranks/gpt2.tiktoken").toString("utf-8"), + { "<|endoftext|>": 50256, "<|im_start|>": 100264, "<|im_end|>": 100265 }, + "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+" +); +``` -Performance measured on 1GB of text using the GPT-2 tokeniser, using `GPT2TokenizerFast` from -`tokenizers==0.13.2`, `transformers==4.24.0` and `tiktoken==0.2.0`. +Finally, you can use a custom `init` function to override the WASM initialization logic for non-Node environments. This is useful if you are using a bundler that does not support WASM ESM integration. 
+```typescript +import { get_encoding, init } from "@dqbd/tiktoken/init"; -## Getting help +async function main() { + const wasm = "..."; // fetch the WASM binary somehow + await init((imports) => WebAssembly.instantiate(wasm, imports)); -Please post questions in the [issue tracker](https://github.com/openai/tiktoken/issues). + const encoding = get_encoding("cl100k_base"); + const tokens = encoding.encode("hello world"); + encoding.free(); +} -If you work at OpenAI, make sure to check the internal documentation or feel free to contact -@shantanu. +main(); +``` +## Compatibility -## Extending tiktoken +As this is a WASM library, there might be some issues with specific runtimes. If you encounter any issues, please open an issue. -You may wish to extend `tiktoken` to support new encodings. There are two ways to do this. +| Runtime | Status | Notes | +| ---------------------------- | ------ | ------------------------------------------ | +| Node.js | ✅ | | +| Bun | ✅ | | +| Vite | ✅ | See [here](#vite) for notes | +| Next.js | ✅ | See [here](#nextjs) for notes | +| Create React App (via Craco) | ✅ | See [here](#create-react-app) for notes | +| Vercel Edge Runtime | ✅ | See [here](#vercel-edge-runtime) for notes | +| Cloudflare Workers | ✅ | See [here](#cloudflare-workers) for notes | +| Deno | ❌ | Currently unsupported | +### [Vite](#vite) -**Create your `Encoding` object exactly the way you want and simply pass it around.** +If you are using Vite, you will need to add both the `vite-plugin-wasm` and `vite-plugin-top-level-await`. 
Add the following to your `vite.config.js`: -```python -cl100k_base = tiktoken.get_encoding("cl100k_base") +```js +import wasm from "vite-plugin-wasm"; +import topLevelAwait from "vite-plugin-top-level-await"; +import { defineConfig } from "vite"; -# In production, load the arguments directly instead of accessing private attributes -# See openai_public.py for examples of arguments for specific encodings -enc = tiktoken.Encoding( - # If you're changing the set of special tokens, make sure to use a different name - # It should be clear from the name what behaviour to expect. - name="cl100k_im", - pat_str=cl100k_base._pat_str, - mergeable_ranks=cl100k_base._mergeable_ranks, - special_tokens={ - **cl100k_base._special_tokens, - "<|im_start|>": 100264, - "<|im_end|>": 100265, - } -) +export default defineConfig({ + plugins: [wasm(), topLevelAwait()], +}); ``` -**Use the `tiktoken_ext` plugin mechanism to register your `Encoding` objects with `tiktoken`.** +### [Next.js](#nextjs) -This is only useful if you need `tiktoken.get_encoding` to find your encoding, otherwise prefer -option 1. +Both API routes and `/pages` are supported with the following `next.config.js` configuration. -To do this, you'll need to create a namespace package under `tiktoken_ext`. +```typescript +// next.config.json +const config = { + webpack(config, { isServer, dev }) { + config.experiments = { + asyncWebAssembly: true, + layers: true, + }; -Layout your project like this, making sure to omit the `tiktoken_ext/__init__.py` file: + return config; + }, +}; ``` -my_tiktoken_extension -├── tiktoken_ext -│ └── my_encodings.py -└── setup.py + +Usage in pages: + +```tsx +import { get_encoding } from "@dqbd/tiktoken"; +import { useState } from "react"; + +const encoding = get_encoding("cl100k_base"); + +export default function Home() { + const [input, setInput] = useState("hello world"); + const tokens = encoding.encode(input); + + return ( +
+ setInput(e.target.value)} + /> +
{tokens.toString()}
+
+ ); +} ``` -`my_encodings.py` should be a module that contains a variable named `ENCODING_CONSTRUCTORS`. -This is a dictionary from an encoding name to a function that takes no arguments and returns -arguments that can be passed to `tiktoken.Encoding` to construct that encoding. For an example, see -`tiktoken_ext/openai_public.py`. For precise details, see `tiktoken/registry.py`. - -Your `setup.py` should look something like this: -```python -from setuptools import setup, find_namespace_packages - -setup( - name="my_tiktoken_extension", - packages=find_namespace_packages(include=['tiktoken_ext*']), - install_requires=["tiktoken"], - ... -) +Usage in API routes: + +```typescript +import { get_encoding } from "@dqbd/tiktoken"; +import { NextApiRequest, NextApiResponse } from "next"; + +export default function handler(req: NextApiRequest, res: NextApiResponse) { + const encoding = get_encoding("cl100k_base"); + const tokens = encoding.encode("hello world"); + encoding.free(); + return res.status(200).json({ tokens }); +} ``` -Then simply `pip install ./my_tiktoken_extension` and you should be able to use your -custom encodings! Make sure **not** to use an editable install. +### [Create React App](#create-react-app) + +By default, the Webpack configuration found in Create React App does not support WASM ESM modules. To add support, please do the following: + +1. Swap `react-scripts` with `craco`, using the guide found here: https://craco.js.org/docs/getting-started/. +2. 
Add the following to `craco.config.js`: + +```js +module.exports = { + webpack: { + configure: (config) => { + config.experiments = { + asyncWebAssembly: true, + layers: true, + }; + + // turn off static file serving of WASM files + // we need to let Webpack handle WASM import + config.module.rules + .find((i) => "oneOf" in i) + .oneOf.find((i) => i.type === "asset/resource") + .exclude.push(/\.wasm$/); + + return config; + }, + }, +}; +``` + +### [Vercel Edge Runtime](#vercel-edge-runtime) + +Vercel Edge Runtime does support WASM modules by adding a `?module` suffix. Initialize the encoder with the following snippet: + +```typescript +// @ts-expect-error +import wasm from "@dqbd/tiktoken/lite/tiktoken_bg.wasm?module"; +import model from "@dqbd/tiktoken/encoders/cl100k_base.json"; +import { init, Tiktoken } from "@dqbd/tiktoken/lite/init"; + +export const config = { runtime: "edge" }; + +export default async function (req: Request) { + await init((imports) => WebAssembly.instantiate(wasm, imports)); + + const encoding = new Tiktoken( + model.bpe_ranks, + model.special_tokens, + model.pat_str + ); + + const tokens = encoding.encode("hello world"); + encoding.free(); + + return new Response(`${tokens}`); +} +``` + +### [Cloudflare Workers](#cloudflare-workers) + +Similar to Vercel Edge Runtime, Cloudflare Workers must import the WASM binary file manually and use the `@dqbd/tiktoken/lite` version to fit the 1 MB limit. However, users need to point directly at the WASM binary via a relative path (including `./node_modules/`). 
+ +Add the following rule to the `wrangler.toml` to upload WASM during build: + +```toml +[[rules]] +globs = ["**/*.wasm"] +type = "CompiledWasm" +``` + +Initialize the encoder with the following snippet: + +```javascript +import { init, Tiktoken } from "@dqbd/tiktoken/lite/init"; +import wasm from "./node_modules/@dqbd/tiktoken/lite/tiktoken_bg.wasm"; +import model from "@dqbd/tiktoken/encoders/cl100k_base.json"; + +export default { + async fetch() { + await init((imports) => WebAssembly.instantiate(wasm, imports)); + const encoder = new Tiktoken( + model.bpe_ranks, + model.special_tokens, + model.pat_str + ); + const tokens = encoder.encode("test"); + encoder.free(); + return new Response(`${tokens}`); + }, +}; +``` + +## Acknowledgements + +- https://github.com/zurawiki/tiktoken-rs From b8e7817b52b8491f38c8458e8c187246ce04318f Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Mon, 8 May 2023 17:51:49 +0200 Subject: [PATCH 105/207] Remove duplicate README.md --- js/README.md | 285 ------------------------------------- js/scripts/post_process.ts | 2 +- 2 files changed, 1 insertion(+), 286 deletions(-) delete mode 100644 js/README.md diff --git a/js/README.md b/js/README.md deleted file mode 100644 index f9b1ac23..00000000 --- a/js/README.md +++ /dev/null @@ -1,285 +0,0 @@ -# ⏳ tiktoken - -tiktoken is a [BPE](https://en.wikipedia.org/wiki/Byte_pair_encoding) tokeniser for use with -OpenAI's models, forked from the original tiktoken library to provide NPM bindings for Node and other JS runtimes. 
- -The open source version of `tiktoken` can be installed from NPM: - -``` -npm install @dqbd/tiktoken -``` - -## Usage - -Basic usage follows, which includes all the OpenAI encoders and ranks: - -```typescript -import assert from "node:assert"; -import { get_encoding, encoding_for_model } from "@dqbd/tiktoken"; - -const enc = get_encoding("gpt2"); -assert( - new TextDecoder().decode(enc.decode(enc.encode("hello world"))) === - "hello world" -); - -// To get the tokeniser corresponding to a specific model in the OpenAI API: -const enc = encoding_for_model("text-davinci-003"); - -// Extend existing encoding with custom special tokens -const enc = encoding_for_model("gpt2", { - "<|im_start|>": 100264, - "<|im_end|>": 100265, -}); - -// don't forget to free the encoder after it is not used -enc.free(); -``` - -In constrained environments (eg. Edge Runtime, Cloudflare Workers), where you don't want to load all the encoders at once, you can use the lightweight WASM binary via `@dqbd/tiktoken/lite`. 
- -```typescript -const { Tiktoken } = require("@dqbd/tiktoken/lite"); -const cl100k_base = require("@dqbd/tiktoken/encoders/cl100k_base.json"); - -const encoding = new Tiktoken( - cl100k_base.bpe_ranks, - cl100k_base.special_tokens, - cl100k_base.pat_str -); -const tokens = encoding.encode("hello world"); -encoding.free(); -``` - -If you want to fetch the latest ranks, use the `load` function: - -```typescript -const { Tiktoken } = require("@dqbd/tiktoken/lite"); -const { load } = require("@dqbd/tiktoken/load"); -const registry = require("@dqbd/tiktoken/registry.json"); -const models = require("@dqbd/tiktoken/model_to_encoding.json"); - -async function main() { - const model = await load(registry[models["gpt-3.5-turbo"]]); - const encoder = new Tiktoken( - model.bpe_ranks, - model.special_tokens, - model.pat_str - ); - const tokens = encoding.encode("hello world"); - encoder.free(); -} - -main(); -``` - -If desired, you can create a Tiktoken instance directly with custom ranks, special tokens and regex pattern: - -```typescript -import { Tiktoken } from "../pkg"; -import { readFileSync } from "fs"; - -const encoder = new Tiktoken( - readFileSync("./ranks/gpt2.tiktoken").toString("utf-8"), - { "<|endoftext|>": 50256, "<|im_start|>": 100264, "<|im_end|>": 100265 }, - "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+" -); -``` - -Finally, you can a custom `init` function to override the WASM initialization logic for non-Node environments. This is useful if you are using a bundler that does not support WASM ESM integration. 
- -```typescript -import { get_encoding, init } from "@dqbd/tiktoken/init"; - -async function main() { - const wasm = "..."; // fetch the WASM binary somehow - await init((imports) => WebAssembly.instantiate(wasm, imports)); - - const encoding = get_encoding("cl100k_base"); - const tokens = encoding.encode("hello world"); - encoding.free(); -} - -main(); -``` - -## Compatibility - -As this is a WASM library, there might be some issues with specific runtimes. If you encounter any issues, please open an issue. - -| Runtime | Status | Notes | -| ---------------------------- | ------ | ------------------------------------------ | -| Node.js | ✅ | | -| Bun | ✅ | | -| Vite | ✅ | See [here](#vite) for notes | -| Next.js | ✅ | See [here](#nextjs) for notes | -| Create React App (via Craco) | ✅ | See [here](#create-react-app) for notes | -| Vercel Edge Runtime | ✅ | See [here](#vercel-edge-runtime) for notes | -| Cloudflare Workers | ✅ | See [here](#cloudflare-workers) for notes | -| Deno | ❌ | Currently unsupported | - -### [Vite](#vite) - -If you are using Vite, you will need to add both the `vite-plugin-wasm` and `vite-plugin-top-level-await`. Add the following to your `vite.config.js`: - -```js -import wasm from "vite-plugin-wasm"; -import topLevelAwait from "vite-plugin-top-level-await"; -import { defineConfig } from "vite"; - -export default defineConfig({ - plugins: [wasm(), topLevelAwait()], -}); -``` - -### [Next.js](#nextjs) - -Both API routes and `/pages` are supported with the following `next.config.js` configuration. 
- -```typescript -// next.config.json -const config = { - webpack(config, { isServer, dev }) { - config.experiments = { - asyncWebAssembly: true, - layers: true, - }; - - return config; - }, -}; -``` - -Usage in pages: - -```tsx -import { get_encoding } from "@dqbd/tiktoken"; -import { useState } from "react"; - -const encoding = get_encoding("cl100k_base"); - -export default function Home() { - const [input, setInput] = useState("hello world"); - const tokens = encoding.encode(input); - - return ( -
- setInput(e.target.value)} - /> -
{tokens.toString()}
-
- ); -} -``` - -Usage in API routes: - -```typescript -import { get_encoding } from "@dqbd/tiktoken"; -import { NextApiRequest, NextApiResponse } from "next"; - -export default function handler(req: NextApiRequest, res: NextApiResponse) { - const encoding = get_encoding("cl100k_base"); - const tokens = encoding.encode("hello world"); - encoding.free(); - return res.status(200).json({ tokens }); -} -``` - -### [Create React App](#create-react-app) - -By default, the Webpack configugration found in Create React App does not support WASM ESM modules. To add support, please do the following: - -1. Swap `react-scripts` with `craco`, using the guide found here: https://craco.js.org/docs/getting-started/. -2. Add the following to `craco.config.js`: - -```js -module.exports = { - webpack: { - configure: (config) => { - config.experiments = { - asyncWebAssembly: true, - layers: true, - }; - - // turn off static file serving of WASM files - // we need to let Webpack handle WASM import - config.module.rules - .find((i) => "oneOf" in i) - .oneOf.find((i) => i.type === "asset/resource") - .exclude.push(/\.wasm$/); - - return config; - }, - }, -}; -``` - -### [Vercel Edge Runtime](#vercel-edge-runtime) - -Vercel Edge Runtime does support WASM modules by adding a `?module` suffix. 
Initialize the encoder with the following snippet: - -```typescript -// @ts-expect-error -import wasm from "@dqbd/tiktoken/lite/tiktoken_bg.wasm?module"; -import model from "@dqbd/tiktoken/encoders/cl100k_base.json"; -import { init, Tiktoken } from "@dqbd/tiktoken/lite/init"; - -export const config = { runtime: "edge" }; - -export default async function (req: Request) { - await init((imports) => WebAssembly.instantiate(wasm, imports)); - - const encoding = new Tiktoken( - model.bpe_ranks, - model.special_tokens, - model.pat_str - ); - - const tokens = encoding.encode("hello world"); - encoding.free(); - - return new Response(`${tokens}`); -} -``` - -### [Cloudflare Workers](#cloudflare-workers) - -Similar to Vercel Edge Runtime, Cloudflare Workers must import the WASM binary file manually and use the `@dqbd/tiktoken/lite` version to fit the 1 MB limit. However, users need to point directly at the WASM binary via a relative path (including `./node_modules/`). - -Add the following rule to the `wrangler.toml` to upload WASM during build: - -```toml -[[rules]] -globs = ["**/*.wasm"] -type = "CompiledWasm" -``` - -Initialize the encoder with the following snippet: - -```javascript -import { init, Tiktoken } from "@dqbd/tiktoken/lite/init"; -import wasm from "./node_modules/@dqbd/tiktoken/lite/tiktoken_bg.wasm"; -import model from "@dqbd/tiktoken/encoders/cl100k_base.json"; - -export default { - async fetch() { - await init((imports) => WebAssembly.instantiate(wasm, imports)); - const encoder = new Tiktoken( - model.bpe_ranks, - model.special_tokens, - model.pat_str - ); - const tokens = encoder.encode("test"); - encoder.free(); - return new Response(`${tokens}`); - }, -}; -``` - -## Acknowledgements - -- https://github.com/zurawiki/tiktoken-rs diff --git a/js/scripts/post_process.ts b/js/scripts/post_process.ts index 9b7255de..34db13b5 100644 --- a/js/scripts/post_process.ts +++ b/js/scripts/post_process.ts @@ -380,7 +380,7 @@ for (const baseDir of [ } fs.copyFileSync( 
- path.resolve(__dirname, "../README.md"), + path.resolve(__dirname, "../../README.md"), path.resolve(__dirname, "../dist/README.md") ); From 83de511cc73fb640063996a35e9be0ea663e514e Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 10 May 2023 11:25:58 +0200 Subject: [PATCH 106/207] Add weak refs --- js/package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/js/package.json b/js/package.json index 627881c8..5557f654 100644 --- a/js/package.json +++ b/js/package.json @@ -9,8 +9,8 @@ "build:rank": "tsx scripts/inline_ranks.ts", "build:wasm": "run-s wasm:*", "build:postprocess": "tsx scripts/post_process.ts", - "wasm:bundler": "wasm-pack build --target bundler --release --out-dir dist && rm -rf dist/.gitignore dist/README.md dist/package.json", - "wasm:lite": "wasm-pack build --target bundler --release --out-dir dist/lite --no-default-features && rm -rf dist/lite/.gitignore dist/lite/README.md dist/lite/package.json", + "wasm:bundler": "wasm-pack build --target bundler --weak-refs --release --out-dir dist && rm -rf dist/.gitignore dist/README.md dist/package.json", + "wasm:lite": "wasm-pack build --target bundler --weak-refs --release --out-dir dist/lite --no-default-features && rm -rf dist/lite/.gitignore dist/lite/README.md dist/lite/package.json", "test": "yarn vitest" }, "repository": { From 243c061fa9004c0698dda3a03020c2ae56df9add Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 10 May 2023 11:56:24 +0200 Subject: [PATCH 107/207] Add changesets folder --- js/.changeset/config.json | 11 + js/package.json | 1 + js/yarn.lock | 1173 ++++++++++++++++++++++++++++++++++++- 3 files changed, 1179 insertions(+), 6 deletions(-) create mode 100644 js/.changeset/config.json diff --git a/js/.changeset/config.json b/js/.changeset/config.json new file mode 100644 index 00000000..6d2119a4 --- /dev/null +++ b/js/.changeset/config.json @@ -0,0 +1,11 @@ +{ + "$schema": "https://unpkg.com/@changesets/config@2.3.0/schema.json", + "changelog": 
"@changesets/cli/changelog", + "commit": false, + "fixed": [], + "linked": [], + "access": "restricted", + "baseBranch": "main", + "updateInternalDependencies": "patch", + "ignore": [] +} diff --git a/js/package.json b/js/package.json index 5557f654..bae9787e 100644 --- a/js/package.json +++ b/js/package.json @@ -19,6 +19,7 @@ }, "dependencies": {}, "devDependencies": { + "@changesets/cli": "^2.26.1", "@types/node": "^18.14.4", "npm-run-all": "^4.1.5", "outdent": "^0.8.0", diff --git a/js/yarn.lock b/js/yarn.lock index a60606e4..b207017e 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -2,6 +2,234 @@ # yarn lockfile v1 +"@babel/code-frame@^7.0.0": + version "7.21.4" + resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.21.4.tgz#d0fa9e4413aca81f2b23b9442797bda1826edb39" + integrity sha512-LYvhNKfwWSPpocw8GI7gpK2nq3HSDuEPC/uSYaALSJu9xjsalaaYFOq0Pwt5KmVqwEbZlDu81aLXwBOmD/Fv9g== + dependencies: + "@babel/highlight" "^7.18.6" + +"@babel/helper-validator-identifier@^7.18.6": + version "7.19.1" + resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.19.1.tgz#7eea834cf32901ffdc1a7ee555e2f9c27e249ca2" + integrity sha512-awrNfaMtnHUr653GgGEs++LlAvW6w+DcPrOliSMXWCKo597CwL5Acf/wWdNkf/tfEQE3mjkeD1YOVZOUV/od1w== + +"@babel/highlight@^7.18.6": + version "7.18.6" + resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.18.6.tgz#81158601e93e2563795adcbfbdf5d64be3f2ecdf" + integrity sha512-u7stbOuYjaPezCuLj29hNW1v64M2Md2qupEKP1fHc7WdOA3DgLh37suiSrZYY7haUB7iBeQZ9P1uiRF359do3g== + dependencies: + "@babel/helper-validator-identifier" "^7.18.6" + chalk "^2.0.0" + js-tokens "^4.0.0" + +"@babel/runtime@^7.20.1", "@babel/runtime@^7.5.5": + version "7.21.5" + resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.21.5.tgz#8492dddda9644ae3bda3b45eabe87382caee7200" + integrity sha512-8jI69toZqqcsnqGGqwGS4Qb1VwLOEp4hz+CXPywcvjs60u3B4Pom/U/7rm4W8tMOYEB+E9wgD0mW1l3r8qlI9Q== + dependencies: + 
regenerator-runtime "^0.13.11" + +"@changesets/apply-release-plan@^6.1.3": + version "6.1.3" + resolved "https://registry.yarnpkg.com/@changesets/apply-release-plan/-/apply-release-plan-6.1.3.tgz#3bcc0bd57ba00d50d20df7d0141f1a9b2134eaf7" + integrity sha512-ECDNeoc3nfeAe1jqJb5aFQX7CqzQhD2klXRez2JDb/aVpGUbX673HgKrnrgJRuQR/9f2TtLoYIzrGB9qwD77mg== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/config" "^2.3.0" + "@changesets/get-version-range-type" "^0.3.2" + "@changesets/git" "^2.0.0" + "@changesets/types" "^5.2.1" + "@manypkg/get-packages" "^1.1.3" + detect-indent "^6.0.0" + fs-extra "^7.0.1" + lodash.startcase "^4.4.0" + outdent "^0.5.0" + prettier "^2.7.1" + resolve-from "^5.0.0" + semver "^5.4.1" + +"@changesets/assemble-release-plan@^5.2.3": + version "5.2.3" + resolved "https://registry.yarnpkg.com/@changesets/assemble-release-plan/-/assemble-release-plan-5.2.3.tgz#5ce6191c6e193d40b566a7b0e01690cfb106f4db" + integrity sha512-g7EVZCmnWz3zMBAdrcKhid4hkHT+Ft1n0mLussFMcB1dE2zCuwcvGoy9ec3yOgPGF4hoMtgHaMIk3T3TBdvU9g== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/errors" "^0.1.4" + "@changesets/get-dependents-graph" "^1.3.5" + "@changesets/types" "^5.2.1" + "@manypkg/get-packages" "^1.1.3" + semver "^5.4.1" + +"@changesets/changelog-git@^0.1.14": + version "0.1.14" + resolved "https://registry.yarnpkg.com/@changesets/changelog-git/-/changelog-git-0.1.14.tgz#852caa7727dcf91497c131d05bc2cd6248532ada" + integrity sha512-+vRfnKtXVWsDDxGctOfzJsPhaCdXRYoe+KyWYoq5X/GqoISREiat0l3L8B0a453B2B4dfHGcZaGyowHbp9BSaA== + dependencies: + "@changesets/types" "^5.2.1" + +"@changesets/cli@^2.26.1": + version "2.26.1" + resolved "https://registry.yarnpkg.com/@changesets/cli/-/cli-2.26.1.tgz#2d10858d7d32314a524e383111c96d831eb0402f" + integrity sha512-XnTa+b51vt057fyAudvDKGB0Sh72xutQZNAdXkCqPBKO2zvs2yYZx5hFZj1u9cbtpwM6Sxtcr02/FQJfZOzemQ== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/apply-release-plan" "^6.1.3" + 
"@changesets/assemble-release-plan" "^5.2.3" + "@changesets/changelog-git" "^0.1.14" + "@changesets/config" "^2.3.0" + "@changesets/errors" "^0.1.4" + "@changesets/get-dependents-graph" "^1.3.5" + "@changesets/get-release-plan" "^3.0.16" + "@changesets/git" "^2.0.0" + "@changesets/logger" "^0.0.5" + "@changesets/pre" "^1.0.14" + "@changesets/read" "^0.5.9" + "@changesets/types" "^5.2.1" + "@changesets/write" "^0.2.3" + "@manypkg/get-packages" "^1.1.3" + "@types/is-ci" "^3.0.0" + "@types/semver" "^6.0.0" + ansi-colors "^4.1.3" + chalk "^2.1.0" + enquirer "^2.3.0" + external-editor "^3.1.0" + fs-extra "^7.0.1" + human-id "^1.0.2" + is-ci "^3.0.1" + meow "^6.0.0" + outdent "^0.5.0" + p-limit "^2.2.0" + preferred-pm "^3.0.0" + resolve-from "^5.0.0" + semver "^5.4.1" + spawndamnit "^2.0.0" + term-size "^2.1.0" + tty-table "^4.1.5" + +"@changesets/config@^2.3.0": + version "2.3.0" + resolved "https://registry.yarnpkg.com/@changesets/config/-/config-2.3.0.tgz#bff074d6492fa772cee139f9a04efa4cd56445bb" + integrity sha512-EgP/px6mhCx8QeaMAvWtRrgyxW08k/Bx2tpGT+M84jEdX37v3VKfh4Cz1BkwrYKuMV2HZKeHOh8sHvja/HcXfQ== + dependencies: + "@changesets/errors" "^0.1.4" + "@changesets/get-dependents-graph" "^1.3.5" + "@changesets/logger" "^0.0.5" + "@changesets/types" "^5.2.1" + "@manypkg/get-packages" "^1.1.3" + fs-extra "^7.0.1" + micromatch "^4.0.2" + +"@changesets/errors@^0.1.4": + version "0.1.4" + resolved "https://registry.yarnpkg.com/@changesets/errors/-/errors-0.1.4.tgz#f79851746c43679a66b383fdff4c012f480f480d" + integrity sha512-HAcqPF7snsUJ/QzkWoKfRfXushHTu+K5KZLJWPb34s4eCZShIf8BFO3fwq6KU8+G7L5KdtN2BzQAXOSXEyiY9Q== + dependencies: + extendable-error "^0.1.5" + +"@changesets/get-dependents-graph@^1.3.5": + version "1.3.5" + resolved "https://registry.yarnpkg.com/@changesets/get-dependents-graph/-/get-dependents-graph-1.3.5.tgz#f94c6672d2f9a87aa35512eea74550585ba41c21" + integrity sha512-w1eEvnWlbVDIY8mWXqWuYE9oKhvIaBhzqzo4ITSJY9hgoqQ3RoBqwlcAzg11qHxv/b8ReDWnMrpjpKrW6m1ZTA== + 
dependencies: + "@changesets/types" "^5.2.1" + "@manypkg/get-packages" "^1.1.3" + chalk "^2.1.0" + fs-extra "^7.0.1" + semver "^5.4.1" + +"@changesets/get-release-plan@^3.0.16": + version "3.0.16" + resolved "https://registry.yarnpkg.com/@changesets/get-release-plan/-/get-release-plan-3.0.16.tgz#5d9cfc4ffda02c496ef0fde407210de8e3a0fb19" + integrity sha512-OpP9QILpBp1bY2YNIKFzwigKh7Qe9KizRsZomzLe6pK8IUo8onkAAVUD8+JRKSr8R7d4+JRuQrfSSNlEwKyPYg== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/assemble-release-plan" "^5.2.3" + "@changesets/config" "^2.3.0" + "@changesets/pre" "^1.0.14" + "@changesets/read" "^0.5.9" + "@changesets/types" "^5.2.1" + "@manypkg/get-packages" "^1.1.3" + +"@changesets/get-version-range-type@^0.3.2": + version "0.3.2" + resolved "https://registry.yarnpkg.com/@changesets/get-version-range-type/-/get-version-range-type-0.3.2.tgz#8131a99035edd11aa7a44c341cbb05e668618c67" + integrity sha512-SVqwYs5pULYjYT4op21F2pVbcrca4qA/bAA3FmFXKMN7Y+HcO8sbZUTx3TAy2VXulP2FACd1aC7f2nTuqSPbqg== + +"@changesets/git@^2.0.0": + version "2.0.0" + resolved "https://registry.yarnpkg.com/@changesets/git/-/git-2.0.0.tgz#8de57649baf13a86eb669a25fa51bcad5cea517f" + integrity sha512-enUVEWbiqUTxqSnmesyJGWfzd51PY4H7mH9yUw0hPVpZBJ6tQZFMU3F3mT/t9OJ/GjyiM4770i+sehAn6ymx6A== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/errors" "^0.1.4" + "@changesets/types" "^5.2.1" + "@manypkg/get-packages" "^1.1.3" + is-subdir "^1.1.1" + micromatch "^4.0.2" + spawndamnit "^2.0.0" + +"@changesets/logger@^0.0.5": + version "0.0.5" + resolved "https://registry.yarnpkg.com/@changesets/logger/-/logger-0.0.5.tgz#68305dd5a643e336be16a2369cb17cdd8ed37d4c" + integrity sha512-gJyZHomu8nASHpaANzc6bkQMO9gU/ib20lqew1rVx753FOxffnCrJlGIeQVxNWCqM+o6OOleCo/ivL8UAO5iFw== + dependencies: + chalk "^2.1.0" + +"@changesets/parse@^0.3.16": + version "0.3.16" + resolved "https://registry.yarnpkg.com/@changesets/parse/-/parse-0.3.16.tgz#f8337b70aeb476dc81745ab3294022909bc4a84a" + integrity 
sha512-127JKNd167ayAuBjUggZBkmDS5fIKsthnr9jr6bdnuUljroiERW7FBTDNnNVyJ4l69PzR57pk6mXQdtJyBCJKg== + dependencies: + "@changesets/types" "^5.2.1" + js-yaml "^3.13.1" + +"@changesets/pre@^1.0.14": + version "1.0.14" + resolved "https://registry.yarnpkg.com/@changesets/pre/-/pre-1.0.14.tgz#9df73999a4d15804da7381358d77bb37b00ddf0f" + integrity sha512-dTsHmxQWEQekHYHbg+M1mDVYFvegDh9j/kySNuDKdylwfMEevTeDouR7IfHNyVodxZXu17sXoJuf2D0vi55FHQ== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/errors" "^0.1.4" + "@changesets/types" "^5.2.1" + "@manypkg/get-packages" "^1.1.3" + fs-extra "^7.0.1" + +"@changesets/read@^0.5.9": + version "0.5.9" + resolved "https://registry.yarnpkg.com/@changesets/read/-/read-0.5.9.tgz#a1b63a82b8e9409738d7a0f9cc39b6d7c28cbab0" + integrity sha512-T8BJ6JS6j1gfO1HFq50kU3qawYxa4NTbI/ASNVVCBTsKquy2HYwM9r7ZnzkiMe8IEObAJtUVGSrePCOxAK2haQ== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/git" "^2.0.0" + "@changesets/logger" "^0.0.5" + "@changesets/parse" "^0.3.16" + "@changesets/types" "^5.2.1" + chalk "^2.1.0" + fs-extra "^7.0.1" + p-filter "^2.1.0" + +"@changesets/types@^4.0.1": + version "4.1.0" + resolved "https://registry.yarnpkg.com/@changesets/types/-/types-4.1.0.tgz#fb8f7ca2324fd54954824e864f9a61a82cb78fe0" + integrity sha512-LDQvVDv5Kb50ny2s25Fhm3d9QSZimsoUGBsUioj6MC3qbMUCuC8GPIvk/M6IvXx3lYhAs0lwWUQLb+VIEUCECw== + +"@changesets/types@^5.2.1": + version "5.2.1" + resolved "https://registry.yarnpkg.com/@changesets/types/-/types-5.2.1.tgz#a228c48004aa8a93bce4be2d1d31527ef3bf21f6" + integrity sha512-myLfHbVOqaq9UtUKqR/nZA/OY7xFjQMdfgfqeZIBK4d0hA6pgxArvdv8M+6NUzzBsjWLOtvApv8YHr4qM+Kpfg== + +"@changesets/write@^0.2.3": + version "0.2.3" + resolved "https://registry.yarnpkg.com/@changesets/write/-/write-0.2.3.tgz#baf6be8ada2a67b9aba608e251bfea4fdc40bc63" + integrity sha512-Dbamr7AIMvslKnNYsLFafaVORx4H0pvCA2MHqgtNCySMe1blImEyAEOzDmcgKAkgz4+uwoLz7demIrX+JBr/Xw== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/types" 
"^5.2.1" + fs-extra "^7.0.1" + human-id "^1.0.2" + prettier "^2.7.1" + "@esbuild-kit/cjs-loader@^2.4.2": version "2.4.2" resolved "https://registry.yarnpkg.com/@esbuild-kit/cjs-loader/-/cjs-loader-2.4.2.tgz#cb4dde00fbf744a68c4f20162ea15a8242d0fa54" @@ -246,6 +474,28 @@ resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.17.10.tgz#ac779220f2da96afd480fb3f3148a292f66e7fc3" integrity sha512-oP+zFUjYNaMNmjTwlFtWep85hvwUu19cZklB3QsBOcZSs6y7hmH4LNCJ7075bsqzYaNvZFXJlAVaQ2ApITDXtw== +"@manypkg/find-root@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@manypkg/find-root/-/find-root-1.1.0.tgz#a62d8ed1cd7e7d4c11d9d52a8397460b5d4ad29f" + integrity sha512-mki5uBvhHzO8kYYix/WRy2WX8S3B5wdVSc9D6KcU5lQNglP2yt58/VfLuAK49glRXChosY8ap2oJ1qgma3GUVA== + dependencies: + "@babel/runtime" "^7.5.5" + "@types/node" "^12.7.1" + find-up "^4.1.0" + fs-extra "^8.1.0" + +"@manypkg/get-packages@^1.1.3": + version "1.1.3" + resolved "https://registry.yarnpkg.com/@manypkg/get-packages/-/get-packages-1.1.3.tgz#e184db9bba792fa4693de4658cfb1463ac2c9c47" + integrity sha512-fo+QhuU3qE/2TQMQmbVMqaQ6EWbMhi4ABWP+O4AM1NqPBuy0OrApV5LO6BrrgnhtAHS2NH6RrVk9OL181tTi8A== + dependencies: + "@babel/runtime" "^7.5.5" + "@changesets/types" "^4.0.1" + "@manypkg/find-root" "^1.1.0" + fs-extra "^8.1.0" + globby "^11.0.0" + read-yaml-file "^1.1.0" + "@nodelib/fs.scandir@2.1.5": version "2.1.5" resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5" @@ -289,16 +539,43 @@ resolved "https://registry.yarnpkg.com/@types/chai/-/chai-4.3.4.tgz#e913e8175db8307d78b4e8fa690408ba6b65dee4" integrity sha512-KnRanxnpfpjUTqTCXslZSEdLfXExwgNxYPdiO2WGUj8+HDjFi8R3k5RVKPeSCzLjCcshCAtVO2QBbVuAV4kTnw== +"@types/is-ci@^3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@types/is-ci/-/is-ci-3.0.0.tgz#7e8910af6857601315592436f030aaa3ed9783c3" + integrity 
sha512-Q0Op0hdWbYd1iahB+IFNQcWXFq4O0Q5MwQP7uN0souuQ4rPg1vEYcnIOfr1gY+M+6rc8FGoRaBO1mOOvL29sEQ== + dependencies: + ci-info "^3.1.0" + +"@types/minimist@^1.2.0": + version "1.2.2" + resolved "https://registry.yarnpkg.com/@types/minimist/-/minimist-1.2.2.tgz#ee771e2ba4b3dc5b372935d549fd9617bf345b8c" + integrity sha512-jhuKLIRrhvCPLqwPcx6INqmKeiA5EWrsCOPhrlFSrbrmU4ZMPjj5Ul/oLCMDO98XRUIwVm78xICz4EPCektzeQ== + "@types/node@*": version "18.14.1" resolved "https://registry.yarnpkg.com/@types/node/-/node-18.14.1.tgz#90dad8476f1e42797c49d6f8b69aaf9f876fc69f" integrity sha512-QH+37Qds3E0eDlReeboBxfHbX9omAcBCXEzswCu6jySP642jiM3cYSIkU/REqwhCUqXdonHFuBfJDiAJxMNhaQ== +"@types/node@^12.7.1": + version "12.20.55" + resolved "https://registry.yarnpkg.com/@types/node/-/node-12.20.55.tgz#c329cbd434c42164f846b909bd6f85b5537f6240" + integrity sha512-J8xLz7q2OFulZ2cyGTLE1TbbZcjpno7FaN6zdJNrgAdrJ+DZzh/uFR6YrTb4C+nXakvud8Q4+rbhoIWlYQbUFQ== + "@types/node@^18.14.4": version "18.14.4" resolved "https://registry.yarnpkg.com/@types/node/-/node-18.14.4.tgz#0e64ec0b35a772e1e3d849f9a0ff61782d0cb647" integrity sha512-VhCw7I7qO2X49+jaKcAUwi3rR+hbxT5VcYF493+Z5kMLI0DL568b7JI4IDJaxWFH0D/xwmGJNoXisyX+w7GH/g== +"@types/normalize-package-data@^2.4.0": + version "2.4.1" + resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.1.tgz#d3357479a0fdfdd5907fe67e17e0a85c906e1301" + integrity sha512-Gj7cI7z+98M282Tqmp2K5EIsoouUEzbBJhQQzDE3jSIRk6r9gsz0oUokqIUR4u1R3dMHo0pDHM7sNOHyhulypw== + +"@types/semver@^6.0.0": + version "6.2.3" + resolved "https://registry.yarnpkg.com/@types/semver/-/semver-6.2.3.tgz#5798ecf1bec94eaa64db39ee52808ec0693315aa" + integrity sha512-KQf+QAMWKMrtBMsB8/24w53tEsxllMj6TuA80TT/5igJalLI/zm0L3oXRbIAl4Ohfc85gyHX/jhMwsVkmhLU4A== + "@vitest/expect@0.28.5": version "0.28.5" resolved "https://registry.yarnpkg.com/@vitest/expect/-/expect-0.28.5.tgz#d5a6eccd014e9ad66fe87a20d16426a2815c0e8a" @@ -345,6 +622,11 @@ acorn@^8.8.1, acorn@^8.8.2: resolved 
"https://registry.yarnpkg.com/acorn/-/acorn-8.8.2.tgz#1b2f25db02af965399b9776b0c2c391276d37c4a" integrity sha512-xjIYgE8HBrkpd/sJqOGNspf8uHG+NOHGOw6a/Urj8taM2EXfdNAH2oFcPeIFfsv3+kz/mJrS5VuMqbNLjCa2vw== +ansi-colors@^4.1.1, ansi-colors@^4.1.3: + version "4.1.3" + resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-4.1.3.tgz#37611340eb2243e70cc604cad35d63270d48781b" + integrity sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw== + ansi-regex@^5.0.1: version "5.0.1" resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.1.tgz#082cb2c89c9fe8659a311a53bd6a4dc5301db304" @@ -362,6 +644,13 @@ ansi-styles@^3.2.1: dependencies: color-convert "^1.9.0" +ansi-styles@^4.0.0, ansi-styles@^4.1.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937" + integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg== + dependencies: + color-convert "^2.0.1" + ansi-styles@^5.0.0: version "5.2.0" resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-5.2.0.tgz#07449690ad45777d1924ac2abb2fc8895dba836b" @@ -372,6 +661,33 @@ ansi-styles@^6.0.0: resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-6.2.1.tgz#0e62320cf99c21afff3b3012192546aacbfb05c5" integrity sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug== +argparse@^1.0.7: + version "1.0.10" + resolved "https://registry.yarnpkg.com/argparse/-/argparse-1.0.10.tgz#bcd6791ea5ae09725e17e5ad988134cd40b3d911" + integrity sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg== + dependencies: + sprintf-js "~1.0.2" + +array-union@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/array-union/-/array-union-2.1.0.tgz#b798420adbeb1de828d84acd8a2e23d3efe85e8d" + integrity 
sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw== + +array.prototype.flat@^1.2.3: + version "1.3.1" + resolved "https://registry.yarnpkg.com/array.prototype.flat/-/array.prototype.flat-1.3.1.tgz#ffc6576a7ca3efc2f46a143b9d1dda9b4b3cf5e2" + integrity sha512-roTU0KWIOmJ4DRLmwKd19Otg0/mT3qPNt0Qb3GWW8iObuZXxrjB/pzn0R3hqpRSWg4HCwqx+0vwOnWnvlOyeIA== + dependencies: + call-bind "^1.0.2" + define-properties "^1.1.4" + es-abstract "^1.20.4" + es-shim-unscopables "^1.0.0" + +arrify@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/arrify/-/arrify-1.0.1.tgz#898508da2226f380df904728456849c1501a4b0d" + integrity sha512-3CYzex9M9FGQjCGMGyi6/31c8GJbgb0qGyrx5HWxPd0aCwh4cB2YjMb2Xf9UuoogrMrlO9cTqnB5rI5GHZTcUA== + assertion-error@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/assertion-error/-/assertion-error-1.1.0.tgz#e60b6b0e8f301bd97e5375215bda406c85118c0b" @@ -387,6 +703,13 @@ balanced-match@^1.0.0: resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee" integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw== +better-path-resolve@1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/better-path-resolve/-/better-path-resolve-1.0.0.tgz#13a35a1104cdd48a7b74bf8758f96a1ee613f99d" + integrity sha512-pbnl5XzGBdrFU/wT4jqmJVPn2B6UHPBOhzMQkY/SPUPB6QtUXtmBHBIwCbXJol93mOpGMnQyP/+BB19q04xj7g== + dependencies: + is-windows "^1.0.0" + brace-expansion@^1.1.7: version "1.1.11" resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" @@ -409,6 +732,13 @@ braces@^3.0.2: dependencies: fill-range "^7.0.1" +breakword@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/breakword/-/breakword-1.0.5.tgz#fd420a417f55016736b5b615161cae1c8f819810" + integrity 
sha512-ex5W9DoOQ/LUEU3PMdLs9ua/CYZl1678NUkKOdUSi8Aw5F1idieaiRURCBFJCwVcrD1J8Iy3vfWSloaMwO2qFg== + dependencies: + wcwidth "^1.0.1" + buffer-from@^1.0.0: version "1.1.2" resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.2.tgz#2b146a6fd72e80b4f55d255f35ed59a3a9a41bd5" @@ -427,6 +757,20 @@ call-bind@^1.0.0, call-bind@^1.0.2: function-bind "^1.1.1" get-intrinsic "^1.0.2" +camelcase-keys@^6.2.2: + version "6.2.2" + resolved "https://registry.yarnpkg.com/camelcase-keys/-/camelcase-keys-6.2.2.tgz#5e755d6ba51aa223ec7d3d52f25778210f9dc3c0" + integrity sha512-YrwaA0vEKazPBkn0ipTiMpSajYDSe+KjQfrjhcBMxJt/znbvlHd8Pw/Vamaz5EB4Wfhs3SUR3Z9mwRu/P3s3Yg== + dependencies: + camelcase "^5.3.1" + map-obj "^4.0.0" + quick-lru "^4.0.1" + +camelcase@^5.0.0, camelcase@^5.3.1: + version "5.3.1" + resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-5.3.1.tgz#e3c9b31569e106811df242f715725a1f4c494320" + integrity sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg== + chai@^4.3.7: version "4.3.7" resolved "https://registry.yarnpkg.com/chai/-/chai-4.3.7.tgz#ec63f6df01829088e8bf55fca839bcd464a8ec51" @@ -440,7 +784,7 @@ chai@^4.3.7: pathval "^1.1.1" type-detect "^4.0.5" -chalk@^2.4.1: +chalk@^2.0.0, chalk@^2.1.0, chalk@^2.4.1: version "2.4.2" resolved "https://registry.yarnpkg.com/chalk/-/chalk-2.4.2.tgz#cd42541677a54333cf541a49108c1432b44c9424" integrity sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ== @@ -449,11 +793,29 @@ chalk@^2.4.1: escape-string-regexp "^1.0.5" supports-color "^5.3.0" +chalk@^4.1.2: + version "4.1.2" + resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.2.tgz#aac4e2b7734a740867aeb16bf02aad556a1e7a01" + integrity sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA== + dependencies: + ansi-styles "^4.1.0" + supports-color "^7.1.0" + +chardet@^0.7.0: + version "0.7.0" + resolved 
"https://registry.yarnpkg.com/chardet/-/chardet-0.7.0.tgz#90094849f0937f2eedc2425d0d28a9e5f0cbad9e" + integrity sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA== + check-error@^1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/check-error/-/check-error-1.0.2.tgz#574d312edd88bb5dd8912e9286dd6c0aed4aac82" integrity sha512-BrgHpW9NURQgzoNyjfq0Wu6VFO6D7IZEmJNdtgNqpzGG8RuNFHt2jQxWlAs4HMe119chBnv+34syEZtc6IhLtA== +ci-info@^3.1.0, ci-info@^3.2.0: + version "3.8.0" + resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-3.8.0.tgz#81408265a5380c929f0bc665d62256628ce9ef91" + integrity sha512-eXTggHWSooYhq49F2opQhuHWgzucfF2YgODK4e1566GQs5BIfP30B0oenwBJHfWxAs2fyPB1s7Mg949zLf61Yw== + cli-truncate@^3.1.0: version "3.1.0" resolved "https://registry.yarnpkg.com/cli-truncate/-/cli-truncate-3.1.0.tgz#3f23ab12535e3d73e839bb43e73c9de487db1389" @@ -462,6 +824,29 @@ cli-truncate@^3.1.0: slice-ansi "^5.0.0" string-width "^5.0.0" +cliui@^6.0.0: + version "6.0.0" + resolved "https://registry.yarnpkg.com/cliui/-/cliui-6.0.0.tgz#511d702c0c4e41ca156d7d0e96021f23e13225b1" + integrity sha512-t6wbgtoCXvAzst7QgXxJYqPt0usEfbgQdftEPbLL/cvv6HPE5VgvqCuAIDR0NgU52ds6rFwqrgakNLrHEjCbrQ== + dependencies: + string-width "^4.2.0" + strip-ansi "^6.0.0" + wrap-ansi "^6.2.0" + +cliui@^8.0.1: + version "8.0.1" + resolved "https://registry.yarnpkg.com/cliui/-/cliui-8.0.1.tgz#0c04b075db02cbfe60dc8e6cf2f5486b1a3608aa" + integrity sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ== + dependencies: + string-width "^4.2.0" + strip-ansi "^6.0.1" + wrap-ansi "^7.0.0" + +clone@^1.0.2: + version "1.0.4" + resolved "https://registry.yarnpkg.com/clone/-/clone-1.0.4.tgz#da309cc263df15994c688ca902179ca3c7cd7c7e" + integrity sha512-JQHZ2QMW6l3aH/j6xCqQThY/9OH4D/9ls34cgkUBiEeocRTU04tHfKPBsUK1PqZCUQM7GiA0IIXJSuXHI64Kbg== + code-block-writer@^11.0.3: version "11.0.3" resolved 
"https://registry.yarnpkg.com/code-block-writer/-/code-block-writer-11.0.3.tgz#9eec2993edfb79bfae845fbc093758c0a0b73b76" @@ -474,16 +859,37 @@ color-convert@^1.9.0: dependencies: color-name "1.1.3" +color-convert@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3" + integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ== + dependencies: + color-name "~1.1.4" + color-name@1.1.3: version "1.1.3" resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.3.tgz#a7d0558bd89c42f795dd42328f740831ca53bc25" integrity sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw== +color-name@~1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" + integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== + concat-map@0.0.1: version "0.0.1" resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" integrity sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg== +cross-spawn@^5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-5.1.0.tgz#e8bd0efee58fcff6f8f94510a0a554bbfa235449" + integrity sha512-pTgQJ5KC0d2hcY8eyL1IzlBPYjTkyH72XRZPnLyKus2mBfNjQs3klqbJU2VILqZryAZUt9JOb3h/mWMy23/f5A== + dependencies: + lru-cache "^4.0.1" + shebang-command "^1.2.0" + which "^1.2.9" + cross-spawn@^6.0.5: version "6.0.5" resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-6.0.5.tgz#4a5ec7c64dfae22c3a14124dbacdee846d80cbc4" @@ -495,6 +901,31 @@ cross-spawn@^6.0.5: shebang-command "^1.2.0" which "^1.2.9" +csv-generate@^3.4.3: + version "3.4.3" + resolved 
"https://registry.yarnpkg.com/csv-generate/-/csv-generate-3.4.3.tgz#bc42d943b45aea52afa896874291da4b9108ffff" + integrity sha512-w/T+rqR0vwvHqWs/1ZyMDWtHHSJaN06klRqJXBEpDJaM/+dZkso0OKh1VcuuYvK3XM53KysVNq8Ko/epCK8wOw== + +csv-parse@^4.16.3: + version "4.16.3" + resolved "https://registry.yarnpkg.com/csv-parse/-/csv-parse-4.16.3.tgz#7ca624d517212ebc520a36873c3478fa66efbaf7" + integrity sha512-cO1I/zmz4w2dcKHVvpCr7JVRu8/FymG5OEpmvsZYlccYolPBLoVGKUHgNoc4ZGkFeFlWGEDmMyBM+TTqRdW/wg== + +csv-stringify@^5.6.5: + version "5.6.5" + resolved "https://registry.yarnpkg.com/csv-stringify/-/csv-stringify-5.6.5.tgz#c6d74badda4b49a79bf4e72f91cce1e33b94de00" + integrity sha512-PjiQ659aQ+fUTQqSrd1XEDnOr52jh30RBurfzkscaE2tPaFsDH5wOAHJiw8XAHphRknCwMUE9KRayc4K/NbO8A== + +csv@^5.5.3: + version "5.5.3" + resolved "https://registry.yarnpkg.com/csv/-/csv-5.5.3.tgz#cd26c1e45eae00ce6a9b7b27dcb94955ec95207d" + integrity sha512-QTaY0XjjhTQOdguARF0lGKm5/mEq9PD9/VhZZegHDIBq2tQwgNpHc3dneD4mGo2iJs+fTKv5Bp0fZ+BRuY3Z0g== + dependencies: + csv-generate "^3.4.3" + csv-parse "^4.16.3" + csv-stringify "^5.6.5" + stream-transform "^2.1.3" + debug@^4.3.4: version "4.3.4" resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865" @@ -502,6 +933,19 @@ debug@^4.3.4: dependencies: ms "2.1.2" +decamelize-keys@^1.1.0: + version "1.1.1" + resolved "https://registry.yarnpkg.com/decamelize-keys/-/decamelize-keys-1.1.1.tgz#04a2d523b2f18d80d0158a43b895d56dff8d19d8" + integrity sha512-WiPxgEirIV0/eIOMcnFBA3/IJZAZqKnwAwWyvvdi4lsr1WCN22nhdf/3db3DoZcUjTV2SqfzIwNyp6y2xs3nmg== + dependencies: + decamelize "^1.1.0" + map-obj "^1.0.0" + +decamelize@^1.1.0, decamelize@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-1.2.0.tgz#f6534d15148269b20352e7bee26f501f9a191290" + integrity sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA== + deep-eql@^4.1.2: version "4.1.3" resolved 
"https://registry.yarnpkg.com/deep-eql/-/deep-eql-4.1.3.tgz#7c7775513092f7df98d8df9996dd085eb668cc6d" @@ -509,6 +953,13 @@ deep-eql@^4.1.2: dependencies: type-detect "^4.0.0" +defaults@^1.0.3: + version "1.0.4" + resolved "https://registry.yarnpkg.com/defaults/-/defaults-1.0.4.tgz#b0b02062c1e2aa62ff5d9528f0f98baa90978d7a" + integrity sha512-eFuaLoy/Rxalv2kr+lqMlUnrDWV+3j4pljOIJgLIhI058IQfWJ7vXhyEIHu+HtC738klGALYxOKDO0bQP3tg8A== + dependencies: + clone "^1.0.2" + define-properties@^1.1.3, define-properties@^1.1.4: version "1.2.0" resolved "https://registry.yarnpkg.com/define-properties/-/define-properties-1.2.0.tgz#52988570670c9eacedd8064f4a990f2405849bd5" @@ -517,21 +968,45 @@ define-properties@^1.1.3, define-properties@^1.1.4: has-property-descriptors "^1.0.0" object-keys "^1.1.1" +detect-indent@^6.0.0: + version "6.1.0" + resolved "https://registry.yarnpkg.com/detect-indent/-/detect-indent-6.1.0.tgz#592485ebbbf6b3b1ab2be175c8393d04ca0d57e6" + integrity sha512-reYkTUJAZb9gUuZ2RvVCNhVHdg62RHnJ7WJl8ftMi4diZ6NWlciOzQN88pUhSELEwflJht4oQDv0F0BMlwaYtA== + diff@^5.1.0: version "5.1.0" resolved "https://registry.yarnpkg.com/diff/-/diff-5.1.0.tgz#bc52d298c5ea8df9194800224445ed43ffc87e40" integrity sha512-D+mk+qE8VC/PAUrlAU34N+VfXev0ghe5ywmpqrawphmVZc1bEfn56uo9qpyGp1p4xpzOHkSW4ztBd6L7Xx4ACw== +dir-glob@^3.0.1: + version "3.0.1" + resolved "https://registry.yarnpkg.com/dir-glob/-/dir-glob-3.0.1.tgz#56dbf73d992a4a93ba1584f4534063fd2e41717f" + integrity sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA== + dependencies: + path-type "^4.0.0" + eastasianwidth@^0.2.0: version "0.2.0" resolved "https://registry.yarnpkg.com/eastasianwidth/-/eastasianwidth-0.2.0.tgz#696ce2ec0aa0e6ea93a397ffcf24aa7840c827cb" integrity sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA== +emoji-regex@^8.0.0: + version "8.0.0" + resolved 
"https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37" + integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A== + emoji-regex@^9.2.2: version "9.2.2" resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-9.2.2.tgz#840c8803b0d8047f4ff0cf963176b32d4ef3ed72" integrity sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg== +enquirer@^2.3.0: + version "2.3.6" + resolved "https://registry.yarnpkg.com/enquirer/-/enquirer-2.3.6.tgz#2a7fe5dd634a1e4125a975ec994ff5456dc3734d" + integrity sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg== + dependencies: + ansi-colors "^4.1.1" + error-ex@^1.3.1: version "1.3.2" resolved "https://registry.yarnpkg.com/error-ex/-/error-ex-1.3.2.tgz#b4ac40648107fdcdcfae242f428bea8a14d4f1bf" @@ -587,6 +1062,13 @@ es-set-tostringtag@^2.0.1: has "^1.0.3" has-tostringtag "^1.0.0" +es-shim-unscopables@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/es-shim-unscopables/-/es-shim-unscopables-1.0.0.tgz#702e632193201e3edf8713635d083d378e510241" + integrity sha512-Jm6GPcCdC30eMLbZ2x8z2WuRwAws3zTBBKuusffYVUrNj/GVSUAZ+xKMaUpfNDR5IbyNA5LJbaecoUVbmUcB1w== + dependencies: + has "^1.0.3" + es-to-primitive@^1.2.1: version "1.2.1" resolved "https://registry.yarnpkg.com/es-to-primitive/-/es-to-primitive-1.2.1.tgz#e55cd4c9cdc188bcefb03b366c736323fc5c898a" @@ -652,12 +1134,36 @@ esbuild@~0.17.6: "@esbuild/win32-ia32" "0.17.10" "@esbuild/win32-x64" "0.17.10" +escalade@^3.1.1: + version "3.1.1" + resolved "https://registry.yarnpkg.com/escalade/-/escalade-3.1.1.tgz#d8cfdc7000965c5a0174b4a82eaa5c0552742e40" + integrity sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw== + escape-string-regexp@^1.0.5: version "1.0.5" resolved 
"https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4" integrity sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg== -fast-glob@^3.2.12: +esprima@^4.0.0: + version "4.0.1" + resolved "https://registry.yarnpkg.com/esprima/-/esprima-4.0.1.tgz#13b04cdb3e6c5d19df91ab6987a8695619b0aa71" + integrity sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A== + +extendable-error@^0.1.5: + version "0.1.7" + resolved "https://registry.yarnpkg.com/extendable-error/-/extendable-error-0.1.7.tgz#60b9adf206264ac920058a7395685ae4670c2b96" + integrity sha512-UOiS2in6/Q0FK0R0q6UY9vYpQ21mr/Qn1KOnte7vsACuNJf514WvCCUHSRCPcgjPT2bAhNIJdlE6bVap1GKmeg== + +external-editor@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/external-editor/-/external-editor-3.1.0.tgz#cb03f740befae03ea4d283caed2741a83f335495" + integrity sha512-hMQ4CX1p1izmuLYyZqLMO/qGNw10wSv9QDCPfzXfyFrOaCSSoRfqE1Kf1s5an66J5JZC62NewG+mK49jOCtQew== + dependencies: + chardet "^0.7.0" + iconv-lite "^0.4.24" + tmp "^0.0.33" + +fast-glob@^3.2.12, fast-glob@^3.2.9: version "3.2.12" resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.2.12.tgz#7f39ec99c2e6ab030337142da9e0c18f37afae80" integrity sha512-DVj4CQIYYow0BlaelwK1pHl5n5cRSJfM60UA0zK891sVInoPri2Ekj7+e1CT3/3qxXenpI+nBBmQAcJPJgaj4w== @@ -682,6 +1188,30 @@ fill-range@^7.0.1: dependencies: to-regex-range "^5.0.1" +find-up@^4.0.0, find-up@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/find-up/-/find-up-4.1.0.tgz#97afe7d6cdc0bc5928584b7c8d7b16e8a9aa5d19" + integrity sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw== + dependencies: + locate-path "^5.0.0" + path-exists "^4.0.0" + +find-up@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/find-up/-/find-up-5.0.0.tgz#4c92819ecb7083561e4f4a240a86be5198f536fc" + integrity 
sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng== + dependencies: + locate-path "^6.0.0" + path-exists "^4.0.0" + +find-yarn-workspace-root2@1.2.16: + version "1.2.16" + resolved "https://registry.yarnpkg.com/find-yarn-workspace-root2/-/find-yarn-workspace-root2-1.2.16.tgz#60287009dd2f324f59646bdb4b7610a6b301c2a9" + integrity sha512-hr6hb1w8ePMpPVUK39S4RlwJzi+xPLuVuG8XlwXU3KD5Yn3qgBWVfy3AzNlDhWvE1EORCE65/Qm26rFQt3VLVA== + dependencies: + micromatch "^4.0.2" + pkg-dir "^4.2.0" + for-each@^0.3.3: version "0.3.3" resolved "https://registry.yarnpkg.com/for-each/-/for-each-0.3.3.tgz#69b447e88a0a5d32c3e7084f3f1710034b21376e" @@ -689,6 +1219,24 @@ for-each@^0.3.3: dependencies: is-callable "^1.1.3" +fs-extra@^7.0.1: + version "7.0.1" + resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-7.0.1.tgz#4f189c44aa123b895f722804f55ea23eadc348e9" + integrity sha512-YJDaCJZEnBmcbw13fvdAM9AwNOJwOzrE4pqMqBq5nFiEqXUqHwlK4B+3pUw6JNvfSPtX05xFHtYy/1ni01eGCw== + dependencies: + graceful-fs "^4.1.2" + jsonfile "^4.0.0" + universalify "^0.1.0" + +fs-extra@^8.1.0: + version "8.1.0" + resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-8.1.0.tgz#49d43c45a88cd9677668cb7be1b46efdb8d2e1c0" + integrity sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g== + dependencies: + graceful-fs "^4.2.0" + jsonfile "^4.0.0" + universalify "^0.1.0" + fsevents@~2.3.2: version "2.3.2" resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.2.tgz#8a526f78b8fdf4623b709e0b975c52c24c02fd1a" @@ -714,6 +1262,11 @@ functions-have-names@^1.2.2: resolved "https://registry.yarnpkg.com/functions-have-names/-/functions-have-names-1.2.3.tgz#0404fe4ee2ba2f607f0e0ec3c80bae994133b834" integrity sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ== +get-caller-file@^2.0.1, get-caller-file@^2.0.5: + version "2.0.5" + resolved 
"https://registry.yarnpkg.com/get-caller-file/-/get-caller-file-2.0.5.tgz#4f94412a82db32f36e3b0b9741f8a97feb031f7e" + integrity sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg== + get-func-name@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/get-func-name/-/get-func-name-2.0.0.tgz#ead774abee72e20409433a066366023dd6887a41" @@ -755,6 +1308,18 @@ globalthis@^1.0.3: dependencies: define-properties "^1.1.3" +globby@^11.0.0: + version "11.1.0" + resolved "https://registry.yarnpkg.com/globby/-/globby-11.1.0.tgz#bd4be98bb042f83d796f7e3811991fbe82a0d34b" + integrity sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g== + dependencies: + array-union "^2.1.0" + dir-glob "^3.0.1" + fast-glob "^3.2.9" + ignore "^5.2.0" + merge2 "^1.4.1" + slash "^3.0.0" + gopd@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/gopd/-/gopd-1.0.1.tgz#29ff76de69dac7489b7c0918a5788e56477c332c" @@ -767,6 +1332,21 @@ graceful-fs@^4.1.2: resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.10.tgz#147d3a006da4ca3ce14728c7aefc287c367d7a6c" integrity sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA== +graceful-fs@^4.1.5, graceful-fs@^4.1.6, graceful-fs@^4.2.0: + version "4.2.11" + resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.11.tgz#4183e4e8bf08bb6e05bbb2f7d2e0c8f712ca40e3" + integrity sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ== + +grapheme-splitter@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz#9cf3a665c6247479896834af35cf1dbb4400767e" + integrity sha512-bzh50DW9kTPM00T8y4o8vQg89Di9oLJVLW/KaOGIXJWP/iqCN6WKYkbNOF04vFLJhwcpYUh9ydh/+5vpOqV4YQ== + +hard-rejection@^2.1.0: + version "2.1.0" + resolved 
"https://registry.yarnpkg.com/hard-rejection/-/hard-rejection-2.1.0.tgz#1c6eda5c1685c63942766d79bb40ae773cecd883" + integrity sha512-VIZB+ibDhx7ObhAe7OVtoEbuP4h/MuOTHJ+J8h/eBXotJYl0fBgR72xDFCKgIh22OJZIOVNxBMWuhAr10r8HdA== + has-bigints@^1.0.1, has-bigints@^1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/has-bigints/-/has-bigints-1.0.2.tgz#0871bd3e3d51626f6ca0966668ba35d5602d6eaa" @@ -777,6 +1357,11 @@ has-flag@^3.0.0: resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-3.0.0.tgz#b5d454dc2199ae225699f3467e5a07f3b955bafd" integrity sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw== +has-flag@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b" + integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ== + has-property-descriptors@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/has-property-descriptors/-/has-property-descriptors-1.0.0.tgz#610708600606d36961ed04c196193b6a607fa861" @@ -813,6 +1398,28 @@ hosted-git-info@^2.1.4: resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-2.8.9.tgz#dffc0bf9a21c02209090f2aa69429e1414daf3f9" integrity sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw== +human-id@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/human-id/-/human-id-1.0.2.tgz#e654d4b2b0d8b07e45da9f6020d8af17ec0a5df3" + integrity sha512-UNopramDEhHJD+VR+ehk8rOslwSfByxPIZyJRfV739NDhN5LF1fa1MqnzKm2lGTQRjNrjK19Q5fhkgIfjlVUKw== + +iconv-lite@^0.4.24: + version "0.4.24" + resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.4.24.tgz#2022b4b25fbddc21d2f524974a474aafe733908b" + integrity sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA== + dependencies: + safer-buffer ">= 2.1.2 < 3" + +ignore@^5.2.0: + version "5.2.4" + resolved 
"https://registry.yarnpkg.com/ignore/-/ignore-5.2.4.tgz#a291c0c6178ff1b960befe47fcdec301674a6324" + integrity sha512-MAb38BcSbH0eHNBxn7ql2NH/kX33OkB3lZ1BNdh7ENeRChHTYsTvWrMubiIAMNS2llXEEgZ1MUOBtXChP3kaFQ== + +indent-string@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-4.0.0.tgz#624f8f4497d619b2d9768531d58f4122854d7251" + integrity sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg== + internal-slot@^1.0.4: version "1.0.5" resolved "https://registry.yarnpkg.com/internal-slot/-/internal-slot-1.0.5.tgz#f2a2ee21f668f8627a4667f309dc0f4fb6674986" @@ -856,6 +1463,13 @@ is-callable@^1.1.3, is-callable@^1.1.4, is-callable@^1.2.7: resolved "https://registry.yarnpkg.com/is-callable/-/is-callable-1.2.7.tgz#3bc2a85ea742d9e36205dcacdd72ca1fdc51b055" integrity sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA== +is-ci@^3.0.1: + version "3.0.1" + resolved "https://registry.yarnpkg.com/is-ci/-/is-ci-3.0.1.tgz#db6ecbed1bd659c43dac0f45661e7674103d1867" + integrity sha512-ZYvCgrefwqoQ6yTyYUbQu64HsITZ3NfKX1lzaEYdkTDcfKzzCI/wthRRYKkdjHKFVgNiXKAKm65Zo1pk2as/QQ== + dependencies: + ci-info "^3.2.0" + is-core-module@^2.9.0: version "2.11.0" resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.11.0.tgz#ad4cb3e3863e814523c96f3f58d26cc570ff0144" @@ -875,6 +1489,11 @@ is-extglob@^2.1.1: resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-2.1.1.tgz#a88c02535791f02ed37c76a1b9ea9773c833f8c2" integrity sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ== +is-fullwidth-code-point@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz#f116f8064fe90b3f7844a38997c0b75051269f1d" + integrity sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg== + is-fullwidth-code-point@^4.0.0: 
version "4.0.0" resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-4.0.0.tgz#fae3167c729e7463f8461ce512b080a49268aa88" @@ -904,6 +1523,11 @@ is-number@^7.0.0: resolved "https://registry.yarnpkg.com/is-number/-/is-number-7.0.0.tgz#7535345b896734d5f80c4d06c50955527a14f12b" integrity sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng== +is-plain-obj@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-1.1.0.tgz#71a50c8429dfca773c92a390a4a03b39fcd51d3e" + integrity sha512-yvkRyxmFKEOQ4pNXCmJG5AEQNlXJS5LaONXo5/cLdTZdWvsZ1ioJEonLGAosKlMWE8lwUy/bJzMjcw8az73+Fg== + is-regex@^1.1.4: version "1.1.4" resolved "https://registry.yarnpkg.com/is-regex/-/is-regex-1.1.4.tgz#eef5663cd59fa4c0ae339505323df6854bb15958" @@ -926,6 +1550,13 @@ is-string@^1.0.5, is-string@^1.0.7: dependencies: has-tostringtag "^1.0.0" +is-subdir@^1.1.1: + version "1.2.0" + resolved "https://registry.yarnpkg.com/is-subdir/-/is-subdir-1.2.0.tgz#b791cd28fab5202e91a08280d51d9d7254fd20d4" + integrity sha512-2AT6j+gXe/1ueqbW6fLZJiIw3F8iXGJtt0yDrZaBhAZEG1raiTxKWU+IPqMCzQAXOUCKdA4UDMgacKH25XG2Cw== + dependencies: + better-path-resolve "1.0.0" + is-symbol@^1.0.2, is-symbol@^1.0.3: version "1.0.4" resolved "https://registry.yarnpkg.com/is-symbol/-/is-symbol-1.0.4.tgz#a6dac93b635b063ca6872236de88910a57af139c" @@ -951,21 +1582,66 @@ is-weakref@^1.0.2: dependencies: call-bind "^1.0.2" +is-windows@^1.0.0: + version "1.0.2" + resolved "https://registry.yarnpkg.com/is-windows/-/is-windows-1.0.2.tgz#d1850eb9791ecd18e6182ce12a30f396634bb19d" + integrity sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA== + isexe@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10" integrity sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw== 
+js-tokens@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499" + integrity sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ== + +js-yaml@^3.13.0, js-yaml@^3.13.1, js-yaml@^3.6.1: + version "3.14.1" + resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-3.14.1.tgz#dae812fdb3825fa306609a8717383c50c36a0537" + integrity sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g== + dependencies: + argparse "^1.0.7" + esprima "^4.0.0" + json-parse-better-errors@^1.0.1: version "1.0.2" resolved "https://registry.yarnpkg.com/json-parse-better-errors/-/json-parse-better-errors-1.0.2.tgz#bb867cfb3450e69107c131d1c514bab3dc8bcaa9" integrity sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw== +json-parse-even-better-errors@^2.3.0: + version "2.3.1" + resolved "https://registry.yarnpkg.com/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz#7c47805a94319928e05777405dc12e1f7a4ee02d" + integrity sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w== + jsonc-parser@^3.2.0: version "3.2.0" resolved "https://registry.yarnpkg.com/jsonc-parser/-/jsonc-parser-3.2.0.tgz#31ff3f4c2b9793f89c67212627c51c6394f88e76" integrity sha512-gfFQZrcTc8CnKXp6Y4/CBT3fTc0OVuDofpre4aEeEpSBPV5X5v4+Vmx+8snU7RLPrNHPKSgLxGo9YuQzz20o+w== +jsonfile@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-4.0.0.tgz#8771aae0799b64076b76640fca058f9c10e33ecb" + integrity sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg== + optionalDependencies: + graceful-fs "^4.1.6" + +kind-of@^6.0.3: + version "6.0.3" + resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-6.0.3.tgz#07c05034a6c349fa06e24fa35aa76db4580ce4dd" + integrity 
sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw== + +kleur@^4.1.5: + version "4.1.5" + resolved "https://registry.yarnpkg.com/kleur/-/kleur-4.1.5.tgz#95106101795f7050c6c650f350c683febddb1780" + integrity sha512-o+NO+8WrRiQEE4/7nwRJhN1HWpVmJm511pBHUxPLtp0BUISzlBplORYSmTclCnJvQq2tKu/sgl3xVpkc7ZWuQQ== + +lines-and-columns@^1.1.6: + version "1.2.4" + resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.2.4.tgz#eca284f75d2965079309dc0ad9255abb2ebc1632" + integrity sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg== + load-json-file@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/load-json-file/-/load-json-file-4.0.0.tgz#2f5f45ab91e33216234fd53adab668eb4ec0993b" @@ -976,11 +1652,40 @@ load-json-file@^4.0.0: pify "^3.0.0" strip-bom "^3.0.0" +load-yaml-file@^0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/load-yaml-file/-/load-yaml-file-0.2.0.tgz#af854edaf2bea89346c07549122753c07372f64d" + integrity sha512-OfCBkGEw4nN6JLtgRidPX6QxjBQGQf72q3si2uvqyFEMbycSFFHwAZeXx6cJgFM9wmLrf9zBwCP3Ivqa+LLZPw== + dependencies: + graceful-fs "^4.1.5" + js-yaml "^3.13.0" + pify "^4.0.1" + strip-bom "^3.0.0" + local-pkg@^0.4.2: version "0.4.3" resolved "https://registry.yarnpkg.com/local-pkg/-/local-pkg-0.4.3.tgz#0ff361ab3ae7f1c19113d9bb97b98b905dbc4963" integrity sha512-SFppqq5p42fe2qcZQqqEOiVRXl+WCP1MdT6k7BDEW1j++sp5fIY+/fdRQitvKgB5BrBcmrs5m/L0v2FrU5MY1g== +locate-path@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-5.0.0.tgz#1afba396afd676a6d42504d0a67a3a7eb9f62aa0" + integrity sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g== + dependencies: + p-locate "^4.1.0" + +locate-path@^6.0.0: + version "6.0.0" + resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-6.0.0.tgz#55321eb309febbc59c4801d931a72452a681d286" + integrity 
sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw== + dependencies: + p-locate "^5.0.0" + +lodash.startcase@^4.4.0: + version "4.4.0" + resolved "https://registry.yarnpkg.com/lodash.startcase/-/lodash.startcase-4.4.0.tgz#9436e34ed26093ed7ffae1936144350915d9add8" + integrity sha512-+WKqsK294HMSc2jEbNgpHpd0JfIBhp7rEV4aqXWqFr6AlXov+SlcgB1Fv01y2kGe3Gc8nMW7VA0SrGuSkRfIEg== + loupe@^2.3.1, loupe@^2.3.6: version "2.3.6" resolved "https://registry.yarnpkg.com/loupe/-/loupe-2.3.6.tgz#76e4af498103c532d1ecc9be102036a21f787b53" @@ -988,17 +1693,52 @@ loupe@^2.3.1, loupe@^2.3.6: dependencies: get-func-name "^2.0.0" +lru-cache@^4.0.1: + version "4.1.5" + resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-4.1.5.tgz#8bbe50ea85bed59bc9e33dcab8235ee9bcf443cd" + integrity sha512-sWZlbEP2OsHNkXrMl5GYk/jKk70MBng6UU4YI/qGDYbgf6YbP4EvmqISbXCoJiRKs+1bSpFHVgQxvJ17F2li5g== + dependencies: + pseudomap "^1.0.2" + yallist "^2.1.2" + +map-obj@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/map-obj/-/map-obj-1.0.1.tgz#d933ceb9205d82bdcf4886f6742bdc2b4dea146d" + integrity sha512-7N/q3lyZ+LVCp7PzuxrJr4KMbBE2hW7BT7YNia330OFxIf4d3r5zVpicP2650l7CPN6RM9zOJRl3NGpqSiw3Eg== + +map-obj@^4.0.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/map-obj/-/map-obj-4.3.0.tgz#9304f906e93faae70880da102a9f1df0ea8bb05a" + integrity sha512-hdN1wVrZbb29eBGiGjJbeP8JbKjq1urkHJ/LIP/NY48MZ1QVXUsQBV1G1zvYFHn1XE06cwjBsOI2K3Ulnj1YXQ== + memorystream@^0.3.1: version "0.3.1" resolved "https://registry.yarnpkg.com/memorystream/-/memorystream-0.3.1.tgz#86d7090b30ce455d63fbae12dda51a47ddcaf9b2" integrity sha512-S3UwM3yj5mtUSEfP41UZmt/0SCoVYUcU1rkXv+BQ5Ig8ndL4sPoJNBUJERafdPb5jjHJGuMgytgKvKIf58XNBw== -merge2@^1.3.0: +meow@^6.0.0: + version "6.1.1" + resolved "https://registry.yarnpkg.com/meow/-/meow-6.1.1.tgz#1ad64c4b76b2a24dfb2f635fddcadf320d251467" + integrity 
sha512-3YffViIt2QWgTy6Pale5QpopX/IvU3LPL03jOTqp6pGj3VjesdO/U8CuHMKpnQr4shCNCM5fd5XFFvIIl6JBHg== + dependencies: + "@types/minimist" "^1.2.0" + camelcase-keys "^6.2.2" + decamelize-keys "^1.1.0" + hard-rejection "^2.1.0" + minimist-options "^4.0.2" + normalize-package-data "^2.5.0" + read-pkg-up "^7.0.1" + redent "^3.0.0" + trim-newlines "^3.0.0" + type-fest "^0.13.1" + yargs-parser "^18.1.3" + +merge2@^1.3.0, merge2@^1.4.1: version "1.4.1" resolved "https://registry.yarnpkg.com/merge2/-/merge2-1.4.1.tgz#4368892f885e907455a6fd7dc55c0c9d404990ae" integrity sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg== -micromatch@^4.0.4: +micromatch@^4.0.2, micromatch@^4.0.4: version "4.0.5" resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-4.0.5.tgz#bc8999a7cbbf77cdc89f132f6e467051b49090c6" integrity sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA== @@ -1006,6 +1746,11 @@ micromatch@^4.0.4: braces "^3.0.2" picomatch "^2.3.1" +min-indent@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/min-indent/-/min-indent-1.0.1.tgz#a63f681673b30571fbe8bc25686ae746eefa9869" + integrity sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg== + minimatch@^3.0.4: version "3.1.2" resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b" @@ -1020,6 +1765,20 @@ minimatch@^5.1.0: dependencies: brace-expansion "^2.0.1" +minimist-options@^4.0.2: + version "4.1.0" + resolved "https://registry.yarnpkg.com/minimist-options/-/minimist-options-4.1.0.tgz#c0655713c53a8a2ebd77ffa247d342c40f010619" + integrity sha512-Q4r8ghd80yhO/0j1O3B2BjweX3fiHg9cdOwjJd2J76Q135c+NDxGCqdYKQ1SKBuFfgWbAUzBfvYjPUEeNgqN1A== + dependencies: + arrify "^1.0.1" + is-plain-obj "^1.1.0" + kind-of "^6.0.3" + +mixme@^0.5.1: + version "0.5.9" + resolved 
"https://registry.yarnpkg.com/mixme/-/mixme-0.5.9.tgz#a5a58e17354632179ff3ce5b0fc130899c8ba81c" + integrity sha512-VC5fg6ySUscaWUpI4gxCBTQMH2RdUpNrk+MsbpCYtIvf9SBJdiUey4qE7BXviJsJR4nDQxCZ+3yaYNW3guz/Pw== + mkdirp@^1.0.4: version "1.0.4" resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e" @@ -1050,7 +1809,7 @@ nice-try@^1.0.4: resolved "https://registry.yarnpkg.com/nice-try/-/nice-try-1.0.5.tgz#a3378a7696ce7d223e88fc9b764bd7ef1089e366" integrity sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ== -normalize-package-data@^2.3.2: +normalize-package-data@^2.3.2, normalize-package-data@^2.5.0: version "2.5.0" resolved "https://registry.yarnpkg.com/normalize-package-data/-/normalize-package-data-2.5.0.tgz#e66db1838b200c1dfc233225d12cb36520e234a8" integrity sha512-/5CMN3T0R4XTj4DcGaexo+roZSdSFW/0AOOTROrjxzCG1wrWXEsGbRKevjlIL+ZDE4sZlJr5ED4YW0yqmkK+eA== @@ -1095,11 +1854,42 @@ object.assign@^4.1.4: has-symbols "^1.0.3" object-keys "^1.1.1" +os-tmpdir@~1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/os-tmpdir/-/os-tmpdir-1.0.2.tgz#bbe67406c79aa85c5cfec766fe5734555dfa1274" + integrity sha512-D2FR03Vir7FIu45XBY20mTb+/ZSWB00sjU9jdQXt83gDrI4Ztz5Fs7/yy74g2N5SVQY4xY1qDr4rNddwYRVX0g== + +outdent@^0.5.0: + version "0.5.0" + resolved "https://registry.yarnpkg.com/outdent/-/outdent-0.5.0.tgz#9e10982fdc41492bb473ad13840d22f9655be2ff" + integrity sha512-/jHxFIzoMXdqPzTaCpFzAAWhpkSjZPF4Vsn6jAfNpmbH/ymsmd7Qc6VE9BGn0L6YMj6uwpQLxCECpus4ukKS9Q== + outdent@^0.8.0: version "0.8.0" resolved "https://registry.yarnpkg.com/outdent/-/outdent-0.8.0.tgz#2ebc3e77bf49912543f1008100ff8e7f44428eb0" integrity sha512-KiOAIsdpUTcAXuykya5fnVVT+/5uS0Q1mrkRHcF89tpieSmY33O/tmc54CqwA+bfhbtEfZUNLHaPUiB9X3jt1A== +p-filter@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/p-filter/-/p-filter-2.1.0.tgz#1b1472562ae7a0f742f0f3d3d3718ea66ff9c09c" + integrity 
sha512-ZBxxZ5sL2HghephhpGAQdoskxplTwr7ICaehZwLIlfL6acuVgZPm8yBNuRAFBGEqtD/hmUeq9eqLg2ys9Xr/yw== + dependencies: + p-map "^2.0.0" + +p-limit@^2.2.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-2.3.0.tgz#3dd33c647a214fdfffd835933eb086da0dc21db1" + integrity sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w== + dependencies: + p-try "^2.0.0" + +p-limit@^3.0.2: + version "3.1.0" + resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-3.1.0.tgz#e1daccbe78d0d1388ca18c64fea38e3e57e3706b" + integrity sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ== + dependencies: + yocto-queue "^0.1.0" + p-limit@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-4.0.0.tgz#914af6544ed32bfa54670b061cafcbd04984b644" @@ -1107,6 +1897,30 @@ p-limit@^4.0.0: dependencies: yocto-queue "^1.0.0" +p-locate@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-4.1.0.tgz#a3428bb7088b3a60292f66919278b7c297ad4f07" + integrity sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A== + dependencies: + p-limit "^2.2.0" + +p-locate@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-5.0.0.tgz#83c8315c6785005e3bd021839411c9e110e6d834" + integrity sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw== + dependencies: + p-limit "^3.0.2" + +p-map@^2.0.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/p-map/-/p-map-2.1.0.tgz#310928feef9c9ecc65b68b17693018a665cea175" + integrity sha512-y3b8Kpd8OAN444hxfBbFfj1FY/RjtTd8tzYwhUqNYXx0fXx2iX4maP4Qr6qhIKbQXI02wTLAda4fYUbDagTUFw== + +p-try@^2.0.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/p-try/-/p-try-2.2.0.tgz#cb2868540e313d61de58fafbe35ce9004d5540e6" + integrity 
sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ== + parse-json@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-4.0.0.tgz#be35f5425be1f7f6c747184f98a788cb99477ee0" @@ -1115,11 +1929,26 @@ parse-json@^4.0.0: error-ex "^1.3.1" json-parse-better-errors "^1.0.1" +parse-json@^5.0.0: + version "5.2.0" + resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-5.2.0.tgz#c76fc66dee54231c962b22bcc8a72cf2f99753cd" + integrity sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg== + dependencies: + "@babel/code-frame" "^7.0.0" + error-ex "^1.3.1" + json-parse-even-better-errors "^2.3.0" + lines-and-columns "^1.1.6" + path-browserify@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/path-browserify/-/path-browserify-1.0.1.tgz#d98454a9c3753d5790860f16f68867b9e46be1fd" integrity sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g== +path-exists@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-4.0.0.tgz#513bdbe2d3b95d7762e8c1137efa195c6c61b5b3" + integrity sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w== + path-key@^2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/path-key/-/path-key-2.0.1.tgz#411cadb574c5a140d3a4b1910d40d80cc9f40b40" @@ -1137,6 +1966,11 @@ path-type@^3.0.0: dependencies: pify "^3.0.0" +path-type@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/path-type/-/path-type-4.0.0.tgz#84ed01c0a7ba380afe09d90a8c180dcd9d03043b" + integrity sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw== + pathe@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/pathe/-/pathe-1.1.0.tgz#e2e13f6c62b31a3289af4ba19886c230f295ec03" @@ -1167,6 +2001,18 @@ pify@^3.0.0: resolved 
"https://registry.yarnpkg.com/pify/-/pify-3.0.0.tgz#e5a4acd2c101fdf3d9a4d07f0dbc4db49dd28176" integrity sha512-C3FsVNH1udSEX48gGX1xfvwTWfsYWj5U+8/uK15BGzIGrKoUpghX8hWZwa/OFnakBiiVNmBvemTJR5mcy7iPcg== +pify@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/pify/-/pify-4.0.1.tgz#4b2cd25c50d598735c50292224fd8c6df41e3231" + integrity sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g== + +pkg-dir@^4.2.0: + version "4.2.0" + resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-4.2.0.tgz#f099133df7ede422e81d1d8448270eeb3e4261f3" + integrity sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ== + dependencies: + find-up "^4.0.0" + pkg-types@^1.0.1: version "1.0.2" resolved "https://registry.yarnpkg.com/pkg-types/-/pkg-types-1.0.2.tgz#c233efc5210a781e160e0cafd60c0d0510a4b12e" @@ -1185,6 +2031,21 @@ postcss@^8.4.21: picocolors "^1.0.0" source-map-js "^1.0.2" +preferred-pm@^3.0.0: + version "3.0.3" + resolved "https://registry.yarnpkg.com/preferred-pm/-/preferred-pm-3.0.3.tgz#1b6338000371e3edbce52ef2e4f65eb2e73586d6" + integrity sha512-+wZgbxNES/KlJs9q40F/1sfOd/j7f1O9JaHcW5Dsn3aUUOZg3L2bjpVUcKV2jvtElYfoTuQiNeMfQJ4kwUAhCQ== + dependencies: + find-up "^5.0.0" + find-yarn-workspace-root2 "1.2.16" + path-exists "^4.0.0" + which-pm "2.0.0" + +prettier@^2.7.1: + version "2.8.8" + resolved "https://registry.yarnpkg.com/prettier/-/prettier-2.8.8.tgz#e8c5d7e98a4305ffe3de2e1fc4aca1a71c28b1da" + integrity sha512-tdN8qQGvNjw4CHbY+XXk0JgCXn9QiF21a55rBe5LJAU+kDyC4WQn4+awm2Xfk2lQMk5fKup9XgzTZtGkjBdP9Q== + pretty-format@^27.5.1: version "27.5.1" resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-27.5.1.tgz#2181879fdea51a7a5851fb39d920faa63f01d88e" @@ -1194,16 +2055,35 @@ pretty-format@^27.5.1: ansi-styles "^5.0.0" react-is "^17.0.1" +pseudomap@^1.0.2: + version "1.0.2" + resolved 
"https://registry.yarnpkg.com/pseudomap/-/pseudomap-1.0.2.tgz#f052a28da70e618917ef0a8ac34c1ae5a68286b3" + integrity sha512-b/YwNhb8lk1Zz2+bXXpS/LK9OisiZZ1SNsSLxN1x2OXVEhW2Ckr/7mWE5vrC1ZTiJlD9g19jWszTmJsB+oEpFQ== + queue-microtask@^1.2.2: version "1.2.3" resolved "https://registry.yarnpkg.com/queue-microtask/-/queue-microtask-1.2.3.tgz#4929228bbc724dfac43e0efb058caf7b6cfb6243" integrity sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A== +quick-lru@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/quick-lru/-/quick-lru-4.0.1.tgz#5b8878f113a58217848c6482026c73e1ba57727f" + integrity sha512-ARhCpm70fzdcvNQfPoy49IaanKkTlRWF2JMzqhcJbhSFRZv7nPTvZJdcY7301IPmvW+/p0RgIWnQDLJxifsQ7g== + react-is@^17.0.1: version "17.0.2" resolved "https://registry.yarnpkg.com/react-is/-/react-is-17.0.2.tgz#e691d4a8e9c789365655539ab372762b0efb54f0" integrity sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w== +read-pkg-up@^7.0.1: + version "7.0.1" + resolved "https://registry.yarnpkg.com/read-pkg-up/-/read-pkg-up-7.0.1.tgz#f3a6135758459733ae2b95638056e1854e7ef507" + integrity sha512-zK0TB7Xd6JpCLmlLmufqykGE+/TlOePD6qKClNW7hHDKFh/J7/7gCWGR7joEQEW1bKq3a3yUZSObOoWLFQ4ohg== + dependencies: + find-up "^4.1.0" + read-pkg "^5.2.0" + type-fest "^0.8.1" + read-pkg@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/read-pkg/-/read-pkg-3.0.0.tgz#9cbc686978fee65d16c00e2b19c237fcf6e38389" @@ -1213,6 +2093,39 @@ read-pkg@^3.0.0: normalize-package-data "^2.3.2" path-type "^3.0.0" +read-pkg@^5.2.0: + version "5.2.0" + resolved "https://registry.yarnpkg.com/read-pkg/-/read-pkg-5.2.0.tgz#7bf295438ca5a33e56cd30e053b34ee7250c93cc" + integrity sha512-Ug69mNOpfvKDAc2Q8DRpMjjzdtrnv9HcSMX+4VsZxD1aZ6ZzrIE7rlzXBtWTyhULSMKg076AW6WR5iZpD0JiOg== + dependencies: + "@types/normalize-package-data" "^2.4.0" + normalize-package-data "^2.5.0" + parse-json "^5.0.0" + type-fest "^0.6.0" + +read-yaml-file@^1.1.0: + 
version "1.1.0" + resolved "https://registry.yarnpkg.com/read-yaml-file/-/read-yaml-file-1.1.0.tgz#9362bbcbdc77007cc8ea4519fe1c0b821a7ce0d8" + integrity sha512-VIMnQi/Z4HT2Fxuwg5KrY174U1VdUIASQVWXXyqtNRtxSr9IYkn1rsI6Tb6HsrHCmB7gVpNwX6JxPTHcH6IoTA== + dependencies: + graceful-fs "^4.1.5" + js-yaml "^3.6.1" + pify "^4.0.1" + strip-bom "^3.0.0" + +redent@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/redent/-/redent-3.0.0.tgz#e557b7998316bb53c9f1f56fa626352c6963059f" + integrity sha512-6tDA8g98We0zd0GvVeMT9arEOnTw9qM03L9cJXaCjrip1OO764RDBLBfrB4cwzNGDj5OA5ioymC9GkizgWJDUg== + dependencies: + indent-string "^4.0.0" + strip-indent "^3.0.0" + +regenerator-runtime@^0.13.11: + version "0.13.11" + resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz#f6dca3e7ceec20590d07ada785636a90cdca17f9" + integrity sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg== + regexp.prototype.flags@^1.4.3: version "1.4.3" resolved "https://registry.yarnpkg.com/regexp.prototype.flags/-/regexp.prototype.flags-1.4.3.tgz#87cab30f80f66660181a3bb7bf5981a872b367ac" @@ -1222,6 +2135,21 @@ regexp.prototype.flags@^1.4.3: define-properties "^1.1.3" functions-have-names "^1.2.2" +require-directory@^2.1.1: + version "2.1.1" + resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42" + integrity sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q== + +require-main-filename@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/require-main-filename/-/require-main-filename-2.0.0.tgz#d0b329ecc7cc0f61649f62215be69af54aa8989b" + integrity sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg== + +resolve-from@^5.0.0: + version "5.0.0" + resolved 
"https://registry.yarnpkg.com/resolve-from/-/resolve-from-5.0.0.tgz#c35225843df8f776df21c57557bc087e9dfdfc69" + integrity sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw== + resolve@^1.10.0, resolve@^1.22.1: version "1.22.1" resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.22.1.tgz#27cb2ebb53f91abb49470a928bba7558066ac177" @@ -1259,11 +2187,21 @@ safe-regex-test@^1.0.0: get-intrinsic "^1.1.3" is-regex "^1.1.4" -"semver@2 || 3 || 4 || 5", semver@^5.5.0: +"safer-buffer@>= 2.1.2 < 3": + version "2.1.2" + resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" + integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg== + +"semver@2 || 3 || 4 || 5", semver@^5.4.1, semver@^5.5.0: version "5.7.1" resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.1.tgz#a954f931aeba508d307bbf069eff0c01c96116f7" integrity sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ== +set-blocking@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7" + integrity sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw== + shebang-command@^1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-1.2.0.tgz#44aac65b695b03398968c39f363fee5deafdf1ea" @@ -1295,6 +2233,16 @@ siginfo@^2.0.0: resolved "https://registry.yarnpkg.com/siginfo/-/siginfo-2.0.0.tgz#32e76c70b79724e3bb567cb9d543eb858ccfaf30" integrity sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g== +signal-exit@^3.0.2: + version "3.0.7" + resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.7.tgz#a9a1767f8af84155114eaabd73f99273c8f59ad9" + integrity 
sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ== + +slash@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/slash/-/slash-3.0.0.tgz#6539be870c165adbd5240220dbe361f1bc4d4634" + integrity sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q== + slice-ansi@^5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-5.0.0.tgz#b73063c57aa96f9cd881654b15294d95d285c42a" @@ -1303,6 +2251,18 @@ slice-ansi@^5.0.0: ansi-styles "^6.0.0" is-fullwidth-code-point "^4.0.0" +smartwrap@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/smartwrap/-/smartwrap-2.0.2.tgz#7e25d3dd58b51c6ca4aba3a9e391650ea62698a4" + integrity sha512-vCsKNQxb7PnCNd2wY1WClWifAc2lwqsG8OaswpJkVJsvMGcnEntdTCDajZCkk93Ay1U3t/9puJmb525Rg5MZBA== + dependencies: + array.prototype.flat "^1.2.3" + breakword "^1.0.5" + grapheme-splitter "^1.0.4" + strip-ansi "^6.0.0" + wcwidth "^1.0.1" + yargs "^15.1.0" + source-map-js@^1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-1.0.2.tgz#adbc361d9c62df380125e7f161f71c826f1e490c" @@ -1321,6 +2281,14 @@ source-map@^0.6.0, source-map@^0.6.1: resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263" integrity sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g== +spawndamnit@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/spawndamnit/-/spawndamnit-2.0.0.tgz#9f762ac5c3476abb994b42ad592b5ad22bb4b0ad" + integrity sha512-j4JKEcncSjFlqIwU5L/rp2N5SIPsdxaRsIv678+TZxZ0SRDJTm8JrxJMjE/XuiEZNEir3S8l0Fa3Ke339WI4qA== + dependencies: + cross-spawn "^5.1.0" + signal-exit "^3.0.2" + spdx-correct@^3.0.0: version "3.2.0" resolved "https://registry.yarnpkg.com/spdx-correct/-/spdx-correct-3.2.0.tgz#4f5ab0668f0059e34f9c00dce331784a12de4e9c" @@ -1347,6 +2315,11 @@ spdx-license-ids@^3.0.0: resolved 
"https://registry.yarnpkg.com/spdx-license-ids/-/spdx-license-ids-3.0.12.tgz#69077835abe2710b65f03969898b6637b505a779" integrity sha512-rr+VVSXtRhO4OHbXUiAF7xW3Bo9DuuF6C5jH+q/x15j2jniycgKbxU09Hr0WqlSLUs4i4ltHGXqTe7VHclYWyA== +sprintf-js@~1.0.2: + version "1.0.3" + resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c" + integrity sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g== + stackback@0.0.2: version "0.0.2" resolved "https://registry.yarnpkg.com/stackback/-/stackback-0.0.2.tgz#1ac8a0d9483848d1695e418b6d031a3c3ce68e3b" @@ -1357,6 +2330,22 @@ std-env@^3.3.1: resolved "https://registry.yarnpkg.com/std-env/-/std-env-3.3.2.tgz#af27343b001616015534292178327b202b9ee955" integrity sha512-uUZI65yrV2Qva5gqE0+A7uVAvO40iPo6jGhs7s8keRfHCmtg+uB2X6EiLGCI9IgL1J17xGhvoOqSz79lzICPTA== +stream-transform@^2.1.3: + version "2.1.3" + resolved "https://registry.yarnpkg.com/stream-transform/-/stream-transform-2.1.3.tgz#a1c3ecd72ddbf500aa8d342b0b9df38f5aa598e3" + integrity sha512-9GHUiM5hMiCi6Y03jD2ARC1ettBXkQBoQAe7nJsPknnI0ow10aXjTnew8QtYQmLjzn974BnmWEAJgCY6ZP1DeQ== + dependencies: + mixme "^0.5.1" + +string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: + version "4.2.3" + resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" + integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== + dependencies: + emoji-regex "^8.0.0" + is-fullwidth-code-point "^3.0.0" + strip-ansi "^6.0.1" + string-width@^5.0.0: version "5.1.2" resolved "https://registry.yarnpkg.com/string-width/-/string-width-5.1.2.tgz#14f8daec6d81e7221d2a357e668cab73bdbca794" @@ -1393,6 +2382,13 @@ string.prototype.trimstart@^1.0.6: define-properties "^1.1.4" es-abstract "^1.20.4" +strip-ansi@^6.0.0, strip-ansi@^6.0.1: + version "6.0.1" + resolved 
"https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" + integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== + dependencies: + ansi-regex "^5.0.1" + strip-ansi@^7.0.1: version "7.0.1" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-7.0.1.tgz#61740a08ce36b61e50e65653f07060d000975fb2" @@ -1405,6 +2401,13 @@ strip-bom@^3.0.0: resolved "https://registry.yarnpkg.com/strip-bom/-/strip-bom-3.0.0.tgz#2334c18e9c759f7bdd56fdef7e9ae3d588e68ed3" integrity sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA== +strip-indent@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/strip-indent/-/strip-indent-3.0.0.tgz#c32e1cee940b6b3432c771bc2c54bcce73cd3001" + integrity sha512-laJTa3Jb+VQpaC6DseHhF7dXVqHTfJPCRDaEbid/drOhgitgYku/letMUqOXFoWV0zIIUbjpdH2t+tYj4bQMRQ== + dependencies: + min-indent "^1.0.0" + strip-literal@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/strip-literal/-/strip-literal-1.0.1.tgz#0115a332710c849b4e46497891fb8d585e404bd2" @@ -1419,11 +2422,23 @@ supports-color@^5.3.0: dependencies: has-flag "^3.0.0" +supports-color@^7.1.0: + version "7.2.0" + resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.2.0.tgz#1b7dcdcb32b8138801b3e478ba6a51caa89648da" + integrity sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw== + dependencies: + has-flag "^4.0.0" + supports-preserve-symlinks-flag@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz#6eda4bd344a3c94aea376d4cc31bc77311039e09" integrity sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w== +term-size@^2.1.0: + version "2.2.1" + resolved "https://registry.yarnpkg.com/term-size/-/term-size-2.2.1.tgz#2a6a54840432c2fb6320fea0f415531e90189f54" + 
integrity sha512-wK0Ri4fOGjv/XPy8SBHZChl8CM7uMc5VML7SqiQ0zG7+J5Vr+RMQDoHa2CNT6KHUnTGIXH34UDMkPzAUyapBZg== + tinybench@^2.3.1: version "2.3.1" resolved "https://registry.yarnpkg.com/tinybench/-/tinybench-2.3.1.tgz#14f64e6b77d7ef0b1f6ab850c7a808c6760b414d" @@ -1439,6 +2454,13 @@ tinyspy@^1.0.2: resolved "https://registry.yarnpkg.com/tinyspy/-/tinyspy-1.1.1.tgz#0cb91d5157892af38cb2d217f5c7e8507a5bf092" integrity sha512-UVq5AXt/gQlti7oxoIg5oi/9r0WpF7DGEVwXgqWSMmyN16+e3tl5lIvTaOpJ3TAtu5xFzWccFRM4R5NaWHF+4g== +tmp@^0.0.33: + version "0.0.33" + resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.0.33.tgz#6d34335889768d21b2bcda0aa277ced3b1bfadf9" + integrity sha512-jRCJlojKnZ3addtTOjdIqoRuPEKBvNXcGYqzO6zWZX8KfKEpnGY5jfggJQ3EjKuu8D4bJRr0y+cYJFmYbImXGw== + dependencies: + os-tmpdir "~1.0.2" + to-regex-range@^5.0.1: version "5.0.1" resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4" @@ -1446,6 +2468,11 @@ to-regex-range@^5.0.1: dependencies: is-number "^7.0.0" +trim-newlines@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-3.0.1.tgz#260a5d962d8b752425b32f3a7db0dcacd176c144" + integrity sha512-c1PTsA3tYrIsLGkJkzHF+w9F2EyxfXGo4UyJc4pFL++FMjnq0HJS69T3M7d//gKrFKwy429bouPescbjecU+Zw== + ts-morph@^17.0.1: version "17.0.1" resolved "https://registry.yarnpkg.com/ts-morph/-/ts-morph-17.0.1.tgz#d85df4fcf9a1fcda1b331d52c00655f381c932d1" @@ -1465,11 +2492,39 @@ tsx@^3.12.3: optionalDependencies: fsevents "~2.3.2" +tty-table@^4.1.5: + version "4.2.1" + resolved "https://registry.yarnpkg.com/tty-table/-/tty-table-4.2.1.tgz#c06cd76c54542acf4e2b4a0e9a5802984b65cba6" + integrity sha512-xz0uKo+KakCQ+Dxj1D/tKn2FSyreSYWzdkL/BYhgN6oMW808g8QRMuh1atAV9fjTPbWBjfbkKQpI/5rEcnAc7g== + dependencies: + chalk "^4.1.2" + csv "^5.5.3" + kleur "^4.1.5" + smartwrap "^2.0.2" + strip-ansi "^6.0.1" + wcwidth "^1.0.1" + yargs "^17.7.1" + type-detect@^4.0.0, type-detect@^4.0.5: version "4.0.8" 
resolved "https://registry.yarnpkg.com/type-detect/-/type-detect-4.0.8.tgz#7646fb5f18871cfbb7749e69bd39a6388eb7450c" integrity sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g== +type-fest@^0.13.1: + version "0.13.1" + resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.13.1.tgz#0172cb5bce80b0bd542ea348db50c7e21834d934" + integrity sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg== + +type-fest@^0.6.0: + version "0.6.0" + resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.6.0.tgz#8d2a2370d3df886eb5c90ada1c5bf6188acf838b" + integrity sha512-q+MB8nYR1KDLrgr4G5yemftpMC7/QLqVndBmEEdqzmNj5dcFOO4Oo8qlwZE3ULT3+Zim1F8Kq4cBnikNhlCMlg== + +type-fest@^0.8.1: + version "0.8.1" + resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.8.1.tgz#09e249ebde851d3b1e48d27c105444667f17b83d" + integrity sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA== + typed-array-length@^1.0.4: version "1.0.4" resolved "https://registry.yarnpkg.com/typed-array-length/-/typed-array-length-1.0.4.tgz#89d83785e5c4098bec72e08b319651f0eac9c1bb" @@ -1499,6 +2554,11 @@ unbox-primitive@^1.0.2: has-symbols "^1.0.3" which-boxed-primitive "^1.0.2" +universalify@^0.1.0: + version "0.1.2" + resolved "https://registry.yarnpkg.com/universalify/-/universalify-0.1.2.tgz#b646f69be3942dabcecc9d6639c80dc105efaa66" + integrity sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg== + validate-npm-package-license@^3.0.1: version "3.0.4" resolved "https://registry.yarnpkg.com/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz#fc91f6b9c7ba15c857f4cb2c5defeec39d4f410a" @@ -1563,6 +2623,13 @@ vitest@^0.28.5: vite-node "0.28.5" why-is-node-running "^2.2.2" +wcwidth@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/wcwidth/-/wcwidth-1.0.1.tgz#f0b0dcf915bc5ff1528afadb2c0e17b532da2fe8" + 
integrity sha512-XHPEwS0q6TaxcvG85+8EYkbiCux2XtWG2mkc47Ng2A77BQu9+DqIOJldST4HgPkuea7dvKSj5VgX3P1d4rW8Tg== + dependencies: + defaults "^1.0.3" + which-boxed-primitive@^1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz#13757bc89b209b049fe5d86430e21cf40a89a8e6" @@ -1574,6 +2641,19 @@ which-boxed-primitive@^1.0.2: is-string "^1.0.5" is-symbol "^1.0.3" +which-module@^2.0.0: + version "2.0.1" + resolved "https://registry.yarnpkg.com/which-module/-/which-module-2.0.1.tgz#776b1fe35d90aebe99e8ac15eb24093389a4a409" + integrity sha512-iBdZ57RDvnOR9AGBhML2vFZf7h8vmBjhoaZqODJBFWHVtKkDmKuHai3cx5PgVMrX5YDNp27AofYbAwctSS+vhQ== + +which-pm@2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/which-pm/-/which-pm-2.0.0.tgz#8245609ecfe64bf751d0eef2f376d83bf1ddb7ae" + integrity sha512-Lhs9Pmyph0p5n5Z3mVnN0yWcbQYUAD7rbQUiMsQxOJ3T57k7RFe35SUwWMf7dsbDZks1uOmw4AecB/JMDj3v/w== + dependencies: + load-yaml-file "^0.2.0" + path-exists "^4.0.0" + which-typed-array@^1.1.9: version "1.1.9" resolved "https://registry.yarnpkg.com/which-typed-array/-/which-typed-array-1.1.9.tgz#307cf898025848cf995e795e8423c7f337efbde6" @@ -1601,6 +2681,87 @@ why-is-node-running@^2.2.2: siginfo "^2.0.0" stackback "0.0.2" +wrap-ansi@^6.2.0: + version "6.2.0" + resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-6.2.0.tgz#e9393ba07102e6c91a3b221478f0257cd2856e53" + integrity sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA== + dependencies: + ansi-styles "^4.0.0" + string-width "^4.1.0" + strip-ansi "^6.0.0" + +wrap-ansi@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" + integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== + dependencies: + ansi-styles "^4.0.0" + string-width "^4.1.0" + strip-ansi "^6.0.0" + +y18n@^4.0.0: + version "4.0.3" + 
resolved "https://registry.yarnpkg.com/y18n/-/y18n-4.0.3.tgz#b5f259c82cd6e336921efd7bfd8bf560de9eeedf" + integrity sha512-JKhqTOwSrqNA1NY5lSztJ1GrBiUodLMmIZuLiDaMRJ+itFd+ABVE8XBjOvIWL+rSqNDC74LCSFmlb/U4UZ4hJQ== + +y18n@^5.0.5: + version "5.0.8" + resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.8.tgz#7f4934d0f7ca8c56f95314939ddcd2dd91ce1d55" + integrity sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA== + +yallist@^2.1.2: + version "2.1.2" + resolved "https://registry.yarnpkg.com/yallist/-/yallist-2.1.2.tgz#1c11f9218f076089a47dd512f93c6699a6a81d52" + integrity sha512-ncTzHV7NvsQZkYe1DW7cbDLm0YpzHmZF5r/iyP3ZnQtMiJ+pjzisCiMNI+Sj+xQF5pXhSHxSB3uDbsBTzY/c2A== + +yargs-parser@^18.1.2, yargs-parser@^18.1.3: + version "18.1.3" + resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-18.1.3.tgz#be68c4975c6b2abf469236b0c870362fab09a7b0" + integrity sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ== + dependencies: + camelcase "^5.0.0" + decamelize "^1.2.0" + +yargs-parser@^21.1.1: + version "21.1.1" + resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-21.1.1.tgz#9096bceebf990d21bb31fa9516e0ede294a77d35" + integrity sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw== + +yargs@^15.1.0: + version "15.4.1" + resolved "https://registry.yarnpkg.com/yargs/-/yargs-15.4.1.tgz#0d87a16de01aee9d8bec2bfbf74f67851730f4f8" + integrity sha512-aePbxDmcYW++PaqBsJ+HYUFwCdv4LVvdnhBy78E57PIor8/OVvhMrADFFEDh8DHDFRv/O9i3lPhsENjO7QX0+A== + dependencies: + cliui "^6.0.0" + decamelize "^1.2.0" + find-up "^4.1.0" + get-caller-file "^2.0.1" + require-directory "^2.1.1" + require-main-filename "^2.0.0" + set-blocking "^2.0.0" + string-width "^4.2.0" + which-module "^2.0.0" + y18n "^4.0.0" + yargs-parser "^18.1.2" + +yargs@^17.7.1: + version "17.7.2" + resolved 
"https://registry.yarnpkg.com/yargs/-/yargs-17.7.2.tgz#991df39aca675a192b816e1e0363f9d75d2aa269" + integrity sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w== + dependencies: + cliui "^8.0.1" + escalade "^3.1.1" + get-caller-file "^2.0.5" + require-directory "^2.1.1" + string-width "^4.2.3" + y18n "^5.0.5" + yargs-parser "^21.1.1" + +yocto-queue@^0.1.0: + version "0.1.0" + resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-0.1.0.tgz#0294eb3dee05028d31ee1a5fa2c556a6aaf10a1b" + integrity sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q== + yocto-queue@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-1.0.0.tgz#7f816433fb2cbc511ec8bf7d263c3b58a1a3c251" From 67d21c3c99d5c1787fd35d70f362bee99e0a539d Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 12 May 2023 22:01:08 +0200 Subject: [PATCH 108/207] Move WASM bindings to wasm folder --- Cargo.toml | 2 +- {js => wasm}/.changeset/config.json | 0 {js => wasm}/.gitignore | 0 {js => wasm}/Cargo.toml | 0 {js => wasm}/package.json | 0 {js => wasm}/scripts/inline_ranks.ts | 0 {js => wasm}/scripts/post_process.ts | 0 {js => wasm}/scripts/tsconfig.json | 3 ++- {js => wasm}/src/init.ts | 0 {js => wasm}/src/lib.rs | 0 {js => wasm}/src/load.ts | 0 {js => wasm}/test/init_error.test.ts | 0 {js => wasm}/test/test_simple_public.test.ts | 0 {js => wasm}/tsconfig.json | 0 {js => wasm}/yarn.lock | 25 ++++++++++++++++++++ 15 files changed, 28 insertions(+), 2 deletions(-) rename {js => wasm}/.changeset/config.json (100%) rename {js => wasm}/.gitignore (100%) rename {js => wasm}/Cargo.toml (100%) rename {js => wasm}/package.json (100%) rename {js => wasm}/scripts/inline_ranks.ts (100%) rename {js => wasm}/scripts/post_process.ts (100%) rename {js => wasm}/scripts/tsconfig.json (72%) rename {js => wasm}/src/init.ts (100%) rename {js => wasm}/src/lib.rs (100%) rename {js => wasm}/src/load.ts 
(100%) rename {js => wasm}/test/init_error.test.ts (100%) rename {js => wasm}/test/test_simple_public.test.ts (100%) rename {js => wasm}/tsconfig.json (100%) rename {js => wasm}/yarn.lock (99%) diff --git a/Cargo.toml b/Cargo.toml index 937b8684..7abfb903 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = [ "core", "python", - "js" + "wasm" ] [profile.release] diff --git a/js/.changeset/config.json b/wasm/.changeset/config.json similarity index 100% rename from js/.changeset/config.json rename to wasm/.changeset/config.json diff --git a/js/.gitignore b/wasm/.gitignore similarity index 100% rename from js/.gitignore rename to wasm/.gitignore diff --git a/js/Cargo.toml b/wasm/Cargo.toml similarity index 100% rename from js/Cargo.toml rename to wasm/Cargo.toml diff --git a/js/package.json b/wasm/package.json similarity index 100% rename from js/package.json rename to wasm/package.json diff --git a/js/scripts/inline_ranks.ts b/wasm/scripts/inline_ranks.ts similarity index 100% rename from js/scripts/inline_ranks.ts rename to wasm/scripts/inline_ranks.ts diff --git a/js/scripts/post_process.ts b/wasm/scripts/post_process.ts similarity index 100% rename from js/scripts/post_process.ts rename to wasm/scripts/post_process.ts diff --git a/js/scripts/tsconfig.json b/wasm/scripts/tsconfig.json similarity index 72% rename from js/scripts/tsconfig.json rename to wasm/scripts/tsconfig.json index 55681af1..033d913a 100644 --- a/js/scripts/tsconfig.json +++ b/wasm/scripts/tsconfig.json @@ -4,7 +4,8 @@ "moduleResolution": "node", "strict": true, "declaration": true, - "allowSyntheticDefaultImports": true + "allowSyntheticDefaultImports": true, + "resolveJsonModule": true }, "include": ["./**/*.ts"], "exclude": ["node_modules"] diff --git a/js/src/init.ts b/wasm/src/init.ts similarity index 100% rename from js/src/init.ts rename to wasm/src/init.ts diff --git a/js/src/lib.rs b/wasm/src/lib.rs similarity index 100% rename from js/src/lib.rs rename to wasm/src/lib.rs 
diff --git a/js/src/load.ts b/wasm/src/load.ts similarity index 100% rename from js/src/load.ts rename to wasm/src/load.ts diff --git a/js/test/init_error.test.ts b/wasm/test/init_error.test.ts similarity index 100% rename from js/test/init_error.test.ts rename to wasm/test/init_error.test.ts diff --git a/js/test/test_simple_public.test.ts b/wasm/test/test_simple_public.test.ts similarity index 100% rename from js/test/test_simple_public.test.ts rename to wasm/test/test_simple_public.test.ts diff --git a/js/tsconfig.json b/wasm/tsconfig.json similarity index 100% rename from js/tsconfig.json rename to wasm/tsconfig.json diff --git a/js/yarn.lock b/wasm/yarn.lock similarity index 99% rename from js/yarn.lock rename to wasm/yarn.lock index b207017e..7a41d041 100644 --- a/js/yarn.lock +++ b/wasm/yarn.lock @@ -23,6 +23,14 @@ chalk "^2.0.0" js-tokens "^4.0.0" +"@babel/runtime-corejs3@^7.16.5": + version "7.21.5" + resolved "https://registry.yarnpkg.com/@babel/runtime-corejs3/-/runtime-corejs3-7.21.5.tgz#a6d4e132ab1cb2fae2354f02284ebb6e07b4f7d8" + integrity sha512-FRqFlFKNazWYykft5zvzuEl1YyTDGsIRrjV9rvxvYkUC7W/ueBng1X68Xd6uRMzAaJ0xMKn08/wem5YS1lpX8w== + dependencies: + core-js-pure "^3.25.1" + regenerator-runtime "^0.13.11" + "@babel/runtime@^7.20.1", "@babel/runtime@^7.5.5": version "7.21.5" resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.21.5.tgz#8492dddda9644ae3bda3b45eabe87382caee7200" @@ -703,6 +711,11 @@ balanced-match@^1.0.0: resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee" integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw== +base64-js@^1.5.1: + version "1.5.1" + resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a" + integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA== + better-path-resolve@1.0.0: version 
"1.0.0" resolved "https://registry.yarnpkg.com/better-path-resolve/-/better-path-resolve-1.0.0.tgz#13a35a1104cdd48a7b74bf8758f96a1ee613f99d" @@ -881,6 +894,11 @@ concat-map@0.0.1: resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" integrity sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg== +core-js-pure@^3.25.1: + version "3.30.2" + resolved "https://registry.yarnpkg.com/core-js-pure/-/core-js-pure-3.30.2.tgz#005a82551f4af3250dcfb46ed360fad32ced114e" + integrity sha512-p/npFUJXXBkCCTIlEGBdghofn00jWG6ZOtdoIXSJmAu2QBvN0IqpZXWweOytcwE6cfx8ZvVUy1vw8zxhe4Y2vg== + cross-spawn@^5.1.0: version "5.1.0" resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-5.1.0.tgz#e8bd0efee58fcff6f8f94510a0a554bbfa235449" @@ -2699,6 +2717,13 @@ wrap-ansi@^7.0.0: string-width "^4.1.0" strip-ansi "^6.0.0" +xregexp@^5.1.1: + version "5.1.1" + resolved "https://registry.yarnpkg.com/xregexp/-/xregexp-5.1.1.tgz#6d3fe18819e3143aaf52f9284d34f49a59583ebb" + integrity sha512-fKXeVorD+CzWvFs7VBuKTYIW63YD1e1osxwQ8caZ6o1jg6pDAbABDG54LCIq0j5cy7PjRvGIq6sef9DYPXpncg== + dependencies: + "@babel/runtime-corejs3" "^7.16.5" + y18n@^4.0.0: version "4.0.3" resolved "https://registry.yarnpkg.com/y18n/-/y18n-4.0.3.tgz#b5f259c82cd6e336921efd7bfd8bf560de9eeedf" From 150cf05ac36406d0ba35d9d38eab09a24ed94569 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 12 May 2023 22:15:58 +0200 Subject: [PATCH 109/207] Add JS port --- {wasm/.changeset => .changeset}/config.json | 0 js/.gitignore | 2 + js/package.json | 15 + js/scripts/regex.ts | 34 + js/src/index.ts | 299 +++ js/test/compatibility.test.ts | 36 + js/tsconfig.json | 15 + package.json | 15 + wasm/package.json | 8 +- wasm/tsconfig.json | 3 +- yarn.lock | 2647 +++++++++++++++++++ 11 files changed, 3066 insertions(+), 8 deletions(-) rename {wasm/.changeset => .changeset}/config.json (100%) create mode 100644 js/.gitignore create mode 100644 
js/package.json create mode 100644 js/scripts/regex.ts create mode 100644 js/src/index.ts create mode 100644 js/test/compatibility.test.ts create mode 100644 js/tsconfig.json create mode 100644 package.json create mode 100644 yarn.lock diff --git a/wasm/.changeset/config.json b/.changeset/config.json similarity index 100% rename from wasm/.changeset/config.json rename to .changeset/config.json diff --git a/js/.gitignore b/js/.gitignore new file mode 100644 index 00000000..755d2dce --- /dev/null +++ b/js/.gitignore @@ -0,0 +1,2 @@ +ranks/ +node_modules \ No newline at end of file diff --git a/js/package.json b/js/package.json new file mode 100644 index 00000000..2bc6de5c --- /dev/null +++ b/js/package.json @@ -0,0 +1,15 @@ +{ + "name": "tiktoken", + "version": "1.0.0", + "description": "Javascript port of tiktoken", + "license": "MIT", + "scripts": {}, + "repository": { + "type": "git", + "url": "https://github.com/dqbd/tiktoken" + }, + "dependencies": { + "base64-js": "^1.5.1" + }, + "devDependencies": {} +} diff --git a/js/scripts/regex.ts b/js/scripts/regex.ts new file mode 100644 index 00000000..ca2db8a3 --- /dev/null +++ b/js/scripts/regex.ts @@ -0,0 +1,34 @@ +// generate combinations + +const strings = "'s|'t|'re|'ve|'m|'ll|'d"; + +const testRegex = + "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"; + +function recombine(value: string, acc: string[] = [""]): string[] { + if (value.length === 0) return acc; + if (value[0].match(/[a-zA-Z]/)) { + return recombine( + value.substring(1), + acc.flatMap((i) => [ + `${i}${value[0].toLocaleLowerCase()}`, + `${i}${value[0].toLocaleUpperCase()}`, + ]) + ); + } + + return recombine( + value.substring(1), + acc.map((i) => `${i}${value[0]}`) + ); +} + +let match = testRegex.replace(/\(\?i:(.*?)\)/, (_, match: string) => { + const insensitive = match + .split("|") + .flatMap((a) => recombine(a)) + .join("|"); + return `(${insensitive})`; 
+}); + +console.log(match); diff --git a/js/src/index.ts b/js/src/index.ts new file mode 100644 index 00000000..843cfe7b --- /dev/null +++ b/js/src/index.ts @@ -0,0 +1,299 @@ +import gpt2 from "../../wasm/dist/encoders/gpt2.json"; +import p50k_base from "../../wasm/dist/encoders/p50k_base.json"; +import p50k_edit from "../../wasm/dist/encoders/p50k_edit.json"; +import r50k_base from "../../wasm/dist/encoders/r50k_base.json"; +import cl100k_base from "../../wasm/dist/encoders/cl100k_base.json"; + +import base64 from "base64-js"; + +function never(message: string, _: never) { + throw new Error(message); +} + +function bytePairMerge( + piece: Uint8Array, + ranks: Map +): Array<{ start: number; end: number }> { + let parts: Array<{ start: number; end: number }> = Array.from( + { length: piece.length }, + (_, i) => ({ start: i, end: i + 1 }) + ); + + while (parts.length > 1) { + let minRank: [number, number] | null = null; + + for (let i = 0; i < parts.length - 1; i++) { + const slice = piece.slice(parts[i].start, parts[i + 1].end); + const rank = ranks.get(slice.join(",")); + if (rank == null) continue; + + if (minRank == null || rank < minRank[0]) { + minRank = [rank, i]; + } + } + + if (minRank != null) { + const i = minRank[1]; + parts[i] = { start: parts[i].start, end: parts[i + 1].end }; + parts.splice(i + 1, 1); + } else { + break; + } + } + return parts; +} + +function bytePairEncode(piece: Uint8Array, ranks: Map) { + if (piece.length === 1) return [ranks.get(piece.join(","))!]; + + return bytePairMerge(piece, ranks) + .map((p) => ranks.get(piece.slice(p.start, p.end).join(","))) + .filter((x): x is number => x != null); +} + +function escapeRegex(str: string) { + return str.replace(/[\\^$*+?.()|[\]{}]/g, "\\$&"); +} + +export class Tiktoken { + protected specialTokens: Record; + protected inverseSpecialTokens: Record; + + protected patStr: string; + + protected textEncoder = new TextEncoder(); + protected textDecoder = new TextDecoder("utf-8"); + + protected 
rankMap = new Map(); + protected textMap = new Map(); + + constructor( + ranks: { + pat_str: string; + special_tokens: Record; + bpe_ranks: string; + } = cl100k_base, + extendedSpecialTokens?: Record + ) { + this.patStr = ranks.pat_str; + + const uncompressed = ranks.bpe_ranks + .split("\n") + .filter(Boolean) + .reduce>((memo, x) => { + const [_, offsetStr, ...tokens] = x.split(" "); + const offset = Number.parseInt(offsetStr, 10); + tokens.forEach((token, i) => (memo[token] = offset + i)); + return memo; + }, {}); + + for (const [token, rank] of Object.entries(uncompressed)) { + const bytes = base64.toByteArray(token); + this.rankMap.set(bytes.join(","), rank); + this.textMap.set(rank, bytes); + } + + this.specialTokens = { ...ranks.special_tokens, ...extendedSpecialTokens }; + this.inverseSpecialTokens = Object.entries(this.specialTokens).reduce< + Record + >((memo, [text, rank]) => { + memo[rank] = this.textEncoder.encode(text); + return memo; + }, {}); + } + + encode(text: string, allowedSpecial: Set | "all" = new Set()) { + const regexes = new RegExp(this.patStr, "ug"); + const specialRegex = new RegExp( + Object.keys(this.specialTokens) + .map((i) => escapeRegex(i)) + .join("|"), + "g" + ); + + const ret: number[] = []; + + const allowedSpecialSet = + allowedSpecial === "all" + ? new Set(Object.keys(this.specialTokens)) + : allowedSpecial; + + let start = 0; + while (true) { + let nextSpecial: RegExpMatchArray | null = null; + let startFind = start; + + while (true) { + specialRegex.lastIndex = startFind; + nextSpecial = specialRegex.exec(text); + if (nextSpecial == null || allowedSpecialSet.has(nextSpecial[0])) break; + startFind = nextSpecial.index! + 1; + } + + const end = nextSpecial?.index ?? 
text.length; + for (const match of text.substring(start, end).matchAll(regexes)) { + const piece = this.textEncoder.encode(match[0]); + const token = this.rankMap.get(piece.join(",")); + + if (token != null) { + ret.push(token); + continue; + } + + ret.push(...bytePairEncode(piece, this.rankMap)); + } + + if (nextSpecial == null) break; + let token = this.specialTokens[nextSpecial[0]]; + ret.push(token); + + start = nextSpecial.index! + nextSpecial[0].length; + } + + return ret; + } + + decode(tokens: number[]) { + const res: Uint8Array[] = []; + let length = 0; + for (let i = 0; i < tokens.length; ++i) { + const token = tokens[i]; + const bytes = this.textMap.get(token) ?? this.inverseSpecialTokens[token]; + + if (bytes != null) { + res.push(bytes); + length += bytes.length; + } + } + + const mergedArray = new Uint8Array(length); + let i = 0; + for (const bytes of res) { + mergedArray.set(bytes, i); + i += bytes.length; + } + + return this.textDecoder.decode(mergedArray); + } +} + +export type TiktokenEncoding = + | "gpt2" + | "r50k_base" + | "p50k_base" + | "p50k_edit" + | "cl100k_base"; + +export function getEncoding( + encoding: TiktokenEncoding, + extendSpecialTokens?: Record +) { + switch (encoding) { + case "gpt2": + return new Tiktoken(gpt2, extendSpecialTokens); + case "r50k_base": + return new Tiktoken(r50k_base, extendSpecialTokens); + case "p50k_base": + return new Tiktoken(p50k_base, extendSpecialTokens); + case "p50k_edit": + return new Tiktoken(p50k_edit, extendSpecialTokens); + case "cl100k_base": + return new Tiktoken(cl100k_base, extendSpecialTokens); + default: + never("Unknown encoding", encoding); + } +} + +export type TiktokenModel = + | "text-davinci-003" + | "text-davinci-002" + | "text-davinci-001" + | "text-curie-001" + | "text-babbage-001" + | "text-ada-001" + | "davinci" + | "curie" + | "babbage" + | "ada" + | "code-davinci-002" + | "code-davinci-001" + | "code-cushman-002" + | "code-cushman-001" + | "davinci-codex" + | "cushman-codex" + 
| "text-davinci-edit-001" + | "code-davinci-edit-001" + | "text-embedding-ada-002" + | "text-similarity-davinci-001" + | "text-similarity-curie-001" + | "text-similarity-babbage-001" + | "text-similarity-ada-001" + | "text-search-davinci-doc-001" + | "text-search-curie-doc-001" + | "text-search-babbage-doc-001" + | "text-search-ada-doc-001" + | "code-search-babbage-code-001" + | "code-search-ada-code-001" + | "gpt2" + | "gpt-4" + | "gpt-4-0314" + | "gpt-4-32k" + | "gpt-4-32k-0314" + | "gpt-3.5-turbo" + | "gpt-3.5-turbo-0301"; + +export function encodingForModel( + model: TiktokenModel, + extendSpecialTokens?: Record +) { + switch (model) { + case "gpt2": { + return getEncoding("gpt2", extendSpecialTokens); + } + case "code-cushman-001": + case "code-cushman-002": + case "code-davinci-001": + case "code-davinci-002": + case "cushman-codex": + case "davinci-codex": + case "text-davinci-002": + case "text-davinci-003": { + return getEncoding("p50k_base", extendSpecialTokens); + } + case "code-davinci-edit-001": + case "text-davinci-edit-001": { + return getEncoding("p50k_edit", extendSpecialTokens); + } + case "ada": + case "babbage": + case "code-search-ada-code-001": + case "code-search-babbage-code-001": + case "curie": + case "davinci": + case "text-ada-001": + case "text-babbage-001": + case "text-curie-001": + case "text-davinci-001": + case "text-search-ada-doc-001": + case "text-search-babbage-doc-001": + case "text-search-curie-doc-001": + case "text-search-davinci-doc-001": + case "text-similarity-ada-001": + case "text-similarity-babbage-001": + case "text-similarity-curie-001": + case "text-similarity-davinci-001": { + return getEncoding("r50k_base", extendSpecialTokens); + } + case "gpt-3.5-turbo-0301": + case "gpt-3.5-turbo": + case "gpt-4-0314": + case "gpt-4-32k-0314": + case "gpt-4-32k": + case "gpt-4": + case "text-embedding-ada-002": { + return getEncoding("cl100k_base", extendSpecialTokens); + } + default: + never("Unknown model", model); + } +} 
diff --git a/js/test/compatibility.test.ts b/js/test/compatibility.test.ts new file mode 100644 index 00000000..5c942e7d --- /dev/null +++ b/js/test/compatibility.test.ts @@ -0,0 +1,36 @@ +import { test, expect, describe, afterAll } from "vitest"; +import { get_encoding } from "../../wasm/dist"; +import { Tiktoken } from "../src/index"; + +describe("LiteTokenizer matches the behavior of @dqbd/tiktoken", () => { + const lite = new Tiktoken(); + const full = get_encoding("cl100k_base"); + + afterAll(() => full.free()); + + test("Simple test", () => { + const text = "hello world"; + expect([...lite.encode(text)]).toEqual([...full.encode(text)]); + }); + + test("Magic tokens", () => { + const text = "<|fim_prefix|>test<|fim_suffix|>"; + expect([...lite.encode(text, "all")]).toEqual([ + ...full.encode(text, "all"), + ]); + }); + + test("Emojis and non-latin characters", () => { + const fixtures = [ + "Hello world", + "New lines\n\n\n\n\n Spaces", + "👩‍👦‍👦 👩‍👧‍👦 👩‍👧‍👧 👩‍👩‍👦 👩‍👩‍👧 🇨🇿 Emojis: 🧑🏾‍💻️🧑🏿‍🎓️🧑🏿‍🏭️🧑🏿‍💻️", + "是美國一個人工智能研究實驗室 由非營利組織OpenAI Inc", + "<|im_start|>test<|im_end|>", + ]; + + for (const text of fixtures) { + expect([...lite.encode(text)]).toEqual([...full.encode(text)]); + } + }); +}); diff --git a/js/tsconfig.json b/js/tsconfig.json new file mode 100644 index 00000000..27d2313e --- /dev/null +++ b/js/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "ES2022", + "lib": ["ESNext", "DOM"], + "module": "ES2020", + "moduleResolution": "node", + "strict": true, + "declaration": true, + "outDir": "./dist", + "allowSyntheticDefaultImports": true, + "resolveJsonModule": true + }, + "include": ["./src/**/*.ts"], + "exclude": ["node_modules", "dist"] +} diff --git a/package.json b/package.json new file mode 100644 index 00000000..2faabbc6 --- /dev/null +++ b/package.json @@ -0,0 +1,15 @@ +{ + "private": true, + "workspaces": [ + "js", + "wasm" + ], + "devDependencies": { + "@types/node": "^18.14.4", + "@changesets/cli": "^2.26.1", + "npm-run-all": 
"^4.1.5", + "vitest": "^0.28.5", + "typescript": "^4.9.5", + "tsx": "^3.12.3" + } +} diff --git a/wasm/package.json b/wasm/package.json index bae9787e..5164a79b 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -19,13 +19,7 @@ }, "dependencies": {}, "devDependencies": { - "@changesets/cli": "^2.26.1", - "@types/node": "^18.14.4", - "npm-run-all": "^4.1.5", "outdent": "^0.8.0", - "ts-morph": "^17.0.1", - "tsx": "^3.12.3", - "typescript": "^4.9.5", - "vitest": "^0.28.5" + "ts-morph": "^17.0.1" } } diff --git a/wasm/tsconfig.json b/wasm/tsconfig.json index cef970e8..27d2313e 100644 --- a/wasm/tsconfig.json +++ b/wasm/tsconfig.json @@ -7,7 +7,8 @@ "strict": true, "declaration": true, "outDir": "./dist", - "allowSyntheticDefaultImports": true + "allowSyntheticDefaultImports": true, + "resolveJsonModule": true }, "include": ["./src/**/*.ts"], "exclude": ["node_modules", "dist"] diff --git a/yarn.lock b/yarn.lock new file mode 100644 index 00000000..8ac754a9 --- /dev/null +++ b/yarn.lock @@ -0,0 +1,2647 @@ +# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 
+# yarn lockfile v1 + + +"@babel/code-frame@^7.0.0": + version "7.21.4" + resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.21.4.tgz#d0fa9e4413aca81f2b23b9442797bda1826edb39" + integrity sha512-LYvhNKfwWSPpocw8GI7gpK2nq3HSDuEPC/uSYaALSJu9xjsalaaYFOq0Pwt5KmVqwEbZlDu81aLXwBOmD/Fv9g== + dependencies: + "@babel/highlight" "^7.18.6" + +"@babel/helper-validator-identifier@^7.18.6": + version "7.19.1" + resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.19.1.tgz#7eea834cf32901ffdc1a7ee555e2f9c27e249ca2" + integrity sha512-awrNfaMtnHUr653GgGEs++LlAvW6w+DcPrOliSMXWCKo597CwL5Acf/wWdNkf/tfEQE3mjkeD1YOVZOUV/od1w== + +"@babel/highlight@^7.18.6": + version "7.18.6" + resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.18.6.tgz#81158601e93e2563795adcbfbdf5d64be3f2ecdf" + integrity sha512-u7stbOuYjaPezCuLj29hNW1v64M2Md2qupEKP1fHc7WdOA3DgLh37suiSrZYY7haUB7iBeQZ9P1uiRF359do3g== + dependencies: + "@babel/helper-validator-identifier" "^7.18.6" + chalk "^2.0.0" + js-tokens "^4.0.0" + +"@babel/runtime@^7.20.1", "@babel/runtime@^7.5.5": + version "7.21.5" + resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.21.5.tgz#8492dddda9644ae3bda3b45eabe87382caee7200" + integrity sha512-8jI69toZqqcsnqGGqwGS4Qb1VwLOEp4hz+CXPywcvjs60u3B4Pom/U/7rm4W8tMOYEB+E9wgD0mW1l3r8qlI9Q== + dependencies: + regenerator-runtime "^0.13.11" + +"@changesets/apply-release-plan@^6.1.3": + version "6.1.3" + resolved "https://registry.yarnpkg.com/@changesets/apply-release-plan/-/apply-release-plan-6.1.3.tgz#3bcc0bd57ba00d50d20df7d0141f1a9b2134eaf7" + integrity sha512-ECDNeoc3nfeAe1jqJb5aFQX7CqzQhD2klXRez2JDb/aVpGUbX673HgKrnrgJRuQR/9f2TtLoYIzrGB9qwD77mg== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/config" "^2.3.0" + "@changesets/get-version-range-type" "^0.3.2" + "@changesets/git" "^2.0.0" + "@changesets/types" "^5.2.1" + "@manypkg/get-packages" "^1.1.3" + detect-indent "^6.0.0" + fs-extra "^7.0.1" 
+ lodash.startcase "^4.4.0" + outdent "^0.5.0" + prettier "^2.7.1" + resolve-from "^5.0.0" + semver "^5.4.1" + +"@changesets/assemble-release-plan@^5.2.3": + version "5.2.3" + resolved "https://registry.yarnpkg.com/@changesets/assemble-release-plan/-/assemble-release-plan-5.2.3.tgz#5ce6191c6e193d40b566a7b0e01690cfb106f4db" + integrity sha512-g7EVZCmnWz3zMBAdrcKhid4hkHT+Ft1n0mLussFMcB1dE2zCuwcvGoy9ec3yOgPGF4hoMtgHaMIk3T3TBdvU9g== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/errors" "^0.1.4" + "@changesets/get-dependents-graph" "^1.3.5" + "@changesets/types" "^5.2.1" + "@manypkg/get-packages" "^1.1.3" + semver "^5.4.1" + +"@changesets/changelog-git@^0.1.14": + version "0.1.14" + resolved "https://registry.yarnpkg.com/@changesets/changelog-git/-/changelog-git-0.1.14.tgz#852caa7727dcf91497c131d05bc2cd6248532ada" + integrity sha512-+vRfnKtXVWsDDxGctOfzJsPhaCdXRYoe+KyWYoq5X/GqoISREiat0l3L8B0a453B2B4dfHGcZaGyowHbp9BSaA== + dependencies: + "@changesets/types" "^5.2.1" + +"@changesets/cli@^2.26.1": + version "2.26.1" + resolved "https://registry.yarnpkg.com/@changesets/cli/-/cli-2.26.1.tgz#2d10858d7d32314a524e383111c96d831eb0402f" + integrity sha512-XnTa+b51vt057fyAudvDKGB0Sh72xutQZNAdXkCqPBKO2zvs2yYZx5hFZj1u9cbtpwM6Sxtcr02/FQJfZOzemQ== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/apply-release-plan" "^6.1.3" + "@changesets/assemble-release-plan" "^5.2.3" + "@changesets/changelog-git" "^0.1.14" + "@changesets/config" "^2.3.0" + "@changesets/errors" "^0.1.4" + "@changesets/get-dependents-graph" "^1.3.5" + "@changesets/get-release-plan" "^3.0.16" + "@changesets/git" "^2.0.0" + "@changesets/logger" "^0.0.5" + "@changesets/pre" "^1.0.14" + "@changesets/read" "^0.5.9" + "@changesets/types" "^5.2.1" + "@changesets/write" "^0.2.3" + "@manypkg/get-packages" "^1.1.3" + "@types/is-ci" "^3.0.0" + "@types/semver" "^6.0.0" + ansi-colors "^4.1.3" + chalk "^2.1.0" + enquirer "^2.3.0" + external-editor "^3.1.0" + fs-extra "^7.0.1" + human-id "^1.0.2" + is-ci 
"^3.0.1" + meow "^6.0.0" + outdent "^0.5.0" + p-limit "^2.2.0" + preferred-pm "^3.0.0" + resolve-from "^5.0.0" + semver "^5.4.1" + spawndamnit "^2.0.0" + term-size "^2.1.0" + tty-table "^4.1.5" + +"@changesets/config@^2.3.0": + version "2.3.0" + resolved "https://registry.yarnpkg.com/@changesets/config/-/config-2.3.0.tgz#bff074d6492fa772cee139f9a04efa4cd56445bb" + integrity sha512-EgP/px6mhCx8QeaMAvWtRrgyxW08k/Bx2tpGT+M84jEdX37v3VKfh4Cz1BkwrYKuMV2HZKeHOh8sHvja/HcXfQ== + dependencies: + "@changesets/errors" "^0.1.4" + "@changesets/get-dependents-graph" "^1.3.5" + "@changesets/logger" "^0.0.5" + "@changesets/types" "^5.2.1" + "@manypkg/get-packages" "^1.1.3" + fs-extra "^7.0.1" + micromatch "^4.0.2" + +"@changesets/errors@^0.1.4": + version "0.1.4" + resolved "https://registry.yarnpkg.com/@changesets/errors/-/errors-0.1.4.tgz#f79851746c43679a66b383fdff4c012f480f480d" + integrity sha512-HAcqPF7snsUJ/QzkWoKfRfXushHTu+K5KZLJWPb34s4eCZShIf8BFO3fwq6KU8+G7L5KdtN2BzQAXOSXEyiY9Q== + dependencies: + extendable-error "^0.1.5" + +"@changesets/get-dependents-graph@^1.3.5": + version "1.3.5" + resolved "https://registry.yarnpkg.com/@changesets/get-dependents-graph/-/get-dependents-graph-1.3.5.tgz#f94c6672d2f9a87aa35512eea74550585ba41c21" + integrity sha512-w1eEvnWlbVDIY8mWXqWuYE9oKhvIaBhzqzo4ITSJY9hgoqQ3RoBqwlcAzg11qHxv/b8ReDWnMrpjpKrW6m1ZTA== + dependencies: + "@changesets/types" "^5.2.1" + "@manypkg/get-packages" "^1.1.3" + chalk "^2.1.0" + fs-extra "^7.0.1" + semver "^5.4.1" + +"@changesets/get-release-plan@^3.0.16": + version "3.0.16" + resolved "https://registry.yarnpkg.com/@changesets/get-release-plan/-/get-release-plan-3.0.16.tgz#5d9cfc4ffda02c496ef0fde407210de8e3a0fb19" + integrity sha512-OpP9QILpBp1bY2YNIKFzwigKh7Qe9KizRsZomzLe6pK8IUo8onkAAVUD8+JRKSr8R7d4+JRuQrfSSNlEwKyPYg== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/assemble-release-plan" "^5.2.3" + "@changesets/config" "^2.3.0" + "@changesets/pre" "^1.0.14" + "@changesets/read" "^0.5.9" + 
"@changesets/types" "^5.2.1" + "@manypkg/get-packages" "^1.1.3" + +"@changesets/get-version-range-type@^0.3.2": + version "0.3.2" + resolved "https://registry.yarnpkg.com/@changesets/get-version-range-type/-/get-version-range-type-0.3.2.tgz#8131a99035edd11aa7a44c341cbb05e668618c67" + integrity sha512-SVqwYs5pULYjYT4op21F2pVbcrca4qA/bAA3FmFXKMN7Y+HcO8sbZUTx3TAy2VXulP2FACd1aC7f2nTuqSPbqg== + +"@changesets/git@^2.0.0": + version "2.0.0" + resolved "https://registry.yarnpkg.com/@changesets/git/-/git-2.0.0.tgz#8de57649baf13a86eb669a25fa51bcad5cea517f" + integrity sha512-enUVEWbiqUTxqSnmesyJGWfzd51PY4H7mH9yUw0hPVpZBJ6tQZFMU3F3mT/t9OJ/GjyiM4770i+sehAn6ymx6A== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/errors" "^0.1.4" + "@changesets/types" "^5.2.1" + "@manypkg/get-packages" "^1.1.3" + is-subdir "^1.1.1" + micromatch "^4.0.2" + spawndamnit "^2.0.0" + +"@changesets/logger@^0.0.5": + version "0.0.5" + resolved "https://registry.yarnpkg.com/@changesets/logger/-/logger-0.0.5.tgz#68305dd5a643e336be16a2369cb17cdd8ed37d4c" + integrity sha512-gJyZHomu8nASHpaANzc6bkQMO9gU/ib20lqew1rVx753FOxffnCrJlGIeQVxNWCqM+o6OOleCo/ivL8UAO5iFw== + dependencies: + chalk "^2.1.0" + +"@changesets/parse@^0.3.16": + version "0.3.16" + resolved "https://registry.yarnpkg.com/@changesets/parse/-/parse-0.3.16.tgz#f8337b70aeb476dc81745ab3294022909bc4a84a" + integrity sha512-127JKNd167ayAuBjUggZBkmDS5fIKsthnr9jr6bdnuUljroiERW7FBTDNnNVyJ4l69PzR57pk6mXQdtJyBCJKg== + dependencies: + "@changesets/types" "^5.2.1" + js-yaml "^3.13.1" + +"@changesets/pre@^1.0.14": + version "1.0.14" + resolved "https://registry.yarnpkg.com/@changesets/pre/-/pre-1.0.14.tgz#9df73999a4d15804da7381358d77bb37b00ddf0f" + integrity sha512-dTsHmxQWEQekHYHbg+M1mDVYFvegDh9j/kySNuDKdylwfMEevTeDouR7IfHNyVodxZXu17sXoJuf2D0vi55FHQ== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/errors" "^0.1.4" + "@changesets/types" "^5.2.1" + "@manypkg/get-packages" "^1.1.3" + fs-extra "^7.0.1" + +"@changesets/read@^0.5.9": + 
version "0.5.9" + resolved "https://registry.yarnpkg.com/@changesets/read/-/read-0.5.9.tgz#a1b63a82b8e9409738d7a0f9cc39b6d7c28cbab0" + integrity sha512-T8BJ6JS6j1gfO1HFq50kU3qawYxa4NTbI/ASNVVCBTsKquy2HYwM9r7ZnzkiMe8IEObAJtUVGSrePCOxAK2haQ== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/git" "^2.0.0" + "@changesets/logger" "^0.0.5" + "@changesets/parse" "^0.3.16" + "@changesets/types" "^5.2.1" + chalk "^2.1.0" + fs-extra "^7.0.1" + p-filter "^2.1.0" + +"@changesets/types@^4.0.1": + version "4.1.0" + resolved "https://registry.yarnpkg.com/@changesets/types/-/types-4.1.0.tgz#fb8f7ca2324fd54954824e864f9a61a82cb78fe0" + integrity sha512-LDQvVDv5Kb50ny2s25Fhm3d9QSZimsoUGBsUioj6MC3qbMUCuC8GPIvk/M6IvXx3lYhAs0lwWUQLb+VIEUCECw== + +"@changesets/types@^5.2.1": + version "5.2.1" + resolved "https://registry.yarnpkg.com/@changesets/types/-/types-5.2.1.tgz#a228c48004aa8a93bce4be2d1d31527ef3bf21f6" + integrity sha512-myLfHbVOqaq9UtUKqR/nZA/OY7xFjQMdfgfqeZIBK4d0hA6pgxArvdv8M+6NUzzBsjWLOtvApv8YHr4qM+Kpfg== + +"@changesets/write@^0.2.3": + version "0.2.3" + resolved "https://registry.yarnpkg.com/@changesets/write/-/write-0.2.3.tgz#baf6be8ada2a67b9aba608e251bfea4fdc40bc63" + integrity sha512-Dbamr7AIMvslKnNYsLFafaVORx4H0pvCA2MHqgtNCySMe1blImEyAEOzDmcgKAkgz4+uwoLz7demIrX+JBr/Xw== + dependencies: + "@babel/runtime" "^7.20.1" + "@changesets/types" "^5.2.1" + fs-extra "^7.0.1" + human-id "^1.0.2" + prettier "^2.7.1" + +"@esbuild-kit/cjs-loader@^2.4.2": + version "2.4.2" + resolved "https://registry.yarnpkg.com/@esbuild-kit/cjs-loader/-/cjs-loader-2.4.2.tgz#cb4dde00fbf744a68c4f20162ea15a8242d0fa54" + integrity sha512-BDXFbYOJzT/NBEtp71cvsrGPwGAMGRB/349rwKuoxNSiKjPraNNnlK6MIIabViCjqZugu6j+xeMDlEkWdHHJSg== + dependencies: + "@esbuild-kit/core-utils" "^3.0.0" + get-tsconfig "^4.4.0" + +"@esbuild-kit/core-utils@^3.0.0": + version "3.1.0" + resolved "https://registry.yarnpkg.com/@esbuild-kit/core-utils/-/core-utils-3.1.0.tgz#49945d533dbd5e1b7620aa0fc522c15e6ec089c5" + integrity 
sha512-Uuk8RpCg/7fdHSceR1M6XbSZFSuMrxcePFuGgyvsBn+u339dk5OeL4jv2EojwTN2st/unJGsVm4qHWjWNmJ/tw== + dependencies: + esbuild "~0.17.6" + source-map-support "^0.5.21" + +"@esbuild-kit/esm-loader@^2.5.5": + version "2.5.5" + resolved "https://registry.yarnpkg.com/@esbuild-kit/esm-loader/-/esm-loader-2.5.5.tgz#b82da14fcee3fc1d219869756c06f43f67d1ca71" + integrity sha512-Qwfvj/qoPbClxCRNuac1Du01r9gvNOT+pMYtJDapfB1eoGN1YlJ1BixLyL9WVENRx5RXgNLdfYdx/CuswlGhMw== + dependencies: + "@esbuild-kit/core-utils" "^3.0.0" + get-tsconfig "^4.4.0" + +"@esbuild/android-arm64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.17.18.tgz#4aa8d8afcffb4458736ca9b32baa97d7cb5861ea" + integrity sha512-/iq0aK0eeHgSC3z55ucMAHO05OIqmQehiGay8eP5l/5l+iEr4EIbh4/MI8xD9qRFjqzgkc0JkX0LculNC9mXBw== + +"@esbuild/android-arm@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.17.18.tgz#74a7e95af4ee212ebc9db9baa87c06a594f2a427" + integrity sha512-EmwL+vUBZJ7mhFCs5lA4ZimpUH3WMAoqvOIYhVQwdIgSpHC8ImHdsRyhHAVxpDYUSm0lWvd63z0XH1IlImS2Qw== + +"@esbuild/android-x64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.17.18.tgz#1dcd13f201997c9fe0b204189d3a0da4eb4eb9b6" + integrity sha512-x+0efYNBF3NPW2Xc5bFOSFW7tTXdAcpfEg2nXmxegm4mJuVeS+i109m/7HMiOQ6M12aVGGFlqJX3RhNdYM2lWg== + +"@esbuild/darwin-arm64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.17.18.tgz#444f3b961d4da7a89eb9bd35cfa4415141537c2a" + integrity sha512-6tY+djEAdF48M1ONWnQb1C+6LiXrKjmqjzPNPWXhu/GzOHTHX2nh8Mo2ZAmBFg0kIodHhciEgUBtcYCAIjGbjQ== + +"@esbuild/darwin-x64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.17.18.tgz#a6da308d0ac8a498c54d62e0b2bfb7119b22d315" + integrity 
sha512-Qq84ykvLvya3dO49wVC9FFCNUfSrQJLbxhoQk/TE1r6MjHo3sFF2tlJCwMjhkBVq3/ahUisj7+EpRSz0/+8+9A== + +"@esbuild/freebsd-arm64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.17.18.tgz#b83122bb468889399d0d63475d5aea8d6829c2c2" + integrity sha512-fw/ZfxfAzuHfaQeMDhbzxp9mc+mHn1Y94VDHFHjGvt2Uxl10mT4CDavHm+/L9KG441t1QdABqkVYwakMUeyLRA== + +"@esbuild/freebsd-x64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.17.18.tgz#af59e0e03fcf7f221b34d4c5ab14094862c9c864" + integrity sha512-FQFbRtTaEi8ZBi/A6kxOC0V0E9B/97vPdYjY9NdawyLd4Qk5VD5g2pbWN2VR1c0xhzcJm74HWpObPszWC+qTew== + +"@esbuild/linux-arm64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.17.18.tgz#8551d72ba540c5bce4bab274a81c14ed01eafdcf" + integrity sha512-R7pZvQZFOY2sxUG8P6A21eq6q+eBv7JPQYIybHVf1XkQYC+lT7nDBdC7wWKTrbvMXKRaGudp/dzZCwL/863mZQ== + +"@esbuild/linux-arm@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.17.18.tgz#e09e76e526df4f665d4d2720d28ff87d15cdf639" + integrity sha512-jW+UCM40LzHcouIaqv3e/oRs0JM76JfhHjCavPxMUti7VAPh8CaGSlS7cmyrdpzSk7A+8f0hiedHqr/LMnfijg== + +"@esbuild/linux-ia32@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.17.18.tgz#47878860ce4fe73a36fd8627f5647bcbbef38ba4" + integrity sha512-ygIMc3I7wxgXIxk6j3V00VlABIjq260i967Cp9BNAk5pOOpIXmd1RFQJQX9Io7KRsthDrQYrtcx7QCof4o3ZoQ== + +"@esbuild/linux-loong64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.17.18.tgz#3f8fbf5267556fc387d20b2e708ce115de5c967a" + integrity sha512-bvPG+MyFs5ZlwYclCG1D744oHk1Pv7j8psF5TfYx7otCVmcJsEXgFEhQkbhNW8otDHL1a2KDINW20cfCgnzgMQ== + +"@esbuild/linux-mips64el@0.17.18": + version "0.17.18" + resolved 
"https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.17.18.tgz#9d896d8f3c75f6c226cbeb840127462e37738226" + integrity sha512-oVqckATOAGuiUOa6wr8TXaVPSa+6IwVJrGidmNZS1cZVx0HqkTMkqFGD2HIx9H1RvOwFeWYdaYbdY6B89KUMxA== + +"@esbuild/linux-ppc64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.17.18.tgz#3d9deb60b2d32c9985bdc3e3be090d30b7472783" + integrity sha512-3dLlQO+b/LnQNxgH4l9rqa2/IwRJVN9u/bK63FhOPB4xqiRqlQAU0qDU3JJuf0BmaH0yytTBdoSBHrb2jqc5qQ== + +"@esbuild/linux-riscv64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.17.18.tgz#8a943cf13fd24ff7ed58aefb940ef178f93386bc" + integrity sha512-/x7leOyDPjZV3TcsdfrSI107zItVnsX1q2nho7hbbQoKnmoeUWjs+08rKKt4AUXju7+3aRZSsKrJtaRmsdL1xA== + +"@esbuild/linux-s390x@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.17.18.tgz#66cb01f4a06423e5496facabdce4f7cae7cb80e5" + integrity sha512-cX0I8Q9xQkL/6F5zWdYmVf5JSQt+ZfZD2bJudZrWD+4mnUvoZ3TDDXtDX2mUaq6upMFv9FlfIh4Gfun0tbGzuw== + +"@esbuild/linux-x64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.17.18.tgz#23c26050c6c5d1359c7b774823adc32b3883b6c9" + integrity sha512-66RmRsPlYy4jFl0vG80GcNRdirx4nVWAzJmXkevgphP1qf4dsLQCpSKGM3DUQCojwU1hnepI63gNZdrr02wHUA== + +"@esbuild/netbsd-x64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.17.18.tgz#789a203d3115a52633ff6504f8cbf757f15e703b" + integrity sha512-95IRY7mI2yrkLlTLb1gpDxdC5WLC5mZDi+kA9dmM5XAGxCME0F8i4bYH4jZreaJ6lIZ0B8hTrweqG1fUyW7jbg== + +"@esbuild/openbsd-x64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.17.18.tgz#d7b998a30878f8da40617a10af423f56f12a5e90" + integrity sha512-WevVOgcng+8hSZ4Q3BKL3n1xTv5H6Nb53cBrtzzEjDbbnOmucEVcZeGCsCOi9bAOcDYEeBZbD2SJNBxlfP3qiA== + 
+"@esbuild/sunos-x64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.17.18.tgz#ecad0736aa7dae07901ba273db9ef3d3e93df31f" + integrity sha512-Rzf4QfQagnwhQXVBS3BYUlxmEbcV7MY+BH5vfDZekU5eYpcffHSyjU8T0xucKVuOcdCsMo+Ur5wmgQJH2GfNrg== + +"@esbuild/win32-arm64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.17.18.tgz#58dfc177da30acf956252d7c8ae9e54e424887c4" + integrity sha512-Kb3Ko/KKaWhjeAm2YoT/cNZaHaD1Yk/pa3FTsmqo9uFh1D1Rfco7BBLIPdDOozrObj2sahslFuAQGvWbgWldAg== + +"@esbuild/win32-ia32@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.17.18.tgz#340f6163172b5272b5ae60ec12c312485f69232b" + integrity sha512-0/xUMIdkVHwkvxfbd5+lfG7mHOf2FRrxNbPiKWg9C4fFrB8H0guClmaM3BFiRUYrznVoyxTIyC/Ou2B7QQSwmw== + +"@esbuild/win32-x64@0.17.18": + version "0.17.18" + resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.17.18.tgz#3a8e57153905308db357fd02f57c180ee3a0a1fa" + integrity sha512-qU25Ma1I3NqTSHJUOKi9sAH1/Mzuvlke0ioMJRthLXKm7JiSKVwFghlGbDLOO2sARECGhja4xYfRAZNPAkooYg== + +"@manypkg/find-root@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@manypkg/find-root/-/find-root-1.1.0.tgz#a62d8ed1cd7e7d4c11d9d52a8397460b5d4ad29f" + integrity sha512-mki5uBvhHzO8kYYix/WRy2WX8S3B5wdVSc9D6KcU5lQNglP2yt58/VfLuAK49glRXChosY8ap2oJ1qgma3GUVA== + dependencies: + "@babel/runtime" "^7.5.5" + "@types/node" "^12.7.1" + find-up "^4.1.0" + fs-extra "^8.1.0" + +"@manypkg/get-packages@^1.1.3": + version "1.1.3" + resolved "https://registry.yarnpkg.com/@manypkg/get-packages/-/get-packages-1.1.3.tgz#e184db9bba792fa4693de4658cfb1463ac2c9c47" + integrity sha512-fo+QhuU3qE/2TQMQmbVMqaQ6EWbMhi4ABWP+O4AM1NqPBuy0OrApV5LO6BrrgnhtAHS2NH6RrVk9OL181tTi8A== + dependencies: + "@babel/runtime" "^7.5.5" + "@changesets/types" "^4.0.1" + "@manypkg/find-root" "^1.1.0" + fs-extra "^8.1.0" + globby "^11.0.0" + read-yaml-file 
"^1.1.0" + +"@nodelib/fs.scandir@2.1.5": + version "2.1.5" + resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5" + integrity sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g== + dependencies: + "@nodelib/fs.stat" "2.0.5" + run-parallel "^1.1.9" + +"@nodelib/fs.stat@2.0.5", "@nodelib/fs.stat@^2.0.2": + version "2.0.5" + resolved "https://registry.yarnpkg.com/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz#5bd262af94e9d25bd1e71b05deed44876a222e8b" + integrity sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A== + +"@nodelib/fs.walk@^1.2.3": + version "1.2.8" + resolved "https://registry.yarnpkg.com/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz#e95737e8bb6746ddedf69c556953494f196fe69a" + integrity sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg== + dependencies: + "@nodelib/fs.scandir" "2.1.5" + fastq "^1.6.0" + +"@ts-morph/common@~0.18.0": + version "0.18.1" + resolved "https://registry.yarnpkg.com/@ts-morph/common/-/common-0.18.1.tgz#ca40c3a62c3f9e17142e0af42633ad63efbae0ec" + integrity sha512-RVE+zSRICWRsfrkAw5qCAK+4ZH9kwEFv5h0+/YeHTLieWP7F4wWq4JsKFuNWG+fYh/KF+8rAtgdj5zb2mm+DVA== + dependencies: + fast-glob "^3.2.12" + minimatch "^5.1.0" + mkdirp "^1.0.4" + path-browserify "^1.0.1" + +"@types/chai-subset@^1.3.3": + version "1.3.3" + resolved "https://registry.yarnpkg.com/@types/chai-subset/-/chai-subset-1.3.3.tgz#97893814e92abd2c534de422cb377e0e0bdaac94" + integrity sha512-frBecisrNGz+F4T6bcc+NLeolfiojh5FxW2klu669+8BARtyQv2C/GkNW6FUodVe4BroGMP/wER/YDGc7rEllw== + dependencies: + "@types/chai" "*" + +"@types/chai@*", "@types/chai@^4.3.4": + version "4.3.5" + resolved "https://registry.yarnpkg.com/@types/chai/-/chai-4.3.5.tgz#ae69bcbb1bebb68c4ac0b11e9d8ed04526b3562b" + integrity sha512-mEo1sAde+UCE6b2hxn332f1g1E8WfYRu6p5SvTKr2ZKC1f7gFJXk4h5PyGP9Dt6gCaG8y8XhwnXWC6Iy2cmBng== + 
+"@types/is-ci@^3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@types/is-ci/-/is-ci-3.0.0.tgz#7e8910af6857601315592436f030aaa3ed9783c3" + integrity sha512-Q0Op0hdWbYd1iahB+IFNQcWXFq4O0Q5MwQP7uN0souuQ4rPg1vEYcnIOfr1gY+M+6rc8FGoRaBO1mOOvL29sEQ== + dependencies: + ci-info "^3.1.0" + +"@types/minimist@^1.2.0": + version "1.2.2" + resolved "https://registry.yarnpkg.com/@types/minimist/-/minimist-1.2.2.tgz#ee771e2ba4b3dc5b372935d549fd9617bf345b8c" + integrity sha512-jhuKLIRrhvCPLqwPcx6INqmKeiA5EWrsCOPhrlFSrbrmU4ZMPjj5Ul/oLCMDO98XRUIwVm78xICz4EPCektzeQ== + +"@types/node@*": + version "20.1.3" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.1.3.tgz#bc8e7cd8065a5fc355a3a191a68db8019c58bc00" + integrity sha512-NP2yfZpgmf2eDRPmgGq+fjGjSwFgYbihA8/gK+ey23qT9RkxsgNTZvGOEpXgzIGqesTYkElELLgtKoMQTys5vA== + +"@types/node@^12.7.1": + version "12.20.55" + resolved "https://registry.yarnpkg.com/@types/node/-/node-12.20.55.tgz#c329cbd434c42164f846b909bd6f85b5537f6240" + integrity sha512-J8xLz7q2OFulZ2cyGTLE1TbbZcjpno7FaN6zdJNrgAdrJ+DZzh/uFR6YrTb4C+nXakvud8Q4+rbhoIWlYQbUFQ== + +"@types/node@^18.14.4": + version "18.16.8" + resolved "https://registry.yarnpkg.com/@types/node/-/node-18.16.8.tgz#fcd9bd0a793aba2701caff4aeae7c988d4da6ce5" + integrity sha512-p0iAXcfWCOTCBbsExHIDFCfwsqFwBTgETJveKMT+Ci3LY9YqQCI91F5S+TB20+aRCXpcWfvx5Qr5EccnwCm2NA== + +"@types/normalize-package-data@^2.4.0": + version "2.4.1" + resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.1.tgz#d3357479a0fdfdd5907fe67e17e0a85c906e1301" + integrity sha512-Gj7cI7z+98M282Tqmp2K5EIsoouUEzbBJhQQzDE3jSIRk6r9gsz0oUokqIUR4u1R3dMHo0pDHM7sNOHyhulypw== + +"@types/semver@^6.0.0": + version "6.2.3" + resolved "https://registry.yarnpkg.com/@types/semver/-/semver-6.2.3.tgz#5798ecf1bec94eaa64db39ee52808ec0693315aa" + integrity sha512-KQf+QAMWKMrtBMsB8/24w53tEsxllMj6TuA80TT/5igJalLI/zm0L3oXRbIAl4Ohfc85gyHX/jhMwsVkmhLU4A== + +"@vitest/expect@0.28.5": + version 
"0.28.5" + resolved "https://registry.yarnpkg.com/@vitest/expect/-/expect-0.28.5.tgz#d5a6eccd014e9ad66fe87a20d16426a2815c0e8a" + integrity sha512-gqTZwoUTwepwGIatnw4UKpQfnoyV0Z9Czn9+Lo2/jLIt4/AXLTn+oVZxlQ7Ng8bzcNkR+3DqLJ08kNr8jRmdNQ== + dependencies: + "@vitest/spy" "0.28.5" + "@vitest/utils" "0.28.5" + chai "^4.3.7" + +"@vitest/runner@0.28.5": + version "0.28.5" + resolved "https://registry.yarnpkg.com/@vitest/runner/-/runner-0.28.5.tgz#4a18fe0e40b25569763f9f1f64b799d1629b3026" + integrity sha512-NKkHtLB+FGjpp5KmneQjTcPLWPTDfB7ie+MmF1PnUBf/tGe2OjGxWyB62ySYZ25EYp9krR5Bw0YPLS/VWh1QiA== + dependencies: + "@vitest/utils" "0.28.5" + p-limit "^4.0.0" + pathe "^1.1.0" + +"@vitest/spy@0.28.5": + version "0.28.5" + resolved "https://registry.yarnpkg.com/@vitest/spy/-/spy-0.28.5.tgz#b69affa0786200251b9e5aac5c58bbfb1b3273c9" + integrity sha512-7if6rsHQr9zbmvxN7h+gGh2L9eIIErgf8nSKYDlg07HHimCxp4H6I/X/DPXktVPPLQfiZ1Cw2cbDIx9fSqDjGw== + dependencies: + tinyspy "^1.0.2" + +"@vitest/utils@0.28.5": + version "0.28.5" + resolved "https://registry.yarnpkg.com/@vitest/utils/-/utils-0.28.5.tgz#7b82b528df86adfbd4a1f6a3b72c39790e81de0d" + integrity sha512-UyZdYwdULlOa4LTUSwZ+Paz7nBHGTT72jKwdFSV4IjHF1xsokp+CabMdhjvVhYwkLfO88ylJT46YMilnkSARZA== + dependencies: + cli-truncate "^3.1.0" + diff "^5.1.0" + loupe "^2.3.6" + picocolors "^1.0.0" + pretty-format "^27.5.1" + +acorn-walk@^8.2.0: + version "8.2.0" + resolved "https://registry.yarnpkg.com/acorn-walk/-/acorn-walk-8.2.0.tgz#741210f2e2426454508853a2f44d0ab83b7f69c1" + integrity sha512-k+iyHEuPgSw6SbuDpGQM+06HQUa04DZ3o+F6CSzXMvvI5KMvnaEqXe+YVe555R9nn6GPt404fos4wcgpw12SDA== + +acorn@^8.8.1, acorn@^8.8.2: + version "8.8.2" + resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.8.2.tgz#1b2f25db02af965399b9776b0c2c391276d37c4a" + integrity sha512-xjIYgE8HBrkpd/sJqOGNspf8uHG+NOHGOw6a/Urj8taM2EXfdNAH2oFcPeIFfsv3+kz/mJrS5VuMqbNLjCa2vw== + +ansi-colors@^4.1.1, ansi-colors@^4.1.3: + version "4.1.3" + resolved 
"https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-4.1.3.tgz#37611340eb2243e70cc604cad35d63270d48781b" + integrity sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw== + +ansi-regex@^5.0.1: + version "5.0.1" + resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.1.tgz#082cb2c89c9fe8659a311a53bd6a4dc5301db304" + integrity sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ== + +ansi-regex@^6.0.1: + version "6.0.1" + resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-6.0.1.tgz#3183e38fae9a65d7cb5e53945cd5897d0260a06a" + integrity sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA== + +ansi-styles@^3.2.1: + version "3.2.1" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-3.2.1.tgz#41fbb20243e50b12be0f04b8dedbf07520ce841d" + integrity sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA== + dependencies: + color-convert "^1.9.0" + +ansi-styles@^4.0.0, ansi-styles@^4.1.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937" + integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg== + dependencies: + color-convert "^2.0.1" + +ansi-styles@^5.0.0: + version "5.2.0" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-5.2.0.tgz#07449690ad45777d1924ac2abb2fc8895dba836b" + integrity sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA== + +ansi-styles@^6.0.0: + version "6.2.1" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-6.2.1.tgz#0e62320cf99c21afff3b3012192546aacbfb05c5" + integrity sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug== + +argparse@^1.0.7: + version "1.0.10" + resolved 
"https://registry.yarnpkg.com/argparse/-/argparse-1.0.10.tgz#bcd6791ea5ae09725e17e5ad988134cd40b3d911" + integrity sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg== + dependencies: + sprintf-js "~1.0.2" + +array-buffer-byte-length@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/array-buffer-byte-length/-/array-buffer-byte-length-1.0.0.tgz#fabe8bc193fea865f317fe7807085ee0dee5aead" + integrity sha512-LPuwb2P+NrQw3XhxGc36+XSvuBPopovXYTR9Ew++Du9Yb/bx5AzBfrIsBoj0EZUifjQU+sHL21sseZ3jerWO/A== + dependencies: + call-bind "^1.0.2" + is-array-buffer "^3.0.1" + +array-union@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/array-union/-/array-union-2.1.0.tgz#b798420adbeb1de828d84acd8a2e23d3efe85e8d" + integrity sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw== + +array.prototype.flat@^1.2.3: + version "1.3.1" + resolved "https://registry.yarnpkg.com/array.prototype.flat/-/array.prototype.flat-1.3.1.tgz#ffc6576a7ca3efc2f46a143b9d1dda9b4b3cf5e2" + integrity sha512-roTU0KWIOmJ4DRLmwKd19Otg0/mT3qPNt0Qb3GWW8iObuZXxrjB/pzn0R3hqpRSWg4HCwqx+0vwOnWnvlOyeIA== + dependencies: + call-bind "^1.0.2" + define-properties "^1.1.4" + es-abstract "^1.20.4" + es-shim-unscopables "^1.0.0" + +arrify@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/arrify/-/arrify-1.0.1.tgz#898508da2226f380df904728456849c1501a4b0d" + integrity sha512-3CYzex9M9FGQjCGMGyi6/31c8GJbgb0qGyrx5HWxPd0aCwh4cB2YjMb2Xf9UuoogrMrlO9cTqnB5rI5GHZTcUA== + +assertion-error@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/assertion-error/-/assertion-error-1.1.0.tgz#e60b6b0e8f301bd97e5375215bda406c85118c0b" + integrity sha512-jgsaNduz+ndvGyFt3uSuWqvy4lCnIJiovtouQN5JZHOKCS2QuhEdbcQHFhVksz2N2U9hXJo8odG7ETyWlEeuDw== + +available-typed-arrays@^1.0.5: + version "1.0.5" + resolved 
"https://registry.yarnpkg.com/available-typed-arrays/-/available-typed-arrays-1.0.5.tgz#92f95616501069d07d10edb2fc37d3e1c65123b7" + integrity sha512-DMD0KiN46eipeziST1LPP/STfDU0sufISXmjSgvVsoU2tqxctQeASejWcfNtxYKqETM1UxQ8sp2OrSBWpHY6sw== + +balanced-match@^1.0.0: + version "1.0.2" + resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee" + integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw== + +base64-js@^1.5.1: + version "1.5.1" + resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a" + integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA== + +better-path-resolve@1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/better-path-resolve/-/better-path-resolve-1.0.0.tgz#13a35a1104cdd48a7b74bf8758f96a1ee613f99d" + integrity sha512-pbnl5XzGBdrFU/wT4jqmJVPn2B6UHPBOhzMQkY/SPUPB6QtUXtmBHBIwCbXJol93mOpGMnQyP/+BB19q04xj7g== + dependencies: + is-windows "^1.0.0" + +brace-expansion@^1.1.7: + version "1.1.11" + resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" + integrity sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA== + dependencies: + balanced-match "^1.0.0" + concat-map "0.0.1" + +brace-expansion@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-2.0.1.tgz#1edc459e0f0c548486ecf9fc99f2221364b9a0ae" + integrity sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA== + dependencies: + balanced-match "^1.0.0" + +braces@^3.0.2: + version "3.0.2" + resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.2.tgz#3454e1a462ee8d599e236df336cd9ea4f8afe107" + integrity 
sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A== + dependencies: + fill-range "^7.0.1" + +breakword@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/breakword/-/breakword-1.0.5.tgz#fd420a417f55016736b5b615161cae1c8f819810" + integrity sha512-ex5W9DoOQ/LUEU3PMdLs9ua/CYZl1678NUkKOdUSi8Aw5F1idieaiRURCBFJCwVcrD1J8Iy3vfWSloaMwO2qFg== + dependencies: + wcwidth "^1.0.1" + +buffer-from@^1.0.0: + version "1.1.2" + resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.2.tgz#2b146a6fd72e80b4f55d255f35ed59a3a9a41bd5" + integrity sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ== + +cac@^6.7.14: + version "6.7.14" + resolved "https://registry.yarnpkg.com/cac/-/cac-6.7.14.tgz#804e1e6f506ee363cb0e3ccbb09cad5dd9870959" + integrity sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ== + +call-bind@^1.0.0, call-bind@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/call-bind/-/call-bind-1.0.2.tgz#b1d4e89e688119c3c9a903ad30abb2f6a919be3c" + integrity sha512-7O+FbCihrB5WGbFYesctwmTKae6rOiIzmz1icreWJ+0aA7LJfuqhEso2T9ncpcFtzMQtzXf2QGGueWJGTYsqrA== + dependencies: + function-bind "^1.1.1" + get-intrinsic "^1.0.2" + +camelcase-keys@^6.2.2: + version "6.2.2" + resolved "https://registry.yarnpkg.com/camelcase-keys/-/camelcase-keys-6.2.2.tgz#5e755d6ba51aa223ec7d3d52f25778210f9dc3c0" + integrity sha512-YrwaA0vEKazPBkn0ipTiMpSajYDSe+KjQfrjhcBMxJt/znbvlHd8Pw/Vamaz5EB4Wfhs3SUR3Z9mwRu/P3s3Yg== + dependencies: + camelcase "^5.3.1" + map-obj "^4.0.0" + quick-lru "^4.0.1" + +camelcase@^5.0.0, camelcase@^5.3.1: + version "5.3.1" + resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-5.3.1.tgz#e3c9b31569e106811df242f715725a1f4c494320" + integrity sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg== + +chai@^4.3.7: + version "4.3.7" + resolved 
"https://registry.yarnpkg.com/chai/-/chai-4.3.7.tgz#ec63f6df01829088e8bf55fca839bcd464a8ec51" + integrity sha512-HLnAzZ2iupm25PlN0xFreAlBA5zaBSv3og0DdeGA4Ar6h6rJ3A0rolRUKJhSF2V10GZKDgWF/VmAEsNWjCRB+A== + dependencies: + assertion-error "^1.1.0" + check-error "^1.0.2" + deep-eql "^4.1.2" + get-func-name "^2.0.0" + loupe "^2.3.1" + pathval "^1.1.1" + type-detect "^4.0.5" + +chalk@^2.0.0, chalk@^2.1.0, chalk@^2.4.1: + version "2.4.2" + resolved "https://registry.yarnpkg.com/chalk/-/chalk-2.4.2.tgz#cd42541677a54333cf541a49108c1432b44c9424" + integrity sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ== + dependencies: + ansi-styles "^3.2.1" + escape-string-regexp "^1.0.5" + supports-color "^5.3.0" + +chalk@^4.1.2: + version "4.1.2" + resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.2.tgz#aac4e2b7734a740867aeb16bf02aad556a1e7a01" + integrity sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA== + dependencies: + ansi-styles "^4.1.0" + supports-color "^7.1.0" + +chardet@^0.7.0: + version "0.7.0" + resolved "https://registry.yarnpkg.com/chardet/-/chardet-0.7.0.tgz#90094849f0937f2eedc2425d0d28a9e5f0cbad9e" + integrity sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA== + +check-error@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/check-error/-/check-error-1.0.2.tgz#574d312edd88bb5dd8912e9286dd6c0aed4aac82" + integrity sha512-BrgHpW9NURQgzoNyjfq0Wu6VFO6D7IZEmJNdtgNqpzGG8RuNFHt2jQxWlAs4HMe119chBnv+34syEZtc6IhLtA== + +ci-info@^3.1.0, ci-info@^3.2.0: + version "3.8.0" + resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-3.8.0.tgz#81408265a5380c929f0bc665d62256628ce9ef91" + integrity sha512-eXTggHWSooYhq49F2opQhuHWgzucfF2YgODK4e1566GQs5BIfP30B0oenwBJHfWxAs2fyPB1s7Mg949zLf61Yw== + +cli-truncate@^3.1.0: + version "3.1.0" + resolved 
"https://registry.yarnpkg.com/cli-truncate/-/cli-truncate-3.1.0.tgz#3f23ab12535e3d73e839bb43e73c9de487db1389" + integrity sha512-wfOBkjXteqSnI59oPcJkcPl/ZmwvMMOj340qUIY1SKZCv0B9Cf4D4fAucRkIKQmsIuYK3x1rrgU7MeGRruiuiA== + dependencies: + slice-ansi "^5.0.0" + string-width "^5.0.0" + +cliui@^6.0.0: + version "6.0.0" + resolved "https://registry.yarnpkg.com/cliui/-/cliui-6.0.0.tgz#511d702c0c4e41ca156d7d0e96021f23e13225b1" + integrity sha512-t6wbgtoCXvAzst7QgXxJYqPt0usEfbgQdftEPbLL/cvv6HPE5VgvqCuAIDR0NgU52ds6rFwqrgakNLrHEjCbrQ== + dependencies: + string-width "^4.2.0" + strip-ansi "^6.0.0" + wrap-ansi "^6.2.0" + +cliui@^8.0.1: + version "8.0.1" + resolved "https://registry.yarnpkg.com/cliui/-/cliui-8.0.1.tgz#0c04b075db02cbfe60dc8e6cf2f5486b1a3608aa" + integrity sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ== + dependencies: + string-width "^4.2.0" + strip-ansi "^6.0.1" + wrap-ansi "^7.0.0" + +clone@^1.0.2: + version "1.0.4" + resolved "https://registry.yarnpkg.com/clone/-/clone-1.0.4.tgz#da309cc263df15994c688ca902179ca3c7cd7c7e" + integrity sha512-JQHZ2QMW6l3aH/j6xCqQThY/9OH4D/9ls34cgkUBiEeocRTU04tHfKPBsUK1PqZCUQM7GiA0IIXJSuXHI64Kbg== + +code-block-writer@^11.0.3: + version "11.0.3" + resolved "https://registry.yarnpkg.com/code-block-writer/-/code-block-writer-11.0.3.tgz#9eec2993edfb79bfae845fbc093758c0a0b73b76" + integrity sha512-NiujjUFB4SwScJq2bwbYUtXbZhBSlY6vYzm++3Q6oC+U+injTqfPYFK8wS9COOmb2lueqp0ZRB4nK1VYeHgNyw== + +color-convert@^1.9.0: + version "1.9.3" + resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8" + integrity sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg== + dependencies: + color-name "1.1.3" + +color-convert@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3" + integrity 
sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ== + dependencies: + color-name "~1.1.4" + +color-name@1.1.3: + version "1.1.3" + resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.3.tgz#a7d0558bd89c42f795dd42328f740831ca53bc25" + integrity sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw== + +color-name@~1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" + integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== + +concat-map@0.0.1: + version "0.0.1" + resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" + integrity sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg== + +cross-spawn@^5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-5.1.0.tgz#e8bd0efee58fcff6f8f94510a0a554bbfa235449" + integrity sha512-pTgQJ5KC0d2hcY8eyL1IzlBPYjTkyH72XRZPnLyKus2mBfNjQs3klqbJU2VILqZryAZUt9JOb3h/mWMy23/f5A== + dependencies: + lru-cache "^4.0.1" + shebang-command "^1.2.0" + which "^1.2.9" + +cross-spawn@^6.0.5: + version "6.0.5" + resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-6.0.5.tgz#4a5ec7c64dfae22c3a14124dbacdee846d80cbc4" + integrity sha512-eTVLrBSt7fjbDygz805pMnstIs2VTBNkRm0qxZd+M7A5XDdxVRWO5MxGBXZhjY4cqLYLdtrGqRf8mBPmzwSpWQ== + dependencies: + nice-try "^1.0.4" + path-key "^2.0.1" + semver "^5.5.0" + shebang-command "^1.2.0" + which "^1.2.9" + +csv-generate@^3.4.3: + version "3.4.3" + resolved "https://registry.yarnpkg.com/csv-generate/-/csv-generate-3.4.3.tgz#bc42d943b45aea52afa896874291da4b9108ffff" + integrity sha512-w/T+rqR0vwvHqWs/1ZyMDWtHHSJaN06klRqJXBEpDJaM/+dZkso0OKh1VcuuYvK3XM53KysVNq8Ko/epCK8wOw== + +csv-parse@^4.16.3: + version "4.16.3" + resolved 
"https://registry.yarnpkg.com/csv-parse/-/csv-parse-4.16.3.tgz#7ca624d517212ebc520a36873c3478fa66efbaf7" + integrity sha512-cO1I/zmz4w2dcKHVvpCr7JVRu8/FymG5OEpmvsZYlccYolPBLoVGKUHgNoc4ZGkFeFlWGEDmMyBM+TTqRdW/wg== + +csv-stringify@^5.6.5: + version "5.6.5" + resolved "https://registry.yarnpkg.com/csv-stringify/-/csv-stringify-5.6.5.tgz#c6d74badda4b49a79bf4e72f91cce1e33b94de00" + integrity sha512-PjiQ659aQ+fUTQqSrd1XEDnOr52jh30RBurfzkscaE2tPaFsDH5wOAHJiw8XAHphRknCwMUE9KRayc4K/NbO8A== + +csv@^5.5.3: + version "5.5.3" + resolved "https://registry.yarnpkg.com/csv/-/csv-5.5.3.tgz#cd26c1e45eae00ce6a9b7b27dcb94955ec95207d" + integrity sha512-QTaY0XjjhTQOdguARF0lGKm5/mEq9PD9/VhZZegHDIBq2tQwgNpHc3dneD4mGo2iJs+fTKv5Bp0fZ+BRuY3Z0g== + dependencies: + csv-generate "^3.4.3" + csv-parse "^4.16.3" + csv-stringify "^5.6.5" + stream-transform "^2.1.3" + +debug@^4.3.4: + version "4.3.4" + resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865" + integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ== + dependencies: + ms "2.1.2" + +decamelize-keys@^1.1.0: + version "1.1.1" + resolved "https://registry.yarnpkg.com/decamelize-keys/-/decamelize-keys-1.1.1.tgz#04a2d523b2f18d80d0158a43b895d56dff8d19d8" + integrity sha512-WiPxgEirIV0/eIOMcnFBA3/IJZAZqKnwAwWyvvdi4lsr1WCN22nhdf/3db3DoZcUjTV2SqfzIwNyp6y2xs3nmg== + dependencies: + decamelize "^1.1.0" + map-obj "^1.0.0" + +decamelize@^1.1.0, decamelize@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-1.2.0.tgz#f6534d15148269b20352e7bee26f501f9a191290" + integrity sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA== + +deep-eql@^4.1.2: + version "4.1.3" + resolved "https://registry.yarnpkg.com/deep-eql/-/deep-eql-4.1.3.tgz#7c7775513092f7df98d8df9996dd085eb668cc6d" + integrity 
sha512-WaEtAOpRA1MQ0eohqZjpGD8zdI0Ovsm8mmFhaDN8dvDZzyoUMcYDnf5Y6iu7HTXxf8JDS23qWa4a+hKCDyOPzw== + dependencies: + type-detect "^4.0.0" + +defaults@^1.0.3: + version "1.0.4" + resolved "https://registry.yarnpkg.com/defaults/-/defaults-1.0.4.tgz#b0b02062c1e2aa62ff5d9528f0f98baa90978d7a" + integrity sha512-eFuaLoy/Rxalv2kr+lqMlUnrDWV+3j4pljOIJgLIhI058IQfWJ7vXhyEIHu+HtC738klGALYxOKDO0bQP3tg8A== + dependencies: + clone "^1.0.2" + +define-properties@^1.1.3, define-properties@^1.1.4, define-properties@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/define-properties/-/define-properties-1.2.0.tgz#52988570670c9eacedd8064f4a990f2405849bd5" + integrity sha512-xvqAVKGfT1+UAvPwKTVw/njhdQ8ZhXK4lI0bCIuCMrp2up9nPnaDftrLtmpTazqd1o+UY4zgzU+avtMbDP+ldA== + dependencies: + has-property-descriptors "^1.0.0" + object-keys "^1.1.1" + +detect-indent@^6.0.0: + version "6.1.0" + resolved "https://registry.yarnpkg.com/detect-indent/-/detect-indent-6.1.0.tgz#592485ebbbf6b3b1ab2be175c8393d04ca0d57e6" + integrity sha512-reYkTUJAZb9gUuZ2RvVCNhVHdg62RHnJ7WJl8ftMi4diZ6NWlciOzQN88pUhSELEwflJht4oQDv0F0BMlwaYtA== + +diff@^5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/diff/-/diff-5.1.0.tgz#bc52d298c5ea8df9194800224445ed43ffc87e40" + integrity sha512-D+mk+qE8VC/PAUrlAU34N+VfXev0ghe5ywmpqrawphmVZc1bEfn56uo9qpyGp1p4xpzOHkSW4ztBd6L7Xx4ACw== + +dir-glob@^3.0.1: + version "3.0.1" + resolved "https://registry.yarnpkg.com/dir-glob/-/dir-glob-3.0.1.tgz#56dbf73d992a4a93ba1584f4534063fd2e41717f" + integrity sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA== + dependencies: + path-type "^4.0.0" + +eastasianwidth@^0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/eastasianwidth/-/eastasianwidth-0.2.0.tgz#696ce2ec0aa0e6ea93a397ffcf24aa7840c827cb" + integrity sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA== + +emoji-regex@^8.0.0: + version "8.0.0" + resolved 
"https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37" + integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A== + +emoji-regex@^9.2.2: + version "9.2.2" + resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-9.2.2.tgz#840c8803b0d8047f4ff0cf963176b32d4ef3ed72" + integrity sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg== + +enquirer@^2.3.0: + version "2.3.6" + resolved "https://registry.yarnpkg.com/enquirer/-/enquirer-2.3.6.tgz#2a7fe5dd634a1e4125a975ec994ff5456dc3734d" + integrity sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg== + dependencies: + ansi-colors "^4.1.1" + +error-ex@^1.3.1: + version "1.3.2" + resolved "https://registry.yarnpkg.com/error-ex/-/error-ex-1.3.2.tgz#b4ac40648107fdcdcfae242f428bea8a14d4f1bf" + integrity sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g== + dependencies: + is-arrayish "^0.2.1" + +es-abstract@^1.19.0, es-abstract@^1.20.4: + version "1.21.2" + resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.21.2.tgz#a56b9695322c8a185dc25975aa3b8ec31d0e7eff" + integrity sha512-y/B5POM2iBnIxCiernH1G7rC9qQoM77lLIMQLuob0zhp8C56Po81+2Nj0WFKnd0pNReDTnkYryc+zhOzpEIROg== + dependencies: + array-buffer-byte-length "^1.0.0" + available-typed-arrays "^1.0.5" + call-bind "^1.0.2" + es-set-tostringtag "^2.0.1" + es-to-primitive "^1.2.1" + function.prototype.name "^1.1.5" + get-intrinsic "^1.2.0" + get-symbol-description "^1.0.0" + globalthis "^1.0.3" + gopd "^1.0.1" + has "^1.0.3" + has-property-descriptors "^1.0.0" + has-proto "^1.0.1" + has-symbols "^1.0.3" + internal-slot "^1.0.5" + is-array-buffer "^3.0.2" + is-callable "^1.2.7" + is-negative-zero "^2.0.2" + is-regex "^1.1.4" + is-shared-array-buffer "^1.0.2" + is-string "^1.0.7" + is-typed-array "^1.1.10" + is-weakref "^1.0.2" + 
object-inspect "^1.12.3" + object-keys "^1.1.1" + object.assign "^4.1.4" + regexp.prototype.flags "^1.4.3" + safe-regex-test "^1.0.0" + string.prototype.trim "^1.2.7" + string.prototype.trimend "^1.0.6" + string.prototype.trimstart "^1.0.6" + typed-array-length "^1.0.4" + unbox-primitive "^1.0.2" + which-typed-array "^1.1.9" + +es-set-tostringtag@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/es-set-tostringtag/-/es-set-tostringtag-2.0.1.tgz#338d502f6f674301d710b80c8592de8a15f09cd8" + integrity sha512-g3OMbtlwY3QewlqAiMLI47KywjWZoEytKr8pf6iTC8uJq5bIAH52Z9pnQ8pVL6whrCto53JZDuUIsifGeLorTg== + dependencies: + get-intrinsic "^1.1.3" + has "^1.0.3" + has-tostringtag "^1.0.0" + +es-shim-unscopables@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/es-shim-unscopables/-/es-shim-unscopables-1.0.0.tgz#702e632193201e3edf8713635d083d378e510241" + integrity sha512-Jm6GPcCdC30eMLbZ2x8z2WuRwAws3zTBBKuusffYVUrNj/GVSUAZ+xKMaUpfNDR5IbyNA5LJbaecoUVbmUcB1w== + dependencies: + has "^1.0.3" + +es-to-primitive@^1.2.1: + version "1.2.1" + resolved "https://registry.yarnpkg.com/es-to-primitive/-/es-to-primitive-1.2.1.tgz#e55cd4c9cdc188bcefb03b366c736323fc5c898a" + integrity sha512-QCOllgZJtaUo9miYBcLChTUaHNjJF3PYs1VidD7AwiEj1kYxKeQTctLAezAOH5ZKRH0g2IgPn6KwB4IT8iRpvA== + dependencies: + is-callable "^1.1.4" + is-date-object "^1.0.1" + is-symbol "^1.0.2" + +esbuild@^0.17.5, esbuild@~0.17.6: + version "0.17.18" + resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.17.18.tgz#f4f8eb6d77384d68cd71c53eb6601c7efe05e746" + integrity sha512-z1lix43jBs6UKjcZVKOw2xx69ffE2aG0PygLL5qJ9OS/gy0Ewd1gW/PUQIOIQGXBHWNywSc0floSKoMFF8aK2w== + optionalDependencies: + "@esbuild/android-arm" "0.17.18" + "@esbuild/android-arm64" "0.17.18" + "@esbuild/android-x64" "0.17.18" + "@esbuild/darwin-arm64" "0.17.18" + "@esbuild/darwin-x64" "0.17.18" + "@esbuild/freebsd-arm64" "0.17.18" + "@esbuild/freebsd-x64" "0.17.18" + "@esbuild/linux-arm" "0.17.18" + "@esbuild/linux-arm64" 
"0.17.18" + "@esbuild/linux-ia32" "0.17.18" + "@esbuild/linux-loong64" "0.17.18" + "@esbuild/linux-mips64el" "0.17.18" + "@esbuild/linux-ppc64" "0.17.18" + "@esbuild/linux-riscv64" "0.17.18" + "@esbuild/linux-s390x" "0.17.18" + "@esbuild/linux-x64" "0.17.18" + "@esbuild/netbsd-x64" "0.17.18" + "@esbuild/openbsd-x64" "0.17.18" + "@esbuild/sunos-x64" "0.17.18" + "@esbuild/win32-arm64" "0.17.18" + "@esbuild/win32-ia32" "0.17.18" + "@esbuild/win32-x64" "0.17.18" + +escalade@^3.1.1: + version "3.1.1" + resolved "https://registry.yarnpkg.com/escalade/-/escalade-3.1.1.tgz#d8cfdc7000965c5a0174b4a82eaa5c0552742e40" + integrity sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw== + +escape-string-regexp@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4" + integrity sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg== + +esprima@^4.0.0: + version "4.0.1" + resolved "https://registry.yarnpkg.com/esprima/-/esprima-4.0.1.tgz#13b04cdb3e6c5d19df91ab6987a8695619b0aa71" + integrity sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A== + +extendable-error@^0.1.5: + version "0.1.7" + resolved "https://registry.yarnpkg.com/extendable-error/-/extendable-error-0.1.7.tgz#60b9adf206264ac920058a7395685ae4670c2b96" + integrity sha512-UOiS2in6/Q0FK0R0q6UY9vYpQ21mr/Qn1KOnte7vsACuNJf514WvCCUHSRCPcgjPT2bAhNIJdlE6bVap1GKmeg== + +external-editor@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/external-editor/-/external-editor-3.1.0.tgz#cb03f740befae03ea4d283caed2741a83f335495" + integrity sha512-hMQ4CX1p1izmuLYyZqLMO/qGNw10wSv9QDCPfzXfyFrOaCSSoRfqE1Kf1s5an66J5JZC62NewG+mK49jOCtQew== + dependencies: + chardet "^0.7.0" + iconv-lite "^0.4.24" + tmp "^0.0.33" + +fast-glob@^3.2.12, fast-glob@^3.2.9: + version "3.2.12" + resolved 
"https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.2.12.tgz#7f39ec99c2e6ab030337142da9e0c18f37afae80" + integrity sha512-DVj4CQIYYow0BlaelwK1pHl5n5cRSJfM60UA0zK891sVInoPri2Ekj7+e1CT3/3qxXenpI+nBBmQAcJPJgaj4w== + dependencies: + "@nodelib/fs.stat" "^2.0.2" + "@nodelib/fs.walk" "^1.2.3" + glob-parent "^5.1.2" + merge2 "^1.3.0" + micromatch "^4.0.4" + +fastq@^1.6.0: + version "1.15.0" + resolved "https://registry.yarnpkg.com/fastq/-/fastq-1.15.0.tgz#d04d07c6a2a68fe4599fea8d2e103a937fae6b3a" + integrity sha512-wBrocU2LCXXa+lWBt8RoIRD89Fi8OdABODa/kEnyeyjS5aZO5/GNvI5sEINADqP/h8M29UHTHUb53sUu5Ihqdw== + dependencies: + reusify "^1.0.4" + +fill-range@^7.0.1: + version "7.0.1" + resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.0.1.tgz#1919a6a7c75fe38b2c7c77e5198535da9acdda40" + integrity sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ== + dependencies: + to-regex-range "^5.0.1" + +find-up@^4.0.0, find-up@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/find-up/-/find-up-4.1.0.tgz#97afe7d6cdc0bc5928584b7c8d7b16e8a9aa5d19" + integrity sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw== + dependencies: + locate-path "^5.0.0" + path-exists "^4.0.0" + +find-up@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/find-up/-/find-up-5.0.0.tgz#4c92819ecb7083561e4f4a240a86be5198f536fc" + integrity sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng== + dependencies: + locate-path "^6.0.0" + path-exists "^4.0.0" + +find-yarn-workspace-root2@1.2.16: + version "1.2.16" + resolved "https://registry.yarnpkg.com/find-yarn-workspace-root2/-/find-yarn-workspace-root2-1.2.16.tgz#60287009dd2f324f59646bdb4b7610a6b301c2a9" + integrity sha512-hr6hb1w8ePMpPVUK39S4RlwJzi+xPLuVuG8XlwXU3KD5Yn3qgBWVfy3AzNlDhWvE1EORCE65/Qm26rFQt3VLVA== + dependencies: + micromatch "^4.0.2" + pkg-dir "^4.2.0" + +for-each@^0.3.3: + version 
"0.3.3" + resolved "https://registry.yarnpkg.com/for-each/-/for-each-0.3.3.tgz#69b447e88a0a5d32c3e7084f3f1710034b21376e" + integrity sha512-jqYfLp7mo9vIyQf8ykW2v7A+2N4QjeCeI5+Dz9XraiO1ign81wjiH7Fb9vSOWvQfNtmSa4H2RoQTrrXivdUZmw== + dependencies: + is-callable "^1.1.3" + +fs-extra@^7.0.1: + version "7.0.1" + resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-7.0.1.tgz#4f189c44aa123b895f722804f55ea23eadc348e9" + integrity sha512-YJDaCJZEnBmcbw13fvdAM9AwNOJwOzrE4pqMqBq5nFiEqXUqHwlK4B+3pUw6JNvfSPtX05xFHtYy/1ni01eGCw== + dependencies: + graceful-fs "^4.1.2" + jsonfile "^4.0.0" + universalify "^0.1.0" + +fs-extra@^8.1.0: + version "8.1.0" + resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-8.1.0.tgz#49d43c45a88cd9677668cb7be1b46efdb8d2e1c0" + integrity sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g== + dependencies: + graceful-fs "^4.2.0" + jsonfile "^4.0.0" + universalify "^0.1.0" + +fsevents@~2.3.2: + version "2.3.2" + resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.2.tgz#8a526f78b8fdf4623b709e0b975c52c24c02fd1a" + integrity sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA== + +function-bind@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d" + integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A== + +function.prototype.name@^1.1.5: + version "1.1.5" + resolved "https://registry.yarnpkg.com/function.prototype.name/-/function.prototype.name-1.1.5.tgz#cce0505fe1ffb80503e6f9e46cc64e46a12a9621" + integrity sha512-uN7m/BzVKQnCUF/iW8jYea67v++2u7m5UgENbHRtdDVclOUP+FMPlCNdmk0h/ysGyo2tavMJEDqJAkJdRa1vMA== + dependencies: + call-bind "^1.0.2" + define-properties "^1.1.3" + es-abstract "^1.19.0" + functions-have-names "^1.2.2" + +functions-have-names@^1.2.2, functions-have-names@^1.2.3: + version "1.2.3" + 
resolved "https://registry.yarnpkg.com/functions-have-names/-/functions-have-names-1.2.3.tgz#0404fe4ee2ba2f607f0e0ec3c80bae994133b834" + integrity sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ== + +get-caller-file@^2.0.1, get-caller-file@^2.0.5: + version "2.0.5" + resolved "https://registry.yarnpkg.com/get-caller-file/-/get-caller-file-2.0.5.tgz#4f94412a82db32f36e3b0b9741f8a97feb031f7e" + integrity sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg== + +get-func-name@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/get-func-name/-/get-func-name-2.0.0.tgz#ead774abee72e20409433a066366023dd6887a41" + integrity sha512-Hm0ixYtaSZ/V7C8FJrtZIuBBI+iSgL+1Aq82zSu8VQNB4S3Gk8e7Qs3VwBDJAhmRZcFqkl3tQu36g/Foh5I5ig== + +get-intrinsic@^1.0.2, get-intrinsic@^1.1.1, get-intrinsic@^1.1.3, get-intrinsic@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/get-intrinsic/-/get-intrinsic-1.2.0.tgz#7ad1dc0535f3a2904bba075772763e5051f6d05f" + integrity sha512-L049y6nFOuom5wGyRc3/gdTLO94dySVKRACj1RmJZBQXlbTMhtNIgkWkUHq+jYmZvKf14EW1EoJnnjbmoHij0Q== + dependencies: + function-bind "^1.1.1" + has "^1.0.3" + has-symbols "^1.0.3" + +get-symbol-description@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/get-symbol-description/-/get-symbol-description-1.0.0.tgz#7fdb81c900101fbd564dd5f1a30af5aadc1e58d6" + integrity sha512-2EmdH1YvIQiZpltCNgkuiUnyukzxM/R6NDJX31Ke3BG1Nq5b0S2PhX59UKi9vZpPDQVdqn+1IcaAwnzTT5vCjw== + dependencies: + call-bind "^1.0.2" + get-intrinsic "^1.1.1" + +get-tsconfig@^4.4.0: + version "4.5.0" + resolved "https://registry.yarnpkg.com/get-tsconfig/-/get-tsconfig-4.5.0.tgz#6d52d1c7b299bd3ee9cd7638561653399ac77b0f" + integrity sha512-MjhiaIWCJ1sAU4pIQ5i5OfOuHHxVo1oYeNsWTON7jxYkod8pHocXeh+SSbmu5OZZZK73B6cbJ2XADzXehLyovQ== + +glob-parent@^5.1.2: + version "5.1.2" + resolved 
"https://registry.yarnpkg.com/glob-parent/-/glob-parent-5.1.2.tgz#869832c58034fe68a4093c17dc15e8340d8401c4" + integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow== + dependencies: + is-glob "^4.0.1" + +globalthis@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/globalthis/-/globalthis-1.0.3.tgz#5852882a52b80dc301b0660273e1ed082f0b6ccf" + integrity sha512-sFdI5LyBiNTHjRd7cGPWapiHWMOXKyuBNX/cWJ3NfzrZQVa8GI/8cofCl74AOVqq9W5kNmguTIzJ/1s2gyI9wA== + dependencies: + define-properties "^1.1.3" + +globby@^11.0.0: + version "11.1.0" + resolved "https://registry.yarnpkg.com/globby/-/globby-11.1.0.tgz#bd4be98bb042f83d796f7e3811991fbe82a0d34b" + integrity sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g== + dependencies: + array-union "^2.1.0" + dir-glob "^3.0.1" + fast-glob "^3.2.9" + ignore "^5.2.0" + merge2 "^1.4.1" + slash "^3.0.0" + +gopd@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/gopd/-/gopd-1.0.1.tgz#29ff76de69dac7489b7c0918a5788e56477c332c" + integrity sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA== + dependencies: + get-intrinsic "^1.1.3" + +graceful-fs@^4.1.2, graceful-fs@^4.1.5, graceful-fs@^4.1.6, graceful-fs@^4.2.0: + version "4.2.11" + resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.11.tgz#4183e4e8bf08bb6e05bbb2f7d2e0c8f712ca40e3" + integrity sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ== + +grapheme-splitter@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz#9cf3a665c6247479896834af35cf1dbb4400767e" + integrity sha512-bzh50DW9kTPM00T8y4o8vQg89Di9oLJVLW/KaOGIXJWP/iqCN6WKYkbNOF04vFLJhwcpYUh9ydh/+5vpOqV4YQ== + +hard-rejection@^2.1.0: + version "2.1.0" + resolved 
"https://registry.yarnpkg.com/hard-rejection/-/hard-rejection-2.1.0.tgz#1c6eda5c1685c63942766d79bb40ae773cecd883" + integrity sha512-VIZB+ibDhx7ObhAe7OVtoEbuP4h/MuOTHJ+J8h/eBXotJYl0fBgR72xDFCKgIh22OJZIOVNxBMWuhAr10r8HdA== + +has-bigints@^1.0.1, has-bigints@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/has-bigints/-/has-bigints-1.0.2.tgz#0871bd3e3d51626f6ca0966668ba35d5602d6eaa" + integrity sha512-tSvCKtBr9lkF0Ex0aQiP9N+OpV4zi2r/Nee5VkRDbaqv35RLYMzbwQfFSZZH0kR+Rd6302UJZ2p/bJCEoR3VoQ== + +has-flag@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-3.0.0.tgz#b5d454dc2199ae225699f3467e5a07f3b955bafd" + integrity sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw== + +has-flag@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b" + integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ== + +has-property-descriptors@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/has-property-descriptors/-/has-property-descriptors-1.0.0.tgz#610708600606d36961ed04c196193b6a607fa861" + integrity sha512-62DVLZGoiEBDHQyqG4w9xCuZ7eJEwNmJRWw2VY84Oedb7WFcA27fiEVe8oUQx9hAUJ4ekurquucTGwsyO1XGdQ== + dependencies: + get-intrinsic "^1.1.1" + +has-proto@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/has-proto/-/has-proto-1.0.1.tgz#1885c1305538958aff469fef37937c22795408e0" + integrity sha512-7qE+iP+O+bgF9clE5+UoBFzE65mlBiVj3tKCrlNQ0Ogwm0BjpT/gK4SlLYDMybDh5I3TCTKnPPa0oMG7JDYrhg== + +has-symbols@^1.0.2, has-symbols@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.3.tgz#bb7b2c4349251dce87b125f7bdf874aa7c8b39f8" + integrity sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A== + +has-tostringtag@^1.0.0: + version "1.0.0" + resolved 
"https://registry.yarnpkg.com/has-tostringtag/-/has-tostringtag-1.0.0.tgz#7e133818a7d394734f941e73c3d3f9291e658b25" + integrity sha512-kFjcSNhnlGV1kyoGk7OXKSawH5JOb/LzUc5w9B02hOTO0dfFRjbHQKvg1d6cf3HbeUmtU9VbbV3qzZ2Teh97WQ== + dependencies: + has-symbols "^1.0.2" + +has@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/has/-/has-1.0.3.tgz#722d7cbfc1f6aa8241f16dd814e011e1f41e8796" + integrity sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw== + dependencies: + function-bind "^1.1.1" + +hosted-git-info@^2.1.4: + version "2.8.9" + resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-2.8.9.tgz#dffc0bf9a21c02209090f2aa69429e1414daf3f9" + integrity sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw== + +human-id@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/human-id/-/human-id-1.0.2.tgz#e654d4b2b0d8b07e45da9f6020d8af17ec0a5df3" + integrity sha512-UNopramDEhHJD+VR+ehk8rOslwSfByxPIZyJRfV739NDhN5LF1fa1MqnzKm2lGTQRjNrjK19Q5fhkgIfjlVUKw== + +iconv-lite@^0.4.24: + version "0.4.24" + resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.4.24.tgz#2022b4b25fbddc21d2f524974a474aafe733908b" + integrity sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA== + dependencies: + safer-buffer ">= 2.1.2 < 3" + +ignore@^5.2.0: + version "5.2.4" + resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.2.4.tgz#a291c0c6178ff1b960befe47fcdec301674a6324" + integrity sha512-MAb38BcSbH0eHNBxn7ql2NH/kX33OkB3lZ1BNdh7ENeRChHTYsTvWrMubiIAMNS2llXEEgZ1MUOBtXChP3kaFQ== + +indent-string@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-4.0.0.tgz#624f8f4497d619b2d9768531d58f4122854d7251" + integrity sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg== + +internal-slot@^1.0.5: + version "1.0.5" + resolved 
"https://registry.yarnpkg.com/internal-slot/-/internal-slot-1.0.5.tgz#f2a2ee21f668f8627a4667f309dc0f4fb6674986" + integrity sha512-Y+R5hJrzs52QCG2laLn4udYVnxsfny9CpOhNhUvk/SSSVyF6T27FzRbF0sroPidSu3X8oEAkOn2K804mjpt6UQ== + dependencies: + get-intrinsic "^1.2.0" + has "^1.0.3" + side-channel "^1.0.4" + +is-array-buffer@^3.0.1, is-array-buffer@^3.0.2: + version "3.0.2" + resolved "https://registry.yarnpkg.com/is-array-buffer/-/is-array-buffer-3.0.2.tgz#f2653ced8412081638ecb0ebbd0c41c6e0aecbbe" + integrity sha512-y+FyyR/w8vfIRq4eQcM1EYgSTnmHXPqaF+IgzgraytCFq5Xh8lllDVmAZolPJiZttZLeFSINPYMaEJ7/vWUa1w== + dependencies: + call-bind "^1.0.2" + get-intrinsic "^1.2.0" + is-typed-array "^1.1.10" + +is-arrayish@^0.2.1: + version "0.2.1" + resolved "https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.2.1.tgz#77c99840527aa8ecb1a8ba697b80645a7a926a9d" + integrity sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg== + +is-bigint@^1.0.1: + version "1.0.4" + resolved "https://registry.yarnpkg.com/is-bigint/-/is-bigint-1.0.4.tgz#08147a1875bc2b32005d41ccd8291dffc6691df3" + integrity sha512-zB9CruMamjym81i2JZ3UMn54PKGsQzsJeo6xvN3HJJ4CAsQNB6iRutp2To77OfCNuoxspsIhzaPoO1zyCEhFOg== + dependencies: + has-bigints "^1.0.1" + +is-boolean-object@^1.1.0: + version "1.1.2" + resolved "https://registry.yarnpkg.com/is-boolean-object/-/is-boolean-object-1.1.2.tgz#5c6dc200246dd9321ae4b885a114bb1f75f63719" + integrity sha512-gDYaKHJmnj4aWxyj6YHyXVpdQawtVLHU5cb+eztPGczf6cjuTdwve5ZIEfgXqH4e57An1D1AKf8CZ3kYrQRqYA== + dependencies: + call-bind "^1.0.2" + has-tostringtag "^1.0.0" + +is-callable@^1.1.3, is-callable@^1.1.4, is-callable@^1.2.7: + version "1.2.7" + resolved "https://registry.yarnpkg.com/is-callable/-/is-callable-1.2.7.tgz#3bc2a85ea742d9e36205dcacdd72ca1fdc51b055" + integrity sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA== + +is-ci@^3.0.1: + version "3.0.1" + resolved 
"https://registry.yarnpkg.com/is-ci/-/is-ci-3.0.1.tgz#db6ecbed1bd659c43dac0f45661e7674103d1867" + integrity sha512-ZYvCgrefwqoQ6yTyYUbQu64HsITZ3NfKX1lzaEYdkTDcfKzzCI/wthRRYKkdjHKFVgNiXKAKm65Zo1pk2as/QQ== + dependencies: + ci-info "^3.2.0" + +is-core-module@^2.11.0: + version "2.12.0" + resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.12.0.tgz#36ad62f6f73c8253fd6472517a12483cf03e7ec4" + integrity sha512-RECHCBCd/viahWmwj6enj19sKbHfJrddi/6cBDsNTKbNq0f7VeaUkBo60BqzvPqo/W54ChS62Z5qyun7cfOMqQ== + dependencies: + has "^1.0.3" + +is-date-object@^1.0.1: + version "1.0.5" + resolved "https://registry.yarnpkg.com/is-date-object/-/is-date-object-1.0.5.tgz#0841d5536e724c25597bf6ea62e1bd38298df31f" + integrity sha512-9YQaSxsAiSwcvS33MBk3wTCVnWK+HhF8VZR2jRxehM16QcVOdHqPn4VPHmRK4lSr38n9JriurInLcP90xsYNfQ== + dependencies: + has-tostringtag "^1.0.0" + +is-extglob@^2.1.1: + version "2.1.1" + resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-2.1.1.tgz#a88c02535791f02ed37c76a1b9ea9773c833f8c2" + integrity sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ== + +is-fullwidth-code-point@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz#f116f8064fe90b3f7844a38997c0b75051269f1d" + integrity sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg== + +is-fullwidth-code-point@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-4.0.0.tgz#fae3167c729e7463f8461ce512b080a49268aa88" + integrity sha512-O4L094N2/dZ7xqVdrXhh9r1KODPJpFms8B5sGdJLPy664AgvXsreZUyCQQNItZRDlYug4xStLjNp/sz3HvBowQ== + +is-glob@^4.0.1: + version "4.0.3" + resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-4.0.3.tgz#64f61e42cbbb2eec2071a9dac0b28ba1e65d5084" + integrity sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg== 
+ dependencies: + is-extglob "^2.1.1" + +is-negative-zero@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/is-negative-zero/-/is-negative-zero-2.0.2.tgz#7bf6f03a28003b8b3965de3ac26f664d765f3150" + integrity sha512-dqJvarLawXsFbNDeJW7zAz8ItJ9cd28YufuuFzh0G8pNHjJMnY08Dv7sYX2uF5UpQOwieAeOExEYAWWfu7ZZUA== + +is-number-object@^1.0.4: + version "1.0.7" + resolved "https://registry.yarnpkg.com/is-number-object/-/is-number-object-1.0.7.tgz#59d50ada4c45251784e9904f5246c742f07a42fc" + integrity sha512-k1U0IRzLMo7ZlYIfzRu23Oh6MiIFasgpb9X76eqfFZAqwH44UI4KTBvBYIZ1dSL9ZzChTB9ShHfLkR4pdW5krQ== + dependencies: + has-tostringtag "^1.0.0" + +is-number@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/is-number/-/is-number-7.0.0.tgz#7535345b896734d5f80c4d06c50955527a14f12b" + integrity sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng== + +is-plain-obj@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-1.1.0.tgz#71a50c8429dfca773c92a390a4a03b39fcd51d3e" + integrity sha512-yvkRyxmFKEOQ4pNXCmJG5AEQNlXJS5LaONXo5/cLdTZdWvsZ1ioJEonLGAosKlMWE8lwUy/bJzMjcw8az73+Fg== + +is-regex@^1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/is-regex/-/is-regex-1.1.4.tgz#eef5663cd59fa4c0ae339505323df6854bb15958" + integrity sha512-kvRdxDsxZjhzUX07ZnLydzS1TU/TJlTUHHY4YLL87e37oUA49DfkLqgy+VjFocowy29cKvcSiu+kIv728jTTVg== + dependencies: + call-bind "^1.0.2" + has-tostringtag "^1.0.0" + +is-shared-array-buffer@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/is-shared-array-buffer/-/is-shared-array-buffer-1.0.2.tgz#8f259c573b60b6a32d4058a1a07430c0a7344c79" + integrity sha512-sqN2UDu1/0y6uvXyStCOzyhAjCSlHceFoMKJW8W9EU9cvic/QdsZ0kEU93HEy3IUEFZIiH/3w+AH/UQbPHNdhA== + dependencies: + call-bind "^1.0.2" + +is-string@^1.0.5, is-string@^1.0.7: + version "1.0.7" + resolved 
"https://registry.yarnpkg.com/is-string/-/is-string-1.0.7.tgz#0dd12bf2006f255bb58f695110eff7491eebc0fd" + integrity sha512-tE2UXzivje6ofPW7l23cjDOMa09gb7xlAqG6jG5ej6uPV32TlWP3NKPigtaGeHNu9fohccRYvIiZMfOOnOYUtg== + dependencies: + has-tostringtag "^1.0.0" + +is-subdir@^1.1.1: + version "1.2.0" + resolved "https://registry.yarnpkg.com/is-subdir/-/is-subdir-1.2.0.tgz#b791cd28fab5202e91a08280d51d9d7254fd20d4" + integrity sha512-2AT6j+gXe/1ueqbW6fLZJiIw3F8iXGJtt0yDrZaBhAZEG1raiTxKWU+IPqMCzQAXOUCKdA4UDMgacKH25XG2Cw== + dependencies: + better-path-resolve "1.0.0" + +is-symbol@^1.0.2, is-symbol@^1.0.3: + version "1.0.4" + resolved "https://registry.yarnpkg.com/is-symbol/-/is-symbol-1.0.4.tgz#a6dac93b635b063ca6872236de88910a57af139c" + integrity sha512-C/CPBqKWnvdcxqIARxyOh4v1UUEOCHpgDa0WYgpKDFMszcrPcffg5uhwSgPCLD2WWxmq6isisz87tzT01tuGhg== + dependencies: + has-symbols "^1.0.2" + +is-typed-array@^1.1.10, is-typed-array@^1.1.9: + version "1.1.10" + resolved "https://registry.yarnpkg.com/is-typed-array/-/is-typed-array-1.1.10.tgz#36a5b5cb4189b575d1a3e4b08536bfb485801e3f" + integrity sha512-PJqgEHiWZvMpaFZ3uTc8kHPM4+4ADTlDniuQL7cU/UDA0Ql7F70yGfHph3cLNe+c9toaigv+DFzTJKhc2CtO6A== + dependencies: + available-typed-arrays "^1.0.5" + call-bind "^1.0.2" + for-each "^0.3.3" + gopd "^1.0.1" + has-tostringtag "^1.0.0" + +is-weakref@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/is-weakref/-/is-weakref-1.0.2.tgz#9529f383a9338205e89765e0392efc2f100f06f2" + integrity sha512-qctsuLZmIQ0+vSSMfoVvyFe2+GSEvnmZ2ezTup1SBse9+twCCeial6EEi3Nc2KFcf6+qz2FBPnjXsk8xhKSaPQ== + dependencies: + call-bind "^1.0.2" + +is-windows@^1.0.0: + version "1.0.2" + resolved "https://registry.yarnpkg.com/is-windows/-/is-windows-1.0.2.tgz#d1850eb9791ecd18e6182ce12a30f396634bb19d" + integrity sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA== + +isexe@^2.0.0: + version "2.0.0" + resolved 
"https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10" + integrity sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw== + +js-tokens@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499" + integrity sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ== + +js-yaml@^3.13.0, js-yaml@^3.13.1, js-yaml@^3.6.1: + version "3.14.1" + resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-3.14.1.tgz#dae812fdb3825fa306609a8717383c50c36a0537" + integrity sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g== + dependencies: + argparse "^1.0.7" + esprima "^4.0.0" + +json-parse-better-errors@^1.0.1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/json-parse-better-errors/-/json-parse-better-errors-1.0.2.tgz#bb867cfb3450e69107c131d1c514bab3dc8bcaa9" + integrity sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw== + +json-parse-even-better-errors@^2.3.0: + version "2.3.1" + resolved "https://registry.yarnpkg.com/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz#7c47805a94319928e05777405dc12e1f7a4ee02d" + integrity sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w== + +jsonc-parser@^3.2.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/jsonc-parser/-/jsonc-parser-3.2.0.tgz#31ff3f4c2b9793f89c67212627c51c6394f88e76" + integrity sha512-gfFQZrcTc8CnKXp6Y4/CBT3fTc0OVuDofpre4aEeEpSBPV5X5v4+Vmx+8snU7RLPrNHPKSgLxGo9YuQzz20o+w== + +jsonfile@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-4.0.0.tgz#8771aae0799b64076b76640fca058f9c10e33ecb" + integrity sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg== + optionalDependencies: + 
graceful-fs "^4.1.6" + +kind-of@^6.0.3: + version "6.0.3" + resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-6.0.3.tgz#07c05034a6c349fa06e24fa35aa76db4580ce4dd" + integrity sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw== + +kleur@^4.1.5: + version "4.1.5" + resolved "https://registry.yarnpkg.com/kleur/-/kleur-4.1.5.tgz#95106101795f7050c6c650f350c683febddb1780" + integrity sha512-o+NO+8WrRiQEE4/7nwRJhN1HWpVmJm511pBHUxPLtp0BUISzlBplORYSmTclCnJvQq2tKu/sgl3xVpkc7ZWuQQ== + +lines-and-columns@^1.1.6: + version "1.2.4" + resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.2.4.tgz#eca284f75d2965079309dc0ad9255abb2ebc1632" + integrity sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg== + +load-json-file@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/load-json-file/-/load-json-file-4.0.0.tgz#2f5f45ab91e33216234fd53adab668eb4ec0993b" + integrity sha512-Kx8hMakjX03tiGTLAIdJ+lL0htKnXjEZN6hk/tozf/WOuYGdZBJrZ+rCJRbVCugsjB3jMLn9746NsQIf5VjBMw== + dependencies: + graceful-fs "^4.1.2" + parse-json "^4.0.0" + pify "^3.0.0" + strip-bom "^3.0.0" + +load-yaml-file@^0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/load-yaml-file/-/load-yaml-file-0.2.0.tgz#af854edaf2bea89346c07549122753c07372f64d" + integrity sha512-OfCBkGEw4nN6JLtgRidPX6QxjBQGQf72q3si2uvqyFEMbycSFFHwAZeXx6cJgFM9wmLrf9zBwCP3Ivqa+LLZPw== + dependencies: + graceful-fs "^4.1.5" + js-yaml "^3.13.0" + pify "^4.0.1" + strip-bom "^3.0.0" + +local-pkg@^0.4.2: + version "0.4.3" + resolved "https://registry.yarnpkg.com/local-pkg/-/local-pkg-0.4.3.tgz#0ff361ab3ae7f1c19113d9bb97b98b905dbc4963" + integrity sha512-SFppqq5p42fe2qcZQqqEOiVRXl+WCP1MdT6k7BDEW1j++sp5fIY+/fdRQitvKgB5BrBcmrs5m/L0v2FrU5MY1g== + +locate-path@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-5.0.0.tgz#1afba396afd676a6d42504d0a67a3a7eb9f62aa0" + integrity 
sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g== + dependencies: + p-locate "^4.1.0" + +locate-path@^6.0.0: + version "6.0.0" + resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-6.0.0.tgz#55321eb309febbc59c4801d931a72452a681d286" + integrity sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw== + dependencies: + p-locate "^5.0.0" + +lodash.startcase@^4.4.0: + version "4.4.0" + resolved "https://registry.yarnpkg.com/lodash.startcase/-/lodash.startcase-4.4.0.tgz#9436e34ed26093ed7ffae1936144350915d9add8" + integrity sha512-+WKqsK294HMSc2jEbNgpHpd0JfIBhp7rEV4aqXWqFr6AlXov+SlcgB1Fv01y2kGe3Gc8nMW7VA0SrGuSkRfIEg== + +loupe@^2.3.1, loupe@^2.3.6: + version "2.3.6" + resolved "https://registry.yarnpkg.com/loupe/-/loupe-2.3.6.tgz#76e4af498103c532d1ecc9be102036a21f787b53" + integrity sha512-RaPMZKiMy8/JruncMU5Bt6na1eftNoo++R4Y+N2FrxkDVTrGvcyzFTsaGif4QTeKESheMGegbhw6iUAq+5A8zA== + dependencies: + get-func-name "^2.0.0" + +lru-cache@^4.0.1: + version "4.1.5" + resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-4.1.5.tgz#8bbe50ea85bed59bc9e33dcab8235ee9bcf443cd" + integrity sha512-sWZlbEP2OsHNkXrMl5GYk/jKk70MBng6UU4YI/qGDYbgf6YbP4EvmqISbXCoJiRKs+1bSpFHVgQxvJ17F2li5g== + dependencies: + pseudomap "^1.0.2" + yallist "^2.1.2" + +map-obj@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/map-obj/-/map-obj-1.0.1.tgz#d933ceb9205d82bdcf4886f6742bdc2b4dea146d" + integrity sha512-7N/q3lyZ+LVCp7PzuxrJr4KMbBE2hW7BT7YNia330OFxIf4d3r5zVpicP2650l7CPN6RM9zOJRl3NGpqSiw3Eg== + +map-obj@^4.0.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/map-obj/-/map-obj-4.3.0.tgz#9304f906e93faae70880da102a9f1df0ea8bb05a" + integrity sha512-hdN1wVrZbb29eBGiGjJbeP8JbKjq1urkHJ/LIP/NY48MZ1QVXUsQBV1G1zvYFHn1XE06cwjBsOI2K3Ulnj1YXQ== + +memorystream@^0.3.1: + version "0.3.1" + resolved 
"https://registry.yarnpkg.com/memorystream/-/memorystream-0.3.1.tgz#86d7090b30ce455d63fbae12dda51a47ddcaf9b2" + integrity sha512-S3UwM3yj5mtUSEfP41UZmt/0SCoVYUcU1rkXv+BQ5Ig8ndL4sPoJNBUJERafdPb5jjHJGuMgytgKvKIf58XNBw== + +meow@^6.0.0: + version "6.1.1" + resolved "https://registry.yarnpkg.com/meow/-/meow-6.1.1.tgz#1ad64c4b76b2a24dfb2f635fddcadf320d251467" + integrity sha512-3YffViIt2QWgTy6Pale5QpopX/IvU3LPL03jOTqp6pGj3VjesdO/U8CuHMKpnQr4shCNCM5fd5XFFvIIl6JBHg== + dependencies: + "@types/minimist" "^1.2.0" + camelcase-keys "^6.2.2" + decamelize-keys "^1.1.0" + hard-rejection "^2.1.0" + minimist-options "^4.0.2" + normalize-package-data "^2.5.0" + read-pkg-up "^7.0.1" + redent "^3.0.0" + trim-newlines "^3.0.0" + type-fest "^0.13.1" + yargs-parser "^18.1.3" + +merge2@^1.3.0, merge2@^1.4.1: + version "1.4.1" + resolved "https://registry.yarnpkg.com/merge2/-/merge2-1.4.1.tgz#4368892f885e907455a6fd7dc55c0c9d404990ae" + integrity sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg== + +micromatch@^4.0.2, micromatch@^4.0.4: + version "4.0.5" + resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-4.0.5.tgz#bc8999a7cbbf77cdc89f132f6e467051b49090c6" + integrity sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA== + dependencies: + braces "^3.0.2" + picomatch "^2.3.1" + +min-indent@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/min-indent/-/min-indent-1.0.1.tgz#a63f681673b30571fbe8bc25686ae746eefa9869" + integrity sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg== + +minimatch@^3.0.4: + version "3.1.2" + resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b" + integrity sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw== + dependencies: + brace-expansion "^1.1.7" + +minimatch@^5.1.0: + version "5.1.6" + resolved 
"https://registry.yarnpkg.com/minimatch/-/minimatch-5.1.6.tgz#1cfcb8cf5522ea69952cd2af95ae09477f122a96" + integrity sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g== + dependencies: + brace-expansion "^2.0.1" + +minimist-options@^4.0.2: + version "4.1.0" + resolved "https://registry.yarnpkg.com/minimist-options/-/minimist-options-4.1.0.tgz#c0655713c53a8a2ebd77ffa247d342c40f010619" + integrity sha512-Q4r8ghd80yhO/0j1O3B2BjweX3fiHg9cdOwjJd2J76Q135c+NDxGCqdYKQ1SKBuFfgWbAUzBfvYjPUEeNgqN1A== + dependencies: + arrify "^1.0.1" + is-plain-obj "^1.1.0" + kind-of "^6.0.3" + +mixme@^0.5.1: + version "0.5.9" + resolved "https://registry.yarnpkg.com/mixme/-/mixme-0.5.9.tgz#a5a58e17354632179ff3ce5b0fc130899c8ba81c" + integrity sha512-VC5fg6ySUscaWUpI4gxCBTQMH2RdUpNrk+MsbpCYtIvf9SBJdiUey4qE7BXviJsJR4nDQxCZ+3yaYNW3guz/Pw== + +mkdirp@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e" + integrity sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw== + +mlly@^1.1.0, mlly@^1.2.0: + version "1.2.1" + resolved "https://registry.yarnpkg.com/mlly/-/mlly-1.2.1.tgz#cd50151f5712b651c5c379085157bcdff661133b" + integrity sha512-1aMEByaWgBPEbWV2BOPEMySRrzl7rIHXmQxam4DM8jVjalTQDjpN2ZKOLUrwyhfZQO7IXHml2StcHMhooDeEEQ== + dependencies: + acorn "^8.8.2" + pathe "^1.1.0" + pkg-types "^1.0.3" + ufo "^1.1.2" + +ms@2.1.2: + version "2.1.2" + resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009" + integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== + +nanoid@^3.3.6: + version "3.3.6" + resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.6.tgz#443380c856d6e9f9824267d960b4236ad583ea4c" + integrity sha512-BGcqMMJuToF7i1rt+2PWSNVnWIkGCU78jBG3RxO/bZlnZPK2Cmi2QaffxGO/2RvWi9sL+FAiRiXMgsyxQ1DIDA== + +nice-try@^1.0.4: + version "1.0.5" + 
resolved "https://registry.yarnpkg.com/nice-try/-/nice-try-1.0.5.tgz#a3378a7696ce7d223e88fc9b764bd7ef1089e366" + integrity sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ== + +normalize-package-data@^2.3.2, normalize-package-data@^2.5.0: + version "2.5.0" + resolved "https://registry.yarnpkg.com/normalize-package-data/-/normalize-package-data-2.5.0.tgz#e66db1838b200c1dfc233225d12cb36520e234a8" + integrity sha512-/5CMN3T0R4XTj4DcGaexo+roZSdSFW/0AOOTROrjxzCG1wrWXEsGbRKevjlIL+ZDE4sZlJr5ED4YW0yqmkK+eA== + dependencies: + hosted-git-info "^2.1.4" + resolve "^1.10.0" + semver "2 || 3 || 4 || 5" + validate-npm-package-license "^3.0.1" + +npm-run-all@^4.1.5: + version "4.1.5" + resolved "https://registry.yarnpkg.com/npm-run-all/-/npm-run-all-4.1.5.tgz#04476202a15ee0e2e214080861bff12a51d98fba" + integrity sha512-Oo82gJDAVcaMdi3nuoKFavkIHBRVqQ1qvMb+9LHk/cF4P6B2m8aP04hGf7oL6wZ9BuGwX1onlLhpuoofSyoQDQ== + dependencies: + ansi-styles "^3.2.1" + chalk "^2.4.1" + cross-spawn "^6.0.5" + memorystream "^0.3.1" + minimatch "^3.0.4" + pidtree "^0.3.0" + read-pkg "^3.0.0" + shell-quote "^1.6.1" + string.prototype.padend "^3.0.0" + +object-inspect@^1.12.3, object-inspect@^1.9.0: + version "1.12.3" + resolved "https://registry.yarnpkg.com/object-inspect/-/object-inspect-1.12.3.tgz#ba62dffd67ee256c8c086dfae69e016cd1f198b9" + integrity sha512-geUvdk7c+eizMNUDkRpW1wJwgfOiOeHbxBR/hLXK1aT6zmVSO0jsQcs7fj6MGw89jC/cjGfLcNOrtMYtGqm81g== + +object-keys@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/object-keys/-/object-keys-1.1.1.tgz#1c47f272df277f3b1daf061677d9c82e2322c60e" + integrity sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA== + +object.assign@^4.1.4: + version "4.1.4" + resolved "https://registry.yarnpkg.com/object.assign/-/object.assign-4.1.4.tgz#9673c7c7c351ab8c4d0b516f4343ebf4dfb7799f" + integrity 
sha512-1mxKf0e58bvyjSCtKYY4sRe9itRk3PJpquJOjeIkz885CczcI4IvJJDLPS72oowuSh+pBxUFROpX+TU++hxhZQ== + dependencies: + call-bind "^1.0.2" + define-properties "^1.1.4" + has-symbols "^1.0.3" + object-keys "^1.1.1" + +os-tmpdir@~1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/os-tmpdir/-/os-tmpdir-1.0.2.tgz#bbe67406c79aa85c5cfec766fe5734555dfa1274" + integrity sha512-D2FR03Vir7FIu45XBY20mTb+/ZSWB00sjU9jdQXt83gDrI4Ztz5Fs7/yy74g2N5SVQY4xY1qDr4rNddwYRVX0g== + +outdent@^0.5.0: + version "0.5.0" + resolved "https://registry.yarnpkg.com/outdent/-/outdent-0.5.0.tgz#9e10982fdc41492bb473ad13840d22f9655be2ff" + integrity sha512-/jHxFIzoMXdqPzTaCpFzAAWhpkSjZPF4Vsn6jAfNpmbH/ymsmd7Qc6VE9BGn0L6YMj6uwpQLxCECpus4ukKS9Q== + +outdent@^0.8.0: + version "0.8.0" + resolved "https://registry.yarnpkg.com/outdent/-/outdent-0.8.0.tgz#2ebc3e77bf49912543f1008100ff8e7f44428eb0" + integrity sha512-KiOAIsdpUTcAXuykya5fnVVT+/5uS0Q1mrkRHcF89tpieSmY33O/tmc54CqwA+bfhbtEfZUNLHaPUiB9X3jt1A== + +p-filter@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/p-filter/-/p-filter-2.1.0.tgz#1b1472562ae7a0f742f0f3d3d3718ea66ff9c09c" + integrity sha512-ZBxxZ5sL2HghephhpGAQdoskxplTwr7ICaehZwLIlfL6acuVgZPm8yBNuRAFBGEqtD/hmUeq9eqLg2ys9Xr/yw== + dependencies: + p-map "^2.0.0" + +p-limit@^2.2.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-2.3.0.tgz#3dd33c647a214fdfffd835933eb086da0dc21db1" + integrity sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w== + dependencies: + p-try "^2.0.0" + +p-limit@^3.0.2: + version "3.1.0" + resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-3.1.0.tgz#e1daccbe78d0d1388ca18c64fea38e3e57e3706b" + integrity sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ== + dependencies: + yocto-queue "^0.1.0" + +p-limit@^4.0.0: + version "4.0.0" + resolved 
"https://registry.yarnpkg.com/p-limit/-/p-limit-4.0.0.tgz#914af6544ed32bfa54670b061cafcbd04984b644" + integrity sha512-5b0R4txpzjPWVw/cXXUResoD4hb6U/x9BH08L7nw+GN1sezDzPdxeRvpc9c433fZhBan/wusjbCsqwqm4EIBIQ== + dependencies: + yocto-queue "^1.0.0" + +p-locate@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-4.1.0.tgz#a3428bb7088b3a60292f66919278b7c297ad4f07" + integrity sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A== + dependencies: + p-limit "^2.2.0" + +p-locate@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-5.0.0.tgz#83c8315c6785005e3bd021839411c9e110e6d834" + integrity sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw== + dependencies: + p-limit "^3.0.2" + +p-map@^2.0.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/p-map/-/p-map-2.1.0.tgz#310928feef9c9ecc65b68b17693018a665cea175" + integrity sha512-y3b8Kpd8OAN444hxfBbFfj1FY/RjtTd8tzYwhUqNYXx0fXx2iX4maP4Qr6qhIKbQXI02wTLAda4fYUbDagTUFw== + +p-try@^2.0.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/p-try/-/p-try-2.2.0.tgz#cb2868540e313d61de58fafbe35ce9004d5540e6" + integrity sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ== + +parse-json@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-4.0.0.tgz#be35f5425be1f7f6c747184f98a788cb99477ee0" + integrity sha512-aOIos8bujGN93/8Ox/jPLh7RwVnPEysynVFE+fQZyg6jKELEHwzgKdLRFHUgXJL6kylijVSBC4BvN9OmsB48Rw== + dependencies: + error-ex "^1.3.1" + json-parse-better-errors "^1.0.1" + +parse-json@^5.0.0: + version "5.2.0" + resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-5.2.0.tgz#c76fc66dee54231c962b22bcc8a72cf2f99753cd" + integrity sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg== + dependencies: + "@babel/code-frame" "^7.0.0" + error-ex 
"^1.3.1" + json-parse-even-better-errors "^2.3.0" + lines-and-columns "^1.1.6" + +path-browserify@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/path-browserify/-/path-browserify-1.0.1.tgz#d98454a9c3753d5790860f16f68867b9e46be1fd" + integrity sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g== + +path-exists@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-4.0.0.tgz#513bdbe2d3b95d7762e8c1137efa195c6c61b5b3" + integrity sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w== + +path-key@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/path-key/-/path-key-2.0.1.tgz#411cadb574c5a140d3a4b1910d40d80cc9f40b40" + integrity sha512-fEHGKCSmUSDPv4uoj8AlD+joPlq3peND+HRYyxFz4KPw4z926S/b8rIuFs2FYJg3BwsxJf6A9/3eIdLaYC+9Dw== + +path-parse@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" + integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== + +path-type@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/path-type/-/path-type-3.0.0.tgz#cef31dc8e0a1a3bb0d105c0cd97cf3bf47f4e36f" + integrity sha512-T2ZUsdZFHgA3u4e5PfPbjd7HDDpxPnQb5jN0SrDsjNSuVXHJqtwTnWqG0B1jZrgmJ/7lj1EmVIByWt1gxGkWvg== + dependencies: + pify "^3.0.0" + +path-type@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/path-type/-/path-type-4.0.0.tgz#84ed01c0a7ba380afe09d90a8c180dcd9d03043b" + integrity sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw== + +pathe@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/pathe/-/pathe-1.1.0.tgz#e2e13f6c62b31a3289af4ba19886c230f295ec03" + integrity sha512-ODbEPR0KKHqECXW1GoxdDb+AZvULmXjVPy4rt+pGo2+TnjJTIPJQSVS6N63n8T2Ip+syHhbn52OewKicV0373w== + +pathval@^1.1.1: + version "1.1.1" + 
resolved "https://registry.yarnpkg.com/pathval/-/pathval-1.1.1.tgz#8534e77a77ce7ac5a2512ea21e0fdb8fcf6c3d8d" + integrity sha512-Dp6zGqpTdETdR63lehJYPeIOqpiNBNtc7BpWSLrOje7UaIsE5aY92r/AunQA7rsXvet3lrJ3JnZX29UPTKXyKQ== + +picocolors@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/picocolors/-/picocolors-1.0.0.tgz#cb5bdc74ff3f51892236eaf79d68bc44564ab81c" + integrity sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ== + +picomatch@^2.3.1: + version "2.3.1" + resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.1.tgz#3ba3833733646d9d3e4995946c1365a67fb07a42" + integrity sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA== + +pidtree@^0.3.0: + version "0.3.1" + resolved "https://registry.yarnpkg.com/pidtree/-/pidtree-0.3.1.tgz#ef09ac2cc0533df1f3250ccf2c4d366b0d12114a" + integrity sha512-qQbW94hLHEqCg7nhby4yRC7G2+jYHY4Rguc2bjw7Uug4GIJuu1tvf2uHaZv5Q8zdt+WKJ6qK1FOI6amaWUo5FA== + +pify@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/pify/-/pify-3.0.0.tgz#e5a4acd2c101fdf3d9a4d07f0dbc4db49dd28176" + integrity sha512-C3FsVNH1udSEX48gGX1xfvwTWfsYWj5U+8/uK15BGzIGrKoUpghX8hWZwa/OFnakBiiVNmBvemTJR5mcy7iPcg== + +pify@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/pify/-/pify-4.0.1.tgz#4b2cd25c50d598735c50292224fd8c6df41e3231" + integrity sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g== + +pkg-dir@^4.2.0: + version "4.2.0" + resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-4.2.0.tgz#f099133df7ede422e81d1d8448270eeb3e4261f3" + integrity sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ== + dependencies: + find-up "^4.0.0" + +pkg-types@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/pkg-types/-/pkg-types-1.0.3.tgz#988b42ab19254c01614d13f4f65a2cfc7880f868" + integrity 
sha512-nN7pYi0AQqJnoLPC9eHFQ8AcyaixBUOwvqc5TDnIKCMEE6I0y8P7OKA7fPexsXGCGxQDl/cmrLAp26LhcwxZ4A== + dependencies: + jsonc-parser "^3.2.0" + mlly "^1.2.0" + pathe "^1.1.0" + +postcss@^8.4.23: + version "8.4.23" + resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.4.23.tgz#df0aee9ac7c5e53e1075c24a3613496f9e6552ab" + integrity sha512-bQ3qMcpF6A/YjR55xtoTr0jGOlnPOKAIMdOWiv0EIT6HVPEaJiJB4NLljSbiHoC2RX7DN5Uvjtpbg1NPdwv1oA== + dependencies: + nanoid "^3.3.6" + picocolors "^1.0.0" + source-map-js "^1.0.2" + +preferred-pm@^3.0.0: + version "3.0.3" + resolved "https://registry.yarnpkg.com/preferred-pm/-/preferred-pm-3.0.3.tgz#1b6338000371e3edbce52ef2e4f65eb2e73586d6" + integrity sha512-+wZgbxNES/KlJs9q40F/1sfOd/j7f1O9JaHcW5Dsn3aUUOZg3L2bjpVUcKV2jvtElYfoTuQiNeMfQJ4kwUAhCQ== + dependencies: + find-up "^5.0.0" + find-yarn-workspace-root2 "1.2.16" + path-exists "^4.0.0" + which-pm "2.0.0" + +prettier@^2.7.1: + version "2.8.8" + resolved "https://registry.yarnpkg.com/prettier/-/prettier-2.8.8.tgz#e8c5d7e98a4305ffe3de2e1fc4aca1a71c28b1da" + integrity sha512-tdN8qQGvNjw4CHbY+XXk0JgCXn9QiF21a55rBe5LJAU+kDyC4WQn4+awm2Xfk2lQMk5fKup9XgzTZtGkjBdP9Q== + +pretty-format@^27.5.1: + version "27.5.1" + resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-27.5.1.tgz#2181879fdea51a7a5851fb39d920faa63f01d88e" + integrity sha512-Qb1gy5OrP5+zDf2Bvnzdl3jsTf1qXVMazbvCoKhtKqVs4/YK4ozX4gKQJJVyNe+cajNPn0KoC0MC3FUmaHWEmQ== + dependencies: + ansi-regex "^5.0.1" + ansi-styles "^5.0.0" + react-is "^17.0.1" + +pseudomap@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/pseudomap/-/pseudomap-1.0.2.tgz#f052a28da70e618917ef0a8ac34c1ae5a68286b3" + integrity sha512-b/YwNhb8lk1Zz2+bXXpS/LK9OisiZZ1SNsSLxN1x2OXVEhW2Ckr/7mWE5vrC1ZTiJlD9g19jWszTmJsB+oEpFQ== + +queue-microtask@^1.2.2: + version "1.2.3" + resolved "https://registry.yarnpkg.com/queue-microtask/-/queue-microtask-1.2.3.tgz#4929228bbc724dfac43e0efb058caf7b6cfb6243" + integrity 
sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A== + +quick-lru@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/quick-lru/-/quick-lru-4.0.1.tgz#5b8878f113a58217848c6482026c73e1ba57727f" + integrity sha512-ARhCpm70fzdcvNQfPoy49IaanKkTlRWF2JMzqhcJbhSFRZv7nPTvZJdcY7301IPmvW+/p0RgIWnQDLJxifsQ7g== + +react-is@^17.0.1: + version "17.0.2" + resolved "https://registry.yarnpkg.com/react-is/-/react-is-17.0.2.tgz#e691d4a8e9c789365655539ab372762b0efb54f0" + integrity sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w== + +read-pkg-up@^7.0.1: + version "7.0.1" + resolved "https://registry.yarnpkg.com/read-pkg-up/-/read-pkg-up-7.0.1.tgz#f3a6135758459733ae2b95638056e1854e7ef507" + integrity sha512-zK0TB7Xd6JpCLmlLmufqykGE+/TlOePD6qKClNW7hHDKFh/J7/7gCWGR7joEQEW1bKq3a3yUZSObOoWLFQ4ohg== + dependencies: + find-up "^4.1.0" + read-pkg "^5.2.0" + type-fest "^0.8.1" + +read-pkg@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/read-pkg/-/read-pkg-3.0.0.tgz#9cbc686978fee65d16c00e2b19c237fcf6e38389" + integrity sha512-BLq/cCO9two+lBgiTYNqD6GdtK8s4NpaWrl6/rCO9w0TUS8oJl7cmToOZfRYllKTISY6nt1U7jQ53brmKqY6BA== + dependencies: + load-json-file "^4.0.0" + normalize-package-data "^2.3.2" + path-type "^3.0.0" + +read-pkg@^5.2.0: + version "5.2.0" + resolved "https://registry.yarnpkg.com/read-pkg/-/read-pkg-5.2.0.tgz#7bf295438ca5a33e56cd30e053b34ee7250c93cc" + integrity sha512-Ug69mNOpfvKDAc2Q8DRpMjjzdtrnv9HcSMX+4VsZxD1aZ6ZzrIE7rlzXBtWTyhULSMKg076AW6WR5iZpD0JiOg== + dependencies: + "@types/normalize-package-data" "^2.4.0" + normalize-package-data "^2.5.0" + parse-json "^5.0.0" + type-fest "^0.6.0" + +read-yaml-file@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/read-yaml-file/-/read-yaml-file-1.1.0.tgz#9362bbcbdc77007cc8ea4519fe1c0b821a7ce0d8" + integrity sha512-VIMnQi/Z4HT2Fxuwg5KrY174U1VdUIASQVWXXyqtNRtxSr9IYkn1rsI6Tb6HsrHCmB7gVpNwX6JxPTHcH6IoTA== + 
dependencies: + graceful-fs "^4.1.5" + js-yaml "^3.6.1" + pify "^4.0.1" + strip-bom "^3.0.0" + +redent@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/redent/-/redent-3.0.0.tgz#e557b7998316bb53c9f1f56fa626352c6963059f" + integrity sha512-6tDA8g98We0zd0GvVeMT9arEOnTw9qM03L9cJXaCjrip1OO764RDBLBfrB4cwzNGDj5OA5ioymC9GkizgWJDUg== + dependencies: + indent-string "^4.0.0" + strip-indent "^3.0.0" + +regenerator-runtime@^0.13.11: + version "0.13.11" + resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz#f6dca3e7ceec20590d07ada785636a90cdca17f9" + integrity sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg== + +regexp.prototype.flags@^1.4.3: + version "1.5.0" + resolved "https://registry.yarnpkg.com/regexp.prototype.flags/-/regexp.prototype.flags-1.5.0.tgz#fe7ce25e7e4cca8db37b6634c8a2c7009199b9cb" + integrity sha512-0SutC3pNudRKgquxGoRGIz946MZVHqbNfPjBdxeOhBrdgDKlRoXmYLQN9xRbrR09ZXWeGAdPuif7egofn6v5LA== + dependencies: + call-bind "^1.0.2" + define-properties "^1.2.0" + functions-have-names "^1.2.3" + +require-directory@^2.1.1: + version "2.1.1" + resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42" + integrity sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q== + +require-main-filename@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/require-main-filename/-/require-main-filename-2.0.0.tgz#d0b329ecc7cc0f61649f62215be69af54aa8989b" + integrity sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg== + +resolve-from@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-5.0.0.tgz#c35225843df8f776df21c57557bc087e9dfdfc69" + integrity sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw== + +resolve@^1.10.0: + version 
"1.22.2" + resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.22.2.tgz#0ed0943d4e301867955766c9f3e1ae6d01c6845f" + integrity sha512-Sb+mjNHOULsBv818T40qSPeRiuWLyaGMa5ewydRLFimneixmVy2zdivRl+AF6jaYPC8ERxGDmFSiqui6SfPd+g== + dependencies: + is-core-module "^2.11.0" + path-parse "^1.0.7" + supports-preserve-symlinks-flag "^1.0.0" + +reusify@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/reusify/-/reusify-1.0.4.tgz#90da382b1e126efc02146e90845a88db12925d76" + integrity sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw== + +rollup@^3.21.0: + version "3.21.6" + resolved "https://registry.yarnpkg.com/rollup/-/rollup-3.21.6.tgz#f5649ccdf8fcc7729254faa457cbea9547eb86db" + integrity sha512-SXIICxvxQxR3D4dp/3LDHZIJPC8a4anKMHd4E3Jiz2/JnY+2bEjqrOokAauc5ShGVNFHlEFjBXAXlaxkJqIqSg== + optionalDependencies: + fsevents "~2.3.2" + +run-parallel@^1.1.9: + version "1.2.0" + resolved "https://registry.yarnpkg.com/run-parallel/-/run-parallel-1.2.0.tgz#66d1368da7bdf921eb9d95bd1a9229e7f21a43ee" + integrity sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA== + dependencies: + queue-microtask "^1.2.2" + +safe-regex-test@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/safe-regex-test/-/safe-regex-test-1.0.0.tgz#793b874d524eb3640d1873aad03596db2d4f2295" + integrity sha512-JBUUzyOgEwXQY1NuPtvcj/qcBDbDmEvWufhlnXZIm75DEHp+afM1r1ujJpJsV/gSM4t59tpDyPi1sd6ZaPFfsA== + dependencies: + call-bind "^1.0.2" + get-intrinsic "^1.1.3" + is-regex "^1.1.4" + +"safer-buffer@>= 2.1.2 < 3": + version "2.1.2" + resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" + integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg== + +"semver@2 || 3 || 4 || 5", semver@^5.4.1, semver@^5.5.0: + version "5.7.1" + resolved 
"https://registry.yarnpkg.com/semver/-/semver-5.7.1.tgz#a954f931aeba508d307bbf069eff0c01c96116f7" + integrity sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ== + +set-blocking@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7" + integrity sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw== + +shebang-command@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-1.2.0.tgz#44aac65b695b03398968c39f363fee5deafdf1ea" + integrity sha512-EV3L1+UQWGor21OmnvojK36mhg+TyIKDh3iFBKBohr5xeXIhNBcx8oWdgkTEEQ+BEFFYdLRuqMfd5L84N1V5Vg== + dependencies: + shebang-regex "^1.0.0" + +shebang-regex@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-1.0.0.tgz#da42f49740c0b42db2ca9728571cb190c98efea3" + integrity sha512-wpoSFAxys6b2a2wHZ1XpDSgD7N9iVjg29Ph9uV/uaP9Ex/KXlkTZTeddxDPSYQpgvzKLGJke2UU0AzoGCjNIvQ== + +shell-quote@^1.6.1: + version "1.8.1" + resolved "https://registry.yarnpkg.com/shell-quote/-/shell-quote-1.8.1.tgz#6dbf4db75515ad5bac63b4f1894c3a154c766680" + integrity sha512-6j1W9l1iAs/4xYBI1SYOVZyFcCis9b4KCLQ8fgAGG07QvzaRLVVRQvAy85yNmmZSjYjg4MWh4gNvlPujU/5LpA== + +side-channel@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/side-channel/-/side-channel-1.0.4.tgz#efce5c8fdc104ee751b25c58d4290011fa5ea2cf" + integrity sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw== + dependencies: + call-bind "^1.0.0" + get-intrinsic "^1.0.2" + object-inspect "^1.9.0" + +siginfo@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/siginfo/-/siginfo-2.0.0.tgz#32e76c70b79724e3bb567cb9d543eb858ccfaf30" + integrity sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g== + +signal-exit@^3.0.2: + version "3.0.7" + 
resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.7.tgz#a9a1767f8af84155114eaabd73f99273c8f59ad9" + integrity sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ== + +slash@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/slash/-/slash-3.0.0.tgz#6539be870c165adbd5240220dbe361f1bc4d4634" + integrity sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q== + +slice-ansi@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-5.0.0.tgz#b73063c57aa96f9cd881654b15294d95d285c42a" + integrity sha512-FC+lgizVPfie0kkhqUScwRu1O/lF6NOgJmlCgK+/LYxDCTk8sGelYaHDhFcDN+Sn3Cv+3VSa4Byeo+IMCzpMgQ== + dependencies: + ansi-styles "^6.0.0" + is-fullwidth-code-point "^4.0.0" + +smartwrap@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/smartwrap/-/smartwrap-2.0.2.tgz#7e25d3dd58b51c6ca4aba3a9e391650ea62698a4" + integrity sha512-vCsKNQxb7PnCNd2wY1WClWifAc2lwqsG8OaswpJkVJsvMGcnEntdTCDajZCkk93Ay1U3t/9puJmb525Rg5MZBA== + dependencies: + array.prototype.flat "^1.2.3" + breakword "^1.0.5" + grapheme-splitter "^1.0.4" + strip-ansi "^6.0.0" + wcwidth "^1.0.1" + yargs "^15.1.0" + +source-map-js@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-1.0.2.tgz#adbc361d9c62df380125e7f161f71c826f1e490c" + integrity sha512-R0XvVJ9WusLiqTCEiGCmICCMplcCkIwwR11mOSD9CR5u+IXYdiseeEuXCVAjS54zqwkLcPNnmU4OeJ6tUrWhDw== + +source-map-support@^0.5.21: + version "0.5.21" + resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.21.tgz#04fe7c7f9e1ed2d662233c28cb2b35b9f63f6e4f" + integrity sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w== + dependencies: + buffer-from "^1.0.0" + source-map "^0.6.0" + +source-map@^0.6.0, source-map@^0.6.1: + version "0.6.1" + resolved 
"https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263" + integrity sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g== + +spawndamnit@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/spawndamnit/-/spawndamnit-2.0.0.tgz#9f762ac5c3476abb994b42ad592b5ad22bb4b0ad" + integrity sha512-j4JKEcncSjFlqIwU5L/rp2N5SIPsdxaRsIv678+TZxZ0SRDJTm8JrxJMjE/XuiEZNEir3S8l0Fa3Ke339WI4qA== + dependencies: + cross-spawn "^5.1.0" + signal-exit "^3.0.2" + +spdx-correct@^3.0.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/spdx-correct/-/spdx-correct-3.2.0.tgz#4f5ab0668f0059e34f9c00dce331784a12de4e9c" + integrity sha512-kN9dJbvnySHULIluDHy32WHRUu3Og7B9sbY7tsFLctQkIqnMh3hErYgdMjTYuqmcXX+lK5T1lnUt3G7zNswmZA== + dependencies: + spdx-expression-parse "^3.0.0" + spdx-license-ids "^3.0.0" + +spdx-exceptions@^2.1.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/spdx-exceptions/-/spdx-exceptions-2.3.0.tgz#3f28ce1a77a00372683eade4a433183527a2163d" + integrity sha512-/tTrYOC7PPI1nUAgx34hUpqXuyJG+DTHJTnIULG4rDygi4xu/tfgmq1e1cIRwRzwZgo4NLySi+ricLkZkw4i5A== + +spdx-expression-parse@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/spdx-expression-parse/-/spdx-expression-parse-3.0.1.tgz#cf70f50482eefdc98e3ce0a6833e4a53ceeba679" + integrity sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q== + dependencies: + spdx-exceptions "^2.1.0" + spdx-license-ids "^3.0.0" + +spdx-license-ids@^3.0.0: + version "3.0.13" + resolved "https://registry.yarnpkg.com/spdx-license-ids/-/spdx-license-ids-3.0.13.tgz#7189a474c46f8d47c7b0da4b987bb45e908bd2d5" + integrity sha512-XkD+zwiqXHikFZm4AX/7JSCXA98U5Db4AFd5XUg/+9UNtnH75+Z9KxtpYiJZx36mUDVOwH83pl7yvCer6ewM3w== + +sprintf-js@~1.0.2: + version "1.0.3" + resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c" + integrity 
sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g== + +stackback@0.0.2: + version "0.0.2" + resolved "https://registry.yarnpkg.com/stackback/-/stackback-0.0.2.tgz#1ac8a0d9483848d1695e418b6d031a3c3ce68e3b" + integrity sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw== + +std-env@^3.3.1: + version "3.3.3" + resolved "https://registry.yarnpkg.com/std-env/-/std-env-3.3.3.tgz#a54f06eb245fdcfef53d56f3c0251f1d5c3d01fe" + integrity sha512-Rz6yejtVyWnVjC1RFvNmYL10kgjC49EOghxWn0RFqlCHGFpQx+Xe7yW3I4ceK1SGrWIGMjD5Kbue8W/udkbMJg== + +stream-transform@^2.1.3: + version "2.1.3" + resolved "https://registry.yarnpkg.com/stream-transform/-/stream-transform-2.1.3.tgz#a1c3ecd72ddbf500aa8d342b0b9df38f5aa598e3" + integrity sha512-9GHUiM5hMiCi6Y03jD2ARC1ettBXkQBoQAe7nJsPknnI0ow10aXjTnew8QtYQmLjzn974BnmWEAJgCY6ZP1DeQ== + dependencies: + mixme "^0.5.1" + +string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: + version "4.2.3" + resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" + integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== + dependencies: + emoji-regex "^8.0.0" + is-fullwidth-code-point "^3.0.0" + strip-ansi "^6.0.1" + +string-width@^5.0.0: + version "5.1.2" + resolved "https://registry.yarnpkg.com/string-width/-/string-width-5.1.2.tgz#14f8daec6d81e7221d2a357e668cab73bdbca794" + integrity sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA== + dependencies: + eastasianwidth "^0.2.0" + emoji-regex "^9.2.2" + strip-ansi "^7.0.1" + +string.prototype.padend@^3.0.0: + version "3.1.4" + resolved "https://registry.yarnpkg.com/string.prototype.padend/-/string.prototype.padend-3.1.4.tgz#2c43bb3a89eb54b6750de5942c123d6c98dd65b6" + integrity sha512-67otBXoksdjsnXXRUq+KMVTdlVRZ2af422Y0aTyTjVaoQkGr3mxl2Bc5emi7dOQ3OGVVQQskmLEWwFXwommpNw== 
+ dependencies: + call-bind "^1.0.2" + define-properties "^1.1.4" + es-abstract "^1.20.4" + +string.prototype.trim@^1.2.7: + version "1.2.7" + resolved "https://registry.yarnpkg.com/string.prototype.trim/-/string.prototype.trim-1.2.7.tgz#a68352740859f6893f14ce3ef1bb3037f7a90533" + integrity sha512-p6TmeT1T3411M8Cgg9wBTMRtY2q9+PNy9EV1i2lIXUN/btt763oIfxwN3RR8VU6wHX8j/1CFy0L+YuThm6bgOg== + dependencies: + call-bind "^1.0.2" + define-properties "^1.1.4" + es-abstract "^1.20.4" + +string.prototype.trimend@^1.0.6: + version "1.0.6" + resolved "https://registry.yarnpkg.com/string.prototype.trimend/-/string.prototype.trimend-1.0.6.tgz#c4a27fa026d979d79c04f17397f250a462944533" + integrity sha512-JySq+4mrPf9EsDBEDYMOb/lM7XQLulwg5R/m1r0PXEFqrV0qHvl58sdTilSXtKOflCsK2E8jxf+GKC0T07RWwQ== + dependencies: + call-bind "^1.0.2" + define-properties "^1.1.4" + es-abstract "^1.20.4" + +string.prototype.trimstart@^1.0.6: + version "1.0.6" + resolved "https://registry.yarnpkg.com/string.prototype.trimstart/-/string.prototype.trimstart-1.0.6.tgz#e90ab66aa8e4007d92ef591bbf3cd422c56bdcf4" + integrity sha512-omqjMDaY92pbn5HOX7f9IccLA+U1tA9GvtU4JrodiXFfYB7jPzzHpRzpglLAjtUV6bB557zwClJezTqnAiYnQA== + dependencies: + call-bind "^1.0.2" + define-properties "^1.1.4" + es-abstract "^1.20.4" + +strip-ansi@^6.0.0, strip-ansi@^6.0.1: + version "6.0.1" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" + integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== + dependencies: + ansi-regex "^5.0.1" + +strip-ansi@^7.0.1: + version "7.0.1" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-7.0.1.tgz#61740a08ce36b61e50e65653f07060d000975fb2" + integrity sha512-cXNxvT8dFNRVfhVME3JAe98mkXDYN2O1l7jmcwMnOslDeESg1rF/OZMtK0nRAhiari1unG5cD4jG3rapUAkLbw== + dependencies: + ansi-regex "^6.0.1" + +strip-bom@^3.0.0: + version "3.0.0" + resolved 
"https://registry.yarnpkg.com/strip-bom/-/strip-bom-3.0.0.tgz#2334c18e9c759f7bdd56fdef7e9ae3d588e68ed3" + integrity sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA== + +strip-indent@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/strip-indent/-/strip-indent-3.0.0.tgz#c32e1cee940b6b3432c771bc2c54bcce73cd3001" + integrity sha512-laJTa3Jb+VQpaC6DseHhF7dXVqHTfJPCRDaEbid/drOhgitgYku/letMUqOXFoWV0zIIUbjpdH2t+tYj4bQMRQ== + dependencies: + min-indent "^1.0.0" + +strip-literal@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/strip-literal/-/strip-literal-1.0.1.tgz#0115a332710c849b4e46497891fb8d585e404bd2" + integrity sha512-QZTsipNpa2Ppr6v1AmJHESqJ3Uz247MUS0OjrnnZjFAvEoWqxuyFuXn2xLgMtRnijJShAa1HL0gtJyUs7u7n3Q== + dependencies: + acorn "^8.8.2" + +supports-color@^5.3.0: + version "5.5.0" + resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-5.5.0.tgz#e2e69a44ac8772f78a1ec0b35b689df6530efc8f" + integrity sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow== + dependencies: + has-flag "^3.0.0" + +supports-color@^7.1.0: + version "7.2.0" + resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.2.0.tgz#1b7dcdcb32b8138801b3e478ba6a51caa89648da" + integrity sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw== + dependencies: + has-flag "^4.0.0" + +supports-preserve-symlinks-flag@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz#6eda4bd344a3c94aea376d4cc31bc77311039e09" + integrity sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w== + +term-size@^2.1.0: + version "2.2.1" + resolved "https://registry.yarnpkg.com/term-size/-/term-size-2.2.1.tgz#2a6a54840432c2fb6320fea0f415531e90189f54" + integrity 
sha512-wK0Ri4fOGjv/XPy8SBHZChl8CM7uMc5VML7SqiQ0zG7+J5Vr+RMQDoHa2CNT6KHUnTGIXH34UDMkPzAUyapBZg== + +tinybench@^2.3.1: + version "2.5.0" + resolved "https://registry.yarnpkg.com/tinybench/-/tinybench-2.5.0.tgz#4711c99bbf6f3e986f67eb722fed9cddb3a68ba5" + integrity sha512-kRwSG8Zx4tjF9ZiyH4bhaebu+EDz1BOx9hOigYHlUW4xxI/wKIUQUqo018UlU4ar6ATPBsaMrdbKZ+tmPdohFA== + +tinypool@^0.3.1: + version "0.3.1" + resolved "https://registry.yarnpkg.com/tinypool/-/tinypool-0.3.1.tgz#a99c2e446aba9be05d3e1cb756d6aed7af4723b6" + integrity sha512-zLA1ZXlstbU2rlpA4CIeVaqvWq41MTWqLY3FfsAXgC8+f7Pk7zroaJQxDgxn1xNudKW6Kmj4808rPFShUlIRmQ== + +tinyspy@^1.0.2: + version "1.1.1" + resolved "https://registry.yarnpkg.com/tinyspy/-/tinyspy-1.1.1.tgz#0cb91d5157892af38cb2d217f5c7e8507a5bf092" + integrity sha512-UVq5AXt/gQlti7oxoIg5oi/9r0WpF7DGEVwXgqWSMmyN16+e3tl5lIvTaOpJ3TAtu5xFzWccFRM4R5NaWHF+4g== + +tmp@^0.0.33: + version "0.0.33" + resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.0.33.tgz#6d34335889768d21b2bcda0aa277ced3b1bfadf9" + integrity sha512-jRCJlojKnZ3addtTOjdIqoRuPEKBvNXcGYqzO6zWZX8KfKEpnGY5jfggJQ3EjKuu8D4bJRr0y+cYJFmYbImXGw== + dependencies: + os-tmpdir "~1.0.2" + +to-regex-range@^5.0.1: + version "5.0.1" + resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4" + integrity sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ== + dependencies: + is-number "^7.0.0" + +trim-newlines@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-3.0.1.tgz#260a5d962d8b752425b32f3a7db0dcacd176c144" + integrity sha512-c1PTsA3tYrIsLGkJkzHF+w9F2EyxfXGo4UyJc4pFL++FMjnq0HJS69T3M7d//gKrFKwy429bouPescbjecU+Zw== + +ts-morph@^17.0.1: + version "17.0.1" + resolved "https://registry.yarnpkg.com/ts-morph/-/ts-morph-17.0.1.tgz#d85df4fcf9a1fcda1b331d52c00655f381c932d1" + integrity 
sha512-10PkHyXmrtsTvZSL+cqtJLTgFXkU43Gd0JCc0Rw6GchWbqKe0Rwgt1v3ouobTZwQzF1mGhDeAlWYBMGRV7y+3g== + dependencies: + "@ts-morph/common" "~0.18.0" + code-block-writer "^11.0.3" + +tsx@^3.12.3: + version "3.12.7" + resolved "https://registry.yarnpkg.com/tsx/-/tsx-3.12.7.tgz#b3b8b0fc79afc8260d1e14f9e995616c859a91e9" + integrity sha512-C2Ip+jPmqKd1GWVQDvz/Eyc6QJbGfE7NrR3fx5BpEHMZsEHoIxHL1j+lKdGobr8ovEyqeNkPLSKp6SCSOt7gmw== + dependencies: + "@esbuild-kit/cjs-loader" "^2.4.2" + "@esbuild-kit/core-utils" "^3.0.0" + "@esbuild-kit/esm-loader" "^2.5.5" + optionalDependencies: + fsevents "~2.3.2" + +tty-table@^4.1.5: + version "4.2.1" + resolved "https://registry.yarnpkg.com/tty-table/-/tty-table-4.2.1.tgz#c06cd76c54542acf4e2b4a0e9a5802984b65cba6" + integrity sha512-xz0uKo+KakCQ+Dxj1D/tKn2FSyreSYWzdkL/BYhgN6oMW808g8QRMuh1atAV9fjTPbWBjfbkKQpI/5rEcnAc7g== + dependencies: + chalk "^4.1.2" + csv "^5.5.3" + kleur "^4.1.5" + smartwrap "^2.0.2" + strip-ansi "^6.0.1" + wcwidth "^1.0.1" + yargs "^17.7.1" + +type-detect@^4.0.0, type-detect@^4.0.5: + version "4.0.8" + resolved "https://registry.yarnpkg.com/type-detect/-/type-detect-4.0.8.tgz#7646fb5f18871cfbb7749e69bd39a6388eb7450c" + integrity sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g== + +type-fest@^0.13.1: + version "0.13.1" + resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.13.1.tgz#0172cb5bce80b0bd542ea348db50c7e21834d934" + integrity sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg== + +type-fest@^0.6.0: + version "0.6.0" + resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.6.0.tgz#8d2a2370d3df886eb5c90ada1c5bf6188acf838b" + integrity sha512-q+MB8nYR1KDLrgr4G5yemftpMC7/QLqVndBmEEdqzmNj5dcFOO4Oo8qlwZE3ULT3+Zim1F8Kq4cBnikNhlCMlg== + +type-fest@^0.8.1: + version "0.8.1" + resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.8.1.tgz#09e249ebde851d3b1e48d27c105444667f17b83d" + integrity 
sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA== + +typed-array-length@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/typed-array-length/-/typed-array-length-1.0.4.tgz#89d83785e5c4098bec72e08b319651f0eac9c1bb" + integrity sha512-KjZypGq+I/H7HI5HlOoGHkWUUGq+Q0TPhQurLbyrVrvnKTBgzLhIJ7j6J/XTQOi0d1RjyZ0wdas8bKs2p0x3Ng== + dependencies: + call-bind "^1.0.2" + for-each "^0.3.3" + is-typed-array "^1.1.9" + +typescript@^4.9.5: + version "4.9.5" + resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.9.5.tgz#095979f9bcc0d09da324d58d03ce8f8374cbe65a" + integrity sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g== + +ufo@^1.1.2: + version "1.1.2" + resolved "https://registry.yarnpkg.com/ufo/-/ufo-1.1.2.tgz#d0d9e0fa09dece0c31ffd57bd363f030a35cfe76" + integrity sha512-TrY6DsjTQQgyS3E3dBaOXf0TpPD8u9FVrVYmKVegJuFw51n/YB9XPt+U6ydzFG5ZIN7+DIjPbNmXoBj9esYhgQ== + +unbox-primitive@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/unbox-primitive/-/unbox-primitive-1.0.2.tgz#29032021057d5e6cdbd08c5129c226dff8ed6f9e" + integrity sha512-61pPlCD9h51VoreyJ0BReideM3MDKMKnh6+V9L08331ipq6Q8OFXZYiqP6n/tbHx4s5I9uRhcye6BrbkizkBDw== + dependencies: + call-bind "^1.0.2" + has-bigints "^1.0.2" + has-symbols "^1.0.3" + which-boxed-primitive "^1.0.2" + +universalify@^0.1.0: + version "0.1.2" + resolved "https://registry.yarnpkg.com/universalify/-/universalify-0.1.2.tgz#b646f69be3942dabcecc9d6639c80dc105efaa66" + integrity sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg== + +validate-npm-package-license@^3.0.1: + version "3.0.4" + resolved "https://registry.yarnpkg.com/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz#fc91f6b9c7ba15c857f4cb2c5defeec39d4f410a" + integrity sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew== + dependencies: + spdx-correct 
"^3.0.0" + spdx-expression-parse "^3.0.0" + +vite-node@0.28.5: + version "0.28.5" + resolved "https://registry.yarnpkg.com/vite-node/-/vite-node-0.28.5.tgz#56d0f78846ea40fddf2e28390899df52a4738006" + integrity sha512-LmXb9saMGlrMZbXTvOveJKwMTBTNUH66c8rJnQ0ZPNX+myPEol64+szRzXtV5ORb0Hb/91yq+/D3oERoyAt6LA== + dependencies: + cac "^6.7.14" + debug "^4.3.4" + mlly "^1.1.0" + pathe "^1.1.0" + picocolors "^1.0.0" + source-map "^0.6.1" + source-map-support "^0.5.21" + vite "^3.0.0 || ^4.0.0" + +"vite@^3.0.0 || ^4.0.0": + version "4.3.5" + resolved "https://registry.yarnpkg.com/vite/-/vite-4.3.5.tgz#3871fe0f4b582ea7f49a85386ac80e84826367d9" + integrity sha512-0gEnL9wiRFxgz40o/i/eTBwm+NEbpUeTWhzKrZDSdKm6nplj+z4lKz8ANDgildxHm47Vg8EUia0aicKbawUVVA== + dependencies: + esbuild "^0.17.5" + postcss "^8.4.23" + rollup "^3.21.0" + optionalDependencies: + fsevents "~2.3.2" + +vitest@^0.28.5: + version "0.28.5" + resolved "https://registry.yarnpkg.com/vitest/-/vitest-0.28.5.tgz#94410a8924cd7189e4f1adffa8c5cde809cbf2f9" + integrity sha512-pyCQ+wcAOX7mKMcBNkzDwEHRGqQvHUl0XnoHR+3Pb1hytAHISgSxv9h0gUiSiYtISXUU3rMrKiKzFYDrI6ZIHA== + dependencies: + "@types/chai" "^4.3.4" + "@types/chai-subset" "^1.3.3" + "@types/node" "*" + "@vitest/expect" "0.28.5" + "@vitest/runner" "0.28.5" + "@vitest/spy" "0.28.5" + "@vitest/utils" "0.28.5" + acorn "^8.8.1" + acorn-walk "^8.2.0" + cac "^6.7.14" + chai "^4.3.7" + debug "^4.3.4" + local-pkg "^0.4.2" + pathe "^1.1.0" + picocolors "^1.0.0" + source-map "^0.6.1" + std-env "^3.3.1" + strip-literal "^1.0.0" + tinybench "^2.3.1" + tinypool "^0.3.1" + tinyspy "^1.0.2" + vite "^3.0.0 || ^4.0.0" + vite-node "0.28.5" + why-is-node-running "^2.2.2" + +wcwidth@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/wcwidth/-/wcwidth-1.0.1.tgz#f0b0dcf915bc5ff1528afadb2c0e17b532da2fe8" + integrity sha512-XHPEwS0q6TaxcvG85+8EYkbiCux2XtWG2mkc47Ng2A77BQu9+DqIOJldST4HgPkuea7dvKSj5VgX3P1d4rW8Tg== + dependencies: + defaults "^1.0.3" + 
+which-boxed-primitive@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz#13757bc89b209b049fe5d86430e21cf40a89a8e6" + integrity sha512-bwZdv0AKLpplFY2KZRX6TvyuN7ojjr7lwkg6ml0roIy9YeuSr7JS372qlNW18UQYzgYK9ziGcerWqZOmEn9VNg== + dependencies: + is-bigint "^1.0.1" + is-boolean-object "^1.1.0" + is-number-object "^1.0.4" + is-string "^1.0.5" + is-symbol "^1.0.3" + +which-module@^2.0.0: + version "2.0.1" + resolved "https://registry.yarnpkg.com/which-module/-/which-module-2.0.1.tgz#776b1fe35d90aebe99e8ac15eb24093389a4a409" + integrity sha512-iBdZ57RDvnOR9AGBhML2vFZf7h8vmBjhoaZqODJBFWHVtKkDmKuHai3cx5PgVMrX5YDNp27AofYbAwctSS+vhQ== + +which-pm@2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/which-pm/-/which-pm-2.0.0.tgz#8245609ecfe64bf751d0eef2f376d83bf1ddb7ae" + integrity sha512-Lhs9Pmyph0p5n5Z3mVnN0yWcbQYUAD7rbQUiMsQxOJ3T57k7RFe35SUwWMf7dsbDZks1uOmw4AecB/JMDj3v/w== + dependencies: + load-yaml-file "^0.2.0" + path-exists "^4.0.0" + +which-typed-array@^1.1.9: + version "1.1.9" + resolved "https://registry.yarnpkg.com/which-typed-array/-/which-typed-array-1.1.9.tgz#307cf898025848cf995e795e8423c7f337efbde6" + integrity sha512-w9c4xkx6mPidwp7180ckYWfMmvxpjlZuIudNtDf4N/tTAUB8VJbX25qZoAsrtGuYNnGw3pa0AXgbGKRB8/EceA== + dependencies: + available-typed-arrays "^1.0.5" + call-bind "^1.0.2" + for-each "^0.3.3" + gopd "^1.0.1" + has-tostringtag "^1.0.0" + is-typed-array "^1.1.10" + +which@^1.2.9: + version "1.3.1" + resolved "https://registry.yarnpkg.com/which/-/which-1.3.1.tgz#a45043d54f5805316da8d62f9f50918d3da70b0a" + integrity sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ== + dependencies: + isexe "^2.0.0" + +why-is-node-running@^2.2.2: + version "2.2.2" + resolved "https://registry.yarnpkg.com/why-is-node-running/-/why-is-node-running-2.2.2.tgz#4185b2b4699117819e7154594271e7e344c9973e" + integrity 
sha512-6tSwToZxTOcotxHeA+qGCq1mVzKR3CwcJGmVcY+QE8SHy6TnpFnh8PAvPNHYr7EcuVeG0QSMxtYCuO1ta/G/oA== + dependencies: + siginfo "^2.0.0" + stackback "0.0.2" + +wrap-ansi@^6.2.0: + version "6.2.0" + resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-6.2.0.tgz#e9393ba07102e6c91a3b221478f0257cd2856e53" + integrity sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA== + dependencies: + ansi-styles "^4.0.0" + string-width "^4.1.0" + strip-ansi "^6.0.0" + +wrap-ansi@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" + integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== + dependencies: + ansi-styles "^4.0.0" + string-width "^4.1.0" + strip-ansi "^6.0.0" + +y18n@^4.0.0: + version "4.0.3" + resolved "https://registry.yarnpkg.com/y18n/-/y18n-4.0.3.tgz#b5f259c82cd6e336921efd7bfd8bf560de9eeedf" + integrity sha512-JKhqTOwSrqNA1NY5lSztJ1GrBiUodLMmIZuLiDaMRJ+itFd+ABVE8XBjOvIWL+rSqNDC74LCSFmlb/U4UZ4hJQ== + +y18n@^5.0.5: + version "5.0.8" + resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.8.tgz#7f4934d0f7ca8c56f95314939ddcd2dd91ce1d55" + integrity sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA== + +yallist@^2.1.2: + version "2.1.2" + resolved "https://registry.yarnpkg.com/yallist/-/yallist-2.1.2.tgz#1c11f9218f076089a47dd512f93c6699a6a81d52" + integrity sha512-ncTzHV7NvsQZkYe1DW7cbDLm0YpzHmZF5r/iyP3ZnQtMiJ+pjzisCiMNI+Sj+xQF5pXhSHxSB3uDbsBTzY/c2A== + +yargs-parser@^18.1.2, yargs-parser@^18.1.3: + version "18.1.3" + resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-18.1.3.tgz#be68c4975c6b2abf469236b0c870362fab09a7b0" + integrity sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ== + dependencies: + camelcase "^5.0.0" + decamelize "^1.2.0" + +yargs-parser@^21.1.1: + version "21.1.1" + 
resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-21.1.1.tgz#9096bceebf990d21bb31fa9516e0ede294a77d35" + integrity sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw== + +yargs@^15.1.0: + version "15.4.1" + resolved "https://registry.yarnpkg.com/yargs/-/yargs-15.4.1.tgz#0d87a16de01aee9d8bec2bfbf74f67851730f4f8" + integrity sha512-aePbxDmcYW++PaqBsJ+HYUFwCdv4LVvdnhBy78E57PIor8/OVvhMrADFFEDh8DHDFRv/O9i3lPhsENjO7QX0+A== + dependencies: + cliui "^6.0.0" + decamelize "^1.2.0" + find-up "^4.1.0" + get-caller-file "^2.0.1" + require-directory "^2.1.1" + require-main-filename "^2.0.0" + set-blocking "^2.0.0" + string-width "^4.2.0" + which-module "^2.0.0" + y18n "^4.0.0" + yargs-parser "^18.1.2" + +yargs@^17.7.1: + version "17.7.2" + resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.7.2.tgz#991df39aca675a192b816e1e0363f9d75d2aa269" + integrity sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w== + dependencies: + cliui "^8.0.1" + escalade "^3.1.1" + get-caller-file "^2.0.5" + require-directory "^2.1.1" + string-width "^4.2.3" + y18n "^5.0.5" + yargs-parser "^21.1.1" + +yocto-queue@^0.1.0: + version "0.1.0" + resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-0.1.0.tgz#0294eb3dee05028d31ee1a5fa2c556a6aaf10a1b" + integrity sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q== + +yocto-queue@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-1.0.0.tgz#7f816433fb2cbc511ec8bf7d263c3b58a1a3c251" + integrity sha512-9bnSc/HEW2uRy67wc+T8UwauLuPJVn28jb+GtJY16iiKWyvmYJRXVT4UamsAEGQfPohgr2q4Tq0sQbQlxTfi1g== From fdf61214284b1745255b5ef99f8f862f8fb835cc Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 12 May 2023 22:23:12 +0200 Subject: [PATCH 110/207] Rename WASM workflows --- .github/workflows/{build_js.yml => build_wasm.yml} | 8 ++++---- 1 file changed, 4 insertions(+), 4 
deletions(-) rename .github/workflows/{build_js.yml => build_wasm.yml} (83%) diff --git a/.github/workflows/build_js.yml b/.github/workflows/build_wasm.yml similarity index 83% rename from .github/workflows/build_js.yml rename to .github/workflows/build_wasm.yml index adc3d714..d4f593d5 100644 --- a/.github/workflows/build_js.yml +++ b/.github/workflows/build_wasm.yml @@ -1,4 +1,4 @@ -name: Build JS +name: Build WASM on: [push, pull_request, workflow_dispatch] @@ -21,8 +21,8 @@ jobs: run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - run: yarn install --frozen-lockfile - working-directory: ./js + working-directory: ./wasm - run: yarn run build - working-directory: ./js + working-directory: ./wasm - run: yarn run test - working-directory: ./js + working-directory: ./wasm From 6b13f93ba836088888dabd8d0179b4ac9f10be84 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 13 May 2023 02:00:33 +0200 Subject: [PATCH 111/207] Generate ranks --- js/scripts/regex.ts | 34 -- js/src/index.ts | 10 +- package.json | 2 + scripts/ranks.ts | 305 ++++++++++++++++++ {wasm/scripts => scripts}/tsconfig.json | 0 wasm/package.json | 4 +- wasm/scripts/inline_ranks.ts | 107 ------ .../{post_process.ts => postprocess.ts} | 23 +- wasm/tsconfig.json | 2 +- 9 files changed, 336 insertions(+), 151 deletions(-) delete mode 100644 js/scripts/regex.ts create mode 100644 scripts/ranks.ts rename {wasm/scripts => scripts}/tsconfig.json (100%) delete mode 100644 wasm/scripts/inline_ranks.ts rename wasm/scripts/{post_process.ts => postprocess.ts} (93%) diff --git a/js/scripts/regex.ts b/js/scripts/regex.ts deleted file mode 100644 index ca2db8a3..00000000 --- a/js/scripts/regex.ts +++ /dev/null @@ -1,34 +0,0 @@ -// generate combinations - -const strings = "'s|'t|'re|'ve|'m|'ll|'d"; - -const testRegex = - "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"; - -function recombine(value: string, 
acc: string[] = [""]): string[] { - if (value.length === 0) return acc; - if (value[0].match(/[a-zA-Z]/)) { - return recombine( - value.substring(1), - acc.flatMap((i) => [ - `${i}${value[0].toLocaleLowerCase()}`, - `${i}${value[0].toLocaleUpperCase()}`, - ]) - ); - } - - return recombine( - value.substring(1), - acc.map((i) => `${i}${value[0]}`) - ); -} - -let match = testRegex.replace(/\(\?i:(.*?)\)/, (_, match: string) => { - const insensitive = match - .split("|") - .flatMap((a) => recombine(a)) - .join("|"); - return `(${insensitive})`; -}); - -console.log(match); diff --git a/js/src/index.ts b/js/src/index.ts index 843cfe7b..efc16dda 100644 --- a/js/src/index.ts +++ b/js/src/index.ts @@ -1,8 +1,8 @@ -import gpt2 from "../../wasm/dist/encoders/gpt2.json"; -import p50k_base from "../../wasm/dist/encoders/p50k_base.json"; -import p50k_edit from "../../wasm/dist/encoders/p50k_edit.json"; -import r50k_base from "../../wasm/dist/encoders/r50k_base.json"; -import cl100k_base from "../../wasm/dist/encoders/cl100k_base.json"; +import gpt2 from "./ranks/gpt2"; +import p50k_base from "./ranks/p50k_base"; +import p50k_edit from "./ranks/p50k_edit"; +import r50k_base from "./ranks/r50k_base"; +import cl100k_base from "./ranks/cl100k_base"; import base64 from "base64-js"; diff --git a/package.json b/package.json index 2faabbc6..e9b54328 100644 --- a/package.json +++ b/package.json @@ -5,11 +5,13 @@ "wasm" ], "devDependencies": { + "base64-js": "^1.5.1", "@types/node": "^18.14.4", "@changesets/cli": "^2.26.1", "npm-run-all": "^4.1.5", "vitest": "^0.28.5", "typescript": "^4.9.5", + "outdent": "^0.8.0", "tsx": "^3.12.3" } } diff --git a/scripts/ranks.ts b/scripts/ranks.ts new file mode 100644 index 00000000..44a3d1ab --- /dev/null +++ b/scripts/ranks.ts @@ -0,0 +1,305 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import outdent from "outdent"; +import { fromByteArray } from "base64-js"; +import registry from "../tiktoken/registry.json"; + +// 
printable ascii characters according to python +function isPrintable(u: number): boolean { + return !(u <= 31 || (u >= 127 && u <= 160) || u == 173); +} + +function dataGymToMergeableBpeRanks( + vocal_bpe_contents: string, + encoder_json_contents: string +) { + const rank_to_intbyte = Array.from({ length: 2 ** 8 }, (_, i) => i).filter( + (i) => isPrintable(i) && String.fromCharCode(i) !== " " + ); + + const data_gym_byte_to_byte = rank_to_intbyte.reduce>( + (memo, item) => { + memo[String.fromCharCode(item)] = item; + return memo; + }, + {} + ); + + let n = 0; + for (let b = 0; b < 2 ** 8; b++) { + if (!rank_to_intbyte.includes(b)) { + rank_to_intbyte.push(b); + data_gym_byte_to_byte[String.fromCharCode(2 ** 8 + n)] = b; + n += 1; + } + } + + if (rank_to_intbyte.length !== 2 ** 8) { + throw new Error("rank_to_intbyte.length must be 2**8"); + } + + // vocab_bpe contains the merges along with associated ranks + const bpe_merges = vocal_bpe_contents + .split("\n") + .slice(1, -1) + .map((merge_str) => merge_str.split(" ")); + + function decodeDataGym(value: string) { + return value.split("").map((b) => data_gym_byte_to_byte[b]); + } + + // add the single byte tokens + const bpe_ranks = Object.fromEntries(rank_to_intbyte.map((b, i) => [b, i])); + + // add the merged tokens + n = rank_to_intbyte.length; + for (const [first, second] of bpe_merges) { + bpe_ranks[[...decodeDataGym(first), ...decodeDataGym(second)].join(",")] = + n; + n += 1; + } + + // check that the encoder file matches the merges file + // this sanity check is important since tiktoken assumes that ranks are ordered the same + // as merge priority + const encoder_json: Record = JSON.parse( + encoder_json_contents + ); + + const encoder_json_loaded = Object.fromEntries( + Object.entries(encoder_json).map(([k, v]) => [ + decodeDataGym(k).join(","), + v, + ]) + ); + + // drop these two special tokens if present, since they're not mergeable bpe tokens + delete 
encoder_json_loaded[decodeDataGym("<|endoftext|>").join(",")]; + delete encoder_json_loaded[decodeDataGym("<|startoftext|>").join(",")]; + + function normalize_map(items: Record) { + return JSON.stringify( + Object.keys(items) + .sort() + .map((key) => [key, items[key]]) + ); + } + + if (normalize_map(bpe_ranks) !== normalize_map(encoder_json_loaded)) { + throw new Error("bpe_ranks !== encoder_json_loaded"); + } + + return bpe_ranks; +} + +function dumpTiktokenBpe(bpe_ranks: Record) { + return ( + Object.entries(bpe_ranks) + .sort((a, b) => a[1] - b[1]) + .map(([token_str, rank]) => + [ + fromByteArray( + new Uint8Array( + token_str.split(",").map((i) => Number.parseInt(i, 10)) + ) + ), + rank, + ].join(" ") + ) + .join("\n") + "\n" + ); +} + +async function downloadBpe( + registry: ( + | { load_tiktoken_bpe: string } + | { + data_gym_to_mergeable_bpe_ranks: { + vocab_bpe_file: string; + encoder_json_file: string; + }; + } + ) & { + explicit_n_vocab?: number; + pat_str: string; + special_tokens: Record; + } +) { + if ("data_gym_to_mergeable_bpe_ranks" in registry) { + const [vocab_bpe, encoder_json] = await Promise.all([ + fetch(registry.data_gym_to_mergeable_bpe_ranks.vocab_bpe_file).then((a) => + a.text() + ), + fetch(registry.data_gym_to_mergeable_bpe_ranks.encoder_json_file).then( + (a) => a.text() + ), + ]); + + return { + explicit_n_vocab: registry.explicit_n_vocab, + pat_str: registry.pat_str, + special_tokens: registry.special_tokens, + bpe_ranks: dumpTiktokenBpe( + dataGymToMergeableBpeRanks(vocab_bpe, encoder_json) + ), + }; + } else { + return { + explicit_n_vocab: registry.explicit_n_vocab, + pat_str: registry.pat_str, + special_tokens: registry.special_tokens, + bpe_ranks: await fetch(registry.load_tiktoken_bpe).then((a) => a.text()), + }; + } +} + +function compressTiktokenBpe(tiktoken_bpe_file: string) { + const original = tiktoken_bpe_file + .split("\n") + .map((line) => line.trim() && line.split(" ")) + .filter((x): x is Array => !!x && 
Array.isArray(x)) + .map(([token, rank]) => [token, Number.parseInt(rank, 10)] as const) + .sort((a, b) => a[1] - b[1]); + + const newTokens = original.reduce< + Array<{ offset: number; tokens: string[] }> + >((memo, item) => { + if (memo.length === 0) return [{ offset: item[1], tokens: [item[0]] }]; + const lastSplit = memo[memo.length - 1]; + const nextOffset = lastSplit.offset + lastSplit.tokens.length; + + if (nextOffset === item[1]) { + lastSplit.tokens.push(item[0]); + return memo; + } + + return [...memo, { offset: item[1], tokens: [item[0]] }]; + }, []); + + const compressed = newTokens + .map((x) => `! ${x.offset} ${x.tokens.join(" ")}`) + .join("\n"); + + // make sure the compressed and the original files are the same + const tiktokenOld = compressed + .split("\n") + .filter(Boolean) + .reduce>((memo, x) => { + const [_, offsetStr, ...tokens] = x.split(" "); + const offset = Number.parseInt(offsetStr, 10); + tokens.forEach((token, i) => (memo[token] = offset + i)); + return memo; + }, {}); + + function normalizeMap(items: Record) { + return JSON.stringify( + Object.keys(items) + .sort() + .map((key) => [key, items[key]]) + ); + } + + if ( + normalizeMap(tiktokenOld) !== normalizeMap(Object.fromEntries(original)) + ) { + throw new Error("Invalid compression"); + } + + return compressed; +} + +function combineInsensitive(value: string, acc: string[] = [""]): string[] { + if (value.length === 0) return acc; + if (value[0].match(/[a-zA-Z]/)) { + return combineInsensitive( + value.substring(1), + acc.flatMap((i) => [ + `${i}${value[0].toLocaleLowerCase()}`, + `${i}${value[0].toLocaleUpperCase()}`, + ]) + ); + } + + return combineInsensitive( + value.substring(1), + acc.map((i) => `${i}${value[0]}`) + ); +} + +async function main() { + for (const name in registry) { + console.log(name); + + for (const lib of ["wasm", "js"]) { + const targetDir = path.resolve(__dirname, "../", lib, "src/ranks"); + + try { + await fs.mkdir(targetDir, { recursive: true }); + } 
catch {} + + const data = registry[name as keyof typeof registry]; + const bpePath = path.resolve(targetDir, `${name}.tiktoken`); + const compressPath = path.resolve(targetDir, `${name}.compress.tiktoken`); + const jsonPath = path.resolve(targetDir, `${name}.json`); + const cjsPath = path.resolve(targetDir, `${name}.cjs`); + const dtsPath = path.resolve(targetDir, `${name}.d.ts`); + const mjsPath = path.resolve(targetDir, `${name}.js`); + + try { + await Promise.all([ + fs.stat(bpePath), + fs.stat(jsonPath), + fs.stat(compressPath), + fs.stat(cjsPath), + fs.stat(mjsPath), + fs.stat(dtsPath), + ]); + + continue; + } catch {} + + const bpe = await downloadBpe(data); + + if (lib === "js") { + bpe.pat_str = bpe.pat_str.replace( + /\(\?i:(.*?)\)/, + (_, match: string) => + `(${match + .split("|") + .flatMap((a) => combineInsensitive(a)) + .join("|")})` + ); + + // attempt to create a regexp + new RegExp(bpe.pat_str, "u"); + } + + await fs.writeFile(bpePath, bpe.bpe_ranks, { encoding: "utf-8" }); + + const compress = compressTiktokenBpe(bpe.bpe_ranks); + await fs.writeFile(compressPath, compress, { encoding: "utf-8" }); + + const json = JSON.stringify({ ...bpe, bpe_ranks: compress }); + await fs.writeFile(jsonPath, json, { encoding: "utf-8" }); + + const mjs = `export default ${json};`; + await fs.writeFile(mjsPath, mjs, { encoding: "utf-8" }); + + const cjs = `module.exports = ${json};`; + await fs.writeFile(cjsPath, cjs, { encoding: "utf-8" }); + + const dts = outdent` + declare const encoder: { + pat_str: string; + special_tokens: Record; + bpe_ranks: string; + }; + export default encoder; + `; + + await fs.writeFile(dtsPath, dts, { encoding: "utf-8" }); + } + } +} + +main(); diff --git a/wasm/scripts/tsconfig.json b/scripts/tsconfig.json similarity index 100% rename from wasm/scripts/tsconfig.json rename to scripts/tsconfig.json diff --git a/wasm/package.json b/wasm/package.json index 5164a79b..5d9d7ee7 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -6,9 
+6,8 @@ "scripts": { "build": "run-s build:*", "build:cleanup": "rm -rf dist/", - "build:rank": "tsx scripts/inline_ranks.ts", "build:wasm": "run-s wasm:*", - "build:postprocess": "tsx scripts/post_process.ts", + "build:postprocess": "tsx scripts/postprocess.ts", "wasm:bundler": "wasm-pack build --target bundler --weak-refs --release --out-dir dist && rm -rf dist/.gitignore dist/README.md dist/package.json", "wasm:lite": "wasm-pack build --target bundler --weak-refs --release --out-dir dist/lite --no-default-features && rm -rf dist/lite/.gitignore dist/lite/README.md dist/lite/package.json", "test": "yarn vitest" @@ -19,7 +18,6 @@ }, "dependencies": {}, "devDependencies": { - "outdent": "^0.8.0", "ts-morph": "^17.0.1" } } diff --git a/wasm/scripts/inline_ranks.ts b/wasm/scripts/inline_ranks.ts deleted file mode 100644 index f508fd85..00000000 --- a/wasm/scripts/inline_ranks.ts +++ /dev/null @@ -1,107 +0,0 @@ -import fs from "node:fs/promises"; -import path from "node:path"; -import { load } from "../src/load"; - -function compress_tiktoken_bpe(tiktoken_bpe_file: string) { - const original = tiktoken_bpe_file - .split("\n") - .map((line) => line.trim() && line.split(" ")) - .filter((x): x is Array => !!x && Array.isArray(x)) - .map(([token, rank]) => [token, Number.parseInt(rank, 10)] as const) - .sort((a, b) => a[1] - b[1]); - - const newTokens = original.reduce< - Array<{ offset: number; tokens: string[] }> - >((memo, item) => { - if (memo.length === 0) return [{ offset: item[1], tokens: [item[0]] }]; - const lastSplit = memo[memo.length - 1]; - const nextOffset = lastSplit.offset + lastSplit.tokens.length; - - if (nextOffset === item[1]) { - lastSplit.tokens.push(item[0]); - return memo; - } - - return [...memo, { offset: item[1], tokens: [item[0]] }]; - }, []); - - const compressed = newTokens - .map((x) => `! 
${x.offset} ${x.tokens.join(" ")}`) - .join("\n"); - - // make sure the compressed and the original files are the same - const tiktokenOld = compressed - .split("\n") - .filter(Boolean) - .reduce>((memo, x) => { - const [_, offsetStr, ...tokens] = x.split(" "); - const offset = Number.parseInt(offsetStr, 10); - tokens.forEach((token, i) => (memo[token] = offset + i)); - return memo; - }, {}); - - function normalize_map(items: Record) { - return JSON.stringify( - Object.keys(items) - .sort() - .map((key) => [key, items[key]]) - ); - } - - if ( - normalize_map(tiktokenOld) !== normalize_map(Object.fromEntries(original)) - ) { - throw new Error("Invalid compression"); - } - - return compressed; -} - -async function main() { - try { - await fs.mkdir(path.resolve(__dirname, "../ranks"), { recursive: true }); - } catch {} - - const registry = JSON.parse( - await fs.readFile(path.resolve(__dirname, "../../tiktoken/registry.json"), { - encoding: "utf-8", - }) - ); - - for (const name in registry) { - console.log(name); - const data = registry[name]; - - const tiktokenFile = path.resolve(__dirname, `../ranks/${name}.tiktoken`); - const tiktokenCompressedFile = path.resolve( - __dirname, - `../ranks/${name}.compress.tiktoken` - ); - const jsonFile = path.resolve(__dirname, `../ranks/${name}.json`); - - try { - await Promise.all([ - fs.stat(tiktokenFile), - fs.stat(jsonFile), - fs.stat(tiktokenCompressedFile), - ]); - continue; - } catch {} - - const result = await load(data); - await fs.writeFile(tiktokenFile, result.bpe_ranks, { encoding: "utf-8" }); - - const compress = compress_tiktoken_bpe(result.bpe_ranks); - await fs.writeFile(tiktokenCompressedFile, compress, { - encoding: "utf-8", - }); - - await fs.writeFile( - jsonFile, - JSON.stringify({ ...result, bpe_ranks: compress }), - { encoding: "utf-8" } - ); - } -} - -main(); diff --git a/wasm/scripts/post_process.ts b/wasm/scripts/postprocess.ts similarity index 93% rename from wasm/scripts/post_process.ts rename to 
wasm/scripts/postprocess.ts index 34db13b5..ab8865f6 100644 --- a/wasm/scripts/post_process.ts +++ b/wasm/scripts/postprocess.ts @@ -372,11 +372,32 @@ for (const baseDir of [ for (const key in registry) { fs.copyFileSync( - path.resolve(__dirname, `../ranks/${key}.json`), + path.resolve(__dirname, `../src/ranks/${key}.json`), path.resolve(__dirname, `../dist/encoders/${key}.json`) ); pkg["exports"][`./encoders/${key}.json`] = `./encoders/${key}.json`; + + fs.copyFileSync( + path.resolve(__dirname, `../src/ranks/${key}.js`), + path.resolve(__dirname, `../dist/encoders/${key}.js`) + ); + + fs.copyFileSync( + path.resolve(__dirname, `../src/ranks/${key}.cjs`), + path.resolve(__dirname, `../dist/encoders/${key}.cjs`) + ); + + fs.copyFileSync( + path.resolve(__dirname, `../src/ranks/${key}.d.ts`), + path.resolve(__dirname, `../dist/encoders/${key}.d.ts`) + ); + pkg["exports"][`./encoders/${key}`] = { + types: `./encoders/${key}.d.ts`, + "edge-light": `./encoders/${key}.js`, + node: `./encoders/${key}.cjs`, + default: `./encoders/${key}.js`, + }; } fs.copyFileSync( diff --git a/wasm/tsconfig.json b/wasm/tsconfig.json index 27d2313e..e9b8e79e 100644 --- a/wasm/tsconfig.json +++ b/wasm/tsconfig.json @@ -10,6 +10,6 @@ "allowSyntheticDefaultImports": true, "resolveJsonModule": true }, - "include": ["./src/**/*.ts"], + "include": ["./src/**/*.ts", "./scripts/**/*.ts"], "exclude": ["node_modules", "dist"] } From 7e5ab3529de216bab074ac33a13e8bb62b6fd404 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 13 May 2023 02:46:29 +0200 Subject: [PATCH 112/207] Monorepo changes --- .gitignore | 4 ++-- js/.gitignore | 2 -- js/package.json | 5 +++- js/src/index.ts | 58 +++++++++++------------------------------------ js/tsconfig.json | 3 ++- package.json | 16 +++++++++---- scripts/ranks.ts | 31 +++++++++++++++++++------ turbo.json | 16 +++++++++++++ wasm/package.json | 2 +- wasm/src/lib.rs | 20 ++++++++-------- yarn.lock | 42 ++++++++++++++++++++++++++++++++++ 11 files changed, 125 
insertions(+), 74 deletions(-) delete mode 100644 js/.gitignore create mode 100644 turbo.json diff --git a/.gitignore b/.gitignore index 6b0a18bd..88947d24 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,6 @@ htmlcov Cargo.lock target/ -# WASM ranks/ -node_modules \ No newline at end of file +node_modules +.turbo \ No newline at end of file diff --git a/js/.gitignore b/js/.gitignore deleted file mode 100644 index 755d2dce..00000000 --- a/js/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -ranks/ -node_modules \ No newline at end of file diff --git a/js/package.json b/js/package.json index 2bc6de5c..d5c0d110 100644 --- a/js/package.json +++ b/js/package.json @@ -3,7 +3,10 @@ "version": "1.0.0", "description": "Javascript port of tiktoken", "license": "MIT", - "scripts": {}, + "scripts": { + "build": "tsc", + "test": "vitest run" + }, "repository": { "type": "git", "url": "https://github.com/dqbd/tiktoken" diff --git a/js/src/index.ts b/js/src/index.ts index efc16dda..4c495bbb 100644 --- a/js/src/index.ts +++ b/js/src/index.ts @@ -1,3 +1,4 @@ +import { TiktokenModel, TiktokenEncoding } from "./ranks/ranks"; import gpt2 from "./ranks/gpt2"; import p50k_base from "./ranks/p50k_base"; import p50k_edit from "./ranks/p50k_edit"; @@ -56,15 +57,25 @@ function escapeRegex(str: string) { } export class Tiktoken { + /** @internal */ protected specialTokens: Record; + + /** @internal */ protected inverseSpecialTokens: Record; + /** @internal */ protected patStr: string; + /** @internal */ protected textEncoder = new TextEncoder(); + + /** @internal */ protected textDecoder = new TextDecoder("utf-8"); + /** @internal */ protected rankMap = new Map(); + + /** @internal */ protected textMap = new Map(); constructor( @@ -177,13 +188,6 @@ export class Tiktoken { } } -export type TiktokenEncoding = - | "gpt2" - | "r50k_base" - | "p50k_base" - | "p50k_edit" - | "cl100k_base"; - export function getEncoding( encoding: TiktokenEncoding, extendSpecialTokens?: Record @@ -204,44 +208,6 @@ 
export function getEncoding( } } -export type TiktokenModel = - | "text-davinci-003" - | "text-davinci-002" - | "text-davinci-001" - | "text-curie-001" - | "text-babbage-001" - | "text-ada-001" - | "davinci" - | "curie" - | "babbage" - | "ada" - | "code-davinci-002" - | "code-davinci-001" - | "code-cushman-002" - | "code-cushman-001" - | "davinci-codex" - | "cushman-codex" - | "text-davinci-edit-001" - | "code-davinci-edit-001" - | "text-embedding-ada-002" - | "text-similarity-davinci-001" - | "text-similarity-curie-001" - | "text-similarity-babbage-001" - | "text-similarity-ada-001" - | "text-search-davinci-doc-001" - | "text-search-curie-doc-001" - | "text-search-babbage-doc-001" - | "text-search-ada-doc-001" - | "code-search-babbage-code-001" - | "code-search-ada-code-001" - | "gpt2" - | "gpt-4" - | "gpt-4-0314" - | "gpt-4-32k" - | "gpt-4-32k-0314" - | "gpt-3.5-turbo" - | "gpt-3.5-turbo-0301"; - export function encodingForModel( model: TiktokenModel, extendSpecialTokens?: Record @@ -297,3 +263,5 @@ export function encodingForModel( never("Unknown model", model); } } + +export { TiktokenModel, TiktokenEncoding } from "./ranks/ranks"; diff --git a/js/tsconfig.json b/js/tsconfig.json index 27d2313e..a6d81595 100644 --- a/js/tsconfig.json +++ b/js/tsconfig.json @@ -8,7 +8,8 @@ "declaration": true, "outDir": "./dist", "allowSyntheticDefaultImports": true, - "resolveJsonModule": true + "resolveJsonModule": true, + "stripInternal": true }, "include": ["./src/**/*.ts"], "exclude": ["node_modules", "dist"] diff --git a/package.json b/package.json index e9b54328..814502a7 100644 --- a/package.json +++ b/package.json @@ -4,14 +4,20 @@ "js", "wasm" ], + "scripts": { + "postinstall": "tsx scripts/ranks", + "build": "turbo run build", + "test": "turbo run test" + }, "devDependencies": { - "base64-js": "^1.5.1", - "@types/node": "^18.14.4", "@changesets/cli": "^2.26.1", + "@types/node": "^18.14.4", + "base64-js": "^1.5.1", "npm-run-all": "^4.1.5", - "vitest": "^0.28.5", - 
"typescript": "^4.9.5", "outdent": "^0.8.0", - "tsx": "^3.12.3" + "tsx": "^3.12.3", + "turbo": "^1.9.4", + "typescript": "^4.9.5", + "vitest": "^0.28.5" } } diff --git a/scripts/ranks.ts b/scripts/ranks.ts index 44a3d1ab..7cc89b5d 100644 --- a/scripts/ranks.ts +++ b/scripts/ranks.ts @@ -3,6 +3,7 @@ import path from "node:path"; import outdent from "outdent"; import { fromByteArray } from "base64-js"; import registry from "../tiktoken/registry.json"; +import modelToEncoding from "../tiktoken/model_to_encoding.json"; // printable ascii characters according to python function isPrintable(u: number): boolean { @@ -227,19 +228,20 @@ function combineInsensitive(value: string, acc: string[] = [""]): string[] { } async function main() { - for (const name in registry) { - console.log(name); + for (const lib of ["wasm", "js"]) { + const targetDir = path.resolve(__dirname, "../", lib, "src/ranks"); - for (const lib of ["wasm", "js"]) { - const targetDir = path.resolve(__dirname, "../", lib, "src/ranks"); + try { + await fs.mkdir(targetDir, { recursive: true }); + } catch {} - try { - await fs.mkdir(targetDir, { recursive: true }); - } catch {} + for (const name in registry) { + console.log(name); const data = registry[name as keyof typeof registry]; const bpePath = path.resolve(targetDir, `${name}.tiktoken`); const compressPath = path.resolve(targetDir, `${name}.compress.tiktoken`); + const regexPath = path.resolve(targetDir, `${name}.regex.tiktoken`); const jsonPath = path.resolve(targetDir, `${name}.json`); const cjsPath = path.resolve(targetDir, `${name}.cjs`); const dtsPath = path.resolve(targetDir, `${name}.d.ts`); @@ -250,6 +252,7 @@ async function main() { fs.stat(bpePath), fs.stat(jsonPath), fs.stat(compressPath), + fs.stat(regexPath), fs.stat(cjsPath), fs.stat(mjsPath), fs.stat(dtsPath), @@ -279,6 +282,9 @@ async function main() { const compress = compressTiktokenBpe(bpe.bpe_ranks); await fs.writeFile(compressPath, compress, { encoding: "utf-8" }); + const regex = 
bpe.pat_str; + await fs.writeFile(regexPath, regex, { encoding: "utf-8" }); + const json = JSON.stringify({ ...bpe, bpe_ranks: compress }); await fs.writeFile(jsonPath, json, { encoding: "utf-8" }); @@ -299,6 +305,17 @@ async function main() { await fs.writeFile(dtsPath, dts, { encoding: "utf-8" }); } + + const indexPath = path.resolve(targetDir, "ranks.ts"); + const indexMjs = outdent` + export type TiktokenEncoding = ${Object.keys(registry) + .map((i) => `"${i}"`) + .join(" | ")}; + export type TiktokenModel = ${Object.keys(modelToEncoding) + .map((i) => `"${i}"`) + .join(" | ")}; + `; + await fs.writeFile(indexPath, indexMjs, { encoding: "utf-8" }); } } diff --git a/turbo.json b/turbo.json new file mode 100644 index 00000000..ceacfc17 --- /dev/null +++ b/turbo.json @@ -0,0 +1,16 @@ +{ + "$schema": "https://turbo.build/schema.json", + "pipeline": { + "build": { + "dependsOn": ["^build"], + "outputs": ["dist/**"], + "inputs": ["src/**"] + }, + "test": { + "dependsOn": ["build"] + }, + "tiktoken#test": { + "dependsOn": ["@dqbd/tiktoken#build", "tiktoken#build"] + } + } +} diff --git a/wasm/package.json b/wasm/package.json index 5d9d7ee7..ac645393 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -10,7 +10,7 @@ "build:postprocess": "tsx scripts/postprocess.ts", "wasm:bundler": "wasm-pack build --target bundler --weak-refs --release --out-dir dist && rm -rf dist/.gitignore dist/README.md dist/package.json", "wasm:lite": "wasm-pack build --target bundler --weak-refs --release --out-dir dist/lite --no-default-features && rm -rf dist/lite/.gitignore dist/lite/README.md dist/lite/package.json", - "test": "yarn vitest" + "test": "yarn vitest run" }, "repository": { "type": "git", diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 3b40312e..39efbc07 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -73,9 +73,9 @@ impl CoreBPEConstructor { special_tokens.insert(String::from(ENDOFTEXT), 50256); CoreBPEConstructor::new( - 
include_str!("../ranks/gpt2.compress.tiktoken"), + include_str!("./ranks/gpt2.compress.tiktoken"), Some(special_tokens), - "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", + include_str!("./ranks/gpt2.regex.tiktoken"), ) } @@ -85,9 +85,9 @@ impl CoreBPEConstructor { special_tokens.insert(String::from(ENDOFTEXT), 50256); CoreBPEConstructor::new( - include_str!("../ranks/r50k_base.compress.tiktoken"), + include_str!("./ranks/r50k_base.compress.tiktoken"), Some(special_tokens), - "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", + include_str!("./ranks/r50k_base.regex.tiktoken"), ) } @@ -97,9 +97,9 @@ impl CoreBPEConstructor { special_tokens.insert(String::from(ENDOFTEXT), 50256); CoreBPEConstructor::new( - include_str!("../ranks/p50k_base.compress.tiktoken"), + include_str!("./ranks/p50k_base.compress.tiktoken"), Some(special_tokens), - "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", + include_str!("./ranks/p50k_base.regex.tiktoken"), ) } @@ -112,9 +112,9 @@ impl CoreBPEConstructor { special_tokens.insert(String::from(FIM_SUFFIX), 50283); CoreBPEConstructor::new( - include_str!("../ranks/p50k_base.compress.tiktoken"), + include_str!("./ranks/p50k_base.compress.tiktoken"), Some(special_tokens), - "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", + include_str!("./ranks/p50k_base.regex.tiktoken"), ) } @@ -128,9 +128,9 @@ impl CoreBPEConstructor { special_tokens.insert(String::from(ENDOFPROMPT), 100276); CoreBPEConstructor::new( - include_str!("../ranks/cl100k_base.compress.tiktoken"), + include_str!("./ranks/cl100k_base.compress.tiktoken"), Some(special_tokens), - "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", + include_str!("./ranks/cl100k_base.regex.tiktoken"), ) } } diff --git a/yarn.lock b/yarn.lock index 8ac754a9..5b85a984 100644 
--- a/yarn.lock +++ b/yarn.lock @@ -2385,6 +2385,48 @@ tty-table@^4.1.5: wcwidth "^1.0.1" yargs "^17.7.1" +turbo-darwin-64@1.9.4: + version "1.9.4" + resolved "https://registry.yarnpkg.com/turbo-darwin-64/-/turbo-darwin-64-1.9.4.tgz#3f40cba2dcc13011f4c51101ba03f4c1aa511daf" + integrity sha512-kCmDmxyUWWI+BstTZQKNM87UbNx40C0ZHUTFqs9tmeH7d5+gA2QhqrSoBuwQYw7YYNLpbkqu1ObbppsUlIFPdQ== + +turbo-darwin-arm64@1.9.4: + version "1.9.4" + resolved "https://registry.yarnpkg.com/turbo-darwin-arm64/-/turbo-darwin-arm64-1.9.4.tgz#257e67438033d1bfb75650b5ca8664d0a00e0ccb" + integrity sha512-Of64jMEaDDHx0dzU7RwdOuh1lP021vtQun9wmEHhT0Hk/TQF+kDCywoHcY7R5nlSRcssFjysVyhCeZW6CkWrrA== + +turbo-linux-64@1.9.4: + version "1.9.4" + resolved "https://registry.yarnpkg.com/turbo-linux-64/-/turbo-linux-64-1.9.4.tgz#a4d7a06b8b786144d3c967a8f647561a71d8057a" + integrity sha512-kajvUnXlUNtgVzLW3Y/RoHrC64G+G0Ky/o1F+oP6QK/T85H8NwNHXq2F6hyIrZPNGbKpPgpetuQ1waIibxJ0rA== + +turbo-linux-arm64@1.9.4: + version "1.9.4" + resolved "https://registry.yarnpkg.com/turbo-linux-arm64/-/turbo-linux-arm64-1.9.4.tgz#5426bd72ef2a80cad390ecf4220779824faab41f" + integrity sha512-11P9Y8MoimqUzib3SU3md4g1loLF0FRHpYCbPzUTWPT3beOcdM2nop2u/yFHyBnbSxz1rTWczRJPnNoAki0B/Q== + +turbo-windows-64@1.9.4: + version "1.9.4" + resolved "https://registry.yarnpkg.com/turbo-windows-64/-/turbo-windows-64-1.9.4.tgz#b6458e3f715f0b0ab0b1bf1ded3ed6f2a8e0f1f9" + integrity sha512-2tFcFhuqs1c1DGFAk2wjU0TXrOXKoPdma9vxrTVdwvtz5Nc8XPF8RNW+1jbmRjpumGUkXou6Pe973GSvPjvD5w== + +turbo-windows-arm64@1.9.4: + version "1.9.4" + resolved "https://registry.yarnpkg.com/turbo-windows-arm64/-/turbo-windows-arm64-1.9.4.tgz#d4959a8b81dc5c3561b8e22355ab74b05d47f803" + integrity sha512-wJfEwUyWXxn6VKD2Vbycke6cm99gJ0llkr9gUnbR06eaRu1TiLY24FcFqN95/wftp0n5nne7b6K7Wz1TLh1fJQ== + +turbo@^1.9.4: + version "1.9.4" + resolved "https://registry.yarnpkg.com/turbo/-/turbo-1.9.4.tgz#d9a3e350767dc894a5f5b427144d20d435c22032" + integrity 
sha512-PqhlMCmu6sOqcVswt1tYL0TV/O0uQ8kUZWfmlEl0EHPusc2R3nzg7KVXrZbXTHXzQH5HE2oJm9iUI0mYz31i7Q== + optionalDependencies: + turbo-darwin-64 "1.9.4" + turbo-darwin-arm64 "1.9.4" + turbo-linux-64 "1.9.4" + turbo-linux-arm64 "1.9.4" + turbo-windows-64 "1.9.4" + turbo-windows-arm64 "1.9.4" + type-detect@^4.0.0, type-detect@^4.0.5: version "4.0.8" resolved "https://registry.yarnpkg.com/type-detect/-/type-detect-4.0.8.tgz#7646fb5f18871cfbb7749e69bd39a6388eb7450c" From 1bf31d3125a2addc856bb6bb177a0ef1a0a8971f Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 13 May 2023 02:53:34 +0200 Subject: [PATCH 113/207] Fix actions --- .github/workflows/{build_wasm.yml => build.yml} | 3 --- 1 file changed, 3 deletions(-) rename .github/workflows/{build_wasm.yml => build.yml} (85%) diff --git a/.github/workflows/build_wasm.yml b/.github/workflows/build.yml similarity index 85% rename from .github/workflows/build_wasm.yml rename to .github/workflows/build.yml index d4f593d5..a3192318 100644 --- a/.github/workflows/build_wasm.yml +++ b/.github/workflows/build.yml @@ -21,8 +21,5 @@ jobs: run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - run: yarn install --frozen-lockfile - working-directory: ./wasm - run: yarn run build - working-directory: ./wasm - run: yarn run test - working-directory: ./wasm From 184554893c55cdf6c15431fb450a080e81ce232d Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 13 May 2023 03:00:36 +0200 Subject: [PATCH 114/207] More descriptive name --- .github/workflows/{build.yml => build_js.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename .github/workflows/{build.yml => build_js.yml} (95%) diff --git a/.github/workflows/build.yml b/.github/workflows/build_js.yml similarity index 95% rename from .github/workflows/build.yml rename to .github/workflows/build_js.yml index a3192318..7d2c010d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build_js.yml @@ -1,4 +1,4 @@ -name: Build WASM +name: Build and Test 
JS/WASM on: [push, pull_request, workflow_dispatch] From 3debe21bea61ffc137b96045be76bc870411ab22 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 13 May 2023 03:26:48 +0200 Subject: [PATCH 115/207] Add CDN hosting of ranks --- package.json | 3 ++- static/.gitignore | 2 ++ static/package.json | 11 +++++++++++ static/src/_redirects | 1 + turbo.json | 3 +++ 5 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 static/.gitignore create mode 100644 static/package.json create mode 100644 static/src/_redirects diff --git a/package.json b/package.json index 814502a7..9fbc5bec 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,8 @@ "private": true, "workspaces": [ "js", - "wasm" + "wasm", + "static" ], "scripts": { "postinstall": "tsx scripts/ranks", diff --git a/static/.gitignore b/static/.gitignore new file mode 100644 index 00000000..cb155ee6 --- /dev/null +++ b/static/.gitignore @@ -0,0 +1,2 @@ +src/js +src/wasm \ No newline at end of file diff --git a/static/package.json b/static/package.json new file mode 100644 index 00000000..84568995 --- /dev/null +++ b/static/package.json @@ -0,0 +1,11 @@ +{ + "name": "static", + "private": true, + "scripts": { + "build": "mkdir -p ./src/wasm ./src/js && cp -R ../js/src/ranks/*.json ./src/js && cp -R ../wasm/src/ranks/*.json ./src/wasm", + "deploy": "wrangler pages publish ./src" + }, + "devDependencies": { + "wrangler": "^1.15.0" + } +} diff --git a/static/src/_redirects b/static/src/_redirects new file mode 100644 index 00000000..79ee2f10 --- /dev/null +++ b/static/src/_redirects @@ -0,0 +1 @@ +/ https://github.com/dqbd/tiktoken 301 \ No newline at end of file diff --git a/turbo.json b/turbo.json index ceacfc17..cc9d98b3 100644 --- a/turbo.json +++ b/turbo.json @@ -9,6 +9,9 @@ "test": { "dependsOn": ["build"] }, + "deploy": { + "dependsOn": ["test"] + }, "tiktoken#test": { "dependsOn": ["@dqbd/tiktoken#build", "tiktoken#build"] } From 990d8925177f399086d87ac5791e8014a0b50efe Mon Sep 17 00:00:00 2001 
From: Tat Dat Duong Date: Sat, 13 May 2023 10:44:13 +0200 Subject: [PATCH 116/207] Fix sdist --- MANIFEST.in | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/MANIFEST.in b/MANIFEST.in index a841992e..18c7ad7f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,7 @@ include *.svg include *.toml include *.md +exclude yarn.lock include Makefile global-include py.typed recursive-include scripts *.py @@ -10,4 +11,8 @@ recursive-include python *.rs *.toml recursive-exclude jni * recursive-exclude java * recursive-exclude js * +recursive-exclude wasm * +recursive-exclude static * +recursive-exclude .changeset * +recursive-exclude scripts *.ts *.json include tiktoken *.json \ No newline at end of file From 81bd68934be298eacd214a0696e4f668bda50078 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 13 May 2023 11:25:41 +0200 Subject: [PATCH 117/207] Rename js to `tiktoken-js`, rename wasm to `tiktoken` --- README.md | 40 +++++++++++++++++------------------ js/package.json | 2 +- js/test/compatibility.test.ts | 2 +- turbo.json | 4 ++-- wasm/package.json | 2 +- wasm/scripts/postprocess.ts | 2 +- wasm/test/init_error.test.ts | 6 +++--- 7 files changed, 29 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index bfbe4a9f..e58053ad 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# ⏳ @dqbd/tiktoken +# ⏳ tiktoken tiktoken is a [BPE](https://en.wikipedia.org/wiki/Byte_pair_encoding) tokeniser for use with OpenAI's models, forked from the original tiktoken library to provide NPM bindings for Node and other JS runtimes. 
@@ -6,7 +6,7 @@ OpenAI's models, forked from the original tiktoken library to provide NPM bindin The open source version of `tiktoken` can be installed from NPM: ``` -npm install @dqbd/tiktoken +npm install tiktoken ``` ## Usage @@ -15,7 +15,7 @@ Basic usage follows, which includes all the OpenAI encoders and ranks: ```typescript import assert from "node:assert"; -import { get_encoding, encoding_for_model } from "@dqbd/tiktoken"; +import { get_encoding, encoding_for_model } from "tiktoken"; const enc = get_encoding("gpt2"); assert( @@ -36,11 +36,11 @@ const enc = encoding_for_model("gpt2", { enc.free(); ``` -In constrained environments (eg. Edge Runtime, Cloudflare Workers), where you don't want to load all the encoders at once, you can use the lightweight WASM binary via `@dqbd/tiktoken/lite`. +In constrained environments (eg. Edge Runtime, Cloudflare Workers), where you don't want to load all the encoders at once, you can use the lightweight WASM binary via `tiktoken/lite`. ```typescript -const { Tiktoken } = require("@dqbd/tiktoken/lite"); -const cl100k_base = require("@dqbd/tiktoken/encoders/cl100k_base.json"); +const { Tiktoken } = require("tiktoken/lite"); +const cl100k_base = require("tiktoken/encoders/cl100k_base.json"); const encoding = new Tiktoken( cl100k_base.bpe_ranks, @@ -54,10 +54,10 @@ encoding.free(); If you want to fetch the latest ranks, use the `load` function: ```typescript -const { Tiktoken } = require("@dqbd/tiktoken/lite"); -const { load } = require("@dqbd/tiktoken/load"); -const registry = require("@dqbd/tiktoken/registry.json"); -const models = require("@dqbd/tiktoken/model_to_encoding.json"); +const { Tiktoken } = require("tiktoken/lite"); +const { load } = require("tiktoken/load"); +const registry = require("tiktoken/registry.json"); +const models = require("tiktoken/model_to_encoding.json"); async function main() { const model = await load(registry[models["gpt-3.5-turbo"]]); @@ -89,7 +89,7 @@ const encoder = new Tiktoken( Finally, you 
can a custom `init` function to override the WASM initialization logic for non-Node environments. This is useful if you are using a bundler that does not support WASM ESM integration. ```typescript -import { get_encoding, init } from "@dqbd/tiktoken/init"; +import { get_encoding, init } from "tiktoken/init"; async function main() { const wasm = "..."; // fetch the WASM binary somehow @@ -153,7 +153,7 @@ const config = { Usage in pages: ```tsx -import { get_encoding } from "@dqbd/tiktoken"; +import { get_encoding } from "tiktoken"; import { useState } from "react"; const encoding = get_encoding("cl100k_base"); @@ -178,7 +178,7 @@ export default function Home() { Usage in API routes: ```typescript -import { get_encoding } from "@dqbd/tiktoken"; +import { get_encoding } from "tiktoken"; import { NextApiRequest, NextApiResponse } from "next"; export default function handler(req: NextApiRequest, res: NextApiResponse) { @@ -224,9 +224,9 @@ Vercel Edge Runtime does support WASM modules by adding a `?module` suffix. Init ```typescript // @ts-expect-error -import wasm from "@dqbd/tiktoken/lite/tiktoken_bg.wasm?module"; -import model from "@dqbd/tiktoken/encoders/cl100k_base.json"; -import { init, Tiktoken } from "@dqbd/tiktoken/lite/init"; +import wasm from "tiktoken/lite/tiktoken_bg.wasm?module"; +import model from "tiktoken/encoders/cl100k_base.json"; +import { init, Tiktoken } from "tiktoken/lite/init"; export const config = { runtime: "edge" }; @@ -248,7 +248,7 @@ export default async function (req: Request) { ### [Cloudflare Workers](#cloudflare-workers) -Similar to Vercel Edge Runtime, Cloudflare Workers must import the WASM binary file manually and use the `@dqbd/tiktoken/lite` version to fit the 1 MB limit. However, users need to point directly at the WASM binary via a relative path (including `./node_modules/`). +Similar to Vercel Edge Runtime, Cloudflare Workers must import the WASM binary file manually and use the `tiktoken/lite` version to fit the 1 MB limit. 
However, users need to point directly at the WASM binary via a relative path (including `./node_modules/`). Add the following rule to the `wrangler.toml` to upload WASM during build: @@ -261,9 +261,9 @@ type = "CompiledWasm" Initialize the encoder with the following snippet: ```javascript -import { init, Tiktoken } from "@dqbd/tiktoken/lite/init"; -import wasm from "./node_modules/@dqbd/tiktoken/lite/tiktoken_bg.wasm"; -import model from "@dqbd/tiktoken/encoders/cl100k_base.json"; +import { init, Tiktoken } from "tiktoken/lite/init"; +import wasm from "./node_modules/tiktoken/lite/tiktoken_bg.wasm"; +import model from "tiktoken/encoders/cl100k_base.json"; export default { async fetch() { diff --git a/js/package.json b/js/package.json index d5c0d110..6572731b 100644 --- a/js/package.json +++ b/js/package.json @@ -1,5 +1,5 @@ { - "name": "tiktoken", + "name": "tiktoken-js", "version": "1.0.0", "description": "Javascript port of tiktoken", "license": "MIT", diff --git a/js/test/compatibility.test.ts b/js/test/compatibility.test.ts index 5c942e7d..05638d59 100644 --- a/js/test/compatibility.test.ts +++ b/js/test/compatibility.test.ts @@ -2,7 +2,7 @@ import { test, expect, describe, afterAll } from "vitest"; import { get_encoding } from "../../wasm/dist"; import { Tiktoken } from "../src/index"; -describe("LiteTokenizer matches the behavior of @dqbd/tiktoken", () => { +describe("LiteTokenizer matches the behavior of tiktoken", () => { const lite = new Tiktoken(); const full = get_encoding("cl100k_base"); diff --git a/turbo.json b/turbo.json index cc9d98b3..afb6d68b 100644 --- a/turbo.json +++ b/turbo.json @@ -12,8 +12,8 @@ "deploy": { "dependsOn": ["test"] }, - "tiktoken#test": { - "dependsOn": ["@dqbd/tiktoken#build", "tiktoken#build"] + "tiktoken-js#test": { + "dependsOn": ["tiktoken#build", "tiktoken-js#build"] } } } diff --git a/wasm/package.json b/wasm/package.json index ac645393..d11b2228 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -1,5 +1,5 @@ { - 
"name": "@dqbd/tiktoken", + "name": "tiktoken", "version": "1.0.7", "description": "Javascript bindings for tiktoken", "license": "MIT", diff --git a/wasm/scripts/postprocess.ts b/wasm/scripts/postprocess.ts index ab8865f6..c6421ea7 100644 --- a/wasm/scripts/postprocess.ts +++ b/wasm/scripts/postprocess.ts @@ -86,7 +86,7 @@ for (const baseDir of [ if (statements.length > 0) { call.insertStatements( 0, - `if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized.");` + `if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized.");` ); } } diff --git a/wasm/test/init_error.test.ts b/wasm/test/init_error.test.ts index f7c39d64..74e3cba7 100644 --- a/wasm/test/init_error.test.ts +++ b/wasm/test/init_error.test.ts @@ -4,15 +4,15 @@ import model from "../dist/encoders/cl100k_base.json"; it("use before initialization", () => { expect(() => encoding_for_model("gpt2")).toThrowError( - "@dqbd/tiktoken: WASM binary has not been propery initialized." + "tiktoken: WASM binary has not been propery initialized." ); expect(() => get_encoding("gpt2")).toThrowError( - "@dqbd/tiktoken: WASM binary has not been propery initialized." + "tiktoken: WASM binary has not been propery initialized." ); expect( () => new Tiktoken(model.bpe_ranks, model.special_tokens, model.pat_str) ).toThrowError( - "@dqbd/tiktoken: WASM binary has not been propery initialized." + "tiktoken: WASM binary has not been propery initialized." 
); }); From f245e6930233660733a61c4210549ac66500b2d3 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 13 May 2023 11:29:09 +0200 Subject: [PATCH 118/207] Rename description --- js/package.json | 2 +- wasm/package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/js/package.json b/js/package.json index 6572731b..913fcf76 100644 --- a/js/package.json +++ b/js/package.json @@ -1,7 +1,7 @@ { "name": "tiktoken-js", "version": "1.0.0", - "description": "Javascript port of tiktoken", + "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { "build": "tsc", diff --git a/wasm/package.json b/wasm/package.json index d11b2228..7a994f93 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -1,7 +1,7 @@ { "name": "tiktoken", "version": "1.0.7", - "description": "Javascript bindings for tiktoken", + "description": "JS/WASM bindings for tiktoken", "license": "MIT", "scripts": { "build": "run-s build:*", From 6b00942fb713d9bc83edec75d1f9697e57a62eb8 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 13 May 2023 11:48:39 +0200 Subject: [PATCH 119/207] Add publish CI step --- .github/workflows/publish.yml | 25 +++++++++++++++++++++++++ package.json | 3 ++- 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/publish.yml diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 00000000..3ff79f88 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,25 @@ +name: Publish +on: + push: + branches: + - "main" + +concurrency: ${{ github.workflow }}-${{ github.ref }} + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-node@v3 + with: + cache: yarn + - run: yarn install --frozen-lockfile + - name: Create Release Pull Request or Publish + id: changesets + uses: changesets/action@v1 + with: + publish: yarn run publish + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NPM_TOKEN: ${{ 
secrets.NPM_TOKEN }} \ No newline at end of file diff --git a/package.json b/package.json index 9fbc5bec..8f5d61b3 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,8 @@ "scripts": { "postinstall": "tsx scripts/ranks", "build": "turbo run build", - "test": "turbo run test" + "test": "turbo run test", + "publish": "turbo run build && changeset publish" }, "devDependencies": { "@changesets/cli": "^2.26.1", From 7c6a304792c8612639bf5b79b328060656be2d64 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 13 May 2023 11:53:33 +0200 Subject: [PATCH 120/207] Normalise workflows --- .github/workflows/build_js.yml | 2 -- .github/workflows/publish.yml | 10 +++++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_js.yml b/.github/workflows/build_js.yml index 7d2c010d..18235719 100644 --- a/.github/workflows/build_js.yml +++ b/.github/workflows/build_js.yml @@ -16,10 +16,8 @@ jobs: node-version: 18 registry-url: "https://registry.npmjs.org" cache: yarn - - name: Install run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - - run: yarn install --frozen-lockfile - run: yarn run build - run: yarn run test diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 3ff79f88..eae9574e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,4 +1,4 @@ -name: Publish +name: Publish JS on: push: branches: @@ -7,13 +7,17 @@ on: concurrency: ${{ github.workflow }}-${{ github.ref }} jobs: - build: + publish: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: actions/setup-node@v3 with: + node-version: 18 + registry-url: "https://registry.npmjs.org" cache: yarn + - name: Install + run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - run: yarn install --frozen-lockfile - name: Create Release Pull Request or Publish id: changesets @@ -22,4 +26,4 @@ jobs: publish: yarn run publish env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - 
NPM_TOKEN: ${{ secrets.NPM_TOKEN }} \ No newline at end of file + NPM_TOKEN: ${{ secrets.NPM_TOKEN }} From 975ed0f3ac8c50c472152faf6181253a643bd510 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 13 May 2023 13:50:54 +0200 Subject: [PATCH 121/207] Rename package once again --- js/package.json | 14 +++++++++++++- turbo.json | 4 ++-- wasm/package.json | 4 ++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/js/package.json b/js/package.json index 913fcf76..cbe2e0ce 100644 --- a/js/package.json +++ b/js/package.json @@ -1,5 +1,5 @@ { - "name": "tiktoken-js", + "name": "js-tiktoken", "version": "1.0.0", "description": "JavaScript port of tiktoken", "license": "MIT", @@ -7,6 +7,15 @@ "build": "tsc", "test": "vitest run" }, + "files": [ + "dist/**/*" + ], + "exports": { + ".": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + } + }, "repository": { "type": "git", "url": "https://github.com/dqbd/tiktoken" @@ -14,5 +23,8 @@ "dependencies": { "base64-js": "^1.5.1" }, + "publishConfig": { + "access": "public" + }, "devDependencies": {} } diff --git a/turbo.json b/turbo.json index afb6d68b..4480ffda 100644 --- a/turbo.json +++ b/turbo.json @@ -12,8 +12,8 @@ "deploy": { "dependsOn": ["test"] }, - "tiktoken-js#test": { - "dependsOn": ["tiktoken#build", "tiktoken-js#build"] + "js-tiktoken#test": { + "dependsOn": ["tiktoken#build", "js-tiktoken#build"] } } } diff --git a/wasm/package.json b/wasm/package.json index 7a994f93..2fb6fde9 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -19,5 +19,9 @@ "dependencies": {}, "devDependencies": { "ts-morph": "^17.0.1" + }, + "publishConfig": { + "directory": "./dist", + "access": "public" } } From 14f108c3d5d3bf1b9703bf10289e6b8909060335 Mon Sep 17 00:00:00 2001 From: Abhishek Maharjan Date: Sun, 14 May 2023 08:55:09 +0200 Subject: [PATCH 122/207] fix: typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e58053ad..346adaec 100644 --- 
a/README.md +++ b/README.md @@ -66,7 +66,7 @@ async function main() { model.special_tokens, model.pat_str ); - const tokens = encoding.encode("hello world"); + const tokens = encoder.encode("hello world"); encoder.free(); } From 3240a8e48684faf334497065a46c247f63fb5edd Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Mon, 15 May 2023 01:52:48 +0200 Subject: [PATCH 123/207] Split into smaller files --- .changeset/weak-fireants-hunt.md | 5 + js/examples/dynamic.ts | 22 ++ js/package.json | 13 +- js/src/core.ts | 180 ++++++++++ js/src/index.ts | 185 +---------- js/src/utils.ts | 3 + yarn.lock | 543 ++++++++++++++++++++++++++++++- 7 files changed, 759 insertions(+), 192 deletions(-) create mode 100644 .changeset/weak-fireants-hunt.md create mode 100644 js/examples/dynamic.ts create mode 100644 js/src/core.ts create mode 100644 js/src/utils.ts diff --git a/.changeset/weak-fireants-hunt.md b/.changeset/weak-fireants-hunt.md new file mode 100644 index 00000000..1a0b6e86 --- /dev/null +++ b/.changeset/weak-fireants-hunt.md @@ -0,0 +1,5 @@ +--- +"js-tiktoken": patch +--- + +Split core into smaller packages, expose core tokenizer for CDN pulling diff --git a/js/examples/dynamic.ts b/js/examples/dynamic.ts new file mode 100644 index 00000000..7ce28b20 --- /dev/null +++ b/js/examples/dynamic.ts @@ -0,0 +1,22 @@ +import { Tiktoken, TiktokenBPE, TiktokenEncoding } from "../dist"; + +const cache: Record = {}; + +async function getEncoding(encoding: TiktokenEncoding) { + if (!(encoding in cache)) { + const res = await fetch(`https://tiktoken.pages.dev/js/${encoding}.json`); + + if (!res.ok) throw new Error("Failed to fetch encoding"); + cache[encoding] = await res.json(); + } + return new Tiktoken(cache[encoding]); +} + +async function main() { + const encodings = await getEncoding("cl100k_base"); + const text = "function foo() { return 1; }"; + const tokens = encodings.encode(text); + console.log(tokens); +} + +main(); diff --git a/js/package.json b/js/package.json index 
cbe2e0ce..6719f2ea 100644 --- a/js/package.json +++ b/js/package.json @@ -4,7 +4,7 @@ "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { - "build": "tsc", + "build": "rm -rf dist && tsup", "test": "vitest run" }, "files": [ @@ -26,5 +26,14 @@ "publishConfig": { "access": "public" }, - "devDependencies": {} + "devDependencies": { + "tsup": "^6.7.0" + }, + "tsup": { + "entry": [ + "src/index.ts" + ], + "dts": true, + "clean": true + } } diff --git a/js/src/core.ts b/js/src/core.ts new file mode 100644 index 00000000..06e20758 --- /dev/null +++ b/js/src/core.ts @@ -0,0 +1,180 @@ +import base64 from "base64-js"; + +function bytePairMerge( + piece: Uint8Array, + ranks: Map +): Array<{ start: number; end: number }> { + let parts: Array<{ start: number; end: number }> = Array.from( + { length: piece.length }, + (_, i) => ({ start: i, end: i + 1 }) + ); + + while (parts.length > 1) { + let minRank: [number, number] | null = null; + + for (let i = 0; i < parts.length - 1; i++) { + const slice = piece.slice(parts[i].start, parts[i + 1].end); + const rank = ranks.get(slice.join(",")); + if (rank == null) continue; + + if (minRank == null || rank < minRank[0]) { + minRank = [rank, i]; + } + } + + if (minRank != null) { + const i = minRank[1]; + parts[i] = { start: parts[i].start, end: parts[i + 1].end }; + parts.splice(i + 1, 1); + } else { + break; + } + } + return parts; +} + +function bytePairEncode(piece: Uint8Array, ranks: Map) { + if (piece.length === 1) return [ranks.get(piece.join(","))!]; + + return bytePairMerge(piece, ranks) + .map((p) => ranks.get(piece.slice(p.start, p.end).join(","))) + .filter((x): x is number => x != null); +} + +function escapeRegex(str: string) { + return str.replace(/[\\^$*+?.()|[\]{}]/g, "\\$&"); +} + +export interface TiktokenBPE { + pat_str: string; + special_tokens: Record; + bpe_ranks: string; +} + +export class Tiktoken { + /** @internal */ + protected specialTokens: Record; + + /** @internal */ + protected 
inverseSpecialTokens: Record; + + /** @internal */ + protected patStr: string; + + /** @internal */ + protected textEncoder = new TextEncoder(); + + /** @internal */ + protected textDecoder = new TextDecoder("utf-8"); + + /** @internal */ + protected rankMap = new Map(); + + /** @internal */ + protected textMap = new Map(); + + constructor( + ranks: TiktokenBPE, + extendedSpecialTokens?: Record + ) { + this.patStr = ranks.pat_str; + + const uncompressed = ranks.bpe_ranks + .split("\n") + .filter(Boolean) + .reduce>((memo, x) => { + const [_, offsetStr, ...tokens] = x.split(" "); + const offset = Number.parseInt(offsetStr, 10); + tokens.forEach((token, i) => (memo[token] = offset + i)); + return memo; + }, {}); + + for (const [token, rank] of Object.entries(uncompressed)) { + const bytes = base64.toByteArray(token); + this.rankMap.set(bytes.join(","), rank); + this.textMap.set(rank, bytes); + } + + this.specialTokens = { ...ranks.special_tokens, ...extendedSpecialTokens }; + this.inverseSpecialTokens = Object.entries(this.specialTokens).reduce< + Record + >((memo, [text, rank]) => { + memo[rank] = this.textEncoder.encode(text); + return memo; + }, {}); + } + + encode(text: string, allowedSpecial: Set | "all" = new Set()) { + const regexes = new RegExp(this.patStr, "ug"); + const specialRegex = new RegExp( + Object.keys(this.specialTokens) + .map((i) => escapeRegex(i)) + .join("|"), + "g" + ); + + const ret: number[] = []; + + const allowedSpecialSet = + allowedSpecial === "all" + ? new Set(Object.keys(this.specialTokens)) + : allowedSpecial; + + let start = 0; + while (true) { + let nextSpecial: RegExpMatchArray | null = null; + let startFind = start; + + while (true) { + specialRegex.lastIndex = startFind; + nextSpecial = specialRegex.exec(text); + if (nextSpecial == null || allowedSpecialSet.has(nextSpecial[0])) break; + startFind = nextSpecial.index! + 1; + } + + const end = nextSpecial?.index ?? 
text.length; + for (const match of text.substring(start, end).matchAll(regexes)) { + const piece = this.textEncoder.encode(match[0]); + const token = this.rankMap.get(piece.join(",")); + + if (token != null) { + ret.push(token); + continue; + } + + ret.push(...bytePairEncode(piece, this.rankMap)); + } + + if (nextSpecial == null) break; + let token = this.specialTokens[nextSpecial[0]]; + ret.push(token); + + start = nextSpecial.index! + nextSpecial[0].length; + } + + return ret; + } + + decode(tokens: number[]) { + const res: Uint8Array[] = []; + let length = 0; + for (let i = 0; i < tokens.length; ++i) { + const token = tokens[i]; + const bytes = this.textMap.get(token) ?? this.inverseSpecialTokens[token]; + + if (bytes != null) { + res.push(bytes); + length += bytes.length; + } + } + + const mergedArray = new Uint8Array(length); + let i = 0; + for (const bytes of res) { + mergedArray.set(bytes, i); + i += bytes.length; + } + + return this.textDecoder.decode(mergedArray); + } +} diff --git a/js/src/index.ts b/js/src/index.ts index 4c495bbb..a6c1205d 100644 --- a/js/src/index.ts +++ b/js/src/index.ts @@ -5,188 +5,8 @@ import p50k_edit from "./ranks/p50k_edit"; import r50k_base from "./ranks/r50k_base"; import cl100k_base from "./ranks/cl100k_base"; -import base64 from "base64-js"; - -function never(message: string, _: never) { - throw new Error(message); -} - -function bytePairMerge( - piece: Uint8Array, - ranks: Map -): Array<{ start: number; end: number }> { - let parts: Array<{ start: number; end: number }> = Array.from( - { length: piece.length }, - (_, i) => ({ start: i, end: i + 1 }) - ); - - while (parts.length > 1) { - let minRank: [number, number] | null = null; - - for (let i = 0; i < parts.length - 1; i++) { - const slice = piece.slice(parts[i].start, parts[i + 1].end); - const rank = ranks.get(slice.join(",")); - if (rank == null) continue; - - if (minRank == null || rank < minRank[0]) { - minRank = [rank, i]; - } - } - - if (minRank != null) { - const 
i = minRank[1]; - parts[i] = { start: parts[i].start, end: parts[i + 1].end }; - parts.splice(i + 1, 1); - } else { - break; - } - } - return parts; -} - -function bytePairEncode(piece: Uint8Array, ranks: Map) { - if (piece.length === 1) return [ranks.get(piece.join(","))!]; - - return bytePairMerge(piece, ranks) - .map((p) => ranks.get(piece.slice(p.start, p.end).join(","))) - .filter((x): x is number => x != null); -} - -function escapeRegex(str: string) { - return str.replace(/[\\^$*+?.()|[\]{}]/g, "\\$&"); -} - -export class Tiktoken { - /** @internal */ - protected specialTokens: Record; - - /** @internal */ - protected inverseSpecialTokens: Record; - - /** @internal */ - protected patStr: string; - - /** @internal */ - protected textEncoder = new TextEncoder(); - - /** @internal */ - protected textDecoder = new TextDecoder("utf-8"); - - /** @internal */ - protected rankMap = new Map(); - - /** @internal */ - protected textMap = new Map(); - - constructor( - ranks: { - pat_str: string; - special_tokens: Record; - bpe_ranks: string; - } = cl100k_base, - extendedSpecialTokens?: Record - ) { - this.patStr = ranks.pat_str; - - const uncompressed = ranks.bpe_ranks - .split("\n") - .filter(Boolean) - .reduce>((memo, x) => { - const [_, offsetStr, ...tokens] = x.split(" "); - const offset = Number.parseInt(offsetStr, 10); - tokens.forEach((token, i) => (memo[token] = offset + i)); - return memo; - }, {}); - - for (const [token, rank] of Object.entries(uncompressed)) { - const bytes = base64.toByteArray(token); - this.rankMap.set(bytes.join(","), rank); - this.textMap.set(rank, bytes); - } - - this.specialTokens = { ...ranks.special_tokens, ...extendedSpecialTokens }; - this.inverseSpecialTokens = Object.entries(this.specialTokens).reduce< - Record - >((memo, [text, rank]) => { - memo[rank] = this.textEncoder.encode(text); - return memo; - }, {}); - } - - encode(text: string, allowedSpecial: Set | "all" = new Set()) { - const regexes = new RegExp(this.patStr, "ug"); - 
const specialRegex = new RegExp( - Object.keys(this.specialTokens) - .map((i) => escapeRegex(i)) - .join("|"), - "g" - ); - - const ret: number[] = []; - - const allowedSpecialSet = - allowedSpecial === "all" - ? new Set(Object.keys(this.specialTokens)) - : allowedSpecial; - - let start = 0; - while (true) { - let nextSpecial: RegExpMatchArray | null = null; - let startFind = start; - - while (true) { - specialRegex.lastIndex = startFind; - nextSpecial = specialRegex.exec(text); - if (nextSpecial == null || allowedSpecialSet.has(nextSpecial[0])) break; - startFind = nextSpecial.index! + 1; - } - - const end = nextSpecial?.index ?? text.length; - for (const match of text.substring(start, end).matchAll(regexes)) { - const piece = this.textEncoder.encode(match[0]); - const token = this.rankMap.get(piece.join(",")); - - if (token != null) { - ret.push(token); - continue; - } - - ret.push(...bytePairEncode(piece, this.rankMap)); - } - - if (nextSpecial == null) break; - let token = this.specialTokens[nextSpecial[0]]; - ret.push(token); - - start = nextSpecial.index! + nextSpecial[0].length; - } - - return ret; - } - - decode(tokens: number[]) { - const res: Uint8Array[] = []; - let length = 0; - for (let i = 0; i < tokens.length; ++i) { - const token = tokens[i]; - const bytes = this.textMap.get(token) ?? 
this.inverseSpecialTokens[token]; - - if (bytes != null) { - res.push(bytes); - length += bytes.length; - } - } - - const mergedArray = new Uint8Array(length); - let i = 0; - for (const bytes of res) { - mergedArray.set(bytes, i); - i += bytes.length; - } - - return this.textDecoder.decode(mergedArray); - } -} +import { Tiktoken } from "./core"; +import { never } from "./utils"; export function getEncoding( encoding: TiktokenEncoding, @@ -264,4 +84,5 @@ export function encodingForModel( } } +export { Tiktoken, TiktokenBPE } from "./core"; export { TiktokenModel, TiktokenEncoding } from "./ranks/ranks"; diff --git a/js/src/utils.ts b/js/src/utils.ts new file mode 100644 index 00000000..b7615d12 --- /dev/null +++ b/js/src/utils.ts @@ -0,0 +1,3 @@ +export function never(message: string, _: never) { + throw new Error(message); +} diff --git a/yarn.lock b/yarn.lock index 5b85a984..f9816538 100644 --- a/yarn.lock +++ b/yarn.lock @@ -259,111 +259,258 @@ resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.17.18.tgz#4aa8d8afcffb4458736ca9b32baa97d7cb5861ea" integrity sha512-/iq0aK0eeHgSC3z55ucMAHO05OIqmQehiGay8eP5l/5l+iEr4EIbh4/MI8xD9qRFjqzgkc0JkX0LculNC9mXBw== +"@esbuild/android-arm64@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.17.19.tgz#bafb75234a5d3d1b690e7c2956a599345e84a2fd" + integrity sha512-KBMWvEZooR7+kzY0BtbTQn0OAYY7CsiydT63pVEaPtVYF0hXbUaOyZog37DKxK7NF3XacBJOpYT4adIJh+avxA== + "@esbuild/android-arm@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.17.18.tgz#74a7e95af4ee212ebc9db9baa87c06a594f2a427" integrity sha512-EmwL+vUBZJ7mhFCs5lA4ZimpUH3WMAoqvOIYhVQwdIgSpHC8ImHdsRyhHAVxpDYUSm0lWvd63z0XH1IlImS2Qw== +"@esbuild/android-arm@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.17.19.tgz#5898f7832c2298bc7d0ab53701c57beb74d78b4d" + integrity 
sha512-rIKddzqhmav7MSmoFCmDIb6e2W57geRsM94gV2l38fzhXMwq7hZoClug9USI2pFRGL06f4IOPHHpFNOkWieR8A== + "@esbuild/android-x64@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.17.18.tgz#1dcd13f201997c9fe0b204189d3a0da4eb4eb9b6" integrity sha512-x+0efYNBF3NPW2Xc5bFOSFW7tTXdAcpfEg2nXmxegm4mJuVeS+i109m/7HMiOQ6M12aVGGFlqJX3RhNdYM2lWg== +"@esbuild/android-x64@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.17.19.tgz#658368ef92067866d95fb268719f98f363d13ae1" + integrity sha512-uUTTc4xGNDT7YSArp/zbtmbhO0uEEK9/ETW29Wk1thYUJBz3IVnvgEiEwEa9IeLyvnpKrWK64Utw2bgUmDveww== + "@esbuild/darwin-arm64@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.17.18.tgz#444f3b961d4da7a89eb9bd35cfa4415141537c2a" integrity sha512-6tY+djEAdF48M1ONWnQb1C+6LiXrKjmqjzPNPWXhu/GzOHTHX2nh8Mo2ZAmBFg0kIodHhciEgUBtcYCAIjGbjQ== +"@esbuild/darwin-arm64@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.17.19.tgz#584c34c5991b95d4d48d333300b1a4e2ff7be276" + integrity sha512-80wEoCfF/hFKM6WE1FyBHc9SfUblloAWx6FJkFWTWiCoht9Mc0ARGEM47e67W9rI09YoUxJL68WHfDRYEAvOhg== + "@esbuild/darwin-x64@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.17.18.tgz#a6da308d0ac8a498c54d62e0b2bfb7119b22d315" integrity sha512-Qq84ykvLvya3dO49wVC9FFCNUfSrQJLbxhoQk/TE1r6MjHo3sFF2tlJCwMjhkBVq3/ahUisj7+EpRSz0/+8+9A== +"@esbuild/darwin-x64@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.17.19.tgz#7751d236dfe6ce136cce343dce69f52d76b7f6cb" + integrity sha512-IJM4JJsLhRYr9xdtLytPLSH9k/oxR3boaUIYiHkAawtwNOXKE8KoU8tMvryogdcT8AU+Bflmh81Xn6Q0vTZbQw== + "@esbuild/freebsd-arm64@0.17.18": version "0.17.18" resolved 
"https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.17.18.tgz#b83122bb468889399d0d63475d5aea8d6829c2c2" integrity sha512-fw/ZfxfAzuHfaQeMDhbzxp9mc+mHn1Y94VDHFHjGvt2Uxl10mT4CDavHm+/L9KG441t1QdABqkVYwakMUeyLRA== +"@esbuild/freebsd-arm64@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.17.19.tgz#cacd171665dd1d500f45c167d50c6b7e539d5fd2" + integrity sha512-pBwbc7DufluUeGdjSU5Si+P3SoMF5DQ/F/UmTSb8HXO80ZEAJmrykPyzo1IfNbAoaqw48YRpv8shwd1NoI0jcQ== + "@esbuild/freebsd-x64@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.17.18.tgz#af59e0e03fcf7f221b34d4c5ab14094862c9c864" integrity sha512-FQFbRtTaEi8ZBi/A6kxOC0V0E9B/97vPdYjY9NdawyLd4Qk5VD5g2pbWN2VR1c0xhzcJm74HWpObPszWC+qTew== +"@esbuild/freebsd-x64@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.17.19.tgz#0769456eee2a08b8d925d7c00b79e861cb3162e4" + integrity sha512-4lu+n8Wk0XlajEhbEffdy2xy53dpR06SlzvhGByyg36qJw6Kpfk7cp45DR/62aPH9mtJRmIyrXAS5UWBrJT6TQ== + "@esbuild/linux-arm64@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.17.18.tgz#8551d72ba540c5bce4bab274a81c14ed01eafdcf" integrity sha512-R7pZvQZFOY2sxUG8P6A21eq6q+eBv7JPQYIybHVf1XkQYC+lT7nDBdC7wWKTrbvMXKRaGudp/dzZCwL/863mZQ== +"@esbuild/linux-arm64@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.17.19.tgz#38e162ecb723862c6be1c27d6389f48960b68edb" + integrity sha512-ct1Tg3WGwd3P+oZYqic+YZF4snNl2bsnMKRkb3ozHmnM0dGWuxcPTTntAF6bOP0Sp4x0PjSF+4uHQ1xvxfRKqg== + "@esbuild/linux-arm@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.17.18.tgz#e09e76e526df4f665d4d2720d28ff87d15cdf639" integrity sha512-jW+UCM40LzHcouIaqv3e/oRs0JM76JfhHjCavPxMUti7VAPh8CaGSlS7cmyrdpzSk7A+8f0hiedHqr/LMnfijg== +"@esbuild/linux-arm@0.17.19": + 
version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.17.19.tgz#1a2cd399c50040184a805174a6d89097d9d1559a" + integrity sha512-cdmT3KxjlOQ/gZ2cjfrQOtmhG4HJs6hhvm3mWSRDPtZ/lP5oe8FWceS10JaSJC13GBd4eH/haHnqf7hhGNLerA== + "@esbuild/linux-ia32@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.17.18.tgz#47878860ce4fe73a36fd8627f5647bcbbef38ba4" integrity sha512-ygIMc3I7wxgXIxk6j3V00VlABIjq260i967Cp9BNAk5pOOpIXmd1RFQJQX9Io7KRsthDrQYrtcx7QCof4o3ZoQ== +"@esbuild/linux-ia32@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.17.19.tgz#e28c25266b036ce1cabca3c30155222841dc035a" + integrity sha512-w4IRhSy1VbsNxHRQpeGCHEmibqdTUx61Vc38APcsRbuVgK0OPEnQ0YD39Brymn96mOx48Y2laBQGqgZ0j9w6SQ== + "@esbuild/linux-loong64@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.17.18.tgz#3f8fbf5267556fc387d20b2e708ce115de5c967a" integrity sha512-bvPG+MyFs5ZlwYclCG1D744oHk1Pv7j8psF5TfYx7otCVmcJsEXgFEhQkbhNW8otDHL1a2KDINW20cfCgnzgMQ== +"@esbuild/linux-loong64@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.17.19.tgz#0f887b8bb3f90658d1a0117283e55dbd4c9dcf72" + integrity sha512-2iAngUbBPMq439a+z//gE+9WBldoMp1s5GWsUSgqHLzLJ9WoZLZhpwWuym0u0u/4XmZ3gpHmzV84PonE+9IIdQ== + "@esbuild/linux-mips64el@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.17.18.tgz#9d896d8f3c75f6c226cbeb840127462e37738226" integrity sha512-oVqckATOAGuiUOa6wr8TXaVPSa+6IwVJrGidmNZS1cZVx0HqkTMkqFGD2HIx9H1RvOwFeWYdaYbdY6B89KUMxA== +"@esbuild/linux-mips64el@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.17.19.tgz#f5d2a0b8047ea9a5d9f592a178ea054053a70289" + integrity 
sha512-LKJltc4LVdMKHsrFe4MGNPp0hqDFA1Wpt3jE1gEyM3nKUvOiO//9PheZZHfYRfYl6AwdTH4aTcXSqBerX0ml4A== + "@esbuild/linux-ppc64@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.17.18.tgz#3d9deb60b2d32c9985bdc3e3be090d30b7472783" integrity sha512-3dLlQO+b/LnQNxgH4l9rqa2/IwRJVN9u/bK63FhOPB4xqiRqlQAU0qDU3JJuf0BmaH0yytTBdoSBHrb2jqc5qQ== +"@esbuild/linux-ppc64@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.17.19.tgz#876590e3acbd9fa7f57a2c7d86f83717dbbac8c7" + integrity sha512-/c/DGybs95WXNS8y3Ti/ytqETiW7EU44MEKuCAcpPto3YjQbyK3IQVKfF6nbghD7EcLUGl0NbiL5Rt5DMhn5tg== + "@esbuild/linux-riscv64@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.17.18.tgz#8a943cf13fd24ff7ed58aefb940ef178f93386bc" integrity sha512-/x7leOyDPjZV3TcsdfrSI107zItVnsX1q2nho7hbbQoKnmoeUWjs+08rKKt4AUXju7+3aRZSsKrJtaRmsdL1xA== +"@esbuild/linux-riscv64@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.17.19.tgz#7f49373df463cd9f41dc34f9b2262d771688bf09" + integrity sha512-FC3nUAWhvFoutlhAkgHf8f5HwFWUL6bYdvLc/TTuxKlvLi3+pPzdZiFKSWz/PF30TB1K19SuCxDTI5KcqASJqA== + "@esbuild/linux-s390x@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.17.18.tgz#66cb01f4a06423e5496facabdce4f7cae7cb80e5" integrity sha512-cX0I8Q9xQkL/6F5zWdYmVf5JSQt+ZfZD2bJudZrWD+4mnUvoZ3TDDXtDX2mUaq6upMFv9FlfIh4Gfun0tbGzuw== +"@esbuild/linux-s390x@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.17.19.tgz#e2afd1afcaf63afe2c7d9ceacd28ec57c77f8829" + integrity sha512-IbFsFbxMWLuKEbH+7sTkKzL6NJmG2vRyy6K7JJo55w+8xDk7RElYn6xvXtDW8HCfoKBFK69f3pgBJSUSQPr+4Q== + "@esbuild/linux-x64@0.17.18": version "0.17.18" resolved 
"https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.17.18.tgz#23c26050c6c5d1359c7b774823adc32b3883b6c9" integrity sha512-66RmRsPlYy4jFl0vG80GcNRdirx4nVWAzJmXkevgphP1qf4dsLQCpSKGM3DUQCojwU1hnepI63gNZdrr02wHUA== +"@esbuild/linux-x64@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.17.19.tgz#8a0e9738b1635f0c53389e515ae83826dec22aa4" + integrity sha512-68ngA9lg2H6zkZcyp22tsVt38mlhWde8l3eJLWkyLrp4HwMUr3c1s/M2t7+kHIhvMjglIBrFpncX1SzMckomGw== + "@esbuild/netbsd-x64@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.17.18.tgz#789a203d3115a52633ff6504f8cbf757f15e703b" integrity sha512-95IRY7mI2yrkLlTLb1gpDxdC5WLC5mZDi+kA9dmM5XAGxCME0F8i4bYH4jZreaJ6lIZ0B8hTrweqG1fUyW7jbg== +"@esbuild/netbsd-x64@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.17.19.tgz#c29fb2453c6b7ddef9a35e2c18b37bda1ae5c462" + integrity sha512-CwFq42rXCR8TYIjIfpXCbRX0rp1jo6cPIUPSaWwzbVI4aOfX96OXY8M6KNmtPcg7QjYeDmN+DD0Wp3LaBOLf4Q== + "@esbuild/openbsd-x64@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.17.18.tgz#d7b998a30878f8da40617a10af423f56f12a5e90" integrity sha512-WevVOgcng+8hSZ4Q3BKL3n1xTv5H6Nb53cBrtzzEjDbbnOmucEVcZeGCsCOi9bAOcDYEeBZbD2SJNBxlfP3qiA== +"@esbuild/openbsd-x64@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.17.19.tgz#95e75a391403cb10297280d524d66ce04c920691" + integrity sha512-cnq5brJYrSZ2CF6c35eCmviIN3k3RczmHz8eYaVlNasVqsNY+JKohZU5MKmaOI+KkllCdzOKKdPs762VCPC20g== + "@esbuild/sunos-x64@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.17.18.tgz#ecad0736aa7dae07901ba273db9ef3d3e93df31f" integrity sha512-Rzf4QfQagnwhQXVBS3BYUlxmEbcV7MY+BH5vfDZekU5eYpcffHSyjU8T0xucKVuOcdCsMo+Ur5wmgQJH2GfNrg== +"@esbuild/sunos-x64@0.17.19": + version "0.17.19" + resolved 
"https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.17.19.tgz#722eaf057b83c2575937d3ffe5aeb16540da7273" + integrity sha512-vCRT7yP3zX+bKWFeP/zdS6SqdWB8OIpaRq/mbXQxTGHnIxspRtigpkUcDMlSCOejlHowLqII7K2JKevwyRP2rg== + "@esbuild/win32-arm64@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.17.18.tgz#58dfc177da30acf956252d7c8ae9e54e424887c4" integrity sha512-Kb3Ko/KKaWhjeAm2YoT/cNZaHaD1Yk/pa3FTsmqo9uFh1D1Rfco7BBLIPdDOozrObj2sahslFuAQGvWbgWldAg== +"@esbuild/win32-arm64@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.17.19.tgz#9aa9dc074399288bdcdd283443e9aeb6b9552b6f" + integrity sha512-yYx+8jwowUstVdorcMdNlzklLYhPxjniHWFKgRqH7IFlUEa0Umu3KuYplf1HUZZ422e3NU9F4LGb+4O0Kdcaag== + "@esbuild/win32-ia32@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.17.18.tgz#340f6163172b5272b5ae60ec12c312485f69232b" integrity sha512-0/xUMIdkVHwkvxfbd5+lfG7mHOf2FRrxNbPiKWg9C4fFrB8H0guClmaM3BFiRUYrznVoyxTIyC/Ou2B7QQSwmw== +"@esbuild/win32-ia32@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.17.19.tgz#95ad43c62ad62485e210f6299c7b2571e48d2b03" + integrity sha512-eggDKanJszUtCdlVs0RB+h35wNlb5v4TWEkq4vZcmVt5u/HiDZrTXe2bWFQUez3RgNHwx/x4sk5++4NSSicKkw== + "@esbuild/win32-x64@0.17.18": version "0.17.18" resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.17.18.tgz#3a8e57153905308db357fd02f57c180ee3a0a1fa" integrity sha512-qU25Ma1I3NqTSHJUOKi9sAH1/Mzuvlke0ioMJRthLXKm7JiSKVwFghlGbDLOO2sARECGhja4xYfRAZNPAkooYg== +"@esbuild/win32-x64@0.17.19": + version "0.17.19" + resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.17.19.tgz#8cfaf2ff603e9aabb910e9c0558c26cf32744061" + integrity sha512-lAhycmKnVOuRYNtRtatQR1LPQf2oYCkRGkSFnseDAKPl8lu5SOsK/e1sXe5a0Pc5kHIHe6P2I/ilntNv2xf3cA== + +"@jridgewell/gen-mapping@^0.3.2": + version "0.3.3" + 
resolved "https://registry.yarnpkg.com/@jridgewell/gen-mapping/-/gen-mapping-0.3.3.tgz#7e02e6eb5df901aaedb08514203b096614024098" + integrity sha512-HLhSWOLRi875zjjMG/r+Nv0oCW8umGb0BgEhyX3dDX3egwZtB8PqLnjz3yedt8R5StBrzcg4aBpnh8UA9D1BoQ== + dependencies: + "@jridgewell/set-array" "^1.0.1" + "@jridgewell/sourcemap-codec" "^1.4.10" + "@jridgewell/trace-mapping" "^0.3.9" + +"@jridgewell/resolve-uri@3.1.0": + version "3.1.0" + resolved "https://registry.yarnpkg.com/@jridgewell/resolve-uri/-/resolve-uri-3.1.0.tgz#2203b118c157721addfe69d47b70465463066d78" + integrity sha512-F2msla3tad+Mfht5cJq7LSXcdudKTWCVYUgw6pLFOOHSTtZlj6SWNYAp+AhuqLmWdBO2X5hPrLcu8cVP8fy28w== + +"@jridgewell/set-array@^1.0.1": + version "1.1.2" + resolved "https://registry.yarnpkg.com/@jridgewell/set-array/-/set-array-1.1.2.tgz#7c6cf998d6d20b914c0a55a91ae928ff25965e72" + integrity sha512-xnkseuNADM0gt2bs+BvhO0p78Mk762YnZdsuzFV018NoG1Sj1SCQvpSqa7XUaTam5vAGasABV9qXASMKnFMwMw== + +"@jridgewell/sourcemap-codec@1.4.14": + version "1.4.14" + resolved "https://registry.yarnpkg.com/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.14.tgz#add4c98d341472a289190b424efbdb096991bb24" + integrity sha512-XPSJHWmi394fuUuzDnGz1wiKqWfo1yXecHQMRf2l6hztTO+nPru658AyDngaBe7isIxEkRsPR3FZh+s7iVa4Uw== + +"@jridgewell/sourcemap-codec@^1.4.10": + version "1.4.15" + resolved "https://registry.yarnpkg.com/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz#d7c6e6755c78567a951e04ab52ef0fd26de59f32" + integrity sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg== + +"@jridgewell/trace-mapping@^0.3.9": + version "0.3.18" + resolved "https://registry.yarnpkg.com/@jridgewell/trace-mapping/-/trace-mapping-0.3.18.tgz#25783b2086daf6ff1dcb53c9249ae480e4dd4cd6" + integrity sha512-w+niJYzMHdd7USdiH2U6869nqhD2nbfZXND5Yp93qIbEmnDNk7PD48o+YchRVpzMU7M6jVCbenTR7PA1FLQ9pA== + dependencies: + "@jridgewell/resolve-uri" "3.1.0" + "@jridgewell/sourcemap-codec" "1.4.14" + "@manypkg/find-root@^1.1.0": 
version "1.1.0" resolved "https://registry.yarnpkg.com/@manypkg/find-root/-/find-root-1.1.0.tgz#a62d8ed1cd7e7d4c11d9d52a8397460b5d4ad29f" @@ -551,6 +698,19 @@ ansi-styles@^6.0.0: resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-6.2.1.tgz#0e62320cf99c21afff3b3012192546aacbfb05c5" integrity sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug== +any-promise@^1.0.0: + version "1.3.0" + resolved "https://registry.yarnpkg.com/any-promise/-/any-promise-1.3.0.tgz#abc6afeedcea52e809cdc0376aed3ce39635d17f" + integrity sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A== + +anymatch@~3.1.2: + version "3.1.3" + resolved "https://registry.yarnpkg.com/anymatch/-/anymatch-3.1.3.tgz#790c58b19ba1720a84205b57c618d5ad8524973e" + integrity sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw== + dependencies: + normalize-path "^3.0.0" + picomatch "^2.0.4" + argparse@^1.0.7: version "1.0.10" resolved "https://registry.yarnpkg.com/argparse/-/argparse-1.0.10.tgz#bcd6791ea5ae09725e17e5ad988134cd40b3d911" @@ -613,6 +773,11 @@ better-path-resolve@1.0.0: dependencies: is-windows "^1.0.0" +binary-extensions@^2.0.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-2.2.0.tgz#75f502eeaf9ffde42fc98829645be4ea76bd9e2d" + integrity sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA== + brace-expansion@^1.1.7: version "1.1.11" resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" @@ -628,7 +793,7 @@ brace-expansion@^2.0.1: dependencies: balanced-match "^1.0.0" -braces@^3.0.2: +braces@^3.0.2, braces@~3.0.2: version "3.0.2" resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.2.tgz#3454e1a462ee8d599e236df336cd9ea4f8afe107" integrity 
sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A== @@ -647,7 +812,14 @@ buffer-from@^1.0.0: resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.2.tgz#2b146a6fd72e80b4f55d255f35ed59a3a9a41bd5" integrity sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ== -cac@^6.7.14: +bundle-require@^4.0.0: + version "4.0.1" + resolved "https://registry.yarnpkg.com/bundle-require/-/bundle-require-4.0.1.tgz#2cc1ad76428043d15e0e7f30990ee3d5404aa2e3" + integrity sha512-9NQkRHlNdNpDBGmLpngF3EFDcwodhMUuLz9PaWYciVcQF9SE4LFjM2DB/xV1Li5JiuDMv7ZUWuC3rGbqR0MAXQ== + dependencies: + load-tsconfig "^0.2.3" + +cac@^6.7.12, cac@^6.7.14: version "6.7.14" resolved "https://registry.yarnpkg.com/cac/-/cac-6.7.14.tgz#804e1e6f506ee363cb0e3ccbb09cad5dd9870959" integrity sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ== @@ -714,6 +886,21 @@ check-error@^1.0.2: resolved "https://registry.yarnpkg.com/check-error/-/check-error-1.0.2.tgz#574d312edd88bb5dd8912e9286dd6c0aed4aac82" integrity sha512-BrgHpW9NURQgzoNyjfq0Wu6VFO6D7IZEmJNdtgNqpzGG8RuNFHt2jQxWlAs4HMe119chBnv+34syEZtc6IhLtA== +chokidar@^3.5.1: + version "3.5.3" + resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.5.3.tgz#1cf37c8707b932bd1af1ae22c0432e2acd1903bd" + integrity sha512-Dr3sfKRP6oTcjf2JmUmFJfeVMvXBdegxB0iVQ5eb2V10uFJUCAS8OByZdVAyVb8xXNz3GjjTgj9kLWsZTqE6kw== + dependencies: + anymatch "~3.1.2" + braces "~3.0.2" + glob-parent "~5.1.2" + is-binary-path "~2.1.0" + is-glob "~4.0.1" + normalize-path "~3.0.0" + readdirp "~3.6.0" + optionalDependencies: + fsevents "~2.3.2" + ci-info@^3.1.0, ci-info@^3.2.0: version "3.8.0" resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-3.8.0.tgz#81408265a5380c929f0bc665d62256628ce9ef91" @@ -779,6 +966,11 @@ color-name@~1.1.4: resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" 
integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== +commander@^4.0.0: + version "4.1.1" + resolved "https://registry.yarnpkg.com/commander/-/commander-4.1.1.tgz#9fd602bd936294e9e9ef46a3f4d6964044b18068" + integrity sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA== + concat-map@0.0.1: version "0.0.1" resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" @@ -804,6 +996,15 @@ cross-spawn@^6.0.5: shebang-command "^1.2.0" which "^1.2.9" +cross-spawn@^7.0.3: + version "7.0.3" + resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6" + integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w== + dependencies: + path-key "^3.1.0" + shebang-command "^2.0.0" + which "^2.0.1" + csv-generate@^3.4.3: version "3.4.3" resolved "https://registry.yarnpkg.com/csv-generate/-/csv-generate-3.4.3.tgz#bc42d943b45aea52afa896874291da4b9108ffff" @@ -829,7 +1030,7 @@ csv@^5.5.3: csv-stringify "^5.6.5" stream-transform "^2.1.3" -debug@^4.3.4: +debug@^4.3.1, debug@^4.3.4: version "4.3.4" resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865" integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ== @@ -1010,6 +1211,34 @@ esbuild@^0.17.5, esbuild@~0.17.6: "@esbuild/win32-ia32" "0.17.18" "@esbuild/win32-x64" "0.17.18" +esbuild@^0.17.6: + version "0.17.19" + resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.17.19.tgz#087a727e98299f0462a3d0bcdd9cd7ff100bd955" + integrity sha512-XQ0jAPFkK/u3LcVRcvVHQcTIqD6E2H1fvZMA5dQPSOWb3suUbWbfbRf94pjc0bNzRYLfIrDRQXr7X+LHIm5oHw== + optionalDependencies: + "@esbuild/android-arm" "0.17.19" + "@esbuild/android-arm64" "0.17.19" + "@esbuild/android-x64" "0.17.19" + "@esbuild/darwin-arm64" 
"0.17.19" + "@esbuild/darwin-x64" "0.17.19" + "@esbuild/freebsd-arm64" "0.17.19" + "@esbuild/freebsd-x64" "0.17.19" + "@esbuild/linux-arm" "0.17.19" + "@esbuild/linux-arm64" "0.17.19" + "@esbuild/linux-ia32" "0.17.19" + "@esbuild/linux-loong64" "0.17.19" + "@esbuild/linux-mips64el" "0.17.19" + "@esbuild/linux-ppc64" "0.17.19" + "@esbuild/linux-riscv64" "0.17.19" + "@esbuild/linux-s390x" "0.17.19" + "@esbuild/linux-x64" "0.17.19" + "@esbuild/netbsd-x64" "0.17.19" + "@esbuild/openbsd-x64" "0.17.19" + "@esbuild/sunos-x64" "0.17.19" + "@esbuild/win32-arm64" "0.17.19" + "@esbuild/win32-ia32" "0.17.19" + "@esbuild/win32-x64" "0.17.19" + escalade@^3.1.1: version "3.1.1" resolved "https://registry.yarnpkg.com/escalade/-/escalade-3.1.1.tgz#d8cfdc7000965c5a0174b4a82eaa5c0552742e40" @@ -1025,6 +1254,21 @@ esprima@^4.0.0: resolved "https://registry.yarnpkg.com/esprima/-/esprima-4.0.1.tgz#13b04cdb3e6c5d19df91ab6987a8695619b0aa71" integrity sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A== +execa@^5.0.0: + version "5.1.1" + resolved "https://registry.yarnpkg.com/execa/-/execa-5.1.1.tgz#f80ad9cbf4298f7bd1d4c9555c21e93741c411dd" + integrity sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg== + dependencies: + cross-spawn "^7.0.3" + get-stream "^6.0.0" + human-signals "^2.1.0" + is-stream "^2.0.0" + merge-stream "^2.0.0" + npm-run-path "^4.0.1" + onetime "^5.1.2" + signal-exit "^3.0.3" + strip-final-newline "^2.0.0" + extendable-error@^0.1.5: version "0.1.7" resolved "https://registry.yarnpkg.com/extendable-error/-/extendable-error-0.1.7.tgz#60b9adf206264ac920058a7395685ae4670c2b96" @@ -1113,6 +1357,11 @@ fs-extra@^8.1.0: jsonfile "^4.0.0" universalify "^0.1.0" +fs.realpath@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f" + integrity 
sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw== + fsevents@~2.3.2: version "2.3.2" resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.2.tgz#8a526f78b8fdf4623b709e0b975c52c24c02fd1a" @@ -1157,6 +1406,11 @@ get-intrinsic@^1.0.2, get-intrinsic@^1.1.1, get-intrinsic@^1.1.3, get-intrinsic@ has "^1.0.3" has-symbols "^1.0.3" +get-stream@^6.0.0: + version "6.0.1" + resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-6.0.1.tgz#a262d8eef67aced57c2852ad6167526a43cbf7b7" + integrity sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg== + get-symbol-description@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/get-symbol-description/-/get-symbol-description-1.0.0.tgz#7fdb81c900101fbd564dd5f1a30af5aadc1e58d6" @@ -1170,13 +1424,25 @@ get-tsconfig@^4.4.0: resolved "https://registry.yarnpkg.com/get-tsconfig/-/get-tsconfig-4.5.0.tgz#6d52d1c7b299bd3ee9cd7638561653399ac77b0f" integrity sha512-MjhiaIWCJ1sAU4pIQ5i5OfOuHHxVo1oYeNsWTON7jxYkod8pHocXeh+SSbmu5OZZZK73B6cbJ2XADzXehLyovQ== -glob-parent@^5.1.2: +glob-parent@^5.1.2, glob-parent@~5.1.2: version "5.1.2" resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-5.1.2.tgz#869832c58034fe68a4093c17dc15e8340d8401c4" integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow== dependencies: is-glob "^4.0.1" +glob@7.1.6: + version "7.1.6" + resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.6.tgz#141f33b81a7c2492e125594307480c46679278a6" + integrity sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA== + dependencies: + fs.realpath "^1.0.0" + inflight "^1.0.4" + inherits "2" + minimatch "^3.0.4" + once "^1.3.0" + path-is-absolute "^1.0.0" + globalthis@^1.0.3: version "1.0.3" resolved "https://registry.yarnpkg.com/globalthis/-/globalthis-1.0.3.tgz#5852882a52b80dc301b0660273e1ed082f0b6ccf" @@ -1184,7 +1450,7 @@ 
globalthis@^1.0.3: dependencies: define-properties "^1.1.3" -globby@^11.0.0: +globby@^11.0.0, globby@^11.0.3: version "11.1.0" resolved "https://registry.yarnpkg.com/globby/-/globby-11.1.0.tgz#bd4be98bb042f83d796f7e3811991fbe82a0d34b" integrity sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g== @@ -1274,6 +1540,11 @@ human-id@^1.0.2: resolved "https://registry.yarnpkg.com/human-id/-/human-id-1.0.2.tgz#e654d4b2b0d8b07e45da9f6020d8af17ec0a5df3" integrity sha512-UNopramDEhHJD+VR+ehk8rOslwSfByxPIZyJRfV739NDhN5LF1fa1MqnzKm2lGTQRjNrjK19Q5fhkgIfjlVUKw== +human-signals@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-2.1.0.tgz#dc91fcba42e4d06e4abaed33b3e7a3c02f514ea0" + integrity sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw== + iconv-lite@^0.4.24: version "0.4.24" resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.4.24.tgz#2022b4b25fbddc21d2f524974a474aafe733908b" @@ -1291,6 +1562,19 @@ indent-string@^4.0.0: resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-4.0.0.tgz#624f8f4497d619b2d9768531d58f4122854d7251" integrity sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg== +inflight@^1.0.4: + version "1.0.6" + resolved "https://registry.yarnpkg.com/inflight/-/inflight-1.0.6.tgz#49bd6331d7d02d0c09bc910a1075ba8165b56df9" + integrity sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA== + dependencies: + once "^1.3.0" + wrappy "1" + +inherits@2: + version "2.0.4" + resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c" + integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== + internal-slot@^1.0.5: version "1.0.5" resolved 
"https://registry.yarnpkg.com/internal-slot/-/internal-slot-1.0.5.tgz#f2a2ee21f668f8627a4667f309dc0f4fb6674986" @@ -1321,6 +1605,13 @@ is-bigint@^1.0.1: dependencies: has-bigints "^1.0.1" +is-binary-path@~2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/is-binary-path/-/is-binary-path-2.1.0.tgz#ea1f7f3b80f064236e83470f86c09c254fb45b09" + integrity sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw== + dependencies: + binary-extensions "^2.0.0" + is-boolean-object@^1.1.0: version "1.1.2" resolved "https://registry.yarnpkg.com/is-boolean-object/-/is-boolean-object-1.1.2.tgz#5c6dc200246dd9321ae4b885a114bb1f75f63719" @@ -1370,7 +1661,7 @@ is-fullwidth-code-point@^4.0.0: resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-4.0.0.tgz#fae3167c729e7463f8461ce512b080a49268aa88" integrity sha512-O4L094N2/dZ7xqVdrXhh9r1KODPJpFms8B5sGdJLPy664AgvXsreZUyCQQNItZRDlYug4xStLjNp/sz3HvBowQ== -is-glob@^4.0.1: +is-glob@^4.0.1, is-glob@~4.0.1: version "4.0.3" resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-4.0.3.tgz#64f61e42cbbb2eec2071a9dac0b28ba1e65d5084" integrity sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg== @@ -1414,6 +1705,11 @@ is-shared-array-buffer@^1.0.2: dependencies: call-bind "^1.0.2" +is-stream@^2.0.0: + version "2.0.1" + resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-2.0.1.tgz#fac1e3d53b97ad5a9d0ae9cef2389f5810a5c077" + integrity sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg== + is-string@^1.0.5, is-string@^1.0.7: version "1.0.7" resolved "https://registry.yarnpkg.com/is-string/-/is-string-1.0.7.tgz#0dd12bf2006f255bb58f695110eff7491eebc0fd" @@ -1463,6 +1759,11 @@ isexe@^2.0.0: resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10" integrity 
sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw== +joycon@^3.0.1: + version "3.1.1" + resolved "https://registry.yarnpkg.com/joycon/-/joycon-3.1.1.tgz#bce8596d6ae808f8b68168f5fc69280996894f03" + integrity sha512-34wB/Y7MW7bzjKRjUKTa46I2Z7eV62Rkhva+KkopW7Qvv/OSWBqvkSY7vusOPrNuZcUG3tApvdVgNB8POj3SPw== + js-tokens@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499" @@ -1508,6 +1809,11 @@ kleur@^4.1.5: resolved "https://registry.yarnpkg.com/kleur/-/kleur-4.1.5.tgz#95106101795f7050c6c650f350c683febddb1780" integrity sha512-o+NO+8WrRiQEE4/7nwRJhN1HWpVmJm511pBHUxPLtp0BUISzlBplORYSmTclCnJvQq2tKu/sgl3xVpkc7ZWuQQ== +lilconfig@^2.0.5: + version "2.1.0" + resolved "https://registry.yarnpkg.com/lilconfig/-/lilconfig-2.1.0.tgz#78e23ac89ebb7e1bfbf25b18043de756548e7f52" + integrity sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ== + lines-and-columns@^1.1.6: version "1.2.4" resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.2.4.tgz#eca284f75d2965079309dc0ad9255abb2ebc1632" @@ -1523,6 +1829,11 @@ load-json-file@^4.0.0: pify "^3.0.0" strip-bom "^3.0.0" +load-tsconfig@^0.2.3: + version "0.2.5" + resolved "https://registry.yarnpkg.com/load-tsconfig/-/load-tsconfig-0.2.5.tgz#453b8cd8961bfb912dea77eb6c168fe8cca3d3a1" + integrity sha512-IXO6OCs9yg8tMKzfPZ1YmheJbZCiEsnBdcB03l0OcfK9prKnJb96siuHCr5Fl37/yo9DnKU+TLpxzTUspw9shg== + load-yaml-file@^0.2.0: version "0.2.0" resolved "https://registry.yarnpkg.com/load-yaml-file/-/load-yaml-file-0.2.0.tgz#af854edaf2bea89346c07549122753c07372f64d" @@ -1552,6 +1863,11 @@ locate-path@^6.0.0: dependencies: p-locate "^5.0.0" +lodash.sortby@^4.7.0: + version "4.7.0" + resolved "https://registry.yarnpkg.com/lodash.sortby/-/lodash.sortby-4.7.0.tgz#edd14c824e2cc9c1e0b0a1b42bb5210516a42438" + integrity 
sha512-HDWXG8isMntAyRF5vZ7xKuEvOhT4AhlRt/3czTSjvGUxjYCBVRQY48ViDHyfYz9VIoBkW4TMGQNapx+l3RUwdA== + lodash.startcase@^4.4.0: version "4.4.0" resolved "https://registry.yarnpkg.com/lodash.startcase/-/lodash.startcase-4.4.0.tgz#9436e34ed26093ed7ffae1936144350915d9add8" @@ -1604,6 +1920,11 @@ meow@^6.0.0: type-fest "^0.13.1" yargs-parser "^18.1.3" +merge-stream@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/merge-stream/-/merge-stream-2.0.0.tgz#52823629a14dd00c9770fb6ad47dc6310f2c1f60" + integrity sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w== + merge2@^1.3.0, merge2@^1.4.1: version "1.4.1" resolved "https://registry.yarnpkg.com/merge2/-/merge2-1.4.1.tgz#4368892f885e907455a6fd7dc55c0c9d404990ae" @@ -1617,6 +1938,11 @@ micromatch@^4.0.2, micromatch@^4.0.4: braces "^3.0.2" picomatch "^2.3.1" +mimic-fn@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/mimic-fn/-/mimic-fn-2.1.0.tgz#7ed2c2ccccaf84d3ffcb7a69b57711fc2083401b" + integrity sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg== + min-indent@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/min-indent/-/min-indent-1.0.1.tgz#a63f681673b30571fbe8bc25686ae746eefa9869" @@ -1670,6 +1996,15 @@ ms@2.1.2: resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009" integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== +mz@^2.7.0: + version "2.7.0" + resolved "https://registry.yarnpkg.com/mz/-/mz-2.7.0.tgz#95008057a56cafadc2bc63dde7f9ff6955948e32" + integrity sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q== + dependencies: + any-promise "^1.0.0" + object-assign "^4.0.1" + thenify-all "^1.0.0" + nanoid@^3.3.6: version "3.3.6" resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.6.tgz#443380c856d6e9f9824267d960b4236ad583ea4c" @@ -1690,6 +2025,11 
@@ normalize-package-data@^2.3.2, normalize-package-data@^2.5.0: semver "2 || 3 || 4 || 5" validate-npm-package-license "^3.0.1" +normalize-path@^3.0.0, normalize-path@~3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-3.0.0.tgz#0dcd69ff23a1c9b11fd0978316644a0388216a65" + integrity sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA== + npm-run-all@^4.1.5: version "4.1.5" resolved "https://registry.yarnpkg.com/npm-run-all/-/npm-run-all-4.1.5.tgz#04476202a15ee0e2e214080861bff12a51d98fba" @@ -1705,6 +2045,18 @@ npm-run-all@^4.1.5: shell-quote "^1.6.1" string.prototype.padend "^3.0.0" +npm-run-path@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-4.0.1.tgz#b7ecd1e5ed53da8e37a55e1c2269e0b97ed748ea" + integrity sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw== + dependencies: + path-key "^3.0.0" + +object-assign@^4.0.1: + version "4.1.1" + resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863" + integrity sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg== + object-inspect@^1.12.3, object-inspect@^1.9.0: version "1.12.3" resolved "https://registry.yarnpkg.com/object-inspect/-/object-inspect-1.12.3.tgz#ba62dffd67ee256c8c086dfae69e016cd1f198b9" @@ -1725,6 +2077,20 @@ object.assign@^4.1.4: has-symbols "^1.0.3" object-keys "^1.1.1" +once@^1.3.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1" + integrity sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w== + dependencies: + wrappy "1" + +onetime@^5.1.2: + version "5.1.2" + resolved "https://registry.yarnpkg.com/onetime/-/onetime-5.1.2.tgz#d0e96ebb56b07476df1dd9c4806e5237985ca45e" + integrity 
sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg== + dependencies: + mimic-fn "^2.1.0" + os-tmpdir@~1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/os-tmpdir/-/os-tmpdir-1.0.2.tgz#bbe67406c79aa85c5cfec766fe5734555dfa1274" @@ -1820,11 +2186,21 @@ path-exists@^4.0.0: resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-4.0.0.tgz#513bdbe2d3b95d7762e8c1137efa195c6c61b5b3" integrity sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w== +path-is-absolute@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f" + integrity sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg== + path-key@^2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/path-key/-/path-key-2.0.1.tgz#411cadb574c5a140d3a4b1910d40d80cc9f40b40" integrity sha512-fEHGKCSmUSDPv4uoj8AlD+joPlq3peND+HRYyxFz4KPw4z926S/b8rIuFs2FYJg3BwsxJf6A9/3eIdLaYC+9Dw== +path-key@^3.0.0, path-key@^3.1.0: + version "3.1.1" + resolved "https://registry.yarnpkg.com/path-key/-/path-key-3.1.1.tgz#581f6ade658cbba65a0d3380de7753295054f375" + integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q== + path-parse@^1.0.7: version "1.0.7" resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" @@ -1857,7 +2233,7 @@ picocolors@^1.0.0: resolved "https://registry.yarnpkg.com/picocolors/-/picocolors-1.0.0.tgz#cb5bdc74ff3f51892236eaf79d68bc44564ab81c" integrity sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ== -picomatch@^2.3.1: +picomatch@^2.0.4, picomatch@^2.2.1, picomatch@^2.3.1: version "2.3.1" resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.1.tgz#3ba3833733646d9d3e4995946c1365a67fb07a42" integrity 
sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA== @@ -1877,6 +2253,11 @@ pify@^4.0.1: resolved "https://registry.yarnpkg.com/pify/-/pify-4.0.1.tgz#4b2cd25c50d598735c50292224fd8c6df41e3231" integrity sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g== +pirates@^4.0.1: + version "4.0.5" + resolved "https://registry.yarnpkg.com/pirates/-/pirates-4.0.5.tgz#feec352ea5c3268fb23a37c702ab1699f35a5f3b" + integrity sha512-8V9+HQPupnaXMA23c5hvl69zXvTwTzyAYasnkb0Tts4XvO4CliqONMOnvlq26rkhLC3nWDFBJf73LU1e1VZLaQ== + pkg-dir@^4.2.0: version "4.2.0" resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-4.2.0.tgz#f099133df7ede422e81d1d8448270eeb3e4261f3" @@ -1893,6 +2274,14 @@ pkg-types@^1.0.3: mlly "^1.2.0" pathe "^1.1.0" +postcss-load-config@^3.0.1: + version "3.1.4" + resolved "https://registry.yarnpkg.com/postcss-load-config/-/postcss-load-config-3.1.4.tgz#1ab2571faf84bb078877e1d07905eabe9ebda855" + integrity sha512-6DiM4E7v4coTE4uzA8U//WhtPwyhiim3eyjEMFCnUpzbrkK9wJHgKDT2mR+HbtSrd/NubVaYTOpSpjUl8NQeRg== + dependencies: + lilconfig "^2.0.5" + yaml "^1.10.2" + postcss@^8.4.23: version "8.4.23" resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.4.23.tgz#df0aee9ac7c5e53e1075c24a3613496f9e6552ab" @@ -1931,6 +2320,11 @@ pseudomap@^1.0.2: resolved "https://registry.yarnpkg.com/pseudomap/-/pseudomap-1.0.2.tgz#f052a28da70e618917ef0a8ac34c1ae5a68286b3" integrity sha512-b/YwNhb8lk1Zz2+bXXpS/LK9OisiZZ1SNsSLxN1x2OXVEhW2Ckr/7mWE5vrC1ZTiJlD9g19jWszTmJsB+oEpFQ== +punycode@^2.1.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.0.tgz#f67fa67c94da8f4d0cfff981aee4118064199b8f" + integrity sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA== + queue-microtask@^1.2.2: version "1.2.3" resolved "https://registry.yarnpkg.com/queue-microtask/-/queue-microtask-1.2.3.tgz#4929228bbc724dfac43e0efb058caf7b6cfb6243" @@ -1984,6 
+2378,13 @@ read-yaml-file@^1.1.0: pify "^4.0.1" strip-bom "^3.0.0" +readdirp@~3.6.0: + version "3.6.0" + resolved "https://registry.yarnpkg.com/readdirp/-/readdirp-3.6.0.tgz#74a370bd857116e245b29cc97340cd431a02a6c7" + integrity sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA== + dependencies: + picomatch "^2.2.1" + redent@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/redent/-/redent-3.0.0.tgz#e557b7998316bb53c9f1f56fa626352c6963059f" @@ -2035,6 +2436,13 @@ reusify@^1.0.4: resolved "https://registry.yarnpkg.com/reusify/-/reusify-1.0.4.tgz#90da382b1e126efc02146e90845a88db12925d76" integrity sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw== +rollup@^3.2.5: + version "3.21.7" + resolved "https://registry.yarnpkg.com/rollup/-/rollup-3.21.7.tgz#022ab47416712c93546f369abfb20da5fb8ed42e" + integrity sha512-KXPaEuR8FfUoK2uHwNjxTmJ18ApyvD6zJpYv9FOJSqLStmt6xOY84l1IjK2dSolQmoXknrhEFRaPRgOPdqCT5w== + optionalDependencies: + fsevents "~2.3.2" + rollup@^3.21.0: version "3.21.6" resolved "https://registry.yarnpkg.com/rollup/-/rollup-3.21.6.tgz#f5649ccdf8fcc7729254faa457cbea9547eb86db" @@ -2080,11 +2488,23 @@ shebang-command@^1.2.0: dependencies: shebang-regex "^1.0.0" +shebang-command@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-2.0.0.tgz#ccd0af4f8835fbdc265b82461aaf0c36663f34ea" + integrity sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA== + dependencies: + shebang-regex "^3.0.0" + shebang-regex@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-1.0.0.tgz#da42f49740c0b42db2ca9728571cb190c98efea3" integrity sha512-wpoSFAxys6b2a2wHZ1XpDSgD7N9iVjg29Ph9uV/uaP9Ex/KXlkTZTeddxDPSYQpgvzKLGJke2UU0AzoGCjNIvQ== +shebang-regex@^3.0.0: + version "3.0.0" + resolved 
"https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-3.0.0.tgz#ae16f1644d873ecad843b0307b143362d4c42172" + integrity sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A== + shell-quote@^1.6.1: version "1.8.1" resolved "https://registry.yarnpkg.com/shell-quote/-/shell-quote-1.8.1.tgz#6dbf4db75515ad5bac63b4f1894c3a154c766680" @@ -2104,7 +2524,7 @@ siginfo@^2.0.0: resolved "https://registry.yarnpkg.com/siginfo/-/siginfo-2.0.0.tgz#32e76c70b79724e3bb567cb9d543eb858ccfaf30" integrity sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g== -signal-exit@^3.0.2: +signal-exit@^3.0.2, signal-exit@^3.0.3: version "3.0.7" resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.7.tgz#a9a1767f8af84155114eaabd73f99273c8f59ad9" integrity sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ== @@ -2147,6 +2567,13 @@ source-map-support@^0.5.21: buffer-from "^1.0.0" source-map "^0.6.0" +source-map@0.8.0-beta.0: + version "0.8.0-beta.0" + resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.8.0-beta.0.tgz#d4c1bb42c3f7ee925f005927ba10709e0d1d1f11" + integrity sha512-2ymg6oRBpebeZi9UUNsgQ89bhx01TcTkmNTGnNO88imTmbSgy4nfujrgVEFKWpMTEGA11EDkTt7mqObTPdigIA== + dependencies: + whatwg-url "^7.0.0" + source-map@^0.6.0, source-map@^0.6.1: version "0.6.1" resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263" @@ -2281,6 +2708,11 @@ strip-bom@^3.0.0: resolved "https://registry.yarnpkg.com/strip-bom/-/strip-bom-3.0.0.tgz#2334c18e9c759f7bdd56fdef7e9ae3d588e68ed3" integrity sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA== +strip-final-newline@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/strip-final-newline/-/strip-final-newline-2.0.0.tgz#89b852fb2fcbe936f6f4b3187afb0a12c1ab58ad" + integrity 
sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA== + strip-indent@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/strip-indent/-/strip-indent-3.0.0.tgz#c32e1cee940b6b3432c771bc2c54bcce73cd3001" @@ -2295,6 +2727,19 @@ strip-literal@^1.0.0: dependencies: acorn "^8.8.2" +sucrase@^3.20.3: + version "3.32.0" + resolved "https://registry.yarnpkg.com/sucrase/-/sucrase-3.32.0.tgz#c4a95e0f1e18b6847127258a75cf360bc568d4a7" + integrity sha512-ydQOU34rpSyj2TGyz4D2p8rbktIOZ8QY9s+DGLvFU1i5pWJE8vkpruCjGCMHsdXwnD7JDcS+noSwM/a7zyNFDQ== + dependencies: + "@jridgewell/gen-mapping" "^0.3.2" + commander "^4.0.0" + glob "7.1.6" + lines-and-columns "^1.1.6" + mz "^2.7.0" + pirates "^4.0.1" + ts-interface-checker "^0.1.9" + supports-color@^5.3.0: version "5.5.0" resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-5.5.0.tgz#e2e69a44ac8772f78a1ec0b35b689df6530efc8f" @@ -2319,6 +2764,20 @@ term-size@^2.1.0: resolved "https://registry.yarnpkg.com/term-size/-/term-size-2.2.1.tgz#2a6a54840432c2fb6320fea0f415531e90189f54" integrity sha512-wK0Ri4fOGjv/XPy8SBHZChl8CM7uMc5VML7SqiQ0zG7+J5Vr+RMQDoHa2CNT6KHUnTGIXH34UDMkPzAUyapBZg== +thenify-all@^1.0.0: + version "1.6.0" + resolved "https://registry.yarnpkg.com/thenify-all/-/thenify-all-1.6.0.tgz#1a1918d402d8fc3f98fbf234db0bcc8cc10e9726" + integrity sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA== + dependencies: + thenify ">= 3.1.0 < 4" + +"thenify@>= 3.1.0 < 4": + version "3.3.1" + resolved "https://registry.yarnpkg.com/thenify/-/thenify-3.3.1.tgz#8932e686a4066038a016dd9e2ca46add9838a95f" + integrity sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw== + dependencies: + any-promise "^1.0.0" + tinybench@^2.3.1: version "2.5.0" resolved "https://registry.yarnpkg.com/tinybench/-/tinybench-2.5.0.tgz#4711c99bbf6f3e986f67eb722fed9cddb3a68ba5" @@ -2348,11 +2807,28 @@ to-regex-range@^5.0.1: 
dependencies: is-number "^7.0.0" +tr46@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/tr46/-/tr46-1.0.1.tgz#a8b13fd6bfd2489519674ccde55ba3693b706d09" + integrity sha512-dTpowEjclQ7Kgx5SdBkqRzVhERQXov8/l9Ft9dVM9fmg0W0KQSVaXX9T4i6twCPNtYiZM53lpSSUAwJbFPOHxA== + dependencies: + punycode "^2.1.0" + +tree-kill@^1.2.2: + version "1.2.2" + resolved "https://registry.yarnpkg.com/tree-kill/-/tree-kill-1.2.2.tgz#4ca09a9092c88b73a7cdc5e8a01b507b0790a0cc" + integrity sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A== + trim-newlines@^3.0.0: version "3.0.1" resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-3.0.1.tgz#260a5d962d8b752425b32f3a7db0dcacd176c144" integrity sha512-c1PTsA3tYrIsLGkJkzHF+w9F2EyxfXGo4UyJc4pFL++FMjnq0HJS69T3M7d//gKrFKwy429bouPescbjecU+Zw== +ts-interface-checker@^0.1.9: + version "0.1.13" + resolved "https://registry.yarnpkg.com/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz#784fd3d679722bc103b1b4b8030bcddb5db2a699" + integrity sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA== + ts-morph@^17.0.1: version "17.0.1" resolved "https://registry.yarnpkg.com/ts-morph/-/ts-morph-17.0.1.tgz#d85df4fcf9a1fcda1b331d52c00655f381c932d1" @@ -2361,6 +2837,26 @@ ts-morph@^17.0.1: "@ts-morph/common" "~0.18.0" code-block-writer "^11.0.3" +tsup@^6.7.0: + version "6.7.0" + resolved "https://registry.yarnpkg.com/tsup/-/tsup-6.7.0.tgz#416f350f32a07b6ae86792ad7e52b0cafc566d64" + integrity sha512-L3o8hGkaHnu5TdJns+mCqFsDBo83bJ44rlK7e6VdanIvpea4ArPcU3swWGsLVbXak1PqQx/V+SSmFPujBK+zEQ== + dependencies: + bundle-require "^4.0.0" + cac "^6.7.12" + chokidar "^3.5.1" + debug "^4.3.1" + esbuild "^0.17.6" + execa "^5.0.0" + globby "^11.0.3" + joycon "^3.0.1" + postcss-load-config "^3.0.1" + resolve-from "^5.0.0" + rollup "^3.2.5" + source-map "0.8.0-beta.0" + sucrase "^3.20.3" + tree-kill "^1.2.2" + tsx@^3.12.3: version "3.12.7" resolved 
"https://registry.yarnpkg.com/tsx/-/tsx-3.12.7.tgz#b3b8b0fc79afc8260d1e14f9e995616c859a91e9" @@ -2551,6 +3047,20 @@ wcwidth@^1.0.1: dependencies: defaults "^1.0.3" +webidl-conversions@^4.0.2: + version "4.0.2" + resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-4.0.2.tgz#a855980b1f0b6b359ba1d5d9fb39ae941faa63ad" + integrity sha512-YQ+BmxuTgd6UXZW3+ICGfyqRyHXVlD5GtQr5+qjiNW7bF0cqrzX500HVXPBOvgXb5YnzDd+h0zqyv61KUD7+Sg== + +whatwg-url@^7.0.0: + version "7.1.0" + resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-7.1.0.tgz#c2c492f1eca612988efd3d2266be1b9fc6170d06" + integrity sha512-WUu7Rg1DroM7oQvGWfOiAK21n74Gg+T4elXEQYkOhtyLeWiJFoOGLXPKI/9gzIie9CtwVLm8wtw6YJdKyxSjeg== + dependencies: + lodash.sortby "^4.7.0" + tr46 "^1.0.1" + webidl-conversions "^4.0.2" + which-boxed-primitive@^1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz#13757bc89b209b049fe5d86430e21cf40a89a8e6" @@ -2594,6 +3104,13 @@ which@^1.2.9: dependencies: isexe "^2.0.0" +which@^2.0.1: + version "2.0.2" + resolved "https://registry.yarnpkg.com/which/-/which-2.0.2.tgz#7c6a8dd0a636a0327e10b59c9286eee93f3f51b1" + integrity sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA== + dependencies: + isexe "^2.0.0" + why-is-node-running@^2.2.2: version "2.2.2" resolved "https://registry.yarnpkg.com/why-is-node-running/-/why-is-node-running-2.2.2.tgz#4185b2b4699117819e7154594271e7e344c9973e" @@ -2620,6 +3137,11 @@ wrap-ansi@^7.0.0: string-width "^4.1.0" strip-ansi "^6.0.0" +wrappy@1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f" + integrity sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ== + y18n@^4.0.0: version "4.0.3" resolved "https://registry.yarnpkg.com/y18n/-/y18n-4.0.3.tgz#b5f259c82cd6e336921efd7bfd8bf560de9eeedf" @@ -2635,6 +3157,11 @@ 
yallist@^2.1.2: resolved "https://registry.yarnpkg.com/yallist/-/yallist-2.1.2.tgz#1c11f9218f076089a47dd512f93c6699a6a81d52" integrity sha512-ncTzHV7NvsQZkYe1DW7cbDLm0YpzHmZF5r/iyP3ZnQtMiJ+pjzisCiMNI+Sj+xQF5pXhSHxSB3uDbsBTzY/c2A== +yaml@^1.10.2: + version "1.10.2" + resolved "https://registry.yarnpkg.com/yaml/-/yaml-1.10.2.tgz#2301c5ffbf12b467de8da2333a459e29e7920e4b" + integrity sha512-r3vXyErRCYJ7wg28yvBY5VSoAF8ZvlcW9/BwUzEtUsjvX/DKs24dIkuwjtuprwJJHsbyUbLApepYTR1BN4uHrg== + yargs-parser@^18.1.2, yargs-parser@^18.1.3: version "18.1.3" resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-18.1.3.tgz#be68c4975c6b2abf469236b0c870362fab09a7b0" From 2d18fcc4fd70788b1de0b5ff2d58f3ee15c02f41 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Mon, 15 May 2023 02:12:23 +0200 Subject: [PATCH 124/207] Expose model to encoding mapping, fix types --- .changeset/dull-experts-itch.md | 5 +++++ js/README.md | 22 ++++++++++++++++++++++ js/examples/dynamic.ts | 24 ++++++++++++++++++++---- js/examples/simple.ts | 5 +++++ js/package.json | 9 ++++++++- js/src/index.ts | 30 ++++++++++++++++++------------ js/src/utils.ts | 4 +--- 7 files changed, 79 insertions(+), 20 deletions(-) create mode 100644 .changeset/dull-experts-itch.md create mode 100644 js/README.md create mode 100644 js/examples/simple.ts diff --git a/.changeset/dull-experts-itch.md b/.changeset/dull-experts-itch.md new file mode 100644 index 00000000..add94841 --- /dev/null +++ b/.changeset/dull-experts-itch.md @@ -0,0 +1,5 @@ +--- +"js-tiktoken": patch +--- + +Expose model to encoding mapping, fix types diff --git a/js/README.md b/js/README.md new file mode 100644 index 00000000..47541102 --- /dev/null +++ b/js/README.md @@ -0,0 +1,22 @@ +# ⏳ js-tiktoken + +tiktoken is a [BPE](https://en.wikipedia.org/wiki/Byte_pair_encoding) tokeniser for use with +OpenAI's models. This is a pure JS port of the original tiktoken library. 
+ +Install the library from NPM: + +``` +npm install js-tiktoken +``` + +## Usage + +Basic usage follows, which includes all the OpenAI encoders and ranks: + +```typescript +import assert from "node:assert"; +import { getEncoding, encodingForModel } from "js-tiktoken"; + +const enc = getEncoding("gpt2"); +assert(enc.decode(enc.encode("hello world")) === "hello world"); +``` diff --git a/js/examples/dynamic.ts b/js/examples/dynamic.ts index 7ce28b20..5a1132e5 100644 --- a/js/examples/dynamic.ts +++ b/js/examples/dynamic.ts @@ -1,19 +1,35 @@ -import { Tiktoken, TiktokenBPE, TiktokenEncoding } from "../dist"; +import { + Tiktoken, + TiktokenBPE, + TiktokenEncoding, + TiktokenModel, + getEncodingNameForModel, +} from "../dist"; const cache: Record = {}; -async function getEncoding(encoding: TiktokenEncoding) { +async function getEncoding( + encoding: TiktokenEncoding, + extendedSpecialTokens?: Record +) { if (!(encoding in cache)) { const res = await fetch(`https://tiktoken.pages.dev/js/${encoding}.json`); if (!res.ok) throw new Error("Failed to fetch encoding"); cache[encoding] = await res.json(); } - return new Tiktoken(cache[encoding]); + return new Tiktoken(cache[encoding], extendedSpecialTokens); +} + +async function encodingForModel( + model: TiktokenModel, + extendedSpecialTokens?: Record +) { + return getEncoding(getEncodingNameForModel(model), extendedSpecialTokens); } async function main() { - const encodings = await getEncoding("cl100k_base"); + const encodings = await encodingForModel("gpt-4"); const text = "function foo() { return 1; }"; const tokens = encodings.encode(text); console.log(tokens); diff --git a/js/examples/simple.ts b/js/examples/simple.ts new file mode 100644 index 00000000..6f8eb48b --- /dev/null +++ b/js/examples/simple.ts @@ -0,0 +1,5 @@ +import assert from "node:assert"; +import { getEncoding } from "../dist"; + +const enc = getEncoding("gpt2"); +assert(enc.decode(enc.encode("hello world")) === "hello world"); diff --git 
a/js/package.json b/js/package.json index 6719f2ea..24961e5e 100644 --- a/js/package.json +++ b/js/package.json @@ -7,12 +7,14 @@ "build": "rm -rf dist && tsup", "test": "vitest run" }, + "type": "module", "files": [ "dist/**/*" ], "exports": { ".": { "types": "./dist/index.d.ts", + "node": "./dist/index.cjs", "default": "./dist/index.js" } }, @@ -33,7 +35,12 @@ "entry": [ "src/index.ts" ], + "format": [ + "cjs", + "esm" + ], "dts": true, - "clean": true + "clean": true, + "treeshake": true } } diff --git a/js/src/index.ts b/js/src/index.ts index a6c1205d..c5f146b3 100644 --- a/js/src/index.ts +++ b/js/src/index.ts @@ -11,7 +11,7 @@ import { never } from "./utils"; export function getEncoding( encoding: TiktokenEncoding, extendSpecialTokens?: Record -) { +): Tiktoken { switch (encoding) { case "gpt2": return new Tiktoken(gpt2, extendSpecialTokens); @@ -24,17 +24,15 @@ export function getEncoding( case "cl100k_base": return new Tiktoken(cl100k_base, extendSpecialTokens); default: - never("Unknown encoding", encoding); + never(encoding); + throw new Error("Unknown encoding"); } } -export function encodingForModel( - model: TiktokenModel, - extendSpecialTokens?: Record -) { +export function getEncodingNameForModel(model: TiktokenModel) { switch (model) { case "gpt2": { - return getEncoding("gpt2", extendSpecialTokens); + return "gpt2"; } case "code-cushman-001": case "code-cushman-002": @@ -44,11 +42,11 @@ export function encodingForModel( case "davinci-codex": case "text-davinci-002": case "text-davinci-003": { - return getEncoding("p50k_base", extendSpecialTokens); + return "p50k_base"; } case "code-davinci-edit-001": case "text-davinci-edit-001": { - return getEncoding("p50k_edit", extendSpecialTokens); + return "p50k_edit"; } case "ada": case "babbage": @@ -68,7 +66,7 @@ export function encodingForModel( case "text-similarity-babbage-001": case "text-similarity-curie-001": case "text-similarity-davinci-001": { - return getEncoding("r50k_base", 
extendSpecialTokens); + return "r50k_base"; } case "gpt-3.5-turbo-0301": case "gpt-3.5-turbo": @@ -77,12 +75,20 @@ export function encodingForModel( case "gpt-4-32k": case "gpt-4": case "text-embedding-ada-002": { - return getEncoding("cl100k_base", extendSpecialTokens); + return "cl100k_base"; } default: - never("Unknown model", model); + never(model); + throw new Error("Unknown model"); } } +export function encodingForModel( + model: TiktokenModel, + extendSpecialTokens?: Record +) { + return getEncoding(getEncodingNameForModel(model), extendSpecialTokens); +} + export { Tiktoken, TiktokenBPE } from "./core"; export { TiktokenModel, TiktokenEncoding } from "./ranks/ranks"; diff --git a/js/src/utils.ts b/js/src/utils.ts index b7615d12..ab0dd97d 100644 --- a/js/src/utils.ts +++ b/js/src/utils.ts @@ -1,3 +1 @@ -export function never(message: string, _: never) { - throw new Error(message); -} +export function never(_: never) {} From 1da0b9d45f8ddad53da999ada204889a9e7263ec Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Mon, 15 May 2023 02:18:29 +0200 Subject: [PATCH 125/207] Bump js-tiktoken to 1.0.1 --- .changeset/dull-experts-itch.md | 5 ----- .changeset/weak-fireants-hunt.md | 5 ----- js/CHANGELOG.md | 8 ++++++++ js/package.json | 2 +- 4 files changed, 9 insertions(+), 11 deletions(-) delete mode 100644 .changeset/dull-experts-itch.md delete mode 100644 .changeset/weak-fireants-hunt.md create mode 100644 js/CHANGELOG.md diff --git a/.changeset/dull-experts-itch.md b/.changeset/dull-experts-itch.md deleted file mode 100644 index add94841..00000000 --- a/.changeset/dull-experts-itch.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"js-tiktoken": patch ---- - -Expose model to encoding mapping, fix types diff --git a/.changeset/weak-fireants-hunt.md b/.changeset/weak-fireants-hunt.md deleted file mode 100644 index 1a0b6e86..00000000 --- a/.changeset/weak-fireants-hunt.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"js-tiktoken": patch ---- - -Split core into smaller packages, 
expose core tokenizer for CDN pulling diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md new file mode 100644 index 00000000..3eeafdeb --- /dev/null +++ b/js/CHANGELOG.md @@ -0,0 +1,8 @@ +# js-tiktoken + +## 1.0.1 + +### Patch Changes + +- 2d18fcc: Expose model to encoding mapping, fix types +- 3240a8e: Split core into smaller packages, expose core tokenizer for CDN pulling diff --git a/js/package.json b/js/package.json index 24961e5e..47c5239a 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.0", + "version": "1.0.1", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { From 621ae351c2c579b421a5d7cbbda930d96d96a5b6 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Mon, 15 May 2023 02:56:41 +0200 Subject: [PATCH 126/207] Add missing main and types for compat --- js/CHANGELOG.md | 6 ++++++ js/package.json | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index 3eeafdeb..254dbbad 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.2 + +### Patch Changes + +- 2ba9a78: Add missing main and types for compat + ## 1.0.1 ### Patch Changes diff --git a/js/package.json b/js/package.json index 47c5239a..75c0875d 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.1", + "version": "1.0.2", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { @@ -8,6 +8,8 @@ "test": "vitest run" }, "type": "module", + "main": "./dist/index.cjs", + "types": "./dist/index.d.ts", "files": [ "dist/**/*" ], From de929d49f88b655cd36055e3841d36b7aa07cc69 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Mon, 15 May 2023 19:52:44 +0200 Subject: [PATCH 127/207] Add disallowed tokens --- .changeset/chatty-numbers-chew.md | 5 ++++ js/src/core.ts | 43 +++++++++++++++++++++++++------ js/test/compatibility.test.ts | 21 +++++++++++++-- 3 files changed, 59 
insertions(+), 10 deletions(-) create mode 100644 .changeset/chatty-numbers-chew.md diff --git a/.changeset/chatty-numbers-chew.md b/.changeset/chatty-numbers-chew.md new file mode 100644 index 00000000..b8f868e7 --- /dev/null +++ b/.changeset/chatty-numbers-chew.md @@ -0,0 +1,5 @@ +--- +"js-tiktoken": patch +--- + +Add disallowed tokens diff --git a/js/src/core.ts b/js/src/core.ts index 06e20758..26b111f1 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -104,21 +104,48 @@ export class Tiktoken { }, {}); } - encode(text: string, allowedSpecial: Set | "all" = new Set()) { + private static specialTokenRegex = (tokens: string[]) => { + return new RegExp(tokens.map((i) => escapeRegex(i)).join("|"), "g"); + }; + + encode( + text: string, + allowedSpecial: Array | "all" = [], + disallowedSpecial: Array | "all" = "all" + ) { const regexes = new RegExp(this.patStr, "ug"); - const specialRegex = new RegExp( + const specialRegex = Tiktoken.specialTokenRegex( Object.keys(this.specialTokens) - .map((i) => escapeRegex(i)) - .join("|"), - "g" ); const ret: number[] = []; - const allowedSpecialSet = + const allowedSpecialSet = new Set( allowedSpecial === "all" - ? new Set(Object.keys(this.specialTokens)) - : allowedSpecial; + ? Object.keys(this.specialTokens) + : allowedSpecial + ); + + const disallowedSpecialSet = new Set( + disallowedSpecial === "all" + ? 
Object.keys(this.specialTokens).filter( + (x) => !allowedSpecialSet.has(x) + ) + : disallowedSpecial + ); + + if (disallowedSpecialSet.size > 0) { + const disallowedSpecialRegex = Tiktoken.specialTokenRegex([ + ...disallowedSpecialSet, + ]); + + const specialMatch = text.match(disallowedSpecialRegex); + if (specialMatch != null) { + throw new Error( + `The text contains a special token that is not allowed: ${specialMatch[0]}` + ); + } + } let start = 0; while (true) { diff --git a/js/test/compatibility.test.ts b/js/test/compatibility.test.ts index 05638d59..4adce37b 100644 --- a/js/test/compatibility.test.ts +++ b/js/test/compatibility.test.ts @@ -1,9 +1,9 @@ import { test, expect, describe, afterAll } from "vitest"; import { get_encoding } from "../../wasm/dist"; -import { Tiktoken } from "../src/index"; +import { getEncoding } from "../src/index"; describe("LiteTokenizer matches the behavior of tiktoken", () => { - const lite = new Tiktoken(); + const lite = getEncoding("cl100k_base"); const full = get_encoding("cl100k_base"); afterAll(() => full.free()); @@ -15,9 +15,26 @@ describe("LiteTokenizer matches the behavior of tiktoken", () => { test("Magic tokens", () => { const text = "<|fim_prefix|>test<|fim_suffix|>"; + + expect(() => lite.encode(text)).toThrowError( + "The text contains a special token that is not allowed: <|fim_prefix|>" + ); + + expect(() => lite.encode(text, [], "all")).toThrowError( + "The text contains a special token that is not allowed: <|fim_prefix|>" + ); + expect([...lite.encode(text, "all")]).toEqual([ ...full.encode(text, "all"), ]); + + expect(() => [...lite.encode(text, ["<|fim_prefix|>"])]).toThrowError( + "The text contains a special token that is not allowed: <|fim_suffix|>" + ); + + expect([ + ...lite.encode(text, ["<|fim_prefix|>", "<|fim_suffix|>"]), + ]).toEqual([...full.encode(text, ["<|fim_prefix|>", "<|fim_suffix|>"])]); }); test("Emojis and non-latin characters", () => { From dba22a9f2836875b13b57c3942b1d5acc14fc7ae Mon 
Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 15 May 2023 17:53:33 +0000 Subject: [PATCH 128/207] Version Packages --- .changeset/chatty-numbers-chew.md | 5 ----- js/CHANGELOG.md | 6 ++++++ js/package.json | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) delete mode 100644 .changeset/chatty-numbers-chew.md diff --git a/.changeset/chatty-numbers-chew.md b/.changeset/chatty-numbers-chew.md deleted file mode 100644 index b8f868e7..00000000 --- a/.changeset/chatty-numbers-chew.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"js-tiktoken": patch ---- - -Add disallowed tokens diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index 254dbbad..285cadf1 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.3 + +### Patch Changes + +- de929d4: Add disallowed tokens + ## 1.0.2 ### Patch Changes diff --git a/js/package.json b/js/package.json index 75c0875d..7a9b4f12 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.2", + "version": "1.0.3", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { From 265e01df64cce529873caa9c78715fc50227aa41 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Mon, 15 May 2023 20:08:03 +0200 Subject: [PATCH 129/207] Expose ranks --- .changeset/long-dragons-impress.md | 5 +++++ js/package.json | 32 +++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 .changeset/long-dragons-impress.md diff --git a/.changeset/long-dragons-impress.md b/.changeset/long-dragons-impress.md new file mode 100644 index 00000000..9e29294c --- /dev/null +++ b/.changeset/long-dragons-impress.md @@ -0,0 +1,5 @@ +--- +"js-tiktoken": patch +--- + +Expose ranks diff --git a/js/package.json b/js/package.json index 7a9b4f12..c1ee0fcb 100644 --- a/js/package.json +++ b/js/package.json @@ -18,6 +18,31 @@ "types": "./dist/index.d.ts", "node": "./dist/index.cjs", "default": "./dist/index.js" + }, + 
"./ranks/cl100k_base": { + "types": "./dist/ranks/cl100k_base.d.ts", + "node": "./dist/ranks/cl100k_base.cjs", + "default": "./dist/ranks/cl100k_base.js" + }, + "./ranks/gpt2": { + "types": "./dist/ranks/gpt2.d.ts", + "node": "./dist/ranks/gpt2.cjs", + "default": "./dist/ranks/gpt2.js" + }, + "./ranks/p50k_base": { + "types": "./dist/ranks/p50k_base.d.ts", + "node": "./dist/ranks/p50k_base.cjs", + "default": "./dist/ranks/p50k_base.js" + }, + "./ranks/p50k_edit": { + "types": "./dist/ranks/p50k_edit.d.ts", + "node": "./dist/ranks/p50k_edit.cjs", + "default": "./dist/ranks/p50k_edit.js" + }, + "./ranks/r50k_base": { + "types": "./dist/ranks/r50k_base.d.ts", + "node": "./dist/ranks/r50k_base.cjs", + "default": "./dist/ranks/r50k_base.js" } }, "repository": { @@ -35,7 +60,12 @@ }, "tsup": { "entry": [ - "src/index.ts" + "src/index.ts", + "src/ranks/cl100k_base.ts", + "src/ranks/gpt2.ts", + "src/ranks/p50k_base.ts", + "src/ranks/p50k_edit.ts", + "src/ranks/r50k_base.ts" ], "format": [ "cjs", From 7fbd89944c150a6e1cdcbf9c33a28979e3eda9ba Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Mon, 15 May 2023 20:09:46 +0200 Subject: [PATCH 130/207] Bump to 1.0.4 --- .changeset/long-dragons-impress.md | 5 ----- js/CHANGELOG.md | 6 ++++++ js/package.json | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) delete mode 100644 .changeset/long-dragons-impress.md diff --git a/.changeset/long-dragons-impress.md b/.changeset/long-dragons-impress.md deleted file mode 100644 index 9e29294c..00000000 --- a/.changeset/long-dragons-impress.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"js-tiktoken": patch ---- - -Expose ranks diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index 285cadf1..5bcf3bf9 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.4 + +### Patch Changes + +- 265e01d: Expose ranks + ## 1.0.3 ### Patch Changes diff --git a/js/package.json b/js/package.json index c1ee0fcb..083f27a5 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 
+1,6 @@ { "name": "js-tiktoken", - "version": "1.0.3", + "version": "1.0.4", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { From 4d3f97f5240d56eadb36d098339067b42dd6147a Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Mon, 15 May 2023 22:12:59 +0200 Subject: [PATCH 131/207] Bump js-tiktoken to 1.0.5 Use require instead for exports, add lite entrypoint --- js/CHANGELOG.md | 6 +++++ js/package.json | 20 +++++++++++------ js/src/core.ts | 56 +++++++++++++++++++++++++++++++++++++++++++++++ js/src/index.ts | 58 ++----------------------------------------------- js/src/lite.ts | 2 ++ 5 files changed, 79 insertions(+), 63 deletions(-) create mode 100644 js/src/lite.ts diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index 5bcf3bf9..a8b10e48 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.5 + +### Patch Changes + +- Use require instead for exports, add lite entrypoint + ## 1.0.4 ### Patch Changes diff --git a/js/package.json b/js/package.json index 083f27a5..3bd8481d 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.4", + "version": "1.0.5", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { @@ -16,32 +16,37 @@ "exports": { ".": { "types": "./dist/index.d.ts", - "node": "./dist/index.cjs", + "require": "./dist/index.cjs", "default": "./dist/index.js" }, + "./lite": { + "types": "./dist/lite.d.ts", + "require": "./dist/lite.cjs", + "default": "./dist/lite.js" + }, "./ranks/cl100k_base": { "types": "./dist/ranks/cl100k_base.d.ts", - "node": "./dist/ranks/cl100k_base.cjs", + "require": "./dist/ranks/cl100k_base.cjs", "default": "./dist/ranks/cl100k_base.js" }, "./ranks/gpt2": { "types": "./dist/ranks/gpt2.d.ts", - "node": "./dist/ranks/gpt2.cjs", + "require": "./dist/ranks/gpt2.cjs", "default": "./dist/ranks/gpt2.js" }, "./ranks/p50k_base": { "types": "./dist/ranks/p50k_base.d.ts", - "node": "./dist/ranks/p50k_base.cjs", 
+ "require": "./dist/ranks/p50k_base.cjs", "default": "./dist/ranks/p50k_base.js" }, "./ranks/p50k_edit": { "types": "./dist/ranks/p50k_edit.d.ts", - "node": "./dist/ranks/p50k_edit.cjs", + "require": "./dist/ranks/p50k_edit.cjs", "default": "./dist/ranks/p50k_edit.js" }, "./ranks/r50k_base": { "types": "./dist/ranks/r50k_base.d.ts", - "node": "./dist/ranks/r50k_base.cjs", + "require": "./dist/ranks/r50k_base.cjs", "default": "./dist/ranks/r50k_base.js" } }, @@ -61,6 +66,7 @@ "tsup": { "entry": [ "src/index.ts", + "src/lite.ts", "src/ranks/cl100k_base.ts", "src/ranks/gpt2.ts", "src/ranks/p50k_base.ts", diff --git a/js/src/core.ts b/js/src/core.ts index 26b111f1..5a9d65f4 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -1,4 +1,6 @@ import base64 from "base64-js"; +import type { TiktokenModel } from "./ranks/ranks"; +import { never } from "./utils"; function bytePairMerge( piece: Uint8Array, @@ -205,3 +207,57 @@ export class Tiktoken { return this.textDecoder.decode(mergedArray); } } + +export function getEncodingNameForModel(model: TiktokenModel) { + switch (model) { + case "gpt2": { + return "gpt2"; + } + case "code-cushman-001": + case "code-cushman-002": + case "code-davinci-001": + case "code-davinci-002": + case "cushman-codex": + case "davinci-codex": + case "text-davinci-002": + case "text-davinci-003": { + return "p50k_base"; + } + case "code-davinci-edit-001": + case "text-davinci-edit-001": { + return "p50k_edit"; + } + case "ada": + case "babbage": + case "code-search-ada-code-001": + case "code-search-babbage-code-001": + case "curie": + case "davinci": + case "text-ada-001": + case "text-babbage-001": + case "text-curie-001": + case "text-davinci-001": + case "text-search-ada-doc-001": + case "text-search-babbage-doc-001": + case "text-search-curie-doc-001": + case "text-search-davinci-doc-001": + case "text-similarity-ada-001": + case "text-similarity-babbage-001": + case "text-similarity-curie-001": + case "text-similarity-davinci-001": { + return 
"r50k_base"; + } + case "gpt-3.5-turbo-0301": + case "gpt-3.5-turbo": + case "gpt-4-0314": + case "gpt-4-32k-0314": + case "gpt-4-32k": + case "gpt-4": + case "text-embedding-ada-002": { + return "cl100k_base"; + } + default: + never(model); + throw new Error("Unknown model"); + } +} diff --git a/js/src/index.ts b/js/src/index.ts index c5f146b3..e27ceada 100644 --- a/js/src/index.ts +++ b/js/src/index.ts @@ -5,7 +5,7 @@ import p50k_edit from "./ranks/p50k_edit"; import r50k_base from "./ranks/r50k_base"; import cl100k_base from "./ranks/cl100k_base"; -import { Tiktoken } from "./core"; +import { Tiktoken, getEncodingNameForModel } from "./core"; import { never } from "./utils"; export function getEncoding( @@ -29,60 +29,6 @@ export function getEncoding( } } -export function getEncodingNameForModel(model: TiktokenModel) { - switch (model) { - case "gpt2": { - return "gpt2"; - } - case "code-cushman-001": - case "code-cushman-002": - case "code-davinci-001": - case "code-davinci-002": - case "cushman-codex": - case "davinci-codex": - case "text-davinci-002": - case "text-davinci-003": { - return "p50k_base"; - } - case "code-davinci-edit-001": - case "text-davinci-edit-001": { - return "p50k_edit"; - } - case "ada": - case "babbage": - case "code-search-ada-code-001": - case "code-search-babbage-code-001": - case "curie": - case "davinci": - case "text-ada-001": - case "text-babbage-001": - case "text-curie-001": - case "text-davinci-001": - case "text-search-ada-doc-001": - case "text-search-babbage-doc-001": - case "text-search-curie-doc-001": - case "text-search-davinci-doc-001": - case "text-similarity-ada-001": - case "text-similarity-babbage-001": - case "text-similarity-curie-001": - case "text-similarity-davinci-001": { - return "r50k_base"; - } - case "gpt-3.5-turbo-0301": - case "gpt-3.5-turbo": - case "gpt-4-0314": - case "gpt-4-32k-0314": - case "gpt-4-32k": - case "gpt-4": - case "text-embedding-ada-002": { - return "cl100k_base"; - } - default: - 
never(model); - throw new Error("Unknown model"); - } -} - export function encodingForModel( model: TiktokenModel, extendSpecialTokens?: Record @@ -90,5 +36,5 @@ export function encodingForModel( return getEncoding(getEncodingNameForModel(model), extendSpecialTokens); } -export { Tiktoken, TiktokenBPE } from "./core"; +export { Tiktoken, TiktokenBPE, getEncodingNameForModel } from "./core"; export { TiktokenModel, TiktokenEncoding } from "./ranks/ranks"; diff --git a/js/src/lite.ts b/js/src/lite.ts new file mode 100644 index 00000000..329e78de --- /dev/null +++ b/js/src/lite.ts @@ -0,0 +1,2 @@ +export { Tiktoken, TiktokenBPE, getEncodingNameForModel } from "./core"; +export { TiktokenModel, TiktokenEncoding } from "./ranks/ranks"; From 1fcb052df8861a43546e9f56318f4341f279da4f Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 17 May 2023 14:29:45 +0200 Subject: [PATCH 132/207] Bump to 1.0.6 --- js/CHANGELOG.md | 6 ++++++ js/index.d.ts | 1 + js/index.js | 1 + js/lite.d.ts | 1 + js/lite.js | 1 + js/package.json | 8 ++++++-- 6 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 js/index.d.ts create mode 100644 js/index.js create mode 100644 js/lite.d.ts create mode 100644 js/lite.js diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index a8b10e48..6ef52995 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.6 + +### Patch Changes + +- Add entrypoints for Jest in CRA + ## 1.0.5 ### Patch Changes diff --git a/js/index.d.ts b/js/index.d.ts new file mode 100644 index 00000000..aabf0510 --- /dev/null +++ b/js/index.d.ts @@ -0,0 +1 @@ +export * from "./dist/index.js"; diff --git a/js/index.js b/js/index.js new file mode 100644 index 00000000..e6bddc33 --- /dev/null +++ b/js/index.js @@ -0,0 +1 @@ +module.exports = require("./dist/index.cjs"); diff --git a/js/lite.d.ts b/js/lite.d.ts new file mode 100644 index 00000000..7852e5ab --- /dev/null +++ b/js/lite.d.ts @@ -0,0 +1 @@ +export * from "./dist/lite.js"; diff --git 
a/js/lite.js b/js/lite.js new file mode 100644 index 00000000..1857a238 --- /dev/null +++ b/js/lite.js @@ -0,0 +1 @@ +module.exports = require('./dist/lite.cjs'); \ No newline at end of file diff --git a/js/package.json b/js/package.json index 3bd8481d..9f025c82 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.5", + "version": "1.0.6", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { @@ -11,7 +11,11 @@ "main": "./dist/index.cjs", "types": "./dist/index.d.ts", "files": [ - "dist/**/*" + "dist/**/*", + "index.js", + "index.d.ts", + "lite.js", + "lite.d.ts" ], "exports": { ".": { From 3f2c59c6bdbf013ba9afe6923ec691b58ea8f831 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 2 Jun 2023 18:02:52 +0200 Subject: [PATCH 133/207] Add disclaimer about Svelte + Cloudflare Workers --- README.md | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 346adaec..6fc37728 100644 --- a/README.md +++ b/README.md @@ -107,16 +107,19 @@ main(); As this is a WASM library, there might be some issues with specific runtimes. If you encounter any issues, please open an issue. 
-| Runtime | Status | Notes | -| ---------------------------- | ------ | ------------------------------------------ | -| Node.js | ✅ | | -| Bun | ✅ | | -| Vite | ✅ | See [here](#vite) for notes | -| Next.js | ✅ | See [here](#nextjs) for notes | -| Create React App (via Craco) | ✅ | See [here](#create-react-app) for notes | -| Vercel Edge Runtime | ✅ | See [here](#vercel-edge-runtime) for notes | -| Cloudflare Workers | ✅ | See [here](#cloudflare-workers) for notes | -| Deno | ❌ | Currently unsupported | +| Runtime | Status | Notes | +| ---------------------------- | ------ | ------------------------------------------------------------------------------------------ | +| Node.js | ✅ | | +| Bun | ✅ | | +| Vite | ✅ | See [here](#vite) for notes | +| Next.js | ✅ | See [here](#nextjs) for notes | +| Create React App (via Craco) | ✅ | See [here](#create-react-app) for notes | +| Vercel Edge Runtime | ✅ | See [here](#vercel-edge-runtime) for notes | +| Cloudflare Workers | ✅ | See [here](#cloudflare-workers) for notes | +| Deno | ❌ | Currently unsupported (see [dqbd/tiktoken#22](https://github.com/dqbd/tiktoken/issues/22)) | +| Svelte + Cloudflare Workers | ❌ | Currently unsupported (see [dqbd/tiktoken#37](https://github.com/dqbd/tiktoken/issues/37)) | + +For unsupported runtimes, consider using [`js-tiktoken`](https://www.npmjs.com/package/js-tiktoken), which is a pure JS implementation of the tokeniser. 
### [Vite](#vite) From c4a61809d55123de49a72ca651764888f04c0400 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 2 Jun 2023 18:07:50 +0200 Subject: [PATCH 134/207] Clarify the difference between `js-tiktoken` and `tiktoken` --- README.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6fc37728..50407620 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,21 @@ # ⏳ tiktoken tiktoken is a [BPE](https://en.wikipedia.org/wiki/Byte_pair_encoding) tokeniser for use with -OpenAI's models, forked from the original tiktoken library to provide NPM bindings for Node and other JS runtimes. +OpenAI's models, forked from the original tiktoken library to provide JS/WASM bindings for NodeJS and other JS runtimes. -The open source version of `tiktoken` can be installed from NPM: +This repository contains the following packages: + +- `tiktoken` (formerly hosted at `@dqbd/tiktoken`): WASM bindings for the original Python library, providing full 1-to-1 feature parity. +- `js-tiktoken`: Pure JavaScript port of the original library with the core functionality, suitable for environments where WASM is not well supported or not desired (such as edge runtimes). + +The WASM version of `tiktoken` can be installed from NPM: ``` npm install tiktoken ``` +Documentation for `js-tiktoken` can be found [here](https://github.com/dqbd/tiktoken/blob/main/js/README.md).
+ ## Usage Basic usage follows, which includes all the OpenAI encoders and ranks: From c7faff0e3309eeac12144822377c1eb64b42e639 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 2 Jun 2023 18:09:42 +0200 Subject: [PATCH 135/207] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 50407620..0b7df234 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,9 @@ OpenAI's models, forked from the original tiktoken library to provide JS/WASM bi This repository contains the following packages: - `tiktoken` (formerly hosted at `@dqbd/tiktoken`): WASM bindings for the original Python library, providing full 1-to-1 feature parity. -- `js-tiktoken`: Pure JavaScript port of the original library with the core functionality, suitable for environments where WASM is not well supported or not desired (such as edge runtimes). +- `js-tiktoken`: Pure JavaScript port of the original library with the core functionality, suitable for environments where WASM is not well supported or not desired (such as edge runtimes). + +Documentation for `js-tiktoken` can be found [here](https://github.com/dqbd/tiktoken/blob/main/js/README.md). Documentation for `tiktoken` can be found below. The WASM version of `tiktoken` can be installed from NPM: @@ -14,8 +16,6 @@ The WASM version of `tiktoken` can be installed from NPM: npm install tiktoken ``` -Documentation for `js-tiktoken` can be found [here](https://github.com/dqbd/tiktoken/blob/main/js/README.md).
- ## Usage Basic usage follows, which includes all the OpenAI encoders and ranks: From ebfc8140845cfe73b80b86c59a2a532abf718173 Mon Sep 17 00:00:00 2001 From: Changje Jeong Date: Wed, 14 Jun 2023 23:23:41 +0900 Subject: [PATCH 136/207] Add new models - gpt-3.5-turbo-0613 - gpt-3.5-turbo-16k - gpt-3.5-turbo-16k-0613 - gpt-4-0613 - gpt-4-32k-0613 --- js/src/core.ts | 7 ++++++- tiktoken/model_to_encoding.json | 7 ++++++- wasm/src/lib.rs | 4 ++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/js/src/core.ts b/js/src/core.ts index 5a9d65f4..6616e53c 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -247,11 +247,16 @@ export function getEncodingNameForModel(model: TiktokenModel) { case "text-similarity-davinci-001": { return "r50k_base"; } + case "gpt-3.5-turbo-16k-0613": + case "gpt-3.5-turbo-16k": + case "gpt-3.5-turbo-0613": case "gpt-3.5-turbo-0301": case "gpt-3.5-turbo": - case "gpt-4-0314": + case "gpt-4-32k-0613": case "gpt-4-32k-0314": case "gpt-4-32k": + case "gpt-4-0613": + case "gpt-4-0314": case "gpt-4": case "text-embedding-ada-002": { return "cl100k_base"; diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index 6a90bbe8..a7a244b5 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -31,8 +31,13 @@ "gpt2": "gpt2", "gpt-3.5-turbo": "cl100k_base", "gpt-3.5-turbo-0301": "cl100k_base", + "gpt-3.5-turbo-0613": "cl100k_base", + "gpt-3.5-turbo-16k": "cl100k_base", + "gpt-3.5-turbo-16k-0613": "cl100k_base", "gpt-4": "cl100k_base", "gpt-4-0314": "cl100k_base", + "gpt-4-0613": "cl100k_base", "gpt-4-32k": "cl100k_base", - "gpt-4-32k-0314": "cl100k_base" + "gpt-4-32k-0314": "cl100k_base", + "gpt-4-32k-0613": "cl100k_base" } diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 39efbc07..781878f6 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -434,10 +434,14 @@ pub fn encoding_for_model( "gpt2" => Ok("gpt2"), "gpt-3.5-turbo" => Ok("cl100k_base"), "gpt-3.5-turbo-0301" => 
Ok("cl100k_base"), + "gpt-3.5-turbo-0613" => Ok("cl100k_base"), + "gpt-3.5-turbo-16k" => Ok("cl100k_base"), + "gpt-3.5-turbo-16k-0613" => Ok("cl100k_base"), "gpt-4" => Ok("cl100k_base"), "gpt-4-0314" => Ok("cl100k_base"), "gpt-4-32k" => Ok("cl100k_base"), "gpt-4-32k-0314" => Ok("cl100k_base"), + "gpt-4-32k-0613" => Ok("cl100k_base"), model => Err(JsError::new( format!("Invalid model: {}", model.to_string()).as_str(), )), From baca5b1e38e0144348c2e2314546db967f1e77ab Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 15 Jun 2023 15:10:27 +0200 Subject: [PATCH 137/207] Add changeset --- .changeset/strange-pumas-compete.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changeset/strange-pumas-compete.md diff --git a/.changeset/strange-pumas-compete.md b/.changeset/strange-pumas-compete.md new file mode 100644 index 00000000..758f11a3 --- /dev/null +++ b/.changeset/strange-pumas-compete.md @@ -0,0 +1,6 @@ +--- +"js-tiktoken": patch +"tiktoken": patch +--- + +Add new GPT-3.5 and GPT-4 models From a25079d19256dbbb49c151fc3c0e3671502dc9c5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 15 Jun 2023 13:11:40 +0000 Subject: [PATCH 138/207] Version Packages --- .changeset/strange-pumas-compete.md | 6 ------ js/CHANGELOG.md | 6 ++++++ js/package.json | 2 +- wasm/CHANGELOG.md | 7 +++++++ wasm/package.json | 2 +- 5 files changed, 15 insertions(+), 8 deletions(-) delete mode 100644 .changeset/strange-pumas-compete.md create mode 100644 wasm/CHANGELOG.md diff --git a/.changeset/strange-pumas-compete.md b/.changeset/strange-pumas-compete.md deleted file mode 100644 index 758f11a3..00000000 --- a/.changeset/strange-pumas-compete.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -"js-tiktoken": patch -"tiktoken": patch ---- - -Add new GPT-3.5 and GPT-4 models diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index 6ef52995..0b9227d9 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.7 + +### Patch Changes + +- baca5b1: Add 
new GPT-3.5 and GPT-4 models + ## 1.0.6 ### Patch Changes diff --git a/js/package.json b/js/package.json index 9f025c82..a0a26cfa 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.6", + "version": "1.0.7", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { diff --git a/wasm/CHANGELOG.md b/wasm/CHANGELOG.md new file mode 100644 index 00000000..0059afec --- /dev/null +++ b/wasm/CHANGELOG.md @@ -0,0 +1,7 @@ +# tiktoken + +## 1.0.8 + +### Patch Changes + +- baca5b1: Add new GPT-3.5 and GPT-4 models diff --git a/wasm/package.json b/wasm/package.json index 2fb6fde9..fac15a98 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -1,6 +1,6 @@ { "name": "tiktoken", - "version": "1.0.7", + "version": "1.0.8", "description": "JS/WASM bindings for tiktoken", "license": "MIT", "scripts": { From 5a7be3cb5b9820a86ca69a58acfbae1d67dca32f Mon Sep 17 00:00:00 2001 From: Daewoon Kim Date: Fri, 16 Jun 2023 10:52:22 +0900 Subject: [PATCH 139/207] feat: Add missing models --- wasm/src/lib.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 781878f6..f8773dd2 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -384,8 +384,12 @@ export type TiktokenModel = | "gpt-4-0314" | "gpt-4-32k" | "gpt-4-32k-0314" + | "gpt-4-32k-0613" | "gpt-3.5-turbo" - | "gpt-3.5-turbo-0301"; + | "gpt-3.5-turbo-0301" + | "gpt-3.5-turbo-0613" + | "gpt-3.5-turbo-16k" + | "gpt-3.5-turbo-16k-0613"; /** * @param {TiktokenModel} encoding @@ -439,6 +443,7 @@ pub fn encoding_for_model( "gpt-3.5-turbo-16k-0613" => Ok("cl100k_base"), "gpt-4" => Ok("cl100k_base"), "gpt-4-0314" => Ok("cl100k_base"), + "gpt-4-0613" => Ok("cl100k_base"), "gpt-4-32k" => Ok("cl100k_base"), "gpt-4-32k-0314" => Ok("cl100k_base"), "gpt-4-32k-0613" => Ok("cl100k_base"), From 1bc9ada93ea77818293f23c5faa097b77e2944a2 Mon Sep 17 00:00:00 2001 From: Daewoon Kim Date: Fri, 16 Jun 2023 11:01:13 +0900 
Subject: [PATCH 140/207] Update lib.rs --- wasm/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index f8773dd2..ef8138f3 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -382,6 +382,7 @@ export type TiktokenModel = | "gpt2" | "gpt-4" | "gpt-4-0314" + | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" From 3310bfeb94b38ff9738df97721893e76fba0603f Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 22 Jun 2023 23:55:43 +0100 Subject: [PATCH 141/207] Bump tiktoken to 1.0.9 --- wasm/CHANGELOG.md | 6 ++++++ wasm/package.json | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/wasm/CHANGELOG.md b/wasm/CHANGELOG.md index 0059afec..cdfc56b2 100644 --- a/wasm/CHANGELOG.md +++ b/wasm/CHANGELOG.md @@ -1,5 +1,11 @@ # tiktoken +## 1.0.9 + +### Patch Changes + +- Fix missing types for 0613 models + ## 1.0.8 ### Patch Changes diff --git a/wasm/package.json b/wasm/package.json index fac15a98..59ca3680 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -1,6 +1,6 @@ { "name": "tiktoken", - "version": "1.0.8", + "version": "1.0.9", "description": "JS/WASM bindings for tiktoken", "license": "MIT", "scripts": { From 15b8a8bf4759fe0472c7f816cbc018218bfaf306 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 23 Jun 2023 00:18:48 +0100 Subject: [PATCH 142/207] Remove python CI steps for now, write .npmrc in actions --- .github/workflows/build_wheels.yml | 83 ------------------------------ .github/workflows/publish.yml | 7 +++ 2 files changed, 7 insertions(+), 83 deletions(-) delete mode 100644 .github/workflows/build_wheels.yml diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml deleted file mode 100644 index 4a399a60..00000000 --- a/.github/workflows/build_wheels.yml +++ /dev/null @@ -1,83 +0,0 @@ -name: Build wheels - -on: [push, pull_request, workflow_dispatch] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - 
cancel-in-progress: true - -jobs: - build_wheels: - name: py${{ matrix.python-version }} on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - # cibuildwheel builds linux wheels inside a manylinux container - # it also takes care of procuring the correct python version for us - os: [ubuntu-latest, windows-latest, macos-latest] - python-version: [38, 39, 310, 311] - - steps: - - uses: actions/checkout@v3 - - - uses: pypa/cibuildwheel@v2.12.0 - env: - CIBW_BUILD: "cp${{ matrix.python-version}}-*" - - - uses: actions/upload-artifact@v3 - with: - name: dist - path: ./wheelhouse/*.whl - - build_wheels_aarch64: - name: py${{ matrix.python-version }} on ${{ matrix.os }} (aarch64) - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python-version: [38, 39, 310, 311] - - steps: - - uses: actions/checkout@v3 - - - name: Setup up QEMU - uses: docker/setup-qemu-action@v2 - with: - platforms: arm64 - - - name: Build wheels - uses: pypa/cibuildwheel@v2.11.3 - env: - CIBW_BUILD: "cp${{ matrix.python-version}}-*" - CIBW_ARCHS: aarch64 - CIBW_BUILD_VERBOSITY: 3 - # https://github.com/rust-lang/cargo/issues/10583 - CIBW_ENVIRONMENT_LINUX: PATH="$PATH:$HOME/.cargo/bin" CARGO_NET_GIT_FETCH_WITH_CLI=true - - uses: actions/upload-artifact@v3 - with: - name: dist - path: ./wheelhouse/*.whl - - build_sdist: - name: sdist - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - name: Install Python - with: - python-version: "3.9" - - name: Run check-manifest - run: | - pip install check-manifest - check-manifest -v - - name: Build sdist - run: | - pip install --upgrade build - python -m build --sdist - - uses: actions/upload-artifact@v3 - with: - name: dist - path: ./dist/*.tar.gz diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index eae9574e..34ed8404 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -19,6 
+19,13 @@ jobs: - name: Install run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - run: yarn install --frozen-lockfile + - name: Create .npmrc + run: | + cat << EOF > "$HOME/.npmrc" + //registry.npmjs.org/:_authToken=$NPM_TOKEN + EOF + env: + NPM_TOKEN: ${{ secrets.NPM_TOKEN }} - name: Create Release Pull Request or Publish id: changesets uses: changesets/action@v1 From 71db4346ae33286adcedad491e8c7573352ad562 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 23 Jun 2023 00:51:51 +0100 Subject: [PATCH 143/207] Use NODE_AUTH_TOKEN --- .github/workflows/publish.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 34ed8404..89de1647 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -19,13 +19,6 @@ jobs: - name: Install run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - run: yarn install --frozen-lockfile - - name: Create .npmrc - run: | - cat << EOF > "$HOME/.npmrc" - //registry.npmjs.org/:_authToken=$NPM_TOKEN - EOF - env: - NPM_TOKEN: ${{ secrets.NPM_TOKEN }} - name: Create Release Pull Request or Publish id: changesets uses: changesets/action@v1 @@ -33,4 +26,5 @@ jobs: publish: yarn run publish env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} NPM_TOKEN: ${{ secrets.NPM_TOKEN }} From 8e776d36dc3ae5ef5fc051ac44d9f3af6725dd8e Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 25 Jun 2023 00:06:03 +0100 Subject: [PATCH 144/207] Remove prefix from folder Closes When running 1.0.9 there is a Missing tiktoken_bg.wasm error dqbd/tiktoken#56 --- wasm/scripts/postprocess.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/wasm/scripts/postprocess.ts b/wasm/scripts/postprocess.ts index c6421ea7..3773246e 100644 --- a/wasm/scripts/postprocess.ts +++ b/wasm/scripts/postprocess.ts @@ -174,7 +174,6 @@ for (const baseDir of [ path.join( prefix, 
"node_modules", - "@dqbd", "tiktoken", "${relativeDir}", "./tiktoken_bg.wasm" From a691152a24757fd342c9576ef17d450b8ced3e0f Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 25 Jun 2023 00:11:53 +0100 Subject: [PATCH 145/207] Add changeset --- .changeset/rude-candles-dress.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/rude-candles-dress.md diff --git a/.changeset/rude-candles-dress.md b/.changeset/rude-candles-dress.md new file mode 100644 index 00000000..f52ce697 --- /dev/null +++ b/.changeset/rude-candles-dress.md @@ -0,0 +1,5 @@ +--- +"tiktoken": patch +--- + +Fix invalid prefix for resolving broken resolution From 41bb68a0a2c5e60ecd73f2caf8c1a724a4b4072c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 24 Jun 2023 23:18:00 +0000 Subject: [PATCH 146/207] Version Packages --- .changeset/rude-candles-dress.md | 5 ----- wasm/CHANGELOG.md | 6 ++++++ wasm/package.json | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) delete mode 100644 .changeset/rude-candles-dress.md diff --git a/.changeset/rude-candles-dress.md b/.changeset/rude-candles-dress.md deleted file mode 100644 index f52ce697..00000000 --- a/.changeset/rude-candles-dress.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"tiktoken": patch ---- - -Fix invalid prefix for resolving broken resolution diff --git a/wasm/CHANGELOG.md b/wasm/CHANGELOG.md index cdfc56b2..fb5e430f 100644 --- a/wasm/CHANGELOG.md +++ b/wasm/CHANGELOG.md @@ -1,5 +1,11 @@ # tiktoken +## 1.0.10 + +### Patch Changes + +- a691152: Fix invalid prefix for resolving broken resolution + ## 1.0.9 ### Patch Changes diff --git a/wasm/package.json b/wasm/package.json index 59ca3680..63997bb8 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -1,6 +1,6 @@ { "name": "tiktoken", - "version": "1.0.9", + "version": "1.0.10", "description": "JS/WASM bindings for tiktoken", "license": "MIT", "scripts": { From 82d44e1629058722035d0a26fdfa8d0cb47a4202 Mon Sep 17 00:00:00 2001 From: Niklas Wenzel Date: Sat, 
5 Aug 2023 18:11:58 +0200 Subject: [PATCH 147/207] Add Electron setup instructions to README --- README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/README.md b/README.md index 0b7df234..537d86d5 100644 --- a/README.md +++ b/README.md @@ -123,6 +123,7 @@ As this is a WASM library, there might be some issues with specific runtimes. If | Create React App (via Craco) | ✅ | See [here](#create-react-app) for notes | | Vercel Edge Runtime | ✅ | See [here](#vercel-edge-runtime) for notes | | Cloudflare Workers | ✅ | See [here](#cloudflare-workers) for notes | +| Electron | ✅ | See [here](#electron) for notes | | Deno | ❌ | Currently unsupported (see [dqbd/tiktoken#22](https://github.com/dqbd/tiktoken/issues/22)) | | Svelte + Cloudflare Workers | ❌ | Currently unsupported (see [dqbd/tiktoken#37](https://github.com/dqbd/tiktoken/issues/37)) | @@ -290,6 +291,27 @@ export default { }; ``` +### [Electron](#electron) + +To use tiktoken in your Electron main process, you need to make sure the WASM binary gets copied into your application package. + +Assuming a setup with [Electron Forge](https://www.electronforge.io) and [`@electron-forge/plugin-webpack`](https://www.npmjs.com/package/@electron-forge/plugin-webpack), add the following to your `webpack.main.config.js`: + +```javascript +const CopyPlugin = require("copy-webpack-plugin"); + +module.exports = { + // ... 
+ plugins: [ + new CopyPlugin({ + patterns: [ + { from: "./node_modules/tiktoken/tiktoken_bg.wasm" }, + ], + }), + ], +}; +``` + ## Acknowledgements - https://github.com/zurawiki/tiktoken-rs From db9a80452e095fe07f0d14dfb301a840646ea4e9 Mon Sep 17 00:00:00 2001 From: Prince Mendiratta Date: Tue, 19 Sep 2023 10:46:33 +0530 Subject: [PATCH 148/207] feat: add gpt3.5-turbo-instruct model Signed-off-by: Prince Mendiratta --- js/src/core.ts | 2 ++ tiktoken/model_to_encoding.json | 2 ++ wasm/src/lib.rs | 2 ++ 3 files changed, 6 insertions(+) diff --git a/js/src/core.ts b/js/src/core.ts index 6616e53c..4f759349 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -247,6 +247,8 @@ export function getEncodingNameForModel(model: TiktokenModel) { case "text-similarity-davinci-001": { return "r50k_base"; } + case "gpt-3.5-turbo-instruct-0914": + case "gpt-3.5-turbo-instruct": case "gpt-3.5-turbo-16k-0613": case "gpt-3.5-turbo-16k": case "gpt-3.5-turbo-0613": diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index a7a244b5..7d176230 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -34,6 +34,8 @@ "gpt-3.5-turbo-0613": "cl100k_base", "gpt-3.5-turbo-16k": "cl100k_base", "gpt-3.5-turbo-16k-0613": "cl100k_base", + "gpt-3.5-turbo-instruct": "cl100k_base", + "gpt-3.5-turbo-instruct-0914": "cl100k_base", "gpt-4": "cl100k_base", "gpt-4-0314": "cl100k_base", "gpt-4-0613": "cl100k_base", diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index ef8138f3..6743f044 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -442,6 +442,8 @@ pub fn encoding_for_model( "gpt-3.5-turbo-0613" => Ok("cl100k_base"), "gpt-3.5-turbo-16k" => Ok("cl100k_base"), "gpt-3.5-turbo-16k-0613" => Ok("cl100k_base"), + "gpt-3.5-turbo-instruct" => Ok("cl100k_base"), + "gpt-3.5-turbo-instruct-0914" => Ok("cl100k_base"), "gpt-4" => Ok("cl100k_base"), "gpt-4-0314" => Ok("cl100k_base"), "gpt-4-0613" => Ok("cl100k_base"), From
4cda3797817810b90a30716ed387ea3874c2af34 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Nov 2023 23:44:20 +0100 Subject: [PATCH 149/207] Add new encodings --- tiktoken/model_to_encoding.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index f40f5d97..2047e960 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -35,6 +35,7 @@ "gpt-35-turbo": "cl100k_base", "gpt-3.5-turbo-0301": "cl100k_base", "gpt-3.5-turbo-0613": "cl100k_base", + "gpt-3.5-turbo-1106": "cl100k_base", "gpt-3.5-turbo-16k": "cl100k_base", "gpt-3.5-turbo-16k-0613": "cl100k_base", "gpt-4": "cl100k_base", @@ -42,5 +43,7 @@ "gpt-4-0613": "cl100k_base", "gpt-4-32k": "cl100k_base", "gpt-4-32k-0314": "cl100k_base", - "gpt-4-32k-0613": "cl100k_base" + "gpt-4-32k-0613": "cl100k_base", + "gpt-4-1106-preview": "cl100k_base", + "gpt-4-vision-preview": "cl100k_base" } From 32882f7ef2014aefe8d8c00d123ca393a27815cd Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Nov 2023 23:49:49 +0100 Subject: [PATCH 150/207] Update WASM and JS to add new models --- js/src/core.ts | 6 ++++++ wasm/src/lib.rs | 14 +++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/js/src/core.ts b/js/src/core.ts index 6616e53c..f413ccfb 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -219,6 +219,7 @@ export function getEncodingNameForModel(model: TiktokenModel) { case "code-davinci-002": case "cushman-codex": case "davinci-codex": + case "davinci-002": case "text-davinci-002": case "text-davinci-003": { return "p50k_base"; @@ -229,6 +230,7 @@ export function getEncodingNameForModel(model: TiktokenModel) { } case "ada": case "babbage": + case "babbage-002": case "code-search-ada-code-001": case "code-search-babbage-code-001": case "curie": @@ -258,6 +260,10 @@ export function getEncodingNameForModel(model: TiktokenModel) { case "gpt-4-0613": case "gpt-4-0314": case "gpt-4": + 
case "gpt-3.5-turbo-1106": + case "gpt-35-turbo": + case "gpt-4-1106-preview": + case "gpt-4-vision-preview": case "text-embedding-ada-002": { return "cl100k_base"; } diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index ef8138f3..aefe246d 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -350,6 +350,8 @@ pub fn get_encoding(encoding: &str, extend_special_tokens: JsValue) -> Result Result { let encoding = match model { + "davinci" => Ok("p50k_base"), "text-davinci-003" => Ok("p50k_base"), "text-davinci-002" => Ok("p50k_base"), "text-davinci-001" => Ok("r50k_base"), @@ -416,6 +423,7 @@ pub fn encoding_for_model( "davinci" => Ok("r50k_base"), "curie" => Ok("r50k_base"), "babbage" => Ok("r50k_base"), + "babbage-002" => Ok("r50k_base"), "ada" => Ok("r50k_base"), "code-davinci-002" => Ok("p50k_base"), "code-davinci-001" => Ok("p50k_base"), @@ -448,6 +456,10 @@ pub fn encoding_for_model( "gpt-4-32k" => Ok("cl100k_base"), "gpt-4-32k-0314" => Ok("cl100k_base"), "gpt-4-32k-0613" => Ok("cl100k_base"), + "gpt-3.5-turbo-1106" => Ok("cl100k_base"), + "gpt-35-turbo" => Ok("cl100k_base"), + "gpt-4-1106-preview" => Ok("cl100k_base"), + "gpt-4-vision-preview" => Ok("cl100k_base"), model => Err(JsError::new( format!("Invalid model: {}", model.to_string()).as_str(), )), From 10302a2d90848960f02caecf653bbbb0a2eb93a4 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Wed, 15 Nov 2023 23:57:20 +0100 Subject: [PATCH 151/207] Update examples --- js/examples/dynamic.ts | 2 +- js/examples/simple.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/js/examples/dynamic.ts b/js/examples/dynamic.ts index 5a1132e5..ade04cb9 100644 --- a/js/examples/dynamic.ts +++ b/js/examples/dynamic.ts @@ -29,7 +29,7 @@ async function encodingForModel( } async function main() { - const encodings = await encodingForModel("gpt-4"); + const encodings = await encodingForModel("gpt-4-1106-preview"); const text = "function foo() { return 1; }"; const tokens = encodings.encode(text); 
console.log(tokens); diff --git a/js/examples/simple.ts b/js/examples/simple.ts index 6f8eb48b..9f220d7e 100644 --- a/js/examples/simple.ts +++ b/js/examples/simple.ts @@ -1,5 +1,5 @@ import assert from "node:assert"; import { getEncoding } from "../dist"; -const enc = getEncoding("gpt2"); +const enc = getEncoding("cl100k_base"); assert(enc.decode(enc.encode("hello world")) === "hello world"); From bd3a360f1ea63c80c926841b47188befb08c22d0 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 16 Nov 2023 00:01:21 +0100 Subject: [PATCH 152/207] Add new changeset --- .changeset/thick-monkeys-wink.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changeset/thick-monkeys-wink.md diff --git a/.changeset/thick-monkeys-wink.md b/.changeset/thick-monkeys-wink.md new file mode 100644 index 00000000..c3159570 --- /dev/null +++ b/.changeset/thick-monkeys-wink.md @@ -0,0 +1,6 @@ +--- +"js-tiktoken": patch +"tiktoken": patch +--- + +Add new GPT models From 20d610f3a59f1168fe05428439229de1ffe7c825 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 15 Nov 2023 23:01:58 +0000 Subject: [PATCH 153/207] Version Packages --- .changeset/thick-monkeys-wink.md | 6 ------ js/CHANGELOG.md | 6 ++++++ js/package.json | 2 +- wasm/CHANGELOG.md | 6 ++++++ wasm/package.json | 2 +- 5 files changed, 14 insertions(+), 8 deletions(-) delete mode 100644 .changeset/thick-monkeys-wink.md diff --git a/.changeset/thick-monkeys-wink.md b/.changeset/thick-monkeys-wink.md deleted file mode 100644 index c3159570..00000000 --- a/.changeset/thick-monkeys-wink.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -"js-tiktoken": patch -"tiktoken": patch ---- - -Add new GPT models diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index 0b9227d9..e99c368c 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.8 + +### Patch Changes + +- bd3a360: Add new GPT models + ## 1.0.7 ### Patch Changes diff --git a/js/package.json b/js/package.json index a0a26cfa..b53c1a90 
100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.7", + "version": "1.0.8", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { diff --git a/wasm/CHANGELOG.md b/wasm/CHANGELOG.md index fb5e430f..68c57c6d 100644 --- a/wasm/CHANGELOG.md +++ b/wasm/CHANGELOG.md @@ -1,5 +1,11 @@ # tiktoken +## 1.0.11 + +### Patch Changes + +- bd3a360: Add new GPT models + ## 1.0.10 ### Patch Changes diff --git a/wasm/package.json b/wasm/package.json index 63997bb8..05c3b4ca 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -1,6 +1,6 @@ { "name": "tiktoken", - "version": "1.0.10", + "version": "1.0.11", "description": "JS/WASM bindings for tiktoken", "license": "MIT", "scripts": { From ff9c6377e832f074e7ac858055793541e23422a8 Mon Sep 17 00:00:00 2001 From: risu729 <79110363+risu729@users.noreply.github.com> Date: Fri, 26 Jan 2024 14:47:26 +0900 Subject: [PATCH 154/207] Add new models --- js/src/core.ts | 3 +++ tiktoken/model_to_encoding.json | 3 +++ wasm/src/lib.rs | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/js/src/core.ts b/js/src/core.ts index f413ccfb..7579400c 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -264,6 +264,9 @@ export function getEncodingNameForModel(model: TiktokenModel) { case "gpt-35-turbo": case "gpt-4-1106-preview": case "gpt-4-vision-preview": + case "gpt-3.5-turbo-0125": + case "gpt-4-turbo-preview": + case "gpt-4-0125-preview": case "text-embedding-ada-002": { return "cl100k_base"; } diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index 2047e960..336dcade 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -36,6 +36,7 @@ "gpt-3.5-turbo-0301": "cl100k_base", "gpt-3.5-turbo-0613": "cl100k_base", "gpt-3.5-turbo-1106": "cl100k_base", + "gpt-3.5-turbo-0125": "cl100k_base", "gpt-3.5-turbo-16k": "cl100k_base", "gpt-3.5-turbo-16k-0613": "cl100k_base", "gpt-4": "cl100k_base", @@ -44,6 +45,8 @@ 
"gpt-4-32k": "cl100k_base", "gpt-4-32k-0314": "cl100k_base", "gpt-4-32k-0613": "cl100k_base", + "gpt-4-turbo-preview": "cl100k_base", "gpt-4-1106-preview": "cl100k_base", + "gpt-4-0125-preview": "cl100k_base", "gpt-4-vision-preview": "cl100k_base" } diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index aefe246d..b9e9f28c 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -393,9 +393,12 @@ export type TiktokenModel = | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" + | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" + | "gpt-4-turbo-preview" | "gpt-4-1106-preview" + | "gpt-4-0125-preview" | "gpt-4-vision-preview" /** @@ -460,6 +463,9 @@ pub fn encoding_for_model( "gpt-35-turbo" => Ok("cl100k_base"), "gpt-4-1106-preview" => Ok("cl100k_base"), "gpt-4-vision-preview" => Ok("cl100k_base"), + "gpt-3.5-turbo-0125" => Ok("cl100k_base"), + "gpt-4-turbo-preview" => Ok("cl100k_base"), + "gpt-4-0125-preview" => Ok("cl100k_base"), model => Err(JsError::new( format!("Invalid model: {}", model.to_string()).as_str(), )), From 221a30ee3de5814b0cc8380ce773c39f5dbd4a6f Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 27 Jan 2024 23:05:21 -0800 Subject: [PATCH 155/207] Fix build --- wasm/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index cdf0a415..61e6072e 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -453,7 +453,7 @@ pub fn encoding_for_model( "gpt-3.5-turbo-0613" => Ok("cl100k_base"), "gpt-3.5-turbo-16k" => Ok("cl100k_base"), "gpt-3.5-turbo-16k-0613" => Ok("cl100k_base"), - "gpt-3.5-turbo-instruct" => Ok("clk100k_base") + "gpt-3.5-turbo-instruct" => Ok("clk100k_base"), "gpt-3.5-turbo-instruct-0914" => Ok("cl100k_base"), "gpt-4" => Ok("cl100k_base"), "gpt-4-0314" => Ok("cl100k_base"), From 43e99b4c3c73a509dc4fc995e82ce4227f03e429 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 27 Jan 2024 23:10:11 -0800 Subject: [PATCH 156/207] Add support for new 
models, instruct models --- .changeset/ninety-cheetahs-tell.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changeset/ninety-cheetahs-tell.md diff --git a/.changeset/ninety-cheetahs-tell.md b/.changeset/ninety-cheetahs-tell.md new file mode 100644 index 00000000..f93de43f --- /dev/null +++ b/.changeset/ninety-cheetahs-tell.md @@ -0,0 +1,6 @@ +--- +"js-tiktoken": patch +"tiktoken": patch +--- + +Add support for new models, instruct models From 708f77c0a54f436b1016121ed4073498448d2e7d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 28 Jan 2024 07:10:49 +0000 Subject: [PATCH 157/207] Version Packages --- .changeset/ninety-cheetahs-tell.md | 6 ------ js/CHANGELOG.md | 6 ++++++ js/package.json | 2 +- wasm/CHANGELOG.md | 6 ++++++ wasm/package.json | 2 +- 5 files changed, 14 insertions(+), 8 deletions(-) delete mode 100644 .changeset/ninety-cheetahs-tell.md diff --git a/.changeset/ninety-cheetahs-tell.md b/.changeset/ninety-cheetahs-tell.md deleted file mode 100644 index f93de43f..00000000 --- a/.changeset/ninety-cheetahs-tell.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -"js-tiktoken": patch -"tiktoken": patch ---- - -Add support for new models, instruct models diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index e99c368c..efa3f825 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.9 + +### Patch Changes + +- 43e99b4: Add support for new models, instruct models + ## 1.0.8 ### Patch Changes diff --git a/js/package.json b/js/package.json index b53c1a90..9151d7e7 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.8", + "version": "1.0.9", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { diff --git a/wasm/CHANGELOG.md b/wasm/CHANGELOG.md index 68c57c6d..36e882aa 100644 --- a/wasm/CHANGELOG.md +++ b/wasm/CHANGELOG.md @@ -1,5 +1,11 @@ # tiktoken +## 1.0.12 + +### Patch Changes + +- 43e99b4: Add support for new models, instruct 
models + ## 1.0.11 ### Patch Changes diff --git a/wasm/package.json b/wasm/package.json index 05c3b4ca..c8e258b4 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -1,6 +1,6 @@ { "name": "tiktoken", - "version": "1.0.11", + "version": "1.0.12", "description": "JS/WASM bindings for tiktoken", "license": "MIT", "scripts": { From de6d1c772ed750bdac2c425ae082183e8f9b570b Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 27 Jan 2024 23:25:58 -0800 Subject: [PATCH 158/207] Fix invalid model --- tiktoken/model_to_encoding.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index 155abbec..1001fec8 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -39,8 +39,8 @@ "gpt-3.5-turbo-0125": "cl100k_base", "gpt-3.5-turbo-16k": "cl100k_base", "gpt-3.5-turbo-16k-0613": "cl100k_base", - "gpt-3.5-turbo-instruct": "gpt-3.5-turbo-instruct", - "gpt-3.5-turbo-instruct-0914": "gpt-3.5-turbo-instruct-0914", + "gpt-3.5-turbo-instruct": "cl100k_base", + "gpt-3.5-turbo-instruct-0914": "cl100k_base", "gpt-4": "cl100k_base", "gpt-4-0314": "cl100k_base", "gpt-4-0613": "cl100k_base", From 4491997c6d7a6f5d38331ed7d99a755350a1e76b Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 27 Jan 2024 23:35:35 -0800 Subject: [PATCH 159/207] Backport changes to @dqbd/tiktoken --- package.json | 1 + wasm/alias/package.json | 12 ++++++++++++ wasm/package.json | 3 ++- wasm/scripts/alias.ts | 20 ++++++++++++++++++++ 4 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 wasm/alias/package.json create mode 100644 wasm/scripts/alias.ts diff --git a/package.json b/package.json index 8f5d61b3..0b53e526 100644 --- a/package.json +++ b/package.json @@ -3,6 +3,7 @@ "workspaces": [ "js", "wasm", + "wasm/alias", "static" ], "scripts": { diff --git a/wasm/alias/package.json b/wasm/alias/package.json new file mode 100644 index 00000000..6a9e139a --- /dev/null +++ 
b/wasm/alias/package.json @@ -0,0 +1,12 @@ +{ + "name": "@dqbd/tiktoken", + "version": "1.0.12", + "repository": { + "type": "git", + "url": "https://github.com/dqbd/tiktoken" + }, + "publishConfig": { + "directory": "./dist", + "access": "public" + } +} diff --git a/wasm/package.json b/wasm/package.json index c8e258b4..9bec7956 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -5,9 +5,10 @@ "license": "MIT", "scripts": { "build": "run-s build:*", - "build:cleanup": "rm -rf dist/", + "build:cleanup": "rm -rf dist/ && rm -rf alias/dist/", "build:wasm": "run-s wasm:*", "build:postprocess": "tsx scripts/postprocess.ts", + "build:alias": "tsx scripts/alias.ts", "wasm:bundler": "wasm-pack build --target bundler --weak-refs --release --out-dir dist && rm -rf dist/.gitignore dist/README.md dist/package.json", "wasm:lite": "wasm-pack build --target bundler --weak-refs --release --out-dir dist/lite --no-default-features && rm -rf dist/lite/.gitignore dist/lite/README.md dist/lite/package.json", "test": "yarn vitest run" diff --git a/wasm/scripts/alias.ts b/wasm/scripts/alias.ts new file mode 100644 index 00000000..ece99ce6 --- /dev/null +++ b/wasm/scripts/alias.ts @@ -0,0 +1,20 @@ +import * as fs from "node:fs/promises"; +import * as path from "node:path"; + +async function createAlias() { + const srcDist = path.resolve(__dirname, "../dist"); + const targetDist = path.resolve(__dirname, "../alias/dist"); + + await fs.cp(srcDist, targetDist, { recursive: true }); + + const pkgPath = path.resolve(srcDist, "package.json"); + + const pkg = JSON.parse(await fs.readFile(pkgPath, { encoding: "utf-8" })); + pkg["name"] = "@dqbd/tiktoken"; + + await fs.writeFile(pkgPath, JSON.stringify(pkg, null, 2), { + encoding: "utf-8", + }); +} + +createAlias(); From e3fcb126b2dff2d1e9bd6ddb88fa923f87540150 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 27 Jan 2024 23:38:39 -0800 Subject: [PATCH 160/207] Fix typings for wasm --- wasm/src/lib.rs | 14 ++++++++------ 1 file 
changed, 8 insertions(+), 6 deletions(-) diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 61e6072e..80f005c8 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -382,12 +382,6 @@ export type TiktokenModel = | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt2" - | "gpt-4" - | "gpt-4-0314" - | "gpt-4-0613" - | "gpt-4-32k" - | "gpt-4-32k-0314" - | "gpt-4-32k-0613" | "gpt-3.5-turbo" | "gpt-35-turbo" | "gpt-3.5-turbo-0301" @@ -396,6 +390,14 @@ export type TiktokenModel = | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" + | "gpt-3.5-turbo-instruct" + | "gpt-3.5-turbo-instruct-0914" + | "gpt-4" + | "gpt-4-0314" + | "gpt-4-0613" + | "gpt-4-32k" + | "gpt-4-32k-0314" + | "gpt-4-32k-0613" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" From f39912b9d919d2b969a6dcc155570a6e67e64028 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 27 Jan 2024 23:42:42 -0800 Subject: [PATCH 161/207] Fix typo --- wasm/scripts/alias.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wasm/scripts/alias.ts b/wasm/scripts/alias.ts index ece99ce6..c62e9343 100644 --- a/wasm/scripts/alias.ts +++ b/wasm/scripts/alias.ts @@ -7,7 +7,7 @@ async function createAlias() { await fs.cp(srcDist, targetDist, { recursive: true }); - const pkgPath = path.resolve(srcDist, "package.json"); + const pkgPath = path.resolve(targetDist, "package.json"); const pkg = JSON.parse(await fs.readFile(pkgPath, { encoding: "utf-8" })); pkg["name"] = "@dqbd/tiktoken"; From d54f04c46a298ffb46ad3ae8ee03f9110208529e Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 27 Jan 2024 23:47:50 -0800 Subject: [PATCH 162/207] Fix invalid model when requesting an instruct model --- .changeset/mighty-carpets-smoke.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changeset/mighty-carpets-smoke.md diff --git a/.changeset/mighty-carpets-smoke.md b/.changeset/mighty-carpets-smoke.md new file mode 100644 index 00000000..4cef9b77 --- 
/dev/null +++ b/.changeset/mighty-carpets-smoke.md @@ -0,0 +1,7 @@ +--- +"js-tiktoken": patch +"tiktoken": patch +"@dqbd/tiktoken": patch +--- + +Fix invalid model when requesting an instruct model From f6b28196038a0114ff4d14c30472da78fb9542d2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 28 Jan 2024 07:48:23 +0000 Subject: [PATCH 163/207] Version Packages --- .changeset/mighty-carpets-smoke.md | 7 ------- js/CHANGELOG.md | 6 ++++++ js/package.json | 2 +- wasm/CHANGELOG.md | 6 ++++++ wasm/alias/CHANGELOG.md | 7 +++++++ wasm/alias/package.json | 2 +- wasm/package.json | 2 +- 7 files changed, 22 insertions(+), 10 deletions(-) delete mode 100644 .changeset/mighty-carpets-smoke.md create mode 100644 wasm/alias/CHANGELOG.md diff --git a/.changeset/mighty-carpets-smoke.md b/.changeset/mighty-carpets-smoke.md deleted file mode 100644 index 4cef9b77..00000000 --- a/.changeset/mighty-carpets-smoke.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -"js-tiktoken": patch -"tiktoken": patch -"@dqbd/tiktoken": patch ---- - -Fix invalid model when requesting an instruct model diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index efa3f825..6bd469d1 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.10 + +### Patch Changes + +- d54f04c: Fix invalid model when requesting an instruct model + ## 1.0.9 ### Patch Changes diff --git a/js/package.json b/js/package.json index 9151d7e7..377ae827 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.9", + "version": "1.0.10", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { diff --git a/wasm/CHANGELOG.md b/wasm/CHANGELOG.md index 36e882aa..a725d68f 100644 --- a/wasm/CHANGELOG.md +++ b/wasm/CHANGELOG.md @@ -1,5 +1,11 @@ # tiktoken +## 1.0.13 + +### Patch Changes + +- d54f04c: Fix invalid model when requesting an instruct model + ## 1.0.12 ### Patch Changes diff --git a/wasm/alias/CHANGELOG.md 
b/wasm/alias/CHANGELOG.md new file mode 100644 index 00000000..e3589a90 --- /dev/null +++ b/wasm/alias/CHANGELOG.md @@ -0,0 +1,7 @@ +# @dqbd/tiktoken + +## 1.0.13 + +### Patch Changes + +- d54f04c: Fix invalid model when requesting an instruct model diff --git a/wasm/alias/package.json b/wasm/alias/package.json index 6a9e139a..9a2719a3 100644 --- a/wasm/alias/package.json +++ b/wasm/alias/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.12", + "version": "1.0.13", "repository": { "type": "git", "url": "https://github.com/dqbd/tiktoken" diff --git a/wasm/package.json b/wasm/package.json index 9bec7956..b3585829 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -1,6 +1,6 @@ { "name": "tiktoken", - "version": "1.0.12", + "version": "1.0.13", "description": "JS/WASM bindings for tiktoken", "license": "MIT", "scripts": { From 110eef4f6830f4f31e0f9810c8f9b3ef3175a5b4 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 28 Jan 2024 00:02:00 -0800 Subject: [PATCH 164/207] Update development README.md Closes #54 --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 537d86d5..24d834bc 100644 --- a/README.md +++ b/README.md @@ -312,6 +312,14 @@ module.exports = { }; ``` +## Development + +To build the `tiktoken` library, make sure to have: +- Rust and [`wasm-pack`](https://github.com/rustwasm/wasm-pack) installed. +- Node.js 18+ is required to build the JS bindings and fetch the latest encoder ranks via `fetch`. + +Install all the dev-dependencies with `yarn install` and build both WASM binary and JS bindings with `yarn build`. 
+ ## Acknowledgements - https://github.com/zurawiki/tiktoken-rs From 37682318b564cb12232333d42ec93c638069ed68 Mon Sep 17 00:00:00 2001 From: Akyas Naushad <10742193+akynau@users.noreply.github.com> Date: Wed, 10 Apr 2024 17:53:32 +0100 Subject: [PATCH 165/207] Add GPT-4 Turbo --- js/src/core.ts | 2 ++ tiktoken/model_to_encoding.json | 2 ++ wasm/src/lib.rs | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/js/src/core.ts b/js/src/core.ts index d6341526..63e261ef 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -267,6 +267,8 @@ export function getEncodingNameForModel(model: TiktokenModel) { case "gpt-4-1106-preview": case "gpt-4-vision-preview": case "gpt-3.5-turbo-0125": + case "gpt-4-turbo": + case "gpt-4-turbo-2024-04-09": case "gpt-4-turbo-preview": case "gpt-4-0125-preview": case "text-embedding-ada-002": { diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index 1001fec8..f527ffbf 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -47,6 +47,8 @@ "gpt-4-32k": "cl100k_base", "gpt-4-32k-0314": "cl100k_base", "gpt-4-32k-0613": "cl100k_base", + "gpt-4-turbo": "cl100k_base", + "gpt-4-turbo-2024-04-09": "cl100k_base", "gpt-4-turbo-preview": "cl100k_base", "gpt-4-1106-preview": "cl100k_base", "gpt-4-0125-preview": "cl100k_base", diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 80f005c8..85630909 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -398,6 +398,8 @@ export type TiktokenModel = | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" + | "gpt-4-turbo" + | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" @@ -468,6 +470,8 @@ pub fn encoding_for_model( "gpt-4-1106-preview" => Ok("cl100k_base"), "gpt-4-vision-preview" => Ok("cl100k_base"), "gpt-3.5-turbo-0125" => Ok("cl100k_base"), + "gpt-4-turbo" => Ok("cl100k_base"), + "gpt-4-turbo-2024-04-09" => Ok("cl100k_base"), "gpt-4-turbo-preview" => Ok("cl100k_base"), "gpt-4-0125-preview" => 
Ok("cl100k_base"), model => Err(JsError::new( From f386b281ca4be40c825e2830b25d81db3f9820af Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sat, 13 Apr 2024 00:58:08 +0200 Subject: [PATCH 166/207] Add changelog --- .changeset/purple-jokes-invent.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changeset/purple-jokes-invent.md diff --git a/.changeset/purple-jokes-invent.md b/.changeset/purple-jokes-invent.md new file mode 100644 index 00000000..ae00e0c6 --- /dev/null +++ b/.changeset/purple-jokes-invent.md @@ -0,0 +1,7 @@ +--- +"js-tiktoken": patch +"tiktoken": patch +"@dqbd/tiktoken": patch +--- + +Add new GPT-4-turbo models From f6165e7c01005331b3e84272b57bd8d3f5ebc3bc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 12 Apr 2024 22:58:38 +0000 Subject: [PATCH 167/207] Version Packages --- .changeset/purple-jokes-invent.md | 7 ------- js/CHANGELOG.md | 6 ++++++ js/package.json | 2 +- wasm/CHANGELOG.md | 6 ++++++ wasm/alias/CHANGELOG.md | 6 ++++++ wasm/alias/package.json | 2 +- wasm/package.json | 2 +- 7 files changed, 21 insertions(+), 10 deletions(-) delete mode 100644 .changeset/purple-jokes-invent.md diff --git a/.changeset/purple-jokes-invent.md b/.changeset/purple-jokes-invent.md deleted file mode 100644 index ae00e0c6..00000000 --- a/.changeset/purple-jokes-invent.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -"js-tiktoken": patch -"tiktoken": patch -"@dqbd/tiktoken": patch ---- - -Add new GPT-4-turbo models diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index 6bd469d1..3510543e 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.11 + +### Patch Changes + +- f386b28: Add new GPT-4-turbo models + ## 1.0.10 ### Patch Changes diff --git a/js/package.json b/js/package.json index 377ae827..473e8399 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.10", + "version": "1.0.11", "description": "JavaScript port of tiktoken", "license": "MIT", 
"scripts": { diff --git a/wasm/CHANGELOG.md b/wasm/CHANGELOG.md index a725d68f..f112a972 100644 --- a/wasm/CHANGELOG.md +++ b/wasm/CHANGELOG.md @@ -1,5 +1,11 @@ # tiktoken +## 1.0.14 + +### Patch Changes + +- f386b28: Add new GPT-4-turbo models + ## 1.0.13 ### Patch Changes diff --git a/wasm/alias/CHANGELOG.md b/wasm/alias/CHANGELOG.md index e3589a90..70a9ee6a 100644 --- a/wasm/alias/CHANGELOG.md +++ b/wasm/alias/CHANGELOG.md @@ -1,5 +1,11 @@ # @dqbd/tiktoken +## 1.0.14 + +### Patch Changes + +- f386b28: Add new GPT-4-turbo models + ## 1.0.13 ### Patch Changes diff --git a/wasm/alias/package.json b/wasm/alias/package.json index 9a2719a3..1b33bb45 100644 --- a/wasm/alias/package.json +++ b/wasm/alias/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.13", + "version": "1.0.14", "repository": { "type": "git", "url": "https://github.com/dqbd/tiktoken" diff --git a/wasm/package.json b/wasm/package.json index b3585829..89b9d568 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -1,6 +1,6 @@ { "name": "tiktoken", - "version": "1.0.13", + "version": "1.0.14", "description": "JS/WASM bindings for tiktoken", "license": "MIT", "scripts": { From f2e1ac26a7ef08ed7f95ca36837909edc705b268 Mon Sep 17 00:00:00 2001 From: Phil Kallos Date: Mon, 13 May 2024 11:23:37 -0700 Subject: [PATCH 168/207] Add support for GPT-4-O, "Omni" model --- js/src/core.ts | 4 ++++ js/src/index.ts | 3 +++ scripts/ranks.ts | 4 ++-- tiktoken/model_to_encoding.json | 4 +++- tiktoken/registry.json | 10 +++++++++- wasm/src/lib.rs | 20 +++++++++++++++++++- wasm/test/test_simple_public.test.ts | 22 ++++++++++++++++++++++ 7 files changed, 62 insertions(+), 5 deletions(-) diff --git a/js/src/core.ts b/js/src/core.ts index 63e261ef..556eab19 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -274,6 +274,10 @@ export function getEncodingNameForModel(model: TiktokenModel) { case "text-embedding-ada-002": { return "cl100k_base"; } + case "gpt-4o": + case "gpt-4o-2024-05-13": { + 
return "o200k_base"; + } default: never(model); throw new Error("Unknown model"); diff --git a/js/src/index.ts b/js/src/index.ts index e27ceada..2c2dd3e9 100644 --- a/js/src/index.ts +++ b/js/src/index.ts @@ -4,6 +4,7 @@ import p50k_base from "./ranks/p50k_base"; import p50k_edit from "./ranks/p50k_edit"; import r50k_base from "./ranks/r50k_base"; import cl100k_base from "./ranks/cl100k_base"; +import o200k_base from "./ranks/o200k_base"; import { Tiktoken, getEncodingNameForModel } from "./core"; import { never } from "./utils"; @@ -23,6 +24,8 @@ export function getEncoding( return new Tiktoken(p50k_edit, extendSpecialTokens); case "cl100k_base": return new Tiktoken(cl100k_base, extendSpecialTokens); + case "o200k_base": + return new Tiktoken(o200k_base, extendSpecialTokens); default: never(encoding); throw new Error("Unknown encoding"); diff --git a/scripts/ranks.ts b/scripts/ranks.ts index 7cc89b5d..a8d9899c 100644 --- a/scripts/ranks.ts +++ b/scripts/ranks.ts @@ -264,8 +264,8 @@ async function main() { const bpe = await downloadBpe(data); if (lib === "js") { - bpe.pat_str = bpe.pat_str.replace( - /\(\?i:(.*?)\)/, + bpe.pat_str = bpe.pat_str.replaceAll( + /\(\?i:(.*?)\)/g, (_, match: string) => `(${match .split("|") diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index f527ffbf..e9bfb424 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -52,5 +52,7 @@ "gpt-4-turbo-preview": "cl100k_base", "gpt-4-1106-preview": "cl100k_base", "gpt-4-0125-preview": "cl100k_base", - "gpt-4-vision-preview": "cl100k_base" + "gpt-4-vision-preview": "cl100k_base", + "gpt-4o": "o200k_base", + "gpt-4o-2024-05-13": "o200k_base" } diff --git a/tiktoken/registry.json b/tiktoken/registry.json index aa3ee530..83cdbd9f 100644 --- a/tiktoken/registry.json +++ b/tiktoken/registry.json @@ -46,5 +46,13 @@ "<|endofprompt|>": 100276 }, "pat_str": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| 
?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "o200k_base": { + "load_tiktoken_bpe": "https://openaipublic.blob.core.windows.net/encodings/o200k_base.tiktoken", + "special_tokens": { + "<|endoftext|>": 199999, + "<|endofprompt|>": 200018 + }, + "pat_str": "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" } -} \ No newline at end of file +} diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 85630909..d0c1b4e0 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -133,6 +133,19 @@ impl CoreBPEConstructor { include_str!("./ranks/cl100k_base.regex.tiktoken"), ) } + + #[cfg(feature = "inline")] + fn o200k_base() -> Self { + let mut special_tokens = HashMap::default(); + special_tokens.insert(String::from(ENDOFTEXT), 199999); + special_tokens.insert(String::from(ENDOFPROMPT), 200018); + + CoreBPEConstructor::new( + include_str!("./ranks/o200k_base.compress.tiktoken"), + Some(special_tokens), + include_str!("./ranks/o200k_base.regex.tiktoken"), + ) + } } #[wasm_bindgen] @@ -179,6 +192,7 @@ impl Tiktoken { "p50k_base" => Ok(CoreBPEConstructor::p50k_base()), "p50k_edit" => Ok(CoreBPEConstructor::p50k_edit()), "cl100k_base" => Ok(CoreBPEConstructor::cl100k_base()), + "o200k_base" => Ok(CoreBPEConstructor::o200k_base()), &_ => Err(JsError::new("Invalid encoding")), }?; @@ -325,7 +339,7 @@ impl Tiktoken { #[cfg(feature = "inline")] #[wasm_bindgen(typescript_custom_section)] const _: &'static str = r#" -export type TiktokenEncoding = "gpt2" | "r50k_base" | "p50k_base" | "p50k_edit" | "cl100k_base"; +export type TiktokenEncoding = "gpt2" | "r50k_base" | "p50k_base" | "p50k_edit" | "cl100k_base" | "o200k_base"; /** * @param {TiktokenEncoding} encoding @@ -404,6 +418,8 @@ export type 
TiktokenModel = | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" + | "gpt-4o" + | "gpt-4o-2024-05-13" /** * @param {TiktokenModel} encoding @@ -474,6 +490,8 @@ pub fn encoding_for_model( "gpt-4-turbo-2024-04-09" => Ok("cl100k_base"), "gpt-4-turbo-preview" => Ok("cl100k_base"), "gpt-4-0125-preview" => Ok("cl100k_base"), + "gpt-4o" => Ok("o200k_base"), + "gpt-4o-2024-05-13" => Ok("o200k_base"), model => Err(JsError::new( format!("Invalid model: {}", model.to_string()).as_str(), )), diff --git a/wasm/test/test_simple_public.test.ts b/wasm/test/test_simple_public.test.ts index e74a539f..56839f4a 100644 --- a/wasm/test/test_simple_public.test.ts +++ b/wasm/test/test_simple_public.test.ts @@ -59,6 +59,28 @@ describe("cl100k_base", () => { }); }); +describe("o200k_base", () => { + const enc = get_encoding("o200k_base"); + + it("encodes hello world string", () => { + expect(enc.encode("hello world")).toStrictEqual( + new Uint32Array([24912, 2375]) + ); + }); + + it("decodes hello world string", () => { + expect( + new TextDecoder().decode(enc.decode(new Uint32Array([24912, 2375]))) + ).toStrictEqual("hello world"); + }); + + it("encodes hello world string, all allowed special characters", () => { + expect(enc.encode("hello <|endoftext|>", "all")).toStrictEqual( + new Uint32Array([24912, 220, 199999]) + ); + }); +}); + it("test_simple", () => { const encodings = [ "gpt2", From ed9cc4f1caaae8b32ad149a91739b7d0d2caf40e Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Mon, 13 May 2024 23:57:04 +0200 Subject: [PATCH 169/207] fix(js): specify exports in js-tiktoken --- js/package.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/js/package.json b/js/package.json index 473e8399..647e1b7f 100644 --- a/js/package.json +++ b/js/package.json @@ -52,6 +52,11 @@ "types": "./dist/ranks/r50k_base.d.ts", "require": "./dist/ranks/r50k_base.cjs", "default": "./dist/ranks/r50k_base.js" + }, + "./ranks/o200k_base": { + "types": "./dist/ranks/o200k_base.d.ts", 
+ "require": "./dist/ranks/o200k_base.cjs", + "default": "./dist/ranks/o200k_base.js" } }, "repository": { From e228b479efebcc49dbf02a400216371747bc654a Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Mon, 13 May 2024 23:58:21 +0200 Subject: [PATCH 170/207] Add entry to tsup --- js/package.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index 647e1b7f..20bc0ce4 100644 --- a/js/package.json +++ b/js/package.json @@ -80,7 +80,8 @@ "src/ranks/gpt2.ts", "src/ranks/p50k_base.ts", "src/ranks/p50k_edit.ts", - "src/ranks/r50k_base.ts" + "src/ranks/r50k_base.ts", + "src/ranks/o200k_base.ts" ], "format": [ "cjs", From 791cf62151ff86cc50fee27b41b32a4cb8db25f9 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Tue, 14 May 2024 00:12:43 +0200 Subject: [PATCH 171/207] Add changeset --- .changeset/twelve-chairs-rhyme.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changeset/twelve-chairs-rhyme.md diff --git a/.changeset/twelve-chairs-rhyme.md b/.changeset/twelve-chairs-rhyme.md new file mode 100644 index 00000000..d6e8f2b2 --- /dev/null +++ b/.changeset/twelve-chairs-rhyme.md @@ -0,0 +1,7 @@ +--- +"js-tiktoken": patch +"tiktoken": patch +"@dqbd/tiktoken": patch +--- + +Add gpt-4o support From 50f70ddb6cfa535bce18f05d9d59059474092659 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 13 May 2024 22:13:22 +0000 Subject: [PATCH 172/207] Version Packages --- .changeset/twelve-chairs-rhyme.md | 7 ------- js/CHANGELOG.md | 6 ++++++ js/package.json | 2 +- wasm/CHANGELOG.md | 6 ++++++ wasm/alias/CHANGELOG.md | 6 ++++++ wasm/alias/package.json | 2 +- wasm/package.json | 2 +- 7 files changed, 21 insertions(+), 10 deletions(-) delete mode 100644 .changeset/twelve-chairs-rhyme.md diff --git a/.changeset/twelve-chairs-rhyme.md b/.changeset/twelve-chairs-rhyme.md deleted file mode 100644 index d6e8f2b2..00000000 --- a/.changeset/twelve-chairs-rhyme.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -"js-tiktoken": 
patch -"tiktoken": patch -"@dqbd/tiktoken": patch ---- - -Add gpt-4o support diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index 3510543e..fc8accd5 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.12 + +### Patch Changes + +- 791cf62: Add gpt-4o support + ## 1.0.11 ### Patch Changes diff --git a/js/package.json b/js/package.json index 20bc0ce4..34c82b2b 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.11", + "version": "1.0.12", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { diff --git a/wasm/CHANGELOG.md b/wasm/CHANGELOG.md index f112a972..78810840 100644 --- a/wasm/CHANGELOG.md +++ b/wasm/CHANGELOG.md @@ -1,5 +1,11 @@ # tiktoken +## 1.0.15 + +### Patch Changes + +- 791cf62: Add gpt-4o support + ## 1.0.14 ### Patch Changes diff --git a/wasm/alias/CHANGELOG.md b/wasm/alias/CHANGELOG.md index 70a9ee6a..021de7e4 100644 --- a/wasm/alias/CHANGELOG.md +++ b/wasm/alias/CHANGELOG.md @@ -1,5 +1,11 @@ # @dqbd/tiktoken +## 1.0.15 + +### Patch Changes + +- 791cf62: Add gpt-4o support + ## 1.0.14 ### Patch Changes diff --git a/wasm/alias/package.json b/wasm/alias/package.json index 1b33bb45..f1c292ef 100644 --- a/wasm/alias/package.json +++ b/wasm/alias/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.14", + "version": "1.0.15", "repository": { "type": "git", "url": "https://github.com/dqbd/tiktoken" diff --git a/wasm/package.json b/wasm/package.json index 89b9d568..59815fca 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -1,6 +1,6 @@ { "name": "tiktoken", - "version": "1.0.14", + "version": "1.0.15", "description": "JS/WASM bindings for tiktoken", "license": "MIT", "scripts": { From 28ae6b3d2425196a385214634f7f0035a46f3873 Mon Sep 17 00:00:00 2001 From: mbarretol <63501911+mbarretol@users.noreply.github.com> Date: Fri, 19 Jul 2024 00:09:14 +0200 Subject: [PATCH 173/207] add support for gpt-4o-mini model --- 
js/src/core.ts | 3 ++- tiktoken/model_to_encoding.json | 3 ++- wasm/src/lib.rs | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/js/src/core.ts b/js/src/core.ts index 556eab19..b8924475 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -275,7 +275,8 @@ export function getEncodingNameForModel(model: TiktokenModel) { return "cl100k_base"; } case "gpt-4o": - case "gpt-4o-2024-05-13": { + case "gpt-4o-2024-05-13": + case "gpt-4o-mini": { return "o200k_base"; } default: diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index e9bfb424..cece4c7f 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -54,5 +54,6 @@ "gpt-4-0125-preview": "cl100k_base", "gpt-4-vision-preview": "cl100k_base", "gpt-4o": "o200k_base", - "gpt-4o-2024-05-13": "o200k_base" + "gpt-4o-2024-05-13": "o200k_base", + "gpt-4o-mini": "o200k_base" } diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index d0c1b4e0..6a87d7a7 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -420,6 +420,7 @@ export type TiktokenModel = | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" + | "gpt-4o-mini" /** * @param {TiktokenModel} encoding @@ -492,6 +493,7 @@ pub fn encoding_for_model( "gpt-4-0125-preview" => Ok("cl100k_base"), "gpt-4o" => Ok("o200k_base"), "gpt-4o-2024-05-13" => Ok("o200k_base"), + "gpt-4o-mini" => Ok("o200k_base"), model => Err(JsError::new( format!("Invalid model: {}", model.to_string()).as_str(), )), From 25bb43aeda501857e1a1bdf82400d585fcaabbe7 Mon Sep 17 00:00:00 2001 From: Marco Barreto <63501911+mbarretol@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:15:29 +0200 Subject: [PATCH 174/207] Update js/src/core.ts Co-authored-by: Luigi Pederzani --- js/src/core.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/js/src/core.ts b/js/src/core.ts index b8924475..5b87c3f1 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -276,6 +276,7 @@ export function getEncodingNameForModel(model: TiktokenModel) { } case 
"gpt-4o": case "gpt-4o-2024-05-13": + case "gpt-4o-mini-2024-07-18": case "gpt-4o-mini": { return "o200k_base"; } From 396a8d0c5d55b0af213211d7133281f90ee54bca Mon Sep 17 00:00:00 2001 From: Marco Barreto <63501911+mbarretol@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:15:37 +0200 Subject: [PATCH 175/207] Update tiktoken/model_to_encoding.json Co-authored-by: Luigi Pederzani --- tiktoken/model_to_encoding.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index cece4c7f..559e2ce3 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -55,5 +55,6 @@ "gpt-4-vision-preview": "cl100k_base", "gpt-4o": "o200k_base", "gpt-4o-2024-05-13": "o200k_base", + "gpt-4o-mini-2024-07-18": "o200k_base", "gpt-4o-mini": "o200k_base" } From be5db3488dc56477343c2c1d24a7cd4bd4ed43e1 Mon Sep 17 00:00:00 2001 From: Marco Barreto <63501911+mbarretol@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:15:46 +0200 Subject: [PATCH 176/207] Update wasm/src/lib.rs Co-authored-by: Luigi Pederzani --- wasm/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 6a87d7a7..c9aea2d2 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -493,6 +493,7 @@ pub fn encoding_for_model( "gpt-4-0125-preview" => Ok("cl100k_base"), "gpt-4o" => Ok("o200k_base"), "gpt-4o-2024-05-13" => Ok("o200k_base"), + "gpt-4o-mini-2024-07-18" => Ok("o200k_base"), "gpt-4o-mini" => Ok("o200k_base"), model => Err(JsError::new( format!("Invalid model: {}", model.to_string()).as_str(), From f7e3df106ffc8e99682714ea9f7decdfada0463d Mon Sep 17 00:00:00 2001 From: Marco Barreto <63501911+mbarretol@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:15:53 +0200 Subject: [PATCH 177/207] Update wasm/src/lib.rs Co-authored-by: Luigi Pederzani --- wasm/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index c9aea2d2..3d8dfc8a 100644 --- 
a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -420,6 +420,7 @@ export type TiktokenModel = | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" + | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" /** From de6a05200d66f8c6f8d5f6c0ab766973e7020a27 Mon Sep 17 00:00:00 2001 From: Marco Barreto <63501911+mbarretol@users.noreply.github.com> Date: Sat, 10 Aug 2024 11:32:42 +0200 Subject: [PATCH 178/207] Update js/src/core.ts Co-authored-by: huytool157 <97813378+huytool157@users.noreply.github.com> --- js/src/core.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/js/src/core.ts b/js/src/core.ts index 5b87c3f1..e968f54e 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -276,6 +276,7 @@ export function getEncodingNameForModel(model: TiktokenModel) { } case "gpt-4o": case "gpt-4o-2024-05-13": + case "gpt-4o-2024-08-06": case "gpt-4o-mini-2024-07-18": case "gpt-4o-mini": { return "o200k_base"; From fb1ca7bf32de0ed0017e725f01256f2a756a3f1f Mon Sep 17 00:00:00 2001 From: Marco Barreto <63501911+mbarretol@users.noreply.github.com> Date: Sat, 10 Aug 2024 11:33:03 +0200 Subject: [PATCH 179/207] Update tiktoken/model_to_encoding.json Co-authored-by: huytool157 <97813378+huytool157@users.noreply.github.com> --- tiktoken/model_to_encoding.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index 559e2ce3..6b14ad10 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -55,6 +55,7 @@ "gpt-4-vision-preview": "cl100k_base", "gpt-4o": "o200k_base", "gpt-4o-2024-05-13": "o200k_base", + "gpt-4o-2024-08-06":"o200k_base", "gpt-4o-mini-2024-07-18": "o200k_base", "gpt-4o-mini": "o200k_base" } From 26ec6cc78cf9cefe4f3dbcac1c291f5017637ba6 Mon Sep 17 00:00:00 2001 From: Marco Barreto <63501911+mbarretol@users.noreply.github.com> Date: Sat, 10 Aug 2024 11:33:12 +0200 Subject: [PATCH 180/207] Update wasm/src/lib.rs Co-authored-by: huytool157 <97813378+huytool157@users.noreply.github.com> --- 
wasm/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 3d8dfc8a..b070ba4c 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -420,6 +420,7 @@ export type TiktokenModel = | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" + | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" From a32f0a4c53a38039d4d85c20141feac57860e27d Mon Sep 17 00:00:00 2001 From: Marco Barreto <63501911+mbarretol@users.noreply.github.com> Date: Sat, 10 Aug 2024 11:33:19 +0200 Subject: [PATCH 181/207] Update wasm/src/lib.rs Co-authored-by: huytool157 <97813378+huytool157@users.noreply.github.com> --- wasm/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index b070ba4c..638798cc 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -495,6 +495,7 @@ pub fn encoding_for_model( "gpt-4-0125-preview" => Ok("cl100k_base"), "gpt-4o" => Ok("o200k_base"), "gpt-4o-2024-05-13" => Ok("o200k_base"), + "gpt-4o-2024-08-06" => Ok("o200k_base"), "gpt-4o-mini-2024-07-18" => Ok("o200k_base"), "gpt-4o-mini" => Ok("o200k_base"), model => Err(JsError::new( From bba4ed6f4b8038fd5c5b9013f648d47ee9ec0fbc Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 16 Aug 2024 00:53:13 +0200 Subject: [PATCH 182/207] Fix unreachable code --- wasm/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 638798cc..c7331f03 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -439,7 +439,6 @@ pub fn encoding_for_model( extend_special_tokens: JsValue, ) -> Result { let encoding = match model { - "davinci" => Ok("p50k_base"), "text-davinci-003" => Ok("p50k_base"), "text-davinci-002" => Ok("p50k_base"), "text-davinci-001" => Ok("r50k_base"), From 4796f27ec4a37f0c7501add29a623386022199ff Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 16 Aug 2024 00:55:11 +0200 Subject: [PATCH 183/207] Add changeset --- .changeset/five-queens-tan.md | 7 +++++++ 1 file changed, 7 
insertions(+) create mode 100644 .changeset/five-queens-tan.md diff --git a/.changeset/five-queens-tan.md b/.changeset/five-queens-tan.md new file mode 100644 index 00000000..0140f47a --- /dev/null +++ b/.changeset/five-queens-tan.md @@ -0,0 +1,7 @@ +--- +"tiktoken": patch +"js-tiktoken": patch +"@dqbd/tiktoken": patch +--- + +Add support for gpt-4o and gpt-4o-miniy From 9d385cb0e7f8465d88fc7ecd2e3b1b0abb5b710d Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 16 Aug 2024 00:59:10 +0200 Subject: [PATCH 184/207] Add text-embedding-3-small and text-embedding-3-large Closes #109 --- js/src/core.ts | 4 +++- tiktoken/model_to_encoding.json | 2 ++ wasm/src/lib.rs | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/js/src/core.ts b/js/src/core.ts index e968f54e..7aee83fc 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -271,7 +271,9 @@ export function getEncodingNameForModel(model: TiktokenModel) { case "gpt-4-turbo-2024-04-09": case "gpt-4-turbo-preview": case "gpt-4-0125-preview": - case "text-embedding-ada-002": { + case "text-embedding-ada-002": + case "text-embedding-3-small": + case "text-embedding-3-large": { return "cl100k_base"; } case "gpt-4o": diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index 6b14ad10..e7890e2a 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -20,6 +20,8 @@ "text-davinci-edit-001": "p50k_edit", "code-davinci-edit-001": "p50k_edit", "text-embedding-ada-002": "cl100k_base", + "text-embedding-3-small": "cl100k_base", + "text-embedding-3-large": "cl100k_base", "text-similarity-davinci-001": "r50k_base", "text-similarity-curie-001": "r50k_base", "text-similarity-babbage-001": "r50k_base", diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index c7331f03..72cb06b1 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -459,6 +459,8 @@ pub fn encoding_for_model( "text-davinci-edit-001" => Ok("p50k_edit"), "code-davinci-edit-001" => Ok("p50k_edit"), 
"text-embedding-ada-002" => Ok("cl100k_base"), + "text-embedding-3-small" => Ok("cl100k_base"), + "text-embedding-3-large" => Ok("cl100k_base"), "text-similarity-davinci-001" => Ok("r50k_base"), "text-similarity-curie-001" => Ok("r50k_base"), "text-similarity-babbage-001" => Ok("r50k_base"), From 831a7e8693fc1da4c7cbed36bfedda9e8c3d4775 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 16 Aug 2024 00:59:18 +0200 Subject: [PATCH 185/207] Add changeset --- .changeset/five-queens-tan.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changeset/five-queens-tan.md b/.changeset/five-queens-tan.md index 0140f47a..5be52afe 100644 --- a/.changeset/five-queens-tan.md +++ b/.changeset/five-queens-tan.md @@ -4,4 +4,4 @@ "@dqbd/tiktoken": patch --- -Add support for gpt-4o and gpt-4o-miniy +Add support for gpt-4o, gpt-4o-mini, text-embedding-3-small and text-embedding-3-large From ed2e7857aa09f5db73f7b771cc1543c7e05d4b52 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 15 Aug 2024 23:04:34 +0000 Subject: [PATCH 186/207] Version Packages --- .changeset/five-queens-tan.md | 7 ------- js/CHANGELOG.md | 6 ++++++ js/package.json | 2 +- wasm/CHANGELOG.md | 6 ++++++ wasm/alias/CHANGELOG.md | 6 ++++++ wasm/alias/package.json | 2 +- wasm/package.json | 2 +- 7 files changed, 21 insertions(+), 10 deletions(-) delete mode 100644 .changeset/five-queens-tan.md diff --git a/.changeset/five-queens-tan.md b/.changeset/five-queens-tan.md deleted file mode 100644 index 5be52afe..00000000 --- a/.changeset/five-queens-tan.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -"tiktoken": patch -"js-tiktoken": patch -"@dqbd/tiktoken": patch ---- - -Add support for gpt-4o, gpt-4o-mini, text-embedding-3-small and text-embedding-3-large diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index fc8accd5..1bad71ca 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.13 + +### Patch Changes + +- 4796f27: Add support for gpt-4o, gpt-4o-mini, 
text-embedding-3-small and text-embedding-3-large + ## 1.0.12 ### Patch Changes diff --git a/js/package.json b/js/package.json index 34c82b2b..a9110ea1 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.12", + "version": "1.0.13", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { diff --git a/wasm/CHANGELOG.md b/wasm/CHANGELOG.md index 78810840..9b698c48 100644 --- a/wasm/CHANGELOG.md +++ b/wasm/CHANGELOG.md @@ -1,5 +1,11 @@ # tiktoken +## 1.0.16 + +### Patch Changes + +- 4796f27: Add support for gpt-4o, gpt-4o-mini, text-embedding-3-small and text-embedding-3-large + ## 1.0.15 ### Patch Changes diff --git a/wasm/alias/CHANGELOG.md b/wasm/alias/CHANGELOG.md index 021de7e4..2d35e6d0 100644 --- a/wasm/alias/CHANGELOG.md +++ b/wasm/alias/CHANGELOG.md @@ -1,5 +1,11 @@ # @dqbd/tiktoken +## 1.0.16 + +### Patch Changes + +- 4796f27: Add support for gpt-4o, gpt-4o-mini, text-embedding-3-small and text-embedding-3-large + ## 1.0.15 ### Patch Changes diff --git a/wasm/alias/package.json b/wasm/alias/package.json index f1c292ef..a1f6979a 100644 --- a/wasm/alias/package.json +++ b/wasm/alias/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.15", + "version": "1.0.16", "repository": { "type": "git", "url": "https://github.com/dqbd/tiktoken" diff --git a/wasm/package.json b/wasm/package.json index 59815fca..d4f169c8 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -1,6 +1,6 @@ { "name": "tiktoken", - "version": "1.0.15", + "version": "1.0.16", "description": "JS/WASM bindings for tiktoken", "license": "MIT", "scripts": { From a825d5f253b7b11c3784458a0e20675ff66403eb Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 16 Aug 2024 01:26:46 +0200 Subject: [PATCH 187/207] fix(js): add missing rank files back to js-tiktoken Closes #108 --- js/package.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json 
index a9110ea1..d4d78b33 100644 --- a/js/package.json +++ b/js/package.json @@ -4,7 +4,7 @@ "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { - "build": "rm -rf dist && tsup", + "build": "rm -rf dist && tsup && cp -R src/ranks dist/ranks", "test": "vitest run" }, "type": "module", @@ -12,6 +12,9 @@ "types": "./dist/index.d.ts", "files": [ "dist/**/*", + "!dist/ranks/*.tiktoken", + "!dist/ranks/*.json", + "!dist/ranks/ranks.ts", "index.js", "index.d.ts", "lite.js", From 74e7870a0a5d80a37021a3455713bbbd42385fd8 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 16 Aug 2024 01:27:20 +0200 Subject: [PATCH 188/207] Changeset --- .changeset/wise-falcons-sing.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/wise-falcons-sing.md diff --git a/.changeset/wise-falcons-sing.md b/.changeset/wise-falcons-sing.md new file mode 100644 index 00000000..2b6f6e72 --- /dev/null +++ b/.changeset/wise-falcons-sing.md @@ -0,0 +1,5 @@ +--- +"js-tiktoken": patch +--- + +Add missing rank files for js-tiktoken From 386acb8e649bf0dcd073a990ff965657c33ee867 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 15 Aug 2024 23:31:13 +0000 Subject: [PATCH 189/207] Version Packages --- .changeset/wise-falcons-sing.md | 5 ----- js/CHANGELOG.md | 6 ++++++ js/package.json | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) delete mode 100644 .changeset/wise-falcons-sing.md diff --git a/.changeset/wise-falcons-sing.md b/.changeset/wise-falcons-sing.md deleted file mode 100644 index 2b6f6e72..00000000 --- a/.changeset/wise-falcons-sing.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"js-tiktoken": patch ---- - -Add missing rank files for js-tiktoken diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index 1bad71ca..b80aec69 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.14 + +### Patch Changes + +- 74e7870: Add missing rank files for js-tiktoken + ## 1.0.13 ### Patch Changes diff --git a/js/package.json 
b/js/package.json index d4d78b33..d56d3e6e 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.13", + "version": "1.0.14", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { From 0ba7dbb000b0d46a8b051bde50d7f1dba6e206d8 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 4 Oct 2024 01:20:41 +0200 Subject: [PATCH 190/207] Add missing o1 models --- js/src/core.ts | 10 +++++++++- tiktoken/model_to_encoding.json | 9 ++++++++- wasm/src/lib.rs | 14 ++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/js/src/core.ts b/js/src/core.ts index 7aee83fc..5bc298ba 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -280,7 +280,15 @@ export function getEncodingNameForModel(model: TiktokenModel) { case "gpt-4o-2024-05-13": case "gpt-4o-2024-08-06": case "gpt-4o-mini-2024-07-18": - case "gpt-4o-mini": { + case "gpt-4o-mini": + case "o1-mini": + case "o1-preview": + case "o1-preview-2024-09-12": + case "o1-mini-2024-09-12": + case "chatgpt-4o-latest": + case "gpt-4o-realtime": + case "gpt-4o-realtime-preview-2024-10-01": + { return "o200k_base"; } default: diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index e7890e2a..06324b89 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -59,5 +59,12 @@ "gpt-4o-2024-05-13": "o200k_base", "gpt-4o-2024-08-06":"o200k_base", "gpt-4o-mini-2024-07-18": "o200k_base", - "gpt-4o-mini": "o200k_base" + "gpt-4o-mini": "o200k_base", + "o1-mini": "o200k_base", + "o1-preview": "o200k_base", + "o1-preview-2024-09-12": "o200k_base", + "o1-mini-2024-09-12": "o200k_base", + "chatgpt-4o-latest": "o200k_base", + "gpt-4o-realtime": "o200k_base", + "gpt-4o-realtime-preview-2024-10-01": "o200k_base" } diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 72cb06b1..8c9f8633 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -423,6 +423,13 @@ export type TiktokenModel = | 
"gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" + | "o1-mini" + | "o1-preview" + | "o1-preview-2024-09-12" + | "o1-mini-2024-09-12" + | "chatgpt-4o-latest" + | "gpt-4o-realtime" + | "gpt-4o-realtime-preview-2024-10-01" /** * @param {TiktokenModel} encoding @@ -499,6 +506,13 @@ pub fn encoding_for_model( "gpt-4o-2024-08-06" => Ok("o200k_base"), "gpt-4o-mini-2024-07-18" => Ok("o200k_base"), "gpt-4o-mini" => Ok("o200k_base"), + "o1-mini" => Ok("o200k_base"), + "o1-preview" => Ok("o200k_base"), + "o1-preview-2024-09-12" => Ok("o200k_base"), + "o1-mini-2024-09-12" => Ok("o200k_base"), + "chatgpt-4o-latest" => Ok("o200k_base"), + "gpt-4o-realtime" => Ok("o200k_base"), + "gpt-4o-realtime-preview-2024-10-01" => Ok("o200k_base"), model => Err(JsError::new( format!("Invalid model: {}", model.to_string()).as_str(), )), From 53bd14a713b40c1d4dcce444e79470781a48467e Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Fri, 4 Oct 2024 01:21:22 +0200 Subject: [PATCH 191/207] Changeset --- .changeset/cuddly-dolphins-sneeze.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changeset/cuddly-dolphins-sneeze.md diff --git a/.changeset/cuddly-dolphins-sneeze.md b/.changeset/cuddly-dolphins-sneeze.md new file mode 100644 index 00000000..437c0ce7 --- /dev/null +++ b/.changeset/cuddly-dolphins-sneeze.md @@ -0,0 +1,7 @@ +--- +"tiktoken": patch +"js-tiktoken": patch +"@dqbd/tiktoken": patch +--- + +Add o1 class of models From 7e6cddc86d2de33e4e0adb00951ad13143b12ffe Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 3 Oct 2024 23:25:52 +0000 Subject: [PATCH 192/207] Version Packages --- .changeset/cuddly-dolphins-sneeze.md | 7 ------- js/CHANGELOG.md | 6 ++++++ js/package.json | 2 +- wasm/CHANGELOG.md | 6 ++++++ wasm/alias/CHANGELOG.md | 6 ++++++ wasm/alias/package.json | 2 +- wasm/package.json | 2 +- 7 files changed, 21 insertions(+), 10 deletions(-) delete mode 100644 .changeset/cuddly-dolphins-sneeze.md diff --git 
a/.changeset/cuddly-dolphins-sneeze.md b/.changeset/cuddly-dolphins-sneeze.md deleted file mode 100644 index 437c0ce7..00000000 --- a/.changeset/cuddly-dolphins-sneeze.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -"tiktoken": patch -"js-tiktoken": patch -"@dqbd/tiktoken": patch ---- - -Add o1 class of models diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index b80aec69..d2e464f6 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.15 + +### Patch Changes + +- 53bd14a: Add o1 class of models + ## 1.0.14 ### Patch Changes diff --git a/js/package.json b/js/package.json index d56d3e6e..44902cd2 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.14", + "version": "1.0.15", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { diff --git a/wasm/CHANGELOG.md b/wasm/CHANGELOG.md index 9b698c48..98498ff9 100644 --- a/wasm/CHANGELOG.md +++ b/wasm/CHANGELOG.md @@ -1,5 +1,11 @@ # tiktoken +## 1.0.17 + +### Patch Changes + +- 53bd14a: Add o1 class of models + ## 1.0.16 ### Patch Changes diff --git a/wasm/alias/CHANGELOG.md b/wasm/alias/CHANGELOG.md index 2d35e6d0..2328d758 100644 --- a/wasm/alias/CHANGELOG.md +++ b/wasm/alias/CHANGELOG.md @@ -1,5 +1,11 @@ # @dqbd/tiktoken +## 1.0.17 + +### Patch Changes + +- 53bd14a: Add o1 class of models + ## 1.0.16 ### Patch Changes diff --git a/wasm/alias/package.json b/wasm/alias/package.json index a1f6979a..ebb3b638 100644 --- a/wasm/alias/package.json +++ b/wasm/alias/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.16", + "version": "1.0.17", "repository": { "type": "git", "url": "https://github.com/dqbd/tiktoken" diff --git a/wasm/package.json b/wasm/package.json index d4f169c8..1133bcb5 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -1,6 +1,6 @@ { "name": "tiktoken", - "version": "1.0.16", + "version": "1.0.17", "description": "JS/WASM bindings for tiktoken", "license": "MIT", "scripts": 
{ From f40be08a7ae61dea27b7539b1a187b3d13c44c96 Mon Sep 17 00:00:00 2001 From: Lucia Date: Tue, 17 Dec 2024 16:36:33 -0500 Subject: [PATCH 193/207] Add new o1-2024-12-17 model --- js/src/core.ts | 1 + tiktoken/model_to_encoding.json | 1 + wasm/src/lib.rs | 1 + 3 files changed, 3 insertions(+) diff --git a/js/src/core.ts b/js/src/core.ts index 5bc298ba..6688b326 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -281,6 +281,7 @@ export function getEncodingNameForModel(model: TiktokenModel) { case "gpt-4o-2024-08-06": case "gpt-4o-mini-2024-07-18": case "gpt-4o-mini": + case "o1-2024-12-17": case "o1-mini": case "o1-preview": case "o1-preview-2024-09-12": diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index 06324b89..445f32a7 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -60,6 +60,7 @@ "gpt-4o-2024-08-06":"o200k_base", "gpt-4o-mini-2024-07-18": "o200k_base", "gpt-4o-mini": "o200k_base", + "o1-2024-12-17": "o200k_base", "o1-mini": "o200k_base", "o1-preview": "o200k_base", "o1-preview-2024-09-12": "o200k_base", diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 8c9f8633..a4d7a403 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -423,6 +423,7 @@ export type TiktokenModel = | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" + | "o1-2024-12-17" | "o1-mini" | "o1-preview" | "o1-preview-2024-09-12" From c73f19e069f5a15143cf72fa2cb627fa8517e3a8 Mon Sep 17 00:00:00 2001 From: Lucia Date: Tue, 17 Dec 2024 16:36:33 -0500 Subject: [PATCH 194/207] Add new o1-2024-12-17 model --- js/src/core.ts | 1 + tiktoken/model_to_encoding.json | 1 + wasm/src/lib.rs | 2 ++ 3 files changed, 4 insertions(+) diff --git a/js/src/core.ts b/js/src/core.ts index 5bc298ba..6688b326 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -281,6 +281,7 @@ export function getEncodingNameForModel(model: TiktokenModel) { case "gpt-4o-2024-08-06": case "gpt-4o-mini-2024-07-18": case "gpt-4o-mini": + case 
"o1-2024-12-17": case "o1-mini": case "o1-preview": case "o1-preview-2024-09-12": diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index 06324b89..445f32a7 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -60,6 +60,7 @@ "gpt-4o-2024-08-06":"o200k_base", "gpt-4o-mini-2024-07-18": "o200k_base", "gpt-4o-mini": "o200k_base", + "o1-2024-12-17": "o200k_base", "o1-mini": "o200k_base", "o1-preview": "o200k_base", "o1-preview-2024-09-12": "o200k_base", diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 8c9f8633..7da08e87 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -423,6 +423,7 @@ export type TiktokenModel = | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" + | "o1-2024-12-17" | "o1-mini" | "o1-preview" | "o1-preview-2024-09-12" @@ -506,6 +507,7 @@ pub fn encoding_for_model( "gpt-4o-2024-08-06" => Ok("o200k_base"), "gpt-4o-mini-2024-07-18" => Ok("o200k_base"), "gpt-4o-mini" => Ok("o200k_base"), + "o1-2024-12-17" => Ok("o200k_base"), "o1-mini" => Ok("o200k_base"), "o1-preview" => Ok("o200k_base"), "o1-preview-2024-09-12" => Ok("o200k_base"), From 919eb6b9ea9c9b0bc6754e4441ce92c0c11fca23 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 19 Dec 2024 18:01:14 +0100 Subject: [PATCH 195/207] Add changeset --- .changeset/selfish-buckets-fry.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changeset/selfish-buckets-fry.md diff --git a/.changeset/selfish-buckets-fry.md b/.changeset/selfish-buckets-fry.md new file mode 100644 index 00000000..3273d0d1 --- /dev/null +++ b/.changeset/selfish-buckets-fry.md @@ -0,0 +1,7 @@ +--- +"tiktoken": patch +"js-tiktoken": patch +"@dqbd/tiktoken": patch +--- + +Add o1-2024-12-17 model From 7a8687d737b271871fa288ec016e604c3b24cb28 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 19 Dec 2024 17:02:07 +0000 Subject: [PATCH 196/207] Version Packages --- .changeset/selfish-buckets-fry.md | 7 ------- js/CHANGELOG.md | 6 
++++++ js/package.json | 2 +- wasm/CHANGELOG.md | 6 ++++++ wasm/alias/CHANGELOG.md | 6 ++++++ wasm/alias/package.json | 2 +- wasm/package.json | 2 +- 7 files changed, 21 insertions(+), 10 deletions(-) delete mode 100644 .changeset/selfish-buckets-fry.md diff --git a/.changeset/selfish-buckets-fry.md b/.changeset/selfish-buckets-fry.md deleted file mode 100644 index 3273d0d1..00000000 --- a/.changeset/selfish-buckets-fry.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -"tiktoken": patch -"js-tiktoken": patch -"@dqbd/tiktoken": patch ---- - -Add o1-2024-12-17 model diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index d2e464f6..e654ab7d 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.16 + +### Patch Changes + +- 919eb6b: Add o1-2024-12-17 model + ## 1.0.15 ### Patch Changes diff --git a/js/package.json b/js/package.json index 44902cd2..b0fab15b 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.15", + "version": "1.0.16", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { diff --git a/wasm/CHANGELOG.md b/wasm/CHANGELOG.md index 98498ff9..f457b738 100644 --- a/wasm/CHANGELOG.md +++ b/wasm/CHANGELOG.md @@ -1,5 +1,11 @@ # tiktoken +## 1.0.18 + +### Patch Changes + +- 919eb6b: Add o1-2024-12-17 model + ## 1.0.17 ### Patch Changes diff --git a/wasm/alias/CHANGELOG.md b/wasm/alias/CHANGELOG.md index 2328d758..41dfd2d2 100644 --- a/wasm/alias/CHANGELOG.md +++ b/wasm/alias/CHANGELOG.md @@ -1,5 +1,11 @@ # @dqbd/tiktoken +## 1.0.18 + +### Patch Changes + +- 919eb6b: Add o1-2024-12-17 model + ## 1.0.17 ### Patch Changes diff --git a/wasm/alias/package.json b/wasm/alias/package.json index ebb3b638..2e732529 100644 --- a/wasm/alias/package.json +++ b/wasm/alias/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.17", + "version": "1.0.18", "repository": { "type": "git", "url": "https://github.com/dqbd/tiktoken" diff --git a/wasm/package.json 
b/wasm/package.json index 1133bcb5..59b446b0 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -1,6 +1,6 @@ { "name": "tiktoken", - "version": "1.0.17", + "version": "1.0.18", "description": "JS/WASM bindings for tiktoken", "license": "MIT", "scripts": { From 46e3c7c8400fd2e7596b54e7f543b8e3b3aea6fb Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 2 Feb 2025 22:06:53 +0100 Subject: [PATCH 197/207] feat: add o3-mini, missing o1 model --- js/src/core.ts | 3 +++ tiktoken/model_to_encoding.json | 3 +++ wasm/src/lib.rs | 3 +++ 3 files changed, 9 insertions(+) diff --git a/js/src/core.ts b/js/src/core.ts index 6688b326..0499e7b8 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -281,11 +281,14 @@ export function getEncodingNameForModel(model: TiktokenModel) { case "gpt-4o-2024-08-06": case "gpt-4o-mini-2024-07-18": case "gpt-4o-mini": + case "o1": case "o1-2024-12-17": case "o1-mini": case "o1-preview": case "o1-preview-2024-09-12": case "o1-mini-2024-09-12": + case "o3-mini": + case "o3-mini-2025-01-31": case "chatgpt-4o-latest": case "gpt-4o-realtime": case "gpt-4o-realtime-preview-2024-10-01": diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index 445f32a7..dd6e1e9c 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -60,11 +60,14 @@ "gpt-4o-2024-08-06":"o200k_base", "gpt-4o-mini-2024-07-18": "o200k_base", "gpt-4o-mini": "o200k_base", + "o1": "o200k_base", "o1-2024-12-17": "o200k_base", "o1-mini": "o200k_base", "o1-preview": "o200k_base", "o1-preview-2024-09-12": "o200k_base", "o1-mini-2024-09-12": "o200k_base", + "o3-mini": "o200k_base", + "o3-mini-2025-01-31": "o200k_base", "chatgpt-4o-latest": "o200k_base", "gpt-4o-realtime": "o200k_base", "gpt-4o-realtime-preview-2024-10-01": "o200k_base" diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 7da08e87..9ae10092 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -423,11 +423,14 @@ export type TiktokenModel = | "gpt-4o-2024-08-06" | 
"gpt-4o-mini-2024-07-18" | "gpt-4o-mini" + | "o1" | "o1-2024-12-17" | "o1-mini" | "o1-preview" | "o1-preview-2024-09-12" | "o1-mini-2024-09-12" + | "o3-mini" + | "o3-mini-2025-01-31" | "chatgpt-4o-latest" | "gpt-4o-realtime" | "gpt-4o-realtime-preview-2024-10-01" From 5982def4687df72cc74da9850e07d8d68866b4af Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 2 Feb 2025 22:07:36 +0100 Subject: [PATCH 198/207] Add changeset --- .changeset/silver-moons-boil.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changeset/silver-moons-boil.md diff --git a/.changeset/silver-moons-boil.md b/.changeset/silver-moons-boil.md new file mode 100644 index 00000000..a494ce61 --- /dev/null +++ b/.changeset/silver-moons-boil.md @@ -0,0 +1,7 @@ +--- +"tiktoken": patch +"js-tiktoken": patch +"@dqbd/tiktoken": patch +--- + +feat: add o3-mini, missing o1 model From 335b29166b125035f586cbfe556eabde202e36ce Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Sun, 2 Feb 2025 22:11:40 +0100 Subject: [PATCH 199/207] Add missing types for text-embedding-* --- wasm/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 9ae10092..3ba0c846 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -385,6 +385,8 @@ export type TiktokenModel = | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" + | "text-embedding-3-small" + | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" From 1c78c18408c97d65b0284c549a5692b886eb7adb Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 2 Feb 2025 21:15:22 +0000 Subject: [PATCH 200/207] Version Packages --- .changeset/silver-moons-boil.md | 7 ------- js/CHANGELOG.md | 6 ++++++ js/package.json | 2 +- wasm/CHANGELOG.md | 6 ++++++ wasm/alias/CHANGELOG.md | 6 ++++++ wasm/alias/package.json | 2 +- wasm/package.json | 2 +- 7 files changed, 21 insertions(+), 10 deletions(-) delete mode 100644 
.changeset/silver-moons-boil.md diff --git a/.changeset/silver-moons-boil.md b/.changeset/silver-moons-boil.md deleted file mode 100644 index a494ce61..00000000 --- a/.changeset/silver-moons-boil.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -"tiktoken": patch -"js-tiktoken": patch -"@dqbd/tiktoken": patch ---- - -feat: add o3-mini, missing o1 model diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index e654ab7d..d4ace6ec 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.17 + +### Patch Changes + +- 5982def: feat: add o3-mini, missing o1 model + ## 1.0.16 ### Patch Changes diff --git a/js/package.json b/js/package.json index b0fab15b..90809e48 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.16", + "version": "1.0.17", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { diff --git a/wasm/CHANGELOG.md b/wasm/CHANGELOG.md index f457b738..2066f89c 100644 --- a/wasm/CHANGELOG.md +++ b/wasm/CHANGELOG.md @@ -1,5 +1,11 @@ # tiktoken +## 1.0.19 + +### Patch Changes + +- 5982def: feat: add o3-mini, missing o1 model + ## 1.0.18 ### Patch Changes diff --git a/wasm/alias/CHANGELOG.md b/wasm/alias/CHANGELOG.md index 41dfd2d2..39fe9b88 100644 --- a/wasm/alias/CHANGELOG.md +++ b/wasm/alias/CHANGELOG.md @@ -1,5 +1,11 @@ # @dqbd/tiktoken +## 1.0.19 + +### Patch Changes + +- 5982def: feat: add o3-mini, missing o1 model + ## 1.0.18 ### Patch Changes diff --git a/wasm/alias/package.json b/wasm/alias/package.json index 2e732529..ac8061ef 100644 --- a/wasm/alias/package.json +++ b/wasm/alias/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.18", + "version": "1.0.19", "repository": { "type": "git", "url": "https://github.com/dqbd/tiktoken" diff --git a/wasm/package.json b/wasm/package.json index 59b446b0..228709a6 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -1,6 +1,6 @@ { "name": "tiktoken", - "version": "1.0.18", + "version": 
"1.0.19", "description": "JS/WASM bindings for tiktoken", "license": "MIT", "scripts": { From 5568320c5df6d7c7cd170d8919bd66108aeb750e Mon Sep 17 00:00:00 2001 From: chenqianhe <1278095698@qq.com> Date: Mon, 3 Feb 2025 14:31:59 +0800 Subject: [PATCH 201/207] feat: add missing gpt-4o-2024-11-20 models --- js/src/core.ts | 1 + tiktoken/model_to_encoding.json | 1 + wasm/src/lib.rs | 2 ++ 3 files changed, 4 insertions(+) diff --git a/js/src/core.ts b/js/src/core.ts index 0499e7b8..90bde7a3 100644 --- a/js/src/core.ts +++ b/js/src/core.ts @@ -279,6 +279,7 @@ export function getEncodingNameForModel(model: TiktokenModel) { case "gpt-4o": case "gpt-4o-2024-05-13": case "gpt-4o-2024-08-06": + case "gpt-4o-2024-11-20": case "gpt-4o-mini-2024-07-18": case "gpt-4o-mini": case "o1": diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index dd6e1e9c..9a56d861 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -58,6 +58,7 @@ "gpt-4o": "o200k_base", "gpt-4o-2024-05-13": "o200k_base", "gpt-4o-2024-08-06":"o200k_base", + "gpt-4o-2024-11-20":"o200k_base", "gpt-4o-mini-2024-07-18": "o200k_base", "gpt-4o-mini": "o200k_base", "o1": "o200k_base", diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 3ba0c846..cfd7939f 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -423,6 +423,7 @@ export type TiktokenModel = | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" + | "gpt-4o-2024-11-20" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "o1" @@ -510,6 +511,7 @@ pub fn encoding_for_model( "gpt-4o" => Ok("o200k_base"), "gpt-4o-2024-05-13" => Ok("o200k_base"), "gpt-4o-2024-08-06" => Ok("o200k_base"), + "gpt-4o-2024-11-20" => Ok("o200k_base"), "gpt-4o-mini-2024-07-18" => Ok("o200k_base"), "gpt-4o-mini" => Ok("o200k_base"), "o1-2024-12-17" => Ok("o200k_base"), From 4dbb38e552e683e75f2681abe9af550babea2a44 Mon Sep 17 00:00:00 2001 From: chenqianhe <1278095698@qq.com> Date: Mon, 3 Feb 2025 14:51:51 +0800 Subject: [PATCH 202/207] feat: 
add missing o3-mini --- wasm/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index cfd7939f..53ef87a3 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -522,6 +522,8 @@ pub fn encoding_for_model( "chatgpt-4o-latest" => Ok("o200k_base"), "gpt-4o-realtime" => Ok("o200k_base"), "gpt-4o-realtime-preview-2024-10-01" => Ok("o200k_base"), + "o3-mini" => Ok("o200k_base"), + "o3-mini-2025-01-31" => Ok("o200k_base"), model => Err(JsError::new( format!("Invalid model: {}", model.to_string()).as_str(), )), From 9fc37ed57a1418a3012d4a7968a2d1fe62c7660e Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Tue, 4 Feb 2025 02:46:03 +0100 Subject: [PATCH 203/207] Correct other models as well --- tiktoken/model_to_encoding.json | 4 ++-- wasm/src/lib.rs | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tiktoken/model_to_encoding.json b/tiktoken/model_to_encoding.json index 9a56d861..6b55f317 100644 --- a/tiktoken/model_to_encoding.json +++ b/tiktoken/model_to_encoding.json @@ -57,8 +57,8 @@ "gpt-4-vision-preview": "cl100k_base", "gpt-4o": "o200k_base", "gpt-4o-2024-05-13": "o200k_base", - "gpt-4o-2024-08-06":"o200k_base", - "gpt-4o-2024-11-20":"o200k_base", + "gpt-4o-2024-08-06": "o200k_base", + "gpt-4o-2024-11-20": "o200k_base", "gpt-4o-mini-2024-07-18": "o200k_base", "gpt-4o-mini": "o200k_base", "o1": "o200k_base", diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 53ef87a3..65b29a00 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -460,9 +460,10 @@ pub fn encoding_for_model( "text-babbage-001" => Ok("r50k_base"), "text-ada-001" => Ok("r50k_base"), "davinci" => Ok("r50k_base"), + "davinci-002" => Ok("cl100k_base"), "curie" => Ok("r50k_base"), "babbage" => Ok("r50k_base"), - "babbage-002" => Ok("r50k_base"), + "babbage-002" => Ok("cl100k_base"), "ada" => Ok("r50k_base"), "code-davinci-002" => Ok("p50k_base"), "code-davinci-001" => Ok("p50k_base"), @@ -491,7 +492,7 @@ pub fn encoding_for_model( 
"gpt-3.5-turbo-0613" => Ok("cl100k_base"), "gpt-3.5-turbo-16k" => Ok("cl100k_base"), "gpt-3.5-turbo-16k-0613" => Ok("cl100k_base"), - "gpt-3.5-turbo-instruct" => Ok("clk100k_base"), + "gpt-3.5-turbo-instruct" => Ok("cl100k_base"), "gpt-3.5-turbo-instruct-0914" => Ok("cl100k_base"), "gpt-4" => Ok("cl100k_base"), "gpt-4-0314" => Ok("cl100k_base"), @@ -514,6 +515,7 @@ pub fn encoding_for_model( "gpt-4o-2024-11-20" => Ok("o200k_base"), "gpt-4o-mini-2024-07-18" => Ok("o200k_base"), "gpt-4o-mini" => Ok("o200k_base"), + "o1" => Ok("o200k_base"), "o1-2024-12-17" => Ok("o200k_base"), "o1-mini" => Ok("o200k_base"), "o1-preview" => Ok("o200k_base"), From 5f923480675fa7172d70abfbaf1ccc84e18b8733 Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Tue, 4 Feb 2025 02:47:25 +0100 Subject: [PATCH 204/207] Add changeset --- .changeset/beige-grapes-wink.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changeset/beige-grapes-wink.md diff --git a/.changeset/beige-grapes-wink.md b/.changeset/beige-grapes-wink.md new file mode 100644 index 00000000..61fc9db5 --- /dev/null +++ b/.changeset/beige-grapes-wink.md @@ -0,0 +1,7 @@ +--- +"tiktoken": patch +"js-tiktoken": patch +"@dqbd/tiktoken": patch +--- + +Add missing historic models, fix incorrect tokenizers for old instruct models From 4c0b023719c681547327dd6282757c131d2e4453 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 4 Feb 2025 01:55:55 +0000 Subject: [PATCH 205/207] Version Packages --- .changeset/beige-grapes-wink.md | 7 ------- js/CHANGELOG.md | 6 ++++++ js/package.json | 2 +- wasm/CHANGELOG.md | 6 ++++++ wasm/alias/CHANGELOG.md | 6 ++++++ wasm/alias/package.json | 2 +- wasm/package.json | 2 +- 7 files changed, 21 insertions(+), 10 deletions(-) delete mode 100644 .changeset/beige-grapes-wink.md diff --git a/.changeset/beige-grapes-wink.md b/.changeset/beige-grapes-wink.md deleted file mode 100644 index 61fc9db5..00000000 --- a/.changeset/beige-grapes-wink.md +++ /dev/null @@ -1,7 +0,0 @@ ---- 
-"tiktoken": patch -"js-tiktoken": patch -"@dqbd/tiktoken": patch ---- - -Add missing historic models, fix incorrect tokenizers for old instruct models diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index d4ace6ec..0a4ce5cf 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.18 + +### Patch Changes + +- 5f92348: Add missing historic models, fix incorrect tokenizers for old instruct models + ## 1.0.17 ### Patch Changes diff --git a/js/package.json b/js/package.json index 90809e48..42c78c49 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.17", + "version": "1.0.18", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": { diff --git a/wasm/CHANGELOG.md b/wasm/CHANGELOG.md index 2066f89c..8d9bf5b9 100644 --- a/wasm/CHANGELOG.md +++ b/wasm/CHANGELOG.md @@ -1,5 +1,11 @@ # tiktoken +## 1.0.20 + +### Patch Changes + +- 5f92348: Add missing historic models, fix incorrect tokenizers for old instruct models + ## 1.0.19 ### Patch Changes diff --git a/wasm/alias/CHANGELOG.md b/wasm/alias/CHANGELOG.md index 39fe9b88..0379d0c9 100644 --- a/wasm/alias/CHANGELOG.md +++ b/wasm/alias/CHANGELOG.md @@ -1,5 +1,11 @@ # @dqbd/tiktoken +## 1.0.20 + +### Patch Changes + +- 5f92348: Add missing historic models, fix incorrect tokenizers for old instruct models + ## 1.0.19 ### Patch Changes diff --git a/wasm/alias/package.json b/wasm/alias/package.json index ac8061ef..ad1b2915 100644 --- a/wasm/alias/package.json +++ b/wasm/alias/package.json @@ -1,6 +1,6 @@ { "name": "@dqbd/tiktoken", - "version": "1.0.19", + "version": "1.0.20", "repository": { "type": "git", "url": "https://github.com/dqbd/tiktoken" diff --git a/wasm/package.json b/wasm/package.json index 228709a6..7dec0d58 100644 --- a/wasm/package.json +++ b/wasm/package.json @@ -1,6 +1,6 @@ { "name": "tiktoken", - "version": "1.0.19", + "version": "1.0.20", "description": "JS/WASM bindings for tiktoken", "license": 
"MIT", "scripts": { From 77ba758bfb7f5a46f6ff625c634c08635b4bdeec Mon Sep 17 00:00:00 2001 From: Tat Dat Duong Date: Thu, 13 Feb 2025 17:09:47 -0800 Subject: [PATCH 206/207] Update guidance on lite --- .changeset/tame-turkeys-compete.md | 5 +++++ js/README.md | 31 ++++++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 .changeset/tame-turkeys-compete.md diff --git a/.changeset/tame-turkeys-compete.md b/.changeset/tame-turkeys-compete.md new file mode 100644 index 00000000..40b76d83 --- /dev/null +++ b/.changeset/tame-turkeys-compete.md @@ -0,0 +1,5 @@ +--- +"js-tiktoken": patch +--- + +Update guidance on lite diff --git a/js/README.md b/js/README.md index 47541102..6cf35d69 100644 --- a/js/README.md +++ b/js/README.md @@ -9,9 +9,36 @@ Install the library from NPM: npm install js-tiktoken ``` -## Usage +## Lite -Basic usage follows, which includes all the OpenAI encoders and ranks: +You can only load the ranks you need, which will significantly reduce the bundle size: + +```typescript +import { Tiktoken } from "js-tiktoken/lite"; +import o200k_base from "js-tiktoken/ranks/o200k_base"; + +const enc = new Tiktoken(o200k_base); +assert(enc.decode(enc.encode("hello world")) === "hello world"); +``` + +Alternatively, encodings can be loaded dynamically from our CDN hosted on Cloudflare Pages. + +```typescript +import { Tiktoken } from "js-tiktoken/lite"; + +const res = await fetch(`https://tiktoken.pages.dev/js/o200k_base.json`); +const o200k_base = await res.json(); + +const enc = new Tiktoken(o200k_base); +assert(enc.decode(enc.encode("hello world")) === "hello world"); +``` + +## Full usage + +If you need all the OpenAI tokenizers, you can import the entire library: + +> [!CAUTION] +> This will include all the OpenAI tokenizers, which may significantly increase the bundle size. 
See ```typescript import assert from "node:assert"; From 3827a62f1f5e02f0c0b363c25ec3e9264e224912 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 14 Feb 2025 01:12:15 +0000 Subject: [PATCH 207/207] Version Packages --- .changeset/tame-turkeys-compete.md | 5 ----- js/CHANGELOG.md | 6 ++++++ js/package.json | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) delete mode 100644 .changeset/tame-turkeys-compete.md diff --git a/.changeset/tame-turkeys-compete.md b/.changeset/tame-turkeys-compete.md deleted file mode 100644 index 40b76d83..00000000 --- a/.changeset/tame-turkeys-compete.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"js-tiktoken": patch ---- - -Update guidance on lite diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index 0a4ce5cf..297b582c 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,11 @@ # js-tiktoken +## 1.0.19 + +### Patch Changes + +- 77ba758: Update guidance on lite + ## 1.0.18 ### Patch Changes diff --git a/js/package.json b/js/package.json index 42c78c49..ceb04ace 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "js-tiktoken", - "version": "1.0.18", + "version": "1.0.19", "description": "JavaScript port of tiktoken", "license": "MIT", "scripts": {