-
Notifications
You must be signed in to change notification settings - Fork 0
/
App.tsx
94 lines (82 loc) · 2.45 KB
/
App.tsx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import * as React from 'react';
import './style.css';
import { getEncoding } from 'js-tiktoken';
// Shared tokenizer for the `cl100k_base` encoding, built once at module load
// and reused by every tokenization call in this file.
const tiktoken = getEncoding('cl100k_base');
/**
 * Maps a number (in this app, a token id) to a deterministic light HSL colour.
 *
 * The input is scrambled with one step of a linear congruential generator so
 * that neighbouring ids do not get neighbouring colours; the scrambled value
 * then drives hue, saturation and lightness.
 *
 * @param number - Value to colour. The app passes non-negative integer token
 *   ids; negative values are also mapped into the documented ranges.
 * @returns A CSS `hsl(h, s%, l%)` string with hue in [0, 360), saturation in
 *   60–100 % and lightness in 70–90 %.
 */
function numberToColor(number: number): string {
  const goldenRatioConjugate = 0.618033988749895;

  // Constants for a linear congruential generator (LCG).
  const a = 1664525;
  const c = 1013904223;
  const m = Math.pow(2, 32);

  // One LCG step. JavaScript's `%` keeps the sign of the dividend, so the
  // remainder is shifted back into [0, m) to keep every component in range
  // even for negative inputs. For non-negative inputs this is a no-op.
  const pseudorandom = (((a * number + c) % m) + m) % m;

  // Scale the fractional part of (value * golden-ratio conjugate) to degrees.
  const hue = ((pseudorandom * goldenRatioConjugate) % 1) * 360;

  // Saturation spans 60..100 inclusive, i.e. 41 distinct steps. (A modulus of
  // 21 here would cap saturation at 80 and lock it to lightness - 10.)
  const s = 60 + (pseudorandom % 41);

  // Lightness spans 70..90 inclusive, i.e. 21 distinct steps.
  const l = 70 + (pseudorandom % 21);

  return `hsl(${hue}, ${s}%, ${l}%)`;
}
/**
 * Tokenizes `text` and pairs each token id with the text that id decodes to.
 *
 * Every token is decoded on its own, so the returned substrings are in the
 * same order as the tokens produced by the encoder.
 *
 * NOTE(review): a token that covers only part of a multi-byte character may
 * not decode to readable text in isolation — confirm against js-tiktoken's
 * `decode` behaviour.
 *
 * @param text - Raw input text to tokenize.
 * @returns One `[substring, tokenId]` tuple per token, in token order.
 */
function mapTextToSubstringsAndTokens(text: string): Array<[string, number]> {
  const pairs: Array<[string, number]> = [];
  for (const tokenId of tiktoken.encode(text)) {
    const piece = tiktoken.decode([tokenId]);
    pairs.push([piece, tokenId]);
  }
  return pairs;
}
/** Props accepted by `TextWithColors`. */
type TextWithColorsProps = {
  /** `[substring, tokenId]` pairs, in the order they should be displayed. */
  tokensAndSubstrings: ReadonlyArray<readonly [string, number]>;
};

/**
 * Renders the tokenized text as a sequence of coloured spans, one per token.
 *
 * Each span shows the token's substring, uses the token id as its tooltip
 * (`title`) and takes its background colour from `numberToColor(token)`, so
 * the same token id always gets the same colour.
 */
const TextWithColors = ({ tokensAndSubstrings }: TextWithColorsProps) => {
  return (
    <pre className="tokens-visualized" style={{ whiteSpace: 'pre-wrap' }}>
      {tokensAndSubstrings.map(([substring, token], index) => (
        <span
          // The same token id can occur many times, so the position in the
          // list is the only unique key available.
          key={index}
          title={token.toString()}
          className="token-span"
          style={{
            backgroundColor: numberToColor(token),
          }}
        >
          {substring}
        </span>
      ))}
    </pre>
  );
};
/**
 * Counts the whitespace-separated words in `text`.
 *
 * Blank input (empty or whitespace only) has zero words, and any run of
 * spaces, tabs or newlines counts as a single separator.
 */
function countWords(text: string): number {
  const trimmed = text.trim();
  return trimmed === '' ? 0 : trimmed.split(/\s+/).length;
}

/**
 * Root component: a textarea whose content is tokenized on every render and
 * shown three ways — summary statistics, colour-coded token spans, and the
 * raw list of token ids.
 */
export default function App() {
  const [txt, setTxt] = React.useState('');
  // Re-tokenized on each render; `txt` is the only state, so a render only
  // happens when the text changes.
  const toks = mapTextToSubstringsAndTokens(txt);
  return (
    <div className="page">
      <h1 className="title">GPT4 Tokenizer Visualizer</h1>
      <p className="description">
        Like{' '}
        <a href="https://platform.openai.com/tokenizer">
          https://platform.openai.com/tokenizer
        </a>{' '}
        but for GPT4
      </p>
      <textarea
        className="text-input"
        value={txt}
        onChange={(e) => setTxt(e.target.value)}
      />
      <div className="stats">
        <span>
          {/* String length, i.e. UTF-16 code units. */}
          <b>Chars:</b> {txt.length}
        </span>
        <span>
          <b>Words:</b> {countWords(txt)}
        </span>
        <span>
          <b>Tokens:</b> {toks.length}
        </span>
      </div>
      <TextWithColors tokensAndSubstrings={toks} />
      <pre className="tokens-raw">
        {/* Pretty-printed JSON flattened onto one line for display. */}
        {JSON.stringify(
          toks.map(([, t]) => t),
          null,
          2
        ).replaceAll('\n', ' ')}
      </pre>
    </div>
  );
}