forked from hmarr/openai-chat-tokens
feat: reduce bundle size by removing unused large objects
1 parent 1118e49 · commit cc08bbd

Showing 7 changed files with 976 additions and 613 deletions.
.github/workflows/ci.yml
@@ -1,20 +1,17 @@
 name: CI
-
 on:
   push:
-    branches: [ "main" ]
-  pull_request:
-
+    branches-ignore:
+      - main
 jobs:
   build:
-    runs-on: ubuntu-latest
+    runs-on: [ubuntu-latest]
     steps:
-      - uses: actions/checkout@v3
-      - name: Use Node.js ${{ matrix.node-version }}
-        uses: actions/setup-node@v3
-        with:
-          node-version: 18
-          cache: 'npm'
-      - run: npm ci
-      - run: npm run build
-      - run: npm test
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 20.x
+          registry-url: "https://npm.pkg.github.com"
+      - run: npm install
+      - run: npm run build
+      - run: npm test
Publish workflow
@@ -1,21 +1,24 @@
-name: Publish Package to npmjs
+name: Publish Package
 on:
-  release:
-    types: [created]
+  push:
+    branches:
+      - main
 jobs:
-  build:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      id-token: write
+  deploy:
+    runs-on: [ubuntu-latest]
     steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-node@v3
-        with:
-          node-version: '18.x'
-          registry-url: 'https://registry.npmjs.org'
-      - run: npm install -g npm
-      - run: npm ci
-      - run: npm publish --provenance --access public
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 20.x
+          registry-url: "https://registry.npmjs.org"
+      - name: Install
+        run: npm install
+      - name: Build
+        run: |
+          npm run build
+          node scripts/pack-package.js
+      - name: Publish
+        run: npm publish
         env:
           NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
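The Build step runs `scripts/pack-package.js`, which is not included in this commit, so its exact contents are unknown. A minimal sketch of the kind of packaging helper that name suggests (every detail below is an assumption, not the fork's actual script) might look like this:

```js
// scripts/pack-package.js -- hypothetical sketch; the real script is not
// part of this commit and may differ. One common pattern is to write a
// trimmed, publish-ready package.json next to the bundled output in dist/.
const fs = require("fs");
const path = require("path");

const root = path.resolve(__dirname, "..");
const pkg = JSON.parse(
  fs.readFileSync(path.join(root, "package.json"), "utf8"),
);

// Drop fields that only matter during development.
delete pkg.devDependencies;
delete pkg.scripts;

fs.writeFileSync(
  path.join(root, "dist", "package.json"),
  JSON.stringify(pkg, null, 2) + "\n",
);
```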
README.md
@@ -1,58 +1,43 @@
 # openai-chat-tokens
 
-[![npm version](https://badge.fury.io/js/openai-chat-tokens.svg)](https://badge.fury.io/js/openai-chat-tokens)
-[![CI](https://github.com/hmarr/openai-chat-tokens/actions/workflows/ci.yml/badge.svg)](https://github.com/hmarr/openai-chat-tokens/actions/workflows/ci.yml)
+This repository is a fork of [hmarr/openai-chat-tokens](https://github.com/hmarr/openai-chat-tokens) with modifications to reduce the bundle size by replacing unused large objects.
 
-A TypeScript / JavaScript library for estimating the number of tokens an OpenAI chat completion request will use.
+## Purpose
 
-Estimating token usage for chat completions isn't quite as easy as it sounds.
+The primary purpose of this fork is to minimize the size of the JavaScript bundle by replacing large, unused objects with empty objects. This helps improve the performance and loading time of applications that use this package.
 
-For regular chat messages, you need to consider how the messages are formatted by OpenAI when they're provided to the model, as they don't simply dump the JSON messages they receive via the API into the model.
+## Changes Made
 
-For function calling, things are even more complex, as the OpenAPI-style function definitions get rewritten into TypeScript type definitions.
+The following large objects have been replaced as they were not used in our specific implementation:
 
-This library handles both of those cases, as well as a minor adjustment needed for handling the _results_ of function calling. [tiktoken](https://github.com/dqbd/tiktoken) is used to do the tokenization.
+- `gpt2_default`
+- `r50k_base_default`
+- `p50k_base_default`
+- `p50k_edit_default`
+- `o200k_base_default`
 
-## Usage
-
-```typescript
-import { promptTokensEstimate } from "openai-chat-tokens";
-
-const estimate = promptTokensEstimate({
-  messages: [
-    { role: "system", content: "These aren't the droids you're looking for" },
-    { role: "user", content: "You can go about your business. Move along." },
-  ],
-  functions: [
-    {
-      name: "activate_hyperdrive",
-      description: "Activate the hyperdrive",
-      parameters: {
-        type: "object",
-        properties: {
-          destination: { type: "string" },
-        },
-      },
-    },
-  ],
-});
-```
+These objects are defined in the original repository and can significantly increase the bundle size. By replacing them with empty objects, we ensure that the bundle remains lightweight.
 
-## Development and testing
+## Installation
 
-Built in TypeScript, tested with Jest.
+You can install the modified package using npm:
 
 ```bash
-$ npm install
-$ npm test
+$ npm install @mootod/openai-chat-tokens
 ```
 
-When adding new test cases or debugging token count mismatches, it can be helpful to validate the estimated tokens in the tests against the live OpenAI API. To do this:
+## Usage
 
-1. Set up the `OPENAI_API_KEY` environment variable with a live API key
-2. Add `validate: true` to one of the test examples, or set `validateAll` to `true` in `token-counts.test.ts`, then run the tests
+```js
+import { encode, decode } from "@mootod/openai-chat-tokens";
+
+const encoded = encode("Hello, world!");
+const decoded = decode(encoded);
+
+console.log("encoded:", encoded);
+console.log("decoded:", decoded);
+```
 
-## References
+## How to Replace the Objects
 
-1. "Counting tokens for chat completions API calls" in OpenAI's ["How to count tokens with tiktoken" notebook](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb)
-2. A post about [counting function call tokens](https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573/23) on the OpenAI forum.
+We used esbuild to replace the large objects with empty objects in the bundle. For more details, refer to the `esbuild.config.js` file in the repository.
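Because the replacement is done with a regex over the bundled output (see `esbuild.config.js` below), one way to confirm it took effect is to scan `dist/index.js` for the emptied declarations. The following helper is hypothetical and not part of the commit; it only relies on the replacement form `var <name> = {};` that the plugin writes:

```js
// check-replacements.js -- hypothetical verification helper, not part of
// this commit. Confirms each large encoder object was emptied in the bundle.
const fs = require("fs");

const bundle = fs.readFileSync("dist/index.js", "utf8");
const names = [
  "gpt2_default",
  "r50k_base_default",
  "p50k_base_default",
  "p50k_edit_default",
  "o200k_base_default",
];

for (const name of names) {
  // The esbuild plugin rewrites each declaration to `var <name> = {};`.
  const emptied = bundle.includes(`var ${name} = {};`);
  console.log(`${name}: ${emptied ? "replaced" : "NOT replaced"}`);
}
```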
esbuild.config.js (new file)
@@ -0,0 +1,79 @@
const path = require("path");
const fs = require("fs");
const { execSync } = require("child_process");
const esbuild = require("esbuild");
const chalk = require("chalk");
const gzipSize = require("gzip-size");

// Rewrites a top-level `var <variableName> = {...};` declaration in the
// bundled output to the given replacement. This is how the large, unused
// encoder objects are swapped for empty objects after bundling.
const replaceVariablePlugin = (variableName, replacement) => ({
  name: "replace-variable-plugin",
  setup(build) {
    build.onEnd((result) => {
      const outputFiles = result.outputFiles || [];
      outputFiles.forEach((file) => {
        let content = new TextDecoder("utf-8").decode(file.contents);
        // Match the whole declaration, up to its terminating semicolon.
        const regex = new RegExp(
          `var\\s+${variableName}\\s*=\\s*\\{[^;]*\\};`,
          "g",
        );
        content = content.replace(
          regex,
          `var ${variableName} = ${replacement};`,
        );
        file.contents = new TextEncoder().encode(content);
      });
    });
  },
});

esbuild
  .build({
    entryPoints: ["src/index.ts"],
    outdir: "dist",
    format: "esm",
    bundle: true,
    minify: false,
    sourcemap: true,
    // Keep output in memory (write: false) so the plugin can rewrite it;
    // the files are written manually below.
    write: false,
    metafile: true,
    plugins: [
      replaceVariablePlugin("gpt2_default", "{}"),
      replaceVariablePlugin("r50k_base_default", "{}"),
      replaceVariablePlugin("p50k_base_default", "{}"),
      replaceVariablePlugin("p50k_edit_default", "{}"),
      replaceVariablePlugin("o200k_base_default", "{}"),
    ],
  })
  .then((result) => {
    if (!fs.existsSync("dist")) {
      fs.mkdirSync("dist");
    }
    const outputFiles = result.outputFiles || [];
    outputFiles.forEach((file) => {
      fs.writeFileSync(file.path, file.contents);
    });
    // esbuild does not emit type declarations, so generate them with tsc.
    execSync("npx tsc --declaration --emitDeclarationOnly -p tsconfig.json");

    // Report the raw and gzipped size of the main bundle.
    const outputIndexPath = "dist/index.js";
    const separatorIndex = outputIndexPath.lastIndexOf("/");
    const dirname = outputIndexPath.substring(0, separatorIndex + 1);
    const filename = outputIndexPath.substring(separatorIndex + 1);
    const gzipFile = gzipSize.fileSync(
      path.resolve(__dirname, outputIndexPath),
    );
    const fileStat = fs.statSync(outputIndexPath);
    const bundleSize = Number((fileStat.size / 1024).toFixed(2));
    const bundleGzipSize = Number((gzipFile / 1024).toFixed(2));

    console.log(
      chalk.gray(
        `${dirname}${chalk.cyan(filename)} ${chalk.yellowBright.bold(
          `${bundleSize.toLocaleString("en-US")} kB ${chalk.reset.gray(
            `| gzip: ${bundleGzipSize.toLocaleString("en-US")} kB`,
          )}`,
        )}`,
      ),
    );

    console.log(chalk.green("✓ esbuild done"));
  })
  .catch((err) => {
    // Fail the build loudly instead of leaving an unhandled rejection.
    console.error(err);
    process.exit(1);
  });
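The config is plain CommonJS and can be run directly with `node esbuild.config.js`. Note that the latest majors of `chalk` (v5+) and `gzip-size` (v6+) are ESM-only, so loading them with `require` as above assumes chalk 4.x or earlier and gzip-size 5.x or earlier, the last versions to expose a CommonJS API including `gzipSize.fileSync`.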