diff --git a/README.md b/README.md
index ea2e468..1569ae4 100644
--- a/README.md
+++ b/README.md
@@ -14,3 +14,4 @@ Weights are stored in the binary as bfloat16, and unpacked to float32 at runtime
 
 The bundled tensor math lib uses compile-time shapes and in-place storage, so there is no dynamic memory allocation at all.
 
+Headers are installed with the Python package and can be found with `python -m torch2cpp.includes`
\ No newline at end of file
diff --git a/example/Makefile b/example/Makefile
index 19ef5f5..2dfc532 100644
--- a/example/Makefile
+++ b/example/Makefile
@@ -1,13 +1,13 @@
 INCLUDES := $(shell python -m torch2cpp.includes)
 
 build/model.js : build/model.cpp
-	em++ -Os build/model.cpp -I$(INCLUDES) \
-	  -o build/model.js -s MODULARIZE=1 -s EXPORT_NAME=load_model \
-	  -s EXPORTED_FUNCTIONS=_model_step,_model_reset,_model_encode,_model_decode
+	em++ $^ -o $@ -I$(INCLUDES) \
+	  -Os -s MODULARIZE=1 -s EXPORT_NAME=load_model \
+	  -s EXPORTED_FUNCTIONS=_model_step,_model_reset,_model_encode,_model_decode,_malloc
 
-build/chat_cli : chat_cli.cpp build/model.cpp
-	c++ -std=c++17 -Os -march=native -ffast-math \
-	  build/model.cpp chat_cli.cpp -I$(INCLUDES) -o build/chat_cli
+build/chat_cli : build/model.cpp chat_cli.cpp
+	c++ $^ -o $@ -I$(INCLUDES) \
+	  -Os -std=c++17 -march=native -ffast-math
 
 .PHONY: model.js
 model.js: build/model.js
diff --git a/example/chat_cli.cpp b/example/chat_cli.cpp
index a86eebc..d9ea8b9 100644
--- a/example/chat_cli.cpp
+++ b/example/chat_cli.cpp
@@ -18,9 +18,15 @@ int main(int argc, char ** argv)
     while(true)
     {
         std::getline(std::cin, prompt);
-        prompt += "\n";
+        // prompt += "\n";
 
         int n_tok = model_encode(prompt.c_str(), prompt.size(), toks, max_tokens);
+
+        std::cout << n_tok << std::endl;
+        for(int i=0 ; i<n_tok ; ++i)
+            std::cout << toks[i] << ' ';
+        std::cout << std::endl;
+
diff --git a/src/torch2cpp/include/torch2cpp/tokenizer.h b/src/torch2cpp/include/torch2cpp/tokenizer.h
--- a/src/torch2cpp/include/torch2cpp/tokenizer.h
+++ b/src/torch2cpp/include/torch2cpp/tokenizer.h
@@ -1,6 +1,3 @@
-#include <vector>
-#include <string>
-
 template<int n_vocab, int n_trees>
 struct Tokenizer
 {
diff --git a/src/torch2cpp/torch2cpp.py b/src/torch2cpp/torch2cpp.py
index 44cb524..547d3ef 100644
--- a/src/torch2cpp/torch2cpp.py
+++ b/src/torch2cpp/torch2cpp.py
@@ -144,7 +144,7 @@ def codegen(
     out_file,
     args=[],
     kwargs={},
-    tokenizer=None,
+    tokens=None,
     autowrap_functions=[],
     c_prefix='model',
     skip_weights=False,
@@ -159,10 +159,9 @@
 
     out = interp.run(*args, **kwargs)
 
-    if tokenizer is not None:
-        n_vocab = tokenizer.get_vocab_size()
-        vocab = tokenizer.decode_batch([[i] for i in range(n_vocab)])
-        vocab = [bytes(t, 'utf8') for t in vocab]
+    if tokens is not None:
+        n_vocab = len(tokens)
+        vocab = tokens
         token_pack = [struct.pack('B',len(t))+t for t in vocab]
         token_pack = [hex(c) for tok in token_pack for c in tok]
         token_pack = ','.join(token_pack)
@@ -217,7 +216,7 @@ def __exit__(self, *_):
 
     w = Writer(out_file)
     w('#include "torch2cpp/tensor.h"')
-    if tokenizer is not None:
+    if tokens is not None:
         w('#include "torch2cpp/tokenizer.h"')
 
     w('\n')
@@ -229,7 +228,7 @@ def __exit__(self, *_):
     w(','.join([hex(x) for x in blob]))
     w(';')
 
-    if tokenizer is not None:
+    if tokens is not None:
         w(f'uint8_t const g_token_pack[] = {{ {token_pack} }};')
 
     w('// weight tensors')
@@ -268,7 +267,7 @@ def __exit__(self, *_):
     w(f'ml::rng64 g_rng;')
     w(f'{class_name} g_model;')
 
-    if tokenizer is not None:
+    if tokens is not None:
         w(f'Tokenizer<{n_vocab}, {n_trees}> g_tokenizer = {{ g_token_pack }};')
     w('\n')
     w('} // namespace\n')
@@ -290,7 +289,7 @@ def __exit__(self, *_):
 }}
 ''')
 
-    if tokenizer is not None:
+    if tokens is not None:
         w(f'''
 int {c_prefix}_encode(char const* str, int str_len, int * out, int out_len)
 {{
@@ -300,6 +299,23 @@
 {{
     return g_tokenizer.decode(toks, toks_len, out, out_len);
 }}
+''')
+    else:
+        w(f'''
+int {c_prefix}_encode(char const* str, int str_len, int * out, int out_len)
+{{
+    int i = 0;
+    for(; i<str_len && i<out_len ; ++i)
+        out[i] = (uint8_t)str[i];
+    return i;
+}}
+int {c_prefix}_decode(int const* toks, int toks_len, char * out, int out_len)
+{{
+    int i = 0;
+    for(; i<toks_len && i<out_len ; ++i)
+        out[i] = (char)toks[i];
+    return i;
+}}
 ''')
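Note on the Makefile change: `_malloc` is presumably added to EXPORTED_FUNCTIONS so the page embedding model.js can allocate the prompt and token buffers itself. Below is a minimal sketch (not part of the patch) of how the MODULARIZE'd build might be driven from script; the buffer size `maxToks` and the prompt string are illustrative, and on recent Emscripten the HEAPU8/HEAP32 views may additionally need to be listed in -s EXPORTED_RUNTIME_METHODS to be reachable on the module object:

    // load_model comes from -s MODULARIZE=1 -s EXPORT_NAME=load_model;
    // the factory resolves to the instantiated module.
    const Module = await load_model();

    // Copy the prompt into wasm memory; this is what _malloc is exported for.
    const text   = new TextEncoder().encode("hello");
    const strPtr = Module._malloc(text.length);
    Module.HEAPU8.set(text, strPtr);

    // Encode into a caller-owned int32 token buffer.
    const maxToks = 256;                         // illustrative capacity
    const tokPtr  = Module._malloc(maxToks * 4); // 4 bytes per int token
    const nTok    = Module._model_encode(strPtr, text.length, tokPtr, maxToks);
    const toks    = Module.HEAP32.subarray(tokPtr / 4, tokPtr / 4 + nTok);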