forked from pytorch/executorch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add phi-3-mini runner (pytorch#3951)
Summary: This PR adds a basic runner for running the phi-3-mini model. It uses sentencepiece to create the tokenizer. Commands for running the model: ``` # setup executorch per instructions in https://pytorch.org/executorch/stable/getting-started-setup.html # install latest transformers pip uninstall -y transformers && pip install git+https://github.com/huggingface/transformers # export the model, will take a few minutes cd examples/models/phi-3-mini python export_model.py # download the tokenizer.model wget -O tokenizer.model https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true # build the runner mkdir cmake-out cd cmake-out cmake .. cd .. cmake --build cmake-out -j10 ./cmake-out/phi_3_mini_runner ``` Pull Request resolved: pytorch#3951 Reviewed By: larryliu0820 Differential Revision: D58477481 Pulled By: helunwencser fbshipit-source-id: c5a7e6781338d4347a1b9d06b22e23613633df6b
- Loading branch information
1 parent
4ed5bc7
commit 70743bb
Showing
6 changed files
with
204 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
cmake_minimum_required(VERSION 3.19)
project(phi_3_mini_runner)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED True)
set(CMAKE_BUILD_TYPE Release)

# Set options for executorch build.
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "" ON)
option(EXECUTORCH_BUILD_XNNPACK "" ON)

# Fix: keep the executorch binary directory inside the build tree. The
# original used ${CMAKE_BINARY_DIR}/../../.., which writes build artifacts
# outside of the cmake-out directory.
add_subdirectory(
  ${CMAKE_CURRENT_SOURCE_DIR}/../../..
  ${CMAKE_BINARY_DIR}/executorch)
add_subdirectory(
  ${CMAKE_CURRENT_SOURCE_DIR}/third-party/sentencepiece
  ${CMAKE_BINARY_DIR}/third-party/sentencepiece)

add_executable(phi_3_mini_runner main.cpp)
target_include_directories(
  phi_3_mini_runner
  PUBLIC
  ${CMAKE_CURRENT_SOURCE_DIR}/third-party/sentencepiece/src)
target_link_libraries(
  phi_3_mini_runner
  PRIVATE
  executorch
  extension_module_static
  optimized_native_cpu_ops_lib
  xnnpack_backend
  sentencepiece)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Summary | ||
This example demonstrates how to run a [Phi-3-mini](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) 3.8B model via ExecuTorch. We use XNNPACK to accelerate performance, applying XNNPACK symmetric per-channel quantization.
|
||
# Instructions | ||
## Step 1: Setup | ||
1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch. For installation run `./install_requirements.sh --pybind xnnpack` | ||
2. Phi-3 Mini-128K-Instruct has been integrated in the development version (4.41.0.dev0) of transformers. Make sure that you install transformers with version at least 4.41.0: `pip uninstall -y transformers && pip install git+https://github.com/huggingface/transformers` | ||
|
||
|
||
## Step 2: Prepare and run the model | ||
1. Download the `tokenizer.model` from HuggingFace. | ||
``` | ||
cd examples/models/phi-3-mini | ||
wget -O tokenizer.model https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true | ||
``` | ||
2. Export the model. This step will take a few minutes to finish. | ||
``` | ||
python export_model.py | ||
``` | ||
3. Build and run the runner. | ||
``` | ||
mkdir cmake-out | ||
cd cmake-out | ||
cmake .. | ||
cd .. | ||
cmake --build cmake-out -j10 | ||
./cmake-out/phi_3_mini_runner | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
// main.cpp | ||
|
||
#include <iostream> | ||
|
||
#include <executorch/extension/module/module.h> | ||
#include <executorch/extension/runner_util/managed_tensor.h> | ||
|
||
#include "sentence_piece_tokenizer.h" | ||
|
||
using namespace torch::executor; | ||
|
||
// The value of the phi-3-mini `<|endoftext|>` token.
// constexpr instead of #define: typed, scoped, and debugger-visible.
constexpr int ENDOFTEXT_TOKEN = 32000;
// Number of logits the model emits per position; used as the stride when
// slicing the last position's logits out of the output tensor.
constexpr int VOCABULARY_SIZE = 32064;
|
||
// TODO(lunwenh): refactor and share with llama
/**
 * Generates up to `max_output_length` tokens from the model using greedy
 * (argmax) decoding, echoing each raw token id as it is produced and
 * printing the decoded text at the end.
 *
 * Note: each step feeds the whole accumulated sequence back through
 * forward() (no KV cache yet), so generation cost grows with length.
 *
 * @param llm_model Loaded ExecuTorch module for the phi-3-mini model.
 * @param prompt Seed text the model elaborates on.
 * @param tokenizer Tokenizer used to encode the prompt / decode the output.
 * @param max_output_length Maximum number of tokens to generate.
 */
void generate(
    Module& llm_model,
    std::string& prompt,
    SentencePieceTokenizer& tokenizer,
    size_t max_output_length) {
  // Convert the input text into a list of integers (tokens) that represents
  // it, using the string-to-token mapping that the model was trained on.
  // Each token is an integer that represents a word or part of a word.
  std::vector<int64_t> input_tokens = tokenizer.encode(prompt);

  std::cout << "Generating tokens ..." << std::endl;

  std::vector<int64_t> output_tokens;
  output_tokens.reserve(max_output_length);

  for (size_t i = 0; i < max_output_length; i++) {
    // Wrap the token buffer as a (1, seq_len) Long tensor without copying.
    ManagedTensor tensor_tokens(
        input_tokens.data(),
        {1, static_cast<int>(input_tokens.size())},
        ScalarType::Long);
    std::vector<EValue> inputs = {tensor_tokens.get_aliasing_tensor()};

    Result<std::vector<EValue>> result_evalue = llm_model.forward(inputs);

    // Fix: the original fetched the error code but never checked it, then
    // read the (possibly invalid) result. Stop generating on failure.
    const auto error = result_evalue.error();
    if (error != Error::Ok) {
      std::cerr << "forward() failed with error "
                << static_cast<int>(error) << std::endl;
      break;
    }

    Tensor logits_tensor = result_evalue.get()[0].toTensor();
    const auto sentence_length = logits_tensor.size(1);

    // Greedy sampling: argmax over the last position's logits. Operate on
    // the tensor data directly instead of copying VOCABULARY_SIZE floats
    // into a temporary vector each step.
    const float* last_logits = logits_tensor.data_ptr<float>() +
        (sentence_length - 1) * VOCABULARY_SIZE;
    int64_t next_token =
        std::max_element(last_logits, last_logits + VOCABULARY_SIZE) -
        last_logits;

    std::cout << next_token << "\t";
    std::cout.flush();

    // Break if we reached the end of the text.
    if (next_token == ENDOFTEXT_TOKEN) {
      break;
    }

    output_tokens.push_back(next_token);

    // Update next input.
    input_tokens.push_back(next_token);
  }

  std::cout << std::endl;
  std::cout << tokenizer.decode(output_tokens) << std::endl;
}
|
||
int main() {
  // Ask the user for the seed text the model will elaborate on.
  std::cout << "Enter model prompt: ";
  std::string prompt;
  std::getline(std::cin, prompt);

  // Load the tokenizer model and the exported phi-3-mini program.
  SentencePieceTokenizer tokenizer("tokenizer.model");
  Module model("phi-3-mini.pte", Module::MlockConfig::UseMlockIgnoreErrors);

  // Cap generation at this many tokens.
  constexpr size_t kMaxOutputTokens = 128;
  generate(model, prompt, tokenizer, kMaxOutputTokens);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
#pragma once

#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <sentencepiece_processor.h>
|
||
// TODO(lunwenh): Add unit tests | ||
class SentencePieceTokenizer { | ||
public: | ||
SentencePieceTokenizer(const std::string& filePath) { | ||
const auto status = processor_.Load(filePath); | ||
if (!status.ok()) { | ||
std::ostringstream errorMessageStream; | ||
errorMessageStream << "Failed to load SentencePiece model from " | ||
<< filePath << " with error " << status.ToString(); | ||
throw std::runtime_error(errorMessageStream.str()); | ||
} | ||
processor_.SetEncodeExtraOptions("bos"); | ||
} | ||
|
||
std::vector<int64_t> encode(const std::string& piece) { | ||
std::vector<int> ids; | ||
processor_.Encode(piece, &ids); | ||
std::vector<int64_t> idsLong(ids.begin(), ids.end()); | ||
return idsLong; | ||
} | ||
|
||
std::string decode(const std::vector<int64_t>& ids) { | ||
std::vector<int> idsInt(ids.begin(), ids.end()); | ||
std::string piece; | ||
processor_.Decode(idsInt, &piece); | ||
return piece; | ||
} | ||
|
||
private: | ||
sentencepiece::SentencePieceProcessor processor_; | ||
}; |
Submodule sentencepiece
added at
6225e0