add phi-3-mini runner (pytorch#3951)
Summary:
This PR adds a basic runner for the phi-3-mini model. It uses sentencepiece to create the tokenizer.

Commands for running the model:
```
# setup executorch per instructions in https://pytorch.org/executorch/stable/getting-started-setup.html
# install latest transformers
pip uninstall -y transformers && pip install git+https://github.com/huggingface/transformers
# export the model; this will take a few minutes
cd examples/models/phi-3-mini
python export_model.py
# download the tokenizer.model
wget -O tokenizer.model https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true
# build the runner
mkdir cmake-out
cd cmake-out
cmake ..
cd ..
cmake --build cmake-out -j10
./cmake-out/phi_3_mini_runner
```

Pull Request resolved: pytorch#3951

Reviewed By: larryliu0820

Differential Revision: D58477481

Pulled By: helunwencser

fbshipit-source-id: c5a7e6781338d4347a1b9d06b22e23613633df6b
helunwencser authored and facebook-github-bot committed Jun 13, 2024
1 parent 4ed5bc7 commit 70743bb
Showing 6 changed files with 204 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
@@ -64,3 +64,6 @@
[submodule "third-party/ios-cmake"]
path = third-party/ios-cmake
url = https://github.com/leetal/ios-cmake
[submodule "examples/models/phi-3-mini/third-party/sentencepiece"]
path = examples/models/phi-3-mini/third-party/sentencepiece
url = https://github.com/google/sentencepiece.git
39 changes: 39 additions & 0 deletions examples/models/phi-3-mini/CMakeLists.txt
@@ -0,0 +1,39 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

cmake_minimum_required(VERSION 3.19)
project(phi_3_mini_runner)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED True)
set(CMAKE_BUILD_TYPE Release)

# Set options for executorch build.
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "" ON)
option(EXECUTORCH_BUILD_XNNPACK "" ON)

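# Build ExecuTorch (the repository root, three directories up) and the
# sentencepiece submodule as part of this project.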
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/../../..
${CMAKE_BINARY_DIR}/../../..)
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/third-party/sentencepiece
${CMAKE_BINARY_DIR}/third-party/sentencepiece)

add_executable(phi_3_mini_runner main.cpp)
target_include_directories(
phi_3_mini_runner
PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}/third-party/sentencepiece/src)
target_link_libraries(
phi_3_mini_runner
PRIVATE
executorch
extension_module_static
optimized_native_cpu_ops_lib
xnnpack_backend
sentencepiece)
28 changes: 28 additions & 0 deletions examples/models/phi-3-mini/README.md
@@ -0,0 +1,28 @@
# Summary
This example demonstrates how to run a [Phi-3-mini](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) 3.8B model via ExecuTorch. We use the XNNPACK backend to accelerate inference, together with XNNPACK symmetric per-channel quantization.

# Instructions
## Step 1: Setup
1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch. For installation, run `./install_requirements.sh --pybind xnnpack`.
2. Phi-3 Mini-128K-Instruct has been integrated into the development version (4.41.0.dev0) of transformers. Make sure you install transformers at version 4.41.0 or later: `pip uninstall -y transformers && pip install git+https://github.com/huggingface/transformers`


## Step 2: Prepare and run the model
1. Download the `tokenizer.model` from Hugging Face.
```
cd examples/models/phi-3-mini
wget -O tokenizer.model https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true
```
2. Export the model. This step will take a few minutes to finish.
```
python export_model.py
```
3. Build and run the runner. It reads a prompt from stdin, prints the generated token ids as they are produced, and finally prints the decoded text.
```
mkdir cmake-out
cd cmake-out
cmake ..
cd ..
cmake --build cmake-out -j10
./cmake-out/phi_3_mini_runner
```
90 changes: 90 additions & 0 deletions examples/models/phi-3-mini/main.cpp
@@ -0,0 +1,90 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

// main.cpp

#include <iostream>

#include <executorch/extension/module/module.h>
#include <executorch/extension/runner_util/managed_tensor.h>

#include "sentence_piece_tokenizer.h"

using namespace torch::executor;

// The value of the phi-3-mini `<|endoftext|>` token.
#define ENDOFTEXT_TOKEN 32000
#define VOCABULARY_SIZE 32064

// TODO(lunwenh): refactor and share with llama
void generate(
Module& llm_model,
std::string& prompt,
SentencePieceTokenizer& tokenizer,
size_t max_output_length) {
// Convert the input text into a list of integers (tokens) that represents
// it, using the string-to-token mapping that the model was trained on.
// Each token is an integer that represents a word or part of a word.
std::vector<int64_t> input_tokens = tokenizer.encode(prompt);

std::cout << "Generating tokens ..." << std::endl;

std::vector<int64_t> output_tokens;

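  // Note: this runner keeps no KV cache; each iteration feeds the entire
  // token sequence back through the model and recomputes all positions.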
for (size_t i = 0; i < max_output_length; i++) {
ManagedTensor tensor_tokens(
input_tokens.data(),
{1, static_cast<int>(input_tokens.size())},
ScalarType::Long);
std::vector<EValue> inputs = {tensor_tokens.get_aliasing_tensor()};

Result<std::vector<EValue>> result_evalue = llm_model.forward(inputs);

    const auto error = result_evalue.error();
    if (error != Error::Ok) {
      std::cerr << "Model forward failed." << std::endl;
      break;
    }
    Tensor logits_tensor = result_evalue.get()[0].toTensor();
const auto sentence_length = logits_tensor.size(1);
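    // Keep only the logits for the last position in the sequence; they score
    // every candidate next token.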
std::vector<float> logits(
logits_tensor.data_ptr<float>() +
(sentence_length - 1) * VOCABULARY_SIZE,
logits_tensor.data_ptr<float>() + sentence_length * VOCABULARY_SIZE);

    // Greedily pick the token with the highest logit (argmax decoding).
int64_t next_token =
std::max_element(logits.begin(), logits.end()) - logits.begin();

std::cout << next_token << "\t";
std::cout.flush();

// Break if we reached the end of the text.
if (next_token == ENDOFTEXT_TOKEN) {
break;
}

output_tokens.push_back(next_token);

// Update next input.
input_tokens.push_back(next_token);
}

std::cout << std::endl;
std::cout << tokenizer.decode(output_tokens) << std::endl;
}

int main() {
// Set up the prompt. This provides the seed text for the model to elaborate.
std::cout << "Enter model prompt: ";
std::string prompt;
std::getline(std::cin, prompt);

SentencePieceTokenizer tokenizer("tokenizer.model");

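  // Load the program exported by export_model.py; mlock the weights and
  // ignore failures if the pages cannot be locked.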
Module model("phi-3-mini.pte", Module::MlockConfig::UseMlockIgnoreErrors);

const auto max_output_tokens = 128;
generate(model, prompt, tokenizer, max_output_tokens);
}
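
A note on decoding: the loop above always picks the highest-scoring token (greedy argmax), which keeps the runner deterministic and dependency-free. For comparison, below is a minimal sketch of temperature sampling that could stand in for the `std::max_element` line; the helper name `sample_with_temperature` and its temperature handling are illustrative assumptions, not part of this PR.

```cpp
#include <algorithm>
#include <cmath>
#include <random>
#include <vector>

// Hypothetical helper, not part of this PR: softmax the logits at a given
// temperature (must be > 0) and draw one token id from the distribution.
int64_t sample_with_temperature(const std::vector<float>& logits, float temperature) {
  // Subtract the max logit before exponentiating for numerical stability.
  const float max_logit = *std::max_element(logits.begin(), logits.end());
  std::vector<float> weights(logits.size());
  for (size_t i = 0; i < logits.size(); ++i) {
    weights[i] = std::exp((logits[i] - max_logit) / temperature);
  }
  static std::mt19937 rng{std::random_device{}()};
  // discrete_distribution normalizes the weights internally.
  std::discrete_distribution<int64_t> dist(weights.begin(), weights.end());
  return dist(rng);
}
```

As `temperature` approaches zero, the distribution concentrates on the argmax token, recovering the greedy behavior above.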
43 changes: 43 additions & 0 deletions examples/models/phi-3-mini/sentence_piece_tokenizer.h
@@ -0,0 +1,43 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <sstream>

#include <sentencepiece_processor.h>

// TODO(lunwenh): Add unit tests
class SentencePieceTokenizer {
public:
SentencePieceTokenizer(const std::string& filePath) {
const auto status = processor_.Load(filePath);
if (!status.ok()) {
std::ostringstream errorMessageStream;
errorMessageStream << "Failed to load SentencePiece model from "
<< filePath << " with error " << status.ToString();
throw std::runtime_error(errorMessageStream.str());
}
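    // Prepend the BOS token to every encoded sequence.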
processor_.SetEncodeExtraOptions("bos");
}

std::vector<int64_t> encode(const std::string& piece) {
std::vector<int> ids;
processor_.Encode(piece, &ids);
std::vector<int64_t> idsLong(ids.begin(), ids.end());
return idsLong;
}

std::string decode(const std::vector<int64_t>& ids) {
std::vector<int> idsInt(ids.begin(), ids.end());
std::string piece;
processor_.Decode(idsInt, &piece);
return piece;
}

private:
sentencepiece::SentencePieceProcessor processor_;
};
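
For reference, a minimal round-trip through this tokenizer might look like the sketch below. It assumes `tokenizer.model` has been downloaded into the working directory as described in the README; the printed ids depend on the prompt.

```cpp
#include <iostream>
#include <vector>

#include "sentence_piece_tokenizer.h"

int main() {
  SentencePieceTokenizer tokenizer("tokenizer.model");

  // encode() prepends BOS (see SetEncodeExtraOptions above) and returns the
  // token ids that the runner feeds to the model.
  std::vector<int64_t> ids = tokenizer.encode("Hello, world!");
  for (int64_t id : ids) {
    std::cout << id << " ";
  }
  std::cout << std::endl;

  // decode() maps the ids back to text; sentencepiece does not render
  // control tokens such as BOS.
  std::cout << tokenizer.decode(ids) << std::endl;
  return 0;
}
```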
1 change: 1 addition & 0 deletions examples/models/phi-3-mini/third-party/sentencepiece
Submodule sentencepiece added at 6225e0
