From fe46dfde05d6cce6e57d1189d9676b08262a9a5c Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 11 Nov 2024 08:49:31 +0000 Subject: [PATCH 1/9] Add Bound to PyString and PyModule Looks like in new version of Rust we need to put the Bound for generics, so the compiler can know what is implemented for that generic. - Also rename DICT_COLLECTION to TOKENIZER_COLLECTION and revise docstring to make it more clear that the collection actually stores dictionary-based tokenizers (and not dictionaries). - Update package metadata --- CITATION.cff | 6 ++++ Cargo.toml | 8 ++--- nlpo3-cli/Cargo.toml | 6 ++-- nlpo3-nodejs/Cargo.toml | 6 ++-- nlpo3-nodejs/src/lib.rs | 22 ++++++++---- nlpo3-python/Cargo.lock | 2 +- nlpo3-python/Cargo.toml | 16 ++++++--- nlpo3-python/pyproject.toml | 2 +- nlpo3-python/setup.cfg | 2 +- nlpo3-python/src/lib.rs | 69 +++++++++++++++++++++---------------- 10 files changed, 87 insertions(+), 52 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 4d0be53..665542b 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -7,6 +7,12 @@ type: software authors: - family-names: Suntorntip given-names: Thanathip + - family-names: "Suriyawongkul" + given-names: "Arthit" + orcid: "https://orcid.org/0000-0002-9698-1899" + - family-names: Phatthiyaphaibun + given-names: Wannaphong + orcid: "https://orcid.org/0000-0002-4153-4354" repository-code: "https://github.com/PyThaiNLP/nlpo3/" repository: "https://github.com/PyThaiNLP/nlpo3/" url: "https://github.com/PyThaiNLP/nlpo3/" diff --git a/Cargo.toml b/Cargo.toml index 9e04e29..1205841 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,14 +3,14 @@ name = "nlpo3" version = "1.4.0" edition = "2018" license = "Apache-2.0" -authors = ["Thanathip Suntorntip Gorlph"] +authors = ["Thanathip Suntorntip Gorlph", "Arthit Suriyawongkul"] description = "Thai natural language processing library, with Python and Node bindings" +categories = ["text-processing"] +keywords = ["thai", "tokenizer", "nlp", "word-segmentation"] homepage = "https://github.com/PyThaiNLP/nlpo3/" -documentation = "https://github.com/PyThaiNLP/nlpo3/blob/main/README.md" repository = "https://github.com/PyThaiNLP/nlpo3/" +documentation = "https://github.com/PyThaiNLP/nlpo3/blob/main/README.md" readme = "README.md" -keywords = ["thai", "tokenizer", "nlp", "word-segmentation"] -categories = ["text-processing"] exclude = [ ".gitignore", ".github/*", diff --git a/nlpo3-cli/Cargo.toml b/nlpo3-cli/Cargo.toml index 2ae7ea3..333b3aa 100644 --- a/nlpo3-cli/Cargo.toml +++ b/nlpo3-cli/Cargo.toml @@ -5,12 +5,12 @@ edition = "2018" license = "Apache-2.0" authors = ["Vee Satayamas <5ssgdxltv@relay.firefox.com>"] description = "Command line interface for nlpO3, a Thai natural language processing library" -documentation = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-cli/README.md" +categories = ["text-processing", "command-line-utilities"] +keywords = ["thai", "tokenizer", "nlp", "word-segmentation", "cli"] homepage = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-cli/" repository = "https://github.com/PyThaiNLP/nlpo3/" +documentation = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-cli/README.md" readme = "README.md" -keywords = ["thai", "tokenizer", "nlp", "word-segmentation", "cli"] -categories = ["text-processing", "command-line-utilities"] [[bin]] name = "nlpo3" diff --git a/nlpo3-nodejs/Cargo.toml b/nlpo3-nodejs/Cargo.toml index 1e86fae..d3a7411 100644 --- a/nlpo3-nodejs/Cargo.toml +++ b/nlpo3-nodejs/Cargo.toml @@ -5,12 +5,12 @@ edition = "2018" license = "Apache-2.0" authors = ["Thanathip Suntorntip Gorlph"] description = "Node binding for nlpO3 Thai language processing library" -documentation = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-nodejs/README.md" +keywords = ["thai", "tokenizer", "nlp", "word-segmentation"] +categories = ["text-processing"] homepage = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-nodejs/" repository = "https://github.com/PyThaiNLP/nlpo3/" +documentation = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-nodejs/README.md" readme = "README.md" -keywords = ["thai", "tokenizer", "nlp", "word-segmentation"] -categories = ["text-processing"] exclude = ["index.node"] [lib] diff --git a/nlpo3-nodejs/src/lib.rs b/nlpo3-nodejs/src/lib.rs index dfd164e..2660b0e 100644 --- a/nlpo3-nodejs/src/lib.rs +++ b/nlpo3-nodejs/src/lib.rs @@ -9,22 +9,28 @@ use neon::prelude::*; use nlpo3::tokenizer::{newmm::NewmmTokenizer, tokenizer_trait::Tokenizer}; lazy_static! { - static ref DICT_COLLECTION: Mutex>> = + static ref TOKENIZER_COLLECTION: Mutex>> = Mutex::new(HashMap::new()); } +// Load a dictionary file to a tokenizer, +// and add that tokenizer to the tokenizer collection. +// +// Dictionary file must one word per line. +// If successful, will insert a NewmmTokenizer to TOKENIZER_COLLECTION. +// returns a tuple of string of loading result and a boolean fn load_dict(mut cx: FunctionContext) -> JsResult { - let mut dict_col_lock = DICT_COLLECTION.lock().unwrap(); + let mut tokenizer_col_lock = TOKENIZER_COLLECTION.lock().unwrap(); let file_path = cx.argument::(0)?.value(&mut cx); let dict_name = cx.argument::(1)?.value(&mut cx); - if let Some(_) = dict_col_lock.get(&dict_name) { + if let Some(_) = tokenizer_col_lock.get(&dict_name) { Ok(cx.string(format!( "Failed: dictionary {} exists, please use another name.", dict_name ))) } else { let tokenizer = NewmmTokenizer::new(&file_path); - dict_col_lock.insert(dict_name.to_owned(), Box::new(tokenizer)); + tokenizer_col_lock.insert(dict_name.to_owned(), Box::new(tokenizer)); Ok(cx.string(format!( "Successful: dictionary name {} from file {} has been successfully loaded", @@ -33,13 +39,17 @@ fn load_dict(mut cx: FunctionContext) -> JsResult { } } +// Break text into tokens. +// Use newmm algorithm. +/// Can use multithreading, but takes a lot of memory. +/// returns an array of string fn segment(mut cx: FunctionContext) -> JsResult { let text = cx.argument::(0)?.value(&mut cx); let dict_name = cx.argument::(1)?.value(&mut cx); let safe = cx.argument::(2)?.value(&mut cx); let parallel = cx.argument::(3)?.value(&mut cx); - if let Some(loaded_dict) = DICT_COLLECTION.lock().unwrap().get(&dict_name) { - let result = loaded_dict.segment_to_string(&text, safe, parallel); + if let Some(loaded_tokenizer) = TOKENIZER_COLLECTION.lock().unwrap().get(&dict_name) { + let result = loaded_tokenizer.segment_to_string(&text, safe, parallel); let js_result_array = JsArray::new(&mut cx, result.len() as u32); for (i, obj) in result.iter().enumerate() { let js_string = cx.string(obj); diff --git a/nlpo3-python/Cargo.lock b/nlpo3-python/Cargo.lock index f58fbe6..9d9d273 100644 --- a/nlpo3-python/Cargo.lock +++ b/nlpo3-python/Cargo.lock @@ -199,7 +199,7 @@ dependencies = [ [[package]] name = "nlpo3-python" -version = "1.3.1-dev" +version = "1.3.1" dependencies = [ "ahash", "lazy_static", diff --git a/nlpo3-python/Cargo.toml b/nlpo3-python/Cargo.toml index 6b26879..22454ff 100644 --- a/nlpo3-python/Cargo.toml +++ b/nlpo3-python/Cargo.toml @@ -1,13 +1,21 @@ [package] name = "nlpo3-python" -version = "1.3.1-dev" +version = "1.3.1" edition = "2018" license = "Apache-2.0" -authors = ["Thanathip Suntorntip Gorlph"] +authors = [ + "Thanathip Suntorntip Gorlph", + "Arthit Suriyawongkul", + "Wannaphong Phatthiyaphaibun ", +] description = "Python binding for nlpO3 Thai language processing library" -exclude = ["notebooks"] -keywords = ["thai", "tokenizer", "nlp", "word-segmentation", "python"] categories = ["text-processing"] +keywords = ["thai", "tokenizer", "nlp", "word-segmentation", "python"] +homepage = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-python" +repository = "https://github.com/PyThaiNLP/nlpo3/" +documentation = "https://github.com/PyThaiNLP/nlpo3/blob/main/nlpo3-python/README.md" +readme = "README.md" +exclude = [".gitignore", ".github/", "notebooks/", "tests/", "*.sh"] [lib] name = "_nlpo3_python_backend" diff --git a/nlpo3-python/pyproject.toml b/nlpo3-python/pyproject.toml index 2c22692..4c70514 100644 --- a/nlpo3-python/pyproject.toml +++ b/nlpo3-python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "nlpo3" -version = "1.3.1-dev" +version = "1.3.1" description = "Python binding for nlpO3 Thai language processing library in Rust" readme = "README.md" requires-python = ">=3.6" diff --git a/nlpo3-python/setup.cfg b/nlpo3-python/setup.cfg index 5014268..e3db675 100644 --- a/nlpo3-python/setup.cfg +++ b/nlpo3-python/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = nlpo3 -version = 1.3.1-dev +version = 1.3.1 description = Python binding for nlpO3 Thai language processing library long_description = Python binding for nlpO3, a Thai natural language processing library in Rust. diff --git a/nlpo3-python/src/lib.rs b/nlpo3-python/src/lib.rs index 6401743..9dcea49 100644 --- a/nlpo3-python/src/lib.rs +++ b/nlpo3-python/src/lib.rs @@ -21,38 +21,23 @@ use pyo3::types::PyString; use pyo3::{exceptions, wrap_pyfunction}; lazy_static! { - static ref DICT_COLLECTION: Mutex>> = + static ref TOKENIZER_COLLECTION: Mutex>> = Mutex::new(HashMap::new()); } -/// Break text into tokens. -/// Use newmm algorithm. -/// Can use multithreading, but takes a lot of memory. -/// returns list of valid utf-8 bytes list -/// signature: (text: str, dict_name: str, safe: boolean = false, parallel: boolean = false) -> List[List[u8]] -#[pyfunction] -#[pyo3(signature = (text, dict_name, safe=false, parallel=false))] -fn segment(text: &PyString, dict_name: &str, safe: bool, parallel: bool) -> PyResult> { - if let Some(loaded_dict) = DICT_COLLECTION.lock().unwrap().get(dict_name) { - let result = loaded_dict.segment_to_string(text.to_str()?, safe, parallel); - Ok(result) - } else { - Err(exceptions::PyRuntimeError::new_err(format!( - "Dictionary name {} does not exist.", - dict_name - ))) - } -} - -/// Load a dictionary file to the dict collection. +/// Load a dictionary file to a tokenizer, +/// and add that tokenizer to the tokenizer collection. +/// /// Dictionary file must one word per line. +/// If successful, will insert a NewmmTokenizer to TOKENIZER_COLLECTION. /// returns a tuple of string of loading result and a boolean +/// /// signature: (file_path: str, dict_name: str) -> (str, boolean) #[pyfunction] #[pyo3(signature = (file_path, dict_name))] fn load_dict(file_path: &str, dict_name: &str) -> PyResult<(String, bool)> { - let mut dict_col_lock = DICT_COLLECTION.lock().unwrap(); - if dict_col_lock.get(dict_name).is_some() { + let mut tokenizer_col_lock = TOKENIZER_COLLECTION.lock().unwrap(); + if tokenizer_col_lock.get(dict_name).is_some() { Ok(( format!( "Failed: dictionary name {} already exists, please use another name.", @@ -62,7 +47,7 @@ fn load_dict(file_path: &str, dict_name: &str) -> PyResult<(String, bool)> { )) } else { let tokenizer = NewmmTokenizer::new(file_path); - dict_col_lock.insert(dict_name.to_owned(), Box::new(tokenizer)); + tokenizer_col_lock.insert(dict_name.to_owned(), Box::new(tokenizer)); Ok(( format!( @@ -74,12 +59,38 @@ fn load_dict(file_path: &str, dict_name: &str) -> PyResult<(String, bool)> { } } +/// Break text into tokens. +/// Use newmm algorithm. +/// Can use multithreading, but takes a lot of memory. +/// returns list of valid utf-8 bytes list +/// +/// signature: (text: str, dict_name: str, safe: boolean = false, parallel: boolean = false) -> List[List[u8]] +/// +#[pyfunction] +#[pyo3(signature = (text, dict_name, safe=false, parallel=false))] +fn segment( + text: &Bound<'_, PyString>, + dict_name: &str, + safe: bool, + parallel: bool, +) -> PyResult> { + if let Some(loaded_tokenizer) = TOKENIZER_COLLECTION.lock().unwrap().get(dict_name) { + let result = loaded_tokenizer.segment_to_string(text.to_str()?, safe, parallel); + Ok(result) + } else { + Err(exceptions::PyRuntimeError::new_err(format!( + "Dictionary name {} does not exist.", + dict_name + ))) + } +} + /* /// Add words to existing dictionary #[pyfunction] fn add_word(dict_name: &str, words: Vec<&str>) -> PyResult<(String, bool)> { - let mut dict_col_lock = DICT_COLLECTION.lock().unwrap(); - if let Some(newmm_dict) = dict_col_lock.get(dict_name) { + let mut tokenizer_col_lock = TOKENIZER_COLLECTION.lock().unwrap(); + if let Some(newmm_dict) = tokenizer_col_lock.get(dict_name) { newmm_dict.add_word(&words); Ok((format!("Add new word(s) successfully."), true)) } else { @@ -96,8 +107,8 @@ fn add_word(dict_name: &str, words: Vec<&str>) -> PyResult<(String, bool)> { /// Remove words from existing dictionary #[pyfunction] fn remove_word(dict_name: &str, words: Vec<&str>) -> PyResult<(String, bool)> { - let mut dict_col_lock = DICT_COLLECTION.lock().unwrap(); - if let Some(newmm_dict) = dict_col_lock.get(dict_name) { + let mut tokenizer_col_lock = TOKENIZER_COLLECTION.lock().unwrap(); + if let Some(newmm_dict) = tokenizer_col_lock.get(dict_name) { newmm_dict.remove_word(&words); Ok((format!("Remove word(s) successfully."), true)) } else { @@ -113,7 +124,7 @@ fn remove_word(dict_name: &str, words: Vec<&str>) -> PyResult<(String, bool)> { */ #[pymodule] -fn _nlpo3_python_backend(_py: Python, m: &PyModule) -> PyResult<()> { +fn _nlpo3_python_backend(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(load_dict, m)?)?; m.add_function(wrap_pyfunction!(segment, m)?)?; Ok(()) From 291cf8c893bf1990cfde24e0544b8ce2ac4f92f5 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 11 Nov 2024 09:15:19 +0000 Subject: [PATCH 2/9] Fix unittest data path --- CITATION.cff | 4 ++-- README.md | 5 +++-- nlpo3-cli/README.md | 6 ++++++ nlpo3-nodejs/Cargo.toml | 2 +- nlpo3-nodejs/README.md | 6 ++++++ nlpo3-python/Cargo.toml | 11 ++++++++++- nlpo3-python/README.md | 21 +++++++++++++++++++++ nlpo3-python/tests/test_tokenize.py | 2 +- 8 files changed, 50 insertions(+), 7 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 665542b..804d84d 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -7,8 +7,8 @@ type: software authors: - family-names: Suntorntip given-names: Thanathip - - family-names: "Suriyawongkul" - given-names: "Arthit" + - family-names: Suriyawongkul + given-names: Arthit orcid: "https://orcid.org/0000-0002-9698-1899" - family-names: Phatthiyaphaibun given-names: Wannaphong diff --git a/README.md b/README.md index d10ff94..bae6f95 100644 --- a/README.md +++ b/README.md @@ -191,5 +191,6 @@ Issues: ## License -nlpO3 is copyrighted by its authors and licensed under terms of the Apache -Software License 2.0 (Apache-2.0) - see file [LICENSE](./LICENSE) for details. +nlpO3 is copyrighted by its authors +and licensed under terms of the Apache Software License 2.0 (Apache-2.0). +See file [LICENSE](./LICENSE) for details. diff --git a/nlpo3-cli/README.md b/nlpo3-cli/README.md index 044ade3..c2af967 100644 --- a/nlpo3-cli/README.md +++ b/nlpo3-cli/README.md @@ -27,3 +27,9 @@ nlpo3 help ```bash echo "ฉันกินข้าว" | nlpo3 segment ``` + +## License + +nlpo3-cli is copyrighted by its authors +and licensed under terms of the Apache Software License 2.0 (Apache-2.0). +See file [LICENSE](./LICENSE) for details. diff --git a/nlpo3-nodejs/Cargo.toml b/nlpo3-nodejs/Cargo.toml index d3a7411..e51753d 100644 --- a/nlpo3-nodejs/Cargo.toml +++ b/nlpo3-nodejs/Cargo.toml @@ -5,8 +5,8 @@ edition = "2018" license = "Apache-2.0" authors = ["Thanathip Suntorntip Gorlph"] description = "Node binding for nlpO3 Thai language processing library" -keywords = ["thai", "tokenizer", "nlp", "word-segmentation"] categories = ["text-processing"] +keywords = ["thai", "tokenizer", "nlp", "word-segmentation"] homepage = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-nodejs/" repository = "https://github.com/PyThaiNLP/nlpo3/" documentation = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-nodejs/README.md" diff --git a/nlpo3-nodejs/README.md b/nlpo3-nodejs/README.md index 6611086..7d7756c 100644 --- a/nlpo3-nodejs/README.md +++ b/nlpo3-nodejs/README.md @@ -92,3 +92,9 @@ Please report issues at ## TODO - Find a way to build binaries and publish on npm. + +## License + +nlpO3 Node binding is copyrighted by its authors +and licensed under terms of the Apache Software License 2.0 (Apache-2.0). +See file [LICENSE](./LICENSE) for details. diff --git a/nlpo3-python/Cargo.toml b/nlpo3-python/Cargo.toml index 22454ff..130feab 100644 --- a/nlpo3-python/Cargo.toml +++ b/nlpo3-python/Cargo.toml @@ -15,7 +15,16 @@ homepage = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-python" repository = "https://github.com/PyThaiNLP/nlpo3/" documentation = "https://github.com/PyThaiNLP/nlpo3/blob/main/nlpo3-python/README.md" readme = "README.md" -exclude = [".gitignore", ".github/", "notebooks/", "tests/", "*.sh"] +exclude = [ + ".gitignore", + ".github/", + "build/", + "dist/", + "notebooks/", + "target", + "tests/", + "*.sh", +] [lib] name = "_nlpo3_python_backend" diff --git a/nlpo3-python/README.md b/nlpo3-python/README.md index 4652b7e..3e68601 100644 --- a/nlpo3-python/README.md +++ b/nlpo3-python/README.md @@ -103,6 +103,27 @@ python -m build This should generate a wheel file, in `dist/` directory, which can be installed by pip. +To install a wheel from a local directory: + +```bash +pip install dist/nlpo3-1.3.1-cp311-cp311-macosx_12_0_x86_64.whl +``` + +## Test + +To run the Python unit test: + +```bash +cd tests +python -m unittest +``` + ## Issues Please report issues at + +## License + +nlpO3 Python binding is copyrighted by its authors +and licensed under terms of the Apache Software License 2.0 (Apache-2.0). +See file [LICENSE](./LICENSE) for details. diff --git a/nlpo3-python/tests/test_tokenize.py b/nlpo3-python/tests/test_tokenize.py index 4cf1566..705db7b 100644 --- a/nlpo3-python/tests/test_tokenize.py +++ b/nlpo3-python/tests/test_tokenize.py @@ -175,7 +175,7 @@ def setUp(self): ) def test_segment(self): - DICT_FILENAME = "tests/data/test_dict.txt" + DICT_FILENAME = "data/test_dict.txt" DICT_NAME = "test_dict" load_dict(DICT_FILENAME, DICT_NAME) From b23a442b3d08c7dbf7b222cfd573f98dd356d64e Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 11 Nov 2024 09:31:07 +0000 Subject: [PATCH 3/9] Add workflow to test nlpo3-python Try to test from 3.6 to 3.13 --- .github/workflows/test-main-lib.yml | 6 +-- .github/workflows/test-nlpo3-cli.yml | 8 ++-- .github/workflows/test-nlpo3-python.yml | 50 +++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/test-nlpo3-python.yml diff --git a/.github/workflows/test-main-lib.yml b/.github/workflows/test-main-lib.yml index 553ea21..fef1d83 100644 --- a/.github/workflows/test-main-lib.yml +++ b/.github/workflows/test-main-lib.yml @@ -1,4 +1,4 @@ -name: Test main lib (Rust crate) +name: Test main lib on: push: branches: @@ -19,8 +19,6 @@ on: jobs: test: - runs-on: ${{ matrix.os }} - strategy: fail-fast: false matrix: @@ -30,6 +28,8 @@ jobs: - os: windows-latest bitness: 32 + runs-on: ${{ matrix.os }} + steps: - name: Checkout source code uses: actions/checkout@v4 diff --git a/.github/workflows/test-nlpo3-cli.yml b/.github/workflows/test-nlpo3-cli.yml index ccd78ed..2c33c2d 100644 --- a/.github/workflows/test-nlpo3-cli.yml +++ b/.github/workflows/test-nlpo3-cli.yml @@ -1,4 +1,4 @@ -name: Test nlpo3-cli (command line) +name: Test nlpo3-cli on: push: branches: @@ -17,8 +17,6 @@ defaults: jobs: test: - runs-on: ${{ matrix.os }} - strategy: fail-fast: false matrix: @@ -28,9 +26,11 @@ jobs: - os: windows-latest bitness: 32 + runs-on: ${{ matrix.os }} + steps: - name: Checkout source code - uses: actions/checkout@master + uses: actions/checkout@v4 - name: Setup Rust toolchain - non-win32 uses: actions-rs/toolchain@v1 diff --git a/.github/workflows/test-nlpo3-python.yml b/.github/workflows/test-nlpo3-python.yml new file mode 100644 index 0000000..3bc981b --- /dev/null +++ b/.github/workflows/test-nlpo3-python.yml @@ -0,0 +1,50 @@ +name: Test nlpo3-python +on: + push: + branches: + - main + paths: + - 'nlpo3-python/**' + pull_request: + branches: + - main + paths: + - 'nlpo3-python/**' + +defaults: + run: + working-directory: nlpo3-python + +jobs: + test: + strategy: + fail-fast: false + matrix: + os: [macos-latest, ubuntu-latest, windows-latest] + python-version: ["3.13", "3.12", "3.11", "3.10", "3.9", "3.8", "3.7", "3.6"] + bitness: [64] # 32, 64 + include: + - os: windows-latest + bitness: 32 + + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout source code + uses: actions/checkout@v4 + + - name: Setup Rust toolchain + uses: actions-rust-lang/setup-rust-toolchain@v1 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + - name: Test Rust part + run: cargo test + + - name: Test Python part + run: | + cd tests + python -m unittest From cfec83cd94c7d36f06427e4b4ab5c879bed3e4ab Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 11 Nov 2024 09:49:33 +0000 Subject: [PATCH 4/9] Build and install wheel in test workflow --- .github/workflows/test-nlpo3-python.yml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-nlpo3-python.yml b/.github/workflows/test-nlpo3-python.yml index 3bc981b..7152410 100644 --- a/.github/workflows/test-nlpo3-python.yml +++ b/.github/workflows/test-nlpo3-python.yml @@ -41,10 +41,17 @@ jobs: with: python-version: ${{ matrix.python-version }} cache: "pip" - - name: Test Rust part - run: cargo test - - name: Test Python part + - name: Build wheel + run: | + pip install -U pip + pip install -U build setuptools setuptools-rust wheel + python -m build --wheel + + - name: Install wheel + run: pip install --no-index --find-links=build/dist mapscript + + - name: Test run: | cd tests python -m unittest From 563111232a9dddf66ce250f88473cbaec17c2426 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 11 Nov 2024 09:53:25 +0000 Subject: [PATCH 5/9] Update test-nlpo3-python.yml --- .github/workflows/test-nlpo3-python.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-nlpo3-python.yml b/.github/workflows/test-nlpo3-python.yml index 7152410..5ecd159 100644 --- a/.github/workflows/test-nlpo3-python.yml +++ b/.github/workflows/test-nlpo3-python.yml @@ -49,7 +49,7 @@ jobs: python -m build --wheel - name: Install wheel - run: pip install --no-index --find-links=build/dist mapscript + run: pip install --no-index --find-links=build/dist nlpo3 - name: Test run: | From 52c27be6b808bef1e9af8c568aa2c54ec2a7d944 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 11 Nov 2024 10:04:35 +0000 Subject: [PATCH 6/9] Fix wheel path --- .github/workflows/test-nlpo3-python.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-nlpo3-python.yml b/.github/workflows/test-nlpo3-python.yml index 5ecd159..ac46143 100644 --- a/.github/workflows/test-nlpo3-python.yml +++ b/.github/workflows/test-nlpo3-python.yml @@ -5,11 +5,19 @@ on: - main paths: - 'nlpo3-python/**' + exclude: + - 'notebooks/' + - 'LICENSE' + - '*.md' pull_request: branches: - main paths: - 'nlpo3-python/**' + exclude: + - 'notebooks/' + - 'LICENSE' + - '*.md' defaults: run: @@ -49,7 +57,9 @@ jobs: python -m build --wheel - name: Install wheel - run: pip install --no-index --find-links=build/dist nlpo3 + run: pip install --no-index --find-links=dist nlpo3 + # Since we don't know the exact name of the wheel from previous step, + # use --find-links instead. - name: Test run: | From 48bab54389a67d3d8f50b112078dfc202ddec368 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 11 Nov 2024 10:29:32 +0000 Subject: [PATCH 7/9] Update test-nlpo3-python.yml --- .github/workflows/test-nlpo3-python.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test-nlpo3-python.yml b/.github/workflows/test-nlpo3-python.yml index ac46143..3048d6e 100644 --- a/.github/workflows/test-nlpo3-python.yml +++ b/.github/workflows/test-nlpo3-python.yml @@ -2,22 +2,22 @@ name: Test nlpo3-python on: push: branches: - - main + - main paths: - - 'nlpo3-python/**' - exclude: - - 'notebooks/' - - 'LICENSE' - - '*.md' + - 'nlpo3-python/**' + paths-ignore: + - 'notebooks/' + - 'LICENSE' + - '*.md' pull_request: branches: - - main + - main paths: - - 'nlpo3-python/**' - exclude: - - 'notebooks/' - - 'LICENSE' - - '*.md' + - 'nlpo3-python/**' + paths-ignore: + - 'notebooks/' + - 'LICENSE' + - '*.md' defaults: run: From 9057611784577835a3041b732b57bcbb6052cb34 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 11 Nov 2024 10:39:28 +0000 Subject: [PATCH 8/9] Update test-nlpo3-python.yml --- .github/workflows/test-nlpo3-python.yml | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test-nlpo3-python.yml b/.github/workflows/test-nlpo3-python.yml index 3048d6e..a99ab9e 100644 --- a/.github/workflows/test-nlpo3-python.yml +++ b/.github/workflows/test-nlpo3-python.yml @@ -5,19 +5,17 @@ on: - main paths: - 'nlpo3-python/**' - paths-ignore: - - 'notebooks/' - - 'LICENSE' - - '*.md' + - '!notebooks/' + - '!LICENSE' + - '!*.md' pull_request: branches: - main paths: - 'nlpo3-python/**' - paths-ignore: - - 'notebooks/' - - 'LICENSE' - - '*.md' + - '!notebooks/' + - '!LICENSE' + - '!*.md' defaults: run: From 931fd5e7aad2d24a930d6bb13e32ba9f9d36acef Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 11 Nov 2024 10:48:39 +0000 Subject: [PATCH 9/9] Remove 3.6 support 3.7 is PyO3's minimum supported version Although 3.7 is not available on macOS arm64 on GitHub runner --- .github/workflows/test-nlpo3-python.yml | 6 +++++- nlpo3-python/README.md | 4 ++-- nlpo3-python/pyproject.toml | 5 ++--- nlpo3-python/setup.cfg | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test-nlpo3-python.yml b/.github/workflows/test-nlpo3-python.yml index a99ab9e..aa0c557 100644 --- a/.github/workflows/test-nlpo3-python.yml +++ b/.github/workflows/test-nlpo3-python.yml @@ -27,11 +27,15 @@ jobs: fail-fast: false matrix: os: [macos-latest, ubuntu-latest, windows-latest] - python-version: ["3.13", "3.12", "3.11", "3.10", "3.9", "3.8", "3.7", "3.6"] + python-version: ["3.13", "3.12", "3.11", "3.10", "3.9", "3.8", "3.7"] bitness: [64] # 32, 64 include: - os: windows-latest + python-version: "3.9" bitness: 32 + exclude: + - os: macos-latest + python-version: "3.7" runs-on: ${{ matrix.os }} diff --git a/nlpo3-python/README.md b/nlpo3-python/README.md index 3e68601..21619e8 100644 --- a/nlpo3-python/README.md +++ b/nlpo3-python/README.md @@ -6,7 +6,7 @@ SPDX-License-Identifier: Apache-2.0 # nlpO3 Python binding [![PyPI](https://img.shields.io/pypi/v/nlpo3.svg "PyPI")](https://pypi.python.org/pypi/nlpo3) -[![Python 3.6](https://img.shields.io/badge/python-3.6-blue.svg "Python 3.6")](https://www.python.org/downloads/) +[![Python 3.7](https://img.shields.io/badge/python-3.7-blue.svg "Python 3.7")](https://www.python.org/downloads/) [![Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg "Apache-2.0")](https://opensource.org/license/apache-2-0) Python binding for nlpO3, a Thai natural language processing library in Rust. @@ -86,7 +86,7 @@ segment("สวัสดีครับ", dict_name="dict_name", safe=True) ### Requirements - [Rust 2018 Edition](https://www.rust-lang.org/tools/install) -- Python 3.6 or newer +- Python 3.7 or newer (PyO3's minimum supported version) - Python Development Headers - Ubuntu: `sudo apt-get install python3-dev` - macOS: No action needed diff --git a/nlpo3-python/pyproject.toml b/nlpo3-python/pyproject.toml index 4c70514..e9d386c 100644 --- a/nlpo3-python/pyproject.toml +++ b/nlpo3-python/pyproject.toml @@ -7,7 +7,7 @@ name = "nlpo3" version = "1.3.1" description = "Python binding for nlpO3 Thai language processing library in Rust" readme = "README.md" -requires-python = ">=3.6" +requires-python = ">=3.7" license = { text = "Apache-2.0" } keywords = ["thai", "tokenizer", "nlp", "word-segmentation", "pythainlp"] authors = [ @@ -18,7 +18,6 @@ authors = [ classifiers = [ "Development Status :: 5 - Production/Stable", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", @@ -38,7 +37,7 @@ homepage = "https://github.com/PyThaiNLP/nlpo3/" repository = "https://github.com/PyThaiNLP/nlpo3/" [tool.poetry.dependencies] -python = "^3.6" +python = "^3.7" [tool.poetry.dev-dependencies] pytest = "*" diff --git a/nlpo3-python/setup.cfg b/nlpo3-python/setup.cfg index e3db675..2a5b3e7 100644 --- a/nlpo3-python/setup.cfg +++ b/nlpo3-python/setup.cfg @@ -67,7 +67,7 @@ classifiers = #obsoletes = pythainlp-rust-modules [options] -python_requires = >=3.6 +python_requires = >=3.7 include_package_data = True packages = nlpo3 zip_safe = False