From d1351fe3004402fa80d4c7ad8b73f71581541f38 Mon Sep 17 00:00:00 2001 From: Joseph Edwards Date: Fri, 24 May 2024 14:58:52 +0100 Subject: [PATCH 01/13] Add aho class --- src/aho-corasick.cpp | 182 +++++++++++++++++++++++++++++++++++++++++++ src/main.cpp | 1 + src/main.hpp | 1 + 3 files changed, 184 insertions(+) create mode 100644 src/aho-corasick.cpp diff --git a/src/aho-corasick.cpp b/src/aho-corasick.cpp new file mode 100644 index 00000000..55c4a1d5 --- /dev/null +++ b/src/aho-corasick.cpp @@ -0,0 +1,182 @@ +// +// libsemigroups_pybind11 +// Copyright (C) 2024 Joseph Edwards +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// + +// C++ stl headers.... +#include // for vector + +// libsemigroups.... +#include // for AhoCorasick, AhoCorasick::... + +// pybind11.... +#include // for class_, init, module + +// libsemigroups_pybind11.... +#include "main.hpp" // for init_aho_corasick + +namespace py = pybind11; + +namespace libsemigroups { + + void init_aho_corasick(py::module& m) { + py::class_ thing(m, + "AhoCorasick", + R"pbdoc( +For an implementation of the Aho-Corasick algorithm. + +Defined in ``aho-corasick.hpp``.This class implements a trie based data structure with suffix links to be used with the Aho-Corasick dictionary searching algorithm. 
An introduction to this algorithm can be found at:`https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm `_Several helper functions are provided in the ``aho_corasick`` namespace.)pbdoc"); + thing.def("__repr__", &aho_corasick::repr); + // thing.def_static("root", + // &AhoCorasick::root, + // R"pbdoc( + // Constant for the root of the trie. + // )pbdoc"); + thing.def(py::init<>(), R"pbdoc( +Construct an empty AhoCorasick. +Construct an :any:`AhoCorasick` containing only the root that corresponds to the empty word :math:`\varepsilon`.)pbdoc"); + thing.def(py::init(), R"pbdoc( +Default copy constructor. +)pbdoc"); + thing.def("init", + &AhoCorasick::init, + R"pbdoc( +Reinitialise an existing AhoCorasick object. +This function puts an :any:`AhoCorasick` object back into the same state as if it had been newly default constructed. + +:returns: A reference to ``self``. + +:rtype: AhoCorasick +)pbdoc"); + thing.def("number_of_nodes", + &AhoCorasick::number_of_nodes, + R"pbdoc( +Returns the number of nodes in the trie. +This function Returns the number of nodes in the trie. + +:exceptions: This function is ``noexcept`` and is guaranteed never to throw. + +:complexity: Constant + +:returns: A ``int``. + +:rtype: int +)pbdoc"); + thing.def("add_word", + &AhoCorasick::add_word, + py::arg("first"), + py::arg("last"), + R"pbdoc( +Check and add a word to the trie. +This function does the same as :any:`add_word_no_checks(Iterator, Iterator)` after first checking that the word corresponding to ``first`` and ``last`` does not correspond to an existing terminal node in the trie. + +:raises LibsemigroupsError: if the word corresponding to ``first`` and ``last`` corresponds to an existing terminal node in the trie. + +.. seealso:: :any:`add_word_no_checks`)pbdoc"); + thing.def("rm_word", + &AhoCorasick::rm_word, + py::arg("first"), + py::arg("last"), + R"pbdoc( +Check and add a word to the trie. 
+This function does the same as :any:`rm_word_no_checks(Iterator, Iterator)` after first checking that the word corresponding to ``first`` and ``last`` is terminal node in the trie. + +:raises LibsemigroupsError: if the word corresponding to ``first`` and ``last`` does not correspond to an existing terminal node in the trie. + +.. seealso:: :any:`rm_word_no_checks`)pbdoc"); + thing.def("traverse", + &AhoCorasick::traverse, + py::arg("current"), + py::arg("a"), + R"pbdoc( +After checking, traverse the trie using suffix links where necessary. +See :any:`traverse_no_checks` + +:raises LibsemigroupsError: if ``validate_active_node_index(current)`` throws.)pbdoc"); + thing.def("signature", + &AhoCorasick::signature, + py::arg("w"), + py::arg("i"), + R"pbdoc( +After checking, find the signature of a node. +See :any:`signature_no_checks` + +:raises LibsemigroupsError: if ``validate_active_node_index(i)`` throws.)pbdoc"); + thing.def("height", + &AhoCorasick::height, + py::arg("i"), + R"pbdoc( +After checking, calculate the height of a node. +See :any:`height_no_checks` + +:raises LibsemigroupsError: if ``validate_active_node_index(i)`` throws.)pbdoc"); + thing.def("suffix_link", + &AhoCorasick::suffix_link, + py::arg("current"), + R"pbdoc( +After checking, calculate the index of the suffix link of a node. +See :any:`suffix_link_no_checks` + +:raises LibsemigroupsError: if ``validate_active_node_index(current)`` throws.)pbdoc"); + thing.def("node", + &AhoCorasick::node, + py::arg("i"), + R"pbdoc( +After checking, return the node given an index. +See :any:`node_no_checks` + +:raises LibsemigroupsError: if ``validate_node_index(i)`` throws.)pbdoc"); + thing.def("child", + &AhoCorasick::child, + py::arg("parent"), + py::arg("letter"), + R"pbdoc( +After checking, return the child of parent with edge-label letter. 
+See :any:`child_no_checks` + +:raises LibsemigroupsError: if ``validate_active_node_index(parent)`` throws.)pbdoc"); + thing.def("validate_node_index", + &AhoCorasick::validate_node_index, + py::arg("i"), + R"pbdoc( +Check if an index corresponds to a node. + +:param i: the index to validate +:type i: index_type +This function checks if the given index ``i`` corresponds to the index of a node. + +:complexity: Constant + +:raises LibsemigroupsError: if ``i`` does not correspond to the index of a node; that is, if ``i`` is larger than the size of the container storing the indices of nodes.)pbdoc"); + thing.def("validate_active_node_index", + &AhoCorasick::validate_active_node_index, + py::arg("i"), + R"pbdoc( +Check if an index corresponds to a node currently in the trie. + +:param i: the index to validate +:type i: index_type +The implementation of :any:`AhoCorasick` uses two different types of node; *active* and *inactive* . An active node is a node that is currently a node in the trie. An inactive node is a node that used to be part of the trie, but has since been removed. It may later become active again after being reinitialised (see :any:`init` ), and exists as a way of minimising how frequently memory needs to be allocated and deallocated for nodes.This function validates whether the given index ``i`` corresponds to an active node. + +:complexity: Constant + +:raises LibsemigroupsError: if ``validate_node_index(i)`` throws, or if ``i`` is not an active node. + +.. 
seealso:: :any:`validate_node_index` , :any:`init`.)pbdoc"); + + } // init_aho_corasick + +} // namespace libsemigroups diff --git a/src/main.cpp b/src/main.cpp index 208dd541..8d7563ea 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -132,6 +132,7 @@ namespace libsemigroups { init_imagerightaction(m); init_action(m); init_bmat8(m); + init_aho_corasick(m); init_forest(m); init_gabow(m); init_knuth_bendix(m); diff --git a/src/main.hpp b/src/main.hpp index d2ea52ed..3eb3059a 100644 --- a/src/main.hpp +++ b/src/main.hpp @@ -30,6 +30,7 @@ namespace libsemigroups { void init_bmat8(py::module&); void init_imagerightaction(py::module&); + void init_aho_corasick(py::module&); void init_forest(py::module&); void init_gabow(py::module&); void init_knuth_bendix(py::module&); From a00c7cf5559015f5d3b1febb193c03be390e26e2 Mon Sep 17 00:00:00 2001 From: Joseph Edwards Date: Fri, 24 May 2024 16:15:53 +0100 Subject: [PATCH 02/13] Add helpers --- src/aho-corasick.cpp | 92 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 2 deletions(-) diff --git a/src/aho-corasick.cpp b/src/aho-corasick.cpp index 55c4a1d5..c9c0c6ec 100644 --- a/src/aho-corasick.cpp +++ b/src/aho-corasick.cpp @@ -38,7 +38,12 @@ namespace libsemigroups { R"pbdoc( For an implementation of the Aho-Corasick algorithm. -Defined in ``aho-corasick.hpp``.This class implements a trie based data structure with suffix links to be used with the Aho-Corasick dictionary searching algorithm. An introduction to this algorithm can be found at:`https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm `_Several helper functions are provided in the ``aho_corasick`` namespace.)pbdoc"); +This class implements a trie based data structure with suffix links to be used +with the Aho-Corasick dictionary searching algorithm. An introduction to this +algorithm can be found `here `_. 
+ +Several helper functions are provided in the ``aho_corasick`` +namespace.)pbdoc"); thing.def("__repr__", &aho_corasick::repr); // thing.def_static("root", // &AhoCorasick::root, @@ -47,7 +52,10 @@ Defined in ``aho-corasick.hpp``.This class implements a trie based data structur // )pbdoc"); thing.def(py::init<>(), R"pbdoc( Construct an empty AhoCorasick. -Construct an :any:`AhoCorasick` containing only the root that corresponds to the empty word :math:`\varepsilon`.)pbdoc"); + +Construct an :any:`AhoCorasick` containing only the root that corresponds to the +empty word :math:`\varepsilon`. +)pbdoc"); thing.def(py::init(), R"pbdoc( Default copy constructor. )pbdoc"); @@ -156,6 +164,7 @@ Check if an index corresponds to a node. :param i: the index to validate :type i: index_type + This function checks if the given index ``i`` corresponds to the index of a node. :complexity: Constant @@ -169,6 +178,7 @@ Check if an index corresponds to a node currently in the trie. :param i: the index to validate :type i: index_type + The implementation of :any:`AhoCorasick` uses two different types of node; *active* and *inactive* . An active node is a node that is currently a node in the trie. An inactive node is a node that used to be part of the trie, but has since been removed. It may later become active again after being reinitialised (see :any:`init` ), and exists as a way of minimising how frequently memory needs to be allocated and deallocated for nodes.This function validates whether the given index ``i`` corresponds to an active node. :complexity: Constant @@ -177,6 +187,84 @@ The implementation of :any:`AhoCorasick` uses two different types of node; *acti .. seealso:: :any:`validate_node_index` , :any:`init`.)pbdoc"); + // Helpers + using index_type = AhoCorasick::index_type; + + m.def("add_word", + &aho_corasick::add_word, + py::arg("ac"), + py::arg("w"), + R"pbdoc( +TODO doc. 
+)pbdoc"); + m.def("rm_word", + &aho_corasick::rm_word, + py::arg("ac"), + py::arg("w"), + R"pbdoc( +TODO doc. +)pbdoc"); + m.def( + "traverse_from", + [](AhoCorasick const& ac, + index_type start, + AhoCorasick::const_iterator first, + AhoCorasick::const_iterator last) { + return aho_corasick::traverse_from(ac, start, first, last); + }, + py::arg("ac"), + py::arg("start"), + py::arg("first"), + py::arg("last"), + R"pbdoc( +TODO doc. +)pbdoc"); + m.def( + "traverse_from", + [](AhoCorasick const& ac, index_type start, char const& w) { + return aho_corasick::traverse_from(ac, start, w); + }, + py::arg("ac"), + py::arg("start"), + py::arg("w"), + R"pbdoc( +TODO doc. +)pbdoc"); + m.def( + "traverse_from", + [](AhoCorasick const& ac, index_type start, word_type const& w) { + return aho_corasick::traverse_from(ac, start, w); + }, + py::arg("ac"), + py::arg("start"), + py::arg("w"), + R"pbdoc( +TODO doc. +)pbdoc"); + m.def( + "traverse", + [](AhoCorasick const& ac, + AhoCorasick::const_iterator first, + AhoCorasick::const_iterator last) { + return aho_corasick::traverse(ac, first, last); + }, + py::arg("ac"), + py::arg("first"), + py::arg("last"), + R"pbdoc( +TODO doc. +)pbdoc"); + m.def( + "traverse", + [](AhoCorasick const& ac, std::string const& w) { + return aho_corasick::traverse(ac, w); + }, + py::arg("ac"), + py::arg("w"), + R"pbdoc( +TODO doc. 
+)pbdoc"); + } // init_aho_corasick } // namespace libsemigroups From 350ea5eb37b0ef402b0f46fe21927490be242db3 Mon Sep 17 00:00:00 2001 From: Joseph Edwards Date: Fri, 24 May 2024 16:16:18 +0100 Subject: [PATCH 03/13] Make aho accessible --- libsemigroups_pybind11/__init__.py | 1 + libsemigroups_pybind11/aho_corasick.py | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 libsemigroups_pybind11/aho_corasick.py diff --git a/libsemigroups_pybind11/__init__.py b/libsemigroups_pybind11/__init__.py index 620be44e..28a7228d 100644 --- a/libsemigroups_pybind11/__init__.py +++ b/libsemigroups_pybind11/__init__.py @@ -71,6 +71,7 @@ one, domain, image, + AhoCorasick, ) except ModuleNotFoundError as e: raise ModuleNotFoundError( diff --git a/libsemigroups_pybind11/aho_corasick.py b/libsemigroups_pybind11/aho_corasick.py new file mode 100644 index 00000000..e3853271 --- /dev/null +++ b/libsemigroups_pybind11/aho_corasick.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2024, Joseph Edwards +# +# Distributed under the terms of the GPL license version 3. +# +# The full license is in the file LICENSE, distributed with this software. 
+# pylint:disable=no-name-in-module, unused-import +from _libsemigroups_pybind11 import add_word, rm_word, traverse_from, traverse From aee23a73fdb06bc40e38a5b25480de0ccdff13c3 Mon Sep 17 00:00:00 2001 From: Joseph Edwards Date: Fri, 24 May 2024 16:16:33 +0100 Subject: [PATCH 04/13] Add aho to docs --- docs/source/index.rst | 1 + .../aho-corasick/ac-helpers.rst | 28 +++++++++++++++ .../aho-corasick/aho-corasick.rst | 35 +++++++++++++++++++ .../main-algorithms/aho-corasick/index.rst | 17 +++++++++ 4 files changed, 81 insertions(+) create mode 100644 docs/source/main-algorithms/aho-corasick/ac-helpers.rst create mode 100644 docs/source/main-algorithms/aho-corasick/aho-corasick.rst create mode 100644 docs/source/main-algorithms/aho-corasick/index.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 13101d5f..b3bae62e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -62,6 +62,7 @@ See the installation instructions: :hidden: main-algorithms/action/index.rst + main-algorithms/aho-corasick/index.rst main-algorithms/congruences/index main-algorithms/froidure-pin/index main-algorithms/kambites/index diff --git a/docs/source/main-algorithms/aho-corasick/ac-helpers.rst b/docs/source/main-algorithms/aho-corasick/ac-helpers.rst new file mode 100644 index 00000000..6422ea02 --- /dev/null +++ b/docs/source/main-algorithms/aho-corasick/ac-helpers.rst @@ -0,0 +1,28 @@ +.. Copyright (c) 2024 Joseph Edwards + + Distributed under the terms of the GPL license version 3. + + The full license is in the file LICENSE, distributed with this software. + + +Aho-Corasick helper functions +============================= + +Contents +-------- +.. currentmodule:: libsemigroups_pybind11.aho_corasick + +.. autosummary:: + :nosignatures: + + add_word + rm_word + traverse_from + traverse + +Full API +-------- + +.. 
automodule:: libsemigroups_pybind11.aho_corasick + :members: + :imported-members: \ No newline at end of file diff --git a/docs/source/main-algorithms/aho-corasick/aho-corasick.rst b/docs/source/main-algorithms/aho-corasick/aho-corasick.rst new file mode 100644 index 00000000..4a56fb34 --- /dev/null +++ b/docs/source/main-algorithms/aho-corasick/aho-corasick.rst @@ -0,0 +1,35 @@ +.. Copyright (c) 2024 Joseph Edwards + + Distributed under the terms of the GPL license version 3. + + The full license is in the file LICENSE, distributed with this software. + +.. currentmodule:: _libsemigroups_pybind11 + +Aho-Corasick +============ + +Contents +-------- + +.. autosummary:: + :nosignatures: + + AhoCorasick.add_word + AhoCorasick.child + AhoCorasick.height + AhoCorasick.init + AhoCorasick.node + AhoCorasick.number_of_nodes + AhoCorasick.rm_word + AhoCorasick.signature + AhoCorasick.suffix_link + AhoCorasick.traverse + AhoCorasick.validate_active_node_index + AhoCorasick.validate_node_index + +Full API +-------- + +.. autoclass:: AhoCorasick + :members: diff --git a/docs/source/main-algorithms/aho-corasick/index.rst b/docs/source/main-algorithms/aho-corasick/index.rst new file mode 100644 index 00000000..2ca1a971 --- /dev/null +++ b/docs/source/main-algorithms/aho-corasick/index.rst @@ -0,0 +1,17 @@ +.. Copyright (c) 2024 Joseph Edwards + + Distributed under the terms of the GPL license version 3. + + The full license is in the file LICENSE, distributed with this software. + +Aho-Corasick +============ + +This page describes the functionality related to Aho-Corasick's algorithm. + + +.. 
toctree:: + :maxdepth: 1 + + aho-corasick + ac-helpers From 18088e5112d97952828c8579c31f50803f391107 Mon Sep 17 00:00:00 2001 From: Joseph Edwards Date: Sat, 25 May 2024 20:16:20 +0100 Subject: [PATCH 05/13] Update doc --- docs/source/libsemigroups.bib | 19 + .../aho-corasick/ac-helpers.rst | 3 +- .../aho-corasick/aho-corasick.rst | 5 +- .../main-algorithms/aho-corasick/index.rst | 1 - libsemigroups_pybind11/aho_corasick.py | 2 +- src/aho-corasick.cpp | 339 +++++++++++------- 6 files changed, 229 insertions(+), 140 deletions(-) diff --git a/docs/source/libsemigroups.bib b/docs/source/libsemigroups.bib index 649325e3..00f368e0 100644 --- a/docs/source/libsemigroups.bib +++ b/docs/source/libsemigroups.bib @@ -6,6 +6,25 @@ %% Saved with string encoding Unicode (UTF-8) +@article{Aho1975aa, + Author = {Aho, Alfred V. and Corasick, Margaret J.}, + Journal = {Communications of the {ACM}}, + Month = { June }, + Year = {1975}, + Title = {Efficient string matching: an aid to bibliographic search}, + Volume = {18}, + Issn = {0001-0782, 1557-7317}, + Url = {https://dl.acm.org/doi/10.1145/360825.360855}, + Doi = {10.1145/360825.360855}, + Shorttitle = {Efficient string matching}, + Abstract = {This paper describes a simple, efficient algorithm to locate all occurrences of any of a finite number of keywords in a string of text. The algorithm consists of constructing a finite state pattern matching machine from the keywords and then using the pattern matching machine to process the text string in a single pass. Construction of the pattern matching machine takes time proportional to the sum of the lengths of the keywords. The number of state transitions made by the pattern matching machine in processing the text string is independent of the number of keywords. The algorithm has been used to improve the speed of a library bibliographic search program by a factor of 5 to 10.}, + Pages = {333--340}, + Number = {6}, + Shortjournal = {Commun. 
{ACM}}, + Urldate = {2024-03-26}, + Date = {1975-06}, + Langid = {english}, +} @article{Gilman1979, Author = {Robert H Gilman}, diff --git a/docs/source/main-algorithms/aho-corasick/ac-helpers.rst b/docs/source/main-algorithms/aho-corasick/ac-helpers.rst index 6422ea02..0edb461f 100644 --- a/docs/source/main-algorithms/aho-corasick/ac-helpers.rst +++ b/docs/source/main-algorithms/aho-corasick/ac-helpers.rst @@ -17,8 +17,7 @@ Contents add_word rm_word - traverse_from - traverse + traverse_word Full API -------- diff --git a/docs/source/main-algorithms/aho-corasick/aho-corasick.rst b/docs/source/main-algorithms/aho-corasick/aho-corasick.rst index 4a56fb34..d86ef8d0 100644 --- a/docs/source/main-algorithms/aho-corasick/aho-corasick.rst +++ b/docs/source/main-algorithms/aho-corasick/aho-corasick.rst @@ -15,13 +15,10 @@ Contents .. autosummary:: :nosignatures: - AhoCorasick.add_word AhoCorasick.child AhoCorasick.height AhoCorasick.init - AhoCorasick.node AhoCorasick.number_of_nodes - AhoCorasick.rm_word AhoCorasick.signature AhoCorasick.suffix_link AhoCorasick.traverse @@ -32,4 +29,6 @@ Full API -------- .. autoclass:: AhoCorasick + :class-doc-from: class + :special-members: __init__ :members: diff --git a/docs/source/main-algorithms/aho-corasick/index.rst b/docs/source/main-algorithms/aho-corasick/index.rst index 2ca1a971..e9ce6268 100644 --- a/docs/source/main-algorithms/aho-corasick/index.rst +++ b/docs/source/main-algorithms/aho-corasick/index.rst @@ -9,7 +9,6 @@ Aho-Corasick This page describes the functionality related to Aho-Corasick's algorithm. - .. toctree:: :maxdepth: 1 diff --git a/libsemigroups_pybind11/aho_corasick.py b/libsemigroups_pybind11/aho_corasick.py index e3853271..163346f2 100644 --- a/libsemigroups_pybind11/aho_corasick.py +++ b/libsemigroups_pybind11/aho_corasick.py @@ -6,4 +6,4 @@ # # The full license is in the file LICENSE, distributed with this software. 
# pylint:disable=no-name-in-module, unused-import -from _libsemigroups_pybind11 import add_word, rm_word, traverse_from, traverse +from _libsemigroups_pybind11 import add_word, rm_word, traverse_word diff --git a/src/aho-corasick.cpp b/src/aho-corasick.cpp index c9c0c6ec..5699dd89 100644 --- a/src/aho-corasick.cpp +++ b/src/aho-corasick.cpp @@ -44,148 +44,219 @@ algorithm can be found `here (), R"pbdoc( Construct an empty AhoCorasick. -Construct an :any:`AhoCorasick` containing only the root that corresponds to the -empty word :math:`\varepsilon`. -)pbdoc"); - thing.def(py::init(), R"pbdoc( +Construct an :any:`AhoCorasick` containing only the root that corresponds to +the empty word :math:`\varepsilon`.)pbdoc"); + + thing.def( + "__copy__", + [](const AhoCorasick& that) { return AhoCorasick(that); }, + R"pbdoc( Default copy constructor. + +Default copy constructor)pbdoc"); + + thing.def("child", + &AhoCorasick::child, + py::arg("parent"), + py::arg("letter"), + R"pbdoc( +Return the child of *parent* with edge-label *letter* + +This function returns the index of the child of the node with index +*parent* along the edge labelled by *letter*. If no such child exists, +:any:`UNDEFINED` is returned. + +:param parent: the index of the node whose child is sought. +:type parent: int + +:param letter: the edge-label connecting the parent to the desired child. +:type letter: int + +:returns: the index of the child. +:rtype: int + +:raises LibsemigroupsError: if ``validate_active_node_index(parent)`` throws. + +:complexity: Constant. + +.. seealso:: :any:`validate_active_node_index`. + +)pbdoc"); + + thing.def("height", + &AhoCorasick::height, + py::arg("i"), + R"pbdoc( +Calculate the height of a node. + +:param i: the index of the node whose height is sought +:type i: int + +:returns: the height. +:rtype: int + +:raises LibsemigroupsError: if ``validate_active_node_index(i)`` throws. 
+ +:complexity: Linear in the return value which is, at worst, the maximum length of a word in the trie + +.. seealso:: :any:`validate_active_node_index`. + )pbdoc"); + thing.def("init", &AhoCorasick::init, R"pbdoc( Reinitialise an existing AhoCorasick object. -This function puts an :any:`AhoCorasick` object back into the same state as if it had been newly default constructed. -:returns: A reference to ``self``. +This function puts an :any:`AhoCorasick` object back into the same state as +if it had been newly default constructed. + +:exceptions: This function guarantees not to throw a :any: `LibsemigroupsError`. +:complexity: Linear in the number of nodes in the trie + +:returns: ``self``. :rtype: AhoCorasick )pbdoc"); + thing.def("number_of_nodes", &AhoCorasick::number_of_nodes, R"pbdoc( Returns the number of nodes in the trie. + This function Returns the number of nodes in the trie. -:exceptions: This function is ``noexcept`` and is guaranteed never to throw. +:exceptions: This function is guaranteed never to throw. :complexity: Constant -:returns: A ``int``. - +:returns: The number of nodes> :rtype: int )pbdoc"); - thing.def("add_word", - &AhoCorasick::add_word, - py::arg("first"), - py::arg("last"), + + // TODO: What should we do here? Return a string instead of do it in place? + thing.def("signature", + &AhoCorasick::signature, + py::arg("w"), + py::arg("i"), R"pbdoc( -Check and add a word to the trie. -This function does the same as :any:`add_word_no_checks(Iterator, Iterator)` after first checking that the word corresponding to ``first`` and ``last`` does not correspond to an existing terminal node in the trie. +Find the signature of a node. -:raises LibsemigroupsError: if the word corresponding to ``first`` and ``last`` corresponds to an existing terminal node in the trie. +After validating ``i`` , this function performs the same as +``signature_no_checks(w, i)``. -.. 
seealso:: :any:`add_word_no_checks`)pbdoc"); - thing.def("rm_word", - &AhoCorasick::rm_word, - py::arg("first"), - py::arg("last"), +:raises LibsemigroupsError: if ``validate_active_node_index(i)`` throws. + +.. seealso:: :any:`validate_active_node_index`. + +)pbdoc"); + + thing.def("suffix_link", + &AhoCorasick::suffix_link, + py::arg("current"), R"pbdoc( -Check and add a word to the trie. -This function does the same as :any:`rm_word_no_checks(Iterator, Iterator)` after first checking that the word corresponding to ``first`` and ``last`` is terminal node in the trie. +Calculate the index of the suffix link of a node. + +Calculate the index of a suffix link of a node. Recall that the *suffix link* of +a node with signature :math:`W` is the node with the signature equal to that of +the longest proper suffix of :math:`W` contained in the trie. + +:param current: the index of the node whose suffix link is sought +:type current: int -:raises LibsemigroupsError: if the word corresponding to ``first`` and ``last`` does not correspond to an existing terminal node in the trie. +:returns: The index of the suffix link. +:rtype: int + +:raises LibsemigroupsError: if ``validate_active_node_index(current)`` throws. + +:complexity: Linear in the height of the node. + +.. seealso:: :any:`validate_active_node_index`. + +)pbdoc"); -.. seealso:: :any:`rm_word_no_checks`)pbdoc"); thing.def("traverse", &AhoCorasick::traverse, py::arg("current"), py::arg("a"), R"pbdoc( -After checking, traverse the trie using suffix links where necessary. -See :any:`traverse_no_checks` +Traverse the trie using suffix links where necessary. -:raises LibsemigroupsError: if ``validate_active_node_index(current)`` throws.)pbdoc"); - thing.def("signature", - &AhoCorasick::signature, - py::arg("w"), - py::arg("i"), - R"pbdoc( -After checking, find the signature of a node. 
-See :any:`signature_no_checks` +This function traverses the trie using suffix links where necessary, behaving +like a combination of the *goto* function and the *fail* function in :cite:`Aho1975aa`. -:raises LibsemigroupsError: if ``validate_active_node_index(i)`` throws.)pbdoc"); - thing.def("height", - &AhoCorasick::height, - py::arg("i"), - R"pbdoc( -After checking, calculate the height of a node. -See :any:`height_no_checks` +If *current* is the index of a node with signature :math:`W`, and *a* is the +letter :math:`a`, then `traverse_no_checks(current, a)` returns the index of the +node with signature equal to the longest suffix of :math:`Wa` contained in the +trie. -:raises LibsemigroupsError: if ``validate_active_node_index(i)`` throws.)pbdoc"); - thing.def("suffix_link", - &AhoCorasick::suffix_link, - py::arg("current"), - R"pbdoc( -After checking, calculate the index of the suffix link of a node. -See :any:`suffix_link_no_checks` +:param current: the index of the node to traverse from +:type current: int -:raises LibsemigroupsError: if ``validate_active_node_index(current)`` throws.)pbdoc"); - thing.def("node", - &AhoCorasick::node, - py::arg("i"), - R"pbdoc( -After checking, return the node given an index. -See :any:`node_no_checks` +:param a: the letter to traverse +:type a: int -:raises LibsemigroupsError: if ``validate_node_index(i)`` throws.)pbdoc"); - thing.def("child", - &AhoCorasick::child, - py::arg("parent"), - py::arg("letter"), - R"pbdoc( -After checking, return the child of parent with edge-label letter. -See :any:`child_no_checks` +:returns: The index of the node traversed to +:rtype: int -:raises LibsemigroupsError: if ``validate_active_node_index(parent)`` throws.)pbdoc"); - thing.def("validate_node_index", - &AhoCorasick::validate_node_index, +:raises LibsemigroupsError: if ``validate_active_node_index(current)`` throws. + +.. seealso:: :any:`validate_active_node_index`. 
+ +)pbdoc"); + + thing.def("validate_active_node_index", + &AhoCorasick::validate_active_node_index, py::arg("i"), R"pbdoc( -Check if an index corresponds to a node. +Check if an index corresponds to a node currently in the trie. + +The implementation of :any:`AhoCorasick` uses two different types of node; +*active* and *inactive* . An active node is a node that is currently a node +in the trie. An inactive node is a node that used to be part of the trie, but +has since been removed. It may later become active again after being +reinitialised, and exists as a way of minimising how frequently memory needs +to be allocated and deallocated for nodes. This function validates whether the +given index *i* corresponds to an active node. :param i: the index to validate :type i: index_type -This function checks if the given index ``i`` corresponds to the index of a node. +:raises LibsemigroupsError: if ``validate_node_index(i)`` throws, or if *i* is + not an active node. :complexity: Constant -:raises LibsemigroupsError: if ``i`` does not correspond to the index of a node; that is, if ``i`` is larger than the size of the container storing the indices of nodes.)pbdoc"); - thing.def("validate_active_node_index", - &AhoCorasick::validate_active_node_index, +.. seealso:: :any:`validate_node_index`. + +)pbdoc"); + + thing.def("validate_node_index", + &AhoCorasick::validate_node_index, py::arg("i"), R"pbdoc( -Check if an index corresponds to a node currently in the trie. +Check if an index corresponds to a node. + +This function checks if the given index *i* corresponds to the index of a +node. :param i: the index to validate :type i: index_type -The implementation of :any:`AhoCorasick` uses two different types of node; *active* and *inactive* . An active node is a node that is currently a node in the trie. An inactive node is a node that used to be part of the trie, but has since been removed. 
It may later become active again after being reinitialised (see :any:`init` ), and exists as a way of minimising how frequently memory needs to be allocated and deallocated for nodes.This function validates whether the given index ``i`` corresponds to an active node. +:raises LibsemigroupsError: if *i* does not correspond to the index of a + node; that is, if *i* is larger than the size of the container storing the + indices of nodes. :complexity: Constant - -:raises LibsemigroupsError: if ``validate_node_index(i)`` throws, or if ``i`` is not an active node. - -.. seealso:: :any:`validate_node_index` , :any:`init`.)pbdoc"); +)pbdoc"); // Helpers using index_type = AhoCorasick::index_type; @@ -195,75 +266,77 @@ The implementation of :any:`AhoCorasick` uses two different types of node; *acti py::arg("ac"), py::arg("w"), R"pbdoc( -TODO doc. +Add a word to the trie of ac. + +This function performs the same as ``ac.add_word(w.begin(), w.end())`` + +:param ac: AhoCorasick object to add the word to. +:type ac: AhoCorasick + +:param w: the word to add. +:type w: Word + +:returns: An index_type corresponding to the final node added to the ``ac``. +:rtype: typename Word + +:raises LibsemigroupsError: if the word ``w`` corresponds to an existing terminal node in the trie. + +:complexity: Linear in the length of ``w``. + +.. seealso:: :any:`AhoCorasick::add_word`. + )pbdoc"); m.def("rm_word", &aho_corasick::rm_word, py::arg("ac"), py::arg("w"), R"pbdoc( -TODO doc. -)pbdoc"); - m.def( - "traverse_from", - [](AhoCorasick const& ac, - index_type start, - AhoCorasick::const_iterator first, - AhoCorasick::const_iterator last) { - return aho_corasick::traverse_from(ac, start, first, last); - }, - py::arg("ac"), - py::arg("start"), - py::arg("first"), - py::arg("last"), - R"pbdoc( -TODO doc. 
-)pbdoc"); - m.def( - "traverse_from", - [](AhoCorasick const& ac, index_type start, char const& w) { - return aho_corasick::traverse_from(ac, start, w); - }, - py::arg("ac"), - py::arg("start"), - py::arg("w"), - R"pbdoc( -TODO doc. +Remove a word from the trie of ac. + +:param ac: AhoCorasick object to remove the word from. +:type ac: AhoCorasick + +:param w: the word to remove. +:type w: Word + +This function performs the same as ``ac.rm_word(w.begin(), w.end())`` + +:raises LibsemigroupsError: if the word ``w`` does not correspond to an existing terminal node in the trie. + +:complexity: Linear in the length of ``w``. + +.. seealso:: :any:`AhoCorasick::rm_word`. + + +:returns: An index_type corresponding to the node with signature equal to ``w``. + +:rtype: typename Word )pbdoc"); m.def( - "traverse_from", + "traverse_word", [](AhoCorasick const& ac, index_type start, word_type const& w) { - return aho_corasick::traverse_from(ac, start, w); + return aho_corasick::traverse_word(ac, start, w); }, py::arg("ac"), py::arg("start"), py::arg("w"), R"pbdoc( -TODO doc. -)pbdoc"); - m.def( - "traverse", - [](AhoCorasick const& ac, - AhoCorasick::const_iterator first, - AhoCorasick::const_iterator last) { - return aho_corasick::traverse(ac, first, last); - }, - py::arg("ac"), - py::arg("first"), - py::arg("last"), - R"pbdoc( -TODO doc. -)pbdoc"); +Traverse the trie of ac using suffix links where necessary. +This function performs the same as ``traverse_word(ac, start, w.cbegin(), w.cend())`` + +.. seealso:: :any:`traverse_word`.)pbdoc"); m.def( - "traverse", + "traverse_word", [](AhoCorasick const& ac, std::string const& w) { - return aho_corasick::traverse(ac, w); + return aho_corasick::traverse_word(ac, w); }, py::arg("ac"), py::arg("w"), R"pbdoc( -TODO doc. -)pbdoc"); +Traverse the trie of ac from the root using suffix links where necessary. +This function performs the same as ``traverse_word_no_checks(ac, AhoCorasick::root, w.cbegin(), w.end())`` + +.. 
seealso:: :any:`traverse_word_no_checks`.)pbdoc"); } // init_aho_corasick From f35394eae001577e9fbb0bbc44599e1b565e4a4d Mon Sep 17 00:00:00 2001 From: Joseph Edwards Date: Mon, 27 May 2024 17:22:45 +0100 Subject: [PATCH 06/13] Fix doc and add overloads --- src/aho-corasick.cpp | 137 +++++++++++++++++++++++++++++++++---------- 1 file changed, 107 insertions(+), 30 deletions(-) diff --git a/src/aho-corasick.cpp b/src/aho-corasick.cpp index 5699dd89..0b97da51 100644 --- a/src/aho-corasick.cpp +++ b/src/aho-corasick.cpp @@ -24,6 +24,7 @@ // pybind11.... #include // for class_, init, module +#include // for std::vector conversion // libsemigroups_pybind11.... #include "main.hpp" // for init_aho_corasick @@ -42,8 +43,7 @@ This class implements a trie based data structure with suffix links to be used with the Aho-Corasick dictionary searching algorithm. An introduction to this algorithm can be found `here `_. -Several helper functions are provided in the ``aho_corasick`` -namespace.)pbdoc"); +Several helper functions are provided in the ``aho_corasick`` namespace.)pbdoc"); thing.def("__repr__", &to_string); @@ -262,55 +262,93 @@ node. using index_type = AhoCorasick::index_type; m.def("add_word", - &aho_corasick::add_word, + &aho_corasick::add_word, py::arg("ac"), py::arg("w"), R"pbdoc( -Add a word to the trie of ac. +Add a word to the trie of *ac* -This function performs the same as ``ac.add_word(w.begin(), w.end())`` +Calling this function immediately adds the word *w* to the trie of *ac*, and +makes the final node on the path labelled by this word terminal (if it +wasn't already). After adding a word, existing suffix links become +invalid. If an identical word has already been added to the trie of *ac*, then +this function does nothing. -:param ac: AhoCorasick object to add the word to. +:param ac: object whose trie is to be added to :type ac: AhoCorasick -:param w: the word to add. 
-:type w: Word +:param w: the word to add +:type w: List[int] + +:returns: The index corresponding to the final node added to the trie of *ac*. + This node will have a :any:`signature` equal to that of *w*. +:rtype: int -:returns: An index_type corresponding to the final node added to the ``ac``. -:rtype: typename Word +:exceptions: This function guarantees not to throw a :any:`LibsemigroupsError`. -:raises LibsemigroupsError: if the word ``w`` corresponds to an existing terminal node in the trie. +:complexity: Linear in the length of *w*. -:complexity: Linear in the length of ``w``. +.. seealso:: :any:`AhoCorasick.signature` + +)pbdoc"); + m.def("add_word", + &aho_corasick::add_word, + py::arg("ac"), + py::arg("w"), + R"pbdoc( +Add a word to the trie of *ac* -.. seealso:: :any:`AhoCorasick::add_word`. +This function performs the same as ``add_word(AhoCorasick ac, List[int] w)``, +but *w* is a :any:`string` rather than List[:any:`int`]. )pbdoc"); m.def("rm_word", - &aho_corasick::rm_word, + &aho_corasick::rm_word, py::arg("ac"), py::arg("w"), R"pbdoc( -Remove a word from the trie of ac. +Remove a word from the trie of *ac*. + +From the trie of *ac*, remove each node of the given word *w* that is not part of +the prefix of a different word. + +If the word *w* corresponds to a terminal node with no children, then +calling this function removes the nodes :math:`n_i` from the trie of *ac* +that correspond to the largest suffix of *w*, such that each :math:`n_i` has either +zero children or one. After this, existing suffix links become invalid. -:param ac: AhoCorasick object to remove the word from. +If *w* corresponds to a terminal node :math:`n` with children, then calling this +function makes :math:`n` not terminal. + +If *w* does not correspond to a terminal node, then calling this function does +nothing. + +:param ac: object whose trie is to be removed from :type ac: AhoCorasick -:param w: the word to remove.
-:type w: Word +:param w: the word to remove +:type w: List[int] -This function performs the same as ``ac.rm_word(w.begin(), w.end())`` +:returns: The index corresponding to the node with signature equal to *w*. +:rtype: int -:raises LibsemigroupsError: if the word ``w`` does not correspond to an existing terminal node in the trie. +:exceptions: This function guarantees not to throw a :any:`LibsemigroupsError`. -:complexity: Linear in the length of ``w``. +:complexity: Linear in the length of *w*. -.. seealso:: :any:`AhoCorasick::rm_word`. +.. seealso:: :any:`AhoCorasick.signature` +)pbdoc"); + m.def("rm_word", + &aho_corasick::rm_word, + py::arg("ac"), + py::arg("w"), + R"pbdoc( +Remove a word from the trie of *ac*. -:returns: An index_type corresponding to the node with signature equal to ``w``. +This function performs the same as ``rm_word(AhoCorasick ac, List[int] w)``, +but *w* is a :any:`string` rather than List[:any:`int`]. -:rtype: typename Word )pbdoc"); m.def( "traverse_word", @@ -321,10 +359,49 @@ This function performs the same as ``ac.rm_word(w.begin(), w.end())`` py::arg("start"), py::arg("w"), R"pbdoc( -Traverse the trie of ac using suffix links where necessary. -This function performs the same as ``traverse_word(ac, start, w.cbegin(), w.cend())`` +Traverse the trie of *ac* using suffix links where necessary. + +This function traverses the trie of *ac*, starting at the node with +index *start*, and traversing using the letters in the word *w*. + +:param ac: object to traverse. +:type ac: AhoCorasick -.. seealso:: :any:`traverse_word`.)pbdoc"); +:param w: Word to traverse by +:type w: List[int] + +:returns: The result of the traversal +:rtype: int + +:exceptions: This function guarantees not to throw a :any:`LibsemigroupsError`.
+ +)pbdoc"); + m.def( + "traverse_word", + [](AhoCorasick const& ac, index_type start, std::string const& w) { + return aho_corasick::traverse_word(ac, start, w); + }, + py::arg("ac"), + py::arg("start"), + py::arg("w"), + R"pbdoc( +Traverse the trie of *ac* using suffix links where necessary. + +This function performs the same as ``traverse_word(AhoCorasick ac, List[int] w)``, +but *w* is a :any:`string` rather than List[:any:`int`]. +)pbdoc"); + m.def( + "traverse_word", + [](AhoCorasick const& ac, word_type const& w) { + return aho_corasick::traverse_word(ac, w); + }, + py::arg("ac"), + py::arg("w"), + R"pbdoc( +Traverse the trie of *ac* from the root using suffix links where necessary. + +This function performs the same as ``traverse_word(ac, AhoCorasick.root, w)`` +)pbdoc"); m.def( "traverse_word", [](AhoCorasick const& ac, std::string const& w) { @@ -333,10 +410,10 @@ This function performs the same as ``traverse_word(ac, start, w.cbegin(), w.cend py::arg("ac"), py::arg("w"), R"pbdoc( -Traverse the trie of ac from the root using suffix links where necessary. -This function performs the same as ``traverse_word_no_checks(ac, AhoCorasick::root, w.cbegin(), w.end())`` +Traverse the trie of *ac* from the root using suffix links where necessary. -.. 
seealso:: :any:`traverse_word_no_checks`.)pbdoc"); } // init_aho_corasick From 2659a5496c80e465f1fd6e4eca68dc2daf96932d Mon Sep 17 00:00:00 2001 From: Joseph Edwards Date: Tue, 28 May 2024 15:01:56 +0100 Subject: [PATCH 07/13] Fix signature and doc --- src/aho-corasick.cpp | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/aho-corasick.cpp b/src/aho-corasick.cpp index 0b97da51..3c9caa55 100644 --- a/src/aho-corasick.cpp +++ b/src/aho-corasick.cpp @@ -34,6 +34,7 @@ namespace py = pybind11; namespace libsemigroups { void init_aho_corasick(py::module& m) { + using index_type = AhoCorasick::index_type; py::class_ thing(m, "AhoCorasick", R"pbdoc( @@ -142,21 +143,27 @@ This function Returns the number of nodes in the trie. :rtype: int )pbdoc"); - // TODO: What should we do here? Return a string instead of do it in place? - thing.def("signature", - &AhoCorasick::signature, - py::arg("w"), - py::arg("i"), - R"pbdoc( -Find the signature of a node. + thing.def( + "signature", + py::overload_cast(&AhoCorasick::signature, py::const_), + py::arg("i"), + R"pbdoc( +Find the signature of a node (out-of-place) -After validating ``i`` , this function performs the same as -``signature_no_checks(w, i)``. +Return the the signature of the node with index *i*. Recall that the +*signature* of a node :math:`n` is the word consisting of the edge labels +of the unique path from the root to +:math:`n`. -:raises LibsemigroupsError: if ``validate_active_node_index(i)`` throws. +:param i: the index of the node whose signature is sought +:type i: int -.. seealso:: :any:`validate_active_node_index`. +:returns: The signature +:rtype: List[int] + +:exceptions: This function guarantees not to throw a :any:`LibsemigroupsError`.
+:complexity: Linear in the height of the node )pbdoc"); thing.def("suffix_link", @@ -228,7 +235,7 @@ to be allocated and deallocated for nodes. This function validates whether the given index *i* corresponds to an active node. :param i: the index to validate -:type i: index_type +:type i: int :raises LibsemigroupsError: if ``validate_node_index(i)`` throws, or if *i* is not an active node. @@ -259,8 +266,6 @@ node. )pbdoc"); // Helpers - using index_type = AhoCorasick::index_type; - m.def("add_word", &aho_corasick::add_word, py::arg("ac"), From 1bd5e1c42d20706ba918819a3d876fa07e3a2c0b Mon Sep 17 00:00:00 2001 From: Joseph Edwards Date: Tue, 28 May 2024 17:22:41 +0100 Subject: [PATCH 08/13] Add aho test file --- tests/test_aho_corasick.py | 151 +++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 tests/test_aho_corasick.py diff --git a/tests/test_aho_corasick.py b/tests/test_aho_corasick.py new file mode 100644 index 00000000..88f15502 --- /dev/null +++ b/tests/test_aho_corasick.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2024, Joseph Edwards +# +# Distributed under the terms of the GPL license version 3. +# +# The full license is in the file LICENSE, distributed with this software. + +""" +This module contains some tests for the libsemigroups_pybind11 functionality +arising from aho-corasick.*pp in libsemigroups. 
+""" +import copy, pytest +from libsemigroups_pybind11 import ( + AhoCorasick, + aho_corasick, + LibsemigroupsError, + UNDEFINED, +) + + +def basic_ac(): + ac = AhoCorasick() + aho_corasick.add_word(ac, [0, 1, 0, 1]) + aho_corasick.add_word(ac, [0, 1, 1, 0]) + aho_corasick.add_word(ac, [0, 1, 1, 0, 1]) + aho_corasick.add_word(ac, [0, 1, 1, 0, 0]) + aho_corasick.rm_word(ac, [0, 1, 1, 0, 1]) + + return ac + + +def test_add_rm_word(): + ac = AhoCorasick() + assert ac.number_of_nodes() == 1 + + aho_corasick.add_word(ac, [0, 1, 0, 1]) + with pytest.raises(LibsemigroupsError): + # Can't add the same word twice + aho_corasick.add_word(ac, [0, 1, 0, 1]) + + aho_corasick.add_word(ac, [0, 1, 1, 0]) + aho_corasick.add_word(ac, [0, 1, 1, 0, 1]) + aho_corasick.add_word(ac, [0, 1, 1, 0, 0]) + aho_corasick.rm_word(ac, [0, 1, 1, 0, 1]) + with pytest.raises(LibsemigroupsError): + # Can't remove a word not in the trie + aho_corasick.rm_word(ac, [0, 1, 1, 0, 1]) + with pytest.raises(LibsemigroupsError): + # Can't remove a non-terminal node + aho_corasick.rm_word(ac, [0, 1]) + + +def test_copy_constructor(): + ac = basic_ac() + ac2 = copy.copy(ac) + assert ac != ac2 + test_child(ac2) + test_number_of_nodes(ac2) + test_signature(ac2) + test_traverse(ac2) + test_validate_active_node_index(ac2) + test_validate_node_index(ac2) + + +def test_child(ac=None): + if ac is None: + ac = basic_ac() + + assert ac.child(2, 0) == 3 + assert ac.child(2, 1) == 5 + assert ac.child(2, 2) == UNDEFINED + assert ac.child(6, 0) == 8 + assert ac.child(6, 1) == UNDEFINED + with pytest.raises(LibsemigroupsError): + # Can't get the child of a node that doesn't exist + ac.child(9, 0) + with pytest.raises(LibsemigroupsError): + # Can't get the child of an inactive node + ac.child(7, 0) + + +def test_number_of_nodes(ac=None): + if ac is None: + ac = basic_ac() + + assert ac.number_of_nodes() == 8 + + +def test_signature(ac=None): + if ac is None: + ac = basic_ac() + + assert ac.signature(8) == [0, 1, 1, 0, 0] + 
with pytest.raises(LibsemigroupsError): + # Can't get the signature of a node that doesn't exist + ac.signature(9) + with pytest.raises(LibsemigroupsError): + # Can't get the signature of an inactive node + ac.signature(7) + + +def test_traverse(ac=None): + if ac is None: + ac = basic_ac() + + assert ac.traverse(0, 0) == 1 + assert ac.traverse(0, 1) == 0 + assert ac.traverse(2, 0) == 3 + assert ac.traverse(2, 1) == 5 + assert ac.traverse(3, 0) == 1 + assert ac.traverse(3, 1) == 4 + with pytest.raises(LibsemigroupsError): + # Can't traverse from inactive node + ac.traverse(7, 0) + with pytest.raises(LibsemigroupsError): + # Can't traverse from a node that doesn't exist + ac.traverse(9, 0) + + assert aho_corasick.traverse_word(ac, 1, [1, 1, 0, 1]) == 2 + assert aho_corasick.traverse_word(ac, 8, [0, 0, 0, 0, 0, 1, 0, 1]) == 4 + assert aho_corasick.traverse_word(ac, [0, 0, 0, 0, 0, 1, 0, 1]) == 4 + assert aho_corasick.traverse_word(ac, [0, 1, 0, 1, 1, 0, 1]) == 2 + with pytest.raises(LibsemigroupsError): + # Can't traverse from inactive node + aho_corasick.traverse_word(ac, 7, [0, 1, 0]) + with pytest.raises(LibsemigroupsError): + # Can't traverse from a node that doesn't exist + aho_corasick.traverse_word(ac, 9, [0, 1, 0]) + + +def test_validate_active_node_index(ac=None): + if ac is None: + ac = basic_ac() + + ac.validate_active_node_index(8) + with pytest.raises(LibsemigroupsError): + ac.validate_active_node_index(7) + with pytest.raises(LibsemigroupsError): + ac.validate_active_node_index(9) + + +def test_validate_node_index(ac=None): + if ac is None: + ac = basic_ac() + + # 7 is a node, but not an active one + ac.validate_node_index(7) + ac.validate_node_index(8) + with pytest.raises(LibsemigroupsError): + ac.validate_node_index(9) From b6aecb80a845d9a253b92018099b3b457c748556 Mon Sep 17 00:00:00 2001 From: Joseph Edwards Date: Tue, 28 May 2024 17:22:50 +0100 Subject: [PATCH 09/13] More doc --- docs/source/main-algorithms/aho-corasick/ac-helpers.rst | 5 +++++
docs/source/main-algorithms/aho-corasick/aho-corasick.rst | 5 ++++- src/aho-corasick.cpp | 6 ++++-- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/docs/source/main-algorithms/aho-corasick/ac-helpers.rst b/docs/source/main-algorithms/aho-corasick/ac-helpers.rst index 0edb461f..e1d774c6 100644 --- a/docs/source/main-algorithms/aho-corasick/ac-helpers.rst +++ b/docs/source/main-algorithms/aho-corasick/ac-helpers.rst @@ -8,6 +8,11 @@ Aho-Corasick helper functions ============================= +This module contains various helper functions for the class :any:`AhoCorasick`. +These functions could be functions of :any:`AhoCorasick` but they only use +public member functions of :any:`AhoCorasick`, and so they are declared +as free functions instead. + Contents -------- .. currentmodule:: libsemigroups_pybind11.aho_corasick diff --git a/docs/source/main-algorithms/aho-corasick/aho-corasick.rst b/docs/source/main-algorithms/aho-corasick/aho-corasick.rst index d86ef8d0..a73da1aa 100644 --- a/docs/source/main-algorithms/aho-corasick/aho-corasick.rst +++ b/docs/source/main-algorithms/aho-corasick/aho-corasick.rst @@ -8,6 +8,9 @@ Aho-Corasick ============ +.. autoclass:: AhoCorasick + :doc-only: + :class-doc-from: class Contents -------- @@ -29,6 +32,6 @@ Full API -------- .. autoclass:: AhoCorasick - :class-doc-from: class + :no-doc: :special-members: __init__ :members: diff --git a/src/aho-corasick.cpp b/src/aho-corasick.cpp index 3c9caa55..c37e35b1 100644 --- a/src/aho-corasick.cpp +++ b/src/aho-corasick.cpp @@ -44,7 +44,9 @@ This class implements a trie based data structure with suffix links to be used with the Aho-Corasick dictionary searching algorithm. An introduction to this algorithm can be found `here `_. -Several helper functions are provided in the ``aho_corasick`` namespace.)pbdoc"); +Several helper functions are provided in the ``aho_corasick`` module, documented +:doc:`here `. 
+)pbdoc"); thing.def("__repr__", &to_string); @@ -148,7 +150,7 @@ This function Returns the number of nodes in the trie. py::overload_cast(&AhoCorasick::signature, py::const_), py::arg("i"), R"pbdoc( -Find the signature of a node (out-of-place) +Find the signature of a node Return the the signature of the node with index *i*. Recall that the *signature* of a node :math:`n` is the word consisting of the edge labels From 5c175f76891cba21719875566d49a48c3d00d443 Mon Sep 17 00:00:00 2001 From: Joseph Edwards Date: Tue, 28 May 2024 18:30:51 +0100 Subject: [PATCH 10/13] Add doctest --- .../aho-corasick/ac-helpers.rst | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/docs/source/main-algorithms/aho-corasick/ac-helpers.rst b/docs/source/main-algorithms/aho-corasick/ac-helpers.rst index e1d774c6..5cd85268 100644 --- a/docs/source/main-algorithms/aho-corasick/ac-helpers.rst +++ b/docs/source/main-algorithms/aho-corasick/ac-helpers.rst @@ -13,6 +13,46 @@ These functions could be functions of :any:`AhoCorasick` but they only use public member functions of :any:`AhoCorasick`, and so they are declared as free functions instead. + +.. doctest:: + + >>> from libsemigroups_pybind11 import AhoCorasick, aho_corasick + >>> # Construct an empty AhoCorasick + >>> ac = AhoCorasick() + + >>> # Add words + >>> aho_corasick.add_word(ac, [0, 1, 0, 1]) + 4 + >>> aho_corasick.add_word(ac, [0, 1, 1, 0]) + 6 + >>> aho_corasick.add_word(ac, [0, 1, 1, 0, 1]) + 7 + >>> aho_corasick.add_word(ac, [0, 1, 1, 0, 0]) + 8 + + >>> # Can't add a word that already exists + >>> aho_corasick.add_word(ac, [0, 1, 1, 0, 0]) + Traceback (most recent call last): + ... 
+ LibsemigroupsError: the word [0, 1, 1, 0, 0] given by the arguments [first, last) already belongs to the trie + + >>> # Remove words + >>> aho_corasick.rm_word(ac, [0, 1, 0, 1]) + 4 + + >>> # Can't remove a word that is not a terminal node in the trie + >>> aho_corasick.rm_word(ac, [0, 1, 0, 1]) + Traceback (most recent call last): + ... + LibsemigroupsError: cannot remove the word [0, 1, 0, 1] given by the arguments [first, last), as it does not correspond to a node in the trie + + >>> # Traverse + >>> aho_corasick.traverse_word(ac, 5, [0, 1]) + 7 + >>> aho_corasick.traverse_word(ac, [0, 1, 0, 1, 1, 0]) + 6 + + Contents -------- .. currentmodule:: libsemigroups_pybind11.aho_corasick From 42a63bf88e8af34dad275c0e81311485125ef5b6 Mon Sep 17 00:00:00 2001 From: Joseph Edwards Date: Tue, 28 May 2024 18:48:02 +0100 Subject: [PATCH 11/13] Fix spelling --- docs/source/libsemigroups.bib | 234 +++++++++++++++++----------------- 1 file changed, 119 insertions(+), 115 deletions(-) diff --git a/docs/source/libsemigroups.bib b/docs/source/libsemigroups.bib index 00f368e0..72fd31da 100644 --- a/docs/source/libsemigroups.bib +++ b/docs/source/libsemigroups.bib @@ -7,139 +7,143 @@ %% Saved with string encoding Unicode (UTF-8) @article{Aho1975aa, - Author = {Aho, Alfred V. and Corasick, Margaret J.}, - Journal = {Communications of the {ACM}}, - Month = { June }, - Year = {1975}, - Title = {Efficient string matching: an aid to bibliographic search}, - Volume = {18}, - Issn = {0001-0782, 1557-7317}, - Url = {https://dl.acm.org/doi/10.1145/360825.360855}, - Doi = {10.1145/360825.360855}, - Shorttitle = {Efficient string matching}, - Abstract = {This paper describes a simple, efficient algorithm to locate all occurrences of any of a finite number of keywords in a string of text. The algorithm consists of constructing a finite state pattern matching machine from the keywords and then using the pattern matching machine to process the text string in a single pass. 
Construction of the pattern matching machine takes time proportional to the sum of the lengths of the keywords. The number of state transitions made by the pattern matching machine in processing the text string is independent of the number of keywords. The algorithm has been used to improve the speed of a library bibliographic search program by a factor of 5 to 10.}, - Pages = {333--340}, - Number = {6}, - Shortjournal = {Commun. {ACM}}, - Urldate = {2024-03-26}, - Date = {1975-06}, - Langid = {english}, + author = {Aho, Alfred V. and Corasick, Margaret J.}, + journal = {Communications of the {ACM}}, + month = { June }, + year = {1975}, + title = {Efficient string matching: an aid to bibliographic search}, + volume = {18}, + issn = {0001-0782, 1557-7317}, + url = {https://dl.acm.org/doi/10.1145/360825.360855}, + doi = {10.1145/360825.360855}, + shorttitle = {Efficient string matching}, + abstract = {This paper describes a simple, efficient algorithm to locate all occurrences of any of a finite number of keywords in a string of text. The algorithm consists of constructing a finite state pattern matching machine from the keywords and then using the pattern matching machine to process the text string in a single pass. Construction of the pattern matching machine takes time proportional to the sum of the lengths of the keywords. The number of state transitions made by the pattern matching machine in processing the text string is independent of the number of keywords. 
The algorithm has been used to improve the speed of a library bibliographic search program by a factor of 5 to 10.}, + pages = {333--340}, + number = {6}, + urldate = {2024-03-26}, + date = {1975-06}, + langid = {english} } @article{Gilman1979, - Author = {Robert H Gilman}, - Journal = {Journal of Algebra}, - Month = { April }, - Number = {2}, - Pages = {544--554}, - Title = {Presentations of groups and monoids}, - Volume = {57}, - Year = {1979}} + author = {Robert H Gilman}, + journal = {Journal of Algebra}, + month = { April }, + number = {2}, + pages = {544--554}, + title = {Presentations of groups and monoids}, + volume = {57}, + year = {1979} +} @misc{Holt2018aa, - Author = {Holt, Derek}, - Title = {kbmag -- {GAP} package, {V}ersion 1.5.9}, - Month = { July }, - Year = { 2019 }, - Url = {https://gap-packages.github.io/kbmag/}, + author = {Holt, Derek}, + title = {kbmag -- {GAP} package, {V}ersion 1.5.9}, + month = { July }, + year = { 2019 }, + url = {https://gap-packages.github.io/kbmag/} } @book{Jantzen2012aa, - Author = {Jantzen, Matthias}, - Date-Added = {2019-12-20 14:27:56 +0000}, - Date-Modified = {2019-12-20 14:28:00 +0000}, - Publisher = {Springer Science \& Business Media}, - Title = {Confluent string rewriting}, - Volume = {14}, - Year = {2012}} + author = {Jantzen, Matthias}, + date-added = {2019-12-20 14:27:56 +0000}, + date-modified = {2019-12-20 14:28:00 +0000}, + publisher = {Springer Science \& Business Media}, + title = {Confluent string rewriting}, + volume = {14}, + year = {2012} +} @book{Sims1994aa, - Address = {Cambridge,, England, New York}, - Author = {Sims, Charles C.}, - Date-Added = {2019-10-23 12:55:38 +0100}, - Date-Modified = {2019-10-23 12:55:38 +0100}, - Isbn = {0-521-43213-8}, - Publisher = {Cambridge University Press}, - Series = {Encyclopedia of mathematics and its applications}, - Title = {Computation with finitely presented groups}, - Url = {http://opac.inria.fr/record=b1082972}, - Year = 1994, - Bdsk-File-1 = 
{YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxDGLi4vLi4vLi4vTGlicmFyeS9Nb2JpbGUgRG9jdW1lbnRzL2NvbX5hcHBsZX5DbG91ZERvY3MvTWF0aHMvQmlidGV4L0ZpbGVkLyhFbmN5Y2xvcGVkaWEgb2YgTWF0aGVtYXRpY3MgYW5kIGl0cyBBcHBsaWNhdGlvbnMpIENoYXJsZXMgQy4gU2ltcy1Db21wdXRhdGlvbiB3aXRoIGZpbml0ZWx5IHByZXNlbnRlZCBncm91cHMtQ1VQICgxOTk0KS5kanZ1TxEDaAAAAAADaAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAAAAAAAEJEAAH/////HyhFbmN5Y2xvcGVkaWEgb2YgI0ZGRkZGRkZGLmRqdnUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP////8AAAAAAAAAAAAAAAAAAwAHAAAKIGN1AAAAAAAAAAAAAAAAAAVGaWxlZAAAAgDJLzpVc2VyczpqZG06TGlicmFyeTpNb2JpbGUgRG9jdW1lbnRzOmNvbX5hcHBsZX5DbG91ZERvY3M6TWF0aHM6QmlidGV4OkZpbGVkOihFbmN5Y2xvcGVkaWEgb2YgTWF0aGVtYXRpY3MgYW5kIGl0cyBBcHBsaWNhdGlvbnMpIENoYXJsZXMgQy4gU2ltcy1Db21wdXRhdGlvbiB3aXRoIGZpbml0ZWx5IHByZXNlbnRlZCBncm91cHMtQ1VQICgxOTk0KS5kanZ1AAAOAPwAfQAoAEUAbgBjAHkAYwBsAG8AcABlAGQAaQBhACAAbwBmACAATQBhAHQAaABlAG0AYQB0AGkAYwBzACAAYQBuAGQAIABpAHQAcwAgAEEAcABwAGwAaQBjAGEAdABpAG8AbgBzACkAIABDAGgAYQByAGwAZQBzACAAQwAuACAAUwBpAG0AcwAtAEMAbwBtAHAAdQB0AGEAdABpAG8AbgAgAHcAaQB0AGgAIABmAGkAbgBpAHQAZQBsAHkAIABwAHIAZQBzAGUAbgB0AGUAZAAgAGcAcgBvAHUAcABzAC0AQwBVAFAAIAAoADEAOQA5ADQAKQAuAGQAagB2AHUADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgDHVXNlcnMvamRtL0xpYnJhcnkvTW9iaWxlIERvY3VtZW50cy9jb21+YXBwbGV+Q2xvdWREb2NzL01hdGhzL0JpYnRleC9GaWxlZC8oRW5jeWNsb3BlZGlhIG9mIE1hdGhlbWF0aWNzIGFuZCBpdHMgQXBwbGljYXRpb25zKSBDaGFybGVzIEMuIFNpbXMtQ29tcHV0YXRpb24gd2l0aCBmaW5pdGVseSBwcmVzZW50ZWQgZ3JvdXBzLUNVUCAoMTk5NCkuZGp2dQAAEwABLwAAFQACAAr//wAAAAgADQAaACQA7QAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAARZ}, - Bdsk-Url-1 = {http://opac.inria.fr/record=b1082972}} + address = {Cambridge,, England, New York}, + author = {Sims, Charles C.}, + date-added = {2019-10-23 12:55:38 +0100}, + date-modified = {2019-10-23 12:55:38 +0100}, + isbn = {0-521-43213-8}, + publisher = {Cambridge University Press}, + series = {Encyclopedia of mathematics and its applications}, + title = {Computation with finitely presented groups}, + url = 
{http://opac.inria.fr/record=b1082972}, + year = 1994, + bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxDGLi4vLi4vLi4vTGlicmFyeS9Nb2JpbGUgRG9jdW1lbnRzL2NvbX5hcHBsZX5DbG91ZERvY3MvTWF0aHMvQmlidGV4L0ZpbGVkLyhFbmN5Y2xvcGVkaWEgb2YgTWF0aGVtYXRpY3MgYW5kIGl0cyBBcHBsaWNhdGlvbnMpIENoYXJsZXMgQy4gU2ltcy1Db21wdXRhdGlvbiB3aXRoIGZpbml0ZWx5IHByZXNlbnRlZCBncm91cHMtQ1VQICgxOTk0KS5kanZ1TxEDaAAAAAADaAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAAAAAAAEJEAAH/////HyhFbmN5Y2xvcGVkaWEgb2YgI0ZGRkZGRkZGLmRqdnUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP////8AAAAAAAAAAAAAAAAAAwAHAAAKIGN1AAAAAAAAAAAAAAAAAAVGaWxlZAAAAgDJLzpVc2VyczpqZG06TGlicmFyeTpNb2JpbGUgRG9jdW1lbnRzOmNvbX5hcHBsZX5DbG91ZERvY3M6TWF0aHM6QmlidGV4OkZpbGVkOihFbmN5Y2xvcGVkaWEgb2YgTWF0aGVtYXRpY3MgYW5kIGl0cyBBcHBsaWNhdGlvbnMpIENoYXJsZXMgQy4gU2ltcy1Db21wdXRhdGlvbiB3aXRoIGZpbml0ZWx5IHByZXNlbnRlZCBncm91cHMtQ1VQICgxOTk0KS5kanZ1AAAOAPwAfQAoAEUAbgBjAHkAYwBsAG8AcABlAGQAaQBhACAAbwBmACAATQBhAHQAaABlAG0AYQB0AGkAYwBzACAAYQBuAGQAIABpAHQAcwAgAEEAcABwAGwAaQBjAGEAdABpAG8AbgBzACkAIABDAGgAYQByAGwAZQBzACAAQwAuACAAUwBpAG0AcwAtAEMAbwBtAHAAdQB0AGEAdABpAG8AbgAgAHcAaQB0AGgAIABmAGkAbgBpAHQAZQBsAHkAIABwAHIAZQBzAGUAbgB0AGUAZAAgAGcAcgBvAHUAcABzAC0AQwBVAFAAIAAoADEAOQA5ADQAKQAuAGQAagB2AHUADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgDHVXNlcnMvamRtL0xpYnJhcnkvTW9iaWxlIERvY3VtZW50cy9jb21+YXBwbGV+Q2xvdWREb2NzL01hdGhzL0JpYnRleC9GaWxlZC8oRW5jeWNsb3BlZGlhIG9mIE1hdGhlbWF0aWNzIGFuZCBpdHMgQXBwbGljYXRpb25zKSBDaGFybGVzIEMuIFNpbXMtQ29tcHV0YXRpb24gd2l0aCBmaW5pdGVseSBwcmVzZW50ZWQgZ3JvdXBzLUNVUCAoMTk5NCkuZGp2dQAAEwABLwAAFQACAAr//wAAAAgADQAaACQA7QAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAARZ}, + bdsk-url-1 = {http://opac.inria.fr/record=b1082972} +} @book{Knuth2009aa, - Author = {Knuth, Donald E.}, - Date-Added = {2019-10-22 15:37:10 +0100}, - Date-Modified = {2019-10-22 15:37:13 +0100}, - Edition = {12th}, - Isbn = {0321580508, 9780321580504}, - Publisher = {Addison-Wesley Professional}, - Title = {The Art of Computer Programming, Volume 4, Fascicle 1: Bitwise Tricks \& 
Techniques; Binary Decision Diagrams}, - Year = {2009}} + author = {Knuth, Donald E.}, + date-added = {2019-10-22 15:37:10 +0100}, + date-modified = {2019-10-22 15:37:13 +0100}, + edition = {12th}, + isbn = {0321580508, 9780321580504}, + publisher = {Addison-Wesley Professional}, + title = {The Art of Computer Programming, Volume 4, Fascicle 1: Bitwise Tricks \& Techniques; Binary Decision Diagrams}, + year = {2009} +} @article{Jonusas2017aa, - Author = {Jonusas, Julius and Mitchell, James D. and Pfeiffer, Markus}, - Date-Added = {2019-10-18 13:47:37 +0100}, - Date-Modified = {2019-10-18 13:52:37 +0100}, - Doi = {10.4171/PM/2001}, - Fjournal = {Portugaliae Mathematica. A Journal of the Portuguese Mathematical Society}, - Issn = {0032-5155}, - Journal = {Port. Math.}, - Mrclass = {20M10 (20B40 20M20 68Q42)}, - Mrnumber = {3763897}, - Mrreviewer = {Karim Ahmadidelir}, - Number = {3}, - Pages = {173--200}, - Title = {Two variants of the {F}roidure-{P}in algorithm for finite semigroups}, - Url = {https://doi.org/10.4171/PM/2001}, - Volume = {74}, - Year = {2017}, - Bdsk-Url-1 = {https://doi.org/10.4171/PM/2001}} + author = {Jonusas, Julius and Mitchell, James D. and Pfeiffer, Markus}, + date-added = {2019-10-18 13:47:37 +0100}, + date-modified = {2019-10-18 13:52:37 +0100}, + doi = {10.4171/PM/2001}, + fjournal = {Portugaliae Mathematica. A Journal of the Portuguese Mathematical Society}, + issn = {0032-5155}, + journal = {Port. 
Math.}, + mrclass = {20M10 (20B40 20M20 68Q42)}, + mrnumber = {3763897}, + mrreviewer = {Karim Ahmadidelir}, + number = {3}, + pages = {173--200}, + title = {Two variants of the {F}roidure-{P}in algorithm for finite semigroups}, + url = {https://doi.org/10.4171/PM/2001}, + volume = {74}, + year = {2017}, + bdsk-url-1 = {https://doi.org/10.4171/PM/2001} +} @incollection{Froidure1997aa, - Address = {Berlin}, - Author = {Froidure, V{\'e}ronique and Pin, Jean-Eric}, - Booktitle = {Foundations of computational mathematics ({R}io de {J}aneiro, 1997)}, - Date-Added = {2019-10-18 13:44:44 +0100}, - Date-Modified = {2019-10-18 13:44:44 +0100}, - Mrclass = {20M10}, - Mrnumber = {MR1661975 (99k:20111)}, - Mrreviewer = {Jorge Almeida}, - Pages = {112--126}, - Publisher = {Springer}, - Title = {Algorithms for computing finite semigroups}, - Year = {1997}, + address = {Berlin}, + author = {Froidure, V{\'e}ronique and Pin, Jean-Eric}, + booktitle = {Foundations of computational mathematics ({R}io de {J}aneiro, 1997)}, + date-added = {2019-10-18 13:44:44 +0100}, + date-modified = {2019-10-18 13:44:44 +0100}, + mrclass = {20M10}, + mrnumber = {MR1661975 (99k:20111)}, + mrreviewer = {Jorge Almeida}, + pages = {112--126}, + publisher = {Springer}, + title = {Algorithms for computing finite semigroups}, + year = {1997} } -@article {Konieczny1994aa, - AUTHOR = {Konieczny, Janusz}, - TITLE = {Green's equivalences in finite semigroups of binary relations}, - JOURNAL = {Semigroup Forum}, - FJOURNAL = {Semigroup Forum}, - VOLUME = {48}, - YEAR = {1994}, - NUMBER = {2}, - PAGES = {235--252}, - ISSN = {0037-1912}, - MRCLASS = {20M20}, - MRNUMBER = {1256691}, -MRREVIEWER = {G. J. 
Lallement}, - DOI = {10.1007/BF02573672}, +@article{Konieczny1994aa, + author = {Konieczny, Janusz}, + title = {Green's equivalences in finite semigroups of binary relations}, + journal = {Semigroup Forum}, + fjournal = {Semigroup Forum}, + volume = {48}, + year = {1994}, + number = {2}, + pages = {235--252}, + issn = {0037-1912}, + mrclass = {20M20}, + mrnumber = {1256691}, + mrreviewer = {G. J. Lallement}, + doi = {10.1007/BF02573672} } -@article {Lallement1990aa, - AUTHOR = {Lallement, Gerard and McFadden, Robert}, - TITLE = {On the determination of {G}reen's relations in finite - transformation semigroups}, - JOURNAL = {J. Symbolic Comput.}, - FJOURNAL = {Journal of Symbolic Computation}, - VOLUME = {10}, - YEAR = {1990}, - NUMBER = {5}, - PAGES = {481--498}, - ISSN = {0747-7171}, - MRCLASS = {20M20 (68Q45)}, - MRNUMBER = {1087717}, -MRREVIEWER = {Dominique Perrin}, - DOI = {10.1016/S0747-7171(08)80057-0}, +@article{Lallement1990aa, + author = {Lallement, Gerard and McFadden, Robert}, + title = {On the determination of {G}reen's relations in finite + transformation semigroups}, + journal = {J. 
Symbolic Comput.}, + fjournal = {Journal of Symbolic Computation}, + volume = {10}, + year = {1990}, + number = {5}, + pages = {481--498}, + issn = {0747-7171}, + mrclass = {20M20 (68Q45)}, + mrnumber = {1087717}, + mrreviewer = {Dominique Perrin}, + doi = {10.1016/S0747-7171(08)80057-0} } From 01d5a4d458a463070b25e248ccdf4c51e90a0fae Mon Sep 17 00:00:00 2001 From: Joseph Edwards Date: Tue, 28 May 2024 19:00:30 +0100 Subject: [PATCH 12/13] Fix linting --- docs/source/main-algorithms/aho-corasick/ac-helpers.rst | 7 ++----- libsemigroups_pybind11/aho_corasick.py | 6 ++++++ src/aho-corasick.cpp | 1 - tests/test_aho_corasick.py | 4 +++- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/source/main-algorithms/aho-corasick/ac-helpers.rst b/docs/source/main-algorithms/aho-corasick/ac-helpers.rst index 5cd85268..8d82e021 100644 --- a/docs/source/main-algorithms/aho-corasick/ac-helpers.rst +++ b/docs/source/main-algorithms/aho-corasick/ac-helpers.rst @@ -8,11 +8,8 @@ Aho-Corasick helper functions ============================= -This module contains various helper functions for the class :any:`AhoCorasick`. -These functions could be functions of :any:`AhoCorasick` but they only use -public member functions of :any:`AhoCorasick`, and so they are declared -as free functions instead. - +.. automodule:: libsemigroups_pybind11.aho_corasick + :no-index: .. doctest:: diff --git a/libsemigroups_pybind11/aho_corasick.py b/libsemigroups_pybind11/aho_corasick.py index 163346f2..7dce8d8d 100644 --- a/libsemigroups_pybind11/aho_corasick.py +++ b/libsemigroups_pybind11/aho_corasick.py @@ -6,4 +6,10 @@ # # The full license is in the file LICENSE, distributed with this software. # pylint:disable=no-name-in-module, unused-import +""" +This module contains various helper functions for the class :any:`AhoCorasick`. 
+These functions could be functions of :any:`AhoCorasick` but they only use +public member functions of :any:`AhoCorasick`, and so they are declared +as free functions instead. +""" from _libsemigroups_pybind11 import add_word, rm_word, traverse_word diff --git a/src/aho-corasick.cpp b/src/aho-corasick.cpp index c37e35b1..4c8d9438 100644 --- a/src/aho-corasick.cpp +++ b/src/aho-corasick.cpp @@ -421,7 +421,6 @@ Traverse the trie of *ac* from the root using suffix links where necessary. This function performs the same as ``traverse_word(ac, AhoCorasick.root, w)`` )pbdoc"); - } // init_aho_corasick } // namespace libsemigroups diff --git a/tests/test_aho_corasick.py b/tests/test_aho_corasick.py index 88f15502..5bc55bd8 100644 --- a/tests/test_aho_corasick.py +++ b/tests/test_aho_corasick.py @@ -5,12 +5,14 @@ # Distributed under the terms of the GPL license version 3. # # The full license is in the file LICENSE, distributed with this software. +# pylint: disable=missing-function-docstring """ This module contains some tests for the libsemigroups_pybind11 functionality arising from aho-corasick.*pp in libsemigroups. """ -import copy, pytest +import copy +import pytest from libsemigroups_pybind11 import ( AhoCorasick, aho_corasick, From 9b2928fcae5526d644c29da2faa6ba98af066137 Mon Sep 17 00:00:00 2001 From: Joseph Edwards Date: Tue, 28 May 2024 19:13:23 +0100 Subject: [PATCH 13/13] iwyu --- src/aho-corasick.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/aho-corasick.cpp b/src/aho-corasick.cpp index 4c8d9438..cea40faf 100644 --- a/src/aho-corasick.cpp +++ b/src/aho-corasick.cpp @@ -17,14 +17,20 @@ // // C++ stl headers.... +#include <string> // for string #include <vector> // for vector // libsemigroups.... #include <libsemigroups/aho-corasick.hpp> // for AhoCorasick, AhoCorasick::... +#include <libsemigroups/types.hpp> // for word_type // pybind11.... -#include <pybind11/pybind11.h> // for class_, init, module -#include <pybind11/stl.h> // for std::vector conversion +#include <pybind11/cast.h> // for arg +#include <pybind11/detail/common.h> // for const_, overload_cast, ove...
+#include <pybind11/detail/descr.h> // for operator+ +#include <pybind11/pybind11.h> // for class_, init, module +#include <pybind11/pytypes.h> // for sequence, str_attr_accessor +#include <pybind11/stl.h> // for std::vector conversion // libsemigroups_pybind11.... #include "main.hpp" // for init_aho_corasick