diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml new file mode 100644 index 0000000..7631024 --- /dev/null +++ b/.github/workflows/draft-pdf.yml @@ -0,0 +1,23 @@ +on: [push] + +jobs: + paper: + runs-on: ubuntu-latest + name: Paper Draft + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Build draft PDF + uses: openjournals/openjournals-draft-action@master + with: + journal: joss + # This should be the path to the paper within your repo. + paper-path: paper/paper.md + - name: Upload + uses: actions/upload-artifact@v1 + with: + name: paper + # This is the output path where Pandoc will write the compiled + # PDF. Note, this should be the same directory as the input + # paper.md + path: paper/paper.pdf diff --git a/paper/paper.bib b/paper/paper.bib new file mode 100644 index 0000000..f7ad329 --- /dev/null +++ b/paper/paper.bib @@ -0,0 +1,119 @@ +@book{chomsky1, + author = "Chomsky, N.", + title = "Some methodological remarks on generative grammar", + year = 1961, + publisher = {Aspects of the Theory of Syntax} +} +@book{chomsky2, + author = "Chomsky, N.", + title = "Syntactic structures", + year = 2002, + publisher = {Walter de Gruyter} +} +@inproceedings{Azimov, +author = {Azimov, Rustam and Grigorev, Semyon}, +title = {Context-Free Path Querying by Matrix Multiplication}, +year = {2018}, +isbn = {9781450356954}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/3210259.3210264}, +doi = {10.1145/3210259.3210264}, +booktitle = {Proceedings of the 1st ACM SIGMOD Joint International Workshop on Graph Data Management Experiences & Systems (GRADES) and Network Data Analytics (NDA)}, +articleno = {5}, +numpages = {10}, +keywords = {transitive closure, context-free path querying, context-free grammar, matrix multiplication, graph databases, GPGPU}, +location = {Houston, Texas}, +series = {GRADES-NDA ’18} +} +@inproceedings{Hellings2015QueryingFP, + title={Querying 
for Paths in Graphs using Context-Free Path Queries}, + author={Jelle Hellings}, + year={2015} +} +@article{Huber2007, +author={Huber, Wolfgang +and Carey, Vincent J. +and Long, Li +and Falcon, Seth +and Gentleman, Robert}, +title={Graphs in molecular biology}, +journal={BMC Bioinformatics}, +year={2007}, +month={Sep}, +day={27}, +volume={8}, +number={6}, +pages={S8}, +abstract={Graph theoretical concepts are useful for the description and analysis of interactions and relationships in biological systems. We give a brief introduction into some of the concepts and their areas of application in molecular biology. We discuss software that is available through the Bioconductor project and present a simple example application to the integration of a protein-protein interaction and a co-expression network.}, +issn={1471-2105}, +doi={10.1186/1471-2105-8-S6-S8}, +url={https://doi.org/10.1186/1471-2105-8-S6-S8} +} +@article{scott1988social, + title={Social network analysis}, + author={Scott, John}, + journal={Sociology}, + volume={22}, + number={1}, + pages={109--127}, + year={1988}, + publisher={British Sociological Association Publications Limited} +} +@article{ConjGrammars, +author = {Okhotin, Alexander}, +title = {Conjunctive Grammars}, +year = {2001}, +issue_date = {April 2001}, +publisher = {Otto-von-Guericke-Universitat}, +address = {DEU}, +volume = {6}, +number = {4}, +issn = {1430-189X}, +journal = {J. Autom. Lang. 
Comb.}, +month = apr, +pages = {519--535}, +numpages = {17}, +keywords = {parsing, intersection, conjunctive grammar, descriptional complexity, context-free grammar} +} +@article{BoolGrammars, +title = "Boolean grammars", +journal = "Information and Computation", +volume = "194", +number = "1", +pages = "19--48", +year = "2004", +issn = "0890-5401", +doi = "10.1016/j.ic.2004.03.006", +url = "http://www.sciencedirect.com/science/article/pii/S0890540104001075", +author = "Alexander Okhotin", +keywords = "Context-free grammar, Intersection, Complement, Language equation, Parsing, Conjunctive grammar, Trellis automaton, Cellular automaton", +abstract = "A new generalization of context-free grammars is introduced: Boolean grammars allow the use of all set-theoretic operations as an integral part of the formalism of rules. Rigorous semantics for these grammars is defined by language equations in a way that allows to generalize some techniques from the theory of context-free grammars, including Chomsky normal form, Cocke–Kasami–Younger cubic-time recognition algorithm and some limited extension of the notion of a parse tree, which together allow to conjecture practical applicability of the new concept." +} +@article{Sapir, + ISSN = {0003486X}, + URL = {http://www.jstor.org/stable/3597195}, + author = {Mark V.
Sapir and Jean-Camille Birget and Eliyahu Rips}, + journal = {Annals of Mathematics}, + number = {2}, + pages = {345--466}, + publisher = {Annals of Mathematics}, + title = {Isoperimetric and Isodiametric Functions of Groups}, + volume = {156}, + year = {2002} +} +@incollection{rotman1995word, + title={The Word Problem}, + author={Rotman, Joseph J}, + booktitle={An Introduction to the Theory of Groups}, + pages={418--470}, + year={1995}, + publisher={Springer} +} +@manual{GAP4, + key = "GAP", + organization = "The GAP~Group", + title = "{GAP -- Groups, Algorithms, and Programming, Version 4.11.1}", + year = 2021, + url = "\url{https://www.gap-system.org}", +} diff --git a/paper/paper.md b/paper/paper.md new file mode 100644 index 0000000..b16df40 --- /dev/null +++ b/paper/paper.md @@ -0,0 +1,64 @@ +--- +title: 'LangToGroup: A Haskell package for group presentation construction from formal language' +tags: + - Haskell + - formal languages + - group theory + - Turing machines + - group presentation construction +authors: + - name: Maksym Shamrai + affiliation: 3 + - name: Sergey Kuzivanov + affiliation: 1 + - name: Ekaterina Vinnik + affiliation: 1 + - name: Marin Gleb + affiliation: 4 + - name: Semyon Grigorev + affiliation: "1, 2" +affiliations: + - name: Saint Petersburg State University + index: 1 + - name: JetBrains Research + index: 2 + - name: Kyiv Academic University + index: 3 + - name: National Research University Higher School of Economics + index: 4 +date: ?? November 2021 +bibliography: paper.bib +--- + + +# Summary + + +The formal language analysis approach began to develop in the 1950s [@chomsky1; @chomsky2]. Since then, it has found application not only in areas directly related to programming but also, among other things, in the analysis of graph data models [@Hellings2015QueryingFP; @Azimov], which are used, for example, in bioinformatics [@Huber2007] and social networks [@scott1988social].
Development entails new problems and challenges. Several of them are associated with the advent of new language classes [@ConjGrammars; @BoolGrammars], but they are not restricted to that. + +Sometimes researchers find the traditional methods of their field insufficient and come up with new ones. It seems that the most obvious way to invent a new approach is to connect your field with another, already well-known field of study and try to apply the methods of that field. Sapir, Birget, and Rips did exactly that [@Sapir]. They linked group theory and computational complexity theory by constructing a group presentation from a Turing machine, and proved that a group presentation can be treated as a computational model. Thus, they presented a new approach for representing computation, which can accordingly be used to take a fresh look at open problems. + +Inspired by this, we wondered: what if we treat a formal language as a group presentation? This would allow us to use various group-theoretical methods for formal languages. Moreover, it allows experimenting with computational group theory algorithms [@GAP4]. However, constructing such a presentation by hand takes a lot of time and paper, so an efficient tool for automatic group presentation construction from a formal language is required. + + +# Statement of need + + +Modern research methods should not be limited to the traditional framework but should use all possible means to achieve a breakthrough result. LangToGroup provides new opportunities for researchers working on computational models. Accordingly, the tool adds ways to analyze a problem, and sometimes understanding is the key that leads to insights about the task. + +The LangToGroup package has been developed mainly for research in disciplines such as formal languages, the theory of computation, and group theory, but it may be of interest to researchers in many other fields. The tool connects "computation" with group theory.
That allows us to think mathematically about computational processes and use the concepts of group theory to analyze and solve problems. + + +# Solution description + + +LangToGroup is a Haskell console application for group presentation construction. Haskell was chosen due to its rich and convenient type system, +which we use to represent algorithmic types such as formal grammars, +Turing machines, S-machines, semigroup presentations, and group presentations. Furthermore, it is worth noting that the data types were intentionally described in the same way as in the definitions of the corresponding concepts. This was done to maintain maximum similarity to the articles [@Sapir; @rotman1995word]. + +The package provides the capability to obtain a presentation of a group from a Turing machine in two ways, described in [@Sapir] and [@rotman1995word]. Both of them build a presentation of a group for a Turing machine while preserving its language. The first one does it through the sequential construction of a symmetric Turing machine and an S-machine. The second one does it via building a semigroup presentation. Moreover, two modifications of the second approach, which output smaller presentations, were developed as well. The running time of the algorithms depends on the input grammar, but usually it is no more than a few minutes. + +From the beginning, the tool has been developed for the study of formal languages, which is why algorithms for constructing a Turing machine from context-free and Boolean languages have additionally been developed. Moreover, LangToGroup supports several types of input and output, and each step of the construction can be printed to a document in LaTeX format for convenient use. Therefore, LangToGroup presents a full pipeline of group presentation construction from formal languages and can also be used for construction from Turing machines. + + +# References