# TODO: Check if it wouldn't be better to go in the opposite direction,
# to have a ReStructured markdown compliant syntax everywhere.
def markdown_to_native(text):
    """
    Convert common Markdown syntax into the Mathics native documentation
    syntax.

    The conversions are applied in this order:

    1. Fenced Python code matched by ``MD_PYTHON_RE`` is handed to
       ``pre_sub`` wrapped as ``<python>...</python>``; the value returned
       by ``pre_sub`` is later given to ``post_sub``.
       NOTE(review): presumably ``pre_sub`` swaps each match for a
       placeholder that ``post_sub`` restores, which keeps code out of
       reach of the substitutions below -- confirm against their
       definitions.
    2. RsT constructs are converted by ``rst_to_native``.
    3. ``![caption](src){#label}`` becomes
       ``<imgpng src='src' title='caption' label='label'>``.
    4. ``![caption](src)`` becomes ``<imgpng src='src' title='caption'>``.
    5. ``[label](url)`` becomes ``<url>:label:url</url>``.
    6. ``<proto://address>`` becomes ``<url>proto://address</url>``.
    7. ``{#label}`` becomes `` \\label{label} `` (with surrounding spaces).

    Labelled images are handled before plain images, and images before
    plain references, because each later pattern also matches a part of
    the earlier ones.

    Parameters
    ----------
    text:
        Documentation text that may contain Markdown markup.

    Returns
    -------
    The text with the constructs above rewritten in the native syntax.
    """
    text, post_substitutions = pre_sub(
        MD_PYTHON_RE, text, lambda m: "<python>%s</python>" % m.group(1)
    )

    # First, convert some RsT syntax into the native format.
    text = rst_to_native(text)

    def repl_figs_with_label(match):
        caption, src, label = match.group(1, 2, 3)
        return f"<imgpng src='{src}' title='{caption}' label='{label}'>"

    text = MD_IMG_LABEL_RE.sub(repl_figs_with_label, text)

    def repl_figs(match):
        caption, src = match.group(1, 2)
        return f"<imgpng src='{src}' title='{caption}'>"

    text = MD_IMG_RE.sub(repl_figs, text)

    def repl_ref(match):
        label, reference = match.group(1, 2)
        return f"<url>:{label}:{reference}</url>"

    text = MD_REF_RE.sub(repl_ref, text)

    def repl_url(match):
        prot, reference = match.group(1, 2)
        return f"<url>{prot}://{reference}</url>"

    text = MD_URL_RE.sub(repl_url, text)

    def repl_labels(match):
        return r" \label{" + match.group(1) + "} "

    text = MD_TAG_RE.sub(repl_labels, text)

    # A second MD_PYTHON_RE pass used to run here with a callback that
    # returned None.  re.sub() treats a None replacement as an empty
    # string, so that pass could only ever delete text; fenced Python
    # code is already dealt with by the pre_sub() call above.

    return post_sub(text, post_substitutions)
+ # pre-substitute Python code because it might contain tests doc, post_substitutions = pre_sub( PYTHON_RE, doc, lambda m: "<python>%s</python>" % m.group(1) @@ -451,7 +535,7 @@ class DocText: """ def __init__(self, text): - self.text = text + self.text = markdown_to_native(text) def __str__(self) -> str: return self.text diff --git a/mathics/doc/documentation/1-Manual.mdoc b/mathics/doc/documentation/1-Manual.mdoc index fd10ee5c6..e364019e4 100644 --- a/mathics/doc/documentation/1-Manual.mdoc +++ b/mathics/doc/documentation/1-Manual.mdoc @@ -10,9 +10,9 @@ The programming language and built-in functions of \Mathics tries to match the \ \Mathics is in no way affiliated or supported by \Wolfram. \Mathics will probably never have the power to compete with \Mathematica in industrial applications; it is a free alternative though. It also invites community development at all levels. -See the <url>:installation instructions: https://mathics-development-guide.readthedocs.io/en/latest/installing/index.html</url> for the most recent instructions for installing from PyPI, or the source. +See the [installation instructions](https://mathics-development-guide.readthedocs.io/en/latest/installing/index.html) for the most recent instructions for installing from PyPI, or the source. -For implementation details, please refer to the <url>:Developers Guide:https://mathics-development-guide.readthedocs.io/en/latest/</url>. +For implementation details please refer to <https://mathics-development-guide.readthedocs.io/en/latest/>. <section title="Why try to recreate Wolfram Language?"> \Mathematica is great, but it a couple of disadvantages. @@ -30,10 +30,10 @@ However, even if you are willing to pay hundreds of dollars for the software, yo \Mathics combines the beauty of \Mathematica implemented in an open-source environment written in Python. 
The Python ecosystem includes libraries and toos like: <ul> - <li><url>:mpmath: https://mpmath.org/</url> for floating-point arithmetic with arbitrary precision, - <li><url>:NumPy: https://numpy.org</url> for numeric computation, - <li><url>:SymPy: https://sympy.org</url> for symbolic mathematics, and - <li><url>:SciPy: https://www.scipy.org/</url> for Scientific calculations. + <li>[mpmath](https://mpmath.org/) for floating-point arithmetic with arbitrary precision, + <li>[NumPy](https://numpy.org) for numeric computation, + <li>[SymPy](https://sympy.org) for symbolic mathematics, and + <li>optionally [SciPy](https://www.scipy.org/) for Scientific calculations. </ul> Performance of \Mathics is not, right now, practical in large-scale projects and calculations. However can be used as a tool for exploration and education. @@ -53,9 +53,10 @@ Outside of the "core" \Mathics kernel (which has a only primitive command-line i <ul> <li>a <url>:command-line interface:https://pypi.org/project/mathicsscript/</url> using either <url>:prompt-toolkit:https://python-prompt-toolkit.readthedocs.io/en/master/</url>, or GNU Readline <li>a <url>:Django-based web server:https://pypi.org/project/Mathics-Django/</url> - <li>a <url>:Mathics3 module for Graphs:https://pypi.org/project/pymathics-graph/</url> (via <url>:NetworkX:https://networkx.org/</url>), - <li>a <url>:Mathics3 module for NLP:https://pypi.org/project/pymathics-natlang/</url> (via <url>:nltk:https://www.nltk.org/</url>, <url>:spacy:https://spacy.io/</url>, and others) - <li>a <url>:A docker container:https://hub.docker.com/r/mathicsorg/mathics</url> which bundles all of the above + <li>a command-line interface using either prompt-toolkit, or GNU Readline + <li>a [Mathics3 module for Graphs](https://pypi.org/project/pymathics-graph/) (via [NetworkX](https://networkx.org/)), + <li>a [Mathics3 module for NLP](https://pypi.org/project/pymathics-natlang/) (via [nltk](https://www.nltk.org/), [spacy](https://spacy.io/), and 
"""
Minimal parser for ReStructuredText

This module provides compatibility support for RsT syntax
in the Mathics documentation system.

We cannot use a standard library like docutils or sphinx because,
for now, the documentation is written in a Mathics-specific syntax,
and for a while both syntaxes will have to coexist.

"""

import re
from typing import Optional

# A directive line (".. name:: args") followed by its body: one or more
# lines that are either blank or start with at least one space.
RST_BLOCK_RE = re.compile(r"^\.\.\s+(.*)\n((?:^[ ]+.*\n|^\n)+)", re.MULTILINE)
# An RsT hyperlink: `label <url>`_ or, with two underscores, `label <url>`__
RST_URL_RE = re.compile(r"`(?P<label>.*?)\<(?P<url>.*?)\>`_(?P<under>_?)")


# Maps a directive name to a handler ``(head, block) -> str or None``.
PROCESS_RST_BLOCK = {}

# Value reported by indent_level() for lines that hold no text.  It is
# larger than any indentation expected in a docstring, so blank lines never
# decide the common indentation of a block.
_BLANK_LINE_INDENT = 80


def indent_level(line_str: str) -> int:
    """
    Compute the number of whitespace characters at the left
    of a string.

    Lines that are empty or contain only whitespace return 80.
    """
    stripped = line_str.lstrip()
    if not stripped:
        return _BLANK_LINE_INDENT
    return len(line_str) - len(stripped)


def normalize_indent(text: str, omit_first_line: bool = True) -> str:
    """
    Remove the indentation shared by the lines of ``text``.

    Usually a docstring is indented as much as the code it belongs to.
    For processing the documentation it is useful to remove that common
    indentation.

    In a docstring the first line usually has a different indentation,
    because its "indentation" lies before the opening quotes.
    ``omit_first_line`` controls whether that line is left out: when true,
    the first line neither contributes to the common indentation nor is
    modified.

    ``text`` is returned unchanged when it has fewer than two lines, when
    the common indentation is zero, or when it is 80 or more.  Otherwise
    the lines are re-joined with a newline character, so a trailing line
    terminator of ``text`` is not preserved.
    """
    lines = text.splitlines()
    if len(lines) < 2:
        return text

    body = lines[1:] if omit_first_line else lines
    margin = min(indent_level(line) for line in body)
    if margin == 0 or margin >= _BLANK_LINE_INDENT:
        return text

    # Slicing a line shorter than the margin (blank lines) yields "".
    dedented = [line[margin:] for line in body]
    if omit_first_line:
        dedented.insert(0, lines[0])
    return "\n".join(dedented)


def process_image_block(head: str, block: str) -> Optional[str]:
    """
    Convert an ``image`` directive into an ``<imgpng ...>`` tag.

    ``head`` is the directive line without the leading dots, for example
    ``"image:: picture.jpeg"``; everything after the first ``::`` is used
    verbatim as the ``src`` attribute.  Each line of ``block`` of the form
    ``:key: value`` adds a ``key='value'`` attribute; other lines are
    ignored.

    Returns ``None`` when ``head`` contains no ``::``.
    """
    _, marker, src = head.partition("::")
    if not marker:
        return None

    keys = f" src='{src}'"
    for line in block.splitlines():
        # Split on the first two colons only, so that values which contain
        # a colon themselves (e.g. URLs) are kept instead of being skipped.
        fields = line.strip().split(":", 2)
        if len(fields) != 3:
            continue
        _, key, val = fields
        keys += f" {key}='{val.strip()}'"
    return f"<imgpng {keys}>"


PROCESS_RST_BLOCK["image"] = process_image_block


def process_code_block(head: str, block: str) -> Optional[str]:
    """
    Convert a ``code`` directive into the native syntax.

    The language is whatever follows ``::`` in ``head``, compared without
    regard to case:

    * ``python``: the block is wrapped in ``<python>...</python>``; a block
      of several lines has its common indentation removed first.
    * ``mathics``: the first line is prefixed with the ``>>`` prompt and
      the remaining lines are re-indented with seven spaces.

    Returns ``None`` when the block is blank, or when the language is
    missing or is not one of the above.
    """
    if block.strip() == "":
        return None

    # partition() yields "" when there is no "::" at all, where indexing
    # the result of split("::") would raise IndexError.
    lang = head.partition("::")[2].strip().lower()

    if lang == "python":
        lines = block.splitlines()
        if len(lines) == 1:
            return f"<python>{lines[0]}</python>"
        code = normalize_indent(block, False)
        return f"<python>\n{code}</python>"

    if lang == "mathics":
        indentation = 7 * " "
        lines = [
            indentation + line.lstrip() if idx else line.lstrip()
            for idx, line in enumerate(block.splitlines())
        ]
        return " >> " + "\n".join(lines)

    return None


PROCESS_RST_BLOCK["code"] = process_code_block


# TODO: Check if it wouldn't be better to go in the opposite direction,
# to have a ReStructured markdown compliant syntax everywhere.
def rst_to_native(text):
    """
    Convert RsT syntax to the Mathics XML
    native documentation syntax.

    Two constructs are handled:

    * hyperlinks ```label <url>`_`` become ``<url>:label:url</url>``; a
      link with an empty label and two trailing underscores becomes
      ``<url>url</url>``;
    * directives (``.. name:: args`` followed by an indented body) whose
      name is registered in ``PROCESS_RST_BLOCK`` are replaced by the
      output of the registered handler followed by a newline.

    Directives that are not registered, or for which the handler returns
    ``None``, are left untouched.
    """

    def repl_url(match):
        label = match.group("label").strip()
        url = match.group("url").strip()
        private = match.group("under") == "_"
        if label == "" and private:
            return f"<url>{url}</url>"
        return f"<url>:{label}:{url}</url>"

    text = RST_URL_RE.sub(repl_url, text)

    def repl_block(match):
        head = match.group(1)
        block = match.group(2)
        block_type = head.split(" ")[0].split("::")[0].strip()

        processor = PROCESS_RST_BLOCK.get(block_type)
        if processor is None:
            # Unknown directive: keep the original text.
            return match.group(0)

        lines = block.splitlines()
        last_line = lines[-1]
        if last_line and last_line[0] != " ":
            # A trailing unindented line does not belong to the block;
            # it is put back after the converted text.
            lines = lines[:-1]
            block = "\n".join(lines)
        else:
            last_line = ""

        result = processor(head, block)
        if result is None:
            # re.sub() treats a None replacement as an empty string, which
            # would silently delete the block; keep the source text.
            return match.group(0)
        return result + "\n" + last_line

    text = RST_BLOCK_RE.sub(repl_block, text)

    return text
example", + "chapter example", + "section example", + ), + )[0].text + assert result == output_str diff --git a/test/doc/test_rst_parser.py b/test/doc/test_rst_parser.py new file mode 100644 index 000000000..a0115fffb --- /dev/null +++ b/test/doc/test_rst_parser.py @@ -0,0 +1,68 @@ +from mathics.doc.doc_entries import normalize_indent +from mathics.doc.rst_parser import rst_to_native + +RST_EXAMPLES = ( + ( + """ + See the picture: + + .. image:: picture.jpeg + :height: 100px + :width: 200 px + :scale: 50 % + :loading: embed + :alt: alternate text + :align: right + + The previous image ilustrates something. + + Then, let's consider the following code: + + .. code:: python + def f(x): + return x**2 + + f(2) + + which computes the square of 2. + + In Mathics, the same can be done in the following way: + + .. code:: mathics + F[x_]:=x^2 + F[2] + """, + """ + See the picture: + + <imgpng src=' picture.jpeg' height='100px' width='200 px' scale='50 %' loading='embed' alt='alternate text' align='right'> + The previous image ilustrates something. + + Then, let's consider the following code: + + <python> + def f(x): + return x**2 + + f(2) + </python> + which computes the square of 2. + + In Mathics, the same can be done in the following way: + + >> F[x_]:=x^2 + F[2] + +""", + ), +) + + +def test_rst_parser(): + """ + Test the convertion of RsT text to + the native syntax. + """ + for text, expected in RST_EXAMPLES: + result = rst_to_native(normalize_indent(text)) + assert normalize_indent(expected) == result