diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index f516353..197a2e2 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -1,4 +1,4 @@ -name: Python package +name: Python package tests on: [push] diff --git a/.gitignore b/.gitignore index 7bbc71c..b4cf3c6 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ nosetests.xml coverage.xml *.cover .hypothesis/ +.includescache # Translations *.mo diff --git a/README.md b/README.md index f246c0f..55688c9 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![](https://img.shields.io/pypi/v/foliantcontrib.includes.svg)](https://pypi.org/project/foliantcontrib.includes/) [![](https://img.shields.io/github/v/tag/foliant-docs/foliantcontrib.includes.svg?label=GitHub)](https://github.com/foliant-docs/foliantcontrib.includes) +[![](https://img.shields.io/pypi/v/foliantcontrib.includes.svg)](https://pypi.org/project/foliantcontrib.includes/) [![](https://img.shields.io/github/v/tag/foliant-docs/foliantcontrib.includes.svg?label=GitHub)](https://github.com/foliant-docs/foliantcontrib.includes) [![Tests](https://github.com/foliant-docs/foliantcontrib.includes/actions/workflows/python-test.yml/badge.svg)](https://github.com/foliant-docs/foliantcontrib.includes/actions/workflows/python-test.yml) # Includes for Foliant @@ -32,6 +32,7 @@ preprocessors: - j2 aliases: ... + includes_map: true ``` `cache_dir` @@ -79,6 +80,11 @@ Default `true`. Note that in the second example the default revision (`develop`) will be overridden with the custom one (`master`). +`includes_map` +: Enables generation of the `includes_map.json` file containing information about files inserted using the includes preprocessor. + + From this file, third-party services can receive information about the presence of inclusions in files, for example, to check links using a linter. + ## Usage The preprocessor allows two syntax variants for include statements. 
diff --git a/README_ru.md b/README_ru.md index ddede50..dd652e2 100644 --- a/README_ru.md +++ b/README_ru.md @@ -1,4 +1,4 @@ -[![](https://img.shields.io/pypi/v/foliantcontrib.includes.svg)](https://pypi.org/project/foliantcontrib.includes/) [![](https://img.shields.io/github/v/tag/foliant-docs/foliantcontrib.includes.svg?label=GitHub)](https://github.com/foliant-docs/foliantcontrib.includes) +[![](https://img.shields.io/pypi/v/foliantcontrib.includes.svg)](https://pypi.org/project/foliantcontrib.includes/) [![](https://img.shields.io/github/v/tag/foliant-docs/foliantcontrib.includes.svg?label=GitHub)](https://github.com/foliant-docs/foliantcontrib.includes) [![Tests](https://github.com/foliant-docs/foliantcontrib.includes/actions/workflows/python-test.yml/badge.svg)](https://github.com/foliant-docs/foliantcontrib.includes/actions/workflows/python-test.yml) # Препроцессор Includes для Foliant @@ -34,6 +34,7 @@ preprocessors: - j2 aliases: ... + includes_map: true ``` `cache_dir` @@ -64,6 +65,10 @@ preprocessors: `aliases` : Сопоставление псевдонимов с URL-адресами репозитория Git. После определения этого параметра псевдоним может использоваться для ссылки на репозиторий вместо его полного URL-адреса. +`includes_map` +: Включает генерацию файла `includes_map.json`, содержащего информацию о файлах, вставленных с помощью препроцессора includes. + Из этого файла сторонние сервисы могут получать информацию о наличии текста вставленного в файл с помощью препроцессора, например, для проверки ссылок с помощью линтера. + >**Внимание!** > > Псевдонимы доступны только в рамках устаревшего синтаксиса инструкций include (см. 
ниже) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index eb6a582..c153e3c 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -6,6 +6,8 @@ from pathlib import Path import socket from subprocess import run, CalledProcessError, PIPE, STDOUT +from json import dump +from os import getcwd from foliant.preprocessors.base import BasePreprocessor @@ -43,11 +45,45 @@ def __init__(self, *args, **kwargs): self._cache_dir_path = self.project_path / self.options['cache_dir'] self._downloaded_dir_path = self._cache_dir_path / '_downloaded_content' + self.src_dir = self.config.get("src_dir") + self.includes_map_enable = True # TODO:set the default value to False + self.includes_map_anchors = True # TODO:set the default value to False + if 'includes_map' in self.options: + self.includes_map_enable = True + if type(self.options['includes_map']) != bool and 'anchors' in self.options['includes_map']: + self.includes_map_anchors = True + + if self.includes_map_enable: + self.includes_map = [] + self.enable_clean_tokens = True + + self.chapters = [] + self.chapters_list(self.config["chapters"], self.chapters) # converting chapters to a list self.logger = self.logger.getChild('includes') self.logger.debug(f'Preprocessor inited: {self.__dict__}') + def chapters_list(self, obj, chapters: list) -> list: + '''Converting chapters to a list + :param config_chapters: Chapters from config + :param chapters: List of chapters + ''' + if isinstance(obj, list): + for item in obj: + if isinstance(item, str): + chapters.append(f"{self.src_dir}/{item}") + else: + self.chapters_list(item, chapters) + elif isinstance(obj, Path): + chapters.append(f"{self.src_dir}/{obj.as_posix()}") + elif isinstance(obj, object): + for k, v in obj.items(): + if isinstance(v, str): + chapters.append(f"{self.src_dir}/{v}") + else: + self.chapters_list(v, chapters) + def _find_file( self, file_name: str, @@ -162,7 +198,7 @@ def 
_download_file_from_url(self, url: str) -> Path: for line in dict_new_link: downloaded_content = downloaded_content.replace(line, dict_new_link[line]) - # End of the conversion code block + # End of the conversion code block with open(downloaded_file_path, 'w', encoding='utf8') as downloaded_file: @@ -217,6 +253,8 @@ def _sync_repo( except CalledProcessError as exception: self.logger.warning(str(exception)) + except Exception as exception: + self.logger.warning(str(exception)) else: self.logger.error(str(exception)) @@ -684,7 +722,7 @@ def _get_included_file_path( ) self.logger.debug(f'Finally, included file path: {included_file_path}') - + return included_file_path def _process_include( @@ -699,7 +737,7 @@ def _process_include( sethead: int or None = None, nohead: bool = False, include_link: str or None = None - ) -> str: + ) -> (str, list): '''Replace a local include statement with the file content. Necessary adjustments are applied to the content: cut between certain headings, strip the top heading, set heading level. 
@@ -723,8 +761,10 @@ def _process_include( f'Included file path: {included_file_path}, from heading: {from_heading}, ' + f'to heading: {to_heading}, sethead: {sethead}, nohead: {nohead}' ) - - + + anchors = [] + + if included_file_path.exists(): included_file_path = included_file_path else: @@ -757,9 +797,9 @@ def _process_include( old_found_link = regexp_find_link.findall(included_content) - for line in old_found_link: + for line in old_found_link: relative_path = regexp_find_path.findall(line) - + for ex_line in relative_path: exceptions_characters = re.findall(r'https?://[^\s]+|@|:|\.png|\.jpeg|.svg', ex_line) if exceptions_characters: @@ -771,7 +811,26 @@ def _process_include( for line in dict_new_link: included_content = included_content.replace(line, dict_new_link[line]) - # End of the conversion code block + # End of the conversion code block + + # Removing metadata from content before including + + included_content = remove_meta(included_content) + + included_content = self._cut_from_position_to_position( + included_content, + from_heading, + to_heading, + from_id, + to_id, + to_end, + sethead, + nohead + ) + + # Find anchors + if self.includes_map_anchors: + anchors = self._add_anchors(anchors, included_content) if self.config.get('escape_code', False): if isinstance(self.config['escape_code'], dict): @@ -793,21 +852,6 @@ def _process_include( escapecode_options ).escape(included_content) - # Removing metadata from content before including - - included_content = remove_meta(included_content) - - included_content = self._cut_from_position_to_position( - included_content, - from_heading, - to_heading, - from_id, - to_id, - to_end, - sethead, - nohead - ) - included_content = self._adjust_image_paths(included_content, included_file_path) if project_root_path: @@ -829,7 +873,86 @@ def _process_include( included_file_path.parent ) - return included_content + return included_content, anchors + + def _prepare_path_for_includes_map(self, path: Path) -> str: +
"""Preparing the path of the inserted file for the includes map + + :param path: The path to the Markdown file to be inserted + + :returns: The path that will be used in the includes map + """ + donor_path = None + if path.as_posix().startswith(self.working_dir.as_posix()): + _path = path.relative_to(self.working_dir) + donor_path = f"{self.src_dir}/{_path.as_posix()}" + elif path.as_posix().startswith(getcwd()): + _path = path.relative_to(getcwd()) + if _path.as_posix().startswith(self.working_dir.as_posix()): + _path = _path.relative_to(self.working_dir) + if _path.as_posix().startswith(self.working_dir.as_posix()): + donor_path = f"{self.src_dir}/{_path.relative_to(self.working_dir).as_posix()}" + else: + donor_path = f"{self.src_dir}/{_path.as_posix()}" + else: + donor_path = _path.as_posix() + return donor_path + + def _exist_in_includes_map(self, map: list, path: str) -> bool: + """Is there a path on the includes map + + :param map: Includes map + :param path: Path + + :returns: True or False + """ + for obj in map: + if obj["file"] == path: + return True + return False + + def _find_anchors(self, content: str) -> list: + """Search for anchor links in the text + + :param content: Markdown content + + :returns: List of anchor links + """ + anchors_list = [] + + anchors = re.findall(r'\<anchor\>([\-\_A-Za-z0-9]+)\<\/anchor\>', content) + for anchor in anchors: + anchors_list.append(anchor) + custom_ids = re.findall(r'\{\#([\-\_A-Za-z0-9]+)\}', content) + for anchor in custom_ids: + anchors_list.append(anchor) + elements_with_ids = re.findall(r'id\=[\"\']([\-\_A-Za-z0-9]+)[\"\']', content) + for anchor in elements_with_ids: + anchors_list.append(anchor) + return anchors_list + + def _add_anchors(self, l: list, content: str) -> list: + """Add an anchor link to the list of anchor links + + :param l: The original list + :param content: Markdown content + + :returns: A list with added anchors + """ + anchors = self._find_anchors(content) + if len(anchors) > 0: + for anchor
in anchors: + l.append(anchor) + return l + + def clean_tokens(self, url: str) -> str: + if self.enable_clean_tokens: + try: + s = re.sub(r"(https*://)(.*)@(.*)", r"\1\3", url) + except: + s = url + + return s def process_includes( self, @@ -850,6 +973,12 @@ def process_includes( :returns: Markdown content with resolved includes ''' + if self.includes_map_enable: + if markdown_file_path.as_posix().startswith(self.working_dir.as_posix()): + recipient_md_path = f'{self.src_dir}/{markdown_file_path.relative_to(self.working_dir).as_posix()}' + else: + recipient_md_path = f'{self.src_dir}/{markdown_file_path.as_posix()}' + markdown_file_path = markdown_file_path.resolve() self.logger.debug(f'Processing Markdown file: {markdown_file_path}') @@ -867,6 +996,10 @@ def process_includes( include_statement = self.pattern.fullmatch(content_part) if include_statement: + if self.includes_map_enable: + donor_md_path = None + donor_anchors = [] + current_project_root_path = project_root_path body = self._tag_body_pattern.match(include_statement.group('body').strip()) @@ -893,25 +1026,27 @@ def process_includes( self.logger.debug(f'Set new current sethead: {current_sethead}') - # If the tag body is not empty, the legacy syntax is expected: - # - # - # ($repo_url#revision$path|src)#from_heading:to_heading - # - # - # If the tag body is empty, the new syntax is expected: - # - # + """ + If the tag body is not empty, the legacy syntax is expected: + + + ($repo_url#revision$path|src)#from_heading:to_heading + + + If the tag body is empty, the new syntax is expected: + + + """ if body: self.logger.debug('Using the legacy syntax rules') @@ -950,6 +1085,12 @@ def process_includes( included_file_path = repo_path / body.group('path') + if self.includes_map_enable: + donor_md_path = included_file_path.as_posix() + donor_md_path = self.clean_tokens(donor_md_path) + self.logger.debug(f'Set the repo URL of the included file to {recipient_md_path}: {donor_md_path} (1)') + + if 
included_file_path.name.startswith('^'): included_file_path = self._find_file( included_file_path.name[1:], included_file_path.parent @@ -963,7 +1104,7 @@ def process_includes( self.logger.debug(f'Set new current project root path: {current_project_root_path}') - processed_content_part = self._process_include( + processed_content_part, anchors = self._process_include( included_file_path=included_file_path, project_root_path=current_project_root_path, from_heading=body.group('from_heading'), @@ -972,6 +1113,9 @@ def process_includes( nohead=options.get('nohead') ) + if self.includes_map_enable and self.includes_map_anchors: + donor_anchors = donor_anchors + anchors + else: self.logger.debug('Local file referenced') @@ -991,7 +1135,7 @@ def process_includes( self.logger.debug(f'Set new current project root path: {current_project_root_path}') - processed_content_part = self._process_include( + processed_content_part, anchors = self._process_include( included_file_path=included_file_path, project_root_path=current_project_root_path, from_heading=body.group('from_heading'), @@ -1000,7 +1144,15 @@ def process_includes( nohead=options.get('nohead') ) - else: # if body + if self.includes_map_enable: + donor_md_path = self._prepare_path_for_includes_map(included_file_path) + donor_md_path = self.clean_tokens(donor_md_path) + self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (2)') + + if self.includes_map_enable and self.includes_map_anchors: + donor_anchors = donor_anchors + anchors + + else: # if body is missing self.logger.debug('Using the new syntax rules') if options.get('repo_url') and options.get('path'): @@ -1023,7 +1175,7 @@ def process_includes( self.logger.debug(f'Set new current project root path: {current_project_root_path}') - processed_content_part = self._process_include( + processed_content_part, anchors = self._process_include( included_file_path=included_file_path, project_root_path=current_project_root_path, 
from_heading=options.get('from_heading'), @@ -1036,6 +1188,14 @@ def process_includes( include_link=include_link ) + if self.includes_map_enable: + donor_md_path = include_link + options.get('path') + donor_md_path = self.clean_tokens(donor_md_path) + self.logger.debug(f'Set the link of the included file to {recipient_md_path}: {donor_md_path} (3)') + + if self.includes_map_enable and self.includes_map_anchors: + donor_anchors = donor_anchors + anchors + elif options.get('url'): self.logger.debug('File to get by URL referenced') @@ -1050,7 +1210,7 @@ def process_includes( self.logger.debug(f'Set new current project root path: {current_project_root_path}') - processed_content_part = self._process_include( + processed_content_part, anchors = self._process_include( included_file_path=included_file_path, project_root_path=current_project_root_path, from_heading=options.get('from_heading'), @@ -1062,11 +1222,18 @@ def process_includes( nohead=options.get('nohead') ) + if self.includes_map_enable: + donor_md_path = options['url'] + donor_md_path = self.clean_tokens(donor_md_path) + self.logger.debug(f'Set the URL of the included file to {recipient_md_path}: {donor_md_path} (4)') + + if self.includes_map_enable and self.includes_map_anchors: + donor_anchors = donor_anchors + anchors + elif options.get('src'): self.logger.debug('Local file referenced') included_file_path = self._get_included_file_path(options.get('src'), markdown_file_path) - self.logger.debug(f'Resolved path to the included file: {included_file_path}') if options.get('project_root'): @@ -1076,7 +1243,7 @@ def process_includes( self.logger.debug(f'Set new current project root path: {current_project_root_path}') - processed_content_part = self._process_include( + processed_content_part, anchors = self._process_include( included_file_path=included_file_path, project_root_path=current_project_root_path, from_heading=options.get('from_heading'), @@ -1087,6 +1254,15 @@ def process_includes( 
sethead=current_sethead, nohead=options.get('nohead') ) + + if self.includes_map_enable: + donor_md_path = self._prepare_path_for_includes_map(included_file_path) + donor_md_path = self.clean_tokens(donor_md_path) + self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (5)') + + if self.includes_map_enable and self.includes_map_anchors: + donor_anchors = donor_anchors + anchors + else: self.logger.warning( 'Neither repo_url+path nor src specified, ignoring the include statement' @@ -1144,6 +1320,25 @@ def process_includes( processed_content_part = re.sub(r'\s+', ' ', processed_content_part).strip() + if self.includes_map_enable: + if donor_md_path: + if recipient_md_path in self.chapters or "index.md" in recipient_md_path: + if not self._exist_in_includes_map(self.includes_map, recipient_md_path): + if not self.includes_map_anchors or len(donor_anchors) == 0: + self.includes_map.append({ 'file': recipient_md_path, "includes": []}) + else: + self.includes_map.append({ 'file': recipient_md_path, "includes": [], 'anchors': []}) + + for i, f in enumerate(self.includes_map): + if f['file'] == recipient_md_path: + self.includes_map[i]['includes'].append(donor_md_path) + + if self.includes_map_anchors: + for anchor in donor_anchors: + if not 'anchors' in self.includes_map[i]: + self.includes_map[i]['anchors'] = [] + self.includes_map[i]['anchors'].append(anchor) + else: processed_content_part = content_part @@ -1179,7 +1374,7 @@ def _get_source_files_extensions(self) -> list: return source_files_extensions def apply(self): - + self.logger.info('Applying preprocessor') # Cleaning up downloads because the content of remote source may have modified @@ -1202,4 +1397,12 @@ def apply(self): with open(source_file_path, 'w', encoding='utf8') as processed_file: processed_file.write(processed_content) + # Write includes map + if self.includes_map_enable: + output = f'{self.working_dir}/static/includes_map.json' + 
Path(f'{self.working_dir}/static/').mkdir(parents=True, exist_ok=True) + with open(f'{self.working_dir}/static/includes_map.json', 'w', encoding='utf8') as f: + dump(self.includes_map, f) + self.logger.debug(f'includes_map write to {output}') + self.logger.info('Preprocessor applied') diff --git a/setup.py b/setup.py index d518e3e..7c430a3 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ description=SHORT_DESCRIPTION, long_description=LONG_DESCRIPTION, long_description_content_type='text/markdown', - version='1.1.17', + version='1.1.19', author='Konstantin Molchanov', author_email='moigagoo@live.com', url='https://github.com/foliant-docs/foliantcontrib.includes', diff --git a/test/test_includes.py b/test/test_includes.py index d5638ce..049de10 100644 --- a/test/test_includes.py +++ b/test/test_includes.py @@ -243,3 +243,21 @@ def test_extensions(self): 'index.j2': '# My title\n\nIncluded content', 'sub/sub.md': 'Included content' } + + def test_includes_map(self): + self.ptf.options = {'includes_map': True } + input_map = { + 'index.md': '# My title\n\n<include src="sub/sub-1.md"></include>\n\n<include src="sub/sub-2.md"></include>', + 'sub/sub-1.md': 'Included content 1', + 'sub/sub-2.md': 'Included content 2' + } + expected_map = { + 'index.md': '# My title\n\nIncluded content 1\n\nIncluded content 2', + 'static/includes_map.json': "[{\"file\": \"__src__/index.md\", \"includes\": [\"__src__/sub/sub-1.md\", \"__src__/sub/sub-2.md\"]}]", + 'sub/sub-1.md': 'Included content 1', + 'sub/sub-2.md': 'Included content 2' + } + self.ptf.test_preprocessor( + input_mapping=input_map, + expected_mapping=expected_map, + ) diff --git a/test_in_docker.sh b/test_in_docker.sh new file mode 100755 index 0000000..66dc98b --- /dev/null +++ b/test_in_docker.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +docker run --rm -it \ + -v "./:/app/" \ + --workdir "/app/" \ + python:3.9 "/app/test.sh" \ No newline at end of file