From e13da3a78e7f0e677d7da40d6f1cfbbee35c3526 Mon Sep 17 00:00:00 2001 From: Timur Osmanov <54434686+TOsmanov@users.noreply.github.com> Date: Wed, 16 Oct 2024 12:44:09 +0300 Subject: [PATCH] add: generation of the includes map (#27) * added includes map generation * fix * update * fix * update and add debug * update debug * fix case 3 * update * add option * fix import * remove uncessary comments * update * add test includes map * update README.md * update python-test.yml * test * add: anchors into include map * test: enabled by default * fix: tests and remove anchors * update: README * bump version --- .github/workflows/python-test.yml | 2 +- README.md | 8 +- README_ru.md | 7 +- changelog.md | 4 + foliant/preprocessors/includes.py | 121 +++++++++++++++++++++++++++--- setup.py | 2 +- test/test_includes.py | 18 +++++ 7 files changed, 148 insertions(+), 14 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index f516353..197a2e2 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -1,4 +1,4 @@ -name: Python package +name: Python package tests on: [push] diff --git a/README.md b/README.md index f246c0f..55688c9 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![](https://img.shields.io/pypi/v/foliantcontrib.includes.svg)](https://pypi.org/project/foliantcontrib.includes/) [![](https://img.shields.io/github/v/tag/foliant-docs/foliantcontrib.includes.svg?label=GitHub)](https://github.com/foliant-docs/foliantcontrib.includes) +[![](https://img.shields.io/pypi/v/foliantcontrib.includes.svg)](https://pypi.org/project/foliantcontrib.includes/) [![](https://img.shields.io/github/v/tag/foliant-docs/foliantcontrib.includes.svg?label=GitHub)](https://github.com/foliant-docs/foliantcontrib.includes) 
[![Tests](https://github.com/foliant-docs/foliantcontrib.includes/actions/workflows/python-test.yml/badge.svg)](https://github.com/foliant-docs/foliantcontrib.includes/actions/workflows/python-test.yml) # Includes for Foliant @@ -32,6 +32,7 @@ preprocessors: - j2 aliases: ... + includes_map: true ``` `cache_dir` @@ -79,6 +80,11 @@ Default `true`. Note that in the second example the default revision (`develop`) will be overridden with the custom one (`master`). +`includes_map` +: Enables generation of the `includes_map.json` file containing information about files inserted using the includes preprocessor. + + From this file, third-party services can receive information about the presence of inclusions in files, for example, to check links using a linter. + ## Usage The preprocessor allows two syntax variants for include statements. diff --git a/README_ru.md b/README_ru.md index ddede50..dd652e2 100644 --- a/README_ru.md +++ b/README_ru.md @@ -1,4 +1,4 @@ -[![](https://img.shields.io/pypi/v/foliantcontrib.includes.svg)](https://pypi.org/project/foliantcontrib.includes/) [![](https://img.shields.io/github/v/tag/foliant-docs/foliantcontrib.includes.svg?label=GitHub)](https://github.com/foliant-docs/foliantcontrib.includes) +[![](https://img.shields.io/pypi/v/foliantcontrib.includes.svg)](https://pypi.org/project/foliantcontrib.includes/) [![](https://img.shields.io/github/v/tag/foliant-docs/foliantcontrib.includes.svg?label=GitHub)](https://github.com/foliant-docs/foliantcontrib.includes) [![Tests](https://github.com/foliant-docs/foliantcontrib.includes/actions/workflows/python-test.yml/badge.svg)](https://github.com/foliant-docs/foliantcontrib.includes/actions/workflows/python-test.yml) # Препроцессор Includes для Foliant @@ -34,6 +34,7 @@ preprocessors: - j2 aliases: ... + includes_map: true ``` `cache_dir` @@ -64,6 +65,10 @@ preprocessors: `aliases` : Сопоставление псевдонимов с URL-адресами репозитория Git. 
После определения этого параметра псевдоним может использоваться для ссылки на репозиторий вместо его полного URL-адреса. +`includes_map` +: Включает генерацию файла `includes_map.json`, содержащего информацию о файлах, вставленных с помощью препроцессора includes. + Из этого файла сторонние сервисы могут получать информацию о наличии текста, вставленного в файл с помощью препроцессора, например, для проверки ссылок с помощью линтера. + >**Внимание!** > > Псевдонимы доступны только в рамках устаревшего синтаксиса инструкций include (см. ниже) diff --git a/changelog.md b/changelog.md index dfb0923..f012ea8 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,7 @@ +# 1.1.18 + +- Add: option for generation of the includes map containing information about files inserted using the preprocessor. + # 1.1.17 - Fix: fixed a link processing error for files with diagrams diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index eb6a582..7c24ce6 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -6,6 +6,8 @@ from pathlib import Path import socket from subprocess import run, CalledProcessError, PIPE, STDOUT +from json import dump +from os import getcwd from foliant.preprocessors.base import BasePreprocessor @@ -43,6 +45,12 @@ def __init__(self, *args, **kwargs): self._cache_dir_path = self.project_path / self.options['cache_dir'] self._downloaded_dir_path = self._cache_dir_path / '_downloaded_content' + self.src_dir = self.config.get("src_dir") + self.includes_map_enable = False + if 'includes_map' in self.options: + self.includes_map_enable = True + if self.includes_map_enable: + self.includes_map = [] self.logger = self.logger.getChild('includes') @@ -162,7 +170,7 @@ def _download_file_from_url(self, url: str) -> Path: for line in dict_new_link: downloaded_content = downloaded_content.replace(line, dict_new_link[line]) - # End of the conversion code block + # End of the conversion code block with 
open(downloaded_file_path, 'w', encoding='utf8') as downloaded_file: @@ -217,6 +225,8 @@ def _sync_repo( except CalledProcessError as exception: self.logger.warning(str(exception)) + except Exception as exception: + self.logger.warning(str(exception)) else: self.logger.error(str(exception)) @@ -684,7 +694,7 @@ def _get_included_file_path( ) self.logger.debug(f'Finally, included file path: {included_file_path}') - + return included_file_path def _process_include( @@ -723,8 +733,8 @@ def _process_include( f'Included file path: {included_file_path}, from heading: {from_heading}, ' + f'to heading: {to_heading}, sethead: {sethead}, nohead: {nohead}' ) - - + + if included_file_path.exists(): included_file_path = included_file_path else: @@ -757,9 +767,9 @@ def _process_include( old_found_link = regexp_find_link.findall(included_content) - for line in old_found_link: + for line in old_found_link: relative_path = regexp_find_path.findall(line) - + for ex_line in relative_path: exceptions_characters = re.findall(r'https?://[^\s]+|@|:|\.png|\.jpeg|.svg', ex_line) if exceptions_characters: @@ -771,7 +781,7 @@ def _process_include( for line in dict_new_link: included_content = included_content.replace(line, dict_new_link[line]) - # End of the conversion code block + # End of the conversion code block if self.config.get('escape_code', False): if isinstance(self.config['escape_code'], dict): @@ -831,6 +841,50 @@ def _process_include( return included_content + def _prepare_path_for_includes_map(self, path: Path) -> str: + donor_path = None + if path.as_posix().startswith(self.working_dir.as_posix()): + _path = path.relative_to(self.working_dir) + donor_path = f"{self.src_dir}/{_path.as_posix()}" + elif path.as_posix().startswith(getcwd()): + _path = path.relative_to(getcwd()) + if _path.as_posix().startswith(self.working_dir.as_posix()): + _path = _path.relative_to(self.working_dir) + if _path.as_posix().startswith(self.working_dir.as_posix()): + donor_path = 
f"{self.src_dir}/{_path.relative_to(self.working_dir).as_posix()}" + else: + donor_path = f"{self.src_dir}/{_path.as_posix()}" + else: + donor_path = _path.as_posix() + return donor_path + + def _exist_in_includes_map(self, map: list, path: str) -> bool: + for obj in map: + if obj["file"] == path: + return True + return False + + def _find_anchors(self, content: str) -> list: + anchors_list = [] + + anchors = re.findall(r'\<anchor\>([\-\_A-Za-z0-9]+)\<\/anchor\>', content) + for anchor in anchors: + anchors_list.append(anchor) + custom_ids = re.findall(r'\{\#([\-A-Za-z0-9]+)\}', content) + for anchor in custom_ids: + anchors_list.append(anchor) + elements_with_ids = re.findall(r'id\=[\"\']([\-A-Za-z0-9]+)[\"\']', content) + for anchor in elements_with_ids: + anchors_list.append(anchor) + return anchors_list + + def _add_anchors(self, l: list, content: str) -> list: + anchors = self._find_anchors(content) + if len(anchors) > 0: + for anchor in anchors: + l.append(anchor) + return l + def process_includes( self, markdown_file_path: Path, @@ -850,6 +904,12 @@ def process_includes( :returns: Markdown content with resolved includes ''' + if self.includes_map_enable: + if markdown_file_path.as_posix().startswith(self.working_dir.as_posix()): + recipient_md_path = f'{self.src_dir}/{markdown_file_path.relative_to(self.working_dir).as_posix()}' + else: + recipient_md_path = f'{self.src_dir}/{markdown_file_path.as_posix()}' + markdown_file_path = markdown_file_path.resolve() self.logger.debug(f'Processing Markdown file: {markdown_file_path}') @@ -867,6 +927,10 @@ def process_includes( include_statement = self.pattern.fullmatch(content_part) if include_statement: + if self.includes_map_enable: + donor_md_path = None + donor_anchors = [] + current_project_root_path = project_root_path body = self._tag_body_pattern.match(include_statement.group('body').strip()) @@ -950,6 +1014,10 @@ def process_includes( included_file_path = repo_path / body.group('path') + if self.includes_map_enable: 
+ donor_md_path = included_file_path.as_posix() + self.logger.debug(f'Set the repo URL of the included file to {recipient_md_path}: {donor_md_path} (1)') + if included_file_path.name.startswith('^'): included_file_path = self._find_file( included_file_path.name[1:], included_file_path.parent @@ -1000,7 +1068,11 @@ def process_includes( nohead=options.get('nohead') ) - else: # if body + if self.includes_map_enable: + donor_md_path = self._prepare_path_for_includes_map(included_file_path) + self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (2)') + + else: # if body is missing self.logger.debug('Using the new syntax rules') if options.get('repo_url') and options.get('path'): @@ -1036,6 +1108,10 @@ def process_includes( include_link=include_link ) + if self.includes_map_enable: + donor_md_path = include_link + options.get('path') + self.logger.debug(f'Set the link of the included file to {recipient_md_path}: {donor_md_path} (3)') + elif options.get('url'): self.logger.debug('File to get by URL referenced') @@ -1062,13 +1138,20 @@ def process_includes( nohead=options.get('nohead') ) + if self.includes_map_enable: + donor_md_path = options['url'] + self.logger.debug(f'Set the URL of the included file to {recipient_md_path}: {donor_md_path} (4)') + elif options.get('src'): self.logger.debug('Local file referenced') included_file_path = self._get_included_file_path(options.get('src'), markdown_file_path) - self.logger.debug(f'Resolved path to the included file: {included_file_path}') + if self.includes_map_enable: + donor_md_path = self._prepare_path_for_includes_map(included_file_path) + self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (5)') + if options.get('project_root'): current_project_root_path = ( markdown_file_path.parent / options.get('project_root') @@ -1087,6 +1170,7 @@ def process_includes( sethead=current_sethead, nohead=options.get('nohead') ) + else: 
self.logger.warning( 'Neither repo_url+path nor src specified, ignoring the include statement' @@ -1144,6 +1228,15 @@ def process_includes( processed_content_part = re.sub(r'\s+', ' ', processed_content_part).strip() + if self.includes_map_enable: + if donor_md_path: + if not self._exist_in_includes_map(self.includes_map, recipient_md_path): + self.includes_map.append({ 'file': recipient_md_path, "includes": []}) + + for i, f in enumerate(self.includes_map): + if f['file'] == recipient_md_path: + self.includes_map[i]['includes'].append(donor_md_path) + else: processed_content_part = content_part @@ -1179,7 +1272,7 @@ def _get_source_files_extensions(self) -> list: return source_files_extensions def apply(self): - + self.logger.info('Applying preprocessor') # Cleaning up downloads because the content of remote source may have modified @@ -1202,4 +1295,12 @@ def apply(self): with open(source_file_path, 'w', encoding='utf8') as processed_file: processed_file.write(processed_content) + # Write includes map + if self.includes_map_enable: + output = f'{self.working_dir}/static/includes_map.json' + Path(f'{self.working_dir}/static/').mkdir(parents=True, exist_ok=True) + with open(f'{self.working_dir}/static/includes_map.json', 'w', encoding='utf8') as f: + dump(self.includes_map, f) + self.logger.debug(f'includes_map write to {output}') + self.logger.info('Preprocessor applied') diff --git a/setup.py b/setup.py index d518e3e..7a10d0e 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ description=SHORT_DESCRIPTION, long_description=LONG_DESCRIPTION, long_description_content_type='text/markdown', - version='1.1.17', + version='1.1.18', author='Konstantin Molchanov', author_email='moigagoo@live.com', url='https://github.com/foliant-docs/foliantcontrib.includes', diff --git a/test/test_includes.py b/test/test_includes.py index d5638ce..049de10 100644 --- a/test/test_includes.py +++ b/test/test_includes.py @@ -243,3 +243,21 @@ def test_extensions(self): 'index.j2': '# My 
title\n\nIncluded content', 'sub/sub.md': 'Included content' } + + def test_includes_map(self): + self.ptf.options = {'includes_map': True } + input_map = { + 'index.md': '# My title\n\n<include src="sub/sub-1.md"></include>\n\n<include src="sub/sub-2.md"></include>', + 'sub/sub-1.md': 'Included content 1', + 'sub/sub-2.md': 'Included content 2' + } + expected_map = { + 'index.md': '# My title\n\nIncluded content 1\n\nIncluded content 2', + 'static/includes_map.json': "[{\"file\": \"__src__/index.md\", \"includes\": [\"__src__/sub/sub-1.md\", \"__src__/sub/sub-2.md\"]}]", + 'sub/sub-1.md': 'Included content 1', + 'sub/sub-2.md': 'Included content 2' + } + self.ptf.test_preprocessor( + input_mapping=input_map, + expected_mapping=expected_map, + )