diff --git a/.gitignore b/.gitignore index 9a75db0..0a89534 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,7 @@ *.pdf example/merged_entry.tex + +__pycache__/ +dist/ +texpack.egg-info/ diff --git a/README.md b/README.md index cc1e66e..e38fff0 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,35 @@ -# latexmerger - Merge .tex Files into Single .tex File +[![PyPI version](https://badge.fury.io/py/texpack.svg)](https://badge.fury.io/py/texpack) + +# texpack +Pack .tex files into a single .tex file. +This is useful for arXiv submissions, for example. + +## Background +When creating LaTeX documents, it's often convenient to split the work into smaller .tex files using commands like `\input` or `\subfile`. +However, platforms like arXiv require submissions as a single .tex file. +This package, texpack, addresses this need by packing multiple .tex files connected via `\input` or `\subfile` into a single .tex file. +The package name is inspired by the JavaScript module [webpack](https://github.com/webpack/webpack), which serves a similar purpose. + +## Usage +Install the package if you don't have it yet. +```bash +python3 -m pip install texpack +``` +Move to your LaTeX project directory. +```bash +cd your/LaTeX/project +``` +Run the module, passing the root .tex file of your project as the first argument. +```bash +python3 -m texpack root-texfile.tex +``` +You'll find a new .tex file in the same directory as your root .tex file. +By default, the generated file is named by prefixing "texpack-" to the name of the file you specified. +In the example above, the file "texpack-root-texfile.tex" will be generated. + +## Custom output file name +You can specify a custom output file name with the `-o` (or `--output`) option. +```bash +python3 -m texpack root-texfile.tex -o main.tex +``` +In the example above, the file "main.tex" will be generated. 
diff --git a/mergelatex.py b/mergelatex.py deleted file mode 100644 index 9d6e7e3..0000000 --- a/mergelatex.py +++ /dev/null @@ -1,139 +0,0 @@ -from typing import Tuple, List, Optional -from argparse import ArgumentParser -from pathlib import Path -import re -from abc import ABC, abstractmethod - -entry_parent: Path = None - - -def parse_args() -> Tuple[Path, Path, Path]: - parser = ArgumentParser( - prog="LaTeX Merger", description="Merge LaTeX Files into Single .tex File" - ) - parser.add_argument("entry", type=Path, help="Entry .tex File Path") - parser.add_argument( - "--output", - "-o", - required=False, - type=str, - help="Output .tex File Name", - default=None, - ) - parsed = parser.parse_args() - entry = Path(parsed.entry) - entry_fn = entry.name - parent = entry.parent - output = parsed.output or f"merged_{entry_fn}" - output_fp = parent / output - return entry, parent, output_fp - - -def read_text(f: Path) -> str: - return f.read_text("utf-8") - - -class BodyExtractor(ABC): - @abstractmethod - def matches(self, line: str) -> bool: - raise NotImplementedError() - - @abstractmethod - def extract(self, parent: Path, line: str) -> Tuple[List[str], Path]: - raise NotImplementedError() - - -class InputExtractor(BodyExtractor): - def __init__(self) -> None: - self.pat = re.compile(r"\\input\{(.*)\}") - - def matches(self, line: str) -> bool: - return bool(self.pat.match(line)) - - def extract(self, parent: Path, line: str) -> Tuple[List[str], Path]: - mat = self.pat.match(line) - stem = mat.group(1) - # The entry file's directory - based relative path is required for \input. 
- target = entry_parent / f"{stem}.tex" - body = read_text(target) - body_lines = body.split("\n") - return body_lines, target - - -class SubfileExtractor(BodyExtractor): - def __init__(self) -> None: - self.pat = re.compile(r"\\subfile\{(.*)\}") - - def matches(self, line: str) -> bool: - return bool(self.pat.match(line)) - - def extract(self, parent: Path, line: str) -> Tuple[List[str], Path]: - mat = self.pat.match(line) - stem = mat.group(1) - target = parent / f"{stem}.tex" - text = read_text(target) - lines = text.split("\n") - body_lines = [] - - in_body = False - for line in lines: - if line.startswith("\\begin{document}"): - in_body = True - elif line.endswith("\\end{document}"): - in_body = False - elif in_body: - body_lines.append(line) - - return body_lines, target - - -extractors: List[BodyExtractor] = [InputExtractor(), SubfileExtractor()] - - -def extract_ifany(parent: Path, line: str) -> Optional[Tuple[List[str], Path]]: - for extractor in extractors: - if extractor.matches(line): - return extractor.extract(parent, line) - return None - - -def expand(parent: Path, lines: List[str], depth: int = 0) -> List[str]: - expanded_lines = [] - - for line in lines: - extracted = extract_ifany(parent, line) - if extracted is not None: - body_lines, target = extracted - arrow = ">" * (depth + 1) - barrow = "<" * (depth + 1) - target_relation = str(target.relative_to(entry_parent)) - expanded_lines.append(f"% {arrow} {target_relation} {arrow} : LaTeX Merger") - expanded_lines += expand(target.parent, body_lines, depth=depth + 1) - expanded_lines.append( - f"% {barrow} {target_relation} {barrow} : LaTeX Merger" - ) - else: - expanded_lines.append(line) - return expanded_lines - - -def main(): - entry, parent, output_fp = parse_args() - if output_fp.exists(): - response = input( - f"The output file {output_fp.name} already exists. OVERWRITE THIS? ARE YOU SURE? 
[Y/other]" - ) - if response!="Y": - print("Aborted.") - exit(-1) - - global entry_parent - entry_parent = parent - entry_lines = read_text(entry).split("\n") - result = expand(parent, entry_lines) - text = "\n".join(result) - output_fp.write_text(text) - print("Merged File Written.") - -if __name__ == "__main__": - main() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..2b16b57 --- /dev/null +++ b/setup.py @@ -0,0 +1,29 @@ +from setuptools import setup +from pathlib import Path +from texpack import __version__ +from texpack.utils import read_text + +name = "texpack" + +setup( + name=name, + packages=[name], + version=__version__, + license="MIT", + install_requires="", + tests_require="", + author="Mya-Mya", + url="https://github.com/Mya-Mya/texpack", + description="Pack LaTeX Files into Single .tex File", + keywords="LaTeX, tex, academic", + long_description=read_text(Path("./README.md")), + long_description_content_type="text/markdown", + classifiers=[ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Topic :: Text Processing :: Markup :: LaTeX", + "Intended Audience :: Science/Research", + "Topic :: Utilities", + "Topic :: Education", + ], +) diff --git a/texpack/__init__.py b/texpack/__init__.py new file mode 100644 index 0000000..d538f87 --- /dev/null +++ b/texpack/__init__.py @@ -0,0 +1 @@ +__version__ = "1.0.0" \ No newline at end of file diff --git a/texpack/__main__.py b/texpack/__main__.py new file mode 100644 index 0000000..d1f514f --- /dev/null +++ b/texpack/__main__.py @@ -0,0 +1,76 @@ +from typing import Tuple, List, Optional +from argparse import ArgumentParser +from pathlib import Path +from .extractors import * +root: Path = None + + +def parse_args() -> Tuple[Path, Path, Path]: + parser = ArgumentParser( + prog="texpack", description="Pack LaTeX Files into Single .tex File" + ) + parser.add_argument("entry", type=Path, help="Entry .tex File Path") + parser.add_argument( + "--output", + 
"-o", + required=False, + type=str, + help="Output .tex File Name", + default=None, + ) + parsed = parser.parse_args() + entry = Path(parsed.entry) + entry_fn = entry.name + parent = entry.parent + output = parsed.output or f"texpack-{entry_fn}" + output_fp = parent / output + return entry, parent, output_fp + + + +def extract_ifany(parent: Path, line: str) -> Optional[Tuple[List[str], Path]]: + for extractor in extractors: + if extractor.matches(line): + return extractor.extract(parent, root, line) + return None + + +def expand(parent: Path, lines: List[str], depth: int = 0) -> List[str]: + expanded_lines = [] + + for line in lines: + extracted = extract_ifany(parent, line) + if extracted is not None: + body_lines, target = extracted + arrow = ">" * (depth + 1) + barrow = "<" * (depth + 1) + target_relation = str(target.relative_to(root)) + expanded_lines.append(f"% {arrow} {target_relation} {arrow} : texpack") + expanded_lines += expand(target.parent, body_lines, depth=depth + 1) + expanded_lines.append(f"% {barrow} {target_relation} {barrow} : texpack") + else: + expanded_lines.append(line) + return expanded_lines + + +def main(): + entry, parent, output_fp = parse_args() + if output_fp.exists(): + response = input( + f"The output file {output_fp.name} already exists. OVERWRITE THIS? ARE YOU SURE? 
[Y/other]" + ) + if response != "Y": + print("Aborted.") + exit(-1) + + global root + root = parent + entry_lines = read_text(entry).split("\n") + result = expand(parent, entry_lines) + text = "\n".join(result) + output_fp.write_text(text) + print(f"Written to {str(output_fp)}.") + + +if __name__ == "__main__": + main() diff --git a/texpack/extractors.py b/texpack/extractors.py new file mode 100644 index 0000000..f62e799 --- /dev/null +++ b/texpack/extractors.py @@ -0,0 +1,60 @@ +from .utils import * +from typing import Tuple, List, Optional +import re +from abc import ABC, abstractmethod + +class BodyExtractor(ABC): + @abstractmethod + def matches(self, line: str) -> bool: + raise NotImplementedError() + + @abstractmethod + def extract(self, parent: Path, root: Path, line: str) -> Tuple[List[str], Path]: + raise NotImplementedError() + + +class InputExtractor(BodyExtractor): + def __init__(self) -> None: + self.pat = re.compile(r"\\input\{(.*)\}") + + def matches(self, line: str) -> bool: + return bool(self.pat.match(line)) + + def extract(self, parent: Path, root: Path, line: str) -> Tuple[List[str], Path]: + mat = self.pat.match(line) + stem = mat.group(1) + # The entry file's directory - based relative path is required for \input. 
+ target = root / f"{stem}.tex" + body = read_text(target) + body_lines = body.split("\n") + return body_lines, target + + +class SubfileExtractor(BodyExtractor): + def __init__(self) -> None: + self.pat = re.compile(r"\\subfile\{(.*)\}") + + def matches(self, line: str) -> bool: + return bool(self.pat.match(line)) + + def extract(self, parent: Path, root: Path, line: str) -> Tuple[List[str], Path]: + mat = self.pat.match(line) + stem = mat.group(1) + target = parent / f"{stem}.tex" + text = read_text(target) + lines = text.split("\n") + body_lines = [] + + in_body = False + for line in lines: + if line.startswith("\\begin{document}"): + in_body = True + elif line.endswith("\\end{document}"): + in_body = False + elif in_body: + body_lines.append(line) + + return body_lines, target + + +extractors: List[BodyExtractor] = [InputExtractor(), SubfileExtractor()] diff --git a/texpack/utils.py b/texpack/utils.py new file mode 100644 index 0000000..eac5620 --- /dev/null +++ b/texpack/utils.py @@ -0,0 +1,5 @@ +from pathlib import Path + + +def read_text(f: Path) -> str: + return f.read_text("utf-8")