Skip to content

Commit

Permalink
refactor: simplify code + readme update
Browse files Browse the repository at this point in the history
  • Loading branch information
rkaminsk committed Jan 20, 2025
1 parent 9b32d61 commit 92d39b2
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 23 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ documentation][bibtex].
> - `Manuel {Ojeda Aciego}` -> `M. {Ojeda Aciego}`
> - `John von Neumann` -> `J. von Neumann`
> - `Allen {V}an Gelder` -> `A. {V}an Gelder`
> - `Luis Fari{\~{n}}as del Cerro` -> `L. {Fari{\~{n}}as del Cerro}`
> - `Luis Fari{\~n}as del Cerro` -> `L. {Fari{\~n}as del Cerro}`
> - `Marcello D'Agostino` -> `M. {D}'\relax Agostino`
In the `title` field, **enclose capital letters** (other than the first letter)
Expand Down Expand Up @@ -141,9 +141,9 @@ Use the **strings** defined in [krr.bib] for journal names.
> example: `@string{lncs = {Lecture Notes in Computer Science}}` for use in
> BibTeX entries as `series = lncs`
Use LaTeX commands for **special characters** in all fields putting braces around the command.
Use LaTeX commands for **special characters** in all fields putting (a single pair of) braces around the command.
> example: `J. P{\"a}tynen` or `J. P{\"{a}}tynen` but neither `J. Pätynen`, `J. P\"atynen`, `J. P\"{a}tynen`, nor `J. P{{\"a}}tynen`.
> example: `J. P{\"a}tynen` but neither `J. P{\"{a}}tynen`, `J. Pätynen`, `J. P\"atynen`, `J. P\"{a}tynen`, nor `J. P{{\"a}}tynen`.
Use `-` rather than `--` for **hyphens** in the pages, volume, and number fields.
Don’t terminate field contents with `.`.
Expand Down
40 changes: 20 additions & 20 deletions bibfmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ def check_min_version():
Ensure that a new enough version of python and bibtexparser is used.
"""
if sys.version_info < (3, 10):
raise SystemExit("The script requires at least python version 3.10.")
sys.exit("The script requires at least python version 3.10.")
vers = bp.__version__.split(".")
if (int(vers[0]), int(vers[1])) < (1, 2):
raise SystemExit("The script requires at least bibtexparser version 1.2.")
sys.exit("The script requires at least bibtexparser version 1.2.")


def is_ascii(x):
Expand All @@ -41,15 +41,6 @@ def is_ascii(x):
return False


# Map from unicode symbols to latex expressions.
#
# The bibtexparser.latexenc module also maps some ascii characters to unicode
# symbols. Such characters are ignored in the map.
UNICODE_TO_LATEX = {
key: value for key, value in unicode_to_latex_map.items() if not is_ascii(key)
}


def apply_on_expression(x, f):
"""
Apply the function f for converting strings to bibtex expressions as
Expand All @@ -62,23 +53,34 @@ def apply_on_expression(x, f):
return x


# Map from unicode symbols to latex expressions.
#
# The bibtexparser.latexenc module also maps some ascii characters to unicode
# symbols. Such characters are ignored in the map.
UNICODE_TO_LATEX = {
key: value for key, value in unicode_to_latex_map.items() if not is_ascii(key)
}
# Regex-escaped accent characters (= ~ ^ . " '), concatenated so that they can
# be placed inside a regex character class.
ACCENTS = "".join(re.escape(k) for k in """ = ~ ^ . " ' """.split())
# Matches any run of one or more whitespace characters.
WHITESPACE_RE = re.compile(r"\s+")
# Matches an accent command whose letter is wrapped in its own pair of braces,
# i.e. `{\<accent>{<letter>}}` with a single ascii letter. Group 1 captures the
# accent character and group 2 the letter.
ACCENTS_RE = re.compile(r"\{\\([" + ACCENTS + r"])\{([a-zA-Z])\}\}")


def cleanup_expression(x):
    """
    Convert the given string containing unicode symbols into a single line
    string with latex escapes only.

    The conversion is done in three steps:

    1. Characters with an entry in UNICODE_TO_LATEX are replaced by their
       latex escape; spaces and braces are always kept unchanged.
    2. Each run of whitespace (including newlines) is collapsed into a
       single space.
    3. Accent commands whose letter is enclosed in an additional pair of
       braces are rewritten to use a single pair of braces only.

    Arguments:
    x -- The string to clean up.

    Returns the cleaned up string.
    """
    res = "".join(
        char if char in (" ", "{", "}") else UNICODE_TO_LATEX.get(char, char)
        for char in x
    )
    res = WHITESPACE_RE.sub(" ", res)
    # The substitution has to be applied to the already converted string and
    # not to the raw input x; otherwise the results of the two previous steps
    # would be discarded.
    return ACCENTS_RE.sub(r"{\\\1\2}", res)


def cleanup_record(x):
Expand All @@ -89,8 +91,6 @@ def cleanup_record(x):
if val in ("ID",):
continue
x[val] = apply_on_expression(x[val], cleanup_expression)
x[val] = apply_on_expression(x[val], lambda x: WHITESPACE_RE.sub(" ", x))
x[val] = apply_on_expression(x[val], lambda x: ACCENTS_RE.sub(r"{\\\1\2}", x))
if val.lower() == "pages":
x[val] = x[val].replace("--", "-")
return x
Expand Down

0 comments on commit 92d39b2

Please sign in to comment.