Skip to content

Commit

Permalink
normalize accents and white spaces
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgefandinno committed Jan 17, 2025
1 parent 1f7390d commit 88eed6b
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 100 deletions.
8 changes: 8 additions & 0 deletions bibfmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Script to cleanup bibtex records and pretty print them.
"""

import re
import sys
from io import StringIO
from argparse import ArgumentParser
Expand Down Expand Up @@ -73,6 +74,11 @@ def cleanup_expression(x):
return "".join(ret)


# Single-character (non-letter) LaTeX accent commands that get normalized:
# \= macron, \~ tilde, \^ circumflex, \. dot above, \" umlaut, \' acute and
# \` grave.  Each character is regex-escaped so the joined string can be
# dropped straight into a character class.
ACCENTS = "".join(re.escape(k) for k in ("=", "~", "^", ".", '"', "'", "`"))
# One or more consecutive whitespace characters (spaces, tabs, newlines).
WHITESPACE_RE = re.compile(r"\s+")
# A braced accent applied to a braced ASCII letter, e.g. {\'{e}}.
# Group 1 captures the accent character, group 2 the accented letter.
# Letter-named accents such as \c{c} are deliberately not matched: dropping
# their inner braces would fuse the command name with its argument.
ACCENTS_RE = re.compile(r"\{\\([" + ACCENTS + r"])\{([a-zA-Z])\}\}")


def cleanup_record(x):
"""
Cleanup a record as returned by the bibtexparser module.
Expand All @@ -81,6 +87,8 @@ def cleanup_record(x):
if val in ("ID",):
continue
x[val] = apply_on_expression(x[val], cleanup_expression)
x[val] = apply_on_expression(x[val], lambda x: WHITESPACE_RE.sub(" ", x))
x[val] = apply_on_expression(x[val], lambda x: ACCENTS_RE.sub(r"{\\\1\2}", x))
if val.lower() == "pages":
x[val] = x[val].replace("--", "-")
return x
Expand Down
Loading

0 comments on commit 88eed6b

Please sign in to comment.