Skip to content

Commit

Permalink
Skip other chars from TOCs
Browse files Browse the repository at this point in the history
  • Loading branch information
ferblape committed Nov 19, 2024
1 parent a24899f commit d18785f
Showing 1 changed file with 9 additions and 11 deletions.
20 changes: 9 additions & 11 deletions lib/section_extractor/toc_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ class TocParser
ROMAN_SERIES = %w[I II III IV V VI VII VIII IX X XI XII XIII XIV XV].freeze
ALPHA_SERIES = ("a".."z").to_a
MAX_TOC_ITEM_SIZE = 70
RE_NUMERIC = /\n(\d+(?:\.\d+)*\.?\-?\s+[^\n]+)\n/m
RE_ROMAN = /\n((?:IX|IV|V?I{1,3}|VI{1,3})\.?\-?\s+[^\n]+)\n/m
RE_ALPHA = /\n([a-zA-Z][\).-]+\s+[^\n]+)\n/m
RE_SPECIAL = /\n((?:ANEXO|CAPITULO|CAPÍTULO)\s+(?:IX|IV|V?I{1,3}|VI{1,3}|X{1,3}V?I{0,3})[.-]*\s+[^\n]+)\n/mi

attr_reader :content

Expand All @@ -14,24 +18,18 @@ def initialize(content)

def call
tocs = []
re_numeric = /\n(\d+(?:\.\d+)*\.?\-?\s+[^\n]+)\n/m
re_roman = /\n((?:IX|IV|V?I{1,3}|VI{1,3})\.?\-?\s+[^\n]+)\n/m
re_alpha = /\n([a-zA-Z][\).-]+\s+[^\n]+)\n/m
re_special = /\n((?:ANEXO|CAPITULO|CAPÍTULO)\s+(?:IX|IV|V?I{1,3}|VI{1,3}|X{1,3}V?I{0,3})[.-]*\s+[^\n]+)\n/mi

[re_numeric, re_roman, re_alpha, re_special].map do |re|
[RE_NUMERIC, RE_ROMAN, RE_ALPHA, RE_SPECIAL].map do |re|
toc = Toc.new
content.scan(re).each do |match|
toc_item_title = match.first.strip.gsub(/\n/, "").gsub(/\s+/, " ")
toc_item_title = toc_item_title.split(":").first.strip if toc_item_title.include?(":")
# Skip the TOC item if it contains a run of 5 or more consecutive dots
# or underscores (this happens in the docs with a TOC)
next if toc_item_title.include?(".....")
if toc_item_title.size > MAX_TOC_ITEM_SIZE
toc_item_title = toc_item_title.slice(0, MAX_TOC_ITEM_SIZE)
end
next if toc_item_title.include?(".....") || toc_item_title.include?("_____")

toc_item_title = toc_item_title.slice(0, MAX_TOC_ITEM_SIZE) if toc_item_title.size > MAX_TOC_ITEM_SIZE

puts " - Adding TOC item: #{toc_item_title}"
# puts " - Adding TOC item: #{toc_item_title}"
toc.add_item(toc_item_title, content.rindex(match.first))
end

Expand Down

0 comments on commit d18785f

Please sign in to comment.