Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
ferblape committed Nov 19, 2024
1 parent d4d7b24 commit 69f184d
Show file tree
Hide file tree
Showing 3 changed files with 12,175 additions and 8 deletions.
21 changes: 14 additions & 7 deletions lib/section_extractor/document_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,17 @@ def call

private

def extract_sections(content, tocs)
def extract_sections(content, tocs) # rubocop:disable Metrics/AbcSize
sections = []

tocs.each do |toc|
toc_items_to_skip = []
# toc_items_to_skip = []

0.upto(toc.toc_items.size - 1) do |index|
section = Section.new(content, toc.toc_items[index], toc.toc_items[index + 1])
sections << section unless sections.find{ |s| s.raw_title == section.raw_title && s.positions&.first == section.positions&.first }
# TODO: review
sections << section unless section_exists?(sections, section)

# TODO: re-activate when we use the content again
# Skip empty sections: they are not real sections, just sentences that happen to start with
# the TOC item title format.
# if section.content.empty?
Expand All @@ -36,14 +37,20 @@ def extract_sections(content, tocs)
# end
end

puts "- Skipping #{toc_items_to_skip.join(", ")} empty sections" if toc_items_to_skip.any?
toc_items_to_skip.each { |index| toc.toc_items.delete_at(index) }
# TODO: re-activate when we use the content again
# puts "- Skipping #{toc_items_to_skip.join(", ")} empty sections" if toc_items_to_skip.any?
# toc_items_to_skip.each { |index| toc.toc_items.delete_at(index) }
end
sections.sort_by{ |s| s.positions.first }

sections.sort_by { |s| s.positions.first }
end

# Runs SectionExtractor::TocParser over +content+ and returns whatever its
# #call method produces.
#
# @param content the document content handed to the parser unchanged
def extract_tocs(content)
  toc_parser = SectionExtractor::TocParser.new(content)
  toc_parser.call
end

# Reports whether +sections+ already holds an entry equivalent to +section+.
#
# Two entries are considered the same when their raw titles are equal and the
# first element of their positions is equal. A nil +positions+ is tolerated
# and compares as nil.
#
# @param sections [Array] objects responding to +raw_title+ and +positions+
# @param section the candidate, responding to +raw_title+ and +positions+
# @return [Boolean] true when an equivalent entry is present, false otherwise
def section_exists?(sections, section)
  title = section.raw_title
  first_position = section.positions&.first

  # any? yields a real boolean, as expected from a predicate method.
  sections.any? { |s| s.raw_title == title && s.positions&.first == first_position }
end
end
end
2 changes: 1 addition & 1 deletion lib/section_extractor/section.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def initialize(document_content, toc_item, next_toc_item)
end

# Compact debugging representation built from the raw title and positions.
def inspect
  # TODO: restore content
  # "#<Section title: #{@raw_title}, content: #{@content.slice(0, 50)}>"
  title = @raw_title
  positions = @positions
  "#<Section title: #{title} positions: #{positions}>"
end
Expand Down
Loading

0 comments on commit 69f184d

Please sign in to comment.