From 95e326918875e6d0432dac6f25156da48d6bdc20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20D=C3=B6rfelt?= Date: Sat, 5 Oct 2024 14:28:55 +0200 Subject: [PATCH] don't fail if there is no table body --- src/formats/synology_note_station.py | 10 ++++++++-- test/data | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/formats/synology_note_station.py b/src/formats/synology_note_station.py index 92ae7fa7..6bb31f0b 100644 --- a/src/formats/synology_note_station.py +++ b/src/formats/synology_note_station.py @@ -32,15 +32,21 @@ def streamline_html(content_html: str) -> str: # another hack: make the first row of a table to the header soup = BeautifulSoup(content_html, "html.parser") for table in soup.find_all("table"): + # Remove all divs, since they cause pandoc to fail converting the table. + # https://stackoverflow.com/a/32064299/7410886 + for div in table.find_all("div"): + div.unwrap() + for row_index, row in enumerate(table.find_all("tr")): for td in row.find_all("td"): # tables seem to be headerless always # make first row to header if row_index == 0: td.name = "th" + # remove "tbody" - body = table.find("tbody") - body.unwrap() + if (body := table.find("tbody")) is not None: + body.unwrap() return str(soup) diff --git a/test/data b/test/data index 223a8a61..3e4a2d6b 160000 --- a/test/data +++ b/test/data @@ -1 +1 @@ -Subproject commit 223a8a6124f42d1a77a2ee0b18f72e2beaa4011f +Subproject commit 3e4a2d6be7076aab13e2d234f99524d1e6e7ec68