Skip to content

Commit

Permalink
Exclude contact cards (and other non-editable blocks) from HIX
Browse files (browse the repository at this point in the history)
  • Loading branch information
charludo committed Jan 31, 2025
1 parent 822721e commit edd67bd
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion integreat_cms/cms/views/utils/hix.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,7 +15,7 @@
from django.http import JsonResponse
from django.views.decorators.http import require_POST
from lxml.etree import LxmlError
from lxml.html import fromstring
from lxml.html import fromstring, tostring

from integreat_cms.cms.models.pages.page_translation import PageTranslation
from integreat_cms.cms.models.regions.region import Region
Expand Down Expand Up @@ -54,12 +54,19 @@ def lookup_hix_score_helper(text: str) -> TextlabResult:
"""
try:
html = fromstring(text)

# remove divs which the authors have no control over (e.g. contact cards)
for div in html.xpath('//div[@contenteditable="false"]'):
div.getparent().remove(div)

text_content = html.text_content()
if not text_content.strip():
return {
"score": None,
"feedback": [],
}

text = tostring(html, encoding="unicode")
except LxmlError:
pass

Expand Down

0 comments on commit edd67bd

Please sign in to comment.