Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove strong from titles #535

Merged
merged 5 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ class ArticleApiProperties extends BaseProps {
ResourceType.H5P.toString -> H5PAddress
)

def InlineHtmlTags: Set[String] = Set("code", "em", "span", "strong", "sub", "sup")
def InlineHtmlTags: Set[String] = Set("code", "em", "span", "sub", "sup")
def IntroductionHtmlTags: Set[String] = InlineHtmlTags ++ Set("br", "p")
gunnarvelle marked this conversation as resolved.
Show resolved Hide resolved

private def H5PAddress: String = propOrElse(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* Part of NDLA article-api
* Copyright (C) 2024 NDLA
*
* See LICENSE
*/

package no.ndla.articleapi.db.migration

import io.circe.parser
import io.circe.syntax.EncoderOps
import no.ndla.common.model.domain.{ArticleContent, Title}
import no.ndla.common.model.domain.article.Article
import org.flywaydb.core.api.migration.{BaseJavaMigration, Context}
import org.jsoup.Jsoup
import org.jsoup.nodes.Element
import org.jsoup.nodes.Entities.EscapeMode
import org.postgresql.util.PGobject
import scalikejdbc.{DB, DBSession, *}

class V54__RemoveStrongFromTitle extends BaseJavaMigration {
private def countAllRows(implicit session: DBSession): Option[Long] = {
sql"select count(*) from contentdata where document is not NULL"
.map(rs => rs.long("count"))
.single()
}

private def allRows(offset: Long)(implicit session: DBSession): Seq[(Long, String)] = {
sql"select id, document, article_id from contentdata where document is not null order by id limit 1000 offset $offset"
.map(rs => {
(rs.long("id"), rs.string("document"))
})
.list()
}

private def updateRow(document: String, id: Long)(implicit session: DBSession): Int = {
val dataObject = new PGobject()
dataObject.setType("jsonb")
dataObject.setValue(document)

sql"update contentdata set document = $dataObject where id = $id"
.update()
}

override def migrate(context: Context): Unit = DB(context.getConnection)
.autoClose(false)
.withinTx { session => migrateRows(session) }

private def migrateRows(implicit session: DBSession): Unit = {
val count = countAllRows.get
var numPagesLeft = (count / 1000) + 1
var offset = 0L

while (numPagesLeft > 0) {
allRows(offset * 1000).map { case (id, document) =>
updateRow(convertArticleUpdate(document), id)
}: Unit
numPagesLeft -= 1
offset += 1
}
}

private def stringToJsoupDocument(htmlString: String): Element = {
val document = Jsoup.parseBodyFragment(htmlString)
document.outputSettings().escapeMode(EscapeMode.xhtml).prettyPrint(false)
document.select("body").first()
}

private def jsoupDocumentToString(element: Element): String = {
element.select("body").html()
}

def convertTitle(t: Title): Title = {
val doc = stringToJsoupDocument(t.title)

doc
.select("strong")
.forEach(strong => {
strong.unwrap(): Unit
})
t.copy(title = jsoupDocumentToString(doc))
}

def convertContent(c: ArticleContent): ArticleContent = {
val doc = stringToJsoupDocument(c.content)

doc
.select("h2, h3, h4")
.forEach(header => {
header.select("strong").forEach(strong => strong.unwrap(): Unit)
})
c.copy(content = jsoupDocumentToString(doc))
}

private[migration] def convertArticleUpdate(document: String): String = {
val oldArticle = parser.parse(document).flatMap(_.as[Article]).toTry.get
val titles = oldArticle.title.map(t => convertTitle(t))
val contents = oldArticle.content.map(c => convertContent(c))
val newArticle = oldArticle.copy(title = titles, content = contents)
newArticle.asJson.noSpaces
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,21 @@ import no.ndla.articleapi.repository.ArticleRepository
import no.ndla.common.errors.{ValidationException, ValidationMessage}
import no.ndla.common.model.NDLADate
import no.ndla.common.model.domain.article.{Article, Copyright}
import no.ndla.common.model.domain._
import no.ndla.common.model.domain.*
import no.ndla.language.model.{Iso639, LanguageField}
import no.ndla.mapping.License.getLicense
import no.ndla.validation.HtmlTagRules.{allLegalTags, stringToJsoupDocument}
import no.ndla.validation.SlugValidator.validateSlug
import no.ndla.validation.TextValidator

import scala.jdk.CollectionConverters._
import scala.jdk.CollectionConverters.*
import scala.util.{Failure, Success, Try}

trait ContentValidator {
this: ArticleRepository with Props =>
this: ArticleRepository & Props =>
val contentValidator: ContentValidator

class ContentValidator() {
class ContentValidator {
private val inlineHtmlTags = props.InlineHtmlTags
private val introductionHtmlTags = props.IntroductionHtmlTags

Expand Down Expand Up @@ -73,7 +73,7 @@ trait ContentValidator {
}
}

private def validateNonEmpty(field: String, values: Seq[LanguageField[_]]): Option[ValidationMessage] = {
private def validateNonEmpty(field: String, values: Seq[LanguageField[?]]): Option[ValidationMessage] = {
if (values.isEmpty || values.forall(_.isEmpty)) {
Some(ValidationMessage(field, "Field must contain at least one entry"))
} else
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Part of NDLA article-api
* Copyright (C) 2024 NDLA
*
* See LICENSE
*/

package no.ndla.articleapi.db.migration

import no.ndla.articleapi.{TestEnvironment, UnitSuite}
import no.ndla.common.model.domain.{ArticleContent, Title}

class V54__RemoveStrongFromTitleTest extends UnitSuite with TestEnvironment {
test("That strong are removed from title") {
val oldTitle = Title("This is a <strong>title</strong>", language = "nb")
val expectedTitle = Title("This is a title", language = "nb")

val migration = new V54__RemoveStrongFromTitle
val result = migration.convertTitle(oldTitle)
result should be(expectedTitle)
}

test("That nested strong are removed from title") {
val oldTitle = Title("This is a <strong><em>title</em></strong>", language = "nb")
val expectedTitle = Title("This is a <em>title</em>", language = "nb")

val migration = new V54__RemoveStrongFromTitle
val result = migration.convertTitle(oldTitle)
result should be(expectedTitle)
}

test("That strong are removed from title in article") {
val oldContent =
ArticleContent("<section><h2>This is a <strong>title</strong></h2><p>Some text</p></section>", language = "nb")
val expectedContent = ArticleContent("<section><h2>This is a title</h2><p>Some text</p></section>", language = "nb")

val migration = new V54__RemoveStrongFromTitle
val result = migration.convertContent(oldContent)
result should be(expectedContent)
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class DraftApiProperties extends BaseProps with StrictLogging {
"image-api" -> s"http://$ImageApiHost/intern"
)

def InlineHtmlTags: Set[String] = Set("code", "em", "span", "strong", "sub", "sup")
def InlineHtmlTags: Set[String] = Set("code", "em", "span", "sub", "sup")
def IntroductionHtmlTags: Set[String] = InlineHtmlTags ++ Set("br", "p")

private def BrightcoveAccountId: String = prop("NDLA_BRIGHTCOVE_ACCOUNT_ID")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Part of NDLA draft-api
* Copyright (C) 2024 NDLA
*
* See LICENSE
*/

package no.ndla.draftapi.db.migration

import io.circe.parser
import io.circe.syntax.EncoderOps
import no.ndla.common.model.domain.{ArticleContent, Title}
import no.ndla.common.model.domain.draft.Draft
import org.flywaydb.core.api.migration.{BaseJavaMigration, Context}
import org.jsoup.Jsoup
import org.jsoup.nodes.Element
import org.jsoup.nodes.Entities.EscapeMode
import org.postgresql.util.PGobject
import scalikejdbc.{DB, DBSession, *}

class V65__RemoveStrongFromTitle extends BaseJavaMigration {
private def countAllRows(implicit session: DBSession): Option[Long] = {
sql"select count(*) from articledata where document is not NULL"
.map(rs => rs.long("count"))
.single()
}

private def allRows(offset: Long)(implicit session: DBSession): Seq[(Long, String)] = {
sql"select id, document, article_id from articledata where document is not null order by id limit 1000 offset $offset"
.map(rs => {
(rs.long("id"), rs.string("document"))
})
.list()
}

private def updateRow(document: String, id: Long)(implicit session: DBSession): Int = {
val dataObject = new PGobject()
dataObject.setType("jsonb")
dataObject.setValue(document)

sql"update articledata set document = $dataObject where id = $id"
.update()
}

override def migrate(context: Context): Unit = DB(context.getConnection)
.autoClose(false)
.withinTx { session => migrateRows(session) }

private def migrateRows(implicit session: DBSession): Unit = {
val count = countAllRows.get
var numPagesLeft = (count / 1000) + 1
var offset = 0L

while (numPagesLeft > 0) {
allRows(offset * 1000).map { case (id, document) =>
updateRow(convertArticleUpdate(document), id)
}: Unit
numPagesLeft -= 1
offset += 1
}
}

private def stringToJsoupDocument(htmlString: String): Element = {
val document = Jsoup.parseBodyFragment(htmlString)
document.outputSettings().escapeMode(EscapeMode.xhtml).prettyPrint(false)
document.select("body").first()
}

private def jsoupDocumentToString(element: Element): String = {
element.select("body").html()
}

def convertTitle(t: Title): Title = {
val doc = stringToJsoupDocument(t.title)

doc
.select("strong")
.forEach(strong => {
strong.unwrap(): Unit
})
t.copy(title = jsoupDocumentToString(doc))
}

def convertContent(c: ArticleContent): ArticleContent = {
val doc = stringToJsoupDocument(c.content)

doc
.select("h2, h3, h4")
.forEach(header => {
header.select("strong").forEach(strong => strong.unwrap(): Unit)
})
c.copy(content = jsoupDocumentToString(doc))
}

private[migration] def convertArticleUpdate(document: String): String = {
val oldArticle = parser.parse(document).flatMap(_.as[Draft]).toTry.get

val titles = oldArticle.title.map(t => convertTitle(t))
val contents = oldArticle.content.map(c => convertContent(c))

val newArticle = oldArticle.copy(title = titles, content = contents)
newArticle.asJson.noSpaces
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ package no.ndla.draftapi.validation

import no.ndla.common.errors.{ValidationException, ValidationMessage}
import no.ndla.common.model.NDLADate
import no.ndla.common.model.domain._
import no.ndla.common.model.domain.draft._
import no.ndla.common.model.domain.*
import no.ndla.common.model.domain.draft.*
import no.ndla.draftapi.Props
import no.ndla.draftapi.integration.ArticleApiClient
import no.ndla.draftapi.model.api.{ContentId, NotFoundException, UpdatedArticle}
Expand All @@ -21,18 +21,18 @@ import no.ndla.mapping.License.getLicense
import no.ndla.network.tapir.auth.TokenUser
import no.ndla.validation.HtmlTagRules.{allLegalTags, stringToJsoupDocument}
import no.ndla.validation.SlugValidator.validateSlug
import no.ndla.validation._
import no.ndla.validation.*
import scalikejdbc.ReadOnlyAutoSession

import scala.jdk.CollectionConverters._
import scala.jdk.CollectionConverters.*
import scala.util.{Failure, Success, Try}

trait ContentValidator {
this: DraftRepository with ConverterService with ArticleApiClient with Props =>
this: DraftRepository & ConverterService & ArticleApiClient & Props =>
val contentValidator: ContentValidator
val importValidator: ContentValidator

class ContentValidator() {
class ContentValidator {
import props.{BrightcoveVideoScriptUrl, H5PResizerScriptUrl, NRKVideoScriptUrl}
private val inlineHtmlTags = props.InlineHtmlTags
private val introductionHtmlTags = props.IntroductionHtmlTags
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Part of NDLA draft-api
* Copyright (C) 2024 NDLA
*
* See LICENSE
*/

package no.ndla.draftapi.db.migration

import no.ndla.common.model.domain.{ArticleContent, Title}
import no.ndla.draftapi.{TestEnvironment, UnitSuite}

class V65__RemoveStrongFromTitleTest extends UnitSuite with TestEnvironment {
test("That strong are removed from title") {
val oldTitle = Title("This is a <strong>title</strong>", language = "nb")
val expectedTitle = Title("This is a title", language = "nb")

val migration = new V65__RemoveStrongFromTitle
val result = migration.convertTitle(oldTitle)
result should be(expectedTitle)
}

test("That nested strong are removed from title") {
val oldTitle = Title("This is a <strong><em>title</em></strong>", language = "nb")
val expectedTitle = Title("This is a <em>title</em>", language = "nb")

val migration = new V65__RemoveStrongFromTitle
val result = migration.convertTitle(oldTitle)
result should be(expectedTitle)
}

test("That strong are removed from title in article") {
val oldContent =
ArticleContent("<section><h2>This is a <strong>title</strong></h2><p>Some text</p></section>", language = "nb")
val expectedContent = ArticleContent("<section><h2>This is a title</h2><p>Some text</p></section>", language = "nb")

val migration = new V65__RemoveStrongFromTitle
val result = migration.convertContent(oldContent)
result should be(expectedContent)
}
}
Loading