-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #535 from NDLANO/strip-strong-from-title
Remove strong from titles
- Loading branch information
Showing
8 changed files
with
304 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
102 changes: 102 additions & 0 deletions
102
article-api/src/main/scala/no/ndla/articleapi/db/migration/V54__RemoveStrongFromTitle.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
/* | ||
* Part of NDLA article-api | ||
* Copyright (C) 2024 NDLA | ||
* | ||
* See LICENSE | ||
*/ | ||
|
||
package no.ndla.articleapi.db.migration | ||
|
||
import io.circe.parser | ||
import io.circe.syntax.EncoderOps | ||
import no.ndla.common.model.domain.{ArticleContent, Title} | ||
import no.ndla.common.model.domain.article.Article | ||
import org.flywaydb.core.api.migration.{BaseJavaMigration, Context} | ||
import org.jsoup.Jsoup | ||
import org.jsoup.nodes.Element | ||
import org.jsoup.nodes.Entities.EscapeMode | ||
import org.postgresql.util.PGobject | ||
import scalikejdbc.{DB, DBSession, *} | ||
|
||
/** Flyway migration that removes `<strong>` elements from article titles and from `h2`/`h3`/`h4`
  * headings in article content, for every row of `contentdata` that has a non-null `document`.
  *
  * NOTE(review): every title and content string is round-tripped through jsoup, also when it
  * contains no `<strong>` at all; confirm that the re-serialized markup is acceptable for such rows.
  */
class V54__RemoveStrongFromTitle extends BaseJavaMigration {

  /** Number of rows fetched per page; shared by the query and the paging arithmetic. */
  private val pageSize: Long = 1000L

  /** Counts the rows that carry a document. Falls back to 0 if the query yields no row. */
  private def countAllRows(implicit session: DBSession): Long =
    sql"select count(*) from contentdata where document is not NULL"
      .map(rs => rs.long("count"))
      .single()
      .getOrElse(0L)

  /** Fetches one page of `(id, document)` pairs, ordered by id, starting at `offset`. */
  private def allRows(offset: Long)(implicit session: DBSession): Seq[(Long, String)] =
    sql"select id, document from contentdata where document is not null order by id limit $pageSize offset $offset"
      .map(rs => (rs.long("id"), rs.string("document")))
      .list()

  /** Stores `document` as the `jsonb` document of the row with the given `id`.
    *
    * @return
    *   the update count reported by the database
    */
  private def updateRow(document: String, id: Long)(implicit session: DBSession): Int = {
    val dataObject = new PGobject()
    dataObject.setType("jsonb")
    dataObject.setValue(document)

    sql"update contentdata set document = $dataObject where id = $id"
      .update()
  }

  /** Runs the whole migration in one transaction on the connection supplied by Flyway. */
  override def migrate(context: Context): Unit = DB(context.getConnection)
    .autoClose(false)
    .withinTx { session => migrateRows(session) }

  /** Walks the table page by page and rewrites every document. */
  private def migrateRows(implicit session: DBSession): Unit = {
    // Ceiling division: exactly as many pages as are needed, no trailing empty page.
    val pageCount = (countAllRows + pageSize - 1) / pageSize

    (0L until pageCount).foreach { page =>
      // Updates never change `id`, so offset paging over `order by id` stays stable.
      allRows(page * pageSize).foreach { case (id, document) =>
        updateRow(convertArticleUpdate(document), id)
      }
    }
  }

  /** Parses an HTML fragment and returns its `body` element, configured for xhtml escaping
    * and no pretty-printing.
    */
  private def stringToJsoupDocument(htmlString: String): Element = {
    val document = Jsoup.parseBodyFragment(htmlString)
    document.outputSettings().escapeMode(EscapeMode.xhtml).prettyPrint(false)
    document.body()
  }

  /** Serializes the inner HTML of the `body` element produced by [[stringToJsoupDocument]]. */
  private def jsoupDocumentToString(element: Element): String =
    element.html()

  /** Returns a copy of `t` in which every `<strong>` element is replaced by its children. */
  def convertTitle(t: Title): Title = {
    val doc = stringToJsoupDocument(t.title)
    doc.select("strong").unwrap(): Unit
    t.copy(title = jsoupDocumentToString(doc))
  }

  /** Returns a copy of `c` in which every `<strong>` inside an `h2`, `h3` or `h4` element is
    * replaced by its children. `<strong>` elements outside those headings are left untouched.
    */
  def convertContent(c: ArticleContent): ArticleContent = {
    val doc = stringToJsoupDocument(c.content)
    doc.select("h2 strong, h3 strong, h4 strong").unwrap(): Unit
    c.copy(content = jsoupDocumentToString(doc))
  }

  /** Decodes `document` as an [[Article]], converts all titles and contents and re-encodes it
    * as compact JSON.
    *
    * Throws if the document cannot be parsed or decoded, which aborts the migration.
    */
  private[migration] def convertArticleUpdate(document: String): String = {
    val oldArticle = parser.decode[Article](document).toTry.get
    val newArticle = oldArticle.copy(
      title = oldArticle.title.map(convertTitle),
      content = oldArticle.content.map(convertContent)
    )
    newArticle.asJson.noSpaces
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
42 changes: 42 additions & 0 deletions
42
article-api/src/test/scala/no/ndla/articleapi/db/migration/V54__RemoveStrongFromTitleTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
/* | ||
* Part of NDLA article-api | ||
* Copyright (C) 2024 NDLA | ||
* | ||
* See LICENSE | ||
*/ | ||
|
||
package no.ndla.articleapi.db.migration | ||
|
||
import no.ndla.articleapi.{TestEnvironment, UnitSuite} | ||
import no.ndla.common.model.domain.{ArticleContent, Title} | ||
|
||
/** Unit tests for the HTML conversion helpers of [[V54__RemoveStrongFromTitle]]. */
class V54__RemoveStrongFromTitleTest extends UnitSuite with TestEnvironment {

  /** A fresh migration instance for every call, so no test shares state with another. */
  private def newMigration = new V54__RemoveStrongFromTitle

  test("That strong are removed from title") {
    val input    = Title("This is a <strong>title</strong>", language = "nb")
    val expected = Title("This is a title", language = "nb")

    newMigration.convertTitle(input) should be(expected)
  }

  test("That nested strong are removed from title") {
    // Only the <strong> wrapper goes away; the nested <em> must survive.
    val input    = Title("This is a <strong><em>title</em></strong>", language = "nb")
    val expected = Title("This is a <em>title</em>", language = "nb")

    newMigration.convertTitle(input) should be(expected)
  }

  test("That strong are removed from title in article") {
    val input =
      ArticleContent("<section><h2>This is a <strong>title</strong></h2><p>Some text</p></section>", language = "nb")
    val expected =
      ArticleContent("<section><h2>This is a title</h2><p>Some text</p></section>", language = "nb")

    newMigration.convertContent(input) should be(expected)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 104 additions & 0 deletions
104
draft-api/src/main/scala/no/ndla/draftapi/db/migration/V65__RemoveStrongFromTitle.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/* | ||
* Part of NDLA draft-api | ||
* Copyright (C) 2024 NDLA | ||
* | ||
* See LICENSE | ||
*/ | ||
|
||
package no.ndla.draftapi.db.migration | ||
|
||
import io.circe.parser | ||
import io.circe.syntax.EncoderOps | ||
import no.ndla.common.model.domain.{ArticleContent, Title} | ||
import no.ndla.common.model.domain.draft.Draft | ||
import org.flywaydb.core.api.migration.{BaseJavaMigration, Context} | ||
import org.jsoup.Jsoup | ||
import org.jsoup.nodes.Element | ||
import org.jsoup.nodes.Entities.EscapeMode | ||
import org.postgresql.util.PGobject | ||
import scalikejdbc.{DB, DBSession, *} | ||
|
||
/** Flyway migration that removes `<strong>` elements from draft titles and from `h2`/`h3`/`h4`
  * headings in draft content, for every row of `articledata` that has a non-null `document`.
  *
  * NOTE(review): every title and content string is round-tripped through jsoup, also when it
  * contains no `<strong>` at all; confirm that the re-serialized markup is acceptable for such rows.
  */
class V65__RemoveStrongFromTitle extends BaseJavaMigration {

  /** Number of rows fetched per page; shared by the query and the paging arithmetic. */
  private val pageSize: Long = 1000L

  /** Counts the rows that carry a document. Falls back to 0 if the query yields no row. */
  private def countAllRows(implicit session: DBSession): Long =
    sql"select count(*) from articledata where document is not NULL"
      .map(rs => rs.long("count"))
      .single()
      .getOrElse(0L)

  /** Fetches one page of `(id, document)` pairs, ordered by id, starting at `offset`. */
  private def allRows(offset: Long)(implicit session: DBSession): Seq[(Long, String)] =
    sql"select id, document from articledata where document is not null order by id limit $pageSize offset $offset"
      .map(rs => (rs.long("id"), rs.string("document")))
      .list()

  /** Stores `document` as the `jsonb` document of the row with the given `id`.
    *
    * @return
    *   the update count reported by the database
    */
  private def updateRow(document: String, id: Long)(implicit session: DBSession): Int = {
    val dataObject = new PGobject()
    dataObject.setType("jsonb")
    dataObject.setValue(document)

    sql"update articledata set document = $dataObject where id = $id"
      .update()
  }

  /** Runs the whole migration in one transaction on the connection supplied by Flyway. */
  override def migrate(context: Context): Unit = DB(context.getConnection)
    .autoClose(false)
    .withinTx { session => migrateRows(session) }

  /** Walks the table page by page and rewrites every document. */
  private def migrateRows(implicit session: DBSession): Unit = {
    // Ceiling division: exactly as many pages as are needed, no trailing empty page.
    val pageCount = (countAllRows + pageSize - 1) / pageSize

    (0L until pageCount).foreach { page =>
      // Updates never change `id`, so offset paging over `order by id` stays stable.
      allRows(page * pageSize).foreach { case (id, document) =>
        updateRow(convertArticleUpdate(document), id)
      }
    }
  }

  /** Parses an HTML fragment and returns its `body` element, configured for xhtml escaping
    * and no pretty-printing.
    */
  private def stringToJsoupDocument(htmlString: String): Element = {
    val document = Jsoup.parseBodyFragment(htmlString)
    document.outputSettings().escapeMode(EscapeMode.xhtml).prettyPrint(false)
    document.body()
  }

  /** Serializes the inner HTML of the `body` element produced by [[stringToJsoupDocument]]. */
  private def jsoupDocumentToString(element: Element): String =
    element.html()

  /** Returns a copy of `t` in which every `<strong>` element is replaced by its children. */
  def convertTitle(t: Title): Title = {
    val doc = stringToJsoupDocument(t.title)
    doc.select("strong").unwrap(): Unit
    t.copy(title = jsoupDocumentToString(doc))
  }

  /** Returns a copy of `c` in which every `<strong>` inside an `h2`, `h3` or `h4` element is
    * replaced by its children. `<strong>` elements outside those headings are left untouched.
    */
  def convertContent(c: ArticleContent): ArticleContent = {
    val doc = stringToJsoupDocument(c.content)
    doc.select("h2 strong, h3 strong, h4 strong").unwrap(): Unit
    c.copy(content = jsoupDocumentToString(doc))
  }

  /** Decodes `document` as a [[Draft]], converts all titles and contents and re-encodes it as
    * compact JSON.
    *
    * Throws if the document cannot be parsed or decoded, which aborts the migration.
    */
  private[migration] def convertArticleUpdate(document: String): String = {
    val oldArticle = parser.decode[Draft](document).toTry.get
    val newArticle = oldArticle.copy(
      title = oldArticle.title.map(convertTitle),
      content = oldArticle.content.map(convertContent)
    )
    newArticle.asJson.noSpaces
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
41 changes: 41 additions & 0 deletions
41
draft-api/src/test/scala/no/ndla/draftapi/db/migration/V65__RemoveStrongFromTitleTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
/* | ||
* Part of NDLA draft-api | ||
* Copyright (C) 2024 NDLA | ||
* | ||
* See LICENSE | ||
*/ | ||
|
||
package no.ndla.draftapi.db.migration | ||
|
||
import no.ndla.common.model.domain.{ArticleContent, Title} | ||
import no.ndla.draftapi.{TestEnvironment, UnitSuite} | ||
|
||
/** Unit tests for the HTML conversion helpers of [[V65__RemoveStrongFromTitle]]. */
class V65__RemoveStrongFromTitleTest extends UnitSuite with TestEnvironment {

  /** A fresh migration instance for every call, so no test shares state with another. */
  private def newMigration = new V65__RemoveStrongFromTitle

  test("That strong are removed from title") {
    val input    = Title("This is a <strong>title</strong>", language = "nb")
    val expected = Title("This is a title", language = "nb")

    newMigration.convertTitle(input) should be(expected)
  }

  test("That nested strong are removed from title") {
    // Only the <strong> wrapper goes away; the nested <em> must survive.
    val input    = Title("This is a <strong><em>title</em></strong>", language = "nb")
    val expected = Title("This is a <em>title</em>", language = "nb")

    newMigration.convertTitle(input) should be(expected)
  }

  test("That strong are removed from title in article") {
    val input =
      ArticleContent("<section><h2>This is a <strong>title</strong></h2><p>Some text</p></section>", language = "nb")
    val expected =
      ArticleContent("<section><h2>This is a title</h2><p>Some text</p></section>", language = "nb")

    newMigration.convertContent(input) should be(expected)
  }
}