From c2ea2830a6555ec795d638ce3ae6b4dde455faf3 Mon Sep 17 00:00:00 2001 From: Jonas Natten Date: Thu, 5 Dec 2024 12:35:28 +0100 Subject: [PATCH 1/9] Refactor bulk reindexing Move a bunch of code to shared base classes --- .../model/domain/ReindexResult.scala | 10 -- .../service/search/IndexService.scala | 44 ++------- .../no/ndla/audioapi/model/api/Error.scala | 1 - .../model/domain/AudioMetaInformation.scala | 2 - .../service/search/IndexService.scala | 82 ++++------------ .../conceptapi/model/api/NDLAErrors.scala | 1 - .../model/domain/ReindexResult.scala | 10 -- .../service/search/IndexService.scala | 44 ++------- .../controller/InternController.scala | 3 +- .../draftapi/model/domain/ReindexResult.scala | 10 -- .../service/search/IndexService.scala | 73 +++----------- .../draftapi/service/WriteServiceTest.scala | 2 +- .../no/ndla/imageapi/model/NDLAErrors.scala | 1 - .../model/domain/ImageApiModels.scala | 2 - .../service/search/IndexService.scala | 71 +++----------- .../model/api/ErrorHandling.scala | 8 +- .../model/domain/NDLAErrors.scala | 1 - .../model/domain/ReindexResult.scala | 10 -- .../service/search/SearchIndexService.scala | 48 ++-------- .../no/ndla/searchapi/model/api/Error.scala | 1 - .../model/domain/ReindexResult.scala | 10 -- .../service/StandaloneIndexing.scala | 3 +- .../service/search/IndexService.scala | 51 +++------- .../service/search/MultiSearchService.scala | 18 +--- .../search/SearchConverterService.scala | 4 +- .../service/search/SearchService.scala | 11 ++- .../no/ndla/search/BaseIndexService.scala | 95 +++++++++++++++++-- 27 files changed, 195 insertions(+), 421 deletions(-) delete mode 100644 article-api/src/main/scala/no/ndla/articleapi/model/domain/ReindexResult.scala delete mode 100644 concept-api/src/main/scala/no/ndla/conceptapi/model/domain/ReindexResult.scala delete mode 100644 draft-api/src/main/scala/no/ndla/draftapi/model/domain/ReindexResult.scala delete mode 100644 
learningpath-api/src/main/scala/no/ndla/learningpathapi/model/domain/ReindexResult.scala delete mode 100644 search-api/src/main/scala/no/ndla/searchapi/model/domain/ReindexResult.scala diff --git a/article-api/src/main/scala/no/ndla/articleapi/model/domain/ReindexResult.scala b/article-api/src/main/scala/no/ndla/articleapi/model/domain/ReindexResult.scala deleted file mode 100644 index 17138de1c..000000000 --- a/article-api/src/main/scala/no/ndla/articleapi/model/domain/ReindexResult.scala +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Part of NDLA article-api - * Copyright (C) 2017 NDLA - * - * See LICENSE - */ - -package no.ndla.articleapi.model.domain - -case class ReindexResult(totalIndexed: Int, millisUsed: Long) diff --git a/article-api/src/main/scala/no/ndla/articleapi/service/search/IndexService.scala b/article-api/src/main/scala/no/ndla/articleapi/service/search/IndexService.scala index 19d81f1dd..0dcebf24c 100644 --- a/article-api/src/main/scala/no/ndla/articleapi/service/search/IndexService.scala +++ b/article-api/src/main/scala/no/ndla/articleapi/service/search/IndexService.scala @@ -15,10 +15,10 @@ import com.sksamuel.elastic4s.requests.indexes.IndexRequest import com.sksamuel.elastic4s.requests.mappings.dynamictemplate.DynamicTemplateRequest import com.typesafe.scalalogging.StrictLogging import no.ndla.articleapi.Props -import no.ndla.articleapi.model.domain.ReindexResult import no.ndla.articleapi.repository.ArticleRepository import no.ndla.common.model.domain.article.Article import no.ndla.search.SearchLanguage.languageAnalyzers +import no.ndla.search.model.domain.{BulkIndexResult, ReindexResult} import no.ndla.search.{BaseIndexService, Elastic4sClient, SearchLanguage} import scala.collection.mutable.ListBuffer @@ -42,25 +42,12 @@ trait IndexService { } def indexDocuments(numShards: Option[Int]): Try[ReindexResult] = synchronized { - val start = System.currentTimeMillis() - createIndexWithGeneratedName(numShards).flatMap(indexName => { - val operations = for 
{ - numIndexed <- sendToElastic(indexName) - aliasTarget <- getAliasTarget - _ <- updateAliasTarget(aliasTarget, indexName) - } yield numIndexed - - operations match { - case Failure(f) => - deleteIndexWithName(Some(indexName)): Unit - Failure(f) - case Success(totalIndexed) => - Success(ReindexResult(totalIndexed, System.currentTimeMillis() - start)) - } - }) + indexDocumentsInBulk(numShards) { + sendToElastic + } } - def sendToElastic(indexName: String): Try[Int] = { + def sendToElastic(indexName: String): Try[BulkIndexResult] = { getRanges .flatMap(ranges => { ranges.traverse { case (start, end) => @@ -68,7 +55,7 @@ trait IndexService { indexDocuments(toIndex, indexName) } }) - .map(_.sum) + .map(countBulkIndexed) } def getRanges: Try[List[(Long, Long)]] = { @@ -82,9 +69,9 @@ trait IndexService { } } - def indexDocuments(contents: Seq[Article], indexName: String): Try[Int] = { + def indexDocuments(contents: Seq[Article], indexName: String): Try[BulkIndexResult] = { if (contents.isEmpty) { - Success(0) + Success(BulkIndexResult.empty) } else { val response = e4sClient.execute { bulk(contents.map(content => { @@ -95,25 +82,12 @@ trait IndexService { response match { case Success(r) => logger.info(s"Indexed ${contents.size} documents. No of failed items: ${r.result.failures.size}") - Success(contents.size) + Success(BulkIndexResult(r.result.successes.size, contents.size)) case Failure(ex) => Failure(ex) } } } - def findAllIndexes(indexName: String): Try[Seq[String]] = { - val response = e4sClient.execute { - getAliases() - } - - response match { - case Success(results) => - Success(results.result.mappings.toList.map { case (index, _) => index.name }.filter(_.startsWith(indexName))) - case Failure(ex) => - Failure(ex) - } - } - /** Returns Sequence of FieldDefinitions for a given field. 
* * @param fieldName diff --git a/audio-api/src/main/scala/no/ndla/audioapi/model/api/Error.scala b/audio-api/src/main/scala/no/ndla/audioapi/model/api/Error.scala index bfc3d8c7c..58c1de5a1 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/model/api/Error.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/model/api/Error.scala @@ -65,4 +65,3 @@ case class CouldNotFindLanguageException(message: String) extends Run class AudioStorageException(message: String) extends RuntimeException(message) class LanguageMappingException(message: String) extends RuntimeException(message) class ImportException(message: String) extends RuntimeException(message) -case class ElasticIndexingException(message: String) extends RuntimeException(message) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/model/domain/AudioMetaInformation.scala b/audio-api/src/main/scala/no/ndla/audioapi/model/domain/AudioMetaInformation.scala index 7263f9e6d..32a4744a8 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/model/domain/AudioMetaInformation.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/model/domain/AudioMetaInformation.scala @@ -91,5 +91,3 @@ object AudioMetaInformation extends SQLSyntaxSupport[AudioMetaInformation] { rs.longOpt(au.c("id")).map(_ => fromResultSet(au)(rs)) } } - -case class ReindexResult(totalIndexed: Int, millisUsed: Long) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/search/IndexService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/search/IndexService.scala index 0512ef84a..d058506c7 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/search/IndexService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/search/IndexService.scala @@ -8,24 +8,24 @@ package no.ndla.audioapi.service.search -import cats.implicits._ -import com.sksamuel.elastic4s.ElasticDsl._ +import cats.implicits.* +import com.sksamuel.elastic4s.ElasticDsl.* import com.sksamuel.elastic4s.fields.ElasticField import 
com.sksamuel.elastic4s.requests.indexes.IndexRequest import com.sksamuel.elastic4s.requests.mappings.MappingDefinition import com.sksamuel.elastic4s.requests.mappings.dynamictemplate.DynamicTemplateRequest import com.typesafe.scalalogging.StrictLogging import no.ndla.audioapi.Props -import no.ndla.audioapi.model.domain.ReindexResult import no.ndla.audioapi.repository.{AudioRepository, Repository} import no.ndla.search.SearchLanguage.languageAnalyzers +import no.ndla.search.model.domain.{BulkIndexResult, ReindexResult} import no.ndla.search.{BaseIndexService, Elastic4sClient, SearchLanguage} import scala.collection.mutable.ListBuffer import scala.util.{Failure, Success, Try} trait IndexService { - this: Elastic4sClient with BaseIndexService with SearchConverterService with AudioRepository with Props => + this: Elastic4sClient & BaseIndexService & SearchConverterService & AudioRepository & Props => trait IndexService[D, T] extends BaseIndexService with StrictLogging { override val MaxResultWindowOption: Int = props.ElasticSearchIndexMaxResultWindow @@ -45,33 +45,21 @@ trait IndexService { } yield imported } - def indexDocuments(numShards: Option[Int]): Try[ReindexResult] = { - synchronized { - val start = System.currentTimeMillis() - createIndexWithGeneratedName(numShards).flatMap(indexName => { - val operations = for { - numIndexed <- sendToElastic(indexName) - aliasTarget <- getAliasTarget - _ <- updateAliasTarget(aliasTarget, indexName) - } yield numIndexed - - operations match { - case Failure(f) => deleteIndexWithName(Some(indexName)).flatMap(_ => Failure(f)) - case Success(totalIndexed) => Success(ReindexResult(totalIndexed, System.currentTimeMillis() - start)) - } - }) - } + def indexDocuments(numShards: Option[Int]): Try[ReindexResult] = synchronized { + indexDocumentsInBulk(numShards)(sendToElastic) } - def sendToElastic(indexName: String): Try[Int] = { + def sendToElastic(indexName: String): Try[BulkIndexResult] = { getRanges .flatMap(ranges => { - 
ranges.traverse { case (start, end) => - val documentsToIndex = repository.documentsWithIdBetween(start, end) - documentsToIndex.flatMap(indexDocuments(_, indexName)) - } + ranges + .traverse { case (start, end) => + repository + .documentsWithIdBetween(start, end) + .flatMap(toIndex => indexDocuments(toIndex, indexName)) + } + .map(countBulkIndexed) }) - .map(_.sum) } def getRanges: Try[List[(Long, Long)]] = { @@ -88,16 +76,18 @@ trait IndexService { } - def indexDocuments(contents: Seq[D], indexName: String): Try[Int] = { + def indexDocuments(contents: Seq[D], indexName: String): Try[BulkIndexResult] = { if (contents.isEmpty) { - Success(0) + Success(BulkIndexResult.empty) } else { val requests = contents.traverse(content => createIndexRequests(content, indexName)) requests.flatMap(rs => { executeRequests(rs.flatten) match { - case Success((numSuccessful, numFailures)) => - logger.info(s"Indexed $numSuccessful documents ($searchIndex). No of failed items: $numFailures") - Success(contents.size) + case Success(result) => + logger.info( + s"Indexed ${result.successful} documents ($searchIndex). No of failed items: ${result.failed}" + ) + Success(result) case Failure(ex) => Failure(ex) } }) @@ -105,36 +95,6 @@ trait IndexService { } } - def findAllIndexes(indexName: String): Try[Seq[String]] = { - val response = e4sClient.execute { - getAliases() - } - - response match { - case Success(results) => - Success(results.result.mappings.toList.map { case (index, _) => index.name }.filter(_.startsWith(indexName))) - case Failure(ex) => - Failure(ex) - } - } - - /** Executes elasticsearch requests in bulk. Returns success (without executing anything) if supplied with an empty - * list. 
- * - * @param requests - * a list of elasticsearch [[IndexRequest]]'s - * @return - * A Try suggesting if the request was successful or not with a tuple containing number of successful requests - * and number of failed requests (in that order) - */ - private def executeRequests(requests: Seq[IndexRequest]): Try[(Int, Int)] = { - requests match { - case Nil => Success((0, 0)) - case head :: Nil => e4sClient.execute(head).map(r => if (r.isSuccess) (1, 0) else (0, 1)) - case reqs => e4sClient.execute(bulk(reqs)).map(r => (r.result.successes.size, r.result.failures.size)) - } - } - /** @deprecated * Returns Sequence of FieldDefinitions for a given field. * diff --git a/concept-api/src/main/scala/no/ndla/conceptapi/model/api/NDLAErrors.scala b/concept-api/src/main/scala/no/ndla/conceptapi/model/api/NDLAErrors.scala index 331bad865..387dd0e4f 100644 --- a/concept-api/src/main/scala/no/ndla/conceptapi/model/api/NDLAErrors.scala +++ b/concept-api/src/main/scala/no/ndla/conceptapi/model/api/NDLAErrors.scala @@ -66,5 +66,4 @@ case class NotFoundException(message: String, supportedLanguages: Seq[String] = extends RuntimeException(message) case class ConceptMissingIdException(message: String) extends RuntimeException(message) case class ConceptExistsAlreadyException(message: String) extends RuntimeException(message) -case class ElasticIndexingException(message: String) extends RuntimeException(message) case class OperationNotAllowedException(message: String) extends RuntimeException(message) diff --git a/concept-api/src/main/scala/no/ndla/conceptapi/model/domain/ReindexResult.scala b/concept-api/src/main/scala/no/ndla/conceptapi/model/domain/ReindexResult.scala deleted file mode 100644 index 4d797cedd..000000000 --- a/concept-api/src/main/scala/no/ndla/conceptapi/model/domain/ReindexResult.scala +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Part of NDLA concept-api - * Copyright (C) 2019 NDLA - * - * See LICENSE - */ - -package no.ndla.conceptapi.model.domain - -case class 
ReindexResult(totalIndexed: Int, millisUsed: Long) diff --git a/concept-api/src/main/scala/no/ndla/conceptapi/service/search/IndexService.scala b/concept-api/src/main/scala/no/ndla/conceptapi/service/search/IndexService.scala index d92312f3a..61d207b86 100644 --- a/concept-api/src/main/scala/no/ndla/conceptapi/service/search/IndexService.scala +++ b/concept-api/src/main/scala/no/ndla/conceptapi/service/search/IndexService.scala @@ -20,10 +20,10 @@ import no.ndla.common.model.domain.concept.Concept import no.ndla.conceptapi.Props import no.ndla.conceptapi.integration.TaxonomyApiClient import no.ndla.conceptapi.integration.model.TaxonomyData -import no.ndla.conceptapi.model.api.{ConceptMissingIdException, ElasticIndexingException} -import no.ndla.conceptapi.model.domain.ReindexResult +import no.ndla.conceptapi.model.api.ConceptMissingIdException import no.ndla.conceptapi.repository.Repository import no.ndla.search.SearchLanguage.{NynorskLanguageAnalyzer, languageAnalyzers} +import no.ndla.search.model.domain.{BulkIndexResult, ElasticIndexingException, ReindexResult} import no.ndla.search.{BaseIndexService, Elastic4sClient, SearchLanguage} import scala.util.{Failure, Success, Try} @@ -70,36 +70,19 @@ trait IndexService { } yield imported } - def indexDocuments(numShards: Option[Int]): Try[ReindexResult] = { - synchronized { - val start = System.currentTimeMillis() - createIndexWithGeneratedName(numShards).flatMap(indexName => { - val operations = for { - numIndexed <- sendToElastic(indexName) - aliasTarget <- getAliasTarget - _ <- updateAliasTarget(aliasTarget, indexName) - } yield numIndexed - - operations match { - case Failure(f) => - deleteIndexWithName(Some(indexName)): Unit - Failure(f) - case Success(totalIndexed) => - Success(ReindexResult(totalIndexed, System.currentTimeMillis() - start)) - } - }) - } + def indexDocuments(numShards: Option[Int]): Try[ReindexResult] = synchronized { + indexDocumentsInBulk(numShards)(sendToElastic) } - private def 
sendToElastic(indexName: String): Try[Int] = { + private def sendToElastic(indexName: String): Try[BulkIndexResult] = { for { taxonomyData <- taxonomyApiClient.getSubjects ranges <- getRanges indexed <- ranges.traverse { case (start, end) => val toIndex = repository.documentsWithIdBetween(start, end) - indexDocuments(toIndex, indexName, taxonomyData) + indexDocuments(toIndex, indexName, taxonomyData).map(numIndexed => (numIndexed, toIndex.size)) } - } yield indexed.sum + } yield countIndexed(indexed) } private def getRanges: Try[List[(Long, Long)]] = { @@ -142,19 +125,6 @@ trait IndexService { def findAllIndexes: Try[Seq[String]] = findAllIndexes(this.searchIndex) - private def findAllIndexes(indexName: String): Try[Seq[String]] = { - val response = e4sClient.execute { - getAliases() - } - - response match { - case Success(results) => - Success(results.result.mappings.toList.map { case (index, _) => index.name }.filter(_.startsWith(indexName))) - case Failure(ex) => - Failure(ex) - } - } - /** Returns Sequence of DynamicTemplateRequest for a given field. 
* * @param fieldName diff --git a/draft-api/src/main/scala/no/ndla/draftapi/controller/InternController.scala b/draft-api/src/main/scala/no/ndla/draftapi/controller/InternController.scala index ad04435fc..dea66ac07 100644 --- a/draft-api/src/main/scala/no/ndla/draftapi/controller/InternController.scala +++ b/draft-api/src/main/scala/no/ndla/draftapi/controller/InternController.scala @@ -11,7 +11,7 @@ import no.ndla.common.model.domain.draft.{Draft, DraftStatus} import no.ndla.draftapi.Props import no.ndla.draftapi.integration.ArticleApiClient import no.ndla.draftapi.model.api.{ArticleDomainDump, ArticleDump, ContentId, NotFoundException} -import no.ndla.draftapi.model.domain.{ArticleIds, ImportId, ReindexResult} +import no.ndla.draftapi.model.domain.{ArticleIds, ImportId} import no.ndla.draftapi.repository.DraftRepository import no.ndla.draftapi.service.* import no.ndla.draftapi.service.search.* @@ -28,6 +28,7 @@ import scalikejdbc.ReadOnlyAutoSession import sttp.model.StatusCode import sttp.tapir.server.ServerEndpoint import io.circe.generic.auto.* +import no.ndla.search.model.domain.ReindexResult import sttp.tapir.generic.auto.* import java.util.concurrent.{Executors, TimeUnit} diff --git a/draft-api/src/main/scala/no/ndla/draftapi/model/domain/ReindexResult.scala b/draft-api/src/main/scala/no/ndla/draftapi/model/domain/ReindexResult.scala deleted file mode 100644 index 9935b3a72..000000000 --- a/draft-api/src/main/scala/no/ndla/draftapi/model/domain/ReindexResult.scala +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Part of NDLA draft-api - * Copyright (C) 2017 NDLA - * - * See LICENSE - */ - -package no.ndla.draftapi.model.domain - -case class ReindexResult(totalIndexed: Int, millisUsed: Long) diff --git a/draft-api/src/main/scala/no/ndla/draftapi/service/search/IndexService.scala b/draft-api/src/main/scala/no/ndla/draftapi/service/search/IndexService.scala index 5936f35d9..fb680036f 100644 --- 
a/draft-api/src/main/scala/no/ndla/draftapi/service/search/IndexService.scala +++ b/draft-api/src/main/scala/no/ndla/draftapi/service/search/IndexService.scala @@ -7,20 +7,20 @@ package no.ndla.draftapi.service.search -import com.sksamuel.elastic4s.ElasticDsl._ +import com.sksamuel.elastic4s.ElasticDsl.* import com.sksamuel.elastic4s.fields.ElasticField import com.sksamuel.elastic4s.requests.indexes.IndexRequest import com.sksamuel.elastic4s.requests.mappings.dynamictemplate.DynamicTemplateRequest import com.typesafe.scalalogging.StrictLogging import no.ndla.draftapi.Props -import no.ndla.draftapi.model.domain.ReindexResult import no.ndla.draftapi.repository.Repository import no.ndla.search.SearchLanguage.languageAnalyzers import no.ndla.search.{BaseIndexService, Elastic4sClient, SearchLanguage} import scala.collection.mutable.ListBuffer import scala.util.{Failure, Success, Try} -import cats.implicits._ +import cats.implicits.* +import no.ndla.search.model.domain.{BulkIndexResult, ReindexResult} import scala.concurrent.{ExecutionContext, Future} @@ -54,30 +54,11 @@ trait IndexService { } yield imported } - def indexDocuments(numShards: Option[Int]): Try[ReindexResult] = { - synchronized { - val start = System.currentTimeMillis() - createIndexWithGeneratedName(numShards).flatMap(indexName => { - val operations = for { - numIndexed <- sendToElastic(indexName) - aliasTarget <- getAliasTarget - _ <- updateAliasTarget(aliasTarget, indexName) - } yield numIndexed - - operations match { - case Failure(f) => { - deleteIndexWithName(Some(indexName)): Unit - Failure(f) - } - case Success(totalIndexed) => { - Success(ReindexResult(totalIndexed, System.currentTimeMillis() - start)) - } - } - }) - } + def indexDocuments(numShards: Option[Int]): Try[ReindexResult] = synchronized { + indexDocumentsInBulk(numShards)(sendToElastic) } - def sendToElastic(indexName: String): Try[Int] = { + def sendToElastic(indexName: String): Try[BulkIndexResult] = { getRanges .flatMap(ranges => { 
ranges.traverse { case (start, end) => @@ -85,7 +66,7 @@ trait IndexService { indexDocuments(toIndex, indexName) } }) - .map(_.sum) + .map(countBulkIndexed) } def getRanges: Try[List[(Long, Long)]] = { @@ -99,53 +80,23 @@ trait IndexService { } } - def indexDocuments(contents: Seq[D], indexName: String): Try[Int] = { + def indexDocuments(contents: Seq[D], indexName: String): Try[BulkIndexResult] = { if (contents.isEmpty) { - Success(0) + Success(BulkIndexResult.empty) } else { val requests = contents.flatMap(content => { createIndexRequests(content, indexName) }) executeRequests(requests) match { - case Success((numSuccessful, numFailures)) => - logger.info(s"Indexed $numSuccessful documents ($searchIndex). No of failed items: $numFailures") - Success(contents.size) + case Success(result) => + logger.info(s"Indexed ${result.successful} documents ($searchIndex). No of failed items: ${result.failed}") + Success(result) case Failure(ex) => Failure(ex) } } } - def findAllIndexes(indexName: String): Try[Seq[String]] = { - val response = e4sClient.execute { - getAliases() - } - - response match { - case Success(results) => - Success(results.result.mappings.toList.map { case (index, _) => index.name }.filter(_.startsWith(indexName))) - case Failure(ex) => - Failure(ex) - } - } - - /** Executes elasticsearch requests in bulk. Returns success (without executing anything) if supplied with an empty - * list. 
- * - * @param requests - * a list of elasticsearch [[IndexRequest]]'s - * @return - * A Try suggesting if the request was successful or not with a tuple containing number of successful requests - * and number of failed requests (in that order) - */ - private def executeRequests(requests: Seq[IndexRequest]): Try[(Int, Int)] = { - requests match { - case Nil => Success((0, 0)) - case head :: Nil => e4sClient.execute(head).map(r => if (r.isSuccess) (1, 0) else (0, 1)) - case reqs => e4sClient.execute(bulk(reqs)).map(r => (r.result.successes.size, r.result.failures.size)) - } - } - /** Returns Sequence of FieldDefinitions for a given field. * * @param fieldName diff --git a/draft-api/src/test/scala/no/ndla/draftapi/service/WriteServiceTest.scala b/draft-api/src/test/scala/no/ndla/draftapi/service/WriteServiceTest.scala index d90bb1448..f740d28b3 100644 --- a/draft-api/src/test/scala/no/ndla/draftapi/service/WriteServiceTest.scala +++ b/draft-api/src/test/scala/no/ndla/draftapi/service/WriteServiceTest.scala @@ -73,7 +73,7 @@ class WriteServiceTest extends UnitSuite with TestEnvironment { when(tagIndexService.indexDocument(any[Draft])).thenAnswer((invocation: InvocationOnMock) => Try(invocation.getArgument[Draft](0)) ) - when(grepCodesIndexService.indexDocument(any[Draft])).thenAnswer((invocation: InvocationOnMock) => + when(grepCodesIndexService.indexDocument(any)).thenAnswer((invocation: InvocationOnMock) => Try(invocation.getArgument[Draft](0)) ) when(readService.addUrlsOnEmbedResources(any[Draft])).thenAnswer((invocation: InvocationOnMock) => diff --git a/image-api/src/main/scala/no/ndla/imageapi/model/NDLAErrors.scala b/image-api/src/main/scala/no/ndla/imageapi/model/NDLAErrors.scala index 9aec325e7..e2570f673 100644 --- a/image-api/src/main/scala/no/ndla/imageapi/model/NDLAErrors.scala +++ b/image-api/src/main/scala/no/ndla/imageapi/model/NDLAErrors.scala @@ -15,7 +15,6 @@ class ImportException(message: String) extends RuntimeException(message) case class 
InvalidUrlException(message: String) extends RuntimeException(message) class ResultWindowTooLargeException(message: String) extends RuntimeException(message) -case class ElasticIndexingException(message: String) extends RuntimeException(message) class ImageStorageException(message: String) extends RuntimeException(message) case class ImageConversionException(message: String) extends RuntimeException(message) diff --git a/image-api/src/main/scala/no/ndla/imageapi/model/domain/ImageApiModels.scala b/image-api/src/main/scala/no/ndla/imageapi/model/domain/ImageApiModels.scala index d8e00ea9a..6c46fea8e 100644 --- a/image-api/src/main/scala/no/ndla/imageapi/model/domain/ImageApiModels.scala +++ b/image-api/src/main/scala/no/ndla/imageapi/model/domain/ImageApiModels.scala @@ -103,5 +103,3 @@ object ModelReleasedStatus extends Enumeration { implicit val encoder: Encoder[ModelReleasedStatus.Value] = Encoder.encodeEnumeration(ModelReleasedStatus) implicit val decoder: Decoder[ModelReleasedStatus.Value] = Decoder.decodeEnumeration(ModelReleasedStatus) } - -case class ReindexResult(totalIndexed: Int, millisUsed: Long) diff --git a/image-api/src/main/scala/no/ndla/imageapi/service/search/IndexService.scala b/image-api/src/main/scala/no/ndla/imageapi/service/search/IndexService.scala index 352696caf..15b96c9cd 100644 --- a/image-api/src/main/scala/no/ndla/imageapi/service/search/IndexService.scala +++ b/image-api/src/main/scala/no/ndla/imageapi/service/search/IndexService.scala @@ -7,16 +7,16 @@ package no.ndla.imageapi.service.search -import cats.implicits._ -import com.sksamuel.elastic4s.ElasticDsl._ +import cats.implicits.* +import com.sksamuel.elastic4s.ElasticDsl.* import com.sksamuel.elastic4s.fields.ElasticField import com.sksamuel.elastic4s.requests.indexes.IndexRequest import com.sksamuel.elastic4s.requests.mappings.dynamictemplate.DynamicTemplateRequest import com.typesafe.scalalogging.StrictLogging import no.ndla.imageapi.Props -import 
no.ndla.imageapi.model.domain.ReindexResult import no.ndla.imageapi.repository.{ImageRepository, Repository} import no.ndla.search.SearchLanguage.languageAnalyzers +import no.ndla.search.model.domain.{BulkIndexResult, ReindexResult} import no.ndla.search.{BaseIndexService, Elastic4sClient, SearchLanguage} import scala.collection.mutable.ListBuffer @@ -39,28 +39,11 @@ trait IndexService { } yield imported } - def indexDocuments(numShards: Option[Int]): Try[ReindexResult] = { - synchronized { - val start = System.currentTimeMillis() - createIndexWithGeneratedName(numShards).flatMap(indexName => { - val operations = for { - numIndexed <- sendToElastic(indexName) - aliasTarget <- getAliasTarget - _ <- updateAliasTarget(aliasTarget, indexName) - } yield numIndexed - - operations match { - case Failure(f) => - deleteIndexWithName(Some(indexName)): Unit - Failure(f) - case Success(totalIndexed) => - Success(ReindexResult(totalIndexed, System.currentTimeMillis() - start)) - } - }) - } + def indexDocuments(numShards: Option[Int]): Try[ReindexResult] = synchronized { + indexDocumentsInBulk(numShards)(sendToElastic) } - def sendToElastic(indexName: String): Try[Int] = { + def sendToElastic(indexName: String): Try[BulkIndexResult] = { getRanges .flatMap(ranges => { ranges.traverse { case (start, end) => @@ -68,7 +51,7 @@ trait IndexService { indexDocuments(toIndex, indexName) } }) - .map(_.sum) + .map(countBulkIndexed) } def getRanges: Try[List[(Long, Long)]] = { @@ -82,53 +65,23 @@ trait IndexService { } } - def indexDocuments(contents: Seq[D], indexName: String): Try[Int] = { + def indexDocuments(contents: Seq[D], indexName: String): Try[BulkIndexResult] = { if (contents.isEmpty) { - Success(0) + Success(BulkIndexResult.empty) } else { val requests = contents.flatMap(content => { createIndexRequests(content, indexName) }) executeRequests(requests) match { - case Success((numSuccessful, numFailures)) => - logger.info(s"Indexed $numSuccessful documents ($searchIndex). 
No of failed items: $numFailures") - Success(contents.size) + case Success(result) => + logger.info(s"Indexed ${result.successful} documents ($searchIndex). No of failed items: ${result.failed}") + Success(result) case Failure(ex) => Failure(ex) } } } - def findAllIndexes(indexName: String): Try[Seq[String]] = { - val response = e4sClient.execute { - getAliases() - } - - response match { - case Success(results) => - Success(results.result.mappings.toList.map { case (index, _) => index.name }.filter(_.startsWith(indexName))) - case Failure(ex) => - Failure(ex) - } - } - - /** Executes elasticsearch requests in bulk. Returns success (without executing anything) if supplied with an empty - * list. - * - * @param requests - * a list of elasticsearch [[IndexRequest]]'s - * @return - * A Try suggesting if the request was successful or not with a tuple containing number of successful requests - * and number of failed requests (in that order) - */ - private def executeRequests(requests: Seq[IndexRequest]): Try[(Int, Int)] = { - requests match { - case Nil => Success((0, 0)) - case head :: Nil => e4sClient.execute(head).map(r => if (r.isSuccess) (1, 0) else (0, 1)) - case reqs => e4sClient.execute(bulk(reqs)).map(r => (r.result.successes.size, r.result.failures.size)) - } - } - /** Returns Sequence of FieldDefinitions for a given field. 
* * @param fieldName diff --git a/learningpath-api/src/main/scala/no/ndla/learningpathapi/model/api/ErrorHandling.scala b/learningpath-api/src/main/scala/no/ndla/learningpathapi/model/api/ErrorHandling.scala index a9f1c9c88..a1a8513bd 100644 --- a/learningpath-api/src/main/scala/no/ndla/learningpathapi/model/api/ErrorHandling.scala +++ b/learningpath-api/src/main/scala/no/ndla/learningpathapi/model/api/ErrorHandling.scala @@ -12,14 +12,10 @@ import no.ndla.common.Clock import no.ndla.common.errors.{AccessDeniedException, NotFoundException, ValidationException} import no.ndla.database.DataSource import no.ndla.learningpathapi.Props -import no.ndla.learningpathapi.model.domain.{ - ElasticIndexingException, - ImportException, - InvalidLpStatusException, - OptimisticLockException -} +import no.ndla.learningpathapi.model.domain.{ImportException, InvalidLpStatusException, OptimisticLockException} import no.ndla.network.model.HttpRequestException import no.ndla.network.tapir.{AllErrors, TapirErrorHandling} +import no.ndla.search.model.domain.ElasticIndexingException import no.ndla.search.{IndexNotFoundException, NdlaSearchException} import org.postgresql.util.PSQLException diff --git a/learningpath-api/src/main/scala/no/ndla/learningpathapi/model/domain/NDLAErrors.scala b/learningpath-api/src/main/scala/no/ndla/learningpathapi/model/domain/NDLAErrors.scala index 26a50b234..8f8f78119 100644 --- a/learningpath-api/src/main/scala/no/ndla/learningpathapi/model/domain/NDLAErrors.scala +++ b/learningpath-api/src/main/scala/no/ndla/learningpathapi/model/domain/NDLAErrors.scala @@ -10,7 +10,6 @@ package no.ndla.learningpathapi.model.domain class OptimisticLockException(message: String) extends RuntimeException(message) class ImportException(message: String) extends RuntimeException(message) -case class ElasticIndexingException(message: String) extends RuntimeException(message) case class SearchException(message: String) extends RuntimeException(message) case class 
TaxonomyUpdateException(message: String) extends RuntimeException(message) case class InvalidOembedResponse(message: String) extends RuntimeException(message) diff --git a/learningpath-api/src/main/scala/no/ndla/learningpathapi/model/domain/ReindexResult.scala b/learningpath-api/src/main/scala/no/ndla/learningpathapi/model/domain/ReindexResult.scala deleted file mode 100644 index 626302189..000000000 --- a/learningpath-api/src/main/scala/no/ndla/learningpathapi/model/domain/ReindexResult.scala +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Part of NDLA learningpath-api - * Copyright (C) 2017 NDLA - * - * See LICENSE - */ - -package no.ndla.learningpathapi.model.domain - -case class ReindexResult(totalIndexed: Int, millisUsed: Long) diff --git a/learningpath-api/src/main/scala/no/ndla/learningpathapi/service/search/SearchIndexService.scala b/learningpath-api/src/main/scala/no/ndla/learningpathapi/service/search/SearchIndexService.scala index 683a5b928..44b9b59bd 100644 --- a/learningpath-api/src/main/scala/no/ndla/learningpathapi/service/search/SearchIndexService.scala +++ b/learningpath-api/src/main/scala/no/ndla/learningpathapi/service/search/SearchIndexService.scala @@ -16,7 +16,6 @@ import com.sksamuel.elastic4s.requests.mappings.dynamictemplate.DynamicTemplateR import com.typesafe.scalalogging.StrictLogging import no.ndla.learningpathapi.Props import no.ndla.learningpathapi.integration.SearchApiClient -import no.ndla.learningpathapi.model.domain.{ElasticIndexingException, ReindexResult} import no.ndla.learningpathapi.repository.LearningPathRepositoryComponent import no.ndla.search.SearchLanguage.languageAnalyzers import no.ndla.search.{BaseIndexService, Elastic4sClient, SearchLanguage} @@ -27,6 +26,7 @@ import cats.implicits.* import no.ndla.common.CirceUtil import no.ndla.common.model.domain.learningpath.LearningPath import no.ndla.network.tapir.auth.TokenUser +import no.ndla.search.model.domain.{BulkIndexResult, ElasticIndexingException, ReindexResult} trait 
SearchIndexService { this: Elastic4sClient & SearchConverterServiceComponent & LearningPathRepositoryComponent & SearchApiClient & @@ -40,24 +40,8 @@ trait SearchIndexService { override val MaxResultWindowOption: Int = ElasticSearchIndexMaxResultWindow def indexDocuments: Try[ReindexResult] = indexDocuments(None) - def indexDocuments(numShards: Option[Int]): Try[ReindexResult] = { - synchronized { - val start = System.currentTimeMillis() - createIndexWithGeneratedName(numShards).flatMap(indexName => { - val operations = for { - numIndexed <- sendToElastic(indexName) - aliasTarget <- getAliasTarget - _ <- updateAliasTarget(aliasTarget, indexName) - } yield numIndexed - - operations match { - case Failure(f) => - deleteIndexWithName(Some(indexName)): Unit - Failure(f) - case Success(totalIndexed) => Success(ReindexResult(totalIndexed, System.currentTimeMillis() - start)) - } - }) - } + def indexDocuments(numShards: Option[Int]): Try[ReindexResult] = synchronized { + indexDocumentsInBulk(numShards)(sendToElastic) } def indexDocument(learningPath: LearningPath): Try[LearningPath] = for { @@ -88,28 +72,16 @@ trait SearchIndexService { .getOrElse(Success(learningPath)) } - def findAllIndexes(indexName: String): Try[Seq[String]] = { - val response = e4sClient.execute { - getAliases() - } - - response match { - case Success(results) => - Success(results.result.mappings.toList.map { case (index, _) => index.name }.filter(_.startsWith(indexName))) - case Failure(ex) => - Failure(ex) - } - } - - private def sendToElastic(indexName: String): Try[Int] = { + private def sendToElastic(indexName: String): Try[BulkIndexResult] = { getRanges .flatMap(ranges => { - ranges.traverse { case (start, end) => - val toIndex = learningPathRepository.learningPathsWithIdBetween(start, end) - indexLearningPaths(toIndex, indexName) - } + ranges + .traverse { case (start, end) => + val toIndex = learningPathRepository.learningPathsWithIdBetween(start, end) + indexLearningPaths(toIndex, 
indexName).map(numIndexed => (numIndexed, toIndex.size)) + } + .map(countIndexed) }) - .map(_.sum) } private def getRanges: Try[List[(Long, Long)]] = { diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/Error.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/Error.scala index cfde85c01..c34a74200 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/model/api/Error.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/model/api/Error.scala @@ -53,6 +53,5 @@ trait ErrorHandling extends TapirErrorHandling { } class ApiSearchException(val apiName: String, message: String) extends RuntimeException(message) -case class ElasticIndexingException(message: String) extends RuntimeException(message) case class TaxonomyException(message: String) extends RuntimeException(message) case class GrepException(message: String) extends RuntimeException(message) diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/domain/ReindexResult.scala b/search-api/src/main/scala/no/ndla/searchapi/model/domain/ReindexResult.scala deleted file mode 100644 index a1afb8189..000000000 --- a/search-api/src/main/scala/no/ndla/searchapi/model/domain/ReindexResult.scala +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Part of NDLA search-api - * Copyright (C) 2018 NDLA - * - * See LICENSE - */ - -package no.ndla.searchapi.model.domain - -case class ReindexResult(name: String, failedIndexed: Int, totalIndexed: Int, millisUsed: Long) diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/StandaloneIndexing.scala b/search-api/src/main/scala/no/ndla/searchapi/service/StandaloneIndexing.scala index e11063c08..57484e114 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/StandaloneIndexing.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/StandaloneIndexing.scala @@ -12,7 +12,8 @@ import io.circe.{Decoder, Encoder} import no.ndla.common.CirceUtil import no.ndla.common.Environment.{booleanPropOrFalse, prop} import 
no.ndla.common.model.domain.Content -import no.ndla.searchapi.model.domain.{IndexingBundle, ReindexResult} +import no.ndla.search.model.domain.ReindexResult +import no.ndla.searchapi.model.domain.IndexingBundle import no.ndla.searchapi.model.search.SearchType import no.ndla.searchapi.{ComponentRegistry, SearchApiProperties} import sttp.client3.quick.* diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala index 2c9a1f85f..ce70b982e 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala @@ -18,11 +18,11 @@ import io.circe.Decoder import no.ndla.common.model.domain.Content import no.ndla.network.clients.MyNDLAApiClient import no.ndla.search.SearchLanguage.NynorskLanguageAnalyzer +import no.ndla.search.model.domain.{BulkIndexResult, ElasticIndexingException, ReindexResult} import no.ndla.search.{BaseIndexService, Elastic4sClient, SearchLanguage} import no.ndla.searchapi.Props import no.ndla.searchapi.integration.* -import no.ndla.searchapi.model.api.ElasticIndexingException -import no.ndla.searchapi.model.domain.{IndexingBundle, ReindexResult} +import no.ndla.searchapi.model.domain.IndexingBundle import scala.util.{Failure, Success, Try} @@ -102,41 +102,14 @@ trait IndexService { )(implicit d: Decoder[D] ): Try[ReindexResult] = { - val start = System.currentTimeMillis() - createIndexWithGeneratedName(numShards).flatMap(indexName => { - sendToElastic(indexName, indexingBundle) match { - case Failure(ex) => - deleteIndexWithName(Some(indexName)): Unit - Failure(ex) - case Success((count, totalCount)) => - val numErrors = totalCount - count - - if (numErrors > 0) { - logger.error(s"Indexing completed, but with $numErrors errors.") - deleteIndexWithName(Some(indexName)): Unit - Failure( - ElasticIndexingException( - s"Indexing $documentType 
completed with $numErrors errors, will not replace index." - ) - ) - } else { - val operations = getAliasTarget.flatMap(updateAliasTarget(_, indexName)) - operations.map(_ => - ReindexResult( - documentType, - numErrors, - count, - System.currentTimeMillis() - start - ) - ) - } - } - }) + indexDocumentsInBulk(numShards) { indexName => + sendToElastic(indexName, indexingBundle) + } } private def sendToElastic(indexName: String, indexingBundle: IndexingBundle)(implicit d: Decoder[D] - ): Try[(Int, Int)] = { + ): Try[BulkIndexResult] = { val chunks = apiClient.getChunks[D] val results = chunks @@ -154,13 +127,11 @@ trait IndexService { (chunkIndexed, chunkSize) } - val (count, totalCount) = successfulChunks.foldLeft((0, 0)) { - case ((totalIndexed, totalSize), (chunkIndexed, chunkSize)) => - (totalIndexed + chunkIndexed, totalSize + chunkSize) - } - - logger.info(s"$count/$totalCount documents ($documentType) were indexed successfully.") - Success((count, totalCount)) + val indexResult = countIndexed(successfulChunks) + logger.info( + s"${indexResult.count}/${indexResult.totalCount} documents ($documentType) were indexed successfully." 
+ ) + Success(indexResult) case notEmpty => notEmpty.head } diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/MultiSearchService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/MultiSearchService.scala index 4230783cf..67a835d9d 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/MultiSearchService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/MultiSearchService.scala @@ -86,30 +86,22 @@ trait MultiSearchService { val filteredSearch = baseQuery.filter(getSearchFilters(settings)) - val (startAt, numResults) = getStartAtAndNumResults(settings.page, settings.pageSize) - val requestedResultWindow = settings.pageSize * settings.page - if (requestedResultWindow > ElasticSearchIndexMaxResultWindow) { - logger.info( - s"Max supported results are $ElasticSearchIndexMaxResultWindow, user requested $requestedResultWindow" - ) - Failure(ResultWindowTooLargeException()) - } else { - + getStartAtAndNumResults(settings.page, settings.pageSize).flatMap { pagination => val aggregations = buildTermsAggregation(settings.aggregatePaths, indexServices.map(_.getMapping)) val searchToExecute = search(searchIndex) .query(filteredSearch) .suggestions(suggestions(settings.query.underlying, searchLanguage, settings.fallback)) - .from(startAt) + .from(pagination.startAt) .trackTotalHits(true) - .size(numResults) + .size(pagination.pageSize) .highlighting(highlight("*")) .aggs(aggregations) .sortBy(getSortDefinition(settings.sort, searchLanguage)) // Only add scroll param if it is first page val searchWithScroll = - if (startAt == 0 && settings.shouldScroll) { + if (pagination.startAt == 0 && settings.shouldScroll) { searchToExecute.scroll(ElasticSearchScrollKeepAlive) } else { searchToExecute } @@ -119,7 +111,7 @@ trait MultiSearchService { SearchResult( totalCount = response.result.totalHits, page = Some(settings.page), - pageSize = numResults, + pageSize = pagination.pageSize, language = searchLanguage, 
results = hits, suggestions = getSuggestions(response.result), diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchConverterService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchConverterService.scala index 342b30917..924c71598 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchConverterService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchConverterService.scala @@ -35,7 +35,7 @@ import no.ndla.mapping.License.getLicense import no.ndla.network.clients.MyNDLAApiClient import no.ndla.search.AggregationBuilder.toApiMultiTermsAggregation import no.ndla.search.SearchConverter.getEmbedValues -import no.ndla.search.model.domain.EmbedValues +import no.ndla.search.model.domain.{ElasticIndexingException, EmbedValues} import no.ndla.search.model.{LanguageValue, SearchableLanguageList, SearchableLanguageValues} import no.ndla.search.{SearchLanguage, model} import no.ndla.searchapi.Props @@ -53,7 +53,7 @@ import org.jsoup.nodes.Entities.EscapeMode import scala.collection.mutable.ListBuffer import scala.jdk.CollectionConverters.* -import scala.util.{Success, Try} +import scala.util.{Failure, Success, Try} trait SearchConverterService { this: DraftApiClient & TaxonomyApiClient & ConverterService & Props & MyNDLAApiClient => diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchService.scala index 5980b1858..28d55cfdb 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchService.scala @@ -325,7 +325,16 @@ trait SearchService { } case t: Throwable => Failure(t) } + private val maxResultWindow = props.ElasticSearchIndexMaxResultWindow + def getStartAtAndNumResults(page: Int, pageSize: Int): Try[SearchPagination] = { + val safePageSize = 
max(pageSize.min(MaxPageSize), 0) + val safePage = page.max(1) + val startAt = page - 1 + val resultWindow = (startAt + 1) * safePageSize + if (resultWindow > maxResultWindow) { + logger.info(s"Max supported results are $maxResultWindow, user requested $resultWindow") + Failure(ResultWindowTooLargeException()) + } else Success(SearchPagination(safePage, safePageSize, startAt)) } - } } diff --git a/search/src/main/scala/no/ndla/search/BaseIndexService.scala b/search/src/main/scala/no/ndla/search/BaseIndexService.scala index f225bd507..ededeb98c 100644 --- a/search/src/main/scala/no/ndla/search/BaseIndexService.scala +++ b/search/src/main/scala/no/ndla/search/BaseIndexService.scala @@ -8,17 +8,18 @@ package no.ndla.search -import cats.implicits._ -import com.sksamuel.elastic4s.ElasticDsl._ +import cats.implicits.* +import com.sksamuel.elastic4s.ElasticDsl.* import com.sksamuel.elastic4s.Indexes import com.sksamuel.elastic4s.analysis.Analysis import com.sksamuel.elastic4s.requests.alias.AliasAction -import com.sksamuel.elastic4s.requests.indexes.CreateIndexRequest +import com.sksamuel.elastic4s.requests.indexes.{CreateIndexRequest, IndexRequest} import com.sksamuel.elastic4s.requests.mappings.MappingDefinition import com.typesafe.scalalogging.StrictLogging import no.ndla.common.configuration.HasBaseProps import no.ndla.common.implicits.TryQuestionMark import no.ndla.search.SearchLanguage.NynorskLanguageAnalyzer +import no.ndla.search.model.domain.{BulkIndexResult, ElasticIndexingException, ReindexResult} import java.text.SimpleDateFormat import java.util.Calendar @@ -32,6 +33,9 @@ trait BaseIndexService { val searchIndex: String val MaxResultWindowOption: Int + /** Replace index even if bulk indexing had failures */ + protected val allowIndexingErrors: Boolean = false + val analysis: Analysis = Analysis( analyzers = List(NynorskLanguageAnalyzer), @@ -69,9 +73,8 @@ trait BaseIndexService { if (indexWithNameExists(indexName).getOrElse(false)) { Success(indexName) } else 
{ - val response = e4sClient.execute { - buildCreateIndexRequest(indexName, numShards) - } + val request = buildCreateIndexRequest(indexName, numShards) + val response = e4sClient.execute(request) response match { case Success(_) => Success(indexName) @@ -96,6 +99,50 @@ trait BaseIndexService { def createIndexWithGeneratedName(numShards: Option[Int]): Try[String] = createIndexWithName(getNewIndexName(), numShards) + protected def validateBulkIndexing(indexResult: BulkIndexResult): Try[BulkIndexResult] = { + if (indexResult.failed == 0 || allowIndexingErrors) Success(indexResult) + else { + logger.error( + s"Indexing completed for index $searchIndex ($documentType), but with ${indexResult.failed} errors." + ) + Failure( + ElasticIndexingException( + s"Indexing $documentType completed with ${indexResult.failed} errors, will not replace index." + ) + ) + } + } + + def countBulkIndexed(indexChunks: List[BulkIndexResult]): BulkIndexResult = { + indexChunks.foldLeft(BulkIndexResult(0, 0)) { case (total, chunk) => + BulkIndexResult(total.count + chunk.count, total.totalCount + chunk.totalCount) + } + } + + def countIndexed(indexChunks: List[(Int, Int)]): BulkIndexResult = { + val (count, totalCount) = indexChunks.foldLeft((0, 0)) { + case ((totalIndexed, totalSize), (chunkIndexed, chunkSize)) => + (totalIndexed + chunkIndexed, totalSize + chunkSize) + } + BulkIndexResult(count, totalCount) + } + + type SendToElastic = String => Try[BulkIndexResult] + def indexDocumentsInBulk(numShards: Option[Int])(sendToElasticFunction: SendToElastic): Try[ReindexResult] = + for { + start <- Try(System.currentTimeMillis()) + indexName <- createIndexWithGeneratedName(numShards) + indexResult <- sendToElasticFunction(indexName) + result <- validateBulkIndexing(indexResult) + aliasTarget <- getAliasTarget + _ <- updateAliasTarget(aliasTarget, indexName) + } yield ReindexResult( + documentType, + result.failed, + result.count, + System.currentTimeMillis() - start + ) + def 
createIndexWithGeneratedName: Try[String] = createIndexWithName(getNewIndexName()) @@ -279,5 +326,41 @@ trait BaseIndexService { } def getTimestamp: String = new SimpleDateFormat("yyyyMMddHHmmss").format(Calendar.getInstance.getTime) + + def findAllIndexes(indexName: String): Try[Seq[String]] = { + val response = e4sClient.execute { + getAliases() + } + + response match { + case Success(results) => + Success(results.result.mappings.toList.map { case (index, _) => index.name }.filter(_.startsWith(indexName))) + case Failure(ex) => + Failure(ex) + } + } + + /** Executes elasticsearch requests in bulk. Returns success (without executing anything) if supplied with an empty + * list. + * + * @param requests + * a list of elasticsearch [[IndexRequest]]'s + * @return + * A Try indicating whether the request succeeded, wrapping a [[BulkIndexResult]] holding the number of successful + * requests and the total number of requests (in that order) + */ + protected def executeRequests(requests: Seq[IndexRequest]): Try[BulkIndexResult] = { + requests match { + case Nil => + Success(BulkIndexResult(0, requests.size)) + case head :: Nil => + e4sClient + .execute(head) + .map(r => if (r.isSuccess) BulkIndexResult(1, requests.size) else BulkIndexResult(0, requests.size)) + case reqs => + e4sClient.execute(bulk(reqs)).map(r => BulkIndexResult(r.result.successes.size, requests.size)) + } + } + } } From 54ac86a043212bba5b89e9991f2f75485bda4502 Mon Sep 17 00:00:00 2001 From: Jonas Natten Date: Thu, 5 Dec 2024 12:42:57 +0100 Subject: [PATCH 2/9] search-api: Rename `Title` to `TitleWithHtml` to avoid conflict A little bit more descriptive to avoid conflict with a regular title object.
--- .../searchapi/model/api/ArticleResult.scala | 2 +- .../searchapi/model/api/AudioResult.scala | 2 +- .../searchapi/model/api/ImageResult.scala | 2 +- .../model/api/LearningpathResult.scala | 2 +- .../model/api/MultiSearchSummary.scala | 2 +- .../no/ndla/searchapi/model/api/Title.scala | 1 - .../searchapi/model/api/TitleWithHtml.scala | 28 ++++++++++++++++ .../model/api/article/ArticleSummary.scala | 25 --------------- .../model/api/draft/DraftSummary.scala | 28 ---------------- .../learningpath/LearningPathSummary.scala | 32 ------------------- .../searchapi/service/ConverterService.scala | 8 ++--- .../search/SearchConverterService.scala | 20 ++++++------ 12 files changed, 48 insertions(+), 104 deletions(-) create mode 100644 search-api/src/main/scala/no/ndla/searchapi/model/api/TitleWithHtml.scala delete mode 100644 search-api/src/main/scala/no/ndla/searchapi/model/api/article/ArticleSummary.scala delete mode 100644 search-api/src/main/scala/no/ndla/searchapi/model/api/draft/DraftSummary.scala delete mode 100644 search-api/src/main/scala/no/ndla/searchapi/model/api/learningpath/LearningPathSummary.scala diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/ArticleResult.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/ArticleResult.scala index 4d0a80639..116f4ae85 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/model/api/ArticleResult.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/model/api/ArticleResult.scala @@ -13,7 +13,7 @@ import sttp.tapir.Schema.annotations.description @description("Search result for article api") case class ArticleResult( @description("The unique id of this article") id: Long, - @description("The title of the article") title: Title, + @description("The title of the article") title: TitleWithHtml, @description("The introduction of the article") introduction: Option[ArticleIntroduction], @description("The type of the article") articleType: String, @description("List of supported languages") 
supportedLanguages: Seq[String] diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/AudioResult.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/AudioResult.scala index c77e92c4e..9813a0cd5 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/model/api/AudioResult.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/model/api/AudioResult.scala @@ -12,7 +12,7 @@ import sttp.tapir.Schema.annotations.description @description("Search result for audio api") case class AudioResult( @description("The unique id of this audio") id: Long, - @description("The title of this audio") title: Title, + @description("The title of this audio") title: TitleWithHtml, @description("A direct link to the audio") url: String, @description("List of supported languages") supportedLanguages: Seq[String] ) diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/ImageResult.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/ImageResult.scala index 35a9f35af..88e3703f3 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/model/api/ImageResult.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/model/api/ImageResult.scala @@ -12,7 +12,7 @@ import sttp.tapir.Schema.annotations.description @description("Search result for image api") case class ImageResult( @description("The unique id of this image") id: Long, - @description("The title of this image") title: Title, + @description("The title of this image") title: TitleWithHtml, @description("The alt text of this image") altText: ImageAltText, @description("A direct link to the image") previewUrl: String, @description("A link to get meta data related to the image") metaUrl: String, diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/LearningpathResult.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/LearningpathResult.scala index 40ee90d37..8d672746e 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/model/api/LearningpathResult.scala +++ 
b/search-api/src/main/scala/no/ndla/searchapi/model/api/LearningpathResult.scala @@ -12,7 +12,7 @@ import sttp.tapir.Schema.annotations.description @description("Search result for learningpath api") case class LearningpathResult( @description("The unique id of this learningpath") id: Long, - @description("The title of the learningpath") title: Title, + @description("The title of the learningpath") title: TitleWithHtml, @description("The introduction of the learningpath") introduction: LearningPathIntroduction, @description("List of supported languages") supportedLanguages: Seq[String] ) diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/MultiSearchSummary.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/MultiSearchSummary.scala index 338ae239a..a9a095e7a 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/model/api/MultiSearchSummary.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/model/api/MultiSearchSummary.scala @@ -30,7 +30,7 @@ object HighlightedField { @description("Short summary of information about the resource") case class MultiSearchSummary( @description("The unique id of the resource") id: Long, - @description("The title of the resource") title: Title, + @description("The title of the resource") title: TitleWithHtml, @description("The meta description of the resource") metaDescription: MetaDescription, @description("The meta image for the resource") metaImage: Option[MetaImage], @description("Url pointing to the resource") url: String, diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/Title.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/Title.scala index c68f84320..3da3b056c 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/model/api/Title.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/model/api/Title.scala @@ -15,7 +15,6 @@ import sttp.tapir.Schema.annotations.description @description("Title of resource") case class Title( @description("The freetext title of the 
resource") title: String, - @description("The freetext html-version title of the article") htmlTitle: String, @description("ISO 639-1 code that represents the language used in title") language: String ) extends LanguageField[String] { override def value: String = title diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/TitleWithHtml.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/TitleWithHtml.scala new file mode 100644 index 000000000..e9a310bab --- /dev/null +++ b/search-api/src/main/scala/no/ndla/searchapi/model/api/TitleWithHtml.scala @@ -0,0 +1,28 @@ +/* + * Part of NDLA search-api + * Copyright (C) 2018 NDLA + * + * See LICENSE + */ + +package no.ndla.searchapi.model.api + +import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} +import io.circe.{Decoder, Encoder} +import no.ndla.language.model.LanguageField +import sttp.tapir.Schema.annotations.description + +@description("Title of resource") +case class TitleWithHtml( + @description("The freetext title of the resource") title: String, + @description("The freetext html-version title of the article") htmlTitle: String, + @description("ISO 639-1 code that represents the language used in title") language: String +) extends LanguageField[String] { + override def value: String = title + override def isEmpty: Boolean = title.isEmpty +} + +object TitleWithHtml { + implicit val encoder: Encoder[TitleWithHtml] = deriveEncoder + implicit val decoder: Decoder[TitleWithHtml] = deriveDecoder +} diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/article/ArticleSummary.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/article/ArticleSummary.scala deleted file mode 100644 index a38c24739..000000000 --- a/search-api/src/main/scala/no/ndla/searchapi/model/api/article/ArticleSummary.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Part of NDLA search-api - * Copyright (C) 2018 NDLA - * - * See LICENSE - */ - -package no.ndla.searchapi.model.api.article - -import 
no.ndla.searchapi.model.api.{MetaDescription, Title} -import sttp.tapir.Schema.annotations.description - -@description("Short summary of information about the article") -case class ArticleSummary( - @description("The unique id of the article") id: Long, - @description("The title of the article") title: Title, - @description("A visual element article") visualElement: Option[VisualElement], - @description("An introduction for the article") introduction: Option[ArticleIntroduction], - @description("A metaDescription for the article") metaDescription: Option[MetaDescription], - @description("A meta image for the article") metaImage: Option[ArticleMetaImage], - @description("The full url to where the complete information about the article can be found") url: String, - @description("Describes the license of the article") license: String, - @description("The type of article this is. Possible values are topic-article,standard") articleType: String, - @description("A list of available languages for this article") supportedLanguages: Seq[String] -) diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/draft/DraftSummary.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/draft/DraftSummary.scala deleted file mode 100644 index 172d1e61a..000000000 --- a/search-api/src/main/scala/no/ndla/searchapi/model/api/draft/DraftSummary.scala +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Part of NDLA search-api - * Copyright (C) 2018 NDLA - * - * See LICENSE - */ - -package no.ndla.searchapi.model.api.draft - -import no.ndla.common.model.api.draft.Comment -import no.ndla.searchapi.model.api.Title -import no.ndla.searchapi.model.api.article.{ArticleIntroduction, VisualElement} -import sttp.tapir.Schema.annotations.description - -// format: off -@description("Short summary of information about the article") -case class DraftSummary( - @description("The unique id of the article") id: Long, - @description("The title of the article") title: Title, - @description("A visual 
element article") visualElement: Option[VisualElement], - @description("An introduction for the article") introduction: Option[ArticleIntroduction], - @description("The full url to where the complete information about the article can be found") url: String, - @description("Describes the license of the article") license: String, - @description("The type of article this is. Possible values are topic-article,standard") articleType: String, - @description("A list of available languages for this audio") supportedLanguages: Seq[String], - @description("The notes for this draft article") notes: Seq[String], - @description("Information about comments attached to the article") comments: Seq[Comment] -) diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/learningpath/LearningPathSummary.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/learningpath/LearningPathSummary.scala deleted file mode 100644 index c1b9caca3..000000000 --- a/search-api/src/main/scala/no/ndla/searchapi/model/api/learningpath/LearningPathSummary.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Part of NDLA search-api - * Copyright (C) 2018 NDLA - * - * See LICENSE - */ - -package no.ndla.searchapi.model.api.learningpath - -import no.ndla.searchapi.model.api.Title -import sttp.tapir.Schema.annotations.description - -import java.time.LocalDateTime - -@description("Summary of meta information for a learningpath") -case class LearningPathSummary( - @description("The unique id of the learningpath") id: Long, - @description("The titles of the learningpath") title: Title, - @description("The descriptions of the learningpath") description: Description, - @description("The introductions of the learningpath") introduction: Introduction, - @description( - "The full url to where the complete metainformation about the learningpath can be found" - ) metaUrl: String, - @description("Url to where a cover photo can be found") coverPhotoUrl: Option[String], - @description("The duration of the 
learningpath in minutes") duration: Option[Int], - @description("The publishing status of the learningpath.") status: String, - @description("The date when this learningpath was last updated.") lastUpdated: LocalDateTime, - @description("Searchable tags for the learningpath") tags: LearningPathTags, - @description("The contributors of this learningpath") copyright: Copyright, - @description("A list of available languages for this audio") supportedLanguages: Seq[String], - @description("The id this learningpath is based on, if any") isBasedOn: Option[Long] -) diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/ConverterService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/ConverterService.scala index 893be3c1b..eb6bead07 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/ConverterService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/ConverterService.scala @@ -45,7 +45,7 @@ trait ConverterService { private def articleSearchResultToApi(article: ArticleApiSearchResult): api.ArticleResult = { api.ArticleResult( article.id, - api.Title(article.title.title, article.title.htmlTitle, article.title.language), + api.TitleWithHtml(article.title.title, article.title.htmlTitle, article.title.language), article.introduction.map(i => ArticleIntroduction(i.introduction, i.htmlIntroduction, i.language)), article.articleType, article.supportedLanguages @@ -66,7 +66,7 @@ trait ConverterService { private def learningpathSearchResultToApi(learningpath: LearningpathApiSearchResult): api.LearningpathResult = { api.LearningpathResult( learningpath.id, - api.Title(learningpath.title.title, learningpath.title.title, learningpath.title.language), + api.TitleWithHtml(learningpath.title.title, learningpath.title.title, learningpath.title.language), LearningPathIntroduction(learningpath.introduction.introduction, learningpath.introduction.language), learningpath.supportedLanguages ) @@ -92,7 +92,7 @@ trait ConverterService { 
api.ImageResult( image.id.toLong, - api.Title(image.title.title, image.title.title, image.title.language), + api.TitleWithHtml(image.title.title, image.title.title, image.title.language), api.ImageAltText(image.altText.alttext, image.altText.language), previewUrl.toString, metaUrl.toString, @@ -118,7 +118,7 @@ trait ConverterService { val url = audio.url.withHost(host).withScheme(scheme).toString api.AudioResult( audio.id, - api.Title(audio.title.title, audio.title.title, audio.title.language), + api.TitleWithHtml(audio.title.title, audio.title.title, audio.title.language), url, audio.supportedLanguages ) diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchConverterService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchConverterService.scala index 924c71598..e4a6bffae 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchConverterService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchConverterService.scala @@ -591,7 +591,7 @@ trait SearchConverterService { val contexts = filterContexts(searchableArticle.contexts, language, filterInactive) val titles = searchableArticle.domainObject.title.map(title => - api.Title(Jsoup.parseBodyFragment(title.title).body().text(), title.title, title.language) + api.TitleWithHtml(Jsoup.parseBodyFragment(title.title).body().text(), title.title, title.language) ) val introductions = searchableArticle.domainObject.introduction.map(intro => api.article @@ -611,7 +611,7 @@ trait SearchConverterService { }) val title = - findByLanguageOrBestEffort(titles, language).getOrElse(api.Title("", "", UnknownLanguage.toString)) + findByLanguageOrBestEffort(titles, language).getOrElse(api.TitleWithHtml("", "", UnknownLanguage.toString)) val metaDescription = findByLanguageOrBestEffort(metaDescriptions, language).getOrElse( api.MetaDescription("", UnknownLanguage.toString) ) @@ -657,7 +657,7 @@ trait SearchConverterService { val contexts = 
filterContexts(searchableDraft.contexts, language, filterInactive) val titles = searchableDraft.domainObject.title.map(title => - api.Title(Jsoup.parseBodyFragment(title.title).body().text(), title.title, title.language) + api.TitleWithHtml(Jsoup.parseBodyFragment(title.title).body().text(), title.title, title.language) ) val introductions = searchableDraft.domainObject.introduction.map(intro => api.article @@ -677,7 +677,7 @@ trait SearchConverterService { }) val title = - findByLanguageOrBestEffort(titles, language).getOrElse(api.Title("", "", UnknownLanguage.toString)) + findByLanguageOrBestEffort(titles, language).getOrElse(api.TitleWithHtml("", "", UnknownLanguage.toString)) val metaDescription = findByLanguageOrBestEffort(metaDescriptions, language).getOrElse( api.MetaDescription("", UnknownLanguage.toString) ) @@ -731,7 +731,8 @@ trait SearchConverterService { val searchableLearningPath = CirceUtil.unsafeParseAs[SearchableLearningPath](hit.sourceAsString) val contexts = filterContexts(searchableLearningPath.contexts, language, filterInactive) - val titles = searchableLearningPath.title.languageValues.map(lv => api.Title(lv.value, lv.value, lv.language)) + val titles = + searchableLearningPath.title.languageValues.map(lv => api.TitleWithHtml(lv.value, lv.value, lv.language)) val metaDescriptions = searchableLearningPath.description.languageValues.map(lv => api.MetaDescription(lv.value, lv.language)) val tags = @@ -740,7 +741,7 @@ trait SearchConverterService { val supportedLanguages = getSupportedLanguages(titles, metaDescriptions, tags) val title = - findByLanguageOrBestEffort(titles, language).getOrElse(api.Title("", "", UnknownLanguage.toString)) + findByLanguageOrBestEffort(titles, language).getOrElse(api.TitleWithHtml("", "", UnknownLanguage.toString)) val metaDescription = findByLanguageOrBestEffort(metaDescriptions, language).getOrElse( api.MetaDescription("", UnknownLanguage.toString) ) @@ -788,15 +789,16 @@ trait SearchConverterService { def 
conceptHitAsMultiSummary(hit: SearchHit, language: String): MultiSearchSummary = { val searchableConcept = CirceUtil.unsafeParseAs[SearchableConcept](hit.sourceAsString) - val titles = searchableConcept.title.languageValues.map(lv => api.Title(lv.value, lv.value, lv.language)) + val titles = searchableConcept.title.languageValues.map(lv => api.TitleWithHtml(lv.value, lv.value, lv.language)) val content = searchableConcept.content.languageValues.map(lv => api.MetaDescription(lv.value, lv.language)) val tags = searchableConcept.tags.languageValues.map(lv => Tag(lv.value, lv.language)) val supportedLanguages = getSupportedLanguages(titles, content, tags) - val title = findByLanguageOrBestEffort(titles, language).getOrElse(api.Title("", "", UnknownLanguage.toString)) - val url = s"${props.ExternalApiUrls("concept-api")}/${searchableConcept.id}" + val title = + findByLanguageOrBestEffort(titles, language).getOrElse(api.TitleWithHtml("", "", UnknownLanguage.toString)) + val url = s"${props.ExternalApiUrls("concept-api")}/${searchableConcept.id}" val metaImages = searchableConcept.domainObject.metaImage.map(image => { val metaImageUrl = s"${props.ExternalApiUrls("raw-image")}/${image.imageId}" api.MetaImage(metaImageUrl, image.altText, image.language) From 1a32da4bc5a2f3f286c53ff2ecd212ed16cbc86c Mon Sep 17 00:00:00 2001 From: Jonas Natten Date: Thu, 5 Dec 2024 12:45:16 +0100 Subject: [PATCH 3/9] search-api: Add grep code indexing and search This patch introduces `POST /search-api/v1/search/grep`, a new endpoint that can be used to search all the indexed grep codes by prefix, query and a direct code filter.
--- project/searchapi.scala | 3 +- .../no/ndla/searchapi/ComponentRegistry.scala | 6 +- .../ndla/searchapi/SearchApiProperties.scala | 3 + .../controller/InternController.scala | 35 +++- .../controller/SearchController.scala | 19 ++- .../parameters/GrepSearchInput.scala | 45 ++++++ .../searchapi/model/api/grep/GrepResult.scala | 25 +++ .../model/api/grep/GrepSearchResults.scala | 27 ++++ .../searchapi/model/api/grep/GrepSort.scala | 35 ++++ .../model/search/SearchPagination.scala | 15 ++ .../searchapi/model/search/SearchType.scala | 1 + .../model/search/SearchableGrepElement.scala | 24 +++ .../service/search/GrepIndexService.scala | 75 +++++++++ .../service/search/GrepSearchService.scala | 145 +++++++++++++++++ .../service/search/IndexService.scala | 153 +++++++++--------- .../search/MultiDraftSearchService.scala | 114 +++++-------- .../service/search/MultiSearchService.scala | 27 +--- .../search/SearchConverterService.scala | 30 +++- .../service/search/SearchService.scala | 136 ++++++---------- .../no/ndla/searchapi/TestEnvironment.scala | 4 + .../search/GrepSearchServiceTest.scala | 132 +++++++++++++++ .../search/MultiDraftSearchServiceTest.scala | 19 +-- .../search/MultiSearchServiceTest.scala | 19 +-- .../service/search/SearchServiceTest.scala | 1 - .../search/model/domain/BulkIndexResult.scala | 18 +++ .../domain/ElasticIndexingException.scala | 11 ++ .../search/model/domain/ReindexResult.scala | 10 ++ typescript/types-backend/search-api.ts | 26 ++- 28 files changed, 848 insertions(+), 310 deletions(-) create mode 100644 search-api/src/main/scala/no/ndla/searchapi/controller/parameters/GrepSearchInput.scala create mode 100644 search-api/src/main/scala/no/ndla/searchapi/model/api/grep/GrepResult.scala create mode 100644 search-api/src/main/scala/no/ndla/searchapi/model/api/grep/GrepSearchResults.scala create mode 100644 search-api/src/main/scala/no/ndla/searchapi/model/api/grep/GrepSort.scala create mode 100644 
search-api/src/main/scala/no/ndla/searchapi/model/search/SearchPagination.scala create mode 100644 search-api/src/main/scala/no/ndla/searchapi/model/search/SearchableGrepElement.scala create mode 100644 search-api/src/main/scala/no/ndla/searchapi/service/search/GrepIndexService.scala create mode 100644 search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala create mode 100644 search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala create mode 100644 search/src/main/scala/no/ndla/search/model/domain/BulkIndexResult.scala create mode 100644 search/src/main/scala/no/ndla/search/model/domain/ElasticIndexingException.scala create mode 100644 search/src/main/scala/no/ndla/search/model/domain/ReindexResult.scala diff --git a/project/searchapi.scala b/project/searchapi.scala index 6148e63d1..ee02b45e6 100644 --- a/project/searchapi.scala +++ b/project/searchapi.scala @@ -42,7 +42,8 @@ object searchapi extends Module { "SearchParams", "DraftSearchParams", "SubjectAggregations", - "SubjectAggsInput" + "SubjectAggsInput", + "GrepSearchInput" ) ) diff --git a/search-api/src/main/scala/no/ndla/searchapi/ComponentRegistry.scala b/search-api/src/main/scala/no/ndla/searchapi/ComponentRegistry.scala index 827099743..a180dda07 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/ComponentRegistry.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/ComponentRegistry.scala @@ -50,10 +50,12 @@ class ComponentRegistry(properties: SearchApiProperties) with FeideApiClient with RedisClient with InternController + with GrepIndexService with SearchApiClient with GrepApiClient with Props - with SwaggerDocControllerConfig { + with SwaggerDocControllerConfig + with GrepSearchService { override val props: SearchApiProperties = properties import props._ @@ -80,6 +82,8 @@ class ComponentRegistry(properties: SearchApiProperties) lazy val learningPathIndexService = new LearningPathIndexService lazy val draftIndexService = new 
DraftIndexService lazy val multiDraftSearchService = new MultiDraftSearchService + lazy val grepIndexService = new GrepIndexService + lazy val grepSearchService = new GrepSearchService lazy val searchController = new SearchController lazy val healthController: TapirHealthController = new TapirHealthController diff --git a/search-api/src/main/scala/no/ndla/searchapi/SearchApiProperties.scala b/search-api/src/main/scala/no/ndla/searchapi/SearchApiProperties.scala index 87654be3a..ed436eaed 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/SearchApiProperties.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/SearchApiProperties.scala @@ -35,12 +35,14 @@ class SearchApiProperties extends BaseProps with StrictLogging { val draftIndexName = propOrElse("DRAFT_SEARCH_INDEX_NAME", "drafts") val learningpathIndexName = propOrElse("LEARNINGPATH_SEARCH_INDEX_NAME", "learningpaths") val conceptIndexName = propOrElse("DRAFT_CONCEPT_SEARCH_INDEX_NAME", "draftconcepts") + val grepIndexName = propOrElse("GREP_SEARCH_INDEX_NAME", "greps") def SearchIndex(searchType: SearchType) = searchType match { case SearchType.Articles => articleIndexName case SearchType.Drafts => draftIndexName case SearchType.LearningPaths => learningpathIndexName case SearchType.Concepts => conceptIndexName + case SearchType.Grep => grepIndexName } def indexToSearchType(indexName: String): Try[SearchType] = indexName match { @@ -48,6 +50,7 @@ class SearchApiProperties extends BaseProps with StrictLogging { case `draftIndexName` => Success(SearchType.Drafts) case `learningpathIndexName` => Success(SearchType.LearningPaths) case `conceptIndexName` => Success(SearchType.Concepts) + case `grepIndexName` => Success(SearchType.Grep) case _ => Failure(new IllegalArgumentException(s"Unknown index name: $indexName")) } diff --git a/search-api/src/main/scala/no/ndla/searchapi/controller/InternController.scala b/search-api/src/main/scala/no/ndla/searchapi/controller/InternController.scala index 
d997371a9..2a352294e 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/controller/InternController.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/controller/InternController.scala @@ -21,15 +21,17 @@ import no.ndla.network.model.RequestInfo import no.ndla.network.tapir.NoNullJsonPrinter.jsonBody import no.ndla.network.tapir.{AllErrors, TapirController} import no.ndla.network.tapir.TapirUtil.errorOutputsFor +import no.ndla.search.model.domain.ReindexResult import no.ndla.searchapi.Props import no.ndla.searchapi.integration.{GrepApiClient, TaxonomyApiClient} import no.ndla.searchapi.model.api.ErrorHandling -import no.ndla.searchapi.model.domain.{IndexingBundle, ReindexResult} +import no.ndla.searchapi.model.domain.IndexingBundle import no.ndla.searchapi.model.search.SearchType import no.ndla.searchapi.service.search.{ ArticleIndexService, DraftConceptIndexService, DraftIndexService, + GrepIndexService, IndexService, LearningPathIndexService } @@ -46,7 +48,7 @@ import sttp.tapir.server.ServerEndpoint trait InternController { this: IndexService & ArticleIndexService & LearningPathIndexService & DraftIndexService & DraftConceptIndexService & - TaxonomyApiClient & GrepApiClient & Props & ErrorHandling & MyNDLAApiClient & TapirController => + TaxonomyApiClient & GrepApiClient & GrepIndexService & Props & ErrorHandling & MyNDLAApiClient & TapirController => val internController: InternController class InternController extends TapirController with StrictLogging { @@ -100,6 +102,7 @@ trait InternController { reindexById, reindexArticle, reindexDraft, + reindexGrep, reindexLearningpath, reindexConcept ) @@ -230,6 +233,21 @@ trait InternController { resolveResultFutures(List(articleIndex)) } + def reindexGrep: ServerEndpoint[Any, Eff] = endpoint.post + .in("index" / "grep") + .in(query[Option[Int]]("numShards")) + .errorOut(stringInternalServerError) + .out(stringBody) + .serverLogicPure { numShards => + val requestInfo = RequestInfo.fromThreadContext() + val 
grepIndex = Future { + requestInfo.setThreadContextRequestInfo() + ("greps", grepIndexService.indexDocuments(numShards, None)) + } + + resolveResultFutures(List(grepIndex)) + } + def reindexLearningpath: ServerEndpoint[Any, Eff] = endpoint.post .in("index" / "learningpath") .in(query[Option[Int]]("numShards")) @@ -256,12 +274,14 @@ trait InternController { draftIndexService.cleanupIndexes(): Unit learningPathIndexService.cleanupIndexes(): Unit draftConceptIndexService.cleanupIndexes(): Unit + grepIndexService.cleanupIndexes(): Unit val articles = articleIndexService.reindexWithShards(numShards) val drafts = draftIndexService.reindexWithShards(numShards) val learningpaths = learningPathIndexService.reindexWithShards(numShards) val concept = draftConceptIndexService.reindexWithShards(numShards) - List(articles, drafts, learningpaths, concept).sequence match { + val greps = grepIndexService.reindexWithShards(numShards) + List(articles, drafts, learningpaths, concept, greps).sequence match { case Success(_) => s"Reindexing with $numShards shards completed in ${System.currentTimeMillis() - startTime}ms".asRight case Failure(ex) => @@ -280,12 +300,14 @@ trait InternController { draftIndexService.cleanupIndexes(): Unit learningPathIndexService.cleanupIndexes(): Unit draftConceptIndexService.cleanupIndexes(): Unit + grepIndexService.cleanupIndexes(): Unit val articles = articleIndexService.updateReplicaNumber(numReplicas) val drafts = draftIndexService.updateReplicaNumber(numReplicas) val learningpaths = learningPathIndexService.updateReplicaNumber(numReplicas) val concepts = draftConceptIndexService.updateReplicaNumber(numReplicas) - List(articles, drafts, learningpaths, concepts).sequence match { + val greps = grepIndexService.updateReplicaNumber(numReplicas) + List(articles, drafts, learningpaths, concepts, greps).sequence match { case Success(_) => s"Updated replication setting for indexes to $numReplicas replicas. 
Populating may take some time.".asRight case Failure(ex) => @@ -326,6 +348,7 @@ trait InternController { articleIndexService.cleanupIndexes(): Unit draftIndexService.cleanupIndexes(): Unit draftConceptIndexService.cleanupIndexes(): Unit + grepIndexService.cleanupIndexes(): Unit val publishedIndexingBundle = IndexingBundle( grepBundle = Some(grepBundle), @@ -356,6 +379,10 @@ trait InternController { Future { requestInfo.setThreadContextRequestInfo() ("concepts", draftConceptIndexService.indexDocuments(numShards, draftIndexingBundle)) + }, + Future { + requestInfo.setThreadContextRequestInfo() + ("greps", grepIndexService.indexDocuments(numShards, Some(grepBundle))) } ) if (runInBackground) { diff --git a/search-api/src/main/scala/no/ndla/searchapi/controller/SearchController.scala b/search-api/src/main/scala/no/ndla/searchapi/controller/SearchController.scala index fd0769e9a..d9d0937e1 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/controller/SearchController.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/controller/SearchController.scala @@ -21,14 +21,16 @@ import no.ndla.network.tapir.Parameters.feideHeader import no.ndla.network.tapir.{AllErrors, DynamicHeaders, NonEmptyString, TapirController} import no.ndla.network.tapir.TapirUtil.errorOutputsFor import no.ndla.network.tapir.auth.Permission.DRAFT_API_WRITE -import no.ndla.searchapi.controller.parameters.{DraftSearchParams, SearchParams, SubjectAggsInput} +import no.ndla.searchapi.controller.parameters.{DraftSearchParams, GrepSearchInput, SearchParams, SubjectAggsInput} import no.ndla.searchapi.Props import no.ndla.searchapi.integration.SearchApiClient +import no.ndla.searchapi.model.api.grep.GrepSearchResults import no.ndla.searchapi.model.api.{ErrorHandling, GroupSearchResult, MultiSearchResult, SubjectAggregations} import no.ndla.searchapi.model.domain.{LearningResourceType, Sort} import no.ndla.searchapi.model.search.SearchType import 
no.ndla.searchapi.model.search.settings.{MultiDraftSearchSettings, SearchSettings} import no.ndla.searchapi.service.search.{ + GrepSearchService, MultiDraftSearchService, MultiSearchService, SearchConverterService, @@ -47,7 +49,7 @@ import sttp.tapir.server.ServerEndpoint trait SearchController { this: SearchApiClient & MultiSearchService & SearchConverterService & SearchService & MultiDraftSearchService & - FeideApiClient & Props & ErrorHandling & TapirController => + FeideApiClient & Props & ErrorHandling & TapirController & GrepSearchService => val searchController: SearchController class SearchController extends TapirController { @@ -159,7 +161,8 @@ trait SearchController { searchDraftLearningResources, searchDraftLearningResourcesGet, postSearchLearningResources, - subjectAggs + subjectAggs, + searchGrep ) def subjectAggs: ServerEndpoint[Any, Eff] = endpoint.post @@ -593,6 +596,16 @@ trait SearchController { } } + def searchGrep: ServerEndpoint[Any, Eff] = endpoint.post + .summary("Search for grep codes") + .description("Search for grep codes") + .in("grep") + .in(jsonBody[GrepSearchInput]) + .out(jsonBody[GrepSearchResults]) + .errorOut(errorOutputsFor(400, 401, 403)) + .requirePermission(DRAFT_API_WRITE) + .serverLogicPure { _ => input => grepSearchService.searchGreps(input) } + /** This method fetches availability based on FEIDE access token in the request This does an actual api-call to the * feide api and should be used sparingly. 
*/ diff --git a/search-api/src/main/scala/no/ndla/searchapi/controller/parameters/GrepSearchInput.scala b/search-api/src/main/scala/no/ndla/searchapi/controller/parameters/GrepSearchInput.scala new file mode 100644 index 000000000..fae25ad11 --- /dev/null +++ b/search-api/src/main/scala/no/ndla/searchapi/controller/parameters/GrepSearchInput.scala @@ -0,0 +1,45 @@ +/* + * Part of NDLA search-api + * Copyright (C) 2024 NDLA + * + * See LICENSE + * + */ + +package no.ndla.searchapi.controller.parameters + +import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} +import io.circe.{Decoder, Encoder} +import no.ndla.network.tapir.NonEmptyString +import no.ndla.searchapi.model.api.grep.GrepSort +import sttp.tapir.Schema.annotations.description + +// format: off +@description("Input parameters to the grep search endpoint") +case class GrepSearchInput( + @description("A list of prefixes that should be returned in the search.") + prefixFilter: Option[List[String]], + + @description("A list of codes that should be returned in the search.") + codes: Option[List[String]], + + @description("A query to filter the query by.") + query: Option[NonEmptyString], + + @description("The page number of the search hits to display.") + page: Option[Int], + + @description("The number of search hits to display for each page.") + pageSize: Option[Int], + + @description("The sort order of the search hits.") + sort: Option[GrepSort], + + @description("The ISO 639-1 language code describing language used in query-params") + language: Option[String] +) + +object GrepSearchInput { + implicit val encoder: Encoder[GrepSearchInput] = deriveEncoder + implicit val decoder: Decoder[GrepSearchInput] = deriveDecoder +} diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/grep/GrepResult.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/grep/GrepResult.scala new file mode 100644 index 000000000..2d11ce2a7 --- /dev/null +++
b/search-api/src/main/scala/no/ndla/searchapi/model/api/grep/GrepResult.scala @@ -0,0 +1,25 @@ +/* + * Part of NDLA search-api + * Copyright (C) 2024 NDLA + * + * See LICENSE + * + */ + +package no.ndla.searchapi.model.api.grep + +import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} +import io.circe.{Decoder, Encoder} +import no.ndla.searchapi.model.api.Title +import sttp.tapir.Schema.annotations.description + +@description("Information about a single grep search result entry") +case class GrepResult( + @description("The grep code") code: String, + @description("The greps title") title: Title +) + +object GrepResult { + implicit val encoder: Encoder[GrepResult] = deriveEncoder + implicit val decoder: Decoder[GrepResult] = deriveDecoder +} diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/grep/GrepSearchResults.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/grep/GrepSearchResults.scala new file mode 100644 index 000000000..beeff9867 --- /dev/null +++ b/search-api/src/main/scala/no/ndla/searchapi/model/api/grep/GrepSearchResults.scala @@ -0,0 +1,27 @@ +/* + * Part of NDLA search-api + * Copyright (C) 2024 NDLA + * + * See LICENSE + * + */ + +package no.ndla.searchapi.model.api.grep + +import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} +import io.circe.{Decoder, Encoder} +import sttp.tapir.Schema.annotations.description + +@description("Information about search-results") +case class GrepSearchResults( + @description("The total number of resources matching this query") totalCount: Long, + @description("For which page results are shown from") page: Int, + @description("The number of results per page") pageSize: Int, + @description("The chosen search language") language: String, + @description("The search results") results: Seq[GrepResult] +) + +object GrepSearchResults { + implicit val encoder: Encoder[GrepSearchResults] = deriveEncoder + implicit val decoder: Decoder[GrepSearchResults] = deriveDecoder +} diff 
--git a/search-api/src/main/scala/no/ndla/searchapi/model/api/grep/GrepSort.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/grep/GrepSort.scala new file mode 100644 index 000000000..af58421a9 --- /dev/null +++ b/search-api/src/main/scala/no/ndla/searchapi/model/api/grep/GrepSort.scala @@ -0,0 +1,35 @@ +/* + * Part of NDLA search-api + * Copyright (C) 2024 NDLA + * + * See LICENSE + * + */ + +package no.ndla.searchapi.model.api.grep + +import com.scalatsi.TypescriptType.{TSLiteralString, TSUnion} +import com.scalatsi.{TSNamedType, TSType} +import enumeratum.* +import sttp.tapir.Codec.PlainCodec +import sttp.tapir.Schema +import sttp.tapir.codec.enumeratum.* + +sealed abstract class GrepSort(override val entryName: String) extends EnumEntry +object GrepSort extends Enum[GrepSort] with CirceEnum[GrepSort] { + val values: IndexedSeq[GrepSort] = findValues + val all: Seq[String] = values.map(_.entryName) + + case object ByRelevanceDesc extends GrepSort("-relevance") + case object ByRelevanceAsc extends GrepSort("relevance") + case object ByTitleDesc extends GrepSort("-title") + case object ByTitleAsc extends GrepSort("title") + case object ByCodeDesc extends GrepSort("-code") + case object ByCodeAsc extends GrepSort("code") + + implicit val schema: Schema[GrepSort] = schemaForEnumEntry[GrepSort] + implicit val codec: PlainCodec[GrepSort] = plainCodecEnumEntry[GrepSort] + implicit val enumTsType: TSNamedType[GrepSort] = + TSType.alias[GrepSort]("GrepSort", TSUnion(all.map(s => TSLiteralString(s)))) + +} diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/search/SearchPagination.scala b/search-api/src/main/scala/no/ndla/searchapi/model/search/SearchPagination.scala new file mode 100644 index 000000000..1c5bd7ab8 --- /dev/null +++ b/search-api/src/main/scala/no/ndla/searchapi/model/search/SearchPagination.scala @@ -0,0 +1,15 @@ +/* + * Part of NDLA search-api + * Copyright (C) 2024 NDLA + * + * See LICENSE + * + */ + +package 
no.ndla.searchapi.model.search + +case class SearchPagination( + page: Int, + pageSize: Int, + startAt: Int +) diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/search/SearchType.scala b/search-api/src/main/scala/no/ndla/searchapi/model/search/SearchType.scala index dd82ea92e..32cf0b3f4 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/model/search/SearchType.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/model/search/SearchType.scala @@ -22,6 +22,7 @@ object SearchType extends Enum[SearchType] with CirceEnumWithErrors[SearchType] case object Drafts extends SearchType("draft") case object LearningPaths extends SearchType("learningpath") case object Concepts extends SearchType("concept") + case object Grep extends SearchType("grep") def all: List[String] = SearchType.values.map(_.toString).toList override def values: IndexedSeq[SearchType] = findValues diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/search/SearchableGrepElement.scala b/search-api/src/main/scala/no/ndla/searchapi/model/search/SearchableGrepElement.scala new file mode 100644 index 000000000..d62c15de2 --- /dev/null +++ b/search-api/src/main/scala/no/ndla/searchapi/model/search/SearchableGrepElement.scala @@ -0,0 +1,24 @@ +/* + * Part of NDLA search-api + * Copyright (C) 2024 NDLA + * + * See LICENSE + * + */ + +package no.ndla.searchapi.model.search + +import io.circe.{Decoder, Encoder} +import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} +import no.ndla.search.model.SearchableLanguageValues + +case class SearchableGrepElement( + code: String, + title: SearchableLanguageValues, + defaultTitle: Option[String] +) + +object SearchableGrepElement { + implicit def encoder: Encoder[SearchableGrepElement] = deriveEncoder + implicit def decoder: Decoder[SearchableGrepElement] = deriveDecoder +} diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepIndexService.scala 
b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepIndexService.scala new file mode 100644 index 000000000..ef01ade49 --- /dev/null +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepIndexService.scala @@ -0,0 +1,75 @@ +/* + * Part of NDLA search-api + * Copyright (C) 2024 NDLA + * + * See LICENSE + * + */ + +package no.ndla.searchapi.service.search + +import cats.implicits.toTraverseOps +import no.ndla.common.implicits.TryQuestionMark +import com.sksamuel.elastic4s.ElasticDsl.* +import com.sksamuel.elastic4s.requests.indexes.IndexRequest +import com.sksamuel.elastic4s.requests.mappings.MappingDefinition +import com.typesafe.scalalogging.StrictLogging +import no.ndla.common.CirceUtil +import no.ndla.search.model.domain.{BulkIndexResult, ReindexResult} +import no.ndla.searchapi.Props +import no.ndla.searchapi.integration.GrepApiClient +import no.ndla.searchapi.model.grep.{GrepBundle, GrepElement} +import no.ndla.searchapi.model.search.SearchType + +import scala.util.{Success, Try} + +trait GrepIndexService { + this: SearchConverterService & IndexService & Props & GrepApiClient => + val grepIndexService: GrepIndexService + + class GrepIndexService extends BulkIndexingService with StrictLogging { + import props.SearchIndex + override val documentType: String = "grep" + override val searchIndex: String = SearchIndex(SearchType.Grep) + override val MaxResultWindowOption: Int = props.ElasticSearchIndexMaxResultWindow + + override def getMapping: MappingDefinition = { + val fields = List(keywordField("code")) + val dynamics = generateLanguageSupportedDynamicTemplates("title", keepRaw = true) + + properties(fields).dynamicTemplates(dynamics) + } + + def indexDocuments(numShards: Option[Int], grepBundle: Option[GrepBundle]): Try[ReindexResult] = { + indexDocumentsInBulk(numShards) { indexName => + sendToElastic(grepBundle, indexName) + } + } + + def createIndexRequest(grepElement: GrepElement, indexName: String): Try[IndexRequest] = { + val 
searchable = searchConverterService.asSearchableGrep(grepElement).? + val source = CirceUtil.toJsonString(searchable) + Success(indexInto(indexName).doc(source).id(grepElement.kode)) + } + + private def sendChunkToElastic(chunk: List[GrepElement], indexName: String): Try[BulkIndexResult] = { + chunk + .traverse(grepElement => createIndexRequest(grepElement, indexName)) + .map(executeRequests) + .flatten + } + + def sendToElastic(grepBundle: Option[GrepBundle], indexName: String): Try[BulkIndexResult] = { + val bundle = (grepBundle match { + case Some(value) => Success(value) + case None => grepApiClient.getGrepBundle() + }).? + + bundle.grepContext + .grouped(props.IndexBulkSize) + .toList + .traverse(group => sendChunkToElastic(group, indexName)) + .map(countBulkIndexed) + } + } +} diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala new file mode 100644 index 000000000..d0d31f9ad --- /dev/null +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala @@ -0,0 +1,145 @@ +/* + * Part of NDLA search-api + * Copyright (C) 2024 NDLA + * + * See LICENSE + * + */ + +package no.ndla.searchapi.service.search + +import cats.implicits.* +import com.sksamuel.elastic4s.ElasticDsl.* +import com.sksamuel.elastic4s.RequestSuccess +import com.sksamuel.elastic4s.requests.searches.queries.Query +import com.sksamuel.elastic4s.requests.searches.sort.FieldSort +import com.sksamuel.elastic4s.requests.searches.sort.SortOrder.{Asc, Desc} +import com.sksamuel.elastic4s.requests.searches.{SearchHit, SearchResponse} +import no.ndla.common.CirceUtil +import no.ndla.common.implicits.TryQuestionMark +import no.ndla.language.Language +import no.ndla.language.Language.{AllLanguages, findByLanguageOrBestEffort} +import no.ndla.language.model.Iso639 +import no.ndla.search.model.LanguageValue +import no.ndla.search.{BaseIndexService, 
Elastic4sClient} +import no.ndla.searchapi.Props +import no.ndla.searchapi.controller.parameters.GrepSearchInput +import no.ndla.searchapi.model.api.Title +import no.ndla.searchapi.model.api.grep.GrepSort.* +import no.ndla.searchapi.model.api.grep.{GrepResult, GrepSearchResults, GrepSort} +import no.ndla.searchapi.model.search.{SearchType, SearchableGrepElement} + +import scala.util.{Success, Try} + +trait GrepSearchService { + this: Props & SearchService & GrepIndexService & BaseIndexService & Elastic4sClient => + val grepSearchService: GrepSearchService + + class GrepSearchService extends SearchService { + import props.SearchIndex + override val searchIndex: List[String] = List(SearchType.Grep).map(SearchIndex) + override val indexServices: List[BaseIndexService] = List(grepIndexService) + + def grepSortDefinition(maybeSort: Option[GrepSort], language: String): FieldSort = maybeSort match { + case Some(ByRelevanceAsc) => sortField("_score", Asc, missingLast = false) + case Some(ByRelevanceDesc) | None => sortField("_score", Desc, missingLast = false) + case Some(ByTitleAsc) => defaultSort("defaultTitle", "title", Asc, language) + case Some(ByTitleDesc) => defaultSort("defaultTitle", "title", Desc, language) + case Some(ByCodeAsc) => sortField("code", Asc, missingLast = false) + case Some(ByCodeDesc) => sortField("code", Desc, missingLast = false) + } + + protected def buildQuery(input: GrepSearchInput, searchLanguage: String): Query = { + val query = input.query + .map { q => + val langQueryFunc = (fieldName: String, boost: Double) => + buildSimpleStringQueryForField( + q, + fieldName, + boost, + searchLanguage, + fallback = true, + searchDecompounded = true + ) + boolQuery() + .should( + langQueryFunc("title", 6), + idsQuery(q.underlying).boost(100) + ) + } + .getOrElse(boolQuery()) + query.filter( + Seq( + idsFilter(input), + prefixFilter(input) + ).flatten + ) + } + + def idsFilter(input: GrepSearchInput): Option[Query] = input.codes match { + case Some(ids) 
if ids.nonEmpty => idsQuery(ids).some + case _ => None + } + + def prefixFilter(input: GrepSearchInput): Option[Query] = input.prefixFilter match { + case Some(prefixes) if prefixes.nonEmpty => + Some( + boolQuery().should( + prefixes.map(prefix => prefixQuery("code", prefix)) + ) + ) + case _ => None + } + + def searchGreps(input: GrepSearchInput): Try[GrepSearchResults] = { + val searchLanguage = input.language match { + case Some(lang) if Iso639.get(lang).isSuccess => lang + case _ => AllLanguages + } + val searchPage = input.page.getOrElse(1) + val searchPageSize = input.pageSize.getOrElse(10) + val pagination = getStartAtAndNumResults(page = searchPage, pageSize = searchPageSize).? + + val sort = grepSortDefinition(input.sort, searchLanguage) + val filteredQuery = buildQuery(input, searchLanguage) + + val searchToExecute = search(searchIndex) + .query(filteredQuery) + .from(pagination.startAt) + .size(pagination.pageSize) + .trackTotalHits(true) + .sortBy(sort) + + e4sClient.execute(searchToExecute).flatMap { response => + getGrepHits(response, searchLanguage).map { results => + GrepSearchResults( + totalCount = response.result.totalHits, + page = pagination.page, + pageSize = searchPageSize, + language = searchLanguage, + results = results + ) + } + } + } + + def hitToResult(hit: SearchHit, language: String): Try[GrepResult] = { + val jsonString = hit.sourceAsString + val searchable = CirceUtil.tryParseAs[SearchableGrepElement](jsonString).? 
+ val titleLv = findByLanguageOrBestEffort(searchable.title.languageValues, language) + .getOrElse(LanguageValue(Language.DefaultLanguage, "")) + val title = Title(title = titleLv.value, language = titleLv.language) + + Success( + GrepResult( + code = searchable.code, + title = title + ) + ) + } + + def getGrepHits(response: RequestSuccess[SearchResponse], language: String): Try[List[GrepResult]] = { + response.result.hits.hits.toList.traverse { hit => hitToResult(hit, language) } + } + } +} diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala index ce70b982e..cf79b5c6a 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala @@ -30,7 +30,83 @@ trait IndexService { this: Elastic4sClient & SearchApiClient & BaseIndexService & TaxonomyApiClient & GrepApiClient & Props & MyNDLAApiClient => - trait IndexService[D <: Content] extends BaseIndexService with StrictLogging { + trait BulkIndexingService extends BaseIndexService { + + /** Returns Sequence of DynamicTemplateRequest for a given field. + * + * @param fieldName + * Name of field in mapping. + * @param keepRaw + * Whether to add a keywordField named raw. Usually used for sorting, aggregations or scripts. + * @return + * Sequence of DynamicTemplateRequest for a field. 
+ */ + protected def generateLanguageSupportedDynamicTemplates( + fieldName: String, + keepRaw: Boolean = false + ): Seq[DynamicTemplateRequest] = { + val dynamicFunc = (name: String, analyzer: String, subFields: List[ElasticField]) => { + val field = textField(name).analyzer(analyzer).fields(subFields) + DynamicTemplateRequest( + name = name, + mapping = field, + matchMappingType = Some("string"), + pathMatch = Some(name) + ) + } + + val sf = List( + textField("trigram").analyzer("trigram"), + textField("decompounded").searchAnalyzer("standard").analyzer("compound_analyzer"), + textField("exact").analyzer("exact") + ) + val subFields = if (keepRaw) sf :+ keywordField("raw") else sf + + val languageTemplates = SearchLanguage.languageAnalyzers.map(languageAnalyzer => { + val name = s"$fieldName.${languageAnalyzer.languageTag.toString()}" + dynamicFunc(name, languageAnalyzer.analyzer, subFields) + }) + val languageSubTemplates = SearchLanguage.languageAnalyzers.map(languageAnalyzer => { + val name = s"*.$fieldName.${languageAnalyzer.languageTag.toString()}" + dynamicFunc(name, languageAnalyzer.analyzer, subFields) + }) + val catchAllTemplate = dynamicFunc(s"$fieldName.*", "standard", subFields) + val catchAllSubTemplate = dynamicFunc(s"*.$fieldName.*", "standard", subFields) + languageTemplates ++ languageSubTemplates ++ Seq(catchAllTemplate, catchAllSubTemplate) + } + + private val hyphDecompounderTokenFilter: CompoundWordTokenFilter = CompoundWordTokenFilter( + name = "hyphenation_decompounder", + `type` = HyphenationDecompounder, + wordListPath = Some("compound-words-norwegian-wordlist.txt"), + hyphenationPatternsPath = Some("hyph/no.xml"), + minSubwordSize = Some(4), + onlyLongestMatch = Some(false) + ) + + private val customCompoundAnalyzer = + CustomAnalyzer( + "compound_analyzer", + "whitespace", + tokenFilters = List(hyphDecompounderTokenFilter.name) + ) + + private val customExactAnalyzer = CustomAnalyzer("exact", "whitespace") + + val shingle: 
ShingleTokenFilter = + ShingleTokenFilter(name = "shingle", minShingleSize = Some(2), maxShingleSize = Some(3)) + + val trigram: CustomAnalyzer = + CustomAnalyzer(name = "trigram", tokenizer = "standard", tokenFilters = List("lowercase", "shingle")) + + override val analysis: Analysis = + Analysis( + analyzers = List(trigram, customExactAnalyzer, customCompoundAnalyzer, NynorskLanguageAnalyzer), + tokenFilters = List(hyphDecompounderTokenFilter) ++ SearchLanguage.NynorskTokenFilters + ) + } + + trait IndexService[D <: Content] extends BulkIndexingService with StrictLogging { val apiClient: SearchApiClient override val MaxResultWindowOption: Int = props.ElasticSearchIndexMaxResultWindow @@ -180,36 +256,6 @@ trait IndexService { } } - private val hyphDecompounderTokenFilter: CompoundWordTokenFilter = CompoundWordTokenFilter( - name = "hyphenation_decompounder", - `type` = HyphenationDecompounder, - wordListPath = Some("compound-words-norwegian-wordlist.txt"), - hyphenationPatternsPath = Some("hyph/no.xml"), - minSubwordSize = Some(4), - onlyLongestMatch = Some(false) - ) - - private val customCompoundAnalyzer = - CustomAnalyzer( - "compound_analyzer", - "whitespace", - tokenFilters = List(hyphDecompounderTokenFilter.name) - ) - - private val customExactAnalyzer = CustomAnalyzer("exact", "whitespace") - - val shingle: ShingleTokenFilter = - ShingleTokenFilter(name = "shingle", minShingleSize = Some(2), maxShingleSize = Some(3)) - - val trigram: CustomAnalyzer = - CustomAnalyzer(name = "trigram", tokenizer = "standard", tokenFilters = List("lowercase", "shingle")) - - override val analysis: Analysis = - Analysis( - analyzers = List(trigram, customExactAnalyzer, customCompoundAnalyzer, NynorskLanguageAnalyzer), - tokenFilters = List(hyphDecompounderTokenFilter) ++ SearchLanguage.NynorskTokenFilters - ) - /** Returns Sequence of FieldDefinitions for a given field. 
* * @param fieldName @@ -241,51 +287,6 @@ trait IndexService { }) } - /** Returns Sequence of DynamicTemplateRequest for a given field. - * - * @param fieldName - * Name of field in mapping. - * @param keepRaw - * Whether to add a keywordField named raw. Usually used for sorting, aggregations or scripts. - * @return - * Sequence of DynamicTemplateRequest for a field. - */ - protected def generateLanguageSupportedDynamicTemplates( - fieldName: String, - keepRaw: Boolean = false - ): Seq[DynamicTemplateRequest] = { - val dynamicFunc = (name: String, analyzer: String, subFields: List[ElasticField]) => { - DynamicTemplateRequest( - name = name, - mapping = textField(name).analyzer(analyzer).fields(subFields), - matchMappingType = Some("string"), - pathMatch = Some(name) - ) - } - - val sf = List( - textField("trigram").analyzer("trigram"), - textField("decompounded") - .searchAnalyzer("standard") - .analyzer("compound_analyzer"), - textField("exact") - .analyzer("exact") - ) - val subFields = if (keepRaw) sf :+ keywordField("raw") else sf - - val languageTemplates = SearchLanguage.languageAnalyzers.map(languageAnalyzer => { - val name = s"$fieldName.${languageAnalyzer.languageTag.toString()}" - dynamicFunc(name, languageAnalyzer.analyzer, subFields) - }) - val languageSubTemplates = SearchLanguage.languageAnalyzers.map(languageAnalyzer => { - val name = s"*.$fieldName.${languageAnalyzer.languageTag.toString()}" - dynamicFunc(name, languageAnalyzer.analyzer, subFields) - }) - val catchAllTemplate = dynamicFunc(s"$fieldName.*", "standard", subFields) - val catchAllSubTemplate = dynamicFunc(s"*.$fieldName.*", "standard", subFields) - languageTemplates ++ languageSubTemplates ++ Seq(catchAllTemplate, catchAllSubTemplate) - } - protected def getTaxonomyContextMapping: NestedField = { nestedField("contexts").fields( keywordField("publicId"), diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/MultiDraftSearchService.scala 
b/search-api/src/main/scala/no/ndla/searchapi/service/search/MultiDraftSearchService.scala index d9f5c2548..31689a9a9 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/MultiDraftSearchService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/MultiDraftSearchService.scala @@ -16,11 +16,10 @@ import com.typesafe.scalalogging.StrictLogging import no.ndla.common.errors.{ValidationException, ValidationMessage} import no.ndla.common.implicits.TryQuestionMark import no.ndla.common.model.NDLADate -import no.ndla.common.model.domain.Priority +import no.ndla.common.model.domain.{Content, Priority} import no.ndla.common.model.domain.draft.DraftStatus import no.ndla.language.Language.AllLanguages import no.ndla.language.model.Iso639 -import no.ndla.network.model.RequestInfo import no.ndla.search.AggregationBuilder.{buildTermsAggregation, getAggregationsFromResult} import no.ndla.search.Elastic4sClient import no.ndla.searchapi.Props @@ -29,10 +28,7 @@ import no.ndla.searchapi.model.domain.{LearningResourceType, SearchResult} import no.ndla.searchapi.model.search.SearchType import no.ndla.searchapi.model.search.settings.MultiDraftSearchSettings -import java.util.concurrent.Executors -import scala.concurrent.{ExecutionContext, ExecutionContextExecutor, Future} import scala.util.{Failure, Success, Try} -import no.ndla.common.model.domain.Content trait MultiDraftSearchService { this: Elastic4sClient & SearchConverterService & IndexService & SearchService & DraftIndexService & @@ -40,7 +36,7 @@ trait MultiDraftSearchService { val multiDraftSearchService: MultiDraftSearchService class MultiDraftSearchService extends StrictLogging with SearchService with TaxonomyFiltering { - import props.{ElasticSearchIndexMaxResultWindow, ElasticSearchScrollKeepAlive, SearchIndex} + import props.{ElasticSearchScrollKeepAlive, SearchIndex} override val searchIndex: List[String] = List( SearchType.Drafts, SearchType.LearningPaths, @@ -216,52 +212,41 @@ trait 
MultiDraftSearchService { case _ => AllLanguages } val filteredSearch = baseQuery.filter(getSearchFilters(settings)) - - val (startAt, numResults) = getStartAtAndNumResults(settings.page, settings.pageSize) - val requestedResultWindow = settings.pageSize * settings.page - if (requestedResultWindow > ElasticSearchIndexMaxResultWindow) { - logger.info( - s"Max supported results are $ElasticSearchIndexMaxResultWindow, user requested $requestedResultWindow" - ) - Failure(ResultWindowTooLargeException()) - } else { - - val aggregations = buildTermsAggregation(settings.aggregatePaths, indexServices.map(_.getMapping)) - - val index = getSearchIndexes(settings).? - val searchToExecute = search(index) - .query(filteredSearch) - .suggestions(suggestions(settings.query.underlying, searchLanguage, settings.fallback)) - .trackTotalHits(true) - .from(startAt) - .size(numResults) - .highlighting(highlight("*")) - .aggs(aggregations) - .sortBy(getSortDefinition(settings.sort, searchLanguage)) - - // Only add scroll param if it is first page - val searchWithScroll = - if (startAt == 0 && settings.shouldScroll) { - searchToExecute.scroll(ElasticSearchScrollKeepAlive) - } else { searchToExecute } - - e4sClient.execute(searchWithScroll) match { - case Success(response) => - getHits(response.result, settings.language, settings.filterInactive).map(hits => { - SearchResult( - totalCount = response.result.totalHits, - page = Some(settings.page), - pageSize = numResults, - language = searchLanguage, - results = hits, - suggestions = getSuggestions(response.result), - aggregations = getAggregationsFromResult(response.result), - scrollId = response.result.scrollId - ) - }) - - case Failure(ex) => Failure(ex) - } + val pagination = getStartAtAndNumResults(settings.page, settings.pageSize).? + val aggregations = buildTermsAggregation(settings.aggregatePaths, indexServices.map(_.getMapping)) + val index = getSearchIndexes(settings).? 
+ val searchToExecute = search(index) + .query(filteredSearch) + .suggestions(suggestions(settings.query.underlying, searchLanguage, settings.fallback)) + .trackTotalHits(true) + .from(pagination.startAt) + .size(pagination.pageSize) + .highlighting(highlight("*")) + .aggs(aggregations) + .sortBy(getSortDefinition(settings.sort, searchLanguage)) + + // Only add scroll param if it is first page + val searchWithScroll = + if (pagination.startAt == 0 && settings.shouldScroll) { + searchToExecute.scroll(ElasticSearchScrollKeepAlive) + } else { searchToExecute } + + e4sClient.execute(searchWithScroll) match { + case Success(response) => + getHits(response.result, settings.language, settings.filterInactive).map(hits => { + SearchResult( + totalCount = response.result.totalHits, + page = Some(settings.page), + pageSize = pagination.pageSize, + language = searchLanguage, + results = hits, + suggestions = getSuggestions(response.result), + aggregations = getAggregationsFromResult(response.result), + scrollId = response.result.scrollId + ) + }) + + case Failure(ex) => Failure(ex) } } @@ -402,31 +387,6 @@ trait MultiDraftSearchService { Some( boolQuery().should(users.map(simpleStringQuery(_).field("users", 1))) ) - - override def scheduleIndexDocuments(): Unit = { - val threadPoolSize = if (searchIndex.nonEmpty) searchIndex.size else 1 - implicit val ec: ExecutionContextExecutor = - ExecutionContext.fromExecutor(Executors.newFixedThreadPool(threadPoolSize)) - val requestInfo = RequestInfo.fromThreadContext() - - val draftFuture = Future { - requestInfo.setThreadContextRequestInfo() - draftIndexService.indexDocuments(shouldUsePublishedTax = false) - } - val learningPathFuture = Future { - requestInfo.setThreadContextRequestInfo() - learningPathIndexService.indexDocuments(shouldUsePublishedTax = true) - } - - val conceptFuture = Future { - requestInfo.setThreadContextRequestInfo() - draftConceptIndexService.indexDocuments(shouldUsePublishedTax = true) - } - - 
handleScheduledIndexResults(SearchIndex(SearchType.Drafts), draftFuture) - handleScheduledIndexResults(SearchIndex(SearchType.LearningPaths), learningPathFuture) - handleScheduledIndexResults(SearchIndex(SearchType.Concepts), conceptFuture) - } } } diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/MultiSearchService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/MultiSearchService.scala index 67a835d9d..14fc10ed1 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/MultiSearchService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/MultiSearchService.scala @@ -11,10 +11,9 @@ import com.sksamuel.elastic4s.ElasticDsl.* import com.sksamuel.elastic4s.requests.searches.queries.Query import com.sksamuel.elastic4s.requests.searches.queries.compound.BoolQuery import com.typesafe.scalalogging.StrictLogging -import no.ndla.common.model.domain.Availability +import no.ndla.common.model.domain.{Availability, Content} import no.ndla.language.Language.AllLanguages import no.ndla.language.model.Iso639 -import no.ndla.network.model.RequestInfo import no.ndla.search.AggregationBuilder.{buildTermsAggregation, getAggregationsFromResult} import no.ndla.search.Elastic4sClient import no.ndla.searchapi.Props @@ -23,10 +22,7 @@ import no.ndla.searchapi.model.domain.SearchResult import no.ndla.searchapi.model.search.SearchType import no.ndla.searchapi.model.search.settings.SearchSettings -import java.util.concurrent.Executors -import scala.concurrent.{ExecutionContext, ExecutionContextExecutor, Future} import scala.util.{Failure, Success, Try} -import no.ndla.common.model.domain.Content trait MultiSearchService { this: Elastic4sClient & SearchConverterService & SearchService & IndexService & ArticleIndexService & @@ -35,7 +31,7 @@ trait MultiSearchService { val multiSearchService: MultiSearchService class MultiSearchService extends StrictLogging with SearchService with TaxonomyFiltering { - import 
props.{ElasticSearchIndexMaxResultWindow, ElasticSearchScrollKeepAlive, SearchIndex} + import props.{ElasticSearchScrollKeepAlive, SearchIndex} override val searchIndex: List[String] = List(SearchType.Articles, SearchType.LearningPaths).map(SearchIndex) override val indexServices: List[IndexService[? <: Content]] = List(articleIndexService, learningPathIndexService) @@ -189,25 +185,6 @@ trait MultiSearchService { availabilityFilter ).flatten } - - override def scheduleIndexDocuments(): Unit = { - val threadPoolSize = if (searchIndex.nonEmpty) searchIndex.size else 1 - implicit val ec: ExecutionContextExecutor = - ExecutionContext.fromExecutor(Executors.newFixedThreadPool(threadPoolSize)) - val requestInfo = RequestInfo.fromThreadContext() - - val articleFuture = Future { - requestInfo.setThreadContextRequestInfo() - articleIndexService.indexDocuments(shouldUsePublishedTax = true) - } - val learningPathFuture = Future { - requestInfo.setThreadContextRequestInfo() - learningPathIndexService.indexDocuments(shouldUsePublishedTax = true) - } - - handleScheduledIndexResults(SearchIndex(SearchType.Articles), articleFuture) - handleScheduledIndexResults(SearchIndex(SearchType.LearningPaths), learningPathFuture) - } } } diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchConverterService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchConverterService.scala index e4a6bffae..bfe0453a8 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchConverterService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchConverterService.scala @@ -13,8 +13,8 @@ import com.typesafe.scalalogging.StrictLogging import no.ndla.common.CirceUtil import no.ndla.common.configuration.Constants.EmbedTagName import no.ndla.common.implicits.* -import no.ndla.common.model.api.{Author, License} import no.ndla.common.model.api.draft.Comment +import no.ndla.common.model.api.{Author, License} import 
no.ndla.common.model.domain.article.Article import no.ndla.common.model.domain.concept.Concept import no.ndla.common.model.domain.draft.{Draft, RevisionStatus} @@ -35,7 +35,7 @@ import no.ndla.mapping.License.getLicense import no.ndla.network.clients.MyNDLAApiClient import no.ndla.search.AggregationBuilder.toApiMultiTermsAggregation import no.ndla.search.SearchConverter.getEmbedValues -import no.ndla.search.model.domain.{ElasticIndexingException, EmbedValues} +import no.ndla.search.model.domain.EmbedValues import no.ndla.search.model.{LanguageValue, SearchableLanguageList, SearchableLanguageValues} import no.ndla.search.{SearchLanguage, model} import no.ndla.searchapi.Props @@ -53,7 +53,7 @@ import org.jsoup.nodes.Entities.EscapeMode import scala.collection.mutable.ListBuffer import scala.jdk.CollectionConverters.* -import scala.util.{Failure, Success, Try} +import scala.util.{Success, Try} trait SearchConverterService { this: DraftApiClient & TaxonomyApiClient & ConverterService & Props & MyNDLAApiClient => @@ -249,6 +249,30 @@ trait SearchConverterService { } + def asSearchableGrep(grepElement: GrepElement): Try[SearchableGrepElement] = { + val defaultTitle = grepElement.tittel.find(_.spraak == "default") + val titles = grepElement.tittel.flatMap(gt => { + ISO639.get6391CodeFor6392Code(gt.spraak) match { + case Some(convertedLanguage) => + Some(LanguageValue(language = convertedLanguage, value = gt.verdi.trim)) + case None if gt.spraak == "default" => None + case None => + logger.warn(s"Could not convert language code '${gt.spraak}' for grep code '${grepElement.kode}'") + None + } + }) + + val title = SearchableLanguageValues.fromFields(titles.distinctBy(_.language)) + + Success( + SearchableGrepElement( + code = grepElement.kode, + title = title, + defaultTitle = defaultTitle.map(_.verdi) + ) + ) + } + def asSearchableLearningPath(lp: LearningPath, indexingBundle: IndexingBundle): Try[SearchableLearningPath] = { val taxonomyContexts = 
indexingBundle.taxonomyBundle match { case Some(bundle) => diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchService.scala index 28d55cfdb..e78e1381a 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/SearchService.scala @@ -21,24 +21,30 @@ import no.ndla.language.Language import no.ndla.language.model.Iso639 import no.ndla.network.tapir.NonEmptyString import no.ndla.search.AggregationBuilder.getAggregationsFromResult -import no.ndla.search.{Elastic4sClient, IndexNotFoundException, NdlaSearchException, SearchLanguage} +import no.ndla.search.{BaseIndexService, Elastic4sClient, NdlaSearchException, SearchLanguage} import no.ndla.searchapi.Props -import no.ndla.searchapi.model.api.{MultiSearchSuggestion, MultiSearchSummary, SearchSuggestion, SuggestOption} +import no.ndla.searchapi.model.api.{ + ErrorHandling, + MultiSearchSuggestion, + MultiSearchSummary, + SearchSuggestion, + SuggestOption +} import no.ndla.searchapi.model.domain.Sort.* import no.ndla.searchapi.model.domain.* -import no.ndla.searchapi.model.search.SearchType +import no.ndla.searchapi.model.search.{SearchPagination, SearchType} import java.lang.Math.max -import scala.concurrent.{ExecutionContext, Future} import scala.util.{Failure, Success, Try} trait SearchService { - this: Elastic4sClient & IndexService & SearchConverterService & StrictLogging & Props => + this: Elastic4sClient & IndexService & SearchConverterService & StrictLogging & Props & BaseIndexService & + ErrorHandling => trait SearchService { import props.{DefaultLanguage, ElasticSearchScrollKeepAlive, MaxPageSize} val searchIndex: List[String] - val indexServices: List[IndexService[?]] + val indexServices: List[BaseIndexService] /** Returns hit as summary * @@ -49,7 +55,7 @@ trait SearchService { * @return * api-model summary 
of hit */ - private def hitToApiModel(hit: SearchHit, language: String, filterInactive: Boolean) = { + private def hitToApiModel(hit: SearchHit, language: String, filterInactive: Boolean): Try[MultiSearchSummary] = { val indexName = hit.index.split("_").headOption.traverse(x => props.indexToSearchType(x)) indexName.flatMap { case Some(SearchType.Articles) => @@ -60,6 +66,8 @@ trait SearchService { Success(searchConverterService.learningpathHitAsMultiSummary(hit, language, filterInactive)) case Some(SearchType.Concepts) => Success(searchConverterService.conceptHitAsMultiSummary(hit, language)) + case Some(SearchType.Grep) => + Failure(NdlaSearchException("Got hit from grep index (SearchType.Grep) in `hitToApiModel`. This is a bug.")) case None => Failure(NdlaSearchException("Index type was bad when determining search result type.")) } @@ -222,18 +230,18 @@ trait SearchService { }) } - private def sortField(field: String, order: SortOrder, missingLast: Boolean = true): FieldSort = { + protected def sortField(field: String, order: SortOrder, missingLast: Boolean = true): FieldSort = { val sortDefinition = fieldSort(field).sortOrder(order) if (missingLast) sortDefinition.missing("_last") else sortDefinition } - def getSortDefinition(sort: Sort, language: String): FieldSort = { + protected def defaultSort(default: String, withLanguage: String, order: SortOrder, language: String): FieldSort = { val sortLanguage = language match { case Language.NoLanguage => DefaultLanguage case _ => language } - def defaultSort(default: String, withLanguage: String, order: SortOrder): FieldSort = sortLanguage match { + sortLanguage match { case Language.AllLanguages => fieldSort(default) .sortOrder(order) @@ -244,92 +252,44 @@ trait SearchService { .missing("_last") .unmappedType("long") } - - sort match { - case ByTitleAsc => defaultSort("defaultTitle", "title", Asc) - case ByTitleDesc => defaultSort("defaultTitle", "title", Desc) - case ByPrimaryRootAsc => defaultSort("defaultRoot", 
"primaryRoot", Asc) - case ByPrimaryRootDesc => defaultSort("defaultRoot", "primaryRoot", Desc) - case ByParentTopicNameAsc => defaultSort("defaultParentTopicName", "parentTopicName", Asc) - case ByParentTopicNameDesc => defaultSort("defaultParentTopicName", "parentTopicName", Desc) - case ByResourceTypeAsc => defaultSort("defaultResourceTypeName", "resourceTypeName", Asc) - case ByResourceTypeDesc => defaultSort("defaultResourceTypeName", "resourceTypeName", Desc) - case ByDurationAsc => sortField("duration", Asc) - case ByDurationDesc => sortField("duration", Desc) - case ByStatusAsc => sortField("draftStatus.current", Asc) - case ByStatusDesc => sortField("draftStatus.current", Desc) - case ByRelevanceAsc => sortField("_score", Asc, missingLast = false) - case ByRelevanceDesc => sortField("_score", Desc, missingLast = false) - case ByLastUpdatedAsc => sortField("lastUpdated", Asc) - case ByLastUpdatedDesc => sortField("lastUpdated", Desc) - case ByIdAsc => sortField("id", Asc) - case ByIdDesc => sortField("id", Desc) - case ByRevisionDateAsc => sortField("nextRevision.revisionDate", Asc) - case ByRevisionDateDesc => sortField("nextRevision.revisionDate", Desc) - case ByResponsibleLastUpdatedAsc => sortField("responsible.lastUpdated", Asc) - case ByResponsibleLastUpdatedDesc => sortField("responsible.lastUpdated", Desc) - case ByPrioritizedAsc => sortField("prioritized", Asc) - case ByPrioritizedDesc => sortField("prioritized", Desc) - case ByPublishedAsc => sortField("published", Asc) - case ByPublishedDesc => sortField("published", Desc) - case ByFavoritedAsc => sortField("favorited", Asc) - case ByFavoritedDesc => sortField("favorited", Desc) - } - } - - def getStartAtAndNumResults(page: Int, pageSize: Int): (Int, Int) = { - val numResults = max(pageSize.min(MaxPageSize), 0) - val startAt = (page - 1).max(0) * numResults - - (startAt, numResults) } - protected def scheduleIndexDocuments(): Unit - - /** Takes care of logging reindexResults, used in subclasses 
overriding [[scheduleIndexDocuments]] - * - * @param indexName - * Name of index to use for logging - * @param reindexFuture - * Reindexing future to handle - * @param executor - * Execution context for the future - */ - protected def handleScheduledIndexResults(indexName: String, reindexFuture: Future[Try[ReindexResult]])(implicit - executor: ExecutionContext - ): Unit = { - reindexFuture.onComplete { - case Success(Success(reindexResult: ReindexResult)) => - logger.info( - s"Completed indexing of ${reindexResult.totalIndexed} $indexName in ${reindexResult.millisUsed} ms." - ) - case Success(Failure(ex)) => logger.warn(ex.getMessage, ex) - case Failure(ex) => logger.warn(s"Unable to create index '$indexName': " + ex.getMessage, ex) - } + def getSortDefinition(sort: Sort, language: String): FieldSort = sort match { + case ByTitleAsc => defaultSort("defaultTitle", "title", Asc, language) + case ByTitleDesc => defaultSort("defaultTitle", "title", Desc, language) + case ByPrimaryRootAsc => defaultSort("defaultRoot", "primaryRoot", Asc, language) + case ByPrimaryRootDesc => defaultSort("defaultRoot", "primaryRoot", Desc, language) + case ByParentTopicNameAsc => defaultSort("defaultParentTopicName", "parentTopicName", Asc, language) + case ByParentTopicNameDesc => defaultSort("defaultParentTopicName", "parentTopicName", Desc, language) + case ByResourceTypeAsc => defaultSort("defaultResourceTypeName", "resourceTypeName", Asc, language) + case ByResourceTypeDesc => defaultSort("defaultResourceTypeName", "resourceTypeName", Desc, language) + case ByDurationAsc => sortField("duration", Asc) + case ByDurationDesc => sortField("duration", Desc) + case ByStatusAsc => sortField("draftStatus.current", Asc) + case ByStatusDesc => sortField("draftStatus.current", Desc) + case ByRelevanceAsc => sortField("_score", Asc, missingLast = false) + case ByRelevanceDesc => sortField("_score", Desc, missingLast = false) + case ByLastUpdatedAsc => sortField("lastUpdated", Asc) + case 
ByLastUpdatedDesc => sortField("lastUpdated", Desc) + case ByIdAsc => sortField("id", Asc) + case ByIdDesc => sortField("id", Desc) + case ByRevisionDateAsc => sortField("nextRevision.revisionDate", Asc) + case ByRevisionDateDesc => sortField("nextRevision.revisionDate", Desc) + case ByResponsibleLastUpdatedAsc => sortField("responsible.lastUpdated", Asc) + case ByResponsibleLastUpdatedDesc => sortField("responsible.lastUpdated", Desc) + case ByPrioritizedAsc => sortField("prioritized", Asc) + case ByPrioritizedDesc => sortField("prioritized", Desc) + case ByPublishedAsc => sortField("published", Asc) + case ByPublishedDesc => sortField("published", Desc) + case ByFavoritedAsc => sortField("favorited", Asc) + case ByFavoritedDesc => sortField("favorited", Desc) } - protected def errorHandler[U](failure: Throwable): Failure[U] = { - failure match { - case e: NdlaSearchException[?] => - e.rf.map(_.status).getOrElse(0) match { - case notFound: Int if notFound == 404 => - val msg = s"Index ${e.rf.flatMap(_.error.index).getOrElse("")} not found. Scheduling a reindex." 
- logger.error(msg) - scheduleIndexDocuments() - Failure(IndexNotFoundException(msg)) - case _ => - logger.error(e.getMessage) - Failure( - NdlaSearchException(s"Unable to execute search in ${e.rf.flatMap(_.error.index).getOrElse("")}", e) - ) - } - case t: Throwable => Failure(t) - } private val maxResultWindow = props.ElasticSearchIndexMaxResultWindow def getStartAtAndNumResults(page: Int, pageSize: Int): Try[SearchPagination] = { val safePageSize = max(pageSize.min(MaxPageSize), 0) val safePage = page.max(1) - val startAt = page - 1 + val startAt = (safePage - 1) * safePageSize val resultWindow = (startAt + 1) * safePageSize if (resultWindow > maxResultWindow) { logger.info(s"Max supported results are $maxResultWindow, user requested $resultWindow") diff --git a/search-api/src/test/scala/no/ndla/searchapi/TestEnvironment.scala b/search-api/src/test/scala/no/ndla/searchapi/TestEnvironment.scala index 3a8652b08..70e8f265f 100644 --- a/search-api/src/test/scala/no/ndla/searchapi/TestEnvironment.scala +++ b/search-api/src/test/scala/no/ndla/searchapi/TestEnvironment.scala @@ -46,8 +46,10 @@ trait TestEnvironment with MyNDLAApiClient with SearchService with SearchController + with GrepSearchService with LearningPathIndexService with InternController + with GrepIndexService with SearchApiClient with ErrorHandling with Clock @@ -78,11 +80,13 @@ trait TestEnvironment val converterService: ConverterService = mock[ConverterService] val searchConverterService: SearchConverterService = mock[SearchConverterService] val multiSearchService: MultiSearchService = mock[MultiSearchService] + val grepSearchService: GrepSearchService = mock[GrepSearchService] val articleIndexService: ArticleIndexService = mock[ArticleIndexService] val learningPathIndexService: LearningPathIndexService = mock[LearningPathIndexService] val draftIndexService: DraftIndexService = mock[DraftIndexService] val draftConceptIndexService: DraftConceptIndexService = mock[DraftConceptIndexService] + val 
grepIndexService: GrepIndexService = mock[GrepIndexService] val multiDraftSearchService: MultiDraftSearchService = mock[MultiDraftSearchService] diff --git a/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala b/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala new file mode 100644 index 000000000..733419a89 --- /dev/null +++ b/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala @@ -0,0 +1,132 @@ +/* + * Part of NDLA search-api + * Copyright (C) 2021 NDLA + * + * See LICENSE + */ + +package no.ndla.searchapi.service.search + +import cats.implicits.catsSyntaxOptionId +import no.ndla.network.tapir.NonEmptyString +import no.ndla.scalatestsuite.IntegrationSuite +import no.ndla.searchapi.TestEnvironment +import no.ndla.searchapi.controller.parameters.GrepSearchInput +import no.ndla.searchapi.model.api.grep.GrepSort.{ByCodeAsc, ByCodeDesc} +import no.ndla.searchapi.model.grep.{GrepBundle, GrepElement, GrepTitle} + +class GrepSearchServiceTest extends IntegrationSuite(EnableElasticsearchContainer = true) with TestEnvironment { + e4sClient = Elastic4sClientFactory.getClient(elasticSearchHost.getOrElse("")) + + override val grepIndexService: GrepIndexService = new GrepIndexService { + override val indexShards = 1 + } + override val grepSearchService = new GrepSearchService + override val converterService = new ConverterService + override val searchConverterService = new SearchConverterService + + override def beforeEach(): Unit = { + if (elasticSearchContainer.isSuccess) { + grepIndexService.createIndexAndAlias().get + } + } + + override def afterEach(): Unit = { + if (elasticSearchContainer.isSuccess) { + grepIndexService.deleteIndexAndAlias() + } + } + + val grepTestBundle: GrepBundle = GrepBundle( + kjerneelementer = List( + GrepElement( + "KE12", + Seq(GrepTitle("default", "Utforsking og problemløysing"), GrepTitle("nob", "Utforsking og problemløsning")) + ), + 
GrepElement( + "KE34", + Seq(GrepTitle("default", "Abstraksjon og generalisering"), GrepTitle("nob", "Abstraksjon og generalisering")) + ) + ), + kompetansemaal = List( + GrepElement( + "KM123", + Seq( + GrepTitle("default", "bruke ulike kilder på en kritisk, hensiktsmessig og etterrettelig måte"), + GrepTitle("nob", "bruke ulike kilder på en kritisk, hensiktsmessig og etterrettelig måte") + ) + ) + ), + tverrfagligeTemaer = List( + GrepElement( + "TT2", + Seq(GrepTitle("default", "Demokrati og medborgerskap"), GrepTitle("nob", "Demokrati og medborgerskap")) + ) + ) + ) + + val emptyInput: GrepSearchInput = GrepSearchInput( + codes = None, + language = None, + page = None, + pageSize = None, + query = None, + prefixFilter = None, + sort = None + ) + + test("That searching for all grep codes works as expected") { + grepIndexService.indexDocuments(1.some, Some(grepTestBundle)).get + blockUntil(() => grepIndexService.countDocuments == grepTestBundle.grepContext.size) + + val result = grepSearchService.searchGreps(emptyInput).get + result.results.map(_.code).sorted should be(grepTestBundle.grepContext.map(_.kode).sorted) + } + + test("That searching for all grep prefixes works as expected") { + grepIndexService.indexDocuments(1.some, Some(grepTestBundle)).get + blockUntil(() => grepIndexService.countDocuments == grepTestBundle.grepContext.size) + + val input = emptyInput.copy(prefixFilter = Some(List("KE"))) + val result = grepSearchService.searchGreps(input).get + result.results.map(_.code).sorted should be(List("KE12", "KE34")) + } + + test("That querying the grep codes searches titles") { + grepIndexService.indexDocuments(1.some, Some(grepTestBundle)).get + blockUntil(() => grepIndexService.countDocuments == grepTestBundle.grepContext.size) + + val input = emptyInput.copy(query = NonEmptyString.fromString("hensiktsmessig")) + val result = grepSearchService.searchGreps(input).get + result.results.map(_.code).sorted should be(List("KM123")) + } + + test("That looking 
up based on id works as expected") { + grepIndexService.indexDocuments(1.some, Some(grepTestBundle)).get + blockUntil(() => grepIndexService.countDocuments == grepTestBundle.grepContext.size) + + val input = emptyInput.copy(codes = Some(List("KM123", "ENUKJENT123"))) + val result = grepSearchService.searchGreps(input).get + result.results.map(_.code).sorted should be(List("KM123")) + } + + test("That querying based on id works as expected") { + grepIndexService.indexDocuments(1.some, Some(grepTestBundle)).get + blockUntil(() => grepIndexService.countDocuments == grepTestBundle.grepContext.size) + + val input = emptyInput.copy(query = NonEmptyString.fromString("KM123")) + val result = grepSearchService.searchGreps(input).get + result.results.map(_.code).sorted should be(List("KM123")) + } + + test("That sorting works as expected") { + grepIndexService.indexDocuments(1.some, Some(grepTestBundle)).get + blockUntil(() => grepIndexService.countDocuments == grepTestBundle.grepContext.size) + + val result1 = grepSearchService.searchGreps(emptyInput.copy(sort = Some(ByCodeAsc))).get + result1.results.map(_.code) should be(List("KE12", "KE34", "KM123", "TT2")) + + val result2 = grepSearchService.searchGreps(emptyInput.copy(sort = Some(ByCodeDesc))).get + result2.results.map(_.code) should be(List("TT2", "KM123", "KE34", "KE12")) + } +} diff --git a/search-api/src/test/scala/no/ndla/searchapi/service/search/MultiDraftSearchServiceTest.scala b/search-api/src/test/scala/no/ndla/searchapi/service/search/MultiDraftSearchServiceTest.scala index c20961c3b..a0ed1c511 100644 --- a/search-api/src/test/scala/no/ndla/searchapi/service/search/MultiDraftSearchServiceTest.scala +++ b/search-api/src/test/scala/no/ndla/searchapi/service/search/MultiDraftSearchServiceTest.scala @@ -16,12 +16,13 @@ import no.ndla.scalatestsuite.IntegrationSuite import no.ndla.searchapi.TestData.* import no.ndla.searchapi.model.api.MetaImage import no.ndla.searchapi.model.domain.{IndexingBundle, 
LearningResourceType, Sort} +import no.ndla.searchapi.model.search.SearchPagination import no.ndla.searchapi.{TestData, TestEnvironment} import scala.util.Success class MultiDraftSearchServiceTest extends IntegrationSuite(EnableElasticsearchContainer = true) with TestEnvironment { - import props.{DefaultPageSize, MaxPageSize} + import props.DefaultPageSize e4sClient = Elastic4sClientFactory.getClient(elasticSearchHost.getOrElse("")) override val articleIndexService: ArticleIndexService = new ArticleIndexService { @@ -88,24 +89,16 @@ class MultiDraftSearchServiceTest extends IntegrationSuite(EnableElasticsearchCo } test("That getStartAtAndNumResults returns SEARCH_MAX_PAGE_SIZE for value greater than SEARCH_MAX_PAGE_SIZE") { - multiDraftSearchService.getStartAtAndNumResults(0, 10001) should equal((0, MaxPageSize)) - } - - test( - "That getStartAtAndNumResults returns the correct calculated start at for page and page-size with default page-size" - ) { - val page = 74 - val expectedStartAt = (page - 1) * DefaultPageSize - multiDraftSearchService.getStartAtAndNumResults(page, DefaultPageSize) should equal( - (expectedStartAt, DefaultPageSize) + multiDraftSearchService.getStartAtAndNumResults(0, 10001) should equal( + Success(SearchPagination(1, props.MaxPageSize, 0)) ) } test("That getStartAtAndNumResults returns the correct calculated start at for page and page-size") { - val page = 123 + val page = 74 val expectedStartAt = (page - 1) * DefaultPageSize multiDraftSearchService.getStartAtAndNumResults(page, DefaultPageSize) should equal( - (expectedStartAt, DefaultPageSize) + Success(SearchPagination(page, DefaultPageSize, expectedStartAt)) ) } diff --git a/search-api/src/test/scala/no/ndla/searchapi/service/search/MultiSearchServiceTest.scala b/search-api/src/test/scala/no/ndla/searchapi/service/search/MultiSearchServiceTest.scala index 5e930913e..5f2f3da14 100644 --- a/search-api/src/test/scala/no/ndla/searchapi/service/search/MultiSearchServiceTest.scala +++ 
b/search-api/src/test/scala/no/ndla/searchapi/service/search/MultiSearchServiceTest.scala @@ -17,6 +17,7 @@ import no.ndla.scalatestsuite.IntegrationSuite import no.ndla.searchapi.TestData.* import no.ndla.searchapi.model.api.MetaImage import no.ndla.searchapi.model.domain.{IndexingBundle, LearningResourceType, Sort} +import no.ndla.searchapi.model.search.SearchPagination import no.ndla.searchapi.{TestData, TestEnvironment, UnitSuite} import scala.util.Success @@ -112,21 +113,17 @@ class MultiSearchServiceTest } test("That getStartAtAndNumResults returns SEARCH_MAX_PAGE_SIZE for value greater than SEARCH_MAX_PAGE_SIZE") { - multiSearchService.getStartAtAndNumResults(0, 10001) should equal((0, props.MaxPageSize)) - } - - test( - "That getStartAtAndNumResults returns the correct calculated start at for page and page-size with default page-size" - ) { - val page = 74 - val expectedStartAt = (page - 1) * DefaultPageSize - multiSearchService.getStartAtAndNumResults(page, DefaultPageSize) should equal((expectedStartAt, DefaultPageSize)) + multiSearchService.getStartAtAndNumResults(0, 10001) should equal( + Success(SearchPagination(1, props.MaxPageSize, 0)) + ) } test("That getStartAtAndNumResults returns the correct calculated start at for page and page-size") { - val page = 123 + val page = 74 val expectedStartAt = (page - 1) * DefaultPageSize - multiSearchService.getStartAtAndNumResults(page, DefaultPageSize) should equal((expectedStartAt, DefaultPageSize)) + multiSearchService.getStartAtAndNumResults(page, DefaultPageSize) should equal( + Success(SearchPagination(page, DefaultPageSize, expectedStartAt)) + ) } test("That all returns all documents ordered by id ascending") { diff --git a/search-api/src/test/scala/no/ndla/searchapi/service/search/SearchServiceTest.scala b/search-api/src/test/scala/no/ndla/searchapi/service/search/SearchServiceTest.scala index ba520a4e2..9ffc6de3f 100644 --- 
a/search-api/src/test/scala/no/ndla/searchapi/service/search/SearchServiceTest.scala +++ b/search-api/src/test/scala/no/ndla/searchapi/service/search/SearchServiceTest.scala @@ -22,7 +22,6 @@ class SearchServiceTest extends UnitSuite with TestEnvironment { val service: SearchService = new SearchService { override val searchIndex = List(SearchType.Drafts, SearchType.LearningPaths).map(props.SearchIndex) override val indexServices: List[IndexService[_]] = List(draftIndexService, learningPathIndexService) - override protected def scheduleIndexDocuments(): Unit = {} } } diff --git a/search/src/main/scala/no/ndla/search/model/domain/BulkIndexResult.scala b/search/src/main/scala/no/ndla/search/model/domain/BulkIndexResult.scala new file mode 100644 index 000000000..129c90442 --- /dev/null +++ b/search/src/main/scala/no/ndla/search/model/domain/BulkIndexResult.scala @@ -0,0 +1,18 @@ +/* + * Part of NDLA search + * Copyright (C) 2024 NDLA + * + * See LICENSE + * + */ + +package no.ndla.search.model.domain + +case class BulkIndexResult(count: Int, totalCount: Int) { + def failed: Int = totalCount - count + def successful: Int = count +} + +object BulkIndexResult { + def empty: BulkIndexResult = BulkIndexResult(0, 0) +} diff --git a/search/src/main/scala/no/ndla/search/model/domain/ElasticIndexingException.scala b/search/src/main/scala/no/ndla/search/model/domain/ElasticIndexingException.scala new file mode 100644 index 000000000..621198df0 --- /dev/null +++ b/search/src/main/scala/no/ndla/search/model/domain/ElasticIndexingException.scala @@ -0,0 +1,11 @@ +/* + * Part of NDLA search + * Copyright (C) 2024 NDLA + * + * See LICENSE + * + */ + +package no.ndla.search.model.domain + +case class ElasticIndexingException(message: String) extends RuntimeException(message) diff --git a/search/src/main/scala/no/ndla/search/model/domain/ReindexResult.scala b/search/src/main/scala/no/ndla/search/model/domain/ReindexResult.scala new file mode 100644 index 000000000..b9d7e1872 --- 
/dev/null +++ b/search/src/main/scala/no/ndla/search/model/domain/ReindexResult.scala @@ -0,0 +1,10 @@ +/* + * Part of NDLA search-api + * Copyright (C) 2018 NDLA + * + * See LICENSE + */ + +package no.ndla.search.model.domain + +case class ReindexResult(name: String, failedIndexed: Int, totalIndexed: Int, millisUsed: Long) diff --git a/typescript/types-backend/search-api.ts b/typescript/types-backend/search-api.ts index 5b58b5cfb..402c4f766 100644 --- a/typescript/types-backend/search-api.ts +++ b/typescript/types-backend/search-api.ts @@ -1,5 +1,7 @@ // DO NOT EDIT: generated file by scala-tsi +export type GrepSort = ("-relevance" | "relevance" | "-title" | "title" | "-code" | "code") + export interface IApiTaxonomyContext { publicId: string root: string @@ -25,7 +27,7 @@ export interface IArticleIntroduction { export interface IArticleResult { id: number - title: ITitle + title: ITitleWithHtml introduction?: IArticleIntroduction articleType: string supportedLanguages: string[] @@ -42,7 +44,7 @@ export interface IArticleResults { export interface IAudioResult { id: number - title: ITitle + title: ITitleWithHtml url: string supportedLanguages: string[] } @@ -107,6 +109,16 @@ export interface IDraftSearchParams { resultTypes?: SearchType[] } +export interface IGrepSearchInput { + prefixFilter?: string[] + codes?: string[] + query?: string + page?: number + pageSize?: number + sort?: GrepSort + language?: string +} + export interface IGroupSearchResult { totalCount: number page?: number @@ -130,7 +142,7 @@ export interface IImageAltText { export interface IImageResult { id: number - title: ITitle + title: ITitleWithHtml altText: IImageAltText previewUrl: string metaUrl: string @@ -153,7 +165,7 @@ export interface ILearningPathIntroduction { export interface ILearningpathResult { id: number - title: ITitle + title: ITitleWithHtml introduction: ILearningPathIntroduction supportedLanguages: string[] } @@ -195,7 +207,7 @@ export interface IMultiSearchSuggestion { export 
interface IMultiSearchSummary { id: number - title: ITitle + title: ITitleWithHtml metaDescription: IMetaDescription metaImage?: IMetaImage url: string @@ -309,7 +321,7 @@ export interface ITermValue { count: number } -export interface ITitle { +export interface ITitleWithHtml { title: string htmlTitle: string language: string @@ -317,7 +329,7 @@ export interface ITitle { export type LearningResourceType = ("standard" | "topic-article" | "frontpage-article" | "learningpath" | "concept" | "gloss") -export type SearchType = ("article" | "draft" | "learningpath" | "concept") +export type SearchType = ("article" | "draft" | "learningpath" | "concept" | "grep") export type Sort = SortEnum From 1e9abc12881f94e2607c8566a5c34f2522e820aa Mon Sep 17 00:00:00 2001 From: Jonas Natten Date: Fri, 6 Dec 2024 08:25:51 +0100 Subject: [PATCH 4/9] search-api: Add kompetansemaalsett to grep bundle --- .../searchapi/integration/GrepApiClient.scala | 18 ++++++++++++++---- .../ndla/searchapi/model/grep/GrepBundle.scala | 3 ++- .../scala/no/ndla/searchapi/TestData.scala | 1 + .../service/search/GrepSearchServiceTest.scala | 1 + 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/search-api/src/main/scala/no/ndla/searchapi/integration/GrepApiClient.scala b/search-api/src/main/scala/no/ndla/searchapi/integration/GrepApiClient.scala index 2b9c0ab6b..d13037a80 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/integration/GrepApiClient.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/integration/GrepApiClient.scala @@ -36,6 +36,9 @@ trait GrepApiClient { private def getAllKompetansemaal: Try[List[GrepElement]] = get[List[GrepElement]](s"$GrepApiEndpoint/kompetansemaal-lk20/").map(_.distinct) + private def getAllKompetansemaalSett: Try[List[GrepElement]] = + get[List[GrepElement]](s"$GrepApiEndpoint/kompetansemaalsett-lk20/").map(_.distinct) + private def getAllTverrfagligeTemaer: Try[List[GrepElement]] = 
get[List[GrepElement]](s"$GrepApiEndpoint/tverrfaglige-temaer-lk20/").map(_.distinct) @@ -60,13 +63,20 @@ trait GrepApiClient { val kjerneelementer = tryToFuture(() => getAllKjerneelementer) val kompetansemaal = tryToFuture(() => getAllKompetansemaal) + val kompetansemaalsett = tryToFuture(() => getAllKompetansemaalSett) val tverrfagligeTemaer = tryToFuture(() => getAllTverrfagligeTemaer) val x = for { - f1 <- kjerneelementer - f2 <- kompetansemaal - f3 <- tverrfagligeTemaer - } yield GrepBundle(f1, f2, f3) + kjerne <- kjerneelementer + kompetanse <- kompetansemaal + kompetansesett <- kompetansemaalsett + tverrfag <- tverrfagligeTemaer + } yield GrepBundle( + kjerneelementer = kjerne, + kompetansemaal = kompetanse, + kompetansemaalsett = kompetansesett, + tverrfagligeTemaer = tverrfag + ) Try(Await.result(x, Duration(300, "seconds"))) match { case Success(bundle) => diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/grep/GrepBundle.scala b/search-api/src/main/scala/no/ndla/searchapi/model/grep/GrepBundle.scala index 4588f83bc..102534d86 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/model/grep/GrepBundle.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/model/grep/GrepBundle.scala @@ -10,10 +10,11 @@ package no.ndla.searchapi.model.grep case class GrepBundle( kjerneelementer: List[GrepElement], kompetansemaal: List[GrepElement], + kompetansemaalsett: List[GrepElement], tverrfagligeTemaer: List[GrepElement] ) { - val grepContext: List[GrepElement] = kjerneelementer ++ kompetansemaal ++ tverrfagligeTemaer + val grepContext: List[GrepElement] = kjerneelementer ++ kompetansemaal ++ kompetansemaalsett ++ tverrfagligeTemaer val grepContextByCode: Map[String, GrepElement] = Map.from(grepContext.map(elem => elem.kode -> elem)) diff --git a/search-api/src/test/scala/no/ndla/searchapi/TestData.scala b/search-api/src/test/scala/no/ndla/searchapi/TestData.scala index 774642ab3..ef4b3aef2 100644 --- 
a/search-api/src/test/scala/no/ndla/searchapi/TestData.scala +++ b/search-api/src/test/scala/no/ndla/searchapi/TestData.scala @@ -1566,6 +1566,7 @@ object TestData { val emptyGrepBundle: GrepBundle = GrepBundle( kjerneelementer = List.empty, kompetansemaal = List.empty, + kompetansemaalsett = List.empty, tverrfagligeTemaer = List.empty ) diff --git a/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala b/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala index 733419a89..62f3e2a00 100644 --- a/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala +++ b/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala @@ -57,6 +57,7 @@ class GrepSearchServiceTest extends IntegrationSuite(EnableElasticsearchContaine ) ) ), + kompetansemaalsett = List.empty, tverrfagligeTemaer = List( GrepElement( "TT2", From d468960000ff250b5308aabe6f274a2ad5654135 Mon Sep 17 00:00:00 2001 From: Jonas Natten Date: Fri, 6 Dec 2024 10:02:11 +0100 Subject: [PATCH 5/9] search-api: Stop using html enabled title for non-html titles --- .../scala/no/ndla/searchapi/model/api/AudioResult.scala | 2 +- .../scala/no/ndla/searchapi/model/api/ImageResult.scala | 2 +- .../no/ndla/searchapi/model/api/LearningpathResult.scala | 2 +- .../scala/no/ndla/searchapi/service/ConverterService.scala | 6 +++--- .../ndla/searchapi/service/search/GrepSearchService.scala | 1 + 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/AudioResult.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/AudioResult.scala index 9813a0cd5..c77e92c4e 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/model/api/AudioResult.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/model/api/AudioResult.scala @@ -12,7 +12,7 @@ import sttp.tapir.Schema.annotations.description @description("Search result for audio api") case class 
AudioResult( @description("The unique id of this audio") id: Long, - @description("The title of this audio") title: TitleWithHtml, + @description("The title of this audio") title: Title, @description("A direct link to the audio") url: String, @description("List of supported languages") supportedLanguages: Seq[String] ) diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/ImageResult.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/ImageResult.scala index 88e3703f3..35a9f35af 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/model/api/ImageResult.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/model/api/ImageResult.scala @@ -12,7 +12,7 @@ import sttp.tapir.Schema.annotations.description @description("Search result for image api") case class ImageResult( @description("The unique id of this image") id: Long, - @description("The title of this image") title: TitleWithHtml, + @description("The title of this image") title: Title, @description("The alt text of this image") altText: ImageAltText, @description("A direct link to the image") previewUrl: String, @description("A link to get meta data related to the image") metaUrl: String, diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/api/LearningpathResult.scala b/search-api/src/main/scala/no/ndla/searchapi/model/api/LearningpathResult.scala index 8d672746e..40ee90d37 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/model/api/LearningpathResult.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/model/api/LearningpathResult.scala @@ -12,7 +12,7 @@ import sttp.tapir.Schema.annotations.description @description("Search result for learningpath api") case class LearningpathResult( @description("The unique id of this learningpath") id: Long, - @description("The title of the learningpath") title: TitleWithHtml, + @description("The title of the learningpath") title: Title, @description("The introduction of the learningpath") introduction: LearningPathIntroduction, 
@description("List of supported languages") supportedLanguages: Seq[String] ) diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/ConverterService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/ConverterService.scala index eb6bead07..1bff81baa 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/ConverterService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/ConverterService.scala @@ -66,7 +66,7 @@ trait ConverterService { private def learningpathSearchResultToApi(learningpath: LearningpathApiSearchResult): api.LearningpathResult = { api.LearningpathResult( learningpath.id, - api.TitleWithHtml(learningpath.title.title, learningpath.title.title, learningpath.title.language), + api.Title(learningpath.title.title, learningpath.title.language), LearningPathIntroduction(learningpath.introduction.introduction, learningpath.introduction.language), learningpath.supportedLanguages ) @@ -92,7 +92,7 @@ trait ConverterService { api.ImageResult( image.id.toLong, - api.TitleWithHtml(image.title.title, image.title.title, image.title.language), + api.Title(image.title.title, image.title.language), api.ImageAltText(image.altText.alttext, image.altText.language), previewUrl.toString, metaUrl.toString, @@ -118,7 +118,7 @@ trait ConverterService { val url = audio.url.withHost(host).withScheme(scheme).toString api.AudioResult( audio.id, - api.TitleWithHtml(audio.title.title, audio.title.title, audio.title.language), + api.Title(audio.title.title, audio.title.language), url, audio.supportedLanguages ) diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala index d0d31f9ad..b9b292973 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala @@ -64,6 +64,7 @@ trait GrepSearchService { 
boolQuery() .should( langQueryFunc("title", 6), + prefixQuery("code", q).boost(50), idsQuery(q.underlying).boost(100) ) } From c2c6a09506bd1871903841dabd15c66de99cedaa Mon Sep 17 00:00:00 2001 From: Jonas Natten Date: Fri, 6 Dec 2024 10:07:01 +0100 Subject: [PATCH 6/9] search-api: Generate grep result types --- project/searchapi.scala | 3 ++- typescript/types-backend/search-api.ts | 24 +++++++++++++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/project/searchapi.scala b/project/searchapi.scala index ee02b45e6..249b3f5ba 100644 --- a/project/searchapi.scala +++ b/project/searchapi.scala @@ -43,7 +43,8 @@ object searchapi extends Module { "DraftSearchParams", "SubjectAggregations", "SubjectAggsInput", - "GrepSearchInput" + "GrepSearchInput", + "grep.GrepSearchResults" ) ) diff --git a/typescript/types-backend/search-api.ts b/typescript/types-backend/search-api.ts index 402c4f766..318db5b72 100644 --- a/typescript/types-backend/search-api.ts +++ b/typescript/types-backend/search-api.ts @@ -44,7 +44,7 @@ export interface IArticleResults { export interface IAudioResult { id: number - title: ITitleWithHtml + title: ITitle url: string supportedLanguages: string[] } @@ -109,6 +109,11 @@ export interface IDraftSearchParams { resultTypes?: SearchType[] } +export interface IGrepResult { + code: string + title: ITitle +} + export interface IGrepSearchInput { prefixFilter?: string[] codes?: string[] @@ -119,6 +124,14 @@ export interface IGrepSearchInput { language?: string } +export interface IGrepSearchResults { + totalCount: number + page: number + pageSize: number + language: string + results: IGrepResult[] +} + export interface IGroupSearchResult { totalCount: number page?: number @@ -142,7 +155,7 @@ export interface IImageAltText { export interface IImageResult { id: number - title: ITitleWithHtml + title: ITitle altText: IImageAltText previewUrl: string metaUrl: string @@ -165,7 +178,7 @@ export interface ILearningPathIntroduction { export 
interface ILearningpathResult { id: number - title: ITitleWithHtml + title: ITitle introduction: ILearningPathIntroduction supportedLanguages: string[] } @@ -321,6 +334,11 @@ export interface ITermValue { count: number } +export interface ITitle { + title: string + language: string +} + export interface ITitleWithHtml { title: string htmlTitle: string From e304dd663ae2514806438224da58019a88c3e1e3 Mon Sep 17 00:00:00 2001 From: Jonas Natten Date: Fri, 6 Dec 2024 10:28:52 +0100 Subject: [PATCH 7/9] search-api: Make codes in grep search case insensitive --- .../service/ConverterServiceTest.scala | 11 ++++++++++- .../repository/FolderRepositoryTest.scala | 8 +++++++- .../no/ndla/network/tapir/NonEmptyString.scala | 2 ++ .../service/search/GrepIndexService.scala | 4 +++- .../service/search/GrepSearchService.scala | 4 +++- .../searchapi/service/search/IndexService.scala | 6 +++++- .../service/search/GrepSearchServiceTest.scala | 17 +++++++++++++++++ .../service/search/SearchServiceTest.scala | 2 +- 8 files changed, 48 insertions(+), 6 deletions(-) diff --git a/learningpath-api/src/test/scala/no/ndla/learningpathapi/service/ConverterServiceTest.scala b/learningpath-api/src/test/scala/no/ndla/learningpathapi/service/ConverterServiceTest.scala index aa609d960..d7050c07f 100644 --- a/learningpath-api/src/test/scala/no/ndla/learningpathapi/service/ConverterServiceTest.scala +++ b/learningpath-api/src/test/scala/no/ndla/learningpathapi/service/ConverterServiceTest.scala @@ -512,7 +512,16 @@ class ConverterServiceTest extends UnitSuite with UnitTestEnvironment { test("asDomainLearningStep should work with learningpaths no matter the amount of steps") { val newLs = - NewLearningStepV2("Tittel", Some("Beskrivelse"), None, "nb", Some(api.EmbedUrlV2("", "oembed")), true, "TEXT", None) + NewLearningStepV2( + "Tittel", + Some("Beskrivelse"), + None, + "nb", + Some(api.EmbedUrlV2("", "oembed")), + true, + "TEXT", + None + ) val lpId = 5591L val lp1 = 
TestData.sampleDomainLearningPath.copy(id = Some(lpId), learningsteps = None) val lp2 = TestData.sampleDomainLearningPath.copy(id = Some(lpId), learningsteps = Some(Seq.empty)) diff --git a/myndla-api/src/test/scala/no/ndla/myndlaapi/repository/FolderRepositoryTest.scala b/myndla-api/src/test/scala/no/ndla/myndlaapi/repository/FolderRepositoryTest.scala index 40a7adb17..618397a64 100644 --- a/myndla-api/src/test/scala/no/ndla/myndlaapi/repository/FolderRepositoryTest.scala +++ b/myndla-api/src/test/scala/no/ndla/myndlaapi/repository/FolderRepositoryTest.scala @@ -116,7 +116,13 @@ class FolderRepositoryTest val resource2 = repository.insertResource("feide", "/path2", ResourceType.Topic, created, TestData.baseResourceDocument) val resource3 = - repository.insertResource("feide", "/path3", ResourceType.Multidisciplinary, created, TestData.baseResourceDocument) + repository.insertResource( + "feide", + "/path3", + ResourceType.Multidisciplinary, + created, + TestData.baseResourceDocument + ) val resource4 = repository.insertResource("feide", "/path4", ResourceType.Image, created, TestData.baseResourceDocument) val resource5 = diff --git a/network/src/main/scala/no/ndla/network/tapir/NonEmptyString.scala b/network/src/main/scala/no/ndla/network/tapir/NonEmptyString.scala index 03f71cefc..27f427e57 100644 --- a/network/src/main/scala/no/ndla/network/tapir/NonEmptyString.scala +++ b/network/src/main/scala/no/ndla/network/tapir/NonEmptyString.scala @@ -20,6 +20,8 @@ class NonEmptyString private (val underlying: String) { case other: NonEmptyString => other.underlying == underlying case _ => false } + + override def toString: String = underlying } object NonEmptyString { diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepIndexService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepIndexService.scala index ef01ade49..94449c6a2 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepIndexService.scala +++ 
b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepIndexService.scala @@ -34,7 +34,9 @@ trait GrepIndexService { override val MaxResultWindowOption: Int = props.ElasticSearchIndexMaxResultWindow override def getMapping: MappingDefinition = { - val fields = List(keywordField("code")) + val fields = List( + keywordField("code").normalizer("lower") + ) val dynamics = generateLanguageSupportedDynamicTemplates("title", keepRaw = true) properties(fields).dynamicTemplates(dynamics) diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala index b9b292973..aecf63429 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala @@ -61,10 +61,12 @@ trait GrepSearchService { fallback = true, searchDecompounded = true ) + boolQuery() .should( langQueryFunc("title", 6), - prefixQuery("code", q).boost(50), + prefixQuery("code", q.underlying).boost(50), + matchQuery("code", q.underlying).boost(10), idsQuery(q.underlying).boost(100) ) } diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala index cf79b5c6a..a9b40e9a4 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala @@ -99,10 +99,14 @@ trait IndexService { val trigram: CustomAnalyzer = CustomAnalyzer(name = "trigram", tokenizer = "standard", tokenFilters = List("lowercase", "shingle")) + val lowerNormalizer: CustomNormalizer = + CustomNormalizer("lower", charFilters = List.empty, tokenFilters = List("lowercase")) + override val analysis: Analysis = Analysis( analyzers = List(trigram, customExactAnalyzer, customCompoundAnalyzer, 
NynorskLanguageAnalyzer), - tokenFilters = List(hyphDecompounderTokenFilter) ++ SearchLanguage.NynorskTokenFilters + tokenFilters = List(hyphDecompounderTokenFilter) ++ SearchLanguage.NynorskTokenFilters, + normalizers = List(lowerNormalizer) ) } diff --git a/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala b/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala index 62f3e2a00..f8e229387 100644 --- a/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala +++ b/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala @@ -130,4 +130,21 @@ class GrepSearchServiceTest extends IntegrationSuite(EnableElasticsearchContaine val result2 = grepSearchService.searchGreps(emptyInput.copy(sort = Some(ByCodeDesc))).get result2.results.map(_.code) should be(List("TT2", "KM123", "KE34", "KE12")) } + + test("That prefix filter is case insensitive") { + grepIndexService.indexDocuments(1.some, Some(grepTestBundle)).get + blockUntil(() => grepIndexService.countDocuments == grepTestBundle.grepContext.size) + + val result1 = grepSearchService.searchGreps(emptyInput.copy(prefixFilter = Some(List("ke")))).get + result1.results.map(_.code) should be(List("KE12", "KE34")) + + val result2 = grepSearchService.searchGreps(emptyInput.copy(query = NonEmptyString.fromString("ke"))).get + result2.results.map(_.code) should be(List("KE12", "KE34")) + + val result3 = grepSearchService.searchGreps(emptyInput.copy(prefixFilter = Some(List("KE")))).get + result3.results.map(_.code) should be(List("KE12", "KE34")) + + val result4 = grepSearchService.searchGreps(emptyInput.copy(query = NonEmptyString.fromString("KE"))).get + result4.results.map(_.code) should be(List("KE12", "KE34")) + } } diff --git a/search-api/src/test/scala/no/ndla/searchapi/service/search/SearchServiceTest.scala b/search-api/src/test/scala/no/ndla/searchapi/service/search/SearchServiceTest.scala index 
9ffc6de3f..9ac33631a 100644 --- a/search-api/src/test/scala/no/ndla/searchapi/service/search/SearchServiceTest.scala +++ b/search-api/src/test/scala/no/ndla/searchapi/service/search/SearchServiceTest.scala @@ -21,7 +21,7 @@ class SearchServiceTest extends UnitSuite with TestEnvironment { val service: SearchService = new SearchService { override val searchIndex = List(SearchType.Drafts, SearchType.LearningPaths).map(props.SearchIndex) - override val indexServices: List[IndexService[_]] = List(draftIndexService, learningPathIndexService) + override val indexServices: List[IndexService[_]] = List(draftIndexService, learningPathIndexService) } } From 15d8dbfa1ef13d0f3ba242583616e5d2d10b8888 Mon Sep 17 00:00:00 2001 From: Jonas Natten Date: Fri, 6 Dec 2024 11:55:28 +0100 Subject: [PATCH 8/9] search-api: Make `codes` input case insensitive @katrinewi asks and @katrinewi receives --- .../no/ndla/searchapi/service/search/GrepSearchService.scala | 4 ++-- .../ndla/searchapi/service/search/GrepSearchServiceTest.scala | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala index aecf63429..623281fa4 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala @@ -67,7 +67,7 @@ trait GrepSearchService { langQueryFunc("title", 6), prefixQuery("code", q.underlying).boost(50), matchQuery("code", q.underlying).boost(10), - idsQuery(q.underlying).boost(100) + termQuery("code", q.underlying).boost(100) ) } .getOrElse(boolQuery()) @@ -80,7 +80,7 @@ trait GrepSearchService { } def idsFilter(input: GrepSearchInput): Option[Query] = input.codes match { - case Some(ids) if ids.nonEmpty => idsQuery(ids).some + case Some(ids) if ids.nonEmpty => termsQuery("code", ids).some case _ => None } 
diff --git a/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala b/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala index f8e229387..c2dfb97cf 100644 --- a/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala +++ b/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala @@ -109,6 +109,10 @@ class GrepSearchServiceTest extends IntegrationSuite(EnableElasticsearchContaine val input = emptyInput.copy(codes = Some(List("KM123", "ENUKJENT123"))) val result = grepSearchService.searchGreps(input).get result.results.map(_.code).sorted should be(List("KM123")) + + val input2 = emptyInput.copy(codes = Some(List("km123", "ENUKJENT123"))) + val result2 = grepSearchService.searchGreps(input2).get + result2.results.map(_.code).sorted should be(List("KM123")) } test("That querying based on id works as expected") { From 35448aacd41a1d38d9badba09bf66c45fa92095d Mon Sep 17 00:00:00 2001 From: Jonas Natten Date: Fri, 6 Dec 2024 14:36:42 +0100 Subject: [PATCH 9/9] search-api: Try different grep query for different query results --- .../service/search/GrepSearchService.scala | 35 +++++++++++-------- .../search/GrepSearchServiceTest.scala | 11 ++++++ 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala index 623281fa4..9db8d8629 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala @@ -62,29 +62,36 @@ trait GrepSearchService { searchDecompounded = true ) + val codeQueries = boolQuery().should( + prefixQuery("code", q.underlying).boost(50), + matchQuery("code", q.underlying).boost(10), + termQuery("code", q.underlying).boost(100) + ) + val 
titleQuery = langQueryFunc("title", 6) + + val onlyCodeQuery = boolQuery() + .must(codeQueries) + .not(titleQuery) + boolQuery() - .should( - langQueryFunc("title", 6), - prefixQuery("code", q.underlying).boost(50), - matchQuery("code", q.underlying).boost(10), - termQuery("code", q.underlying).boost(100) - ) + .must(boolQuery().should(titleQuery, onlyCodeQuery)) } .getOrElse(boolQuery()) - query.filter( - Seq( - idsFilter(input), - prefixFilter(input) - ).flatten - ) + query.filter(getFilters(input)) } - def idsFilter(input: GrepSearchInput): Option[Query] = input.codes match { + private def getFilters(input: GrepSearchInput): List[Query] = + List( + idsFilter(input), + prefixFilter(input) + ).flatten + + private def idsFilter(input: GrepSearchInput): Option[Query] = input.codes match { case Some(ids) if ids.nonEmpty => termsQuery("code", ids).some case _ => None } - def prefixFilter(input: GrepSearchInput): Option[Query] = input.prefixFilter match { + private def prefixFilter(input: GrepSearchInput): Option[Query] = input.prefixFilter match { case Some(prefixes) if prefixes.nonEmpty => Some( boolQuery().should( diff --git a/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala b/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala index c2dfb97cf..e62abe331 100644 --- a/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala +++ b/search-api/src/test/scala/no/ndla/searchapi/service/search/GrepSearchServiceTest.scala @@ -84,6 +84,17 @@ class GrepSearchServiceTest extends IntegrationSuite(EnableElasticsearchContaine result.results.map(_.code).sorted should be(grepTestBundle.grepContext.map(_.kode).sorted) } + test("That querying grep codes with prefixes returns nothing") { + grepIndexService.indexDocuments(1.some, Some(grepTestBundle)).get + blockUntil(() => grepIndexService.countDocuments == grepTestBundle.grepContext.size) + + val result = grepSearchService + 
.searchGreps(emptyInput.copy(query = NonEmptyString.fromString("kakepenger"), prefixFilter = Some(List("TT")))) + .get + + result.results.map(_.code).sorted should be(Seq.empty) + } + test("That searching for all grep prefixes works as expected") { grepIndexService.indexDocuments(1.some, Some(grepTestBundle)).get blockUntil(() => grepIndexService.countDocuments == grepTestBundle.grepContext.size)