From 767a9f2f5ba782a13f5cd94b65e521ba698c63f9 Mon Sep 17 00:00:00 2001 From: Jonas Natten Date: Thu, 5 Dec 2024 12:18:20 +0100 Subject: [PATCH] fix them --- .../search/GrepCodesIndexServiceTest.scala | 33 --------- .../ndla/searchapi/SearchApiProperties.scala | 1 + .../controller/InternController.scala | 13 +++- .../searchapi/model/search/SearchType.scala | 2 +- .../service/search/GrepIndexService.scala | 2 +- .../service/search/GrepSearchService.scala | 2 +- .../service/search/IndexService.scala | 68 +++++++++---------- .../no/ndla/search/BaseIndexService.scala | 5 +- 8 files changed, 50 insertions(+), 76 deletions(-) delete mode 100644 draft-api/src/test/scala/no/ndla/draftapi/service/search/GrepCodesIndexServiceTest.scala diff --git a/draft-api/src/test/scala/no/ndla/draftapi/service/search/GrepCodesIndexServiceTest.scala b/draft-api/src/test/scala/no/ndla/draftapi/service/search/GrepCodesIndexServiceTest.scala deleted file mode 100644 index 7fba0ba79..000000000 --- a/draft-api/src/test/scala/no/ndla/draftapi/service/search/GrepCodesIndexServiceTest.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Part of NDLA draft-api - * Copyright (C) 2021 NDLA - * - * See LICENSE - */ - -package no.ndla.draftapi.service.search - -import no.ndla.draftapi._ -import no.ndla.scalatestsuite.IntegrationSuite - -class GrepCodesIndexServiceTest extends IntegrationSuite(EnableElasticsearchContainer = true) with TestEnvironment { - - e4sClient = Elastic4sClientFactory.getClient(elasticSearchHost.getOrElse("http://localhost:9200")) - - override val grepCodesIndexService: GrepCodesIndexService = new GrepCodesIndexService { - override val indexShards = 1 - } - override val converterService = new ConverterService - override val searchConverterService = new SearchConverterService - - test("That indexing does not fail if no grepCodes are present") { - tagIndexService.createIndexWithName(props.DraftGrepCodesSearchIndex) - - val article = TestData.sampleDomainArticle.copy(grepCodes = Seq.empty) -// grepCodesIndexService.indexDocument(article).isSuccess should be(true) - // TODO: - - grepCodesIndexService.deleteIndexWithName(Some(props.DraftGrepCodesSearchIndex)) - } - -} diff --git a/search-api/src/main/scala/no/ndla/searchapi/SearchApiProperties.scala b/search-api/src/main/scala/no/ndla/searchapi/SearchApiProperties.scala index f2a43f23b..ed436eaed 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/SearchApiProperties.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/SearchApiProperties.scala @@ -50,6 +50,7 @@ class SearchApiProperties extends BaseProps with StrictLogging { case `draftIndexName` => Success(SearchType.Drafts) case `learningpathIndexName` => Success(SearchType.LearningPaths) case `conceptIndexName` => Success(SearchType.Concepts) + case `grepIndexName` => Success(SearchType.Grep) case _ => Failure(new IllegalArgumentException(s"Unknown index name: $indexName")) } diff --git a/search-api/src/main/scala/no/ndla/searchapi/controller/InternController.scala b/search-api/src/main/scala/no/ndla/searchapi/controller/InternController.scala index 2b4026bea..2a352294e 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/controller/InternController.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/controller/InternController.scala @@ -274,12 +274,14 @@ trait InternController { draftIndexService.cleanupIndexes(): Unit learningPathIndexService.cleanupIndexes(): Unit draftConceptIndexService.cleanupIndexes(): Unit + grepIndexService.cleanupIndexes(): Unit val articles = articleIndexService.reindexWithShards(numShards) val drafts = draftIndexService.reindexWithShards(numShards) val learningpaths = learningPathIndexService.reindexWithShards(numShards) val concept = draftConceptIndexService.reindexWithShards(numShards) - List(articles, drafts, learningpaths, concept).sequence match { + val greps = grepIndexService.reindexWithShards(numShards) + List(articles, drafts, learningpaths, concept, greps).sequence match { case Success(_) => s"Reindexing with $numShards shards completed in ${System.currentTimeMillis() - startTime}ms".asRight case Failure(ex) => @@ -298,12 +300,14 @@ trait InternController { draftIndexService.cleanupIndexes(): Unit learningPathIndexService.cleanupIndexes(): Unit draftConceptIndexService.cleanupIndexes(): Unit + grepIndexService.cleanupIndexes(): Unit val articles = articleIndexService.updateReplicaNumber(numReplicas) val drafts = draftIndexService.updateReplicaNumber(numReplicas) val learningpaths = learningPathIndexService.updateReplicaNumber(numReplicas) val concepts = draftConceptIndexService.updateReplicaNumber(numReplicas) - List(articles, drafts, learningpaths, concepts).sequence match { + val greps = grepIndexService.updateReplicaNumber(numReplicas) + List(articles, drafts, learningpaths, concepts, greps).sequence match { case Success(_) => s"Updated replication setting for indexes to $numReplicas replicas. Populating may take some time.".asRight case Failure(ex) => @@ -344,6 +348,7 @@ trait InternController { articleIndexService.cleanupIndexes(): Unit draftIndexService.cleanupIndexes(): Unit draftConceptIndexService.cleanupIndexes(): Unit + grepIndexService.cleanupIndexes(): Unit val publishedIndexingBundle = IndexingBundle( grepBundle = Some(grepBundle), @@ -374,6 +379,10 @@ trait InternController { Future { requestInfo.setThreadContextRequestInfo() ("concepts", draftConceptIndexService.indexDocuments(numShards, draftIndexingBundle)) + }, + Future { + requestInfo.setThreadContextRequestInfo() + ("greps", grepIndexService.indexDocuments(numShards, Some(grepBundle))) } ) if (runInBackground) { diff --git a/search-api/src/main/scala/no/ndla/searchapi/model/search/SearchType.scala b/search-api/src/main/scala/no/ndla/searchapi/model/search/SearchType.scala index 318956240..32cf0b3f4 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/model/search/SearchType.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/model/search/SearchType.scala @@ -22,7 +22,7 @@ object SearchType extends Enum[SearchType] with CirceEnumWithErrors[SearchType] case object Drafts extends SearchType("draft") case object LearningPaths extends SearchType("learningpath") case object Concepts extends SearchType("concept") - case object Grep extends SearchType("grep") // TODO: Er dette dumt? + case object Grep extends SearchType("grep") def all: List[String] = SearchType.values.map(_.toString).toList override def values: IndexedSeq[SearchType] = findValues diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepIndexService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepIndexService.scala index c6c9a838f..9651bd260 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepIndexService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepIndexService.scala @@ -35,7 +35,7 @@ trait GrepIndexService { override def getMapping: MappingDefinition = { val fields = List(keywordField("code")) - val dynamics = generateLanguageSupportedDynamicTemplates("title", disableSubfields = true) + val dynamics = generateLanguageSupportedDynamicTemplates("title", keepRaw = true) properties(fields).dynamicTemplates(dynamics) } diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala index 96465149d..88703984e 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/GrepSearchService.scala @@ -59,7 +59,7 @@ trait GrepSearchService { boost, searchLanguage, fallback = true, - searchDecompounded = false // TODO: Vil vi ha denne? + searchDecompounded = true ) boolQuery() .should( diff --git a/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala b/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala index cb21c9033..cf79b5c6a 100644 --- a/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala +++ b/search-api/src/main/scala/no/ndla/searchapi/service/search/IndexService.scala @@ -43,15 +43,13 @@ trait IndexService { */ protected def generateLanguageSupportedDynamicTemplates( fieldName: String, - keepRaw: Boolean = false, - disableSubfields: Boolean = false + keepRaw: Boolean = false ): Seq[DynamicTemplateRequest] = { val dynamicFunc = (name: String, analyzer: String, subFields: List[ElasticField]) => { - val field = textField(name).analyzer(analyzer) - val withSubfields = if (disableSubfields) field else field.fields(subFields) + val field = textField(name).analyzer(analyzer).fields(subFields) DynamicTemplateRequest( name = name, - mapping = withSubfields, + mapping = field, matchMappingType = Some("string"), pathMatch = Some(name) ) @@ -76,6 +74,36 @@ trait IndexService { val catchAllSubTemplate = dynamicFunc(s"*.$fieldName.*", "standard", subFields) languageTemplates ++ languageSubTemplates ++ Seq(catchAllTemplate, catchAllSubTemplate) } + + private val hyphDecompounderTokenFilter: CompoundWordTokenFilter = CompoundWordTokenFilter( + name = "hyphenation_decompounder", + `type` = HyphenationDecompounder, + wordListPath = Some("compound-words-norwegian-wordlist.txt"), + hyphenationPatternsPath = Some("hyph/no.xml"), + minSubwordSize = Some(4), + onlyLongestMatch = Some(false) + ) + + private val customCompoundAnalyzer = + CustomAnalyzer( + "compound_analyzer", + "whitespace", + tokenFilters = List(hyphDecompounderTokenFilter.name) + ) + + private val customExactAnalyzer = CustomAnalyzer("exact", "whitespace") + + val shingle: ShingleTokenFilter = + ShingleTokenFilter(name = "shingle", minShingleSize = Some(2), maxShingleSize = Some(3)) + + val trigram: CustomAnalyzer = + CustomAnalyzer(name = "trigram", tokenizer = "standard", tokenFilters = List("lowercase", "shingle")) + + override val analysis: Analysis = + Analysis( + analyzers = List(trigram, customExactAnalyzer, customCompoundAnalyzer, NynorskLanguageAnalyzer), + tokenFilters = List(hyphDecompounderTokenFilter) ++ SearchLanguage.NynorskTokenFilters + ) } trait IndexService[D <: Content] extends BulkIndexingService with StrictLogging { @@ -228,36 +256,6 @@ trait IndexService { } } - private val hyphDecompounderTokenFilter: CompoundWordTokenFilter = CompoundWordTokenFilter( - name = "hyphenation_decompounder", - `type` = HyphenationDecompounder, - wordListPath = Some("compound-words-norwegian-wordlist.txt"), - hyphenationPatternsPath = Some("hyph/no.xml"), - minSubwordSize = Some(4), - onlyLongestMatch = Some(false) - ) - - private val customCompoundAnalyzer = - CustomAnalyzer( - "compound_analyzer", - "whitespace", - tokenFilters = List(hyphDecompounderTokenFilter.name) - ) - - private val customExactAnalyzer = CustomAnalyzer("exact", "whitespace") - - val shingle: ShingleTokenFilter = - ShingleTokenFilter(name = "shingle", minShingleSize = Some(2), maxShingleSize = Some(3)) - - val trigram: CustomAnalyzer = - CustomAnalyzer(name = "trigram", tokenizer = "standard", tokenFilters = List("lowercase", "shingle")) - - override val analysis: Analysis = - Analysis( - analyzers = List(trigram, customExactAnalyzer, customCompoundAnalyzer, NynorskLanguageAnalyzer), - tokenFilters = List(hyphDecompounderTokenFilter) ++ SearchLanguage.NynorskTokenFilters - ) - /** Returns Sequence of FieldDefinitions for a given field. * * @param fieldName diff --git a/search/src/main/scala/no/ndla/search/BaseIndexService.scala b/search/src/main/scala/no/ndla/search/BaseIndexService.scala index a66bbd720..ededeb98c 100644 --- a/search/src/main/scala/no/ndla/search/BaseIndexService.scala +++ b/search/src/main/scala/no/ndla/search/BaseIndexService.scala @@ -73,9 +73,8 @@ trait BaseIndexService { if (indexWithNameExists(indexName).getOrElse(false)) { Success(indexName) } else { - val response = e4sClient.execute { - buildCreateIndexRequest(indexName, numShards) - } + val request = buildCreateIndexRequest(indexName, numShards) + val response = e4sClient.execute(request) response match { case Success(_) => Success(indexName)