Skip to content

Commit

Permalink
more flexible querying
Browse files Browse the repository at this point in the history
  • Loading branch information
jillesvangurp committed Jan 18, 2024
1 parent f725a70 commit 2bcedd1
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 27 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,6 @@ Both docs exist: 2 after the transaction
In case of an exception, there is a rollback.

```kotlin

// rollbacks happen if there are exceptions
val another = MyModel(
title = "Transactional",
Expand Down
10 changes: 10 additions & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,16 @@ configure<ComposeExtension> {
removeContainers = true
useComposeFiles = listOf("docker-compose.yml")
setProjectName("pg-docstore")
listOf("/usr/bin/docker","/usr/local/bin/docker").firstOrNull {
File(it).exists()
}?.let { docker ->
// works around an issue where the docker
// command is not found; if neither path exists,
// this falls back to the default, which may work on
// some platforms
dockerExecutable.set(docker)
}

}

val sourcesJar by tasks.registering(Jar::class) {
Expand Down
49 changes: 32 additions & 17 deletions src/main/kotlin/com/tryformation/pgdocstore/DocStore.kt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ data class DocStoreEntry(
val similarity: Float? = null
)

/**
 * Boolean operator used to combine conditions in the generated SQL.
 *
 * The constant names are interpolated verbatim into the query string
 * (for example `" $tagsClauseOperator "`), so they must stay valid SQL keywords.
 */
enum class BooleanOperator {
// matches when at least one of the combined conditions holds
OR,
// matches only when all of the combined conditions hold
AND
}

fun String.sanitizeInputForDB(): String {
// Define a regular expression for disallowed characters or strings
// For example, this regex will remove single quotes, double quotes, semicolons, and SQL comment syntax
Expand Down Expand Up @@ -66,7 +71,7 @@ val RowData.docStoreEntry
},
text = getString(DocStoreEntry::text.name),
// only there on searches with a text
similarity = this.size.takeIf { it > 6}?.let { getFloat("rank") },
similarity = this.size.takeIf { it > 6 }?.let { getFloat("rank") },
)

class DocStore<T : Any>(
Expand Down Expand Up @@ -180,7 +185,7 @@ class DocStore<T : Any>(
* Retrieve multiple documents by their [ids].
*/
suspend fun multiGetById(ids: List<String>): List<T> {
return if(ids.isEmpty()) {
return if (ids.isEmpty()) {
emptyList()
} else {
connection.sendPreparedStatement(
Expand Down Expand Up @@ -457,25 +462,28 @@ class DocStore<T : Any>(
* from the specified [offset]
*
* You can optionally constrain
* the query with [tags]. If you set [orTags] to true, a logical OR will be used.
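* the query with [tags]. If you set [tagsClauseOperator] to OR,
* a logical OR will be used between the tags instead of the default AND.
* Use [whereClauseOperator] to control whether the tags clause and the
* text [query] clause are combined with AND (the default) or OR.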
*
* You can also specify a text [query]. In that case the results will be ordered by
* their ranking. You can use [similarityThreshold] to control how strict it matches.
*/
suspend fun documentsByRecency(
tags: List<String> = emptyList(),
orTags: Boolean = false,
query: String? = null,
tagsClauseOperator: BooleanOperator = BooleanOperator.AND,
whereClauseOperator: BooleanOperator = BooleanOperator.AND,
limit: Int = 100,
offset: Int = 0,
similarityThreshold: Double = 0.1,
): List<T> {
val q = constructQuery(
tags = tags,
query = query,
orTags = orTags,
tagsClauseOperator = tagsClauseOperator,
limit = limit,
offset = offset,
whereClauseOperator = whereClauseOperator,
similarityThreshold = similarityThreshold
)
return connection.sendPreparedStatement(q, tags + listOfNotNull(query)).let { result ->
Expand All @@ -491,18 +499,20 @@ class DocStore<T : Any>(
*/
suspend fun entriesByRecency(
tags: List<String> = emptyList(),
orTags: Boolean = false,
query: String? = null,
tagsClauseOperator: BooleanOperator = BooleanOperator.AND,
whereClauseOperator: BooleanOperator = BooleanOperator.AND,
limit: Int = 100,
offset: Int = 0,
similarityThreshold: Double = 0.1,
): List<DocStoreEntry> {
val q = constructQuery(
tags = tags,
query = query,
orTags = orTags,
tagsClauseOperator = tagsClauseOperator,
limit = limit,
offset = offset,
whereClauseOperator = whereClauseOperator,
similarityThreshold = similarityThreshold
)
return connection.sendPreparedStatement(q, tags + listOfNotNull(query)).let { result ->
Expand All @@ -521,23 +531,25 @@ class DocStore<T : Any>(
* You can use this to efficiently process all documents in your store.
*
* You can optionally constrain
* the query with [tags]. If you set [orTags] to true, a logical OR will be used
* and otherwise it defaults to an AND.
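* the query with [tags]. If you set [tagsClauseOperator] to OR,
* a logical OR will be used between the tags instead of the default AND.
* Use [whereClauseOperator] to control whether the tags clause and the
* text [query] clause are combined with AND (the default) or OR.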
*
* You can also specify a text [query]. In that case the results will be ordered by
* their ranking. You can use [similarityThreshold] to control how strict it matches.
*/
suspend fun documentsByRecencyScrolling(
tags: List<String> = emptyList(),
orTags: Boolean = false,
query: String? = null,
fetchSize: Int = 100,
tagsClauseOperator: BooleanOperator = BooleanOperator.AND,
whereClauseOperator: BooleanOperator = BooleanOperator.AND,
similarityThreshold: Double = 0.1,
fetchSize: Int = 100,
): Flow<T> {
val q = constructQuery(
tags = tags,
query = query,
orTags = orTags,
tagsClauseOperator = tagsClauseOperator,
whereClauseOperator = whereClauseOperator,
similarityThreshold = similarityThreshold
)
return queryFlow(
Expand All @@ -558,16 +570,18 @@ class DocStore<T : Any>(
*/
suspend fun entriesByRecencyScrolling(
tags: List<String> = emptyList(),
orTags: Boolean = false,
query: String? = null,
tagsClauseOperator: BooleanOperator = BooleanOperator.AND,
whereClauseOperator: BooleanOperator = BooleanOperator.AND,
fetchSize: Int = 100,
similarityThreshold: Double = 0.1,
): Flow<DocStoreEntry> {
return queryFlow(
query = constructQuery(
tags = tags,
query = query,
orTags = orTags,
tagsClauseOperator = tagsClauseOperator,
whereClauseOperator = whereClauseOperator,
similarityThreshold = similarityThreshold
),
// query is used in select and then once more in the where
Expand All @@ -581,7 +595,8 @@ class DocStore<T : Any>(
private fun constructQuery(
tags: List<String>,
query: String?,
orTags: Boolean,
tagsClauseOperator: BooleanOperator = BooleanOperator.AND,
whereClauseOperator: BooleanOperator = BooleanOperator.AND,
limit: Int? = null,
offset: Int = 0,
similarityThreshold: Double = 0.01
Expand All @@ -599,7 +614,7 @@ class DocStore<T : Any>(
tags.takeIf { it.isNotEmpty() }
?.let {
tags.joinToString(
if (orTags) " OR " else " AND "
" $tagsClauseOperator "
) { "? = ANY(tags)" }
}
?.let {
Expand All @@ -609,7 +624,7 @@ class DocStore<T : Any>(
query?.takeIf { q -> q.isNotBlank() }?.let {
"""similarity(text, ?) > $similarityThreshold"""
}
).joinToString(" AND ")
).joinToString(" $whereClauseOperator ")

}

Expand Down
2 changes: 1 addition & 1 deletion src/test/kotlin/com/tryformation/pgdocstore/TaggingTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class TaggingTest : DbTestBase() {
ds.entriesByRecency(listOf("foo")).count() shouldBe 2
ds.entriesByRecencyScrolling(listOf("foo")).count() shouldBe 2
ds.documentsByRecencyScrolling(listOf("foo", "bar")).count() shouldBe 1
ds.documentsByRecencyScrolling(listOf("foo", "bar"), orTags = true).count() shouldBe 3
ds.documentsByRecencyScrolling(listOf("foo", "bar"), tagsClauseOperator = BooleanOperator.OR).count() shouldBe 3
}

}
48 changes: 46 additions & 2 deletions src/test/kotlin/com/tryformation/pgdocstore/TextSearchTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class SearchableModel(
val title: String,
val description: String? = null,
val id: String = UUID.randomUUID().toString(),
val tags: List<String> = emptyList()
)

class TextSearchTest : DbTestBase() {
Expand Down Expand Up @@ -50,7 +51,7 @@ class TextSearchTest : DbTestBase() {

@Test
fun shouldRankCorrectly() = coRun {
val ds = DocStore<SearchableModel>(
val ds = DocStore(
db,
SearchableModel.serializer(),
tableName,
Expand Down Expand Up @@ -80,9 +81,52 @@ class TextSearchTest : DbTestBase() {
it shouldHaveSize 0
}
}

/**
 * Checks how the tags clause and the text query are combined for the
 * different [BooleanOperator] settings of `tagsClauseOperator` and `whereClauseOperator`.
 */
@Test
fun shouldDoANDorOR() = coRun {
// store that indexes title + description as text and uses the model's tags
val ds = DocStore(
db,
SearchableModel.serializer(),
tableName,
textExtractor = { listOfNotNull(it.title, it.description).joinToString("\n") },
tagExtractor = SearchableModel::tags
)
// three documents: tagged foo+bar, foo only, and bar only
ds.bulkInsert(
listOf(
SearchableModel("Document Numero Uno", tags = listOf("foo", "bar")),
SearchableModel("The second one", tags = listOf("foo")),
SearchableModel("Another Document", tags = listOf("bar")),
)
)
// tag foo AND text "Another": expects no hits, i.e. no foo-tagged
// document is expected to match the text
ds.documentsByRecency(
tags = listOf("foo"),
tagsClauseOperator = BooleanOperator.AND,
query = "Another",
whereClauseOperator = BooleanOperator.AND,
) shouldHaveSize 0
// tag foo OR text "Another": expects all three documents (the two tagged
// foo plus, presumably, "Another Document" via the text match)
ds.documentsByRecency(
tags = listOf("foo"),
tagsClauseOperator = BooleanOperator.AND,
query = "Another",
whereClauseOperator = BooleanOperator.OR,
) shouldHaveSize 3
// foo AND foobar: none of the inserted documents carries the foobar tag,
// so combining with AND is expected to return nothing
ds.documentsByRecency(
tags = listOf("foo", "foobar"),
tagsClauseOperator = BooleanOperator.AND,
query = "Another",
whereClauseOperator = BooleanOperator.AND,
) shouldHaveSize 0
// (foo OR foobar) AND text "Document": expects a single hit, presumably
// "Document Numero Uno", the only foo-tagged title containing "Document"
ds.documentsByRecency(
tags = listOf("foo", "foobar"),
tagsClauseOperator = BooleanOperator.OR,
query = "Document",
whereClauseOperator = BooleanOperator.AND,
) shouldHaveSize 1

}
}

private suspend fun DocStore<*>.search(query: String, similarityThreshold:Double = 0.1) =
private suspend fun DocStore<*>.search(query: String, similarityThreshold: Double = 0.1) =
entriesByRecency(query = query, similarityThreshold = similarityThreshold).also {
println("Found for '$query' with threshold $similarityThreshold:")
it.forEach { e ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,6 @@ val readmeMd = sourceGitRepository.md {
""".trimIndent()

example {

// rollbacks happen if there are exceptions
val another = MyModel(
title = "Transactional",
Expand Down
15 changes: 10 additions & 5 deletions versions.properties
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ version.ch.qos.logback..logback-classic=1.4.14

version.com.github.jasync-sql..jasync-postgresql=2.2.4

version.com.github.jillesvangurp..kotlin4example=1.1.1
version.com.github.jillesvangurp..kotlin4example=1.1.2

version.io.github.microutils..kotlin-logging=3.0.5
## # available=4.0.0-beta-1
Expand All @@ -28,6 +28,7 @@ version.kotest=5.8.0
version.kotlin=1.9.22
## # available=2.0.0-Beta1
## # available=2.0.0-Beta2
## # available=2.0.0-Beta3

version.kotlinx.coroutines=1.7.3
## # available=1.8.0-RC
Expand All @@ -41,14 +42,18 @@ version.org.apache.logging.log4j..log4j-to-slf4j=2.22.1
## # available=3.0.0-alpha1
## # available=3.0.0-beta1

version.org.slf4j..jcl-over-slf4j=2.0.10
version.org.slf4j..jcl-over-slf4j=2.0.11
## # available=2.1.0-alpha0
## # available=2.1.0-alpha1

version.org.slf4j..jul-to-slf4j=2.0.10
version.org.slf4j..jul-to-slf4j=2.0.11
## # available=2.1.0-alpha0
## # available=2.1.0-alpha1

version.org.slf4j..log4j-over-slf4j=2.0.10
version.org.slf4j..log4j-over-slf4j=2.0.11
## # available=2.1.0-alpha0
## # available=2.1.0-alpha1

version.org.slf4j..slf4j-api=2.0.10
version.org.slf4j..slf4j-api=2.0.11
## # available=2.1.0-alpha0
## # available=2.1.0-alpha1

0 comments on commit 2bcedd1

Please sign in to comment.