Add search module
This commit is contained in:
parent
88b6eb56ae
commit
3861fb6b97
1
pom.xml
1
pom.xml
@ -10,6 +10,7 @@
|
||||
<module>app</module>
|
||||
<module>domain</module>
|
||||
<module>shared</module>
|
||||
<module>search</module>
|
||||
</modules>
|
||||
|
||||
<packaging>pom</packaging>
|
||||
|
||||
50
search/pom.xml
Normal file
50
search/pom.xml
Normal file
@ -0,0 +1,50 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module providing Lucene-backed note search. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>parent</artifactId>
        <groupId>be.simplenotes</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>search</artifactId>

    <properties>
        <!-- Single version shared by every Lucene artifact below so they cannot drift apart. -->
        <lucene.version>8.5.2</lucene.version>
    </properties>

    <dependencies>
        <!-- Sibling modules follow this module's (inherited) version instead of repeating the literal. -->
        <dependency>
            <groupId>be.simplenotes</groupId>
            <artifactId>domain</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>${lucene.version}</version>
        </dependency>

        <!-- Test-only: the test-jar published by the shared module. -->
        <dependency>
            <groupId>be.simplenotes</groupId>
            <artifactId>shared</artifactId>
            <version>${project.version}</version>
            <type>test-jar</type>
            <scope>test</scope>
        </dependency>
    </dependencies>

</project>
|
||||
7
search/src/main/kotlin/Constants.kt
Normal file
7
search/src/main/kotlin/Constants.kt
Normal file
@ -0,0 +1,7 @@
|
||||
package be.simplenotes.search
|
||||
|
||||
// Names of the Lucene document fields used by the search module.

/** Document field holding the note's UUID. */
internal const val uuidField: String = "uuid"

/** Document field holding the note's title. */
internal const val titleField: String = "title"

/** Document field holding the note's tags. */
internal const val tagsField: String = "tags"

/** Document field holding the note's content. */
internal const val contentField: String = "content"

/** Document field holding the note's last-update timestamp. */
internal const val updatedAtField: String = "updatedAt"
|
||||
35
search/src/main/kotlin/Extractors.kt
Normal file
35
search/src/main/kotlin/Extractors.kt
Normal file
@ -0,0 +1,35 @@
|
||||
package be.simplenotes.search
|
||||
|
||||
import be.simplenotes.domain.model.PersistedNote
|
||||
import be.simplenotes.domain.model.PersistedNoteMetadata
|
||||
import org.apache.lucene.document.Document
|
||||
import org.apache.lucene.document.Field
|
||||
import org.apache.lucene.document.StringField
|
||||
import org.apache.lucene.document.TextField
|
||||
import org.apache.lucene.search.IndexSearcher
|
||||
import org.apache.lucene.search.TopDocs
|
||||
|
||||
/**
 * Builds the Lucene [Document] that represents this note in the index.
 *
 * Every field is stored ([Field.Store.YES]) so its value can be read back from a search hit.
 */
internal fun PersistedNote.toDocument(): Document {
    val document = Document()

    // Identifier and timestamp: StringField indexes the value verbatim as one term (no analysis).
    document.add(StringField(uuidField, UuidFieldConverter.toDoc(uuid), Field.Store.YES))
    document.add(StringField(updatedAtField, LocalDateTimeFieldConverter.toDoc(updatedAt), Field.Store.YES))

    // Full-text fields: TextField runs the value through the analyzer.
    document.add(TextField(titleField, meta.title, Field.Store.YES))
    document.add(TextField(tagsField, TagsFieldConverter.toDoc(meta.tags), Field.Store.YES))
    // NOTE(review): the rendered HTML is indexed as-is, so markup tokens presumably become searchable too.
    document.add(TextField(contentField, html, Field.Store.YES))

    return document
}
|
||||
|
||||
/**
 * Loads the stored document of every hit through [searcher] and maps it to note metadata.
 *
 * The mapping is eager, so all documents are read before this function returns.
 */
internal fun TopDocs.toResults(searcher: IndexSearcher) = scoreDocs.map { hit ->
    val stored = searcher.doc(hit.doc)
    PersistedNoteMetadata(
        title = stored.get(titleField),
        uuid = UuidFieldConverter.fromDoc(stored.get(uuidField)),
        updatedAt = LocalDateTimeFieldConverter.fromDoc(stored.get(updatedAtField)),
        tags = TagsFieldConverter.fromDoc(stored.get(tagsField))
    )
}
|
||||
26
search/src/main/kotlin/FieldConverters.kt
Normal file
26
search/src/main/kotlin/FieldConverters.kt
Normal file
@ -0,0 +1,26 @@
|
||||
package be.simplenotes.search
|
||||
|
||||
import java.time.LocalDateTime
|
||||
import java.time.format.DateTimeFormatter
|
||||
import java.util.*
|
||||
|
||||
/**
 * Converts a value of type [T] to and from the string form kept in an index document.
 */
internal interface FieldConverter<T> {
    /** Encodes [value] as the string written to the document. */
    fun toDoc(value: T): String

    /** Decodes a string previously produced by [toDoc]. */
    fun fromDoc(value: String): T
}

/** Stores a [LocalDateTime] using the ISO date-time format. */
internal object LocalDateTimeFieldConverter : FieldConverter<LocalDateTime> {
    private val formatter = DateTimeFormatter.ISO_DATE_TIME

    override fun toDoc(value: LocalDateTime): String = formatter.format(value)

    override fun fromDoc(value: String): LocalDateTime = LocalDateTime.parse(value, formatter)
}

/** Stores a [UUID] in its canonical textual form. */
internal object UuidFieldConverter : FieldConverter<UUID> {
    override fun toDoc(value: UUID): String = value.toString()

    override fun fromDoc(value: String): UUID = UUID.fromString(value)
}

/**
 * Stores a list of tags as a single space-separated string.
 *
 * A tag that itself contains a space cannot be round-tripped: it is split into several tags on read.
 */
internal object TagsFieldConverter : FieldConverter<List<String>> {
    private const val SEPARATOR = " "

    override fun toDoc(value: List<String>): String = value.joinToString(SEPARATOR)

    // "".split(" ") yields [""], so an empty tag list used to come back as one empty tag;
    // dropping empty tokens makes the empty list round-trip correctly.
    override fun fromDoc(value: String): List<String> =
        value.split(SEPARATOR).filter { it.isNotEmpty() }
}
|
||||
123
search/src/main/kotlin/NoteSearcher.kt
Normal file
123
search/src/main/kotlin/NoteSearcher.kt
Normal file
@ -0,0 +1,123 @@
|
||||
package be.simplenotes.search
|
||||
|
||||
import be.simplenotes.domain.model.PersistedNote
|
||||
import be.simplenotes.domain.model.PersistedNoteMetadata
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer
|
||||
import org.apache.lucene.index.*
|
||||
import org.apache.lucene.search.*
|
||||
import org.apache.lucene.store.Directory
|
||||
import org.apache.lucene.store.FSDirectory
|
||||
import org.slf4j.LoggerFactory
|
||||
import java.io.File
|
||||
import java.io.IOException
|
||||
import java.nio.file.FileVisitResult
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Path
|
||||
import java.nio.file.SimpleFileVisitor
|
||||
import java.nio.file.attribute.BasicFileAttributes
|
||||
import java.util.*
|
||||
|
||||
/**
 * Per-field search input.
 *
 * Each entry is optional; a `null` entry contributes no clause to the query built by the searcher.
 */
data class SearchTerms(
    val title: String?,
    val tag: String?,
    val content: String?
)
|
||||
|
||||
/**
 * Full-text search over notes, backed by one Lucene index directory per user.
 *
 * The index of a user lives in the sub-directory of [basePath] named after the user id.
 * Every operation opens the Lucene resources it needs and closes them before returning.
 *
 * @param basePath directory under which the per-user indexes are created
 */
class NoteSearcher(basePath: Path = Path.of("/tmp", "lucene")) {
    private val baseFile = basePath.toFile()

    private val logger = LoggerFactory.getLogger(javaClass)

    // region utils
    /** Location of the index directory that belongs to [userId]. */
    private fun indexPath(userId: Int): Path = File(baseFile, userId.toString()).toPath()

    /** Opens the index directory of [userId]; the caller is responsible for closing it. */
    private fun getDirectory(userId: Int): Directory = FSDirectory.open(indexPath(userId))

    /** Term that identifies the document of the note with the given [uuid]. */
    private fun uuidTerm(uuid: UUID) = Term(uuidField, UuidFieldConverter.toDoc(uuid))

    /**
     * Runs [action] against a writer on the index of [userId] and commits afterwards.
     *
     * The writer and the directory are closed even when [action] throws, so the index
     * write lock is always released.
     */
    private fun <T> withWriter(userId: Int, action: (IndexWriter) -> T): T =
        getDirectory(userId).use { directory ->
            IndexWriter(directory, IndexWriterConfig(StandardAnalyzer())).use { writer ->
                action(writer).also { writer.commit() }
            }
        }

    /**
     * Builds a query with one optional (SHOULD) fuzzy clause per non-null search term.
     *
     * NOTE(review): the terms are passed to [FuzzyQuery] as-is, without going through the
     * analyzer used at index time — confirm that callers normalise their input (e.g. case).
     */
    private fun buildQuery(terms: SearchTerms): BooleanQuery {
        val builder = BooleanQuery.Builder()
        listOf(
            titleField to terms.title,
            tagsField to terms.tag,
            contentField to terms.content
        ).forEach { (field, text) ->
            if (text != null) {
                builder.add(BooleanClause(FuzzyQuery(Term(field, text)), BooleanClause.Occur.SHOULD))
            }
        }
        return builder.build()
    }
    // endregion

    /** Adds [note] to the index of [userId]. */
    fun indexNote(userId: Int, note: PersistedNote) {
        logger.debug("Indexing note ${note.uuid} for user $userId")

        val doc = note.toDocument()
        withWriter(userId) { it.addDocument(doc) }
    }

    /** Removes the note identified by [uuid] from the index of [userId]. */
    fun deleteIndex(userId: Int, uuid: UUID) {
        logger.debug("Deleting indexing $uuid for user $userId")

        withWriter(userId) { it.deleteDocuments(uuidTerm(uuid)) }
    }

    /** Replaces the indexed version of [note] (matched by its uuid) in the index of [userId]. */
    fun updateIndex(userId: Int, note: PersistedNote) {
        logger.debug("Updating note ${note.uuid} for user $userId")

        val doc = note.toDocument()
        // updateDocument deletes the documents matching the term and adds the new one atomically,
        // using a single writer and a single commit.
        withWriter(userId) { it.updateDocument(uuidTerm(note.uuid), doc) }
    }

    /**
     * Searches the index of [userId] for notes matching any of the given [terms].
     *
     * @return the metadata of at most [MAX_RESULTS] matching notes; an empty list when
     * nothing has been indexed for this user yet
     */
    fun search(userId: Int, terms: SearchTerms): List<PersistedNoteMetadata> {
        val query = buildQuery(terms)
        logger.debug("Searching: $query")

        return getDirectory(userId).use { directory ->
            if (!DirectoryReader.indexExists(directory)) {
                // Opening a reader on a directory without an index would throw.
                emptyList<PersistedNoteMetadata>()
            } else {
                DirectoryReader.open(directory).use { reader ->
                    val searcher = IndexSearcher(reader)
                    // toResults reads the stored documents eagerly, before the reader is closed.
                    searcher.search(query, MAX_RESULTS).toResults(searcher)
                }
            }
        }
    }

    /**
     * Deletes the whole index directory of [userId].
     *
     * Best effort: a missing index is a no-op and an I/O failure is logged, not rethrown.
     */
    fun dropIndex(userId: Int) {
        val index = indexPath(userId)
        if (Files.notExists(index)) return

        try {
            Files.walkFileTree(
                index,
                object : SimpleFileVisitor<Path>() {
                    override fun visitFile(file: Path, attrs: BasicFileAttributes?): FileVisitResult {
                        Files.delete(file)
                        return FileVisitResult.CONTINUE
                    }

                    override fun postVisitDirectory(dir: Path, exc: IOException?): FileVisitResult {
                        // Surface a failed directory listing instead of attempting a doomed delete.
                        if (exc != null) throw exc
                        Files.delete(dir)
                        return FileVisitResult.CONTINUE
                    }
                }
            )
        } catch (e: IOException) {
            logger.warn("Could not drop index of user $userId", e)
        }
    }

    private companion object {
        /** Maximum number of hits returned by [search]. */
        const val MAX_RESULTS = 10
    }
}
|
||||
162
search/src/test/kotlin/NoteSearcherTest.kt
Normal file
162
search/src/test/kotlin/NoteSearcherTest.kt
Normal file
@ -0,0 +1,162 @@
|
||||
package be.simplenotes.search
|
||||
|
||||
import be.simplenotes.domain.model.NoteMetadata
|
||||
import be.simplenotes.domain.model.PersistedNote
|
||||
import be.simplenotes.domain.model.PersistedNoteMetadata
|
||||
import org.assertj.core.api.Assertions.assertThat
|
||||
import org.intellij.lang.annotations.Language
|
||||
import org.junit.jupiter.api.AfterAll
|
||||
import org.junit.jupiter.api.BeforeEach
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.junit.jupiter.api.parallel.ResourceLock
|
||||
import java.time.LocalDateTime
|
||||
import java.util.*
|
||||
|
||||
/**
 * Tests for [NoteSearcher]; every test indexes and searches as user `1`.
 *
 * The "lucene" resource lock is declared because the tests share one on-disk index.
 */
@ResourceLock("lucene")
internal class NoteSearcherTest {

    // region setup
    private val searcher = NoteSearcher()

    /** HTML fixture shared by the content-based tests. */
    @Language("html")
    private val luceneHtml =
        """
        <div>
        <h1 class="title">Apache Lucene Core</h1>
        <p>Apache Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> is a
        high-performance, full-featured text search engine library written entirely in Java.
        It is a technology suitable for nearly any application that requires full-text search,
        especially cross-platform.</p>
        <p>Apache Lucene is an open source project available for free download. Please use the
        links on the right to access Lucene.</p>
        <h1 id="lucenetm-features">Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> Features</h1>
        <p>Lucene offers powerful features through a simple API:</p>
        <h2 id="scalable-high-performance-indexing">Scalable, High-Performance Indexing</h2>
        <ul>
        <li>over <a href="http://home.apache.org/~mikemccand/lucenebench/indexing.html">150GB/hour on modern hardware</a></li>
        <li>small RAM requirements -- only 1MB heap</li>
        <li>incremental indexing as fast as batch indexing</li>
        <li>index size roughly 20-30% the size of text indexed</li>
        </ul>
        """.trimIndent()

    /** Indexes a note for user 1 and returns it. */
    private fun index(
        title: String,
        tags: List<String> = emptyList(),
        content: String = "",
        uuid: UUID = UUID.randomUUID(),
    ): PersistedNote {
        val note = PersistedNote(NoteMetadata(title, tags), markdown = "", content, LocalDateTime.now(), uuid)
        searcher.indexNote(1, note)
        return note
    }

    /** Searches the index of user 1 with the given optional terms. */
    private fun search(
        title: String? = null,
        tag: String? = null,
        content: String? = null,
    ): List<PersistedNoteMetadata> = searcher.search(1, SearchTerms(title, tag, content))

    // NOTE(review): @AfterAll on an instance method needs the PER_CLASS test-instance lifecycle;
    // presumably that is configured outside this file — confirm.
    @BeforeEach
    @AfterAll
    fun dropIndexes() {
        searcher.dropIndex(1)
    }
    // endregion

    @Test
    fun `exact title search`() {
        index("first")
        index("second")
        index("flip")

        assertThat(search("first"))
            .hasSizeGreaterThanOrEqualTo(1)
            .anyMatch { it.title == "first" }

        assertThat(search("nothing")).isEmpty()
    }

    @Test
    fun `fuzzy title search`() {
        index("first")
        index("second")
        index("flip")

        assertThat(search("firt"))
            .hasSizeGreaterThanOrEqualTo(1)
            .anyMatch { it.title == "first" }

        assertThat(search("nothing")).isEmpty()
    }

    @Test
    fun `exact tags search`() {
        index("first", tags = listOf("example", "flamingo"))
        index("second", tags = listOf("yes"))
        index("second")

        assertThat(search(tag = "example"))
            .hasSize(1)
            .anyMatch { it.title == "first" }
    }

    @Test
    fun `exact content search`() {
        index("first", content = luceneHtml)

        assertThat(search(content = "fast"))
            .hasSize(1)
            .anyMatch { it.title == "first" }

        @Suppress("SpellCheckingInspection")
        assertThat(search(content = "preformance")) // <- note the error
            .hasSize(1)
            .anyMatch { it.title == "first" }
    }

    @Test
    fun `combined search`() {
        index("first", content = luceneHtml, tags = listOf("abc"))

        assertThat(search(title = "fir", tag = "abc", content = "20"))
            .hasSize(1)
    }

    @Test
    fun `delete index`() {
        val uuid = index("first").uuid
        searcher.deleteIndex(1, uuid)
        assertThat(search("first")).isEmpty()
    }

    @Test
    fun `update index`() {
        val note = index("first")
        searcher.updateIndex(1, note.copy(meta = note.meta.copy(title = "new")))
        assertThat(search("first")).isEmpty()
        assertThat(search("new")).hasSize(1)
    }
}
|
||||
Loading…
x
Reference in New Issue
Block a user