Prefix maven modules

This commit is contained in:
2020-10-23 15:45:28 +02:00
parent 4ff97044f0
commit 4c9ac8944e
135 changed files with 30 additions and 30 deletions
+66
View File
@@ -0,0 +1,66 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Build descriptor of the simplenotes-search module (Lucene based note search). -->
<!-- Inherits from the simplenotes parent POM. -->
<parent>
<artifactId>simplenotes-parent</artifactId>
<groupId>be.simplenotes</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>simplenotes-search</artifactId>
<properties>
<!-- Single Lucene version referenced by every Lucene artifact below, so they stay in sync. -->
<lucene.version>8.6.1</lucene.version>
</properties>
<dependencies>
<!-- Sibling module. NOTE(review): presumably provides the note model and the search contract; confirm. -->
<dependency>
<groupId>be.simplenotes</groupId>
<artifactId>simplenotes-domain</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
<!-- Lucene: core index/search API, analyzers and the query parser. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>${lucene.version}</version>
</dependency>
<!-- No version is declared for the next three dependencies; presumably managed by the parent POM (confirm). -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>
<!-- Test jar of the simplenotes-shared module, only on the test classpath. -->
<dependency>
<groupId>be.simplenotes</groupId>
<artifactId>simplenotes-shared</artifactId>
<version>1.0-SNAPSHOT</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
</dependencies>
</project>
@@ -0,0 +1,7 @@
package be.simplenotes.search
// Names of the Lucene document fields used by this module.
/** Name of the field holding the note UUID. */
internal const val uuidField = "uuid"
/** Name of the field holding the note title. */
internal const val titleField = "title"
/** Name of the field holding the note tags. */
internal const val tagsField = "tags"
/** Name of the field holding the note content. */
internal const val contentField = "content"
/** Name of the field holding the last-update timestamp of the note. */
internal const val updatedAtField = "updatedAt"
@@ -0,0 +1,29 @@
package be.simplenotes.search
import be.simplenotes.domain.model.PersistedNote
import be.simplenotes.domain.model.PersistedNoteMetadata
import org.apache.lucene.document.Document
import org.apache.lucene.document.Field
import org.apache.lucene.document.StringField
import org.apache.lucene.document.TextField
/**
 * Builds the Lucene [Document] that represents this note in the index.
 *
 * The UUID and the last-update timestamp are written as [StringField]s; the title, the tags
 * and the markdown body are written as [TextField]s. Every field is stored, so the values can be
 * read back from a search hit.
 *
 * @return a new document containing the five fields, in the order uuid, updatedAt, title, tags, content.
 */
internal fun PersistedNote.toDocument(): Document {
    // identifier / bookkeeping fields
    val identityFields = listOf(
        StringField(uuidField, UuidFieldConverter.toDoc(uuid), Field.Store.YES),
        StringField(updatedAtField, LocalDateTimeFieldConverter.toDoc(updatedAt), Field.Store.YES),
    )
    // full-text fields
    val textFields = listOf(
        TextField(titleField, meta.title, Field.Store.YES),
        TextField(tagsField, TagsFieldConverter.toDoc(meta.tags), Field.Store.YES),
        TextField(contentField, markdown, Field.Store.YES),
    )

    val document = Document()
    for (field in identityFields + textFields) {
        document.add(field)
    }
    return document
}
/**
 * Rebuilds the note metadata from the stored fields of this Lucene document.
 *
 * Reads the title, uuid, updatedAt and tags fields and decodes the last three with their
 * respective field converters.
 *
 * @return the [PersistedNoteMetadata] described by this document.
 */
internal fun Document.toNoteMeta(): PersistedNoteMetadata {
    val storedTitle = get(titleField)
    val noteUuid = UuidFieldConverter.fromDoc(get(uuidField))
    val lastUpdate = LocalDateTimeFieldConverter.fromDoc(get(updatedAtField))
    val noteTags = TagsFieldConverter.fromDoc(get(tagsField))

    return PersistedNoteMetadata(
        title = storedTitle,
        uuid = noteUuid,
        updatedAt = lastUpdate,
        tags = noteTags,
    )
}
@@ -0,0 +1,26 @@
package be.simplenotes.search
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import java.util.*
/**
 * Two-way conversion between a value of type [T] and the string form written to a document field.
 */
internal interface FieldConverter<T> {
/** Encodes [value] as the string to put in the document. */
fun toDoc(value: T): String
/** Decodes a string read from the document back into a [T]. */
fun fromDoc(value: String): T
}
/**
 * [FieldConverter] for [LocalDateTime] values, using the [DateTimeFormatter.ISO_DATE_TIME] text form
 * in both directions.
 */
internal object LocalDateTimeFieldConverter : FieldConverter<LocalDateTime> {
    private val formatter = DateTimeFormatter.ISO_DATE_TIME

    /** Formats [value] with the ISO date-time formatter. */
    override fun toDoc(value: LocalDateTime): String {
        return value.format(formatter)
    }

    /** Parses a string previously produced by [toDoc]. */
    override fun fromDoc(value: String): LocalDateTime {
        return LocalDateTime.parse(value, formatter)
    }
}
/**
 * [FieldConverter] for [UUID] values, based on [UUID.toString] and [UUID.fromString].
 */
internal object UuidFieldConverter : FieldConverter<UUID> {
    /** Returns the standard string representation of [value]. */
    override fun toDoc(value: UUID): String {
        return value.toString()
    }

    /** Parses a UUID from its standard string representation. */
    override fun fromDoc(value: String): UUID {
        return UUID.fromString(value)
    }
}
/**
 * [FieldConverter] for a list of tags, encoded as a single space-separated string.
 *
 * Because a space is the separator, a tag that itself contains a space comes back as several tags,
 * and empty tags are dropped when decoding.
 */
internal object TagsFieldConverter : FieldConverter<List<String>> {
    /** Joins the tags with a single space. */
    override fun toDoc(value: List<String>): String {
        return value.joinToString(separator = " ")
    }

    /** Splits on spaces and discards the empty pieces (so an empty string yields an empty list). */
    override fun fromDoc(value: String): List<String> {
        val pieces = value.split(" ")
        return pieces.filterNot { it.isEmpty() }
    }
}
@@ -0,0 +1,47 @@
package be.simplenotes.search
import org.apache.lucene.document.Document
import org.apache.lucene.index.Term
import org.apache.lucene.search.*
import org.slf4j.LoggerFactory
// Logger shared by the search DSL.
private val logger = LoggerFactory.getLogger("be.simplenotes.search.dsl")

/**
 * Runs a search described with the [LuceneDsl] and returns the matching documents.
 *
 * Every clause registered by [receiver] whose query text is not null becomes a [FuzzyQuery] on the
 * clause's field; the clauses are combined with [BooleanClause.Occur.SHOULD]. At most
 * [LuceneDsl.count] hits are requested, and each hit is loaded from the index before returning.
 *
 * @param receiver DSL block that registers the clauses (and optionally changes the hit count).
 * @return the documents of the top hits, in the order reported by the searcher.
 */
fun IndexSearcher.query(receiver: LuceneDsl.() -> Unit): List<Document> {
    val dsl = LuceneDsl().apply(receiver)

    val booleanQuery = BooleanQuery.Builder().apply {
        for ((field, text) in dsl.clauses) {
            // clauses without a query text are ignored
            if (text != null) {
                add(BooleanClause(FuzzyQuery(Term(field, text)), BooleanClause.Occur.SHOULD))
            }
        }
    }.build()

    val topDocs = search(booleanQuery, dsl.count)
    logger.debug("Searching: `$booleanQuery` results: ${topDocs.totalHits.value}")

    return topDocs.scoreDocs.map { hit -> doc(hit.doc) }
}
/**
 * Receiver of the search DSL: collects the [BooleanExpression]s to search for and the maximum
 * number of results.
 */
class LuceneDsl {
    /** Clauses registered so far, in registration order. */
    val clauses = mutableListOf<BooleanExpression>()

    /** Maximum number of results to request; 10 unless changed. */
    var count: Int = 10

    /** Registers a single clause. */
    fun addBooleanClause(booleanDsl: BooleanExpression) {
        clauses += booleanDsl
    }

    /** Registers one clause per field name in the receiver list, all with the same [query] text. */
    infix fun List<String>.anyMatch(query: String?) {
        for (field in this) {
            addBooleanClause(BooleanExpression(field, query))
        }
    }
}
/**
 * Evaluates [booleanExpression] and registers the resulting clause on this DSL instance.
 */
fun LuceneDsl.or(booleanExpression: () -> BooleanExpression) {
    val clause = booleanExpression()
    addBooleanClause(clause)
}
/** Pairs the receiver (used as the clause's term) with [query] in a new [BooleanExpression]. */
infix fun String.eq(query: String?): BooleanExpression =
    BooleanExpression(term = this, query = query)
/**
 * A single search clause: [term] paired with the nullable [query] text to look for.
 */
data class BooleanExpression(val term: String, val query: String?)
@@ -0,0 +1,98 @@
package be.simplenotes.search
import be.simplenotes.domain.model.PersistedNote
import be.simplenotes.domain.usecases.search.NoteSearcher
import be.simplenotes.domain.usecases.search.SearchTerms
import be.simplenotes.search.utils.rmdir
import org.apache.lucene.analysis.standard.StandardAnalyzer
import org.apache.lucene.document.Document
import org.apache.lucene.index.*
import org.apache.lucene.search.IndexSearcher
import org.apache.lucene.search.TermQuery
import org.apache.lucene.store.Directory
import org.apache.lucene.store.FSDirectory
import org.slf4j.LoggerFactory
import java.io.File
import java.nio.file.Path
import java.util.*
/**
 * Lucene-backed [NoteSearcher] keeping one on-disk index per user.
 *
 * The index of a user lives in the sub-directory of [basePath] named after the user id.
 * Every operation opens the Lucene resources it needs (directory, reader or writer) and closes them
 * again before returning, including when the operation fails, so no file handle or index write lock
 * outlives a call.
 *
 * @param basePath root directory containing the per-user indexes.
 */
class NoteSearcherImpl(basePath: Path = Path.of("/tmp", "lucene")) : NoteSearcher {
    private val baseFile = basePath.toFile()

    private val logger = LoggerFactory.getLogger(javaClass)

    // region utils

    /** Opens the index directory of [userId]; the caller is responsible for closing it. */
    private fun getDirectory(userId: Int): Directory {
        val index = File(baseFile, userId.toString()).toPath()
        return FSDirectory.open(index)
    }

    /**
     * Runs the DSL query described by [receiver] against the index of [userId].
     *
     * The reader and the directory are closed before returning; this is safe because
     * [IndexSearcher.query] loads every matching document eagerly.
     *
     * @throws IndexNotFoundException when no index exists yet for this user.
     */
    private fun queryIndex(userId: Int, receiver: LuceneDsl.() -> Unit): List<Document> =
        getDirectory(userId).use { directory ->
            DirectoryReader.open(directory).use { reader ->
                IndexSearcher(reader).query(receiver)
            }
        }

    /**
     * Runs [block] on a fresh [IndexWriter] for the index of [userId].
     *
     * The writer (and with it the index write lock) and the directory are always closed,
     * even when [block] throws.
     */
    private fun withWriter(userId: Int, block: IndexWriter.() -> Unit) {
        getDirectory(userId).use { directory ->
            IndexWriter(directory, IndexWriterConfig(StandardAnalyzer())).use { writer ->
                writer.block()
            }
        }
    }

    // endregion

    /** Adds [note] to the index of [userId] and commits. */
    override fun indexNote(userId: Int, note: PersistedNote) {
        logger.debug("Indexing note ${note.uuid} for user $userId")

        val doc = note.toDocument()

        withWriter(userId) {
            addDocument(doc)
            commit()
        }
    }

    /** Adds all [notes] to the index of [userId] with a single commit. */
    override fun indexNotes(userId: Int, notes: List<PersistedNote>) {
        logger.debug("Indexing notes for user $userId")

        val docs = notes.map { it.toDocument() }

        withWriter(userId) {
            addDocuments(docs)
            commit()
        }
    }

    /** Removes the documents whose uuid field equals [uuid] from the index of [userId]. */
    override fun deleteIndex(userId: Int, uuid: UUID) {
        logger.debug("Deleting index $uuid for user $userId")

        withWriter(userId) {
            deleteDocuments(TermQuery(Term(uuidField, UuidFieldConverter.toDoc(uuid))))
            commit()
        }
    }

    /**
     * Replaces the indexed version of [note] in the index of [userId].
     *
     * The delete and the add happen in one writer session and one commit, so a failure cannot
     * leave the note removed but not re-added.
     */
    override fun updateIndex(userId: Int, note: PersistedNote) {
        logger.debug("Updating note ${note.uuid} for user $userId")

        val doc = note.toDocument()
        val uuidTerm = Term(uuidField, UuidFieldConverter.toDoc(note.uuid))

        withWriter(userId) {
            updateDocument(uuidTerm, doc)
            commit()
        }
    }

    /**
     * Searches the notes of [userId].
     *
     * The title, tag and content terms are each matched against their own field, and `terms.all`
     * against all three fields; null terms are ignored by the query DSL.
     *
     * @return the metadata of the matching notes, or an empty list when the user has no index yet.
     */
    override fun search(userId: Int, terms: SearchTerms) = try {
        queryIndex(userId) {
            or { titleField eq terms.title }
            or { tagsField eq terms.tag }
            or { contentField eq terms.content }
            listOf(titleField, tagsField, contentField) anyMatch terms.all
        }.map(Document::toNoteMeta)
    } catch (e: IndexNotFoundException) {
        logger.warn("Index not found for user $userId")
        emptyList()
    }

    /** Deletes the whole index directory of [userId]. */
    override fun dropIndex(userId: Int) = rmdir(File(baseFile, userId.toString()).toPath())

    /** Deletes the base directory, and with it the indexes of all users. */
    override fun dropAll() = rmdir(baseFile.toPath())
}
@@ -0,0 +1,9 @@
package be.simplenotes.search
import be.simplenotes.domain.usecases.search.NoteSearcher
import org.koin.dsl.module
import java.nio.file.Path
/**
 * Koin module binding [NoteSearcher] to a single [NoteSearcherImpl] whose indexes are stored
 * under the relative path `.lucene`.
 */
val searchModule = module {
    single<NoteSearcher> {
        val indexRoot = Path.of(".lucene")
        NoteSearcherImpl(basePath = indexRoot)
    }
}
@@ -0,0 +1,29 @@
package be.simplenotes.search.utils
import java.io.IOException
import java.nio.file.FileVisitResult
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.SimpleFileVisitor
import java.nio.file.attribute.BasicFileAttributes
/**
 * Recursively deletes [path]: every file first, then each directory once its content is gone.
 *
 * This is a best-effort operation: any [IOException] raised while walking or deleting
 * (for example because [path] does not exist) is swallowed, which may leave the tree
 * partially deleted.
 */
internal fun rmdir(path: Path) {
    fun deleteAndContinue(target: Path): FileVisitResult {
        Files.delete(target)
        return FileVisitResult.CONTINUE
    }

    val deletingVisitor = object : SimpleFileVisitor<Path>() {
        override fun visitFile(file: Path, attrs: BasicFileAttributes?): FileVisitResult =
            deleteAndContinue(file)

        // the directory is deleted even when iterating over it reported an error
        override fun postVisitDirectory(dir: Path, exc: IOException?): FileVisitResult =
            deleteAndContinue(dir)
    }

    try {
        Files.walkFileTree(path, deletingVisitor)
    } catch (ignored: IOException) {
        // This is fine
    }
}
@@ -0,0 +1,156 @@
package be.simplenotes.search
import be.simplenotes.domain.model.NoteMetadata
import be.simplenotes.domain.model.PersistedNote
import be.simplenotes.domain.model.PersistedNoteMetadata
import be.simplenotes.domain.usecases.search.SearchTerms
import org.assertj.core.api.Assertions.assertThat
import org.intellij.lang.annotations.Language
import org.junit.jupiter.api.AfterAll
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.parallel.ResourceLock
import java.time.LocalDateTime
import java.util.*
/**
 * Tests of [NoteSearcherImpl] running against a real index: every note is indexed and searched
 * for user id 1, using the searcher's default index location.
 *
 * The class declares the shared resource "lucene" through [ResourceLock].
 */
@ResourceLock("lucene")
internal class NoteSearcherImplTest {
// region setup
private val searcher = NoteSearcherImpl()
/**
 * Builds a note from the given values (empty html, [LocalDateTime.MIN] as timestamp, not public),
 * indexes it for user 1 and returns it.
 */
private fun index(
title: String,
tags: List<String> = emptyList(),
content: String = "",
uuid: UUID = UUID.randomUUID(),
): PersistedNote {
val note = PersistedNote(NoteMetadata(title, tags),
markdown = content,
html = "",
LocalDateTime.MIN,
uuid,
public = false)
searcher.indexNote(1, note)
return note
}
/** Searches the index of user 1 with the given terms; every term is optional. */
private fun search(
title: String? = null,
tag: String? = null,
content: String? = null,
all: String? = null,
): List<PersistedNoteMetadata> = searcher.search(1, SearchTerms(title, tag, content, all))
// Drops the index of user 1 so each test starts without previously indexed notes.
// NOTE(review): a non-static @AfterAll method needs the PER_CLASS test instance lifecycle;
// presumably configured project-wide - confirm.
@BeforeEach
@AfterAll
fun dropIndexes() {
searcher.dropIndex(1)
}
/** Markdown text used as note content by the content-related tests. */
@Language("markdown")
val markdownSample =
"""
# Apache Lucene Core
Apache Lucene is a high-performance, full-featured text search engine library written entirely in Java.
It is a technology suitable for nearly any application that requires full-text search, especially cross-platform.
Apache Lucene is an open source project available for free download. Please use the links on the right to access Lucene.
# Lucene Features
Lucene offers powerful features through a simple API:
## Scalable, High-Performance Indexing
* over [150GB/hour on modern hardware](http://home.apache.org/~mikemccand/lucenebench/indexing.html)
* small RAM requirements -- only 1MB heap
* incremental indexing as fast as batch indexing
* index size roughly 20-30% the size of text indexed
""".trimIndent()
// endregion
// Searching for a title exactly as indexed finds that note; an unrelated word finds nothing.
@Test
fun `exact title search`() {
index("first")
index("second")
index("flip")
assertThat(search("first"))
.hasSizeGreaterThanOrEqualTo(1)
.anyMatch { it.title == "first" }
assertThat(search("nothing")).isEmpty()
}
// A misspelled title ("firt") still finds the note titled "first".
@Test
fun `fuzzy title search`() {
index("first")
index("second")
index("flip")
@Suppress("SpellCheckingInspection")
assertThat(search("firt"))
.hasSizeGreaterThanOrEqualTo(1)
.anyMatch { it.title == "first" }
assertThat(search("nothing")).isEmpty()
}
// Searching for one of a note's tags returns only that note.
@Test
fun `exact tags search`() {
index("first", tags = listOf("example", "flamingo"))
index("second", tags = listOf("yes"))
index("second")
assertThat(search(tag = "example"))
.hasSize(1)
.anyMatch { it.title == "first" }
}
// Content search: first with a word present in the sample, then with a misspelled word.
@Test
fun `exact content search`() {
index("first", content = markdownSample)
assertThat(search(content = "fast"))
.hasSize(1)
.anyMatch { it.title == "first" }
@Suppress("SpellCheckingInspection")
assertThat(search(content = "preformance")) // <- note the error
.hasSize(1)
.anyMatch { it.title == "first" }
}
// Title, tag and content terms given together return the single indexed note.
@Test
fun `combined search`() {
index("first", content = markdownSample, tags = listOf("abc"))
assertThat(search(title = "fir", tag = "abc", content = "20"))
.hasSize(1)
}
// The "all" term combined with a title term returns the single indexed note.
@Test
fun `search all`() {
index("first", content = markdownSample, tags = listOf("abc"))
assertThat(search(all = "abc", title = "first"))
.hasSize(1)
}
// A note removed with deleteIndex is no longer found.
@Test
fun `delete index`() {
val uuid = index("first").uuid
searcher.deleteIndex(1, uuid)
assertThat(search("first")).isEmpty()
}
// After updateIndex the note is found under its new title only.
@Test
fun `update index`() {
val note = index("first")
searcher.updateIndex(1, note.copy(meta = note.meta.copy(title = "new")))
assertThat(search("first")).isEmpty()
assertThat(search("new")).hasSize(1)
}
}
@@ -0,0 +1,26 @@
package be.simplenotes.search
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource
import java.util.stream.Stream
/**
 * Checks that [TagsFieldConverter] gives back the tags it was given after an encode/decode round trip.
 */
internal class TagsFieldConverterTest {

    /** Inputs of the round-trip test: a single tag, two tags and no tag at all. */
    @Suppress("unused")
    fun tags(): Stream<List<String>> = Stream.of(
        listOf("example"),
        listOf("example", "second"),
        emptyList(),
    )

    @ParameterizedTest
    @MethodSource("tags")
    fun `tags should stay the same`(input: List<String>) {
        val encoded = TagsFieldConverter.toDoc(input)
        val decoded = TagsFieldConverter.fromDoc(encoded)

        assertThat(decoded)
            .hasSameSizeAs(input)
            .containsExactlyInAnyOrderElementsOf(input)
    }
}