From 8ba89d3e05125151ad9afcaa12e6d9731ab70849 Mon Sep 17 00:00:00 2001 From: Hubert Van De Walle Date: Fri, 21 Aug 2020 17:04:14 +0200 Subject: [PATCH] Search now applies to all fields by default --- .../main/kotlin/utils/SearchTermsParser.kt | 38 +++++++++++-------- .../kotlin/utils/SearchTermsParserKtTest.kt | 13 ++++--- .../kotlin/usecases/search/SearchUseCase.kt | 2 +- search/src/main/kotlin/LuceneDsl.kt | 6 +++ search/src/main/kotlin/NoteSearcherImpl.kt | 1 + .../src/test/kotlin/NoteSearcherImplTest.kt | 35 ++++++++++++++++- 6 files changed, 71 insertions(+), 24 deletions(-) diff --git a/app/src/main/kotlin/utils/SearchTermsParser.kt b/app/src/main/kotlin/utils/SearchTermsParser.kt index 733f43e..234ea64 100644 --- a/app/src/main/kotlin/utils/SearchTermsParser.kt +++ b/app/src/main/kotlin/utils/SearchTermsParser.kt @@ -2,32 +2,40 @@ package be.simplenotes.app.utils import be.simplenotes.domain.usecases.search.SearchTerms -private val titleRe = """title:['"](?<title>.*?)['"]""".toRegex() -private val outerTitleRe = """(?<title>title:['"].*?['"])""".toRegex() +private fun innerRegex(name: String) = """$name:['"](.*?)['"]""".toRegex() +private fun outerRegex(name: String) = """($name:['"].*?['"])""".toRegex() -private val tagRe = """tag:['"](?<tag>.*?)['"]""".toRegex() -private val outerTagRe = """(?<tag>tag:['"].*?['"])""".toRegex() +private val titleRe = innerRegex("title") +private val outerTitleRe = outerRegex("title") + +private val tagRe = innerRegex("tag") +private val outerTagRe = outerRegex("tag") + +private val contentRe = innerRegex("content") +private val outerContentRe = outerRegex("content") fun parseSearchTerms(input: String): SearchTerms { - val title: String? = titleRe.find(input)?.groups?.get(1)?.value - val tag: String? 
= tagRe.find(input)?.groups?.get(1)?.value var c: String = input - if (title != null) { - val titleGroup = outerTitleRe.find(input)?.groups?.get(1)?.value - titleGroup?.let { c = c.replace(it, "") } + fun extract(innerRegex: Regex, outerRegex: Regex): String? { + val match = innerRegex.find(input)?.groups?.get(1)?.value + if (match != null) { + val group = outerRegex.find(input)?.groups?.get(1)?.value + group?.let { c = c.replace(it, " ") } + } + return match } - if (tag != null) { - val tagGroup = outerTagRe.find(input)?.groups?.get(1)?.value - tagGroup?.let { c = c.replace(it, "") } - } + val title: String? = extract(titleRe, outerTitleRe) + val tag: String? = extract(tagRe, outerTagRe) + val content: String? = extract(contentRe, outerContentRe) - val content = c.trim().ifEmpty { null } + val all = c.trim().ifEmpty { null } return SearchTerms( title = title, tag = tag, - content = content + content = content, + all = all ) } diff --git a/app/src/test/kotlin/utils/SearchTermsParserKtTest.kt b/app/src/test/kotlin/utils/SearchTermsParserKtTest.kt index a98334d..96178ca 100644 --- a/app/src/test/kotlin/utils/SearchTermsParserKtTest.kt +++ b/app/src/test/kotlin/utils/SearchTermsParserKtTest.kt @@ -13,7 +13,8 @@ internal class SearchTermsParserKtTest { title: String? = null, tag: String? = null, content: String? = null, - ): Pair<String, SearchTerms> = input to SearchTerms(title, tag, content) + all: String? 
= null + ): Pair<String, SearchTerms> = input to SearchTerms(title, tag, content, all) @Suppress("Unused") private fun results() = Stream.of( @@ -21,13 +22,13 @@ internal class SearchTermsParserKtTest { createResult("title:'example with words'", title = "example with words"), createResult("title:'example with words'", title = "example with words"), createResult("""title:"double quotes"""", title = "double quotes"), - createResult("title:'example' something else", title = "example", content = "something else"), + createResult("title:'example' something else", title = "example", all = "something else"), createResult("tag:'example'", tag = "example"), createResult("tag:'example' title:'other'", title = "other", tag = "example"), - createResult("blah blah tag:'example' title:'other'", title = "other", tag = "example", content = "blah blah"), - createResult("tag:'example' middle title:'other'", title = "other", tag = "example", content = "middle"), - createResult("tag:'example' title:'other' end", title = "other", tag = "example", content = "end"), - createResult("tag:'example abc' title:'other with words' this is the end ", title = "other with words", tag = "example abc", content = "this is the end"), + createResult("blah blah tag:'example' title:'other'", title = "other", tag = "example", all = "blah blah"), + createResult("tag:'example' middle title:'other'", title = "other", tag = "example", all = "middle"), + createResult("tag:'example' title:'other' end", title = "other", tag = "example", all = "end"), + createResult("tag:'example abc' title:'other with words' this is the end ", title = "other with words", tag = "example abc", all = "this is the end"), ) @ParameterizedTest diff --git a/domain/src/main/kotlin/usecases/search/SearchUseCase.kt b/domain/src/main/kotlin/usecases/search/SearchUseCase.kt index d2d406c..aae7896 100644 --- a/domain/src/main/kotlin/usecases/search/SearchUseCase.kt +++ b/domain/src/main/kotlin/usecases/search/SearchUseCase.kt @@ -4,7 +4,7 @@ 
import be.simplenotes.domain.model.PersistedNote import be.simplenotes.domain.model.PersistedNoteMetadata import java.util.* -data class SearchTerms(val title: String?, val tag: String?, val content: String?) +data class SearchTerms(val title: String?, val tag: String?, val content: String?, val all: String?) interface NoteSearcher { fun indexNote(userId: Int, note: PersistedNote) diff --git a/search/src/main/kotlin/LuceneDsl.kt b/search/src/main/kotlin/LuceneDsl.kt index a1e374d..ae5c2cc 100644 --- a/search/src/main/kotlin/LuceneDsl.kt +++ b/search/src/main/kotlin/LuceneDsl.kt @@ -30,6 +30,12 @@ class LuceneDsl { fun addBooleanClause(booleanDsl: BooleanExpression) { clauses.add(booleanDsl) } + + infix fun List<String>.anyMatch(query: String?) { + map { BooleanExpression(it, query) }.forEach { + addBooleanClause(it) + } + } } fun LuceneDsl.or(booleanExpression: () -> BooleanExpression) { diff --git a/search/src/main/kotlin/NoteSearcherImpl.kt b/search/src/main/kotlin/NoteSearcherImpl.kt index 254860e..c38b7a0 100644 --- a/search/src/main/kotlin/NoteSearcherImpl.kt +++ b/search/src/main/kotlin/NoteSearcherImpl.kt @@ -85,6 +85,7 @@ class NoteSearcherImpl(basePath: Path = Path.of("/tmp", "lucene")) : NoteSearche or { titleField eq terms.title } or { tagsField eq terms.tag } or { contentField eq terms.content } + listOf(titleField, tagsField, contentField) anyMatch terms.all }.map(Document::toNoteMeta) override fun dropIndex(userId: Int) = rmdir(File(baseFile, userId.toString()).toPath()) diff --git a/search/src/test/kotlin/NoteSearcherImplTest.kt b/search/src/test/kotlin/NoteSearcherImplTest.kt index b6a5eae..3a47840 100644 --- a/search/src/test/kotlin/NoteSearcherImplTest.kt +++ b/search/src/test/kotlin/NoteSearcherImplTest.kt @@ -25,7 +25,7 @@ internal class NoteSearcherImplTest { content: String = "", uuid: UUID = UUID.randomUUID(), ): PersistedNote { - val note = PersistedNote(NoteMetadata(title, tags), markdown = "", content, LocalDateTime.now(), uuid) + val note 
= PersistedNote(NoteMetadata(title, tags), markdown = "", content, LocalDateTime.MIN, uuid) searcher.indexNote(1, note) return note } @@ -34,7 +34,8 @@ internal class NoteSearcherImplTest { title: String? = null, tag: String? = null, content: String? = null, - ): List<PersistedNoteMetadata> = searcher.search(1, SearchTerms(title, tag, content)) + all: String? = null, + ): List<PersistedNoteMetadata> = searcher.search(1, SearchTerms(title, tag, content, all)) @BeforeEach @AfterAll @@ -146,6 +147,36 @@ internal class NoteSearcherImplTest { .hasSize(1) } + @Test + fun `search all`() { + @Language("html") + val content = + """ + <div> + <h1 class="title">Apache Lucene Core</h1> + <p>Apache Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> is a + high-performance, full-featured text search engine library written entirely in Java. + It is a technology suitable for nearly any application that requires full-text search, + especially cross-platform.</p> + <p>Apache Lucene is an open source project available for free download. Please use the + links on the right to access Lucene.</p> + <h1 id="lucenetm-features">Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> Features</h1> + <p>Lucene offers powerful features through a simple API:</p> + <h2 id="scalable-high-performance-indexing">Scalable, High-Performance Indexing</h2> + <ul> + <li>over <a href="http://home.apache.org/~mikemccand/lucenebench/indexing.html">150GB/hour on modern hardware</a></li> + <li>small RAM requirements -- only 1MB heap</li> + <li>incremental indexing as fast as batch indexing</li> + <li>index size roughly 20-30% the size of text indexed</li> + </ul> + """.trimIndent() + + index("first", content = content, tags = listOf("abc")) + + assertThat(search(all = "abc", title = "first")) + .hasSize(1) + } + @Test fun `delete index`() { val uuid = index("first").uuid