From b27fd292304a363b52c5f6daee2555ecec9d2a27 Mon Sep 17 00:00:00 2001 From: Hubert Van De Walle Date: Fri, 21 Aug 2020 18:03:33 +0200 Subject: [PATCH] Index md instead of html --- .../main/kotlin/utils/SearchTermsParser.kt | 2 +- search/src/main/kotlin/Extractors.kt | 2 +- .../src/test/kotlin/NoteSearcherImplTest.kt | 96 +++++-------------- 3 files changed, 28 insertions(+), 72 deletions(-) diff --git a/app/src/main/kotlin/utils/SearchTermsParser.kt b/app/src/main/kotlin/utils/SearchTermsParser.kt index 234ea64..d4c34ca 100644 --- a/app/src/main/kotlin/utils/SearchTermsParser.kt +++ b/app/src/main/kotlin/utils/SearchTermsParser.kt @@ -21,7 +21,7 @@ fun parseSearchTerms(input: String): SearchTerms { val match = innerRegex.find(input)?.groups?.get(1)?.value if (match != null) { val group = outerRegex.find(input)?.groups?.get(1)?.value - group?.let { c = c.replace(it, " ") } + group?.let { c = c.replace(it, "") } } return match } diff --git a/search/src/main/kotlin/Extractors.kt b/search/src/main/kotlin/Extractors.kt index 295a6dd..cd89e0c 100644 --- a/search/src/main/kotlin/Extractors.kt +++ b/search/src/main/kotlin/Extractors.kt @@ -19,7 +19,7 @@ internal fun PersistedNote.toDocument(): Document { // searchable fields add(TextField(titleField, note.meta.title, Field.Store.YES)) add(TextField(tagsField, TagsFieldConverter.toDoc(note.meta.tags), Field.Store.YES)) - add(TextField(contentField, note.html, Field.Store.YES)) + add(TextField(contentField, note.markdown, Field.Store.YES)) } } diff --git a/search/src/test/kotlin/NoteSearcherImplTest.kt b/search/src/test/kotlin/NoteSearcherImplTest.kt index 3a47840..5fdba99 100644 --- a/search/src/test/kotlin/NoteSearcherImplTest.kt +++ b/search/src/test/kotlin/NoteSearcherImplTest.kt @@ -25,7 +25,7 @@ internal class NoteSearcherImplTest { content: String = "", uuid: UUID = UUID.randomUUID(), ): PersistedNote { - val note = PersistedNote(NoteMetadata(title, tags), markdown = "", content, LocalDateTime.MIN, uuid) + val note = PersistedNote(NoteMetadata(title, tags), markdown = content, html = "", LocalDateTime.MIN, uuid) searcher.indexNote(1, note) return note } @@ -42,6 +42,28 @@ internal class NoteSearcherImplTest { fun dropIndexes() { searcher.dropIndex(1) } + + @Language("markdown") + val markdownSample = + """ + # Apache Lucene Core + + Apache Lucene is a high-performance, full-featured text search engine library written entirely in Java. + It is a technology suitable for nearly any application that requires full-text search, especially cross-platform. + + Apache Lucene is an open source project available for free download. Please use the links on the right to access Lucene. + + # Lucene Features + + Lucene offers powerful features through a simple API: + + ## Scalable, High-Performance Indexing + + * over [150GB/hour on modern hardware](http://home.apache.org/~mikemccand/lucenebench/indexing.html) + * small RAM requirements -- only 1MB heap + * incremental indexing as fast as batch indexing + * index size roughly 20-30% the size of text indexed + """.trimIndent() // endregion @Test @@ -83,29 +105,7 @@ internal class NoteSearcherImplTest { @Test fun `exact content search`() { - @Language("html") - val content = - """ -
-

Apache Lucene Core

-

Apache LuceneTM is a - high-performance, full-featured text search engine library written entirely in Java. - It is a technology suitable for nearly any application that requires full-text search, - especially cross-platform.

-

Apache Lucene is an open source project available for free download. Please use the - links on the right to access Lucene.

-

LuceneTM Features

-

Lucene offers powerful features through a simple API:

-

Scalable, High-Performance Indexing

- - """.trimIndent() - - index("first", content = content) + index("first", content = markdownSample) assertThat(search(content = "fast")) .hasSize(1) @@ -119,29 +119,7 @@ internal class NoteSearcherImplTest { @Test fun `combined search`() { - @Language("html") - val content = - """ -
-

Apache Lucene Core

-

Apache LuceneTM is a - high-performance, full-featured text search engine library written entirely in Java. - It is a technology suitable for nearly any application that requires full-text search, - especially cross-platform.

-

Apache Lucene is an open source project available for free download. Please use the - links on the right to access Lucene.

-

LuceneTM Features

-

Lucene offers powerful features through a simple API:

-

Scalable, High-Performance Indexing

-
    -
  • over 150GB/hour on modern hardware
  • -
  • small RAM requirements -- only 1MB heap
  • -
  • incremental indexing as fast as batch indexing
  • -
  • index size roughly 20-30% the size of text indexed
  • -
- """.trimIndent() - - index("first", content = content, tags = listOf("abc")) + index("first", content = markdownSample, tags = listOf("abc")) assertThat(search(title = "fir", tag = "abc", content = "20")) .hasSize(1) @@ -149,29 +127,7 @@ internal class NoteSearcherImplTest { @Test fun `search all`() { - @Language("html") - val content = - """ -
-

Apache Lucene Core

-

Apache LuceneTM is a - high-performance, full-featured text search engine library written entirely in Java. - It is a technology suitable for nearly any application that requires full-text search, - especially cross-platform.

-

Apache Lucene is an open source project available for free download. Please use the - links on the right to access Lucene.

-

LuceneTM Features

-

Lucene offers powerful features through a simple API:

-

Scalable, High-Performance Indexing

-
    -
  • over 150GB/hour on modern hardware
  • -
  • small RAM requirements -- only 1MB heap
  • -
  • incremental indexing as fast as batch indexing
  • -
  • index size roughly 20-30% the size of text indexed
  • -
- """.trimIndent() - - index("first", content = content, tags = listOf("abc")) + index("first", content = markdownSample, tags = listOf("abc")) assertThat(search(all = "abc", title = "first")) .hasSize(1)