From b27fd292304a363b52c5f6daee2555ecec9d2a27 Mon Sep 17 00:00:00 2001
From: Hubert Van De Walle <hubv@protonmail.com>
Date: Fri, 21 Aug 2020 18:03:33 +0200
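Subject: [PATCH] Index Markdown instead of HTML

Populate the search document's content field from note.markdown instead
of note.html.

In parseSearchTerms, a matched group is now replaced with an empty string
instead of four spaces.

The NoteSearcherImpl tests now index a single shared Markdown sample in
place of the three duplicated HTML fixtures.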

---
 .../main/kotlin/utils/SearchTermsParser.kt    |  2 +-
 search/src/main/kotlin/Extractors.kt          |  2 +-
 .../src/test/kotlin/NoteSearcherImplTest.kt   | 96 +++++--------------
 3 files changed, 28 insertions(+), 72 deletions(-)
diff --git a/app/src/main/kotlin/utils/SearchTermsParser.kt b/app/src/main/kotlin/utils/SearchTermsParser.kt
index 234ea64..d4c34ca 100644
--- a/app/src/main/kotlin/utils/SearchTermsParser.kt
+++ b/app/src/main/kotlin/utils/SearchTermsParser.kt
@@ -21,7 +21,7 @@ fun parseSearchTerms(input: String): SearchTerms {
         val match = innerRegex.find(input)?.groups?.get(1)?.value
         if (match != null) {
             val group = outerRegex.find(input)?.groups?.get(1)?.value
-            group?.let { c = c.replace(it, "    ") }
+            group?.let { c = c.replace(it, "") }
         }
         return match
     }
diff --git a/search/src/main/kotlin/Extractors.kt b/search/src/main/kotlin/Extractors.kt
index 295a6dd..cd89e0c 100644
--- a/search/src/main/kotlin/Extractors.kt
+++ b/search/src/main/kotlin/Extractors.kt
@@ -19,7 +19,7 @@ internal fun PersistedNote.toDocument(): Document {
         // searchable fields
         add(TextField(titleField, note.meta.title, Field.Store.YES))
         add(TextField(tagsField, TagsFieldConverter.toDoc(note.meta.tags), Field.Store.YES))
-        add(TextField(contentField, note.html, Field.Store.YES))
+        add(TextField(contentField, note.markdown, Field.Store.YES))
     }
 }
 
diff --git a/search/src/test/kotlin/NoteSearcherImplTest.kt b/search/src/test/kotlin/NoteSearcherImplTest.kt
index 3a47840..5fdba99 100644
--- a/search/src/test/kotlin/NoteSearcherImplTest.kt
+++ b/search/src/test/kotlin/NoteSearcherImplTest.kt
@@ -25,7 +25,7 @@ internal class NoteSearcherImplTest {
         content: String = "",
         uuid: UUID = UUID.randomUUID(),
     ): PersistedNote {
-        val note = PersistedNote(NoteMetadata(title, tags), markdown = "", content, LocalDateTime.MIN, uuid)
+        val note = PersistedNote(NoteMetadata(title, tags), markdown = content, html = "", LocalDateTime.MIN, uuid)
         searcher.indexNote(1, note)
         return note
     }
@@ -42,6 +42,28 @@ internal class NoteSearcherImplTest {
     fun dropIndexes() {
         searcher.dropIndex(1)
     }
+
+    @Language("markdown")
+    val markdownSample =
+        """
+            # Apache Lucene Core
+
+            Apache Lucene is a high-performance, full-featured text search engine library written entirely in Java.
+            It is a technology suitable for nearly any application that requires full-text search, especially cross-platform.
+
+            Apache Lucene is an open source project available for free download. Please use the links on the right to access Lucene.
+
+            # Lucene Features
+
+            Lucene offers powerful features through a simple API:
+
+            ## Scalable, High-Performance Indexing
+
+            *   over [150GB/hour on modern hardware](http://home.apache.org/~mikemccand/lucenebench/indexing.html)
+            *   small RAM requirements -- only 1MB heap
+            *   incremental indexing as fast as batch indexing
+            *   index size roughly 20-30% the size of text indexed
+            """.trimIndent()
     // endregion
 
     @Test
@@ -83,29 +105,7 @@ internal class NoteSearcherImplTest {
 
     @Test
     fun `exact content search`() {
-        @Language("html")
-        val content =
-            """
-            <div>
-              <h1 class="title">Apache Lucene Core</h1>
-              <p>Apache Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> is a
-            high-performance, full-featured text search engine library written entirely in Java.
-            It is a technology suitable for nearly any application that requires full-text search,
-            especially cross-platform.</p>
-            <p>Apache Lucene is an open source project available for free download. Please use the
-            links on the right to access Lucene.</p>
-            <h1 id="lucenetm-features">Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> Features</h1>
-            <p>Lucene offers powerful features through a simple API:</p>
-            <h2 id="scalable-high-performance-indexing">Scalable, High-Performance Indexing</h2>
-            <ul>
-            <li>over <a href="http://home.apache.org/~mikemccand/lucenebench/indexing.html">150GB/hour on modern hardware</a></li>
-            <li>small RAM requirements -- only 1MB heap</li>
-            <li>incremental indexing as fast as batch indexing</li>
-            <li>index size roughly 20-30% the size of text indexed</li>
-            </ul>
-            """.trimIndent()
-
-        index("first", content = content)
+        index("first", content = markdownSample)
 
         assertThat(search(content = "fast"))
             .hasSize(1)
@@ -119,29 +119,7 @@ internal class NoteSearcherImplTest {
 
     @Test
     fun `combined search`() {
-        @Language("html")
-        val content =
-            """
-            <div>
-              <h1 class="title">Apache Lucene Core</h1>
-              <p>Apache Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> is a
-            high-performance, full-featured text search engine library written entirely in Java.
-            It is a technology suitable for nearly any application that requires full-text search,
-            especially cross-platform.</p>
-            <p>Apache Lucene is an open source project available for free download. Please use the
-            links on the right to access Lucene.</p>
-            <h1 id="lucenetm-features">Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> Features</h1>
-            <p>Lucene offers powerful features through a simple API:</p>
-            <h2 id="scalable-high-performance-indexing">Scalable, High-Performance Indexing</h2>
-            <ul>
-            <li>over <a href="http://home.apache.org/~mikemccand/lucenebench/indexing.html">150GB/hour on modern hardware</a></li>
-            <li>small RAM requirements -- only 1MB heap</li>
-            <li>incremental indexing as fast as batch indexing</li>
-            <li>index size roughly 20-30% the size of text indexed</li>
-            </ul>
-            """.trimIndent()
-
-        index("first", content = content, tags = listOf("abc"))
+        index("first", content = markdownSample, tags = listOf("abc"))
 
         assertThat(search(title = "fir", tag = "abc", content = "20"))
             .hasSize(1)
@@ -149,29 +127,7 @@ internal class NoteSearcherImplTest {
 
     @Test
     fun `search all`() {
-        @Language("html")
-        val content =
-            """
-            <div>
-              <h1 class="title">Apache Lucene Core</h1>
-              <p>Apache Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> is a
-            high-performance, full-featured text search engine library written entirely in Java.
-            It is a technology suitable for nearly any application that requires full-text search,
-            especially cross-platform.</p>
-            <p>Apache Lucene is an open source project available for free download. Please use the
-            links on the right to access Lucene.</p>
-            <h1 id="lucenetm-features">Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> Features</h1>
-            <p>Lucene offers powerful features through a simple API:</p>
-            <h2 id="scalable-high-performance-indexing">Scalable, High-Performance Indexing</h2>
-            <ul>
-            <li>over <a href="http://home.apache.org/~mikemccand/lucenebench/indexing.html">150GB/hour on modern hardware</a></li>
-            <li>small RAM requirements -- only 1MB heap</li>
-            <li>incremental indexing as fast as batch indexing</li>
-            <li>index size roughly 20-30% the size of text indexed</li>
-            </ul>
-            """.trimIndent()
-
-        index("first", content = content, tags = listOf("abc"))
+        index("first", content = markdownSample, tags = listOf("abc"))
 
         assertThat(search(all = "abc", title = "first"))
             .hasSize(1)