Index md instead of html

2020-08-21 18:03:33 +02:00 · 2020-08-21 18:03:33 +02:00 · b27fd29230
commit b27fd29230
parent c02f7c039a
3 changed files with 28 additions and 72 deletions
--- a/search/src/main/kotlin/Extractors.kt
+++ b/search/src/main/kotlin/Extractors.kt
@@ -19,7 +19,7 @@ internal fun PersistedNote.toDocument(): Document {
        // searchable fields
        add(TextField(titleField, note.meta.title, Field.Store.YES))
        add(TextField(tagsField, TagsFieldConverter.toDoc(note.meta.tags), Field.Store.YES))
-        add(TextField(contentField, note.html, Field.Store.YES))
+        add(TextField(contentField, note.markdown, Field.Store.YES))
    }
 }
--- a/search/src/test/kotlin/NoteSearcherImplTest.kt
+++ b/search/src/test/kotlin/NoteSearcherImplTest.kt
@@ -25,7 +25,7 @@ internal class NoteSearcherImplTest {
        content: String = "",
        uuid: UUID = UUID.randomUUID(),
    ): PersistedNote {
-        val note = PersistedNote(NoteMetadata(title, tags), markdown = "", content, LocalDateTime.MIN, uuid)
+        val note = PersistedNote(NoteMetadata(title, tags), markdown = content, html = "", LocalDateTime.MIN, uuid)
        searcher.indexNote(1, note)
        return note
    }
@@ -42,6 +42,28 @@ internal class NoteSearcherImplTest {
    fun dropIndexes() {
        searcher.dropIndex(1)
    }
+    @Language("markdown")
+    val markdownSample =
+        """
+            # Apache Lucene Core
+            Apache Lucene is a high-performance, full-featured text search engine library written entirely in Java.
+            It is a technology suitable for nearly any application that requires full-text search, especially cross-platform.
+            Apache Lucene is an open source project available for free download. Please use the links on the right to access Lucene.
+            # Lucene Features
+            Lucene offers powerful features through a simple API:
+            ## Scalable, High-Performance Indexing
+            *   over [150GB/hour on modern hardware](http://home.apache.org/~mikemccand/lucenebench/indexing.html)
+            *   small RAM requirements -- only 1MB heap
+            *   incremental indexing as fast as batch indexing
+            *   index size roughly 20-30% the size of text indexed
+            """.trimIndent()
    // endregion
    @Test
@@ -83,29 +105,7 @@ internal class NoteSearcherImplTest {
    @Test
    fun `exact content search`() {
-        @Language("html")
+        index("first", content = markdownSample)
-        val content =
-            """
-            <div>
-              <h1 class="title">Apache Lucene Core</h1>
-              <p>Apache Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> is a
-            high-performance, full-featured text search engine library written entirely in Java.
-            It is a technology suitable for nearly any application that requires full-text search,
-            especially cross-platform.</p>
-            <p>Apache Lucene is an open source project available for free download. Please use the
-            links on the right to access Lucene.</p>
-            <h1 id="lucenetm-features">Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> Features</h1>
-            <p>Lucene offers powerful features through a simple API:</p>
-            <h2 id="scalable-high-performance-indexing">Scalable, High-Performance Indexing</h2>
-            <ul>
-            <li>over <a href="http://home.apache.org/~mikemccand/lucenebench/indexing.html">150GB/hour on modern hardware</a></li>
-            <li>small RAM requirements -- only 1MB heap</li>
-            <li>incremental indexing as fast as batch indexing</li>
-            <li>index size roughly 20-30% the size of text indexed</li>
-            </ul>
-            """.trimIndent()
-        index("first", content = content)
        assertThat(search(content = "fast"))
            .hasSize(1)
@@ -119,29 +119,7 @@ internal class NoteSearcherImplTest {
    @Test
    fun `combined search`() {
-        @Language("html")
+        index("first", content = markdownSample, tags = listOf("abc"))
-        val content =
-            """
-            <div>
-              <h1 class="title">Apache Lucene Core</h1>
-              <p>Apache Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> is a
-            high-performance, full-featured text search engine library written entirely in Java.
-            It is a technology suitable for nearly any application that requires full-text search,
-            especially cross-platform.</p>
-            <p>Apache Lucene is an open source project available for free download. Please use the
-            links on the right to access Lucene.</p>
-            <h1 id="lucenetm-features">Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> Features</h1>
-            <p>Lucene offers powerful features through a simple API:</p>
-            <h2 id="scalable-high-performance-indexing">Scalable, High-Performance Indexing</h2>
-            <ul>
-            <li>over <a href="http://home.apache.org/~mikemccand/lucenebench/indexing.html">150GB/hour on modern hardware</a></li>
-            <li>small RAM requirements -- only 1MB heap</li>
-            <li>incremental indexing as fast as batch indexing</li>
-            <li>index size roughly 20-30% the size of text indexed</li>
-            </ul>
-            """.trimIndent()
-        index("first", content = content, tags = listOf("abc"))
        assertThat(search(title = "fir", tag = "abc", content = "20"))
            .hasSize(1)
@@ -149,29 +127,7 @@ internal class NoteSearcherImplTest {
    @Test
    fun `search all`() {
-        @Language("html")
+        index("first", content = markdownSample, tags = listOf("abc"))
-        val content =
-            """
-            <div>
-              <h1 class="title">Apache Lucene Core</h1>
-              <p>Apache Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> is a
-            high-performance, full-featured text search engine library written entirely in Java.
-            It is a technology suitable for nearly any application that requires full-text search,
-            especially cross-platform.</p>
-            <p>Apache Lucene is an open source project available for free download. Please use the
-            links on the right to access Lucene.</p>
-            <h1 id="lucenetm-features">Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> Features</h1>
-            <p>Lucene offers powerful features through a simple API:</p>
-            <h2 id="scalable-high-performance-indexing">Scalable, High-Performance Indexing</h2>
-            <ul>
-            <li>over <a href="http://home.apache.org/~mikemccand/lucenebench/indexing.html">150GB/hour on modern hardware</a></li>
-            <li>small RAM requirements -- only 1MB heap</li>
-            <li>incremental indexing as fast as batch indexing</li>
-            <li>index size roughly 20-30% the size of text indexed</li>
-            </ul>
-            """.trimIndent()
-        index("first", content = content, tags = listOf("abc"))
        assertThat(search(all = "abc", title = "first"))
            .hasSize(1)