Index Markdown instead of HTML

This commit is contained in:
Hubert Van De Walle 2020-08-21 18:03:33 +02:00
parent c02f7c039a
commit b27fd29230
3 changed files with 28 additions and 72 deletions

View File

@@ -21,7 +21,7 @@ fun parseSearchTerms(input: String): SearchTerms {
val match = innerRegex.find(input)?.groups?.get(1)?.value val match = innerRegex.find(input)?.groups?.get(1)?.value
if (match != null) { if (match != null) {
val group = outerRegex.find(input)?.groups?.get(1)?.value val group = outerRegex.find(input)?.groups?.get(1)?.value
group?.let { c = c.replace(it, " ") } group?.let { c = c.replace(it, "") }
} }
return match return match
} }

View File

@@ -19,7 +19,7 @@ internal fun PersistedNote.toDocument(): Document {
// searchable fields // searchable fields
add(TextField(titleField, note.meta.title, Field.Store.YES)) add(TextField(titleField, note.meta.title, Field.Store.YES))
add(TextField(tagsField, TagsFieldConverter.toDoc(note.meta.tags), Field.Store.YES)) add(TextField(tagsField, TagsFieldConverter.toDoc(note.meta.tags), Field.Store.YES))
add(TextField(contentField, note.html, Field.Store.YES)) add(TextField(contentField, note.markdown, Field.Store.YES))
} }
} }

View File

@@ -25,7 +25,7 @@ internal class NoteSearcherImplTest {
content: String = "", content: String = "",
uuid: UUID = UUID.randomUUID(), uuid: UUID = UUID.randomUUID(),
): PersistedNote { ): PersistedNote {
val note = PersistedNote(NoteMetadata(title, tags), markdown = "", content, LocalDateTime.MIN, uuid) val note = PersistedNote(NoteMetadata(title, tags), markdown = content, html = "", LocalDateTime.MIN, uuid)
searcher.indexNote(1, note) searcher.indexNote(1, note)
return note return note
} }
@@ -42,6 +42,28 @@ internal class NoteSearcherImplTest {
fun dropIndexes() { fun dropIndexes() {
searcher.dropIndex(1) searcher.dropIndex(1)
} }
@Language("markdown")
val markdownSample =
"""
# Apache Lucene Core
Apache Lucene is a high-performance, full-featured text search engine library written entirely in Java.
It is a technology suitable for nearly any application that requires full-text search, especially cross-platform.
Apache Lucene is an open source project available for free download. Please use the links on the right to access Lucene.
# Lucene Features
Lucene offers powerful features through a simple API:
## Scalable, High-Performance Indexing
* over [150GB/hour on modern hardware](http://home.apache.org/~mikemccand/lucenebench/indexing.html)
* small RAM requirements -- only 1MB heap
* incremental indexing as fast as batch indexing
* index size roughly 20-30% the size of text indexed
""".trimIndent()
// endregion // endregion
@Test @Test
@@ -83,29 +105,7 @@ internal class NoteSearcherImplTest {
@Test @Test
fun `exact content search`() { fun `exact content search`() {
@Language("html") index("first", content = markdownSample)
val content =
"""
<div>
<h1 class="title">Apache Lucene Core</h1>
<p>Apache Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> is a
high-performance, full-featured text search engine library written entirely in Java.
It is a technology suitable for nearly any application that requires full-text search,
especially cross-platform.</p>
<p>Apache Lucene is an open source project available for free download. Please use the
links on the right to access Lucene.</p>
<h1 id="lucenetm-features">Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> Features</h1>
<p>Lucene offers powerful features through a simple API:</p>
<h2 id="scalable-high-performance-indexing">Scalable, High-Performance Indexing</h2>
<ul>
<li>over <a href="http://home.apache.org/~mikemccand/lucenebench/indexing.html">150GB/hour on modern hardware</a></li>
<li>small RAM requirements -- only 1MB heap</li>
<li>incremental indexing as fast as batch indexing</li>
<li>index size roughly 20-30% the size of text indexed</li>
</ul>
""".trimIndent()
index("first", content = content)
assertThat(search(content = "fast")) assertThat(search(content = "fast"))
.hasSize(1) .hasSize(1)
@@ -119,29 +119,7 @@ internal class NoteSearcherImplTest {
@Test @Test
fun `combined search`() { fun `combined search`() {
@Language("html") index("first", content = markdownSample, tags = listOf("abc"))
val content =
"""
<div>
<h1 class="title">Apache Lucene Core</h1>
<p>Apache Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> is a
high-performance, full-featured text search engine library written entirely in Java.
It is a technology suitable for nearly any application that requires full-text search,
especially cross-platform.</p>
<p>Apache Lucene is an open source project available for free download. Please use the
links on the right to access Lucene.</p>
<h1 id="lucenetm-features">Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> Features</h1>
<p>Lucene offers powerful features through a simple API:</p>
<h2 id="scalable-high-performance-indexing">Scalable, High-Performance Indexing</h2>
<ul>
<li>over <a href="http://home.apache.org/~mikemccand/lucenebench/indexing.html">150GB/hour on modern hardware</a></li>
<li>small RAM requirements -- only 1MB heap</li>
<li>incremental indexing as fast as batch indexing</li>
<li>index size roughly 20-30% the size of text indexed</li>
</ul>
""".trimIndent()
index("first", content = content, tags = listOf("abc"))
assertThat(search(title = "fir", tag = "abc", content = "20")) assertThat(search(title = "fir", tag = "abc", content = "20"))
.hasSize(1) .hasSize(1)
@@ -149,29 +127,7 @@ internal class NoteSearcherImplTest {
@Test @Test
fun `search all`() { fun `search all`() {
@Language("html") index("first", content = markdownSample, tags = listOf("abc"))
val content =
"""
<div>
<h1 class="title">Apache Lucene Core</h1>
<p>Apache Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> is a
high-performance, full-featured text search engine library written entirely in Java.
It is a technology suitable for nearly any application that requires full-text search,
especially cross-platform.</p>
<p>Apache Lucene is an open source project available for free download. Please use the
links on the right to access Lucene.</p>
<h1 id="lucenetm-features">Lucene<span style="vertical-align: super; font-size: xx-small">TM</span> Features</h1>
<p>Lucene offers powerful features through a simple API:</p>
<h2 id="scalable-high-performance-indexing">Scalable, High-Performance Indexing</h2>
<ul>
<li>over <a href="http://home.apache.org/~mikemccand/lucenebench/indexing.html">150GB/hour on modern hardware</a></li>
<li>small RAM requirements -- only 1MB heap</li>
<li>incremental indexing as fast as batch indexing</li>
<li>index size roughly 20-30% the size of text indexed</li>
</ul>
""".trimIndent()
index("first", content = content, tags = listOf("abc"))
assertThat(search(all = "abc", title = "first")) assertThat(search(all = "abc", title = "first"))
.hasSize(1) .hasSize(1)