Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/CaiJimmy/hugo-theme-stack.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJimmy Cai <github@jimmycai.com>2022-01-19 01:19:30 +0300
committerGitHub <noreply@github.com>2022-01-19 01:19:30 +0300
commit4764a92df3f929f98741e59cee79fe90add523d3 (patch)
treee23412a2050d3fe1e6e5982c8cddb367baf889eb
parent86cbc1b682c999f62708bb3aea82615e862c7f95 (diff)
refactor(search): avoid issue with one character keyword (#447)
* refactor(search): avoid issue with one character keyword closes https://github.com/CaiJimmy/hugo-theme-stack/issues/184 * Remove keyword sorts * fix typo: secion -> section * fix(search): avoid matching html entity * Use | operator to concatenate keywords Idea from https://github.com/CaiJimmy/hugo-theme-stack/pull/436 * Add missing `matchCount` * Limit preview length * Don't add ellipsis to title * add comment to `processMatches` * Initialize DOMParser only once * Remove marker function * Deal with blank search * Use const keyword for constant arrays
-rw-r--r--assets/jsconfig.json2
-rw-r--r--assets/ts/search.tsx164
2 files changed, 115 insertions, 51 deletions
diff --git a/assets/jsconfig.json b/assets/jsconfig.json
index 9321136..040177a 100644
--- a/assets/jsconfig.json
+++ b/assets/jsconfig.json
@@ -6,5 +6,7 @@
"*"
]
},
+ "lib": ["es2020", "dom"],
+ "jsx": "preserve"
}
} \ No newline at end of file
diff --git a/assets/ts/search.tsx b/assets/ts/search.tsx
index 8e4eb6f..68db7b3 100644
--- a/assets/ts/search.tsx
+++ b/assets/ts/search.tsx
@@ -8,6 +8,11 @@ interface pageData {
matchCount: number
}
+interface match {
+ start: number,
+ end: number
+}
+
/**
* Escape HTML tags as HTML entities
* Edited from:
@@ -53,79 +58,131 @@ class Search {
this.bindSearchForm();
}
- private async searchKeywords(keywords: string[]) {
- const rawData = await this.getData();
- let results: pageData[] = [];
-
- /// Sort keywords by their length
- keywords.sort((a, b) => {
- return b.length - a.length
+ /**
+ * Processes search matches
+ * @param str original text
+ * @param matches array of matches
+ * @param ellipsis whether to add ellipsis to the end of each match
+ * @param charLimit max length of preview string
+ * @param offset how many characters before and after the match to include in preview
+ * @returns preview string
+ */
+ private static processMatches(str: string, matches: match[], ellipsis: boolean = true, charLimit = 140, offset = 20): string {
+ matches.sort((a, b) => {
+ return a.start - b.start;
});
- for (const item of rawData) {
- let result = {
- ...item,
- preview: '',
- matchCount: 0
+ let i = 0,
+ lastIndex = 0,
+ charCount = 0;
+
+ const resultArray: string[] = [];
+
+ while (i < matches.length) {
+ const item = matches[i];
+
+ /// item.start >= lastIndex (equal only for the first iteration)
+ /// because of the while loop that comes after, iterating over variable j
+
+ if (ellipsis && item.start - offset > lastIndex) {
+ resultArray.push(`${replaceHTMLEnt(str.substring(lastIndex, lastIndex + offset))} [...] `);
+ resultArray.push(`${replaceHTMLEnt(str.substring(item.start - offset, item.start))}`);
+ charCount += offset * 2;
+ }
+ else {
+ /// If the match is too close to the end of last match, don't add ellipsis
+ resultArray.push(replaceHTMLEnt(str.substring(lastIndex, item.start)));
+ charCount += item.start - lastIndex;
}
- let matched = false;
+ let j = i + 1,
+ end = item.end;
- for (const keyword of keywords) {
- if (keyword === '') continue;
+ /// Include as many matches as possible
+ /// [item.start, end] is the range of the match
+ while (j < matches.length && matches[j].start <= end) {
+ end = Math.max(matches[j].end, end);
+ ++j;
+ }
- const regex = new RegExp(escapeRegExp(replaceHTMLEnt(keyword)), 'gi');
+ resultArray.push(`<mark>${replaceHTMLEnt(str.substring(item.start, end))}</mark>`);
+ charCount += end - item.start;
- const contentMatch = regex.exec(result.content);
- regex.lastIndex = 0; /// Reset regex
+ i = j;
+ lastIndex = end;
- const titleMatch = regex.exec(result.title);
- regex.lastIndex = 0; /// Reset regex
+ if (ellipsis && charCount > charLimit) break;
+ }
- if (titleMatch) {
- result.title = result.title.replace(regex, Search.marker);
- }
+ /// Add the rest of the string
+ if (lastIndex < str.length) {
+ let end = str.length;
+ if (ellipsis) end = Math.min(end, lastIndex + offset);
- if (titleMatch || contentMatch) {
- matched = true;
- ++result.matchCount;
+ resultArray.push(`${replaceHTMLEnt(str.substring(lastIndex, end))}`);
- let start = 0,
- end = 100;
+ if (ellipsis && end != str.length) {
+ resultArray.push(` [...]`);
+ }
+ }
- if (contentMatch) {
- start = contentMatch.index - 20;
- end = contentMatch.index + 80
+ return resultArray.join('');
+ }
- if (start < 0) start = 0;
- }
+ private async searchKeywords(keywords: string[]) {
+ const rawData = await this.getData();
+ const results: pageData[] = [];
- if (result.preview.indexOf(keyword) !== -1) {
- result.preview = result.preview.replace(regex, Search.marker);
- }
- else {
- if (start !== 0) result.preview += `[...] `;
- result.preview += `${result.content.slice(start, end).replace(regex, Search.marker)} `;
- }
- }
+ const regex = new RegExp(keywords.filter((v, index, arr) => {
+ arr[index] = escapeRegExp(v);
+ return v.trim() !== '';
+ }).join('|'), 'gi');
+
+ for (const item of rawData) {
+ const titleMatches: match[] = [],
+ contentMatches: match[] = [];
+
+ let result = {
+ ...item,
+ preview: '',
+ matchCount: 0
}
- if (matched) {
- result.preview += '[...]';
- results.push(result);
+ const contentMatchAll = item.content.matchAll(regex);
+ for (const match of Array.from(contentMatchAll)) {
+ contentMatches.push({
+ start: match.index,
+ end: match.index + match[0].length
+ });
}
+
+ const titleMatchAll = item.title.matchAll(regex);
+ for (const match of Array.from(titleMatchAll)) {
+ titleMatches.push({
+ start: match.index,
+ end: match.index + match[0].length
+ });
+ }
+
+ if (titleMatches.length > 0) result.title = Search.processMatches(result.title, titleMatches, false);
+ if (contentMatches.length > 0) {
+ result.preview = Search.processMatches(result.content, contentMatches);
+ }
+ else {
+ /// If there are no matches in the content, use the first 140 characters as preview
+ result.preview = replaceHTMLEnt(result.content.substring(0, 140));
+ }
+
+ result.matchCount = titleMatches.length + contentMatches.length;
+ if (result.matchCount > 0) results.push(result);
}
- /** Result with more matches appears first */
+ /// Result with more matches appears first
return results.sort((a, b) => {
return b.matchCount - a.matchCount;
});
}
- public static marker(match) {
- return '<mark>' + match + '</mark>';
- }
-
private async doSearch(keywords: string[]) {
const startTime = performance.now();
@@ -150,6 +207,11 @@ class Search {
/// Not fetched yet
const jsonURL = this.form.dataset.json;
this.data = await fetch(jsonURL).then(res => res.json());
+ const parser = new DOMParser();
+
+ for (const item of this.data) {
+ item.content = parser.parseFromString(item.content, 'text/html').body.innerText;
+ }
}
return this.data;
@@ -160,7 +222,7 @@ class Search {
const eventHandler = (e) => {
e.preventDefault();
- const keywords = this.input.value;
+ const keywords = this.input.value.trim();
Search.updateQueryString(keywords, true);
@@ -225,7 +287,7 @@ class Search {
<a href={item.permalink}>
<div class="article-details">
<h2 class="article-title" dangerouslySetInnerHTML={{ __html: item.title }}></h2>
- <secion class="article-preview" dangerouslySetInnerHTML={{ __html: item.preview }}></secion>
+ <section class="article-preview" dangerouslySetInnerHTML={{ __html: item.preview }}></section>
</div>
{item.image &&
<div class="article-image">