Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/gohugoio/hugo.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2017-08-19 14:16:00 +0300
committerBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2017-09-06 01:20:02 +0300
commit3b4f17bbc9ff789faa581ac278ad109d1ac5b816 (patch)
tree7b706ad5fce15afa1825b6565bae09bc517cc687 /related/inverted_index_test.go
parent16c9127663951ace1a3901cf669c49cc72780ced (diff)
hugolib: Implement "related content"
This closes #98, even if this commit does not do full content text search. We may revisit that problem in the future, but that deserves its own issue. Fixes #98
Diffstat (limited to 'related/inverted_index_test.go')
-rw-r--r--related/inverted_index_test.go276
1 files changed, 276 insertions, 0 deletions
diff --git a/related/inverted_index_test.go b/related/inverted_index_test.go
new file mode 100644
index 000000000..781a969fb
--- /dev/null
+++ b/related/inverted_index_test.go
@@ -0,0 +1,276 @@
+// Copyright 2017-present The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package related
+
+import (
+ "fmt"
+ "math/rand"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/require"
+)
+
+type testDoc struct { // testDoc is the in-memory document used by the tests and benchmarks in this file.
+ keywords map[string][]Keyword // keyword lists keyed by index name; SearchKeywords looks them up by cfg.Name
+ date time.Time // timestamp taken in newTestDoc; returned by PubDate
+}
+
+func (k *testDoc) String() string { // String returns a newline followed by one line per index name listing that name's keywords.
+ s := "\n"
+ for k, v := range k.keywords { // NOTE: the loop variable k shadows the receiver inside this loop; keywords is a map, so line order is not fixed
+ s += k + ":\t\t"
+ for _, vv := range v {
+ s += " " + vv.String() // each keyword is preceded by a single space
+ }
+ s += "\n"
+ }
+ return s
+}
+
+func newTestDoc(name string, keywords ...string) *testDoc { // newTestDoc returns a testDoc dated time.Now() with the given keywords stored under index name.
+ km := make(map[string][]Keyword)
+
+ time.Sleep(1 * time.Millisecond) // pause before time.Now(), presumably so successive docs get distinct, increasing dates (confirm); costs at least 1ms per doc
+ kw := &testDoc{keywords: km, date: time.Now()}
+
+ kw.addKeywords(name, keywords...)
+ return kw
+}
+
+func (t *testDoc) addKeywords(name string, keywords ...string) *testDoc { // addKeywords stores the keywords under index name, overwriting any existing entry for that name, and returns t so calls can be chained.
+ keywordm := createTestKeywords(name, keywords...) // single-entry map: name -> keywords
+
+ for k, v := range keywordm {
+ keywords := make([]Keyword, len(v)) // shadows the keywords parameter; receives the converted values
+ for i := 0; i < len(v); i++ {
+ keywords[i] = StringKeyword(v[i]) // convert each plain string into a StringKeyword
+ }
+ t.keywords[k] = keywords
+ }
+ return t
+}
+
+func createTestKeywords(name string, keywords ...string) map[string][]string { // createTestKeywords wraps the keywords in a one-entry map keyed by name.
+ return map[string][]string{
+ name: keywords, // the variadic slice is stored as-is, not copied
+ }
+}
+
+func (k *testDoc) SearchKeywords(cfg IndexConfig) ([]Keyword, error) { // SearchKeywords returns the keywords stored under cfg.Name; the error is always nil.
+ return k.keywords[cfg.Name], nil // a name with no entry yields a nil slice
+}
+
+func (k *testDoc) PubDate() time.Time { // PubDate returns the timestamp recorded when the doc was built by newTestDoc.
+ return k.date
+}
+
+func TestSearch(t *testing.T) { // TestSearch indexes four documents in one shared index and checks index contents and search results in subtests.
+
+ config := Config{
+ Threshold: 90,
+ IncludeNewer: false,
+ Indices: IndexConfigs{
+ IndexConfig{Name: "tags", Weight: 50},
+ IndexConfig{Name: "keywords", Weight: 65},
+ },
+ }
+
+ idx := NewInvertedIndex(config) // shared by every subtest below
+ //idx.debug = true
+
+ docs := []Document{ // created in order, so (given the sleep in newTestDoc) each has a later date than the previous one
+ newTestDoc("tags", "a", "b", "c", "d"),
+ newTestDoc("tags", "b", "d", "g"),
+ newTestDoc("tags", "b", "h").addKeywords("keywords", "a"),
+ newTestDoc("tags", "g", "h").addKeywords("keywords", "a", "b"),
+ }
+
+ idx.Add(docs...)
+
+ t.Run("count", func(t *testing.T) { // checks the number of indices and of distinct terms in each
+ assert := require.New(t)
+ assert.Len(idx.index, 2) // one entry per configured index: tags and keywords
+ set1, found := idx.index["tags"]
+ assert.True(found)
+ // 6 distinct tags across the four docs: a, b, c, d, g, h
+ assert.Len(set1, 6)
+
+ set2, found := idx.index["keywords"]
+ assert.True(found)
+ assert.Len(set2, 2) // 2 distinct keywords: a, b
+
+ })
+
+ t.Run("search-tags", func(t *testing.T) { // query on the tags index only; z is not an indexed tag
+ assert := require.New(t)
+ m, err := idx.search(newQueryElement("tags", StringsToKeywords("a", "b", "d", "z")...))
+ assert.NoError(err)
+ assert.Len(m, 2)
+ assert.Equal(docs[0], m[0]) // docs[0] shares a, b, d with the query
+ assert.Equal(docs[1], m[1]) // docs[1] shares b, d with the query
+ })
+
+ t.Run("search-tags-and-keywords", func(t *testing.T) { // query combining both indices
+ assert := require.New(t)
+ m, err := idx.search(
+ newQueryElement("tags", StringsToKeywords("a", "b", "z")...),
+ newQueryElement("keywords", StringsToKeywords("a", "b")...))
+ assert.NoError(err)
+ assert.Len(m, 3)
+ assert.Equal(docs[3], m[0]) // expected order: docs[3], docs[2], docs[0]
+ assert.Equal(docs[2], m[1])
+ assert.Equal(docs[0], m[2])
+ })
+
+ t.Run("searchdoc-all", func(t *testing.T) { // SearchDoc called without naming any index
+ assert := require.New(t)
+ doc := newTestDoc("tags", "a").addKeywords("keywords", "a")
+ m, err := idx.SearchDoc(doc)
+ assert.NoError(err)
+ assert.Len(m, 2)
+ assert.Equal(docs[3], m[0])
+ assert.Equal(docs[2], m[1])
+ })
+
+ t.Run("searchdoc-tags", func(t *testing.T) { // SearchDoc restricted to the tags index; expects the same result as search-tags
+ assert := require.New(t)
+ doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
+ m, err := idx.SearchDoc(doc, "tags")
+ assert.NoError(err)
+ assert.Len(m, 2)
+ assert.Equal(docs[0], m[0])
+ assert.Equal(docs[1], m[1])
+ })
+
+ t.Run("searchdoc-keywords-date", func(t *testing.T) { // SearchDoc restricted to the keywords index after adding a newer doc
+ assert := require.New(t)
+ doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
+ // newDoc is created after doc, so its date is newer than doc's and every indexed doc's.
+ newDoc := newTestDoc("keywords", "a", "b")
+ idx.Add(newDoc) // mutates the shared index; this is the last subtest, so no later subtest sees the change
+
+ m, err := idx.SearchDoc(doc, "keywords")
+ assert.NoError(err)
+ assert.Len(m, 2) // still 2 results although newDoc has the same keywords; presumably excluded by IncludeNewer: false (confirm)
+ assert.Equal(docs[3], m[0])
+ })
+
+}
+
+func BenchmarkRelatedNewIndex(b *testing.B) { // BenchmarkRelatedNewIndex measures building an index from 100 documents, added one at a time and in a single call.
+
+ pages := make([]*testDoc, 100)
+ numkeywords := 30
+ allKeywords := make([]string, numkeywords)
+ for i := 0; i < numkeywords; i++ {
+ allKeywords[i] = fmt.Sprintf("keyword%d", i+1) // keyword1 .. keyword30
+ }
+
+ for i := 0; i < len(pages); i++ {
+ start := rand.Intn(len(allKeywords))
+ end := start + 3 // take a window of three consecutive keywords
+ if end >= len(allKeywords) { // shrink to one keyword near the end; >= also shrinks a window that would end exactly at len
+ end = start + 1
+ }
+
+ kw := newTestDoc("tags", allKeywords[start:end]...)
+ if i%5 == 0 { // every fifth page also gets a keywords entry, drawn from its own random window
+ start := rand.Intn(len(allKeywords))
+ end := start + 3
+ if end >= len(allKeywords) {
+ end = start + 1
+ }
+ kw.addKeywords("keywords", allKeywords[start:end]...)
+ }
+
+ pages[i] = kw
+ }
+
+ cfg := Config{
+ Threshold: 50,
+ Indices: IndexConfigs{
+ IndexConfig{Name: "tags", Weight: 100},
+ IndexConfig{Name: "keywords", Weight: 200},
+ },
+ }
+
+ b.Run("singles", func(b *testing.B) { // fresh index per iteration, one Add call per document
+ for i := 0; i < b.N; i++ {
+ idx := NewInvertedIndex(cfg)
+ for _, doc := range pages {
+ idx.Add(doc)
+ }
+ }
+ })
+
+ b.Run("all", func(b *testing.B) { // fresh index per iteration, all documents in one Add call
+ for i := 0; i < b.N; i++ {
+ idx := NewInvertedIndex(cfg)
+ docs := make([]Document, len(pages)) // the []*testDoc to []Document conversion is inside the timed loop
+ for i := 0; i < len(pages); i++ { // inner i shadows the benchmark loop counter
+ docs[i] = pages[i]
+ }
+ idx.Add(docs...)
+ }
+ })
+
+}
+
+func BenchmarkRelatedMatchesIn(b *testing.B) { // BenchmarkRelatedMatchesIn measures idx.search against an index of 1000 documents.
+
+ q1 := newQueryElement("tags", StringsToKeywords("keyword2", "keyword5", "keyword32", "asdf")...) // keyword32 and asdf are never indexed (only keyword1 .. keyword20 exist)
+ q2 := newQueryElement("keywords", StringsToKeywords("keyword3", "keyword4")...)
+
+ docs := make([]*testDoc, 1000) // used only for its length; the elements are never assigned
+ numkeywords := 20
+ allKeywords := make([]string, numkeywords)
+ for i := 0; i < numkeywords; i++ {
+ allKeywords[i] = fmt.Sprintf("keyword%d", i+1) // keyword1 .. keyword20
+ }
+
+ cfg := Config{
+ Threshold: 20,
+ Indices: IndexConfigs{
+ IndexConfig{Name: "tags", Weight: 100},
+ IndexConfig{Name: "keywords", Weight: 200},
+ },
+ }
+
+ idx := NewInvertedIndex(cfg)
+
+ for i := 0; i < len(docs); i++ {
+ start := rand.Intn(len(allKeywords))
+ end := start + 3 // window of three consecutive keywords
+ if end >= len(allKeywords) { // shrink to one keyword when the window would reach the end of the list
+ end = start + 1
+ }
+
+ index := "tags"
+ if i%5 == 0 { // every fifth document is indexed under keywords instead of tags
+ index = "keywords"
+ }
+
+ idx.Add(newTestDoc(index, allKeywords[start:end]...)) // newTestDoc sleeps 1ms, so this setup loop takes at least about one second
+ }
+
+ b.ResetTimer() // exclude the index construction above from the measurement
+ for i := 0; i < b.N; i++ {
+ if i%10 == 0 { // one iteration in ten runs the keywords query, the rest run the tags query
+ idx.search(q2) // result and error are discarded
+ } else {
+ idx.search(q1)
+ }
+ }
+}