diff options
author | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2017-08-19 14:16:00 +0300 |
---|---|---|
committer | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2017-09-06 01:20:02 +0300 |
commit | 3b4f17bbc9ff789faa581ac278ad109d1ac5b816 (patch) | |
tree | 7b706ad5fce15afa1825b6565bae09bc517cc687 /related/inverted_index_test.go | |
parent | 16c9127663951ace1a3901cf669c49cc72780ced (diff) |
hugolib: Implement "related content"
This closes #98, even if this commit does not do full content text search.
We may revisit that problem in the future, but that deserves its own issue.
Fixes #98
Diffstat (limited to 'related/inverted_index_test.go')
-rw-r--r-- | related/inverted_index_test.go | 276 |
1 files changed, 276 insertions, 0 deletions
diff --git a/related/inverted_index_test.go b/related/inverted_index_test.go new file mode 100644 index 000000000..781a969fb --- /dev/null +++ b/related/inverted_index_test.go @@ -0,0 +1,276 @@ +// Copyright 2017-present The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package related + +import ( + "fmt" + "math/rand" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +type testDoc struct { + keywords map[string][]Keyword + date time.Time +} + +func (k *testDoc) String() string { + s := "\n" + for k, v := range k.keywords { + s += k + ":\t\t" + for _, vv := range v { + s += " " + vv.String() + } + s += "\n" + } + return s +} + +func newTestDoc(name string, keywords ...string) *testDoc { + km := make(map[string][]Keyword) + + time.Sleep(1 * time.Millisecond) + kw := &testDoc{keywords: km, date: time.Now()} + + kw.addKeywords(name, keywords...) + return kw +} + +func (t *testDoc) addKeywords(name string, keywords ...string) *testDoc { + keywordm := createTestKeywords(name, keywords...) + + for k, v := range keywordm { + keywords := make([]Keyword, len(v)) + for i := 0; i < len(v); i++ { + keywords[i] = StringKeyword(v[i]) + } + t.keywords[k] = keywords + } + return t +} + +func createTestKeywords(name string, keywords ...string) map[string][]string { + return map[string][]string{ + name: keywords, + } +} + +func (k *testDoc) SearchKeywords(cfg IndexConfig) ([]Keyword, error) { + return k.keywords[cfg.Name], nil +} + +func (k *testDoc) PubDate() time.Time { + return k.date +} + +func TestSearch(t *testing.T) { + + config := Config{ + Threshold: 90, + IncludeNewer: false, + Indices: IndexConfigs{ + IndexConfig{Name: "tags", Weight: 50}, + IndexConfig{Name: "keywords", Weight: 65}, + }, + } + + idx := NewInvertedIndex(config) + //idx.debug = true + + docs := []Document{ + newTestDoc("tags", "a", "b", "c", "d"), + newTestDoc("tags", "b", "d", "g"), + newTestDoc("tags", "b", "h").addKeywords("keywords", "a"), + newTestDoc("tags", "g", "h").addKeywords("keywords", "a", "b"), + } + + idx.Add(docs...) + + t.Run("count", func(t *testing.T) { + assert := require.New(t) + assert.Len(idx.index, 2) + set1, found := idx.index["tags"] + assert.True(found) + // 6 tags + assert.Len(set1, 6) + + set2, found := idx.index["keywords"] + assert.True(found) + assert.Len(set2, 2) + + }) + + t.Run("search-tags", func(t *testing.T) { + assert := require.New(t) + m, err := idx.search(newQueryElement("tags", StringsToKeywords("a", "b", "d", "z")...)) + assert.NoError(err) + assert.Len(m, 2) + assert.Equal(docs[0], m[0]) + assert.Equal(docs[1], m[1]) + }) + + t.Run("search-tags-and-keywords", func(t *testing.T) { + assert := require.New(t) + m, err := idx.search( + newQueryElement("tags", StringsToKeywords("a", "b", "z")...), + newQueryElement("keywords", StringsToKeywords("a", "b")...)) + assert.NoError(err) + assert.Len(m, 3) + assert.Equal(docs[3], m[0]) + assert.Equal(docs[2], m[1]) + assert.Equal(docs[0], m[2]) + }) + + t.Run("searchdoc-all", func(t *testing.T) { + assert := require.New(t) + doc := newTestDoc("tags", "a").addKeywords("keywords", "a") + m, err := idx.SearchDoc(doc) + assert.NoError(err) + assert.Len(m, 2) + assert.Equal(docs[3], m[0]) + assert.Equal(docs[2], m[1]) + }) + + t.Run("searchdoc-tags", func(t *testing.T) { + assert := require.New(t) + doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b") + m, err := idx.SearchDoc(doc, "tags") + assert.NoError(err) + assert.Len(m, 2) + assert.Equal(docs[0], m[0]) + assert.Equal(docs[1], m[1]) + }) + + t.Run("searchdoc-keywords-date", func(t *testing.T) { + assert := require.New(t) + doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b") + // This will get a date newer than the others. + newDoc := newTestDoc("keywords", "a", "b") + idx.Add(newDoc) + + m, err := idx.SearchDoc(doc, "keywords") + assert.NoError(err) + assert.Len(m, 2) + assert.Equal(docs[3], m[0]) + }) + +} + +func BenchmarkRelatedNewIndex(b *testing.B) { + + pages := make([]*testDoc, 100) + numkeywords := 30 + allKeywords := make([]string, numkeywords) + for i := 0; i < numkeywords; i++ { + allKeywords[i] = fmt.Sprintf("keyword%d", i+1) + } + + for i := 0; i < len(pages); i++ { + start := rand.Intn(len(allKeywords)) + end := start + 3 + if end >= len(allKeywords) { + end = start + 1 + } + + kw := newTestDoc("tags", allKeywords[start:end]...) + if i%5 == 0 { + start := rand.Intn(len(allKeywords)) + end := start + 3 + if end >= len(allKeywords) { + end = start + 1 + } + kw.addKeywords("keywords", allKeywords[start:end]...) + } + + pages[i] = kw + } + + cfg := Config{ + Threshold: 50, + Indices: IndexConfigs{ + IndexConfig{Name: "tags", Weight: 100}, + IndexConfig{Name: "keywords", Weight: 200}, + }, + } + + b.Run("singles", func(b *testing.B) { + for i := 0; i < b.N; i++ { + idx := NewInvertedIndex(cfg) + for _, doc := range pages { + idx.Add(doc) + } + } + }) + + b.Run("all", func(b *testing.B) { + for i := 0; i < b.N; i++ { + idx := NewInvertedIndex(cfg) + docs := make([]Document, len(pages)) + for i := 0; i < len(pages); i++ { + docs[i] = pages[i] + } + idx.Add(docs...) + } + }) + +} + +func BenchmarkRelatedMatchesIn(b *testing.B) { + + q1 := newQueryElement("tags", StringsToKeywords("keyword2", "keyword5", "keyword32", "asdf")...) + q2 := newQueryElement("keywords", StringsToKeywords("keyword3", "keyword4")...) + + docs := make([]*testDoc, 1000) + numkeywords := 20 + allKeywords := make([]string, numkeywords) + for i := 0; i < numkeywords; i++ { + allKeywords[i] = fmt.Sprintf("keyword%d", i+1) + } + + cfg := Config{ + Threshold: 20, + Indices: IndexConfigs{ + IndexConfig{Name: "tags", Weight: 100}, + IndexConfig{Name: "keywords", Weight: 200}, + }, + } + + idx := NewInvertedIndex(cfg) + + for i := 0; i < len(docs); i++ { + start := rand.Intn(len(allKeywords)) + end := start + 3 + if end >= len(allKeywords) { + end = start + 1 + } + + index := "tags" + if i%5 == 0 { + index = "keywords" + } + + idx.Add(newTestDoc(index, allKeywords[start:end]...)) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + if i%10 == 0 { + idx.search(q2) + } else { + idx.search(q1) + } + } +} |