diff options
author | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2016-08-17 07:37:19 +0300 |
---|---|---|
committer | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2016-09-14 11:50:56 +0300 |
commit | 4abaec5c045e92ae5f8b3a2dc66606b080ef6ea5 (patch) | |
tree | cf8c09108526475a9b445a9ba34656a9b17b9494 /helpers | |
parent | bcd434794a28ff75a6e6504c6c3bada554ba88ce (diff) |
Improve TotalWords counter func
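Counting word starts in a single pass over the string is more efficient than `len(strings.Fields(s))` when we do not care about the actual words, because it avoids allocating the slice of fields.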
```
BenchmarkTotalWords-4 100000 18795 ns/op 0 B/op 0 allocs/op
BenchmarkTotalWordsOld-4 30000 46751 ns/op 6400 B/op 1 allocs/op
```
Diffstat (limited to 'helpers')
-rw-r--r-- | helpers/content.go | 19 | ||||
-rw-r--r-- | helpers/content_test.go | 41 |
2 files changed, 55 insertions, 5 deletions
diff --git a/helpers/content.go b/helpers/content.go index bb7819175..9d35675f7 100644 --- a/helpers/content.go +++ b/helpers/content.go @@ -384,8 +384,25 @@ func RenderBytes(ctx *RenderingContext) []byte { } } -// TotalWords returns an int of the total number of words in a given content. +// TotalWords counts instance of one or more consecutive white space +// characters, as defined by unicode.IsSpace, in s. +// This is a cheaper way of word counting than the obvious len(strings.Fields(s)). func TotalWords(s string) int { + n := 0 + inWord := false + for _, r := range s { + wasInWord := inWord + inWord = !unicode.IsSpace(r) + if inWord && !wasInWord { + n++ + } + } + return n +} + +// Old implementation only kept for benchmark comparison. +// TODO(bep) remove +func totalWordsOld(s string) int { return len(strings.Fields(s)) } diff --git a/helpers/content_test.go b/helpers/content_test.go index 5165a7a26..82af70f8f 100644 --- a/helpers/content_test.go +++ b/helpers/content_test.go @@ -408,12 +408,45 @@ func TestExtractNoTOC(t *testing.T) { } } +var totalWordsBenchmarkString = strings.Repeat("Hugo Rocks ", 200) + func TestTotalWords(t *testing.T) { - testString := "Two, Words!" 
- actualWordCount := TotalWords(testString) - if actualWordCount != 2 { - t.Errorf("Actual word count (%d) for test string (%s) did not match 2.", actualWordCount, testString) + for i, this := range []struct { + s string + words int + }{ + {"Two, Words!", 2}, + {"Word", 1}, + {"", 0}, + {"One, Two, Three", 3}, + {totalWordsBenchmarkString, 400}, + } { + actualWordCount := TotalWords(this.s) + + if actualWordCount != this.words { + t.Errorf("[%d] Actual word count (%d) for test string (%s) did not match %d", i, actualWordCount, this.s, this.words) + } + } +} + +func BenchmarkTotalWords(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + wordCount := TotalWords(totalWordsBenchmarkString) + if wordCount != 400 { + b.Fatal("Wordcount error") + } + } +} + +func BenchmarkTotalWordsOld(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + wordCount := totalWordsOld(totalWordsBenchmarkString) + if wordCount != 400 { + b.Fatal("Wordcount error") + } } } |