diff options
author | coderzh <pythonzh@gmail.com> | 2015-09-03 13:22:20 +0300 |
---|---|---|
committer | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2015-09-12 16:41:17 +0300 |
commit | 0e1fd78fb22e8a870ff3a922f36a9a4d0475c090 (patch) | |
tree | 48a0a74ba70aaf33bcc9f9d1d48576109500ae94 /helpers | |
parent | c7521b3d672b8d857bfe698f021c498dd27226c9 (diff) |
WordCount Summary support UTF-8 string
Diffstat (limited to 'helpers')
-rw-r--r-- | helpers/content.go | 61 | ||||
-rw-r--r-- | helpers/content_test.go | 2 |
2 files changed, 51 insertions, 12 deletions
diff --git a/helpers/content.go b/helpers/content.go index 8e3fda505..6bb7ed4d3 100644 --- a/helpers/content.go +++ b/helpers/content.go @@ -19,6 +19,7 @@ package helpers import ( "bytes" + "unicode/utf8" "html/template" "os/exec" @@ -386,21 +387,57 @@ func TruncateWords(s string, max int) string { // and returns entire sentences from content, delimited by the int // and whether it's truncated or not. func TruncateWordsToWholeSentence(words []string, max int) (string, bool) { - if max >= len(words) { - return strings.Join(words, " "), false + count := 0 + index, word := 0, "" + truncated := false + + for index, word = range words { + runeCount := utf8.RuneCountInString(word) + if len(word) == runeCount { + count++; + } else { + if count + runeCount <= max { + count += runeCount + } else { + offset := 0 + for count < max { + _, width := utf8.DecodeRuneInString(word[offset:]) + offset += width + count++ + } + words[index] = word[:offset] + truncated = true + } + } + + if count >= max { + if index < len(words) - 1 { + truncated = true + } + break + } } - - for counter, word := range words[max:] { - if strings.HasSuffix(word, ".") || - strings.HasSuffix(word, "?") || - strings.HasSuffix(word, ".\"") || - strings.HasSuffix(word, "!") { - upper := max + counter + 1 - return strings.Join(words[:upper], " "), (upper < len(words)) + + index += 1 + + if index < len(words) { + for counter, word := range words[index:] { + if len(word) != utf8.RuneCountInString(word) { + break + } + if strings.HasSuffix(word, ".") || + strings.HasSuffix(word, "?") || + strings.HasSuffix(word, ".\"") || + strings.HasSuffix(word, "!") { + upper := index + counter + 1 + return strings.Join(words[:upper], " "), (upper < len(words)) + } } + } else if index > len(words) { + return strings.Join(words, " "), truncated } - - return strings.Join(words[:max], " "), true + + return strings.Join(words[:index], " "), truncated } // GetAsciidocContent calls asciidoctor or asciidoc as an external helper diff --git a/helpers/content_test.go b/helpers/content_test.go index 602ca3785..f614011c0 100644 --- a/helpers/content_test.go +++ b/helpers/content_test.go @@ -54,6 +54,8 @@ func TestTruncateWordsToWholeSentence(t *testing.T) { {"a b c", "a b c", 12, false}, {"a b c", "a b c", 3, false}, {"a", "a", 1, false}, + {"Hello 中国", "Hello 中", 2, true}, + {"Hello 中国", "Hello 中国", 3, false}, {"This is a sentence.", "This is a sentence.", 5, false}, {"This is also a sentence!", "This is also a sentence!", 1, false}, {"To be. Or not to be. That's the question.", "To be.", 1, true}, |