WordCount and Summary: support UTF-8 strings

author: coderzh <pythonzh@gmail.com> 2015-09-03 13:22:20 +0300
committer: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2015-09-12 16:41:17 +0300
commit: 0e1fd78fb22e8a870ff3a922f36a9a4d0475c090 (patch)
tree: 48a0a74ba70aaf33bcc9f9d1d48576109500ae94 /helpers
parent: c7521b3d672b8d857bfe698f021c498dd27226c9 (diff)
2 files changed, 51 insertions, 12 deletions
diff --git a/helpers/content.go b/helpers/content.go
index 8e3fda505..6bb7ed4d3 100644
--- a/helpers/content.go
+++ b/helpers/content.go
@@ -19,6 +19,7 @@ package helpers
 
 import (
 	"bytes"
+	"unicode/utf8"
 	"html/template"
 	"os/exec"
 
@@ -386,21 +387,57 @@ func TruncateWords(s string, max int) string {
 // and returns entire sentences from content, delimited by the int
 // and whether it's truncated or not.
 func TruncateWordsToWholeSentence(words []string, max int) (string, bool) {
-	if max >= len(words) {
-		return strings.Join(words, " "), false
+	count := 0
+	index, word := 0, ""
+	truncated := false
+	
+	for index, word = range words {
+		runeCount := utf8.RuneCountInString(word)
+		if len(word) == runeCount {
+			count++;
+		} else {
+			if count + runeCount <= max {
+				count += runeCount
+			} else {
+				offset := 0
+				for count < max {
+					_, width := utf8.DecodeRuneInString(word[offset:])
+			        offset += width
+					count++
+				}
+				words[index] = word[:offset]
+				truncated = true
+			}
+		}
+		
+		if count >= max {
+			if index < len(words) - 1 {
+				truncated = true	
+			}
+			break
+		}
 	}
-
-	for counter, word := range words[max:] {
-		if strings.HasSuffix(word, ".") ||
-			strings.HasSuffix(word, "?") ||
-			strings.HasSuffix(word, ".\"") ||
-			strings.HasSuffix(word, "!") {
-			upper := max + counter + 1
-			return strings.Join(words[:upper], " "), (upper < len(words))
+	
+	index += 1
+	
+	if index < len(words) {
+		for counter, word := range words[index:] {
+			if len(word) != utf8.RuneCountInString(word) {
+				break
+			}
+			if strings.HasSuffix(word, ".") ||
+				strings.HasSuffix(word, "?") ||
+				strings.HasSuffix(word, ".\"") ||
+				strings.HasSuffix(word, "!") {
+				upper := index + counter + 1
+				return strings.Join(words[:upper], " "), (upper < len(words))
+			}
 		}
+	} else if index > len(words) {
+		return strings.Join(words, " "), truncated
 	}
-
-	return strings.Join(words[:max], " "), true
+	
+	return strings.Join(words[:index], " "), truncated
 }
 
 // GetAsciidocContent calls asciidoctor or asciidoc as an external helper
diff --git a/helpers/content_test.go b/helpers/content_test.go
index 602ca3785..f614011c0 100644
--- a/helpers/content_test.go
+++ b/helpers/content_test.go
@@ -54,6 +54,8 @@ func TestTruncateWordsToWholeSentence(t *testing.T) {
 		{"a b c", "a b c", 12, false},
 		{"a b c", "a b c", 3, false},
 		{"a", "a", 1, false},
+		{"Hello 中国", "Hello 中", 2, true},
+		{"Hello 中国", "Hello 中国", 3, false},
 		{"This is a sentence.", "This is a sentence.", 5, false},
 		{"This is also a sentence!", "This is also a sentence!", 1, false},
 		{"To be. Or not to be. That's the question.", "To be.", 1, true},
author	coderzh <pythonzh@gmail.com>	2015-09-03 13:22:20 +0300
committer	Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>	2015-09-12 16:41:17 +0300
commit	0e1fd78fb22e8a870ff3a922f36a9a4d0475c090 (patch)
tree	48a0a74ba70aaf33bcc9f9d1d48576109500ae94 /helpers
parent	c7521b3d672b8d857bfe698f021c498dd27226c9 (diff)