diff options
author | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2022-05-25 11:56:14 +0300 |
---|---|---|
committer | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2022-05-25 18:55:23 +0300 |
commit | 3854a6fa6c323d1c09aa71a0626c9eef62709294 (patch) | |
tree | ea3727c14f73fb73aef89d43795dd6d6f75f1220 /tpl/template.go | |
parent | cd0112a05a9ddb7043c9808284f93d8099c48473 (diff) |
Fix Plainify edge cases
This commit replaces the main part of `helpers.StripHTML` with Go's implementation in its html/template package.
It's a little slower, but correctness is more important:
```bash
BenchmarkStripHTMLOld-10 680316 1764 ns/op 728 B/op 4 allocs/op
BenchmarkStripHTMLNew-10 384520 3099 ns/op 2089 B/op 10 allocs/op
```
Fixes #9199
Fixes #9909
Closes #9410
Diffstat (limited to 'tpl/template.go')
-rw-r--r-- | tpl/template.go | 46 |
1 files changed, 46 insertions, 0 deletions
```diff
diff --git a/tpl/template.go b/tpl/template.go
index 299b7208d..738750de7 100644
--- a/tpl/template.go
+++ b/tpl/template.go
@@ -18,9 +18,14 @@ import (
 	"io"
 	"reflect"
 	"regexp"
+	"strings"
+	"unicode"
+
+	bp "github.com/gohugoio/hugo/bufferpool"
 
 	"github.com/gohugoio/hugo/output"
 
+	htmltemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/htmltemplate"
 	texttemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate"
 )
 
@@ -163,3 +168,44 @@ func GetHasLockFromContext(ctx context.Context) bool {
 func SetHasLockInContext(ctx context.Context, hasLock bool) context.Context {
 	return context.WithValue(ctx, texttemplate.HasLockContextKey, hasLock)
 }
+
+const hugoNewLinePlaceholder = "___hugonl_"
+
+var (
+	stripHTMLReplacerPre = strings.NewReplacer("\n", " ", "</p>", hugoNewLinePlaceholder, "<br>", hugoNewLinePlaceholder, "<br />", hugoNewLinePlaceholder)
+	whitespaceRe         = regexp.MustCompile(`\s+`)
+)
+
+// StripHTML strips out all HTML tags in s.
+func StripHTML(s string) string {
+	// Shortcut strings with no tags in them
+	if !strings.ContainsAny(s, "<>") {
+		return s
+	}
+
+	pre := stripHTMLReplacerPre.Replace(s)
+	preReplaced := pre != s
+
+	s = htmltemplate.StripTags(pre)
+
+	if preReplaced {
+		s = strings.ReplaceAll(s, hugoNewLinePlaceholder, "\n")
+	}
+
+	var wasSpace bool
+	b := bp.GetBuffer()
+	defer bp.PutBuffer(b)
+	for _, r := range s {
+		isSpace := unicode.IsSpace(r)
+		if !(isSpace && wasSpace) {
+			b.WriteRune(r)
+		}
+		wasSpace = isSpace
+	}
+
+	if b.Len() > 0 {
+		s = b.String()
+	}
+
+	return s
+}
```