From ffcb4aeb8e392a80da7cad0f1e03a4102efb24ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Mon, 9 Mar 2020 12:04:33 +0100 Subject: Fix handling of HTML files without front matter This means that any HTML file inside /content will be treated as a regular file. If you want it processed with shortcodes and a layout, add front matter. The definition of an HTML file here is: * File with extension .htm or .html * With first non-whitespace character "<" that isn't an HTML comment. This is in line with the documentation. Fixes #7030 Fixes #7028 See #6789 --- hugofs/files/classifier.go | 78 ++++++++++++++++++++++++++++++++++++++++- hugofs/files/classifier_test.go | 12 +++++++ hugofs/filter_fs.go | 2 +- 3 files changed, 90 insertions(+), 2 deletions(-) (limited to 'hugofs') diff --git a/hugofs/files/classifier.go b/hugofs/files/classifier.go index e8f8241b7..5e26bbac0 100644 --- a/hugofs/files/classifier.go +++ b/hugofs/files/classifier.go @@ -14,10 +14,16 @@ package files import ( + "bufio" + "fmt" + "io" "os" "path/filepath" "sort" "strings" + "unicode" + + "github.com/spf13/afero" ) var ( @@ -32,6 +38,11 @@ var ( "pandoc", "pdc"} contentFileExtensionsSet map[string]bool + + htmlFileExtensions = []string{ + "html", "htm"} + + htmlFileExtensionsSet map[string]bool ) func init() { @@ -39,12 +50,20 @@ func init() { for _, ext := range contentFileExtensions { contentFileExtensionsSet[ext] = true } + htmlFileExtensionsSet = make(map[string]bool) + for _, ext := range htmlFileExtensions { + htmlFileExtensionsSet[ext] = true + } } func IsContentFile(filename string) bool { return contentFileExtensionsSet[strings.TrimPrefix(filepath.Ext(filename), ".")] } +func IsHTMLFile(filename string) bool { + return htmlFileExtensionsSet[strings.TrimPrefix(filepath.Ext(filename), ".")] +} + func IsContentExt(ext string) bool { return contentFileExtensionsSet[ext] } @@ -62,10 +81,33 @@ func (c ContentClass) IsBundle() bool { return c == ContentClassLeaf || c == 
ContentClassBranch } -func ClassifyContentFile(filename string) ContentClass { +func ClassifyContentFile(filename string, open func() (afero.File, error)) ContentClass { if !IsContentFile(filename) { return ContentClassFile } + + if IsHTMLFile(filename) { + // We need to look inside the file. If the first non-whitespace + // character is a "<", then we treat it as a regular file. + // Earlier we created pages for these files, but that had all sorts + // of troubles, and isn't what it says in the documentation. + // See https://github.com/gohugoio/hugo/issues/7030 + if open == nil { + panic(fmt.Sprintf("no file opener provided for %q", filename)) + } + + f, err := open() + if err != nil { + return ContentClassFile + } + ishtml := isHTMLContent(f) + f.Close() + if ishtml { + return ContentClassFile + } + + } + if strings.HasPrefix(filename, "_index.") { return ContentClassBranch } @@ -77,6 +119,40 @@ func ClassifyContentFile(filename string) ContentClass { return ContentClassContent } +var htmlComment = []rune{'<', '!', '-', '-'} + +func isHTMLContent(r io.Reader) bool { + br := bufio.NewReader(r) + i := 0 + for { + c, _, err := br.ReadRune() + if err != nil { + break + } + + if i > 0 { + if i >= len(htmlComment) { + return false + } + + if c != htmlComment[i] { + return true + } + + i++ + continue + } + + if !unicode.IsSpace(c) { + if i == 0 && c != '<' { + return false + } + i++ + } + } + return true +} + const ( ComponentFolderArchetypes = "archetypes" ComponentFolderStatic = "static" diff --git a/hugofs/files/classifier_test.go b/hugofs/files/classifier_test.go index af188f349..0cd7e4177 100644 --- a/hugofs/files/classifier_test.go +++ b/hugofs/files/classifier_test.go @@ -15,6 +15,7 @@ package files import ( "path/filepath" + "strings" "testing" qt "github.com/frankban/quicktest" @@ -30,6 +31,17 @@ func TestIsContentFile(t *testing.T) { c.Assert(IsContentExt("json"), qt.Equals, false) } +func TestIsHTMLContent(t *testing.T) { + c := qt.New(t) + 
c.Assert(isHTMLContent(strings.NewReader(" ")), qt.Equals, true) + c.Assert(isHTMLContent(strings.NewReader("