diff options
Diffstat (limited to 'internal/redirects/matching.go')
-rw-r--r-- | internal/redirects/matching.go | 107 |
1 files changed, 76 insertions, 31 deletions
diff --git a/internal/redirects/matching.go b/internal/redirects/matching.go index d52863ee..e5cb8a48 100644 --- a/internal/redirects/matching.go +++ b/internal/redirects/matching.go @@ -2,6 +2,7 @@ package redirects import ( "fmt" + "net/url" "regexp" "strings" @@ -12,11 +13,12 @@ import ( ) var ( - regexMultipleSlashes = regexp.MustCompile(`//+`) + regexMultipleSlashes = regexp.MustCompile(`([^:])//+`) regexPlaceholderOrSplats = regexp.MustCompile(`(?i)\*|:[a-z]+`) ) // matchesRule returns `true` if the rule's "from" pattern matches the requested URL. +// This internally calls matchesRuleWithPlaceholderOrSplats to match rules. // // For example, given a "from" URL like this: // @@ -30,37 +32,56 @@ var ( // If the first return value is `true`, the second return value is the path that this // rule should redirect/rewrite to. This path is effectively the rule's "to" path that // has been templated with all the placeholders (if any) from the originally requested URL. -// -// TODO: Likely these should include host comparison once we have domain-level redirects -// https://gitlab.com/gitlab-org/gitlab-pages/-/issues/601 -func matchesRule(rule *netlifyRedirects.Rule, path string) (bool, string) { +func matchesRule(rule *netlifyRedirects.Rule, originalURL *url.URL) (bool, string) { + hostMatches, fromPath := matchHost(originalURL, rule.From) + + if !hostMatches { + return false, "" + } + + path := originalURL.Path + + if !feature.DomainRedirects.Enabled() && isDomainURL(rule.To) { + return false, "" + } + // If the requested URL exactly matches this rule's "from" path, // exit early and return the rule's "to" path to avoid building // and compiling the regex below. // However, only do this if there's nothing to template in the "to" path, - // to avoid redirect/rewriting to a url with a literal `:placeholder` in it. - if normalizePath(rule.From) == normalizePath(path) && !regexPlaceholderOrSplats.MatchString(rule.To) { + // to avoid redirect/rewriting to a originalURL with a literal `:placeholder` in it. + if normalizePath(fromPath) == normalizePath(path) && !regexPlaceholderOrSplats.MatchString(rule.To) { return true, rule.To } + return matchesRuleWithPlaceholderOrSplats(path, fromPath, rule.To, rule.Status) +} + +// matchesRuleWithPlaceholderOrSplats returns `true` if the rule's "from" pattern matches the requested URL. +// This is specifically for Placeholders and Splats matching +// +// For example, given a "from" URL like this: +// +// /a/*/originalURL/with/:placeholders +// +// this function would match URLs like this: +// +// /a/nice/originalURL/with/text +// /a/super/extra/nice/originalURL/with/matches +// +// If the first return value is `true`, the second return value is the path that this +// rule should redirect/rewrite to. This path is effectively the rule's "to" path that +// has been templated with all the placeholders (if any) from the originally requested URL. +func matchesRuleWithPlaceholderOrSplats(requestPath string, fromPath string, toPath string, status int) (bool, string) { // Any logic beyond this point handles placeholders and splats. // If the FF_ENABLE_PLACEHOLDERS feature flag isn't enabled, exit now. if !feature.RedirectsPlaceholders.Enabled() { return false, "" } - var regexSegments []string - for _, segment := range strings.Split(rule.From, "/") { - if segment == "" { - continue - } else if regexSplat.MatchString(segment) { - regexSegments = append(regexSegments, `/*(?P<splat>.*)/*`) - } else if regexPlaceholder.MatchString(segment) { - segmentName := strings.Replace(segment, ":", "", 1) - regexSegments = append(regexSegments, fmt.Sprintf(`/+(?P<%s>[^/]+)`, segmentName)) - } else { - regexSegments = append(regexSegments, "/+"+regexp.QuoteMeta(segment)) - } + regexSegments := convertToRegexSegments(fromPath) + if len(regexSegments) == 0 { + return false, "" } fromRegexString := `(?i)^` + strings.Join(regexSegments, "") + `/*$` @@ -68,40 +89,64 @@ func matchesRule(rule *netlifyRedirects.Rule, path string) (bool, string) { if err != nil { log.WithFields(log.Fields{ "fromRegexString": fromRegexString, - "rule.From": rule.From, - "rule.To": rule.To, - "rule.Status": rule.Status, - "path": path, + "rule.From": fromPath, + "rule.To": toPath, + "rule.Status": status, + "path": requestPath, }).WithError(err).Warnf("matchesRule generated an invalid regex: %q", fromRegexString) return false, "" } - template := regexPlaceholderReplacement.ReplaceAllString(rule.To, `${$placeholder}`) - submatchIndex := fromRegex.FindStringSubmatchIndex(path) + template := regexPlaceholderReplacement.ReplaceAllString(toPath, `${$placeholder}`) + subMatchIndex := fromRegex.FindStringSubmatchIndex(requestPath) - if submatchIndex == nil { + if subMatchIndex == nil { return false, "" } - templatedToPath := []byte{} - templatedToPath = fromRegex.ExpandString(templatedToPath, template, path, submatchIndex) + var templatedToPath []byte + templatedToPath = fromRegex.ExpandString(templatedToPath, template, requestPath, subMatchIndex) // Some replacements result in subsequent slashes. For example, a rule with a "to" // like `foo/:splat/bar` will result in a path like `foo//bar` if the splat // character matches nothing. To avoid this, replace all instances // of multiple subsequent forward slashes with a single forward slash. - templatedToPath = regexMultipleSlashes.ReplaceAll(templatedToPath, []byte("/")) + // The regex captures any character except a colon ([^:]) before the double slashes + // and includes it in the replacement. + templatedToPath = regexMultipleSlashes.ReplaceAll(templatedToPath, []byte("$1/")) return true, string(templatedToPath) } +// convertToRegexSegments converts the path string to an array of regex segments +// It replaces placeholders with named capture groups and splat characters with a wildcard regex +// This allows matching the path segments to the request path and extracting matched placeholder values +func convertToRegexSegments(path string) []string { + var regexSegments []string + + for _, segment := range strings.Split(path, "/") { + if segment == "" { + continue + } else if regexSplat.MatchString(segment) { + regexSegments = append(regexSegments, `/*(?P<splat>.*)/*`) + } else if regexPlaceholder.MatchString(segment) { + segmentName := strings.Replace(segment, ":", "", 1) + regexSegments = append(regexSegments, fmt.Sprintf(`/+(?P<%s>[^/]+)`, segmentName)) + } else { + regexSegments = append(regexSegments, "/+"+regexp.QuoteMeta(segment)) + } + } + + return regexSegments +} + // `match` returns: // 1. The first valid redirect or rewrite rule that matches the requested URL // 2. The URL to redirect/rewrite to // // If no rule matches, this function returns `nil` and an empty string -func (r *Redirects) match(path string) (*netlifyRedirects.Rule, string) { +func (r *Redirects) match(originalURL *url.URL) (*netlifyRedirects.Rule, string) { for i := range r.rules { if i >= cfg.MaxRuleCount { // do not process any more rules @@ -116,7 +161,7 @@ func (r *Redirects) match(path string) (*netlifyRedirects.Rule, string) { continue } - if isMatch, path := matchesRule(&rule, path); isMatch { + if isMatch, path := matchesRule(&rule, originalURL); isMatch { return &rule, path } } |