Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.busybox.net/busybox.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Denys Vlasenko <vda.linux@googlemail.com> 2023-06-03 01:39:33 +0300
committer: Denys Vlasenko <vda.linux@googlemail.com> 2023-06-03 01:42:10 +0300
commit: 5f84c5633663f6ee8c9cc3a4608b86d4b56b39d6 (patch)
tree: f3c3aa3267164310b55192a1a3b174523aa49dbe
parent: 0256e00a9d077588bd3a39f5a1ef7e2eaa2911e4 (diff)
awk: fix backslash handling in sub() builtins
function                                             old     new   delta
awk_sub                                              559     544     -15

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- editors/awk.c 41
-rwxr-xr-x testsuite/awk.tests 47
2 files changed, 66 insertions, 22 deletions
diff --git a/editors/awk.c b/editors/awk.c
index 0f062dcdb..f77573806 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2492,7 +2492,7 @@ static char *awk_printf(node *n, size_t *len)
* store result into (dest), return number of substitutions.
* If nm = 0, replace all matches.
* If src or dst is NULL, use $0.
- * If subexp != 0, enable subexpression matching (\1-\9).
+ * If subexp != 0, enable subexpression matching (\0-\9).
*/
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
{
@@ -2520,35 +2520,32 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int
residx += eo;
if (++match_no >= nm) {
const char *s;
- int nbs;
+ int bslash;
/* replace */
residx -= (eo - so);
- nbs = 0;
+ bslash = 0;
for (s = repl; *s; s++) {
- char c = resbuf[residx++] = *s;
- if (c == '\\') {
- nbs++;
- continue;
+ char c = *s;
+ if (c == '\\' && s[1]) {
+ bslash ^= 1;
+ if (bslash)
+ continue;
}
- if (c == '&' || (subexp && c >= '0' && c <= '9')) {
- int j;
- residx -= ((nbs + 3) >> 1);
- j = 0;
+ if ((!bslash && c == '&')
+ || (subexp && bslash && c >= '0' && c <= '9')
+ ) {
+ int n, j = 0;
if (c != '&') {
j = c - '0';
- nbs++;
}
- if (nbs % 2) {
- resbuf[residx++] = c;
- } else {
- int n = pmatch[j].rm_eo - pmatch[j].rm_so;
- resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
- memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
- residx += n;
- }
- }
- nbs = 0;
+ n = pmatch[j].rm_eo - pmatch[j].rm_so;
+ resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
+ memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
+ residx += n;
+ } else
+ resbuf[residx++] = c;
+ bslash = 0;
}
}
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index cdab93d21..c61d32947 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -552,4 +552,51 @@ testing "awk = has higher precedence than == (despite what gawk manpage claims)"
'0\n1\n2\n1\n3\n' \
'' ''
+sq="'"
+testing 'awk gensub backslashes \' \
+ 'awk '$sq'BEGIN { s="\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
+ 's=\\
+\\|\\
+' \
+ '' ''
+testing 'awk gensub backslashes \\' \
+ 'awk '$sq'BEGIN { s="\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
+ 's=\\\\
+\\|\\
+' \
+ '' ''
+# gawk 5.1.1 handles trailing unpaired \ inconsistently.
+# If replace string is single \, it is used verbatim,
+# but if it is \\\ (three slashes), gawk uses "\<NUL>" (!!!), not "\\" as you would expect.
+testing 'awk gensub backslashes \\\' \
+ 'awk '$sq'BEGIN { s="\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
+ 's=\\\\\\
+\\\\|\\\\
+' \
+ '' ''
+testing 'awk gensub backslashes \\\\' \
+ 'awk '$sq'BEGIN { s="\\\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
+ 's=\\\\\\\\
+\\\\|\\\\
+' \
+ '' ''
+testing 'awk gensub backslashes \&' \
+ 'awk '$sq'BEGIN { s="\\&"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
+ 's=\\&
+&|&
+' \
+ '' ''
+testing 'awk gensub backslashes \0' \
+ 'awk '$sq'BEGIN { s="\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
+ 's=\\0
+a|a
+' \
+ '' ''
+testing 'awk gensub backslashes \\0' \
+ 'awk '$sq'BEGIN { s="\\\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
+ 's=\\\\0
+\\0|\\0
+' \
+ '' ''
+
exit $FAILCOUNT