diff options
Diffstat (limited to 'utils/cert-expr.c')
-rw-r--r-- | utils/cert-expr.c | 968 |
1 files changed, 968 insertions, 0 deletions
diff --git a/utils/cert-expr.c b/utils/cert-expr.c new file mode 100644 index 00000000..b22b380c --- /dev/null +++ b/utils/cert-expr.c @@ -0,0 +1,968 @@ +/* + * Parser for the boolean expression language used to configure what + * host names an OpenSSH certificate will be trusted to sign for. + */ + +/* + +Language specification +====================== + +Outer lexical layer: the input expression is broken up into tokens, +with any whitespace between them discarded and ignored. The following +tokens are special: + + ( ) && || ! + +and the remaining token type is an 'atom', which is any non-empty +sequence of characters from the following set: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ + abcdefghijklmnopqrstuvwxyz + 0123456789 + .-_*?[]/: + +Inner lexical layer: once the boundaries of an 'atom' token have been +determined by the outer lex layer, each atom is further classified +into one of the following subtypes: + + - If it contains no ':' or '/', it's taken to be a wildcard matching + hostnames, e.g. "*.example.com". + + - If it begins with 'port:' followed by digits, it's taken to be a + single port number specification, e.g. "port:22". + + - If it begins with 'port:' followed by two digit sequences separated + by '-', it's taken to be a port number range, e.g. "port:0-1023". + + - Any other atom is reserved for future expansion. (See Rationale.) + +Syntax layer: all of those types of atom are interpreted as predicates +applied to the (hostname, port) data configured for the SSH connection +for which the certificate is being validated. + +Wildcards are handled using the syntax in wildcard.c. The dot- +separated structure of hostnames is thus not special; the '*' in +"*.example.com" will match any number of subdomains under example.com. + +More complex boolean expressions can be made by combining those +predicates using the boolean operators and parentheses, in the obvious +way: && and || are infix operators representing logical AND and OR, ! +is a prefix operator representing logical NOT, and parentheses +indicate grouping. + +Each of && and || can associate freely with itself (that is, you can +write "a && b && c" without having to parenthesise one or the other +subexpression). But they are forbidden to associate with _each other_. +That is, if you write "a && b || c" or "a || b && c", it's a syntax +error, and you must add parentheses to indicate which operator was +intended to have the higher priority. + +Rationale +========= + +Atoms: restrictions +------------------- + +The characters permitted in the 'atom' token don't include \, even +though it's a special character defined by wildcard.c. That's because +in this restricted context wildcards will never need it: no hostname +contains a literal \, and neither does any hostname contain a literal +instance of any of the wildcard characters that wildcard.c allows you +to use \ to escape. + +Atoms: future extension +----------------------- + +The specification of the 'atom' token is intended to leave space for +more than one kind of future extension. + +Most obviously, additional special predicates similar to "port:", with +different disambiguating prefixes. I don't know what things of that +kind we might need, but space is left for them just in case. + +Also, the unused '/' in the permitted-characters spec is intended to +leave open the possibility of allowing certificate acceptance to be +based on IP address, because the usual CIDR syntax for specifying IP +ranges (e.g. "192.168.1.0/24" or "2345:6789:abcd:ef01::/128") would be +lexed as a single atom under these rules. + +For the moment, certificate acceptance rules based on IP address are +not supported, because it's not clear what the semantics ought to be. +There are two problems with using IP addresses for this purpose: + + 1. Sometimes they come from the DNS, which means you can't trust + them. The whole idea of SSH is to end-to-end authenticate the host + key against only the input given _by the user_ to the client. Any + additional data provided by the network, such as the result of a + DNS lookup, is suspect. + + On the other hand, sometimes the IP address *is* part of the user + input, because the user can provide an IP address rather than a + hostname as the intended connection destination. So there are two + kinds of IP address, and they should very likely be treated + differently. + + 2. Sometimes the server's IP address is not even *known* by the + client, if you're connecting via a proxy and leaving DNS lookups + to the proxy. + +So, what should a boolean expression do if it's asked to accept or +reject based on an IP address, and the IP address is unknown or +untrustworthy? I'm not sure, and therefore, in the initial version of +this expression system, I haven't implemented them at all. + +But the syntax is still available for a future extension to use, if we +come up with good answers to these questions. + +(One possibility would be to evaluate the whole expression in Kleene +three-valued logic, so that every subexpression has the possible +answers TRUE, FALSE and UNKNOWN. If a definite IP address is not +available, IP address predicates evaluate to UNKNOWN. Then, once the +expression as a whole is evaluated, fail closed, by interpreting +UNKNOWN as 'reject'. The effect would be that a positive _or_ negative +constraint on the IP address would cause rejection if the IP address +is not reliably known, because once the predicate itself has returned +UNKNOWN, negating it still gives UNKNOWN. The only way you could still +accept a certificate in that situation would be if the overall +structure of the expression meant that the test of the IP address +couldn't affect the result anyway, e.g. if it was ANDed with another +subexpression that definitely evaluated to FALSE, or ORed with one +that evaluated to TRUE. This system seems conceptually elegant to me, +but the argument against it is that it's complicated and +counterintuitive, which is not a property you want in something a user +is writing for security purposes!) + +Operator precedence +------------------- + +Why did I choose to make && and || refuse to associate with each +other, instead of applying the usual C precedence rule that && beats +||? Because I think the C precedence rule is essentially arbitrary, in +the sense that when people are writing boolean expressions in practice +based on predicates from the rest of their program, it's about equally +common to want to nest an && within an || and vice versa. So the +default precedence rule only gives the user what they actually wanted +about 50% of the time, and leads to absent-minded errors about as +often as it conveniently allows you to omit a pair of parens. + +With my mathematician hat on, it's not so arbitrary. I agree that if +you're *going* to give || and && a relative priority then it makes +more sense to make && the higher-priority one, because if you're +thinking algebraically, && is more multiplicative and || is more +additive. But the pure-maths contexts in which that's convenient have +nothing to do with general boolean expressions in if statements. + +This boolean syntax is still close enough to that of C and its +derivatives to allow easy enough expression interchange (not counting +the fact that atoms would need rewriting). Any boolean expression +structure accepted by this syntax is also legal C and means the same +thing; any expression structure accepted by C is either legal and +equivalent in this syntax, or will fail with an error. In no case is +anything accepted but mapped to a different meaning. + + */ + +#include "putty.h" + +typedef enum Token { + TOK_LPAR, TOK_RPAR, + TOK_AND, TOK_OR, TOK_NOT, + TOK_ATOM, + TOK_END, TOK_ERROR +} Token; + +static inline bool is_space(char c) +{ + return (c == ' ' || c == '\n' || c == '\r' || c == '\t' || + c == '\f' || c == '\v'); +} + +static inline bool is_operator_char(char c) +{ + return (c == '(' || c == ')' || c == '&' || c == '|' || c == '!'); +} + +static inline bool is_atom_char(char c) +{ + return (('A' <= c && c <= 'Z') || + ('a' <= c && c <= 'z') || + ('0' <= c && c <= '9') || + c == '.' || c == '-' || c == '_' || c == '*' || c == '?' || + c == '[' || c == ']' || c == '/' || c == ':'); +} + +static Token lex(ptrlen *text, ptrlen *token, char **err) +{ + const char *p = text->ptr, *e = p + text->len; + Token type = TOK_ERROR; + + /* Skip whitespace */ + while (p < e && is_space(*p)) + p++; + + const char *start = p; + + if (!(p < e)) { + type = TOK_END; + goto out; + } + + if (is_operator_char(*p)) { + /* Match boolean-expression tokens */ + static const struct operator { + ptrlen text; + Token type; + } operators[] = { + {PTRLEN_DECL_LITERAL("("), TOK_LPAR}, + {PTRLEN_DECL_LITERAL(")"), TOK_RPAR}, + {PTRLEN_DECL_LITERAL("&&"), TOK_AND}, + {PTRLEN_DECL_LITERAL("||"), TOK_OR}, + {PTRLEN_DECL_LITERAL("!"), TOK_NOT}, + }; + + for (size_t i = 0; i < lenof(operators); i++) { + const struct operator *op = &operators[i]; + if (e - p >= op->text.len && + ptrlen_eq_ptrlen(op->text, make_ptrlen(p, op->text.len))) { + p += op->text.len; + type = op->type; + goto out; + } + } + + /* + * Report an error if one of the operator characters is used + * in a way that doesn't match something in that table (e.g. a + * single &). + */ + p++; + type = TOK_ERROR; + *err = dupstr("unrecognised boolean operator"); + goto out; + } else if (is_atom_char(*p)) { + /* + * Match an 'atom' token, which is any non-empty sequence of + * characters from the combined set that allows hostname + * wildcards, IP address ranges and special predicates like + * port numbers. + */ + do { + p++; + } while (p < e && is_atom_char(*p)); + + type = TOK_ATOM; + goto out; + } else { + /* + * Otherwise, report an error. + */ + p++; + type = TOK_ERROR; + *err = dupstr("unexpected character in expression"); + goto out; + } + + out: + *token = make_ptrlen(start, p - start); + text->ptr = p; + text->len = e - p; + return type; +} + +typedef enum Operator { + OP_AND, OP_OR, OP_NOT, + OP_HOSTNAME_WC, OP_PORT_RANGE +} Operator; + +typedef struct ExprNode ExprNode; +struct ExprNode { + Operator op; + ptrlen text; + union { + struct { + /* OP_AND, OP_OR */ + ExprNode **subexprs; + size_t nsubexprs; + }; + struct { + /* OP_NOT */ + ExprNode *subexpr; + }; + struct { + /* OP_HOSTNAME_WC */ + char *wc; + }; + struct { + /* OP_PORT_RANGE */ + unsigned lo, hi; /* both inclusive */ + }; + }; +}; + +static ExprNode *exprnode_new(Operator op, ptrlen text) +{ + ExprNode *en = snew(ExprNode); + memset(en, 0, sizeof(*en)); + en->op = op; + en->text = text; + return en; +} + +static void exprnode_free(ExprNode *en) +{ + switch (en->op) { + case OP_AND: + case OP_OR: + for (size_t i = 0; i < en->nsubexprs; i++) + exprnode_free(en->subexprs[i]); + sfree(en->subexprs); + break; + case OP_NOT: + exprnode_free(en->subexpr); + break; + case OP_HOSTNAME_WC: + sfree(en->wc); + break; + case OP_PORT_RANGE: + break; + default: + unreachable("unhandled node type in exprnode_free"); + } + + sfree(en); +} + +static unsigned ptrlen_to_port_number(ptrlen input) +{ + unsigned val = 0; + for (const char *p = input.ptr, *end = p + input.len; p < end; p++) { + assert('0' <= *p && *p <= '9'); /* expect parser to have checked */ + val = 10 * val + (*p - '0'); + if (val >= 65536) + val = 65536; /* normalise 'too large' to avoid integer overflow */ + } + return val; +} + +typedef struct ParserState ParserState; +struct ParserState { + ptrlen currtext; + Token tok; + ptrlen toktext; + char *err; + ptrlen errloc; +}; + +static void error(ParserState *ps, char *errtext, ptrlen errloc) +{ + if (!ps->err) { + ps->err = errtext; + ps->errloc = errloc; + } else { + sfree(errtext); + } +} + +static void advance(ParserState *ps) +{ + char *err = NULL; + ps->tok = lex(&ps->currtext, &ps->toktext, &err); + if (ps->tok == TOK_ERROR) + error(ps, err, ps->toktext); +} + +static ExprNode *parse_atom(ParserState *ps); +static ExprNode *parse_expr(ParserState *ps); + +static bool atom_is_hostname_wc(ptrlen toktext) +{ + return !ptrlen_contains(toktext, ":/"); +} + +static ExprNode *parse_atom(ParserState *ps) +{ + if (ps->tok == TOK_LPAR) { + ptrlen openpar = ps->toktext; + advance(ps); /* eat the ( */ + + ExprNode *subexpr = parse_expr(ps); + if (!subexpr) + return NULL; + + if (ps->tok != TOK_RPAR) { + error(ps, dupstr("expected ')' after parenthesised subexpression"), + subexpr->text); + exprnode_free(subexpr); + return NULL; + } + + ptrlen closepar = ps->toktext; + advance(ps); /* eat the ) */ + + /* We can reuse the existing AST node, but we need to extend + * its bounds within the input expression to include the + * parentheses */ + subexpr->text = make_ptrlen_startend( + openpar.ptr, ptrlen_end(closepar)); + return subexpr; + } + + if (ps->tok == TOK_NOT) { + ptrlen notloc = ps->toktext; + advance(ps); /* eat the ! */ + + ExprNode *subexpr = parse_atom(ps); + if (!subexpr) + return NULL; + + ExprNode *en = exprnode_new( + OP_NOT, make_ptrlen_startend( + notloc.ptr, ptrlen_end(subexpr->text))); + en->subexpr = subexpr; + return en; + } + + if (ps->tok == TOK_ATOM) { + if (atom_is_hostname_wc(ps->toktext)) { + /* Hostname wildcard. */ + ExprNode *en = exprnode_new(OP_HOSTNAME_WC, ps->toktext); + en->wc = mkstr(ps->toktext); + advance(ps); + return en; + } + + ptrlen tail; + if (ptrlen_startswith(ps->toktext, PTRLEN_LITERAL("port:"), &tail)) { + /* Port number (single or range). */ + unsigned lo, hi; + char *minus; + static const char DIGITS[] = "0123456789\0"; + bool parse_ok = false; + + if (tail.len > 0 && ptrlen_contains_only(tail, DIGITS)) { + lo = ptrlen_to_port_number(tail); + if (lo >= 65536) { + error(ps, dupstr("port number too large"), tail); + return NULL; + } + hi = lo; + parse_ok = true; + } else if ((minus = memchr(tail.ptr, '-', tail.len)) != NULL) { + ptrlen pl_lo = make_ptrlen_startend(tail.ptr, minus); + ptrlen pl_hi = make_ptrlen_startend(minus+1, ptrlen_end(tail)); + if (pl_lo.len > 0 && ptrlen_contains_only(pl_lo, DIGITS) && + pl_hi.len > 0 && ptrlen_contains_only(pl_hi, DIGITS)) { + + lo = ptrlen_to_port_number(pl_lo); + if (lo >= 65536) { + error(ps, dupstr("port number too large"), pl_lo); + return NULL; + } + + hi = ptrlen_to_port_number(pl_hi); + if (hi >= 65536) { + error(ps, dupstr("port number too large"), pl_hi); + return NULL; + } + + if (hi < lo) { + error(ps, dupstr("port number range is backwards"), + make_ptrlen_startend(pl_lo.ptr, + ptrlen_end(pl_hi))); + return NULL; + } + + parse_ok = true; + } + } + + if (!parse_ok) { + error(ps, dupstr("unable to parse port number specification"), + ps->toktext); + return NULL; + } + + + ExprNode *en = exprnode_new(OP_PORT_RANGE, ps->toktext); + en->lo = lo; + en->hi = hi; + advance(ps); + return en; + } + } + + error(ps, dupstr("expected a predicate or a parenthesised subexpression"), + ps->toktext); + return NULL; +} + +static ExprNode *parse_expr(ParserState *ps) +{ + ExprNode *subexpr = parse_atom(ps); + if (!subexpr) + return NULL; + + if (ps->tok != TOK_AND && ps->tok != TOK_OR) + return subexpr; + + Token operator = ps->tok; + ExprNode *en = exprnode_new(ps->tok == TOK_AND ? OP_AND : OP_OR, + subexpr->text); + size_t subexprs_size = 0; + + sgrowarray(en->subexprs, subexprs_size, en->nsubexprs); + en->subexprs[en->nsubexprs++] = subexpr; + + while (true) { + advance(ps); /* eat the operator */ + + subexpr = parse_atom(ps); + if (!subexpr) { + exprnode_free(en); + return NULL; + } + sgrowarray(en->subexprs, subexprs_size, en->nsubexprs); + en->subexprs[en->nsubexprs++] = subexpr; + en->text = make_ptrlen_startend( + en->text.ptr, ptrlen_end(subexpr->text)); + + if (ps->tok != TOK_AND && ps->tok != TOK_OR) + return en; + + if (ps->tok != operator) { + error(ps, dupstr("expected parentheses to disambiguate && and || " + "on either side of expression"), subexpr->text); + exprnode_free(en); + return NULL; + } + } +} + +static ExprNode *parse(ptrlen expr, char **error_msg, ptrlen *error_loc) +{ + ParserState ps[1]; + ps->currtext = expr; + ps->err = NULL; + advance(ps); + + ExprNode *en = parse_expr(ps); + if (en && ps->tok != TOK_END) { + error(ps, dupstr("unexpected text at end of expression"), + make_ptrlen_startend(ps->toktext.ptr, ptrlen_end(expr))); + exprnode_free(en); + en = NULL; + } + + if (!en) { + if (error_msg) + *error_msg = ps->err; + else + sfree(ps->err); + if (error_loc) + *error_loc = ps->errloc; + return NULL; + } + + return en; +} + +static bool eval(ExprNode *en, const char *hostname, unsigned port) +{ + switch (en->op) { + case OP_AND: + for (size_t i = 0; i < en->nsubexprs; i++) + if (!eval(en->subexprs[i], hostname, port)) + return false; + return true; + + case OP_OR: + for (size_t i = 0; i < en->nsubexprs; i++) + if (eval(en->subexprs[i], hostname, port)) + return true; + return false; + + case OP_NOT: + return !eval(en->subexpr, hostname, port); + + case OP_HOSTNAME_WC: + return wc_match(en->wc, hostname); + + case OP_PORT_RANGE: + return en->lo <= port && port <= en->hi; + + default: + unreachable("unhandled node type in eval"); + } +} + +bool cert_expr_match_str(const char *expression, + const char *hostname, unsigned port) +{ + ExprNode *en = parse(ptrlen_from_asciz(expression), NULL, NULL); + if (!en) + return false; + + bool matched = eval(en, hostname, port); + exprnode_free(en); + return matched; +} + +bool cert_expr_valid(const char *expression, + char **error_msg, ptrlen *error_loc) +{ + ExprNode *en = parse(ptrlen_from_asciz(expression), error_msg, error_loc); + if (en) { + exprnode_free(en); + return true; + } else { + return false; + } +} + +struct CertExprBuilder { + char **wcs; + size_t nwcs, wcsize; +}; + +CertExprBuilder *cert_expr_builder_new(void) +{ + CertExprBuilder *eb = snew(CertExprBuilder); + eb->wcs = NULL; + eb->nwcs = eb->wcsize = 0; + return eb; +} + +void cert_expr_builder_free(CertExprBuilder *eb) +{ + for (size_t i = 0; i < eb->nwcs; i++) + sfree(eb->wcs[i]); + sfree(eb->wcs); + sfree(eb); +} + +void cert_expr_builder_add(CertExprBuilder *eb, const char *wildcard) +{ + /* Check this wildcard is lexically valid as an atom */ + ptrlen orig = ptrlen_from_asciz(wildcard), pl = orig; + ptrlen toktext; + char *err; + Token tok = lex(&pl, &toktext, &err); + if (!(tok == TOK_ATOM && + toktext.ptr == orig.ptr && + toktext.len == orig.len && + atom_is_hostname_wc(toktext))) { + if (tok == TOK_ERROR) + sfree(err); + return; + } + + sgrowarray(eb->wcs, eb->wcsize, eb->nwcs); + eb->wcs[eb->nwcs++] = mkstr(orig); +} + +char *cert_expr_expression(CertExprBuilder *eb) +{ + strbuf *sb = strbuf_new(); + for (size_t i = 0; i < eb->nwcs; i++) { + if (i) + put_dataz(sb, " || "); + put_dataz(sb, eb->wcs[i]); + } + return strbuf_to_str(sb); +} + +#ifdef TEST + +void out_of_memory(void) { fprintf(stderr, "out of memory\n"); abort(); } + +static void exprnode_dump(BinarySink *bs, ExprNode *en, const char *origtext) +{ + put_fmt(bs, "(%zu:%zu ", + (size_t)((const char *)en->text.ptr - origtext), + (size_t)((const char *)ptrlen_end(en->text) - origtext)); + switch (en->op) { + case OP_AND: + case OP_OR: + put_dataz(bs, en->op == OP_AND ? "and" : "or"); + for (size_t i = 0; i < en->nsubexprs; i++) { + put_byte(bs, ' '); + exprnode_dump(bs, en->subexprs[i], origtext); + } + break; + case OP_NOT: + put_dataz(bs, "not "); + exprnode_dump(bs, en->subexpr, origtext); + break; + case OP_HOSTNAME_WC: + put_dataz(bs, "host-wc '"); + put_dataz(bs, en->wc); + put_byte(bs, '\''); + break; + case OP_PORT_RANGE: + put_fmt(bs, "port-range %u %u", en->lo, en->hi); + break; + default: + unreachable("unhandled node type in exprnode_dump"); + } + put_byte(bs, ')'); +} + +static const struct ParseTest { + const char *file; + int line; + const char *expr, *output; +} parsetests[] = { +#define T(expr_, output_) { \ + .file=__FILE__, .line=__LINE__, .expr=expr_, .output=output_} + + T("*.example.com", "(0:13 host-wc '*.example.com')"), + T("port:0", "(0:6 port-range 0 0)"), + T("port:22", "(0:7 port-range 22 22)"), + T("port:22-22", "(0:10 port-range 22 22)"), + T("port:65535", "(0:10 port-range 65535 65535)"), + T("port:0-1023", "(0:11 port-range 0 1023)"), + + T("&", "ERR:0:1:unrecognised boolean operator"), + T("|", "ERR:0:1:unrecognised boolean operator"), + T(";", "ERR:0:1:unexpected character in expression"), + T("port:", "ERR:0:5:unable to parse port number specification"), + T("port:abc", "ERR:0:8:unable to parse port number specification"), + T("port:65536", "ERR:5:10:port number too large"), + T("port:65536-65537", "ERR:5:10:port number too large"), + T("port:0-65536", "ERR:7:12:port number too large"), + T("port:23-22", "ERR:5:10:port number range is backwards"), + + T("a", "(0:1 host-wc 'a')"), + T("(a)", "(0:3 host-wc 'a')"), + T("((a))", "(0:5 host-wc 'a')"), + T(" (\n(\ra\t)\f)\v", "(1:10 host-wc 'a')"), + T("a&&b", "(0:4 and (0:1 host-wc 'a') (3:4 host-wc 'b'))"), + T("a||b", "(0:4 or (0:1 host-wc 'a') (3:4 host-wc 'b'))"), + T("a&&b&&c", "(0:7 and (0:1 host-wc 'a') (3:4 host-wc 'b') (6:7 host-wc 'c'))"), + T("a||b||c", "(0:7 or (0:1 host-wc 'a') (3:4 host-wc 'b') (6:7 host-wc 'c'))"), + T("a&&(b||c)", "(0:9 and (0:1 host-wc 'a') (3:9 or (4:5 host-wc 'b') (7:8 host-wc 'c')))"), + T("a||(b&&c)", "(0:9 or (0:1 host-wc 'a') (3:9 and (4:5 host-wc 'b') (7:8 host-wc 'c')))"), + T("(a&&b)||c", "(0:9 or (0:6 and (1:2 host-wc 'a') (4:5 host-wc 'b')) (8:9 host-wc 'c'))"), + T("(a||b)&&c", "(0:9 and (0:6 or (1:2 host-wc 'a') (4:5 host-wc 'b')) (8:9 host-wc 'c'))"), + T("!a&&b", "(0:5 and (0:2 not (1:2 host-wc 'a')) (4:5 host-wc 'b'))"), + T("a&&!b&&c", "(0:8 and (0:1 host-wc 'a') (3:5 not (4:5 host-wc 'b')) (7:8 host-wc 'c'))"), + T("!a||b", "(0:5 or (0:2 not (1:2 host-wc 'a')) (4:5 host-wc 'b'))"), + T("a||!b||c", "(0:8 or (0:1 host-wc 'a') (3:5 not (4:5 host-wc 'b')) (7:8 host-wc 'c'))"), + + T("", "ERR:0:0:expected a predicate or a parenthesised subexpression"), + T("a &&", "ERR:4:4:expected a predicate or a parenthesised subexpression"), + T("a ||", "ERR:4:4:expected a predicate or a parenthesised subexpression"), + T("a b c d", "ERR:2:7:unexpected text at end of expression"), + T("(", "ERR:1:1:expected a predicate or a parenthesised subexpression"), + T("(a", "ERR:1:2:expected ')' after parenthesised subexpression"), + T("(a b", "ERR:1:2:expected ')' after parenthesised subexpression"), + T("a&&b&&c||d||e", "ERR:6:7:expected parentheses to disambiguate && and || on either side of expression"), + T("a||b||c&&d&&e", "ERR:6:7:expected parentheses to disambiguate && and || on either side of expression"), + T("!", "ERR:1:1:expected a predicate or a parenthesised subexpression"), + + T("!a", "(0:2 not (1:2 host-wc 'a'))"), + +#undef T +}; + +static const struct EvalTest { + const char *file; + int line; + const char *expr; + const char *host; + unsigned port; + bool output; +} evaltests[] = { +#define T(expr_, host_, port_, output_) { \ + .file=__FILE__, .line=__LINE__, \ + .expr=expr_, .host=host_, .port=port_, .output=output_} + + T("*.example.com", "hostname.example.com", 22, true), + T("*.example.com", "hostname.example.org", 22, false), + T("*.example.com", "hostname.dept.example.com", 22, true), + T("*.example.com && port:22", "hostname.example.com", 21, false), + T("*.example.com && port:22", "hostname.example.com", 22, true), + T("*.example.com && port:22", "hostname.example.com", 23, false), + T("*.example.com && port:22-24", "hostname.example.com", 21, false), + T("*.example.com && port:22-24", "hostname.example.com", 22, true), + T("*.example.com && port:22-24", "hostname.example.com", 23, true), + T("*.example.com && port:22-24", "hostname.example.com", 24, true), + T("*.example.com && port:22-24", "hostname.example.com", 25, false), + + T("*a* && *b* && *c*", "", 22, false), + T("*a* && *b* && *c*", "a", 22, false), + T("*a* && *b* && *c*", "b", 22, false), + T("*a* && *b* && *c*", "c", 22, false), + T("*a* && *b* && *c*", "ab", 22, false), + T("*a* && *b* && *c*", "ac", 22, false), + T("*a* && *b* && *c*", "bc", 22, false), + T("*a* && *b* && *c*", "abc", 22, true), + + T("*a* || *b* || *c*", "", 22, false), + T("*a* || *b* || *c*", "a", 22, true), + T("*a* || *b* || *c*", "b", 22, true), + T("*a* || *b* || *c*", "c", 22, true), + T("*a* || *b* || *c*", "ab", 22, true), + T("*a* || *b* || *c*", "ac", 22, true), + T("*a* || *b* || *c*", "bc", 22, true), + T("*a* || *b* || *c*", "abc", 22, true), + + T("*a* && !*b* && *c*", "", 22, false), + T("*a* && !*b* && *c*", "a", 22, false), + T("*a* && !*b* && *c*", "b", 22, false), + T("*a* && !*b* && *c*", "c", 22, false), + T("*a* && !*b* && *c*", "ab", 22, false), + T("*a* && !*b* && *c*", "ac", 22, true), + T("*a* && !*b* && *c*", "bc", 22, false), + T("*a* && !*b* && *c*", "abc", 22, false), + + T("*a* || !*b* || *c*", "", 22, true), + T("*a* || !*b* || *c*", "a", 22, true), + T("*a* || !*b* || *c*", "b", 22, false), + T("*a* || !*b* || *c*", "c", 22, true), + T("*a* || !*b* || *c*", "ab", 22, true), + T("*a* || !*b* || *c*", "ac", 22, true), + T("*a* || !*b* || *c*", "bc", 22, true), + T("*a* || !*b* || *c*", "abc", 22, true), + +#undef T +}; + +int main(int argc, char **argv) +{ + if (argc > 1) { + /* + * Parse an expression from the command line. + */ + + ptrlen expr = ptrlen_from_asciz(argv[1]); + char *error_msg; + ptrlen error_loc; + ExprNode *en = parse(expr, &error_msg, &error_loc); + if (!en) { + fprintf(stderr, "ERR:%zu:%zu:%s\n", + (size_t)((const char *)error_loc.ptr - argv[1]), + (size_t)((const char *)ptrlen_end(error_loc) - argv[1]), + error_msg); + fprintf(stderr, "%.*s\n", PTRLEN_PRINTF(expr)); + for (const char *p = expr.ptr, *e = error_loc.ptr; p<e; p++) + fputc(' ', stderr); + for (size_t i = 0; i < error_loc.len || i < 1; i++) + fputc('^', stderr); + fputc('\n', stderr); + sfree(error_msg); + return 1; + } + + if (argc > 2) { + /* + * Test-evaluate against a host/port pair given on the + * command line. + */ + const char *host = argv[2]; + unsigned port = (argc > 3 ? strtoul(argv[3], NULL, 0) : 22); + bool result = eval(en, host, port); + printf("%s\n", result ? "accept" : "reject"); + } else { + /* + * Just dump the result of parsing the expression. + */ + stdio_sink ss[1]; + stdio_sink_init(ss, stdout); + exprnode_dump(BinarySink_UPCAST(ss), en, expr.ptr); + put_byte(ss, '\n'); + } + + exprnode_free(en); + + return 0; + } else { + /* + * Run our automated tests. + */ + size_t pass = 0, fail = 0; + + for (size_t i = 0; i < lenof(parsetests); i++) { + const struct ParseTest *test = &parsetests[i]; + + ptrlen expr = ptrlen_from_asciz(test->expr); + char *error_msg; + ptrlen error_loc; + ExprNode *en = parse(expr, &error_msg, &error_loc); + + strbuf *output = strbuf_new(); + if (!en) { + put_fmt(output, "ERR:%zu:%zu:%s", + (size_t)((const char *)error_loc.ptr - test->expr), + (size_t)((const char *)ptrlen_end(error_loc) - + test->expr), + error_msg); + sfree(error_msg); + } else { + exprnode_dump(BinarySink_UPCAST(output), en, expr.ptr); + exprnode_free(en); + } + + if (ptrlen_eq_ptrlen(ptrlen_from_strbuf(output), + ptrlen_from_asciz(test->output))) { + pass++; + } else { + fprintf(stderr, "FAIL: parsetests[%zu] @ %s:%d:\n" + " expression: %s\n" + " expected: %s\n" + " actual: %s\n", + i, test->file, test->line, test->expr, + test->output, output->s); + fail++; + } + + strbuf_free(output); + } + + for (size_t i = 0; i < lenof(evaltests); i++) { + const struct EvalTest *test = &evaltests[i]; + + ptrlen expr = ptrlen_from_asciz(test->expr); + char *error_msg; + ptrlen error_loc; + ExprNode *en = parse(expr, &error_msg, &error_loc); + + if (!en) { + fprintf(stderr, "FAIL: evaltests[%zu] @ %s:%d:\n" + " expression: %s\n" + " parse error: %zu:%zu:%s\n", + i, test->file, test->line, test->expr, + (size_t)((const char *)error_loc.ptr - test->expr), + (size_t)((const char *)ptrlen_end(error_loc) - + test->expr), + error_msg); + sfree(error_msg); + } else { + bool output = eval(en, test->host, test->port); + if (output == test->output) { + pass++; + } else { + fprintf(stderr, "FAIL: evaltests[%zu] @ %s:%d:\n" + " expression: %s\n" + " host: %s\n" + " port: %u\n" + " expected: %s\n" + " actual: %s\n", + i, test->file, test->line, test->expr, + test->host, test->port, + test->output ? "accept" : "reject", + output ? "accept" : "reject"); + fail++; + } + exprnode_free(en); + } + } + + fprintf(stderr, "pass %zu fail %zu total %zu\n", + pass, fail, pass+fail); + return fail != 0; + } +} + +#endif // TEST |