Introduce a new operator `~` and new `/.../eis` regular expression syntax.
This allows filtering by regular expression, e.g.
jsonfilter -s '[ "foo", "bar", "baz" ]' -e '$[@ ~ /^b/]'
... would yield the values `bar` and `baz`.
Possible regular expression modifiers are:
- `e` ... enable extended POSIX regular expressions
- `i` ... perform case insensitive matches
- `s` ... let `.` and negated bracket expressions (`[^...]`) match the newline character
A regular expression literal may occur on the left or the right side of
the `~` operator, but not on both.
In case neither side of the `~` operator is a regular expression, the right
side will be treated as a regular expression pattern. Non-string values are
converted to their string representation before matching is performed.
Signed-off-by: Jo-Philipp Wich <jo@mein.io>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
+#include <regex.h>
#include "ast.h"
#include "lexer.h"
case 'r': *out = '\r'; break;
case 't': *out = '\t'; break;
case 'v': *out = '\v'; break;
- default: *out = *in; break;
+ default:
+ /* in regexp mode, retain backslash */
+ if (q == '/')
+ {
+ if (rem-- < 1)
+ {
+ s->error_pos = s->off + (in - buf);
+ return -3;
+ }
+
+ *out++ = '\\';
+ }
+
+ *out = *in;
+ break;
}
in++;
}
+/*
+ * Parses a regexp literal, including any trailing modifier characters,
+ * from the given buffer.
+ *
+ * The literal itself is handed to parse_string(). When that call consumed
+ * at least two characters, the characters directly following the literal
+ * are inspected and folded into the regcomp() flags stored in op->num,
+ * which start out as REG_NOSUB | REG_NEWLINE:
+ *
+ *   e	sets REG_EXTENDED
+ *   i	sets REG_ICASE
+ *   s	clears REG_NEWLINE
+ *
+ * The first character that is not one of these modifiers (including the
+ * terminating NUL byte) ends the literal and is not consumed.
+ *
+ * Returns a negative value on error, otherwise the amount of consumed
+ * characters from the given buffer.
+ *
+ * Error values:
+ *  -1	Unterminated regexp
+ *  -2	Invalid escape sequence
+ *  -3	Regexp literal too long
+ */
+
+static int
+parse_regexp(const char *buf, struct jp_opcode *op, struct jp_state *s)
+{
+	int len = parse_string(buf, op, s);
+	const char *p;
+
+	/* error code or too short for a literal: hand the result back as-is */
+	if (len < 2)
+		return len;
+
+	op->num = REG_NOSUB | REG_NEWLINE;
+
+	/* test the character, not the pointer, so the scan ends at the NUL */
+	for (p = buf + len; *p; p++)
+	{
+		switch (*p)
+		{
+		case 'e':
+			op->num |= REG_EXTENDED;
+			break;
+
+		case 'i':
+			op->num |= REG_ICASE;
+			break;
+
+		case 's':
+			op->num &= ~REG_NEWLINE;
+			break;
+
+		default:
+			/* not a modifier, the literal ends here */
+			return len;
+		}
+
+		/* every recognized modifier is part of the consumed token */
+		len++;
+	}
+
+	return len;
+}
+
+
/*
* Parses a label from the given buffer.
*
{ T_LT, "<", 1 },
{ T_GT, ">", 1 },
{ T_EQ, "=", 1 },
+ { T_MATCH, "~", 1 },
{ T_NOT, "!", 1 },
{ T_WILDCARD, "*", 1 },
+ { T_REGEXP, "/", 1, parse_regexp },
{ T_STRING, "'", 1, parse_string },
{ T_STRING, "\"", 1, parse_string },
{ T_LABEL, "_", 1, parse_label },
{ T_NUMBER, "09", 0, parse_number },
};
-const char *tokennames[23] = {
+const char *tokennames[25] = {
[0] = "End of file",
[T_AND] = "'&&'",
[T_OR] = "'||'",
[T_GE] = "'>='",
[T_LT] = "'<'",
[T_LE] = "'<='",
+ [T_MATCH] = "'~'",
[T_NOT] = "'!'",
[T_LABEL] = "Label",
[T_ROOT] = "'$'",
[T_THIS] = "'@'",
[T_DOT] = "'.'",
[T_WILDCARD] = "'*'",
+ [T_REGEXP] = "/.../",
[T_BROPEN] = "'['",
[T_BRCLOSE] = "']'",
[T_BOOL] = "Bool",
#include "ast.h"
-extern const char *tokennames[23];
+extern const char *tokennames[25];
struct jp_opcode *
jp_get_token(struct jp_state *s, const char *input, int *mlen);
#include "parser.h"
#include "matcher.h"
+
static struct json_object *
jp_match_next(struct jp_opcode *ptr,
struct json_object *root, struct json_object *cur,
}
}
+/*
+ * Converts a resolved operand into the string used for regexp matching.
+ *
+ * Boolean operands yield "true" or "false", number operands are formatted
+ * as decimal integer into buf (which holds len bytes) and string operands
+ * yield their string value unchanged. Any other operand type yields NULL.
+ */
+static const char *
+jp_regmatch_str(const struct jp_opcode *op, char *buf, size_t len)
+{
+	switch (op->type)
+	{
+	case T_BOOL:
+		return op->num ? "true" : "false";
+
+	case T_NUMBER:
+		snprintf(buf, len, "%d", op->num);
+		return buf;
+
+	case T_STRING:
+		return op->str;
+
+	default:
+		return NULL;
+	}
+}
+
+/*
+ * Evaluates a `~` match expression.
+ *
+ * Both operands below op are resolved with jp_resolve(). If the left
+ * operand is a regexp literal, it supplies pattern and flags and the right
+ * operand is the subject. Otherwise the left operand is the subject and
+ * the right operand supplies the pattern: a regexp literal brings its own
+ * flags, any other value is converted to a string and compiled with the
+ * default flags REG_NOSUB | REG_NEWLINE.
+ *
+ * Returns true if the subject matches the pattern. Returns false if an
+ * operand cannot be resolved, has a type that cannot be converted to a
+ * string, if the pattern fails to compile or if there is no match.
+ */
+static bool
+jp_regmatch(struct jp_opcode *op, struct json_object *root, struct json_object *cur)
+{
+	struct jp_opcode left, right;
+	char lbuf[22], rbuf[22];
+	const char *subject, *pattern;
+	int rflags = REG_NOSUB | REG_NEWLINE;
+	regex_t preg;
+	int err;
+
+	if (!jp_resolve(root, cur, op->down, &left) ||
+	    !jp_resolve(root, cur, op->down->sibling, &right))
+		return false;
+
+	if (left.type == T_REGEXP)
+	{
+		/* regexp on the left: the right side is the subject; a second
+		 * regexp there is not convertible and yields NULL */
+		subject = jp_regmatch_str(&right, lbuf, sizeof(lbuf));
+		pattern = left.str;
+		rflags = left.num;
+	}
+	else
+	{
+		subject = jp_regmatch_str(&left, lbuf, sizeof(lbuf));
+
+		if (right.type == T_REGEXP)
+		{
+			pattern = right.str;
+			rflags = right.num;
+		}
+		else
+		{
+			/* plain value on the right: use its string form as pattern */
+			pattern = jp_regmatch_str(&right, rbuf, sizeof(rbuf));
+		}
+	}
+
+	/* unsupported operand type or missing string value */
+	if (!subject || !pattern)
+		return false;
+
+	if (regcomp(&preg, pattern, rflags))
+		return false;
+
+	err = regexec(&preg, subject, 0, NULL, 0);
+
+	regfree(&preg);
+
+	return err == 0;
+}
+
static bool
jp_expr(struct jp_opcode *op, struct json_object *root, struct json_object *cur,
int idx, const char *key, jp_match_cb_t cb, void *priv)
case T_GE:
return jp_cmp(op, root, cur);
+ case T_MATCH:
+ return jp_regmatch(op, root, cur);
+
case T_ROOT:
return !!jp_match(op, root, NULL, NULL);
#include <string.h>
#include <stdbool.h>
+#include <stdio.h>
+#include <regex.h>
#ifdef JSONC
#include <json.h>
%left T_AND.
%left T_OR.
%left T_UNION.
-%nonassoc T_EQ T_NE T_GT T_GE T_LT T_LE.
+%nonassoc T_EQ T_NE T_GT T_GE T_LT T_LE T_MATCH.
%right T_NOT.
%include {
cmp_exp(A) ::= unary_exp(B) T_GE unary_exp(C). { A = alloc_op(T_GE, 0, NULL, B, C); }
cmp_exp(A) ::= unary_exp(B) T_EQ unary_exp(C). { A = alloc_op(T_EQ, 0, NULL, B, C); }
cmp_exp(A) ::= unary_exp(B) T_NE unary_exp(C). { A = alloc_op(T_NE, 0, NULL, B, C); }
+cmp_exp(A) ::= unary_exp(B) T_MATCH unary_exp(C). { A = alloc_op(T_MATCH, 0, NULL, B, C); }
cmp_exp(A) ::= unary_exp(B). { A = B; }
unary_exp(A) ::= T_BOOL(B). { A = B; }
unary_exp(A) ::= T_NUMBER(B). { A = B; }
unary_exp(A) ::= T_STRING(B). { A = B; }
+unary_exp(A) ::= T_REGEXP(B). { A = B; }
unary_exp(A) ::= T_WILDCARD(B). { A = B; }
unary_exp(A) ::= T_POPEN or_exps(B) T_PCLOSE. { A = B; }
unary_exp(A) ::= T_NOT unary_exp(B). { A = alloc_op(T_NOT, 0, NULL, B); }