From afa3a10096e6d3ad50dc9d8250f40d8f23a9ad42 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Wed, 18 Jun 2014 17:59:51 +0200 Subject: [PATCH] Improve error reporting Keep track of the exact location the error occured in and switch the internal error description from a dynamic string buffer to an integer which either holds negative values for lexer errors or positives values for grammer violations. In case of grammer violations the "error_code" member will hold a bitfield describing the expected tokens. Also rework the error messages emitted by the cli to be more precise. Examples: $ jsonfilter -s '{}' -e '@.foo bar' Syntax error: Expecting End of file In expression @.foo bar Near here ----------^ $ jsonfilter -s '{}' -e '@.foo\bar' Syntax error: Unexpected character In expression @.foo\bar Near here ---------^ $ jsonfilter -s '{}' -e '@.foo..bar' Syntax error: Expecting Label or '*' In expression @.foo..bar Near here ----------^ Signed-off-by: Jo-Philipp Wich --- ast.c | 14 +++---------- ast.h | 4 ++-- lexer.c | 58 +++++++++++++++++++++++++++++++++++++++++++-------- main.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++----- parser.y | 30 ++++----------------------- 5 files changed, 116 insertions(+), 53 deletions(-) diff --git a/ast.c b/ast.c index 5093e8b..6db7a46 100644 --- a/ast.c +++ b/ast.c @@ -73,9 +73,6 @@ jp_free(struct jp_state *s) op = tmp; } - if (s->error) - free(s->error); - free(s); } @@ -101,18 +98,11 @@ jp_parse(const char *expr) while (len > 0) { - s->off = (ptr - expr); - op = jp_get_token(s, ptr, &mlen); if (mlen < 0) { - s->erroff = s->off; - s->error = strdup((mlen == -3) ? "String too long" : - (mlen == -2) ? "Invalid escape sequence" : - (mlen == -1) ? "Unterminated string" : - "Unknown error"); - + s->error_code = mlen; goto out; } @@ -121,6 +111,8 @@ jp_parse(const char *expr) len -= mlen; ptr += mlen; + + s->off += mlen; } Parse(pParser, 0, NULL, s); diff --git a/ast.h b/ast.h index c2d6f2b..ec1f55c 100644 --- a/ast.h +++ b/ast.h @@ -31,8 +31,8 @@ struct jp_opcode { struct jp_state { struct jp_opcode *pool; struct jp_opcode *path; - char *error; - int erroff; + int error_pos; + int error_code; int off; }; diff --git a/lexer.c b/lexer.c index 3703d56..b1615ad 100644 --- a/lexer.c +++ b/lexer.c @@ -28,7 +28,7 @@ struct token { int type; const char *pat; int plen; - int (*parse)(const char *buf, struct jp_opcode *op); + int (*parse)(const char *buf, struct jp_opcode *op, struct jp_state *s); }; #define dec(o) \ @@ -106,7 +106,7 @@ utf8enc(char **out, int *rem, int code) */ static int -parse_string(const char *buf, struct jp_opcode *op) +parse_string(const char *buf, struct jp_opcode *op, struct jp_state *s) { char q = *(buf++); char str[128] = { 0 }; @@ -132,12 +132,16 @@ parse_string(const char *buf, struct jp_opcode *op) hex(in[2]) * 16 * 16 + hex(in[3]) * 16 + hex(in[4]))) + { + s->error_pos = s->off + (in - buf); return -3; + } in += 5; } else { + s->error_pos = s->off + (in - buf); return -2; } } @@ -148,12 +152,16 @@ parse_string(const char *buf, struct jp_opcode *op) if (isxdigit(in[1]) && isxdigit(in[2])) { if (!utf8enc(&out, &rem, hex(in[1]) * 16 + hex(in[2]))) + { + s->error_pos = s->off + (in - buf); return -3; + } in += 3; } else { + s->error_pos = s->off + (in - buf); return -2; } } @@ -170,10 +178,16 @@ parse_string(const char *buf, struct jp_opcode *op) dec(in[2]); if (code > 255) + { + s->error_pos = s->off + (in - buf); return -2; + } if (!utf8enc(&out, &rem, code)) + { + s->error_pos = s->off + (in - buf); return -3; + } in += 3; } @@ -182,7 +196,10 @@ parse_string(const char *buf, struct jp_opcode *op) else if (in[1] >= '0' && in[1] <= '7') { if (!utf8enc(&out, &rem, dec(in[0]) * 8 + dec(in[1]))) + { + s->error_pos = s->off + (in - buf); return -3; + } in += 2; } @@ -191,7 +208,10 @@ parse_string(const char *buf, struct jp_opcode *op) else { if (!utf8enc(&out, &rem, dec(in[0]))) + { + s->error_pos = s->off + (in - buf); return -3; + } in += 1; } @@ -201,7 +221,10 @@ parse_string(const char *buf, struct jp_opcode *op) else { if (rem-- < 1) + { + s->error_pos = s->off + (in - buf); return -3; + } switch (in[0]) { @@ -241,7 +264,10 @@ parse_string(const char *buf, struct jp_opcode *op) else { if (rem-- < 1) + { + s->error_pos = s->off + (in - buf); return -3; + } *out++ = *in++; } @@ -262,7 +288,7 @@ parse_string(const char *buf, struct jp_opcode *op) */ static int -parse_label(const char *buf, struct jp_opcode *op) +parse_label(const char *buf, struct jp_opcode *op, struct jp_state *s) { char str[128] = { 0 }; char *out = str; @@ -272,7 +298,10 @@ parse_label(const char *buf, struct jp_opcode *op) while (*in == '_' || isalnum(*in)) { if (rem-- < 1) + { + s->error_pos = s->off + (in - buf); return -3; + } *out++ = *in++; } @@ -302,13 +331,16 @@ parse_label(const char *buf, struct jp_opcode *op) */ static int -parse_number(const char *buf, struct jp_opcode *op) +parse_number(const char *buf, struct jp_opcode *op, struct jp_state *s) { char *e; int n = strtol(buf, &e, 10); if (e == buf) + { + s->error_pos = s->off; return -2; + } op->num = n; @@ -374,7 +406,7 @@ const char *tokennames[23] = { static int -match_token(const char *ptr, struct jp_opcode *op) +match_token(const char *ptr, struct jp_opcode *op, struct jp_state *s) { int i; const struct token *tok; @@ -389,13 +421,14 @@ match_token(const char *ptr, struct jp_opcode *op) op->type = tok->type; if (tok->parse) - return tok->parse(ptr, op); + return tok->parse(ptr, op, s); return tok->plen; } } - return -1; + s->error_pos = s->off; + return -4; } struct jp_opcode * @@ -403,10 +436,17 @@ jp_get_token(struct jp_state *s, const char *input, int *mlen) { struct jp_opcode op = { 0 }; - *mlen = match_token(input, &op); + *mlen = match_token(input, &op, s); - if (*mlen < 0 || op.type == 0) + if (*mlen < 0) + { + s->error_code = *mlen; return NULL; + } + else if (op.type == 0) + { + return NULL; + } return jp_alloc_op(s, op.type, op.num, op.str, NULL); } diff --git a/main.c b/main.c index 3166746..62e38e9 100644 --- a/main.c +++ b/main.c @@ -250,6 +250,57 @@ match_cb(struct json_object *res, void *priv) } } +static void +print_error(struct jp_state *state, char *expr) +{ + int i; + bool first = true; + + fprintf(stderr, "Syntax error: "); + + switch (state->error_code) + { + case -4: + fprintf(stderr, "Unexpected character\n"); + break; + + case -3: + fprintf(stderr, "String or label literal too long\n"); + break; + + case -2: + fprintf(stderr, "Invalid escape sequence\n"); + break; + + case -1: + fprintf(stderr, "Unterminated string\n"); + break; + + default: + for (i = 0; i < sizeof(state->error_code) * 8; i++) + { + if (state->error_code & (1 << i)) + { + fprintf(stderr, + first ? "Expecting %s" : " or %s", tokennames[i]); + + first = false; + } + } + + fprintf(stderr, "\n"); + break; + } + + fprintf(stderr, "In expression %s\n", expr); + fprintf(stderr, "Near here ----"); + + for (i = 0; i < state->error_pos; i++) + fprintf(stderr, "-"); + + fprintf(stderr, "^\n"); +} + static bool filter_json(int opt, struct json_object *jsobj, char *expr) { @@ -261,12 +312,14 @@ filter_json(int opt, struct json_object *jsobj, char *expr) state = jp_parse(expr); - if (!state || state->error) + if (!state) { - fprintf(stderr, "Syntax error near {%s}: %s\n", - state ? expr + state->erroff : expr, - state ? state->error : "Out of memory"); - + fprintf(stderr, "Out of memory\n"); + goto out; + } + else if (state->error_code) + { + print_error(state, expr); goto out; } diff --git a/parser.y b/parser.y index 9c06c07..f77779b 100644 --- a/parser.y +++ b/parser.y @@ -39,35 +39,13 @@ } %syntax_error { - int n = sizeof(tokennames) / sizeof(tokennames[0]); - int l = strlen("Expecting "); - int c = 0; int i; - for (i = 0; i < n; i++) - { + for (i = 0; i < sizeof(tokennames) / sizeof(tokennames[0]); i++) if (yy_find_shift_action(yypParser, (YYCODETYPE)i) < YYNSTATE + YYNRULE) - l += strlen(tokennames[i]) + 4; - } - - s->error = malloc(l); - - if (s->error) - { - s->erroff = s->off; - strcpy(s->error, "Expecting "); - - for (i = 0; i < n; i++) - { - if (yy_find_shift_action(yypParser, (YYCODETYPE)i) < YYNSTATE + YYNRULE) - { - if (c++) - strcat(s->error, " or "); - - strcat(s->error, tokennames[i]); - } - } - } + s->error_code |= (1 << i); + + s->error_pos = s->off; } -- 2.30.2