/**
 * @file xpath.c
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief YANG XPath evaluation functions
 *
 * Copyright (c) 2015 - 2017 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
14
15#include "common.h"
16
17#include <ctype.h>
18#include <stdio.h>
19#include <stdlib.h>
20#include <stdint.h>
21#include <string.h>
22#include <assert.h>
23#include <limits.h>
24#include <errno.h>
25#include <math.h>
26#include <pcre.h>
27
28#include "xpath.h"
29#include "xml.h"
30
/**
 * @brief Measure the NCName found at the beginning of a string.
 *
 * Characters are decoded one by one with ly_getutf8(). The first one has to
 * satisfy is_xmlqnamestartchar(), the following ones is_xmlqnamechar(), and
 * a colon always terminates the name.
 *
 * @param[in] ncname String to examine.
 *
 * @return Number of bytes forming the NCName, 0 if the first character cannot
 * be decoded or cannot start an NCName. If decoding of a later character
 * fails, the negated number of bytes accepted so far is returned.
 */
static long int
parse_ncname(const char *ncname)
{
    unsigned int cp;
    size_t cp_bytes;
    long int total = 0;

    /* the very first character decides whether there is any name at all */
    LY_CHECK_RET(ly_getutf8(&ncname, &cp, &cp_bytes), 0);
    if ((cp == ':') || !is_xmlqnamestartchar(cp)) {
        return total;
    }

    for (;;) {
        /* the character decoded last belongs to the name */
        total += cp_bytes;

        if (*ncname == '\0') {
            /* nothing more to read */
            break;
        }

        LY_CHECK_RET(ly_getutf8(&ncname, &cp, &cp_bytes), -total);
        if (!is_xmlqnamechar(cp) || (cp == ':')) {
            /* first character that is not part of the name, it is not counted */
            break;
        }
    }

    return total;
}
60
61/**
62 * @brief Add \p token into the expression \p exp.
63 *
64 * @param[in] ctx libyang context to log in.
65 * @param[in] exp Expression to use.
66 * @param[in] token Token to add.
67 * @param[in] expr_pos Token position in the XPath expression.
68 * @param[in] tok_len Token length in the XPath expression.
69 * @return LY_ERR value
70 */
71static LY_ERR
72exp_add_token(struct ly_ctx *ctx, struct lyxp_expr *exp, enum lyxp_token token, uint16_t expr_pos, uint16_t tok_len)
73{
74 uint32_t prev;
75
76 if (exp->used == exp->size) {
77 prev = exp->size;
78 exp->size += LYXP_EXPR_SIZE_STEP;
79 if (prev > exp->size) {
80 LOGINT(ctx);
81 return LY_EINT;
82 }
83
84 exp->tokens = ly_realloc(exp->tokens, exp->size * sizeof *exp->tokens);
85 LY_CHECK_ERR_RET(!exp->tokens, LOGMEM(ctx), LY_EMEM);
86 exp->tok_pos = ly_realloc(exp->tok_pos, exp->size * sizeof *exp->tok_pos);
87 LY_CHECK_ERR_RET(!exp->tok_pos, LOGMEM(ctx), LY_EMEM);
88 exp->tok_len = ly_realloc(exp->tok_len, exp->size * sizeof *exp->tok_len);
89 LY_CHECK_ERR_RET(!exp->tok_len, LOGMEM(ctx), LY_EMEM);
90 }
91
92 exp->tokens[exp->used] = token;
93 exp->tok_pos[exp->used] = expr_pos;
94 exp->tok_len[exp->used] = tok_len;
95 ++exp->used;
96 return LY_SUCCESS;
97}
98
99void
100lyxp_expr_free(struct ly_ctx *ctx, struct lyxp_expr *expr)
101{
102 uint16_t i;
103
104 if (!expr) {
105 return;
106 }
107
108 lydict_remove(ctx, expr->expr);
109 free(expr->tokens);
110 free(expr->tok_pos);
111 free(expr->tok_len);
112 if (expr->repeat) {
113 for (i = 0; i < expr->used; ++i) {
114 free(expr->repeat[i]);
115 }
116 }
117 free(expr->repeat);
118 free(expr);
119}
120
121struct lyxp_expr *
122lyxp_expr_parse(struct ly_ctx *ctx, const char *expr)
123{
124 struct lyxp_expr *ret;
Radek Krejcid4270262019-01-07 15:07:25 +0100125 size_t parsed = 0, tok_len;
126 long int ncname_len;
Radek Krejcib1646a92018-11-02 16:08:26 +0100127 enum lyxp_token tok_type;
128 int prev_function_check = 0;
129
130 if (strlen(expr) > UINT16_MAX) {
131 LOGERR(ctx, LY_EINVAL, "XPath expression cannot be longer than %ud characters.", UINT16_MAX);
132 return NULL;
133 }
134
135 /* init lyxp_expr structure */
136 ret = calloc(1, sizeof *ret);
137 LY_CHECK_ERR_GOTO(!ret, LOGMEM(ctx), error);
138 ret->expr = lydict_insert(ctx, expr, strlen(expr));
139 LY_CHECK_ERR_GOTO(!ret->expr, LOGMEM(ctx), error);
140 ret->used = 0;
141 ret->size = LYXP_EXPR_SIZE_START;
142 ret->tokens = malloc(ret->size * sizeof *ret->tokens);
143 LY_CHECK_ERR_GOTO(!ret->tokens, LOGMEM(ctx), error);
144
145 ret->tok_pos = malloc(ret->size * sizeof *ret->tok_pos);
146 LY_CHECK_ERR_GOTO(!ret->tok_pos, LOGMEM(ctx), error);
147
148 ret->tok_len = malloc(ret->size * sizeof *ret->tok_len);
149 LY_CHECK_ERR_GOTO(!ret->tok_len, LOGMEM(ctx), error);
150
151 while (is_xmlws(expr[parsed])) {
152 ++parsed;
153 }
154
155 do {
156 if (expr[parsed] == '(') {
157
158 /* '(' */
159 tok_len = 1;
160 tok_type = LYXP_TOKEN_PAR1;
161
162 if (prev_function_check && ret->used && (ret->tokens[ret->used - 1] == LYXP_TOKEN_NAMETEST)) {
163 /* it is a NodeType/FunctionName after all */
164 if (((ret->tok_len[ret->used - 1] == 4)
165 && (!strncmp(&expr[ret->tok_pos[ret->used - 1]], "node", 4)
166 || !strncmp(&expr[ret->tok_pos[ret->used - 1]], "text", 4))) ||
167 ((ret->tok_len[ret->used - 1] == 7)
168 && !strncmp(&expr[ret->tok_pos[ret->used - 1]], "comment", 7))) {
169 ret->tokens[ret->used - 1] = LYXP_TOKEN_NODETYPE;
170 } else {
171 ret->tokens[ret->used - 1] = LYXP_TOKEN_FUNCNAME;
172 }
173 prev_function_check = 0;
174 }
175
176 } else if (expr[parsed] == ')') {
177
178 /* ')' */
179 tok_len = 1;
180 tok_type = LYXP_TOKEN_PAR2;
181
182 } else if (expr[parsed] == '[') {
183
184 /* '[' */
185 tok_len = 1;
186 tok_type = LYXP_TOKEN_BRACK1;
187
188 } else if (expr[parsed] == ']') {
189
190 /* ']' */
191 tok_len = 1;
192 tok_type = LYXP_TOKEN_BRACK2;
193
194 } else if (!strncmp(&expr[parsed], "..", 2)) {
195
196 /* '..' */
197 tok_len = 2;
198 tok_type = LYXP_TOKEN_DDOT;
199
200 } else if ((expr[parsed] == '.') && (!isdigit(expr[parsed + 1]))) {
201
202 /* '.' */
203 tok_len = 1;
204 tok_type = LYXP_TOKEN_DOT;
205
206 } else if (expr[parsed] == '@') {
207
208 /* '@' */
209 tok_len = 1;
210 tok_type = LYXP_TOKEN_AT;
211
212 } else if (expr[parsed] == ',') {
213
214 /* ',' */
215 tok_len = 1;
216 tok_type = LYXP_TOKEN_COMMA;
217
218 } else if (expr[parsed] == '\'') {
219
220 /* Literal with ' */
221 for (tok_len = 1; (expr[parsed + tok_len] != '\0') && (expr[parsed + tok_len] != '\''); ++tok_len);
222 LY_CHECK_ERR_GOTO(expr[parsed + tok_len] == '\0',
223 LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_EOE, expr[parsed], &expr[parsed]), error);
224 ++tok_len;
225 tok_type = LYXP_TOKEN_LITERAL;
226
227 } else if (expr[parsed] == '\"') {
228
229 /* Literal with " */
230 for (tok_len = 1; (expr[parsed + tok_len] != '\0') && (expr[parsed + tok_len] != '\"'); ++tok_len);
231 LY_CHECK_ERR_GOTO(expr[parsed + tok_len] == '\0',
232 LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_EOE, expr[parsed], &expr[parsed]), error);
233 ++tok_len;
234 tok_type = LYXP_TOKEN_LITERAL;
235
236 } else if ((expr[parsed] == '.') || (isdigit(expr[parsed]))) {
237
238 /* Number */
239 for (tok_len = 0; isdigit(expr[parsed + tok_len]); ++tok_len);
240 if (expr[parsed + tok_len] == '.') {
241 ++tok_len;
242 for (; isdigit(expr[parsed + tok_len]); ++tok_len);
243 }
244 tok_type = LYXP_TOKEN_NUMBER;
245
246 } else if (expr[parsed] == '/') {
247
248 /* Operator '/', '//' */
249 if (!strncmp(&expr[parsed], "//", 2)) {
250 tok_len = 2;
251 } else {
252 tok_len = 1;
253 }
254 tok_type = LYXP_TOKEN_OPERATOR_PATH;
255
256 } else if (!strncmp(&expr[parsed], "!=", 2) || !strncmp(&expr[parsed], "<=", 2)
257 || !strncmp(&expr[parsed], ">=", 2)) {
258
259 /* Operator '!=', '<=', '>=' */
260 tok_len = 2;
261 tok_type = LYXP_TOKEN_OPERATOR_COMP;
262
263 } else if (expr[parsed] == '|') {
264
265 /* Operator '|' */
266 tok_len = 1;
267 tok_type = LYXP_TOKEN_OPERATOR_UNI;
268
269 } else if ((expr[parsed] == '+') || (expr[parsed] == '-')) {
270
271 /* Operator '+', '-' */
272 tok_len = 1;
273 tok_type = LYXP_TOKEN_OPERATOR_MATH;
274
275 } else if ((expr[parsed] == '=') || (expr[parsed] == '<') || (expr[parsed] == '>')) {
276
277 /* Operator '=', '<', '>' */
278 tok_len = 1;
279 tok_type = LYXP_TOKEN_OPERATOR_COMP;
280
281 } else if (ret->used && (ret->tokens[ret->used - 1] != LYXP_TOKEN_AT)
282 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_PAR1)
283 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_BRACK1)
284 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_COMMA)
285 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_LOG)
286 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_COMP)
287 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_MATH)
288 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_UNI)
289 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_PATH)) {
290
291 /* Operator '*', 'or', 'and', 'mod', or 'div' */
292 if (expr[parsed] == '*') {
293 tok_len = 1;
294 tok_type = LYXP_TOKEN_OPERATOR_MATH;
295
296 } else if (!strncmp(&expr[parsed], "or", 2)) {
297 tok_len = 2;
298 tok_type = LYXP_TOKEN_OPERATOR_LOG;
299
300 } else if (!strncmp(&expr[parsed], "and", 3)) {
301 tok_len = 3;
302 tok_type = LYXP_TOKEN_OPERATOR_LOG;
303
304 } else if (!strncmp(&expr[parsed], "mod", 3) || !strncmp(&expr[parsed], "div", 3)) {
305 tok_len = 3;
306 tok_type = LYXP_TOKEN_OPERATOR_MATH;
307
308 } else if (prev_function_check) {
309 LOGVAL(ctx, LY_VLOG_NONE, NULL, LYVE_XPATH, "Invalid character 0x%x, perhaps \"%.*s\" is supposed to be a function call.",
310 expr[parsed], &expr[parsed], ret->tok_len[ret->used - 1], &ret->expr[ret->tok_pos[ret->used - 1]]);
311 goto error;
312 } else {
Radek Krejcid4270262019-01-07 15:07:25 +0100313 LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_INEXPR, parsed + 1, expr);
Radek Krejcib1646a92018-11-02 16:08:26 +0100314 goto error;
315 }
316 } else if (expr[parsed] == '*') {
317
318 /* NameTest '*' */
319 tok_len = 1;
320 tok_type = LYXP_TOKEN_NAMETEST;
321
322 } else {
323
324 /* NameTest (NCName ':' '*' | QName) or NodeType/FunctionName */
325 ncname_len = parse_ncname(&expr[parsed]);
Radek Krejcid4270262019-01-07 15:07:25 +0100326 LY_CHECK_ERR_GOTO(ncname_len < 0, LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_INEXPR, parsed - ncname_len + 1, expr), error);
Radek Krejcib1646a92018-11-02 16:08:26 +0100327 tok_len = ncname_len;
328
329 if (expr[parsed + tok_len] == ':') {
330 ++tok_len;
331 if (expr[parsed + tok_len] == '*') {
332 ++tok_len;
333 } else {
334 ncname_len = parse_ncname(&expr[parsed + tok_len]);
Radek Krejcid4270262019-01-07 15:07:25 +0100335 LY_CHECK_ERR_GOTO(ncname_len < 0, LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_INEXPR, parsed - ncname_len + 1, expr), error);
Radek Krejcib1646a92018-11-02 16:08:26 +0100336 tok_len += ncname_len;
337 }
338 /* remove old flag to prevent ambiguities */
339 prev_function_check = 0;
340 tok_type = LYXP_TOKEN_NAMETEST;
341 } else {
342 /* there is no prefix so it can still be NodeType/FunctionName, we can't finally decide now */
343 prev_function_check = 1;
344 tok_type = LYXP_TOKEN_NAMETEST;
345 }
346 }
347
348 /* store the token, move on to the next one */
349 LY_CHECK_GOTO(exp_add_token(ctx, ret, tok_type, parsed, tok_len), error);
350 parsed += tok_len;
351 while (is_xmlws(expr[parsed])) {
352 ++parsed;
353 }
354
355 } while (expr[parsed]);
356
357 /* prealloc repeat */
358 ret->repeat = calloc(ret->size, sizeof *ret->repeat);
359 LY_CHECK_ERR_GOTO(!ret->repeat, LOGMEM(ctx), error);
360
361 return ret;
362
363error:
364 lyxp_expr_free(ctx, ret);
365 return NULL;
366}
367