blob: 6e25cb9f740e206374d81357b36b0f33f5aa79a4 [file] [log] [blame]
/**
 * @file xpath.c
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief YANG XPath evaluation functions
 *
 * Copyright (c) 2015 - 2017 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
14
15#include "common.h"
16
17#include <ctype.h>
18#include <stdio.h>
19#include <stdlib.h>
20#include <stdint.h>
21#include <string.h>
22#include <assert.h>
23#include <limits.h>
24#include <errno.h>
25#include <math.h>
26#include <pcre.h>
27
28#include "xpath.h"
29#include "xml.h"
30
/**
 * @brief Measure the NCName found at the beginning of a string.
 *
 * Characters are decoded one at a time with ly_getutf8(); the name ends at the
 * first character that is not a valid name character, at a colon, or at the
 * end of the string.
 *
 * @param[in] ncname String that should start with an NCName.
 *
 * @return Number of bytes of \p ncname forming the NCName, 0 if the first
 * character cannot start an NCName or if decoding of any character fails.
 */
static size_t
parse_ncname(const char *ncname)
{
    unsigned int code_point;
    size_t char_bytes, total = 0;

    /* the first character must be a name-start character and must not be a colon */
    LY_CHECK_RET(ly_getutf8(&ncname, &code_point, &char_bytes), 0);
    if (!is_xmlqnamestartchar(code_point) || (code_point == ':')) {
        return total;
    }

    for (;;) {
        /* the character decoded last belongs to the name */
        total += char_bytes;

        /* ncname is expected to be moved past the decoded character by ly_getutf8() */
        if (*ncname == '\0') {
            break;
        }

        LY_CHECK_RET(ly_getutf8(&ncname, &code_point, &char_bytes), 0);
        if (!is_xmlqnamechar(code_point) || (code_point == ':')) {
            /* first character that is not part of the name */
            break;
        }
    }

    return total;
}
59
60/**
61 * @brief Add \p token into the expression \p exp.
62 *
63 * @param[in] ctx libyang context to log in.
64 * @param[in] exp Expression to use.
65 * @param[in] token Token to add.
66 * @param[in] expr_pos Token position in the XPath expression.
67 * @param[in] tok_len Token length in the XPath expression.
68 * @return LY_ERR value
69 */
70static LY_ERR
71exp_add_token(struct ly_ctx *ctx, struct lyxp_expr *exp, enum lyxp_token token, uint16_t expr_pos, uint16_t tok_len)
72{
73 uint32_t prev;
74
75 if (exp->used == exp->size) {
76 prev = exp->size;
77 exp->size += LYXP_EXPR_SIZE_STEP;
78 if (prev > exp->size) {
79 LOGINT(ctx);
80 return LY_EINT;
81 }
82
83 exp->tokens = ly_realloc(exp->tokens, exp->size * sizeof *exp->tokens);
84 LY_CHECK_ERR_RET(!exp->tokens, LOGMEM(ctx), LY_EMEM);
85 exp->tok_pos = ly_realloc(exp->tok_pos, exp->size * sizeof *exp->tok_pos);
86 LY_CHECK_ERR_RET(!exp->tok_pos, LOGMEM(ctx), LY_EMEM);
87 exp->tok_len = ly_realloc(exp->tok_len, exp->size * sizeof *exp->tok_len);
88 LY_CHECK_ERR_RET(!exp->tok_len, LOGMEM(ctx), LY_EMEM);
89 }
90
91 exp->tokens[exp->used] = token;
92 exp->tok_pos[exp->used] = expr_pos;
93 exp->tok_len[exp->used] = tok_len;
94 ++exp->used;
95 return LY_SUCCESS;
96}
97
98void
99lyxp_expr_free(struct ly_ctx *ctx, struct lyxp_expr *expr)
100{
101 uint16_t i;
102
103 if (!expr) {
104 return;
105 }
106
107 lydict_remove(ctx, expr->expr);
108 free(expr->tokens);
109 free(expr->tok_pos);
110 free(expr->tok_len);
111 if (expr->repeat) {
112 for (i = 0; i < expr->used; ++i) {
113 free(expr->repeat[i]);
114 }
115 }
116 free(expr->repeat);
117 free(expr);
118}
119
120struct lyxp_expr *
121lyxp_expr_parse(struct ly_ctx *ctx, const char *expr)
122{
123 struct lyxp_expr *ret;
124 size_t parsed = 0, tok_len, ncname_len;
125 enum lyxp_token tok_type;
126 int prev_function_check = 0;
127
128 if (strlen(expr) > UINT16_MAX) {
129 LOGERR(ctx, LY_EINVAL, "XPath expression cannot be longer than %ud characters.", UINT16_MAX);
130 return NULL;
131 }
132
133 /* init lyxp_expr structure */
134 ret = calloc(1, sizeof *ret);
135 LY_CHECK_ERR_GOTO(!ret, LOGMEM(ctx), error);
136 ret->expr = lydict_insert(ctx, expr, strlen(expr));
137 LY_CHECK_ERR_GOTO(!ret->expr, LOGMEM(ctx), error);
138 ret->used = 0;
139 ret->size = LYXP_EXPR_SIZE_START;
140 ret->tokens = malloc(ret->size * sizeof *ret->tokens);
141 LY_CHECK_ERR_GOTO(!ret->tokens, LOGMEM(ctx), error);
142
143 ret->tok_pos = malloc(ret->size * sizeof *ret->tok_pos);
144 LY_CHECK_ERR_GOTO(!ret->tok_pos, LOGMEM(ctx), error);
145
146 ret->tok_len = malloc(ret->size * sizeof *ret->tok_len);
147 LY_CHECK_ERR_GOTO(!ret->tok_len, LOGMEM(ctx), error);
148
149 while (is_xmlws(expr[parsed])) {
150 ++parsed;
151 }
152
153 do {
154 if (expr[parsed] == '(') {
155
156 /* '(' */
157 tok_len = 1;
158 tok_type = LYXP_TOKEN_PAR1;
159
160 if (prev_function_check && ret->used && (ret->tokens[ret->used - 1] == LYXP_TOKEN_NAMETEST)) {
161 /* it is a NodeType/FunctionName after all */
162 if (((ret->tok_len[ret->used - 1] == 4)
163 && (!strncmp(&expr[ret->tok_pos[ret->used - 1]], "node", 4)
164 || !strncmp(&expr[ret->tok_pos[ret->used - 1]], "text", 4))) ||
165 ((ret->tok_len[ret->used - 1] == 7)
166 && !strncmp(&expr[ret->tok_pos[ret->used - 1]], "comment", 7))) {
167 ret->tokens[ret->used - 1] = LYXP_TOKEN_NODETYPE;
168 } else {
169 ret->tokens[ret->used - 1] = LYXP_TOKEN_FUNCNAME;
170 }
171 prev_function_check = 0;
172 }
173
174 } else if (expr[parsed] == ')') {
175
176 /* ')' */
177 tok_len = 1;
178 tok_type = LYXP_TOKEN_PAR2;
179
180 } else if (expr[parsed] == '[') {
181
182 /* '[' */
183 tok_len = 1;
184 tok_type = LYXP_TOKEN_BRACK1;
185
186 } else if (expr[parsed] == ']') {
187
188 /* ']' */
189 tok_len = 1;
190 tok_type = LYXP_TOKEN_BRACK2;
191
192 } else if (!strncmp(&expr[parsed], "..", 2)) {
193
194 /* '..' */
195 tok_len = 2;
196 tok_type = LYXP_TOKEN_DDOT;
197
198 } else if ((expr[parsed] == '.') && (!isdigit(expr[parsed + 1]))) {
199
200 /* '.' */
201 tok_len = 1;
202 tok_type = LYXP_TOKEN_DOT;
203
204 } else if (expr[parsed] == '@') {
205
206 /* '@' */
207 tok_len = 1;
208 tok_type = LYXP_TOKEN_AT;
209
210 } else if (expr[parsed] == ',') {
211
212 /* ',' */
213 tok_len = 1;
214 tok_type = LYXP_TOKEN_COMMA;
215
216 } else if (expr[parsed] == '\'') {
217
218 /* Literal with ' */
219 for (tok_len = 1; (expr[parsed + tok_len] != '\0') && (expr[parsed + tok_len] != '\''); ++tok_len);
220 LY_CHECK_ERR_GOTO(expr[parsed + tok_len] == '\0',
221 LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_EOE, expr[parsed], &expr[parsed]), error);
222 ++tok_len;
223 tok_type = LYXP_TOKEN_LITERAL;
224
225 } else if (expr[parsed] == '\"') {
226
227 /* Literal with " */
228 for (tok_len = 1; (expr[parsed + tok_len] != '\0') && (expr[parsed + tok_len] != '\"'); ++tok_len);
229 LY_CHECK_ERR_GOTO(expr[parsed + tok_len] == '\0',
230 LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_EOE, expr[parsed], &expr[parsed]), error);
231 ++tok_len;
232 tok_type = LYXP_TOKEN_LITERAL;
233
234 } else if ((expr[parsed] == '.') || (isdigit(expr[parsed]))) {
235
236 /* Number */
237 for (tok_len = 0; isdigit(expr[parsed + tok_len]); ++tok_len);
238 if (expr[parsed + tok_len] == '.') {
239 ++tok_len;
240 for (; isdigit(expr[parsed + tok_len]); ++tok_len);
241 }
242 tok_type = LYXP_TOKEN_NUMBER;
243
244 } else if (expr[parsed] == '/') {
245
246 /* Operator '/', '//' */
247 if (!strncmp(&expr[parsed], "//", 2)) {
248 tok_len = 2;
249 } else {
250 tok_len = 1;
251 }
252 tok_type = LYXP_TOKEN_OPERATOR_PATH;
253
254 } else if (!strncmp(&expr[parsed], "!=", 2) || !strncmp(&expr[parsed], "<=", 2)
255 || !strncmp(&expr[parsed], ">=", 2)) {
256
257 /* Operator '!=', '<=', '>=' */
258 tok_len = 2;
259 tok_type = LYXP_TOKEN_OPERATOR_COMP;
260
261 } else if (expr[parsed] == '|') {
262
263 /* Operator '|' */
264 tok_len = 1;
265 tok_type = LYXP_TOKEN_OPERATOR_UNI;
266
267 } else if ((expr[parsed] == '+') || (expr[parsed] == '-')) {
268
269 /* Operator '+', '-' */
270 tok_len = 1;
271 tok_type = LYXP_TOKEN_OPERATOR_MATH;
272
273 } else if ((expr[parsed] == '=') || (expr[parsed] == '<') || (expr[parsed] == '>')) {
274
275 /* Operator '=', '<', '>' */
276 tok_len = 1;
277 tok_type = LYXP_TOKEN_OPERATOR_COMP;
278
279 } else if (ret->used && (ret->tokens[ret->used - 1] != LYXP_TOKEN_AT)
280 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_PAR1)
281 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_BRACK1)
282 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_COMMA)
283 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_LOG)
284 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_COMP)
285 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_MATH)
286 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_UNI)
287 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_PATH)) {
288
289 /* Operator '*', 'or', 'and', 'mod', or 'div' */
290 if (expr[parsed] == '*') {
291 tok_len = 1;
292 tok_type = LYXP_TOKEN_OPERATOR_MATH;
293
294 } else if (!strncmp(&expr[parsed], "or", 2)) {
295 tok_len = 2;
296 tok_type = LYXP_TOKEN_OPERATOR_LOG;
297
298 } else if (!strncmp(&expr[parsed], "and", 3)) {
299 tok_len = 3;
300 tok_type = LYXP_TOKEN_OPERATOR_LOG;
301
302 } else if (!strncmp(&expr[parsed], "mod", 3) || !strncmp(&expr[parsed], "div", 3)) {
303 tok_len = 3;
304 tok_type = LYXP_TOKEN_OPERATOR_MATH;
305
306 } else if (prev_function_check) {
307 LOGVAL(ctx, LY_VLOG_NONE, NULL, LYVE_XPATH, "Invalid character 0x%x, perhaps \"%.*s\" is supposed to be a function call.",
308 expr[parsed], &expr[parsed], ret->tok_len[ret->used - 1], &ret->expr[ret->tok_pos[ret->used - 1]]);
309 goto error;
310 } else {
311 LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_INEXPR, expr[parsed], &expr[parsed]);
312 goto error;
313 }
314 } else if (expr[parsed] == '*') {
315
316 /* NameTest '*' */
317 tok_len = 1;
318 tok_type = LYXP_TOKEN_NAMETEST;
319
320 } else {
321
322 /* NameTest (NCName ':' '*' | QName) or NodeType/FunctionName */
323 ncname_len = parse_ncname(&expr[parsed]);
324 LY_CHECK_ERR_GOTO(!ncname_len, LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_INEXPR, expr[parsed], &expr[parsed]), error);
325 tok_len = ncname_len;
326
327 if (expr[parsed + tok_len] == ':') {
328 ++tok_len;
329 if (expr[parsed + tok_len] == '*') {
330 ++tok_len;
331 } else {
332 ncname_len = parse_ncname(&expr[parsed + tok_len]);
333 LY_CHECK_ERR_GOTO(!ncname_len, LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_INEXPR, expr[parsed], &expr[parsed]), error);
334 tok_len += ncname_len;
335 }
336 /* remove old flag to prevent ambiguities */
337 prev_function_check = 0;
338 tok_type = LYXP_TOKEN_NAMETEST;
339 } else {
340 /* there is no prefix so it can still be NodeType/FunctionName, we can't finally decide now */
341 prev_function_check = 1;
342 tok_type = LYXP_TOKEN_NAMETEST;
343 }
344 }
345
346 /* store the token, move on to the next one */
347 LY_CHECK_GOTO(exp_add_token(ctx, ret, tok_type, parsed, tok_len), error);
348 parsed += tok_len;
349 while (is_xmlws(expr[parsed])) {
350 ++parsed;
351 }
352
353 } while (expr[parsed]);
354
355 /* prealloc repeat */
356 ret->repeat = calloc(ret->size, sizeof *ret->repeat);
357 LY_CHECK_ERR_GOTO(!ret->repeat, LOGMEM(ctx), error);
358
359 return ret;
360
361error:
362 lyxp_expr_free(ctx, ret);
363 return NULL;
364}
365