blob: 4a3792db2af77f7b641b769b0ec99f3448f07c22 [file] [log] [blame]
/**
 * @file xpath.c
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief YANG XPath evaluation functions
 *
 * Copyright (c) 2015 - 2017 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
14
15#include "common.h"
16
17#include <ctype.h>
18#include <stdio.h>
19#include <stdlib.h>
20#include <stdint.h>
21#include <string.h>
22#include <assert.h>
23#include <limits.h>
24#include <errno.h>
25#include <math.h>
26#include <pcre.h>
27
28#include "xpath.h"
29#include "xml.h"
30
/**
 * @brief Measure the NCName found at the beginning of a string.
 *
 * Characters are read one at a time with ly_getutf8(). The first one must
 * satisfy is_xmlqnamestartchar(), every following one is_xmlqnamechar(), and
 * a colon is never accepted because it separates a prefix from a local name.
 *
 * @param[in] ncname String that is expected to start with an NCName.
 *
 * @return Number of bytes of \p ncname that belong to the NCName, 0 if the
 * string does not start with one. NOTE(review): LY_CHECK_RET presumably makes
 * the function return 0 as well whenever ly_getutf8() reports a failure -
 * confirm against the macro definition.
 */
static size_t
parse_ncname(const char *ncname)
{
    size_t total = 0, char_bytes;
    unsigned int code_point;

    /* the leading character has its own, stricter, rule */
    LY_CHECK_RET(ly_getutf8(&ncname, &code_point, &char_bytes), 0);
    if ((code_point == ':') || !is_xmlqnamestartchar(code_point)) {
        return total;
    }

    for (;;) {
        /* the character checked last is part of the name, count its bytes */
        total += char_bytes;

        LY_CHECK_RET(ly_getutf8(&ncname, &code_point, &char_bytes), 0);
        if ((code_point == ':') || !is_xmlqnamechar(code_point)) {
            break;
        }
    }

    return total;
}
56
57/**
58 * @brief Add \p token into the expression \p exp.
59 *
60 * @param[in] ctx libyang context to log in.
61 * @param[in] exp Expression to use.
62 * @param[in] token Token to add.
63 * @param[in] expr_pos Token position in the XPath expression.
64 * @param[in] tok_len Token length in the XPath expression.
65 * @return LY_ERR value
66 */
67static LY_ERR
68exp_add_token(struct ly_ctx *ctx, struct lyxp_expr *exp, enum lyxp_token token, uint16_t expr_pos, uint16_t tok_len)
69{
70 uint32_t prev;
71
72 if (exp->used == exp->size) {
73 prev = exp->size;
74 exp->size += LYXP_EXPR_SIZE_STEP;
75 if (prev > exp->size) {
76 LOGINT(ctx);
77 return LY_EINT;
78 }
79
80 exp->tokens = ly_realloc(exp->tokens, exp->size * sizeof *exp->tokens);
81 LY_CHECK_ERR_RET(!exp->tokens, LOGMEM(ctx), LY_EMEM);
82 exp->tok_pos = ly_realloc(exp->tok_pos, exp->size * sizeof *exp->tok_pos);
83 LY_CHECK_ERR_RET(!exp->tok_pos, LOGMEM(ctx), LY_EMEM);
84 exp->tok_len = ly_realloc(exp->tok_len, exp->size * sizeof *exp->tok_len);
85 LY_CHECK_ERR_RET(!exp->tok_len, LOGMEM(ctx), LY_EMEM);
86 }
87
88 exp->tokens[exp->used] = token;
89 exp->tok_pos[exp->used] = expr_pos;
90 exp->tok_len[exp->used] = tok_len;
91 ++exp->used;
92 return LY_SUCCESS;
93}
94
95void
96lyxp_expr_free(struct ly_ctx *ctx, struct lyxp_expr *expr)
97{
98 uint16_t i;
99
100 if (!expr) {
101 return;
102 }
103
104 lydict_remove(ctx, expr->expr);
105 free(expr->tokens);
106 free(expr->tok_pos);
107 free(expr->tok_len);
108 if (expr->repeat) {
109 for (i = 0; i < expr->used; ++i) {
110 free(expr->repeat[i]);
111 }
112 }
113 free(expr->repeat);
114 free(expr);
115}
116
117struct lyxp_expr *
118lyxp_expr_parse(struct ly_ctx *ctx, const char *expr)
119{
120 struct lyxp_expr *ret;
121 size_t parsed = 0, tok_len, ncname_len;
122 enum lyxp_token tok_type;
123 int prev_function_check = 0;
124
125 if (strlen(expr) > UINT16_MAX) {
126 LOGERR(ctx, LY_EINVAL, "XPath expression cannot be longer than %ud characters.", UINT16_MAX);
127 return NULL;
128 }
129
130 /* init lyxp_expr structure */
131 ret = calloc(1, sizeof *ret);
132 LY_CHECK_ERR_GOTO(!ret, LOGMEM(ctx), error);
133 ret->expr = lydict_insert(ctx, expr, strlen(expr));
134 LY_CHECK_ERR_GOTO(!ret->expr, LOGMEM(ctx), error);
135 ret->used = 0;
136 ret->size = LYXP_EXPR_SIZE_START;
137 ret->tokens = malloc(ret->size * sizeof *ret->tokens);
138 LY_CHECK_ERR_GOTO(!ret->tokens, LOGMEM(ctx), error);
139
140 ret->tok_pos = malloc(ret->size * sizeof *ret->tok_pos);
141 LY_CHECK_ERR_GOTO(!ret->tok_pos, LOGMEM(ctx), error);
142
143 ret->tok_len = malloc(ret->size * sizeof *ret->tok_len);
144 LY_CHECK_ERR_GOTO(!ret->tok_len, LOGMEM(ctx), error);
145
146 while (is_xmlws(expr[parsed])) {
147 ++parsed;
148 }
149
150 do {
151 if (expr[parsed] == '(') {
152
153 /* '(' */
154 tok_len = 1;
155 tok_type = LYXP_TOKEN_PAR1;
156
157 if (prev_function_check && ret->used && (ret->tokens[ret->used - 1] == LYXP_TOKEN_NAMETEST)) {
158 /* it is a NodeType/FunctionName after all */
159 if (((ret->tok_len[ret->used - 1] == 4)
160 && (!strncmp(&expr[ret->tok_pos[ret->used - 1]], "node", 4)
161 || !strncmp(&expr[ret->tok_pos[ret->used - 1]], "text", 4))) ||
162 ((ret->tok_len[ret->used - 1] == 7)
163 && !strncmp(&expr[ret->tok_pos[ret->used - 1]], "comment", 7))) {
164 ret->tokens[ret->used - 1] = LYXP_TOKEN_NODETYPE;
165 } else {
166 ret->tokens[ret->used - 1] = LYXP_TOKEN_FUNCNAME;
167 }
168 prev_function_check = 0;
169 }
170
171 } else if (expr[parsed] == ')') {
172
173 /* ')' */
174 tok_len = 1;
175 tok_type = LYXP_TOKEN_PAR2;
176
177 } else if (expr[parsed] == '[') {
178
179 /* '[' */
180 tok_len = 1;
181 tok_type = LYXP_TOKEN_BRACK1;
182
183 } else if (expr[parsed] == ']') {
184
185 /* ']' */
186 tok_len = 1;
187 tok_type = LYXP_TOKEN_BRACK2;
188
189 } else if (!strncmp(&expr[parsed], "..", 2)) {
190
191 /* '..' */
192 tok_len = 2;
193 tok_type = LYXP_TOKEN_DDOT;
194
195 } else if ((expr[parsed] == '.') && (!isdigit(expr[parsed + 1]))) {
196
197 /* '.' */
198 tok_len = 1;
199 tok_type = LYXP_TOKEN_DOT;
200
201 } else if (expr[parsed] == '@') {
202
203 /* '@' */
204 tok_len = 1;
205 tok_type = LYXP_TOKEN_AT;
206
207 } else if (expr[parsed] == ',') {
208
209 /* ',' */
210 tok_len = 1;
211 tok_type = LYXP_TOKEN_COMMA;
212
213 } else if (expr[parsed] == '\'') {
214
215 /* Literal with ' */
216 for (tok_len = 1; (expr[parsed + tok_len] != '\0') && (expr[parsed + tok_len] != '\''); ++tok_len);
217 LY_CHECK_ERR_GOTO(expr[parsed + tok_len] == '\0',
218 LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_EOE, expr[parsed], &expr[parsed]), error);
219 ++tok_len;
220 tok_type = LYXP_TOKEN_LITERAL;
221
222 } else if (expr[parsed] == '\"') {
223
224 /* Literal with " */
225 for (tok_len = 1; (expr[parsed + tok_len] != '\0') && (expr[parsed + tok_len] != '\"'); ++tok_len);
226 LY_CHECK_ERR_GOTO(expr[parsed + tok_len] == '\0',
227 LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_EOE, expr[parsed], &expr[parsed]), error);
228 ++tok_len;
229 tok_type = LYXP_TOKEN_LITERAL;
230
231 } else if ((expr[parsed] == '.') || (isdigit(expr[parsed]))) {
232
233 /* Number */
234 for (tok_len = 0; isdigit(expr[parsed + tok_len]); ++tok_len);
235 if (expr[parsed + tok_len] == '.') {
236 ++tok_len;
237 for (; isdigit(expr[parsed + tok_len]); ++tok_len);
238 }
239 tok_type = LYXP_TOKEN_NUMBER;
240
241 } else if (expr[parsed] == '/') {
242
243 /* Operator '/', '//' */
244 if (!strncmp(&expr[parsed], "//", 2)) {
245 tok_len = 2;
246 } else {
247 tok_len = 1;
248 }
249 tok_type = LYXP_TOKEN_OPERATOR_PATH;
250
251 } else if (!strncmp(&expr[parsed], "!=", 2) || !strncmp(&expr[parsed], "<=", 2)
252 || !strncmp(&expr[parsed], ">=", 2)) {
253
254 /* Operator '!=', '<=', '>=' */
255 tok_len = 2;
256 tok_type = LYXP_TOKEN_OPERATOR_COMP;
257
258 } else if (expr[parsed] == '|') {
259
260 /* Operator '|' */
261 tok_len = 1;
262 tok_type = LYXP_TOKEN_OPERATOR_UNI;
263
264 } else if ((expr[parsed] == '+') || (expr[parsed] == '-')) {
265
266 /* Operator '+', '-' */
267 tok_len = 1;
268 tok_type = LYXP_TOKEN_OPERATOR_MATH;
269
270 } else if ((expr[parsed] == '=') || (expr[parsed] == '<') || (expr[parsed] == '>')) {
271
272 /* Operator '=', '<', '>' */
273 tok_len = 1;
274 tok_type = LYXP_TOKEN_OPERATOR_COMP;
275
276 } else if (ret->used && (ret->tokens[ret->used - 1] != LYXP_TOKEN_AT)
277 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_PAR1)
278 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_BRACK1)
279 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_COMMA)
280 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_LOG)
281 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_COMP)
282 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_MATH)
283 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_UNI)
284 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_PATH)) {
285
286 /* Operator '*', 'or', 'and', 'mod', or 'div' */
287 if (expr[parsed] == '*') {
288 tok_len = 1;
289 tok_type = LYXP_TOKEN_OPERATOR_MATH;
290
291 } else if (!strncmp(&expr[parsed], "or", 2)) {
292 tok_len = 2;
293 tok_type = LYXP_TOKEN_OPERATOR_LOG;
294
295 } else if (!strncmp(&expr[parsed], "and", 3)) {
296 tok_len = 3;
297 tok_type = LYXP_TOKEN_OPERATOR_LOG;
298
299 } else if (!strncmp(&expr[parsed], "mod", 3) || !strncmp(&expr[parsed], "div", 3)) {
300 tok_len = 3;
301 tok_type = LYXP_TOKEN_OPERATOR_MATH;
302
303 } else if (prev_function_check) {
304 LOGVAL(ctx, LY_VLOG_NONE, NULL, LYVE_XPATH, "Invalid character 0x%x, perhaps \"%.*s\" is supposed to be a function call.",
305 expr[parsed], &expr[parsed], ret->tok_len[ret->used - 1], &ret->expr[ret->tok_pos[ret->used - 1]]);
306 goto error;
307 } else {
308 LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_INEXPR, expr[parsed], &expr[parsed]);
309 goto error;
310 }
311 } else if (expr[parsed] == '*') {
312
313 /* NameTest '*' */
314 tok_len = 1;
315 tok_type = LYXP_TOKEN_NAMETEST;
316
317 } else {
318
319 /* NameTest (NCName ':' '*' | QName) or NodeType/FunctionName */
320 ncname_len = parse_ncname(&expr[parsed]);
321 LY_CHECK_ERR_GOTO(!ncname_len, LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_INEXPR, expr[parsed], &expr[parsed]), error);
322 tok_len = ncname_len;
323
324 if (expr[parsed + tok_len] == ':') {
325 ++tok_len;
326 if (expr[parsed + tok_len] == '*') {
327 ++tok_len;
328 } else {
329 ncname_len = parse_ncname(&expr[parsed + tok_len]);
330 LY_CHECK_ERR_GOTO(!ncname_len, LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_INEXPR, expr[parsed], &expr[parsed]), error);
331 tok_len += ncname_len;
332 }
333 /* remove old flag to prevent ambiguities */
334 prev_function_check = 0;
335 tok_type = LYXP_TOKEN_NAMETEST;
336 } else {
337 /* there is no prefix so it can still be NodeType/FunctionName, we can't finally decide now */
338 prev_function_check = 1;
339 tok_type = LYXP_TOKEN_NAMETEST;
340 }
341 }
342
343 /* store the token, move on to the next one */
344 LY_CHECK_GOTO(exp_add_token(ctx, ret, tok_type, parsed, tok_len), error);
345 parsed += tok_len;
346 while (is_xmlws(expr[parsed])) {
347 ++parsed;
348 }
349
350 } while (expr[parsed]);
351
352 /* prealloc repeat */
353 ret->repeat = calloc(ret->size, sizeof *ret->repeat);
354 LY_CHECK_ERR_GOTO(!ret->repeat, LOGMEM(ctx), error);
355
356 return ret;
357
358error:
359 lyxp_expr_free(ctx, ret);
360 return NULL;
361}
362