blob: a73c2e026bd8955f4b3b5704b1228c2a7bf5ecbf [file] [log] [blame]
Radek Krejcib1646a92018-11-02 16:08:26 +01001/**
2 * @file xpath.c
3 * @author Michal Vasko <mvasko@cesnet.cz>
4 * @brief YANG XPath evaluation functions
5 *
6 * Copyright (c) 2015 - 2017 CESNET, z.s.p.o.
7 *
8 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * https://opensource.org/licenses/BSD-3-Clause
13 */
14
15#include "common.h"
16
17#include <ctype.h>
18#include <stdio.h>
19#include <stdlib.h>
20#include <stdint.h>
21#include <string.h>
22#include <assert.h>
23#include <limits.h>
24#include <errno.h>
25#include <math.h>
Radek Krejcib1646a92018-11-02 16:08:26 +010026
27#include "xpath.h"
28#include "xml.h"
29
/**
 * @brief Measure the NCName found at the beginning of a string.
 *
 * Characters are read one at a time with ly_getutf8(). The name has to begin
 * with a character accepted by is_xmlqnamestartchar() and continues while
 * is_xmlqnamechar() accepts the following characters; a colon is never part
 * of the name.
 *
 * @param[in] ncname String that should begin with an NCName.
 *
 * @return Number of bytes of \p ncname that form the name, 0 if the first
 * character cannot be read or cannot start a name, negative number of the bytes
 * accepted so far if ly_getutf8() fails on any later character.
 */
static long int
parse_ncname(const char *ncname)
{
    unsigned int ch;
    size_t ch_bytes;
    long int name_bytes = 0;

    /* the first character decides whether there is any name at all */
    LY_CHECK_RET(ly_getutf8(&ncname, &ch, &ch_bytes), 0);
    if (!is_xmlqnamestartchar(ch) || (ch == ':')) {
        return name_bytes;
    }

    for (;;) {
        /* the character read last belongs to the name */
        name_bytes += ch_bytes;
        if (*ncname == '\0') {
            /* end of input */
            break;
        }

        /* a failure here is reported as the negated count of accepted bytes */
        LY_CHECK_RET(ly_getutf8(&ncname, &ch, &ch_bytes), -name_bytes);
        if (!is_xmlqnamechar(ch) || (ch == ':')) {
            /* first character that is not part of the name */
            break;
        }
    }

    return name_bytes;
}
59
60/**
61 * @brief Add \p token into the expression \p exp.
62 *
63 * @param[in] ctx libyang context to log in.
64 * @param[in] exp Expression to use.
65 * @param[in] token Token to add.
66 * @param[in] expr_pos Token position in the XPath expression.
67 * @param[in] tok_len Token length in the XPath expression.
68 * @return LY_ERR value
69 */
70static LY_ERR
71exp_add_token(struct ly_ctx *ctx, struct lyxp_expr *exp, enum lyxp_token token, uint16_t expr_pos, uint16_t tok_len)
72{
73 uint32_t prev;
74
75 if (exp->used == exp->size) {
76 prev = exp->size;
77 exp->size += LYXP_EXPR_SIZE_STEP;
78 if (prev > exp->size) {
79 LOGINT(ctx);
80 return LY_EINT;
81 }
82
83 exp->tokens = ly_realloc(exp->tokens, exp->size * sizeof *exp->tokens);
84 LY_CHECK_ERR_RET(!exp->tokens, LOGMEM(ctx), LY_EMEM);
85 exp->tok_pos = ly_realloc(exp->tok_pos, exp->size * sizeof *exp->tok_pos);
86 LY_CHECK_ERR_RET(!exp->tok_pos, LOGMEM(ctx), LY_EMEM);
87 exp->tok_len = ly_realloc(exp->tok_len, exp->size * sizeof *exp->tok_len);
88 LY_CHECK_ERR_RET(!exp->tok_len, LOGMEM(ctx), LY_EMEM);
89 }
90
91 exp->tokens[exp->used] = token;
92 exp->tok_pos[exp->used] = expr_pos;
93 exp->tok_len[exp->used] = tok_len;
94 ++exp->used;
95 return LY_SUCCESS;
96}
97
98void
99lyxp_expr_free(struct ly_ctx *ctx, struct lyxp_expr *expr)
100{
101 uint16_t i;
102
103 if (!expr) {
104 return;
105 }
106
107 lydict_remove(ctx, expr->expr);
108 free(expr->tokens);
109 free(expr->tok_pos);
110 free(expr->tok_len);
111 if (expr->repeat) {
112 for (i = 0; i < expr->used; ++i) {
113 free(expr->repeat[i]);
114 }
115 }
116 free(expr->repeat);
117 free(expr);
118}
119
120struct lyxp_expr *
121lyxp_expr_parse(struct ly_ctx *ctx, const char *expr)
122{
123 struct lyxp_expr *ret;
Radek Krejcid4270262019-01-07 15:07:25 +0100124 size_t parsed = 0, tok_len;
125 long int ncname_len;
Radek Krejcib1646a92018-11-02 16:08:26 +0100126 enum lyxp_token tok_type;
127 int prev_function_check = 0;
128
129 if (strlen(expr) > UINT16_MAX) {
130 LOGERR(ctx, LY_EINVAL, "XPath expression cannot be longer than %ud characters.", UINT16_MAX);
131 return NULL;
132 }
133
134 /* init lyxp_expr structure */
135 ret = calloc(1, sizeof *ret);
136 LY_CHECK_ERR_GOTO(!ret, LOGMEM(ctx), error);
137 ret->expr = lydict_insert(ctx, expr, strlen(expr));
138 LY_CHECK_ERR_GOTO(!ret->expr, LOGMEM(ctx), error);
139 ret->used = 0;
140 ret->size = LYXP_EXPR_SIZE_START;
141 ret->tokens = malloc(ret->size * sizeof *ret->tokens);
142 LY_CHECK_ERR_GOTO(!ret->tokens, LOGMEM(ctx), error);
143
144 ret->tok_pos = malloc(ret->size * sizeof *ret->tok_pos);
145 LY_CHECK_ERR_GOTO(!ret->tok_pos, LOGMEM(ctx), error);
146
147 ret->tok_len = malloc(ret->size * sizeof *ret->tok_len);
148 LY_CHECK_ERR_GOTO(!ret->tok_len, LOGMEM(ctx), error);
149
150 while (is_xmlws(expr[parsed])) {
151 ++parsed;
152 }
153
154 do {
155 if (expr[parsed] == '(') {
156
157 /* '(' */
158 tok_len = 1;
159 tok_type = LYXP_TOKEN_PAR1;
160
161 if (prev_function_check && ret->used && (ret->tokens[ret->used - 1] == LYXP_TOKEN_NAMETEST)) {
162 /* it is a NodeType/FunctionName after all */
163 if (((ret->tok_len[ret->used - 1] == 4)
164 && (!strncmp(&expr[ret->tok_pos[ret->used - 1]], "node", 4)
165 || !strncmp(&expr[ret->tok_pos[ret->used - 1]], "text", 4))) ||
166 ((ret->tok_len[ret->used - 1] == 7)
167 && !strncmp(&expr[ret->tok_pos[ret->used - 1]], "comment", 7))) {
168 ret->tokens[ret->used - 1] = LYXP_TOKEN_NODETYPE;
169 } else {
170 ret->tokens[ret->used - 1] = LYXP_TOKEN_FUNCNAME;
171 }
172 prev_function_check = 0;
173 }
174
175 } else if (expr[parsed] == ')') {
176
177 /* ')' */
178 tok_len = 1;
179 tok_type = LYXP_TOKEN_PAR2;
180
181 } else if (expr[parsed] == '[') {
182
183 /* '[' */
184 tok_len = 1;
185 tok_type = LYXP_TOKEN_BRACK1;
186
187 } else if (expr[parsed] == ']') {
188
189 /* ']' */
190 tok_len = 1;
191 tok_type = LYXP_TOKEN_BRACK2;
192
193 } else if (!strncmp(&expr[parsed], "..", 2)) {
194
195 /* '..' */
196 tok_len = 2;
197 tok_type = LYXP_TOKEN_DDOT;
198
199 } else if ((expr[parsed] == '.') && (!isdigit(expr[parsed + 1]))) {
200
201 /* '.' */
202 tok_len = 1;
203 tok_type = LYXP_TOKEN_DOT;
204
205 } else if (expr[parsed] == '@') {
206
207 /* '@' */
208 tok_len = 1;
209 tok_type = LYXP_TOKEN_AT;
210
211 } else if (expr[parsed] == ',') {
212
213 /* ',' */
214 tok_len = 1;
215 tok_type = LYXP_TOKEN_COMMA;
216
217 } else if (expr[parsed] == '\'') {
218
219 /* Literal with ' */
220 for (tok_len = 1; (expr[parsed + tok_len] != '\0') && (expr[parsed + tok_len] != '\''); ++tok_len);
221 LY_CHECK_ERR_GOTO(expr[parsed + tok_len] == '\0',
222 LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_EOE, expr[parsed], &expr[parsed]), error);
223 ++tok_len;
224 tok_type = LYXP_TOKEN_LITERAL;
225
226 } else if (expr[parsed] == '\"') {
227
228 /* Literal with " */
229 for (tok_len = 1; (expr[parsed + tok_len] != '\0') && (expr[parsed + tok_len] != '\"'); ++tok_len);
230 LY_CHECK_ERR_GOTO(expr[parsed + tok_len] == '\0',
231 LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_EOE, expr[parsed], &expr[parsed]), error);
232 ++tok_len;
233 tok_type = LYXP_TOKEN_LITERAL;
234
235 } else if ((expr[parsed] == '.') || (isdigit(expr[parsed]))) {
236
237 /* Number */
238 for (tok_len = 0; isdigit(expr[parsed + tok_len]); ++tok_len);
239 if (expr[parsed + tok_len] == '.') {
240 ++tok_len;
241 for (; isdigit(expr[parsed + tok_len]); ++tok_len);
242 }
243 tok_type = LYXP_TOKEN_NUMBER;
244
245 } else if (expr[parsed] == '/') {
246
247 /* Operator '/', '//' */
248 if (!strncmp(&expr[parsed], "//", 2)) {
249 tok_len = 2;
250 } else {
251 tok_len = 1;
252 }
253 tok_type = LYXP_TOKEN_OPERATOR_PATH;
254
255 } else if (!strncmp(&expr[parsed], "!=", 2) || !strncmp(&expr[parsed], "<=", 2)
256 || !strncmp(&expr[parsed], ">=", 2)) {
257
258 /* Operator '!=', '<=', '>=' */
259 tok_len = 2;
260 tok_type = LYXP_TOKEN_OPERATOR_COMP;
261
262 } else if (expr[parsed] == '|') {
263
264 /* Operator '|' */
265 tok_len = 1;
266 tok_type = LYXP_TOKEN_OPERATOR_UNI;
267
268 } else if ((expr[parsed] == '+') || (expr[parsed] == '-')) {
269
270 /* Operator '+', '-' */
271 tok_len = 1;
272 tok_type = LYXP_TOKEN_OPERATOR_MATH;
273
274 } else if ((expr[parsed] == '=') || (expr[parsed] == '<') || (expr[parsed] == '>')) {
275
276 /* Operator '=', '<', '>' */
277 tok_len = 1;
278 tok_type = LYXP_TOKEN_OPERATOR_COMP;
279
280 } else if (ret->used && (ret->tokens[ret->used - 1] != LYXP_TOKEN_AT)
281 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_PAR1)
282 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_BRACK1)
283 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_COMMA)
284 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_LOG)
285 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_COMP)
286 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_MATH)
287 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_UNI)
288 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_PATH)) {
289
290 /* Operator '*', 'or', 'and', 'mod', or 'div' */
291 if (expr[parsed] == '*') {
292 tok_len = 1;
293 tok_type = LYXP_TOKEN_OPERATOR_MATH;
294
295 } else if (!strncmp(&expr[parsed], "or", 2)) {
296 tok_len = 2;
297 tok_type = LYXP_TOKEN_OPERATOR_LOG;
298
299 } else if (!strncmp(&expr[parsed], "and", 3)) {
300 tok_len = 3;
301 tok_type = LYXP_TOKEN_OPERATOR_LOG;
302
303 } else if (!strncmp(&expr[parsed], "mod", 3) || !strncmp(&expr[parsed], "div", 3)) {
304 tok_len = 3;
305 tok_type = LYXP_TOKEN_OPERATOR_MATH;
306
307 } else if (prev_function_check) {
308 LOGVAL(ctx, LY_VLOG_NONE, NULL, LYVE_XPATH, "Invalid character 0x%x, perhaps \"%.*s\" is supposed to be a function call.",
309 expr[parsed], &expr[parsed], ret->tok_len[ret->used - 1], &ret->expr[ret->tok_pos[ret->used - 1]]);
310 goto error;
311 } else {
Radek Krejcid4270262019-01-07 15:07:25 +0100312 LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_INEXPR, parsed + 1, expr);
Radek Krejcib1646a92018-11-02 16:08:26 +0100313 goto error;
314 }
315 } else if (expr[parsed] == '*') {
316
317 /* NameTest '*' */
318 tok_len = 1;
319 tok_type = LYXP_TOKEN_NAMETEST;
320
321 } else {
322
323 /* NameTest (NCName ':' '*' | QName) or NodeType/FunctionName */
324 ncname_len = parse_ncname(&expr[parsed]);
Radek Krejcid4270262019-01-07 15:07:25 +0100325 LY_CHECK_ERR_GOTO(ncname_len < 0, LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_INEXPR, parsed - ncname_len + 1, expr), error);
Radek Krejcib1646a92018-11-02 16:08:26 +0100326 tok_len = ncname_len;
327
328 if (expr[parsed + tok_len] == ':') {
329 ++tok_len;
330 if (expr[parsed + tok_len] == '*') {
331 ++tok_len;
332 } else {
333 ncname_len = parse_ncname(&expr[parsed + tok_len]);
Radek Krejcid4270262019-01-07 15:07:25 +0100334 LY_CHECK_ERR_GOTO(ncname_len < 0, LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_INEXPR, parsed - ncname_len + 1, expr), error);
Radek Krejcib1646a92018-11-02 16:08:26 +0100335 tok_len += ncname_len;
336 }
337 /* remove old flag to prevent ambiguities */
338 prev_function_check = 0;
339 tok_type = LYXP_TOKEN_NAMETEST;
340 } else {
341 /* there is no prefix so it can still be NodeType/FunctionName, we can't finally decide now */
342 prev_function_check = 1;
343 tok_type = LYXP_TOKEN_NAMETEST;
344 }
345 }
346
347 /* store the token, move on to the next one */
348 LY_CHECK_GOTO(exp_add_token(ctx, ret, tok_type, parsed, tok_len), error);
349 parsed += tok_len;
350 while (is_xmlws(expr[parsed])) {
351 ++parsed;
352 }
353
354 } while (expr[parsed]);
355
356 /* prealloc repeat */
357 ret->repeat = calloc(ret->size, sizeof *ret->repeat);
358 LY_CHECK_ERR_GOTO(!ret->repeat, LOGMEM(ctx), error);
359
360 return ret;
361
362error:
363 lyxp_expr_free(ctx, ret);
364 return NULL;
365}
366