blob: ba818ca59e6508e6ef35ce9ed111c0340a51187e [file] [log] [blame]
/**
 * @file xpath.c
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief YANG XPath evaluation functions
 *
 * Copyright (c) 2015 - 2017 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
14
15#include "common.h"
16
17#include <ctype.h>
Radek Krejcie7b95092019-05-15 11:03:07 +020018#include <stdint.h>
Radek Krejcib1646a92018-11-02 16:08:26 +010019#include <stdio.h>
20#include <stdlib.h>
Radek Krejcib1646a92018-11-02 16:08:26 +010021#include <string.h>
Radek Krejcib1646a92018-11-02 16:08:26 +010022
23#include "xpath.h"
Radek Krejcie7b95092019-05-15 11:03:07 +020024#include "dict.h"
Radek Krejcib1646a92018-11-02 16:08:26 +010025#include "xml.h"
26
/**
 * @brief Measure the NCName found at the beginning of a string.
 *
 * Characters are decoded one by one with ly_getutf8(). The first one must
 * satisfy is_xmlqnamestartchar(), every following one is_xmlqnamechar(),
 * and ':' is never accepted because an NCName cannot contain a colon.
 *
 * @param[in] ncname String starting with the name to measure.
 *
 * @return Number of bytes forming the NCName,
 * @return 0 if the first character cannot be decoded or cannot start a name,
 * @return negated number of bytes accepted so far if decoding
 * of a following character fails.
 */
static long int
parse_ncname(const char *ncname)
{
    unsigned int c;
    size_t c_bytes;
    long int total = 0;

    /* the first character decides whether there is any name at all */
    LY_CHECK_RET(ly_getutf8(&ncname, &c, &c_bytes), 0);
    if ((c == ':') || !is_xmlqnamestartchar(c)) {
        return total;
    }

    for (;;) {
        /* the last decoded character belongs to the name */
        total += c_bytes;
        if (*ncname == '\0') {
            /* end of input, nothing more to decode */
            break;
        }

        LY_CHECK_RET(ly_getutf8(&ncname, &c, &c_bytes), -total);
        if ((c == ':') || !is_xmlqnamechar(c)) {
            /* the first character that is not part of the name */
            break;
        }
    }

    return total;
}
56
57/**
58 * @brief Add \p token into the expression \p exp.
59 *
60 * @param[in] ctx libyang context to log in.
61 * @param[in] exp Expression to use.
62 * @param[in] token Token to add.
63 * @param[in] expr_pos Token position in the XPath expression.
64 * @param[in] tok_len Token length in the XPath expression.
65 * @return LY_ERR value
66 */
67static LY_ERR
68exp_add_token(struct ly_ctx *ctx, struct lyxp_expr *exp, enum lyxp_token token, uint16_t expr_pos, uint16_t tok_len)
69{
70 uint32_t prev;
71
72 if (exp->used == exp->size) {
73 prev = exp->size;
74 exp->size += LYXP_EXPR_SIZE_STEP;
75 if (prev > exp->size) {
76 LOGINT(ctx);
77 return LY_EINT;
78 }
79
80 exp->tokens = ly_realloc(exp->tokens, exp->size * sizeof *exp->tokens);
81 LY_CHECK_ERR_RET(!exp->tokens, LOGMEM(ctx), LY_EMEM);
82 exp->tok_pos = ly_realloc(exp->tok_pos, exp->size * sizeof *exp->tok_pos);
83 LY_CHECK_ERR_RET(!exp->tok_pos, LOGMEM(ctx), LY_EMEM);
84 exp->tok_len = ly_realloc(exp->tok_len, exp->size * sizeof *exp->tok_len);
85 LY_CHECK_ERR_RET(!exp->tok_len, LOGMEM(ctx), LY_EMEM);
86 }
87
88 exp->tokens[exp->used] = token;
89 exp->tok_pos[exp->used] = expr_pos;
90 exp->tok_len[exp->used] = tok_len;
91 ++exp->used;
92 return LY_SUCCESS;
93}
94
95void
96lyxp_expr_free(struct ly_ctx *ctx, struct lyxp_expr *expr)
97{
98 uint16_t i;
99
100 if (!expr) {
101 return;
102 }
103
104 lydict_remove(ctx, expr->expr);
105 free(expr->tokens);
106 free(expr->tok_pos);
107 free(expr->tok_len);
108 if (expr->repeat) {
109 for (i = 0; i < expr->used; ++i) {
110 free(expr->repeat[i]);
111 }
112 }
113 free(expr->repeat);
114 free(expr);
115}
116
117struct lyxp_expr *
118lyxp_expr_parse(struct ly_ctx *ctx, const char *expr)
119{
120 struct lyxp_expr *ret;
Radek Krejcid4270262019-01-07 15:07:25 +0100121 size_t parsed = 0, tok_len;
122 long int ncname_len;
Radek Krejcib1646a92018-11-02 16:08:26 +0100123 enum lyxp_token tok_type;
124 int prev_function_check = 0;
125
126 if (strlen(expr) > UINT16_MAX) {
127 LOGERR(ctx, LY_EINVAL, "XPath expression cannot be longer than %ud characters.", UINT16_MAX);
128 return NULL;
129 }
130
131 /* init lyxp_expr structure */
132 ret = calloc(1, sizeof *ret);
133 LY_CHECK_ERR_GOTO(!ret, LOGMEM(ctx), error);
134 ret->expr = lydict_insert(ctx, expr, strlen(expr));
135 LY_CHECK_ERR_GOTO(!ret->expr, LOGMEM(ctx), error);
136 ret->used = 0;
137 ret->size = LYXP_EXPR_SIZE_START;
138 ret->tokens = malloc(ret->size * sizeof *ret->tokens);
139 LY_CHECK_ERR_GOTO(!ret->tokens, LOGMEM(ctx), error);
140
141 ret->tok_pos = malloc(ret->size * sizeof *ret->tok_pos);
142 LY_CHECK_ERR_GOTO(!ret->tok_pos, LOGMEM(ctx), error);
143
144 ret->tok_len = malloc(ret->size * sizeof *ret->tok_len);
145 LY_CHECK_ERR_GOTO(!ret->tok_len, LOGMEM(ctx), error);
146
147 while (is_xmlws(expr[parsed])) {
148 ++parsed;
149 }
150
151 do {
152 if (expr[parsed] == '(') {
153
154 /* '(' */
155 tok_len = 1;
156 tok_type = LYXP_TOKEN_PAR1;
157
158 if (prev_function_check && ret->used && (ret->tokens[ret->used - 1] == LYXP_TOKEN_NAMETEST)) {
159 /* it is a NodeType/FunctionName after all */
160 if (((ret->tok_len[ret->used - 1] == 4)
161 && (!strncmp(&expr[ret->tok_pos[ret->used - 1]], "node", 4)
162 || !strncmp(&expr[ret->tok_pos[ret->used - 1]], "text", 4))) ||
163 ((ret->tok_len[ret->used - 1] == 7)
164 && !strncmp(&expr[ret->tok_pos[ret->used - 1]], "comment", 7))) {
165 ret->tokens[ret->used - 1] = LYXP_TOKEN_NODETYPE;
166 } else {
167 ret->tokens[ret->used - 1] = LYXP_TOKEN_FUNCNAME;
168 }
169 prev_function_check = 0;
170 }
171
172 } else if (expr[parsed] == ')') {
173
174 /* ')' */
175 tok_len = 1;
176 tok_type = LYXP_TOKEN_PAR2;
177
178 } else if (expr[parsed] == '[') {
179
180 /* '[' */
181 tok_len = 1;
182 tok_type = LYXP_TOKEN_BRACK1;
183
184 } else if (expr[parsed] == ']') {
185
186 /* ']' */
187 tok_len = 1;
188 tok_type = LYXP_TOKEN_BRACK2;
189
190 } else if (!strncmp(&expr[parsed], "..", 2)) {
191
192 /* '..' */
193 tok_len = 2;
194 tok_type = LYXP_TOKEN_DDOT;
195
196 } else if ((expr[parsed] == '.') && (!isdigit(expr[parsed + 1]))) {
197
198 /* '.' */
199 tok_len = 1;
200 tok_type = LYXP_TOKEN_DOT;
201
202 } else if (expr[parsed] == '@') {
203
204 /* '@' */
205 tok_len = 1;
206 tok_type = LYXP_TOKEN_AT;
207
208 } else if (expr[parsed] == ',') {
209
210 /* ',' */
211 tok_len = 1;
212 tok_type = LYXP_TOKEN_COMMA;
213
214 } else if (expr[parsed] == '\'') {
215
216 /* Literal with ' */
217 for (tok_len = 1; (expr[parsed + tok_len] != '\0') && (expr[parsed + tok_len] != '\''); ++tok_len);
218 LY_CHECK_ERR_GOTO(expr[parsed + tok_len] == '\0',
219 LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_EOE, expr[parsed], &expr[parsed]), error);
220 ++tok_len;
221 tok_type = LYXP_TOKEN_LITERAL;
222
223 } else if (expr[parsed] == '\"') {
224
225 /* Literal with " */
226 for (tok_len = 1; (expr[parsed + tok_len] != '\0') && (expr[parsed + tok_len] != '\"'); ++tok_len);
227 LY_CHECK_ERR_GOTO(expr[parsed + tok_len] == '\0',
228 LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_EOE, expr[parsed], &expr[parsed]), error);
229 ++tok_len;
230 tok_type = LYXP_TOKEN_LITERAL;
231
232 } else if ((expr[parsed] == '.') || (isdigit(expr[parsed]))) {
233
234 /* Number */
235 for (tok_len = 0; isdigit(expr[parsed + tok_len]); ++tok_len);
236 if (expr[parsed + tok_len] == '.') {
237 ++tok_len;
238 for (; isdigit(expr[parsed + tok_len]); ++tok_len);
239 }
240 tok_type = LYXP_TOKEN_NUMBER;
241
242 } else if (expr[parsed] == '/') {
243
244 /* Operator '/', '//' */
245 if (!strncmp(&expr[parsed], "//", 2)) {
246 tok_len = 2;
247 } else {
248 tok_len = 1;
249 }
250 tok_type = LYXP_TOKEN_OPERATOR_PATH;
251
252 } else if (!strncmp(&expr[parsed], "!=", 2) || !strncmp(&expr[parsed], "<=", 2)
253 || !strncmp(&expr[parsed], ">=", 2)) {
254
255 /* Operator '!=', '<=', '>=' */
256 tok_len = 2;
257 tok_type = LYXP_TOKEN_OPERATOR_COMP;
258
259 } else if (expr[parsed] == '|') {
260
261 /* Operator '|' */
262 tok_len = 1;
263 tok_type = LYXP_TOKEN_OPERATOR_UNI;
264
265 } else if ((expr[parsed] == '+') || (expr[parsed] == '-')) {
266
267 /* Operator '+', '-' */
268 tok_len = 1;
269 tok_type = LYXP_TOKEN_OPERATOR_MATH;
270
271 } else if ((expr[parsed] == '=') || (expr[parsed] == '<') || (expr[parsed] == '>')) {
272
273 /* Operator '=', '<', '>' */
274 tok_len = 1;
275 tok_type = LYXP_TOKEN_OPERATOR_COMP;
276
277 } else if (ret->used && (ret->tokens[ret->used - 1] != LYXP_TOKEN_AT)
278 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_PAR1)
279 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_BRACK1)
280 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_COMMA)
281 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_LOG)
282 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_COMP)
283 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_MATH)
284 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_UNI)
285 && (ret->tokens[ret->used - 1] != LYXP_TOKEN_OPERATOR_PATH)) {
286
287 /* Operator '*', 'or', 'and', 'mod', or 'div' */
288 if (expr[parsed] == '*') {
289 tok_len = 1;
290 tok_type = LYXP_TOKEN_OPERATOR_MATH;
291
292 } else if (!strncmp(&expr[parsed], "or", 2)) {
293 tok_len = 2;
294 tok_type = LYXP_TOKEN_OPERATOR_LOG;
295
296 } else if (!strncmp(&expr[parsed], "and", 3)) {
297 tok_len = 3;
298 tok_type = LYXP_TOKEN_OPERATOR_LOG;
299
300 } else if (!strncmp(&expr[parsed], "mod", 3) || !strncmp(&expr[parsed], "div", 3)) {
301 tok_len = 3;
302 tok_type = LYXP_TOKEN_OPERATOR_MATH;
303
304 } else if (prev_function_check) {
305 LOGVAL(ctx, LY_VLOG_NONE, NULL, LYVE_XPATH, "Invalid character 0x%x, perhaps \"%.*s\" is supposed to be a function call.",
306 expr[parsed], &expr[parsed], ret->tok_len[ret->used - 1], &ret->expr[ret->tok_pos[ret->used - 1]]);
307 goto error;
308 } else {
Radek Krejcid4270262019-01-07 15:07:25 +0100309 LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_INEXPR, parsed + 1, expr);
Radek Krejcib1646a92018-11-02 16:08:26 +0100310 goto error;
311 }
312 } else if (expr[parsed] == '*') {
313
314 /* NameTest '*' */
315 tok_len = 1;
316 tok_type = LYXP_TOKEN_NAMETEST;
317
318 } else {
319
320 /* NameTest (NCName ':' '*' | QName) or NodeType/FunctionName */
321 ncname_len = parse_ncname(&expr[parsed]);
Radek Krejcid4270262019-01-07 15:07:25 +0100322 LY_CHECK_ERR_GOTO(ncname_len < 0, LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_INEXPR, parsed - ncname_len + 1, expr), error);
Radek Krejcib1646a92018-11-02 16:08:26 +0100323 tok_len = ncname_len;
324
325 if (expr[parsed + tok_len] == ':') {
326 ++tok_len;
327 if (expr[parsed + tok_len] == '*') {
328 ++tok_len;
329 } else {
330 ncname_len = parse_ncname(&expr[parsed + tok_len]);
Radek Krejcid4270262019-01-07 15:07:25 +0100331 LY_CHECK_ERR_GOTO(ncname_len < 0, LOGVAL(ctx, LY_VLOG_NONE, NULL, LY_VCODE_XP_INEXPR, parsed - ncname_len + 1, expr), error);
Radek Krejcib1646a92018-11-02 16:08:26 +0100332 tok_len += ncname_len;
333 }
334 /* remove old flag to prevent ambiguities */
335 prev_function_check = 0;
336 tok_type = LYXP_TOKEN_NAMETEST;
337 } else {
338 /* there is no prefix so it can still be NodeType/FunctionName, we can't finally decide now */
339 prev_function_check = 1;
340 tok_type = LYXP_TOKEN_NAMETEST;
341 }
342 }
343
344 /* store the token, move on to the next one */
345 LY_CHECK_GOTO(exp_add_token(ctx, ret, tok_type, parsed, tok_len), error);
346 parsed += tok_len;
347 while (is_xmlws(expr[parsed])) {
348 ++parsed;
349 }
350
351 } while (expr[parsed]);
352
353 /* prealloc repeat */
354 ret->repeat = calloc(ret->size, sizeof *ret->repeat);
355 LY_CHECK_ERR_GOTO(!ret->repeat, LOGMEM(ctx), error);
356
357 return ret;
358
359error:
360 lyxp_expr_free(ctx, ret);
361 return NULL;
362}
363