/* blob: 347dfe0d845f24fe996fa59f10f18fb93349f89d (repository viewer residue, kept as a comment so the file remains valid C) */
/**
* @file xml.c
* @author Radek Krejci <rkrejci@cesnet.cz>
* @brief XML parser implementation for libyang
*
* Copyright (c) 2015 CESNET, z.s.p.o.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*/
#include <assert.h>
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "common.h"
#include "dict_private.h"
#include "printer.h"
#include "parser.h"
#include "tree_schema.h"
#include "xml_internal.h"
#ifndef NDEBUG
/* Line counters used by the parser, compiled only when NDEBUG is not defined.
 * lineno is reset to 1 by lyxml_read() and is the value passed to the logging
 * macros and stored in parsed elements; lws_lineno remembers lineno at the
 * start of a run of leading white space so parse_elem() can restore it when
 * that white space turns out to be content. */
static unsigned int lineno, lws_lineno;
#endif
/* Advance pointer p past consecutive characters accepted by is_xmlws(),
 * handing every skipped character to COUNTLINE() (presumably the line counter
 * update - defined outside this file section).
 * NOTE(review): p is expanded without parentheses and the expansion is a bare
 * while statement, so only pass a plain pointer variable. */
#define ign_xmlws(p) \
while (is_xmlws(*p)) { \
COUNTLINE(*p); \
p++; \
}
/**
 * @brief Find the namespace definition that is in scope for an element.
 *
 * The attribute lists of elem and then of each of its ancestors are searched
 * for a namespace attribute bound to the requested prefix; the first match
 * (nearest to elem) wins.
 *
 * @param[in] elem Element where the search starts, may be NULL.
 * @param[in] prefix Prefix to look for, NULL to look for the default namespace.
 * @return The matching namespace definition, NULL when none is found or when
 * the nearest default namespace definition has no value (xmlns="").
 */
struct lyxml_ns *
lyxml_get_ns(struct lyxml_elem *elem, const char *prefix)
{
    struct lyxml_attr *attr;

    /* walk from the element up to the root instead of recursing */
    for (; elem; elem = elem->parent) {
        for (attr = elem->attr; attr; attr = attr->next) {
            if (attr->type != LYXML_ATTR_NS) {
                continue;
            }

            if (!attr->name) {
                if (!prefix) {
                    /* default namespace found */
                    if (!attr->value) {
                        /* empty default namespace -> no default namespace */
                        return NULL;
                    }
                    return (struct lyxml_ns *)attr;
                }
            } else if (prefix && !strcmp(attr->name, prefix)) {
                /* prefix found; strcmp() never reads past the terminator of
                 * either string, unlike a memcmp() over strlen(prefix) + 1 bytes */
                return (struct lyxml_ns *)attr;
            }
        }
    }

    return NULL;
}
/**
 * @brief Duplicate an attribute (or namespace definition) and append the copy
 * to the attribute list of parent.
 *
 * @param[in] ctx Context passed to lydict_insert() for the name and value.
 * @param[in] parent Element that receives the copy.
 * @param[in] attr Attribute to duplicate.
 * @return The new attribute, NULL when attr or parent is NULL or when memory
 * allocation fails (parent is left untouched in that case).
 */
struct lyxml_attr *
lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
{
    struct lyxml_attr *result, *a;

    if (!attr || !parent) {
        return NULL;
    }

    if (attr->type == LYXML_ATTR_NS) {
        /* namespace definitions are kept in the attribute list but they are
         * struct lyxml_ns objects, so the copy must have that size */
        result = calloc(1, sizeof (struct lyxml_ns));
    } else {
        result = calloc(1, sizeof (struct lyxml_attr));
    }
    if (!result) {
        /* out of memory */
        return NULL;
    }

    result->value = lydict_insert(ctx, attr->value, 0);
    result->name = lydict_insert(ctx, attr->name, 0);
    result->type = attr->type;

    /* set namespace in case of standard attributes: resolve the same prefix
     * in the scope of the new parent */
    if (result->type == LYXML_ATTR_STD && attr->ns) {
        result->ns = lyxml_get_ns(parent, attr->ns->prefix);
    }

    /* set parent pointer in case of namespace attribute */
    if (result->type == LYXML_ATTR_NS) {
        ((struct lyxml_ns *)result)->parent = parent;
    }

    /* put attribute at the end of the parent's attributes list */
    if (parent->attr) {
        for (a = parent->attr; a->next; a = a->next);
        a->next = result;
    } else {
        parent->attr = result;
    }

    return result;
}
/**
 * @brief Duplicate an element, optionally with its whole subtree.
 *
 * The copy gets its own references to the name and content (lydict_insert()),
 * copies of all attributes and, when parent is given, is appended as the last
 * child of parent.
 *
 * @param[in] ctx Context used for the dictionary and passed to the helpers.
 * @param[in] elem Element to duplicate.
 * @param[in] parent Optional parent of the copy.
 * @param[in] recursive Non-zero to duplicate the children as well.
 * @return The new element, NULL when elem is NULL or memory allocation fails.
 */
struct lyxml_elem *
lyxml_dup_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, struct lyxml_elem *parent, int recursive)
{
    struct lyxml_elem *result, *child;
    struct lyxml_attr *attr;

    if (!elem) {
        return NULL;
    }

    result = calloc(1, sizeof *result);
    if (!result) {
        /* out of memory */
        return NULL;
    }
    result->content = lydict_insert(ctx, elem->content, 0);
    result->name = lydict_insert(ctx, elem->name, 0);
    result->flags = elem->flags;
#ifndef NDEBUG
    result->line = elem->line;
#endif
    /* a single element is its own last sibling */
    result->prev = result;

    if (parent) {
        lyxml_add_child(ctx, parent, result);
    }

    /* namespace: resolve the original prefix in the scope of the copy */
    if (elem->ns) {
        result->ns = lyxml_get_ns(result, elem->ns->prefix);
    }

    /* duplicate attributes */
    for (attr = elem->attr; attr; attr = attr->next) {
        lyxml_dup_attr(ctx, result, attr);
    }

    if (!recursive) {
        return result;
    }

    /* duplicate children */
    LY_TREE_FOR(elem->child, child) {
        lyxml_dup_elem(ctx, child, result, 1);
    }

    return result;
}
/**
 * @brief Look for a namespace definition with exactly the given prefix and
 * value on elem or any of its ancestors.
 *
 * A NULL prefix (or value) matches only a definition whose name (or value) is
 * NULL as well.
 *
 * @return The first matching definition on the way to the root, NULL if none.
 */
static struct lyxml_ns *
lyxml_find_ns(struct lyxml_elem *elem, const char *prefix, const char *value)
{
    struct lyxml_elem *scope;
    struct lyxml_attr *cur;

    for (scope = elem; scope; scope = scope->parent) {
        for (cur = scope->attr; cur; cur = cur->next) {
            if (cur->type != LYXML_ATTR_NS) {
                continue;
            }

            /* prefix: both missing, or both present and equal */
            if (prefix) {
                if (!cur->name || strcmp(cur->name, prefix)) {
                    continue;
                }
            } else if (cur->name) {
                continue;
            }

            /* value: the same rule */
            if (value) {
                if (!cur->value || strcmp(cur->value, value)) {
                    continue;
                }
            } else if (cur->value) {
                continue;
            }

            return (struct lyxml_ns *)cur;
        }
    }

    return NULL;
}
/* Fix the namespace pointers of elements in the subtree of elem after the
 * subtree was detached from its original tree.
 *
 * For every visited element whose namespace definition is owned by an element
 * from a different tree (the root of the definition's owner differs from the
 * root of elem), the reference is either dropped or replaced by a definition
 * living inside the subtree.
 *
 * copy_ns: 0 - set invalid namespaces to NULL, 1 - copy them into this subtree
 *
 * Only the ns pointers of elements are handled here; the ns pointers of
 * attributes are not touched by this function.
 * NOTE(review): the traversal order and termination come from the
 * LY_TREE_DFS_BEGIN/LY_TREE_DFS_END macros defined elsewhere (presumably a
 * depth-first walk of the subtree) - confirm there. The macros are given elem
 * itself as the iterator variable, so elem changes during the walk. */
static void
lyxml_correct_ns(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns)
{
const struct lyxml_ns *elem_ns;
struct lyxml_elem *elem_root, *ns_root, *tmp;
/* find the root of elem */
for (elem_root = elem; elem_root->parent; elem_root = elem_root->parent);
LY_TREE_DFS_BEGIN(elem, tmp, elem) {
if (elem->ns) {
/* find the root of elem NS (the top-most ancestor of the element that
 * carries the namespace definition) */
for (ns_root = elem->ns->parent; ns_root->parent; ns_root = ns_root->parent);
/* elem NS is defined outside elem subtree */
if (ns_root != elem_root) {
if (copy_ns) {
elem_ns = elem->ns;
/* we may have already copied the NS over? (search this element and its
 * ancestors for a definition with the same prefix and value) */
elem->ns = lyxml_find_ns(elem, elem_ns->prefix, elem_ns->value);
/* we haven't copied it over, copy it now (the copy is appended to the
 * attribute list of the current element) */
if (!elem->ns) {
elem->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, elem, (struct lyxml_attr *)elem_ns);
}
} else {
elem->ns = NULL;
}
}
}
LY_TREE_DFS_END(elem, tmp, elem)
}
}
/**
 * @brief Detach an element (with its subtree) from its parent and siblings.
 *
 * Siblings form a list where next of the last sibling is NULL and prev of the
 * first sibling points to the last one; a lone element points to itself via
 * prev. After the call elem is such a lone element without a parent.
 *
 * @param[in] ctx Context passed to lyxml_correct_ns().
 * @param[in] elem Element to unlink, NULL is ignored.
 * @param[in] copy_ns Values lower than 2 are passed to lyxml_correct_ns()
 * (0 - drop references to namespaces defined outside the subtree, 1 - copy
 * such namespaces into the subtree); 2 leaves the namespaces untouched.
 */
void
lyxml_unlink_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns)
{
    struct lyxml_elem *parent, *first;

    if (!elem) {
        return;
    }

    /* store pointers to important nodes */
    parent = elem->parent;

    /* unlink from parent */
    if (parent) {
        if (parent->child == elem) {
            /* we unlink the first child, update the parent's link */
            parent->child = elem->next;
        }
        /* forget about the parent */
        elem->parent = NULL;
    }

    /* unlink from siblings, if there are any */
    if (elem->prev != elem) {
        if (elem->next) {
            elem->next->prev = elem->prev;
        } else {
            /* unlinking the last element, the first sibling must point to
             * the new last one */
            if (parent) {
                first = parent->child;
            } else {
                /* no parent to ask: walk backwards until the node whose prev
                 * is the last sibling (the only one with next == NULL) */
                first = elem;
                while (first->prev->next) {
                    first = first->prev;
                }
            }
            first->prev = elem->prev;
        }
        if (elem->prev->next) {
            /* elem was not the first sibling */
            elem->prev->next = elem->next;
        }

        /* clean up the unlinked element */
        elem->next = NULL;
        elem->prev = elem;
    }

    /* namespaces must be corrected even when elem had no siblings */
    if (copy_ns < 2) {
        lyxml_correct_ns(ctx, elem, copy_ns);
    }
}
/**
 * @brief Release a single attribute.
 *
 * When parent is given, the attribute is first removed from the parent's
 * attribute list; if it is not a member of that list, nothing is freed.
 * Without a parent the attribute is released directly.
 *
 * @param[in] ctx Context passed to lydict_remove() for the name and value.
 * @param[in] parent Optional owner of the attribute.
 * @param[in] attr Attribute to free, NULL is ignored.
 */
void
lyxml_free_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
{
    struct lyxml_attr **link;

    if (!attr) {
        return;
    }

    if (parent) {
        /* find the pointer (list head or some next member) referencing attr */
        link = &parent->attr;
        while (*link && *link != attr) {
            link = &(*link)->next;
        }
        if (!*link) {
            /* attribute to remove not found */
            return;
        }
        /* bypass the attribute in the list */
        *link = attr->next;
    }

    lydict_remove(ctx, attr->name);
    lydict_remove(ctx, attr->value);
    free(attr);
}
/**
 * @brief Release all attributes (including namespace definitions) of an
 * element.
 *
 * The element's attribute list is reset to NULL afterwards so that the
 * element does not keep a pointer to freed memory.
 *
 * @param[in] ctx Context passed to lydict_remove() for the names and values.
 * @param[in] elem Element whose attributes are freed, NULL is ignored.
 */
void
lyxml_free_attrs(struct ly_ctx *ctx, struct lyxml_elem *elem)
{
    struct lyxml_attr *a, *next;

    if (!elem) {
        return;
    }

    for (a = elem->attr; a; a = next) {
        /* remember the successor before the item is released */
        next = a->next;
        lydict_remove(ctx, a->name);
        lydict_remove(ctx, a->value);
        free(a);
    }
    elem->attr = NULL;
}
/**
 * @brief Release an element together with its attributes and all descendants.
 *
 * The element is not unlinked from its parent or siblings here; that is the
 * caller's job.
 */
static void
lyxml_free_elem_(struct ly_ctx *ctx, struct lyxml_elem *elem)
{
    struct lyxml_elem *kid, *following;

    if (elem) {
        lyxml_free_attrs(ctx, elem);

        /* children: fetch the successor first, the child is gone afterwards */
        for (kid = elem->child; kid; kid = following) {
            following = kid->next;
            lyxml_free_elem_(ctx, kid);
        }

        lydict_remove(ctx, elem->name);
        lydict_remove(ctx, elem->content);
        free(elem);
    }
}
/**
 * @brief Unlink an element from its tree and free it with its whole subtree.
 *
 * Namespaces are left untouched while unlinking (mode 2) because the subtree
 * is destroyed right away.
 */
API void
lyxml_free_elem(struct ly_ctx *ctx, struct lyxml_elem *elem)
{
    if (elem) {
        lyxml_unlink_elem(ctx, elem, 2);
        lyxml_free_elem_(ctx, elem);
    }
}
/**
 * @brief Get the value of a standard (non-namespace) attribute of an element.
 *
 * @param[in] elem Element to search, must not be NULL.
 * @param[in] name Attribute name, must not be NULL.
 * @param[in] ns Namespace value the attribute must belong to; NULL selects
 * only attributes without a namespace.
 * @return Value of the first matching attribute, NULL when there is none.
 */
const char *
lyxml_get_attr(struct lyxml_elem *elem, const char *name, const char *ns)
{
    struct lyxml_attr *cur;

    assert(elem);
    assert(name);

    for (cur = elem->attr; cur; cur = cur->next) {
        if (cur->type == LYXML_ATTR_STD && !strcmp(name, cur->name)) {
            if (ns) {
                /* attribute must be in the requested namespace */
                if (cur->ns && !strcmp(ns, cur->ns->value)) {
                    return cur->value;
                }
            } else if (!cur->ns) {
                /* attribute without a namespace was requested */
                return cur->value;
            }
        }
    }

    return NULL;
}
/**
 * @brief Append an element as the last child of parent.
 *
 * An element that already has a parent is unlinked from it first (namespaces
 * defined outside of its subtree are copied into it).
 *
 * @return Always EXIT_SUCCESS.
 */
int
lyxml_add_child(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_elem *elem)
{
    struct lyxml_elem *head, *tail;

    assert(parent);
    assert(elem);

    /* (re)link element to parent */
    if (elem->parent) {
        lyxml_unlink_elem(ctx, elem, 1);
    }
    elem->parent = parent;
    elem->next = NULL;

    head = parent->child;
    if (!head) {
        /* the only child is also its own last sibling */
        parent->child = elem;
        elem->prev = elem;
    } else {
        /* prev of the first child references the current last child */
        tail = head->prev;
        elem->prev = tail;
        tail->next = elem;
        head->prev = elem;
    }

    return EXIT_SUCCESS;
}
/**
 * @brief Decode one UTF-8 encoded character and check that it is a valid XML
 * character.
 *
 * @param[in] buf NUL-terminated input positioned at the character to decode.
 * @param[out] read Number of bytes the character occupies; 0 at the end of
 * input. On a decoding error it holds the length announced by the first byte.
 * @param[in] line Line number passed to the logging macro.
 * @return The decoded code point, 0 on end of input or invalid data (both are
 * logged via LOGVAL).
 */
int
lyxml_getutf8(const char *buf, unsigned int *read, unsigned int line)
{
    int c, aux;
    int i;

    c = buf[0];
    *read = 0;

    /* buf is NULL terminated string, so 0 means EOF */
    if (!c) {
        LOGVAL(LYE_EOF, line);
        return 0;
    }
    *read = 1;

    /* process character byte(s); the masks below make the tests independent
     * of the signedness of char */
    if ((c & 0xf8) == 0xf0) {
        /* four bytes character */
        *read = 4;

        c &= 0x07;
        for (i = 1; i <= 3; i++) {
            aux = buf[i];
            if ((aux & 0xc0) != 0x80) {
                LOGVAL(LYE_XML_INVAL, line, "input character");
                return 0;
            }

            c = (c << 6) | (aux & 0x3f);
        }

        /* four bytes encode U+10000..U+10FFFF, anything lower is an overlong
         * encoding of a shorter sequence */
        if (c < 0x10000 || c > 0x10ffff) {
            LOGVAL(LYE_XML_INVAL, line, "input character");
            return 0;
        }
    } else if ((c & 0xf0) == 0xe0) {
        /* three bytes character */
        *read = 3;

        c &= 0x0f;
        for (i = 1; i <= 2; i++) {
            aux = buf[i];
            if ((aux & 0xc0) != 0x80) {
                LOGVAL(LYE_XML_INVAL, line, "input character");
                return 0;
            }

            c = (c << 6) | (aux & 0x3f);
        }

        /* reject overlong forms, surrogates and U+FFFE/U+FFFF */
        if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd) {
            LOGVAL(LYE_XML_INVAL, line, "input character");
            return 0;
        }
    } else if ((c & 0xe0) == 0xc0) {
        /* two bytes character */
        *read = 2;

        aux = buf[1];
        if ((aux & 0xc0) != 0x80) {
            LOGVAL(LYE_XML_INVAL, line, "input character");
            return 0;
        }
        c = ((c & 0x1f) << 6) | (aux & 0x3f);

        if (c < 0x80) {
            /* overlong encoding */
            LOGVAL(LYE_XML_INVAL, line, "input character");
            return 0;
        }
    } else if (!(c & 0x80)) {
        /* one byte character, only TAB, LF and CR are allowed below 0x20 */
        if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
            /* invalid character */
            LOGVAL(LYE_XML_INVAL, line, "input character");
            return 0;
        }
    } else {
        /* invalid character (continuation byte or 0xf8..0xff as first byte) */
        LOGVAL(LYE_XML_INVAL, line, "input character");
        return 0;
    }

    return c;
}
/**
 * @brief Skip input up to and including a closing sequence (used for comments
 * and processing instructions).
 *
 * Logs directly.
 *
 * @param[in] data NUL-terminated input to scan.
 * @param[in] endstr Closing sequence to look for.
 * @param[out] len Number of bytes from data to the end of the closing
 * sequence; written only on success.
 * @return EXIT_SUCCESS, or EXIT_FAILURE when the input ends before the
 * closing sequence is found.
 */
static int
parse_ignore(const char *data, const char *endstr, unsigned int *len)
{
    unsigned int slen;
    const char *c = data;

    slen = strlen(endstr);

    /* strncmp() stops at the terminating NUL of the input, so the tail of the
     * buffer is never read past its end */
    while (*c && strncmp(c, endstr, slen)) {
        COUNTLINE(*c);
        c++;
    }
    if (!*c) {
        LOGVAL(LYE_XML_MISS, lineno, "closing sequence", endstr);
        return EXIT_FAILURE;
    }
    c += slen;

    *len = c - data;
    return EXIT_SUCCESS;
}
/* logs directly */
static char *
parse_text(const char *data, char delim, unsigned int *len)
{
#define BUFSIZE 1024
char buf[BUFSIZE];
char *result = NULL, *aux;
unsigned int r;
int o, size = 0;
int cdsect = 0;
int32_t n;
for (*len = o = 0; cdsect || data[*len] != delim; o++) {
if (!data[*len] || (!cdsect && !memcmp(&data[*len], "]]>", 2))) {
LOGVAL(LYE_XML_INVAL, lineno, "element content, \"]]>\" found");
goto error;
}
loop:
if (o > BUFSIZE - 3) {
/* add buffer into the result */
if (result) {
size = size + o;
aux = realloc(result, size + 1);
result = aux;
} else {
size = o;
result = malloc((size + 1) * sizeof *result);
}
memcpy(&result[size - o], buf, o);
/* write again into the beginning of the buffer */
o = 0;
}
if (cdsect || !memcmp(&data[*len], "<![CDATA[", 9)) {
/* CDSect */
if (!cdsect) {
cdsect = 1;
*len += 9;
}
if (data[*len] && !memcmp(&data[*len], "]]>", 3)) {
*len += 3;
cdsect = 0;
o--; /* we don't write any data in this iteration */
} else {
buf[o] = data[*len];
(*len)++;
}
} else if (data[*len] == '&') {
(*len)++;
if (data[*len] != '#') {
/* entity reference - only predefined refs are supported */
if (!memcmp(&data[*len], "lt;", 3)) {
buf[o] = '<';
*len += 3;
} else if (!memcmp(&data[*len], "gt;", 3)) {
buf[o] = '>';
*len += 3;
} else if (!memcmp(&data[*len], "amp;", 4)) {
buf[o] = '&';
*len += 4;
} else if (!memcmp(&data[*len], "apos;", 5)) {
buf[o] = '\'';
*len += 5;
} else if (!memcmp(&data[*len], "quot;", 5)) {
buf[o] = '\"';
*len += 5;
} else {
LOGVAL(LYE_XML_INVAL, lineno, "entity reference (only predefined references are supported)");
goto error;
}
} else {
/* character reference */
(*len)++;
if (isdigit(data[*len])) {
for (n = 0; isdigit(data[*len]); (*len)++) {
n = (10 * n) + (data[*len] - '0');
}
if (data[*len] != ';') {
LOGVAL(LYE_XML_INVAL, lineno, "character reference, missing semicolon");
goto error;
}
} else if (data[(*len)++] == 'x' && isxdigit(data[*len])) {
for (n = 0; isxdigit(data[*len]); (*len)++) {
if (isdigit(data[*len])) {
r = (data[*len] - '0');
} else if (data[*len] > 'F') {
r = 10 + (data[*len] - 'a');
} else {
r = 10 + (data[*len] - 'A');
}
n = (16 * n) + r;
}
} else {
LOGVAL(LYE_XML_INVAL, lineno, "character reference");
goto error;
}
r = pututf8(&buf[o], n, lineno);
if (!r) {
LOGVAL(LYE_XML_INVAL, lineno, "character reference value");
goto error;
}
o += r - 1; /* o is ++ in for loop */
(*len)++;
}
} else {
buf[o] = data[*len];
COUNTLINE(buf[o]);
(*len)++;
}
}
if (delim == '<' && !memcmp(&data[*len], "<![CDATA[", 9)) {
/* ignore loop's end condition on beginning of CDSect */
goto loop;
}
#undef BUFSIZE
if (o) {
if (result) {
size = size + o;
aux = realloc(result, size + 1);
result = aux;
} else {
size = o;
result = malloc((size + 1) * sizeof *result);
}
memcpy(&result[size - o], buf, o);
}
if (result) {
result[size] = '\0';
} else {
size = 0;
result = strdup("");
}
return result;
error:
free(result);
return NULL;
}
/* logs directly */
static struct lyxml_attr *
parse_attr(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent)
{
const char *c = data, *start, *delim;
char prefix[32];
int uc;
struct lyxml_attr *attr = NULL, *a;
unsigned int size;
/* check if it is attribute or namespace */
if (!memcmp(c, "xmlns", 5)) {
/* namespace */
attr = calloc(1, sizeof (struct lyxml_ns));
attr->type = LYXML_ATTR_NS;
((struct lyxml_ns *)attr)->parent = parent;
c += 5;
if (*c != ':') {
/* default namespace, prefix will be empty */
goto equal;
}
c++; /* go after ':' to the prefix value */
} else {
/* attribute */
attr = calloc(1, sizeof *attr);
attr->type = LYXML_ATTR_STD;
}
/* process name part of the attribute */
start = c;
uc = lyxml_getutf8(c, &size, lineno);
if (!is_xmlnamestartchar(uc)) {
LOGVAL(LYE_XML_INVAL, lineno, "NameStartChar of the attribute");
free(attr);
return NULL;
}
c += size;
uc = lyxml_getutf8(c, &size, lineno);
while (is_xmlnamechar(uc)) {
if (attr->type == LYXML_ATTR_STD && *c == ':') {
/* attribute in a namespace */
start = c + 1;
/* look for the prefix in namespaces */
memcpy(prefix, data, c - data);
prefix[c - data] = '\0';
attr->ns = lyxml_get_ns(parent, prefix);
}
c += size;
uc = lyxml_getutf8(c, &size, lineno);
}
/* store the name */
size = c - start;
attr->name = lydict_insert(ctx, start, size);
equal:
/* check Eq mark that can be surrounded by whitespaces */
ign_xmlws(c);
if (*c != '=') {
LOGVAL(LYE_XML_INVAL, lineno, "attribute definition, \"=\" expected");
goto error;
}
c++;
ign_xmlws(c);
/* process value part of the attribute */
if (!*c || (*c != '"' && *c != '\'')) {
LOGVAL(LYE_XML_INVAL, lineno, "attribute value, \" or \' expected");
goto error;
}
delim = c;
attr->value = lydict_insert_zc(ctx, parse_text(++c, *delim, &size));
if (ly_errno) {
goto error;
}
*len = c + size + 1 - data; /* +1 is delimiter size */
/* put attribute into the parent's attributes list */
if (parent->attr) {
/* go to the end of the list */
for (a = parent->attr; a->next; a = a->next);
/* and append new attribute */
a->next = attr;
} else {
/* add the first attribute in the list */
parent->attr = attr;
}
return attr;
error:
lyxml_free_attr(ctx, NULL, attr);
return NULL;
}
/* logs directly */
static struct lyxml_elem *
parse_elem(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent)
{
const char *c = data, *start, *e;
const char *lws; /* leading white space for handling mixed content */
int uc;
char *str;
char prefix[32] = { 0 };
unsigned int prefix_len = 0;
struct lyxml_elem *elem = NULL, *child;
struct lyxml_attr *attr;
unsigned int size;
int nons_flag = 0, closed_flag = 0;
*len = 0;
if (*c != '<') {
return NULL;
}
/* locate element name */
c++;
e = c;
uc = lyxml_getutf8(e, &size, lineno);
if (!is_xmlnamestartchar(uc)) {
LOGVAL(LYE_XML_INVAL, lineno, "NameStartChar of the element");
return NULL;
}
e += size;
uc = lyxml_getutf8(e, &size, lineno);
while (is_xmlnamechar(uc)) {
if (*e == ':') {
if (prefix_len) {
LOGVAL(LYE_XML_INVAL, lineno, "element name, multiple colons found");
goto error;
}
/* element in a namespace */
start = e + 1;
/* look for the prefix in namespaces */
memcpy(prefix, c, prefix_len = e - c);
prefix[prefix_len] = '\0';
c = start;
}
e += size;
uc = lyxml_getutf8(e, &size, lineno);
}
if (!*e) {
LOGVAL(LYE_EOF, lineno);
return NULL;
}
/* allocate element structure */
elem = calloc(1, sizeof *elem);
#ifndef NDEBUG
elem->line = lineno;
#endif
elem->next = NULL;
elem->prev = elem;
if (parent) {
lyxml_add_child(ctx, parent, elem);
}
/* store the name into the element structure */
elem->name = lydict_insert(ctx, c, e - c);
c = e;
process:
ly_errno = 0;
ign_xmlws(c);
if (!memcmp("/>", c, 2)) {
/* we are done, it was EmptyElemTag */
c += 2;
closed_flag = 1;
} else if (*c == '>') {
/* process element content */
c++;
lws = NULL;
while (*c) {
if (!memcmp(c, "</", 2)) {
if (lws && !elem->child) {
/* leading white spaces were actually content */
goto store_content;
}
/* Etag */
c += 2;
/* get name and check it */
e = c;
uc = lyxml_getutf8(e, &size, lineno);
if (!is_xmlnamestartchar(uc)) {
LOGVAL(LYE_XML_INVAL, lineno, "NameStartChar of the attribute");
goto error;
}
e += size;
uc = lyxml_getutf8(e, &size, lineno);
while (is_xmlnamechar(uc)) {
if (*e == ':') {
/* element in a namespace */
start = e + 1;
/* look for the prefix in namespaces */
if (memcmp(prefix, c, e - c)) {
LOGVAL(LYE_SPEC, lineno,
"Mixed opening (%s) and closing element tags (different namespaces).", elem->name);
goto error;
}
c = start;
}
e += size;
uc = lyxml_getutf8(e, &size, lineno);
}
if (!*e) {
LOGVAL(LYE_EOF, lineno);
goto error;
}
/* check that it corresponds to opening tag */
size = e - c;
str = malloc((size + 1) * sizeof *str);
memcpy(str, c, e - c);
str[e - c] = '\0';
if (size != strlen(elem->name) || memcmp(str, elem->name, size)) {
LOGVAL(LYE_SPEC, lineno, "Mixed opening (%s) and closing (%s) element tags.", elem->name, str);
free(str);
goto error;
}
free(str);
c = e;
ign_xmlws(c);
if (*c != '>') {
LOGVAL(LYE_SPEC, lineno, "Close element tag \"%s\" contain additional data.", elem->name);
goto error;
}
c++;
closed_flag = 1;
break;
} else if (!memcmp(c, "<?", 2)) {
if (lws) {
/* leading white spaces were only formatting */
lws = NULL;
}
/* PI - ignore it */
c += 2;
if (parse_ignore(c, "?>", &size)) {
goto error;
}
c += size;
} else if (!memcmp(c, "<!--", 4)) {
if (lws) {
/* leading white spaces were only formatting */
lws = NULL;
}
/* Comment - ignore it */
c += 4;
if (parse_ignore(c, "-->", &size)) {
goto error;
}
c += size;
} else if (!memcmp(c, "<![CDATA[", 9)) {
/* CDSect */
goto store_content;
} else if (*c == '<') {
if (lws) {
if (elem->flags & LYXML_ELEM_MIXED) {
/* we have a mixed content */
goto store_content;
} else {
/* leading white spaces were only formatting */
lws = NULL;
}
}
if (elem->content) {
/* we have a mixed content */
child = calloc(1, sizeof *child);
child->content = elem->content;
elem->content = NULL;
lyxml_add_child(ctx, elem, child);
elem->flags |= LYXML_ELEM_MIXED;
}
child = parse_elem(ctx, c, &size, elem);
if (!child) {
goto error;
}
c += size; /* move after processed child element */
} else if (is_xmlws(*c)) {
lws = c;
#ifndef NDEBUG
lws_lineno = lineno;
#endif
ign_xmlws(c);
} else {
store_content:
/* store text content */
if (lws) {
/* process content including the leading white spaces */
c = lws;
#ifndef NDEBUG
lineno = lws_lineno;
#endif
lws = NULL;
}
elem->content = lydict_insert_zc(ctx, parse_text(c, '<', &size));
if (ly_errno) {
goto error;
}
c += size; /* move after processed text content */
if (elem->child) {
/* we have a mixed content */
child = calloc(1, sizeof *child);
child->content = elem->content;
elem->content = NULL;
lyxml_add_child(ctx, elem, child);
elem->flags |= LYXML_ELEM_MIXED;
}
}
}
} else {
/* process attribute */
attr = parse_attr(ctx, c, &size, elem);
if (!attr) {
goto error;
}
c += size; /* move after processed attribute */
/* check namespace */
if (attr->type == LYXML_ATTR_NS) {
if (!prefix[0] && !attr->name) {
if (attr->value) {
/* default prefix */
elem->ns = (struct lyxml_ns *)attr;
} else {
/* xmlns="" -> no namespace */
nons_flag = 1;
}
} else if (prefix[0] && attr->name && !memcmp(attr->name, prefix, prefix_len + 1)) {
/* matching namespace with prefix */
elem->ns = (struct lyxml_ns *)attr;
}
}
/* go back to finish element processing */
goto process;
}
*len = c - data;
if (!closed_flag) {
LOGVAL(LYE_XML_MISS, lineno, "closing element tag", elem->name);
goto error;
}
if (!elem->ns && !nons_flag && parent) {
elem->ns = lyxml_get_ns(parent, prefix_len ? prefix : NULL);
}
return elem;
error:
lyxml_free_elem(ctx, elem);
return NULL;
}
/**
 * @brief Parse an XML document stored in a NUL-terminated string.
 *
 * Logs directly. White space, processing instructions (including the XML
 * declaration) and comments in front of the root element are skipped;
 * DOCTYPE is not supported. Data following the root element other than
 * white space are only reported as a warning.
 *
 * @param[in] ctx Context used for the dictionary.
 * @param[in] data Document to parse.
 * @param[in] options Unused.
 * @return Root element of the parsed tree, NULL on error.
 */
API struct lyxml_elem *
lyxml_read(struct ly_ctx *ctx, const char *data, int UNUSED(options))
{
    const char *c = data;
    unsigned int len;
    struct lyxml_elem *root = NULL;

    if (!data) {
        LOGERR(LY_EINVAL, "%s: Invalid parameter.", __func__);
        return NULL;
    }

#ifndef NDEBUG
    /* TODO: threads support */
    lineno = 1;
#endif

    /* process document */
    while (*c) {
        if (is_xmlws(*c)) {
            /* skip whitespaces */
            ign_xmlws(c);
        } else if (!strncmp(c, "<?", 2)) {
            /* XMLDecl or PI - ignore it */
            c += 2;
            if (parse_ignore(c, "?>", &len)) {
                return NULL;
            }
            c += len;
        } else if (!strncmp(c, "<!--", 4)) {
            /* Comment - ignore it; skip the whole opening sequence so that
             * its dashes cannot be taken for a part of the closing one */
            c += 4;
            if (parse_ignore(c, "-->", &len)) {
                return NULL;
            }
            c += len;
        } else if (!strncmp(c, "<!", 2)) {
            /* DOCTYPE */
            /* TODO - standalone ignore counting < and > */
            LOGERR(LY_EINVAL, "DOCTYPE not supported in XML documents.");
            return NULL;
        } else if (*c == '<') {
            /* element - process it in next loop to strictly follow XML
             * format
             */
            break;
        } else {
            LOGVAL(LYE_XML_INCHAR, lineno, c);
            return NULL;
        }
    }

    root = parse_elem(ctx, c, &len, NULL);
    if (!root) {
        return NULL;
    }
    c += len;

    /* ignore the rest of document where can be comments, PIs and whitespaces,
     * note that we are not detecting syntax errors in these parts
     */
    ign_xmlws(c);
    if (*c) {
        LOGWRN("There are some not parsed data:\n%s", c);
    }

    return root;
}
/**
 * @brief Placeholder for reading an XML document from a file descriptor.
 *
 * Not implemented: an error is logged (invalid parameter for fd -1 or a NULL
 * context, internal error otherwise) and NULL is always returned.
 */
API struct lyxml_elem *
lyxml_read_fd(struct ly_ctx *ctx, int fd, int UNUSED(options))
{
    if (ctx && fd != -1) {
        /* valid arguments, but there is nothing to do with them yet */
        LOGERR(LY_EINT, "%s function is not implemented", __func__);
    } else {
        LOGERR(LY_EINVAL, "%s: Invalid parameter.", __func__);
    }

    return NULL;
}
/**
 * @brief Placeholder for reading an XML document from a named file.
 *
 * Not implemented: an error is logged (invalid parameter for a NULL filename
 * or context, internal error otherwise) and NULL is always returned.
 */
API struct lyxml_elem *
lyxml_read_file(struct ly_ctx *ctx, const char *filename, int UNUSED(options))
{
    if (ctx && filename) {
        /* valid arguments, but there is nothing to do with them yet */
        LOGERR(LY_EINT, "%s function is not implemented", __func__);
    } else {
        LOGERR(LY_EINVAL, "%s: Invalid parameter.", __func__);
    }

    return NULL;
}
/**
 * @brief Print text with the characters '&', '<' and '>' replaced by the
 * corresponding entity references.
 *
 * @param[in] out Output handler.
 * @param[in] text Text to print, NULL prints nothing.
 * @return Sum of the values returned by ly_print() for the references plus
 * one for every character written unchanged.
 */
int
lyxml_dump_text(struct lyout *out, const char *text)
{
    const char *p;
    unsigned int written = 0;

    if (!text) {
        return 0;
    }

    for (p = text; *p; p++) {
        if (*p == '&') {
            written += ly_print(out, "&amp;");
        } else if (*p == '<') {
            written += ly_print(out, "&lt;");
        } else if (*p == '>') {
            /* not needed, just for readability */
            written += ly_print(out, "&gt;");
        } else {
            ly_write(out, p, 1);
            written++;
        }
    }

    return written;
}
static int
dump_elem(struct lyout *out, struct lyxml_elem *e, int level, int options)
{
int size = 0;
struct lyxml_attr *a;
struct lyxml_elem *child;
const char *delim, *delim_outer;
int indent;
if (!e->name) {
/* mixed content */
if (e->content) {
return lyxml_dump_text(out, e->content);
} else {
return 0;
}
}
delim = delim_outer = (options & LYXML_DUMP_FORMAT) ? "\n" : "";
indent = 2 * level;
if ((e->flags & LYXML_ELEM_MIXED) || (e->parent && (e->parent->flags & LYXML_ELEM_MIXED))) {
delim = "";
}
if (e->parent && (e->parent->flags & LYXML_ELEM_MIXED)) {
delim_outer = "";
indent = 0;
}
if (!(options & (LYXML_DUMP_OPEN|LYXML_DUMP_CLOSE|LYXML_DUMP_ATTRS)) || (options & LYXML_DUMP_OPEN)) {
/* opening tag */
if (e->ns && e->ns->prefix) {
size += ly_print(out, "%*s<%s:%s", indent, "", e->ns->prefix, e->name);
} else {
size += ly_print(out, "%*s<%s", indent, "", e->name);
}
} else if (options & LYXML_DUMP_CLOSE) {
indent = 0;
goto close;
}
/* attributes */
for (a = e->attr; a; a = a->next) {
if (a->type == LYXML_ATTR_NS) {
if (a->name) {
size += ly_print(out, " xmlns:%s=\"%s\"", a->name, a->value ? a->value : "");
} else {
size += ly_print(out, " xmlns=\"%s\"", a->value ? a->value : "");
}
} else if (a->ns && a->ns->prefix) {
size += ly_print(out, " %s:%s=\"%s\"", a->ns->prefix, a->name, a->value);
} else {
size += ly_print(out, " %s=\"%s\"", a->name, a->value);
}
}
/* apply options */
if ((options & LYXML_DUMP_CLOSE) && (options & LYXML_DUMP_OPEN)) {
size += ly_print(out, "/>%s", delim);
return size;
} else if (options & LYXML_DUMP_OPEN) {
ly_print(out, ">");
return ++size;
} else if (options & LYXML_DUMP_ATTRS) {
return size;
}
if (!e->child && !e->content) {
size += ly_print(out, "/>%s", delim);
return size;
} else if (e->content) {
ly_print(out, ">");
size++;
size += lyxml_dump_text(out, e->content);
if (e->ns && e->ns->prefix) {
size += ly_print(out, "</%s:%s>%s", e->ns->prefix, e->name, delim);
} else {
size += ly_print(out, "</%s>%s", e->name, delim);
}
return size;
} else {
size += ly_print(out, ">%s", delim);
}
/* go recursively */
LY_TREE_FOR(e->child, child) {
if (options & LYXML_DUMP_FORMAT) {
size += dump_elem(out, child, level + 1, LYXML_DUMP_FORMAT);
} else {
size += dump_elem(out, child, level, 0);
}
}
close:
/* closing tag */
if (e->ns && e->ns->prefix) {
size += ly_print(out, "%*s</%s:%s>%s", indent, "", e->ns->prefix, e->name, delim_outer);
} else {
size += ly_print(out, "%*s</%s>%s", indent, "", e->name, delim_outer);
}
return size;
}
/**
 * @brief Print an element into a stdio stream.
 *
 * @return Value returned by dump_elem(), 0 when stream or elem is NULL.
 */
API int
lyxml_dump(FILE *stream, struct lyxml_elem *elem, int options)
{
    struct lyout out;

    if (stream && elem) {
        out.type = LYOUT_STREAM;
        out.method.f = stream;
        return dump_elem(&out, elem, 0, options);
    }

    return 0;
}
/**
 * @brief Print an element into a file descriptor.
 *
 * @return Value returned by dump_elem(), 0 when fd is negative or elem is
 * NULL.
 */
API int
lyxml_dump_fd(int fd, struct lyxml_elem *elem, int options)
{
    struct lyout out;

    if (fd >= 0 && elem) {
        out.type = LYOUT_FD;
        out.method.fd = fd;
        return dump_elem(&out, elem, 0, options);
    }

    return 0;
}
/**
 * @brief Print an element into a memory buffer.
 *
 * @param[out] strp Receives the buffer pointer kept by the output handler
 * after printing (presumably allocated by the printing functions - confirm
 * there who is expected to free it).
 * @return Value returned by dump_elem(), 0 when strp or elem is NULL.
 */
API int
lyxml_dump_mem(char **strp, struct lyxml_elem *elem, int options)
{
    struct lyout out;
    int printed;

    if (!strp || !elem) {
        return 0;
    }

    /* start with an empty memory buffer */
    out.type = LYOUT_MEMORY;
    out.method.mem.size = 0;
    out.method.mem.len = 0;
    out.method.mem.buf = NULL;

    printed = dump_elem(&out, elem, 0, options);
    *strp = out.method.mem.buf;

    return printed;
}
/**
 * @brief Print an element using a caller supplied write callback.
 *
 * @param[in] writeclb Callback stored in the output handler.
 * @param[in] arg Argument stored in the output handler together with the
 * callback.
 * @return Value returned by dump_elem(), 0 when writeclb or elem is NULL.
 */
API int
lyxml_dump_clb(ssize_t (*writeclb)(void *arg, const void *buf, size_t count), void *arg, struct lyxml_elem *elem, int options)
{
    struct lyout out;

    if (writeclb && elem) {
        out.type = LYOUT_CALLBACK;
        out.method.clb.arg = arg;
        out.method.clb.f = writeclb;
        return dump_elem(&out, elem, 0, options);
    }

    return 0;
}