/* $NetBSD: prop_intern.c,v 1.2 2025/05/14 03:25:46 thorpej Exp $ */ /*- * Copyright (c) 2006, 2007, 2025 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "prop_object_impl.h" #include #if !defined(_KERNEL) && !defined(_STANDALONE) #include #include #include #include #endif /* !_KERNEL && !_STANDALONE */ /* * _prop_intern_skip_whitespace -- * Skip a span of whitespace. Returns a pointer to the first character at or after cp for which _PROP_ISSPACE() is false; cp itself is returned when it does not point at whitespace. */ const char * _prop_intern_skip_whitespace(const char *cp) { while (_PROP_ISSPACE(*cp)) { cp++; } return cp; } /* * _prop_intern_match -- * Returns true if the two character streams match. 
*/ bool _prop_intern_match(const char *str1, size_t len1, const char *str2, size_t len2) { return (len1 == len2 && memcmp(str1, str2, len1) == 0); } /* * _prop_xml_intern_skip_comment -- * Skip the body and end tag of an XML comment. */ static bool _prop_xml_intern_skip_comment(struct _prop_object_internalize_context *ctx) { const char *cp = ctx->poic_cp; for (cp = ctx->poic_cp; !_PROP_EOF(*cp); cp++) { if (cp[0] == '-' && cp[1] == '-' && cp[2] == '>') { ctx->poic_cp = cp + 3; return true; } } return false; /* ran out of buffer */ } /* * _prop_xml_intern_find_tag -- * Find the next tag in an XML stream. Optionally compare the found * tag to an expected tag name. State of the context is undefined * if this routine returns false. Upon success, the context points * to the first octet after the tag. */ bool _prop_xml_intern_find_tag(struct _prop_object_internalize_context *ctx, const char *tag, _prop_tag_type_t type) { const char *cp; size_t taglen; taglen = tag != NULL ? strlen(tag) : 0; start_over: cp = ctx->poic_cp; /* * Find the start of the tag. */ cp = _prop_intern_skip_whitespace(cp); if (*cp != '<') { return false; } ctx->poic_tag_start = cp++; if (_PROP_EOF(*cp)) { return false; } if (*cp == '!') { if (cp[1] != '-' || cp[2] != '-') { return false; } /* * Comment block -- only allowed if we are allowed to * return a start tag. 
*/ if (type == _PROP_TAG_TYPE_END) { return false; } ctx->poic_cp = cp + 3; if (_prop_xml_intern_skip_comment(ctx) == false) { return false; } goto start_over; } if (*cp == '/') { if (type != _PROP_TAG_TYPE_END && type != _PROP_TAG_TYPE_EITHER) { return false; } cp++; if (_PROP_EOF(*cp)) { return false; } ctx->poic_tag_type = _PROP_TAG_TYPE_END; } else { if (type != _PROP_TAG_TYPE_START && type != _PROP_TAG_TYPE_EITHER) { return false; } ctx->poic_tag_type = _PROP_TAG_TYPE_START; } ctx->poic_tagname = cp; while (!_PROP_ISSPACE(*cp) && *cp != '/' && *cp != '>') { if (_PROP_EOF(*cp)) { return false; } cp++; } ctx->poic_tagname_len = cp - ctx->poic_tagname; /* Make sure this is the tag we're looking for. */ if (tag != NULL && (taglen != ctx->poic_tagname_len || memcmp(tag, ctx->poic_tagname, taglen) != 0)) { return false; } /* Check for empty tag. */ if (*cp == '/') { if (ctx->poic_tag_type != _PROP_TAG_TYPE_START) { return false; /* only valid on start tags */ } ctx->poic_is_empty_element = true; cp++; if (_PROP_EOF(*cp) || *cp != '>') { return false; } } else { ctx->poic_is_empty_element = false; } /* Easy case of no arguments. 
*/ if (*cp == '>') { ctx->poic_tagattr = NULL; ctx->poic_tagattr_len = 0; ctx->poic_tagattrval = NULL; ctx->poic_tagattrval_len = 0; ctx->poic_cp = cp + 1; return true; } _PROP_ASSERT(!_PROP_EOF(*cp)); cp++; if (_PROP_EOF(*cp)) { return false; } cp = _prop_intern_skip_whitespace(cp); if (_PROP_EOF(*cp)) { return false; } ctx->poic_tagattr = cp; while (!_PROP_ISSPACE(*cp) && *cp != '=') { if (_PROP_EOF(*cp)) { return false; } cp++; } ctx->poic_tagattr_len = cp - ctx->poic_tagattr; cp++; if (*cp != '\"') { return false; } cp++; if (_PROP_EOF(*cp)) { return false; } ctx->poic_tagattrval = cp; while (*cp != '\"') { if (_PROP_EOF(*cp)) { return false; } cp++; } ctx->poic_tagattrval_len = cp - ctx->poic_tagattrval; cp++; if (*cp != '>') { return false; } ctx->poic_cp = cp + 1; return true; } #define INTERNALIZER(t, f) \ { t, sizeof(t) - 1, f } static const struct _prop_object_internalizer { const char *poi_tag; size_t poi_taglen; prop_object_internalizer_t poi_intern; } _prop_object_internalizer_table[] = { INTERNALIZER("array", _prop_array_internalize), INTERNALIZER("true", _prop_bool_internalize), INTERNALIZER("false", _prop_bool_internalize), INTERNALIZER("data", _prop_data_internalize), INTERNALIZER("dict", _prop_dictionary_internalize), INTERNALIZER("integer", _prop_number_internalize), INTERNALIZER("string", _prop_string_internalize), { 0, 0, NULL } }; #undef INTERNALIZER /* * _prop_xml_intern_by_tag -- * Determine the object type from the tag in the context and * internalize it. 
*/ static prop_object_t _prop_xml_intern_by_tag(struct _prop_object_internalize_context *ctx) { const struct _prop_object_internalizer *poi; prop_object_t obj, parent_obj; void *data, *iter; prop_object_internalizer_continue_t iter_func; struct _prop_stack stack; _prop_stack_init(&stack); match_start: for (poi = _prop_object_internalizer_table; poi->poi_tag != NULL; poi++) { if (_prop_intern_match(ctx->poic_tagname, ctx->poic_tagname_len, poi->poi_tag, poi->poi_taglen)) { break; } } if (poi == NULL || poi->poi_tag == NULL) { while (_prop_stack_pop(&stack, &obj, &iter, &data, NULL)) { iter_func = (prop_object_internalizer_continue_t)iter; (*iter_func)(&stack, &obj, ctx, data, NULL); } return NULL; } obj = NULL; if (!(*poi->poi_intern)(&stack, &obj, ctx)) { goto match_start; } parent_obj = obj; while (_prop_stack_pop(&stack, &parent_obj, &iter, &data, NULL)) { iter_func = (prop_object_internalizer_continue_t)iter; if (!(*iter_func)(&stack, &parent_obj, ctx, data, obj)) { goto match_start; } obj = parent_obj; } return parent_obj; } #define ADDCHAR(x) \ do { \ if (target) { \ if (tarindex >= targsize) { \ return false; \ } \ target[tarindex] = (x); \ } \ tarindex++; \ } while (/*CONSTCOND*/0) /* * _prop_json_intern_decode_uesc_getu16 -- * Get the 16-bit value from a "u-escape" ("\uXXXX"). 
*/ static unsigned int _prop_json_intern_decode_uesc_getu16(const char *src, unsigned int idx, uint16_t *valp) { unsigned int i; uint16_t val; unsigned char c; if (src[idx] != '\\' || src[idx + 1] != 'u') { return 0; } for (val = 0, i = 2; i < 6; i++) { val <<= 4; c = src[idx + i]; if (c >= 'A' && c <= 'F') { val |= 10 + (c - 'A'); } else if (c >= 'a' && c <= 'f') { val |= 10 + (c - 'a'); } else if (c >= '0' && c <= '9') { val |= c - '0'; } else { return 0; } } *valp = val; return idx + i; } #define HS_FIRST 0xd800 #define HS_LAST 0xdbff #define HS_SHIFT 10 #define LS_FIRST 0xdc00 #define LS_LAST 0xdfff #define HIGH_SURROGAGE_P(x) \ ((x) >= HS_FIRST && (x) <= HS_LAST) #define LOW_SURROGATE_P(x) \ ((x) >= LS_FIRST && (x) <= LS_LAST) #define SURROGATE_P(x) \ (HIGH_SURROGAGE_P(x) || LOW_SURROGATE_P(x)) /* * _prop_json_intern_decode_uesc -- * Decode a JSON UTF-16 "u-escape" ("\uXXXX"). */ static int _prop_json_intern_decode_uesc(const char *src, char *c, unsigned int *cszp) { unsigned int idx = 0; uint32_t code; uint16_t code16[2] = { 0, 0 }; idx = _prop_json_intern_decode_uesc_getu16(src, idx, &code16[0]); if (idx == 0) { return 0; } if (! SURROGATE_P(code16[0])) { /* Simple case: not a surrogate pair */ code = code16[0]; } else if (HIGH_SURROGAGE_P(code16[0])) { idx = _prop_json_intern_decode_uesc_getu16(src, idx, &code16[1]); if (idx == 0) { return 0; } /* Next code must be the low surrogate. */ if (! LOW_SURROGATE_P(code16[1])) { return 0; } code = (((uint32_t)code16[0] - HS_FIRST) << HS_SHIFT) + ( code16[1] - LS_FIRST) + 0x10000; } else { /* Got the low surrogate first; this is an error. */ return 0; } /* * Ok, we have the code point. Now convert it to UTF-8. * First we'll just split into nybbles. */ uint8_t u = (code >> 20) & 0xf; uint8_t v = (code >> 16) & 0xf; uint8_t w = (code >> 12) & 0xf; uint8_t x = (code >> 8) & 0xf; uint8_t y = (code >> 4) & 0xf; uint8_t z = (code ) & 0xf; /* * ...and swizzle the nybbles accordingly. * * N.B. 
we expcitly disallow inserting a NUL into the string * by way of a \uXXXX escape. */ if (code == 0) { /* Not allowed. */ return 0; } else if (/*code >= 0x0000 &&*/ code <= 0x007f) { c[0] = (char)code; /* == (y << 4) | z */ *cszp = 1; } else if (/*code >= 0x0080 &&*/ code <= 0x07ff) { c[0] = 0xc0 | (x << 2) | (y >> 2); c[1] = 0x80 | ((y & 3) << 4) | z; *cszp = 2; } else if (/*code >= 0x0800 &&*/ code <= 0xffff) { c[0] = 0xe0 | w; c[1] = 0x80 | (x << 2) | (y >> 2); c[2] = 0x80 | ((y & 3) << 4) | z; *cszp = 3; } else if (/*code >= 0x010000 &&*/ code <= 0x10ffff) { c[0] = 0xf0 | ((u & 1) << 2) | (v >> 2); c[1] = 0x80 | ((v & 3) << 4) | w; c[2] = 0x80 | (x << 2) | (y >> 2); c[3] = 0x80 | ((y & 3) << 4) | z; *cszp = 4; } else { /* Invalid code. */ return 0; } return idx; /* advance input by this much */ } #undef HS_FIRST #undef HS_LAST #undef LS_FIRST #undef LS_LAST #undef HIGH_SURROGAGE_P #undef LOW_SURROGATE_P #undef SURROGATE_P /* * _prop_json_intern_decode_string -- * Decode a JSON-encoded string. 
*/
/*
 *	Decoding starts at ctx->poic_cp and stops at the first unescaped
 *	double quote; ctx->poic_cp itself is not modified.
 *
 *	If "target" is non-NULL, decoded octets are stored there and
 *	false is returned if more than "targsize" octets are needed.  If
 *	"target" is NULL, the octets are only counted.
 *
 *	On success, returns true, stores the decoded length in *sizep
 *	and a pointer to the closing quote in *cpp (each only if
 *	non-NULL).  Returns false if the buffer ends before the closing
 *	quote or an invalid escape sequence is found.
 */
static bool
_prop_json_intern_decode_string(struct _prop_object_internalize_context *ctx,
    char *target, size_t targsize, size_t *sizep, const char **cpp)
{
	const char *in = ctx->poic_cp;
	size_t tarindex = 0;		/* name is used by ADDCHAR() */
	char out[4];
	unsigned int outlen, i;

	for (;;) {
		if (_PROP_EOF(*in)) {
			return false;
		}
		if (*in == '"') {
			break;
		}

		outlen = 1;
		if (*in != '\\') {
			/* Ordinary octet: copy it through. */
			out[0] = *in++;
		} else {
			/* Two-octet escapes unless the 'u' case says more. */
			int consumed = 2;

			switch (in[1]) {
			case '"':	/* quotation mark */
			case '\\':	/* reverse solidus */
			case '/':	/* solidus */
				/* identity mapping */
				out[0] = in[1];
				break;

			case 'b':	/* backspace */
				out[0] = 0x08;
				break;

			case 'f':	/* form feed */
				out[0] = 0x0c;
				break;

			case 'n':	/* line feed */
				out[0] = 0x0a;
				break;

			case 'r':	/* carriage return */
				out[0] = 0x0d;
				break;

			case 't':	/* tab */
				out[0] = 0x09;
				break;

			case 'u':	/* UTF-16 escape, 1-4 octets out */
				consumed = _prop_json_intern_decode_uesc(
				    in, out, &outlen);
				if (consumed == 0) {
					return false;
				}
				break;

			default:	/* invalid escape */
				return false;
			}
			in += consumed;
		}

		for (i = 0; i < outlen; i++) {
			ADDCHAR(out[i]);
		}
	}

	_PROP_ASSERT(*in == '"');

	if (sizep != NULL) {
		*sizep = tarindex;
	}
	if (cpp != NULL) {
		*cpp = in;
	}

	return true;
}

/*
 * _prop_xml_intern_decode_string --
 *	Decode an XML-encoded string.
*/ static bool _prop_xml_intern_decode_string(struct _prop_object_internalize_context *ctx, char *target, size_t targsize, size_t *sizep, const char **cpp) { const char *src; size_t tarindex; char c; tarindex = 0; src = ctx->poic_cp; for (;;) { if (_PROP_EOF(*src)) { return true; } if (*src == '<') { break; } if ((c = *src) == '&') { if (src[1] == 'a' && src[2] == 'm' && src[3] == 'p' && src[4] == ';') { c = '&'; src += 5; } else if (src[1] == 'l' && src[2] == 't' && src[3] == ';') { c = '<'; src += 4; } else if (src[1] == 'g' && src[2] == 't' && src[3] == ';') { c = '>'; src += 4; } else if (src[1] == 'a' && src[2] == 'p' && src[3] == 'o' && src[4] == 's' && src[5] == ';') { c = '\''; src += 6; } else if (src[1] == 'q' && src[2] == 'u' && src[3] == 'o' && src[4] == 't' && src[5] == ';') { c = '\"'; src += 6; } else { return false; } } else { src++; } ADDCHAR(c); } _PROP_ASSERT(*src == '<'); if (sizep != NULL) { *sizep = tarindex; } if (cpp != NULL) { *cpp = src; } return true; } #undef ADDCHAR /* * _prop_intern_decode_string -- * Decode an encoded string. */ bool _prop_intern_decode_string(struct _prop_object_internalize_context *ctx, char *target, size_t targsize, size_t *sizep, const char **cpp) { _PROP_ASSERT(ctx->poic_format == PROP_FORMAT_XML || ctx->poic_format == PROP_FORMAT_JSON); switch (ctx->poic_format) { case PROP_FORMAT_JSON: return _prop_json_intern_decode_string(ctx, target, targsize, sizep, cpp); default: /* PROP_FORMAT_XML */ return _prop_xml_intern_decode_string(ctx, target, targsize, sizep, cpp); } } /* * _prop_intern_context_alloc -- * Allocate an internalize context. 
*/ static struct _prop_object_internalize_context * _prop_intern_context_alloc(const char *data, prop_format_t fmt) { struct _prop_object_internalize_context *ctx; ctx = _PROP_MALLOC(sizeof(*ctx), M_TEMP); if (ctx == NULL) { return NULL; } ctx->poic_format = fmt; ctx->poic_data = ctx->poic_cp = data; /* * If we're digesting JSON, check for a byte order mark and * skip it, if present. We should never see one, but we're * allowed to detect and ignore it. (RFC 8259 section 8.1) */ if (fmt == PROP_FORMAT_JSON) { if (((unsigned char)data[0] == 0xff && (unsigned char)data[1] == 0xfe) || ((unsigned char)data[0] == 0xfe && (unsigned char)data[1] == 0xff)) { ctx->poic_cp = data + 2; } /* No additional processing work to do for JSON. */ return ctx; } /* * Skip any whitespace and XML preamble stuff that we don't * know about / care about. */ for (;;) { data = _prop_intern_skip_whitespace(data); if (_PROP_EOF(*data) || *data != '<') { goto bad; } #define MATCH(str) (strncmp(&data[1], str, strlen(str)) == 0) /* * Skip over the XML preamble that Apple XML property * lists usually include at the top of the file. */ if (MATCH("?xml ") || MATCH("!DOCTYPE plist")) { while (*data != '>' && !_PROP_EOF(*data)) { data++; } if (_PROP_EOF(*data)) { goto bad; } data++; /* advance past the '>' */ continue; } if (MATCH("